{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 81.8027950310559, "eval_steps": 2783, "global_step": 948258, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004830917874396135, "grad_norm": 2.5537285804748535, "learning_rate": 3.9539899352983465e-05, "loss": 5.5989, "step": 56 }, { "epoch": 0.00966183574879227, "grad_norm": 0.9791821837425232, "learning_rate": 7.979870596693027e-05, "loss": 4.2859, "step": 112 }, { "epoch": 0.014492753623188406, "grad_norm": 1.1985901594161987, "learning_rate": 0.00012005751258087708, "loss": 4.0112, "step": 168 }, { "epoch": 0.01932367149758454, "grad_norm": 1.6471028327941895, "learning_rate": 0.00016031631919482387, "loss": 3.8553, "step": 224 }, { "epoch": 0.024154589371980676, "grad_norm": 2.3778419494628906, "learning_rate": 0.0002005751258087707, "loss": 3.7549, "step": 280 }, { "epoch": 0.028985507246376812, "grad_norm": 2.404374122619629, "learning_rate": 0.0002408339324227175, "loss": 3.6717, "step": 336 }, { "epoch": 0.033816425120772944, "grad_norm": 2.839930295944214, "learning_rate": 0.00028109273903666426, "loss": 3.6109, "step": 392 }, { "epoch": 0.03864734299516908, "grad_norm": 10.898863792419434, "learning_rate": 0.0003213515456506111, "loss": 3.5459, "step": 448 }, { "epoch": 0.043478260869565216, "grad_norm": 1.387969732284546, "learning_rate": 0.0003616103522645579, "loss": 3.4739, "step": 504 }, { "epoch": 0.04830917874396135, "grad_norm": 4.0545196533203125, "learning_rate": 0.00040186915887850466, "loss": 3.419, "step": 560 }, { "epoch": 0.05314009661835749, "grad_norm": 2.1500046253204346, "learning_rate": 0.0004421279654924515, "loss": 3.3747, "step": 616 }, { "epoch": 0.057971014492753624, "grad_norm": 2.3752946853637695, "learning_rate": 0.0004823867721063983, "loss": 3.3339, "step": 672 }, { "epoch": 0.06280193236714976, "grad_norm": 2.856977701187134, "learning_rate": 0.0005226455787203452, "loss": 3.3246, "step": 728 }, { "epoch": 0.06763285024154589, "grad_norm": 1.4701484441757202, "learning_rate": 0.0005629043853342918, "loss": 3.2797, "step": 784 }, { "epoch": 0.07246376811594203, "grad_norm": 1.2803332805633545, "learning_rate": 0.0006031631919482386, "loss": 3.2239, "step": 840 }, { "epoch": 0.07729468599033816, "grad_norm": 1.0485491752624512, "learning_rate": 0.0006434219985621856, "loss": 3.172, "step": 896 }, { "epoch": 0.0821256038647343, "grad_norm": 1.3146588802337646, "learning_rate": 0.0006836808051761322, "loss": 3.168, "step": 952 }, { "epoch": 0.08695652173913043, "grad_norm": 1.1967456340789795, "learning_rate": 0.000723939611790079, "loss": 3.164, "step": 1008 }, { "epoch": 0.09178743961352658, "grad_norm": 4.6934051513671875, "learning_rate": 0.000764198418404026, "loss": 3.1539, "step": 1064 }, { "epoch": 0.0966183574879227, "grad_norm": 1.1069180965423584, "learning_rate": 0.0008044572250179727, "loss": 3.151, "step": 1120 }, { "epoch": 0.10144927536231885, "grad_norm": 1.1855157613754272, "learning_rate": 0.0008447160316319194, "loss": 3.1367, "step": 1176 }, { "epoch": 0.10628019323671498, "grad_norm": 2.2205240726470947, "learning_rate": 0.0008849748382458663, "loss": 3.1277, "step": 1232 }, { "epoch": 0.1111111111111111, "grad_norm": 16.359506607055664, "learning_rate": 0.0009252336448598131, "loss": 3.1434, "step": 1288 }, { "epoch": 0.11594202898550725, "grad_norm": 1.0570050477981567, "learning_rate": 0.0009654924514737598, "loss": 3.1172, "step": 1344 }, { "epoch": 0.12077294685990338, "grad_norm": 1.5385373830795288, "learning_rate": 0.001, "loss": 3.0791, "step": 1400 }, { "epoch": 0.12560386473429952, "grad_norm": 2.051922082901001, "learning_rate": 0.001, "loss": 3.0802, "step": 1456 }, { "epoch": 0.13043478260869565, "grad_norm": 0.982364296913147, "learning_rate": 0.001, "loss": 3.0863, "step": 1512 }, { "epoch": 0.13526570048309178, "grad_norm": 0.7527421116828918, "learning_rate": 0.001, "loss": 3.0592, "step": 1568 }, { "epoch": 0.14009661835748793, "grad_norm": 0.9949536919593811, "learning_rate": 0.001, "loss": 3.0478, "step": 1624 }, { "epoch": 0.14492753623188406, "grad_norm": 0.9814801216125488, "learning_rate": 0.001, "loss": 3.0359, "step": 1680 }, { "epoch": 0.1497584541062802, "grad_norm": 0.5726284980773926, "learning_rate": 0.001, "loss": 3.0048, "step": 1736 }, { "epoch": 0.15458937198067632, "grad_norm": 0.730490505695343, "learning_rate": 0.001, "loss": 2.9867, "step": 1792 }, { "epoch": 0.15942028985507245, "grad_norm": 0.7469838261604309, "learning_rate": 0.001, "loss": 2.9971, "step": 1848 }, { "epoch": 0.1642512077294686, "grad_norm": 2.1876564025878906, "learning_rate": 0.001, "loss": 2.9858, "step": 1904 }, { "epoch": 0.16908212560386474, "grad_norm": 0.5412757396697998, "learning_rate": 0.001, "loss": 2.9758, "step": 1960 }, { "epoch": 0.17391304347826086, "grad_norm": 0.6346341371536255, "learning_rate": 0.001, "loss": 2.9901, "step": 2016 }, { "epoch": 0.178743961352657, "grad_norm": 1.2752057313919067, "learning_rate": 0.001, "loss": 2.9721, "step": 2072 }, { "epoch": 0.18357487922705315, "grad_norm": 0.5239083170890808, "learning_rate": 0.001, "loss": 2.9763, "step": 2128 }, { "epoch": 0.18840579710144928, "grad_norm": 0.7067678570747375, "learning_rate": 0.001, "loss": 2.9666, "step": 2184 }, { "epoch": 0.1932367149758454, "grad_norm": 0.9121713638305664, "learning_rate": 0.001, "loss": 2.9591, "step": 2240 }, { "epoch": 0.19806763285024154, "grad_norm": 0.545002818107605, "learning_rate": 0.001, "loss": 2.946, "step": 2296 }, { "epoch": 0.2028985507246377, "grad_norm": 1.5359148979187012, "learning_rate": 0.001, "loss": 2.9436, "step": 2352 }, { "epoch": 0.20772946859903382, "grad_norm": 1.2984882593154907, "learning_rate": 0.001, "loss": 2.9719, "step": 2408 }, { "epoch": 0.21256038647342995, "grad_norm": 0.5250646471977234, "learning_rate": 0.001, "loss": 2.957, "step": 2464 }, { "epoch": 0.21739130434782608, "grad_norm": 0.6154928803443909, "learning_rate": 0.001, "loss": 2.9321, "step": 2520 }, { "epoch": 0.2222222222222222, "grad_norm": 0.8087754249572754, "learning_rate": 0.001, "loss": 2.9349, "step": 2576 }, { "epoch": 0.22705314009661837, "grad_norm": 0.6237892508506775, "learning_rate": 0.001, "loss": 2.9337, "step": 2632 }, { "epoch": 0.2318840579710145, "grad_norm": 0.4661741852760315, "learning_rate": 0.001, "loss": 2.9089, "step": 2688 }, { "epoch": 0.23671497584541062, "grad_norm": 0.5616157054901123, "learning_rate": 0.001, "loss": 2.897, "step": 2744 }, { "epoch": 0.24154589371980675, "grad_norm": 14.491456031799316, "learning_rate": 0.001, "loss": 2.8879, "step": 2800 }, { "epoch": 0.2463768115942029, "grad_norm": 1.5201140642166138, "learning_rate": 0.001, "loss": 2.891, "step": 2856 }, { "epoch": 0.25120772946859904, "grad_norm": 1.0428102016448975, "learning_rate": 0.001, "loss": 2.9029, "step": 2912 }, { "epoch": 0.2560386473429952, "grad_norm": 0.6364895701408386, "learning_rate": 0.001, "loss": 2.8891, "step": 2968 }, { "epoch": 0.2608695652173913, "grad_norm": 0.5871291756629944, "learning_rate": 0.001, "loss": 2.8822, "step": 3024 }, { "epoch": 0.26570048309178745, "grad_norm": 1.1912660598754883, "learning_rate": 0.001, "loss": 2.8918, "step": 3080 }, { "epoch": 0.27053140096618356, "grad_norm": 0.6245777606964111, "learning_rate": 0.001, "loss": 2.8923, "step": 3136 }, { "epoch": 0.2753623188405797, "grad_norm": 0.5338204503059387, "learning_rate": 0.001, "loss": 2.8884, "step": 3192 }, { "epoch": 0.28019323671497587, "grad_norm": 1.2675994634628296, "learning_rate": 0.001, "loss": 2.8719, "step": 3248 }, { "epoch": 0.28502415458937197, "grad_norm": 0.5750728845596313, "learning_rate": 0.001, "loss": 2.8696, "step": 3304 }, { "epoch": 0.2898550724637681, "grad_norm": 0.5912678837776184, "learning_rate": 0.001, "loss": 2.865, "step": 3360 }, { "epoch": 0.2946859903381642, "grad_norm": 1.8540053367614746, "learning_rate": 0.001, "loss": 2.8636, "step": 3416 }, { "epoch": 0.2995169082125604, "grad_norm": 0.8271320462226868, "learning_rate": 0.001, "loss": 2.865, "step": 3472 }, { "epoch": 0.30434782608695654, "grad_norm": 0.6457290649414062, "learning_rate": 0.001, "loss": 2.8557, "step": 3528 }, { "epoch": 0.30917874396135264, "grad_norm": 0.5149587988853455, "learning_rate": 0.001, "loss": 2.8526, "step": 3584 }, { "epoch": 0.3140096618357488, "grad_norm": 0.39953938126564026, "learning_rate": 0.001, "loss": 2.8266, "step": 3640 }, { "epoch": 0.3188405797101449, "grad_norm": 0.5023137927055359, "learning_rate": 0.001, "loss": 2.8189, "step": 3696 }, { "epoch": 0.32367149758454106, "grad_norm": 0.4729611575603485, "learning_rate": 0.001, "loss": 2.8405, "step": 3752 }, { "epoch": 0.3285024154589372, "grad_norm": 0.5361127853393555, "learning_rate": 0.001, "loss": 2.822, "step": 3808 }, { "epoch": 0.3333333333333333, "grad_norm": 0.5213225483894348, "learning_rate": 0.001, "loss": 2.8117, "step": 3864 }, { "epoch": 0.33816425120772947, "grad_norm": 0.4088309705257416, "learning_rate": 0.001, "loss": 2.8038, "step": 3920 }, { "epoch": 0.34299516908212563, "grad_norm": 0.5374923348426819, "learning_rate": 0.001, "loss": 2.817, "step": 3976 }, { "epoch": 0.34782608695652173, "grad_norm": 1.1506937742233276, "learning_rate": 0.001, "loss": 2.8348, "step": 4032 }, { "epoch": 0.3526570048309179, "grad_norm": 26.974454879760742, "learning_rate": 0.001, "loss": 2.8107, "step": 4088 }, { "epoch": 0.357487922705314, "grad_norm": 0.4402843713760376, "learning_rate": 0.001, "loss": 2.7993, "step": 4144 }, { "epoch": 0.36231884057971014, "grad_norm": 0.6160932183265686, "learning_rate": 0.001, "loss": 2.7813, "step": 4200 }, { "epoch": 0.3671497584541063, "grad_norm": 0.4454444646835327, "learning_rate": 0.001, "loss": 2.8068, "step": 4256 }, { "epoch": 0.3719806763285024, "grad_norm": 0.5421523451805115, "learning_rate": 0.001, "loss": 2.8237, "step": 4312 }, { "epoch": 0.37681159420289856, "grad_norm": 2.7624282836914062, "learning_rate": 0.001, "loss": 2.8097, "step": 4368 }, { "epoch": 0.38164251207729466, "grad_norm": 1.5045050382614136, "learning_rate": 0.001, "loss": 2.79, "step": 4424 }, { "epoch": 0.3864734299516908, "grad_norm": 0.4124404191970825, "learning_rate": 0.001, "loss": 2.782, "step": 4480 }, { "epoch": 0.391304347826087, "grad_norm": 0.4538789987564087, "learning_rate": 0.001, "loss": 2.7813, "step": 4536 }, { "epoch": 0.3961352657004831, "grad_norm": 0.6743329167366028, "learning_rate": 0.001, "loss": 2.7994, "step": 4592 }, { "epoch": 0.40096618357487923, "grad_norm": 1.868725061416626, "learning_rate": 0.001, "loss": 2.7789, "step": 4648 }, { "epoch": 0.4057971014492754, "grad_norm": 0.7265204191207886, "learning_rate": 0.001, "loss": 2.8014, "step": 4704 }, { "epoch": 0.4106280193236715, "grad_norm": 0.8547886610031128, "learning_rate": 0.001, "loss": 2.7869, "step": 4760 }, { "epoch": 0.41545893719806765, "grad_norm": 2.316642999649048, "learning_rate": 0.001, "loss": 2.7914, "step": 4816 }, { "epoch": 0.42028985507246375, "grad_norm": 0.5473759770393372, "learning_rate": 0.001, "loss": 2.7652, "step": 4872 }, { "epoch": 0.4251207729468599, "grad_norm": 0.42073145508766174, "learning_rate": 0.001, "loss": 2.7829, "step": 4928 }, { "epoch": 0.42995169082125606, "grad_norm": 0.40427178144454956, "learning_rate": 0.001, "loss": 2.7786, "step": 4984 }, { "epoch": 0.43478260869565216, "grad_norm": 2.3570165634155273, "learning_rate": 0.001, "loss": 2.7673, "step": 5040 }, { "epoch": 0.4396135265700483, "grad_norm": 0.5806084275245667, "learning_rate": 0.001, "loss": 2.7583, "step": 5096 }, { "epoch": 0.4444444444444444, "grad_norm": 0.4433354139328003, "learning_rate": 0.001, "loss": 2.7771, "step": 5152 }, { "epoch": 0.4492753623188406, "grad_norm": 0.5527592897415161, "learning_rate": 0.001, "loss": 2.7567, "step": 5208 }, { "epoch": 0.45410628019323673, "grad_norm": 0.5190341472625732, "learning_rate": 0.001, "loss": 2.7683, "step": 5264 }, { "epoch": 0.45893719806763283, "grad_norm": 0.7538208365440369, "learning_rate": 0.001, "loss": 2.7646, "step": 5320 }, { "epoch": 0.463768115942029, "grad_norm": 0.5732608437538147, "learning_rate": 0.001, "loss": 2.7684, "step": 5376 }, { "epoch": 0.46859903381642515, "grad_norm": 0.9430089592933655, "learning_rate": 0.001, "loss": 2.7726, "step": 5432 }, { "epoch": 0.47342995169082125, "grad_norm": 0.4525170922279358, "learning_rate": 0.001, "loss": 2.7672, "step": 5488 }, { "epoch": 0.4782608695652174, "grad_norm": 1.7289822101593018, "learning_rate": 0.001, "loss": 2.7696, "step": 5544 }, { "epoch": 0.4830917874396135, "grad_norm": 0.7026426792144775, "learning_rate": 0.001, "loss": 2.7558, "step": 5600 }, { "epoch": 0.48792270531400966, "grad_norm": 1.3366003036499023, "learning_rate": 0.001, "loss": 2.7427, "step": 5656 }, { "epoch": 0.4927536231884058, "grad_norm": 0.5988692045211792, "learning_rate": 0.001, "loss": 2.7447, "step": 5712 }, { "epoch": 0.4975845410628019, "grad_norm": 0.421079158782959, "learning_rate": 0.001, "loss": 2.7457, "step": 5768 }, { "epoch": 0.5024154589371981, "grad_norm": 0.6967017650604248, "learning_rate": 0.001, "loss": 2.752, "step": 5824 }, { "epoch": 0.5072463768115942, "grad_norm": 0.520272433757782, "learning_rate": 0.001, "loss": 2.7445, "step": 5880 }, { "epoch": 0.5120772946859904, "grad_norm": 0.5526834726333618, "learning_rate": 0.001, "loss": 2.7273, "step": 5936 }, { "epoch": 0.5169082125603864, "grad_norm": 0.44426271319389343, "learning_rate": 0.001, "loss": 2.7303, "step": 5992 }, { "epoch": 0.5217391304347826, "grad_norm": 0.600886881351471, "learning_rate": 0.001, "loss": 2.7161, "step": 6048 }, { "epoch": 0.5265700483091788, "grad_norm": 0.4419015049934387, "learning_rate": 0.001, "loss": 2.7262, "step": 6104 }, { "epoch": 0.5314009661835749, "grad_norm": 0.6358337998390198, "learning_rate": 0.001, "loss": 2.7241, "step": 6160 }, { "epoch": 0.5362318840579711, "grad_norm": 0.7351999282836914, "learning_rate": 0.001, "loss": 2.7136, "step": 6216 }, { "epoch": 0.5410628019323671, "grad_norm": 0.3952183127403259, "learning_rate": 0.001, "loss": 2.7197, "step": 6272 }, { "epoch": 0.5458937198067633, "grad_norm": 0.7855304479598999, "learning_rate": 0.001, "loss": 2.7155, "step": 6328 }, { "epoch": 0.5507246376811594, "grad_norm": 0.4529874920845032, "learning_rate": 0.001, "loss": 2.7097, "step": 6384 }, { "epoch": 0.5555555555555556, "grad_norm": 0.43028897047042847, "learning_rate": 0.001, "loss": 2.7161, "step": 6440 }, { "epoch": 0.5603864734299517, "grad_norm": 0.7543787360191345, "learning_rate": 0.001, "loss": 2.7073, "step": 6496 }, { "epoch": 0.5652173913043478, "grad_norm": 0.4770491421222687, "learning_rate": 0.001, "loss": 2.705, "step": 6552 }, { "epoch": 0.5700483091787439, "grad_norm": 0.5735542178153992, "learning_rate": 0.001, "loss": 2.698, "step": 6608 }, { "epoch": 0.5748792270531401, "grad_norm": 0.4636959731578827, "learning_rate": 0.001, "loss": 2.7053, "step": 6664 }, { "epoch": 0.5797101449275363, "grad_norm": 0.4329095184803009, "learning_rate": 0.001, "loss": 2.7017, "step": 6720 }, { "epoch": 0.5845410628019324, "grad_norm": 15.111337661743164, "learning_rate": 0.001, "loss": 2.6841, "step": 6776 }, { "epoch": 0.5893719806763285, "grad_norm": 1.0087028741836548, "learning_rate": 0.001, "loss": 2.6914, "step": 6832 }, { "epoch": 0.5942028985507246, "grad_norm": 0.38676732778549194, "learning_rate": 0.001, "loss": 2.6915, "step": 6888 }, { "epoch": 0.5990338164251208, "grad_norm": 0.5741737484931946, "learning_rate": 0.001, "loss": 2.6856, "step": 6944 }, { "epoch": 0.6038647342995169, "grad_norm": 0.5002315640449524, "learning_rate": 0.001, "loss": 2.6959, "step": 7000 }, { "epoch": 0.6086956521739131, "grad_norm": 0.5718513131141663, "learning_rate": 0.001, "loss": 2.7031, "step": 7056 }, { "epoch": 0.6135265700483091, "grad_norm": 0.5187315344810486, "learning_rate": 0.001, "loss": 2.7019, "step": 7112 }, { "epoch": 0.6183574879227053, "grad_norm": 0.7257050275802612, "learning_rate": 0.001, "loss": 2.6985, "step": 7168 }, { "epoch": 0.6231884057971014, "grad_norm": 0.6796103715896606, "learning_rate": 0.001, "loss": 2.7002, "step": 7224 }, { "epoch": 0.6280193236714976, "grad_norm": 0.6530753970146179, "learning_rate": 0.001, "loss": 2.6943, "step": 7280 }, { "epoch": 0.6328502415458938, "grad_norm": 0.6346811652183533, "learning_rate": 0.001, "loss": 2.7002, "step": 7336 }, { "epoch": 0.6376811594202898, "grad_norm": 2.5241458415985107, "learning_rate": 0.001, "loss": 2.6976, "step": 7392 }, { "epoch": 0.642512077294686, "grad_norm": 0.6433071494102478, "learning_rate": 0.001, "loss": 2.7005, "step": 7448 }, { "epoch": 0.6473429951690821, "grad_norm": 0.44189754128456116, "learning_rate": 0.001, "loss": 2.6813, "step": 7504 }, { "epoch": 0.6521739130434783, "grad_norm": 0.5451778769493103, "learning_rate": 0.001, "loss": 2.6796, "step": 7560 }, { "epoch": 0.6570048309178744, "grad_norm": 0.4468247890472412, "learning_rate": 0.001, "loss": 2.6873, "step": 7616 }, { "epoch": 0.6618357487922706, "grad_norm": 0.5533420443534851, "learning_rate": 0.001, "loss": 2.6867, "step": 7672 }, { "epoch": 0.6666666666666666, "grad_norm": 0.40803027153015137, "learning_rate": 0.001, "loss": 2.7191, "step": 7728 }, { "epoch": 0.6714975845410628, "grad_norm": 0.47552070021629333, "learning_rate": 0.001, "loss": 2.6886, "step": 7784 }, { "epoch": 0.6763285024154589, "grad_norm": 0.621493399143219, "learning_rate": 0.001, "loss": 2.6887, "step": 7840 }, { "epoch": 0.6811594202898551, "grad_norm": 0.8410442471504211, "learning_rate": 0.001, "loss": 2.6666, "step": 7896 }, { "epoch": 0.6859903381642513, "grad_norm": 0.8546284437179565, "learning_rate": 0.001, "loss": 2.667, "step": 7952 }, { "epoch": 0.6908212560386473, "grad_norm": 0.42941534519195557, "learning_rate": 0.001, "loss": 2.6665, "step": 8008 }, { "epoch": 0.6956521739130435, "grad_norm": 0.603959321975708, "learning_rate": 0.001, "loss": 2.6733, "step": 8064 }, { "epoch": 0.7004830917874396, "grad_norm": 0.6979280710220337, "learning_rate": 0.001, "loss": 2.665, "step": 8120 }, { "epoch": 0.7053140096618358, "grad_norm": 0.5473920702934265, "learning_rate": 0.001, "loss": 2.6592, "step": 8176 }, { "epoch": 0.7101449275362319, "grad_norm": 3.5187981128692627, "learning_rate": 0.001, "loss": 2.653, "step": 8232 }, { "epoch": 0.714975845410628, "grad_norm": 0.6898009181022644, "learning_rate": 0.001, "loss": 2.6634, "step": 8288 }, { "epoch": 0.7198067632850241, "grad_norm": 0.6146760582923889, "learning_rate": 0.001, "loss": 2.6719, "step": 8344 }, { "epoch": 0.7246376811594203, "grad_norm": 0.723409116268158, "learning_rate": 0.001, "loss": 2.6557, "step": 8400 }, { "epoch": 0.7294685990338164, "grad_norm": 0.7172557711601257, "learning_rate": 0.001, "loss": 2.6575, "step": 8456 }, { "epoch": 0.7342995169082126, "grad_norm": 0.5060122013092041, "learning_rate": 0.001, "loss": 2.6628, "step": 8512 }, { "epoch": 0.7391304347826086, "grad_norm": 1.128843069076538, "learning_rate": 0.001, "loss": 2.6571, "step": 8568 }, { "epoch": 0.7439613526570048, "grad_norm": 0.44131237268447876, "learning_rate": 0.001, "loss": 2.6568, "step": 8624 }, { "epoch": 0.748792270531401, "grad_norm": 1.0537309646606445, "learning_rate": 0.001, "loss": 2.6586, "step": 8680 }, { "epoch": 0.7536231884057971, "grad_norm": 0.5386660099029541, "learning_rate": 0.001, "loss": 2.6512, "step": 8736 }, { "epoch": 0.7584541062801933, "grad_norm": 0.43016335368156433, "learning_rate": 0.001, "loss": 2.6554, "step": 8792 }, { "epoch": 0.7632850241545893, "grad_norm": 2.5915980339050293, "learning_rate": 0.001, "loss": 2.6363, "step": 8848 }, { "epoch": 0.7681159420289855, "grad_norm": 0.533907949924469, "learning_rate": 0.001, "loss": 2.6539, "step": 8904 }, { "epoch": 0.7729468599033816, "grad_norm": 0.4950580894947052, "learning_rate": 0.001, "loss": 2.6495, "step": 8960 }, { "epoch": 0.7777777777777778, "grad_norm": 1.6670414209365845, "learning_rate": 0.001, "loss": 2.6701, "step": 9016 }, { "epoch": 0.782608695652174, "grad_norm": 0.44445034861564636, "learning_rate": 0.001, "loss": 2.6641, "step": 9072 }, { "epoch": 0.7874396135265701, "grad_norm": 0.5860577821731567, "learning_rate": 0.001, "loss": 2.6442, "step": 9128 }, { "epoch": 0.7922705314009661, "grad_norm": 0.3840193450450897, "learning_rate": 0.001, "loss": 2.6559, "step": 9184 }, { "epoch": 0.7971014492753623, "grad_norm": 0.9412776827812195, "learning_rate": 0.001, "loss": 2.6315, "step": 9240 }, { "epoch": 0.8019323671497585, "grad_norm": 0.4733305275440216, "learning_rate": 0.001, "loss": 2.6414, "step": 9296 }, { "epoch": 0.8067632850241546, "grad_norm": 0.7020483613014221, "learning_rate": 0.001, "loss": 2.6484, "step": 9352 }, { "epoch": 0.8115942028985508, "grad_norm": 0.6107551455497742, "learning_rate": 0.001, "loss": 2.6384, "step": 9408 }, { "epoch": 0.8164251207729468, "grad_norm": 0.49444472789764404, "learning_rate": 0.001, "loss": 2.6365, "step": 9464 }, { "epoch": 0.821256038647343, "grad_norm": 0.4061006009578705, "learning_rate": 0.001, "loss": 2.6277, "step": 9520 }, { "epoch": 0.8260869565217391, "grad_norm": 0.38213226199150085, "learning_rate": 0.001, "loss": 2.6328, "step": 9576 }, { "epoch": 0.8309178743961353, "grad_norm": 0.4887073040008545, "learning_rate": 0.001, "loss": 2.6279, "step": 9632 }, { "epoch": 0.8357487922705314, "grad_norm": 0.5472325682640076, "learning_rate": 0.001, "loss": 2.6209, "step": 9688 }, { "epoch": 0.8405797101449275, "grad_norm": 0.47439444065093994, "learning_rate": 0.001, "loss": 2.6298, "step": 9744 }, { "epoch": 0.8454106280193237, "grad_norm": 1.0408443212509155, "learning_rate": 0.001, "loss": 2.6428, "step": 9800 }, { "epoch": 0.8502415458937198, "grad_norm": 0.37197762727737427, "learning_rate": 0.001, "loss": 2.6288, "step": 9856 }, { "epoch": 0.855072463768116, "grad_norm": 0.6389931440353394, "learning_rate": 0.001, "loss": 2.6346, "step": 9912 }, { "epoch": 0.8599033816425121, "grad_norm": 0.42257222533226013, "learning_rate": 0.001, "loss": 2.6203, "step": 9968 }, { "epoch": 0.8647342995169082, "grad_norm": 0.5119916796684265, "learning_rate": 0.001, "loss": 2.6045, "step": 10024 }, { "epoch": 0.8695652173913043, "grad_norm": 0.40754446387290955, "learning_rate": 0.001, "loss": 2.6078, "step": 10080 }, { "epoch": 0.8743961352657005, "grad_norm": 1.4318445920944214, "learning_rate": 0.001, "loss": 2.6103, "step": 10136 }, { "epoch": 0.8792270531400966, "grad_norm": 0.47217634320259094, "learning_rate": 0.001, "loss": 2.6194, "step": 10192 }, { "epoch": 0.8840579710144928, "grad_norm": 0.44421687722206116, "learning_rate": 0.001, "loss": 2.614, "step": 10248 }, { "epoch": 0.8888888888888888, "grad_norm": 0.49661970138549805, "learning_rate": 0.001, "loss": 2.6052, "step": 10304 }, { "epoch": 0.893719806763285, "grad_norm": 30.351938247680664, "learning_rate": 0.001, "loss": 2.6135, "step": 10360 }, { "epoch": 0.8985507246376812, "grad_norm": 0.5131446123123169, "learning_rate": 0.001, "loss": 2.6159, "step": 10416 }, { "epoch": 0.9033816425120773, "grad_norm": 0.5762516856193542, "learning_rate": 0.001, "loss": 2.6145, "step": 10472 }, { "epoch": 0.9082125603864735, "grad_norm": 0.4128069579601288, "learning_rate": 0.001, "loss": 2.6181, "step": 10528 }, { "epoch": 0.9130434782608695, "grad_norm": 0.4651366174221039, "learning_rate": 0.001, "loss": 2.6083, "step": 10584 }, { "epoch": 0.9178743961352657, "grad_norm": 0.42533278465270996, "learning_rate": 0.001, "loss": 2.6271, "step": 10640 }, { "epoch": 0.9227053140096618, "grad_norm": 0.4388916492462158, "learning_rate": 0.001, "loss": 2.5972, "step": 10696 }, { "epoch": 0.927536231884058, "grad_norm": 0.5819514989852905, "learning_rate": 0.001, "loss": 2.5942, "step": 10752 }, { "epoch": 0.9323671497584541, "grad_norm": 0.5410124659538269, "learning_rate": 0.001, "loss": 2.5966, "step": 10808 }, { "epoch": 0.9371980676328503, "grad_norm": 2.474395751953125, "learning_rate": 0.001, "loss": 2.6074, "step": 10864 }, { "epoch": 0.9420289855072463, "grad_norm": 1.7280431985855103, "learning_rate": 0.001, "loss": 2.5975, "step": 10920 }, { "epoch": 0.9468599033816425, "grad_norm": 0.5472970008850098, "learning_rate": 0.001, "loss": 2.6011, "step": 10976 }, { "epoch": 0.9516908212560387, "grad_norm": 0.6419417858123779, "learning_rate": 0.001, "loss": 2.5942, "step": 11032 }, { "epoch": 0.9565217391304348, "grad_norm": 0.49544742703437805, "learning_rate": 0.001, "loss": 2.5985, "step": 11088 }, { "epoch": 0.961352657004831, "grad_norm": 0.41243302822113037, "learning_rate": 0.001, "loss": 2.5896, "step": 11144 }, { "epoch": 0.966183574879227, "grad_norm": 0.437237411737442, "learning_rate": 0.001, "loss": 2.5921, "step": 11200 }, { "epoch": 0.9710144927536232, "grad_norm": 0.4309455454349518, "learning_rate": 0.001, "loss": 2.5913, "step": 11256 }, { "epoch": 0.9758454106280193, "grad_norm": 0.49835360050201416, "learning_rate": 0.001, "loss": 2.5867, "step": 11312 }, { "epoch": 0.9806763285024155, "grad_norm": 0.7193912267684937, "learning_rate": 0.001, "loss": 2.5837, "step": 11368 }, { "epoch": 0.9855072463768116, "grad_norm": 1.3050037622451782, "learning_rate": 0.001, "loss": 2.5919, "step": 11424 }, { "epoch": 0.9903381642512077, "grad_norm": 0.41976985335350037, "learning_rate": 0.001, "loss": 2.5937, "step": 11480 }, { "epoch": 0.9951690821256038, "grad_norm": 0.41240671277046204, "learning_rate": 0.001, "loss": 2.5878, "step": 11536 }, { "epoch": 1.0, "grad_norm": 0.7706860899925232, "learning_rate": 0.001, "loss": 2.5806, "step": 11592 }, { "epoch": 1.0048309178743962, "grad_norm": 3.327430248260498, "learning_rate": 0.001, "loss": 2.5611, "step": 11648 }, { "epoch": 1.0096618357487923, "grad_norm": 0.4381028115749359, "learning_rate": 0.001, "loss": 2.5579, "step": 11704 }, { "epoch": 1.0144927536231885, "grad_norm": 0.36722248792648315, "learning_rate": 0.001, "loss": 2.5613, "step": 11760 }, { "epoch": 1.0193236714975846, "grad_norm": 0.5505610108375549, "learning_rate": 0.001, "loss": 2.5644, "step": 11816 }, { "epoch": 1.0241545893719808, "grad_norm": 0.3663302958011627, "learning_rate": 0.001, "loss": 2.5538, "step": 11872 }, { "epoch": 1.0289855072463767, "grad_norm": 0.7702949047088623, "learning_rate": 0.001, "loss": 2.5446, "step": 11928 }, { "epoch": 1.0338164251207729, "grad_norm": 0.5923094153404236, "learning_rate": 0.001, "loss": 2.5511, "step": 11984 }, { "epoch": 1.038647342995169, "grad_norm": 0.8538995981216431, "learning_rate": 0.001, "loss": 2.5559, "step": 12040 }, { "epoch": 1.0434782608695652, "grad_norm": 0.46077409386634827, "learning_rate": 0.001, "loss": 2.5492, "step": 12096 }, { "epoch": 1.0483091787439613, "grad_norm": 0.4534064829349518, "learning_rate": 0.001, "loss": 2.5517, "step": 12152 }, { "epoch": 1.0531400966183575, "grad_norm": 19.5181941986084, "learning_rate": 0.001, "loss": 2.5423, "step": 12208 }, { "epoch": 1.0579710144927537, "grad_norm": 0.5569064617156982, "learning_rate": 0.001, "loss": 2.5709, "step": 12264 }, { "epoch": 1.0628019323671498, "grad_norm": 1.4424077272415161, "learning_rate": 0.001, "loss": 2.5801, "step": 12320 }, { "epoch": 1.067632850241546, "grad_norm": 0.5305166244506836, "learning_rate": 0.001, "loss": 2.5554, "step": 12376 }, { "epoch": 1.0724637681159421, "grad_norm": 1.1172679662704468, "learning_rate": 0.001, "loss": 2.5635, "step": 12432 }, { "epoch": 1.077294685990338, "grad_norm": 0.6793813109397888, "learning_rate": 0.001, "loss": 2.5673, "step": 12488 }, { "epoch": 1.0821256038647342, "grad_norm": 1.1641849279403687, "learning_rate": 0.001, "loss": 2.5855, "step": 12544 }, { "epoch": 1.0869565217391304, "grad_norm": 0.5341097116470337, "learning_rate": 0.001, "loss": 2.5788, "step": 12600 }, { "epoch": 1.0917874396135265, "grad_norm": 0.49252578616142273, "learning_rate": 0.001, "loss": 2.5746, "step": 12656 }, { "epoch": 1.0966183574879227, "grad_norm": 0.7578367590904236, "learning_rate": 0.001, "loss": 2.5494, "step": 12712 }, { "epoch": 1.1014492753623188, "grad_norm": 0.7495577335357666, "learning_rate": 0.001, "loss": 2.5565, "step": 12768 }, { "epoch": 1.106280193236715, "grad_norm": 0.8345962762832642, "learning_rate": 0.001, "loss": 2.5549, "step": 12824 }, { "epoch": 1.1111111111111112, "grad_norm": 0.8163776993751526, "learning_rate": 0.001, "loss": 2.5496, "step": 12880 }, { "epoch": 1.1159420289855073, "grad_norm": 2.7103028297424316, "learning_rate": 0.001, "loss": 2.555, "step": 12936 }, { "epoch": 1.1207729468599035, "grad_norm": 1.8082388639450073, "learning_rate": 0.001, "loss": 2.5497, "step": 12992 }, { "epoch": 1.1256038647342996, "grad_norm": 0.5198656320571899, "learning_rate": 0.001, "loss": 2.5321, "step": 13048 }, { "epoch": 1.1304347826086956, "grad_norm": 0.7601255178451538, "learning_rate": 0.001, "loss": 2.5489, "step": 13104 }, { "epoch": 1.1352657004830917, "grad_norm": 0.5740835070610046, "learning_rate": 0.001, "loss": 2.5454, "step": 13160 }, { "epoch": 1.1400966183574879, "grad_norm": 0.6274489760398865, "learning_rate": 0.001, "loss": 2.532, "step": 13216 }, { "epoch": 1.144927536231884, "grad_norm": 0.5133732557296753, "learning_rate": 0.001, "loss": 2.5303, "step": 13272 }, { "epoch": 1.1497584541062802, "grad_norm": 0.6891068816184998, "learning_rate": 0.001, "loss": 2.5397, "step": 13328 }, { "epoch": 1.1545893719806763, "grad_norm": 0.6237878203392029, "learning_rate": 0.001, "loss": 2.5461, "step": 13384 }, { "epoch": 1.1594202898550725, "grad_norm": 0.9357394576072693, "learning_rate": 0.001, "loss": 2.5487, "step": 13440 }, { "epoch": 1.1642512077294687, "grad_norm": 0.7408100962638855, "learning_rate": 0.001, "loss": 2.542, "step": 13496 }, { "epoch": 1.1690821256038648, "grad_norm": 0.42847955226898193, "learning_rate": 0.001, "loss": 2.5479, "step": 13552 }, { "epoch": 1.1739130434782608, "grad_norm": 0.6769561767578125, "learning_rate": 0.001, "loss": 2.5479, "step": 13608 }, { "epoch": 1.178743961352657, "grad_norm": 1.5430186986923218, "learning_rate": 0.001, "loss": 2.5502, "step": 13664 }, { "epoch": 1.183574879227053, "grad_norm": 0.7028371691703796, "learning_rate": 0.001, "loss": 2.5502, "step": 13720 }, { "epoch": 1.1884057971014492, "grad_norm": 0.43584156036376953, "learning_rate": 0.001, "loss": 2.5536, "step": 13776 }, { "epoch": 1.1932367149758454, "grad_norm": 0.5009216666221619, "learning_rate": 0.001, "loss": 2.5383, "step": 13832 }, { "epoch": 1.1980676328502415, "grad_norm": 0.4713088274002075, "learning_rate": 0.001, "loss": 2.5316, "step": 13888 }, { "epoch": 1.2028985507246377, "grad_norm": 4.079718112945557, "learning_rate": 0.001, "loss": 2.5395, "step": 13944 }, { "epoch": 1.2077294685990339, "grad_norm": 0.5456583499908447, "learning_rate": 0.001, "loss": 2.5382, "step": 14000 }, { "epoch": 1.21256038647343, "grad_norm": 0.512478768825531, "learning_rate": 0.001, "loss": 2.5208, "step": 14056 }, { "epoch": 1.2173913043478262, "grad_norm": 0.3990437388420105, "learning_rate": 0.001, "loss": 2.5248, "step": 14112 }, { "epoch": 1.2222222222222223, "grad_norm": 0.6852943897247314, "learning_rate": 0.001, "loss": 2.5159, "step": 14168 }, { "epoch": 1.2270531400966185, "grad_norm": 0.6256563067436218, "learning_rate": 0.001, "loss": 2.5295, "step": 14224 }, { "epoch": 1.2318840579710144, "grad_norm": 0.6702936887741089, "learning_rate": 0.001, "loss": 2.5288, "step": 14280 }, { "epoch": 1.2367149758454106, "grad_norm": 0.5786619782447815, "learning_rate": 0.001, "loss": 2.537, "step": 14336 }, { "epoch": 1.2415458937198067, "grad_norm": 0.6425799131393433, "learning_rate": 0.001, "loss": 2.5313, "step": 14392 }, { "epoch": 1.2463768115942029, "grad_norm": 0.46116113662719727, "learning_rate": 0.001, "loss": 2.5306, "step": 14448 }, { "epoch": 1.251207729468599, "grad_norm": 0.52046799659729, "learning_rate": 0.001, "loss": 2.5407, "step": 14504 }, { "epoch": 1.2560386473429952, "grad_norm": 0.48209285736083984, "learning_rate": 0.001, "loss": 2.5422, "step": 14560 }, { "epoch": 1.2608695652173914, "grad_norm": 0.8265407681465149, "learning_rate": 0.001, "loss": 2.5347, "step": 14616 }, { "epoch": 1.2657004830917875, "grad_norm": 0.4520986080169678, "learning_rate": 0.001, "loss": 2.5347, "step": 14672 }, { "epoch": 1.2705314009661834, "grad_norm": 0.6868043541908264, "learning_rate": 0.001, "loss": 2.5379, "step": 14728 }, { "epoch": 1.2753623188405796, "grad_norm": 0.5073258280754089, "learning_rate": 0.001, "loss": 2.5232, "step": 14784 }, { "epoch": 1.2801932367149758, "grad_norm": 4.345489025115967, "learning_rate": 0.001, "loss": 2.5282, "step": 14840 }, { "epoch": 1.285024154589372, "grad_norm": 0.6298401951789856, "learning_rate": 0.001, "loss": 2.5255, "step": 14896 }, { "epoch": 1.289855072463768, "grad_norm": 8.941032409667969, "learning_rate": 0.001, "loss": 2.5527, "step": 14952 }, { "epoch": 1.2946859903381642, "grad_norm": 1.0845667123794556, "learning_rate": 0.001, "loss": 2.5406, "step": 15008 }, { "epoch": 1.2995169082125604, "grad_norm": 0.5030841827392578, "learning_rate": 0.001, "loss": 2.5283, "step": 15064 }, { "epoch": 1.3043478260869565, "grad_norm": 0.533234179019928, "learning_rate": 0.001, "loss": 2.5361, "step": 15120 }, { "epoch": 1.3091787439613527, "grad_norm": 1.023293375968933, "learning_rate": 0.001, "loss": 2.5345, "step": 15176 }, { "epoch": 1.3140096618357489, "grad_norm": 5.5791826248168945, "learning_rate": 0.001, "loss": 2.5309, "step": 15232 }, { "epoch": 1.318840579710145, "grad_norm": 4.967889308929443, "learning_rate": 0.001, "loss": 2.5395, "step": 15288 }, { "epoch": 1.3236714975845412, "grad_norm": 5.1368889808654785, "learning_rate": 0.001, "loss": 2.5617, "step": 15344 }, { "epoch": 1.3285024154589373, "grad_norm": 1.534320592880249, "learning_rate": 0.001, "loss": 2.5659, "step": 15400 }, { "epoch": 1.3333333333333333, "grad_norm": 0.9548292756080627, "learning_rate": 0.001, "loss": 2.5445, "step": 15456 }, { "epoch": 1.3381642512077294, "grad_norm": 0.7324444055557251, "learning_rate": 0.001, "loss": 2.5397, "step": 15512 }, { "epoch": 1.3429951690821256, "grad_norm": 3.2579472064971924, "learning_rate": 0.001, "loss": 2.5363, "step": 15568 }, { "epoch": 1.3478260869565217, "grad_norm": 0.6363402605056763, "learning_rate": 0.001, "loss": 2.5334, "step": 15624 }, { "epoch": 1.3526570048309179, "grad_norm": 0.5935930013656616, "learning_rate": 0.001, "loss": 2.532, "step": 15680 }, { "epoch": 1.357487922705314, "grad_norm": 0.8492759466171265, "learning_rate": 0.001, "loss": 2.5298, "step": 15736 }, { "epoch": 1.3623188405797102, "grad_norm": 0.5029017925262451, "learning_rate": 0.001, "loss": 2.5355, "step": 15792 }, { "epoch": 1.3671497584541064, "grad_norm": 0.5794525742530823, "learning_rate": 0.001, "loss": 2.5371, "step": 15848 }, { "epoch": 1.3719806763285023, "grad_norm": 1.3159703016281128, "learning_rate": 0.001, "loss": 2.524, "step": 15904 }, { "epoch": 1.3768115942028984, "grad_norm": 0.4209741950035095, "learning_rate": 0.001, "loss": 2.5255, "step": 15960 }, { "epoch": 1.3816425120772946, "grad_norm": 0.5418590307235718, "learning_rate": 0.001, "loss": 2.5166, "step": 16016 }, { "epoch": 1.3864734299516908, "grad_norm": 0.7169878482818604, "learning_rate": 0.001, "loss": 2.5188, "step": 16072 }, { "epoch": 1.391304347826087, "grad_norm": 0.5207206010818481, "learning_rate": 0.001, "loss": 2.5142, "step": 16128 }, { "epoch": 1.396135265700483, "grad_norm": 0.6924039721488953, "learning_rate": 0.001, "loss": 2.5015, "step": 16184 }, { "epoch": 1.4009661835748792, "grad_norm": 1.1841403245925903, "learning_rate": 0.001, "loss": 2.5206, "step": 16240 }, { "epoch": 1.4057971014492754, "grad_norm": 0.43086743354797363, "learning_rate": 0.001, "loss": 2.511, "step": 16296 }, { "epoch": 1.4106280193236715, "grad_norm": 0.47768867015838623, "learning_rate": 0.001, "loss": 2.5183, "step": 16352 }, { "epoch": 1.4154589371980677, "grad_norm": 1.4415768384933472, "learning_rate": 0.001, "loss": 2.51, "step": 16408 }, { "epoch": 1.4202898550724639, "grad_norm": 0.9194180965423584, "learning_rate": 0.001, "loss": 2.5097, "step": 16464 }, { "epoch": 1.42512077294686, "grad_norm": 0.4032771587371826, "learning_rate": 0.001, "loss": 2.5214, "step": 16520 }, { "epoch": 1.4299516908212562, "grad_norm": 0.3488192558288574, "learning_rate": 0.001, "loss": 2.5076, "step": 16576 }, { "epoch": 1.434782608695652, "grad_norm": 0.4139269292354584, "learning_rate": 0.001, "loss": 2.5203, "step": 16632 }, { "epoch": 1.4396135265700483, "grad_norm": 0.4293974041938782, "learning_rate": 0.001, "loss": 2.513, "step": 16688 }, { "epoch": 1.4444444444444444, "grad_norm": 0.5713045001029968, "learning_rate": 0.001, "loss": 2.5055, "step": 16744 }, { "epoch": 1.4492753623188406, "grad_norm": 0.6148470640182495, "learning_rate": 0.001, "loss": 2.5064, "step": 16800 }, { "epoch": 1.4541062801932367, "grad_norm": 0.6875100135803223, "learning_rate": 0.001, "loss": 2.5174, "step": 16856 }, { "epoch": 1.458937198067633, "grad_norm": 0.959662914276123, "learning_rate": 0.001, "loss": 2.5159, "step": 16912 }, { "epoch": 1.463768115942029, "grad_norm": 0.7817173600196838, "learning_rate": 0.001, "loss": 2.5173, "step": 16968 }, { "epoch": 1.4685990338164252, "grad_norm": 0.9040431976318359, "learning_rate": 0.001, "loss": 2.5087, "step": 17024 }, { "epoch": 1.4734299516908211, "grad_norm": 0.4779967665672302, "learning_rate": 0.001, "loss": 2.5099, "step": 17080 }, { "epoch": 1.4782608695652173, "grad_norm": 2.2653839588165283, "learning_rate": 0.001, "loss": 2.5015, "step": 17136 }, { "epoch": 1.4830917874396135, "grad_norm": 0.6779986023902893, "learning_rate": 0.001, "loss": 2.517, "step": 17192 }, { "epoch": 1.4879227053140096, "grad_norm": 0.8404067754745483, "learning_rate": 0.001, "loss": 2.5209, "step": 17248 }, { "epoch": 1.4927536231884058, "grad_norm": 2.1037940979003906, "learning_rate": 0.001, "loss": 2.5054, "step": 17304 }, { "epoch": 1.497584541062802, "grad_norm": 0.5338457226753235, "learning_rate": 0.001, "loss": 2.5078, "step": 17360 }, { "epoch": 1.502415458937198, "grad_norm": 0.6839510798454285, "learning_rate": 0.001, "loss": 2.5158, "step": 17416 }, { "epoch": 1.5072463768115942, "grad_norm": 0.6963435411453247, "learning_rate": 0.001, "loss": 2.5223, "step": 17472 }, { "epoch": 1.5120772946859904, "grad_norm": 0.7761881351470947, "learning_rate": 0.001, "loss": 2.5113, "step": 17528 }, { "epoch": 1.5169082125603865, "grad_norm": 0.4439939856529236, "learning_rate": 0.001, "loss": 2.5035, "step": 17584 }, { "epoch": 1.5217391304347827, "grad_norm": 8.835694313049316, "learning_rate": 0.001, "loss": 2.5081, "step": 17640 }, { "epoch": 1.5265700483091789, "grad_norm": 0.654516339302063, "learning_rate": 0.001, "loss": 2.5211, "step": 17696 }, { "epoch": 1.531400966183575, "grad_norm": 0.7336230874061584, "learning_rate": 0.001, "loss": 2.4982, "step": 17752 }, { "epoch": 1.5362318840579712, "grad_norm": 1.263433575630188, "learning_rate": 0.001, "loss": 2.4997, "step": 17808 }, { "epoch": 1.541062801932367, "grad_norm": 0.46169614791870117, "learning_rate": 0.001, "loss": 2.5066, "step": 17864 }, { "epoch": 1.5458937198067633, "grad_norm": 1.5880149602890015, "learning_rate": 0.001, "loss": 2.4869, "step": 17920 }, { "epoch": 1.5507246376811594, "grad_norm": 0.6592299342155457, "learning_rate": 0.001, "loss": 2.4857, "step": 17976 }, { "epoch": 1.5555555555555556, "grad_norm": 0.42128682136535645, "learning_rate": 0.001, "loss": 2.493, "step": 18032 }, { "epoch": 1.5603864734299517, "grad_norm": 0.7555729746818542, "learning_rate": 0.001, "loss": 2.4786, "step": 18088 }, { "epoch": 1.5652173913043477, "grad_norm": 0.6741869449615479, "learning_rate": 0.001, "loss": 2.4889, "step": 18144 }, { "epoch": 1.5700483091787438, "grad_norm": 0.6820804476737976, "learning_rate": 0.001, "loss": 2.5124, "step": 18200 }, { "epoch": 1.57487922705314, "grad_norm": 0.7633949518203735, "learning_rate": 0.001, "loss": 2.4979, "step": 18256 }, { "epoch": 1.5797101449275361, "grad_norm": 0.4473211169242859, "learning_rate": 0.001, "loss": 2.5009, "step": 18312 }, { "epoch": 1.5845410628019323, "grad_norm": 0.6472347378730774, "learning_rate": 0.001, "loss": 2.4978, "step": 18368 }, { "epoch": 1.5893719806763285, "grad_norm": 0.46679043769836426, "learning_rate": 0.001, "loss": 2.4903, "step": 18424 }, { "epoch": 1.5942028985507246, "grad_norm": 1.9245352745056152, "learning_rate": 0.001, "loss": 2.494, "step": 18480 }, { "epoch": 1.5990338164251208, "grad_norm": 2.9718055725097656, "learning_rate": 0.001, "loss": 2.4925, "step": 18536 }, { "epoch": 1.603864734299517, "grad_norm": 0.7821285724639893, "learning_rate": 0.001, "loss": 2.4897, "step": 18592 }, { "epoch": 1.608695652173913, "grad_norm": 0.39512935280799866, "learning_rate": 0.001, "loss": 2.4837, "step": 18648 }, { "epoch": 1.6135265700483092, "grad_norm": 201.1911163330078, "learning_rate": 0.001, "loss": 2.4799, "step": 18704 }, { "epoch": 1.6183574879227054, "grad_norm": 0.4764461815357208, "learning_rate": 0.001, "loss": 2.4948, "step": 18760 }, { "epoch": 1.6231884057971016, "grad_norm": 0.422262042760849, "learning_rate": 0.001, "loss": 2.4756, "step": 18816 }, { "epoch": 1.6280193236714977, "grad_norm": 0.7426127791404724, "learning_rate": 0.001, "loss": 2.4776, "step": 18872 }, { "epoch": 1.6328502415458939, "grad_norm": 0.6004542708396912, "learning_rate": 0.001, "loss": 2.4736, "step": 18928 }, { "epoch": 1.6376811594202898, "grad_norm": 0.511694073677063, "learning_rate": 0.001, "loss": 2.4928, "step": 18984 }, { "epoch": 1.642512077294686, "grad_norm": 0.9940207600593567, "learning_rate": 0.001, "loss": 2.5022, "step": 19040 }, { "epoch": 1.6473429951690821, "grad_norm": 0.5756489634513855, "learning_rate": 0.001, "loss": 2.4926, "step": 19096 }, { "epoch": 1.6521739130434783, "grad_norm": 0.7159668207168579, "learning_rate": 0.001, "loss": 2.5008, "step": 19152 }, { "epoch": 1.6570048309178744, "grad_norm": 0.6493143439292908, "learning_rate": 0.001, "loss": 2.5039, "step": 19208 }, { "epoch": 1.6618357487922706, "grad_norm": 0.5644376277923584, "learning_rate": 0.001, "loss": 2.4634, "step": 19264 }, { "epoch": 1.6666666666666665, "grad_norm": 1.4159189462661743, "learning_rate": 0.001, "loss": 2.4742, "step": 19320 }, { "epoch": 1.6714975845410627, "grad_norm": 1.2761787176132202, "learning_rate": 0.001, "loss": 2.5077, "step": 19376 }, { "epoch": 1.6763285024154588, "grad_norm": 0.5674095153808594, "learning_rate": 0.001, "loss": 2.4865, "step": 19432 }, { "epoch": 1.681159420289855, "grad_norm": 0.4240173399448395, "learning_rate": 0.001, "loss": 2.4782, "step": 19488 }, { "epoch": 1.6859903381642511, "grad_norm": 0.951404869556427, "learning_rate": 0.001, "loss": 2.4718, "step": 19544 }, { "epoch": 1.6908212560386473, "grad_norm": 1.0875020027160645, "learning_rate": 0.001, "loss": 2.4819, "step": 19600 }, { "epoch": 1.6956521739130435, "grad_norm": 0.904477059841156, "learning_rate": 0.001, "loss": 2.4951, "step": 19656 }, { "epoch": 1.7004830917874396, "grad_norm": 0.6886747479438782, "learning_rate": 0.001, "loss": 2.4934, "step": 19712 }, { "epoch": 1.7053140096618358, "grad_norm": 0.7169251441955566, "learning_rate": 0.001, "loss": 2.4836, "step": 19768 }, { "epoch": 1.710144927536232, "grad_norm": 0.47756174206733704, "learning_rate": 0.001, "loss": 2.4878, "step": 19824 }, { "epoch": 1.714975845410628, "grad_norm": 0.4913206100463867, "learning_rate": 0.001, "loss": 2.476, "step": 19880 }, { "epoch": 1.7198067632850242, "grad_norm": 1.1071933507919312, "learning_rate": 0.001, "loss": 2.4667, "step": 19936 }, { "epoch": 1.7246376811594204, "grad_norm": 0.5026872158050537, "learning_rate": 0.001, "loss": 2.4782, "step": 19992 }, { "epoch": 1.7294685990338166, "grad_norm": 0.6514714360237122, "learning_rate": 0.001, "loss": 2.4647, "step": 20048 }, { "epoch": 1.7342995169082127, "grad_norm": 0.5138649344444275, "learning_rate": 0.001, "loss": 2.467, "step": 20104 }, { "epoch": 1.7391304347826086, "grad_norm": 0.5914953947067261, "learning_rate": 0.001, "loss": 2.4651, "step": 20160 }, { "epoch": 1.7439613526570048, "grad_norm": 0.5537645816802979, "learning_rate": 0.001, "loss": 2.4685, "step": 20216 }, { "epoch": 1.748792270531401, "grad_norm": 1.2123079299926758, "learning_rate": 0.001, "loss": 2.462, "step": 20272 }, { "epoch": 1.7536231884057971, "grad_norm": 0.7601057291030884, "learning_rate": 0.001, "loss": 2.4921, "step": 20328 }, { "epoch": 1.7584541062801933, "grad_norm": 2.7119219303131104, "learning_rate": 0.001, "loss": 2.5011, "step": 20384 }, { "epoch": 1.7632850241545892, "grad_norm": 0.4809827208518982, "learning_rate": 0.001, "loss": 2.4822, "step": 20440 }, { "epoch": 1.7681159420289854, "grad_norm": 0.5912278294563293, "learning_rate": 0.001, "loss": 2.5019, "step": 20496 }, { "epoch": 1.7729468599033815, "grad_norm": 1.2461860179901123, "learning_rate": 0.001, "loss": 2.4881, "step": 20552 }, { "epoch": 1.7777777777777777, "grad_norm": 0.6390478014945984, "learning_rate": 0.001, "loss": 2.4783, "step": 20608 }, { "epoch": 1.7826086956521738, "grad_norm": 0.5740552544593811, "learning_rate": 0.001, "loss": 2.5142, "step": 20664 }, { "epoch": 1.78743961352657, "grad_norm": 1.578294277191162, "learning_rate": 0.001, "loss": 2.4988, "step": 20720 }, { "epoch": 1.7922705314009661, "grad_norm": 0.613778829574585, "learning_rate": 0.001, "loss": 2.4975, "step": 20776 }, { "epoch": 1.7971014492753623, "grad_norm": 0.6002821326255798, "learning_rate": 0.001, "loss": 2.4776, "step": 20832 }, { "epoch": 1.8019323671497585, "grad_norm": 0.7093578577041626, "learning_rate": 0.001, "loss": 2.4774, "step": 20888 }, { "epoch": 1.8067632850241546, "grad_norm": 0.5581182241439819, "learning_rate": 0.001, "loss": 2.4622, "step": 20944 }, { "epoch": 1.8115942028985508, "grad_norm": 0.7352020144462585, "learning_rate": 0.001, "loss": 2.4627, "step": 21000 }, { "epoch": 1.816425120772947, "grad_norm": 2.4271843433380127, "learning_rate": 0.001, "loss": 2.4801, "step": 21056 }, { "epoch": 1.821256038647343, "grad_norm": 0.5340695977210999, "learning_rate": 0.001, "loss": 2.4902, "step": 21112 }, { "epoch": 1.8260869565217392, "grad_norm": 0.46885180473327637, "learning_rate": 0.001, "loss": 2.4881, "step": 21168 }, { "epoch": 1.8309178743961354, "grad_norm": 1.0845648050308228, "learning_rate": 0.001, "loss": 2.4685, "step": 21224 }, { "epoch": 1.8357487922705316, "grad_norm": 1.0396238565444946, "learning_rate": 0.001, "loss": 2.465, "step": 21280 }, { "epoch": 1.8405797101449275, "grad_norm": 0.8950973153114319, "learning_rate": 0.001, "loss": 2.4743, "step": 21336 }, { "epoch": 1.8454106280193237, "grad_norm": 0.9389447569847107, "learning_rate": 0.001, "loss": 2.4888, "step": 21392 }, { "epoch": 1.8502415458937198, "grad_norm": 0.5466434955596924, "learning_rate": 0.001, "loss": 2.4757, "step": 21448 }, { "epoch": 1.855072463768116, "grad_norm": 2.5765113830566406, "learning_rate": 0.001, "loss": 2.4701, "step": 21504 }, { "epoch": 1.8599033816425121, "grad_norm": 0.6471238732337952, "learning_rate": 0.001, "loss": 2.4813, "step": 21560 }, { "epoch": 1.864734299516908, "grad_norm": 3.128875970840454, "learning_rate": 0.001, "loss": 2.4798, "step": 21616 }, { "epoch": 1.8695652173913042, "grad_norm": 1.321737289428711, "learning_rate": 0.001, "loss": 2.4852, "step": 21672 }, { "epoch": 1.8743961352657004, "grad_norm": 3.308558702468872, "learning_rate": 0.001, "loss": 2.4896, "step": 21728 }, { "epoch": 1.8792270531400965, "grad_norm": 0.8100047707557678, "learning_rate": 0.001, "loss": 2.4785, "step": 21784 }, { "epoch": 1.8840579710144927, "grad_norm": 2.5754692554473877, "learning_rate": 0.001, "loss": 2.4809, "step": 21840 }, { "epoch": 1.8888888888888888, "grad_norm": 5.025903224945068, "learning_rate": 0.001, "loss": 2.4905, "step": 21896 }, { "epoch": 1.893719806763285, "grad_norm": 0.5873627066612244, "learning_rate": 0.001, "loss": 2.48, "step": 21952 }, { "epoch": 1.8985507246376812, "grad_norm": 0.41663891077041626, "learning_rate": 0.001, "loss": 2.4703, "step": 22008 }, { "epoch": 1.9033816425120773, "grad_norm": 2.2055370807647705, "learning_rate": 0.001, "loss": 2.4637, "step": 22064 }, { "epoch": 1.9082125603864735, "grad_norm": 0.5622344017028809, "learning_rate": 0.001, "loss": 2.4748, "step": 22120 }, { "epoch": 1.9130434782608696, "grad_norm": 0.4542618691921234, "learning_rate": 0.001, "loss": 2.4666, "step": 22176 }, { "epoch": 1.9178743961352658, "grad_norm": 0.974029541015625, "learning_rate": 0.001, "loss": 2.4585, "step": 22232 }, { "epoch": 1.922705314009662, "grad_norm": 0.6217877268791199, "learning_rate": 0.001, "loss": 2.4649, "step": 22288 }, { "epoch": 1.927536231884058, "grad_norm": 0.5779778361320496, "learning_rate": 0.001, "loss": 2.4563, "step": 22344 }, { "epoch": 1.9323671497584543, "grad_norm": 0.48157015442848206, "learning_rate": 0.001, "loss": 2.4602, "step": 22400 }, { "epoch": 1.9371980676328504, "grad_norm": 0.6020475625991821, "learning_rate": 0.001, "loss": 2.4505, "step": 22456 }, { "epoch": 1.9420289855072463, "grad_norm": 0.534919261932373, "learning_rate": 0.001, "loss": 2.4587, "step": 22512 }, { "epoch": 1.9468599033816425, "grad_norm": 0.5620009899139404, "learning_rate": 0.001, "loss": 2.4592, "step": 22568 }, { "epoch": 1.9516908212560387, "grad_norm": 0.9667491912841797, "learning_rate": 0.001, "loss": 2.4745, "step": 22624 }, { "epoch": 1.9565217391304348, "grad_norm": 0.9978834390640259, "learning_rate": 0.001, "loss": 2.4855, "step": 22680 }, { "epoch": 1.961352657004831, "grad_norm": 0.6425617933273315, "learning_rate": 0.001, "loss": 2.4812, "step": 22736 }, { "epoch": 1.966183574879227, "grad_norm": 1.0051997900009155, "learning_rate": 0.001, "loss": 2.4851, "step": 22792 }, { "epoch": 1.971014492753623, "grad_norm": 0.9055406451225281, "learning_rate": 0.001, "loss": 2.4946, "step": 22848 }, { "epoch": 1.9758454106280192, "grad_norm": 0.9933205842971802, "learning_rate": 0.001, "loss": 2.5218, "step": 22904 }, { "epoch": 1.9806763285024154, "grad_norm": 4.04213809967041, "learning_rate": 0.001, "loss": 2.4898, "step": 22960 }, { "epoch": 1.9855072463768115, "grad_norm": 0.6505899429321289, "learning_rate": 0.001, "loss": 2.4845, "step": 23016 }, { "epoch": 1.9903381642512077, "grad_norm": 0.9572941660881042, "learning_rate": 0.001, "loss": 2.4716, "step": 23072 }, { "epoch": 1.9951690821256038, "grad_norm": 1.3944183588027954, "learning_rate": 0.001, "loss": 2.4616, "step": 23128 }, { "epoch": 2.0, "grad_norm": 1.0966757535934448, "learning_rate": 0.001, "loss": 2.4681, "step": 23184 }, { "epoch": 2.004830917874396, "grad_norm": 1.0105741024017334, "learning_rate": 0.001, "loss": 2.4401, "step": 23240 }, { "epoch": 2.0096618357487923, "grad_norm": 0.920160174369812, "learning_rate": 0.001, "loss": 2.4322, "step": 23296 }, { "epoch": 2.0144927536231885, "grad_norm": 0.7583364844322205, "learning_rate": 0.001, "loss": 2.4339, "step": 23352 }, { "epoch": 2.0193236714975846, "grad_norm": 0.4173266887664795, "learning_rate": 0.001, "loss": 2.4171, "step": 23408 }, { "epoch": 2.024154589371981, "grad_norm": 1.54490327835083, "learning_rate": 0.001, "loss": 2.4264, "step": 23464 }, { "epoch": 2.028985507246377, "grad_norm": 52.289432525634766, "learning_rate": 0.001, "loss": 2.4346, "step": 23520 }, { "epoch": 2.033816425120773, "grad_norm": 5.7289719581604, "learning_rate": 0.001, "loss": 2.4591, "step": 23576 }, { "epoch": 2.0386473429951693, "grad_norm": 1.1728147268295288, "learning_rate": 0.001, "loss": 2.4574, "step": 23632 }, { "epoch": 2.0434782608695654, "grad_norm": 0.7576716542243958, "learning_rate": 0.001, "loss": 2.4416, "step": 23688 }, { "epoch": 2.0483091787439616, "grad_norm": 1.6649419069290161, "learning_rate": 0.001, "loss": 2.4562, "step": 23744 }, { "epoch": 2.0531400966183573, "grad_norm": 1.3794751167297363, "learning_rate": 0.001, "loss": 2.4485, "step": 23800 }, { "epoch": 2.0579710144927534, "grad_norm": 0.5330275297164917, "learning_rate": 0.001, "loss": 2.4663, "step": 23856 }, { "epoch": 2.0628019323671496, "grad_norm": 0.6258566975593567, "learning_rate": 0.001, "loss": 2.4456, "step": 23912 }, { "epoch": 2.0676328502415457, "grad_norm": 0.7583393454551697, "learning_rate": 0.001, "loss": 2.4413, "step": 23968 }, { "epoch": 2.072463768115942, "grad_norm": 1.1774613857269287, "learning_rate": 0.001, "loss": 2.4271, "step": 24024 }, { "epoch": 2.077294685990338, "grad_norm": 0.7557084560394287, "learning_rate": 0.001, "loss": 2.4344, "step": 24080 }, { "epoch": 2.082125603864734, "grad_norm": 1.0778863430023193, "learning_rate": 0.001, "loss": 2.4356, "step": 24136 }, { "epoch": 2.0869565217391304, "grad_norm": 1.0254830121994019, "learning_rate": 0.001, "loss": 2.4305, "step": 24192 }, { "epoch": 2.0917874396135265, "grad_norm": 1.5008212327957153, "learning_rate": 0.001, "loss": 2.4283, "step": 24248 }, { "epoch": 2.0966183574879227, "grad_norm": 1.0089281797409058, "learning_rate": 0.001, "loss": 2.4314, "step": 24304 }, { "epoch": 2.101449275362319, "grad_norm": 1.9808462858200073, "learning_rate": 0.001, "loss": 2.4301, "step": 24360 }, { "epoch": 2.106280193236715, "grad_norm": 0.7853475213050842, "learning_rate": 0.001, "loss": 2.4551, "step": 24416 }, { "epoch": 2.111111111111111, "grad_norm": 2.9563910961151123, "learning_rate": 0.001, "loss": 2.4392, "step": 24472 }, { "epoch": 2.1159420289855073, "grad_norm": 5.112704277038574, "learning_rate": 0.001, "loss": 2.436, "step": 24528 }, { "epoch": 2.1207729468599035, "grad_norm": 2.355679512023926, "learning_rate": 0.001, "loss": 2.4323, "step": 24584 }, { "epoch": 2.1256038647342996, "grad_norm": 1.002687692642212, "learning_rate": 0.001, "loss": 2.4337, "step": 24640 }, { "epoch": 2.130434782608696, "grad_norm": 0.48910364508628845, "learning_rate": 0.001, "loss": 2.4386, "step": 24696 }, { "epoch": 2.135265700483092, "grad_norm": 0.6243553161621094, "learning_rate": 0.001, "loss": 2.4367, "step": 24752 }, { "epoch": 2.140096618357488, "grad_norm": 0.6357074975967407, "learning_rate": 0.001, "loss": 2.4165, "step": 24808 }, { "epoch": 2.1449275362318843, "grad_norm": 0.6496347784996033, "learning_rate": 0.001, "loss": 2.4182, "step": 24864 }, { "epoch": 2.14975845410628, "grad_norm": 1.040175437927246, "learning_rate": 0.001, "loss": 2.4306, "step": 24920 }, { "epoch": 2.154589371980676, "grad_norm": 0.988701343536377, "learning_rate": 0.001, "loss": 2.4526, "step": 24976 }, { "epoch": 2.1594202898550723, "grad_norm": 0.997657060623169, "learning_rate": 0.001, "loss": 2.4393, "step": 25032 }, { "epoch": 2.1642512077294684, "grad_norm": 0.5957661271095276, "learning_rate": 0.001, "loss": 2.4464, "step": 25088 }, { "epoch": 2.1690821256038646, "grad_norm": 4.283563137054443, "learning_rate": 0.001, "loss": 2.452, "step": 25144 }, { "epoch": 2.1739130434782608, "grad_norm": 0.6921948194503784, "learning_rate": 0.001, "loss": 2.4507, "step": 25200 }, { "epoch": 2.178743961352657, "grad_norm": 3.557818651199341, "learning_rate": 0.001, "loss": 2.4363, "step": 25256 }, { "epoch": 2.183574879227053, "grad_norm": 0.8437843918800354, "learning_rate": 0.001, "loss": 2.425, "step": 25312 }, { "epoch": 2.1884057971014492, "grad_norm": 0.5580781102180481, "learning_rate": 0.001, "loss": 2.4246, "step": 25368 }, { "epoch": 2.1932367149758454, "grad_norm": 0.7081442475318909, "learning_rate": 0.001, "loss": 2.4275, "step": 25424 }, { "epoch": 2.1980676328502415, "grad_norm": 0.7513576149940491, "learning_rate": 0.001, "loss": 2.4184, "step": 25480 }, { "epoch": 2.2028985507246377, "grad_norm": 0.8365774750709534, "learning_rate": 0.001, "loss": 2.4172, "step": 25536 }, { "epoch": 2.207729468599034, "grad_norm": 0.9274559020996094, "learning_rate": 0.001, "loss": 2.4368, "step": 25592 }, { "epoch": 2.21256038647343, "grad_norm": 3.1649367809295654, "learning_rate": 0.001, "loss": 2.4443, "step": 25648 }, { "epoch": 2.217391304347826, "grad_norm": 4.661854267120361, "learning_rate": 0.001, "loss": 2.4351, "step": 25704 }, { "epoch": 2.2222222222222223, "grad_norm": 0.6997871398925781, "learning_rate": 0.001, "loss": 2.418, "step": 25760 }, { "epoch": 2.2270531400966185, "grad_norm": 0.6769877076148987, "learning_rate": 0.001, "loss": 2.4135, "step": 25816 }, { "epoch": 2.2318840579710146, "grad_norm": 0.5421584248542786, "learning_rate": 0.001, "loss": 2.4134, "step": 25872 }, { "epoch": 2.236714975845411, "grad_norm": 5.050682544708252, "learning_rate": 0.001, "loss": 2.4485, "step": 25928 }, { "epoch": 2.241545893719807, "grad_norm": 2.9767906665802, "learning_rate": 0.001, "loss": 2.4348, "step": 25984 }, { "epoch": 2.246376811594203, "grad_norm": 0.7618628144264221, "learning_rate": 0.001, "loss": 2.4202, "step": 26040 }, { "epoch": 2.2512077294685993, "grad_norm": 1.7416142225265503, "learning_rate": 0.001, "loss": 2.4208, "step": 26096 }, { "epoch": 2.2560386473429954, "grad_norm": 1.0441137552261353, "learning_rate": 0.001, "loss": 2.4213, "step": 26152 }, { "epoch": 2.260869565217391, "grad_norm": 0.8699880242347717, "learning_rate": 0.001, "loss": 2.4057, "step": 26208 }, { "epoch": 2.2657004830917873, "grad_norm": 0.8893274068832397, "learning_rate": 0.001, "loss": 2.4261, "step": 26264 }, { "epoch": 2.2705314009661834, "grad_norm": 1.335638403892517, "learning_rate": 0.001, "loss": 2.4237, "step": 26320 }, { "epoch": 2.2753623188405796, "grad_norm": 0.8254622220993042, "learning_rate": 0.001, "loss": 2.4434, "step": 26376 }, { "epoch": 2.2801932367149758, "grad_norm": 0.9731737375259399, "learning_rate": 0.001, "loss": 2.4238, "step": 26432 }, { "epoch": 2.285024154589372, "grad_norm": 0.9786226153373718, "learning_rate": 0.001, "loss": 2.4306, "step": 26488 }, { "epoch": 2.289855072463768, "grad_norm": 0.9375965595245361, "learning_rate": 0.001, "loss": 2.4184, "step": 26544 }, { "epoch": 2.2946859903381642, "grad_norm": 0.7358525395393372, "learning_rate": 0.001, "loss": 2.427, "step": 26600 }, { "epoch": 2.2995169082125604, "grad_norm": 1.1536989212036133, "learning_rate": 0.001, "loss": 2.4207, "step": 26656 }, { "epoch": 2.3043478260869565, "grad_norm": 1.206375002861023, "learning_rate": 0.001, "loss": 2.4209, "step": 26712 }, { "epoch": 2.3091787439613527, "grad_norm": 2.6680002212524414, "learning_rate": 0.001, "loss": 2.4268, "step": 26768 }, { "epoch": 2.314009661835749, "grad_norm": 2.0401179790496826, "learning_rate": 0.001, "loss": 2.4262, "step": 26824 }, { "epoch": 2.318840579710145, "grad_norm": 1.0442848205566406, "learning_rate": 0.001, "loss": 2.4438, "step": 26880 }, { "epoch": 2.323671497584541, "grad_norm": 1.3826639652252197, "learning_rate": 0.001, "loss": 2.443, "step": 26936 }, { "epoch": 2.3285024154589373, "grad_norm": 1.0294597148895264, "learning_rate": 0.001, "loss": 2.4465, "step": 26992 }, { "epoch": 2.3333333333333335, "grad_norm": 0.6414815187454224, "learning_rate": 0.001, "loss": 2.4212, "step": 27048 }, { "epoch": 2.3381642512077296, "grad_norm": 0.5294144153594971, "learning_rate": 0.001, "loss": 2.4202, "step": 27104 }, { "epoch": 2.342995169082126, "grad_norm": 0.589288055896759, "learning_rate": 0.001, "loss": 2.4226, "step": 27160 }, { "epoch": 2.3478260869565215, "grad_norm": 0.5152631998062134, "learning_rate": 0.001, "loss": 2.4139, "step": 27216 }, { "epoch": 2.3526570048309177, "grad_norm": 0.8567907810211182, "learning_rate": 0.001, "loss": 2.4279, "step": 27272 }, { "epoch": 2.357487922705314, "grad_norm": 1.1003116369247437, "learning_rate": 0.001, "loss": 2.426, "step": 27328 }, { "epoch": 2.36231884057971, "grad_norm": 0.5347234606742859, "learning_rate": 0.001, "loss": 2.4184, "step": 27384 }, { "epoch": 2.367149758454106, "grad_norm": 2.898610830307007, "learning_rate": 0.001, "loss": 2.4114, "step": 27440 }, { "epoch": 2.3719806763285023, "grad_norm": 0.6177157759666443, "learning_rate": 0.001, "loss": 2.4163, "step": 27496 }, { "epoch": 2.3768115942028984, "grad_norm": 1.2459274530410767, "learning_rate": 0.001, "loss": 2.4205, "step": 27552 }, { "epoch": 2.3816425120772946, "grad_norm": 0.676740288734436, "learning_rate": 0.001, "loss": 2.4079, "step": 27608 }, { "epoch": 2.3864734299516908, "grad_norm": 4.750284194946289, "learning_rate": 0.001, "loss": 2.4174, "step": 27664 }, { "epoch": 2.391304347826087, "grad_norm": 0.7142317891120911, "learning_rate": 0.001, "loss": 2.4184, "step": 27720 }, { "epoch": 2.396135265700483, "grad_norm": 0.7198566794395447, "learning_rate": 0.001, "loss": 2.4104, "step": 27776 }, { "epoch": 2.4009661835748792, "grad_norm": 1.5782791376113892, "learning_rate": 0.001, "loss": 2.4156, "step": 27832 }, { "epoch": 2.4057971014492754, "grad_norm": 0.6034999489784241, "learning_rate": 0.001, "loss": 2.4137, "step": 27888 }, { "epoch": 2.4106280193236715, "grad_norm": 0.803419828414917, "learning_rate": 0.001, "loss": 2.4355, "step": 27944 }, { "epoch": 2.4154589371980677, "grad_norm": 0.4411523938179016, "learning_rate": 0.001, "loss": 2.4033, "step": 28000 }, { "epoch": 2.420289855072464, "grad_norm": 0.7060615420341492, "learning_rate": 0.001, "loss": 2.4017, "step": 28056 }, { "epoch": 2.42512077294686, "grad_norm": 1.1304291486740112, "learning_rate": 0.001, "loss": 2.4224, "step": 28112 }, { "epoch": 2.429951690821256, "grad_norm": 1.5663508176803589, "learning_rate": 0.001, "loss": 2.4447, "step": 28168 }, { "epoch": 2.4347826086956523, "grad_norm": 0.8626444339752197, "learning_rate": 0.001, "loss": 2.4373, "step": 28224 }, { "epoch": 2.4396135265700485, "grad_norm": 1.1484856605529785, "learning_rate": 0.001, "loss": 2.4197, "step": 28280 }, { "epoch": 2.4444444444444446, "grad_norm": 4.306599140167236, "learning_rate": 0.001, "loss": 2.4194, "step": 28336 }, { "epoch": 2.449275362318841, "grad_norm": 1.4168084859848022, "learning_rate": 0.001, "loss": 2.4192, "step": 28392 }, { "epoch": 2.454106280193237, "grad_norm": 0.7608858942985535, "learning_rate": 0.001, "loss": 2.4183, "step": 28448 }, { "epoch": 2.4589371980676327, "grad_norm": 0.8891749382019043, "learning_rate": 0.001, "loss": 2.4234, "step": 28504 }, { "epoch": 2.463768115942029, "grad_norm": 1.1063004732131958, "learning_rate": 0.001, "loss": 2.4095, "step": 28560 }, { "epoch": 2.468599033816425, "grad_norm": 1.788999319076538, "learning_rate": 0.001, "loss": 2.4042, "step": 28616 }, { "epoch": 2.473429951690821, "grad_norm": 10.093421936035156, "learning_rate": 0.001, "loss": 2.4096, "step": 28672 }, { "epoch": 2.4782608695652173, "grad_norm": 0.8079579472541809, "learning_rate": 0.001, "loss": 2.4206, "step": 28728 }, { "epoch": 2.4830917874396135, "grad_norm": 1.0279505252838135, "learning_rate": 0.001, "loss": 2.4228, "step": 28784 }, { "epoch": 2.4879227053140096, "grad_norm": 0.5833010077476501, "learning_rate": 0.001, "loss": 2.4188, "step": 28840 }, { "epoch": 2.4927536231884058, "grad_norm": 1.3775608539581299, "learning_rate": 0.001, "loss": 2.4335, "step": 28896 }, { "epoch": 2.497584541062802, "grad_norm": 1.3201850652694702, "learning_rate": 0.001, "loss": 2.4225, "step": 28952 }, { "epoch": 2.502415458937198, "grad_norm": 0.9545646905899048, "learning_rate": 0.001, "loss": 2.4268, "step": 29008 }, { "epoch": 2.5072463768115942, "grad_norm": 1.4709309339523315, "learning_rate": 0.001, "loss": 2.421, "step": 29064 }, { "epoch": 2.5120772946859904, "grad_norm": 1.4570496082305908, "learning_rate": 0.001, "loss": 2.4136, "step": 29120 }, { "epoch": 2.5169082125603865, "grad_norm": 0.827422022819519, "learning_rate": 0.001, "loss": 2.4117, "step": 29176 }, { "epoch": 2.5217391304347827, "grad_norm": 4.7756829261779785, "learning_rate": 0.001, "loss": 2.4091, "step": 29232 }, { "epoch": 2.526570048309179, "grad_norm": 0.78351891040802, "learning_rate": 0.001, "loss": 2.4321, "step": 29288 }, { "epoch": 2.531400966183575, "grad_norm": 1.6681420803070068, "learning_rate": 0.001, "loss": 2.4213, "step": 29344 }, { "epoch": 2.536231884057971, "grad_norm": 0.878593921661377, "learning_rate": 0.001, "loss": 2.4263, "step": 29400 }, { "epoch": 2.541062801932367, "grad_norm": 1.1612260341644287, "learning_rate": 0.001, "loss": 2.4086, "step": 29456 }, { "epoch": 2.545893719806763, "grad_norm": 0.6242434978485107, "learning_rate": 0.001, "loss": 2.4028, "step": 29512 }, { "epoch": 2.550724637681159, "grad_norm": 0.6720430254936218, "learning_rate": 0.001, "loss": 2.4017, "step": 29568 }, { "epoch": 2.5555555555555554, "grad_norm": 1.0026371479034424, "learning_rate": 0.001, "loss": 2.39, "step": 29624 }, { "epoch": 2.5603864734299515, "grad_norm": 1.4545741081237793, "learning_rate": 0.001, "loss": 2.3999, "step": 29680 }, { "epoch": 2.5652173913043477, "grad_norm": 1.666398286819458, "learning_rate": 0.001, "loss": 2.4321, "step": 29736 }, { "epoch": 2.570048309178744, "grad_norm": 1.4893743991851807, "learning_rate": 0.001, "loss": 2.4469, "step": 29792 }, { "epoch": 2.57487922705314, "grad_norm": 1.9381307363510132, "learning_rate": 0.001, "loss": 2.4638, "step": 29848 }, { "epoch": 2.579710144927536, "grad_norm": 3.441112518310547, "learning_rate": 0.001, "loss": 2.4324, "step": 29904 }, { "epoch": 2.5845410628019323, "grad_norm": 0.7186459302902222, "learning_rate": 0.001, "loss": 2.4248, "step": 29960 }, { "epoch": 2.5893719806763285, "grad_norm": 2.555569648742676, "learning_rate": 0.001, "loss": 2.4287, "step": 30016 }, { "epoch": 2.5942028985507246, "grad_norm": 3.84259033203125, "learning_rate": 0.001, "loss": 2.4291, "step": 30072 }, { "epoch": 2.5990338164251208, "grad_norm": 1.7482112646102905, "learning_rate": 0.001, "loss": 2.4289, "step": 30128 }, { "epoch": 2.603864734299517, "grad_norm": 0.4381943643093109, "learning_rate": 0.001, "loss": 2.4185, "step": 30184 }, { "epoch": 2.608695652173913, "grad_norm": 1.1325384378433228, "learning_rate": 0.001, "loss": 2.4051, "step": 30240 }, { "epoch": 2.6135265700483092, "grad_norm": 1.4725890159606934, "learning_rate": 0.001, "loss": 2.4256, "step": 30296 }, { "epoch": 2.6183574879227054, "grad_norm": 0.6949960589408875, "learning_rate": 0.001, "loss": 2.424, "step": 30352 }, { "epoch": 2.6231884057971016, "grad_norm": 1.008844017982483, "learning_rate": 0.001, "loss": 2.4252, "step": 30408 }, { "epoch": 2.6280193236714977, "grad_norm": 0.7304251790046692, "learning_rate": 0.001, "loss": 2.4212, "step": 30464 }, { "epoch": 2.632850241545894, "grad_norm": 1.120819330215454, "learning_rate": 0.001, "loss": 2.4291, "step": 30520 }, { "epoch": 2.63768115942029, "grad_norm": 1.4787769317626953, "learning_rate": 0.001, "loss": 2.4368, "step": 30576 }, { "epoch": 2.642512077294686, "grad_norm": 2.0001964569091797, "learning_rate": 0.001, "loss": 2.4326, "step": 30632 }, { "epoch": 2.6473429951690823, "grad_norm": 0.625577449798584, "learning_rate": 0.001, "loss": 2.4416, "step": 30688 }, { "epoch": 2.6521739130434785, "grad_norm": 0.8403634428977966, "learning_rate": 0.001, "loss": 2.4254, "step": 30744 }, { "epoch": 2.6570048309178746, "grad_norm": 0.8532978296279907, "learning_rate": 0.001, "loss": 2.4213, "step": 30800 }, { "epoch": 2.661835748792271, "grad_norm": 1.9018747806549072, "learning_rate": 0.001, "loss": 2.4101, "step": 30856 }, { "epoch": 2.6666666666666665, "grad_norm": 0.7936710715293884, "learning_rate": 0.001, "loss": 2.417, "step": 30912 }, { "epoch": 2.6714975845410627, "grad_norm": 1.145591139793396, "learning_rate": 0.001, "loss": 2.4236, "step": 30968 }, { "epoch": 2.676328502415459, "grad_norm": 1.7701199054718018, "learning_rate": 0.001, "loss": 2.426, "step": 31024 }, { "epoch": 2.681159420289855, "grad_norm": 2.0457608699798584, "learning_rate": 0.001, "loss": 2.4161, "step": 31080 }, { "epoch": 2.685990338164251, "grad_norm": 2.239499807357788, "learning_rate": 0.001, "loss": 2.4072, "step": 31136 }, { "epoch": 2.6908212560386473, "grad_norm": 1.0082460641860962, "learning_rate": 0.001, "loss": 2.418, "step": 31192 }, { "epoch": 2.6956521739130435, "grad_norm": 1.1277071237564087, "learning_rate": 0.001, "loss": 2.4139, "step": 31248 }, { "epoch": 2.7004830917874396, "grad_norm": 1.4032684564590454, "learning_rate": 0.001, "loss": 2.4174, "step": 31304 }, { "epoch": 2.7053140096618358, "grad_norm": 0.9336313605308533, "learning_rate": 0.001, "loss": 2.4108, "step": 31360 }, { "epoch": 2.710144927536232, "grad_norm": 1.1663310527801514, "learning_rate": 0.001, "loss": 2.4309, "step": 31416 }, { "epoch": 2.714975845410628, "grad_norm": 0.6759818196296692, "learning_rate": 0.001, "loss": 2.4193, "step": 31472 }, { "epoch": 2.7198067632850242, "grad_norm": 0.6269552111625671, "learning_rate": 0.001, "loss": 2.4199, "step": 31528 }, { "epoch": 2.7246376811594204, "grad_norm": 0.4199528098106384, "learning_rate": 0.001, "loss": 2.402, "step": 31584 }, { "epoch": 2.7294685990338166, "grad_norm": 0.5285377502441406, "learning_rate": 0.001, "loss": 2.4001, "step": 31640 }, { "epoch": 2.7342995169082127, "grad_norm": 9.366227149963379, "learning_rate": 0.001, "loss": 2.3955, "step": 31696 }, { "epoch": 2.7391304347826084, "grad_norm": 1.677487850189209, "learning_rate": 0.001, "loss": 2.4229, "step": 31752 }, { "epoch": 2.7439613526570046, "grad_norm": 1.4587961435317993, "learning_rate": 0.001, "loss": 2.4199, "step": 31808 }, { "epoch": 2.7487922705314007, "grad_norm": 2.9107236862182617, "learning_rate": 0.001, "loss": 2.4217, "step": 31864 }, { "epoch": 2.753623188405797, "grad_norm": 0.9053725600242615, "learning_rate": 0.001, "loss": 2.4047, "step": 31920 }, { "epoch": 2.758454106280193, "grad_norm": 2.644174098968506, "learning_rate": 0.001, "loss": 2.4136, "step": 31976 }, { "epoch": 2.763285024154589, "grad_norm": 6.017156600952148, "learning_rate": 0.001, "loss": 2.4273, "step": 32032 }, { "epoch": 2.7681159420289854, "grad_norm": 1.0742233991622925, "learning_rate": 0.001, "loss": 2.4398, "step": 32088 }, { "epoch": 2.7729468599033815, "grad_norm": 1.4463682174682617, "learning_rate": 0.001, "loss": 2.4396, "step": 32144 }, { "epoch": 2.7777777777777777, "grad_norm": 2.0621907711029053, "learning_rate": 0.001, "loss": 2.4205, "step": 32200 }, { "epoch": 2.782608695652174, "grad_norm": 1.5583444833755493, "learning_rate": 0.001, "loss": 2.4205, "step": 32256 }, { "epoch": 2.78743961352657, "grad_norm": 1.4091781377792358, "learning_rate": 0.001, "loss": 2.4293, "step": 32312 }, { "epoch": 2.792270531400966, "grad_norm": 2.341855049133301, "learning_rate": 0.001, "loss": 2.4186, "step": 32368 }, { "epoch": 2.7971014492753623, "grad_norm": 2.265354633331299, "learning_rate": 0.001, "loss": 2.4147, "step": 32424 }, { "epoch": 2.8019323671497585, "grad_norm": 0.8014535903930664, "learning_rate": 0.001, "loss": 2.4212, "step": 32480 }, { "epoch": 2.8067632850241546, "grad_norm": 1.4704686403274536, "learning_rate": 0.001, "loss": 2.408, "step": 32536 }, { "epoch": 2.8115942028985508, "grad_norm": 1.1337703466415405, "learning_rate": 0.001, "loss": 2.4283, "step": 32592 }, { "epoch": 2.816425120772947, "grad_norm": 1.2696563005447388, "learning_rate": 0.001, "loss": 2.4296, "step": 32648 }, { "epoch": 2.821256038647343, "grad_norm": 2.815958261489868, "learning_rate": 0.001, "loss": 2.4238, "step": 32704 }, { "epoch": 2.8260869565217392, "grad_norm": 1.2532445192337036, "learning_rate": 0.001, "loss": 2.4316, "step": 32760 }, { "epoch": 2.8309178743961354, "grad_norm": 1.9834295511245728, "learning_rate": 0.001, "loss": 2.4249, "step": 32816 }, { "epoch": 2.8357487922705316, "grad_norm": 1.0654997825622559, "learning_rate": 0.001, "loss": 2.4156, "step": 32872 }, { "epoch": 2.8405797101449277, "grad_norm": 2.375105619430542, "learning_rate": 0.001, "loss": 2.4218, "step": 32928 }, { "epoch": 2.845410628019324, "grad_norm": 0.7590191960334778, "learning_rate": 0.001, "loss": 2.4053, "step": 32984 }, { "epoch": 2.85024154589372, "grad_norm": 0.9580590128898621, "learning_rate": 0.001, "loss": 2.4126, "step": 33040 }, { "epoch": 2.855072463768116, "grad_norm": 1.3444570302963257, "learning_rate": 0.001, "loss": 2.4201, "step": 33096 }, { "epoch": 2.8599033816425123, "grad_norm": 1.0784677267074585, "learning_rate": 0.001, "loss": 2.4255, "step": 33152 }, { "epoch": 2.864734299516908, "grad_norm": 1.7011573314666748, "learning_rate": 0.001, "loss": 2.4174, "step": 33208 }, { "epoch": 2.869565217391304, "grad_norm": 0.7227048873901367, "learning_rate": 0.001, "loss": 2.4227, "step": 33264 }, { "epoch": 2.8743961352657004, "grad_norm": 1.003725290298462, "learning_rate": 0.001, "loss": 2.4237, "step": 33320 }, { "epoch": 2.8792270531400965, "grad_norm": 3.9613237380981445, "learning_rate": 0.001, "loss": 2.4162, "step": 33376 }, { "epoch": 2.8840579710144927, "grad_norm": 2.6184682846069336, "learning_rate": 0.001, "loss": 2.4105, "step": 33432 }, { "epoch": 2.888888888888889, "grad_norm": 1.3031649589538574, "learning_rate": 0.001, "loss": 2.4192, "step": 33488 }, { "epoch": 2.893719806763285, "grad_norm": 9.451916694641113, "learning_rate": 0.001, "loss": 2.4395, "step": 33544 }, { "epoch": 2.898550724637681, "grad_norm": 1.3001573085784912, "learning_rate": 0.001, "loss": 2.4331, "step": 33600 }, { "epoch": 2.9033816425120773, "grad_norm": 0.5848278999328613, "learning_rate": 0.001, "loss": 2.4337, "step": 33656 }, { "epoch": 2.9082125603864735, "grad_norm": 0.8421841263771057, "learning_rate": 0.001, "loss": 2.436, "step": 33712 }, { "epoch": 2.9130434782608696, "grad_norm": 1.9961779117584229, "learning_rate": 0.001, "loss": 2.4331, "step": 33768 }, { "epoch": 2.917874396135266, "grad_norm": 2.2637486457824707, "learning_rate": 0.001, "loss": 2.4234, "step": 33824 }, { "epoch": 2.922705314009662, "grad_norm": 0.6603698134422302, "learning_rate": 0.001, "loss": 2.4233, "step": 33880 }, { "epoch": 2.927536231884058, "grad_norm": 1.0390866994857788, "learning_rate": 0.001, "loss": 2.4183, "step": 33936 }, { "epoch": 2.9323671497584543, "grad_norm": 1.7142407894134521, "learning_rate": 0.001, "loss": 2.435, "step": 33992 }, { "epoch": 2.9371980676328504, "grad_norm": 0.8366157412528992, "learning_rate": 0.001, "loss": 2.4312, "step": 34048 }, { "epoch": 2.942028985507246, "grad_norm": 2.7395501136779785, "learning_rate": 0.001, "loss": 2.421, "step": 34104 }, { "epoch": 2.9468599033816423, "grad_norm": 6.661783218383789, "learning_rate": 0.001, "loss": 2.4213, "step": 34160 }, { "epoch": 2.9516908212560384, "grad_norm": 1.2035876512527466, "learning_rate": 0.001, "loss": 2.4559, "step": 34216 }, { "epoch": 2.9565217391304346, "grad_norm": 14.095723152160645, "learning_rate": 0.001, "loss": 2.4303, "step": 34272 }, { "epoch": 2.9613526570048307, "grad_norm": 1.9609922170639038, "learning_rate": 0.001, "loss": 2.4221, "step": 34328 }, { "epoch": 2.966183574879227, "grad_norm": 2.0201287269592285, "learning_rate": 0.001, "loss": 2.4337, "step": 34384 }, { "epoch": 2.971014492753623, "grad_norm": 0.8179659247398376, "learning_rate": 0.001, "loss": 2.4274, "step": 34440 }, { "epoch": 2.975845410628019, "grad_norm": 0.998927891254425, "learning_rate": 0.001, "loss": 2.4182, "step": 34496 }, { "epoch": 2.9806763285024154, "grad_norm": 1.5369782447814941, "learning_rate": 0.001, "loss": 2.404, "step": 34552 }, { "epoch": 2.9855072463768115, "grad_norm": 0.8678004145622253, "learning_rate": 0.001, "loss": 2.4057, "step": 34608 }, { "epoch": 2.9903381642512077, "grad_norm": 5.1712260246276855, "learning_rate": 0.001, "loss": 2.4016, "step": 34664 }, { "epoch": 2.995169082125604, "grad_norm": 3.1465790271759033, "learning_rate": 0.001, "loss": 2.4078, "step": 34720 }, { "epoch": 3.0, "grad_norm": 1.173697829246521, "learning_rate": 0.001, "loss": 2.4113, "step": 34776 }, { "epoch": 3.004830917874396, "grad_norm": 0.6932175159454346, "learning_rate": 0.001, "loss": 2.3669, "step": 34832 }, { "epoch": 3.0096618357487923, "grad_norm": 0.6690827012062073, "learning_rate": 0.001, "loss": 2.3564, "step": 34888 }, { "epoch": 3.0144927536231885, "grad_norm": 1.135644793510437, "learning_rate": 0.001, "loss": 2.3536, "step": 34944 }, { "epoch": 3.0193236714975846, "grad_norm": 0.9978145956993103, "learning_rate": 0.001, "loss": 2.3597, "step": 35000 }, { "epoch": 3.024154589371981, "grad_norm": 1.3222076892852783, "learning_rate": 0.001, "loss": 2.3672, "step": 35056 }, { "epoch": 3.028985507246377, "grad_norm": 1.1678580045700073, "learning_rate": 0.001, "loss": 2.3754, "step": 35112 }, { "epoch": 3.033816425120773, "grad_norm": 1.3769093751907349, "learning_rate": 0.001, "loss": 2.3743, "step": 35168 }, { "epoch": 3.0386473429951693, "grad_norm": 5.447660446166992, "learning_rate": 0.001, "loss": 2.391, "step": 35224 }, { "epoch": 3.0434782608695654, "grad_norm": 0.9091969728469849, "learning_rate": 0.001, "loss": 2.3728, "step": 35280 }, { "epoch": 3.0483091787439616, "grad_norm": 2.0814619064331055, "learning_rate": 0.001, "loss": 2.3758, "step": 35336 }, { "epoch": 3.0531400966183573, "grad_norm": 5.833859443664551, "learning_rate": 0.001, "loss": 2.3669, "step": 35392 }, { "epoch": 3.0579710144927534, "grad_norm": 1.9732599258422852, "learning_rate": 0.001, "loss": 2.3788, "step": 35448 }, { "epoch": 3.0628019323671496, "grad_norm": 2.450575828552246, "learning_rate": 0.001, "loss": 2.384, "step": 35504 }, { "epoch": 3.0676328502415457, "grad_norm": 25.521682739257812, "learning_rate": 0.001, "loss": 2.376, "step": 35560 }, { "epoch": 3.072463768115942, "grad_norm": 1.3444690704345703, "learning_rate": 0.001, "loss": 2.3685, "step": 35616 }, { "epoch": 3.077294685990338, "grad_norm": 1.0060046911239624, "learning_rate": 0.001, "loss": 2.3669, "step": 35672 }, { "epoch": 3.082125603864734, "grad_norm": 0.6015172600746155, "learning_rate": 0.001, "loss": 2.3713, "step": 35728 }, { "epoch": 3.0869565217391304, "grad_norm": 0.646270215511322, "learning_rate": 0.001, "loss": 2.3624, "step": 35784 }, { "epoch": 3.0917874396135265, "grad_norm": 0.7399847507476807, "learning_rate": 0.001, "loss": 2.3608, "step": 35840 }, { "epoch": 3.0966183574879227, "grad_norm": 2.9859659671783447, "learning_rate": 0.001, "loss": 2.3609, "step": 35896 }, { "epoch": 3.101449275362319, "grad_norm": 1.0483369827270508, "learning_rate": 0.001, "loss": 2.3751, "step": 35952 }, { "epoch": 3.106280193236715, "grad_norm": 2.3558247089385986, "learning_rate": 0.001, "loss": 2.3808, "step": 36008 }, { "epoch": 3.111111111111111, "grad_norm": 0.7647687792778015, "learning_rate": 0.001, "loss": 2.3737, "step": 36064 }, { "epoch": 3.1159420289855073, "grad_norm": 1.0547163486480713, "learning_rate": 0.001, "loss": 2.3622, "step": 36120 }, { "epoch": 3.1207729468599035, "grad_norm": 0.7350274920463562, "learning_rate": 0.001, "loss": 2.3519, "step": 36176 }, { "epoch": 3.1256038647342996, "grad_norm": 1.322343349456787, "learning_rate": 0.001, "loss": 2.3671, "step": 36232 }, { "epoch": 3.130434782608696, "grad_norm": 1.419541835784912, "learning_rate": 0.001, "loss": 2.3643, "step": 36288 }, { "epoch": 3.135265700483092, "grad_norm": 0.640250563621521, "learning_rate": 0.001, "loss": 2.356, "step": 36344 }, { "epoch": 3.140096618357488, "grad_norm": 1.2903010845184326, "learning_rate": 0.001, "loss": 2.3529, "step": 36400 }, { "epoch": 3.1449275362318843, "grad_norm": 0.8562397956848145, "learning_rate": 0.001, "loss": 2.3652, "step": 36456 }, { "epoch": 3.14975845410628, "grad_norm": 1.3351072072982788, "learning_rate": 0.001, "loss": 2.3707, "step": 36512 }, { "epoch": 3.154589371980676, "grad_norm": 1.0129743814468384, "learning_rate": 0.001, "loss": 2.3673, "step": 36568 }, { "epoch": 3.1594202898550723, "grad_norm": 1.540778636932373, "learning_rate": 0.001, "loss": 2.3822, "step": 36624 }, { "epoch": 3.1642512077294684, "grad_norm": 1.474141240119934, "learning_rate": 0.001, "loss": 2.394, "step": 36680 }, { "epoch": 3.1690821256038646, "grad_norm": 1.1241166591644287, "learning_rate": 0.001, "loss": 2.3829, "step": 36736 }, { "epoch": 3.1739130434782608, "grad_norm": 1.6276636123657227, "learning_rate": 0.001, "loss": 2.4077, "step": 36792 }, { "epoch": 3.178743961352657, "grad_norm": 3.3927533626556396, "learning_rate": 0.001, "loss": 2.3993, "step": 36848 }, { "epoch": 3.183574879227053, "grad_norm": 2.856873035430908, "learning_rate": 0.001, "loss": 2.3929, "step": 36904 }, { "epoch": 3.1884057971014492, "grad_norm": 0.8577069044113159, "learning_rate": 0.001, "loss": 2.3923, "step": 36960 }, { "epoch": 3.1932367149758454, "grad_norm": 1.7790100574493408, "learning_rate": 0.001, "loss": 2.3837, "step": 37016 }, { "epoch": 3.1980676328502415, "grad_norm": 1.3106088638305664, "learning_rate": 0.001, "loss": 2.3818, "step": 37072 }, { "epoch": 3.2028985507246377, "grad_norm": 3.6986899375915527, "learning_rate": 0.001, "loss": 2.3823, "step": 37128 }, { "epoch": 3.207729468599034, "grad_norm": 0.8908578157424927, "learning_rate": 0.001, "loss": 2.382, "step": 37184 }, { "epoch": 3.21256038647343, "grad_norm": 1.2004361152648926, "learning_rate": 0.001, "loss": 2.3904, "step": 37240 }, { "epoch": 3.217391304347826, "grad_norm": 1.119553565979004, "learning_rate": 0.001, "loss": 2.3771, "step": 37296 }, { "epoch": 3.2222222222222223, "grad_norm": 1.9667686223983765, "learning_rate": 0.001, "loss": 2.39, "step": 37352 }, { "epoch": 3.2270531400966185, "grad_norm": 6.152589797973633, "learning_rate": 0.001, "loss": 2.3972, "step": 37408 }, { "epoch": 3.2318840579710146, "grad_norm": 1.4891480207443237, "learning_rate": 0.001, "loss": 2.3827, "step": 37464 }, { "epoch": 3.236714975845411, "grad_norm": 0.7430106997489929, "learning_rate": 0.001, "loss": 2.3867, "step": 37520 }, { "epoch": 3.241545893719807, "grad_norm": 3.5744717121124268, "learning_rate": 0.001, "loss": 2.3889, "step": 37576 }, { "epoch": 3.246376811594203, "grad_norm": 0.8001721501350403, "learning_rate": 0.001, "loss": 2.3608, "step": 37632 }, { "epoch": 3.2512077294685993, "grad_norm": 5.070853233337402, "learning_rate": 0.001, "loss": 2.3563, "step": 37688 }, { "epoch": 3.2560386473429954, "grad_norm": 0.9352041482925415, "learning_rate": 0.001, "loss": 2.3497, "step": 37744 }, { "epoch": 3.260869565217391, "grad_norm": 0.6637979745864868, "learning_rate": 0.001, "loss": 2.3715, "step": 37800 }, { "epoch": 3.2657004830917873, "grad_norm": 1.2793562412261963, "learning_rate": 0.001, "loss": 2.3667, "step": 37856 }, { "epoch": 3.2705314009661834, "grad_norm": 2.063894271850586, "learning_rate": 0.001, "loss": 2.3751, "step": 37912 }, { "epoch": 3.2753623188405796, "grad_norm": 1.5859637260437012, "learning_rate": 0.001, "loss": 2.3676, "step": 37968 }, { "epoch": 3.2801932367149758, "grad_norm": 1.7910484075546265, "learning_rate": 0.001, "loss": 2.3684, "step": 38024 }, { "epoch": 3.285024154589372, "grad_norm": 0.799378514289856, "learning_rate": 0.001, "loss": 2.3761, "step": 38080 }, { "epoch": 3.289855072463768, "grad_norm": 4.270195484161377, "learning_rate": 0.001, "loss": 2.3861, "step": 38136 }, { "epoch": 3.2946859903381642, "grad_norm": 1.1123530864715576, "learning_rate": 0.001, "loss": 2.377, "step": 38192 }, { "epoch": 3.2995169082125604, "grad_norm": 12.888733863830566, "learning_rate": 0.001, "loss": 2.3744, "step": 38248 }, { "epoch": 3.3043478260869565, "grad_norm": 1.509628176689148, "learning_rate": 0.001, "loss": 2.3722, "step": 38304 }, { "epoch": 3.3091787439613527, "grad_norm": 3.0714049339294434, "learning_rate": 0.001, "loss": 2.3778, "step": 38360 }, { "epoch": 3.314009661835749, "grad_norm": 1.1967629194259644, "learning_rate": 0.001, "loss": 2.3735, "step": 38416 }, { "epoch": 3.318840579710145, "grad_norm": 19.75279998779297, "learning_rate": 0.001, "loss": 2.3656, "step": 38472 }, { "epoch": 3.323671497584541, "grad_norm": 1.0594563484191895, "learning_rate": 0.001, "loss": 2.3639, "step": 38528 }, { "epoch": 3.3285024154589373, "grad_norm": 3.912726879119873, "learning_rate": 0.001, "loss": 2.3538, "step": 38584 }, { "epoch": 3.3333333333333335, "grad_norm": 1.6983665227890015, "learning_rate": 0.001, "loss": 2.3834, "step": 38640 }, { "epoch": 3.3381642512077296, "grad_norm": 1.1165878772735596, "learning_rate": 0.001, "loss": 2.3839, "step": 38696 }, { "epoch": 3.342995169082126, "grad_norm": 4.868439674377441, "learning_rate": 0.001, "loss": 2.3871, "step": 38752 }, { "epoch": 3.3478260869565215, "grad_norm": 0.9852940440177917, "learning_rate": 0.001, "loss": 2.3875, "step": 38808 }, { "epoch": 3.3526570048309177, "grad_norm": 1.9336177110671997, "learning_rate": 0.001, "loss": 2.3655, "step": 38864 }, { "epoch": 3.357487922705314, "grad_norm": 2.272597074508667, "learning_rate": 0.001, "loss": 2.3883, "step": 38920 }, { "epoch": 3.36231884057971, "grad_norm": 0.9089294075965881, "learning_rate": 0.001, "loss": 2.3731, "step": 38976 }, { "epoch": 3.367149758454106, "grad_norm": 1.0074656009674072, "learning_rate": 0.001, "loss": 2.3778, "step": 39032 }, { "epoch": 3.3719806763285023, "grad_norm": 0.8165333271026611, "learning_rate": 0.001, "loss": 2.3656, "step": 39088 }, { "epoch": 3.3768115942028984, "grad_norm": 2.1966381072998047, "learning_rate": 0.001, "loss": 2.37, "step": 39144 }, { "epoch": 3.3816425120772946, "grad_norm": 6.151560306549072, "learning_rate": 0.001, "loss": 2.3625, "step": 39200 }, { "epoch": 3.3864734299516908, "grad_norm": 1.6020865440368652, "learning_rate": 0.001, "loss": 2.3578, "step": 39256 }, { "epoch": 3.391304347826087, "grad_norm": 1.2496628761291504, "learning_rate": 0.001, "loss": 2.3733, "step": 39312 }, { "epoch": 3.396135265700483, "grad_norm": 4.8708415031433105, "learning_rate": 0.001, "loss": 2.3903, "step": 39368 }, { "epoch": 3.4009661835748792, "grad_norm": 5.318726062774658, "learning_rate": 0.001, "loss": 2.3869, "step": 39424 }, { "epoch": 3.4057971014492754, "grad_norm": 1.2490580081939697, "learning_rate": 0.001, "loss": 2.3868, "step": 39480 }, { "epoch": 3.4106280193236715, "grad_norm": 0.7086006999015808, "learning_rate": 0.001, "loss": 2.3629, "step": 39536 }, { "epoch": 3.4154589371980677, "grad_norm": 0.7509753108024597, "learning_rate": 0.001, "loss": 2.3701, "step": 39592 }, { "epoch": 3.420289855072464, "grad_norm": 0.6684341430664062, "learning_rate": 0.001, "loss": 2.3713, "step": 39648 }, { "epoch": 3.42512077294686, "grad_norm": 2.8551783561706543, "learning_rate": 0.001, "loss": 2.3598, "step": 39704 }, { "epoch": 3.429951690821256, "grad_norm": 0.6681911945343018, "learning_rate": 0.001, "loss": 2.3663, "step": 39760 }, { "epoch": 3.4347826086956523, "grad_norm": 0.7762264013290405, "learning_rate": 0.001, "loss": 2.374, "step": 39816 }, { "epoch": 3.4396135265700485, "grad_norm": 0.6667366027832031, "learning_rate": 0.001, "loss": 2.3668, "step": 39872 }, { "epoch": 3.4444444444444446, "grad_norm": 0.9514179229736328, "learning_rate": 0.001, "loss": 2.3556, "step": 39928 }, { "epoch": 3.449275362318841, "grad_norm": 1.7346069812774658, "learning_rate": 0.001, "loss": 2.3612, "step": 39984 }, { "epoch": 3.454106280193237, "grad_norm": 3.320202589035034, "learning_rate": 0.001, "loss": 2.3536, "step": 40040 }, { "epoch": 3.4589371980676327, "grad_norm": 0.8877231478691101, "learning_rate": 0.001, "loss": 2.345, "step": 40096 }, { "epoch": 3.463768115942029, "grad_norm": 1.4169694185256958, "learning_rate": 0.001, "loss": 2.359, "step": 40152 }, { "epoch": 3.468599033816425, "grad_norm": 0.502339243888855, "learning_rate": 0.001, "loss": 2.3615, "step": 40208 }, { "epoch": 3.473429951690821, "grad_norm": 0.5285590887069702, "learning_rate": 0.001, "loss": 2.3535, "step": 40264 }, { "epoch": 3.4782608695652173, "grad_norm": 1.1485893726348877, "learning_rate": 0.001, "loss": 2.3511, "step": 40320 }, { "epoch": 3.4830917874396135, "grad_norm": 1.5130213499069214, "learning_rate": 0.001, "loss": 2.3458, "step": 40376 }, { "epoch": 3.4879227053140096, "grad_norm": 0.7220565676689148, "learning_rate": 0.001, "loss": 2.3439, "step": 40432 }, { "epoch": 3.4927536231884058, "grad_norm": 0.5075478553771973, "learning_rate": 0.001, "loss": 2.3487, "step": 40488 }, { "epoch": 3.497584541062802, "grad_norm": 0.9343673586845398, "learning_rate": 0.001, "loss": 2.3489, "step": 40544 }, { "epoch": 3.502415458937198, "grad_norm": 1.2456401586532593, "learning_rate": 0.001, "loss": 2.3695, "step": 40600 }, { "epoch": 3.5072463768115942, "grad_norm": 0.6643732190132141, "learning_rate": 0.001, "loss": 2.3577, "step": 40656 }, { "epoch": 3.5120772946859904, "grad_norm": 0.9231414794921875, "learning_rate": 0.001, "loss": 2.3388, "step": 40712 }, { "epoch": 3.5169082125603865, "grad_norm": 0.7390984296798706, "learning_rate": 0.001, "loss": 2.3442, "step": 40768 }, { "epoch": 3.5217391304347827, "grad_norm": 0.9680396318435669, "learning_rate": 0.001, "loss": 2.3412, "step": 40824 }, { "epoch": 3.526570048309179, "grad_norm": 1.5306947231292725, "learning_rate": 0.001, "loss": 2.3529, "step": 40880 }, { "epoch": 3.531400966183575, "grad_norm": 1.0215588808059692, "learning_rate": 0.001, "loss": 2.3539, "step": 40936 }, { "epoch": 3.536231884057971, "grad_norm": 0.6781653761863708, "learning_rate": 0.001, "loss": 2.3463, "step": 40992 }, { "epoch": 3.541062801932367, "grad_norm": 2.7816197872161865, "learning_rate": 0.001, "loss": 2.3695, "step": 41048 }, { "epoch": 3.545893719806763, "grad_norm": 1.1754366159439087, "learning_rate": 0.001, "loss": 2.3644, "step": 41104 }, { "epoch": 3.550724637681159, "grad_norm": 0.45001232624053955, "learning_rate": 0.001, "loss": 2.3557, "step": 41160 }, { "epoch": 3.5555555555555554, "grad_norm": 1.077300786972046, "learning_rate": 0.001, "loss": 2.3658, "step": 41216 }, { "epoch": 3.5603864734299515, "grad_norm": 0.5185337662696838, "learning_rate": 0.001, "loss": 2.3554, "step": 41272 }, { "epoch": 3.5652173913043477, "grad_norm": 0.7596719264984131, "learning_rate": 0.001, "loss": 2.3617, "step": 41328 }, { "epoch": 3.570048309178744, "grad_norm": 2.055612564086914, "learning_rate": 0.001, "loss": 2.3923, "step": 41384 }, { "epoch": 3.57487922705314, "grad_norm": 2.0701406002044678, "learning_rate": 0.001, "loss": 2.3913, "step": 41440 }, { "epoch": 3.579710144927536, "grad_norm": 1.7943741083145142, "learning_rate": 0.001, "loss": 2.3745, "step": 41496 }, { "epoch": 3.5845410628019323, "grad_norm": 0.8631559014320374, "learning_rate": 0.001, "loss": 2.3578, "step": 41552 }, { "epoch": 3.5893719806763285, "grad_norm": 0.800894558429718, "learning_rate": 0.001, "loss": 2.3661, "step": 41608 }, { "epoch": 3.5942028985507246, "grad_norm": 1.5744913816452026, "learning_rate": 0.001, "loss": 2.3576, "step": 41664 }, { "epoch": 3.5990338164251208, "grad_norm": 0.7339915633201599, "learning_rate": 0.001, "loss": 2.3562, "step": 41720 }, { "epoch": 3.603864734299517, "grad_norm": 2.8779172897338867, "learning_rate": 0.001, "loss": 2.3571, "step": 41776 }, { "epoch": 3.608695652173913, "grad_norm": 1.536848783493042, "learning_rate": 0.001, "loss": 2.3668, "step": 41832 }, { "epoch": 3.6135265700483092, "grad_norm": 2.2175509929656982, "learning_rate": 0.001, "loss": 2.3465, "step": 41888 }, { "epoch": 3.6183574879227054, "grad_norm": 0.8303064107894897, "learning_rate": 0.001, "loss": 2.3456, "step": 41944 }, { "epoch": 3.6231884057971016, "grad_norm": 0.8914777040481567, "learning_rate": 0.001, "loss": 2.3377, "step": 42000 }, { "epoch": 3.6280193236714977, "grad_norm": 0.8713904619216919, "learning_rate": 0.001, "loss": 2.3453, "step": 42056 }, { "epoch": 3.632850241545894, "grad_norm": 2.1183042526245117, "learning_rate": 0.001, "loss": 2.3424, "step": 42112 }, { "epoch": 3.63768115942029, "grad_norm": 1.0792075395584106, "learning_rate": 0.001, "loss": 2.3469, "step": 42168 }, { "epoch": 3.642512077294686, "grad_norm": 1.2308788299560547, "learning_rate": 0.001, "loss": 2.362, "step": 42224 }, { "epoch": 3.6473429951690823, "grad_norm": 1.8587312698364258, "learning_rate": 0.001, "loss": 2.3539, "step": 42280 }, { "epoch": 3.6521739130434785, "grad_norm": 0.7397122383117676, "learning_rate": 0.001, "loss": 2.3419, "step": 42336 }, { "epoch": 3.6570048309178746, "grad_norm": 0.6592168211936951, "learning_rate": 0.001, "loss": 2.3266, "step": 42392 }, { "epoch": 3.661835748792271, "grad_norm": 0.8108003735542297, "learning_rate": 0.001, "loss": 2.3363, "step": 42448 }, { "epoch": 3.6666666666666665, "grad_norm": 0.8156822919845581, "learning_rate": 0.001, "loss": 2.3565, "step": 42504 }, { "epoch": 3.6714975845410627, "grad_norm": 0.9192153215408325, "learning_rate": 0.001, "loss": 2.3567, "step": 42560 }, { "epoch": 3.676328502415459, "grad_norm": 0.9951876401901245, "learning_rate": 0.001, "loss": 2.3583, "step": 42616 }, { "epoch": 3.681159420289855, "grad_norm": 1.245253562927246, "learning_rate": 0.001, "loss": 2.3465, "step": 42672 }, { "epoch": 3.685990338164251, "grad_norm": 1.2041829824447632, "learning_rate": 0.001, "loss": 2.3535, "step": 42728 }, { "epoch": 3.6908212560386473, "grad_norm": 2.3600947856903076, "learning_rate": 0.001, "loss": 2.3513, "step": 42784 }, { "epoch": 3.6956521739130435, "grad_norm": 1.3931894302368164, "learning_rate": 0.001, "loss": 2.3649, "step": 42840 }, { "epoch": 3.7004830917874396, "grad_norm": 1.2088695764541626, "learning_rate": 0.001, "loss": 2.3467, "step": 42896 }, { "epoch": 3.7053140096618358, "grad_norm": 0.7605326175689697, "learning_rate": 0.001, "loss": 2.3452, "step": 42952 }, { "epoch": 3.710144927536232, "grad_norm": 1.9344475269317627, "learning_rate": 0.001, "loss": 2.3464, "step": 43008 }, { "epoch": 3.714975845410628, "grad_norm": 4.121121883392334, "learning_rate": 0.001, "loss": 2.353, "step": 43064 }, { "epoch": 3.7198067632850242, "grad_norm": 0.7761598229408264, "learning_rate": 0.001, "loss": 2.346, "step": 43120 }, { "epoch": 3.7246376811594204, "grad_norm": 1.034733772277832, "learning_rate": 0.001, "loss": 2.3597, "step": 43176 }, { "epoch": 3.7294685990338166, "grad_norm": 0.6464439630508423, "learning_rate": 0.001, "loss": 2.359, "step": 43232 }, { "epoch": 3.7342995169082127, "grad_norm": 3.42350172996521, "learning_rate": 0.001, "loss": 2.3582, "step": 43288 }, { "epoch": 3.7391304347826084, "grad_norm": 0.6033596396446228, "learning_rate": 0.001, "loss": 2.3409, "step": 43344 }, { "epoch": 3.7439613526570046, "grad_norm": 0.5961412191390991, "learning_rate": 0.001, "loss": 2.3418, "step": 43400 }, { "epoch": 3.7487922705314007, "grad_norm": 0.8360373377799988, "learning_rate": 0.001, "loss": 2.3427, "step": 43456 }, { "epoch": 3.753623188405797, "grad_norm": 28.263586044311523, "learning_rate": 0.001, "loss": 2.3364, "step": 43512 }, { "epoch": 3.758454106280193, "grad_norm": 5.00962495803833, "learning_rate": 0.001, "loss": 2.3359, "step": 43568 }, { "epoch": 3.763285024154589, "grad_norm": 0.5710622072219849, "learning_rate": 0.001, "loss": 2.333, "step": 43624 }, { "epoch": 3.7681159420289854, "grad_norm": 0.9183070659637451, "learning_rate": 0.001, "loss": 2.3388, "step": 43680 }, { "epoch": 3.7729468599033815, "grad_norm": 0.6936941146850586, "learning_rate": 0.001, "loss": 2.3309, "step": 43736 }, { "epoch": 3.7777777777777777, "grad_norm": 1.2598804235458374, "learning_rate": 0.001, "loss": 2.3337, "step": 43792 }, { "epoch": 3.782608695652174, "grad_norm": 2.6248106956481934, "learning_rate": 0.001, "loss": 2.3619, "step": 43848 }, { "epoch": 3.78743961352657, "grad_norm": 0.989886999130249, "learning_rate": 0.001, "loss": 2.3554, "step": 43904 }, { "epoch": 3.792270531400966, "grad_norm": 3.3399837017059326, "learning_rate": 0.001, "loss": 2.3388, "step": 43960 }, { "epoch": 3.7971014492753623, "grad_norm": 2.2985455989837646, "learning_rate": 0.001, "loss": 2.337, "step": 44016 }, { "epoch": 3.8019323671497585, "grad_norm": 0.9566785097122192, "learning_rate": 0.001, "loss": 2.3436, "step": 44072 }, { "epoch": 3.8067632850241546, "grad_norm": 21.195798873901367, "learning_rate": 0.001, "loss": 2.339, "step": 44128 }, { "epoch": 3.8115942028985508, "grad_norm": 0.740143358707428, "learning_rate": 0.001, "loss": 2.3431, "step": 44184 }, { "epoch": 3.816425120772947, "grad_norm": 4.102219104766846, "learning_rate": 0.001, "loss": 2.3453, "step": 44240 }, { "epoch": 3.821256038647343, "grad_norm": 0.8171431422233582, "learning_rate": 0.001, "loss": 2.3447, "step": 44296 }, { "epoch": 3.8260869565217392, "grad_norm": 1.02897047996521, "learning_rate": 0.001, "loss": 2.3345, "step": 44352 }, { "epoch": 3.8309178743961354, "grad_norm": 1.1245150566101074, "learning_rate": 0.001, "loss": 2.3513, "step": 44408 }, { "epoch": 3.8357487922705316, "grad_norm": 0.6639866232872009, "learning_rate": 0.001, "loss": 2.3618, "step": 44464 }, { "epoch": 3.8405797101449277, "grad_norm": 29.38960838317871, "learning_rate": 0.001, "loss": 2.3647, "step": 44520 }, { "epoch": 3.845410628019324, "grad_norm": 0.9987631440162659, "learning_rate": 0.001, "loss": 2.3577, "step": 44576 }, { "epoch": 3.85024154589372, "grad_norm": 3.8040685653686523, "learning_rate": 0.001, "loss": 2.3563, "step": 44632 }, { "epoch": 3.855072463768116, "grad_norm": 1.2185957431793213, "learning_rate": 0.001, "loss": 2.3569, "step": 44688 }, { "epoch": 3.8599033816425123, "grad_norm": 0.7800244688987732, "learning_rate": 0.001, "loss": 2.3533, "step": 44744 }, { "epoch": 3.864734299516908, "grad_norm": 0.5030954480171204, "learning_rate": 0.001, "loss": 2.3544, "step": 44800 }, { "epoch": 3.869565217391304, "grad_norm": 2.9667656421661377, "learning_rate": 0.001, "loss": 2.3588, "step": 44856 }, { "epoch": 3.8743961352657004, "grad_norm": 0.5120835900306702, "learning_rate": 0.001, "loss": 2.3369, "step": 44912 }, { "epoch": 3.8792270531400965, "grad_norm": 0.4867868721485138, "learning_rate": 0.001, "loss": 2.3426, "step": 44968 }, { "epoch": 3.8840579710144927, "grad_norm": 0.6894313097000122, "learning_rate": 0.001, "loss": 2.3384, "step": 45024 }, { "epoch": 3.888888888888889, "grad_norm": 1.0191646814346313, "learning_rate": 0.001, "loss": 2.341, "step": 45080 }, { "epoch": 3.893719806763285, "grad_norm": 1.1036583185195923, "learning_rate": 0.001, "loss": 2.3347, "step": 45136 }, { "epoch": 3.898550724637681, "grad_norm": 1.3702045679092407, "learning_rate": 0.001, "loss": 2.3467, "step": 45192 }, { "epoch": 3.9033816425120773, "grad_norm": 3.228801727294922, "learning_rate": 0.001, "loss": 2.3804, "step": 45248 }, { "epoch": 3.9082125603864735, "grad_norm": 1.7107059955596924, "learning_rate": 0.001, "loss": 2.3604, "step": 45304 }, { "epoch": 3.9130434782608696, "grad_norm": 3.484675884246826, "learning_rate": 0.001, "loss": 2.3519, "step": 45360 }, { "epoch": 3.917874396135266, "grad_norm": 0.9079760313034058, "learning_rate": 0.001, "loss": 2.3438, "step": 45416 }, { "epoch": 3.922705314009662, "grad_norm": 0.9418869614601135, "learning_rate": 0.001, "loss": 2.3397, "step": 45472 }, { "epoch": 3.927536231884058, "grad_norm": 2.3312509059906006, "learning_rate": 0.001, "loss": 2.336, "step": 45528 }, { "epoch": 3.9323671497584543, "grad_norm": 0.617605984210968, "learning_rate": 0.001, "loss": 2.3388, "step": 45584 }, { "epoch": 3.9371980676328504, "grad_norm": 0.6304759383201599, "learning_rate": 0.001, "loss": 2.3294, "step": 45640 }, { "epoch": 3.942028985507246, "grad_norm": 0.9962377548217773, "learning_rate": 0.001, "loss": 2.3311, "step": 45696 }, { "epoch": 3.9468599033816423, "grad_norm": 1.7532322406768799, "learning_rate": 0.001, "loss": 2.3495, "step": 45752 }, { "epoch": 3.9516908212560384, "grad_norm": 0.6233911514282227, "learning_rate": 0.001, "loss": 2.3499, "step": 45808 }, { "epoch": 3.9565217391304346, "grad_norm": 5.571962833404541, "learning_rate": 0.001, "loss": 2.3529, "step": 45864 }, { "epoch": 3.9613526570048307, "grad_norm": 1.1596258878707886, "learning_rate": 0.001, "loss": 2.3522, "step": 45920 }, { "epoch": 3.966183574879227, "grad_norm": 1.252943992614746, "learning_rate": 0.001, "loss": 2.3653, "step": 45976 }, { "epoch": 3.971014492753623, "grad_norm": 2.0798392295837402, "learning_rate": 0.001, "loss": 2.3519, "step": 46032 }, { "epoch": 3.975845410628019, "grad_norm": 2.891594409942627, "learning_rate": 0.001, "loss": 2.3491, "step": 46088 }, { "epoch": 3.9806763285024154, "grad_norm": 1.825319766998291, "learning_rate": 0.001, "loss": 2.3438, "step": 46144 }, { "epoch": 3.9855072463768115, "grad_norm": 1.724923014640808, "learning_rate": 0.001, "loss": 2.3465, "step": 46200 }, { "epoch": 3.9903381642512077, "grad_norm": 0.7429100275039673, "learning_rate": 0.001, "loss": 2.336, "step": 46256 }, { "epoch": 3.995169082125604, "grad_norm": 0.5450997948646545, "learning_rate": 0.001, "loss": 2.3371, "step": 46312 }, { "epoch": 4.0, "grad_norm": 1.657492995262146, "learning_rate": 0.001, "loss": 2.3274, "step": 46368 }, { "epoch": 4.004830917874396, "grad_norm": 0.9657808542251587, "learning_rate": 0.001, "loss": 2.3042, "step": 46424 }, { "epoch": 4.009661835748792, "grad_norm": 0.600604772567749, "learning_rate": 0.001, "loss": 2.3016, "step": 46480 }, { "epoch": 4.0144927536231885, "grad_norm": 0.924943745136261, "learning_rate": 0.001, "loss": 2.2868, "step": 46536 }, { "epoch": 4.019323671497585, "grad_norm": 1.3035205602645874, "learning_rate": 0.001, "loss": 2.2841, "step": 46592 }, { "epoch": 4.024154589371981, "grad_norm": 1.3386552333831787, "learning_rate": 0.001, "loss": 2.2942, "step": 46648 }, { "epoch": 4.028985507246377, "grad_norm": 1.0220056772232056, "learning_rate": 0.001, "loss": 2.3054, "step": 46704 }, { "epoch": 4.033816425120773, "grad_norm": 0.7145740985870361, "learning_rate": 0.001, "loss": 2.3049, "step": 46760 }, { "epoch": 4.038647342995169, "grad_norm": 3.8825621604919434, "learning_rate": 0.001, "loss": 2.3003, "step": 46816 }, { "epoch": 4.043478260869565, "grad_norm": 0.8055011630058289, "learning_rate": 0.001, "loss": 2.303, "step": 46872 }, { "epoch": 4.048309178743962, "grad_norm": 2.655247449874878, "learning_rate": 0.001, "loss": 2.3049, "step": 46928 }, { "epoch": 4.053140096618358, "grad_norm": 1.0282988548278809, "learning_rate": 0.001, "loss": 2.3202, "step": 46984 }, { "epoch": 4.057971014492754, "grad_norm": 0.9139070510864258, "learning_rate": 0.001, "loss": 2.3198, "step": 47040 }, { "epoch": 4.06280193236715, "grad_norm": 1.8222072124481201, "learning_rate": 0.001, "loss": 2.3254, "step": 47096 }, { "epoch": 4.067632850241546, "grad_norm": 1.2185211181640625, "learning_rate": 0.001, "loss": 2.3282, "step": 47152 }, { "epoch": 4.072463768115942, "grad_norm": 1.2186691761016846, "learning_rate": 0.001, "loss": 2.3232, "step": 47208 }, { "epoch": 4.0772946859903385, "grad_norm": 1.0944017171859741, "learning_rate": 0.001, "loss": 2.3133, "step": 47264 }, { "epoch": 4.082125603864735, "grad_norm": 2.1194827556610107, "learning_rate": 0.001, "loss": 2.3058, "step": 47320 }, { "epoch": 4.086956521739131, "grad_norm": 0.6063691973686218, "learning_rate": 0.001, "loss": 2.2968, "step": 47376 }, { "epoch": 4.091787439613527, "grad_norm": 3.2358484268188477, "learning_rate": 0.001, "loss": 2.3189, "step": 47432 }, { "epoch": 4.096618357487923, "grad_norm": 3.020056962966919, "learning_rate": 0.001, "loss": 2.3264, "step": 47488 }, { "epoch": 4.101449275362318, "grad_norm": 1.6198798418045044, "learning_rate": 0.001, "loss": 2.3141, "step": 47544 }, { "epoch": 4.106280193236715, "grad_norm": 0.7674989104270935, "learning_rate": 0.001, "loss": 2.3134, "step": 47600 }, { "epoch": 4.111111111111111, "grad_norm": 2.0637145042419434, "learning_rate": 0.001, "loss": 2.302, "step": 47656 }, { "epoch": 4.115942028985507, "grad_norm": 0.9420506358146667, "learning_rate": 0.001, "loss": 2.3089, "step": 47712 }, { "epoch": 4.120772946859903, "grad_norm": 1.3257102966308594, "learning_rate": 0.001, "loss": 2.3085, "step": 47768 }, { "epoch": 4.125603864734299, "grad_norm": 1.247165560722351, "learning_rate": 0.001, "loss": 2.3131, "step": 47824 }, { "epoch": 4.130434782608695, "grad_norm": 0.8171547055244446, "learning_rate": 0.001, "loss": 2.3057, "step": 47880 }, { "epoch": 4.1352657004830915, "grad_norm": 1.356412410736084, "learning_rate": 0.001, "loss": 2.3158, "step": 47936 }, { "epoch": 4.140096618357488, "grad_norm": 0.8632891774177551, "learning_rate": 0.001, "loss": 2.3179, "step": 47992 }, { "epoch": 4.144927536231884, "grad_norm": 1.7877506017684937, "learning_rate": 0.001, "loss": 2.3102, "step": 48048 }, { "epoch": 4.14975845410628, "grad_norm": 0.7102479338645935, "learning_rate": 0.001, "loss": 2.313, "step": 48104 }, { "epoch": 4.154589371980676, "grad_norm": 1.066693663597107, "learning_rate": 0.001, "loss": 2.3231, "step": 48160 }, { "epoch": 4.159420289855072, "grad_norm": 0.9703291654586792, "learning_rate": 0.001, "loss": 2.3412, "step": 48216 }, { "epoch": 4.164251207729468, "grad_norm": 0.6456874012947083, "learning_rate": 0.001, "loss": 2.3408, "step": 48272 }, { "epoch": 4.169082125603865, "grad_norm": 2.233797788619995, "learning_rate": 0.001, "loss": 2.3462, "step": 48328 }, { "epoch": 4.173913043478261, "grad_norm": 0.7574034929275513, "learning_rate": 0.001, "loss": 2.329, "step": 48384 }, { "epoch": 4.178743961352657, "grad_norm": 4.511397838592529, "learning_rate": 0.001, "loss": 2.3114, "step": 48440 }, { "epoch": 4.183574879227053, "grad_norm": 1.9656661748886108, "learning_rate": 0.001, "loss": 2.3126, "step": 48496 }, { "epoch": 4.188405797101449, "grad_norm": 0.8456041216850281, "learning_rate": 0.001, "loss": 2.3165, "step": 48552 }, { "epoch": 4.193236714975845, "grad_norm": 0.8470112681388855, "learning_rate": 0.001, "loss": 2.3002, "step": 48608 }, { "epoch": 4.1980676328502415, "grad_norm": 1.1490195989608765, "learning_rate": 0.001, "loss": 2.3093, "step": 48664 }, { "epoch": 4.202898550724638, "grad_norm": 1.7943440675735474, "learning_rate": 0.001, "loss": 2.3118, "step": 48720 }, { "epoch": 4.207729468599034, "grad_norm": 2.768073797225952, "learning_rate": 0.001, "loss": 2.3263, "step": 48776 }, { "epoch": 4.21256038647343, "grad_norm": 2.6669111251831055, "learning_rate": 0.001, "loss": 2.3253, "step": 48832 }, { "epoch": 4.217391304347826, "grad_norm": 1.7654824256896973, "learning_rate": 0.001, "loss": 2.3146, "step": 48888 }, { "epoch": 4.222222222222222, "grad_norm": 2.6006264686584473, "learning_rate": 0.001, "loss": 2.3115, "step": 48944 }, { "epoch": 4.2270531400966185, "grad_norm": 0.9127808213233948, "learning_rate": 0.001, "loss": 2.3162, "step": 49000 }, { "epoch": 4.231884057971015, "grad_norm": 0.8338162899017334, "learning_rate": 0.001, "loss": 2.3179, "step": 49056 }, { "epoch": 4.236714975845411, "grad_norm": 3.532031536102295, "learning_rate": 0.001, "loss": 2.3216, "step": 49112 }, { "epoch": 4.241545893719807, "grad_norm": 1.8172245025634766, "learning_rate": 0.001, "loss": 2.3287, "step": 49168 }, { "epoch": 4.246376811594203, "grad_norm": 1.6738920211791992, "learning_rate": 0.001, "loss": 2.3288, "step": 49224 }, { "epoch": 4.251207729468599, "grad_norm": 8.565791130065918, "learning_rate": 0.001, "loss": 2.3467, "step": 49280 }, { "epoch": 4.256038647342995, "grad_norm": 0.9684979319572449, "learning_rate": 0.001, "loss": 2.3482, "step": 49336 }, { "epoch": 4.260869565217392, "grad_norm": 0.7297049164772034, "learning_rate": 0.001, "loss": 2.3401, "step": 49392 }, { "epoch": 4.265700483091788, "grad_norm": 1.755307674407959, "learning_rate": 0.001, "loss": 2.339, "step": 49448 }, { "epoch": 4.270531400966184, "grad_norm": 1.299764633178711, "learning_rate": 0.001, "loss": 2.3326, "step": 49504 }, { "epoch": 4.27536231884058, "grad_norm": 0.7953894138336182, "learning_rate": 0.001, "loss": 2.313, "step": 49560 }, { "epoch": 4.280193236714976, "grad_norm": 5.326798915863037, "learning_rate": 0.001, "loss": 2.3136, "step": 49616 }, { "epoch": 4.285024154589372, "grad_norm": 0.6478893756866455, "learning_rate": 0.001, "loss": 2.324, "step": 49672 }, { "epoch": 4.2898550724637685, "grad_norm": 2.68768572807312, "learning_rate": 0.001, "loss": 2.3243, "step": 49728 }, { "epoch": 4.294685990338165, "grad_norm": 4.061188220977783, "learning_rate": 0.001, "loss": 2.328, "step": 49784 }, { "epoch": 4.29951690821256, "grad_norm": 3.017909049987793, "learning_rate": 0.001, "loss": 2.3194, "step": 49840 }, { "epoch": 4.304347826086957, "grad_norm": 20.849956512451172, "learning_rate": 0.001, "loss": 2.3196, "step": 49896 }, { "epoch": 4.309178743961352, "grad_norm": 1.5654706954956055, "learning_rate": 0.001, "loss": 2.3183, "step": 49952 }, { "epoch": 4.314009661835748, "grad_norm": 1.9425815343856812, "learning_rate": 0.001, "loss": 2.3212, "step": 50008 }, { "epoch": 4.318840579710145, "grad_norm": 1.0273939371109009, "learning_rate": 0.001, "loss": 2.32, "step": 50064 }, { "epoch": 4.323671497584541, "grad_norm": 1.2710562944412231, "learning_rate": 0.001, "loss": 2.3277, "step": 50120 }, { "epoch": 4.328502415458937, "grad_norm": 0.6078893542289734, "learning_rate": 0.001, "loss": 2.3262, "step": 50176 }, { "epoch": 4.333333333333333, "grad_norm": 1.3283988237380981, "learning_rate": 0.001, "loss": 2.3322, "step": 50232 }, { "epoch": 4.338164251207729, "grad_norm": 4.818864822387695, "learning_rate": 0.001, "loss": 2.337, "step": 50288 }, { "epoch": 4.342995169082125, "grad_norm": 0.9144983291625977, "learning_rate": 0.001, "loss": 2.343, "step": 50344 }, { "epoch": 4.3478260869565215, "grad_norm": 1.5212979316711426, "learning_rate": 0.001, "loss": 2.33, "step": 50400 }, { "epoch": 4.352657004830918, "grad_norm": 1.1908268928527832, "learning_rate": 0.001, "loss": 2.3208, "step": 50456 }, { "epoch": 4.357487922705314, "grad_norm": 0.7681655287742615, "learning_rate": 0.001, "loss": 2.305, "step": 50512 }, { "epoch": 4.36231884057971, "grad_norm": 1.3250541687011719, "learning_rate": 0.001, "loss": 2.3104, "step": 50568 }, { "epoch": 4.367149758454106, "grad_norm": 3.156174421310425, "learning_rate": 0.001, "loss": 2.3074, "step": 50624 }, { "epoch": 4.371980676328502, "grad_norm": 0.6974619030952454, "learning_rate": 0.001, "loss": 2.3126, "step": 50680 }, { "epoch": 4.3768115942028984, "grad_norm": 0.8445868492126465, "learning_rate": 0.001, "loss": 2.3006, "step": 50736 }, { "epoch": 4.381642512077295, "grad_norm": 0.9576389193534851, "learning_rate": 0.001, "loss": 2.3192, "step": 50792 }, { "epoch": 4.386473429951691, "grad_norm": 2.0181868076324463, "learning_rate": 0.001, "loss": 2.3117, "step": 50848 }, { "epoch": 4.391304347826087, "grad_norm": 2.806661367416382, "learning_rate": 0.001, "loss": 2.3253, "step": 50904 }, { "epoch": 4.396135265700483, "grad_norm": 2.122889518737793, "learning_rate": 0.001, "loss": 2.3485, "step": 50960 }, { "epoch": 4.400966183574879, "grad_norm": 1.5743014812469482, "learning_rate": 0.001, "loss": 2.3542, "step": 51016 }, { "epoch": 4.405797101449275, "grad_norm": 0.6997669339179993, "learning_rate": 0.001, "loss": 2.3423, "step": 51072 }, { "epoch": 4.4106280193236715, "grad_norm": 1.544404149055481, "learning_rate": 0.001, "loss": 2.3334, "step": 51128 }, { "epoch": 4.415458937198068, "grad_norm": 0.7238136529922485, "learning_rate": 0.001, "loss": 2.3272, "step": 51184 }, { "epoch": 4.420289855072464, "grad_norm": 1.068169355392456, "learning_rate": 0.001, "loss": 2.33, "step": 51240 }, { "epoch": 4.42512077294686, "grad_norm": 9.178274154663086, "learning_rate": 0.001, "loss": 2.3207, "step": 51296 }, { "epoch": 4.429951690821256, "grad_norm": 0.7421141862869263, "learning_rate": 0.001, "loss": 2.3107, "step": 51352 }, { "epoch": 4.434782608695652, "grad_norm": 2.0498499870300293, "learning_rate": 0.001, "loss": 2.3354, "step": 51408 }, { "epoch": 4.4396135265700485, "grad_norm": 1.3518296480178833, "learning_rate": 0.001, "loss": 2.3227, "step": 51464 }, { "epoch": 4.444444444444445, "grad_norm": 0.5711866021156311, "learning_rate": 0.001, "loss": 2.3205, "step": 51520 }, { "epoch": 4.449275362318841, "grad_norm": 0.9583142995834351, "learning_rate": 0.001, "loss": 2.31, "step": 51576 }, { "epoch": 4.454106280193237, "grad_norm": 0.9213517904281616, "learning_rate": 0.001, "loss": 2.3185, "step": 51632 }, { "epoch": 4.458937198067633, "grad_norm": 1.1538068056106567, "learning_rate": 0.001, "loss": 2.333, "step": 51688 }, { "epoch": 4.463768115942029, "grad_norm": 0.8258316516876221, "learning_rate": 0.001, "loss": 2.3246, "step": 51744 }, { "epoch": 4.468599033816425, "grad_norm": 0.8119305968284607, "learning_rate": 0.001, "loss": 2.3348, "step": 51800 }, { "epoch": 4.473429951690822, "grad_norm": 1.4755408763885498, "learning_rate": 0.001, "loss": 2.3104, "step": 51856 }, { "epoch": 4.478260869565218, "grad_norm": 0.9129286408424377, "learning_rate": 0.001, "loss": 2.3173, "step": 51912 }, { "epoch": 4.483091787439614, "grad_norm": 0.6550956964492798, "learning_rate": 0.001, "loss": 2.3084, "step": 51968 }, { "epoch": 4.48792270531401, "grad_norm": 0.9634305834770203, "learning_rate": 0.001, "loss": 2.3055, "step": 52024 }, { "epoch": 4.492753623188406, "grad_norm": 1.192183494567871, "learning_rate": 0.001, "loss": 2.3051, "step": 52080 }, { "epoch": 4.4975845410628015, "grad_norm": 1.9183534383773804, "learning_rate": 0.001, "loss": 2.3209, "step": 52136 }, { "epoch": 4.5024154589371985, "grad_norm": 2.6884896755218506, "learning_rate": 0.001, "loss": 2.3101, "step": 52192 }, { "epoch": 4.507246376811594, "grad_norm": 0.943748414516449, "learning_rate": 0.001, "loss": 2.3101, "step": 52248 }, { "epoch": 4.512077294685991, "grad_norm": 1.654974102973938, "learning_rate": 0.001, "loss": 2.3182, "step": 52304 }, { "epoch": 4.516908212560386, "grad_norm": 1.3912440538406372, "learning_rate": 0.001, "loss": 2.3219, "step": 52360 }, { "epoch": 4.521739130434782, "grad_norm": 1.3495748043060303, "learning_rate": 0.001, "loss": 2.3039, "step": 52416 }, { "epoch": 4.526570048309178, "grad_norm": 3.488729238510132, "learning_rate": 0.001, "loss": 2.2987, "step": 52472 }, { "epoch": 4.531400966183575, "grad_norm": 1.2651500701904297, "learning_rate": 0.001, "loss": 2.2985, "step": 52528 }, { "epoch": 4.536231884057971, "grad_norm": 0.8455155491828918, "learning_rate": 0.001, "loss": 2.2993, "step": 52584 }, { "epoch": 4.541062801932367, "grad_norm": 1.6375813484191895, "learning_rate": 0.001, "loss": 2.292, "step": 52640 }, { "epoch": 4.545893719806763, "grad_norm": 1.1256200075149536, "learning_rate": 0.001, "loss": 2.2997, "step": 52696 }, { "epoch": 4.550724637681159, "grad_norm": 1.5138390064239502, "learning_rate": 0.001, "loss": 2.3013, "step": 52752 }, { "epoch": 4.555555555555555, "grad_norm": 0.7223426103591919, "learning_rate": 0.001, "loss": 2.2979, "step": 52808 }, { "epoch": 4.5603864734299515, "grad_norm": 2.692262649536133, "learning_rate": 0.001, "loss": 2.3171, "step": 52864 }, { "epoch": 4.565217391304348, "grad_norm": 1.9587488174438477, "learning_rate": 0.001, "loss": 2.3123, "step": 52920 }, { "epoch": 4.570048309178744, "grad_norm": 1.3185560703277588, "learning_rate": 0.001, "loss": 2.3227, "step": 52976 }, { "epoch": 4.57487922705314, "grad_norm": 3.9335124492645264, "learning_rate": 0.001, "loss": 2.3099, "step": 53032 }, { "epoch": 4.579710144927536, "grad_norm": 1.239708423614502, "learning_rate": 0.001, "loss": 2.3212, "step": 53088 }, { "epoch": 4.584541062801932, "grad_norm": 1.176710844039917, "learning_rate": 0.001, "loss": 2.3238, "step": 53144 }, { "epoch": 4.5893719806763285, "grad_norm": 1.0926462411880493, "learning_rate": 0.001, "loss": 2.3362, "step": 53200 }, { "epoch": 4.594202898550725, "grad_norm": 1.1184386014938354, "learning_rate": 0.001, "loss": 2.3409, "step": 53256 }, { "epoch": 4.599033816425121, "grad_norm": 1.2343361377716064, "learning_rate": 0.001, "loss": 2.3317, "step": 53312 }, { "epoch": 4.603864734299517, "grad_norm": 0.8025084733963013, "learning_rate": 0.001, "loss": 2.35, "step": 53368 }, { "epoch": 4.608695652173913, "grad_norm": 1.1020928621292114, "learning_rate": 0.001, "loss": 2.3389, "step": 53424 }, { "epoch": 4.613526570048309, "grad_norm": 1.9558590650558472, "learning_rate": 0.001, "loss": 2.3278, "step": 53480 }, { "epoch": 4.618357487922705, "grad_norm": 1.644404649734497, "learning_rate": 0.001, "loss": 2.3245, "step": 53536 }, { "epoch": 4.6231884057971016, "grad_norm": 6.147590637207031, "learning_rate": 0.001, "loss": 2.3259, "step": 53592 }, { "epoch": 4.628019323671498, "grad_norm": 1.4744198322296143, "learning_rate": 0.001, "loss": 2.3307, "step": 53648 }, { "epoch": 4.632850241545894, "grad_norm": 1.4356679916381836, "learning_rate": 0.001, "loss": 2.3391, "step": 53704 }, { "epoch": 4.63768115942029, "grad_norm": 1.2997158765792847, "learning_rate": 0.001, "loss": 2.3385, "step": 53760 }, { "epoch": 4.642512077294686, "grad_norm": 1.5287188291549683, "learning_rate": 0.001, "loss": 2.3363, "step": 53816 }, { "epoch": 4.647342995169082, "grad_norm": 1.5143572092056274, "learning_rate": 0.001, "loss": 2.3383, "step": 53872 }, { "epoch": 4.6521739130434785, "grad_norm": 3.656484842300415, "learning_rate": 0.001, "loss": 2.3268, "step": 53928 }, { "epoch": 4.657004830917875, "grad_norm": 1.028181791305542, "learning_rate": 0.001, "loss": 2.3258, "step": 53984 }, { "epoch": 4.661835748792271, "grad_norm": 5.046627998352051, "learning_rate": 0.001, "loss": 2.3307, "step": 54040 }, { "epoch": 4.666666666666667, "grad_norm": 0.7748187184333801, "learning_rate": 0.001, "loss": 2.328, "step": 54096 }, { "epoch": 4.671497584541063, "grad_norm": 0.9460355639457703, "learning_rate": 0.001, "loss": 2.3256, "step": 54152 }, { "epoch": 4.676328502415459, "grad_norm": 2.8801820278167725, "learning_rate": 0.001, "loss": 2.3247, "step": 54208 }, { "epoch": 4.681159420289855, "grad_norm": 0.9241681694984436, "learning_rate": 0.001, "loss": 2.3478, "step": 54264 }, { "epoch": 4.685990338164252, "grad_norm": 0.6218664646148682, "learning_rate": 0.001, "loss": 2.3448, "step": 54320 }, { "epoch": 4.690821256038648, "grad_norm": 1.4080214500427246, "learning_rate": 0.001, "loss": 2.3311, "step": 54376 }, { "epoch": 4.695652173913043, "grad_norm": 0.8577357530593872, "learning_rate": 0.001, "loss": 2.3223, "step": 54432 }, { "epoch": 4.70048309178744, "grad_norm": 2.301870822906494, "learning_rate": 0.001, "loss": 2.313, "step": 54488 }, { "epoch": 4.705314009661835, "grad_norm": 0.7508590817451477, "learning_rate": 0.001, "loss": 2.3285, "step": 54544 }, { "epoch": 4.710144927536232, "grad_norm": 1.3048603534698486, "learning_rate": 0.001, "loss": 2.317, "step": 54600 }, { "epoch": 4.714975845410628, "grad_norm": 0.8950731158256531, "learning_rate": 0.001, "loss": 2.3089, "step": 54656 }, { "epoch": 4.719806763285024, "grad_norm": 1.0115829706192017, "learning_rate": 0.001, "loss": 2.2971, "step": 54712 }, { "epoch": 4.72463768115942, "grad_norm": 0.7804195880889893, "learning_rate": 0.001, "loss": 2.3047, "step": 54768 }, { "epoch": 4.729468599033816, "grad_norm": 2.3576929569244385, "learning_rate": 0.001, "loss": 2.2954, "step": 54824 }, { "epoch": 4.734299516908212, "grad_norm": 1.9797247648239136, "learning_rate": 0.001, "loss": 2.3044, "step": 54880 }, { "epoch": 4.739130434782608, "grad_norm": 1.1571468114852905, "learning_rate": 0.001, "loss": 2.3173, "step": 54936 }, { "epoch": 4.743961352657005, "grad_norm": 0.8506631851196289, "learning_rate": 0.001, "loss": 2.3097, "step": 54992 }, { "epoch": 4.748792270531401, "grad_norm": 0.8268190026283264, "learning_rate": 0.001, "loss": 2.2963, "step": 55048 }, { "epoch": 4.753623188405797, "grad_norm": 1.88607919216156, "learning_rate": 0.001, "loss": 2.2998, "step": 55104 }, { "epoch": 4.758454106280193, "grad_norm": 0.9432274103164673, "learning_rate": 0.001, "loss": 2.3113, "step": 55160 }, { "epoch": 4.763285024154589, "grad_norm": 0.725443959236145, "learning_rate": 0.001, "loss": 2.3005, "step": 55216 }, { "epoch": 4.768115942028985, "grad_norm": 1.2860841751098633, "learning_rate": 0.001, "loss": 2.2938, "step": 55272 }, { "epoch": 4.7729468599033815, "grad_norm": 0.9735112190246582, "learning_rate": 0.001, "loss": 2.3147, "step": 55328 }, { "epoch": 4.777777777777778, "grad_norm": 0.9253095984458923, "learning_rate": 0.001, "loss": 2.2996, "step": 55384 }, { "epoch": 4.782608695652174, "grad_norm": 1.4768403768539429, "learning_rate": 0.001, "loss": 2.3024, "step": 55440 }, { "epoch": 4.78743961352657, "grad_norm": 1.6015561819076538, "learning_rate": 0.001, "loss": 2.3053, "step": 55496 }, { "epoch": 4.792270531400966, "grad_norm": 0.9914491772651672, "learning_rate": 0.001, "loss": 2.2909, "step": 55552 }, { "epoch": 4.797101449275362, "grad_norm": 1.04567551612854, "learning_rate": 0.001, "loss": 2.3037, "step": 55608 }, { "epoch": 4.8019323671497585, "grad_norm": 0.932278573513031, "learning_rate": 0.001, "loss": 2.3047, "step": 55664 }, { "epoch": 4.806763285024155, "grad_norm": 2.336017608642578, "learning_rate": 0.001, "loss": 2.298, "step": 55720 }, { "epoch": 4.811594202898551, "grad_norm": 0.8514236807823181, "learning_rate": 0.001, "loss": 2.2985, "step": 55776 }, { "epoch": 4.816425120772947, "grad_norm": 0.9563355445861816, "learning_rate": 0.001, "loss": 2.3065, "step": 55832 }, { "epoch": 4.821256038647343, "grad_norm": 2.0424678325653076, "learning_rate": 0.001, "loss": 2.3127, "step": 55888 }, { "epoch": 4.826086956521739, "grad_norm": 0.9988775849342346, "learning_rate": 0.001, "loss": 2.3129, "step": 55944 }, { "epoch": 4.830917874396135, "grad_norm": 0.6675341725349426, "learning_rate": 0.001, "loss": 2.2977, "step": 56000 }, { "epoch": 4.835748792270532, "grad_norm": 0.8867150545120239, "learning_rate": 0.001, "loss": 2.2963, "step": 56056 }, { "epoch": 4.840579710144928, "grad_norm": 1.4586399793624878, "learning_rate": 0.001, "loss": 2.2996, "step": 56112 }, { "epoch": 4.845410628019324, "grad_norm": 0.9547634124755859, "learning_rate": 0.001, "loss": 2.3178, "step": 56168 }, { "epoch": 4.85024154589372, "grad_norm": 0.606196403503418, "learning_rate": 0.001, "loss": 2.3202, "step": 56224 }, { "epoch": 4.855072463768116, "grad_norm": 0.6954506635665894, "learning_rate": 0.001, "loss": 2.3034, "step": 56280 }, { "epoch": 4.859903381642512, "grad_norm": 1.4388718605041504, "learning_rate": 0.001, "loss": 2.3043, "step": 56336 }, { "epoch": 4.8647342995169085, "grad_norm": 1.8744267225265503, "learning_rate": 0.001, "loss": 2.2976, "step": 56392 }, { "epoch": 4.869565217391305, "grad_norm": 14.347280502319336, "learning_rate": 0.001, "loss": 2.3003, "step": 56448 }, { "epoch": 4.874396135265701, "grad_norm": 0.818060040473938, "learning_rate": 0.001, "loss": 2.2995, "step": 56504 }, { "epoch": 4.879227053140097, "grad_norm": 1.3645793199539185, "learning_rate": 0.001, "loss": 2.3081, "step": 56560 }, { "epoch": 4.884057971014493, "grad_norm": 0.8686359524726868, "learning_rate": 0.001, "loss": 2.3143, "step": 56616 }, { "epoch": 4.888888888888889, "grad_norm": 3.609219789505005, "learning_rate": 0.001, "loss": 2.3189, "step": 56672 }, { "epoch": 4.8937198067632846, "grad_norm": 1.5765020847320557, "learning_rate": 0.001, "loss": 2.3137, "step": 56728 }, { "epoch": 4.898550724637682, "grad_norm": 0.9636671543121338, "learning_rate": 0.001, "loss": 2.3266, "step": 56784 }, { "epoch": 4.903381642512077, "grad_norm": 0.5818440318107605, "learning_rate": 0.001, "loss": 2.3104, "step": 56840 }, { "epoch": 4.908212560386474, "grad_norm": 0.9418597221374512, "learning_rate": 0.001, "loss": 2.3213, "step": 56896 }, { "epoch": 4.913043478260869, "grad_norm": 1.7600048780441284, "learning_rate": 0.001, "loss": 2.3159, "step": 56952 }, { "epoch": 4.917874396135265, "grad_norm": 2.075094223022461, "learning_rate": 0.001, "loss": 2.3185, "step": 57008 }, { "epoch": 4.9227053140096615, "grad_norm": 70.46051025390625, "learning_rate": 0.001, "loss": 2.3041, "step": 57064 }, { "epoch": 4.927536231884058, "grad_norm": 6.2453227043151855, "learning_rate": 0.001, "loss": 2.2921, "step": 57120 }, { "epoch": 4.932367149758454, "grad_norm": 1.1778651475906372, "learning_rate": 0.001, "loss": 2.3201, "step": 57176 }, { "epoch": 4.93719806763285, "grad_norm": 1.4549427032470703, "learning_rate": 0.001, "loss": 2.3297, "step": 57232 }, { "epoch": 4.942028985507246, "grad_norm": 3.4987800121307373, "learning_rate": 0.001, "loss": 2.3402, "step": 57288 }, { "epoch": 4.946859903381642, "grad_norm": 1.0558435916900635, "learning_rate": 0.001, "loss": 2.3375, "step": 57344 }, { "epoch": 4.951690821256038, "grad_norm": 1.290441632270813, "learning_rate": 0.001, "loss": 2.3181, "step": 57400 }, { "epoch": 4.956521739130435, "grad_norm": 1.7993686199188232, "learning_rate": 0.001, "loss": 2.3217, "step": 57456 }, { "epoch": 4.961352657004831, "grad_norm": 1.179622769355774, "learning_rate": 0.001, "loss": 2.329, "step": 57512 }, { "epoch": 4.966183574879227, "grad_norm": 1.1743459701538086, "learning_rate": 0.001, "loss": 2.3237, "step": 57568 }, { "epoch": 4.971014492753623, "grad_norm": 1.2919784784317017, "learning_rate": 0.001, "loss": 2.3119, "step": 57624 }, { "epoch": 4.975845410628019, "grad_norm": 9.568537712097168, "learning_rate": 0.001, "loss": 2.3115, "step": 57680 }, { "epoch": 4.980676328502415, "grad_norm": 0.928508460521698, "learning_rate": 0.001, "loss": 2.3275, "step": 57736 }, { "epoch": 4.9855072463768115, "grad_norm": 2.3657476902008057, "learning_rate": 0.001, "loss": 2.3186, "step": 57792 }, { "epoch": 4.990338164251208, "grad_norm": 0.8450965881347656, "learning_rate": 0.001, "loss": 2.2992, "step": 57848 }, { "epoch": 4.995169082125604, "grad_norm": 0.8341897130012512, "learning_rate": 0.001, "loss": 2.2955, "step": 57904 }, { "epoch": 5.0, "grad_norm": 1.2438653707504272, "learning_rate": 0.001, "loss": 2.3025, "step": 57960 }, { "epoch": 5.004830917874396, "grad_norm": 0.9436646103858948, "learning_rate": 0.001, "loss": 2.2737, "step": 58016 }, { "epoch": 5.009661835748792, "grad_norm": 0.7943527698516846, "learning_rate": 0.001, "loss": 2.2679, "step": 58072 }, { "epoch": 5.0144927536231885, "grad_norm": 0.6496822237968445, "learning_rate": 0.001, "loss": 2.2715, "step": 58128 }, { "epoch": 5.019323671497585, "grad_norm": 1.3944389820098877, "learning_rate": 0.001, "loss": 2.2746, "step": 58184 }, { "epoch": 5.024154589371981, "grad_norm": 0.8490382432937622, "learning_rate": 0.001, "loss": 2.2733, "step": 58240 }, { "epoch": 5.028985507246377, "grad_norm": 1.5853235721588135, "learning_rate": 0.001, "loss": 2.267, "step": 58296 }, { "epoch": 5.033816425120773, "grad_norm": 2.906805992126465, "learning_rate": 0.001, "loss": 2.2706, "step": 58352 }, { "epoch": 5.038647342995169, "grad_norm": 1.4250094890594482, "learning_rate": 0.001, "loss": 2.2689, "step": 58408 }, { "epoch": 5.043478260869565, "grad_norm": 4.093613624572754, "learning_rate": 0.001, "loss": 2.2761, "step": 58464 }, { "epoch": 5.048309178743962, "grad_norm": 1.2158496379852295, "learning_rate": 0.001, "loss": 2.291, "step": 58520 }, { "epoch": 5.053140096618358, "grad_norm": 0.8814330101013184, "learning_rate": 0.001, "loss": 2.2736, "step": 58576 }, { "epoch": 5.057971014492754, "grad_norm": 0.8530583381652832, "learning_rate": 0.001, "loss": 2.2708, "step": 58632 }, { "epoch": 5.06280193236715, "grad_norm": 1.7054238319396973, "learning_rate": 0.001, "loss": 2.2688, "step": 58688 }, { "epoch": 5.067632850241546, "grad_norm": 0.7102516293525696, "learning_rate": 0.001, "loss": 2.2675, "step": 58744 }, { "epoch": 5.072463768115942, "grad_norm": 0.6261909604072571, "learning_rate": 0.001, "loss": 2.2558, "step": 58800 }, { "epoch": 5.0772946859903385, "grad_norm": 2.0799224376678467, "learning_rate": 0.001, "loss": 2.2613, "step": 58856 }, { "epoch": 5.082125603864735, "grad_norm": 0.8586704730987549, "learning_rate": 0.001, "loss": 2.262, "step": 58912 }, { "epoch": 5.086956521739131, "grad_norm": 0.7287746667861938, "learning_rate": 0.001, "loss": 2.2575, "step": 58968 }, { "epoch": 5.091787439613527, "grad_norm": 0.6179029941558838, "learning_rate": 0.001, "loss": 2.2536, "step": 59024 }, { "epoch": 5.096618357487923, "grad_norm": 2.5027058124542236, "learning_rate": 0.001, "loss": 2.265, "step": 59080 }, { "epoch": 5.101449275362318, "grad_norm": 2.4114880561828613, "learning_rate": 0.001, "loss": 2.2519, "step": 59136 }, { "epoch": 5.106280193236715, "grad_norm": 0.9805002212524414, "learning_rate": 0.001, "loss": 2.2478, "step": 59192 }, { "epoch": 5.111111111111111, "grad_norm": 0.8979089856147766, "learning_rate": 0.001, "loss": 2.2458, "step": 59248 }, { "epoch": 5.115942028985507, "grad_norm": 1.0549947023391724, "learning_rate": 0.001, "loss": 2.2485, "step": 59304 }, { "epoch": 5.120772946859903, "grad_norm": 0.9608076214790344, "learning_rate": 0.001, "loss": 2.2544, "step": 59360 }, { "epoch": 5.125603864734299, "grad_norm": 1.2952042818069458, "learning_rate": 0.001, "loss": 2.2558, "step": 59416 }, { "epoch": 5.130434782608695, "grad_norm": 1.2467831373214722, "learning_rate": 0.001, "loss": 2.2751, "step": 59472 }, { "epoch": 5.1352657004830915, "grad_norm": 1.6446683406829834, "learning_rate": 0.001, "loss": 2.2576, "step": 59528 }, { "epoch": 5.140096618357488, "grad_norm": 0.6389644145965576, "learning_rate": 0.001, "loss": 2.2533, "step": 59584 }, { "epoch": 5.144927536231884, "grad_norm": 1.7211169004440308, "learning_rate": 0.001, "loss": 2.2651, "step": 59640 }, { "epoch": 5.14975845410628, "grad_norm": 1.44622802734375, "learning_rate": 0.001, "loss": 2.2654, "step": 59696 }, { "epoch": 5.154589371980676, "grad_norm": 1.2150685787200928, "learning_rate": 0.001, "loss": 2.2703, "step": 59752 }, { "epoch": 5.159420289855072, "grad_norm": 2.292452812194824, "learning_rate": 0.001, "loss": 2.2706, "step": 59808 }, { "epoch": 5.164251207729468, "grad_norm": 1.2551698684692383, "learning_rate": 0.001, "loss": 2.2727, "step": 59864 }, { "epoch": 5.169082125603865, "grad_norm": 1.8675438165664673, "learning_rate": 0.001, "loss": 2.2745, "step": 59920 }, { "epoch": 5.173913043478261, "grad_norm": 1.4108449220657349, "learning_rate": 0.001, "loss": 2.2764, "step": 59976 }, { "epoch": 5.178743961352657, "grad_norm": 1.1421384811401367, "learning_rate": 0.001, "loss": 2.2782, "step": 60032 }, { "epoch": 5.183574879227053, "grad_norm": 1.0807000398635864, "learning_rate": 0.001, "loss": 2.2662, "step": 60088 }, { "epoch": 5.188405797101449, "grad_norm": 1.0078704357147217, "learning_rate": 0.001, "loss": 2.2602, "step": 60144 }, { "epoch": 5.193236714975845, "grad_norm": 1.1299046277999878, "learning_rate": 0.001, "loss": 2.2643, "step": 60200 }, { "epoch": 5.1980676328502415, "grad_norm": 1.1986722946166992, "learning_rate": 0.001, "loss": 2.2504, "step": 60256 }, { "epoch": 5.202898550724638, "grad_norm": 1.0549452304840088, "learning_rate": 0.001, "loss": 2.2565, "step": 60312 }, { "epoch": 5.207729468599034, "grad_norm": 2.037277936935425, "learning_rate": 0.001, "loss": 2.2465, "step": 60368 }, { "epoch": 5.21256038647343, "grad_norm": 2.5596325397491455, "learning_rate": 0.001, "loss": 2.2676, "step": 60424 }, { "epoch": 5.217391304347826, "grad_norm": 1.34528386592865, "learning_rate": 0.001, "loss": 2.2692, "step": 60480 }, { "epoch": 5.222222222222222, "grad_norm": 1.1312228441238403, "learning_rate": 0.001, "loss": 2.2777, "step": 60536 }, { "epoch": 5.2270531400966185, "grad_norm": 0.7738250494003296, "learning_rate": 0.001, "loss": 2.2779, "step": 60592 }, { "epoch": 5.231884057971015, "grad_norm": 1.8783988952636719, "learning_rate": 0.001, "loss": 2.282, "step": 60648 }, { "epoch": 5.236714975845411, "grad_norm": 3.2178571224212646, "learning_rate": 0.001, "loss": 2.2858, "step": 60704 }, { "epoch": 5.241545893719807, "grad_norm": 1.4991389513015747, "learning_rate": 0.001, "loss": 2.2652, "step": 60760 }, { "epoch": 5.246376811594203, "grad_norm": 1.0514947175979614, "learning_rate": 0.001, "loss": 2.2692, "step": 60816 }, { "epoch": 5.251207729468599, "grad_norm": 1.5282636880874634, "learning_rate": 0.001, "loss": 2.2621, "step": 60872 }, { "epoch": 5.256038647342995, "grad_norm": 0.6508818864822388, "learning_rate": 0.001, "loss": 2.2707, "step": 60928 }, { "epoch": 5.260869565217392, "grad_norm": 1.6331833600997925, "learning_rate": 0.001, "loss": 2.2656, "step": 60984 }, { "epoch": 5.265700483091788, "grad_norm": 1.6199853420257568, "learning_rate": 0.001, "loss": 2.2651, "step": 61040 }, { "epoch": 5.270531400966184, "grad_norm": 1.4836833477020264, "learning_rate": 0.001, "loss": 2.2685, "step": 61096 }, { "epoch": 5.27536231884058, "grad_norm": 1.1068944931030273, "learning_rate": 0.001, "loss": 2.2629, "step": 61152 }, { "epoch": 5.280193236714976, "grad_norm": 1.0773953199386597, "learning_rate": 0.001, "loss": 2.2695, "step": 61208 }, { "epoch": 5.285024154589372, "grad_norm": 5.323557376861572, "learning_rate": 0.001, "loss": 2.2745, "step": 61264 }, { "epoch": 5.2898550724637685, "grad_norm": 1.236106276512146, "learning_rate": 0.001, "loss": 2.2789, "step": 61320 }, { "epoch": 5.294685990338165, "grad_norm": 1.1621880531311035, "learning_rate": 0.001, "loss": 2.2767, "step": 61376 }, { "epoch": 5.29951690821256, "grad_norm": 1.3490560054779053, "learning_rate": 0.001, "loss": 2.277, "step": 61432 }, { "epoch": 5.304347826086957, "grad_norm": 0.9776967167854309, "learning_rate": 0.001, "loss": 2.2639, "step": 61488 }, { "epoch": 5.309178743961352, "grad_norm": 0.8823137879371643, "learning_rate": 0.001, "loss": 2.2806, "step": 61544 }, { "epoch": 5.314009661835748, "grad_norm": 0.8365609049797058, "learning_rate": 0.001, "loss": 2.27, "step": 61600 }, { "epoch": 5.318840579710145, "grad_norm": 0.8970274925231934, "learning_rate": 0.001, "loss": 2.2635, "step": 61656 }, { "epoch": 5.323671497584541, "grad_norm": 1.1043164730072021, "learning_rate": 0.001, "loss": 2.2573, "step": 61712 }, { "epoch": 5.328502415458937, "grad_norm": 1.1831847429275513, "learning_rate": 0.001, "loss": 2.2688, "step": 61768 }, { "epoch": 5.333333333333333, "grad_norm": 0.8626671433448792, "learning_rate": 0.001, "loss": 2.268, "step": 61824 }, { "epoch": 5.338164251207729, "grad_norm": 1.0557650327682495, "learning_rate": 0.001, "loss": 2.273, "step": 61880 }, { "epoch": 5.342995169082125, "grad_norm": 1.1638703346252441, "learning_rate": 0.001, "loss": 2.2703, "step": 61936 }, { "epoch": 5.3478260869565215, "grad_norm": 2.4471399784088135, "learning_rate": 0.001, "loss": 2.2685, "step": 61992 }, { "epoch": 5.352657004830918, "grad_norm": 0.674362301826477, "learning_rate": 0.001, "loss": 2.2726, "step": 62048 }, { "epoch": 5.357487922705314, "grad_norm": 2.021547794342041, "learning_rate": 0.001, "loss": 2.2807, "step": 62104 }, { "epoch": 5.36231884057971, "grad_norm": 1.0344667434692383, "learning_rate": 0.001, "loss": 2.2719, "step": 62160 }, { "epoch": 5.367149758454106, "grad_norm": 2.5653374195098877, "learning_rate": 0.001, "loss": 2.2827, "step": 62216 }, { "epoch": 5.371980676328502, "grad_norm": 1.1311075687408447, "learning_rate": 0.001, "loss": 2.2844, "step": 62272 }, { "epoch": 5.3768115942028984, "grad_norm": 0.9872215986251831, "learning_rate": 0.001, "loss": 2.2627, "step": 62328 }, { "epoch": 5.381642512077295, "grad_norm": 0.82745760679245, "learning_rate": 0.001, "loss": 2.2649, "step": 62384 }, { "epoch": 5.386473429951691, "grad_norm": 0.8367785811424255, "learning_rate": 0.001, "loss": 2.2808, "step": 62440 }, { "epoch": 5.391304347826087, "grad_norm": 1.1019151210784912, "learning_rate": 0.001, "loss": 2.2663, "step": 62496 }, { "epoch": 5.396135265700483, "grad_norm": 0.8462756276130676, "learning_rate": 0.001, "loss": 2.2602, "step": 62552 }, { "epoch": 5.400966183574879, "grad_norm": 0.7107194066047668, "learning_rate": 0.001, "loss": 2.2541, "step": 62608 }, { "epoch": 5.405797101449275, "grad_norm": 0.49883630871772766, "learning_rate": 0.001, "loss": 2.2563, "step": 62664 }, { "epoch": 5.4106280193236715, "grad_norm": 0.430463969707489, "learning_rate": 0.001, "loss": 2.2473, "step": 62720 }, { "epoch": 5.415458937198068, "grad_norm": 0.8459985852241516, "learning_rate": 0.001, "loss": 2.2575, "step": 62776 }, { "epoch": 5.420289855072464, "grad_norm": 0.6483646035194397, "learning_rate": 0.001, "loss": 2.2652, "step": 62832 }, { "epoch": 5.42512077294686, "grad_norm": 0.7575286030769348, "learning_rate": 0.001, "loss": 2.2548, "step": 62888 }, { "epoch": 5.429951690821256, "grad_norm": 0.6327202916145325, "learning_rate": 0.001, "loss": 2.2484, "step": 62944 }, { "epoch": 5.434782608695652, "grad_norm": 1.3381153345108032, "learning_rate": 0.001, "loss": 2.2424, "step": 63000 }, { "epoch": 5.4396135265700485, "grad_norm": 1.031955361366272, "learning_rate": 0.001, "loss": 2.2487, "step": 63056 }, { "epoch": 5.444444444444445, "grad_norm": 1.4858100414276123, "learning_rate": 0.001, "loss": 2.2451, "step": 63112 }, { "epoch": 5.449275362318841, "grad_norm": 1.1018794775009155, "learning_rate": 0.001, "loss": 2.2457, "step": 63168 }, { "epoch": 5.454106280193237, "grad_norm": 0.8303359746932983, "learning_rate": 0.001, "loss": 2.2573, "step": 63224 }, { "epoch": 5.458937198067633, "grad_norm": 1.1348568201065063, "learning_rate": 0.001, "loss": 2.2566, "step": 63280 }, { "epoch": 5.463768115942029, "grad_norm": 1.037598967552185, "learning_rate": 0.001, "loss": 2.2509, "step": 63336 }, { "epoch": 5.468599033816425, "grad_norm": 5.798953533172607, "learning_rate": 0.001, "loss": 2.2505, "step": 63392 }, { "epoch": 5.473429951690822, "grad_norm": 3.5276036262512207, "learning_rate": 0.001, "loss": 2.2477, "step": 63448 }, { "epoch": 5.478260869565218, "grad_norm": 1.2610571384429932, "learning_rate": 0.001, "loss": 2.2487, "step": 63504 }, { "epoch": 5.483091787439614, "grad_norm": 1.2737233638763428, "learning_rate": 0.001, "loss": 2.259, "step": 63560 }, { "epoch": 5.48792270531401, "grad_norm": 6.124037265777588, "learning_rate": 0.001, "loss": 2.2611, "step": 63616 }, { "epoch": 5.492753623188406, "grad_norm": 2.610708236694336, "learning_rate": 0.001, "loss": 2.2604, "step": 63672 }, { "epoch": 5.4975845410628015, "grad_norm": 2.1107146739959717, "learning_rate": 0.001, "loss": 2.2806, "step": 63728 }, { "epoch": 5.5024154589371985, "grad_norm": 1.5488975048065186, "learning_rate": 0.001, "loss": 2.2776, "step": 63784 }, { "epoch": 5.507246376811594, "grad_norm": 1.5342985391616821, "learning_rate": 0.001, "loss": 2.2661, "step": 63840 }, { "epoch": 5.512077294685991, "grad_norm": 1.1326875686645508, "learning_rate": 0.001, "loss": 2.2691, "step": 63896 }, { "epoch": 5.516908212560386, "grad_norm": 1.4246026277542114, "learning_rate": 0.001, "loss": 2.2542, "step": 63952 }, { "epoch": 5.521739130434782, "grad_norm": 2.2407288551330566, "learning_rate": 0.001, "loss": 2.2725, "step": 64008 }, { "epoch": 5.526570048309178, "grad_norm": 1.3345484733581543, "learning_rate": 0.001, "loss": 2.282, "step": 64064 }, { "epoch": 5.531400966183575, "grad_norm": 0.9723467826843262, "learning_rate": 0.001, "loss": 2.2746, "step": 64120 }, { "epoch": 5.536231884057971, "grad_norm": 0.6756110191345215, "learning_rate": 0.001, "loss": 2.2643, "step": 64176 }, { "epoch": 5.541062801932367, "grad_norm": 1.0184459686279297, "learning_rate": 0.001, "loss": 2.2647, "step": 64232 }, { "epoch": 5.545893719806763, "grad_norm": 1.639488935470581, "learning_rate": 0.001, "loss": 2.2621, "step": 64288 }, { "epoch": 5.550724637681159, "grad_norm": 1.3570550680160522, "learning_rate": 0.001, "loss": 2.2619, "step": 64344 }, { "epoch": 5.555555555555555, "grad_norm": 0.7477699518203735, "learning_rate": 0.001, "loss": 2.2644, "step": 64400 }, { "epoch": 5.5603864734299515, "grad_norm": 0.7368486523628235, "learning_rate": 0.001, "loss": 2.2627, "step": 64456 }, { "epoch": 5.565217391304348, "grad_norm": 1.2352867126464844, "learning_rate": 0.001, "loss": 2.2642, "step": 64512 }, { "epoch": 5.570048309178744, "grad_norm": 2.5308146476745605, "learning_rate": 0.001, "loss": 2.2629, "step": 64568 }, { "epoch": 5.57487922705314, "grad_norm": 1.1007472276687622, "learning_rate": 0.001, "loss": 2.2791, "step": 64624 }, { "epoch": 5.579710144927536, "grad_norm": 1.4356293678283691, "learning_rate": 0.001, "loss": 2.2925, "step": 64680 }, { "epoch": 5.584541062801932, "grad_norm": 1.9607384204864502, "learning_rate": 0.001, "loss": 2.2955, "step": 64736 }, { "epoch": 5.5893719806763285, "grad_norm": 1.1231130361557007, "learning_rate": 0.001, "loss": 2.3006, "step": 64792 }, { "epoch": 5.594202898550725, "grad_norm": 1.3609702587127686, "learning_rate": 0.001, "loss": 2.3073, "step": 64848 }, { "epoch": 5.599033816425121, "grad_norm": 1.394912838935852, "learning_rate": 0.001, "loss": 2.3038, "step": 64904 }, { "epoch": 5.603864734299517, "grad_norm": 1.9593900442123413, "learning_rate": 0.001, "loss": 2.2907, "step": 64960 }, { "epoch": 5.608695652173913, "grad_norm": 1.0045404434204102, "learning_rate": 0.001, "loss": 2.294, "step": 65016 }, { "epoch": 5.613526570048309, "grad_norm": 1.468446969985962, "learning_rate": 0.001, "loss": 2.2959, "step": 65072 }, { "epoch": 5.618357487922705, "grad_norm": 0.8016161918640137, "learning_rate": 0.001, "loss": 2.2989, "step": 65128 }, { "epoch": 5.6231884057971016, "grad_norm": 1.8443540334701538, "learning_rate": 0.001, "loss": 2.3027, "step": 65184 }, { "epoch": 5.628019323671498, "grad_norm": 0.7068531513214111, "learning_rate": 0.001, "loss": 2.291, "step": 65240 }, { "epoch": 5.632850241545894, "grad_norm": 0.5824094414710999, "learning_rate": 0.001, "loss": 2.2843, "step": 65296 }, { "epoch": 5.63768115942029, "grad_norm": 1.8601717948913574, "learning_rate": 0.001, "loss": 2.2819, "step": 65352 }, { "epoch": 5.642512077294686, "grad_norm": 0.8813537955284119, "learning_rate": 0.001, "loss": 2.2926, "step": 65408 }, { "epoch": 5.647342995169082, "grad_norm": 1.7260870933532715, "learning_rate": 0.001, "loss": 2.284, "step": 65464 }, { "epoch": 5.6521739130434785, "grad_norm": 1.0956451892852783, "learning_rate": 0.001, "loss": 2.2852, "step": 65520 }, { "epoch": 5.657004830917875, "grad_norm": 17.495576858520508, "learning_rate": 0.001, "loss": 2.2816, "step": 65576 }, { "epoch": 5.661835748792271, "grad_norm": 1.0441806316375732, "learning_rate": 0.001, "loss": 2.2706, "step": 65632 }, { "epoch": 5.666666666666667, "grad_norm": 1.7222089767456055, "learning_rate": 0.001, "loss": 2.2786, "step": 65688 }, { "epoch": 5.671497584541063, "grad_norm": 0.9240567088127136, "learning_rate": 0.001, "loss": 2.2586, "step": 65744 }, { "epoch": 5.676328502415459, "grad_norm": 1.1051487922668457, "learning_rate": 0.001, "loss": 2.2699, "step": 65800 }, { "epoch": 5.681159420289855, "grad_norm": 1.9677304029464722, "learning_rate": 0.001, "loss": 2.2705, "step": 65856 }, { "epoch": 5.685990338164252, "grad_norm": 4.950940132141113, "learning_rate": 0.001, "loss": 2.2722, "step": 65912 }, { "epoch": 5.690821256038648, "grad_norm": 10.856415748596191, "learning_rate": 0.001, "loss": 2.2818, "step": 65968 }, { "epoch": 5.695652173913043, "grad_norm": 1.793298363685608, "learning_rate": 0.001, "loss": 2.2698, "step": 66024 }, { "epoch": 5.70048309178744, "grad_norm": 0.6615787744522095, "learning_rate": 0.001, "loss": 2.2663, "step": 66080 }, { "epoch": 5.705314009661835, "grad_norm": 0.8052457571029663, "learning_rate": 0.001, "loss": 2.275, "step": 66136 }, { "epoch": 5.710144927536232, "grad_norm": 0.7035436034202576, "learning_rate": 0.001, "loss": 2.2749, "step": 66192 }, { "epoch": 5.714975845410628, "grad_norm": 1.8040974140167236, "learning_rate": 0.001, "loss": 2.2671, "step": 66248 }, { "epoch": 5.719806763285024, "grad_norm": 1.2789722681045532, "learning_rate": 0.001, "loss": 2.2826, "step": 66304 }, { "epoch": 5.72463768115942, "grad_norm": 1.8050535917282104, "learning_rate": 0.001, "loss": 2.2792, "step": 66360 }, { "epoch": 5.729468599033816, "grad_norm": 1.0771936178207397, "learning_rate": 0.001, "loss": 2.2736, "step": 66416 }, { "epoch": 5.734299516908212, "grad_norm": 1.2183178663253784, "learning_rate": 0.001, "loss": 2.2668, "step": 66472 }, { "epoch": 5.739130434782608, "grad_norm": 1.0171388387680054, "learning_rate": 0.001, "loss": 2.2857, "step": 66528 }, { "epoch": 5.743961352657005, "grad_norm": 1.5964573621749878, "learning_rate": 0.001, "loss": 2.2629, "step": 66584 }, { "epoch": 5.748792270531401, "grad_norm": 1.9840853214263916, "learning_rate": 0.001, "loss": 2.2657, "step": 66640 }, { "epoch": 5.753623188405797, "grad_norm": 1.0129733085632324, "learning_rate": 0.001, "loss": 2.2687, "step": 66696 }, { "epoch": 5.758454106280193, "grad_norm": 1.0350550413131714, "learning_rate": 0.001, "loss": 2.2665, "step": 66752 }, { "epoch": 5.763285024154589, "grad_norm": 1.2609366178512573, "learning_rate": 0.001, "loss": 2.256, "step": 66808 }, { "epoch": 5.768115942028985, "grad_norm": 0.7500649094581604, "learning_rate": 0.001, "loss": 2.2594, "step": 66864 }, { "epoch": 5.7729468599033815, "grad_norm": 1.179863691329956, "learning_rate": 0.001, "loss": 2.2613, "step": 66920 }, { "epoch": 5.777777777777778, "grad_norm": 0.6870161294937134, "learning_rate": 0.001, "loss": 2.2546, "step": 66976 }, { "epoch": 5.782608695652174, "grad_norm": 2.775158643722534, "learning_rate": 0.001, "loss": 2.2559, "step": 67032 }, { "epoch": 5.78743961352657, "grad_norm": 1.0315377712249756, "learning_rate": 0.001, "loss": 2.2865, "step": 67088 }, { "epoch": 5.792270531400966, "grad_norm": 2.08611798286438, "learning_rate": 0.001, "loss": 2.2739, "step": 67144 }, { "epoch": 5.797101449275362, "grad_norm": 1.2024372816085815, "learning_rate": 0.001, "loss": 2.2702, "step": 67200 }, { "epoch": 5.8019323671497585, "grad_norm": 0.8497179746627808, "learning_rate": 0.001, "loss": 2.2666, "step": 67256 }, { "epoch": 5.806763285024155, "grad_norm": 1.0432077646255493, "learning_rate": 0.001, "loss": 2.2739, "step": 67312 }, { "epoch": 5.811594202898551, "grad_norm": 1.8335925340652466, "learning_rate": 0.001, "loss": 2.275, "step": 67368 }, { "epoch": 5.816425120772947, "grad_norm": 1.273441195487976, "learning_rate": 0.001, "loss": 2.2725, "step": 67424 }, { "epoch": 5.821256038647343, "grad_norm": 1.55553138256073, "learning_rate": 0.001, "loss": 2.2798, "step": 67480 }, { "epoch": 5.826086956521739, "grad_norm": 1.0334409475326538, "learning_rate": 0.001, "loss": 2.2885, "step": 67536 }, { "epoch": 5.830917874396135, "grad_norm": 1.8036469221115112, "learning_rate": 0.001, "loss": 2.2719, "step": 67592 }, { "epoch": 5.835748792270532, "grad_norm": 1.5342423915863037, "learning_rate": 0.001, "loss": 2.2657, "step": 67648 }, { "epoch": 5.840579710144928, "grad_norm": 1.4219903945922852, "learning_rate": 0.001, "loss": 2.2592, "step": 67704 }, { "epoch": 5.845410628019324, "grad_norm": 0.7149852514266968, "learning_rate": 0.001, "loss": 2.2738, "step": 67760 }, { "epoch": 5.85024154589372, "grad_norm": 5.3016676902771, "learning_rate": 0.001, "loss": 2.2678, "step": 67816 }, { "epoch": 5.855072463768116, "grad_norm": 1.096000075340271, "learning_rate": 0.001, "loss": 2.2659, "step": 67872 }, { "epoch": 5.859903381642512, "grad_norm": 0.9542959928512573, "learning_rate": 0.001, "loss": 2.2773, "step": 67928 }, { "epoch": 5.8647342995169085, "grad_norm": 0.5224072337150574, "learning_rate": 0.001, "loss": 2.2673, "step": 67984 }, { "epoch": 5.869565217391305, "grad_norm": 0.7417703866958618, "learning_rate": 0.001, "loss": 2.2751, "step": 68040 }, { "epoch": 5.874396135265701, "grad_norm": 1.5904245376586914, "learning_rate": 0.001, "loss": 2.2715, "step": 68096 }, { "epoch": 5.879227053140097, "grad_norm": 0.864590048789978, "learning_rate": 0.001, "loss": 2.269, "step": 68152 }, { "epoch": 5.884057971014493, "grad_norm": 1.5074886083602905, "learning_rate": 0.001, "loss": 2.2769, "step": 68208 }, { "epoch": 5.888888888888889, "grad_norm": 1.6723995208740234, "learning_rate": 0.001, "loss": 2.2773, "step": 68264 }, { "epoch": 5.8937198067632846, "grad_norm": 1.0331274271011353, "learning_rate": 0.001, "loss": 2.2765, "step": 68320 }, { "epoch": 5.898550724637682, "grad_norm": 1.7588037252426147, "learning_rate": 0.001, "loss": 2.2803, "step": 68376 }, { "epoch": 5.903381642512077, "grad_norm": 4.212721824645996, "learning_rate": 0.001, "loss": 2.2727, "step": 68432 }, { "epoch": 5.908212560386474, "grad_norm": 2.2231578826904297, "learning_rate": 0.001, "loss": 2.2831, "step": 68488 }, { "epoch": 5.913043478260869, "grad_norm": 2.2967207431793213, "learning_rate": 0.001, "loss": 2.2694, "step": 68544 }, { "epoch": 5.917874396135265, "grad_norm": 4.24258279800415, "learning_rate": 0.001, "loss": 2.2678, "step": 68600 }, { "epoch": 5.9227053140096615, "grad_norm": 1.2127195596694946, "learning_rate": 0.001, "loss": 2.2651, "step": 68656 }, { "epoch": 5.927536231884058, "grad_norm": 2.5435125827789307, "learning_rate": 0.001, "loss": 2.2685, "step": 68712 }, { "epoch": 5.932367149758454, "grad_norm": 3.300612688064575, "learning_rate": 0.001, "loss": 2.2539, "step": 68768 }, { "epoch": 5.93719806763285, "grad_norm": 0.6289768815040588, "learning_rate": 0.001, "loss": 2.2597, "step": 68824 }, { "epoch": 5.942028985507246, "grad_norm": 0.7482408881187439, "learning_rate": 0.001, "loss": 2.2725, "step": 68880 }, { "epoch": 5.946859903381642, "grad_norm": 1.8951795101165771, "learning_rate": 0.001, "loss": 2.2587, "step": 68936 }, { "epoch": 5.951690821256038, "grad_norm": 1.0117673873901367, "learning_rate": 0.001, "loss": 2.2665, "step": 68992 }, { "epoch": 5.956521739130435, "grad_norm": 0.553583562374115, "learning_rate": 0.001, "loss": 2.2683, "step": 69048 }, { "epoch": 5.961352657004831, "grad_norm": 2.6252622604370117, "learning_rate": 0.001, "loss": 2.2709, "step": 69104 }, { "epoch": 5.966183574879227, "grad_norm": 3.1359682083129883, "learning_rate": 0.001, "loss": 2.2571, "step": 69160 }, { "epoch": 5.971014492753623, "grad_norm": 0.9322596192359924, "learning_rate": 0.001, "loss": 2.2537, "step": 69216 }, { "epoch": 5.975845410628019, "grad_norm": 0.7134868502616882, "learning_rate": 0.001, "loss": 2.2535, "step": 69272 }, { "epoch": 5.980676328502415, "grad_norm": 0.679645836353302, "learning_rate": 0.001, "loss": 2.2545, "step": 69328 }, { "epoch": 5.9855072463768115, "grad_norm": 1.235795259475708, "learning_rate": 0.001, "loss": 2.2646, "step": 69384 }, { "epoch": 5.990338164251208, "grad_norm": 0.5267893075942993, "learning_rate": 0.001, "loss": 2.2545, "step": 69440 }, { "epoch": 5.995169082125604, "grad_norm": 0.6231503486633301, "learning_rate": 0.001, "loss": 2.2529, "step": 69496 }, { "epoch": 6.0, "grad_norm": 3.025648593902588, "learning_rate": 0.001, "loss": 2.2599, "step": 69552 }, { "epoch": 6.004830917874396, "grad_norm": 1.4466320276260376, "learning_rate": 0.001, "loss": 2.2099, "step": 69608 }, { "epoch": 6.009661835748792, "grad_norm": 0.5512242317199707, "learning_rate": 0.001, "loss": 2.2267, "step": 69664 }, { "epoch": 6.0144927536231885, "grad_norm": 1.1341667175292969, "learning_rate": 0.001, "loss": 2.224, "step": 69720 }, { "epoch": 6.019323671497585, "grad_norm": 1.3873621225357056, "learning_rate": 0.001, "loss": 2.2403, "step": 69776 }, { "epoch": 6.024154589371981, "grad_norm": 0.9209643006324768, "learning_rate": 0.001, "loss": 2.2248, "step": 69832 }, { "epoch": 6.028985507246377, "grad_norm": 0.7108995318412781, "learning_rate": 0.001, "loss": 2.2273, "step": 69888 }, { "epoch": 6.033816425120773, "grad_norm": 1.4621247053146362, "learning_rate": 0.001, "loss": 2.2126, "step": 69944 }, { "epoch": 6.038647342995169, "grad_norm": 1.5459744930267334, "learning_rate": 0.001, "loss": 2.2137, "step": 70000 }, { "epoch": 6.043478260869565, "grad_norm": 1.1624717712402344, "learning_rate": 0.001, "loss": 2.2296, "step": 70056 }, { "epoch": 6.048309178743962, "grad_norm": 0.7049641609191895, "learning_rate": 0.001, "loss": 2.2292, "step": 70112 }, { "epoch": 6.053140096618358, "grad_norm": 1.0620265007019043, "learning_rate": 0.001, "loss": 2.2118, "step": 70168 }, { "epoch": 6.057971014492754, "grad_norm": 1.0865387916564941, "learning_rate": 0.001, "loss": 2.2103, "step": 70224 }, { "epoch": 6.06280193236715, "grad_norm": 0.7274052500724792, "learning_rate": 0.001, "loss": 2.2221, "step": 70280 }, { "epoch": 6.067632850241546, "grad_norm": 2.709317684173584, "learning_rate": 0.001, "loss": 2.2219, "step": 70336 }, { "epoch": 6.072463768115942, "grad_norm": 0.8704111576080322, "learning_rate": 0.001, "loss": 2.2385, "step": 70392 }, { "epoch": 6.0772946859903385, "grad_norm": 1.0704371929168701, "learning_rate": 0.001, "loss": 2.2447, "step": 70448 }, { "epoch": 6.082125603864735, "grad_norm": 1.4001491069793701, "learning_rate": 0.001, "loss": 2.2549, "step": 70504 }, { "epoch": 6.086956521739131, "grad_norm": 1.9169684648513794, "learning_rate": 0.001, "loss": 2.2501, "step": 70560 }, { "epoch": 6.091787439613527, "grad_norm": 3.661534309387207, "learning_rate": 0.001, "loss": 2.2273, "step": 70616 }, { "epoch": 6.096618357487923, "grad_norm": 0.7327979207038879, "learning_rate": 0.001, "loss": 2.216, "step": 70672 }, { "epoch": 6.101449275362318, "grad_norm": 1.284769058227539, "learning_rate": 0.001, "loss": 2.2361, "step": 70728 }, { "epoch": 6.106280193236715, "grad_norm": 1.4347878694534302, "learning_rate": 0.001, "loss": 2.2399, "step": 70784 }, { "epoch": 6.111111111111111, "grad_norm": 0.9479652643203735, "learning_rate": 0.001, "loss": 2.2569, "step": 70840 }, { "epoch": 6.115942028985507, "grad_norm": 1.351218342781067, "learning_rate": 0.001, "loss": 2.2354, "step": 70896 }, { "epoch": 6.120772946859903, "grad_norm": 2.2644903659820557, "learning_rate": 0.001, "loss": 2.2392, "step": 70952 }, { "epoch": 6.125603864734299, "grad_norm": 4.74054479598999, "learning_rate": 0.001, "loss": 2.2352, "step": 71008 }, { "epoch": 6.130434782608695, "grad_norm": 0.6032834649085999, "learning_rate": 0.001, "loss": 2.2393, "step": 71064 }, { "epoch": 6.1352657004830915, "grad_norm": 1.5596091747283936, "learning_rate": 0.001, "loss": 2.2268, "step": 71120 }, { "epoch": 6.140096618357488, "grad_norm": 2.4547011852264404, "learning_rate": 0.001, "loss": 2.238, "step": 71176 }, { "epoch": 6.144927536231884, "grad_norm": 1.2931201457977295, "learning_rate": 0.001, "loss": 2.2611, "step": 71232 }, { "epoch": 6.14975845410628, "grad_norm": 3.7375707626342773, "learning_rate": 0.001, "loss": 2.2591, "step": 71288 }, { "epoch": 6.154589371980676, "grad_norm": 0.9994610548019409, "learning_rate": 0.001, "loss": 2.2564, "step": 71344 }, { "epoch": 6.159420289855072, "grad_norm": 1.5988612174987793, "learning_rate": 0.001, "loss": 2.2535, "step": 71400 }, { "epoch": 6.164251207729468, "grad_norm": 2.0000829696655273, "learning_rate": 0.001, "loss": 2.244, "step": 71456 }, { "epoch": 6.169082125603865, "grad_norm": 1.9968081712722778, "learning_rate": 0.001, "loss": 2.2332, "step": 71512 }, { "epoch": 6.173913043478261, "grad_norm": 0.7419748306274414, "learning_rate": 0.001, "loss": 2.2375, "step": 71568 }, { "epoch": 6.178743961352657, "grad_norm": 3.2289023399353027, "learning_rate": 0.001, "loss": 2.2352, "step": 71624 }, { "epoch": 6.183574879227053, "grad_norm": 1.1539735794067383, "learning_rate": 0.001, "loss": 2.2321, "step": 71680 }, { "epoch": 6.188405797101449, "grad_norm": 2.0009260177612305, "learning_rate": 0.001, "loss": 2.2345, "step": 71736 }, { "epoch": 6.193236714975845, "grad_norm": 1.4143390655517578, "learning_rate": 0.001, "loss": 2.2513, "step": 71792 }, { "epoch": 6.1980676328502415, "grad_norm": 0.821320116519928, "learning_rate": 0.001, "loss": 2.244, "step": 71848 }, { "epoch": 6.202898550724638, "grad_norm": 1.3411365747451782, "learning_rate": 0.001, "loss": 2.2337, "step": 71904 }, { "epoch": 6.207729468599034, "grad_norm": 0.9753725528717041, "learning_rate": 0.001, "loss": 2.2215, "step": 71960 }, { "epoch": 6.21256038647343, "grad_norm": 1.8683110475540161, "learning_rate": 0.001, "loss": 2.2348, "step": 72016 }, { "epoch": 6.217391304347826, "grad_norm": 2.7504191398620605, "learning_rate": 0.001, "loss": 2.2432, "step": 72072 }, { "epoch": 6.222222222222222, "grad_norm": 1.5068525075912476, "learning_rate": 0.001, "loss": 2.2317, "step": 72128 }, { "epoch": 6.2270531400966185, "grad_norm": 2.5044116973876953, "learning_rate": 0.001, "loss": 2.2489, "step": 72184 }, { "epoch": 6.231884057971015, "grad_norm": 2.02871036529541, "learning_rate": 0.001, "loss": 2.2476, "step": 72240 }, { "epoch": 6.236714975845411, "grad_norm": 3.200634002685547, "learning_rate": 0.001, "loss": 2.245, "step": 72296 }, { "epoch": 6.241545893719807, "grad_norm": 1.611269474029541, "learning_rate": 0.001, "loss": 2.2301, "step": 72352 }, { "epoch": 6.246376811594203, "grad_norm": 0.9519205093383789, "learning_rate": 0.001, "loss": 2.2352, "step": 72408 }, { "epoch": 6.251207729468599, "grad_norm": 0.6087763905525208, "learning_rate": 0.001, "loss": 2.2226, "step": 72464 }, { "epoch": 6.256038647342995, "grad_norm": 1.847956895828247, "learning_rate": 0.001, "loss": 2.2218, "step": 72520 }, { "epoch": 6.260869565217392, "grad_norm": 1.410396933555603, "learning_rate": 0.001, "loss": 2.2229, "step": 72576 }, { "epoch": 6.265700483091788, "grad_norm": 0.6673827767372131, "learning_rate": 0.001, "loss": 2.2204, "step": 72632 }, { "epoch": 6.270531400966184, "grad_norm": 0.9775403141975403, "learning_rate": 0.001, "loss": 2.2245, "step": 72688 }, { "epoch": 6.27536231884058, "grad_norm": 1.1978809833526611, "learning_rate": 0.001, "loss": 2.2278, "step": 72744 }, { "epoch": 6.280193236714976, "grad_norm": 1.3326667547225952, "learning_rate": 0.001, "loss": 2.228, "step": 72800 }, { "epoch": 6.285024154589372, "grad_norm": 2.3851635456085205, "learning_rate": 0.001, "loss": 2.2301, "step": 72856 }, { "epoch": 6.2898550724637685, "grad_norm": 1.7464731931686401, "learning_rate": 0.001, "loss": 2.2262, "step": 72912 }, { "epoch": 6.294685990338165, "grad_norm": 0.8745136857032776, "learning_rate": 0.001, "loss": 2.2318, "step": 72968 }, { "epoch": 6.29951690821256, "grad_norm": 1.8794758319854736, "learning_rate": 0.001, "loss": 2.2347, "step": 73024 }, { "epoch": 6.304347826086957, "grad_norm": 1.7814747095108032, "learning_rate": 0.001, "loss": 2.2199, "step": 73080 }, { "epoch": 6.309178743961352, "grad_norm": 1.2474782466888428, "learning_rate": 0.001, "loss": 2.2245, "step": 73136 }, { "epoch": 6.314009661835748, "grad_norm": 1.2666617631912231, "learning_rate": 0.001, "loss": 2.2356, "step": 73192 }, { "epoch": 6.318840579710145, "grad_norm": 1.0419021844863892, "learning_rate": 0.001, "loss": 2.2403, "step": 73248 }, { "epoch": 6.323671497584541, "grad_norm": 0.806438148021698, "learning_rate": 0.001, "loss": 2.237, "step": 73304 }, { "epoch": 6.328502415458937, "grad_norm": 7.231851577758789, "learning_rate": 0.001, "loss": 2.22, "step": 73360 }, { "epoch": 6.333333333333333, "grad_norm": 0.8140851855278015, "learning_rate": 0.001, "loss": 2.2358, "step": 73416 }, { "epoch": 6.338164251207729, "grad_norm": 1.1485127210617065, "learning_rate": 0.001, "loss": 2.2299, "step": 73472 }, { "epoch": 6.342995169082125, "grad_norm": 1.318444013595581, "learning_rate": 0.001, "loss": 2.2175, "step": 73528 }, { "epoch": 6.3478260869565215, "grad_norm": 1.9077707529067993, "learning_rate": 0.001, "loss": 2.2283, "step": 73584 }, { "epoch": 6.352657004830918, "grad_norm": 2.7870867252349854, "learning_rate": 0.001, "loss": 2.2295, "step": 73640 }, { "epoch": 6.357487922705314, "grad_norm": 0.8493619561195374, "learning_rate": 0.001, "loss": 2.2435, "step": 73696 }, { "epoch": 6.36231884057971, "grad_norm": 0.8781208992004395, "learning_rate": 0.001, "loss": 2.2417, "step": 73752 }, { "epoch": 6.367149758454106, "grad_norm": 1.2606106996536255, "learning_rate": 0.001, "loss": 2.2398, "step": 73808 }, { "epoch": 6.371980676328502, "grad_norm": 0.990530252456665, "learning_rate": 0.001, "loss": 2.2377, "step": 73864 }, { "epoch": 6.3768115942028984, "grad_norm": 1.307255744934082, "learning_rate": 0.001, "loss": 2.2466, "step": 73920 }, { "epoch": 6.381642512077295, "grad_norm": 1.8456193208694458, "learning_rate": 0.001, "loss": 2.2656, "step": 73976 }, { "epoch": 6.386473429951691, "grad_norm": 1.7953139543533325, "learning_rate": 0.001, "loss": 2.2413, "step": 74032 }, { "epoch": 6.391304347826087, "grad_norm": 1.7777838706970215, "learning_rate": 0.001, "loss": 2.2571, "step": 74088 }, { "epoch": 6.396135265700483, "grad_norm": 2.4233973026275635, "learning_rate": 0.001, "loss": 2.2799, "step": 74144 }, { "epoch": 6.400966183574879, "grad_norm": 0.886583149433136, "learning_rate": 0.001, "loss": 2.2642, "step": 74200 }, { "epoch": 6.405797101449275, "grad_norm": 1.8131858110427856, "learning_rate": 0.001, "loss": 2.2558, "step": 74256 }, { "epoch": 6.4106280193236715, "grad_norm": 1.290224313735962, "learning_rate": 0.001, "loss": 2.2607, "step": 74312 }, { "epoch": 6.415458937198068, "grad_norm": 1.22830069065094, "learning_rate": 0.001, "loss": 2.2563, "step": 74368 }, { "epoch": 6.420289855072464, "grad_norm": 1.52664315700531, "learning_rate": 0.001, "loss": 2.2447, "step": 74424 }, { "epoch": 6.42512077294686, "grad_norm": 0.9392516016960144, "learning_rate": 0.001, "loss": 2.2345, "step": 74480 }, { "epoch": 6.429951690821256, "grad_norm": 1.6208679676055908, "learning_rate": 0.001, "loss": 2.2428, "step": 74536 }, { "epoch": 6.434782608695652, "grad_norm": 2.518866539001465, "learning_rate": 0.001, "loss": 2.2451, "step": 74592 }, { "epoch": 6.4396135265700485, "grad_norm": 1.559274673461914, "learning_rate": 0.001, "loss": 2.2569, "step": 74648 }, { "epoch": 6.444444444444445, "grad_norm": 1.8809869289398193, "learning_rate": 0.001, "loss": 2.2337, "step": 74704 }, { "epoch": 6.449275362318841, "grad_norm": 0.9125791192054749, "learning_rate": 0.001, "loss": 2.2445, "step": 74760 }, { "epoch": 6.454106280193237, "grad_norm": 1.2871743440628052, "learning_rate": 0.001, "loss": 2.2389, "step": 74816 }, { "epoch": 6.458937198067633, "grad_norm": 1.753193974494934, "learning_rate": 0.001, "loss": 2.2217, "step": 74872 }, { "epoch": 6.463768115942029, "grad_norm": 1.5740556716918945, "learning_rate": 0.001, "loss": 2.2409, "step": 74928 }, { "epoch": 6.468599033816425, "grad_norm": 0.9776777029037476, "learning_rate": 0.001, "loss": 2.2501, "step": 74984 }, { "epoch": 6.473429951690822, "grad_norm": 2.7576019763946533, "learning_rate": 0.001, "loss": 2.2347, "step": 75040 }, { "epoch": 6.478260869565218, "grad_norm": 1.692238450050354, "learning_rate": 0.001, "loss": 2.2413, "step": 75096 }, { "epoch": 6.483091787439614, "grad_norm": 2.3111934661865234, "learning_rate": 0.001, "loss": 2.25, "step": 75152 }, { "epoch": 6.48792270531401, "grad_norm": 1.3569540977478027, "learning_rate": 0.001, "loss": 2.246, "step": 75208 }, { "epoch": 6.492753623188406, "grad_norm": 1.2361676692962646, "learning_rate": 0.001, "loss": 2.2471, "step": 75264 }, { "epoch": 6.4975845410628015, "grad_norm": 1.815401554107666, "learning_rate": 0.001, "loss": 2.2403, "step": 75320 }, { "epoch": 6.5024154589371985, "grad_norm": 0.7901356816291809, "learning_rate": 0.001, "loss": 2.2515, "step": 75376 }, { "epoch": 6.507246376811594, "grad_norm": 1.708166480064392, "learning_rate": 0.001, "loss": 2.2431, "step": 75432 }, { "epoch": 6.512077294685991, "grad_norm": 1.9460251331329346, "learning_rate": 0.001, "loss": 2.2422, "step": 75488 }, { "epoch": 6.516908212560386, "grad_norm": 1.216374158859253, "learning_rate": 0.001, "loss": 2.2542, "step": 75544 }, { "epoch": 6.521739130434782, "grad_norm": 1.1076815128326416, "learning_rate": 0.001, "loss": 2.2578, "step": 75600 }, { "epoch": 6.526570048309178, "grad_norm": 0.6794477701187134, "learning_rate": 0.001, "loss": 2.2626, "step": 75656 }, { "epoch": 6.531400966183575, "grad_norm": 0.8402165770530701, "learning_rate": 0.001, "loss": 2.2426, "step": 75712 }, { "epoch": 6.536231884057971, "grad_norm": 1.274253249168396, "learning_rate": 0.001, "loss": 2.2507, "step": 75768 }, { "epoch": 6.541062801932367, "grad_norm": 1.4902313947677612, "learning_rate": 0.001, "loss": 2.2423, "step": 75824 }, { "epoch": 6.545893719806763, "grad_norm": 4.480154037475586, "learning_rate": 0.001, "loss": 2.2486, "step": 75880 }, { "epoch": 6.550724637681159, "grad_norm": 1.0384182929992676, "learning_rate": 0.001, "loss": 2.2435, "step": 75936 }, { "epoch": 6.555555555555555, "grad_norm": 13.618975639343262, "learning_rate": 0.001, "loss": 2.2412, "step": 75992 }, { "epoch": 6.5603864734299515, "grad_norm": 3.3809757232666016, "learning_rate": 0.001, "loss": 2.2482, "step": 76048 }, { "epoch": 6.565217391304348, "grad_norm": 2.76737117767334, "learning_rate": 0.001, "loss": 2.2448, "step": 76104 }, { "epoch": 6.570048309178744, "grad_norm": 2.8874478340148926, "learning_rate": 0.001, "loss": 2.2387, "step": 76160 }, { "epoch": 6.57487922705314, "grad_norm": 3.6475632190704346, "learning_rate": 0.001, "loss": 2.241, "step": 76216 }, { "epoch": 6.579710144927536, "grad_norm": 1.2049617767333984, "learning_rate": 0.001, "loss": 2.2542, "step": 76272 }, { "epoch": 6.584541062801932, "grad_norm": 0.9089396595954895, "learning_rate": 0.001, "loss": 2.2429, "step": 76328 }, { "epoch": 6.5893719806763285, "grad_norm": 0.6770321726799011, "learning_rate": 0.001, "loss": 2.2398, "step": 76384 }, { "epoch": 6.594202898550725, "grad_norm": 1.1739739179611206, "learning_rate": 0.001, "loss": 2.2395, "step": 76440 }, { "epoch": 6.599033816425121, "grad_norm": 1.048844337463379, "learning_rate": 0.001, "loss": 2.2346, "step": 76496 }, { "epoch": 6.603864734299517, "grad_norm": 1.7137600183486938, "learning_rate": 0.001, "loss": 2.2425, "step": 76552 }, { "epoch": 6.608695652173913, "grad_norm": 1.755640983581543, "learning_rate": 0.001, "loss": 2.2484, "step": 76608 }, { "epoch": 6.613526570048309, "grad_norm": 1.1844213008880615, "learning_rate": 0.001, "loss": 2.274, "step": 76664 }, { "epoch": 6.618357487922705, "grad_norm": 1.3580411672592163, "learning_rate": 0.001, "loss": 2.2766, "step": 76720 }, { "epoch": 6.6231884057971016, "grad_norm": 1.012831211090088, "learning_rate": 0.001, "loss": 2.2566, "step": 76776 }, { "epoch": 6.628019323671498, "grad_norm": 0.9028856754302979, "learning_rate": 0.001, "loss": 2.2507, "step": 76832 }, { "epoch": 6.632850241545894, "grad_norm": 5.565920829772949, "learning_rate": 0.001, "loss": 2.2461, "step": 76888 }, { "epoch": 6.63768115942029, "grad_norm": 2.8750646114349365, "learning_rate": 0.001, "loss": 2.2417, "step": 76944 }, { "epoch": 6.642512077294686, "grad_norm": 0.9302629232406616, "learning_rate": 0.001, "loss": 2.2519, "step": 77000 }, { "epoch": 6.647342995169082, "grad_norm": 1.9370646476745605, "learning_rate": 0.001, "loss": 2.2519, "step": 77056 }, { "epoch": 6.6521739130434785, "grad_norm": 3.0804717540740967, "learning_rate": 0.001, "loss": 2.2349, "step": 77112 }, { "epoch": 6.657004830917875, "grad_norm": 2.720844030380249, "learning_rate": 0.001, "loss": 2.2414, "step": 77168 }, { "epoch": 6.661835748792271, "grad_norm": 0.8230652809143066, "learning_rate": 0.001, "loss": 2.2371, "step": 77224 }, { "epoch": 6.666666666666667, "grad_norm": 1.7672905921936035, "learning_rate": 0.001, "loss": 2.2415, "step": 77280 }, { "epoch": 6.671497584541063, "grad_norm": 3.14939022064209, "learning_rate": 0.001, "loss": 2.2349, "step": 77336 }, { "epoch": 6.676328502415459, "grad_norm": 1.050691843032837, "learning_rate": 0.001, "loss": 2.2318, "step": 77392 }, { "epoch": 6.681159420289855, "grad_norm": 1.1251823902130127, "learning_rate": 0.001, "loss": 2.2357, "step": 77448 }, { "epoch": 6.685990338164252, "grad_norm": 1.0894018411636353, "learning_rate": 0.001, "loss": 2.2333, "step": 77504 }, { "epoch": 6.690821256038648, "grad_norm": 1.1541368961334229, "learning_rate": 0.001, "loss": 2.221, "step": 77560 }, { "epoch": 6.695652173913043, "grad_norm": 1.9985852241516113, "learning_rate": 0.001, "loss": 2.2278, "step": 77616 }, { "epoch": 6.70048309178744, "grad_norm": 1.553889274597168, "learning_rate": 0.001, "loss": 2.2184, "step": 77672 }, { "epoch": 6.705314009661835, "grad_norm": 0.6037139296531677, "learning_rate": 0.001, "loss": 2.2242, "step": 77728 }, { "epoch": 6.710144927536232, "grad_norm": 0.6900601983070374, "learning_rate": 0.001, "loss": 2.2171, "step": 77784 }, { "epoch": 6.714975845410628, "grad_norm": 1.2441445589065552, "learning_rate": 0.001, "loss": 2.2232, "step": 77840 }, { "epoch": 6.719806763285024, "grad_norm": 1.2396204471588135, "learning_rate": 0.001, "loss": 2.2227, "step": 77896 }, { "epoch": 6.72463768115942, "grad_norm": 3.7759761810302734, "learning_rate": 0.001, "loss": 2.2273, "step": 77952 }, { "epoch": 6.729468599033816, "grad_norm": 1.964001178741455, "learning_rate": 0.001, "loss": 2.2272, "step": 78008 }, { "epoch": 6.734299516908212, "grad_norm": 0.6876692175865173, "learning_rate": 0.001, "loss": 2.2338, "step": 78064 }, { "epoch": 6.739130434782608, "grad_norm": 1.4361850023269653, "learning_rate": 0.001, "loss": 2.2463, "step": 78120 }, { "epoch": 6.743961352657005, "grad_norm": 2.5210978984832764, "learning_rate": 0.001, "loss": 2.2572, "step": 78176 }, { "epoch": 6.748792270531401, "grad_norm": 1.664169430732727, "learning_rate": 0.001, "loss": 2.2696, "step": 78232 }, { "epoch": 6.753623188405797, "grad_norm": 3.230752468109131, "learning_rate": 0.001, "loss": 2.2431, "step": 78288 }, { "epoch": 6.758454106280193, "grad_norm": 1.7360711097717285, "learning_rate": 0.001, "loss": 2.2514, "step": 78344 }, { "epoch": 6.763285024154589, "grad_norm": 0.8954463005065918, "learning_rate": 0.001, "loss": 2.2382, "step": 78400 }, { "epoch": 6.768115942028985, "grad_norm": 1.1951336860656738, "learning_rate": 0.001, "loss": 2.2545, "step": 78456 }, { "epoch": 6.7729468599033815, "grad_norm": 1.5610833168029785, "learning_rate": 0.001, "loss": 2.2428, "step": 78512 }, { "epoch": 6.777777777777778, "grad_norm": 0.7657495737075806, "learning_rate": 0.001, "loss": 2.2534, "step": 78568 }, { "epoch": 6.782608695652174, "grad_norm": 1.01096510887146, "learning_rate": 0.001, "loss": 2.257, "step": 78624 }, { "epoch": 6.78743961352657, "grad_norm": 0.55475914478302, "learning_rate": 0.001, "loss": 2.2394, "step": 78680 }, { "epoch": 6.792270531400966, "grad_norm": 2.5494346618652344, "learning_rate": 0.001, "loss": 2.2471, "step": 78736 }, { "epoch": 6.797101449275362, "grad_norm": 0.659465491771698, "learning_rate": 0.001, "loss": 2.2477, "step": 78792 }, { "epoch": 6.8019323671497585, "grad_norm": 0.8349037766456604, "learning_rate": 0.001, "loss": 2.2452, "step": 78848 }, { "epoch": 6.806763285024155, "grad_norm": 7.057497978210449, "learning_rate": 0.001, "loss": 2.2435, "step": 78904 }, { "epoch": 6.811594202898551, "grad_norm": 3.615966558456421, "learning_rate": 0.001, "loss": 2.2336, "step": 78960 }, { "epoch": 6.816425120772947, "grad_norm": 0.8992481231689453, "learning_rate": 0.001, "loss": 2.2412, "step": 79016 }, { "epoch": 6.821256038647343, "grad_norm": 4.003536701202393, "learning_rate": 0.001, "loss": 2.2411, "step": 79072 }, { "epoch": 6.826086956521739, "grad_norm": 1.813976764678955, "learning_rate": 0.001, "loss": 2.2483, "step": 79128 }, { "epoch": 6.830917874396135, "grad_norm": 1.2164362668991089, "learning_rate": 0.001, "loss": 2.262, "step": 79184 }, { "epoch": 6.835748792270532, "grad_norm": 2.0430679321289062, "learning_rate": 0.001, "loss": 2.2718, "step": 79240 }, { "epoch": 6.840579710144928, "grad_norm": 0.9740785956382751, "learning_rate": 0.001, "loss": 2.2625, "step": 79296 }, { "epoch": 6.845410628019324, "grad_norm": 1.4133433103561401, "learning_rate": 0.001, "loss": 2.2542, "step": 79352 }, { "epoch": 6.85024154589372, "grad_norm": 0.9220939874649048, "learning_rate": 0.001, "loss": 2.2553, "step": 79408 }, { "epoch": 6.855072463768116, "grad_norm": 1.1142477989196777, "learning_rate": 0.001, "loss": 2.2389, "step": 79464 }, { "epoch": 6.859903381642512, "grad_norm": 0.8727350831031799, "learning_rate": 0.001, "loss": 2.235, "step": 79520 }, { "epoch": 6.8647342995169085, "grad_norm": 1.6178958415985107, "learning_rate": 0.001, "loss": 2.2438, "step": 79576 }, { "epoch": 6.869565217391305, "grad_norm": 1.7619056701660156, "learning_rate": 0.001, "loss": 2.2479, "step": 79632 }, { "epoch": 6.874396135265701, "grad_norm": 1.4165889024734497, "learning_rate": 0.001, "loss": 2.2468, "step": 79688 }, { "epoch": 6.879227053140097, "grad_norm": 4.498415470123291, "learning_rate": 0.001, "loss": 2.232, "step": 79744 }, { "epoch": 6.884057971014493, "grad_norm": 1.6327322721481323, "learning_rate": 0.001, "loss": 2.2396, "step": 79800 }, { "epoch": 6.888888888888889, "grad_norm": 2.4959449768066406, "learning_rate": 0.001, "loss": 2.245, "step": 79856 }, { "epoch": 6.8937198067632846, "grad_norm": 1.4688910245895386, "learning_rate": 0.001, "loss": 2.264, "step": 79912 }, { "epoch": 6.898550724637682, "grad_norm": 0.7763709425926208, "learning_rate": 0.001, "loss": 2.2553, "step": 79968 }, { "epoch": 6.903381642512077, "grad_norm": 1.0467098951339722, "learning_rate": 0.001, "loss": 2.2452, "step": 80024 }, { "epoch": 6.908212560386474, "grad_norm": 0.8108733892440796, "learning_rate": 0.001, "loss": 2.2377, "step": 80080 }, { "epoch": 6.913043478260869, "grad_norm": 1.369469404220581, "learning_rate": 0.001, "loss": 2.2315, "step": 80136 }, { "epoch": 6.917874396135265, "grad_norm": 0.9366617202758789, "learning_rate": 0.001, "loss": 2.2349, "step": 80192 }, { "epoch": 6.9227053140096615, "grad_norm": 1.6140553951263428, "learning_rate": 0.001, "loss": 2.2507, "step": 80248 }, { "epoch": 6.927536231884058, "grad_norm": 2.0513768196105957, "learning_rate": 0.001, "loss": 2.2569, "step": 80304 }, { "epoch": 6.932367149758454, "grad_norm": 1.9008907079696655, "learning_rate": 0.001, "loss": 2.2514, "step": 80360 }, { "epoch": 6.93719806763285, "grad_norm": 1.3423054218292236, "learning_rate": 0.001, "loss": 2.2426, "step": 80416 }, { "epoch": 6.942028985507246, "grad_norm": 1.5680043697357178, "learning_rate": 0.001, "loss": 2.2475, "step": 80472 }, { "epoch": 6.946859903381642, "grad_norm": 1.1729775667190552, "learning_rate": 0.001, "loss": 2.2409, "step": 80528 }, { "epoch": 6.951690821256038, "grad_norm": 1.0784313678741455, "learning_rate": 0.001, "loss": 2.2388, "step": 80584 }, { "epoch": 6.956521739130435, "grad_norm": 1.2925214767456055, "learning_rate": 0.001, "loss": 2.2514, "step": 80640 }, { "epoch": 6.961352657004831, "grad_norm": 13.722448348999023, "learning_rate": 0.001, "loss": 2.2625, "step": 80696 }, { "epoch": 6.966183574879227, "grad_norm": 1.8979257345199585, "learning_rate": 0.001, "loss": 2.264, "step": 80752 }, { "epoch": 6.971014492753623, "grad_norm": 0.9370417594909668, "learning_rate": 0.001, "loss": 2.2566, "step": 80808 }, { "epoch": 6.975845410628019, "grad_norm": 0.6652025580406189, "learning_rate": 0.001, "loss": 2.2436, "step": 80864 }, { "epoch": 6.980676328502415, "grad_norm": 0.8809888958930969, "learning_rate": 0.001, "loss": 2.2367, "step": 80920 }, { "epoch": 6.9855072463768115, "grad_norm": 1.762130618095398, "learning_rate": 0.001, "loss": 2.2375, "step": 80976 }, { "epoch": 6.990338164251208, "grad_norm": 1.014733910560608, "learning_rate": 0.001, "loss": 2.2391, "step": 81032 }, { "epoch": 6.995169082125604, "grad_norm": 1.2004274129867554, "learning_rate": 0.001, "loss": 2.2385, "step": 81088 }, { "epoch": 7.0, "grad_norm": 1.8822312355041504, "learning_rate": 0.001, "loss": 2.2413, "step": 81144 }, { "epoch": 7.004830917874396, "grad_norm": 1.796080231666565, "learning_rate": 0.001, "loss": 2.2041, "step": 81200 }, { "epoch": 7.009661835748792, "grad_norm": 1.3070597648620605, "learning_rate": 0.001, "loss": 2.2204, "step": 81256 }, { "epoch": 7.0144927536231885, "grad_norm": 1.2614771127700806, "learning_rate": 0.001, "loss": 2.2149, "step": 81312 }, { "epoch": 7.019323671497585, "grad_norm": 1.524032711982727, "learning_rate": 0.001, "loss": 2.2134, "step": 81368 }, { "epoch": 7.024154589371981, "grad_norm": 3.0564124584198, "learning_rate": 0.001, "loss": 2.2144, "step": 81424 }, { "epoch": 7.028985507246377, "grad_norm": 2.0705578327178955, "learning_rate": 0.001, "loss": 2.2052, "step": 81480 }, { "epoch": 7.033816425120773, "grad_norm": 1.733300805091858, "learning_rate": 0.001, "loss": 2.2065, "step": 81536 }, { "epoch": 7.038647342995169, "grad_norm": 1.6060824394226074, "learning_rate": 0.001, "loss": 2.2169, "step": 81592 }, { "epoch": 7.043478260869565, "grad_norm": 1.5984892845153809, "learning_rate": 0.001, "loss": 2.2119, "step": 81648 }, { "epoch": 7.048309178743962, "grad_norm": 1.4020204544067383, "learning_rate": 0.001, "loss": 2.2215, "step": 81704 }, { "epoch": 7.053140096618358, "grad_norm": 1.1294715404510498, "learning_rate": 0.001, "loss": 2.2039, "step": 81760 }, { "epoch": 7.057971014492754, "grad_norm": 0.8854770660400391, "learning_rate": 0.001, "loss": 2.2105, "step": 81816 }, { "epoch": 7.06280193236715, "grad_norm": 0.8262887597084045, "learning_rate": 0.001, "loss": 2.2215, "step": 81872 }, { "epoch": 7.067632850241546, "grad_norm": 2.3546652793884277, "learning_rate": 0.001, "loss": 2.2215, "step": 81928 }, { "epoch": 7.072463768115942, "grad_norm": 0.6846729516983032, "learning_rate": 0.001, "loss": 2.2077, "step": 81984 }, { "epoch": 7.0772946859903385, "grad_norm": 0.5966305136680603, "learning_rate": 0.001, "loss": 2.1964, "step": 82040 }, { "epoch": 7.082125603864735, "grad_norm": 1.664222002029419, "learning_rate": 0.001, "loss": 2.206, "step": 82096 }, { "epoch": 7.086956521739131, "grad_norm": 1.0919033288955688, "learning_rate": 0.001, "loss": 2.2054, "step": 82152 }, { "epoch": 7.091787439613527, "grad_norm": 1.8295831680297852, "learning_rate": 0.001, "loss": 2.2127, "step": 82208 }, { "epoch": 7.096618357487923, "grad_norm": 2.2516045570373535, "learning_rate": 0.001, "loss": 2.2337, "step": 82264 }, { "epoch": 7.101449275362318, "grad_norm": 5.178409576416016, "learning_rate": 0.001, "loss": 2.2471, "step": 82320 }, { "epoch": 7.106280193236715, "grad_norm": 0.7064712643623352, "learning_rate": 0.001, "loss": 2.2287, "step": 82376 }, { "epoch": 7.111111111111111, "grad_norm": 3.479949474334717, "learning_rate": 0.001, "loss": 2.2156, "step": 82432 }, { "epoch": 7.115942028985507, "grad_norm": 0.8123447299003601, "learning_rate": 0.001, "loss": 2.2151, "step": 82488 }, { "epoch": 7.120772946859903, "grad_norm": 2.58071231842041, "learning_rate": 0.001, "loss": 2.207, "step": 82544 }, { "epoch": 7.125603864734299, "grad_norm": 1.3098042011260986, "learning_rate": 0.001, "loss": 2.2162, "step": 82600 }, { "epoch": 7.130434782608695, "grad_norm": 1.6242272853851318, "learning_rate": 0.001, "loss": 2.2239, "step": 82656 }, { "epoch": 7.1352657004830915, "grad_norm": 3.981215476989746, "learning_rate": 0.001, "loss": 2.2344, "step": 82712 }, { "epoch": 7.140096618357488, "grad_norm": 0.9496408700942993, "learning_rate": 0.001, "loss": 2.241, "step": 82768 }, { "epoch": 7.144927536231884, "grad_norm": 0.976984977722168, "learning_rate": 0.001, "loss": 2.235, "step": 82824 }, { "epoch": 7.14975845410628, "grad_norm": 2.24456787109375, "learning_rate": 0.001, "loss": 2.2109, "step": 82880 }, { "epoch": 7.154589371980676, "grad_norm": 3.479337692260742, "learning_rate": 0.001, "loss": 2.2146, "step": 82936 }, { "epoch": 7.159420289855072, "grad_norm": 1.3374027013778687, "learning_rate": 0.001, "loss": 2.2204, "step": 82992 }, { "epoch": 7.164251207729468, "grad_norm": 1.1138250827789307, "learning_rate": 0.001, "loss": 2.221, "step": 83048 }, { "epoch": 7.169082125603865, "grad_norm": 3.209102153778076, "learning_rate": 0.001, "loss": 2.2176, "step": 83104 }, { "epoch": 7.173913043478261, "grad_norm": 37.93331527709961, "learning_rate": 0.001, "loss": 2.2227, "step": 83160 }, { "epoch": 7.178743961352657, "grad_norm": 5.81146240234375, "learning_rate": 0.001, "loss": 2.2122, "step": 83216 }, { "epoch": 7.183574879227053, "grad_norm": 1.0278918743133545, "learning_rate": 0.001, "loss": 2.2159, "step": 83272 }, { "epoch": 7.188405797101449, "grad_norm": 1.2807291746139526, "learning_rate": 0.001, "loss": 2.2051, "step": 83328 }, { "epoch": 7.193236714975845, "grad_norm": 1.3941764831542969, "learning_rate": 0.001, "loss": 2.195, "step": 83384 }, { "epoch": 7.1980676328502415, "grad_norm": 1.1143519878387451, "learning_rate": 0.001, "loss": 2.1979, "step": 83440 }, { "epoch": 7.202898550724638, "grad_norm": 3.5452349185943604, "learning_rate": 0.001, "loss": 2.2041, "step": 83496 }, { "epoch": 7.207729468599034, "grad_norm": 1.208636999130249, "learning_rate": 0.001, "loss": 2.2085, "step": 83552 }, { "epoch": 7.21256038647343, "grad_norm": 3.902401924133301, "learning_rate": 0.001, "loss": 2.2062, "step": 83608 }, { "epoch": 7.217391304347826, "grad_norm": 1.7481462955474854, "learning_rate": 0.001, "loss": 2.2024, "step": 83664 }, { "epoch": 7.222222222222222, "grad_norm": 4.274281024932861, "learning_rate": 0.001, "loss": 2.208, "step": 83720 }, { "epoch": 7.2270531400966185, "grad_norm": 1.5858558416366577, "learning_rate": 0.001, "loss": 2.2057, "step": 83776 }, { "epoch": 7.231884057971015, "grad_norm": 1.0880944728851318, "learning_rate": 0.001, "loss": 2.2037, "step": 83832 }, { "epoch": 7.236714975845411, "grad_norm": 2.803246259689331, "learning_rate": 0.001, "loss": 2.2075, "step": 83888 }, { "epoch": 7.241545893719807, "grad_norm": 3.473266363143921, "learning_rate": 0.001, "loss": 2.2299, "step": 83944 }, { "epoch": 7.246376811594203, "grad_norm": 0.6685742139816284, "learning_rate": 0.001, "loss": 2.2155, "step": 84000 }, { "epoch": 7.251207729468599, "grad_norm": 1.4059321880340576, "learning_rate": 0.001, "loss": 2.2108, "step": 84056 }, { "epoch": 7.256038647342995, "grad_norm": 1.5146740674972534, "learning_rate": 0.001, "loss": 2.2106, "step": 84112 }, { "epoch": 7.260869565217392, "grad_norm": 0.7235586047172546, "learning_rate": 0.001, "loss": 2.2076, "step": 84168 }, { "epoch": 7.265700483091788, "grad_norm": 2.5590157508850098, "learning_rate": 0.001, "loss": 2.197, "step": 84224 }, { "epoch": 7.270531400966184, "grad_norm": 3.471428155899048, "learning_rate": 0.001, "loss": 2.1998, "step": 84280 }, { "epoch": 7.27536231884058, "grad_norm": 0.5133311152458191, "learning_rate": 0.001, "loss": 2.1858, "step": 84336 }, { "epoch": 7.280193236714976, "grad_norm": 0.6764386296272278, "learning_rate": 0.001, "loss": 2.1977, "step": 84392 }, { "epoch": 7.285024154589372, "grad_norm": 0.5385075807571411, "learning_rate": 0.001, "loss": 2.1901, "step": 84448 }, { "epoch": 7.2898550724637685, "grad_norm": 4.9898481369018555, "learning_rate": 0.001, "loss": 2.1931, "step": 84504 }, { "epoch": 7.294685990338165, "grad_norm": 0.6643825173377991, "learning_rate": 0.001, "loss": 2.1954, "step": 84560 }, { "epoch": 7.29951690821256, "grad_norm": 0.7024177312850952, "learning_rate": 0.001, "loss": 2.2061, "step": 84616 }, { "epoch": 7.304347826086957, "grad_norm": 1.3688068389892578, "learning_rate": 0.001, "loss": 2.2016, "step": 84672 }, { "epoch": 7.309178743961352, "grad_norm": 1.1807702779769897, "learning_rate": 0.001, "loss": 2.1983, "step": 84728 }, { "epoch": 7.314009661835748, "grad_norm": 1.9392491579055786, "learning_rate": 0.001, "loss": 2.2, "step": 84784 }, { "epoch": 7.318840579710145, "grad_norm": 1.2109184265136719, "learning_rate": 0.001, "loss": 2.2115, "step": 84840 }, { "epoch": 7.323671497584541, "grad_norm": 0.7793823480606079, "learning_rate": 0.001, "loss": 2.2187, "step": 84896 }, { "epoch": 7.328502415458937, "grad_norm": 1.2927424907684326, "learning_rate": 0.001, "loss": 2.2221, "step": 84952 }, { "epoch": 7.333333333333333, "grad_norm": 1.0203886032104492, "learning_rate": 0.001, "loss": 2.2025, "step": 85008 }, { "epoch": 7.338164251207729, "grad_norm": 1.8057535886764526, "learning_rate": 0.001, "loss": 2.1936, "step": 85064 }, { "epoch": 7.342995169082125, "grad_norm": 0.8386402726173401, "learning_rate": 0.001, "loss": 2.1963, "step": 85120 }, { "epoch": 7.3478260869565215, "grad_norm": 0.7808031439781189, "learning_rate": 0.001, "loss": 2.1943, "step": 85176 }, { "epoch": 7.352657004830918, "grad_norm": 0.9829596877098083, "learning_rate": 0.001, "loss": 2.1888, "step": 85232 }, { "epoch": 7.357487922705314, "grad_norm": 6.140841007232666, "learning_rate": 0.001, "loss": 2.1846, "step": 85288 }, { "epoch": 7.36231884057971, "grad_norm": 0.734255313873291, "learning_rate": 0.001, "loss": 2.1944, "step": 85344 }, { "epoch": 7.367149758454106, "grad_norm": 1.21676766872406, "learning_rate": 0.001, "loss": 2.1993, "step": 85400 }, { "epoch": 7.371980676328502, "grad_norm": 0.9128022789955139, "learning_rate": 0.001, "loss": 2.2153, "step": 85456 }, { "epoch": 7.3768115942028984, "grad_norm": 0.7662680149078369, "learning_rate": 0.001, "loss": 2.2212, "step": 85512 }, { "epoch": 7.381642512077295, "grad_norm": 0.6822033524513245, "learning_rate": 0.001, "loss": 2.2165, "step": 85568 }, { "epoch": 7.386473429951691, "grad_norm": 1.139710783958435, "learning_rate": 0.001, "loss": 2.2116, "step": 85624 }, { "epoch": 7.391304347826087, "grad_norm": 1.697280764579773, "learning_rate": 0.001, "loss": 2.2045, "step": 85680 }, { "epoch": 7.396135265700483, "grad_norm": 1.1251060962677002, "learning_rate": 0.001, "loss": 2.1933, "step": 85736 }, { "epoch": 7.400966183574879, "grad_norm": 11.62124252319336, "learning_rate": 0.001, "loss": 2.2, "step": 85792 }, { "epoch": 7.405797101449275, "grad_norm": 1.5566468238830566, "learning_rate": 0.001, "loss": 2.2132, "step": 85848 }, { "epoch": 7.4106280193236715, "grad_norm": 0.5916629433631897, "learning_rate": 0.001, "loss": 2.218, "step": 85904 }, { "epoch": 7.415458937198068, "grad_norm": 0.5133522748947144, "learning_rate": 0.001, "loss": 2.2169, "step": 85960 }, { "epoch": 7.420289855072464, "grad_norm": 1.4335851669311523, "learning_rate": 0.001, "loss": 2.2005, "step": 86016 }, { "epoch": 7.42512077294686, "grad_norm": 1.2312803268432617, "learning_rate": 0.001, "loss": 2.1896, "step": 86072 }, { "epoch": 7.429951690821256, "grad_norm": 2.4313249588012695, "learning_rate": 0.001, "loss": 2.2068, "step": 86128 }, { "epoch": 7.434782608695652, "grad_norm": 0.9686657190322876, "learning_rate": 0.001, "loss": 2.2088, "step": 86184 }, { "epoch": 7.4396135265700485, "grad_norm": 1.062145709991455, "learning_rate": 0.001, "loss": 2.2127, "step": 86240 }, { "epoch": 7.444444444444445, "grad_norm": 2.3246278762817383, "learning_rate": 0.001, "loss": 2.2043, "step": 86296 }, { "epoch": 7.449275362318841, "grad_norm": 4.4462666511535645, "learning_rate": 0.001, "loss": 2.2115, "step": 86352 }, { "epoch": 7.454106280193237, "grad_norm": 0.818331778049469, "learning_rate": 0.001, "loss": 2.2074, "step": 86408 }, { "epoch": 7.458937198067633, "grad_norm": 0.9463566541671753, "learning_rate": 0.001, "loss": 2.2058, "step": 86464 }, { "epoch": 7.463768115942029, "grad_norm": 1.2033153772354126, "learning_rate": 0.001, "loss": 2.1978, "step": 86520 }, { "epoch": 7.468599033816425, "grad_norm": 0.770296037197113, "learning_rate": 0.001, "loss": 2.1916, "step": 86576 }, { "epoch": 7.473429951690822, "grad_norm": 0.6733403205871582, "learning_rate": 0.001, "loss": 2.2066, "step": 86632 }, { "epoch": 7.478260869565218, "grad_norm": 1.081403374671936, "learning_rate": 0.001, "loss": 2.2136, "step": 86688 }, { "epoch": 7.483091787439614, "grad_norm": 1.9005900621414185, "learning_rate": 0.001, "loss": 2.2174, "step": 86744 }, { "epoch": 7.48792270531401, "grad_norm": 1.2378462553024292, "learning_rate": 0.001, "loss": 2.245, "step": 86800 }, { "epoch": 7.492753623188406, "grad_norm": 1.2297759056091309, "learning_rate": 0.001, "loss": 2.2454, "step": 86856 }, { "epoch": 7.4975845410628015, "grad_norm": 1.5183945894241333, "learning_rate": 0.001, "loss": 2.2391, "step": 86912 }, { "epoch": 7.5024154589371985, "grad_norm": 0.6511871218681335, "learning_rate": 0.001, "loss": 2.2295, "step": 86968 }, { "epoch": 7.507246376811594, "grad_norm": 2.7433860301971436, "learning_rate": 0.001, "loss": 2.2174, "step": 87024 }, { "epoch": 7.512077294685991, "grad_norm": 1.301574945449829, "learning_rate": 0.001, "loss": 2.2301, "step": 87080 }, { "epoch": 7.516908212560386, "grad_norm": 0.941912829875946, "learning_rate": 0.001, "loss": 2.2168, "step": 87136 }, { "epoch": 7.521739130434782, "grad_norm": 0.9183884263038635, "learning_rate": 0.001, "loss": 2.2215, "step": 87192 }, { "epoch": 7.526570048309178, "grad_norm": 0.6286992430686951, "learning_rate": 0.001, "loss": 2.2281, "step": 87248 }, { "epoch": 7.531400966183575, "grad_norm": 0.9259273409843445, "learning_rate": 0.001, "loss": 2.2257, "step": 87304 }, { "epoch": 7.536231884057971, "grad_norm": 0.8690216541290283, "learning_rate": 0.001, "loss": 2.229, "step": 87360 }, { "epoch": 7.541062801932367, "grad_norm": 0.6534125208854675, "learning_rate": 0.001, "loss": 2.2241, "step": 87416 }, { "epoch": 7.545893719806763, "grad_norm": 1.9568803310394287, "learning_rate": 0.001, "loss": 2.2081, "step": 87472 }, { "epoch": 7.550724637681159, "grad_norm": 0.7396754622459412, "learning_rate": 0.001, "loss": 2.2085, "step": 87528 }, { "epoch": 7.555555555555555, "grad_norm": 1.5574623346328735, "learning_rate": 0.001, "loss": 2.224, "step": 87584 }, { "epoch": 7.5603864734299515, "grad_norm": 7.3153977394104, "learning_rate": 0.001, "loss": 2.2291, "step": 87640 }, { "epoch": 7.565217391304348, "grad_norm": 2.7998430728912354, "learning_rate": 0.001, "loss": 2.2335, "step": 87696 }, { "epoch": 7.570048309178744, "grad_norm": 0.7265670895576477, "learning_rate": 0.001, "loss": 2.2203, "step": 87752 }, { "epoch": 7.57487922705314, "grad_norm": 0.7056453227996826, "learning_rate": 0.001, "loss": 2.2171, "step": 87808 }, { "epoch": 7.579710144927536, "grad_norm": 0.8008729219436646, "learning_rate": 0.001, "loss": 2.2118, "step": 87864 }, { "epoch": 7.584541062801932, "grad_norm": 0.6358042359352112, "learning_rate": 0.001, "loss": 2.2028, "step": 87920 }, { "epoch": 7.5893719806763285, "grad_norm": 0.6235592365264893, "learning_rate": 0.001, "loss": 2.2018, "step": 87976 }, { "epoch": 7.594202898550725, "grad_norm": 0.8529534339904785, "learning_rate": 0.001, "loss": 2.1901, "step": 88032 }, { "epoch": 7.599033816425121, "grad_norm": 1.933587908744812, "learning_rate": 0.001, "loss": 2.1873, "step": 88088 }, { "epoch": 7.603864734299517, "grad_norm": 1.0945197343826294, "learning_rate": 0.001, "loss": 2.1867, "step": 88144 }, { "epoch": 7.608695652173913, "grad_norm": 7.0664544105529785, "learning_rate": 0.001, "loss": 2.1921, "step": 88200 }, { "epoch": 7.613526570048309, "grad_norm": 0.8298237919807434, "learning_rate": 0.001, "loss": 2.1961, "step": 88256 }, { "epoch": 7.618357487922705, "grad_norm": 1.1436445713043213, "learning_rate": 0.001, "loss": 2.1956, "step": 88312 }, { "epoch": 7.6231884057971016, "grad_norm": 2.2656469345092773, "learning_rate": 0.001, "loss": 2.199, "step": 88368 }, { "epoch": 7.628019323671498, "grad_norm": 2.6382322311401367, "learning_rate": 0.001, "loss": 2.2054, "step": 88424 }, { "epoch": 7.632850241545894, "grad_norm": 1.5132769346237183, "learning_rate": 0.001, "loss": 2.1956, "step": 88480 }, { "epoch": 7.63768115942029, "grad_norm": 1.0352468490600586, "learning_rate": 0.001, "loss": 2.2094, "step": 88536 }, { "epoch": 7.642512077294686, "grad_norm": 0.8177199959754944, "learning_rate": 0.001, "loss": 2.2215, "step": 88592 }, { "epoch": 7.647342995169082, "grad_norm": 1.1241800785064697, "learning_rate": 0.001, "loss": 2.2044, "step": 88648 }, { "epoch": 7.6521739130434785, "grad_norm": 1.8689950704574585, "learning_rate": 0.001, "loss": 2.1874, "step": 88704 }, { "epoch": 7.657004830917875, "grad_norm": 5.128040790557861, "learning_rate": 0.001, "loss": 2.2042, "step": 88760 }, { "epoch": 7.661835748792271, "grad_norm": 0.718639075756073, "learning_rate": 0.001, "loss": 2.2074, "step": 88816 }, { "epoch": 7.666666666666667, "grad_norm": 4.167617321014404, "learning_rate": 0.001, "loss": 2.2048, "step": 88872 }, { "epoch": 7.671497584541063, "grad_norm": 0.6368312835693359, "learning_rate": 0.001, "loss": 2.1949, "step": 88928 }, { "epoch": 7.676328502415459, "grad_norm": 1.9549771547317505, "learning_rate": 0.001, "loss": 2.2016, "step": 88984 }, { "epoch": 7.681159420289855, "grad_norm": 0.9314081072807312, "learning_rate": 0.001, "loss": 2.2097, "step": 89040 }, { "epoch": 7.685990338164252, "grad_norm": 0.6683293581008911, "learning_rate": 0.001, "loss": 2.1973, "step": 89096 }, { "epoch": 7.690821256038648, "grad_norm": 6.989969253540039, "learning_rate": 0.001, "loss": 2.2074, "step": 89152 }, { "epoch": 7.695652173913043, "grad_norm": 3.9997243881225586, "learning_rate": 0.001, "loss": 2.1933, "step": 89208 }, { "epoch": 7.70048309178744, "grad_norm": 1.7864831686019897, "learning_rate": 0.001, "loss": 2.2055, "step": 89264 }, { "epoch": 7.705314009661835, "grad_norm": 52.917266845703125, "learning_rate": 0.001, "loss": 2.2008, "step": 89320 }, { "epoch": 7.710144927536232, "grad_norm": 2.218038558959961, "learning_rate": 0.001, "loss": 2.2094, "step": 89376 }, { "epoch": 7.714975845410628, "grad_norm": 1.853356957435608, "learning_rate": 0.001, "loss": 2.2182, "step": 89432 }, { "epoch": 7.719806763285024, "grad_norm": 1.5955795049667358, "learning_rate": 0.001, "loss": 2.2294, "step": 89488 }, { "epoch": 7.72463768115942, "grad_norm": 1.6717778444290161, "learning_rate": 0.001, "loss": 2.2275, "step": 89544 }, { "epoch": 7.729468599033816, "grad_norm": 2.0329580307006836, "learning_rate": 0.001, "loss": 2.2164, "step": 89600 }, { "epoch": 7.734299516908212, "grad_norm": 2.770157814025879, "learning_rate": 0.001, "loss": 2.2225, "step": 89656 }, { "epoch": 7.739130434782608, "grad_norm": 1.4086664915084839, "learning_rate": 0.001, "loss": 2.2087, "step": 89712 }, { "epoch": 7.743961352657005, "grad_norm": 0.9120765924453735, "learning_rate": 0.001, "loss": 2.2146, "step": 89768 }, { "epoch": 7.748792270531401, "grad_norm": 0.8088471293449402, "learning_rate": 0.001, "loss": 2.1993, "step": 89824 }, { "epoch": 7.753623188405797, "grad_norm": 1.652666449546814, "learning_rate": 0.001, "loss": 2.1927, "step": 89880 }, { "epoch": 7.758454106280193, "grad_norm": 1.7669382095336914, "learning_rate": 0.001, "loss": 2.1954, "step": 89936 }, { "epoch": 7.763285024154589, "grad_norm": 0.5624907612800598, "learning_rate": 0.001, "loss": 2.2078, "step": 89992 }, { "epoch": 7.768115942028985, "grad_norm": 4.082359313964844, "learning_rate": 0.001, "loss": 2.2113, "step": 90048 }, { "epoch": 7.7729468599033815, "grad_norm": 3.621448516845703, "learning_rate": 0.001, "loss": 2.2033, "step": 90104 }, { "epoch": 7.777777777777778, "grad_norm": 0.8657955527305603, "learning_rate": 0.001, "loss": 2.1914, "step": 90160 }, { "epoch": 7.782608695652174, "grad_norm": 0.6873970031738281, "learning_rate": 0.001, "loss": 2.1991, "step": 90216 }, { "epoch": 7.78743961352657, "grad_norm": 0.7847729921340942, "learning_rate": 0.001, "loss": 2.1947, "step": 90272 }, { "epoch": 7.792270531400966, "grad_norm": 1.9442344903945923, "learning_rate": 0.001, "loss": 2.1848, "step": 90328 }, { "epoch": 7.797101449275362, "grad_norm": 0.575197160243988, "learning_rate": 0.001, "loss": 2.1963, "step": 90384 }, { "epoch": 7.8019323671497585, "grad_norm": 5.769298076629639, "learning_rate": 0.001, "loss": 2.1888, "step": 90440 }, { "epoch": 7.806763285024155, "grad_norm": 1.173632264137268, "learning_rate": 0.001, "loss": 2.1859, "step": 90496 }, { "epoch": 7.811594202898551, "grad_norm": 1.359930396080017, "learning_rate": 0.001, "loss": 2.192, "step": 90552 }, { "epoch": 7.816425120772947, "grad_norm": 4.218874931335449, "learning_rate": 0.001, "loss": 2.1801, "step": 90608 }, { "epoch": 7.821256038647343, "grad_norm": 2.3309452533721924, "learning_rate": 0.001, "loss": 2.1896, "step": 90664 }, { "epoch": 7.826086956521739, "grad_norm": 0.7840883135795593, "learning_rate": 0.001, "loss": 2.1914, "step": 90720 }, { "epoch": 7.830917874396135, "grad_norm": 0.8858395218849182, "learning_rate": 0.001, "loss": 2.1815, "step": 90776 }, { "epoch": 7.835748792270532, "grad_norm": 0.5710480809211731, "learning_rate": 0.001, "loss": 2.1912, "step": 90832 }, { "epoch": 7.840579710144928, "grad_norm": 2.7840609550476074, "learning_rate": 0.001, "loss": 2.1852, "step": 90888 }, { "epoch": 7.845410628019324, "grad_norm": 1.8541259765625, "learning_rate": 0.001, "loss": 2.1845, "step": 90944 }, { "epoch": 7.85024154589372, "grad_norm": 2.0087831020355225, "learning_rate": 0.001, "loss": 2.1877, "step": 91000 }, { "epoch": 7.855072463768116, "grad_norm": 1.6200796365737915, "learning_rate": 0.001, "loss": 2.1731, "step": 91056 }, { "epoch": 7.859903381642512, "grad_norm": 0.52519690990448, "learning_rate": 0.001, "loss": 2.181, "step": 91112 }, { "epoch": 7.8647342995169085, "grad_norm": 0.44364088773727417, "learning_rate": 0.001, "loss": 2.1881, "step": 91168 }, { "epoch": 7.869565217391305, "grad_norm": 0.8852233290672302, "learning_rate": 0.001, "loss": 2.1879, "step": 91224 }, { "epoch": 7.874396135265701, "grad_norm": 0.858814537525177, "learning_rate": 0.001, "loss": 2.1933, "step": 91280 }, { "epoch": 7.879227053140097, "grad_norm": 0.45903480052948, "learning_rate": 0.001, "loss": 2.1868, "step": 91336 }, { "epoch": 7.884057971014493, "grad_norm": 2.9495484828948975, "learning_rate": 0.001, "loss": 2.1938, "step": 91392 }, { "epoch": 7.888888888888889, "grad_norm": 0.49399885535240173, "learning_rate": 0.001, "loss": 2.1889, "step": 91448 }, { "epoch": 7.8937198067632846, "grad_norm": 6.074120998382568, "learning_rate": 0.001, "loss": 2.1937, "step": 91504 }, { "epoch": 7.898550724637682, "grad_norm": 0.9523034691810608, "learning_rate": 0.001, "loss": 2.198, "step": 91560 }, { "epoch": 7.903381642512077, "grad_norm": 1.7021063566207886, "learning_rate": 0.001, "loss": 2.2166, "step": 91616 }, { "epoch": 7.908212560386474, "grad_norm": 0.6701593399047852, "learning_rate": 0.001, "loss": 2.2101, "step": 91672 }, { "epoch": 7.913043478260869, "grad_norm": 0.4944227933883667, "learning_rate": 0.001, "loss": 2.1882, "step": 91728 }, { "epoch": 7.917874396135265, "grad_norm": 0.6741495132446289, "learning_rate": 0.001, "loss": 2.1885, "step": 91784 }, { "epoch": 7.9227053140096615, "grad_norm": 0.7432145476341248, "learning_rate": 0.001, "loss": 2.194, "step": 91840 }, { "epoch": 7.927536231884058, "grad_norm": 0.7453572750091553, "learning_rate": 0.001, "loss": 2.1912, "step": 91896 }, { "epoch": 7.932367149758454, "grad_norm": 0.5134963989257812, "learning_rate": 0.001, "loss": 2.1898, "step": 91952 }, { "epoch": 7.93719806763285, "grad_norm": 1.3348597288131714, "learning_rate": 0.001, "loss": 2.1981, "step": 92008 }, { "epoch": 7.942028985507246, "grad_norm": 1.9832841157913208, "learning_rate": 0.001, "loss": 2.1847, "step": 92064 }, { "epoch": 7.946859903381642, "grad_norm": 2.156421184539795, "learning_rate": 0.001, "loss": 2.1818, "step": 92120 }, { "epoch": 7.951690821256038, "grad_norm": 1.3970669507980347, "learning_rate": 0.001, "loss": 2.1859, "step": 92176 }, { "epoch": 7.956521739130435, "grad_norm": 1.942958116531372, "learning_rate": 0.001, "loss": 2.1815, "step": 92232 }, { "epoch": 7.961352657004831, "grad_norm": 0.6472920775413513, "learning_rate": 0.001, "loss": 2.1847, "step": 92288 }, { "epoch": 7.966183574879227, "grad_norm": 1.6507965326309204, "learning_rate": 0.001, "loss": 2.1974, "step": 92344 }, { "epoch": 7.971014492753623, "grad_norm": 1.4307045936584473, "learning_rate": 0.001, "loss": 2.2161, "step": 92400 }, { "epoch": 7.975845410628019, "grad_norm": 1.8888859748840332, "learning_rate": 0.001, "loss": 2.2025, "step": 92456 }, { "epoch": 7.980676328502415, "grad_norm": 2.721738576889038, "learning_rate": 0.001, "loss": 2.2216, "step": 92512 }, { "epoch": 7.9855072463768115, "grad_norm": 1.122875690460205, "learning_rate": 0.001, "loss": 2.2279, "step": 92568 }, { "epoch": 7.990338164251208, "grad_norm": 2.393629312515259, "learning_rate": 0.001, "loss": 2.206, "step": 92624 }, { "epoch": 7.995169082125604, "grad_norm": 1.147132158279419, "learning_rate": 0.001, "loss": 2.2115, "step": 92680 }, { "epoch": 8.0, "grad_norm": 1.2341094017028809, "learning_rate": 0.001, "loss": 2.2159, "step": 92736 }, { "epoch": 8.004830917874395, "grad_norm": 0.8923884630203247, "learning_rate": 0.001, "loss": 2.1539, "step": 92792 }, { "epoch": 8.009661835748792, "grad_norm": 4.119471549987793, "learning_rate": 0.001, "loss": 2.1729, "step": 92848 }, { "epoch": 8.014492753623188, "grad_norm": 1.702335000038147, "learning_rate": 0.001, "loss": 2.2047, "step": 92904 }, { "epoch": 8.019323671497585, "grad_norm": 1.6770316362380981, "learning_rate": 0.001, "loss": 2.2026, "step": 92960 }, { "epoch": 8.02415458937198, "grad_norm": 0.8387877941131592, "learning_rate": 0.001, "loss": 2.1985, "step": 93016 }, { "epoch": 8.028985507246377, "grad_norm": 1.9522343873977661, "learning_rate": 0.001, "loss": 2.1867, "step": 93072 }, { "epoch": 8.033816425120772, "grad_norm": 3.7816824913024902, "learning_rate": 0.001, "loss": 2.1839, "step": 93128 }, { "epoch": 8.03864734299517, "grad_norm": 6.2592644691467285, "learning_rate": 0.001, "loss": 2.1701, "step": 93184 }, { "epoch": 8.043478260869565, "grad_norm": 0.7666885852813721, "learning_rate": 0.001, "loss": 2.1828, "step": 93240 }, { "epoch": 8.048309178743962, "grad_norm": 0.8134300112724304, "learning_rate": 0.001, "loss": 2.1728, "step": 93296 }, { "epoch": 8.053140096618357, "grad_norm": 2.354168176651001, "learning_rate": 0.001, "loss": 2.1881, "step": 93352 }, { "epoch": 8.057971014492754, "grad_norm": 1.0118948221206665, "learning_rate": 0.001, "loss": 2.1763, "step": 93408 }, { "epoch": 8.06280193236715, "grad_norm": 0.5297091007232666, "learning_rate": 0.001, "loss": 2.1641, "step": 93464 }, { "epoch": 8.067632850241546, "grad_norm": 2.466261625289917, "learning_rate": 0.001, "loss": 2.1735, "step": 93520 }, { "epoch": 8.072463768115941, "grad_norm": 1.4624782800674438, "learning_rate": 0.001, "loss": 2.1616, "step": 93576 }, { "epoch": 8.077294685990339, "grad_norm": 0.5605184435844421, "learning_rate": 0.001, "loss": 2.1751, "step": 93632 }, { "epoch": 8.082125603864734, "grad_norm": 1.3781135082244873, "learning_rate": 0.001, "loss": 2.1655, "step": 93688 }, { "epoch": 8.08695652173913, "grad_norm": 1.0592467784881592, "learning_rate": 0.001, "loss": 2.1661, "step": 93744 }, { "epoch": 8.091787439613526, "grad_norm": 1.1301743984222412, "learning_rate": 0.001, "loss": 2.1659, "step": 93800 }, { "epoch": 8.096618357487923, "grad_norm": 1.3702608346939087, "learning_rate": 0.001, "loss": 2.1732, "step": 93856 }, { "epoch": 8.101449275362318, "grad_norm": 1.1399483680725098, "learning_rate": 0.001, "loss": 2.17, "step": 93912 }, { "epoch": 8.106280193236715, "grad_norm": 16.944828033447266, "learning_rate": 0.001, "loss": 2.1682, "step": 93968 }, { "epoch": 8.11111111111111, "grad_norm": 1.5871983766555786, "learning_rate": 0.001, "loss": 2.1581, "step": 94024 }, { "epoch": 8.115942028985508, "grad_norm": 1.0786978006362915, "learning_rate": 0.001, "loss": 2.159, "step": 94080 }, { "epoch": 8.120772946859903, "grad_norm": 0.6798693537712097, "learning_rate": 0.001, "loss": 2.1563, "step": 94136 }, { "epoch": 8.1256038647343, "grad_norm": 0.4582396149635315, "learning_rate": 0.001, "loss": 2.1585, "step": 94192 }, { "epoch": 8.130434782608695, "grad_norm": 0.5516502857208252, "learning_rate": 0.001, "loss": 2.173, "step": 94248 }, { "epoch": 8.135265700483092, "grad_norm": 1.9195241928100586, "learning_rate": 0.001, "loss": 2.1535, "step": 94304 }, { "epoch": 8.140096618357488, "grad_norm": 0.7121683955192566, "learning_rate": 0.001, "loss": 2.1457, "step": 94360 }, { "epoch": 8.144927536231885, "grad_norm": 0.49970006942749023, "learning_rate": 0.001, "loss": 2.1489, "step": 94416 }, { "epoch": 8.14975845410628, "grad_norm": 0.5234577059745789, "learning_rate": 0.001, "loss": 2.1571, "step": 94472 }, { "epoch": 8.154589371980677, "grad_norm": 0.5559391379356384, "learning_rate": 0.001, "loss": 2.1392, "step": 94528 }, { "epoch": 8.159420289855072, "grad_norm": 0.3659181296825409, "learning_rate": 0.001, "loss": 2.1491, "step": 94584 }, { "epoch": 8.16425120772947, "grad_norm": 0.6247583627700806, "learning_rate": 0.001, "loss": 2.1371, "step": 94640 }, { "epoch": 8.169082125603865, "grad_norm": 1.5511116981506348, "learning_rate": 0.001, "loss": 2.1407, "step": 94696 }, { "epoch": 8.173913043478262, "grad_norm": 0.6923587322235107, "learning_rate": 0.001, "loss": 2.1301, "step": 94752 }, { "epoch": 8.178743961352657, "grad_norm": 2.3903884887695312, "learning_rate": 0.001, "loss": 2.1388, "step": 94808 }, { "epoch": 8.183574879227054, "grad_norm": 0.7824434041976929, "learning_rate": 0.001, "loss": 2.145, "step": 94864 }, { "epoch": 8.18840579710145, "grad_norm": 0.6399248242378235, "learning_rate": 0.001, "loss": 2.1457, "step": 94920 }, { "epoch": 8.193236714975846, "grad_norm": 1.8636595010757446, "learning_rate": 0.001, "loss": 2.1398, "step": 94976 }, { "epoch": 8.198067632850242, "grad_norm": 0.8062956929206848, "learning_rate": 0.001, "loss": 2.1383, "step": 95032 }, { "epoch": 8.202898550724637, "grad_norm": 1.3062527179718018, "learning_rate": 0.001, "loss": 2.131, "step": 95088 }, { "epoch": 8.207729468599034, "grad_norm": 2.9180166721343994, "learning_rate": 0.001, "loss": 2.1501, "step": 95144 }, { "epoch": 8.21256038647343, "grad_norm": 0.9812737107276917, "learning_rate": 0.001, "loss": 2.1643, "step": 95200 }, { "epoch": 8.217391304347826, "grad_norm": 3.122885227203369, "learning_rate": 0.001, "loss": 2.1654, "step": 95256 }, { "epoch": 8.222222222222221, "grad_norm": 0.9283051490783691, "learning_rate": 0.001, "loss": 2.1704, "step": 95312 }, { "epoch": 8.227053140096618, "grad_norm": 2.0975658893585205, "learning_rate": 0.001, "loss": 2.1705, "step": 95368 }, { "epoch": 8.231884057971014, "grad_norm": 0.7845547199249268, "learning_rate": 0.001, "loss": 2.1616, "step": 95424 }, { "epoch": 8.23671497584541, "grad_norm": 0.9119700789451599, "learning_rate": 0.001, "loss": 2.1593, "step": 95480 }, { "epoch": 8.241545893719806, "grad_norm": 4.552369117736816, "learning_rate": 0.001, "loss": 2.1599, "step": 95536 }, { "epoch": 8.246376811594203, "grad_norm": 0.8500065207481384, "learning_rate": 0.001, "loss": 2.1605, "step": 95592 }, { "epoch": 8.251207729468598, "grad_norm": 0.9760816693305969, "learning_rate": 0.001, "loss": 2.1686, "step": 95648 }, { "epoch": 8.256038647342995, "grad_norm": 1.4121878147125244, "learning_rate": 0.001, "loss": 2.155, "step": 95704 }, { "epoch": 8.26086956521739, "grad_norm": 1.4397386312484741, "learning_rate": 0.001, "loss": 2.1753, "step": 95760 }, { "epoch": 8.265700483091788, "grad_norm": 0.7009803056716919, "learning_rate": 0.001, "loss": 2.1649, "step": 95816 }, { "epoch": 8.270531400966183, "grad_norm": 0.6118043065071106, "learning_rate": 0.001, "loss": 2.1591, "step": 95872 }, { "epoch": 8.27536231884058, "grad_norm": 2.562849760055542, "learning_rate": 0.001, "loss": 2.1592, "step": 95928 }, { "epoch": 8.280193236714975, "grad_norm": 0.6190630793571472, "learning_rate": 0.001, "loss": 2.1639, "step": 95984 }, { "epoch": 8.285024154589372, "grad_norm": 1.7951046228408813, "learning_rate": 0.001, "loss": 2.177, "step": 96040 }, { "epoch": 8.289855072463768, "grad_norm": 0.9006018042564392, "learning_rate": 0.001, "loss": 2.1659, "step": 96096 }, { "epoch": 8.294685990338165, "grad_norm": 4.852769374847412, "learning_rate": 0.001, "loss": 2.1471, "step": 96152 }, { "epoch": 8.29951690821256, "grad_norm": 1.3894976377487183, "learning_rate": 0.001, "loss": 2.1584, "step": 96208 }, { "epoch": 8.304347826086957, "grad_norm": 0.8317987322807312, "learning_rate": 0.001, "loss": 2.1566, "step": 96264 }, { "epoch": 8.309178743961352, "grad_norm": 0.7113953232765198, "learning_rate": 0.001, "loss": 2.1752, "step": 96320 }, { "epoch": 8.31400966183575, "grad_norm": 0.6266844272613525, "learning_rate": 0.001, "loss": 2.1716, "step": 96376 }, { "epoch": 8.318840579710145, "grad_norm": 0.6783342957496643, "learning_rate": 0.001, "loss": 2.1718, "step": 96432 }, { "epoch": 8.323671497584542, "grad_norm": 0.48579269647598267, "learning_rate": 0.001, "loss": 2.1651, "step": 96488 }, { "epoch": 8.328502415458937, "grad_norm": 0.5348486304283142, "learning_rate": 0.001, "loss": 2.1703, "step": 96544 }, { "epoch": 8.333333333333334, "grad_norm": 0.9735738039016724, "learning_rate": 0.001, "loss": 2.1747, "step": 96600 }, { "epoch": 8.33816425120773, "grad_norm": 1.0612797737121582, "learning_rate": 0.001, "loss": 2.1737, "step": 96656 }, { "epoch": 8.342995169082126, "grad_norm": 0.7015147805213928, "learning_rate": 0.001, "loss": 2.1646, "step": 96712 }, { "epoch": 8.347826086956522, "grad_norm": 0.9940712451934814, "learning_rate": 0.001, "loss": 2.1683, "step": 96768 }, { "epoch": 8.352657004830919, "grad_norm": 1.1415281295776367, "learning_rate": 0.001, "loss": 2.1649, "step": 96824 }, { "epoch": 8.357487922705314, "grad_norm": 1.21604585647583, "learning_rate": 0.001, "loss": 2.1727, "step": 96880 }, { "epoch": 8.36231884057971, "grad_norm": 1.1453938484191895, "learning_rate": 0.001, "loss": 2.1631, "step": 96936 }, { "epoch": 8.367149758454106, "grad_norm": 0.8788756728172302, "learning_rate": 0.001, "loss": 2.1518, "step": 96992 }, { "epoch": 8.371980676328503, "grad_norm": 4.4988694190979, "learning_rate": 0.001, "loss": 2.1537, "step": 97048 }, { "epoch": 8.376811594202898, "grad_norm": 0.41735342144966125, "learning_rate": 0.001, "loss": 2.1734, "step": 97104 }, { "epoch": 8.381642512077295, "grad_norm": 0.9588692784309387, "learning_rate": 0.001, "loss": 2.1615, "step": 97160 }, { "epoch": 8.38647342995169, "grad_norm": 0.5758078098297119, "learning_rate": 0.001, "loss": 2.1513, "step": 97216 }, { "epoch": 8.391304347826088, "grad_norm": 2.615999221801758, "learning_rate": 0.001, "loss": 2.1599, "step": 97272 }, { "epoch": 8.396135265700483, "grad_norm": 1.309092402458191, "learning_rate": 0.001, "loss": 2.1498, "step": 97328 }, { "epoch": 8.40096618357488, "grad_norm": 2.516789436340332, "learning_rate": 0.001, "loss": 2.1479, "step": 97384 }, { "epoch": 8.405797101449275, "grad_norm": 1.4301115274429321, "learning_rate": 0.001, "loss": 2.1533, "step": 97440 }, { "epoch": 8.41062801932367, "grad_norm": 1.1253650188446045, "learning_rate": 0.001, "loss": 2.1529, "step": 97496 }, { "epoch": 8.415458937198068, "grad_norm": 0.5327553749084473, "learning_rate": 0.001, "loss": 2.1589, "step": 97552 }, { "epoch": 8.420289855072463, "grad_norm": 1.2098220586776733, "learning_rate": 0.001, "loss": 2.1482, "step": 97608 }, { "epoch": 8.42512077294686, "grad_norm": 0.6792951822280884, "learning_rate": 0.001, "loss": 2.1455, "step": 97664 }, { "epoch": 8.429951690821255, "grad_norm": 0.7897965312004089, "learning_rate": 0.001, "loss": 2.1528, "step": 97720 }, { "epoch": 8.434782608695652, "grad_norm": 1.8171448707580566, "learning_rate": 0.001, "loss": 2.1665, "step": 97776 }, { "epoch": 8.439613526570048, "grad_norm": 0.6654888391494751, "learning_rate": 0.001, "loss": 2.1696, "step": 97832 }, { "epoch": 8.444444444444445, "grad_norm": 1.768674373626709, "learning_rate": 0.001, "loss": 2.1728, "step": 97888 }, { "epoch": 8.44927536231884, "grad_norm": 0.6529266238212585, "learning_rate": 0.001, "loss": 2.1757, "step": 97944 }, { "epoch": 8.454106280193237, "grad_norm": 1.8409507274627686, "learning_rate": 0.001, "loss": 2.169, "step": 98000 }, { "epoch": 8.458937198067632, "grad_norm": 2.000258684158325, "learning_rate": 0.001, "loss": 2.1679, "step": 98056 }, { "epoch": 8.46376811594203, "grad_norm": 0.7438505291938782, "learning_rate": 0.001, "loss": 2.1631, "step": 98112 }, { "epoch": 8.468599033816425, "grad_norm": 1.2274531126022339, "learning_rate": 0.001, "loss": 2.1674, "step": 98168 }, { "epoch": 8.473429951690822, "grad_norm": 1.0805602073669434, "learning_rate": 0.001, "loss": 2.1793, "step": 98224 }, { "epoch": 8.478260869565217, "grad_norm": 1.033689022064209, "learning_rate": 0.001, "loss": 2.1678, "step": 98280 }, { "epoch": 8.483091787439614, "grad_norm": 1.1021382808685303, "learning_rate": 0.001, "loss": 2.1658, "step": 98336 }, { "epoch": 8.48792270531401, "grad_norm": 1.0279886722564697, "learning_rate": 0.001, "loss": 2.1785, "step": 98392 }, { "epoch": 8.492753623188406, "grad_norm": 1.193163514137268, "learning_rate": 0.001, "loss": 2.1683, "step": 98448 }, { "epoch": 8.497584541062801, "grad_norm": 0.682515561580658, "learning_rate": 0.001, "loss": 2.1691, "step": 98504 }, { "epoch": 8.502415458937199, "grad_norm": 1.073784351348877, "learning_rate": 0.001, "loss": 2.1682, "step": 98560 }, { "epoch": 8.507246376811594, "grad_norm": 1.4975244998931885, "learning_rate": 0.001, "loss": 2.1791, "step": 98616 }, { "epoch": 8.51207729468599, "grad_norm": 1.937719464302063, "learning_rate": 0.001, "loss": 2.1682, "step": 98672 }, { "epoch": 8.516908212560386, "grad_norm": 1.4181437492370605, "learning_rate": 0.001, "loss": 2.1641, "step": 98728 }, { "epoch": 8.521739130434783, "grad_norm": 0.86642986536026, "learning_rate": 0.001, "loss": 2.1648, "step": 98784 }, { "epoch": 8.526570048309178, "grad_norm": 1.679457426071167, "learning_rate": 0.001, "loss": 2.165, "step": 98840 }, { "epoch": 8.531400966183575, "grad_norm": 2.2595467567443848, "learning_rate": 0.001, "loss": 2.1585, "step": 98896 }, { "epoch": 8.53623188405797, "grad_norm": 1.4689456224441528, "learning_rate": 0.001, "loss": 2.1649, "step": 98952 }, { "epoch": 8.541062801932368, "grad_norm": 0.45793503522872925, "learning_rate": 0.001, "loss": 2.1831, "step": 99008 }, { "epoch": 8.545893719806763, "grad_norm": 1.0827895402908325, "learning_rate": 0.001, "loss": 2.1811, "step": 99064 }, { "epoch": 8.55072463768116, "grad_norm": 0.6022539734840393, "learning_rate": 0.001, "loss": 2.171, "step": 99120 }, { "epoch": 8.555555555555555, "grad_norm": 5.037740230560303, "learning_rate": 0.001, "loss": 2.1781, "step": 99176 }, { "epoch": 8.560386473429952, "grad_norm": 0.8766276240348816, "learning_rate": 0.001, "loss": 2.1487, "step": 99232 }, { "epoch": 8.565217391304348, "grad_norm": 3.8220744132995605, "learning_rate": 0.001, "loss": 2.1715, "step": 99288 }, { "epoch": 8.570048309178745, "grad_norm": 0.882576584815979, "learning_rate": 0.001, "loss": 2.1682, "step": 99344 }, { "epoch": 8.57487922705314, "grad_norm": 2.8687374591827393, "learning_rate": 0.001, "loss": 2.1851, "step": 99400 }, { "epoch": 8.579710144927537, "grad_norm": 0.8978865146636963, "learning_rate": 0.001, "loss": 2.1775, "step": 99456 }, { "epoch": 8.584541062801932, "grad_norm": 2.5586705207824707, "learning_rate": 0.001, "loss": 2.1802, "step": 99512 }, { "epoch": 8.58937198067633, "grad_norm": 1.1842213869094849, "learning_rate": 0.001, "loss": 2.1616, "step": 99568 }, { "epoch": 8.594202898550725, "grad_norm": 2.43939208984375, "learning_rate": 0.001, "loss": 2.1754, "step": 99624 }, { "epoch": 8.59903381642512, "grad_norm": 2.8253753185272217, "learning_rate": 0.001, "loss": 2.1737, "step": 99680 }, { "epoch": 8.603864734299517, "grad_norm": 3.521724224090576, "learning_rate": 0.001, "loss": 2.2008, "step": 99736 }, { "epoch": 8.608695652173914, "grad_norm": 5.175851345062256, "learning_rate": 0.001, "loss": 2.2124, "step": 99792 }, { "epoch": 8.61352657004831, "grad_norm": 0.8156281113624573, "learning_rate": 0.001, "loss": 2.1964, "step": 99848 }, { "epoch": 8.618357487922705, "grad_norm": 1.0975911617279053, "learning_rate": 0.001, "loss": 2.1744, "step": 99904 }, { "epoch": 8.623188405797102, "grad_norm": 0.8442077040672302, "learning_rate": 0.001, "loss": 2.1622, "step": 99960 }, { "epoch": 8.628019323671497, "grad_norm": 1.4752484560012817, "learning_rate": 0.001, "loss": 2.1721, "step": 100016 }, { "epoch": 8.632850241545894, "grad_norm": 3.280287742614746, "learning_rate": 0.001, "loss": 2.1676, "step": 100072 }, { "epoch": 8.63768115942029, "grad_norm": 1.961851954460144, "learning_rate": 0.001, "loss": 2.1675, "step": 100128 }, { "epoch": 8.642512077294686, "grad_norm": 1.1180341243743896, "learning_rate": 0.001, "loss": 2.1785, "step": 100184 }, { "epoch": 8.647342995169081, "grad_norm": 2.5496907234191895, "learning_rate": 0.001, "loss": 2.172, "step": 100240 }, { "epoch": 8.652173913043478, "grad_norm": 2.1761929988861084, "learning_rate": 0.001, "loss": 2.1734, "step": 100296 }, { "epoch": 8.657004830917874, "grad_norm": 10.224677085876465, "learning_rate": 0.001, "loss": 2.1923, "step": 100352 }, { "epoch": 8.66183574879227, "grad_norm": 1.2687700986862183, "learning_rate": 0.001, "loss": 2.204, "step": 100408 }, { "epoch": 8.666666666666666, "grad_norm": 1.0741652250289917, "learning_rate": 0.001, "loss": 2.1874, "step": 100464 }, { "epoch": 8.671497584541063, "grad_norm": 3.31343674659729, "learning_rate": 0.001, "loss": 2.197, "step": 100520 }, { "epoch": 8.676328502415458, "grad_norm": 1.0871697664260864, "learning_rate": 0.001, "loss": 2.2159, "step": 100576 }, { "epoch": 8.681159420289855, "grad_norm": 0.6209431290626526, "learning_rate": 0.001, "loss": 2.2235, "step": 100632 }, { "epoch": 8.68599033816425, "grad_norm": 1.3920118808746338, "learning_rate": 0.001, "loss": 2.2001, "step": 100688 }, { "epoch": 8.690821256038648, "grad_norm": 2.679579496383667, "learning_rate": 0.001, "loss": 2.1922, "step": 100744 }, { "epoch": 8.695652173913043, "grad_norm": 0.6335155367851257, "learning_rate": 0.001, "loss": 2.2013, "step": 100800 }, { "epoch": 8.70048309178744, "grad_norm": 2.219010829925537, "learning_rate": 0.001, "loss": 2.1966, "step": 100856 }, { "epoch": 8.705314009661835, "grad_norm": 1.9883054494857788, "learning_rate": 0.001, "loss": 2.1833, "step": 100912 }, { "epoch": 8.710144927536232, "grad_norm": 0.6085325479507446, "learning_rate": 0.001, "loss": 2.1857, "step": 100968 }, { "epoch": 8.714975845410628, "grad_norm": 0.6379920840263367, "learning_rate": 0.001, "loss": 2.18, "step": 101024 }, { "epoch": 8.719806763285025, "grad_norm": 1.0121045112609863, "learning_rate": 0.001, "loss": 2.1757, "step": 101080 }, { "epoch": 8.72463768115942, "grad_norm": 1.31614351272583, "learning_rate": 0.001, "loss": 2.1875, "step": 101136 }, { "epoch": 8.729468599033817, "grad_norm": 4.128320217132568, "learning_rate": 0.001, "loss": 2.1793, "step": 101192 }, { "epoch": 8.734299516908212, "grad_norm": 3.072277784347534, "learning_rate": 0.001, "loss": 2.1834, "step": 101248 }, { "epoch": 8.73913043478261, "grad_norm": 0.8143222332000732, "learning_rate": 0.001, "loss": 2.1787, "step": 101304 }, { "epoch": 8.743961352657005, "grad_norm": 1.0020711421966553, "learning_rate": 0.001, "loss": 2.1944, "step": 101360 }, { "epoch": 8.748792270531402, "grad_norm": 1.4619877338409424, "learning_rate": 0.001, "loss": 2.1801, "step": 101416 }, { "epoch": 8.753623188405797, "grad_norm": 1.8221583366394043, "learning_rate": 0.001, "loss": 2.1969, "step": 101472 }, { "epoch": 8.758454106280194, "grad_norm": 1.1051881313323975, "learning_rate": 0.001, "loss": 2.2227, "step": 101528 }, { "epoch": 8.76328502415459, "grad_norm": 0.569442093372345, "learning_rate": 0.001, "loss": 2.2234, "step": 101584 }, { "epoch": 8.768115942028986, "grad_norm": 1.9901152849197388, "learning_rate": 0.001, "loss": 2.2059, "step": 101640 }, { "epoch": 8.772946859903382, "grad_norm": 1.6924843788146973, "learning_rate": 0.001, "loss": 2.1941, "step": 101696 }, { "epoch": 8.777777777777779, "grad_norm": 5.261808395385742, "learning_rate": 0.001, "loss": 2.1987, "step": 101752 }, { "epoch": 8.782608695652174, "grad_norm": 2.2304725646972656, "learning_rate": 0.001, "loss": 2.1942, "step": 101808 }, { "epoch": 8.78743961352657, "grad_norm": 0.7107840180397034, "learning_rate": 0.001, "loss": 2.1935, "step": 101864 }, { "epoch": 8.792270531400966, "grad_norm": 2.6893982887268066, "learning_rate": 0.001, "loss": 2.1859, "step": 101920 }, { "epoch": 8.797101449275363, "grad_norm": 5.3514580726623535, "learning_rate": 0.001, "loss": 2.2025, "step": 101976 }, { "epoch": 8.801932367149758, "grad_norm": 1.1213563680648804, "learning_rate": 0.001, "loss": 2.2021, "step": 102032 }, { "epoch": 8.806763285024154, "grad_norm": 5.150223255157471, "learning_rate": 0.001, "loss": 2.2162, "step": 102088 }, { "epoch": 8.81159420289855, "grad_norm": 1.5704729557037354, "learning_rate": 0.001, "loss": 2.2082, "step": 102144 }, { "epoch": 8.816425120772946, "grad_norm": 1.5262094736099243, "learning_rate": 0.001, "loss": 2.1913, "step": 102200 }, { "epoch": 8.821256038647343, "grad_norm": 1.3060446977615356, "learning_rate": 0.001, "loss": 2.1829, "step": 102256 }, { "epoch": 8.826086956521738, "grad_norm": 1.790102481842041, "learning_rate": 0.001, "loss": 2.1871, "step": 102312 }, { "epoch": 8.830917874396135, "grad_norm": 1.2355036735534668, "learning_rate": 0.001, "loss": 2.1935, "step": 102368 }, { "epoch": 8.83574879227053, "grad_norm": 0.8439340591430664, "learning_rate": 0.001, "loss": 2.1835, "step": 102424 }, { "epoch": 8.840579710144928, "grad_norm": 1.7418797016143799, "learning_rate": 0.001, "loss": 2.1836, "step": 102480 }, { "epoch": 8.845410628019323, "grad_norm": 1.4281748533248901, "learning_rate": 0.001, "loss": 2.1908, "step": 102536 }, { "epoch": 8.85024154589372, "grad_norm": 1.382093906402588, "learning_rate": 0.001, "loss": 2.1806, "step": 102592 }, { "epoch": 8.855072463768115, "grad_norm": 1.306244969367981, "learning_rate": 0.001, "loss": 2.1767, "step": 102648 }, { "epoch": 8.859903381642512, "grad_norm": 1.1055924892425537, "learning_rate": 0.001, "loss": 2.1899, "step": 102704 }, { "epoch": 8.864734299516908, "grad_norm": 2.741365671157837, "learning_rate": 0.001, "loss": 2.1841, "step": 102760 }, { "epoch": 8.869565217391305, "grad_norm": 1.1803412437438965, "learning_rate": 0.001, "loss": 2.1877, "step": 102816 }, { "epoch": 8.8743961352657, "grad_norm": 0.6093894243240356, "learning_rate": 0.001, "loss": 2.1893, "step": 102872 }, { "epoch": 8.879227053140097, "grad_norm": 3.0857136249542236, "learning_rate": 0.001, "loss": 2.1818, "step": 102928 }, { "epoch": 8.884057971014492, "grad_norm": 14.2505521774292, "learning_rate": 0.001, "loss": 2.1915, "step": 102984 }, { "epoch": 8.88888888888889, "grad_norm": 2.1241681575775146, "learning_rate": 0.001, "loss": 2.22, "step": 103040 }, { "epoch": 8.893719806763285, "grad_norm": 20.421709060668945, "learning_rate": 0.001, "loss": 2.2492, "step": 103096 }, { "epoch": 8.898550724637682, "grad_norm": 5.274570941925049, "learning_rate": 0.001, "loss": 2.235, "step": 103152 }, { "epoch": 8.903381642512077, "grad_norm": 13.373640060424805, "learning_rate": 0.001, "loss": 2.2229, "step": 103208 }, { "epoch": 8.908212560386474, "grad_norm": 1.3923168182373047, "learning_rate": 0.001, "loss": 2.2151, "step": 103264 }, { "epoch": 8.91304347826087, "grad_norm": 1.205040454864502, "learning_rate": 0.001, "loss": 2.2069, "step": 103320 }, { "epoch": 8.917874396135266, "grad_norm": 1.282835602760315, "learning_rate": 0.001, "loss": 2.2045, "step": 103376 }, { "epoch": 8.922705314009661, "grad_norm": 3.139082908630371, "learning_rate": 0.001, "loss": 2.2119, "step": 103432 }, { "epoch": 8.927536231884059, "grad_norm": 0.7206853628158569, "learning_rate": 0.001, "loss": 2.1976, "step": 103488 }, { "epoch": 8.932367149758454, "grad_norm": 0.8087875843048096, "learning_rate": 0.001, "loss": 2.2041, "step": 103544 }, { "epoch": 8.93719806763285, "grad_norm": 0.742152214050293, "learning_rate": 0.001, "loss": 2.1842, "step": 103600 }, { "epoch": 8.942028985507246, "grad_norm": 0.900603711605072, "learning_rate": 0.001, "loss": 2.2008, "step": 103656 }, { "epoch": 8.946859903381643, "grad_norm": 1.6056673526763916, "learning_rate": 0.001, "loss": 2.2029, "step": 103712 }, { "epoch": 8.951690821256038, "grad_norm": 0.666178822517395, "learning_rate": 0.001, "loss": 2.204, "step": 103768 }, { "epoch": 8.956521739130435, "grad_norm": 1.124280333518982, "learning_rate": 0.001, "loss": 2.1965, "step": 103824 }, { "epoch": 8.96135265700483, "grad_norm": 1.827895164489746, "learning_rate": 0.001, "loss": 2.1878, "step": 103880 }, { "epoch": 8.966183574879228, "grad_norm": 1.0022568702697754, "learning_rate": 0.001, "loss": 2.1889, "step": 103936 }, { "epoch": 8.971014492753623, "grad_norm": 1.6951721906661987, "learning_rate": 0.001, "loss": 2.1928, "step": 103992 }, { "epoch": 8.97584541062802, "grad_norm": 1.7363156080245972, "learning_rate": 0.001, "loss": 2.1912, "step": 104048 }, { "epoch": 8.980676328502415, "grad_norm": 2.5289146900177, "learning_rate": 0.001, "loss": 2.1939, "step": 104104 }, { "epoch": 8.985507246376812, "grad_norm": 1.0142520666122437, "learning_rate": 0.001, "loss": 2.1959, "step": 104160 }, { "epoch": 8.990338164251208, "grad_norm": 0.6294025182723999, "learning_rate": 0.001, "loss": 2.1821, "step": 104216 }, { "epoch": 8.995169082125603, "grad_norm": 0.784690797328949, "learning_rate": 0.001, "loss": 2.185, "step": 104272 }, { "epoch": 9.0, "grad_norm": 2.744249105453491, "learning_rate": 0.001, "loss": 2.1917, "step": 104328 }, { "epoch": 9.004830917874395, "grad_norm": 1.0873427391052246, "learning_rate": 0.001, "loss": 2.1707, "step": 104384 }, { "epoch": 9.009661835748792, "grad_norm": 1.0031739473342896, "learning_rate": 0.001, "loss": 2.1732, "step": 104440 }, { "epoch": 9.014492753623188, "grad_norm": 1.0339198112487793, "learning_rate": 0.001, "loss": 2.1602, "step": 104496 }, { "epoch": 9.019323671497585, "grad_norm": 1.4131979942321777, "learning_rate": 0.001, "loss": 2.1643, "step": 104552 }, { "epoch": 9.02415458937198, "grad_norm": 2.60891056060791, "learning_rate": 0.001, "loss": 2.1774, "step": 104608 }, { "epoch": 9.028985507246377, "grad_norm": 3.1860134601593018, "learning_rate": 0.001, "loss": 2.1834, "step": 104664 }, { "epoch": 9.033816425120772, "grad_norm": 1.3319181203842163, "learning_rate": 0.001, "loss": 2.2006, "step": 104720 }, { "epoch": 9.03864734299517, "grad_norm": 1.3886021375656128, "learning_rate": 0.001, "loss": 2.2042, "step": 104776 }, { "epoch": 9.043478260869565, "grad_norm": 1.2281749248504639, "learning_rate": 0.001, "loss": 2.2066, "step": 104832 }, { "epoch": 9.048309178743962, "grad_norm": 1.7456295490264893, "learning_rate": 0.001, "loss": 2.1788, "step": 104888 }, { "epoch": 9.053140096618357, "grad_norm": 1.5728164911270142, "learning_rate": 0.001, "loss": 2.154, "step": 104944 }, { "epoch": 9.057971014492754, "grad_norm": 0.6722880005836487, "learning_rate": 0.001, "loss": 2.1564, "step": 105000 }, { "epoch": 9.06280193236715, "grad_norm": 1.349203109741211, "learning_rate": 0.001, "loss": 2.1661, "step": 105056 }, { "epoch": 9.067632850241546, "grad_norm": 1.1271167993545532, "learning_rate": 0.001, "loss": 2.1715, "step": 105112 }, { "epoch": 9.072463768115941, "grad_norm": 0.9470292329788208, "learning_rate": 0.001, "loss": 2.1697, "step": 105168 }, { "epoch": 9.077294685990339, "grad_norm": 2.252506971359253, "learning_rate": 0.001, "loss": 2.1741, "step": 105224 }, { "epoch": 9.082125603864734, "grad_norm": 0.9157953858375549, "learning_rate": 0.001, "loss": 2.1664, "step": 105280 }, { "epoch": 9.08695652173913, "grad_norm": 3.8057448863983154, "learning_rate": 0.001, "loss": 2.1784, "step": 105336 }, { "epoch": 9.091787439613526, "grad_norm": 1.052957534790039, "learning_rate": 0.001, "loss": 2.1677, "step": 105392 }, { "epoch": 9.096618357487923, "grad_norm": 1.5191885232925415, "learning_rate": 0.001, "loss": 2.1721, "step": 105448 }, { "epoch": 9.101449275362318, "grad_norm": 2.828864336013794, "learning_rate": 0.001, "loss": 2.1662, "step": 105504 }, { "epoch": 9.106280193236715, "grad_norm": 3.506580114364624, "learning_rate": 0.001, "loss": 2.1694, "step": 105560 }, { "epoch": 9.11111111111111, "grad_norm": 1.8464301824569702, "learning_rate": 0.001, "loss": 2.1664, "step": 105616 }, { "epoch": 9.115942028985508, "grad_norm": 2.55763578414917, "learning_rate": 0.001, "loss": 2.1833, "step": 105672 }, { "epoch": 9.120772946859903, "grad_norm": 1.7007839679718018, "learning_rate": 0.001, "loss": 2.1982, "step": 105728 }, { "epoch": 9.1256038647343, "grad_norm": 1.8573815822601318, "learning_rate": 0.001, "loss": 2.1733, "step": 105784 }, { "epoch": 9.130434782608695, "grad_norm": 0.9144348502159119, "learning_rate": 0.001, "loss": 2.1735, "step": 105840 }, { "epoch": 9.135265700483092, "grad_norm": 0.9272240400314331, "learning_rate": 0.001, "loss": 2.1523, "step": 105896 }, { "epoch": 9.140096618357488, "grad_norm": 1.7727491855621338, "learning_rate": 0.001, "loss": 2.1569, "step": 105952 }, { "epoch": 9.144927536231885, "grad_norm": 1.354276418685913, "learning_rate": 0.001, "loss": 2.1621, "step": 106008 }, { "epoch": 9.14975845410628, "grad_norm": 1.5559078454971313, "learning_rate": 0.001, "loss": 2.1794, "step": 106064 }, { "epoch": 9.154589371980677, "grad_norm": 2.2065834999084473, "learning_rate": 0.001, "loss": 2.1581, "step": 106120 }, { "epoch": 9.159420289855072, "grad_norm": 1.2813655138015747, "learning_rate": 0.001, "loss": 2.1528, "step": 106176 }, { "epoch": 9.16425120772947, "grad_norm": 4.577296733856201, "learning_rate": 0.001, "loss": 2.1515, "step": 106232 }, { "epoch": 9.169082125603865, "grad_norm": 2.1098666191101074, "learning_rate": 0.001, "loss": 2.1502, "step": 106288 }, { "epoch": 9.173913043478262, "grad_norm": 1.1017571687698364, "learning_rate": 0.001, "loss": 2.1512, "step": 106344 }, { "epoch": 9.178743961352657, "grad_norm": 2.2283856868743896, "learning_rate": 0.001, "loss": 2.147, "step": 106400 }, { "epoch": 9.183574879227054, "grad_norm": 3.9506516456604004, "learning_rate": 0.001, "loss": 2.1782, "step": 106456 }, { "epoch": 9.18840579710145, "grad_norm": 1.1358847618103027, "learning_rate": 0.001, "loss": 2.1711, "step": 106512 }, { "epoch": 9.193236714975846, "grad_norm": 0.7863360047340393, "learning_rate": 0.001, "loss": 2.1594, "step": 106568 }, { "epoch": 9.198067632850242, "grad_norm": 1.1799285411834717, "learning_rate": 0.001, "loss": 2.153, "step": 106624 }, { "epoch": 9.202898550724637, "grad_norm": 12.447437286376953, "learning_rate": 0.001, "loss": 2.1776, "step": 106680 }, { "epoch": 9.207729468599034, "grad_norm": 1.1490237712860107, "learning_rate": 0.001, "loss": 2.1833, "step": 106736 }, { "epoch": 9.21256038647343, "grad_norm": 0.7120616436004639, "learning_rate": 0.001, "loss": 2.1889, "step": 106792 }, { "epoch": 9.217391304347826, "grad_norm": 1.934424638748169, "learning_rate": 0.001, "loss": 2.2002, "step": 106848 }, { "epoch": 9.222222222222221, "grad_norm": 1.3106374740600586, "learning_rate": 0.001, "loss": 2.1844, "step": 106904 }, { "epoch": 9.227053140096618, "grad_norm": 1.4730052947998047, "learning_rate": 0.001, "loss": 2.1683, "step": 106960 }, { "epoch": 9.231884057971014, "grad_norm": 1.317531943321228, "learning_rate": 0.001, "loss": 2.166, "step": 107016 }, { "epoch": 9.23671497584541, "grad_norm": 0.8591775894165039, "learning_rate": 0.001, "loss": 2.1578, "step": 107072 }, { "epoch": 9.241545893719806, "grad_norm": 1.2508798837661743, "learning_rate": 0.001, "loss": 2.1471, "step": 107128 }, { "epoch": 9.246376811594203, "grad_norm": 2.97257924079895, "learning_rate": 0.001, "loss": 2.1496, "step": 107184 }, { "epoch": 9.251207729468598, "grad_norm": 1.3477306365966797, "learning_rate": 0.001, "loss": 2.1568, "step": 107240 }, { "epoch": 9.256038647342995, "grad_norm": 2.1898815631866455, "learning_rate": 0.001, "loss": 2.1492, "step": 107296 }, { "epoch": 9.26086956521739, "grad_norm": 4.246581554412842, "learning_rate": 0.001, "loss": 2.1609, "step": 107352 }, { "epoch": 9.265700483091788, "grad_norm": 0.9123812913894653, "learning_rate": 0.001, "loss": 2.1547, "step": 107408 }, { "epoch": 9.270531400966183, "grad_norm": 1.096420407295227, "learning_rate": 0.001, "loss": 2.1662, "step": 107464 }, { "epoch": 9.27536231884058, "grad_norm": 1.0242642164230347, "learning_rate": 0.001, "loss": 2.1561, "step": 107520 }, { "epoch": 9.280193236714975, "grad_norm": 1.5103753805160522, "learning_rate": 0.001, "loss": 2.149, "step": 107576 }, { "epoch": 9.285024154589372, "grad_norm": 1.3984957933425903, "learning_rate": 0.001, "loss": 2.1554, "step": 107632 }, { "epoch": 9.289855072463768, "grad_norm": 1.3554457426071167, "learning_rate": 0.001, "loss": 2.1482, "step": 107688 }, { "epoch": 9.294685990338165, "grad_norm": 0.9875816702842712, "learning_rate": 0.001, "loss": 2.1525, "step": 107744 }, { "epoch": 9.29951690821256, "grad_norm": 3.760477304458618, "learning_rate": 0.001, "loss": 2.1418, "step": 107800 }, { "epoch": 9.304347826086957, "grad_norm": 1.089552640914917, "learning_rate": 0.001, "loss": 2.1552, "step": 107856 }, { "epoch": 9.309178743961352, "grad_norm": 2.134943723678589, "learning_rate": 0.001, "loss": 2.1519, "step": 107912 }, { "epoch": 9.31400966183575, "grad_norm": 1.717183232307434, "learning_rate": 0.001, "loss": 2.1462, "step": 107968 }, { "epoch": 9.318840579710145, "grad_norm": 0.8495106101036072, "learning_rate": 0.001, "loss": 2.1372, "step": 108024 }, { "epoch": 9.323671497584542, "grad_norm": 2.423074245452881, "learning_rate": 0.001, "loss": 2.1359, "step": 108080 }, { "epoch": 9.328502415458937, "grad_norm": 5.244233131408691, "learning_rate": 0.001, "loss": 2.1487, "step": 108136 }, { "epoch": 9.333333333333334, "grad_norm": 1.1853563785552979, "learning_rate": 0.001, "loss": 2.1417, "step": 108192 }, { "epoch": 9.33816425120773, "grad_norm": 1.2336734533309937, "learning_rate": 0.001, "loss": 2.1455, "step": 108248 }, { "epoch": 9.342995169082126, "grad_norm": 1.2554857730865479, "learning_rate": 0.001, "loss": 2.1532, "step": 108304 }, { "epoch": 9.347826086956522, "grad_norm": 0.6140924096107483, "learning_rate": 0.001, "loss": 2.1555, "step": 108360 }, { "epoch": 9.352657004830919, "grad_norm": 1.9439496994018555, "learning_rate": 0.001, "loss": 2.176, "step": 108416 }, { "epoch": 9.357487922705314, "grad_norm": 6.822696685791016, "learning_rate": 0.001, "loss": 2.164, "step": 108472 }, { "epoch": 9.36231884057971, "grad_norm": 13.39431095123291, "learning_rate": 0.001, "loss": 2.1556, "step": 108528 }, { "epoch": 9.367149758454106, "grad_norm": 2.1246016025543213, "learning_rate": 0.001, "loss": 2.15, "step": 108584 }, { "epoch": 9.371980676328503, "grad_norm": 5.193103313446045, "learning_rate": 0.001, "loss": 2.1737, "step": 108640 }, { "epoch": 9.376811594202898, "grad_norm": 1.4407577514648438, "learning_rate": 0.001, "loss": 2.1622, "step": 108696 }, { "epoch": 9.381642512077295, "grad_norm": 1.016109585762024, "learning_rate": 0.001, "loss": 2.177, "step": 108752 }, { "epoch": 9.38647342995169, "grad_norm": 4.421100616455078, "learning_rate": 0.001, "loss": 2.1711, "step": 108808 }, { "epoch": 9.391304347826088, "grad_norm": 1.0442662239074707, "learning_rate": 0.001, "loss": 2.1762, "step": 108864 }, { "epoch": 9.396135265700483, "grad_norm": 1.4544677734375, "learning_rate": 0.001, "loss": 2.1649, "step": 108920 }, { "epoch": 9.40096618357488, "grad_norm": 1.0817885398864746, "learning_rate": 0.001, "loss": 2.1552, "step": 108976 }, { "epoch": 9.405797101449275, "grad_norm": 0.9749849438667297, "learning_rate": 0.001, "loss": 2.1529, "step": 109032 }, { "epoch": 9.41062801932367, "grad_norm": 60.81449890136719, "learning_rate": 0.001, "loss": 2.1571, "step": 109088 }, { "epoch": 9.415458937198068, "grad_norm": 1.931065320968628, "learning_rate": 0.001, "loss": 2.1533, "step": 109144 }, { "epoch": 9.420289855072463, "grad_norm": 0.7593839764595032, "learning_rate": 0.001, "loss": 2.1659, "step": 109200 }, { "epoch": 9.42512077294686, "grad_norm": 1.9358320236206055, "learning_rate": 0.001, "loss": 2.1591, "step": 109256 }, { "epoch": 9.429951690821255, "grad_norm": 1.0968706607818604, "learning_rate": 0.001, "loss": 2.1709, "step": 109312 }, { "epoch": 9.434782608695652, "grad_norm": 3.0567328929901123, "learning_rate": 0.001, "loss": 2.1692, "step": 109368 }, { "epoch": 9.439613526570048, "grad_norm": 2.960401773452759, "learning_rate": 0.001, "loss": 2.1677, "step": 109424 }, { "epoch": 9.444444444444445, "grad_norm": 0.9102815985679626, "learning_rate": 0.001, "loss": 2.1849, "step": 109480 }, { "epoch": 9.44927536231884, "grad_norm": 1.1373631954193115, "learning_rate": 0.001, "loss": 2.1796, "step": 109536 }, { "epoch": 9.454106280193237, "grad_norm": 0.7736930847167969, "learning_rate": 0.001, "loss": 2.1781, "step": 109592 }, { "epoch": 9.458937198067632, "grad_norm": 1.9168815612792969, "learning_rate": 0.001, "loss": 2.1783, "step": 109648 }, { "epoch": 9.46376811594203, "grad_norm": 1.665312647819519, "learning_rate": 0.001, "loss": 2.1805, "step": 109704 }, { "epoch": 9.468599033816425, "grad_norm": 0.9396242499351501, "learning_rate": 0.001, "loss": 2.1662, "step": 109760 }, { "epoch": 9.473429951690822, "grad_norm": 0.7805250883102417, "learning_rate": 0.001, "loss": 2.1605, "step": 109816 }, { "epoch": 9.478260869565217, "grad_norm": 0.7406161427497864, "learning_rate": 0.001, "loss": 2.1566, "step": 109872 }, { "epoch": 9.483091787439614, "grad_norm": 1.0757853984832764, "learning_rate": 0.001, "loss": 2.1488, "step": 109928 }, { "epoch": 9.48792270531401, "grad_norm": 0.8147990107536316, "learning_rate": 0.001, "loss": 2.156, "step": 109984 }, { "epoch": 9.492753623188406, "grad_norm": 1.850212574005127, "learning_rate": 0.001, "loss": 2.1587, "step": 110040 }, { "epoch": 9.497584541062801, "grad_norm": 2.406399965286255, "learning_rate": 0.001, "loss": 2.1521, "step": 110096 }, { "epoch": 9.502415458937199, "grad_norm": 1.3661991357803345, "learning_rate": 0.001, "loss": 2.1549, "step": 110152 }, { "epoch": 9.507246376811594, "grad_norm": 53.109100341796875, "learning_rate": 0.001, "loss": 2.1556, "step": 110208 }, { "epoch": 9.51207729468599, "grad_norm": 1.399226427078247, "learning_rate": 0.001, "loss": 2.1653, "step": 110264 }, { "epoch": 9.516908212560386, "grad_norm": 0.9833760857582092, "learning_rate": 0.001, "loss": 2.1491, "step": 110320 }, { "epoch": 9.521739130434783, "grad_norm": 3.0959413051605225, "learning_rate": 0.001, "loss": 2.1497, "step": 110376 }, { "epoch": 9.526570048309178, "grad_norm": 1.8578912019729614, "learning_rate": 0.001, "loss": 2.1451, "step": 110432 }, { "epoch": 9.531400966183575, "grad_norm": 3.6915457248687744, "learning_rate": 0.001, "loss": 2.1632, "step": 110488 }, { "epoch": 9.53623188405797, "grad_norm": 0.5958594679832458, "learning_rate": 0.001, "loss": 2.1623, "step": 110544 }, { "epoch": 9.541062801932368, "grad_norm": 1.164693832397461, "learning_rate": 0.001, "loss": 2.1522, "step": 110600 }, { "epoch": 9.545893719806763, "grad_norm": 1.7256300449371338, "learning_rate": 0.001, "loss": 2.1501, "step": 110656 }, { "epoch": 9.55072463768116, "grad_norm": 1.6894583702087402, "learning_rate": 0.001, "loss": 2.1586, "step": 110712 }, { "epoch": 9.555555555555555, "grad_norm": 17.32471466064453, "learning_rate": 0.001, "loss": 2.1679, "step": 110768 }, { "epoch": 9.560386473429952, "grad_norm": 1.196189522743225, "learning_rate": 0.001, "loss": 2.17, "step": 110824 }, { "epoch": 9.565217391304348, "grad_norm": 1.9024502038955688, "learning_rate": 0.001, "loss": 2.152, "step": 110880 }, { "epoch": 9.570048309178745, "grad_norm": 1.2744781970977783, "learning_rate": 0.001, "loss": 2.1484, "step": 110936 }, { "epoch": 9.57487922705314, "grad_norm": 1.4014837741851807, "learning_rate": 0.001, "loss": 2.1663, "step": 110992 }, { "epoch": 9.579710144927537, "grad_norm": 1.0443902015686035, "learning_rate": 0.001, "loss": 2.1792, "step": 111048 }, { "epoch": 9.584541062801932, "grad_norm": 1.8221991062164307, "learning_rate": 0.001, "loss": 2.1664, "step": 111104 }, { "epoch": 9.58937198067633, "grad_norm": 0.8239999413490295, "learning_rate": 0.001, "loss": 2.1849, "step": 111160 }, { "epoch": 9.594202898550725, "grad_norm": 3.60684871673584, "learning_rate": 0.001, "loss": 2.1722, "step": 111216 }, { "epoch": 9.59903381642512, "grad_norm": 1.3424363136291504, "learning_rate": 0.001, "loss": 2.1686, "step": 111272 }, { "epoch": 9.603864734299517, "grad_norm": 0.8992599844932556, "learning_rate": 0.001, "loss": 2.1637, "step": 111328 }, { "epoch": 9.608695652173914, "grad_norm": 1.7839155197143555, "learning_rate": 0.001, "loss": 2.1564, "step": 111384 }, { "epoch": 9.61352657004831, "grad_norm": 2.079591751098633, "learning_rate": 0.001, "loss": 2.1632, "step": 111440 }, { "epoch": 9.618357487922705, "grad_norm": 1.8810837268829346, "learning_rate": 0.001, "loss": 2.1653, "step": 111496 }, { "epoch": 9.623188405797102, "grad_norm": 3.459331512451172, "learning_rate": 0.001, "loss": 2.1547, "step": 111552 }, { "epoch": 9.628019323671497, "grad_norm": 1.6049835681915283, "learning_rate": 0.001, "loss": 2.1696, "step": 111608 }, { "epoch": 9.632850241545894, "grad_norm": 1.325453758239746, "learning_rate": 0.001, "loss": 2.1868, "step": 111664 }, { "epoch": 9.63768115942029, "grad_norm": 4.009735107421875, "learning_rate": 0.001, "loss": 2.1699, "step": 111720 }, { "epoch": 9.642512077294686, "grad_norm": 1.6072872877120972, "learning_rate": 0.001, "loss": 2.166, "step": 111776 }, { "epoch": 9.647342995169081, "grad_norm": 1.1499840021133423, "learning_rate": 0.001, "loss": 2.1646, "step": 111832 }, { "epoch": 9.652173913043478, "grad_norm": 0.8730283975601196, "learning_rate": 0.001, "loss": 2.1686, "step": 111888 }, { "epoch": 9.657004830917874, "grad_norm": 1.9930415153503418, "learning_rate": 0.001, "loss": 2.1654, "step": 111944 }, { "epoch": 9.66183574879227, "grad_norm": 1.2306714057922363, "learning_rate": 0.001, "loss": 2.1788, "step": 112000 }, { "epoch": 9.666666666666666, "grad_norm": 2.412792444229126, "learning_rate": 0.001, "loss": 2.1812, "step": 112056 }, { "epoch": 9.671497584541063, "grad_norm": 1.0699572563171387, "learning_rate": 0.001, "loss": 2.1845, "step": 112112 }, { "epoch": 9.676328502415458, "grad_norm": 5.60946798324585, "learning_rate": 0.001, "loss": 2.1791, "step": 112168 }, { "epoch": 9.681159420289855, "grad_norm": 2.9559884071350098, "learning_rate": 0.001, "loss": 2.1961, "step": 112224 }, { "epoch": 9.68599033816425, "grad_norm": 1.124289631843567, "learning_rate": 0.001, "loss": 2.1916, "step": 112280 }, { "epoch": 9.690821256038648, "grad_norm": 1.33521568775177, "learning_rate": 0.001, "loss": 2.1817, "step": 112336 }, { "epoch": 9.695652173913043, "grad_norm": 1.4196842908859253, "learning_rate": 0.001, "loss": 2.1772, "step": 112392 }, { "epoch": 9.70048309178744, "grad_norm": 1.963876724243164, "learning_rate": 0.001, "loss": 2.1909, "step": 112448 }, { "epoch": 9.705314009661835, "grad_norm": 2.2582218647003174, "learning_rate": 0.001, "loss": 2.1939, "step": 112504 }, { "epoch": 9.710144927536232, "grad_norm": 2.133075714111328, "learning_rate": 0.001, "loss": 2.1901, "step": 112560 }, { "epoch": 9.714975845410628, "grad_norm": 0.8689550161361694, "learning_rate": 0.001, "loss": 2.1928, "step": 112616 }, { "epoch": 9.719806763285025, "grad_norm": 1.3315383195877075, "learning_rate": 0.001, "loss": 2.1973, "step": 112672 }, { "epoch": 9.72463768115942, "grad_norm": 1.694207787513733, "learning_rate": 0.001, "loss": 2.1817, "step": 112728 }, { "epoch": 9.729468599033817, "grad_norm": 0.88382488489151, "learning_rate": 0.001, "loss": 2.1899, "step": 112784 }, { "epoch": 9.734299516908212, "grad_norm": 1.7167845964431763, "learning_rate": 0.001, "loss": 2.2142, "step": 112840 }, { "epoch": 9.73913043478261, "grad_norm": 3.2816264629364014, "learning_rate": 0.001, "loss": 2.2034, "step": 112896 }, { "epoch": 9.743961352657005, "grad_norm": 9.500353813171387, "learning_rate": 0.001, "loss": 2.2049, "step": 112952 }, { "epoch": 9.748792270531402, "grad_norm": 14.029086112976074, "learning_rate": 0.001, "loss": 2.2004, "step": 113008 }, { "epoch": 9.753623188405797, "grad_norm": 3.5523064136505127, "learning_rate": 0.001, "loss": 2.1968, "step": 113064 }, { "epoch": 9.758454106280194, "grad_norm": 2.552189350128174, "learning_rate": 0.001, "loss": 2.1881, "step": 113120 }, { "epoch": 9.76328502415459, "grad_norm": 1.7633849382400513, "learning_rate": 0.001, "loss": 2.1924, "step": 113176 }, { "epoch": 9.768115942028986, "grad_norm": 1.5622775554656982, "learning_rate": 0.001, "loss": 2.1879, "step": 113232 }, { "epoch": 9.772946859903382, "grad_norm": 1.186415672302246, "learning_rate": 0.001, "loss": 2.1956, "step": 113288 }, { "epoch": 9.777777777777779, "grad_norm": 1.710774302482605, "learning_rate": 0.001, "loss": 2.1866, "step": 113344 }, { "epoch": 9.782608695652174, "grad_norm": 1.7137800455093384, "learning_rate": 0.001, "loss": 2.1871, "step": 113400 }, { "epoch": 9.78743961352657, "grad_norm": 1.5322470664978027, "learning_rate": 0.001, "loss": 2.1924, "step": 113456 }, { "epoch": 9.792270531400966, "grad_norm": 1.3550103902816772, "learning_rate": 0.001, "loss": 2.1964, "step": 113512 }, { "epoch": 9.797101449275363, "grad_norm": 1.6661609411239624, "learning_rate": 0.001, "loss": 2.1969, "step": 113568 }, { "epoch": 9.801932367149758, "grad_norm": 1.63509202003479, "learning_rate": 0.001, "loss": 2.1916, "step": 113624 }, { "epoch": 9.806763285024154, "grad_norm": 1.312102198600769, "learning_rate": 0.001, "loss": 2.2097, "step": 113680 }, { "epoch": 9.81159420289855, "grad_norm": 1.6421797275543213, "learning_rate": 0.001, "loss": 2.1935, "step": 113736 }, { "epoch": 9.816425120772946, "grad_norm": 1.615949273109436, "learning_rate": 0.001, "loss": 2.1851, "step": 113792 }, { "epoch": 9.821256038647343, "grad_norm": 3.91939115524292, "learning_rate": 0.001, "loss": 2.1883, "step": 113848 }, { "epoch": 9.826086956521738, "grad_norm": 1.369583249092102, "learning_rate": 0.001, "loss": 2.2042, "step": 113904 }, { "epoch": 9.830917874396135, "grad_norm": 3.2655386924743652, "learning_rate": 0.001, "loss": 2.1877, "step": 113960 }, { "epoch": 9.83574879227053, "grad_norm": 0.9581074714660645, "learning_rate": 0.001, "loss": 2.2027, "step": 114016 }, { "epoch": 9.840579710144928, "grad_norm": 3.2936856746673584, "learning_rate": 0.001, "loss": 2.1996, "step": 114072 }, { "epoch": 9.845410628019323, "grad_norm": 1.1268800497055054, "learning_rate": 0.001, "loss": 2.1959, "step": 114128 }, { "epoch": 9.85024154589372, "grad_norm": 1.1536705493927002, "learning_rate": 0.001, "loss": 2.1947, "step": 114184 }, { "epoch": 9.855072463768115, "grad_norm": 1.005270004272461, "learning_rate": 0.001, "loss": 2.1832, "step": 114240 }, { "epoch": 9.859903381642512, "grad_norm": 4.5433783531188965, "learning_rate": 0.001, "loss": 2.182, "step": 114296 }, { "epoch": 9.864734299516908, "grad_norm": 1.4915143251419067, "learning_rate": 0.001, "loss": 2.1697, "step": 114352 }, { "epoch": 9.869565217391305, "grad_norm": 1.7086212635040283, "learning_rate": 0.001, "loss": 2.1653, "step": 114408 }, { "epoch": 9.8743961352657, "grad_norm": 2.2827277183532715, "learning_rate": 0.001, "loss": 2.1818, "step": 114464 }, { "epoch": 9.879227053140097, "grad_norm": 2.942509412765503, "learning_rate": 0.001, "loss": 2.1755, "step": 114520 }, { "epoch": 9.884057971014492, "grad_norm": 3.590705633163452, "learning_rate": 0.001, "loss": 2.1699, "step": 114576 }, { "epoch": 9.88888888888889, "grad_norm": 9.507326126098633, "learning_rate": 0.001, "loss": 2.1685, "step": 114632 }, { "epoch": 9.893719806763285, "grad_norm": 1.8674633502960205, "learning_rate": 0.001, "loss": 2.1755, "step": 114688 }, { "epoch": 9.898550724637682, "grad_norm": 1.510111689567566, "learning_rate": 0.001, "loss": 2.1737, "step": 114744 }, { "epoch": 9.903381642512077, "grad_norm": 1.132345199584961, "learning_rate": 0.001, "loss": 2.1684, "step": 114800 }, { "epoch": 9.908212560386474, "grad_norm": 2.128943920135498, "learning_rate": 0.001, "loss": 2.1788, "step": 114856 }, { "epoch": 9.91304347826087, "grad_norm": 0.9413778781890869, "learning_rate": 0.001, "loss": 2.1952, "step": 114912 }, { "epoch": 9.917874396135266, "grad_norm": 1.1830946207046509, "learning_rate": 0.001, "loss": 2.1836, "step": 114968 }, { "epoch": 9.922705314009661, "grad_norm": 2.6084482669830322, "learning_rate": 0.001, "loss": 2.1742, "step": 115024 }, { "epoch": 9.927536231884059, "grad_norm": 2.902265787124634, "learning_rate": 0.001, "loss": 2.1742, "step": 115080 }, { "epoch": 9.932367149758454, "grad_norm": 0.7828874588012695, "learning_rate": 0.001, "loss": 2.1884, "step": 115136 }, { "epoch": 9.93719806763285, "grad_norm": 1.3071789741516113, "learning_rate": 0.001, "loss": 2.1708, "step": 115192 }, { "epoch": 9.942028985507246, "grad_norm": 2.4230844974517822, "learning_rate": 0.001, "loss": 2.1827, "step": 115248 }, { "epoch": 9.946859903381643, "grad_norm": 1.1827428340911865, "learning_rate": 0.001, "loss": 2.1866, "step": 115304 }, { "epoch": 9.951690821256038, "grad_norm": 0.6334431171417236, "learning_rate": 0.001, "loss": 2.1995, "step": 115360 }, { "epoch": 9.956521739130435, "grad_norm": 1.0589326620101929, "learning_rate": 0.001, "loss": 2.1958, "step": 115416 }, { "epoch": 9.96135265700483, "grad_norm": 0.9200308322906494, "learning_rate": 0.001, "loss": 2.1966, "step": 115472 }, { "epoch": 9.966183574879228, "grad_norm": 4.167257308959961, "learning_rate": 0.001, "loss": 2.2287, "step": 115528 }, { "epoch": 9.971014492753623, "grad_norm": 1.3077119588851929, "learning_rate": 0.001, "loss": 2.2365, "step": 115584 }, { "epoch": 9.97584541062802, "grad_norm": 1.1540191173553467, "learning_rate": 0.001, "loss": 2.2286, "step": 115640 }, { "epoch": 9.980676328502415, "grad_norm": 0.6693556308746338, "learning_rate": 0.001, "loss": 2.233, "step": 115696 }, { "epoch": 9.985507246376812, "grad_norm": 0.5801311135292053, "learning_rate": 0.001, "loss": 2.2333, "step": 115752 }, { "epoch": 9.990338164251208, "grad_norm": 2.2549819946289062, "learning_rate": 0.001, "loss": 2.2139, "step": 115808 }, { "epoch": 9.995169082125603, "grad_norm": 1.105181097984314, "learning_rate": 0.001, "loss": 2.1973, "step": 115864 }, { "epoch": 10.0, "grad_norm": 1.5114330053329468, "learning_rate": 0.001, "loss": 2.2032, "step": 115920 }, { "epoch": 10.004830917874395, "grad_norm": 5.178586483001709, "learning_rate": 0.001, "loss": 2.1612, "step": 115976 }, { "epoch": 10.009661835748792, "grad_norm": 1.6251518726348877, "learning_rate": 0.001, "loss": 2.1602, "step": 116032 }, { "epoch": 10.014492753623188, "grad_norm": 2.286675453186035, "learning_rate": 0.001, "loss": 2.1547, "step": 116088 }, { "epoch": 10.019323671497585, "grad_norm": 1.1216520071029663, "learning_rate": 0.001, "loss": 2.1655, "step": 116144 }, { "epoch": 10.02415458937198, "grad_norm": 2.1495821475982666, "learning_rate": 0.001, "loss": 2.1414, "step": 116200 }, { "epoch": 10.028985507246377, "grad_norm": 1.5574885606765747, "learning_rate": 0.001, "loss": 2.1341, "step": 116256 }, { "epoch": 10.033816425120772, "grad_norm": 1.5737286806106567, "learning_rate": 0.001, "loss": 2.1399, "step": 116312 }, { "epoch": 10.03864734299517, "grad_norm": 6.003429412841797, "learning_rate": 0.001, "loss": 2.135, "step": 116368 }, { "epoch": 10.043478260869565, "grad_norm": 1.8048330545425415, "learning_rate": 0.001, "loss": 2.1464, "step": 116424 }, { "epoch": 10.048309178743962, "grad_norm": 1.5536140203475952, "learning_rate": 0.001, "loss": 2.1513, "step": 116480 }, { "epoch": 10.053140096618357, "grad_norm": 7.6224775314331055, "learning_rate": 0.001, "loss": 2.1605, "step": 116536 }, { "epoch": 10.057971014492754, "grad_norm": 1.0521200895309448, "learning_rate": 0.001, "loss": 2.1657, "step": 116592 }, { "epoch": 10.06280193236715, "grad_norm": 1.350377082824707, "learning_rate": 0.001, "loss": 2.1852, "step": 116648 }, { "epoch": 10.067632850241546, "grad_norm": 1.0037842988967896, "learning_rate": 0.001, "loss": 2.178, "step": 116704 }, { "epoch": 10.072463768115941, "grad_norm": 0.8875685930252075, "learning_rate": 0.001, "loss": 2.172, "step": 116760 }, { "epoch": 10.077294685990339, "grad_norm": 4.938282012939453, "learning_rate": 0.001, "loss": 2.1573, "step": 116816 }, { "epoch": 10.082125603864734, "grad_norm": 0.5736998915672302, "learning_rate": 0.001, "loss": 2.1408, "step": 116872 }, { "epoch": 10.08695652173913, "grad_norm": 1.6928740739822388, "learning_rate": 0.001, "loss": 2.1426, "step": 116928 }, { "epoch": 10.091787439613526, "grad_norm": 0.7179147005081177, "learning_rate": 0.001, "loss": 2.1476, "step": 116984 }, { "epoch": 10.096618357487923, "grad_norm": 1.4036375284194946, "learning_rate": 0.001, "loss": 2.1505, "step": 117040 }, { "epoch": 10.101449275362318, "grad_norm": 1.5228289365768433, "learning_rate": 0.001, "loss": 2.1659, "step": 117096 }, { "epoch": 10.106280193236715, "grad_norm": 1.519679307937622, "learning_rate": 0.001, "loss": 2.1831, "step": 117152 }, { "epoch": 10.11111111111111, "grad_norm": 1.7719697952270508, "learning_rate": 0.001, "loss": 2.1725, "step": 117208 }, { "epoch": 10.115942028985508, "grad_norm": 1.4096169471740723, "learning_rate": 0.001, "loss": 2.1852, "step": 117264 }, { "epoch": 10.120772946859903, "grad_norm": 1.4333758354187012, "learning_rate": 0.001, "loss": 2.1574, "step": 117320 }, { "epoch": 10.1256038647343, "grad_norm": 2.2329275608062744, "learning_rate": 0.001, "loss": 2.1499, "step": 117376 }, { "epoch": 10.130434782608695, "grad_norm": 1.9216878414154053, "learning_rate": 0.001, "loss": 2.1551, "step": 117432 }, { "epoch": 10.135265700483092, "grad_norm": 1.291159987449646, "learning_rate": 0.001, "loss": 2.1671, "step": 117488 }, { "epoch": 10.140096618357488, "grad_norm": 3.5178771018981934, "learning_rate": 0.001, "loss": 2.1656, "step": 117544 }, { "epoch": 10.144927536231885, "grad_norm": 0.962126612663269, "learning_rate": 0.001, "loss": 2.1668, "step": 117600 }, { "epoch": 10.14975845410628, "grad_norm": 0.7964538335800171, "learning_rate": 0.001, "loss": 2.1575, "step": 117656 }, { "epoch": 10.154589371980677, "grad_norm": 0.5703120231628418, "learning_rate": 0.001, "loss": 2.1566, "step": 117712 }, { "epoch": 10.159420289855072, "grad_norm": 6.9353461265563965, "learning_rate": 0.001, "loss": 2.1644, "step": 117768 }, { "epoch": 10.16425120772947, "grad_norm": 2.117323875427246, "learning_rate": 0.001, "loss": 2.1638, "step": 117824 }, { "epoch": 10.169082125603865, "grad_norm": 1.7028167247772217, "learning_rate": 0.001, "loss": 2.1622, "step": 117880 }, { "epoch": 10.173913043478262, "grad_norm": 2.015174150466919, "learning_rate": 0.001, "loss": 2.1675, "step": 117936 }, { "epoch": 10.178743961352657, "grad_norm": 1.4474927186965942, "learning_rate": 0.001, "loss": 2.1773, "step": 117992 }, { "epoch": 10.183574879227054, "grad_norm": 1.9509433507919312, "learning_rate": 0.001, "loss": 2.1792, "step": 118048 }, { "epoch": 10.18840579710145, "grad_norm": 3.8931899070739746, "learning_rate": 0.001, "loss": 2.1739, "step": 118104 }, { "epoch": 10.193236714975846, "grad_norm": 11.294742584228516, "learning_rate": 0.001, "loss": 2.1798, "step": 118160 }, { "epoch": 10.198067632850242, "grad_norm": 1.359959363937378, "learning_rate": 0.001, "loss": 2.1741, "step": 118216 }, { "epoch": 10.202898550724637, "grad_norm": 1.4701234102249146, "learning_rate": 0.001, "loss": 2.1738, "step": 118272 }, { "epoch": 10.207729468599034, "grad_norm": 5.7130632400512695, "learning_rate": 0.001, "loss": 2.1667, "step": 118328 }, { "epoch": 10.21256038647343, "grad_norm": 11.105052947998047, "learning_rate": 0.001, "loss": 2.1774, "step": 118384 }, { "epoch": 10.217391304347826, "grad_norm": 1.362142562866211, "learning_rate": 0.001, "loss": 2.1781, "step": 118440 }, { "epoch": 10.222222222222221, "grad_norm": 1.7012754678726196, "learning_rate": 0.001, "loss": 2.1578, "step": 118496 }, { "epoch": 10.227053140096618, "grad_norm": 2.72023606300354, "learning_rate": 0.001, "loss": 2.1584, "step": 118552 }, { "epoch": 10.231884057971014, "grad_norm": 2.6579036712646484, "learning_rate": 0.001, "loss": 2.1582, "step": 118608 }, { "epoch": 10.23671497584541, "grad_norm": 3.486313581466675, "learning_rate": 0.001, "loss": 2.1577, "step": 118664 }, { "epoch": 10.241545893719806, "grad_norm": 4.33111572265625, "learning_rate": 0.001, "loss": 2.1522, "step": 118720 }, { "epoch": 10.246376811594203, "grad_norm": 0.9709329009056091, "learning_rate": 0.001, "loss": 2.158, "step": 118776 }, { "epoch": 10.251207729468598, "grad_norm": 1.9588534832000732, "learning_rate": 0.001, "loss": 2.1596, "step": 118832 }, { "epoch": 10.256038647342995, "grad_norm": 1.4229439496994019, "learning_rate": 0.001, "loss": 2.1604, "step": 118888 }, { "epoch": 10.26086956521739, "grad_norm": 1.3870952129364014, "learning_rate": 0.001, "loss": 2.1672, "step": 118944 }, { "epoch": 10.265700483091788, "grad_norm": 2.0013091564178467, "learning_rate": 0.001, "loss": 2.1833, "step": 119000 }, { "epoch": 10.270531400966183, "grad_norm": 1.0226377248764038, "learning_rate": 0.001, "loss": 2.1621, "step": 119056 }, { "epoch": 10.27536231884058, "grad_norm": 3.3339829444885254, "learning_rate": 0.001, "loss": 2.1581, "step": 119112 }, { "epoch": 10.280193236714975, "grad_norm": 1.468506097793579, "learning_rate": 0.001, "loss": 2.1621, "step": 119168 }, { "epoch": 10.285024154589372, "grad_norm": 1.8375539779663086, "learning_rate": 0.001, "loss": 2.1681, "step": 119224 }, { "epoch": 10.289855072463768, "grad_norm": 2.368908643722534, "learning_rate": 0.001, "loss": 2.1732, "step": 119280 }, { "epoch": 10.294685990338165, "grad_norm": 0.7699496746063232, "learning_rate": 0.001, "loss": 2.1618, "step": 119336 }, { "epoch": 10.29951690821256, "grad_norm": 5.445796012878418, "learning_rate": 0.001, "loss": 2.1587, "step": 119392 }, { "epoch": 10.304347826086957, "grad_norm": 1.6550136804580688, "learning_rate": 0.001, "loss": 2.1526, "step": 119448 }, { "epoch": 10.309178743961352, "grad_norm": 0.967065155506134, "learning_rate": 0.001, "loss": 2.1569, "step": 119504 }, { "epoch": 10.31400966183575, "grad_norm": 0.7379729151725769, "learning_rate": 0.001, "loss": 2.1524, "step": 119560 }, { "epoch": 10.318840579710145, "grad_norm": 1.800350308418274, "learning_rate": 0.001, "loss": 2.1544, "step": 119616 }, { "epoch": 10.323671497584542, "grad_norm": 1.2661592960357666, "learning_rate": 0.001, "loss": 2.1591, "step": 119672 }, { "epoch": 10.328502415458937, "grad_norm": 4.901576519012451, "learning_rate": 0.001, "loss": 2.1428, "step": 119728 }, { "epoch": 10.333333333333334, "grad_norm": 2.654900312423706, "learning_rate": 0.001, "loss": 2.145, "step": 119784 }, { "epoch": 10.33816425120773, "grad_norm": 2.5500903129577637, "learning_rate": 0.001, "loss": 2.1315, "step": 119840 }, { "epoch": 10.342995169082126, "grad_norm": 1.9111007452011108, "learning_rate": 0.001, "loss": 2.1436, "step": 119896 }, { "epoch": 10.347826086956522, "grad_norm": 1.2700903415679932, "learning_rate": 0.001, "loss": 2.1443, "step": 119952 }, { "epoch": 10.352657004830919, "grad_norm": 1.0220659971237183, "learning_rate": 0.001, "loss": 2.142, "step": 120008 }, { "epoch": 10.357487922705314, "grad_norm": 1.5118297338485718, "learning_rate": 0.001, "loss": 2.1379, "step": 120064 }, { "epoch": 10.36231884057971, "grad_norm": 0.7818297147750854, "learning_rate": 0.001, "loss": 2.1425, "step": 120120 }, { "epoch": 10.367149758454106, "grad_norm": 2.0427939891815186, "learning_rate": 0.001, "loss": 2.1497, "step": 120176 }, { "epoch": 10.371980676328503, "grad_norm": 3.5443224906921387, "learning_rate": 0.001, "loss": 2.1559, "step": 120232 }, { "epoch": 10.376811594202898, "grad_norm": 2.7562294006347656, "learning_rate": 0.001, "loss": 2.1672, "step": 120288 }, { "epoch": 10.381642512077295, "grad_norm": 1.0163748264312744, "learning_rate": 0.001, "loss": 2.1703, "step": 120344 }, { "epoch": 10.38647342995169, "grad_norm": 0.8418638706207275, "learning_rate": 0.001, "loss": 2.162, "step": 120400 }, { "epoch": 10.391304347826088, "grad_norm": 0.8190461993217468, "learning_rate": 0.001, "loss": 2.1607, "step": 120456 }, { "epoch": 10.396135265700483, "grad_norm": 1.17571222782135, "learning_rate": 0.001, "loss": 2.1501, "step": 120512 }, { "epoch": 10.40096618357488, "grad_norm": 1.0930935144424438, "learning_rate": 0.001, "loss": 2.1556, "step": 120568 }, { "epoch": 10.405797101449275, "grad_norm": 0.7186071276664734, "learning_rate": 0.001, "loss": 2.147, "step": 120624 }, { "epoch": 10.41062801932367, "grad_norm": 0.8140950798988342, "learning_rate": 0.001, "loss": 2.1433, "step": 120680 }, { "epoch": 10.415458937198068, "grad_norm": 1.202014684677124, "learning_rate": 0.001, "loss": 2.1453, "step": 120736 }, { "epoch": 10.420289855072463, "grad_norm": 0.7525999546051025, "learning_rate": 0.001, "loss": 2.1478, "step": 120792 }, { "epoch": 10.42512077294686, "grad_norm": 0.7609923481941223, "learning_rate": 0.001, "loss": 2.1499, "step": 120848 }, { "epoch": 10.429951690821255, "grad_norm": 5.396042823791504, "learning_rate": 0.001, "loss": 2.1476, "step": 120904 }, { "epoch": 10.434782608695652, "grad_norm": 1.032334804534912, "learning_rate": 0.001, "loss": 2.1488, "step": 120960 }, { "epoch": 10.439613526570048, "grad_norm": 1.0725252628326416, "learning_rate": 0.001, "loss": 2.1625, "step": 121016 }, { "epoch": 10.444444444444445, "grad_norm": 0.7636986374855042, "learning_rate": 0.001, "loss": 2.1707, "step": 121072 }, { "epoch": 10.44927536231884, "grad_norm": 1.3219259977340698, "learning_rate": 0.001, "loss": 2.1801, "step": 121128 }, { "epoch": 10.454106280193237, "grad_norm": 1.2190625667572021, "learning_rate": 0.001, "loss": 2.1805, "step": 121184 }, { "epoch": 10.458937198067632, "grad_norm": 2.133542776107788, "learning_rate": 0.001, "loss": 2.1935, "step": 121240 }, { "epoch": 10.46376811594203, "grad_norm": 6.187461853027344, "learning_rate": 0.001, "loss": 2.1962, "step": 121296 }, { "epoch": 10.468599033816425, "grad_norm": 1.1827586889266968, "learning_rate": 0.001, "loss": 2.1904, "step": 121352 }, { "epoch": 10.473429951690822, "grad_norm": 1.6734812259674072, "learning_rate": 0.001, "loss": 2.1576, "step": 121408 }, { "epoch": 10.478260869565217, "grad_norm": 1.8922410011291504, "learning_rate": 0.001, "loss": 2.1535, "step": 121464 }, { "epoch": 10.483091787439614, "grad_norm": 0.827451765537262, "learning_rate": 0.001, "loss": 2.1394, "step": 121520 }, { "epoch": 10.48792270531401, "grad_norm": 2.311612844467163, "learning_rate": 0.001, "loss": 2.1376, "step": 121576 }, { "epoch": 10.492753623188406, "grad_norm": 1.1984610557556152, "learning_rate": 0.001, "loss": 2.1345, "step": 121632 }, { "epoch": 10.497584541062801, "grad_norm": 1.046425223350525, "learning_rate": 0.001, "loss": 2.1359, "step": 121688 }, { "epoch": 10.502415458937199, "grad_norm": 3.223189115524292, "learning_rate": 0.001, "loss": 2.1565, "step": 121744 }, { "epoch": 10.507246376811594, "grad_norm": 1.6878868341445923, "learning_rate": 0.001, "loss": 2.1459, "step": 121800 }, { "epoch": 10.51207729468599, "grad_norm": 0.9725037813186646, "learning_rate": 0.001, "loss": 2.1424, "step": 121856 }, { "epoch": 10.516908212560386, "grad_norm": 0.5949786305427551, "learning_rate": 0.001, "loss": 2.1485, "step": 121912 }, { "epoch": 10.521739130434783, "grad_norm": 1.2117705345153809, "learning_rate": 0.001, "loss": 2.1444, "step": 121968 }, { "epoch": 10.526570048309178, "grad_norm": 1.1697850227355957, "learning_rate": 0.001, "loss": 2.1415, "step": 122024 }, { "epoch": 10.531400966183575, "grad_norm": 2.1525418758392334, "learning_rate": 0.001, "loss": 2.1492, "step": 122080 }, { "epoch": 10.53623188405797, "grad_norm": 5.725024700164795, "learning_rate": 0.001, "loss": 2.1523, "step": 122136 }, { "epoch": 10.541062801932368, "grad_norm": 2.6298274993896484, "learning_rate": 0.001, "loss": 2.1799, "step": 122192 }, { "epoch": 10.545893719806763, "grad_norm": 2.074086904525757, "learning_rate": 0.001, "loss": 2.1718, "step": 122248 }, { "epoch": 10.55072463768116, "grad_norm": 2.927433967590332, "learning_rate": 0.001, "loss": 2.1838, "step": 122304 }, { "epoch": 10.555555555555555, "grad_norm": 2.6468324661254883, "learning_rate": 0.001, "loss": 2.1839, "step": 122360 }, { "epoch": 10.560386473429952, "grad_norm": 3.347214698791504, "learning_rate": 0.001, "loss": 2.1812, "step": 122416 }, { "epoch": 10.565217391304348, "grad_norm": 1.0131034851074219, "learning_rate": 0.001, "loss": 2.1923, "step": 122472 }, { "epoch": 10.570048309178745, "grad_norm": 1.5336928367614746, "learning_rate": 0.001, "loss": 2.1805, "step": 122528 }, { "epoch": 10.57487922705314, "grad_norm": 5.579852104187012, "learning_rate": 0.001, "loss": 2.1784, "step": 122584 }, { "epoch": 10.579710144927537, "grad_norm": 3.902099609375, "learning_rate": 0.001, "loss": 2.1826, "step": 122640 }, { "epoch": 10.584541062801932, "grad_norm": 1.4280476570129395, "learning_rate": 0.001, "loss": 2.179, "step": 122696 }, { "epoch": 10.58937198067633, "grad_norm": 2.199371814727783, "learning_rate": 0.001, "loss": 2.1884, "step": 122752 }, { "epoch": 10.594202898550725, "grad_norm": 1.8500422239303589, "learning_rate": 0.001, "loss": 2.1975, "step": 122808 }, { "epoch": 10.59903381642512, "grad_norm": 1.2162278890609741, "learning_rate": 0.001, "loss": 2.1831, "step": 122864 }, { "epoch": 10.603864734299517, "grad_norm": 1.2840019464492798, "learning_rate": 0.001, "loss": 2.1973, "step": 122920 }, { "epoch": 10.608695652173914, "grad_norm": 1.730076789855957, "learning_rate": 0.001, "loss": 2.2062, "step": 122976 }, { "epoch": 10.61352657004831, "grad_norm": 1.1242142915725708, "learning_rate": 0.001, "loss": 2.1959, "step": 123032 }, { "epoch": 10.618357487922705, "grad_norm": 3.820575714111328, "learning_rate": 0.001, "loss": 2.1967, "step": 123088 }, { "epoch": 10.623188405797102, "grad_norm": 2.3786606788635254, "learning_rate": 0.001, "loss": 2.1959, "step": 123144 }, { "epoch": 10.628019323671497, "grad_norm": 4.595869541168213, "learning_rate": 0.001, "loss": 2.1948, "step": 123200 }, { "epoch": 10.632850241545894, "grad_norm": 1.2908978462219238, "learning_rate": 0.001, "loss": 2.166, "step": 123256 }, { "epoch": 10.63768115942029, "grad_norm": 2.2055788040161133, "learning_rate": 0.001, "loss": 2.1674, "step": 123312 }, { "epoch": 10.642512077294686, "grad_norm": 1.5530894994735718, "learning_rate": 0.001, "loss": 2.1765, "step": 123368 }, { "epoch": 10.647342995169081, "grad_norm": 2.4489030838012695, "learning_rate": 0.001, "loss": 2.1696, "step": 123424 }, { "epoch": 10.652173913043478, "grad_norm": 1.9906662702560425, "learning_rate": 0.001, "loss": 2.1919, "step": 123480 }, { "epoch": 10.657004830917874, "grad_norm": 2.92899489402771, "learning_rate": 0.001, "loss": 2.1816, "step": 123536 }, { "epoch": 10.66183574879227, "grad_norm": 1.9007189273834229, "learning_rate": 0.001, "loss": 2.2027, "step": 123592 }, { "epoch": 10.666666666666666, "grad_norm": 1.301443338394165, "learning_rate": 0.001, "loss": 2.2062, "step": 123648 }, { "epoch": 10.671497584541063, "grad_norm": 1.3572661876678467, "learning_rate": 0.001, "loss": 2.2066, "step": 123704 }, { "epoch": 10.676328502415458, "grad_norm": 3.151829242706299, "learning_rate": 0.001, "loss": 2.2135, "step": 123760 }, { "epoch": 10.681159420289855, "grad_norm": 6.817243576049805, "learning_rate": 0.001, "loss": 2.2025, "step": 123816 }, { "epoch": 10.68599033816425, "grad_norm": 2.553074598312378, "learning_rate": 0.001, "loss": 2.1885, "step": 123872 }, { "epoch": 10.690821256038648, "grad_norm": 7.715940952301025, "learning_rate": 0.001, "loss": 2.1763, "step": 123928 }, { "epoch": 10.695652173913043, "grad_norm": 1.651444911956787, "learning_rate": 0.001, "loss": 2.173, "step": 123984 }, { "epoch": 10.70048309178744, "grad_norm": 1.3205150365829468, "learning_rate": 0.001, "loss": 2.1886, "step": 124040 }, { "epoch": 10.705314009661835, "grad_norm": 1.0851335525512695, "learning_rate": 0.001, "loss": 2.1901, "step": 124096 }, { "epoch": 10.710144927536232, "grad_norm": 1.4675530195236206, "learning_rate": 0.001, "loss": 2.1846, "step": 124152 }, { "epoch": 10.714975845410628, "grad_norm": 1.7252501249313354, "learning_rate": 0.001, "loss": 2.1878, "step": 124208 }, { "epoch": 10.719806763285025, "grad_norm": 2.2222039699554443, "learning_rate": 0.001, "loss": 2.1849, "step": 124264 }, { "epoch": 10.72463768115942, "grad_norm": 4.243847370147705, "learning_rate": 0.001, "loss": 2.1787, "step": 124320 }, { "epoch": 10.729468599033817, "grad_norm": 146.34423828125, "learning_rate": 0.001, "loss": 2.1765, "step": 124376 }, { "epoch": 10.734299516908212, "grad_norm": 6.666025638580322, "learning_rate": 0.001, "loss": 2.1898, "step": 124432 }, { "epoch": 10.73913043478261, "grad_norm": 1.2949142456054688, "learning_rate": 0.001, "loss": 2.1871, "step": 124488 }, { "epoch": 10.743961352657005, "grad_norm": 2.8519201278686523, "learning_rate": 0.001, "loss": 2.1809, "step": 124544 }, { "epoch": 10.748792270531402, "grad_norm": 2.52215576171875, "learning_rate": 0.001, "loss": 2.1885, "step": 124600 }, { "epoch": 10.753623188405797, "grad_norm": 1.3800928592681885, "learning_rate": 0.001, "loss": 2.1792, "step": 124656 }, { "epoch": 10.758454106280194, "grad_norm": 1.5619258880615234, "learning_rate": 0.001, "loss": 2.1761, "step": 124712 }, { "epoch": 10.76328502415459, "grad_norm": 5.771429061889648, "learning_rate": 0.001, "loss": 2.1715, "step": 124768 }, { "epoch": 10.768115942028986, "grad_norm": 2.991969108581543, "learning_rate": 0.001, "loss": 2.1681, "step": 124824 }, { "epoch": 10.772946859903382, "grad_norm": 7.448109149932861, "learning_rate": 0.001, "loss": 2.1793, "step": 124880 }, { "epoch": 10.777777777777779, "grad_norm": 2.432189464569092, "learning_rate": 0.001, "loss": 2.2094, "step": 124936 }, { "epoch": 10.782608695652174, "grad_norm": 1.4482777118682861, "learning_rate": 0.001, "loss": 2.1856, "step": 124992 }, { "epoch": 10.78743961352657, "grad_norm": 1.8974546194076538, "learning_rate": 0.001, "loss": 2.1959, "step": 125048 }, { "epoch": 10.792270531400966, "grad_norm": 3.711042881011963, "learning_rate": 0.001, "loss": 2.1835, "step": 125104 }, { "epoch": 10.797101449275363, "grad_norm": 0.7546063661575317, "learning_rate": 0.001, "loss": 2.1694, "step": 125160 }, { "epoch": 10.801932367149758, "grad_norm": 1.4827537536621094, "learning_rate": 0.001, "loss": 2.163, "step": 125216 }, { "epoch": 10.806763285024154, "grad_norm": 1.6204676628112793, "learning_rate": 0.001, "loss": 2.1715, "step": 125272 }, { "epoch": 10.81159420289855, "grad_norm": 3.52702260017395, "learning_rate": 0.001, "loss": 2.1776, "step": 125328 }, { "epoch": 10.816425120772946, "grad_norm": 1.9293352365493774, "learning_rate": 0.001, "loss": 2.1799, "step": 125384 }, { "epoch": 10.821256038647343, "grad_norm": 3.903444528579712, "learning_rate": 0.001, "loss": 2.1913, "step": 125440 }, { "epoch": 10.826086956521738, "grad_norm": 1.6531853675842285, "learning_rate": 0.001, "loss": 2.1973, "step": 125496 }, { "epoch": 10.830917874396135, "grad_norm": 1.8802529573440552, "learning_rate": 0.001, "loss": 2.2041, "step": 125552 }, { "epoch": 10.83574879227053, "grad_norm": 4.121514797210693, "learning_rate": 0.001, "loss": 2.18, "step": 125608 }, { "epoch": 10.840579710144928, "grad_norm": 1.433053731918335, "learning_rate": 0.001, "loss": 2.1811, "step": 125664 }, { "epoch": 10.845410628019323, "grad_norm": 1.7132673263549805, "learning_rate": 0.001, "loss": 2.1901, "step": 125720 }, { "epoch": 10.85024154589372, "grad_norm": 3.892742395401001, "learning_rate": 0.001, "loss": 2.1978, "step": 125776 }, { "epoch": 10.855072463768115, "grad_norm": 1.0258229970932007, "learning_rate": 0.001, "loss": 2.1856, "step": 125832 }, { "epoch": 10.859903381642512, "grad_norm": 1.0017156600952148, "learning_rate": 0.001, "loss": 2.1848, "step": 125888 }, { "epoch": 10.864734299516908, "grad_norm": 4.105360507965088, "learning_rate": 0.001, "loss": 2.1838, "step": 125944 }, { "epoch": 10.869565217391305, "grad_norm": 1.692204236984253, "learning_rate": 0.001, "loss": 2.1758, "step": 126000 }, { "epoch": 10.8743961352657, "grad_norm": 1.0897178649902344, "learning_rate": 0.001, "loss": 2.175, "step": 126056 }, { "epoch": 10.879227053140097, "grad_norm": 1.420247197151184, "learning_rate": 0.001, "loss": 2.1745, "step": 126112 }, { "epoch": 10.884057971014492, "grad_norm": 0.9701505303382874, "learning_rate": 0.001, "loss": 2.1654, "step": 126168 }, { "epoch": 10.88888888888889, "grad_norm": 1.5234839916229248, "learning_rate": 0.001, "loss": 2.1707, "step": 126224 }, { "epoch": 10.893719806763285, "grad_norm": 1.4400521516799927, "learning_rate": 0.001, "loss": 2.1791, "step": 126280 }, { "epoch": 10.898550724637682, "grad_norm": 1.0738338232040405, "learning_rate": 0.001, "loss": 2.1646, "step": 126336 }, { "epoch": 10.903381642512077, "grad_norm": 2.5842297077178955, "learning_rate": 0.001, "loss": 2.1558, "step": 126392 }, { "epoch": 10.908212560386474, "grad_norm": 1.2197184562683105, "learning_rate": 0.001, "loss": 2.151, "step": 126448 }, { "epoch": 10.91304347826087, "grad_norm": 5.161417484283447, "learning_rate": 0.001, "loss": 2.1601, "step": 126504 }, { "epoch": 10.917874396135266, "grad_norm": 3.266233205795288, "learning_rate": 0.001, "loss": 2.1568, "step": 126560 }, { "epoch": 10.922705314009661, "grad_norm": 1.3476234674453735, "learning_rate": 0.001, "loss": 2.1645, "step": 126616 }, { "epoch": 10.927536231884059, "grad_norm": 2.2833454608917236, "learning_rate": 0.001, "loss": 2.1716, "step": 126672 }, { "epoch": 10.932367149758454, "grad_norm": 1.0055052042007446, "learning_rate": 0.001, "loss": 2.1674, "step": 126728 }, { "epoch": 10.93719806763285, "grad_norm": 1.4048173427581787, "learning_rate": 0.001, "loss": 2.164, "step": 126784 }, { "epoch": 10.942028985507246, "grad_norm": 1.6197589635849, "learning_rate": 0.001, "loss": 2.1719, "step": 126840 }, { "epoch": 10.946859903381643, "grad_norm": 1.6318053007125854, "learning_rate": 0.001, "loss": 2.1755, "step": 126896 }, { "epoch": 10.951690821256038, "grad_norm": 3.1759567260742188, "learning_rate": 0.001, "loss": 2.1653, "step": 126952 }, { "epoch": 10.956521739130435, "grad_norm": 2.743807554244995, "learning_rate": 0.001, "loss": 2.165, "step": 127008 }, { "epoch": 10.96135265700483, "grad_norm": 1.2281099557876587, "learning_rate": 0.001, "loss": 2.1625, "step": 127064 }, { "epoch": 10.966183574879228, "grad_norm": 1.0311485528945923, "learning_rate": 0.001, "loss": 2.1712, "step": 127120 }, { "epoch": 10.971014492753623, "grad_norm": 7.506829261779785, "learning_rate": 0.001, "loss": 2.1828, "step": 127176 }, { "epoch": 10.97584541062802, "grad_norm": 1.3829762935638428, "learning_rate": 0.001, "loss": 2.1875, "step": 127232 }, { "epoch": 10.980676328502415, "grad_norm": 1.8979687690734863, "learning_rate": 0.001, "loss": 2.1774, "step": 127288 }, { "epoch": 10.985507246376812, "grad_norm": 6.267608165740967, "learning_rate": 0.001, "loss": 2.1829, "step": 127344 }, { "epoch": 10.990338164251208, "grad_norm": 1.2983322143554688, "learning_rate": 0.001, "loss": 2.1832, "step": 127400 }, { "epoch": 10.995169082125603, "grad_norm": 1.2553904056549072, "learning_rate": 0.001, "loss": 2.1818, "step": 127456 }, { "epoch": 11.0, "grad_norm": 2.4120144844055176, "learning_rate": 0.001, "loss": 2.182, "step": 127512 }, { "epoch": 11.004830917874395, "grad_norm": 1.2111910581588745, "learning_rate": 0.001, "loss": 2.1361, "step": 127568 }, { "epoch": 11.009661835748792, "grad_norm": 1.063620686531067, "learning_rate": 0.001, "loss": 2.1304, "step": 127624 }, { "epoch": 11.014492753623188, "grad_norm": 1.7111425399780273, "learning_rate": 0.001, "loss": 2.1328, "step": 127680 }, { "epoch": 11.019323671497585, "grad_norm": 1.431949257850647, "learning_rate": 0.001, "loss": 2.139, "step": 127736 }, { "epoch": 11.02415458937198, "grad_norm": 1.9574198722839355, "learning_rate": 0.001, "loss": 2.1329, "step": 127792 }, { "epoch": 11.028985507246377, "grad_norm": 1.8330541849136353, "learning_rate": 0.001, "loss": 2.1608, "step": 127848 }, { "epoch": 11.033816425120772, "grad_norm": 1.199495792388916, "learning_rate": 0.001, "loss": 2.1735, "step": 127904 }, { "epoch": 11.03864734299517, "grad_norm": 5.455221652984619, "learning_rate": 0.001, "loss": 2.1544, "step": 127960 }, { "epoch": 11.043478260869565, "grad_norm": 1.5906717777252197, "learning_rate": 0.001, "loss": 2.1561, "step": 128016 }, { "epoch": 11.048309178743962, "grad_norm": 4.229872703552246, "learning_rate": 0.001, "loss": 2.158, "step": 128072 }, { "epoch": 11.053140096618357, "grad_norm": 2.4469618797302246, "learning_rate": 0.001, "loss": 2.1599, "step": 128128 }, { "epoch": 11.057971014492754, "grad_norm": 1.4909590482711792, "learning_rate": 0.001, "loss": 2.1614, "step": 128184 }, { "epoch": 11.06280193236715, "grad_norm": 1.8766353130340576, "learning_rate": 0.001, "loss": 2.1615, "step": 128240 }, { "epoch": 11.067632850241546, "grad_norm": 2.002718687057495, "learning_rate": 0.001, "loss": 2.1544, "step": 128296 }, { "epoch": 11.072463768115941, "grad_norm": 2.9536755084991455, "learning_rate": 0.001, "loss": 2.1572, "step": 128352 }, { "epoch": 11.077294685990339, "grad_norm": 7.31076192855835, "learning_rate": 0.001, "loss": 2.1772, "step": 128408 }, { "epoch": 11.082125603864734, "grad_norm": 3.5101513862609863, "learning_rate": 0.001, "loss": 2.1876, "step": 128464 }, { "epoch": 11.08695652173913, "grad_norm": 7.660124778747559, "learning_rate": 0.001, "loss": 2.1807, "step": 128520 }, { "epoch": 11.091787439613526, "grad_norm": 2.4206292629241943, "learning_rate": 0.001, "loss": 2.1796, "step": 128576 }, { "epoch": 11.096618357487923, "grad_norm": 2.630587100982666, "learning_rate": 0.001, "loss": 2.1876, "step": 128632 }, { "epoch": 11.101449275362318, "grad_norm": 6.899549961090088, "learning_rate": 0.001, "loss": 2.2005, "step": 128688 }, { "epoch": 11.106280193236715, "grad_norm": 13.317012786865234, "learning_rate": 0.001, "loss": 2.2191, "step": 128744 }, { "epoch": 11.11111111111111, "grad_norm": 2.4693360328674316, "learning_rate": 0.001, "loss": 2.2013, "step": 128800 }, { "epoch": 11.115942028985508, "grad_norm": 1.7507514953613281, "learning_rate": 0.001, "loss": 2.1827, "step": 128856 }, { "epoch": 11.120772946859903, "grad_norm": 25.85240364074707, "learning_rate": 0.001, "loss": 2.1829, "step": 128912 }, { "epoch": 11.1256038647343, "grad_norm": 4.992105960845947, "learning_rate": 0.001, "loss": 2.1848, "step": 128968 }, { "epoch": 11.130434782608695, "grad_norm": 1.643103003501892, "learning_rate": 0.001, "loss": 2.1838, "step": 129024 }, { "epoch": 11.135265700483092, "grad_norm": 1.097631812095642, "learning_rate": 0.001, "loss": 2.1954, "step": 129080 }, { "epoch": 11.140096618357488, "grad_norm": 2.4652206897735596, "learning_rate": 0.001, "loss": 2.1855, "step": 129136 }, { "epoch": 11.144927536231885, "grad_norm": 1.1412129402160645, "learning_rate": 0.001, "loss": 2.1792, "step": 129192 }, { "epoch": 11.14975845410628, "grad_norm": 1.0345125198364258, "learning_rate": 0.001, "loss": 2.1869, "step": 129248 }, { "epoch": 11.154589371980677, "grad_norm": 1.363893747329712, "learning_rate": 0.001, "loss": 2.18, "step": 129304 }, { "epoch": 11.159420289855072, "grad_norm": 0.6385408043861389, "learning_rate": 0.001, "loss": 2.1717, "step": 129360 }, { "epoch": 11.16425120772947, "grad_norm": 2.4874866008758545, "learning_rate": 0.001, "loss": 2.1684, "step": 129416 }, { "epoch": 11.169082125603865, "grad_norm": 4.056140422821045, "learning_rate": 0.001, "loss": 2.1568, "step": 129472 }, { "epoch": 11.173913043478262, "grad_norm": 0.8279305696487427, "learning_rate": 0.001, "loss": 2.1596, "step": 129528 }, { "epoch": 11.178743961352657, "grad_norm": 1.3949216604232788, "learning_rate": 0.001, "loss": 2.1551, "step": 129584 }, { "epoch": 11.183574879227054, "grad_norm": 1.515504002571106, "learning_rate": 0.001, "loss": 2.1448, "step": 129640 }, { "epoch": 11.18840579710145, "grad_norm": 1.1438316106796265, "learning_rate": 0.001, "loss": 2.1436, "step": 129696 }, { "epoch": 11.193236714975846, "grad_norm": 0.8959769606590271, "learning_rate": 0.001, "loss": 2.1386, "step": 129752 }, { "epoch": 11.198067632850242, "grad_norm": 1.9335591793060303, "learning_rate": 0.001, "loss": 2.131, "step": 129808 }, { "epoch": 11.202898550724637, "grad_norm": 1.6763603687286377, "learning_rate": 0.001, "loss": 2.1403, "step": 129864 }, { "epoch": 11.207729468599034, "grad_norm": 0.9397940635681152, "learning_rate": 0.001, "loss": 2.1603, "step": 129920 }, { "epoch": 11.21256038647343, "grad_norm": 2.0035507678985596, "learning_rate": 0.001, "loss": 2.159, "step": 129976 }, { "epoch": 11.217391304347826, "grad_norm": 4.153164863586426, "learning_rate": 0.001, "loss": 2.1718, "step": 130032 }, { "epoch": 11.222222222222221, "grad_norm": 2.4249260425567627, "learning_rate": 0.001, "loss": 2.1789, "step": 130088 }, { "epoch": 11.227053140096618, "grad_norm": 1.4497575759887695, "learning_rate": 0.001, "loss": 2.1619, "step": 130144 }, { "epoch": 11.231884057971014, "grad_norm": 1.4982216358184814, "learning_rate": 0.001, "loss": 2.1423, "step": 130200 }, { "epoch": 11.23671497584541, "grad_norm": 1.3686857223510742, "learning_rate": 0.001, "loss": 2.1365, "step": 130256 }, { "epoch": 11.241545893719806, "grad_norm": 2.336411952972412, "learning_rate": 0.001, "loss": 2.1395, "step": 130312 }, { "epoch": 11.246376811594203, "grad_norm": 2.833037853240967, "learning_rate": 0.001, "loss": 2.144, "step": 130368 }, { "epoch": 11.251207729468598, "grad_norm": 1.3380255699157715, "learning_rate": 0.001, "loss": 2.1444, "step": 130424 }, { "epoch": 11.256038647342995, "grad_norm": 6.243875980377197, "learning_rate": 0.001, "loss": 2.1557, "step": 130480 }, { "epoch": 11.26086956521739, "grad_norm": 0.9205509424209595, "learning_rate": 0.001, "loss": 2.1409, "step": 130536 }, { "epoch": 11.265700483091788, "grad_norm": 1.081005573272705, "learning_rate": 0.001, "loss": 2.1498, "step": 130592 }, { "epoch": 11.270531400966183, "grad_norm": 2.0074262619018555, "learning_rate": 0.001, "loss": 2.1494, "step": 130648 }, { "epoch": 11.27536231884058, "grad_norm": 6.902637004852295, "learning_rate": 0.001, "loss": 2.1601, "step": 130704 }, { "epoch": 11.280193236714975, "grad_norm": 1.7552622556686401, "learning_rate": 0.001, "loss": 2.1705, "step": 130760 }, { "epoch": 11.285024154589372, "grad_norm": 1.0672425031661987, "learning_rate": 0.001, "loss": 2.1923, "step": 130816 }, { "epoch": 11.289855072463768, "grad_norm": 1.2315629720687866, "learning_rate": 0.001, "loss": 2.1814, "step": 130872 }, { "epoch": 11.294685990338165, "grad_norm": 2.650214195251465, "learning_rate": 0.001, "loss": 2.166, "step": 130928 }, { "epoch": 11.29951690821256, "grad_norm": 1.0113489627838135, "learning_rate": 0.001, "loss": 2.1577, "step": 130984 }, { "epoch": 11.304347826086957, "grad_norm": 0.7434856295585632, "learning_rate": 0.001, "loss": 2.1523, "step": 131040 }, { "epoch": 11.309178743961352, "grad_norm": 2.6903069019317627, "learning_rate": 0.001, "loss": 2.145, "step": 131096 }, { "epoch": 11.31400966183575, "grad_norm": 0.7639374136924744, "learning_rate": 0.001, "loss": 2.1603, "step": 131152 }, { "epoch": 11.318840579710145, "grad_norm": 7.009921073913574, "learning_rate": 0.001, "loss": 2.1613, "step": 131208 }, { "epoch": 11.323671497584542, "grad_norm": 1.8917431831359863, "learning_rate": 0.001, "loss": 2.1763, "step": 131264 }, { "epoch": 11.328502415458937, "grad_norm": 2.3466415405273438, "learning_rate": 0.001, "loss": 2.1955, "step": 131320 }, { "epoch": 11.333333333333334, "grad_norm": 1.9311261177062988, "learning_rate": 0.001, "loss": 2.192, "step": 131376 }, { "epoch": 11.33816425120773, "grad_norm": 3.4147510528564453, "learning_rate": 0.001, "loss": 2.1842, "step": 131432 }, { "epoch": 11.342995169082126, "grad_norm": 2.3967483043670654, "learning_rate": 0.001, "loss": 2.1629, "step": 131488 }, { "epoch": 11.347826086956522, "grad_norm": 1.3805783987045288, "learning_rate": 0.001, "loss": 2.1735, "step": 131544 }, { "epoch": 11.352657004830919, "grad_norm": 1.4286952018737793, "learning_rate": 0.001, "loss": 2.166, "step": 131600 }, { "epoch": 11.357487922705314, "grad_norm": 3.0122897624969482, "learning_rate": 0.001, "loss": 2.1708, "step": 131656 }, { "epoch": 11.36231884057971, "grad_norm": 2.378324031829834, "learning_rate": 0.001, "loss": 2.165, "step": 131712 }, { "epoch": 11.367149758454106, "grad_norm": 1.1295639276504517, "learning_rate": 0.001, "loss": 2.1554, "step": 131768 }, { "epoch": 11.371980676328503, "grad_norm": 7.065188884735107, "learning_rate": 0.001, "loss": 2.1571, "step": 131824 }, { "epoch": 11.376811594202898, "grad_norm": 3.6286349296569824, "learning_rate": 0.001, "loss": 2.163, "step": 131880 }, { "epoch": 11.381642512077295, "grad_norm": 0.7777979373931885, "learning_rate": 0.001, "loss": 2.1649, "step": 131936 }, { "epoch": 11.38647342995169, "grad_norm": 1.3991354703903198, "learning_rate": 0.001, "loss": 2.1506, "step": 131992 }, { "epoch": 11.391304347826088, "grad_norm": 2.2683894634246826, "learning_rate": 0.001, "loss": 2.1498, "step": 132048 }, { "epoch": 11.396135265700483, "grad_norm": 2.3293395042419434, "learning_rate": 0.001, "loss": 2.1575, "step": 132104 }, { "epoch": 11.40096618357488, "grad_norm": 0.8002650737762451, "learning_rate": 0.001, "loss": 2.1556, "step": 132160 }, { "epoch": 11.405797101449275, "grad_norm": 1.4381272792816162, "learning_rate": 0.001, "loss": 2.1537, "step": 132216 }, { "epoch": 11.41062801932367, "grad_norm": 5.938973426818848, "learning_rate": 0.001, "loss": 2.1626, "step": 132272 }, { "epoch": 11.415458937198068, "grad_norm": 24.3090763092041, "learning_rate": 0.001, "loss": 2.1774, "step": 132328 }, { "epoch": 11.420289855072463, "grad_norm": 2.8144443035125732, "learning_rate": 0.001, "loss": 2.1803, "step": 132384 }, { "epoch": 11.42512077294686, "grad_norm": 1.408156394958496, "learning_rate": 0.001, "loss": 2.1812, "step": 132440 }, { "epoch": 11.429951690821255, "grad_norm": 1.4469841718673706, "learning_rate": 0.001, "loss": 2.1893, "step": 132496 }, { "epoch": 11.434782608695652, "grad_norm": 1.8647303581237793, "learning_rate": 0.001, "loss": 2.1821, "step": 132552 }, { "epoch": 11.439613526570048, "grad_norm": 1.9917547702789307, "learning_rate": 0.001, "loss": 2.1693, "step": 132608 }, { "epoch": 11.444444444444445, "grad_norm": 1.5972574949264526, "learning_rate": 0.001, "loss": 2.166, "step": 132664 }, { "epoch": 11.44927536231884, "grad_norm": 1.7513078451156616, "learning_rate": 0.001, "loss": 2.1541, "step": 132720 }, { "epoch": 11.454106280193237, "grad_norm": 2.1618237495422363, "learning_rate": 0.001, "loss": 2.1541, "step": 132776 }, { "epoch": 11.458937198067632, "grad_norm": 1.2123358249664307, "learning_rate": 0.001, "loss": 2.1688, "step": 132832 }, { "epoch": 11.46376811594203, "grad_norm": 3.722198486328125, "learning_rate": 0.001, "loss": 2.176, "step": 132888 }, { "epoch": 11.468599033816425, "grad_norm": 2.611532688140869, "learning_rate": 0.001, "loss": 2.1826, "step": 132944 }, { "epoch": 11.473429951690822, "grad_norm": 1.7881027460098267, "learning_rate": 0.001, "loss": 2.1585, "step": 133000 }, { "epoch": 11.478260869565217, "grad_norm": 1.67707097530365, "learning_rate": 0.001, "loss": 2.153, "step": 133056 }, { "epoch": 11.483091787439614, "grad_norm": 1.211462378501892, "learning_rate": 0.001, "loss": 2.1596, "step": 133112 }, { "epoch": 11.48792270531401, "grad_norm": 3.2668378353118896, "learning_rate": 0.001, "loss": 2.1522, "step": 133168 }, { "epoch": 11.492753623188406, "grad_norm": 2.447831630706787, "learning_rate": 0.001, "loss": 2.1499, "step": 133224 }, { "epoch": 11.497584541062801, "grad_norm": 1.2445319890975952, "learning_rate": 0.001, "loss": 2.1625, "step": 133280 }, { "epoch": 11.502415458937199, "grad_norm": 0.9578527808189392, "learning_rate": 0.001, "loss": 2.1486, "step": 133336 }, { "epoch": 11.507246376811594, "grad_norm": 0.7497963309288025, "learning_rate": 0.001, "loss": 2.1518, "step": 133392 }, { "epoch": 11.51207729468599, "grad_norm": 1.878481388092041, "learning_rate": 0.001, "loss": 2.1487, "step": 133448 }, { "epoch": 11.516908212560386, "grad_norm": 1.6610082387924194, "learning_rate": 0.001, "loss": 2.1461, "step": 133504 }, { "epoch": 11.521739130434783, "grad_norm": 4.432641983032227, "learning_rate": 0.001, "loss": 2.1457, "step": 133560 }, { "epoch": 11.526570048309178, "grad_norm": 1.100925087928772, "learning_rate": 0.001, "loss": 2.1462, "step": 133616 }, { "epoch": 11.531400966183575, "grad_norm": 3.5141429901123047, "learning_rate": 0.001, "loss": 2.1405, "step": 133672 }, { "epoch": 11.53623188405797, "grad_norm": 1.9338464736938477, "learning_rate": 0.001, "loss": 2.151, "step": 133728 }, { "epoch": 11.541062801932368, "grad_norm": 1.7894530296325684, "learning_rate": 0.001, "loss": 2.1449, "step": 133784 }, { "epoch": 11.545893719806763, "grad_norm": 0.7991990447044373, "learning_rate": 0.001, "loss": 2.1443, "step": 133840 }, { "epoch": 11.55072463768116, "grad_norm": 2.088209390640259, "learning_rate": 0.001, "loss": 2.1564, "step": 133896 }, { "epoch": 11.555555555555555, "grad_norm": 2.1392955780029297, "learning_rate": 0.001, "loss": 2.1456, "step": 133952 }, { "epoch": 11.560386473429952, "grad_norm": 4.288720607757568, "learning_rate": 0.001, "loss": 2.1507, "step": 134008 }, { "epoch": 11.565217391304348, "grad_norm": 1.2219886779785156, "learning_rate": 0.001, "loss": 2.155, "step": 134064 }, { "epoch": 11.570048309178745, "grad_norm": 1.0434324741363525, "learning_rate": 0.001, "loss": 2.1599, "step": 134120 }, { "epoch": 11.57487922705314, "grad_norm": 3.612537384033203, "learning_rate": 0.001, "loss": 2.169, "step": 134176 }, { "epoch": 11.579710144927537, "grad_norm": 1.1100342273712158, "learning_rate": 0.001, "loss": 2.16, "step": 134232 }, { "epoch": 11.584541062801932, "grad_norm": 43.956790924072266, "learning_rate": 0.001, "loss": 2.1619, "step": 134288 }, { "epoch": 11.58937198067633, "grad_norm": 2.0098605155944824, "learning_rate": 0.001, "loss": 2.1593, "step": 134344 }, { "epoch": 11.594202898550725, "grad_norm": 1.0257587432861328, "learning_rate": 0.001, "loss": 2.1598, "step": 134400 }, { "epoch": 11.59903381642512, "grad_norm": 2.1896414756774902, "learning_rate": 0.001, "loss": 2.1667, "step": 134456 }, { "epoch": 11.603864734299517, "grad_norm": 1.5707670450210571, "learning_rate": 0.001, "loss": 2.1578, "step": 134512 }, { "epoch": 11.608695652173914, "grad_norm": 3.058683156967163, "learning_rate": 0.001, "loss": 2.1476, "step": 134568 }, { "epoch": 11.61352657004831, "grad_norm": 90.78943634033203, "learning_rate": 0.001, "loss": 2.1577, "step": 134624 }, { "epoch": 11.618357487922705, "grad_norm": 0.8176459074020386, "learning_rate": 0.001, "loss": 2.1488, "step": 134680 }, { "epoch": 11.623188405797102, "grad_norm": 1.7974375486373901, "learning_rate": 0.001, "loss": 2.1629, "step": 134736 }, { "epoch": 11.628019323671497, "grad_norm": 0.8804119825363159, "learning_rate": 0.001, "loss": 2.1623, "step": 134792 }, { "epoch": 11.632850241545894, "grad_norm": 0.8612844347953796, "learning_rate": 0.001, "loss": 2.1794, "step": 134848 }, { "epoch": 11.63768115942029, "grad_norm": 1.7096879482269287, "learning_rate": 0.001, "loss": 2.1721, "step": 134904 }, { "epoch": 11.642512077294686, "grad_norm": 1.6035691499710083, "learning_rate": 0.001, "loss": 2.1554, "step": 134960 }, { "epoch": 11.647342995169081, "grad_norm": 2.2473886013031006, "learning_rate": 0.001, "loss": 2.1641, "step": 135016 }, { "epoch": 11.652173913043478, "grad_norm": 2.8684732913970947, "learning_rate": 0.001, "loss": 2.1551, "step": 135072 }, { "epoch": 11.657004830917874, "grad_norm": 1.1871248483657837, "learning_rate": 0.001, "loss": 2.1589, "step": 135128 }, { "epoch": 11.66183574879227, "grad_norm": 2.1372108459472656, "learning_rate": 0.001, "loss": 2.1786, "step": 135184 }, { "epoch": 11.666666666666666, "grad_norm": 2.134202718734741, "learning_rate": 0.001, "loss": 2.1682, "step": 135240 }, { "epoch": 11.671497584541063, "grad_norm": 1.339689016342163, "learning_rate": 0.001, "loss": 2.1536, "step": 135296 }, { "epoch": 11.676328502415458, "grad_norm": 2.0542263984680176, "learning_rate": 0.001, "loss": 2.1496, "step": 135352 }, { "epoch": 11.681159420289855, "grad_norm": 1.365377426147461, "learning_rate": 0.001, "loss": 2.1402, "step": 135408 }, { "epoch": 11.68599033816425, "grad_norm": 1.2262927293777466, "learning_rate": 0.001, "loss": 2.1284, "step": 135464 }, { "epoch": 11.690821256038648, "grad_norm": 1.1472821235656738, "learning_rate": 0.001, "loss": 2.1397, "step": 135520 }, { "epoch": 11.695652173913043, "grad_norm": 1.2871700525283813, "learning_rate": 0.001, "loss": 2.135, "step": 135576 }, { "epoch": 11.70048309178744, "grad_norm": 0.8797616362571716, "learning_rate": 0.001, "loss": 2.1341, "step": 135632 }, { "epoch": 11.705314009661835, "grad_norm": 1.3007557392120361, "learning_rate": 0.001, "loss": 2.1409, "step": 135688 }, { "epoch": 11.710144927536232, "grad_norm": 2.126065969467163, "learning_rate": 0.001, "loss": 2.1421, "step": 135744 }, { "epoch": 11.714975845410628, "grad_norm": 2.382359266281128, "learning_rate": 0.001, "loss": 2.1439, "step": 135800 }, { "epoch": 11.719806763285025, "grad_norm": 3.3054733276367188, "learning_rate": 0.001, "loss": 2.1553, "step": 135856 }, { "epoch": 11.72463768115942, "grad_norm": 1.831474781036377, "learning_rate": 0.001, "loss": 2.1671, "step": 135912 }, { "epoch": 11.729468599033817, "grad_norm": 1.4319506883621216, "learning_rate": 0.001, "loss": 2.1632, "step": 135968 }, { "epoch": 11.734299516908212, "grad_norm": 1.1082159280776978, "learning_rate": 0.001, "loss": 2.162, "step": 136024 }, { "epoch": 11.73913043478261, "grad_norm": 1.0175840854644775, "learning_rate": 0.001, "loss": 2.1566, "step": 136080 }, { "epoch": 11.743961352657005, "grad_norm": 0.8453747034072876, "learning_rate": 0.001, "loss": 2.1437, "step": 136136 }, { "epoch": 11.748792270531402, "grad_norm": 1.7449556589126587, "learning_rate": 0.001, "loss": 2.1546, "step": 136192 }, { "epoch": 11.753623188405797, "grad_norm": 0.975506067276001, "learning_rate": 0.001, "loss": 2.1587, "step": 136248 }, { "epoch": 11.758454106280194, "grad_norm": 1.2254399061203003, "learning_rate": 0.001, "loss": 2.1486, "step": 136304 }, { "epoch": 11.76328502415459, "grad_norm": 1.4558777809143066, "learning_rate": 0.001, "loss": 2.1614, "step": 136360 }, { "epoch": 11.768115942028986, "grad_norm": 3.111281394958496, "learning_rate": 0.001, "loss": 2.1693, "step": 136416 }, { "epoch": 11.772946859903382, "grad_norm": 3.4066739082336426, "learning_rate": 0.001, "loss": 2.1833, "step": 136472 }, { "epoch": 11.777777777777779, "grad_norm": 3.0420312881469727, "learning_rate": 0.001, "loss": 2.192, "step": 136528 }, { "epoch": 11.782608695652174, "grad_norm": 3.7077364921569824, "learning_rate": 0.001, "loss": 2.1885, "step": 136584 }, { "epoch": 11.78743961352657, "grad_norm": 2.0996952056884766, "learning_rate": 0.001, "loss": 2.1795, "step": 136640 }, { "epoch": 11.792270531400966, "grad_norm": 1.9807239770889282, "learning_rate": 0.001, "loss": 2.1722, "step": 136696 }, { "epoch": 11.797101449275363, "grad_norm": 2.2017979621887207, "learning_rate": 0.001, "loss": 2.1667, "step": 136752 }, { "epoch": 11.801932367149758, "grad_norm": 1.8431072235107422, "learning_rate": 0.001, "loss": 2.1539, "step": 136808 }, { "epoch": 11.806763285024154, "grad_norm": 1.7913908958435059, "learning_rate": 0.001, "loss": 2.1653, "step": 136864 }, { "epoch": 11.81159420289855, "grad_norm": 2.1519887447357178, "learning_rate": 0.001, "loss": 2.1749, "step": 136920 }, { "epoch": 11.816425120772946, "grad_norm": 0.7495535612106323, "learning_rate": 0.001, "loss": 2.1789, "step": 136976 }, { "epoch": 11.821256038647343, "grad_norm": 0.7424222230911255, "learning_rate": 0.001, "loss": 2.1691, "step": 137032 }, { "epoch": 11.826086956521738, "grad_norm": 2.66715931892395, "learning_rate": 0.001, "loss": 2.1772, "step": 137088 }, { "epoch": 11.830917874396135, "grad_norm": 1.4277801513671875, "learning_rate": 0.001, "loss": 2.165, "step": 137144 }, { "epoch": 11.83574879227053, "grad_norm": 2.4702582359313965, "learning_rate": 0.001, "loss": 2.1683, "step": 137200 }, { "epoch": 11.840579710144928, "grad_norm": 2.1138689517974854, "learning_rate": 0.001, "loss": 2.1941, "step": 137256 }, { "epoch": 11.845410628019323, "grad_norm": 1.0289188623428345, "learning_rate": 0.001, "loss": 2.1854, "step": 137312 }, { "epoch": 11.85024154589372, "grad_norm": 6.840153217315674, "learning_rate": 0.001, "loss": 2.1753, "step": 137368 }, { "epoch": 11.855072463768115, "grad_norm": 1.8663305044174194, "learning_rate": 0.001, "loss": 2.1778, "step": 137424 }, { "epoch": 11.859903381642512, "grad_norm": 11.929183959960938, "learning_rate": 0.001, "loss": 2.1917, "step": 137480 }, { "epoch": 11.864734299516908, "grad_norm": 6.134036064147949, "learning_rate": 0.001, "loss": 2.1867, "step": 137536 }, { "epoch": 11.869565217391305, "grad_norm": 8.179972648620605, "learning_rate": 0.001, "loss": 2.1902, "step": 137592 }, { "epoch": 11.8743961352657, "grad_norm": 1.656633973121643, "learning_rate": 0.001, "loss": 2.169, "step": 137648 }, { "epoch": 11.879227053140097, "grad_norm": 1.4404828548431396, "learning_rate": 0.001, "loss": 2.1699, "step": 137704 }, { "epoch": 11.884057971014492, "grad_norm": 4.035676956176758, "learning_rate": 0.001, "loss": 2.1627, "step": 137760 }, { "epoch": 11.88888888888889, "grad_norm": 3.11236572265625, "learning_rate": 0.001, "loss": 2.1563, "step": 137816 }, { "epoch": 11.893719806763285, "grad_norm": 6.423616886138916, "learning_rate": 0.001, "loss": 2.161, "step": 137872 }, { "epoch": 11.898550724637682, "grad_norm": 1.2624446153640747, "learning_rate": 0.001, "loss": 2.1547, "step": 137928 }, { "epoch": 11.903381642512077, "grad_norm": 1.8141759634017944, "learning_rate": 0.001, "loss": 2.1653, "step": 137984 }, { "epoch": 11.908212560386474, "grad_norm": 3.13934063911438, "learning_rate": 0.001, "loss": 2.169, "step": 138040 }, { "epoch": 11.91304347826087, "grad_norm": 0.7363304495811462, "learning_rate": 0.001, "loss": 2.1718, "step": 138096 }, { "epoch": 11.917874396135266, "grad_norm": 7.272291660308838, "learning_rate": 0.001, "loss": 2.1721, "step": 138152 }, { "epoch": 11.922705314009661, "grad_norm": 2.5603482723236084, "learning_rate": 0.001, "loss": 2.1707, "step": 138208 }, { "epoch": 11.927536231884059, "grad_norm": 6.911461353302002, "learning_rate": 0.001, "loss": 2.1828, "step": 138264 }, { "epoch": 11.932367149758454, "grad_norm": 2.6392252445220947, "learning_rate": 0.001, "loss": 2.1714, "step": 138320 }, { "epoch": 11.93719806763285, "grad_norm": 0.8566041588783264, "learning_rate": 0.001, "loss": 2.1745, "step": 138376 }, { "epoch": 11.942028985507246, "grad_norm": 3.405165672302246, "learning_rate": 0.001, "loss": 2.1859, "step": 138432 }, { "epoch": 11.946859903381643, "grad_norm": 0.9229844808578491, "learning_rate": 0.001, "loss": 2.1723, "step": 138488 }, { "epoch": 11.951690821256038, "grad_norm": 0.9978861212730408, "learning_rate": 0.001, "loss": 2.1812, "step": 138544 }, { "epoch": 11.956521739130435, "grad_norm": 2.7527482509613037, "learning_rate": 0.001, "loss": 2.1809, "step": 138600 }, { "epoch": 11.96135265700483, "grad_norm": 3.1028177738189697, "learning_rate": 0.001, "loss": 2.1768, "step": 138656 }, { "epoch": 11.966183574879228, "grad_norm": 1.0113414525985718, "learning_rate": 0.001, "loss": 2.1804, "step": 138712 }, { "epoch": 11.971014492753623, "grad_norm": 4.682823181152344, "learning_rate": 0.001, "loss": 2.1734, "step": 138768 }, { "epoch": 11.97584541062802, "grad_norm": 0.6051453948020935, "learning_rate": 0.001, "loss": 2.1691, "step": 138824 }, { "epoch": 11.980676328502415, "grad_norm": 2.108956813812256, "learning_rate": 0.001, "loss": 2.165, "step": 138880 }, { "epoch": 11.985507246376812, "grad_norm": 0.39985644817352295, "learning_rate": 0.001, "loss": 2.1514, "step": 138936 }, { "epoch": 11.990338164251208, "grad_norm": 0.8410494923591614, "learning_rate": 0.001, "loss": 2.1606, "step": 138992 }, { "epoch": 11.995169082125603, "grad_norm": 1.4523165225982666, "learning_rate": 0.001, "loss": 2.1601, "step": 139048 }, { "epoch": 12.0, "grad_norm": 3.06986141204834, "learning_rate": 0.001, "loss": 2.1758, "step": 139104 }, { "epoch": 12.004830917874395, "grad_norm": 1.7074397802352905, "learning_rate": 0.001, "loss": 2.1382, "step": 139160 }, { "epoch": 12.009661835748792, "grad_norm": 1.0722459554672241, "learning_rate": 0.001, "loss": 2.1318, "step": 139216 }, { "epoch": 12.014492753623188, "grad_norm": 2.7510175704956055, "learning_rate": 0.001, "loss": 2.1191, "step": 139272 }, { "epoch": 12.019323671497585, "grad_norm": 1.4080379009246826, "learning_rate": 0.001, "loss": 2.1216, "step": 139328 }, { "epoch": 12.02415458937198, "grad_norm": 1.3800874948501587, "learning_rate": 0.001, "loss": 2.1238, "step": 139384 }, { "epoch": 12.028985507246377, "grad_norm": 6.7165679931640625, "learning_rate": 0.001, "loss": 2.1396, "step": 139440 }, { "epoch": 12.033816425120772, "grad_norm": 1.1638861894607544, "learning_rate": 0.001, "loss": 2.1195, "step": 139496 }, { "epoch": 12.03864734299517, "grad_norm": 1.0675334930419922, "learning_rate": 0.001, "loss": 2.1187, "step": 139552 }, { "epoch": 12.043478260869565, "grad_norm": 2.1167635917663574, "learning_rate": 0.001, "loss": 2.1139, "step": 139608 }, { "epoch": 12.048309178743962, "grad_norm": 1.0412802696228027, "learning_rate": 0.001, "loss": 2.1215, "step": 139664 }, { "epoch": 12.053140096618357, "grad_norm": 1.233525037765503, "learning_rate": 0.001, "loss": 2.1031, "step": 139720 }, { "epoch": 12.057971014492754, "grad_norm": 1.584393858909607, "learning_rate": 0.001, "loss": 2.1119, "step": 139776 }, { "epoch": 12.06280193236715, "grad_norm": 1.3282197713851929, "learning_rate": 0.001, "loss": 2.1215, "step": 139832 }, { "epoch": 12.067632850241546, "grad_norm": 1.5350204706192017, "learning_rate": 0.001, "loss": 2.1312, "step": 139888 }, { "epoch": 12.072463768115941, "grad_norm": 1.5638731718063354, "learning_rate": 0.001, "loss": 2.1227, "step": 139944 }, { "epoch": 12.077294685990339, "grad_norm": 1.862272024154663, "learning_rate": 0.001, "loss": 2.1254, "step": 140000 }, { "epoch": 12.082125603864734, "grad_norm": 11.566842079162598, "learning_rate": 0.001, "loss": 2.1347, "step": 140056 }, { "epoch": 12.08695652173913, "grad_norm": 13.141790390014648, "learning_rate": 0.001, "loss": 2.1386, "step": 140112 }, { "epoch": 12.091787439613526, "grad_norm": 42.42540740966797, "learning_rate": 0.001, "loss": 2.1418, "step": 140168 }, { "epoch": 12.096618357487923, "grad_norm": 1.0227177143096924, "learning_rate": 0.001, "loss": 2.1627, "step": 140224 }, { "epoch": 12.101449275362318, "grad_norm": 1.3916877508163452, "learning_rate": 0.001, "loss": 2.1645, "step": 140280 }, { "epoch": 12.106280193236715, "grad_norm": 2.8892128467559814, "learning_rate": 0.001, "loss": 2.1544, "step": 140336 }, { "epoch": 12.11111111111111, "grad_norm": 1.1164380311965942, "learning_rate": 0.001, "loss": 2.1603, "step": 140392 }, { "epoch": 12.115942028985508, "grad_norm": 1.561012625694275, "learning_rate": 0.001, "loss": 2.1503, "step": 140448 }, { "epoch": 12.120772946859903, "grad_norm": 2.6087722778320312, "learning_rate": 0.001, "loss": 2.1337, "step": 140504 }, { "epoch": 12.1256038647343, "grad_norm": 19.377309799194336, "learning_rate": 0.001, "loss": 2.1279, "step": 140560 }, { "epoch": 12.130434782608695, "grad_norm": 1.271406888961792, "learning_rate": 0.001, "loss": 2.1202, "step": 140616 }, { "epoch": 12.135265700483092, "grad_norm": 1.743664264678955, "learning_rate": 0.001, "loss": 2.1267, "step": 140672 }, { "epoch": 12.140096618357488, "grad_norm": 1.625611662864685, "learning_rate": 0.001, "loss": 2.1208, "step": 140728 }, { "epoch": 12.144927536231885, "grad_norm": 2.3711163997650146, "learning_rate": 0.001, "loss": 2.1316, "step": 140784 }, { "epoch": 12.14975845410628, "grad_norm": 4.0641608238220215, "learning_rate": 0.001, "loss": 2.1425, "step": 140840 }, { "epoch": 12.154589371980677, "grad_norm": 2.2252283096313477, "learning_rate": 0.001, "loss": 2.1538, "step": 140896 }, { "epoch": 12.159420289855072, "grad_norm": 28.733789443969727, "learning_rate": 0.001, "loss": 2.1415, "step": 140952 }, { "epoch": 12.16425120772947, "grad_norm": 1.5870418548583984, "learning_rate": 0.001, "loss": 2.1433, "step": 141008 }, { "epoch": 12.169082125603865, "grad_norm": 2.2028937339782715, "learning_rate": 0.001, "loss": 2.1799, "step": 141064 }, { "epoch": 12.173913043478262, "grad_norm": 4.300158500671387, "learning_rate": 0.001, "loss": 2.1864, "step": 141120 }, { "epoch": 12.178743961352657, "grad_norm": 16.284698486328125, "learning_rate": 0.001, "loss": 2.1829, "step": 141176 }, { "epoch": 12.183574879227054, "grad_norm": 5.202531814575195, "learning_rate": 0.001, "loss": 2.1746, "step": 141232 }, { "epoch": 12.18840579710145, "grad_norm": 2.1948845386505127, "learning_rate": 0.001, "loss": 2.177, "step": 141288 }, { "epoch": 12.193236714975846, "grad_norm": 2.1246914863586426, "learning_rate": 0.001, "loss": 2.1818, "step": 141344 }, { "epoch": 12.198067632850242, "grad_norm": 2.7810511589050293, "learning_rate": 0.001, "loss": 2.1812, "step": 141400 }, { "epoch": 12.202898550724637, "grad_norm": 1.7156684398651123, "learning_rate": 0.001, "loss": 2.1628, "step": 141456 }, { "epoch": 12.207729468599034, "grad_norm": 1.5086328983306885, "learning_rate": 0.001, "loss": 2.17, "step": 141512 }, { "epoch": 12.21256038647343, "grad_norm": 2.2895312309265137, "learning_rate": 0.001, "loss": 2.1491, "step": 141568 }, { "epoch": 12.217391304347826, "grad_norm": 3.1194708347320557, "learning_rate": 0.001, "loss": 2.1551, "step": 141624 }, { "epoch": 12.222222222222221, "grad_norm": 1.0032291412353516, "learning_rate": 0.001, "loss": 2.1492, "step": 141680 }, { "epoch": 12.227053140096618, "grad_norm": 4.8116960525512695, "learning_rate": 0.001, "loss": 2.1408, "step": 141736 }, { "epoch": 12.231884057971014, "grad_norm": 6.912002086639404, "learning_rate": 0.001, "loss": 2.1415, "step": 141792 }, { "epoch": 12.23671497584541, "grad_norm": 1.4610812664031982, "learning_rate": 0.001, "loss": 2.1448, "step": 141848 }, { "epoch": 12.241545893719806, "grad_norm": 4.043923377990723, "learning_rate": 0.001, "loss": 2.1564, "step": 141904 }, { "epoch": 12.246376811594203, "grad_norm": 1.829960823059082, "learning_rate": 0.001, "loss": 2.161, "step": 141960 }, { "epoch": 12.251207729468598, "grad_norm": 3.8297719955444336, "learning_rate": 0.001, "loss": 2.154, "step": 142016 }, { "epoch": 12.256038647342995, "grad_norm": 2.3305394649505615, "learning_rate": 0.001, "loss": 2.1602, "step": 142072 }, { "epoch": 12.26086956521739, "grad_norm": 2.1256890296936035, "learning_rate": 0.001, "loss": 2.1574, "step": 142128 }, { "epoch": 12.265700483091788, "grad_norm": 1.3211880922317505, "learning_rate": 0.001, "loss": 2.1559, "step": 142184 }, { "epoch": 12.270531400966183, "grad_norm": 3.230886936187744, "learning_rate": 0.001, "loss": 2.1701, "step": 142240 }, { "epoch": 12.27536231884058, "grad_norm": 1.9857741594314575, "learning_rate": 0.001, "loss": 2.1872, "step": 142296 }, { "epoch": 12.280193236714975, "grad_norm": 1.8681553602218628, "learning_rate": 0.001, "loss": 2.1851, "step": 142352 }, { "epoch": 12.285024154589372, "grad_norm": 4.764926910400391, "learning_rate": 0.001, "loss": 2.1832, "step": 142408 }, { "epoch": 12.289855072463768, "grad_norm": 2.012326240539551, "learning_rate": 0.001, "loss": 2.1847, "step": 142464 }, { "epoch": 12.294685990338165, "grad_norm": 16.375873565673828, "learning_rate": 0.001, "loss": 2.1858, "step": 142520 }, { "epoch": 12.29951690821256, "grad_norm": 1.235822081565857, "learning_rate": 0.001, "loss": 2.1956, "step": 142576 }, { "epoch": 12.304347826086957, "grad_norm": 5.588258743286133, "learning_rate": 0.001, "loss": 2.203, "step": 142632 }, { "epoch": 12.309178743961352, "grad_norm": 5.072676181793213, "learning_rate": 0.001, "loss": 2.1697, "step": 142688 }, { "epoch": 12.31400966183575, "grad_norm": 2.5074424743652344, "learning_rate": 0.001, "loss": 2.1717, "step": 142744 }, { "epoch": 12.318840579710145, "grad_norm": 2.709120512008667, "learning_rate": 0.001, "loss": 2.1665, "step": 142800 }, { "epoch": 12.323671497584542, "grad_norm": 3.240938186645508, "learning_rate": 0.001, "loss": 2.155, "step": 142856 }, { "epoch": 12.328502415458937, "grad_norm": 1.7258764505386353, "learning_rate": 0.001, "loss": 2.169, "step": 142912 }, { "epoch": 12.333333333333334, "grad_norm": 1.3388620615005493, "learning_rate": 0.001, "loss": 2.154, "step": 142968 }, { "epoch": 12.33816425120773, "grad_norm": 8.502300262451172, "learning_rate": 0.001, "loss": 2.1571, "step": 143024 }, { "epoch": 12.342995169082126, "grad_norm": 0.8782457709312439, "learning_rate": 0.001, "loss": 2.1672, "step": 143080 }, { "epoch": 12.347826086956522, "grad_norm": 1.126036524772644, "learning_rate": 0.001, "loss": 2.1621, "step": 143136 }, { "epoch": 12.352657004830919, "grad_norm": 0.6641754508018494, "learning_rate": 0.001, "loss": 2.1583, "step": 143192 }, { "epoch": 12.357487922705314, "grad_norm": 0.7980713844299316, "learning_rate": 0.001, "loss": 2.1414, "step": 143248 }, { "epoch": 12.36231884057971, "grad_norm": 1.9634695053100586, "learning_rate": 0.001, "loss": 2.1471, "step": 143304 }, { "epoch": 12.367149758454106, "grad_norm": 1.4545438289642334, "learning_rate": 0.001, "loss": 2.1398, "step": 143360 }, { "epoch": 12.371980676328503, "grad_norm": 1.6943532228469849, "learning_rate": 0.001, "loss": 2.1414, "step": 143416 }, { "epoch": 12.376811594202898, "grad_norm": 0.664146363735199, "learning_rate": 0.001, "loss": 2.1331, "step": 143472 }, { "epoch": 12.381642512077295, "grad_norm": 0.9882853031158447, "learning_rate": 0.001, "loss": 2.1202, "step": 143528 }, { "epoch": 12.38647342995169, "grad_norm": 2.566378116607666, "learning_rate": 0.001, "loss": 2.1315, "step": 143584 }, { "epoch": 12.391304347826088, "grad_norm": 2.5037691593170166, "learning_rate": 0.001, "loss": 2.1423, "step": 143640 }, { "epoch": 12.396135265700483, "grad_norm": 5.4365105628967285, "learning_rate": 0.001, "loss": 2.1253, "step": 143696 }, { "epoch": 12.40096618357488, "grad_norm": 0.8732381463050842, "learning_rate": 0.001, "loss": 2.1363, "step": 143752 }, { "epoch": 12.405797101449275, "grad_norm": 3.01924204826355, "learning_rate": 0.001, "loss": 2.1362, "step": 143808 }, { "epoch": 12.41062801932367, "grad_norm": 0.6369227766990662, "learning_rate": 0.001, "loss": 2.1306, "step": 143864 }, { "epoch": 12.415458937198068, "grad_norm": 1.3475645780563354, "learning_rate": 0.001, "loss": 2.1441, "step": 143920 }, { "epoch": 12.420289855072463, "grad_norm": 16.77593994140625, "learning_rate": 0.001, "loss": 2.134, "step": 143976 }, { "epoch": 12.42512077294686, "grad_norm": 1.5985064506530762, "learning_rate": 0.001, "loss": 2.1316, "step": 144032 }, { "epoch": 12.429951690821255, "grad_norm": 1.7535443305969238, "learning_rate": 0.001, "loss": 2.1401, "step": 144088 }, { "epoch": 12.434782608695652, "grad_norm": 0.8143284916877747, "learning_rate": 0.001, "loss": 2.1407, "step": 144144 }, { "epoch": 12.439613526570048, "grad_norm": 0.7159766554832458, "learning_rate": 0.001, "loss": 2.1432, "step": 144200 }, { "epoch": 12.444444444444445, "grad_norm": 3.6786153316497803, "learning_rate": 0.001, "loss": 2.153, "step": 144256 }, { "epoch": 12.44927536231884, "grad_norm": 0.9263442754745483, "learning_rate": 0.001, "loss": 2.143, "step": 144312 }, { "epoch": 12.454106280193237, "grad_norm": 1.5495977401733398, "learning_rate": 0.001, "loss": 2.1377, "step": 144368 }, { "epoch": 12.458937198067632, "grad_norm": 1.5098943710327148, "learning_rate": 0.001, "loss": 2.1353, "step": 144424 }, { "epoch": 12.46376811594203, "grad_norm": 0.6181824207305908, "learning_rate": 0.001, "loss": 2.1272, "step": 144480 }, { "epoch": 12.468599033816425, "grad_norm": 2.3013219833374023, "learning_rate": 0.001, "loss": 2.127, "step": 144536 }, { "epoch": 12.473429951690822, "grad_norm": 2.7798948287963867, "learning_rate": 0.001, "loss": 2.1483, "step": 144592 }, { "epoch": 12.478260869565217, "grad_norm": 1.0423344373703003, "learning_rate": 0.001, "loss": 2.1516, "step": 144648 }, { "epoch": 12.483091787439614, "grad_norm": 1.4084854125976562, "learning_rate": 0.001, "loss": 2.146, "step": 144704 }, { "epoch": 12.48792270531401, "grad_norm": 1.077108383178711, "learning_rate": 0.001, "loss": 2.139, "step": 144760 }, { "epoch": 12.492753623188406, "grad_norm": 1.4833850860595703, "learning_rate": 0.001, "loss": 2.1199, "step": 144816 }, { "epoch": 12.497584541062801, "grad_norm": 3.583406448364258, "learning_rate": 0.001, "loss": 2.1206, "step": 144872 }, { "epoch": 12.502415458937199, "grad_norm": 3.388789415359497, "learning_rate": 0.001, "loss": 2.1259, "step": 144928 }, { "epoch": 12.507246376811594, "grad_norm": 1.5271341800689697, "learning_rate": 0.001, "loss": 2.136, "step": 144984 }, { "epoch": 12.51207729468599, "grad_norm": 0.9995395541191101, "learning_rate": 0.001, "loss": 2.1438, "step": 145040 }, { "epoch": 12.516908212560386, "grad_norm": 1.144813895225525, "learning_rate": 0.001, "loss": 2.1434, "step": 145096 }, { "epoch": 12.521739130434783, "grad_norm": 18.142234802246094, "learning_rate": 0.001, "loss": 2.136, "step": 145152 }, { "epoch": 12.526570048309178, "grad_norm": 1.2944560050964355, "learning_rate": 0.001, "loss": 2.1269, "step": 145208 }, { "epoch": 12.531400966183575, "grad_norm": 1.6908440589904785, "learning_rate": 0.001, "loss": 2.1338, "step": 145264 }, { "epoch": 12.53623188405797, "grad_norm": 1.6211026906967163, "learning_rate": 0.001, "loss": 2.1375, "step": 145320 }, { "epoch": 12.541062801932368, "grad_norm": 0.6614188551902771, "learning_rate": 0.001, "loss": 2.1364, "step": 145376 }, { "epoch": 12.545893719806763, "grad_norm": 1.1642796993255615, "learning_rate": 0.001, "loss": 2.1338, "step": 145432 }, { "epoch": 12.55072463768116, "grad_norm": 1.3793939352035522, "learning_rate": 0.001, "loss": 2.1303, "step": 145488 }, { "epoch": 12.555555555555555, "grad_norm": 0.9628016352653503, "learning_rate": 0.001, "loss": 2.1281, "step": 145544 }, { "epoch": 12.560386473429952, "grad_norm": 4.906208515167236, "learning_rate": 0.001, "loss": 2.1197, "step": 145600 }, { "epoch": 12.565217391304348, "grad_norm": 1.371993064880371, "learning_rate": 0.001, "loss": 2.1226, "step": 145656 }, { "epoch": 12.570048309178745, "grad_norm": 0.5131025314331055, "learning_rate": 0.001, "loss": 2.1155, "step": 145712 }, { "epoch": 12.57487922705314, "grad_norm": 0.8918805718421936, "learning_rate": 0.001, "loss": 2.1084, "step": 145768 }, { "epoch": 12.579710144927537, "grad_norm": 1.0736923217773438, "learning_rate": 0.001, "loss": 2.1246, "step": 145824 }, { "epoch": 12.584541062801932, "grad_norm": 0.9937901496887207, "learning_rate": 0.001, "loss": 2.114, "step": 145880 }, { "epoch": 12.58937198067633, "grad_norm": 2.28429913520813, "learning_rate": 0.001, "loss": 2.1217, "step": 145936 }, { "epoch": 12.594202898550725, "grad_norm": 1.0660549402236938, "learning_rate": 0.001, "loss": 2.1258, "step": 145992 }, { "epoch": 12.59903381642512, "grad_norm": 1.0357987880706787, "learning_rate": 0.001, "loss": 2.1279, "step": 146048 }, { "epoch": 12.603864734299517, "grad_norm": 1.8048168420791626, "learning_rate": 0.001, "loss": 2.1272, "step": 146104 }, { "epoch": 12.608695652173914, "grad_norm": 1.326817512512207, "learning_rate": 0.001, "loss": 2.1131, "step": 146160 }, { "epoch": 12.61352657004831, "grad_norm": 0.8317714929580688, "learning_rate": 0.001, "loss": 2.1251, "step": 146216 }, { "epoch": 12.618357487922705, "grad_norm": 0.592576265335083, "learning_rate": 0.001, "loss": 2.118, "step": 146272 }, { "epoch": 12.623188405797102, "grad_norm": 3.277266263961792, "learning_rate": 0.001, "loss": 2.1097, "step": 146328 }, { "epoch": 12.628019323671497, "grad_norm": 0.5582002997398376, "learning_rate": 0.001, "loss": 2.1347, "step": 146384 }, { "epoch": 12.632850241545894, "grad_norm": 0.9426195025444031, "learning_rate": 0.001, "loss": 2.1392, "step": 146440 }, { "epoch": 12.63768115942029, "grad_norm": 0.9630420804023743, "learning_rate": 0.001, "loss": 2.1327, "step": 146496 }, { "epoch": 12.642512077294686, "grad_norm": 0.8565064668655396, "learning_rate": 0.001, "loss": 2.1326, "step": 146552 }, { "epoch": 12.647342995169081, "grad_norm": 1.0423953533172607, "learning_rate": 0.001, "loss": 2.1316, "step": 146608 }, { "epoch": 12.652173913043478, "grad_norm": 1.3161524534225464, "learning_rate": 0.001, "loss": 2.1291, "step": 146664 }, { "epoch": 12.657004830917874, "grad_norm": 2.927644729614258, "learning_rate": 0.001, "loss": 2.1383, "step": 146720 }, { "epoch": 12.66183574879227, "grad_norm": 1.5749315023422241, "learning_rate": 0.001, "loss": 2.1354, "step": 146776 }, { "epoch": 12.666666666666666, "grad_norm": 2.4168426990509033, "learning_rate": 0.001, "loss": 2.1367, "step": 146832 }, { "epoch": 12.671497584541063, "grad_norm": 1.2343653440475464, "learning_rate": 0.001, "loss": 2.1447, "step": 146888 }, { "epoch": 12.676328502415458, "grad_norm": 2.962412118911743, "learning_rate": 0.001, "loss": 2.1426, "step": 146944 }, { "epoch": 12.681159420289855, "grad_norm": 1.166462779045105, "learning_rate": 0.001, "loss": 2.1246, "step": 147000 }, { "epoch": 12.68599033816425, "grad_norm": 1.6188726425170898, "learning_rate": 0.001, "loss": 2.1282, "step": 147056 }, { "epoch": 12.690821256038648, "grad_norm": 1.0293705463409424, "learning_rate": 0.001, "loss": 2.1255, "step": 147112 }, { "epoch": 12.695652173913043, "grad_norm": 1.152613878250122, "learning_rate": 0.001, "loss": 2.1325, "step": 147168 }, { "epoch": 12.70048309178744, "grad_norm": 4.03063440322876, "learning_rate": 0.001, "loss": 2.1168, "step": 147224 }, { "epoch": 12.705314009661835, "grad_norm": 1.128021240234375, "learning_rate": 0.001, "loss": 2.1261, "step": 147280 }, { "epoch": 12.710144927536232, "grad_norm": 0.9907158613204956, "learning_rate": 0.001, "loss": 2.1198, "step": 147336 }, { "epoch": 12.714975845410628, "grad_norm": 0.7401301860809326, "learning_rate": 0.001, "loss": 2.1198, "step": 147392 }, { "epoch": 12.719806763285025, "grad_norm": 0.8615275025367737, "learning_rate": 0.001, "loss": 2.129, "step": 147448 }, { "epoch": 12.72463768115942, "grad_norm": 0.9050840139389038, "learning_rate": 0.001, "loss": 2.1328, "step": 147504 }, { "epoch": 12.729468599033817, "grad_norm": 2.3885610103607178, "learning_rate": 0.001, "loss": 2.135, "step": 147560 }, { "epoch": 12.734299516908212, "grad_norm": 0.40776655077934265, "learning_rate": 0.001, "loss": 2.1237, "step": 147616 }, { "epoch": 12.73913043478261, "grad_norm": 0.9876236915588379, "learning_rate": 0.001, "loss": 2.1265, "step": 147672 }, { "epoch": 12.743961352657005, "grad_norm": 1.3310588598251343, "learning_rate": 0.001, "loss": 2.1201, "step": 147728 }, { "epoch": 12.748792270531402, "grad_norm": 1.1427407264709473, "learning_rate": 0.001, "loss": 2.1285, "step": 147784 }, { "epoch": 12.753623188405797, "grad_norm": 1.1546401977539062, "learning_rate": 0.001, "loss": 2.1301, "step": 147840 }, { "epoch": 12.758454106280194, "grad_norm": 1.5894087553024292, "learning_rate": 0.001, "loss": 2.1401, "step": 147896 }, { "epoch": 12.76328502415459, "grad_norm": 1.8177663087844849, "learning_rate": 0.001, "loss": 2.1254, "step": 147952 }, { "epoch": 12.768115942028986, "grad_norm": 2.881303071975708, "learning_rate": 0.001, "loss": 2.1438, "step": 148008 }, { "epoch": 12.772946859903382, "grad_norm": 1.6366645097732544, "learning_rate": 0.001, "loss": 2.139, "step": 148064 }, { "epoch": 12.777777777777779, "grad_norm": 1.1959857940673828, "learning_rate": 0.001, "loss": 2.1541, "step": 148120 }, { "epoch": 12.782608695652174, "grad_norm": 1.8994476795196533, "learning_rate": 0.001, "loss": 2.1476, "step": 148176 }, { "epoch": 12.78743961352657, "grad_norm": 0.5184637308120728, "learning_rate": 0.001, "loss": 2.1332, "step": 148232 }, { "epoch": 12.792270531400966, "grad_norm": 2.3582370281219482, "learning_rate": 0.001, "loss": 2.1453, "step": 148288 }, { "epoch": 12.797101449275363, "grad_norm": 1.3353848457336426, "learning_rate": 0.001, "loss": 2.144, "step": 148344 }, { "epoch": 12.801932367149758, "grad_norm": 2.4771721363067627, "learning_rate": 0.001, "loss": 2.1426, "step": 148400 }, { "epoch": 12.806763285024154, "grad_norm": 0.7120132446289062, "learning_rate": 0.001, "loss": 2.1413, "step": 148456 }, { "epoch": 12.81159420289855, "grad_norm": 1.5477908849716187, "learning_rate": 0.001, "loss": 2.1396, "step": 148512 }, { "epoch": 12.816425120772946, "grad_norm": 1.6299446821212769, "learning_rate": 0.001, "loss": 2.1559, "step": 148568 }, { "epoch": 12.821256038647343, "grad_norm": 1.9400720596313477, "learning_rate": 0.001, "loss": 2.1467, "step": 148624 }, { "epoch": 12.826086956521738, "grad_norm": 1.8135931491851807, "learning_rate": 0.001, "loss": 2.1203, "step": 148680 }, { "epoch": 12.830917874396135, "grad_norm": 1.7615090608596802, "learning_rate": 0.001, "loss": 2.1302, "step": 148736 }, { "epoch": 12.83574879227053, "grad_norm": 4.531754493713379, "learning_rate": 0.001, "loss": 2.157, "step": 148792 }, { "epoch": 12.840579710144928, "grad_norm": 1.1010205745697021, "learning_rate": 0.001, "loss": 2.1384, "step": 148848 }, { "epoch": 12.845410628019323, "grad_norm": 1.0222194194793701, "learning_rate": 0.001, "loss": 2.1342, "step": 148904 }, { "epoch": 12.85024154589372, "grad_norm": 1.8879467248916626, "learning_rate": 0.001, "loss": 2.1438, "step": 148960 }, { "epoch": 12.855072463768115, "grad_norm": 2.7745132446289062, "learning_rate": 0.001, "loss": 2.144, "step": 149016 }, { "epoch": 12.859903381642512, "grad_norm": 2.1177420616149902, "learning_rate": 0.001, "loss": 2.1389, "step": 149072 }, { "epoch": 12.864734299516908, "grad_norm": 2.9606330394744873, "learning_rate": 0.001, "loss": 2.1424, "step": 149128 }, { "epoch": 12.869565217391305, "grad_norm": 1.2833441495895386, "learning_rate": 0.001, "loss": 2.1473, "step": 149184 }, { "epoch": 12.8743961352657, "grad_norm": 1.89826500415802, "learning_rate": 0.001, "loss": 2.1428, "step": 149240 }, { "epoch": 12.879227053140097, "grad_norm": 1.7878497838974, "learning_rate": 0.001, "loss": 2.1366, "step": 149296 }, { "epoch": 12.884057971014492, "grad_norm": 1.2785511016845703, "learning_rate": 0.001, "loss": 2.1302, "step": 149352 }, { "epoch": 12.88888888888889, "grad_norm": 1.3714414834976196, "learning_rate": 0.001, "loss": 2.1327, "step": 149408 }, { "epoch": 12.893719806763285, "grad_norm": 2.212644338607788, "learning_rate": 0.001, "loss": 2.1332, "step": 149464 }, { "epoch": 12.898550724637682, "grad_norm": 1.8334863185882568, "learning_rate": 0.001, "loss": 2.1387, "step": 149520 }, { "epoch": 12.903381642512077, "grad_norm": 1.0166122913360596, "learning_rate": 0.001, "loss": 2.1418, "step": 149576 }, { "epoch": 12.908212560386474, "grad_norm": 6.022477149963379, "learning_rate": 0.001, "loss": 2.1487, "step": 149632 }, { "epoch": 12.91304347826087, "grad_norm": 2.2472314834594727, "learning_rate": 0.001, "loss": 2.144, "step": 149688 }, { "epoch": 12.917874396135266, "grad_norm": 5.197749614715576, "learning_rate": 0.001, "loss": 2.1533, "step": 149744 }, { "epoch": 12.922705314009661, "grad_norm": 15.629669189453125, "learning_rate": 0.001, "loss": 2.1827, "step": 149800 }, { "epoch": 12.927536231884059, "grad_norm": 1.5433567762374878, "learning_rate": 0.001, "loss": 2.1723, "step": 149856 }, { "epoch": 12.932367149758454, "grad_norm": 3.1883909702301025, "learning_rate": 0.001, "loss": 2.178, "step": 149912 }, { "epoch": 12.93719806763285, "grad_norm": 3.1321969032287598, "learning_rate": 0.001, "loss": 2.1627, "step": 149968 }, { "epoch": 12.942028985507246, "grad_norm": 10.574917793273926, "learning_rate": 0.001, "loss": 2.1501, "step": 150024 }, { "epoch": 12.946859903381643, "grad_norm": 1.0977131128311157, "learning_rate": 0.001, "loss": 2.1635, "step": 150080 }, { "epoch": 12.951690821256038, "grad_norm": 2.353797435760498, "learning_rate": 0.001, "loss": 2.189, "step": 150136 }, { "epoch": 12.956521739130435, "grad_norm": 177.41091918945312, "learning_rate": 0.001, "loss": 2.2147, "step": 150192 }, { "epoch": 12.96135265700483, "grad_norm": 2.0154869556427, "learning_rate": 0.001, "loss": 2.2232, "step": 150248 }, { "epoch": 12.966183574879228, "grad_norm": 2.557548761367798, "learning_rate": 0.001, "loss": 2.2052, "step": 150304 }, { "epoch": 12.971014492753623, "grad_norm": 4.553218841552734, "learning_rate": 0.001, "loss": 2.2037, "step": 150360 }, { "epoch": 12.97584541062802, "grad_norm": 6.355138778686523, "learning_rate": 0.001, "loss": 2.2178, "step": 150416 }, { "epoch": 12.980676328502415, "grad_norm": 3.4625821113586426, "learning_rate": 0.001, "loss": 2.2096, "step": 150472 }, { "epoch": 12.985507246376812, "grad_norm": 2.4670097827911377, "learning_rate": 0.001, "loss": 2.1882, "step": 150528 }, { "epoch": 12.990338164251208, "grad_norm": 1.7348414659500122, "learning_rate": 0.001, "loss": 2.199, "step": 150584 }, { "epoch": 12.995169082125603, "grad_norm": 3.511028528213501, "learning_rate": 0.001, "loss": 2.2021, "step": 150640 }, { "epoch": 13.0, "grad_norm": 7.550862789154053, "learning_rate": 0.001, "loss": 2.2044, "step": 150696 }, { "epoch": 13.004830917874395, "grad_norm": 1.4284648895263672, "learning_rate": 0.001, "loss": 2.1599, "step": 150752 }, { "epoch": 13.009661835748792, "grad_norm": 1.9730991125106812, "learning_rate": 0.001, "loss": 2.1659, "step": 150808 }, { "epoch": 13.014492753623188, "grad_norm": 0.8409374356269836, "learning_rate": 0.001, "loss": 2.1646, "step": 150864 }, { "epoch": 13.019323671497585, "grad_norm": 1.1762151718139648, "learning_rate": 0.001, "loss": 2.1676, "step": 150920 }, { "epoch": 13.02415458937198, "grad_norm": 4.034763336181641, "learning_rate": 0.001, "loss": 2.1587, "step": 150976 }, { "epoch": 13.028985507246377, "grad_norm": 1.5850826501846313, "learning_rate": 0.001, "loss": 2.141, "step": 151032 }, { "epoch": 13.033816425120772, "grad_norm": 1.5828746557235718, "learning_rate": 0.001, "loss": 2.1312, "step": 151088 }, { "epoch": 13.03864734299517, "grad_norm": 12.526509284973145, "learning_rate": 0.001, "loss": 2.132, "step": 151144 }, { "epoch": 13.043478260869565, "grad_norm": 13.837462425231934, "learning_rate": 0.001, "loss": 2.1342, "step": 151200 }, { "epoch": 13.048309178743962, "grad_norm": 15.72929859161377, "learning_rate": 0.001, "loss": 2.1442, "step": 151256 }, { "epoch": 13.053140096618357, "grad_norm": 2.5083811283111572, "learning_rate": 0.001, "loss": 2.1526, "step": 151312 }, { "epoch": 13.057971014492754, "grad_norm": 91.62779998779297, "learning_rate": 0.001, "loss": 2.1549, "step": 151368 }, { "epoch": 13.06280193236715, "grad_norm": 1.9386768341064453, "learning_rate": 0.001, "loss": 2.1421, "step": 151424 }, { "epoch": 13.067632850241546, "grad_norm": 2.060494899749756, "learning_rate": 0.001, "loss": 2.1397, "step": 151480 }, { "epoch": 13.072463768115941, "grad_norm": 1.8773630857467651, "learning_rate": 0.001, "loss": 2.123, "step": 151536 }, { "epoch": 13.077294685990339, "grad_norm": 3.6586945056915283, "learning_rate": 0.001, "loss": 2.1259, "step": 151592 }, { "epoch": 13.082125603864734, "grad_norm": 1.596848964691162, "learning_rate": 0.001, "loss": 2.1413, "step": 151648 }, { "epoch": 13.08695652173913, "grad_norm": 1.0399446487426758, "learning_rate": 0.001, "loss": 2.1193, "step": 151704 }, { "epoch": 13.091787439613526, "grad_norm": 0.9175421595573425, "learning_rate": 0.001, "loss": 2.1149, "step": 151760 }, { "epoch": 13.096618357487923, "grad_norm": 2.4593353271484375, "learning_rate": 0.001, "loss": 2.1208, "step": 151816 }, { "epoch": 13.101449275362318, "grad_norm": 2.027021884918213, "learning_rate": 0.001, "loss": 2.1212, "step": 151872 }, { "epoch": 13.106280193236715, "grad_norm": 0.9635437726974487, "learning_rate": 0.001, "loss": 2.1147, "step": 151928 }, { "epoch": 13.11111111111111, "grad_norm": 1.103538990020752, "learning_rate": 0.001, "loss": 2.1168, "step": 151984 }, { "epoch": 13.115942028985508, "grad_norm": 2.2070937156677246, "learning_rate": 0.001, "loss": 2.1023, "step": 152040 }, { "epoch": 13.120772946859903, "grad_norm": 1.435902714729309, "learning_rate": 0.001, "loss": 2.1016, "step": 152096 }, { "epoch": 13.1256038647343, "grad_norm": 1.9959419965744019, "learning_rate": 0.001, "loss": 2.1293, "step": 152152 }, { "epoch": 13.130434782608695, "grad_norm": 2.2775208950042725, "learning_rate": 0.001, "loss": 2.1338, "step": 152208 }, { "epoch": 13.135265700483092, "grad_norm": 0.8768938183784485, "learning_rate": 0.001, "loss": 2.1206, "step": 152264 }, { "epoch": 13.140096618357488, "grad_norm": 1.1370960474014282, "learning_rate": 0.001, "loss": 2.106, "step": 152320 }, { "epoch": 13.144927536231885, "grad_norm": 2.0057201385498047, "learning_rate": 0.001, "loss": 2.1204, "step": 152376 }, { "epoch": 13.14975845410628, "grad_norm": 2.1446590423583984, "learning_rate": 0.001, "loss": 2.139, "step": 152432 }, { "epoch": 13.154589371980677, "grad_norm": 1.389958381652832, "learning_rate": 0.001, "loss": 2.1428, "step": 152488 }, { "epoch": 13.159420289855072, "grad_norm": 1.2191355228424072, "learning_rate": 0.001, "loss": 2.1402, "step": 152544 }, { "epoch": 13.16425120772947, "grad_norm": 7.4249444007873535, "learning_rate": 0.001, "loss": 2.1488, "step": 152600 }, { "epoch": 13.169082125603865, "grad_norm": 1.4022554159164429, "learning_rate": 0.001, "loss": 2.1363, "step": 152656 }, { "epoch": 13.173913043478262, "grad_norm": 1.2038792371749878, "learning_rate": 0.001, "loss": 2.1532, "step": 152712 }, { "epoch": 13.178743961352657, "grad_norm": 1.5878230333328247, "learning_rate": 0.001, "loss": 2.1483, "step": 152768 }, { "epoch": 13.183574879227054, "grad_norm": 1.1976397037506104, "learning_rate": 0.001, "loss": 2.1433, "step": 152824 }, { "epoch": 13.18840579710145, "grad_norm": 1.1416399478912354, "learning_rate": 0.001, "loss": 2.1581, "step": 152880 }, { "epoch": 13.193236714975846, "grad_norm": 0.850429892539978, "learning_rate": 0.001, "loss": 2.1529, "step": 152936 }, { "epoch": 13.198067632850242, "grad_norm": 2.028083086013794, "learning_rate": 0.001, "loss": 2.1474, "step": 152992 }, { "epoch": 13.202898550724637, "grad_norm": 3.1031394004821777, "learning_rate": 0.001, "loss": 2.1317, "step": 153048 }, { "epoch": 13.207729468599034, "grad_norm": 1.853583574295044, "learning_rate": 0.001, "loss": 2.112, "step": 153104 }, { "epoch": 13.21256038647343, "grad_norm": 1.3505733013153076, "learning_rate": 0.001, "loss": 2.1158, "step": 153160 }, { "epoch": 13.217391304347826, "grad_norm": 0.6872023344039917, "learning_rate": 0.001, "loss": 2.1535, "step": 153216 }, { "epoch": 13.222222222222221, "grad_norm": 0.7635059356689453, "learning_rate": 0.001, "loss": 2.1599, "step": 153272 }, { "epoch": 13.227053140096618, "grad_norm": 2.9427006244659424, "learning_rate": 0.001, "loss": 2.1642, "step": 153328 }, { "epoch": 13.231884057971014, "grad_norm": 2.549839496612549, "learning_rate": 0.001, "loss": 2.1473, "step": 153384 }, { "epoch": 13.23671497584541, "grad_norm": 0.8575700521469116, "learning_rate": 0.001, "loss": 2.1302, "step": 153440 }, { "epoch": 13.241545893719806, "grad_norm": 3.396904706954956, "learning_rate": 0.001, "loss": 2.111, "step": 153496 }, { "epoch": 13.246376811594203, "grad_norm": 1.2614824771881104, "learning_rate": 0.001, "loss": 2.1113, "step": 153552 }, { "epoch": 13.251207729468598, "grad_norm": 1.2675766944885254, "learning_rate": 0.001, "loss": 2.1043, "step": 153608 }, { "epoch": 13.256038647342995, "grad_norm": 1.7872023582458496, "learning_rate": 0.001, "loss": 2.1061, "step": 153664 }, { "epoch": 13.26086956521739, "grad_norm": 1.1799516677856445, "learning_rate": 0.001, "loss": 2.105, "step": 153720 }, { "epoch": 13.265700483091788, "grad_norm": 1.4408347606658936, "learning_rate": 0.001, "loss": 2.1114, "step": 153776 }, { "epoch": 13.270531400966183, "grad_norm": 0.4788872301578522, "learning_rate": 0.001, "loss": 2.0996, "step": 153832 }, { "epoch": 13.27536231884058, "grad_norm": 0.8686665892601013, "learning_rate": 0.001, "loss": 2.0938, "step": 153888 }, { "epoch": 13.280193236714975, "grad_norm": 2.249607801437378, "learning_rate": 0.001, "loss": 2.108, "step": 153944 }, { "epoch": 13.285024154589372, "grad_norm": 2.659045696258545, "learning_rate": 0.001, "loss": 2.1034, "step": 154000 }, { "epoch": 13.289855072463768, "grad_norm": 2.5369389057159424, "learning_rate": 0.001, "loss": 2.0995, "step": 154056 }, { "epoch": 13.294685990338165, "grad_norm": 2.343069553375244, "learning_rate": 0.001, "loss": 2.1065, "step": 154112 }, { "epoch": 13.29951690821256, "grad_norm": 0.6565819978713989, "learning_rate": 0.001, "loss": 2.1048, "step": 154168 }, { "epoch": 13.304347826086957, "grad_norm": 0.7682580351829529, "learning_rate": 0.001, "loss": 2.1073, "step": 154224 }, { "epoch": 13.309178743961352, "grad_norm": 0.6823811531066895, "learning_rate": 0.001, "loss": 2.1103, "step": 154280 }, { "epoch": 13.31400966183575, "grad_norm": 1.133642554283142, "learning_rate": 0.001, "loss": 2.1158, "step": 154336 }, { "epoch": 13.318840579710145, "grad_norm": 0.7315028309822083, "learning_rate": 0.001, "loss": 2.1124, "step": 154392 }, { "epoch": 13.323671497584542, "grad_norm": 1.7674360275268555, "learning_rate": 0.001, "loss": 2.1057, "step": 154448 }, { "epoch": 13.328502415458937, "grad_norm": 0.7310938239097595, "learning_rate": 0.001, "loss": 2.1181, "step": 154504 }, { "epoch": 13.333333333333334, "grad_norm": 0.9955859780311584, "learning_rate": 0.001, "loss": 2.1203, "step": 154560 }, { "epoch": 13.33816425120773, "grad_norm": 1.5533608198165894, "learning_rate": 0.001, "loss": 2.1115, "step": 154616 }, { "epoch": 13.342995169082126, "grad_norm": 1.0048167705535889, "learning_rate": 0.001, "loss": 2.1044, "step": 154672 }, { "epoch": 13.347826086956522, "grad_norm": 1.2621773481369019, "learning_rate": 0.001, "loss": 2.1072, "step": 154728 }, { "epoch": 13.352657004830919, "grad_norm": 1.8814021348953247, "learning_rate": 0.001, "loss": 2.1007, "step": 154784 }, { "epoch": 13.357487922705314, "grad_norm": 1.6272419691085815, "learning_rate": 0.001, "loss": 2.1043, "step": 154840 }, { "epoch": 13.36231884057971, "grad_norm": 12.52737808227539, "learning_rate": 0.001, "loss": 2.13, "step": 154896 }, { "epoch": 13.367149758454106, "grad_norm": 3.332087516784668, "learning_rate": 0.001, "loss": 2.1573, "step": 154952 }, { "epoch": 13.371980676328503, "grad_norm": 1.649582028388977, "learning_rate": 0.001, "loss": 2.1535, "step": 155008 }, { "epoch": 13.376811594202898, "grad_norm": 1.4080270528793335, "learning_rate": 0.001, "loss": 2.154, "step": 155064 }, { "epoch": 13.381642512077295, "grad_norm": 2.415435314178467, "learning_rate": 0.001, "loss": 2.1437, "step": 155120 }, { "epoch": 13.38647342995169, "grad_norm": 2.4696104526519775, "learning_rate": 0.001, "loss": 2.1283, "step": 155176 }, { "epoch": 13.391304347826088, "grad_norm": 2.0682575702667236, "learning_rate": 0.001, "loss": 2.1201, "step": 155232 }, { "epoch": 13.396135265700483, "grad_norm": 0.7371551990509033, "learning_rate": 0.001, "loss": 2.1356, "step": 155288 }, { "epoch": 13.40096618357488, "grad_norm": 0.8683264255523682, "learning_rate": 0.001, "loss": 2.1353, "step": 155344 }, { "epoch": 13.405797101449275, "grad_norm": 1.6585768461227417, "learning_rate": 0.001, "loss": 2.1448, "step": 155400 }, { "epoch": 13.41062801932367, "grad_norm": 1.6927316188812256, "learning_rate": 0.001, "loss": 2.1311, "step": 155456 }, { "epoch": 13.415458937198068, "grad_norm": 0.7282180190086365, "learning_rate": 0.001, "loss": 2.1277, "step": 155512 }, { "epoch": 13.420289855072463, "grad_norm": 0.9816649556159973, "learning_rate": 0.001, "loss": 2.1294, "step": 155568 }, { "epoch": 13.42512077294686, "grad_norm": 2.6126668453216553, "learning_rate": 0.001, "loss": 2.1448, "step": 155624 }, { "epoch": 13.429951690821255, "grad_norm": 1.0526080131530762, "learning_rate": 0.001, "loss": 2.1339, "step": 155680 }, { "epoch": 13.434782608695652, "grad_norm": 98.00154113769531, "learning_rate": 0.001, "loss": 2.145, "step": 155736 }, { "epoch": 13.439613526570048, "grad_norm": 0.7751989364624023, "learning_rate": 0.001, "loss": 2.1229, "step": 155792 }, { "epoch": 13.444444444444445, "grad_norm": 1.1652582883834839, "learning_rate": 0.001, "loss": 2.1228, "step": 155848 }, { "epoch": 13.44927536231884, "grad_norm": 2.596214532852173, "learning_rate": 0.001, "loss": 2.1247, "step": 155904 }, { "epoch": 13.454106280193237, "grad_norm": 1.4621740579605103, "learning_rate": 0.001, "loss": 2.1096, "step": 155960 }, { "epoch": 13.458937198067632, "grad_norm": 1.411218523979187, "learning_rate": 0.001, "loss": 2.1246, "step": 156016 }, { "epoch": 13.46376811594203, "grad_norm": 1.3023192882537842, "learning_rate": 0.001, "loss": 2.1185, "step": 156072 }, { "epoch": 13.468599033816425, "grad_norm": 0.6995598673820496, "learning_rate": 0.001, "loss": 2.1184, "step": 156128 }, { "epoch": 13.473429951690822, "grad_norm": 3.9683218002319336, "learning_rate": 0.001, "loss": 2.1181, "step": 156184 }, { "epoch": 13.478260869565217, "grad_norm": 0.8429069519042969, "learning_rate": 0.001, "loss": 2.1187, "step": 156240 }, { "epoch": 13.483091787439614, "grad_norm": 1.305495023727417, "learning_rate": 0.001, "loss": 2.1066, "step": 156296 }, { "epoch": 13.48792270531401, "grad_norm": 1.988076090812683, "learning_rate": 0.001, "loss": 2.1105, "step": 156352 }, { "epoch": 13.492753623188406, "grad_norm": 1.893061876296997, "learning_rate": 0.001, "loss": 2.1198, "step": 156408 }, { "epoch": 13.497584541062801, "grad_norm": 1.5898001194000244, "learning_rate": 0.001, "loss": 2.1353, "step": 156464 }, { "epoch": 13.502415458937199, "grad_norm": 2.0219902992248535, "learning_rate": 0.001, "loss": 2.1471, "step": 156520 }, { "epoch": 13.507246376811594, "grad_norm": 2.892352342605591, "learning_rate": 0.001, "loss": 2.1276, "step": 156576 }, { "epoch": 13.51207729468599, "grad_norm": 1.5758299827575684, "learning_rate": 0.001, "loss": 2.1275, "step": 156632 }, { "epoch": 13.516908212560386, "grad_norm": 1.6753467321395874, "learning_rate": 0.001, "loss": 2.1164, "step": 156688 }, { "epoch": 13.521739130434783, "grad_norm": 12.08422565460205, "learning_rate": 0.001, "loss": 2.1133, "step": 156744 }, { "epoch": 13.526570048309178, "grad_norm": 1.9895418882369995, "learning_rate": 0.001, "loss": 2.1286, "step": 156800 }, { "epoch": 13.531400966183575, "grad_norm": 0.9658872485160828, "learning_rate": 0.001, "loss": 2.1262, "step": 156856 }, { "epoch": 13.53623188405797, "grad_norm": 1.978919506072998, "learning_rate": 0.001, "loss": 2.1267, "step": 156912 }, { "epoch": 13.541062801932368, "grad_norm": 0.47410324215888977, "learning_rate": 0.001, "loss": 2.119, "step": 156968 }, { "epoch": 13.545893719806763, "grad_norm": 1.0026180744171143, "learning_rate": 0.001, "loss": 2.1295, "step": 157024 }, { "epoch": 13.55072463768116, "grad_norm": 0.7485067248344421, "learning_rate": 0.001, "loss": 2.1307, "step": 157080 }, { "epoch": 13.555555555555555, "grad_norm": 0.5274030566215515, "learning_rate": 0.001, "loss": 2.1214, "step": 157136 }, { "epoch": 13.560386473429952, "grad_norm": 0.8862578868865967, "learning_rate": 0.001, "loss": 2.116, "step": 157192 }, { "epoch": 13.565217391304348, "grad_norm": 1.3899259567260742, "learning_rate": 0.001, "loss": 2.1215, "step": 157248 }, { "epoch": 13.570048309178745, "grad_norm": 2.7743959426879883, "learning_rate": 0.001, "loss": 2.1156, "step": 157304 }, { "epoch": 13.57487922705314, "grad_norm": 1.5771849155426025, "learning_rate": 0.001, "loss": 2.1199, "step": 157360 }, { "epoch": 13.579710144927537, "grad_norm": 1.2374780178070068, "learning_rate": 0.001, "loss": 2.1087, "step": 157416 }, { "epoch": 13.584541062801932, "grad_norm": 3.2778754234313965, "learning_rate": 0.001, "loss": 2.1208, "step": 157472 }, { "epoch": 13.58937198067633, "grad_norm": 0.86004239320755, "learning_rate": 0.001, "loss": 2.1202, "step": 157528 }, { "epoch": 13.594202898550725, "grad_norm": 1.7727586030960083, "learning_rate": 0.001, "loss": 2.1231, "step": 157584 }, { "epoch": 13.59903381642512, "grad_norm": 3.322543144226074, "learning_rate": 0.001, "loss": 2.1219, "step": 157640 }, { "epoch": 13.603864734299517, "grad_norm": 2.1620090007781982, "learning_rate": 0.001, "loss": 2.112, "step": 157696 }, { "epoch": 13.608695652173914, "grad_norm": 0.5596914887428284, "learning_rate": 0.001, "loss": 2.1121, "step": 157752 }, { "epoch": 13.61352657004831, "grad_norm": 1.358747959136963, "learning_rate": 0.001, "loss": 2.1193, "step": 157808 }, { "epoch": 13.618357487922705, "grad_norm": 2.925055503845215, "learning_rate": 0.001, "loss": 2.1263, "step": 157864 }, { "epoch": 13.623188405797102, "grad_norm": 5.203530311584473, "learning_rate": 0.001, "loss": 2.1318, "step": 157920 }, { "epoch": 13.628019323671497, "grad_norm": 0.6694729924201965, "learning_rate": 0.001, "loss": 2.137, "step": 157976 }, { "epoch": 13.632850241545894, "grad_norm": 1.621677041053772, "learning_rate": 0.001, "loss": 2.129, "step": 158032 }, { "epoch": 13.63768115942029, "grad_norm": 1.223446249961853, "learning_rate": 0.001, "loss": 2.1205, "step": 158088 }, { "epoch": 13.642512077294686, "grad_norm": 1.7448548078536987, "learning_rate": 0.001, "loss": 2.1102, "step": 158144 }, { "epoch": 13.647342995169081, "grad_norm": 2.5689926147460938, "learning_rate": 0.001, "loss": 2.1081, "step": 158200 }, { "epoch": 13.652173913043478, "grad_norm": 1.0271503925323486, "learning_rate": 0.001, "loss": 2.1187, "step": 158256 }, { "epoch": 13.657004830917874, "grad_norm": 0.9414145350456238, "learning_rate": 0.001, "loss": 2.1146, "step": 158312 }, { "epoch": 13.66183574879227, "grad_norm": 1.4684207439422607, "learning_rate": 0.001, "loss": 2.1214, "step": 158368 }, { "epoch": 13.666666666666666, "grad_norm": 2.3854098320007324, "learning_rate": 0.001, "loss": 2.1141, "step": 158424 }, { "epoch": 13.671497584541063, "grad_norm": 1.349774718284607, "learning_rate": 0.001, "loss": 2.1136, "step": 158480 }, { "epoch": 13.676328502415458, "grad_norm": 1.9693864583969116, "learning_rate": 0.001, "loss": 2.1303, "step": 158536 }, { "epoch": 13.681159420289855, "grad_norm": 0.796342670917511, "learning_rate": 0.001, "loss": 2.118, "step": 158592 }, { "epoch": 13.68599033816425, "grad_norm": 1.3799043893814087, "learning_rate": 0.001, "loss": 2.1185, "step": 158648 }, { "epoch": 13.690821256038648, "grad_norm": 2.128614664077759, "learning_rate": 0.001, "loss": 2.1151, "step": 158704 }, { "epoch": 13.695652173913043, "grad_norm": 0.8599096536636353, "learning_rate": 0.001, "loss": 2.1014, "step": 158760 }, { "epoch": 13.70048309178744, "grad_norm": 1.0084664821624756, "learning_rate": 0.001, "loss": 2.0926, "step": 158816 }, { "epoch": 13.705314009661835, "grad_norm": 1.0774191617965698, "learning_rate": 0.001, "loss": 2.0974, "step": 158872 }, { "epoch": 13.710144927536232, "grad_norm": 1.2552127838134766, "learning_rate": 0.001, "loss": 2.1014, "step": 158928 }, { "epoch": 13.714975845410628, "grad_norm": 0.8322638273239136, "learning_rate": 0.001, "loss": 2.1208, "step": 158984 }, { "epoch": 13.719806763285025, "grad_norm": 0.8590813279151917, "learning_rate": 0.001, "loss": 2.1423, "step": 159040 }, { "epoch": 13.72463768115942, "grad_norm": 2.231987953186035, "learning_rate": 0.001, "loss": 2.115, "step": 159096 }, { "epoch": 13.729468599033817, "grad_norm": 0.6681892275810242, "learning_rate": 0.001, "loss": 2.1026, "step": 159152 }, { "epoch": 13.734299516908212, "grad_norm": 1.50006902217865, "learning_rate": 0.001, "loss": 2.1132, "step": 159208 }, { "epoch": 13.73913043478261, "grad_norm": 1.1472631692886353, "learning_rate": 0.001, "loss": 2.1195, "step": 159264 }, { "epoch": 13.743961352657005, "grad_norm": 1.0082118511199951, "learning_rate": 0.001, "loss": 2.1241, "step": 159320 }, { "epoch": 13.748792270531402, "grad_norm": 2.1933016777038574, "learning_rate": 0.001, "loss": 2.1254, "step": 159376 }, { "epoch": 13.753623188405797, "grad_norm": 1.2152806520462036, "learning_rate": 0.001, "loss": 2.1215, "step": 159432 }, { "epoch": 13.758454106280194, "grad_norm": 0.7440258264541626, "learning_rate": 0.001, "loss": 2.1263, "step": 159488 }, { "epoch": 13.76328502415459, "grad_norm": 1.1146398782730103, "learning_rate": 0.001, "loss": 2.1124, "step": 159544 }, { "epoch": 13.768115942028986, "grad_norm": 0.7433760762214661, "learning_rate": 0.001, "loss": 2.1123, "step": 159600 }, { "epoch": 13.772946859903382, "grad_norm": 1.4501004219055176, "learning_rate": 0.001, "loss": 2.115, "step": 159656 }, { "epoch": 13.777777777777779, "grad_norm": 1.6814368963241577, "learning_rate": 0.001, "loss": 2.1142, "step": 159712 }, { "epoch": 13.782608695652174, "grad_norm": 1.9064940214157104, "learning_rate": 0.001, "loss": 2.0953, "step": 159768 }, { "epoch": 13.78743961352657, "grad_norm": 1.6684178113937378, "learning_rate": 0.001, "loss": 2.091, "step": 159824 }, { "epoch": 13.792270531400966, "grad_norm": 0.7099637389183044, "learning_rate": 0.001, "loss": 2.0985, "step": 159880 }, { "epoch": 13.797101449275363, "grad_norm": 0.9087736010551453, "learning_rate": 0.001, "loss": 2.0967, "step": 159936 }, { "epoch": 13.801932367149758, "grad_norm": 0.7317041754722595, "learning_rate": 0.001, "loss": 2.0998, "step": 159992 }, { "epoch": 13.806763285024154, "grad_norm": 0.5762949585914612, "learning_rate": 0.001, "loss": 2.0929, "step": 160048 }, { "epoch": 13.81159420289855, "grad_norm": 0.5864407420158386, "learning_rate": 0.001, "loss": 2.0944, "step": 160104 }, { "epoch": 13.816425120772946, "grad_norm": 0.7257387042045593, "learning_rate": 0.001, "loss": 2.0959, "step": 160160 }, { "epoch": 13.821256038647343, "grad_norm": 1.3213415145874023, "learning_rate": 0.001, "loss": 2.0926, "step": 160216 }, { "epoch": 13.826086956521738, "grad_norm": 1.233480453491211, "learning_rate": 0.001, "loss": 2.0955, "step": 160272 }, { "epoch": 13.830917874396135, "grad_norm": 1.3564972877502441, "learning_rate": 0.001, "loss": 2.0941, "step": 160328 }, { "epoch": 13.83574879227053, "grad_norm": 1.0533579587936401, "learning_rate": 0.001, "loss": 2.0858, "step": 160384 }, { "epoch": 13.840579710144928, "grad_norm": 3.3225958347320557, "learning_rate": 0.001, "loss": 2.0924, "step": 160440 }, { "epoch": 13.845410628019323, "grad_norm": 0.7427592277526855, "learning_rate": 0.001, "loss": 2.0955, "step": 160496 }, { "epoch": 13.85024154589372, "grad_norm": 0.7221114039421082, "learning_rate": 0.001, "loss": 2.1161, "step": 160552 }, { "epoch": 13.855072463768115, "grad_norm": 1.6178414821624756, "learning_rate": 0.001, "loss": 2.0917, "step": 160608 }, { "epoch": 13.859903381642512, "grad_norm": 1.0564931631088257, "learning_rate": 0.001, "loss": 2.0871, "step": 160664 }, { "epoch": 13.864734299516908, "grad_norm": 2.0206096172332764, "learning_rate": 0.001, "loss": 2.0953, "step": 160720 }, { "epoch": 13.869565217391305, "grad_norm": 1.6121093034744263, "learning_rate": 0.001, "loss": 2.1018, "step": 160776 }, { "epoch": 13.8743961352657, "grad_norm": 1.6579445600509644, "learning_rate": 0.001, "loss": 2.0775, "step": 160832 }, { "epoch": 13.879227053140097, "grad_norm": 1.3393802642822266, "learning_rate": 0.001, "loss": 2.0904, "step": 160888 }, { "epoch": 13.884057971014492, "grad_norm": 0.931817889213562, "learning_rate": 0.001, "loss": 2.0982, "step": 160944 }, { "epoch": 13.88888888888889, "grad_norm": 3.4905459880828857, "learning_rate": 0.001, "loss": 2.0944, "step": 161000 }, { "epoch": 13.893719806763285, "grad_norm": 0.8015429973602295, "learning_rate": 0.001, "loss": 2.0962, "step": 161056 }, { "epoch": 13.898550724637682, "grad_norm": 1.297957181930542, "learning_rate": 0.001, "loss": 2.1112, "step": 161112 }, { "epoch": 13.903381642512077, "grad_norm": 0.9302045702934265, "learning_rate": 0.001, "loss": 2.1166, "step": 161168 }, { "epoch": 13.908212560386474, "grad_norm": 1.845407247543335, "learning_rate": 0.001, "loss": 2.1084, "step": 161224 }, { "epoch": 13.91304347826087, "grad_norm": 9.368751525878906, "learning_rate": 0.001, "loss": 2.1229, "step": 161280 }, { "epoch": 13.917874396135266, "grad_norm": 1.476915717124939, "learning_rate": 0.001, "loss": 2.1269, "step": 161336 }, { "epoch": 13.922705314009661, "grad_norm": 1.9280927181243896, "learning_rate": 0.001, "loss": 2.1397, "step": 161392 }, { "epoch": 13.927536231884059, "grad_norm": 1.451669454574585, "learning_rate": 0.001, "loss": 2.1369, "step": 161448 }, { "epoch": 13.932367149758454, "grad_norm": 3.7889719009399414, "learning_rate": 0.001, "loss": 2.1288, "step": 161504 }, { "epoch": 13.93719806763285, "grad_norm": 1.2046613693237305, "learning_rate": 0.001, "loss": 2.145, "step": 161560 }, { "epoch": 13.942028985507246, "grad_norm": 0.908202052116394, "learning_rate": 0.001, "loss": 2.1672, "step": 161616 }, { "epoch": 13.946859903381643, "grad_norm": 2.666133165359497, "learning_rate": 0.001, "loss": 2.1516, "step": 161672 }, { "epoch": 13.951690821256038, "grad_norm": 6.771801948547363, "learning_rate": 0.001, "loss": 2.154, "step": 161728 }, { "epoch": 13.956521739130435, "grad_norm": 4.490646839141846, "learning_rate": 0.001, "loss": 2.1557, "step": 161784 }, { "epoch": 13.96135265700483, "grad_norm": 2.0039854049682617, "learning_rate": 0.001, "loss": 2.1393, "step": 161840 }, { "epoch": 13.966183574879228, "grad_norm": 3.488013505935669, "learning_rate": 0.001, "loss": 2.1505, "step": 161896 }, { "epoch": 13.971014492753623, "grad_norm": 3.316882610321045, "learning_rate": 0.001, "loss": 2.1408, "step": 161952 }, { "epoch": 13.97584541062802, "grad_norm": 1.202447772026062, "learning_rate": 0.001, "loss": 2.1579, "step": 162008 }, { "epoch": 13.980676328502415, "grad_norm": 1.1322122812271118, "learning_rate": 0.001, "loss": 2.1522, "step": 162064 }, { "epoch": 13.985507246376812, "grad_norm": 1.975663423538208, "learning_rate": 0.001, "loss": 2.1347, "step": 162120 }, { "epoch": 13.990338164251208, "grad_norm": 0.9171027541160583, "learning_rate": 0.001, "loss": 2.1297, "step": 162176 }, { "epoch": 13.995169082125603, "grad_norm": 4.064940452575684, "learning_rate": 0.001, "loss": 2.1212, "step": 162232 }, { "epoch": 14.0, "grad_norm": 1.2812541723251343, "learning_rate": 0.001, "loss": 2.1161, "step": 162288 }, { "epoch": 14.004830917874395, "grad_norm": 2.015765428543091, "learning_rate": 0.001, "loss": 2.0894, "step": 162344 }, { "epoch": 14.009661835748792, "grad_norm": 3.5633678436279297, "learning_rate": 0.001, "loss": 2.0815, "step": 162400 }, { "epoch": 14.014492753623188, "grad_norm": 3.0259530544281006, "learning_rate": 0.001, "loss": 2.0797, "step": 162456 }, { "epoch": 14.019323671497585, "grad_norm": 4.439858913421631, "learning_rate": 0.001, "loss": 2.0767, "step": 162512 }, { "epoch": 14.02415458937198, "grad_norm": 2.9818506240844727, "learning_rate": 0.001, "loss": 2.0782, "step": 162568 }, { "epoch": 14.028985507246377, "grad_norm": 1.519334077835083, "learning_rate": 0.001, "loss": 2.0839, "step": 162624 }, { "epoch": 14.033816425120772, "grad_norm": 1.7865744829177856, "learning_rate": 0.001, "loss": 2.0762, "step": 162680 }, { "epoch": 14.03864734299517, "grad_norm": 1.9717751741409302, "learning_rate": 0.001, "loss": 2.0816, "step": 162736 }, { "epoch": 14.043478260869565, "grad_norm": 1.1875979900360107, "learning_rate": 0.001, "loss": 2.1026, "step": 162792 }, { "epoch": 14.048309178743962, "grad_norm": 1.6881129741668701, "learning_rate": 0.001, "loss": 2.1151, "step": 162848 }, { "epoch": 14.053140096618357, "grad_norm": 3.7378671169281006, "learning_rate": 0.001, "loss": 2.1004, "step": 162904 }, { "epoch": 14.057971014492754, "grad_norm": 1.7965749502182007, "learning_rate": 0.001, "loss": 2.1061, "step": 162960 }, { "epoch": 14.06280193236715, "grad_norm": 0.9295099973678589, "learning_rate": 0.001, "loss": 2.1229, "step": 163016 }, { "epoch": 14.067632850241546, "grad_norm": 2.1705219745635986, "learning_rate": 0.001, "loss": 2.1148, "step": 163072 }, { "epoch": 14.072463768115941, "grad_norm": 1.94368314743042, "learning_rate": 0.001, "loss": 2.1011, "step": 163128 }, { "epoch": 14.077294685990339, "grad_norm": 0.7417897582054138, "learning_rate": 0.001, "loss": 2.0961, "step": 163184 }, { "epoch": 14.082125603864734, "grad_norm": 4.0052289962768555, "learning_rate": 0.001, "loss": 2.0778, "step": 163240 }, { "epoch": 14.08695652173913, "grad_norm": 1.0168331861495972, "learning_rate": 0.001, "loss": 2.0787, "step": 163296 }, { "epoch": 14.091787439613526, "grad_norm": 1.8557634353637695, "learning_rate": 0.001, "loss": 2.0787, "step": 163352 }, { "epoch": 14.096618357487923, "grad_norm": 0.3780621290206909, "learning_rate": 0.001, "loss": 2.0791, "step": 163408 }, { "epoch": 14.101449275362318, "grad_norm": 0.5136595368385315, "learning_rate": 0.001, "loss": 2.0753, "step": 163464 }, { "epoch": 14.106280193236715, "grad_norm": 0.9920003414154053, "learning_rate": 0.001, "loss": 2.0765, "step": 163520 }, { "epoch": 14.11111111111111, "grad_norm": 0.8664082884788513, "learning_rate": 0.001, "loss": 2.0759, "step": 163576 }, { "epoch": 14.115942028985508, "grad_norm": 0.57419753074646, "learning_rate": 0.001, "loss": 2.0785, "step": 163632 }, { "epoch": 14.120772946859903, "grad_norm": 1.2664976119995117, "learning_rate": 0.001, "loss": 2.0739, "step": 163688 }, { "epoch": 14.1256038647343, "grad_norm": 0.5676619410514832, "learning_rate": 0.001, "loss": 2.0816, "step": 163744 }, { "epoch": 14.130434782608695, "grad_norm": 0.7228012084960938, "learning_rate": 0.001, "loss": 2.1026, "step": 163800 }, { "epoch": 14.135265700483092, "grad_norm": 2.3994898796081543, "learning_rate": 0.001, "loss": 2.0864, "step": 163856 }, { "epoch": 14.140096618357488, "grad_norm": 0.7544922828674316, "learning_rate": 0.001, "loss": 2.0908, "step": 163912 }, { "epoch": 14.144927536231885, "grad_norm": 0.5263815522193909, "learning_rate": 0.001, "loss": 2.0771, "step": 163968 }, { "epoch": 14.14975845410628, "grad_norm": 3.943847179412842, "learning_rate": 0.001, "loss": 2.0705, "step": 164024 }, { "epoch": 14.154589371980677, "grad_norm": 0.6299681067466736, "learning_rate": 0.001, "loss": 2.0649, "step": 164080 }, { "epoch": 14.159420289855072, "grad_norm": 0.7022569179534912, "learning_rate": 0.001, "loss": 2.0776, "step": 164136 }, { "epoch": 14.16425120772947, "grad_norm": 1.2700905799865723, "learning_rate": 0.001, "loss": 2.0971, "step": 164192 }, { "epoch": 14.169082125603865, "grad_norm": 3.574528217315674, "learning_rate": 0.001, "loss": 2.089, "step": 164248 }, { "epoch": 14.173913043478262, "grad_norm": 20.486120223999023, "learning_rate": 0.001, "loss": 2.0898, "step": 164304 }, { "epoch": 14.178743961352657, "grad_norm": 5.954614639282227, "learning_rate": 0.001, "loss": 2.0926, "step": 164360 }, { "epoch": 14.183574879227054, "grad_norm": 4.658265590667725, "learning_rate": 0.001, "loss": 2.0864, "step": 164416 }, { "epoch": 14.18840579710145, "grad_norm": 0.9387691617012024, "learning_rate": 0.001, "loss": 2.1033, "step": 164472 }, { "epoch": 14.193236714975846, "grad_norm": 0.806334376335144, "learning_rate": 0.001, "loss": 2.1112, "step": 164528 }, { "epoch": 14.198067632850242, "grad_norm": 1.323714017868042, "learning_rate": 0.001, "loss": 2.0923, "step": 164584 }, { "epoch": 14.202898550724637, "grad_norm": 0.9407135248184204, "learning_rate": 0.001, "loss": 2.0884, "step": 164640 }, { "epoch": 14.207729468599034, "grad_norm": 0.5886964201927185, "learning_rate": 0.001, "loss": 2.1017, "step": 164696 }, { "epoch": 14.21256038647343, "grad_norm": 3.506727933883667, "learning_rate": 0.001, "loss": 2.0947, "step": 164752 }, { "epoch": 14.217391304347826, "grad_norm": 1.226180911064148, "learning_rate": 0.001, "loss": 2.0922, "step": 164808 }, { "epoch": 14.222222222222221, "grad_norm": 1.6454286575317383, "learning_rate": 0.001, "loss": 2.0912, "step": 164864 }, { "epoch": 14.227053140096618, "grad_norm": 1.7647241353988647, "learning_rate": 0.001, "loss": 2.0857, "step": 164920 }, { "epoch": 14.231884057971014, "grad_norm": 0.6546169519424438, "learning_rate": 0.001, "loss": 2.0917, "step": 164976 }, { "epoch": 14.23671497584541, "grad_norm": 2.7022316455841064, "learning_rate": 0.001, "loss": 2.0928, "step": 165032 }, { "epoch": 14.241545893719806, "grad_norm": 0.9677340388298035, "learning_rate": 0.001, "loss": 2.0896, "step": 165088 }, { "epoch": 14.246376811594203, "grad_norm": 1.0026935338974, "learning_rate": 0.001, "loss": 2.0921, "step": 165144 }, { "epoch": 14.251207729468598, "grad_norm": 1.4015276432037354, "learning_rate": 0.001, "loss": 2.0867, "step": 165200 }, { "epoch": 14.256038647342995, "grad_norm": 0.4426155090332031, "learning_rate": 0.001, "loss": 2.0857, "step": 165256 }, { "epoch": 14.26086956521739, "grad_norm": 0.7532420754432678, "learning_rate": 0.001, "loss": 2.0797, "step": 165312 }, { "epoch": 14.265700483091788, "grad_norm": 0.9576014876365662, "learning_rate": 0.001, "loss": 2.098, "step": 165368 }, { "epoch": 14.270531400966183, "grad_norm": 1.3186644315719604, "learning_rate": 0.001, "loss": 2.0991, "step": 165424 }, { "epoch": 14.27536231884058, "grad_norm": 1.3279974460601807, "learning_rate": 0.001, "loss": 2.0847, "step": 165480 }, { "epoch": 14.280193236714975, "grad_norm": 1.023114800453186, "learning_rate": 0.001, "loss": 2.102, "step": 165536 }, { "epoch": 14.285024154589372, "grad_norm": 2.983393907546997, "learning_rate": 0.001, "loss": 2.0917, "step": 165592 }, { "epoch": 14.289855072463768, "grad_norm": 0.5952508449554443, "learning_rate": 0.001, "loss": 2.0753, "step": 165648 }, { "epoch": 14.294685990338165, "grad_norm": 1.6369107961654663, "learning_rate": 0.001, "loss": 2.0787, "step": 165704 }, { "epoch": 14.29951690821256, "grad_norm": 0.763378381729126, "learning_rate": 0.001, "loss": 2.0701, "step": 165760 }, { "epoch": 14.304347826086957, "grad_norm": 1.1152658462524414, "learning_rate": 0.001, "loss": 2.079, "step": 165816 }, { "epoch": 14.309178743961352, "grad_norm": 6.66530704498291, "learning_rate": 0.001, "loss": 2.0827, "step": 165872 }, { "epoch": 14.31400966183575, "grad_norm": 0.7748475074768066, "learning_rate": 0.001, "loss": 2.0775, "step": 165928 }, { "epoch": 14.318840579710145, "grad_norm": 1.5329574346542358, "learning_rate": 0.001, "loss": 2.0785, "step": 165984 }, { "epoch": 14.323671497584542, "grad_norm": 1.5411434173583984, "learning_rate": 0.001, "loss": 2.0697, "step": 166040 }, { "epoch": 14.328502415458937, "grad_norm": 0.6459455490112305, "learning_rate": 0.001, "loss": 2.0788, "step": 166096 }, { "epoch": 14.333333333333334, "grad_norm": 0.47820526361465454, "learning_rate": 0.001, "loss": 2.0717, "step": 166152 }, { "epoch": 14.33816425120773, "grad_norm": 0.44646474719047546, "learning_rate": 0.001, "loss": 2.0754, "step": 166208 }, { "epoch": 14.342995169082126, "grad_norm": 0.6516669392585754, "learning_rate": 0.001, "loss": 2.0827, "step": 166264 }, { "epoch": 14.347826086956522, "grad_norm": 0.6498791575431824, "learning_rate": 0.001, "loss": 2.0698, "step": 166320 }, { "epoch": 14.352657004830919, "grad_norm": 1.2011828422546387, "learning_rate": 0.001, "loss": 2.0725, "step": 166376 }, { "epoch": 14.357487922705314, "grad_norm": 0.6755707263946533, "learning_rate": 0.001, "loss": 2.0807, "step": 166432 }, { "epoch": 14.36231884057971, "grad_norm": 1.3804514408111572, "learning_rate": 0.001, "loss": 2.0726, "step": 166488 }, { "epoch": 14.367149758454106, "grad_norm": 1.330936312675476, "learning_rate": 0.001, "loss": 2.065, "step": 166544 }, { "epoch": 14.371980676328503, "grad_norm": 0.6674533486366272, "learning_rate": 0.001, "loss": 2.0773, "step": 166600 }, { "epoch": 14.376811594202898, "grad_norm": 0.43645772337913513, "learning_rate": 0.001, "loss": 2.072, "step": 166656 }, { "epoch": 14.381642512077295, "grad_norm": 0.5057602524757385, "learning_rate": 0.001, "loss": 2.065, "step": 166712 }, { "epoch": 14.38647342995169, "grad_norm": 0.56394362449646, "learning_rate": 0.001, "loss": 2.0707, "step": 166768 }, { "epoch": 14.391304347826088, "grad_norm": 0.6102176904678345, "learning_rate": 0.001, "loss": 2.0657, "step": 166824 }, { "epoch": 14.396135265700483, "grad_norm": 1.0363285541534424, "learning_rate": 0.001, "loss": 2.0723, "step": 166880 }, { "epoch": 14.40096618357488, "grad_norm": 0.7264270186424255, "learning_rate": 0.001, "loss": 2.0628, "step": 166936 }, { "epoch": 14.405797101449275, "grad_norm": 0.8421317338943481, "learning_rate": 0.001, "loss": 2.0742, "step": 166992 }, { "epoch": 14.41062801932367, "grad_norm": 1.6796785593032837, "learning_rate": 0.001, "loss": 2.0681, "step": 167048 }, { "epoch": 14.415458937198068, "grad_norm": 0.9827716946601868, "learning_rate": 0.001, "loss": 2.0591, "step": 167104 }, { "epoch": 14.420289855072463, "grad_norm": 0.8008168339729309, "learning_rate": 0.001, "loss": 2.0641, "step": 167160 }, { "epoch": 14.42512077294686, "grad_norm": 0.6461685299873352, "learning_rate": 0.001, "loss": 2.0627, "step": 167216 }, { "epoch": 14.429951690821255, "grad_norm": 0.5084369778633118, "learning_rate": 0.001, "loss": 2.0543, "step": 167272 }, { "epoch": 14.434782608695652, "grad_norm": 0.3755362033843994, "learning_rate": 0.001, "loss": 2.0592, "step": 167328 }, { "epoch": 14.439613526570048, "grad_norm": 0.9225021004676819, "learning_rate": 0.001, "loss": 2.0512, "step": 167384 }, { "epoch": 14.444444444444445, "grad_norm": 0.40125328302383423, "learning_rate": 0.001, "loss": 2.061, "step": 167440 }, { "epoch": 14.44927536231884, "grad_norm": 2.9441957473754883, "learning_rate": 0.001, "loss": 2.0944, "step": 167496 }, { "epoch": 14.454106280193237, "grad_norm": 3.9784598350524902, "learning_rate": 0.001, "loss": 2.0986, "step": 167552 }, { "epoch": 14.458937198067632, "grad_norm": 0.5904225707054138, "learning_rate": 0.001, "loss": 2.0942, "step": 167608 }, { "epoch": 14.46376811594203, "grad_norm": 0.4019748270511627, "learning_rate": 0.001, "loss": 2.0819, "step": 167664 }, { "epoch": 14.468599033816425, "grad_norm": 0.652597963809967, "learning_rate": 0.001, "loss": 2.0773, "step": 167720 }, { "epoch": 14.473429951690822, "grad_norm": 0.4811539053916931, "learning_rate": 0.001, "loss": 2.0706, "step": 167776 }, { "epoch": 14.478260869565217, "grad_norm": 0.4728718400001526, "learning_rate": 0.001, "loss": 2.0635, "step": 167832 }, { "epoch": 14.483091787439614, "grad_norm": 0.5088309049606323, "learning_rate": 0.001, "loss": 2.0614, "step": 167888 }, { "epoch": 14.48792270531401, "grad_norm": 0.5286533832550049, "learning_rate": 0.001, "loss": 2.0603, "step": 167944 }, { "epoch": 14.492753623188406, "grad_norm": 0.6797974705696106, "learning_rate": 0.001, "loss": 2.0521, "step": 168000 }, { "epoch": 14.497584541062801, "grad_norm": 0.6201916933059692, "learning_rate": 0.001, "loss": 2.075, "step": 168056 }, { "epoch": 14.502415458937199, "grad_norm": 0.552043616771698, "learning_rate": 0.001, "loss": 2.0652, "step": 168112 }, { "epoch": 14.507246376811594, "grad_norm": 0.7064962387084961, "learning_rate": 0.001, "loss": 2.078, "step": 168168 }, { "epoch": 14.51207729468599, "grad_norm": 7.383607864379883, "learning_rate": 0.001, "loss": 2.0809, "step": 168224 }, { "epoch": 14.516908212560386, "grad_norm": 2.657451868057251, "learning_rate": 0.001, "loss": 2.095, "step": 168280 }, { "epoch": 14.521739130434783, "grad_norm": 0.592877209186554, "learning_rate": 0.001, "loss": 2.0939, "step": 168336 }, { "epoch": 14.526570048309178, "grad_norm": 2.029914140701294, "learning_rate": 0.001, "loss": 2.0773, "step": 168392 }, { "epoch": 14.531400966183575, "grad_norm": 0.9684410095214844, "learning_rate": 0.001, "loss": 2.0831, "step": 168448 }, { "epoch": 14.53623188405797, "grad_norm": 1.4609384536743164, "learning_rate": 0.001, "loss": 2.0976, "step": 168504 }, { "epoch": 14.541062801932368, "grad_norm": 0.7181475162506104, "learning_rate": 0.001, "loss": 2.115, "step": 168560 }, { "epoch": 14.545893719806763, "grad_norm": 0.5920982956886292, "learning_rate": 0.001, "loss": 2.1118, "step": 168616 }, { "epoch": 14.55072463768116, "grad_norm": 6.049862861633301, "learning_rate": 0.001, "loss": 2.1096, "step": 168672 }, { "epoch": 14.555555555555555, "grad_norm": 2.6849656105041504, "learning_rate": 0.001, "loss": 2.0984, "step": 168728 }, { "epoch": 14.560386473429952, "grad_norm": 2.490931749343872, "learning_rate": 0.001, "loss": 2.0967, "step": 168784 }, { "epoch": 14.565217391304348, "grad_norm": 0.6823988556861877, "learning_rate": 0.001, "loss": 2.0987, "step": 168840 }, { "epoch": 14.570048309178745, "grad_norm": 1.7376644611358643, "learning_rate": 0.001, "loss": 2.1137, "step": 168896 }, { "epoch": 14.57487922705314, "grad_norm": 1.6851379871368408, "learning_rate": 0.001, "loss": 2.1016, "step": 168952 }, { "epoch": 14.579710144927537, "grad_norm": 1.1215546131134033, "learning_rate": 0.001, "loss": 2.1057, "step": 169008 }, { "epoch": 14.584541062801932, "grad_norm": 1.5103774070739746, "learning_rate": 0.001, "loss": 2.108, "step": 169064 }, { "epoch": 14.58937198067633, "grad_norm": 2.852341890335083, "learning_rate": 0.001, "loss": 2.1345, "step": 169120 }, { "epoch": 14.594202898550725, "grad_norm": 1.6364822387695312, "learning_rate": 0.001, "loss": 2.1329, "step": 169176 }, { "epoch": 14.59903381642512, "grad_norm": 3.4666504859924316, "learning_rate": 0.001, "loss": 2.1331, "step": 169232 }, { "epoch": 14.603864734299517, "grad_norm": 4.621094226837158, "learning_rate": 0.001, "loss": 2.1303, "step": 169288 }, { "epoch": 14.608695652173914, "grad_norm": 2.3316121101379395, "learning_rate": 0.001, "loss": 2.1165, "step": 169344 }, { "epoch": 14.61352657004831, "grad_norm": 2.956366539001465, "learning_rate": 0.001, "loss": 2.1103, "step": 169400 }, { "epoch": 14.618357487922705, "grad_norm": 1.5519510507583618, "learning_rate": 0.001, "loss": 2.1078, "step": 169456 }, { "epoch": 14.623188405797102, "grad_norm": 1.516550064086914, "learning_rate": 0.001, "loss": 2.1029, "step": 169512 }, { "epoch": 14.628019323671497, "grad_norm": 4.001878261566162, "learning_rate": 0.001, "loss": 2.0955, "step": 169568 }, { "epoch": 14.632850241545894, "grad_norm": 1.6552058458328247, "learning_rate": 0.001, "loss": 2.097, "step": 169624 }, { "epoch": 14.63768115942029, "grad_norm": 1.3740285634994507, "learning_rate": 0.001, "loss": 2.0998, "step": 169680 }, { "epoch": 14.642512077294686, "grad_norm": 0.5937606692314148, "learning_rate": 0.001, "loss": 2.0974, "step": 169736 }, { "epoch": 14.647342995169081, "grad_norm": 0.4707351624965668, "learning_rate": 0.001, "loss": 2.1263, "step": 169792 }, { "epoch": 14.652173913043478, "grad_norm": 0.47917288541793823, "learning_rate": 0.001, "loss": 2.1024, "step": 169848 }, { "epoch": 14.657004830917874, "grad_norm": 0.6686074137687683, "learning_rate": 0.001, "loss": 2.0939, "step": 169904 }, { "epoch": 14.66183574879227, "grad_norm": 0.9329943060874939, "learning_rate": 0.001, "loss": 2.089, "step": 169960 }, { "epoch": 14.666666666666666, "grad_norm": 1.7489795684814453, "learning_rate": 0.001, "loss": 2.0847, "step": 170016 }, { "epoch": 14.671497584541063, "grad_norm": 0.6932048201560974, "learning_rate": 0.001, "loss": 2.085, "step": 170072 }, { "epoch": 14.676328502415458, "grad_norm": 0.7575638294219971, "learning_rate": 0.001, "loss": 2.0761, "step": 170128 }, { "epoch": 14.681159420289855, "grad_norm": 0.5367351770401001, "learning_rate": 0.001, "loss": 2.0911, "step": 170184 }, { "epoch": 14.68599033816425, "grad_norm": 0.5035954713821411, "learning_rate": 0.001, "loss": 2.0885, "step": 170240 }, { "epoch": 14.690821256038648, "grad_norm": 0.5958396196365356, "learning_rate": 0.001, "loss": 2.0645, "step": 170296 }, { "epoch": 14.695652173913043, "grad_norm": 0.5565921068191528, "learning_rate": 0.001, "loss": 2.0695, "step": 170352 }, { "epoch": 14.70048309178744, "grad_norm": 0.8306350708007812, "learning_rate": 0.001, "loss": 2.0716, "step": 170408 }, { "epoch": 14.705314009661835, "grad_norm": 0.5590113997459412, "learning_rate": 0.001, "loss": 2.0656, "step": 170464 }, { "epoch": 14.710144927536232, "grad_norm": 0.9812882542610168, "learning_rate": 0.001, "loss": 2.0739, "step": 170520 }, { "epoch": 14.714975845410628, "grad_norm": 1.226194977760315, "learning_rate": 0.001, "loss": 2.0671, "step": 170576 }, { "epoch": 14.719806763285025, "grad_norm": 1.9902857542037964, "learning_rate": 0.001, "loss": 2.0687, "step": 170632 }, { "epoch": 14.72463768115942, "grad_norm": 2.0700135231018066, "learning_rate": 0.001, "loss": 2.1006, "step": 170688 }, { "epoch": 14.729468599033817, "grad_norm": 0.551053524017334, "learning_rate": 0.001, "loss": 2.0866, "step": 170744 }, { "epoch": 14.734299516908212, "grad_norm": 0.5387642979621887, "learning_rate": 0.001, "loss": 2.0741, "step": 170800 }, { "epoch": 14.73913043478261, "grad_norm": 0.39276209473609924, "learning_rate": 0.001, "loss": 2.0791, "step": 170856 }, { "epoch": 14.743961352657005, "grad_norm": 6.496521949768066, "learning_rate": 0.001, "loss": 2.0797, "step": 170912 }, { "epoch": 14.748792270531402, "grad_norm": 0.7262991070747375, "learning_rate": 0.001, "loss": 2.0883, "step": 170968 }, { "epoch": 14.753623188405797, "grad_norm": 3.1189754009246826, "learning_rate": 0.001, "loss": 2.0849, "step": 171024 }, { "epoch": 14.758454106280194, "grad_norm": 0.6200608015060425, "learning_rate": 0.001, "loss": 2.0812, "step": 171080 }, { "epoch": 14.76328502415459, "grad_norm": 0.5921116471290588, "learning_rate": 0.001, "loss": 2.0924, "step": 171136 }, { "epoch": 14.768115942028986, "grad_norm": 3.781170606613159, "learning_rate": 0.001, "loss": 2.0938, "step": 171192 }, { "epoch": 14.772946859903382, "grad_norm": 0.4791521430015564, "learning_rate": 0.001, "loss": 2.0757, "step": 171248 }, { "epoch": 14.777777777777779, "grad_norm": 0.5061606168746948, "learning_rate": 0.001, "loss": 2.0835, "step": 171304 }, { "epoch": 14.782608695652174, "grad_norm": 0.9491889476776123, "learning_rate": 0.001, "loss": 2.0846, "step": 171360 }, { "epoch": 14.78743961352657, "grad_norm": 0.35187625885009766, "learning_rate": 0.001, "loss": 2.0745, "step": 171416 }, { "epoch": 14.792270531400966, "grad_norm": 1.5387614965438843, "learning_rate": 0.001, "loss": 2.0767, "step": 171472 }, { "epoch": 14.797101449275363, "grad_norm": 0.6680706739425659, "learning_rate": 0.001, "loss": 2.0822, "step": 171528 }, { "epoch": 14.801932367149758, "grad_norm": 0.6752028465270996, "learning_rate": 0.001, "loss": 2.0871, "step": 171584 }, { "epoch": 14.806763285024154, "grad_norm": 0.6330510973930359, "learning_rate": 0.001, "loss": 2.0802, "step": 171640 }, { "epoch": 14.81159420289855, "grad_norm": 0.5179868340492249, "learning_rate": 0.001, "loss": 2.0696, "step": 171696 }, { "epoch": 14.816425120772946, "grad_norm": 0.5982415080070496, "learning_rate": 0.001, "loss": 2.0885, "step": 171752 }, { "epoch": 14.821256038647343, "grad_norm": 0.7442541122436523, "learning_rate": 0.001, "loss": 2.0844, "step": 171808 }, { "epoch": 14.826086956521738, "grad_norm": 0.8595708012580872, "learning_rate": 0.001, "loss": 2.0659, "step": 171864 }, { "epoch": 14.830917874396135, "grad_norm": 1.9857449531555176, "learning_rate": 0.001, "loss": 2.0718, "step": 171920 }, { "epoch": 14.83574879227053, "grad_norm": 0.7514676451683044, "learning_rate": 0.001, "loss": 2.0702, "step": 171976 }, { "epoch": 14.840579710144928, "grad_norm": 2.298125982284546, "learning_rate": 0.001, "loss": 2.0668, "step": 172032 }, { "epoch": 14.845410628019323, "grad_norm": 2.7248640060424805, "learning_rate": 0.001, "loss": 2.0513, "step": 172088 }, { "epoch": 14.85024154589372, "grad_norm": 0.5040480494499207, "learning_rate": 0.001, "loss": 2.0721, "step": 172144 }, { "epoch": 14.855072463768115, "grad_norm": 1.8116939067840576, "learning_rate": 0.001, "loss": 2.0494, "step": 172200 }, { "epoch": 14.859903381642512, "grad_norm": 2.097075939178467, "learning_rate": 0.001, "loss": 2.0619, "step": 172256 }, { "epoch": 14.864734299516908, "grad_norm": 1.0462239980697632, "learning_rate": 0.001, "loss": 2.0635, "step": 172312 }, { "epoch": 14.869565217391305, "grad_norm": 1.0059878826141357, "learning_rate": 0.001, "loss": 2.0747, "step": 172368 }, { "epoch": 14.8743961352657, "grad_norm": 0.5482062101364136, "learning_rate": 0.001, "loss": 2.0837, "step": 172424 }, { "epoch": 14.879227053140097, "grad_norm": 0.5053689479827881, "learning_rate": 0.001, "loss": 2.0878, "step": 172480 }, { "epoch": 14.884057971014492, "grad_norm": 0.6957647800445557, "learning_rate": 0.001, "loss": 2.0968, "step": 172536 }, { "epoch": 14.88888888888889, "grad_norm": 1.373155951499939, "learning_rate": 0.001, "loss": 2.0965, "step": 172592 }, { "epoch": 14.893719806763285, "grad_norm": 1.6637959480285645, "learning_rate": 0.001, "loss": 2.0849, "step": 172648 }, { "epoch": 14.898550724637682, "grad_norm": 0.7371339201927185, "learning_rate": 0.001, "loss": 2.0723, "step": 172704 }, { "epoch": 14.903381642512077, "grad_norm": 2.699892044067383, "learning_rate": 0.001, "loss": 2.0744, "step": 172760 }, { "epoch": 14.908212560386474, "grad_norm": 2.290555477142334, "learning_rate": 0.001, "loss": 2.0834, "step": 172816 }, { "epoch": 14.91304347826087, "grad_norm": 3.486701250076294, "learning_rate": 0.001, "loss": 2.11, "step": 172872 }, { "epoch": 14.917874396135266, "grad_norm": 2.754394292831421, "learning_rate": 0.001, "loss": 2.1203, "step": 172928 }, { "epoch": 14.922705314009661, "grad_norm": 2.6272425651550293, "learning_rate": 0.001, "loss": 2.1018, "step": 172984 }, { "epoch": 14.927536231884059, "grad_norm": 1.0866725444793701, "learning_rate": 0.001, "loss": 2.0964, "step": 173040 }, { "epoch": 14.932367149758454, "grad_norm": 0.8815363049507141, "learning_rate": 0.001, "loss": 2.123, "step": 173096 }, { "epoch": 14.93719806763285, "grad_norm": 2.0062432289123535, "learning_rate": 0.001, "loss": 2.1019, "step": 173152 }, { "epoch": 14.942028985507246, "grad_norm": 1.198691725730896, "learning_rate": 0.001, "loss": 2.0836, "step": 173208 }, { "epoch": 14.946859903381643, "grad_norm": 2.507596492767334, "learning_rate": 0.001, "loss": 2.0808, "step": 173264 }, { "epoch": 14.951690821256038, "grad_norm": 0.8321933150291443, "learning_rate": 0.001, "loss": 2.093, "step": 173320 }, { "epoch": 14.956521739130435, "grad_norm": 1.0588809251785278, "learning_rate": 0.001, "loss": 2.1236, "step": 173376 }, { "epoch": 14.96135265700483, "grad_norm": 0.8044658303260803, "learning_rate": 0.001, "loss": 2.1221, "step": 173432 }, { "epoch": 14.966183574879228, "grad_norm": 1.4201046228408813, "learning_rate": 0.001, "loss": 2.0964, "step": 173488 }, { "epoch": 14.971014492753623, "grad_norm": 0.7640172243118286, "learning_rate": 0.001, "loss": 2.0873, "step": 173544 }, { "epoch": 14.97584541062802, "grad_norm": 4.29083776473999, "learning_rate": 0.001, "loss": 2.0898, "step": 173600 }, { "epoch": 14.980676328502415, "grad_norm": 1.1134473085403442, "learning_rate": 0.001, "loss": 2.086, "step": 173656 }, { "epoch": 14.985507246376812, "grad_norm": 1.2478002309799194, "learning_rate": 0.001, "loss": 2.0936, "step": 173712 }, { "epoch": 14.990338164251208, "grad_norm": 1.1813775300979614, "learning_rate": 0.001, "loss": 2.0994, "step": 173768 }, { "epoch": 14.995169082125603, "grad_norm": 2.0440359115600586, "learning_rate": 0.001, "loss": 2.1031, "step": 173824 }, { "epoch": 15.0, "grad_norm": 1.2008017301559448, "learning_rate": 0.001, "loss": 2.1042, "step": 173880 }, { "epoch": 15.004830917874395, "grad_norm": 2.8801169395446777, "learning_rate": 0.001, "loss": 2.0839, "step": 173936 }, { "epoch": 15.009661835748792, "grad_norm": 2.73030161857605, "learning_rate": 0.001, "loss": 2.083, "step": 173992 }, { "epoch": 15.014492753623188, "grad_norm": 15.803240776062012, "learning_rate": 0.001, "loss": 2.0829, "step": 174048 }, { "epoch": 15.019323671497585, "grad_norm": 0.7853598594665527, "learning_rate": 0.001, "loss": 2.0704, "step": 174104 }, { "epoch": 15.02415458937198, "grad_norm": 0.692227303981781, "learning_rate": 0.001, "loss": 2.0774, "step": 174160 }, { "epoch": 15.028985507246377, "grad_norm": 1.200893759727478, "learning_rate": 0.001, "loss": 2.0658, "step": 174216 }, { "epoch": 15.033816425120772, "grad_norm": 2.0408151149749756, "learning_rate": 0.001, "loss": 2.0656, "step": 174272 }, { "epoch": 15.03864734299517, "grad_norm": 1.1072137355804443, "learning_rate": 0.001, "loss": 2.0707, "step": 174328 }, { "epoch": 15.043478260869565, "grad_norm": 3.77675724029541, "learning_rate": 0.001, "loss": 2.0623, "step": 174384 }, { "epoch": 15.048309178743962, "grad_norm": 2.0611681938171387, "learning_rate": 0.001, "loss": 2.0503, "step": 174440 }, { "epoch": 15.053140096618357, "grad_norm": 1.0527141094207764, "learning_rate": 0.001, "loss": 2.0488, "step": 174496 }, { "epoch": 15.057971014492754, "grad_norm": 2.0727038383483887, "learning_rate": 0.001, "loss": 2.0519, "step": 174552 }, { "epoch": 15.06280193236715, "grad_norm": 7.793724536895752, "learning_rate": 0.001, "loss": 2.0593, "step": 174608 }, { "epoch": 15.067632850241546, "grad_norm": 1.8213049173355103, "learning_rate": 0.001, "loss": 2.0536, "step": 174664 }, { "epoch": 15.072463768115941, "grad_norm": 2.0224125385284424, "learning_rate": 0.001, "loss": 2.0852, "step": 174720 }, { "epoch": 15.077294685990339, "grad_norm": 0.8237075805664062, "learning_rate": 0.001, "loss": 2.0908, "step": 174776 }, { "epoch": 15.082125603864734, "grad_norm": 1.308464527130127, "learning_rate": 0.001, "loss": 2.0881, "step": 174832 }, { "epoch": 15.08695652173913, "grad_norm": 0.875292181968689, "learning_rate": 0.001, "loss": 2.0748, "step": 174888 }, { "epoch": 15.091787439613526, "grad_norm": 2.6404037475585938, "learning_rate": 0.001, "loss": 2.0831, "step": 174944 }, { "epoch": 15.096618357487923, "grad_norm": 2.686084032058716, "learning_rate": 0.001, "loss": 2.0669, "step": 175000 }, { "epoch": 15.101449275362318, "grad_norm": 6.5948100090026855, "learning_rate": 0.001, "loss": 2.0577, "step": 175056 }, { "epoch": 15.106280193236715, "grad_norm": 0.43562135100364685, "learning_rate": 0.001, "loss": 2.0688, "step": 175112 }, { "epoch": 15.11111111111111, "grad_norm": 0.8576856255531311, "learning_rate": 0.001, "loss": 2.0677, "step": 175168 }, { "epoch": 15.115942028985508, "grad_norm": 1.9057815074920654, "learning_rate": 0.001, "loss": 2.0662, "step": 175224 }, { "epoch": 15.120772946859903, "grad_norm": 0.6675427556037903, "learning_rate": 0.001, "loss": 2.0771, "step": 175280 }, { "epoch": 15.1256038647343, "grad_norm": 2.9588990211486816, "learning_rate": 0.001, "loss": 2.077, "step": 175336 }, { "epoch": 15.130434782608695, "grad_norm": 3.4082581996917725, "learning_rate": 0.001, "loss": 2.0581, "step": 175392 }, { "epoch": 15.135265700483092, "grad_norm": 0.7630171179771423, "learning_rate": 0.001, "loss": 2.0753, "step": 175448 }, { "epoch": 15.140096618357488, "grad_norm": 26.633541107177734, "learning_rate": 0.001, "loss": 2.0917, "step": 175504 }, { "epoch": 15.144927536231885, "grad_norm": 2.04732084274292, "learning_rate": 0.001, "loss": 2.0741, "step": 175560 }, { "epoch": 15.14975845410628, "grad_norm": 1.5380600690841675, "learning_rate": 0.001, "loss": 2.0726, "step": 175616 }, { "epoch": 15.154589371980677, "grad_norm": 1.9612212181091309, "learning_rate": 0.001, "loss": 2.0644, "step": 175672 }, { "epoch": 15.159420289855072, "grad_norm": 1.4751856327056885, "learning_rate": 0.001, "loss": 2.067, "step": 175728 }, { "epoch": 15.16425120772947, "grad_norm": 1.3604212999343872, "learning_rate": 0.001, "loss": 2.0591, "step": 175784 }, { "epoch": 15.169082125603865, "grad_norm": 0.4815526604652405, "learning_rate": 0.001, "loss": 2.0588, "step": 175840 }, { "epoch": 15.173913043478262, "grad_norm": 0.9640748500823975, "learning_rate": 0.001, "loss": 2.0745, "step": 175896 }, { "epoch": 15.178743961352657, "grad_norm": 1.3795249462127686, "learning_rate": 0.001, "loss": 2.0829, "step": 175952 }, { "epoch": 15.183574879227054, "grad_norm": 0.7823031544685364, "learning_rate": 0.001, "loss": 2.0783, "step": 176008 }, { "epoch": 15.18840579710145, "grad_norm": 0.3591630458831787, "learning_rate": 0.001, "loss": 2.0659, "step": 176064 }, { "epoch": 15.193236714975846, "grad_norm": 1.758555293083191, "learning_rate": 0.001, "loss": 2.0659, "step": 176120 }, { "epoch": 15.198067632850242, "grad_norm": 0.614258885383606, "learning_rate": 0.001, "loss": 2.0427, "step": 176176 }, { "epoch": 15.202898550724637, "grad_norm": 0.8358861804008484, "learning_rate": 0.001, "loss": 2.0504, "step": 176232 }, { "epoch": 15.207729468599034, "grad_norm": 4.243839740753174, "learning_rate": 0.001, "loss": 2.0703, "step": 176288 }, { "epoch": 15.21256038647343, "grad_norm": 3.7503905296325684, "learning_rate": 0.001, "loss": 2.0897, "step": 176344 }, { "epoch": 15.217391304347826, "grad_norm": 1.2701398134231567, "learning_rate": 0.001, "loss": 2.0589, "step": 176400 }, { "epoch": 15.222222222222221, "grad_norm": 1.6691104173660278, "learning_rate": 0.001, "loss": 2.0652, "step": 176456 }, { "epoch": 15.227053140096618, "grad_norm": 0.6347532272338867, "learning_rate": 0.001, "loss": 2.0831, "step": 176512 }, { "epoch": 15.231884057971014, "grad_norm": 1.848062515258789, "learning_rate": 0.001, "loss": 2.0849, "step": 176568 }, { "epoch": 15.23671497584541, "grad_norm": 2.6668789386749268, "learning_rate": 0.001, "loss": 2.0843, "step": 176624 }, { "epoch": 15.241545893719806, "grad_norm": 0.9065186381340027, "learning_rate": 0.001, "loss": 2.0665, "step": 176680 }, { "epoch": 15.246376811594203, "grad_norm": 0.6160411834716797, "learning_rate": 0.001, "loss": 2.0749, "step": 176736 }, { "epoch": 15.251207729468598, "grad_norm": 0.4809981882572174, "learning_rate": 0.001, "loss": 2.0729, "step": 176792 }, { "epoch": 15.256038647342995, "grad_norm": 0.49896150827407837, "learning_rate": 0.001, "loss": 2.074, "step": 176848 }, { "epoch": 15.26086956521739, "grad_norm": 1.1309893131256104, "learning_rate": 0.001, "loss": 2.0759, "step": 176904 }, { "epoch": 15.265700483091788, "grad_norm": 0.8129328489303589, "learning_rate": 0.001, "loss": 2.0633, "step": 176960 }, { "epoch": 15.270531400966183, "grad_norm": 1.58247709274292, "learning_rate": 0.001, "loss": 2.0658, "step": 177016 }, { "epoch": 15.27536231884058, "grad_norm": 2.141803503036499, "learning_rate": 0.001, "loss": 2.0629, "step": 177072 }, { "epoch": 15.280193236714975, "grad_norm": 0.7735856771469116, "learning_rate": 0.001, "loss": 2.066, "step": 177128 }, { "epoch": 15.285024154589372, "grad_norm": 1.1034777164459229, "learning_rate": 0.001, "loss": 2.0656, "step": 177184 }, { "epoch": 15.289855072463768, "grad_norm": 0.8623336553573608, "learning_rate": 0.001, "loss": 2.0688, "step": 177240 }, { "epoch": 15.294685990338165, "grad_norm": 1.3948649168014526, "learning_rate": 0.001, "loss": 2.0788, "step": 177296 }, { "epoch": 15.29951690821256, "grad_norm": 2.8162314891815186, "learning_rate": 0.001, "loss": 2.0871, "step": 177352 }, { "epoch": 15.304347826086957, "grad_norm": 0.8825968503952026, "learning_rate": 0.001, "loss": 2.0728, "step": 177408 }, { "epoch": 15.309178743961352, "grad_norm": 2.773601531982422, "learning_rate": 0.001, "loss": 2.0977, "step": 177464 }, { "epoch": 15.31400966183575, "grad_norm": 0.6833995580673218, "learning_rate": 0.001, "loss": 2.0845, "step": 177520 }, { "epoch": 15.318840579710145, "grad_norm": 1.07160222530365, "learning_rate": 0.001, "loss": 2.0751, "step": 177576 }, { "epoch": 15.323671497584542, "grad_norm": 4.794754505157471, "learning_rate": 0.001, "loss": 2.0675, "step": 177632 }, { "epoch": 15.328502415458937, "grad_norm": 0.7153533101081848, "learning_rate": 0.001, "loss": 2.0563, "step": 177688 }, { "epoch": 15.333333333333334, "grad_norm": 2.763734817504883, "learning_rate": 0.001, "loss": 2.0641, "step": 177744 }, { "epoch": 15.33816425120773, "grad_norm": 2.5734941959381104, "learning_rate": 0.001, "loss": 2.0519, "step": 177800 }, { "epoch": 15.342995169082126, "grad_norm": 1.5281262397766113, "learning_rate": 0.001, "loss": 2.0511, "step": 177856 }, { "epoch": 15.347826086956522, "grad_norm": 3.705000400543213, "learning_rate": 0.001, "loss": 2.056, "step": 177912 }, { "epoch": 15.352657004830919, "grad_norm": 0.9009491205215454, "learning_rate": 0.001, "loss": 2.0577, "step": 177968 }, { "epoch": 15.357487922705314, "grad_norm": 1.4239006042480469, "learning_rate": 0.001, "loss": 2.0628, "step": 178024 }, { "epoch": 15.36231884057971, "grad_norm": 0.815852701663971, "learning_rate": 0.001, "loss": 2.0767, "step": 178080 }, { "epoch": 15.367149758454106, "grad_norm": 1.8801230192184448, "learning_rate": 0.001, "loss": 2.0707, "step": 178136 }, { "epoch": 15.371980676328503, "grad_norm": 1.3395931720733643, "learning_rate": 0.001, "loss": 2.0628, "step": 178192 }, { "epoch": 15.376811594202898, "grad_norm": 0.7946965098381042, "learning_rate": 0.001, "loss": 2.0886, "step": 178248 }, { "epoch": 15.381642512077295, "grad_norm": 4.0611443519592285, "learning_rate": 0.001, "loss": 2.0826, "step": 178304 }, { "epoch": 15.38647342995169, "grad_norm": 13.194400787353516, "learning_rate": 0.001, "loss": 2.0809, "step": 178360 }, { "epoch": 15.391304347826088, "grad_norm": 1.3880815505981445, "learning_rate": 0.001, "loss": 2.0741, "step": 178416 }, { "epoch": 15.396135265700483, "grad_norm": 0.47312793135643005, "learning_rate": 0.001, "loss": 2.0665, "step": 178472 }, { "epoch": 15.40096618357488, "grad_norm": 1.9929115772247314, "learning_rate": 0.001, "loss": 2.077, "step": 178528 }, { "epoch": 15.405797101449275, "grad_norm": 12.217371940612793, "learning_rate": 0.001, "loss": 2.0614, "step": 178584 }, { "epoch": 15.41062801932367, "grad_norm": 1.0362637042999268, "learning_rate": 0.001, "loss": 2.0675, "step": 178640 }, { "epoch": 15.415458937198068, "grad_norm": 1.1414883136749268, "learning_rate": 0.001, "loss": 2.0654, "step": 178696 }, { "epoch": 15.420289855072463, "grad_norm": 1.9589189291000366, "learning_rate": 0.001, "loss": 2.0667, "step": 178752 }, { "epoch": 15.42512077294686, "grad_norm": 1.7324857711791992, "learning_rate": 0.001, "loss": 2.0746, "step": 178808 }, { "epoch": 15.429951690821255, "grad_norm": 0.7011397480964661, "learning_rate": 0.001, "loss": 2.0658, "step": 178864 }, { "epoch": 15.434782608695652, "grad_norm": 10.742341041564941, "learning_rate": 0.001, "loss": 2.0747, "step": 178920 }, { "epoch": 15.439613526570048, "grad_norm": 0.4875578284263611, "learning_rate": 0.001, "loss": 2.0845, "step": 178976 }, { "epoch": 15.444444444444445, "grad_norm": 11.339142799377441, "learning_rate": 0.001, "loss": 2.0809, "step": 179032 }, { "epoch": 15.44927536231884, "grad_norm": 1.1817768812179565, "learning_rate": 0.001, "loss": 2.0765, "step": 179088 }, { "epoch": 15.454106280193237, "grad_norm": 0.7472344636917114, "learning_rate": 0.001, "loss": 2.0891, "step": 179144 }, { "epoch": 15.458937198067632, "grad_norm": 0.4393731653690338, "learning_rate": 0.001, "loss": 2.0817, "step": 179200 }, { "epoch": 15.46376811594203, "grad_norm": 0.7980862259864807, "learning_rate": 0.001, "loss": 2.0828, "step": 179256 }, { "epoch": 15.468599033816425, "grad_norm": 0.6689130663871765, "learning_rate": 0.001, "loss": 2.0651, "step": 179312 }, { "epoch": 15.473429951690822, "grad_norm": 0.6272009611129761, "learning_rate": 0.001, "loss": 2.0844, "step": 179368 }, { "epoch": 15.478260869565217, "grad_norm": 0.5187233090400696, "learning_rate": 0.001, "loss": 2.0872, "step": 179424 }, { "epoch": 15.483091787439614, "grad_norm": 12.676966667175293, "learning_rate": 0.001, "loss": 2.0807, "step": 179480 }, { "epoch": 15.48792270531401, "grad_norm": 6.730846881866455, "learning_rate": 0.001, "loss": 2.0866, "step": 179536 }, { "epoch": 15.492753623188406, "grad_norm": 0.6037222743034363, "learning_rate": 0.001, "loss": 2.0849, "step": 179592 }, { "epoch": 15.497584541062801, "grad_norm": 2.9655251502990723, "learning_rate": 0.001, "loss": 2.0826, "step": 179648 }, { "epoch": 15.502415458937199, "grad_norm": 3.5572848320007324, "learning_rate": 0.001, "loss": 2.0958, "step": 179704 }, { "epoch": 15.507246376811594, "grad_norm": 0.6195342540740967, "learning_rate": 0.001, "loss": 2.098, "step": 179760 }, { "epoch": 15.51207729468599, "grad_norm": 1.8525114059448242, "learning_rate": 0.001, "loss": 2.0848, "step": 179816 }, { "epoch": 15.516908212560386, "grad_norm": 3.5596747398376465, "learning_rate": 0.001, "loss": 2.0785, "step": 179872 }, { "epoch": 15.521739130434783, "grad_norm": 2.71140456199646, "learning_rate": 0.001, "loss": 2.0815, "step": 179928 }, { "epoch": 15.526570048309178, "grad_norm": 4.051966190338135, "learning_rate": 0.001, "loss": 2.0966, "step": 179984 }, { "epoch": 15.531400966183575, "grad_norm": 1.5408616065979004, "learning_rate": 0.001, "loss": 2.1054, "step": 180040 }, { "epoch": 15.53623188405797, "grad_norm": 2.197767734527588, "learning_rate": 0.001, "loss": 2.1243, "step": 180096 }, { "epoch": 15.541062801932368, "grad_norm": 1.8272418975830078, "learning_rate": 0.001, "loss": 2.1012, "step": 180152 }, { "epoch": 15.545893719806763, "grad_norm": 0.5757448077201843, "learning_rate": 0.001, "loss": 2.1124, "step": 180208 }, { "epoch": 15.55072463768116, "grad_norm": 1.2812471389770508, "learning_rate": 0.001, "loss": 2.1236, "step": 180264 }, { "epoch": 15.555555555555555, "grad_norm": 0.48313093185424805, "learning_rate": 0.001, "loss": 2.106, "step": 180320 }, { "epoch": 15.560386473429952, "grad_norm": 0.6699447631835938, "learning_rate": 0.001, "loss": 2.0914, "step": 180376 }, { "epoch": 15.565217391304348, "grad_norm": 0.8164467811584473, "learning_rate": 0.001, "loss": 2.0871, "step": 180432 }, { "epoch": 15.570048309178745, "grad_norm": 0.6131106615066528, "learning_rate": 0.001, "loss": 2.0856, "step": 180488 }, { "epoch": 15.57487922705314, "grad_norm": 0.964442789554596, "learning_rate": 0.001, "loss": 2.076, "step": 180544 }, { "epoch": 15.579710144927537, "grad_norm": 1.0887730121612549, "learning_rate": 0.001, "loss": 2.0736, "step": 180600 }, { "epoch": 15.584541062801932, "grad_norm": 1.0917245149612427, "learning_rate": 0.001, "loss": 2.0707, "step": 180656 }, { "epoch": 15.58937198067633, "grad_norm": 0.9515587687492371, "learning_rate": 0.001, "loss": 2.0601, "step": 180712 }, { "epoch": 15.594202898550725, "grad_norm": 0.5061343908309937, "learning_rate": 0.001, "loss": 2.0612, "step": 180768 }, { "epoch": 15.59903381642512, "grad_norm": 0.8528938889503479, "learning_rate": 0.001, "loss": 2.0606, "step": 180824 }, { "epoch": 15.603864734299517, "grad_norm": 0.5530614256858826, "learning_rate": 0.001, "loss": 2.0586, "step": 180880 }, { "epoch": 15.608695652173914, "grad_norm": 0.5599806904792786, "learning_rate": 0.001, "loss": 2.0673, "step": 180936 }, { "epoch": 15.61352657004831, "grad_norm": 8.718918800354004, "learning_rate": 0.001, "loss": 2.0261, "step": 180992 }, { "epoch": 15.618357487922705, "grad_norm": 5.93067741394043, "learning_rate": 0.001, "loss": 2.0332, "step": 181048 }, { "epoch": 15.623188405797102, "grad_norm": 0.3749951422214508, "learning_rate": 0.001, "loss": 2.0435, "step": 181104 }, { "epoch": 15.628019323671497, "grad_norm": 2.1972835063934326, "learning_rate": 0.001, "loss": 2.0358, "step": 181160 }, { "epoch": 15.632850241545894, "grad_norm": 0.5415869951248169, "learning_rate": 0.001, "loss": 2.0356, "step": 181216 }, { "epoch": 15.63768115942029, "grad_norm": 3.1783981323242188, "learning_rate": 0.001, "loss": 2.0236, "step": 181272 }, { "epoch": 15.642512077294686, "grad_norm": 5.332545280456543, "learning_rate": 0.001, "loss": 2.0392, "step": 181328 }, { "epoch": 15.647342995169081, "grad_norm": 1.6674243211746216, "learning_rate": 0.001, "loss": 2.0346, "step": 181384 }, { "epoch": 15.652173913043478, "grad_norm": 1.740112066268921, "learning_rate": 0.001, "loss": 2.029, "step": 181440 }, { "epoch": 15.657004830917874, "grad_norm": 1.439554214477539, "learning_rate": 0.001, "loss": 2.0336, "step": 181496 }, { "epoch": 15.66183574879227, "grad_norm": 2.1376547813415527, "learning_rate": 0.001, "loss": 2.0321, "step": 181552 }, { "epoch": 15.666666666666666, "grad_norm": 1.2525209188461304, "learning_rate": 0.001, "loss": 2.0593, "step": 181608 }, { "epoch": 15.671497584541063, "grad_norm": 3.7702012062072754, "learning_rate": 0.001, "loss": 2.0545, "step": 181664 }, { "epoch": 15.676328502415458, "grad_norm": 2.050967216491699, "learning_rate": 0.001, "loss": 2.05, "step": 181720 }, { "epoch": 15.681159420289855, "grad_norm": 0.9646165370941162, "learning_rate": 0.001, "loss": 2.0443, "step": 181776 }, { "epoch": 15.68599033816425, "grad_norm": 2.4290432929992676, "learning_rate": 0.001, "loss": 2.0561, "step": 181832 }, { "epoch": 15.690821256038648, "grad_norm": 1.2598652839660645, "learning_rate": 0.001, "loss": 2.0527, "step": 181888 }, { "epoch": 15.695652173913043, "grad_norm": 2.173372983932495, "learning_rate": 0.001, "loss": 2.0657, "step": 181944 }, { "epoch": 15.70048309178744, "grad_norm": 1.6048988103866577, "learning_rate": 0.001, "loss": 2.064, "step": 182000 }, { "epoch": 15.705314009661835, "grad_norm": 81.71548461914062, "learning_rate": 0.001, "loss": 2.0626, "step": 182056 }, { "epoch": 15.710144927536232, "grad_norm": 7.592867851257324, "learning_rate": 0.001, "loss": 2.0513, "step": 182112 }, { "epoch": 15.714975845410628, "grad_norm": 2.293421506881714, "learning_rate": 0.001, "loss": 2.0621, "step": 182168 }, { "epoch": 15.719806763285025, "grad_norm": 3.173964262008667, "learning_rate": 0.001, "loss": 2.0565, "step": 182224 }, { "epoch": 15.72463768115942, "grad_norm": 2.251840114593506, "learning_rate": 0.001, "loss": 2.0606, "step": 182280 }, { "epoch": 15.729468599033817, "grad_norm": 2.4754340648651123, "learning_rate": 0.001, "loss": 2.0664, "step": 182336 }, { "epoch": 15.734299516908212, "grad_norm": 1.4506685733795166, "learning_rate": 0.001, "loss": 2.0561, "step": 182392 }, { "epoch": 15.73913043478261, "grad_norm": 1.3857123851776123, "learning_rate": 0.001, "loss": 2.0484, "step": 182448 }, { "epoch": 15.743961352657005, "grad_norm": 2.828580141067505, "learning_rate": 0.001, "loss": 2.062, "step": 182504 }, { "epoch": 15.748792270531402, "grad_norm": 2.3600261211395264, "learning_rate": 0.001, "loss": 2.0553, "step": 182560 }, { "epoch": 15.753623188405797, "grad_norm": 3.6167361736297607, "learning_rate": 0.001, "loss": 2.0503, "step": 182616 }, { "epoch": 15.758454106280194, "grad_norm": 1.123544692993164, "learning_rate": 0.001, "loss": 2.0508, "step": 182672 }, { "epoch": 15.76328502415459, "grad_norm": 26.867191314697266, "learning_rate": 0.001, "loss": 2.042, "step": 182728 }, { "epoch": 15.768115942028986, "grad_norm": 2.123518228530884, "learning_rate": 0.001, "loss": 2.0407, "step": 182784 }, { "epoch": 15.772946859903382, "grad_norm": 0.5891660451889038, "learning_rate": 0.001, "loss": 2.0321, "step": 182840 }, { "epoch": 15.777777777777779, "grad_norm": 2.4394891262054443, "learning_rate": 0.001, "loss": 2.0383, "step": 182896 }, { "epoch": 15.782608695652174, "grad_norm": 90.72672271728516, "learning_rate": 0.001, "loss": 2.0539, "step": 182952 }, { "epoch": 15.78743961352657, "grad_norm": 0.6709445118904114, "learning_rate": 0.001, "loss": 2.0503, "step": 183008 }, { "epoch": 15.792270531400966, "grad_norm": 0.8341113924980164, "learning_rate": 0.001, "loss": 2.0376, "step": 183064 }, { "epoch": 15.797101449275363, "grad_norm": 0.7178948521614075, "learning_rate": 0.001, "loss": 2.0439, "step": 183120 }, { "epoch": 15.801932367149758, "grad_norm": 1.4892902374267578, "learning_rate": 0.001, "loss": 2.0617, "step": 183176 }, { "epoch": 15.806763285024154, "grad_norm": 1.0534706115722656, "learning_rate": 0.001, "loss": 2.0899, "step": 183232 }, { "epoch": 15.81159420289855, "grad_norm": 9.325037002563477, "learning_rate": 0.001, "loss": 2.0697, "step": 183288 }, { "epoch": 15.816425120772946, "grad_norm": 2.459322929382324, "learning_rate": 0.001, "loss": 2.0634, "step": 183344 }, { "epoch": 15.821256038647343, "grad_norm": 0.5397865772247314, "learning_rate": 0.001, "loss": 2.0555, "step": 183400 }, { "epoch": 15.826086956521738, "grad_norm": 4.8079304695129395, "learning_rate": 0.001, "loss": 2.0689, "step": 183456 }, { "epoch": 15.830917874396135, "grad_norm": 1.0921560525894165, "learning_rate": 0.001, "loss": 2.0678, "step": 183512 }, { "epoch": 15.83574879227053, "grad_norm": 1.0716294050216675, "learning_rate": 0.001, "loss": 2.0594, "step": 183568 }, { "epoch": 15.840579710144928, "grad_norm": 1.8330527544021606, "learning_rate": 0.001, "loss": 2.052, "step": 183624 }, { "epoch": 15.845410628019323, "grad_norm": 0.46384838223457336, "learning_rate": 0.001, "loss": 2.0566, "step": 183680 }, { "epoch": 15.85024154589372, "grad_norm": 0.8690996766090393, "learning_rate": 0.001, "loss": 2.0444, "step": 183736 }, { "epoch": 15.855072463768115, "grad_norm": 0.5121428966522217, "learning_rate": 0.001, "loss": 2.053, "step": 183792 }, { "epoch": 15.859903381642512, "grad_norm": 0.7626178860664368, "learning_rate": 0.001, "loss": 2.0402, "step": 183848 }, { "epoch": 15.864734299516908, "grad_norm": 0.7143154740333557, "learning_rate": 0.001, "loss": 2.031, "step": 183904 }, { "epoch": 15.869565217391305, "grad_norm": 0.6300618052482605, "learning_rate": 0.001, "loss": 2.0429, "step": 183960 }, { "epoch": 15.8743961352657, "grad_norm": 1.8488914966583252, "learning_rate": 0.001, "loss": 2.0487, "step": 184016 }, { "epoch": 15.879227053140097, "grad_norm": 0.43330931663513184, "learning_rate": 0.001, "loss": 2.0502, "step": 184072 }, { "epoch": 15.884057971014492, "grad_norm": 1.4612081050872803, "learning_rate": 0.001, "loss": 2.0494, "step": 184128 }, { "epoch": 15.88888888888889, "grad_norm": 0.44561025500297546, "learning_rate": 0.001, "loss": 2.053, "step": 184184 }, { "epoch": 15.893719806763285, "grad_norm": 0.5649104118347168, "learning_rate": 0.001, "loss": 2.0799, "step": 184240 }, { "epoch": 15.898550724637682, "grad_norm": 0.5883121490478516, "learning_rate": 0.001, "loss": 2.0793, "step": 184296 }, { "epoch": 15.903381642512077, "grad_norm": 0.6387181878089905, "learning_rate": 0.001, "loss": 2.0722, "step": 184352 }, { "epoch": 15.908212560386474, "grad_norm": 0.8768320083618164, "learning_rate": 0.001, "loss": 2.0599, "step": 184408 }, { "epoch": 15.91304347826087, "grad_norm": 0.44215306639671326, "learning_rate": 0.001, "loss": 2.0465, "step": 184464 }, { "epoch": 15.917874396135266, "grad_norm": 0.7613080739974976, "learning_rate": 0.001, "loss": 2.055, "step": 184520 }, { "epoch": 15.922705314009661, "grad_norm": 0.6234574317932129, "learning_rate": 0.001, "loss": 2.0388, "step": 184576 }, { "epoch": 15.927536231884059, "grad_norm": 1.9232505559921265, "learning_rate": 0.001, "loss": 2.047, "step": 184632 }, { "epoch": 15.932367149758454, "grad_norm": 0.6034908890724182, "learning_rate": 0.001, "loss": 2.0612, "step": 184688 }, { "epoch": 15.93719806763285, "grad_norm": 1.0975749492645264, "learning_rate": 0.001, "loss": 2.0643, "step": 184744 }, { "epoch": 15.942028985507246, "grad_norm": 0.9936833381652832, "learning_rate": 0.001, "loss": 2.0755, "step": 184800 }, { "epoch": 15.946859903381643, "grad_norm": 4.649092674255371, "learning_rate": 0.001, "loss": 2.0814, "step": 184856 }, { "epoch": 15.951690821256038, "grad_norm": 1.0066566467285156, "learning_rate": 0.001, "loss": 2.0794, "step": 184912 }, { "epoch": 15.956521739130435, "grad_norm": 0.5824914574623108, "learning_rate": 0.001, "loss": 2.0828, "step": 184968 }, { "epoch": 15.96135265700483, "grad_norm": 0.7705357670783997, "learning_rate": 0.001, "loss": 2.0767, "step": 185024 }, { "epoch": 15.966183574879228, "grad_norm": 1.243199348449707, "learning_rate": 0.001, "loss": 2.0838, "step": 185080 }, { "epoch": 15.971014492753623, "grad_norm": 1.8424952030181885, "learning_rate": 0.001, "loss": 2.0818, "step": 185136 }, { "epoch": 15.97584541062802, "grad_norm": 0.6935544013977051, "learning_rate": 0.001, "loss": 2.0778, "step": 185192 }, { "epoch": 15.980676328502415, "grad_norm": 1.2499427795410156, "learning_rate": 0.001, "loss": 2.0822, "step": 185248 }, { "epoch": 15.985507246376812, "grad_norm": 2.3306241035461426, "learning_rate": 0.001, "loss": 2.0778, "step": 185304 }, { "epoch": 15.990338164251208, "grad_norm": 1.9763392210006714, "learning_rate": 0.001, "loss": 2.0803, "step": 185360 }, { "epoch": 15.995169082125603, "grad_norm": 3.810502290725708, "learning_rate": 0.001, "loss": 2.1005, "step": 185416 }, { "epoch": 16.0, "grad_norm": 4.783773899078369, "learning_rate": 0.001, "loss": 2.0863, "step": 185472 }, { "epoch": 16.004830917874397, "grad_norm": 1.484560489654541, "learning_rate": 0.001, "loss": 2.0592, "step": 185528 }, { "epoch": 16.00966183574879, "grad_norm": 4.203853607177734, "learning_rate": 0.001, "loss": 2.0676, "step": 185584 }, { "epoch": 16.014492753623188, "grad_norm": 0.35111045837402344, "learning_rate": 0.001, "loss": 2.0543, "step": 185640 }, { "epoch": 16.019323671497585, "grad_norm": 1.1685441732406616, "learning_rate": 0.001, "loss": 2.0656, "step": 185696 }, { "epoch": 16.02415458937198, "grad_norm": 1.0759357213974, "learning_rate": 0.001, "loss": 2.0555, "step": 185752 }, { "epoch": 16.028985507246375, "grad_norm": 1.6144546270370483, "learning_rate": 0.001, "loss": 2.0457, "step": 185808 }, { "epoch": 16.033816425120772, "grad_norm": 0.6961482167243958, "learning_rate": 0.001, "loss": 2.0396, "step": 185864 }, { "epoch": 16.03864734299517, "grad_norm": 1.4635748863220215, "learning_rate": 0.001, "loss": 2.0424, "step": 185920 }, { "epoch": 16.043478260869566, "grad_norm": 2.843299150466919, "learning_rate": 0.001, "loss": 2.049, "step": 185976 }, { "epoch": 16.04830917874396, "grad_norm": 0.6566775441169739, "learning_rate": 0.001, "loss": 2.0586, "step": 186032 }, { "epoch": 16.053140096618357, "grad_norm": 2.746415376663208, "learning_rate": 0.001, "loss": 2.0514, "step": 186088 }, { "epoch": 16.057971014492754, "grad_norm": 0.7152809500694275, "learning_rate": 0.001, "loss": 2.0417, "step": 186144 }, { "epoch": 16.06280193236715, "grad_norm": 0.7769956588745117, "learning_rate": 0.001, "loss": 2.04, "step": 186200 }, { "epoch": 16.067632850241544, "grad_norm": 0.5652279853820801, "learning_rate": 0.001, "loss": 2.0417, "step": 186256 }, { "epoch": 16.07246376811594, "grad_norm": 0.34586387872695923, "learning_rate": 0.001, "loss": 2.0478, "step": 186312 }, { "epoch": 16.07729468599034, "grad_norm": 0.9556130766868591, "learning_rate": 0.001, "loss": 2.0387, "step": 186368 }, { "epoch": 16.082125603864736, "grad_norm": 2.0306713581085205, "learning_rate": 0.001, "loss": 2.0405, "step": 186424 }, { "epoch": 16.08695652173913, "grad_norm": 0.4355758726596832, "learning_rate": 0.001, "loss": 2.024, "step": 186480 }, { "epoch": 16.091787439613526, "grad_norm": 2.6172306537628174, "learning_rate": 0.001, "loss": 2.0269, "step": 186536 }, { "epoch": 16.096618357487923, "grad_norm": 2.0479471683502197, "learning_rate": 0.001, "loss": 2.0365, "step": 186592 }, { "epoch": 16.10144927536232, "grad_norm": 1.1867527961730957, "learning_rate": 0.001, "loss": 2.0342, "step": 186648 }, { "epoch": 16.106280193236714, "grad_norm": 4.775577545166016, "learning_rate": 0.001, "loss": 2.0365, "step": 186704 }, { "epoch": 16.11111111111111, "grad_norm": 0.986009955406189, "learning_rate": 0.001, "loss": 2.0305, "step": 186760 }, { "epoch": 16.115942028985508, "grad_norm": 14.898215293884277, "learning_rate": 0.001, "loss": 2.0392, "step": 186816 }, { "epoch": 16.120772946859905, "grad_norm": 5.029755592346191, "learning_rate": 0.001, "loss": 2.0429, "step": 186872 }, { "epoch": 16.1256038647343, "grad_norm": 1.3929964303970337, "learning_rate": 0.001, "loss": 2.0507, "step": 186928 }, { "epoch": 16.130434782608695, "grad_norm": 1.0490055084228516, "learning_rate": 0.001, "loss": 2.0768, "step": 186984 }, { "epoch": 16.135265700483092, "grad_norm": 2.575105905532837, "learning_rate": 0.001, "loss": 2.0654, "step": 187040 }, { "epoch": 16.14009661835749, "grad_norm": 2.0277960300445557, "learning_rate": 0.001, "loss": 2.0672, "step": 187096 }, { "epoch": 16.144927536231883, "grad_norm": 1.6006652116775513, "learning_rate": 0.001, "loss": 2.0845, "step": 187152 }, { "epoch": 16.14975845410628, "grad_norm": 0.9333575367927551, "learning_rate": 0.001, "loss": 2.0924, "step": 187208 }, { "epoch": 16.154589371980677, "grad_norm": 1.1374971866607666, "learning_rate": 0.001, "loss": 2.0983, "step": 187264 }, { "epoch": 16.159420289855074, "grad_norm": 1.6902309656143188, "learning_rate": 0.001, "loss": 2.0883, "step": 187320 }, { "epoch": 16.164251207729468, "grad_norm": 0.85816490650177, "learning_rate": 0.001, "loss": 2.0724, "step": 187376 }, { "epoch": 16.169082125603865, "grad_norm": 1.3843284845352173, "learning_rate": 0.001, "loss": 2.0681, "step": 187432 }, { "epoch": 16.17391304347826, "grad_norm": 0.9836299419403076, "learning_rate": 0.001, "loss": 2.0528, "step": 187488 }, { "epoch": 16.17874396135266, "grad_norm": 12.954838752746582, "learning_rate": 0.001, "loss": 2.0579, "step": 187544 }, { "epoch": 16.183574879227052, "grad_norm": 0.7564798593521118, "learning_rate": 0.001, "loss": 2.0536, "step": 187600 }, { "epoch": 16.18840579710145, "grad_norm": 4.486562728881836, "learning_rate": 0.001, "loss": 2.0483, "step": 187656 }, { "epoch": 16.193236714975846, "grad_norm": 1.0050679445266724, "learning_rate": 0.001, "loss": 2.0481, "step": 187712 }, { "epoch": 16.19806763285024, "grad_norm": 1.318315029144287, "learning_rate": 0.001, "loss": 2.0369, "step": 187768 }, { "epoch": 16.202898550724637, "grad_norm": 1.0909481048583984, "learning_rate": 0.001, "loss": 2.0535, "step": 187824 }, { "epoch": 16.207729468599034, "grad_norm": 0.6456713080406189, "learning_rate": 0.001, "loss": 2.0562, "step": 187880 }, { "epoch": 16.21256038647343, "grad_norm": 0.7630131840705872, "learning_rate": 0.001, "loss": 2.0512, "step": 187936 }, { "epoch": 16.217391304347824, "grad_norm": 0.6994858980178833, "learning_rate": 0.001, "loss": 2.0462, "step": 187992 }, { "epoch": 16.22222222222222, "grad_norm": 138.99832153320312, "learning_rate": 0.001, "loss": 2.0429, "step": 188048 }, { "epoch": 16.22705314009662, "grad_norm": 1.063202977180481, "learning_rate": 0.001, "loss": 2.0385, "step": 188104 }, { "epoch": 16.231884057971016, "grad_norm": 1.0482465028762817, "learning_rate": 0.001, "loss": 2.0369, "step": 188160 }, { "epoch": 16.23671497584541, "grad_norm": 0.6204719543457031, "learning_rate": 0.001, "loss": 2.0325, "step": 188216 }, { "epoch": 16.241545893719806, "grad_norm": 0.5433629155158997, "learning_rate": 0.001, "loss": 2.0473, "step": 188272 }, { "epoch": 16.246376811594203, "grad_norm": 0.6457331776618958, "learning_rate": 0.001, "loss": 2.0499, "step": 188328 }, { "epoch": 16.2512077294686, "grad_norm": 0.9773275256156921, "learning_rate": 0.001, "loss": 2.0747, "step": 188384 }, { "epoch": 16.256038647342994, "grad_norm": 1.3162868022918701, "learning_rate": 0.001, "loss": 2.076, "step": 188440 }, { "epoch": 16.26086956521739, "grad_norm": 2.528379201889038, "learning_rate": 0.001, "loss": 2.0655, "step": 188496 }, { "epoch": 16.265700483091788, "grad_norm": 3.2349584102630615, "learning_rate": 0.001, "loss": 2.0576, "step": 188552 }, { "epoch": 16.270531400966185, "grad_norm": 13.773297309875488, "learning_rate": 0.001, "loss": 2.0565, "step": 188608 }, { "epoch": 16.27536231884058, "grad_norm": 3543.341552734375, "learning_rate": 0.001, "loss": 2.0575, "step": 188664 }, { "epoch": 16.280193236714975, "grad_norm": 1.4820069074630737, "learning_rate": 0.001, "loss": 2.0496, "step": 188720 }, { "epoch": 16.285024154589372, "grad_norm": 1.4564237594604492, "learning_rate": 0.001, "loss": 2.0477, "step": 188776 }, { "epoch": 16.28985507246377, "grad_norm": 1.422391414642334, "learning_rate": 0.001, "loss": 2.0393, "step": 188832 }, { "epoch": 16.294685990338163, "grad_norm": 1.2921226024627686, "learning_rate": 0.001, "loss": 2.0425, "step": 188888 }, { "epoch": 16.29951690821256, "grad_norm": 2.743957042694092, "learning_rate": 0.001, "loss": 2.0581, "step": 188944 }, { "epoch": 16.304347826086957, "grad_norm": 0.8751780986785889, "learning_rate": 0.001, "loss": 2.0667, "step": 189000 }, { "epoch": 16.309178743961354, "grad_norm": 1.3919097185134888, "learning_rate": 0.001, "loss": 2.0556, "step": 189056 }, { "epoch": 16.314009661835748, "grad_norm": 3.9211606979370117, "learning_rate": 0.001, "loss": 2.0602, "step": 189112 }, { "epoch": 16.318840579710145, "grad_norm": 4.1606597900390625, "learning_rate": 0.001, "loss": 2.0558, "step": 189168 }, { "epoch": 16.32367149758454, "grad_norm": 5.697816848754883, "learning_rate": 0.001, "loss": 2.0694, "step": 189224 }, { "epoch": 16.32850241545894, "grad_norm": 2.9886012077331543, "learning_rate": 0.001, "loss": 2.0666, "step": 189280 }, { "epoch": 16.333333333333332, "grad_norm": 2.0707736015319824, "learning_rate": 0.001, "loss": 2.0578, "step": 189336 }, { "epoch": 16.33816425120773, "grad_norm": 2.8228771686553955, "learning_rate": 0.001, "loss": 2.0639, "step": 189392 }, { "epoch": 16.342995169082126, "grad_norm": 1.3293325901031494, "learning_rate": 0.001, "loss": 2.0677, "step": 189448 }, { "epoch": 16.347826086956523, "grad_norm": 1.0883820056915283, "learning_rate": 0.001, "loss": 2.0676, "step": 189504 }, { "epoch": 16.352657004830917, "grad_norm": 0.9181184768676758, "learning_rate": 0.001, "loss": 2.0579, "step": 189560 }, { "epoch": 16.357487922705314, "grad_norm": 2.282514810562134, "learning_rate": 0.001, "loss": 2.0515, "step": 189616 }, { "epoch": 16.36231884057971, "grad_norm": 0.8217980265617371, "learning_rate": 0.001, "loss": 2.0494, "step": 189672 }, { "epoch": 16.367149758454108, "grad_norm": 0.9382248520851135, "learning_rate": 0.001, "loss": 2.0657, "step": 189728 }, { "epoch": 16.3719806763285, "grad_norm": 7.725281715393066, "learning_rate": 0.001, "loss": 2.0618, "step": 189784 }, { "epoch": 16.3768115942029, "grad_norm": 1.3862109184265137, "learning_rate": 0.001, "loss": 2.0495, "step": 189840 }, { "epoch": 16.381642512077295, "grad_norm": 2.093127965927124, "learning_rate": 0.001, "loss": 2.0524, "step": 189896 }, { "epoch": 16.386473429951693, "grad_norm": 2.0543019771575928, "learning_rate": 0.001, "loss": 2.0407, "step": 189952 }, { "epoch": 16.391304347826086, "grad_norm": 1.3897541761398315, "learning_rate": 0.001, "loss": 2.0543, "step": 190008 }, { "epoch": 16.396135265700483, "grad_norm": 2.6832361221313477, "learning_rate": 0.001, "loss": 2.0392, "step": 190064 }, { "epoch": 16.40096618357488, "grad_norm": 0.3399752378463745, "learning_rate": 0.001, "loss": 2.049, "step": 190120 }, { "epoch": 16.405797101449274, "grad_norm": 1.0992438793182373, "learning_rate": 0.001, "loss": 2.0484, "step": 190176 }, { "epoch": 16.41062801932367, "grad_norm": 0.8764998912811279, "learning_rate": 0.001, "loss": 2.0668, "step": 190232 }, { "epoch": 16.415458937198068, "grad_norm": 2.9381978511810303, "learning_rate": 0.001, "loss": 2.0503, "step": 190288 }, { "epoch": 16.420289855072465, "grad_norm": 0.549014151096344, "learning_rate": 0.001, "loss": 2.058, "step": 190344 }, { "epoch": 16.42512077294686, "grad_norm": 0.6904180645942688, "learning_rate": 0.001, "loss": 2.0393, "step": 190400 }, { "epoch": 16.429951690821255, "grad_norm": 1.2345902919769287, "learning_rate": 0.001, "loss": 2.0372, "step": 190456 }, { "epoch": 16.434782608695652, "grad_norm": 1.625508427619934, "learning_rate": 0.001, "loss": 2.0402, "step": 190512 }, { "epoch": 16.43961352657005, "grad_norm": 1.1619187593460083, "learning_rate": 0.001, "loss": 2.0462, "step": 190568 }, { "epoch": 16.444444444444443, "grad_norm": 0.6694504022598267, "learning_rate": 0.001, "loss": 2.0406, "step": 190624 }, { "epoch": 16.44927536231884, "grad_norm": 6.3962860107421875, "learning_rate": 0.001, "loss": 2.0441, "step": 190680 }, { "epoch": 16.454106280193237, "grad_norm": 0.7398471832275391, "learning_rate": 0.001, "loss": 2.0471, "step": 190736 }, { "epoch": 16.458937198067634, "grad_norm": 1.052964448928833, "learning_rate": 0.001, "loss": 2.0464, "step": 190792 }, { "epoch": 16.463768115942027, "grad_norm": 3.402850866317749, "learning_rate": 0.001, "loss": 2.0506, "step": 190848 }, { "epoch": 16.468599033816425, "grad_norm": 0.5928205847740173, "learning_rate": 0.001, "loss": 2.045, "step": 190904 }, { "epoch": 16.47342995169082, "grad_norm": 1.0342724323272705, "learning_rate": 0.001, "loss": 2.0449, "step": 190960 }, { "epoch": 16.47826086956522, "grad_norm": 1.663536548614502, "learning_rate": 0.001, "loss": 2.0497, "step": 191016 }, { "epoch": 16.483091787439612, "grad_norm": 0.3610554039478302, "learning_rate": 0.001, "loss": 2.0368, "step": 191072 }, { "epoch": 16.48792270531401, "grad_norm": 1.7068403959274292, "learning_rate": 0.001, "loss": 2.0324, "step": 191128 }, { "epoch": 16.492753623188406, "grad_norm": 1.5902267694473267, "learning_rate": 0.001, "loss": 2.0276, "step": 191184 }, { "epoch": 16.497584541062803, "grad_norm": 0.4901221990585327, "learning_rate": 0.001, "loss": 2.0246, "step": 191240 }, { "epoch": 16.502415458937197, "grad_norm": 0.8506833910942078, "learning_rate": 0.001, "loss": 2.0389, "step": 191296 }, { "epoch": 16.507246376811594, "grad_norm": 1.642460584640503, "learning_rate": 0.001, "loss": 2.0334, "step": 191352 }, { "epoch": 16.51207729468599, "grad_norm": 12.916961669921875, "learning_rate": 0.001, "loss": 2.0428, "step": 191408 }, { "epoch": 16.516908212560388, "grad_norm": 0.913677990436554, "learning_rate": 0.001, "loss": 2.0574, "step": 191464 }, { "epoch": 16.52173913043478, "grad_norm": 0.5878140926361084, "learning_rate": 0.001, "loss": 2.0541, "step": 191520 }, { "epoch": 16.52657004830918, "grad_norm": 2.80161452293396, "learning_rate": 0.001, "loss": 2.0525, "step": 191576 }, { "epoch": 16.531400966183575, "grad_norm": 0.6691017150878906, "learning_rate": 0.001, "loss": 2.0512, "step": 191632 }, { "epoch": 16.536231884057973, "grad_norm": 7.426950931549072, "learning_rate": 0.001, "loss": 2.0578, "step": 191688 }, { "epoch": 16.541062801932366, "grad_norm": 2.8254494667053223, "learning_rate": 0.001, "loss": 2.0597, "step": 191744 }, { "epoch": 16.545893719806763, "grad_norm": 1.4224467277526855, "learning_rate": 0.001, "loss": 2.0549, "step": 191800 }, { "epoch": 16.55072463768116, "grad_norm": 0.7792167663574219, "learning_rate": 0.001, "loss": 2.0587, "step": 191856 }, { "epoch": 16.555555555555557, "grad_norm": 2.31687331199646, "learning_rate": 0.001, "loss": 2.0427, "step": 191912 }, { "epoch": 16.56038647342995, "grad_norm": 1.6561695337295532, "learning_rate": 0.001, "loss": 2.0321, "step": 191968 }, { "epoch": 16.565217391304348, "grad_norm": 1.8316993713378906, "learning_rate": 0.001, "loss": 2.0397, "step": 192024 }, { "epoch": 16.570048309178745, "grad_norm": 0.8019382953643799, "learning_rate": 0.001, "loss": 2.0575, "step": 192080 }, { "epoch": 16.57487922705314, "grad_norm": 1.310551404953003, "learning_rate": 0.001, "loss": 2.0515, "step": 192136 }, { "epoch": 16.579710144927535, "grad_norm": 0.566013514995575, "learning_rate": 0.001, "loss": 2.0388, "step": 192192 }, { "epoch": 16.584541062801932, "grad_norm": 0.9311992526054382, "learning_rate": 0.001, "loss": 2.0622, "step": 192248 }, { "epoch": 16.58937198067633, "grad_norm": 2.2935314178466797, "learning_rate": 0.001, "loss": 2.0571, "step": 192304 }, { "epoch": 16.594202898550726, "grad_norm": 1.7149572372436523, "learning_rate": 0.001, "loss": 2.045, "step": 192360 }, { "epoch": 16.59903381642512, "grad_norm": 0.6001467704772949, "learning_rate": 0.001, "loss": 2.0345, "step": 192416 }, { "epoch": 16.603864734299517, "grad_norm": 31.393375396728516, "learning_rate": 0.001, "loss": 2.0385, "step": 192472 }, { "epoch": 16.608695652173914, "grad_norm": 1.7302800416946411, "learning_rate": 0.001, "loss": 2.0372, "step": 192528 }, { "epoch": 16.613526570048307, "grad_norm": 0.5918297171592712, "learning_rate": 0.001, "loss": 2.0434, "step": 192584 }, { "epoch": 16.618357487922705, "grad_norm": 0.4707130193710327, "learning_rate": 0.001, "loss": 2.0492, "step": 192640 }, { "epoch": 16.6231884057971, "grad_norm": 0.7781787514686584, "learning_rate": 0.001, "loss": 2.0441, "step": 192696 }, { "epoch": 16.6280193236715, "grad_norm": 0.3937075436115265, "learning_rate": 0.001, "loss": 2.0464, "step": 192752 }, { "epoch": 16.632850241545892, "grad_norm": 0.6312329769134521, "learning_rate": 0.001, "loss": 2.0421, "step": 192808 }, { "epoch": 16.63768115942029, "grad_norm": 0.47556814551353455, "learning_rate": 0.001, "loss": 2.0431, "step": 192864 }, { "epoch": 16.642512077294686, "grad_norm": 2.72989559173584, "learning_rate": 0.001, "loss": 2.042, "step": 192920 }, { "epoch": 16.647342995169083, "grad_norm": 0.5594395995140076, "learning_rate": 0.001, "loss": 2.0633, "step": 192976 }, { "epoch": 16.652173913043477, "grad_norm": 0.4032968580722809, "learning_rate": 0.001, "loss": 2.0592, "step": 193032 }, { "epoch": 16.657004830917874, "grad_norm": 0.3391912281513214, "learning_rate": 0.001, "loss": 2.056, "step": 193088 }, { "epoch": 16.66183574879227, "grad_norm": 1.115797996520996, "learning_rate": 0.001, "loss": 2.0545, "step": 193144 }, { "epoch": 16.666666666666668, "grad_norm": 0.47218626737594604, "learning_rate": 0.001, "loss": 2.0448, "step": 193200 }, { "epoch": 16.67149758454106, "grad_norm": 0.4615825116634369, "learning_rate": 0.001, "loss": 2.0393, "step": 193256 }, { "epoch": 16.67632850241546, "grad_norm": 0.34725797176361084, "learning_rate": 0.001, "loss": 2.0459, "step": 193312 }, { "epoch": 16.681159420289855, "grad_norm": 0.9175338745117188, "learning_rate": 0.001, "loss": 2.0455, "step": 193368 }, { "epoch": 16.685990338164252, "grad_norm": 0.4513278901576996, "learning_rate": 0.001, "loss": 2.0422, "step": 193424 }, { "epoch": 16.690821256038646, "grad_norm": 0.7281111478805542, "learning_rate": 0.001, "loss": 2.0489, "step": 193480 }, { "epoch": 16.695652173913043, "grad_norm": 1.4144949913024902, "learning_rate": 0.001, "loss": 2.0409, "step": 193536 }, { "epoch": 16.70048309178744, "grad_norm": 0.3072492778301239, "learning_rate": 0.001, "loss": 2.0291, "step": 193592 }, { "epoch": 16.705314009661837, "grad_norm": 0.5312104821205139, "learning_rate": 0.001, "loss": 2.0364, "step": 193648 }, { "epoch": 16.71014492753623, "grad_norm": 0.6134706735610962, "learning_rate": 0.001, "loss": 2.0323, "step": 193704 }, { "epoch": 16.714975845410628, "grad_norm": 0.5133123397827148, "learning_rate": 0.001, "loss": 2.0237, "step": 193760 }, { "epoch": 16.719806763285025, "grad_norm": 0.7568499445915222, "learning_rate": 0.001, "loss": 2.0234, "step": 193816 }, { "epoch": 16.72463768115942, "grad_norm": 0.6134992837905884, "learning_rate": 0.001, "loss": 2.0174, "step": 193872 }, { "epoch": 16.729468599033815, "grad_norm": 0.23798410594463348, "learning_rate": 0.001, "loss": 2.0387, "step": 193928 }, { "epoch": 16.734299516908212, "grad_norm": 0.3881063759326935, "learning_rate": 0.001, "loss": 2.0401, "step": 193984 }, { "epoch": 16.73913043478261, "grad_norm": 0.5284119248390198, "learning_rate": 0.001, "loss": 2.0368, "step": 194040 }, { "epoch": 16.743961352657006, "grad_norm": 11.572823524475098, "learning_rate": 0.001, "loss": 2.0388, "step": 194096 }, { "epoch": 16.7487922705314, "grad_norm": 0.9035015106201172, "learning_rate": 0.001, "loss": 2.0318, "step": 194152 }, { "epoch": 16.753623188405797, "grad_norm": 0.5194437503814697, "learning_rate": 0.001, "loss": 2.0321, "step": 194208 }, { "epoch": 16.758454106280194, "grad_norm": 0.4773789346218109, "learning_rate": 0.001, "loss": 2.0342, "step": 194264 }, { "epoch": 16.76328502415459, "grad_norm": 0.4673251211643219, "learning_rate": 0.001, "loss": 2.0408, "step": 194320 }, { "epoch": 16.768115942028984, "grad_norm": 0.35106217861175537, "learning_rate": 0.001, "loss": 2.0449, "step": 194376 }, { "epoch": 16.77294685990338, "grad_norm": 0.42789706587791443, "learning_rate": 0.001, "loss": 2.0412, "step": 194432 }, { "epoch": 16.77777777777778, "grad_norm": 0.32567098736763, "learning_rate": 0.001, "loss": 2.0315, "step": 194488 }, { "epoch": 16.782608695652176, "grad_norm": 0.5352192521095276, "learning_rate": 0.001, "loss": 2.0253, "step": 194544 }, { "epoch": 16.78743961352657, "grad_norm": 0.6705586910247803, "learning_rate": 0.001, "loss": 2.0249, "step": 194600 }, { "epoch": 16.792270531400966, "grad_norm": 0.5575780272483826, "learning_rate": 0.001, "loss": 2.0267, "step": 194656 }, { "epoch": 16.797101449275363, "grad_norm": 0.634821355342865, "learning_rate": 0.001, "loss": 2.0466, "step": 194712 }, { "epoch": 16.80193236714976, "grad_norm": 0.3211246728897095, "learning_rate": 0.001, "loss": 2.0486, "step": 194768 }, { "epoch": 16.806763285024154, "grad_norm": 0.9176349639892578, "learning_rate": 0.001, "loss": 2.0421, "step": 194824 }, { "epoch": 16.81159420289855, "grad_norm": 1.6583536863327026, "learning_rate": 0.001, "loss": 2.0562, "step": 194880 }, { "epoch": 16.816425120772948, "grad_norm": 0.6718212366104126, "learning_rate": 0.001, "loss": 2.0531, "step": 194936 }, { "epoch": 16.82125603864734, "grad_norm": 0.30208057165145874, "learning_rate": 0.001, "loss": 2.0354, "step": 194992 }, { "epoch": 16.82608695652174, "grad_norm": 1.1301578283309937, "learning_rate": 0.001, "loss": 2.0383, "step": 195048 }, { "epoch": 16.830917874396135, "grad_norm": 0.3630686402320862, "learning_rate": 0.001, "loss": 2.0264, "step": 195104 }, { "epoch": 16.835748792270532, "grad_norm": 0.5475546717643738, "learning_rate": 0.001, "loss": 2.0261, "step": 195160 }, { "epoch": 16.840579710144926, "grad_norm": 0.38853809237480164, "learning_rate": 0.001, "loss": 2.0378, "step": 195216 }, { "epoch": 16.845410628019323, "grad_norm": 0.43732473254203796, "learning_rate": 0.001, "loss": 2.0318, "step": 195272 }, { "epoch": 16.85024154589372, "grad_norm": 0.7336903810501099, "learning_rate": 0.001, "loss": 2.0355, "step": 195328 }, { "epoch": 16.855072463768117, "grad_norm": 0.3635547459125519, "learning_rate": 0.001, "loss": 2.0258, "step": 195384 }, { "epoch": 16.85990338164251, "grad_norm": 0.8626202940940857, "learning_rate": 0.001, "loss": 2.0259, "step": 195440 }, { "epoch": 16.864734299516908, "grad_norm": 1.1758482456207275, "learning_rate": 0.001, "loss": 2.0291, "step": 195496 }, { "epoch": 16.869565217391305, "grad_norm": 0.4273427128791809, "learning_rate": 0.001, "loss": 2.0139, "step": 195552 }, { "epoch": 16.8743961352657, "grad_norm": 0.9766685962677002, "learning_rate": 0.001, "loss": 2.033, "step": 195608 }, { "epoch": 16.879227053140095, "grad_norm": 1.9136470556259155, "learning_rate": 0.001, "loss": 2.0626, "step": 195664 }, { "epoch": 16.884057971014492, "grad_norm": 1.6740261316299438, "learning_rate": 0.001, "loss": 2.0535, "step": 195720 }, { "epoch": 16.88888888888889, "grad_norm": 1.4089282751083374, "learning_rate": 0.001, "loss": 2.039, "step": 195776 }, { "epoch": 16.893719806763286, "grad_norm": 2.856243848800659, "learning_rate": 0.001, "loss": 2.0532, "step": 195832 }, { "epoch": 16.89855072463768, "grad_norm": 0.27055734395980835, "learning_rate": 0.001, "loss": 2.0414, "step": 195888 }, { "epoch": 16.903381642512077, "grad_norm": 1.418712854385376, "learning_rate": 0.001, "loss": 2.0551, "step": 195944 }, { "epoch": 16.908212560386474, "grad_norm": 0.41182950139045715, "learning_rate": 0.001, "loss": 2.0431, "step": 196000 }, { "epoch": 16.91304347826087, "grad_norm": 0.49568501114845276, "learning_rate": 0.001, "loss": 2.0396, "step": 196056 }, { "epoch": 16.917874396135264, "grad_norm": 0.39828601479530334, "learning_rate": 0.001, "loss": 2.03, "step": 196112 }, { "epoch": 16.92270531400966, "grad_norm": 0.4218588173389435, "learning_rate": 0.001, "loss": 2.0322, "step": 196168 }, { "epoch": 16.92753623188406, "grad_norm": 0.33129236102104187, "learning_rate": 0.001, "loss": 2.0301, "step": 196224 }, { "epoch": 16.932367149758456, "grad_norm": 0.2899501919746399, "learning_rate": 0.001, "loss": 2.0358, "step": 196280 }, { "epoch": 16.93719806763285, "grad_norm": 0.33048245310783386, "learning_rate": 0.001, "loss": 2.0275, "step": 196336 }, { "epoch": 16.942028985507246, "grad_norm": 0.7601819038391113, "learning_rate": 0.001, "loss": 2.0264, "step": 196392 }, { "epoch": 16.946859903381643, "grad_norm": 1.2421513795852661, "learning_rate": 0.001, "loss": 2.0294, "step": 196448 }, { "epoch": 16.95169082125604, "grad_norm": 2.4832963943481445, "learning_rate": 0.001, "loss": 2.0176, "step": 196504 }, { "epoch": 16.956521739130434, "grad_norm": 3.9885926246643066, "learning_rate": 0.001, "loss": 2.0262, "step": 196560 }, { "epoch": 16.96135265700483, "grad_norm": 3.799417495727539, "learning_rate": 0.001, "loss": 2.0483, "step": 196616 }, { "epoch": 16.966183574879228, "grad_norm": 1.228283405303955, "learning_rate": 0.001, "loss": 2.046, "step": 196672 }, { "epoch": 16.971014492753625, "grad_norm": 4.565097808837891, "learning_rate": 0.001, "loss": 2.0446, "step": 196728 }, { "epoch": 16.97584541062802, "grad_norm": 1.1718043088912964, "learning_rate": 0.001, "loss": 2.0566, "step": 196784 }, { "epoch": 16.980676328502415, "grad_norm": 0.9650245308876038, "learning_rate": 0.001, "loss": 2.0506, "step": 196840 }, { "epoch": 16.985507246376812, "grad_norm": 22.02593231201172, "learning_rate": 0.001, "loss": 2.0424, "step": 196896 }, { "epoch": 16.990338164251206, "grad_norm": 0.6295512318611145, "learning_rate": 0.001, "loss": 2.0344, "step": 196952 }, { "epoch": 16.995169082125603, "grad_norm": 0.7233116626739502, "learning_rate": 0.001, "loss": 2.0298, "step": 197008 }, { "epoch": 17.0, "grad_norm": 0.9202417135238647, "learning_rate": 0.001, "loss": 2.0264, "step": 197064 }, { "epoch": 17.004830917874397, "grad_norm": 0.7357917428016663, "learning_rate": 0.001, "loss": 1.9913, "step": 197120 }, { "epoch": 17.00966183574879, "grad_norm": 1.363856315612793, "learning_rate": 0.001, "loss": 1.9916, "step": 197176 }, { "epoch": 17.014492753623188, "grad_norm": 0.7536722421646118, "learning_rate": 0.001, "loss": 1.9872, "step": 197232 }, { "epoch": 17.019323671497585, "grad_norm": 0.4628455638885498, "learning_rate": 0.001, "loss": 1.9983, "step": 197288 }, { "epoch": 17.02415458937198, "grad_norm": 0.43860211968421936, "learning_rate": 0.001, "loss": 1.9898, "step": 197344 }, { "epoch": 17.028985507246375, "grad_norm": 1.0862034559249878, "learning_rate": 0.001, "loss": 2.01, "step": 197400 }, { "epoch": 17.033816425120772, "grad_norm": 0.41091638803482056, "learning_rate": 0.001, "loss": 2.0078, "step": 197456 }, { "epoch": 17.03864734299517, "grad_norm": 0.8949408531188965, "learning_rate": 0.001, "loss": 1.9963, "step": 197512 }, { "epoch": 17.043478260869566, "grad_norm": 0.4607747793197632, "learning_rate": 0.001, "loss": 1.9903, "step": 197568 }, { "epoch": 17.04830917874396, "grad_norm": 0.4450814723968506, "learning_rate": 0.001, "loss": 1.9989, "step": 197624 }, { "epoch": 17.053140096618357, "grad_norm": 0.6968145966529846, "learning_rate": 0.001, "loss": 2.0258, "step": 197680 }, { "epoch": 17.057971014492754, "grad_norm": 0.3505527079105377, "learning_rate": 0.001, "loss": 2.0374, "step": 197736 }, { "epoch": 17.06280193236715, "grad_norm": 0.7007247805595398, "learning_rate": 0.001, "loss": 2.0241, "step": 197792 }, { "epoch": 17.067632850241544, "grad_norm": 0.42565181851387024, "learning_rate": 0.001, "loss": 2.0158, "step": 197848 }, { "epoch": 17.07246376811594, "grad_norm": 0.6678960919380188, "learning_rate": 0.001, "loss": 2.0191, "step": 197904 }, { "epoch": 17.07729468599034, "grad_norm": 4.585575103759766, "learning_rate": 0.001, "loss": 2.0139, "step": 197960 }, { "epoch": 17.082125603864736, "grad_norm": 7.68037748336792, "learning_rate": 0.001, "loss": 2.0153, "step": 198016 }, { "epoch": 17.08695652173913, "grad_norm": 2.2260894775390625, "learning_rate": 0.001, "loss": 2.0219, "step": 198072 }, { "epoch": 17.091787439613526, "grad_norm": 4.9096198081970215, "learning_rate": 0.001, "loss": 2.0295, "step": 198128 }, { "epoch": 17.096618357487923, "grad_norm": 1.7353177070617676, "learning_rate": 0.001, "loss": 2.0289, "step": 198184 }, { "epoch": 17.10144927536232, "grad_norm": 1.8456236124038696, "learning_rate": 0.001, "loss": 2.0475, "step": 198240 }, { "epoch": 17.106280193236714, "grad_norm": 3.275623321533203, "learning_rate": 0.001, "loss": 2.0504, "step": 198296 }, { "epoch": 17.11111111111111, "grad_norm": 2.0498526096343994, "learning_rate": 0.001, "loss": 2.0347, "step": 198352 }, { "epoch": 17.115942028985508, "grad_norm": 1.6902790069580078, "learning_rate": 0.001, "loss": 2.0453, "step": 198408 }, { "epoch": 17.120772946859905, "grad_norm": 14.961019515991211, "learning_rate": 0.001, "loss": 2.0749, "step": 198464 }, { "epoch": 17.1256038647343, "grad_norm": 9.504125595092773, "learning_rate": 0.001, "loss": 2.1208, "step": 198520 }, { "epoch": 17.130434782608695, "grad_norm": 3.910196542739868, "learning_rate": 0.001, "loss": 2.1016, "step": 198576 }, { "epoch": 17.135265700483092, "grad_norm": 89.04476165771484, "learning_rate": 0.001, "loss": 2.0685, "step": 198632 }, { "epoch": 17.14009661835749, "grad_norm": 2.2351114749908447, "learning_rate": 0.001, "loss": 2.0557, "step": 198688 }, { "epoch": 17.144927536231883, "grad_norm": 0.8914165496826172, "learning_rate": 0.001, "loss": 2.0747, "step": 198744 }, { "epoch": 17.14975845410628, "grad_norm": 0.8623120188713074, "learning_rate": 0.001, "loss": 2.0776, "step": 198800 }, { "epoch": 17.154589371980677, "grad_norm": 2.2080469131469727, "learning_rate": 0.001, "loss": 2.062, "step": 198856 }, { "epoch": 17.159420289855074, "grad_norm": 2.3244779109954834, "learning_rate": 0.001, "loss": 2.0849, "step": 198912 }, { "epoch": 17.164251207729468, "grad_norm": 3.2196238040924072, "learning_rate": 0.001, "loss": 2.0722, "step": 198968 }, { "epoch": 17.169082125603865, "grad_norm": 7.231638431549072, "learning_rate": 0.001, "loss": 2.0453, "step": 199024 }, { "epoch": 17.17391304347826, "grad_norm": 0.8193380832672119, "learning_rate": 0.001, "loss": 2.0565, "step": 199080 }, { "epoch": 17.17874396135266, "grad_norm": 1.0477856397628784, "learning_rate": 0.001, "loss": 2.057, "step": 199136 }, { "epoch": 17.183574879227052, "grad_norm": 2.18863582611084, "learning_rate": 0.001, "loss": 2.044, "step": 199192 }, { "epoch": 17.18840579710145, "grad_norm": 6.6848249435424805, "learning_rate": 0.001, "loss": 2.0369, "step": 199248 }, { "epoch": 17.193236714975846, "grad_norm": 2.0866594314575195, "learning_rate": 0.001, "loss": 2.0491, "step": 199304 }, { "epoch": 17.19806763285024, "grad_norm": 0.6480128765106201, "learning_rate": 0.001, "loss": 2.0412, "step": 199360 }, { "epoch": 17.202898550724637, "grad_norm": 2.501214027404785, "learning_rate": 0.001, "loss": 2.0439, "step": 199416 }, { "epoch": 17.207729468599034, "grad_norm": 0.3921353816986084, "learning_rate": 0.001, "loss": 2.045, "step": 199472 }, { "epoch": 17.21256038647343, "grad_norm": 16.650386810302734, "learning_rate": 0.001, "loss": 2.0279, "step": 199528 }, { "epoch": 17.217391304347824, "grad_norm": 4.484215259552002, "learning_rate": 0.001, "loss": 2.0351, "step": 199584 }, { "epoch": 17.22222222222222, "grad_norm": 11.456953048706055, "learning_rate": 0.001, "loss": 2.0329, "step": 199640 }, { "epoch": 17.22705314009662, "grad_norm": 8.215957641601562, "learning_rate": 0.001, "loss": 2.0216, "step": 199696 }, { "epoch": 17.231884057971016, "grad_norm": 0.8254291415214539, "learning_rate": 0.001, "loss": 2.0257, "step": 199752 }, { "epoch": 17.23671497584541, "grad_norm": 0.8101583123207092, "learning_rate": 0.001, "loss": 2.0326, "step": 199808 }, { "epoch": 17.241545893719806, "grad_norm": 0.9279499650001526, "learning_rate": 0.001, "loss": 2.0257, "step": 199864 }, { "epoch": 17.246376811594203, "grad_norm": 0.5354316830635071, "learning_rate": 0.001, "loss": 2.0268, "step": 199920 }, { "epoch": 17.2512077294686, "grad_norm": 0.5018588900566101, "learning_rate": 0.001, "loss": 2.03, "step": 199976 }, { "epoch": 17.256038647342994, "grad_norm": 0.687272846698761, "learning_rate": 0.001, "loss": 2.0225, "step": 200032 }, { "epoch": 17.26086956521739, "grad_norm": 1.1128239631652832, "learning_rate": 0.001, "loss": 2.0178, "step": 200088 }, { "epoch": 17.265700483091788, "grad_norm": 0.3646056354045868, "learning_rate": 0.001, "loss": 2.0201, "step": 200144 }, { "epoch": 17.270531400966185, "grad_norm": 3.7217750549316406, "learning_rate": 0.001, "loss": 2.0146, "step": 200200 }, { "epoch": 17.27536231884058, "grad_norm": 1.0923576354980469, "learning_rate": 0.001, "loss": 2.0199, "step": 200256 }, { "epoch": 17.280193236714975, "grad_norm": 0.4362781047821045, "learning_rate": 0.001, "loss": 2.0248, "step": 200312 }, { "epoch": 17.285024154589372, "grad_norm": 2.7038536071777344, "learning_rate": 0.001, "loss": 2.0455, "step": 200368 }, { "epoch": 17.28985507246377, "grad_norm": 0.8134745359420776, "learning_rate": 0.001, "loss": 2.031, "step": 200424 }, { "epoch": 17.294685990338163, "grad_norm": 1.8891823291778564, "learning_rate": 0.001, "loss": 2.0245, "step": 200480 }, { "epoch": 17.29951690821256, "grad_norm": 0.6027181148529053, "learning_rate": 0.001, "loss": 2.0163, "step": 200536 }, { "epoch": 17.304347826086957, "grad_norm": 0.7690904140472412, "learning_rate": 0.001, "loss": 2.0142, "step": 200592 }, { "epoch": 17.309178743961354, "grad_norm": 1.082108736038208, "learning_rate": 0.001, "loss": 2.0055, "step": 200648 }, { "epoch": 17.314009661835748, "grad_norm": 0.49429255723953247, "learning_rate": 0.001, "loss": 2.0199, "step": 200704 }, { "epoch": 17.318840579710145, "grad_norm": 8.144294738769531, "learning_rate": 0.001, "loss": 2.0111, "step": 200760 }, { "epoch": 17.32367149758454, "grad_norm": 1.2012593746185303, "learning_rate": 0.001, "loss": 1.9996, "step": 200816 }, { "epoch": 17.32850241545894, "grad_norm": 0.45788559317588806, "learning_rate": 0.001, "loss": 2.0101, "step": 200872 }, { "epoch": 17.333333333333332, "grad_norm": 1.644631028175354, "learning_rate": 0.001, "loss": 2.0104, "step": 200928 }, { "epoch": 17.33816425120773, "grad_norm": 4.120462894439697, "learning_rate": 0.001, "loss": 2.0094, "step": 200984 }, { "epoch": 17.342995169082126, "grad_norm": 0.2877126634120941, "learning_rate": 0.001, "loss": 2.0078, "step": 201040 }, { "epoch": 17.347826086956523, "grad_norm": 0.8104161620140076, "learning_rate": 0.001, "loss": 2.01, "step": 201096 }, { "epoch": 17.352657004830917, "grad_norm": 0.7426979541778564, "learning_rate": 0.001, "loss": 2.0098, "step": 201152 }, { "epoch": 17.357487922705314, "grad_norm": 1.6111778020858765, "learning_rate": 0.001, "loss": 2.0168, "step": 201208 }, { "epoch": 17.36231884057971, "grad_norm": 0.7598965764045715, "learning_rate": 0.001, "loss": 2.0034, "step": 201264 }, { "epoch": 17.367149758454108, "grad_norm": 0.5758513808250427, "learning_rate": 0.001, "loss": 2.0228, "step": 201320 }, { "epoch": 17.3719806763285, "grad_norm": 1.135926365852356, "learning_rate": 0.001, "loss": 2.0055, "step": 201376 }, { "epoch": 17.3768115942029, "grad_norm": 3.106802225112915, "learning_rate": 0.001, "loss": 2.0016, "step": 201432 }, { "epoch": 17.381642512077295, "grad_norm": 2.6268579959869385, "learning_rate": 0.001, "loss": 2.0048, "step": 201488 }, { "epoch": 17.386473429951693, "grad_norm": 0.4911486506462097, "learning_rate": 0.001, "loss": 2.0022, "step": 201544 }, { "epoch": 17.391304347826086, "grad_norm": 0.5255958437919617, "learning_rate": 0.001, "loss": 2.0047, "step": 201600 }, { "epoch": 17.396135265700483, "grad_norm": 0.46415650844573975, "learning_rate": 0.001, "loss": 2.0073, "step": 201656 }, { "epoch": 17.40096618357488, "grad_norm": 0.4485037326812744, "learning_rate": 0.001, "loss": 1.9943, "step": 201712 }, { "epoch": 17.405797101449274, "grad_norm": 0.7632162570953369, "learning_rate": 0.001, "loss": 1.9933, "step": 201768 }, { "epoch": 17.41062801932367, "grad_norm": 3.6975579261779785, "learning_rate": 0.001, "loss": 1.983, "step": 201824 }, { "epoch": 17.415458937198068, "grad_norm": 1.308274745941162, "learning_rate": 0.001, "loss": 1.982, "step": 201880 }, { "epoch": 17.420289855072465, "grad_norm": 0.34257379174232483, "learning_rate": 0.001, "loss": 1.9997, "step": 201936 }, { "epoch": 17.42512077294686, "grad_norm": 0.5163534283638, "learning_rate": 0.001, "loss": 1.9982, "step": 201992 }, { "epoch": 17.429951690821255, "grad_norm": 0.9127593040466309, "learning_rate": 0.001, "loss": 1.9969, "step": 202048 }, { "epoch": 17.434782608695652, "grad_norm": 0.341212660074234, "learning_rate": 0.001, "loss": 2.0065, "step": 202104 }, { "epoch": 17.43961352657005, "grad_norm": 1.1749262809753418, "learning_rate": 0.001, "loss": 1.9972, "step": 202160 }, { "epoch": 17.444444444444443, "grad_norm": 0.6761568188667297, "learning_rate": 0.001, "loss": 1.9978, "step": 202216 }, { "epoch": 17.44927536231884, "grad_norm": 2.245246410369873, "learning_rate": 0.001, "loss": 1.9848, "step": 202272 }, { "epoch": 17.454106280193237, "grad_norm": 0.4219343662261963, "learning_rate": 0.001, "loss": 1.9917, "step": 202328 }, { "epoch": 17.458937198067634, "grad_norm": 0.6091845631599426, "learning_rate": 0.001, "loss": 1.9879, "step": 202384 }, { "epoch": 17.463768115942027, "grad_norm": 0.6387943625450134, "learning_rate": 0.001, "loss": 1.989, "step": 202440 }, { "epoch": 17.468599033816425, "grad_norm": 0.3861483931541443, "learning_rate": 0.001, "loss": 1.9963, "step": 202496 }, { "epoch": 17.47342995169082, "grad_norm": 0.4580955505371094, "learning_rate": 0.001, "loss": 2.0025, "step": 202552 }, { "epoch": 17.47826086956522, "grad_norm": 0.9553045034408569, "learning_rate": 0.001, "loss": 1.9998, "step": 202608 }, { "epoch": 17.483091787439612, "grad_norm": 0.41008260846138, "learning_rate": 0.001, "loss": 1.9996, "step": 202664 }, { "epoch": 17.48792270531401, "grad_norm": 0.3904622793197632, "learning_rate": 0.001, "loss": 2.012, "step": 202720 }, { "epoch": 17.492753623188406, "grad_norm": 0.43343502283096313, "learning_rate": 0.001, "loss": 2.0329, "step": 202776 }, { "epoch": 17.497584541062803, "grad_norm": 0.30230212211608887, "learning_rate": 0.001, "loss": 2.0326, "step": 202832 }, { "epoch": 17.502415458937197, "grad_norm": 0.35011816024780273, "learning_rate": 0.001, "loss": 2.0207, "step": 202888 }, { "epoch": 17.507246376811594, "grad_norm": 0.8570321202278137, "learning_rate": 0.001, "loss": 2.0277, "step": 202944 }, { "epoch": 17.51207729468599, "grad_norm": 1.2873200178146362, "learning_rate": 0.001, "loss": 2.0365, "step": 203000 }, { "epoch": 17.516908212560388, "grad_norm": 1.4232642650604248, "learning_rate": 0.001, "loss": 2.0346, "step": 203056 }, { "epoch": 17.52173913043478, "grad_norm": 0.8065872192382812, "learning_rate": 0.001, "loss": 2.0325, "step": 203112 }, { "epoch": 17.52657004830918, "grad_norm": 0.36926472187042236, "learning_rate": 0.001, "loss": 2.033, "step": 203168 }, { "epoch": 17.531400966183575, "grad_norm": 0.5115698575973511, "learning_rate": 0.001, "loss": 2.0324, "step": 203224 }, { "epoch": 17.536231884057973, "grad_norm": 1.2330418825149536, "learning_rate": 0.001, "loss": 2.0171, "step": 203280 }, { "epoch": 17.541062801932366, "grad_norm": 1.1197870969772339, "learning_rate": 0.001, "loss": 2.0157, "step": 203336 }, { "epoch": 17.545893719806763, "grad_norm": 0.703370988368988, "learning_rate": 0.001, "loss": 2.0033, "step": 203392 }, { "epoch": 17.55072463768116, "grad_norm": 4.859795093536377, "learning_rate": 0.001, "loss": 2.008, "step": 203448 }, { "epoch": 17.555555555555557, "grad_norm": 3.150716543197632, "learning_rate": 0.001, "loss": 2.0202, "step": 203504 }, { "epoch": 17.56038647342995, "grad_norm": 0.6640255451202393, "learning_rate": 0.001, "loss": 2.0098, "step": 203560 }, { "epoch": 17.565217391304348, "grad_norm": 1.415740966796875, "learning_rate": 0.001, "loss": 2.0059, "step": 203616 }, { "epoch": 17.570048309178745, "grad_norm": 0.8966888785362244, "learning_rate": 0.001, "loss": 2.028, "step": 203672 }, { "epoch": 17.57487922705314, "grad_norm": 0.38754573464393616, "learning_rate": 0.001, "loss": 2.016, "step": 203728 }, { "epoch": 17.579710144927535, "grad_norm": 0.35220766067504883, "learning_rate": 0.001, "loss": 2.0191, "step": 203784 }, { "epoch": 17.584541062801932, "grad_norm": 0.3672716021537781, "learning_rate": 0.001, "loss": 2.0229, "step": 203840 }, { "epoch": 17.58937198067633, "grad_norm": 0.3008093237876892, "learning_rate": 0.001, "loss": 2.0234, "step": 203896 }, { "epoch": 17.594202898550726, "grad_norm": 0.7144272923469543, "learning_rate": 0.001, "loss": 2.0053, "step": 203952 }, { "epoch": 17.59903381642512, "grad_norm": 0.3190706670284271, "learning_rate": 0.001, "loss": 2.0101, "step": 204008 }, { "epoch": 17.603864734299517, "grad_norm": 0.29227423667907715, "learning_rate": 0.001, "loss": 2.0013, "step": 204064 }, { "epoch": 17.608695652173914, "grad_norm": 0.3204523026943207, "learning_rate": 0.001, "loss": 2.0057, "step": 204120 }, { "epoch": 17.613526570048307, "grad_norm": 0.3938024342060089, "learning_rate": 0.001, "loss": 2.0122, "step": 204176 }, { "epoch": 17.618357487922705, "grad_norm": 4.193080902099609, "learning_rate": 0.001, "loss": 2.018, "step": 204232 }, { "epoch": 17.6231884057971, "grad_norm": 0.3865472376346588, "learning_rate": 0.001, "loss": 2.0056, "step": 204288 }, { "epoch": 17.6280193236715, "grad_norm": 0.3245698809623718, "learning_rate": 0.001, "loss": 1.9988, "step": 204344 }, { "epoch": 17.632850241545892, "grad_norm": 2.2892158031463623, "learning_rate": 0.001, "loss": 2.0025, "step": 204400 }, { "epoch": 17.63768115942029, "grad_norm": 3.89587664604187, "learning_rate": 0.001, "loss": 2.0011, "step": 204456 }, { "epoch": 17.642512077294686, "grad_norm": 0.3852275013923645, "learning_rate": 0.001, "loss": 2.0043, "step": 204512 }, { "epoch": 17.647342995169083, "grad_norm": 0.8561948537826538, "learning_rate": 0.001, "loss": 2.0132, "step": 204568 }, { "epoch": 17.652173913043477, "grad_norm": 0.6353474259376526, "learning_rate": 0.001, "loss": 2.0342, "step": 204624 }, { "epoch": 17.657004830917874, "grad_norm": 0.5256075263023376, "learning_rate": 0.001, "loss": 2.0279, "step": 204680 }, { "epoch": 17.66183574879227, "grad_norm": 0.7293254137039185, "learning_rate": 0.001, "loss": 2.0162, "step": 204736 }, { "epoch": 17.666666666666668, "grad_norm": 0.47616949677467346, "learning_rate": 0.001, "loss": 2.0065, "step": 204792 }, { "epoch": 17.67149758454106, "grad_norm": 1.3632248640060425, "learning_rate": 0.001, "loss": 2.013, "step": 204848 }, { "epoch": 17.67632850241546, "grad_norm": 0.8381468653678894, "learning_rate": 0.001, "loss": 2.0098, "step": 204904 }, { "epoch": 17.681159420289855, "grad_norm": 0.42445775866508484, "learning_rate": 0.001, "loss": 2.008, "step": 204960 }, { "epoch": 17.685990338164252, "grad_norm": 0.3829394578933716, "learning_rate": 0.001, "loss": 2.0076, "step": 205016 }, { "epoch": 17.690821256038646, "grad_norm": 2.4244043827056885, "learning_rate": 0.001, "loss": 2.0177, "step": 205072 }, { "epoch": 17.695652173913043, "grad_norm": 0.943936824798584, "learning_rate": 0.001, "loss": 2.0208, "step": 205128 }, { "epoch": 17.70048309178744, "grad_norm": 2.515855312347412, "learning_rate": 0.001, "loss": 2.0263, "step": 205184 }, { "epoch": 17.705314009661837, "grad_norm": 1.1545761823654175, "learning_rate": 0.001, "loss": 2.0157, "step": 205240 }, { "epoch": 17.71014492753623, "grad_norm": 1.326422929763794, "learning_rate": 0.001, "loss": 2.0279, "step": 205296 }, { "epoch": 17.714975845410628, "grad_norm": 1.9896457195281982, "learning_rate": 0.001, "loss": 2.0353, "step": 205352 }, { "epoch": 17.719806763285025, "grad_norm": 1.214542269706726, "learning_rate": 0.001, "loss": 2.0338, "step": 205408 }, { "epoch": 17.72463768115942, "grad_norm": 1.8464019298553467, "learning_rate": 0.001, "loss": 2.0492, "step": 205464 }, { "epoch": 17.729468599033815, "grad_norm": 1.2762267589569092, "learning_rate": 0.001, "loss": 2.0539, "step": 205520 }, { "epoch": 17.734299516908212, "grad_norm": 0.6249876022338867, "learning_rate": 0.001, "loss": 2.0498, "step": 205576 }, { "epoch": 17.73913043478261, "grad_norm": 0.34604132175445557, "learning_rate": 0.001, "loss": 2.0406, "step": 205632 }, { "epoch": 17.743961352657006, "grad_norm": 1.9437006711959839, "learning_rate": 0.001, "loss": 2.0226, "step": 205688 }, { "epoch": 17.7487922705314, "grad_norm": 0.4825698733329773, "learning_rate": 0.001, "loss": 2.0242, "step": 205744 }, { "epoch": 17.753623188405797, "grad_norm": 0.6121832132339478, "learning_rate": 0.001, "loss": 2.0159, "step": 205800 }, { "epoch": 17.758454106280194, "grad_norm": 0.47594112157821655, "learning_rate": 0.001, "loss": 2.0203, "step": 205856 }, { "epoch": 17.76328502415459, "grad_norm": 1.5213972330093384, "learning_rate": 0.001, "loss": 2.0249, "step": 205912 }, { "epoch": 17.768115942028984, "grad_norm": 0.8917885422706604, "learning_rate": 0.001, "loss": 2.0404, "step": 205968 }, { "epoch": 17.77294685990338, "grad_norm": 0.6616286635398865, "learning_rate": 0.001, "loss": 2.0243, "step": 206024 }, { "epoch": 17.77777777777778, "grad_norm": 0.988978385925293, "learning_rate": 0.001, "loss": 2.0111, "step": 206080 }, { "epoch": 17.782608695652176, "grad_norm": 0.8258740901947021, "learning_rate": 0.001, "loss": 2.0228, "step": 206136 }, { "epoch": 17.78743961352657, "grad_norm": 0.9602023363113403, "learning_rate": 0.001, "loss": 2.017, "step": 206192 }, { "epoch": 17.792270531400966, "grad_norm": 1.483341097831726, "learning_rate": 0.001, "loss": 2.0096, "step": 206248 }, { "epoch": 17.797101449275363, "grad_norm": 0.8836691379547119, "learning_rate": 0.001, "loss": 2.0137, "step": 206304 }, { "epoch": 17.80193236714976, "grad_norm": 1.424958348274231, "learning_rate": 0.001, "loss": 2.0146, "step": 206360 }, { "epoch": 17.806763285024154, "grad_norm": 0.7646310329437256, "learning_rate": 0.001, "loss": 2.011, "step": 206416 }, { "epoch": 17.81159420289855, "grad_norm": 1.6540539264678955, "learning_rate": 0.001, "loss": 1.9993, "step": 206472 }, { "epoch": 17.816425120772948, "grad_norm": 0.627555787563324, "learning_rate": 0.001, "loss": 2.0174, "step": 206528 }, { "epoch": 17.82125603864734, "grad_norm": 0.8958575129508972, "learning_rate": 0.001, "loss": 2.0174, "step": 206584 }, { "epoch": 17.82608695652174, "grad_norm": 1.3169989585876465, "learning_rate": 0.001, "loss": 2.0358, "step": 206640 }, { "epoch": 17.830917874396135, "grad_norm": 0.43286895751953125, "learning_rate": 0.001, "loss": 2.041, "step": 206696 }, { "epoch": 17.835748792270532, "grad_norm": 0.7443773150444031, "learning_rate": 0.001, "loss": 2.0299, "step": 206752 }, { "epoch": 17.840579710144926, "grad_norm": 1.0382713079452515, "learning_rate": 0.001, "loss": 2.0287, "step": 206808 }, { "epoch": 17.845410628019323, "grad_norm": 1.7661292552947998, "learning_rate": 0.001, "loss": 2.0273, "step": 206864 }, { "epoch": 17.85024154589372, "grad_norm": 1.2433719635009766, "learning_rate": 0.001, "loss": 2.0492, "step": 206920 }, { "epoch": 17.855072463768117, "grad_norm": 1.3783491849899292, "learning_rate": 0.001, "loss": 2.0384, "step": 206976 }, { "epoch": 17.85990338164251, "grad_norm": 0.6853235363960266, "learning_rate": 0.001, "loss": 2.0362, "step": 207032 }, { "epoch": 17.864734299516908, "grad_norm": 1.6107745170593262, "learning_rate": 0.001, "loss": 2.0256, "step": 207088 }, { "epoch": 17.869565217391305, "grad_norm": 1.0396264791488647, "learning_rate": 0.001, "loss": 2.0302, "step": 207144 }, { "epoch": 17.8743961352657, "grad_norm": 0.6649816036224365, "learning_rate": 0.001, "loss": 2.0309, "step": 207200 }, { "epoch": 17.879227053140095, "grad_norm": 1.2221636772155762, "learning_rate": 0.001, "loss": 2.0258, "step": 207256 }, { "epoch": 17.884057971014492, "grad_norm": 12.234846115112305, "learning_rate": 0.001, "loss": 2.0332, "step": 207312 }, { "epoch": 17.88888888888889, "grad_norm": 1.5809814929962158, "learning_rate": 0.001, "loss": 2.0522, "step": 207368 }, { "epoch": 17.893719806763286, "grad_norm": 0.5922200083732605, "learning_rate": 0.001, "loss": 2.0467, "step": 207424 }, { "epoch": 17.89855072463768, "grad_norm": 0.7181801199913025, "learning_rate": 0.001, "loss": 2.0437, "step": 207480 }, { "epoch": 17.903381642512077, "grad_norm": 1.1437773704528809, "learning_rate": 0.001, "loss": 2.0339, "step": 207536 }, { "epoch": 17.908212560386474, "grad_norm": 2.955794334411621, "learning_rate": 0.001, "loss": 2.0262, "step": 207592 }, { "epoch": 17.91304347826087, "grad_norm": 3.1599080562591553, "learning_rate": 0.001, "loss": 2.026, "step": 207648 }, { "epoch": 17.917874396135264, "grad_norm": 0.8696035742759705, "learning_rate": 0.001, "loss": 2.0407, "step": 207704 }, { "epoch": 17.92270531400966, "grad_norm": 1.1342369318008423, "learning_rate": 0.001, "loss": 2.0462, "step": 207760 }, { "epoch": 17.92753623188406, "grad_norm": 0.48327529430389404, "learning_rate": 0.001, "loss": 2.0398, "step": 207816 }, { "epoch": 17.932367149758456, "grad_norm": 1.3128464221954346, "learning_rate": 0.001, "loss": 2.0413, "step": 207872 }, { "epoch": 17.93719806763285, "grad_norm": 0.7495733499526978, "learning_rate": 0.001, "loss": 2.0379, "step": 207928 }, { "epoch": 17.942028985507246, "grad_norm": 1.0244556665420532, "learning_rate": 0.001, "loss": 2.0277, "step": 207984 }, { "epoch": 17.946859903381643, "grad_norm": 1.3469117879867554, "learning_rate": 0.001, "loss": 2.0323, "step": 208040 }, { "epoch": 17.95169082125604, "grad_norm": 0.9481945633888245, "learning_rate": 0.001, "loss": 2.0272, "step": 208096 }, { "epoch": 17.956521739130434, "grad_norm": 1.437888503074646, "learning_rate": 0.001, "loss": 2.0422, "step": 208152 }, { "epoch": 17.96135265700483, "grad_norm": 0.5852994322776794, "learning_rate": 0.001, "loss": 2.0411, "step": 208208 }, { "epoch": 17.966183574879228, "grad_norm": 0.6143671870231628, "learning_rate": 0.001, "loss": 2.0517, "step": 208264 }, { "epoch": 17.971014492753625, "grad_norm": 1.1441049575805664, "learning_rate": 0.001, "loss": 2.044, "step": 208320 }, { "epoch": 17.97584541062802, "grad_norm": 1.3598695993423462, "learning_rate": 0.001, "loss": 2.0292, "step": 208376 }, { "epoch": 17.980676328502415, "grad_norm": 0.8377267718315125, "learning_rate": 0.001, "loss": 2.0283, "step": 208432 }, { "epoch": 17.985507246376812, "grad_norm": 0.9452416896820068, "learning_rate": 0.001, "loss": 2.0371, "step": 208488 }, { "epoch": 17.990338164251206, "grad_norm": 1.0680909156799316, "learning_rate": 0.001, "loss": 2.0314, "step": 208544 }, { "epoch": 17.995169082125603, "grad_norm": 0.7076923251152039, "learning_rate": 0.001, "loss": 2.0214, "step": 208600 }, { "epoch": 18.0, "grad_norm": 2.3420917987823486, "learning_rate": 0.001, "loss": 2.0244, "step": 208656 }, { "epoch": 18.004830917874397, "grad_norm": 0.6423053741455078, "learning_rate": 0.001, "loss": 2.0009, "step": 208712 }, { "epoch": 18.00966183574879, "grad_norm": 0.608394980430603, "learning_rate": 0.001, "loss": 1.9884, "step": 208768 }, { "epoch": 18.014492753623188, "grad_norm": 0.6217007040977478, "learning_rate": 0.001, "loss": 1.9875, "step": 208824 }, { "epoch": 18.019323671497585, "grad_norm": 3.0535802841186523, "learning_rate": 0.001, "loss": 1.9889, "step": 208880 }, { "epoch": 18.02415458937198, "grad_norm": 9.73165512084961, "learning_rate": 0.001, "loss": 1.9903, "step": 208936 }, { "epoch": 18.028985507246375, "grad_norm": 0.5923244953155518, "learning_rate": 0.001, "loss": 1.9967, "step": 208992 }, { "epoch": 18.033816425120772, "grad_norm": 0.8756278157234192, "learning_rate": 0.001, "loss": 2.0016, "step": 209048 }, { "epoch": 18.03864734299517, "grad_norm": 0.6837737560272217, "learning_rate": 0.001, "loss": 1.9924, "step": 209104 }, { "epoch": 18.043478260869566, "grad_norm": 1.5591719150543213, "learning_rate": 0.001, "loss": 1.9897, "step": 209160 }, { "epoch": 18.04830917874396, "grad_norm": 1.744718313217163, "learning_rate": 0.001, "loss": 1.9936, "step": 209216 }, { "epoch": 18.053140096618357, "grad_norm": 0.7881166934967041, "learning_rate": 0.001, "loss": 2.0045, "step": 209272 }, { "epoch": 18.057971014492754, "grad_norm": 1.719242811203003, "learning_rate": 0.001, "loss": 2.0106, "step": 209328 }, { "epoch": 18.06280193236715, "grad_norm": 3.994037628173828, "learning_rate": 0.001, "loss": 2.02, "step": 209384 }, { "epoch": 18.067632850241544, "grad_norm": 4.949288845062256, "learning_rate": 0.001, "loss": 2.0138, "step": 209440 }, { "epoch": 18.07246376811594, "grad_norm": 0.6637479066848755, "learning_rate": 0.001, "loss": 2.0225, "step": 209496 }, { "epoch": 18.07729468599034, "grad_norm": 0.434272825717926, "learning_rate": 0.001, "loss": 2.0164, "step": 209552 }, { "epoch": 18.082125603864736, "grad_norm": 1.797939658164978, "learning_rate": 0.001, "loss": 2.0099, "step": 209608 }, { "epoch": 18.08695652173913, "grad_norm": 0.5884961485862732, "learning_rate": 0.001, "loss": 2.0063, "step": 209664 }, { "epoch": 18.091787439613526, "grad_norm": 7.537419319152832, "learning_rate": 0.001, "loss": 2.0075, "step": 209720 }, { "epoch": 18.096618357487923, "grad_norm": 6.30743408203125, "learning_rate": 0.001, "loss": 2.0013, "step": 209776 }, { "epoch": 18.10144927536232, "grad_norm": 0.9096853137016296, "learning_rate": 0.001, "loss": 2.0143, "step": 209832 }, { "epoch": 18.106280193236714, "grad_norm": 1.7086690664291382, "learning_rate": 0.001, "loss": 1.9956, "step": 209888 }, { "epoch": 18.11111111111111, "grad_norm": 1.3940095901489258, "learning_rate": 0.001, "loss": 1.9906, "step": 209944 }, { "epoch": 18.115942028985508, "grad_norm": 0.5439285039901733, "learning_rate": 0.001, "loss": 1.9706, "step": 210000 }, { "epoch": 18.120772946859905, "grad_norm": 0.6971027255058289, "learning_rate": 0.001, "loss": 1.9933, "step": 210056 }, { "epoch": 18.1256038647343, "grad_norm": 0.44979310035705566, "learning_rate": 0.001, "loss": 1.9914, "step": 210112 }, { "epoch": 18.130434782608695, "grad_norm": 4.765186309814453, "learning_rate": 0.001, "loss": 1.9958, "step": 210168 }, { "epoch": 18.135265700483092, "grad_norm": 0.5212612152099609, "learning_rate": 0.001, "loss": 1.9854, "step": 210224 }, { "epoch": 18.14009661835749, "grad_norm": 0.9277397990226746, "learning_rate": 0.001, "loss": 1.9877, "step": 210280 }, { "epoch": 18.144927536231883, "grad_norm": 4.402560710906982, "learning_rate": 0.001, "loss": 1.996, "step": 210336 }, { "epoch": 18.14975845410628, "grad_norm": 1.205804705619812, "learning_rate": 0.001, "loss": 1.9919, "step": 210392 }, { "epoch": 18.154589371980677, "grad_norm": 0.6661087274551392, "learning_rate": 0.001, "loss": 1.9859, "step": 210448 }, { "epoch": 18.159420289855074, "grad_norm": 1.2227925062179565, "learning_rate": 0.001, "loss": 1.9809, "step": 210504 }, { "epoch": 18.164251207729468, "grad_norm": 0.5242561101913452, "learning_rate": 0.001, "loss": 1.9968, "step": 210560 }, { "epoch": 18.169082125603865, "grad_norm": 2.146090030670166, "learning_rate": 0.001, "loss": 2.0018, "step": 210616 }, { "epoch": 18.17391304347826, "grad_norm": 11.038692474365234, "learning_rate": 0.001, "loss": 2.0037, "step": 210672 }, { "epoch": 18.17874396135266, "grad_norm": 0.8741036653518677, "learning_rate": 0.001, "loss": 1.9969, "step": 210728 }, { "epoch": 18.183574879227052, "grad_norm": 0.8639529347419739, "learning_rate": 0.001, "loss": 1.9918, "step": 210784 }, { "epoch": 18.18840579710145, "grad_norm": 1.4663217067718506, "learning_rate": 0.001, "loss": 1.9952, "step": 210840 }, { "epoch": 18.193236714975846, "grad_norm": 0.664835512638092, "learning_rate": 0.001, "loss": 1.9997, "step": 210896 }, { "epoch": 18.19806763285024, "grad_norm": 0.5348080396652222, "learning_rate": 0.001, "loss": 2.0063, "step": 210952 }, { "epoch": 18.202898550724637, "grad_norm": 1.5506716966629028, "learning_rate": 0.001, "loss": 2.0106, "step": 211008 }, { "epoch": 18.207729468599034, "grad_norm": 0.4524490237236023, "learning_rate": 0.001, "loss": 2.0049, "step": 211064 }, { "epoch": 18.21256038647343, "grad_norm": 3.699216365814209, "learning_rate": 0.001, "loss": 2.0165, "step": 211120 }, { "epoch": 18.217391304347824, "grad_norm": 2.090256452560425, "learning_rate": 0.001, "loss": 2.0055, "step": 211176 }, { "epoch": 18.22222222222222, "grad_norm": 0.47229790687561035, "learning_rate": 0.001, "loss": 2.0047, "step": 211232 }, { "epoch": 18.22705314009662, "grad_norm": 0.9265060424804688, "learning_rate": 0.001, "loss": 1.9924, "step": 211288 }, { "epoch": 18.231884057971016, "grad_norm": 1.0612058639526367, "learning_rate": 0.001, "loss": 1.9908, "step": 211344 }, { "epoch": 18.23671497584541, "grad_norm": 0.5027437806129456, "learning_rate": 0.001, "loss": 1.9897, "step": 211400 }, { "epoch": 18.241545893719806, "grad_norm": 1.002432942390442, "learning_rate": 0.001, "loss": 1.9947, "step": 211456 }, { "epoch": 18.246376811594203, "grad_norm": 0.40407782793045044, "learning_rate": 0.001, "loss": 1.9804, "step": 211512 }, { "epoch": 18.2512077294686, "grad_norm": 0.5669754147529602, "learning_rate": 0.001, "loss": 1.9788, "step": 211568 }, { "epoch": 18.256038647342994, "grad_norm": 0.5416087508201599, "learning_rate": 0.001, "loss": 1.9846, "step": 211624 }, { "epoch": 18.26086956521739, "grad_norm": 0.5927562117576599, "learning_rate": 0.001, "loss": 1.9898, "step": 211680 }, { "epoch": 18.265700483091788, "grad_norm": 1.023227334022522, "learning_rate": 0.001, "loss": 1.9987, "step": 211736 }, { "epoch": 18.270531400966185, "grad_norm": 0.4366677701473236, "learning_rate": 0.001, "loss": 2.0027, "step": 211792 }, { "epoch": 18.27536231884058, "grad_norm": 1.4531903266906738, "learning_rate": 0.001, "loss": 1.9961, "step": 211848 }, { "epoch": 18.280193236714975, "grad_norm": 1.572837233543396, "learning_rate": 0.001, "loss": 1.987, "step": 211904 }, { "epoch": 18.285024154589372, "grad_norm": 0.632854700088501, "learning_rate": 0.001, "loss": 1.9851, "step": 211960 }, { "epoch": 18.28985507246377, "grad_norm": 0.6068235635757446, "learning_rate": 0.001, "loss": 1.9804, "step": 212016 }, { "epoch": 18.294685990338163, "grad_norm": 0.43270981311798096, "learning_rate": 0.001, "loss": 1.99, "step": 212072 }, { "epoch": 18.29951690821256, "grad_norm": 0.9107401371002197, "learning_rate": 0.001, "loss": 1.9869, "step": 212128 }, { "epoch": 18.304347826086957, "grad_norm": 1.099626898765564, "learning_rate": 0.001, "loss": 1.9835, "step": 212184 }, { "epoch": 18.309178743961354, "grad_norm": 0.3801335394382477, "learning_rate": 0.001, "loss": 2.0022, "step": 212240 }, { "epoch": 18.314009661835748, "grad_norm": 0.41175681352615356, "learning_rate": 0.001, "loss": 1.9892, "step": 212296 }, { "epoch": 18.318840579710145, "grad_norm": 0.3538585305213928, "learning_rate": 0.001, "loss": 1.9837, "step": 212352 }, { "epoch": 18.32367149758454, "grad_norm": 0.800585150718689, "learning_rate": 0.001, "loss": 1.9781, "step": 212408 }, { "epoch": 18.32850241545894, "grad_norm": 0.6442814469337463, "learning_rate": 0.001, "loss": 1.9916, "step": 212464 }, { "epoch": 18.333333333333332, "grad_norm": 0.3548004925251007, "learning_rate": 0.001, "loss": 1.9816, "step": 212520 }, { "epoch": 18.33816425120773, "grad_norm": 0.3411487340927124, "learning_rate": 0.001, "loss": 1.9878, "step": 212576 }, { "epoch": 18.342995169082126, "grad_norm": 0.3141336441040039, "learning_rate": 0.001, "loss": 1.9908, "step": 212632 }, { "epoch": 18.347826086956523, "grad_norm": 0.3833332657814026, "learning_rate": 0.001, "loss": 1.9755, "step": 212688 }, { "epoch": 18.352657004830917, "grad_norm": 0.4970197081565857, "learning_rate": 0.001, "loss": 1.9793, "step": 212744 }, { "epoch": 18.357487922705314, "grad_norm": 0.47132453322410583, "learning_rate": 0.001, "loss": 1.9706, "step": 212800 }, { "epoch": 18.36231884057971, "grad_norm": 0.47788524627685547, "learning_rate": 0.001, "loss": 1.9742, "step": 212856 }, { "epoch": 18.367149758454108, "grad_norm": 0.5253958702087402, "learning_rate": 0.001, "loss": 1.9786, "step": 212912 }, { "epoch": 18.3719806763285, "grad_norm": 0.6821537613868713, "learning_rate": 0.001, "loss": 1.9747, "step": 212968 }, { "epoch": 18.3768115942029, "grad_norm": 0.5410891175270081, "learning_rate": 0.001, "loss": 1.9659, "step": 213024 }, { "epoch": 18.381642512077295, "grad_norm": 1.0737476348876953, "learning_rate": 0.001, "loss": 1.9721, "step": 213080 }, { "epoch": 18.386473429951693, "grad_norm": 5.27506160736084, "learning_rate": 0.001, "loss": 1.9784, "step": 213136 }, { "epoch": 18.391304347826086, "grad_norm": 1.2847074270248413, "learning_rate": 0.001, "loss": 1.975, "step": 213192 }, { "epoch": 18.396135265700483, "grad_norm": 1.0320801734924316, "learning_rate": 0.001, "loss": 1.9833, "step": 213248 }, { "epoch": 18.40096618357488, "grad_norm": 1.0841103792190552, "learning_rate": 0.001, "loss": 1.9793, "step": 213304 }, { "epoch": 18.405797101449274, "grad_norm": 0.36594316363334656, "learning_rate": 0.001, "loss": 1.976, "step": 213360 }, { "epoch": 18.41062801932367, "grad_norm": 0.32960349321365356, "learning_rate": 0.001, "loss": 1.9807, "step": 213416 }, { "epoch": 18.415458937198068, "grad_norm": 3.309676170349121, "learning_rate": 0.001, "loss": 1.98, "step": 213472 }, { "epoch": 18.420289855072465, "grad_norm": 0.3207552134990692, "learning_rate": 0.001, "loss": 1.9925, "step": 213528 }, { "epoch": 18.42512077294686, "grad_norm": 0.5544682741165161, "learning_rate": 0.001, "loss": 1.9801, "step": 213584 }, { "epoch": 18.429951690821255, "grad_norm": 0.3301583230495453, "learning_rate": 0.001, "loss": 1.9813, "step": 213640 }, { "epoch": 18.434782608695652, "grad_norm": 0.29986175894737244, "learning_rate": 0.001, "loss": 1.9918, "step": 213696 }, { "epoch": 18.43961352657005, "grad_norm": 1.6060774326324463, "learning_rate": 0.001, "loss": 1.9851, "step": 213752 }, { "epoch": 18.444444444444443, "grad_norm": 0.30713963508605957, "learning_rate": 0.001, "loss": 1.9937, "step": 213808 }, { "epoch": 18.44927536231884, "grad_norm": 0.3522479236125946, "learning_rate": 0.001, "loss": 1.9925, "step": 213864 }, { "epoch": 18.454106280193237, "grad_norm": 0.8559319376945496, "learning_rate": 0.001, "loss": 1.9847, "step": 213920 }, { "epoch": 18.458937198067634, "grad_norm": 1.3294062614440918, "learning_rate": 0.001, "loss": 1.9785, "step": 213976 }, { "epoch": 18.463768115942027, "grad_norm": 0.6562640070915222, "learning_rate": 0.001, "loss": 1.9873, "step": 214032 }, { "epoch": 18.468599033816425, "grad_norm": 0.3295597732067108, "learning_rate": 0.001, "loss": 1.9839, "step": 214088 }, { "epoch": 18.47342995169082, "grad_norm": 0.5714313983917236, "learning_rate": 0.001, "loss": 1.9792, "step": 214144 }, { "epoch": 18.47826086956522, "grad_norm": 0.48473870754241943, "learning_rate": 0.001, "loss": 1.9837, "step": 214200 }, { "epoch": 18.483091787439612, "grad_norm": 0.3419836163520813, "learning_rate": 0.001, "loss": 1.9836, "step": 214256 }, { "epoch": 18.48792270531401, "grad_norm": 0.3923450708389282, "learning_rate": 0.001, "loss": 1.9809, "step": 214312 }, { "epoch": 18.492753623188406, "grad_norm": 0.30418074131011963, "learning_rate": 0.001, "loss": 1.973, "step": 214368 }, { "epoch": 18.497584541062803, "grad_norm": 0.814293622970581, "learning_rate": 0.001, "loss": 1.9661, "step": 214424 }, { "epoch": 18.502415458937197, "grad_norm": 0.6015337109565735, "learning_rate": 0.001, "loss": 1.9937, "step": 214480 }, { "epoch": 18.507246376811594, "grad_norm": 2.4349822998046875, "learning_rate": 0.001, "loss": 2.0089, "step": 214536 }, { "epoch": 18.51207729468599, "grad_norm": 1.044708490371704, "learning_rate": 0.001, "loss": 2.0097, "step": 214592 }, { "epoch": 18.516908212560388, "grad_norm": 1.8226161003112793, "learning_rate": 0.001, "loss": 1.9983, "step": 214648 }, { "epoch": 18.52173913043478, "grad_norm": 0.6571477651596069, "learning_rate": 0.001, "loss": 1.991, "step": 214704 }, { "epoch": 18.52657004830918, "grad_norm": 1.1567578315734863, "learning_rate": 0.001, "loss": 2.011, "step": 214760 }, { "epoch": 18.531400966183575, "grad_norm": 2.9129598140716553, "learning_rate": 0.001, "loss": 2.0363, "step": 214816 }, { "epoch": 18.536231884057973, "grad_norm": 3.226778030395508, "learning_rate": 0.001, "loss": 2.0383, "step": 214872 }, { "epoch": 18.541062801932366, "grad_norm": 1.9838179349899292, "learning_rate": 0.001, "loss": 2.0341, "step": 214928 }, { "epoch": 18.545893719806763, "grad_norm": 2.919928550720215, "learning_rate": 0.001, "loss": 2.0382, "step": 214984 }, { "epoch": 18.55072463768116, "grad_norm": 1.4649657011032104, "learning_rate": 0.001, "loss": 2.0575, "step": 215040 }, { "epoch": 18.555555555555557, "grad_norm": 0.7409532070159912, "learning_rate": 0.001, "loss": 2.0387, "step": 215096 }, { "epoch": 18.56038647342995, "grad_norm": 0.48125436902046204, "learning_rate": 0.001, "loss": 2.0192, "step": 215152 }, { "epoch": 18.565217391304348, "grad_norm": 0.8807508945465088, "learning_rate": 0.001, "loss": 2.0104, "step": 215208 }, { "epoch": 18.570048309178745, "grad_norm": 0.9692062735557556, "learning_rate": 0.001, "loss": 2.0036, "step": 215264 }, { "epoch": 18.57487922705314, "grad_norm": 2.3533763885498047, "learning_rate": 0.001, "loss": 2.0039, "step": 215320 }, { "epoch": 18.579710144927535, "grad_norm": 0.8640643954277039, "learning_rate": 0.001, "loss": 1.9904, "step": 215376 }, { "epoch": 18.584541062801932, "grad_norm": 1.8725852966308594, "learning_rate": 0.001, "loss": 1.9856, "step": 215432 }, { "epoch": 18.58937198067633, "grad_norm": 1.1481963396072388, "learning_rate": 0.001, "loss": 1.9968, "step": 215488 }, { "epoch": 18.594202898550726, "grad_norm": 0.7930685877799988, "learning_rate": 0.001, "loss": 1.9909, "step": 215544 }, { "epoch": 18.59903381642512, "grad_norm": 1.2087026834487915, "learning_rate": 0.001, "loss": 1.9948, "step": 215600 }, { "epoch": 18.603864734299517, "grad_norm": 2.5625784397125244, "learning_rate": 0.001, "loss": 2.0023, "step": 215656 }, { "epoch": 18.608695652173914, "grad_norm": 1.762405514717102, "learning_rate": 0.001, "loss": 2.0036, "step": 215712 }, { "epoch": 18.613526570048307, "grad_norm": 1.0530073642730713, "learning_rate": 0.001, "loss": 2.0122, "step": 215768 }, { "epoch": 18.618357487922705, "grad_norm": 0.4137307107448578, "learning_rate": 0.001, "loss": 2.0064, "step": 215824 }, { "epoch": 18.6231884057971, "grad_norm": 0.5080470442771912, "learning_rate": 0.001, "loss": 2.0149, "step": 215880 }, { "epoch": 18.6280193236715, "grad_norm": 1.3162708282470703, "learning_rate": 0.001, "loss": 2.0165, "step": 215936 }, { "epoch": 18.632850241545892, "grad_norm": 1.289952278137207, "learning_rate": 0.001, "loss": 2.0119, "step": 215992 }, { "epoch": 18.63768115942029, "grad_norm": 0.5914528965950012, "learning_rate": 0.001, "loss": 2.0223, "step": 216048 }, { "epoch": 18.642512077294686, "grad_norm": 0.5511854887008667, "learning_rate": 0.001, "loss": 2.0098, "step": 216104 }, { "epoch": 18.647342995169083, "grad_norm": 0.6083223819732666, "learning_rate": 0.001, "loss": 2.0021, "step": 216160 }, { "epoch": 18.652173913043477, "grad_norm": 3.420048475265503, "learning_rate": 0.001, "loss": 2.0013, "step": 216216 }, { "epoch": 18.657004830917874, "grad_norm": 0.49880850315093994, "learning_rate": 0.001, "loss": 2.0075, "step": 216272 }, { "epoch": 18.66183574879227, "grad_norm": 0.8125979900360107, "learning_rate": 0.001, "loss": 1.9906, "step": 216328 }, { "epoch": 18.666666666666668, "grad_norm": 0.44447895884513855, "learning_rate": 0.001, "loss": 1.9937, "step": 216384 }, { "epoch": 18.67149758454106, "grad_norm": 0.40350720286369324, "learning_rate": 0.001, "loss": 1.9888, "step": 216440 }, { "epoch": 18.67632850241546, "grad_norm": 0.4602723717689514, "learning_rate": 0.001, "loss": 1.9904, "step": 216496 }, { "epoch": 18.681159420289855, "grad_norm": 2.628710985183716, "learning_rate": 0.001, "loss": 1.9891, "step": 216552 }, { "epoch": 18.685990338164252, "grad_norm": 0.44171974062919617, "learning_rate": 0.001, "loss": 1.9961, "step": 216608 }, { "epoch": 18.690821256038646, "grad_norm": 0.3909667432308197, "learning_rate": 0.001, "loss": 1.9891, "step": 216664 }, { "epoch": 18.695652173913043, "grad_norm": 0.6817483901977539, "learning_rate": 0.001, "loss": 1.9829, "step": 216720 }, { "epoch": 18.70048309178744, "grad_norm": 0.40724992752075195, "learning_rate": 0.001, "loss": 1.9892, "step": 216776 }, { "epoch": 18.705314009661837, "grad_norm": 0.5674595236778259, "learning_rate": 0.001, "loss": 1.985, "step": 216832 }, { "epoch": 18.71014492753623, "grad_norm": 0.7696330547332764, "learning_rate": 0.001, "loss": 1.994, "step": 216888 }, { "epoch": 18.714975845410628, "grad_norm": 0.3742004334926605, "learning_rate": 0.001, "loss": 1.9831, "step": 216944 }, { "epoch": 18.719806763285025, "grad_norm": 0.936351478099823, "learning_rate": 0.001, "loss": 1.9892, "step": 217000 }, { "epoch": 18.72463768115942, "grad_norm": 0.37267982959747314, "learning_rate": 0.001, "loss": 1.9833, "step": 217056 }, { "epoch": 18.729468599033815, "grad_norm": 2.975992441177368, "learning_rate": 0.001, "loss": 1.9766, "step": 217112 }, { "epoch": 18.734299516908212, "grad_norm": 1.151590347290039, "learning_rate": 0.001, "loss": 1.9813, "step": 217168 }, { "epoch": 18.73913043478261, "grad_norm": 1.8585960865020752, "learning_rate": 0.001, "loss": 1.9814, "step": 217224 }, { "epoch": 18.743961352657006, "grad_norm": 8.291177749633789, "learning_rate": 0.001, "loss": 1.9981, "step": 217280 }, { "epoch": 18.7487922705314, "grad_norm": 2.0655674934387207, "learning_rate": 0.001, "loss": 1.9948, "step": 217336 }, { "epoch": 18.753623188405797, "grad_norm": 1.350785493850708, "learning_rate": 0.001, "loss": 2.0186, "step": 217392 }, { "epoch": 18.758454106280194, "grad_norm": 1.0436420440673828, "learning_rate": 0.001, "loss": 2.0351, "step": 217448 }, { "epoch": 18.76328502415459, "grad_norm": 2.8785197734832764, "learning_rate": 0.001, "loss": 2.0694, "step": 217504 }, { "epoch": 18.768115942028984, "grad_norm": 1.2601258754730225, "learning_rate": 0.001, "loss": 2.0833, "step": 217560 }, { "epoch": 18.77294685990338, "grad_norm": 1.5567734241485596, "learning_rate": 0.001, "loss": 2.0833, "step": 217616 }, { "epoch": 18.77777777777778, "grad_norm": 0.930168628692627, "learning_rate": 0.001, "loss": 2.061, "step": 217672 }, { "epoch": 18.782608695652176, "grad_norm": 2.3632185459136963, "learning_rate": 0.001, "loss": 2.0468, "step": 217728 }, { "epoch": 18.78743961352657, "grad_norm": 1.0484144687652588, "learning_rate": 0.001, "loss": 2.0458, "step": 217784 }, { "epoch": 18.792270531400966, "grad_norm": 0.4987468719482422, "learning_rate": 0.001, "loss": 2.0512, "step": 217840 }, { "epoch": 18.797101449275363, "grad_norm": 1.6167337894439697, "learning_rate": 0.001, "loss": 2.0459, "step": 217896 }, { "epoch": 18.80193236714976, "grad_norm": 0.5412013530731201, "learning_rate": 0.001, "loss": 2.0521, "step": 217952 }, { "epoch": 18.806763285024154, "grad_norm": 1.7752583026885986, "learning_rate": 0.001, "loss": 2.0304, "step": 218008 }, { "epoch": 18.81159420289855, "grad_norm": 0.9795591831207275, "learning_rate": 0.001, "loss": 2.0247, "step": 218064 }, { "epoch": 18.816425120772948, "grad_norm": 1.9375684261322021, "learning_rate": 0.001, "loss": 2.0383, "step": 218120 }, { "epoch": 18.82125603864734, "grad_norm": 10.737184524536133, "learning_rate": 0.001, "loss": 2.0358, "step": 218176 }, { "epoch": 18.82608695652174, "grad_norm": 0.7466415762901306, "learning_rate": 0.001, "loss": 2.0255, "step": 218232 }, { "epoch": 18.830917874396135, "grad_norm": 2.184471845626831, "learning_rate": 0.001, "loss": 2.0099, "step": 218288 }, { "epoch": 18.835748792270532, "grad_norm": 0.9999092817306519, "learning_rate": 0.001, "loss": 2.0176, "step": 218344 }, { "epoch": 18.840579710144926, "grad_norm": 1.2198060750961304, "learning_rate": 0.001, "loss": 2.0186, "step": 218400 }, { "epoch": 18.845410628019323, "grad_norm": 0.639378011226654, "learning_rate": 0.001, "loss": 2.0248, "step": 218456 }, { "epoch": 18.85024154589372, "grad_norm": 1.5673160552978516, "learning_rate": 0.001, "loss": 2.0217, "step": 218512 }, { "epoch": 18.855072463768117, "grad_norm": 0.9663440585136414, "learning_rate": 0.001, "loss": 2.0384, "step": 218568 }, { "epoch": 18.85990338164251, "grad_norm": 1.1632825136184692, "learning_rate": 0.001, "loss": 2.0388, "step": 218624 }, { "epoch": 18.864734299516908, "grad_norm": 0.5182122588157654, "learning_rate": 0.001, "loss": 2.0331, "step": 218680 }, { "epoch": 18.869565217391305, "grad_norm": 1.1156498193740845, "learning_rate": 0.001, "loss": 2.0213, "step": 218736 }, { "epoch": 18.8743961352657, "grad_norm": 1.1048035621643066, "learning_rate": 0.001, "loss": 2.0165, "step": 218792 }, { "epoch": 18.879227053140095, "grad_norm": 2.802283525466919, "learning_rate": 0.001, "loss": 2.0173, "step": 218848 }, { "epoch": 18.884057971014492, "grad_norm": 2.455322027206421, "learning_rate": 0.001, "loss": 2.0135, "step": 218904 }, { "epoch": 18.88888888888889, "grad_norm": 1.1782455444335938, "learning_rate": 0.001, "loss": 2.0298, "step": 218960 }, { "epoch": 18.893719806763286, "grad_norm": 0.7688833475112915, "learning_rate": 0.001, "loss": 2.0232, "step": 219016 }, { "epoch": 18.89855072463768, "grad_norm": 1.4346656799316406, "learning_rate": 0.001, "loss": 2.032, "step": 219072 }, { "epoch": 18.903381642512077, "grad_norm": 1.6698538064956665, "learning_rate": 0.001, "loss": 2.0304, "step": 219128 }, { "epoch": 18.908212560386474, "grad_norm": 1.014288306236267, "learning_rate": 0.001, "loss": 2.0281, "step": 219184 }, { "epoch": 18.91304347826087, "grad_norm": 0.6735665202140808, "learning_rate": 0.001, "loss": 2.0311, "step": 219240 }, { "epoch": 18.917874396135264, "grad_norm": 2.1199779510498047, "learning_rate": 0.001, "loss": 2.0265, "step": 219296 }, { "epoch": 18.92270531400966, "grad_norm": 0.9367926120758057, "learning_rate": 0.001, "loss": 2.0268, "step": 219352 }, { "epoch": 18.92753623188406, "grad_norm": 1.0031538009643555, "learning_rate": 0.001, "loss": 2.0132, "step": 219408 }, { "epoch": 18.932367149758456, "grad_norm": 1.0376001596450806, "learning_rate": 0.001, "loss": 2.0098, "step": 219464 }, { "epoch": 18.93719806763285, "grad_norm": 2.2143495082855225, "learning_rate": 0.001, "loss": 2.0227, "step": 219520 }, { "epoch": 18.942028985507246, "grad_norm": 3.0990049839019775, "learning_rate": 0.001, "loss": 2.0217, "step": 219576 }, { "epoch": 18.946859903381643, "grad_norm": 1.3573864698410034, "learning_rate": 0.001, "loss": 2.0215, "step": 219632 }, { "epoch": 18.95169082125604, "grad_norm": 4.626236438751221, "learning_rate": 0.001, "loss": 2.0355, "step": 219688 }, { "epoch": 18.956521739130434, "grad_norm": 1.6406254768371582, "learning_rate": 0.001, "loss": 2.0343, "step": 219744 }, { "epoch": 18.96135265700483, "grad_norm": 3.6270246505737305, "learning_rate": 0.001, "loss": 2.0283, "step": 219800 }, { "epoch": 18.966183574879228, "grad_norm": 1.5768861770629883, "learning_rate": 0.001, "loss": 2.0334, "step": 219856 }, { "epoch": 18.971014492753625, "grad_norm": 1.299428939819336, "learning_rate": 0.001, "loss": 2.0309, "step": 219912 }, { "epoch": 18.97584541062802, "grad_norm": 1.4760308265686035, "learning_rate": 0.001, "loss": 2.0302, "step": 219968 }, { "epoch": 18.980676328502415, "grad_norm": 2.3277783393859863, "learning_rate": 0.001, "loss": 2.0248, "step": 220024 }, { "epoch": 18.985507246376812, "grad_norm": 2.1314172744750977, "learning_rate": 0.001, "loss": 2.0104, "step": 220080 }, { "epoch": 18.990338164251206, "grad_norm": 0.6804816722869873, "learning_rate": 0.001, "loss": 2.0114, "step": 220136 }, { "epoch": 18.995169082125603, "grad_norm": 0.5855560302734375, "learning_rate": 0.001, "loss": 2.0091, "step": 220192 }, { "epoch": 19.0, "grad_norm": 0.9299209117889404, "learning_rate": 0.001, "loss": 2.0251, "step": 220248 }, { "epoch": 19.004830917874397, "grad_norm": 1.3240443468093872, "learning_rate": 0.001, "loss": 1.9814, "step": 220304 }, { "epoch": 19.00966183574879, "grad_norm": 1.9409055709838867, "learning_rate": 0.001, "loss": 1.9854, "step": 220360 }, { "epoch": 19.014492753623188, "grad_norm": 1.02378249168396, "learning_rate": 0.001, "loss": 1.9874, "step": 220416 }, { "epoch": 19.019323671497585, "grad_norm": 0.5449873805046082, "learning_rate": 0.001, "loss": 1.9794, "step": 220472 }, { "epoch": 19.02415458937198, "grad_norm": 0.6264786124229431, "learning_rate": 0.001, "loss": 1.9753, "step": 220528 }, { "epoch": 19.028985507246375, "grad_norm": 0.6267743110656738, "learning_rate": 0.001, "loss": 1.9767, "step": 220584 }, { "epoch": 19.033816425120772, "grad_norm": 0.39243680238723755, "learning_rate": 0.001, "loss": 1.9774, "step": 220640 }, { "epoch": 19.03864734299517, "grad_norm": 1.022716760635376, "learning_rate": 0.001, "loss": 1.9687, "step": 220696 }, { "epoch": 19.043478260869566, "grad_norm": 0.38020774722099304, "learning_rate": 0.001, "loss": 1.9724, "step": 220752 }, { "epoch": 19.04830917874396, "grad_norm": 1.5430448055267334, "learning_rate": 0.001, "loss": 1.9738, "step": 220808 }, { "epoch": 19.053140096618357, "grad_norm": 2.1182010173797607, "learning_rate": 0.001, "loss": 1.9766, "step": 220864 }, { "epoch": 19.057971014492754, "grad_norm": 0.8641088008880615, "learning_rate": 0.001, "loss": 1.9746, "step": 220920 }, { "epoch": 19.06280193236715, "grad_norm": 3.1122047901153564, "learning_rate": 0.001, "loss": 1.9763, "step": 220976 }, { "epoch": 19.067632850241544, "grad_norm": 0.40802857279777527, "learning_rate": 0.001, "loss": 1.971, "step": 221032 }, { "epoch": 19.07246376811594, "grad_norm": 2.81142258644104, "learning_rate": 0.001, "loss": 1.9685, "step": 221088 }, { "epoch": 19.07729468599034, "grad_norm": 1.2277580499649048, "learning_rate": 0.001, "loss": 1.9762, "step": 221144 }, { "epoch": 19.082125603864736, "grad_norm": 0.9606290459632874, "learning_rate": 0.001, "loss": 1.9672, "step": 221200 }, { "epoch": 19.08695652173913, "grad_norm": 0.7883738875389099, "learning_rate": 0.001, "loss": 1.9714, "step": 221256 }, { "epoch": 19.091787439613526, "grad_norm": 1.73380446434021, "learning_rate": 0.001, "loss": 1.9706, "step": 221312 }, { "epoch": 19.096618357487923, "grad_norm": 0.3786293566226959, "learning_rate": 0.001, "loss": 1.9771, "step": 221368 }, { "epoch": 19.10144927536232, "grad_norm": 1.057479739189148, "learning_rate": 0.001, "loss": 1.9771, "step": 221424 }, { "epoch": 19.106280193236714, "grad_norm": 0.7734542489051819, "learning_rate": 0.001, "loss": 1.9751, "step": 221480 }, { "epoch": 19.11111111111111, "grad_norm": 0.5324559211730957, "learning_rate": 0.001, "loss": 1.9838, "step": 221536 }, { "epoch": 19.115942028985508, "grad_norm": 1.2888987064361572, "learning_rate": 0.001, "loss": 1.9709, "step": 221592 }, { "epoch": 19.120772946859905, "grad_norm": 0.9148516654968262, "learning_rate": 0.001, "loss": 1.9747, "step": 221648 }, { "epoch": 19.1256038647343, "grad_norm": 0.7805019021034241, "learning_rate": 0.001, "loss": 1.9767, "step": 221704 }, { "epoch": 19.130434782608695, "grad_norm": 2.173671007156372, "learning_rate": 0.001, "loss": 1.9752, "step": 221760 }, { "epoch": 19.135265700483092, "grad_norm": 0.40187910199165344, "learning_rate": 0.001, "loss": 1.9741, "step": 221816 }, { "epoch": 19.14009661835749, "grad_norm": 1.976532220840454, "learning_rate": 0.001, "loss": 1.9676, "step": 221872 }, { "epoch": 19.144927536231883, "grad_norm": 0.28142642974853516, "learning_rate": 0.001, "loss": 1.9613, "step": 221928 }, { "epoch": 19.14975845410628, "grad_norm": 0.4090527594089508, "learning_rate": 0.001, "loss": 1.9603, "step": 221984 }, { "epoch": 19.154589371980677, "grad_norm": 1.0258517265319824, "learning_rate": 0.001, "loss": 1.9692, "step": 222040 }, { "epoch": 19.159420289855074, "grad_norm": 1.1290998458862305, "learning_rate": 0.001, "loss": 1.9646, "step": 222096 }, { "epoch": 19.164251207729468, "grad_norm": 0.6550341248512268, "learning_rate": 0.001, "loss": 1.964, "step": 222152 }, { "epoch": 19.169082125603865, "grad_norm": 0.31408998370170593, "learning_rate": 0.001, "loss": 1.9712, "step": 222208 }, { "epoch": 19.17391304347826, "grad_norm": 0.6046551465988159, "learning_rate": 0.001, "loss": 1.9812, "step": 222264 }, { "epoch": 19.17874396135266, "grad_norm": 0.9274047017097473, "learning_rate": 0.001, "loss": 1.9724, "step": 222320 }, { "epoch": 19.183574879227052, "grad_norm": 0.6885830760002136, "learning_rate": 0.001, "loss": 1.974, "step": 222376 }, { "epoch": 19.18840579710145, "grad_norm": 1.0630031824111938, "learning_rate": 0.001, "loss": 1.9751, "step": 222432 }, { "epoch": 19.193236714975846, "grad_norm": 0.6759706735610962, "learning_rate": 0.001, "loss": 1.9776, "step": 222488 }, { "epoch": 19.19806763285024, "grad_norm": 1.002660870552063, "learning_rate": 0.001, "loss": 1.9781, "step": 222544 }, { "epoch": 19.202898550724637, "grad_norm": 0.9634578227996826, "learning_rate": 0.001, "loss": 1.9772, "step": 222600 }, { "epoch": 19.207729468599034, "grad_norm": 0.9043963551521301, "learning_rate": 0.001, "loss": 1.9781, "step": 222656 }, { "epoch": 19.21256038647343, "grad_norm": 0.9274563193321228, "learning_rate": 0.001, "loss": 1.9903, "step": 222712 }, { "epoch": 19.217391304347824, "grad_norm": 0.5402976274490356, "learning_rate": 0.001, "loss": 1.9973, "step": 222768 }, { "epoch": 19.22222222222222, "grad_norm": 0.6415870785713196, "learning_rate": 0.001, "loss": 1.9761, "step": 222824 }, { "epoch": 19.22705314009662, "grad_norm": 0.6730472445487976, "learning_rate": 0.001, "loss": 1.9795, "step": 222880 }, { "epoch": 19.231884057971016, "grad_norm": 0.574005126953125, "learning_rate": 0.001, "loss": 1.9758, "step": 222936 }, { "epoch": 19.23671497584541, "grad_norm": 0.5289649367332458, "learning_rate": 0.001, "loss": 1.9769, "step": 222992 }, { "epoch": 19.241545893719806, "grad_norm": 0.5496167540550232, "learning_rate": 0.001, "loss": 1.9719, "step": 223048 }, { "epoch": 19.246376811594203, "grad_norm": 0.5212436318397522, "learning_rate": 0.001, "loss": 1.9758, "step": 223104 }, { "epoch": 19.2512077294686, "grad_norm": 0.7783918380737305, "learning_rate": 0.001, "loss": 1.9636, "step": 223160 }, { "epoch": 19.256038647342994, "grad_norm": 0.45680466294288635, "learning_rate": 0.001, "loss": 1.9701, "step": 223216 }, { "epoch": 19.26086956521739, "grad_norm": 0.4279637932777405, "learning_rate": 0.001, "loss": 1.9632, "step": 223272 }, { "epoch": 19.265700483091788, "grad_norm": 0.3881438672542572, "learning_rate": 0.001, "loss": 1.9787, "step": 223328 }, { "epoch": 19.270531400966185, "grad_norm": 4.5589799880981445, "learning_rate": 0.001, "loss": 1.9653, "step": 223384 }, { "epoch": 19.27536231884058, "grad_norm": 1.1751387119293213, "learning_rate": 0.001, "loss": 1.9645, "step": 223440 }, { "epoch": 19.280193236714975, "grad_norm": 0.8862648606300354, "learning_rate": 0.001, "loss": 1.9549, "step": 223496 }, { "epoch": 19.285024154589372, "grad_norm": 0.8736650347709656, "learning_rate": 0.001, "loss": 1.9618, "step": 223552 }, { "epoch": 19.28985507246377, "grad_norm": 1.2138983011245728, "learning_rate": 0.001, "loss": 1.9639, "step": 223608 }, { "epoch": 19.294685990338163, "grad_norm": 1.3407403230667114, "learning_rate": 0.001, "loss": 1.9616, "step": 223664 }, { "epoch": 19.29951690821256, "grad_norm": 1.4980202913284302, "learning_rate": 0.001, "loss": 1.9703, "step": 223720 }, { "epoch": 19.304347826086957, "grad_norm": 1.0528651475906372, "learning_rate": 0.001, "loss": 1.9699, "step": 223776 }, { "epoch": 19.309178743961354, "grad_norm": 4.161685943603516, "learning_rate": 0.001, "loss": 1.9678, "step": 223832 }, { "epoch": 19.314009661835748, "grad_norm": 0.678322434425354, "learning_rate": 0.001, "loss": 1.9811, "step": 223888 }, { "epoch": 19.318840579710145, "grad_norm": 0.9019632935523987, "learning_rate": 0.001, "loss": 1.9781, "step": 223944 }, { "epoch": 19.32367149758454, "grad_norm": 0.7608224749565125, "learning_rate": 0.001, "loss": 1.9721, "step": 224000 }, { "epoch": 19.32850241545894, "grad_norm": 0.6512464284896851, "learning_rate": 0.001, "loss": 1.9797, "step": 224056 }, { "epoch": 19.333333333333332, "grad_norm": 1.1273747682571411, "learning_rate": 0.001, "loss": 1.9773, "step": 224112 }, { "epoch": 19.33816425120773, "grad_norm": 3.248605966567993, "learning_rate": 0.001, "loss": 1.9793, "step": 224168 }, { "epoch": 19.342995169082126, "grad_norm": 0.8719470500946045, "learning_rate": 0.001, "loss": 1.9844, "step": 224224 }, { "epoch": 19.347826086956523, "grad_norm": 0.774939775466919, "learning_rate": 0.001, "loss": 1.9964, "step": 224280 }, { "epoch": 19.352657004830917, "grad_norm": 0.5201621651649475, "learning_rate": 0.001, "loss": 1.9824, "step": 224336 }, { "epoch": 19.357487922705314, "grad_norm": 1.3088537454605103, "learning_rate": 0.001, "loss": 1.9977, "step": 224392 }, { "epoch": 19.36231884057971, "grad_norm": 2.41510272026062, "learning_rate": 0.001, "loss": 1.9913, "step": 224448 }, { "epoch": 19.367149758454108, "grad_norm": 1.0621293783187866, "learning_rate": 0.001, "loss": 1.9842, "step": 224504 }, { "epoch": 19.3719806763285, "grad_norm": 0.48479539155960083, "learning_rate": 0.001, "loss": 1.9843, "step": 224560 }, { "epoch": 19.3768115942029, "grad_norm": 0.7369958758354187, "learning_rate": 0.001, "loss": 1.9773, "step": 224616 }, { "epoch": 19.381642512077295, "grad_norm": 0.6733920574188232, "learning_rate": 0.001, "loss": 1.9798, "step": 224672 }, { "epoch": 19.386473429951693, "grad_norm": 0.717187762260437, "learning_rate": 0.001, "loss": 1.9765, "step": 224728 }, { "epoch": 19.391304347826086, "grad_norm": 1.4908002614974976, "learning_rate": 0.001, "loss": 1.9828, "step": 224784 }, { "epoch": 19.396135265700483, "grad_norm": 0.478899210691452, "learning_rate": 0.001, "loss": 1.9796, "step": 224840 }, { "epoch": 19.40096618357488, "grad_norm": 1.7314268350601196, "learning_rate": 0.001, "loss": 1.9762, "step": 224896 }, { "epoch": 19.405797101449274, "grad_norm": 0.9426409006118774, "learning_rate": 0.001, "loss": 1.98, "step": 224952 }, { "epoch": 19.41062801932367, "grad_norm": 1.0732628107070923, "learning_rate": 0.001, "loss": 1.9746, "step": 225008 }, { "epoch": 19.415458937198068, "grad_norm": 0.4627336263656616, "learning_rate": 0.001, "loss": 1.9871, "step": 225064 }, { "epoch": 19.420289855072465, "grad_norm": 2.255784749984741, "learning_rate": 0.001, "loss": 1.9789, "step": 225120 }, { "epoch": 19.42512077294686, "grad_norm": 1.3085148334503174, "learning_rate": 0.001, "loss": 1.9982, "step": 225176 }, { "epoch": 19.429951690821255, "grad_norm": 1.4280967712402344, "learning_rate": 0.001, "loss": 2.0059, "step": 225232 }, { "epoch": 19.434782608695652, "grad_norm": 1.0377458333969116, "learning_rate": 0.001, "loss": 1.9999, "step": 225288 }, { "epoch": 19.43961352657005, "grad_norm": 0.5367633700370789, "learning_rate": 0.001, "loss": 2.0083, "step": 225344 }, { "epoch": 19.444444444444443, "grad_norm": 1.6878571510314941, "learning_rate": 0.001, "loss": 2.0079, "step": 225400 }, { "epoch": 19.44927536231884, "grad_norm": 0.8240814208984375, "learning_rate": 0.001, "loss": 1.9977, "step": 225456 }, { "epoch": 19.454106280193237, "grad_norm": 1.278629183769226, "learning_rate": 0.001, "loss": 1.9974, "step": 225512 }, { "epoch": 19.458937198067634, "grad_norm": 2.1589179039001465, "learning_rate": 0.001, "loss": 1.9868, "step": 225568 }, { "epoch": 19.463768115942027, "grad_norm": 0.6857483983039856, "learning_rate": 0.001, "loss": 1.9954, "step": 225624 }, { "epoch": 19.468599033816425, "grad_norm": 0.6987135410308838, "learning_rate": 0.001, "loss": 1.9865, "step": 225680 }, { "epoch": 19.47342995169082, "grad_norm": 0.6640617847442627, "learning_rate": 0.001, "loss": 1.9869, "step": 225736 }, { "epoch": 19.47826086956522, "grad_norm": 0.5606048107147217, "learning_rate": 0.001, "loss": 1.99, "step": 225792 }, { "epoch": 19.483091787439612, "grad_norm": 0.8694623112678528, "learning_rate": 0.001, "loss": 1.9795, "step": 225848 }, { "epoch": 19.48792270531401, "grad_norm": 1.1421536207199097, "learning_rate": 0.001, "loss": 1.9801, "step": 225904 }, { "epoch": 19.492753623188406, "grad_norm": 4.96950626373291, "learning_rate": 0.001, "loss": 1.9882, "step": 225960 }, { "epoch": 19.497584541062803, "grad_norm": 1.1328555345535278, "learning_rate": 0.001, "loss": 1.9839, "step": 226016 }, { "epoch": 19.502415458937197, "grad_norm": 0.5209496021270752, "learning_rate": 0.001, "loss": 1.9797, "step": 226072 }, { "epoch": 19.507246376811594, "grad_norm": 0.528617262840271, "learning_rate": 0.001, "loss": 1.9836, "step": 226128 }, { "epoch": 19.51207729468599, "grad_norm": 0.7308517098426819, "learning_rate": 0.001, "loss": 1.9707, "step": 226184 }, { "epoch": 19.516908212560388, "grad_norm": 0.6073090434074402, "learning_rate": 0.001, "loss": 1.9784, "step": 226240 }, { "epoch": 19.52173913043478, "grad_norm": 0.6104119420051575, "learning_rate": 0.001, "loss": 1.9792, "step": 226296 }, { "epoch": 19.52657004830918, "grad_norm": 1.0912226438522339, "learning_rate": 0.001, "loss": 1.9781, "step": 226352 }, { "epoch": 19.531400966183575, "grad_norm": 1.078590989112854, "learning_rate": 0.001, "loss": 1.9759, "step": 226408 }, { "epoch": 19.536231884057973, "grad_norm": 0.5421713590621948, "learning_rate": 0.001, "loss": 1.9919, "step": 226464 }, { "epoch": 19.541062801932366, "grad_norm": 0.5305871367454529, "learning_rate": 0.001, "loss": 1.9841, "step": 226520 }, { "epoch": 19.545893719806763, "grad_norm": 0.7110370397567749, "learning_rate": 0.001, "loss": 1.9936, "step": 226576 }, { "epoch": 19.55072463768116, "grad_norm": 1.0523451566696167, "learning_rate": 0.001, "loss": 1.9974, "step": 226632 }, { "epoch": 19.555555555555557, "grad_norm": 0.5616431832313538, "learning_rate": 0.001, "loss": 1.9781, "step": 226688 }, { "epoch": 19.56038647342995, "grad_norm": 1.0167936086654663, "learning_rate": 0.001, "loss": 1.9798, "step": 226744 }, { "epoch": 19.565217391304348, "grad_norm": 0.645546019077301, "learning_rate": 0.001, "loss": 1.9773, "step": 226800 }, { "epoch": 19.570048309178745, "grad_norm": 0.3923267424106598, "learning_rate": 0.001, "loss": 1.9725, "step": 226856 }, { "epoch": 19.57487922705314, "grad_norm": 0.5731530785560608, "learning_rate": 0.001, "loss": 1.9824, "step": 226912 }, { "epoch": 19.579710144927535, "grad_norm": 0.44377586245536804, "learning_rate": 0.001, "loss": 1.979, "step": 226968 }, { "epoch": 19.584541062801932, "grad_norm": 1.0439039468765259, "learning_rate": 0.001, "loss": 1.9757, "step": 227024 }, { "epoch": 19.58937198067633, "grad_norm": 0.7364981770515442, "learning_rate": 0.001, "loss": 1.9745, "step": 227080 }, { "epoch": 19.594202898550726, "grad_norm": 1.5196747779846191, "learning_rate": 0.001, "loss": 1.977, "step": 227136 }, { "epoch": 19.59903381642512, "grad_norm": 2.876661777496338, "learning_rate": 0.001, "loss": 1.9834, "step": 227192 }, { "epoch": 19.603864734299517, "grad_norm": 0.5320111513137817, "learning_rate": 0.001, "loss": 1.978, "step": 227248 }, { "epoch": 19.608695652173914, "grad_norm": 2.156527519226074, "learning_rate": 0.001, "loss": 1.9792, "step": 227304 }, { "epoch": 19.613526570048307, "grad_norm": 0.59865802526474, "learning_rate": 0.001, "loss": 1.9736, "step": 227360 }, { "epoch": 19.618357487922705, "grad_norm": 0.9262649416923523, "learning_rate": 0.001, "loss": 1.9847, "step": 227416 }, { "epoch": 19.6231884057971, "grad_norm": 1.1218934059143066, "learning_rate": 0.001, "loss": 1.9805, "step": 227472 }, { "epoch": 19.6280193236715, "grad_norm": 1.2085765600204468, "learning_rate": 0.001, "loss": 1.9778, "step": 227528 }, { "epoch": 19.632850241545892, "grad_norm": 0.8585699796676636, "learning_rate": 0.001, "loss": 1.9848, "step": 227584 }, { "epoch": 19.63768115942029, "grad_norm": 1.0675084590911865, "learning_rate": 0.001, "loss": 1.9894, "step": 227640 }, { "epoch": 19.642512077294686, "grad_norm": 1.194161057472229, "learning_rate": 0.001, "loss": 1.9752, "step": 227696 }, { "epoch": 19.647342995169083, "grad_norm": 0.6294888257980347, "learning_rate": 0.001, "loss": 1.9892, "step": 227752 }, { "epoch": 19.652173913043477, "grad_norm": 0.8624576926231384, "learning_rate": 0.001, "loss": 1.9978, "step": 227808 }, { "epoch": 19.657004830917874, "grad_norm": 0.6598075032234192, "learning_rate": 0.001, "loss": 1.9911, "step": 227864 }, { "epoch": 19.66183574879227, "grad_norm": 0.6938336491584778, "learning_rate": 0.001, "loss": 1.9744, "step": 227920 }, { "epoch": 19.666666666666668, "grad_norm": 0.7940035462379456, "learning_rate": 0.001, "loss": 1.9842, "step": 227976 }, { "epoch": 19.67149758454106, "grad_norm": 1.3795771598815918, "learning_rate": 0.001, "loss": 1.9897, "step": 228032 }, { "epoch": 19.67632850241546, "grad_norm": 0.7683830261230469, "learning_rate": 0.001, "loss": 1.9829, "step": 228088 }, { "epoch": 19.681159420289855, "grad_norm": 0.514057457447052, "learning_rate": 0.001, "loss": 1.9816, "step": 228144 }, { "epoch": 19.685990338164252, "grad_norm": 0.6565588712692261, "learning_rate": 0.001, "loss": 1.9878, "step": 228200 }, { "epoch": 19.690821256038646, "grad_norm": 0.8971356749534607, "learning_rate": 0.001, "loss": 1.9893, "step": 228256 }, { "epoch": 19.695652173913043, "grad_norm": 0.7112553715705872, "learning_rate": 0.001, "loss": 1.9878, "step": 228312 }, { "epoch": 19.70048309178744, "grad_norm": 0.7366810441017151, "learning_rate": 0.001, "loss": 1.9869, "step": 228368 }, { "epoch": 19.705314009661837, "grad_norm": 1.141973614692688, "learning_rate": 0.001, "loss": 1.9899, "step": 228424 }, { "epoch": 19.71014492753623, "grad_norm": 1.2263668775558472, "learning_rate": 0.001, "loss": 1.9937, "step": 228480 }, { "epoch": 19.714975845410628, "grad_norm": 0.36490365862846375, "learning_rate": 0.001, "loss": 1.9935, "step": 228536 }, { "epoch": 19.719806763285025, "grad_norm": 7.048654556274414, "learning_rate": 0.001, "loss": 1.9904, "step": 228592 }, { "epoch": 19.72463768115942, "grad_norm": 0.6913021802902222, "learning_rate": 0.001, "loss": 1.9791, "step": 228648 }, { "epoch": 19.729468599033815, "grad_norm": 0.5232803821563721, "learning_rate": 0.001, "loss": 1.9716, "step": 228704 }, { "epoch": 19.734299516908212, "grad_norm": 2.327164888381958, "learning_rate": 0.001, "loss": 1.9808, "step": 228760 }, { "epoch": 19.73913043478261, "grad_norm": 0.7183934450149536, "learning_rate": 0.001, "loss": 1.9811, "step": 228816 }, { "epoch": 19.743961352657006, "grad_norm": 0.7957116365432739, "learning_rate": 0.001, "loss": 1.9875, "step": 228872 }, { "epoch": 19.7487922705314, "grad_norm": 0.9885707497596741, "learning_rate": 0.001, "loss": 1.9767, "step": 228928 }, { "epoch": 19.753623188405797, "grad_norm": 0.3768506348133087, "learning_rate": 0.001, "loss": 1.9796, "step": 228984 }, { "epoch": 19.758454106280194, "grad_norm": 0.7667130827903748, "learning_rate": 0.001, "loss": 1.9828, "step": 229040 }, { "epoch": 19.76328502415459, "grad_norm": 1.3434653282165527, "learning_rate": 0.001, "loss": 1.9828, "step": 229096 }, { "epoch": 19.768115942028984, "grad_norm": 1.129123568534851, "learning_rate": 0.001, "loss": 2.0026, "step": 229152 }, { "epoch": 19.77294685990338, "grad_norm": 1.3615796566009521, "learning_rate": 0.001, "loss": 1.9888, "step": 229208 }, { "epoch": 19.77777777777778, "grad_norm": 0.6908249855041504, "learning_rate": 0.001, "loss": 1.9866, "step": 229264 }, { "epoch": 19.782608695652176, "grad_norm": 0.659037709236145, "learning_rate": 0.001, "loss": 1.9931, "step": 229320 }, { "epoch": 19.78743961352657, "grad_norm": 1.0920194387435913, "learning_rate": 0.001, "loss": 1.9833, "step": 229376 }, { "epoch": 19.792270531400966, "grad_norm": 0.835077702999115, "learning_rate": 0.001, "loss": 1.9785, "step": 229432 }, { "epoch": 19.797101449275363, "grad_norm": 0.8240960240364075, "learning_rate": 0.001, "loss": 1.9823, "step": 229488 }, { "epoch": 19.80193236714976, "grad_norm": 0.6170571446418762, "learning_rate": 0.001, "loss": 1.9864, "step": 229544 }, { "epoch": 19.806763285024154, "grad_norm": 0.672433614730835, "learning_rate": 0.001, "loss": 1.9819, "step": 229600 }, { "epoch": 19.81159420289855, "grad_norm": 0.631607711315155, "learning_rate": 0.001, "loss": 1.9789, "step": 229656 }, { "epoch": 19.816425120772948, "grad_norm": 0.7736732959747314, "learning_rate": 0.001, "loss": 1.9868, "step": 229712 }, { "epoch": 19.82125603864734, "grad_norm": 1.0211807489395142, "learning_rate": 0.001, "loss": 1.9879, "step": 229768 }, { "epoch": 19.82608695652174, "grad_norm": 0.4449273645877838, "learning_rate": 0.001, "loss": 1.9927, "step": 229824 }, { "epoch": 19.830917874396135, "grad_norm": 2.2995989322662354, "learning_rate": 0.001, "loss": 1.9864, "step": 229880 }, { "epoch": 19.835748792270532, "grad_norm": 0.6107759475708008, "learning_rate": 0.001, "loss": 1.9815, "step": 229936 }, { "epoch": 19.840579710144926, "grad_norm": 0.7510660290718079, "learning_rate": 0.001, "loss": 1.9988, "step": 229992 }, { "epoch": 19.845410628019323, "grad_norm": 0.4979003667831421, "learning_rate": 0.001, "loss": 1.9925, "step": 230048 }, { "epoch": 19.85024154589372, "grad_norm": 0.8143007755279541, "learning_rate": 0.001, "loss": 1.9949, "step": 230104 }, { "epoch": 19.855072463768117, "grad_norm": 0.7308885455131531, "learning_rate": 0.001, "loss": 2.0326, "step": 230160 }, { "epoch": 19.85990338164251, "grad_norm": 2.2266623973846436, "learning_rate": 0.001, "loss": 2.0238, "step": 230216 }, { "epoch": 19.864734299516908, "grad_norm": 0.8096334338188171, "learning_rate": 0.001, "loss": 2.0284, "step": 230272 }, { "epoch": 19.869565217391305, "grad_norm": 2.0409088134765625, "learning_rate": 0.001, "loss": 2.0419, "step": 230328 }, { "epoch": 19.8743961352657, "grad_norm": 1.359377145767212, "learning_rate": 0.001, "loss": 2.0245, "step": 230384 }, { "epoch": 19.879227053140095, "grad_norm": 1.4059951305389404, "learning_rate": 0.001, "loss": 2.0202, "step": 230440 }, { "epoch": 19.884057971014492, "grad_norm": 0.5972735285758972, "learning_rate": 0.001, "loss": 1.9967, "step": 230496 }, { "epoch": 19.88888888888889, "grad_norm": 1.6779500246047974, "learning_rate": 0.001, "loss": 2.0037, "step": 230552 }, { "epoch": 19.893719806763286, "grad_norm": 1.0977379083633423, "learning_rate": 0.001, "loss": 2.0117, "step": 230608 }, { "epoch": 19.89855072463768, "grad_norm": 0.6582438945770264, "learning_rate": 0.001, "loss": 2.0048, "step": 230664 }, { "epoch": 19.903381642512077, "grad_norm": 1.4949947595596313, "learning_rate": 0.001, "loss": 2.0004, "step": 230720 }, { "epoch": 19.908212560386474, "grad_norm": 2.100862979888916, "learning_rate": 0.001, "loss": 1.9998, "step": 230776 }, { "epoch": 19.91304347826087, "grad_norm": 0.6234824657440186, "learning_rate": 0.001, "loss": 1.9959, "step": 230832 }, { "epoch": 19.917874396135264, "grad_norm": 0.7279660105705261, "learning_rate": 0.001, "loss": 1.9978, "step": 230888 }, { "epoch": 19.92270531400966, "grad_norm": 0.8285762071609497, "learning_rate": 0.001, "loss": 2.0119, "step": 230944 }, { "epoch": 19.92753623188406, "grad_norm": 1.867323875427246, "learning_rate": 0.001, "loss": 2.0022, "step": 231000 }, { "epoch": 19.932367149758456, "grad_norm": 0.9133573174476624, "learning_rate": 0.001, "loss": 1.9993, "step": 231056 }, { "epoch": 19.93719806763285, "grad_norm": 0.6855758428573608, "learning_rate": 0.001, "loss": 2.0008, "step": 231112 }, { "epoch": 19.942028985507246, "grad_norm": 0.9852924346923828, "learning_rate": 0.001, "loss": 1.9971, "step": 231168 }, { "epoch": 19.946859903381643, "grad_norm": 1.0809262990951538, "learning_rate": 0.001, "loss": 2.0023, "step": 231224 }, { "epoch": 19.95169082125604, "grad_norm": 1.0367103815078735, "learning_rate": 0.001, "loss": 1.9968, "step": 231280 }, { "epoch": 19.956521739130434, "grad_norm": 1.608024001121521, "learning_rate": 0.001, "loss": 1.9999, "step": 231336 }, { "epoch": 19.96135265700483, "grad_norm": 1.5402884483337402, "learning_rate": 0.001, "loss": 1.9969, "step": 231392 }, { "epoch": 19.966183574879228, "grad_norm": 1.115765929222107, "learning_rate": 0.001, "loss": 2.0021, "step": 231448 }, { "epoch": 19.971014492753625, "grad_norm": 0.7538047432899475, "learning_rate": 0.001, "loss": 1.9897, "step": 231504 }, { "epoch": 19.97584541062802, "grad_norm": 2.4690496921539307, "learning_rate": 0.001, "loss": 1.9988, "step": 231560 }, { "epoch": 19.980676328502415, "grad_norm": 0.557518482208252, "learning_rate": 0.001, "loss": 1.9967, "step": 231616 }, { "epoch": 19.985507246376812, "grad_norm": 1.1611177921295166, "learning_rate": 0.001, "loss": 1.9929, "step": 231672 }, { "epoch": 19.990338164251206, "grad_norm": 3.2506847381591797, "learning_rate": 0.001, "loss": 1.992, "step": 231728 }, { "epoch": 19.995169082125603, "grad_norm": 0.4481503665447235, "learning_rate": 0.001, "loss": 1.9838, "step": 231784 }, { "epoch": 20.0, "grad_norm": 0.9680782556533813, "learning_rate": 0.001, "loss": 1.9729, "step": 231840 }, { "epoch": 20.004830917874397, "grad_norm": 0.9590798616409302, "learning_rate": 0.001, "loss": 1.9479, "step": 231896 }, { "epoch": 20.00966183574879, "grad_norm": 0.9135307669639587, "learning_rate": 0.001, "loss": 1.9511, "step": 231952 }, { "epoch": 20.014492753623188, "grad_norm": 1.6131869554519653, "learning_rate": 0.001, "loss": 1.9446, "step": 232008 }, { "epoch": 20.019323671497585, "grad_norm": 0.6807425618171692, "learning_rate": 0.001, "loss": 1.9399, "step": 232064 }, { "epoch": 20.02415458937198, "grad_norm": 0.7212848663330078, "learning_rate": 0.001, "loss": 1.9534, "step": 232120 }, { "epoch": 20.028985507246375, "grad_norm": 4.037197113037109, "learning_rate": 0.001, "loss": 1.9684, "step": 232176 }, { "epoch": 20.033816425120772, "grad_norm": 0.7982797622680664, "learning_rate": 0.001, "loss": 1.9605, "step": 232232 }, { "epoch": 20.03864734299517, "grad_norm": 0.7451841235160828, "learning_rate": 0.001, "loss": 1.9602, "step": 232288 }, { "epoch": 20.043478260869566, "grad_norm": 1.8921542167663574, "learning_rate": 0.001, "loss": 1.9628, "step": 232344 }, { "epoch": 20.04830917874396, "grad_norm": 1.2120712995529175, "learning_rate": 0.001, "loss": 1.9679, "step": 232400 }, { "epoch": 20.053140096618357, "grad_norm": 0.7218134999275208, "learning_rate": 0.001, "loss": 1.9679, "step": 232456 }, { "epoch": 20.057971014492754, "grad_norm": 0.7021347880363464, "learning_rate": 0.001, "loss": 1.9639, "step": 232512 }, { "epoch": 20.06280193236715, "grad_norm": 0.43417754769325256, "learning_rate": 0.001, "loss": 1.946, "step": 232568 }, { "epoch": 20.067632850241544, "grad_norm": 0.8222406506538391, "learning_rate": 0.001, "loss": 1.9543, "step": 232624 }, { "epoch": 20.07246376811594, "grad_norm": 3.986311435699463, "learning_rate": 0.001, "loss": 1.9451, "step": 232680 }, { "epoch": 20.07729468599034, "grad_norm": 0.6204794645309448, "learning_rate": 0.001, "loss": 1.9466, "step": 232736 }, { "epoch": 20.082125603864736, "grad_norm": 1.3899047374725342, "learning_rate": 0.001, "loss": 1.9551, "step": 232792 }, { "epoch": 20.08695652173913, "grad_norm": 1.5048880577087402, "learning_rate": 0.001, "loss": 1.9655, "step": 232848 }, { "epoch": 20.091787439613526, "grad_norm": 0.8524477481842041, "learning_rate": 0.001, "loss": 1.9634, "step": 232904 }, { "epoch": 20.096618357487923, "grad_norm": 0.9918443560600281, "learning_rate": 0.001, "loss": 1.9755, "step": 232960 }, { "epoch": 20.10144927536232, "grad_norm": 1.0246857404708862, "learning_rate": 0.001, "loss": 1.9691, "step": 233016 }, { "epoch": 20.106280193236714, "grad_norm": 0.8825823068618774, "learning_rate": 0.001, "loss": 1.981, "step": 233072 }, { "epoch": 20.11111111111111, "grad_norm": 1.254522681236267, "learning_rate": 0.001, "loss": 1.9884, "step": 233128 }, { "epoch": 20.115942028985508, "grad_norm": 1.3309866189956665, "learning_rate": 0.001, "loss": 1.9841, "step": 233184 }, { "epoch": 20.120772946859905, "grad_norm": 0.4176846444606781, "learning_rate": 0.001, "loss": 1.987, "step": 233240 }, { "epoch": 20.1256038647343, "grad_norm": 1.0715160369873047, "learning_rate": 0.001, "loss": 1.9659, "step": 233296 }, { "epoch": 20.130434782608695, "grad_norm": 0.9458504319190979, "learning_rate": 0.001, "loss": 1.9786, "step": 233352 }, { "epoch": 20.135265700483092, "grad_norm": 0.8436697125434875, "learning_rate": 0.001, "loss": 1.9877, "step": 233408 }, { "epoch": 20.14009661835749, "grad_norm": 2.2962377071380615, "learning_rate": 0.001, "loss": 1.9741, "step": 233464 }, { "epoch": 20.144927536231883, "grad_norm": 1.7697343826293945, "learning_rate": 0.001, "loss": 1.9631, "step": 233520 }, { "epoch": 20.14975845410628, "grad_norm": 0.9942611455917358, "learning_rate": 0.001, "loss": 1.9645, "step": 233576 }, { "epoch": 20.154589371980677, "grad_norm": 1.7412433624267578, "learning_rate": 0.001, "loss": 1.9755, "step": 233632 }, { "epoch": 20.159420289855074, "grad_norm": 0.9654859304428101, "learning_rate": 0.001, "loss": 1.971, "step": 233688 }, { "epoch": 20.164251207729468, "grad_norm": 2.0905511379241943, "learning_rate": 0.001, "loss": 1.9869, "step": 233744 }, { "epoch": 20.169082125603865, "grad_norm": 8.522933006286621, "learning_rate": 0.001, "loss": 1.9756, "step": 233800 }, { "epoch": 20.17391304347826, "grad_norm": 2.926584243774414, "learning_rate": 0.001, "loss": 1.9718, "step": 233856 }, { "epoch": 20.17874396135266, "grad_norm": 0.5673848986625671, "learning_rate": 0.001, "loss": 1.9786, "step": 233912 }, { "epoch": 20.183574879227052, "grad_norm": 0.9512795209884644, "learning_rate": 0.001, "loss": 1.9703, "step": 233968 }, { "epoch": 20.18840579710145, "grad_norm": 0.8473700881004333, "learning_rate": 0.001, "loss": 1.9734, "step": 234024 }, { "epoch": 20.193236714975846, "grad_norm": 1.9674136638641357, "learning_rate": 0.001, "loss": 1.9703, "step": 234080 }, { "epoch": 20.19806763285024, "grad_norm": 4.291630268096924, "learning_rate": 0.001, "loss": 1.973, "step": 234136 }, { "epoch": 20.202898550724637, "grad_norm": 0.6540539264678955, "learning_rate": 0.001, "loss": 1.964, "step": 234192 }, { "epoch": 20.207729468599034, "grad_norm": 2.7743849754333496, "learning_rate": 0.001, "loss": 1.9725, "step": 234248 }, { "epoch": 20.21256038647343, "grad_norm": 1.1074353456497192, "learning_rate": 0.001, "loss": 1.9837, "step": 234304 }, { "epoch": 20.217391304347824, "grad_norm": 0.7145338654518127, "learning_rate": 0.001, "loss": 1.9923, "step": 234360 }, { "epoch": 20.22222222222222, "grad_norm": 1.6077476739883423, "learning_rate": 0.001, "loss": 1.9757, "step": 234416 }, { "epoch": 20.22705314009662, "grad_norm": 1.3347930908203125, "learning_rate": 0.001, "loss": 1.9694, "step": 234472 }, { "epoch": 20.231884057971016, "grad_norm": 1.9627556800842285, "learning_rate": 0.001, "loss": 1.9697, "step": 234528 }, { "epoch": 20.23671497584541, "grad_norm": 0.6664007902145386, "learning_rate": 0.001, "loss": 1.973, "step": 234584 }, { "epoch": 20.241545893719806, "grad_norm": 2.4399492740631104, "learning_rate": 0.001, "loss": 1.9724, "step": 234640 }, { "epoch": 20.246376811594203, "grad_norm": 1.4547888040542603, "learning_rate": 0.001, "loss": 1.963, "step": 234696 }, { "epoch": 20.2512077294686, "grad_norm": 1.2826889753341675, "learning_rate": 0.001, "loss": 1.9709, "step": 234752 }, { "epoch": 20.256038647342994, "grad_norm": 1.1072330474853516, "learning_rate": 0.001, "loss": 1.9797, "step": 234808 }, { "epoch": 20.26086956521739, "grad_norm": 1.7266050577163696, "learning_rate": 0.001, "loss": 1.976, "step": 234864 }, { "epoch": 20.265700483091788, "grad_norm": 0.5628601908683777, "learning_rate": 0.001, "loss": 1.9668, "step": 234920 }, { "epoch": 20.270531400966185, "grad_norm": 1.725822925567627, "learning_rate": 0.001, "loss": 1.9673, "step": 234976 }, { "epoch": 20.27536231884058, "grad_norm": 0.7686460614204407, "learning_rate": 0.001, "loss": 1.9683, "step": 235032 }, { "epoch": 20.280193236714975, "grad_norm": 0.58089280128479, "learning_rate": 0.001, "loss": 1.97, "step": 235088 }, { "epoch": 20.285024154589372, "grad_norm": 0.7374380230903625, "learning_rate": 0.001, "loss": 1.9712, "step": 235144 }, { "epoch": 20.28985507246377, "grad_norm": 0.4292374849319458, "learning_rate": 0.001, "loss": 1.9669, "step": 235200 }, { "epoch": 20.294685990338163, "grad_norm": 0.5511637926101685, "learning_rate": 0.001, "loss": 1.9654, "step": 235256 }, { "epoch": 20.29951690821256, "grad_norm": 0.9945524334907532, "learning_rate": 0.001, "loss": 1.9639, "step": 235312 }, { "epoch": 20.304347826086957, "grad_norm": 0.37212470173835754, "learning_rate": 0.001, "loss": 1.9681, "step": 235368 }, { "epoch": 20.309178743961354, "grad_norm": 0.699050784111023, "learning_rate": 0.001, "loss": 1.9586, "step": 235424 }, { "epoch": 20.314009661835748, "grad_norm": 0.4967105984687805, "learning_rate": 0.001, "loss": 1.9574, "step": 235480 }, { "epoch": 20.318840579710145, "grad_norm": 1.2617794275283813, "learning_rate": 0.001, "loss": 1.9528, "step": 235536 }, { "epoch": 20.32367149758454, "grad_norm": 3.291747570037842, "learning_rate": 0.001, "loss": 1.9599, "step": 235592 }, { "epoch": 20.32850241545894, "grad_norm": 3.361870527267456, "learning_rate": 0.001, "loss": 1.9768, "step": 235648 }, { "epoch": 20.333333333333332, "grad_norm": 1.6715673208236694, "learning_rate": 0.001, "loss": 1.9608, "step": 235704 }, { "epoch": 20.33816425120773, "grad_norm": 1.1638327836990356, "learning_rate": 0.001, "loss": 1.9639, "step": 235760 }, { "epoch": 20.342995169082126, "grad_norm": 1.3658504486083984, "learning_rate": 0.001, "loss": 1.9694, "step": 235816 }, { "epoch": 20.347826086956523, "grad_norm": 1.2266860008239746, "learning_rate": 0.001, "loss": 1.9647, "step": 235872 }, { "epoch": 20.352657004830917, "grad_norm": 0.7702404856681824, "learning_rate": 0.001, "loss": 1.9693, "step": 235928 }, { "epoch": 20.357487922705314, "grad_norm": 0.6060357093811035, "learning_rate": 0.001, "loss": 1.9668, "step": 235984 }, { "epoch": 20.36231884057971, "grad_norm": 2.484067916870117, "learning_rate": 0.001, "loss": 1.9574, "step": 236040 }, { "epoch": 20.367149758454108, "grad_norm": 0.48463204503059387, "learning_rate": 0.001, "loss": 1.966, "step": 236096 }, { "epoch": 20.3719806763285, "grad_norm": 0.6289322972297668, "learning_rate": 0.001, "loss": 1.9767, "step": 236152 }, { "epoch": 20.3768115942029, "grad_norm": 1.8797649145126343, "learning_rate": 0.001, "loss": 1.9703, "step": 236208 }, { "epoch": 20.381642512077295, "grad_norm": 0.6994513869285583, "learning_rate": 0.001, "loss": 1.9644, "step": 236264 }, { "epoch": 20.386473429951693, "grad_norm": 1.5874524116516113, "learning_rate": 0.001, "loss": 1.9574, "step": 236320 }, { "epoch": 20.391304347826086, "grad_norm": 0.9595794677734375, "learning_rate": 0.001, "loss": 1.9543, "step": 236376 }, { "epoch": 20.396135265700483, "grad_norm": 0.7710196375846863, "learning_rate": 0.001, "loss": 1.9564, "step": 236432 }, { "epoch": 20.40096618357488, "grad_norm": 1.320600986480713, "learning_rate": 0.001, "loss": 1.9473, "step": 236488 }, { "epoch": 20.405797101449274, "grad_norm": 1.52142333984375, "learning_rate": 0.001, "loss": 1.959, "step": 236544 }, { "epoch": 20.41062801932367, "grad_norm": 0.9746045470237732, "learning_rate": 0.001, "loss": 1.9591, "step": 236600 }, { "epoch": 20.415458937198068, "grad_norm": 0.8647395968437195, "learning_rate": 0.001, "loss": 1.9562, "step": 236656 }, { "epoch": 20.420289855072465, "grad_norm": 0.9446902871131897, "learning_rate": 0.001, "loss": 1.947, "step": 236712 }, { "epoch": 20.42512077294686, "grad_norm": 1.2758747339248657, "learning_rate": 0.001, "loss": 1.9627, "step": 236768 }, { "epoch": 20.429951690821255, "grad_norm": 0.8882046937942505, "learning_rate": 0.001, "loss": 1.9611, "step": 236824 }, { "epoch": 20.434782608695652, "grad_norm": 1.239133596420288, "learning_rate": 0.001, "loss": 1.971, "step": 236880 }, { "epoch": 20.43961352657005, "grad_norm": 0.73228520154953, "learning_rate": 0.001, "loss": 1.9678, "step": 236936 }, { "epoch": 20.444444444444443, "grad_norm": 1.2003484964370728, "learning_rate": 0.001, "loss": 1.9661, "step": 236992 }, { "epoch": 20.44927536231884, "grad_norm": 0.7513173222541809, "learning_rate": 0.001, "loss": 1.9508, "step": 237048 }, { "epoch": 20.454106280193237, "grad_norm": 0.9872759580612183, "learning_rate": 0.001, "loss": 1.9552, "step": 237104 }, { "epoch": 20.458937198067634, "grad_norm": 1.145910382270813, "learning_rate": 0.001, "loss": 1.9623, "step": 237160 }, { "epoch": 20.463768115942027, "grad_norm": 0.9457828402519226, "learning_rate": 0.001, "loss": 1.9656, "step": 237216 }, { "epoch": 20.468599033816425, "grad_norm": 0.5922992825508118, "learning_rate": 0.001, "loss": 1.9683, "step": 237272 }, { "epoch": 20.47342995169082, "grad_norm": 0.8686397671699524, "learning_rate": 0.001, "loss": 1.9672, "step": 237328 }, { "epoch": 20.47826086956522, "grad_norm": 0.43666476011276245, "learning_rate": 0.001, "loss": 1.9717, "step": 237384 }, { "epoch": 20.483091787439612, "grad_norm": 0.813663899898529, "learning_rate": 0.001, "loss": 1.9803, "step": 237440 }, { "epoch": 20.48792270531401, "grad_norm": 0.9560878872871399, "learning_rate": 0.001, "loss": 1.9589, "step": 237496 }, { "epoch": 20.492753623188406, "grad_norm": 1.1921910047531128, "learning_rate": 0.001, "loss": 1.956, "step": 237552 }, { "epoch": 20.497584541062803, "grad_norm": 0.5158993601799011, "learning_rate": 0.001, "loss": 1.948, "step": 237608 }, { "epoch": 20.502415458937197, "grad_norm": 0.5725904107093811, "learning_rate": 0.001, "loss": 1.9586, "step": 237664 }, { "epoch": 20.507246376811594, "grad_norm": 0.49894335865974426, "learning_rate": 0.001, "loss": 1.9687, "step": 237720 }, { "epoch": 20.51207729468599, "grad_norm": 0.7635971903800964, "learning_rate": 0.001, "loss": 1.9532, "step": 237776 }, { "epoch": 20.516908212560388, "grad_norm": 0.48177558183670044, "learning_rate": 0.001, "loss": 1.9567, "step": 237832 }, { "epoch": 20.52173913043478, "grad_norm": 0.34677308797836304, "learning_rate": 0.001, "loss": 1.9485, "step": 237888 }, { "epoch": 20.52657004830918, "grad_norm": 1.500851035118103, "learning_rate": 0.001, "loss": 1.9579, "step": 237944 }, { "epoch": 20.531400966183575, "grad_norm": 0.6042265295982361, "learning_rate": 0.001, "loss": 1.9487, "step": 238000 }, { "epoch": 20.536231884057973, "grad_norm": 0.9513376355171204, "learning_rate": 0.001, "loss": 1.9542, "step": 238056 }, { "epoch": 20.541062801932366, "grad_norm": 0.5707837343215942, "learning_rate": 0.001, "loss": 1.9597, "step": 238112 }, { "epoch": 20.545893719806763, "grad_norm": 1.4689085483551025, "learning_rate": 0.001, "loss": 1.9625, "step": 238168 }, { "epoch": 20.55072463768116, "grad_norm": 0.47666293382644653, "learning_rate": 0.001, "loss": 1.9573, "step": 238224 }, { "epoch": 20.555555555555557, "grad_norm": 1.582378625869751, "learning_rate": 0.001, "loss": 1.9551, "step": 238280 }, { "epoch": 20.56038647342995, "grad_norm": 0.7956146597862244, "learning_rate": 0.001, "loss": 1.9573, "step": 238336 }, { "epoch": 20.565217391304348, "grad_norm": 3.0987062454223633, "learning_rate": 0.001, "loss": 1.9573, "step": 238392 }, { "epoch": 20.570048309178745, "grad_norm": 2.240318775177002, "learning_rate": 0.001, "loss": 1.9525, "step": 238448 }, { "epoch": 20.57487922705314, "grad_norm": 3.2296688556671143, "learning_rate": 0.001, "loss": 1.9641, "step": 238504 }, { "epoch": 20.579710144927535, "grad_norm": 12.198625564575195, "learning_rate": 0.001, "loss": 1.9738, "step": 238560 }, { "epoch": 20.584541062801932, "grad_norm": 1.043789267539978, "learning_rate": 0.001, "loss": 1.9538, "step": 238616 }, { "epoch": 20.58937198067633, "grad_norm": 1.2483795881271362, "learning_rate": 0.001, "loss": 1.9599, "step": 238672 }, { "epoch": 20.594202898550726, "grad_norm": 0.42853376269340515, "learning_rate": 0.001, "loss": 1.9625, "step": 238728 }, { "epoch": 20.59903381642512, "grad_norm": 1.8072736263275146, "learning_rate": 0.001, "loss": 1.9561, "step": 238784 }, { "epoch": 20.603864734299517, "grad_norm": 1.2119892835617065, "learning_rate": 0.001, "loss": 1.9563, "step": 238840 }, { "epoch": 20.608695652173914, "grad_norm": 0.8437949419021606, "learning_rate": 0.001, "loss": 1.9599, "step": 238896 }, { "epoch": 20.613526570048307, "grad_norm": 0.5938432216644287, "learning_rate": 0.001, "loss": 1.9531, "step": 238952 }, { "epoch": 20.618357487922705, "grad_norm": 0.5191885232925415, "learning_rate": 0.001, "loss": 1.9477, "step": 239008 }, { "epoch": 20.6231884057971, "grad_norm": 0.5827580690383911, "learning_rate": 0.001, "loss": 1.9444, "step": 239064 }, { "epoch": 20.6280193236715, "grad_norm": 0.575282871723175, "learning_rate": 0.001, "loss": 1.9457, "step": 239120 }, { "epoch": 20.632850241545892, "grad_norm": 1.3534448146820068, "learning_rate": 0.001, "loss": 1.9528, "step": 239176 }, { "epoch": 20.63768115942029, "grad_norm": 0.29908227920532227, "learning_rate": 0.001, "loss": 1.9446, "step": 239232 }, { "epoch": 20.642512077294686, "grad_norm": 0.4297437369823456, "learning_rate": 0.001, "loss": 1.9578, "step": 239288 }, { "epoch": 20.647342995169083, "grad_norm": 9.909271240234375, "learning_rate": 0.001, "loss": 1.9516, "step": 239344 }, { "epoch": 20.652173913043477, "grad_norm": 2.4474244117736816, "learning_rate": 0.001, "loss": 1.9469, "step": 239400 }, { "epoch": 20.657004830917874, "grad_norm": 1.4169622659683228, "learning_rate": 0.001, "loss": 1.9467, "step": 239456 }, { "epoch": 20.66183574879227, "grad_norm": 0.5401244759559631, "learning_rate": 0.001, "loss": 1.9458, "step": 239512 }, { "epoch": 20.666666666666668, "grad_norm": 0.5860778093338013, "learning_rate": 0.001, "loss": 1.9592, "step": 239568 }, { "epoch": 20.67149758454106, "grad_norm": 0.3608378767967224, "learning_rate": 0.001, "loss": 1.961, "step": 239624 }, { "epoch": 20.67632850241546, "grad_norm": 1.0052027702331543, "learning_rate": 0.001, "loss": 1.9602, "step": 239680 }, { "epoch": 20.681159420289855, "grad_norm": 0.9896047711372375, "learning_rate": 0.001, "loss": 1.9541, "step": 239736 }, { "epoch": 20.685990338164252, "grad_norm": 0.6349100470542908, "learning_rate": 0.001, "loss": 1.9628, "step": 239792 }, { "epoch": 20.690821256038646, "grad_norm": 0.674525260925293, "learning_rate": 0.001, "loss": 1.9598, "step": 239848 }, { "epoch": 20.695652173913043, "grad_norm": 0.47772717475891113, "learning_rate": 0.001, "loss": 1.9625, "step": 239904 }, { "epoch": 20.70048309178744, "grad_norm": 1.067716360092163, "learning_rate": 0.001, "loss": 1.9568, "step": 239960 }, { "epoch": 20.705314009661837, "grad_norm": 0.8412366509437561, "learning_rate": 0.001, "loss": 1.9475, "step": 240016 }, { "epoch": 20.71014492753623, "grad_norm": 2.141857385635376, "learning_rate": 0.001, "loss": 1.947, "step": 240072 }, { "epoch": 20.714975845410628, "grad_norm": 0.4832453429698944, "learning_rate": 0.001, "loss": 1.9451, "step": 240128 }, { "epoch": 20.719806763285025, "grad_norm": 0.8958045840263367, "learning_rate": 0.001, "loss": 1.934, "step": 240184 }, { "epoch": 20.72463768115942, "grad_norm": 0.7899490594863892, "learning_rate": 0.001, "loss": 1.9443, "step": 240240 }, { "epoch": 20.729468599033815, "grad_norm": 0.6653137803077698, "learning_rate": 0.001, "loss": 1.967, "step": 240296 }, { "epoch": 20.734299516908212, "grad_norm": 0.5967240929603577, "learning_rate": 0.001, "loss": 1.9642, "step": 240352 }, { "epoch": 20.73913043478261, "grad_norm": 0.7364872097969055, "learning_rate": 0.001, "loss": 1.9585, "step": 240408 }, { "epoch": 20.743961352657006, "grad_norm": 0.5708608031272888, "learning_rate": 0.001, "loss": 1.9609, "step": 240464 }, { "epoch": 20.7487922705314, "grad_norm": 0.9739353656768799, "learning_rate": 0.001, "loss": 1.9587, "step": 240520 }, { "epoch": 20.753623188405797, "grad_norm": 0.5648186802864075, "learning_rate": 0.001, "loss": 1.9648, "step": 240576 }, { "epoch": 20.758454106280194, "grad_norm": 0.45446285605430603, "learning_rate": 0.001, "loss": 1.9644, "step": 240632 }, { "epoch": 20.76328502415459, "grad_norm": 0.7312005162239075, "learning_rate": 0.001, "loss": 1.9563, "step": 240688 }, { "epoch": 20.768115942028984, "grad_norm": 0.9654141664505005, "learning_rate": 0.001, "loss": 1.9631, "step": 240744 }, { "epoch": 20.77294685990338, "grad_norm": 0.8596492409706116, "learning_rate": 0.001, "loss": 1.9565, "step": 240800 }, { "epoch": 20.77777777777778, "grad_norm": 1.0629878044128418, "learning_rate": 0.001, "loss": 1.9608, "step": 240856 }, { "epoch": 20.782608695652176, "grad_norm": 0.4907311797142029, "learning_rate": 0.001, "loss": 1.9584, "step": 240912 }, { "epoch": 20.78743961352657, "grad_norm": 3.250532627105713, "learning_rate": 0.001, "loss": 1.949, "step": 240968 }, { "epoch": 20.792270531400966, "grad_norm": 4.6710309982299805, "learning_rate": 0.001, "loss": 1.9441, "step": 241024 }, { "epoch": 20.797101449275363, "grad_norm": 1.580987572669983, "learning_rate": 0.001, "loss": 1.9583, "step": 241080 }, { "epoch": 20.80193236714976, "grad_norm": 2.200958490371704, "learning_rate": 0.001, "loss": 1.9699, "step": 241136 }, { "epoch": 20.806763285024154, "grad_norm": 1.9953570365905762, "learning_rate": 0.001, "loss": 1.9696, "step": 241192 }, { "epoch": 20.81159420289855, "grad_norm": 0.43790823221206665, "learning_rate": 0.001, "loss": 1.9647, "step": 241248 }, { "epoch": 20.816425120772948, "grad_norm": 0.3859184980392456, "learning_rate": 0.001, "loss": 1.9552, "step": 241304 }, { "epoch": 20.82125603864734, "grad_norm": 0.6125442981719971, "learning_rate": 0.001, "loss": 1.9596, "step": 241360 }, { "epoch": 20.82608695652174, "grad_norm": 1.0904134511947632, "learning_rate": 0.001, "loss": 1.9697, "step": 241416 }, { "epoch": 20.830917874396135, "grad_norm": 1.3848557472229004, "learning_rate": 0.001, "loss": 1.9709, "step": 241472 }, { "epoch": 20.835748792270532, "grad_norm": 1.8763023614883423, "learning_rate": 0.001, "loss": 1.9597, "step": 241528 }, { "epoch": 20.840579710144926, "grad_norm": 1.2586201429367065, "learning_rate": 0.001, "loss": 1.9677, "step": 241584 }, { "epoch": 20.845410628019323, "grad_norm": 1.1125701665878296, "learning_rate": 0.001, "loss": 1.9712, "step": 241640 }, { "epoch": 20.85024154589372, "grad_norm": 1.9199594259262085, "learning_rate": 0.001, "loss": 1.9586, "step": 241696 }, { "epoch": 20.855072463768117, "grad_norm": 1.488676905632019, "learning_rate": 0.001, "loss": 1.9612, "step": 241752 }, { "epoch": 20.85990338164251, "grad_norm": 0.9288828372955322, "learning_rate": 0.001, "loss": 1.9496, "step": 241808 }, { "epoch": 20.864734299516908, "grad_norm": 1.688301682472229, "learning_rate": 0.001, "loss": 1.9491, "step": 241864 }, { "epoch": 20.869565217391305, "grad_norm": 0.3567260801792145, "learning_rate": 0.001, "loss": 1.9582, "step": 241920 }, { "epoch": 20.8743961352657, "grad_norm": 1.4549705982208252, "learning_rate": 0.001, "loss": 1.9629, "step": 241976 }, { "epoch": 20.879227053140095, "grad_norm": 1.4151066541671753, "learning_rate": 0.001, "loss": 1.9586, "step": 242032 }, { "epoch": 20.884057971014492, "grad_norm": 0.5660560727119446, "learning_rate": 0.001, "loss": 1.9616, "step": 242088 }, { "epoch": 20.88888888888889, "grad_norm": 1.82489013671875, "learning_rate": 0.001, "loss": 1.9505, "step": 242144 }, { "epoch": 20.893719806763286, "grad_norm": 4.623912334442139, "learning_rate": 0.001, "loss": 1.9553, "step": 242200 }, { "epoch": 20.89855072463768, "grad_norm": 0.346300333738327, "learning_rate": 0.001, "loss": 1.9493, "step": 242256 }, { "epoch": 20.903381642512077, "grad_norm": 0.7645660638809204, "learning_rate": 0.001, "loss": 1.9508, "step": 242312 }, { "epoch": 20.908212560386474, "grad_norm": 0.4117395281791687, "learning_rate": 0.001, "loss": 1.9501, "step": 242368 }, { "epoch": 20.91304347826087, "grad_norm": 1.3760991096496582, "learning_rate": 0.001, "loss": 1.9573, "step": 242424 }, { "epoch": 20.917874396135264, "grad_norm": 1.501476526260376, "learning_rate": 0.001, "loss": 1.985, "step": 242480 }, { "epoch": 20.92270531400966, "grad_norm": 0.8489803075790405, "learning_rate": 0.001, "loss": 1.9632, "step": 242536 }, { "epoch": 20.92753623188406, "grad_norm": 0.6838896870613098, "learning_rate": 0.001, "loss": 1.9537, "step": 242592 }, { "epoch": 20.932367149758456, "grad_norm": 1.043967843055725, "learning_rate": 0.001, "loss": 1.9525, "step": 242648 }, { "epoch": 20.93719806763285, "grad_norm": 0.951227605342865, "learning_rate": 0.001, "loss": 1.9512, "step": 242704 }, { "epoch": 20.942028985507246, "grad_norm": 1.2392356395721436, "learning_rate": 0.001, "loss": 1.9531, "step": 242760 }, { "epoch": 20.946859903381643, "grad_norm": 1.1150872707366943, "learning_rate": 0.001, "loss": 1.9594, "step": 242816 }, { "epoch": 20.95169082125604, "grad_norm": 1.5052411556243896, "learning_rate": 0.001, "loss": 1.9732, "step": 242872 }, { "epoch": 20.956521739130434, "grad_norm": 1.1393407583236694, "learning_rate": 0.001, "loss": 1.9759, "step": 242928 }, { "epoch": 20.96135265700483, "grad_norm": 0.6628881692886353, "learning_rate": 0.001, "loss": 1.9674, "step": 242984 }, { "epoch": 20.966183574879228, "grad_norm": 0.44781407713890076, "learning_rate": 0.001, "loss": 1.972, "step": 243040 }, { "epoch": 20.971014492753625, "grad_norm": 2.7506120204925537, "learning_rate": 0.001, "loss": 1.9802, "step": 243096 }, { "epoch": 20.97584541062802, "grad_norm": 1.3479582071304321, "learning_rate": 0.001, "loss": 1.9959, "step": 243152 }, { "epoch": 20.980676328502415, "grad_norm": 1.218040108680725, "learning_rate": 0.001, "loss": 1.9829, "step": 243208 }, { "epoch": 20.985507246376812, "grad_norm": 0.7947763204574585, "learning_rate": 0.001, "loss": 1.9793, "step": 243264 }, { "epoch": 20.990338164251206, "grad_norm": 1.330209493637085, "learning_rate": 0.001, "loss": 1.9712, "step": 243320 }, { "epoch": 20.995169082125603, "grad_norm": 1.218634009361267, "learning_rate": 0.001, "loss": 1.9714, "step": 243376 }, { "epoch": 21.0, "grad_norm": 0.7437846064567566, "learning_rate": 0.001, "loss": 1.9746, "step": 243432 }, { "epoch": 21.004830917874397, "grad_norm": 1.6389663219451904, "learning_rate": 0.001, "loss": 1.9342, "step": 243488 }, { "epoch": 21.00966183574879, "grad_norm": 1.472157597541809, "learning_rate": 0.001, "loss": 1.9357, "step": 243544 }, { "epoch": 21.014492753623188, "grad_norm": 0.6316161751747131, "learning_rate": 0.001, "loss": 1.9418, "step": 243600 }, { "epoch": 21.019323671497585, "grad_norm": 0.4211271107196808, "learning_rate": 0.001, "loss": 1.9396, "step": 243656 }, { "epoch": 21.02415458937198, "grad_norm": 1.4315630197525024, "learning_rate": 0.001, "loss": 1.9387, "step": 243712 }, { "epoch": 21.028985507246375, "grad_norm": 0.746213436126709, "learning_rate": 0.001, "loss": 1.9381, "step": 243768 }, { "epoch": 21.033816425120772, "grad_norm": 0.7639557123184204, "learning_rate": 0.001, "loss": 1.9325, "step": 243824 }, { "epoch": 21.03864734299517, "grad_norm": 4.488649845123291, "learning_rate": 0.001, "loss": 1.9366, "step": 243880 }, { "epoch": 21.043478260869566, "grad_norm": 0.8225687742233276, "learning_rate": 0.001, "loss": 1.9344, "step": 243936 }, { "epoch": 21.04830917874396, "grad_norm": 1.2400037050247192, "learning_rate": 0.001, "loss": 1.9347, "step": 243992 }, { "epoch": 21.053140096618357, "grad_norm": 1.25209641456604, "learning_rate": 0.001, "loss": 1.9313, "step": 244048 }, { "epoch": 21.057971014492754, "grad_norm": 1.6570799350738525, "learning_rate": 0.001, "loss": 1.9266, "step": 244104 }, { "epoch": 21.06280193236715, "grad_norm": 2.0417397022247314, "learning_rate": 0.001, "loss": 1.9246, "step": 244160 }, { "epoch": 21.067632850241544, "grad_norm": 0.47704049944877625, "learning_rate": 0.001, "loss": 1.9384, "step": 244216 }, { "epoch": 21.07246376811594, "grad_norm": 0.8360207676887512, "learning_rate": 0.001, "loss": 1.9381, "step": 244272 }, { "epoch": 21.07729468599034, "grad_norm": 0.41122159361839294, "learning_rate": 0.001, "loss": 1.937, "step": 244328 }, { "epoch": 21.082125603864736, "grad_norm": 0.5890462398529053, "learning_rate": 0.001, "loss": 1.9341, "step": 244384 }, { "epoch": 21.08695652173913, "grad_norm": 0.4236384630203247, "learning_rate": 0.001, "loss": 1.9342, "step": 244440 }, { "epoch": 21.091787439613526, "grad_norm": 0.4738198518753052, "learning_rate": 0.001, "loss": 1.9301, "step": 244496 }, { "epoch": 21.096618357487923, "grad_norm": 0.9215365052223206, "learning_rate": 0.001, "loss": 1.9248, "step": 244552 }, { "epoch": 21.10144927536232, "grad_norm": 0.7103099822998047, "learning_rate": 0.001, "loss": 1.9308, "step": 244608 }, { "epoch": 21.106280193236714, "grad_norm": 0.7142167687416077, "learning_rate": 0.001, "loss": 1.9262, "step": 244664 }, { "epoch": 21.11111111111111, "grad_norm": 0.5004721879959106, "learning_rate": 0.001, "loss": 1.9236, "step": 244720 }, { "epoch": 21.115942028985508, "grad_norm": 0.8549935817718506, "learning_rate": 0.001, "loss": 1.9362, "step": 244776 }, { "epoch": 21.120772946859905, "grad_norm": 0.7482916712760925, "learning_rate": 0.001, "loss": 1.9389, "step": 244832 }, { "epoch": 21.1256038647343, "grad_norm": 0.8914834856987, "learning_rate": 0.001, "loss": 1.9333, "step": 244888 }, { "epoch": 21.130434782608695, "grad_norm": 1.3610881567001343, "learning_rate": 0.001, "loss": 1.9311, "step": 244944 }, { "epoch": 21.135265700483092, "grad_norm": 2.021111011505127, "learning_rate": 0.001, "loss": 1.9482, "step": 245000 }, { "epoch": 21.14009661835749, "grad_norm": 1.15712571144104, "learning_rate": 0.001, "loss": 1.9554, "step": 245056 }, { "epoch": 21.144927536231883, "grad_norm": 1.3414568901062012, "learning_rate": 0.001, "loss": 1.9551, "step": 245112 }, { "epoch": 21.14975845410628, "grad_norm": 1.4819480180740356, "learning_rate": 0.001, "loss": 1.95, "step": 245168 }, { "epoch": 21.154589371980677, "grad_norm": 3.9842798709869385, "learning_rate": 0.001, "loss": 1.9694, "step": 245224 }, { "epoch": 21.159420289855074, "grad_norm": 0.8567904233932495, "learning_rate": 0.001, "loss": 1.9619, "step": 245280 }, { "epoch": 21.164251207729468, "grad_norm": 1.3237590789794922, "learning_rate": 0.001, "loss": 1.9512, "step": 245336 }, { "epoch": 21.169082125603865, "grad_norm": 0.42718395590782166, "learning_rate": 0.001, "loss": 1.9642, "step": 245392 }, { "epoch": 21.17391304347826, "grad_norm": 0.46414899826049805, "learning_rate": 0.001, "loss": 1.956, "step": 245448 }, { "epoch": 21.17874396135266, "grad_norm": 0.48734915256500244, "learning_rate": 0.001, "loss": 1.9487, "step": 245504 }, { "epoch": 21.183574879227052, "grad_norm": 2.552711248397827, "learning_rate": 0.001, "loss": 1.9428, "step": 245560 }, { "epoch": 21.18840579710145, "grad_norm": 5.905545711517334, "learning_rate": 0.001, "loss": 1.9305, "step": 245616 }, { "epoch": 21.193236714975846, "grad_norm": 0.7736048102378845, "learning_rate": 0.001, "loss": 1.9291, "step": 245672 }, { "epoch": 21.19806763285024, "grad_norm": 0.5431773662567139, "learning_rate": 0.001, "loss": 1.9357, "step": 245728 }, { "epoch": 21.202898550724637, "grad_norm": 0.8890239000320435, "learning_rate": 0.001, "loss": 1.9248, "step": 245784 }, { "epoch": 21.207729468599034, "grad_norm": 0.8334510326385498, "learning_rate": 0.001, "loss": 1.9355, "step": 245840 }, { "epoch": 21.21256038647343, "grad_norm": 0.6951484680175781, "learning_rate": 0.001, "loss": 1.9376, "step": 245896 }, { "epoch": 21.217391304347824, "grad_norm": 0.33767789602279663, "learning_rate": 0.001, "loss": 1.9379, "step": 245952 }, { "epoch": 21.22222222222222, "grad_norm": 0.296726256608963, "learning_rate": 0.001, "loss": 1.9389, "step": 246008 }, { "epoch": 21.22705314009662, "grad_norm": 0.6288598775863647, "learning_rate": 0.001, "loss": 1.9383, "step": 246064 }, { "epoch": 21.231884057971016, "grad_norm": 3.858222246170044, "learning_rate": 0.001, "loss": 1.931, "step": 246120 }, { "epoch": 21.23671497584541, "grad_norm": 9.175567626953125, "learning_rate": 0.001, "loss": 1.9343, "step": 246176 }, { "epoch": 21.241545893719806, "grad_norm": 0.35068923234939575, "learning_rate": 0.001, "loss": 1.9267, "step": 246232 }, { "epoch": 21.246376811594203, "grad_norm": 0.40852609276771545, "learning_rate": 0.001, "loss": 1.9379, "step": 246288 }, { "epoch": 21.2512077294686, "grad_norm": 0.8824296593666077, "learning_rate": 0.001, "loss": 1.9357, "step": 246344 }, { "epoch": 21.256038647342994, "grad_norm": 0.507612407207489, "learning_rate": 0.001, "loss": 1.9396, "step": 246400 }, { "epoch": 21.26086956521739, "grad_norm": 0.4482591450214386, "learning_rate": 0.001, "loss": 1.9308, "step": 246456 }, { "epoch": 21.265700483091788, "grad_norm": 1.2142528295516968, "learning_rate": 0.001, "loss": 1.9399, "step": 246512 }, { "epoch": 21.270531400966185, "grad_norm": 0.3617037832736969, "learning_rate": 0.001, "loss": 1.9418, "step": 246568 }, { "epoch": 21.27536231884058, "grad_norm": 1.38692045211792, "learning_rate": 0.001, "loss": 1.9378, "step": 246624 }, { "epoch": 21.280193236714975, "grad_norm": 0.7056344747543335, "learning_rate": 0.001, "loss": 1.9331, "step": 246680 }, { "epoch": 21.285024154589372, "grad_norm": 0.641124963760376, "learning_rate": 0.001, "loss": 1.9351, "step": 246736 }, { "epoch": 21.28985507246377, "grad_norm": 0.4981977939605713, "learning_rate": 0.001, "loss": 1.9279, "step": 246792 }, { "epoch": 21.294685990338163, "grad_norm": 2.1799654960632324, "learning_rate": 0.001, "loss": 1.9383, "step": 246848 }, { "epoch": 21.29951690821256, "grad_norm": 0.5045608878135681, "learning_rate": 0.001, "loss": 1.9342, "step": 246904 }, { "epoch": 21.304347826086957, "grad_norm": 0.5069738030433655, "learning_rate": 0.001, "loss": 1.9353, "step": 246960 }, { "epoch": 21.309178743961354, "grad_norm": 2.1662988662719727, "learning_rate": 0.001, "loss": 1.9326, "step": 247016 }, { "epoch": 21.314009661835748, "grad_norm": 1.150823712348938, "learning_rate": 0.001, "loss": 1.9466, "step": 247072 }, { "epoch": 21.318840579710145, "grad_norm": 0.6821958422660828, "learning_rate": 0.001, "loss": 1.9352, "step": 247128 }, { "epoch": 21.32367149758454, "grad_norm": 0.4901876747608185, "learning_rate": 0.001, "loss": 1.9294, "step": 247184 }, { "epoch": 21.32850241545894, "grad_norm": 0.48851287364959717, "learning_rate": 0.001, "loss": 1.935, "step": 247240 }, { "epoch": 21.333333333333332, "grad_norm": 1.0849577188491821, "learning_rate": 0.001, "loss": 1.9513, "step": 247296 }, { "epoch": 21.33816425120773, "grad_norm": 1.6245534420013428, "learning_rate": 0.001, "loss": 1.9428, "step": 247352 }, { "epoch": 21.342995169082126, "grad_norm": 1.0170176029205322, "learning_rate": 0.001, "loss": 1.9521, "step": 247408 }, { "epoch": 21.347826086956523, "grad_norm": 0.8316085934638977, "learning_rate": 0.001, "loss": 1.9505, "step": 247464 }, { "epoch": 21.352657004830917, "grad_norm": 0.5427383780479431, "learning_rate": 0.001, "loss": 1.9648, "step": 247520 }, { "epoch": 21.357487922705314, "grad_norm": 0.48038238286972046, "learning_rate": 0.001, "loss": 1.9459, "step": 247576 }, { "epoch": 21.36231884057971, "grad_norm": 0.5514872074127197, "learning_rate": 0.001, "loss": 1.9428, "step": 247632 }, { "epoch": 21.367149758454108, "grad_norm": 0.5876020193099976, "learning_rate": 0.001, "loss": 1.9514, "step": 247688 }, { "epoch": 21.3719806763285, "grad_norm": 0.559477686882019, "learning_rate": 0.001, "loss": 1.9504, "step": 247744 }, { "epoch": 21.3768115942029, "grad_norm": 0.9388629198074341, "learning_rate": 0.001, "loss": 1.9457, "step": 247800 }, { "epoch": 21.381642512077295, "grad_norm": 0.5193232893943787, "learning_rate": 0.001, "loss": 1.9406, "step": 247856 }, { "epoch": 21.386473429951693, "grad_norm": 0.466985821723938, "learning_rate": 0.001, "loss": 1.9601, "step": 247912 }, { "epoch": 21.391304347826086, "grad_norm": 0.5548844933509827, "learning_rate": 0.001, "loss": 1.9647, "step": 247968 }, { "epoch": 21.396135265700483, "grad_norm": 0.7892430424690247, "learning_rate": 0.001, "loss": 1.9664, "step": 248024 }, { "epoch": 21.40096618357488, "grad_norm": 2.712905168533325, "learning_rate": 0.001, "loss": 1.9643, "step": 248080 }, { "epoch": 21.405797101449274, "grad_norm": 1.1068408489227295, "learning_rate": 0.001, "loss": 1.96, "step": 248136 }, { "epoch": 21.41062801932367, "grad_norm": 1.3370461463928223, "learning_rate": 0.001, "loss": 1.9575, "step": 248192 }, { "epoch": 21.415458937198068, "grad_norm": 1.0986648797988892, "learning_rate": 0.001, "loss": 1.9567, "step": 248248 }, { "epoch": 21.420289855072465, "grad_norm": 0.8226195573806763, "learning_rate": 0.001, "loss": 1.9541, "step": 248304 }, { "epoch": 21.42512077294686, "grad_norm": 0.7474024891853333, "learning_rate": 0.001, "loss": 1.9416, "step": 248360 }, { "epoch": 21.429951690821255, "grad_norm": 0.8005509376525879, "learning_rate": 0.001, "loss": 1.9543, "step": 248416 }, { "epoch": 21.434782608695652, "grad_norm": 0.5598627328872681, "learning_rate": 0.001, "loss": 1.9454, "step": 248472 }, { "epoch": 21.43961352657005, "grad_norm": 0.8114826679229736, "learning_rate": 0.001, "loss": 1.9524, "step": 248528 }, { "epoch": 21.444444444444443, "grad_norm": 0.8691081404685974, "learning_rate": 0.001, "loss": 1.948, "step": 248584 }, { "epoch": 21.44927536231884, "grad_norm": 6.244614601135254, "learning_rate": 0.001, "loss": 1.9491, "step": 248640 }, { "epoch": 21.454106280193237, "grad_norm": 0.8425182104110718, "learning_rate": 0.001, "loss": 1.9522, "step": 248696 }, { "epoch": 21.458937198067634, "grad_norm": 1.651371717453003, "learning_rate": 0.001, "loss": 1.9539, "step": 248752 }, { "epoch": 21.463768115942027, "grad_norm": 2.101410388946533, "learning_rate": 0.001, "loss": 1.9477, "step": 248808 }, { "epoch": 21.468599033816425, "grad_norm": 0.5928142070770264, "learning_rate": 0.001, "loss": 1.9579, "step": 248864 }, { "epoch": 21.47342995169082, "grad_norm": 0.8492199778556824, "learning_rate": 0.001, "loss": 1.9586, "step": 248920 }, { "epoch": 21.47826086956522, "grad_norm": 0.7929604053497314, "learning_rate": 0.001, "loss": 1.9696, "step": 248976 }, { "epoch": 21.483091787439612, "grad_norm": 0.4867192804813385, "learning_rate": 0.001, "loss": 1.9625, "step": 249032 }, { "epoch": 21.48792270531401, "grad_norm": 0.45051974058151245, "learning_rate": 0.001, "loss": 1.957, "step": 249088 }, { "epoch": 21.492753623188406, "grad_norm": 5.42129373550415, "learning_rate": 0.001, "loss": 1.9661, "step": 249144 }, { "epoch": 21.497584541062803, "grad_norm": 0.7890036702156067, "learning_rate": 0.001, "loss": 1.9513, "step": 249200 }, { "epoch": 21.502415458937197, "grad_norm": 1.4707787036895752, "learning_rate": 0.001, "loss": 1.9629, "step": 249256 }, { "epoch": 21.507246376811594, "grad_norm": 1.2665071487426758, "learning_rate": 0.001, "loss": 1.9633, "step": 249312 }, { "epoch": 21.51207729468599, "grad_norm": 0.975257158279419, "learning_rate": 0.001, "loss": 1.9693, "step": 249368 }, { "epoch": 21.516908212560388, "grad_norm": 0.6110181212425232, "learning_rate": 0.001, "loss": 1.9626, "step": 249424 }, { "epoch": 21.52173913043478, "grad_norm": 1.4485918283462524, "learning_rate": 0.001, "loss": 1.964, "step": 249480 }, { "epoch": 21.52657004830918, "grad_norm": 1.9796454906463623, "learning_rate": 0.001, "loss": 1.97, "step": 249536 }, { "epoch": 21.531400966183575, "grad_norm": 0.555401086807251, "learning_rate": 0.001, "loss": 1.9702, "step": 249592 }, { "epoch": 21.536231884057973, "grad_norm": 1.062963604927063, "learning_rate": 0.001, "loss": 1.9654, "step": 249648 }, { "epoch": 21.541062801932366, "grad_norm": 0.538298487663269, "learning_rate": 0.001, "loss": 1.9558, "step": 249704 }, { "epoch": 21.545893719806763, "grad_norm": 0.4900556802749634, "learning_rate": 0.001, "loss": 1.9608, "step": 249760 }, { "epoch": 21.55072463768116, "grad_norm": 0.5375021696090698, "learning_rate": 0.001, "loss": 1.9531, "step": 249816 }, { "epoch": 21.555555555555557, "grad_norm": 3.2594358921051025, "learning_rate": 0.001, "loss": 1.9561, "step": 249872 }, { "epoch": 21.56038647342995, "grad_norm": 1.2410610914230347, "learning_rate": 0.001, "loss": 1.966, "step": 249928 }, { "epoch": 21.565217391304348, "grad_norm": 1.7509667873382568, "learning_rate": 0.001, "loss": 1.9605, "step": 249984 }, { "epoch": 21.570048309178745, "grad_norm": 0.5602964162826538, "learning_rate": 0.001, "loss": 1.9669, "step": 250040 }, { "epoch": 21.57487922705314, "grad_norm": 0.9668024182319641, "learning_rate": 0.001, "loss": 1.9668, "step": 250096 }, { "epoch": 21.579710144927535, "grad_norm": 1.0046262741088867, "learning_rate": 0.001, "loss": 1.964, "step": 250152 }, { "epoch": 21.584541062801932, "grad_norm": 0.8888593912124634, "learning_rate": 0.001, "loss": 1.9575, "step": 250208 }, { "epoch": 21.58937198067633, "grad_norm": 0.644279956817627, "learning_rate": 0.001, "loss": 1.9601, "step": 250264 }, { "epoch": 21.594202898550726, "grad_norm": 0.8787930011749268, "learning_rate": 0.001, "loss": 1.9654, "step": 250320 }, { "epoch": 21.59903381642512, "grad_norm": 6.46877384185791, "learning_rate": 0.001, "loss": 1.9708, "step": 250376 }, { "epoch": 21.603864734299517, "grad_norm": 8.018998146057129, "learning_rate": 0.001, "loss": 1.958, "step": 250432 }, { "epoch": 21.608695652173914, "grad_norm": 0.4972819983959198, "learning_rate": 0.001, "loss": 1.9457, "step": 250488 }, { "epoch": 21.613526570048307, "grad_norm": 0.5577712059020996, "learning_rate": 0.001, "loss": 1.9596, "step": 250544 }, { "epoch": 21.618357487922705, "grad_norm": 0.8427213430404663, "learning_rate": 0.001, "loss": 1.955, "step": 250600 }, { "epoch": 21.6231884057971, "grad_norm": 0.662353515625, "learning_rate": 0.001, "loss": 1.9601, "step": 250656 }, { "epoch": 21.6280193236715, "grad_norm": 0.8701677322387695, "learning_rate": 0.001, "loss": 1.9579, "step": 250712 }, { "epoch": 21.632850241545892, "grad_norm": 0.6668132543563843, "learning_rate": 0.001, "loss": 1.9521, "step": 250768 }, { "epoch": 21.63768115942029, "grad_norm": 0.5875878930091858, "learning_rate": 0.001, "loss": 1.9385, "step": 250824 }, { "epoch": 21.642512077294686, "grad_norm": 1.148645281791687, "learning_rate": 0.001, "loss": 1.9497, "step": 250880 }, { "epoch": 21.647342995169083, "grad_norm": 0.7564665079116821, "learning_rate": 0.001, "loss": 1.96, "step": 250936 }, { "epoch": 21.652173913043477, "grad_norm": 2.311861276626587, "learning_rate": 0.001, "loss": 1.9512, "step": 250992 }, { "epoch": 21.657004830917874, "grad_norm": 1.6183760166168213, "learning_rate": 0.001, "loss": 1.9551, "step": 251048 }, { "epoch": 21.66183574879227, "grad_norm": 0.9762577414512634, "learning_rate": 0.001, "loss": 1.9565, "step": 251104 }, { "epoch": 21.666666666666668, "grad_norm": 0.5730143189430237, "learning_rate": 0.001, "loss": 1.9685, "step": 251160 }, { "epoch": 21.67149758454106, "grad_norm": 0.5153380632400513, "learning_rate": 0.001, "loss": 1.9565, "step": 251216 }, { "epoch": 21.67632850241546, "grad_norm": 0.9978387355804443, "learning_rate": 0.001, "loss": 1.9589, "step": 251272 }, { "epoch": 21.681159420289855, "grad_norm": 1.0492725372314453, "learning_rate": 0.001, "loss": 1.9601, "step": 251328 }, { "epoch": 21.685990338164252, "grad_norm": 0.42663252353668213, "learning_rate": 0.001, "loss": 1.9624, "step": 251384 }, { "epoch": 21.690821256038646, "grad_norm": 0.9065369367599487, "learning_rate": 0.001, "loss": 1.9574, "step": 251440 }, { "epoch": 21.695652173913043, "grad_norm": 0.8139636516571045, "learning_rate": 0.001, "loss": 1.9693, "step": 251496 }, { "epoch": 21.70048309178744, "grad_norm": 0.4350810647010803, "learning_rate": 0.001, "loss": 1.9842, "step": 251552 }, { "epoch": 21.705314009661837, "grad_norm": 0.7480289340019226, "learning_rate": 0.001, "loss": 1.9733, "step": 251608 }, { "epoch": 21.71014492753623, "grad_norm": 0.6209117770195007, "learning_rate": 0.001, "loss": 1.9764, "step": 251664 }, { "epoch": 21.714975845410628, "grad_norm": 1.1292963027954102, "learning_rate": 0.001, "loss": 1.9683, "step": 251720 }, { "epoch": 21.719806763285025, "grad_norm": 0.5321400165557861, "learning_rate": 0.001, "loss": 1.9562, "step": 251776 }, { "epoch": 21.72463768115942, "grad_norm": 1.6908754110336304, "learning_rate": 0.001, "loss": 1.9479, "step": 251832 }, { "epoch": 21.729468599033815, "grad_norm": 4.08009672164917, "learning_rate": 0.001, "loss": 1.9612, "step": 251888 }, { "epoch": 21.734299516908212, "grad_norm": 0.5517629384994507, "learning_rate": 0.001, "loss": 1.9542, "step": 251944 }, { "epoch": 21.73913043478261, "grad_norm": 0.5395469069480896, "learning_rate": 0.001, "loss": 1.9501, "step": 252000 }, { "epoch": 21.743961352657006, "grad_norm": 8.448577880859375, "learning_rate": 0.001, "loss": 1.9488, "step": 252056 }, { "epoch": 21.7487922705314, "grad_norm": 0.4472997784614563, "learning_rate": 0.001, "loss": 1.9465, "step": 252112 }, { "epoch": 21.753623188405797, "grad_norm": 0.576563835144043, "learning_rate": 0.001, "loss": 1.9431, "step": 252168 }, { "epoch": 21.758454106280194, "grad_norm": 0.43651366233825684, "learning_rate": 0.001, "loss": 1.938, "step": 252224 }, { "epoch": 21.76328502415459, "grad_norm": 0.3002920150756836, "learning_rate": 0.001, "loss": 1.9533, "step": 252280 }, { "epoch": 21.768115942028984, "grad_norm": 0.4790380001068115, "learning_rate": 0.001, "loss": 1.9446, "step": 252336 }, { "epoch": 21.77294685990338, "grad_norm": 1.0764104127883911, "learning_rate": 0.001, "loss": 1.9644, "step": 252392 }, { "epoch": 21.77777777777778, "grad_norm": 0.5034885406494141, "learning_rate": 0.001, "loss": 1.9535, "step": 252448 }, { "epoch": 21.782608695652176, "grad_norm": 0.7224101424217224, "learning_rate": 0.001, "loss": 1.9495, "step": 252504 }, { "epoch": 21.78743961352657, "grad_norm": 1.0009119510650635, "learning_rate": 0.001, "loss": 1.947, "step": 252560 }, { "epoch": 21.792270531400966, "grad_norm": 0.9348419308662415, "learning_rate": 0.001, "loss": 1.9512, "step": 252616 }, { "epoch": 21.797101449275363, "grad_norm": 10.893620491027832, "learning_rate": 0.001, "loss": 1.9488, "step": 252672 }, { "epoch": 21.80193236714976, "grad_norm": 1.8768951892852783, "learning_rate": 0.001, "loss": 1.9554, "step": 252728 }, { "epoch": 21.806763285024154, "grad_norm": 0.6475788354873657, "learning_rate": 0.001, "loss": 1.9549, "step": 252784 }, { "epoch": 21.81159420289855, "grad_norm": 0.5225663781166077, "learning_rate": 0.001, "loss": 1.954, "step": 252840 }, { "epoch": 21.816425120772948, "grad_norm": 0.8639033436775208, "learning_rate": 0.001, "loss": 1.9581, "step": 252896 }, { "epoch": 21.82125603864734, "grad_norm": 0.6297056674957275, "learning_rate": 0.001, "loss": 1.9534, "step": 252952 }, { "epoch": 21.82608695652174, "grad_norm": 0.7101808786392212, "learning_rate": 0.001, "loss": 1.9752, "step": 253008 }, { "epoch": 21.830917874396135, "grad_norm": 0.9261828660964966, "learning_rate": 0.001, "loss": 1.9769, "step": 253064 }, { "epoch": 21.835748792270532, "grad_norm": 0.5968183279037476, "learning_rate": 0.001, "loss": 1.9633, "step": 253120 }, { "epoch": 21.840579710144926, "grad_norm": 0.34197914600372314, "learning_rate": 0.001, "loss": 1.9627, "step": 253176 }, { "epoch": 21.845410628019323, "grad_norm": 0.5174090266227722, "learning_rate": 0.001, "loss": 1.9664, "step": 253232 }, { "epoch": 21.85024154589372, "grad_norm": 0.4941200017929077, "learning_rate": 0.001, "loss": 1.962, "step": 253288 }, { "epoch": 21.855072463768117, "grad_norm": 0.7915153503417969, "learning_rate": 0.001, "loss": 1.9569, "step": 253344 }, { "epoch": 21.85990338164251, "grad_norm": 1.2240104675292969, "learning_rate": 0.001, "loss": 1.952, "step": 253400 }, { "epoch": 21.864734299516908, "grad_norm": 0.4947909414768219, "learning_rate": 0.001, "loss": 1.9553, "step": 253456 }, { "epoch": 21.869565217391305, "grad_norm": 1.1203914880752563, "learning_rate": 0.001, "loss": 1.9581, "step": 253512 }, { "epoch": 21.8743961352657, "grad_norm": 0.45310690999031067, "learning_rate": 0.001, "loss": 1.9558, "step": 253568 }, { "epoch": 21.879227053140095, "grad_norm": 0.7869296073913574, "learning_rate": 0.001, "loss": 1.9506, "step": 253624 }, { "epoch": 21.884057971014492, "grad_norm": 0.7560972571372986, "learning_rate": 0.001, "loss": 1.9458, "step": 253680 }, { "epoch": 21.88888888888889, "grad_norm": 0.4116303026676178, "learning_rate": 0.001, "loss": 1.9573, "step": 253736 }, { "epoch": 21.893719806763286, "grad_norm": 0.6125767230987549, "learning_rate": 0.001, "loss": 1.9526, "step": 253792 }, { "epoch": 21.89855072463768, "grad_norm": 0.7014297246932983, "learning_rate": 0.001, "loss": 1.9817, "step": 253848 }, { "epoch": 21.903381642512077, "grad_norm": 0.861863374710083, "learning_rate": 0.001, "loss": 1.9777, "step": 253904 }, { "epoch": 21.908212560386474, "grad_norm": 0.4163946509361267, "learning_rate": 0.001, "loss": 1.965, "step": 253960 }, { "epoch": 21.91304347826087, "grad_norm": 1.5239109992980957, "learning_rate": 0.001, "loss": 1.9494, "step": 254016 }, { "epoch": 21.917874396135264, "grad_norm": 0.559363067150116, "learning_rate": 0.001, "loss": 1.9656, "step": 254072 }, { "epoch": 21.92270531400966, "grad_norm": 2.498509407043457, "learning_rate": 0.001, "loss": 1.9598, "step": 254128 }, { "epoch": 21.92753623188406, "grad_norm": 0.9224415421485901, "learning_rate": 0.001, "loss": 1.9677, "step": 254184 }, { "epoch": 21.932367149758456, "grad_norm": 6.18450403213501, "learning_rate": 0.001, "loss": 2.0191, "step": 254240 }, { "epoch": 21.93719806763285, "grad_norm": 2.443880319595337, "learning_rate": 0.001, "loss": 1.9948, "step": 254296 }, { "epoch": 21.942028985507246, "grad_norm": 0.8769632577896118, "learning_rate": 0.001, "loss": 1.986, "step": 254352 }, { "epoch": 21.946859903381643, "grad_norm": 1.0776855945587158, "learning_rate": 0.001, "loss": 1.9765, "step": 254408 }, { "epoch": 21.95169082125604, "grad_norm": 1.7792994976043701, "learning_rate": 0.001, "loss": 1.9878, "step": 254464 }, { "epoch": 21.956521739130434, "grad_norm": 2.6442644596099854, "learning_rate": 0.001, "loss": 1.9823, "step": 254520 }, { "epoch": 21.96135265700483, "grad_norm": 1.9945226907730103, "learning_rate": 0.001, "loss": 1.99, "step": 254576 }, { "epoch": 21.966183574879228, "grad_norm": 0.5213829278945923, "learning_rate": 0.001, "loss": 2.0043, "step": 254632 }, { "epoch": 21.971014492753625, "grad_norm": 0.6495558023452759, "learning_rate": 0.001, "loss": 1.9943, "step": 254688 }, { "epoch": 21.97584541062802, "grad_norm": 3.5094666481018066, "learning_rate": 0.001, "loss": 1.9883, "step": 254744 }, { "epoch": 21.980676328502415, "grad_norm": 0.7619023323059082, "learning_rate": 0.001, "loss": 2.0011, "step": 254800 }, { "epoch": 21.985507246376812, "grad_norm": 0.5413810610771179, "learning_rate": 0.001, "loss": 1.997, "step": 254856 }, { "epoch": 21.990338164251206, "grad_norm": 0.38880395889282227, "learning_rate": 0.001, "loss": 1.9819, "step": 254912 }, { "epoch": 21.995169082125603, "grad_norm": 0.6126607656478882, "learning_rate": 0.001, "loss": 1.9962, "step": 254968 }, { "epoch": 22.0, "grad_norm": 0.2876018285751343, "learning_rate": 0.001, "loss": 1.9782, "step": 255024 }, { "epoch": 22.004830917874397, "grad_norm": 1.6907565593719482, "learning_rate": 0.001, "loss": 1.9427, "step": 255080 }, { "epoch": 22.00966183574879, "grad_norm": 2.5868358612060547, "learning_rate": 0.001, "loss": 1.9524, "step": 255136 }, { "epoch": 22.014492753623188, "grad_norm": 0.373887836933136, "learning_rate": 0.001, "loss": 1.9462, "step": 255192 }, { "epoch": 22.019323671497585, "grad_norm": 0.6144969463348389, "learning_rate": 0.001, "loss": 1.9499, "step": 255248 }, { "epoch": 22.02415458937198, "grad_norm": 2.028465986251831, "learning_rate": 0.001, "loss": 1.9426, "step": 255304 }, { "epoch": 22.028985507246375, "grad_norm": 0.5481564998626709, "learning_rate": 0.001, "loss": 1.9414, "step": 255360 }, { "epoch": 22.033816425120772, "grad_norm": 0.5983251929283142, "learning_rate": 0.001, "loss": 1.9317, "step": 255416 }, { "epoch": 22.03864734299517, "grad_norm": 2.5769171714782715, "learning_rate": 0.001, "loss": 1.9324, "step": 255472 }, { "epoch": 22.043478260869566, "grad_norm": 1.0366284847259521, "learning_rate": 0.001, "loss": 1.9367, "step": 255528 }, { "epoch": 22.04830917874396, "grad_norm": 1.253514051437378, "learning_rate": 0.001, "loss": 1.9357, "step": 255584 }, { "epoch": 22.053140096618357, "grad_norm": 2.0568759441375732, "learning_rate": 0.001, "loss": 1.9293, "step": 255640 }, { "epoch": 22.057971014492754, "grad_norm": 0.457313597202301, "learning_rate": 0.001, "loss": 1.9292, "step": 255696 }, { "epoch": 22.06280193236715, "grad_norm": 1.003413200378418, "learning_rate": 0.001, "loss": 1.9358, "step": 255752 }, { "epoch": 22.067632850241544, "grad_norm": 1.9449431896209717, "learning_rate": 0.001, "loss": 1.9521, "step": 255808 }, { "epoch": 22.07246376811594, "grad_norm": 2.135148525238037, "learning_rate": 0.001, "loss": 1.944, "step": 255864 }, { "epoch": 22.07729468599034, "grad_norm": 1.2061859369277954, "learning_rate": 0.001, "loss": 1.9501, "step": 255920 }, { "epoch": 22.082125603864736, "grad_norm": 0.8198530673980713, "learning_rate": 0.001, "loss": 1.9509, "step": 255976 }, { "epoch": 22.08695652173913, "grad_norm": 1.3472731113433838, "learning_rate": 0.001, "loss": 1.9412, "step": 256032 }, { "epoch": 22.091787439613526, "grad_norm": 1.099510908126831, "learning_rate": 0.001, "loss": 1.9385, "step": 256088 }, { "epoch": 22.096618357487923, "grad_norm": 0.6818681955337524, "learning_rate": 0.001, "loss": 1.9387, "step": 256144 }, { "epoch": 22.10144927536232, "grad_norm": 1.3436921834945679, "learning_rate": 0.001, "loss": 1.9472, "step": 256200 }, { "epoch": 22.106280193236714, "grad_norm": 0.5090356469154358, "learning_rate": 0.001, "loss": 1.9653, "step": 256256 }, { "epoch": 22.11111111111111, "grad_norm": 0.6917729377746582, "learning_rate": 0.001, "loss": 1.9651, "step": 256312 }, { "epoch": 22.115942028985508, "grad_norm": 0.6725999712944031, "learning_rate": 0.001, "loss": 1.954, "step": 256368 }, { "epoch": 22.120772946859905, "grad_norm": 2.0450620651245117, "learning_rate": 0.001, "loss": 1.9479, "step": 256424 }, { "epoch": 22.1256038647343, "grad_norm": 1.9453144073486328, "learning_rate": 0.001, "loss": 1.9493, "step": 256480 }, { "epoch": 22.130434782608695, "grad_norm": 1.6416852474212646, "learning_rate": 0.001, "loss": 1.9627, "step": 256536 }, { "epoch": 22.135265700483092, "grad_norm": 3.5912067890167236, "learning_rate": 0.001, "loss": 1.973, "step": 256592 }, { "epoch": 22.14009661835749, "grad_norm": 0.9328228831291199, "learning_rate": 0.001, "loss": 1.9733, "step": 256648 }, { "epoch": 22.144927536231883, "grad_norm": 1.6226860284805298, "learning_rate": 0.001, "loss": 1.9663, "step": 256704 }, { "epoch": 22.14975845410628, "grad_norm": 9.201728820800781, "learning_rate": 0.001, "loss": 1.9643, "step": 256760 }, { "epoch": 22.154589371980677, "grad_norm": 0.9437302947044373, "learning_rate": 0.001, "loss": 1.9568, "step": 256816 }, { "epoch": 22.159420289855074, "grad_norm": 2.7350730895996094, "learning_rate": 0.001, "loss": 1.9589, "step": 256872 }, { "epoch": 22.164251207729468, "grad_norm": 3.4525701999664307, "learning_rate": 0.001, "loss": 1.95, "step": 256928 }, { "epoch": 22.169082125603865, "grad_norm": 0.9513868689537048, "learning_rate": 0.001, "loss": 1.9371, "step": 256984 }, { "epoch": 22.17391304347826, "grad_norm": 1.0627455711364746, "learning_rate": 0.001, "loss": 1.9447, "step": 257040 }, { "epoch": 22.17874396135266, "grad_norm": 2.0969691276550293, "learning_rate": 0.001, "loss": 1.9483, "step": 257096 }, { "epoch": 22.183574879227052, "grad_norm": 3.5297787189483643, "learning_rate": 0.001, "loss": 1.9683, "step": 257152 }, { "epoch": 22.18840579710145, "grad_norm": 2.0619828701019287, "learning_rate": 0.001, "loss": 1.9726, "step": 257208 }, { "epoch": 22.193236714975846, "grad_norm": 1.5788389444351196, "learning_rate": 0.001, "loss": 1.9595, "step": 257264 }, { "epoch": 22.19806763285024, "grad_norm": 2.6914665699005127, "learning_rate": 0.001, "loss": 1.9776, "step": 257320 }, { "epoch": 22.202898550724637, "grad_norm": 3.4024970531463623, "learning_rate": 0.001, "loss": 1.976, "step": 257376 }, { "epoch": 22.207729468599034, "grad_norm": 1.668730616569519, "learning_rate": 0.001, "loss": 1.9785, "step": 257432 }, { "epoch": 22.21256038647343, "grad_norm": 1.8232885599136353, "learning_rate": 0.001, "loss": 1.974, "step": 257488 }, { "epoch": 22.217391304347824, "grad_norm": 1.1327075958251953, "learning_rate": 0.001, "loss": 1.9736, "step": 257544 }, { "epoch": 22.22222222222222, "grad_norm": 1.664980173110962, "learning_rate": 0.001, "loss": 1.9579, "step": 257600 }, { "epoch": 22.22705314009662, "grad_norm": 2.429081678390503, "learning_rate": 0.001, "loss": 1.9636, "step": 257656 }, { "epoch": 22.231884057971016, "grad_norm": 2.0256426334381104, "learning_rate": 0.001, "loss": 1.9682, "step": 257712 }, { "epoch": 22.23671497584541, "grad_norm": 1.3792738914489746, "learning_rate": 0.001, "loss": 1.9635, "step": 257768 }, { "epoch": 22.241545893719806, "grad_norm": 0.6907938122749329, "learning_rate": 0.001, "loss": 1.9657, "step": 257824 }, { "epoch": 22.246376811594203, "grad_norm": 2.450981855392456, "learning_rate": 0.001, "loss": 1.9755, "step": 257880 }, { "epoch": 22.2512077294686, "grad_norm": 0.929191529750824, "learning_rate": 0.001, "loss": 1.9717, "step": 257936 }, { "epoch": 22.256038647342994, "grad_norm": 0.8927347660064697, "learning_rate": 0.001, "loss": 1.9687, "step": 257992 }, { "epoch": 22.26086956521739, "grad_norm": 1.0469293594360352, "learning_rate": 0.001, "loss": 1.9681, "step": 258048 }, { "epoch": 22.265700483091788, "grad_norm": 1.4159148931503296, "learning_rate": 0.001, "loss": 1.9693, "step": 258104 }, { "epoch": 22.270531400966185, "grad_norm": 2.8150036334991455, "learning_rate": 0.001, "loss": 1.9659, "step": 258160 }, { "epoch": 22.27536231884058, "grad_norm": 1.4112862348556519, "learning_rate": 0.001, "loss": 1.9685, "step": 258216 }, { "epoch": 22.280193236714975, "grad_norm": 1.6838390827178955, "learning_rate": 0.001, "loss": 1.9568, "step": 258272 }, { "epoch": 22.285024154589372, "grad_norm": 0.8449746370315552, "learning_rate": 0.001, "loss": 1.9663, "step": 258328 }, { "epoch": 22.28985507246377, "grad_norm": 1.397044062614441, "learning_rate": 0.001, "loss": 1.9595, "step": 258384 }, { "epoch": 22.294685990338163, "grad_norm": 0.6615034937858582, "learning_rate": 0.001, "loss": 1.9577, "step": 258440 }, { "epoch": 22.29951690821256, "grad_norm": 1.102160096168518, "learning_rate": 0.001, "loss": 1.9608, "step": 258496 }, { "epoch": 22.304347826086957, "grad_norm": 6.598901748657227, "learning_rate": 0.001, "loss": 1.971, "step": 258552 }, { "epoch": 22.309178743961354, "grad_norm": 0.7988231182098389, "learning_rate": 0.001, "loss": 1.973, "step": 258608 }, { "epoch": 22.314009661835748, "grad_norm": 1.2946897745132446, "learning_rate": 0.001, "loss": 1.9623, "step": 258664 }, { "epoch": 22.318840579710145, "grad_norm": 0.6789493560791016, "learning_rate": 0.001, "loss": 1.9583, "step": 258720 }, { "epoch": 22.32367149758454, "grad_norm": 1.1809608936309814, "learning_rate": 0.001, "loss": 1.9578, "step": 258776 }, { "epoch": 22.32850241545894, "grad_norm": 0.37459585070610046, "learning_rate": 0.001, "loss": 1.952, "step": 258832 }, { "epoch": 22.333333333333332, "grad_norm": 1.11725914478302, "learning_rate": 0.001, "loss": 1.957, "step": 258888 }, { "epoch": 22.33816425120773, "grad_norm": 1.155311942100525, "learning_rate": 0.001, "loss": 1.9567, "step": 258944 }, { "epoch": 22.342995169082126, "grad_norm": 0.4662001132965088, "learning_rate": 0.001, "loss": 1.9639, "step": 259000 }, { "epoch": 22.347826086956523, "grad_norm": 1.0789437294006348, "learning_rate": 0.001, "loss": 1.9549, "step": 259056 }, { "epoch": 22.352657004830917, "grad_norm": 0.4004863202571869, "learning_rate": 0.001, "loss": 1.9528, "step": 259112 }, { "epoch": 22.357487922705314, "grad_norm": 1.6848139762878418, "learning_rate": 0.001, "loss": 1.9478, "step": 259168 }, { "epoch": 22.36231884057971, "grad_norm": 0.5138862133026123, "learning_rate": 0.001, "loss": 1.943, "step": 259224 }, { "epoch": 22.367149758454108, "grad_norm": 0.48951512575149536, "learning_rate": 0.001, "loss": 1.9462, "step": 259280 }, { "epoch": 22.3719806763285, "grad_norm": 1.8375505208969116, "learning_rate": 0.001, "loss": 1.9468, "step": 259336 }, { "epoch": 22.3768115942029, "grad_norm": 3.8329737186431885, "learning_rate": 0.001, "loss": 1.9509, "step": 259392 }, { "epoch": 22.381642512077295, "grad_norm": 3.703517436981201, "learning_rate": 0.001, "loss": 1.9579, "step": 259448 }, { "epoch": 22.386473429951693, "grad_norm": 1.425474762916565, "learning_rate": 0.001, "loss": 1.9575, "step": 259504 }, { "epoch": 22.391304347826086, "grad_norm": 0.7898656129837036, "learning_rate": 0.001, "loss": 1.9562, "step": 259560 }, { "epoch": 22.396135265700483, "grad_norm": 0.5097714066505432, "learning_rate": 0.001, "loss": 1.9487, "step": 259616 }, { "epoch": 22.40096618357488, "grad_norm": 1.0579745769500732, "learning_rate": 0.001, "loss": 1.9608, "step": 259672 }, { "epoch": 22.405797101449274, "grad_norm": 2.4833602905273438, "learning_rate": 0.001, "loss": 1.9549, "step": 259728 }, { "epoch": 22.41062801932367, "grad_norm": 0.5513945817947388, "learning_rate": 0.001, "loss": 1.9462, "step": 259784 }, { "epoch": 22.415458937198068, "grad_norm": 0.6396842002868652, "learning_rate": 0.001, "loss": 1.9553, "step": 259840 }, { "epoch": 22.420289855072465, "grad_norm": 0.5756796002388, "learning_rate": 0.001, "loss": 1.9577, "step": 259896 }, { "epoch": 22.42512077294686, "grad_norm": 1.0407577753067017, "learning_rate": 0.001, "loss": 1.9584, "step": 259952 }, { "epoch": 22.429951690821255, "grad_norm": 0.8441076874732971, "learning_rate": 0.001, "loss": 1.9621, "step": 260008 }, { "epoch": 22.434782608695652, "grad_norm": 0.40860188007354736, "learning_rate": 0.001, "loss": 1.9587, "step": 260064 }, { "epoch": 22.43961352657005, "grad_norm": 1.200321912765503, "learning_rate": 0.001, "loss": 1.9453, "step": 260120 }, { "epoch": 22.444444444444443, "grad_norm": 1.1166417598724365, "learning_rate": 0.001, "loss": 1.9499, "step": 260176 }, { "epoch": 22.44927536231884, "grad_norm": 0.9163259863853455, "learning_rate": 0.001, "loss": 1.949, "step": 260232 }, { "epoch": 22.454106280193237, "grad_norm": 0.7720276713371277, "learning_rate": 0.001, "loss": 1.9493, "step": 260288 }, { "epoch": 22.458937198067634, "grad_norm": 1.336298942565918, "learning_rate": 0.001, "loss": 1.9481, "step": 260344 }, { "epoch": 22.463768115942027, "grad_norm": 1.046801209449768, "learning_rate": 0.001, "loss": 1.9402, "step": 260400 }, { "epoch": 22.468599033816425, "grad_norm": 0.36165645718574524, "learning_rate": 0.001, "loss": 1.9427, "step": 260456 }, { "epoch": 22.47342995169082, "grad_norm": 0.5344457626342773, "learning_rate": 0.001, "loss": 1.931, "step": 260512 }, { "epoch": 22.47826086956522, "grad_norm": 1.3417903184890747, "learning_rate": 0.001, "loss": 1.9395, "step": 260568 }, { "epoch": 22.483091787439612, "grad_norm": 2.8190696239471436, "learning_rate": 0.001, "loss": 1.9437, "step": 260624 }, { "epoch": 22.48792270531401, "grad_norm": 0.7186737656593323, "learning_rate": 0.001, "loss": 1.944, "step": 260680 }, { "epoch": 22.492753623188406, "grad_norm": 1.1330615282058716, "learning_rate": 0.001, "loss": 1.933, "step": 260736 }, { "epoch": 22.497584541062803, "grad_norm": 0.5884013772010803, "learning_rate": 0.001, "loss": 1.9414, "step": 260792 }, { "epoch": 22.502415458937197, "grad_norm": 1.282533884048462, "learning_rate": 0.001, "loss": 1.9434, "step": 260848 }, { "epoch": 22.507246376811594, "grad_norm": 0.92621248960495, "learning_rate": 0.001, "loss": 1.9455, "step": 260904 }, { "epoch": 22.51207729468599, "grad_norm": 0.4475570321083069, "learning_rate": 0.001, "loss": 1.9466, "step": 260960 }, { "epoch": 22.516908212560388, "grad_norm": 0.9518371224403381, "learning_rate": 0.001, "loss": 1.9419, "step": 261016 }, { "epoch": 22.52173913043478, "grad_norm": 0.4265706539154053, "learning_rate": 0.001, "loss": 1.9355, "step": 261072 }, { "epoch": 22.52657004830918, "grad_norm": 0.686202347278595, "learning_rate": 0.001, "loss": 1.9423, "step": 261128 }, { "epoch": 22.531400966183575, "grad_norm": 0.7299894690513611, "learning_rate": 0.001, "loss": 1.9279, "step": 261184 }, { "epoch": 22.536231884057973, "grad_norm": 2.3413619995117188, "learning_rate": 0.001, "loss": 1.9372, "step": 261240 }, { "epoch": 22.541062801932366, "grad_norm": 0.9178498983383179, "learning_rate": 0.001, "loss": 1.9369, "step": 261296 }, { "epoch": 22.545893719806763, "grad_norm": 0.8261169195175171, "learning_rate": 0.001, "loss": 1.9391, "step": 261352 }, { "epoch": 22.55072463768116, "grad_norm": 0.5099338293075562, "learning_rate": 0.001, "loss": 1.9392, "step": 261408 }, { "epoch": 22.555555555555557, "grad_norm": 0.6083644032478333, "learning_rate": 0.001, "loss": 1.9446, "step": 261464 }, { "epoch": 22.56038647342995, "grad_norm": 0.6935392022132874, "learning_rate": 0.001, "loss": 1.9526, "step": 261520 }, { "epoch": 22.565217391304348, "grad_norm": 1.1220214366912842, "learning_rate": 0.001, "loss": 1.947, "step": 261576 }, { "epoch": 22.570048309178745, "grad_norm": 0.49901720881462097, "learning_rate": 0.001, "loss": 1.9426, "step": 261632 }, { "epoch": 22.57487922705314, "grad_norm": 1.1224339008331299, "learning_rate": 0.001, "loss": 1.9494, "step": 261688 }, { "epoch": 22.579710144927535, "grad_norm": 1.5109951496124268, "learning_rate": 0.001, "loss": 1.9571, "step": 261744 }, { "epoch": 22.584541062801932, "grad_norm": 1.7177014350891113, "learning_rate": 0.001, "loss": 1.9471, "step": 261800 }, { "epoch": 22.58937198067633, "grad_norm": 0.31399255990982056, "learning_rate": 0.001, "loss": 1.9452, "step": 261856 }, { "epoch": 22.594202898550726, "grad_norm": 1.3104915618896484, "learning_rate": 0.001, "loss": 1.944, "step": 261912 }, { "epoch": 22.59903381642512, "grad_norm": 0.6545467972755432, "learning_rate": 0.001, "loss": 1.9369, "step": 261968 }, { "epoch": 22.603864734299517, "grad_norm": 0.49555808305740356, "learning_rate": 0.001, "loss": 1.9378, "step": 262024 }, { "epoch": 22.608695652173914, "grad_norm": 0.7857664227485657, "learning_rate": 0.001, "loss": 1.9496, "step": 262080 }, { "epoch": 22.613526570048307, "grad_norm": 0.35892102122306824, "learning_rate": 0.001, "loss": 1.951, "step": 262136 }, { "epoch": 22.618357487922705, "grad_norm": 0.6544508337974548, "learning_rate": 0.001, "loss": 1.9429, "step": 262192 }, { "epoch": 22.6231884057971, "grad_norm": 0.42215535044670105, "learning_rate": 0.001, "loss": 1.9331, "step": 262248 }, { "epoch": 22.6280193236715, "grad_norm": 1.4311772584915161, "learning_rate": 0.001, "loss": 1.9538, "step": 262304 }, { "epoch": 22.632850241545892, "grad_norm": 0.33909595012664795, "learning_rate": 0.001, "loss": 1.9479, "step": 262360 }, { "epoch": 22.63768115942029, "grad_norm": 0.6888494491577148, "learning_rate": 0.001, "loss": 1.9347, "step": 262416 }, { "epoch": 22.642512077294686, "grad_norm": 0.7251987457275391, "learning_rate": 0.001, "loss": 1.9536, "step": 262472 }, { "epoch": 22.647342995169083, "grad_norm": 0.98261958360672, "learning_rate": 0.001, "loss": 1.9581, "step": 262528 }, { "epoch": 22.652173913043477, "grad_norm": 0.4945251941680908, "learning_rate": 0.001, "loss": 1.9555, "step": 262584 }, { "epoch": 22.657004830917874, "grad_norm": 1.670192003250122, "learning_rate": 0.001, "loss": 1.9472, "step": 262640 }, { "epoch": 22.66183574879227, "grad_norm": 1.2051167488098145, "learning_rate": 0.001, "loss": 1.9592, "step": 262696 }, { "epoch": 22.666666666666668, "grad_norm": 2.4427123069763184, "learning_rate": 0.001, "loss": 1.9454, "step": 262752 }, { "epoch": 22.67149758454106, "grad_norm": 1.210001826286316, "learning_rate": 0.001, "loss": 1.9552, "step": 262808 }, { "epoch": 22.67632850241546, "grad_norm": 0.5402510166168213, "learning_rate": 0.001, "loss": 1.9516, "step": 262864 }, { "epoch": 22.681159420289855, "grad_norm": 1.3431830406188965, "learning_rate": 0.001, "loss": 1.9525, "step": 262920 }, { "epoch": 22.685990338164252, "grad_norm": 0.5810667872428894, "learning_rate": 0.001, "loss": 1.9515, "step": 262976 }, { "epoch": 22.690821256038646, "grad_norm": 2.991968870162964, "learning_rate": 0.001, "loss": 1.9561, "step": 263032 }, { "epoch": 22.695652173913043, "grad_norm": 2.019123077392578, "learning_rate": 0.001, "loss": 1.9539, "step": 263088 }, { "epoch": 22.70048309178744, "grad_norm": 4.0230865478515625, "learning_rate": 0.001, "loss": 1.9634, "step": 263144 }, { "epoch": 22.705314009661837, "grad_norm": 5.997570037841797, "learning_rate": 0.001, "loss": 1.9585, "step": 263200 }, { "epoch": 22.71014492753623, "grad_norm": 1.5734467506408691, "learning_rate": 0.001, "loss": 1.9598, "step": 263256 }, { "epoch": 22.714975845410628, "grad_norm": 1.5002771615982056, "learning_rate": 0.001, "loss": 1.955, "step": 263312 }, { "epoch": 22.719806763285025, "grad_norm": 1.072264313697815, "learning_rate": 0.001, "loss": 1.96, "step": 263368 }, { "epoch": 22.72463768115942, "grad_norm": 0.8158003687858582, "learning_rate": 0.001, "loss": 1.964, "step": 263424 }, { "epoch": 22.729468599033815, "grad_norm": 7.6841816902160645, "learning_rate": 0.001, "loss": 1.9657, "step": 263480 }, { "epoch": 22.734299516908212, "grad_norm": 0.35602399706840515, "learning_rate": 0.001, "loss": 1.9505, "step": 263536 }, { "epoch": 22.73913043478261, "grad_norm": 3.834961175918579, "learning_rate": 0.001, "loss": 1.9471, "step": 263592 }, { "epoch": 22.743961352657006, "grad_norm": 1.1503311395645142, "learning_rate": 0.001, "loss": 1.9422, "step": 263648 }, { "epoch": 22.7487922705314, "grad_norm": 2.0164794921875, "learning_rate": 0.001, "loss": 1.9425, "step": 263704 }, { "epoch": 22.753623188405797, "grad_norm": 0.3509223759174347, "learning_rate": 0.001, "loss": 1.9396, "step": 263760 }, { "epoch": 22.758454106280194, "grad_norm": 0.9490211009979248, "learning_rate": 0.001, "loss": 1.9367, "step": 263816 }, { "epoch": 22.76328502415459, "grad_norm": 0.7100268602371216, "learning_rate": 0.001, "loss": 1.9328, "step": 263872 }, { "epoch": 22.768115942028984, "grad_norm": 0.572968065738678, "learning_rate": 0.001, "loss": 1.937, "step": 263928 }, { "epoch": 22.77294685990338, "grad_norm": 0.5126324892044067, "learning_rate": 0.001, "loss": 1.9326, "step": 263984 }, { "epoch": 22.77777777777778, "grad_norm": 2.8755831718444824, "learning_rate": 0.001, "loss": 1.9236, "step": 264040 }, { "epoch": 22.782608695652176, "grad_norm": 0.2994726598262787, "learning_rate": 0.001, "loss": 1.9353, "step": 264096 }, { "epoch": 22.78743961352657, "grad_norm": 0.9157410264015198, "learning_rate": 0.001, "loss": 1.9348, "step": 264152 }, { "epoch": 22.792270531400966, "grad_norm": 0.5256150960922241, "learning_rate": 0.001, "loss": 1.9411, "step": 264208 }, { "epoch": 22.797101449275363, "grad_norm": 1.080043077468872, "learning_rate": 0.001, "loss": 1.9361, "step": 264264 }, { "epoch": 22.80193236714976, "grad_norm": 0.4238528311252594, "learning_rate": 0.001, "loss": 1.9336, "step": 264320 }, { "epoch": 22.806763285024154, "grad_norm": 0.33494430780410767, "learning_rate": 0.001, "loss": 1.9299, "step": 264376 }, { "epoch": 22.81159420289855, "grad_norm": 0.6373468637466431, "learning_rate": 0.001, "loss": 1.931, "step": 264432 }, { "epoch": 22.816425120772948, "grad_norm": 1.9185104370117188, "learning_rate": 0.001, "loss": 1.9452, "step": 264488 }, { "epoch": 22.82125603864734, "grad_norm": 4.179603099822998, "learning_rate": 0.001, "loss": 1.9352, "step": 264544 }, { "epoch": 22.82608695652174, "grad_norm": 16.9693660736084, "learning_rate": 0.001, "loss": 1.9481, "step": 264600 }, { "epoch": 22.830917874396135, "grad_norm": 0.431379497051239, "learning_rate": 0.001, "loss": 1.9401, "step": 264656 }, { "epoch": 22.835748792270532, "grad_norm": 0.48328131437301636, "learning_rate": 0.001, "loss": 1.9415, "step": 264712 }, { "epoch": 22.840579710144926, "grad_norm": 0.3476356267929077, "learning_rate": 0.001, "loss": 1.9343, "step": 264768 }, { "epoch": 22.845410628019323, "grad_norm": 1.6065945625305176, "learning_rate": 0.001, "loss": 1.9359, "step": 264824 }, { "epoch": 22.85024154589372, "grad_norm": 0.8579918742179871, "learning_rate": 0.001, "loss": 1.9473, "step": 264880 }, { "epoch": 22.855072463768117, "grad_norm": 0.678260326385498, "learning_rate": 0.001, "loss": 1.9437, "step": 264936 }, { "epoch": 22.85990338164251, "grad_norm": 1.4892468452453613, "learning_rate": 0.001, "loss": 1.9596, "step": 264992 }, { "epoch": 22.864734299516908, "grad_norm": 0.3541586995124817, "learning_rate": 0.001, "loss": 1.9522, "step": 265048 }, { "epoch": 22.869565217391305, "grad_norm": 0.328909695148468, "learning_rate": 0.001, "loss": 1.9413, "step": 265104 }, { "epoch": 22.8743961352657, "grad_norm": 0.42425069212913513, "learning_rate": 0.001, "loss": 1.9496, "step": 265160 }, { "epoch": 22.879227053140095, "grad_norm": 1.1150788068771362, "learning_rate": 0.001, "loss": 1.9402, "step": 265216 }, { "epoch": 22.884057971014492, "grad_norm": 1.2667944431304932, "learning_rate": 0.001, "loss": 1.9502, "step": 265272 }, { "epoch": 22.88888888888889, "grad_norm": 0.9590685963630676, "learning_rate": 0.001, "loss": 1.9565, "step": 265328 }, { "epoch": 22.893719806763286, "grad_norm": 0.48470574617385864, "learning_rate": 0.001, "loss": 1.9455, "step": 265384 }, { "epoch": 22.89855072463768, "grad_norm": 0.7624456286430359, "learning_rate": 0.001, "loss": 1.9391, "step": 265440 }, { "epoch": 22.903381642512077, "grad_norm": 0.44625183939933777, "learning_rate": 0.001, "loss": 1.9341, "step": 265496 }, { "epoch": 22.908212560386474, "grad_norm": 0.4900270700454712, "learning_rate": 0.001, "loss": 1.9433, "step": 265552 }, { "epoch": 22.91304347826087, "grad_norm": 0.409993052482605, "learning_rate": 0.001, "loss": 1.9526, "step": 265608 }, { "epoch": 22.917874396135264, "grad_norm": 1.2805964946746826, "learning_rate": 0.001, "loss": 1.942, "step": 265664 }, { "epoch": 22.92270531400966, "grad_norm": 0.45714086294174194, "learning_rate": 0.001, "loss": 1.9451, "step": 265720 }, { "epoch": 22.92753623188406, "grad_norm": 0.6697834134101868, "learning_rate": 0.001, "loss": 1.9438, "step": 265776 }, { "epoch": 22.932367149758456, "grad_norm": 1.9860825538635254, "learning_rate": 0.001, "loss": 1.9645, "step": 265832 }, { "epoch": 22.93719806763285, "grad_norm": 0.4236160218715668, "learning_rate": 0.001, "loss": 1.9564, "step": 265888 }, { "epoch": 22.942028985507246, "grad_norm": 11.387377738952637, "learning_rate": 0.001, "loss": 1.9652, "step": 265944 }, { "epoch": 22.946859903381643, "grad_norm": 6.489840507507324, "learning_rate": 0.001, "loss": 1.9927, "step": 266000 }, { "epoch": 22.95169082125604, "grad_norm": 1.2012877464294434, "learning_rate": 0.001, "loss": 1.9726, "step": 266056 }, { "epoch": 22.956521739130434, "grad_norm": 0.5947778820991516, "learning_rate": 0.001, "loss": 1.9632, "step": 266112 }, { "epoch": 22.96135265700483, "grad_norm": 0.6485669016838074, "learning_rate": 0.001, "loss": 1.9597, "step": 266168 }, { "epoch": 22.966183574879228, "grad_norm": 1.7881348133087158, "learning_rate": 0.001, "loss": 1.9629, "step": 266224 }, { "epoch": 22.971014492753625, "grad_norm": 0.38715559244155884, "learning_rate": 0.001, "loss": 1.9528, "step": 266280 }, { "epoch": 22.97584541062802, "grad_norm": 0.6476491689682007, "learning_rate": 0.001, "loss": 1.9587, "step": 266336 }, { "epoch": 22.980676328502415, "grad_norm": 0.9831525683403015, "learning_rate": 0.001, "loss": 1.9766, "step": 266392 }, { "epoch": 22.985507246376812, "grad_norm": 1.6372984647750854, "learning_rate": 0.001, "loss": 1.9687, "step": 266448 }, { "epoch": 22.990338164251206, "grad_norm": 1.201743483543396, "learning_rate": 0.001, "loss": 1.9591, "step": 266504 }, { "epoch": 22.995169082125603, "grad_norm": 0.8657007217407227, "learning_rate": 0.001, "loss": 1.9533, "step": 266560 }, { "epoch": 23.0, "grad_norm": 0.5309742093086243, "learning_rate": 0.001, "loss": 1.9551, "step": 266616 }, { "epoch": 23.004830917874397, "grad_norm": 0.44991835951805115, "learning_rate": 0.001, "loss": 1.9153, "step": 266672 }, { "epoch": 23.00966183574879, "grad_norm": 0.7238132357597351, "learning_rate": 0.001, "loss": 1.9129, "step": 266728 }, { "epoch": 23.014492753623188, "grad_norm": 0.6165079474449158, "learning_rate": 0.001, "loss": 1.9192, "step": 266784 }, { "epoch": 23.019323671497585, "grad_norm": 0.3864997625350952, "learning_rate": 0.001, "loss": 1.9213, "step": 266840 }, { "epoch": 23.02415458937198, "grad_norm": 2.08044171333313, "learning_rate": 0.001, "loss": 1.9079, "step": 266896 }, { "epoch": 23.028985507246375, "grad_norm": 0.41637352108955383, "learning_rate": 0.001, "loss": 1.9034, "step": 266952 }, { "epoch": 23.033816425120772, "grad_norm": 0.2871229648590088, "learning_rate": 0.001, "loss": 1.9032, "step": 267008 }, { "epoch": 23.03864734299517, "grad_norm": 0.48673805594444275, "learning_rate": 0.001, "loss": 1.9013, "step": 267064 }, { "epoch": 23.043478260869566, "grad_norm": 1.3410234451293945, "learning_rate": 0.001, "loss": 1.9032, "step": 267120 }, { "epoch": 23.04830917874396, "grad_norm": 0.5184250473976135, "learning_rate": 0.001, "loss": 1.9099, "step": 267176 }, { "epoch": 23.053140096618357, "grad_norm": 0.3497347831726074, "learning_rate": 0.001, "loss": 1.917, "step": 267232 }, { "epoch": 23.057971014492754, "grad_norm": 1.1664408445358276, "learning_rate": 0.001, "loss": 1.921, "step": 267288 }, { "epoch": 23.06280193236715, "grad_norm": 0.45426231622695923, "learning_rate": 0.001, "loss": 1.9101, "step": 267344 }, { "epoch": 23.067632850241544, "grad_norm": 0.833065927028656, "learning_rate": 0.001, "loss": 1.9098, "step": 267400 }, { "epoch": 23.07246376811594, "grad_norm": 0.6405877470970154, "learning_rate": 0.001, "loss": 1.9242, "step": 267456 }, { "epoch": 23.07729468599034, "grad_norm": 0.3535110652446747, "learning_rate": 0.001, "loss": 1.9143, "step": 267512 }, { "epoch": 23.082125603864736, "grad_norm": 0.7435158491134644, "learning_rate": 0.001, "loss": 1.9063, "step": 267568 }, { "epoch": 23.08695652173913, "grad_norm": 1.4694548845291138, "learning_rate": 0.001, "loss": 1.9115, "step": 267624 }, { "epoch": 23.091787439613526, "grad_norm": 0.5443825721740723, "learning_rate": 0.001, "loss": 1.9243, "step": 267680 }, { "epoch": 23.096618357487923, "grad_norm": 1.9281439781188965, "learning_rate": 0.001, "loss": 1.9108, "step": 267736 }, { "epoch": 23.10144927536232, "grad_norm": 3.498905658721924, "learning_rate": 0.001, "loss": 1.9149, "step": 267792 }, { "epoch": 23.106280193236714, "grad_norm": 0.4698292315006256, "learning_rate": 0.001, "loss": 1.9205, "step": 267848 }, { "epoch": 23.11111111111111, "grad_norm": 0.6591044664382935, "learning_rate": 0.001, "loss": 1.925, "step": 267904 }, { "epoch": 23.115942028985508, "grad_norm": 0.9701511859893799, "learning_rate": 0.001, "loss": 1.9264, "step": 267960 }, { "epoch": 23.120772946859905, "grad_norm": 1.7473798990249634, "learning_rate": 0.001, "loss": 1.9244, "step": 268016 }, { "epoch": 23.1256038647343, "grad_norm": 1.1335879564285278, "learning_rate": 0.001, "loss": 1.9253, "step": 268072 }, { "epoch": 23.130434782608695, "grad_norm": 0.6535305976867676, "learning_rate": 0.001, "loss": 1.9285, "step": 268128 }, { "epoch": 23.135265700483092, "grad_norm": 0.9533494710922241, "learning_rate": 0.001, "loss": 1.9293, "step": 268184 }, { "epoch": 23.14009661835749, "grad_norm": 0.6012714505195618, "learning_rate": 0.001, "loss": 1.9458, "step": 268240 }, { "epoch": 23.144927536231883, "grad_norm": 0.97189861536026, "learning_rate": 0.001, "loss": 1.9405, "step": 268296 }, { "epoch": 23.14975845410628, "grad_norm": 0.41616836190223694, "learning_rate": 0.001, "loss": 1.9363, "step": 268352 }, { "epoch": 23.154589371980677, "grad_norm": 1.1855220794677734, "learning_rate": 0.001, "loss": 1.9197, "step": 268408 }, { "epoch": 23.159420289855074, "grad_norm": 0.9708030223846436, "learning_rate": 0.001, "loss": 1.9225, "step": 268464 }, { "epoch": 23.164251207729468, "grad_norm": 0.7180496454238892, "learning_rate": 0.001, "loss": 1.9173, "step": 268520 }, { "epoch": 23.169082125603865, "grad_norm": 0.44036784768104553, "learning_rate": 0.001, "loss": 1.9196, "step": 268576 }, { "epoch": 23.17391304347826, "grad_norm": 0.8156594038009644, "learning_rate": 0.001, "loss": 1.9118, "step": 268632 }, { "epoch": 23.17874396135266, "grad_norm": 0.5292342901229858, "learning_rate": 0.001, "loss": 1.9125, "step": 268688 }, { "epoch": 23.183574879227052, "grad_norm": 0.8278608918190002, "learning_rate": 0.001, "loss": 1.9213, "step": 268744 }, { "epoch": 23.18840579710145, "grad_norm": 1.8440238237380981, "learning_rate": 0.001, "loss": 1.9168, "step": 268800 }, { "epoch": 23.193236714975846, "grad_norm": 1.2640239000320435, "learning_rate": 0.001, "loss": 1.9111, "step": 268856 }, { "epoch": 23.19806763285024, "grad_norm": 1.6505805253982544, "learning_rate": 0.001, "loss": 1.9142, "step": 268912 }, { "epoch": 23.202898550724637, "grad_norm": 0.7507835626602173, "learning_rate": 0.001, "loss": 1.916, "step": 268968 }, { "epoch": 23.207729468599034, "grad_norm": 0.6968690752983093, "learning_rate": 0.001, "loss": 1.9077, "step": 269024 }, { "epoch": 23.21256038647343, "grad_norm": 0.44710081815719604, "learning_rate": 0.001, "loss": 1.9208, "step": 269080 }, { "epoch": 23.217391304347824, "grad_norm": 2.3278090953826904, "learning_rate": 0.001, "loss": 1.9082, "step": 269136 }, { "epoch": 23.22222222222222, "grad_norm": 0.7018927335739136, "learning_rate": 0.001, "loss": 1.9075, "step": 269192 }, { "epoch": 23.22705314009662, "grad_norm": 0.6166730523109436, "learning_rate": 0.001, "loss": 1.9133, "step": 269248 }, { "epoch": 23.231884057971016, "grad_norm": 2.4484236240386963, "learning_rate": 0.001, "loss": 1.9207, "step": 269304 }, { "epoch": 23.23671497584541, "grad_norm": 0.44285163283348083, "learning_rate": 0.001, "loss": 1.9267, "step": 269360 }, { "epoch": 23.241545893719806, "grad_norm": 0.5581911206245422, "learning_rate": 0.001, "loss": 1.9172, "step": 269416 }, { "epoch": 23.246376811594203, "grad_norm": 1.135427474975586, "learning_rate": 0.001, "loss": 1.9268, "step": 269472 }, { "epoch": 23.2512077294686, "grad_norm": 0.9358544945716858, "learning_rate": 0.001, "loss": 1.926, "step": 269528 }, { "epoch": 23.256038647342994, "grad_norm": 0.7905175089836121, "learning_rate": 0.001, "loss": 1.913, "step": 269584 }, { "epoch": 23.26086956521739, "grad_norm": 1.0659804344177246, "learning_rate": 0.001, "loss": 1.9087, "step": 269640 }, { "epoch": 23.265700483091788, "grad_norm": 0.39708977937698364, "learning_rate": 0.001, "loss": 1.9268, "step": 269696 }, { "epoch": 23.270531400966185, "grad_norm": 2.2125682830810547, "learning_rate": 0.001, "loss": 1.9285, "step": 269752 }, { "epoch": 23.27536231884058, "grad_norm": 0.63042813539505, "learning_rate": 0.001, "loss": 1.9315, "step": 269808 }, { "epoch": 23.280193236714975, "grad_norm": 0.5057905912399292, "learning_rate": 0.001, "loss": 1.9136, "step": 269864 }, { "epoch": 23.285024154589372, "grad_norm": 0.6847552061080933, "learning_rate": 0.001, "loss": 1.9216, "step": 269920 }, { "epoch": 23.28985507246377, "grad_norm": 0.7906085848808289, "learning_rate": 0.001, "loss": 1.9179, "step": 269976 }, { "epoch": 23.294685990338163, "grad_norm": 0.6572842597961426, "learning_rate": 0.001, "loss": 1.925, "step": 270032 }, { "epoch": 23.29951690821256, "grad_norm": 2.6821482181549072, "learning_rate": 0.001, "loss": 1.9338, "step": 270088 }, { "epoch": 23.304347826086957, "grad_norm": 16.653940200805664, "learning_rate": 0.001, "loss": 1.938, "step": 270144 }, { "epoch": 23.309178743961354, "grad_norm": 1.4522290229797363, "learning_rate": 0.001, "loss": 1.9419, "step": 270200 }, { "epoch": 23.314009661835748, "grad_norm": 2.248033046722412, "learning_rate": 0.001, "loss": 1.9318, "step": 270256 }, { "epoch": 23.318840579710145, "grad_norm": 2.474079132080078, "learning_rate": 0.001, "loss": 1.9322, "step": 270312 }, { "epoch": 23.32367149758454, "grad_norm": 0.5638799667358398, "learning_rate": 0.001, "loss": 1.9458, "step": 270368 }, { "epoch": 23.32850241545894, "grad_norm": 0.9347525835037231, "learning_rate": 0.001, "loss": 1.9452, "step": 270424 }, { "epoch": 23.333333333333332, "grad_norm": 0.6353224515914917, "learning_rate": 0.001, "loss": 1.9473, "step": 270480 }, { "epoch": 23.33816425120773, "grad_norm": 13.776043891906738, "learning_rate": 0.001, "loss": 1.9432, "step": 270536 }, { "epoch": 23.342995169082126, "grad_norm": 0.9044269919395447, "learning_rate": 0.001, "loss": 1.9396, "step": 270592 }, { "epoch": 23.347826086956523, "grad_norm": 0.5013958215713501, "learning_rate": 0.001, "loss": 1.9478, "step": 270648 }, { "epoch": 23.352657004830917, "grad_norm": 1.413511037826538, "learning_rate": 0.001, "loss": 1.9481, "step": 270704 }, { "epoch": 23.357487922705314, "grad_norm": 1.0283443927764893, "learning_rate": 0.001, "loss": 1.9455, "step": 270760 }, { "epoch": 23.36231884057971, "grad_norm": 0.4326821565628052, "learning_rate": 0.001, "loss": 1.9305, "step": 270816 }, { "epoch": 23.367149758454108, "grad_norm": 0.513903021812439, "learning_rate": 0.001, "loss": 1.9313, "step": 270872 }, { "epoch": 23.3719806763285, "grad_norm": 1.6875108480453491, "learning_rate": 0.001, "loss": 1.9228, "step": 270928 }, { "epoch": 23.3768115942029, "grad_norm": 0.6795575618743896, "learning_rate": 0.001, "loss": 1.9263, "step": 270984 }, { "epoch": 23.381642512077295, "grad_norm": 0.40820255875587463, "learning_rate": 0.001, "loss": 1.9278, "step": 271040 }, { "epoch": 23.386473429951693, "grad_norm": 0.39137184619903564, "learning_rate": 0.001, "loss": 1.935, "step": 271096 }, { "epoch": 23.391304347826086, "grad_norm": 0.9427878856658936, "learning_rate": 0.001, "loss": 1.9358, "step": 271152 }, { "epoch": 23.396135265700483, "grad_norm": 0.803469717502594, "learning_rate": 0.001, "loss": 1.933, "step": 271208 }, { "epoch": 23.40096618357488, "grad_norm": 0.5574473142623901, "learning_rate": 0.001, "loss": 1.9374, "step": 271264 }, { "epoch": 23.405797101449274, "grad_norm": 0.9779496788978577, "learning_rate": 0.001, "loss": 1.9358, "step": 271320 }, { "epoch": 23.41062801932367, "grad_norm": 0.9210959076881409, "learning_rate": 0.001, "loss": 1.9385, "step": 271376 }, { "epoch": 23.415458937198068, "grad_norm": 1.215304970741272, "learning_rate": 0.001, "loss": 1.9464, "step": 271432 }, { "epoch": 23.420289855072465, "grad_norm": 1.754247784614563, "learning_rate": 0.001, "loss": 1.9331, "step": 271488 }, { "epoch": 23.42512077294686, "grad_norm": 0.5789891481399536, "learning_rate": 0.001, "loss": 1.9337, "step": 271544 }, { "epoch": 23.429951690821255, "grad_norm": 3.837973117828369, "learning_rate": 0.001, "loss": 1.9241, "step": 271600 }, { "epoch": 23.434782608695652, "grad_norm": 0.5541539192199707, "learning_rate": 0.001, "loss": 1.9251, "step": 271656 }, { "epoch": 23.43961352657005, "grad_norm": 0.6401068568229675, "learning_rate": 0.001, "loss": 1.933, "step": 271712 }, { "epoch": 23.444444444444443, "grad_norm": 4.834632873535156, "learning_rate": 0.001, "loss": 1.9261, "step": 271768 }, { "epoch": 23.44927536231884, "grad_norm": 0.6982259750366211, "learning_rate": 0.001, "loss": 1.9233, "step": 271824 }, { "epoch": 23.454106280193237, "grad_norm": 0.6482769846916199, "learning_rate": 0.001, "loss": 1.9293, "step": 271880 }, { "epoch": 23.458937198067634, "grad_norm": 0.29602423310279846, "learning_rate": 0.001, "loss": 1.936, "step": 271936 }, { "epoch": 23.463768115942027, "grad_norm": 0.7366529107093811, "learning_rate": 0.001, "loss": 1.926, "step": 271992 }, { "epoch": 23.468599033816425, "grad_norm": 0.7094994187355042, "learning_rate": 0.001, "loss": 1.9302, "step": 272048 }, { "epoch": 23.47342995169082, "grad_norm": 0.3691755533218384, "learning_rate": 0.001, "loss": 1.9278, "step": 272104 }, { "epoch": 23.47826086956522, "grad_norm": 0.6301421523094177, "learning_rate": 0.001, "loss": 1.9332, "step": 272160 }, { "epoch": 23.483091787439612, "grad_norm": 1.0800213813781738, "learning_rate": 0.001, "loss": 1.9457, "step": 272216 }, { "epoch": 23.48792270531401, "grad_norm": 1.2811214923858643, "learning_rate": 0.001, "loss": 1.9453, "step": 272272 }, { "epoch": 23.492753623188406, "grad_norm": 1.2232860326766968, "learning_rate": 0.001, "loss": 1.94, "step": 272328 }, { "epoch": 23.497584541062803, "grad_norm": 0.7985347509384155, "learning_rate": 0.001, "loss": 1.9425, "step": 272384 }, { "epoch": 23.502415458937197, "grad_norm": 1.2063349485397339, "learning_rate": 0.001, "loss": 1.9311, "step": 272440 }, { "epoch": 23.507246376811594, "grad_norm": 0.3971107304096222, "learning_rate": 0.001, "loss": 1.9282, "step": 272496 }, { "epoch": 23.51207729468599, "grad_norm": 0.5735574960708618, "learning_rate": 0.001, "loss": 1.9155, "step": 272552 }, { "epoch": 23.516908212560388, "grad_norm": 0.613368570804596, "learning_rate": 0.001, "loss": 1.9353, "step": 272608 }, { "epoch": 23.52173913043478, "grad_norm": 0.3817559778690338, "learning_rate": 0.001, "loss": 1.9254, "step": 272664 }, { "epoch": 23.52657004830918, "grad_norm": 2.759641170501709, "learning_rate": 0.001, "loss": 1.9198, "step": 272720 }, { "epoch": 23.531400966183575, "grad_norm": 2.7991578578948975, "learning_rate": 0.001, "loss": 1.934, "step": 272776 }, { "epoch": 23.536231884057973, "grad_norm": 0.6614062190055847, "learning_rate": 0.001, "loss": 1.9448, "step": 272832 }, { "epoch": 23.541062801932366, "grad_norm": 1.376397967338562, "learning_rate": 0.001, "loss": 1.955, "step": 272888 }, { "epoch": 23.545893719806763, "grad_norm": 0.43629536032676697, "learning_rate": 0.001, "loss": 1.9366, "step": 272944 }, { "epoch": 23.55072463768116, "grad_norm": 0.5023860335350037, "learning_rate": 0.001, "loss": 1.9353, "step": 273000 }, { "epoch": 23.555555555555557, "grad_norm": 0.6915550231933594, "learning_rate": 0.001, "loss": 1.9348, "step": 273056 }, { "epoch": 23.56038647342995, "grad_norm": 1.468752145767212, "learning_rate": 0.001, "loss": 1.9475, "step": 273112 }, { "epoch": 23.565217391304348, "grad_norm": 1.9340236186981201, "learning_rate": 0.001, "loss": 1.9348, "step": 273168 }, { "epoch": 23.570048309178745, "grad_norm": 0.5305664539337158, "learning_rate": 0.001, "loss": 1.9357, "step": 273224 }, { "epoch": 23.57487922705314, "grad_norm": 0.859559953212738, "learning_rate": 0.001, "loss": 1.9352, "step": 273280 }, { "epoch": 23.579710144927535, "grad_norm": 0.8822153210639954, "learning_rate": 0.001, "loss": 1.9334, "step": 273336 }, { "epoch": 23.584541062801932, "grad_norm": 0.6296571493148804, "learning_rate": 0.001, "loss": 1.9437, "step": 273392 }, { "epoch": 23.58937198067633, "grad_norm": 1.165420413017273, "learning_rate": 0.001, "loss": 1.9532, "step": 273448 }, { "epoch": 23.594202898550726, "grad_norm": 0.3977389335632324, "learning_rate": 0.001, "loss": 1.9552, "step": 273504 }, { "epoch": 23.59903381642512, "grad_norm": 1.1903618574142456, "learning_rate": 0.001, "loss": 1.9627, "step": 273560 }, { "epoch": 23.603864734299517, "grad_norm": 1.353217363357544, "learning_rate": 0.001, "loss": 1.9471, "step": 273616 }, { "epoch": 23.608695652173914, "grad_norm": 1.2180839776992798, "learning_rate": 0.001, "loss": 1.9364, "step": 273672 }, { "epoch": 23.613526570048307, "grad_norm": 0.4751861095428467, "learning_rate": 0.001, "loss": 1.93, "step": 273728 }, { "epoch": 23.618357487922705, "grad_norm": 0.9146961569786072, "learning_rate": 0.001, "loss": 1.932, "step": 273784 }, { "epoch": 23.6231884057971, "grad_norm": 0.5517374873161316, "learning_rate": 0.001, "loss": 1.9304, "step": 273840 }, { "epoch": 23.6280193236715, "grad_norm": 2.276715040206909, "learning_rate": 0.001, "loss": 1.936, "step": 273896 }, { "epoch": 23.632850241545892, "grad_norm": 0.6137073636054993, "learning_rate": 0.001, "loss": 1.9293, "step": 273952 }, { "epoch": 23.63768115942029, "grad_norm": 0.6306750774383545, "learning_rate": 0.001, "loss": 1.9159, "step": 274008 }, { "epoch": 23.642512077294686, "grad_norm": 1.0202962160110474, "learning_rate": 0.001, "loss": 1.9202, "step": 274064 }, { "epoch": 23.647342995169083, "grad_norm": 0.8184786438941956, "learning_rate": 0.001, "loss": 1.92, "step": 274120 }, { "epoch": 23.652173913043477, "grad_norm": 0.5771294832229614, "learning_rate": 0.001, "loss": 1.921, "step": 274176 }, { "epoch": 23.657004830917874, "grad_norm": 0.999147355556488, "learning_rate": 0.001, "loss": 1.9385, "step": 274232 }, { "epoch": 23.66183574879227, "grad_norm": 1.0157830715179443, "learning_rate": 0.001, "loss": 1.9354, "step": 274288 }, { "epoch": 23.666666666666668, "grad_norm": 1.5097806453704834, "learning_rate": 0.001, "loss": 1.9556, "step": 274344 }, { "epoch": 23.67149758454106, "grad_norm": 0.7526201009750366, "learning_rate": 0.001, "loss": 1.9477, "step": 274400 }, { "epoch": 23.67632850241546, "grad_norm": 2.9513816833496094, "learning_rate": 0.001, "loss": 1.9335, "step": 274456 }, { "epoch": 23.681159420289855, "grad_norm": 1.3187352418899536, "learning_rate": 0.001, "loss": 1.9352, "step": 274512 }, { "epoch": 23.685990338164252, "grad_norm": 1.5674198865890503, "learning_rate": 0.001, "loss": 1.9372, "step": 274568 }, { "epoch": 23.690821256038646, "grad_norm": 2.2266364097595215, "learning_rate": 0.001, "loss": 1.9383, "step": 274624 }, { "epoch": 23.695652173913043, "grad_norm": 1.3555537462234497, "learning_rate": 0.001, "loss": 1.9405, "step": 274680 }, { "epoch": 23.70048309178744, "grad_norm": 1.062801480293274, "learning_rate": 0.001, "loss": 1.9376, "step": 274736 }, { "epoch": 23.705314009661837, "grad_norm": 1.090857744216919, "learning_rate": 0.001, "loss": 1.933, "step": 274792 }, { "epoch": 23.71014492753623, "grad_norm": 1.7963591814041138, "learning_rate": 0.001, "loss": 1.9413, "step": 274848 }, { "epoch": 23.714975845410628, "grad_norm": 0.6484135389328003, "learning_rate": 0.001, "loss": 1.9368, "step": 274904 }, { "epoch": 23.719806763285025, "grad_norm": 0.6178915500640869, "learning_rate": 0.001, "loss": 1.9415, "step": 274960 }, { "epoch": 23.72463768115942, "grad_norm": 1.8334332704544067, "learning_rate": 0.001, "loss": 1.934, "step": 275016 }, { "epoch": 23.729468599033815, "grad_norm": 1.631144404411316, "learning_rate": 0.001, "loss": 1.9416, "step": 275072 }, { "epoch": 23.734299516908212, "grad_norm": 0.9070121049880981, "learning_rate": 0.001, "loss": 1.9406, "step": 275128 }, { "epoch": 23.73913043478261, "grad_norm": 0.8993938565254211, "learning_rate": 0.001, "loss": 1.9447, "step": 275184 }, { "epoch": 23.743961352657006, "grad_norm": 1.5650254487991333, "learning_rate": 0.001, "loss": 1.938, "step": 275240 }, { "epoch": 23.7487922705314, "grad_norm": 2.48445463180542, "learning_rate": 0.001, "loss": 1.9383, "step": 275296 }, { "epoch": 23.753623188405797, "grad_norm": 0.9374467134475708, "learning_rate": 0.001, "loss": 1.9403, "step": 275352 }, { "epoch": 23.758454106280194, "grad_norm": 0.7290564775466919, "learning_rate": 0.001, "loss": 1.931, "step": 275408 }, { "epoch": 23.76328502415459, "grad_norm": 1.9603421688079834, "learning_rate": 0.001, "loss": 1.9355, "step": 275464 }, { "epoch": 23.768115942028984, "grad_norm": 0.5246350169181824, "learning_rate": 0.001, "loss": 1.9287, "step": 275520 }, { "epoch": 23.77294685990338, "grad_norm": 0.40989193320274353, "learning_rate": 0.001, "loss": 1.9327, "step": 275576 }, { "epoch": 23.77777777777778, "grad_norm": 0.46656450629234314, "learning_rate": 0.001, "loss": 1.928, "step": 275632 }, { "epoch": 23.782608695652176, "grad_norm": 1.1532413959503174, "learning_rate": 0.001, "loss": 1.9357, "step": 275688 }, { "epoch": 23.78743961352657, "grad_norm": 0.6365094780921936, "learning_rate": 0.001, "loss": 1.9278, "step": 275744 }, { "epoch": 23.792270531400966, "grad_norm": 1.0296921730041504, "learning_rate": 0.001, "loss": 1.9241, "step": 275800 }, { "epoch": 23.797101449275363, "grad_norm": 0.5454058647155762, "learning_rate": 0.001, "loss": 1.9259, "step": 275856 }, { "epoch": 23.80193236714976, "grad_norm": 0.7904008030891418, "learning_rate": 0.001, "loss": 1.9276, "step": 275912 }, { "epoch": 23.806763285024154, "grad_norm": 1.605818510055542, "learning_rate": 0.001, "loss": 1.9365, "step": 275968 }, { "epoch": 23.81159420289855, "grad_norm": 1.0176571607589722, "learning_rate": 0.001, "loss": 1.9354, "step": 276024 }, { "epoch": 23.816425120772948, "grad_norm": 2.5698883533477783, "learning_rate": 0.001, "loss": 1.9346, "step": 276080 }, { "epoch": 23.82125603864734, "grad_norm": 0.46160316467285156, "learning_rate": 0.001, "loss": 1.9415, "step": 276136 }, { "epoch": 23.82608695652174, "grad_norm": 0.6733005046844482, "learning_rate": 0.001, "loss": 1.9211, "step": 276192 }, { "epoch": 23.830917874396135, "grad_norm": 0.961740255355835, "learning_rate": 0.001, "loss": 1.9296, "step": 276248 }, { "epoch": 23.835748792270532, "grad_norm": 2.839693069458008, "learning_rate": 0.001, "loss": 1.9241, "step": 276304 }, { "epoch": 23.840579710144926, "grad_norm": 0.9255725145339966, "learning_rate": 0.001, "loss": 1.9231, "step": 276360 }, { "epoch": 23.845410628019323, "grad_norm": 1.6812142133712769, "learning_rate": 0.001, "loss": 1.9349, "step": 276416 }, { "epoch": 23.85024154589372, "grad_norm": 0.38448408246040344, "learning_rate": 0.001, "loss": 1.9341, "step": 276472 }, { "epoch": 23.855072463768117, "grad_norm": 1.389699935913086, "learning_rate": 0.001, "loss": 1.9398, "step": 276528 }, { "epoch": 23.85990338164251, "grad_norm": 1.6831470727920532, "learning_rate": 0.001, "loss": 1.9359, "step": 276584 }, { "epoch": 23.864734299516908, "grad_norm": 0.9034839868545532, "learning_rate": 0.001, "loss": 1.9394, "step": 276640 }, { "epoch": 23.869565217391305, "grad_norm": 3.222108840942383, "learning_rate": 0.001, "loss": 1.9352, "step": 276696 }, { "epoch": 23.8743961352657, "grad_norm": 1.3451405763626099, "learning_rate": 0.001, "loss": 1.9248, "step": 276752 }, { "epoch": 23.879227053140095, "grad_norm": 0.7348869442939758, "learning_rate": 0.001, "loss": 1.9359, "step": 276808 }, { "epoch": 23.884057971014492, "grad_norm": 0.6763084530830383, "learning_rate": 0.001, "loss": 1.9272, "step": 276864 }, { "epoch": 23.88888888888889, "grad_norm": 1.1968436241149902, "learning_rate": 0.001, "loss": 1.9454, "step": 276920 }, { "epoch": 23.893719806763286, "grad_norm": 1.2304675579071045, "learning_rate": 0.001, "loss": 1.9486, "step": 276976 }, { "epoch": 23.89855072463768, "grad_norm": 1.362490177154541, "learning_rate": 0.001, "loss": 1.9645, "step": 277032 }, { "epoch": 23.903381642512077, "grad_norm": 1.351412296295166, "learning_rate": 0.001, "loss": 1.9602, "step": 277088 }, { "epoch": 23.908212560386474, "grad_norm": 1.486250400543213, "learning_rate": 0.001, "loss": 1.9793, "step": 277144 }, { "epoch": 23.91304347826087, "grad_norm": 0.759766161441803, "learning_rate": 0.001, "loss": 1.9803, "step": 277200 }, { "epoch": 23.917874396135264, "grad_norm": 1.292884111404419, "learning_rate": 0.001, "loss": 1.9625, "step": 277256 }, { "epoch": 23.92270531400966, "grad_norm": 0.5014551281929016, "learning_rate": 0.001, "loss": 1.9593, "step": 277312 }, { "epoch": 23.92753623188406, "grad_norm": 0.46127235889434814, "learning_rate": 0.001, "loss": 1.9588, "step": 277368 }, { "epoch": 23.932367149758456, "grad_norm": 2.2300260066986084, "learning_rate": 0.001, "loss": 1.944, "step": 277424 }, { "epoch": 23.93719806763285, "grad_norm": 1.0253379344940186, "learning_rate": 0.001, "loss": 1.9501, "step": 277480 }, { "epoch": 23.942028985507246, "grad_norm": 0.87002032995224, "learning_rate": 0.001, "loss": 1.9409, "step": 277536 }, { "epoch": 23.946859903381643, "grad_norm": 1.2491779327392578, "learning_rate": 0.001, "loss": 1.9522, "step": 277592 }, { "epoch": 23.95169082125604, "grad_norm": 0.5064098238945007, "learning_rate": 0.001, "loss": 1.9595, "step": 277648 }, { "epoch": 23.956521739130434, "grad_norm": 1.01318359375, "learning_rate": 0.001, "loss": 1.9589, "step": 277704 }, { "epoch": 23.96135265700483, "grad_norm": 0.4085215628147125, "learning_rate": 0.001, "loss": 1.952, "step": 277760 }, { "epoch": 23.966183574879228, "grad_norm": 3.1914901733398438, "learning_rate": 0.001, "loss": 1.9466, "step": 277816 }, { "epoch": 23.971014492753625, "grad_norm": 1.040095567703247, "learning_rate": 0.001, "loss": 1.937, "step": 277872 }, { "epoch": 23.97584541062802, "grad_norm": 0.6660473942756653, "learning_rate": 0.001, "loss": 1.9384, "step": 277928 }, { "epoch": 23.980676328502415, "grad_norm": 0.9203644394874573, "learning_rate": 0.001, "loss": 1.9511, "step": 277984 }, { "epoch": 23.985507246376812, "grad_norm": 2.4745121002197266, "learning_rate": 0.001, "loss": 1.9384, "step": 278040 }, { "epoch": 23.990338164251206, "grad_norm": 1.9933804273605347, "learning_rate": 0.001, "loss": 1.9472, "step": 278096 }, { "epoch": 23.995169082125603, "grad_norm": 1.2811702489852905, "learning_rate": 0.001, "loss": 1.9364, "step": 278152 }, { "epoch": 24.0, "grad_norm": 1.7240533828735352, "learning_rate": 0.001, "loss": 1.9419, "step": 278208 }, { "epoch": 24.004830917874397, "grad_norm": 0.7032594680786133, "learning_rate": 0.001, "loss": 1.9071, "step": 278264 }, { "epoch": 24.00966183574879, "grad_norm": 0.4524131715297699, "learning_rate": 0.001, "loss": 1.9034, "step": 278320 }, { "epoch": 24.014492753623188, "grad_norm": 0.6271324753761292, "learning_rate": 0.001, "loss": 1.8983, "step": 278376 }, { "epoch": 24.019323671497585, "grad_norm": 5.7993268966674805, "learning_rate": 0.001, "loss": 1.9055, "step": 278432 }, { "epoch": 24.02415458937198, "grad_norm": 0.6311468482017517, "learning_rate": 0.001, "loss": 1.9108, "step": 278488 }, { "epoch": 24.028985507246375, "grad_norm": 1.0532279014587402, "learning_rate": 0.001, "loss": 1.9029, "step": 278544 }, { "epoch": 24.033816425120772, "grad_norm": 0.5748487114906311, "learning_rate": 0.001, "loss": 1.9065, "step": 278600 }, { "epoch": 24.03864734299517, "grad_norm": 0.5513666272163391, "learning_rate": 0.001, "loss": 1.9181, "step": 278656 }, { "epoch": 24.043478260869566, "grad_norm": 0.4437922239303589, "learning_rate": 0.001, "loss": 1.917, "step": 278712 }, { "epoch": 24.04830917874396, "grad_norm": 0.6994848251342773, "learning_rate": 0.001, "loss": 1.9171, "step": 278768 }, { "epoch": 24.053140096618357, "grad_norm": 0.5937466025352478, "learning_rate": 0.001, "loss": 1.9203, "step": 278824 }, { "epoch": 24.057971014492754, "grad_norm": 2.7160017490386963, "learning_rate": 0.001, "loss": 1.9168, "step": 278880 }, { "epoch": 24.06280193236715, "grad_norm": 0.5684502124786377, "learning_rate": 0.001, "loss": 1.9128, "step": 278936 }, { "epoch": 24.067632850241544, "grad_norm": 0.9820815324783325, "learning_rate": 0.001, "loss": 1.9113, "step": 278992 }, { "epoch": 24.07246376811594, "grad_norm": 1.1331963539123535, "learning_rate": 0.001, "loss": 1.901, "step": 279048 }, { "epoch": 24.07729468599034, "grad_norm": 1.1206762790679932, "learning_rate": 0.001, "loss": 1.892, "step": 279104 }, { "epoch": 24.082125603864736, "grad_norm": 1.8506113290786743, "learning_rate": 0.001, "loss": 1.888, "step": 279160 }, { "epoch": 24.08695652173913, "grad_norm": 1.0762802362442017, "learning_rate": 0.001, "loss": 1.8909, "step": 279216 }, { "epoch": 24.091787439613526, "grad_norm": 2.2971434593200684, "learning_rate": 0.001, "loss": 1.8876, "step": 279272 }, { "epoch": 24.096618357487923, "grad_norm": 2.967075824737549, "learning_rate": 0.001, "loss": 1.9016, "step": 279328 }, { "epoch": 24.10144927536232, "grad_norm": 0.9901528358459473, "learning_rate": 0.001, "loss": 1.8964, "step": 279384 }, { "epoch": 24.106280193236714, "grad_norm": 1.8009085655212402, "learning_rate": 0.001, "loss": 1.9047, "step": 279440 }, { "epoch": 24.11111111111111, "grad_norm": 0.6187200546264648, "learning_rate": 0.001, "loss": 1.8976, "step": 279496 }, { "epoch": 24.115942028985508, "grad_norm": 0.43210309743881226, "learning_rate": 0.001, "loss": 1.906, "step": 279552 }, { "epoch": 24.120772946859905, "grad_norm": 0.5662825703620911, "learning_rate": 0.001, "loss": 1.9059, "step": 279608 }, { "epoch": 24.1256038647343, "grad_norm": 0.3383672535419464, "learning_rate": 0.001, "loss": 1.9169, "step": 279664 }, { "epoch": 24.130434782608695, "grad_norm": 0.38147589564323425, "learning_rate": 0.001, "loss": 1.9167, "step": 279720 }, { "epoch": 24.135265700483092, "grad_norm": 1.9904968738555908, "learning_rate": 0.001, "loss": 1.9143, "step": 279776 }, { "epoch": 24.14009661835749, "grad_norm": 0.4460856318473816, "learning_rate": 0.001, "loss": 1.9122, "step": 279832 }, { "epoch": 24.144927536231883, "grad_norm": 0.43119677901268005, "learning_rate": 0.001, "loss": 1.9156, "step": 279888 }, { "epoch": 24.14975845410628, "grad_norm": 1.5527560710906982, "learning_rate": 0.001, "loss": 1.9021, "step": 279944 }, { "epoch": 24.154589371980677, "grad_norm": 0.27000075578689575, "learning_rate": 0.001, "loss": 1.9093, "step": 280000 }, { "epoch": 24.159420289855074, "grad_norm": 3.288224697113037, "learning_rate": 0.001, "loss": 1.9068, "step": 280056 }, { "epoch": 24.164251207729468, "grad_norm": 0.6336233615875244, "learning_rate": 0.001, "loss": 1.9056, "step": 280112 }, { "epoch": 24.169082125603865, "grad_norm": 2.2870900630950928, "learning_rate": 0.001, "loss": 1.9096, "step": 280168 }, { "epoch": 24.17391304347826, "grad_norm": 0.7440868020057678, "learning_rate": 0.001, "loss": 1.9276, "step": 280224 }, { "epoch": 24.17874396135266, "grad_norm": 7.545788288116455, "learning_rate": 0.001, "loss": 1.9186, "step": 280280 }, { "epoch": 24.183574879227052, "grad_norm": 2.4397919178009033, "learning_rate": 0.001, "loss": 1.912, "step": 280336 }, { "epoch": 24.18840579710145, "grad_norm": 0.5629308223724365, "learning_rate": 0.001, "loss": 1.9282, "step": 280392 }, { "epoch": 24.193236714975846, "grad_norm": 2.577620506286621, "learning_rate": 0.001, "loss": 1.9249, "step": 280448 }, { "epoch": 24.19806763285024, "grad_norm": 1.189853310585022, "learning_rate": 0.001, "loss": 1.9211, "step": 280504 }, { "epoch": 24.202898550724637, "grad_norm": 0.8739112019538879, "learning_rate": 0.001, "loss": 1.9149, "step": 280560 }, { "epoch": 24.207729468599034, "grad_norm": 0.6673154830932617, "learning_rate": 0.001, "loss": 1.9188, "step": 280616 }, { "epoch": 24.21256038647343, "grad_norm": 0.6294225454330444, "learning_rate": 0.001, "loss": 1.9261, "step": 280672 }, { "epoch": 24.217391304347824, "grad_norm": 1.6908996105194092, "learning_rate": 0.001, "loss": 1.9277, "step": 280728 }, { "epoch": 24.22222222222222, "grad_norm": 1.5422178506851196, "learning_rate": 0.001, "loss": 1.9265, "step": 280784 }, { "epoch": 24.22705314009662, "grad_norm": 0.3957167863845825, "learning_rate": 0.001, "loss": 1.9215, "step": 280840 }, { "epoch": 24.231884057971016, "grad_norm": 1.4532999992370605, "learning_rate": 0.001, "loss": 1.923, "step": 280896 }, { "epoch": 24.23671497584541, "grad_norm": 3.4129905700683594, "learning_rate": 0.001, "loss": 1.9234, "step": 280952 }, { "epoch": 24.241545893719806, "grad_norm": 0.7897972464561462, "learning_rate": 0.001, "loss": 1.9227, "step": 281008 }, { "epoch": 24.246376811594203, "grad_norm": 0.6017054915428162, "learning_rate": 0.001, "loss": 1.9234, "step": 281064 }, { "epoch": 24.2512077294686, "grad_norm": 1.723306655883789, "learning_rate": 0.001, "loss": 1.9249, "step": 281120 }, { "epoch": 24.256038647342994, "grad_norm": 0.5447843670845032, "learning_rate": 0.001, "loss": 1.9186, "step": 281176 }, { "epoch": 24.26086956521739, "grad_norm": 0.3186738193035126, "learning_rate": 0.001, "loss": 1.9232, "step": 281232 }, { "epoch": 24.265700483091788, "grad_norm": 0.7094520926475525, "learning_rate": 0.001, "loss": 1.9104, "step": 281288 }, { "epoch": 24.270531400966185, "grad_norm": 0.804150402545929, "learning_rate": 0.001, "loss": 1.9097, "step": 281344 }, { "epoch": 24.27536231884058, "grad_norm": 1.580153226852417, "learning_rate": 0.001, "loss": 1.9146, "step": 281400 }, { "epoch": 24.280193236714975, "grad_norm": 1.3783105611801147, "learning_rate": 0.001, "loss": 1.9236, "step": 281456 }, { "epoch": 24.285024154589372, "grad_norm": 1.4467170238494873, "learning_rate": 0.001, "loss": 1.9224, "step": 281512 }, { "epoch": 24.28985507246377, "grad_norm": 1.048298954963684, "learning_rate": 0.001, "loss": 1.9332, "step": 281568 }, { "epoch": 24.294685990338163, "grad_norm": 1.2644479274749756, "learning_rate": 0.001, "loss": 1.9327, "step": 281624 }, { "epoch": 24.29951690821256, "grad_norm": 0.5416079163551331, "learning_rate": 0.001, "loss": 1.9278, "step": 281680 }, { "epoch": 24.304347826086957, "grad_norm": 1.05036461353302, "learning_rate": 0.001, "loss": 1.9206, "step": 281736 }, { "epoch": 24.309178743961354, "grad_norm": 0.428485631942749, "learning_rate": 0.001, "loss": 1.9107, "step": 281792 }, { "epoch": 24.314009661835748, "grad_norm": 0.8467603325843811, "learning_rate": 0.001, "loss": 1.9152, "step": 281848 }, { "epoch": 24.318840579710145, "grad_norm": 1.9940674304962158, "learning_rate": 0.001, "loss": 1.9133, "step": 281904 }, { "epoch": 24.32367149758454, "grad_norm": 0.5763525366783142, "learning_rate": 0.001, "loss": 1.9231, "step": 281960 }, { "epoch": 24.32850241545894, "grad_norm": 3.374732732772827, "learning_rate": 0.001, "loss": 1.9131, "step": 282016 }, { "epoch": 24.333333333333332, "grad_norm": 1.113442063331604, "learning_rate": 0.001, "loss": 1.9148, "step": 282072 }, { "epoch": 24.33816425120773, "grad_norm": 1.1274182796478271, "learning_rate": 0.001, "loss": 1.9087, "step": 282128 }, { "epoch": 24.342995169082126, "grad_norm": 0.6083889603614807, "learning_rate": 0.001, "loss": 1.9101, "step": 282184 }, { "epoch": 24.347826086956523, "grad_norm": 1.5237759351730347, "learning_rate": 0.001, "loss": 1.9221, "step": 282240 }, { "epoch": 24.352657004830917, "grad_norm": 0.906283974647522, "learning_rate": 0.001, "loss": 1.9193, "step": 282296 }, { "epoch": 24.357487922705314, "grad_norm": 1.0883668661117554, "learning_rate": 0.001, "loss": 1.9191, "step": 282352 }, { "epoch": 24.36231884057971, "grad_norm": 2.2442147731781006, "learning_rate": 0.001, "loss": 1.9095, "step": 282408 }, { "epoch": 24.367149758454108, "grad_norm": 2.4565682411193848, "learning_rate": 0.001, "loss": 1.9167, "step": 282464 }, { "epoch": 24.3719806763285, "grad_norm": 0.7146531343460083, "learning_rate": 0.001, "loss": 1.916, "step": 282520 }, { "epoch": 24.3768115942029, "grad_norm": 0.7050408720970154, "learning_rate": 0.001, "loss": 1.9241, "step": 282576 }, { "epoch": 24.381642512077295, "grad_norm": 4.046645164489746, "learning_rate": 0.001, "loss": 1.9323, "step": 282632 }, { "epoch": 24.386473429951693, "grad_norm": 2.13010573387146, "learning_rate": 0.001, "loss": 1.9221, "step": 282688 }, { "epoch": 24.391304347826086, "grad_norm": 0.566875696182251, "learning_rate": 0.001, "loss": 1.9288, "step": 282744 }, { "epoch": 24.396135265700483, "grad_norm": 2.4855856895446777, "learning_rate": 0.001, "loss": 1.9182, "step": 282800 }, { "epoch": 24.40096618357488, "grad_norm": 0.4139580726623535, "learning_rate": 0.001, "loss": 1.9184, "step": 282856 }, { "epoch": 24.405797101449274, "grad_norm": 1.4570844173431396, "learning_rate": 0.001, "loss": 1.9331, "step": 282912 }, { "epoch": 24.41062801932367, "grad_norm": 4.85396671295166, "learning_rate": 0.001, "loss": 1.9447, "step": 282968 }, { "epoch": 24.415458937198068, "grad_norm": 1.0623300075531006, "learning_rate": 0.001, "loss": 1.9351, "step": 283024 }, { "epoch": 24.420289855072465, "grad_norm": 3.9003710746765137, "learning_rate": 0.001, "loss": 1.9182, "step": 283080 }, { "epoch": 24.42512077294686, "grad_norm": 1.6495261192321777, "learning_rate": 0.001, "loss": 1.9318, "step": 283136 }, { "epoch": 24.429951690821255, "grad_norm": 0.9540917277336121, "learning_rate": 0.001, "loss": 1.9456, "step": 283192 }, { "epoch": 24.434782608695652, "grad_norm": 1.2446115016937256, "learning_rate": 0.001, "loss": 1.9324, "step": 283248 }, { "epoch": 24.43961352657005, "grad_norm": 0.6882824301719666, "learning_rate": 0.001, "loss": 1.9393, "step": 283304 }, { "epoch": 24.444444444444443, "grad_norm": 0.6733536720275879, "learning_rate": 0.001, "loss": 1.9462, "step": 283360 }, { "epoch": 24.44927536231884, "grad_norm": 1.2083462476730347, "learning_rate": 0.001, "loss": 1.9673, "step": 283416 }, { "epoch": 24.454106280193237, "grad_norm": 1.1044378280639648, "learning_rate": 0.001, "loss": 1.9504, "step": 283472 }, { "epoch": 24.458937198067634, "grad_norm": 2.113809108734131, "learning_rate": 0.001, "loss": 1.9504, "step": 283528 }, { "epoch": 24.463768115942027, "grad_norm": 0.47570937871932983, "learning_rate": 0.001, "loss": 1.9392, "step": 283584 }, { "epoch": 24.468599033816425, "grad_norm": 1.5000444650650024, "learning_rate": 0.001, "loss": 1.938, "step": 283640 }, { "epoch": 24.47342995169082, "grad_norm": 1.0407072305679321, "learning_rate": 0.001, "loss": 1.931, "step": 283696 }, { "epoch": 24.47826086956522, "grad_norm": 6.89382266998291, "learning_rate": 0.001, "loss": 1.9351, "step": 283752 }, { "epoch": 24.483091787439612, "grad_norm": 6.6253790855407715, "learning_rate": 0.001, "loss": 1.9307, "step": 283808 }, { "epoch": 24.48792270531401, "grad_norm": 2.150550365447998, "learning_rate": 0.001, "loss": 1.9321, "step": 283864 }, { "epoch": 24.492753623188406, "grad_norm": 1.5275256633758545, "learning_rate": 0.001, "loss": 1.9355, "step": 283920 }, { "epoch": 24.497584541062803, "grad_norm": 0.5016854405403137, "learning_rate": 0.001, "loss": 1.9506, "step": 283976 }, { "epoch": 24.502415458937197, "grad_norm": 0.7780137658119202, "learning_rate": 0.001, "loss": 1.9549, "step": 284032 }, { "epoch": 24.507246376811594, "grad_norm": 1.1209287643432617, "learning_rate": 0.001, "loss": 1.948, "step": 284088 }, { "epoch": 24.51207729468599, "grad_norm": 1.2572706937789917, "learning_rate": 0.001, "loss": 1.9412, "step": 284144 }, { "epoch": 24.516908212560388, "grad_norm": 2.046964406967163, "learning_rate": 0.001, "loss": 1.9412, "step": 284200 }, { "epoch": 24.52173913043478, "grad_norm": 1.0316088199615479, "learning_rate": 0.001, "loss": 1.9409, "step": 284256 }, { "epoch": 24.52657004830918, "grad_norm": 1.9627680778503418, "learning_rate": 0.001, "loss": 1.9466, "step": 284312 }, { "epoch": 24.531400966183575, "grad_norm": 3.5076000690460205, "learning_rate": 0.001, "loss": 1.9313, "step": 284368 }, { "epoch": 24.536231884057973, "grad_norm": 1.429106593132019, "learning_rate": 0.001, "loss": 1.9476, "step": 284424 }, { "epoch": 24.541062801932366, "grad_norm": 15.782718658447266, "learning_rate": 0.001, "loss": 1.9504, "step": 284480 }, { "epoch": 24.545893719806763, "grad_norm": 1.0051147937774658, "learning_rate": 0.001, "loss": 1.9585, "step": 284536 }, { "epoch": 24.55072463768116, "grad_norm": 1.2437424659729004, "learning_rate": 0.001, "loss": 1.9675, "step": 284592 }, { "epoch": 24.555555555555557, "grad_norm": 0.7899636626243591, "learning_rate": 0.001, "loss": 1.9781, "step": 284648 }, { "epoch": 24.56038647342995, "grad_norm": 1.4735686779022217, "learning_rate": 0.001, "loss": 1.9693, "step": 284704 }, { "epoch": 24.565217391304348, "grad_norm": 0.9253683090209961, "learning_rate": 0.001, "loss": 1.9705, "step": 284760 }, { "epoch": 24.570048309178745, "grad_norm": 1.17081618309021, "learning_rate": 0.001, "loss": 1.9686, "step": 284816 }, { "epoch": 24.57487922705314, "grad_norm": 0.8421620726585388, "learning_rate": 0.001, "loss": 1.9653, "step": 284872 }, { "epoch": 24.579710144927535, "grad_norm": 2.7979695796966553, "learning_rate": 0.001, "loss": 1.9644, "step": 284928 }, { "epoch": 24.584541062801932, "grad_norm": 0.5268198251724243, "learning_rate": 0.001, "loss": 1.9559, "step": 284984 }, { "epoch": 24.58937198067633, "grad_norm": 2.684612512588501, "learning_rate": 0.001, "loss": 1.9496, "step": 285040 }, { "epoch": 24.594202898550726, "grad_norm": 2.848207950592041, "learning_rate": 0.001, "loss": 1.9588, "step": 285096 }, { "epoch": 24.59903381642512, "grad_norm": 0.6829081773757935, "learning_rate": 0.001, "loss": 1.9425, "step": 285152 }, { "epoch": 24.603864734299517, "grad_norm": 0.4179551899433136, "learning_rate": 0.001, "loss": 1.9347, "step": 285208 }, { "epoch": 24.608695652173914, "grad_norm": 0.4337264597415924, "learning_rate": 0.001, "loss": 1.9362, "step": 285264 }, { "epoch": 24.613526570048307, "grad_norm": 0.8992215991020203, "learning_rate": 0.001, "loss": 1.9378, "step": 285320 }, { "epoch": 24.618357487922705, "grad_norm": 0.6492766737937927, "learning_rate": 0.001, "loss": 1.9284, "step": 285376 }, { "epoch": 24.6231884057971, "grad_norm": 1.1232877969741821, "learning_rate": 0.001, "loss": 1.9261, "step": 285432 }, { "epoch": 24.6280193236715, "grad_norm": 1.0397026538848877, "learning_rate": 0.001, "loss": 1.9288, "step": 285488 }, { "epoch": 24.632850241545892, "grad_norm": 0.74684077501297, "learning_rate": 0.001, "loss": 1.9283, "step": 285544 }, { "epoch": 24.63768115942029, "grad_norm": 0.8136125206947327, "learning_rate": 0.001, "loss": 1.9362, "step": 285600 }, { "epoch": 24.642512077294686, "grad_norm": 10.070075035095215, "learning_rate": 0.001, "loss": 1.9267, "step": 285656 }, { "epoch": 24.647342995169083, "grad_norm": 3.998183488845825, "learning_rate": 0.001, "loss": 1.9176, "step": 285712 }, { "epoch": 24.652173913043477, "grad_norm": 1.1154167652130127, "learning_rate": 0.001, "loss": 1.9308, "step": 285768 }, { "epoch": 24.657004830917874, "grad_norm": 3.9229440689086914, "learning_rate": 0.001, "loss": 1.9356, "step": 285824 }, { "epoch": 24.66183574879227, "grad_norm": 1.5403419733047485, "learning_rate": 0.001, "loss": 1.9296, "step": 285880 }, { "epoch": 24.666666666666668, "grad_norm": 0.8178623914718628, "learning_rate": 0.001, "loss": 1.9296, "step": 285936 }, { "epoch": 24.67149758454106, "grad_norm": 5.438370704650879, "learning_rate": 0.001, "loss": 1.9276, "step": 285992 }, { "epoch": 24.67632850241546, "grad_norm": 1.1394742727279663, "learning_rate": 0.001, "loss": 1.9357, "step": 286048 }, { "epoch": 24.681159420289855, "grad_norm": 1.0972161293029785, "learning_rate": 0.001, "loss": 1.9366, "step": 286104 }, { "epoch": 24.685990338164252, "grad_norm": 3.26716685295105, "learning_rate": 0.001, "loss": 1.9377, "step": 286160 }, { "epoch": 24.690821256038646, "grad_norm": 1.0800268650054932, "learning_rate": 0.001, "loss": 1.928, "step": 286216 }, { "epoch": 24.695652173913043, "grad_norm": 0.8661267757415771, "learning_rate": 0.001, "loss": 1.9326, "step": 286272 }, { "epoch": 24.70048309178744, "grad_norm": 0.5988258123397827, "learning_rate": 0.001, "loss": 1.9285, "step": 286328 }, { "epoch": 24.705314009661837, "grad_norm": 0.5859854221343994, "learning_rate": 0.001, "loss": 1.9302, "step": 286384 }, { "epoch": 24.71014492753623, "grad_norm": 1.8245928287506104, "learning_rate": 0.001, "loss": 1.9379, "step": 286440 }, { "epoch": 24.714975845410628, "grad_norm": 0.36968106031417847, "learning_rate": 0.001, "loss": 1.9435, "step": 286496 }, { "epoch": 24.719806763285025, "grad_norm": 0.6545636653900146, "learning_rate": 0.001, "loss": 1.9397, "step": 286552 }, { "epoch": 24.72463768115942, "grad_norm": 1.5007156133651733, "learning_rate": 0.001, "loss": 1.936, "step": 286608 }, { "epoch": 24.729468599033815, "grad_norm": 1.6536756753921509, "learning_rate": 0.001, "loss": 1.9341, "step": 286664 }, { "epoch": 24.734299516908212, "grad_norm": 0.7244811058044434, "learning_rate": 0.001, "loss": 1.9329, "step": 286720 }, { "epoch": 24.73913043478261, "grad_norm": 1.6437448263168335, "learning_rate": 0.001, "loss": 1.929, "step": 286776 }, { "epoch": 24.743961352657006, "grad_norm": 0.5581583380699158, "learning_rate": 0.001, "loss": 1.9384, "step": 286832 }, { "epoch": 24.7487922705314, "grad_norm": 0.5002501606941223, "learning_rate": 0.001, "loss": 1.9299, "step": 286888 }, { "epoch": 24.753623188405797, "grad_norm": 0.6342955231666565, "learning_rate": 0.001, "loss": 1.9325, "step": 286944 }, { "epoch": 24.758454106280194, "grad_norm": 0.7209481596946716, "learning_rate": 0.001, "loss": 1.9363, "step": 287000 }, { "epoch": 24.76328502415459, "grad_norm": 0.4335455000400543, "learning_rate": 0.001, "loss": 1.9333, "step": 287056 }, { "epoch": 24.768115942028984, "grad_norm": 1.286346435546875, "learning_rate": 0.001, "loss": 1.9411, "step": 287112 }, { "epoch": 24.77294685990338, "grad_norm": 3.266732692718506, "learning_rate": 0.001, "loss": 1.9312, "step": 287168 }, { "epoch": 24.77777777777778, "grad_norm": 0.4348663091659546, "learning_rate": 0.001, "loss": 1.9389, "step": 287224 }, { "epoch": 24.782608695652176, "grad_norm": 1.258651614189148, "learning_rate": 0.001, "loss": 1.9349, "step": 287280 }, { "epoch": 24.78743961352657, "grad_norm": 0.5606732368469238, "learning_rate": 0.001, "loss": 1.9324, "step": 287336 }, { "epoch": 24.792270531400966, "grad_norm": 0.7041652202606201, "learning_rate": 0.001, "loss": 1.9283, "step": 287392 }, { "epoch": 24.797101449275363, "grad_norm": 0.34374329447746277, "learning_rate": 0.001, "loss": 1.9427, "step": 287448 }, { "epoch": 24.80193236714976, "grad_norm": 0.5982248783111572, "learning_rate": 0.001, "loss": 1.938, "step": 287504 }, { "epoch": 24.806763285024154, "grad_norm": 1.4653364419937134, "learning_rate": 0.001, "loss": 1.936, "step": 287560 }, { "epoch": 24.81159420289855, "grad_norm": 1.584633231163025, "learning_rate": 0.001, "loss": 1.9471, "step": 287616 }, { "epoch": 24.816425120772948, "grad_norm": 1.4942760467529297, "learning_rate": 0.001, "loss": 1.9411, "step": 287672 }, { "epoch": 24.82125603864734, "grad_norm": 2.931358814239502, "learning_rate": 0.001, "loss": 1.9511, "step": 287728 }, { "epoch": 24.82608695652174, "grad_norm": 0.7946625351905823, "learning_rate": 0.001, "loss": 1.9392, "step": 287784 }, { "epoch": 24.830917874396135, "grad_norm": 0.9004411697387695, "learning_rate": 0.001, "loss": 1.9463, "step": 287840 }, { "epoch": 24.835748792270532, "grad_norm": 0.44295451045036316, "learning_rate": 0.001, "loss": 1.9381, "step": 287896 }, { "epoch": 24.840579710144926, "grad_norm": 0.8131876587867737, "learning_rate": 0.001, "loss": 1.936, "step": 287952 }, { "epoch": 24.845410628019323, "grad_norm": 0.8626725077629089, "learning_rate": 0.001, "loss": 1.9352, "step": 288008 }, { "epoch": 24.85024154589372, "grad_norm": 0.4049046039581299, "learning_rate": 0.001, "loss": 1.9373, "step": 288064 }, { "epoch": 24.855072463768117, "grad_norm": 0.6616743803024292, "learning_rate": 0.001, "loss": 1.9535, "step": 288120 }, { "epoch": 24.85990338164251, "grad_norm": 5.975124835968018, "learning_rate": 0.001, "loss": 1.9412, "step": 288176 }, { "epoch": 24.864734299516908, "grad_norm": 1.0922489166259766, "learning_rate": 0.001, "loss": 1.9281, "step": 288232 }, { "epoch": 24.869565217391305, "grad_norm": 1.129918098449707, "learning_rate": 0.001, "loss": 1.9297, "step": 288288 }, { "epoch": 24.8743961352657, "grad_norm": 0.7542330622673035, "learning_rate": 0.001, "loss": 1.9355, "step": 288344 }, { "epoch": 24.879227053140095, "grad_norm": 1.5211526155471802, "learning_rate": 0.001, "loss": 1.9324, "step": 288400 }, { "epoch": 24.884057971014492, "grad_norm": 1.161846399307251, "learning_rate": 0.001, "loss": 1.9313, "step": 288456 }, { "epoch": 24.88888888888889, "grad_norm": 0.9829748272895813, "learning_rate": 0.001, "loss": 1.9246, "step": 288512 }, { "epoch": 24.893719806763286, "grad_norm": 0.3879932761192322, "learning_rate": 0.001, "loss": 1.9312, "step": 288568 }, { "epoch": 24.89855072463768, "grad_norm": 2.8497977256774902, "learning_rate": 0.001, "loss": 1.9349, "step": 288624 }, { "epoch": 24.903381642512077, "grad_norm": 0.8585650324821472, "learning_rate": 0.001, "loss": 1.9372, "step": 288680 }, { "epoch": 24.908212560386474, "grad_norm": 0.4140551686286926, "learning_rate": 0.001, "loss": 1.9396, "step": 288736 }, { "epoch": 24.91304347826087, "grad_norm": 0.8719210028648376, "learning_rate": 0.001, "loss": 1.9389, "step": 288792 }, { "epoch": 24.917874396135264, "grad_norm": 1.5678306818008423, "learning_rate": 0.001, "loss": 1.9254, "step": 288848 }, { "epoch": 24.92270531400966, "grad_norm": 0.6155852675437927, "learning_rate": 0.001, "loss": 1.9267, "step": 288904 }, { "epoch": 24.92753623188406, "grad_norm": 1.3455864191055298, "learning_rate": 0.001, "loss": 1.9346, "step": 288960 }, { "epoch": 24.932367149758456, "grad_norm": 0.5619357824325562, "learning_rate": 0.001, "loss": 1.9422, "step": 289016 }, { "epoch": 24.93719806763285, "grad_norm": 1.0533734560012817, "learning_rate": 0.001, "loss": 1.9363, "step": 289072 }, { "epoch": 24.942028985507246, "grad_norm": 0.5143280029296875, "learning_rate": 0.001, "loss": 1.9376, "step": 289128 }, { "epoch": 24.946859903381643, "grad_norm": 3.120244026184082, "learning_rate": 0.001, "loss": 1.9342, "step": 289184 }, { "epoch": 24.95169082125604, "grad_norm": 0.6211585402488708, "learning_rate": 0.001, "loss": 1.937, "step": 289240 }, { "epoch": 24.956521739130434, "grad_norm": 0.7097315192222595, "learning_rate": 0.001, "loss": 1.934, "step": 289296 }, { "epoch": 24.96135265700483, "grad_norm": 0.3881317377090454, "learning_rate": 0.001, "loss": 1.9356, "step": 289352 }, { "epoch": 24.966183574879228, "grad_norm": 0.4759860634803772, "learning_rate": 0.001, "loss": 1.9244, "step": 289408 }, { "epoch": 24.971014492753625, "grad_norm": 0.5969966053962708, "learning_rate": 0.001, "loss": 1.919, "step": 289464 }, { "epoch": 24.97584541062802, "grad_norm": 1.1887179613113403, "learning_rate": 0.001, "loss": 1.9184, "step": 289520 }, { "epoch": 24.980676328502415, "grad_norm": 0.37710118293762207, "learning_rate": 0.001, "loss": 1.9266, "step": 289576 }, { "epoch": 24.985507246376812, "grad_norm": 0.3579617738723755, "learning_rate": 0.001, "loss": 1.9223, "step": 289632 }, { "epoch": 24.990338164251206, "grad_norm": 2.7368972301483154, "learning_rate": 0.001, "loss": 1.9329, "step": 289688 }, { "epoch": 24.995169082125603, "grad_norm": 0.5299686789512634, "learning_rate": 0.001, "loss": 1.9329, "step": 289744 }, { "epoch": 25.0, "grad_norm": 0.6297286152839661, "learning_rate": 0.001, "loss": 1.9429, "step": 289800 }, { "epoch": 25.004830917874397, "grad_norm": 0.4525673985481262, "learning_rate": 0.001, "loss": 1.8939, "step": 289856 }, { "epoch": 25.00966183574879, "grad_norm": 1.337429404258728, "learning_rate": 0.001, "loss": 1.8973, "step": 289912 }, { "epoch": 25.014492753623188, "grad_norm": 0.42868876457214355, "learning_rate": 0.001, "loss": 1.8883, "step": 289968 }, { "epoch": 25.019323671497585, "grad_norm": 0.3213861286640167, "learning_rate": 0.001, "loss": 1.8896, "step": 290024 }, { "epoch": 25.02415458937198, "grad_norm": 0.9243196845054626, "learning_rate": 0.001, "loss": 1.8925, "step": 290080 }, { "epoch": 25.028985507246375, "grad_norm": 0.7700756192207336, "learning_rate": 0.001, "loss": 1.894, "step": 290136 }, { "epoch": 25.033816425120772, "grad_norm": 0.37991103529930115, "learning_rate": 0.001, "loss": 1.8958, "step": 290192 }, { "epoch": 25.03864734299517, "grad_norm": 0.6052849888801575, "learning_rate": 0.001, "loss": 1.9125, "step": 290248 }, { "epoch": 25.043478260869566, "grad_norm": 0.5753774046897888, "learning_rate": 0.001, "loss": 1.9022, "step": 290304 }, { "epoch": 25.04830917874396, "grad_norm": 0.8124651312828064, "learning_rate": 0.001, "loss": 1.9048, "step": 290360 }, { "epoch": 25.053140096618357, "grad_norm": 0.7410440444946289, "learning_rate": 0.001, "loss": 1.8953, "step": 290416 }, { "epoch": 25.057971014492754, "grad_norm": 0.8376240134239197, "learning_rate": 0.001, "loss": 1.8952, "step": 290472 }, { "epoch": 25.06280193236715, "grad_norm": 0.5952937006950378, "learning_rate": 0.001, "loss": 1.9086, "step": 290528 }, { "epoch": 25.067632850241544, "grad_norm": 0.49595123529434204, "learning_rate": 0.001, "loss": 1.9099, "step": 290584 }, { "epoch": 25.07246376811594, "grad_norm": 0.9433848261833191, "learning_rate": 0.001, "loss": 1.8922, "step": 290640 }, { "epoch": 25.07729468599034, "grad_norm": 0.9255042672157288, "learning_rate": 0.001, "loss": 1.8915, "step": 290696 }, { "epoch": 25.082125603864736, "grad_norm": 0.42907917499542236, "learning_rate": 0.001, "loss": 1.8935, "step": 290752 }, { "epoch": 25.08695652173913, "grad_norm": 0.4167092740535736, "learning_rate": 0.001, "loss": 1.8914, "step": 290808 }, { "epoch": 25.091787439613526, "grad_norm": 0.45365434885025024, "learning_rate": 0.001, "loss": 1.8861, "step": 290864 }, { "epoch": 25.096618357487923, "grad_norm": 0.7556871175765991, "learning_rate": 0.001, "loss": 1.9013, "step": 290920 }, { "epoch": 25.10144927536232, "grad_norm": 0.319809228181839, "learning_rate": 0.001, "loss": 1.895, "step": 290976 }, { "epoch": 25.106280193236714, "grad_norm": 0.7540002465248108, "learning_rate": 0.001, "loss": 1.8959, "step": 291032 }, { "epoch": 25.11111111111111, "grad_norm": 0.5418335199356079, "learning_rate": 0.001, "loss": 1.8948, "step": 291088 }, { "epoch": 25.115942028985508, "grad_norm": 0.7253740429878235, "learning_rate": 0.001, "loss": 1.8994, "step": 291144 }, { "epoch": 25.120772946859905, "grad_norm": 0.6484837532043457, "learning_rate": 0.001, "loss": 1.9009, "step": 291200 }, { "epoch": 25.1256038647343, "grad_norm": 0.8925520777702332, "learning_rate": 0.001, "loss": 1.8945, "step": 291256 }, { "epoch": 25.130434782608695, "grad_norm": 0.9301453828811646, "learning_rate": 0.001, "loss": 1.8928, "step": 291312 }, { "epoch": 25.135265700483092, "grad_norm": 3.4035086631774902, "learning_rate": 0.001, "loss": 1.8902, "step": 291368 }, { "epoch": 25.14009661835749, "grad_norm": 0.4157479405403137, "learning_rate": 0.001, "loss": 1.8846, "step": 291424 }, { "epoch": 25.144927536231883, "grad_norm": 0.5445131063461304, "learning_rate": 0.001, "loss": 1.8868, "step": 291480 }, { "epoch": 25.14975845410628, "grad_norm": 0.27865248918533325, "learning_rate": 0.001, "loss": 1.9009, "step": 291536 }, { "epoch": 25.154589371980677, "grad_norm": 0.3499648869037628, "learning_rate": 0.001, "loss": 1.8931, "step": 291592 }, { "epoch": 25.159420289855074, "grad_norm": 0.4969694912433624, "learning_rate": 0.001, "loss": 1.9101, "step": 291648 }, { "epoch": 25.164251207729468, "grad_norm": 0.29891514778137207, "learning_rate": 0.001, "loss": 1.9218, "step": 291704 }, { "epoch": 25.169082125603865, "grad_norm": 8.23755168914795, "learning_rate": 0.001, "loss": 1.9258, "step": 291760 }, { "epoch": 25.17391304347826, "grad_norm": 2.853146553039551, "learning_rate": 0.001, "loss": 1.9217, "step": 291816 }, { "epoch": 25.17874396135266, "grad_norm": 1.3786201477050781, "learning_rate": 0.001, "loss": 1.9288, "step": 291872 }, { "epoch": 25.183574879227052, "grad_norm": 0.5281625390052795, "learning_rate": 0.001, "loss": 1.9213, "step": 291928 }, { "epoch": 25.18840579710145, "grad_norm": 1.1346683502197266, "learning_rate": 0.001, "loss": 1.9212, "step": 291984 }, { "epoch": 25.193236714975846, "grad_norm": 0.6556458473205566, "learning_rate": 0.001, "loss": 1.9116, "step": 292040 }, { "epoch": 25.19806763285024, "grad_norm": 0.6012808680534363, "learning_rate": 0.001, "loss": 1.9088, "step": 292096 }, { "epoch": 25.202898550724637, "grad_norm": 0.9514915347099304, "learning_rate": 0.001, "loss": 1.9028, "step": 292152 }, { "epoch": 25.207729468599034, "grad_norm": 0.9847424030303955, "learning_rate": 0.001, "loss": 1.903, "step": 292208 }, { "epoch": 25.21256038647343, "grad_norm": 0.44935745000839233, "learning_rate": 0.001, "loss": 1.9132, "step": 292264 }, { "epoch": 25.217391304347824, "grad_norm": 4.4888105392456055, "learning_rate": 0.001, "loss": 1.9125, "step": 292320 }, { "epoch": 25.22222222222222, "grad_norm": 0.7369659543037415, "learning_rate": 0.001, "loss": 1.9117, "step": 292376 }, { "epoch": 25.22705314009662, "grad_norm": 14.997528076171875, "learning_rate": 0.001, "loss": 1.9065, "step": 292432 }, { "epoch": 25.231884057971016, "grad_norm": 0.8875183463096619, "learning_rate": 0.001, "loss": 1.905, "step": 292488 }, { "epoch": 25.23671497584541, "grad_norm": 7.130239009857178, "learning_rate": 0.001, "loss": 1.8985, "step": 292544 }, { "epoch": 25.241545893719806, "grad_norm": 0.9772172570228577, "learning_rate": 0.001, "loss": 1.8985, "step": 292600 }, { "epoch": 25.246376811594203, "grad_norm": 2.7604634761810303, "learning_rate": 0.001, "loss": 1.9017, "step": 292656 }, { "epoch": 25.2512077294686, "grad_norm": 1.0480865240097046, "learning_rate": 0.001, "loss": 1.9028, "step": 292712 }, { "epoch": 25.256038647342994, "grad_norm": 0.4690069556236267, "learning_rate": 0.001, "loss": 1.9113, "step": 292768 }, { "epoch": 25.26086956521739, "grad_norm": 1.7872720956802368, "learning_rate": 0.001, "loss": 1.9223, "step": 292824 }, { "epoch": 25.265700483091788, "grad_norm": 1.577032446861267, "learning_rate": 0.001, "loss": 1.9093, "step": 292880 }, { "epoch": 25.270531400966185, "grad_norm": 0.27479788661003113, "learning_rate": 0.001, "loss": 1.9037, "step": 292936 }, { "epoch": 25.27536231884058, "grad_norm": 1.9263094663619995, "learning_rate": 0.001, "loss": 1.9083, "step": 292992 }, { "epoch": 25.280193236714975, "grad_norm": 0.5642146468162537, "learning_rate": 0.001, "loss": 1.9015, "step": 293048 }, { "epoch": 25.285024154589372, "grad_norm": 2.787116289138794, "learning_rate": 0.001, "loss": 1.9051, "step": 293104 }, { "epoch": 25.28985507246377, "grad_norm": 3.6648001670837402, "learning_rate": 0.001, "loss": 1.9044, "step": 293160 }, { "epoch": 25.294685990338163, "grad_norm": 1.8598066568374634, "learning_rate": 0.001, "loss": 1.9179, "step": 293216 }, { "epoch": 25.29951690821256, "grad_norm": 0.5635346174240112, "learning_rate": 0.001, "loss": 1.925, "step": 293272 }, { "epoch": 25.304347826086957, "grad_norm": 0.408155620098114, "learning_rate": 0.001, "loss": 1.895, "step": 293328 }, { "epoch": 25.309178743961354, "grad_norm": 2.83010196685791, "learning_rate": 0.001, "loss": 1.8942, "step": 293384 }, { "epoch": 25.314009661835748, "grad_norm": 0.6202123165130615, "learning_rate": 0.001, "loss": 1.8963, "step": 293440 }, { "epoch": 25.318840579710145, "grad_norm": 0.7521714568138123, "learning_rate": 0.001, "loss": 1.9047, "step": 293496 }, { "epoch": 25.32367149758454, "grad_norm": 1.0924556255340576, "learning_rate": 0.001, "loss": 1.8953, "step": 293552 }, { "epoch": 25.32850241545894, "grad_norm": 0.4207269549369812, "learning_rate": 0.001, "loss": 1.8974, "step": 293608 }, { "epoch": 25.333333333333332, "grad_norm": 1.9933527708053589, "learning_rate": 0.001, "loss": 1.8875, "step": 293664 }, { "epoch": 25.33816425120773, "grad_norm": 1.5796427726745605, "learning_rate": 0.001, "loss": 1.8965, "step": 293720 }, { "epoch": 25.342995169082126, "grad_norm": 3.365229368209839, "learning_rate": 0.001, "loss": 1.9063, "step": 293776 }, { "epoch": 25.347826086956523, "grad_norm": 0.43115368485450745, "learning_rate": 0.001, "loss": 1.9114, "step": 293832 }, { "epoch": 25.352657004830917, "grad_norm": 4.12042236328125, "learning_rate": 0.001, "loss": 1.8997, "step": 293888 }, { "epoch": 25.357487922705314, "grad_norm": 1.3762151002883911, "learning_rate": 0.001, "loss": 1.9046, "step": 293944 }, { "epoch": 25.36231884057971, "grad_norm": 0.4094729721546173, "learning_rate": 0.001, "loss": 1.9083, "step": 294000 }, { "epoch": 25.367149758454108, "grad_norm": 0.7817860245704651, "learning_rate": 0.001, "loss": 1.9004, "step": 294056 }, { "epoch": 25.3719806763285, "grad_norm": 0.6060207486152649, "learning_rate": 0.001, "loss": 1.904, "step": 294112 }, { "epoch": 25.3768115942029, "grad_norm": 1.3999605178833008, "learning_rate": 0.001, "loss": 1.913, "step": 294168 }, { "epoch": 25.381642512077295, "grad_norm": 1.6263113021850586, "learning_rate": 0.001, "loss": 1.9202, "step": 294224 }, { "epoch": 25.386473429951693, "grad_norm": 2.385714292526245, "learning_rate": 0.001, "loss": 1.9287, "step": 294280 }, { "epoch": 25.391304347826086, "grad_norm": 1.8584405183792114, "learning_rate": 0.001, "loss": 1.9301, "step": 294336 }, { "epoch": 25.396135265700483, "grad_norm": 0.6920762658119202, "learning_rate": 0.001, "loss": 1.9165, "step": 294392 }, { "epoch": 25.40096618357488, "grad_norm": 0.5580245852470398, "learning_rate": 0.001, "loss": 1.9099, "step": 294448 }, { "epoch": 25.405797101449274, "grad_norm": 0.687025249004364, "learning_rate": 0.001, "loss": 1.9164, "step": 294504 }, { "epoch": 25.41062801932367, "grad_norm": 1.1209429502487183, "learning_rate": 0.001, "loss": 1.9314, "step": 294560 }, { "epoch": 25.415458937198068, "grad_norm": 0.8178371787071228, "learning_rate": 0.001, "loss": 1.9285, "step": 294616 }, { "epoch": 25.420289855072465, "grad_norm": 0.632466733455658, "learning_rate": 0.001, "loss": 1.9308, "step": 294672 }, { "epoch": 25.42512077294686, "grad_norm": 0.37882667779922485, "learning_rate": 0.001, "loss": 1.9216, "step": 294728 }, { "epoch": 25.429951690821255, "grad_norm": 0.672427773475647, "learning_rate": 0.001, "loss": 1.9182, "step": 294784 }, { "epoch": 25.434782608695652, "grad_norm": 1.0975173711776733, "learning_rate": 0.001, "loss": 1.9061, "step": 294840 }, { "epoch": 25.43961352657005, "grad_norm": 0.32201433181762695, "learning_rate": 0.001, "loss": 1.9118, "step": 294896 }, { "epoch": 25.444444444444443, "grad_norm": 0.39145660400390625, "learning_rate": 0.001, "loss": 1.9092, "step": 294952 }, { "epoch": 25.44927536231884, "grad_norm": 1.644083023071289, "learning_rate": 0.001, "loss": 1.9003, "step": 295008 }, { "epoch": 25.454106280193237, "grad_norm": 1.29334557056427, "learning_rate": 0.001, "loss": 1.9071, "step": 295064 }, { "epoch": 25.458937198067634, "grad_norm": 0.38255631923675537, "learning_rate": 0.001, "loss": 1.9079, "step": 295120 }, { "epoch": 25.463768115942027, "grad_norm": 0.27681881189346313, "learning_rate": 0.001, "loss": 1.9023, "step": 295176 }, { "epoch": 25.468599033816425, "grad_norm": 0.4592108726501465, "learning_rate": 0.001, "loss": 1.9097, "step": 295232 }, { "epoch": 25.47342995169082, "grad_norm": 3.836824655532837, "learning_rate": 0.001, "loss": 1.902, "step": 295288 }, { "epoch": 25.47826086956522, "grad_norm": 0.4101463258266449, "learning_rate": 0.001, "loss": 1.9092, "step": 295344 }, { "epoch": 25.483091787439612, "grad_norm": 0.47329702973365784, "learning_rate": 0.001, "loss": 1.9131, "step": 295400 }, { "epoch": 25.48792270531401, "grad_norm": 0.8570764660835266, "learning_rate": 0.001, "loss": 1.9215, "step": 295456 }, { "epoch": 25.492753623188406, "grad_norm": 4.283381462097168, "learning_rate": 0.001, "loss": 1.9173, "step": 295512 }, { "epoch": 25.497584541062803, "grad_norm": 1.0258461236953735, "learning_rate": 0.001, "loss": 1.9145, "step": 295568 }, { "epoch": 25.502415458937197, "grad_norm": 0.6075412631034851, "learning_rate": 0.001, "loss": 1.9045, "step": 295624 }, { "epoch": 25.507246376811594, "grad_norm": 0.3578660190105438, "learning_rate": 0.001, "loss": 1.9027, "step": 295680 }, { "epoch": 25.51207729468599, "grad_norm": 1.0285652875900269, "learning_rate": 0.001, "loss": 1.9069, "step": 295736 }, { "epoch": 25.516908212560388, "grad_norm": 0.3425889313220978, "learning_rate": 0.001, "loss": 1.9039, "step": 295792 }, { "epoch": 25.52173913043478, "grad_norm": 0.7712044715881348, "learning_rate": 0.001, "loss": 1.9079, "step": 295848 }, { "epoch": 25.52657004830918, "grad_norm": 1.4006562232971191, "learning_rate": 0.001, "loss": 1.9029, "step": 295904 }, { "epoch": 25.531400966183575, "grad_norm": 1.7159932851791382, "learning_rate": 0.001, "loss": 1.9061, "step": 295960 }, { "epoch": 25.536231884057973, "grad_norm": 0.2915489673614502, "learning_rate": 0.001, "loss": 1.9097, "step": 296016 }, { "epoch": 25.541062801932366, "grad_norm": 0.7826337218284607, "learning_rate": 0.001, "loss": 1.9149, "step": 296072 }, { "epoch": 25.545893719806763, "grad_norm": 1.243208408355713, "learning_rate": 0.001, "loss": 1.9084, "step": 296128 }, { "epoch": 25.55072463768116, "grad_norm": 0.7755004167556763, "learning_rate": 0.001, "loss": 1.9004, "step": 296184 }, { "epoch": 25.555555555555557, "grad_norm": 3.1196846961975098, "learning_rate": 0.001, "loss": 1.9033, "step": 296240 }, { "epoch": 25.56038647342995, "grad_norm": 0.38815218210220337, "learning_rate": 0.001, "loss": 1.9113, "step": 296296 }, { "epoch": 25.565217391304348, "grad_norm": 0.6186689734458923, "learning_rate": 0.001, "loss": 1.9013, "step": 296352 }, { "epoch": 25.570048309178745, "grad_norm": 0.7853377461433411, "learning_rate": 0.001, "loss": 1.9104, "step": 296408 }, { "epoch": 25.57487922705314, "grad_norm": 2.117340087890625, "learning_rate": 0.001, "loss": 1.9078, "step": 296464 }, { "epoch": 25.579710144927535, "grad_norm": 0.3405117392539978, "learning_rate": 0.001, "loss": 1.906, "step": 296520 }, { "epoch": 25.584541062801932, "grad_norm": 1.4066188335418701, "learning_rate": 0.001, "loss": 1.9063, "step": 296576 }, { "epoch": 25.58937198067633, "grad_norm": 2.6741034984588623, "learning_rate": 0.001, "loss": 1.9059, "step": 296632 }, { "epoch": 25.594202898550726, "grad_norm": 0.9391655921936035, "learning_rate": 0.001, "loss": 1.9018, "step": 296688 }, { "epoch": 25.59903381642512, "grad_norm": 0.38697245717048645, "learning_rate": 0.001, "loss": 1.9045, "step": 296744 }, { "epoch": 25.603864734299517, "grad_norm": 1.3267372846603394, "learning_rate": 0.001, "loss": 1.9107, "step": 296800 }, { "epoch": 25.608695652173914, "grad_norm": 0.345544695854187, "learning_rate": 0.001, "loss": 1.9083, "step": 296856 }, { "epoch": 25.613526570048307, "grad_norm": 0.32526302337646484, "learning_rate": 0.001, "loss": 1.9014, "step": 296912 }, { "epoch": 25.618357487922705, "grad_norm": 0.3191330134868622, "learning_rate": 0.001, "loss": 1.9017, "step": 296968 }, { "epoch": 25.6231884057971, "grad_norm": 0.5620341897010803, "learning_rate": 0.001, "loss": 1.9068, "step": 297024 }, { "epoch": 25.6280193236715, "grad_norm": 0.31250351667404175, "learning_rate": 0.001, "loss": 1.9124, "step": 297080 }, { "epoch": 25.632850241545892, "grad_norm": 0.7494297027587891, "learning_rate": 0.001, "loss": 1.9064, "step": 297136 }, { "epoch": 25.63768115942029, "grad_norm": 0.6574238538742065, "learning_rate": 0.001, "loss": 1.908, "step": 297192 }, { "epoch": 25.642512077294686, "grad_norm": 2.87058687210083, "learning_rate": 0.001, "loss": 1.9047, "step": 297248 }, { "epoch": 25.647342995169083, "grad_norm": 0.4257795512676239, "learning_rate": 0.001, "loss": 1.9079, "step": 297304 }, { "epoch": 25.652173913043477, "grad_norm": 0.6079059839248657, "learning_rate": 0.001, "loss": 1.9051, "step": 297360 }, { "epoch": 25.657004830917874, "grad_norm": 0.8318992257118225, "learning_rate": 0.001, "loss": 1.8974, "step": 297416 }, { "epoch": 25.66183574879227, "grad_norm": 1.1041759252548218, "learning_rate": 0.001, "loss": 1.8898, "step": 297472 }, { "epoch": 25.666666666666668, "grad_norm": 0.3551042675971985, "learning_rate": 0.001, "loss": 1.8893, "step": 297528 }, { "epoch": 25.67149758454106, "grad_norm": 0.3162704408168793, "learning_rate": 0.001, "loss": 1.9046, "step": 297584 }, { "epoch": 25.67632850241546, "grad_norm": 0.29571276903152466, "learning_rate": 0.001, "loss": 1.912, "step": 297640 }, { "epoch": 25.681159420289855, "grad_norm": 0.36899498105049133, "learning_rate": 0.001, "loss": 1.8996, "step": 297696 }, { "epoch": 25.685990338164252, "grad_norm": 0.6283080577850342, "learning_rate": 0.001, "loss": 1.8975, "step": 297752 }, { "epoch": 25.690821256038646, "grad_norm": 1.0238908529281616, "learning_rate": 0.001, "loss": 1.8969, "step": 297808 }, { "epoch": 25.695652173913043, "grad_norm": 0.41631507873535156, "learning_rate": 0.001, "loss": 1.9119, "step": 297864 }, { "epoch": 25.70048309178744, "grad_norm": 13.57119083404541, "learning_rate": 0.001, "loss": 1.9105, "step": 297920 }, { "epoch": 25.705314009661837, "grad_norm": 1.7018839120864868, "learning_rate": 0.001, "loss": 1.8944, "step": 297976 }, { "epoch": 25.71014492753623, "grad_norm": 1.874880313873291, "learning_rate": 0.001, "loss": 1.9113, "step": 298032 }, { "epoch": 25.714975845410628, "grad_norm": 0.8662129640579224, "learning_rate": 0.001, "loss": 1.8973, "step": 298088 }, { "epoch": 25.719806763285025, "grad_norm": 0.40759047865867615, "learning_rate": 0.001, "loss": 1.9034, "step": 298144 }, { "epoch": 25.72463768115942, "grad_norm": 0.49374592304229736, "learning_rate": 0.001, "loss": 1.9078, "step": 298200 }, { "epoch": 25.729468599033815, "grad_norm": 0.6400483846664429, "learning_rate": 0.001, "loss": 1.9147, "step": 298256 }, { "epoch": 25.734299516908212, "grad_norm": 0.3507595658302307, "learning_rate": 0.001, "loss": 1.9139, "step": 298312 }, { "epoch": 25.73913043478261, "grad_norm": 0.5335170030593872, "learning_rate": 0.001, "loss": 1.9047, "step": 298368 }, { "epoch": 25.743961352657006, "grad_norm": 0.3188813626766205, "learning_rate": 0.001, "loss": 1.8995, "step": 298424 }, { "epoch": 25.7487922705314, "grad_norm": 0.2959311306476593, "learning_rate": 0.001, "loss": 1.9036, "step": 298480 }, { "epoch": 25.753623188405797, "grad_norm": 0.4437108337879181, "learning_rate": 0.001, "loss": 1.8933, "step": 298536 }, { "epoch": 25.758454106280194, "grad_norm": 0.3364206552505493, "learning_rate": 0.001, "loss": 1.8986, "step": 298592 }, { "epoch": 25.76328502415459, "grad_norm": 0.6662651300430298, "learning_rate": 0.001, "loss": 1.9046, "step": 298648 }, { "epoch": 25.768115942028984, "grad_norm": 3.5131893157958984, "learning_rate": 0.001, "loss": 1.9039, "step": 298704 }, { "epoch": 25.77294685990338, "grad_norm": 0.42375093698501587, "learning_rate": 0.001, "loss": 1.8917, "step": 298760 }, { "epoch": 25.77777777777778, "grad_norm": 0.5596851110458374, "learning_rate": 0.001, "loss": 1.8976, "step": 298816 }, { "epoch": 25.782608695652176, "grad_norm": 1.331814169883728, "learning_rate": 0.001, "loss": 1.9112, "step": 298872 }, { "epoch": 25.78743961352657, "grad_norm": 0.44547462463378906, "learning_rate": 0.001, "loss": 1.905, "step": 298928 }, { "epoch": 25.792270531400966, "grad_norm": 5.6350507736206055, "learning_rate": 0.001, "loss": 1.913, "step": 298984 }, { "epoch": 25.797101449275363, "grad_norm": 0.7228078842163086, "learning_rate": 0.001, "loss": 1.9269, "step": 299040 }, { "epoch": 25.80193236714976, "grad_norm": 1.2135993242263794, "learning_rate": 0.001, "loss": 1.9211, "step": 299096 }, { "epoch": 25.806763285024154, "grad_norm": 1.1408370733261108, "learning_rate": 0.001, "loss": 1.9235, "step": 299152 }, { "epoch": 25.81159420289855, "grad_norm": 0.7281011939048767, "learning_rate": 0.001, "loss": 1.914, "step": 299208 }, { "epoch": 25.816425120772948, "grad_norm": 0.4898208677768707, "learning_rate": 0.001, "loss": 1.8962, "step": 299264 }, { "epoch": 25.82125603864734, "grad_norm": 1.246077537536621, "learning_rate": 0.001, "loss": 1.9248, "step": 299320 }, { "epoch": 25.82608695652174, "grad_norm": 2.373201847076416, "learning_rate": 0.001, "loss": 1.9255, "step": 299376 }, { "epoch": 25.830917874396135, "grad_norm": 1.2950632572174072, "learning_rate": 0.001, "loss": 1.9141, "step": 299432 }, { "epoch": 25.835748792270532, "grad_norm": 0.3583345413208008, "learning_rate": 0.001, "loss": 1.9208, "step": 299488 }, { "epoch": 25.840579710144926, "grad_norm": 0.7596715688705444, "learning_rate": 0.001, "loss": 1.9065, "step": 299544 }, { "epoch": 25.845410628019323, "grad_norm": 0.3765631914138794, "learning_rate": 0.001, "loss": 1.9013, "step": 299600 }, { "epoch": 25.85024154589372, "grad_norm": 0.3842770755290985, "learning_rate": 0.001, "loss": 1.914, "step": 299656 }, { "epoch": 25.855072463768117, "grad_norm": 1.05859375, "learning_rate": 0.001, "loss": 1.9111, "step": 299712 }, { "epoch": 25.85990338164251, "grad_norm": 0.9447957873344421, "learning_rate": 0.001, "loss": 1.9161, "step": 299768 }, { "epoch": 25.864734299516908, "grad_norm": 0.43140825629234314, "learning_rate": 0.001, "loss": 1.914, "step": 299824 }, { "epoch": 25.869565217391305, "grad_norm": 1.1701570749282837, "learning_rate": 0.001, "loss": 1.918, "step": 299880 }, { "epoch": 25.8743961352657, "grad_norm": 1.0709367990493774, "learning_rate": 0.001, "loss": 1.9142, "step": 299936 }, { "epoch": 25.879227053140095, "grad_norm": 0.38440442085266113, "learning_rate": 0.001, "loss": 1.9301, "step": 299992 }, { "epoch": 25.884057971014492, "grad_norm": 2.866610288619995, "learning_rate": 0.001, "loss": 1.9214, "step": 300048 }, { "epoch": 25.88888888888889, "grad_norm": 3.443896532058716, "learning_rate": 0.001, "loss": 1.9175, "step": 300104 }, { "epoch": 25.893719806763286, "grad_norm": 0.5511997938156128, "learning_rate": 0.001, "loss": 1.9204, "step": 300160 }, { "epoch": 25.89855072463768, "grad_norm": 0.29944583773612976, "learning_rate": 0.001, "loss": 1.9189, "step": 300216 }, { "epoch": 25.903381642512077, "grad_norm": 1.086601972579956, "learning_rate": 0.001, "loss": 1.9112, "step": 300272 }, { "epoch": 25.908212560386474, "grad_norm": 0.8408346176147461, "learning_rate": 0.001, "loss": 1.9176, "step": 300328 }, { "epoch": 25.91304347826087, "grad_norm": 0.36783310770988464, "learning_rate": 0.001, "loss": 1.9205, "step": 300384 }, { "epoch": 25.917874396135264, "grad_norm": 0.6311558485031128, "learning_rate": 0.001, "loss": 1.9139, "step": 300440 }, { "epoch": 25.92270531400966, "grad_norm": 0.47691747546195984, "learning_rate": 0.001, "loss": 1.9067, "step": 300496 }, { "epoch": 25.92753623188406, "grad_norm": 0.6378122568130493, "learning_rate": 0.001, "loss": 1.9002, "step": 300552 }, { "epoch": 25.932367149758456, "grad_norm": 0.8052208423614502, "learning_rate": 0.001, "loss": 1.9079, "step": 300608 }, { "epoch": 25.93719806763285, "grad_norm": 0.8299881815910339, "learning_rate": 0.001, "loss": 1.9101, "step": 300664 }, { "epoch": 25.942028985507246, "grad_norm": 0.3130775988101959, "learning_rate": 0.001, "loss": 1.9023, "step": 300720 }, { "epoch": 25.946859903381643, "grad_norm": 0.5967893004417419, "learning_rate": 0.001, "loss": 1.9047, "step": 300776 }, { "epoch": 25.95169082125604, "grad_norm": 0.5926712155342102, "learning_rate": 0.001, "loss": 1.894, "step": 300832 }, { "epoch": 25.956521739130434, "grad_norm": 0.5201270580291748, "learning_rate": 0.001, "loss": 1.9009, "step": 300888 }, { "epoch": 25.96135265700483, "grad_norm": 1.2219924926757812, "learning_rate": 0.001, "loss": 1.9183, "step": 300944 }, { "epoch": 25.966183574879228, "grad_norm": 1.3014028072357178, "learning_rate": 0.001, "loss": 1.9227, "step": 301000 }, { "epoch": 25.971014492753625, "grad_norm": 0.6416466236114502, "learning_rate": 0.001, "loss": 1.9303, "step": 301056 }, { "epoch": 25.97584541062802, "grad_norm": 0.7122479677200317, "learning_rate": 0.001, "loss": 1.9299, "step": 301112 }, { "epoch": 25.980676328502415, "grad_norm": 1.0494407415390015, "learning_rate": 0.001, "loss": 1.922, "step": 301168 }, { "epoch": 25.985507246376812, "grad_norm": 0.9618339538574219, "learning_rate": 0.001, "loss": 1.9197, "step": 301224 }, { "epoch": 25.990338164251206, "grad_norm": 0.6108476519584656, "learning_rate": 0.001, "loss": 1.9209, "step": 301280 }, { "epoch": 25.995169082125603, "grad_norm": 1.281829833984375, "learning_rate": 0.001, "loss": 1.9196, "step": 301336 }, { "epoch": 26.0, "grad_norm": 1.3143168687820435, "learning_rate": 0.001, "loss": 1.9205, "step": 301392 }, { "epoch": 26.004830917874397, "grad_norm": 1.1090672016143799, "learning_rate": 0.001, "loss": 1.8746, "step": 301448 }, { "epoch": 26.00966183574879, "grad_norm": 0.5035786032676697, "learning_rate": 0.001, "loss": 1.878, "step": 301504 }, { "epoch": 26.014492753623188, "grad_norm": 1.1404213905334473, "learning_rate": 0.001, "loss": 1.8671, "step": 301560 }, { "epoch": 26.019323671497585, "grad_norm": 0.5109812617301941, "learning_rate": 0.001, "loss": 1.8704, "step": 301616 }, { "epoch": 26.02415458937198, "grad_norm": 0.5612033009529114, "learning_rate": 0.001, "loss": 1.865, "step": 301672 }, { "epoch": 26.028985507246375, "grad_norm": 0.5191264748573303, "learning_rate": 0.001, "loss": 1.8738, "step": 301728 }, { "epoch": 26.033816425120772, "grad_norm": 0.42789584398269653, "learning_rate": 0.001, "loss": 1.871, "step": 301784 }, { "epoch": 26.03864734299517, "grad_norm": 0.3447798788547516, "learning_rate": 0.001, "loss": 1.8651, "step": 301840 }, { "epoch": 26.043478260869566, "grad_norm": 0.49298641085624695, "learning_rate": 0.001, "loss": 1.8816, "step": 301896 }, { "epoch": 26.04830917874396, "grad_norm": 0.4427963197231293, "learning_rate": 0.001, "loss": 1.8724, "step": 301952 }, { "epoch": 26.053140096618357, "grad_norm": 0.5729649662971497, "learning_rate": 0.001, "loss": 1.8702, "step": 302008 }, { "epoch": 26.057971014492754, "grad_norm": 1.5502758026123047, "learning_rate": 0.001, "loss": 1.8744, "step": 302064 }, { "epoch": 26.06280193236715, "grad_norm": 0.9390813112258911, "learning_rate": 0.001, "loss": 1.8776, "step": 302120 }, { "epoch": 26.067632850241544, "grad_norm": 1.3725903034210205, "learning_rate": 0.001, "loss": 1.8813, "step": 302176 }, { "epoch": 26.07246376811594, "grad_norm": 0.46793410181999207, "learning_rate": 0.001, "loss": 1.8942, "step": 302232 }, { "epoch": 26.07729468599034, "grad_norm": 1.2029708623886108, "learning_rate": 0.001, "loss": 1.8851, "step": 302288 }, { "epoch": 26.082125603864736, "grad_norm": 0.49923932552337646, "learning_rate": 0.001, "loss": 1.8989, "step": 302344 }, { "epoch": 26.08695652173913, "grad_norm": 0.6857315897941589, "learning_rate": 0.001, "loss": 1.8894, "step": 302400 }, { "epoch": 26.091787439613526, "grad_norm": 2.213942289352417, "learning_rate": 0.001, "loss": 1.8791, "step": 302456 }, { "epoch": 26.096618357487923, "grad_norm": 0.4258173704147339, "learning_rate": 0.001, "loss": 1.8871, "step": 302512 }, { "epoch": 26.10144927536232, "grad_norm": 1.16960871219635, "learning_rate": 0.001, "loss": 1.8899, "step": 302568 }, { "epoch": 26.106280193236714, "grad_norm": 0.9616075754165649, "learning_rate": 0.001, "loss": 1.8783, "step": 302624 }, { "epoch": 26.11111111111111, "grad_norm": 0.7716442942619324, "learning_rate": 0.001, "loss": 1.8726, "step": 302680 }, { "epoch": 26.115942028985508, "grad_norm": 5.355496406555176, "learning_rate": 0.001, "loss": 1.8799, "step": 302736 }, { "epoch": 26.120772946859905, "grad_norm": 1.204249382019043, "learning_rate": 0.001, "loss": 1.8727, "step": 302792 }, { "epoch": 26.1256038647343, "grad_norm": 1.827998399734497, "learning_rate": 0.001, "loss": 1.8788, "step": 302848 }, { "epoch": 26.130434782608695, "grad_norm": 2.4009156227111816, "learning_rate": 0.001, "loss": 1.87, "step": 302904 }, { "epoch": 26.135265700483092, "grad_norm": 0.2766123116016388, "learning_rate": 0.001, "loss": 1.8851, "step": 302960 }, { "epoch": 26.14009661835749, "grad_norm": 0.9976978898048401, "learning_rate": 0.001, "loss": 1.8847, "step": 303016 }, { "epoch": 26.144927536231883, "grad_norm": 0.30852484703063965, "learning_rate": 0.001, "loss": 1.8775, "step": 303072 }, { "epoch": 26.14975845410628, "grad_norm": 2.1905627250671387, "learning_rate": 0.001, "loss": 1.8689, "step": 303128 }, { "epoch": 26.154589371980677, "grad_norm": 0.8048823475837708, "learning_rate": 0.001, "loss": 1.869, "step": 303184 }, { "epoch": 26.159420289855074, "grad_norm": 0.613875150680542, "learning_rate": 0.001, "loss": 1.876, "step": 303240 }, { "epoch": 26.164251207729468, "grad_norm": 0.9056879878044128, "learning_rate": 0.001, "loss": 1.8837, "step": 303296 }, { "epoch": 26.169082125603865, "grad_norm": 1.5494990348815918, "learning_rate": 0.001, "loss": 1.8723, "step": 303352 }, { "epoch": 26.17391304347826, "grad_norm": 4.792817115783691, "learning_rate": 0.001, "loss": 1.8841, "step": 303408 }, { "epoch": 26.17874396135266, "grad_norm": 0.3415108323097229, "learning_rate": 0.001, "loss": 1.8802, "step": 303464 }, { "epoch": 26.183574879227052, "grad_norm": 1.3616974353790283, "learning_rate": 0.001, "loss": 1.8812, "step": 303520 }, { "epoch": 26.18840579710145, "grad_norm": 0.40016573667526245, "learning_rate": 0.001, "loss": 1.887, "step": 303576 }, { "epoch": 26.193236714975846, "grad_norm": 1.1108522415161133, "learning_rate": 0.001, "loss": 1.8926, "step": 303632 }, { "epoch": 26.19806763285024, "grad_norm": 1.0270116329193115, "learning_rate": 0.001, "loss": 1.8783, "step": 303688 }, { "epoch": 26.202898550724637, "grad_norm": 0.4639199376106262, "learning_rate": 0.001, "loss": 1.8783, "step": 303744 }, { "epoch": 26.207729468599034, "grad_norm": 0.2807302176952362, "learning_rate": 0.001, "loss": 1.87, "step": 303800 }, { "epoch": 26.21256038647343, "grad_norm": 5.228604316711426, "learning_rate": 0.001, "loss": 1.8717, "step": 303856 }, { "epoch": 26.217391304347824, "grad_norm": 0.8545970916748047, "learning_rate": 0.001, "loss": 1.8832, "step": 303912 }, { "epoch": 26.22222222222222, "grad_norm": 1.9023536443710327, "learning_rate": 0.001, "loss": 1.8811, "step": 303968 }, { "epoch": 26.22705314009662, "grad_norm": 0.4753367006778717, "learning_rate": 0.001, "loss": 1.8838, "step": 304024 }, { "epoch": 26.231884057971016, "grad_norm": 0.37560272216796875, "learning_rate": 0.001, "loss": 1.8773, "step": 304080 }, { "epoch": 26.23671497584541, "grad_norm": 0.3396928310394287, "learning_rate": 0.001, "loss": 1.8767, "step": 304136 }, { "epoch": 26.241545893719806, "grad_norm": 1.4739775657653809, "learning_rate": 0.001, "loss": 1.8865, "step": 304192 }, { "epoch": 26.246376811594203, "grad_norm": 0.41447797417640686, "learning_rate": 0.001, "loss": 1.8867, "step": 304248 }, { "epoch": 26.2512077294686, "grad_norm": 3.348151683807373, "learning_rate": 0.001, "loss": 1.8896, "step": 304304 }, { "epoch": 26.256038647342994, "grad_norm": 0.5316768884658813, "learning_rate": 0.001, "loss": 1.8813, "step": 304360 }, { "epoch": 26.26086956521739, "grad_norm": 3.1807913780212402, "learning_rate": 0.001, "loss": 1.8851, "step": 304416 }, { "epoch": 26.265700483091788, "grad_norm": 0.9746037125587463, "learning_rate": 0.001, "loss": 1.8784, "step": 304472 }, { "epoch": 26.270531400966185, "grad_norm": 0.6775360107421875, "learning_rate": 0.001, "loss": 1.8754, "step": 304528 }, { "epoch": 26.27536231884058, "grad_norm": 0.6768845915794373, "learning_rate": 0.001, "loss": 1.8793, "step": 304584 }, { "epoch": 26.280193236714975, "grad_norm": 0.5649595260620117, "learning_rate": 0.001, "loss": 1.8747, "step": 304640 }, { "epoch": 26.285024154589372, "grad_norm": 0.33543792366981506, "learning_rate": 0.001, "loss": 1.8768, "step": 304696 }, { "epoch": 26.28985507246377, "grad_norm": 0.2526896893978119, "learning_rate": 0.001, "loss": 1.8703, "step": 304752 }, { "epoch": 26.294685990338163, "grad_norm": 0.3276289701461792, "learning_rate": 0.001, "loss": 1.8704, "step": 304808 }, { "epoch": 26.29951690821256, "grad_norm": 9.126632690429688, "learning_rate": 0.001, "loss": 1.8708, "step": 304864 }, { "epoch": 26.304347826086957, "grad_norm": 0.6791106462478638, "learning_rate": 0.001, "loss": 1.8884, "step": 304920 }, { "epoch": 26.309178743961354, "grad_norm": 0.885800838470459, "learning_rate": 0.001, "loss": 1.8883, "step": 304976 }, { "epoch": 26.314009661835748, "grad_norm": 0.8597341775894165, "learning_rate": 0.001, "loss": 1.8957, "step": 305032 }, { "epoch": 26.318840579710145, "grad_norm": 0.8280271887779236, "learning_rate": 0.001, "loss": 1.8858, "step": 305088 }, { "epoch": 26.32367149758454, "grad_norm": 0.317353755235672, "learning_rate": 0.001, "loss": 1.8882, "step": 305144 }, { "epoch": 26.32850241545894, "grad_norm": 6.9839630126953125, "learning_rate": 0.001, "loss": 1.9069, "step": 305200 }, { "epoch": 26.333333333333332, "grad_norm": 0.3132542669773102, "learning_rate": 0.001, "loss": 1.8812, "step": 305256 }, { "epoch": 26.33816425120773, "grad_norm": 6.474642276763916, "learning_rate": 0.001, "loss": 1.8936, "step": 305312 }, { "epoch": 26.342995169082126, "grad_norm": 2.4401490688323975, "learning_rate": 0.001, "loss": 1.8914, "step": 305368 }, { "epoch": 26.347826086956523, "grad_norm": 7.178806304931641, "learning_rate": 0.001, "loss": 1.89, "step": 305424 }, { "epoch": 26.352657004830917, "grad_norm": 3.663526773452759, "learning_rate": 0.001, "loss": 1.8872, "step": 305480 }, { "epoch": 26.357487922705314, "grad_norm": 0.6609997749328613, "learning_rate": 0.001, "loss": 1.8908, "step": 305536 }, { "epoch": 26.36231884057971, "grad_norm": 0.3757002651691437, "learning_rate": 0.001, "loss": 1.8922, "step": 305592 }, { "epoch": 26.367149758454108, "grad_norm": 1.704759955406189, "learning_rate": 0.001, "loss": 1.8866, "step": 305648 }, { "epoch": 26.3719806763285, "grad_norm": 0.5327828526496887, "learning_rate": 0.001, "loss": 1.8755, "step": 305704 }, { "epoch": 26.3768115942029, "grad_norm": 0.5192166566848755, "learning_rate": 0.001, "loss": 1.8821, "step": 305760 }, { "epoch": 26.381642512077295, "grad_norm": 0.6626099348068237, "learning_rate": 0.001, "loss": 1.8777, "step": 305816 }, { "epoch": 26.386473429951693, "grad_norm": 7.298976421356201, "learning_rate": 0.001, "loss": 1.8825, "step": 305872 }, { "epoch": 26.391304347826086, "grad_norm": 0.8154512643814087, "learning_rate": 0.001, "loss": 1.8897, "step": 305928 }, { "epoch": 26.396135265700483, "grad_norm": 0.33252355456352234, "learning_rate": 0.001, "loss": 1.8804, "step": 305984 }, { "epoch": 26.40096618357488, "grad_norm": 0.7255956530570984, "learning_rate": 0.001, "loss": 1.8855, "step": 306040 }, { "epoch": 26.405797101449274, "grad_norm": 0.94120854139328, "learning_rate": 0.001, "loss": 1.8826, "step": 306096 }, { "epoch": 26.41062801932367, "grad_norm": 0.2682323753833771, "learning_rate": 0.001, "loss": 1.8851, "step": 306152 }, { "epoch": 26.415458937198068, "grad_norm": 1.1721643209457397, "learning_rate": 0.001, "loss": 1.8834, "step": 306208 }, { "epoch": 26.420289855072465, "grad_norm": 0.2778533101081848, "learning_rate": 0.001, "loss": 1.8729, "step": 306264 }, { "epoch": 26.42512077294686, "grad_norm": 0.3333447277545929, "learning_rate": 0.001, "loss": 1.8845, "step": 306320 }, { "epoch": 26.429951690821255, "grad_norm": 0.2832978367805481, "learning_rate": 0.001, "loss": 1.8755, "step": 306376 }, { "epoch": 26.434782608695652, "grad_norm": 0.883465051651001, "learning_rate": 0.001, "loss": 1.8828, "step": 306432 }, { "epoch": 26.43961352657005, "grad_norm": 0.4079696536064148, "learning_rate": 0.001, "loss": 1.875, "step": 306488 }, { "epoch": 26.444444444444443, "grad_norm": 0.3065531253814697, "learning_rate": 0.001, "loss": 1.8765, "step": 306544 }, { "epoch": 26.44927536231884, "grad_norm": 0.569281280040741, "learning_rate": 0.001, "loss": 1.8903, "step": 306600 }, { "epoch": 26.454106280193237, "grad_norm": 0.3361499607563019, "learning_rate": 0.001, "loss": 1.8999, "step": 306656 }, { "epoch": 26.458937198067634, "grad_norm": 0.39027315378189087, "learning_rate": 0.001, "loss": 1.9017, "step": 306712 }, { "epoch": 26.463768115942027, "grad_norm": 0.3455759286880493, "learning_rate": 0.001, "loss": 1.8974, "step": 306768 }, { "epoch": 26.468599033816425, "grad_norm": 0.3283372223377228, "learning_rate": 0.001, "loss": 1.8923, "step": 306824 }, { "epoch": 26.47342995169082, "grad_norm": 0.30351710319519043, "learning_rate": 0.001, "loss": 1.8857, "step": 306880 }, { "epoch": 26.47826086956522, "grad_norm": 0.3195769488811493, "learning_rate": 0.001, "loss": 1.8859, "step": 306936 }, { "epoch": 26.483091787439612, "grad_norm": 0.7026917338371277, "learning_rate": 0.001, "loss": 1.8837, "step": 306992 }, { "epoch": 26.48792270531401, "grad_norm": 0.3198241591453552, "learning_rate": 0.001, "loss": 1.8837, "step": 307048 }, { "epoch": 26.492753623188406, "grad_norm": 0.5403355360031128, "learning_rate": 0.001, "loss": 1.8952, "step": 307104 }, { "epoch": 26.497584541062803, "grad_norm": 1.1172131299972534, "learning_rate": 0.001, "loss": 1.8867, "step": 307160 }, { "epoch": 26.502415458937197, "grad_norm": 1.1870065927505493, "learning_rate": 0.001, "loss": 1.9141, "step": 307216 }, { "epoch": 26.507246376811594, "grad_norm": 2.358741521835327, "learning_rate": 0.001, "loss": 1.929, "step": 307272 }, { "epoch": 26.51207729468599, "grad_norm": 0.39125892519950867, "learning_rate": 0.001, "loss": 1.9167, "step": 307328 }, { "epoch": 26.516908212560388, "grad_norm": 0.7804172039031982, "learning_rate": 0.001, "loss": 1.8994, "step": 307384 }, { "epoch": 26.52173913043478, "grad_norm": 2.6584928035736084, "learning_rate": 0.001, "loss": 1.8934, "step": 307440 }, { "epoch": 26.52657004830918, "grad_norm": 1.4944427013397217, "learning_rate": 0.001, "loss": 1.901, "step": 307496 }, { "epoch": 26.531400966183575, "grad_norm": 0.5590356588363647, "learning_rate": 0.001, "loss": 1.8994, "step": 307552 }, { "epoch": 26.536231884057973, "grad_norm": 1.2691519260406494, "learning_rate": 0.001, "loss": 1.8963, "step": 307608 }, { "epoch": 26.541062801932366, "grad_norm": 2.2038352489471436, "learning_rate": 0.001, "loss": 1.8961, "step": 307664 }, { "epoch": 26.545893719806763, "grad_norm": 18.64430809020996, "learning_rate": 0.001, "loss": 1.8852, "step": 307720 }, { "epoch": 26.55072463768116, "grad_norm": 0.9700139164924622, "learning_rate": 0.001, "loss": 1.899, "step": 307776 }, { "epoch": 26.555555555555557, "grad_norm": 0.9777677655220032, "learning_rate": 0.001, "loss": 1.9145, "step": 307832 }, { "epoch": 26.56038647342995, "grad_norm": 0.8899800181388855, "learning_rate": 0.001, "loss": 1.9049, "step": 307888 }, { "epoch": 26.565217391304348, "grad_norm": 0.48456913232803345, "learning_rate": 0.001, "loss": 1.8916, "step": 307944 }, { "epoch": 26.570048309178745, "grad_norm": 0.41189396381378174, "learning_rate": 0.001, "loss": 1.9031, "step": 308000 }, { "epoch": 26.57487922705314, "grad_norm": 1.8014603853225708, "learning_rate": 0.001, "loss": 1.8989, "step": 308056 }, { "epoch": 26.579710144927535, "grad_norm": 1.531384825706482, "learning_rate": 0.001, "loss": 1.8974, "step": 308112 }, { "epoch": 26.584541062801932, "grad_norm": 1.1755337715148926, "learning_rate": 0.001, "loss": 1.8886, "step": 308168 }, { "epoch": 26.58937198067633, "grad_norm": 10.700976371765137, "learning_rate": 0.001, "loss": 1.8956, "step": 308224 }, { "epoch": 26.594202898550726, "grad_norm": 3.254497528076172, "learning_rate": 0.001, "loss": 1.9164, "step": 308280 }, { "epoch": 26.59903381642512, "grad_norm": 1.1473145484924316, "learning_rate": 0.001, "loss": 1.919, "step": 308336 }, { "epoch": 26.603864734299517, "grad_norm": 1.8856450319290161, "learning_rate": 0.001, "loss": 1.9368, "step": 308392 }, { "epoch": 26.608695652173914, "grad_norm": 2.5101125240325928, "learning_rate": 0.001, "loss": 1.9449, "step": 308448 }, { "epoch": 26.613526570048307, "grad_norm": 1.7259069681167603, "learning_rate": 0.001, "loss": 1.9423, "step": 308504 }, { "epoch": 26.618357487922705, "grad_norm": 1.6580649614334106, "learning_rate": 0.001, "loss": 1.9319, "step": 308560 }, { "epoch": 26.6231884057971, "grad_norm": 1.1380752325057983, "learning_rate": 0.001, "loss": 1.9181, "step": 308616 }, { "epoch": 26.6280193236715, "grad_norm": 0.766771137714386, "learning_rate": 0.001, "loss": 1.9123, "step": 308672 }, { "epoch": 26.632850241545892, "grad_norm": 5.427463054656982, "learning_rate": 0.001, "loss": 1.9195, "step": 308728 }, { "epoch": 26.63768115942029, "grad_norm": 0.8008475303649902, "learning_rate": 0.001, "loss": 1.9184, "step": 308784 }, { "epoch": 26.642512077294686, "grad_norm": 1.1295855045318604, "learning_rate": 0.001, "loss": 1.9153, "step": 308840 }, { "epoch": 26.647342995169083, "grad_norm": 0.4221721887588501, "learning_rate": 0.001, "loss": 1.9032, "step": 308896 }, { "epoch": 26.652173913043477, "grad_norm": 2.990657329559326, "learning_rate": 0.001, "loss": 1.8925, "step": 308952 }, { "epoch": 26.657004830917874, "grad_norm": 0.8321185111999512, "learning_rate": 0.001, "loss": 1.896, "step": 309008 }, { "epoch": 26.66183574879227, "grad_norm": 0.3358325958251953, "learning_rate": 0.001, "loss": 1.9013, "step": 309064 }, { "epoch": 26.666666666666668, "grad_norm": 0.641778826713562, "learning_rate": 0.001, "loss": 1.8994, "step": 309120 }, { "epoch": 26.67149758454106, "grad_norm": 0.38921916484832764, "learning_rate": 0.001, "loss": 1.8912, "step": 309176 }, { "epoch": 26.67632850241546, "grad_norm": 0.35960710048675537, "learning_rate": 0.001, "loss": 1.886, "step": 309232 }, { "epoch": 26.681159420289855, "grad_norm": 0.4628863036632538, "learning_rate": 0.001, "loss": 1.8826, "step": 309288 }, { "epoch": 26.685990338164252, "grad_norm": 2.0270535945892334, "learning_rate": 0.001, "loss": 1.8941, "step": 309344 }, { "epoch": 26.690821256038646, "grad_norm": 2.1707756519317627, "learning_rate": 0.001, "loss": 1.8989, "step": 309400 }, { "epoch": 26.695652173913043, "grad_norm": 0.6616970300674438, "learning_rate": 0.001, "loss": 1.8959, "step": 309456 }, { "epoch": 26.70048309178744, "grad_norm": 0.3734076917171478, "learning_rate": 0.001, "loss": 1.9049, "step": 309512 }, { "epoch": 26.705314009661837, "grad_norm": 1.3182293176651, "learning_rate": 0.001, "loss": 1.889, "step": 309568 }, { "epoch": 26.71014492753623, "grad_norm": 0.6974278688430786, "learning_rate": 0.001, "loss": 1.8905, "step": 309624 }, { "epoch": 26.714975845410628, "grad_norm": 1.2047744989395142, "learning_rate": 0.001, "loss": 1.9039, "step": 309680 }, { "epoch": 26.719806763285025, "grad_norm": 2.1396772861480713, "learning_rate": 0.001, "loss": 1.9095, "step": 309736 }, { "epoch": 26.72463768115942, "grad_norm": 0.3909177780151367, "learning_rate": 0.001, "loss": 1.9091, "step": 309792 }, { "epoch": 26.729468599033815, "grad_norm": 7.051060676574707, "learning_rate": 0.001, "loss": 1.9042, "step": 309848 }, { "epoch": 26.734299516908212, "grad_norm": 0.8563029170036316, "learning_rate": 0.001, "loss": 1.9002, "step": 309904 }, { "epoch": 26.73913043478261, "grad_norm": 1.8648775815963745, "learning_rate": 0.001, "loss": 1.9019, "step": 309960 }, { "epoch": 26.743961352657006, "grad_norm": 1.2807790040969849, "learning_rate": 0.001, "loss": 1.8963, "step": 310016 }, { "epoch": 26.7487922705314, "grad_norm": 0.6904706358909607, "learning_rate": 0.001, "loss": 1.9017, "step": 310072 }, { "epoch": 26.753623188405797, "grad_norm": 0.381023108959198, "learning_rate": 0.001, "loss": 1.9143, "step": 310128 }, { "epoch": 26.758454106280194, "grad_norm": 1.7539176940917969, "learning_rate": 0.001, "loss": 1.9192, "step": 310184 }, { "epoch": 26.76328502415459, "grad_norm": 0.4614386558532715, "learning_rate": 0.001, "loss": 1.9185, "step": 310240 }, { "epoch": 26.768115942028984, "grad_norm": 2.982783555984497, "learning_rate": 0.001, "loss": 1.9108, "step": 310296 }, { "epoch": 26.77294685990338, "grad_norm": 0.5641258358955383, "learning_rate": 0.001, "loss": 1.9035, "step": 310352 }, { "epoch": 26.77777777777778, "grad_norm": 0.40009805560112, "learning_rate": 0.001, "loss": 1.9015, "step": 310408 }, { "epoch": 26.782608695652176, "grad_norm": 0.7269598841667175, "learning_rate": 0.001, "loss": 1.9003, "step": 310464 }, { "epoch": 26.78743961352657, "grad_norm": 0.5545816421508789, "learning_rate": 0.001, "loss": 1.9094, "step": 310520 }, { "epoch": 26.792270531400966, "grad_norm": 1.7288382053375244, "learning_rate": 0.001, "loss": 1.897, "step": 310576 }, { "epoch": 26.797101449275363, "grad_norm": 0.689685583114624, "learning_rate": 0.001, "loss": 1.8999, "step": 310632 }, { "epoch": 26.80193236714976, "grad_norm": 0.6434476375579834, "learning_rate": 0.001, "loss": 1.9001, "step": 310688 }, { "epoch": 26.806763285024154, "grad_norm": 0.7111773490905762, "learning_rate": 0.001, "loss": 1.9066, "step": 310744 }, { "epoch": 26.81159420289855, "grad_norm": 1.208738088607788, "learning_rate": 0.001, "loss": 1.9017, "step": 310800 }, { "epoch": 26.816425120772948, "grad_norm": 0.28572335839271545, "learning_rate": 0.001, "loss": 1.9076, "step": 310856 }, { "epoch": 26.82125603864734, "grad_norm": 1.7035478353500366, "learning_rate": 0.001, "loss": 1.9058, "step": 310912 }, { "epoch": 26.82608695652174, "grad_norm": 0.909943163394928, "learning_rate": 0.001, "loss": 1.9099, "step": 310968 }, { "epoch": 26.830917874396135, "grad_norm": 1.4705640077590942, "learning_rate": 0.001, "loss": 1.9045, "step": 311024 }, { "epoch": 26.835748792270532, "grad_norm": 1.6536723375320435, "learning_rate": 0.001, "loss": 1.9108, "step": 311080 }, { "epoch": 26.840579710144926, "grad_norm": 2.310727596282959, "learning_rate": 0.001, "loss": 1.9037, "step": 311136 }, { "epoch": 26.845410628019323, "grad_norm": 0.8978720307350159, "learning_rate": 0.001, "loss": 1.8984, "step": 311192 }, { "epoch": 26.85024154589372, "grad_norm": 1.3442364931106567, "learning_rate": 0.001, "loss": 1.8903, "step": 311248 }, { "epoch": 26.855072463768117, "grad_norm": 1.4992104768753052, "learning_rate": 0.001, "loss": 1.8943, "step": 311304 }, { "epoch": 26.85990338164251, "grad_norm": 1.4222944974899292, "learning_rate": 0.001, "loss": 1.896, "step": 311360 }, { "epoch": 26.864734299516908, "grad_norm": 0.3838197588920593, "learning_rate": 0.001, "loss": 1.9007, "step": 311416 }, { "epoch": 26.869565217391305, "grad_norm": 2.465280771255493, "learning_rate": 0.001, "loss": 1.8999, "step": 311472 }, { "epoch": 26.8743961352657, "grad_norm": 1.7289129495620728, "learning_rate": 0.001, "loss": 1.9006, "step": 311528 }, { "epoch": 26.879227053140095, "grad_norm": 0.9332231879234314, "learning_rate": 0.001, "loss": 1.9111, "step": 311584 }, { "epoch": 26.884057971014492, "grad_norm": 0.3344719707965851, "learning_rate": 0.001, "loss": 1.9266, "step": 311640 }, { "epoch": 26.88888888888889, "grad_norm": 0.40268272161483765, "learning_rate": 0.001, "loss": 1.9252, "step": 311696 }, { "epoch": 26.893719806763286, "grad_norm": 1.498618483543396, "learning_rate": 0.001, "loss": 1.9116, "step": 311752 }, { "epoch": 26.89855072463768, "grad_norm": 17.675731658935547, "learning_rate": 0.001, "loss": 1.9036, "step": 311808 }, { "epoch": 26.903381642512077, "grad_norm": 0.592947781085968, "learning_rate": 0.001, "loss": 1.9053, "step": 311864 }, { "epoch": 26.908212560386474, "grad_norm": 1.4547990560531616, "learning_rate": 0.001, "loss": 1.9095, "step": 311920 }, { "epoch": 26.91304347826087, "grad_norm": 0.6640229821205139, "learning_rate": 0.001, "loss": 1.9126, "step": 311976 }, { "epoch": 26.917874396135264, "grad_norm": 1.9883171319961548, "learning_rate": 0.001, "loss": 1.9135, "step": 312032 }, { "epoch": 26.92270531400966, "grad_norm": 0.6683921813964844, "learning_rate": 0.001, "loss": 1.9048, "step": 312088 }, { "epoch": 26.92753623188406, "grad_norm": 1.5512480735778809, "learning_rate": 0.001, "loss": 1.9066, "step": 312144 }, { "epoch": 26.932367149758456, "grad_norm": 0.49740952253341675, "learning_rate": 0.001, "loss": 1.9035, "step": 312200 }, { "epoch": 26.93719806763285, "grad_norm": 2.413365602493286, "learning_rate": 0.001, "loss": 1.9114, "step": 312256 }, { "epoch": 26.942028985507246, "grad_norm": 1.005081295967102, "learning_rate": 0.001, "loss": 1.9289, "step": 312312 }, { "epoch": 26.946859903381643, "grad_norm": 0.6323103308677673, "learning_rate": 0.001, "loss": 1.9196, "step": 312368 }, { "epoch": 26.95169082125604, "grad_norm": 1.4417591094970703, "learning_rate": 0.001, "loss": 1.9032, "step": 312424 }, { "epoch": 26.956521739130434, "grad_norm": 0.3168693482875824, "learning_rate": 0.001, "loss": 1.9094, "step": 312480 }, { "epoch": 26.96135265700483, "grad_norm": 1.1715271472930908, "learning_rate": 0.001, "loss": 1.9067, "step": 312536 }, { "epoch": 26.966183574879228, "grad_norm": 0.533421516418457, "learning_rate": 0.001, "loss": 1.9128, "step": 312592 }, { "epoch": 26.971014492753625, "grad_norm": 0.40822833776474, "learning_rate": 0.001, "loss": 1.9193, "step": 312648 }, { "epoch": 26.97584541062802, "grad_norm": 0.5746835470199585, "learning_rate": 0.001, "loss": 1.909, "step": 312704 }, { "epoch": 26.980676328502415, "grad_norm": 0.8537691831588745, "learning_rate": 0.001, "loss": 1.9145, "step": 312760 }, { "epoch": 26.985507246376812, "grad_norm": 1.048047661781311, "learning_rate": 0.001, "loss": 1.9266, "step": 312816 }, { "epoch": 26.990338164251206, "grad_norm": 0.6718924641609192, "learning_rate": 0.001, "loss": 1.9143, "step": 312872 }, { "epoch": 26.995169082125603, "grad_norm": 1.1036442518234253, "learning_rate": 0.001, "loss": 1.9087, "step": 312928 }, { "epoch": 27.0, "grad_norm": 0.8787195086479187, "learning_rate": 0.001, "loss": 1.9044, "step": 312984 }, { "epoch": 27.004830917874397, "grad_norm": 1.326701045036316, "learning_rate": 0.001, "loss": 1.8706, "step": 313040 }, { "epoch": 27.00966183574879, "grad_norm": 16.42797088623047, "learning_rate": 0.001, "loss": 1.8744, "step": 313096 }, { "epoch": 27.014492753623188, "grad_norm": 0.8020104765892029, "learning_rate": 0.001, "loss": 1.8741, "step": 313152 }, { "epoch": 27.019323671497585, "grad_norm": 1.0050700902938843, "learning_rate": 0.001, "loss": 1.8727, "step": 313208 }, { "epoch": 27.02415458937198, "grad_norm": 3.5074846744537354, "learning_rate": 0.001, "loss": 1.8741, "step": 313264 }, { "epoch": 27.028985507246375, "grad_norm": 0.5766336917877197, "learning_rate": 0.001, "loss": 1.8874, "step": 313320 }, { "epoch": 27.033816425120772, "grad_norm": 2.3538620471954346, "learning_rate": 0.001, "loss": 1.883, "step": 313376 }, { "epoch": 27.03864734299517, "grad_norm": 0.753011167049408, "learning_rate": 0.001, "loss": 1.8808, "step": 313432 }, { "epoch": 27.043478260869566, "grad_norm": 0.5945261120796204, "learning_rate": 0.001, "loss": 1.879, "step": 313488 }, { "epoch": 27.04830917874396, "grad_norm": 0.523683488368988, "learning_rate": 0.001, "loss": 1.8698, "step": 313544 }, { "epoch": 27.053140096618357, "grad_norm": 0.3526722192764282, "learning_rate": 0.001, "loss": 1.8775, "step": 313600 }, { "epoch": 27.057971014492754, "grad_norm": 0.5040813088417053, "learning_rate": 0.001, "loss": 1.8663, "step": 313656 }, { "epoch": 27.06280193236715, "grad_norm": 1.0941710472106934, "learning_rate": 0.001, "loss": 1.8658, "step": 313712 }, { "epoch": 27.067632850241544, "grad_norm": 1.4529997110366821, "learning_rate": 0.001, "loss": 1.875, "step": 313768 }, { "epoch": 27.07246376811594, "grad_norm": 0.7842084765434265, "learning_rate": 0.001, "loss": 1.8651, "step": 313824 }, { "epoch": 27.07729468599034, "grad_norm": 0.4730626940727234, "learning_rate": 0.001, "loss": 1.8601, "step": 313880 }, { "epoch": 27.082125603864736, "grad_norm": 1.0959725379943848, "learning_rate": 0.001, "loss": 1.8585, "step": 313936 }, { "epoch": 27.08695652173913, "grad_norm": 0.8946459889411926, "learning_rate": 0.001, "loss": 1.8601, "step": 313992 }, { "epoch": 27.091787439613526, "grad_norm": 1.3689531087875366, "learning_rate": 0.001, "loss": 1.8505, "step": 314048 }, { "epoch": 27.096618357487923, "grad_norm": 1.4602844715118408, "learning_rate": 0.001, "loss": 1.8606, "step": 314104 }, { "epoch": 27.10144927536232, "grad_norm": 0.5630793571472168, "learning_rate": 0.001, "loss": 1.8763, "step": 314160 }, { "epoch": 27.106280193236714, "grad_norm": 1.3670883178710938, "learning_rate": 0.001, "loss": 1.8786, "step": 314216 }, { "epoch": 27.11111111111111, "grad_norm": 0.5482655167579651, "learning_rate": 0.001, "loss": 1.8774, "step": 314272 }, { "epoch": 27.115942028985508, "grad_norm": 0.6280721426010132, "learning_rate": 0.001, "loss": 1.8712, "step": 314328 }, { "epoch": 27.120772946859905, "grad_norm": 0.6933141946792603, "learning_rate": 0.001, "loss": 1.8789, "step": 314384 }, { "epoch": 27.1256038647343, "grad_norm": 0.5642929673194885, "learning_rate": 0.001, "loss": 1.8858, "step": 314440 }, { "epoch": 27.130434782608695, "grad_norm": 0.47165897488594055, "learning_rate": 0.001, "loss": 1.8811, "step": 314496 }, { "epoch": 27.135265700483092, "grad_norm": 2.2702157497406006, "learning_rate": 0.001, "loss": 1.8782, "step": 314552 }, { "epoch": 27.14009661835749, "grad_norm": 1.172485589981079, "learning_rate": 0.001, "loss": 1.8695, "step": 314608 }, { "epoch": 27.144927536231883, "grad_norm": 0.5493935346603394, "learning_rate": 0.001, "loss": 1.8614, "step": 314664 }, { "epoch": 27.14975845410628, "grad_norm": 0.28513163328170776, "learning_rate": 0.001, "loss": 1.8718, "step": 314720 }, { "epoch": 27.154589371980677, "grad_norm": 0.5382900238037109, "learning_rate": 0.001, "loss": 1.8717, "step": 314776 }, { "epoch": 27.159420289855074, "grad_norm": 0.5325965881347656, "learning_rate": 0.001, "loss": 1.8639, "step": 314832 }, { "epoch": 27.164251207729468, "grad_norm": 0.5712432265281677, "learning_rate": 0.001, "loss": 1.8717, "step": 314888 }, { "epoch": 27.169082125603865, "grad_norm": 0.41147926449775696, "learning_rate": 0.001, "loss": 1.8805, "step": 314944 }, { "epoch": 27.17391304347826, "grad_norm": 0.7136927247047424, "learning_rate": 0.001, "loss": 1.8674, "step": 315000 }, { "epoch": 27.17874396135266, "grad_norm": 1.326589584350586, "learning_rate": 0.001, "loss": 1.8616, "step": 315056 }, { "epoch": 27.183574879227052, "grad_norm": 2.189208745956421, "learning_rate": 0.001, "loss": 1.8757, "step": 315112 }, { "epoch": 27.18840579710145, "grad_norm": 1.004647135734558, "learning_rate": 0.001, "loss": 1.8881, "step": 315168 }, { "epoch": 27.193236714975846, "grad_norm": 0.8220815062522888, "learning_rate": 0.001, "loss": 1.8783, "step": 315224 }, { "epoch": 27.19806763285024, "grad_norm": 0.5307570099830627, "learning_rate": 0.001, "loss": 1.8717, "step": 315280 }, { "epoch": 27.202898550724637, "grad_norm": 0.46694618463516235, "learning_rate": 0.001, "loss": 1.8646, "step": 315336 }, { "epoch": 27.207729468599034, "grad_norm": 0.5688669085502625, "learning_rate": 0.001, "loss": 1.8697, "step": 315392 }, { "epoch": 27.21256038647343, "grad_norm": 1.0991936922073364, "learning_rate": 0.001, "loss": 1.8741, "step": 315448 }, { "epoch": 27.217391304347824, "grad_norm": 1.3987079858779907, "learning_rate": 0.001, "loss": 1.8741, "step": 315504 }, { "epoch": 27.22222222222222, "grad_norm": 0.35779833793640137, "learning_rate": 0.001, "loss": 1.8726, "step": 315560 }, { "epoch": 27.22705314009662, "grad_norm": 2.180145263671875, "learning_rate": 0.001, "loss": 1.8711, "step": 315616 }, { "epoch": 27.231884057971016, "grad_norm": 0.4075707495212555, "learning_rate": 0.001, "loss": 1.8738, "step": 315672 }, { "epoch": 27.23671497584541, "grad_norm": 0.6211092472076416, "learning_rate": 0.001, "loss": 1.872, "step": 315728 }, { "epoch": 27.241545893719806, "grad_norm": 0.4445870816707611, "learning_rate": 0.001, "loss": 1.8676, "step": 315784 }, { "epoch": 27.246376811594203, "grad_norm": 0.2655699551105499, "learning_rate": 0.001, "loss": 1.8792, "step": 315840 }, { "epoch": 27.2512077294686, "grad_norm": 0.6398695111274719, "learning_rate": 0.001, "loss": 1.8798, "step": 315896 }, { "epoch": 27.256038647342994, "grad_norm": 0.5590243935585022, "learning_rate": 0.001, "loss": 1.8841, "step": 315952 }, { "epoch": 27.26086956521739, "grad_norm": 0.5827743411064148, "learning_rate": 0.001, "loss": 1.877, "step": 316008 }, { "epoch": 27.265700483091788, "grad_norm": 0.543076753616333, "learning_rate": 0.001, "loss": 1.879, "step": 316064 }, { "epoch": 27.270531400966185, "grad_norm": 0.4645482003688812, "learning_rate": 0.001, "loss": 1.9005, "step": 316120 }, { "epoch": 27.27536231884058, "grad_norm": 1.1291449069976807, "learning_rate": 0.001, "loss": 1.8973, "step": 316176 }, { "epoch": 27.280193236714975, "grad_norm": 0.7766647934913635, "learning_rate": 0.001, "loss": 1.891, "step": 316232 }, { "epoch": 27.285024154589372, "grad_norm": 0.43069392442703247, "learning_rate": 0.001, "loss": 1.9002, "step": 316288 }, { "epoch": 27.28985507246377, "grad_norm": 1.9576739072799683, "learning_rate": 0.001, "loss": 1.8846, "step": 316344 }, { "epoch": 27.294685990338163, "grad_norm": 0.36968687176704407, "learning_rate": 0.001, "loss": 1.8842, "step": 316400 }, { "epoch": 27.29951690821256, "grad_norm": 1.5662766695022583, "learning_rate": 0.001, "loss": 1.8977, "step": 316456 }, { "epoch": 27.304347826086957, "grad_norm": 0.6104956269264221, "learning_rate": 0.001, "loss": 1.9044, "step": 316512 }, { "epoch": 27.309178743961354, "grad_norm": 0.5514392256736755, "learning_rate": 0.001, "loss": 1.9121, "step": 316568 }, { "epoch": 27.314009661835748, "grad_norm": 0.3508784770965576, "learning_rate": 0.001, "loss": 1.9166, "step": 316624 }, { "epoch": 27.318840579710145, "grad_norm": 0.5754507184028625, "learning_rate": 0.001, "loss": 1.8906, "step": 316680 }, { "epoch": 27.32367149758454, "grad_norm": 0.883436381816864, "learning_rate": 0.001, "loss": 1.8968, "step": 316736 }, { "epoch": 27.32850241545894, "grad_norm": 2.083041191101074, "learning_rate": 0.001, "loss": 1.9031, "step": 316792 }, { "epoch": 27.333333333333332, "grad_norm": 0.956459641456604, "learning_rate": 0.001, "loss": 1.8948, "step": 316848 }, { "epoch": 27.33816425120773, "grad_norm": 0.9388265013694763, "learning_rate": 0.001, "loss": 1.8938, "step": 316904 }, { "epoch": 27.342995169082126, "grad_norm": 0.7901542782783508, "learning_rate": 0.001, "loss": 1.8905, "step": 316960 }, { "epoch": 27.347826086956523, "grad_norm": 0.4316692352294922, "learning_rate": 0.001, "loss": 1.8947, "step": 317016 }, { "epoch": 27.352657004830917, "grad_norm": 0.5385074019432068, "learning_rate": 0.001, "loss": 1.8903, "step": 317072 }, { "epoch": 27.357487922705314, "grad_norm": 0.2966645359992981, "learning_rate": 0.001, "loss": 1.8818, "step": 317128 }, { "epoch": 27.36231884057971, "grad_norm": 0.6879773139953613, "learning_rate": 0.001, "loss": 1.8895, "step": 317184 }, { "epoch": 27.367149758454108, "grad_norm": 0.879963219165802, "learning_rate": 0.001, "loss": 1.8757, "step": 317240 }, { "epoch": 27.3719806763285, "grad_norm": 1.953042984008789, "learning_rate": 0.001, "loss": 1.8737, "step": 317296 }, { "epoch": 27.3768115942029, "grad_norm": 1.9178799390792847, "learning_rate": 0.001, "loss": 1.8835, "step": 317352 }, { "epoch": 27.381642512077295, "grad_norm": 0.7519900798797607, "learning_rate": 0.001, "loss": 1.8875, "step": 317408 }, { "epoch": 27.386473429951693, "grad_norm": 2.2220876216888428, "learning_rate": 0.001, "loss": 1.8833, "step": 317464 }, { "epoch": 27.391304347826086, "grad_norm": 0.6742727756500244, "learning_rate": 0.001, "loss": 1.8864, "step": 317520 }, { "epoch": 27.396135265700483, "grad_norm": 1.0539315938949585, "learning_rate": 0.001, "loss": 1.8883, "step": 317576 }, { "epoch": 27.40096618357488, "grad_norm": 0.2949395477771759, "learning_rate": 0.001, "loss": 1.898, "step": 317632 }, { "epoch": 27.405797101449274, "grad_norm": 0.3182970881462097, "learning_rate": 0.001, "loss": 1.8974, "step": 317688 }, { "epoch": 27.41062801932367, "grad_norm": 0.6619102358818054, "learning_rate": 0.001, "loss": 1.8793, "step": 317744 }, { "epoch": 27.415458937198068, "grad_norm": 0.4429330825805664, "learning_rate": 0.001, "loss": 1.8813, "step": 317800 }, { "epoch": 27.420289855072465, "grad_norm": 0.4018218517303467, "learning_rate": 0.001, "loss": 1.8822, "step": 317856 }, { "epoch": 27.42512077294686, "grad_norm": 0.4882408380508423, "learning_rate": 0.001, "loss": 1.8808, "step": 317912 }, { "epoch": 27.429951690821255, "grad_norm": 1.5550724267959595, "learning_rate": 0.001, "loss": 1.8742, "step": 317968 }, { "epoch": 27.434782608695652, "grad_norm": 0.7251642942428589, "learning_rate": 0.001, "loss": 1.8752, "step": 318024 }, { "epoch": 27.43961352657005, "grad_norm": 1.447607159614563, "learning_rate": 0.001, "loss": 1.877, "step": 318080 }, { "epoch": 27.444444444444443, "grad_norm": 0.7382452487945557, "learning_rate": 0.001, "loss": 1.8778, "step": 318136 }, { "epoch": 27.44927536231884, "grad_norm": 0.6004638075828552, "learning_rate": 0.001, "loss": 1.8802, "step": 318192 }, { "epoch": 27.454106280193237, "grad_norm": 0.5994868874549866, "learning_rate": 0.001, "loss": 1.8845, "step": 318248 }, { "epoch": 27.458937198067634, "grad_norm": 0.499619722366333, "learning_rate": 0.001, "loss": 1.8937, "step": 318304 }, { "epoch": 27.463768115942027, "grad_norm": 0.7170670628547668, "learning_rate": 0.001, "loss": 1.899, "step": 318360 }, { "epoch": 27.468599033816425, "grad_norm": 0.3206084668636322, "learning_rate": 0.001, "loss": 1.8891, "step": 318416 }, { "epoch": 27.47342995169082, "grad_norm": 0.4154198467731476, "learning_rate": 0.001, "loss": 1.8802, "step": 318472 }, { "epoch": 27.47826086956522, "grad_norm": 0.48203417658805847, "learning_rate": 0.001, "loss": 1.8802, "step": 318528 }, { "epoch": 27.483091787439612, "grad_norm": 1.2199218273162842, "learning_rate": 0.001, "loss": 1.8815, "step": 318584 }, { "epoch": 27.48792270531401, "grad_norm": 3.0930137634277344, "learning_rate": 0.001, "loss": 1.8845, "step": 318640 }, { "epoch": 27.492753623188406, "grad_norm": 0.4436224699020386, "learning_rate": 0.001, "loss": 1.8831, "step": 318696 }, { "epoch": 27.497584541062803, "grad_norm": 0.7236968278884888, "learning_rate": 0.001, "loss": 1.8844, "step": 318752 }, { "epoch": 27.502415458937197, "grad_norm": 0.3699961304664612, "learning_rate": 0.001, "loss": 1.8863, "step": 318808 }, { "epoch": 27.507246376811594, "grad_norm": 0.6518350839614868, "learning_rate": 0.001, "loss": 1.8851, "step": 318864 }, { "epoch": 27.51207729468599, "grad_norm": 1.2222529649734497, "learning_rate": 0.001, "loss": 1.8839, "step": 318920 }, { "epoch": 27.516908212560388, "grad_norm": 1.0932095050811768, "learning_rate": 0.001, "loss": 1.8964, "step": 318976 }, { "epoch": 27.52173913043478, "grad_norm": 0.6933474540710449, "learning_rate": 0.001, "loss": 1.882, "step": 319032 }, { "epoch": 27.52657004830918, "grad_norm": 2.3337810039520264, "learning_rate": 0.001, "loss": 1.8958, "step": 319088 }, { "epoch": 27.531400966183575, "grad_norm": 1.5713279247283936, "learning_rate": 0.001, "loss": 1.8986, "step": 319144 }, { "epoch": 27.536231884057973, "grad_norm": 2.0587315559387207, "learning_rate": 0.001, "loss": 1.9114, "step": 319200 }, { "epoch": 27.541062801932366, "grad_norm": 0.8205888271331787, "learning_rate": 0.001, "loss": 1.907, "step": 319256 }, { "epoch": 27.545893719806763, "grad_norm": 0.47125864028930664, "learning_rate": 0.001, "loss": 1.8988, "step": 319312 }, { "epoch": 27.55072463768116, "grad_norm": 0.9135292768478394, "learning_rate": 0.001, "loss": 1.9016, "step": 319368 }, { "epoch": 27.555555555555557, "grad_norm": 1.260481357574463, "learning_rate": 0.001, "loss": 1.8844, "step": 319424 }, { "epoch": 27.56038647342995, "grad_norm": 0.728749692440033, "learning_rate": 0.001, "loss": 1.9079, "step": 319480 }, { "epoch": 27.565217391304348, "grad_norm": 0.372332364320755, "learning_rate": 0.001, "loss": 1.9039, "step": 319536 }, { "epoch": 27.570048309178745, "grad_norm": 0.6417818665504456, "learning_rate": 0.001, "loss": 1.9063, "step": 319592 }, { "epoch": 27.57487922705314, "grad_norm": 0.24539490044116974, "learning_rate": 0.001, "loss": 1.9033, "step": 319648 }, { "epoch": 27.579710144927535, "grad_norm": 0.644845724105835, "learning_rate": 0.001, "loss": 1.8935, "step": 319704 }, { "epoch": 27.584541062801932, "grad_norm": 0.3748593330383301, "learning_rate": 0.001, "loss": 1.8935, "step": 319760 }, { "epoch": 27.58937198067633, "grad_norm": 0.7903972864151001, "learning_rate": 0.001, "loss": 1.902, "step": 319816 }, { "epoch": 27.594202898550726, "grad_norm": 0.39561012387275696, "learning_rate": 0.001, "loss": 1.8867, "step": 319872 }, { "epoch": 27.59903381642512, "grad_norm": 10.904019355773926, "learning_rate": 0.001, "loss": 1.8924, "step": 319928 }, { "epoch": 27.603864734299517, "grad_norm": 0.33296820521354675, "learning_rate": 0.001, "loss": 1.8913, "step": 319984 }, { "epoch": 27.608695652173914, "grad_norm": 3.3433420658111572, "learning_rate": 0.001, "loss": 1.8959, "step": 320040 }, { "epoch": 27.613526570048307, "grad_norm": 1.7524195909500122, "learning_rate": 0.001, "loss": 1.893, "step": 320096 }, { "epoch": 27.618357487922705, "grad_norm": 5.968977928161621, "learning_rate": 0.001, "loss": 1.8968, "step": 320152 }, { "epoch": 27.6231884057971, "grad_norm": 4.030668258666992, "learning_rate": 0.001, "loss": 1.8801, "step": 320208 }, { "epoch": 27.6280193236715, "grad_norm": 0.5673682689666748, "learning_rate": 0.001, "loss": 1.8862, "step": 320264 }, { "epoch": 27.632850241545892, "grad_norm": 0.480268657207489, "learning_rate": 0.001, "loss": 1.8832, "step": 320320 }, { "epoch": 27.63768115942029, "grad_norm": 1.0985263586044312, "learning_rate": 0.001, "loss": 1.8795, "step": 320376 }, { "epoch": 27.642512077294686, "grad_norm": 0.46542710065841675, "learning_rate": 0.001, "loss": 1.8826, "step": 320432 }, { "epoch": 27.647342995169083, "grad_norm": 0.911636471748352, "learning_rate": 0.001, "loss": 1.887, "step": 320488 }, { "epoch": 27.652173913043477, "grad_norm": 0.38735121488571167, "learning_rate": 0.001, "loss": 1.8906, "step": 320544 }, { "epoch": 27.657004830917874, "grad_norm": 0.2978653013706207, "learning_rate": 0.001, "loss": 1.9011, "step": 320600 }, { "epoch": 27.66183574879227, "grad_norm": 0.3818797171115875, "learning_rate": 0.001, "loss": 1.8859, "step": 320656 }, { "epoch": 27.666666666666668, "grad_norm": 6.442737102508545, "learning_rate": 0.001, "loss": 1.8869, "step": 320712 }, { "epoch": 27.67149758454106, "grad_norm": 0.4381128251552582, "learning_rate": 0.001, "loss": 1.88, "step": 320768 }, { "epoch": 27.67632850241546, "grad_norm": 0.3563942313194275, "learning_rate": 0.001, "loss": 1.8886, "step": 320824 }, { "epoch": 27.681159420289855, "grad_norm": 0.33426153659820557, "learning_rate": 0.001, "loss": 1.8952, "step": 320880 }, { "epoch": 27.685990338164252, "grad_norm": 0.591313898563385, "learning_rate": 0.001, "loss": 1.8838, "step": 320936 }, { "epoch": 27.690821256038646, "grad_norm": 0.538343608379364, "learning_rate": 0.001, "loss": 1.8899, "step": 320992 }, { "epoch": 27.695652173913043, "grad_norm": 0.33096420764923096, "learning_rate": 0.001, "loss": 1.8842, "step": 321048 }, { "epoch": 27.70048309178744, "grad_norm": 1.6692655086517334, "learning_rate": 0.001, "loss": 1.8814, "step": 321104 }, { "epoch": 27.705314009661837, "grad_norm": 0.8772959113121033, "learning_rate": 0.001, "loss": 1.8851, "step": 321160 }, { "epoch": 27.71014492753623, "grad_norm": 0.7430129647254944, "learning_rate": 0.001, "loss": 1.8802, "step": 321216 }, { "epoch": 27.714975845410628, "grad_norm": 6.376121997833252, "learning_rate": 0.001, "loss": 1.8946, "step": 321272 }, { "epoch": 27.719806763285025, "grad_norm": 0.45732006430625916, "learning_rate": 0.001, "loss": 1.8939, "step": 321328 }, { "epoch": 27.72463768115942, "grad_norm": 2.5487396717071533, "learning_rate": 0.001, "loss": 1.8937, "step": 321384 }, { "epoch": 27.729468599033815, "grad_norm": 0.322135329246521, "learning_rate": 0.001, "loss": 1.9008, "step": 321440 }, { "epoch": 27.734299516908212, "grad_norm": 16.541006088256836, "learning_rate": 0.001, "loss": 1.9014, "step": 321496 }, { "epoch": 27.73913043478261, "grad_norm": 0.31626319885253906, "learning_rate": 0.001, "loss": 1.8968, "step": 321552 }, { "epoch": 27.743961352657006, "grad_norm": 0.3314145803451538, "learning_rate": 0.001, "loss": 1.8915, "step": 321608 }, { "epoch": 27.7487922705314, "grad_norm": 1.5720685720443726, "learning_rate": 0.001, "loss": 1.8989, "step": 321664 }, { "epoch": 27.753623188405797, "grad_norm": 0.9092897176742554, "learning_rate": 0.001, "loss": 1.8886, "step": 321720 }, { "epoch": 27.758454106280194, "grad_norm": 0.7346388697624207, "learning_rate": 0.001, "loss": 1.883, "step": 321776 }, { "epoch": 27.76328502415459, "grad_norm": 2.519763946533203, "learning_rate": 0.001, "loss": 1.8852, "step": 321832 }, { "epoch": 27.768115942028984, "grad_norm": 0.6742886900901794, "learning_rate": 0.001, "loss": 1.895, "step": 321888 }, { "epoch": 27.77294685990338, "grad_norm": 0.9749161005020142, "learning_rate": 0.001, "loss": 1.8932, "step": 321944 }, { "epoch": 27.77777777777778, "grad_norm": 0.3334799110889435, "learning_rate": 0.001, "loss": 1.8958, "step": 322000 }, { "epoch": 27.782608695652176, "grad_norm": 2.679762601852417, "learning_rate": 0.001, "loss": 1.8973, "step": 322056 }, { "epoch": 27.78743961352657, "grad_norm": 1.8996022939682007, "learning_rate": 0.001, "loss": 1.8989, "step": 322112 }, { "epoch": 27.792270531400966, "grad_norm": 3.7399845123291016, "learning_rate": 0.001, "loss": 1.904, "step": 322168 }, { "epoch": 27.797101449275363, "grad_norm": 0.9479333162307739, "learning_rate": 0.001, "loss": 1.9064, "step": 322224 }, { "epoch": 27.80193236714976, "grad_norm": 0.28084588050842285, "learning_rate": 0.001, "loss": 1.9243, "step": 322280 }, { "epoch": 27.806763285024154, "grad_norm": 0.603751003742218, "learning_rate": 0.001, "loss": 1.929, "step": 322336 }, { "epoch": 27.81159420289855, "grad_norm": 0.5026025176048279, "learning_rate": 0.001, "loss": 1.9212, "step": 322392 }, { "epoch": 27.816425120772948, "grad_norm": 0.8195517659187317, "learning_rate": 0.001, "loss": 1.9125, "step": 322448 }, { "epoch": 27.82125603864734, "grad_norm": 2.9567036628723145, "learning_rate": 0.001, "loss": 1.9094, "step": 322504 }, { "epoch": 27.82608695652174, "grad_norm": 5.535406112670898, "learning_rate": 0.001, "loss": 1.9189, "step": 322560 }, { "epoch": 27.830917874396135, "grad_norm": 0.9300132989883423, "learning_rate": 0.001, "loss": 1.9077, "step": 322616 }, { "epoch": 27.835748792270532, "grad_norm": 0.36531877517700195, "learning_rate": 0.001, "loss": 1.9054, "step": 322672 }, { "epoch": 27.840579710144926, "grad_norm": 1.9135181903839111, "learning_rate": 0.001, "loss": 1.9071, "step": 322728 }, { "epoch": 27.845410628019323, "grad_norm": 0.8453105092048645, "learning_rate": 0.001, "loss": 1.9184, "step": 322784 }, { "epoch": 27.85024154589372, "grad_norm": 0.35372525453567505, "learning_rate": 0.001, "loss": 1.9105, "step": 322840 }, { "epoch": 27.855072463768117, "grad_norm": 0.5117209553718567, "learning_rate": 0.001, "loss": 1.9258, "step": 322896 }, { "epoch": 27.85990338164251, "grad_norm": 0.629283607006073, "learning_rate": 0.001, "loss": 1.9318, "step": 322952 }, { "epoch": 27.864734299516908, "grad_norm": 0.5136920809745789, "learning_rate": 0.001, "loss": 1.9101, "step": 323008 }, { "epoch": 27.869565217391305, "grad_norm": 1.8003183603286743, "learning_rate": 0.001, "loss": 1.9257, "step": 323064 }, { "epoch": 27.8743961352657, "grad_norm": 0.9285981059074402, "learning_rate": 0.001, "loss": 1.9012, "step": 323120 }, { "epoch": 27.879227053140095, "grad_norm": 0.8813960552215576, "learning_rate": 0.001, "loss": 1.9041, "step": 323176 }, { "epoch": 27.884057971014492, "grad_norm": 0.6235100626945496, "learning_rate": 0.001, "loss": 1.9007, "step": 323232 }, { "epoch": 27.88888888888889, "grad_norm": 0.3910762071609497, "learning_rate": 0.001, "loss": 1.9127, "step": 323288 }, { "epoch": 27.893719806763286, "grad_norm": 8.673907279968262, "learning_rate": 0.001, "loss": 1.9082, "step": 323344 }, { "epoch": 27.89855072463768, "grad_norm": 3.1357994079589844, "learning_rate": 0.001, "loss": 1.9214, "step": 323400 }, { "epoch": 27.903381642512077, "grad_norm": 0.3850042223930359, "learning_rate": 0.001, "loss": 1.9151, "step": 323456 }, { "epoch": 27.908212560386474, "grad_norm": 0.30180904269218445, "learning_rate": 0.001, "loss": 1.9371, "step": 323512 }, { "epoch": 27.91304347826087, "grad_norm": 0.6604330539703369, "learning_rate": 0.001, "loss": 1.9429, "step": 323568 }, { "epoch": 27.917874396135264, "grad_norm": 0.5030882358551025, "learning_rate": 0.001, "loss": 1.9253, "step": 323624 }, { "epoch": 27.92270531400966, "grad_norm": 0.5363969206809998, "learning_rate": 0.001, "loss": 1.9049, "step": 323680 }, { "epoch": 27.92753623188406, "grad_norm": 1.0833579301834106, "learning_rate": 0.001, "loss": 1.9076, "step": 323736 }, { "epoch": 27.932367149758456, "grad_norm": 2.429192066192627, "learning_rate": 0.001, "loss": 1.906, "step": 323792 }, { "epoch": 27.93719806763285, "grad_norm": 2.371067762374878, "learning_rate": 0.001, "loss": 1.8969, "step": 323848 }, { "epoch": 27.942028985507246, "grad_norm": 0.9529378414154053, "learning_rate": 0.001, "loss": 1.9039, "step": 323904 }, { "epoch": 27.946859903381643, "grad_norm": 1.3829976320266724, "learning_rate": 0.001, "loss": 1.9014, "step": 323960 }, { "epoch": 27.95169082125604, "grad_norm": 14.774099349975586, "learning_rate": 0.001, "loss": 1.9239, "step": 324016 }, { "epoch": 27.956521739130434, "grad_norm": 18.525075912475586, "learning_rate": 0.001, "loss": 1.9404, "step": 324072 }, { "epoch": 27.96135265700483, "grad_norm": 0.5167252421379089, "learning_rate": 0.001, "loss": 1.9476, "step": 324128 }, { "epoch": 27.966183574879228, "grad_norm": 0.6662850975990295, "learning_rate": 0.001, "loss": 1.9395, "step": 324184 }, { "epoch": 27.971014492753625, "grad_norm": 0.41508254408836365, "learning_rate": 0.001, "loss": 1.9303, "step": 324240 }, { "epoch": 27.97584541062802, "grad_norm": 0.5331782102584839, "learning_rate": 0.001, "loss": 1.9173, "step": 324296 }, { "epoch": 27.980676328502415, "grad_norm": 3.6407034397125244, "learning_rate": 0.001, "loss": 1.9082, "step": 324352 }, { "epoch": 27.985507246376812, "grad_norm": 122.23438262939453, "learning_rate": 0.001, "loss": 1.9093, "step": 324408 }, { "epoch": 27.990338164251206, "grad_norm": 1.300862431526184, "learning_rate": 0.001, "loss": 1.9166, "step": 324464 }, { "epoch": 27.995169082125603, "grad_norm": 0.6456590294837952, "learning_rate": 0.001, "loss": 1.911, "step": 324520 }, { "epoch": 28.0, "grad_norm": 0.552609920501709, "learning_rate": 0.001, "loss": 1.9095, "step": 324576 }, { "epoch": 28.004830917874397, "grad_norm": 0.48888495564460754, "learning_rate": 0.001, "loss": 1.876, "step": 324632 }, { "epoch": 28.00966183574879, "grad_norm": 0.342519074678421, "learning_rate": 0.001, "loss": 1.8614, "step": 324688 }, { "epoch": 28.014492753623188, "grad_norm": 0.4688197672367096, "learning_rate": 0.001, "loss": 1.8703, "step": 324744 }, { "epoch": 28.019323671497585, "grad_norm": 0.9068715572357178, "learning_rate": 0.001, "loss": 1.8726, "step": 324800 }, { "epoch": 28.02415458937198, "grad_norm": 1.2657779455184937, "learning_rate": 0.001, "loss": 1.854, "step": 324856 }, { "epoch": 28.028985507246375, "grad_norm": 2.6490018367767334, "learning_rate": 0.001, "loss": 1.8685, "step": 324912 }, { "epoch": 28.033816425120772, "grad_norm": 0.5517184734344482, "learning_rate": 0.001, "loss": 1.8668, "step": 324968 }, { "epoch": 28.03864734299517, "grad_norm": 0.4099219739437103, "learning_rate": 0.001, "loss": 1.8663, "step": 325024 }, { "epoch": 28.043478260869566, "grad_norm": 0.39857882261276245, "learning_rate": 0.001, "loss": 1.8627, "step": 325080 }, { "epoch": 28.04830917874396, "grad_norm": 1.229873538017273, "learning_rate": 0.001, "loss": 1.865, "step": 325136 }, { "epoch": 28.053140096618357, "grad_norm": 1.3178666830062866, "learning_rate": 0.001, "loss": 1.8775, "step": 325192 }, { "epoch": 28.057971014492754, "grad_norm": 0.30644679069519043, "learning_rate": 0.001, "loss": 1.8748, "step": 325248 }, { "epoch": 28.06280193236715, "grad_norm": 1.005143404006958, "learning_rate": 0.001, "loss": 1.8736, "step": 325304 }, { "epoch": 28.067632850241544, "grad_norm": 0.3653129041194916, "learning_rate": 0.001, "loss": 1.8693, "step": 325360 }, { "epoch": 28.07246376811594, "grad_norm": 0.3228726089000702, "learning_rate": 0.001, "loss": 1.88, "step": 325416 }, { "epoch": 28.07729468599034, "grad_norm": 0.5306240320205688, "learning_rate": 0.001, "loss": 1.878, "step": 325472 }, { "epoch": 28.082125603864736, "grad_norm": 0.4342239499092102, "learning_rate": 0.001, "loss": 1.8895, "step": 325528 }, { "epoch": 28.08695652173913, "grad_norm": 2.961655378341675, "learning_rate": 0.001, "loss": 1.8611, "step": 325584 }, { "epoch": 28.091787439613526, "grad_norm": 0.7887042760848999, "learning_rate": 0.001, "loss": 1.8565, "step": 325640 }, { "epoch": 28.096618357487923, "grad_norm": 0.8795722723007202, "learning_rate": 0.001, "loss": 1.8673, "step": 325696 }, { "epoch": 28.10144927536232, "grad_norm": 2.6997663974761963, "learning_rate": 0.001, "loss": 1.8657, "step": 325752 }, { "epoch": 28.106280193236714, "grad_norm": 1.6187169551849365, "learning_rate": 0.001, "loss": 1.8733, "step": 325808 }, { "epoch": 28.11111111111111, "grad_norm": 1.2055699825286865, "learning_rate": 0.001, "loss": 1.8615, "step": 325864 }, { "epoch": 28.115942028985508, "grad_norm": 1.928062915802002, "learning_rate": 0.001, "loss": 1.8659, "step": 325920 }, { "epoch": 28.120772946859905, "grad_norm": 0.7335609793663025, "learning_rate": 0.001, "loss": 1.8809, "step": 325976 }, { "epoch": 28.1256038647343, "grad_norm": 1.5227272510528564, "learning_rate": 0.001, "loss": 1.879, "step": 326032 }, { "epoch": 28.130434782608695, "grad_norm": 1.1515116691589355, "learning_rate": 0.001, "loss": 1.8723, "step": 326088 }, { "epoch": 28.135265700483092, "grad_norm": 1.439655065536499, "learning_rate": 0.001, "loss": 1.8743, "step": 326144 }, { "epoch": 28.14009661835749, "grad_norm": 1.2852096557617188, "learning_rate": 0.001, "loss": 1.8921, "step": 326200 }, { "epoch": 28.144927536231883, "grad_norm": 12.06866455078125, "learning_rate": 0.001, "loss": 1.9198, "step": 326256 }, { "epoch": 28.14975845410628, "grad_norm": 1.060186505317688, "learning_rate": 0.001, "loss": 1.9161, "step": 326312 }, { "epoch": 28.154589371980677, "grad_norm": 1.3918250799179077, "learning_rate": 0.001, "loss": 1.9106, "step": 326368 }, { "epoch": 28.159420289855074, "grad_norm": 1.0849374532699585, "learning_rate": 0.001, "loss": 1.9148, "step": 326424 }, { "epoch": 28.164251207729468, "grad_norm": 0.3700352907180786, "learning_rate": 0.001, "loss": 1.9047, "step": 326480 }, { "epoch": 28.169082125603865, "grad_norm": 0.35202285647392273, "learning_rate": 0.001, "loss": 1.9088, "step": 326536 }, { "epoch": 28.17391304347826, "grad_norm": 1.53982412815094, "learning_rate": 0.001, "loss": 1.8945, "step": 326592 }, { "epoch": 28.17874396135266, "grad_norm": 0.9972717761993408, "learning_rate": 0.001, "loss": 1.8972, "step": 326648 }, { "epoch": 28.183574879227052, "grad_norm": 0.42282775044441223, "learning_rate": 0.001, "loss": 1.8961, "step": 326704 }, { "epoch": 28.18840579710145, "grad_norm": 0.5036537647247314, "learning_rate": 0.001, "loss": 1.8912, "step": 326760 }, { "epoch": 28.193236714975846, "grad_norm": 2.0767226219177246, "learning_rate": 0.001, "loss": 1.872, "step": 326816 }, { "epoch": 28.19806763285024, "grad_norm": 0.701141893863678, "learning_rate": 0.001, "loss": 1.8734, "step": 326872 }, { "epoch": 28.202898550724637, "grad_norm": 7.2736430168151855, "learning_rate": 0.001, "loss": 1.8843, "step": 326928 }, { "epoch": 28.207729468599034, "grad_norm": 1.168703556060791, "learning_rate": 0.001, "loss": 1.8875, "step": 326984 }, { "epoch": 28.21256038647343, "grad_norm": 1.0168073177337646, "learning_rate": 0.001, "loss": 1.8939, "step": 327040 }, { "epoch": 28.217391304347824, "grad_norm": 1.061295747756958, "learning_rate": 0.001, "loss": 1.8919, "step": 327096 }, { "epoch": 28.22222222222222, "grad_norm": 0.37067949771881104, "learning_rate": 0.001, "loss": 1.9043, "step": 327152 }, { "epoch": 28.22705314009662, "grad_norm": 0.5680735111236572, "learning_rate": 0.001, "loss": 1.8882, "step": 327208 }, { "epoch": 28.231884057971016, "grad_norm": 0.5914512872695923, "learning_rate": 0.001, "loss": 1.899, "step": 327264 }, { "epoch": 28.23671497584541, "grad_norm": 0.32705751061439514, "learning_rate": 0.001, "loss": 1.8847, "step": 327320 }, { "epoch": 28.241545893719806, "grad_norm": 0.5657069683074951, "learning_rate": 0.001, "loss": 1.8913, "step": 327376 }, { "epoch": 28.246376811594203, "grad_norm": 0.7612531185150146, "learning_rate": 0.001, "loss": 1.8824, "step": 327432 }, { "epoch": 28.2512077294686, "grad_norm": 0.31469398736953735, "learning_rate": 0.001, "loss": 1.8834, "step": 327488 }, { "epoch": 28.256038647342994, "grad_norm": 0.5604250431060791, "learning_rate": 0.001, "loss": 1.8885, "step": 327544 }, { "epoch": 28.26086956521739, "grad_norm": 1.1248418092727661, "learning_rate": 0.001, "loss": 1.8849, "step": 327600 }, { "epoch": 28.265700483091788, "grad_norm": 0.9591240286827087, "learning_rate": 0.001, "loss": 1.8779, "step": 327656 }, { "epoch": 28.270531400966185, "grad_norm": 0.7204088568687439, "learning_rate": 0.001, "loss": 1.879, "step": 327712 }, { "epoch": 28.27536231884058, "grad_norm": 1.5862189531326294, "learning_rate": 0.001, "loss": 1.8714, "step": 327768 }, { "epoch": 28.280193236714975, "grad_norm": 0.3382035791873932, "learning_rate": 0.001, "loss": 1.8775, "step": 327824 }, { "epoch": 28.285024154589372, "grad_norm": 1.227964997291565, "learning_rate": 0.001, "loss": 1.8782, "step": 327880 }, { "epoch": 28.28985507246377, "grad_norm": 1.745741605758667, "learning_rate": 0.001, "loss": 1.8655, "step": 327936 }, { "epoch": 28.294685990338163, "grad_norm": 0.9000911116600037, "learning_rate": 0.001, "loss": 1.8908, "step": 327992 }, { "epoch": 28.29951690821256, "grad_norm": 1.3809490203857422, "learning_rate": 0.001, "loss": 1.8957, "step": 328048 }, { "epoch": 28.304347826086957, "grad_norm": 1.2855427265167236, "learning_rate": 0.001, "loss": 1.8882, "step": 328104 }, { "epoch": 28.309178743961354, "grad_norm": 0.4027520716190338, "learning_rate": 0.001, "loss": 1.8841, "step": 328160 }, { "epoch": 28.314009661835748, "grad_norm": 0.8023689389228821, "learning_rate": 0.001, "loss": 1.8759, "step": 328216 }, { "epoch": 28.318840579710145, "grad_norm": 1.133941411972046, "learning_rate": 0.001, "loss": 1.8827, "step": 328272 }, { "epoch": 28.32367149758454, "grad_norm": 1.4654099941253662, "learning_rate": 0.001, "loss": 1.8808, "step": 328328 }, { "epoch": 28.32850241545894, "grad_norm": 4.052917003631592, "learning_rate": 0.001, "loss": 1.8846, "step": 328384 }, { "epoch": 28.333333333333332, "grad_norm": 0.5777309536933899, "learning_rate": 0.001, "loss": 1.8817, "step": 328440 }, { "epoch": 28.33816425120773, "grad_norm": 1.7741546630859375, "learning_rate": 0.001, "loss": 1.886, "step": 328496 }, { "epoch": 28.342995169082126, "grad_norm": 0.8260049223899841, "learning_rate": 0.001, "loss": 1.8837, "step": 328552 }, { "epoch": 28.347826086956523, "grad_norm": 1.6373041868209839, "learning_rate": 0.001, "loss": 1.8979, "step": 328608 }, { "epoch": 28.352657004830917, "grad_norm": 1.4206730127334595, "learning_rate": 0.001, "loss": 1.8963, "step": 328664 }, { "epoch": 28.357487922705314, "grad_norm": 0.3405592143535614, "learning_rate": 0.001, "loss": 1.9033, "step": 328720 }, { "epoch": 28.36231884057971, "grad_norm": 0.41407832503318787, "learning_rate": 0.001, "loss": 1.9058, "step": 328776 }, { "epoch": 28.367149758454108, "grad_norm": 3.836137056350708, "learning_rate": 0.001, "loss": 1.8938, "step": 328832 }, { "epoch": 28.3719806763285, "grad_norm": 3.6969447135925293, "learning_rate": 0.001, "loss": 1.9057, "step": 328888 }, { "epoch": 28.3768115942029, "grad_norm": 0.4419362545013428, "learning_rate": 0.001, "loss": 1.9009, "step": 328944 }, { "epoch": 28.381642512077295, "grad_norm": 0.4576115608215332, "learning_rate": 0.001, "loss": 1.8921, "step": 329000 }, { "epoch": 28.386473429951693, "grad_norm": 0.9162690043449402, "learning_rate": 0.001, "loss": 1.8945, "step": 329056 }, { "epoch": 28.391304347826086, "grad_norm": 4.146164894104004, "learning_rate": 0.001, "loss": 1.8938, "step": 329112 }, { "epoch": 28.396135265700483, "grad_norm": 0.37345951795578003, "learning_rate": 0.001, "loss": 1.9001, "step": 329168 }, { "epoch": 28.40096618357488, "grad_norm": 0.4641714096069336, "learning_rate": 0.001, "loss": 1.8966, "step": 329224 }, { "epoch": 28.405797101449274, "grad_norm": 0.4535137116909027, "learning_rate": 0.001, "loss": 1.8874, "step": 329280 }, { "epoch": 28.41062801932367, "grad_norm": 13.033797264099121, "learning_rate": 0.001, "loss": 1.8892, "step": 329336 }, { "epoch": 28.415458937198068, "grad_norm": 2.3273427486419678, "learning_rate": 0.001, "loss": 1.8974, "step": 329392 }, { "epoch": 28.420289855072465, "grad_norm": 1.1021487712860107, "learning_rate": 0.001, "loss": 1.9011, "step": 329448 }, { "epoch": 28.42512077294686, "grad_norm": 0.4815601408481598, "learning_rate": 0.001, "loss": 1.8981, "step": 329504 }, { "epoch": 28.429951690821255, "grad_norm": 3.4632744789123535, "learning_rate": 0.001, "loss": 1.8985, "step": 329560 }, { "epoch": 28.434782608695652, "grad_norm": 1.6353886127471924, "learning_rate": 0.001, "loss": 1.8929, "step": 329616 }, { "epoch": 28.43961352657005, "grad_norm": 2.0833888053894043, "learning_rate": 0.001, "loss": 1.9026, "step": 329672 }, { "epoch": 28.444444444444443, "grad_norm": 0.8961399793624878, "learning_rate": 0.001, "loss": 1.8878, "step": 329728 }, { "epoch": 28.44927536231884, "grad_norm": 0.8229840397834778, "learning_rate": 0.001, "loss": 1.8843, "step": 329784 }, { "epoch": 28.454106280193237, "grad_norm": 0.7641089558601379, "learning_rate": 0.001, "loss": 1.8842, "step": 329840 }, { "epoch": 28.458937198067634, "grad_norm": 0.3715387284755707, "learning_rate": 0.001, "loss": 1.8747, "step": 329896 }, { "epoch": 28.463768115942027, "grad_norm": 0.4506649076938629, "learning_rate": 0.001, "loss": 1.8757, "step": 329952 }, { "epoch": 28.468599033816425, "grad_norm": 0.6041941046714783, "learning_rate": 0.001, "loss": 1.8805, "step": 330008 }, { "epoch": 28.47342995169082, "grad_norm": 0.7833155989646912, "learning_rate": 0.001, "loss": 1.8817, "step": 330064 }, { "epoch": 28.47826086956522, "grad_norm": 1.0198636054992676, "learning_rate": 0.001, "loss": 1.8776, "step": 330120 }, { "epoch": 28.483091787439612, "grad_norm": 3.5298590660095215, "learning_rate": 0.001, "loss": 1.8723, "step": 330176 }, { "epoch": 28.48792270531401, "grad_norm": 0.4761419892311096, "learning_rate": 0.001, "loss": 1.8945, "step": 330232 }, { "epoch": 28.492753623188406, "grad_norm": 0.4587728977203369, "learning_rate": 0.001, "loss": 1.8894, "step": 330288 }, { "epoch": 28.497584541062803, "grad_norm": 0.6684659123420715, "learning_rate": 0.001, "loss": 1.8915, "step": 330344 }, { "epoch": 28.502415458937197, "grad_norm": 2.46571946144104, "learning_rate": 0.001, "loss": 1.9051, "step": 330400 }, { "epoch": 28.507246376811594, "grad_norm": 0.9475992321968079, "learning_rate": 0.001, "loss": 1.9139, "step": 330456 }, { "epoch": 28.51207729468599, "grad_norm": 4.113898754119873, "learning_rate": 0.001, "loss": 1.9044, "step": 330512 }, { "epoch": 28.516908212560388, "grad_norm": 0.4505608081817627, "learning_rate": 0.001, "loss": 1.8958, "step": 330568 }, { "epoch": 28.52173913043478, "grad_norm": 3.4996960163116455, "learning_rate": 0.001, "loss": 1.8952, "step": 330624 }, { "epoch": 28.52657004830918, "grad_norm": 1.0719223022460938, "learning_rate": 0.001, "loss": 1.8986, "step": 330680 }, { "epoch": 28.531400966183575, "grad_norm": 2.0519521236419678, "learning_rate": 0.001, "loss": 1.8808, "step": 330736 }, { "epoch": 28.536231884057973, "grad_norm": 1.0359359979629517, "learning_rate": 0.001, "loss": 1.8871, "step": 330792 }, { "epoch": 28.541062801932366, "grad_norm": 0.663783848285675, "learning_rate": 0.001, "loss": 1.911, "step": 330848 }, { "epoch": 28.545893719806763, "grad_norm": 4.949044227600098, "learning_rate": 0.001, "loss": 1.9068, "step": 330904 }, { "epoch": 28.55072463768116, "grad_norm": 0.973168134689331, "learning_rate": 0.001, "loss": 1.9024, "step": 330960 }, { "epoch": 28.555555555555557, "grad_norm": 0.6336768865585327, "learning_rate": 0.001, "loss": 1.9015, "step": 331016 }, { "epoch": 28.56038647342995, "grad_norm": 0.3745986521244049, "learning_rate": 0.001, "loss": 1.9122, "step": 331072 }, { "epoch": 28.565217391304348, "grad_norm": 0.9017367362976074, "learning_rate": 0.001, "loss": 1.9213, "step": 331128 }, { "epoch": 28.570048309178745, "grad_norm": 2.0765271186828613, "learning_rate": 0.001, "loss": 1.9297, "step": 331184 }, { "epoch": 28.57487922705314, "grad_norm": 2.877708911895752, "learning_rate": 0.001, "loss": 1.9411, "step": 331240 }, { "epoch": 28.579710144927535, "grad_norm": 0.7322184443473816, "learning_rate": 0.001, "loss": 1.9461, "step": 331296 }, { "epoch": 28.584541062801932, "grad_norm": 1.447961449623108, "learning_rate": 0.001, "loss": 1.9644, "step": 331352 }, { "epoch": 28.58937198067633, "grad_norm": 3.7392313480377197, "learning_rate": 0.001, "loss": 1.9579, "step": 331408 }, { "epoch": 28.594202898550726, "grad_norm": 1.5417708158493042, "learning_rate": 0.001, "loss": 1.9651, "step": 331464 }, { "epoch": 28.59903381642512, "grad_norm": 3.817207098007202, "learning_rate": 0.001, "loss": 1.9485, "step": 331520 }, { "epoch": 28.603864734299517, "grad_norm": 1.1892389059066772, "learning_rate": 0.001, "loss": 1.9514, "step": 331576 }, { "epoch": 28.608695652173914, "grad_norm": 0.9001258611679077, "learning_rate": 0.001, "loss": 1.9462, "step": 331632 }, { "epoch": 28.613526570048307, "grad_norm": 0.9239190816879272, "learning_rate": 0.001, "loss": 1.9475, "step": 331688 }, { "epoch": 28.618357487922705, "grad_norm": 0.8705443143844604, "learning_rate": 0.001, "loss": 1.955, "step": 331744 }, { "epoch": 28.6231884057971, "grad_norm": 0.8598714470863342, "learning_rate": 0.001, "loss": 1.9529, "step": 331800 }, { "epoch": 28.6280193236715, "grad_norm": 1.9536629915237427, "learning_rate": 0.001, "loss": 1.9459, "step": 331856 }, { "epoch": 28.632850241545892, "grad_norm": 3.766209363937378, "learning_rate": 0.001, "loss": 1.9397, "step": 331912 }, { "epoch": 28.63768115942029, "grad_norm": 8.624201774597168, "learning_rate": 0.001, "loss": 1.9453, "step": 331968 }, { "epoch": 28.642512077294686, "grad_norm": 1.2410045862197876, "learning_rate": 0.001, "loss": 1.9286, "step": 332024 }, { "epoch": 28.647342995169083, "grad_norm": 0.8718456029891968, "learning_rate": 0.001, "loss": 1.9121, "step": 332080 }, { "epoch": 28.652173913043477, "grad_norm": 0.47467195987701416, "learning_rate": 0.001, "loss": 1.9191, "step": 332136 }, { "epoch": 28.657004830917874, "grad_norm": 1.4802100658416748, "learning_rate": 0.001, "loss": 1.9219, "step": 332192 }, { "epoch": 28.66183574879227, "grad_norm": 0.8133614659309387, "learning_rate": 0.001, "loss": 1.9296, "step": 332248 }, { "epoch": 28.666666666666668, "grad_norm": 0.6556243896484375, "learning_rate": 0.001, "loss": 1.9181, "step": 332304 }, { "epoch": 28.67149758454106, "grad_norm": 1.3882675170898438, "learning_rate": 0.001, "loss": 1.9167, "step": 332360 }, { "epoch": 28.67632850241546, "grad_norm": 1.3900033235549927, "learning_rate": 0.001, "loss": 1.9121, "step": 332416 }, { "epoch": 28.681159420289855, "grad_norm": 0.7995746731758118, "learning_rate": 0.001, "loss": 1.9226, "step": 332472 }, { "epoch": 28.685990338164252, "grad_norm": 1.2805215120315552, "learning_rate": 0.001, "loss": 1.9147, "step": 332528 }, { "epoch": 28.690821256038646, "grad_norm": 1.582929015159607, "learning_rate": 0.001, "loss": 1.9099, "step": 332584 }, { "epoch": 28.695652173913043, "grad_norm": 3.7897348403930664, "learning_rate": 0.001, "loss": 1.9108, "step": 332640 }, { "epoch": 28.70048309178744, "grad_norm": 1.4902665615081787, "learning_rate": 0.001, "loss": 1.9059, "step": 332696 }, { "epoch": 28.705314009661837, "grad_norm": 0.6879105567932129, "learning_rate": 0.001, "loss": 1.9088, "step": 332752 }, { "epoch": 28.71014492753623, "grad_norm": 1.7254632711410522, "learning_rate": 0.001, "loss": 1.9093, "step": 332808 }, { "epoch": 28.714975845410628, "grad_norm": 0.7516950964927673, "learning_rate": 0.001, "loss": 1.9107, "step": 332864 }, { "epoch": 28.719806763285025, "grad_norm": 0.908341646194458, "learning_rate": 0.001, "loss": 1.9118, "step": 332920 }, { "epoch": 28.72463768115942, "grad_norm": 1.1142560243606567, "learning_rate": 0.001, "loss": 1.9286, "step": 332976 }, { "epoch": 28.729468599033815, "grad_norm": 0.6670729517936707, "learning_rate": 0.001, "loss": 1.9168, "step": 333032 }, { "epoch": 28.734299516908212, "grad_norm": 2.884117364883423, "learning_rate": 0.001, "loss": 1.9323, "step": 333088 }, { "epoch": 28.73913043478261, "grad_norm": 1.8979073762893677, "learning_rate": 0.001, "loss": 1.933, "step": 333144 }, { "epoch": 28.743961352657006, "grad_norm": 1.1814138889312744, "learning_rate": 0.001, "loss": 1.9209, "step": 333200 }, { "epoch": 28.7487922705314, "grad_norm": 0.8391841650009155, "learning_rate": 0.001, "loss": 1.9272, "step": 333256 }, { "epoch": 28.753623188405797, "grad_norm": 1.2846533060073853, "learning_rate": 0.001, "loss": 1.9252, "step": 333312 }, { "epoch": 28.758454106280194, "grad_norm": 1.039368748664856, "learning_rate": 0.001, "loss": 1.9282, "step": 333368 }, { "epoch": 28.76328502415459, "grad_norm": 2.0995516777038574, "learning_rate": 0.001, "loss": 1.9112, "step": 333424 }, { "epoch": 28.768115942028984, "grad_norm": 2.147758960723877, "learning_rate": 0.001, "loss": 1.915, "step": 333480 }, { "epoch": 28.77294685990338, "grad_norm": 0.7746847867965698, "learning_rate": 0.001, "loss": 1.9089, "step": 333536 }, { "epoch": 28.77777777777778, "grad_norm": 1.3602650165557861, "learning_rate": 0.001, "loss": 1.9196, "step": 333592 }, { "epoch": 28.782608695652176, "grad_norm": 2.182260274887085, "learning_rate": 0.001, "loss": 1.9207, "step": 333648 }, { "epoch": 28.78743961352657, "grad_norm": 1.1203982830047607, "learning_rate": 0.001, "loss": 1.926, "step": 333704 }, { "epoch": 28.792270531400966, "grad_norm": 1.8124265670776367, "learning_rate": 0.001, "loss": 1.921, "step": 333760 }, { "epoch": 28.797101449275363, "grad_norm": 4.528639316558838, "learning_rate": 0.001, "loss": 1.9203, "step": 333816 }, { "epoch": 28.80193236714976, "grad_norm": 0.5545051693916321, "learning_rate": 0.001, "loss": 1.9088, "step": 333872 }, { "epoch": 28.806763285024154, "grad_norm": 1.0197714567184448, "learning_rate": 0.001, "loss": 1.9251, "step": 333928 }, { "epoch": 28.81159420289855, "grad_norm": 1.0554872751235962, "learning_rate": 0.001, "loss": 1.9231, "step": 333984 }, { "epoch": 28.816425120772948, "grad_norm": 1.9428313970565796, "learning_rate": 0.001, "loss": 1.9153, "step": 334040 }, { "epoch": 28.82125603864734, "grad_norm": 2.292418956756592, "learning_rate": 0.001, "loss": 1.9128, "step": 334096 }, { "epoch": 28.82608695652174, "grad_norm": 0.5901442766189575, "learning_rate": 0.001, "loss": 1.9239, "step": 334152 }, { "epoch": 28.830917874396135, "grad_norm": 5.516192436218262, "learning_rate": 0.001, "loss": 1.9144, "step": 334208 }, { "epoch": 28.835748792270532, "grad_norm": 0.5279197096824646, "learning_rate": 0.001, "loss": 1.9101, "step": 334264 }, { "epoch": 28.840579710144926, "grad_norm": 1.5054384469985962, "learning_rate": 0.001, "loss": 1.9062, "step": 334320 }, { "epoch": 28.845410628019323, "grad_norm": 1.7059355974197388, "learning_rate": 0.001, "loss": 1.9127, "step": 334376 }, { "epoch": 28.85024154589372, "grad_norm": 2.5135741233825684, "learning_rate": 0.001, "loss": 1.9142, "step": 334432 }, { "epoch": 28.855072463768117, "grad_norm": 2.1412508487701416, "learning_rate": 0.001, "loss": 1.9121, "step": 334488 }, { "epoch": 28.85990338164251, "grad_norm": 2.8041398525238037, "learning_rate": 0.001, "loss": 1.9137, "step": 334544 }, { "epoch": 28.864734299516908, "grad_norm": 2.011864423751831, "learning_rate": 0.001, "loss": 1.9112, "step": 334600 }, { "epoch": 28.869565217391305, "grad_norm": 2.4927730560302734, "learning_rate": 0.001, "loss": 1.9089, "step": 334656 }, { "epoch": 28.8743961352657, "grad_norm": 0.9902358651161194, "learning_rate": 0.001, "loss": 1.929, "step": 334712 }, { "epoch": 28.879227053140095, "grad_norm": 1.463372826576233, "learning_rate": 0.001, "loss": 1.9169, "step": 334768 }, { "epoch": 28.884057971014492, "grad_norm": 7.084128379821777, "learning_rate": 0.001, "loss": 1.9146, "step": 334824 }, { "epoch": 28.88888888888889, "grad_norm": 1.7624740600585938, "learning_rate": 0.001, "loss": 1.9153, "step": 334880 }, { "epoch": 28.893719806763286, "grad_norm": 0.9364974498748779, "learning_rate": 0.001, "loss": 1.9169, "step": 334936 }, { "epoch": 28.89855072463768, "grad_norm": 2.909557580947876, "learning_rate": 0.001, "loss": 1.9223, "step": 334992 }, { "epoch": 28.903381642512077, "grad_norm": 0.7098058462142944, "learning_rate": 0.001, "loss": 1.9218, "step": 335048 }, { "epoch": 28.908212560386474, "grad_norm": 1.919205904006958, "learning_rate": 0.001, "loss": 1.9243, "step": 335104 }, { "epoch": 28.91304347826087, "grad_norm": 1.7865612506866455, "learning_rate": 0.001, "loss": 1.9129, "step": 335160 }, { "epoch": 28.917874396135264, "grad_norm": 0.8979544639587402, "learning_rate": 0.001, "loss": 1.9037, "step": 335216 }, { "epoch": 28.92270531400966, "grad_norm": 1.3111666440963745, "learning_rate": 0.001, "loss": 1.9268, "step": 335272 }, { "epoch": 28.92753623188406, "grad_norm": 1.183286190032959, "learning_rate": 0.001, "loss": 1.9211, "step": 335328 }, { "epoch": 28.932367149758456, "grad_norm": 1.013069748878479, "learning_rate": 0.001, "loss": 1.9316, "step": 335384 }, { "epoch": 28.93719806763285, "grad_norm": 3.185436487197876, "learning_rate": 0.001, "loss": 1.9366, "step": 335440 }, { "epoch": 28.942028985507246, "grad_norm": 33.5592155456543, "learning_rate": 0.001, "loss": 1.937, "step": 335496 }, { "epoch": 28.946859903381643, "grad_norm": 0.46022623777389526, "learning_rate": 0.001, "loss": 1.9302, "step": 335552 }, { "epoch": 28.95169082125604, "grad_norm": 4.151989459991455, "learning_rate": 0.001, "loss": 1.9428, "step": 335608 }, { "epoch": 28.956521739130434, "grad_norm": 4.188902854919434, "learning_rate": 0.001, "loss": 1.9312, "step": 335664 }, { "epoch": 28.96135265700483, "grad_norm": 0.5021653175354004, "learning_rate": 0.001, "loss": 1.9347, "step": 335720 }, { "epoch": 28.966183574879228, "grad_norm": 2.4328694343566895, "learning_rate": 0.001, "loss": 1.9359, "step": 335776 }, { "epoch": 28.971014492753625, "grad_norm": 1.620774507522583, "learning_rate": 0.001, "loss": 1.9386, "step": 335832 }, { "epoch": 28.97584541062802, "grad_norm": 1.0298885107040405, "learning_rate": 0.001, "loss": 1.9337, "step": 335888 }, { "epoch": 28.980676328502415, "grad_norm": 11.728135108947754, "learning_rate": 0.001, "loss": 1.9359, "step": 335944 }, { "epoch": 28.985507246376812, "grad_norm": 1.0595448017120361, "learning_rate": 0.001, "loss": 1.9352, "step": 336000 }, { "epoch": 28.990338164251206, "grad_norm": 0.7428472638130188, "learning_rate": 0.001, "loss": 1.9311, "step": 336056 }, { "epoch": 28.995169082125603, "grad_norm": 1.257922649383545, "learning_rate": 0.001, "loss": 1.9244, "step": 336112 }, { "epoch": 29.0, "grad_norm": 0.6742544174194336, "learning_rate": 0.001, "loss": 1.9182, "step": 336168 }, { "epoch": 29.004830917874397, "grad_norm": 1.009490728378296, "learning_rate": 0.001, "loss": 1.89, "step": 336224 }, { "epoch": 29.00966183574879, "grad_norm": 4.219653129577637, "learning_rate": 0.001, "loss": 1.8905, "step": 336280 }, { "epoch": 29.014492753623188, "grad_norm": 1.5191633701324463, "learning_rate": 0.001, "loss": 1.8821, "step": 336336 }, { "epoch": 29.019323671497585, "grad_norm": 3.083433151245117, "learning_rate": 0.001, "loss": 1.8838, "step": 336392 }, { "epoch": 29.02415458937198, "grad_norm": 1.6324265003204346, "learning_rate": 0.001, "loss": 1.8678, "step": 336448 }, { "epoch": 29.028985507246375, "grad_norm": 0.5845301151275635, "learning_rate": 0.001, "loss": 1.8991, "step": 336504 }, { "epoch": 29.033816425120772, "grad_norm": 1.7075315713882446, "learning_rate": 0.001, "loss": 1.891, "step": 336560 }, { "epoch": 29.03864734299517, "grad_norm": 1.9950648546218872, "learning_rate": 0.001, "loss": 1.8889, "step": 336616 }, { "epoch": 29.043478260869566, "grad_norm": 0.8414875864982605, "learning_rate": 0.001, "loss": 1.8891, "step": 336672 }, { "epoch": 29.04830917874396, "grad_norm": 3.040708065032959, "learning_rate": 0.001, "loss": 1.8794, "step": 336728 }, { "epoch": 29.053140096618357, "grad_norm": 3.2586257457733154, "learning_rate": 0.001, "loss": 1.8813, "step": 336784 }, { "epoch": 29.057971014492754, "grad_norm": 1.0472962856292725, "learning_rate": 0.001, "loss": 1.8951, "step": 336840 }, { "epoch": 29.06280193236715, "grad_norm": 0.5597095489501953, "learning_rate": 0.001, "loss": 1.8785, "step": 336896 }, { "epoch": 29.067632850241544, "grad_norm": 1.75657057762146, "learning_rate": 0.001, "loss": 1.8737, "step": 336952 }, { "epoch": 29.07246376811594, "grad_norm": 0.984176516532898, "learning_rate": 0.001, "loss": 1.8753, "step": 337008 }, { "epoch": 29.07729468599034, "grad_norm": 1.0896435976028442, "learning_rate": 0.001, "loss": 1.8789, "step": 337064 }, { "epoch": 29.082125603864736, "grad_norm": 2.123342514038086, "learning_rate": 0.001, "loss": 1.8962, "step": 337120 }, { "epoch": 29.08695652173913, "grad_norm": 2.675179958343506, "learning_rate": 0.001, "loss": 1.9052, "step": 337176 }, { "epoch": 29.091787439613526, "grad_norm": 2.3416190147399902, "learning_rate": 0.001, "loss": 1.9163, "step": 337232 }, { "epoch": 29.096618357487923, "grad_norm": 1.5199639797210693, "learning_rate": 0.001, "loss": 1.9049, "step": 337288 }, { "epoch": 29.10144927536232, "grad_norm": 1.455733060836792, "learning_rate": 0.001, "loss": 1.9034, "step": 337344 }, { "epoch": 29.106280193236714, "grad_norm": 2.525827407836914, "learning_rate": 0.001, "loss": 1.8952, "step": 337400 }, { "epoch": 29.11111111111111, "grad_norm": 1.190516710281372, "learning_rate": 0.001, "loss": 1.8971, "step": 337456 }, { "epoch": 29.115942028985508, "grad_norm": 2.2384257316589355, "learning_rate": 0.001, "loss": 1.9018, "step": 337512 }, { "epoch": 29.120772946859905, "grad_norm": 6.540494918823242, "learning_rate": 0.001, "loss": 1.9068, "step": 337568 }, { "epoch": 29.1256038647343, "grad_norm": 1.9433166980743408, "learning_rate": 0.001, "loss": 1.8958, "step": 337624 }, { "epoch": 29.130434782608695, "grad_norm": 11.21065616607666, "learning_rate": 0.001, "loss": 1.8962, "step": 337680 }, { "epoch": 29.135265700483092, "grad_norm": 1.9401861429214478, "learning_rate": 0.001, "loss": 1.906, "step": 337736 }, { "epoch": 29.14009661835749, "grad_norm": 3.240396022796631, "learning_rate": 0.001, "loss": 1.911, "step": 337792 }, { "epoch": 29.144927536231883, "grad_norm": 2.1199026107788086, "learning_rate": 0.001, "loss": 1.9171, "step": 337848 }, { "epoch": 29.14975845410628, "grad_norm": 3.307455539703369, "learning_rate": 0.001, "loss": 1.9178, "step": 337904 }, { "epoch": 29.154589371980677, "grad_norm": 1.1222668886184692, "learning_rate": 0.001, "loss": 1.9154, "step": 337960 }, { "epoch": 29.159420289855074, "grad_norm": 2.8536243438720703, "learning_rate": 0.001, "loss": 1.9155, "step": 338016 }, { "epoch": 29.164251207729468, "grad_norm": 1.1358243227005005, "learning_rate": 0.001, "loss": 1.9013, "step": 338072 }, { "epoch": 29.169082125603865, "grad_norm": 7.387932300567627, "learning_rate": 0.001, "loss": 1.896, "step": 338128 }, { "epoch": 29.17391304347826, "grad_norm": 2.918789863586426, "learning_rate": 0.001, "loss": 1.8959, "step": 338184 }, { "epoch": 29.17874396135266, "grad_norm": 2.7696633338928223, "learning_rate": 0.001, "loss": 1.897, "step": 338240 }, { "epoch": 29.183574879227052, "grad_norm": 1.3820921182632446, "learning_rate": 0.001, "loss": 1.9035, "step": 338296 }, { "epoch": 29.18840579710145, "grad_norm": 1.6911768913269043, "learning_rate": 0.001, "loss": 1.9121, "step": 338352 }, { "epoch": 29.193236714975846, "grad_norm": 2.3849222660064697, "learning_rate": 0.001, "loss": 1.9035, "step": 338408 }, { "epoch": 29.19806763285024, "grad_norm": 1.1593035459518433, "learning_rate": 0.001, "loss": 1.9072, "step": 338464 }, { "epoch": 29.202898550724637, "grad_norm": 0.4673869013786316, "learning_rate": 0.001, "loss": 1.894, "step": 338520 }, { "epoch": 29.207729468599034, "grad_norm": 2.011094808578491, "learning_rate": 0.001, "loss": 1.8922, "step": 338576 }, { "epoch": 29.21256038647343, "grad_norm": 2.9301528930664062, "learning_rate": 0.001, "loss": 1.8851, "step": 338632 }, { "epoch": 29.217391304347824, "grad_norm": 1.7181782722473145, "learning_rate": 0.001, "loss": 1.8776, "step": 338688 }, { "epoch": 29.22222222222222, "grad_norm": 2.089735507965088, "learning_rate": 0.001, "loss": 1.8859, "step": 338744 }, { "epoch": 29.22705314009662, "grad_norm": 2.2208869457244873, "learning_rate": 0.001, "loss": 1.8775, "step": 338800 }, { "epoch": 29.231884057971016, "grad_norm": 1.1865978240966797, "learning_rate": 0.001, "loss": 1.8889, "step": 338856 }, { "epoch": 29.23671497584541, "grad_norm": 1.8734606504440308, "learning_rate": 0.001, "loss": 1.8968, "step": 338912 }, { "epoch": 29.241545893719806, "grad_norm": 1.4250744581222534, "learning_rate": 0.001, "loss": 1.8793, "step": 338968 }, { "epoch": 29.246376811594203, "grad_norm": 1.6634395122528076, "learning_rate": 0.001, "loss": 1.8738, "step": 339024 }, { "epoch": 29.2512077294686, "grad_norm": 1.0095605850219727, "learning_rate": 0.001, "loss": 1.8843, "step": 339080 }, { "epoch": 29.256038647342994, "grad_norm": 1.2453047037124634, "learning_rate": 0.001, "loss": 1.8859, "step": 339136 }, { "epoch": 29.26086956521739, "grad_norm": 0.4766087830066681, "learning_rate": 0.001, "loss": 1.8914, "step": 339192 }, { "epoch": 29.265700483091788, "grad_norm": 1.6427762508392334, "learning_rate": 0.001, "loss": 1.8923, "step": 339248 }, { "epoch": 29.270531400966185, "grad_norm": 0.8533511161804199, "learning_rate": 0.001, "loss": 1.8973, "step": 339304 }, { "epoch": 29.27536231884058, "grad_norm": 1.0850225687026978, "learning_rate": 0.001, "loss": 1.8962, "step": 339360 }, { "epoch": 29.280193236714975, "grad_norm": 4.645824432373047, "learning_rate": 0.001, "loss": 1.8973, "step": 339416 }, { "epoch": 29.285024154589372, "grad_norm": 0.6815570592880249, "learning_rate": 0.001, "loss": 1.8943, "step": 339472 }, { "epoch": 29.28985507246377, "grad_norm": 0.5917956829071045, "learning_rate": 0.001, "loss": 1.8901, "step": 339528 }, { "epoch": 29.294685990338163, "grad_norm": 1.2189338207244873, "learning_rate": 0.001, "loss": 1.8901, "step": 339584 }, { "epoch": 29.29951690821256, "grad_norm": 0.3623257875442505, "learning_rate": 0.001, "loss": 1.8851, "step": 339640 }, { "epoch": 29.304347826086957, "grad_norm": 0.5104504823684692, "learning_rate": 0.001, "loss": 1.8856, "step": 339696 }, { "epoch": 29.309178743961354, "grad_norm": 1.8637456893920898, "learning_rate": 0.001, "loss": 1.8844, "step": 339752 }, { "epoch": 29.314009661835748, "grad_norm": 0.6654226779937744, "learning_rate": 0.001, "loss": 1.8839, "step": 339808 }, { "epoch": 29.318840579710145, "grad_norm": 0.8700190782546997, "learning_rate": 0.001, "loss": 1.8826, "step": 339864 }, { "epoch": 29.32367149758454, "grad_norm": 5.933652400970459, "learning_rate": 0.001, "loss": 1.8873, "step": 339920 }, { "epoch": 29.32850241545894, "grad_norm": 0.34447595477104187, "learning_rate": 0.001, "loss": 1.88, "step": 339976 }, { "epoch": 29.333333333333332, "grad_norm": 0.4928751587867737, "learning_rate": 0.001, "loss": 1.8805, "step": 340032 }, { "epoch": 29.33816425120773, "grad_norm": 0.8375958800315857, "learning_rate": 0.001, "loss": 1.8931, "step": 340088 }, { "epoch": 29.342995169082126, "grad_norm": 0.7392526268959045, "learning_rate": 0.001, "loss": 1.8935, "step": 340144 }, { "epoch": 29.347826086956523, "grad_norm": 1.9347178936004639, "learning_rate": 0.001, "loss": 1.8905, "step": 340200 }, { "epoch": 29.352657004830917, "grad_norm": 1.0659995079040527, "learning_rate": 0.001, "loss": 1.8916, "step": 340256 }, { "epoch": 29.357487922705314, "grad_norm": 2.6487209796905518, "learning_rate": 0.001, "loss": 1.8782, "step": 340312 }, { "epoch": 29.36231884057971, "grad_norm": 4.568159103393555, "learning_rate": 0.001, "loss": 1.8849, "step": 340368 }, { "epoch": 29.367149758454108, "grad_norm": 1.2360894680023193, "learning_rate": 0.001, "loss": 1.8862, "step": 340424 }, { "epoch": 29.3719806763285, "grad_norm": 0.7626190781593323, "learning_rate": 0.001, "loss": 1.8963, "step": 340480 }, { "epoch": 29.3768115942029, "grad_norm": 0.8645307421684265, "learning_rate": 0.001, "loss": 1.8933, "step": 340536 }, { "epoch": 29.381642512077295, "grad_norm": 3.376204013824463, "learning_rate": 0.001, "loss": 1.9055, "step": 340592 }, { "epoch": 29.386473429951693, "grad_norm": 1.0030955076217651, "learning_rate": 0.001, "loss": 1.8906, "step": 340648 }, { "epoch": 29.391304347826086, "grad_norm": 1.2015458345413208, "learning_rate": 0.001, "loss": 1.8926, "step": 340704 }, { "epoch": 29.396135265700483, "grad_norm": 0.8266012668609619, "learning_rate": 0.001, "loss": 1.9014, "step": 340760 }, { "epoch": 29.40096618357488, "grad_norm": 1.9911928176879883, "learning_rate": 0.001, "loss": 1.9034, "step": 340816 }, { "epoch": 29.405797101449274, "grad_norm": 1.5426700115203857, "learning_rate": 0.001, "loss": 1.9141, "step": 340872 }, { "epoch": 29.41062801932367, "grad_norm": 2.6662533283233643, "learning_rate": 0.001, "loss": 1.9073, "step": 340928 }, { "epoch": 29.415458937198068, "grad_norm": 0.5447232723236084, "learning_rate": 0.001, "loss": 1.9055, "step": 340984 }, { "epoch": 29.420289855072465, "grad_norm": 1.1376993656158447, "learning_rate": 0.001, "loss": 1.903, "step": 341040 }, { "epoch": 29.42512077294686, "grad_norm": 0.9387388825416565, "learning_rate": 0.001, "loss": 1.9141, "step": 341096 }, { "epoch": 29.429951690821255, "grad_norm": 1.7440056800842285, "learning_rate": 0.001, "loss": 1.9295, "step": 341152 }, { "epoch": 29.434782608695652, "grad_norm": 16.031557083129883, "learning_rate": 0.001, "loss": 1.923, "step": 341208 }, { "epoch": 29.43961352657005, "grad_norm": 1.6006251573562622, "learning_rate": 0.001, "loss": 1.9351, "step": 341264 }, { "epoch": 29.444444444444443, "grad_norm": 1.1466755867004395, "learning_rate": 0.001, "loss": 1.9498, "step": 341320 }, { "epoch": 29.44927536231884, "grad_norm": 1.0718761682510376, "learning_rate": 0.001, "loss": 1.9389, "step": 341376 }, { "epoch": 29.454106280193237, "grad_norm": 2.1162822246551514, "learning_rate": 0.001, "loss": 1.9265, "step": 341432 }, { "epoch": 29.458937198067634, "grad_norm": 1.7780194282531738, "learning_rate": 0.001, "loss": 1.9323, "step": 341488 }, { "epoch": 29.463768115942027, "grad_norm": 1.4814939498901367, "learning_rate": 0.001, "loss": 1.926, "step": 341544 }, { "epoch": 29.468599033816425, "grad_norm": 0.4262562394142151, "learning_rate": 0.001, "loss": 1.9275, "step": 341600 }, { "epoch": 29.47342995169082, "grad_norm": 1.196614146232605, "learning_rate": 0.001, "loss": 1.912, "step": 341656 }, { "epoch": 29.47826086956522, "grad_norm": 1.90091872215271, "learning_rate": 0.001, "loss": 1.9183, "step": 341712 }, { "epoch": 29.483091787439612, "grad_norm": 8.82557487487793, "learning_rate": 0.001, "loss": 1.9232, "step": 341768 }, { "epoch": 29.48792270531401, "grad_norm": 1.1036425828933716, "learning_rate": 0.001, "loss": 1.9266, "step": 341824 }, { "epoch": 29.492753623188406, "grad_norm": 2.078120470046997, "learning_rate": 0.001, "loss": 1.9324, "step": 341880 }, { "epoch": 29.497584541062803, "grad_norm": 2.0313286781311035, "learning_rate": 0.001, "loss": 1.9103, "step": 341936 }, { "epoch": 29.502415458937197, "grad_norm": 1.5176509618759155, "learning_rate": 0.001, "loss": 1.9092, "step": 341992 }, { "epoch": 29.507246376811594, "grad_norm": 1.3919011354446411, "learning_rate": 0.001, "loss": 1.903, "step": 342048 }, { "epoch": 29.51207729468599, "grad_norm": 3.533022165298462, "learning_rate": 0.001, "loss": 1.9039, "step": 342104 }, { "epoch": 29.516908212560388, "grad_norm": 3.4404382705688477, "learning_rate": 0.001, "loss": 1.8989, "step": 342160 }, { "epoch": 29.52173913043478, "grad_norm": 2.2579283714294434, "learning_rate": 0.001, "loss": 1.9151, "step": 342216 }, { "epoch": 29.52657004830918, "grad_norm": 0.9830791354179382, "learning_rate": 0.001, "loss": 1.9163, "step": 342272 }, { "epoch": 29.531400966183575, "grad_norm": 0.9636391401290894, "learning_rate": 0.001, "loss": 1.9351, "step": 342328 }, { "epoch": 29.536231884057973, "grad_norm": 0.5918301343917847, "learning_rate": 0.001, "loss": 1.9315, "step": 342384 }, { "epoch": 29.541062801932366, "grad_norm": 0.9615982174873352, "learning_rate": 0.001, "loss": 1.9238, "step": 342440 }, { "epoch": 29.545893719806763, "grad_norm": 1.8380165100097656, "learning_rate": 0.001, "loss": 1.9444, "step": 342496 }, { "epoch": 29.55072463768116, "grad_norm": 1.1342823505401611, "learning_rate": 0.001, "loss": 1.9493, "step": 342552 }, { "epoch": 29.555555555555557, "grad_norm": 1.566544771194458, "learning_rate": 0.001, "loss": 1.946, "step": 342608 }, { "epoch": 29.56038647342995, "grad_norm": 1.7683236598968506, "learning_rate": 0.001, "loss": 1.9402, "step": 342664 }, { "epoch": 29.565217391304348, "grad_norm": 1.1402480602264404, "learning_rate": 0.001, "loss": 1.9474, "step": 342720 }, { "epoch": 29.570048309178745, "grad_norm": 2.59822678565979, "learning_rate": 0.001, "loss": 1.9611, "step": 342776 }, { "epoch": 29.57487922705314, "grad_norm": 3.0990357398986816, "learning_rate": 0.001, "loss": 1.9721, "step": 342832 }, { "epoch": 29.579710144927535, "grad_norm": 1.9563204050064087, "learning_rate": 0.001, "loss": 1.95, "step": 342888 }, { "epoch": 29.584541062801932, "grad_norm": 0.6378768086433411, "learning_rate": 0.001, "loss": 1.9502, "step": 342944 }, { "epoch": 29.58937198067633, "grad_norm": 1.0656732320785522, "learning_rate": 0.001, "loss": 1.9313, "step": 343000 }, { "epoch": 29.594202898550726, "grad_norm": 1.0148881673812866, "learning_rate": 0.001, "loss": 1.9236, "step": 343056 }, { "epoch": 29.59903381642512, "grad_norm": 1.0317550897598267, "learning_rate": 0.001, "loss": 1.9341, "step": 343112 }, { "epoch": 29.603864734299517, "grad_norm": 0.7687577605247498, "learning_rate": 0.001, "loss": 1.9461, "step": 343168 }, { "epoch": 29.608695652173914, "grad_norm": 4.537657737731934, "learning_rate": 0.001, "loss": 1.9362, "step": 343224 }, { "epoch": 29.613526570048307, "grad_norm": 0.9439715147018433, "learning_rate": 0.001, "loss": 1.9295, "step": 343280 }, { "epoch": 29.618357487922705, "grad_norm": 0.8581864833831787, "learning_rate": 0.001, "loss": 1.9238, "step": 343336 }, { "epoch": 29.6231884057971, "grad_norm": 2.6081976890563965, "learning_rate": 0.001, "loss": 1.9165, "step": 343392 }, { "epoch": 29.6280193236715, "grad_norm": 1.3460050821304321, "learning_rate": 0.001, "loss": 1.9331, "step": 343448 }, { "epoch": 29.632850241545892, "grad_norm": 1.081058382987976, "learning_rate": 0.001, "loss": 1.9358, "step": 343504 }, { "epoch": 29.63768115942029, "grad_norm": 0.7841014862060547, "learning_rate": 0.001, "loss": 1.9408, "step": 343560 }, { "epoch": 29.642512077294686, "grad_norm": 1.4917643070220947, "learning_rate": 0.001, "loss": 1.9363, "step": 343616 }, { "epoch": 29.647342995169083, "grad_norm": 1.572274923324585, "learning_rate": 0.001, "loss": 1.9367, "step": 343672 }, { "epoch": 29.652173913043477, "grad_norm": 0.934330403804779, "learning_rate": 0.001, "loss": 1.9428, "step": 343728 }, { "epoch": 29.657004830917874, "grad_norm": 0.8813834190368652, "learning_rate": 0.001, "loss": 1.9216, "step": 343784 }, { "epoch": 29.66183574879227, "grad_norm": 2.107917308807373, "learning_rate": 0.001, "loss": 1.9186, "step": 343840 }, { "epoch": 29.666666666666668, "grad_norm": 1.0549782514572144, "learning_rate": 0.001, "loss": 1.9178, "step": 343896 }, { "epoch": 29.67149758454106, "grad_norm": 1.2666670083999634, "learning_rate": 0.001, "loss": 1.9194, "step": 343952 }, { "epoch": 29.67632850241546, "grad_norm": 1.4690303802490234, "learning_rate": 0.001, "loss": 1.9263, "step": 344008 }, { "epoch": 29.681159420289855, "grad_norm": 1.3744804859161377, "learning_rate": 0.001, "loss": 1.9344, "step": 344064 }, { "epoch": 29.685990338164252, "grad_norm": 1.8718572854995728, "learning_rate": 0.001, "loss": 1.9297, "step": 344120 }, { "epoch": 29.690821256038646, "grad_norm": 12.43152904510498, "learning_rate": 0.001, "loss": 1.9405, "step": 344176 }, { "epoch": 29.695652173913043, "grad_norm": 0.6862295866012573, "learning_rate": 0.001, "loss": 1.9238, "step": 344232 }, { "epoch": 29.70048309178744, "grad_norm": 0.7376891374588013, "learning_rate": 0.001, "loss": 1.9292, "step": 344288 }, { "epoch": 29.705314009661837, "grad_norm": 0.7916767001152039, "learning_rate": 0.001, "loss": 1.9189, "step": 344344 }, { "epoch": 29.71014492753623, "grad_norm": 0.36574843525886536, "learning_rate": 0.001, "loss": 1.9151, "step": 344400 }, { "epoch": 29.714975845410628, "grad_norm": 11.602384567260742, "learning_rate": 0.001, "loss": 1.9124, "step": 344456 }, { "epoch": 29.719806763285025, "grad_norm": 1.1952251195907593, "learning_rate": 0.001, "loss": 1.9138, "step": 344512 }, { "epoch": 29.72463768115942, "grad_norm": 1.2968500852584839, "learning_rate": 0.001, "loss": 1.9076, "step": 344568 }, { "epoch": 29.729468599033815, "grad_norm": 1.144452452659607, "learning_rate": 0.001, "loss": 1.9113, "step": 344624 }, { "epoch": 29.734299516908212, "grad_norm": 0.7884884476661682, "learning_rate": 0.001, "loss": 1.9126, "step": 344680 }, { "epoch": 29.73913043478261, "grad_norm": 0.8450772762298584, "learning_rate": 0.001, "loss": 1.9022, "step": 344736 }, { "epoch": 29.743961352657006, "grad_norm": 1.585148572921753, "learning_rate": 0.001, "loss": 1.9075, "step": 344792 }, { "epoch": 29.7487922705314, "grad_norm": 0.9838790893554688, "learning_rate": 0.001, "loss": 1.8963, "step": 344848 }, { "epoch": 29.753623188405797, "grad_norm": 1.0778287649154663, "learning_rate": 0.001, "loss": 1.9072, "step": 344904 }, { "epoch": 29.758454106280194, "grad_norm": 1.2693387269973755, "learning_rate": 0.001, "loss": 1.9076, "step": 344960 }, { "epoch": 29.76328502415459, "grad_norm": 0.7274543642997742, "learning_rate": 0.001, "loss": 1.9024, "step": 345016 }, { "epoch": 29.768115942028984, "grad_norm": 0.8931019306182861, "learning_rate": 0.001, "loss": 1.8958, "step": 345072 }, { "epoch": 29.77294685990338, "grad_norm": 0.5365729928016663, "learning_rate": 0.001, "loss": 1.9005, "step": 345128 }, { "epoch": 29.77777777777778, "grad_norm": 1.9573228359222412, "learning_rate": 0.001, "loss": 1.8966, "step": 345184 }, { "epoch": 29.782608695652176, "grad_norm": 3.15415620803833, "learning_rate": 0.001, "loss": 1.8948, "step": 345240 }, { "epoch": 29.78743961352657, "grad_norm": 0.9425359964370728, "learning_rate": 0.001, "loss": 1.89, "step": 345296 }, { "epoch": 29.792270531400966, "grad_norm": 2.3333144187927246, "learning_rate": 0.001, "loss": 1.9043, "step": 345352 }, { "epoch": 29.797101449275363, "grad_norm": 0.5949890613555908, "learning_rate": 0.001, "loss": 1.8923, "step": 345408 }, { "epoch": 29.80193236714976, "grad_norm": 2.0462820529937744, "learning_rate": 0.001, "loss": 1.8947, "step": 345464 }, { "epoch": 29.806763285024154, "grad_norm": 1.4434350728988647, "learning_rate": 0.001, "loss": 1.8851, "step": 345520 }, { "epoch": 29.81159420289855, "grad_norm": 0.3378339111804962, "learning_rate": 0.001, "loss": 1.9045, "step": 345576 }, { "epoch": 29.816425120772948, "grad_norm": 0.5433062314987183, "learning_rate": 0.001, "loss": 1.8994, "step": 345632 }, { "epoch": 29.82125603864734, "grad_norm": 0.6677520871162415, "learning_rate": 0.001, "loss": 1.8991, "step": 345688 }, { "epoch": 29.82608695652174, "grad_norm": 1.1760334968566895, "learning_rate": 0.001, "loss": 1.8932, "step": 345744 }, { "epoch": 29.830917874396135, "grad_norm": 1.9096678495407104, "learning_rate": 0.001, "loss": 1.8949, "step": 345800 }, { "epoch": 29.835748792270532, "grad_norm": 0.9076570272445679, "learning_rate": 0.001, "loss": 1.8872, "step": 345856 }, { "epoch": 29.840579710144926, "grad_norm": 1.4037699699401855, "learning_rate": 0.001, "loss": 1.9043, "step": 345912 }, { "epoch": 29.845410628019323, "grad_norm": 1.0574721097946167, "learning_rate": 0.001, "loss": 1.9002, "step": 345968 }, { "epoch": 29.85024154589372, "grad_norm": 0.27883023023605347, "learning_rate": 0.001, "loss": 1.9067, "step": 346024 }, { "epoch": 29.855072463768117, "grad_norm": 2.7373135089874268, "learning_rate": 0.001, "loss": 1.9032, "step": 346080 }, { "epoch": 29.85990338164251, "grad_norm": 14.10302448272705, "learning_rate": 0.001, "loss": 1.9102, "step": 346136 }, { "epoch": 29.864734299516908, "grad_norm": 1.1213552951812744, "learning_rate": 0.001, "loss": 1.9097, "step": 346192 }, { "epoch": 29.869565217391305, "grad_norm": 0.5022969841957092, "learning_rate": 0.001, "loss": 1.9086, "step": 346248 }, { "epoch": 29.8743961352657, "grad_norm": 0.975473165512085, "learning_rate": 0.001, "loss": 1.9082, "step": 346304 }, { "epoch": 29.879227053140095, "grad_norm": 1.0287539958953857, "learning_rate": 0.001, "loss": 1.9083, "step": 346360 }, { "epoch": 29.884057971014492, "grad_norm": 2.4628407955169678, "learning_rate": 0.001, "loss": 1.9065, "step": 346416 }, { "epoch": 29.88888888888889, "grad_norm": 0.8880907297134399, "learning_rate": 0.001, "loss": 1.9124, "step": 346472 }, { "epoch": 29.893719806763286, "grad_norm": 0.37082454562187195, "learning_rate": 0.001, "loss": 1.9023, "step": 346528 }, { "epoch": 29.89855072463768, "grad_norm": 0.33981502056121826, "learning_rate": 0.001, "loss": 1.9131, "step": 346584 }, { "epoch": 29.903381642512077, "grad_norm": 0.43468859791755676, "learning_rate": 0.001, "loss": 1.9056, "step": 346640 }, { "epoch": 29.908212560386474, "grad_norm": 0.5177057981491089, "learning_rate": 0.001, "loss": 1.9109, "step": 346696 }, { "epoch": 29.91304347826087, "grad_norm": 1.4313410520553589, "learning_rate": 0.001, "loss": 1.9183, "step": 346752 }, { "epoch": 29.917874396135264, "grad_norm": 0.34012916684150696, "learning_rate": 0.001, "loss": 1.9172, "step": 346808 }, { "epoch": 29.92270531400966, "grad_norm": 0.27256232500076294, "learning_rate": 0.001, "loss": 1.9007, "step": 346864 }, { "epoch": 29.92753623188406, "grad_norm": 0.3913356363773346, "learning_rate": 0.001, "loss": 1.8969, "step": 346920 }, { "epoch": 29.932367149758456, "grad_norm": 1.646673560142517, "learning_rate": 0.001, "loss": 1.9061, "step": 346976 }, { "epoch": 29.93719806763285, "grad_norm": 0.2729713022708893, "learning_rate": 0.001, "loss": 1.9055, "step": 347032 }, { "epoch": 29.942028985507246, "grad_norm": 1.602040410041809, "learning_rate": 0.001, "loss": 1.8903, "step": 347088 }, { "epoch": 29.946859903381643, "grad_norm": 0.4484712481498718, "learning_rate": 0.001, "loss": 1.8906, "step": 347144 }, { "epoch": 29.95169082125604, "grad_norm": 0.33455437421798706, "learning_rate": 0.001, "loss": 1.9013, "step": 347200 }, { "epoch": 29.956521739130434, "grad_norm": 0.701033890247345, "learning_rate": 0.001, "loss": 1.895, "step": 347256 }, { "epoch": 29.96135265700483, "grad_norm": 0.772212564945221, "learning_rate": 0.001, "loss": 1.8993, "step": 347312 }, { "epoch": 29.966183574879228, "grad_norm": 1.3854421377182007, "learning_rate": 0.001, "loss": 1.9001, "step": 347368 }, { "epoch": 29.971014492753625, "grad_norm": 2.3350648880004883, "learning_rate": 0.001, "loss": 1.8997, "step": 347424 }, { "epoch": 29.97584541062802, "grad_norm": 2.164705276489258, "learning_rate": 0.001, "loss": 1.8952, "step": 347480 }, { "epoch": 29.980676328502415, "grad_norm": 1.2127679586410522, "learning_rate": 0.001, "loss": 1.8991, "step": 347536 }, { "epoch": 29.985507246376812, "grad_norm": 0.6176809072494507, "learning_rate": 0.001, "loss": 1.8921, "step": 347592 }, { "epoch": 29.990338164251206, "grad_norm": 1.0814037322998047, "learning_rate": 0.001, "loss": 1.8922, "step": 347648 }, { "epoch": 29.995169082125603, "grad_norm": 0.7046548128128052, "learning_rate": 0.001, "loss": 1.8862, "step": 347704 }, { "epoch": 30.0, "grad_norm": 1.102046012878418, "learning_rate": 0.001, "loss": 1.9038, "step": 347760 }, { "epoch": 30.004830917874397, "grad_norm": 1.6245622634887695, "learning_rate": 0.001, "loss": 1.8512, "step": 347816 }, { "epoch": 30.00966183574879, "grad_norm": 0.7782652378082275, "learning_rate": 0.001, "loss": 1.8603, "step": 347872 }, { "epoch": 30.014492753623188, "grad_norm": 0.34809496998786926, "learning_rate": 0.001, "loss": 1.8611, "step": 347928 }, { "epoch": 30.019323671497585, "grad_norm": 1.3345814943313599, "learning_rate": 0.001, "loss": 1.8578, "step": 347984 }, { "epoch": 30.02415458937198, "grad_norm": 1.8830335140228271, "learning_rate": 0.001, "loss": 1.862, "step": 348040 }, { "epoch": 30.028985507246375, "grad_norm": 1.0652685165405273, "learning_rate": 0.001, "loss": 1.8553, "step": 348096 }, { "epoch": 30.033816425120772, "grad_norm": 0.6066794395446777, "learning_rate": 0.001, "loss": 1.8621, "step": 348152 }, { "epoch": 30.03864734299517, "grad_norm": 2.4736759662628174, "learning_rate": 0.001, "loss": 1.8608, "step": 348208 }, { "epoch": 30.043478260869566, "grad_norm": 6.71629524230957, "learning_rate": 0.001, "loss": 1.8629, "step": 348264 }, { "epoch": 30.04830917874396, "grad_norm": 2.431619882583618, "learning_rate": 0.001, "loss": 1.8613, "step": 348320 }, { "epoch": 30.053140096618357, "grad_norm": 2.8606183528900146, "learning_rate": 0.001, "loss": 1.861, "step": 348376 }, { "epoch": 30.057971014492754, "grad_norm": 0.47667524218559265, "learning_rate": 0.001, "loss": 1.8499, "step": 348432 }, { "epoch": 30.06280193236715, "grad_norm": 0.8094444274902344, "learning_rate": 0.001, "loss": 1.8586, "step": 348488 }, { "epoch": 30.067632850241544, "grad_norm": 0.7716014385223389, "learning_rate": 0.001, "loss": 1.8532, "step": 348544 }, { "epoch": 30.07246376811594, "grad_norm": 4.095464706420898, "learning_rate": 0.001, "loss": 1.872, "step": 348600 }, { "epoch": 30.07729468599034, "grad_norm": 8.32016658782959, "learning_rate": 0.001, "loss": 1.8802, "step": 348656 }, { "epoch": 30.082125603864736, "grad_norm": 1.7004462480545044, "learning_rate": 0.001, "loss": 1.8829, "step": 348712 }, { "epoch": 30.08695652173913, "grad_norm": 0.6757870316505432, "learning_rate": 0.001, "loss": 1.8762, "step": 348768 }, { "epoch": 30.091787439613526, "grad_norm": 0.509045422077179, "learning_rate": 0.001, "loss": 1.8807, "step": 348824 }, { "epoch": 30.096618357487923, "grad_norm": 0.9797523617744446, "learning_rate": 0.001, "loss": 1.8801, "step": 348880 }, { "epoch": 30.10144927536232, "grad_norm": 0.5945743322372437, "learning_rate": 0.001, "loss": 1.8641, "step": 348936 }, { "epoch": 30.106280193236714, "grad_norm": 5.183482646942139, "learning_rate": 0.001, "loss": 1.8767, "step": 348992 }, { "epoch": 30.11111111111111, "grad_norm": 1.6931238174438477, "learning_rate": 0.001, "loss": 1.8744, "step": 349048 }, { "epoch": 30.115942028985508, "grad_norm": 0.5609611868858337, "learning_rate": 0.001, "loss": 1.868, "step": 349104 }, { "epoch": 30.120772946859905, "grad_norm": 1.3362735509872437, "learning_rate": 0.001, "loss": 1.8645, "step": 349160 }, { "epoch": 30.1256038647343, "grad_norm": 0.9575352072715759, "learning_rate": 0.001, "loss": 1.87, "step": 349216 }, { "epoch": 30.130434782608695, "grad_norm": 0.5428534746170044, "learning_rate": 0.001, "loss": 1.8706, "step": 349272 }, { "epoch": 30.135265700483092, "grad_norm": 1.0157462358474731, "learning_rate": 0.001, "loss": 1.8687, "step": 349328 }, { "epoch": 30.14009661835749, "grad_norm": 0.9258418679237366, "learning_rate": 0.001, "loss": 1.8704, "step": 349384 }, { "epoch": 30.144927536231883, "grad_norm": 0.5394553542137146, "learning_rate": 0.001, "loss": 1.872, "step": 349440 }, { "epoch": 30.14975845410628, "grad_norm": 0.42806872725486755, "learning_rate": 0.001, "loss": 1.8705, "step": 349496 }, { "epoch": 30.154589371980677, "grad_norm": 0.7224445939064026, "learning_rate": 0.001, "loss": 1.8718, "step": 349552 }, { "epoch": 30.159420289855074, "grad_norm": 3.6242918968200684, "learning_rate": 0.001, "loss": 1.8635, "step": 349608 }, { "epoch": 30.164251207729468, "grad_norm": 1.0771609544754028, "learning_rate": 0.001, "loss": 1.8763, "step": 349664 }, { "epoch": 30.169082125603865, "grad_norm": 3.3896090984344482, "learning_rate": 0.001, "loss": 1.868, "step": 349720 }, { "epoch": 30.17391304347826, "grad_norm": 0.9373986124992371, "learning_rate": 0.001, "loss": 1.8735, "step": 349776 }, { "epoch": 30.17874396135266, "grad_norm": 0.6278557777404785, "learning_rate": 0.001, "loss": 1.8742, "step": 349832 }, { "epoch": 30.183574879227052, "grad_norm": 3.8788821697235107, "learning_rate": 0.001, "loss": 1.8658, "step": 349888 }, { "epoch": 30.18840579710145, "grad_norm": 2.0766570568084717, "learning_rate": 0.001, "loss": 1.8635, "step": 349944 }, { "epoch": 30.193236714975846, "grad_norm": 0.5334764719009399, "learning_rate": 0.001, "loss": 1.8727, "step": 350000 }, { "epoch": 30.19806763285024, "grad_norm": 1.8483541011810303, "learning_rate": 0.001, "loss": 1.8751, "step": 350056 }, { "epoch": 30.202898550724637, "grad_norm": 0.4267479479312897, "learning_rate": 0.001, "loss": 1.869, "step": 350112 }, { "epoch": 30.207729468599034, "grad_norm": 1.000412940979004, "learning_rate": 0.001, "loss": 1.8864, "step": 350168 }, { "epoch": 30.21256038647343, "grad_norm": 0.4552321135997772, "learning_rate": 0.001, "loss": 1.8743, "step": 350224 }, { "epoch": 30.217391304347824, "grad_norm": 1.0496025085449219, "learning_rate": 0.001, "loss": 1.885, "step": 350280 }, { "epoch": 30.22222222222222, "grad_norm": 1.0267835855484009, "learning_rate": 0.001, "loss": 1.8797, "step": 350336 }, { "epoch": 30.22705314009662, "grad_norm": 9.707250595092773, "learning_rate": 0.001, "loss": 1.8805, "step": 350392 }, { "epoch": 30.231884057971016, "grad_norm": 3.1955811977386475, "learning_rate": 0.001, "loss": 1.8958, "step": 350448 }, { "epoch": 30.23671497584541, "grad_norm": 1.7350873947143555, "learning_rate": 0.001, "loss": 1.8834, "step": 350504 }, { "epoch": 30.241545893719806, "grad_norm": 2.84063720703125, "learning_rate": 0.001, "loss": 1.8783, "step": 350560 }, { "epoch": 30.246376811594203, "grad_norm": 1.4558125734329224, "learning_rate": 0.001, "loss": 1.873, "step": 350616 }, { "epoch": 30.2512077294686, "grad_norm": 0.7567052245140076, "learning_rate": 0.001, "loss": 1.8766, "step": 350672 }, { "epoch": 30.256038647342994, "grad_norm": 5.560967445373535, "learning_rate": 0.001, "loss": 1.8841, "step": 350728 }, { "epoch": 30.26086956521739, "grad_norm": 2.511531352996826, "learning_rate": 0.001, "loss": 1.8727, "step": 350784 }, { "epoch": 30.265700483091788, "grad_norm": 0.6153662204742432, "learning_rate": 0.001, "loss": 1.8749, "step": 350840 }, { "epoch": 30.270531400966185, "grad_norm": 3.1324899196624756, "learning_rate": 0.001, "loss": 1.8813, "step": 350896 }, { "epoch": 30.27536231884058, "grad_norm": 1.6099321842193604, "learning_rate": 0.001, "loss": 1.8859, "step": 350952 }, { "epoch": 30.280193236714975, "grad_norm": 1.5619922876358032, "learning_rate": 0.001, "loss": 1.8909, "step": 351008 }, { "epoch": 30.285024154589372, "grad_norm": 1.7167088985443115, "learning_rate": 0.001, "loss": 1.8909, "step": 351064 }, { "epoch": 30.28985507246377, "grad_norm": 5.756916046142578, "learning_rate": 0.001, "loss": 1.8754, "step": 351120 }, { "epoch": 30.294685990338163, "grad_norm": 1.00927734375, "learning_rate": 0.001, "loss": 1.8785, "step": 351176 }, { "epoch": 30.29951690821256, "grad_norm": 1.4379228353500366, "learning_rate": 0.001, "loss": 1.8862, "step": 351232 }, { "epoch": 30.304347826086957, "grad_norm": 2.2918591499328613, "learning_rate": 0.001, "loss": 1.8808, "step": 351288 }, { "epoch": 30.309178743961354, "grad_norm": 0.5946013927459717, "learning_rate": 0.001, "loss": 1.8678, "step": 351344 }, { "epoch": 30.314009661835748, "grad_norm": 0.697689950466156, "learning_rate": 0.001, "loss": 1.874, "step": 351400 }, { "epoch": 30.318840579710145, "grad_norm": 1.8963210582733154, "learning_rate": 0.001, "loss": 1.8764, "step": 351456 }, { "epoch": 30.32367149758454, "grad_norm": 0.5150372982025146, "learning_rate": 0.001, "loss": 1.8758, "step": 351512 }, { "epoch": 30.32850241545894, "grad_norm": 2.926867961883545, "learning_rate": 0.001, "loss": 1.8891, "step": 351568 }, { "epoch": 30.333333333333332, "grad_norm": 0.6492698192596436, "learning_rate": 0.001, "loss": 1.8819, "step": 351624 }, { "epoch": 30.33816425120773, "grad_norm": 0.7006253600120544, "learning_rate": 0.001, "loss": 1.9088, "step": 351680 }, { "epoch": 30.342995169082126, "grad_norm": 0.3250187933444977, "learning_rate": 0.001, "loss": 1.9126, "step": 351736 }, { "epoch": 30.347826086956523, "grad_norm": 0.2729688584804535, "learning_rate": 0.001, "loss": 1.8987, "step": 351792 }, { "epoch": 30.352657004830917, "grad_norm": 1.5226668119430542, "learning_rate": 0.001, "loss": 1.8869, "step": 351848 }, { "epoch": 30.357487922705314, "grad_norm": 1.19586980342865, "learning_rate": 0.001, "loss": 1.8827, "step": 351904 }, { "epoch": 30.36231884057971, "grad_norm": 3.7458693981170654, "learning_rate": 0.001, "loss": 1.8731, "step": 351960 }, { "epoch": 30.367149758454108, "grad_norm": 5.488990306854248, "learning_rate": 0.001, "loss": 1.8755, "step": 352016 }, { "epoch": 30.3719806763285, "grad_norm": 0.9908542633056641, "learning_rate": 0.001, "loss": 1.8871, "step": 352072 }, { "epoch": 30.3768115942029, "grad_norm": 0.4259231686592102, "learning_rate": 0.001, "loss": 1.8784, "step": 352128 }, { "epoch": 30.381642512077295, "grad_norm": 0.45330068469047546, "learning_rate": 0.001, "loss": 1.8681, "step": 352184 }, { "epoch": 30.386473429951693, "grad_norm": 0.4506126642227173, "learning_rate": 0.001, "loss": 1.879, "step": 352240 }, { "epoch": 30.391304347826086, "grad_norm": 0.3546209931373596, "learning_rate": 0.001, "loss": 1.8691, "step": 352296 }, { "epoch": 30.396135265700483, "grad_norm": 0.3133753836154938, "learning_rate": 0.001, "loss": 1.8756, "step": 352352 }, { "epoch": 30.40096618357488, "grad_norm": 2.1002237796783447, "learning_rate": 0.001, "loss": 1.8736, "step": 352408 }, { "epoch": 30.405797101449274, "grad_norm": 0.5384484529495239, "learning_rate": 0.001, "loss": 1.8731, "step": 352464 }, { "epoch": 30.41062801932367, "grad_norm": 0.9713990092277527, "learning_rate": 0.001, "loss": 1.865, "step": 352520 }, { "epoch": 30.415458937198068, "grad_norm": 0.6886662244796753, "learning_rate": 0.001, "loss": 1.87, "step": 352576 }, { "epoch": 30.420289855072465, "grad_norm": 3.8965535163879395, "learning_rate": 0.001, "loss": 1.8748, "step": 352632 }, { "epoch": 30.42512077294686, "grad_norm": 0.39110586047172546, "learning_rate": 0.001, "loss": 1.8769, "step": 352688 }, { "epoch": 30.429951690821255, "grad_norm": 0.5718410611152649, "learning_rate": 0.001, "loss": 1.896, "step": 352744 }, { "epoch": 30.434782608695652, "grad_norm": 1.0877954959869385, "learning_rate": 0.001, "loss": 1.905, "step": 352800 }, { "epoch": 30.43961352657005, "grad_norm": 0.6660036444664001, "learning_rate": 0.001, "loss": 1.9195, "step": 352856 }, { "epoch": 30.444444444444443, "grad_norm": 1.493395447731018, "learning_rate": 0.001, "loss": 1.911, "step": 352912 }, { "epoch": 30.44927536231884, "grad_norm": 0.8233058452606201, "learning_rate": 0.001, "loss": 1.8915, "step": 352968 }, { "epoch": 30.454106280193237, "grad_norm": 1.0065187215805054, "learning_rate": 0.001, "loss": 1.9017, "step": 353024 }, { "epoch": 30.458937198067634, "grad_norm": 1.1533045768737793, "learning_rate": 0.001, "loss": 1.899, "step": 353080 }, { "epoch": 30.463768115942027, "grad_norm": 0.5408811569213867, "learning_rate": 0.001, "loss": 1.8964, "step": 353136 }, { "epoch": 30.468599033816425, "grad_norm": 2.091660499572754, "learning_rate": 0.001, "loss": 1.8864, "step": 353192 }, { "epoch": 30.47342995169082, "grad_norm": 1.5549612045288086, "learning_rate": 0.001, "loss": 1.8882, "step": 353248 }, { "epoch": 30.47826086956522, "grad_norm": 3.646867036819458, "learning_rate": 0.001, "loss": 1.8833, "step": 353304 }, { "epoch": 30.483091787439612, "grad_norm": 0.6905511021614075, "learning_rate": 0.001, "loss": 1.8953, "step": 353360 }, { "epoch": 30.48792270531401, "grad_norm": 7.458583354949951, "learning_rate": 0.001, "loss": 1.8971, "step": 353416 }, { "epoch": 30.492753623188406, "grad_norm": 3.6840155124664307, "learning_rate": 0.001, "loss": 1.8882, "step": 353472 }, { "epoch": 30.497584541062803, "grad_norm": 1.934524655342102, "learning_rate": 0.001, "loss": 1.8886, "step": 353528 }, { "epoch": 30.502415458937197, "grad_norm": 0.48761430382728577, "learning_rate": 0.001, "loss": 1.9019, "step": 353584 }, { "epoch": 30.507246376811594, "grad_norm": 1.849694013595581, "learning_rate": 0.001, "loss": 1.8822, "step": 353640 }, { "epoch": 30.51207729468599, "grad_norm": 1.0457483530044556, "learning_rate": 0.001, "loss": 1.8803, "step": 353696 }, { "epoch": 30.516908212560388, "grad_norm": 19.32866096496582, "learning_rate": 0.001, "loss": 1.8894, "step": 353752 }, { "epoch": 30.52173913043478, "grad_norm": 1.1918922662734985, "learning_rate": 0.001, "loss": 1.8969, "step": 353808 }, { "epoch": 30.52657004830918, "grad_norm": 0.8435972332954407, "learning_rate": 0.001, "loss": 1.8975, "step": 353864 }, { "epoch": 30.531400966183575, "grad_norm": 2.300915479660034, "learning_rate": 0.001, "loss": 1.9051, "step": 353920 }, { "epoch": 30.536231884057973, "grad_norm": 1.3580238819122314, "learning_rate": 0.001, "loss": 1.9022, "step": 353976 }, { "epoch": 30.541062801932366, "grad_norm": 1.833304524421692, "learning_rate": 0.001, "loss": 1.9031, "step": 354032 }, { "epoch": 30.545893719806763, "grad_norm": 1.482807993888855, "learning_rate": 0.001, "loss": 1.9081, "step": 354088 }, { "epoch": 30.55072463768116, "grad_norm": 1.5001224279403687, "learning_rate": 0.001, "loss": 1.9025, "step": 354144 }, { "epoch": 30.555555555555557, "grad_norm": 0.3176400363445282, "learning_rate": 0.001, "loss": 1.8879, "step": 354200 }, { "epoch": 30.56038647342995, "grad_norm": 0.6192721724510193, "learning_rate": 0.001, "loss": 1.8907, "step": 354256 }, { "epoch": 30.565217391304348, "grad_norm": 14.206862449645996, "learning_rate": 0.001, "loss": 1.8897, "step": 354312 }, { "epoch": 30.570048309178745, "grad_norm": 6.536881923675537, "learning_rate": 0.001, "loss": 1.8784, "step": 354368 }, { "epoch": 30.57487922705314, "grad_norm": 1.5987311601638794, "learning_rate": 0.001, "loss": 1.8948, "step": 354424 }, { "epoch": 30.579710144927535, "grad_norm": 4.253265857696533, "learning_rate": 0.001, "loss": 1.8958, "step": 354480 }, { "epoch": 30.584541062801932, "grad_norm": 6.087608814239502, "learning_rate": 0.001, "loss": 1.8939, "step": 354536 }, { "epoch": 30.58937198067633, "grad_norm": 4.231012344360352, "learning_rate": 0.001, "loss": 1.8944, "step": 354592 }, { "epoch": 30.594202898550726, "grad_norm": 0.6379872560501099, "learning_rate": 0.001, "loss": 1.899, "step": 354648 }, { "epoch": 30.59903381642512, "grad_norm": 0.5166046619415283, "learning_rate": 0.001, "loss": 1.8932, "step": 354704 }, { "epoch": 30.603864734299517, "grad_norm": 1.4545706510543823, "learning_rate": 0.001, "loss": 1.8952, "step": 354760 }, { "epoch": 30.608695652173914, "grad_norm": 0.5502257943153381, "learning_rate": 0.001, "loss": 1.9034, "step": 354816 }, { "epoch": 30.613526570048307, "grad_norm": 1.8456518650054932, "learning_rate": 0.001, "loss": 1.9101, "step": 354872 }, { "epoch": 30.618357487922705, "grad_norm": 0.6801899075508118, "learning_rate": 0.001, "loss": 1.9055, "step": 354928 }, { "epoch": 30.6231884057971, "grad_norm": 3.9923291206359863, "learning_rate": 0.001, "loss": 1.9051, "step": 354984 }, { "epoch": 30.6280193236715, "grad_norm": 0.29150211811065674, "learning_rate": 0.001, "loss": 1.9011, "step": 355040 }, { "epoch": 30.632850241545892, "grad_norm": 1.7221652269363403, "learning_rate": 0.001, "loss": 1.8993, "step": 355096 }, { "epoch": 30.63768115942029, "grad_norm": 0.46745234727859497, "learning_rate": 0.001, "loss": 1.8943, "step": 355152 }, { "epoch": 30.642512077294686, "grad_norm": 0.4678397476673126, "learning_rate": 0.001, "loss": 1.8977, "step": 355208 }, { "epoch": 30.647342995169083, "grad_norm": 1.9967821836471558, "learning_rate": 0.001, "loss": 1.8905, "step": 355264 }, { "epoch": 30.652173913043477, "grad_norm": 3.109009265899658, "learning_rate": 0.001, "loss": 1.8916, "step": 355320 }, { "epoch": 30.657004830917874, "grad_norm": 0.9057146906852722, "learning_rate": 0.001, "loss": 1.8862, "step": 355376 }, { "epoch": 30.66183574879227, "grad_norm": 0.5076047778129578, "learning_rate": 0.001, "loss": 1.8879, "step": 355432 }, { "epoch": 30.666666666666668, "grad_norm": 0.2994730472564697, "learning_rate": 0.001, "loss": 1.8877, "step": 355488 }, { "epoch": 30.67149758454106, "grad_norm": 0.3178282380104065, "learning_rate": 0.001, "loss": 1.8893, "step": 355544 }, { "epoch": 30.67632850241546, "grad_norm": 1.4147961139678955, "learning_rate": 0.001, "loss": 1.8959, "step": 355600 }, { "epoch": 30.681159420289855, "grad_norm": 1.8748258352279663, "learning_rate": 0.001, "loss": 1.8867, "step": 355656 }, { "epoch": 30.685990338164252, "grad_norm": 0.29489418864250183, "learning_rate": 0.001, "loss": 1.8845, "step": 355712 }, { "epoch": 30.690821256038646, "grad_norm": 1.7525562047958374, "learning_rate": 0.001, "loss": 1.8845, "step": 355768 }, { "epoch": 30.695652173913043, "grad_norm": 0.5614147186279297, "learning_rate": 0.001, "loss": 1.882, "step": 355824 }, { "epoch": 30.70048309178744, "grad_norm": 2.368215322494507, "learning_rate": 0.001, "loss": 1.8813, "step": 355880 }, { "epoch": 30.705314009661837, "grad_norm": 0.9378824830055237, "learning_rate": 0.001, "loss": 1.8916, "step": 355936 }, { "epoch": 30.71014492753623, "grad_norm": 1.390376091003418, "learning_rate": 0.001, "loss": 1.8809, "step": 355992 }, { "epoch": 30.714975845410628, "grad_norm": 0.41881877183914185, "learning_rate": 0.001, "loss": 1.8822, "step": 356048 }, { "epoch": 30.719806763285025, "grad_norm": 0.39202064275741577, "learning_rate": 0.001, "loss": 1.8737, "step": 356104 }, { "epoch": 30.72463768115942, "grad_norm": 7.556109428405762, "learning_rate": 0.001, "loss": 1.8834, "step": 356160 }, { "epoch": 30.729468599033815, "grad_norm": 0.6123942136764526, "learning_rate": 0.001, "loss": 1.8846, "step": 356216 }, { "epoch": 30.734299516908212, "grad_norm": 0.8291679620742798, "learning_rate": 0.001, "loss": 1.901, "step": 356272 }, { "epoch": 30.73913043478261, "grad_norm": 1.5180034637451172, "learning_rate": 0.001, "loss": 1.8911, "step": 356328 }, { "epoch": 30.743961352657006, "grad_norm": 2.6228694915771484, "learning_rate": 0.001, "loss": 1.9048, "step": 356384 }, { "epoch": 30.7487922705314, "grad_norm": 2.0018997192382812, "learning_rate": 0.001, "loss": 1.8942, "step": 356440 }, { "epoch": 30.753623188405797, "grad_norm": 1.1679586172103882, "learning_rate": 0.001, "loss": 1.9056, "step": 356496 }, { "epoch": 30.758454106280194, "grad_norm": 0.3004785180091858, "learning_rate": 0.001, "loss": 1.9099, "step": 356552 }, { "epoch": 30.76328502415459, "grad_norm": 0.6034873127937317, "learning_rate": 0.001, "loss": 1.8933, "step": 356608 }, { "epoch": 30.768115942028984, "grad_norm": 3.404514789581299, "learning_rate": 0.001, "loss": 1.8922, "step": 356664 }, { "epoch": 30.77294685990338, "grad_norm": 0.37851274013519287, "learning_rate": 0.001, "loss": 1.8978, "step": 356720 }, { "epoch": 30.77777777777778, "grad_norm": 0.35049712657928467, "learning_rate": 0.001, "loss": 1.8948, "step": 356776 }, { "epoch": 30.782608695652176, "grad_norm": 1.1702537536621094, "learning_rate": 0.001, "loss": 1.8843, "step": 356832 }, { "epoch": 30.78743961352657, "grad_norm": 0.41968366503715515, "learning_rate": 0.001, "loss": 1.8823, "step": 356888 }, { "epoch": 30.792270531400966, "grad_norm": 18.203081130981445, "learning_rate": 0.001, "loss": 1.8966, "step": 356944 }, { "epoch": 30.797101449275363, "grad_norm": 1.1103543043136597, "learning_rate": 0.001, "loss": 1.892, "step": 357000 }, { "epoch": 30.80193236714976, "grad_norm": 0.7177728414535522, "learning_rate": 0.001, "loss": 1.8938, "step": 357056 }, { "epoch": 30.806763285024154, "grad_norm": 1.4654901027679443, "learning_rate": 0.001, "loss": 1.8705, "step": 357112 }, { "epoch": 30.81159420289855, "grad_norm": 0.3273821175098419, "learning_rate": 0.001, "loss": 1.8857, "step": 357168 }, { "epoch": 30.816425120772948, "grad_norm": 0.5282752513885498, "learning_rate": 0.001, "loss": 1.8901, "step": 357224 }, { "epoch": 30.82125603864734, "grad_norm": 0.35609912872314453, "learning_rate": 0.001, "loss": 1.8864, "step": 357280 }, { "epoch": 30.82608695652174, "grad_norm": 0.7931268215179443, "learning_rate": 0.001, "loss": 1.8857, "step": 357336 }, { "epoch": 30.830917874396135, "grad_norm": 1.160841464996338, "learning_rate": 0.001, "loss": 1.8968, "step": 357392 }, { "epoch": 30.835748792270532, "grad_norm": 0.2842795252799988, "learning_rate": 0.001, "loss": 1.9003, "step": 357448 }, { "epoch": 30.840579710144926, "grad_norm": 0.33713632822036743, "learning_rate": 0.001, "loss": 1.8896, "step": 357504 }, { "epoch": 30.845410628019323, "grad_norm": 0.9215450286865234, "learning_rate": 0.001, "loss": 1.8765, "step": 357560 }, { "epoch": 30.85024154589372, "grad_norm": 0.43814125657081604, "learning_rate": 0.001, "loss": 1.8852, "step": 357616 }, { "epoch": 30.855072463768117, "grad_norm": 7.967289924621582, "learning_rate": 0.001, "loss": 1.885, "step": 357672 }, { "epoch": 30.85990338164251, "grad_norm": 1.269940972328186, "learning_rate": 0.001, "loss": 1.883, "step": 357728 }, { "epoch": 30.864734299516908, "grad_norm": 0.45210787653923035, "learning_rate": 0.001, "loss": 1.8877, "step": 357784 }, { "epoch": 30.869565217391305, "grad_norm": 1.2319374084472656, "learning_rate": 0.001, "loss": 1.8985, "step": 357840 }, { "epoch": 30.8743961352657, "grad_norm": 0.8083316683769226, "learning_rate": 0.001, "loss": 1.8862, "step": 357896 }, { "epoch": 30.879227053140095, "grad_norm": 4.0241570472717285, "learning_rate": 0.001, "loss": 1.8936, "step": 357952 }, { "epoch": 30.884057971014492, "grad_norm": 0.44804444909095764, "learning_rate": 0.001, "loss": 1.8871, "step": 358008 }, { "epoch": 30.88888888888889, "grad_norm": 1.57484769821167, "learning_rate": 0.001, "loss": 1.8819, "step": 358064 }, { "epoch": 30.893719806763286, "grad_norm": 5.863276481628418, "learning_rate": 0.001, "loss": 1.876, "step": 358120 }, { "epoch": 30.89855072463768, "grad_norm": 0.8706706762313843, "learning_rate": 0.001, "loss": 1.8819, "step": 358176 }, { "epoch": 30.903381642512077, "grad_norm": 2.754408121109009, "learning_rate": 0.001, "loss": 1.8798, "step": 358232 }, { "epoch": 30.908212560386474, "grad_norm": 1.2650333642959595, "learning_rate": 0.001, "loss": 1.8746, "step": 358288 }, { "epoch": 30.91304347826087, "grad_norm": 0.4831315875053406, "learning_rate": 0.001, "loss": 1.8843, "step": 358344 }, { "epoch": 30.917874396135264, "grad_norm": 0.5977744460105896, "learning_rate": 0.001, "loss": 1.8837, "step": 358400 }, { "epoch": 30.92270531400966, "grad_norm": 0.367818683385849, "learning_rate": 0.001, "loss": 1.8859, "step": 358456 }, { "epoch": 30.92753623188406, "grad_norm": 1.309637188911438, "learning_rate": 0.001, "loss": 1.8913, "step": 358512 }, { "epoch": 30.932367149758456, "grad_norm": 0.42118874192237854, "learning_rate": 0.001, "loss": 1.8908, "step": 358568 }, { "epoch": 30.93719806763285, "grad_norm": 0.9080486297607422, "learning_rate": 0.001, "loss": 1.8866, "step": 358624 }, { "epoch": 30.942028985507246, "grad_norm": 1.2292617559432983, "learning_rate": 0.001, "loss": 1.8896, "step": 358680 }, { "epoch": 30.946859903381643, "grad_norm": 1.4693320989608765, "learning_rate": 0.001, "loss": 1.8889, "step": 358736 }, { "epoch": 30.95169082125604, "grad_norm": 0.9383873343467712, "learning_rate": 0.001, "loss": 1.8832, "step": 358792 }, { "epoch": 30.956521739130434, "grad_norm": 0.3918863832950592, "learning_rate": 0.001, "loss": 1.8807, "step": 358848 }, { "epoch": 30.96135265700483, "grad_norm": 0.7786741256713867, "learning_rate": 0.001, "loss": 1.8793, "step": 358904 }, { "epoch": 30.966183574879228, "grad_norm": 0.6408087015151978, "learning_rate": 0.001, "loss": 1.879, "step": 358960 }, { "epoch": 30.971014492753625, "grad_norm": 1.3860994577407837, "learning_rate": 0.001, "loss": 1.8866, "step": 359016 }, { "epoch": 30.97584541062802, "grad_norm": 0.4226723909378052, "learning_rate": 0.001, "loss": 1.8848, "step": 359072 }, { "epoch": 30.980676328502415, "grad_norm": 0.40830492973327637, "learning_rate": 0.001, "loss": 1.8828, "step": 359128 }, { "epoch": 30.985507246376812, "grad_norm": 2.3204100131988525, "learning_rate": 0.001, "loss": 1.879, "step": 359184 }, { "epoch": 30.990338164251206, "grad_norm": 4.263381481170654, "learning_rate": 0.001, "loss": 1.8649, "step": 359240 }, { "epoch": 30.995169082125603, "grad_norm": 0.6153120994567871, "learning_rate": 0.001, "loss": 1.8724, "step": 359296 }, { "epoch": 31.0, "grad_norm": 0.5124632120132446, "learning_rate": 0.001, "loss": 1.8631, "step": 359352 }, { "epoch": 31.004830917874397, "grad_norm": 0.7665235996246338, "learning_rate": 0.001, "loss": 1.8441, "step": 359408 }, { "epoch": 31.00966183574879, "grad_norm": 3.1548423767089844, "learning_rate": 0.001, "loss": 1.8344, "step": 359464 }, { "epoch": 31.014492753623188, "grad_norm": 1.039751648902893, "learning_rate": 0.001, "loss": 1.8415, "step": 359520 }, { "epoch": 31.019323671497585, "grad_norm": 0.7947804927825928, "learning_rate": 0.001, "loss": 1.8436, "step": 359576 }, { "epoch": 31.02415458937198, "grad_norm": 0.4591917097568512, "learning_rate": 0.001, "loss": 1.8449, "step": 359632 }, { "epoch": 31.028985507246375, "grad_norm": 1.48048996925354, "learning_rate": 0.001, "loss": 1.8428, "step": 359688 }, { "epoch": 31.033816425120772, "grad_norm": 0.28117677569389343, "learning_rate": 0.001, "loss": 1.8507, "step": 359744 }, { "epoch": 31.03864734299517, "grad_norm": 3.1333396434783936, "learning_rate": 0.001, "loss": 1.8513, "step": 359800 }, { "epoch": 31.043478260869566, "grad_norm": 2.074233055114746, "learning_rate": 0.001, "loss": 1.8483, "step": 359856 }, { "epoch": 31.04830917874396, "grad_norm": 1.3762333393096924, "learning_rate": 0.001, "loss": 1.8438, "step": 359912 }, { "epoch": 31.053140096618357, "grad_norm": 0.8963009119033813, "learning_rate": 0.001, "loss": 1.8596, "step": 359968 }, { "epoch": 31.057971014492754, "grad_norm": 0.3135211765766144, "learning_rate": 0.001, "loss": 1.8676, "step": 360024 }, { "epoch": 31.06280193236715, "grad_norm": 2.031804323196411, "learning_rate": 0.001, "loss": 1.8837, "step": 360080 }, { "epoch": 31.067632850241544, "grad_norm": 0.8728108406066895, "learning_rate": 0.001, "loss": 1.8829, "step": 360136 }, { "epoch": 31.07246376811594, "grad_norm": 1.7144250869750977, "learning_rate": 0.001, "loss": 1.8794, "step": 360192 }, { "epoch": 31.07729468599034, "grad_norm": 0.5351441502571106, "learning_rate": 0.001, "loss": 1.8579, "step": 360248 }, { "epoch": 31.082125603864736, "grad_norm": 0.49805018305778503, "learning_rate": 0.001, "loss": 1.8494, "step": 360304 }, { "epoch": 31.08695652173913, "grad_norm": 0.5480920076370239, "learning_rate": 0.001, "loss": 1.8486, "step": 360360 }, { "epoch": 31.091787439613526, "grad_norm": 1.225471019744873, "learning_rate": 0.001, "loss": 1.8475, "step": 360416 }, { "epoch": 31.096618357487923, "grad_norm": 4.134900093078613, "learning_rate": 0.001, "loss": 1.8508, "step": 360472 }, { "epoch": 31.10144927536232, "grad_norm": 0.35351380705833435, "learning_rate": 0.001, "loss": 1.8529, "step": 360528 }, { "epoch": 31.106280193236714, "grad_norm": 3.017313003540039, "learning_rate": 0.001, "loss": 1.8399, "step": 360584 }, { "epoch": 31.11111111111111, "grad_norm": 0.5390178561210632, "learning_rate": 0.001, "loss": 1.8425, "step": 360640 }, { "epoch": 31.115942028985508, "grad_norm": 0.28583845496177673, "learning_rate": 0.001, "loss": 1.8476, "step": 360696 }, { "epoch": 31.120772946859905, "grad_norm": 0.38308778405189514, "learning_rate": 0.001, "loss": 1.8486, "step": 360752 }, { "epoch": 31.1256038647343, "grad_norm": 2.130467653274536, "learning_rate": 0.001, "loss": 1.8513, "step": 360808 }, { "epoch": 31.130434782608695, "grad_norm": 5.935608863830566, "learning_rate": 0.001, "loss": 1.8837, "step": 360864 }, { "epoch": 31.135265700483092, "grad_norm": 1.7721362113952637, "learning_rate": 0.001, "loss": 1.9122, "step": 360920 }, { "epoch": 31.14009661835749, "grad_norm": 2.543849468231201, "learning_rate": 0.001, "loss": 1.9048, "step": 360976 }, { "epoch": 31.144927536231883, "grad_norm": 2.250897169113159, "learning_rate": 0.001, "loss": 1.8928, "step": 361032 }, { "epoch": 31.14975845410628, "grad_norm": 1.2335624694824219, "learning_rate": 0.001, "loss": 1.8919, "step": 361088 }, { "epoch": 31.154589371980677, "grad_norm": 0.7922707200050354, "learning_rate": 0.001, "loss": 1.9071, "step": 361144 }, { "epoch": 31.159420289855074, "grad_norm": 6.928737640380859, "learning_rate": 0.001, "loss": 1.9078, "step": 361200 }, { "epoch": 31.164251207729468, "grad_norm": 1.592607021331787, "learning_rate": 0.001, "loss": 1.9179, "step": 361256 }, { "epoch": 31.169082125603865, "grad_norm": 1.4074281454086304, "learning_rate": 0.001, "loss": 1.9191, "step": 361312 }, { "epoch": 31.17391304347826, "grad_norm": 1.2853829860687256, "learning_rate": 0.001, "loss": 1.909, "step": 361368 }, { "epoch": 31.17874396135266, "grad_norm": 5.220868110656738, "learning_rate": 0.001, "loss": 1.8951, "step": 361424 }, { "epoch": 31.183574879227052, "grad_norm": 5.939455986022949, "learning_rate": 0.001, "loss": 1.8807, "step": 361480 }, { "epoch": 31.18840579710145, "grad_norm": 0.724431037902832, "learning_rate": 0.001, "loss": 1.8914, "step": 361536 }, { "epoch": 31.193236714975846, "grad_norm": 2.104313373565674, "learning_rate": 0.001, "loss": 1.8864, "step": 361592 }, { "epoch": 31.19806763285024, "grad_norm": 0.5674176216125488, "learning_rate": 0.001, "loss": 1.8791, "step": 361648 }, { "epoch": 31.202898550724637, "grad_norm": 1.9735665321350098, "learning_rate": 0.001, "loss": 1.8819, "step": 361704 }, { "epoch": 31.207729468599034, "grad_norm": 2.4828200340270996, "learning_rate": 0.001, "loss": 1.8917, "step": 361760 }, { "epoch": 31.21256038647343, "grad_norm": 2.5781335830688477, "learning_rate": 0.001, "loss": 1.8921, "step": 361816 }, { "epoch": 31.217391304347824, "grad_norm": 1.0383257865905762, "learning_rate": 0.001, "loss": 1.89, "step": 361872 }, { "epoch": 31.22222222222222, "grad_norm": 8.668288230895996, "learning_rate": 0.001, "loss": 1.8907, "step": 361928 }, { "epoch": 31.22705314009662, "grad_norm": 1.725039005279541, "learning_rate": 0.001, "loss": 1.8887, "step": 361984 }, { "epoch": 31.231884057971016, "grad_norm": 1.3334895372390747, "learning_rate": 0.001, "loss": 1.889, "step": 362040 }, { "epoch": 31.23671497584541, "grad_norm": 1.0550283193588257, "learning_rate": 0.001, "loss": 1.9006, "step": 362096 }, { "epoch": 31.241545893719806, "grad_norm": 0.9315765500068665, "learning_rate": 0.001, "loss": 1.9057, "step": 362152 }, { "epoch": 31.246376811594203, "grad_norm": 0.5694554448127747, "learning_rate": 0.001, "loss": 1.9001, "step": 362208 }, { "epoch": 31.2512077294686, "grad_norm": 4.522511959075928, "learning_rate": 0.001, "loss": 1.8935, "step": 362264 }, { "epoch": 31.256038647342994, "grad_norm": 0.9446779489517212, "learning_rate": 0.001, "loss": 1.8845, "step": 362320 }, { "epoch": 31.26086956521739, "grad_norm": 0.8478748798370361, "learning_rate": 0.001, "loss": 1.887, "step": 362376 }, { "epoch": 31.265700483091788, "grad_norm": 0.7540222406387329, "learning_rate": 0.001, "loss": 1.8911, "step": 362432 }, { "epoch": 31.270531400966185, "grad_norm": 1.1729736328125, "learning_rate": 0.001, "loss": 1.8813, "step": 362488 }, { "epoch": 31.27536231884058, "grad_norm": 0.775435745716095, "learning_rate": 0.001, "loss": 1.889, "step": 362544 }, { "epoch": 31.280193236714975, "grad_norm": 4.484166145324707, "learning_rate": 0.001, "loss": 1.8796, "step": 362600 }, { "epoch": 31.285024154589372, "grad_norm": 0.43832361698150635, "learning_rate": 0.001, "loss": 1.8937, "step": 362656 }, { "epoch": 31.28985507246377, "grad_norm": 2.779090404510498, "learning_rate": 0.001, "loss": 1.8878, "step": 362712 }, { "epoch": 31.294685990338163, "grad_norm": 1.391725778579712, "learning_rate": 0.001, "loss": 1.8871, "step": 362768 }, { "epoch": 31.29951690821256, "grad_norm": 4.036783695220947, "learning_rate": 0.001, "loss": 1.8884, "step": 362824 }, { "epoch": 31.304347826086957, "grad_norm": 3.537506580352783, "learning_rate": 0.001, "loss": 1.8938, "step": 362880 }, { "epoch": 31.309178743961354, "grad_norm": 1.4143881797790527, "learning_rate": 0.001, "loss": 1.884, "step": 362936 }, { "epoch": 31.314009661835748, "grad_norm": 16.878278732299805, "learning_rate": 0.001, "loss": 1.8838, "step": 362992 }, { "epoch": 31.318840579710145, "grad_norm": 3.729255199432373, "learning_rate": 0.001, "loss": 1.8909, "step": 363048 }, { "epoch": 31.32367149758454, "grad_norm": 3.9143974781036377, "learning_rate": 0.001, "loss": 1.8997, "step": 363104 }, { "epoch": 31.32850241545894, "grad_norm": 2.3262157440185547, "learning_rate": 0.001, "loss": 1.9033, "step": 363160 }, { "epoch": 31.333333333333332, "grad_norm": 4.7032952308654785, "learning_rate": 0.001, "loss": 1.915, "step": 363216 }, { "epoch": 31.33816425120773, "grad_norm": 2.9866273403167725, "learning_rate": 0.001, "loss": 1.9175, "step": 363272 }, { "epoch": 31.342995169082126, "grad_norm": 2.556567668914795, "learning_rate": 0.001, "loss": 1.9301, "step": 363328 }, { "epoch": 31.347826086956523, "grad_norm": 0.6238303780555725, "learning_rate": 0.001, "loss": 1.9048, "step": 363384 }, { "epoch": 31.352657004830917, "grad_norm": 3.960972309112549, "learning_rate": 0.001, "loss": 1.8953, "step": 363440 }, { "epoch": 31.357487922705314, "grad_norm": 0.8117548227310181, "learning_rate": 0.001, "loss": 1.8884, "step": 363496 }, { "epoch": 31.36231884057971, "grad_norm": 95.2965087890625, "learning_rate": 0.001, "loss": 1.9068, "step": 363552 }, { "epoch": 31.367149758454108, "grad_norm": 2.636323928833008, "learning_rate": 0.001, "loss": 1.9051, "step": 363608 }, { "epoch": 31.3719806763285, "grad_norm": 1.159105658531189, "learning_rate": 0.001, "loss": 1.9027, "step": 363664 }, { "epoch": 31.3768115942029, "grad_norm": 0.9292151927947998, "learning_rate": 0.001, "loss": 1.9031, "step": 363720 }, { "epoch": 31.381642512077295, "grad_norm": 2.7445006370544434, "learning_rate": 0.001, "loss": 1.912, "step": 363776 }, { "epoch": 31.386473429951693, "grad_norm": 1.198744535446167, "learning_rate": 0.001, "loss": 1.9196, "step": 363832 }, { "epoch": 31.391304347826086, "grad_norm": 18.0750789642334, "learning_rate": 0.001, "loss": 1.9173, "step": 363888 }, { "epoch": 31.396135265700483, "grad_norm": 0.8215798735618591, "learning_rate": 0.001, "loss": 1.9008, "step": 363944 }, { "epoch": 31.40096618357488, "grad_norm": 2.4406497478485107, "learning_rate": 0.001, "loss": 1.8952, "step": 364000 }, { "epoch": 31.405797101449274, "grad_norm": 0.9356613755226135, "learning_rate": 0.001, "loss": 1.8889, "step": 364056 }, { "epoch": 31.41062801932367, "grad_norm": 0.7125791907310486, "learning_rate": 0.001, "loss": 1.894, "step": 364112 }, { "epoch": 31.415458937198068, "grad_norm": 1.1252639293670654, "learning_rate": 0.001, "loss": 1.8945, "step": 364168 }, { "epoch": 31.420289855072465, "grad_norm": 2.4443447589874268, "learning_rate": 0.001, "loss": 1.8898, "step": 364224 }, { "epoch": 31.42512077294686, "grad_norm": 0.7925608158111572, "learning_rate": 0.001, "loss": 1.8819, "step": 364280 }, { "epoch": 31.429951690821255, "grad_norm": 1.6269303560256958, "learning_rate": 0.001, "loss": 1.8901, "step": 364336 }, { "epoch": 31.434782608695652, "grad_norm": 1.0643422603607178, "learning_rate": 0.001, "loss": 1.8965, "step": 364392 }, { "epoch": 31.43961352657005, "grad_norm": 6.14937686920166, "learning_rate": 0.001, "loss": 1.9056, "step": 364448 }, { "epoch": 31.444444444444443, "grad_norm": 0.7519615292549133, "learning_rate": 0.001, "loss": 1.9022, "step": 364504 }, { "epoch": 31.44927536231884, "grad_norm": 2.552140474319458, "learning_rate": 0.001, "loss": 1.8981, "step": 364560 }, { "epoch": 31.454106280193237, "grad_norm": 0.6152862310409546, "learning_rate": 0.001, "loss": 1.8895, "step": 364616 }, { "epoch": 31.458937198067634, "grad_norm": 2.9519598484039307, "learning_rate": 0.001, "loss": 1.8902, "step": 364672 }, { "epoch": 31.463768115942027, "grad_norm": 5.124978542327881, "learning_rate": 0.001, "loss": 1.8894, "step": 364728 }, { "epoch": 31.468599033816425, "grad_norm": 0.8347622752189636, "learning_rate": 0.001, "loss": 1.8838, "step": 364784 }, { "epoch": 31.47342995169082, "grad_norm": 1.9144797325134277, "learning_rate": 0.001, "loss": 1.8933, "step": 364840 }, { "epoch": 31.47826086956522, "grad_norm": 1.066724419593811, "learning_rate": 0.001, "loss": 1.8863, "step": 364896 }, { "epoch": 31.483091787439612, "grad_norm": 0.5903692245483398, "learning_rate": 0.001, "loss": 1.8875, "step": 364952 }, { "epoch": 31.48792270531401, "grad_norm": 1.2487493753433228, "learning_rate": 0.001, "loss": 1.883, "step": 365008 }, { "epoch": 31.492753623188406, "grad_norm": 124.77029418945312, "learning_rate": 0.001, "loss": 1.8796, "step": 365064 }, { "epoch": 31.497584541062803, "grad_norm": 0.7982252836227417, "learning_rate": 0.001, "loss": 1.8914, "step": 365120 }, { "epoch": 31.502415458937197, "grad_norm": 1.2779284715652466, "learning_rate": 0.001, "loss": 1.8959, "step": 365176 }, { "epoch": 31.507246376811594, "grad_norm": 1.3816096782684326, "learning_rate": 0.001, "loss": 1.8978, "step": 365232 }, { "epoch": 31.51207729468599, "grad_norm": 1.5434436798095703, "learning_rate": 0.001, "loss": 1.8929, "step": 365288 }, { "epoch": 31.516908212560388, "grad_norm": 0.633134126663208, "learning_rate": 0.001, "loss": 1.8979, "step": 365344 }, { "epoch": 31.52173913043478, "grad_norm": 0.5954933166503906, "learning_rate": 0.001, "loss": 1.9004, "step": 365400 }, { "epoch": 31.52657004830918, "grad_norm": 1.0120669603347778, "learning_rate": 0.001, "loss": 1.8991, "step": 365456 }, { "epoch": 31.531400966183575, "grad_norm": 1.978076457977295, "learning_rate": 0.001, "loss": 1.898, "step": 365512 }, { "epoch": 31.536231884057973, "grad_norm": 1.6839720010757446, "learning_rate": 0.001, "loss": 1.8872, "step": 365568 }, { "epoch": 31.541062801932366, "grad_norm": 0.29726824164390564, "learning_rate": 0.001, "loss": 1.8839, "step": 365624 }, { "epoch": 31.545893719806763, "grad_norm": 15.902640342712402, "learning_rate": 0.001, "loss": 1.8721, "step": 365680 }, { "epoch": 31.55072463768116, "grad_norm": 1.433428168296814, "learning_rate": 0.001, "loss": 1.8802, "step": 365736 }, { "epoch": 31.555555555555557, "grad_norm": 1.1068814992904663, "learning_rate": 0.001, "loss": 1.8736, "step": 365792 }, { "epoch": 31.56038647342995, "grad_norm": 5.274502277374268, "learning_rate": 0.001, "loss": 1.8742, "step": 365848 }, { "epoch": 31.565217391304348, "grad_norm": 0.9520684480667114, "learning_rate": 0.001, "loss": 1.8735, "step": 365904 }, { "epoch": 31.570048309178745, "grad_norm": 0.44822487235069275, "learning_rate": 0.001, "loss": 1.8785, "step": 365960 }, { "epoch": 31.57487922705314, "grad_norm": 2.8123536109924316, "learning_rate": 0.001, "loss": 1.8792, "step": 366016 }, { "epoch": 31.579710144927535, "grad_norm": 0.35997501015663147, "learning_rate": 0.001, "loss": 1.8855, "step": 366072 }, { "epoch": 31.584541062801932, "grad_norm": 2.1557633876800537, "learning_rate": 0.001, "loss": 1.8853, "step": 366128 }, { "epoch": 31.58937198067633, "grad_norm": 2.0968704223632812, "learning_rate": 0.001, "loss": 1.8952, "step": 366184 }, { "epoch": 31.594202898550726, "grad_norm": 0.5686438679695129, "learning_rate": 0.001, "loss": 1.9056, "step": 366240 }, { "epoch": 31.59903381642512, "grad_norm": 0.28103092312812805, "learning_rate": 0.001, "loss": 1.89, "step": 366296 }, { "epoch": 31.603864734299517, "grad_norm": 0.43879464268684387, "learning_rate": 0.001, "loss": 1.8859, "step": 366352 }, { "epoch": 31.608695652173914, "grad_norm": 0.5932091474533081, "learning_rate": 0.001, "loss": 1.8905, "step": 366408 }, { "epoch": 31.613526570048307, "grad_norm": 0.5870943665504456, "learning_rate": 0.001, "loss": 1.8837, "step": 366464 }, { "epoch": 31.618357487922705, "grad_norm": 0.7316195368766785, "learning_rate": 0.001, "loss": 1.8764, "step": 366520 }, { "epoch": 31.6231884057971, "grad_norm": 0.6806275248527527, "learning_rate": 0.001, "loss": 1.8816, "step": 366576 }, { "epoch": 31.6280193236715, "grad_norm": 0.5371478199958801, "learning_rate": 0.001, "loss": 1.892, "step": 366632 }, { "epoch": 31.632850241545892, "grad_norm": 0.516328752040863, "learning_rate": 0.001, "loss": 1.8922, "step": 366688 }, { "epoch": 31.63768115942029, "grad_norm": 0.26439759135246277, "learning_rate": 0.001, "loss": 1.8953, "step": 366744 }, { "epoch": 31.642512077294686, "grad_norm": 0.45338067412376404, "learning_rate": 0.001, "loss": 1.876, "step": 366800 }, { "epoch": 31.647342995169083, "grad_norm": 0.3305548131465912, "learning_rate": 0.001, "loss": 1.8697, "step": 366856 }, { "epoch": 31.652173913043477, "grad_norm": 1.032436490058899, "learning_rate": 0.001, "loss": 1.8694, "step": 366912 }, { "epoch": 31.657004830917874, "grad_norm": 1.9351085424423218, "learning_rate": 0.001, "loss": 1.8791, "step": 366968 }, { "epoch": 31.66183574879227, "grad_norm": 1.9809174537658691, "learning_rate": 0.001, "loss": 1.8844, "step": 367024 }, { "epoch": 31.666666666666668, "grad_norm": 2.08723783493042, "learning_rate": 0.001, "loss": 1.8825, "step": 367080 }, { "epoch": 31.67149758454106, "grad_norm": 1.454206943511963, "learning_rate": 0.001, "loss": 1.8772, "step": 367136 }, { "epoch": 31.67632850241546, "grad_norm": 0.5978344678878784, "learning_rate": 0.001, "loss": 1.8846, "step": 367192 }, { "epoch": 31.681159420289855, "grad_norm": 13.173473358154297, "learning_rate": 0.001, "loss": 1.8922, "step": 367248 }, { "epoch": 31.685990338164252, "grad_norm": 1.1626501083374023, "learning_rate": 0.001, "loss": 1.8779, "step": 367304 }, { "epoch": 31.690821256038646, "grad_norm": 0.4801710247993469, "learning_rate": 0.001, "loss": 1.8819, "step": 367360 }, { "epoch": 31.695652173913043, "grad_norm": 0.8886069059371948, "learning_rate": 0.001, "loss": 1.8739, "step": 367416 }, { "epoch": 31.70048309178744, "grad_norm": 1.299600601196289, "learning_rate": 0.001, "loss": 1.877, "step": 367472 }, { "epoch": 31.705314009661837, "grad_norm": 2.892421007156372, "learning_rate": 0.001, "loss": 1.8789, "step": 367528 }, { "epoch": 31.71014492753623, "grad_norm": 2.0238373279571533, "learning_rate": 0.001, "loss": 1.8703, "step": 367584 }, { "epoch": 31.714975845410628, "grad_norm": 1.7803832292556763, "learning_rate": 0.001, "loss": 1.8778, "step": 367640 }, { "epoch": 31.719806763285025, "grad_norm": 0.3582864999771118, "learning_rate": 0.001, "loss": 1.8903, "step": 367696 }, { "epoch": 31.72463768115942, "grad_norm": 1.3473167419433594, "learning_rate": 0.001, "loss": 1.8757, "step": 367752 }, { "epoch": 31.729468599033815, "grad_norm": 17.073017120361328, "learning_rate": 0.001, "loss": 1.8701, "step": 367808 }, { "epoch": 31.734299516908212, "grad_norm": 0.895175576210022, "learning_rate": 0.001, "loss": 1.8756, "step": 367864 }, { "epoch": 31.73913043478261, "grad_norm": 0.3503738343715668, "learning_rate": 0.001, "loss": 1.8886, "step": 367920 }, { "epoch": 31.743961352657006, "grad_norm": 0.6311994791030884, "learning_rate": 0.001, "loss": 1.8734, "step": 367976 }, { "epoch": 31.7487922705314, "grad_norm": 2.110567092895508, "learning_rate": 0.001, "loss": 1.8588, "step": 368032 }, { "epoch": 31.753623188405797, "grad_norm": 1.9593119621276855, "learning_rate": 0.001, "loss": 1.8748, "step": 368088 }, { "epoch": 31.758454106280194, "grad_norm": 0.4072810113430023, "learning_rate": 0.001, "loss": 1.8576, "step": 368144 }, { "epoch": 31.76328502415459, "grad_norm": 3.2993719577789307, "learning_rate": 0.001, "loss": 1.8613, "step": 368200 }, { "epoch": 31.768115942028984, "grad_norm": 0.33148321509361267, "learning_rate": 0.001, "loss": 1.8755, "step": 368256 }, { "epoch": 31.77294685990338, "grad_norm": 0.49064525961875916, "learning_rate": 0.001, "loss": 1.8731, "step": 368312 }, { "epoch": 31.77777777777778, "grad_norm": 0.918169379234314, "learning_rate": 0.001, "loss": 1.872, "step": 368368 }, { "epoch": 31.782608695652176, "grad_norm": 0.3168325424194336, "learning_rate": 0.001, "loss": 1.8646, "step": 368424 }, { "epoch": 31.78743961352657, "grad_norm": 0.7635107040405273, "learning_rate": 0.001, "loss": 1.8652, "step": 368480 }, { "epoch": 31.792270531400966, "grad_norm": 0.4566832184791565, "learning_rate": 0.001, "loss": 1.874, "step": 368536 }, { "epoch": 31.797101449275363, "grad_norm": 5.333982944488525, "learning_rate": 0.001, "loss": 1.8723, "step": 368592 }, { "epoch": 31.80193236714976, "grad_norm": 0.3626226782798767, "learning_rate": 0.001, "loss": 1.8692, "step": 368648 }, { "epoch": 31.806763285024154, "grad_norm": 1.1475409269332886, "learning_rate": 0.001, "loss": 1.871, "step": 368704 }, { "epoch": 31.81159420289855, "grad_norm": 0.30854079127311707, "learning_rate": 0.001, "loss": 1.8673, "step": 368760 }, { "epoch": 31.816425120772948, "grad_norm": 1.4551008939743042, "learning_rate": 0.001, "loss": 1.8643, "step": 368816 }, { "epoch": 31.82125603864734, "grad_norm": 0.8948751091957092, "learning_rate": 0.001, "loss": 1.8619, "step": 368872 }, { "epoch": 31.82608695652174, "grad_norm": 8.599570274353027, "learning_rate": 0.001, "loss": 1.8672, "step": 368928 }, { "epoch": 31.830917874396135, "grad_norm": 0.3842622637748718, "learning_rate": 0.001, "loss": 1.8688, "step": 368984 }, { "epoch": 31.835748792270532, "grad_norm": 1.985412359237671, "learning_rate": 0.001, "loss": 1.8689, "step": 369040 }, { "epoch": 31.840579710144926, "grad_norm": 1.8302814960479736, "learning_rate": 0.001, "loss": 1.874, "step": 369096 }, { "epoch": 31.845410628019323, "grad_norm": 11.460724830627441, "learning_rate": 0.001, "loss": 1.8678, "step": 369152 }, { "epoch": 31.85024154589372, "grad_norm": 0.32157501578330994, "learning_rate": 0.001, "loss": 1.875, "step": 369208 }, { "epoch": 31.855072463768117, "grad_norm": 1.1456849575042725, "learning_rate": 0.001, "loss": 1.8585, "step": 369264 }, { "epoch": 31.85990338164251, "grad_norm": 0.9333752393722534, "learning_rate": 0.001, "loss": 1.8709, "step": 369320 }, { "epoch": 31.864734299516908, "grad_norm": 1.6301710605621338, "learning_rate": 0.001, "loss": 1.8767, "step": 369376 }, { "epoch": 31.869565217391305, "grad_norm": 1.123384952545166, "learning_rate": 0.001, "loss": 1.8797, "step": 369432 }, { "epoch": 31.8743961352657, "grad_norm": 2.176255941390991, "learning_rate": 0.001, "loss": 1.893, "step": 369488 }, { "epoch": 31.879227053140095, "grad_norm": 1.0297493934631348, "learning_rate": 0.001, "loss": 1.8958, "step": 369544 }, { "epoch": 31.884057971014492, "grad_norm": 2.150526523590088, "learning_rate": 0.001, "loss": 1.8918, "step": 369600 }, { "epoch": 31.88888888888889, "grad_norm": 0.3463974595069885, "learning_rate": 0.001, "loss": 1.8936, "step": 369656 }, { "epoch": 31.893719806763286, "grad_norm": 0.7792313098907471, "learning_rate": 0.001, "loss": 1.8883, "step": 369712 }, { "epoch": 31.89855072463768, "grad_norm": 1.9169609546661377, "learning_rate": 0.001, "loss": 1.8754, "step": 369768 }, { "epoch": 31.903381642512077, "grad_norm": 2.7753422260284424, "learning_rate": 0.001, "loss": 1.8745, "step": 369824 }, { "epoch": 31.908212560386474, "grad_norm": 0.42601147294044495, "learning_rate": 0.001, "loss": 1.8772, "step": 369880 }, { "epoch": 31.91304347826087, "grad_norm": 1.050900936126709, "learning_rate": 0.001, "loss": 1.884, "step": 369936 }, { "epoch": 31.917874396135264, "grad_norm": 0.4134643077850342, "learning_rate": 0.001, "loss": 1.8962, "step": 369992 }, { "epoch": 31.92270531400966, "grad_norm": 0.978430986404419, "learning_rate": 0.001, "loss": 1.8902, "step": 370048 }, { "epoch": 31.92753623188406, "grad_norm": 0.8464446663856506, "learning_rate": 0.001, "loss": 1.8889, "step": 370104 }, { "epoch": 31.932367149758456, "grad_norm": 0.3765876591205597, "learning_rate": 0.001, "loss": 1.8886, "step": 370160 }, { "epoch": 31.93719806763285, "grad_norm": 0.5416330099105835, "learning_rate": 0.001, "loss": 1.891, "step": 370216 }, { "epoch": 31.942028985507246, "grad_norm": 1.303542971611023, "learning_rate": 0.001, "loss": 1.8926, "step": 370272 }, { "epoch": 31.946859903381643, "grad_norm": 2.5459649562835693, "learning_rate": 0.001, "loss": 1.8946, "step": 370328 }, { "epoch": 31.95169082125604, "grad_norm": 1.2558022737503052, "learning_rate": 0.001, "loss": 1.8906, "step": 370384 }, { "epoch": 31.956521739130434, "grad_norm": 2.809849977493286, "learning_rate": 0.001, "loss": 1.881, "step": 370440 }, { "epoch": 31.96135265700483, "grad_norm": 0.362740159034729, "learning_rate": 0.001, "loss": 1.8794, "step": 370496 }, { "epoch": 31.966183574879228, "grad_norm": 2.121236562728882, "learning_rate": 0.001, "loss": 1.8784, "step": 370552 }, { "epoch": 31.971014492753625, "grad_norm": 4.78397798538208, "learning_rate": 0.001, "loss": 1.8784, "step": 370608 }, { "epoch": 31.97584541062802, "grad_norm": 1.5099594593048096, "learning_rate": 0.001, "loss": 1.8938, "step": 370664 }, { "epoch": 31.980676328502415, "grad_norm": 0.8130882978439331, "learning_rate": 0.001, "loss": 1.8882, "step": 370720 }, { "epoch": 31.985507246376812, "grad_norm": 1.129683017730713, "learning_rate": 0.001, "loss": 1.872, "step": 370776 }, { "epoch": 31.990338164251206, "grad_norm": 0.6803610920906067, "learning_rate": 0.001, "loss": 1.8753, "step": 370832 }, { "epoch": 31.995169082125603, "grad_norm": 1.2479597330093384, "learning_rate": 0.001, "loss": 1.8723, "step": 370888 }, { "epoch": 32.0, "grad_norm": 0.8689315915107727, "learning_rate": 0.001, "loss": 1.8699, "step": 370944 }, { "epoch": 32.00483091787439, "grad_norm": 1.2008572816848755, "learning_rate": 0.001, "loss": 1.8401, "step": 371000 }, { "epoch": 32.009661835748794, "grad_norm": 0.5709912180900574, "learning_rate": 0.001, "loss": 1.8507, "step": 371056 }, { "epoch": 32.01449275362319, "grad_norm": 3.2960045337677, "learning_rate": 0.001, "loss": 1.8383, "step": 371112 }, { "epoch": 32.01932367149758, "grad_norm": 0.9115668535232544, "learning_rate": 0.001, "loss": 1.8525, "step": 371168 }, { "epoch": 32.02415458937198, "grad_norm": 1.3289488554000854, "learning_rate": 0.001, "loss": 1.8554, "step": 371224 }, { "epoch": 32.028985507246375, "grad_norm": 1.4813932180404663, "learning_rate": 0.001, "loss": 1.8484, "step": 371280 }, { "epoch": 32.033816425120776, "grad_norm": 0.4108002781867981, "learning_rate": 0.001, "loss": 1.8353, "step": 371336 }, { "epoch": 32.03864734299517, "grad_norm": 0.29496416449546814, "learning_rate": 0.001, "loss": 1.8398, "step": 371392 }, { "epoch": 32.04347826086956, "grad_norm": 0.4634047746658325, "learning_rate": 0.001, "loss": 1.8412, "step": 371448 }, { "epoch": 32.04830917874396, "grad_norm": 1.7094523906707764, "learning_rate": 0.001, "loss": 1.8481, "step": 371504 }, { "epoch": 32.05314009661836, "grad_norm": 0.6704553365707397, "learning_rate": 0.001, "loss": 1.8481, "step": 371560 }, { "epoch": 32.05797101449275, "grad_norm": 0.42356380820274353, "learning_rate": 0.001, "loss": 1.8417, "step": 371616 }, { "epoch": 32.06280193236715, "grad_norm": 0.3620864450931549, "learning_rate": 0.001, "loss": 1.8393, "step": 371672 }, { "epoch": 32.067632850241544, "grad_norm": 0.45119205117225647, "learning_rate": 0.001, "loss": 1.8359, "step": 371728 }, { "epoch": 32.072463768115945, "grad_norm": 1.766597867012024, "learning_rate": 0.001, "loss": 1.8372, "step": 371784 }, { "epoch": 32.07729468599034, "grad_norm": 0.5697992444038391, "learning_rate": 0.001, "loss": 1.8319, "step": 371840 }, { "epoch": 32.08212560386473, "grad_norm": 0.28652557730674744, "learning_rate": 0.001, "loss": 1.8408, "step": 371896 }, { "epoch": 32.08695652173913, "grad_norm": 0.686758279800415, "learning_rate": 0.001, "loss": 1.8331, "step": 371952 }, { "epoch": 32.091787439613526, "grad_norm": 2.194305896759033, "learning_rate": 0.001, "loss": 1.8422, "step": 372008 }, { "epoch": 32.09661835748792, "grad_norm": 2.3242835998535156, "learning_rate": 0.001, "loss": 1.84, "step": 372064 }, { "epoch": 32.10144927536232, "grad_norm": 2.731315851211548, "learning_rate": 0.001, "loss": 1.8392, "step": 372120 }, { "epoch": 32.106280193236714, "grad_norm": 0.6502743363380432, "learning_rate": 0.001, "loss": 1.8358, "step": 372176 }, { "epoch": 32.111111111111114, "grad_norm": 0.3713311553001404, "learning_rate": 0.001, "loss": 1.8291, "step": 372232 }, { "epoch": 32.11594202898551, "grad_norm": 0.6289574503898621, "learning_rate": 0.001, "loss": 1.8318, "step": 372288 }, { "epoch": 32.1207729468599, "grad_norm": 4.511777400970459, "learning_rate": 0.001, "loss": 1.8437, "step": 372344 }, { "epoch": 32.1256038647343, "grad_norm": 0.5842333436012268, "learning_rate": 0.001, "loss": 1.8578, "step": 372400 }, { "epoch": 32.130434782608695, "grad_norm": 0.4360392987728119, "learning_rate": 0.001, "loss": 1.8416, "step": 372456 }, { "epoch": 32.13526570048309, "grad_norm": 0.7678411602973938, "learning_rate": 0.001, "loss": 1.8304, "step": 372512 }, { "epoch": 32.14009661835749, "grad_norm": 1.5205198526382446, "learning_rate": 0.001, "loss": 1.8344, "step": 372568 }, { "epoch": 32.14492753623188, "grad_norm": 0.41204118728637695, "learning_rate": 0.001, "loss": 1.837, "step": 372624 }, { "epoch": 32.14975845410628, "grad_norm": 0.3045431673526764, "learning_rate": 0.001, "loss": 1.8331, "step": 372680 }, { "epoch": 32.15458937198068, "grad_norm": 1.3136420249938965, "learning_rate": 0.001, "loss": 1.845, "step": 372736 }, { "epoch": 32.15942028985507, "grad_norm": 0.9799510836601257, "learning_rate": 0.001, "loss": 1.8423, "step": 372792 }, { "epoch": 32.16425120772947, "grad_norm": 0.39545878767967224, "learning_rate": 0.001, "loss": 1.8378, "step": 372848 }, { "epoch": 32.169082125603865, "grad_norm": 0.5365079641342163, "learning_rate": 0.001, "loss": 1.8382, "step": 372904 }, { "epoch": 32.17391304347826, "grad_norm": 0.3844505548477173, "learning_rate": 0.001, "loss": 1.8298, "step": 372960 }, { "epoch": 32.17874396135266, "grad_norm": 0.31090861558914185, "learning_rate": 0.001, "loss": 1.8378, "step": 373016 }, { "epoch": 32.18357487922705, "grad_norm": 0.8890419006347656, "learning_rate": 0.001, "loss": 1.8354, "step": 373072 }, { "epoch": 32.18840579710145, "grad_norm": 0.6335465908050537, "learning_rate": 0.001, "loss": 1.8401, "step": 373128 }, { "epoch": 32.193236714975846, "grad_norm": 0.2977682948112488, "learning_rate": 0.001, "loss": 1.8444, "step": 373184 }, { "epoch": 32.19806763285024, "grad_norm": 0.6324213743209839, "learning_rate": 0.001, "loss": 1.8397, "step": 373240 }, { "epoch": 32.20289855072464, "grad_norm": 0.7859739661216736, "learning_rate": 0.001, "loss": 1.836, "step": 373296 }, { "epoch": 32.207729468599034, "grad_norm": 1.256189227104187, "learning_rate": 0.001, "loss": 1.8312, "step": 373352 }, { "epoch": 32.21256038647343, "grad_norm": 2.799471139907837, "learning_rate": 0.001, "loss": 1.8455, "step": 373408 }, { "epoch": 32.21739130434783, "grad_norm": 0.27061259746551514, "learning_rate": 0.001, "loss": 1.8422, "step": 373464 }, { "epoch": 32.22222222222222, "grad_norm": 0.31387603282928467, "learning_rate": 0.001, "loss": 1.8458, "step": 373520 }, { "epoch": 32.227053140096615, "grad_norm": 1.364209532737732, "learning_rate": 0.001, "loss": 1.8466, "step": 373576 }, { "epoch": 32.231884057971016, "grad_norm": 0.7400078177452087, "learning_rate": 0.001, "loss": 1.8364, "step": 373632 }, { "epoch": 32.23671497584541, "grad_norm": 1.4662961959838867, "learning_rate": 0.001, "loss": 1.8522, "step": 373688 }, { "epoch": 32.24154589371981, "grad_norm": 0.3981078863143921, "learning_rate": 0.001, "loss": 1.8476, "step": 373744 }, { "epoch": 32.2463768115942, "grad_norm": 0.3808528482913971, "learning_rate": 0.001, "loss": 1.8508, "step": 373800 }, { "epoch": 32.2512077294686, "grad_norm": 5.781691074371338, "learning_rate": 0.001, "loss": 1.8395, "step": 373856 }, { "epoch": 32.256038647343, "grad_norm": 0.7164187431335449, "learning_rate": 0.001, "loss": 1.8363, "step": 373912 }, { "epoch": 32.26086956521739, "grad_norm": 13.646059036254883, "learning_rate": 0.001, "loss": 1.8345, "step": 373968 }, { "epoch": 32.265700483091784, "grad_norm": 0.5398547649383545, "learning_rate": 0.001, "loss": 1.8386, "step": 374024 }, { "epoch": 32.270531400966185, "grad_norm": 1.4126924276351929, "learning_rate": 0.001, "loss": 1.8464, "step": 374080 }, { "epoch": 32.27536231884058, "grad_norm": 0.3202066421508789, "learning_rate": 0.001, "loss": 1.8532, "step": 374136 }, { "epoch": 32.28019323671498, "grad_norm": 0.7664832472801208, "learning_rate": 0.001, "loss": 1.8419, "step": 374192 }, { "epoch": 32.28502415458937, "grad_norm": 0.478803813457489, "learning_rate": 0.001, "loss": 1.8362, "step": 374248 }, { "epoch": 32.289855072463766, "grad_norm": 1.117698073387146, "learning_rate": 0.001, "loss": 1.8416, "step": 374304 }, { "epoch": 32.29468599033817, "grad_norm": 0.2940598726272583, "learning_rate": 0.001, "loss": 1.8551, "step": 374360 }, { "epoch": 32.29951690821256, "grad_norm": 0.4876776337623596, "learning_rate": 0.001, "loss": 1.8591, "step": 374416 }, { "epoch": 32.30434782608695, "grad_norm": 0.7879142165184021, "learning_rate": 0.001, "loss": 1.8445, "step": 374472 }, { "epoch": 32.309178743961354, "grad_norm": 0.3670964539051056, "learning_rate": 0.001, "loss": 1.8597, "step": 374528 }, { "epoch": 32.31400966183575, "grad_norm": 2.608320951461792, "learning_rate": 0.001, "loss": 1.855, "step": 374584 }, { "epoch": 32.31884057971015, "grad_norm": 0.6056095361709595, "learning_rate": 0.001, "loss": 1.8513, "step": 374640 }, { "epoch": 32.32367149758454, "grad_norm": 1.3014816045761108, "learning_rate": 0.001, "loss": 1.848, "step": 374696 }, { "epoch": 32.328502415458935, "grad_norm": 1.1073057651519775, "learning_rate": 0.001, "loss": 1.8478, "step": 374752 }, { "epoch": 32.333333333333336, "grad_norm": 0.2885875403881073, "learning_rate": 0.001, "loss": 1.8405, "step": 374808 }, { "epoch": 32.33816425120773, "grad_norm": 0.7921796441078186, "learning_rate": 0.001, "loss": 1.8342, "step": 374864 }, { "epoch": 32.34299516908212, "grad_norm": 0.5287802219390869, "learning_rate": 0.001, "loss": 1.8354, "step": 374920 }, { "epoch": 32.34782608695652, "grad_norm": 0.49546757340431213, "learning_rate": 0.001, "loss": 1.843, "step": 374976 }, { "epoch": 32.35265700483092, "grad_norm": 1.1142160892486572, "learning_rate": 0.001, "loss": 1.8529, "step": 375032 }, { "epoch": 32.35748792270532, "grad_norm": 3.57182240486145, "learning_rate": 0.001, "loss": 1.853, "step": 375088 }, { "epoch": 32.36231884057971, "grad_norm": 0.468313604593277, "learning_rate": 0.001, "loss": 1.8559, "step": 375144 }, { "epoch": 32.367149758454104, "grad_norm": 0.4340816140174866, "learning_rate": 0.001, "loss": 1.8585, "step": 375200 }, { "epoch": 32.371980676328505, "grad_norm": 0.4288213551044464, "learning_rate": 0.001, "loss": 1.8447, "step": 375256 }, { "epoch": 32.3768115942029, "grad_norm": 0.3087408244609833, "learning_rate": 0.001, "loss": 1.8484, "step": 375312 }, { "epoch": 32.38164251207729, "grad_norm": 2.7799503803253174, "learning_rate": 0.001, "loss": 1.8508, "step": 375368 }, { "epoch": 32.38647342995169, "grad_norm": 3.459364891052246, "learning_rate": 0.001, "loss": 1.8464, "step": 375424 }, { "epoch": 32.391304347826086, "grad_norm": 1.3312876224517822, "learning_rate": 0.001, "loss": 1.8457, "step": 375480 }, { "epoch": 32.39613526570048, "grad_norm": 0.4875665009021759, "learning_rate": 0.001, "loss": 1.8515, "step": 375536 }, { "epoch": 32.40096618357488, "grad_norm": 0.421225905418396, "learning_rate": 0.001, "loss": 1.8558, "step": 375592 }, { "epoch": 32.405797101449274, "grad_norm": 0.3473358750343323, "learning_rate": 0.001, "loss": 1.844, "step": 375648 }, { "epoch": 32.410628019323674, "grad_norm": 0.26126474142074585, "learning_rate": 0.001, "loss": 1.8482, "step": 375704 }, { "epoch": 32.41545893719807, "grad_norm": 0.2654518485069275, "learning_rate": 0.001, "loss": 1.849, "step": 375760 }, { "epoch": 32.42028985507246, "grad_norm": 1.1230734586715698, "learning_rate": 0.001, "loss": 1.8528, "step": 375816 }, { "epoch": 32.42512077294686, "grad_norm": 3.879697322845459, "learning_rate": 0.001, "loss": 1.8547, "step": 375872 }, { "epoch": 32.429951690821255, "grad_norm": 0.4892650246620178, "learning_rate": 0.001, "loss": 1.8534, "step": 375928 }, { "epoch": 32.43478260869565, "grad_norm": 0.25833895802497864, "learning_rate": 0.001, "loss": 1.8512, "step": 375984 }, { "epoch": 32.43961352657005, "grad_norm": 0.8528651595115662, "learning_rate": 0.001, "loss": 1.8455, "step": 376040 }, { "epoch": 32.44444444444444, "grad_norm": 0.283153235912323, "learning_rate": 0.001, "loss": 1.8384, "step": 376096 }, { "epoch": 32.44927536231884, "grad_norm": 0.30392587184906006, "learning_rate": 0.001, "loss": 1.8448, "step": 376152 }, { "epoch": 32.45410628019324, "grad_norm": 0.5906954407691956, "learning_rate": 0.001, "loss": 1.8373, "step": 376208 }, { "epoch": 32.45893719806763, "grad_norm": 1.9905575513839722, "learning_rate": 0.001, "loss": 1.8361, "step": 376264 }, { "epoch": 32.46376811594203, "grad_norm": 0.5323963165283203, "learning_rate": 0.001, "loss": 1.84, "step": 376320 }, { "epoch": 32.468599033816425, "grad_norm": 3.6400694847106934, "learning_rate": 0.001, "loss": 1.8418, "step": 376376 }, { "epoch": 32.47342995169082, "grad_norm": 0.35972192883491516, "learning_rate": 0.001, "loss": 1.8466, "step": 376432 }, { "epoch": 32.47826086956522, "grad_norm": 0.7316411733627319, "learning_rate": 0.001, "loss": 1.8451, "step": 376488 }, { "epoch": 32.48309178743961, "grad_norm": 0.36495721340179443, "learning_rate": 0.001, "loss": 1.8596, "step": 376544 }, { "epoch": 32.48792270531401, "grad_norm": 0.7636012434959412, "learning_rate": 0.001, "loss": 1.8733, "step": 376600 }, { "epoch": 32.492753623188406, "grad_norm": 0.29365861415863037, "learning_rate": 0.001, "loss": 1.8558, "step": 376656 }, { "epoch": 32.4975845410628, "grad_norm": 0.5585270524024963, "learning_rate": 0.001, "loss": 1.853, "step": 376712 }, { "epoch": 32.5024154589372, "grad_norm": 0.47341543436050415, "learning_rate": 0.001, "loss": 1.8535, "step": 376768 }, { "epoch": 32.507246376811594, "grad_norm": 0.293991357088089, "learning_rate": 0.001, "loss": 1.8474, "step": 376824 }, { "epoch": 32.51207729468599, "grad_norm": 1.4911845922470093, "learning_rate": 0.001, "loss": 1.8398, "step": 376880 }, { "epoch": 32.51690821256039, "grad_norm": 1.716740608215332, "learning_rate": 0.001, "loss": 1.8364, "step": 376936 }, { "epoch": 32.52173913043478, "grad_norm": 1.4789096117019653, "learning_rate": 0.001, "loss": 1.8376, "step": 376992 }, { "epoch": 32.52657004830918, "grad_norm": 0.6244176626205444, "learning_rate": 0.001, "loss": 1.8457, "step": 377048 }, { "epoch": 32.531400966183575, "grad_norm": 0.958246648311615, "learning_rate": 0.001, "loss": 1.8518, "step": 377104 }, { "epoch": 32.53623188405797, "grad_norm": 0.32466602325439453, "learning_rate": 0.001, "loss": 1.8526, "step": 377160 }, { "epoch": 32.54106280193237, "grad_norm": 1.327105164527893, "learning_rate": 0.001, "loss": 1.8511, "step": 377216 }, { "epoch": 32.54589371980676, "grad_norm": 1.754935622215271, "learning_rate": 0.001, "loss": 1.8404, "step": 377272 }, { "epoch": 32.55072463768116, "grad_norm": 0.42063432931900024, "learning_rate": 0.001, "loss": 1.8383, "step": 377328 }, { "epoch": 32.55555555555556, "grad_norm": 1.7083438634872437, "learning_rate": 0.001, "loss": 1.8388, "step": 377384 }, { "epoch": 32.56038647342995, "grad_norm": 0.35757118463516235, "learning_rate": 0.001, "loss": 1.831, "step": 377440 }, { "epoch": 32.56521739130435, "grad_norm": 0.3467182219028473, "learning_rate": 0.001, "loss": 1.8405, "step": 377496 }, { "epoch": 32.570048309178745, "grad_norm": 0.5448659658432007, "learning_rate": 0.001, "loss": 1.8455, "step": 377552 }, { "epoch": 32.57487922705314, "grad_norm": 0.35185661911964417, "learning_rate": 0.001, "loss": 1.845, "step": 377608 }, { "epoch": 32.57971014492754, "grad_norm": 0.34073004126548767, "learning_rate": 0.001, "loss": 1.8506, "step": 377664 }, { "epoch": 32.58454106280193, "grad_norm": 0.34111663699150085, "learning_rate": 0.001, "loss": 1.8501, "step": 377720 }, { "epoch": 32.589371980676326, "grad_norm": 0.3987845182418823, "learning_rate": 0.001, "loss": 1.8441, "step": 377776 }, { "epoch": 32.594202898550726, "grad_norm": 2.2910256385803223, "learning_rate": 0.001, "loss": 1.8399, "step": 377832 }, { "epoch": 32.59903381642512, "grad_norm": 0.6643227934837341, "learning_rate": 0.001, "loss": 1.8439, "step": 377888 }, { "epoch": 32.60386473429952, "grad_norm": 0.8784595131874084, "learning_rate": 0.001, "loss": 1.8422, "step": 377944 }, { "epoch": 32.608695652173914, "grad_norm": 0.672061026096344, "learning_rate": 0.001, "loss": 1.8419, "step": 378000 }, { "epoch": 32.61352657004831, "grad_norm": 0.36900657415390015, "learning_rate": 0.001, "loss": 1.8509, "step": 378056 }, { "epoch": 32.61835748792271, "grad_norm": 1.5382411479949951, "learning_rate": 0.001, "loss": 1.8541, "step": 378112 }, { "epoch": 32.6231884057971, "grad_norm": 0.404506653547287, "learning_rate": 0.001, "loss": 1.8542, "step": 378168 }, { "epoch": 32.628019323671495, "grad_norm": 0.6450835466384888, "learning_rate": 0.001, "loss": 1.8654, "step": 378224 }, { "epoch": 32.632850241545896, "grad_norm": 0.5529160499572754, "learning_rate": 0.001, "loss": 1.8622, "step": 378280 }, { "epoch": 32.63768115942029, "grad_norm": 0.6786965727806091, "learning_rate": 0.001, "loss": 1.8591, "step": 378336 }, { "epoch": 32.64251207729468, "grad_norm": 0.794245719909668, "learning_rate": 0.001, "loss": 1.8515, "step": 378392 }, { "epoch": 32.64734299516908, "grad_norm": 0.31528031826019287, "learning_rate": 0.001, "loss": 1.8527, "step": 378448 }, { "epoch": 32.65217391304348, "grad_norm": 0.374546617269516, "learning_rate": 0.001, "loss": 1.8634, "step": 378504 }, { "epoch": 32.65700483091788, "grad_norm": 1.0305025577545166, "learning_rate": 0.001, "loss": 1.8452, "step": 378560 }, { "epoch": 32.66183574879227, "grad_norm": 0.36057665944099426, "learning_rate": 0.001, "loss": 1.8452, "step": 378616 }, { "epoch": 32.666666666666664, "grad_norm": 0.5232740044593811, "learning_rate": 0.001, "loss": 1.8441, "step": 378672 }, { "epoch": 32.671497584541065, "grad_norm": 0.4006281793117523, "learning_rate": 0.001, "loss": 1.8495, "step": 378728 }, { "epoch": 32.67632850241546, "grad_norm": 0.41494491696357727, "learning_rate": 0.001, "loss": 1.839, "step": 378784 }, { "epoch": 32.68115942028985, "grad_norm": 8.640713691711426, "learning_rate": 0.001, "loss": 1.8455, "step": 378840 }, { "epoch": 32.68599033816425, "grad_norm": 2.596590995788574, "learning_rate": 0.001, "loss": 1.8608, "step": 378896 }, { "epoch": 32.690821256038646, "grad_norm": 0.849643349647522, "learning_rate": 0.001, "loss": 1.8585, "step": 378952 }, { "epoch": 32.69565217391305, "grad_norm": 0.8204634189605713, "learning_rate": 0.001, "loss": 1.8593, "step": 379008 }, { "epoch": 32.70048309178744, "grad_norm": 0.5446606278419495, "learning_rate": 0.001, "loss": 1.8604, "step": 379064 }, { "epoch": 32.70531400966183, "grad_norm": 1.643191933631897, "learning_rate": 0.001, "loss": 1.8483, "step": 379120 }, { "epoch": 32.710144927536234, "grad_norm": 1.6326783895492554, "learning_rate": 0.001, "loss": 1.8563, "step": 379176 }, { "epoch": 32.71497584541063, "grad_norm": 0.49490636587142944, "learning_rate": 0.001, "loss": 1.8555, "step": 379232 }, { "epoch": 32.71980676328502, "grad_norm": 1.0047205686569214, "learning_rate": 0.001, "loss": 1.8469, "step": 379288 }, { "epoch": 32.72463768115942, "grad_norm": 1.4708741903305054, "learning_rate": 0.001, "loss": 1.8421, "step": 379344 }, { "epoch": 32.729468599033815, "grad_norm": 0.8628129363059998, "learning_rate": 0.001, "loss": 1.8483, "step": 379400 }, { "epoch": 32.734299516908216, "grad_norm": 1.7796859741210938, "learning_rate": 0.001, "loss": 1.8507, "step": 379456 }, { "epoch": 32.73913043478261, "grad_norm": 0.5412070155143738, "learning_rate": 0.001, "loss": 1.8542, "step": 379512 }, { "epoch": 32.743961352657, "grad_norm": 0.3636011481285095, "learning_rate": 0.001, "loss": 1.8542, "step": 379568 }, { "epoch": 32.7487922705314, "grad_norm": 0.7431475520133972, "learning_rate": 0.001, "loss": 1.8571, "step": 379624 }, { "epoch": 32.7536231884058, "grad_norm": 0.34459924697875977, "learning_rate": 0.001, "loss": 1.8536, "step": 379680 }, { "epoch": 32.75845410628019, "grad_norm": 0.44119343161582947, "learning_rate": 0.001, "loss": 1.8541, "step": 379736 }, { "epoch": 32.76328502415459, "grad_norm": 0.2517727017402649, "learning_rate": 0.001, "loss": 1.8421, "step": 379792 }, { "epoch": 32.768115942028984, "grad_norm": 0.9753492474555969, "learning_rate": 0.001, "loss": 1.8465, "step": 379848 }, { "epoch": 32.772946859903385, "grad_norm": 1.606237769126892, "learning_rate": 0.001, "loss": 1.8438, "step": 379904 }, { "epoch": 32.77777777777778, "grad_norm": 0.9974656105041504, "learning_rate": 0.001, "loss": 1.8607, "step": 379960 }, { "epoch": 32.78260869565217, "grad_norm": 0.6982571482658386, "learning_rate": 0.001, "loss": 1.8776, "step": 380016 }, { "epoch": 32.78743961352657, "grad_norm": 0.43610435724258423, "learning_rate": 0.001, "loss": 1.857, "step": 380072 }, { "epoch": 32.792270531400966, "grad_norm": 0.9027001857757568, "learning_rate": 0.001, "loss": 1.8483, "step": 380128 }, { "epoch": 32.79710144927536, "grad_norm": 1.5463149547576904, "learning_rate": 0.001, "loss": 1.8554, "step": 380184 }, { "epoch": 32.80193236714976, "grad_norm": 0.7897469997406006, "learning_rate": 0.001, "loss": 1.8517, "step": 380240 }, { "epoch": 32.806763285024154, "grad_norm": 1.6597342491149902, "learning_rate": 0.001, "loss": 1.8497, "step": 380296 }, { "epoch": 32.81159420289855, "grad_norm": 0.2787741720676422, "learning_rate": 0.001, "loss": 1.8587, "step": 380352 }, { "epoch": 32.81642512077295, "grad_norm": 0.2956477105617523, "learning_rate": 0.001, "loss": 1.8546, "step": 380408 }, { "epoch": 32.82125603864734, "grad_norm": 0.3225376009941101, "learning_rate": 0.001, "loss": 1.8596, "step": 380464 }, { "epoch": 32.82608695652174, "grad_norm": 1.282189130783081, "learning_rate": 0.001, "loss": 1.8635, "step": 380520 }, { "epoch": 32.830917874396135, "grad_norm": 0.2542252838611603, "learning_rate": 0.001, "loss": 1.8426, "step": 380576 }, { "epoch": 32.83574879227053, "grad_norm": 0.506098210811615, "learning_rate": 0.001, "loss": 1.8539, "step": 380632 }, { "epoch": 32.84057971014493, "grad_norm": 1.0691663026809692, "learning_rate": 0.001, "loss": 1.8546, "step": 380688 }, { "epoch": 32.84541062801932, "grad_norm": 0.9779183268547058, "learning_rate": 0.001, "loss": 1.8464, "step": 380744 }, { "epoch": 32.85024154589372, "grad_norm": 0.35702377557754517, "learning_rate": 0.001, "loss": 1.8473, "step": 380800 }, { "epoch": 32.85507246376812, "grad_norm": 1.1675342321395874, "learning_rate": 0.001, "loss": 1.8759, "step": 380856 }, { "epoch": 32.85990338164251, "grad_norm": 1.2844762802124023, "learning_rate": 0.001, "loss": 1.8592, "step": 380912 }, { "epoch": 32.86473429951691, "grad_norm": 0.27818992733955383, "learning_rate": 0.001, "loss": 1.8611, "step": 380968 }, { "epoch": 32.869565217391305, "grad_norm": 0.9189602136611938, "learning_rate": 0.001, "loss": 1.8496, "step": 381024 }, { "epoch": 32.8743961352657, "grad_norm": 0.44964373111724854, "learning_rate": 0.001, "loss": 1.8471, "step": 381080 }, { "epoch": 32.8792270531401, "grad_norm": 0.7384527325630188, "learning_rate": 0.001, "loss": 1.8471, "step": 381136 }, { "epoch": 32.88405797101449, "grad_norm": 1.1870777606964111, "learning_rate": 0.001, "loss": 1.8408, "step": 381192 }, { "epoch": 32.888888888888886, "grad_norm": 1.3894047737121582, "learning_rate": 0.001, "loss": 1.8444, "step": 381248 }, { "epoch": 32.893719806763286, "grad_norm": 0.4911687672138214, "learning_rate": 0.001, "loss": 1.8514, "step": 381304 }, { "epoch": 32.89855072463768, "grad_norm": 0.30646243691444397, "learning_rate": 0.001, "loss": 1.8525, "step": 381360 }, { "epoch": 32.90338164251208, "grad_norm": 1.2023776769638062, "learning_rate": 0.001, "loss": 1.8522, "step": 381416 }, { "epoch": 32.908212560386474, "grad_norm": 2.8627359867095947, "learning_rate": 0.001, "loss": 1.8484, "step": 381472 }, { "epoch": 32.91304347826087, "grad_norm": 0.5137643218040466, "learning_rate": 0.001, "loss": 1.8528, "step": 381528 }, { "epoch": 32.91787439613527, "grad_norm": 1.190400242805481, "learning_rate": 0.001, "loss": 1.8552, "step": 381584 }, { "epoch": 32.92270531400966, "grad_norm": 0.36700430512428284, "learning_rate": 0.001, "loss": 1.8511, "step": 381640 }, { "epoch": 32.927536231884055, "grad_norm": 0.4066595733165741, "learning_rate": 0.001, "loss": 1.8458, "step": 381696 }, { "epoch": 32.932367149758456, "grad_norm": 0.3802686035633087, "learning_rate": 0.001, "loss": 1.8481, "step": 381752 }, { "epoch": 32.93719806763285, "grad_norm": 0.24708229303359985, "learning_rate": 0.001, "loss": 1.858, "step": 381808 }, { "epoch": 32.94202898550725, "grad_norm": 2.311570644378662, "learning_rate": 0.001, "loss": 1.8548, "step": 381864 }, { "epoch": 32.94685990338164, "grad_norm": 1.4765944480895996, "learning_rate": 0.001, "loss": 1.8567, "step": 381920 }, { "epoch": 32.95169082125604, "grad_norm": 1.4749515056610107, "learning_rate": 0.001, "loss": 1.8541, "step": 381976 }, { "epoch": 32.95652173913044, "grad_norm": 1.1101292371749878, "learning_rate": 0.001, "loss": 1.8524, "step": 382032 }, { "epoch": 32.96135265700483, "grad_norm": 1.421686053276062, "learning_rate": 0.001, "loss": 1.8494, "step": 382088 }, { "epoch": 32.966183574879224, "grad_norm": 0.993030309677124, "learning_rate": 0.001, "loss": 1.8652, "step": 382144 }, { "epoch": 32.971014492753625, "grad_norm": 1.7938148975372314, "learning_rate": 0.001, "loss": 1.8522, "step": 382200 }, { "epoch": 32.97584541062802, "grad_norm": 0.2918282449245453, "learning_rate": 0.001, "loss": 1.8628, "step": 382256 }, { "epoch": 32.98067632850242, "grad_norm": 5.5111236572265625, "learning_rate": 0.001, "loss": 1.8608, "step": 382312 }, { "epoch": 32.98550724637681, "grad_norm": 2.92281174659729, "learning_rate": 0.001, "loss": 1.8468, "step": 382368 }, { "epoch": 32.990338164251206, "grad_norm": 0.38964682817459106, "learning_rate": 0.001, "loss": 1.8607, "step": 382424 }, { "epoch": 32.99516908212561, "grad_norm": 0.4770703911781311, "learning_rate": 0.001, "loss": 1.867, "step": 382480 }, { "epoch": 33.0, "grad_norm": 0.6109275221824646, "learning_rate": 0.001, "loss": 1.8494, "step": 382536 }, { "epoch": 33.00483091787439, "grad_norm": 3.7642152309417725, "learning_rate": 0.001, "loss": 1.8286, "step": 382592 }, { "epoch": 33.009661835748794, "grad_norm": 0.7113749384880066, "learning_rate": 0.001, "loss": 1.8297, "step": 382648 }, { "epoch": 33.01449275362319, "grad_norm": 0.5638636946678162, "learning_rate": 0.001, "loss": 1.833, "step": 382704 }, { "epoch": 33.01932367149758, "grad_norm": 0.35028257966041565, "learning_rate": 0.001, "loss": 1.832, "step": 382760 }, { "epoch": 33.02415458937198, "grad_norm": 0.5259183049201965, "learning_rate": 0.001, "loss": 1.822, "step": 382816 }, { "epoch": 33.028985507246375, "grad_norm": 0.7119775414466858, "learning_rate": 0.001, "loss": 1.8128, "step": 382872 }, { "epoch": 33.033816425120776, "grad_norm": 0.4885420799255371, "learning_rate": 0.001, "loss": 1.829, "step": 382928 }, { "epoch": 33.03864734299517, "grad_norm": 0.38046929240226746, "learning_rate": 0.001, "loss": 1.8283, "step": 382984 }, { "epoch": 33.04347826086956, "grad_norm": 1.0843122005462646, "learning_rate": 0.001, "loss": 1.8387, "step": 383040 }, { "epoch": 33.04830917874396, "grad_norm": 0.6481552124023438, "learning_rate": 0.001, "loss": 1.8456, "step": 383096 }, { "epoch": 33.05314009661836, "grad_norm": 0.9348629713058472, "learning_rate": 0.001, "loss": 1.8445, "step": 383152 }, { "epoch": 33.05797101449275, "grad_norm": 0.9992024302482605, "learning_rate": 0.001, "loss": 1.8428, "step": 383208 }, { "epoch": 33.06280193236715, "grad_norm": 1.5345975160598755, "learning_rate": 0.001, "loss": 1.8411, "step": 383264 }, { "epoch": 33.067632850241544, "grad_norm": 0.6153743267059326, "learning_rate": 0.001, "loss": 1.8321, "step": 383320 }, { "epoch": 33.072463768115945, "grad_norm": 0.9540253281593323, "learning_rate": 0.001, "loss": 1.8327, "step": 383376 }, { "epoch": 33.07729468599034, "grad_norm": 5.080650806427002, "learning_rate": 0.001, "loss": 1.834, "step": 383432 }, { "epoch": 33.08212560386473, "grad_norm": 3.1469767093658447, "learning_rate": 0.001, "loss": 1.844, "step": 383488 }, { "epoch": 33.08695652173913, "grad_norm": 0.3300671875476837, "learning_rate": 0.001, "loss": 1.846, "step": 383544 }, { "epoch": 33.091787439613526, "grad_norm": 1.5894296169281006, "learning_rate": 0.001, "loss": 1.8574, "step": 383600 }, { "epoch": 33.09661835748792, "grad_norm": 0.48993924260139465, "learning_rate": 0.001, "loss": 1.8474, "step": 383656 }, { "epoch": 33.10144927536232, "grad_norm": 0.3555910587310791, "learning_rate": 0.001, "loss": 1.8472, "step": 383712 }, { "epoch": 33.106280193236714, "grad_norm": 0.7348004579544067, "learning_rate": 0.001, "loss": 1.8416, "step": 383768 }, { "epoch": 33.111111111111114, "grad_norm": 2.8869214057922363, "learning_rate": 0.001, "loss": 1.8396, "step": 383824 }, { "epoch": 33.11594202898551, "grad_norm": 0.5226358771324158, "learning_rate": 0.001, "loss": 1.8463, "step": 383880 }, { "epoch": 33.1207729468599, "grad_norm": 1.3559980392456055, "learning_rate": 0.001, "loss": 1.8454, "step": 383936 }, { "epoch": 33.1256038647343, "grad_norm": 0.36827754974365234, "learning_rate": 0.001, "loss": 1.8422, "step": 383992 }, { "epoch": 33.130434782608695, "grad_norm": 0.5226930379867554, "learning_rate": 0.001, "loss": 1.8337, "step": 384048 }, { "epoch": 33.13526570048309, "grad_norm": 0.368125855922699, "learning_rate": 0.001, "loss": 1.8544, "step": 384104 }, { "epoch": 33.14009661835749, "grad_norm": 1.0597519874572754, "learning_rate": 0.001, "loss": 1.8508, "step": 384160 }, { "epoch": 33.14492753623188, "grad_norm": 0.2916831970214844, "learning_rate": 0.001, "loss": 1.8493, "step": 384216 }, { "epoch": 33.14975845410628, "grad_norm": 0.26027148962020874, "learning_rate": 0.001, "loss": 1.8586, "step": 384272 }, { "epoch": 33.15458937198068, "grad_norm": 0.8502988815307617, "learning_rate": 0.001, "loss": 1.8633, "step": 384328 }, { "epoch": 33.15942028985507, "grad_norm": 0.5353624224662781, "learning_rate": 0.001, "loss": 1.8559, "step": 384384 }, { "epoch": 33.16425120772947, "grad_norm": 0.283352792263031, "learning_rate": 0.001, "loss": 1.8517, "step": 384440 }, { "epoch": 33.169082125603865, "grad_norm": 0.36158379912376404, "learning_rate": 0.001, "loss": 1.8507, "step": 384496 }, { "epoch": 33.17391304347826, "grad_norm": 0.5547168254852295, "learning_rate": 0.001, "loss": 1.8435, "step": 384552 }, { "epoch": 33.17874396135266, "grad_norm": 1.2818000316619873, "learning_rate": 0.001, "loss": 1.8389, "step": 384608 }, { "epoch": 33.18357487922705, "grad_norm": 0.47685927152633667, "learning_rate": 0.001, "loss": 1.8468, "step": 384664 }, { "epoch": 33.18840579710145, "grad_norm": 5.0359721183776855, "learning_rate": 0.001, "loss": 1.8485, "step": 384720 }, { "epoch": 33.193236714975846, "grad_norm": 1.5601245164871216, "learning_rate": 0.001, "loss": 1.8512, "step": 384776 }, { "epoch": 33.19806763285024, "grad_norm": 4.911972999572754, "learning_rate": 0.001, "loss": 1.8365, "step": 384832 }, { "epoch": 33.20289855072464, "grad_norm": 1.1329741477966309, "learning_rate": 0.001, "loss": 1.8412, "step": 384888 }, { "epoch": 33.207729468599034, "grad_norm": 0.36179405450820923, "learning_rate": 0.001, "loss": 1.8435, "step": 384944 }, { "epoch": 33.21256038647343, "grad_norm": 0.8295024037361145, "learning_rate": 0.001, "loss": 1.8586, "step": 385000 }, { "epoch": 33.21739130434783, "grad_norm": 0.5777642130851746, "learning_rate": 0.001, "loss": 1.8554, "step": 385056 }, { "epoch": 33.22222222222222, "grad_norm": 1.7675678730010986, "learning_rate": 0.001, "loss": 1.86, "step": 385112 }, { "epoch": 33.227053140096615, "grad_norm": 0.6530225276947021, "learning_rate": 0.001, "loss": 1.8494, "step": 385168 }, { "epoch": 33.231884057971016, "grad_norm": 1.0190540552139282, "learning_rate": 0.001, "loss": 1.8489, "step": 385224 }, { "epoch": 33.23671497584541, "grad_norm": 0.3430393636226654, "learning_rate": 0.001, "loss": 1.8357, "step": 385280 }, { "epoch": 33.24154589371981, "grad_norm": 1.2124278545379639, "learning_rate": 0.001, "loss": 1.8397, "step": 385336 }, { "epoch": 33.2463768115942, "grad_norm": 1.3630893230438232, "learning_rate": 0.001, "loss": 1.8596, "step": 385392 }, { "epoch": 33.2512077294686, "grad_norm": 0.9709978103637695, "learning_rate": 0.001, "loss": 1.8461, "step": 385448 }, { "epoch": 33.256038647343, "grad_norm": 0.5541308522224426, "learning_rate": 0.001, "loss": 1.8458, "step": 385504 }, { "epoch": 33.26086956521739, "grad_norm": 1.2440598011016846, "learning_rate": 0.001, "loss": 1.8432, "step": 385560 }, { "epoch": 33.265700483091784, "grad_norm": 0.7311402559280396, "learning_rate": 0.001, "loss": 1.8416, "step": 385616 }, { "epoch": 33.270531400966185, "grad_norm": 0.3230074346065521, "learning_rate": 0.001, "loss": 1.8336, "step": 385672 }, { "epoch": 33.27536231884058, "grad_norm": 0.3788334131240845, "learning_rate": 0.001, "loss": 1.837, "step": 385728 }, { "epoch": 33.28019323671498, "grad_norm": 0.7328329086303711, "learning_rate": 0.001, "loss": 1.8285, "step": 385784 }, { "epoch": 33.28502415458937, "grad_norm": 10.460920333862305, "learning_rate": 0.001, "loss": 1.8418, "step": 385840 }, { "epoch": 33.289855072463766, "grad_norm": 0.5932354927062988, "learning_rate": 0.001, "loss": 1.8696, "step": 385896 }, { "epoch": 33.29468599033817, "grad_norm": 0.6016402244567871, "learning_rate": 0.001, "loss": 1.8697, "step": 385952 }, { "epoch": 33.29951690821256, "grad_norm": 0.3926424980163574, "learning_rate": 0.001, "loss": 1.8633, "step": 386008 }, { "epoch": 33.30434782608695, "grad_norm": 0.37828126549720764, "learning_rate": 0.001, "loss": 1.8594, "step": 386064 }, { "epoch": 33.309178743961354, "grad_norm": 0.4842880070209503, "learning_rate": 0.001, "loss": 1.8511, "step": 386120 }, { "epoch": 33.31400966183575, "grad_norm": 0.39039677381515503, "learning_rate": 0.001, "loss": 1.8521, "step": 386176 }, { "epoch": 33.31884057971015, "grad_norm": 0.43778035044670105, "learning_rate": 0.001, "loss": 1.8476, "step": 386232 }, { "epoch": 33.32367149758454, "grad_norm": 0.3074943423271179, "learning_rate": 0.001, "loss": 1.8457, "step": 386288 }, { "epoch": 33.328502415458935, "grad_norm": 0.41545557975769043, "learning_rate": 0.001, "loss": 1.8519, "step": 386344 }, { "epoch": 33.333333333333336, "grad_norm": 0.8189210891723633, "learning_rate": 0.001, "loss": 1.8532, "step": 386400 }, { "epoch": 33.33816425120773, "grad_norm": 0.6503366827964783, "learning_rate": 0.001, "loss": 1.8533, "step": 386456 }, { "epoch": 33.34299516908212, "grad_norm": 2.9798851013183594, "learning_rate": 0.001, "loss": 1.8536, "step": 386512 }, { "epoch": 33.34782608695652, "grad_norm": 0.6166926622390747, "learning_rate": 0.001, "loss": 1.8547, "step": 386568 }, { "epoch": 33.35265700483092, "grad_norm": 0.7489621639251709, "learning_rate": 0.001, "loss": 1.849, "step": 386624 }, { "epoch": 33.35748792270532, "grad_norm": 2.761019229888916, "learning_rate": 0.001, "loss": 1.8496, "step": 386680 }, { "epoch": 33.36231884057971, "grad_norm": 0.48480159044265747, "learning_rate": 0.001, "loss": 1.8511, "step": 386736 }, { "epoch": 33.367149758454104, "grad_norm": 0.5536046028137207, "learning_rate": 0.001, "loss": 1.8557, "step": 386792 }, { "epoch": 33.371980676328505, "grad_norm": 0.3162173628807068, "learning_rate": 0.001, "loss": 1.8425, "step": 386848 }, { "epoch": 33.3768115942029, "grad_norm": 0.521343469619751, "learning_rate": 0.001, "loss": 1.8429, "step": 386904 }, { "epoch": 33.38164251207729, "grad_norm": 1.0974353551864624, "learning_rate": 0.001, "loss": 1.8329, "step": 386960 }, { "epoch": 33.38647342995169, "grad_norm": 0.44107648730278015, "learning_rate": 0.001, "loss": 1.8359, "step": 387016 }, { "epoch": 33.391304347826086, "grad_norm": 0.4756486713886261, "learning_rate": 0.001, "loss": 1.8268, "step": 387072 }, { "epoch": 33.39613526570048, "grad_norm": 0.4744235575199127, "learning_rate": 0.001, "loss": 1.826, "step": 387128 }, { "epoch": 33.40096618357488, "grad_norm": 0.7108448147773743, "learning_rate": 0.001, "loss": 1.837, "step": 387184 }, { "epoch": 33.405797101449274, "grad_norm": 0.41541534662246704, "learning_rate": 0.001, "loss": 1.8242, "step": 387240 }, { "epoch": 33.410628019323674, "grad_norm": 2.1021149158477783, "learning_rate": 0.001, "loss": 1.8292, "step": 387296 }, { "epoch": 33.41545893719807, "grad_norm": 0.43672025203704834, "learning_rate": 0.001, "loss": 1.8254, "step": 387352 }, { "epoch": 33.42028985507246, "grad_norm": 0.6818185448646545, "learning_rate": 0.001, "loss": 1.8244, "step": 387408 }, { "epoch": 33.42512077294686, "grad_norm": 0.817703366279602, "learning_rate": 0.001, "loss": 1.8266, "step": 387464 }, { "epoch": 33.429951690821255, "grad_norm": 1.0184825658798218, "learning_rate": 0.001, "loss": 1.8262, "step": 387520 }, { "epoch": 33.43478260869565, "grad_norm": 1.4650359153747559, "learning_rate": 0.001, "loss": 1.8256, "step": 387576 }, { "epoch": 33.43961352657005, "grad_norm": 0.6176877021789551, "learning_rate": 0.001, "loss": 1.8221, "step": 387632 }, { "epoch": 33.44444444444444, "grad_norm": 0.3248637318611145, "learning_rate": 0.001, "loss": 1.8275, "step": 387688 }, { "epoch": 33.44927536231884, "grad_norm": 0.4993076026439667, "learning_rate": 0.001, "loss": 1.8242, "step": 387744 }, { "epoch": 33.45410628019324, "grad_norm": 0.2622261047363281, "learning_rate": 0.001, "loss": 1.8174, "step": 387800 }, { "epoch": 33.45893719806763, "grad_norm": 1.6135770082473755, "learning_rate": 0.001, "loss": 1.8217, "step": 387856 }, { "epoch": 33.46376811594203, "grad_norm": 2.4823291301727295, "learning_rate": 0.001, "loss": 1.8295, "step": 387912 }, { "epoch": 33.468599033816425, "grad_norm": 3.4560706615448, "learning_rate": 0.001, "loss": 1.8249, "step": 387968 }, { "epoch": 33.47342995169082, "grad_norm": 0.9529895186424255, "learning_rate": 0.001, "loss": 1.8422, "step": 388024 }, { "epoch": 33.47826086956522, "grad_norm": 1.3982553482055664, "learning_rate": 0.001, "loss": 1.845, "step": 388080 }, { "epoch": 33.48309178743961, "grad_norm": 1.281186580657959, "learning_rate": 0.001, "loss": 1.8513, "step": 388136 }, { "epoch": 33.48792270531401, "grad_norm": 2.2583999633789062, "learning_rate": 0.001, "loss": 1.8548, "step": 388192 }, { "epoch": 33.492753623188406, "grad_norm": 16.483736038208008, "learning_rate": 0.001, "loss": 1.8548, "step": 388248 }, { "epoch": 33.4975845410628, "grad_norm": 0.8461055755615234, "learning_rate": 0.001, "loss": 1.8371, "step": 388304 }, { "epoch": 33.5024154589372, "grad_norm": 1.8284337520599365, "learning_rate": 0.001, "loss": 1.8384, "step": 388360 }, { "epoch": 33.507246376811594, "grad_norm": 0.5075252056121826, "learning_rate": 0.001, "loss": 1.8509, "step": 388416 }, { "epoch": 33.51207729468599, "grad_norm": 0.5701984167098999, "learning_rate": 0.001, "loss": 1.8384, "step": 388472 }, { "epoch": 33.51690821256039, "grad_norm": 13.03590202331543, "learning_rate": 0.001, "loss": 1.8395, "step": 388528 }, { "epoch": 33.52173913043478, "grad_norm": 0.4395972192287445, "learning_rate": 0.001, "loss": 1.8389, "step": 388584 }, { "epoch": 33.52657004830918, "grad_norm": 0.3167584538459778, "learning_rate": 0.001, "loss": 1.8375, "step": 388640 }, { "epoch": 33.531400966183575, "grad_norm": 3.5270092487335205, "learning_rate": 0.001, "loss": 1.8303, "step": 388696 }, { "epoch": 33.53623188405797, "grad_norm": 0.26903006434440613, "learning_rate": 0.001, "loss": 1.8414, "step": 388752 }, { "epoch": 33.54106280193237, "grad_norm": 0.4363032877445221, "learning_rate": 0.001, "loss": 1.8334, "step": 388808 }, { "epoch": 33.54589371980676, "grad_norm": 0.3003602623939514, "learning_rate": 0.001, "loss": 1.8364, "step": 388864 }, { "epoch": 33.55072463768116, "grad_norm": 0.2979651689529419, "learning_rate": 0.001, "loss": 1.833, "step": 388920 }, { "epoch": 33.55555555555556, "grad_norm": 0.5554699301719666, "learning_rate": 0.001, "loss": 1.8278, "step": 388976 }, { "epoch": 33.56038647342995, "grad_norm": 0.7324069142341614, "learning_rate": 0.001, "loss": 1.8317, "step": 389032 }, { "epoch": 33.56521739130435, "grad_norm": 0.8645010590553284, "learning_rate": 0.001, "loss": 1.8293, "step": 389088 }, { "epoch": 33.570048309178745, "grad_norm": 0.6930968761444092, "learning_rate": 0.001, "loss": 1.8384, "step": 389144 }, { "epoch": 33.57487922705314, "grad_norm": 0.2999052405357361, "learning_rate": 0.001, "loss": 1.8371, "step": 389200 }, { "epoch": 33.57971014492754, "grad_norm": 2.6434919834136963, "learning_rate": 0.001, "loss": 1.8307, "step": 389256 }, { "epoch": 33.58454106280193, "grad_norm": 26.93202781677246, "learning_rate": 0.001, "loss": 1.8304, "step": 389312 }, { "epoch": 33.589371980676326, "grad_norm": 0.6684861779212952, "learning_rate": 0.001, "loss": 1.8345, "step": 389368 }, { "epoch": 33.594202898550726, "grad_norm": 0.2618001699447632, "learning_rate": 0.001, "loss": 1.8281, "step": 389424 }, { "epoch": 33.59903381642512, "grad_norm": 0.4034615755081177, "learning_rate": 0.001, "loss": 1.8311, "step": 389480 }, { "epoch": 33.60386473429952, "grad_norm": 0.3703325092792511, "learning_rate": 0.001, "loss": 1.8252, "step": 389536 }, { "epoch": 33.608695652173914, "grad_norm": 4.496723175048828, "learning_rate": 0.001, "loss": 1.8259, "step": 389592 }, { "epoch": 33.61352657004831, "grad_norm": 0.3593529462814331, "learning_rate": 0.001, "loss": 1.8205, "step": 389648 }, { "epoch": 33.61835748792271, "grad_norm": 6.112456798553467, "learning_rate": 0.001, "loss": 1.8275, "step": 389704 }, { "epoch": 33.6231884057971, "grad_norm": 0.9142809510231018, "learning_rate": 0.001, "loss": 1.8385, "step": 389760 }, { "epoch": 33.628019323671495, "grad_norm": 2.3053598403930664, "learning_rate": 0.001, "loss": 1.8344, "step": 389816 }, { "epoch": 33.632850241545896, "grad_norm": 7.077793121337891, "learning_rate": 0.001, "loss": 1.8371, "step": 389872 }, { "epoch": 33.63768115942029, "grad_norm": 1.9662487506866455, "learning_rate": 0.001, "loss": 1.8369, "step": 389928 }, { "epoch": 33.64251207729468, "grad_norm": 0.33349180221557617, "learning_rate": 0.001, "loss": 1.8291, "step": 389984 }, { "epoch": 33.64734299516908, "grad_norm": 1.2231402397155762, "learning_rate": 0.001, "loss": 1.8357, "step": 390040 }, { "epoch": 33.65217391304348, "grad_norm": 1.894821286201477, "learning_rate": 0.001, "loss": 1.823, "step": 390096 }, { "epoch": 33.65700483091788, "grad_norm": 0.4181155264377594, "learning_rate": 0.001, "loss": 1.8257, "step": 390152 }, { "epoch": 33.66183574879227, "grad_norm": 0.4621347486972809, "learning_rate": 0.001, "loss": 1.8309, "step": 390208 }, { "epoch": 33.666666666666664, "grad_norm": 1.0278148651123047, "learning_rate": 0.001, "loss": 1.841, "step": 390264 }, { "epoch": 33.671497584541065, "grad_norm": 1.5917452573776245, "learning_rate": 0.001, "loss": 1.8482, "step": 390320 }, { "epoch": 33.67632850241546, "grad_norm": 0.8521987199783325, "learning_rate": 0.001, "loss": 1.85, "step": 390376 }, { "epoch": 33.68115942028985, "grad_norm": 5.376307964324951, "learning_rate": 0.001, "loss": 1.8504, "step": 390432 }, { "epoch": 33.68599033816425, "grad_norm": 0.4661087989807129, "learning_rate": 0.001, "loss": 1.8533, "step": 390488 }, { "epoch": 33.690821256038646, "grad_norm": 1.0833059549331665, "learning_rate": 0.001, "loss": 1.8512, "step": 390544 }, { "epoch": 33.69565217391305, "grad_norm": 0.5495493412017822, "learning_rate": 0.001, "loss": 1.8623, "step": 390600 }, { "epoch": 33.70048309178744, "grad_norm": 1.2597626447677612, "learning_rate": 0.001, "loss": 1.8714, "step": 390656 }, { "epoch": 33.70531400966183, "grad_norm": 0.63435298204422, "learning_rate": 0.001, "loss": 1.8547, "step": 390712 }, { "epoch": 33.710144927536234, "grad_norm": 0.4148454964160919, "learning_rate": 0.001, "loss": 1.8598, "step": 390768 }, { "epoch": 33.71497584541063, "grad_norm": 0.49057793617248535, "learning_rate": 0.001, "loss": 1.8586, "step": 390824 }, { "epoch": 33.71980676328502, "grad_norm": 0.6912051439285278, "learning_rate": 0.001, "loss": 1.8632, "step": 390880 }, { "epoch": 33.72463768115942, "grad_norm": 1.2648190259933472, "learning_rate": 0.001, "loss": 1.8678, "step": 390936 }, { "epoch": 33.729468599033815, "grad_norm": 0.5524211525917053, "learning_rate": 0.001, "loss": 1.8619, "step": 390992 }, { "epoch": 33.734299516908216, "grad_norm": 4.158597946166992, "learning_rate": 0.001, "loss": 1.8532, "step": 391048 }, { "epoch": 33.73913043478261, "grad_norm": 0.39962416887283325, "learning_rate": 0.001, "loss": 1.8541, "step": 391104 }, { "epoch": 33.743961352657, "grad_norm": 0.5502328872680664, "learning_rate": 0.001, "loss": 1.8467, "step": 391160 }, { "epoch": 33.7487922705314, "grad_norm": 0.7458182573318481, "learning_rate": 0.001, "loss": 1.8445, "step": 391216 }, { "epoch": 33.7536231884058, "grad_norm": 1.28138267993927, "learning_rate": 0.001, "loss": 1.8589, "step": 391272 }, { "epoch": 33.75845410628019, "grad_norm": 4.219057559967041, "learning_rate": 0.001, "loss": 1.8695, "step": 391328 }, { "epoch": 33.76328502415459, "grad_norm": 1.0130966901779175, "learning_rate": 0.001, "loss": 1.8705, "step": 391384 }, { "epoch": 33.768115942028984, "grad_norm": 2.883869171142578, "learning_rate": 0.001, "loss": 1.8792, "step": 391440 }, { "epoch": 33.772946859903385, "grad_norm": 1.156396746635437, "learning_rate": 0.001, "loss": 1.8757, "step": 391496 }, { "epoch": 33.77777777777778, "grad_norm": 1.3203887939453125, "learning_rate": 0.001, "loss": 1.8908, "step": 391552 }, { "epoch": 33.78260869565217, "grad_norm": 0.41138896346092224, "learning_rate": 0.001, "loss": 1.8755, "step": 391608 }, { "epoch": 33.78743961352657, "grad_norm": 0.726181149482727, "learning_rate": 0.001, "loss": 1.8689, "step": 391664 }, { "epoch": 33.792270531400966, "grad_norm": 1.0437425374984741, "learning_rate": 0.001, "loss": 1.8625, "step": 391720 }, { "epoch": 33.79710144927536, "grad_norm": 5.763835906982422, "learning_rate": 0.001, "loss": 1.8569, "step": 391776 }, { "epoch": 33.80193236714976, "grad_norm": 1.9013830423355103, "learning_rate": 0.001, "loss": 1.8527, "step": 391832 }, { "epoch": 33.806763285024154, "grad_norm": 2.055738925933838, "learning_rate": 0.001, "loss": 1.8427, "step": 391888 }, { "epoch": 33.81159420289855, "grad_norm": 0.5994840264320374, "learning_rate": 0.001, "loss": 1.8526, "step": 391944 }, { "epoch": 33.81642512077295, "grad_norm": 0.6166769862174988, "learning_rate": 0.001, "loss": 1.8474, "step": 392000 }, { "epoch": 33.82125603864734, "grad_norm": 0.9623236060142517, "learning_rate": 0.001, "loss": 1.841, "step": 392056 }, { "epoch": 33.82608695652174, "grad_norm": 0.48830509185791016, "learning_rate": 0.001, "loss": 1.8457, "step": 392112 }, { "epoch": 33.830917874396135, "grad_norm": 0.34471234679222107, "learning_rate": 0.001, "loss": 1.8409, "step": 392168 }, { "epoch": 33.83574879227053, "grad_norm": 0.6210710406303406, "learning_rate": 0.001, "loss": 1.8361, "step": 392224 }, { "epoch": 33.84057971014493, "grad_norm": 0.3446214199066162, "learning_rate": 0.001, "loss": 1.8329, "step": 392280 }, { "epoch": 33.84541062801932, "grad_norm": 1.2080656290054321, "learning_rate": 0.001, "loss": 1.8382, "step": 392336 }, { "epoch": 33.85024154589372, "grad_norm": 1.0600780248641968, "learning_rate": 0.001, "loss": 1.8455, "step": 392392 }, { "epoch": 33.85507246376812, "grad_norm": 21.053316116333008, "learning_rate": 0.001, "loss": 1.8436, "step": 392448 }, { "epoch": 33.85990338164251, "grad_norm": 0.8896178603172302, "learning_rate": 0.001, "loss": 1.8513, "step": 392504 }, { "epoch": 33.86473429951691, "grad_norm": 2.5448110103607178, "learning_rate": 0.001, "loss": 1.8424, "step": 392560 }, { "epoch": 33.869565217391305, "grad_norm": 0.6181594133377075, "learning_rate": 0.001, "loss": 1.8401, "step": 392616 }, { "epoch": 33.8743961352657, "grad_norm": 3.0872538089752197, "learning_rate": 0.001, "loss": 1.8452, "step": 392672 }, { "epoch": 33.8792270531401, "grad_norm": 0.7827709317207336, "learning_rate": 0.001, "loss": 1.8455, "step": 392728 }, { "epoch": 33.88405797101449, "grad_norm": 1.869963526725769, "learning_rate": 0.001, "loss": 1.8506, "step": 392784 }, { "epoch": 33.888888888888886, "grad_norm": 1.416790246963501, "learning_rate": 0.001, "loss": 1.8524, "step": 392840 }, { "epoch": 33.893719806763286, "grad_norm": 0.6115961074829102, "learning_rate": 0.001, "loss": 1.8407, "step": 392896 }, { "epoch": 33.89855072463768, "grad_norm": 7.0171332359313965, "learning_rate": 0.001, "loss": 1.8338, "step": 392952 }, { "epoch": 33.90338164251208, "grad_norm": 0.6242427229881287, "learning_rate": 0.001, "loss": 1.8485, "step": 393008 }, { "epoch": 33.908212560386474, "grad_norm": 0.42547935247421265, "learning_rate": 0.001, "loss": 1.8516, "step": 393064 }, { "epoch": 33.91304347826087, "grad_norm": 3.104501485824585, "learning_rate": 0.001, "loss": 1.8459, "step": 393120 }, { "epoch": 33.91787439613527, "grad_norm": 0.690721333026886, "learning_rate": 0.001, "loss": 1.8466, "step": 393176 }, { "epoch": 33.92270531400966, "grad_norm": 0.41456112265586853, "learning_rate": 0.001, "loss": 1.8448, "step": 393232 }, { "epoch": 33.927536231884055, "grad_norm": 0.5367662310600281, "learning_rate": 0.001, "loss": 1.8335, "step": 393288 }, { "epoch": 33.932367149758456, "grad_norm": 0.920475423336029, "learning_rate": 0.001, "loss": 1.833, "step": 393344 }, { "epoch": 33.93719806763285, "grad_norm": 0.7340195775032043, "learning_rate": 0.001, "loss": 1.8345, "step": 393400 }, { "epoch": 33.94202898550725, "grad_norm": 0.7892662286758423, "learning_rate": 0.001, "loss": 1.8424, "step": 393456 }, { "epoch": 33.94685990338164, "grad_norm": 0.7239735722541809, "learning_rate": 0.001, "loss": 1.8456, "step": 393512 }, { "epoch": 33.95169082125604, "grad_norm": 0.3607323467731476, "learning_rate": 0.001, "loss": 1.8513, "step": 393568 }, { "epoch": 33.95652173913044, "grad_norm": 2.1029012203216553, "learning_rate": 0.001, "loss": 1.8473, "step": 393624 }, { "epoch": 33.96135265700483, "grad_norm": 1.2454121112823486, "learning_rate": 0.001, "loss": 1.8651, "step": 393680 }, { "epoch": 33.966183574879224, "grad_norm": 0.42451608180999756, "learning_rate": 0.001, "loss": 1.8572, "step": 393736 }, { "epoch": 33.971014492753625, "grad_norm": 1.7361409664154053, "learning_rate": 0.001, "loss": 1.8419, "step": 393792 }, { "epoch": 33.97584541062802, "grad_norm": 0.506693422794342, "learning_rate": 0.001, "loss": 1.8411, "step": 393848 }, { "epoch": 33.98067632850242, "grad_norm": 1.842025876045227, "learning_rate": 0.001, "loss": 1.8599, "step": 393904 }, { "epoch": 33.98550724637681, "grad_norm": 1.9898440837860107, "learning_rate": 0.001, "loss": 1.8546, "step": 393960 }, { "epoch": 33.990338164251206, "grad_norm": 0.5598286986351013, "learning_rate": 0.001, "loss": 1.8541, "step": 394016 }, { "epoch": 33.99516908212561, "grad_norm": 0.3708111047744751, "learning_rate": 0.001, "loss": 1.8439, "step": 394072 }, { "epoch": 34.0, "grad_norm": 0.3796915113925934, "learning_rate": 0.001, "loss": 1.8486, "step": 394128 }, { "epoch": 34.00483091787439, "grad_norm": 0.5497387647628784, "learning_rate": 0.001, "loss": 1.8274, "step": 394184 }, { "epoch": 34.009661835748794, "grad_norm": 1.4706823825836182, "learning_rate": 0.001, "loss": 1.844, "step": 394240 }, { "epoch": 34.01449275362319, "grad_norm": 3.940784215927124, "learning_rate": 0.001, "loss": 1.8437, "step": 394296 }, { "epoch": 34.01932367149758, "grad_norm": 1.4999724626541138, "learning_rate": 0.001, "loss": 1.849, "step": 394352 }, { "epoch": 34.02415458937198, "grad_norm": 1.0261094570159912, "learning_rate": 0.001, "loss": 1.8548, "step": 394408 }, { "epoch": 34.028985507246375, "grad_norm": 3.463257074356079, "learning_rate": 0.001, "loss": 1.8316, "step": 394464 }, { "epoch": 34.033816425120776, "grad_norm": 14.41978645324707, "learning_rate": 0.001, "loss": 1.8307, "step": 394520 }, { "epoch": 34.03864734299517, "grad_norm": 6.772956848144531, "learning_rate": 0.001, "loss": 1.8312, "step": 394576 }, { "epoch": 34.04347826086956, "grad_norm": 1.7045167684555054, "learning_rate": 0.001, "loss": 1.8284, "step": 394632 }, { "epoch": 34.04830917874396, "grad_norm": 0.8915327191352844, "learning_rate": 0.001, "loss": 1.828, "step": 394688 }, { "epoch": 34.05314009661836, "grad_norm": 1.1439093351364136, "learning_rate": 0.001, "loss": 1.822, "step": 394744 }, { "epoch": 34.05797101449275, "grad_norm": 0.9980172514915466, "learning_rate": 0.001, "loss": 1.8313, "step": 394800 }, { "epoch": 34.06280193236715, "grad_norm": 2.4244210720062256, "learning_rate": 0.001, "loss": 1.8144, "step": 394856 }, { "epoch": 34.067632850241544, "grad_norm": 0.42881378531455994, "learning_rate": 0.001, "loss": 1.8195, "step": 394912 }, { "epoch": 34.072463768115945, "grad_norm": 29.04627227783203, "learning_rate": 0.001, "loss": 1.8217, "step": 394968 }, { "epoch": 34.07729468599034, "grad_norm": 2.9468963146209717, "learning_rate": 0.001, "loss": 1.8266, "step": 395024 }, { "epoch": 34.08212560386473, "grad_norm": 1.6556223630905151, "learning_rate": 0.001, "loss": 1.8215, "step": 395080 }, { "epoch": 34.08695652173913, "grad_norm": 0.25938257575035095, "learning_rate": 0.001, "loss": 1.8318, "step": 395136 }, { "epoch": 34.091787439613526, "grad_norm": 0.9088306427001953, "learning_rate": 0.001, "loss": 1.8188, "step": 395192 }, { "epoch": 34.09661835748792, "grad_norm": 4.958169937133789, "learning_rate": 0.001, "loss": 1.8259, "step": 395248 }, { "epoch": 34.10144927536232, "grad_norm": 3.492943525314331, "learning_rate": 0.001, "loss": 1.8239, "step": 395304 }, { "epoch": 34.106280193236714, "grad_norm": 0.99062579870224, "learning_rate": 0.001, "loss": 1.8314, "step": 395360 }, { "epoch": 34.111111111111114, "grad_norm": 1.066786766052246, "learning_rate": 0.001, "loss": 1.8308, "step": 395416 }, { "epoch": 34.11594202898551, "grad_norm": 0.7450012564659119, "learning_rate": 0.001, "loss": 1.8437, "step": 395472 }, { "epoch": 34.1207729468599, "grad_norm": 0.660764217376709, "learning_rate": 0.001, "loss": 1.8198, "step": 395528 }, { "epoch": 34.1256038647343, "grad_norm": 0.36358848214149475, "learning_rate": 0.001, "loss": 1.827, "step": 395584 }, { "epoch": 34.130434782608695, "grad_norm": 2.0411746501922607, "learning_rate": 0.001, "loss": 1.8452, "step": 395640 }, { "epoch": 34.13526570048309, "grad_norm": 1.1847214698791504, "learning_rate": 0.001, "loss": 1.8309, "step": 395696 }, { "epoch": 34.14009661835749, "grad_norm": 0.8884884715080261, "learning_rate": 0.001, "loss": 1.8361, "step": 395752 }, { "epoch": 34.14492753623188, "grad_norm": 0.6034506559371948, "learning_rate": 0.001, "loss": 1.8266, "step": 395808 }, { "epoch": 34.14975845410628, "grad_norm": 0.4040265679359436, "learning_rate": 0.001, "loss": 1.826, "step": 395864 }, { "epoch": 34.15458937198068, "grad_norm": 2.6042895317077637, "learning_rate": 0.001, "loss": 1.8299, "step": 395920 }, { "epoch": 34.15942028985507, "grad_norm": 2.66233229637146, "learning_rate": 0.001, "loss": 1.8172, "step": 395976 }, { "epoch": 34.16425120772947, "grad_norm": 0.8009576201438904, "learning_rate": 0.001, "loss": 1.8126, "step": 396032 }, { "epoch": 34.169082125603865, "grad_norm": 0.8031923770904541, "learning_rate": 0.001, "loss": 1.8174, "step": 396088 }, { "epoch": 34.17391304347826, "grad_norm": 1.4431639909744263, "learning_rate": 0.001, "loss": 1.8202, "step": 396144 }, { "epoch": 34.17874396135266, "grad_norm": 1.0435584783554077, "learning_rate": 0.001, "loss": 1.82, "step": 396200 }, { "epoch": 34.18357487922705, "grad_norm": 5.20131254196167, "learning_rate": 0.001, "loss": 1.8209, "step": 396256 }, { "epoch": 34.18840579710145, "grad_norm": 0.9241687655448914, "learning_rate": 0.001, "loss": 1.8363, "step": 396312 }, { "epoch": 34.193236714975846, "grad_norm": 0.6321746706962585, "learning_rate": 0.001, "loss": 1.8477, "step": 396368 }, { "epoch": 34.19806763285024, "grad_norm": 0.37644436955451965, "learning_rate": 0.001, "loss": 1.8311, "step": 396424 }, { "epoch": 34.20289855072464, "grad_norm": 1.5917465686798096, "learning_rate": 0.001, "loss": 1.8359, "step": 396480 }, { "epoch": 34.207729468599034, "grad_norm": 0.40337133407592773, "learning_rate": 0.001, "loss": 1.8345, "step": 396536 }, { "epoch": 34.21256038647343, "grad_norm": 0.6037850975990295, "learning_rate": 0.001, "loss": 1.822, "step": 396592 }, { "epoch": 34.21739130434783, "grad_norm": 0.4083627164363861, "learning_rate": 0.001, "loss": 1.8325, "step": 396648 }, { "epoch": 34.22222222222222, "grad_norm": 0.30457040667533875, "learning_rate": 0.001, "loss": 1.838, "step": 396704 }, { "epoch": 34.227053140096615, "grad_norm": 8.525131225585938, "learning_rate": 0.001, "loss": 1.8236, "step": 396760 }, { "epoch": 34.231884057971016, "grad_norm": 0.41897645592689514, "learning_rate": 0.001, "loss": 1.8209, "step": 396816 }, { "epoch": 34.23671497584541, "grad_norm": 0.8614577651023865, "learning_rate": 0.001, "loss": 1.8211, "step": 396872 }, { "epoch": 34.24154589371981, "grad_norm": 0.4343147873878479, "learning_rate": 0.001, "loss": 1.8334, "step": 396928 }, { "epoch": 34.2463768115942, "grad_norm": 1.933855414390564, "learning_rate": 0.001, "loss": 1.8225, "step": 396984 }, { "epoch": 34.2512077294686, "grad_norm": 0.4030202329158783, "learning_rate": 0.001, "loss": 1.8275, "step": 397040 }, { "epoch": 34.256038647343, "grad_norm": 0.3022425174713135, "learning_rate": 0.001, "loss": 1.8216, "step": 397096 }, { "epoch": 34.26086956521739, "grad_norm": 3.4483048915863037, "learning_rate": 0.001, "loss": 1.812, "step": 397152 }, { "epoch": 34.265700483091784, "grad_norm": 2.0142550468444824, "learning_rate": 0.001, "loss": 1.8161, "step": 397208 }, { "epoch": 34.270531400966185, "grad_norm": 4.08930778503418, "learning_rate": 0.001, "loss": 1.8331, "step": 397264 }, { "epoch": 34.27536231884058, "grad_norm": 5.345297813415527, "learning_rate": 0.001, "loss": 1.8406, "step": 397320 }, { "epoch": 34.28019323671498, "grad_norm": 1.770892858505249, "learning_rate": 0.001, "loss": 1.8314, "step": 397376 }, { "epoch": 34.28502415458937, "grad_norm": 0.4917718768119812, "learning_rate": 0.001, "loss": 1.8392, "step": 397432 }, { "epoch": 34.289855072463766, "grad_norm": 7.410118103027344, "learning_rate": 0.001, "loss": 1.8363, "step": 397488 }, { "epoch": 34.29468599033817, "grad_norm": 2.2117772102355957, "learning_rate": 0.001, "loss": 1.8418, "step": 397544 }, { "epoch": 34.29951690821256, "grad_norm": 0.564249575138092, "learning_rate": 0.001, "loss": 1.822, "step": 397600 }, { "epoch": 34.30434782608695, "grad_norm": 0.3040977418422699, "learning_rate": 0.001, "loss": 1.8229, "step": 397656 }, { "epoch": 34.309178743961354, "grad_norm": 1.8393222093582153, "learning_rate": 0.001, "loss": 1.82, "step": 397712 }, { "epoch": 34.31400966183575, "grad_norm": 0.8427688479423523, "learning_rate": 0.001, "loss": 1.8358, "step": 397768 }, { "epoch": 34.31884057971015, "grad_norm": 0.3609927296638489, "learning_rate": 0.001, "loss": 1.8482, "step": 397824 }, { "epoch": 34.32367149758454, "grad_norm": 0.47962257266044617, "learning_rate": 0.001, "loss": 1.8468, "step": 397880 }, { "epoch": 34.328502415458935, "grad_norm": 0.5269871950149536, "learning_rate": 0.001, "loss": 1.8397, "step": 397936 }, { "epoch": 34.333333333333336, "grad_norm": 4.408270359039307, "learning_rate": 0.001, "loss": 1.8402, "step": 397992 }, { "epoch": 34.33816425120773, "grad_norm": 1.089072823524475, "learning_rate": 0.001, "loss": 1.8259, "step": 398048 }, { "epoch": 34.34299516908212, "grad_norm": 4.2065606117248535, "learning_rate": 0.001, "loss": 1.8233, "step": 398104 }, { "epoch": 34.34782608695652, "grad_norm": 0.6628605723381042, "learning_rate": 0.001, "loss": 1.8328, "step": 398160 }, { "epoch": 34.35265700483092, "grad_norm": 1.0737597942352295, "learning_rate": 0.001, "loss": 1.8397, "step": 398216 }, { "epoch": 34.35748792270532, "grad_norm": 0.6845812797546387, "learning_rate": 0.001, "loss": 1.8359, "step": 398272 }, { "epoch": 34.36231884057971, "grad_norm": 0.5066124200820923, "learning_rate": 0.001, "loss": 1.823, "step": 398328 }, { "epoch": 34.367149758454104, "grad_norm": 1.1807925701141357, "learning_rate": 0.001, "loss": 1.8236, "step": 398384 }, { "epoch": 34.371980676328505, "grad_norm": 0.2666614055633545, "learning_rate": 0.001, "loss": 1.8318, "step": 398440 }, { "epoch": 34.3768115942029, "grad_norm": 0.846113920211792, "learning_rate": 0.001, "loss": 1.828, "step": 398496 }, { "epoch": 34.38164251207729, "grad_norm": 0.3131345510482788, "learning_rate": 0.001, "loss": 1.8238, "step": 398552 }, { "epoch": 34.38647342995169, "grad_norm": 0.6297203302383423, "learning_rate": 0.001, "loss": 1.8172, "step": 398608 }, { "epoch": 34.391304347826086, "grad_norm": 0.431806743144989, "learning_rate": 0.001, "loss": 1.8238, "step": 398664 }, { "epoch": 34.39613526570048, "grad_norm": 2.288335084915161, "learning_rate": 0.001, "loss": 1.8193, "step": 398720 }, { "epoch": 34.40096618357488, "grad_norm": 0.44937458634376526, "learning_rate": 0.001, "loss": 1.8313, "step": 398776 }, { "epoch": 34.405797101449274, "grad_norm": 0.9151427149772644, "learning_rate": 0.001, "loss": 1.8336, "step": 398832 }, { "epoch": 34.410628019323674, "grad_norm": 0.29244372248649597, "learning_rate": 0.001, "loss": 1.8295, "step": 398888 }, { "epoch": 34.41545893719807, "grad_norm": 3.320540428161621, "learning_rate": 0.001, "loss": 1.8253, "step": 398944 }, { "epoch": 34.42028985507246, "grad_norm": 0.4552459418773651, "learning_rate": 0.001, "loss": 1.827, "step": 399000 }, { "epoch": 34.42512077294686, "grad_norm": 0.31681784987449646, "learning_rate": 0.001, "loss": 1.8244, "step": 399056 }, { "epoch": 34.429951690821255, "grad_norm": 4.7241530418396, "learning_rate": 0.001, "loss": 1.8265, "step": 399112 }, { "epoch": 34.43478260869565, "grad_norm": 0.6703901290893555, "learning_rate": 0.001, "loss": 1.8242, "step": 399168 }, { "epoch": 34.43961352657005, "grad_norm": 1.2681350708007812, "learning_rate": 0.001, "loss": 1.8239, "step": 399224 }, { "epoch": 34.44444444444444, "grad_norm": 0.5211673974990845, "learning_rate": 0.001, "loss": 1.8277, "step": 399280 }, { "epoch": 34.44927536231884, "grad_norm": 0.5410476922988892, "learning_rate": 0.001, "loss": 1.8207, "step": 399336 }, { "epoch": 34.45410628019324, "grad_norm": 0.5327828526496887, "learning_rate": 0.001, "loss": 1.8147, "step": 399392 }, { "epoch": 34.45893719806763, "grad_norm": 0.6720408797264099, "learning_rate": 0.001, "loss": 1.8217, "step": 399448 }, { "epoch": 34.46376811594203, "grad_norm": 3.904264450073242, "learning_rate": 0.001, "loss": 1.8193, "step": 399504 }, { "epoch": 34.468599033816425, "grad_norm": 0.4814980924129486, "learning_rate": 0.001, "loss": 1.8255, "step": 399560 }, { "epoch": 34.47342995169082, "grad_norm": 0.5083090662956238, "learning_rate": 0.001, "loss": 1.8257, "step": 399616 }, { "epoch": 34.47826086956522, "grad_norm": 0.7455845475196838, "learning_rate": 0.001, "loss": 1.8236, "step": 399672 }, { "epoch": 34.48309178743961, "grad_norm": 1.604215145111084, "learning_rate": 0.001, "loss": 1.8228, "step": 399728 }, { "epoch": 34.48792270531401, "grad_norm": 0.41948601603507996, "learning_rate": 0.001, "loss": 1.8374, "step": 399784 }, { "epoch": 34.492753623188406, "grad_norm": 2.4225456714630127, "learning_rate": 0.001, "loss": 1.8426, "step": 399840 }, { "epoch": 34.4975845410628, "grad_norm": 0.3565793037414551, "learning_rate": 0.001, "loss": 1.8534, "step": 399896 }, { "epoch": 34.5024154589372, "grad_norm": 0.9869177937507629, "learning_rate": 0.001, "loss": 1.8407, "step": 399952 }, { "epoch": 34.507246376811594, "grad_norm": 1.5822570323944092, "learning_rate": 0.001, "loss": 1.8286, "step": 400008 }, { "epoch": 34.51207729468599, "grad_norm": 2.6658272743225098, "learning_rate": 0.001, "loss": 1.8381, "step": 400064 }, { "epoch": 34.51690821256039, "grad_norm": 8.518289566040039, "learning_rate": 0.001, "loss": 1.8413, "step": 400120 }, { "epoch": 34.52173913043478, "grad_norm": 4.847125053405762, "learning_rate": 0.001, "loss": 1.8369, "step": 400176 }, { "epoch": 34.52657004830918, "grad_norm": 0.38794419169425964, "learning_rate": 0.001, "loss": 1.8374, "step": 400232 }, { "epoch": 34.531400966183575, "grad_norm": 0.48767393827438354, "learning_rate": 0.001, "loss": 1.8364, "step": 400288 }, { "epoch": 34.53623188405797, "grad_norm": 0.4197387099266052, "learning_rate": 0.001, "loss": 1.8362, "step": 400344 }, { "epoch": 34.54106280193237, "grad_norm": 0.6299371719360352, "learning_rate": 0.001, "loss": 1.8275, "step": 400400 }, { "epoch": 34.54589371980676, "grad_norm": 1.0691109895706177, "learning_rate": 0.001, "loss": 1.8365, "step": 400456 }, { "epoch": 34.55072463768116, "grad_norm": 0.3436374068260193, "learning_rate": 0.001, "loss": 1.8301, "step": 400512 }, { "epoch": 34.55555555555556, "grad_norm": 0.3776346445083618, "learning_rate": 0.001, "loss": 1.8336, "step": 400568 }, { "epoch": 34.56038647342995, "grad_norm": 0.5395188927650452, "learning_rate": 0.001, "loss": 1.8242, "step": 400624 }, { "epoch": 34.56521739130435, "grad_norm": 0.4652628004550934, "learning_rate": 0.001, "loss": 1.8201, "step": 400680 }, { "epoch": 34.570048309178745, "grad_norm": 0.39203494787216187, "learning_rate": 0.001, "loss": 1.8187, "step": 400736 }, { "epoch": 34.57487922705314, "grad_norm": 0.46616658568382263, "learning_rate": 0.001, "loss": 1.8189, "step": 400792 }, { "epoch": 34.57971014492754, "grad_norm": 0.37062743306159973, "learning_rate": 0.001, "loss": 1.816, "step": 400848 }, { "epoch": 34.58454106280193, "grad_norm": 0.38949325680732727, "learning_rate": 0.001, "loss": 1.817, "step": 400904 }, { "epoch": 34.589371980676326, "grad_norm": 1.586822509765625, "learning_rate": 0.001, "loss": 1.8235, "step": 400960 }, { "epoch": 34.594202898550726, "grad_norm": 3.8605692386627197, "learning_rate": 0.001, "loss": 1.8201, "step": 401016 }, { "epoch": 34.59903381642512, "grad_norm": 0.6867456436157227, "learning_rate": 0.001, "loss": 1.8332, "step": 401072 }, { "epoch": 34.60386473429952, "grad_norm": 0.4530758559703827, "learning_rate": 0.001, "loss": 1.8377, "step": 401128 }, { "epoch": 34.608695652173914, "grad_norm": 1.516852855682373, "learning_rate": 0.001, "loss": 1.8313, "step": 401184 }, { "epoch": 34.61352657004831, "grad_norm": 0.44664517045021057, "learning_rate": 0.001, "loss": 1.8469, "step": 401240 }, { "epoch": 34.61835748792271, "grad_norm": 0.8147264719009399, "learning_rate": 0.001, "loss": 1.8451, "step": 401296 }, { "epoch": 34.6231884057971, "grad_norm": 1.0992869138717651, "learning_rate": 0.001, "loss": 1.8379, "step": 401352 }, { "epoch": 34.628019323671495, "grad_norm": 0.43195173144340515, "learning_rate": 0.001, "loss": 1.8423, "step": 401408 }, { "epoch": 34.632850241545896, "grad_norm": 0.3801847994327545, "learning_rate": 0.001, "loss": 1.8362, "step": 401464 }, { "epoch": 34.63768115942029, "grad_norm": 0.40044158697128296, "learning_rate": 0.001, "loss": 1.8307, "step": 401520 }, { "epoch": 34.64251207729468, "grad_norm": 0.27206578850746155, "learning_rate": 0.001, "loss": 1.8228, "step": 401576 }, { "epoch": 34.64734299516908, "grad_norm": 0.4572310745716095, "learning_rate": 0.001, "loss": 1.8287, "step": 401632 }, { "epoch": 34.65217391304348, "grad_norm": 0.5777914524078369, "learning_rate": 0.001, "loss": 1.8295, "step": 401688 }, { "epoch": 34.65700483091788, "grad_norm": 1.7568005323410034, "learning_rate": 0.001, "loss": 1.8305, "step": 401744 }, { "epoch": 34.66183574879227, "grad_norm": 0.4866204559803009, "learning_rate": 0.001, "loss": 1.8435, "step": 401800 }, { "epoch": 34.666666666666664, "grad_norm": 1.0231761932373047, "learning_rate": 0.001, "loss": 1.8542, "step": 401856 }, { "epoch": 34.671497584541065, "grad_norm": 0.41573628783226013, "learning_rate": 0.001, "loss": 1.8518, "step": 401912 }, { "epoch": 34.67632850241546, "grad_norm": 0.4632425010204315, "learning_rate": 0.001, "loss": 1.8495, "step": 401968 }, { "epoch": 34.68115942028985, "grad_norm": 0.4385789632797241, "learning_rate": 0.001, "loss": 1.8751, "step": 402024 }, { "epoch": 34.68599033816425, "grad_norm": 2.6744258403778076, "learning_rate": 0.001, "loss": 1.8463, "step": 402080 }, { "epoch": 34.690821256038646, "grad_norm": 0.3077704608440399, "learning_rate": 0.001, "loss": 1.8467, "step": 402136 }, { "epoch": 34.69565217391305, "grad_norm": 2.286090135574341, "learning_rate": 0.001, "loss": 1.8441, "step": 402192 }, { "epoch": 34.70048309178744, "grad_norm": 0.2735322415828705, "learning_rate": 0.001, "loss": 1.8442, "step": 402248 }, { "epoch": 34.70531400966183, "grad_norm": 1.4314903020858765, "learning_rate": 0.001, "loss": 1.8452, "step": 402304 }, { "epoch": 34.710144927536234, "grad_norm": 1.2184492349624634, "learning_rate": 0.001, "loss": 1.8463, "step": 402360 }, { "epoch": 34.71497584541063, "grad_norm": 2.7466936111450195, "learning_rate": 0.001, "loss": 1.8408, "step": 402416 }, { "epoch": 34.71980676328502, "grad_norm": 0.9126217365264893, "learning_rate": 0.001, "loss": 1.8502, "step": 402472 }, { "epoch": 34.72463768115942, "grad_norm": 1.0532206296920776, "learning_rate": 0.001, "loss": 1.8441, "step": 402528 }, { "epoch": 34.729468599033815, "grad_norm": 0.8165702223777771, "learning_rate": 0.001, "loss": 1.8523, "step": 402584 }, { "epoch": 34.734299516908216, "grad_norm": 0.8159556984901428, "learning_rate": 0.001, "loss": 1.8444, "step": 402640 }, { "epoch": 34.73913043478261, "grad_norm": 3.643235206604004, "learning_rate": 0.001, "loss": 1.8505, "step": 402696 }, { "epoch": 34.743961352657, "grad_norm": 0.4246162176132202, "learning_rate": 0.001, "loss": 1.8433, "step": 402752 }, { "epoch": 34.7487922705314, "grad_norm": 0.5304322242736816, "learning_rate": 0.001, "loss": 1.8352, "step": 402808 }, { "epoch": 34.7536231884058, "grad_norm": 0.595513641834259, "learning_rate": 0.001, "loss": 1.8332, "step": 402864 }, { "epoch": 34.75845410628019, "grad_norm": 0.5989309549331665, "learning_rate": 0.001, "loss": 1.8308, "step": 402920 }, { "epoch": 34.76328502415459, "grad_norm": 1.359060287475586, "learning_rate": 0.001, "loss": 1.839, "step": 402976 }, { "epoch": 34.768115942028984, "grad_norm": 8.58255672454834, "learning_rate": 0.001, "loss": 1.858, "step": 403032 }, { "epoch": 34.772946859903385, "grad_norm": 0.6457918882369995, "learning_rate": 0.001, "loss": 1.8513, "step": 403088 }, { "epoch": 34.77777777777778, "grad_norm": 1.3997337818145752, "learning_rate": 0.001, "loss": 1.8458, "step": 403144 }, { "epoch": 34.78260869565217, "grad_norm": 0.3774053752422333, "learning_rate": 0.001, "loss": 1.8415, "step": 403200 }, { "epoch": 34.78743961352657, "grad_norm": 1.7149569988250732, "learning_rate": 0.001, "loss": 1.8592, "step": 403256 }, { "epoch": 34.792270531400966, "grad_norm": 1.4429880380630493, "learning_rate": 0.001, "loss": 1.8454, "step": 403312 }, { "epoch": 34.79710144927536, "grad_norm": 1.2197086811065674, "learning_rate": 0.001, "loss": 1.8533, "step": 403368 }, { "epoch": 34.80193236714976, "grad_norm": 0.6489006280899048, "learning_rate": 0.001, "loss": 1.8459, "step": 403424 }, { "epoch": 34.806763285024154, "grad_norm": 0.42965269088745117, "learning_rate": 0.001, "loss": 1.8437, "step": 403480 }, { "epoch": 34.81159420289855, "grad_norm": 1.2669460773468018, "learning_rate": 0.001, "loss": 1.8423, "step": 403536 }, { "epoch": 34.81642512077295, "grad_norm": 22.795419692993164, "learning_rate": 0.001, "loss": 1.8592, "step": 403592 }, { "epoch": 34.82125603864734, "grad_norm": 7.510748386383057, "learning_rate": 0.001, "loss": 1.8539, "step": 403648 }, { "epoch": 34.82608695652174, "grad_norm": 0.6432771682739258, "learning_rate": 0.001, "loss": 1.8597, "step": 403704 }, { "epoch": 34.830917874396135, "grad_norm": 1.1688041687011719, "learning_rate": 0.001, "loss": 1.8607, "step": 403760 }, { "epoch": 34.83574879227053, "grad_norm": 0.5625596046447754, "learning_rate": 0.001, "loss": 1.8691, "step": 403816 }, { "epoch": 34.84057971014493, "grad_norm": 2.3385555744171143, "learning_rate": 0.001, "loss": 1.8576, "step": 403872 }, { "epoch": 34.84541062801932, "grad_norm": 1.0591169595718384, "learning_rate": 0.001, "loss": 1.8595, "step": 403928 }, { "epoch": 34.85024154589372, "grad_norm": 0.49466386437416077, "learning_rate": 0.001, "loss": 1.8548, "step": 403984 }, { "epoch": 34.85507246376812, "grad_norm": 0.7791069746017456, "learning_rate": 0.001, "loss": 1.8521, "step": 404040 }, { "epoch": 34.85990338164251, "grad_norm": 0.5880547761917114, "learning_rate": 0.001, "loss": 1.8517, "step": 404096 }, { "epoch": 34.86473429951691, "grad_norm": 0.2666136622428894, "learning_rate": 0.001, "loss": 1.8424, "step": 404152 }, { "epoch": 34.869565217391305, "grad_norm": 0.43976566195487976, "learning_rate": 0.001, "loss": 1.8359, "step": 404208 }, { "epoch": 34.8743961352657, "grad_norm": 0.33451250195503235, "learning_rate": 0.001, "loss": 1.8311, "step": 404264 }, { "epoch": 34.8792270531401, "grad_norm": 0.4396872818470001, "learning_rate": 0.001, "loss": 1.8358, "step": 404320 }, { "epoch": 34.88405797101449, "grad_norm": 0.2598714232444763, "learning_rate": 0.001, "loss": 1.8439, "step": 404376 }, { "epoch": 34.888888888888886, "grad_norm": 0.3735817074775696, "learning_rate": 0.001, "loss": 1.8348, "step": 404432 }, { "epoch": 34.893719806763286, "grad_norm": 0.7312192320823669, "learning_rate": 0.001, "loss": 1.8396, "step": 404488 }, { "epoch": 34.89855072463768, "grad_norm": 0.2945324778556824, "learning_rate": 0.001, "loss": 1.8401, "step": 404544 }, { "epoch": 34.90338164251208, "grad_norm": 2.8810641765594482, "learning_rate": 0.001, "loss": 1.8424, "step": 404600 }, { "epoch": 34.908212560386474, "grad_norm": 0.4389367699623108, "learning_rate": 0.001, "loss": 1.8394, "step": 404656 }, { "epoch": 34.91304347826087, "grad_norm": 0.6118005514144897, "learning_rate": 0.001, "loss": 1.8377, "step": 404712 }, { "epoch": 34.91787439613527, "grad_norm": 0.6925011873245239, "learning_rate": 0.001, "loss": 1.843, "step": 404768 }, { "epoch": 34.92270531400966, "grad_norm": 0.44683128595352173, "learning_rate": 0.001, "loss": 1.833, "step": 404824 }, { "epoch": 34.927536231884055, "grad_norm": 1.1117666959762573, "learning_rate": 0.001, "loss": 1.8325, "step": 404880 }, { "epoch": 34.932367149758456, "grad_norm": 0.4296211004257202, "learning_rate": 0.001, "loss": 1.839, "step": 404936 }, { "epoch": 34.93719806763285, "grad_norm": 0.4971621334552765, "learning_rate": 0.001, "loss": 1.8441, "step": 404992 }, { "epoch": 34.94202898550725, "grad_norm": 0.610191822052002, "learning_rate": 0.001, "loss": 1.8528, "step": 405048 }, { "epoch": 34.94685990338164, "grad_norm": 0.4546540081501007, "learning_rate": 0.001, "loss": 1.8546, "step": 405104 }, { "epoch": 34.95169082125604, "grad_norm": 0.30755624175071716, "learning_rate": 0.001, "loss": 1.8432, "step": 405160 }, { "epoch": 34.95652173913044, "grad_norm": 0.7772684097290039, "learning_rate": 0.001, "loss": 1.8427, "step": 405216 }, { "epoch": 34.96135265700483, "grad_norm": 0.953845739364624, "learning_rate": 0.001, "loss": 1.8354, "step": 405272 }, { "epoch": 34.966183574879224, "grad_norm": 0.40057969093322754, "learning_rate": 0.001, "loss": 1.8403, "step": 405328 }, { "epoch": 34.971014492753625, "grad_norm": 0.8585100173950195, "learning_rate": 0.001, "loss": 1.845, "step": 405384 }, { "epoch": 34.97584541062802, "grad_norm": 0.434465229511261, "learning_rate": 0.001, "loss": 1.8456, "step": 405440 }, { "epoch": 34.98067632850242, "grad_norm": 0.3140609562397003, "learning_rate": 0.001, "loss": 1.8383, "step": 405496 }, { "epoch": 34.98550724637681, "grad_norm": 0.3205801844596863, "learning_rate": 0.001, "loss": 1.8374, "step": 405552 }, { "epoch": 34.990338164251206, "grad_norm": 0.9985800981521606, "learning_rate": 0.001, "loss": 1.8306, "step": 405608 }, { "epoch": 34.99516908212561, "grad_norm": 16.679222106933594, "learning_rate": 0.001, "loss": 1.8398, "step": 405664 }, { "epoch": 35.0, "grad_norm": 0.38297221064567566, "learning_rate": 0.001, "loss": 1.8424, "step": 405720 }, { "epoch": 35.00483091787439, "grad_norm": 1.0603365898132324, "learning_rate": 0.001, "loss": 1.8053, "step": 405776 }, { "epoch": 35.009661835748794, "grad_norm": 12.936245918273926, "learning_rate": 0.001, "loss": 1.7918, "step": 405832 }, { "epoch": 35.01449275362319, "grad_norm": 1.0991921424865723, "learning_rate": 0.001, "loss": 1.7962, "step": 405888 }, { "epoch": 35.01932367149758, "grad_norm": 0.4593254625797272, "learning_rate": 0.001, "loss": 1.7941, "step": 405944 }, { "epoch": 35.02415458937198, "grad_norm": 5.74153995513916, "learning_rate": 0.001, "loss": 1.8026, "step": 406000 }, { "epoch": 35.028985507246375, "grad_norm": 0.45230138301849365, "learning_rate": 0.001, "loss": 1.798, "step": 406056 }, { "epoch": 35.033816425120776, "grad_norm": 0.3607964515686035, "learning_rate": 0.001, "loss": 1.7958, "step": 406112 }, { "epoch": 35.03864734299517, "grad_norm": 0.6346094012260437, "learning_rate": 0.001, "loss": 1.7974, "step": 406168 }, { "epoch": 35.04347826086956, "grad_norm": 0.2772428095340729, "learning_rate": 0.001, "loss": 1.8009, "step": 406224 }, { "epoch": 35.04830917874396, "grad_norm": 0.4213358461856842, "learning_rate": 0.001, "loss": 1.8023, "step": 406280 }, { "epoch": 35.05314009661836, "grad_norm": 2.2388756275177, "learning_rate": 0.001, "loss": 1.8003, "step": 406336 }, { "epoch": 35.05797101449275, "grad_norm": 1.3021376132965088, "learning_rate": 0.001, "loss": 1.8052, "step": 406392 }, { "epoch": 35.06280193236715, "grad_norm": 0.2797827124595642, "learning_rate": 0.001, "loss": 1.8051, "step": 406448 }, { "epoch": 35.067632850241544, "grad_norm": 0.5487902164459229, "learning_rate": 0.001, "loss": 1.8021, "step": 406504 }, { "epoch": 35.072463768115945, "grad_norm": 1.7535905838012695, "learning_rate": 0.001, "loss": 1.8083, "step": 406560 }, { "epoch": 35.07729468599034, "grad_norm": 0.37005677819252014, "learning_rate": 0.001, "loss": 1.8201, "step": 406616 }, { "epoch": 35.08212560386473, "grad_norm": 4.480099678039551, "learning_rate": 0.001, "loss": 1.8152, "step": 406672 }, { "epoch": 35.08695652173913, "grad_norm": 0.5715920329093933, "learning_rate": 0.001, "loss": 1.8173, "step": 406728 }, { "epoch": 35.091787439613526, "grad_norm": 0.44214823842048645, "learning_rate": 0.001, "loss": 1.8051, "step": 406784 }, { "epoch": 35.09661835748792, "grad_norm": 0.7164579629898071, "learning_rate": 0.001, "loss": 1.8042, "step": 406840 }, { "epoch": 35.10144927536232, "grad_norm": 0.6633809804916382, "learning_rate": 0.001, "loss": 1.8178, "step": 406896 }, { "epoch": 35.106280193236714, "grad_norm": 1.8472250699996948, "learning_rate": 0.001, "loss": 1.8172, "step": 406952 }, { "epoch": 35.111111111111114, "grad_norm": 0.332199364900589, "learning_rate": 0.001, "loss": 1.8154, "step": 407008 }, { "epoch": 35.11594202898551, "grad_norm": 0.3092857003211975, "learning_rate": 0.001, "loss": 1.8074, "step": 407064 }, { "epoch": 35.1207729468599, "grad_norm": 0.33781102299690247, "learning_rate": 0.001, "loss": 1.8074, "step": 407120 }, { "epoch": 35.1256038647343, "grad_norm": 0.8151071071624756, "learning_rate": 0.001, "loss": 1.8053, "step": 407176 }, { "epoch": 35.130434782608695, "grad_norm": 2.928256034851074, "learning_rate": 0.001, "loss": 1.8043, "step": 407232 }, { "epoch": 35.13526570048309, "grad_norm": 1.4900745153427124, "learning_rate": 0.001, "loss": 1.7923, "step": 407288 }, { "epoch": 35.14009661835749, "grad_norm": 0.8271536827087402, "learning_rate": 0.001, "loss": 1.8047, "step": 407344 }, { "epoch": 35.14492753623188, "grad_norm": 0.47368699312210083, "learning_rate": 0.001, "loss": 1.81, "step": 407400 }, { "epoch": 35.14975845410628, "grad_norm": 0.30208927392959595, "learning_rate": 0.001, "loss": 1.8137, "step": 407456 }, { "epoch": 35.15458937198068, "grad_norm": 1.0786821842193604, "learning_rate": 0.001, "loss": 1.8116, "step": 407512 }, { "epoch": 35.15942028985507, "grad_norm": 0.400648832321167, "learning_rate": 0.001, "loss": 1.8167, "step": 407568 }, { "epoch": 35.16425120772947, "grad_norm": 0.31820130348205566, "learning_rate": 0.001, "loss": 1.8198, "step": 407624 }, { "epoch": 35.169082125603865, "grad_norm": 0.5931256413459778, "learning_rate": 0.001, "loss": 1.8027, "step": 407680 }, { "epoch": 35.17391304347826, "grad_norm": 0.6282327175140381, "learning_rate": 0.001, "loss": 1.8139, "step": 407736 }, { "epoch": 35.17874396135266, "grad_norm": 2.359571933746338, "learning_rate": 0.001, "loss": 1.8079, "step": 407792 }, { "epoch": 35.18357487922705, "grad_norm": 0.4110291004180908, "learning_rate": 0.001, "loss": 1.8033, "step": 407848 }, { "epoch": 35.18840579710145, "grad_norm": 1.5298889875411987, "learning_rate": 0.001, "loss": 1.8232, "step": 407904 }, { "epoch": 35.193236714975846, "grad_norm": 0.580429196357727, "learning_rate": 0.001, "loss": 1.8258, "step": 407960 }, { "epoch": 35.19806763285024, "grad_norm": 0.36920854449272156, "learning_rate": 0.001, "loss": 1.8187, "step": 408016 }, { "epoch": 35.20289855072464, "grad_norm": 0.9936524033546448, "learning_rate": 0.001, "loss": 1.81, "step": 408072 }, { "epoch": 35.207729468599034, "grad_norm": 2.220777750015259, "learning_rate": 0.001, "loss": 1.8121, "step": 408128 }, { "epoch": 35.21256038647343, "grad_norm": 2.792311906814575, "learning_rate": 0.001, "loss": 1.8116, "step": 408184 }, { "epoch": 35.21739130434783, "grad_norm": 0.43975090980529785, "learning_rate": 0.001, "loss": 1.8143, "step": 408240 }, { "epoch": 35.22222222222222, "grad_norm": 0.6593931913375854, "learning_rate": 0.001, "loss": 1.8291, "step": 408296 }, { "epoch": 35.227053140096615, "grad_norm": 1.4133843183517456, "learning_rate": 0.001, "loss": 1.8267, "step": 408352 }, { "epoch": 35.231884057971016, "grad_norm": 1.3121495246887207, "learning_rate": 0.001, "loss": 1.8179, "step": 408408 }, { "epoch": 35.23671497584541, "grad_norm": 0.6508413553237915, "learning_rate": 0.001, "loss": 1.8164, "step": 408464 }, { "epoch": 35.24154589371981, "grad_norm": 0.4106164872646332, "learning_rate": 0.001, "loss": 1.8367, "step": 408520 }, { "epoch": 35.2463768115942, "grad_norm": 0.35245218873023987, "learning_rate": 0.001, "loss": 1.8295, "step": 408576 }, { "epoch": 35.2512077294686, "grad_norm": 0.5290487408638, "learning_rate": 0.001, "loss": 1.8204, "step": 408632 }, { "epoch": 35.256038647343, "grad_norm": 0.29576945304870605, "learning_rate": 0.001, "loss": 1.8223, "step": 408688 }, { "epoch": 35.26086956521739, "grad_norm": 0.35738885402679443, "learning_rate": 0.001, "loss": 1.8189, "step": 408744 }, { "epoch": 35.265700483091784, "grad_norm": 1.0058897733688354, "learning_rate": 0.001, "loss": 1.8183, "step": 408800 }, { "epoch": 35.270531400966185, "grad_norm": 0.2509610950946808, "learning_rate": 0.001, "loss": 1.8228, "step": 408856 }, { "epoch": 35.27536231884058, "grad_norm": 2.492396116256714, "learning_rate": 0.001, "loss": 1.8164, "step": 408912 }, { "epoch": 35.28019323671498, "grad_norm": 0.8020156025886536, "learning_rate": 0.001, "loss": 1.8186, "step": 408968 }, { "epoch": 35.28502415458937, "grad_norm": 0.3237086832523346, "learning_rate": 0.001, "loss": 1.816, "step": 409024 }, { "epoch": 35.289855072463766, "grad_norm": 0.9809046983718872, "learning_rate": 0.001, "loss": 1.8228, "step": 409080 }, { "epoch": 35.29468599033817, "grad_norm": 0.3647131621837616, "learning_rate": 0.001, "loss": 1.8214, "step": 409136 }, { "epoch": 35.29951690821256, "grad_norm": 0.6489964127540588, "learning_rate": 0.001, "loss": 1.8371, "step": 409192 }, { "epoch": 35.30434782608695, "grad_norm": 1.1161668300628662, "learning_rate": 0.001, "loss": 1.8323, "step": 409248 }, { "epoch": 35.309178743961354, "grad_norm": 1.0508959293365479, "learning_rate": 0.001, "loss": 1.8233, "step": 409304 }, { "epoch": 35.31400966183575, "grad_norm": 0.44561782479286194, "learning_rate": 0.001, "loss": 1.8174, "step": 409360 }, { "epoch": 35.31884057971015, "grad_norm": 0.4172346591949463, "learning_rate": 0.001, "loss": 1.8137, "step": 409416 }, { "epoch": 35.32367149758454, "grad_norm": 0.30724823474884033, "learning_rate": 0.001, "loss": 1.8201, "step": 409472 }, { "epoch": 35.328502415458935, "grad_norm": 0.7087112665176392, "learning_rate": 0.001, "loss": 1.8252, "step": 409528 }, { "epoch": 35.333333333333336, "grad_norm": 1.3969241380691528, "learning_rate": 0.001, "loss": 1.8205, "step": 409584 }, { "epoch": 35.33816425120773, "grad_norm": 0.3418963551521301, "learning_rate": 0.001, "loss": 1.8158, "step": 409640 }, { "epoch": 35.34299516908212, "grad_norm": 0.23668356239795685, "learning_rate": 0.001, "loss": 1.8093, "step": 409696 }, { "epoch": 35.34782608695652, "grad_norm": 0.2961994409561157, "learning_rate": 0.001, "loss": 1.8148, "step": 409752 }, { "epoch": 35.35265700483092, "grad_norm": 0.2485738843679428, "learning_rate": 0.001, "loss": 1.8182, "step": 409808 }, { "epoch": 35.35748792270532, "grad_norm": 0.2787158489227295, "learning_rate": 0.001, "loss": 1.8079, "step": 409864 }, { "epoch": 35.36231884057971, "grad_norm": 0.5110640525817871, "learning_rate": 0.001, "loss": 1.8052, "step": 409920 }, { "epoch": 35.367149758454104, "grad_norm": 0.48442232608795166, "learning_rate": 0.001, "loss": 1.8079, "step": 409976 }, { "epoch": 35.371980676328505, "grad_norm": 2.297004461288452, "learning_rate": 0.001, "loss": 1.8032, "step": 410032 }, { "epoch": 35.3768115942029, "grad_norm": 1.0248701572418213, "learning_rate": 0.001, "loss": 1.8065, "step": 410088 }, { "epoch": 35.38164251207729, "grad_norm": 0.42920953035354614, "learning_rate": 0.001, "loss": 1.8102, "step": 410144 }, { "epoch": 35.38647342995169, "grad_norm": 0.2690558433532715, "learning_rate": 0.001, "loss": 1.8062, "step": 410200 }, { "epoch": 35.391304347826086, "grad_norm": 0.5248808860778809, "learning_rate": 0.001, "loss": 1.8044, "step": 410256 }, { "epoch": 35.39613526570048, "grad_norm": 0.25228723883628845, "learning_rate": 0.001, "loss": 1.8008, "step": 410312 }, { "epoch": 35.40096618357488, "grad_norm": 0.36791178584098816, "learning_rate": 0.001, "loss": 1.7944, "step": 410368 }, { "epoch": 35.405797101449274, "grad_norm": 0.2855655252933502, "learning_rate": 0.001, "loss": 1.8005, "step": 410424 }, { "epoch": 35.410628019323674, "grad_norm": 0.30936798453330994, "learning_rate": 0.001, "loss": 1.795, "step": 410480 }, { "epoch": 35.41545893719807, "grad_norm": 0.2510398030281067, "learning_rate": 0.001, "loss": 1.8021, "step": 410536 }, { "epoch": 35.42028985507246, "grad_norm": 0.3093124032020569, "learning_rate": 0.001, "loss": 1.7968, "step": 410592 }, { "epoch": 35.42512077294686, "grad_norm": 0.2704859972000122, "learning_rate": 0.001, "loss": 1.8087, "step": 410648 }, { "epoch": 35.429951690821255, "grad_norm": 0.28626394271850586, "learning_rate": 0.001, "loss": 1.8113, "step": 410704 }, { "epoch": 35.43478260869565, "grad_norm": 0.31669795513153076, "learning_rate": 0.001, "loss": 1.8002, "step": 410760 }, { "epoch": 35.43961352657005, "grad_norm": 0.357285737991333, "learning_rate": 0.001, "loss": 1.8047, "step": 410816 }, { "epoch": 35.44444444444444, "grad_norm": 0.4459398686885834, "learning_rate": 0.001, "loss": 1.8027, "step": 410872 }, { "epoch": 35.44927536231884, "grad_norm": 0.4644375443458557, "learning_rate": 0.001, "loss": 1.8097, "step": 410928 }, { "epoch": 35.45410628019324, "grad_norm": 0.3312598764896393, "learning_rate": 0.001, "loss": 1.8105, "step": 410984 }, { "epoch": 35.45893719806763, "grad_norm": 0.3333911895751953, "learning_rate": 0.001, "loss": 1.8051, "step": 411040 }, { "epoch": 35.46376811594203, "grad_norm": 0.42953598499298096, "learning_rate": 0.001, "loss": 1.8084, "step": 411096 }, { "epoch": 35.468599033816425, "grad_norm": 0.2958180010318756, "learning_rate": 0.001, "loss": 1.8015, "step": 411152 }, { "epoch": 35.47342995169082, "grad_norm": 0.5956571698188782, "learning_rate": 0.001, "loss": 1.8109, "step": 411208 }, { "epoch": 35.47826086956522, "grad_norm": 4.026671886444092, "learning_rate": 0.001, "loss": 1.8057, "step": 411264 }, { "epoch": 35.48309178743961, "grad_norm": 0.2890520989894867, "learning_rate": 0.001, "loss": 1.8123, "step": 411320 }, { "epoch": 35.48792270531401, "grad_norm": 0.34171226620674133, "learning_rate": 0.001, "loss": 1.8047, "step": 411376 }, { "epoch": 35.492753623188406, "grad_norm": 0.38530147075653076, "learning_rate": 0.001, "loss": 1.8095, "step": 411432 }, { "epoch": 35.4975845410628, "grad_norm": 0.2500717043876648, "learning_rate": 0.001, "loss": 1.8121, "step": 411488 }, { "epoch": 35.5024154589372, "grad_norm": 0.34365856647491455, "learning_rate": 0.001, "loss": 1.8194, "step": 411544 }, { "epoch": 35.507246376811594, "grad_norm": 0.609130322933197, "learning_rate": 0.001, "loss": 1.8148, "step": 411600 }, { "epoch": 35.51207729468599, "grad_norm": 0.6082576513290405, "learning_rate": 0.001, "loss": 1.8183, "step": 411656 }, { "epoch": 35.51690821256039, "grad_norm": 0.23690636456012726, "learning_rate": 0.001, "loss": 1.8133, "step": 411712 }, { "epoch": 35.52173913043478, "grad_norm": 0.2796630859375, "learning_rate": 0.001, "loss": 1.8126, "step": 411768 }, { "epoch": 35.52657004830918, "grad_norm": 0.4137280583381653, "learning_rate": 0.001, "loss": 1.8143, "step": 411824 }, { "epoch": 35.531400966183575, "grad_norm": 0.30777817964553833, "learning_rate": 0.001, "loss": 1.8017, "step": 411880 }, { "epoch": 35.53623188405797, "grad_norm": 0.24710530042648315, "learning_rate": 0.001, "loss": 1.8078, "step": 411936 }, { "epoch": 35.54106280193237, "grad_norm": 0.2967704236507416, "learning_rate": 0.001, "loss": 1.8054, "step": 411992 }, { "epoch": 35.54589371980676, "grad_norm": 0.7357916831970215, "learning_rate": 0.001, "loss": 1.7985, "step": 412048 }, { "epoch": 35.55072463768116, "grad_norm": 0.5564244389533997, "learning_rate": 0.001, "loss": 1.8025, "step": 412104 }, { "epoch": 35.55555555555556, "grad_norm": 0.3299926817417145, "learning_rate": 0.001, "loss": 1.8251, "step": 412160 }, { "epoch": 35.56038647342995, "grad_norm": 0.2791455388069153, "learning_rate": 0.001, "loss": 1.862, "step": 412216 }, { "epoch": 35.56521739130435, "grad_norm": 1.2173422574996948, "learning_rate": 0.001, "loss": 1.8417, "step": 412272 }, { "epoch": 35.570048309178745, "grad_norm": 19.47728157043457, "learning_rate": 0.001, "loss": 1.8134, "step": 412328 }, { "epoch": 35.57487922705314, "grad_norm": 0.35238805413246155, "learning_rate": 0.001, "loss": 1.8201, "step": 412384 }, { "epoch": 35.57971014492754, "grad_norm": 0.29076236486434937, "learning_rate": 0.001, "loss": 1.8163, "step": 412440 }, { "epoch": 35.58454106280193, "grad_norm": 1.546376347541809, "learning_rate": 0.001, "loss": 1.8108, "step": 412496 }, { "epoch": 35.589371980676326, "grad_norm": 0.3189411163330078, "learning_rate": 0.001, "loss": 1.8119, "step": 412552 }, { "epoch": 35.594202898550726, "grad_norm": 0.32669347524642944, "learning_rate": 0.001, "loss": 1.8063, "step": 412608 }, { "epoch": 35.59903381642512, "grad_norm": 0.5219030976295471, "learning_rate": 0.001, "loss": 1.8081, "step": 412664 }, { "epoch": 35.60386473429952, "grad_norm": 0.2837288975715637, "learning_rate": 0.001, "loss": 1.8064, "step": 412720 }, { "epoch": 35.608695652173914, "grad_norm": 0.38072219491004944, "learning_rate": 0.001, "loss": 1.8065, "step": 412776 }, { "epoch": 35.61352657004831, "grad_norm": 0.3971971869468689, "learning_rate": 0.001, "loss": 1.8026, "step": 412832 }, { "epoch": 35.61835748792271, "grad_norm": 0.4511968493461609, "learning_rate": 0.001, "loss": 1.8085, "step": 412888 }, { "epoch": 35.6231884057971, "grad_norm": 0.46087291836738586, "learning_rate": 0.001, "loss": 1.8018, "step": 412944 }, { "epoch": 35.628019323671495, "grad_norm": 0.42689013481140137, "learning_rate": 0.001, "loss": 1.8005, "step": 413000 }, { "epoch": 35.632850241545896, "grad_norm": 1.3089145421981812, "learning_rate": 0.001, "loss": 1.8002, "step": 413056 }, { "epoch": 35.63768115942029, "grad_norm": 0.28892236948013306, "learning_rate": 0.001, "loss": 1.7973, "step": 413112 }, { "epoch": 35.64251207729468, "grad_norm": 0.32442688941955566, "learning_rate": 0.001, "loss": 1.8006, "step": 413168 }, { "epoch": 35.64734299516908, "grad_norm": 0.33931100368499756, "learning_rate": 0.001, "loss": 1.7914, "step": 413224 }, { "epoch": 35.65217391304348, "grad_norm": 0.3788907527923584, "learning_rate": 0.001, "loss": 1.8013, "step": 413280 }, { "epoch": 35.65700483091788, "grad_norm": 0.2726646959781647, "learning_rate": 0.001, "loss": 1.7966, "step": 413336 }, { "epoch": 35.66183574879227, "grad_norm": 2.5383167266845703, "learning_rate": 0.001, "loss": 1.813, "step": 413392 }, { "epoch": 35.666666666666664, "grad_norm": 0.336001455783844, "learning_rate": 0.001, "loss": 1.8313, "step": 413448 }, { "epoch": 35.671497584541065, "grad_norm": 0.3318134546279907, "learning_rate": 0.001, "loss": 1.8148, "step": 413504 }, { "epoch": 35.67632850241546, "grad_norm": 0.44807299971580505, "learning_rate": 0.001, "loss": 1.8208, "step": 413560 }, { "epoch": 35.68115942028985, "grad_norm": 0.3180854916572571, "learning_rate": 0.001, "loss": 1.815, "step": 413616 }, { "epoch": 35.68599033816425, "grad_norm": 0.47387954592704773, "learning_rate": 0.001, "loss": 1.8098, "step": 413672 }, { "epoch": 35.690821256038646, "grad_norm": 0.2597843110561371, "learning_rate": 0.001, "loss": 1.8255, "step": 413728 }, { "epoch": 35.69565217391305, "grad_norm": 0.5969817638397217, "learning_rate": 0.001, "loss": 1.813, "step": 413784 }, { "epoch": 35.70048309178744, "grad_norm": 3.7520675659179688, "learning_rate": 0.001, "loss": 1.8047, "step": 413840 }, { "epoch": 35.70531400966183, "grad_norm": 2.5166049003601074, "learning_rate": 0.001, "loss": 1.8089, "step": 413896 }, { "epoch": 35.710144927536234, "grad_norm": 0.5534590482711792, "learning_rate": 0.001, "loss": 1.8105, "step": 413952 }, { "epoch": 35.71497584541063, "grad_norm": 0.35649874806404114, "learning_rate": 0.001, "loss": 1.8126, "step": 414008 }, { "epoch": 35.71980676328502, "grad_norm": 0.3534491956233978, "learning_rate": 0.001, "loss": 1.8115, "step": 414064 }, { "epoch": 35.72463768115942, "grad_norm": 1.9140962362289429, "learning_rate": 0.001, "loss": 1.813, "step": 414120 }, { "epoch": 35.729468599033815, "grad_norm": 2.072105646133423, "learning_rate": 0.001, "loss": 1.8029, "step": 414176 }, { "epoch": 35.734299516908216, "grad_norm": 0.3519071638584137, "learning_rate": 0.001, "loss": 1.8105, "step": 414232 }, { "epoch": 35.73913043478261, "grad_norm": 0.5808678865432739, "learning_rate": 0.001, "loss": 1.8263, "step": 414288 }, { "epoch": 35.743961352657, "grad_norm": 0.6865488290786743, "learning_rate": 0.001, "loss": 1.8182, "step": 414344 }, { "epoch": 35.7487922705314, "grad_norm": 0.45111045241355896, "learning_rate": 0.001, "loss": 1.809, "step": 414400 }, { "epoch": 35.7536231884058, "grad_norm": 0.4726544916629791, "learning_rate": 0.001, "loss": 1.815, "step": 414456 }, { "epoch": 35.75845410628019, "grad_norm": 0.44025561213493347, "learning_rate": 0.001, "loss": 1.8119, "step": 414512 }, { "epoch": 35.76328502415459, "grad_norm": 0.3452722728252411, "learning_rate": 0.001, "loss": 1.8093, "step": 414568 }, { "epoch": 35.768115942028984, "grad_norm": 0.39103955030441284, "learning_rate": 0.001, "loss": 1.8152, "step": 414624 }, { "epoch": 35.772946859903385, "grad_norm": 0.5738186836242676, "learning_rate": 0.001, "loss": 1.811, "step": 414680 }, { "epoch": 35.77777777777778, "grad_norm": 0.27434220910072327, "learning_rate": 0.001, "loss": 1.8224, "step": 414736 }, { "epoch": 35.78260869565217, "grad_norm": 0.38616466522216797, "learning_rate": 0.001, "loss": 1.8218, "step": 414792 }, { "epoch": 35.78743961352657, "grad_norm": 2.38551926612854, "learning_rate": 0.001, "loss": 1.8031, "step": 414848 }, { "epoch": 35.792270531400966, "grad_norm": 0.522350013256073, "learning_rate": 0.001, "loss": 1.8206, "step": 414904 }, { "epoch": 35.79710144927536, "grad_norm": 1.2795474529266357, "learning_rate": 0.001, "loss": 1.8225, "step": 414960 }, { "epoch": 35.80193236714976, "grad_norm": 0.2650989592075348, "learning_rate": 0.001, "loss": 1.8179, "step": 415016 }, { "epoch": 35.806763285024154, "grad_norm": 0.2709149122238159, "learning_rate": 0.001, "loss": 1.8238, "step": 415072 }, { "epoch": 35.81159420289855, "grad_norm": 1.7122024297714233, "learning_rate": 0.001, "loss": 1.8287, "step": 415128 }, { "epoch": 35.81642512077295, "grad_norm": 0.8686914443969727, "learning_rate": 0.001, "loss": 1.8402, "step": 415184 }, { "epoch": 35.82125603864734, "grad_norm": 3.722557783126831, "learning_rate": 0.001, "loss": 1.8555, "step": 415240 }, { "epoch": 35.82608695652174, "grad_norm": 0.35540807247161865, "learning_rate": 0.001, "loss": 1.8392, "step": 415296 }, { "epoch": 35.830917874396135, "grad_norm": 0.9539128541946411, "learning_rate": 0.001, "loss": 1.8316, "step": 415352 }, { "epoch": 35.83574879227053, "grad_norm": 0.4823375642299652, "learning_rate": 0.001, "loss": 1.8266, "step": 415408 }, { "epoch": 35.84057971014493, "grad_norm": 0.5761464834213257, "learning_rate": 0.001, "loss": 1.8289, "step": 415464 }, { "epoch": 35.84541062801932, "grad_norm": 0.5835826992988586, "learning_rate": 0.001, "loss": 1.8207, "step": 415520 }, { "epoch": 35.85024154589372, "grad_norm": 0.6022976040840149, "learning_rate": 0.001, "loss": 1.815, "step": 415576 }, { "epoch": 35.85507246376812, "grad_norm": 0.7785156965255737, "learning_rate": 0.001, "loss": 1.8235, "step": 415632 }, { "epoch": 35.85990338164251, "grad_norm": 0.9020271897315979, "learning_rate": 0.001, "loss": 1.821, "step": 415688 }, { "epoch": 35.86473429951691, "grad_norm": 0.46804189682006836, "learning_rate": 0.001, "loss": 1.8197, "step": 415744 }, { "epoch": 35.869565217391305, "grad_norm": 1.654260516166687, "learning_rate": 0.001, "loss": 1.8183, "step": 415800 }, { "epoch": 35.8743961352657, "grad_norm": 1.5682464838027954, "learning_rate": 0.001, "loss": 1.8387, "step": 415856 }, { "epoch": 35.8792270531401, "grad_norm": 0.4145030379295349, "learning_rate": 0.001, "loss": 1.8415, "step": 415912 }, { "epoch": 35.88405797101449, "grad_norm": 0.4621835947036743, "learning_rate": 0.001, "loss": 1.8683, "step": 415968 }, { "epoch": 35.888888888888886, "grad_norm": 2.6306676864624023, "learning_rate": 0.001, "loss": 1.8738, "step": 416024 }, { "epoch": 35.893719806763286, "grad_norm": 0.4296230971813202, "learning_rate": 0.001, "loss": 1.8524, "step": 416080 }, { "epoch": 35.89855072463768, "grad_norm": 0.7701650261878967, "learning_rate": 0.001, "loss": 1.8414, "step": 416136 }, { "epoch": 35.90338164251208, "grad_norm": 1.5234531164169312, "learning_rate": 0.001, "loss": 1.8372, "step": 416192 }, { "epoch": 35.908212560386474, "grad_norm": 0.4869275391101837, "learning_rate": 0.001, "loss": 1.8349, "step": 416248 }, { "epoch": 35.91304347826087, "grad_norm": 0.40160831809043884, "learning_rate": 0.001, "loss": 1.8403, "step": 416304 }, { "epoch": 35.91787439613527, "grad_norm": 0.3963029980659485, "learning_rate": 0.001, "loss": 1.8248, "step": 416360 }, { "epoch": 35.92270531400966, "grad_norm": 0.30414238572120667, "learning_rate": 0.001, "loss": 1.8296, "step": 416416 }, { "epoch": 35.927536231884055, "grad_norm": 1.4931467771530151, "learning_rate": 0.001, "loss": 1.8267, "step": 416472 }, { "epoch": 35.932367149758456, "grad_norm": 1.6522365808486938, "learning_rate": 0.001, "loss": 1.834, "step": 416528 }, { "epoch": 35.93719806763285, "grad_norm": 2.1042068004608154, "learning_rate": 0.001, "loss": 1.8537, "step": 416584 }, { "epoch": 35.94202898550725, "grad_norm": 2.761247396469116, "learning_rate": 0.001, "loss": 1.8499, "step": 416640 }, { "epoch": 35.94685990338164, "grad_norm": 0.8252613544464111, "learning_rate": 0.001, "loss": 1.85, "step": 416696 }, { "epoch": 35.95169082125604, "grad_norm": 0.9316199421882629, "learning_rate": 0.001, "loss": 1.8398, "step": 416752 }, { "epoch": 35.95652173913044, "grad_norm": 1.2772455215454102, "learning_rate": 0.001, "loss": 1.8522, "step": 416808 }, { "epoch": 35.96135265700483, "grad_norm": 9.804481506347656, "learning_rate": 0.001, "loss": 1.8425, "step": 416864 }, { "epoch": 35.966183574879224, "grad_norm": 0.7426204681396484, "learning_rate": 0.001, "loss": 1.8501, "step": 416920 }, { "epoch": 35.971014492753625, "grad_norm": 3.051438093185425, "learning_rate": 0.001, "loss": 1.8506, "step": 416976 }, { "epoch": 35.97584541062802, "grad_norm": 0.717117190361023, "learning_rate": 0.001, "loss": 1.8541, "step": 417032 }, { "epoch": 35.98067632850242, "grad_norm": 0.6426759362220764, "learning_rate": 0.001, "loss": 1.8578, "step": 417088 }, { "epoch": 35.98550724637681, "grad_norm": 0.5064529180526733, "learning_rate": 0.001, "loss": 1.8434, "step": 417144 }, { "epoch": 35.990338164251206, "grad_norm": 0.2840609550476074, "learning_rate": 0.001, "loss": 1.8316, "step": 417200 }, { "epoch": 35.99516908212561, "grad_norm": 0.4005604386329651, "learning_rate": 0.001, "loss": 1.8351, "step": 417256 }, { "epoch": 36.0, "grad_norm": 0.3010460138320923, "learning_rate": 0.001, "loss": 1.8277, "step": 417312 }, { "epoch": 36.00483091787439, "grad_norm": 0.37541258335113525, "learning_rate": 0.001, "loss": 1.7876, "step": 417368 }, { "epoch": 36.009661835748794, "grad_norm": 1.3438811302185059, "learning_rate": 0.001, "loss": 1.8063, "step": 417424 }, { "epoch": 36.01449275362319, "grad_norm": 0.7876741886138916, "learning_rate": 0.001, "loss": 1.8022, "step": 417480 }, { "epoch": 36.01932367149758, "grad_norm": 0.3538321554660797, "learning_rate": 0.001, "loss": 1.7949, "step": 417536 }, { "epoch": 36.02415458937198, "grad_norm": 61.62012481689453, "learning_rate": 0.001, "loss": 1.7868, "step": 417592 }, { "epoch": 36.028985507246375, "grad_norm": 0.4933997690677643, "learning_rate": 0.001, "loss": 1.7861, "step": 417648 }, { "epoch": 36.033816425120776, "grad_norm": 5.336723804473877, "learning_rate": 0.001, "loss": 1.8, "step": 417704 }, { "epoch": 36.03864734299517, "grad_norm": 14.08191967010498, "learning_rate": 0.001, "loss": 1.8164, "step": 417760 }, { "epoch": 36.04347826086956, "grad_norm": 0.4196595251560211, "learning_rate": 0.001, "loss": 1.8127, "step": 417816 }, { "epoch": 36.04830917874396, "grad_norm": 3.8787102699279785, "learning_rate": 0.001, "loss": 1.8057, "step": 417872 }, { "epoch": 36.05314009661836, "grad_norm": 0.4622533321380615, "learning_rate": 0.001, "loss": 1.8252, "step": 417928 }, { "epoch": 36.05797101449275, "grad_norm": 0.4794529676437378, "learning_rate": 0.001, "loss": 1.8181, "step": 417984 }, { "epoch": 36.06280193236715, "grad_norm": 0.3200092315673828, "learning_rate": 0.001, "loss": 1.8248, "step": 418040 }, { "epoch": 36.067632850241544, "grad_norm": 2.194531202316284, "learning_rate": 0.001, "loss": 1.8301, "step": 418096 }, { "epoch": 36.072463768115945, "grad_norm": 1.1515486240386963, "learning_rate": 0.001, "loss": 1.8244, "step": 418152 }, { "epoch": 36.07729468599034, "grad_norm": 0.5132242441177368, "learning_rate": 0.001, "loss": 1.8189, "step": 418208 }, { "epoch": 36.08212560386473, "grad_norm": 0.650173008441925, "learning_rate": 0.001, "loss": 1.8081, "step": 418264 }, { "epoch": 36.08695652173913, "grad_norm": 0.8986338973045349, "learning_rate": 0.001, "loss": 1.8157, "step": 418320 }, { "epoch": 36.091787439613526, "grad_norm": 3.483461380004883, "learning_rate": 0.001, "loss": 1.8257, "step": 418376 }, { "epoch": 36.09661835748792, "grad_norm": 0.5482420325279236, "learning_rate": 0.001, "loss": 1.8149, "step": 418432 }, { "epoch": 36.10144927536232, "grad_norm": 4.823991775512695, "learning_rate": 0.001, "loss": 1.8213, "step": 418488 }, { "epoch": 36.106280193236714, "grad_norm": 2.0775492191314697, "learning_rate": 0.001, "loss": 1.8186, "step": 418544 }, { "epoch": 36.111111111111114, "grad_norm": 0.9503189921379089, "learning_rate": 0.001, "loss": 1.8131, "step": 418600 }, { "epoch": 36.11594202898551, "grad_norm": 0.6944476366043091, "learning_rate": 0.001, "loss": 1.8154, "step": 418656 }, { "epoch": 36.1207729468599, "grad_norm": 2.9918415546417236, "learning_rate": 0.001, "loss": 1.8081, "step": 418712 }, { "epoch": 36.1256038647343, "grad_norm": 0.4789454936981201, "learning_rate": 0.001, "loss": 1.8208, "step": 418768 }, { "epoch": 36.130434782608695, "grad_norm": 0.8901446461677551, "learning_rate": 0.001, "loss": 1.8209, "step": 418824 }, { "epoch": 36.13526570048309, "grad_norm": 1.2794877290725708, "learning_rate": 0.001, "loss": 1.8099, "step": 418880 }, { "epoch": 36.14009661835749, "grad_norm": 2.1572601795196533, "learning_rate": 0.001, "loss": 1.8145, "step": 418936 }, { "epoch": 36.14492753623188, "grad_norm": 4.109430313110352, "learning_rate": 0.001, "loss": 1.8123, "step": 418992 }, { "epoch": 36.14975845410628, "grad_norm": 0.6066772937774658, "learning_rate": 0.001, "loss": 1.8026, "step": 419048 }, { "epoch": 36.15458937198068, "grad_norm": 0.46949502825737, "learning_rate": 0.001, "loss": 1.8141, "step": 419104 }, { "epoch": 36.15942028985507, "grad_norm": 2.2978317737579346, "learning_rate": 0.001, "loss": 1.8042, "step": 419160 }, { "epoch": 36.16425120772947, "grad_norm": 0.3962641954421997, "learning_rate": 0.001, "loss": 1.8016, "step": 419216 }, { "epoch": 36.169082125603865, "grad_norm": 0.8893486857414246, "learning_rate": 0.001, "loss": 1.8792, "step": 419272 }, { "epoch": 36.17391304347826, "grad_norm": 12.20644474029541, "learning_rate": 0.001, "loss": 1.8596, "step": 419328 }, { "epoch": 36.17874396135266, "grad_norm": 0.47220736742019653, "learning_rate": 0.001, "loss": 1.8402, "step": 419384 }, { "epoch": 36.18357487922705, "grad_norm": 1.4701601266860962, "learning_rate": 0.001, "loss": 1.8121, "step": 419440 }, { "epoch": 36.18840579710145, "grad_norm": 0.8864219784736633, "learning_rate": 0.001, "loss": 1.8011, "step": 419496 }, { "epoch": 36.193236714975846, "grad_norm": 0.9630461931228638, "learning_rate": 0.001, "loss": 1.8003, "step": 419552 }, { "epoch": 36.19806763285024, "grad_norm": 0.4893719255924225, "learning_rate": 0.001, "loss": 1.8002, "step": 419608 }, { "epoch": 36.20289855072464, "grad_norm": 4.209807872772217, "learning_rate": 0.001, "loss": 1.8001, "step": 419664 }, { "epoch": 36.207729468599034, "grad_norm": 1.1285202503204346, "learning_rate": 0.001, "loss": 1.804, "step": 419720 }, { "epoch": 36.21256038647343, "grad_norm": 1.0526671409606934, "learning_rate": 0.001, "loss": 1.7944, "step": 419776 }, { "epoch": 36.21739130434783, "grad_norm": 1.1785130500793457, "learning_rate": 0.001, "loss": 1.8031, "step": 419832 }, { "epoch": 36.22222222222222, "grad_norm": 0.6127132177352905, "learning_rate": 0.001, "loss": 1.803, "step": 419888 }, { "epoch": 36.227053140096615, "grad_norm": 0.37304431200027466, "learning_rate": 0.001, "loss": 1.8224, "step": 419944 }, { "epoch": 36.231884057971016, "grad_norm": 0.5405763387680054, "learning_rate": 0.001, "loss": 1.8096, "step": 420000 }, { "epoch": 36.23671497584541, "grad_norm": 0.36455366015434265, "learning_rate": 0.001, "loss": 1.8087, "step": 420056 }, { "epoch": 36.24154589371981, "grad_norm": 0.4383017420768738, "learning_rate": 0.001, "loss": 1.8012, "step": 420112 }, { "epoch": 36.2463768115942, "grad_norm": 1.4162043333053589, "learning_rate": 0.001, "loss": 1.7993, "step": 420168 }, { "epoch": 36.2512077294686, "grad_norm": 0.556699812412262, "learning_rate": 0.001, "loss": 1.7967, "step": 420224 }, { "epoch": 36.256038647343, "grad_norm": 0.366416335105896, "learning_rate": 0.001, "loss": 1.8016, "step": 420280 }, { "epoch": 36.26086956521739, "grad_norm": 2.401747465133667, "learning_rate": 0.001, "loss": 1.8022, "step": 420336 }, { "epoch": 36.265700483091784, "grad_norm": 0.486751914024353, "learning_rate": 0.001, "loss": 1.809, "step": 420392 }, { "epoch": 36.270531400966185, "grad_norm": 0.4724934995174408, "learning_rate": 0.001, "loss": 1.8289, "step": 420448 }, { "epoch": 36.27536231884058, "grad_norm": 2.7811198234558105, "learning_rate": 0.001, "loss": 1.8298, "step": 420504 }, { "epoch": 36.28019323671498, "grad_norm": 1.90841805934906, "learning_rate": 0.001, "loss": 1.8269, "step": 420560 }, { "epoch": 36.28502415458937, "grad_norm": 0.9812134504318237, "learning_rate": 0.001, "loss": 1.8338, "step": 420616 }, { "epoch": 36.289855072463766, "grad_norm": 0.5965414643287659, "learning_rate": 0.001, "loss": 1.8346, "step": 420672 }, { "epoch": 36.29468599033817, "grad_norm": 0.5134904980659485, "learning_rate": 0.001, "loss": 1.8276, "step": 420728 }, { "epoch": 36.29951690821256, "grad_norm": 3.909770965576172, "learning_rate": 0.001, "loss": 1.8186, "step": 420784 }, { "epoch": 36.30434782608695, "grad_norm": 0.4676063060760498, "learning_rate": 0.001, "loss": 1.8262, "step": 420840 }, { "epoch": 36.309178743961354, "grad_norm": 0.7271302938461304, "learning_rate": 0.001, "loss": 1.8252, "step": 420896 }, { "epoch": 36.31400966183575, "grad_norm": 0.6256305575370789, "learning_rate": 0.001, "loss": 1.8227, "step": 420952 }, { "epoch": 36.31884057971015, "grad_norm": 0.8160049915313721, "learning_rate": 0.001, "loss": 1.8155, "step": 421008 }, { "epoch": 36.32367149758454, "grad_norm": 0.34517383575439453, "learning_rate": 0.001, "loss": 1.8126, "step": 421064 }, { "epoch": 36.328502415458935, "grad_norm": 8.307565689086914, "learning_rate": 0.001, "loss": 1.8276, "step": 421120 }, { "epoch": 36.333333333333336, "grad_norm": 3.8583412170410156, "learning_rate": 0.001, "loss": 1.8129, "step": 421176 }, { "epoch": 36.33816425120773, "grad_norm": 0.5957732796669006, "learning_rate": 0.001, "loss": 1.8189, "step": 421232 }, { "epoch": 36.34299516908212, "grad_norm": 1.0673693418502808, "learning_rate": 0.001, "loss": 1.8175, "step": 421288 }, { "epoch": 36.34782608695652, "grad_norm": 0.28790944814682007, "learning_rate": 0.001, "loss": 1.8061, "step": 421344 }, { "epoch": 36.35265700483092, "grad_norm": 0.797018826007843, "learning_rate": 0.001, "loss": 1.8072, "step": 421400 }, { "epoch": 36.35748792270532, "grad_norm": 0.3776380121707916, "learning_rate": 0.001, "loss": 1.8097, "step": 421456 }, { "epoch": 36.36231884057971, "grad_norm": 1.2925693988800049, "learning_rate": 0.001, "loss": 1.8007, "step": 421512 }, { "epoch": 36.367149758454104, "grad_norm": 0.8501543402671814, "learning_rate": 0.001, "loss": 1.8117, "step": 421568 }, { "epoch": 36.371980676328505, "grad_norm": 0.9536029100418091, "learning_rate": 0.001, "loss": 1.8112, "step": 421624 }, { "epoch": 36.3768115942029, "grad_norm": 0.48492881655693054, "learning_rate": 0.001, "loss": 1.8098, "step": 421680 }, { "epoch": 36.38164251207729, "grad_norm": 0.4595077335834503, "learning_rate": 0.001, "loss": 1.8107, "step": 421736 }, { "epoch": 36.38647342995169, "grad_norm": 0.3828184902667999, "learning_rate": 0.001, "loss": 1.8033, "step": 421792 }, { "epoch": 36.391304347826086, "grad_norm": 1.0317989587783813, "learning_rate": 0.001, "loss": 1.7962, "step": 421848 }, { "epoch": 36.39613526570048, "grad_norm": 0.27856922149658203, "learning_rate": 0.001, "loss": 1.8094, "step": 421904 }, { "epoch": 36.40096618357488, "grad_norm": 0.30614838004112244, "learning_rate": 0.001, "loss": 1.8022, "step": 421960 }, { "epoch": 36.405797101449274, "grad_norm": 0.3831099271774292, "learning_rate": 0.001, "loss": 1.7988, "step": 422016 }, { "epoch": 36.410628019323674, "grad_norm": 0.5046437978744507, "learning_rate": 0.001, "loss": 1.8169, "step": 422072 }, { "epoch": 36.41545893719807, "grad_norm": 3.933488368988037, "learning_rate": 0.001, "loss": 1.8106, "step": 422128 }, { "epoch": 36.42028985507246, "grad_norm": 0.696277916431427, "learning_rate": 0.001, "loss": 1.805, "step": 422184 }, { "epoch": 36.42512077294686, "grad_norm": 0.2524656653404236, "learning_rate": 0.001, "loss": 1.811, "step": 422240 }, { "epoch": 36.429951690821255, "grad_norm": 0.4835737943649292, "learning_rate": 0.001, "loss": 1.8072, "step": 422296 }, { "epoch": 36.43478260869565, "grad_norm": 0.8354586362838745, "learning_rate": 0.001, "loss": 1.8095, "step": 422352 }, { "epoch": 36.43961352657005, "grad_norm": 1.903119444847107, "learning_rate": 0.001, "loss": 1.8116, "step": 422408 }, { "epoch": 36.44444444444444, "grad_norm": 0.8190333247184753, "learning_rate": 0.001, "loss": 1.8181, "step": 422464 }, { "epoch": 36.44927536231884, "grad_norm": 3.3435781002044678, "learning_rate": 0.001, "loss": 1.8369, "step": 422520 }, { "epoch": 36.45410628019324, "grad_norm": 0.7233803272247314, "learning_rate": 0.001, "loss": 1.8728, "step": 422576 }, { "epoch": 36.45893719806763, "grad_norm": 0.6402056217193604, "learning_rate": 0.001, "loss": 1.8759, "step": 422632 }, { "epoch": 36.46376811594203, "grad_norm": 1.071313738822937, "learning_rate": 0.001, "loss": 1.8741, "step": 422688 }, { "epoch": 36.468599033816425, "grad_norm": 1.8663617372512817, "learning_rate": 0.001, "loss": 1.8469, "step": 422744 }, { "epoch": 36.47342995169082, "grad_norm": 0.7930673360824585, "learning_rate": 0.001, "loss": 1.8342, "step": 422800 }, { "epoch": 36.47826086956522, "grad_norm": 1.5684564113616943, "learning_rate": 0.001, "loss": 1.8354, "step": 422856 }, { "epoch": 36.48309178743961, "grad_norm": 0.44366708397865295, "learning_rate": 0.001, "loss": 1.8368, "step": 422912 }, { "epoch": 36.48792270531401, "grad_norm": 0.47196897864341736, "learning_rate": 0.001, "loss": 1.835, "step": 422968 }, { "epoch": 36.492753623188406, "grad_norm": 0.5209424495697021, "learning_rate": 0.001, "loss": 1.8282, "step": 423024 }, { "epoch": 36.4975845410628, "grad_norm": 0.5942379832267761, "learning_rate": 0.001, "loss": 1.8302, "step": 423080 }, { "epoch": 36.5024154589372, "grad_norm": 0.960195004940033, "learning_rate": 0.001, "loss": 1.8233, "step": 423136 }, { "epoch": 36.507246376811594, "grad_norm": 1.280127763748169, "learning_rate": 0.001, "loss": 1.8192, "step": 423192 }, { "epoch": 36.51207729468599, "grad_norm": 0.5658990740776062, "learning_rate": 0.001, "loss": 1.8141, "step": 423248 }, { "epoch": 36.51690821256039, "grad_norm": 0.8378584980964661, "learning_rate": 0.001, "loss": 1.8133, "step": 423304 }, { "epoch": 36.52173913043478, "grad_norm": 1.4449589252471924, "learning_rate": 0.001, "loss": 1.8073, "step": 423360 }, { "epoch": 36.52657004830918, "grad_norm": 0.705717146396637, "learning_rate": 0.001, "loss": 1.8161, "step": 423416 }, { "epoch": 36.531400966183575, "grad_norm": 0.5583307147026062, "learning_rate": 0.001, "loss": 1.8139, "step": 423472 }, { "epoch": 36.53623188405797, "grad_norm": 0.4237203299999237, "learning_rate": 0.001, "loss": 1.8044, "step": 423528 }, { "epoch": 36.54106280193237, "grad_norm": 0.7137573957443237, "learning_rate": 0.001, "loss": 1.8061, "step": 423584 }, { "epoch": 36.54589371980676, "grad_norm": 2.2076714038848877, "learning_rate": 0.001, "loss": 1.8137, "step": 423640 }, { "epoch": 36.55072463768116, "grad_norm": 0.6838551759719849, "learning_rate": 0.001, "loss": 1.8076, "step": 423696 }, { "epoch": 36.55555555555556, "grad_norm": 1.1682318449020386, "learning_rate": 0.001, "loss": 1.8051, "step": 423752 }, { "epoch": 36.56038647342995, "grad_norm": 0.30587345361709595, "learning_rate": 0.001, "loss": 1.809, "step": 423808 }, { "epoch": 36.56521739130435, "grad_norm": 0.8469343185424805, "learning_rate": 0.001, "loss": 1.8143, "step": 423864 }, { "epoch": 36.570048309178745, "grad_norm": 0.5489840507507324, "learning_rate": 0.001, "loss": 1.8191, "step": 423920 }, { "epoch": 36.57487922705314, "grad_norm": 0.38876935839653015, "learning_rate": 0.001, "loss": 1.81, "step": 423976 }, { "epoch": 36.57971014492754, "grad_norm": 0.3187427222728729, "learning_rate": 0.001, "loss": 1.8089, "step": 424032 }, { "epoch": 36.58454106280193, "grad_norm": 0.3336658775806427, "learning_rate": 0.001, "loss": 1.8055, "step": 424088 }, { "epoch": 36.589371980676326, "grad_norm": 1.0279488563537598, "learning_rate": 0.001, "loss": 1.801, "step": 424144 }, { "epoch": 36.594202898550726, "grad_norm": 0.9203669428825378, "learning_rate": 0.001, "loss": 1.8038, "step": 424200 }, { "epoch": 36.59903381642512, "grad_norm": 4.256526947021484, "learning_rate": 0.001, "loss": 1.7985, "step": 424256 }, { "epoch": 36.60386473429952, "grad_norm": 0.6080761551856995, "learning_rate": 0.001, "loss": 1.8059, "step": 424312 }, { "epoch": 36.608695652173914, "grad_norm": 0.28674986958503723, "learning_rate": 0.001, "loss": 1.8108, "step": 424368 }, { "epoch": 36.61352657004831, "grad_norm": 0.8129708766937256, "learning_rate": 0.001, "loss": 1.7938, "step": 424424 }, { "epoch": 36.61835748792271, "grad_norm": 0.29786789417266846, "learning_rate": 0.001, "loss": 1.8046, "step": 424480 }, { "epoch": 36.6231884057971, "grad_norm": 0.3867568373680115, "learning_rate": 0.001, "loss": 1.7977, "step": 424536 }, { "epoch": 36.628019323671495, "grad_norm": 0.29274699091911316, "learning_rate": 0.001, "loss": 1.7992, "step": 424592 }, { "epoch": 36.632850241545896, "grad_norm": 0.8310989737510681, "learning_rate": 0.001, "loss": 1.7985, "step": 424648 }, { "epoch": 36.63768115942029, "grad_norm": 1.585929274559021, "learning_rate": 0.001, "loss": 1.8078, "step": 424704 }, { "epoch": 36.64251207729468, "grad_norm": 0.7017877101898193, "learning_rate": 0.001, "loss": 1.8077, "step": 424760 }, { "epoch": 36.64734299516908, "grad_norm": 0.3629225492477417, "learning_rate": 0.001, "loss": 1.8095, "step": 424816 }, { "epoch": 36.65217391304348, "grad_norm": 0.2989633083343506, "learning_rate": 0.001, "loss": 1.8084, "step": 424872 }, { "epoch": 36.65700483091788, "grad_norm": 1.017148733139038, "learning_rate": 0.001, "loss": 1.8, "step": 424928 }, { "epoch": 36.66183574879227, "grad_norm": 0.502622663974762, "learning_rate": 0.001, "loss": 1.8083, "step": 424984 }, { "epoch": 36.666666666666664, "grad_norm": 0.35630670189857483, "learning_rate": 0.001, "loss": 1.8099, "step": 425040 }, { "epoch": 36.671497584541065, "grad_norm": 1.0991458892822266, "learning_rate": 0.001, "loss": 1.8005, "step": 425096 }, { "epoch": 36.67632850241546, "grad_norm": 0.6963735222816467, "learning_rate": 0.001, "loss": 1.8053, "step": 425152 }, { "epoch": 36.68115942028985, "grad_norm": 0.2799973487854004, "learning_rate": 0.001, "loss": 1.8112, "step": 425208 }, { "epoch": 36.68599033816425, "grad_norm": 3.464158773422241, "learning_rate": 0.001, "loss": 1.8115, "step": 425264 }, { "epoch": 36.690821256038646, "grad_norm": 0.6238375902175903, "learning_rate": 0.001, "loss": 1.8061, "step": 425320 }, { "epoch": 36.69565217391305, "grad_norm": 0.5292114019393921, "learning_rate": 0.001, "loss": 1.7986, "step": 425376 }, { "epoch": 36.70048309178744, "grad_norm": 1.1414953470230103, "learning_rate": 0.001, "loss": 1.8019, "step": 425432 }, { "epoch": 36.70531400966183, "grad_norm": 1.150097131729126, "learning_rate": 0.001, "loss": 1.7889, "step": 425488 }, { "epoch": 36.710144927536234, "grad_norm": 5.404383182525635, "learning_rate": 0.001, "loss": 1.8005, "step": 425544 }, { "epoch": 36.71497584541063, "grad_norm": 0.7964653968811035, "learning_rate": 0.001, "loss": 1.8069, "step": 425600 }, { "epoch": 36.71980676328502, "grad_norm": 0.2718655467033386, "learning_rate": 0.001, "loss": 1.7989, "step": 425656 }, { "epoch": 36.72463768115942, "grad_norm": 0.38797837495803833, "learning_rate": 0.001, "loss": 1.8122, "step": 425712 }, { "epoch": 36.729468599033815, "grad_norm": 0.6828791499137878, "learning_rate": 0.001, "loss": 1.8065, "step": 425768 }, { "epoch": 36.734299516908216, "grad_norm": 1.314735770225525, "learning_rate": 0.001, "loss": 1.8129, "step": 425824 }, { "epoch": 36.73913043478261, "grad_norm": 0.4683690667152405, "learning_rate": 0.001, "loss": 1.8095, "step": 425880 }, { "epoch": 36.743961352657, "grad_norm": 0.2769481837749481, "learning_rate": 0.001, "loss": 1.8142, "step": 425936 }, { "epoch": 36.7487922705314, "grad_norm": 1.662924885749817, "learning_rate": 0.001, "loss": 1.8028, "step": 425992 }, { "epoch": 36.7536231884058, "grad_norm": 0.5976274013519287, "learning_rate": 0.001, "loss": 1.7935, "step": 426048 }, { "epoch": 36.75845410628019, "grad_norm": 1.263322114944458, "learning_rate": 0.001, "loss": 1.8041, "step": 426104 }, { "epoch": 36.76328502415459, "grad_norm": 0.3656352162361145, "learning_rate": 0.001, "loss": 1.8042, "step": 426160 }, { "epoch": 36.768115942028984, "grad_norm": 0.5080867409706116, "learning_rate": 0.001, "loss": 1.8072, "step": 426216 }, { "epoch": 36.772946859903385, "grad_norm": 0.34757837653160095, "learning_rate": 0.001, "loss": 1.808, "step": 426272 }, { "epoch": 36.77777777777778, "grad_norm": 0.3898159861564636, "learning_rate": 0.001, "loss": 1.8102, "step": 426328 }, { "epoch": 36.78260869565217, "grad_norm": 0.2943791151046753, "learning_rate": 0.001, "loss": 1.8095, "step": 426384 }, { "epoch": 36.78743961352657, "grad_norm": 0.6318961381912231, "learning_rate": 0.001, "loss": 1.816, "step": 426440 }, { "epoch": 36.792270531400966, "grad_norm": 0.43459683656692505, "learning_rate": 0.001, "loss": 1.8065, "step": 426496 }, { "epoch": 36.79710144927536, "grad_norm": 0.2659796476364136, "learning_rate": 0.001, "loss": 1.8201, "step": 426552 }, { "epoch": 36.80193236714976, "grad_norm": 0.8089146614074707, "learning_rate": 0.001, "loss": 1.8235, "step": 426608 }, { "epoch": 36.806763285024154, "grad_norm": 0.2565155029296875, "learning_rate": 0.001, "loss": 1.8061, "step": 426664 }, { "epoch": 36.81159420289855, "grad_norm": 0.35102909803390503, "learning_rate": 0.001, "loss": 1.8084, "step": 426720 }, { "epoch": 36.81642512077295, "grad_norm": 0.5686193108558655, "learning_rate": 0.001, "loss": 1.8143, "step": 426776 }, { "epoch": 36.82125603864734, "grad_norm": 0.5286378860473633, "learning_rate": 0.001, "loss": 1.8079, "step": 426832 }, { "epoch": 36.82608695652174, "grad_norm": 0.6484923958778381, "learning_rate": 0.001, "loss": 1.8037, "step": 426888 }, { "epoch": 36.830917874396135, "grad_norm": 0.5301809310913086, "learning_rate": 0.001, "loss": 1.7958, "step": 426944 }, { "epoch": 36.83574879227053, "grad_norm": 0.38552746176719666, "learning_rate": 0.001, "loss": 1.8119, "step": 427000 }, { "epoch": 36.84057971014493, "grad_norm": 0.7235148549079895, "learning_rate": 0.001, "loss": 1.8102, "step": 427056 }, { "epoch": 36.84541062801932, "grad_norm": 0.6579404473304749, "learning_rate": 0.001, "loss": 1.8123, "step": 427112 }, { "epoch": 36.85024154589372, "grad_norm": 1.8766202926635742, "learning_rate": 0.001, "loss": 1.8374, "step": 427168 }, { "epoch": 36.85507246376812, "grad_norm": 0.3992861211299896, "learning_rate": 0.001, "loss": 1.8374, "step": 427224 }, { "epoch": 36.85990338164251, "grad_norm": 1.6838467121124268, "learning_rate": 0.001, "loss": 1.8244, "step": 427280 }, { "epoch": 36.86473429951691, "grad_norm": 0.29545336961746216, "learning_rate": 0.001, "loss": 1.8143, "step": 427336 }, { "epoch": 36.869565217391305, "grad_norm": 1.104663610458374, "learning_rate": 0.001, "loss": 1.8053, "step": 427392 }, { "epoch": 36.8743961352657, "grad_norm": 0.2821395695209503, "learning_rate": 0.001, "loss": 1.8073, "step": 427448 }, { "epoch": 36.8792270531401, "grad_norm": 0.3605702817440033, "learning_rate": 0.001, "loss": 1.8081, "step": 427504 }, { "epoch": 36.88405797101449, "grad_norm": 0.33109819889068604, "learning_rate": 0.001, "loss": 1.8139, "step": 427560 }, { "epoch": 36.888888888888886, "grad_norm": 0.6582963466644287, "learning_rate": 0.001, "loss": 1.8093, "step": 427616 }, { "epoch": 36.893719806763286, "grad_norm": 0.33953168988227844, "learning_rate": 0.001, "loss": 1.819, "step": 427672 }, { "epoch": 36.89855072463768, "grad_norm": 1.5330380201339722, "learning_rate": 0.001, "loss": 1.8527, "step": 427728 }, { "epoch": 36.90338164251208, "grad_norm": 0.5256544947624207, "learning_rate": 0.001, "loss": 1.8125, "step": 427784 }, { "epoch": 36.908212560386474, "grad_norm": 0.2494855523109436, "learning_rate": 0.001, "loss": 1.8136, "step": 427840 }, { "epoch": 36.91304347826087, "grad_norm": 0.2984012961387634, "learning_rate": 0.001, "loss": 1.8136, "step": 427896 }, { "epoch": 36.91787439613527, "grad_norm": 6.267683029174805, "learning_rate": 0.001, "loss": 1.8028, "step": 427952 }, { "epoch": 36.92270531400966, "grad_norm": 0.5449560880661011, "learning_rate": 0.001, "loss": 1.8178, "step": 428008 }, { "epoch": 36.927536231884055, "grad_norm": 0.2684186100959778, "learning_rate": 0.001, "loss": 1.8222, "step": 428064 }, { "epoch": 36.932367149758456, "grad_norm": 5.631681442260742, "learning_rate": 0.001, "loss": 1.8066, "step": 428120 }, { "epoch": 36.93719806763285, "grad_norm": 0.2734910249710083, "learning_rate": 0.001, "loss": 1.8114, "step": 428176 }, { "epoch": 36.94202898550725, "grad_norm": 0.31353959441185, "learning_rate": 0.001, "loss": 1.7969, "step": 428232 }, { "epoch": 36.94685990338164, "grad_norm": 0.3809851408004761, "learning_rate": 0.001, "loss": 1.807, "step": 428288 }, { "epoch": 36.95169082125604, "grad_norm": 0.2942412197589874, "learning_rate": 0.001, "loss": 1.8062, "step": 428344 }, { "epoch": 36.95652173913044, "grad_norm": 0.42585289478302, "learning_rate": 0.001, "loss": 1.8037, "step": 428400 }, { "epoch": 36.96135265700483, "grad_norm": 0.4983316957950592, "learning_rate": 0.001, "loss": 1.797, "step": 428456 }, { "epoch": 36.966183574879224, "grad_norm": 0.6737427115440369, "learning_rate": 0.001, "loss": 1.8007, "step": 428512 }, { "epoch": 36.971014492753625, "grad_norm": 4.854911804199219, "learning_rate": 0.001, "loss": 1.803, "step": 428568 }, { "epoch": 36.97584541062802, "grad_norm": 1.1266233921051025, "learning_rate": 0.001, "loss": 1.8027, "step": 428624 }, { "epoch": 36.98067632850242, "grad_norm": 0.8535734415054321, "learning_rate": 0.001, "loss": 1.8232, "step": 428680 }, { "epoch": 36.98550724637681, "grad_norm": 3.8211865425109863, "learning_rate": 0.001, "loss": 1.8207, "step": 428736 }, { "epoch": 36.990338164251206, "grad_norm": 0.5237981677055359, "learning_rate": 0.001, "loss": 1.802, "step": 428792 }, { "epoch": 36.99516908212561, "grad_norm": 0.5662748217582703, "learning_rate": 0.001, "loss": 1.8176, "step": 428848 }, { "epoch": 37.0, "grad_norm": 2.2902872562408447, "learning_rate": 0.001, "loss": 1.8188, "step": 428904 }, { "epoch": 37.00483091787439, "grad_norm": 0.8495247960090637, "learning_rate": 0.001, "loss": 1.775, "step": 428960 }, { "epoch": 37.009661835748794, "grad_norm": 0.8331407308578491, "learning_rate": 0.001, "loss": 1.7783, "step": 429016 }, { "epoch": 37.01449275362319, "grad_norm": 0.9239394068717957, "learning_rate": 0.001, "loss": 1.7812, "step": 429072 }, { "epoch": 37.01932367149758, "grad_norm": 0.9420920014381409, "learning_rate": 0.001, "loss": 1.7809, "step": 429128 }, { "epoch": 37.02415458937198, "grad_norm": 1.253441333770752, "learning_rate": 0.001, "loss": 1.7789, "step": 429184 }, { "epoch": 37.028985507246375, "grad_norm": 11.27698802947998, "learning_rate": 0.001, "loss": 1.7901, "step": 429240 }, { "epoch": 37.033816425120776, "grad_norm": 1.17238187789917, "learning_rate": 0.001, "loss": 1.799, "step": 429296 }, { "epoch": 37.03864734299517, "grad_norm": 1.3603156805038452, "learning_rate": 0.001, "loss": 1.8049, "step": 429352 }, { "epoch": 37.04347826086956, "grad_norm": 1.2879185676574707, "learning_rate": 0.001, "loss": 1.8043, "step": 429408 }, { "epoch": 37.04830917874396, "grad_norm": 0.2640153765678406, "learning_rate": 0.001, "loss": 1.7995, "step": 429464 }, { "epoch": 37.05314009661836, "grad_norm": 0.8613932132720947, "learning_rate": 0.001, "loss": 1.783, "step": 429520 }, { "epoch": 37.05797101449275, "grad_norm": 3.0839366912841797, "learning_rate": 0.001, "loss": 1.7828, "step": 429576 }, { "epoch": 37.06280193236715, "grad_norm": 0.9025628566741943, "learning_rate": 0.001, "loss": 1.7917, "step": 429632 }, { "epoch": 37.067632850241544, "grad_norm": 1.1744378805160522, "learning_rate": 0.001, "loss": 1.7927, "step": 429688 }, { "epoch": 37.072463768115945, "grad_norm": 0.666778028011322, "learning_rate": 0.001, "loss": 1.7883, "step": 429744 }, { "epoch": 37.07729468599034, "grad_norm": 1.390675663948059, "learning_rate": 0.001, "loss": 1.8038, "step": 429800 }, { "epoch": 37.08212560386473, "grad_norm": 1.0713013410568237, "learning_rate": 0.001, "loss": 1.7908, "step": 429856 }, { "epoch": 37.08695652173913, "grad_norm": 1.0665810108184814, "learning_rate": 0.001, "loss": 1.7945, "step": 429912 }, { "epoch": 37.091787439613526, "grad_norm": 0.47662216424942017, "learning_rate": 0.001, "loss": 1.7993, "step": 429968 }, { "epoch": 37.09661835748792, "grad_norm": 0.25070974230766296, "learning_rate": 0.001, "loss": 1.7874, "step": 430024 }, { "epoch": 37.10144927536232, "grad_norm": 4.5656633377075195, "learning_rate": 0.001, "loss": 1.7946, "step": 430080 }, { "epoch": 37.106280193236714, "grad_norm": 0.3864899277687073, "learning_rate": 0.001, "loss": 1.8021, "step": 430136 }, { "epoch": 37.111111111111114, "grad_norm": 0.8839205503463745, "learning_rate": 0.001, "loss": 1.8033, "step": 430192 }, { "epoch": 37.11594202898551, "grad_norm": 2.609945774078369, "learning_rate": 0.001, "loss": 1.7942, "step": 430248 }, { "epoch": 37.1207729468599, "grad_norm": 0.31405994296073914, "learning_rate": 0.001, "loss": 1.8031, "step": 430304 }, { "epoch": 37.1256038647343, "grad_norm": 0.5954448580741882, "learning_rate": 0.001, "loss": 1.7905, "step": 430360 }, { "epoch": 37.130434782608695, "grad_norm": 0.6253069043159485, "learning_rate": 0.001, "loss": 1.7963, "step": 430416 }, { "epoch": 37.13526570048309, "grad_norm": 0.607184112071991, "learning_rate": 0.001, "loss": 1.7978, "step": 430472 }, { "epoch": 37.14009661835749, "grad_norm": 1.179418921470642, "learning_rate": 0.001, "loss": 1.7856, "step": 430528 }, { "epoch": 37.14492753623188, "grad_norm": 1.3099792003631592, "learning_rate": 0.001, "loss": 1.7841, "step": 430584 }, { "epoch": 37.14975845410628, "grad_norm": 4.476251602172852, "learning_rate": 0.001, "loss": 1.7907, "step": 430640 }, { "epoch": 37.15458937198068, "grad_norm": 0.4609124958515167, "learning_rate": 0.001, "loss": 1.7949, "step": 430696 }, { "epoch": 37.15942028985507, "grad_norm": 0.329669326543808, "learning_rate": 0.001, "loss": 1.789, "step": 430752 }, { "epoch": 37.16425120772947, "grad_norm": 0.7025365829467773, "learning_rate": 0.001, "loss": 1.7933, "step": 430808 }, { "epoch": 37.169082125603865, "grad_norm": 0.4039738178253174, "learning_rate": 0.001, "loss": 1.8042, "step": 430864 }, { "epoch": 37.17391304347826, "grad_norm": 0.42757540941238403, "learning_rate": 0.001, "loss": 1.8012, "step": 430920 }, { "epoch": 37.17874396135266, "grad_norm": 0.4611194133758545, "learning_rate": 0.001, "loss": 1.8018, "step": 430976 }, { "epoch": 37.18357487922705, "grad_norm": 0.293576180934906, "learning_rate": 0.001, "loss": 1.8003, "step": 431032 }, { "epoch": 37.18840579710145, "grad_norm": 0.27772918343544006, "learning_rate": 0.001, "loss": 1.8137, "step": 431088 }, { "epoch": 37.193236714975846, "grad_norm": 0.37366431951522827, "learning_rate": 0.001, "loss": 1.8152, "step": 431144 }, { "epoch": 37.19806763285024, "grad_norm": 0.3329792618751526, "learning_rate": 0.001, "loss": 1.8063, "step": 431200 }, { "epoch": 37.20289855072464, "grad_norm": 0.41718485951423645, "learning_rate": 0.001, "loss": 1.7985, "step": 431256 }, { "epoch": 37.207729468599034, "grad_norm": 0.40818530321121216, "learning_rate": 0.001, "loss": 1.7845, "step": 431312 }, { "epoch": 37.21256038647343, "grad_norm": 2.585310697555542, "learning_rate": 0.001, "loss": 1.7871, "step": 431368 }, { "epoch": 37.21739130434783, "grad_norm": 0.973480761051178, "learning_rate": 0.001, "loss": 1.7868, "step": 431424 }, { "epoch": 37.22222222222222, "grad_norm": 0.24061378836631775, "learning_rate": 0.001, "loss": 1.7902, "step": 431480 }, { "epoch": 37.227053140096615, "grad_norm": 0.8591878414154053, "learning_rate": 0.001, "loss": 1.7931, "step": 431536 }, { "epoch": 37.231884057971016, "grad_norm": 0.6007813215255737, "learning_rate": 0.001, "loss": 1.798, "step": 431592 }, { "epoch": 37.23671497584541, "grad_norm": 0.4660511314868927, "learning_rate": 0.001, "loss": 1.7982, "step": 431648 }, { "epoch": 37.24154589371981, "grad_norm": 0.4099147319793701, "learning_rate": 0.001, "loss": 1.8043, "step": 431704 }, { "epoch": 37.2463768115942, "grad_norm": 0.3252065181732178, "learning_rate": 0.001, "loss": 1.802, "step": 431760 }, { "epoch": 37.2512077294686, "grad_norm": 0.38114872574806213, "learning_rate": 0.001, "loss": 1.8052, "step": 431816 }, { "epoch": 37.256038647343, "grad_norm": 1.8023308515548706, "learning_rate": 0.001, "loss": 1.8165, "step": 431872 }, { "epoch": 37.26086956521739, "grad_norm": 0.3416430950164795, "learning_rate": 0.001, "loss": 1.8023, "step": 431928 }, { "epoch": 37.265700483091784, "grad_norm": 0.28233957290649414, "learning_rate": 0.001, "loss": 1.8145, "step": 431984 }, { "epoch": 37.270531400966185, "grad_norm": 2.651761293411255, "learning_rate": 0.001, "loss": 1.7969, "step": 432040 }, { "epoch": 37.27536231884058, "grad_norm": 1.3144161701202393, "learning_rate": 0.001, "loss": 1.8034, "step": 432096 }, { "epoch": 37.28019323671498, "grad_norm": 0.48560410737991333, "learning_rate": 0.001, "loss": 1.7999, "step": 432152 }, { "epoch": 37.28502415458937, "grad_norm": 2.9764418601989746, "learning_rate": 0.001, "loss": 1.7947, "step": 432208 }, { "epoch": 37.289855072463766, "grad_norm": 0.43643227219581604, "learning_rate": 0.001, "loss": 1.8198, "step": 432264 }, { "epoch": 37.29468599033817, "grad_norm": 0.27631834149360657, "learning_rate": 0.001, "loss": 1.8048, "step": 432320 }, { "epoch": 37.29951690821256, "grad_norm": 2.0431299209594727, "learning_rate": 0.001, "loss": 1.8011, "step": 432376 }, { "epoch": 37.30434782608695, "grad_norm": 0.9546424746513367, "learning_rate": 0.001, "loss": 1.7984, "step": 432432 }, { "epoch": 37.309178743961354, "grad_norm": 0.5638821721076965, "learning_rate": 0.001, "loss": 1.8115, "step": 432488 }, { "epoch": 37.31400966183575, "grad_norm": 0.2640678584575653, "learning_rate": 0.001, "loss": 1.8161, "step": 432544 }, { "epoch": 37.31884057971015, "grad_norm": 0.3586031198501587, "learning_rate": 0.001, "loss": 1.8118, "step": 432600 }, { "epoch": 37.32367149758454, "grad_norm": 1.2178837060928345, "learning_rate": 0.001, "loss": 1.8083, "step": 432656 }, { "epoch": 37.328502415458935, "grad_norm": 0.4629884660243988, "learning_rate": 0.001, "loss": 1.7981, "step": 432712 }, { "epoch": 37.333333333333336, "grad_norm": 0.9612137079238892, "learning_rate": 0.001, "loss": 1.8076, "step": 432768 }, { "epoch": 37.33816425120773, "grad_norm": 1.936285138130188, "learning_rate": 0.001, "loss": 1.804, "step": 432824 }, { "epoch": 37.34299516908212, "grad_norm": 0.35145169496536255, "learning_rate": 0.001, "loss": 1.7977, "step": 432880 }, { "epoch": 37.34782608695652, "grad_norm": 0.29318633675575256, "learning_rate": 0.001, "loss": 1.8123, "step": 432936 }, { "epoch": 37.35265700483092, "grad_norm": 5.442259788513184, "learning_rate": 0.001, "loss": 1.8193, "step": 432992 }, { "epoch": 37.35748792270532, "grad_norm": 0.3427492082118988, "learning_rate": 0.001, "loss": 1.8012, "step": 433048 }, { "epoch": 37.36231884057971, "grad_norm": 0.6948018670082092, "learning_rate": 0.001, "loss": 1.7986, "step": 433104 }, { "epoch": 37.367149758454104, "grad_norm": 3.041978120803833, "learning_rate": 0.001, "loss": 1.8059, "step": 433160 }, { "epoch": 37.371980676328505, "grad_norm": 0.5605401396751404, "learning_rate": 0.001, "loss": 1.8107, "step": 433216 }, { "epoch": 37.3768115942029, "grad_norm": 0.474217027425766, "learning_rate": 0.001, "loss": 1.804, "step": 433272 }, { "epoch": 37.38164251207729, "grad_norm": 0.9622037410736084, "learning_rate": 0.001, "loss": 1.7973, "step": 433328 }, { "epoch": 37.38647342995169, "grad_norm": 0.6046218276023865, "learning_rate": 0.001, "loss": 1.7996, "step": 433384 }, { "epoch": 37.391304347826086, "grad_norm": 0.431861013174057, "learning_rate": 0.001, "loss": 1.8021, "step": 433440 }, { "epoch": 37.39613526570048, "grad_norm": 0.5137316584587097, "learning_rate": 0.001, "loss": 1.7906, "step": 433496 }, { "epoch": 37.40096618357488, "grad_norm": 0.7693114280700684, "learning_rate": 0.001, "loss": 1.7961, "step": 433552 }, { "epoch": 37.405797101449274, "grad_norm": 3.450913906097412, "learning_rate": 0.001, "loss": 1.7956, "step": 433608 }, { "epoch": 37.410628019323674, "grad_norm": 0.5693427324295044, "learning_rate": 0.001, "loss": 1.7947, "step": 433664 }, { "epoch": 37.41545893719807, "grad_norm": 0.4693599045276642, "learning_rate": 0.001, "loss": 1.7928, "step": 433720 }, { "epoch": 37.42028985507246, "grad_norm": 0.5289928913116455, "learning_rate": 0.001, "loss": 1.7972, "step": 433776 }, { "epoch": 37.42512077294686, "grad_norm": 0.7103660702705383, "learning_rate": 0.001, "loss": 1.7978, "step": 433832 }, { "epoch": 37.429951690821255, "grad_norm": 0.40108683705329895, "learning_rate": 0.001, "loss": 1.8, "step": 433888 }, { "epoch": 37.43478260869565, "grad_norm": 0.39831897616386414, "learning_rate": 0.001, "loss": 1.8046, "step": 433944 }, { "epoch": 37.43961352657005, "grad_norm": 0.31081414222717285, "learning_rate": 0.001, "loss": 1.809, "step": 434000 }, { "epoch": 37.44444444444444, "grad_norm": 0.352687805891037, "learning_rate": 0.001, "loss": 1.8034, "step": 434056 }, { "epoch": 37.44927536231884, "grad_norm": 0.2835441827774048, "learning_rate": 0.001, "loss": 1.7932, "step": 434112 }, { "epoch": 37.45410628019324, "grad_norm": 0.41584137082099915, "learning_rate": 0.001, "loss": 1.7963, "step": 434168 }, { "epoch": 37.45893719806763, "grad_norm": 0.6223008632659912, "learning_rate": 0.001, "loss": 1.7895, "step": 434224 }, { "epoch": 37.46376811594203, "grad_norm": 0.583615779876709, "learning_rate": 0.001, "loss": 1.7862, "step": 434280 }, { "epoch": 37.468599033816425, "grad_norm": 0.6217259168624878, "learning_rate": 0.001, "loss": 1.785, "step": 434336 }, { "epoch": 37.47342995169082, "grad_norm": 0.5850480794906616, "learning_rate": 0.001, "loss": 1.8086, "step": 434392 }, { "epoch": 37.47826086956522, "grad_norm": 0.6462048888206482, "learning_rate": 0.001, "loss": 1.8152, "step": 434448 }, { "epoch": 37.48309178743961, "grad_norm": 0.29878631234169006, "learning_rate": 0.001, "loss": 1.8147, "step": 434504 }, { "epoch": 37.48792270531401, "grad_norm": 0.536068856716156, "learning_rate": 0.001, "loss": 1.7956, "step": 434560 }, { "epoch": 37.492753623188406, "grad_norm": 4.235079288482666, "learning_rate": 0.001, "loss": 1.7907, "step": 434616 }, { "epoch": 37.4975845410628, "grad_norm": 3.0241127014160156, "learning_rate": 0.001, "loss": 1.803, "step": 434672 }, { "epoch": 37.5024154589372, "grad_norm": 0.44283756613731384, "learning_rate": 0.001, "loss": 1.8089, "step": 434728 }, { "epoch": 37.507246376811594, "grad_norm": 1.614266276359558, "learning_rate": 0.001, "loss": 1.8133, "step": 434784 }, { "epoch": 37.51207729468599, "grad_norm": 1.0691124200820923, "learning_rate": 0.001, "loss": 1.8129, "step": 434840 }, { "epoch": 37.51690821256039, "grad_norm": 0.4427920877933502, "learning_rate": 0.001, "loss": 1.8094, "step": 434896 }, { "epoch": 37.52173913043478, "grad_norm": 0.7578710317611694, "learning_rate": 0.001, "loss": 1.8117, "step": 434952 }, { "epoch": 37.52657004830918, "grad_norm": 0.7158226370811462, "learning_rate": 0.001, "loss": 1.8096, "step": 435008 }, { "epoch": 37.531400966183575, "grad_norm": 0.2901918590068817, "learning_rate": 0.001, "loss": 1.8226, "step": 435064 }, { "epoch": 37.53623188405797, "grad_norm": 2.127894163131714, "learning_rate": 0.001, "loss": 1.8172, "step": 435120 }, { "epoch": 37.54106280193237, "grad_norm": 0.6813138723373413, "learning_rate": 0.001, "loss": 1.8341, "step": 435176 }, { "epoch": 37.54589371980676, "grad_norm": 5.986727714538574, "learning_rate": 0.001, "loss": 1.8368, "step": 435232 }, { "epoch": 37.55072463768116, "grad_norm": 2.1489498615264893, "learning_rate": 0.001, "loss": 1.8319, "step": 435288 }, { "epoch": 37.55555555555556, "grad_norm": 0.4706360995769501, "learning_rate": 0.001, "loss": 1.8235, "step": 435344 }, { "epoch": 37.56038647342995, "grad_norm": 0.3429426848888397, "learning_rate": 0.001, "loss": 1.8246, "step": 435400 }, { "epoch": 37.56521739130435, "grad_norm": 0.8397558927536011, "learning_rate": 0.001, "loss": 1.8211, "step": 435456 }, { "epoch": 37.570048309178745, "grad_norm": 0.3322698771953583, "learning_rate": 0.001, "loss": 1.8294, "step": 435512 }, { "epoch": 37.57487922705314, "grad_norm": 0.5323711633682251, "learning_rate": 0.001, "loss": 1.8247, "step": 435568 }, { "epoch": 37.57971014492754, "grad_norm": 3.6785521507263184, "learning_rate": 0.001, "loss": 1.8132, "step": 435624 }, { "epoch": 37.58454106280193, "grad_norm": 0.3592097759246826, "learning_rate": 0.001, "loss": 1.8153, "step": 435680 }, { "epoch": 37.589371980676326, "grad_norm": 0.3200095295906067, "learning_rate": 0.001, "loss": 1.8093, "step": 435736 }, { "epoch": 37.594202898550726, "grad_norm": 0.38976943492889404, "learning_rate": 0.001, "loss": 1.821, "step": 435792 }, { "epoch": 37.59903381642512, "grad_norm": 1.1410948038101196, "learning_rate": 0.001, "loss": 1.8096, "step": 435848 }, { "epoch": 37.60386473429952, "grad_norm": 0.6095148324966431, "learning_rate": 0.001, "loss": 1.8093, "step": 435904 }, { "epoch": 37.608695652173914, "grad_norm": 18.66442108154297, "learning_rate": 0.001, "loss": 1.8018, "step": 435960 }, { "epoch": 37.61352657004831, "grad_norm": 0.5409610867500305, "learning_rate": 0.001, "loss": 1.8067, "step": 436016 }, { "epoch": 37.61835748792271, "grad_norm": 1.8446381092071533, "learning_rate": 0.001, "loss": 1.8061, "step": 436072 }, { "epoch": 37.6231884057971, "grad_norm": 0.6559701561927795, "learning_rate": 0.001, "loss": 1.8105, "step": 436128 }, { "epoch": 37.628019323671495, "grad_norm": 9.215534210205078, "learning_rate": 0.001, "loss": 1.8133, "step": 436184 }, { "epoch": 37.632850241545896, "grad_norm": 1.4116722345352173, "learning_rate": 0.001, "loss": 1.8015, "step": 436240 }, { "epoch": 37.63768115942029, "grad_norm": 0.5056512951850891, "learning_rate": 0.001, "loss": 1.7962, "step": 436296 }, { "epoch": 37.64251207729468, "grad_norm": 4.850395202636719, "learning_rate": 0.001, "loss": 1.8011, "step": 436352 }, { "epoch": 37.64734299516908, "grad_norm": 0.42203807830810547, "learning_rate": 0.001, "loss": 1.7961, "step": 436408 }, { "epoch": 37.65217391304348, "grad_norm": 1.1552599668502808, "learning_rate": 0.001, "loss": 1.8017, "step": 436464 }, { "epoch": 37.65700483091788, "grad_norm": 3.7905099391937256, "learning_rate": 0.001, "loss": 1.8094, "step": 436520 }, { "epoch": 37.66183574879227, "grad_norm": 2.45000958442688, "learning_rate": 0.001, "loss": 1.8208, "step": 436576 }, { "epoch": 37.666666666666664, "grad_norm": 6.391603946685791, "learning_rate": 0.001, "loss": 1.8252, "step": 436632 }, { "epoch": 37.671497584541065, "grad_norm": 2.3429369926452637, "learning_rate": 0.001, "loss": 1.8248, "step": 436688 }, { "epoch": 37.67632850241546, "grad_norm": 0.877004086971283, "learning_rate": 0.001, "loss": 1.8201, "step": 436744 }, { "epoch": 37.68115942028985, "grad_norm": 1.5893133878707886, "learning_rate": 0.001, "loss": 1.8314, "step": 436800 }, { "epoch": 37.68599033816425, "grad_norm": 0.9538088440895081, "learning_rate": 0.001, "loss": 1.8352, "step": 436856 }, { "epoch": 37.690821256038646, "grad_norm": 2.330740451812744, "learning_rate": 0.001, "loss": 1.8218, "step": 436912 }, { "epoch": 37.69565217391305, "grad_norm": 0.978886604309082, "learning_rate": 0.001, "loss": 1.8295, "step": 436968 }, { "epoch": 37.70048309178744, "grad_norm": 0.2669030427932739, "learning_rate": 0.001, "loss": 1.8212, "step": 437024 }, { "epoch": 37.70531400966183, "grad_norm": 0.3462893068790436, "learning_rate": 0.001, "loss": 1.8221, "step": 437080 }, { "epoch": 37.710144927536234, "grad_norm": 3.944455623626709, "learning_rate": 0.001, "loss": 1.8215, "step": 437136 }, { "epoch": 37.71497584541063, "grad_norm": 1.0964136123657227, "learning_rate": 0.001, "loss": 1.8406, "step": 437192 }, { "epoch": 37.71980676328502, "grad_norm": 0.624636709690094, "learning_rate": 0.001, "loss": 1.8371, "step": 437248 }, { "epoch": 37.72463768115942, "grad_norm": 0.44444236159324646, "learning_rate": 0.001, "loss": 1.8433, "step": 437304 }, { "epoch": 37.729468599033815, "grad_norm": 1.2471266984939575, "learning_rate": 0.001, "loss": 1.8325, "step": 437360 }, { "epoch": 37.734299516908216, "grad_norm": 0.4111259877681732, "learning_rate": 0.001, "loss": 1.8339, "step": 437416 }, { "epoch": 37.73913043478261, "grad_norm": 0.7006278038024902, "learning_rate": 0.001, "loss": 1.8322, "step": 437472 }, { "epoch": 37.743961352657, "grad_norm": 5.651107311248779, "learning_rate": 0.001, "loss": 1.8262, "step": 437528 }, { "epoch": 37.7487922705314, "grad_norm": 0.7450050115585327, "learning_rate": 0.001, "loss": 1.8208, "step": 437584 }, { "epoch": 37.7536231884058, "grad_norm": 0.4873601794242859, "learning_rate": 0.001, "loss": 1.8117, "step": 437640 }, { "epoch": 37.75845410628019, "grad_norm": 4.328714847564697, "learning_rate": 0.001, "loss": 1.8181, "step": 437696 }, { "epoch": 37.76328502415459, "grad_norm": 3.2995386123657227, "learning_rate": 0.001, "loss": 1.8228, "step": 437752 }, { "epoch": 37.768115942028984, "grad_norm": 0.4106883704662323, "learning_rate": 0.001, "loss": 1.8327, "step": 437808 }, { "epoch": 37.772946859903385, "grad_norm": 4.684328556060791, "learning_rate": 0.001, "loss": 1.8316, "step": 437864 }, { "epoch": 37.77777777777778, "grad_norm": 2.6593568325042725, "learning_rate": 0.001, "loss": 1.8191, "step": 437920 }, { "epoch": 37.78260869565217, "grad_norm": 1.1616255044937134, "learning_rate": 0.001, "loss": 1.8286, "step": 437976 }, { "epoch": 37.78743961352657, "grad_norm": 0.3611791431903839, "learning_rate": 0.001, "loss": 1.8464, "step": 438032 }, { "epoch": 37.792270531400966, "grad_norm": 1.3245964050292969, "learning_rate": 0.001, "loss": 1.8454, "step": 438088 }, { "epoch": 37.79710144927536, "grad_norm": 2.312638998031616, "learning_rate": 0.001, "loss": 1.8241, "step": 438144 }, { "epoch": 37.80193236714976, "grad_norm": 0.4818379282951355, "learning_rate": 0.001, "loss": 1.8168, "step": 438200 }, { "epoch": 37.806763285024154, "grad_norm": 0.840203046798706, "learning_rate": 0.001, "loss": 1.8181, "step": 438256 }, { "epoch": 37.81159420289855, "grad_norm": 1.3683149814605713, "learning_rate": 0.001, "loss": 1.8234, "step": 438312 }, { "epoch": 37.81642512077295, "grad_norm": 0.42114439606666565, "learning_rate": 0.001, "loss": 1.8244, "step": 438368 }, { "epoch": 37.82125603864734, "grad_norm": 2.7872049808502197, "learning_rate": 0.001, "loss": 1.8222, "step": 438424 }, { "epoch": 37.82608695652174, "grad_norm": 0.3368990123271942, "learning_rate": 0.001, "loss": 1.8215, "step": 438480 }, { "epoch": 37.830917874396135, "grad_norm": 2.037951946258545, "learning_rate": 0.001, "loss": 1.8196, "step": 438536 }, { "epoch": 37.83574879227053, "grad_norm": 2.174989700317383, "learning_rate": 0.001, "loss": 1.8128, "step": 438592 }, { "epoch": 37.84057971014493, "grad_norm": 7.981287479400635, "learning_rate": 0.001, "loss": 1.8251, "step": 438648 }, { "epoch": 37.84541062801932, "grad_norm": 0.345838725566864, "learning_rate": 0.001, "loss": 1.8234, "step": 438704 }, { "epoch": 37.85024154589372, "grad_norm": 0.6326851844787598, "learning_rate": 0.001, "loss": 1.8334, "step": 438760 }, { "epoch": 37.85507246376812, "grad_norm": 0.7998091578483582, "learning_rate": 0.001, "loss": 1.8256, "step": 438816 }, { "epoch": 37.85990338164251, "grad_norm": 0.409668505191803, "learning_rate": 0.001, "loss": 1.832, "step": 438872 }, { "epoch": 37.86473429951691, "grad_norm": 0.9348124265670776, "learning_rate": 0.001, "loss": 1.8295, "step": 438928 }, { "epoch": 37.869565217391305, "grad_norm": 1.647357702255249, "learning_rate": 0.001, "loss": 1.8163, "step": 438984 }, { "epoch": 37.8743961352657, "grad_norm": 3.369560718536377, "learning_rate": 0.001, "loss": 1.8163, "step": 439040 }, { "epoch": 37.8792270531401, "grad_norm": 0.2613995373249054, "learning_rate": 0.001, "loss": 1.8299, "step": 439096 }, { "epoch": 37.88405797101449, "grad_norm": 1.1269335746765137, "learning_rate": 0.001, "loss": 1.821, "step": 439152 }, { "epoch": 37.888888888888886, "grad_norm": 1.0456128120422363, "learning_rate": 0.001, "loss": 1.8262, "step": 439208 }, { "epoch": 37.893719806763286, "grad_norm": 0.6296815872192383, "learning_rate": 0.001, "loss": 1.8157, "step": 439264 }, { "epoch": 37.89855072463768, "grad_norm": 0.9080009460449219, "learning_rate": 0.001, "loss": 1.8186, "step": 439320 }, { "epoch": 37.90338164251208, "grad_norm": 2.7807295322418213, "learning_rate": 0.001, "loss": 1.8136, "step": 439376 }, { "epoch": 37.908212560386474, "grad_norm": 0.3830980956554413, "learning_rate": 0.001, "loss": 1.815, "step": 439432 }, { "epoch": 37.91304347826087, "grad_norm": 1.161338210105896, "learning_rate": 0.001, "loss": 1.8177, "step": 439488 }, { "epoch": 37.91787439613527, "grad_norm": 0.2936462163925171, "learning_rate": 0.001, "loss": 1.8078, "step": 439544 }, { "epoch": 37.92270531400966, "grad_norm": 0.42556387186050415, "learning_rate": 0.001, "loss": 1.8162, "step": 439600 }, { "epoch": 37.927536231884055, "grad_norm": 0.28779345750808716, "learning_rate": 0.001, "loss": 1.8254, "step": 439656 }, { "epoch": 37.932367149758456, "grad_norm": 3.3047831058502197, "learning_rate": 0.001, "loss": 1.8157, "step": 439712 }, { "epoch": 37.93719806763285, "grad_norm": 0.4491784870624542, "learning_rate": 0.001, "loss": 1.8183, "step": 439768 }, { "epoch": 37.94202898550725, "grad_norm": 0.6522452235221863, "learning_rate": 0.001, "loss": 1.8112, "step": 439824 }, { "epoch": 37.94685990338164, "grad_norm": 4.115870952606201, "learning_rate": 0.001, "loss": 1.8056, "step": 439880 }, { "epoch": 37.95169082125604, "grad_norm": 0.29169243574142456, "learning_rate": 0.001, "loss": 1.8111, "step": 439936 }, { "epoch": 37.95652173913044, "grad_norm": 0.29916447401046753, "learning_rate": 0.001, "loss": 1.8048, "step": 439992 }, { "epoch": 37.96135265700483, "grad_norm": 0.3145538568496704, "learning_rate": 0.001, "loss": 1.8082, "step": 440048 }, { "epoch": 37.966183574879224, "grad_norm": 1.2632999420166016, "learning_rate": 0.001, "loss": 1.8085, "step": 440104 }, { "epoch": 37.971014492753625, "grad_norm": 0.5762110948562622, "learning_rate": 0.001, "loss": 1.8175, "step": 440160 }, { "epoch": 37.97584541062802, "grad_norm": 0.4545021951198578, "learning_rate": 0.001, "loss": 1.8072, "step": 440216 }, { "epoch": 37.98067632850242, "grad_norm": 0.355076402425766, "learning_rate": 0.001, "loss": 1.8085, "step": 440272 }, { "epoch": 37.98550724637681, "grad_norm": 0.864734947681427, "learning_rate": 0.001, "loss": 1.8002, "step": 440328 }, { "epoch": 37.990338164251206, "grad_norm": 0.3194849193096161, "learning_rate": 0.001, "loss": 1.8037, "step": 440384 }, { "epoch": 37.99516908212561, "grad_norm": 3.473010540008545, "learning_rate": 0.001, "loss": 1.8042, "step": 440440 }, { "epoch": 38.0, "grad_norm": 0.332484632730484, "learning_rate": 0.001, "loss": 1.8108, "step": 440496 }, { "epoch": 38.00483091787439, "grad_norm": 0.2825302183628082, "learning_rate": 0.001, "loss": 1.7689, "step": 440552 }, { "epoch": 38.009661835748794, "grad_norm": 0.2533167004585266, "learning_rate": 0.001, "loss": 1.7675, "step": 440608 }, { "epoch": 38.01449275362319, "grad_norm": 0.35121768712997437, "learning_rate": 0.001, "loss": 1.774, "step": 440664 }, { "epoch": 38.01932367149758, "grad_norm": 0.6180436611175537, "learning_rate": 0.001, "loss": 1.7751, "step": 440720 }, { "epoch": 38.02415458937198, "grad_norm": 1.3262364864349365, "learning_rate": 0.001, "loss": 1.7715, "step": 440776 }, { "epoch": 38.028985507246375, "grad_norm": 0.6194058656692505, "learning_rate": 0.001, "loss": 1.7787, "step": 440832 }, { "epoch": 38.033816425120776, "grad_norm": 1.1052429676055908, "learning_rate": 0.001, "loss": 1.7802, "step": 440888 }, { "epoch": 38.03864734299517, "grad_norm": 0.3435405492782593, "learning_rate": 0.001, "loss": 1.7802, "step": 440944 }, { "epoch": 38.04347826086956, "grad_norm": 0.6084678769111633, "learning_rate": 0.001, "loss": 1.7722, "step": 441000 }, { "epoch": 38.04830917874396, "grad_norm": 0.2961541414260864, "learning_rate": 0.001, "loss": 1.7763, "step": 441056 }, { "epoch": 38.05314009661836, "grad_norm": 0.6770376563072205, "learning_rate": 0.001, "loss": 1.7762, "step": 441112 }, { "epoch": 38.05797101449275, "grad_norm": 0.408786416053772, "learning_rate": 0.001, "loss": 1.7786, "step": 441168 }, { "epoch": 38.06280193236715, "grad_norm": 6.636822700500488, "learning_rate": 0.001, "loss": 1.7731, "step": 441224 }, { "epoch": 38.067632850241544, "grad_norm": 0.7064849138259888, "learning_rate": 0.001, "loss": 1.7744, "step": 441280 }, { "epoch": 38.072463768115945, "grad_norm": 0.28918537497520447, "learning_rate": 0.001, "loss": 1.7723, "step": 441336 }, { "epoch": 38.07729468599034, "grad_norm": 2.697930335998535, "learning_rate": 0.001, "loss": 1.7797, "step": 441392 }, { "epoch": 38.08212560386473, "grad_norm": 1.2852022647857666, "learning_rate": 0.001, "loss": 1.8785, "step": 441448 }, { "epoch": 38.08695652173913, "grad_norm": 0.3710173964500427, "learning_rate": 0.001, "loss": 1.792, "step": 441504 }, { "epoch": 38.091787439613526, "grad_norm": 1.2837814092636108, "learning_rate": 0.001, "loss": 1.8071, "step": 441560 }, { "epoch": 38.09661835748792, "grad_norm": 0.7191460132598877, "learning_rate": 0.001, "loss": 1.8064, "step": 441616 }, { "epoch": 38.10144927536232, "grad_norm": 0.28805872797966003, "learning_rate": 0.001, "loss": 1.7816, "step": 441672 }, { "epoch": 38.106280193236714, "grad_norm": 0.2820066809654236, "learning_rate": 0.001, "loss": 1.7803, "step": 441728 }, { "epoch": 38.111111111111114, "grad_norm": 0.4016064703464508, "learning_rate": 0.001, "loss": 1.7923, "step": 441784 }, { "epoch": 38.11594202898551, "grad_norm": 0.364282488822937, "learning_rate": 0.001, "loss": 1.7808, "step": 441840 }, { "epoch": 38.1207729468599, "grad_norm": 0.4857824742794037, "learning_rate": 0.001, "loss": 1.7788, "step": 441896 }, { "epoch": 38.1256038647343, "grad_norm": 1.052676796913147, "learning_rate": 0.001, "loss": 1.7754, "step": 441952 }, { "epoch": 38.130434782608695, "grad_norm": 0.30089664459228516, "learning_rate": 0.001, "loss": 1.7835, "step": 442008 }, { "epoch": 38.13526570048309, "grad_norm": 0.8481897711753845, "learning_rate": 0.001, "loss": 1.7737, "step": 442064 }, { "epoch": 38.14009661835749, "grad_norm": 0.33267903327941895, "learning_rate": 0.001, "loss": 1.7757, "step": 442120 }, { "epoch": 38.14492753623188, "grad_norm": 0.990314245223999, "learning_rate": 0.001, "loss": 1.7883, "step": 442176 }, { "epoch": 38.14975845410628, "grad_norm": 5.142291069030762, "learning_rate": 0.001, "loss": 1.785, "step": 442232 }, { "epoch": 38.15458937198068, "grad_norm": 0.24889129400253296, "learning_rate": 0.001, "loss": 1.778, "step": 442288 }, { "epoch": 38.15942028985507, "grad_norm": 0.291190505027771, "learning_rate": 0.001, "loss": 1.7728, "step": 442344 }, { "epoch": 38.16425120772947, "grad_norm": 0.2937721014022827, "learning_rate": 0.001, "loss": 1.7762, "step": 442400 }, { "epoch": 38.169082125603865, "grad_norm": 1.039170265197754, "learning_rate": 0.001, "loss": 1.785, "step": 442456 }, { "epoch": 38.17391304347826, "grad_norm": 0.39599063992500305, "learning_rate": 0.001, "loss": 1.779, "step": 442512 }, { "epoch": 38.17874396135266, "grad_norm": 1.6691745519638062, "learning_rate": 0.001, "loss": 1.7779, "step": 442568 }, { "epoch": 38.18357487922705, "grad_norm": 0.6476718187332153, "learning_rate": 0.001, "loss": 1.8102, "step": 442624 }, { "epoch": 38.18840579710145, "grad_norm": 0.2565389573574066, "learning_rate": 0.001, "loss": 1.7822, "step": 442680 }, { "epoch": 38.193236714975846, "grad_norm": 1.7020325660705566, "learning_rate": 0.001, "loss": 1.7774, "step": 442736 }, { "epoch": 38.19806763285024, "grad_norm": 0.2932084798812866, "learning_rate": 0.001, "loss": 1.7862, "step": 442792 }, { "epoch": 38.20289855072464, "grad_norm": 1.7871392965316772, "learning_rate": 0.001, "loss": 1.7955, "step": 442848 }, { "epoch": 38.207729468599034, "grad_norm": 0.4320087730884552, "learning_rate": 0.001, "loss": 1.7818, "step": 442904 }, { "epoch": 38.21256038647343, "grad_norm": 5.316151142120361, "learning_rate": 0.001, "loss": 1.7836, "step": 442960 }, { "epoch": 38.21739130434783, "grad_norm": 0.3735989034175873, "learning_rate": 0.001, "loss": 1.7914, "step": 443016 }, { "epoch": 38.22222222222222, "grad_norm": 0.3353160619735718, "learning_rate": 0.001, "loss": 1.7765, "step": 443072 }, { "epoch": 38.227053140096615, "grad_norm": 1.1306794881820679, "learning_rate": 0.001, "loss": 1.779, "step": 443128 }, { "epoch": 38.231884057971016, "grad_norm": 0.3718976378440857, "learning_rate": 0.001, "loss": 1.7741, "step": 443184 }, { "epoch": 38.23671497584541, "grad_norm": 0.314972460269928, "learning_rate": 0.001, "loss": 1.7859, "step": 443240 }, { "epoch": 38.24154589371981, "grad_norm": 0.3694324493408203, "learning_rate": 0.001, "loss": 1.7819, "step": 443296 }, { "epoch": 38.2463768115942, "grad_norm": 0.3555459976196289, "learning_rate": 0.001, "loss": 1.7874, "step": 443352 }, { "epoch": 38.2512077294686, "grad_norm": 0.32692813873291016, "learning_rate": 0.001, "loss": 1.776, "step": 443408 }, { "epoch": 38.256038647343, "grad_norm": 0.34501805901527405, "learning_rate": 0.001, "loss": 1.7816, "step": 443464 }, { "epoch": 38.26086956521739, "grad_norm": 0.30052995681762695, "learning_rate": 0.001, "loss": 1.7749, "step": 443520 }, { "epoch": 38.265700483091784, "grad_norm": 0.461503803730011, "learning_rate": 0.001, "loss": 1.7776, "step": 443576 }, { "epoch": 38.270531400966185, "grad_norm": 0.948097825050354, "learning_rate": 0.001, "loss": 1.7719, "step": 443632 }, { "epoch": 38.27536231884058, "grad_norm": 1.2265740633010864, "learning_rate": 0.001, "loss": 1.7774, "step": 443688 }, { "epoch": 38.28019323671498, "grad_norm": 0.27604612708091736, "learning_rate": 0.001, "loss": 1.7754, "step": 443744 }, { "epoch": 38.28502415458937, "grad_norm": 0.3369390070438385, "learning_rate": 0.001, "loss": 1.7773, "step": 443800 }, { "epoch": 38.289855072463766, "grad_norm": 0.5619394183158875, "learning_rate": 0.001, "loss": 1.7741, "step": 443856 }, { "epoch": 38.29468599033817, "grad_norm": 1.4710376262664795, "learning_rate": 0.001, "loss": 1.7719, "step": 443912 }, { "epoch": 38.29951690821256, "grad_norm": 0.9131851196289062, "learning_rate": 0.001, "loss": 1.7867, "step": 443968 }, { "epoch": 38.30434782608695, "grad_norm": 0.29235509037971497, "learning_rate": 0.001, "loss": 1.7812, "step": 444024 }, { "epoch": 38.309178743961354, "grad_norm": 0.7638712525367737, "learning_rate": 0.001, "loss": 1.7784, "step": 444080 }, { "epoch": 38.31400966183575, "grad_norm": 0.7243824601173401, "learning_rate": 0.001, "loss": 1.7809, "step": 444136 }, { "epoch": 38.31884057971015, "grad_norm": 0.5621315240859985, "learning_rate": 0.001, "loss": 1.7798, "step": 444192 }, { "epoch": 38.32367149758454, "grad_norm": 0.5194664001464844, "learning_rate": 0.001, "loss": 1.782, "step": 444248 }, { "epoch": 38.328502415458935, "grad_norm": 0.25789618492126465, "learning_rate": 0.001, "loss": 1.7753, "step": 444304 }, { "epoch": 38.333333333333336, "grad_norm": 0.3064212501049042, "learning_rate": 0.001, "loss": 1.771, "step": 444360 }, { "epoch": 38.33816425120773, "grad_norm": 0.7157604694366455, "learning_rate": 0.001, "loss": 1.7749, "step": 444416 }, { "epoch": 38.34299516908212, "grad_norm": 0.6054872274398804, "learning_rate": 0.001, "loss": 1.7764, "step": 444472 }, { "epoch": 38.34782608695652, "grad_norm": 0.3910652995109558, "learning_rate": 0.001, "loss": 1.7729, "step": 444528 }, { "epoch": 38.35265700483092, "grad_norm": 2.0726563930511475, "learning_rate": 0.001, "loss": 1.765, "step": 444584 }, { "epoch": 38.35748792270532, "grad_norm": 0.8810856938362122, "learning_rate": 0.001, "loss": 1.775, "step": 444640 }, { "epoch": 38.36231884057971, "grad_norm": 0.35898667573928833, "learning_rate": 0.001, "loss": 1.7724, "step": 444696 }, { "epoch": 38.367149758454104, "grad_norm": 0.291034072637558, "learning_rate": 0.001, "loss": 1.7765, "step": 444752 }, { "epoch": 38.371980676328505, "grad_norm": 0.32668593525886536, "learning_rate": 0.001, "loss": 1.7778, "step": 444808 }, { "epoch": 38.3768115942029, "grad_norm": 0.3217102587223053, "learning_rate": 0.001, "loss": 1.7677, "step": 444864 }, { "epoch": 38.38164251207729, "grad_norm": 0.3390410244464874, "learning_rate": 0.001, "loss": 1.7914, "step": 444920 }, { "epoch": 38.38647342995169, "grad_norm": 13.918383598327637, "learning_rate": 0.001, "loss": 1.808, "step": 444976 }, { "epoch": 38.391304347826086, "grad_norm": 0.9293938279151917, "learning_rate": 0.001, "loss": 1.786, "step": 445032 }, { "epoch": 38.39613526570048, "grad_norm": 0.3694336414337158, "learning_rate": 0.001, "loss": 1.7859, "step": 445088 }, { "epoch": 38.40096618357488, "grad_norm": 0.7076948881149292, "learning_rate": 0.001, "loss": 1.7839, "step": 445144 }, { "epoch": 38.405797101449274, "grad_norm": 1.7282414436340332, "learning_rate": 0.001, "loss": 1.8068, "step": 445200 }, { "epoch": 38.410628019323674, "grad_norm": 1.0252416133880615, "learning_rate": 0.001, "loss": 1.8011, "step": 445256 }, { "epoch": 38.41545893719807, "grad_norm": 2.7271792888641357, "learning_rate": 0.001, "loss": 1.797, "step": 445312 }, { "epoch": 38.42028985507246, "grad_norm": 2.166074275970459, "learning_rate": 0.001, "loss": 1.817, "step": 445368 }, { "epoch": 38.42512077294686, "grad_norm": 0.7717810273170471, "learning_rate": 0.001, "loss": 1.819, "step": 445424 }, { "epoch": 38.429951690821255, "grad_norm": 0.3540348708629608, "learning_rate": 0.001, "loss": 1.7968, "step": 445480 }, { "epoch": 38.43478260869565, "grad_norm": 0.3778064250946045, "learning_rate": 0.001, "loss": 1.7916, "step": 445536 }, { "epoch": 38.43961352657005, "grad_norm": 0.38323456048965454, "learning_rate": 0.001, "loss": 1.7911, "step": 445592 }, { "epoch": 38.44444444444444, "grad_norm": 0.45393216609954834, "learning_rate": 0.001, "loss": 1.796, "step": 445648 }, { "epoch": 38.44927536231884, "grad_norm": 0.28253409266471863, "learning_rate": 0.001, "loss": 1.793, "step": 445704 }, { "epoch": 38.45410628019324, "grad_norm": 1.1290203332901, "learning_rate": 0.001, "loss": 1.7912, "step": 445760 }, { "epoch": 38.45893719806763, "grad_norm": 1.317454218864441, "learning_rate": 0.001, "loss": 1.7873, "step": 445816 }, { "epoch": 38.46376811594203, "grad_norm": 0.3048466742038727, "learning_rate": 0.001, "loss": 1.7959, "step": 445872 }, { "epoch": 38.468599033816425, "grad_norm": 0.3637180030345917, "learning_rate": 0.001, "loss": 1.7958, "step": 445928 }, { "epoch": 38.47342995169082, "grad_norm": 0.6282294392585754, "learning_rate": 0.001, "loss": 1.791, "step": 445984 }, { "epoch": 38.47826086956522, "grad_norm": 0.36203038692474365, "learning_rate": 0.001, "loss": 1.7847, "step": 446040 }, { "epoch": 38.48309178743961, "grad_norm": 0.6138783693313599, "learning_rate": 0.001, "loss": 1.7842, "step": 446096 }, { "epoch": 38.48792270531401, "grad_norm": 0.5937867760658264, "learning_rate": 0.001, "loss": 1.785, "step": 446152 }, { "epoch": 38.492753623188406, "grad_norm": 0.2579716145992279, "learning_rate": 0.001, "loss": 1.7968, "step": 446208 }, { "epoch": 38.4975845410628, "grad_norm": 0.27835163474082947, "learning_rate": 0.001, "loss": 1.7888, "step": 446264 }, { "epoch": 38.5024154589372, "grad_norm": 1.1484776735305786, "learning_rate": 0.001, "loss": 1.7935, "step": 446320 }, { "epoch": 38.507246376811594, "grad_norm": 0.6424139738082886, "learning_rate": 0.001, "loss": 1.7849, "step": 446376 }, { "epoch": 38.51207729468599, "grad_norm": 2.0825445652008057, "learning_rate": 0.001, "loss": 1.7953, "step": 446432 }, { "epoch": 38.51690821256039, "grad_norm": 1.0100948810577393, "learning_rate": 0.001, "loss": 1.782, "step": 446488 }, { "epoch": 38.52173913043478, "grad_norm": 5.140964984893799, "learning_rate": 0.001, "loss": 1.7905, "step": 446544 }, { "epoch": 38.52657004830918, "grad_norm": 0.67718505859375, "learning_rate": 0.001, "loss": 1.8007, "step": 446600 }, { "epoch": 38.531400966183575, "grad_norm": 0.3639608919620514, "learning_rate": 0.001, "loss": 1.7993, "step": 446656 }, { "epoch": 38.53623188405797, "grad_norm": 2.3734257221221924, "learning_rate": 0.001, "loss": 1.7981, "step": 446712 }, { "epoch": 38.54106280193237, "grad_norm": 0.9096109867095947, "learning_rate": 0.001, "loss": 1.8082, "step": 446768 }, { "epoch": 38.54589371980676, "grad_norm": 0.24499398469924927, "learning_rate": 0.001, "loss": 1.7974, "step": 446824 }, { "epoch": 38.55072463768116, "grad_norm": 0.6300747990608215, "learning_rate": 0.001, "loss": 1.82, "step": 446880 }, { "epoch": 38.55555555555556, "grad_norm": 1.2933855056762695, "learning_rate": 0.001, "loss": 1.8093, "step": 446936 }, { "epoch": 38.56038647342995, "grad_norm": 0.4451667070388794, "learning_rate": 0.001, "loss": 1.7919, "step": 446992 }, { "epoch": 38.56521739130435, "grad_norm": 0.8109222054481506, "learning_rate": 0.001, "loss": 1.804, "step": 447048 }, { "epoch": 38.570048309178745, "grad_norm": 2.574232339859009, "learning_rate": 0.001, "loss": 1.7835, "step": 447104 }, { "epoch": 38.57487922705314, "grad_norm": 0.6178728342056274, "learning_rate": 0.001, "loss": 1.7902, "step": 447160 }, { "epoch": 38.57971014492754, "grad_norm": 0.3826315999031067, "learning_rate": 0.001, "loss": 1.7842, "step": 447216 }, { "epoch": 38.58454106280193, "grad_norm": 0.554108738899231, "learning_rate": 0.001, "loss": 1.7956, "step": 447272 }, { "epoch": 38.589371980676326, "grad_norm": 0.6082323789596558, "learning_rate": 0.001, "loss": 1.7986, "step": 447328 }, { "epoch": 38.594202898550726, "grad_norm": 0.25101929903030396, "learning_rate": 0.001, "loss": 1.7934, "step": 447384 }, { "epoch": 38.59903381642512, "grad_norm": 0.3121839165687561, "learning_rate": 0.001, "loss": 1.7992, "step": 447440 }, { "epoch": 38.60386473429952, "grad_norm": 1.3409173488616943, "learning_rate": 0.001, "loss": 1.8043, "step": 447496 }, { "epoch": 38.608695652173914, "grad_norm": 1.3905839920043945, "learning_rate": 0.001, "loss": 1.7956, "step": 447552 }, { "epoch": 38.61352657004831, "grad_norm": 0.3227103054523468, "learning_rate": 0.001, "loss": 1.7891, "step": 447608 }, { "epoch": 38.61835748792271, "grad_norm": 3.4038619995117188, "learning_rate": 0.001, "loss": 1.7875, "step": 447664 }, { "epoch": 38.6231884057971, "grad_norm": 0.47102850675582886, "learning_rate": 0.001, "loss": 1.7845, "step": 447720 }, { "epoch": 38.628019323671495, "grad_norm": 0.42438656091690063, "learning_rate": 0.001, "loss": 1.7825, "step": 447776 }, { "epoch": 38.632850241545896, "grad_norm": 0.4077920913696289, "learning_rate": 0.001, "loss": 1.7966, "step": 447832 }, { "epoch": 38.63768115942029, "grad_norm": 0.2559696435928345, "learning_rate": 0.001, "loss": 1.7868, "step": 447888 }, { "epoch": 38.64251207729468, "grad_norm": 0.7494938969612122, "learning_rate": 0.001, "loss": 1.7937, "step": 447944 }, { "epoch": 38.64734299516908, "grad_norm": 1.1823296546936035, "learning_rate": 0.001, "loss": 1.7922, "step": 448000 }, { "epoch": 38.65217391304348, "grad_norm": 0.3893846571445465, "learning_rate": 0.001, "loss": 1.7848, "step": 448056 }, { "epoch": 38.65700483091788, "grad_norm": 0.5964910984039307, "learning_rate": 0.001, "loss": 1.7875, "step": 448112 }, { "epoch": 38.66183574879227, "grad_norm": 6.036163330078125, "learning_rate": 0.001, "loss": 1.79, "step": 448168 }, { "epoch": 38.666666666666664, "grad_norm": 0.3920939564704895, "learning_rate": 0.001, "loss": 1.7881, "step": 448224 }, { "epoch": 38.671497584541065, "grad_norm": 2.3875820636749268, "learning_rate": 0.001, "loss": 1.7931, "step": 448280 }, { "epoch": 38.67632850241546, "grad_norm": 0.54966801404953, "learning_rate": 0.001, "loss": 1.7957, "step": 448336 }, { "epoch": 38.68115942028985, "grad_norm": 0.34609076380729675, "learning_rate": 0.001, "loss": 1.7956, "step": 448392 }, { "epoch": 38.68599033816425, "grad_norm": 0.30398377776145935, "learning_rate": 0.001, "loss": 1.789, "step": 448448 }, { "epoch": 38.690821256038646, "grad_norm": 4.372628211975098, "learning_rate": 0.001, "loss": 1.7921, "step": 448504 }, { "epoch": 38.69565217391305, "grad_norm": 0.35899874567985535, "learning_rate": 0.001, "loss": 1.8016, "step": 448560 }, { "epoch": 38.70048309178744, "grad_norm": 0.6010347008705139, "learning_rate": 0.001, "loss": 1.7974, "step": 448616 }, { "epoch": 38.70531400966183, "grad_norm": 1.5906885862350464, "learning_rate": 0.001, "loss": 1.7986, "step": 448672 }, { "epoch": 38.710144927536234, "grad_norm": 0.32103240489959717, "learning_rate": 0.001, "loss": 1.7982, "step": 448728 }, { "epoch": 38.71497584541063, "grad_norm": 0.5428378582000732, "learning_rate": 0.001, "loss": 1.8044, "step": 448784 }, { "epoch": 38.71980676328502, "grad_norm": 0.8739556670188904, "learning_rate": 0.001, "loss": 1.8074, "step": 448840 }, { "epoch": 38.72463768115942, "grad_norm": 1.605176568031311, "learning_rate": 0.001, "loss": 1.8207, "step": 448896 }, { "epoch": 38.729468599033815, "grad_norm": 0.6749762296676636, "learning_rate": 0.001, "loss": 1.81, "step": 448952 }, { "epoch": 38.734299516908216, "grad_norm": 0.4253573417663574, "learning_rate": 0.001, "loss": 1.8054, "step": 449008 }, { "epoch": 38.73913043478261, "grad_norm": 0.5799744725227356, "learning_rate": 0.001, "loss": 1.8014, "step": 449064 }, { "epoch": 38.743961352657, "grad_norm": 1.4793938398361206, "learning_rate": 0.001, "loss": 1.7994, "step": 449120 }, { "epoch": 38.7487922705314, "grad_norm": 0.6273636221885681, "learning_rate": 0.001, "loss": 1.8021, "step": 449176 }, { "epoch": 38.7536231884058, "grad_norm": 1.5249005556106567, "learning_rate": 0.001, "loss": 1.9177, "step": 449232 }, { "epoch": 38.75845410628019, "grad_norm": 3.6143224239349365, "learning_rate": 0.001, "loss": 1.9515, "step": 449288 }, { "epoch": 38.76328502415459, "grad_norm": 0.4091202914714813, "learning_rate": 0.001, "loss": 1.8506, "step": 449344 }, { "epoch": 38.768115942028984, "grad_norm": 3.436753988265991, "learning_rate": 0.001, "loss": 1.8187, "step": 449400 }, { "epoch": 38.772946859903385, "grad_norm": 0.710066020488739, "learning_rate": 0.001, "loss": 1.8209, "step": 449456 }, { "epoch": 38.77777777777778, "grad_norm": 0.4235520362854004, "learning_rate": 0.001, "loss": 1.8059, "step": 449512 }, { "epoch": 38.78260869565217, "grad_norm": 0.5013096332550049, "learning_rate": 0.001, "loss": 1.8003, "step": 449568 }, { "epoch": 38.78743961352657, "grad_norm": 1.1082162857055664, "learning_rate": 0.001, "loss": 1.802, "step": 449624 }, { "epoch": 38.792270531400966, "grad_norm": 0.8056930303573608, "learning_rate": 0.001, "loss": 1.7974, "step": 449680 }, { "epoch": 38.79710144927536, "grad_norm": 0.756944477558136, "learning_rate": 0.001, "loss": 1.8018, "step": 449736 }, { "epoch": 38.80193236714976, "grad_norm": 0.9262315034866333, "learning_rate": 0.001, "loss": 1.7931, "step": 449792 }, { "epoch": 38.806763285024154, "grad_norm": 2.1059131622314453, "learning_rate": 0.001, "loss": 1.7953, "step": 449848 }, { "epoch": 38.81159420289855, "grad_norm": 0.38653042912483215, "learning_rate": 0.001, "loss": 1.8032, "step": 449904 }, { "epoch": 38.81642512077295, "grad_norm": 2.1234281063079834, "learning_rate": 0.001, "loss": 1.7962, "step": 449960 }, { "epoch": 38.82125603864734, "grad_norm": 0.9611732959747314, "learning_rate": 0.001, "loss": 1.8014, "step": 450016 }, { "epoch": 38.82608695652174, "grad_norm": 0.8607473373413086, "learning_rate": 0.001, "loss": 1.801, "step": 450072 }, { "epoch": 38.830917874396135, "grad_norm": 0.47827741503715515, "learning_rate": 0.001, "loss": 1.8044, "step": 450128 }, { "epoch": 38.83574879227053, "grad_norm": 0.3833787143230438, "learning_rate": 0.001, "loss": 1.8078, "step": 450184 }, { "epoch": 38.84057971014493, "grad_norm": 0.42741018533706665, "learning_rate": 0.001, "loss": 1.8107, "step": 450240 }, { "epoch": 38.84541062801932, "grad_norm": 0.7493306994438171, "learning_rate": 0.001, "loss": 1.8147, "step": 450296 }, { "epoch": 38.85024154589372, "grad_norm": 2.5808846950531006, "learning_rate": 0.001, "loss": 1.8343, "step": 450352 }, { "epoch": 38.85507246376812, "grad_norm": 8.199564933776855, "learning_rate": 0.001, "loss": 1.8322, "step": 450408 }, { "epoch": 38.85990338164251, "grad_norm": 0.903209388256073, "learning_rate": 0.001, "loss": 1.8278, "step": 450464 }, { "epoch": 38.86473429951691, "grad_norm": 1.4929951429367065, "learning_rate": 0.001, "loss": 1.8297, "step": 450520 }, { "epoch": 38.869565217391305, "grad_norm": 0.43569737672805786, "learning_rate": 0.001, "loss": 1.8065, "step": 450576 }, { "epoch": 38.8743961352657, "grad_norm": 5.928103446960449, "learning_rate": 0.001, "loss": 1.8004, "step": 450632 }, { "epoch": 38.8792270531401, "grad_norm": 2.4554736614227295, "learning_rate": 0.001, "loss": 1.8094, "step": 450688 }, { "epoch": 38.88405797101449, "grad_norm": 0.6193541288375854, "learning_rate": 0.001, "loss": 1.8152, "step": 450744 }, { "epoch": 38.888888888888886, "grad_norm": 0.43015071749687195, "learning_rate": 0.001, "loss": 1.8133, "step": 450800 }, { "epoch": 38.893719806763286, "grad_norm": 0.7441524863243103, "learning_rate": 0.001, "loss": 1.8072, "step": 450856 }, { "epoch": 38.89855072463768, "grad_norm": 0.844596803188324, "learning_rate": 0.001, "loss": 1.8017, "step": 450912 }, { "epoch": 38.90338164251208, "grad_norm": 2.394152879714966, "learning_rate": 0.001, "loss": 1.8003, "step": 450968 }, { "epoch": 38.908212560386474, "grad_norm": 0.4558674991130829, "learning_rate": 0.001, "loss": 1.8006, "step": 451024 }, { "epoch": 38.91304347826087, "grad_norm": 0.5908657312393188, "learning_rate": 0.001, "loss": 1.7947, "step": 451080 }, { "epoch": 38.91787439613527, "grad_norm": 0.33254092931747437, "learning_rate": 0.001, "loss": 1.8083, "step": 451136 }, { "epoch": 38.92270531400966, "grad_norm": 0.29405105113983154, "learning_rate": 0.001, "loss": 1.8203, "step": 451192 }, { "epoch": 38.927536231884055, "grad_norm": 0.26439738273620605, "learning_rate": 0.001, "loss": 1.8114, "step": 451248 }, { "epoch": 38.932367149758456, "grad_norm": 0.5527209043502808, "learning_rate": 0.001, "loss": 1.8205, "step": 451304 }, { "epoch": 38.93719806763285, "grad_norm": 7.17139196395874, "learning_rate": 0.001, "loss": 1.8339, "step": 451360 }, { "epoch": 38.94202898550725, "grad_norm": 1.848127007484436, "learning_rate": 0.001, "loss": 1.822, "step": 451416 }, { "epoch": 38.94685990338164, "grad_norm": 0.6753201484680176, "learning_rate": 0.001, "loss": 1.8375, "step": 451472 }, { "epoch": 38.95169082125604, "grad_norm": 0.3073612451553345, "learning_rate": 0.001, "loss": 1.8229, "step": 451528 }, { "epoch": 38.95652173913044, "grad_norm": 2.1036770343780518, "learning_rate": 0.001, "loss": 1.8185, "step": 451584 }, { "epoch": 38.96135265700483, "grad_norm": 0.7363268136978149, "learning_rate": 0.001, "loss": 1.821, "step": 451640 }, { "epoch": 38.966183574879224, "grad_norm": 0.38741424679756165, "learning_rate": 0.001, "loss": 1.8229, "step": 451696 }, { "epoch": 38.971014492753625, "grad_norm": 0.4927924573421478, "learning_rate": 0.001, "loss": 1.8089, "step": 451752 }, { "epoch": 38.97584541062802, "grad_norm": 7.602481365203857, "learning_rate": 0.001, "loss": 1.8157, "step": 451808 }, { "epoch": 38.98067632850242, "grad_norm": 1.3888112306594849, "learning_rate": 0.001, "loss": 1.8111, "step": 451864 }, { "epoch": 38.98550724637681, "grad_norm": 0.40749290585517883, "learning_rate": 0.001, "loss": 1.8118, "step": 451920 }, { "epoch": 38.990338164251206, "grad_norm": 0.43355992436408997, "learning_rate": 0.001, "loss": 1.8156, "step": 451976 }, { "epoch": 38.99516908212561, "grad_norm": 1.8002430200576782, "learning_rate": 0.001, "loss": 1.8295, "step": 452032 }, { "epoch": 39.0, "grad_norm": 0.8947835564613342, "learning_rate": 0.001, "loss": 1.8296, "step": 452088 }, { "epoch": 39.00483091787439, "grad_norm": 1.102235198020935, "learning_rate": 0.001, "loss": 1.8046, "step": 452144 }, { "epoch": 39.009661835748794, "grad_norm": 0.9161561727523804, "learning_rate": 0.001, "loss": 1.7958, "step": 452200 }, { "epoch": 39.01449275362319, "grad_norm": 2.39451265335083, "learning_rate": 0.001, "loss": 1.7968, "step": 452256 }, { "epoch": 39.01932367149758, "grad_norm": 0.5123676061630249, "learning_rate": 0.001, "loss": 1.7882, "step": 452312 }, { "epoch": 39.02415458937198, "grad_norm": 0.7145623564720154, "learning_rate": 0.001, "loss": 1.7977, "step": 452368 }, { "epoch": 39.028985507246375, "grad_norm": 4.258519172668457, "learning_rate": 0.001, "loss": 1.7865, "step": 452424 }, { "epoch": 39.033816425120776, "grad_norm": 1.5658979415893555, "learning_rate": 0.001, "loss": 1.7954, "step": 452480 }, { "epoch": 39.03864734299517, "grad_norm": 0.4580766260623932, "learning_rate": 0.001, "loss": 1.7927, "step": 452536 }, { "epoch": 39.04347826086956, "grad_norm": 0.417779803276062, "learning_rate": 0.001, "loss": 1.7843, "step": 452592 }, { "epoch": 39.04830917874396, "grad_norm": 1.5431866645812988, "learning_rate": 0.001, "loss": 1.7782, "step": 452648 }, { "epoch": 39.05314009661836, "grad_norm": 7.4389824867248535, "learning_rate": 0.001, "loss": 1.7849, "step": 452704 }, { "epoch": 39.05797101449275, "grad_norm": 2.431720495223999, "learning_rate": 0.001, "loss": 1.7893, "step": 452760 }, { "epoch": 39.06280193236715, "grad_norm": 1.2876051664352417, "learning_rate": 0.001, "loss": 1.7806, "step": 452816 }, { "epoch": 39.067632850241544, "grad_norm": 0.9265264272689819, "learning_rate": 0.001, "loss": 1.7944, "step": 452872 }, { "epoch": 39.072463768115945, "grad_norm": 1.4943867921829224, "learning_rate": 0.001, "loss": 1.7761, "step": 452928 }, { "epoch": 39.07729468599034, "grad_norm": 0.446413516998291, "learning_rate": 0.001, "loss": 1.7803, "step": 452984 }, { "epoch": 39.08212560386473, "grad_norm": 0.9982115626335144, "learning_rate": 0.001, "loss": 1.7971, "step": 453040 }, { "epoch": 39.08695652173913, "grad_norm": 2.5845043659210205, "learning_rate": 0.001, "loss": 1.7827, "step": 453096 }, { "epoch": 39.091787439613526, "grad_norm": 3.0803334712982178, "learning_rate": 0.001, "loss": 1.7906, "step": 453152 }, { "epoch": 39.09661835748792, "grad_norm": 0.5121610760688782, "learning_rate": 0.001, "loss": 1.7858, "step": 453208 }, { "epoch": 39.10144927536232, "grad_norm": 0.8989846110343933, "learning_rate": 0.001, "loss": 1.787, "step": 453264 }, { "epoch": 39.106280193236714, "grad_norm": 0.5431614518165588, "learning_rate": 0.001, "loss": 1.8048, "step": 453320 }, { "epoch": 39.111111111111114, "grad_norm": 2.233323097229004, "learning_rate": 0.001, "loss": 1.7948, "step": 453376 }, { "epoch": 39.11594202898551, "grad_norm": 1.3849501609802246, "learning_rate": 0.001, "loss": 1.778, "step": 453432 }, { "epoch": 39.1207729468599, "grad_norm": 1.2357500791549683, "learning_rate": 0.001, "loss": 1.7787, "step": 453488 }, { "epoch": 39.1256038647343, "grad_norm": 0.7278436422348022, "learning_rate": 0.001, "loss": 1.7842, "step": 453544 }, { "epoch": 39.130434782608695, "grad_norm": 0.4302615523338318, "learning_rate": 0.001, "loss": 1.7861, "step": 453600 }, { "epoch": 39.13526570048309, "grad_norm": 5.418882369995117, "learning_rate": 0.001, "loss": 1.7796, "step": 453656 }, { "epoch": 39.14009661835749, "grad_norm": 0.3702942430973053, "learning_rate": 0.001, "loss": 1.7869, "step": 453712 }, { "epoch": 39.14492753623188, "grad_norm": 0.675091564655304, "learning_rate": 0.001, "loss": 1.7856, "step": 453768 }, { "epoch": 39.14975845410628, "grad_norm": 0.9066087007522583, "learning_rate": 0.001, "loss": 1.7731, "step": 453824 }, { "epoch": 39.15458937198068, "grad_norm": 0.8583076596260071, "learning_rate": 0.001, "loss": 1.7719, "step": 453880 }, { "epoch": 39.15942028985507, "grad_norm": 0.532241940498352, "learning_rate": 0.001, "loss": 1.7764, "step": 453936 }, { "epoch": 39.16425120772947, "grad_norm": 0.698300302028656, "learning_rate": 0.001, "loss": 1.7805, "step": 453992 }, { "epoch": 39.169082125603865, "grad_norm": 3.055135726928711, "learning_rate": 0.001, "loss": 1.7759, "step": 454048 }, { "epoch": 39.17391304347826, "grad_norm": 0.418081670999527, "learning_rate": 0.001, "loss": 1.7826, "step": 454104 }, { "epoch": 39.17874396135266, "grad_norm": 0.5559875965118408, "learning_rate": 0.001, "loss": 1.7862, "step": 454160 }, { "epoch": 39.18357487922705, "grad_norm": 0.8645333051681519, "learning_rate": 0.001, "loss": 1.7945, "step": 454216 }, { "epoch": 39.18840579710145, "grad_norm": 0.4972476065158844, "learning_rate": 0.001, "loss": 1.7928, "step": 454272 }, { "epoch": 39.193236714975846, "grad_norm": 1.5201612710952759, "learning_rate": 0.001, "loss": 1.7739, "step": 454328 }, { "epoch": 39.19806763285024, "grad_norm": 1.2820297479629517, "learning_rate": 0.001, "loss": 1.7776, "step": 454384 }, { "epoch": 39.20289855072464, "grad_norm": 0.688642680644989, "learning_rate": 0.001, "loss": 1.7769, "step": 454440 }, { "epoch": 39.207729468599034, "grad_norm": 0.5572932958602905, "learning_rate": 0.001, "loss": 1.7661, "step": 454496 }, { "epoch": 39.21256038647343, "grad_norm": 3.2622170448303223, "learning_rate": 0.001, "loss": 1.7725, "step": 454552 }, { "epoch": 39.21739130434783, "grad_norm": 0.5168890953063965, "learning_rate": 0.001, "loss": 1.769, "step": 454608 }, { "epoch": 39.22222222222222, "grad_norm": 0.7060826420783997, "learning_rate": 0.001, "loss": 1.7653, "step": 454664 }, { "epoch": 39.227053140096615, "grad_norm": 0.5047960877418518, "learning_rate": 0.001, "loss": 1.7761, "step": 454720 }, { "epoch": 39.231884057971016, "grad_norm": 10.524388313293457, "learning_rate": 0.001, "loss": 1.7848, "step": 454776 }, { "epoch": 39.23671497584541, "grad_norm": 0.7524645924568176, "learning_rate": 0.001, "loss": 1.795, "step": 454832 }, { "epoch": 39.24154589371981, "grad_norm": 0.500701367855072, "learning_rate": 0.001, "loss": 1.7883, "step": 454888 }, { "epoch": 39.2463768115942, "grad_norm": 4.089268207550049, "learning_rate": 0.001, "loss": 1.7888, "step": 454944 }, { "epoch": 39.2512077294686, "grad_norm": 0.7452492713928223, "learning_rate": 0.001, "loss": 1.801, "step": 455000 }, { "epoch": 39.256038647343, "grad_norm": 0.44097328186035156, "learning_rate": 0.001, "loss": 1.8018, "step": 455056 }, { "epoch": 39.26086956521739, "grad_norm": 0.4278869926929474, "learning_rate": 0.001, "loss": 1.7893, "step": 455112 }, { "epoch": 39.265700483091784, "grad_norm": 0.9720302820205688, "learning_rate": 0.001, "loss": 1.7817, "step": 455168 }, { "epoch": 39.270531400966185, "grad_norm": 0.5431703925132751, "learning_rate": 0.001, "loss": 1.7834, "step": 455224 }, { "epoch": 39.27536231884058, "grad_norm": 0.44795337319374084, "learning_rate": 0.001, "loss": 1.7807, "step": 455280 }, { "epoch": 39.28019323671498, "grad_norm": 0.4862595200538635, "learning_rate": 0.001, "loss": 1.7852, "step": 455336 }, { "epoch": 39.28502415458937, "grad_norm": 0.3776932656764984, "learning_rate": 0.001, "loss": 1.7888, "step": 455392 }, { "epoch": 39.289855072463766, "grad_norm": 0.3562157154083252, "learning_rate": 0.001, "loss": 1.7749, "step": 455448 }, { "epoch": 39.29468599033817, "grad_norm": 0.6022749543190002, "learning_rate": 0.001, "loss": 1.7815, "step": 455504 }, { "epoch": 39.29951690821256, "grad_norm": 0.5138041973114014, "learning_rate": 0.001, "loss": 1.7892, "step": 455560 }, { "epoch": 39.30434782608695, "grad_norm": 0.7644832730293274, "learning_rate": 0.001, "loss": 1.7837, "step": 455616 }, { "epoch": 39.309178743961354, "grad_norm": 1.4737321138381958, "learning_rate": 0.001, "loss": 1.7907, "step": 455672 }, { "epoch": 39.31400966183575, "grad_norm": 0.7813582420349121, "learning_rate": 0.001, "loss": 1.7827, "step": 455728 }, { "epoch": 39.31884057971015, "grad_norm": 0.5834698677062988, "learning_rate": 0.001, "loss": 1.7805, "step": 455784 }, { "epoch": 39.32367149758454, "grad_norm": 0.407444030046463, "learning_rate": 0.001, "loss": 1.7774, "step": 455840 }, { "epoch": 39.328502415458935, "grad_norm": 0.5704084038734436, "learning_rate": 0.001, "loss": 1.7784, "step": 455896 }, { "epoch": 39.333333333333336, "grad_norm": 6.085973262786865, "learning_rate": 0.001, "loss": 1.7754, "step": 455952 }, { "epoch": 39.33816425120773, "grad_norm": 2.780090808868408, "learning_rate": 0.001, "loss": 1.7864, "step": 456008 }, { "epoch": 39.34299516908212, "grad_norm": 1.311748743057251, "learning_rate": 0.001, "loss": 1.793, "step": 456064 }, { "epoch": 39.34782608695652, "grad_norm": 0.4682284891605377, "learning_rate": 0.001, "loss": 1.7823, "step": 456120 }, { "epoch": 39.35265700483092, "grad_norm": 0.34444138407707214, "learning_rate": 0.001, "loss": 1.7957, "step": 456176 }, { "epoch": 39.35748792270532, "grad_norm": 0.4472052752971649, "learning_rate": 0.001, "loss": 1.7975, "step": 456232 }, { "epoch": 39.36231884057971, "grad_norm": 0.700936496257782, "learning_rate": 0.001, "loss": 1.7865, "step": 456288 }, { "epoch": 39.367149758454104, "grad_norm": 0.29438868165016174, "learning_rate": 0.001, "loss": 1.7828, "step": 456344 }, { "epoch": 39.371980676328505, "grad_norm": 0.6125770807266235, "learning_rate": 0.001, "loss": 1.7792, "step": 456400 }, { "epoch": 39.3768115942029, "grad_norm": 0.24931997060775757, "learning_rate": 0.001, "loss": 1.7783, "step": 456456 }, { "epoch": 39.38164251207729, "grad_norm": 0.2730088531970978, "learning_rate": 0.001, "loss": 1.7811, "step": 456512 }, { "epoch": 39.38647342995169, "grad_norm": 0.29912522435188293, "learning_rate": 0.001, "loss": 1.7771, "step": 456568 }, { "epoch": 39.391304347826086, "grad_norm": 0.7951462268829346, "learning_rate": 0.001, "loss": 1.7812, "step": 456624 }, { "epoch": 39.39613526570048, "grad_norm": 2.0140445232391357, "learning_rate": 0.001, "loss": 1.7793, "step": 456680 }, { "epoch": 39.40096618357488, "grad_norm": 1.847533106803894, "learning_rate": 0.001, "loss": 1.7799, "step": 456736 }, { "epoch": 39.405797101449274, "grad_norm": 0.8291811347007751, "learning_rate": 0.001, "loss": 1.7869, "step": 456792 }, { "epoch": 39.410628019323674, "grad_norm": 0.35937872529029846, "learning_rate": 0.001, "loss": 1.7822, "step": 456848 }, { "epoch": 39.41545893719807, "grad_norm": 0.4862651228904724, "learning_rate": 0.001, "loss": 1.7808, "step": 456904 }, { "epoch": 39.42028985507246, "grad_norm": 0.6624985337257385, "learning_rate": 0.001, "loss": 1.777, "step": 456960 }, { "epoch": 39.42512077294686, "grad_norm": 0.4744476079940796, "learning_rate": 0.001, "loss": 1.7846, "step": 457016 }, { "epoch": 39.429951690821255, "grad_norm": 0.6915600895881653, "learning_rate": 0.001, "loss": 1.7869, "step": 457072 }, { "epoch": 39.43478260869565, "grad_norm": 0.9727147221565247, "learning_rate": 0.001, "loss": 1.7867, "step": 457128 }, { "epoch": 39.43961352657005, "grad_norm": 0.9777208566665649, "learning_rate": 0.001, "loss": 1.7813, "step": 457184 }, { "epoch": 39.44444444444444, "grad_norm": 0.8607946038246155, "learning_rate": 0.001, "loss": 1.7859, "step": 457240 }, { "epoch": 39.44927536231884, "grad_norm": 0.3632735013961792, "learning_rate": 0.001, "loss": 1.7874, "step": 457296 }, { "epoch": 39.45410628019324, "grad_norm": 0.4249671399593353, "learning_rate": 0.001, "loss": 1.7867, "step": 457352 }, { "epoch": 39.45893719806763, "grad_norm": 0.4214588701725006, "learning_rate": 0.001, "loss": 1.7755, "step": 457408 }, { "epoch": 39.46376811594203, "grad_norm": 0.26164811849594116, "learning_rate": 0.001, "loss": 1.7777, "step": 457464 }, { "epoch": 39.468599033816425, "grad_norm": 0.29496949911117554, "learning_rate": 0.001, "loss": 1.782, "step": 457520 }, { "epoch": 39.47342995169082, "grad_norm": 0.5179944634437561, "learning_rate": 0.001, "loss": 1.778, "step": 457576 }, { "epoch": 39.47826086956522, "grad_norm": 0.40170150995254517, "learning_rate": 0.001, "loss": 1.7899, "step": 457632 }, { "epoch": 39.48309178743961, "grad_norm": 2.452028274536133, "learning_rate": 0.001, "loss": 1.7875, "step": 457688 }, { "epoch": 39.48792270531401, "grad_norm": 0.6511350274085999, "learning_rate": 0.001, "loss": 1.7934, "step": 457744 }, { "epoch": 39.492753623188406, "grad_norm": 0.6816292405128479, "learning_rate": 0.001, "loss": 1.769, "step": 457800 }, { "epoch": 39.4975845410628, "grad_norm": 0.8402189612388611, "learning_rate": 0.001, "loss": 1.7751, "step": 457856 }, { "epoch": 39.5024154589372, "grad_norm": 0.6438387036323547, "learning_rate": 0.001, "loss": 1.7815, "step": 457912 }, { "epoch": 39.507246376811594, "grad_norm": 0.5256146788597107, "learning_rate": 0.001, "loss": 1.7923, "step": 457968 }, { "epoch": 39.51207729468599, "grad_norm": 0.823054313659668, "learning_rate": 0.001, "loss": 1.7911, "step": 458024 }, { "epoch": 39.51690821256039, "grad_norm": 2.2309350967407227, "learning_rate": 0.001, "loss": 1.8001, "step": 458080 }, { "epoch": 39.52173913043478, "grad_norm": 0.6493335962295532, "learning_rate": 0.001, "loss": 1.8206, "step": 458136 }, { "epoch": 39.52657004830918, "grad_norm": 0.6404735445976257, "learning_rate": 0.001, "loss": 1.8164, "step": 458192 }, { "epoch": 39.531400966183575, "grad_norm": 1.2641183137893677, "learning_rate": 0.001, "loss": 1.8087, "step": 458248 }, { "epoch": 39.53623188405797, "grad_norm": 2.2141754627227783, "learning_rate": 0.001, "loss": 1.8046, "step": 458304 }, { "epoch": 39.54106280193237, "grad_norm": 0.6644294857978821, "learning_rate": 0.001, "loss": 1.7954, "step": 458360 }, { "epoch": 39.54589371980676, "grad_norm": 0.8572977781295776, "learning_rate": 0.001, "loss": 1.7893, "step": 458416 }, { "epoch": 39.55072463768116, "grad_norm": 0.6831353306770325, "learning_rate": 0.001, "loss": 1.7894, "step": 458472 }, { "epoch": 39.55555555555556, "grad_norm": 0.9897552132606506, "learning_rate": 0.001, "loss": 1.8106, "step": 458528 }, { "epoch": 39.56038647342995, "grad_norm": 1.106087327003479, "learning_rate": 0.001, "loss": 1.8136, "step": 458584 }, { "epoch": 39.56521739130435, "grad_norm": 2.1643118858337402, "learning_rate": 0.001, "loss": 1.8137, "step": 458640 }, { "epoch": 39.570048309178745, "grad_norm": 1.3748916387557983, "learning_rate": 0.001, "loss": 1.8167, "step": 458696 }, { "epoch": 39.57487922705314, "grad_norm": 0.73423832654953, "learning_rate": 0.001, "loss": 1.8181, "step": 458752 }, { "epoch": 39.57971014492754, "grad_norm": 0.4277721643447876, "learning_rate": 0.001, "loss": 1.8303, "step": 458808 }, { "epoch": 39.58454106280193, "grad_norm": 1.349006175994873, "learning_rate": 0.001, "loss": 1.8311, "step": 458864 }, { "epoch": 39.589371980676326, "grad_norm": 0.4151976406574249, "learning_rate": 0.001, "loss": 1.8233, "step": 458920 }, { "epoch": 39.594202898550726, "grad_norm": 1.5622711181640625, "learning_rate": 0.001, "loss": 1.8172, "step": 458976 }, { "epoch": 39.59903381642512, "grad_norm": 0.6670666337013245, "learning_rate": 0.001, "loss": 1.8163, "step": 459032 }, { "epoch": 39.60386473429952, "grad_norm": 0.34776684641838074, "learning_rate": 0.001, "loss": 1.8317, "step": 459088 }, { "epoch": 39.608695652173914, "grad_norm": 0.40206941962242126, "learning_rate": 0.001, "loss": 1.822, "step": 459144 }, { "epoch": 39.61352657004831, "grad_norm": 0.3365606963634491, "learning_rate": 0.001, "loss": 1.8275, "step": 459200 }, { "epoch": 39.61835748792271, "grad_norm": 0.35529863834381104, "learning_rate": 0.001, "loss": 1.8277, "step": 459256 }, { "epoch": 39.6231884057971, "grad_norm": 0.3912605941295624, "learning_rate": 0.001, "loss": 1.823, "step": 459312 }, { "epoch": 39.628019323671495, "grad_norm": 0.3111409544944763, "learning_rate": 0.001, "loss": 1.8197, "step": 459368 }, { "epoch": 39.632850241545896, "grad_norm": 0.3650505542755127, "learning_rate": 0.001, "loss": 1.8064, "step": 459424 }, { "epoch": 39.63768115942029, "grad_norm": 2.255704641342163, "learning_rate": 0.001, "loss": 1.8109, "step": 459480 }, { "epoch": 39.64251207729468, "grad_norm": 0.4701070189476013, "learning_rate": 0.001, "loss": 1.7979, "step": 459536 }, { "epoch": 39.64734299516908, "grad_norm": 0.44536930322647095, "learning_rate": 0.001, "loss": 1.7985, "step": 459592 }, { "epoch": 39.65217391304348, "grad_norm": 0.3291632831096649, "learning_rate": 0.001, "loss": 1.8007, "step": 459648 }, { "epoch": 39.65700483091788, "grad_norm": 6.458952903747559, "learning_rate": 0.001, "loss": 1.7984, "step": 459704 }, { "epoch": 39.66183574879227, "grad_norm": 0.22720588743686676, "learning_rate": 0.001, "loss": 1.7937, "step": 459760 }, { "epoch": 39.666666666666664, "grad_norm": 1.5068660974502563, "learning_rate": 0.001, "loss": 1.7856, "step": 459816 }, { "epoch": 39.671497584541065, "grad_norm": 0.455994576215744, "learning_rate": 0.001, "loss": 1.7899, "step": 459872 }, { "epoch": 39.67632850241546, "grad_norm": 0.902661919593811, "learning_rate": 0.001, "loss": 1.8073, "step": 459928 }, { "epoch": 39.68115942028985, "grad_norm": 1.2910200357437134, "learning_rate": 0.001, "loss": 1.797, "step": 459984 }, { "epoch": 39.68599033816425, "grad_norm": 0.8921377062797546, "learning_rate": 0.001, "loss": 1.8023, "step": 460040 }, { "epoch": 39.690821256038646, "grad_norm": 0.42187631130218506, "learning_rate": 0.001, "loss": 1.7963, "step": 460096 }, { "epoch": 39.69565217391305, "grad_norm": 0.6834074854850769, "learning_rate": 0.001, "loss": 1.796, "step": 460152 }, { "epoch": 39.70048309178744, "grad_norm": 1.0055701732635498, "learning_rate": 0.001, "loss": 1.7999, "step": 460208 }, { "epoch": 39.70531400966183, "grad_norm": 0.44658946990966797, "learning_rate": 0.001, "loss": 1.7977, "step": 460264 }, { "epoch": 39.710144927536234, "grad_norm": 1.421464443206787, "learning_rate": 0.001, "loss": 1.8068, "step": 460320 }, { "epoch": 39.71497584541063, "grad_norm": 0.26028600335121155, "learning_rate": 0.001, "loss": 1.8006, "step": 460376 }, { "epoch": 39.71980676328502, "grad_norm": 0.4798416793346405, "learning_rate": 0.001, "loss": 1.7922, "step": 460432 }, { "epoch": 39.72463768115942, "grad_norm": 11.575304985046387, "learning_rate": 0.001, "loss": 1.7938, "step": 460488 }, { "epoch": 39.729468599033815, "grad_norm": 0.4292560815811157, "learning_rate": 0.001, "loss": 1.7969, "step": 460544 }, { "epoch": 39.734299516908216, "grad_norm": 0.5057810544967651, "learning_rate": 0.001, "loss": 1.7958, "step": 460600 }, { "epoch": 39.73913043478261, "grad_norm": 1.2123405933380127, "learning_rate": 0.001, "loss": 1.7925, "step": 460656 }, { "epoch": 39.743961352657, "grad_norm": 0.3548462986946106, "learning_rate": 0.001, "loss": 1.7876, "step": 460712 }, { "epoch": 39.7487922705314, "grad_norm": 0.2897131145000458, "learning_rate": 0.001, "loss": 1.7923, "step": 460768 }, { "epoch": 39.7536231884058, "grad_norm": 0.459045946598053, "learning_rate": 0.001, "loss": 1.7952, "step": 460824 }, { "epoch": 39.75845410628019, "grad_norm": 0.3890233635902405, "learning_rate": 0.001, "loss": 1.79, "step": 460880 }, { "epoch": 39.76328502415459, "grad_norm": 0.4458432197570801, "learning_rate": 0.001, "loss": 1.7921, "step": 460936 }, { "epoch": 39.768115942028984, "grad_norm": 1.3525525331497192, "learning_rate": 0.001, "loss": 1.7921, "step": 460992 }, { "epoch": 39.772946859903385, "grad_norm": 0.3510584533214569, "learning_rate": 0.001, "loss": 1.7936, "step": 461048 }, { "epoch": 39.77777777777778, "grad_norm": 0.3776356279850006, "learning_rate": 0.001, "loss": 1.7868, "step": 461104 }, { "epoch": 39.78260869565217, "grad_norm": 0.34673061966896057, "learning_rate": 0.001, "loss": 1.786, "step": 461160 }, { "epoch": 39.78743961352657, "grad_norm": 0.26899057626724243, "learning_rate": 0.001, "loss": 1.7861, "step": 461216 }, { "epoch": 39.792270531400966, "grad_norm": 0.3279809057712555, "learning_rate": 0.001, "loss": 1.7921, "step": 461272 }, { "epoch": 39.79710144927536, "grad_norm": 0.40119341015815735, "learning_rate": 0.001, "loss": 1.7894, "step": 461328 }, { "epoch": 39.80193236714976, "grad_norm": 0.35732772946357727, "learning_rate": 0.001, "loss": 1.7824, "step": 461384 }, { "epoch": 39.806763285024154, "grad_norm": 0.3697347342967987, "learning_rate": 0.001, "loss": 1.7818, "step": 461440 }, { "epoch": 39.81159420289855, "grad_norm": 0.3744228184223175, "learning_rate": 0.001, "loss": 1.7832, "step": 461496 }, { "epoch": 39.81642512077295, "grad_norm": 0.331001877784729, "learning_rate": 0.001, "loss": 1.7913, "step": 461552 }, { "epoch": 39.82125603864734, "grad_norm": 0.44698405265808105, "learning_rate": 0.001, "loss": 1.7904, "step": 461608 }, { "epoch": 39.82608695652174, "grad_norm": 0.48755109310150146, "learning_rate": 0.001, "loss": 1.7897, "step": 461664 }, { "epoch": 39.830917874396135, "grad_norm": 0.35192379355430603, "learning_rate": 0.001, "loss": 1.7816, "step": 461720 }, { "epoch": 39.83574879227053, "grad_norm": 3.45180082321167, "learning_rate": 0.001, "loss": 1.793, "step": 461776 }, { "epoch": 39.84057971014493, "grad_norm": 3.187631607055664, "learning_rate": 0.001, "loss": 1.8097, "step": 461832 }, { "epoch": 39.84541062801932, "grad_norm": 0.43015211820602417, "learning_rate": 0.001, "loss": 1.8097, "step": 461888 }, { "epoch": 39.85024154589372, "grad_norm": 1.5903823375701904, "learning_rate": 0.001, "loss": 1.8046, "step": 461944 }, { "epoch": 39.85507246376812, "grad_norm": 0.40889662504196167, "learning_rate": 0.001, "loss": 1.799, "step": 462000 }, { "epoch": 39.85990338164251, "grad_norm": 0.60368412733078, "learning_rate": 0.001, "loss": 1.7985, "step": 462056 }, { "epoch": 39.86473429951691, "grad_norm": 1.835952877998352, "learning_rate": 0.001, "loss": 1.8048, "step": 462112 }, { "epoch": 39.869565217391305, "grad_norm": 0.8176502585411072, "learning_rate": 0.001, "loss": 1.8114, "step": 462168 }, { "epoch": 39.8743961352657, "grad_norm": 1.0162155628204346, "learning_rate": 0.001, "loss": 1.8158, "step": 462224 }, { "epoch": 39.8792270531401, "grad_norm": 0.3615024983882904, "learning_rate": 0.001, "loss": 1.8043, "step": 462280 }, { "epoch": 39.88405797101449, "grad_norm": 0.9517819881439209, "learning_rate": 0.001, "loss": 1.8246, "step": 462336 }, { "epoch": 39.888888888888886, "grad_norm": 0.7721591591835022, "learning_rate": 0.001, "loss": 1.8096, "step": 462392 }, { "epoch": 39.893719806763286, "grad_norm": 0.3732524514198303, "learning_rate": 0.001, "loss": 1.8057, "step": 462448 }, { "epoch": 39.89855072463768, "grad_norm": 0.34945225715637207, "learning_rate": 0.001, "loss": 1.809, "step": 462504 }, { "epoch": 39.90338164251208, "grad_norm": 0.3488682508468628, "learning_rate": 0.001, "loss": 1.8053, "step": 462560 }, { "epoch": 39.908212560386474, "grad_norm": 0.501899778842926, "learning_rate": 0.001, "loss": 1.7992, "step": 462616 }, { "epoch": 39.91304347826087, "grad_norm": 0.34453535079956055, "learning_rate": 0.001, "loss": 1.7933, "step": 462672 }, { "epoch": 39.91787439613527, "grad_norm": 0.5333484411239624, "learning_rate": 0.001, "loss": 1.7998, "step": 462728 }, { "epoch": 39.92270531400966, "grad_norm": 1.104191780090332, "learning_rate": 0.001, "loss": 1.8008, "step": 462784 }, { "epoch": 39.927536231884055, "grad_norm": 0.4531182646751404, "learning_rate": 0.001, "loss": 1.7959, "step": 462840 }, { "epoch": 39.932367149758456, "grad_norm": 0.481452614068985, "learning_rate": 0.001, "loss": 1.787, "step": 462896 }, { "epoch": 39.93719806763285, "grad_norm": 4.912936687469482, "learning_rate": 0.001, "loss": 1.7818, "step": 462952 }, { "epoch": 39.94202898550725, "grad_norm": 1.993960976600647, "learning_rate": 0.001, "loss": 1.7933, "step": 463008 }, { "epoch": 39.94685990338164, "grad_norm": 0.8945672512054443, "learning_rate": 0.001, "loss": 1.7847, "step": 463064 }, { "epoch": 39.95169082125604, "grad_norm": 0.610833466053009, "learning_rate": 0.001, "loss": 1.7887, "step": 463120 }, { "epoch": 39.95652173913044, "grad_norm": 0.3845690190792084, "learning_rate": 0.001, "loss": 1.7837, "step": 463176 }, { "epoch": 39.96135265700483, "grad_norm": 0.3148888051509857, "learning_rate": 0.001, "loss": 1.7848, "step": 463232 }, { "epoch": 39.966183574879224, "grad_norm": 0.5330688953399658, "learning_rate": 0.001, "loss": 1.7895, "step": 463288 }, { "epoch": 39.971014492753625, "grad_norm": 0.5150025486946106, "learning_rate": 0.001, "loss": 1.7939, "step": 463344 }, { "epoch": 39.97584541062802, "grad_norm": 0.2669714391231537, "learning_rate": 0.001, "loss": 1.7924, "step": 463400 }, { "epoch": 39.98067632850242, "grad_norm": 0.5796805024147034, "learning_rate": 0.001, "loss": 1.7975, "step": 463456 }, { "epoch": 39.98550724637681, "grad_norm": 0.29214203357696533, "learning_rate": 0.001, "loss": 1.8189, "step": 463512 }, { "epoch": 39.990338164251206, "grad_norm": 0.6958968639373779, "learning_rate": 0.001, "loss": 1.8422, "step": 463568 }, { "epoch": 39.99516908212561, "grad_norm": 0.3184243440628052, "learning_rate": 0.001, "loss": 1.8248, "step": 463624 }, { "epoch": 40.0, "grad_norm": 1.4207526445388794, "learning_rate": 0.001, "loss": 1.812, "step": 463680 }, { "epoch": 40.00483091787439, "grad_norm": 0.6524496674537659, "learning_rate": 0.001, "loss": 1.7646, "step": 463736 }, { "epoch": 40.009661835748794, "grad_norm": 1.4772812128067017, "learning_rate": 0.001, "loss": 1.7513, "step": 463792 }, { "epoch": 40.01449275362319, "grad_norm": 0.7522541880607605, "learning_rate": 0.001, "loss": 1.7561, "step": 463848 }, { "epoch": 40.01932367149758, "grad_norm": 0.5458579659461975, "learning_rate": 0.001, "loss": 1.7706, "step": 463904 }, { "epoch": 40.02415458937198, "grad_norm": 0.2897782325744629, "learning_rate": 0.001, "loss": 1.7693, "step": 463960 }, { "epoch": 40.028985507246375, "grad_norm": 0.28634926676750183, "learning_rate": 0.001, "loss": 1.762, "step": 464016 }, { "epoch": 40.033816425120776, "grad_norm": 0.3169400095939636, "learning_rate": 0.001, "loss": 1.7561, "step": 464072 }, { "epoch": 40.03864734299517, "grad_norm": 0.3103554844856262, "learning_rate": 0.001, "loss": 1.759, "step": 464128 }, { "epoch": 40.04347826086956, "grad_norm": 2.0770516395568848, "learning_rate": 0.001, "loss": 1.7607, "step": 464184 }, { "epoch": 40.04830917874396, "grad_norm": 0.2822380065917969, "learning_rate": 0.001, "loss": 1.7501, "step": 464240 }, { "epoch": 40.05314009661836, "grad_norm": 0.44424065947532654, "learning_rate": 0.001, "loss": 1.7483, "step": 464296 }, { "epoch": 40.05797101449275, "grad_norm": 0.8618985414505005, "learning_rate": 0.001, "loss": 1.7461, "step": 464352 }, { "epoch": 40.06280193236715, "grad_norm": 0.24674537777900696, "learning_rate": 0.001, "loss": 1.7593, "step": 464408 }, { "epoch": 40.067632850241544, "grad_norm": 2.078155040740967, "learning_rate": 0.001, "loss": 1.7589, "step": 464464 }, { "epoch": 40.072463768115945, "grad_norm": 1.1528406143188477, "learning_rate": 0.001, "loss": 1.7637, "step": 464520 }, { "epoch": 40.07729468599034, "grad_norm": 0.2665976881980896, "learning_rate": 0.001, "loss": 1.7581, "step": 464576 }, { "epoch": 40.08212560386473, "grad_norm": 0.31531381607055664, "learning_rate": 0.001, "loss": 1.7512, "step": 464632 }, { "epoch": 40.08695652173913, "grad_norm": 0.5856656432151794, "learning_rate": 0.001, "loss": 1.7607, "step": 464688 }, { "epoch": 40.091787439613526, "grad_norm": 0.6434282660484314, "learning_rate": 0.001, "loss": 1.7638, "step": 464744 }, { "epoch": 40.09661835748792, "grad_norm": 0.2845962643623352, "learning_rate": 0.001, "loss": 1.7502, "step": 464800 }, { "epoch": 40.10144927536232, "grad_norm": 1.4034231901168823, "learning_rate": 0.001, "loss": 1.7581, "step": 464856 }, { "epoch": 40.106280193236714, "grad_norm": 0.9407950639724731, "learning_rate": 0.001, "loss": 1.7573, "step": 464912 }, { "epoch": 40.111111111111114, "grad_norm": 0.3429856598377228, "learning_rate": 0.001, "loss": 1.7568, "step": 464968 }, { "epoch": 40.11594202898551, "grad_norm": 1.6717718839645386, "learning_rate": 0.001, "loss": 1.7726, "step": 465024 }, { "epoch": 40.1207729468599, "grad_norm": 0.45053860545158386, "learning_rate": 0.001, "loss": 1.7653, "step": 465080 }, { "epoch": 40.1256038647343, "grad_norm": 0.4006911516189575, "learning_rate": 0.001, "loss": 1.769, "step": 465136 }, { "epoch": 40.130434782608695, "grad_norm": 2.572669744491577, "learning_rate": 0.001, "loss": 1.7688, "step": 465192 }, { "epoch": 40.13526570048309, "grad_norm": 0.3920019268989563, "learning_rate": 0.001, "loss": 1.7681, "step": 465248 }, { "epoch": 40.14009661835749, "grad_norm": 3.3301730155944824, "learning_rate": 0.001, "loss": 1.7549, "step": 465304 }, { "epoch": 40.14492753623188, "grad_norm": 9.085259437561035, "learning_rate": 0.001, "loss": 1.7686, "step": 465360 }, { "epoch": 40.14975845410628, "grad_norm": 0.5766760110855103, "learning_rate": 0.001, "loss": 1.8207, "step": 465416 }, { "epoch": 40.15458937198068, "grad_norm": 0.7689758539199829, "learning_rate": 0.001, "loss": 1.8316, "step": 465472 }, { "epoch": 40.15942028985507, "grad_norm": 0.3896240293979645, "learning_rate": 0.001, "loss": 1.8178, "step": 465528 }, { "epoch": 40.16425120772947, "grad_norm": 0.6707692742347717, "learning_rate": 0.001, "loss": 1.7952, "step": 465584 }, { "epoch": 40.169082125603865, "grad_norm": 4.146088600158691, "learning_rate": 0.001, "loss": 1.7849, "step": 465640 }, { "epoch": 40.17391304347826, "grad_norm": 1.4844402074813843, "learning_rate": 0.001, "loss": 1.7821, "step": 465696 }, { "epoch": 40.17874396135266, "grad_norm": 0.82992023229599, "learning_rate": 0.001, "loss": 1.7904, "step": 465752 }, { "epoch": 40.18357487922705, "grad_norm": 0.7022607922554016, "learning_rate": 0.001, "loss": 1.7903, "step": 465808 }, { "epoch": 40.18840579710145, "grad_norm": 0.40158897638320923, "learning_rate": 0.001, "loss": 1.7918, "step": 465864 }, { "epoch": 40.193236714975846, "grad_norm": 2.5403244495391846, "learning_rate": 0.001, "loss": 1.7901, "step": 465920 }, { "epoch": 40.19806763285024, "grad_norm": 0.3751315176486969, "learning_rate": 0.001, "loss": 1.7909, "step": 465976 }, { "epoch": 40.20289855072464, "grad_norm": 0.5983942747116089, "learning_rate": 0.001, "loss": 1.7861, "step": 466032 }, { "epoch": 40.207729468599034, "grad_norm": 2.069592237472534, "learning_rate": 0.001, "loss": 1.783, "step": 466088 }, { "epoch": 40.21256038647343, "grad_norm": 2.0024001598358154, "learning_rate": 0.001, "loss": 1.7809, "step": 466144 }, { "epoch": 40.21739130434783, "grad_norm": 0.6388764381408691, "learning_rate": 0.001, "loss": 1.7822, "step": 466200 }, { "epoch": 40.22222222222222, "grad_norm": 1.505611538887024, "learning_rate": 0.001, "loss": 1.7861, "step": 466256 }, { "epoch": 40.227053140096615, "grad_norm": 0.5152513384819031, "learning_rate": 0.001, "loss": 1.7867, "step": 466312 }, { "epoch": 40.231884057971016, "grad_norm": 1.0379621982574463, "learning_rate": 0.001, "loss": 1.7821, "step": 466368 }, { "epoch": 40.23671497584541, "grad_norm": 1.443725824356079, "learning_rate": 0.001, "loss": 1.7804, "step": 466424 }, { "epoch": 40.24154589371981, "grad_norm": 2.7233238220214844, "learning_rate": 0.001, "loss": 1.7757, "step": 466480 }, { "epoch": 40.2463768115942, "grad_norm": 0.9201911091804504, "learning_rate": 0.001, "loss": 1.7792, "step": 466536 }, { "epoch": 40.2512077294686, "grad_norm": 4.018593788146973, "learning_rate": 0.001, "loss": 1.7679, "step": 466592 }, { "epoch": 40.256038647343, "grad_norm": 0.6747997999191284, "learning_rate": 0.001, "loss": 1.7832, "step": 466648 }, { "epoch": 40.26086956521739, "grad_norm": 3.794398546218872, "learning_rate": 0.001, "loss": 1.7907, "step": 466704 }, { "epoch": 40.265700483091784, "grad_norm": 0.8058980107307434, "learning_rate": 0.001, "loss": 1.7781, "step": 466760 }, { "epoch": 40.270531400966185, "grad_norm": 6.252180576324463, "learning_rate": 0.001, "loss": 1.7838, "step": 466816 }, { "epoch": 40.27536231884058, "grad_norm": 0.6024144291877747, "learning_rate": 0.001, "loss": 1.771, "step": 466872 }, { "epoch": 40.28019323671498, "grad_norm": 0.868556559085846, "learning_rate": 0.001, "loss": 1.7791, "step": 466928 }, { "epoch": 40.28502415458937, "grad_norm": 3.6329457759857178, "learning_rate": 0.001, "loss": 1.7833, "step": 466984 }, { "epoch": 40.289855072463766, "grad_norm": 1.9187835454940796, "learning_rate": 0.001, "loss": 1.7735, "step": 467040 }, { "epoch": 40.29468599033817, "grad_norm": 0.840522050857544, "learning_rate": 0.001, "loss": 1.7868, "step": 467096 }, { "epoch": 40.29951690821256, "grad_norm": 9.573920249938965, "learning_rate": 0.001, "loss": 1.7884, "step": 467152 }, { "epoch": 40.30434782608695, "grad_norm": 4.821118354797363, "learning_rate": 0.001, "loss": 1.7894, "step": 467208 }, { "epoch": 40.309178743961354, "grad_norm": 0.6048091650009155, "learning_rate": 0.001, "loss": 1.788, "step": 467264 }, { "epoch": 40.31400966183575, "grad_norm": 3.0706441402435303, "learning_rate": 0.001, "loss": 1.7888, "step": 467320 }, { "epoch": 40.31884057971015, "grad_norm": 0.3010439872741699, "learning_rate": 0.001, "loss": 1.7863, "step": 467376 }, { "epoch": 40.32367149758454, "grad_norm": 0.5936932563781738, "learning_rate": 0.001, "loss": 1.7896, "step": 467432 }, { "epoch": 40.328502415458935, "grad_norm": 0.531542956829071, "learning_rate": 0.001, "loss": 1.7931, "step": 467488 }, { "epoch": 40.333333333333336, "grad_norm": 1.5305919647216797, "learning_rate": 0.001, "loss": 1.7919, "step": 467544 }, { "epoch": 40.33816425120773, "grad_norm": 3.220773220062256, "learning_rate": 0.001, "loss": 1.8072, "step": 467600 }, { "epoch": 40.34299516908212, "grad_norm": 0.48922473192214966, "learning_rate": 0.001, "loss": 1.8089, "step": 467656 }, { "epoch": 40.34782608695652, "grad_norm": 0.4027334749698639, "learning_rate": 0.001, "loss": 1.8008, "step": 467712 }, { "epoch": 40.35265700483092, "grad_norm": 0.6657637357711792, "learning_rate": 0.001, "loss": 1.8009, "step": 467768 }, { "epoch": 40.35748792270532, "grad_norm": 0.4558785557746887, "learning_rate": 0.001, "loss": 1.7945, "step": 467824 }, { "epoch": 40.36231884057971, "grad_norm": 0.42154136300086975, "learning_rate": 0.001, "loss": 1.8001, "step": 467880 }, { "epoch": 40.367149758454104, "grad_norm": 9.063130378723145, "learning_rate": 0.001, "loss": 1.7923, "step": 467936 }, { "epoch": 40.371980676328505, "grad_norm": 0.9229263067245483, "learning_rate": 0.001, "loss": 1.7841, "step": 467992 }, { "epoch": 40.3768115942029, "grad_norm": 2.44097900390625, "learning_rate": 0.001, "loss": 1.7879, "step": 468048 }, { "epoch": 40.38164251207729, "grad_norm": 3.8910253047943115, "learning_rate": 0.001, "loss": 1.789, "step": 468104 }, { "epoch": 40.38647342995169, "grad_norm": 0.7099465727806091, "learning_rate": 0.001, "loss": 1.7959, "step": 468160 }, { "epoch": 40.391304347826086, "grad_norm": 0.48814600706100464, "learning_rate": 0.001, "loss": 1.7959, "step": 468216 }, { "epoch": 40.39613526570048, "grad_norm": 3.358279228210449, "learning_rate": 0.001, "loss": 1.8128, "step": 468272 }, { "epoch": 40.40096618357488, "grad_norm": 0.5397480726242065, "learning_rate": 0.001, "loss": 1.8036, "step": 468328 }, { "epoch": 40.405797101449274, "grad_norm": 1.929792881011963, "learning_rate": 0.001, "loss": 1.8015, "step": 468384 }, { "epoch": 40.410628019323674, "grad_norm": 0.45460107922554016, "learning_rate": 0.001, "loss": 1.8084, "step": 468440 }, { "epoch": 40.41545893719807, "grad_norm": 0.34679287672042847, "learning_rate": 0.001, "loss": 1.7966, "step": 468496 }, { "epoch": 40.42028985507246, "grad_norm": 1.0440071821212769, "learning_rate": 0.001, "loss": 1.7897, "step": 468552 }, { "epoch": 40.42512077294686, "grad_norm": 0.316698282957077, "learning_rate": 0.001, "loss": 1.7985, "step": 468608 }, { "epoch": 40.429951690821255, "grad_norm": 4.706199645996094, "learning_rate": 0.001, "loss": 1.7846, "step": 468664 }, { "epoch": 40.43478260869565, "grad_norm": 0.5142912864685059, "learning_rate": 0.001, "loss": 1.7943, "step": 468720 }, { "epoch": 40.43961352657005, "grad_norm": 0.34487712383270264, "learning_rate": 0.001, "loss": 1.7988, "step": 468776 }, { "epoch": 40.44444444444444, "grad_norm": 3.8424384593963623, "learning_rate": 0.001, "loss": 1.7842, "step": 468832 }, { "epoch": 40.44927536231884, "grad_norm": 0.6596300601959229, "learning_rate": 0.001, "loss": 1.7865, "step": 468888 }, { "epoch": 40.45410628019324, "grad_norm": 0.3248670697212219, "learning_rate": 0.001, "loss": 1.7873, "step": 468944 }, { "epoch": 40.45893719806763, "grad_norm": 2.6557395458221436, "learning_rate": 0.001, "loss": 1.7878, "step": 469000 }, { "epoch": 40.46376811594203, "grad_norm": 0.3085186779499054, "learning_rate": 0.001, "loss": 1.7935, "step": 469056 }, { "epoch": 40.468599033816425, "grad_norm": 0.3560912609100342, "learning_rate": 0.001, "loss": 1.7973, "step": 469112 }, { "epoch": 40.47342995169082, "grad_norm": 5.407197952270508, "learning_rate": 0.001, "loss": 1.797, "step": 469168 }, { "epoch": 40.47826086956522, "grad_norm": 0.768102765083313, "learning_rate": 0.001, "loss": 1.7848, "step": 469224 }, { "epoch": 40.48309178743961, "grad_norm": 0.41351887583732605, "learning_rate": 0.001, "loss": 1.7851, "step": 469280 }, { "epoch": 40.48792270531401, "grad_norm": 1.4266083240509033, "learning_rate": 0.001, "loss": 1.7875, "step": 469336 }, { "epoch": 40.492753623188406, "grad_norm": 0.7370843291282654, "learning_rate": 0.001, "loss": 1.7774, "step": 469392 }, { "epoch": 40.4975845410628, "grad_norm": 1.0718393325805664, "learning_rate": 0.001, "loss": 1.7945, "step": 469448 }, { "epoch": 40.5024154589372, "grad_norm": 0.7308846712112427, "learning_rate": 0.001, "loss": 1.7811, "step": 469504 }, { "epoch": 40.507246376811594, "grad_norm": 0.43245819211006165, "learning_rate": 0.001, "loss": 1.7922, "step": 469560 }, { "epoch": 40.51207729468599, "grad_norm": 0.6871855854988098, "learning_rate": 0.001, "loss": 1.7985, "step": 469616 }, { "epoch": 40.51690821256039, "grad_norm": 3.2350080013275146, "learning_rate": 0.001, "loss": 1.7913, "step": 469672 }, { "epoch": 40.52173913043478, "grad_norm": 0.330098420381546, "learning_rate": 0.001, "loss": 1.7987, "step": 469728 }, { "epoch": 40.52657004830918, "grad_norm": 1.250045657157898, "learning_rate": 0.001, "loss": 1.7906, "step": 469784 }, { "epoch": 40.531400966183575, "grad_norm": 1.5717312097549438, "learning_rate": 0.001, "loss": 1.8026, "step": 469840 }, { "epoch": 40.53623188405797, "grad_norm": 1.2172527313232422, "learning_rate": 0.001, "loss": 1.7787, "step": 469896 }, { "epoch": 40.54106280193237, "grad_norm": 1.6189872026443481, "learning_rate": 0.001, "loss": 1.7867, "step": 469952 }, { "epoch": 40.54589371980676, "grad_norm": 0.734259843826294, "learning_rate": 0.001, "loss": 1.7887, "step": 470008 }, { "epoch": 40.55072463768116, "grad_norm": 0.9472125172615051, "learning_rate": 0.001, "loss": 1.7851, "step": 470064 }, { "epoch": 40.55555555555556, "grad_norm": 1.1033351421356201, "learning_rate": 0.001, "loss": 1.7915, "step": 470120 }, { "epoch": 40.56038647342995, "grad_norm": 0.8355786800384521, "learning_rate": 0.001, "loss": 1.7946, "step": 470176 }, { "epoch": 40.56521739130435, "grad_norm": 12.913107872009277, "learning_rate": 0.001, "loss": 1.7957, "step": 470232 }, { "epoch": 40.570048309178745, "grad_norm": 2.3686106204986572, "learning_rate": 0.001, "loss": 1.7918, "step": 470288 }, { "epoch": 40.57487922705314, "grad_norm": 0.5766381621360779, "learning_rate": 0.001, "loss": 1.8085, "step": 470344 }, { "epoch": 40.57971014492754, "grad_norm": 1.085120439529419, "learning_rate": 0.001, "loss": 1.8036, "step": 470400 }, { "epoch": 40.58454106280193, "grad_norm": 2.063915252685547, "learning_rate": 0.001, "loss": 1.8052, "step": 470456 }, { "epoch": 40.589371980676326, "grad_norm": 0.5404335260391235, "learning_rate": 0.001, "loss": 1.7923, "step": 470512 }, { "epoch": 40.594202898550726, "grad_norm": 3.4098215103149414, "learning_rate": 0.001, "loss": 1.7998, "step": 470568 }, { "epoch": 40.59903381642512, "grad_norm": 2.08225679397583, "learning_rate": 0.001, "loss": 1.804, "step": 470624 }, { "epoch": 40.60386473429952, "grad_norm": 1.1381951570510864, "learning_rate": 0.001, "loss": 1.8026, "step": 470680 }, { "epoch": 40.608695652173914, "grad_norm": 1.9227761030197144, "learning_rate": 0.001, "loss": 1.7948, "step": 470736 }, { "epoch": 40.61352657004831, "grad_norm": 0.3060775399208069, "learning_rate": 0.001, "loss": 1.8048, "step": 470792 }, { "epoch": 40.61835748792271, "grad_norm": 0.3622453212738037, "learning_rate": 0.001, "loss": 1.8031, "step": 470848 }, { "epoch": 40.6231884057971, "grad_norm": 0.7300992608070374, "learning_rate": 0.001, "loss": 1.786, "step": 470904 }, { "epoch": 40.628019323671495, "grad_norm": 2.353120803833008, "learning_rate": 0.001, "loss": 1.7869, "step": 470960 }, { "epoch": 40.632850241545896, "grad_norm": 4.230882167816162, "learning_rate": 0.001, "loss": 1.7915, "step": 471016 }, { "epoch": 40.63768115942029, "grad_norm": 5.836760520935059, "learning_rate": 0.001, "loss": 1.8007, "step": 471072 }, { "epoch": 40.64251207729468, "grad_norm": 1.0078600645065308, "learning_rate": 0.001, "loss": 1.7984, "step": 471128 }, { "epoch": 40.64734299516908, "grad_norm": 1.9425383806228638, "learning_rate": 0.001, "loss": 1.7963, "step": 471184 }, { "epoch": 40.65217391304348, "grad_norm": 2.351778030395508, "learning_rate": 0.001, "loss": 1.7989, "step": 471240 }, { "epoch": 40.65700483091788, "grad_norm": 1.2992092370986938, "learning_rate": 0.001, "loss": 1.8081, "step": 471296 }, { "epoch": 40.66183574879227, "grad_norm": 0.8100602626800537, "learning_rate": 0.001, "loss": 1.7981, "step": 471352 }, { "epoch": 40.666666666666664, "grad_norm": 0.7669732570648193, "learning_rate": 0.001, "loss": 1.7953, "step": 471408 }, { "epoch": 40.671497584541065, "grad_norm": 0.9874126315116882, "learning_rate": 0.001, "loss": 1.7905, "step": 471464 }, { "epoch": 40.67632850241546, "grad_norm": 2.1374104022979736, "learning_rate": 0.001, "loss": 1.7949, "step": 471520 }, { "epoch": 40.68115942028985, "grad_norm": 1.9910579919815063, "learning_rate": 0.001, "loss": 1.7917, "step": 471576 }, { "epoch": 40.68599033816425, "grad_norm": 0.8126847147941589, "learning_rate": 0.001, "loss": 1.7862, "step": 471632 }, { "epoch": 40.690821256038646, "grad_norm": 1.6848032474517822, "learning_rate": 0.001, "loss": 1.7846, "step": 471688 }, { "epoch": 40.69565217391305, "grad_norm": 5.48894739151001, "learning_rate": 0.001, "loss": 1.7922, "step": 471744 }, { "epoch": 40.70048309178744, "grad_norm": 15.806963920593262, "learning_rate": 0.001, "loss": 1.796, "step": 471800 }, { "epoch": 40.70531400966183, "grad_norm": 1.817166805267334, "learning_rate": 0.001, "loss": 1.7947, "step": 471856 }, { "epoch": 40.710144927536234, "grad_norm": 1.236768126487732, "learning_rate": 0.001, "loss": 1.7957, "step": 471912 }, { "epoch": 40.71497584541063, "grad_norm": 0.7347263693809509, "learning_rate": 0.001, "loss": 1.8026, "step": 471968 }, { "epoch": 40.71980676328502, "grad_norm": 0.33844712376594543, "learning_rate": 0.001, "loss": 1.8077, "step": 472024 }, { "epoch": 40.72463768115942, "grad_norm": 0.2523566484451294, "learning_rate": 0.001, "loss": 1.8086, "step": 472080 }, { "epoch": 40.729468599033815, "grad_norm": 0.5487380623817444, "learning_rate": 0.001, "loss": 1.8063, "step": 472136 }, { "epoch": 40.734299516908216, "grad_norm": 0.2891555726528168, "learning_rate": 0.001, "loss": 1.8087, "step": 472192 }, { "epoch": 40.73913043478261, "grad_norm": 1.0195664167404175, "learning_rate": 0.001, "loss": 1.8053, "step": 472248 }, { "epoch": 40.743961352657, "grad_norm": 0.3404592275619507, "learning_rate": 0.001, "loss": 1.8011, "step": 472304 }, { "epoch": 40.7487922705314, "grad_norm": 0.2918546795845032, "learning_rate": 0.001, "loss": 1.7996, "step": 472360 }, { "epoch": 40.7536231884058, "grad_norm": 1.090384602546692, "learning_rate": 0.001, "loss": 1.7853, "step": 472416 }, { "epoch": 40.75845410628019, "grad_norm": 0.5601615905761719, "learning_rate": 0.001, "loss": 1.7972, "step": 472472 }, { "epoch": 40.76328502415459, "grad_norm": 1.242680311203003, "learning_rate": 0.001, "loss": 1.7908, "step": 472528 }, { "epoch": 40.768115942028984, "grad_norm": 5.124416828155518, "learning_rate": 0.001, "loss": 1.7971, "step": 472584 }, { "epoch": 40.772946859903385, "grad_norm": 1.741362452507019, "learning_rate": 0.001, "loss": 1.7985, "step": 472640 }, { "epoch": 40.77777777777778, "grad_norm": 1.7197939157485962, "learning_rate": 0.001, "loss": 1.8017, "step": 472696 }, { "epoch": 40.78260869565217, "grad_norm": 0.6404716372489929, "learning_rate": 0.001, "loss": 1.8076, "step": 472752 }, { "epoch": 40.78743961352657, "grad_norm": 0.95561283826828, "learning_rate": 0.001, "loss": 1.8056, "step": 472808 }, { "epoch": 40.792270531400966, "grad_norm": 2.1630289554595947, "learning_rate": 0.001, "loss": 1.8173, "step": 472864 }, { "epoch": 40.79710144927536, "grad_norm": 0.44083094596862793, "learning_rate": 0.001, "loss": 1.8341, "step": 472920 }, { "epoch": 40.80193236714976, "grad_norm": 5.592893600463867, "learning_rate": 0.001, "loss": 1.8339, "step": 472976 }, { "epoch": 40.806763285024154, "grad_norm": 1.3401254415512085, "learning_rate": 0.001, "loss": 1.8278, "step": 473032 }, { "epoch": 40.81159420289855, "grad_norm": 2.299884080886841, "learning_rate": 0.001, "loss": 1.8097, "step": 473088 }, { "epoch": 40.81642512077295, "grad_norm": 1.4739097356796265, "learning_rate": 0.001, "loss": 1.8035, "step": 473144 }, { "epoch": 40.82125603864734, "grad_norm": 1.2621538639068604, "learning_rate": 0.001, "loss": 1.8056, "step": 473200 }, { "epoch": 40.82608695652174, "grad_norm": 3.414216995239258, "learning_rate": 0.001, "loss": 1.8027, "step": 473256 }, { "epoch": 40.830917874396135, "grad_norm": 0.6508470177650452, "learning_rate": 0.001, "loss": 1.8106, "step": 473312 }, { "epoch": 40.83574879227053, "grad_norm": 0.2761024534702301, "learning_rate": 0.001, "loss": 1.8053, "step": 473368 }, { "epoch": 40.84057971014493, "grad_norm": 0.42463451623916626, "learning_rate": 0.001, "loss": 1.8063, "step": 473424 }, { "epoch": 40.84541062801932, "grad_norm": 0.9954354763031006, "learning_rate": 0.001, "loss": 1.8115, "step": 473480 }, { "epoch": 40.85024154589372, "grad_norm": 0.4295814633369446, "learning_rate": 0.001, "loss": 1.799, "step": 473536 }, { "epoch": 40.85507246376812, "grad_norm": 0.3261200487613678, "learning_rate": 0.001, "loss": 1.802, "step": 473592 }, { "epoch": 40.85990338164251, "grad_norm": 0.72611403465271, "learning_rate": 0.001, "loss": 1.8004, "step": 473648 }, { "epoch": 40.86473429951691, "grad_norm": 0.2879563868045807, "learning_rate": 0.001, "loss": 1.8048, "step": 473704 }, { "epoch": 40.869565217391305, "grad_norm": 0.28102660179138184, "learning_rate": 0.001, "loss": 1.8084, "step": 473760 }, { "epoch": 40.8743961352657, "grad_norm": 0.3959755003452301, "learning_rate": 0.001, "loss": 1.8062, "step": 473816 }, { "epoch": 40.8792270531401, "grad_norm": 0.522480309009552, "learning_rate": 0.001, "loss": 1.7929, "step": 473872 }, { "epoch": 40.88405797101449, "grad_norm": 0.4061656594276428, "learning_rate": 0.001, "loss": 1.8045, "step": 473928 }, { "epoch": 40.888888888888886, "grad_norm": 0.33784377574920654, "learning_rate": 0.001, "loss": 1.8011, "step": 473984 }, { "epoch": 40.893719806763286, "grad_norm": 0.38558530807495117, "learning_rate": 0.001, "loss": 1.7955, "step": 474040 }, { "epoch": 40.89855072463768, "grad_norm": 0.7068955302238464, "learning_rate": 0.001, "loss": 1.8, "step": 474096 }, { "epoch": 40.90338164251208, "grad_norm": 0.34119927883148193, "learning_rate": 0.001, "loss": 1.7966, "step": 474152 }, { "epoch": 40.908212560386474, "grad_norm": 1.0256468057632446, "learning_rate": 0.001, "loss": 1.7939, "step": 474208 }, { "epoch": 40.91304347826087, "grad_norm": 0.42362144589424133, "learning_rate": 0.001, "loss": 1.7926, "step": 474264 }, { "epoch": 40.91787439613527, "grad_norm": 1.646641731262207, "learning_rate": 0.001, "loss": 1.7942, "step": 474320 }, { "epoch": 40.92270531400966, "grad_norm": 0.718420147895813, "learning_rate": 0.001, "loss": 1.7942, "step": 474376 }, { "epoch": 40.927536231884055, "grad_norm": 1.3165122270584106, "learning_rate": 0.001, "loss": 1.7888, "step": 474432 }, { "epoch": 40.932367149758456, "grad_norm": 0.7424231171607971, "learning_rate": 0.001, "loss": 1.7939, "step": 474488 }, { "epoch": 40.93719806763285, "grad_norm": 0.6402720808982849, "learning_rate": 0.001, "loss": 1.7992, "step": 474544 }, { "epoch": 40.94202898550725, "grad_norm": 0.40274515748023987, "learning_rate": 0.001, "loss": 1.7914, "step": 474600 }, { "epoch": 40.94685990338164, "grad_norm": 0.5087569952011108, "learning_rate": 0.001, "loss": 1.8068, "step": 474656 }, { "epoch": 40.95169082125604, "grad_norm": 0.31640368700027466, "learning_rate": 0.001, "loss": 1.8043, "step": 474712 }, { "epoch": 40.95652173913044, "grad_norm": 0.6490268707275391, "learning_rate": 0.001, "loss": 1.8093, "step": 474768 }, { "epoch": 40.96135265700483, "grad_norm": 0.3309587836265564, "learning_rate": 0.001, "loss": 1.7946, "step": 474824 }, { "epoch": 40.966183574879224, "grad_norm": 0.3931916654109955, "learning_rate": 0.001, "loss": 1.7958, "step": 474880 }, { "epoch": 40.971014492753625, "grad_norm": 0.5302411913871765, "learning_rate": 0.001, "loss": 1.7975, "step": 474936 }, { "epoch": 40.97584541062802, "grad_norm": 0.850965678691864, "learning_rate": 0.001, "loss": 1.7986, "step": 474992 }, { "epoch": 40.98067632850242, "grad_norm": 2.373933792114258, "learning_rate": 0.001, "loss": 1.8104, "step": 475048 }, { "epoch": 40.98550724637681, "grad_norm": 2.4777591228485107, "learning_rate": 0.001, "loss": 1.8295, "step": 475104 }, { "epoch": 40.990338164251206, "grad_norm": 0.3572227954864502, "learning_rate": 0.001, "loss": 1.8322, "step": 475160 }, { "epoch": 40.99516908212561, "grad_norm": 0.9296237230300903, "learning_rate": 0.001, "loss": 1.822, "step": 475216 }, { "epoch": 41.0, "grad_norm": 0.8926190137863159, "learning_rate": 0.001, "loss": 1.8112, "step": 475272 }, { "epoch": 41.00483091787439, "grad_norm": 2.0439271926879883, "learning_rate": 0.001, "loss": 1.7837, "step": 475328 }, { "epoch": 41.009661835748794, "grad_norm": 0.3749220073223114, "learning_rate": 0.001, "loss": 1.7718, "step": 475384 }, { "epoch": 41.01449275362319, "grad_norm": 0.5885409712791443, "learning_rate": 0.001, "loss": 1.7551, "step": 475440 }, { "epoch": 41.01932367149758, "grad_norm": 0.5109129548072815, "learning_rate": 0.001, "loss": 1.7761, "step": 475496 }, { "epoch": 41.02415458937198, "grad_norm": 0.4802860617637634, "learning_rate": 0.001, "loss": 1.7783, "step": 475552 }, { "epoch": 41.028985507246375, "grad_norm": 0.4525991678237915, "learning_rate": 0.001, "loss": 1.77, "step": 475608 }, { "epoch": 41.033816425120776, "grad_norm": 5.876457214355469, "learning_rate": 0.001, "loss": 1.776, "step": 475664 }, { "epoch": 41.03864734299517, "grad_norm": 1.0678629875183105, "learning_rate": 0.001, "loss": 1.7699, "step": 475720 }, { "epoch": 41.04347826086956, "grad_norm": 5.873805046081543, "learning_rate": 0.001, "loss": 1.7627, "step": 475776 }, { "epoch": 41.04830917874396, "grad_norm": 7.068751335144043, "learning_rate": 0.001, "loss": 1.7577, "step": 475832 }, { "epoch": 41.05314009661836, "grad_norm": 0.4418167769908905, "learning_rate": 0.001, "loss": 1.7756, "step": 475888 }, { "epoch": 41.05797101449275, "grad_norm": 1.9148973226547241, "learning_rate": 0.001, "loss": 1.7767, "step": 475944 }, { "epoch": 41.06280193236715, "grad_norm": 0.36009693145751953, "learning_rate": 0.001, "loss": 1.7742, "step": 476000 }, { "epoch": 41.067632850241544, "grad_norm": 0.34679704904556274, "learning_rate": 0.001, "loss": 1.7679, "step": 476056 }, { "epoch": 41.072463768115945, "grad_norm": 2.6572606563568115, "learning_rate": 0.001, "loss": 1.7737, "step": 476112 }, { "epoch": 41.07729468599034, "grad_norm": 0.3586326539516449, "learning_rate": 0.001, "loss": 1.7716, "step": 476168 }, { "epoch": 41.08212560386473, "grad_norm": 0.3348504304885864, "learning_rate": 0.001, "loss": 1.7687, "step": 476224 }, { "epoch": 41.08695652173913, "grad_norm": 1.4983302354812622, "learning_rate": 0.001, "loss": 1.769, "step": 476280 }, { "epoch": 41.091787439613526, "grad_norm": 0.6238147020339966, "learning_rate": 0.001, "loss": 1.7625, "step": 476336 }, { "epoch": 41.09661835748792, "grad_norm": 0.43684133887290955, "learning_rate": 0.001, "loss": 1.7535, "step": 476392 }, { "epoch": 41.10144927536232, "grad_norm": 0.6273221373558044, "learning_rate": 0.001, "loss": 1.775, "step": 476448 }, { "epoch": 41.106280193236714, "grad_norm": 0.33252567052841187, "learning_rate": 0.001, "loss": 1.7813, "step": 476504 }, { "epoch": 41.111111111111114, "grad_norm": 1.2029973268508911, "learning_rate": 0.001, "loss": 1.7839, "step": 476560 }, { "epoch": 41.11594202898551, "grad_norm": 0.7935737371444702, "learning_rate": 0.001, "loss": 1.7744, "step": 476616 }, { "epoch": 41.1207729468599, "grad_norm": 0.3420565724372864, "learning_rate": 0.001, "loss": 1.7688, "step": 476672 }, { "epoch": 41.1256038647343, "grad_norm": 0.4086375832557678, "learning_rate": 0.001, "loss": 1.7676, "step": 476728 }, { "epoch": 41.130434782608695, "grad_norm": 0.7315663695335388, "learning_rate": 0.001, "loss": 1.7666, "step": 476784 }, { "epoch": 41.13526570048309, "grad_norm": 1.3200725317001343, "learning_rate": 0.001, "loss": 1.7617, "step": 476840 }, { "epoch": 41.14009661835749, "grad_norm": 0.36857759952545166, "learning_rate": 0.001, "loss": 1.7746, "step": 476896 }, { "epoch": 41.14492753623188, "grad_norm": 2.1257357597351074, "learning_rate": 0.001, "loss": 1.7752, "step": 476952 }, { "epoch": 41.14975845410628, "grad_norm": 18.307849884033203, "learning_rate": 0.001, "loss": 1.768, "step": 477008 }, { "epoch": 41.15458937198068, "grad_norm": 15.964292526245117, "learning_rate": 0.001, "loss": 1.7659, "step": 477064 }, { "epoch": 41.15942028985507, "grad_norm": 1.7357410192489624, "learning_rate": 0.001, "loss": 1.7811, "step": 477120 }, { "epoch": 41.16425120772947, "grad_norm": 2.8916144371032715, "learning_rate": 0.001, "loss": 1.7766, "step": 477176 }, { "epoch": 41.169082125603865, "grad_norm": 9.587381362915039, "learning_rate": 0.001, "loss": 1.7754, "step": 477232 }, { "epoch": 41.17391304347826, "grad_norm": 10.051681518554688, "learning_rate": 0.001, "loss": 1.7862, "step": 477288 }, { "epoch": 41.17874396135266, "grad_norm": 0.5565129518508911, "learning_rate": 0.001, "loss": 1.8049, "step": 477344 }, { "epoch": 41.18357487922705, "grad_norm": 0.4959957003593445, "learning_rate": 0.001, "loss": 1.8187, "step": 477400 }, { "epoch": 41.18840579710145, "grad_norm": 3.313873052597046, "learning_rate": 0.001, "loss": 1.8037, "step": 477456 }, { "epoch": 41.193236714975846, "grad_norm": 0.4376958906650543, "learning_rate": 0.001, "loss": 1.7858, "step": 477512 }, { "epoch": 41.19806763285024, "grad_norm": 0.4345230460166931, "learning_rate": 0.001, "loss": 1.7866, "step": 477568 }, { "epoch": 41.20289855072464, "grad_norm": 2.190399646759033, "learning_rate": 0.001, "loss": 1.7844, "step": 477624 }, { "epoch": 41.207729468599034, "grad_norm": 0.5309112071990967, "learning_rate": 0.001, "loss": 1.782, "step": 477680 }, { "epoch": 41.21256038647343, "grad_norm": 0.5927920937538147, "learning_rate": 0.001, "loss": 1.767, "step": 477736 }, { "epoch": 41.21739130434783, "grad_norm": 0.5471197366714478, "learning_rate": 0.001, "loss": 1.7655, "step": 477792 }, { "epoch": 41.22222222222222, "grad_norm": 0.3584359288215637, "learning_rate": 0.001, "loss": 1.772, "step": 477848 }, { "epoch": 41.227053140096615, "grad_norm": 0.6914898157119751, "learning_rate": 0.001, "loss": 1.7711, "step": 477904 }, { "epoch": 41.231884057971016, "grad_norm": 1.2572054862976074, "learning_rate": 0.001, "loss": 1.7746, "step": 477960 }, { "epoch": 41.23671497584541, "grad_norm": 2.600163221359253, "learning_rate": 0.001, "loss": 1.7854, "step": 478016 }, { "epoch": 41.24154589371981, "grad_norm": 1.1626826524734497, "learning_rate": 0.001, "loss": 1.78, "step": 478072 }, { "epoch": 41.2463768115942, "grad_norm": 0.4736309349536896, "learning_rate": 0.001, "loss": 1.7716, "step": 478128 }, { "epoch": 41.2512077294686, "grad_norm": 0.49782654643058777, "learning_rate": 0.001, "loss": 1.7727, "step": 478184 }, { "epoch": 41.256038647343, "grad_norm": 1.4296596050262451, "learning_rate": 0.001, "loss": 1.7736, "step": 478240 }, { "epoch": 41.26086956521739, "grad_norm": 1.2838565111160278, "learning_rate": 0.001, "loss": 1.7755, "step": 478296 }, { "epoch": 41.265700483091784, "grad_norm": 0.30322641134262085, "learning_rate": 0.001, "loss": 1.7734, "step": 478352 }, { "epoch": 41.270531400966185, "grad_norm": 0.558612585067749, "learning_rate": 0.001, "loss": 1.7792, "step": 478408 }, { "epoch": 41.27536231884058, "grad_norm": 0.991550624370575, "learning_rate": 0.001, "loss": 1.7751, "step": 478464 }, { "epoch": 41.28019323671498, "grad_norm": 3.387787103652954, "learning_rate": 0.001, "loss": 1.7813, "step": 478520 }, { "epoch": 41.28502415458937, "grad_norm": 0.44729992747306824, "learning_rate": 0.001, "loss": 1.7791, "step": 478576 }, { "epoch": 41.289855072463766, "grad_norm": 0.42040523886680603, "learning_rate": 0.001, "loss": 1.773, "step": 478632 }, { "epoch": 41.29468599033817, "grad_norm": 0.5031183362007141, "learning_rate": 0.001, "loss": 1.7819, "step": 478688 }, { "epoch": 41.29951690821256, "grad_norm": 5.760951042175293, "learning_rate": 0.001, "loss": 1.7731, "step": 478744 }, { "epoch": 41.30434782608695, "grad_norm": 0.3674054741859436, "learning_rate": 0.001, "loss": 1.7887, "step": 478800 }, { "epoch": 41.309178743961354, "grad_norm": 0.3358306586742401, "learning_rate": 0.001, "loss": 1.783, "step": 478856 }, { "epoch": 41.31400966183575, "grad_norm": 0.7326270341873169, "learning_rate": 0.001, "loss": 1.7937, "step": 478912 }, { "epoch": 41.31884057971015, "grad_norm": 1.3335152864456177, "learning_rate": 0.001, "loss": 1.7949, "step": 478968 }, { "epoch": 41.32367149758454, "grad_norm": 1.9383679628372192, "learning_rate": 0.001, "loss": 1.8, "step": 479024 }, { "epoch": 41.328502415458935, "grad_norm": 0.5108675956726074, "learning_rate": 0.001, "loss": 1.7908, "step": 479080 }, { "epoch": 41.333333333333336, "grad_norm": 0.6211147904396057, "learning_rate": 0.001, "loss": 1.7871, "step": 479136 }, { "epoch": 41.33816425120773, "grad_norm": 1.3744021654129028, "learning_rate": 0.001, "loss": 1.7883, "step": 479192 }, { "epoch": 41.34299516908212, "grad_norm": 0.4898088872432709, "learning_rate": 0.001, "loss": 1.7749, "step": 479248 }, { "epoch": 41.34782608695652, "grad_norm": 8.637784004211426, "learning_rate": 0.001, "loss": 1.7906, "step": 479304 }, { "epoch": 41.35265700483092, "grad_norm": 0.5045645236968994, "learning_rate": 0.001, "loss": 1.7901, "step": 479360 }, { "epoch": 41.35748792270532, "grad_norm": 2.35829496383667, "learning_rate": 0.001, "loss": 1.7951, "step": 479416 }, { "epoch": 41.36231884057971, "grad_norm": 3.2063772678375244, "learning_rate": 0.001, "loss": 1.7871, "step": 479472 }, { "epoch": 41.367149758454104, "grad_norm": 8.71304702758789, "learning_rate": 0.001, "loss": 1.7794, "step": 479528 }, { "epoch": 41.371980676328505, "grad_norm": 0.27432936429977417, "learning_rate": 0.001, "loss": 1.7908, "step": 479584 }, { "epoch": 41.3768115942029, "grad_norm": 0.48368749022483826, "learning_rate": 0.001, "loss": 1.801, "step": 479640 }, { "epoch": 41.38164251207729, "grad_norm": 1.5829427242279053, "learning_rate": 0.001, "loss": 1.8054, "step": 479696 }, { "epoch": 41.38647342995169, "grad_norm": 1.237141489982605, "learning_rate": 0.001, "loss": 1.7911, "step": 479752 }, { "epoch": 41.391304347826086, "grad_norm": 0.4113626778125763, "learning_rate": 0.001, "loss": 1.7925, "step": 479808 }, { "epoch": 41.39613526570048, "grad_norm": 3.046964168548584, "learning_rate": 0.001, "loss": 1.7911, "step": 479864 }, { "epoch": 41.40096618357488, "grad_norm": 3.221512794494629, "learning_rate": 0.001, "loss": 1.7843, "step": 479920 }, { "epoch": 41.405797101449274, "grad_norm": 0.30573770403862, "learning_rate": 0.001, "loss": 1.7887, "step": 479976 }, { "epoch": 41.410628019323674, "grad_norm": 0.3555962145328522, "learning_rate": 0.001, "loss": 1.7874, "step": 480032 }, { "epoch": 41.41545893719807, "grad_norm": 11.715228080749512, "learning_rate": 0.001, "loss": 1.7906, "step": 480088 }, { "epoch": 41.42028985507246, "grad_norm": 0.35159745812416077, "learning_rate": 0.001, "loss": 1.7796, "step": 480144 }, { "epoch": 41.42512077294686, "grad_norm": 0.8535027503967285, "learning_rate": 0.001, "loss": 1.7848, "step": 480200 }, { "epoch": 41.429951690821255, "grad_norm": 3.7386040687561035, "learning_rate": 0.001, "loss": 1.7768, "step": 480256 }, { "epoch": 41.43478260869565, "grad_norm": 0.301082581281662, "learning_rate": 0.001, "loss": 1.7808, "step": 480312 }, { "epoch": 41.43961352657005, "grad_norm": 0.3548678755760193, "learning_rate": 0.001, "loss": 1.7732, "step": 480368 }, { "epoch": 41.44444444444444, "grad_norm": 1.2450335025787354, "learning_rate": 0.001, "loss": 1.7715, "step": 480424 }, { "epoch": 41.44927536231884, "grad_norm": 0.5334143042564392, "learning_rate": 0.001, "loss": 1.7803, "step": 480480 }, { "epoch": 41.45410628019324, "grad_norm": 1.6901187896728516, "learning_rate": 0.001, "loss": 1.7874, "step": 480536 }, { "epoch": 41.45893719806763, "grad_norm": 0.4750899076461792, "learning_rate": 0.001, "loss": 1.7804, "step": 480592 }, { "epoch": 41.46376811594203, "grad_norm": 0.3533329367637634, "learning_rate": 0.001, "loss": 1.7864, "step": 480648 }, { "epoch": 41.468599033816425, "grad_norm": 8.500853538513184, "learning_rate": 0.001, "loss": 1.787, "step": 480704 }, { "epoch": 41.47342995169082, "grad_norm": 0.7980721592903137, "learning_rate": 0.001, "loss": 1.7814, "step": 480760 }, { "epoch": 41.47826086956522, "grad_norm": 1.0998457670211792, "learning_rate": 0.001, "loss": 1.7812, "step": 480816 }, { "epoch": 41.48309178743961, "grad_norm": 1.9388716220855713, "learning_rate": 0.001, "loss": 1.7766, "step": 480872 }, { "epoch": 41.48792270531401, "grad_norm": 0.5676780343055725, "learning_rate": 0.001, "loss": 1.7859, "step": 480928 }, { "epoch": 41.492753623188406, "grad_norm": 0.3662495017051697, "learning_rate": 0.001, "loss": 1.7915, "step": 480984 }, { "epoch": 41.4975845410628, "grad_norm": 1.3654133081436157, "learning_rate": 0.001, "loss": 1.7959, "step": 481040 }, { "epoch": 41.5024154589372, "grad_norm": 0.5599485039710999, "learning_rate": 0.001, "loss": 1.7949, "step": 481096 }, { "epoch": 41.507246376811594, "grad_norm": 1.0812079906463623, "learning_rate": 0.001, "loss": 1.7809, "step": 481152 }, { "epoch": 41.51207729468599, "grad_norm": 0.378946989774704, "learning_rate": 0.001, "loss": 1.7828, "step": 481208 }, { "epoch": 41.51690821256039, "grad_norm": 0.593230664730072, "learning_rate": 0.001, "loss": 1.7786, "step": 481264 }, { "epoch": 41.52173913043478, "grad_norm": 0.4693809747695923, "learning_rate": 0.001, "loss": 1.7833, "step": 481320 }, { "epoch": 41.52657004830918, "grad_norm": 1.0312747955322266, "learning_rate": 0.001, "loss": 1.7774, "step": 481376 }, { "epoch": 41.531400966183575, "grad_norm": 0.37058448791503906, "learning_rate": 0.001, "loss": 1.7772, "step": 481432 }, { "epoch": 41.53623188405797, "grad_norm": 0.701335608959198, "learning_rate": 0.001, "loss": 1.7823, "step": 481488 }, { "epoch": 41.54106280193237, "grad_norm": 0.3499172031879425, "learning_rate": 0.001, "loss": 1.7807, "step": 481544 }, { "epoch": 41.54589371980676, "grad_norm": 0.6889649033546448, "learning_rate": 0.001, "loss": 1.7807, "step": 481600 }, { "epoch": 41.55072463768116, "grad_norm": 0.3357560634613037, "learning_rate": 0.001, "loss": 1.7731, "step": 481656 }, { "epoch": 41.55555555555556, "grad_norm": 0.32627397775650024, "learning_rate": 0.001, "loss": 1.7773, "step": 481712 }, { "epoch": 41.56038647342995, "grad_norm": 0.6696512699127197, "learning_rate": 0.001, "loss": 1.7664, "step": 481768 }, { "epoch": 41.56521739130435, "grad_norm": 0.6523452401161194, "learning_rate": 0.001, "loss": 1.7703, "step": 481824 }, { "epoch": 41.570048309178745, "grad_norm": 1.4383167028427124, "learning_rate": 0.001, "loss": 1.7717, "step": 481880 }, { "epoch": 41.57487922705314, "grad_norm": 0.39921385049819946, "learning_rate": 0.001, "loss": 1.7794, "step": 481936 }, { "epoch": 41.57971014492754, "grad_norm": 0.3504568636417389, "learning_rate": 0.001, "loss": 1.7689, "step": 481992 }, { "epoch": 41.58454106280193, "grad_norm": 0.26321402192115784, "learning_rate": 0.001, "loss": 1.7827, "step": 482048 }, { "epoch": 41.589371980676326, "grad_norm": 0.5420135855674744, "learning_rate": 0.001, "loss": 1.7702, "step": 482104 }, { "epoch": 41.594202898550726, "grad_norm": 0.41049522161483765, "learning_rate": 0.001, "loss": 1.7694, "step": 482160 }, { "epoch": 41.59903381642512, "grad_norm": 0.2921530306339264, "learning_rate": 0.001, "loss": 1.7693, "step": 482216 }, { "epoch": 41.60386473429952, "grad_norm": 0.2506851851940155, "learning_rate": 0.001, "loss": 1.7731, "step": 482272 }, { "epoch": 41.608695652173914, "grad_norm": 0.40679728984832764, "learning_rate": 0.001, "loss": 1.7669, "step": 482328 }, { "epoch": 41.61352657004831, "grad_norm": 0.3142333924770355, "learning_rate": 0.001, "loss": 1.7677, "step": 482384 }, { "epoch": 41.61835748792271, "grad_norm": 0.3366691768169403, "learning_rate": 0.001, "loss": 1.7666, "step": 482440 }, { "epoch": 41.6231884057971, "grad_norm": 0.34135791659355164, "learning_rate": 0.001, "loss": 1.7767, "step": 482496 }, { "epoch": 41.628019323671495, "grad_norm": 0.29753515124320984, "learning_rate": 0.001, "loss": 1.7613, "step": 482552 }, { "epoch": 41.632850241545896, "grad_norm": 0.3050452768802643, "learning_rate": 0.001, "loss": 1.7724, "step": 482608 }, { "epoch": 41.63768115942029, "grad_norm": 0.2986689507961273, "learning_rate": 0.001, "loss": 1.7736, "step": 482664 }, { "epoch": 41.64251207729468, "grad_norm": 0.46290600299835205, "learning_rate": 0.001, "loss": 1.7748, "step": 482720 }, { "epoch": 41.64734299516908, "grad_norm": 4.14735221862793, "learning_rate": 0.001, "loss": 1.7805, "step": 482776 }, { "epoch": 41.65217391304348, "grad_norm": 0.5980857014656067, "learning_rate": 0.001, "loss": 1.7823, "step": 482832 }, { "epoch": 41.65700483091788, "grad_norm": 0.36213555932044983, "learning_rate": 0.001, "loss": 1.7801, "step": 482888 }, { "epoch": 41.66183574879227, "grad_norm": 0.524092435836792, "learning_rate": 0.001, "loss": 1.776, "step": 482944 }, { "epoch": 41.666666666666664, "grad_norm": 0.2648862898349762, "learning_rate": 0.001, "loss": 1.7864, "step": 483000 }, { "epoch": 41.671497584541065, "grad_norm": 0.717807412147522, "learning_rate": 0.001, "loss": 1.7882, "step": 483056 }, { "epoch": 41.67632850241546, "grad_norm": 1.0099356174468994, "learning_rate": 0.001, "loss": 1.7928, "step": 483112 }, { "epoch": 41.68115942028985, "grad_norm": 0.3962900638580322, "learning_rate": 0.001, "loss": 1.7741, "step": 483168 }, { "epoch": 41.68599033816425, "grad_norm": 1.5139120817184448, "learning_rate": 0.001, "loss": 1.7667, "step": 483224 }, { "epoch": 41.690821256038646, "grad_norm": 0.4782089293003082, "learning_rate": 0.001, "loss": 1.7736, "step": 483280 }, { "epoch": 41.69565217391305, "grad_norm": 1.228147268295288, "learning_rate": 0.001, "loss": 1.7762, "step": 483336 }, { "epoch": 41.70048309178744, "grad_norm": 1.27705717086792, "learning_rate": 0.001, "loss": 1.7924, "step": 483392 }, { "epoch": 41.70531400966183, "grad_norm": 1.0360182523727417, "learning_rate": 0.001, "loss": 1.7851, "step": 483448 }, { "epoch": 41.710144927536234, "grad_norm": 1.6343693733215332, "learning_rate": 0.001, "loss": 1.7678, "step": 483504 }, { "epoch": 41.71497584541063, "grad_norm": 0.5440138578414917, "learning_rate": 0.001, "loss": 1.7774, "step": 483560 }, { "epoch": 41.71980676328502, "grad_norm": 0.8304124474525452, "learning_rate": 0.001, "loss": 1.7786, "step": 483616 }, { "epoch": 41.72463768115942, "grad_norm": 0.8030955195426941, "learning_rate": 0.001, "loss": 1.7849, "step": 483672 }, { "epoch": 41.729468599033815, "grad_norm": 0.3007888197898865, "learning_rate": 0.001, "loss": 1.7787, "step": 483728 }, { "epoch": 41.734299516908216, "grad_norm": 1.1150705814361572, "learning_rate": 0.001, "loss": 1.7738, "step": 483784 }, { "epoch": 41.73913043478261, "grad_norm": 0.33095914125442505, "learning_rate": 0.001, "loss": 1.7714, "step": 483840 }, { "epoch": 41.743961352657, "grad_norm": 0.29412415623664856, "learning_rate": 0.001, "loss": 1.7632, "step": 483896 }, { "epoch": 41.7487922705314, "grad_norm": 0.38712266087532043, "learning_rate": 0.001, "loss": 1.7759, "step": 483952 }, { "epoch": 41.7536231884058, "grad_norm": 1.4370554685592651, "learning_rate": 0.001, "loss": 1.7776, "step": 484008 }, { "epoch": 41.75845410628019, "grad_norm": 3.35516619682312, "learning_rate": 0.001, "loss": 1.7943, "step": 484064 }, { "epoch": 41.76328502415459, "grad_norm": 0.8320044279098511, "learning_rate": 0.001, "loss": 1.7974, "step": 484120 }, { "epoch": 41.768115942028984, "grad_norm": 1.3436111211776733, "learning_rate": 0.001, "loss": 1.8201, "step": 484176 }, { "epoch": 41.772946859903385, "grad_norm": 2.4458961486816406, "learning_rate": 0.001, "loss": 1.8088, "step": 484232 }, { "epoch": 41.77777777777778, "grad_norm": 0.2836563289165497, "learning_rate": 0.001, "loss": 1.8052, "step": 484288 }, { "epoch": 41.78260869565217, "grad_norm": 1.6821556091308594, "learning_rate": 0.001, "loss": 1.8153, "step": 484344 }, { "epoch": 41.78743961352657, "grad_norm": 0.8647648096084595, "learning_rate": 0.001, "loss": 1.8086, "step": 484400 }, { "epoch": 41.792270531400966, "grad_norm": 0.8150994181632996, "learning_rate": 0.001, "loss": 1.8136, "step": 484456 }, { "epoch": 41.79710144927536, "grad_norm": 1.034799575805664, "learning_rate": 0.001, "loss": 1.8083, "step": 484512 }, { "epoch": 41.80193236714976, "grad_norm": 0.2924371361732483, "learning_rate": 0.001, "loss": 1.8069, "step": 484568 }, { "epoch": 41.806763285024154, "grad_norm": 8.87802505493164, "learning_rate": 0.001, "loss": 1.7988, "step": 484624 }, { "epoch": 41.81159420289855, "grad_norm": 5.548062801361084, "learning_rate": 0.001, "loss": 1.8021, "step": 484680 }, { "epoch": 41.81642512077295, "grad_norm": 0.27715036273002625, "learning_rate": 0.001, "loss": 1.7869, "step": 484736 }, { "epoch": 41.82125603864734, "grad_norm": 0.2625804841518402, "learning_rate": 0.001, "loss": 1.7848, "step": 484792 }, { "epoch": 41.82608695652174, "grad_norm": 0.3942045569419861, "learning_rate": 0.001, "loss": 1.7838, "step": 484848 }, { "epoch": 41.830917874396135, "grad_norm": 0.706469714641571, "learning_rate": 0.001, "loss": 1.7836, "step": 484904 }, { "epoch": 41.83574879227053, "grad_norm": 0.3525569438934326, "learning_rate": 0.001, "loss": 1.7905, "step": 484960 }, { "epoch": 41.84057971014493, "grad_norm": 0.344290554523468, "learning_rate": 0.001, "loss": 1.7817, "step": 485016 }, { "epoch": 41.84541062801932, "grad_norm": 2.547391891479492, "learning_rate": 0.001, "loss": 1.7799, "step": 485072 }, { "epoch": 41.85024154589372, "grad_norm": 1.127303957939148, "learning_rate": 0.001, "loss": 1.7835, "step": 485128 }, { "epoch": 41.85507246376812, "grad_norm": 1.1813757419586182, "learning_rate": 0.001, "loss": 1.7794, "step": 485184 }, { "epoch": 41.85990338164251, "grad_norm": 0.2986561954021454, "learning_rate": 0.001, "loss": 1.7872, "step": 485240 }, { "epoch": 41.86473429951691, "grad_norm": 3.1776645183563232, "learning_rate": 0.001, "loss": 1.7814, "step": 485296 }, { "epoch": 41.869565217391305, "grad_norm": 1.0821311473846436, "learning_rate": 0.001, "loss": 1.7782, "step": 485352 }, { "epoch": 41.8743961352657, "grad_norm": 0.8939442038536072, "learning_rate": 0.001, "loss": 1.7841, "step": 485408 }, { "epoch": 41.8792270531401, "grad_norm": 0.6704204678535461, "learning_rate": 0.001, "loss": 1.781, "step": 485464 }, { "epoch": 41.88405797101449, "grad_norm": 1.8376622200012207, "learning_rate": 0.001, "loss": 1.7826, "step": 485520 }, { "epoch": 41.888888888888886, "grad_norm": 0.2999434769153595, "learning_rate": 0.001, "loss": 1.781, "step": 485576 }, { "epoch": 41.893719806763286, "grad_norm": 0.7336422801017761, "learning_rate": 0.001, "loss": 1.7849, "step": 485632 }, { "epoch": 41.89855072463768, "grad_norm": 0.5957835912704468, "learning_rate": 0.001, "loss": 1.7821, "step": 485688 }, { "epoch": 41.90338164251208, "grad_norm": 0.28564947843551636, "learning_rate": 0.001, "loss": 1.7803, "step": 485744 }, { "epoch": 41.908212560386474, "grad_norm": 0.3469614088535309, "learning_rate": 0.001, "loss": 1.7822, "step": 485800 }, { "epoch": 41.91304347826087, "grad_norm": 1.036623239517212, "learning_rate": 0.001, "loss": 1.7729, "step": 485856 }, { "epoch": 41.91787439613527, "grad_norm": 0.45569103956222534, "learning_rate": 0.001, "loss": 1.7845, "step": 485912 }, { "epoch": 41.92270531400966, "grad_norm": 0.7068533897399902, "learning_rate": 0.001, "loss": 1.7887, "step": 485968 }, { "epoch": 41.927536231884055, "grad_norm": 0.471012145280838, "learning_rate": 0.001, "loss": 1.7943, "step": 486024 }, { "epoch": 41.932367149758456, "grad_norm": 1.8304589986801147, "learning_rate": 0.001, "loss": 1.793, "step": 486080 }, { "epoch": 41.93719806763285, "grad_norm": 0.32733026146888733, "learning_rate": 0.001, "loss": 1.7913, "step": 486136 }, { "epoch": 41.94202898550725, "grad_norm": 1.2495442628860474, "learning_rate": 0.001, "loss": 1.7862, "step": 486192 }, { "epoch": 41.94685990338164, "grad_norm": 0.5472974181175232, "learning_rate": 0.001, "loss": 1.791, "step": 486248 }, { "epoch": 41.95169082125604, "grad_norm": 1.3185720443725586, "learning_rate": 0.001, "loss": 1.7924, "step": 486304 }, { "epoch": 41.95652173913044, "grad_norm": 0.37852004170417786, "learning_rate": 0.001, "loss": 1.7843, "step": 486360 }, { "epoch": 41.96135265700483, "grad_norm": 0.3675987720489502, "learning_rate": 0.001, "loss": 1.7808, "step": 486416 }, { "epoch": 41.966183574879224, "grad_norm": 0.2760379910469055, "learning_rate": 0.001, "loss": 1.7836, "step": 486472 }, { "epoch": 41.971014492753625, "grad_norm": 0.29291966557502747, "learning_rate": 0.001, "loss": 1.7819, "step": 486528 }, { "epoch": 41.97584541062802, "grad_norm": 1.435795783996582, "learning_rate": 0.001, "loss": 1.7791, "step": 486584 }, { "epoch": 41.98067632850242, "grad_norm": 0.44536134600639343, "learning_rate": 0.001, "loss": 1.7803, "step": 486640 }, { "epoch": 41.98550724637681, "grad_norm": 0.5928128957748413, "learning_rate": 0.001, "loss": 1.784, "step": 486696 }, { "epoch": 41.990338164251206, "grad_norm": 0.3290199935436249, "learning_rate": 0.001, "loss": 1.7817, "step": 486752 }, { "epoch": 41.99516908212561, "grad_norm": 0.47797903418540955, "learning_rate": 0.001, "loss": 1.7828, "step": 486808 }, { "epoch": 42.0, "grad_norm": 0.3019459843635559, "learning_rate": 0.001, "loss": 1.7869, "step": 486864 }, { "epoch": 42.00483091787439, "grad_norm": 0.3805495798587799, "learning_rate": 0.001, "loss": 1.7583, "step": 486920 }, { "epoch": 42.009661835748794, "grad_norm": 0.5608112215995789, "learning_rate": 0.001, "loss": 1.7433, "step": 486976 }, { "epoch": 42.01449275362319, "grad_norm": 1.8001693487167358, "learning_rate": 0.001, "loss": 1.7575, "step": 487032 }, { "epoch": 42.01932367149758, "grad_norm": 4.909172534942627, "learning_rate": 0.001, "loss": 1.7349, "step": 487088 }, { "epoch": 42.02415458937198, "grad_norm": 0.26809054613113403, "learning_rate": 0.001, "loss": 1.7376, "step": 487144 }, { "epoch": 42.028985507246375, "grad_norm": 0.36053523421287537, "learning_rate": 0.001, "loss": 1.7501, "step": 487200 }, { "epoch": 42.033816425120776, "grad_norm": 4.105959415435791, "learning_rate": 0.001, "loss": 1.745, "step": 487256 }, { "epoch": 42.03864734299517, "grad_norm": 27.10481834411621, "learning_rate": 0.001, "loss": 1.7479, "step": 487312 }, { "epoch": 42.04347826086956, "grad_norm": 0.4710698127746582, "learning_rate": 0.001, "loss": 1.7438, "step": 487368 }, { "epoch": 42.04830917874396, "grad_norm": 0.29662564396858215, "learning_rate": 0.001, "loss": 1.7414, "step": 487424 }, { "epoch": 42.05314009661836, "grad_norm": 0.3034851849079132, "learning_rate": 0.001, "loss": 1.7484, "step": 487480 }, { "epoch": 42.05797101449275, "grad_norm": 0.3025754392147064, "learning_rate": 0.001, "loss": 1.7507, "step": 487536 }, { "epoch": 42.06280193236715, "grad_norm": 6.905580520629883, "learning_rate": 0.001, "loss": 1.7558, "step": 487592 }, { "epoch": 42.067632850241544, "grad_norm": 0.5275615453720093, "learning_rate": 0.001, "loss": 1.7583, "step": 487648 }, { "epoch": 42.072463768115945, "grad_norm": 0.8237336277961731, "learning_rate": 0.001, "loss": 1.7517, "step": 487704 }, { "epoch": 42.07729468599034, "grad_norm": 1.5896657705307007, "learning_rate": 0.001, "loss": 1.7542, "step": 487760 }, { "epoch": 42.08212560386473, "grad_norm": 2.3079230785369873, "learning_rate": 0.001, "loss": 1.7578, "step": 487816 }, { "epoch": 42.08695652173913, "grad_norm": 1.3778282403945923, "learning_rate": 0.001, "loss": 1.7526, "step": 487872 }, { "epoch": 42.091787439613526, "grad_norm": 1.1482212543487549, "learning_rate": 0.001, "loss": 1.7634, "step": 487928 }, { "epoch": 42.09661835748792, "grad_norm": 0.3569440245628357, "learning_rate": 0.001, "loss": 1.7663, "step": 487984 }, { "epoch": 42.10144927536232, "grad_norm": 1.2983609437942505, "learning_rate": 0.001, "loss": 1.7648, "step": 488040 }, { "epoch": 42.106280193236714, "grad_norm": 0.6139097213745117, "learning_rate": 0.001, "loss": 1.7713, "step": 488096 }, { "epoch": 42.111111111111114, "grad_norm": 0.6897456645965576, "learning_rate": 0.001, "loss": 1.7656, "step": 488152 }, { "epoch": 42.11594202898551, "grad_norm": 2.6329097747802734, "learning_rate": 0.001, "loss": 1.7585, "step": 488208 }, { "epoch": 42.1207729468599, "grad_norm": 0.42164111137390137, "learning_rate": 0.001, "loss": 1.76, "step": 488264 }, { "epoch": 42.1256038647343, "grad_norm": 0.35669389367103577, "learning_rate": 0.001, "loss": 1.7652, "step": 488320 }, { "epoch": 42.130434782608695, "grad_norm": 2.0179178714752197, "learning_rate": 0.001, "loss": 1.7666, "step": 488376 }, { "epoch": 42.13526570048309, "grad_norm": 0.3252449333667755, "learning_rate": 0.001, "loss": 1.7585, "step": 488432 }, { "epoch": 42.14009661835749, "grad_norm": 1.5790807008743286, "learning_rate": 0.001, "loss": 1.7561, "step": 488488 }, { "epoch": 42.14492753623188, "grad_norm": 0.7390744686126709, "learning_rate": 0.001, "loss": 1.7521, "step": 488544 }, { "epoch": 42.14975845410628, "grad_norm": 0.3487186133861542, "learning_rate": 0.001, "loss": 1.7467, "step": 488600 }, { "epoch": 42.15458937198068, "grad_norm": 1.0141098499298096, "learning_rate": 0.001, "loss": 1.75, "step": 488656 }, { "epoch": 42.15942028985507, "grad_norm": 0.3917505741119385, "learning_rate": 0.001, "loss": 1.7494, "step": 488712 }, { "epoch": 42.16425120772947, "grad_norm": 0.2992800772190094, "learning_rate": 0.001, "loss": 1.7559, "step": 488768 }, { "epoch": 42.169082125603865, "grad_norm": 1.0765210390090942, "learning_rate": 0.001, "loss": 1.7508, "step": 488824 }, { "epoch": 42.17391304347826, "grad_norm": 0.5060981512069702, "learning_rate": 0.001, "loss": 1.7489, "step": 488880 }, { "epoch": 42.17874396135266, "grad_norm": 0.631303071975708, "learning_rate": 0.001, "loss": 1.7516, "step": 488936 }, { "epoch": 42.18357487922705, "grad_norm": 0.3968592882156372, "learning_rate": 0.001, "loss": 1.7586, "step": 488992 }, { "epoch": 42.18840579710145, "grad_norm": 0.33593663573265076, "learning_rate": 0.001, "loss": 1.7619, "step": 489048 }, { "epoch": 42.193236714975846, "grad_norm": 0.5084505081176758, "learning_rate": 0.001, "loss": 1.7506, "step": 489104 }, { "epoch": 42.19806763285024, "grad_norm": 0.45566776394844055, "learning_rate": 0.001, "loss": 1.7545, "step": 489160 }, { "epoch": 42.20289855072464, "grad_norm": 2.111151695251465, "learning_rate": 0.001, "loss": 1.7601, "step": 489216 }, { "epoch": 42.207729468599034, "grad_norm": 0.41191428899765015, "learning_rate": 0.001, "loss": 1.7607, "step": 489272 }, { "epoch": 42.21256038647343, "grad_norm": 0.3674837648868561, "learning_rate": 0.001, "loss": 1.7754, "step": 489328 }, { "epoch": 42.21739130434783, "grad_norm": 0.3679570257663727, "learning_rate": 0.001, "loss": 1.7612, "step": 489384 }, { "epoch": 42.22222222222222, "grad_norm": 0.6189741492271423, "learning_rate": 0.001, "loss": 1.7579, "step": 489440 }, { "epoch": 42.227053140096615, "grad_norm": 0.27789726853370667, "learning_rate": 0.001, "loss": 1.7549, "step": 489496 }, { "epoch": 42.231884057971016, "grad_norm": 0.3272222876548767, "learning_rate": 0.001, "loss": 1.751, "step": 489552 }, { "epoch": 42.23671497584541, "grad_norm": 0.35177668929100037, "learning_rate": 0.001, "loss": 1.7518, "step": 489608 }, { "epoch": 42.24154589371981, "grad_norm": 2.27254056930542, "learning_rate": 0.001, "loss": 1.7517, "step": 489664 }, { "epoch": 42.2463768115942, "grad_norm": 0.6424328684806824, "learning_rate": 0.001, "loss": 1.7583, "step": 489720 }, { "epoch": 42.2512077294686, "grad_norm": 1.5050510168075562, "learning_rate": 0.001, "loss": 1.7458, "step": 489776 }, { "epoch": 42.256038647343, "grad_norm": 0.36695384979248047, "learning_rate": 0.001, "loss": 1.7551, "step": 489832 }, { "epoch": 42.26086956521739, "grad_norm": 1.2543820142745972, "learning_rate": 0.001, "loss": 1.7681, "step": 489888 }, { "epoch": 42.265700483091784, "grad_norm": 0.2839926481246948, "learning_rate": 0.001, "loss": 1.7607, "step": 489944 }, { "epoch": 42.270531400966185, "grad_norm": 0.4483574628829956, "learning_rate": 0.001, "loss": 1.7514, "step": 490000 }, { "epoch": 42.27536231884058, "grad_norm": 0.5835283398628235, "learning_rate": 0.001, "loss": 1.7521, "step": 490056 }, { "epoch": 42.28019323671498, "grad_norm": 0.6197874546051025, "learning_rate": 0.001, "loss": 1.7656, "step": 490112 }, { "epoch": 42.28502415458937, "grad_norm": 0.8116411566734314, "learning_rate": 0.001, "loss": 1.7603, "step": 490168 }, { "epoch": 42.289855072463766, "grad_norm": 0.3220919966697693, "learning_rate": 0.001, "loss": 1.7544, "step": 490224 }, { "epoch": 42.29468599033817, "grad_norm": 0.5693132877349854, "learning_rate": 0.001, "loss": 1.7622, "step": 490280 }, { "epoch": 42.29951690821256, "grad_norm": 0.4283170998096466, "learning_rate": 0.001, "loss": 1.7628, "step": 490336 }, { "epoch": 42.30434782608695, "grad_norm": 0.6508527398109436, "learning_rate": 0.001, "loss": 1.7551, "step": 490392 }, { "epoch": 42.309178743961354, "grad_norm": 0.32717782258987427, "learning_rate": 0.001, "loss": 1.7518, "step": 490448 }, { "epoch": 42.31400966183575, "grad_norm": 0.3732554614543915, "learning_rate": 0.001, "loss": 1.7517, "step": 490504 }, { "epoch": 42.31884057971015, "grad_norm": 0.7454554438591003, "learning_rate": 0.001, "loss": 1.7557, "step": 490560 }, { "epoch": 42.32367149758454, "grad_norm": 0.7526639103889465, "learning_rate": 0.001, "loss": 1.7503, "step": 490616 }, { "epoch": 42.328502415458935, "grad_norm": 0.4923805296421051, "learning_rate": 0.001, "loss": 1.7578, "step": 490672 }, { "epoch": 42.333333333333336, "grad_norm": 1.1724202632904053, "learning_rate": 0.001, "loss": 1.76, "step": 490728 }, { "epoch": 42.33816425120773, "grad_norm": 0.30553963780403137, "learning_rate": 0.001, "loss": 1.7613, "step": 490784 }, { "epoch": 42.34299516908212, "grad_norm": 0.31243520975112915, "learning_rate": 0.001, "loss": 1.763, "step": 490840 }, { "epoch": 42.34782608695652, "grad_norm": 2.3004539012908936, "learning_rate": 0.001, "loss": 1.7595, "step": 490896 }, { "epoch": 42.35265700483092, "grad_norm": 0.4995482563972473, "learning_rate": 0.001, "loss": 1.8014, "step": 490952 }, { "epoch": 42.35748792270532, "grad_norm": 0.7967793941497803, "learning_rate": 0.001, "loss": 1.8269, "step": 491008 }, { "epoch": 42.36231884057971, "grad_norm": 0.43179959058761597, "learning_rate": 0.001, "loss": 1.7878, "step": 491064 }, { "epoch": 42.367149758454104, "grad_norm": 1.0754050016403198, "learning_rate": 0.001, "loss": 1.7808, "step": 491120 }, { "epoch": 42.371980676328505, "grad_norm": 1.0952892303466797, "learning_rate": 0.001, "loss": 1.7675, "step": 491176 }, { "epoch": 42.3768115942029, "grad_norm": 0.24367645382881165, "learning_rate": 0.001, "loss": 1.7631, "step": 491232 }, { "epoch": 42.38164251207729, "grad_norm": 0.30544641613960266, "learning_rate": 0.001, "loss": 1.7656, "step": 491288 }, { "epoch": 42.38647342995169, "grad_norm": 0.25850972533226013, "learning_rate": 0.001, "loss": 1.7689, "step": 491344 }, { "epoch": 42.391304347826086, "grad_norm": 0.2946685254573822, "learning_rate": 0.001, "loss": 1.7651, "step": 491400 }, { "epoch": 42.39613526570048, "grad_norm": 0.3624315857887268, "learning_rate": 0.001, "loss": 1.7551, "step": 491456 }, { "epoch": 42.40096618357488, "grad_norm": 1.2270286083221436, "learning_rate": 0.001, "loss": 1.7669, "step": 491512 }, { "epoch": 42.405797101449274, "grad_norm": 0.24939844012260437, "learning_rate": 0.001, "loss": 1.7647, "step": 491568 }, { "epoch": 42.410628019323674, "grad_norm": 0.5110289454460144, "learning_rate": 0.001, "loss": 1.7625, "step": 491624 }, { "epoch": 42.41545893719807, "grad_norm": 0.42668417096138, "learning_rate": 0.001, "loss": 1.7664, "step": 491680 }, { "epoch": 42.42028985507246, "grad_norm": 0.6088284254074097, "learning_rate": 0.001, "loss": 1.7644, "step": 491736 }, { "epoch": 42.42512077294686, "grad_norm": 3.4654033184051514, "learning_rate": 0.001, "loss": 1.77, "step": 491792 }, { "epoch": 42.429951690821255, "grad_norm": 0.4033966660499573, "learning_rate": 0.001, "loss": 1.7713, "step": 491848 }, { "epoch": 42.43478260869565, "grad_norm": 0.4056691825389862, "learning_rate": 0.001, "loss": 1.7682, "step": 491904 }, { "epoch": 42.43961352657005, "grad_norm": 0.27506542205810547, "learning_rate": 0.001, "loss": 1.7694, "step": 491960 }, { "epoch": 42.44444444444444, "grad_norm": 0.5485376119613647, "learning_rate": 0.001, "loss": 1.7635, "step": 492016 }, { "epoch": 42.44927536231884, "grad_norm": 0.5162979364395142, "learning_rate": 0.001, "loss": 1.7687, "step": 492072 }, { "epoch": 42.45410628019324, "grad_norm": 0.31549012660980225, "learning_rate": 0.001, "loss": 1.763, "step": 492128 }, { "epoch": 42.45893719806763, "grad_norm": 0.3403273820877075, "learning_rate": 0.001, "loss": 1.7555, "step": 492184 }, { "epoch": 42.46376811594203, "grad_norm": 0.3960123360157013, "learning_rate": 0.001, "loss": 1.7598, "step": 492240 }, { "epoch": 42.468599033816425, "grad_norm": 0.2913028299808502, "learning_rate": 0.001, "loss": 1.7652, "step": 492296 }, { "epoch": 42.47342995169082, "grad_norm": 0.3264979124069214, "learning_rate": 0.001, "loss": 1.7653, "step": 492352 }, { "epoch": 42.47826086956522, "grad_norm": 0.35475075244903564, "learning_rate": 0.001, "loss": 1.7913, "step": 492408 }, { "epoch": 42.48309178743961, "grad_norm": 0.46228447556495667, "learning_rate": 0.001, "loss": 1.7766, "step": 492464 }, { "epoch": 42.48792270531401, "grad_norm": 0.35546115040779114, "learning_rate": 0.001, "loss": 1.7799, "step": 492520 }, { "epoch": 42.492753623188406, "grad_norm": 0.33248370885849, "learning_rate": 0.001, "loss": 1.7857, "step": 492576 }, { "epoch": 42.4975845410628, "grad_norm": 0.29436033964157104, "learning_rate": 0.001, "loss": 1.7819, "step": 492632 }, { "epoch": 42.5024154589372, "grad_norm": 0.39577922224998474, "learning_rate": 0.001, "loss": 1.7682, "step": 492688 }, { "epoch": 42.507246376811594, "grad_norm": 0.3741127550601959, "learning_rate": 0.001, "loss": 1.7617, "step": 492744 }, { "epoch": 42.51207729468599, "grad_norm": 0.2631893754005432, "learning_rate": 0.001, "loss": 1.7746, "step": 492800 }, { "epoch": 42.51690821256039, "grad_norm": 0.7409164905548096, "learning_rate": 0.001, "loss": 1.775, "step": 492856 }, { "epoch": 42.52173913043478, "grad_norm": 1.6313046216964722, "learning_rate": 0.001, "loss": 1.771, "step": 492912 }, { "epoch": 42.52657004830918, "grad_norm": 0.29509875178337097, "learning_rate": 0.001, "loss": 1.7615, "step": 492968 }, { "epoch": 42.531400966183575, "grad_norm": 6.29130744934082, "learning_rate": 0.001, "loss": 1.7597, "step": 493024 }, { "epoch": 42.53623188405797, "grad_norm": 0.30361488461494446, "learning_rate": 0.001, "loss": 1.7788, "step": 493080 }, { "epoch": 42.54106280193237, "grad_norm": 0.31971079111099243, "learning_rate": 0.001, "loss": 1.7663, "step": 493136 }, { "epoch": 42.54589371980676, "grad_norm": 0.3189185857772827, "learning_rate": 0.001, "loss": 1.7554, "step": 493192 }, { "epoch": 42.55072463768116, "grad_norm": 0.30841463804244995, "learning_rate": 0.001, "loss": 1.7618, "step": 493248 }, { "epoch": 42.55555555555556, "grad_norm": 0.5911606550216675, "learning_rate": 0.001, "loss": 1.7607, "step": 493304 }, { "epoch": 42.56038647342995, "grad_norm": 0.3256855309009552, "learning_rate": 0.001, "loss": 1.7506, "step": 493360 }, { "epoch": 42.56521739130435, "grad_norm": 0.2937866151332855, "learning_rate": 0.001, "loss": 1.7543, "step": 493416 }, { "epoch": 42.570048309178745, "grad_norm": 0.3603109121322632, "learning_rate": 0.001, "loss": 1.7635, "step": 493472 }, { "epoch": 42.57487922705314, "grad_norm": 1.8178479671478271, "learning_rate": 0.001, "loss": 1.7596, "step": 493528 }, { "epoch": 42.57971014492754, "grad_norm": 0.5783098340034485, "learning_rate": 0.001, "loss": 1.7738, "step": 493584 }, { "epoch": 42.58454106280193, "grad_norm": 0.681128740310669, "learning_rate": 0.001, "loss": 1.7759, "step": 493640 }, { "epoch": 42.589371980676326, "grad_norm": 0.5738411545753479, "learning_rate": 0.001, "loss": 1.7688, "step": 493696 }, { "epoch": 42.594202898550726, "grad_norm": 0.37642380595207214, "learning_rate": 0.001, "loss": 1.7704, "step": 493752 }, { "epoch": 42.59903381642512, "grad_norm": 2.3003830909729004, "learning_rate": 0.001, "loss": 1.7691, "step": 493808 }, { "epoch": 42.60386473429952, "grad_norm": 37.6695556640625, "learning_rate": 0.001, "loss": 1.76, "step": 493864 }, { "epoch": 42.608695652173914, "grad_norm": 0.34026166796684265, "learning_rate": 0.001, "loss": 1.7575, "step": 493920 }, { "epoch": 42.61352657004831, "grad_norm": 0.6241693496704102, "learning_rate": 0.001, "loss": 1.7769, "step": 493976 }, { "epoch": 42.61835748792271, "grad_norm": 0.4699699282646179, "learning_rate": 0.001, "loss": 1.7838, "step": 494032 }, { "epoch": 42.6231884057971, "grad_norm": 0.31558921933174133, "learning_rate": 0.001, "loss": 1.773, "step": 494088 }, { "epoch": 42.628019323671495, "grad_norm": 0.48866599798202515, "learning_rate": 0.001, "loss": 1.7829, "step": 494144 }, { "epoch": 42.632850241545896, "grad_norm": 0.9284820556640625, "learning_rate": 0.001, "loss": 1.7708, "step": 494200 }, { "epoch": 42.63768115942029, "grad_norm": 0.5138643383979797, "learning_rate": 0.001, "loss": 1.7748, "step": 494256 }, { "epoch": 42.64251207729468, "grad_norm": 0.4089737832546234, "learning_rate": 0.001, "loss": 1.7702, "step": 494312 }, { "epoch": 42.64734299516908, "grad_norm": 0.42057591676712036, "learning_rate": 0.001, "loss": 1.761, "step": 494368 }, { "epoch": 42.65217391304348, "grad_norm": 0.30342257022857666, "learning_rate": 0.001, "loss": 1.7619, "step": 494424 }, { "epoch": 42.65700483091788, "grad_norm": 1.0620832443237305, "learning_rate": 0.001, "loss": 1.7678, "step": 494480 }, { "epoch": 42.66183574879227, "grad_norm": 1.1656184196472168, "learning_rate": 0.001, "loss": 1.7744, "step": 494536 }, { "epoch": 42.666666666666664, "grad_norm": 0.3710213899612427, "learning_rate": 0.001, "loss": 1.7694, "step": 494592 }, { "epoch": 42.671497584541065, "grad_norm": 0.39259687066078186, "learning_rate": 0.001, "loss": 1.7762, "step": 494648 }, { "epoch": 42.67632850241546, "grad_norm": 0.3502708375453949, "learning_rate": 0.001, "loss": 1.771, "step": 494704 }, { "epoch": 42.68115942028985, "grad_norm": 0.31302696466445923, "learning_rate": 0.001, "loss": 1.7718, "step": 494760 }, { "epoch": 42.68599033816425, "grad_norm": 0.41087082028388977, "learning_rate": 0.001, "loss": 1.7651, "step": 494816 }, { "epoch": 42.690821256038646, "grad_norm": 3.3076224327087402, "learning_rate": 0.001, "loss": 1.7522, "step": 494872 }, { "epoch": 42.69565217391305, "grad_norm": 0.2786913812160492, "learning_rate": 0.001, "loss": 1.7608, "step": 494928 }, { "epoch": 42.70048309178744, "grad_norm": 0.44205424189567566, "learning_rate": 0.001, "loss": 1.7609, "step": 494984 }, { "epoch": 42.70531400966183, "grad_norm": 0.3690367639064789, "learning_rate": 0.001, "loss": 1.7572, "step": 495040 }, { "epoch": 42.710144927536234, "grad_norm": 0.3526320457458496, "learning_rate": 0.001, "loss": 1.7631, "step": 495096 }, { "epoch": 42.71497584541063, "grad_norm": 0.4674495756626129, "learning_rate": 0.001, "loss": 1.7639, "step": 495152 }, { "epoch": 42.71980676328502, "grad_norm": 0.2787375748157501, "learning_rate": 0.001, "loss": 1.7717, "step": 495208 }, { "epoch": 42.72463768115942, "grad_norm": 0.25620928406715393, "learning_rate": 0.001, "loss": 1.7757, "step": 495264 }, { "epoch": 42.729468599033815, "grad_norm": 0.5904477834701538, "learning_rate": 0.001, "loss": 1.7667, "step": 495320 }, { "epoch": 42.734299516908216, "grad_norm": 1.9867202043533325, "learning_rate": 0.001, "loss": 1.767, "step": 495376 }, { "epoch": 42.73913043478261, "grad_norm": 0.4272610545158386, "learning_rate": 0.001, "loss": 1.7527, "step": 495432 }, { "epoch": 42.743961352657, "grad_norm": 0.7886016368865967, "learning_rate": 0.001, "loss": 1.7645, "step": 495488 }, { "epoch": 42.7487922705314, "grad_norm": 0.46949565410614014, "learning_rate": 0.001, "loss": 1.7592, "step": 495544 }, { "epoch": 42.7536231884058, "grad_norm": 1.0974304676055908, "learning_rate": 0.001, "loss": 1.7581, "step": 495600 }, { "epoch": 42.75845410628019, "grad_norm": 0.5070813298225403, "learning_rate": 0.001, "loss": 1.7595, "step": 495656 }, { "epoch": 42.76328502415459, "grad_norm": 0.34071779251098633, "learning_rate": 0.001, "loss": 1.7563, "step": 495712 }, { "epoch": 42.768115942028984, "grad_norm": 1.6125874519348145, "learning_rate": 0.001, "loss": 1.7635, "step": 495768 }, { "epoch": 42.772946859903385, "grad_norm": 1.3556158542633057, "learning_rate": 0.001, "loss": 1.7604, "step": 495824 }, { "epoch": 42.77777777777778, "grad_norm": 0.773227870464325, "learning_rate": 0.001, "loss": 1.757, "step": 495880 }, { "epoch": 42.78260869565217, "grad_norm": 0.42122316360473633, "learning_rate": 0.001, "loss": 1.7469, "step": 495936 }, { "epoch": 42.78743961352657, "grad_norm": 0.28449690341949463, "learning_rate": 0.001, "loss": 1.7535, "step": 495992 }, { "epoch": 42.792270531400966, "grad_norm": 0.4180215001106262, "learning_rate": 0.001, "loss": 1.7568, "step": 496048 }, { "epoch": 42.79710144927536, "grad_norm": 0.22506479918956757, "learning_rate": 0.001, "loss": 1.7621, "step": 496104 }, { "epoch": 42.80193236714976, "grad_norm": 0.3406946063041687, "learning_rate": 0.001, "loss": 1.7561, "step": 496160 }, { "epoch": 42.806763285024154, "grad_norm": 0.3163577616214752, "learning_rate": 0.001, "loss": 1.7526, "step": 496216 }, { "epoch": 42.81159420289855, "grad_norm": 0.24708381295204163, "learning_rate": 0.001, "loss": 1.7611, "step": 496272 }, { "epoch": 42.81642512077295, "grad_norm": 0.3926644027233124, "learning_rate": 0.001, "loss": 1.7748, "step": 496328 }, { "epoch": 42.82125603864734, "grad_norm": 0.2978352904319763, "learning_rate": 0.001, "loss": 1.7778, "step": 496384 }, { "epoch": 42.82608695652174, "grad_norm": 0.5509081482887268, "learning_rate": 0.001, "loss": 1.7854, "step": 496440 }, { "epoch": 42.830917874396135, "grad_norm": 0.32073792815208435, "learning_rate": 0.001, "loss": 1.7783, "step": 496496 }, { "epoch": 42.83574879227053, "grad_norm": 0.6077530980110168, "learning_rate": 0.001, "loss": 1.7698, "step": 496552 }, { "epoch": 42.84057971014493, "grad_norm": 0.3482252359390259, "learning_rate": 0.001, "loss": 1.7672, "step": 496608 }, { "epoch": 42.84541062801932, "grad_norm": 0.3995283246040344, "learning_rate": 0.001, "loss": 1.7694, "step": 496664 }, { "epoch": 42.85024154589372, "grad_norm": 0.30362609028816223, "learning_rate": 0.001, "loss": 1.7697, "step": 496720 }, { "epoch": 42.85507246376812, "grad_norm": 0.3405693769454956, "learning_rate": 0.001, "loss": 1.7648, "step": 496776 }, { "epoch": 42.85990338164251, "grad_norm": 1.4146783351898193, "learning_rate": 0.001, "loss": 1.758, "step": 496832 }, { "epoch": 42.86473429951691, "grad_norm": 0.3263242840766907, "learning_rate": 0.001, "loss": 1.7615, "step": 496888 }, { "epoch": 42.869565217391305, "grad_norm": 0.47114306688308716, "learning_rate": 0.001, "loss": 1.7605, "step": 496944 }, { "epoch": 42.8743961352657, "grad_norm": 0.2934527099132538, "learning_rate": 0.001, "loss": 1.7614, "step": 497000 }, { "epoch": 42.8792270531401, "grad_norm": 0.3143116533756256, "learning_rate": 0.001, "loss": 1.761, "step": 497056 }, { "epoch": 42.88405797101449, "grad_norm": 0.38275882601737976, "learning_rate": 0.001, "loss": 1.762, "step": 497112 }, { "epoch": 42.888888888888886, "grad_norm": 0.3109045624732971, "learning_rate": 0.001, "loss": 1.7622, "step": 497168 }, { "epoch": 42.893719806763286, "grad_norm": 0.31653881072998047, "learning_rate": 0.001, "loss": 1.7707, "step": 497224 }, { "epoch": 42.89855072463768, "grad_norm": 0.3431593179702759, "learning_rate": 0.001, "loss": 1.7721, "step": 497280 }, { "epoch": 42.90338164251208, "grad_norm": 0.2628556787967682, "learning_rate": 0.001, "loss": 1.7801, "step": 497336 }, { "epoch": 42.908212560386474, "grad_norm": 0.3346251845359802, "learning_rate": 0.001, "loss": 1.7737, "step": 497392 }, { "epoch": 42.91304347826087, "grad_norm": 0.5012972950935364, "learning_rate": 0.001, "loss": 1.7738, "step": 497448 }, { "epoch": 42.91787439613527, "grad_norm": 0.4818948805332184, "learning_rate": 0.001, "loss": 1.7789, "step": 497504 }, { "epoch": 42.92270531400966, "grad_norm": 0.3438078761100769, "learning_rate": 0.001, "loss": 1.7626, "step": 497560 }, { "epoch": 42.927536231884055, "grad_norm": 1.327869176864624, "learning_rate": 0.001, "loss": 1.7551, "step": 497616 }, { "epoch": 42.932367149758456, "grad_norm": 0.584452748298645, "learning_rate": 0.001, "loss": 1.7626, "step": 497672 }, { "epoch": 42.93719806763285, "grad_norm": 0.2980891764163971, "learning_rate": 0.001, "loss": 1.7588, "step": 497728 }, { "epoch": 42.94202898550725, "grad_norm": 0.4084571897983551, "learning_rate": 0.001, "loss": 1.755, "step": 497784 }, { "epoch": 42.94685990338164, "grad_norm": 0.293361097574234, "learning_rate": 0.001, "loss": 1.755, "step": 497840 }, { "epoch": 42.95169082125604, "grad_norm": 0.3471390902996063, "learning_rate": 0.001, "loss": 1.7522, "step": 497896 }, { "epoch": 42.95652173913044, "grad_norm": 0.35901501774787903, "learning_rate": 0.001, "loss": 1.7536, "step": 497952 }, { "epoch": 42.96135265700483, "grad_norm": 1.2451173067092896, "learning_rate": 0.001, "loss": 1.7534, "step": 498008 }, { "epoch": 42.966183574879224, "grad_norm": 0.2934431731700897, "learning_rate": 0.001, "loss": 1.7604, "step": 498064 }, { "epoch": 42.971014492753625, "grad_norm": 0.23306246101856232, "learning_rate": 0.001, "loss": 1.7505, "step": 498120 }, { "epoch": 42.97584541062802, "grad_norm": 1.0261784791946411, "learning_rate": 0.001, "loss": 1.754, "step": 498176 }, { "epoch": 42.98067632850242, "grad_norm": 0.3823038339614868, "learning_rate": 0.001, "loss": 1.7532, "step": 498232 }, { "epoch": 42.98550724637681, "grad_norm": 0.962932288646698, "learning_rate": 0.001, "loss": 1.7563, "step": 498288 }, { "epoch": 42.990338164251206, "grad_norm": 0.35090169310569763, "learning_rate": 0.001, "loss": 1.7565, "step": 498344 }, { "epoch": 42.99516908212561, "grad_norm": 3.1163649559020996, "learning_rate": 0.001, "loss": 1.7479, "step": 498400 }, { "epoch": 43.0, "grad_norm": 0.31754398345947266, "learning_rate": 0.001, "loss": 1.752, "step": 498456 }, { "epoch": 43.00483091787439, "grad_norm": 0.2880837917327881, "learning_rate": 0.001, "loss": 1.7211, "step": 498512 }, { "epoch": 43.009661835748794, "grad_norm": 0.5898643732070923, "learning_rate": 0.001, "loss": 1.7208, "step": 498568 }, { "epoch": 43.01449275362319, "grad_norm": 0.9040492177009583, "learning_rate": 0.001, "loss": 1.7222, "step": 498624 }, { "epoch": 43.01932367149758, "grad_norm": 0.4281351864337921, "learning_rate": 0.001, "loss": 1.739, "step": 498680 }, { "epoch": 43.02415458937198, "grad_norm": 0.28973937034606934, "learning_rate": 0.001, "loss": 1.7452, "step": 498736 }, { "epoch": 43.028985507246375, "grad_norm": 0.2905622720718384, "learning_rate": 0.001, "loss": 1.7337, "step": 498792 }, { "epoch": 43.033816425120776, "grad_norm": 0.33053648471832275, "learning_rate": 0.001, "loss": 1.7274, "step": 498848 }, { "epoch": 43.03864734299517, "grad_norm": 0.2611054480075836, "learning_rate": 0.001, "loss": 1.7294, "step": 498904 }, { "epoch": 43.04347826086956, "grad_norm": 1.366268277168274, "learning_rate": 0.001, "loss": 1.7441, "step": 498960 }, { "epoch": 43.04830917874396, "grad_norm": 0.7922677993774414, "learning_rate": 0.001, "loss": 1.7388, "step": 499016 }, { "epoch": 43.05314009661836, "grad_norm": 0.3109741806983948, "learning_rate": 0.001, "loss": 1.732, "step": 499072 }, { "epoch": 43.05797101449275, "grad_norm": 0.45245328545570374, "learning_rate": 0.001, "loss": 1.7349, "step": 499128 }, { "epoch": 43.06280193236715, "grad_norm": 0.3570323884487152, "learning_rate": 0.001, "loss": 1.7293, "step": 499184 }, { "epoch": 43.067632850241544, "grad_norm": 0.2881699502468109, "learning_rate": 0.001, "loss": 1.7342, "step": 499240 }, { "epoch": 43.072463768115945, "grad_norm": 0.3332690894603729, "learning_rate": 0.001, "loss": 1.7223, "step": 499296 }, { "epoch": 43.07729468599034, "grad_norm": 0.5095784068107605, "learning_rate": 0.001, "loss": 1.728, "step": 499352 }, { "epoch": 43.08212560386473, "grad_norm": 0.6457642316818237, "learning_rate": 0.001, "loss": 1.7331, "step": 499408 }, { "epoch": 43.08695652173913, "grad_norm": 0.292636901140213, "learning_rate": 0.001, "loss": 1.7244, "step": 499464 }, { "epoch": 43.091787439613526, "grad_norm": 0.3947647213935852, "learning_rate": 0.001, "loss": 1.7334, "step": 499520 }, { "epoch": 43.09661835748792, "grad_norm": 0.29116585850715637, "learning_rate": 0.001, "loss": 1.7353, "step": 499576 }, { "epoch": 43.10144927536232, "grad_norm": 0.32003694772720337, "learning_rate": 0.001, "loss": 1.7247, "step": 499632 }, { "epoch": 43.106280193236714, "grad_norm": 0.2887139618396759, "learning_rate": 0.001, "loss": 1.7301, "step": 499688 }, { "epoch": 43.111111111111114, "grad_norm": 0.29200631380081177, "learning_rate": 0.001, "loss": 1.7285, "step": 499744 }, { "epoch": 43.11594202898551, "grad_norm": 0.26603424549102783, "learning_rate": 0.001, "loss": 1.7262, "step": 499800 }, { "epoch": 43.1207729468599, "grad_norm": 0.29278939962387085, "learning_rate": 0.001, "loss": 1.7259, "step": 499856 }, { "epoch": 43.1256038647343, "grad_norm": 0.41091659665107727, "learning_rate": 0.001, "loss": 1.7271, "step": 499912 }, { "epoch": 43.130434782608695, "grad_norm": 0.35819709300994873, "learning_rate": 0.001, "loss": 1.7307, "step": 499968 }, { "epoch": 43.13526570048309, "grad_norm": 0.2634340226650238, "learning_rate": 0.001, "loss": 1.7294, "step": 500024 }, { "epoch": 43.14009661835749, "grad_norm": 0.5101088881492615, "learning_rate": 0.001, "loss": 1.7346, "step": 500080 }, { "epoch": 43.14492753623188, "grad_norm": 0.3782312273979187, "learning_rate": 0.001, "loss": 1.7212, "step": 500136 }, { "epoch": 43.14975845410628, "grad_norm": 0.3335227370262146, "learning_rate": 0.001, "loss": 1.7193, "step": 500192 }, { "epoch": 43.15458937198068, "grad_norm": 0.2480354607105255, "learning_rate": 0.001, "loss": 1.7236, "step": 500248 }, { "epoch": 43.15942028985507, "grad_norm": 0.24254335463047028, "learning_rate": 0.001, "loss": 1.7284, "step": 500304 }, { "epoch": 43.16425120772947, "grad_norm": 0.25509339570999146, "learning_rate": 0.001, "loss": 1.7248, "step": 500360 }, { "epoch": 43.169082125603865, "grad_norm": 0.3785673677921295, "learning_rate": 0.001, "loss": 1.7209, "step": 500416 }, { "epoch": 43.17391304347826, "grad_norm": 0.2932906150817871, "learning_rate": 0.001, "loss": 1.7252, "step": 500472 }, { "epoch": 43.17874396135266, "grad_norm": 0.268908828496933, "learning_rate": 0.001, "loss": 1.7302, "step": 500528 }, { "epoch": 43.18357487922705, "grad_norm": 0.31887146830558777, "learning_rate": 0.001, "loss": 1.7487, "step": 500584 }, { "epoch": 43.18840579710145, "grad_norm": 0.3154051899909973, "learning_rate": 0.001, "loss": 1.745, "step": 500640 }, { "epoch": 43.193236714975846, "grad_norm": 0.3698274791240692, "learning_rate": 0.001, "loss": 1.7363, "step": 500696 }, { "epoch": 43.19806763285024, "grad_norm": 0.3926672339439392, "learning_rate": 0.001, "loss": 1.7399, "step": 500752 }, { "epoch": 43.20289855072464, "grad_norm": 1.1627651453018188, "learning_rate": 0.001, "loss": 1.7273, "step": 500808 }, { "epoch": 43.207729468599034, "grad_norm": 0.5841051340103149, "learning_rate": 0.001, "loss": 1.7236, "step": 500864 }, { "epoch": 43.21256038647343, "grad_norm": 0.4725930094718933, "learning_rate": 0.001, "loss": 1.7262, "step": 500920 }, { "epoch": 43.21739130434783, "grad_norm": 0.28683337569236755, "learning_rate": 0.001, "loss": 1.7206, "step": 500976 }, { "epoch": 43.22222222222222, "grad_norm": 0.36116835474967957, "learning_rate": 0.001, "loss": 1.7251, "step": 501032 }, { "epoch": 43.227053140096615, "grad_norm": 0.5035881400108337, "learning_rate": 0.001, "loss": 1.7356, "step": 501088 }, { "epoch": 43.231884057971016, "grad_norm": 0.3092077672481537, "learning_rate": 0.001, "loss": 1.7311, "step": 501144 }, { "epoch": 43.23671497584541, "grad_norm": 0.3408401608467102, "learning_rate": 0.001, "loss": 1.7321, "step": 501200 }, { "epoch": 43.24154589371981, "grad_norm": 0.33938276767730713, "learning_rate": 0.001, "loss": 1.7333, "step": 501256 }, { "epoch": 43.2463768115942, "grad_norm": 0.3274970054626465, "learning_rate": 0.001, "loss": 1.7293, "step": 501312 }, { "epoch": 43.2512077294686, "grad_norm": 0.5984912514686584, "learning_rate": 0.001, "loss": 1.7267, "step": 501368 }, { "epoch": 43.256038647343, "grad_norm": 1.0737807750701904, "learning_rate": 0.001, "loss": 1.7401, "step": 501424 }, { "epoch": 43.26086956521739, "grad_norm": 0.310937762260437, "learning_rate": 0.001, "loss": 1.756, "step": 501480 }, { "epoch": 43.265700483091784, "grad_norm": 0.36521288752555847, "learning_rate": 0.001, "loss": 1.7321, "step": 501536 }, { "epoch": 43.270531400966185, "grad_norm": 0.383823037147522, "learning_rate": 0.001, "loss": 1.7292, "step": 501592 }, { "epoch": 43.27536231884058, "grad_norm": 0.27954620122909546, "learning_rate": 0.001, "loss": 1.7334, "step": 501648 }, { "epoch": 43.28019323671498, "grad_norm": 2.603872299194336, "learning_rate": 0.001, "loss": 1.7271, "step": 501704 }, { "epoch": 43.28502415458937, "grad_norm": 1.2268099784851074, "learning_rate": 0.001, "loss": 1.7381, "step": 501760 }, { "epoch": 43.289855072463766, "grad_norm": 0.4057111144065857, "learning_rate": 0.001, "loss": 1.734, "step": 501816 }, { "epoch": 43.29468599033817, "grad_norm": 0.49760401248931885, "learning_rate": 0.001, "loss": 1.7475, "step": 501872 }, { "epoch": 43.29951690821256, "grad_norm": 0.33154579997062683, "learning_rate": 0.001, "loss": 1.7539, "step": 501928 }, { "epoch": 43.30434782608695, "grad_norm": 0.4327848255634308, "learning_rate": 0.001, "loss": 1.7369, "step": 501984 }, { "epoch": 43.309178743961354, "grad_norm": 0.3378254175186157, "learning_rate": 0.001, "loss": 1.7337, "step": 502040 }, { "epoch": 43.31400966183575, "grad_norm": 0.529498279094696, "learning_rate": 0.001, "loss": 1.7321, "step": 502096 }, { "epoch": 43.31884057971015, "grad_norm": 0.35313135385513306, "learning_rate": 0.001, "loss": 1.7383, "step": 502152 }, { "epoch": 43.32367149758454, "grad_norm": 0.44525596499443054, "learning_rate": 0.001, "loss": 1.731, "step": 502208 }, { "epoch": 43.328502415458935, "grad_norm": 0.2622774839401245, "learning_rate": 0.001, "loss": 1.7265, "step": 502264 }, { "epoch": 43.333333333333336, "grad_norm": 0.6243840456008911, "learning_rate": 0.001, "loss": 1.7392, "step": 502320 }, { "epoch": 43.33816425120773, "grad_norm": 0.3983575701713562, "learning_rate": 0.001, "loss": 1.7253, "step": 502376 }, { "epoch": 43.34299516908212, "grad_norm": 0.23178161680698395, "learning_rate": 0.001, "loss": 1.7271, "step": 502432 }, { "epoch": 43.34782608695652, "grad_norm": 2.99115252494812, "learning_rate": 0.001, "loss": 1.7233, "step": 502488 }, { "epoch": 43.35265700483092, "grad_norm": 0.39675578474998474, "learning_rate": 0.001, "loss": 1.7384, "step": 502544 }, { "epoch": 43.35748792270532, "grad_norm": 0.5112400054931641, "learning_rate": 0.001, "loss": 1.7475, "step": 502600 }, { "epoch": 43.36231884057971, "grad_norm": 0.6033613681793213, "learning_rate": 0.001, "loss": 1.7345, "step": 502656 }, { "epoch": 43.367149758454104, "grad_norm": 0.3621089458465576, "learning_rate": 0.001, "loss": 1.7381, "step": 502712 }, { "epoch": 43.371980676328505, "grad_norm": 0.41851142048835754, "learning_rate": 0.001, "loss": 1.7355, "step": 502768 }, { "epoch": 43.3768115942029, "grad_norm": 0.317239910364151, "learning_rate": 0.001, "loss": 1.728, "step": 502824 }, { "epoch": 43.38164251207729, "grad_norm": 0.3395302891731262, "learning_rate": 0.001, "loss": 1.7343, "step": 502880 }, { "epoch": 43.38647342995169, "grad_norm": 0.3218384385108948, "learning_rate": 0.001, "loss": 1.7402, "step": 502936 }, { "epoch": 43.391304347826086, "grad_norm": 0.9764431118965149, "learning_rate": 0.001, "loss": 1.7304, "step": 502992 }, { "epoch": 43.39613526570048, "grad_norm": 0.44307273626327515, "learning_rate": 0.001, "loss": 1.7378, "step": 503048 }, { "epoch": 43.40096618357488, "grad_norm": 0.3319181501865387, "learning_rate": 0.001, "loss": 1.7366, "step": 503104 }, { "epoch": 43.405797101449274, "grad_norm": 0.27848803997039795, "learning_rate": 0.001, "loss": 1.7248, "step": 503160 }, { "epoch": 43.410628019323674, "grad_norm": 0.24711474776268005, "learning_rate": 0.001, "loss": 1.7308, "step": 503216 }, { "epoch": 43.41545893719807, "grad_norm": 0.2719164788722992, "learning_rate": 0.001, "loss": 1.7285, "step": 503272 }, { "epoch": 43.42028985507246, "grad_norm": 0.32736772298812866, "learning_rate": 0.001, "loss": 1.7304, "step": 503328 }, { "epoch": 43.42512077294686, "grad_norm": 0.34980425238609314, "learning_rate": 0.001, "loss": 1.7334, "step": 503384 }, { "epoch": 43.429951690821255, "grad_norm": 0.24120280146598816, "learning_rate": 0.001, "loss": 1.7292, "step": 503440 }, { "epoch": 43.43478260869565, "grad_norm": 0.28396502137184143, "learning_rate": 0.001, "loss": 1.7256, "step": 503496 }, { "epoch": 43.43961352657005, "grad_norm": 0.6430443525314331, "learning_rate": 0.001, "loss": 1.7309, "step": 503552 }, { "epoch": 43.44444444444444, "grad_norm": 0.7724123001098633, "learning_rate": 0.001, "loss": 1.7327, "step": 503608 }, { "epoch": 43.44927536231884, "grad_norm": 0.42654114961624146, "learning_rate": 0.001, "loss": 1.7293, "step": 503664 }, { "epoch": 43.45410628019324, "grad_norm": 0.33623358607292175, "learning_rate": 0.001, "loss": 1.7285, "step": 503720 }, { "epoch": 43.45893719806763, "grad_norm": 0.30150890350341797, "learning_rate": 0.001, "loss": 1.73, "step": 503776 }, { "epoch": 43.46376811594203, "grad_norm": 0.30979353189468384, "learning_rate": 0.001, "loss": 1.7376, "step": 503832 }, { "epoch": 43.468599033816425, "grad_norm": 0.30670541524887085, "learning_rate": 0.001, "loss": 1.739, "step": 503888 }, { "epoch": 43.47342995169082, "grad_norm": 0.3110942840576172, "learning_rate": 0.001, "loss": 1.7273, "step": 503944 }, { "epoch": 43.47826086956522, "grad_norm": 0.33473703265190125, "learning_rate": 0.001, "loss": 1.7337, "step": 504000 }, { "epoch": 43.48309178743961, "grad_norm": 0.35574331879615784, "learning_rate": 0.001, "loss": 1.7342, "step": 504056 }, { "epoch": 43.48792270531401, "grad_norm": 0.9274934530258179, "learning_rate": 0.001, "loss": 1.7278, "step": 504112 }, { "epoch": 43.492753623188406, "grad_norm": 0.37786850333213806, "learning_rate": 0.001, "loss": 1.7601, "step": 504168 }, { "epoch": 43.4975845410628, "grad_norm": 0.472273588180542, "learning_rate": 0.001, "loss": 1.7444, "step": 504224 }, { "epoch": 43.5024154589372, "grad_norm": 0.2943299412727356, "learning_rate": 0.001, "loss": 1.7449, "step": 504280 }, { "epoch": 43.507246376811594, "grad_norm": 0.3513292670249939, "learning_rate": 0.001, "loss": 1.7432, "step": 504336 }, { "epoch": 43.51207729468599, "grad_norm": 0.30205070972442627, "learning_rate": 0.001, "loss": 1.7484, "step": 504392 }, { "epoch": 43.51690821256039, "grad_norm": 0.32721245288848877, "learning_rate": 0.001, "loss": 1.7413, "step": 504448 }, { "epoch": 43.52173913043478, "grad_norm": 0.3585265576839447, "learning_rate": 0.001, "loss": 1.7495, "step": 504504 }, { "epoch": 43.52657004830918, "grad_norm": 0.5081578493118286, "learning_rate": 0.001, "loss": 1.7606, "step": 504560 }, { "epoch": 43.531400966183575, "grad_norm": 2.930466413497925, "learning_rate": 0.001, "loss": 1.7465, "step": 504616 }, { "epoch": 43.53623188405797, "grad_norm": 1.5184738636016846, "learning_rate": 0.001, "loss": 1.7604, "step": 504672 }, { "epoch": 43.54106280193237, "grad_norm": 1.3551055192947388, "learning_rate": 0.001, "loss": 1.7635, "step": 504728 }, { "epoch": 43.54589371980676, "grad_norm": 0.7215574979782104, "learning_rate": 0.001, "loss": 1.7756, "step": 504784 }, { "epoch": 43.55072463768116, "grad_norm": 2.8795104026794434, "learning_rate": 0.001, "loss": 1.7898, "step": 504840 }, { "epoch": 43.55555555555556, "grad_norm": 0.36410486698150635, "learning_rate": 0.001, "loss": 1.7767, "step": 504896 }, { "epoch": 43.56038647342995, "grad_norm": 0.2863364517688751, "learning_rate": 0.001, "loss": 1.7698, "step": 504952 }, { "epoch": 43.56521739130435, "grad_norm": 0.3670561611652374, "learning_rate": 0.001, "loss": 1.7651, "step": 505008 }, { "epoch": 43.570048309178745, "grad_norm": 0.9845800995826721, "learning_rate": 0.001, "loss": 1.778, "step": 505064 }, { "epoch": 43.57487922705314, "grad_norm": 0.8016309142112732, "learning_rate": 0.001, "loss": 1.7631, "step": 505120 }, { "epoch": 43.57971014492754, "grad_norm": 0.36346426606178284, "learning_rate": 0.001, "loss": 1.7706, "step": 505176 }, { "epoch": 43.58454106280193, "grad_norm": 9.896422386169434, "learning_rate": 0.001, "loss": 1.7655, "step": 505232 }, { "epoch": 43.589371980676326, "grad_norm": 0.29240819811820984, "learning_rate": 0.001, "loss": 1.758, "step": 505288 }, { "epoch": 43.594202898550726, "grad_norm": 0.3180306553840637, "learning_rate": 0.001, "loss": 1.7536, "step": 505344 }, { "epoch": 43.59903381642512, "grad_norm": 1.12047278881073, "learning_rate": 0.001, "loss": 1.7631, "step": 505400 }, { "epoch": 43.60386473429952, "grad_norm": 1.241138219833374, "learning_rate": 0.001, "loss": 1.7654, "step": 505456 }, { "epoch": 43.608695652173914, "grad_norm": 3.763753652572632, "learning_rate": 0.001, "loss": 1.771, "step": 505512 }, { "epoch": 43.61352657004831, "grad_norm": 2.2122726440429688, "learning_rate": 0.001, "loss": 1.7725, "step": 505568 }, { "epoch": 43.61835748792271, "grad_norm": 0.995103120803833, "learning_rate": 0.001, "loss": 1.7656, "step": 505624 }, { "epoch": 43.6231884057971, "grad_norm": 0.3988291621208191, "learning_rate": 0.001, "loss": 1.7652, "step": 505680 }, { "epoch": 43.628019323671495, "grad_norm": 0.2836867570877075, "learning_rate": 0.001, "loss": 1.7488, "step": 505736 }, { "epoch": 43.632850241545896, "grad_norm": 0.569118320941925, "learning_rate": 0.001, "loss": 1.743, "step": 505792 }, { "epoch": 43.63768115942029, "grad_norm": 1.7818878889083862, "learning_rate": 0.001, "loss": 1.7451, "step": 505848 }, { "epoch": 43.64251207729468, "grad_norm": 0.5960211753845215, "learning_rate": 0.001, "loss": 1.7419, "step": 505904 }, { "epoch": 43.64734299516908, "grad_norm": 0.41328251361846924, "learning_rate": 0.001, "loss": 1.7382, "step": 505960 }, { "epoch": 43.65217391304348, "grad_norm": 0.46101588010787964, "learning_rate": 0.001, "loss": 1.7412, "step": 506016 }, { "epoch": 43.65700483091788, "grad_norm": 0.41485393047332764, "learning_rate": 0.001, "loss": 1.7466, "step": 506072 }, { "epoch": 43.66183574879227, "grad_norm": 1.0683009624481201, "learning_rate": 0.001, "loss": 1.7432, "step": 506128 }, { "epoch": 43.666666666666664, "grad_norm": 8.871010780334473, "learning_rate": 0.001, "loss": 1.7518, "step": 506184 }, { "epoch": 43.671497584541065, "grad_norm": 0.7039579153060913, "learning_rate": 0.001, "loss": 1.7485, "step": 506240 }, { "epoch": 43.67632850241546, "grad_norm": 0.43488240242004395, "learning_rate": 0.001, "loss": 1.7493, "step": 506296 }, { "epoch": 43.68115942028985, "grad_norm": 1.583502173423767, "learning_rate": 0.001, "loss": 1.7478, "step": 506352 }, { "epoch": 43.68599033816425, "grad_norm": 0.638083815574646, "learning_rate": 0.001, "loss": 1.7513, "step": 506408 }, { "epoch": 43.690821256038646, "grad_norm": 0.44312989711761475, "learning_rate": 0.001, "loss": 1.7684, "step": 506464 }, { "epoch": 43.69565217391305, "grad_norm": 0.34854406118392944, "learning_rate": 0.001, "loss": 1.7516, "step": 506520 }, { "epoch": 43.70048309178744, "grad_norm": 0.308992862701416, "learning_rate": 0.001, "loss": 1.7412, "step": 506576 }, { "epoch": 43.70531400966183, "grad_norm": 0.5950103998184204, "learning_rate": 0.001, "loss": 1.74, "step": 506632 }, { "epoch": 43.710144927536234, "grad_norm": 1.336281418800354, "learning_rate": 0.001, "loss": 1.7497, "step": 506688 }, { "epoch": 43.71497584541063, "grad_norm": 0.42758655548095703, "learning_rate": 0.001, "loss": 1.7453, "step": 506744 }, { "epoch": 43.71980676328502, "grad_norm": 0.4068935811519623, "learning_rate": 0.001, "loss": 1.7451, "step": 506800 }, { "epoch": 43.72463768115942, "grad_norm": 0.3154278099536896, "learning_rate": 0.001, "loss": 1.7499, "step": 506856 }, { "epoch": 43.729468599033815, "grad_norm": 0.40624624490737915, "learning_rate": 0.001, "loss": 1.7506, "step": 506912 }, { "epoch": 43.734299516908216, "grad_norm": 0.6809932589530945, "learning_rate": 0.001, "loss": 1.7481, "step": 506968 }, { "epoch": 43.73913043478261, "grad_norm": 0.4508463740348816, "learning_rate": 0.001, "loss": 1.761, "step": 507024 }, { "epoch": 43.743961352657, "grad_norm": 0.31132858991622925, "learning_rate": 0.001, "loss": 1.7471, "step": 507080 }, { "epoch": 43.7487922705314, "grad_norm": 1.1491883993148804, "learning_rate": 0.001, "loss": 1.7549, "step": 507136 }, { "epoch": 43.7536231884058, "grad_norm": 0.3585839867591858, "learning_rate": 0.001, "loss": 1.7578, "step": 507192 }, { "epoch": 43.75845410628019, "grad_norm": 0.42305389046669006, "learning_rate": 0.001, "loss": 1.7423, "step": 507248 }, { "epoch": 43.76328502415459, "grad_norm": 1.0736076831817627, "learning_rate": 0.001, "loss": 1.7486, "step": 507304 }, { "epoch": 43.768115942028984, "grad_norm": 0.3753129541873932, "learning_rate": 0.001, "loss": 1.7584, "step": 507360 }, { "epoch": 43.772946859903385, "grad_norm": 0.2627319395542145, "learning_rate": 0.001, "loss": 1.753, "step": 507416 }, { "epoch": 43.77777777777778, "grad_norm": 1.1527009010314941, "learning_rate": 0.001, "loss": 1.7548, "step": 507472 }, { "epoch": 43.78260869565217, "grad_norm": 0.5647760033607483, "learning_rate": 0.001, "loss": 1.7575, "step": 507528 }, { "epoch": 43.78743961352657, "grad_norm": 0.6352528929710388, "learning_rate": 0.001, "loss": 1.7585, "step": 507584 }, { "epoch": 43.792270531400966, "grad_norm": 0.26792261004447937, "learning_rate": 0.001, "loss": 1.7571, "step": 507640 }, { "epoch": 43.79710144927536, "grad_norm": 0.31790515780448914, "learning_rate": 0.001, "loss": 1.7679, "step": 507696 }, { "epoch": 43.80193236714976, "grad_norm": 0.42530447244644165, "learning_rate": 0.001, "loss": 1.7716, "step": 507752 }, { "epoch": 43.806763285024154, "grad_norm": 0.31393179297447205, "learning_rate": 0.001, "loss": 1.7727, "step": 507808 }, { "epoch": 43.81159420289855, "grad_norm": 0.5969078540802002, "learning_rate": 0.001, "loss": 1.7688, "step": 507864 }, { "epoch": 43.81642512077295, "grad_norm": 1.5209839344024658, "learning_rate": 0.001, "loss": 1.7803, "step": 507920 }, { "epoch": 43.82125603864734, "grad_norm": 0.4208824038505554, "learning_rate": 0.001, "loss": 1.7738, "step": 507976 }, { "epoch": 43.82608695652174, "grad_norm": 2.8256845474243164, "learning_rate": 0.001, "loss": 1.7665, "step": 508032 }, { "epoch": 43.830917874396135, "grad_norm": 4.412088394165039, "learning_rate": 0.001, "loss": 1.7678, "step": 508088 }, { "epoch": 43.83574879227053, "grad_norm": 0.9325332045555115, "learning_rate": 0.001, "loss": 1.7743, "step": 508144 }, { "epoch": 43.84057971014493, "grad_norm": 1.7778911590576172, "learning_rate": 0.001, "loss": 1.7627, "step": 508200 }, { "epoch": 43.84541062801932, "grad_norm": 1.9286770820617676, "learning_rate": 0.001, "loss": 1.7649, "step": 508256 }, { "epoch": 43.85024154589372, "grad_norm": 2.1250927448272705, "learning_rate": 0.001, "loss": 1.7569, "step": 508312 }, { "epoch": 43.85507246376812, "grad_norm": 14.545105934143066, "learning_rate": 0.001, "loss": 1.7645, "step": 508368 }, { "epoch": 43.85990338164251, "grad_norm": 0.37764468789100647, "learning_rate": 0.001, "loss": 1.7652, "step": 508424 }, { "epoch": 43.86473429951691, "grad_norm": 0.8136737942695618, "learning_rate": 0.001, "loss": 1.7627, "step": 508480 }, { "epoch": 43.869565217391305, "grad_norm": 1.5910199880599976, "learning_rate": 0.001, "loss": 1.7657, "step": 508536 }, { "epoch": 43.8743961352657, "grad_norm": 0.3547941744327545, "learning_rate": 0.001, "loss": 1.7514, "step": 508592 }, { "epoch": 43.8792270531401, "grad_norm": 1.1046407222747803, "learning_rate": 0.001, "loss": 1.7611, "step": 508648 }, { "epoch": 43.88405797101449, "grad_norm": 5.16987943649292, "learning_rate": 0.001, "loss": 1.7563, "step": 508704 }, { "epoch": 43.888888888888886, "grad_norm": 15.713579177856445, "learning_rate": 0.001, "loss": 1.7487, "step": 508760 }, { "epoch": 43.893719806763286, "grad_norm": 0.7006009817123413, "learning_rate": 0.001, "loss": 1.7504, "step": 508816 }, { "epoch": 43.89855072463768, "grad_norm": 0.402317613363266, "learning_rate": 0.001, "loss": 1.7418, "step": 508872 }, { "epoch": 43.90338164251208, "grad_norm": 0.2797320783138275, "learning_rate": 0.001, "loss": 1.749, "step": 508928 }, { "epoch": 43.908212560386474, "grad_norm": 0.4944930374622345, "learning_rate": 0.001, "loss": 1.7483, "step": 508984 }, { "epoch": 43.91304347826087, "grad_norm": 0.4514928460121155, "learning_rate": 0.001, "loss": 1.7455, "step": 509040 }, { "epoch": 43.91787439613527, "grad_norm": 0.3731263279914856, "learning_rate": 0.001, "loss": 1.7565, "step": 509096 }, { "epoch": 43.92270531400966, "grad_norm": 0.7510005831718445, "learning_rate": 0.001, "loss": 1.7534, "step": 509152 }, { "epoch": 43.927536231884055, "grad_norm": 0.3649645745754242, "learning_rate": 0.001, "loss": 1.7515, "step": 509208 }, { "epoch": 43.932367149758456, "grad_norm": 0.34229764342308044, "learning_rate": 0.001, "loss": 1.7657, "step": 509264 }, { "epoch": 43.93719806763285, "grad_norm": 0.25829237699508667, "learning_rate": 0.001, "loss": 1.7599, "step": 509320 }, { "epoch": 43.94202898550725, "grad_norm": 0.5315322875976562, "learning_rate": 0.001, "loss": 1.7615, "step": 509376 }, { "epoch": 43.94685990338164, "grad_norm": 0.7372596263885498, "learning_rate": 0.001, "loss": 1.7638, "step": 509432 }, { "epoch": 43.95169082125604, "grad_norm": 0.819179117679596, "learning_rate": 0.001, "loss": 1.7562, "step": 509488 }, { "epoch": 43.95652173913044, "grad_norm": 0.3208511173725128, "learning_rate": 0.001, "loss": 1.7528, "step": 509544 }, { "epoch": 43.96135265700483, "grad_norm": 2.2648298740386963, "learning_rate": 0.001, "loss": 1.7659, "step": 509600 }, { "epoch": 43.966183574879224, "grad_norm": 0.31545767188072205, "learning_rate": 0.001, "loss": 1.7594, "step": 509656 }, { "epoch": 43.971014492753625, "grad_norm": 1.0272369384765625, "learning_rate": 0.001, "loss": 1.7641, "step": 509712 }, { "epoch": 43.97584541062802, "grad_norm": 0.32061269879341125, "learning_rate": 0.001, "loss": 1.766, "step": 509768 }, { "epoch": 43.98067632850242, "grad_norm": 0.9945535063743591, "learning_rate": 0.001, "loss": 1.7604, "step": 509824 }, { "epoch": 43.98550724637681, "grad_norm": 853.5796508789062, "learning_rate": 0.001, "loss": 1.7667, "step": 509880 }, { "epoch": 43.990338164251206, "grad_norm": 2.772505760192871, "learning_rate": 0.001, "loss": 1.7585, "step": 509936 }, { "epoch": 43.99516908212561, "grad_norm": 4.480376243591309, "learning_rate": 0.001, "loss": 1.7613, "step": 509992 }, { "epoch": 44.0, "grad_norm": 0.3885868191719055, "learning_rate": 0.001, "loss": 1.7493, "step": 510048 }, { "epoch": 44.00483091787439, "grad_norm": 0.9080930352210999, "learning_rate": 0.001, "loss": 1.7168, "step": 510104 }, { "epoch": 44.009661835748794, "grad_norm": 0.5800145864486694, "learning_rate": 0.001, "loss": 1.7208, "step": 510160 }, { "epoch": 44.01449275362319, "grad_norm": 0.3350420594215393, "learning_rate": 0.001, "loss": 1.7119, "step": 510216 }, { "epoch": 44.01932367149758, "grad_norm": 0.3337475657463074, "learning_rate": 0.001, "loss": 1.7181, "step": 510272 }, { "epoch": 44.02415458937198, "grad_norm": 0.6137295365333557, "learning_rate": 0.001, "loss": 1.7253, "step": 510328 }, { "epoch": 44.028985507246375, "grad_norm": 0.3282407522201538, "learning_rate": 0.001, "loss": 1.7147, "step": 510384 }, { "epoch": 44.033816425120776, "grad_norm": 0.3639090657234192, "learning_rate": 0.001, "loss": 1.7151, "step": 510440 }, { "epoch": 44.03864734299517, "grad_norm": 0.32378512620925903, "learning_rate": 0.001, "loss": 1.7266, "step": 510496 }, { "epoch": 44.04347826086956, "grad_norm": 0.375983864068985, "learning_rate": 0.001, "loss": 1.7189, "step": 510552 }, { "epoch": 44.04830917874396, "grad_norm": 0.4370112121105194, "learning_rate": 0.001, "loss": 1.7167, "step": 510608 }, { "epoch": 44.05314009661836, "grad_norm": 0.35594841837882996, "learning_rate": 0.001, "loss": 1.7184, "step": 510664 }, { "epoch": 44.05797101449275, "grad_norm": 0.28139302134513855, "learning_rate": 0.001, "loss": 1.7187, "step": 510720 }, { "epoch": 44.06280193236715, "grad_norm": 0.3106311559677124, "learning_rate": 0.001, "loss": 1.7161, "step": 510776 }, { "epoch": 44.067632850241544, "grad_norm": 0.28595760464668274, "learning_rate": 0.001, "loss": 1.721, "step": 510832 }, { "epoch": 44.072463768115945, "grad_norm": 0.379766047000885, "learning_rate": 0.001, "loss": 1.7166, "step": 510888 }, { "epoch": 44.07729468599034, "grad_norm": 0.8302298188209534, "learning_rate": 0.001, "loss": 1.7152, "step": 510944 }, { "epoch": 44.08212560386473, "grad_norm": 0.36580774188041687, "learning_rate": 0.001, "loss": 1.7216, "step": 511000 }, { "epoch": 44.08695652173913, "grad_norm": 0.2754070460796356, "learning_rate": 0.001, "loss": 1.7111, "step": 511056 }, { "epoch": 44.091787439613526, "grad_norm": 0.4942632019519806, "learning_rate": 0.001, "loss": 1.7138, "step": 511112 }, { "epoch": 44.09661835748792, "grad_norm": 0.264268696308136, "learning_rate": 0.001, "loss": 1.7277, "step": 511168 }, { "epoch": 44.10144927536232, "grad_norm": 0.6325370669364929, "learning_rate": 0.001, "loss": 1.7267, "step": 511224 }, { "epoch": 44.106280193236714, "grad_norm": 0.5119471549987793, "learning_rate": 0.001, "loss": 1.7304, "step": 511280 }, { "epoch": 44.111111111111114, "grad_norm": 0.2735876142978668, "learning_rate": 0.001, "loss": 1.728, "step": 511336 }, { "epoch": 44.11594202898551, "grad_norm": 0.8999773263931274, "learning_rate": 0.001, "loss": 1.7292, "step": 511392 }, { "epoch": 44.1207729468599, "grad_norm": 0.3545585870742798, "learning_rate": 0.001, "loss": 1.7305, "step": 511448 }, { "epoch": 44.1256038647343, "grad_norm": 0.31046995520591736, "learning_rate": 0.001, "loss": 1.7271, "step": 511504 }, { "epoch": 44.130434782608695, "grad_norm": 0.4907430410385132, "learning_rate": 0.001, "loss": 1.7451, "step": 511560 }, { "epoch": 44.13526570048309, "grad_norm": 0.3819011151790619, "learning_rate": 0.001, "loss": 1.7349, "step": 511616 }, { "epoch": 44.14009661835749, "grad_norm": 1.98544442653656, "learning_rate": 0.001, "loss": 1.7248, "step": 511672 }, { "epoch": 44.14492753623188, "grad_norm": 2.1118509769439697, "learning_rate": 0.001, "loss": 1.7293, "step": 511728 }, { "epoch": 44.14975845410628, "grad_norm": 0.3777749836444855, "learning_rate": 0.001, "loss": 1.7266, "step": 511784 }, { "epoch": 44.15458937198068, "grad_norm": 0.42451635003089905, "learning_rate": 0.001, "loss": 1.7119, "step": 511840 }, { "epoch": 44.15942028985507, "grad_norm": 0.38344424962997437, "learning_rate": 0.001, "loss": 1.7263, "step": 511896 }, { "epoch": 44.16425120772947, "grad_norm": 0.37173226475715637, "learning_rate": 0.001, "loss": 1.7276, "step": 511952 }, { "epoch": 44.169082125603865, "grad_norm": 6.116097927093506, "learning_rate": 0.001, "loss": 1.7288, "step": 512008 }, { "epoch": 44.17391304347826, "grad_norm": 0.3155895471572876, "learning_rate": 0.001, "loss": 1.7332, "step": 512064 }, { "epoch": 44.17874396135266, "grad_norm": 0.5041471719741821, "learning_rate": 0.001, "loss": 1.7187, "step": 512120 }, { "epoch": 44.18357487922705, "grad_norm": 0.4779297709465027, "learning_rate": 0.001, "loss": 1.7201, "step": 512176 }, { "epoch": 44.18840579710145, "grad_norm": 0.39231663942337036, "learning_rate": 0.001, "loss": 1.7221, "step": 512232 }, { "epoch": 44.193236714975846, "grad_norm": 1.7418016195297241, "learning_rate": 0.001, "loss": 1.7299, "step": 512288 }, { "epoch": 44.19806763285024, "grad_norm": 0.3186659812927246, "learning_rate": 0.001, "loss": 1.7277, "step": 512344 }, { "epoch": 44.20289855072464, "grad_norm": 0.34087079763412476, "learning_rate": 0.001, "loss": 1.7308, "step": 512400 }, { "epoch": 44.207729468599034, "grad_norm": 1.7852113246917725, "learning_rate": 0.001, "loss": 1.7236, "step": 512456 }, { "epoch": 44.21256038647343, "grad_norm": 9.457098960876465, "learning_rate": 0.001, "loss": 1.7373, "step": 512512 }, { "epoch": 44.21739130434783, "grad_norm": 0.7152717113494873, "learning_rate": 0.001, "loss": 1.7392, "step": 512568 }, { "epoch": 44.22222222222222, "grad_norm": 0.29821234941482544, "learning_rate": 0.001, "loss": 1.728, "step": 512624 }, { "epoch": 44.227053140096615, "grad_norm": 0.6706036925315857, "learning_rate": 0.001, "loss": 1.7298, "step": 512680 }, { "epoch": 44.231884057971016, "grad_norm": 0.308407187461853, "learning_rate": 0.001, "loss": 1.7263, "step": 512736 }, { "epoch": 44.23671497584541, "grad_norm": 6.44401741027832, "learning_rate": 0.001, "loss": 1.7461, "step": 512792 }, { "epoch": 44.24154589371981, "grad_norm": 0.3682785928249359, "learning_rate": 0.001, "loss": 1.7771, "step": 512848 }, { "epoch": 44.2463768115942, "grad_norm": 0.6611726880073547, "learning_rate": 0.001, "loss": 1.7545, "step": 512904 }, { "epoch": 44.2512077294686, "grad_norm": 0.7331146597862244, "learning_rate": 0.001, "loss": 1.7423, "step": 512960 }, { "epoch": 44.256038647343, "grad_norm": 0.356121689081192, "learning_rate": 0.001, "loss": 1.7423, "step": 513016 }, { "epoch": 44.26086956521739, "grad_norm": 0.35807570815086365, "learning_rate": 0.001, "loss": 1.7548, "step": 513072 }, { "epoch": 44.265700483091784, "grad_norm": 0.35216668248176575, "learning_rate": 0.001, "loss": 1.7457, "step": 513128 }, { "epoch": 44.270531400966185, "grad_norm": 0.7017082571983337, "learning_rate": 0.001, "loss": 1.7377, "step": 513184 }, { "epoch": 44.27536231884058, "grad_norm": 0.4306289851665497, "learning_rate": 0.001, "loss": 1.7365, "step": 513240 }, { "epoch": 44.28019323671498, "grad_norm": 1.0133066177368164, "learning_rate": 0.001, "loss": 1.7319, "step": 513296 }, { "epoch": 44.28502415458937, "grad_norm": 0.3101988732814789, "learning_rate": 0.001, "loss": 1.7567, "step": 513352 }, { "epoch": 44.289855072463766, "grad_norm": 0.32137158513069153, "learning_rate": 0.001, "loss": 1.7461, "step": 513408 }, { "epoch": 44.29468599033817, "grad_norm": 1.0342674255371094, "learning_rate": 0.001, "loss": 1.7435, "step": 513464 }, { "epoch": 44.29951690821256, "grad_norm": 0.652679979801178, "learning_rate": 0.001, "loss": 1.7334, "step": 513520 }, { "epoch": 44.30434782608695, "grad_norm": 0.5621470212936401, "learning_rate": 0.001, "loss": 1.7344, "step": 513576 }, { "epoch": 44.309178743961354, "grad_norm": 3.8929972648620605, "learning_rate": 0.001, "loss": 1.7362, "step": 513632 }, { "epoch": 44.31400966183575, "grad_norm": 0.918247640132904, "learning_rate": 0.001, "loss": 1.7383, "step": 513688 }, { "epoch": 44.31884057971015, "grad_norm": 0.5870917439460754, "learning_rate": 0.001, "loss": 1.7398, "step": 513744 }, { "epoch": 44.32367149758454, "grad_norm": 0.6914522647857666, "learning_rate": 0.001, "loss": 1.7278, "step": 513800 }, { "epoch": 44.328502415458935, "grad_norm": 0.30583176016807556, "learning_rate": 0.001, "loss": 1.7381, "step": 513856 }, { "epoch": 44.333333333333336, "grad_norm": 0.3249337077140808, "learning_rate": 0.001, "loss": 1.7323, "step": 513912 }, { "epoch": 44.33816425120773, "grad_norm": 0.34601733088493347, "learning_rate": 0.001, "loss": 1.744, "step": 513968 }, { "epoch": 44.34299516908212, "grad_norm": 0.36104172468185425, "learning_rate": 0.001, "loss": 1.7368, "step": 514024 }, { "epoch": 44.34782608695652, "grad_norm": 0.6452495455741882, "learning_rate": 0.001, "loss": 1.7373, "step": 514080 }, { "epoch": 44.35265700483092, "grad_norm": 0.3669946789741516, "learning_rate": 0.001, "loss": 1.7293, "step": 514136 }, { "epoch": 44.35748792270532, "grad_norm": 0.9303929209709167, "learning_rate": 0.001, "loss": 1.7308, "step": 514192 }, { "epoch": 44.36231884057971, "grad_norm": 0.2876887023448944, "learning_rate": 0.001, "loss": 1.7418, "step": 514248 }, { "epoch": 44.367149758454104, "grad_norm": 0.3033621907234192, "learning_rate": 0.001, "loss": 1.7375, "step": 514304 }, { "epoch": 44.371980676328505, "grad_norm": 0.2653990089893341, "learning_rate": 0.001, "loss": 1.751, "step": 514360 }, { "epoch": 44.3768115942029, "grad_norm": 0.3015618920326233, "learning_rate": 0.001, "loss": 1.754, "step": 514416 }, { "epoch": 44.38164251207729, "grad_norm": 0.30280616879463196, "learning_rate": 0.001, "loss": 1.7389, "step": 514472 }, { "epoch": 44.38647342995169, "grad_norm": 0.9202247858047485, "learning_rate": 0.001, "loss": 1.7388, "step": 514528 }, { "epoch": 44.391304347826086, "grad_norm": 0.3107450008392334, "learning_rate": 0.001, "loss": 1.7401, "step": 514584 }, { "epoch": 44.39613526570048, "grad_norm": 0.2641608417034149, "learning_rate": 0.001, "loss": 1.7425, "step": 514640 }, { "epoch": 44.40096618357488, "grad_norm": 0.7104753255844116, "learning_rate": 0.001, "loss": 1.7507, "step": 514696 }, { "epoch": 44.405797101449274, "grad_norm": 0.37904998660087585, "learning_rate": 0.001, "loss": 1.7589, "step": 514752 }, { "epoch": 44.410628019323674, "grad_norm": 0.7512887120246887, "learning_rate": 0.001, "loss": 1.7716, "step": 514808 }, { "epoch": 44.41545893719807, "grad_norm": 1.52034592628479, "learning_rate": 0.001, "loss": 1.7806, "step": 514864 }, { "epoch": 44.42028985507246, "grad_norm": 0.40961334109306335, "learning_rate": 0.001, "loss": 1.7573, "step": 514920 }, { "epoch": 44.42512077294686, "grad_norm": 0.5606068968772888, "learning_rate": 0.001, "loss": 1.7619, "step": 514976 }, { "epoch": 44.429951690821255, "grad_norm": 0.45476940274238586, "learning_rate": 0.001, "loss": 1.7645, "step": 515032 }, { "epoch": 44.43478260869565, "grad_norm": 2.7815322875976562, "learning_rate": 0.001, "loss": 1.7684, "step": 515088 }, { "epoch": 44.43961352657005, "grad_norm": 1.0639739036560059, "learning_rate": 0.001, "loss": 1.7601, "step": 515144 }, { "epoch": 44.44444444444444, "grad_norm": 0.3797971308231354, "learning_rate": 0.001, "loss": 1.7633, "step": 515200 }, { "epoch": 44.44927536231884, "grad_norm": 6.126946926116943, "learning_rate": 0.001, "loss": 1.7538, "step": 515256 }, { "epoch": 44.45410628019324, "grad_norm": 0.31685301661491394, "learning_rate": 0.001, "loss": 1.7576, "step": 515312 }, { "epoch": 44.45893719806763, "grad_norm": 4.233808517456055, "learning_rate": 0.001, "loss": 1.7644, "step": 515368 }, { "epoch": 44.46376811594203, "grad_norm": 0.5376169085502625, "learning_rate": 0.001, "loss": 1.7546, "step": 515424 }, { "epoch": 44.468599033816425, "grad_norm": 0.28769081830978394, "learning_rate": 0.001, "loss": 1.7579, "step": 515480 }, { "epoch": 44.47342995169082, "grad_norm": 0.4230288863182068, "learning_rate": 0.001, "loss": 1.7613, "step": 515536 }, { "epoch": 44.47826086956522, "grad_norm": 0.5095715522766113, "learning_rate": 0.001, "loss": 1.7499, "step": 515592 }, { "epoch": 44.48309178743961, "grad_norm": 0.33711111545562744, "learning_rate": 0.001, "loss": 1.7434, "step": 515648 }, { "epoch": 44.48792270531401, "grad_norm": 0.6009061932563782, "learning_rate": 0.001, "loss": 1.7487, "step": 515704 }, { "epoch": 44.492753623188406, "grad_norm": 0.4340749979019165, "learning_rate": 0.001, "loss": 1.7596, "step": 515760 }, { "epoch": 44.4975845410628, "grad_norm": 0.3595503568649292, "learning_rate": 0.001, "loss": 1.7888, "step": 515816 }, { "epoch": 44.5024154589372, "grad_norm": 0.3426569104194641, "learning_rate": 0.001, "loss": 1.8193, "step": 515872 }, { "epoch": 44.507246376811594, "grad_norm": 0.9158487319946289, "learning_rate": 0.001, "loss": 1.8223, "step": 515928 }, { "epoch": 44.51207729468599, "grad_norm": 1.7137371301651, "learning_rate": 0.001, "loss": 1.8056, "step": 515984 }, { "epoch": 44.51690821256039, "grad_norm": 0.4418066740036011, "learning_rate": 0.001, "loss": 1.8092, "step": 516040 }, { "epoch": 44.52173913043478, "grad_norm": 3.798206090927124, "learning_rate": 0.001, "loss": 1.7802, "step": 516096 }, { "epoch": 44.52657004830918, "grad_norm": 0.763675332069397, "learning_rate": 0.001, "loss": 1.7725, "step": 516152 }, { "epoch": 44.531400966183575, "grad_norm": 3.9402244091033936, "learning_rate": 0.001, "loss": 1.7802, "step": 516208 }, { "epoch": 44.53623188405797, "grad_norm": 0.39857804775238037, "learning_rate": 0.001, "loss": 1.7745, "step": 516264 }, { "epoch": 44.54106280193237, "grad_norm": 0.6346734762191772, "learning_rate": 0.001, "loss": 1.7699, "step": 516320 }, { "epoch": 44.54589371980676, "grad_norm": 0.8489824533462524, "learning_rate": 0.001, "loss": 1.7647, "step": 516376 }, { "epoch": 44.55072463768116, "grad_norm": 0.8828040361404419, "learning_rate": 0.001, "loss": 1.7653, "step": 516432 }, { "epoch": 44.55555555555556, "grad_norm": 2.1146938800811768, "learning_rate": 0.001, "loss": 1.7618, "step": 516488 }, { "epoch": 44.56038647342995, "grad_norm": 0.9239327907562256, "learning_rate": 0.001, "loss": 1.7711, "step": 516544 }, { "epoch": 44.56521739130435, "grad_norm": 2.416935682296753, "learning_rate": 0.001, "loss": 1.7657, "step": 516600 }, { "epoch": 44.570048309178745, "grad_norm": 0.8220854997634888, "learning_rate": 0.001, "loss": 1.7693, "step": 516656 }, { "epoch": 44.57487922705314, "grad_norm": 26.31968879699707, "learning_rate": 0.001, "loss": 1.7713, "step": 516712 }, { "epoch": 44.57971014492754, "grad_norm": 5.89824104309082, "learning_rate": 0.001, "loss": 1.7684, "step": 516768 }, { "epoch": 44.58454106280193, "grad_norm": 0.3573940098285675, "learning_rate": 0.001, "loss": 1.7659, "step": 516824 }, { "epoch": 44.589371980676326, "grad_norm": 2.2968807220458984, "learning_rate": 0.001, "loss": 1.7575, "step": 516880 }, { "epoch": 44.594202898550726, "grad_norm": 3.0044000148773193, "learning_rate": 0.001, "loss": 1.7513, "step": 516936 }, { "epoch": 44.59903381642512, "grad_norm": 0.48029670119285583, "learning_rate": 0.001, "loss": 1.7525, "step": 516992 }, { "epoch": 44.60386473429952, "grad_norm": 0.7424532175064087, "learning_rate": 0.001, "loss": 1.7566, "step": 517048 }, { "epoch": 44.608695652173914, "grad_norm": 2.5451290607452393, "learning_rate": 0.001, "loss": 1.7442, "step": 517104 }, { "epoch": 44.61352657004831, "grad_norm": 1.275504231452942, "learning_rate": 0.001, "loss": 1.75, "step": 517160 }, { "epoch": 44.61835748792271, "grad_norm": 0.33170944452285767, "learning_rate": 0.001, "loss": 1.7523, "step": 517216 }, { "epoch": 44.6231884057971, "grad_norm": 5.117401123046875, "learning_rate": 0.001, "loss": 1.744, "step": 517272 }, { "epoch": 44.628019323671495, "grad_norm": 0.4236985743045807, "learning_rate": 0.001, "loss": 1.7625, "step": 517328 }, { "epoch": 44.632850241545896, "grad_norm": 0.33790692687034607, "learning_rate": 0.001, "loss": 1.7418, "step": 517384 }, { "epoch": 44.63768115942029, "grad_norm": 0.5492182374000549, "learning_rate": 0.001, "loss": 1.7506, "step": 517440 }, { "epoch": 44.64251207729468, "grad_norm": 0.6100949645042419, "learning_rate": 0.001, "loss": 1.7501, "step": 517496 }, { "epoch": 44.64734299516908, "grad_norm": 0.24246233701705933, "learning_rate": 0.001, "loss": 1.7444, "step": 517552 }, { "epoch": 44.65217391304348, "grad_norm": 0.489010751247406, "learning_rate": 0.001, "loss": 1.7432, "step": 517608 }, { "epoch": 44.65700483091788, "grad_norm": 0.4861413538455963, "learning_rate": 0.001, "loss": 1.7521, "step": 517664 }, { "epoch": 44.66183574879227, "grad_norm": 0.35629215836524963, "learning_rate": 0.001, "loss": 1.745, "step": 517720 }, { "epoch": 44.666666666666664, "grad_norm": 0.35338258743286133, "learning_rate": 0.001, "loss": 1.7398, "step": 517776 }, { "epoch": 44.671497584541065, "grad_norm": 1.5289599895477295, "learning_rate": 0.001, "loss": 1.7491, "step": 517832 }, { "epoch": 44.67632850241546, "grad_norm": 0.3826262950897217, "learning_rate": 0.001, "loss": 1.7456, "step": 517888 }, { "epoch": 44.68115942028985, "grad_norm": 0.37195757031440735, "learning_rate": 0.001, "loss": 1.744, "step": 517944 }, { "epoch": 44.68599033816425, "grad_norm": 0.25821664929389954, "learning_rate": 0.001, "loss": 1.7452, "step": 518000 }, { "epoch": 44.690821256038646, "grad_norm": 0.2607474625110626, "learning_rate": 0.001, "loss": 1.7401, "step": 518056 }, { "epoch": 44.69565217391305, "grad_norm": 6.3363566398620605, "learning_rate": 0.001, "loss": 1.7403, "step": 518112 }, { "epoch": 44.70048309178744, "grad_norm": 1.1255825757980347, "learning_rate": 0.001, "loss": 1.7455, "step": 518168 }, { "epoch": 44.70531400966183, "grad_norm": 0.5363611578941345, "learning_rate": 0.001, "loss": 1.7397, "step": 518224 }, { "epoch": 44.710144927536234, "grad_norm": 0.4870119094848633, "learning_rate": 0.001, "loss": 1.7574, "step": 518280 }, { "epoch": 44.71497584541063, "grad_norm": 0.34560301899909973, "learning_rate": 0.001, "loss": 1.7583, "step": 518336 }, { "epoch": 44.71980676328502, "grad_norm": 0.38073885440826416, "learning_rate": 0.001, "loss": 1.7573, "step": 518392 }, { "epoch": 44.72463768115942, "grad_norm": 0.3179895281791687, "learning_rate": 0.001, "loss": 1.7532, "step": 518448 }, { "epoch": 44.729468599033815, "grad_norm": 0.8279065489768982, "learning_rate": 0.001, "loss": 1.753, "step": 518504 }, { "epoch": 44.734299516908216, "grad_norm": 0.39664438366889954, "learning_rate": 0.001, "loss": 1.7572, "step": 518560 }, { "epoch": 44.73913043478261, "grad_norm": 0.5696873068809509, "learning_rate": 0.001, "loss": 1.7585, "step": 518616 }, { "epoch": 44.743961352657, "grad_norm": 1.2663644552230835, "learning_rate": 0.001, "loss": 1.7634, "step": 518672 }, { "epoch": 44.7487922705314, "grad_norm": 0.39203858375549316, "learning_rate": 0.001, "loss": 1.7512, "step": 518728 }, { "epoch": 44.7536231884058, "grad_norm": 2.1809194087982178, "learning_rate": 0.001, "loss": 1.7532, "step": 518784 }, { "epoch": 44.75845410628019, "grad_norm": 1.8396408557891846, "learning_rate": 0.001, "loss": 1.7507, "step": 518840 }, { "epoch": 44.76328502415459, "grad_norm": 0.4956870973110199, "learning_rate": 0.001, "loss": 1.757, "step": 518896 }, { "epoch": 44.768115942028984, "grad_norm": 4.103661060333252, "learning_rate": 0.001, "loss": 1.7614, "step": 518952 }, { "epoch": 44.772946859903385, "grad_norm": 0.3233181834220886, "learning_rate": 0.001, "loss": 1.7676, "step": 519008 }, { "epoch": 44.77777777777778, "grad_norm": 12.702004432678223, "learning_rate": 0.001, "loss": 1.7642, "step": 519064 }, { "epoch": 44.78260869565217, "grad_norm": 16.55841636657715, "learning_rate": 0.001, "loss": 1.7648, "step": 519120 }, { "epoch": 44.78743961352657, "grad_norm": 5.862295627593994, "learning_rate": 0.001, "loss": 1.7744, "step": 519176 }, { "epoch": 44.792270531400966, "grad_norm": 2.0035932064056396, "learning_rate": 0.001, "loss": 1.7684, "step": 519232 }, { "epoch": 44.79710144927536, "grad_norm": 1.7130136489868164, "learning_rate": 0.001, "loss": 1.7697, "step": 519288 }, { "epoch": 44.80193236714976, "grad_norm": 0.7822756767272949, "learning_rate": 0.001, "loss": 1.7827, "step": 519344 }, { "epoch": 44.806763285024154, "grad_norm": 1.0391079187393188, "learning_rate": 0.001, "loss": 1.7797, "step": 519400 }, { "epoch": 44.81159420289855, "grad_norm": 0.31277626752853394, "learning_rate": 0.001, "loss": 1.7848, "step": 519456 }, { "epoch": 44.81642512077295, "grad_norm": 0.5335419178009033, "learning_rate": 0.001, "loss": 1.7807, "step": 519512 }, { "epoch": 44.82125603864734, "grad_norm": 2.35077166557312, "learning_rate": 0.001, "loss": 1.7775, "step": 519568 }, { "epoch": 44.82608695652174, "grad_norm": 0.5974383354187012, "learning_rate": 0.001, "loss": 1.786, "step": 519624 }, { "epoch": 44.830917874396135, "grad_norm": 0.4347194731235504, "learning_rate": 0.001, "loss": 1.7751, "step": 519680 }, { "epoch": 44.83574879227053, "grad_norm": 0.28671127557754517, "learning_rate": 0.001, "loss": 1.7712, "step": 519736 }, { "epoch": 44.84057971014493, "grad_norm": 0.5309271812438965, "learning_rate": 0.001, "loss": 1.7681, "step": 519792 }, { "epoch": 44.84541062801932, "grad_norm": 1.5757977962493896, "learning_rate": 0.001, "loss": 1.7621, "step": 519848 }, { "epoch": 44.85024154589372, "grad_norm": 0.3404558598995209, "learning_rate": 0.001, "loss": 1.7594, "step": 519904 }, { "epoch": 44.85507246376812, "grad_norm": 0.33870580792427063, "learning_rate": 0.001, "loss": 1.767, "step": 519960 }, { "epoch": 44.85990338164251, "grad_norm": 0.5620613098144531, "learning_rate": 0.001, "loss": 1.7536, "step": 520016 }, { "epoch": 44.86473429951691, "grad_norm": 1.3506306409835815, "learning_rate": 0.001, "loss": 1.7595, "step": 520072 }, { "epoch": 44.869565217391305, "grad_norm": 0.4508839547634125, "learning_rate": 0.001, "loss": 1.754, "step": 520128 }, { "epoch": 44.8743961352657, "grad_norm": 0.3961387276649475, "learning_rate": 0.001, "loss": 1.7793, "step": 520184 }, { "epoch": 44.8792270531401, "grad_norm": 0.31895628571510315, "learning_rate": 0.001, "loss": 1.7584, "step": 520240 }, { "epoch": 44.88405797101449, "grad_norm": 0.4472622275352478, "learning_rate": 0.001, "loss": 1.7637, "step": 520296 }, { "epoch": 44.888888888888886, "grad_norm": 0.31508901715278625, "learning_rate": 0.001, "loss": 1.7714, "step": 520352 }, { "epoch": 44.893719806763286, "grad_norm": 0.680257260799408, "learning_rate": 0.001, "loss": 1.7649, "step": 520408 }, { "epoch": 44.89855072463768, "grad_norm": 0.30827298760414124, "learning_rate": 0.001, "loss": 1.761, "step": 520464 }, { "epoch": 44.90338164251208, "grad_norm": 0.4116891920566559, "learning_rate": 0.001, "loss": 1.7627, "step": 520520 }, { "epoch": 44.908212560386474, "grad_norm": 0.3044597804546356, "learning_rate": 0.001, "loss": 1.7603, "step": 520576 }, { "epoch": 44.91304347826087, "grad_norm": 0.33176735043525696, "learning_rate": 0.001, "loss": 1.7502, "step": 520632 }, { "epoch": 44.91787439613527, "grad_norm": 0.3552037477493286, "learning_rate": 0.001, "loss": 1.7535, "step": 520688 }, { "epoch": 44.92270531400966, "grad_norm": 0.30814239382743835, "learning_rate": 0.001, "loss": 1.75, "step": 520744 }, { "epoch": 44.927536231884055, "grad_norm": 1.234261155128479, "learning_rate": 0.001, "loss": 1.7541, "step": 520800 }, { "epoch": 44.932367149758456, "grad_norm": 1.8364142179489136, "learning_rate": 0.001, "loss": 1.7495, "step": 520856 }, { "epoch": 44.93719806763285, "grad_norm": 4.369121551513672, "learning_rate": 0.001, "loss": 1.7514, "step": 520912 }, { "epoch": 44.94202898550725, "grad_norm": 0.7031179070472717, "learning_rate": 0.001, "loss": 1.7496, "step": 520968 }, { "epoch": 44.94685990338164, "grad_norm": 0.38052552938461304, "learning_rate": 0.001, "loss": 1.7483, "step": 521024 }, { "epoch": 44.95169082125604, "grad_norm": 0.282537579536438, "learning_rate": 0.001, "loss": 1.7444, "step": 521080 }, { "epoch": 44.95652173913044, "grad_norm": 0.5754269957542419, "learning_rate": 0.001, "loss": 1.7569, "step": 521136 }, { "epoch": 44.96135265700483, "grad_norm": 0.3936268985271454, "learning_rate": 0.001, "loss": 1.7664, "step": 521192 }, { "epoch": 44.966183574879224, "grad_norm": 0.3173277676105499, "learning_rate": 0.001, "loss": 1.7585, "step": 521248 }, { "epoch": 44.971014492753625, "grad_norm": 0.45701006054878235, "learning_rate": 0.001, "loss": 1.7602, "step": 521304 }, { "epoch": 44.97584541062802, "grad_norm": 0.35018354654312134, "learning_rate": 0.001, "loss": 1.7655, "step": 521360 }, { "epoch": 44.98067632850242, "grad_norm": 0.2741232216358185, "learning_rate": 0.001, "loss": 1.7586, "step": 521416 }, { "epoch": 44.98550724637681, "grad_norm": 0.39522579312324524, "learning_rate": 0.001, "loss": 1.7532, "step": 521472 }, { "epoch": 44.990338164251206, "grad_norm": 0.9977519512176514, "learning_rate": 0.001, "loss": 1.7565, "step": 521528 }, { "epoch": 44.99516908212561, "grad_norm": 4.741569995880127, "learning_rate": 0.001, "loss": 1.7652, "step": 521584 }, { "epoch": 45.0, "grad_norm": 0.31371384859085083, "learning_rate": 0.001, "loss": 1.7518, "step": 521640 }, { "epoch": 45.00483091787439, "grad_norm": 0.3293420672416687, "learning_rate": 0.001, "loss": 1.7192, "step": 521696 }, { "epoch": 45.009661835748794, "grad_norm": 0.3113054037094116, "learning_rate": 0.001, "loss": 1.7207, "step": 521752 }, { "epoch": 45.01449275362319, "grad_norm": 0.43630892038345337, "learning_rate": 0.001, "loss": 1.7212, "step": 521808 }, { "epoch": 45.01932367149758, "grad_norm": 0.45723673701286316, "learning_rate": 0.001, "loss": 1.7243, "step": 521864 }, { "epoch": 45.02415458937198, "grad_norm": 0.3399609923362732, "learning_rate": 0.001, "loss": 1.728, "step": 521920 }, { "epoch": 45.028985507246375, "grad_norm": 0.3382002115249634, "learning_rate": 0.001, "loss": 1.715, "step": 521976 }, { "epoch": 45.033816425120776, "grad_norm": 0.3635134696960449, "learning_rate": 0.001, "loss": 1.7189, "step": 522032 }, { "epoch": 45.03864734299517, "grad_norm": 0.3185690939426422, "learning_rate": 0.001, "loss": 1.7229, "step": 522088 }, { "epoch": 45.04347826086956, "grad_norm": 0.25990843772888184, "learning_rate": 0.001, "loss": 1.7229, "step": 522144 }, { "epoch": 45.04830917874396, "grad_norm": 0.23278699815273285, "learning_rate": 0.001, "loss": 1.7234, "step": 522200 }, { "epoch": 45.05314009661836, "grad_norm": 0.3390466272830963, "learning_rate": 0.001, "loss": 1.7141, "step": 522256 }, { "epoch": 45.05797101449275, "grad_norm": 0.332475483417511, "learning_rate": 0.001, "loss": 1.7136, "step": 522312 }, { "epoch": 45.06280193236715, "grad_norm": 0.2712809443473816, "learning_rate": 0.001, "loss": 1.7147, "step": 522368 }, { "epoch": 45.067632850241544, "grad_norm": 0.24673783779144287, "learning_rate": 0.001, "loss": 1.715, "step": 522424 }, { "epoch": 45.072463768115945, "grad_norm": 5.1654253005981445, "learning_rate": 0.001, "loss": 1.7151, "step": 522480 }, { "epoch": 45.07729468599034, "grad_norm": 0.3564213514328003, "learning_rate": 0.001, "loss": 1.7228, "step": 522536 }, { "epoch": 45.08212560386473, "grad_norm": 0.2672257721424103, "learning_rate": 0.001, "loss": 1.7215, "step": 522592 }, { "epoch": 45.08695652173913, "grad_norm": 0.35722917318344116, "learning_rate": 0.001, "loss": 1.7197, "step": 522648 }, { "epoch": 45.091787439613526, "grad_norm": 1.3635164499282837, "learning_rate": 0.001, "loss": 1.7282, "step": 522704 }, { "epoch": 45.09661835748792, "grad_norm": 0.39401012659072876, "learning_rate": 0.001, "loss": 1.7375, "step": 522760 }, { "epoch": 45.10144927536232, "grad_norm": 0.26064446568489075, "learning_rate": 0.001, "loss": 1.716, "step": 522816 }, { "epoch": 45.106280193236714, "grad_norm": 0.4009648263454437, "learning_rate": 0.001, "loss": 1.7216, "step": 522872 }, { "epoch": 45.111111111111114, "grad_norm": 0.4340539574623108, "learning_rate": 0.001, "loss": 1.7306, "step": 522928 }, { "epoch": 45.11594202898551, "grad_norm": 0.7048554420471191, "learning_rate": 0.001, "loss": 1.7277, "step": 522984 }, { "epoch": 45.1207729468599, "grad_norm": 0.3423740863800049, "learning_rate": 0.001, "loss": 1.7246, "step": 523040 }, { "epoch": 45.1256038647343, "grad_norm": 0.3318486511707306, "learning_rate": 0.001, "loss": 1.7212, "step": 523096 }, { "epoch": 45.130434782608695, "grad_norm": 0.6265755295753479, "learning_rate": 0.001, "loss": 1.7222, "step": 523152 }, { "epoch": 45.13526570048309, "grad_norm": 1.302554726600647, "learning_rate": 0.001, "loss": 1.7261, "step": 523208 }, { "epoch": 45.14009661835749, "grad_norm": 0.37194523215293884, "learning_rate": 0.001, "loss": 1.7222, "step": 523264 }, { "epoch": 45.14492753623188, "grad_norm": 1.2909315824508667, "learning_rate": 0.001, "loss": 1.7373, "step": 523320 }, { "epoch": 45.14975845410628, "grad_norm": 0.7496675848960876, "learning_rate": 0.001, "loss": 1.7454, "step": 523376 }, { "epoch": 45.15458937198068, "grad_norm": 0.3917866349220276, "learning_rate": 0.001, "loss": 1.7397, "step": 523432 }, { "epoch": 45.15942028985507, "grad_norm": 0.33339712023735046, "learning_rate": 0.001, "loss": 1.7478, "step": 523488 }, { "epoch": 45.16425120772947, "grad_norm": 0.32521870732307434, "learning_rate": 0.001, "loss": 1.7441, "step": 523544 }, { "epoch": 45.169082125603865, "grad_norm": 0.2854542136192322, "learning_rate": 0.001, "loss": 1.7371, "step": 523600 }, { "epoch": 45.17391304347826, "grad_norm": 0.3545491099357605, "learning_rate": 0.001, "loss": 1.7458, "step": 523656 }, { "epoch": 45.17874396135266, "grad_norm": 2.9559524059295654, "learning_rate": 0.001, "loss": 1.7522, "step": 523712 }, { "epoch": 45.18357487922705, "grad_norm": 0.3445627987384796, "learning_rate": 0.001, "loss": 1.7375, "step": 523768 }, { "epoch": 45.18840579710145, "grad_norm": 0.6340352296829224, "learning_rate": 0.001, "loss": 1.7369, "step": 523824 }, { "epoch": 45.193236714975846, "grad_norm": 1.5123846530914307, "learning_rate": 0.001, "loss": 1.7268, "step": 523880 }, { "epoch": 45.19806763285024, "grad_norm": 0.33506837487220764, "learning_rate": 0.001, "loss": 1.73, "step": 523936 }, { "epoch": 45.20289855072464, "grad_norm": 10.253279685974121, "learning_rate": 0.001, "loss": 1.7378, "step": 523992 }, { "epoch": 45.207729468599034, "grad_norm": 0.45385637879371643, "learning_rate": 0.001, "loss": 1.7486, "step": 524048 }, { "epoch": 45.21256038647343, "grad_norm": 5.902604103088379, "learning_rate": 0.001, "loss": 1.7363, "step": 524104 }, { "epoch": 45.21739130434783, "grad_norm": 5.08174991607666, "learning_rate": 0.001, "loss": 1.819, "step": 524160 }, { "epoch": 45.22222222222222, "grad_norm": 0.6695230007171631, "learning_rate": 0.001, "loss": 1.8925, "step": 524216 }, { "epoch": 45.227053140096615, "grad_norm": 0.6429935693740845, "learning_rate": 0.001, "loss": 1.8022, "step": 524272 }, { "epoch": 45.231884057971016, "grad_norm": 0.25663134455680847, "learning_rate": 0.001, "loss": 1.7548, "step": 524328 }, { "epoch": 45.23671497584541, "grad_norm": 0.5481548309326172, "learning_rate": 0.001, "loss": 1.7535, "step": 524384 }, { "epoch": 45.24154589371981, "grad_norm": 0.2849384844303131, "learning_rate": 0.001, "loss": 1.7562, "step": 524440 }, { "epoch": 45.2463768115942, "grad_norm": 0.37499096989631653, "learning_rate": 0.001, "loss": 1.7626, "step": 524496 }, { "epoch": 45.2512077294686, "grad_norm": 2.353937864303589, "learning_rate": 0.001, "loss": 1.7944, "step": 524552 }, { "epoch": 45.256038647343, "grad_norm": 3.2369601726531982, "learning_rate": 0.001, "loss": 1.7825, "step": 524608 }, { "epoch": 45.26086956521739, "grad_norm": 0.302329957485199, "learning_rate": 0.001, "loss": 1.7773, "step": 524664 }, { "epoch": 45.265700483091784, "grad_norm": 0.23675577342510223, "learning_rate": 0.001, "loss": 1.7471, "step": 524720 }, { "epoch": 45.270531400966185, "grad_norm": 0.4364728331565857, "learning_rate": 0.001, "loss": 1.743, "step": 524776 }, { "epoch": 45.27536231884058, "grad_norm": 0.33125922083854675, "learning_rate": 0.001, "loss": 1.743, "step": 524832 }, { "epoch": 45.28019323671498, "grad_norm": 0.3779846727848053, "learning_rate": 0.001, "loss": 1.7355, "step": 524888 }, { "epoch": 45.28502415458937, "grad_norm": 0.40882301330566406, "learning_rate": 0.001, "loss": 1.7306, "step": 524944 }, { "epoch": 45.289855072463766, "grad_norm": 2.2268364429473877, "learning_rate": 0.001, "loss": 1.7389, "step": 525000 }, { "epoch": 45.29468599033817, "grad_norm": 1.0919376611709595, "learning_rate": 0.001, "loss": 1.7353, "step": 525056 }, { "epoch": 45.29951690821256, "grad_norm": 0.5641513466835022, "learning_rate": 0.001, "loss": 1.741, "step": 525112 }, { "epoch": 45.30434782608695, "grad_norm": 14.438705444335938, "learning_rate": 0.001, "loss": 1.7687, "step": 525168 }, { "epoch": 45.309178743961354, "grad_norm": 2.010845899581909, "learning_rate": 0.001, "loss": 1.785, "step": 525224 }, { "epoch": 45.31400966183575, "grad_norm": 0.4663834273815155, "learning_rate": 0.001, "loss": 1.7641, "step": 525280 }, { "epoch": 45.31884057971015, "grad_norm": 7.143008708953857, "learning_rate": 0.001, "loss": 1.7597, "step": 525336 }, { "epoch": 45.32367149758454, "grad_norm": 0.46525809168815613, "learning_rate": 0.001, "loss": 1.8088, "step": 525392 }, { "epoch": 45.328502415458935, "grad_norm": 16.268165588378906, "learning_rate": 0.001, "loss": 1.8063, "step": 525448 }, { "epoch": 45.333333333333336, "grad_norm": 0.48218870162963867, "learning_rate": 0.001, "loss": 1.786, "step": 525504 }, { "epoch": 45.33816425120773, "grad_norm": 0.47329452633857727, "learning_rate": 0.001, "loss": 1.7701, "step": 525560 }, { "epoch": 45.34299516908212, "grad_norm": 0.2616460919380188, "learning_rate": 0.001, "loss": 1.7773, "step": 525616 }, { "epoch": 45.34782608695652, "grad_norm": 1.672616720199585, "learning_rate": 0.001, "loss": 1.7675, "step": 525672 }, { "epoch": 45.35265700483092, "grad_norm": 1.5290451049804688, "learning_rate": 0.001, "loss": 1.7564, "step": 525728 }, { "epoch": 45.35748792270532, "grad_norm": 0.3674416244029999, "learning_rate": 0.001, "loss": 1.7572, "step": 525784 }, { "epoch": 45.36231884057971, "grad_norm": 0.4735299348831177, "learning_rate": 0.001, "loss": 1.7532, "step": 525840 }, { "epoch": 45.367149758454104, "grad_norm": 0.24400125443935394, "learning_rate": 0.001, "loss": 1.7524, "step": 525896 }, { "epoch": 45.371980676328505, "grad_norm": 9.190494537353516, "learning_rate": 0.001, "loss": 1.7554, "step": 525952 }, { "epoch": 45.3768115942029, "grad_norm": 1.2265187501907349, "learning_rate": 0.001, "loss": 1.7475, "step": 526008 }, { "epoch": 45.38164251207729, "grad_norm": 6.495782375335693, "learning_rate": 0.001, "loss": 1.7498, "step": 526064 }, { "epoch": 45.38647342995169, "grad_norm": 0.25140345096588135, "learning_rate": 0.001, "loss": 1.7492, "step": 526120 }, { "epoch": 45.391304347826086, "grad_norm": 0.38593724370002747, "learning_rate": 0.001, "loss": 1.7638, "step": 526176 }, { "epoch": 45.39613526570048, "grad_norm": 0.2932702600955963, "learning_rate": 0.001, "loss": 1.7672, "step": 526232 }, { "epoch": 45.40096618357488, "grad_norm": 1.4268132448196411, "learning_rate": 0.001, "loss": 1.7651, "step": 526288 }, { "epoch": 45.405797101449274, "grad_norm": 0.7168495655059814, "learning_rate": 0.001, "loss": 1.7389, "step": 526344 }, { "epoch": 45.410628019323674, "grad_norm": 0.2927975356578827, "learning_rate": 0.001, "loss": 1.7399, "step": 526400 }, { "epoch": 45.41545893719807, "grad_norm": 2.0651614665985107, "learning_rate": 0.001, "loss": 1.7417, "step": 526456 }, { "epoch": 45.42028985507246, "grad_norm": 1.1292849779129028, "learning_rate": 0.001, "loss": 1.7335, "step": 526512 }, { "epoch": 45.42512077294686, "grad_norm": 0.3622138500213623, "learning_rate": 0.001, "loss": 1.7491, "step": 526568 }, { "epoch": 45.429951690821255, "grad_norm": 0.43543022871017456, "learning_rate": 0.001, "loss": 1.7442, "step": 526624 }, { "epoch": 45.43478260869565, "grad_norm": 0.3022296130657196, "learning_rate": 0.001, "loss": 1.7427, "step": 526680 }, { "epoch": 45.43961352657005, "grad_norm": 0.2741853892803192, "learning_rate": 0.001, "loss": 1.737, "step": 526736 }, { "epoch": 45.44444444444444, "grad_norm": 0.30984988808631897, "learning_rate": 0.001, "loss": 1.7353, "step": 526792 }, { "epoch": 45.44927536231884, "grad_norm": 0.3662015497684479, "learning_rate": 0.001, "loss": 1.7413, "step": 526848 }, { "epoch": 45.45410628019324, "grad_norm": 0.3618333637714386, "learning_rate": 0.001, "loss": 1.7421, "step": 526904 }, { "epoch": 45.45893719806763, "grad_norm": 0.36521196365356445, "learning_rate": 0.001, "loss": 1.7418, "step": 526960 }, { "epoch": 45.46376811594203, "grad_norm": 0.7569295763969421, "learning_rate": 0.001, "loss": 1.7457, "step": 527016 }, { "epoch": 45.468599033816425, "grad_norm": 0.32151591777801514, "learning_rate": 0.001, "loss": 1.742, "step": 527072 }, { "epoch": 45.47342995169082, "grad_norm": 0.4429345726966858, "learning_rate": 0.001, "loss": 1.7354, "step": 527128 }, { "epoch": 45.47826086956522, "grad_norm": 0.26373571157455444, "learning_rate": 0.001, "loss": 1.7371, "step": 527184 }, { "epoch": 45.48309178743961, "grad_norm": 0.3029117286205292, "learning_rate": 0.001, "loss": 1.7371, "step": 527240 }, { "epoch": 45.48792270531401, "grad_norm": 1.0480002164840698, "learning_rate": 0.001, "loss": 1.7337, "step": 527296 }, { "epoch": 45.492753623188406, "grad_norm": 0.30854931473731995, "learning_rate": 0.001, "loss": 1.7365, "step": 527352 }, { "epoch": 45.4975845410628, "grad_norm": 0.3456036448478699, "learning_rate": 0.001, "loss": 1.7438, "step": 527408 }, { "epoch": 45.5024154589372, "grad_norm": 0.30500760674476624, "learning_rate": 0.001, "loss": 1.7403, "step": 527464 }, { "epoch": 45.507246376811594, "grad_norm": 0.26798027753829956, "learning_rate": 0.001, "loss": 1.7391, "step": 527520 }, { "epoch": 45.51207729468599, "grad_norm": 0.3296271860599518, "learning_rate": 0.001, "loss": 1.7553, "step": 527576 }, { "epoch": 45.51690821256039, "grad_norm": 0.3124648332595825, "learning_rate": 0.001, "loss": 1.7399, "step": 527632 }, { "epoch": 45.52173913043478, "grad_norm": 0.36314842104911804, "learning_rate": 0.001, "loss": 1.7394, "step": 527688 }, { "epoch": 45.52657004830918, "grad_norm": 0.7436350584030151, "learning_rate": 0.001, "loss": 1.7322, "step": 527744 }, { "epoch": 45.531400966183575, "grad_norm": 7.111041069030762, "learning_rate": 0.001, "loss": 1.7308, "step": 527800 }, { "epoch": 45.53623188405797, "grad_norm": 0.7580754160881042, "learning_rate": 0.001, "loss": 1.7389, "step": 527856 }, { "epoch": 45.54106280193237, "grad_norm": 4.935938358306885, "learning_rate": 0.001, "loss": 1.7477, "step": 527912 }, { "epoch": 45.54589371980676, "grad_norm": 0.4116394817829132, "learning_rate": 0.001, "loss": 1.7557, "step": 527968 }, { "epoch": 45.55072463768116, "grad_norm": 0.28215518593788147, "learning_rate": 0.001, "loss": 1.7843, "step": 528024 }, { "epoch": 45.55555555555556, "grad_norm": 2.5026497840881348, "learning_rate": 0.001, "loss": 1.7833, "step": 528080 }, { "epoch": 45.56038647342995, "grad_norm": 3.8257017135620117, "learning_rate": 0.001, "loss": 1.8122, "step": 528136 }, { "epoch": 45.56521739130435, "grad_norm": 8.211630821228027, "learning_rate": 0.001, "loss": 1.8626, "step": 528192 }, { "epoch": 45.570048309178745, "grad_norm": 7.574403285980225, "learning_rate": 0.001, "loss": 1.8375, "step": 528248 }, { "epoch": 45.57487922705314, "grad_norm": 3.6380040645599365, "learning_rate": 0.001, "loss": 1.802, "step": 528304 }, { "epoch": 45.57971014492754, "grad_norm": 0.7546054720878601, "learning_rate": 0.001, "loss": 1.7798, "step": 528360 }, { "epoch": 45.58454106280193, "grad_norm": 0.8628254532814026, "learning_rate": 0.001, "loss": 1.7794, "step": 528416 }, { "epoch": 45.589371980676326, "grad_norm": 3.9354124069213867, "learning_rate": 0.001, "loss": 1.7819, "step": 528472 }, { "epoch": 45.594202898550726, "grad_norm": 0.7690363526344299, "learning_rate": 0.001, "loss": 1.7771, "step": 528528 }, { "epoch": 45.59903381642512, "grad_norm": 10.157276153564453, "learning_rate": 0.001, "loss": 1.7866, "step": 528584 }, { "epoch": 45.60386473429952, "grad_norm": 0.6456630825996399, "learning_rate": 0.001, "loss": 1.7698, "step": 528640 }, { "epoch": 45.608695652173914, "grad_norm": 0.3934164345264435, "learning_rate": 0.001, "loss": 1.7736, "step": 528696 }, { "epoch": 45.61352657004831, "grad_norm": 0.3815837502479553, "learning_rate": 0.001, "loss": 1.7799, "step": 528752 }, { "epoch": 45.61835748792271, "grad_norm": 3.295544385910034, "learning_rate": 0.001, "loss": 1.7709, "step": 528808 }, { "epoch": 45.6231884057971, "grad_norm": 1.811916708946228, "learning_rate": 0.001, "loss": 1.7739, "step": 528864 }, { "epoch": 45.628019323671495, "grad_norm": 0.625079333782196, "learning_rate": 0.001, "loss": 1.8083, "step": 528920 }, { "epoch": 45.632850241545896, "grad_norm": 0.39647549390792847, "learning_rate": 0.001, "loss": 1.7836, "step": 528976 }, { "epoch": 45.63768115942029, "grad_norm": 1.930590271949768, "learning_rate": 0.001, "loss": 1.7692, "step": 529032 }, { "epoch": 45.64251207729468, "grad_norm": 0.9271938800811768, "learning_rate": 0.001, "loss": 1.7714, "step": 529088 }, { "epoch": 45.64734299516908, "grad_norm": 0.2554655373096466, "learning_rate": 0.001, "loss": 1.7741, "step": 529144 }, { "epoch": 45.65217391304348, "grad_norm": 0.6618444919586182, "learning_rate": 0.001, "loss": 1.7716, "step": 529200 }, { "epoch": 45.65700483091788, "grad_norm": 1.9374746084213257, "learning_rate": 0.001, "loss": 1.7678, "step": 529256 }, { "epoch": 45.66183574879227, "grad_norm": 0.2606915533542633, "learning_rate": 0.001, "loss": 1.7545, "step": 529312 }, { "epoch": 45.666666666666664, "grad_norm": 0.8805327415466309, "learning_rate": 0.001, "loss": 1.7641, "step": 529368 }, { "epoch": 45.671497584541065, "grad_norm": 3.211629629135132, "learning_rate": 0.001, "loss": 1.7737, "step": 529424 }, { "epoch": 45.67632850241546, "grad_norm": 3.8423213958740234, "learning_rate": 0.001, "loss": 1.7681, "step": 529480 }, { "epoch": 45.68115942028985, "grad_norm": 0.24217258393764496, "learning_rate": 0.001, "loss": 1.7757, "step": 529536 }, { "epoch": 45.68599033816425, "grad_norm": 0.3422798812389374, "learning_rate": 0.001, "loss": 1.7692, "step": 529592 }, { "epoch": 45.690821256038646, "grad_norm": 5.129047393798828, "learning_rate": 0.001, "loss": 1.7711, "step": 529648 }, { "epoch": 45.69565217391305, "grad_norm": 6.970626354217529, "learning_rate": 0.001, "loss": 1.767, "step": 529704 }, { "epoch": 45.70048309178744, "grad_norm": 5.012514591217041, "learning_rate": 0.001, "loss": 1.7791, "step": 529760 }, { "epoch": 45.70531400966183, "grad_norm": 2.616478681564331, "learning_rate": 0.001, "loss": 1.769, "step": 529816 }, { "epoch": 45.710144927536234, "grad_norm": 2.122823715209961, "learning_rate": 0.001, "loss": 1.7714, "step": 529872 }, { "epoch": 45.71497584541063, "grad_norm": 0.5691376328468323, "learning_rate": 0.001, "loss": 1.7876, "step": 529928 }, { "epoch": 45.71980676328502, "grad_norm": 4.813388824462891, "learning_rate": 0.001, "loss": 1.7888, "step": 529984 }, { "epoch": 45.72463768115942, "grad_norm": 0.5019924640655518, "learning_rate": 0.001, "loss": 1.7923, "step": 530040 }, { "epoch": 45.729468599033815, "grad_norm": 0.3909951448440552, "learning_rate": 0.001, "loss": 1.7851, "step": 530096 }, { "epoch": 45.734299516908216, "grad_norm": 9.063587188720703, "learning_rate": 0.001, "loss": 1.7759, "step": 530152 }, { "epoch": 45.73913043478261, "grad_norm": 0.250916063785553, "learning_rate": 0.001, "loss": 1.7784, "step": 530208 }, { "epoch": 45.743961352657, "grad_norm": 1.456973671913147, "learning_rate": 0.001, "loss": 1.7824, "step": 530264 }, { "epoch": 45.7487922705314, "grad_norm": 0.32225966453552246, "learning_rate": 0.001, "loss": 1.7797, "step": 530320 }, { "epoch": 45.7536231884058, "grad_norm": 1.5743563175201416, "learning_rate": 0.001, "loss": 1.7767, "step": 530376 }, { "epoch": 45.75845410628019, "grad_norm": 2.4193952083587646, "learning_rate": 0.001, "loss": 1.7769, "step": 530432 }, { "epoch": 45.76328502415459, "grad_norm": 5.2086920738220215, "learning_rate": 0.001, "loss": 1.7673, "step": 530488 }, { "epoch": 45.768115942028984, "grad_norm": 0.6869384050369263, "learning_rate": 0.001, "loss": 1.773, "step": 530544 }, { "epoch": 45.772946859903385, "grad_norm": 0.5429085493087769, "learning_rate": 0.001, "loss": 1.7759, "step": 530600 }, { "epoch": 45.77777777777778, "grad_norm": 0.2612639367580414, "learning_rate": 0.001, "loss": 1.7753, "step": 530656 }, { "epoch": 45.78260869565217, "grad_norm": 1.1171563863754272, "learning_rate": 0.001, "loss": 1.7655, "step": 530712 }, { "epoch": 45.78743961352657, "grad_norm": 1.7402960062026978, "learning_rate": 0.001, "loss": 1.7672, "step": 530768 }, { "epoch": 45.792270531400966, "grad_norm": 0.26652708649635315, "learning_rate": 0.001, "loss": 1.7707, "step": 530824 }, { "epoch": 45.79710144927536, "grad_norm": 0.4806079566478729, "learning_rate": 0.001, "loss": 1.7691, "step": 530880 }, { "epoch": 45.80193236714976, "grad_norm": 0.5488345623016357, "learning_rate": 0.001, "loss": 1.7636, "step": 530936 }, { "epoch": 45.806763285024154, "grad_norm": 0.3176897466182709, "learning_rate": 0.001, "loss": 1.761, "step": 530992 }, { "epoch": 45.81159420289855, "grad_norm": 0.32820042967796326, "learning_rate": 0.001, "loss": 1.7589, "step": 531048 }, { "epoch": 45.81642512077295, "grad_norm": 0.3281714618206024, "learning_rate": 0.001, "loss": 1.7723, "step": 531104 }, { "epoch": 45.82125603864734, "grad_norm": 0.31178396940231323, "learning_rate": 0.001, "loss": 1.7651, "step": 531160 }, { "epoch": 45.82608695652174, "grad_norm": 0.8093405961990356, "learning_rate": 0.001, "loss": 1.7573, "step": 531216 }, { "epoch": 45.830917874396135, "grad_norm": 0.5577892661094666, "learning_rate": 0.001, "loss": 1.7603, "step": 531272 }, { "epoch": 45.83574879227053, "grad_norm": 0.5364841222763062, "learning_rate": 0.001, "loss": 1.7479, "step": 531328 }, { "epoch": 45.84057971014493, "grad_norm": 1.711414098739624, "learning_rate": 0.001, "loss": 1.7619, "step": 531384 }, { "epoch": 45.84541062801932, "grad_norm": 0.2967108488082886, "learning_rate": 0.001, "loss": 1.7564, "step": 531440 }, { "epoch": 45.85024154589372, "grad_norm": 0.27044159173965454, "learning_rate": 0.001, "loss": 1.763, "step": 531496 }, { "epoch": 45.85507246376812, "grad_norm": 0.4724573791027069, "learning_rate": 0.001, "loss": 1.7609, "step": 531552 }, { "epoch": 45.85990338164251, "grad_norm": 0.2533262073993683, "learning_rate": 0.001, "loss": 1.771, "step": 531608 }, { "epoch": 45.86473429951691, "grad_norm": 0.3292151987552643, "learning_rate": 0.001, "loss": 1.7606, "step": 531664 }, { "epoch": 45.869565217391305, "grad_norm": 1.7710400819778442, "learning_rate": 0.001, "loss": 1.7592, "step": 531720 }, { "epoch": 45.8743961352657, "grad_norm": 0.3514461815357208, "learning_rate": 0.001, "loss": 1.7612, "step": 531776 }, { "epoch": 45.8792270531401, "grad_norm": 2.956660747528076, "learning_rate": 0.001, "loss": 1.7516, "step": 531832 }, { "epoch": 45.88405797101449, "grad_norm": 2.4839417934417725, "learning_rate": 0.001, "loss": 1.7552, "step": 531888 }, { "epoch": 45.888888888888886, "grad_norm": 0.3897237181663513, "learning_rate": 0.001, "loss": 1.7531, "step": 531944 }, { "epoch": 45.893719806763286, "grad_norm": 0.35215574502944946, "learning_rate": 0.001, "loss": 1.7659, "step": 532000 }, { "epoch": 45.89855072463768, "grad_norm": 1.2172863483428955, "learning_rate": 0.001, "loss": 1.764, "step": 532056 }, { "epoch": 45.90338164251208, "grad_norm": 0.3998480439186096, "learning_rate": 0.001, "loss": 1.7611, "step": 532112 }, { "epoch": 45.908212560386474, "grad_norm": 1.906186580657959, "learning_rate": 0.001, "loss": 1.7497, "step": 532168 }, { "epoch": 45.91304347826087, "grad_norm": 0.7061434388160706, "learning_rate": 0.001, "loss": 1.7451, "step": 532224 }, { "epoch": 45.91787439613527, "grad_norm": 3.7805590629577637, "learning_rate": 0.001, "loss": 1.7538, "step": 532280 }, { "epoch": 45.92270531400966, "grad_norm": 0.27729344367980957, "learning_rate": 0.001, "loss": 1.7497, "step": 532336 }, { "epoch": 45.927536231884055, "grad_norm": 1.9163427352905273, "learning_rate": 0.001, "loss": 1.7518, "step": 532392 }, { "epoch": 45.932367149758456, "grad_norm": 0.3306645154953003, "learning_rate": 0.001, "loss": 1.7557, "step": 532448 }, { "epoch": 45.93719806763285, "grad_norm": 0.38886746764183044, "learning_rate": 0.001, "loss": 1.7504, "step": 532504 }, { "epoch": 45.94202898550725, "grad_norm": 0.27934107184410095, "learning_rate": 0.001, "loss": 1.745, "step": 532560 }, { "epoch": 45.94685990338164, "grad_norm": 0.2990480661392212, "learning_rate": 0.001, "loss": 1.7437, "step": 532616 }, { "epoch": 45.95169082125604, "grad_norm": 0.3589654862880707, "learning_rate": 0.001, "loss": 1.744, "step": 532672 }, { "epoch": 45.95652173913044, "grad_norm": 0.3306331932544708, "learning_rate": 0.001, "loss": 1.7476, "step": 532728 }, { "epoch": 45.96135265700483, "grad_norm": 0.6184709072113037, "learning_rate": 0.001, "loss": 1.7417, "step": 532784 }, { "epoch": 45.966183574879224, "grad_norm": 0.4644886553287506, "learning_rate": 0.001, "loss": 1.7504, "step": 532840 }, { "epoch": 45.971014492753625, "grad_norm": 0.6293536424636841, "learning_rate": 0.001, "loss": 1.7417, "step": 532896 }, { "epoch": 45.97584541062802, "grad_norm": 1.7369558811187744, "learning_rate": 0.001, "loss": 1.7451, "step": 532952 }, { "epoch": 45.98067632850242, "grad_norm": 2.3743200302124023, "learning_rate": 0.001, "loss": 1.759, "step": 533008 }, { "epoch": 45.98550724637681, "grad_norm": 6.140804767608643, "learning_rate": 0.001, "loss": 1.7624, "step": 533064 }, { "epoch": 45.990338164251206, "grad_norm": 0.9559650421142578, "learning_rate": 0.001, "loss": 1.7587, "step": 533120 }, { "epoch": 45.99516908212561, "grad_norm": 0.6048048734664917, "learning_rate": 0.001, "loss": 1.7609, "step": 533176 }, { "epoch": 46.0, "grad_norm": 3.2695298194885254, "learning_rate": 0.001, "loss": 1.7532, "step": 533232 }, { "epoch": 46.00483091787439, "grad_norm": 0.417667955160141, "learning_rate": 0.001, "loss": 1.7176, "step": 533288 }, { "epoch": 46.009661835748794, "grad_norm": 0.32400092482566833, "learning_rate": 0.001, "loss": 1.7262, "step": 533344 }, { "epoch": 46.01449275362319, "grad_norm": 0.33220767974853516, "learning_rate": 0.001, "loss": 1.7115, "step": 533400 }, { "epoch": 46.01932367149758, "grad_norm": 0.6974681615829468, "learning_rate": 0.001, "loss": 1.7153, "step": 533456 }, { "epoch": 46.02415458937198, "grad_norm": 0.2924492657184601, "learning_rate": 0.001, "loss": 1.7127, "step": 533512 }, { "epoch": 46.028985507246375, "grad_norm": 0.8584845066070557, "learning_rate": 0.001, "loss": 1.7162, "step": 533568 }, { "epoch": 46.033816425120776, "grad_norm": 0.3093520402908325, "learning_rate": 0.001, "loss": 1.7066, "step": 533624 }, { "epoch": 46.03864734299517, "grad_norm": 0.7262521982192993, "learning_rate": 0.001, "loss": 1.7073, "step": 533680 }, { "epoch": 46.04347826086956, "grad_norm": 0.24096333980560303, "learning_rate": 0.001, "loss": 1.7105, "step": 533736 }, { "epoch": 46.04830917874396, "grad_norm": 0.8534532785415649, "learning_rate": 0.001, "loss": 1.7149, "step": 533792 }, { "epoch": 46.05314009661836, "grad_norm": 0.29234328866004944, "learning_rate": 0.001, "loss": 1.7089, "step": 533848 }, { "epoch": 46.05797101449275, "grad_norm": 0.46962398290634155, "learning_rate": 0.001, "loss": 1.7069, "step": 533904 }, { "epoch": 46.06280193236715, "grad_norm": 0.5291991233825684, "learning_rate": 0.001, "loss": 1.7037, "step": 533960 }, { "epoch": 46.067632850241544, "grad_norm": 0.4565190374851227, "learning_rate": 0.001, "loss": 1.7075, "step": 534016 }, { "epoch": 46.072463768115945, "grad_norm": 1.0419261455535889, "learning_rate": 0.001, "loss": 1.711, "step": 534072 }, { "epoch": 46.07729468599034, "grad_norm": 1.149330973625183, "learning_rate": 0.001, "loss": 1.7098, "step": 534128 }, { "epoch": 46.08212560386473, "grad_norm": 1.413597822189331, "learning_rate": 0.001, "loss": 1.7178, "step": 534184 }, { "epoch": 46.08695652173913, "grad_norm": 0.6423186659812927, "learning_rate": 0.001, "loss": 1.7172, "step": 534240 }, { "epoch": 46.091787439613526, "grad_norm": 2.29268741607666, "learning_rate": 0.001, "loss": 1.7175, "step": 534296 }, { "epoch": 46.09661835748792, "grad_norm": 1.1235376596450806, "learning_rate": 0.001, "loss": 1.7174, "step": 534352 }, { "epoch": 46.10144927536232, "grad_norm": 0.26134398579597473, "learning_rate": 0.001, "loss": 1.7134, "step": 534408 }, { "epoch": 46.106280193236714, "grad_norm": 16.597261428833008, "learning_rate": 0.001, "loss": 1.708, "step": 534464 }, { "epoch": 46.111111111111114, "grad_norm": 0.8168239593505859, "learning_rate": 0.001, "loss": 1.7133, "step": 534520 }, { "epoch": 46.11594202898551, "grad_norm": 0.33824869990348816, "learning_rate": 0.001, "loss": 1.718, "step": 534576 }, { "epoch": 46.1207729468599, "grad_norm": 0.4143065810203552, "learning_rate": 0.001, "loss": 1.7271, "step": 534632 }, { "epoch": 46.1256038647343, "grad_norm": 0.39663827419281006, "learning_rate": 0.001, "loss": 1.7172, "step": 534688 }, { "epoch": 46.130434782608695, "grad_norm": 0.7273496389389038, "learning_rate": 0.001, "loss": 1.7076, "step": 534744 }, { "epoch": 46.13526570048309, "grad_norm": 0.3638724982738495, "learning_rate": 0.001, "loss": 1.7195, "step": 534800 }, { "epoch": 46.14009661835749, "grad_norm": 0.2951245605945587, "learning_rate": 0.001, "loss": 1.7237, "step": 534856 }, { "epoch": 46.14492753623188, "grad_norm": 0.9037827849388123, "learning_rate": 0.001, "loss": 1.7212, "step": 534912 }, { "epoch": 46.14975845410628, "grad_norm": 0.3330898582935333, "learning_rate": 0.001, "loss": 1.7279, "step": 534968 }, { "epoch": 46.15458937198068, "grad_norm": 0.39309096336364746, "learning_rate": 0.001, "loss": 1.7221, "step": 535024 }, { "epoch": 46.15942028985507, "grad_norm": 0.3493063151836395, "learning_rate": 0.001, "loss": 1.7194, "step": 535080 }, { "epoch": 46.16425120772947, "grad_norm": 0.6391283273696899, "learning_rate": 0.001, "loss": 1.7174, "step": 535136 }, { "epoch": 46.169082125603865, "grad_norm": 0.7131687998771667, "learning_rate": 0.001, "loss": 1.7149, "step": 535192 }, { "epoch": 46.17391304347826, "grad_norm": 1.149593710899353, "learning_rate": 0.001, "loss": 1.7336, "step": 535248 }, { "epoch": 46.17874396135266, "grad_norm": 0.43514567613601685, "learning_rate": 0.001, "loss": 1.742, "step": 535304 }, { "epoch": 46.18357487922705, "grad_norm": 1.86967933177948, "learning_rate": 0.001, "loss": 1.7353, "step": 535360 }, { "epoch": 46.18840579710145, "grad_norm": 1.0727717876434326, "learning_rate": 0.001, "loss": 1.7352, "step": 535416 }, { "epoch": 46.193236714975846, "grad_norm": 0.3126072585582733, "learning_rate": 0.001, "loss": 1.7327, "step": 535472 }, { "epoch": 46.19806763285024, "grad_norm": 0.35572314262390137, "learning_rate": 0.001, "loss": 1.7189, "step": 535528 }, { "epoch": 46.20289855072464, "grad_norm": 1.61357843875885, "learning_rate": 0.001, "loss": 1.7172, "step": 535584 }, { "epoch": 46.207729468599034, "grad_norm": 0.6157678365707397, "learning_rate": 0.001, "loss": 1.73, "step": 535640 }, { "epoch": 46.21256038647343, "grad_norm": 0.4308922290802002, "learning_rate": 0.001, "loss": 1.7231, "step": 535696 }, { "epoch": 46.21739130434783, "grad_norm": 1.9200184345245361, "learning_rate": 0.001, "loss": 1.7377, "step": 535752 }, { "epoch": 46.22222222222222, "grad_norm": 2.3621315956115723, "learning_rate": 0.001, "loss": 1.749, "step": 535808 }, { "epoch": 46.227053140096615, "grad_norm": 1.0110433101654053, "learning_rate": 0.001, "loss": 1.7473, "step": 535864 }, { "epoch": 46.231884057971016, "grad_norm": 1.275486946105957, "learning_rate": 0.001, "loss": 1.7452, "step": 535920 }, { "epoch": 46.23671497584541, "grad_norm": 0.5857816338539124, "learning_rate": 0.001, "loss": 1.7395, "step": 535976 }, { "epoch": 46.24154589371981, "grad_norm": 0.9050594568252563, "learning_rate": 0.001, "loss": 1.7308, "step": 536032 }, { "epoch": 46.2463768115942, "grad_norm": 0.35458317399024963, "learning_rate": 0.001, "loss": 1.7363, "step": 536088 }, { "epoch": 46.2512077294686, "grad_norm": 0.43981805443763733, "learning_rate": 0.001, "loss": 1.7341, "step": 536144 }, { "epoch": 46.256038647343, "grad_norm": 0.31950825452804565, "learning_rate": 0.001, "loss": 1.7346, "step": 536200 }, { "epoch": 46.26086956521739, "grad_norm": 0.30714520812034607, "learning_rate": 0.001, "loss": 1.7341, "step": 536256 }, { "epoch": 46.265700483091784, "grad_norm": 0.32240891456604004, "learning_rate": 0.001, "loss": 1.7293, "step": 536312 }, { "epoch": 46.270531400966185, "grad_norm": 0.5001716613769531, "learning_rate": 0.001, "loss": 1.7281, "step": 536368 }, { "epoch": 46.27536231884058, "grad_norm": 0.32608523964881897, "learning_rate": 0.001, "loss": 1.7398, "step": 536424 }, { "epoch": 46.28019323671498, "grad_norm": 0.35022953152656555, "learning_rate": 0.001, "loss": 1.7329, "step": 536480 }, { "epoch": 46.28502415458937, "grad_norm": 0.6778161525726318, "learning_rate": 0.001, "loss": 1.7277, "step": 536536 }, { "epoch": 46.289855072463766, "grad_norm": 0.3277319669723511, "learning_rate": 0.001, "loss": 1.7306, "step": 536592 }, { "epoch": 46.29468599033817, "grad_norm": 0.32074132561683655, "learning_rate": 0.001, "loss": 1.7179, "step": 536648 }, { "epoch": 46.29951690821256, "grad_norm": 0.3303510248661041, "learning_rate": 0.001, "loss": 1.7171, "step": 536704 }, { "epoch": 46.30434782608695, "grad_norm": 0.2950139045715332, "learning_rate": 0.001, "loss": 1.7258, "step": 536760 }, { "epoch": 46.309178743961354, "grad_norm": 0.35521501302719116, "learning_rate": 0.001, "loss": 1.718, "step": 536816 }, { "epoch": 46.31400966183575, "grad_norm": 0.5644594430923462, "learning_rate": 0.001, "loss": 1.7132, "step": 536872 }, { "epoch": 46.31884057971015, "grad_norm": 12.377921104431152, "learning_rate": 0.001, "loss": 1.7156, "step": 536928 }, { "epoch": 46.32367149758454, "grad_norm": 0.4371606111526489, "learning_rate": 0.001, "loss": 1.7214, "step": 536984 }, { "epoch": 46.328502415458935, "grad_norm": 0.3676202595233917, "learning_rate": 0.001, "loss": 1.7313, "step": 537040 }, { "epoch": 46.333333333333336, "grad_norm": 0.43548282980918884, "learning_rate": 0.001, "loss": 1.7198, "step": 537096 }, { "epoch": 46.33816425120773, "grad_norm": 0.9101641178131104, "learning_rate": 0.001, "loss": 1.7228, "step": 537152 }, { "epoch": 46.34299516908212, "grad_norm": 0.4429427981376648, "learning_rate": 0.001, "loss": 1.7182, "step": 537208 }, { "epoch": 46.34782608695652, "grad_norm": 0.26476481556892395, "learning_rate": 0.001, "loss": 1.7208, "step": 537264 }, { "epoch": 46.35265700483092, "grad_norm": 0.43423348665237427, "learning_rate": 0.001, "loss": 1.7369, "step": 537320 }, { "epoch": 46.35748792270532, "grad_norm": 0.5661461353302002, "learning_rate": 0.001, "loss": 1.7317, "step": 537376 }, { "epoch": 46.36231884057971, "grad_norm": 0.9679610729217529, "learning_rate": 0.001, "loss": 1.7426, "step": 537432 }, { "epoch": 46.367149758454104, "grad_norm": 0.4791892170906067, "learning_rate": 0.001, "loss": 1.7547, "step": 537488 }, { "epoch": 46.371980676328505, "grad_norm": 0.9721389412879944, "learning_rate": 0.001, "loss": 1.7462, "step": 537544 }, { "epoch": 46.3768115942029, "grad_norm": 0.45305702090263367, "learning_rate": 0.001, "loss": 1.739, "step": 537600 }, { "epoch": 46.38164251207729, "grad_norm": 1.2942910194396973, "learning_rate": 0.001, "loss": 1.7451, "step": 537656 }, { "epoch": 46.38647342995169, "grad_norm": 2.3137528896331787, "learning_rate": 0.001, "loss": 1.7539, "step": 537712 }, { "epoch": 46.391304347826086, "grad_norm": 2.452158212661743, "learning_rate": 0.001, "loss": 1.7542, "step": 537768 }, { "epoch": 46.39613526570048, "grad_norm": 0.3273489475250244, "learning_rate": 0.001, "loss": 1.7699, "step": 537824 }, { "epoch": 46.40096618357488, "grad_norm": 0.4359605014324188, "learning_rate": 0.001, "loss": 1.7544, "step": 537880 }, { "epoch": 46.405797101449274, "grad_norm": 0.3248874843120575, "learning_rate": 0.001, "loss": 1.7507, "step": 537936 }, { "epoch": 46.410628019323674, "grad_norm": 0.3856216371059418, "learning_rate": 0.001, "loss": 1.7545, "step": 537992 }, { "epoch": 46.41545893719807, "grad_norm": 0.3041902184486389, "learning_rate": 0.001, "loss": 1.7463, "step": 538048 }, { "epoch": 46.42028985507246, "grad_norm": 1.364767074584961, "learning_rate": 0.001, "loss": 1.7482, "step": 538104 }, { "epoch": 46.42512077294686, "grad_norm": 2.3974130153656006, "learning_rate": 0.001, "loss": 1.7352, "step": 538160 }, { "epoch": 46.429951690821255, "grad_norm": 1.3930416107177734, "learning_rate": 0.001, "loss": 1.7404, "step": 538216 }, { "epoch": 46.43478260869565, "grad_norm": 0.34964656829833984, "learning_rate": 0.001, "loss": 1.7432, "step": 538272 }, { "epoch": 46.43961352657005, "grad_norm": 3.9416284561157227, "learning_rate": 0.001, "loss": 1.7403, "step": 538328 }, { "epoch": 46.44444444444444, "grad_norm": 0.3690131604671478, "learning_rate": 0.001, "loss": 1.7337, "step": 538384 }, { "epoch": 46.44927536231884, "grad_norm": 1.0517922639846802, "learning_rate": 0.001, "loss": 1.7437, "step": 538440 }, { "epoch": 46.45410628019324, "grad_norm": 1.3773114681243896, "learning_rate": 0.001, "loss": 1.735, "step": 538496 }, { "epoch": 46.45893719806763, "grad_norm": 7.648189544677734, "learning_rate": 0.001, "loss": 1.7393, "step": 538552 }, { "epoch": 46.46376811594203, "grad_norm": 7.102615833282471, "learning_rate": 0.001, "loss": 1.7344, "step": 538608 }, { "epoch": 46.468599033816425, "grad_norm": 3.7616474628448486, "learning_rate": 0.001, "loss": 1.734, "step": 538664 }, { "epoch": 46.47342995169082, "grad_norm": 1.7518610954284668, "learning_rate": 0.001, "loss": 1.7406, "step": 538720 }, { "epoch": 46.47826086956522, "grad_norm": 0.27856943011283875, "learning_rate": 0.001, "loss": 1.7356, "step": 538776 }, { "epoch": 46.48309178743961, "grad_norm": 0.9059680700302124, "learning_rate": 0.001, "loss": 1.7402, "step": 538832 }, { "epoch": 46.48792270531401, "grad_norm": 1.0681747198104858, "learning_rate": 0.001, "loss": 1.7355, "step": 538888 }, { "epoch": 46.492753623188406, "grad_norm": 0.2725263833999634, "learning_rate": 0.001, "loss": 1.7301, "step": 538944 }, { "epoch": 46.4975845410628, "grad_norm": 0.26980385184288025, "learning_rate": 0.001, "loss": 1.7339, "step": 539000 }, { "epoch": 46.5024154589372, "grad_norm": 0.3551305830478668, "learning_rate": 0.001, "loss": 1.7256, "step": 539056 }, { "epoch": 46.507246376811594, "grad_norm": 0.386801153421402, "learning_rate": 0.001, "loss": 1.7281, "step": 539112 }, { "epoch": 46.51207729468599, "grad_norm": 0.39928942918777466, "learning_rate": 0.001, "loss": 1.7285, "step": 539168 }, { "epoch": 46.51690821256039, "grad_norm": 0.786837637424469, "learning_rate": 0.001, "loss": 1.7275, "step": 539224 }, { "epoch": 46.52173913043478, "grad_norm": 0.3427911102771759, "learning_rate": 0.001, "loss": 1.7254, "step": 539280 }, { "epoch": 46.52657004830918, "grad_norm": 0.34961777925491333, "learning_rate": 0.001, "loss": 1.7342, "step": 539336 }, { "epoch": 46.531400966183575, "grad_norm": 0.34977880120277405, "learning_rate": 0.001, "loss": 1.7441, "step": 539392 }, { "epoch": 46.53623188405797, "grad_norm": 2.2069408893585205, "learning_rate": 0.001, "loss": 1.7314, "step": 539448 }, { "epoch": 46.54106280193237, "grad_norm": 0.3914407193660736, "learning_rate": 0.001, "loss": 1.7443, "step": 539504 }, { "epoch": 46.54589371980676, "grad_norm": 0.3044123649597168, "learning_rate": 0.001, "loss": 1.7459, "step": 539560 }, { "epoch": 46.55072463768116, "grad_norm": 0.3202945590019226, "learning_rate": 0.001, "loss": 1.7411, "step": 539616 }, { "epoch": 46.55555555555556, "grad_norm": 0.5264212489128113, "learning_rate": 0.001, "loss": 1.7302, "step": 539672 }, { "epoch": 46.56038647342995, "grad_norm": 0.46689650416374207, "learning_rate": 0.001, "loss": 1.7402, "step": 539728 }, { "epoch": 46.56521739130435, "grad_norm": 2.5595059394836426, "learning_rate": 0.001, "loss": 1.7353, "step": 539784 }, { "epoch": 46.570048309178745, "grad_norm": 0.29652321338653564, "learning_rate": 0.001, "loss": 1.7334, "step": 539840 }, { "epoch": 46.57487922705314, "grad_norm": 0.4661213457584381, "learning_rate": 0.001, "loss": 1.729, "step": 539896 }, { "epoch": 46.57971014492754, "grad_norm": 0.25999516248703003, "learning_rate": 0.001, "loss": 1.7341, "step": 539952 }, { "epoch": 46.58454106280193, "grad_norm": 0.3534297049045563, "learning_rate": 0.001, "loss": 1.7371, "step": 540008 }, { "epoch": 46.589371980676326, "grad_norm": 0.3848990201950073, "learning_rate": 0.001, "loss": 1.7257, "step": 540064 }, { "epoch": 46.594202898550726, "grad_norm": 0.4281889498233795, "learning_rate": 0.001, "loss": 1.7261, "step": 540120 }, { "epoch": 46.59903381642512, "grad_norm": 0.42716988921165466, "learning_rate": 0.001, "loss": 1.7366, "step": 540176 }, { "epoch": 46.60386473429952, "grad_norm": 0.2850320041179657, "learning_rate": 0.001, "loss": 1.7304, "step": 540232 }, { "epoch": 46.608695652173914, "grad_norm": 0.5041074752807617, "learning_rate": 0.001, "loss": 1.7313, "step": 540288 }, { "epoch": 46.61352657004831, "grad_norm": 0.26991644501686096, "learning_rate": 0.001, "loss": 1.7295, "step": 540344 }, { "epoch": 46.61835748792271, "grad_norm": 0.40907764434814453, "learning_rate": 0.001, "loss": 1.7272, "step": 540400 }, { "epoch": 46.6231884057971, "grad_norm": 0.7233055830001831, "learning_rate": 0.001, "loss": 1.7244, "step": 540456 }, { "epoch": 46.628019323671495, "grad_norm": 0.341800719499588, "learning_rate": 0.001, "loss": 1.7251, "step": 540512 }, { "epoch": 46.632850241545896, "grad_norm": 0.3241802752017975, "learning_rate": 0.001, "loss": 1.726, "step": 540568 }, { "epoch": 46.63768115942029, "grad_norm": 0.48334747552871704, "learning_rate": 0.001, "loss": 1.7313, "step": 540624 }, { "epoch": 46.64251207729468, "grad_norm": 0.38246187567710876, "learning_rate": 0.001, "loss": 1.7363, "step": 540680 }, { "epoch": 46.64734299516908, "grad_norm": 0.31606099009513855, "learning_rate": 0.001, "loss": 1.7342, "step": 540736 }, { "epoch": 46.65217391304348, "grad_norm": 0.291831910610199, "learning_rate": 0.001, "loss": 1.7356, "step": 540792 }, { "epoch": 46.65700483091788, "grad_norm": 0.34247252345085144, "learning_rate": 0.001, "loss": 1.7283, "step": 540848 }, { "epoch": 46.66183574879227, "grad_norm": 0.29882514476776123, "learning_rate": 0.001, "loss": 1.7242, "step": 540904 }, { "epoch": 46.666666666666664, "grad_norm": 1.3509570360183716, "learning_rate": 0.001, "loss": 1.7246, "step": 540960 }, { "epoch": 46.671497584541065, "grad_norm": 1.558355450630188, "learning_rate": 0.001, "loss": 1.735, "step": 541016 }, { "epoch": 46.67632850241546, "grad_norm": 0.5476917028427124, "learning_rate": 0.001, "loss": 1.7425, "step": 541072 }, { "epoch": 46.68115942028985, "grad_norm": 0.7144020795822144, "learning_rate": 0.001, "loss": 1.7423, "step": 541128 }, { "epoch": 46.68599033816425, "grad_norm": 0.7284640073776245, "learning_rate": 0.001, "loss": 1.7259, "step": 541184 }, { "epoch": 46.690821256038646, "grad_norm": 0.31424710154533386, "learning_rate": 0.001, "loss": 1.7368, "step": 541240 }, { "epoch": 46.69565217391305, "grad_norm": 0.3097711503505707, "learning_rate": 0.001, "loss": 1.7415, "step": 541296 }, { "epoch": 46.70048309178744, "grad_norm": 0.7392817735671997, "learning_rate": 0.001, "loss": 1.7327, "step": 541352 }, { "epoch": 46.70531400966183, "grad_norm": 0.4241707921028137, "learning_rate": 0.001, "loss": 1.7405, "step": 541408 }, { "epoch": 46.710144927536234, "grad_norm": 0.43632781505584717, "learning_rate": 0.001, "loss": 1.7344, "step": 541464 }, { "epoch": 46.71497584541063, "grad_norm": 0.27016007900238037, "learning_rate": 0.001, "loss": 1.7333, "step": 541520 }, { "epoch": 46.71980676328502, "grad_norm": 1.3281503915786743, "learning_rate": 0.001, "loss": 1.7332, "step": 541576 }, { "epoch": 46.72463768115942, "grad_norm": 0.2695807218551636, "learning_rate": 0.001, "loss": 1.7404, "step": 541632 }, { "epoch": 46.729468599033815, "grad_norm": 0.5190613269805908, "learning_rate": 0.001, "loss": 1.7393, "step": 541688 }, { "epoch": 46.734299516908216, "grad_norm": 0.6109021306037903, "learning_rate": 0.001, "loss": 1.7572, "step": 541744 }, { "epoch": 46.73913043478261, "grad_norm": 0.3460198938846588, "learning_rate": 0.001, "loss": 1.7492, "step": 541800 }, { "epoch": 46.743961352657, "grad_norm": 1.3889919519424438, "learning_rate": 0.001, "loss": 1.7509, "step": 541856 }, { "epoch": 46.7487922705314, "grad_norm": 1.2590762376785278, "learning_rate": 0.001, "loss": 1.7482, "step": 541912 }, { "epoch": 46.7536231884058, "grad_norm": 7.283885478973389, "learning_rate": 0.001, "loss": 1.7389, "step": 541968 }, { "epoch": 46.75845410628019, "grad_norm": 0.3557608127593994, "learning_rate": 0.001, "loss": 1.7341, "step": 542024 }, { "epoch": 46.76328502415459, "grad_norm": 0.41444477438926697, "learning_rate": 0.001, "loss": 1.7486, "step": 542080 }, { "epoch": 46.768115942028984, "grad_norm": 0.8650222420692444, "learning_rate": 0.001, "loss": 1.7346, "step": 542136 }, { "epoch": 46.772946859903385, "grad_norm": 0.31386256217956543, "learning_rate": 0.001, "loss": 1.7419, "step": 542192 }, { "epoch": 46.77777777777778, "grad_norm": 0.2825224697589874, "learning_rate": 0.001, "loss": 1.7399, "step": 542248 }, { "epoch": 46.78260869565217, "grad_norm": 2.5417585372924805, "learning_rate": 0.001, "loss": 1.7486, "step": 542304 }, { "epoch": 46.78743961352657, "grad_norm": 0.6257044672966003, "learning_rate": 0.001, "loss": 1.7526, "step": 542360 }, { "epoch": 46.792270531400966, "grad_norm": 0.45079246163368225, "learning_rate": 0.001, "loss": 1.7484, "step": 542416 }, { "epoch": 46.79710144927536, "grad_norm": 0.3512585759162903, "learning_rate": 0.001, "loss": 1.7587, "step": 542472 }, { "epoch": 46.80193236714976, "grad_norm": 1.1498621702194214, "learning_rate": 0.001, "loss": 1.7635, "step": 542528 }, { "epoch": 46.806763285024154, "grad_norm": 3.546875238418579, "learning_rate": 0.001, "loss": 1.7576, "step": 542584 }, { "epoch": 46.81159420289855, "grad_norm": 0.3002912998199463, "learning_rate": 0.001, "loss": 1.7381, "step": 542640 }, { "epoch": 46.81642512077295, "grad_norm": 4.210538387298584, "learning_rate": 0.001, "loss": 1.731, "step": 542696 }, { "epoch": 46.82125603864734, "grad_norm": 0.335534930229187, "learning_rate": 0.001, "loss": 1.7382, "step": 542752 }, { "epoch": 46.82608695652174, "grad_norm": 0.4776569604873657, "learning_rate": 0.001, "loss": 1.7368, "step": 542808 }, { "epoch": 46.830917874396135, "grad_norm": 0.6448674201965332, "learning_rate": 0.001, "loss": 1.7391, "step": 542864 }, { "epoch": 46.83574879227053, "grad_norm": 0.27452728152275085, "learning_rate": 0.001, "loss": 1.7341, "step": 542920 }, { "epoch": 46.84057971014493, "grad_norm": 0.774319589138031, "learning_rate": 0.001, "loss": 1.7408, "step": 542976 }, { "epoch": 46.84541062801932, "grad_norm": 1.252493143081665, "learning_rate": 0.001, "loss": 1.743, "step": 543032 }, { "epoch": 46.85024154589372, "grad_norm": 0.7029665112495422, "learning_rate": 0.001, "loss": 1.7362, "step": 543088 }, { "epoch": 46.85507246376812, "grad_norm": 0.3758379817008972, "learning_rate": 0.001, "loss": 1.7336, "step": 543144 }, { "epoch": 46.85990338164251, "grad_norm": 0.865254819393158, "learning_rate": 0.001, "loss": 1.7261, "step": 543200 }, { "epoch": 46.86473429951691, "grad_norm": 0.25620636343955994, "learning_rate": 0.001, "loss": 1.7402, "step": 543256 }, { "epoch": 46.869565217391305, "grad_norm": 0.4489244818687439, "learning_rate": 0.001, "loss": 1.7299, "step": 543312 }, { "epoch": 46.8743961352657, "grad_norm": 0.8276216387748718, "learning_rate": 0.001, "loss": 1.737, "step": 543368 }, { "epoch": 46.8792270531401, "grad_norm": 0.7146296501159668, "learning_rate": 0.001, "loss": 1.7277, "step": 543424 }, { "epoch": 46.88405797101449, "grad_norm": 0.353871613740921, "learning_rate": 0.001, "loss": 1.7465, "step": 543480 }, { "epoch": 46.888888888888886, "grad_norm": 0.357257217168808, "learning_rate": 0.001, "loss": 1.7508, "step": 543536 }, { "epoch": 46.893719806763286, "grad_norm": 19.562957763671875, "learning_rate": 0.001, "loss": 1.752, "step": 543592 }, { "epoch": 46.89855072463768, "grad_norm": 0.2644149959087372, "learning_rate": 0.001, "loss": 1.7452, "step": 543648 }, { "epoch": 46.90338164251208, "grad_norm": 0.6094096899032593, "learning_rate": 0.001, "loss": 1.7363, "step": 543704 }, { "epoch": 46.908212560386474, "grad_norm": 0.7662749886512756, "learning_rate": 0.001, "loss": 1.7393, "step": 543760 }, { "epoch": 46.91304347826087, "grad_norm": 0.5080996751785278, "learning_rate": 0.001, "loss": 1.737, "step": 543816 }, { "epoch": 46.91787439613527, "grad_norm": 0.7678481936454773, "learning_rate": 0.001, "loss": 1.7495, "step": 543872 }, { "epoch": 46.92270531400966, "grad_norm": 0.30250343680381775, "learning_rate": 0.001, "loss": 1.7327, "step": 543928 }, { "epoch": 46.927536231884055, "grad_norm": 0.33577293157577515, "learning_rate": 0.001, "loss": 1.7298, "step": 543984 }, { "epoch": 46.932367149758456, "grad_norm": 0.5459432005882263, "learning_rate": 0.001, "loss": 1.7336, "step": 544040 }, { "epoch": 46.93719806763285, "grad_norm": 0.2931721806526184, "learning_rate": 0.001, "loss": 1.7373, "step": 544096 }, { "epoch": 46.94202898550725, "grad_norm": 0.3553711771965027, "learning_rate": 0.001, "loss": 1.7328, "step": 544152 }, { "epoch": 46.94685990338164, "grad_norm": 0.3601447343826294, "learning_rate": 0.001, "loss": 1.731, "step": 544208 }, { "epoch": 46.95169082125604, "grad_norm": 0.5757013559341431, "learning_rate": 0.001, "loss": 1.7315, "step": 544264 }, { "epoch": 46.95652173913044, "grad_norm": 0.48101431131362915, "learning_rate": 0.001, "loss": 1.7267, "step": 544320 }, { "epoch": 46.96135265700483, "grad_norm": 0.4263179302215576, "learning_rate": 0.001, "loss": 1.73, "step": 544376 }, { "epoch": 46.966183574879224, "grad_norm": 0.8635637164115906, "learning_rate": 0.001, "loss": 1.7217, "step": 544432 }, { "epoch": 46.971014492753625, "grad_norm": 0.3279145658016205, "learning_rate": 0.001, "loss": 1.7371, "step": 544488 }, { "epoch": 46.97584541062802, "grad_norm": 0.2874996066093445, "learning_rate": 0.001, "loss": 1.7335, "step": 544544 }, { "epoch": 46.98067632850242, "grad_norm": 1.1116864681243896, "learning_rate": 0.001, "loss": 1.7358, "step": 544600 }, { "epoch": 46.98550724637681, "grad_norm": 0.6899238228797913, "learning_rate": 0.001, "loss": 1.7432, "step": 544656 }, { "epoch": 46.990338164251206, "grad_norm": 0.30474957823753357, "learning_rate": 0.001, "loss": 1.7381, "step": 544712 }, { "epoch": 46.99516908212561, "grad_norm": 0.38092026114463806, "learning_rate": 0.001, "loss": 1.7353, "step": 544768 }, { "epoch": 47.0, "grad_norm": 0.747328519821167, "learning_rate": 0.001, "loss": 1.7429, "step": 544824 }, { "epoch": 47.00483091787439, "grad_norm": 4.281112194061279, "learning_rate": 0.001, "loss": 1.7392, "step": 544880 }, { "epoch": 47.009661835748794, "grad_norm": 1.3495209217071533, "learning_rate": 0.001, "loss": 1.7578, "step": 544936 }, { "epoch": 47.01449275362319, "grad_norm": 0.4235178530216217, "learning_rate": 0.001, "loss": 1.7515, "step": 544992 }, { "epoch": 47.01932367149758, "grad_norm": 0.33219489455223083, "learning_rate": 0.001, "loss": 1.7618, "step": 545048 }, { "epoch": 47.02415458937198, "grad_norm": 0.3000887930393219, "learning_rate": 0.001, "loss": 1.766, "step": 545104 }, { "epoch": 47.028985507246375, "grad_norm": 0.27524304389953613, "learning_rate": 0.001, "loss": 1.758, "step": 545160 }, { "epoch": 47.033816425120776, "grad_norm": 3.1589064598083496, "learning_rate": 0.001, "loss": 1.7591, "step": 545216 }, { "epoch": 47.03864734299517, "grad_norm": 0.4939311444759369, "learning_rate": 0.001, "loss": 1.7937, "step": 545272 }, { "epoch": 47.04347826086956, "grad_norm": 0.4727461636066437, "learning_rate": 0.001, "loss": 1.774, "step": 545328 }, { "epoch": 47.04830917874396, "grad_norm": 0.4854380488395691, "learning_rate": 0.001, "loss": 1.7726, "step": 545384 }, { "epoch": 47.05314009661836, "grad_norm": 0.6815846562385559, "learning_rate": 0.001, "loss": 1.7664, "step": 545440 }, { "epoch": 47.05797101449275, "grad_norm": 0.5356012582778931, "learning_rate": 0.001, "loss": 1.7589, "step": 545496 }, { "epoch": 47.06280193236715, "grad_norm": 0.279079794883728, "learning_rate": 0.001, "loss": 1.769, "step": 545552 }, { "epoch": 47.067632850241544, "grad_norm": 0.429047167301178, "learning_rate": 0.001, "loss": 1.7648, "step": 545608 }, { "epoch": 47.072463768115945, "grad_norm": 0.4604443609714508, "learning_rate": 0.001, "loss": 1.7635, "step": 545664 }, { "epoch": 47.07729468599034, "grad_norm": 1.3425155878067017, "learning_rate": 0.001, "loss": 1.7561, "step": 545720 }, { "epoch": 47.08212560386473, "grad_norm": 1.7754504680633545, "learning_rate": 0.001, "loss": 1.7487, "step": 545776 }, { "epoch": 47.08695652173913, "grad_norm": 7.971505641937256, "learning_rate": 0.001, "loss": 1.7464, "step": 545832 }, { "epoch": 47.091787439613526, "grad_norm": 0.37450724840164185, "learning_rate": 0.001, "loss": 1.7363, "step": 545888 }, { "epoch": 47.09661835748792, "grad_norm": 0.3493143916130066, "learning_rate": 0.001, "loss": 1.7375, "step": 545944 }, { "epoch": 47.10144927536232, "grad_norm": 6.405582427978516, "learning_rate": 0.001, "loss": 1.7468, "step": 546000 }, { "epoch": 47.106280193236714, "grad_norm": 3.2116167545318604, "learning_rate": 0.001, "loss": 1.7487, "step": 546056 }, { "epoch": 47.111111111111114, "grad_norm": 0.40074628591537476, "learning_rate": 0.001, "loss": 1.758, "step": 546112 }, { "epoch": 47.11594202898551, "grad_norm": 0.31763288378715515, "learning_rate": 0.001, "loss": 1.7527, "step": 546168 }, { "epoch": 47.1207729468599, "grad_norm": 0.366273432970047, "learning_rate": 0.001, "loss": 1.7485, "step": 546224 }, { "epoch": 47.1256038647343, "grad_norm": 1.172105073928833, "learning_rate": 0.001, "loss": 1.7456, "step": 546280 }, { "epoch": 47.130434782608695, "grad_norm": 2.0263142585754395, "learning_rate": 0.001, "loss": 1.7519, "step": 546336 }, { "epoch": 47.13526570048309, "grad_norm": 0.8143089413642883, "learning_rate": 0.001, "loss": 1.7483, "step": 546392 }, { "epoch": 47.14009661835749, "grad_norm": 0.34187301993370056, "learning_rate": 0.001, "loss": 1.7384, "step": 546448 }, { "epoch": 47.14492753623188, "grad_norm": 0.31419092416763306, "learning_rate": 0.001, "loss": 1.7311, "step": 546504 }, { "epoch": 47.14975845410628, "grad_norm": 0.3252299129962921, "learning_rate": 0.001, "loss": 1.7319, "step": 546560 }, { "epoch": 47.15458937198068, "grad_norm": 0.387053519487381, "learning_rate": 0.001, "loss": 1.7309, "step": 546616 }, { "epoch": 47.15942028985507, "grad_norm": 2.4877116680145264, "learning_rate": 0.001, "loss": 1.7397, "step": 546672 }, { "epoch": 47.16425120772947, "grad_norm": 15.878884315490723, "learning_rate": 0.001, "loss": 1.7734, "step": 546728 }, { "epoch": 47.169082125603865, "grad_norm": 0.4047360420227051, "learning_rate": 0.001, "loss": 1.7612, "step": 546784 }, { "epoch": 47.17391304347826, "grad_norm": 0.2872651517391205, "learning_rate": 0.001, "loss": 1.7452, "step": 546840 }, { "epoch": 47.17874396135266, "grad_norm": 3.3258631229400635, "learning_rate": 0.001, "loss": 1.7472, "step": 546896 }, { "epoch": 47.18357487922705, "grad_norm": 1.3399384021759033, "learning_rate": 0.001, "loss": 1.7447, "step": 546952 }, { "epoch": 47.18840579710145, "grad_norm": 0.7725517749786377, "learning_rate": 0.001, "loss": 1.7476, "step": 547008 }, { "epoch": 47.193236714975846, "grad_norm": 0.5984556674957275, "learning_rate": 0.001, "loss": 1.7414, "step": 547064 }, { "epoch": 47.19806763285024, "grad_norm": 4.47747802734375, "learning_rate": 0.001, "loss": 1.7373, "step": 547120 }, { "epoch": 47.20289855072464, "grad_norm": 0.42242303490638733, "learning_rate": 0.001, "loss": 1.7357, "step": 547176 }, { "epoch": 47.207729468599034, "grad_norm": 0.40525195002555847, "learning_rate": 0.001, "loss": 1.734, "step": 547232 }, { "epoch": 47.21256038647343, "grad_norm": 5.895687580108643, "learning_rate": 0.001, "loss": 1.7382, "step": 547288 }, { "epoch": 47.21739130434783, "grad_norm": 3.2632696628570557, "learning_rate": 0.001, "loss": 1.7376, "step": 547344 }, { "epoch": 47.22222222222222, "grad_norm": 0.48902297019958496, "learning_rate": 0.001, "loss": 1.7386, "step": 547400 }, { "epoch": 47.227053140096615, "grad_norm": 0.8248130679130554, "learning_rate": 0.001, "loss": 1.7277, "step": 547456 }, { "epoch": 47.231884057971016, "grad_norm": 1.2479369640350342, "learning_rate": 0.001, "loss": 1.7216, "step": 547512 }, { "epoch": 47.23671497584541, "grad_norm": 0.30356669425964355, "learning_rate": 0.001, "loss": 1.7309, "step": 547568 }, { "epoch": 47.24154589371981, "grad_norm": 1.1951662302017212, "learning_rate": 0.001, "loss": 1.7333, "step": 547624 }, { "epoch": 47.2463768115942, "grad_norm": 1.8391238451004028, "learning_rate": 0.001, "loss": 1.7373, "step": 547680 }, { "epoch": 47.2512077294686, "grad_norm": 1.4917492866516113, "learning_rate": 0.001, "loss": 1.7377, "step": 547736 }, { "epoch": 47.256038647343, "grad_norm": 1.1511569023132324, "learning_rate": 0.001, "loss": 1.7318, "step": 547792 }, { "epoch": 47.26086956521739, "grad_norm": 0.8874995708465576, "learning_rate": 0.001, "loss": 1.7233, "step": 547848 }, { "epoch": 47.265700483091784, "grad_norm": 0.4187708795070648, "learning_rate": 0.001, "loss": 1.7295, "step": 547904 }, { "epoch": 47.270531400966185, "grad_norm": 0.7397943139076233, "learning_rate": 0.001, "loss": 1.721, "step": 547960 }, { "epoch": 47.27536231884058, "grad_norm": 2.0461182594299316, "learning_rate": 0.001, "loss": 1.726, "step": 548016 }, { "epoch": 47.28019323671498, "grad_norm": 2.1561331748962402, "learning_rate": 0.001, "loss": 1.7265, "step": 548072 }, { "epoch": 47.28502415458937, "grad_norm": 6.230227470397949, "learning_rate": 0.001, "loss": 1.7419, "step": 548128 }, { "epoch": 47.289855072463766, "grad_norm": 12.692964553833008, "learning_rate": 0.001, "loss": 1.731, "step": 548184 }, { "epoch": 47.29468599033817, "grad_norm": 0.911510169506073, "learning_rate": 0.001, "loss": 1.7347, "step": 548240 }, { "epoch": 47.29951690821256, "grad_norm": 1.0388526916503906, "learning_rate": 0.001, "loss": 1.7321, "step": 548296 }, { "epoch": 47.30434782608695, "grad_norm": 0.8128241300582886, "learning_rate": 0.001, "loss": 1.7231, "step": 548352 }, { "epoch": 47.309178743961354, "grad_norm": 0.2854093611240387, "learning_rate": 0.001, "loss": 1.7238, "step": 548408 }, { "epoch": 47.31400966183575, "grad_norm": 0.7462406754493713, "learning_rate": 0.001, "loss": 1.719, "step": 548464 }, { "epoch": 47.31884057971015, "grad_norm": 0.27985551953315735, "learning_rate": 0.001, "loss": 1.7244, "step": 548520 }, { "epoch": 47.32367149758454, "grad_norm": 0.4242180287837982, "learning_rate": 0.001, "loss": 1.7317, "step": 548576 }, { "epoch": 47.328502415458935, "grad_norm": 0.26534503698349, "learning_rate": 0.001, "loss": 1.7329, "step": 548632 }, { "epoch": 47.333333333333336, "grad_norm": 0.370477557182312, "learning_rate": 0.001, "loss": 1.7186, "step": 548688 }, { "epoch": 47.33816425120773, "grad_norm": 3.09329891204834, "learning_rate": 0.001, "loss": 1.7169, "step": 548744 }, { "epoch": 47.34299516908212, "grad_norm": 0.4720314145088196, "learning_rate": 0.001, "loss": 1.7182, "step": 548800 }, { "epoch": 47.34782608695652, "grad_norm": 0.3731083571910858, "learning_rate": 0.001, "loss": 1.7196, "step": 548856 }, { "epoch": 47.35265700483092, "grad_norm": 0.44405561685562134, "learning_rate": 0.001, "loss": 1.7227, "step": 548912 }, { "epoch": 47.35748792270532, "grad_norm": 0.32539740204811096, "learning_rate": 0.001, "loss": 1.8006, "step": 548968 }, { "epoch": 47.36231884057971, "grad_norm": 2.0203771591186523, "learning_rate": 0.001, "loss": 1.736, "step": 549024 }, { "epoch": 47.367149758454104, "grad_norm": 0.3143254220485687, "learning_rate": 0.001, "loss": 1.7275, "step": 549080 }, { "epoch": 47.371980676328505, "grad_norm": 0.9952269196510315, "learning_rate": 0.001, "loss": 1.7299, "step": 549136 }, { "epoch": 47.3768115942029, "grad_norm": 0.5755278468132019, "learning_rate": 0.001, "loss": 1.729, "step": 549192 }, { "epoch": 47.38164251207729, "grad_norm": 0.6705668568611145, "learning_rate": 0.001, "loss": 1.7282, "step": 549248 }, { "epoch": 47.38647342995169, "grad_norm": 0.35457879304885864, "learning_rate": 0.001, "loss": 1.7283, "step": 549304 }, { "epoch": 47.391304347826086, "grad_norm": 4.718808174133301, "learning_rate": 0.001, "loss": 1.7318, "step": 549360 }, { "epoch": 47.39613526570048, "grad_norm": 0.8548343181610107, "learning_rate": 0.001, "loss": 1.737, "step": 549416 }, { "epoch": 47.40096618357488, "grad_norm": 0.5011637210845947, "learning_rate": 0.001, "loss": 1.7298, "step": 549472 }, { "epoch": 47.405797101449274, "grad_norm": 1.8740864992141724, "learning_rate": 0.001, "loss": 1.7426, "step": 549528 }, { "epoch": 47.410628019323674, "grad_norm": 0.9155641794204712, "learning_rate": 0.001, "loss": 1.7445, "step": 549584 }, { "epoch": 47.41545893719807, "grad_norm": 0.335338294506073, "learning_rate": 0.001, "loss": 1.763, "step": 549640 }, { "epoch": 47.42028985507246, "grad_norm": 0.40270140767097473, "learning_rate": 0.001, "loss": 1.7523, "step": 549696 }, { "epoch": 47.42512077294686, "grad_norm": 0.3840565085411072, "learning_rate": 0.001, "loss": 1.7444, "step": 549752 }, { "epoch": 47.429951690821255, "grad_norm": 1.7191141843795776, "learning_rate": 0.001, "loss": 1.7434, "step": 549808 }, { "epoch": 47.43478260869565, "grad_norm": 1.3776488304138184, "learning_rate": 0.001, "loss": 1.7463, "step": 549864 }, { "epoch": 47.43961352657005, "grad_norm": 2.7461485862731934, "learning_rate": 0.001, "loss": 1.7368, "step": 549920 }, { "epoch": 47.44444444444444, "grad_norm": 1.8133947849273682, "learning_rate": 0.001, "loss": 1.7466, "step": 549976 }, { "epoch": 47.44927536231884, "grad_norm": 0.6772338151931763, "learning_rate": 0.001, "loss": 1.7417, "step": 550032 }, { "epoch": 47.45410628019324, "grad_norm": 3.5082292556762695, "learning_rate": 0.001, "loss": 1.741, "step": 550088 }, { "epoch": 47.45893719806763, "grad_norm": 12.43543529510498, "learning_rate": 0.001, "loss": 1.7355, "step": 550144 }, { "epoch": 47.46376811594203, "grad_norm": 1.3880958557128906, "learning_rate": 0.001, "loss": 1.7534, "step": 550200 }, { "epoch": 47.468599033816425, "grad_norm": 1.864904522895813, "learning_rate": 0.001, "loss": 1.7439, "step": 550256 }, { "epoch": 47.47342995169082, "grad_norm": 0.6884762644767761, "learning_rate": 0.001, "loss": 1.7556, "step": 550312 }, { "epoch": 47.47826086956522, "grad_norm": 0.4680933356285095, "learning_rate": 0.001, "loss": 1.7554, "step": 550368 }, { "epoch": 47.48309178743961, "grad_norm": 0.4483138620853424, "learning_rate": 0.001, "loss": 1.7552, "step": 550424 }, { "epoch": 47.48792270531401, "grad_norm": 2.099839448928833, "learning_rate": 0.001, "loss": 1.7552, "step": 550480 }, { "epoch": 47.492753623188406, "grad_norm": 0.963566243648529, "learning_rate": 0.001, "loss": 1.776, "step": 550536 }, { "epoch": 47.4975845410628, "grad_norm": 0.31251609325408936, "learning_rate": 0.001, "loss": 1.7659, "step": 550592 }, { "epoch": 47.5024154589372, "grad_norm": 0.4312027394771576, "learning_rate": 0.001, "loss": 1.7572, "step": 550648 }, { "epoch": 47.507246376811594, "grad_norm": 2.301367998123169, "learning_rate": 0.001, "loss": 1.7415, "step": 550704 }, { "epoch": 47.51207729468599, "grad_norm": 1.1503932476043701, "learning_rate": 0.001, "loss": 1.7532, "step": 550760 }, { "epoch": 47.51690821256039, "grad_norm": 0.6705819964408875, "learning_rate": 0.001, "loss": 1.7696, "step": 550816 }, { "epoch": 47.52173913043478, "grad_norm": 0.7047042846679688, "learning_rate": 0.001, "loss": 1.7577, "step": 550872 }, { "epoch": 47.52657004830918, "grad_norm": 0.2805469036102295, "learning_rate": 0.001, "loss": 1.7516, "step": 550928 }, { "epoch": 47.531400966183575, "grad_norm": 0.577847421169281, "learning_rate": 0.001, "loss": 1.7581, "step": 550984 }, { "epoch": 47.53623188405797, "grad_norm": 0.41907113790512085, "learning_rate": 0.001, "loss": 1.7597, "step": 551040 }, { "epoch": 47.54106280193237, "grad_norm": 0.5313217043876648, "learning_rate": 0.001, "loss": 1.7595, "step": 551096 }, { "epoch": 47.54589371980676, "grad_norm": 0.47615495324134827, "learning_rate": 0.001, "loss": 1.7417, "step": 551152 }, { "epoch": 47.55072463768116, "grad_norm": 0.27895495295524597, "learning_rate": 0.001, "loss": 1.7547, "step": 551208 }, { "epoch": 47.55555555555556, "grad_norm": 0.3781746029853821, "learning_rate": 0.001, "loss": 1.7547, "step": 551264 }, { "epoch": 47.56038647342995, "grad_norm": 0.6573802828788757, "learning_rate": 0.001, "loss": 1.7543, "step": 551320 }, { "epoch": 47.56521739130435, "grad_norm": 1.1413826942443848, "learning_rate": 0.001, "loss": 1.7521, "step": 551376 }, { "epoch": 47.570048309178745, "grad_norm": 1.2589777708053589, "learning_rate": 0.001, "loss": 1.7505, "step": 551432 }, { "epoch": 47.57487922705314, "grad_norm": 1.6205741167068481, "learning_rate": 0.001, "loss": 1.751, "step": 551488 }, { "epoch": 47.57971014492754, "grad_norm": 0.8519698977470398, "learning_rate": 0.001, "loss": 1.755, "step": 551544 }, { "epoch": 47.58454106280193, "grad_norm": 0.3133470416069031, "learning_rate": 0.001, "loss": 1.7631, "step": 551600 }, { "epoch": 47.589371980676326, "grad_norm": 1.2281559705734253, "learning_rate": 0.001, "loss": 1.758, "step": 551656 }, { "epoch": 47.594202898550726, "grad_norm": 1.2636651992797852, "learning_rate": 0.001, "loss": 1.7692, "step": 551712 }, { "epoch": 47.59903381642512, "grad_norm": 8.77667236328125, "learning_rate": 0.001, "loss": 1.759, "step": 551768 }, { "epoch": 47.60386473429952, "grad_norm": 0.447468638420105, "learning_rate": 0.001, "loss": 1.7526, "step": 551824 }, { "epoch": 47.608695652173914, "grad_norm": 3.714233160018921, "learning_rate": 0.001, "loss": 1.7488, "step": 551880 }, { "epoch": 47.61352657004831, "grad_norm": 0.35020801424980164, "learning_rate": 0.001, "loss": 1.7542, "step": 551936 }, { "epoch": 47.61835748792271, "grad_norm": 0.27114036679267883, "learning_rate": 0.001, "loss": 1.7486, "step": 551992 }, { "epoch": 47.6231884057971, "grad_norm": 0.5277588963508606, "learning_rate": 0.001, "loss": 1.748, "step": 552048 }, { "epoch": 47.628019323671495, "grad_norm": 4.6601972579956055, "learning_rate": 0.001, "loss": 1.7477, "step": 552104 }, { "epoch": 47.632850241545896, "grad_norm": 0.4733608365058899, "learning_rate": 0.001, "loss": 1.7482, "step": 552160 }, { "epoch": 47.63768115942029, "grad_norm": 0.2772434949874878, "learning_rate": 0.001, "loss": 1.7721, "step": 552216 }, { "epoch": 47.64251207729468, "grad_norm": 0.38333505392074585, "learning_rate": 0.001, "loss": 1.7704, "step": 552272 }, { "epoch": 47.64734299516908, "grad_norm": 0.9257436394691467, "learning_rate": 0.001, "loss": 1.7686, "step": 552328 }, { "epoch": 47.65217391304348, "grad_norm": 0.9447434544563293, "learning_rate": 0.001, "loss": 1.7812, "step": 552384 }, { "epoch": 47.65700483091788, "grad_norm": 0.27587616443634033, "learning_rate": 0.001, "loss": 1.7617, "step": 552440 }, { "epoch": 47.66183574879227, "grad_norm": 0.31905314326286316, "learning_rate": 0.001, "loss": 1.7443, "step": 552496 }, { "epoch": 47.666666666666664, "grad_norm": 2.9687397480010986, "learning_rate": 0.001, "loss": 1.764, "step": 552552 }, { "epoch": 47.671497584541065, "grad_norm": 0.2674528956413269, "learning_rate": 0.001, "loss": 1.7485, "step": 552608 }, { "epoch": 47.67632850241546, "grad_norm": 0.26541611552238464, "learning_rate": 0.001, "loss": 1.7458, "step": 552664 }, { "epoch": 47.68115942028985, "grad_norm": 13.463668823242188, "learning_rate": 0.001, "loss": 1.7376, "step": 552720 }, { "epoch": 47.68599033816425, "grad_norm": 0.30317020416259766, "learning_rate": 0.001, "loss": 1.7492, "step": 552776 }, { "epoch": 47.690821256038646, "grad_norm": 0.33294859528541565, "learning_rate": 0.001, "loss": 1.7497, "step": 552832 }, { "epoch": 47.69565217391305, "grad_norm": 1.5886945724487305, "learning_rate": 0.001, "loss": 1.7517, "step": 552888 }, { "epoch": 47.70048309178744, "grad_norm": 0.261288046836853, "learning_rate": 0.001, "loss": 1.7422, "step": 552944 }, { "epoch": 47.70531400966183, "grad_norm": 0.2990473806858063, "learning_rate": 0.001, "loss": 1.7425, "step": 553000 }, { "epoch": 47.710144927536234, "grad_norm": 0.2454865723848343, "learning_rate": 0.001, "loss": 1.7386, "step": 553056 }, { "epoch": 47.71497584541063, "grad_norm": 0.3279196321964264, "learning_rate": 0.001, "loss": 1.7381, "step": 553112 }, { "epoch": 47.71980676328502, "grad_norm": 10.061687469482422, "learning_rate": 0.001, "loss": 1.736, "step": 553168 }, { "epoch": 47.72463768115942, "grad_norm": 0.24158822000026703, "learning_rate": 0.001, "loss": 1.7391, "step": 553224 }, { "epoch": 47.729468599033815, "grad_norm": 12.058940887451172, "learning_rate": 0.001, "loss": 1.7414, "step": 553280 }, { "epoch": 47.734299516908216, "grad_norm": 0.2691340446472168, "learning_rate": 0.001, "loss": 1.7357, "step": 553336 }, { "epoch": 47.73913043478261, "grad_norm": 0.396634966135025, "learning_rate": 0.001, "loss": 1.7501, "step": 553392 }, { "epoch": 47.743961352657, "grad_norm": 1.7510796785354614, "learning_rate": 0.001, "loss": 1.7385, "step": 553448 }, { "epoch": 47.7487922705314, "grad_norm": 0.3275125324726105, "learning_rate": 0.001, "loss": 1.7517, "step": 553504 }, { "epoch": 47.7536231884058, "grad_norm": 0.5373415946960449, "learning_rate": 0.001, "loss": 1.7363, "step": 553560 }, { "epoch": 47.75845410628019, "grad_norm": 0.2426336109638214, "learning_rate": 0.001, "loss": 1.7395, "step": 553616 }, { "epoch": 47.76328502415459, "grad_norm": 4.613277912139893, "learning_rate": 0.001, "loss": 1.7367, "step": 553672 }, { "epoch": 47.768115942028984, "grad_norm": 0.44359657168388367, "learning_rate": 0.001, "loss": 1.7381, "step": 553728 }, { "epoch": 47.772946859903385, "grad_norm": 0.3343617916107178, "learning_rate": 0.001, "loss": 1.7344, "step": 553784 }, { "epoch": 47.77777777777778, "grad_norm": 0.3519558012485504, "learning_rate": 0.001, "loss": 1.729, "step": 553840 }, { "epoch": 47.78260869565217, "grad_norm": 0.8114468455314636, "learning_rate": 0.001, "loss": 1.7287, "step": 553896 }, { "epoch": 47.78743961352657, "grad_norm": 1.0570392608642578, "learning_rate": 0.001, "loss": 1.7371, "step": 553952 }, { "epoch": 47.792270531400966, "grad_norm": 9.910186767578125, "learning_rate": 0.001, "loss": 1.7439, "step": 554008 }, { "epoch": 47.79710144927536, "grad_norm": 0.24146710336208344, "learning_rate": 0.001, "loss": 1.7394, "step": 554064 }, { "epoch": 47.80193236714976, "grad_norm": 0.31374940276145935, "learning_rate": 0.001, "loss": 1.7351, "step": 554120 }, { "epoch": 47.806763285024154, "grad_norm": 0.3112371563911438, "learning_rate": 0.001, "loss": 1.7319, "step": 554176 }, { "epoch": 47.81159420289855, "grad_norm": 0.39495551586151123, "learning_rate": 0.001, "loss": 1.7313, "step": 554232 }, { "epoch": 47.81642512077295, "grad_norm": 11.296889305114746, "learning_rate": 0.001, "loss": 1.7424, "step": 554288 }, { "epoch": 47.82125603864734, "grad_norm": 0.31350523233413696, "learning_rate": 0.001, "loss": 1.7467, "step": 554344 }, { "epoch": 47.82608695652174, "grad_norm": 0.3233942985534668, "learning_rate": 0.001, "loss": 1.7391, "step": 554400 }, { "epoch": 47.830917874396135, "grad_norm": 0.29573899507522583, "learning_rate": 0.001, "loss": 1.7373, "step": 554456 }, { "epoch": 47.83574879227053, "grad_norm": 0.3130510747432709, "learning_rate": 0.001, "loss": 1.7383, "step": 554512 }, { "epoch": 47.84057971014493, "grad_norm": 0.2704949378967285, "learning_rate": 0.001, "loss": 1.7323, "step": 554568 }, { "epoch": 47.84541062801932, "grad_norm": 1.3435925245285034, "learning_rate": 0.001, "loss": 1.7311, "step": 554624 }, { "epoch": 47.85024154589372, "grad_norm": 18.459707260131836, "learning_rate": 0.001, "loss": 1.7372, "step": 554680 }, { "epoch": 47.85507246376812, "grad_norm": 1.997094750404358, "learning_rate": 0.001, "loss": 1.7435, "step": 554736 }, { "epoch": 47.85990338164251, "grad_norm": 0.3167450428009033, "learning_rate": 0.001, "loss": 1.7569, "step": 554792 }, { "epoch": 47.86473429951691, "grad_norm": 2.537158250808716, "learning_rate": 0.001, "loss": 1.7544, "step": 554848 }, { "epoch": 47.869565217391305, "grad_norm": 0.2570529282093048, "learning_rate": 0.001, "loss": 1.7469, "step": 554904 }, { "epoch": 47.8743961352657, "grad_norm": 0.42896637320518494, "learning_rate": 0.001, "loss": 1.7555, "step": 554960 }, { "epoch": 47.8792270531401, "grad_norm": 0.3532427251338959, "learning_rate": 0.001, "loss": 1.7619, "step": 555016 }, { "epoch": 47.88405797101449, "grad_norm": 0.4625174105167389, "learning_rate": 0.001, "loss": 1.7427, "step": 555072 }, { "epoch": 47.888888888888886, "grad_norm": 0.8862326741218567, "learning_rate": 0.001, "loss": 1.7421, "step": 555128 }, { "epoch": 47.893719806763286, "grad_norm": 0.36017340421676636, "learning_rate": 0.001, "loss": 1.7324, "step": 555184 }, { "epoch": 47.89855072463768, "grad_norm": 0.6789189577102661, "learning_rate": 0.001, "loss": 1.73, "step": 555240 }, { "epoch": 47.90338164251208, "grad_norm": 0.4127628207206726, "learning_rate": 0.001, "loss": 1.7473, "step": 555296 }, { "epoch": 47.908212560386474, "grad_norm": 0.25015756487846375, "learning_rate": 0.001, "loss": 1.7662, "step": 555352 }, { "epoch": 47.91304347826087, "grad_norm": 0.3704906105995178, "learning_rate": 0.001, "loss": 1.755, "step": 555408 }, { "epoch": 47.91787439613527, "grad_norm": 0.6751772165298462, "learning_rate": 0.001, "loss": 1.7556, "step": 555464 }, { "epoch": 47.92270531400966, "grad_norm": 1.0634523630142212, "learning_rate": 0.001, "loss": 1.7581, "step": 555520 }, { "epoch": 47.927536231884055, "grad_norm": 0.33919262886047363, "learning_rate": 0.001, "loss": 1.763, "step": 555576 }, { "epoch": 47.932367149758456, "grad_norm": 0.2908060848712921, "learning_rate": 0.001, "loss": 1.7561, "step": 555632 }, { "epoch": 47.93719806763285, "grad_norm": 0.23702818155288696, "learning_rate": 0.001, "loss": 1.7568, "step": 555688 }, { "epoch": 47.94202898550725, "grad_norm": 0.2846589684486389, "learning_rate": 0.001, "loss": 1.7532, "step": 555744 }, { "epoch": 47.94685990338164, "grad_norm": 0.30871111154556274, "learning_rate": 0.001, "loss": 1.7457, "step": 555800 }, { "epoch": 47.95169082125604, "grad_norm": 0.5630412101745605, "learning_rate": 0.001, "loss": 1.7477, "step": 555856 }, { "epoch": 47.95652173913044, "grad_norm": 0.35094401240348816, "learning_rate": 0.001, "loss": 1.7552, "step": 555912 }, { "epoch": 47.96135265700483, "grad_norm": 4.494852542877197, "learning_rate": 0.001, "loss": 1.7622, "step": 555968 }, { "epoch": 47.966183574879224, "grad_norm": 2.3337714672088623, "learning_rate": 0.001, "loss": 1.7628, "step": 556024 }, { "epoch": 47.971014492753625, "grad_norm": 1.6576848030090332, "learning_rate": 0.001, "loss": 1.76, "step": 556080 }, { "epoch": 47.97584541062802, "grad_norm": 0.6846519112586975, "learning_rate": 0.001, "loss": 1.7509, "step": 556136 }, { "epoch": 47.98067632850242, "grad_norm": 0.2737486660480499, "learning_rate": 0.001, "loss": 1.7459, "step": 556192 }, { "epoch": 47.98550724637681, "grad_norm": 0.6786449551582336, "learning_rate": 0.001, "loss": 1.7522, "step": 556248 }, { "epoch": 47.990338164251206, "grad_norm": 0.597537636756897, "learning_rate": 0.001, "loss": 1.7505, "step": 556304 }, { "epoch": 47.99516908212561, "grad_norm": 0.45557186007499695, "learning_rate": 0.001, "loss": 1.7531, "step": 556360 }, { "epoch": 48.0, "grad_norm": 0.419938325881958, "learning_rate": 0.001, "loss": 1.7476, "step": 556416 }, { "epoch": 48.00483091787439, "grad_norm": 0.9752640724182129, "learning_rate": 0.001, "loss": 1.7125, "step": 556472 }, { "epoch": 48.009661835748794, "grad_norm": 1.3594614267349243, "learning_rate": 0.001, "loss": 1.7111, "step": 556528 }, { "epoch": 48.01449275362319, "grad_norm": 0.6443732976913452, "learning_rate": 0.001, "loss": 1.7124, "step": 556584 }, { "epoch": 48.01932367149758, "grad_norm": 15.07887077331543, "learning_rate": 0.001, "loss": 1.7139, "step": 556640 }, { "epoch": 48.02415458937198, "grad_norm": 3.3458821773529053, "learning_rate": 0.001, "loss": 1.7082, "step": 556696 }, { "epoch": 48.028985507246375, "grad_norm": 4.041109561920166, "learning_rate": 0.001, "loss": 1.728, "step": 556752 }, { "epoch": 48.033816425120776, "grad_norm": 2.4486095905303955, "learning_rate": 0.001, "loss": 1.7207, "step": 556808 }, { "epoch": 48.03864734299517, "grad_norm": 4.796956539154053, "learning_rate": 0.001, "loss": 1.7232, "step": 556864 }, { "epoch": 48.04347826086956, "grad_norm": 0.46816253662109375, "learning_rate": 0.001, "loss": 1.7198, "step": 556920 }, { "epoch": 48.04830917874396, "grad_norm": 0.3836163282394409, "learning_rate": 0.001, "loss": 1.7177, "step": 556976 }, { "epoch": 48.05314009661836, "grad_norm": 1.2982035875320435, "learning_rate": 0.001, "loss": 1.7114, "step": 557032 }, { "epoch": 48.05797101449275, "grad_norm": 0.3175109326839447, "learning_rate": 0.001, "loss": 1.7396, "step": 557088 }, { "epoch": 48.06280193236715, "grad_norm": 4.622042179107666, "learning_rate": 0.001, "loss": 1.7247, "step": 557144 }, { "epoch": 48.067632850241544, "grad_norm": 0.27719756960868835, "learning_rate": 0.001, "loss": 1.7267, "step": 557200 }, { "epoch": 48.072463768115945, "grad_norm": 0.2828962206840515, "learning_rate": 0.001, "loss": 1.7265, "step": 557256 }, { "epoch": 48.07729468599034, "grad_norm": 0.36798495054244995, "learning_rate": 0.001, "loss": 1.7219, "step": 557312 }, { "epoch": 48.08212560386473, "grad_norm": 0.2621324956417084, "learning_rate": 0.001, "loss": 1.7188, "step": 557368 }, { "epoch": 48.08695652173913, "grad_norm": 0.471993625164032, "learning_rate": 0.001, "loss": 1.7236, "step": 557424 }, { "epoch": 48.091787439613526, "grad_norm": 0.6498216986656189, "learning_rate": 0.001, "loss": 1.7256, "step": 557480 }, { "epoch": 48.09661835748792, "grad_norm": 0.9547773599624634, "learning_rate": 0.001, "loss": 1.719, "step": 557536 }, { "epoch": 48.10144927536232, "grad_norm": 0.904961109161377, "learning_rate": 0.001, "loss": 1.7269, "step": 557592 }, { "epoch": 48.106280193236714, "grad_norm": 0.3333018124103546, "learning_rate": 0.001, "loss": 1.7224, "step": 557648 }, { "epoch": 48.111111111111114, "grad_norm": 0.2940675616264343, "learning_rate": 0.001, "loss": 1.7216, "step": 557704 }, { "epoch": 48.11594202898551, "grad_norm": 0.3581346869468689, "learning_rate": 0.001, "loss": 1.7253, "step": 557760 }, { "epoch": 48.1207729468599, "grad_norm": 2.423856735229492, "learning_rate": 0.001, "loss": 1.7238, "step": 557816 }, { "epoch": 48.1256038647343, "grad_norm": 0.5842982530593872, "learning_rate": 0.001, "loss": 1.7433, "step": 557872 }, { "epoch": 48.130434782608695, "grad_norm": 1.020571231842041, "learning_rate": 0.001, "loss": 1.746, "step": 557928 }, { "epoch": 48.13526570048309, "grad_norm": 2.6577956676483154, "learning_rate": 0.001, "loss": 1.7533, "step": 557984 }, { "epoch": 48.14009661835749, "grad_norm": 0.7910611033439636, "learning_rate": 0.001, "loss": 1.7539, "step": 558040 }, { "epoch": 48.14492753623188, "grad_norm": 0.5281345248222351, "learning_rate": 0.001, "loss": 1.7597, "step": 558096 }, { "epoch": 48.14975845410628, "grad_norm": 1.455918312072754, "learning_rate": 0.001, "loss": 1.7369, "step": 558152 }, { "epoch": 48.15458937198068, "grad_norm": 0.8949226140975952, "learning_rate": 0.001, "loss": 1.7358, "step": 558208 }, { "epoch": 48.15942028985507, "grad_norm": 2.0396342277526855, "learning_rate": 0.001, "loss": 1.7451, "step": 558264 }, { "epoch": 48.16425120772947, "grad_norm": 0.5577055215835571, "learning_rate": 0.001, "loss": 1.7322, "step": 558320 }, { "epoch": 48.169082125603865, "grad_norm": 4.517914295196533, "learning_rate": 0.001, "loss": 1.736, "step": 558376 }, { "epoch": 48.17391304347826, "grad_norm": 1.4687488079071045, "learning_rate": 0.001, "loss": 1.7373, "step": 558432 }, { "epoch": 48.17874396135266, "grad_norm": 0.3286985456943512, "learning_rate": 0.001, "loss": 1.734, "step": 558488 }, { "epoch": 48.18357487922705, "grad_norm": 1.4441635608673096, "learning_rate": 0.001, "loss": 1.7384, "step": 558544 }, { "epoch": 48.18840579710145, "grad_norm": 0.964182436466217, "learning_rate": 0.001, "loss": 1.7405, "step": 558600 }, { "epoch": 48.193236714975846, "grad_norm": 1.1161614656448364, "learning_rate": 0.001, "loss": 1.7311, "step": 558656 }, { "epoch": 48.19806763285024, "grad_norm": 2.071441173553467, "learning_rate": 0.001, "loss": 1.725, "step": 558712 }, { "epoch": 48.20289855072464, "grad_norm": 0.32621249556541443, "learning_rate": 0.001, "loss": 1.727, "step": 558768 }, { "epoch": 48.207729468599034, "grad_norm": 0.7437166571617126, "learning_rate": 0.001, "loss": 1.7314, "step": 558824 }, { "epoch": 48.21256038647343, "grad_norm": 2.8595383167266846, "learning_rate": 0.001, "loss": 1.7421, "step": 558880 }, { "epoch": 48.21739130434783, "grad_norm": 0.6153905987739563, "learning_rate": 0.001, "loss": 1.7388, "step": 558936 }, { "epoch": 48.22222222222222, "grad_norm": 0.3360987901687622, "learning_rate": 0.001, "loss": 1.7218, "step": 558992 }, { "epoch": 48.227053140096615, "grad_norm": 3.065673828125, "learning_rate": 0.001, "loss": 1.7176, "step": 559048 }, { "epoch": 48.231884057971016, "grad_norm": 0.512309193611145, "learning_rate": 0.001, "loss": 1.7125, "step": 559104 }, { "epoch": 48.23671497584541, "grad_norm": 0.3625718951225281, "learning_rate": 0.001, "loss": 1.7143, "step": 559160 }, { "epoch": 48.24154589371981, "grad_norm": 0.4617823660373688, "learning_rate": 0.001, "loss": 1.7206, "step": 559216 }, { "epoch": 48.2463768115942, "grad_norm": 0.7270511388778687, "learning_rate": 0.001, "loss": 1.7171, "step": 559272 }, { "epoch": 48.2512077294686, "grad_norm": 0.46190470457077026, "learning_rate": 0.001, "loss": 1.714, "step": 559328 }, { "epoch": 48.256038647343, "grad_norm": 0.7493250370025635, "learning_rate": 0.001, "loss": 1.7245, "step": 559384 }, { "epoch": 48.26086956521739, "grad_norm": 1.0108582973480225, "learning_rate": 0.001, "loss": 1.7223, "step": 559440 }, { "epoch": 48.265700483091784, "grad_norm": 3.6153907775878906, "learning_rate": 0.001, "loss": 1.7162, "step": 559496 }, { "epoch": 48.270531400966185, "grad_norm": 0.6265458464622498, "learning_rate": 0.001, "loss": 1.7295, "step": 559552 }, { "epoch": 48.27536231884058, "grad_norm": 0.45238104462623596, "learning_rate": 0.001, "loss": 1.7308, "step": 559608 }, { "epoch": 48.28019323671498, "grad_norm": 0.5736666917800903, "learning_rate": 0.001, "loss": 1.7225, "step": 559664 }, { "epoch": 48.28502415458937, "grad_norm": 0.2857624292373657, "learning_rate": 0.001, "loss": 1.729, "step": 559720 }, { "epoch": 48.289855072463766, "grad_norm": 0.4967600703239441, "learning_rate": 0.001, "loss": 1.7281, "step": 559776 }, { "epoch": 48.29468599033817, "grad_norm": 0.339504212141037, "learning_rate": 0.001, "loss": 1.7297, "step": 559832 }, { "epoch": 48.29951690821256, "grad_norm": 3.0593347549438477, "learning_rate": 0.001, "loss": 1.739, "step": 559888 }, { "epoch": 48.30434782608695, "grad_norm": 0.3582685589790344, "learning_rate": 0.001, "loss": 1.7407, "step": 559944 }, { "epoch": 48.309178743961354, "grad_norm": 0.45325610041618347, "learning_rate": 0.001, "loss": 1.736, "step": 560000 }, { "epoch": 48.31400966183575, "grad_norm": 0.7752774357795715, "learning_rate": 0.001, "loss": 1.7346, "step": 560056 }, { "epoch": 48.31884057971015, "grad_norm": 3.353123664855957, "learning_rate": 0.001, "loss": 1.7296, "step": 560112 }, { "epoch": 48.32367149758454, "grad_norm": 22.690156936645508, "learning_rate": 0.001, "loss": 1.7245, "step": 560168 }, { "epoch": 48.328502415458935, "grad_norm": 0.4600237011909485, "learning_rate": 0.001, "loss": 1.7328, "step": 560224 }, { "epoch": 48.333333333333336, "grad_norm": 0.3787469267845154, "learning_rate": 0.001, "loss": 1.7213, "step": 560280 }, { "epoch": 48.33816425120773, "grad_norm": 0.3996583819389343, "learning_rate": 0.001, "loss": 1.72, "step": 560336 }, { "epoch": 48.34299516908212, "grad_norm": 0.34201109409332275, "learning_rate": 0.001, "loss": 1.7196, "step": 560392 }, { "epoch": 48.34782608695652, "grad_norm": 0.4338032603263855, "learning_rate": 0.001, "loss": 1.7309, "step": 560448 }, { "epoch": 48.35265700483092, "grad_norm": 1.1121406555175781, "learning_rate": 0.001, "loss": 1.7262, "step": 560504 }, { "epoch": 48.35748792270532, "grad_norm": 0.32640036940574646, "learning_rate": 0.001, "loss": 1.7242, "step": 560560 }, { "epoch": 48.36231884057971, "grad_norm": 3.5201947689056396, "learning_rate": 0.001, "loss": 1.7222, "step": 560616 }, { "epoch": 48.367149758454104, "grad_norm": 2.2100040912628174, "learning_rate": 0.001, "loss": 1.7124, "step": 560672 }, { "epoch": 48.371980676328505, "grad_norm": 3.5138890743255615, "learning_rate": 0.001, "loss": 1.7193, "step": 560728 }, { "epoch": 48.3768115942029, "grad_norm": 0.6353521347045898, "learning_rate": 0.001, "loss": 1.7266, "step": 560784 }, { "epoch": 48.38164251207729, "grad_norm": 0.39309829473495483, "learning_rate": 0.001, "loss": 1.7168, "step": 560840 }, { "epoch": 48.38647342995169, "grad_norm": 0.8769246935844421, "learning_rate": 0.001, "loss": 1.7295, "step": 560896 }, { "epoch": 48.391304347826086, "grad_norm": 1.155418872833252, "learning_rate": 0.001, "loss": 1.7255, "step": 560952 }, { "epoch": 48.39613526570048, "grad_norm": 0.478248655796051, "learning_rate": 0.001, "loss": 1.7193, "step": 561008 }, { "epoch": 48.40096618357488, "grad_norm": 0.8789826035499573, "learning_rate": 0.001, "loss": 1.7315, "step": 561064 }, { "epoch": 48.405797101449274, "grad_norm": 0.47951775789260864, "learning_rate": 0.001, "loss": 1.7256, "step": 561120 }, { "epoch": 48.410628019323674, "grad_norm": 0.5099565386772156, "learning_rate": 0.001, "loss": 1.7193, "step": 561176 }, { "epoch": 48.41545893719807, "grad_norm": 1.1364742517471313, "learning_rate": 0.001, "loss": 1.7286, "step": 561232 }, { "epoch": 48.42028985507246, "grad_norm": 5.860898017883301, "learning_rate": 0.001, "loss": 1.7191, "step": 561288 }, { "epoch": 48.42512077294686, "grad_norm": 0.3071814477443695, "learning_rate": 0.001, "loss": 1.7247, "step": 561344 }, { "epoch": 48.429951690821255, "grad_norm": 0.2678964138031006, "learning_rate": 0.001, "loss": 1.7182, "step": 561400 }, { "epoch": 48.43478260869565, "grad_norm": 0.5519323945045471, "learning_rate": 0.001, "loss": 1.7203, "step": 561456 }, { "epoch": 48.43961352657005, "grad_norm": 0.9246322512626648, "learning_rate": 0.001, "loss": 1.7225, "step": 561512 }, { "epoch": 48.44444444444444, "grad_norm": 0.350729376077652, "learning_rate": 0.001, "loss": 1.7248, "step": 561568 }, { "epoch": 48.44927536231884, "grad_norm": 3.221393346786499, "learning_rate": 0.001, "loss": 1.7186, "step": 561624 }, { "epoch": 48.45410628019324, "grad_norm": 0.2818219065666199, "learning_rate": 0.001, "loss": 1.7366, "step": 561680 }, { "epoch": 48.45893719806763, "grad_norm": 0.3236127197742462, "learning_rate": 0.001, "loss": 1.7378, "step": 561736 }, { "epoch": 48.46376811594203, "grad_norm": 0.8377506732940674, "learning_rate": 0.001, "loss": 1.7331, "step": 561792 }, { "epoch": 48.468599033816425, "grad_norm": 0.2894747853279114, "learning_rate": 0.001, "loss": 1.7384, "step": 561848 }, { "epoch": 48.47342995169082, "grad_norm": 0.23993971943855286, "learning_rate": 0.001, "loss": 1.7274, "step": 561904 }, { "epoch": 48.47826086956522, "grad_norm": 18.647411346435547, "learning_rate": 0.001, "loss": 1.7194, "step": 561960 }, { "epoch": 48.48309178743961, "grad_norm": 1.3642023801803589, "learning_rate": 0.001, "loss": 1.7371, "step": 562016 }, { "epoch": 48.48792270531401, "grad_norm": 1.2759517431259155, "learning_rate": 0.001, "loss": 1.7269, "step": 562072 }, { "epoch": 48.492753623188406, "grad_norm": 1.9285154342651367, "learning_rate": 0.001, "loss": 1.7282, "step": 562128 }, { "epoch": 48.4975845410628, "grad_norm": 0.509515643119812, "learning_rate": 0.001, "loss": 1.7214, "step": 562184 }, { "epoch": 48.5024154589372, "grad_norm": 0.31755873560905457, "learning_rate": 0.001, "loss": 1.7186, "step": 562240 }, { "epoch": 48.507246376811594, "grad_norm": 1.1850541830062866, "learning_rate": 0.001, "loss": 1.7084, "step": 562296 }, { "epoch": 48.51207729468599, "grad_norm": 1.3775087594985962, "learning_rate": 0.001, "loss": 1.7238, "step": 562352 }, { "epoch": 48.51690821256039, "grad_norm": 1.3933364152908325, "learning_rate": 0.001, "loss": 1.7394, "step": 562408 }, { "epoch": 48.52173913043478, "grad_norm": 1.7489244937896729, "learning_rate": 0.001, "loss": 1.7362, "step": 562464 }, { "epoch": 48.52657004830918, "grad_norm": 0.9871833920478821, "learning_rate": 0.001, "loss": 1.7418, "step": 562520 }, { "epoch": 48.531400966183575, "grad_norm": 2.2161624431610107, "learning_rate": 0.001, "loss": 1.7406, "step": 562576 }, { "epoch": 48.53623188405797, "grad_norm": 0.2898215353488922, "learning_rate": 0.001, "loss": 1.7387, "step": 562632 }, { "epoch": 48.54106280193237, "grad_norm": 4.362494945526123, "learning_rate": 0.001, "loss": 1.7382, "step": 562688 }, { "epoch": 48.54589371980676, "grad_norm": 0.8695025444030762, "learning_rate": 0.001, "loss": 1.7449, "step": 562744 }, { "epoch": 48.55072463768116, "grad_norm": 0.610435962677002, "learning_rate": 0.001, "loss": 1.7422, "step": 562800 }, { "epoch": 48.55555555555556, "grad_norm": 1.1652413606643677, "learning_rate": 0.001, "loss": 1.7444, "step": 562856 }, { "epoch": 48.56038647342995, "grad_norm": 0.2796342968940735, "learning_rate": 0.001, "loss": 1.7417, "step": 562912 }, { "epoch": 48.56521739130435, "grad_norm": 0.6395411491394043, "learning_rate": 0.001, "loss": 1.741, "step": 562968 }, { "epoch": 48.570048309178745, "grad_norm": 3.499129295349121, "learning_rate": 0.001, "loss": 1.7342, "step": 563024 }, { "epoch": 48.57487922705314, "grad_norm": 0.3522055149078369, "learning_rate": 0.001, "loss": 1.7374, "step": 563080 }, { "epoch": 48.57971014492754, "grad_norm": 0.4047297239303589, "learning_rate": 0.001, "loss": 1.7255, "step": 563136 }, { "epoch": 48.58454106280193, "grad_norm": 0.3754846751689911, "learning_rate": 0.001, "loss": 1.73, "step": 563192 }, { "epoch": 48.589371980676326, "grad_norm": 0.2958768606185913, "learning_rate": 0.001, "loss": 1.7374, "step": 563248 }, { "epoch": 48.594202898550726, "grad_norm": 1.9761005640029907, "learning_rate": 0.001, "loss": 1.7299, "step": 563304 }, { "epoch": 48.59903381642512, "grad_norm": 0.9507854580879211, "learning_rate": 0.001, "loss": 1.7302, "step": 563360 }, { "epoch": 48.60386473429952, "grad_norm": 0.672998309135437, "learning_rate": 0.001, "loss": 1.7193, "step": 563416 }, { "epoch": 48.608695652173914, "grad_norm": 0.28299403190612793, "learning_rate": 0.001, "loss": 1.7179, "step": 563472 }, { "epoch": 48.61352657004831, "grad_norm": 0.9326180815696716, "learning_rate": 0.001, "loss": 1.733, "step": 563528 }, { "epoch": 48.61835748792271, "grad_norm": 0.8623196482658386, "learning_rate": 0.001, "loss": 1.7325, "step": 563584 }, { "epoch": 48.6231884057971, "grad_norm": 0.30137598514556885, "learning_rate": 0.001, "loss": 1.7259, "step": 563640 }, { "epoch": 48.628019323671495, "grad_norm": 7.764924049377441, "learning_rate": 0.001, "loss": 1.7308, "step": 563696 }, { "epoch": 48.632850241545896, "grad_norm": 0.28031742572784424, "learning_rate": 0.001, "loss": 1.7154, "step": 563752 }, { "epoch": 48.63768115942029, "grad_norm": 1.5746824741363525, "learning_rate": 0.001, "loss": 1.717, "step": 563808 }, { "epoch": 48.64251207729468, "grad_norm": 0.2937065362930298, "learning_rate": 0.001, "loss": 1.7229, "step": 563864 }, { "epoch": 48.64734299516908, "grad_norm": 0.23285731673240662, "learning_rate": 0.001, "loss": 1.7159, "step": 563920 }, { "epoch": 48.65217391304348, "grad_norm": 0.27084076404571533, "learning_rate": 0.001, "loss": 1.7165, "step": 563976 }, { "epoch": 48.65700483091788, "grad_norm": 3.0396528244018555, "learning_rate": 0.001, "loss": 1.7235, "step": 564032 }, { "epoch": 48.66183574879227, "grad_norm": 0.2727288007736206, "learning_rate": 0.001, "loss": 1.7159, "step": 564088 }, { "epoch": 48.666666666666664, "grad_norm": 0.7118105292320251, "learning_rate": 0.001, "loss": 1.7382, "step": 564144 }, { "epoch": 48.671497584541065, "grad_norm": 0.3286385238170624, "learning_rate": 0.001, "loss": 1.7392, "step": 564200 }, { "epoch": 48.67632850241546, "grad_norm": 0.7671298980712891, "learning_rate": 0.001, "loss": 1.7346, "step": 564256 }, { "epoch": 48.68115942028985, "grad_norm": 1.0957244634628296, "learning_rate": 0.001, "loss": 1.7303, "step": 564312 }, { "epoch": 48.68599033816425, "grad_norm": 0.6638689041137695, "learning_rate": 0.001, "loss": 1.7273, "step": 564368 }, { "epoch": 48.690821256038646, "grad_norm": 1.309252142906189, "learning_rate": 0.001, "loss": 1.7141, "step": 564424 }, { "epoch": 48.69565217391305, "grad_norm": 0.7591997981071472, "learning_rate": 0.001, "loss": 1.7265, "step": 564480 }, { "epoch": 48.70048309178744, "grad_norm": 0.3621346354484558, "learning_rate": 0.001, "loss": 1.7226, "step": 564536 }, { "epoch": 48.70531400966183, "grad_norm": 2.139744520187378, "learning_rate": 0.001, "loss": 1.7339, "step": 564592 }, { "epoch": 48.710144927536234, "grad_norm": 1.14698326587677, "learning_rate": 0.001, "loss": 1.7357, "step": 564648 }, { "epoch": 48.71497584541063, "grad_norm": 0.3161656856536865, "learning_rate": 0.001, "loss": 1.7294, "step": 564704 }, { "epoch": 48.71980676328502, "grad_norm": 1.520839810371399, "learning_rate": 0.001, "loss": 1.7531, "step": 564760 }, { "epoch": 48.72463768115942, "grad_norm": 1.4619337320327759, "learning_rate": 0.001, "loss": 1.7615, "step": 564816 }, { "epoch": 48.729468599033815, "grad_norm": 0.27585041522979736, "learning_rate": 0.001, "loss": 1.7472, "step": 564872 }, { "epoch": 48.734299516908216, "grad_norm": 0.7427679300308228, "learning_rate": 0.001, "loss": 1.7485, "step": 564928 }, { "epoch": 48.73913043478261, "grad_norm": 0.36053940653800964, "learning_rate": 0.001, "loss": 1.7474, "step": 564984 }, { "epoch": 48.743961352657, "grad_norm": 0.31082770228385925, "learning_rate": 0.001, "loss": 1.74, "step": 565040 }, { "epoch": 48.7487922705314, "grad_norm": 0.5567958950996399, "learning_rate": 0.001, "loss": 1.7454, "step": 565096 }, { "epoch": 48.7536231884058, "grad_norm": 3.210038185119629, "learning_rate": 0.001, "loss": 1.7451, "step": 565152 }, { "epoch": 48.75845410628019, "grad_norm": 0.27859172224998474, "learning_rate": 0.001, "loss": 1.7382, "step": 565208 }, { "epoch": 48.76328502415459, "grad_norm": 0.35330328345298767, "learning_rate": 0.001, "loss": 1.7415, "step": 565264 }, { "epoch": 48.768115942028984, "grad_norm": 0.25735074281692505, "learning_rate": 0.001, "loss": 1.7331, "step": 565320 }, { "epoch": 48.772946859903385, "grad_norm": 1.1551278829574585, "learning_rate": 0.001, "loss": 1.7343, "step": 565376 }, { "epoch": 48.77777777777778, "grad_norm": 4.618515968322754, "learning_rate": 0.001, "loss": 1.7353, "step": 565432 }, { "epoch": 48.78260869565217, "grad_norm": 0.4553910791873932, "learning_rate": 0.001, "loss": 1.7381, "step": 565488 }, { "epoch": 48.78743961352657, "grad_norm": 1.0614721775054932, "learning_rate": 0.001, "loss": 1.7347, "step": 565544 }, { "epoch": 48.792270531400966, "grad_norm": 0.30600351095199585, "learning_rate": 0.001, "loss": 1.7432, "step": 565600 }, { "epoch": 48.79710144927536, "grad_norm": 0.9706910848617554, "learning_rate": 0.001, "loss": 1.7419, "step": 565656 }, { "epoch": 48.80193236714976, "grad_norm": 0.7892006635665894, "learning_rate": 0.001, "loss": 1.7442, "step": 565712 }, { "epoch": 48.806763285024154, "grad_norm": 0.31674396991729736, "learning_rate": 0.001, "loss": 1.7409, "step": 565768 }, { "epoch": 48.81159420289855, "grad_norm": 0.873042106628418, "learning_rate": 0.001, "loss": 1.7434, "step": 565824 }, { "epoch": 48.81642512077295, "grad_norm": 0.24458180367946625, "learning_rate": 0.001, "loss": 1.7448, "step": 565880 }, { "epoch": 48.82125603864734, "grad_norm": 0.7202498912811279, "learning_rate": 0.001, "loss": 1.7505, "step": 565936 }, { "epoch": 48.82608695652174, "grad_norm": 0.49932006001472473, "learning_rate": 0.001, "loss": 1.7559, "step": 565992 }, { "epoch": 48.830917874396135, "grad_norm": 0.266579806804657, "learning_rate": 0.001, "loss": 1.7474, "step": 566048 }, { "epoch": 48.83574879227053, "grad_norm": 0.8755822777748108, "learning_rate": 0.001, "loss": 1.7364, "step": 566104 }, { "epoch": 48.84057971014493, "grad_norm": 0.49510353803634644, "learning_rate": 0.001, "loss": 1.7396, "step": 566160 }, { "epoch": 48.84541062801932, "grad_norm": 1.3598757982254028, "learning_rate": 0.001, "loss": 1.7399, "step": 566216 }, { "epoch": 48.85024154589372, "grad_norm": 0.5199295878410339, "learning_rate": 0.001, "loss": 1.7408, "step": 566272 }, { "epoch": 48.85507246376812, "grad_norm": 0.2606726288795471, "learning_rate": 0.001, "loss": 1.7495, "step": 566328 }, { "epoch": 48.85990338164251, "grad_norm": 2.651616096496582, "learning_rate": 0.001, "loss": 1.7447, "step": 566384 }, { "epoch": 48.86473429951691, "grad_norm": 0.2846638858318329, "learning_rate": 0.001, "loss": 1.7244, "step": 566440 }, { "epoch": 48.869565217391305, "grad_norm": 0.7517786622047424, "learning_rate": 0.001, "loss": 1.728, "step": 566496 }, { "epoch": 48.8743961352657, "grad_norm": 0.5236542224884033, "learning_rate": 0.001, "loss": 1.7325, "step": 566552 }, { "epoch": 48.8792270531401, "grad_norm": 5.7897868156433105, "learning_rate": 0.001, "loss": 1.7352, "step": 566608 }, { "epoch": 48.88405797101449, "grad_norm": 1.2427327632904053, "learning_rate": 0.001, "loss": 1.741, "step": 566664 }, { "epoch": 48.888888888888886, "grad_norm": 0.5720710754394531, "learning_rate": 0.001, "loss": 1.7366, "step": 566720 }, { "epoch": 48.893719806763286, "grad_norm": 0.4397614598274231, "learning_rate": 0.001, "loss": 1.7368, "step": 566776 }, { "epoch": 48.89855072463768, "grad_norm": 0.35932597517967224, "learning_rate": 0.001, "loss": 1.739, "step": 566832 }, { "epoch": 48.90338164251208, "grad_norm": 0.36754271388053894, "learning_rate": 0.001, "loss": 1.742, "step": 566888 }, { "epoch": 48.908212560386474, "grad_norm": 1.1754306554794312, "learning_rate": 0.001, "loss": 1.7379, "step": 566944 }, { "epoch": 48.91304347826087, "grad_norm": 0.3826800584793091, "learning_rate": 0.001, "loss": 1.7385, "step": 567000 }, { "epoch": 48.91787439613527, "grad_norm": 0.24651701748371124, "learning_rate": 0.001, "loss": 1.7326, "step": 567056 }, { "epoch": 48.92270531400966, "grad_norm": 0.5642040967941284, "learning_rate": 0.001, "loss": 1.7325, "step": 567112 }, { "epoch": 48.927536231884055, "grad_norm": 0.3014202117919922, "learning_rate": 0.001, "loss": 1.7398, "step": 567168 }, { "epoch": 48.932367149758456, "grad_norm": 0.438322514295578, "learning_rate": 0.001, "loss": 1.7245, "step": 567224 }, { "epoch": 48.93719806763285, "grad_norm": 0.35445329546928406, "learning_rate": 0.001, "loss": 1.7346, "step": 567280 }, { "epoch": 48.94202898550725, "grad_norm": 0.3005673289299011, "learning_rate": 0.001, "loss": 1.7378, "step": 567336 }, { "epoch": 48.94685990338164, "grad_norm": 0.6546096205711365, "learning_rate": 0.001, "loss": 1.7342, "step": 567392 }, { "epoch": 48.95169082125604, "grad_norm": 0.28775474429130554, "learning_rate": 0.001, "loss": 1.733, "step": 567448 }, { "epoch": 48.95652173913044, "grad_norm": 32.09855651855469, "learning_rate": 0.001, "loss": 1.727, "step": 567504 }, { "epoch": 48.96135265700483, "grad_norm": 0.304426908493042, "learning_rate": 0.001, "loss": 1.736, "step": 567560 }, { "epoch": 48.966183574879224, "grad_norm": 0.29832354187965393, "learning_rate": 0.001, "loss": 1.742, "step": 567616 }, { "epoch": 48.971014492753625, "grad_norm": 0.4310396611690521, "learning_rate": 0.001, "loss": 1.7321, "step": 567672 }, { "epoch": 48.97584541062802, "grad_norm": 0.36771413683891296, "learning_rate": 0.001, "loss": 1.7349, "step": 567728 }, { "epoch": 48.98067632850242, "grad_norm": 0.60688316822052, "learning_rate": 0.001, "loss": 1.7329, "step": 567784 }, { "epoch": 48.98550724637681, "grad_norm": 4.7237019538879395, "learning_rate": 0.001, "loss": 1.7397, "step": 567840 }, { "epoch": 48.990338164251206, "grad_norm": 0.3570004999637604, "learning_rate": 0.001, "loss": 1.7487, "step": 567896 }, { "epoch": 48.99516908212561, "grad_norm": 0.3104717433452606, "learning_rate": 0.001, "loss": 1.7467, "step": 567952 }, { "epoch": 49.0, "grad_norm": 0.5126457214355469, "learning_rate": 0.001, "loss": 1.7367, "step": 568008 }, { "epoch": 49.00483091787439, "grad_norm": 0.5701302289962769, "learning_rate": 0.001, "loss": 1.7067, "step": 568064 }, { "epoch": 49.009661835748794, "grad_norm": 3.032111406326294, "learning_rate": 0.001, "loss": 1.704, "step": 568120 }, { "epoch": 49.01449275362319, "grad_norm": 1.6264134645462036, "learning_rate": 0.001, "loss": 1.7269, "step": 568176 }, { "epoch": 49.01932367149758, "grad_norm": 0.37111741304397583, "learning_rate": 0.001, "loss": 1.7134, "step": 568232 }, { "epoch": 49.02415458937198, "grad_norm": 0.3367540240287781, "learning_rate": 0.001, "loss": 1.7019, "step": 568288 }, { "epoch": 49.028985507246375, "grad_norm": 0.6716726422309875, "learning_rate": 0.001, "loss": 1.7177, "step": 568344 }, { "epoch": 49.033816425120776, "grad_norm": 0.5578674674034119, "learning_rate": 0.001, "loss": 1.7171, "step": 568400 }, { "epoch": 49.03864734299517, "grad_norm": 0.2774312198162079, "learning_rate": 0.001, "loss": 1.7078, "step": 568456 }, { "epoch": 49.04347826086956, "grad_norm": 0.3385161757469177, "learning_rate": 0.001, "loss": 1.7065, "step": 568512 }, { "epoch": 49.04830917874396, "grad_norm": 1.0968073606491089, "learning_rate": 0.001, "loss": 1.7201, "step": 568568 }, { "epoch": 49.05314009661836, "grad_norm": 2.4758543968200684, "learning_rate": 0.001, "loss": 1.711, "step": 568624 }, { "epoch": 49.05797101449275, "grad_norm": 27.111942291259766, "learning_rate": 0.001, "loss": 1.7196, "step": 568680 }, { "epoch": 49.06280193236715, "grad_norm": 0.7593479156494141, "learning_rate": 0.001, "loss": 1.7196, "step": 568736 }, { "epoch": 49.067632850241544, "grad_norm": 17.206069946289062, "learning_rate": 0.001, "loss": 1.729, "step": 568792 }, { "epoch": 49.072463768115945, "grad_norm": 0.28531450033187866, "learning_rate": 0.001, "loss": 1.737, "step": 568848 }, { "epoch": 49.07729468599034, "grad_norm": 2.2749199867248535, "learning_rate": 0.001, "loss": 1.7353, "step": 568904 }, { "epoch": 49.08212560386473, "grad_norm": 1.751023769378662, "learning_rate": 0.001, "loss": 1.7164, "step": 568960 }, { "epoch": 49.08695652173913, "grad_norm": 3.920602321624756, "learning_rate": 0.001, "loss": 1.7172, "step": 569016 }, { "epoch": 49.091787439613526, "grad_norm": 0.4069245457649231, "learning_rate": 0.001, "loss": 1.7188, "step": 569072 }, { "epoch": 49.09661835748792, "grad_norm": 0.28642383217811584, "learning_rate": 0.001, "loss": 1.7196, "step": 569128 }, { "epoch": 49.10144927536232, "grad_norm": 1.017494797706604, "learning_rate": 0.001, "loss": 1.717, "step": 569184 }, { "epoch": 49.106280193236714, "grad_norm": 55.02179718017578, "learning_rate": 0.001, "loss": 1.7366, "step": 569240 }, { "epoch": 49.111111111111114, "grad_norm": 0.4570782482624054, "learning_rate": 0.001, "loss": 1.7311, "step": 569296 }, { "epoch": 49.11594202898551, "grad_norm": 1.3910317420959473, "learning_rate": 0.001, "loss": 1.733, "step": 569352 }, { "epoch": 49.1207729468599, "grad_norm": 2.5197677612304688, "learning_rate": 0.001, "loss": 1.7288, "step": 569408 }, { "epoch": 49.1256038647343, "grad_norm": 0.28853344917297363, "learning_rate": 0.001, "loss": 1.737, "step": 569464 }, { "epoch": 49.130434782608695, "grad_norm": 0.3744446933269501, "learning_rate": 0.001, "loss": 1.7444, "step": 569520 }, { "epoch": 49.13526570048309, "grad_norm": 1.2704921960830688, "learning_rate": 0.001, "loss": 1.7524, "step": 569576 }, { "epoch": 49.14009661835749, "grad_norm": 0.31797486543655396, "learning_rate": 0.001, "loss": 1.7393, "step": 569632 }, { "epoch": 49.14492753623188, "grad_norm": 0.3209443986415863, "learning_rate": 0.001, "loss": 1.7478, "step": 569688 }, { "epoch": 49.14975845410628, "grad_norm": 0.3100528120994568, "learning_rate": 0.001, "loss": 1.7368, "step": 569744 }, { "epoch": 49.15458937198068, "grad_norm": 0.38484281301498413, "learning_rate": 0.001, "loss": 1.7283, "step": 569800 }, { "epoch": 49.15942028985507, "grad_norm": 3.3547329902648926, "learning_rate": 0.001, "loss": 1.7298, "step": 569856 }, { "epoch": 49.16425120772947, "grad_norm": 1.142453670501709, "learning_rate": 0.001, "loss": 1.7264, "step": 569912 }, { "epoch": 49.169082125603865, "grad_norm": 0.5741724967956543, "learning_rate": 0.001, "loss": 1.7185, "step": 569968 }, { "epoch": 49.17391304347826, "grad_norm": 0.36683547496795654, "learning_rate": 0.001, "loss": 1.7221, "step": 570024 }, { "epoch": 49.17874396135266, "grad_norm": 0.884395956993103, "learning_rate": 0.001, "loss": 1.7208, "step": 570080 }, { "epoch": 49.18357487922705, "grad_norm": 0.671528697013855, "learning_rate": 0.001, "loss": 1.72, "step": 570136 }, { "epoch": 49.18840579710145, "grad_norm": 0.6723687052726746, "learning_rate": 0.001, "loss": 1.7151, "step": 570192 }, { "epoch": 49.193236714975846, "grad_norm": 0.30434340238571167, "learning_rate": 0.001, "loss": 1.7196, "step": 570248 }, { "epoch": 49.19806763285024, "grad_norm": 0.8924299478530884, "learning_rate": 0.001, "loss": 1.7141, "step": 570304 }, { "epoch": 49.20289855072464, "grad_norm": 0.4536803662776947, "learning_rate": 0.001, "loss": 1.717, "step": 570360 }, { "epoch": 49.207729468599034, "grad_norm": 4.203451156616211, "learning_rate": 0.001, "loss": 1.7177, "step": 570416 }, { "epoch": 49.21256038647343, "grad_norm": 0.7614284753799438, "learning_rate": 0.001, "loss": 1.7179, "step": 570472 }, { "epoch": 49.21739130434783, "grad_norm": 1.0777647495269775, "learning_rate": 0.001, "loss": 1.7162, "step": 570528 }, { "epoch": 49.22222222222222, "grad_norm": 0.3041408658027649, "learning_rate": 0.001, "loss": 1.7108, "step": 570584 }, { "epoch": 49.227053140096615, "grad_norm": 1.3315868377685547, "learning_rate": 0.001, "loss": 1.7292, "step": 570640 }, { "epoch": 49.231884057971016, "grad_norm": 8.672272682189941, "learning_rate": 0.001, "loss": 1.7259, "step": 570696 }, { "epoch": 49.23671497584541, "grad_norm": 3.1668944358825684, "learning_rate": 0.001, "loss": 1.7381, "step": 570752 }, { "epoch": 49.24154589371981, "grad_norm": 7.479694843292236, "learning_rate": 0.001, "loss": 1.7296, "step": 570808 }, { "epoch": 49.2463768115942, "grad_norm": 0.46210435032844543, "learning_rate": 0.001, "loss": 1.7243, "step": 570864 }, { "epoch": 49.2512077294686, "grad_norm": 16.812578201293945, "learning_rate": 0.001, "loss": 1.7334, "step": 570920 }, { "epoch": 49.256038647343, "grad_norm": 2.497494697570801, "learning_rate": 0.001, "loss": 1.7318, "step": 570976 }, { "epoch": 49.26086956521739, "grad_norm": 0.4537156820297241, "learning_rate": 0.001, "loss": 1.7361, "step": 571032 }, { "epoch": 49.265700483091784, "grad_norm": 0.8682971000671387, "learning_rate": 0.001, "loss": 1.7264, "step": 571088 }, { "epoch": 49.270531400966185, "grad_norm": 0.8241739273071289, "learning_rate": 0.001, "loss": 1.7369, "step": 571144 }, { "epoch": 49.27536231884058, "grad_norm": 0.5132399201393127, "learning_rate": 0.001, "loss": 1.7273, "step": 571200 }, { "epoch": 49.28019323671498, "grad_norm": 1.0879685878753662, "learning_rate": 0.001, "loss": 1.724, "step": 571256 }, { "epoch": 49.28502415458937, "grad_norm": 0.8041641712188721, "learning_rate": 0.001, "loss": 1.7223, "step": 571312 }, { "epoch": 49.289855072463766, "grad_norm": 0.3210260272026062, "learning_rate": 0.001, "loss": 1.7354, "step": 571368 }, { "epoch": 49.29468599033817, "grad_norm": 0.4556408226490021, "learning_rate": 0.001, "loss": 1.7273, "step": 571424 }, { "epoch": 49.29951690821256, "grad_norm": 0.6946066617965698, "learning_rate": 0.001, "loss": 1.7168, "step": 571480 }, { "epoch": 49.30434782608695, "grad_norm": 0.7491017580032349, "learning_rate": 0.001, "loss": 1.7308, "step": 571536 }, { "epoch": 49.309178743961354, "grad_norm": 1.2588515281677246, "learning_rate": 0.001, "loss": 1.7308, "step": 571592 }, { "epoch": 49.31400966183575, "grad_norm": 0.9831664562225342, "learning_rate": 0.001, "loss": 1.7458, "step": 571648 }, { "epoch": 49.31884057971015, "grad_norm": 1.5311278104782104, "learning_rate": 0.001, "loss": 1.7493, "step": 571704 }, { "epoch": 49.32367149758454, "grad_norm": 1.0603083372116089, "learning_rate": 0.001, "loss": 1.7465, "step": 571760 }, { "epoch": 49.328502415458935, "grad_norm": 1.5105196237564087, "learning_rate": 0.001, "loss": 1.7263, "step": 571816 }, { "epoch": 49.333333333333336, "grad_norm": 0.24933646619319916, "learning_rate": 0.001, "loss": 1.734, "step": 571872 }, { "epoch": 49.33816425120773, "grad_norm": 0.3715778887271881, "learning_rate": 0.001, "loss": 1.7175, "step": 571928 }, { "epoch": 49.34299516908212, "grad_norm": 0.4938346743583679, "learning_rate": 0.001, "loss": 1.7213, "step": 571984 }, { "epoch": 49.34782608695652, "grad_norm": 0.41109439730644226, "learning_rate": 0.001, "loss": 1.7237, "step": 572040 }, { "epoch": 49.35265700483092, "grad_norm": 0.32072731852531433, "learning_rate": 0.001, "loss": 1.7198, "step": 572096 }, { "epoch": 49.35748792270532, "grad_norm": 1.0099269151687622, "learning_rate": 0.001, "loss": 1.7242, "step": 572152 }, { "epoch": 49.36231884057971, "grad_norm": 1.7681523561477661, "learning_rate": 0.001, "loss": 1.7203, "step": 572208 }, { "epoch": 49.367149758454104, "grad_norm": 0.29904553294181824, "learning_rate": 0.001, "loss": 1.7226, "step": 572264 }, { "epoch": 49.371980676328505, "grad_norm": 0.7818116545677185, "learning_rate": 0.001, "loss": 1.7127, "step": 572320 }, { "epoch": 49.3768115942029, "grad_norm": 41.033355712890625, "learning_rate": 0.001, "loss": 1.7182, "step": 572376 }, { "epoch": 49.38164251207729, "grad_norm": 0.4807460606098175, "learning_rate": 0.001, "loss": 1.7292, "step": 572432 }, { "epoch": 49.38647342995169, "grad_norm": 1.3753248453140259, "learning_rate": 0.001, "loss": 1.7265, "step": 572488 }, { "epoch": 49.391304347826086, "grad_norm": 0.5084253549575806, "learning_rate": 0.001, "loss": 1.7346, "step": 572544 }, { "epoch": 49.39613526570048, "grad_norm": 0.31414175033569336, "learning_rate": 0.001, "loss": 1.7317, "step": 572600 }, { "epoch": 49.40096618357488, "grad_norm": 0.25817960500717163, "learning_rate": 0.001, "loss": 1.7314, "step": 572656 }, { "epoch": 49.405797101449274, "grad_norm": 0.3373993933200836, "learning_rate": 0.001, "loss": 1.7361, "step": 572712 }, { "epoch": 49.410628019323674, "grad_norm": 0.27275413274765015, "learning_rate": 0.001, "loss": 1.7333, "step": 572768 }, { "epoch": 49.41545893719807, "grad_norm": 2.0878312587738037, "learning_rate": 0.001, "loss": 1.7205, "step": 572824 }, { "epoch": 49.42028985507246, "grad_norm": 0.6426116228103638, "learning_rate": 0.001, "loss": 1.7071, "step": 572880 }, { "epoch": 49.42512077294686, "grad_norm": 1.2486121654510498, "learning_rate": 0.001, "loss": 1.7166, "step": 572936 }, { "epoch": 49.429951690821255, "grad_norm": 0.33519333600997925, "learning_rate": 0.001, "loss": 1.7117, "step": 572992 }, { "epoch": 49.43478260869565, "grad_norm": 3.0863380432128906, "learning_rate": 0.001, "loss": 1.7228, "step": 573048 }, { "epoch": 49.43961352657005, "grad_norm": 0.2836727797985077, "learning_rate": 0.001, "loss": 1.725, "step": 573104 }, { "epoch": 49.44444444444444, "grad_norm": 1.4792267084121704, "learning_rate": 0.001, "loss": 1.7128, "step": 573160 }, { "epoch": 49.44927536231884, "grad_norm": 0.32153230905532837, "learning_rate": 0.001, "loss": 1.7181, "step": 573216 }, { "epoch": 49.45410628019324, "grad_norm": 0.34001925587654114, "learning_rate": 0.001, "loss": 1.7171, "step": 573272 }, { "epoch": 49.45893719806763, "grad_norm": 0.3107743263244629, "learning_rate": 0.001, "loss": 1.7134, "step": 573328 }, { "epoch": 49.46376811594203, "grad_norm": 0.27029359340667725, "learning_rate": 0.001, "loss": 1.7129, "step": 573384 }, { "epoch": 49.468599033816425, "grad_norm": 0.27811601758003235, "learning_rate": 0.001, "loss": 1.7178, "step": 573440 }, { "epoch": 49.47342995169082, "grad_norm": 0.34726932644844055, "learning_rate": 0.001, "loss": 1.7216, "step": 573496 }, { "epoch": 49.47826086956522, "grad_norm": 0.7316891551017761, "learning_rate": 0.001, "loss": 1.7241, "step": 573552 }, { "epoch": 49.48309178743961, "grad_norm": 0.3362341821193695, "learning_rate": 0.001, "loss": 1.7281, "step": 573608 }, { "epoch": 49.48792270531401, "grad_norm": 0.25283247232437134, "learning_rate": 0.001, "loss": 1.7277, "step": 573664 }, { "epoch": 49.492753623188406, "grad_norm": 0.2722793221473694, "learning_rate": 0.001, "loss": 1.729, "step": 573720 }, { "epoch": 49.4975845410628, "grad_norm": 5.289150238037109, "learning_rate": 0.001, "loss": 1.7175, "step": 573776 }, { "epoch": 49.5024154589372, "grad_norm": 0.46168652176856995, "learning_rate": 0.001, "loss": 1.7115, "step": 573832 }, { "epoch": 49.507246376811594, "grad_norm": 0.33269959688186646, "learning_rate": 0.001, "loss": 1.7157, "step": 573888 }, { "epoch": 49.51207729468599, "grad_norm": 0.26609066128730774, "learning_rate": 0.001, "loss": 1.7176, "step": 573944 }, { "epoch": 49.51690821256039, "grad_norm": 0.3149854838848114, "learning_rate": 0.001, "loss": 1.7106, "step": 574000 }, { "epoch": 49.52173913043478, "grad_norm": 1.7439597845077515, "learning_rate": 0.001, "loss": 1.7219, "step": 574056 }, { "epoch": 49.52657004830918, "grad_norm": 0.3215422034263611, "learning_rate": 0.001, "loss": 1.7182, "step": 574112 }, { "epoch": 49.531400966183575, "grad_norm": 13.473784446716309, "learning_rate": 0.001, "loss": 1.7235, "step": 574168 }, { "epoch": 49.53623188405797, "grad_norm": 0.6081889867782593, "learning_rate": 0.001, "loss": 1.7195, "step": 574224 }, { "epoch": 49.54106280193237, "grad_norm": 0.8500441312789917, "learning_rate": 0.001, "loss": 1.7273, "step": 574280 }, { "epoch": 49.54589371980676, "grad_norm": 0.35605868697166443, "learning_rate": 0.001, "loss": 1.7266, "step": 574336 }, { "epoch": 49.55072463768116, "grad_norm": 0.3302740156650543, "learning_rate": 0.001, "loss": 1.7131, "step": 574392 }, { "epoch": 49.55555555555556, "grad_norm": 0.32609379291534424, "learning_rate": 0.001, "loss": 1.7125, "step": 574448 }, { "epoch": 49.56038647342995, "grad_norm": 3.1898558139801025, "learning_rate": 0.001, "loss": 1.7229, "step": 574504 }, { "epoch": 49.56521739130435, "grad_norm": 0.2807445228099823, "learning_rate": 0.001, "loss": 1.7106, "step": 574560 }, { "epoch": 49.570048309178745, "grad_norm": 0.8145819306373596, "learning_rate": 0.001, "loss": 1.7167, "step": 574616 }, { "epoch": 49.57487922705314, "grad_norm": 0.40503638982772827, "learning_rate": 0.001, "loss": 1.7107, "step": 574672 }, { "epoch": 49.57971014492754, "grad_norm": 0.2745690941810608, "learning_rate": 0.001, "loss": 1.7064, "step": 574728 }, { "epoch": 49.58454106280193, "grad_norm": 1.760508418083191, "learning_rate": 0.001, "loss": 1.7154, "step": 574784 }, { "epoch": 49.589371980676326, "grad_norm": 0.4716476798057556, "learning_rate": 0.001, "loss": 1.7186, "step": 574840 }, { "epoch": 49.594202898550726, "grad_norm": 0.2968688905239105, "learning_rate": 0.001, "loss": 1.7268, "step": 574896 }, { "epoch": 49.59903381642512, "grad_norm": 0.29158657789230347, "learning_rate": 0.001, "loss": 1.7146, "step": 574952 }, { "epoch": 49.60386473429952, "grad_norm": 0.43078359961509705, "learning_rate": 0.001, "loss": 1.7228, "step": 575008 }, { "epoch": 49.608695652173914, "grad_norm": 0.7296096086502075, "learning_rate": 0.001, "loss": 1.7294, "step": 575064 }, { "epoch": 49.61352657004831, "grad_norm": 0.29182037711143494, "learning_rate": 0.001, "loss": 1.7422, "step": 575120 }, { "epoch": 49.61835748792271, "grad_norm": 0.9701921343803406, "learning_rate": 0.001, "loss": 1.7281, "step": 575176 }, { "epoch": 49.6231884057971, "grad_norm": 0.6487398743629456, "learning_rate": 0.001, "loss": 1.7256, "step": 575232 }, { "epoch": 49.628019323671495, "grad_norm": 0.3682495653629303, "learning_rate": 0.001, "loss": 1.7346, "step": 575288 }, { "epoch": 49.632850241545896, "grad_norm": 1.5476772785186768, "learning_rate": 0.001, "loss": 1.738, "step": 575344 }, { "epoch": 49.63768115942029, "grad_norm": 0.6587009429931641, "learning_rate": 0.001, "loss": 1.7442, "step": 575400 }, { "epoch": 49.64251207729468, "grad_norm": 0.34173107147216797, "learning_rate": 0.001, "loss": 1.7477, "step": 575456 }, { "epoch": 49.64734299516908, "grad_norm": 0.46623316407203674, "learning_rate": 0.001, "loss": 1.741, "step": 575512 }, { "epoch": 49.65217391304348, "grad_norm": 0.30779990553855896, "learning_rate": 0.001, "loss": 1.748, "step": 575568 }, { "epoch": 49.65700483091788, "grad_norm": 0.5317642092704773, "learning_rate": 0.001, "loss": 1.7279, "step": 575624 }, { "epoch": 49.66183574879227, "grad_norm": 6.003671169281006, "learning_rate": 0.001, "loss": 1.7246, "step": 575680 }, { "epoch": 49.666666666666664, "grad_norm": 0.277752548456192, "learning_rate": 0.001, "loss": 1.7244, "step": 575736 }, { "epoch": 49.671497584541065, "grad_norm": 0.40121662616729736, "learning_rate": 0.001, "loss": 1.7249, "step": 575792 }, { "epoch": 49.67632850241546, "grad_norm": 2.356654644012451, "learning_rate": 0.001, "loss": 1.7243, "step": 575848 }, { "epoch": 49.68115942028985, "grad_norm": 0.8912389874458313, "learning_rate": 0.001, "loss": 1.7164, "step": 575904 }, { "epoch": 49.68599033816425, "grad_norm": 2.393475294113159, "learning_rate": 0.001, "loss": 1.7197, "step": 575960 }, { "epoch": 49.690821256038646, "grad_norm": 0.28964176774024963, "learning_rate": 0.001, "loss": 1.7297, "step": 576016 }, { "epoch": 49.69565217391305, "grad_norm": 0.33668196201324463, "learning_rate": 0.001, "loss": 1.7243, "step": 576072 }, { "epoch": 49.70048309178744, "grad_norm": 0.3650907278060913, "learning_rate": 0.001, "loss": 1.7324, "step": 576128 }, { "epoch": 49.70531400966183, "grad_norm": 1.5397624969482422, "learning_rate": 0.001, "loss": 1.7293, "step": 576184 }, { "epoch": 49.710144927536234, "grad_norm": 0.7408667802810669, "learning_rate": 0.001, "loss": 1.7283, "step": 576240 }, { "epoch": 49.71497584541063, "grad_norm": 0.46333566308021545, "learning_rate": 0.001, "loss": 1.7232, "step": 576296 }, { "epoch": 49.71980676328502, "grad_norm": 0.44951748847961426, "learning_rate": 0.001, "loss": 1.7216, "step": 576352 }, { "epoch": 49.72463768115942, "grad_norm": 1.2410917282104492, "learning_rate": 0.001, "loss": 1.7242, "step": 576408 }, { "epoch": 49.729468599033815, "grad_norm": 0.4852130115032196, "learning_rate": 0.001, "loss": 1.7204, "step": 576464 }, { "epoch": 49.734299516908216, "grad_norm": 2.499396324157715, "learning_rate": 0.001, "loss": 1.7205, "step": 576520 }, { "epoch": 49.73913043478261, "grad_norm": 0.6509780883789062, "learning_rate": 0.001, "loss": 1.7236, "step": 576576 }, { "epoch": 49.743961352657, "grad_norm": 0.5429540872573853, "learning_rate": 0.001, "loss": 1.7217, "step": 576632 }, { "epoch": 49.7487922705314, "grad_norm": 0.30181884765625, "learning_rate": 0.001, "loss": 1.7106, "step": 576688 }, { "epoch": 49.7536231884058, "grad_norm": 0.37169164419174194, "learning_rate": 0.001, "loss": 1.7248, "step": 576744 }, { "epoch": 49.75845410628019, "grad_norm": 0.29045841097831726, "learning_rate": 0.001, "loss": 1.7087, "step": 576800 }, { "epoch": 49.76328502415459, "grad_norm": 0.47012314200401306, "learning_rate": 0.001, "loss": 1.7141, "step": 576856 }, { "epoch": 49.768115942028984, "grad_norm": 0.32227471470832825, "learning_rate": 0.001, "loss": 1.7174, "step": 576912 }, { "epoch": 49.772946859903385, "grad_norm": 0.2925737202167511, "learning_rate": 0.001, "loss": 1.72, "step": 576968 }, { "epoch": 49.77777777777778, "grad_norm": 0.7784834504127502, "learning_rate": 0.001, "loss": 1.7142, "step": 577024 }, { "epoch": 49.78260869565217, "grad_norm": 0.7404419779777527, "learning_rate": 0.001, "loss": 1.716, "step": 577080 }, { "epoch": 49.78743961352657, "grad_norm": 0.661585807800293, "learning_rate": 0.001, "loss": 1.7264, "step": 577136 }, { "epoch": 49.792270531400966, "grad_norm": 0.3852500915527344, "learning_rate": 0.001, "loss": 1.7265, "step": 577192 }, { "epoch": 49.79710144927536, "grad_norm": 0.48991668224334717, "learning_rate": 0.001, "loss": 1.7351, "step": 577248 }, { "epoch": 49.80193236714976, "grad_norm": 1.069893717765808, "learning_rate": 0.001, "loss": 1.7432, "step": 577304 }, { "epoch": 49.806763285024154, "grad_norm": 0.6361894607543945, "learning_rate": 0.001, "loss": 1.7573, "step": 577360 }, { "epoch": 49.81159420289855, "grad_norm": 0.43160852789878845, "learning_rate": 0.001, "loss": 1.7438, "step": 577416 }, { "epoch": 49.81642512077295, "grad_norm": 0.30941981077194214, "learning_rate": 0.001, "loss": 1.7476, "step": 577472 }, { "epoch": 49.82125603864734, "grad_norm": 0.5266588926315308, "learning_rate": 0.001, "loss": 1.7882, "step": 577528 }, { "epoch": 49.82608695652174, "grad_norm": 2.7150566577911377, "learning_rate": 0.001, "loss": 1.7564, "step": 577584 }, { "epoch": 49.830917874396135, "grad_norm": 0.40805700421333313, "learning_rate": 0.001, "loss": 1.7623, "step": 577640 }, { "epoch": 49.83574879227053, "grad_norm": 0.2916834056377411, "learning_rate": 0.001, "loss": 1.762, "step": 577696 }, { "epoch": 49.84057971014493, "grad_norm": 0.5613381862640381, "learning_rate": 0.001, "loss": 1.7414, "step": 577752 }, { "epoch": 49.84541062801932, "grad_norm": 1.4183069467544556, "learning_rate": 0.001, "loss": 1.7436, "step": 577808 }, { "epoch": 49.85024154589372, "grad_norm": 0.37347427010536194, "learning_rate": 0.001, "loss": 1.7438, "step": 577864 }, { "epoch": 49.85507246376812, "grad_norm": 0.6687127947807312, "learning_rate": 0.001, "loss": 1.7367, "step": 577920 }, { "epoch": 49.85990338164251, "grad_norm": 0.7395805716514587, "learning_rate": 0.001, "loss": 1.7466, "step": 577976 }, { "epoch": 49.86473429951691, "grad_norm": 1.0020817518234253, "learning_rate": 0.001, "loss": 1.7421, "step": 578032 }, { "epoch": 49.869565217391305, "grad_norm": 0.33438247442245483, "learning_rate": 0.001, "loss": 1.7329, "step": 578088 }, { "epoch": 49.8743961352657, "grad_norm": 2.732793092727661, "learning_rate": 0.001, "loss": 1.7407, "step": 578144 }, { "epoch": 49.8792270531401, "grad_norm": 0.3167946934700012, "learning_rate": 0.001, "loss": 1.751, "step": 578200 }, { "epoch": 49.88405797101449, "grad_norm": 0.34583985805511475, "learning_rate": 0.001, "loss": 1.7377, "step": 578256 }, { "epoch": 49.888888888888886, "grad_norm": 10.259955406188965, "learning_rate": 0.001, "loss": 1.7303, "step": 578312 }, { "epoch": 49.893719806763286, "grad_norm": 0.6764304041862488, "learning_rate": 0.001, "loss": 1.7257, "step": 578368 }, { "epoch": 49.89855072463768, "grad_norm": 1.349168300628662, "learning_rate": 0.001, "loss": 1.7378, "step": 578424 }, { "epoch": 49.90338164251208, "grad_norm": 0.29108908772468567, "learning_rate": 0.001, "loss": 1.7459, "step": 578480 }, { "epoch": 49.908212560386474, "grad_norm": 1.0709141492843628, "learning_rate": 0.001, "loss": 1.7554, "step": 578536 }, { "epoch": 49.91304347826087, "grad_norm": 0.3281553089618683, "learning_rate": 0.001, "loss": 1.7655, "step": 578592 }, { "epoch": 49.91787439613527, "grad_norm": 0.9314797520637512, "learning_rate": 0.001, "loss": 1.7524, "step": 578648 }, { "epoch": 49.92270531400966, "grad_norm": 0.557636559009552, "learning_rate": 0.001, "loss": 1.7467, "step": 578704 }, { "epoch": 49.927536231884055, "grad_norm": 1.226763129234314, "learning_rate": 0.001, "loss": 1.739, "step": 578760 }, { "epoch": 49.932367149758456, "grad_norm": 0.3198475241661072, "learning_rate": 0.001, "loss": 1.7438, "step": 578816 }, { "epoch": 49.93719806763285, "grad_norm": 0.2477244883775711, "learning_rate": 0.001, "loss": 1.7395, "step": 578872 }, { "epoch": 49.94202898550725, "grad_norm": 0.30659806728363037, "learning_rate": 0.001, "loss": 1.7443, "step": 578928 }, { "epoch": 49.94685990338164, "grad_norm": 0.5598821640014648, "learning_rate": 0.001, "loss": 1.7497, "step": 578984 }, { "epoch": 49.95169082125604, "grad_norm": 0.30812034010887146, "learning_rate": 0.001, "loss": 1.7421, "step": 579040 }, { "epoch": 49.95652173913044, "grad_norm": 0.8377183079719543, "learning_rate": 0.001, "loss": 1.7308, "step": 579096 }, { "epoch": 49.96135265700483, "grad_norm": 1.4127155542373657, "learning_rate": 0.001, "loss": 1.7338, "step": 579152 }, { "epoch": 49.966183574879224, "grad_norm": 0.30669254064559937, "learning_rate": 0.001, "loss": 1.7293, "step": 579208 }, { "epoch": 49.971014492753625, "grad_norm": 0.42687511444091797, "learning_rate": 0.001, "loss": 1.7349, "step": 579264 }, { "epoch": 49.97584541062802, "grad_norm": 0.7905052304267883, "learning_rate": 0.001, "loss": 1.724, "step": 579320 }, { "epoch": 49.98067632850242, "grad_norm": 2.008997678756714, "learning_rate": 0.001, "loss": 1.7413, "step": 579376 }, { "epoch": 49.98550724637681, "grad_norm": 0.31003424525260925, "learning_rate": 0.001, "loss": 1.7438, "step": 579432 }, { "epoch": 49.990338164251206, "grad_norm": 0.6134177446365356, "learning_rate": 0.001, "loss": 1.7418, "step": 579488 }, { "epoch": 49.99516908212561, "grad_norm": 0.3617597222328186, "learning_rate": 0.001, "loss": 1.7374, "step": 579544 }, { "epoch": 50.0, "grad_norm": 0.2905556559562683, "learning_rate": 0.001, "loss": 1.7342, "step": 579600 }, { "epoch": 50.00483091787439, "grad_norm": 0.35544121265411377, "learning_rate": 0.001, "loss": 1.6903, "step": 579656 }, { "epoch": 50.009661835748794, "grad_norm": 0.2630232870578766, "learning_rate": 0.001, "loss": 1.6972, "step": 579712 }, { "epoch": 50.01449275362319, "grad_norm": 0.47451919317245483, "learning_rate": 0.001, "loss": 1.6964, "step": 579768 }, { "epoch": 50.01932367149758, "grad_norm": 0.3669881820678711, "learning_rate": 0.001, "loss": 1.6913, "step": 579824 }, { "epoch": 50.02415458937198, "grad_norm": 0.3273901641368866, "learning_rate": 0.001, "loss": 1.6886, "step": 579880 }, { "epoch": 50.028985507246375, "grad_norm": 1.2890019416809082, "learning_rate": 0.001, "loss": 1.6905, "step": 579936 }, { "epoch": 50.033816425120776, "grad_norm": 0.32456862926483154, "learning_rate": 0.001, "loss": 1.7024, "step": 579992 }, { "epoch": 50.03864734299517, "grad_norm": 4.109186172485352, "learning_rate": 0.001, "loss": 1.702, "step": 580048 }, { "epoch": 50.04347826086956, "grad_norm": 0.6207583546638489, "learning_rate": 0.001, "loss": 1.6975, "step": 580104 }, { "epoch": 50.04830917874396, "grad_norm": 0.2849295139312744, "learning_rate": 0.001, "loss": 1.6934, "step": 580160 }, { "epoch": 50.05314009661836, "grad_norm": 0.5930573344230652, "learning_rate": 0.001, "loss": 1.6814, "step": 580216 }, { "epoch": 50.05797101449275, "grad_norm": 0.2736207842826843, "learning_rate": 0.001, "loss": 1.6868, "step": 580272 }, { "epoch": 50.06280193236715, "grad_norm": 0.2853984832763672, "learning_rate": 0.001, "loss": 1.6883, "step": 580328 }, { "epoch": 50.067632850241544, "grad_norm": 0.28353506326675415, "learning_rate": 0.001, "loss": 1.6873, "step": 580384 }, { "epoch": 50.072463768115945, "grad_norm": 0.5116716027259827, "learning_rate": 0.001, "loss": 1.6885, "step": 580440 }, { "epoch": 50.07729468599034, "grad_norm": 0.3162793219089508, "learning_rate": 0.001, "loss": 1.6896, "step": 580496 }, { "epoch": 50.08212560386473, "grad_norm": 1.0416921377182007, "learning_rate": 0.001, "loss": 1.6898, "step": 580552 }, { "epoch": 50.08695652173913, "grad_norm": 0.4425957202911377, "learning_rate": 0.001, "loss": 1.6894, "step": 580608 }, { "epoch": 50.091787439613526, "grad_norm": 0.6890107989311218, "learning_rate": 0.001, "loss": 1.6895, "step": 580664 }, { "epoch": 50.09661835748792, "grad_norm": 0.6856723427772522, "learning_rate": 0.001, "loss": 1.6898, "step": 580720 }, { "epoch": 50.10144927536232, "grad_norm": 0.43156948685646057, "learning_rate": 0.001, "loss": 1.6938, "step": 580776 }, { "epoch": 50.106280193236714, "grad_norm": 0.33662086725234985, "learning_rate": 0.001, "loss": 1.6879, "step": 580832 }, { "epoch": 50.111111111111114, "grad_norm": 0.49947431683540344, "learning_rate": 0.001, "loss": 1.6926, "step": 580888 }, { "epoch": 50.11594202898551, "grad_norm": 0.2873063385486603, "learning_rate": 0.001, "loss": 1.6871, "step": 580944 }, { "epoch": 50.1207729468599, "grad_norm": 9.462916374206543, "learning_rate": 0.001, "loss": 1.6916, "step": 581000 }, { "epoch": 50.1256038647343, "grad_norm": 0.3304596245288849, "learning_rate": 0.001, "loss": 1.6896, "step": 581056 }, { "epoch": 50.130434782608695, "grad_norm": 0.3091363310813904, "learning_rate": 0.001, "loss": 1.6902, "step": 581112 }, { "epoch": 50.13526570048309, "grad_norm": 0.7989150285720825, "learning_rate": 0.001, "loss": 1.686, "step": 581168 }, { "epoch": 50.14009661835749, "grad_norm": 0.5335078835487366, "learning_rate": 0.001, "loss": 1.6897, "step": 581224 }, { "epoch": 50.14492753623188, "grad_norm": 1.0843504667282104, "learning_rate": 0.001, "loss": 1.688, "step": 581280 }, { "epoch": 50.14975845410628, "grad_norm": 1.0959315299987793, "learning_rate": 0.001, "loss": 1.6919, "step": 581336 }, { "epoch": 50.15458937198068, "grad_norm": 0.3360099494457245, "learning_rate": 0.001, "loss": 1.6899, "step": 581392 }, { "epoch": 50.15942028985507, "grad_norm": 0.2769608199596405, "learning_rate": 0.001, "loss": 1.687, "step": 581448 }, { "epoch": 50.16425120772947, "grad_norm": 0.3153420090675354, "learning_rate": 0.001, "loss": 1.6983, "step": 581504 }, { "epoch": 50.169082125603865, "grad_norm": 4.138840675354004, "learning_rate": 0.001, "loss": 1.6919, "step": 581560 }, { "epoch": 50.17391304347826, "grad_norm": 0.29385656118392944, "learning_rate": 0.001, "loss": 1.7016, "step": 581616 }, { "epoch": 50.17874396135266, "grad_norm": 0.25755202770233154, "learning_rate": 0.001, "loss": 1.7003, "step": 581672 }, { "epoch": 50.18357487922705, "grad_norm": 0.2635309100151062, "learning_rate": 0.001, "loss": 1.6952, "step": 581728 }, { "epoch": 50.18840579710145, "grad_norm": 0.610984206199646, "learning_rate": 0.001, "loss": 1.6878, "step": 581784 }, { "epoch": 50.193236714975846, "grad_norm": 0.3555743992328644, "learning_rate": 0.001, "loss": 1.693, "step": 581840 }, { "epoch": 50.19806763285024, "grad_norm": 0.7736687064170837, "learning_rate": 0.001, "loss": 1.6909, "step": 581896 }, { "epoch": 50.20289855072464, "grad_norm": 0.46911385655403137, "learning_rate": 0.001, "loss": 1.691, "step": 581952 }, { "epoch": 50.207729468599034, "grad_norm": 0.4877401888370514, "learning_rate": 0.001, "loss": 1.7002, "step": 582008 }, { "epoch": 50.21256038647343, "grad_norm": 0.29058465361595154, "learning_rate": 0.001, "loss": 1.6952, "step": 582064 }, { "epoch": 50.21739130434783, "grad_norm": 0.8059011101722717, "learning_rate": 0.001, "loss": 1.6941, "step": 582120 }, { "epoch": 50.22222222222222, "grad_norm": 0.3804304301738739, "learning_rate": 0.001, "loss": 1.7, "step": 582176 }, { "epoch": 50.227053140096615, "grad_norm": 1.5616884231567383, "learning_rate": 0.001, "loss": 1.7034, "step": 582232 }, { "epoch": 50.231884057971016, "grad_norm": 0.38967716693878174, "learning_rate": 0.001, "loss": 1.7091, "step": 582288 }, { "epoch": 50.23671497584541, "grad_norm": 0.282311350107193, "learning_rate": 0.001, "loss": 1.7041, "step": 582344 }, { "epoch": 50.24154589371981, "grad_norm": 0.27205535769462585, "learning_rate": 0.001, "loss": 1.7017, "step": 582400 }, { "epoch": 50.2463768115942, "grad_norm": 0.3270668387413025, "learning_rate": 0.001, "loss": 1.708, "step": 582456 }, { "epoch": 50.2512077294686, "grad_norm": 0.32446783781051636, "learning_rate": 0.001, "loss": 1.7113, "step": 582512 }, { "epoch": 50.256038647343, "grad_norm": 3.472214937210083, "learning_rate": 0.001, "loss": 1.7026, "step": 582568 }, { "epoch": 50.26086956521739, "grad_norm": 0.3483370542526245, "learning_rate": 0.001, "loss": 1.6914, "step": 582624 }, { "epoch": 50.265700483091784, "grad_norm": 0.49339285492897034, "learning_rate": 0.001, "loss": 1.6934, "step": 582680 }, { "epoch": 50.270531400966185, "grad_norm": 0.3698785901069641, "learning_rate": 0.001, "loss": 1.6886, "step": 582736 }, { "epoch": 50.27536231884058, "grad_norm": 0.5279815196990967, "learning_rate": 0.001, "loss": 1.7005, "step": 582792 }, { "epoch": 50.28019323671498, "grad_norm": 0.28780218958854675, "learning_rate": 0.001, "loss": 1.6963, "step": 582848 }, { "epoch": 50.28502415458937, "grad_norm": 0.302570641040802, "learning_rate": 0.001, "loss": 1.6863, "step": 582904 }, { "epoch": 50.289855072463766, "grad_norm": 0.5403259992599487, "learning_rate": 0.001, "loss": 1.6912, "step": 582960 }, { "epoch": 50.29468599033817, "grad_norm": 0.23936398327350616, "learning_rate": 0.001, "loss": 1.7104, "step": 583016 }, { "epoch": 50.29951690821256, "grad_norm": 0.6516003608703613, "learning_rate": 0.001, "loss": 1.6971, "step": 583072 }, { "epoch": 50.30434782608695, "grad_norm": 0.4362901449203491, "learning_rate": 0.001, "loss": 1.6959, "step": 583128 }, { "epoch": 50.309178743961354, "grad_norm": 0.3020718991756439, "learning_rate": 0.001, "loss": 1.6905, "step": 583184 }, { "epoch": 50.31400966183575, "grad_norm": 1.6075942516326904, "learning_rate": 0.001, "loss": 1.6996, "step": 583240 }, { "epoch": 50.31884057971015, "grad_norm": 0.7993410229682922, "learning_rate": 0.001, "loss": 1.6969, "step": 583296 }, { "epoch": 50.32367149758454, "grad_norm": 0.27840176224708557, "learning_rate": 0.001, "loss": 1.7004, "step": 583352 }, { "epoch": 50.328502415458935, "grad_norm": 0.2870359718799591, "learning_rate": 0.001, "loss": 1.7016, "step": 583408 }, { "epoch": 50.333333333333336, "grad_norm": 0.3222540318965912, "learning_rate": 0.001, "loss": 1.6935, "step": 583464 }, { "epoch": 50.33816425120773, "grad_norm": 0.683553159236908, "learning_rate": 0.001, "loss": 1.6984, "step": 583520 }, { "epoch": 50.34299516908212, "grad_norm": 0.9833421111106873, "learning_rate": 0.001, "loss": 1.6999, "step": 583576 }, { "epoch": 50.34782608695652, "grad_norm": 0.36737021803855896, "learning_rate": 0.001, "loss": 1.6929, "step": 583632 }, { "epoch": 50.35265700483092, "grad_norm": 0.36455318331718445, "learning_rate": 0.001, "loss": 1.7098, "step": 583688 }, { "epoch": 50.35748792270532, "grad_norm": 0.5702621936798096, "learning_rate": 0.001, "loss": 1.7103, "step": 583744 }, { "epoch": 50.36231884057971, "grad_norm": 4.23921012878418, "learning_rate": 0.001, "loss": 1.7064, "step": 583800 }, { "epoch": 50.367149758454104, "grad_norm": 0.978596568107605, "learning_rate": 0.001, "loss": 1.7117, "step": 583856 }, { "epoch": 50.371980676328505, "grad_norm": 0.40410512685775757, "learning_rate": 0.001, "loss": 1.7059, "step": 583912 }, { "epoch": 50.3768115942029, "grad_norm": 0.3986213803291321, "learning_rate": 0.001, "loss": 1.7107, "step": 583968 }, { "epoch": 50.38164251207729, "grad_norm": 0.5522612929344177, "learning_rate": 0.001, "loss": 1.7164, "step": 584024 }, { "epoch": 50.38647342995169, "grad_norm": 1.7215845584869385, "learning_rate": 0.001, "loss": 1.7186, "step": 584080 }, { "epoch": 50.391304347826086, "grad_norm": 3.032722234725952, "learning_rate": 0.001, "loss": 1.7159, "step": 584136 }, { "epoch": 50.39613526570048, "grad_norm": 1.4340418577194214, "learning_rate": 0.001, "loss": 1.7076, "step": 584192 }, { "epoch": 50.40096618357488, "grad_norm": 0.3874742090702057, "learning_rate": 0.001, "loss": 1.7172, "step": 584248 }, { "epoch": 50.405797101449274, "grad_norm": 0.8557310700416565, "learning_rate": 0.001, "loss": 1.7129, "step": 584304 }, { "epoch": 50.410628019323674, "grad_norm": 0.3996860086917877, "learning_rate": 0.001, "loss": 1.7195, "step": 584360 }, { "epoch": 50.41545893719807, "grad_norm": 0.46503958106040955, "learning_rate": 0.001, "loss": 1.7154, "step": 584416 }, { "epoch": 50.42028985507246, "grad_norm": 0.3005656599998474, "learning_rate": 0.001, "loss": 1.7221, "step": 584472 }, { "epoch": 50.42512077294686, "grad_norm": 0.25138208270072937, "learning_rate": 0.001, "loss": 1.716, "step": 584528 }, { "epoch": 50.429951690821255, "grad_norm": 0.2973655164241791, "learning_rate": 0.001, "loss": 1.7275, "step": 584584 }, { "epoch": 50.43478260869565, "grad_norm": 25.00265884399414, "learning_rate": 0.001, "loss": 1.7205, "step": 584640 }, { "epoch": 50.43961352657005, "grad_norm": 0.3398006558418274, "learning_rate": 0.001, "loss": 1.7128, "step": 584696 }, { "epoch": 50.44444444444444, "grad_norm": 9.214986801147461, "learning_rate": 0.001, "loss": 1.7073, "step": 584752 }, { "epoch": 50.44927536231884, "grad_norm": 0.821214497089386, "learning_rate": 0.001, "loss": 1.7109, "step": 584808 }, { "epoch": 50.45410628019324, "grad_norm": 0.256308913230896, "learning_rate": 0.001, "loss": 1.7047, "step": 584864 }, { "epoch": 50.45893719806763, "grad_norm": 0.5723835229873657, "learning_rate": 0.001, "loss": 1.7032, "step": 584920 }, { "epoch": 50.46376811594203, "grad_norm": 1.4734607934951782, "learning_rate": 0.001, "loss": 1.7116, "step": 584976 }, { "epoch": 50.468599033816425, "grad_norm": 0.30213865637779236, "learning_rate": 0.001, "loss": 1.7084, "step": 585032 }, { "epoch": 50.47342995169082, "grad_norm": 0.31700417399406433, "learning_rate": 0.001, "loss": 1.706, "step": 585088 }, { "epoch": 50.47826086956522, "grad_norm": 0.31916359066963196, "learning_rate": 0.001, "loss": 1.703, "step": 585144 }, { "epoch": 50.48309178743961, "grad_norm": 0.6715946793556213, "learning_rate": 0.001, "loss": 1.7064, "step": 585200 }, { "epoch": 50.48792270531401, "grad_norm": 1.3252606391906738, "learning_rate": 0.001, "loss": 1.6915, "step": 585256 }, { "epoch": 50.492753623188406, "grad_norm": 0.3906671404838562, "learning_rate": 0.001, "loss": 1.6903, "step": 585312 }, { "epoch": 50.4975845410628, "grad_norm": 0.903742253780365, "learning_rate": 0.001, "loss": 1.6976, "step": 585368 }, { "epoch": 50.5024154589372, "grad_norm": 0.4632496237754822, "learning_rate": 0.001, "loss": 1.7046, "step": 585424 }, { "epoch": 50.507246376811594, "grad_norm": 0.3027133643627167, "learning_rate": 0.001, "loss": 1.6855, "step": 585480 }, { "epoch": 50.51207729468599, "grad_norm": 1.1066515445709229, "learning_rate": 0.001, "loss": 1.6759, "step": 585536 }, { "epoch": 50.51690821256039, "grad_norm": 0.26943063735961914, "learning_rate": 0.001, "loss": 1.6836, "step": 585592 }, { "epoch": 50.52173913043478, "grad_norm": 0.7686430215835571, "learning_rate": 0.001, "loss": 1.6736, "step": 585648 }, { "epoch": 50.52657004830918, "grad_norm": 0.40275877714157104, "learning_rate": 0.001, "loss": 1.6785, "step": 585704 }, { "epoch": 50.531400966183575, "grad_norm": 0.3135135769844055, "learning_rate": 0.001, "loss": 1.6724, "step": 585760 }, { "epoch": 50.53623188405797, "grad_norm": 0.3084591031074524, "learning_rate": 0.001, "loss": 1.6808, "step": 585816 }, { "epoch": 50.54106280193237, "grad_norm": 0.5039700865745544, "learning_rate": 0.001, "loss": 1.676, "step": 585872 }, { "epoch": 50.54589371980676, "grad_norm": 0.6340568661689758, "learning_rate": 0.001, "loss": 1.6789, "step": 585928 }, { "epoch": 50.55072463768116, "grad_norm": 3.2882046699523926, "learning_rate": 0.001, "loss": 1.6764, "step": 585984 }, { "epoch": 50.55555555555556, "grad_norm": 0.3529403507709503, "learning_rate": 0.001, "loss": 1.6946, "step": 586040 }, { "epoch": 50.56038647342995, "grad_norm": 0.38209670782089233, "learning_rate": 0.001, "loss": 1.6829, "step": 586096 }, { "epoch": 50.56521739130435, "grad_norm": 0.2951977849006653, "learning_rate": 0.001, "loss": 1.6821, "step": 586152 }, { "epoch": 50.570048309178745, "grad_norm": 0.34158480167388916, "learning_rate": 0.001, "loss": 1.6864, "step": 586208 }, { "epoch": 50.57487922705314, "grad_norm": 0.48478007316589355, "learning_rate": 0.001, "loss": 1.6893, "step": 586264 }, { "epoch": 50.57971014492754, "grad_norm": 0.31387361884117126, "learning_rate": 0.001, "loss": 1.6853, "step": 586320 }, { "epoch": 50.58454106280193, "grad_norm": 0.3455338478088379, "learning_rate": 0.001, "loss": 1.6716, "step": 586376 }, { "epoch": 50.589371980676326, "grad_norm": 0.38596275448799133, "learning_rate": 0.001, "loss": 1.6804, "step": 586432 }, { "epoch": 50.594202898550726, "grad_norm": 0.32767659425735474, "learning_rate": 0.001, "loss": 1.6849, "step": 586488 }, { "epoch": 50.59903381642512, "grad_norm": 0.43412697315216064, "learning_rate": 0.001, "loss": 1.6801, "step": 586544 }, { "epoch": 50.60386473429952, "grad_norm": 0.3944624960422516, "learning_rate": 0.001, "loss": 1.6792, "step": 586600 }, { "epoch": 50.608695652173914, "grad_norm": 0.29598844051361084, "learning_rate": 0.001, "loss": 1.6796, "step": 586656 }, { "epoch": 50.61352657004831, "grad_norm": 0.28250616788864136, "learning_rate": 0.001, "loss": 1.687, "step": 586712 }, { "epoch": 50.61835748792271, "grad_norm": 0.3707408607006073, "learning_rate": 0.001, "loss": 1.6812, "step": 586768 }, { "epoch": 50.6231884057971, "grad_norm": 0.39537638425827026, "learning_rate": 0.001, "loss": 1.6787, "step": 586824 }, { "epoch": 50.628019323671495, "grad_norm": 0.37178540229797363, "learning_rate": 0.001, "loss": 1.6781, "step": 586880 }, { "epoch": 50.632850241545896, "grad_norm": 0.2894957959651947, "learning_rate": 0.001, "loss": 1.6773, "step": 586936 }, { "epoch": 50.63768115942029, "grad_norm": 0.3247082829475403, "learning_rate": 0.001, "loss": 1.6716, "step": 586992 }, { "epoch": 50.64251207729468, "grad_norm": 0.3209967017173767, "learning_rate": 0.001, "loss": 1.6901, "step": 587048 }, { "epoch": 50.64734299516908, "grad_norm": 0.40212035179138184, "learning_rate": 0.001, "loss": 1.685, "step": 587104 }, { "epoch": 50.65217391304348, "grad_norm": 0.25530076026916504, "learning_rate": 0.001, "loss": 1.6784, "step": 587160 }, { "epoch": 50.65700483091788, "grad_norm": 0.32697802782058716, "learning_rate": 0.001, "loss": 1.6775, "step": 587216 }, { "epoch": 50.66183574879227, "grad_norm": 0.3863978087902069, "learning_rate": 0.001, "loss": 1.673, "step": 587272 }, { "epoch": 50.666666666666664, "grad_norm": 0.8412653803825378, "learning_rate": 0.001, "loss": 1.6868, "step": 587328 }, { "epoch": 50.671497584541065, "grad_norm": 6.308025360107422, "learning_rate": 0.001, "loss": 1.6852, "step": 587384 }, { "epoch": 50.67632850241546, "grad_norm": 0.35370683670043945, "learning_rate": 0.001, "loss": 1.688, "step": 587440 }, { "epoch": 50.68115942028985, "grad_norm": 0.33714935183525085, "learning_rate": 0.001, "loss": 1.6856, "step": 587496 }, { "epoch": 50.68599033816425, "grad_norm": 1.6275168657302856, "learning_rate": 0.001, "loss": 1.7099, "step": 587552 }, { "epoch": 50.690821256038646, "grad_norm": 0.4248359203338623, "learning_rate": 0.001, "loss": 1.6963, "step": 587608 }, { "epoch": 50.69565217391305, "grad_norm": 0.6534740328788757, "learning_rate": 0.001, "loss": 1.6982, "step": 587664 }, { "epoch": 50.70048309178744, "grad_norm": 2.68985915184021, "learning_rate": 0.001, "loss": 1.6974, "step": 587720 }, { "epoch": 50.70531400966183, "grad_norm": 0.47170084714889526, "learning_rate": 0.001, "loss": 1.6921, "step": 587776 }, { "epoch": 50.710144927536234, "grad_norm": 0.3364291787147522, "learning_rate": 0.001, "loss": 1.6917, "step": 587832 }, { "epoch": 50.71497584541063, "grad_norm": 0.3602994382381439, "learning_rate": 0.001, "loss": 1.6892, "step": 587888 }, { "epoch": 50.71980676328502, "grad_norm": 0.5273240208625793, "learning_rate": 0.001, "loss": 1.6872, "step": 587944 }, { "epoch": 50.72463768115942, "grad_norm": 0.38391104340553284, "learning_rate": 0.001, "loss": 1.6887, "step": 588000 }, { "epoch": 50.729468599033815, "grad_norm": 0.31403690576553345, "learning_rate": 0.001, "loss": 1.6985, "step": 588056 }, { "epoch": 50.734299516908216, "grad_norm": 0.32256174087524414, "learning_rate": 0.001, "loss": 1.7023, "step": 588112 }, { "epoch": 50.73913043478261, "grad_norm": 0.24392454326152802, "learning_rate": 0.001, "loss": 1.6958, "step": 588168 }, { "epoch": 50.743961352657, "grad_norm": 0.25024136900901794, "learning_rate": 0.001, "loss": 1.697, "step": 588224 }, { "epoch": 50.7487922705314, "grad_norm": 0.3388315737247467, "learning_rate": 0.001, "loss": 1.6944, "step": 588280 }, { "epoch": 50.7536231884058, "grad_norm": 18.992431640625, "learning_rate": 0.001, "loss": 1.6969, "step": 588336 }, { "epoch": 50.75845410628019, "grad_norm": 0.5026355981826782, "learning_rate": 0.001, "loss": 1.7026, "step": 588392 }, { "epoch": 50.76328502415459, "grad_norm": 0.29561904072761536, "learning_rate": 0.001, "loss": 1.6976, "step": 588448 }, { "epoch": 50.768115942028984, "grad_norm": 0.6940761208534241, "learning_rate": 0.001, "loss": 1.7017, "step": 588504 }, { "epoch": 50.772946859903385, "grad_norm": 0.2405649721622467, "learning_rate": 0.001, "loss": 1.6889, "step": 588560 }, { "epoch": 50.77777777777778, "grad_norm": 0.32346111536026, "learning_rate": 0.001, "loss": 1.69, "step": 588616 }, { "epoch": 50.78260869565217, "grad_norm": 0.34140560030937195, "learning_rate": 0.001, "loss": 1.6939, "step": 588672 }, { "epoch": 50.78743961352657, "grad_norm": 0.2959447205066681, "learning_rate": 0.001, "loss": 1.6922, "step": 588728 }, { "epoch": 50.792270531400966, "grad_norm": 0.29406285285949707, "learning_rate": 0.001, "loss": 1.6941, "step": 588784 }, { "epoch": 50.79710144927536, "grad_norm": 0.348143994808197, "learning_rate": 0.001, "loss": 1.6931, "step": 588840 }, { "epoch": 50.80193236714976, "grad_norm": 0.3582059144973755, "learning_rate": 0.001, "loss": 1.6994, "step": 588896 }, { "epoch": 50.806763285024154, "grad_norm": 0.5090271234512329, "learning_rate": 0.001, "loss": 1.6976, "step": 588952 }, { "epoch": 50.81159420289855, "grad_norm": 0.6048501133918762, "learning_rate": 0.001, "loss": 1.6969, "step": 589008 }, { "epoch": 50.81642512077295, "grad_norm": 0.4438861012458801, "learning_rate": 0.001, "loss": 1.6977, "step": 589064 }, { "epoch": 50.82125603864734, "grad_norm": 0.30365192890167236, "learning_rate": 0.001, "loss": 1.6969, "step": 589120 }, { "epoch": 50.82608695652174, "grad_norm": 0.25482335686683655, "learning_rate": 0.001, "loss": 1.7044, "step": 589176 }, { "epoch": 50.830917874396135, "grad_norm": 0.27928122878074646, "learning_rate": 0.001, "loss": 1.7046, "step": 589232 }, { "epoch": 50.83574879227053, "grad_norm": 0.24958357214927673, "learning_rate": 0.001, "loss": 1.7033, "step": 589288 }, { "epoch": 50.84057971014493, "grad_norm": 0.4877679944038391, "learning_rate": 0.001, "loss": 1.6961, "step": 589344 }, { "epoch": 50.84541062801932, "grad_norm": 0.2805549204349518, "learning_rate": 0.001, "loss": 1.7058, "step": 589400 }, { "epoch": 50.85024154589372, "grad_norm": 0.2678360641002655, "learning_rate": 0.001, "loss": 1.695, "step": 589456 }, { "epoch": 50.85507246376812, "grad_norm": 0.4095521569252014, "learning_rate": 0.001, "loss": 1.6964, "step": 589512 }, { "epoch": 50.85990338164251, "grad_norm": 0.3756842613220215, "learning_rate": 0.001, "loss": 1.6908, "step": 589568 }, { "epoch": 50.86473429951691, "grad_norm": 0.36998894810676575, "learning_rate": 0.001, "loss": 1.6826, "step": 589624 }, { "epoch": 50.869565217391305, "grad_norm": 0.31725579500198364, "learning_rate": 0.001, "loss": 1.6929, "step": 589680 }, { "epoch": 50.8743961352657, "grad_norm": 0.3307904303073883, "learning_rate": 0.001, "loss": 1.699, "step": 589736 }, { "epoch": 50.8792270531401, "grad_norm": 0.386803537607193, "learning_rate": 0.001, "loss": 1.6941, "step": 589792 }, { "epoch": 50.88405797101449, "grad_norm": 0.2737726867198944, "learning_rate": 0.001, "loss": 1.6961, "step": 589848 }, { "epoch": 50.888888888888886, "grad_norm": 0.34264621138572693, "learning_rate": 0.001, "loss": 1.6868, "step": 589904 }, { "epoch": 50.893719806763286, "grad_norm": 0.5410685539245605, "learning_rate": 0.001, "loss": 1.6886, "step": 589960 }, { "epoch": 50.89855072463768, "grad_norm": 0.7887412905693054, "learning_rate": 0.001, "loss": 1.6964, "step": 590016 }, { "epoch": 50.90338164251208, "grad_norm": 0.9464644193649292, "learning_rate": 0.001, "loss": 1.699, "step": 590072 }, { "epoch": 50.908212560386474, "grad_norm": 0.37206387519836426, "learning_rate": 0.001, "loss": 1.6981, "step": 590128 }, { "epoch": 50.91304347826087, "grad_norm": 0.35174843668937683, "learning_rate": 0.001, "loss": 1.7, "step": 590184 }, { "epoch": 50.91787439613527, "grad_norm": 0.33037054538726807, "learning_rate": 0.001, "loss": 1.7095, "step": 590240 }, { "epoch": 50.92270531400966, "grad_norm": 0.3626663088798523, "learning_rate": 0.001, "loss": 1.6922, "step": 590296 }, { "epoch": 50.927536231884055, "grad_norm": 0.35391974449157715, "learning_rate": 0.001, "loss": 1.6864, "step": 590352 }, { "epoch": 50.932367149758456, "grad_norm": 0.42774075269699097, "learning_rate": 0.001, "loss": 1.6886, "step": 590408 }, { "epoch": 50.93719806763285, "grad_norm": 0.33727067708969116, "learning_rate": 0.001, "loss": 1.6863, "step": 590464 }, { "epoch": 50.94202898550725, "grad_norm": 0.24561363458633423, "learning_rate": 0.001, "loss": 1.6886, "step": 590520 }, { "epoch": 50.94685990338164, "grad_norm": 0.33928096294403076, "learning_rate": 0.001, "loss": 1.6963, "step": 590576 }, { "epoch": 50.95169082125604, "grad_norm": 0.3495408296585083, "learning_rate": 0.001, "loss": 1.6975, "step": 590632 }, { "epoch": 50.95652173913044, "grad_norm": 0.5006148219108582, "learning_rate": 0.001, "loss": 1.6982, "step": 590688 }, { "epoch": 50.96135265700483, "grad_norm": 0.6177756786346436, "learning_rate": 0.001, "loss": 1.6967, "step": 590744 }, { "epoch": 50.966183574879224, "grad_norm": 0.46705448627471924, "learning_rate": 0.001, "loss": 1.6945, "step": 590800 }, { "epoch": 50.971014492753625, "grad_norm": 0.4056796431541443, "learning_rate": 0.001, "loss": 1.6953, "step": 590856 }, { "epoch": 50.97584541062802, "grad_norm": 0.3182075023651123, "learning_rate": 0.001, "loss": 1.6974, "step": 590912 }, { "epoch": 50.98067632850242, "grad_norm": 0.3315759003162384, "learning_rate": 0.001, "loss": 1.6952, "step": 590968 }, { "epoch": 50.98550724637681, "grad_norm": 0.526762068271637, "learning_rate": 0.001, "loss": 1.7018, "step": 591024 }, { "epoch": 50.990338164251206, "grad_norm": 0.2628881335258484, "learning_rate": 0.001, "loss": 1.7023, "step": 591080 }, { "epoch": 50.99516908212561, "grad_norm": 0.3051557242870331, "learning_rate": 0.001, "loss": 1.7071, "step": 591136 }, { "epoch": 51.0, "grad_norm": 0.7587386965751648, "learning_rate": 0.001, "loss": 1.7379, "step": 591192 }, { "epoch": 51.00483091787439, "grad_norm": 0.7112836241722107, "learning_rate": 0.001, "loss": 1.6963, "step": 591248 }, { "epoch": 51.009661835748794, "grad_norm": 0.5381281971931458, "learning_rate": 0.001, "loss": 1.6889, "step": 591304 }, { "epoch": 51.01449275362319, "grad_norm": 0.26564812660217285, "learning_rate": 0.001, "loss": 1.6887, "step": 591360 }, { "epoch": 51.01932367149758, "grad_norm": 1.1472522020339966, "learning_rate": 0.001, "loss": 1.6873, "step": 591416 }, { "epoch": 51.02415458937198, "grad_norm": 0.3056686818599701, "learning_rate": 0.001, "loss": 1.6848, "step": 591472 }, { "epoch": 51.028985507246375, "grad_norm": 0.2901252210140228, "learning_rate": 0.001, "loss": 1.6904, "step": 591528 }, { "epoch": 51.033816425120776, "grad_norm": 1.3246546983718872, "learning_rate": 0.001, "loss": 1.6937, "step": 591584 }, { "epoch": 51.03864734299517, "grad_norm": 2.352367401123047, "learning_rate": 0.001, "loss": 1.6991, "step": 591640 }, { "epoch": 51.04347826086956, "grad_norm": 0.28800085186958313, "learning_rate": 0.001, "loss": 1.7118, "step": 591696 }, { "epoch": 51.04830917874396, "grad_norm": 0.8258289694786072, "learning_rate": 0.001, "loss": 1.705, "step": 591752 }, { "epoch": 51.05314009661836, "grad_norm": 0.27805012464523315, "learning_rate": 0.001, "loss": 1.6882, "step": 591808 }, { "epoch": 51.05797101449275, "grad_norm": 1.2052322626113892, "learning_rate": 0.001, "loss": 1.6893, "step": 591864 }, { "epoch": 51.06280193236715, "grad_norm": 0.4199780523777008, "learning_rate": 0.001, "loss": 1.6815, "step": 591920 }, { "epoch": 51.067632850241544, "grad_norm": 0.3394775092601776, "learning_rate": 0.001, "loss": 1.684, "step": 591976 }, { "epoch": 51.072463768115945, "grad_norm": 0.3429206907749176, "learning_rate": 0.001, "loss": 1.6841, "step": 592032 }, { "epoch": 51.07729468599034, "grad_norm": 0.27097785472869873, "learning_rate": 0.001, "loss": 1.6866, "step": 592088 }, { "epoch": 51.08212560386473, "grad_norm": 0.6579580307006836, "learning_rate": 0.001, "loss": 1.6868, "step": 592144 }, { "epoch": 51.08695652173913, "grad_norm": 0.6291612386703491, "learning_rate": 0.001, "loss": 1.6812, "step": 592200 }, { "epoch": 51.091787439613526, "grad_norm": 0.4057009220123291, "learning_rate": 0.001, "loss": 1.6866, "step": 592256 }, { "epoch": 51.09661835748792, "grad_norm": 0.31846123933792114, "learning_rate": 0.001, "loss": 1.6901, "step": 592312 }, { "epoch": 51.10144927536232, "grad_norm": 0.24358443915843964, "learning_rate": 0.001, "loss": 1.6991, "step": 592368 }, { "epoch": 51.106280193236714, "grad_norm": 0.23289215564727783, "learning_rate": 0.001, "loss": 1.7011, "step": 592424 }, { "epoch": 51.111111111111114, "grad_norm": 0.31041550636291504, "learning_rate": 0.001, "loss": 1.693, "step": 592480 }, { "epoch": 51.11594202898551, "grad_norm": 0.3115420341491699, "learning_rate": 0.001, "loss": 1.686, "step": 592536 }, { "epoch": 51.1207729468599, "grad_norm": 0.3617793321609497, "learning_rate": 0.001, "loss": 1.6895, "step": 592592 }, { "epoch": 51.1256038647343, "grad_norm": 0.2527279257774353, "learning_rate": 0.001, "loss": 1.6848, "step": 592648 }, { "epoch": 51.130434782608695, "grad_norm": 0.36126360297203064, "learning_rate": 0.001, "loss": 1.6831, "step": 592704 }, { "epoch": 51.13526570048309, "grad_norm": 0.4201461672782898, "learning_rate": 0.001, "loss": 1.6863, "step": 592760 }, { "epoch": 51.14009661835749, "grad_norm": 0.41131043434143066, "learning_rate": 0.001, "loss": 1.6896, "step": 592816 }, { "epoch": 51.14492753623188, "grad_norm": 0.2899206280708313, "learning_rate": 0.001, "loss": 1.6914, "step": 592872 }, { "epoch": 51.14975845410628, "grad_norm": 0.9497878551483154, "learning_rate": 0.001, "loss": 1.6858, "step": 592928 }, { "epoch": 51.15458937198068, "grad_norm": 0.31026434898376465, "learning_rate": 0.001, "loss": 1.687, "step": 592984 }, { "epoch": 51.15942028985507, "grad_norm": 0.34312912821769714, "learning_rate": 0.001, "loss": 1.6849, "step": 593040 }, { "epoch": 51.16425120772947, "grad_norm": 0.3321970999240875, "learning_rate": 0.001, "loss": 1.6884, "step": 593096 }, { "epoch": 51.169082125603865, "grad_norm": 0.9551219344139099, "learning_rate": 0.001, "loss": 1.6947, "step": 593152 }, { "epoch": 51.17391304347826, "grad_norm": 0.6080025434494019, "learning_rate": 0.001, "loss": 1.6915, "step": 593208 }, { "epoch": 51.17874396135266, "grad_norm": 0.31593868136405945, "learning_rate": 0.001, "loss": 1.6874, "step": 593264 }, { "epoch": 51.18357487922705, "grad_norm": 1.5359009504318237, "learning_rate": 0.001, "loss": 1.6859, "step": 593320 }, { "epoch": 51.18840579710145, "grad_norm": 0.30936112999916077, "learning_rate": 0.001, "loss": 1.6823, "step": 593376 }, { "epoch": 51.193236714975846, "grad_norm": 0.36021995544433594, "learning_rate": 0.001, "loss": 1.6784, "step": 593432 }, { "epoch": 51.19806763285024, "grad_norm": 0.28169432282447815, "learning_rate": 0.001, "loss": 1.6886, "step": 593488 }, { "epoch": 51.20289855072464, "grad_norm": 0.29740285873413086, "learning_rate": 0.001, "loss": 1.6916, "step": 593544 }, { "epoch": 51.207729468599034, "grad_norm": 0.35549411177635193, "learning_rate": 0.001, "loss": 1.6951, "step": 593600 }, { "epoch": 51.21256038647343, "grad_norm": 1.5772771835327148, "learning_rate": 0.001, "loss": 1.6893, "step": 593656 }, { "epoch": 51.21739130434783, "grad_norm": 0.3513064682483673, "learning_rate": 0.001, "loss": 1.6919, "step": 593712 }, { "epoch": 51.22222222222222, "grad_norm": 0.35559022426605225, "learning_rate": 0.001, "loss": 1.6823, "step": 593768 }, { "epoch": 51.227053140096615, "grad_norm": 0.42826029658317566, "learning_rate": 0.001, "loss": 1.6837, "step": 593824 }, { "epoch": 51.231884057971016, "grad_norm": 0.29592862725257874, "learning_rate": 0.001, "loss": 1.6778, "step": 593880 }, { "epoch": 51.23671497584541, "grad_norm": 0.3132611811161041, "learning_rate": 0.001, "loss": 1.6867, "step": 593936 }, { "epoch": 51.24154589371981, "grad_norm": 0.29841122031211853, "learning_rate": 0.001, "loss": 1.6839, "step": 593992 }, { "epoch": 51.2463768115942, "grad_norm": 0.27649980783462524, "learning_rate": 0.001, "loss": 1.6799, "step": 594048 }, { "epoch": 51.2512077294686, "grad_norm": 0.28863468766212463, "learning_rate": 0.001, "loss": 1.6829, "step": 594104 }, { "epoch": 51.256038647343, "grad_norm": 0.5500776767730713, "learning_rate": 0.001, "loss": 1.681, "step": 594160 }, { "epoch": 51.26086956521739, "grad_norm": 0.9718368649482727, "learning_rate": 0.001, "loss": 1.6869, "step": 594216 }, { "epoch": 51.265700483091784, "grad_norm": 0.38903674483299255, "learning_rate": 0.001, "loss": 1.6834, "step": 594272 }, { "epoch": 51.270531400966185, "grad_norm": 0.3782811462879181, "learning_rate": 0.001, "loss": 1.6935, "step": 594328 }, { "epoch": 51.27536231884058, "grad_norm": 0.3079794645309448, "learning_rate": 0.001, "loss": 1.7036, "step": 594384 }, { "epoch": 51.28019323671498, "grad_norm": 2.865391731262207, "learning_rate": 0.001, "loss": 1.6847, "step": 594440 }, { "epoch": 51.28502415458937, "grad_norm": 0.3375025689601898, "learning_rate": 0.001, "loss": 1.6876, "step": 594496 }, { "epoch": 51.289855072463766, "grad_norm": 21.67305564880371, "learning_rate": 0.001, "loss": 1.6948, "step": 594552 }, { "epoch": 51.29468599033817, "grad_norm": 0.8427408933639526, "learning_rate": 0.001, "loss": 1.7107, "step": 594608 }, { "epoch": 51.29951690821256, "grad_norm": 0.3381350338459015, "learning_rate": 0.001, "loss": 1.7087, "step": 594664 }, { "epoch": 51.30434782608695, "grad_norm": 0.2994270324707031, "learning_rate": 0.001, "loss": 1.7035, "step": 594720 }, { "epoch": 51.309178743961354, "grad_norm": 6.294826030731201, "learning_rate": 0.001, "loss": 1.7, "step": 594776 }, { "epoch": 51.31400966183575, "grad_norm": 0.2897292375564575, "learning_rate": 0.001, "loss": 1.7063, "step": 594832 }, { "epoch": 51.31884057971015, "grad_norm": 0.4916529655456543, "learning_rate": 0.001, "loss": 1.7146, "step": 594888 }, { "epoch": 51.32367149758454, "grad_norm": 0.3169519305229187, "learning_rate": 0.001, "loss": 1.7031, "step": 594944 }, { "epoch": 51.328502415458935, "grad_norm": 0.28209325671195984, "learning_rate": 0.001, "loss": 1.703, "step": 595000 }, { "epoch": 51.333333333333336, "grad_norm": 6.029934883117676, "learning_rate": 0.001, "loss": 1.7069, "step": 595056 }, { "epoch": 51.33816425120773, "grad_norm": 0.39634400606155396, "learning_rate": 0.001, "loss": 1.7123, "step": 595112 }, { "epoch": 51.34299516908212, "grad_norm": 2.434870719909668, "learning_rate": 0.001, "loss": 1.7236, "step": 595168 }, { "epoch": 51.34782608695652, "grad_norm": 0.3134518265724182, "learning_rate": 0.001, "loss": 1.7188, "step": 595224 }, { "epoch": 51.35265700483092, "grad_norm": 0.39732351899147034, "learning_rate": 0.001, "loss": 1.7154, "step": 595280 }, { "epoch": 51.35748792270532, "grad_norm": 0.27678459882736206, "learning_rate": 0.001, "loss": 1.6984, "step": 595336 }, { "epoch": 51.36231884057971, "grad_norm": 0.630029559135437, "learning_rate": 0.001, "loss": 1.6989, "step": 595392 }, { "epoch": 51.367149758454104, "grad_norm": 0.6845620274543762, "learning_rate": 0.001, "loss": 1.6967, "step": 595448 }, { "epoch": 51.371980676328505, "grad_norm": 2.3017780780792236, "learning_rate": 0.001, "loss": 1.6903, "step": 595504 }, { "epoch": 51.3768115942029, "grad_norm": 0.3084218204021454, "learning_rate": 0.001, "loss": 1.6917, "step": 595560 }, { "epoch": 51.38164251207729, "grad_norm": 8.354263305664062, "learning_rate": 0.001, "loss": 1.7007, "step": 595616 }, { "epoch": 51.38647342995169, "grad_norm": 0.3937309980392456, "learning_rate": 0.001, "loss": 1.7065, "step": 595672 }, { "epoch": 51.391304347826086, "grad_norm": 0.5663292407989502, "learning_rate": 0.001, "loss": 1.6882, "step": 595728 }, { "epoch": 51.39613526570048, "grad_norm": 0.29563623666763306, "learning_rate": 0.001, "loss": 1.6924, "step": 595784 }, { "epoch": 51.40096618357488, "grad_norm": 1.6306625604629517, "learning_rate": 0.001, "loss": 1.7023, "step": 595840 }, { "epoch": 51.405797101449274, "grad_norm": 0.28913402557373047, "learning_rate": 0.001, "loss": 1.6948, "step": 595896 }, { "epoch": 51.410628019323674, "grad_norm": 0.299816370010376, "learning_rate": 0.001, "loss": 1.6966, "step": 595952 }, { "epoch": 51.41545893719807, "grad_norm": 0.2610970139503479, "learning_rate": 0.001, "loss": 1.6989, "step": 596008 }, { "epoch": 51.42028985507246, "grad_norm": 0.2826833128929138, "learning_rate": 0.001, "loss": 1.693, "step": 596064 }, { "epoch": 51.42512077294686, "grad_norm": 3.6331980228424072, "learning_rate": 0.001, "loss": 1.6864, "step": 596120 }, { "epoch": 51.429951690821255, "grad_norm": 0.35145336389541626, "learning_rate": 0.001, "loss": 1.6928, "step": 596176 }, { "epoch": 51.43478260869565, "grad_norm": 1.5001792907714844, "learning_rate": 0.001, "loss": 1.6848, "step": 596232 }, { "epoch": 51.43961352657005, "grad_norm": 0.7073214650154114, "learning_rate": 0.001, "loss": 1.6876, "step": 596288 }, { "epoch": 51.44444444444444, "grad_norm": 0.8882980346679688, "learning_rate": 0.001, "loss": 1.6938, "step": 596344 }, { "epoch": 51.44927536231884, "grad_norm": 1.3797446489334106, "learning_rate": 0.001, "loss": 1.6853, "step": 596400 }, { "epoch": 51.45410628019324, "grad_norm": 0.27609679102897644, "learning_rate": 0.001, "loss": 1.6901, "step": 596456 }, { "epoch": 51.45893719806763, "grad_norm": 0.3298700749874115, "learning_rate": 0.001, "loss": 1.693, "step": 596512 }, { "epoch": 51.46376811594203, "grad_norm": 0.3428545892238617, "learning_rate": 0.001, "loss": 1.7006, "step": 596568 }, { "epoch": 51.468599033816425, "grad_norm": 0.33892104029655457, "learning_rate": 0.001, "loss": 1.6922, "step": 596624 }, { "epoch": 51.47342995169082, "grad_norm": 0.787027895450592, "learning_rate": 0.001, "loss": 1.6922, "step": 596680 }, { "epoch": 51.47826086956522, "grad_norm": 0.7177733778953552, "learning_rate": 0.001, "loss": 1.7035, "step": 596736 }, { "epoch": 51.48309178743961, "grad_norm": 0.2669461965560913, "learning_rate": 0.001, "loss": 1.6912, "step": 596792 }, { "epoch": 51.48792270531401, "grad_norm": 0.2829669117927551, "learning_rate": 0.001, "loss": 1.6889, "step": 596848 }, { "epoch": 51.492753623188406, "grad_norm": 0.5830268859863281, "learning_rate": 0.001, "loss": 1.6975, "step": 596904 }, { "epoch": 51.4975845410628, "grad_norm": 0.3384113907814026, "learning_rate": 0.001, "loss": 1.7001, "step": 596960 }, { "epoch": 51.5024154589372, "grad_norm": 1.684234857559204, "learning_rate": 0.001, "loss": 1.7026, "step": 597016 }, { "epoch": 51.507246376811594, "grad_norm": 0.4163397252559662, "learning_rate": 0.001, "loss": 1.7013, "step": 597072 }, { "epoch": 51.51207729468599, "grad_norm": 0.8386881947517395, "learning_rate": 0.001, "loss": 1.6993, "step": 597128 }, { "epoch": 51.51690821256039, "grad_norm": 0.9796852469444275, "learning_rate": 0.001, "loss": 1.7036, "step": 597184 }, { "epoch": 51.52173913043478, "grad_norm": 0.2585749328136444, "learning_rate": 0.001, "loss": 1.7114, "step": 597240 }, { "epoch": 51.52657004830918, "grad_norm": 0.43170279264450073, "learning_rate": 0.001, "loss": 1.7123, "step": 597296 }, { "epoch": 51.531400966183575, "grad_norm": 0.3351188898086548, "learning_rate": 0.001, "loss": 1.7015, "step": 597352 }, { "epoch": 51.53623188405797, "grad_norm": 2.055659532546997, "learning_rate": 0.001, "loss": 1.7082, "step": 597408 }, { "epoch": 51.54106280193237, "grad_norm": 1.3722572326660156, "learning_rate": 0.001, "loss": 1.697, "step": 597464 }, { "epoch": 51.54589371980676, "grad_norm": 1.511549949645996, "learning_rate": 0.001, "loss": 1.7167, "step": 597520 }, { "epoch": 51.55072463768116, "grad_norm": 4.980690956115723, "learning_rate": 0.001, "loss": 1.7551, "step": 597576 }, { "epoch": 51.55555555555556, "grad_norm": 0.3159142732620239, "learning_rate": 0.001, "loss": 1.7528, "step": 597632 }, { "epoch": 51.56038647342995, "grad_norm": 0.3148779571056366, "learning_rate": 0.001, "loss": 1.744, "step": 597688 }, { "epoch": 51.56521739130435, "grad_norm": 0.3208189308643341, "learning_rate": 0.001, "loss": 1.7482, "step": 597744 }, { "epoch": 51.570048309178745, "grad_norm": 0.2658173441886902, "learning_rate": 0.001, "loss": 1.7264, "step": 597800 }, { "epoch": 51.57487922705314, "grad_norm": 5.542259693145752, "learning_rate": 0.001, "loss": 1.7315, "step": 597856 }, { "epoch": 51.57971014492754, "grad_norm": 0.27253836393356323, "learning_rate": 0.001, "loss": 1.7228, "step": 597912 }, { "epoch": 51.58454106280193, "grad_norm": 1.5200753211975098, "learning_rate": 0.001, "loss": 1.7093, "step": 597968 }, { "epoch": 51.589371980676326, "grad_norm": 0.320385605096817, "learning_rate": 0.001, "loss": 1.7121, "step": 598024 }, { "epoch": 51.594202898550726, "grad_norm": 0.3827192783355713, "learning_rate": 0.001, "loss": 1.704, "step": 598080 }, { "epoch": 51.59903381642512, "grad_norm": 0.2876864969730377, "learning_rate": 0.001, "loss": 1.702, "step": 598136 }, { "epoch": 51.60386473429952, "grad_norm": 1.9545141458511353, "learning_rate": 0.001, "loss": 1.6967, "step": 598192 }, { "epoch": 51.608695652173914, "grad_norm": 0.2881614565849304, "learning_rate": 0.001, "loss": 1.7037, "step": 598248 }, { "epoch": 51.61352657004831, "grad_norm": 0.2827615439891815, "learning_rate": 0.001, "loss": 1.7022, "step": 598304 }, { "epoch": 51.61835748792271, "grad_norm": 0.3025899827480316, "learning_rate": 0.001, "loss": 1.6986, "step": 598360 }, { "epoch": 51.6231884057971, "grad_norm": 0.29290804266929626, "learning_rate": 0.001, "loss": 1.7175, "step": 598416 }, { "epoch": 51.628019323671495, "grad_norm": 0.36721867322921753, "learning_rate": 0.001, "loss": 1.713, "step": 598472 }, { "epoch": 51.632850241545896, "grad_norm": 0.30515486001968384, "learning_rate": 0.001, "loss": 1.718, "step": 598528 }, { "epoch": 51.63768115942029, "grad_norm": 7.00572395324707, "learning_rate": 0.001, "loss": 1.7202, "step": 598584 }, { "epoch": 51.64251207729468, "grad_norm": 1.1307706832885742, "learning_rate": 0.001, "loss": 1.7159, "step": 598640 }, { "epoch": 51.64734299516908, "grad_norm": 0.35299110412597656, "learning_rate": 0.001, "loss": 1.7226, "step": 598696 }, { "epoch": 51.65217391304348, "grad_norm": 1.3396217823028564, "learning_rate": 0.001, "loss": 1.7128, "step": 598752 }, { "epoch": 51.65700483091788, "grad_norm": 1.4907870292663574, "learning_rate": 0.001, "loss": 1.7042, "step": 598808 }, { "epoch": 51.66183574879227, "grad_norm": 1.3233610391616821, "learning_rate": 0.001, "loss": 1.6997, "step": 598864 }, { "epoch": 51.666666666666664, "grad_norm": 0.277865469455719, "learning_rate": 0.001, "loss": 1.6975, "step": 598920 }, { "epoch": 51.671497584541065, "grad_norm": 0.39408692717552185, "learning_rate": 0.001, "loss": 1.7068, "step": 598976 }, { "epoch": 51.67632850241546, "grad_norm": 0.2674724757671356, "learning_rate": 0.001, "loss": 1.6992, "step": 599032 }, { "epoch": 51.68115942028985, "grad_norm": 0.9976322054862976, "learning_rate": 0.001, "loss": 1.6969, "step": 599088 }, { "epoch": 51.68599033816425, "grad_norm": 0.2552238404750824, "learning_rate": 0.001, "loss": 1.6997, "step": 599144 }, { "epoch": 51.690821256038646, "grad_norm": 0.3872445225715637, "learning_rate": 0.001, "loss": 1.6978, "step": 599200 }, { "epoch": 51.69565217391305, "grad_norm": 0.3455553948879242, "learning_rate": 0.001, "loss": 1.701, "step": 599256 }, { "epoch": 51.70048309178744, "grad_norm": 0.30280694365501404, "learning_rate": 0.001, "loss": 1.6976, "step": 599312 }, { "epoch": 51.70531400966183, "grad_norm": 0.9519444108009338, "learning_rate": 0.001, "loss": 1.6967, "step": 599368 }, { "epoch": 51.710144927536234, "grad_norm": 2.7963154315948486, "learning_rate": 0.001, "loss": 1.6995, "step": 599424 }, { "epoch": 51.71497584541063, "grad_norm": 0.4803065359592438, "learning_rate": 0.001, "loss": 1.7116, "step": 599480 }, { "epoch": 51.71980676328502, "grad_norm": 0.26262590289115906, "learning_rate": 0.001, "loss": 1.7079, "step": 599536 }, { "epoch": 51.72463768115942, "grad_norm": 0.29479914903640747, "learning_rate": 0.001, "loss": 1.7032, "step": 599592 }, { "epoch": 51.729468599033815, "grad_norm": 0.3624474108219147, "learning_rate": 0.001, "loss": 1.7011, "step": 599648 }, { "epoch": 51.734299516908216, "grad_norm": 4.073760509490967, "learning_rate": 0.001, "loss": 1.7083, "step": 599704 }, { "epoch": 51.73913043478261, "grad_norm": 0.31366243958473206, "learning_rate": 0.001, "loss": 1.7075, "step": 599760 }, { "epoch": 51.743961352657, "grad_norm": 0.3793770670890808, "learning_rate": 0.001, "loss": 1.6976, "step": 599816 }, { "epoch": 51.7487922705314, "grad_norm": 0.38154783844947815, "learning_rate": 0.001, "loss": 1.6969, "step": 599872 }, { "epoch": 51.7536231884058, "grad_norm": 0.25706857442855835, "learning_rate": 0.001, "loss": 1.7118, "step": 599928 }, { "epoch": 51.75845410628019, "grad_norm": 0.28948748111724854, "learning_rate": 0.001, "loss": 1.7029, "step": 599984 }, { "epoch": 51.76328502415459, "grad_norm": 0.2582211196422577, "learning_rate": 0.001, "loss": 1.7148, "step": 600040 }, { "epoch": 51.768115942028984, "grad_norm": 0.36059653759002686, "learning_rate": 0.001, "loss": 1.7077, "step": 600096 }, { "epoch": 51.772946859903385, "grad_norm": 0.30814847350120544, "learning_rate": 0.001, "loss": 1.6964, "step": 600152 }, { "epoch": 51.77777777777778, "grad_norm": 0.2540637254714966, "learning_rate": 0.001, "loss": 1.7019, "step": 600208 }, { "epoch": 51.78260869565217, "grad_norm": 0.4255118668079376, "learning_rate": 0.001, "loss": 1.7054, "step": 600264 }, { "epoch": 51.78743961352657, "grad_norm": 0.2972356379032135, "learning_rate": 0.001, "loss": 1.7075, "step": 600320 }, { "epoch": 51.792270531400966, "grad_norm": 0.3912907540798187, "learning_rate": 0.001, "loss": 1.7037, "step": 600376 }, { "epoch": 51.79710144927536, "grad_norm": 0.2685694098472595, "learning_rate": 0.001, "loss": 1.7134, "step": 600432 }, { "epoch": 51.80193236714976, "grad_norm": 0.29084286093711853, "learning_rate": 0.001, "loss": 1.7081, "step": 600488 }, { "epoch": 51.806763285024154, "grad_norm": 0.5038160681724548, "learning_rate": 0.001, "loss": 1.7071, "step": 600544 }, { "epoch": 51.81159420289855, "grad_norm": 0.25105947256088257, "learning_rate": 0.001, "loss": 1.7157, "step": 600600 }, { "epoch": 51.81642512077295, "grad_norm": 0.7168490290641785, "learning_rate": 0.001, "loss": 1.7096, "step": 600656 }, { "epoch": 51.82125603864734, "grad_norm": 1.451351284980774, "learning_rate": 0.001, "loss": 1.6993, "step": 600712 }, { "epoch": 51.82608695652174, "grad_norm": 0.3234001696109772, "learning_rate": 0.001, "loss": 1.6996, "step": 600768 }, { "epoch": 51.830917874396135, "grad_norm": 4.786955833435059, "learning_rate": 0.001, "loss": 1.7161, "step": 600824 }, { "epoch": 51.83574879227053, "grad_norm": 0.34455835819244385, "learning_rate": 0.001, "loss": 1.7081, "step": 600880 }, { "epoch": 51.84057971014493, "grad_norm": 0.3131289780139923, "learning_rate": 0.001, "loss": 1.7075, "step": 600936 }, { "epoch": 51.84541062801932, "grad_norm": 1.3782198429107666, "learning_rate": 0.001, "loss": 1.6971, "step": 600992 }, { "epoch": 51.85024154589372, "grad_norm": 0.2836216688156128, "learning_rate": 0.001, "loss": 1.7109, "step": 601048 }, { "epoch": 51.85507246376812, "grad_norm": 0.3189244866371155, "learning_rate": 0.001, "loss": 1.7019, "step": 601104 }, { "epoch": 51.85990338164251, "grad_norm": 0.28573858737945557, "learning_rate": 0.001, "loss": 1.7132, "step": 601160 }, { "epoch": 51.86473429951691, "grad_norm": 0.32146748900413513, "learning_rate": 0.001, "loss": 1.7054, "step": 601216 }, { "epoch": 51.869565217391305, "grad_norm": 0.26075559854507446, "learning_rate": 0.001, "loss": 1.7088, "step": 601272 }, { "epoch": 51.8743961352657, "grad_norm": 0.4173423945903778, "learning_rate": 0.001, "loss": 1.7055, "step": 601328 }, { "epoch": 51.8792270531401, "grad_norm": 0.3194425106048584, "learning_rate": 0.001, "loss": 1.7134, "step": 601384 }, { "epoch": 51.88405797101449, "grad_norm": 1.2908283472061157, "learning_rate": 0.001, "loss": 1.7227, "step": 601440 }, { "epoch": 51.888888888888886, "grad_norm": 0.37117838859558105, "learning_rate": 0.001, "loss": 1.7221, "step": 601496 }, { "epoch": 51.893719806763286, "grad_norm": 0.2720344662666321, "learning_rate": 0.001, "loss": 1.727, "step": 601552 }, { "epoch": 51.89855072463768, "grad_norm": 0.2366008460521698, "learning_rate": 1.0791366906474821e-05, "loss": 1.669, "step": 601608 }, { "epoch": 51.90338164251208, "grad_norm": 0.3413804769515991, "learning_rate": 5.1079136690647486e-05, "loss": 1.6695, "step": 601664 }, { "epoch": 51.908212560386474, "grad_norm": 0.464634507894516, "learning_rate": 9.136690647482015e-05, "loss": 1.661, "step": 601720 }, { "epoch": 51.91304347826087, "grad_norm": 0.2339916080236435, "learning_rate": 0.0001, "loss": 1.6562, "step": 601776 }, { "epoch": 51.91787439613527, "grad_norm": 1.5181348323822021, "learning_rate": 0.0001, "loss": 1.6606, "step": 601832 }, { "epoch": 51.92270531400966, "grad_norm": 0.3050908148288727, "learning_rate": 0.0001, "loss": 1.651, "step": 601888 }, { "epoch": 51.927536231884055, "grad_norm": 0.22845712304115295, "learning_rate": 0.0001, "loss": 1.6429, "step": 601944 }, { "epoch": 51.932367149758456, "grad_norm": 0.29057684540748596, "learning_rate": 0.0001, "loss": 1.6432, "step": 602000 }, { "epoch": 51.93719806763285, "grad_norm": 1.4169676303863525, "learning_rate": 0.0001, "loss": 1.6466, "step": 602056 }, { "epoch": 51.94202898550725, "grad_norm": 0.3196248412132263, "learning_rate": 0.0001, "loss": 1.6442, "step": 602112 }, { "epoch": 51.94685990338164, "grad_norm": 0.6012705564498901, "learning_rate": 0.0001, "loss": 1.6479, "step": 602168 }, { "epoch": 51.95169082125604, "grad_norm": 0.29432612657546997, "learning_rate": 0.0001, "loss": 1.6452, "step": 602224 }, { "epoch": 51.95652173913044, "grad_norm": 11.793601036071777, "learning_rate": 0.0001, "loss": 1.6405, "step": 602280 }, { "epoch": 51.96135265700483, "grad_norm": 0.5738617777824402, "learning_rate": 0.0001, "loss": 1.6383, "step": 602336 }, { "epoch": 51.966183574879224, "grad_norm": 0.4168587028980255, "learning_rate": 0.0001, "loss": 1.6366, "step": 602392 }, { "epoch": 51.971014492753625, "grad_norm": 0.26031070947647095, "learning_rate": 0.0001, "loss": 1.6383, "step": 602448 }, { "epoch": 51.97584541062802, "grad_norm": 0.27702760696411133, "learning_rate": 0.0001, "loss": 1.6374, "step": 602504 }, { "epoch": 51.98067632850242, "grad_norm": 0.31579458713531494, "learning_rate": 0.0001, "loss": 1.6335, "step": 602560 }, { "epoch": 51.98550724637681, "grad_norm": 0.2665308713912964, "learning_rate": 0.0001, "loss": 1.6341, "step": 602616 }, { "epoch": 51.990338164251206, "grad_norm": 0.28663432598114014, "learning_rate": 0.0001, "loss": 1.6323, "step": 602672 }, { "epoch": 51.99516908212561, "grad_norm": 0.27979233860969543, "learning_rate": 0.0001, "loss": 1.6369, "step": 602728 }, { "epoch": 52.0, "grad_norm": 0.25072020292282104, "learning_rate": 0.0001, "loss": 1.6378, "step": 602784 }, { "epoch": 52.00483091787439, "grad_norm": 37.446685791015625, "learning_rate": 0.0001, "loss": 1.6458, "step": 602840 }, { "epoch": 52.009661835748794, "grad_norm": 0.2854483127593994, "learning_rate": 0.0001, "loss": 1.6496, "step": 602896 }, { "epoch": 52.01449275362319, "grad_norm": 0.33699434995651245, "learning_rate": 0.0001, "loss": 1.6398, "step": 602952 }, { "epoch": 52.01932367149758, "grad_norm": 0.3389986753463745, "learning_rate": 0.0001, "loss": 1.6473, "step": 603008 }, { "epoch": 52.02415458937198, "grad_norm": 0.2760601043701172, "learning_rate": 0.0001, "loss": 1.6491, "step": 603064 }, { "epoch": 52.028985507246375, "grad_norm": 2.985851526260376, "learning_rate": 0.0001, "loss": 1.6428, "step": 603120 }, { "epoch": 52.033816425120776, "grad_norm": 0.29792821407318115, "learning_rate": 0.0001, "loss": 1.6493, "step": 603176 }, { "epoch": 52.03864734299517, "grad_norm": 0.277726948261261, "learning_rate": 0.0001, "loss": 1.6416, "step": 603232 }, { "epoch": 52.04347826086956, "grad_norm": 0.7327926754951477, "learning_rate": 0.0001, "loss": 1.64, "step": 603288 }, { "epoch": 52.04830917874396, "grad_norm": 0.27456116676330566, "learning_rate": 0.0001, "loss": 1.6459, "step": 603344 }, { "epoch": 52.05314009661836, "grad_norm": 0.2810012996196747, "learning_rate": 0.0001, "loss": 1.6424, "step": 603400 }, { "epoch": 52.05797101449275, "grad_norm": 0.24727849662303925, "learning_rate": 0.0001, "loss": 1.6446, "step": 603456 }, { "epoch": 52.06280193236715, "grad_norm": 2.023768186569214, "learning_rate": 0.0001, "loss": 1.647, "step": 603512 }, { "epoch": 52.067632850241544, "grad_norm": 0.5602424144744873, "learning_rate": 0.0001, "loss": 1.642, "step": 603568 }, { "epoch": 52.072463768115945, "grad_norm": 0.2902458608150482, "learning_rate": 0.0001, "loss": 1.6413, "step": 603624 }, { "epoch": 52.07729468599034, "grad_norm": 0.29563942551612854, "learning_rate": 0.0001, "loss": 1.6433, "step": 603680 }, { "epoch": 52.08212560386473, "grad_norm": 16.903289794921875, "learning_rate": 0.0001, "loss": 1.6402, "step": 603736 }, { "epoch": 52.08695652173913, "grad_norm": 0.3680150508880615, "learning_rate": 0.0001, "loss": 1.6384, "step": 603792 }, { "epoch": 52.091787439613526, "grad_norm": 0.26763996481895447, "learning_rate": 0.0001, "loss": 1.6353, "step": 603848 }, { "epoch": 52.09661835748792, "grad_norm": 0.26694339513778687, "learning_rate": 0.0001, "loss": 1.6336, "step": 603904 }, { "epoch": 52.10144927536232, "grad_norm": 1.6390516757965088, "learning_rate": 0.0001, "loss": 1.6378, "step": 603960 }, { "epoch": 52.106280193236714, "grad_norm": 0.2701776921749115, "learning_rate": 0.0001, "loss": 1.635, "step": 604016 }, { "epoch": 52.111111111111114, "grad_norm": 12.320215225219727, "learning_rate": 0.0001, "loss": 1.6344, "step": 604072 }, { "epoch": 52.11594202898551, "grad_norm": 0.572233259677887, "learning_rate": 0.0001, "loss": 1.6388, "step": 604128 }, { "epoch": 52.1207729468599, "grad_norm": 0.3262225389480591, "learning_rate": 0.0001, "loss": 1.6343, "step": 604184 }, { "epoch": 52.1256038647343, "grad_norm": 0.3088948428630829, "learning_rate": 0.0001, "loss": 1.6375, "step": 604240 }, { "epoch": 52.130434782608695, "grad_norm": 0.2713767886161804, "learning_rate": 0.0001, "loss": 1.6338, "step": 604296 }, { "epoch": 52.13526570048309, "grad_norm": 0.3137091398239136, "learning_rate": 0.0001, "loss": 1.6279, "step": 604352 }, { "epoch": 52.14009661835749, "grad_norm": 1.8153878450393677, "learning_rate": 0.0001, "loss": 1.6443, "step": 604408 }, { "epoch": 52.14492753623188, "grad_norm": 0.34170347452163696, "learning_rate": 0.0001, "loss": 1.6383, "step": 604464 }, { "epoch": 52.14975845410628, "grad_norm": 7.075658798217773, "learning_rate": 0.0001, "loss": 1.6344, "step": 604520 }, { "epoch": 52.15458937198068, "grad_norm": 0.31456616520881653, "learning_rate": 0.0001, "loss": 1.6397, "step": 604576 }, { "epoch": 52.15942028985507, "grad_norm": 0.38774996995925903, "learning_rate": 0.0001, "loss": 1.6314, "step": 604632 }, { "epoch": 52.16425120772947, "grad_norm": 0.30399948358535767, "learning_rate": 0.0001, "loss": 1.6262, "step": 604688 }, { "epoch": 52.169082125603865, "grad_norm": 7.2664947509765625, "learning_rate": 0.0001, "loss": 1.6415, "step": 604744 }, { "epoch": 52.17391304347826, "grad_norm": 7.947744846343994, "learning_rate": 0.0001, "loss": 1.6298, "step": 604800 }, { "epoch": 52.17874396135266, "grad_norm": 0.28549039363861084, "learning_rate": 0.0001, "loss": 1.6292, "step": 604856 }, { "epoch": 52.18357487922705, "grad_norm": 0.26179617643356323, "learning_rate": 0.0001, "loss": 1.6292, "step": 604912 }, { "epoch": 52.18840579710145, "grad_norm": 9.568360328674316, "learning_rate": 0.0001, "loss": 1.6381, "step": 604968 }, { "epoch": 52.193236714975846, "grad_norm": 0.22379489243030548, "learning_rate": 0.0001, "loss": 1.6322, "step": 605024 }, { "epoch": 52.19806763285024, "grad_norm": 5.435745716094971, "learning_rate": 0.0001, "loss": 1.6207, "step": 605080 }, { "epoch": 52.20289855072464, "grad_norm": 0.31319963932037354, "learning_rate": 0.0001, "loss": 1.6324, "step": 605136 }, { "epoch": 52.207729468599034, "grad_norm": 0.3347600996494293, "learning_rate": 0.0001, "loss": 1.6255, "step": 605192 }, { "epoch": 52.21256038647343, "grad_norm": 0.3616093099117279, "learning_rate": 0.0001, "loss": 1.6319, "step": 605248 }, { "epoch": 52.21739130434783, "grad_norm": 0.2979772090911865, "learning_rate": 0.0001, "loss": 1.6243, "step": 605304 }, { "epoch": 52.22222222222222, "grad_norm": 0.28010931611061096, "learning_rate": 0.0001, "loss": 1.633, "step": 605360 }, { "epoch": 52.227053140096615, "grad_norm": 0.3780241012573242, "learning_rate": 0.0001, "loss": 1.6338, "step": 605416 }, { "epoch": 52.231884057971016, "grad_norm": 0.23546724021434784, "learning_rate": 0.0001, "loss": 1.6223, "step": 605472 }, { "epoch": 52.23671497584541, "grad_norm": 0.2542780041694641, "learning_rate": 0.0001, "loss": 1.6273, "step": 605528 }, { "epoch": 52.24154589371981, "grad_norm": 0.31185591220855713, "learning_rate": 0.0001, "loss": 1.6253, "step": 605584 }, { "epoch": 52.2463768115942, "grad_norm": 3.7241601943969727, "learning_rate": 0.0001, "loss": 1.6337, "step": 605640 }, { "epoch": 52.2512077294686, "grad_norm": 0.27472907304763794, "learning_rate": 0.0001, "loss": 1.6261, "step": 605696 }, { "epoch": 52.256038647343, "grad_norm": 0.3098037838935852, "learning_rate": 0.0001, "loss": 1.6274, "step": 605752 }, { "epoch": 52.26086956521739, "grad_norm": 0.2370312362909317, "learning_rate": 0.0001, "loss": 1.6164, "step": 605808 }, { "epoch": 52.265700483091784, "grad_norm": 0.4889925420284271, "learning_rate": 0.0001, "loss": 1.6241, "step": 605864 }, { "epoch": 52.270531400966185, "grad_norm": 0.4169257581233978, "learning_rate": 0.0001, "loss": 1.6265, "step": 605920 }, { "epoch": 52.27536231884058, "grad_norm": 0.28340068459510803, "learning_rate": 0.0001, "loss": 1.6328, "step": 605976 }, { "epoch": 52.28019323671498, "grad_norm": 0.28048065304756165, "learning_rate": 0.0001, "loss": 1.6325, "step": 606032 }, { "epoch": 52.28502415458937, "grad_norm": 1.8415586948394775, "learning_rate": 0.0001, "loss": 1.6257, "step": 606088 }, { "epoch": 52.289855072463766, "grad_norm": 0.3013726472854614, "learning_rate": 0.0001, "loss": 1.6268, "step": 606144 }, { "epoch": 52.29468599033817, "grad_norm": 0.6458007097244263, "learning_rate": 0.0001, "loss": 1.6293, "step": 606200 }, { "epoch": 52.29951690821256, "grad_norm": 0.2751064598560333, "learning_rate": 0.0001, "loss": 1.6277, "step": 606256 }, { "epoch": 52.30434782608695, "grad_norm": 1.9457800388336182, "learning_rate": 0.0001, "loss": 1.6277, "step": 606312 }, { "epoch": 52.309178743961354, "grad_norm": 0.26938143372535706, "learning_rate": 0.0001, "loss": 1.6336, "step": 606368 }, { "epoch": 52.31400966183575, "grad_norm": 0.321036696434021, "learning_rate": 0.0001, "loss": 1.6267, "step": 606424 }, { "epoch": 52.31884057971015, "grad_norm": 0.2476186603307724, "learning_rate": 0.0001, "loss": 1.624, "step": 606480 }, { "epoch": 52.32367149758454, "grad_norm": 0.3201674818992615, "learning_rate": 0.0001, "loss": 1.6209, "step": 606536 }, { "epoch": 52.328502415458935, "grad_norm": 0.2682456374168396, "learning_rate": 0.0001, "loss": 1.6272, "step": 606592 }, { "epoch": 52.333333333333336, "grad_norm": 0.25677499175071716, "learning_rate": 0.0001, "loss": 1.6239, "step": 606648 }, { "epoch": 52.33816425120773, "grad_norm": 4.699735641479492, "learning_rate": 0.0001, "loss": 1.6296, "step": 606704 }, { "epoch": 52.34299516908212, "grad_norm": 0.2847197651863098, "learning_rate": 0.0001, "loss": 1.6347, "step": 606760 }, { "epoch": 52.34782608695652, "grad_norm": 0.289564311504364, "learning_rate": 0.0001, "loss": 1.6266, "step": 606816 }, { "epoch": 52.35265700483092, "grad_norm": 0.32034242153167725, "learning_rate": 0.0001, "loss": 1.6247, "step": 606872 }, { "epoch": 52.35748792270532, "grad_norm": 0.2431424856185913, "learning_rate": 0.0001, "loss": 1.6246, "step": 606928 }, { "epoch": 52.36231884057971, "grad_norm": 0.21850784122943878, "learning_rate": 0.0001, "loss": 1.6237, "step": 606984 }, { "epoch": 52.367149758454104, "grad_norm": 6.20334005355835, "learning_rate": 0.0001, "loss": 1.6254, "step": 607040 }, { "epoch": 52.371980676328505, "grad_norm": 0.28069815039634705, "learning_rate": 0.0001, "loss": 1.6255, "step": 607096 }, { "epoch": 52.3768115942029, "grad_norm": 0.27331265807151794, "learning_rate": 0.0001, "loss": 1.6209, "step": 607152 }, { "epoch": 52.38164251207729, "grad_norm": 0.9559256434440613, "learning_rate": 0.0001, "loss": 1.6305, "step": 607208 }, { "epoch": 52.38647342995169, "grad_norm": 0.5178458094596863, "learning_rate": 0.0001, "loss": 1.6243, "step": 607264 }, { "epoch": 52.391304347826086, "grad_norm": 1.6355373859405518, "learning_rate": 0.0001, "loss": 1.6344, "step": 607320 }, { "epoch": 52.39613526570048, "grad_norm": 0.6614217758178711, "learning_rate": 0.0001, "loss": 1.6207, "step": 607376 }, { "epoch": 52.40096618357488, "grad_norm": 0.2916013300418854, "learning_rate": 0.0001, "loss": 1.6217, "step": 607432 }, { "epoch": 52.405797101449274, "grad_norm": 0.26791849732398987, "learning_rate": 0.0001, "loss": 1.626, "step": 607488 }, { "epoch": 52.410628019323674, "grad_norm": 0.2918906509876251, "learning_rate": 0.0001, "loss": 1.6215, "step": 607544 }, { "epoch": 52.41545893719807, "grad_norm": 0.9729248285293579, "learning_rate": 0.0001, "loss": 1.6272, "step": 607600 }, { "epoch": 52.42028985507246, "grad_norm": 0.39905714988708496, "learning_rate": 0.0001, "loss": 1.6272, "step": 607656 }, { "epoch": 52.42512077294686, "grad_norm": 0.2530437111854553, "learning_rate": 0.0001, "loss": 1.6284, "step": 607712 }, { "epoch": 52.429951690821255, "grad_norm": 3.629359483718872, "learning_rate": 0.0001, "loss": 1.6253, "step": 607768 }, { "epoch": 52.43478260869565, "grad_norm": 0.8656750917434692, "learning_rate": 0.0001, "loss": 1.6259, "step": 607824 }, { "epoch": 52.43961352657005, "grad_norm": 1.3634520769119263, "learning_rate": 0.0001, "loss": 1.6239, "step": 607880 }, { "epoch": 52.44444444444444, "grad_norm": 0.47298288345336914, "learning_rate": 0.0001, "loss": 1.6238, "step": 607936 }, { "epoch": 52.44927536231884, "grad_norm": 0.3109995722770691, "learning_rate": 0.0001, "loss": 1.6218, "step": 607992 }, { "epoch": 52.45410628019324, "grad_norm": 0.2680703401565552, "learning_rate": 0.0001, "loss": 1.6246, "step": 608048 }, { "epoch": 52.45893719806763, "grad_norm": 0.40796709060668945, "learning_rate": 0.0001, "loss": 1.6237, "step": 608104 }, { "epoch": 52.46376811594203, "grad_norm": 0.3123927414417267, "learning_rate": 0.0001, "loss": 1.6221, "step": 608160 }, { "epoch": 52.468599033816425, "grad_norm": 0.6640651226043701, "learning_rate": 0.0001, "loss": 1.6225, "step": 608216 }, { "epoch": 52.47342995169082, "grad_norm": 1.8541922569274902, "learning_rate": 0.0001, "loss": 1.6282, "step": 608272 }, { "epoch": 52.47826086956522, "grad_norm": 0.5541632175445557, "learning_rate": 0.0001, "loss": 1.6209, "step": 608328 }, { "epoch": 52.48309178743961, "grad_norm": 0.3184080123901367, "learning_rate": 0.0001, "loss": 1.621, "step": 608384 }, { "epoch": 52.48792270531401, "grad_norm": 30.388683319091797, "learning_rate": 0.0001, "loss": 1.6316, "step": 608440 }, { "epoch": 52.492753623188406, "grad_norm": 0.2882585823535919, "learning_rate": 0.0001, "loss": 1.625, "step": 608496 }, { "epoch": 52.4975845410628, "grad_norm": 0.48324042558670044, "learning_rate": 0.0001, "loss": 1.6205, "step": 608552 }, { "epoch": 52.5024154589372, "grad_norm": 5.9197163581848145, "learning_rate": 0.0001, "loss": 1.6279, "step": 608608 }, { "epoch": 52.507246376811594, "grad_norm": 0.22131407260894775, "learning_rate": 0.0001, "loss": 1.6235, "step": 608664 }, { "epoch": 52.51207729468599, "grad_norm": 0.37695831060409546, "learning_rate": 0.0001, "loss": 1.6256, "step": 608720 }, { "epoch": 52.51690821256039, "grad_norm": 0.6126030683517456, "learning_rate": 0.0001, "loss": 1.6205, "step": 608776 }, { "epoch": 52.52173913043478, "grad_norm": 0.2731838822364807, "learning_rate": 0.0001, "loss": 1.6285, "step": 608832 }, { "epoch": 52.52657004830918, "grad_norm": 0.30731335282325745, "learning_rate": 0.0001, "loss": 1.6234, "step": 608888 }, { "epoch": 52.531400966183575, "grad_norm": 0.24996615946292877, "learning_rate": 0.0001, "loss": 1.6277, "step": 608944 }, { "epoch": 52.53623188405797, "grad_norm": 0.25507205724716187, "learning_rate": 0.0001, "loss": 1.629, "step": 609000 }, { "epoch": 52.54106280193237, "grad_norm": 0.34586596488952637, "learning_rate": 0.0001, "loss": 1.63, "step": 609056 }, { "epoch": 52.54589371980676, "grad_norm": 3.0287559032440186, "learning_rate": 0.0001, "loss": 1.6191, "step": 609112 }, { "epoch": 52.55072463768116, "grad_norm": 1.6934007406234741, "learning_rate": 0.0001, "loss": 1.6272, "step": 609168 }, { "epoch": 52.55555555555556, "grad_norm": 1.0386024713516235, "learning_rate": 0.0001, "loss": 1.6238, "step": 609224 }, { "epoch": 52.56038647342995, "grad_norm": 0.32752731442451477, "learning_rate": 0.0001, "loss": 1.6246, "step": 609280 }, { "epoch": 52.56521739130435, "grad_norm": 0.2597719132900238, "learning_rate": 0.0001, "loss": 1.6298, "step": 609336 }, { "epoch": 52.570048309178745, "grad_norm": 2.204646348953247, "learning_rate": 0.0001, "loss": 1.6182, "step": 609392 }, { "epoch": 52.57487922705314, "grad_norm": 0.39089125394821167, "learning_rate": 0.0001, "loss": 1.6298, "step": 609448 }, { "epoch": 52.57971014492754, "grad_norm": 3.3549046516418457, "learning_rate": 0.0001, "loss": 1.6201, "step": 609504 }, { "epoch": 52.58454106280193, "grad_norm": 1.600205659866333, "learning_rate": 0.0001, "loss": 1.6262, "step": 609560 }, { "epoch": 52.589371980676326, "grad_norm": 0.2859632670879364, "learning_rate": 0.0001, "loss": 1.6241, "step": 609616 }, { "epoch": 52.594202898550726, "grad_norm": 0.7588967680931091, "learning_rate": 0.0001, "loss": 1.6223, "step": 609672 }, { "epoch": 52.59903381642512, "grad_norm": 0.2743130922317505, "learning_rate": 0.0001, "loss": 1.6242, "step": 609728 }, { "epoch": 52.60386473429952, "grad_norm": 0.25957030057907104, "learning_rate": 0.0001, "loss": 1.6261, "step": 609784 }, { "epoch": 52.608695652173914, "grad_norm": 0.6814267635345459, "learning_rate": 0.0001, "loss": 1.6276, "step": 609840 }, { "epoch": 52.61352657004831, "grad_norm": 0.28700578212738037, "learning_rate": 0.0001, "loss": 1.6209, "step": 609896 }, { "epoch": 52.61835748792271, "grad_norm": 0.38757407665252686, "learning_rate": 0.0001, "loss": 1.6175, "step": 609952 }, { "epoch": 52.6231884057971, "grad_norm": 0.6389174461364746, "learning_rate": 0.0001, "loss": 1.6233, "step": 610008 }, { "epoch": 52.628019323671495, "grad_norm": 0.3088226020336151, "learning_rate": 0.0001, "loss": 1.6192, "step": 610064 }, { "epoch": 52.632850241545896, "grad_norm": 0.2960357367992401, "learning_rate": 0.0001, "loss": 1.6226, "step": 610120 }, { "epoch": 52.63768115942029, "grad_norm": 0.33149728178977966, "learning_rate": 0.0001, "loss": 1.6219, "step": 610176 }, { "epoch": 52.64251207729468, "grad_norm": 0.35647013783454895, "learning_rate": 0.0001, "loss": 1.6228, "step": 610232 }, { "epoch": 52.64734299516908, "grad_norm": 2.2563788890838623, "learning_rate": 0.0001, "loss": 1.6214, "step": 610288 }, { "epoch": 52.65217391304348, "grad_norm": 0.26710596680641174, "learning_rate": 0.0001, "loss": 1.6222, "step": 610344 }, { "epoch": 52.65700483091788, "grad_norm": 0.3425505757331848, "learning_rate": 0.0001, "loss": 1.6228, "step": 610400 }, { "epoch": 52.66183574879227, "grad_norm": 0.28987011313438416, "learning_rate": 0.0001, "loss": 1.6214, "step": 610456 }, { "epoch": 52.666666666666664, "grad_norm": 0.3713277578353882, "learning_rate": 0.0001, "loss": 1.6165, "step": 610512 }, { "epoch": 52.671497584541065, "grad_norm": 0.30723389983177185, "learning_rate": 0.0001, "loss": 1.6251, "step": 610568 }, { "epoch": 52.67632850241546, "grad_norm": 1.5319758653640747, "learning_rate": 0.0001, "loss": 1.6215, "step": 610624 }, { "epoch": 52.68115942028985, "grad_norm": 0.5476358532905579, "learning_rate": 0.0001, "loss": 1.6186, "step": 610680 }, { "epoch": 52.68599033816425, "grad_norm": 1.5787017345428467, "learning_rate": 0.0001, "loss": 1.6229, "step": 610736 }, { "epoch": 52.690821256038646, "grad_norm": 0.32898372411727905, "learning_rate": 0.0001, "loss": 1.6184, "step": 610792 }, { "epoch": 52.69565217391305, "grad_norm": 0.26135796308517456, "learning_rate": 0.0001, "loss": 1.6224, "step": 610848 }, { "epoch": 52.70048309178744, "grad_norm": 15.713007926940918, "learning_rate": 0.0001, "loss": 1.6146, "step": 610904 }, { "epoch": 52.70531400966183, "grad_norm": 0.31820148229599, "learning_rate": 0.0001, "loss": 1.614, "step": 610960 }, { "epoch": 52.710144927536234, "grad_norm": 3.5832314491271973, "learning_rate": 0.0001, "loss": 1.616, "step": 611016 }, { "epoch": 52.71497584541063, "grad_norm": 0.29194745421409607, "learning_rate": 0.0001, "loss": 1.6299, "step": 611072 }, { "epoch": 52.71980676328502, "grad_norm": 1.5226807594299316, "learning_rate": 0.0001, "loss": 1.616, "step": 611128 }, { "epoch": 52.72463768115942, "grad_norm": 0.7481878995895386, "learning_rate": 0.0001, "loss": 1.6166, "step": 611184 }, { "epoch": 52.729468599033815, "grad_norm": 0.2592329978942871, "learning_rate": 0.0001, "loss": 1.6231, "step": 611240 }, { "epoch": 52.734299516908216, "grad_norm": 0.28521567583084106, "learning_rate": 0.0001, "loss": 1.6273, "step": 611296 }, { "epoch": 52.73913043478261, "grad_norm": 0.262150377035141, "learning_rate": 0.0001, "loss": 1.6221, "step": 611352 }, { "epoch": 52.743961352657, "grad_norm": 0.9481955170631409, "learning_rate": 0.0001, "loss": 1.6199, "step": 611408 }, { "epoch": 52.7487922705314, "grad_norm": 1.6077423095703125, "learning_rate": 0.0001, "loss": 1.618, "step": 611464 }, { "epoch": 52.7536231884058, "grad_norm": 0.29798710346221924, "learning_rate": 0.0001, "loss": 1.6245, "step": 611520 }, { "epoch": 52.75845410628019, "grad_norm": 0.30890578031539917, "learning_rate": 0.0001, "loss": 1.6184, "step": 611576 }, { "epoch": 52.76328502415459, "grad_norm": 1.4938876628875732, "learning_rate": 0.0001, "loss": 1.6197, "step": 611632 }, { "epoch": 52.768115942028984, "grad_norm": 0.29227763414382935, "learning_rate": 0.0001, "loss": 1.6174, "step": 611688 }, { "epoch": 52.772946859903385, "grad_norm": 0.6910477876663208, "learning_rate": 0.0001, "loss": 1.6201, "step": 611744 }, { "epoch": 52.77777777777778, "grad_norm": 0.28229081630706787, "learning_rate": 0.0001, "loss": 1.6223, "step": 611800 }, { "epoch": 52.78260869565217, "grad_norm": 1.418738603591919, "learning_rate": 0.0001, "loss": 1.6261, "step": 611856 }, { "epoch": 52.78743961352657, "grad_norm": 0.9861903190612793, "learning_rate": 0.0001, "loss": 1.6176, "step": 611912 }, { "epoch": 52.792270531400966, "grad_norm": 0.31233271956443787, "learning_rate": 0.0001, "loss": 1.6255, "step": 611968 }, { "epoch": 52.79710144927536, "grad_norm": 0.2569340467453003, "learning_rate": 0.0001, "loss": 1.6178, "step": 612024 }, { "epoch": 52.80193236714976, "grad_norm": 6.040239334106445, "learning_rate": 0.0001, "loss": 1.6256, "step": 612080 }, { "epoch": 52.806763285024154, "grad_norm": 0.7343875765800476, "learning_rate": 0.0001, "loss": 1.6206, "step": 612136 }, { "epoch": 52.81159420289855, "grad_norm": 9.717411041259766, "learning_rate": 0.0001, "loss": 1.6221, "step": 612192 }, { "epoch": 52.81642512077295, "grad_norm": 0.2467852383852005, "learning_rate": 0.0001, "loss": 1.6184, "step": 612248 }, { "epoch": 52.82125603864734, "grad_norm": 0.30453789234161377, "learning_rate": 0.0001, "loss": 1.62, "step": 612304 }, { "epoch": 52.82608695652174, "grad_norm": 0.2665581703186035, "learning_rate": 0.0001, "loss": 1.6227, "step": 612360 }, { "epoch": 52.830917874396135, "grad_norm": 0.3497079312801361, "learning_rate": 0.0001, "loss": 1.6165, "step": 612416 }, { "epoch": 52.83574879227053, "grad_norm": 0.3123895227909088, "learning_rate": 0.0001, "loss": 1.6163, "step": 612472 }, { "epoch": 52.84057971014493, "grad_norm": 0.24903084337711334, "learning_rate": 0.0001, "loss": 1.6272, "step": 612528 }, { "epoch": 52.84541062801932, "grad_norm": 0.34374096989631653, "learning_rate": 0.0001, "loss": 1.6189, "step": 612584 }, { "epoch": 52.85024154589372, "grad_norm": 0.28965386748313904, "learning_rate": 0.0001, "loss": 1.622, "step": 612640 }, { "epoch": 52.85507246376812, "grad_norm": 0.6700859069824219, "learning_rate": 0.0001, "loss": 1.6192, "step": 612696 }, { "epoch": 52.85990338164251, "grad_norm": 0.3090690076351166, "learning_rate": 0.0001, "loss": 1.6184, "step": 612752 }, { "epoch": 52.86473429951691, "grad_norm": 1.2179752588272095, "learning_rate": 0.0001, "loss": 1.6231, "step": 612808 }, { "epoch": 52.869565217391305, "grad_norm": 23.00948143005371, "learning_rate": 0.0001, "loss": 1.6226, "step": 612864 }, { "epoch": 52.8743961352657, "grad_norm": 4.898801326751709, "learning_rate": 0.0001, "loss": 1.6238, "step": 612920 }, { "epoch": 52.8792270531401, "grad_norm": 0.2622915506362915, "learning_rate": 0.0001, "loss": 1.6177, "step": 612976 }, { "epoch": 52.88405797101449, "grad_norm": 0.6003240942955017, "learning_rate": 0.0001, "loss": 1.6213, "step": 613032 }, { "epoch": 52.888888888888886, "grad_norm": 0.3018016219139099, "learning_rate": 0.0001, "loss": 1.6141, "step": 613088 }, { "epoch": 52.893719806763286, "grad_norm": 0.8734008073806763, "learning_rate": 0.0001, "loss": 1.6204, "step": 613144 }, { "epoch": 52.89855072463768, "grad_norm": 0.7662169337272644, "learning_rate": 0.0001, "loss": 1.6226, "step": 613200 }, { "epoch": 52.90338164251208, "grad_norm": 0.3326142728328705, "learning_rate": 0.0001, "loss": 1.617, "step": 613256 }, { "epoch": 52.908212560386474, "grad_norm": 0.5741136074066162, "learning_rate": 0.0001, "loss": 1.6203, "step": 613312 }, { "epoch": 52.91304347826087, "grad_norm": 0.6650238633155823, "learning_rate": 0.0001, "loss": 1.6225, "step": 613368 }, { "epoch": 52.91787439613527, "grad_norm": 1.121497631072998, "learning_rate": 0.0001, "loss": 1.6176, "step": 613424 }, { "epoch": 52.92270531400966, "grad_norm": 0.2334299087524414, "learning_rate": 0.0001, "loss": 1.6126, "step": 613480 }, { "epoch": 52.927536231884055, "grad_norm": 0.39770156145095825, "learning_rate": 0.0001, "loss": 1.6191, "step": 613536 }, { "epoch": 52.932367149758456, "grad_norm": 17.55986976623535, "learning_rate": 0.0001, "loss": 1.6225, "step": 613592 }, { "epoch": 52.93719806763285, "grad_norm": 0.4150562584400177, "learning_rate": 0.0001, "loss": 1.6207, "step": 613648 }, { "epoch": 52.94202898550725, "grad_norm": 0.2725037932395935, "learning_rate": 0.0001, "loss": 1.6212, "step": 613704 }, { "epoch": 52.94685990338164, "grad_norm": 0.2948645055294037, "learning_rate": 0.0001, "loss": 1.6145, "step": 613760 }, { "epoch": 52.95169082125604, "grad_norm": 1.501024842262268, "learning_rate": 0.0001, "loss": 1.6118, "step": 613816 }, { "epoch": 52.95652173913044, "grad_norm": 1.6338344812393188, "learning_rate": 0.0001, "loss": 1.62, "step": 613872 }, { "epoch": 52.96135265700483, "grad_norm": 0.3601607084274292, "learning_rate": 0.0001, "loss": 1.6151, "step": 613928 }, { "epoch": 52.966183574879224, "grad_norm": 8.929232597351074, "learning_rate": 0.0001, "loss": 1.6167, "step": 613984 }, { "epoch": 52.971014492753625, "grad_norm": 0.28889983892440796, "learning_rate": 0.0001, "loss": 1.6253, "step": 614040 }, { "epoch": 52.97584541062802, "grad_norm": 0.4774128198623657, "learning_rate": 0.0001, "loss": 1.6172, "step": 614096 }, { "epoch": 52.98067632850242, "grad_norm": 2.2527053356170654, "learning_rate": 0.0001, "loss": 1.6104, "step": 614152 }, { "epoch": 52.98550724637681, "grad_norm": 0.2770519554615021, "learning_rate": 0.0001, "loss": 1.6129, "step": 614208 }, { "epoch": 52.990338164251206, "grad_norm": 0.31407931447029114, "learning_rate": 0.0001, "loss": 1.6171, "step": 614264 }, { "epoch": 52.99516908212561, "grad_norm": 0.38487866520881653, "learning_rate": 0.0001, "loss": 1.6227, "step": 614320 }, { "epoch": 53.0, "grad_norm": 0.6352516412734985, "learning_rate": 0.0001, "loss": 1.6214, "step": 614376 }, { "epoch": 53.00483091787439, "grad_norm": 1.817868947982788, "learning_rate": 0.0001, "loss": 1.6131, "step": 614432 }, { "epoch": 53.009661835748794, "grad_norm": 0.2514823377132416, "learning_rate": 0.0001, "loss": 1.6071, "step": 614488 }, { "epoch": 53.01449275362319, "grad_norm": 0.275421679019928, "learning_rate": 0.0001, "loss": 1.6146, "step": 614544 }, { "epoch": 53.01932367149758, "grad_norm": 0.26059621572494507, "learning_rate": 0.0001, "loss": 1.607, "step": 614600 }, { "epoch": 53.02415458937198, "grad_norm": 1.0122089385986328, "learning_rate": 0.0001, "loss": 1.6139, "step": 614656 }, { "epoch": 53.028985507246375, "grad_norm": 1.2309614419937134, "learning_rate": 0.0001, "loss": 1.6078, "step": 614712 }, { "epoch": 53.033816425120776, "grad_norm": 2.033308982849121, "learning_rate": 0.0001, "loss": 1.611, "step": 614768 }, { "epoch": 53.03864734299517, "grad_norm": 0.3678329885005951, "learning_rate": 0.0001, "loss": 1.6126, "step": 614824 }, { "epoch": 53.04347826086956, "grad_norm": 0.3138774633407593, "learning_rate": 0.0001, "loss": 1.6073, "step": 614880 }, { "epoch": 53.04830917874396, "grad_norm": 0.3114689588546753, "learning_rate": 0.0001, "loss": 1.6052, "step": 614936 }, { "epoch": 53.05314009661836, "grad_norm": 4.979822635650635, "learning_rate": 0.0001, "loss": 1.6098, "step": 614992 }, { "epoch": 53.05797101449275, "grad_norm": 0.2543751001358032, "learning_rate": 0.0001, "loss": 1.6086, "step": 615048 }, { "epoch": 53.06280193236715, "grad_norm": 6.088794231414795, "learning_rate": 0.0001, "loss": 1.6119, "step": 615104 }, { "epoch": 53.067632850241544, "grad_norm": 0.2866100072860718, "learning_rate": 0.0001, "loss": 1.6127, "step": 615160 }, { "epoch": 53.072463768115945, "grad_norm": 0.3603365123271942, "learning_rate": 0.0001, "loss": 1.6109, "step": 615216 }, { "epoch": 53.07729468599034, "grad_norm": 0.30078527331352234, "learning_rate": 0.0001, "loss": 1.6091, "step": 615272 }, { "epoch": 53.08212560386473, "grad_norm": 0.2688330411911011, "learning_rate": 0.0001, "loss": 1.6105, "step": 615328 }, { "epoch": 53.08695652173913, "grad_norm": 48.53907012939453, "learning_rate": 0.0001, "loss": 1.6078, "step": 615384 }, { "epoch": 53.091787439613526, "grad_norm": 0.7432712316513062, "learning_rate": 0.0001, "loss": 1.6098, "step": 615440 }, { "epoch": 53.09661835748792, "grad_norm": 0.2294136881828308, "learning_rate": 0.0001, "loss": 1.6132, "step": 615496 }, { "epoch": 53.10144927536232, "grad_norm": 0.3295147716999054, "learning_rate": 0.0001, "loss": 1.6057, "step": 615552 }, { "epoch": 53.106280193236714, "grad_norm": 0.35209184885025024, "learning_rate": 0.0001, "loss": 1.6158, "step": 615608 }, { "epoch": 53.111111111111114, "grad_norm": 0.25463587045669556, "learning_rate": 0.0001, "loss": 1.6104, "step": 615664 }, { "epoch": 53.11594202898551, "grad_norm": 22.555225372314453, "learning_rate": 0.0001, "loss": 1.6113, "step": 615720 }, { "epoch": 53.1207729468599, "grad_norm": 0.7068703770637512, "learning_rate": 0.0001, "loss": 1.6131, "step": 615776 }, { "epoch": 53.1256038647343, "grad_norm": 0.29490742087364197, "learning_rate": 0.0001, "loss": 1.6143, "step": 615832 }, { "epoch": 53.130434782608695, "grad_norm": 0.38746747374534607, "learning_rate": 0.0001, "loss": 1.6133, "step": 615888 }, { "epoch": 53.13526570048309, "grad_norm": 1.3626865148544312, "learning_rate": 0.0001, "loss": 1.6076, "step": 615944 }, { "epoch": 53.14009661835749, "grad_norm": 0.3175783157348633, "learning_rate": 0.0001, "loss": 1.6071, "step": 616000 }, { "epoch": 53.14492753623188, "grad_norm": 1.6877449750900269, "learning_rate": 0.0001, "loss": 1.6129, "step": 616056 }, { "epoch": 53.14975845410628, "grad_norm": 0.285608172416687, "learning_rate": 0.0001, "loss": 1.6141, "step": 616112 }, { "epoch": 53.15458937198068, "grad_norm": 0.2439640760421753, "learning_rate": 0.0001, "loss": 1.6101, "step": 616168 }, { "epoch": 53.15942028985507, "grad_norm": 0.2720899283885956, "learning_rate": 0.0001, "loss": 1.6088, "step": 616224 }, { "epoch": 53.16425120772947, "grad_norm": 0.9761956930160522, "learning_rate": 0.0001, "loss": 1.6062, "step": 616280 }, { "epoch": 53.169082125603865, "grad_norm": 3.196282386779785, "learning_rate": 0.0001, "loss": 1.6058, "step": 616336 }, { "epoch": 53.17391304347826, "grad_norm": 0.32755520939826965, "learning_rate": 0.0001, "loss": 1.6057, "step": 616392 }, { "epoch": 53.17874396135266, "grad_norm": 0.951578676700592, "learning_rate": 0.0001, "loss": 1.608, "step": 616448 }, { "epoch": 53.18357487922705, "grad_norm": 0.24971160292625427, "learning_rate": 0.0001, "loss": 1.6098, "step": 616504 }, { "epoch": 53.18840579710145, "grad_norm": 1.4820188283920288, "learning_rate": 0.0001, "loss": 1.61, "step": 616560 }, { "epoch": 53.193236714975846, "grad_norm": 0.29804760217666626, "learning_rate": 0.0001, "loss": 1.5995, "step": 616616 }, { "epoch": 53.19806763285024, "grad_norm": 0.25496920943260193, "learning_rate": 0.0001, "loss": 1.6116, "step": 616672 }, { "epoch": 53.20289855072464, "grad_norm": 0.3531654179096222, "learning_rate": 0.0001, "loss": 1.6053, "step": 616728 }, { "epoch": 53.207729468599034, "grad_norm": 0.24462485313415527, "learning_rate": 0.0001, "loss": 1.6048, "step": 616784 }, { "epoch": 53.21256038647343, "grad_norm": 0.2877526879310608, "learning_rate": 0.0001, "loss": 1.6115, "step": 616840 }, { "epoch": 53.21739130434783, "grad_norm": 0.27727967500686646, "learning_rate": 0.0001, "loss": 1.6171, "step": 616896 }, { "epoch": 53.22222222222222, "grad_norm": 0.36959391832351685, "learning_rate": 0.0001, "loss": 1.611, "step": 616952 }, { "epoch": 53.227053140096615, "grad_norm": 0.5490204095840454, "learning_rate": 0.0001, "loss": 1.613, "step": 617008 }, { "epoch": 53.231884057971016, "grad_norm": 0.8288894891738892, "learning_rate": 0.0001, "loss": 1.614, "step": 617064 }, { "epoch": 53.23671497584541, "grad_norm": 0.30810850858688354, "learning_rate": 0.0001, "loss": 1.6148, "step": 617120 }, { "epoch": 53.24154589371981, "grad_norm": 0.3204721212387085, "learning_rate": 0.0001, "loss": 1.6123, "step": 617176 }, { "epoch": 53.2463768115942, "grad_norm": 0.2773438096046448, "learning_rate": 0.0001, "loss": 1.613, "step": 617232 }, { "epoch": 53.2512077294686, "grad_norm": 0.3788485527038574, "learning_rate": 0.0001, "loss": 1.6091, "step": 617288 }, { "epoch": 53.256038647343, "grad_norm": 0.6024500727653503, "learning_rate": 0.0001, "loss": 1.6084, "step": 617344 }, { "epoch": 53.26086956521739, "grad_norm": 0.37744078040122986, "learning_rate": 0.0001, "loss": 1.612, "step": 617400 }, { "epoch": 53.265700483091784, "grad_norm": 0.2912192940711975, "learning_rate": 0.0001, "loss": 1.6147, "step": 617456 }, { "epoch": 53.270531400966185, "grad_norm": 0.4811250567436218, "learning_rate": 0.0001, "loss": 1.6066, "step": 617512 }, { "epoch": 53.27536231884058, "grad_norm": 0.2746502459049225, "learning_rate": 0.0001, "loss": 1.6124, "step": 617568 }, { "epoch": 53.28019323671498, "grad_norm": 6.897067546844482, "learning_rate": 0.0001, "loss": 1.6164, "step": 617624 }, { "epoch": 53.28502415458937, "grad_norm": 0.30242225527763367, "learning_rate": 0.0001, "loss": 1.6149, "step": 617680 }, { "epoch": 53.289855072463766, "grad_norm": 0.2558978796005249, "learning_rate": 0.0001, "loss": 1.6076, "step": 617736 }, { "epoch": 53.29468599033817, "grad_norm": 0.2984425127506256, "learning_rate": 0.0001, "loss": 1.6063, "step": 617792 }, { "epoch": 53.29951690821256, "grad_norm": 0.25561511516571045, "learning_rate": 0.0001, "loss": 1.6092, "step": 617848 }, { "epoch": 53.30434782608695, "grad_norm": 0.36337482929229736, "learning_rate": 0.0001, "loss": 1.619, "step": 617904 }, { "epoch": 53.309178743961354, "grad_norm": 0.848654568195343, "learning_rate": 0.0001, "loss": 1.6035, "step": 617960 }, { "epoch": 53.31400966183575, "grad_norm": 0.37902387976646423, "learning_rate": 0.0001, "loss": 1.6082, "step": 618016 }, { "epoch": 53.31884057971015, "grad_norm": 0.2894681990146637, "learning_rate": 0.0001, "loss": 1.6076, "step": 618072 }, { "epoch": 53.32367149758454, "grad_norm": 0.3529329001903534, "learning_rate": 0.0001, "loss": 1.6143, "step": 618128 }, { "epoch": 53.328502415458935, "grad_norm": 0.2290400117635727, "learning_rate": 0.0001, "loss": 1.6126, "step": 618184 }, { "epoch": 53.333333333333336, "grad_norm": 0.27006590366363525, "learning_rate": 0.0001, "loss": 1.6154, "step": 618240 }, { "epoch": 53.33816425120773, "grad_norm": 1.923215389251709, "learning_rate": 0.0001, "loss": 1.6092, "step": 618296 }, { "epoch": 53.34299516908212, "grad_norm": 0.32638490200042725, "learning_rate": 0.0001, "loss": 1.6162, "step": 618352 }, { "epoch": 53.34782608695652, "grad_norm": 0.29125118255615234, "learning_rate": 0.0001, "loss": 1.6204, "step": 618408 }, { "epoch": 53.35265700483092, "grad_norm": 3.4986839294433594, "learning_rate": 0.0001, "loss": 1.6066, "step": 618464 }, { "epoch": 53.35748792270532, "grad_norm": 0.2693898379802704, "learning_rate": 0.0001, "loss": 1.6069, "step": 618520 }, { "epoch": 53.36231884057971, "grad_norm": 0.2758204936981201, "learning_rate": 0.0001, "loss": 1.6091, "step": 618576 }, { "epoch": 53.367149758454104, "grad_norm": 1.7987353801727295, "learning_rate": 0.0001, "loss": 1.6057, "step": 618632 }, { "epoch": 53.371980676328505, "grad_norm": 0.33074575662612915, "learning_rate": 0.0001, "loss": 1.6127, "step": 618688 }, { "epoch": 53.3768115942029, "grad_norm": 0.5495596528053284, "learning_rate": 0.0001, "loss": 1.6129, "step": 618744 }, { "epoch": 53.38164251207729, "grad_norm": 0.5509549379348755, "learning_rate": 0.0001, "loss": 1.6054, "step": 618800 }, { "epoch": 53.38647342995169, "grad_norm": 1.455987811088562, "learning_rate": 0.0001, "loss": 1.6089, "step": 618856 }, { "epoch": 53.391304347826086, "grad_norm": 0.6101462244987488, "learning_rate": 0.0001, "loss": 1.6151, "step": 618912 }, { "epoch": 53.39613526570048, "grad_norm": 0.2518075406551361, "learning_rate": 0.0001, "loss": 1.6114, "step": 618968 }, { "epoch": 53.40096618357488, "grad_norm": 0.8024610877037048, "learning_rate": 0.0001, "loss": 1.6103, "step": 619024 }, { "epoch": 53.405797101449274, "grad_norm": 4.009260177612305, "learning_rate": 0.0001, "loss": 1.599, "step": 619080 }, { "epoch": 53.410628019323674, "grad_norm": 1.3296899795532227, "learning_rate": 0.0001, "loss": 1.6068, "step": 619136 }, { "epoch": 53.41545893719807, "grad_norm": 1.3715002536773682, "learning_rate": 0.0001, "loss": 1.6082, "step": 619192 }, { "epoch": 53.42028985507246, "grad_norm": 1.3646429777145386, "learning_rate": 0.0001, "loss": 1.6146, "step": 619248 }, { "epoch": 53.42512077294686, "grad_norm": 1.3766738176345825, "learning_rate": 0.0001, "loss": 1.6119, "step": 619304 }, { "epoch": 53.429951690821255, "grad_norm": 0.4382825791835785, "learning_rate": 0.0001, "loss": 1.6097, "step": 619360 }, { "epoch": 53.43478260869565, "grad_norm": 0.47630441188812256, "learning_rate": 0.0001, "loss": 1.6131, "step": 619416 }, { "epoch": 53.43961352657005, "grad_norm": 0.2645590007305145, "learning_rate": 0.0001, "loss": 1.6089, "step": 619472 }, { "epoch": 53.44444444444444, "grad_norm": 1.0665777921676636, "learning_rate": 0.0001, "loss": 1.6122, "step": 619528 }, { "epoch": 53.44927536231884, "grad_norm": 0.22765222191810608, "learning_rate": 0.0001, "loss": 1.6055, "step": 619584 }, { "epoch": 53.45410628019324, "grad_norm": 0.37355872988700867, "learning_rate": 0.0001, "loss": 1.6085, "step": 619640 }, { "epoch": 53.45893719806763, "grad_norm": 0.8160785436630249, "learning_rate": 0.0001, "loss": 1.6108, "step": 619696 }, { "epoch": 53.46376811594203, "grad_norm": 0.28802430629730225, "learning_rate": 0.0001, "loss": 1.6072, "step": 619752 }, { "epoch": 53.468599033816425, "grad_norm": 1.1980456113815308, "learning_rate": 0.0001, "loss": 1.6044, "step": 619808 }, { "epoch": 53.47342995169082, "grad_norm": 0.3316384255886078, "learning_rate": 0.0001, "loss": 1.6064, "step": 619864 }, { "epoch": 53.47826086956522, "grad_norm": 0.5961681604385376, "learning_rate": 0.0001, "loss": 1.6169, "step": 619920 }, { "epoch": 53.48309178743961, "grad_norm": 0.3760119378566742, "learning_rate": 0.0001, "loss": 1.6114, "step": 619976 }, { "epoch": 53.48792270531401, "grad_norm": 0.9784485697746277, "learning_rate": 0.0001, "loss": 1.609, "step": 620032 }, { "epoch": 53.492753623188406, "grad_norm": 0.33177223801612854, "learning_rate": 0.0001, "loss": 1.6006, "step": 620088 }, { "epoch": 53.4975845410628, "grad_norm": 0.2499803900718689, "learning_rate": 0.0001, "loss": 1.6151, "step": 620144 }, { "epoch": 53.5024154589372, "grad_norm": 0.2857021987438202, "learning_rate": 0.0001, "loss": 1.6128, "step": 620200 }, { "epoch": 53.507246376811594, "grad_norm": 2.4885220527648926, "learning_rate": 0.0001, "loss": 1.6083, "step": 620256 }, { "epoch": 53.51207729468599, "grad_norm": 3.023359537124634, "learning_rate": 0.0001, "loss": 1.6093, "step": 620312 }, { "epoch": 53.51690821256039, "grad_norm": 0.28634023666381836, "learning_rate": 0.0001, "loss": 1.6069, "step": 620368 }, { "epoch": 53.52173913043478, "grad_norm": 0.4421958923339844, "learning_rate": 0.0001, "loss": 1.6087, "step": 620424 }, { "epoch": 53.52657004830918, "grad_norm": 0.2849894165992737, "learning_rate": 0.0001, "loss": 1.6093, "step": 620480 }, { "epoch": 53.531400966183575, "grad_norm": 0.32624486088752747, "learning_rate": 0.0001, "loss": 1.6088, "step": 620536 }, { "epoch": 53.53623188405797, "grad_norm": 2.5893516540527344, "learning_rate": 0.0001, "loss": 1.6088, "step": 620592 }, { "epoch": 53.54106280193237, "grad_norm": 0.28805267810821533, "learning_rate": 0.0001, "loss": 1.6024, "step": 620648 }, { "epoch": 53.54589371980676, "grad_norm": 0.2570841312408447, "learning_rate": 0.0001, "loss": 1.6061, "step": 620704 }, { "epoch": 53.55072463768116, "grad_norm": 2.708014965057373, "learning_rate": 0.0001, "loss": 1.6108, "step": 620760 }, { "epoch": 53.55555555555556, "grad_norm": 0.9658994674682617, "learning_rate": 0.0001, "loss": 1.6068, "step": 620816 }, { "epoch": 53.56038647342995, "grad_norm": 1.5797643661499023, "learning_rate": 0.0001, "loss": 1.611, "step": 620872 }, { "epoch": 53.56521739130435, "grad_norm": 0.4920995533466339, "learning_rate": 0.0001, "loss": 1.609, "step": 620928 }, { "epoch": 53.570048309178745, "grad_norm": 0.26736125349998474, "learning_rate": 0.0001, "loss": 1.6117, "step": 620984 }, { "epoch": 53.57487922705314, "grad_norm": 0.2552357017993927, "learning_rate": 0.0001, "loss": 1.6086, "step": 621040 }, { "epoch": 53.57971014492754, "grad_norm": 1.8550673723220825, "learning_rate": 0.0001, "loss": 1.6074, "step": 621096 }, { "epoch": 53.58454106280193, "grad_norm": 0.7760961055755615, "learning_rate": 0.0001, "loss": 1.606, "step": 621152 }, { "epoch": 53.589371980676326, "grad_norm": 1.6466834545135498, "learning_rate": 0.0001, "loss": 1.6027, "step": 621208 }, { "epoch": 53.594202898550726, "grad_norm": 0.40948358178138733, "learning_rate": 0.0001, "loss": 1.6066, "step": 621264 }, { "epoch": 53.59903381642512, "grad_norm": 0.27771925926208496, "learning_rate": 0.0001, "loss": 1.6025, "step": 621320 }, { "epoch": 53.60386473429952, "grad_norm": 0.25140196084976196, "learning_rate": 0.0001, "loss": 1.6078, "step": 621376 }, { "epoch": 53.608695652173914, "grad_norm": 0.3163756728172302, "learning_rate": 0.0001, "loss": 1.6093, "step": 621432 }, { "epoch": 53.61352657004831, "grad_norm": 2.584184408187866, "learning_rate": 0.0001, "loss": 1.6175, "step": 621488 }, { "epoch": 53.61835748792271, "grad_norm": 2.9850893020629883, "learning_rate": 0.0001, "loss": 1.6172, "step": 621544 }, { "epoch": 53.6231884057971, "grad_norm": 0.7785676121711731, "learning_rate": 0.0001, "loss": 1.6077, "step": 621600 }, { "epoch": 53.628019323671495, "grad_norm": 0.8889927268028259, "learning_rate": 0.0001, "loss": 1.6102, "step": 621656 }, { "epoch": 53.632850241545896, "grad_norm": 0.2973834276199341, "learning_rate": 0.0001, "loss": 1.6057, "step": 621712 }, { "epoch": 53.63768115942029, "grad_norm": 0.3340192437171936, "learning_rate": 0.0001, "loss": 1.6045, "step": 621768 }, { "epoch": 53.64251207729468, "grad_norm": 0.28479963541030884, "learning_rate": 0.0001, "loss": 1.6085, "step": 621824 }, { "epoch": 53.64734299516908, "grad_norm": 0.2740018665790558, "learning_rate": 0.0001, "loss": 1.605, "step": 621880 }, { "epoch": 53.65217391304348, "grad_norm": 0.49793222546577454, "learning_rate": 0.0001, "loss": 1.6054, "step": 621936 }, { "epoch": 53.65700483091788, "grad_norm": 0.42369046807289124, "learning_rate": 0.0001, "loss": 1.6141, "step": 621992 }, { "epoch": 53.66183574879227, "grad_norm": 0.5475583672523499, "learning_rate": 0.0001, "loss": 1.6032, "step": 622048 }, { "epoch": 53.666666666666664, "grad_norm": 1.3521236181259155, "learning_rate": 0.0001, "loss": 1.6032, "step": 622104 }, { "epoch": 53.671497584541065, "grad_norm": 0.27459555864334106, "learning_rate": 0.0001, "loss": 1.605, "step": 622160 }, { "epoch": 53.67632850241546, "grad_norm": 0.9680837988853455, "learning_rate": 0.0001, "loss": 1.6103, "step": 622216 }, { "epoch": 53.68115942028985, "grad_norm": 0.34755340218544006, "learning_rate": 0.0001, "loss": 1.607, "step": 622272 }, { "epoch": 53.68599033816425, "grad_norm": 2.2046735286712646, "learning_rate": 0.0001, "loss": 1.6018, "step": 622328 }, { "epoch": 53.690821256038646, "grad_norm": 0.2979224622249603, "learning_rate": 0.0001, "loss": 1.6082, "step": 622384 }, { "epoch": 53.69565217391305, "grad_norm": 0.29367387294769287, "learning_rate": 0.0001, "loss": 1.6048, "step": 622440 }, { "epoch": 53.70048309178744, "grad_norm": 0.29367345571517944, "learning_rate": 0.0001, "loss": 1.6072, "step": 622496 }, { "epoch": 53.70531400966183, "grad_norm": 0.6783660054206848, "learning_rate": 0.0001, "loss": 1.6139, "step": 622552 }, { "epoch": 53.710144927536234, "grad_norm": 0.273450642824173, "learning_rate": 0.0001, "loss": 1.6066, "step": 622608 }, { "epoch": 53.71497584541063, "grad_norm": 0.3159538805484772, "learning_rate": 0.0001, "loss": 1.6114, "step": 622664 }, { "epoch": 53.71980676328502, "grad_norm": 1.0674004554748535, "learning_rate": 0.0001, "loss": 1.6088, "step": 622720 }, { "epoch": 53.72463768115942, "grad_norm": 1.7798784971237183, "learning_rate": 0.0001, "loss": 1.6039, "step": 622776 }, { "epoch": 53.729468599033815, "grad_norm": 0.2806278467178345, "learning_rate": 0.0001, "loss": 1.6056, "step": 622832 }, { "epoch": 53.734299516908216, "grad_norm": 0.44711774587631226, "learning_rate": 0.0001, "loss": 1.6084, "step": 622888 }, { "epoch": 53.73913043478261, "grad_norm": 1.1626936197280884, "learning_rate": 0.0001, "loss": 1.6029, "step": 622944 }, { "epoch": 53.743961352657, "grad_norm": 3.330331325531006, "learning_rate": 0.0001, "loss": 1.6114, "step": 623000 }, { "epoch": 53.7487922705314, "grad_norm": 0.5338055491447449, "learning_rate": 0.0001, "loss": 1.6114, "step": 623056 }, { "epoch": 53.7536231884058, "grad_norm": 0.36691802740097046, "learning_rate": 0.0001, "loss": 1.6091, "step": 623112 }, { "epoch": 53.75845410628019, "grad_norm": 0.28180935978889465, "learning_rate": 0.0001, "loss": 1.6092, "step": 623168 }, { "epoch": 53.76328502415459, "grad_norm": 0.41328710317611694, "learning_rate": 0.0001, "loss": 1.6055, "step": 623224 }, { "epoch": 53.768115942028984, "grad_norm": 0.5110811591148376, "learning_rate": 0.0001, "loss": 1.6118, "step": 623280 }, { "epoch": 53.772946859903385, "grad_norm": 0.6360756158828735, "learning_rate": 0.0001, "loss": 1.6114, "step": 623336 }, { "epoch": 53.77777777777778, "grad_norm": 3.1869149208068848, "learning_rate": 0.0001, "loss": 1.6091, "step": 623392 }, { "epoch": 53.78260869565217, "grad_norm": 1.7870045900344849, "learning_rate": 0.0001, "loss": 1.6081, "step": 623448 }, { "epoch": 53.78743961352657, "grad_norm": 1.6960760354995728, "learning_rate": 0.0001, "loss": 1.602, "step": 623504 }, { "epoch": 53.792270531400966, "grad_norm": 70.3484115600586, "learning_rate": 0.0001, "loss": 1.6119, "step": 623560 }, { "epoch": 53.79710144927536, "grad_norm": 0.2801092565059662, "learning_rate": 0.0001, "loss": 1.6052, "step": 623616 }, { "epoch": 53.80193236714976, "grad_norm": 0.2731032967567444, "learning_rate": 0.0001, "loss": 1.6088, "step": 623672 }, { "epoch": 53.806763285024154, "grad_norm": 0.32689666748046875, "learning_rate": 0.0001, "loss": 1.6043, "step": 623728 }, { "epoch": 53.81159420289855, "grad_norm": 3.8599534034729004, "learning_rate": 0.0001, "loss": 1.6054, "step": 623784 }, { "epoch": 53.81642512077295, "grad_norm": 1.7656059265136719, "learning_rate": 0.0001, "loss": 1.6149, "step": 623840 }, { "epoch": 53.82125603864734, "grad_norm": 1.0321673154830933, "learning_rate": 0.0001, "loss": 1.6101, "step": 623896 }, { "epoch": 53.82608695652174, "grad_norm": 0.2927975356578827, "learning_rate": 0.0001, "loss": 1.6104, "step": 623952 }, { "epoch": 53.830917874396135, "grad_norm": 0.918186366558075, "learning_rate": 0.0001, "loss": 1.6057, "step": 624008 }, { "epoch": 53.83574879227053, "grad_norm": 0.25207510590553284, "learning_rate": 0.0001, "loss": 1.6096, "step": 624064 }, { "epoch": 53.84057971014493, "grad_norm": 0.8498697280883789, "learning_rate": 0.0001, "loss": 1.608, "step": 624120 }, { "epoch": 53.84541062801932, "grad_norm": 0.977336049079895, "learning_rate": 0.0001, "loss": 1.6115, "step": 624176 }, { "epoch": 53.85024154589372, "grad_norm": 31.43777847290039, "learning_rate": 0.0001, "loss": 1.6074, "step": 624232 }, { "epoch": 53.85507246376812, "grad_norm": 2.1555464267730713, "learning_rate": 0.0001, "loss": 1.6097, "step": 624288 }, { "epoch": 53.85990338164251, "grad_norm": 1.2258323431015015, "learning_rate": 0.0001, "loss": 1.6108, "step": 624344 }, { "epoch": 53.86473429951691, "grad_norm": 0.3260473608970642, "learning_rate": 0.0001, "loss": 1.6115, "step": 624400 }, { "epoch": 53.869565217391305, "grad_norm": 0.3182932436466217, "learning_rate": 0.0001, "loss": 1.6009, "step": 624456 }, { "epoch": 53.8743961352657, "grad_norm": 0.39077726006507874, "learning_rate": 0.0001, "loss": 1.6085, "step": 624512 }, { "epoch": 53.8792270531401, "grad_norm": 0.31768593192100525, "learning_rate": 0.0001, "loss": 1.6058, "step": 624568 }, { "epoch": 53.88405797101449, "grad_norm": 0.38129085302352905, "learning_rate": 0.0001, "loss": 1.6096, "step": 624624 }, { "epoch": 53.888888888888886, "grad_norm": 0.6533076167106628, "learning_rate": 0.0001, "loss": 1.6062, "step": 624680 }, { "epoch": 53.893719806763286, "grad_norm": 0.9170658588409424, "learning_rate": 0.0001, "loss": 1.6047, "step": 624736 }, { "epoch": 53.89855072463768, "grad_norm": 0.31055402755737305, "learning_rate": 0.0001, "loss": 1.6033, "step": 624792 }, { "epoch": 53.90338164251208, "grad_norm": 0.6105480194091797, "learning_rate": 0.0001, "loss": 1.6121, "step": 624848 }, { "epoch": 53.908212560386474, "grad_norm": 0.2634179890155792, "learning_rate": 0.0001, "loss": 1.6068, "step": 624904 }, { "epoch": 53.91304347826087, "grad_norm": 0.24867261946201324, "learning_rate": 0.0001, "loss": 1.6011, "step": 624960 }, { "epoch": 53.91787439613527, "grad_norm": 0.43336713314056396, "learning_rate": 0.0001, "loss": 1.6074, "step": 625016 }, { "epoch": 53.92270531400966, "grad_norm": 3.17081356048584, "learning_rate": 0.0001, "loss": 1.6139, "step": 625072 }, { "epoch": 53.927536231884055, "grad_norm": 1.4041420221328735, "learning_rate": 0.0001, "loss": 1.6156, "step": 625128 }, { "epoch": 53.932367149758456, "grad_norm": 0.33310747146606445, "learning_rate": 0.0001, "loss": 1.6075, "step": 625184 }, { "epoch": 53.93719806763285, "grad_norm": 0.86329585313797, "learning_rate": 0.0001, "loss": 1.6073, "step": 625240 }, { "epoch": 53.94202898550725, "grad_norm": 0.5687943696975708, "learning_rate": 0.0001, "loss": 1.6061, "step": 625296 }, { "epoch": 53.94685990338164, "grad_norm": 0.39680880308151245, "learning_rate": 0.0001, "loss": 1.6051, "step": 625352 }, { "epoch": 53.95169082125604, "grad_norm": 1.6389135122299194, "learning_rate": 0.0001, "loss": 1.6064, "step": 625408 }, { "epoch": 53.95652173913044, "grad_norm": 0.34466972947120667, "learning_rate": 0.0001, "loss": 1.602, "step": 625464 }, { "epoch": 53.96135265700483, "grad_norm": 0.2862292230129242, "learning_rate": 0.0001, "loss": 1.6121, "step": 625520 }, { "epoch": 53.966183574879224, "grad_norm": 0.2910974323749542, "learning_rate": 0.0001, "loss": 1.6028, "step": 625576 }, { "epoch": 53.971014492753625, "grad_norm": 3.5177853107452393, "learning_rate": 0.0001, "loss": 1.6126, "step": 625632 }, { "epoch": 53.97584541062802, "grad_norm": 0.24396534264087677, "learning_rate": 0.0001, "loss": 1.613, "step": 625688 }, { "epoch": 53.98067632850242, "grad_norm": 0.2746293842792511, "learning_rate": 0.0001, "loss": 1.6088, "step": 625744 }, { "epoch": 53.98550724637681, "grad_norm": 0.2521669566631317, "learning_rate": 0.0001, "loss": 1.604, "step": 625800 }, { "epoch": 53.990338164251206, "grad_norm": 1.0140031576156616, "learning_rate": 0.0001, "loss": 1.6002, "step": 625856 }, { "epoch": 53.99516908212561, "grad_norm": 3.032520294189453, "learning_rate": 0.0001, "loss": 1.6067, "step": 625912 }, { "epoch": 54.0, "grad_norm": 0.2754550576210022, "learning_rate": 0.0001, "loss": 1.6025, "step": 625968 }, { "epoch": 54.00483091787439, "grad_norm": 0.3616478145122528, "learning_rate": 0.0001, "loss": 1.6018, "step": 626024 }, { "epoch": 54.009661835748794, "grad_norm": 0.3297947645187378, "learning_rate": 0.0001, "loss": 1.594, "step": 626080 }, { "epoch": 54.01449275362319, "grad_norm": 2.164200782775879, "learning_rate": 0.0001, "loss": 1.596, "step": 626136 }, { "epoch": 54.01932367149758, "grad_norm": 0.2711178958415985, "learning_rate": 0.0001, "loss": 1.6003, "step": 626192 }, { "epoch": 54.02415458937198, "grad_norm": 0.30166110396385193, "learning_rate": 0.0001, "loss": 1.602, "step": 626248 }, { "epoch": 54.028985507246375, "grad_norm": 0.24250537157058716, "learning_rate": 0.0001, "loss": 1.6023, "step": 626304 }, { "epoch": 54.033816425120776, "grad_norm": 0.258181631565094, "learning_rate": 0.0001, "loss": 1.6019, "step": 626360 }, { "epoch": 54.03864734299517, "grad_norm": 0.25345322489738464, "learning_rate": 0.0001, "loss": 1.6058, "step": 626416 }, { "epoch": 54.04347826086956, "grad_norm": 1.273288607597351, "learning_rate": 0.0001, "loss": 1.604, "step": 626472 }, { "epoch": 54.04830917874396, "grad_norm": 0.2798645794391632, "learning_rate": 0.0001, "loss": 1.5962, "step": 626528 }, { "epoch": 54.05314009661836, "grad_norm": 1.0866791009902954, "learning_rate": 0.0001, "loss": 1.5922, "step": 626584 }, { "epoch": 54.05797101449275, "grad_norm": 0.4315543472766876, "learning_rate": 0.0001, "loss": 1.6013, "step": 626640 }, { "epoch": 54.06280193236715, "grad_norm": 1.0175254344940186, "learning_rate": 0.0001, "loss": 1.5981, "step": 626696 }, { "epoch": 54.067632850241544, "grad_norm": 0.293344110250473, "learning_rate": 0.0001, "loss": 1.5978, "step": 626752 }, { "epoch": 54.072463768115945, "grad_norm": 0.3018926680088043, "learning_rate": 0.0001, "loss": 1.6007, "step": 626808 }, { "epoch": 54.07729468599034, "grad_norm": 0.48064514994621277, "learning_rate": 0.0001, "loss": 1.6057, "step": 626864 }, { "epoch": 54.08212560386473, "grad_norm": 0.8345780968666077, "learning_rate": 0.0001, "loss": 1.6006, "step": 626920 }, { "epoch": 54.08695652173913, "grad_norm": 0.25655296444892883, "learning_rate": 0.0001, "loss": 1.5963, "step": 626976 }, { "epoch": 54.091787439613526, "grad_norm": 0.9479592442512512, "learning_rate": 0.0001, "loss": 1.5981, "step": 627032 }, { "epoch": 54.09661835748792, "grad_norm": 0.23186352849006653, "learning_rate": 0.0001, "loss": 1.6033, "step": 627088 }, { "epoch": 54.10144927536232, "grad_norm": 0.2643777132034302, "learning_rate": 0.0001, "loss": 1.5965, "step": 627144 }, { "epoch": 54.106280193236714, "grad_norm": 11.479449272155762, "learning_rate": 0.0001, "loss": 1.5969, "step": 627200 }, { "epoch": 54.111111111111114, "grad_norm": 6.679366588592529, "learning_rate": 0.0001, "loss": 1.5994, "step": 627256 }, { "epoch": 54.11594202898551, "grad_norm": 0.3403274118900299, "learning_rate": 0.0001, "loss": 1.5988, "step": 627312 }, { "epoch": 54.1207729468599, "grad_norm": 6.973695755004883, "learning_rate": 0.0001, "loss": 1.6045, "step": 627368 }, { "epoch": 54.1256038647343, "grad_norm": 4.317837715148926, "learning_rate": 0.0001, "loss": 1.5984, "step": 627424 }, { "epoch": 54.130434782608695, "grad_norm": 0.47437071800231934, "learning_rate": 0.0001, "loss": 1.5904, "step": 627480 }, { "epoch": 54.13526570048309, "grad_norm": 1.0407384634017944, "learning_rate": 0.0001, "loss": 1.5958, "step": 627536 }, { "epoch": 54.14009661835749, "grad_norm": 0.27172377705574036, "learning_rate": 0.0001, "loss": 1.6024, "step": 627592 }, { "epoch": 54.14492753623188, "grad_norm": 9.059698104858398, "learning_rate": 0.0001, "loss": 1.598, "step": 627648 }, { "epoch": 54.14975845410628, "grad_norm": 1.5495800971984863, "learning_rate": 0.0001, "loss": 1.6025, "step": 627704 }, { "epoch": 54.15458937198068, "grad_norm": 0.34374189376831055, "learning_rate": 0.0001, "loss": 1.6023, "step": 627760 }, { "epoch": 54.15942028985507, "grad_norm": 0.5767624974250793, "learning_rate": 0.0001, "loss": 1.6033, "step": 627816 }, { "epoch": 54.16425120772947, "grad_norm": 0.5548938512802124, "learning_rate": 0.0001, "loss": 1.605, "step": 627872 }, { "epoch": 54.169082125603865, "grad_norm": 0.4878440201282501, "learning_rate": 0.0001, "loss": 1.6001, "step": 627928 }, { "epoch": 54.17391304347826, "grad_norm": 2.648956298828125, "learning_rate": 0.0001, "loss": 1.6044, "step": 627984 }, { "epoch": 54.17874396135266, "grad_norm": 0.24094444513320923, "learning_rate": 0.0001, "loss": 1.5997, "step": 628040 }, { "epoch": 54.18357487922705, "grad_norm": 0.34384480118751526, "learning_rate": 0.0001, "loss": 1.6002, "step": 628096 }, { "epoch": 54.18840579710145, "grad_norm": 0.4795069396495819, "learning_rate": 0.0001, "loss": 1.6027, "step": 628152 }, { "epoch": 54.193236714975846, "grad_norm": 0.25130143761634827, "learning_rate": 0.0001, "loss": 1.6056, "step": 628208 }, { "epoch": 54.19806763285024, "grad_norm": 3.1154582500457764, "learning_rate": 0.0001, "loss": 1.6002, "step": 628264 }, { "epoch": 54.20289855072464, "grad_norm": 2.244863748550415, "learning_rate": 0.0001, "loss": 1.605, "step": 628320 }, { "epoch": 54.207729468599034, "grad_norm": 1.3136634826660156, "learning_rate": 0.0001, "loss": 1.607, "step": 628376 }, { "epoch": 54.21256038647343, "grad_norm": 8.618431091308594, "learning_rate": 0.0001, "loss": 1.5984, "step": 628432 }, { "epoch": 54.21739130434783, "grad_norm": 11.032532691955566, "learning_rate": 0.0001, "loss": 1.6018, "step": 628488 }, { "epoch": 54.22222222222222, "grad_norm": 1.1717407703399658, "learning_rate": 0.0001, "loss": 1.5951, "step": 628544 }, { "epoch": 54.227053140096615, "grad_norm": 0.329839825630188, "learning_rate": 0.0001, "loss": 1.6021, "step": 628600 }, { "epoch": 54.231884057971016, "grad_norm": 2.22916841506958, "learning_rate": 0.0001, "loss": 1.6012, "step": 628656 }, { "epoch": 54.23671497584541, "grad_norm": 2.2819831371307373, "learning_rate": 0.0001, "loss": 1.5966, "step": 628712 }, { "epoch": 54.24154589371981, "grad_norm": 0.37119096517562866, "learning_rate": 0.0001, "loss": 1.6077, "step": 628768 }, { "epoch": 54.2463768115942, "grad_norm": 0.6932963728904724, "learning_rate": 0.0001, "loss": 1.6032, "step": 628824 }, { "epoch": 54.2512077294686, "grad_norm": 0.2616609036922455, "learning_rate": 0.0001, "loss": 1.5978, "step": 628880 }, { "epoch": 54.256038647343, "grad_norm": 5.733834266662598, "learning_rate": 0.0001, "loss": 1.5959, "step": 628936 }, { "epoch": 54.26086956521739, "grad_norm": 0.290656179189682, "learning_rate": 0.0001, "loss": 1.5999, "step": 628992 }, { "epoch": 54.265700483091784, "grad_norm": 0.292722225189209, "learning_rate": 0.0001, "loss": 1.5979, "step": 629048 }, { "epoch": 54.270531400966185, "grad_norm": 0.37268519401550293, "learning_rate": 0.0001, "loss": 1.603, "step": 629104 }, { "epoch": 54.27536231884058, "grad_norm": 2.832392454147339, "learning_rate": 0.0001, "loss": 1.6044, "step": 629160 }, { "epoch": 54.28019323671498, "grad_norm": 1.4019123315811157, "learning_rate": 0.0001, "loss": 1.5958, "step": 629216 }, { "epoch": 54.28502415458937, "grad_norm": 0.3383622169494629, "learning_rate": 0.0001, "loss": 1.6044, "step": 629272 }, { "epoch": 54.289855072463766, "grad_norm": 2.28877854347229, "learning_rate": 0.0001, "loss": 1.5963, "step": 629328 }, { "epoch": 54.29468599033817, "grad_norm": 0.8670189380645752, "learning_rate": 0.0001, "loss": 1.5987, "step": 629384 }, { "epoch": 54.29951690821256, "grad_norm": 0.23986457288265228, "learning_rate": 0.0001, "loss": 1.6045, "step": 629440 }, { "epoch": 54.30434782608695, "grad_norm": 3.3139610290527344, "learning_rate": 0.0001, "loss": 1.6083, "step": 629496 }, { "epoch": 54.309178743961354, "grad_norm": 11.654072761535645, "learning_rate": 0.0001, "loss": 1.6036, "step": 629552 }, { "epoch": 54.31400966183575, "grad_norm": 0.4876439571380615, "learning_rate": 0.0001, "loss": 1.6016, "step": 629608 }, { "epoch": 54.31884057971015, "grad_norm": 0.25245243310928345, "learning_rate": 0.0001, "loss": 1.5975, "step": 629664 }, { "epoch": 54.32367149758454, "grad_norm": 0.6198601126670837, "learning_rate": 0.0001, "loss": 1.6005, "step": 629720 }, { "epoch": 54.328502415458935, "grad_norm": 0.38531753420829773, "learning_rate": 0.0001, "loss": 1.6021, "step": 629776 }, { "epoch": 54.333333333333336, "grad_norm": 0.3858354985713959, "learning_rate": 0.0001, "loss": 1.6009, "step": 629832 }, { "epoch": 54.33816425120773, "grad_norm": 0.31094029545783997, "learning_rate": 0.0001, "loss": 1.6006, "step": 629888 }, { "epoch": 54.34299516908212, "grad_norm": 0.40777209401130676, "learning_rate": 0.0001, "loss": 1.5979, "step": 629944 }, { "epoch": 54.34782608695652, "grad_norm": 0.3370199203491211, "learning_rate": 0.0001, "loss": 1.6037, "step": 630000 }, { "epoch": 54.35265700483092, "grad_norm": 0.2788527309894562, "learning_rate": 0.0001, "loss": 1.5976, "step": 630056 }, { "epoch": 54.35748792270532, "grad_norm": 0.2833544611930847, "learning_rate": 0.0001, "loss": 1.6016, "step": 630112 }, { "epoch": 54.36231884057971, "grad_norm": 0.954474151134491, "learning_rate": 0.0001, "loss": 1.5978, "step": 630168 }, { "epoch": 54.367149758454104, "grad_norm": 0.2598772943019867, "learning_rate": 0.0001, "loss": 1.6066, "step": 630224 }, { "epoch": 54.371980676328505, "grad_norm": 0.24533863365650177, "learning_rate": 0.0001, "loss": 1.6034, "step": 630280 }, { "epoch": 54.3768115942029, "grad_norm": 3.1446101665496826, "learning_rate": 0.0001, "loss": 1.6057, "step": 630336 }, { "epoch": 54.38164251207729, "grad_norm": 0.3021402359008789, "learning_rate": 0.0001, "loss": 1.6027, "step": 630392 }, { "epoch": 54.38647342995169, "grad_norm": 0.5484914183616638, "learning_rate": 0.0001, "loss": 1.6011, "step": 630448 }, { "epoch": 54.391304347826086, "grad_norm": 0.28772079944610596, "learning_rate": 0.0001, "loss": 1.6029, "step": 630504 }, { "epoch": 54.39613526570048, "grad_norm": 0.41347041726112366, "learning_rate": 0.0001, "loss": 1.6099, "step": 630560 }, { "epoch": 54.40096618357488, "grad_norm": 0.23304204642772675, "learning_rate": 0.0001, "loss": 1.6125, "step": 630616 }, { "epoch": 54.405797101449274, "grad_norm": 0.3668493330478668, "learning_rate": 0.0001, "loss": 1.6036, "step": 630672 }, { "epoch": 54.410628019323674, "grad_norm": 0.2889614999294281, "learning_rate": 0.0001, "loss": 1.5999, "step": 630728 }, { "epoch": 54.41545893719807, "grad_norm": 0.3374957740306854, "learning_rate": 0.0001, "loss": 1.5921, "step": 630784 }, { "epoch": 54.42028985507246, "grad_norm": 3.601912260055542, "learning_rate": 0.0001, "loss": 1.6022, "step": 630840 }, { "epoch": 54.42512077294686, "grad_norm": 0.3262450695037842, "learning_rate": 0.0001, "loss": 1.6038, "step": 630896 }, { "epoch": 54.429951690821255, "grad_norm": 1.0141587257385254, "learning_rate": 0.0001, "loss": 1.6009, "step": 630952 }, { "epoch": 54.43478260869565, "grad_norm": 0.2817440629005432, "learning_rate": 0.0001, "loss": 1.6057, "step": 631008 }, { "epoch": 54.43961352657005, "grad_norm": 0.6466987133026123, "learning_rate": 0.0001, "loss": 1.6018, "step": 631064 }, { "epoch": 54.44444444444444, "grad_norm": 15.758605003356934, "learning_rate": 0.0001, "loss": 1.6071, "step": 631120 }, { "epoch": 54.44927536231884, "grad_norm": 14.534988403320312, "learning_rate": 0.0001, "loss": 1.5936, "step": 631176 }, { "epoch": 54.45410628019324, "grad_norm": 0.4754365384578705, "learning_rate": 0.0001, "loss": 1.5988, "step": 631232 }, { "epoch": 54.45893719806763, "grad_norm": 0.2986752688884735, "learning_rate": 0.0001, "loss": 1.6019, "step": 631288 }, { "epoch": 54.46376811594203, "grad_norm": 0.26958325505256653, "learning_rate": 0.0001, "loss": 1.6077, "step": 631344 }, { "epoch": 54.468599033816425, "grad_norm": 0.293889582157135, "learning_rate": 0.0001, "loss": 1.6101, "step": 631400 }, { "epoch": 54.47342995169082, "grad_norm": 1.3732675313949585, "learning_rate": 0.0001, "loss": 1.5993, "step": 631456 }, { "epoch": 54.47826086956522, "grad_norm": 0.32927075028419495, "learning_rate": 0.0001, "loss": 1.6063, "step": 631512 }, { "epoch": 54.48309178743961, "grad_norm": 0.5415047407150269, "learning_rate": 0.0001, "loss": 1.5991, "step": 631568 }, { "epoch": 54.48792270531401, "grad_norm": 0.26940566301345825, "learning_rate": 0.0001, "loss": 1.606, "step": 631624 }, { "epoch": 54.492753623188406, "grad_norm": 0.264972984790802, "learning_rate": 0.0001, "loss": 1.5988, "step": 631680 }, { "epoch": 54.4975845410628, "grad_norm": 2.6659533977508545, "learning_rate": 0.0001, "loss": 1.6057, "step": 631736 }, { "epoch": 54.5024154589372, "grad_norm": 2.1350674629211426, "learning_rate": 0.0001, "loss": 1.5998, "step": 631792 }, { "epoch": 54.507246376811594, "grad_norm": 0.2759764790534973, "learning_rate": 0.0001, "loss": 1.5924, "step": 631848 }, { "epoch": 54.51207729468599, "grad_norm": 0.31260359287261963, "learning_rate": 0.0001, "loss": 1.601, "step": 631904 }, { "epoch": 54.51690821256039, "grad_norm": 0.3773660659790039, "learning_rate": 0.0001, "loss": 1.5963, "step": 631960 }, { "epoch": 54.52173913043478, "grad_norm": 1.8888691663742065, "learning_rate": 0.0001, "loss": 1.5999, "step": 632016 }, { "epoch": 54.52657004830918, "grad_norm": 0.3085639178752899, "learning_rate": 0.0001, "loss": 1.6079, "step": 632072 }, { "epoch": 54.531400966183575, "grad_norm": 0.2433236837387085, "learning_rate": 0.0001, "loss": 1.6032, "step": 632128 }, { "epoch": 54.53623188405797, "grad_norm": 0.3244127631187439, "learning_rate": 0.0001, "loss": 1.6071, "step": 632184 }, { "epoch": 54.54106280193237, "grad_norm": 0.2726103961467743, "learning_rate": 0.0001, "loss": 1.6036, "step": 632240 }, { "epoch": 54.54589371980676, "grad_norm": 0.6640788316726685, "learning_rate": 0.0001, "loss": 1.602, "step": 632296 }, { "epoch": 54.55072463768116, "grad_norm": 0.2773686647415161, "learning_rate": 0.0001, "loss": 1.5951, "step": 632352 }, { "epoch": 54.55555555555556, "grad_norm": 2.156007766723633, "learning_rate": 0.0001, "loss": 1.5994, "step": 632408 }, { "epoch": 54.56038647342995, "grad_norm": 0.25119465589523315, "learning_rate": 0.0001, "loss": 1.6047, "step": 632464 }, { "epoch": 54.56521739130435, "grad_norm": 0.27163630723953247, "learning_rate": 0.0001, "loss": 1.6, "step": 632520 }, { "epoch": 54.570048309178745, "grad_norm": 0.6936202645301819, "learning_rate": 0.0001, "loss": 1.6058, "step": 632576 }, { "epoch": 54.57487922705314, "grad_norm": 0.4193975627422333, "learning_rate": 0.0001, "loss": 1.5961, "step": 632632 }, { "epoch": 54.57971014492754, "grad_norm": 0.44500067830085754, "learning_rate": 0.0001, "loss": 1.6006, "step": 632688 }, { "epoch": 54.58454106280193, "grad_norm": 0.4224701523780823, "learning_rate": 0.0001, "loss": 1.5992, "step": 632744 }, { "epoch": 54.589371980676326, "grad_norm": 0.2555035948753357, "learning_rate": 0.0001, "loss": 1.6032, "step": 632800 }, { "epoch": 54.594202898550726, "grad_norm": 0.4504264295101166, "learning_rate": 0.0001, "loss": 1.6037, "step": 632856 }, { "epoch": 54.59903381642512, "grad_norm": 0.2514598071575165, "learning_rate": 0.0001, "loss": 1.5974, "step": 632912 }, { "epoch": 54.60386473429952, "grad_norm": 0.37692540884017944, "learning_rate": 0.0001, "loss": 1.61, "step": 632968 }, { "epoch": 54.608695652173914, "grad_norm": 0.3077220618724823, "learning_rate": 0.0001, "loss": 1.6077, "step": 633024 }, { "epoch": 54.61352657004831, "grad_norm": 0.26042306423187256, "learning_rate": 0.0001, "loss": 1.5988, "step": 633080 }, { "epoch": 54.61835748792271, "grad_norm": 0.5861327648162842, "learning_rate": 0.0001, "loss": 1.604, "step": 633136 }, { "epoch": 54.6231884057971, "grad_norm": 0.28420573472976685, "learning_rate": 0.0001, "loss": 1.603, "step": 633192 }, { "epoch": 54.628019323671495, "grad_norm": 0.3809611201286316, "learning_rate": 0.0001, "loss": 1.5963, "step": 633248 }, { "epoch": 54.632850241545896, "grad_norm": 10.470609664916992, "learning_rate": 0.0001, "loss": 1.6005, "step": 633304 }, { "epoch": 54.63768115942029, "grad_norm": 0.29606589674949646, "learning_rate": 0.0001, "loss": 1.6034, "step": 633360 }, { "epoch": 54.64251207729468, "grad_norm": 0.8232465386390686, "learning_rate": 0.0001, "loss": 1.6035, "step": 633416 }, { "epoch": 54.64734299516908, "grad_norm": 0.3435504138469696, "learning_rate": 0.0001, "loss": 1.601, "step": 633472 }, { "epoch": 54.65217391304348, "grad_norm": 0.3241100609302521, "learning_rate": 0.0001, "loss": 1.5979, "step": 633528 }, { "epoch": 54.65700483091788, "grad_norm": 0.7482240796089172, "learning_rate": 0.0001, "loss": 1.6063, "step": 633584 }, { "epoch": 54.66183574879227, "grad_norm": 0.3226909637451172, "learning_rate": 0.0001, "loss": 1.6044, "step": 633640 }, { "epoch": 54.666666666666664, "grad_norm": 0.5861750245094299, "learning_rate": 0.0001, "loss": 1.6059, "step": 633696 }, { "epoch": 54.671497584541065, "grad_norm": 1.9660439491271973, "learning_rate": 0.0001, "loss": 1.6044, "step": 633752 }, { "epoch": 54.67632850241546, "grad_norm": 0.271602064371109, "learning_rate": 0.0001, "loss": 1.6043, "step": 633808 }, { "epoch": 54.68115942028985, "grad_norm": 0.28513437509536743, "learning_rate": 0.0001, "loss": 1.6053, "step": 633864 }, { "epoch": 54.68599033816425, "grad_norm": 0.6856714487075806, "learning_rate": 0.0001, "loss": 1.6014, "step": 633920 }, { "epoch": 54.690821256038646, "grad_norm": 1.4860577583312988, "learning_rate": 0.0001, "loss": 1.6002, "step": 633976 }, { "epoch": 54.69565217391305, "grad_norm": 0.3629586100578308, "learning_rate": 0.0001, "loss": 1.5957, "step": 634032 }, { "epoch": 54.70048309178744, "grad_norm": 0.42111778259277344, "learning_rate": 0.0001, "loss": 1.604, "step": 634088 }, { "epoch": 54.70531400966183, "grad_norm": 2.465000629425049, "learning_rate": 0.0001, "loss": 1.5998, "step": 634144 }, { "epoch": 54.710144927536234, "grad_norm": 9.242046356201172, "learning_rate": 0.0001, "loss": 1.5986, "step": 634200 }, { "epoch": 54.71497584541063, "grad_norm": 0.28298911452293396, "learning_rate": 0.0001, "loss": 1.6029, "step": 634256 }, { "epoch": 54.71980676328502, "grad_norm": 0.283955454826355, "learning_rate": 0.0001, "loss": 1.6014, "step": 634312 }, { "epoch": 54.72463768115942, "grad_norm": 0.2496984302997589, "learning_rate": 0.0001, "loss": 1.5984, "step": 634368 }, { "epoch": 54.729468599033815, "grad_norm": 0.31494003534317017, "learning_rate": 0.0001, "loss": 1.6058, "step": 634424 }, { "epoch": 54.734299516908216, "grad_norm": 0.2694123089313507, "learning_rate": 0.0001, "loss": 1.6051, "step": 634480 }, { "epoch": 54.73913043478261, "grad_norm": 0.4280897080898285, "learning_rate": 0.0001, "loss": 1.6016, "step": 634536 }, { "epoch": 54.743961352657, "grad_norm": 0.5651580095291138, "learning_rate": 0.0001, "loss": 1.6013, "step": 634592 }, { "epoch": 54.7487922705314, "grad_norm": 0.26647111773490906, "learning_rate": 0.0001, "loss": 1.6027, "step": 634648 }, { "epoch": 54.7536231884058, "grad_norm": 0.30910080671310425, "learning_rate": 0.0001, "loss": 1.6045, "step": 634704 }, { "epoch": 54.75845410628019, "grad_norm": 1.5053876638412476, "learning_rate": 0.0001, "loss": 1.5974, "step": 634760 }, { "epoch": 54.76328502415459, "grad_norm": 0.2556949853897095, "learning_rate": 0.0001, "loss": 1.6103, "step": 634816 }, { "epoch": 54.768115942028984, "grad_norm": 0.25683313608169556, "learning_rate": 0.0001, "loss": 1.6017, "step": 634872 }, { "epoch": 54.772946859903385, "grad_norm": 0.2633473575115204, "learning_rate": 0.0001, "loss": 1.5982, "step": 634928 }, { "epoch": 54.77777777777778, "grad_norm": 0.4238305389881134, "learning_rate": 0.0001, "loss": 1.6004, "step": 634984 }, { "epoch": 54.78260869565217, "grad_norm": 0.2753404378890991, "learning_rate": 0.0001, "loss": 1.6037, "step": 635040 }, { "epoch": 54.78743961352657, "grad_norm": 0.31280267238616943, "learning_rate": 0.0001, "loss": 1.6012, "step": 635096 }, { "epoch": 54.792270531400966, "grad_norm": 0.28449442982673645, "learning_rate": 0.0001, "loss": 1.5972, "step": 635152 }, { "epoch": 54.79710144927536, "grad_norm": 1.3971601724624634, "learning_rate": 0.0001, "loss": 1.5994, "step": 635208 }, { "epoch": 54.80193236714976, "grad_norm": 0.25212550163269043, "learning_rate": 0.0001, "loss": 1.5994, "step": 635264 }, { "epoch": 54.806763285024154, "grad_norm": 0.2675356864929199, "learning_rate": 0.0001, "loss": 1.5972, "step": 635320 }, { "epoch": 54.81159420289855, "grad_norm": 9.420217514038086, "learning_rate": 0.0001, "loss": 1.5985, "step": 635376 }, { "epoch": 54.81642512077295, "grad_norm": 0.2421475350856781, "learning_rate": 0.0001, "loss": 1.604, "step": 635432 }, { "epoch": 54.82125603864734, "grad_norm": 5.222280502319336, "learning_rate": 0.0001, "loss": 1.6037, "step": 635488 }, { "epoch": 54.82608695652174, "grad_norm": 0.7292366623878479, "learning_rate": 0.0001, "loss": 1.6074, "step": 635544 }, { "epoch": 54.830917874396135, "grad_norm": 0.5822354555130005, "learning_rate": 0.0001, "loss": 1.607, "step": 635600 }, { "epoch": 54.83574879227053, "grad_norm": 0.2606086730957031, "learning_rate": 0.0001, "loss": 1.6031, "step": 635656 }, { "epoch": 54.84057971014493, "grad_norm": 0.7120017409324646, "learning_rate": 0.0001, "loss": 1.6013, "step": 635712 }, { "epoch": 54.84541062801932, "grad_norm": 0.33280935883522034, "learning_rate": 0.0001, "loss": 1.5993, "step": 635768 }, { "epoch": 54.85024154589372, "grad_norm": 0.3703424632549286, "learning_rate": 0.0001, "loss": 1.5982, "step": 635824 }, { "epoch": 54.85507246376812, "grad_norm": 0.7437340021133423, "learning_rate": 0.0001, "loss": 1.601, "step": 635880 }, { "epoch": 54.85990338164251, "grad_norm": 0.32744187116622925, "learning_rate": 0.0001, "loss": 1.6044, "step": 635936 }, { "epoch": 54.86473429951691, "grad_norm": 0.2534404993057251, "learning_rate": 0.0001, "loss": 1.5922, "step": 635992 }, { "epoch": 54.869565217391305, "grad_norm": 0.3905467391014099, "learning_rate": 0.0001, "loss": 1.6021, "step": 636048 }, { "epoch": 54.8743961352657, "grad_norm": 0.32848936319351196, "learning_rate": 0.0001, "loss": 1.6021, "step": 636104 }, { "epoch": 54.8792270531401, "grad_norm": 0.53107088804245, "learning_rate": 0.0001, "loss": 1.606, "step": 636160 }, { "epoch": 54.88405797101449, "grad_norm": 0.2986257076263428, "learning_rate": 0.0001, "loss": 1.5967, "step": 636216 }, { "epoch": 54.888888888888886, "grad_norm": 0.3002891540527344, "learning_rate": 0.0001, "loss": 1.596, "step": 636272 }, { "epoch": 54.893719806763286, "grad_norm": 0.3032078146934509, "learning_rate": 0.0001, "loss": 1.606, "step": 636328 }, { "epoch": 54.89855072463768, "grad_norm": 0.33160027861595154, "learning_rate": 0.0001, "loss": 1.6045, "step": 636384 }, { "epoch": 54.90338164251208, "grad_norm": 0.5494252443313599, "learning_rate": 0.0001, "loss": 1.604, "step": 636440 }, { "epoch": 54.908212560386474, "grad_norm": 1.124831199645996, "learning_rate": 0.0001, "loss": 1.608, "step": 636496 }, { "epoch": 54.91304347826087, "grad_norm": 0.26767536997795105, "learning_rate": 0.0001, "loss": 1.6041, "step": 636552 }, { "epoch": 54.91787439613527, "grad_norm": 0.3294317126274109, "learning_rate": 0.0001, "loss": 1.6, "step": 636608 }, { "epoch": 54.92270531400966, "grad_norm": 1.6468474864959717, "learning_rate": 0.0001, "loss": 1.6021, "step": 636664 }, { "epoch": 54.927536231884055, "grad_norm": 1.6024459600448608, "learning_rate": 0.0001, "loss": 1.5996, "step": 636720 }, { "epoch": 54.932367149758456, "grad_norm": 0.26908278465270996, "learning_rate": 0.0001, "loss": 1.5976, "step": 636776 }, { "epoch": 54.93719806763285, "grad_norm": 17.495332717895508, "learning_rate": 0.0001, "loss": 1.604, "step": 636832 }, { "epoch": 54.94202898550725, "grad_norm": 0.36930736899375916, "learning_rate": 0.0001, "loss": 1.6056, "step": 636888 }, { "epoch": 54.94685990338164, "grad_norm": 6.246365070343018, "learning_rate": 0.0001, "loss": 1.6068, "step": 636944 }, { "epoch": 54.95169082125604, "grad_norm": 0.2673054337501526, "learning_rate": 0.0001, "loss": 1.5976, "step": 637000 }, { "epoch": 54.95652173913044, "grad_norm": 2.4239885807037354, "learning_rate": 0.0001, "loss": 1.5991, "step": 637056 }, { "epoch": 54.96135265700483, "grad_norm": 0.3352994918823242, "learning_rate": 0.0001, "loss": 1.6062, "step": 637112 }, { "epoch": 54.966183574879224, "grad_norm": 1.171528697013855, "learning_rate": 0.0001, "loss": 1.5983, "step": 637168 }, { "epoch": 54.971014492753625, "grad_norm": 0.30225345492362976, "learning_rate": 0.0001, "loss": 1.6042, "step": 637224 }, { "epoch": 54.97584541062802, "grad_norm": 0.22701790928840637, "learning_rate": 0.0001, "loss": 1.6005, "step": 637280 }, { "epoch": 54.98067632850242, "grad_norm": 0.4461284577846527, "learning_rate": 0.0001, "loss": 1.6003, "step": 637336 }, { "epoch": 54.98550724637681, "grad_norm": 0.48023366928100586, "learning_rate": 0.0001, "loss": 1.6112, "step": 637392 }, { "epoch": 54.990338164251206, "grad_norm": 0.4148176908493042, "learning_rate": 0.0001, "loss": 1.6044, "step": 637448 }, { "epoch": 54.99516908212561, "grad_norm": 3.4294486045837402, "learning_rate": 0.0001, "loss": 1.6079, "step": 637504 }, { "epoch": 55.0, "grad_norm": 0.2557530999183655, "learning_rate": 0.0001, "loss": 1.5988, "step": 637560 }, { "epoch": 55.00483091787439, "grad_norm": 0.2894023656845093, "learning_rate": 0.0001, "loss": 1.6049, "step": 637616 }, { "epoch": 55.009661835748794, "grad_norm": 0.239263117313385, "learning_rate": 0.0001, "loss": 1.5935, "step": 637672 }, { "epoch": 55.01449275362319, "grad_norm": 0.27949434518814087, "learning_rate": 0.0001, "loss": 1.5965, "step": 637728 }, { "epoch": 55.01932367149758, "grad_norm": 1.4575718641281128, "learning_rate": 0.0001, "loss": 1.5917, "step": 637784 }, { "epoch": 55.02415458937198, "grad_norm": 0.7019737958908081, "learning_rate": 0.0001, "loss": 1.5978, "step": 637840 }, { "epoch": 55.028985507246375, "grad_norm": 3.4075868129730225, "learning_rate": 0.0001, "loss": 1.5918, "step": 637896 }, { "epoch": 55.033816425120776, "grad_norm": 0.2925054430961609, "learning_rate": 0.0001, "loss": 1.5948, "step": 637952 }, { "epoch": 55.03864734299517, "grad_norm": 0.37292709946632385, "learning_rate": 0.0001, "loss": 1.5992, "step": 638008 }, { "epoch": 55.04347826086956, "grad_norm": 0.27361220121383667, "learning_rate": 0.0001, "loss": 1.601, "step": 638064 }, { "epoch": 55.04830917874396, "grad_norm": 0.35947442054748535, "learning_rate": 0.0001, "loss": 1.6048, "step": 638120 }, { "epoch": 55.05314009661836, "grad_norm": 0.22224469482898712, "learning_rate": 0.0001, "loss": 1.5976, "step": 638176 }, { "epoch": 55.05797101449275, "grad_norm": 0.2460516393184662, "learning_rate": 0.0001, "loss": 1.5945, "step": 638232 }, { "epoch": 55.06280193236715, "grad_norm": 1.2580095529556274, "learning_rate": 0.0001, "loss": 1.6034, "step": 638288 }, { "epoch": 55.067632850241544, "grad_norm": 1.332988977432251, "learning_rate": 0.0001, "loss": 1.5954, "step": 638344 }, { "epoch": 55.072463768115945, "grad_norm": 0.4005599021911621, "learning_rate": 0.0001, "loss": 1.5965, "step": 638400 }, { "epoch": 55.07729468599034, "grad_norm": 1.6828666925430298, "learning_rate": 0.0001, "loss": 1.5957, "step": 638456 }, { "epoch": 55.08212560386473, "grad_norm": 3.06148362159729, "learning_rate": 0.0001, "loss": 1.5955, "step": 638512 }, { "epoch": 55.08695652173913, "grad_norm": 7.444970607757568, "learning_rate": 0.0001, "loss": 1.5971, "step": 638568 }, { "epoch": 55.091787439613526, "grad_norm": 0.2263961285352707, "learning_rate": 0.0001, "loss": 1.5988, "step": 638624 }, { "epoch": 55.09661835748792, "grad_norm": 8.3302583694458, "learning_rate": 0.0001, "loss": 1.5963, "step": 638680 }, { "epoch": 55.10144927536232, "grad_norm": 0.2807260751724243, "learning_rate": 0.0001, "loss": 1.5958, "step": 638736 }, { "epoch": 55.106280193236714, "grad_norm": 11.70958137512207, "learning_rate": 0.0001, "loss": 1.6007, "step": 638792 }, { "epoch": 55.111111111111114, "grad_norm": 0.2796240746974945, "learning_rate": 0.0001, "loss": 1.5911, "step": 638848 }, { "epoch": 55.11594202898551, "grad_norm": 1.2932190895080566, "learning_rate": 0.0001, "loss": 1.5958, "step": 638904 }, { "epoch": 55.1207729468599, "grad_norm": 2.7089197635650635, "learning_rate": 0.0001, "loss": 1.5921, "step": 638960 }, { "epoch": 55.1256038647343, "grad_norm": 0.2931582033634186, "learning_rate": 0.0001, "loss": 1.5953, "step": 639016 }, { "epoch": 55.130434782608695, "grad_norm": 4.223657608032227, "learning_rate": 0.0001, "loss": 1.5925, "step": 639072 }, { "epoch": 55.13526570048309, "grad_norm": 0.6435850858688354, "learning_rate": 0.0001, "loss": 1.5972, "step": 639128 }, { "epoch": 55.14009661835749, "grad_norm": 0.36849603056907654, "learning_rate": 0.0001, "loss": 1.601, "step": 639184 }, { "epoch": 55.14492753623188, "grad_norm": 0.30869176983833313, "learning_rate": 0.0001, "loss": 1.5962, "step": 639240 }, { "epoch": 55.14975845410628, "grad_norm": 8.483145713806152, "learning_rate": 0.0001, "loss": 1.599, "step": 639296 }, { "epoch": 55.15458937198068, "grad_norm": 0.9094793200492859, "learning_rate": 0.0001, "loss": 1.5951, "step": 639352 }, { "epoch": 55.15942028985507, "grad_norm": 0.3377348780632019, "learning_rate": 0.0001, "loss": 1.5908, "step": 639408 }, { "epoch": 55.16425120772947, "grad_norm": 0.30702024698257446, "learning_rate": 0.0001, "loss": 1.5952, "step": 639464 }, { "epoch": 55.169082125603865, "grad_norm": 1.7288646697998047, "learning_rate": 0.0001, "loss": 1.5955, "step": 639520 }, { "epoch": 55.17391304347826, "grad_norm": 0.2540227770805359, "learning_rate": 0.0001, "loss": 1.5916, "step": 639576 }, { "epoch": 55.17874396135266, "grad_norm": 1.1267266273498535, "learning_rate": 0.0001, "loss": 1.5982, "step": 639632 }, { "epoch": 55.18357487922705, "grad_norm": 0.27489304542541504, "learning_rate": 0.0001, "loss": 1.5999, "step": 639688 }, { "epoch": 55.18840579710145, "grad_norm": 0.27892830967903137, "learning_rate": 0.0001, "loss": 1.598, "step": 639744 }, { "epoch": 55.193236714975846, "grad_norm": 0.289753794670105, "learning_rate": 0.0001, "loss": 1.599, "step": 639800 }, { "epoch": 55.19806763285024, "grad_norm": 14.241250038146973, "learning_rate": 0.0001, "loss": 1.5914, "step": 639856 }, { "epoch": 55.20289855072464, "grad_norm": 0.3125571012496948, "learning_rate": 0.0001, "loss": 1.5877, "step": 639912 }, { "epoch": 55.207729468599034, "grad_norm": 0.3216071128845215, "learning_rate": 0.0001, "loss": 1.5949, "step": 639968 }, { "epoch": 55.21256038647343, "grad_norm": 0.28730857372283936, "learning_rate": 0.0001, "loss": 1.5873, "step": 640024 }, { "epoch": 55.21739130434783, "grad_norm": 0.29526200890541077, "learning_rate": 0.0001, "loss": 1.5963, "step": 640080 }, { "epoch": 55.22222222222222, "grad_norm": 0.28400519490242004, "learning_rate": 0.0001, "loss": 1.5917, "step": 640136 }, { "epoch": 55.227053140096615, "grad_norm": 0.2564033567905426, "learning_rate": 0.0001, "loss": 1.5912, "step": 640192 }, { "epoch": 55.231884057971016, "grad_norm": 0.2816677987575531, "learning_rate": 0.0001, "loss": 1.6008, "step": 640248 }, { "epoch": 55.23671497584541, "grad_norm": 0.3982822895050049, "learning_rate": 0.0001, "loss": 1.588, "step": 640304 }, { "epoch": 55.24154589371981, "grad_norm": 0.29809653759002686, "learning_rate": 0.0001, "loss": 1.5918, "step": 640360 }, { "epoch": 55.2463768115942, "grad_norm": 0.29959380626678467, "learning_rate": 0.0001, "loss": 1.5952, "step": 640416 }, { "epoch": 55.2512077294686, "grad_norm": 19.37786102294922, "learning_rate": 0.0001, "loss": 1.5936, "step": 640472 }, { "epoch": 55.256038647343, "grad_norm": 0.3112099766731262, "learning_rate": 0.0001, "loss": 1.5999, "step": 640528 }, { "epoch": 55.26086956521739, "grad_norm": 0.3903619945049286, "learning_rate": 0.0001, "loss": 1.5935, "step": 640584 }, { "epoch": 55.265700483091784, "grad_norm": 0.3649788796901703, "learning_rate": 0.0001, "loss": 1.5927, "step": 640640 }, { "epoch": 55.270531400966185, "grad_norm": 0.30071139335632324, "learning_rate": 0.0001, "loss": 1.593, "step": 640696 }, { "epoch": 55.27536231884058, "grad_norm": 0.22420832514762878, "learning_rate": 0.0001, "loss": 1.5899, "step": 640752 }, { "epoch": 55.28019323671498, "grad_norm": 0.5002242922782898, "learning_rate": 0.0001, "loss": 1.5966, "step": 640808 }, { "epoch": 55.28502415458937, "grad_norm": 0.3591330647468567, "learning_rate": 0.0001, "loss": 1.5931, "step": 640864 }, { "epoch": 55.289855072463766, "grad_norm": 1.7645248174667358, "learning_rate": 0.0001, "loss": 1.5918, "step": 640920 }, { "epoch": 55.29468599033817, "grad_norm": 24.572280883789062, "learning_rate": 0.0001, "loss": 1.5974, "step": 640976 }, { "epoch": 55.29951690821256, "grad_norm": 0.2890204191207886, "learning_rate": 0.0001, "loss": 1.5886, "step": 641032 }, { "epoch": 55.30434782608695, "grad_norm": 1.9891971349716187, "learning_rate": 0.0001, "loss": 1.5958, "step": 641088 }, { "epoch": 55.309178743961354, "grad_norm": 0.29440557956695557, "learning_rate": 0.0001, "loss": 1.593, "step": 641144 }, { "epoch": 55.31400966183575, "grad_norm": 0.32077357172966003, "learning_rate": 0.0001, "loss": 1.5896, "step": 641200 }, { "epoch": 55.31884057971015, "grad_norm": 2.6748430728912354, "learning_rate": 0.0001, "loss": 1.5976, "step": 641256 }, { "epoch": 55.32367149758454, "grad_norm": 0.22633500397205353, "learning_rate": 0.0001, "loss": 1.5928, "step": 641312 }, { "epoch": 55.328502415458935, "grad_norm": 0.26438090205192566, "learning_rate": 0.0001, "loss": 1.5949, "step": 641368 }, { "epoch": 55.333333333333336, "grad_norm": 0.27017903327941895, "learning_rate": 0.0001, "loss": 1.5967, "step": 641424 }, { "epoch": 55.33816425120773, "grad_norm": 0.28431618213653564, "learning_rate": 0.0001, "loss": 1.5942, "step": 641480 }, { "epoch": 55.34299516908212, "grad_norm": 0.6448376774787903, "learning_rate": 0.0001, "loss": 1.5929, "step": 641536 }, { "epoch": 55.34782608695652, "grad_norm": 0.25304731726646423, "learning_rate": 0.0001, "loss": 1.599, "step": 641592 }, { "epoch": 55.35265700483092, "grad_norm": 1.5779285430908203, "learning_rate": 0.0001, "loss": 1.6026, "step": 641648 }, { "epoch": 55.35748792270532, "grad_norm": 0.35764646530151367, "learning_rate": 0.0001, "loss": 1.5974, "step": 641704 }, { "epoch": 55.36231884057971, "grad_norm": 0.26212984323501587, "learning_rate": 0.0001, "loss": 1.5906, "step": 641760 }, { "epoch": 55.367149758454104, "grad_norm": 0.8578245043754578, "learning_rate": 0.0001, "loss": 1.5984, "step": 641816 }, { "epoch": 55.371980676328505, "grad_norm": 0.2670283913612366, "learning_rate": 0.0001, "loss": 1.5926, "step": 641872 }, { "epoch": 55.3768115942029, "grad_norm": 0.8513098359107971, "learning_rate": 0.0001, "loss": 1.5905, "step": 641928 }, { "epoch": 55.38164251207729, "grad_norm": 0.2589764893054962, "learning_rate": 0.0001, "loss": 1.5927, "step": 641984 }, { "epoch": 55.38647342995169, "grad_norm": 0.3776125907897949, "learning_rate": 0.0001, "loss": 1.5969, "step": 642040 }, { "epoch": 55.391304347826086, "grad_norm": 0.9294224381446838, "learning_rate": 0.0001, "loss": 1.6006, "step": 642096 }, { "epoch": 55.39613526570048, "grad_norm": 0.7386584877967834, "learning_rate": 0.0001, "loss": 1.5966, "step": 642152 }, { "epoch": 55.40096618357488, "grad_norm": 0.25929969549179077, "learning_rate": 0.0001, "loss": 1.5913, "step": 642208 }, { "epoch": 55.405797101449274, "grad_norm": 0.30820348858833313, "learning_rate": 0.0001, "loss": 1.5966, "step": 642264 }, { "epoch": 55.410628019323674, "grad_norm": 1.4932940006256104, "learning_rate": 0.0001, "loss": 1.6027, "step": 642320 }, { "epoch": 55.41545893719807, "grad_norm": 0.4702835977077484, "learning_rate": 0.0001, "loss": 1.6009, "step": 642376 }, { "epoch": 55.42028985507246, "grad_norm": 9.8528413772583, "learning_rate": 0.0001, "loss": 1.5962, "step": 642432 }, { "epoch": 55.42512077294686, "grad_norm": 0.22038212418556213, "learning_rate": 0.0001, "loss": 1.5903, "step": 642488 }, { "epoch": 55.429951690821255, "grad_norm": 0.26703351736068726, "learning_rate": 0.0001, "loss": 1.5924, "step": 642544 }, { "epoch": 55.43478260869565, "grad_norm": 2.1349050998687744, "learning_rate": 0.0001, "loss": 1.6005, "step": 642600 }, { "epoch": 55.43961352657005, "grad_norm": 1.4283818006515503, "learning_rate": 0.0001, "loss": 1.5968, "step": 642656 }, { "epoch": 55.44444444444444, "grad_norm": 0.2723599076271057, "learning_rate": 0.0001, "loss": 1.5977, "step": 642712 }, { "epoch": 55.44927536231884, "grad_norm": 0.38651639223098755, "learning_rate": 0.0001, "loss": 1.5919, "step": 642768 }, { "epoch": 55.45410628019324, "grad_norm": 0.92103511095047, "learning_rate": 0.0001, "loss": 1.5974, "step": 642824 }, { "epoch": 55.45893719806763, "grad_norm": 2.8122379779815674, "learning_rate": 0.0001, "loss": 1.5968, "step": 642880 }, { "epoch": 55.46376811594203, "grad_norm": 0.2559720575809479, "learning_rate": 0.0001, "loss": 1.6046, "step": 642936 }, { "epoch": 55.468599033816425, "grad_norm": 1.4952353239059448, "learning_rate": 0.0001, "loss": 1.6027, "step": 642992 }, { "epoch": 55.47342995169082, "grad_norm": 2.7159807682037354, "learning_rate": 0.0001, "loss": 1.595, "step": 643048 }, { "epoch": 55.47826086956522, "grad_norm": 0.2837645411491394, "learning_rate": 0.0001, "loss": 1.5958, "step": 643104 }, { "epoch": 55.48309178743961, "grad_norm": 0.276797890663147, "learning_rate": 0.0001, "loss": 1.5997, "step": 643160 }, { "epoch": 55.48792270531401, "grad_norm": 1.6165279150009155, "learning_rate": 0.0001, "loss": 1.5974, "step": 643216 }, { "epoch": 55.492753623188406, "grad_norm": 0.2740486264228821, "learning_rate": 0.0001, "loss": 1.596, "step": 643272 }, { "epoch": 55.4975845410628, "grad_norm": 0.26407670974731445, "learning_rate": 0.0001, "loss": 1.5951, "step": 643328 }, { "epoch": 55.5024154589372, "grad_norm": 0.38970696926116943, "learning_rate": 0.0001, "loss": 1.5941, "step": 643384 }, { "epoch": 55.507246376811594, "grad_norm": 0.2442391812801361, "learning_rate": 0.0001, "loss": 1.5996, "step": 643440 }, { "epoch": 55.51207729468599, "grad_norm": 0.24828267097473145, "learning_rate": 0.0001, "loss": 1.5969, "step": 643496 }, { "epoch": 55.51690821256039, "grad_norm": 0.3300180733203888, "learning_rate": 0.0001, "loss": 1.5934, "step": 643552 }, { "epoch": 55.52173913043478, "grad_norm": 0.4389801323413849, "learning_rate": 0.0001, "loss": 1.5927, "step": 643608 }, { "epoch": 55.52657004830918, "grad_norm": 1.3133279085159302, "learning_rate": 0.0001, "loss": 1.5985, "step": 643664 }, { "epoch": 55.531400966183575, "grad_norm": 0.4353677034378052, "learning_rate": 0.0001, "loss": 1.5936, "step": 643720 }, { "epoch": 55.53623188405797, "grad_norm": 0.24321818351745605, "learning_rate": 0.0001, "loss": 1.5943, "step": 643776 }, { "epoch": 55.54106280193237, "grad_norm": 0.31741949915885925, "learning_rate": 0.0001, "loss": 1.6013, "step": 643832 }, { "epoch": 55.54589371980676, "grad_norm": 0.2613678574562073, "learning_rate": 0.0001, "loss": 1.5897, "step": 643888 }, { "epoch": 55.55072463768116, "grad_norm": 0.2585724890232086, "learning_rate": 0.0001, "loss": 1.5931, "step": 643944 }, { "epoch": 55.55555555555556, "grad_norm": 1.037743091583252, "learning_rate": 0.0001, "loss": 1.5985, "step": 644000 }, { "epoch": 55.56038647342995, "grad_norm": 3.697885751724243, "learning_rate": 0.0001, "loss": 1.5929, "step": 644056 }, { "epoch": 55.56521739130435, "grad_norm": 3.8899240493774414, "learning_rate": 0.0001, "loss": 1.5928, "step": 644112 }, { "epoch": 55.570048309178745, "grad_norm": 0.43591195344924927, "learning_rate": 0.0001, "loss": 1.5962, "step": 644168 }, { "epoch": 55.57487922705314, "grad_norm": 0.2590180039405823, "learning_rate": 0.0001, "loss": 1.5998, "step": 644224 }, { "epoch": 55.57971014492754, "grad_norm": 0.9669620990753174, "learning_rate": 0.0001, "loss": 1.5958, "step": 644280 }, { "epoch": 55.58454106280193, "grad_norm": 0.24706041812896729, "learning_rate": 0.0001, "loss": 1.5988, "step": 644336 }, { "epoch": 55.589371980676326, "grad_norm": 0.3677745759487152, "learning_rate": 0.0001, "loss": 1.5956, "step": 644392 }, { "epoch": 55.594202898550726, "grad_norm": 0.2375112771987915, "learning_rate": 0.0001, "loss": 1.5959, "step": 644448 }, { "epoch": 55.59903381642512, "grad_norm": 0.23679977655410767, "learning_rate": 0.0001, "loss": 1.5968, "step": 644504 }, { "epoch": 55.60386473429952, "grad_norm": 0.26609084010124207, "learning_rate": 0.0001, "loss": 1.5976, "step": 644560 }, { "epoch": 55.608695652173914, "grad_norm": 0.2522110641002655, "learning_rate": 0.0001, "loss": 1.5982, "step": 644616 }, { "epoch": 55.61352657004831, "grad_norm": 0.2544567286968231, "learning_rate": 0.0001, "loss": 1.595, "step": 644672 }, { "epoch": 55.61835748792271, "grad_norm": 21.371746063232422, "learning_rate": 0.0001, "loss": 1.6012, "step": 644728 }, { "epoch": 55.6231884057971, "grad_norm": 0.7340934872627258, "learning_rate": 0.0001, "loss": 1.5989, "step": 644784 }, { "epoch": 55.628019323671495, "grad_norm": 0.24964241683483124, "learning_rate": 0.0001, "loss": 1.5963, "step": 644840 }, { "epoch": 55.632850241545896, "grad_norm": 0.9722907543182373, "learning_rate": 0.0001, "loss": 1.5978, "step": 644896 }, { "epoch": 55.63768115942029, "grad_norm": 0.37080737948417664, "learning_rate": 0.0001, "loss": 1.5975, "step": 644952 }, { "epoch": 55.64251207729468, "grad_norm": 0.43572521209716797, "learning_rate": 0.0001, "loss": 1.5897, "step": 645008 }, { "epoch": 55.64734299516908, "grad_norm": 0.3404228389263153, "learning_rate": 0.0001, "loss": 1.5976, "step": 645064 }, { "epoch": 55.65217391304348, "grad_norm": 0.4940323531627655, "learning_rate": 0.0001, "loss": 1.6035, "step": 645120 }, { "epoch": 55.65700483091788, "grad_norm": 4.148716926574707, "learning_rate": 0.0001, "loss": 1.5951, "step": 645176 }, { "epoch": 55.66183574879227, "grad_norm": 0.791892409324646, "learning_rate": 0.0001, "loss": 1.6026, "step": 645232 }, { "epoch": 55.666666666666664, "grad_norm": 0.28575995564460754, "learning_rate": 0.0001, "loss": 1.5913, "step": 645288 }, { "epoch": 55.671497584541065, "grad_norm": 0.25622910261154175, "learning_rate": 0.0001, "loss": 1.6063, "step": 645344 }, { "epoch": 55.67632850241546, "grad_norm": 1.483147144317627, "learning_rate": 0.0001, "loss": 1.5959, "step": 645400 }, { "epoch": 55.68115942028985, "grad_norm": 0.23231831192970276, "learning_rate": 0.0001, "loss": 1.597, "step": 645456 }, { "epoch": 55.68599033816425, "grad_norm": 0.40277865529060364, "learning_rate": 0.0001, "loss": 1.5951, "step": 645512 }, { "epoch": 55.690821256038646, "grad_norm": 1.3784946203231812, "learning_rate": 0.0001, "loss": 1.597, "step": 645568 }, { "epoch": 55.69565217391305, "grad_norm": 0.30855119228363037, "learning_rate": 0.0001, "loss": 1.6016, "step": 645624 }, { "epoch": 55.70048309178744, "grad_norm": 0.27094438672065735, "learning_rate": 0.0001, "loss": 1.5937, "step": 645680 }, { "epoch": 55.70531400966183, "grad_norm": 0.43054062128067017, "learning_rate": 0.0001, "loss": 1.5904, "step": 645736 }, { "epoch": 55.710144927536234, "grad_norm": 0.5895392894744873, "learning_rate": 0.0001, "loss": 1.6025, "step": 645792 }, { "epoch": 55.71497584541063, "grad_norm": 0.3315744698047638, "learning_rate": 0.0001, "loss": 1.5966, "step": 645848 }, { "epoch": 55.71980676328502, "grad_norm": 0.5285239219665527, "learning_rate": 0.0001, "loss": 1.5999, "step": 645904 }, { "epoch": 55.72463768115942, "grad_norm": 0.7359983921051025, "learning_rate": 0.0001, "loss": 1.6031, "step": 645960 }, { "epoch": 55.729468599033815, "grad_norm": 0.256358802318573, "learning_rate": 0.0001, "loss": 1.601, "step": 646016 }, { "epoch": 55.734299516908216, "grad_norm": 1.4203691482543945, "learning_rate": 0.0001, "loss": 1.5976, "step": 646072 }, { "epoch": 55.73913043478261, "grad_norm": 0.30101799964904785, "learning_rate": 0.0001, "loss": 1.5958, "step": 646128 }, { "epoch": 55.743961352657, "grad_norm": 0.2875584065914154, "learning_rate": 0.0001, "loss": 1.5946, "step": 646184 }, { "epoch": 55.7487922705314, "grad_norm": 0.25799694657325745, "learning_rate": 0.0001, "loss": 1.5946, "step": 646240 }, { "epoch": 55.7536231884058, "grad_norm": 0.26000872254371643, "learning_rate": 0.0001, "loss": 1.5928, "step": 646296 }, { "epoch": 55.75845410628019, "grad_norm": 0.33526524901390076, "learning_rate": 0.0001, "loss": 1.5948, "step": 646352 }, { "epoch": 55.76328502415459, "grad_norm": 0.5462419986724854, "learning_rate": 0.0001, "loss": 1.5917, "step": 646408 }, { "epoch": 55.768115942028984, "grad_norm": 1.7810053825378418, "learning_rate": 0.0001, "loss": 1.597, "step": 646464 }, { "epoch": 55.772946859903385, "grad_norm": 1.4947108030319214, "learning_rate": 0.0001, "loss": 1.5946, "step": 646520 }, { "epoch": 55.77777777777778, "grad_norm": 0.41852450370788574, "learning_rate": 0.0001, "loss": 1.597, "step": 646576 }, { "epoch": 55.78260869565217, "grad_norm": 0.2976429760456085, "learning_rate": 0.0001, "loss": 1.6005, "step": 646632 }, { "epoch": 55.78743961352657, "grad_norm": 0.30139589309692383, "learning_rate": 0.0001, "loss": 1.5944, "step": 646688 }, { "epoch": 55.792270531400966, "grad_norm": 0.3390910029411316, "learning_rate": 0.0001, "loss": 1.6012, "step": 646744 }, { "epoch": 55.79710144927536, "grad_norm": 0.24549348652362823, "learning_rate": 0.0001, "loss": 1.5982, "step": 646800 }, { "epoch": 55.80193236714976, "grad_norm": 7.116519927978516, "learning_rate": 0.0001, "loss": 1.5971, "step": 646856 }, { "epoch": 55.806763285024154, "grad_norm": 1.0041066408157349, "learning_rate": 0.0001, "loss": 1.6, "step": 646912 }, { "epoch": 55.81159420289855, "grad_norm": 0.41689276695251465, "learning_rate": 0.0001, "loss": 1.5985, "step": 646968 }, { "epoch": 55.81642512077295, "grad_norm": 0.30550652742385864, "learning_rate": 0.0001, "loss": 1.5957, "step": 647024 }, { "epoch": 55.82125603864734, "grad_norm": 0.6295708417892456, "learning_rate": 0.0001, "loss": 1.5987, "step": 647080 }, { "epoch": 55.82608695652174, "grad_norm": 1.0880374908447266, "learning_rate": 0.0001, "loss": 1.5994, "step": 647136 }, { "epoch": 55.830917874396135, "grad_norm": 0.3180694878101349, "learning_rate": 0.0001, "loss": 1.5976, "step": 647192 }, { "epoch": 55.83574879227053, "grad_norm": 0.3590626120567322, "learning_rate": 0.0001, "loss": 1.5942, "step": 647248 }, { "epoch": 55.84057971014493, "grad_norm": 0.7671523690223694, "learning_rate": 0.0001, "loss": 1.5986, "step": 647304 }, { "epoch": 55.84541062801932, "grad_norm": 0.4572526216506958, "learning_rate": 0.0001, "loss": 1.6002, "step": 647360 }, { "epoch": 55.85024154589372, "grad_norm": 0.7453961372375488, "learning_rate": 0.0001, "loss": 1.5993, "step": 647416 }, { "epoch": 55.85507246376812, "grad_norm": 0.3526923358440399, "learning_rate": 0.0001, "loss": 1.5955, "step": 647472 }, { "epoch": 55.85990338164251, "grad_norm": 0.2897365093231201, "learning_rate": 0.0001, "loss": 1.5971, "step": 647528 }, { "epoch": 55.86473429951691, "grad_norm": 2.6845345497131348, "learning_rate": 0.0001, "loss": 1.5973, "step": 647584 }, { "epoch": 55.869565217391305, "grad_norm": 0.34734439849853516, "learning_rate": 0.0001, "loss": 1.5962, "step": 647640 }, { "epoch": 55.8743961352657, "grad_norm": 0.5180969834327698, "learning_rate": 0.0001, "loss": 1.6001, "step": 647696 }, { "epoch": 55.8792270531401, "grad_norm": 0.32027319073677063, "learning_rate": 0.0001, "loss": 1.6022, "step": 647752 }, { "epoch": 55.88405797101449, "grad_norm": 0.8577221035957336, "learning_rate": 0.0001, "loss": 1.5946, "step": 647808 }, { "epoch": 55.888888888888886, "grad_norm": 6.569311141967773, "learning_rate": 0.0001, "loss": 1.5988, "step": 647864 }, { "epoch": 55.893719806763286, "grad_norm": 1.6513396501541138, "learning_rate": 0.0001, "loss": 1.5952, "step": 647920 }, { "epoch": 55.89855072463768, "grad_norm": 0.32061028480529785, "learning_rate": 0.0001, "loss": 1.597, "step": 647976 }, { "epoch": 55.90338164251208, "grad_norm": 0.8602241277694702, "learning_rate": 0.0001, "loss": 1.5974, "step": 648032 }, { "epoch": 55.908212560386474, "grad_norm": 0.26892420649528503, "learning_rate": 0.0001, "loss": 1.595, "step": 648088 }, { "epoch": 55.91304347826087, "grad_norm": 0.30354374647140503, "learning_rate": 0.0001, "loss": 1.5976, "step": 648144 }, { "epoch": 55.91787439613527, "grad_norm": 0.5478280186653137, "learning_rate": 0.0001, "loss": 1.601, "step": 648200 }, { "epoch": 55.92270531400966, "grad_norm": 0.36106589436531067, "learning_rate": 0.0001, "loss": 1.5979, "step": 648256 }, { "epoch": 55.927536231884055, "grad_norm": 0.22322499752044678, "learning_rate": 0.0001, "loss": 1.5987, "step": 648312 }, { "epoch": 55.932367149758456, "grad_norm": 0.3077503740787506, "learning_rate": 0.0001, "loss": 1.5987, "step": 648368 }, { "epoch": 55.93719806763285, "grad_norm": 0.26559945940971375, "learning_rate": 0.0001, "loss": 1.5938, "step": 648424 }, { "epoch": 55.94202898550725, "grad_norm": 0.30950579047203064, "learning_rate": 0.0001, "loss": 1.5969, "step": 648480 }, { "epoch": 55.94685990338164, "grad_norm": 0.28371933102607727, "learning_rate": 0.0001, "loss": 1.5955, "step": 648536 }, { "epoch": 55.95169082125604, "grad_norm": 0.31314918398857117, "learning_rate": 0.0001, "loss": 1.5935, "step": 648592 }, { "epoch": 55.95652173913044, "grad_norm": 0.2876567244529724, "learning_rate": 0.0001, "loss": 1.5968, "step": 648648 }, { "epoch": 55.96135265700483, "grad_norm": 1.3727102279663086, "learning_rate": 0.0001, "loss": 1.5907, "step": 648704 }, { "epoch": 55.966183574879224, "grad_norm": 0.5949172973632812, "learning_rate": 0.0001, "loss": 1.5955, "step": 648760 }, { "epoch": 55.971014492753625, "grad_norm": 0.30107274651527405, "learning_rate": 0.0001, "loss": 1.5908, "step": 648816 }, { "epoch": 55.97584541062802, "grad_norm": 0.23813703656196594, "learning_rate": 0.0001, "loss": 1.5949, "step": 648872 }, { "epoch": 55.98067632850242, "grad_norm": 0.49461829662323, "learning_rate": 0.0001, "loss": 1.5901, "step": 648928 }, { "epoch": 55.98550724637681, "grad_norm": 1.1480404138565063, "learning_rate": 0.0001, "loss": 1.5937, "step": 648984 }, { "epoch": 55.990338164251206, "grad_norm": 0.39671897888183594, "learning_rate": 0.0001, "loss": 1.6, "step": 649040 }, { "epoch": 55.99516908212561, "grad_norm": 0.2233998030424118, "learning_rate": 0.0001, "loss": 1.6005, "step": 649096 }, { "epoch": 56.0, "grad_norm": 0.25031524896621704, "learning_rate": 0.0001, "loss": 1.588, "step": 649152 }, { "epoch": 56.00483091787439, "grad_norm": 0.3251936137676239, "learning_rate": 0.0001, "loss": 1.5884, "step": 649208 }, { "epoch": 56.009661835748794, "grad_norm": 0.9744939804077148, "learning_rate": 0.0001, "loss": 1.5944, "step": 649264 }, { "epoch": 56.01449275362319, "grad_norm": 0.25618577003479004, "learning_rate": 0.0001, "loss": 1.5875, "step": 649320 }, { "epoch": 56.01932367149758, "grad_norm": 0.2661288380622864, "learning_rate": 0.0001, "loss": 1.5909, "step": 649376 }, { "epoch": 56.02415458937198, "grad_norm": 0.2846794128417969, "learning_rate": 0.0001, "loss": 1.5895, "step": 649432 }, { "epoch": 56.028985507246375, "grad_norm": 0.3291507065296173, "learning_rate": 0.0001, "loss": 1.5906, "step": 649488 }, { "epoch": 56.033816425120776, "grad_norm": 0.2677748203277588, "learning_rate": 0.0001, "loss": 1.5986, "step": 649544 }, { "epoch": 56.03864734299517, "grad_norm": 0.2962036430835724, "learning_rate": 0.0001, "loss": 1.5929, "step": 649600 }, { "epoch": 56.04347826086956, "grad_norm": 1.0134567022323608, "learning_rate": 0.0001, "loss": 1.5902, "step": 649656 }, { "epoch": 56.04830917874396, "grad_norm": 0.2790984809398651, "learning_rate": 0.0001, "loss": 1.5941, "step": 649712 }, { "epoch": 56.05314009661836, "grad_norm": 0.3483864665031433, "learning_rate": 0.0001, "loss": 1.5851, "step": 649768 }, { "epoch": 56.05797101449275, "grad_norm": 0.24777841567993164, "learning_rate": 0.0001, "loss": 1.5884, "step": 649824 }, { "epoch": 56.06280193236715, "grad_norm": 0.31328755617141724, "learning_rate": 0.0001, "loss": 1.5876, "step": 649880 }, { "epoch": 56.067632850241544, "grad_norm": 0.25611594319343567, "learning_rate": 0.0001, "loss": 1.591, "step": 649936 }, { "epoch": 56.072463768115945, "grad_norm": 1.0781488418579102, "learning_rate": 0.0001, "loss": 1.587, "step": 649992 }, { "epoch": 56.07729468599034, "grad_norm": 0.27940571308135986, "learning_rate": 0.0001, "loss": 1.589, "step": 650048 }, { "epoch": 56.08212560386473, "grad_norm": 0.3435153067111969, "learning_rate": 0.0001, "loss": 1.5962, "step": 650104 }, { "epoch": 56.08695652173913, "grad_norm": 0.3163916766643524, "learning_rate": 0.0001, "loss": 1.5955, "step": 650160 }, { "epoch": 56.091787439613526, "grad_norm": 0.2701931297779083, "learning_rate": 0.0001, "loss": 1.5878, "step": 650216 }, { "epoch": 56.09661835748792, "grad_norm": 0.28042179346084595, "learning_rate": 0.0001, "loss": 1.5886, "step": 650272 }, { "epoch": 56.10144927536232, "grad_norm": 0.4571271538734436, "learning_rate": 0.0001, "loss": 1.5888, "step": 650328 }, { "epoch": 56.106280193236714, "grad_norm": 8.698904037475586, "learning_rate": 0.0001, "loss": 1.5819, "step": 650384 }, { "epoch": 56.111111111111114, "grad_norm": 0.43161579966545105, "learning_rate": 0.0001, "loss": 1.5874, "step": 650440 }, { "epoch": 56.11594202898551, "grad_norm": 0.45476385951042175, "learning_rate": 0.0001, "loss": 1.5923, "step": 650496 }, { "epoch": 56.1207729468599, "grad_norm": 2.11631178855896, "learning_rate": 0.0001, "loss": 1.5884, "step": 650552 }, { "epoch": 56.1256038647343, "grad_norm": 0.27540963888168335, "learning_rate": 0.0001, "loss": 1.5894, "step": 650608 }, { "epoch": 56.130434782608695, "grad_norm": 14.3665132522583, "learning_rate": 0.0001, "loss": 1.5876, "step": 650664 }, { "epoch": 56.13526570048309, "grad_norm": 0.2646839916706085, "learning_rate": 0.0001, "loss": 1.5882, "step": 650720 }, { "epoch": 56.14009661835749, "grad_norm": 0.5071391463279724, "learning_rate": 0.0001, "loss": 1.5908, "step": 650776 }, { "epoch": 56.14492753623188, "grad_norm": 1.1900123357772827, "learning_rate": 0.0001, "loss": 1.593, "step": 650832 }, { "epoch": 56.14975845410628, "grad_norm": 0.542011559009552, "learning_rate": 0.0001, "loss": 1.5884, "step": 650888 }, { "epoch": 56.15458937198068, "grad_norm": 0.44578832387924194, "learning_rate": 0.0001, "loss": 1.5848, "step": 650944 }, { "epoch": 56.15942028985507, "grad_norm": 0.2741454541683197, "learning_rate": 0.0001, "loss": 1.5935, "step": 651000 }, { "epoch": 56.16425120772947, "grad_norm": 2.6737120151519775, "learning_rate": 0.0001, "loss": 1.595, "step": 651056 }, { "epoch": 56.169082125603865, "grad_norm": 0.7009297013282776, "learning_rate": 0.0001, "loss": 1.5885, "step": 651112 }, { "epoch": 56.17391304347826, "grad_norm": 0.3018144965171814, "learning_rate": 0.0001, "loss": 1.5958, "step": 651168 }, { "epoch": 56.17874396135266, "grad_norm": 2.688872814178467, "learning_rate": 0.0001, "loss": 1.5918, "step": 651224 }, { "epoch": 56.18357487922705, "grad_norm": 0.2711884379386902, "learning_rate": 0.0001, "loss": 1.5989, "step": 651280 }, { "epoch": 56.18840579710145, "grad_norm": 0.2594825327396393, "learning_rate": 0.0001, "loss": 1.5933, "step": 651336 }, { "epoch": 56.193236714975846, "grad_norm": 0.4535479247570038, "learning_rate": 0.0001, "loss": 1.592, "step": 651392 }, { "epoch": 56.19806763285024, "grad_norm": 0.532451868057251, "learning_rate": 0.0001, "loss": 1.598, "step": 651448 }, { "epoch": 56.20289855072464, "grad_norm": 1.6821171045303345, "learning_rate": 0.0001, "loss": 1.5948, "step": 651504 }, { "epoch": 56.207729468599034, "grad_norm": 0.2655870318412781, "learning_rate": 0.0001, "loss": 1.5933, "step": 651560 }, { "epoch": 56.21256038647343, "grad_norm": 0.24932555854320526, "learning_rate": 0.0001, "loss": 1.5954, "step": 651616 }, { "epoch": 56.21739130434783, "grad_norm": 1.3307522535324097, "learning_rate": 0.0001, "loss": 1.5936, "step": 651672 }, { "epoch": 56.22222222222222, "grad_norm": 0.3183320462703705, "learning_rate": 0.0001, "loss": 1.5924, "step": 651728 }, { "epoch": 56.227053140096615, "grad_norm": 0.2520836889743805, "learning_rate": 0.0001, "loss": 1.5932, "step": 651784 }, { "epoch": 56.231884057971016, "grad_norm": 0.34203898906707764, "learning_rate": 0.0001, "loss": 1.5913, "step": 651840 }, { "epoch": 56.23671497584541, "grad_norm": 0.33018165826797485, "learning_rate": 0.0001, "loss": 1.5865, "step": 651896 }, { "epoch": 56.24154589371981, "grad_norm": 0.8041684031486511, "learning_rate": 0.0001, "loss": 1.5883, "step": 651952 }, { "epoch": 56.2463768115942, "grad_norm": 0.3847254812717438, "learning_rate": 0.0001, "loss": 1.5966, "step": 652008 }, { "epoch": 56.2512077294686, "grad_norm": 0.24853341281414032, "learning_rate": 0.0001, "loss": 1.591, "step": 652064 }, { "epoch": 56.256038647343, "grad_norm": 1.4574060440063477, "learning_rate": 0.0001, "loss": 1.5935, "step": 652120 }, { "epoch": 56.26086956521739, "grad_norm": 2.262221574783325, "learning_rate": 0.0001, "loss": 1.5958, "step": 652176 }, { "epoch": 56.265700483091784, "grad_norm": 0.30158647894859314, "learning_rate": 0.0001, "loss": 1.5897, "step": 652232 }, { "epoch": 56.270531400966185, "grad_norm": 4.869492053985596, "learning_rate": 0.0001, "loss": 1.5876, "step": 652288 }, { "epoch": 56.27536231884058, "grad_norm": 0.6301326155662537, "learning_rate": 0.0001, "loss": 1.5997, "step": 652344 }, { "epoch": 56.28019323671498, "grad_norm": 1.360180377960205, "learning_rate": 0.0001, "loss": 1.5966, "step": 652400 }, { "epoch": 56.28502415458937, "grad_norm": 0.28031474351882935, "learning_rate": 0.0001, "loss": 1.5899, "step": 652456 }, { "epoch": 56.289855072463766, "grad_norm": 0.6259554624557495, "learning_rate": 0.0001, "loss": 1.5941, "step": 652512 }, { "epoch": 56.29468599033817, "grad_norm": 0.35960331559181213, "learning_rate": 0.0001, "loss": 1.5876, "step": 652568 }, { "epoch": 56.29951690821256, "grad_norm": 0.38535276055336, "learning_rate": 0.0001, "loss": 1.5856, "step": 652624 }, { "epoch": 56.30434782608695, "grad_norm": 0.2939078211784363, "learning_rate": 0.0001, "loss": 1.5925, "step": 652680 }, { "epoch": 56.309178743961354, "grad_norm": 0.2735764980316162, "learning_rate": 0.0001, "loss": 1.5918, "step": 652736 }, { "epoch": 56.31400966183575, "grad_norm": 1.430564284324646, "learning_rate": 0.0001, "loss": 1.5918, "step": 652792 }, { "epoch": 56.31884057971015, "grad_norm": 0.28131869435310364, "learning_rate": 0.0001, "loss": 1.5915, "step": 652848 }, { "epoch": 56.32367149758454, "grad_norm": 0.33446744084358215, "learning_rate": 0.0001, "loss": 1.5959, "step": 652904 }, { "epoch": 56.328502415458935, "grad_norm": 1.2044622898101807, "learning_rate": 0.0001, "loss": 1.5917, "step": 652960 }, { "epoch": 56.333333333333336, "grad_norm": 31.95981788635254, "learning_rate": 0.0001, "loss": 1.5958, "step": 653016 }, { "epoch": 56.33816425120773, "grad_norm": 0.37520450353622437, "learning_rate": 0.0001, "loss": 1.5838, "step": 653072 }, { "epoch": 56.34299516908212, "grad_norm": 0.4469257593154907, "learning_rate": 0.0001, "loss": 1.5996, "step": 653128 }, { "epoch": 56.34782608695652, "grad_norm": 0.2645532786846161, "learning_rate": 0.0001, "loss": 1.5916, "step": 653184 }, { "epoch": 56.35265700483092, "grad_norm": 0.34892866015434265, "learning_rate": 0.0001, "loss": 1.593, "step": 653240 }, { "epoch": 56.35748792270532, "grad_norm": 0.2636328637599945, "learning_rate": 0.0001, "loss": 1.5942, "step": 653296 }, { "epoch": 56.36231884057971, "grad_norm": 0.48251017928123474, "learning_rate": 0.0001, "loss": 1.5966, "step": 653352 }, { "epoch": 56.367149758454104, "grad_norm": 0.3225861191749573, "learning_rate": 0.0001, "loss": 1.5919, "step": 653408 }, { "epoch": 56.371980676328505, "grad_norm": 0.32521700859069824, "learning_rate": 0.0001, "loss": 1.5933, "step": 653464 }, { "epoch": 56.3768115942029, "grad_norm": 0.2776007652282715, "learning_rate": 0.0001, "loss": 1.5907, "step": 653520 }, { "epoch": 56.38164251207729, "grad_norm": 0.26025688648223877, "learning_rate": 0.0001, "loss": 1.5903, "step": 653576 }, { "epoch": 56.38647342995169, "grad_norm": 0.28209730982780457, "learning_rate": 0.0001, "loss": 1.5947, "step": 653632 }, { "epoch": 56.391304347826086, "grad_norm": 0.3722004294395447, "learning_rate": 0.0001, "loss": 1.5903, "step": 653688 }, { "epoch": 56.39613526570048, "grad_norm": 4.569806098937988, "learning_rate": 0.0001, "loss": 1.5931, "step": 653744 }, { "epoch": 56.40096618357488, "grad_norm": 0.2531089186668396, "learning_rate": 0.0001, "loss": 1.5903, "step": 653800 }, { "epoch": 56.405797101449274, "grad_norm": 0.6256371736526489, "learning_rate": 0.0001, "loss": 1.5967, "step": 653856 }, { "epoch": 56.410628019323674, "grad_norm": 0.32933592796325684, "learning_rate": 0.0001, "loss": 1.5856, "step": 653912 }, { "epoch": 56.41545893719807, "grad_norm": 4.181570529937744, "learning_rate": 0.0001, "loss": 1.5928, "step": 653968 }, { "epoch": 56.42028985507246, "grad_norm": 0.26997044682502747, "learning_rate": 0.0001, "loss": 1.5935, "step": 654024 }, { "epoch": 56.42512077294686, "grad_norm": 0.3191961646080017, "learning_rate": 0.0001, "loss": 1.5934, "step": 654080 }, { "epoch": 56.429951690821255, "grad_norm": 0.5153756141662598, "learning_rate": 0.0001, "loss": 1.5949, "step": 654136 }, { "epoch": 56.43478260869565, "grad_norm": 0.7202045321464539, "learning_rate": 0.0001, "loss": 1.586, "step": 654192 }, { "epoch": 56.43961352657005, "grad_norm": 0.29034224152565, "learning_rate": 0.0001, "loss": 1.5891, "step": 654248 }, { "epoch": 56.44444444444444, "grad_norm": 0.41070428490638733, "learning_rate": 0.0001, "loss": 1.5946, "step": 654304 }, { "epoch": 56.44927536231884, "grad_norm": 0.673068642616272, "learning_rate": 0.0001, "loss": 1.5945, "step": 654360 }, { "epoch": 56.45410628019324, "grad_norm": 0.4053625762462616, "learning_rate": 0.0001, "loss": 1.5896, "step": 654416 }, { "epoch": 56.45893719806763, "grad_norm": 0.5001897811889648, "learning_rate": 0.0001, "loss": 1.5996, "step": 654472 }, { "epoch": 56.46376811594203, "grad_norm": 3.28840970993042, "learning_rate": 0.0001, "loss": 1.5898, "step": 654528 }, { "epoch": 56.468599033816425, "grad_norm": 0.28617236018180847, "learning_rate": 0.0001, "loss": 1.5921, "step": 654584 }, { "epoch": 56.47342995169082, "grad_norm": 5.020074367523193, "learning_rate": 0.0001, "loss": 1.5999, "step": 654640 }, { "epoch": 56.47826086956522, "grad_norm": 0.6261232495307922, "learning_rate": 0.0001, "loss": 1.5894, "step": 654696 }, { "epoch": 56.48309178743961, "grad_norm": 0.3204079270362854, "learning_rate": 0.0001, "loss": 1.5878, "step": 654752 }, { "epoch": 56.48792270531401, "grad_norm": 0.27893829345703125, "learning_rate": 0.0001, "loss": 1.5932, "step": 654808 }, { "epoch": 56.492753623188406, "grad_norm": 0.29471907019615173, "learning_rate": 0.0001, "loss": 1.5934, "step": 654864 }, { "epoch": 56.4975845410628, "grad_norm": 1.0430346727371216, "learning_rate": 0.0001, "loss": 1.6005, "step": 654920 }, { "epoch": 56.5024154589372, "grad_norm": 0.2444280982017517, "learning_rate": 0.0001, "loss": 1.5959, "step": 654976 }, { "epoch": 56.507246376811594, "grad_norm": 0.2596479654312134, "learning_rate": 0.0001, "loss": 1.5934, "step": 655032 }, { "epoch": 56.51207729468599, "grad_norm": 1.3687646389007568, "learning_rate": 0.0001, "loss": 1.588, "step": 655088 }, { "epoch": 56.51690821256039, "grad_norm": 0.26424553990364075, "learning_rate": 0.0001, "loss": 1.5882, "step": 655144 }, { "epoch": 56.52173913043478, "grad_norm": 0.48956334590911865, "learning_rate": 0.0001, "loss": 1.6019, "step": 655200 }, { "epoch": 56.52657004830918, "grad_norm": 0.5770378708839417, "learning_rate": 0.0001, "loss": 1.594, "step": 655256 }, { "epoch": 56.531400966183575, "grad_norm": 0.3211418092250824, "learning_rate": 0.0001, "loss": 1.5912, "step": 655312 }, { "epoch": 56.53623188405797, "grad_norm": 1.1312309503555298, "learning_rate": 0.0001, "loss": 1.5966, "step": 655368 }, { "epoch": 56.54106280193237, "grad_norm": 2.9840753078460693, "learning_rate": 0.0001, "loss": 1.5914, "step": 655424 }, { "epoch": 56.54589371980676, "grad_norm": 0.30969855189323425, "learning_rate": 0.0001, "loss": 1.5966, "step": 655480 }, { "epoch": 56.55072463768116, "grad_norm": 1.2605434656143188, "learning_rate": 0.0001, "loss": 1.5959, "step": 655536 }, { "epoch": 56.55555555555556, "grad_norm": 0.3720840811729431, "learning_rate": 0.0001, "loss": 1.591, "step": 655592 }, { "epoch": 56.56038647342995, "grad_norm": 4.409801483154297, "learning_rate": 0.0001, "loss": 1.5955, "step": 655648 }, { "epoch": 56.56521739130435, "grad_norm": 0.5601296424865723, "learning_rate": 0.0001, "loss": 1.5933, "step": 655704 }, { "epoch": 56.570048309178745, "grad_norm": 0.367973655462265, "learning_rate": 0.0001, "loss": 1.592, "step": 655760 }, { "epoch": 56.57487922705314, "grad_norm": 0.3280981779098511, "learning_rate": 0.0001, "loss": 1.5918, "step": 655816 }, { "epoch": 56.57971014492754, "grad_norm": 0.265379399061203, "learning_rate": 0.0001, "loss": 1.5942, "step": 655872 }, { "epoch": 56.58454106280193, "grad_norm": 0.35233739018440247, "learning_rate": 0.0001, "loss": 1.5924, "step": 655928 }, { "epoch": 56.589371980676326, "grad_norm": 0.39589330554008484, "learning_rate": 0.0001, "loss": 1.5935, "step": 655984 }, { "epoch": 56.594202898550726, "grad_norm": 0.23902460932731628, "learning_rate": 0.0001, "loss": 1.5967, "step": 656040 }, { "epoch": 56.59903381642512, "grad_norm": 0.2332705855369568, "learning_rate": 0.0001, "loss": 1.587, "step": 656096 }, { "epoch": 56.60386473429952, "grad_norm": 2.976029872894287, "learning_rate": 0.0001, "loss": 1.5977, "step": 656152 }, { "epoch": 56.608695652173914, "grad_norm": 0.2998616099357605, "learning_rate": 0.0001, "loss": 1.5928, "step": 656208 }, { "epoch": 56.61352657004831, "grad_norm": 0.2656669616699219, "learning_rate": 0.0001, "loss": 1.5883, "step": 656264 }, { "epoch": 56.61835748792271, "grad_norm": 0.2556016445159912, "learning_rate": 0.0001, "loss": 1.5925, "step": 656320 }, { "epoch": 56.6231884057971, "grad_norm": 0.2933351993560791, "learning_rate": 0.0001, "loss": 1.5935, "step": 656376 }, { "epoch": 56.628019323671495, "grad_norm": 0.30946779251098633, "learning_rate": 0.0001, "loss": 1.5947, "step": 656432 }, { "epoch": 56.632850241545896, "grad_norm": 0.23561258614063263, "learning_rate": 0.0001, "loss": 1.595, "step": 656488 }, { "epoch": 56.63768115942029, "grad_norm": 0.30168983340263367, "learning_rate": 0.0001, "loss": 1.5892, "step": 656544 }, { "epoch": 56.64251207729468, "grad_norm": 1.2999591827392578, "learning_rate": 0.0001, "loss": 1.6012, "step": 656600 }, { "epoch": 56.64734299516908, "grad_norm": 0.34842193126678467, "learning_rate": 0.0001, "loss": 1.5895, "step": 656656 }, { "epoch": 56.65217391304348, "grad_norm": 0.32077836990356445, "learning_rate": 0.0001, "loss": 1.5976, "step": 656712 }, { "epoch": 56.65700483091788, "grad_norm": 0.22075481712818146, "learning_rate": 0.0001, "loss": 1.5976, "step": 656768 }, { "epoch": 56.66183574879227, "grad_norm": 0.3286707103252411, "learning_rate": 0.0001, "loss": 1.5898, "step": 656824 }, { "epoch": 56.666666666666664, "grad_norm": 1.6538316011428833, "learning_rate": 0.0001, "loss": 1.5879, "step": 656880 }, { "epoch": 56.671497584541065, "grad_norm": 0.7517232894897461, "learning_rate": 0.0001, "loss": 1.5941, "step": 656936 }, { "epoch": 56.67632850241546, "grad_norm": 0.28714558482170105, "learning_rate": 0.0001, "loss": 1.5886, "step": 656992 }, { "epoch": 56.68115942028985, "grad_norm": 0.3306226432323456, "learning_rate": 0.0001, "loss": 1.5963, "step": 657048 }, { "epoch": 56.68599033816425, "grad_norm": 0.3845840096473694, "learning_rate": 0.0001, "loss": 1.5874, "step": 657104 }, { "epoch": 56.690821256038646, "grad_norm": 0.4463178515434265, "learning_rate": 0.0001, "loss": 1.5907, "step": 657160 }, { "epoch": 56.69565217391305, "grad_norm": 0.4781760573387146, "learning_rate": 0.0001, "loss": 1.5948, "step": 657216 }, { "epoch": 56.70048309178744, "grad_norm": 0.4841776192188263, "learning_rate": 0.0001, "loss": 1.5988, "step": 657272 }, { "epoch": 56.70531400966183, "grad_norm": 0.40344417095184326, "learning_rate": 0.0001, "loss": 1.5937, "step": 657328 }, { "epoch": 56.710144927536234, "grad_norm": 0.7120177745819092, "learning_rate": 0.0001, "loss": 1.5957, "step": 657384 }, { "epoch": 56.71497584541063, "grad_norm": 0.2518468499183655, "learning_rate": 0.0001, "loss": 1.5928, "step": 657440 }, { "epoch": 56.71980676328502, "grad_norm": 1.6171658039093018, "learning_rate": 0.0001, "loss": 1.5983, "step": 657496 }, { "epoch": 56.72463768115942, "grad_norm": 0.24397915601730347, "learning_rate": 0.0001, "loss": 1.5927, "step": 657552 }, { "epoch": 56.729468599033815, "grad_norm": 1.8604178428649902, "learning_rate": 0.0001, "loss": 1.5927, "step": 657608 }, { "epoch": 56.734299516908216, "grad_norm": 0.5340882539749146, "learning_rate": 0.0001, "loss": 1.5938, "step": 657664 }, { "epoch": 56.73913043478261, "grad_norm": 0.2847989797592163, "learning_rate": 0.0001, "loss": 1.5895, "step": 657720 }, { "epoch": 56.743961352657, "grad_norm": 0.3082844913005829, "learning_rate": 0.0001, "loss": 1.5932, "step": 657776 }, { "epoch": 56.7487922705314, "grad_norm": 0.37361085414886475, "learning_rate": 0.0001, "loss": 1.5892, "step": 657832 }, { "epoch": 56.7536231884058, "grad_norm": 2.4283738136291504, "learning_rate": 0.0001, "loss": 1.5913, "step": 657888 }, { "epoch": 56.75845410628019, "grad_norm": 0.36251190304756165, "learning_rate": 0.0001, "loss": 1.5886, "step": 657944 }, { "epoch": 56.76328502415459, "grad_norm": 0.2524905204772949, "learning_rate": 0.0001, "loss": 1.5883, "step": 658000 }, { "epoch": 56.768115942028984, "grad_norm": 0.5296201109886169, "learning_rate": 0.0001, "loss": 1.5883, "step": 658056 }, { "epoch": 56.772946859903385, "grad_norm": 0.3094281256198883, "learning_rate": 0.0001, "loss": 1.5909, "step": 658112 }, { "epoch": 56.77777777777778, "grad_norm": 1.780634880065918, "learning_rate": 0.0001, "loss": 1.5987, "step": 658168 }, { "epoch": 56.78260869565217, "grad_norm": 0.3451590836048126, "learning_rate": 0.0001, "loss": 1.5944, "step": 658224 }, { "epoch": 56.78743961352657, "grad_norm": 14.111398696899414, "learning_rate": 0.0001, "loss": 1.5936, "step": 658280 }, { "epoch": 56.792270531400966, "grad_norm": 0.3622214198112488, "learning_rate": 0.0001, "loss": 1.5916, "step": 658336 }, { "epoch": 56.79710144927536, "grad_norm": 0.2960776388645172, "learning_rate": 0.0001, "loss": 1.5982, "step": 658392 }, { "epoch": 56.80193236714976, "grad_norm": 0.3961354196071625, "learning_rate": 0.0001, "loss": 1.5891, "step": 658448 }, { "epoch": 56.806763285024154, "grad_norm": 0.2803933620452881, "learning_rate": 0.0001, "loss": 1.5937, "step": 658504 }, { "epoch": 56.81159420289855, "grad_norm": 0.2465822994709015, "learning_rate": 0.0001, "loss": 1.5906, "step": 658560 }, { "epoch": 56.81642512077295, "grad_norm": 0.5572532415390015, "learning_rate": 0.0001, "loss": 1.5849, "step": 658616 }, { "epoch": 56.82125603864734, "grad_norm": 1.1493234634399414, "learning_rate": 0.0001, "loss": 1.5949, "step": 658672 }, { "epoch": 56.82608695652174, "grad_norm": 0.2782050669193268, "learning_rate": 0.0001, "loss": 1.5837, "step": 658728 }, { "epoch": 56.830917874396135, "grad_norm": 0.3546285927295685, "learning_rate": 0.0001, "loss": 1.5948, "step": 658784 }, { "epoch": 56.83574879227053, "grad_norm": 0.281319797039032, "learning_rate": 0.0001, "loss": 1.5865, "step": 658840 }, { "epoch": 56.84057971014493, "grad_norm": 1.2814018726348877, "learning_rate": 0.0001, "loss": 1.5925, "step": 658896 }, { "epoch": 56.84541062801932, "grad_norm": 0.7033421397209167, "learning_rate": 0.0001, "loss": 1.5933, "step": 658952 }, { "epoch": 56.85024154589372, "grad_norm": 0.42105403542518616, "learning_rate": 0.0001, "loss": 1.5939, "step": 659008 }, { "epoch": 56.85507246376812, "grad_norm": 0.30412155389785767, "learning_rate": 0.0001, "loss": 1.5954, "step": 659064 }, { "epoch": 56.85990338164251, "grad_norm": 0.36143743991851807, "learning_rate": 0.0001, "loss": 1.5867, "step": 659120 }, { "epoch": 56.86473429951691, "grad_norm": 0.2625112235546112, "learning_rate": 0.0001, "loss": 1.5904, "step": 659176 }, { "epoch": 56.869565217391305, "grad_norm": 0.24434307217597961, "learning_rate": 0.0001, "loss": 1.595, "step": 659232 }, { "epoch": 56.8743961352657, "grad_norm": 3.4435336589813232, "learning_rate": 0.0001, "loss": 1.5933, "step": 659288 }, { "epoch": 56.8792270531401, "grad_norm": 0.30624547600746155, "learning_rate": 0.0001, "loss": 1.5977, "step": 659344 }, { "epoch": 56.88405797101449, "grad_norm": 10.303715705871582, "learning_rate": 0.0001, "loss": 1.5953, "step": 659400 }, { "epoch": 56.888888888888886, "grad_norm": 0.2802961766719818, "learning_rate": 0.0001, "loss": 1.5948, "step": 659456 }, { "epoch": 56.893719806763286, "grad_norm": 0.2462731897830963, "learning_rate": 0.0001, "loss": 1.5904, "step": 659512 }, { "epoch": 56.89855072463768, "grad_norm": 0.6648129224777222, "learning_rate": 0.0001, "loss": 1.5949, "step": 659568 }, { "epoch": 56.90338164251208, "grad_norm": 0.3840871751308441, "learning_rate": 0.0001, "loss": 1.5913, "step": 659624 }, { "epoch": 56.908212560386474, "grad_norm": 0.27150776982307434, "learning_rate": 0.0001, "loss": 1.594, "step": 659680 }, { "epoch": 56.91304347826087, "grad_norm": 0.4511825442314148, "learning_rate": 0.0001, "loss": 1.5919, "step": 659736 }, { "epoch": 56.91787439613527, "grad_norm": 0.28496190905570984, "learning_rate": 0.0001, "loss": 1.5907, "step": 659792 }, { "epoch": 56.92270531400966, "grad_norm": 0.22230364382266998, "learning_rate": 0.0001, "loss": 1.5889, "step": 659848 }, { "epoch": 56.927536231884055, "grad_norm": 0.3547990024089813, "learning_rate": 0.0001, "loss": 1.5988, "step": 659904 }, { "epoch": 56.932367149758456, "grad_norm": 0.36376458406448364, "learning_rate": 0.0001, "loss": 1.5977, "step": 659960 }, { "epoch": 56.93719806763285, "grad_norm": 0.9163570404052734, "learning_rate": 0.0001, "loss": 1.5936, "step": 660016 }, { "epoch": 56.94202898550725, "grad_norm": 0.35101327300071716, "learning_rate": 0.0001, "loss": 1.5923, "step": 660072 }, { "epoch": 56.94685990338164, "grad_norm": 0.34388497471809387, "learning_rate": 0.0001, "loss": 1.5926, "step": 660128 }, { "epoch": 56.95169082125604, "grad_norm": 0.33212268352508545, "learning_rate": 0.0001, "loss": 1.5985, "step": 660184 }, { "epoch": 56.95652173913044, "grad_norm": 0.27713996171951294, "learning_rate": 0.0001, "loss": 1.592, "step": 660240 }, { "epoch": 56.96135265700483, "grad_norm": 0.7289239764213562, "learning_rate": 0.0001, "loss": 1.5874, "step": 660296 }, { "epoch": 56.966183574879224, "grad_norm": 1.095052719116211, "learning_rate": 0.0001, "loss": 1.5911, "step": 660352 }, { "epoch": 56.971014492753625, "grad_norm": 0.36998888850212097, "learning_rate": 0.0001, "loss": 1.5937, "step": 660408 }, { "epoch": 56.97584541062802, "grad_norm": 0.47621890902519226, "learning_rate": 0.0001, "loss": 1.5975, "step": 660464 }, { "epoch": 56.98067632850242, "grad_norm": 0.3030875325202942, "learning_rate": 0.0001, "loss": 1.5949, "step": 660520 }, { "epoch": 56.98550724637681, "grad_norm": 0.34515222907066345, "learning_rate": 0.0001, "loss": 1.5934, "step": 660576 }, { "epoch": 56.990338164251206, "grad_norm": 0.33673614263534546, "learning_rate": 0.0001, "loss": 1.5933, "step": 660632 }, { "epoch": 56.99516908212561, "grad_norm": 0.6740550994873047, "learning_rate": 0.0001, "loss": 1.5928, "step": 660688 }, { "epoch": 57.0, "grad_norm": 5.984618663787842, "learning_rate": 0.0001, "loss": 1.5937, "step": 660744 }, { "epoch": 57.00483091787439, "grad_norm": 0.2531173527240753, "learning_rate": 0.0001, "loss": 1.593, "step": 660800 }, { "epoch": 57.009661835748794, "grad_norm": 2.8122222423553467, "learning_rate": 0.0001, "loss": 1.5827, "step": 660856 }, { "epoch": 57.01449275362319, "grad_norm": 0.29173678159713745, "learning_rate": 0.0001, "loss": 1.5868, "step": 660912 }, { "epoch": 57.01932367149758, "grad_norm": 3.597409248352051, "learning_rate": 0.0001, "loss": 1.5887, "step": 660968 }, { "epoch": 57.02415458937198, "grad_norm": 6.77615213394165, "learning_rate": 0.0001, "loss": 1.5793, "step": 661024 }, { "epoch": 57.028985507246375, "grad_norm": 1.9863362312316895, "learning_rate": 0.0001, "loss": 1.577, "step": 661080 }, { "epoch": 57.033816425120776, "grad_norm": 0.5025198459625244, "learning_rate": 0.0001, "loss": 1.5777, "step": 661136 }, { "epoch": 57.03864734299517, "grad_norm": 2.8713226318359375, "learning_rate": 0.0001, "loss": 1.5919, "step": 661192 }, { "epoch": 57.04347826086956, "grad_norm": 0.35258808732032776, "learning_rate": 0.0001, "loss": 1.5878, "step": 661248 }, { "epoch": 57.04830917874396, "grad_norm": 0.2553408443927765, "learning_rate": 0.0001, "loss": 1.5789, "step": 661304 }, { "epoch": 57.05314009661836, "grad_norm": 0.25983041524887085, "learning_rate": 0.0001, "loss": 1.5877, "step": 661360 }, { "epoch": 57.05797101449275, "grad_norm": 0.2709876298904419, "learning_rate": 0.0001, "loss": 1.5877, "step": 661416 }, { "epoch": 57.06280193236715, "grad_norm": 0.8470173478126526, "learning_rate": 0.0001, "loss": 1.5907, "step": 661472 }, { "epoch": 57.067632850241544, "grad_norm": 1.2813643217086792, "learning_rate": 0.0001, "loss": 1.5832, "step": 661528 }, { "epoch": 57.072463768115945, "grad_norm": 0.30295422673225403, "learning_rate": 0.0001, "loss": 1.5868, "step": 661584 }, { "epoch": 57.07729468599034, "grad_norm": 0.3164427876472473, "learning_rate": 0.0001, "loss": 1.5778, "step": 661640 }, { "epoch": 57.08212560386473, "grad_norm": 0.2698957324028015, "learning_rate": 0.0001, "loss": 1.584, "step": 661696 }, { "epoch": 57.08695652173913, "grad_norm": 0.37237900495529175, "learning_rate": 0.0001, "loss": 1.5866, "step": 661752 }, { "epoch": 57.091787439613526, "grad_norm": 1.8867425918579102, "learning_rate": 0.0001, "loss": 1.5952, "step": 661808 }, { "epoch": 57.09661835748792, "grad_norm": 0.9572229385375977, "learning_rate": 0.0001, "loss": 1.5893, "step": 661864 }, { "epoch": 57.10144927536232, "grad_norm": 0.3471813201904297, "learning_rate": 0.0001, "loss": 1.5877, "step": 661920 }, { "epoch": 57.106280193236714, "grad_norm": 0.33243048191070557, "learning_rate": 0.0001, "loss": 1.5851, "step": 661976 }, { "epoch": 57.111111111111114, "grad_norm": 0.5631434321403503, "learning_rate": 0.0001, "loss": 1.5854, "step": 662032 }, { "epoch": 57.11594202898551, "grad_norm": 1.2187086343765259, "learning_rate": 0.0001, "loss": 1.5884, "step": 662088 }, { "epoch": 57.1207729468599, "grad_norm": 1.651403546333313, "learning_rate": 0.0001, "loss": 1.591, "step": 662144 }, { "epoch": 57.1256038647343, "grad_norm": 0.2593250572681427, "learning_rate": 0.0001, "loss": 1.5822, "step": 662200 }, { "epoch": 57.130434782608695, "grad_norm": 1.0020442008972168, "learning_rate": 0.0001, "loss": 1.5832, "step": 662256 }, { "epoch": 57.13526570048309, "grad_norm": 0.8066262602806091, "learning_rate": 0.0001, "loss": 1.5858, "step": 662312 }, { "epoch": 57.14009661835749, "grad_norm": 0.7116947770118713, "learning_rate": 0.0001, "loss": 1.5774, "step": 662368 }, { "epoch": 57.14492753623188, "grad_norm": 0.2587328851222992, "learning_rate": 0.0001, "loss": 1.5837, "step": 662424 }, { "epoch": 57.14975845410628, "grad_norm": 0.2759600281715393, "learning_rate": 0.0001, "loss": 1.5867, "step": 662480 }, { "epoch": 57.15458937198068, "grad_norm": 0.23403924703598022, "learning_rate": 0.0001, "loss": 1.5864, "step": 662536 }, { "epoch": 57.15942028985507, "grad_norm": 0.23003321886062622, "learning_rate": 0.0001, "loss": 1.5935, "step": 662592 }, { "epoch": 57.16425120772947, "grad_norm": 3.8311879634857178, "learning_rate": 0.0001, "loss": 1.58, "step": 662648 }, { "epoch": 57.169082125603865, "grad_norm": 0.41634976863861084, "learning_rate": 0.0001, "loss": 1.5925, "step": 662704 }, { "epoch": 57.17391304347826, "grad_norm": 0.2734439969062805, "learning_rate": 0.0001, "loss": 1.5934, "step": 662760 }, { "epoch": 57.17874396135266, "grad_norm": 0.29495882987976074, "learning_rate": 0.0001, "loss": 1.5821, "step": 662816 }, { "epoch": 57.18357487922705, "grad_norm": 0.23844580352306366, "learning_rate": 0.0001, "loss": 1.5896, "step": 662872 }, { "epoch": 57.18840579710145, "grad_norm": 0.29950541257858276, "learning_rate": 0.0001, "loss": 1.5898, "step": 662928 }, { "epoch": 57.193236714975846, "grad_norm": 0.4593835473060608, "learning_rate": 0.0001, "loss": 1.589, "step": 662984 }, { "epoch": 57.19806763285024, "grad_norm": 0.7737977504730225, "learning_rate": 0.0001, "loss": 1.5914, "step": 663040 }, { "epoch": 57.20289855072464, "grad_norm": 0.37389910221099854, "learning_rate": 0.0001, "loss": 1.5819, "step": 663096 }, { "epoch": 57.207729468599034, "grad_norm": 7.954313278198242, "learning_rate": 0.0001, "loss": 1.583, "step": 663152 }, { "epoch": 57.21256038647343, "grad_norm": 0.6001903414726257, "learning_rate": 0.0001, "loss": 1.5918, "step": 663208 }, { "epoch": 57.21739130434783, "grad_norm": 0.31442052125930786, "learning_rate": 0.0001, "loss": 1.5816, "step": 663264 }, { "epoch": 57.22222222222222, "grad_norm": 1.2496358156204224, "learning_rate": 0.0001, "loss": 1.5879, "step": 663320 }, { "epoch": 57.227053140096615, "grad_norm": 13.5412015914917, "learning_rate": 0.0001, "loss": 1.5885, "step": 663376 }, { "epoch": 57.231884057971016, "grad_norm": 0.3703087866306305, "learning_rate": 0.0001, "loss": 1.59, "step": 663432 }, { "epoch": 57.23671497584541, "grad_norm": 0.32277023792266846, "learning_rate": 0.0001, "loss": 1.5917, "step": 663488 }, { "epoch": 57.24154589371981, "grad_norm": 0.29385513067245483, "learning_rate": 0.0001, "loss": 1.5882, "step": 663544 }, { "epoch": 57.2463768115942, "grad_norm": 0.9948647022247314, "learning_rate": 0.0001, "loss": 1.5873, "step": 663600 }, { "epoch": 57.2512077294686, "grad_norm": 0.9787476658821106, "learning_rate": 0.0001, "loss": 1.5897, "step": 663656 }, { "epoch": 57.256038647343, "grad_norm": 0.23670697212219238, "learning_rate": 0.0001, "loss": 1.5894, "step": 663712 }, { "epoch": 57.26086956521739, "grad_norm": 0.28373992443084717, "learning_rate": 0.0001, "loss": 1.5838, "step": 663768 }, { "epoch": 57.265700483091784, "grad_norm": 1.335060477256775, "learning_rate": 0.0001, "loss": 1.5898, "step": 663824 }, { "epoch": 57.270531400966185, "grad_norm": 10.365352630615234, "learning_rate": 0.0001, "loss": 1.5882, "step": 663880 }, { "epoch": 57.27536231884058, "grad_norm": 0.3427030146121979, "learning_rate": 0.0001, "loss": 1.5828, "step": 663936 }, { "epoch": 57.28019323671498, "grad_norm": 0.9815719127655029, "learning_rate": 0.0001, "loss": 1.5846, "step": 663992 }, { "epoch": 57.28502415458937, "grad_norm": 0.23451752960681915, "learning_rate": 0.0001, "loss": 1.5956, "step": 664048 }, { "epoch": 57.289855072463766, "grad_norm": 0.34871917963027954, "learning_rate": 0.0001, "loss": 1.5886, "step": 664104 }, { "epoch": 57.29468599033817, "grad_norm": 0.28734058141708374, "learning_rate": 0.0001, "loss": 1.5907, "step": 664160 }, { "epoch": 57.29951690821256, "grad_norm": 2.1902992725372314, "learning_rate": 0.0001, "loss": 1.5818, "step": 664216 }, { "epoch": 57.30434782608695, "grad_norm": 0.36597204208374023, "learning_rate": 0.0001, "loss": 1.583, "step": 664272 }, { "epoch": 57.309178743961354, "grad_norm": 0.35549840331077576, "learning_rate": 0.0001, "loss": 1.5829, "step": 664328 }, { "epoch": 57.31400966183575, "grad_norm": 2.732577085494995, "learning_rate": 0.0001, "loss": 1.5881, "step": 664384 }, { "epoch": 57.31884057971015, "grad_norm": 0.25895264744758606, "learning_rate": 0.0001, "loss": 1.5859, "step": 664440 }, { "epoch": 57.32367149758454, "grad_norm": 0.2509225606918335, "learning_rate": 0.0001, "loss": 1.5871, "step": 664496 }, { "epoch": 57.328502415458935, "grad_norm": 0.2717515826225281, "learning_rate": 0.0001, "loss": 1.5908, "step": 664552 }, { "epoch": 57.333333333333336, "grad_norm": 0.4059692323207855, "learning_rate": 0.0001, "loss": 1.5822, "step": 664608 }, { "epoch": 57.33816425120773, "grad_norm": 2.948133945465088, "learning_rate": 0.0001, "loss": 1.5873, "step": 664664 }, { "epoch": 57.34299516908212, "grad_norm": 0.3240695893764496, "learning_rate": 0.0001, "loss": 1.588, "step": 664720 }, { "epoch": 57.34782608695652, "grad_norm": 0.2776854932308197, "learning_rate": 0.0001, "loss": 1.5808, "step": 664776 }, { "epoch": 57.35265700483092, "grad_norm": 0.5787100195884705, "learning_rate": 0.0001, "loss": 1.5871, "step": 664832 }, { "epoch": 57.35748792270532, "grad_norm": 0.39732733368873596, "learning_rate": 0.0001, "loss": 1.5883, "step": 664888 }, { "epoch": 57.36231884057971, "grad_norm": 0.32534679770469666, "learning_rate": 0.0001, "loss": 1.5874, "step": 664944 }, { "epoch": 57.367149758454104, "grad_norm": 0.3282020390033722, "learning_rate": 0.0001, "loss": 1.5842, "step": 665000 }, { "epoch": 57.371980676328505, "grad_norm": 0.29444992542266846, "learning_rate": 0.0001, "loss": 1.5899, "step": 665056 }, { "epoch": 57.3768115942029, "grad_norm": 0.30300039052963257, "learning_rate": 0.0001, "loss": 1.5862, "step": 665112 }, { "epoch": 57.38164251207729, "grad_norm": 0.2937059700489044, "learning_rate": 0.0001, "loss": 1.5877, "step": 665168 }, { "epoch": 57.38647342995169, "grad_norm": 0.2411220669746399, "learning_rate": 0.0001, "loss": 1.5925, "step": 665224 }, { "epoch": 57.391304347826086, "grad_norm": 0.309778094291687, "learning_rate": 0.0001, "loss": 1.5839, "step": 665280 }, { "epoch": 57.39613526570048, "grad_norm": 2.1967825889587402, "learning_rate": 0.0001, "loss": 1.5904, "step": 665336 }, { "epoch": 57.40096618357488, "grad_norm": 0.27019160985946655, "learning_rate": 0.0001, "loss": 1.5834, "step": 665392 }, { "epoch": 57.405797101449274, "grad_norm": 0.32516276836395264, "learning_rate": 0.0001, "loss": 1.5818, "step": 665448 }, { "epoch": 57.410628019323674, "grad_norm": 0.26210373640060425, "learning_rate": 0.0001, "loss": 1.5896, "step": 665504 }, { "epoch": 57.41545893719807, "grad_norm": 0.2448747605085373, "learning_rate": 0.0001, "loss": 1.5775, "step": 665560 }, { "epoch": 57.42028985507246, "grad_norm": 0.23223187029361725, "learning_rate": 0.0001, "loss": 1.5854, "step": 665616 }, { "epoch": 57.42512077294686, "grad_norm": 0.3058696687221527, "learning_rate": 0.0001, "loss": 1.5918, "step": 665672 }, { "epoch": 57.429951690821255, "grad_norm": 7.0097975730896, "learning_rate": 0.0001, "loss": 1.5788, "step": 665728 }, { "epoch": 57.43478260869565, "grad_norm": 0.29730290174484253, "learning_rate": 0.0001, "loss": 1.5864, "step": 665784 }, { "epoch": 57.43961352657005, "grad_norm": 0.5133915543556213, "learning_rate": 0.0001, "loss": 1.5911, "step": 665840 }, { "epoch": 57.44444444444444, "grad_norm": 0.5308663249015808, "learning_rate": 0.0001, "loss": 1.5833, "step": 665896 }, { "epoch": 57.44927536231884, "grad_norm": 1.3270922899246216, "learning_rate": 0.0001, "loss": 1.5907, "step": 665952 }, { "epoch": 57.45410628019324, "grad_norm": 0.2905636727809906, "learning_rate": 0.0001, "loss": 1.5914, "step": 666008 }, { "epoch": 57.45893719806763, "grad_norm": 1.1662876605987549, "learning_rate": 0.0001, "loss": 1.5877, "step": 666064 }, { "epoch": 57.46376811594203, "grad_norm": 1.0651025772094727, "learning_rate": 0.0001, "loss": 1.5815, "step": 666120 }, { "epoch": 57.468599033816425, "grad_norm": 0.7503356337547302, "learning_rate": 0.0001, "loss": 1.5855, "step": 666176 }, { "epoch": 57.47342995169082, "grad_norm": 0.31526434421539307, "learning_rate": 0.0001, "loss": 1.5912, "step": 666232 }, { "epoch": 57.47826086956522, "grad_norm": 11.830708503723145, "learning_rate": 0.0001, "loss": 1.5927, "step": 666288 }, { "epoch": 57.48309178743961, "grad_norm": 0.2604566812515259, "learning_rate": 0.0001, "loss": 1.585, "step": 666344 }, { "epoch": 57.48792270531401, "grad_norm": 3.3370361328125, "learning_rate": 0.0001, "loss": 1.588, "step": 666400 }, { "epoch": 57.492753623188406, "grad_norm": 0.986670970916748, "learning_rate": 0.0001, "loss": 1.5929, "step": 666456 }, { "epoch": 57.4975845410628, "grad_norm": 0.2948988080024719, "learning_rate": 0.0001, "loss": 1.5947, "step": 666512 }, { "epoch": 57.5024154589372, "grad_norm": 3.4742417335510254, "learning_rate": 0.0001, "loss": 1.5904, "step": 666568 }, { "epoch": 57.507246376811594, "grad_norm": 1.660082221031189, "learning_rate": 0.0001, "loss": 1.5861, "step": 666624 }, { "epoch": 57.51207729468599, "grad_norm": 0.2827708125114441, "learning_rate": 0.0001, "loss": 1.5854, "step": 666680 }, { "epoch": 57.51690821256039, "grad_norm": 0.28263550996780396, "learning_rate": 0.0001, "loss": 1.5849, "step": 666736 }, { "epoch": 57.52173913043478, "grad_norm": 0.5060697197914124, "learning_rate": 0.0001, "loss": 1.582, "step": 666792 }, { "epoch": 57.52657004830918, "grad_norm": 0.2568678557872772, "learning_rate": 0.0001, "loss": 1.5829, "step": 666848 }, { "epoch": 57.531400966183575, "grad_norm": 0.31914693117141724, "learning_rate": 0.0001, "loss": 1.589, "step": 666904 }, { "epoch": 57.53623188405797, "grad_norm": 0.2652868926525116, "learning_rate": 0.0001, "loss": 1.5855, "step": 666960 }, { "epoch": 57.54106280193237, "grad_norm": 0.24547892808914185, "learning_rate": 0.0001, "loss": 1.5887, "step": 667016 }, { "epoch": 57.54589371980676, "grad_norm": 0.2894747257232666, "learning_rate": 0.0001, "loss": 1.5922, "step": 667072 }, { "epoch": 57.55072463768116, "grad_norm": 13.101865768432617, "learning_rate": 0.0001, "loss": 1.5916, "step": 667128 }, { "epoch": 57.55555555555556, "grad_norm": 0.3075178563594818, "learning_rate": 0.0001, "loss": 1.589, "step": 667184 }, { "epoch": 57.56038647342995, "grad_norm": 0.8403758406639099, "learning_rate": 0.0001, "loss": 1.586, "step": 667240 }, { "epoch": 57.56521739130435, "grad_norm": 0.44471612572669983, "learning_rate": 0.0001, "loss": 1.5859, "step": 667296 }, { "epoch": 57.570048309178745, "grad_norm": 5.929322242736816, "learning_rate": 0.0001, "loss": 1.5851, "step": 667352 }, { "epoch": 57.57487922705314, "grad_norm": 1.00764000415802, "learning_rate": 0.0001, "loss": 1.5951, "step": 667408 }, { "epoch": 57.57971014492754, "grad_norm": 0.5128440260887146, "learning_rate": 0.0001, "loss": 1.5849, "step": 667464 }, { "epoch": 57.58454106280193, "grad_norm": 0.5857464671134949, "learning_rate": 0.0001, "loss": 1.5874, "step": 667520 }, { "epoch": 57.589371980676326, "grad_norm": 0.252819687128067, "learning_rate": 0.0001, "loss": 1.59, "step": 667576 }, { "epoch": 57.594202898550726, "grad_norm": 0.33129966259002686, "learning_rate": 0.0001, "loss": 1.591, "step": 667632 }, { "epoch": 57.59903381642512, "grad_norm": 0.36170604825019836, "learning_rate": 0.0001, "loss": 1.5879, "step": 667688 }, { "epoch": 57.60386473429952, "grad_norm": 0.22233271598815918, "learning_rate": 0.0001, "loss": 1.5924, "step": 667744 }, { "epoch": 57.608695652173914, "grad_norm": 0.42417341470718384, "learning_rate": 0.0001, "loss": 1.5934, "step": 667800 }, { "epoch": 57.61352657004831, "grad_norm": 1.4773484468460083, "learning_rate": 0.0001, "loss": 1.5832, "step": 667856 }, { "epoch": 57.61835748792271, "grad_norm": 0.32445424795150757, "learning_rate": 0.0001, "loss": 1.586, "step": 667912 }, { "epoch": 57.6231884057971, "grad_norm": 0.3307144343852997, "learning_rate": 0.0001, "loss": 1.5862, "step": 667968 }, { "epoch": 57.628019323671495, "grad_norm": 0.4422079026699066, "learning_rate": 0.0001, "loss": 1.59, "step": 668024 }, { "epoch": 57.632850241545896, "grad_norm": 0.3038958013057709, "learning_rate": 0.0001, "loss": 1.5883, "step": 668080 }, { "epoch": 57.63768115942029, "grad_norm": 0.27070996165275574, "learning_rate": 0.0001, "loss": 1.5843, "step": 668136 }, { "epoch": 57.64251207729468, "grad_norm": 1.1970059871673584, "learning_rate": 0.0001, "loss": 1.5858, "step": 668192 }, { "epoch": 57.64734299516908, "grad_norm": 6.17649507522583, "learning_rate": 0.0001, "loss": 1.5901, "step": 668248 }, { "epoch": 57.65217391304348, "grad_norm": 0.3615120053291321, "learning_rate": 0.0001, "loss": 1.5882, "step": 668304 }, { "epoch": 57.65700483091788, "grad_norm": 4.093512058258057, "learning_rate": 0.0001, "loss": 1.5914, "step": 668360 }, { "epoch": 57.66183574879227, "grad_norm": 0.24995392560958862, "learning_rate": 0.0001, "loss": 1.5879, "step": 668416 }, { "epoch": 57.666666666666664, "grad_norm": 0.29068633913993835, "learning_rate": 0.0001, "loss": 1.5874, "step": 668472 }, { "epoch": 57.671497584541065, "grad_norm": 0.8554396629333496, "learning_rate": 0.0001, "loss": 1.5885, "step": 668528 }, { "epoch": 57.67632850241546, "grad_norm": 0.6616571545600891, "learning_rate": 0.0001, "loss": 1.5906, "step": 668584 }, { "epoch": 57.68115942028985, "grad_norm": 0.24308724701404572, "learning_rate": 0.0001, "loss": 1.5897, "step": 668640 }, { "epoch": 57.68599033816425, "grad_norm": 0.2856757938861847, "learning_rate": 0.0001, "loss": 1.5849, "step": 668696 }, { "epoch": 57.690821256038646, "grad_norm": 0.9465160965919495, "learning_rate": 0.0001, "loss": 1.5902, "step": 668752 }, { "epoch": 57.69565217391305, "grad_norm": 0.4886169731616974, "learning_rate": 0.0001, "loss": 1.5889, "step": 668808 }, { "epoch": 57.70048309178744, "grad_norm": 0.264651894569397, "learning_rate": 0.0001, "loss": 1.5833, "step": 668864 }, { "epoch": 57.70531400966183, "grad_norm": 3.342373847961426, "learning_rate": 0.0001, "loss": 1.5925, "step": 668920 }, { "epoch": 57.710144927536234, "grad_norm": 0.25566020607948303, "learning_rate": 0.0001, "loss": 1.5879, "step": 668976 }, { "epoch": 57.71497584541063, "grad_norm": 0.5741772055625916, "learning_rate": 0.0001, "loss": 1.5892, "step": 669032 }, { "epoch": 57.71980676328502, "grad_norm": 0.2407185286283493, "learning_rate": 0.0001, "loss": 1.5841, "step": 669088 }, { "epoch": 57.72463768115942, "grad_norm": 0.4379412531852722, "learning_rate": 0.0001, "loss": 1.5852, "step": 669144 }, { "epoch": 57.729468599033815, "grad_norm": 0.36291515827178955, "learning_rate": 0.0001, "loss": 1.5863, "step": 669200 }, { "epoch": 57.734299516908216, "grad_norm": 2.6955432891845703, "learning_rate": 0.0001, "loss": 1.5975, "step": 669256 }, { "epoch": 57.73913043478261, "grad_norm": 1.8953683376312256, "learning_rate": 0.0001, "loss": 1.5974, "step": 669312 }, { "epoch": 57.743961352657, "grad_norm": 5.765132427215576, "learning_rate": 0.0001, "loss": 1.5815, "step": 669368 }, { "epoch": 57.7487922705314, "grad_norm": 0.24464446306228638, "learning_rate": 0.0001, "loss": 1.5917, "step": 669424 }, { "epoch": 57.7536231884058, "grad_norm": 9.699729919433594, "learning_rate": 0.0001, "loss": 1.5875, "step": 669480 }, { "epoch": 57.75845410628019, "grad_norm": 0.28706875443458557, "learning_rate": 0.0001, "loss": 1.5898, "step": 669536 }, { "epoch": 57.76328502415459, "grad_norm": 0.5738072991371155, "learning_rate": 0.0001, "loss": 1.5891, "step": 669592 }, { "epoch": 57.768115942028984, "grad_norm": 0.5041653513908386, "learning_rate": 0.0001, "loss": 1.591, "step": 669648 }, { "epoch": 57.772946859903385, "grad_norm": 0.27284687757492065, "learning_rate": 0.0001, "loss": 1.5888, "step": 669704 }, { "epoch": 57.77777777777778, "grad_norm": 0.25972646474838257, "learning_rate": 0.0001, "loss": 1.5903, "step": 669760 }, { "epoch": 57.78260869565217, "grad_norm": 0.35060805082321167, "learning_rate": 0.0001, "loss": 1.586, "step": 669816 }, { "epoch": 57.78743961352657, "grad_norm": 1.3661746978759766, "learning_rate": 0.0001, "loss": 1.5896, "step": 669872 }, { "epoch": 57.792270531400966, "grad_norm": 1.1457667350769043, "learning_rate": 0.0001, "loss": 1.5958, "step": 669928 }, { "epoch": 57.79710144927536, "grad_norm": 0.24623721837997437, "learning_rate": 0.0001, "loss": 1.5852, "step": 669984 }, { "epoch": 57.80193236714976, "grad_norm": 0.655100405216217, "learning_rate": 0.0001, "loss": 1.5863, "step": 670040 }, { "epoch": 57.806763285024154, "grad_norm": 0.3344529867172241, "learning_rate": 0.0001, "loss": 1.5908, "step": 670096 }, { "epoch": 57.81159420289855, "grad_norm": 0.24826708436012268, "learning_rate": 0.0001, "loss": 1.5906, "step": 670152 }, { "epoch": 57.81642512077295, "grad_norm": 0.3805430233478546, "learning_rate": 0.0001, "loss": 1.587, "step": 670208 }, { "epoch": 57.82125603864734, "grad_norm": 0.2761281728744507, "learning_rate": 0.0001, "loss": 1.5857, "step": 670264 }, { "epoch": 57.82608695652174, "grad_norm": 0.2981856167316437, "learning_rate": 0.0001, "loss": 1.5931, "step": 670320 }, { "epoch": 57.830917874396135, "grad_norm": 0.45111700892448425, "learning_rate": 0.0001, "loss": 1.5839, "step": 670376 }, { "epoch": 57.83574879227053, "grad_norm": 1.5935108661651611, "learning_rate": 0.0001, "loss": 1.5923, "step": 670432 }, { "epoch": 57.84057971014493, "grad_norm": 0.2709166407585144, "learning_rate": 0.0001, "loss": 1.5902, "step": 670488 }, { "epoch": 57.84541062801932, "grad_norm": 0.25121068954467773, "learning_rate": 0.0001, "loss": 1.5922, "step": 670544 }, { "epoch": 57.85024154589372, "grad_norm": 1.5126638412475586, "learning_rate": 0.0001, "loss": 1.5881, "step": 670600 }, { "epoch": 57.85507246376812, "grad_norm": 5.176803112030029, "learning_rate": 0.0001, "loss": 1.59, "step": 670656 }, { "epoch": 57.85990338164251, "grad_norm": 0.30222803354263306, "learning_rate": 0.0001, "loss": 1.5882, "step": 670712 }, { "epoch": 57.86473429951691, "grad_norm": 0.2514079511165619, "learning_rate": 0.0001, "loss": 1.5873, "step": 670768 }, { "epoch": 57.869565217391305, "grad_norm": 0.2997678220272064, "learning_rate": 0.0001, "loss": 1.5841, "step": 670824 }, { "epoch": 57.8743961352657, "grad_norm": 0.35212287306785583, "learning_rate": 0.0001, "loss": 1.5878, "step": 670880 }, { "epoch": 57.8792270531401, "grad_norm": 0.22849324345588684, "learning_rate": 0.0001, "loss": 1.5841, "step": 670936 }, { "epoch": 57.88405797101449, "grad_norm": 0.36635035276412964, "learning_rate": 0.0001, "loss": 1.5866, "step": 670992 }, { "epoch": 57.888888888888886, "grad_norm": 0.7168186902999878, "learning_rate": 0.0001, "loss": 1.5792, "step": 671048 }, { "epoch": 57.893719806763286, "grad_norm": 0.5695323944091797, "learning_rate": 0.0001, "loss": 1.5943, "step": 671104 }, { "epoch": 57.89855072463768, "grad_norm": 0.46422114968299866, "learning_rate": 0.0001, "loss": 1.5834, "step": 671160 }, { "epoch": 57.90338164251208, "grad_norm": 0.9658125042915344, "learning_rate": 0.0001, "loss": 1.5901, "step": 671216 }, { "epoch": 57.908212560386474, "grad_norm": 0.29980260133743286, "learning_rate": 0.0001, "loss": 1.5905, "step": 671272 }, { "epoch": 57.91304347826087, "grad_norm": 0.2794153094291687, "learning_rate": 0.0001, "loss": 1.5858, "step": 671328 }, { "epoch": 57.91787439613527, "grad_norm": 0.3954618275165558, "learning_rate": 0.0001, "loss": 1.5903, "step": 671384 }, { "epoch": 57.92270531400966, "grad_norm": 1.5990618467330933, "learning_rate": 0.0001, "loss": 1.5891, "step": 671440 }, { "epoch": 57.927536231884055, "grad_norm": 0.26295873522758484, "learning_rate": 0.0001, "loss": 1.5845, "step": 671496 }, { "epoch": 57.932367149758456, "grad_norm": 2.659999132156372, "learning_rate": 0.0001, "loss": 1.5884, "step": 671552 }, { "epoch": 57.93719806763285, "grad_norm": 0.3068985939025879, "learning_rate": 0.0001, "loss": 1.599, "step": 671608 }, { "epoch": 57.94202898550725, "grad_norm": 0.3689214587211609, "learning_rate": 0.0001, "loss": 1.5931, "step": 671664 }, { "epoch": 57.94685990338164, "grad_norm": 1.5390597581863403, "learning_rate": 0.0001, "loss": 1.5847, "step": 671720 }, { "epoch": 57.95169082125604, "grad_norm": 0.29907384514808655, "learning_rate": 0.0001, "loss": 1.5943, "step": 671776 }, { "epoch": 57.95652173913044, "grad_norm": 0.292556494474411, "learning_rate": 0.0001, "loss": 1.5923, "step": 671832 }, { "epoch": 57.96135265700483, "grad_norm": 0.2802937626838684, "learning_rate": 0.0001, "loss": 1.589, "step": 671888 }, { "epoch": 57.966183574879224, "grad_norm": 0.26041409373283386, "learning_rate": 0.0001, "loss": 1.5901, "step": 671944 }, { "epoch": 57.971014492753625, "grad_norm": 0.3263836205005646, "learning_rate": 0.0001, "loss": 1.5941, "step": 672000 }, { "epoch": 57.97584541062802, "grad_norm": 0.30306994915008545, "learning_rate": 0.0001, "loss": 1.5869, "step": 672056 }, { "epoch": 57.98067632850242, "grad_norm": 12.335121154785156, "learning_rate": 0.0001, "loss": 1.5944, "step": 672112 }, { "epoch": 57.98550724637681, "grad_norm": 2.3316352367401123, "learning_rate": 0.0001, "loss": 1.5784, "step": 672168 }, { "epoch": 57.990338164251206, "grad_norm": 0.30121347308158875, "learning_rate": 0.0001, "loss": 1.5938, "step": 672224 }, { "epoch": 57.99516908212561, "grad_norm": 0.3391442596912384, "learning_rate": 0.0001, "loss": 1.5873, "step": 672280 }, { "epoch": 58.0, "grad_norm": 0.23737387359142303, "learning_rate": 0.0001, "loss": 1.5879, "step": 672336 }, { "epoch": 58.00483091787439, "grad_norm": 0.33299335837364197, "learning_rate": 0.0001, "loss": 1.5803, "step": 672392 }, { "epoch": 58.009661835748794, "grad_norm": 0.31400033831596375, "learning_rate": 0.0001, "loss": 1.58, "step": 672448 }, { "epoch": 58.01449275362319, "grad_norm": 0.3174540400505066, "learning_rate": 0.0001, "loss": 1.5772, "step": 672504 }, { "epoch": 58.01932367149758, "grad_norm": 0.43931442499160767, "learning_rate": 0.0001, "loss": 1.5841, "step": 672560 }, { "epoch": 58.02415458937198, "grad_norm": 0.2488747537136078, "learning_rate": 0.0001, "loss": 1.5805, "step": 672616 }, { "epoch": 58.028985507246375, "grad_norm": 0.2938361167907715, "learning_rate": 0.0001, "loss": 1.5774, "step": 672672 }, { "epoch": 58.033816425120776, "grad_norm": 4.2431721687316895, "learning_rate": 0.0001, "loss": 1.581, "step": 672728 }, { "epoch": 58.03864734299517, "grad_norm": 0.3705519139766693, "learning_rate": 0.0001, "loss": 1.584, "step": 672784 }, { "epoch": 58.04347826086956, "grad_norm": 11.357935905456543, "learning_rate": 0.0001, "loss": 1.5826, "step": 672840 }, { "epoch": 58.04830917874396, "grad_norm": 0.2803310453891754, "learning_rate": 0.0001, "loss": 1.5875, "step": 672896 }, { "epoch": 58.05314009661836, "grad_norm": 0.3038651943206787, "learning_rate": 0.0001, "loss": 1.5866, "step": 672952 }, { "epoch": 58.05797101449275, "grad_norm": 0.2385237067937851, "learning_rate": 0.0001, "loss": 1.5797, "step": 673008 }, { "epoch": 58.06280193236715, "grad_norm": 2.325150489807129, "learning_rate": 0.0001, "loss": 1.5854, "step": 673064 }, { "epoch": 58.067632850241544, "grad_norm": 1.1273908615112305, "learning_rate": 0.0001, "loss": 1.5847, "step": 673120 }, { "epoch": 58.072463768115945, "grad_norm": 1.0408766269683838, "learning_rate": 0.0001, "loss": 1.5872, "step": 673176 }, { "epoch": 58.07729468599034, "grad_norm": 0.31396836042404175, "learning_rate": 0.0001, "loss": 1.5823, "step": 673232 }, { "epoch": 58.08212560386473, "grad_norm": 0.31797000765800476, "learning_rate": 0.0001, "loss": 1.5834, "step": 673288 }, { "epoch": 58.08695652173913, "grad_norm": 2.675870656967163, "learning_rate": 0.0001, "loss": 1.5783, "step": 673344 }, { "epoch": 58.091787439613526, "grad_norm": 0.2307223081588745, "learning_rate": 0.0001, "loss": 1.5869, "step": 673400 }, { "epoch": 58.09661835748792, "grad_norm": 0.2499265968799591, "learning_rate": 0.0001, "loss": 1.5777, "step": 673456 }, { "epoch": 58.10144927536232, "grad_norm": 0.29461464285850525, "learning_rate": 0.0001, "loss": 1.5901, "step": 673512 }, { "epoch": 58.106280193236714, "grad_norm": 0.3163118362426758, "learning_rate": 0.0001, "loss": 1.5834, "step": 673568 }, { "epoch": 58.111111111111114, "grad_norm": 0.24747467041015625, "learning_rate": 0.0001, "loss": 1.5791, "step": 673624 }, { "epoch": 58.11594202898551, "grad_norm": 1.41733980178833, "learning_rate": 0.0001, "loss": 1.5793, "step": 673680 }, { "epoch": 58.1207729468599, "grad_norm": 0.4171726405620575, "learning_rate": 0.0001, "loss": 1.584, "step": 673736 }, { "epoch": 58.1256038647343, "grad_norm": 2.039374589920044, "learning_rate": 0.0001, "loss": 1.5767, "step": 673792 }, { "epoch": 58.130434782608695, "grad_norm": 1.035357117652893, "learning_rate": 0.0001, "loss": 1.5776, "step": 673848 }, { "epoch": 58.13526570048309, "grad_norm": 0.24151378870010376, "learning_rate": 0.0001, "loss": 1.59, "step": 673904 }, { "epoch": 58.14009661835749, "grad_norm": 0.2769957184791565, "learning_rate": 0.0001, "loss": 1.572, "step": 673960 }, { "epoch": 58.14492753623188, "grad_norm": 0.26343467831611633, "learning_rate": 0.0001, "loss": 1.5845, "step": 674016 }, { "epoch": 58.14975845410628, "grad_norm": 0.323453813791275, "learning_rate": 0.0001, "loss": 1.5805, "step": 674072 }, { "epoch": 58.15458937198068, "grad_norm": 0.30943918228149414, "learning_rate": 0.0001, "loss": 1.5844, "step": 674128 }, { "epoch": 58.15942028985507, "grad_norm": 0.2859145700931549, "learning_rate": 0.0001, "loss": 1.5824, "step": 674184 }, { "epoch": 58.16425120772947, "grad_norm": 0.5130011439323425, "learning_rate": 0.0001, "loss": 1.5785, "step": 674240 }, { "epoch": 58.169082125603865, "grad_norm": 0.5334696173667908, "learning_rate": 0.0001, "loss": 1.5822, "step": 674296 }, { "epoch": 58.17391304347826, "grad_norm": 0.2642700970172882, "learning_rate": 0.0001, "loss": 1.5883, "step": 674352 }, { "epoch": 58.17874396135266, "grad_norm": 0.6865452527999878, "learning_rate": 0.0001, "loss": 1.5873, "step": 674408 }, { "epoch": 58.18357487922705, "grad_norm": 0.375489741563797, "learning_rate": 0.0001, "loss": 1.579, "step": 674464 }, { "epoch": 58.18840579710145, "grad_norm": 0.40096208453178406, "learning_rate": 0.0001, "loss": 1.5788, "step": 674520 }, { "epoch": 58.193236714975846, "grad_norm": 4.224609851837158, "learning_rate": 0.0001, "loss": 1.5766, "step": 674576 }, { "epoch": 58.19806763285024, "grad_norm": 0.2967926561832428, "learning_rate": 0.0001, "loss": 1.5813, "step": 674632 }, { "epoch": 58.20289855072464, "grad_norm": 0.612129807472229, "learning_rate": 0.0001, "loss": 1.5802, "step": 674688 }, { "epoch": 58.207729468599034, "grad_norm": 0.2855076491832733, "learning_rate": 0.0001, "loss": 1.5872, "step": 674744 }, { "epoch": 58.21256038647343, "grad_norm": 0.3659957945346832, "learning_rate": 0.0001, "loss": 1.5822, "step": 674800 }, { "epoch": 58.21739130434783, "grad_norm": 0.31486374139785767, "learning_rate": 0.0001, "loss": 1.587, "step": 674856 }, { "epoch": 58.22222222222222, "grad_norm": 0.2897539436817169, "learning_rate": 0.0001, "loss": 1.5872, "step": 674912 }, { "epoch": 58.227053140096615, "grad_norm": 0.400713175535202, "learning_rate": 0.0001, "loss": 1.5849, "step": 674968 }, { "epoch": 58.231884057971016, "grad_norm": 0.3183567523956299, "learning_rate": 0.0001, "loss": 1.5811, "step": 675024 }, { "epoch": 58.23671497584541, "grad_norm": 0.7865095734596252, "learning_rate": 0.0001, "loss": 1.5845, "step": 675080 }, { "epoch": 58.24154589371981, "grad_norm": 0.3270359933376312, "learning_rate": 0.0001, "loss": 1.5832, "step": 675136 }, { "epoch": 58.2463768115942, "grad_norm": 0.253262460231781, "learning_rate": 0.0001, "loss": 1.5878, "step": 675192 }, { "epoch": 58.2512077294686, "grad_norm": 0.690147876739502, "learning_rate": 0.0001, "loss": 1.5816, "step": 675248 }, { "epoch": 58.256038647343, "grad_norm": 2.148361921310425, "learning_rate": 0.0001, "loss": 1.579, "step": 675304 }, { "epoch": 58.26086956521739, "grad_norm": 0.2994334399700165, "learning_rate": 0.0001, "loss": 1.5903, "step": 675360 }, { "epoch": 58.265700483091784, "grad_norm": 0.2938796579837799, "learning_rate": 0.0001, "loss": 1.5878, "step": 675416 }, { "epoch": 58.270531400966185, "grad_norm": 0.36393505334854126, "learning_rate": 0.0001, "loss": 1.583, "step": 675472 }, { "epoch": 58.27536231884058, "grad_norm": 0.7930569648742676, "learning_rate": 0.0001, "loss": 1.5871, "step": 675528 }, { "epoch": 58.28019323671498, "grad_norm": 13.403369903564453, "learning_rate": 0.0001, "loss": 1.5773, "step": 675584 }, { "epoch": 58.28502415458937, "grad_norm": 0.30163443088531494, "learning_rate": 0.0001, "loss": 1.5894, "step": 675640 }, { "epoch": 58.289855072463766, "grad_norm": 0.4103415608406067, "learning_rate": 0.0001, "loss": 1.5846, "step": 675696 }, { "epoch": 58.29468599033817, "grad_norm": 0.39212727546691895, "learning_rate": 0.0001, "loss": 1.5825, "step": 675752 }, { "epoch": 58.29951690821256, "grad_norm": 0.23739400506019592, "learning_rate": 0.0001, "loss": 1.5815, "step": 675808 }, { "epoch": 58.30434782608695, "grad_norm": 1.0521320104599, "learning_rate": 0.0001, "loss": 1.586, "step": 675864 }, { "epoch": 58.309178743961354, "grad_norm": 0.2546657919883728, "learning_rate": 0.0001, "loss": 1.5827, "step": 675920 }, { "epoch": 58.31400966183575, "grad_norm": 0.3177567422389984, "learning_rate": 0.0001, "loss": 1.584, "step": 675976 }, { "epoch": 58.31884057971015, "grad_norm": 0.2987024486064911, "learning_rate": 0.0001, "loss": 1.5866, "step": 676032 }, { "epoch": 58.32367149758454, "grad_norm": 0.31085559725761414, "learning_rate": 0.0001, "loss": 1.5895, "step": 676088 }, { "epoch": 58.328502415458935, "grad_norm": 0.27732425928115845, "learning_rate": 0.0001, "loss": 1.5861, "step": 676144 }, { "epoch": 58.333333333333336, "grad_norm": 1.0877974033355713, "learning_rate": 0.0001, "loss": 1.5821, "step": 676200 }, { "epoch": 58.33816425120773, "grad_norm": 0.27116239070892334, "learning_rate": 0.0001, "loss": 1.5794, "step": 676256 }, { "epoch": 58.34299516908212, "grad_norm": 0.31950289011001587, "learning_rate": 0.0001, "loss": 1.5847, "step": 676312 }, { "epoch": 58.34782608695652, "grad_norm": 0.22682860493659973, "learning_rate": 0.0001, "loss": 1.5901, "step": 676368 }, { "epoch": 58.35265700483092, "grad_norm": 1.0958901643753052, "learning_rate": 0.0001, "loss": 1.5814, "step": 676424 }, { "epoch": 58.35748792270532, "grad_norm": 0.7303402423858643, "learning_rate": 0.0001, "loss": 1.5797, "step": 676480 }, { "epoch": 58.36231884057971, "grad_norm": 1.0566961765289307, "learning_rate": 0.0001, "loss": 1.5823, "step": 676536 }, { "epoch": 58.367149758454104, "grad_norm": 0.24594645202159882, "learning_rate": 0.0001, "loss": 1.5811, "step": 676592 }, { "epoch": 58.371980676328505, "grad_norm": 1.079473614692688, "learning_rate": 0.0001, "loss": 1.5835, "step": 676648 }, { "epoch": 58.3768115942029, "grad_norm": 0.24790966510772705, "learning_rate": 0.0001, "loss": 1.5888, "step": 676704 }, { "epoch": 58.38164251207729, "grad_norm": 0.253540962934494, "learning_rate": 0.0001, "loss": 1.5829, "step": 676760 }, { "epoch": 58.38647342995169, "grad_norm": 0.5606018304824829, "learning_rate": 0.0001, "loss": 1.5809, "step": 676816 }, { "epoch": 58.391304347826086, "grad_norm": 0.3160109519958496, "learning_rate": 0.0001, "loss": 1.5897, "step": 676872 }, { "epoch": 58.39613526570048, "grad_norm": 3.3691494464874268, "learning_rate": 0.0001, "loss": 1.5784, "step": 676928 }, { "epoch": 58.40096618357488, "grad_norm": 0.3291076421737671, "learning_rate": 0.0001, "loss": 1.5815, "step": 676984 }, { "epoch": 58.405797101449274, "grad_norm": 0.24339626729488373, "learning_rate": 0.0001, "loss": 1.5812, "step": 677040 }, { "epoch": 58.410628019323674, "grad_norm": 0.7551382184028625, "learning_rate": 0.0001, "loss": 1.587, "step": 677096 }, { "epoch": 58.41545893719807, "grad_norm": 0.3015437126159668, "learning_rate": 0.0001, "loss": 1.5858, "step": 677152 }, { "epoch": 58.42028985507246, "grad_norm": 0.25522252917289734, "learning_rate": 0.0001, "loss": 1.581, "step": 677208 }, { "epoch": 58.42512077294686, "grad_norm": 0.8947916030883789, "learning_rate": 0.0001, "loss": 1.5907, "step": 677264 }, { "epoch": 58.429951690821255, "grad_norm": 0.2783665060997009, "learning_rate": 0.0001, "loss": 1.5903, "step": 677320 }, { "epoch": 58.43478260869565, "grad_norm": 1.2541810274124146, "learning_rate": 0.0001, "loss": 1.5855, "step": 677376 }, { "epoch": 58.43961352657005, "grad_norm": 0.2645035982131958, "learning_rate": 0.0001, "loss": 1.5841, "step": 677432 }, { "epoch": 58.44444444444444, "grad_norm": 0.384770005941391, "learning_rate": 0.0001, "loss": 1.5835, "step": 677488 }, { "epoch": 58.44927536231884, "grad_norm": 0.23335754871368408, "learning_rate": 0.0001, "loss": 1.5753, "step": 677544 }, { "epoch": 58.45410628019324, "grad_norm": 10.183673858642578, "learning_rate": 0.0001, "loss": 1.5841, "step": 677600 }, { "epoch": 58.45893719806763, "grad_norm": 0.2762611210346222, "learning_rate": 0.0001, "loss": 1.582, "step": 677656 }, { "epoch": 58.46376811594203, "grad_norm": 0.2567254602909088, "learning_rate": 0.0001, "loss": 1.5881, "step": 677712 }, { "epoch": 58.468599033816425, "grad_norm": 1.1981122493743896, "learning_rate": 0.0001, "loss": 1.5774, "step": 677768 }, { "epoch": 58.47342995169082, "grad_norm": 0.2588583827018738, "learning_rate": 0.0001, "loss": 1.5868, "step": 677824 }, { "epoch": 58.47826086956522, "grad_norm": 0.40883469581604004, "learning_rate": 0.0001, "loss": 1.581, "step": 677880 }, { "epoch": 58.48309178743961, "grad_norm": 0.5625379085540771, "learning_rate": 0.0001, "loss": 1.5788, "step": 677936 }, { "epoch": 58.48792270531401, "grad_norm": 0.34081751108169556, "learning_rate": 0.0001, "loss": 1.5855, "step": 677992 }, { "epoch": 58.492753623188406, "grad_norm": 1.3769038915634155, "learning_rate": 0.0001, "loss": 1.5821, "step": 678048 }, { "epoch": 58.4975845410628, "grad_norm": 0.913993775844574, "learning_rate": 0.0001, "loss": 1.5788, "step": 678104 }, { "epoch": 58.5024154589372, "grad_norm": 0.2948938310146332, "learning_rate": 0.0001, "loss": 1.5777, "step": 678160 }, { "epoch": 58.507246376811594, "grad_norm": 0.24612301588058472, "learning_rate": 0.0001, "loss": 1.5841, "step": 678216 }, { "epoch": 58.51207729468599, "grad_norm": 0.2898714542388916, "learning_rate": 0.0001, "loss": 1.5789, "step": 678272 }, { "epoch": 58.51690821256039, "grad_norm": 0.2925602197647095, "learning_rate": 0.0001, "loss": 1.5866, "step": 678328 }, { "epoch": 58.52173913043478, "grad_norm": 0.2739903926849365, "learning_rate": 0.0001, "loss": 1.5815, "step": 678384 }, { "epoch": 58.52657004830918, "grad_norm": 0.33110031485557556, "learning_rate": 0.0001, "loss": 1.5839, "step": 678440 }, { "epoch": 58.531400966183575, "grad_norm": 0.3342961370944977, "learning_rate": 0.0001, "loss": 1.5865, "step": 678496 }, { "epoch": 58.53623188405797, "grad_norm": 0.5845152139663696, "learning_rate": 0.0001, "loss": 1.5809, "step": 678552 }, { "epoch": 58.54106280193237, "grad_norm": 1.1890541315078735, "learning_rate": 0.0001, "loss": 1.5855, "step": 678608 }, { "epoch": 58.54589371980676, "grad_norm": 0.3427938222885132, "learning_rate": 0.0001, "loss": 1.5802, "step": 678664 }, { "epoch": 58.55072463768116, "grad_norm": 0.2629779279232025, "learning_rate": 0.0001, "loss": 1.5863, "step": 678720 }, { "epoch": 58.55555555555556, "grad_norm": 0.2897011637687683, "learning_rate": 0.0001, "loss": 1.5813, "step": 678776 }, { "epoch": 58.56038647342995, "grad_norm": 6.021579265594482, "learning_rate": 0.0001, "loss": 1.5871, "step": 678832 }, { "epoch": 58.56521739130435, "grad_norm": 0.3199467062950134, "learning_rate": 0.0001, "loss": 1.5855, "step": 678888 }, { "epoch": 58.570048309178745, "grad_norm": 0.30713269114494324, "learning_rate": 0.0001, "loss": 1.5795, "step": 678944 }, { "epoch": 58.57487922705314, "grad_norm": 0.37106946110725403, "learning_rate": 0.0001, "loss": 1.5876, "step": 679000 }, { "epoch": 58.57971014492754, "grad_norm": 0.9417797923088074, "learning_rate": 0.0001, "loss": 1.5868, "step": 679056 }, { "epoch": 58.58454106280193, "grad_norm": 10.803523063659668, "learning_rate": 0.0001, "loss": 1.5898, "step": 679112 }, { "epoch": 58.589371980676326, "grad_norm": 0.2689247727394104, "learning_rate": 0.0001, "loss": 1.5816, "step": 679168 }, { "epoch": 58.594202898550726, "grad_norm": 1.1674644947052002, "learning_rate": 0.0001, "loss": 1.5886, "step": 679224 }, { "epoch": 58.59903381642512, "grad_norm": 0.308292418718338, "learning_rate": 0.0001, "loss": 1.5803, "step": 679280 }, { "epoch": 58.60386473429952, "grad_norm": 0.42954474687576294, "learning_rate": 0.0001, "loss": 1.5864, "step": 679336 }, { "epoch": 58.608695652173914, "grad_norm": 0.3538098931312561, "learning_rate": 0.0001, "loss": 1.5913, "step": 679392 }, { "epoch": 58.61352657004831, "grad_norm": 0.26973503828048706, "learning_rate": 0.0001, "loss": 1.584, "step": 679448 }, { "epoch": 58.61835748792271, "grad_norm": 0.3626634180545807, "learning_rate": 0.0001, "loss": 1.58, "step": 679504 }, { "epoch": 58.6231884057971, "grad_norm": 0.41559311747550964, "learning_rate": 0.0001, "loss": 1.5927, "step": 679560 }, { "epoch": 58.628019323671495, "grad_norm": 0.26181676983833313, "learning_rate": 0.0001, "loss": 1.5865, "step": 679616 }, { "epoch": 58.632850241545896, "grad_norm": 0.27030694484710693, "learning_rate": 0.0001, "loss": 1.5816, "step": 679672 }, { "epoch": 58.63768115942029, "grad_norm": 0.29837527871131897, "learning_rate": 0.0001, "loss": 1.5823, "step": 679728 }, { "epoch": 58.64251207729468, "grad_norm": 1.897667407989502, "learning_rate": 0.0001, "loss": 1.5934, "step": 679784 }, { "epoch": 58.64734299516908, "grad_norm": 0.8893882632255554, "learning_rate": 0.0001, "loss": 1.579, "step": 679840 }, { "epoch": 58.65217391304348, "grad_norm": 0.9524097442626953, "learning_rate": 0.0001, "loss": 1.5857, "step": 679896 }, { "epoch": 58.65700483091788, "grad_norm": 0.22249025106430054, "learning_rate": 0.0001, "loss": 1.5745, "step": 679952 }, { "epoch": 58.66183574879227, "grad_norm": 0.3854876756668091, "learning_rate": 0.0001, "loss": 1.5894, "step": 680008 }, { "epoch": 58.666666666666664, "grad_norm": 1.0055376291275024, "learning_rate": 0.0001, "loss": 1.5859, "step": 680064 }, { "epoch": 58.671497584541065, "grad_norm": 0.21605059504508972, "learning_rate": 0.0001, "loss": 1.5843, "step": 680120 }, { "epoch": 58.67632850241546, "grad_norm": 0.43761512637138367, "learning_rate": 0.0001, "loss": 1.5818, "step": 680176 }, { "epoch": 58.68115942028985, "grad_norm": 0.27228060364723206, "learning_rate": 0.0001, "loss": 1.5809, "step": 680232 }, { "epoch": 58.68599033816425, "grad_norm": 0.33425506949424744, "learning_rate": 0.0001, "loss": 1.5907, "step": 680288 }, { "epoch": 58.690821256038646, "grad_norm": 0.7659456729888916, "learning_rate": 0.0001, "loss": 1.5859, "step": 680344 }, { "epoch": 58.69565217391305, "grad_norm": 0.27563101053237915, "learning_rate": 0.0001, "loss": 1.5848, "step": 680400 }, { "epoch": 58.70048309178744, "grad_norm": 0.371427983045578, "learning_rate": 0.0001, "loss": 1.5844, "step": 680456 }, { "epoch": 58.70531400966183, "grad_norm": 0.5376603007316589, "learning_rate": 0.0001, "loss": 1.5845, "step": 680512 }, { "epoch": 58.710144927536234, "grad_norm": 0.2423972189426422, "learning_rate": 0.0001, "loss": 1.5843, "step": 680568 }, { "epoch": 58.71497584541063, "grad_norm": 0.2794467806816101, "learning_rate": 0.0001, "loss": 1.5799, "step": 680624 }, { "epoch": 58.71980676328502, "grad_norm": 0.8715777397155762, "learning_rate": 0.0001, "loss": 1.5914, "step": 680680 }, { "epoch": 58.72463768115942, "grad_norm": 0.2673552632331848, "learning_rate": 0.0001, "loss": 1.5873, "step": 680736 }, { "epoch": 58.729468599033815, "grad_norm": 1.9642705917358398, "learning_rate": 0.0001, "loss": 1.5787, "step": 680792 }, { "epoch": 58.734299516908216, "grad_norm": 0.4346705675125122, "learning_rate": 0.0001, "loss": 1.5834, "step": 680848 }, { "epoch": 58.73913043478261, "grad_norm": 0.34198713302612305, "learning_rate": 0.0001, "loss": 1.5889, "step": 680904 }, { "epoch": 58.743961352657, "grad_norm": 0.30977970361709595, "learning_rate": 0.0001, "loss": 1.5906, "step": 680960 }, { "epoch": 58.7487922705314, "grad_norm": 0.2897457182407379, "learning_rate": 0.0001, "loss": 1.5913, "step": 681016 }, { "epoch": 58.7536231884058, "grad_norm": 0.24582715332508087, "learning_rate": 0.0001, "loss": 1.5859, "step": 681072 }, { "epoch": 58.75845410628019, "grad_norm": 0.4306314289569855, "learning_rate": 0.0001, "loss": 1.5878, "step": 681128 }, { "epoch": 58.76328502415459, "grad_norm": 0.27350184321403503, "learning_rate": 0.0001, "loss": 1.5861, "step": 681184 }, { "epoch": 58.768115942028984, "grad_norm": 0.28238070011138916, "learning_rate": 0.0001, "loss": 1.5892, "step": 681240 }, { "epoch": 58.772946859903385, "grad_norm": 0.955064594745636, "learning_rate": 0.0001, "loss": 1.5866, "step": 681296 }, { "epoch": 58.77777777777778, "grad_norm": 0.270038366317749, "learning_rate": 0.0001, "loss": 1.5882, "step": 681352 }, { "epoch": 58.78260869565217, "grad_norm": 0.31017154455184937, "learning_rate": 0.0001, "loss": 1.5897, "step": 681408 }, { "epoch": 58.78743961352657, "grad_norm": 47.79620361328125, "learning_rate": 0.0001, "loss": 1.5854, "step": 681464 }, { "epoch": 58.792270531400966, "grad_norm": 0.3356359004974365, "learning_rate": 0.0001, "loss": 1.5853, "step": 681520 }, { "epoch": 58.79710144927536, "grad_norm": 0.24216584861278534, "learning_rate": 0.0001, "loss": 1.584, "step": 681576 }, { "epoch": 58.80193236714976, "grad_norm": 0.32543808221817017, "learning_rate": 0.0001, "loss": 1.5805, "step": 681632 }, { "epoch": 58.806763285024154, "grad_norm": 0.45783424377441406, "learning_rate": 0.0001, "loss": 1.5834, "step": 681688 }, { "epoch": 58.81159420289855, "grad_norm": 0.4332028031349182, "learning_rate": 0.0001, "loss": 1.5861, "step": 681744 }, { "epoch": 58.81642512077295, "grad_norm": 0.3299473524093628, "learning_rate": 0.0001, "loss": 1.5781, "step": 681800 }, { "epoch": 58.82125603864734, "grad_norm": 0.3067893385887146, "learning_rate": 0.0001, "loss": 1.5904, "step": 681856 }, { "epoch": 58.82608695652174, "grad_norm": 0.23257340490818024, "learning_rate": 0.0001, "loss": 1.5819, "step": 681912 }, { "epoch": 58.830917874396135, "grad_norm": 1.3375574350357056, "learning_rate": 0.0001, "loss": 1.5831, "step": 681968 }, { "epoch": 58.83574879227053, "grad_norm": 0.24204623699188232, "learning_rate": 0.0001, "loss": 1.5767, "step": 682024 }, { "epoch": 58.84057971014493, "grad_norm": 0.27616384625434875, "learning_rate": 0.0001, "loss": 1.5896, "step": 682080 }, { "epoch": 58.84541062801932, "grad_norm": 0.25747591257095337, "learning_rate": 0.0001, "loss": 1.5855, "step": 682136 }, { "epoch": 58.85024154589372, "grad_norm": 10.146709442138672, "learning_rate": 0.0001, "loss": 1.5856, "step": 682192 }, { "epoch": 58.85507246376812, "grad_norm": 0.7005946040153503, "learning_rate": 0.0001, "loss": 1.5923, "step": 682248 }, { "epoch": 58.85990338164251, "grad_norm": 1.09749174118042, "learning_rate": 0.0001, "loss": 1.5839, "step": 682304 }, { "epoch": 58.86473429951691, "grad_norm": 0.36853867769241333, "learning_rate": 0.0001, "loss": 1.5775, "step": 682360 }, { "epoch": 58.869565217391305, "grad_norm": 0.29107117652893066, "learning_rate": 0.0001, "loss": 1.5827, "step": 682416 }, { "epoch": 58.8743961352657, "grad_norm": 0.2705055773258209, "learning_rate": 0.0001, "loss": 1.5919, "step": 682472 }, { "epoch": 58.8792270531401, "grad_norm": 0.29739198088645935, "learning_rate": 0.0001, "loss": 1.5863, "step": 682528 }, { "epoch": 58.88405797101449, "grad_norm": 0.27547356486320496, "learning_rate": 0.0001, "loss": 1.5833, "step": 682584 }, { "epoch": 58.888888888888886, "grad_norm": 0.24085521697998047, "learning_rate": 0.0001, "loss": 1.59, "step": 682640 }, { "epoch": 58.893719806763286, "grad_norm": 0.3094344437122345, "learning_rate": 0.0001, "loss": 1.5809, "step": 682696 }, { "epoch": 58.89855072463768, "grad_norm": 0.27615946531295776, "learning_rate": 0.0001, "loss": 1.5862, "step": 682752 }, { "epoch": 58.90338164251208, "grad_norm": 0.3510574996471405, "learning_rate": 0.0001, "loss": 1.5888, "step": 682808 }, { "epoch": 58.908212560386474, "grad_norm": 0.31502941250801086, "learning_rate": 0.0001, "loss": 1.5849, "step": 682864 }, { "epoch": 58.91304347826087, "grad_norm": 0.3188639283180237, "learning_rate": 0.0001, "loss": 1.5828, "step": 682920 }, { "epoch": 58.91787439613527, "grad_norm": 0.28043273091316223, "learning_rate": 0.0001, "loss": 1.5891, "step": 682976 }, { "epoch": 58.92270531400966, "grad_norm": 0.3279688358306885, "learning_rate": 0.0001, "loss": 1.5782, "step": 683032 }, { "epoch": 58.927536231884055, "grad_norm": 0.24411197006702423, "learning_rate": 0.0001, "loss": 1.5865, "step": 683088 }, { "epoch": 58.932367149758456, "grad_norm": 1.0973484516143799, "learning_rate": 0.0001, "loss": 1.5846, "step": 683144 }, { "epoch": 58.93719806763285, "grad_norm": 0.28659045696258545, "learning_rate": 0.0001, "loss": 1.5874, "step": 683200 }, { "epoch": 58.94202898550725, "grad_norm": 3.474828004837036, "learning_rate": 0.0001, "loss": 1.5845, "step": 683256 }, { "epoch": 58.94685990338164, "grad_norm": 0.26419079303741455, "learning_rate": 0.0001, "loss": 1.5803, "step": 683312 }, { "epoch": 58.95169082125604, "grad_norm": 0.7956817150115967, "learning_rate": 0.0001, "loss": 1.5869, "step": 683368 }, { "epoch": 58.95652173913044, "grad_norm": 0.34280723333358765, "learning_rate": 0.0001, "loss": 1.5874, "step": 683424 }, { "epoch": 58.96135265700483, "grad_norm": 0.26841095089912415, "learning_rate": 0.0001, "loss": 1.5916, "step": 683480 }, { "epoch": 58.966183574879224, "grad_norm": 0.26691436767578125, "learning_rate": 0.0001, "loss": 1.5802, "step": 683536 }, { "epoch": 58.971014492753625, "grad_norm": 0.23096594214439392, "learning_rate": 0.0001, "loss": 1.5868, "step": 683592 }, { "epoch": 58.97584541062802, "grad_norm": 0.7600662708282471, "learning_rate": 0.0001, "loss": 1.584, "step": 683648 }, { "epoch": 58.98067632850242, "grad_norm": 0.29907217621803284, "learning_rate": 0.0001, "loss": 1.5849, "step": 683704 }, { "epoch": 58.98550724637681, "grad_norm": 0.462681382894516, "learning_rate": 0.0001, "loss": 1.5913, "step": 683760 }, { "epoch": 58.990338164251206, "grad_norm": 0.618218183517456, "learning_rate": 0.0001, "loss": 1.5831, "step": 683816 }, { "epoch": 58.99516908212561, "grad_norm": 0.30687469244003296, "learning_rate": 0.0001, "loss": 1.5905, "step": 683872 }, { "epoch": 59.0, "grad_norm": 0.24293969571590424, "learning_rate": 0.0001, "loss": 1.5837, "step": 683928 }, { "epoch": 59.00483091787439, "grad_norm": 0.27516376972198486, "learning_rate": 0.0001, "loss": 1.5743, "step": 683984 }, { "epoch": 59.009661835748794, "grad_norm": 0.49560531973838806, "learning_rate": 0.0001, "loss": 1.5797, "step": 684040 }, { "epoch": 59.01449275362319, "grad_norm": 0.3669692277908325, "learning_rate": 0.0001, "loss": 1.5834, "step": 684096 }, { "epoch": 59.01932367149758, "grad_norm": 0.3157254159450531, "learning_rate": 0.0001, "loss": 1.5782, "step": 684152 }, { "epoch": 59.02415458937198, "grad_norm": 0.28928643465042114, "learning_rate": 0.0001, "loss": 1.5769, "step": 684208 }, { "epoch": 59.028985507246375, "grad_norm": 0.2576009929180145, "learning_rate": 0.0001, "loss": 1.577, "step": 684264 }, { "epoch": 59.033816425120776, "grad_norm": 0.3031589686870575, "learning_rate": 0.0001, "loss": 1.58, "step": 684320 }, { "epoch": 59.03864734299517, "grad_norm": 0.2568304240703583, "learning_rate": 0.0001, "loss": 1.5803, "step": 684376 }, { "epoch": 59.04347826086956, "grad_norm": 0.37158262729644775, "learning_rate": 0.0001, "loss": 1.5774, "step": 684432 }, { "epoch": 59.04830917874396, "grad_norm": 0.2939485013484955, "learning_rate": 0.0001, "loss": 1.5806, "step": 684488 }, { "epoch": 59.05314009661836, "grad_norm": 0.3225690722465515, "learning_rate": 0.0001, "loss": 1.5834, "step": 684544 }, { "epoch": 59.05797101449275, "grad_norm": 2.5039007663726807, "learning_rate": 0.0001, "loss": 1.5757, "step": 684600 }, { "epoch": 59.06280193236715, "grad_norm": 0.6771671175956726, "learning_rate": 0.0001, "loss": 1.5716, "step": 684656 }, { "epoch": 59.067632850241544, "grad_norm": 0.3947732746601105, "learning_rate": 0.0001, "loss": 1.5793, "step": 684712 }, { "epoch": 59.072463768115945, "grad_norm": 0.33547767996788025, "learning_rate": 0.0001, "loss": 1.5754, "step": 684768 }, { "epoch": 59.07729468599034, "grad_norm": 3.4308481216430664, "learning_rate": 0.0001, "loss": 1.5782, "step": 684824 }, { "epoch": 59.08212560386473, "grad_norm": 0.4161035716533661, "learning_rate": 0.0001, "loss": 1.5747, "step": 684880 }, { "epoch": 59.08695652173913, "grad_norm": 0.284868985414505, "learning_rate": 0.0001, "loss": 1.5825, "step": 684936 }, { "epoch": 59.091787439613526, "grad_norm": 0.2610558569431305, "learning_rate": 0.0001, "loss": 1.5797, "step": 684992 }, { "epoch": 59.09661835748792, "grad_norm": 0.24968427419662476, "learning_rate": 0.0001, "loss": 1.5837, "step": 685048 }, { "epoch": 59.10144927536232, "grad_norm": 0.43108534812927246, "learning_rate": 0.0001, "loss": 1.5829, "step": 685104 }, { "epoch": 59.106280193236714, "grad_norm": 0.7682844996452332, "learning_rate": 0.0001, "loss": 1.5826, "step": 685160 }, { "epoch": 59.111111111111114, "grad_norm": 0.28737568855285645, "learning_rate": 0.0001, "loss": 1.5719, "step": 685216 }, { "epoch": 59.11594202898551, "grad_norm": 6.578753471374512, "learning_rate": 0.0001, "loss": 1.5784, "step": 685272 }, { "epoch": 59.1207729468599, "grad_norm": 0.26628386974334717, "learning_rate": 0.0001, "loss": 1.5838, "step": 685328 }, { "epoch": 59.1256038647343, "grad_norm": 1.4076613187789917, "learning_rate": 0.0001, "loss": 1.5785, "step": 685384 }, { "epoch": 59.130434782608695, "grad_norm": 11.906909942626953, "learning_rate": 0.0001, "loss": 1.5828, "step": 685440 }, { "epoch": 59.13526570048309, "grad_norm": 0.4458235800266266, "learning_rate": 0.0001, "loss": 1.5826, "step": 685496 }, { "epoch": 59.14009661835749, "grad_norm": 0.4811791181564331, "learning_rate": 0.0001, "loss": 1.5851, "step": 685552 }, { "epoch": 59.14492753623188, "grad_norm": 0.2673913836479187, "learning_rate": 0.0001, "loss": 1.5801, "step": 685608 }, { "epoch": 59.14975845410628, "grad_norm": 0.40166082978248596, "learning_rate": 0.0001, "loss": 1.5717, "step": 685664 }, { "epoch": 59.15458937198068, "grad_norm": 3.2066516876220703, "learning_rate": 0.0001, "loss": 1.5808, "step": 685720 }, { "epoch": 59.15942028985507, "grad_norm": 0.8093942999839783, "learning_rate": 0.0001, "loss": 1.5816, "step": 685776 }, { "epoch": 59.16425120772947, "grad_norm": 0.5130541324615479, "learning_rate": 0.0001, "loss": 1.5812, "step": 685832 }, { "epoch": 59.169082125603865, "grad_norm": 0.289439857006073, "learning_rate": 0.0001, "loss": 1.578, "step": 685888 }, { "epoch": 59.17391304347826, "grad_norm": 0.5557953715324402, "learning_rate": 0.0001, "loss": 1.5757, "step": 685944 }, { "epoch": 59.17874396135266, "grad_norm": 0.2778831422328949, "learning_rate": 0.0001, "loss": 1.5756, "step": 686000 }, { "epoch": 59.18357487922705, "grad_norm": 0.33455517888069153, "learning_rate": 0.0001, "loss": 1.5831, "step": 686056 }, { "epoch": 59.18840579710145, "grad_norm": 0.3775702714920044, "learning_rate": 0.0001, "loss": 1.573, "step": 686112 }, { "epoch": 59.193236714975846, "grad_norm": 0.2945551872253418, "learning_rate": 0.0001, "loss": 1.5812, "step": 686168 }, { "epoch": 59.19806763285024, "grad_norm": 0.21357780694961548, "learning_rate": 0.0001, "loss": 1.5843, "step": 686224 }, { "epoch": 59.20289855072464, "grad_norm": 2.4289052486419678, "learning_rate": 0.0001, "loss": 1.5757, "step": 686280 }, { "epoch": 59.207729468599034, "grad_norm": 0.25041791796684265, "learning_rate": 0.0001, "loss": 1.5811, "step": 686336 }, { "epoch": 59.21256038647343, "grad_norm": 17.54153823852539, "learning_rate": 0.0001, "loss": 1.5828, "step": 686392 }, { "epoch": 59.21739130434783, "grad_norm": 0.29004713892936707, "learning_rate": 0.0001, "loss": 1.5821, "step": 686448 }, { "epoch": 59.22222222222222, "grad_norm": 0.4157388508319855, "learning_rate": 0.0001, "loss": 1.5745, "step": 686504 }, { "epoch": 59.227053140096615, "grad_norm": 0.27281779050827026, "learning_rate": 0.0001, "loss": 1.5867, "step": 686560 }, { "epoch": 59.231884057971016, "grad_norm": 0.3060753345489502, "learning_rate": 0.0001, "loss": 1.5826, "step": 686616 }, { "epoch": 59.23671497584541, "grad_norm": 0.26018092036247253, "learning_rate": 0.0001, "loss": 1.5746, "step": 686672 }, { "epoch": 59.24154589371981, "grad_norm": 2.867929697036743, "learning_rate": 0.0001, "loss": 1.5812, "step": 686728 }, { "epoch": 59.2463768115942, "grad_norm": 2.309478282928467, "learning_rate": 0.0001, "loss": 1.5823, "step": 686784 }, { "epoch": 59.2512077294686, "grad_norm": 0.5264551043510437, "learning_rate": 0.0001, "loss": 1.5796, "step": 686840 }, { "epoch": 59.256038647343, "grad_norm": 0.5918468832969666, "learning_rate": 0.0001, "loss": 1.5813, "step": 686896 }, { "epoch": 59.26086956521739, "grad_norm": 0.46293720602989197, "learning_rate": 0.0001, "loss": 1.5791, "step": 686952 }, { "epoch": 59.265700483091784, "grad_norm": 0.6818119883537292, "learning_rate": 0.0001, "loss": 1.5762, "step": 687008 }, { "epoch": 59.270531400966185, "grad_norm": 0.2999832034111023, "learning_rate": 0.0001, "loss": 1.5798, "step": 687064 }, { "epoch": 59.27536231884058, "grad_norm": 0.344504177570343, "learning_rate": 0.0001, "loss": 1.5835, "step": 687120 }, { "epoch": 59.28019323671498, "grad_norm": 0.2660022974014282, "learning_rate": 0.0001, "loss": 1.5817, "step": 687176 }, { "epoch": 59.28502415458937, "grad_norm": 2.12516188621521, "learning_rate": 0.0001, "loss": 1.5842, "step": 687232 }, { "epoch": 59.289855072463766, "grad_norm": 0.9074131846427917, "learning_rate": 0.0001, "loss": 1.5849, "step": 687288 }, { "epoch": 59.29468599033817, "grad_norm": 0.4134586751461029, "learning_rate": 0.0001, "loss": 1.5787, "step": 687344 }, { "epoch": 59.29951690821256, "grad_norm": 0.5649986267089844, "learning_rate": 0.0001, "loss": 1.5729, "step": 687400 }, { "epoch": 59.30434782608695, "grad_norm": 0.3270477056503296, "learning_rate": 0.0001, "loss": 1.5794, "step": 687456 }, { "epoch": 59.309178743961354, "grad_norm": 0.33067724108695984, "learning_rate": 0.0001, "loss": 1.5807, "step": 687512 }, { "epoch": 59.31400966183575, "grad_norm": 0.26336851716041565, "learning_rate": 0.0001, "loss": 1.5817, "step": 687568 }, { "epoch": 59.31884057971015, "grad_norm": 0.37002840638160706, "learning_rate": 0.0001, "loss": 1.5794, "step": 687624 }, { "epoch": 59.32367149758454, "grad_norm": 0.326915442943573, "learning_rate": 0.0001, "loss": 1.582, "step": 687680 }, { "epoch": 59.328502415458935, "grad_norm": 0.3233988583087921, "learning_rate": 0.0001, "loss": 1.5852, "step": 687736 }, { "epoch": 59.333333333333336, "grad_norm": 0.9350061416625977, "learning_rate": 0.0001, "loss": 1.5851, "step": 687792 }, { "epoch": 59.33816425120773, "grad_norm": 2.886613130569458, "learning_rate": 0.0001, "loss": 1.5841, "step": 687848 }, { "epoch": 59.34299516908212, "grad_norm": 0.37324896454811096, "learning_rate": 0.0001, "loss": 1.5765, "step": 687904 }, { "epoch": 59.34782608695652, "grad_norm": 0.2677599787712097, "learning_rate": 0.0001, "loss": 1.5813, "step": 687960 }, { "epoch": 59.35265700483092, "grad_norm": 1.1214882135391235, "learning_rate": 0.0001, "loss": 1.5767, "step": 688016 }, { "epoch": 59.35748792270532, "grad_norm": 0.30127742886543274, "learning_rate": 0.0001, "loss": 1.5738, "step": 688072 }, { "epoch": 59.36231884057971, "grad_norm": 0.2964323163032532, "learning_rate": 0.0001, "loss": 1.5842, "step": 688128 }, { "epoch": 59.367149758454104, "grad_norm": 0.34859925508499146, "learning_rate": 0.0001, "loss": 1.5779, "step": 688184 }, { "epoch": 59.371980676328505, "grad_norm": 2.3959391117095947, "learning_rate": 0.0001, "loss": 1.5804, "step": 688240 }, { "epoch": 59.3768115942029, "grad_norm": 0.5064895153045654, "learning_rate": 0.0001, "loss": 1.5837, "step": 688296 }, { "epoch": 59.38164251207729, "grad_norm": 0.3281683623790741, "learning_rate": 0.0001, "loss": 1.5805, "step": 688352 }, { "epoch": 59.38647342995169, "grad_norm": 0.23971572518348694, "learning_rate": 0.0001, "loss": 1.5768, "step": 688408 }, { "epoch": 59.391304347826086, "grad_norm": 0.8625513315200806, "learning_rate": 0.0001, "loss": 1.5835, "step": 688464 }, { "epoch": 59.39613526570048, "grad_norm": 0.33772754669189453, "learning_rate": 0.0001, "loss": 1.5804, "step": 688520 }, { "epoch": 59.40096618357488, "grad_norm": 0.2538101077079773, "learning_rate": 0.0001, "loss": 1.5776, "step": 688576 }, { "epoch": 59.405797101449274, "grad_norm": 1.8667675256729126, "learning_rate": 0.0001, "loss": 1.5832, "step": 688632 }, { "epoch": 59.410628019323674, "grad_norm": 0.4152032434940338, "learning_rate": 0.0001, "loss": 1.585, "step": 688688 }, { "epoch": 59.41545893719807, "grad_norm": 0.2872133255004883, "learning_rate": 0.0001, "loss": 1.582, "step": 688744 }, { "epoch": 59.42028985507246, "grad_norm": 1.4630166292190552, "learning_rate": 0.0001, "loss": 1.5815, "step": 688800 }, { "epoch": 59.42512077294686, "grad_norm": 0.3487747311592102, "learning_rate": 0.0001, "loss": 1.5781, "step": 688856 }, { "epoch": 59.429951690821255, "grad_norm": 0.2999427914619446, "learning_rate": 0.0001, "loss": 1.5753, "step": 688912 }, { "epoch": 59.43478260869565, "grad_norm": 0.2584612965583801, "learning_rate": 0.0001, "loss": 1.5861, "step": 688968 }, { "epoch": 59.43961352657005, "grad_norm": 0.4201512038707733, "learning_rate": 0.0001, "loss": 1.5841, "step": 689024 }, { "epoch": 59.44444444444444, "grad_norm": 0.31643030047416687, "learning_rate": 0.0001, "loss": 1.586, "step": 689080 }, { "epoch": 59.44927536231884, "grad_norm": 0.3859144449234009, "learning_rate": 0.0001, "loss": 1.5847, "step": 689136 }, { "epoch": 59.45410628019324, "grad_norm": 0.5832363367080688, "learning_rate": 0.0001, "loss": 1.5834, "step": 689192 }, { "epoch": 59.45893719806763, "grad_norm": 3.517916679382324, "learning_rate": 0.0001, "loss": 1.5801, "step": 689248 }, { "epoch": 59.46376811594203, "grad_norm": 0.4620636999607086, "learning_rate": 0.0001, "loss": 1.5836, "step": 689304 }, { "epoch": 59.468599033816425, "grad_norm": 0.4568859934806824, "learning_rate": 0.0001, "loss": 1.5785, "step": 689360 }, { "epoch": 59.47342995169082, "grad_norm": 0.2600019872188568, "learning_rate": 0.0001, "loss": 1.5826, "step": 689416 }, { "epoch": 59.47826086956522, "grad_norm": 0.3569640517234802, "learning_rate": 0.0001, "loss": 1.5842, "step": 689472 }, { "epoch": 59.48309178743961, "grad_norm": 0.282779335975647, "learning_rate": 0.0001, "loss": 1.5892, "step": 689528 }, { "epoch": 59.48792270531401, "grad_norm": 0.30423077940940857, "learning_rate": 0.0001, "loss": 1.5779, "step": 689584 }, { "epoch": 59.492753623188406, "grad_norm": 0.37865403294563293, "learning_rate": 0.0001, "loss": 1.583, "step": 689640 }, { "epoch": 59.4975845410628, "grad_norm": 0.7821088433265686, "learning_rate": 0.0001, "loss": 1.5792, "step": 689696 }, { "epoch": 59.5024154589372, "grad_norm": 0.4307098686695099, "learning_rate": 0.0001, "loss": 1.5756, "step": 689752 }, { "epoch": 59.507246376811594, "grad_norm": 0.3359273374080658, "learning_rate": 0.0001, "loss": 1.5831, "step": 689808 }, { "epoch": 59.51207729468599, "grad_norm": 0.2567836344242096, "learning_rate": 0.0001, "loss": 1.578, "step": 689864 }, { "epoch": 59.51690821256039, "grad_norm": 1.1110930442810059, "learning_rate": 0.0001, "loss": 1.5837, "step": 689920 }, { "epoch": 59.52173913043478, "grad_norm": 0.38898763060569763, "learning_rate": 0.0001, "loss": 1.5826, "step": 689976 }, { "epoch": 59.52657004830918, "grad_norm": 2.043168544769287, "learning_rate": 0.0001, "loss": 1.5783, "step": 690032 }, { "epoch": 59.531400966183575, "grad_norm": 0.35199370980262756, "learning_rate": 0.0001, "loss": 1.5806, "step": 690088 }, { "epoch": 59.53623188405797, "grad_norm": 0.24492743611335754, "learning_rate": 0.0001, "loss": 1.5802, "step": 690144 }, { "epoch": 59.54106280193237, "grad_norm": 0.25902462005615234, "learning_rate": 0.0001, "loss": 1.5817, "step": 690200 }, { "epoch": 59.54589371980676, "grad_norm": 0.27917835116386414, "learning_rate": 0.0001, "loss": 1.5826, "step": 690256 }, { "epoch": 59.55072463768116, "grad_norm": 0.23888061940670013, "learning_rate": 0.0001, "loss": 1.5719, "step": 690312 }, { "epoch": 59.55555555555556, "grad_norm": 0.4996838867664337, "learning_rate": 0.0001, "loss": 1.5753, "step": 690368 }, { "epoch": 59.56038647342995, "grad_norm": 0.26170793175697327, "learning_rate": 0.0001, "loss": 1.5826, "step": 690424 }, { "epoch": 59.56521739130435, "grad_norm": 0.3399134576320648, "learning_rate": 0.0001, "loss": 1.5874, "step": 690480 }, { "epoch": 59.570048309178745, "grad_norm": 0.29369840025901794, "learning_rate": 0.0001, "loss": 1.5797, "step": 690536 }, { "epoch": 59.57487922705314, "grad_norm": 0.22971054911613464, "learning_rate": 0.0001, "loss": 1.5804, "step": 690592 }, { "epoch": 59.57971014492754, "grad_norm": 0.37108519673347473, "learning_rate": 0.0001, "loss": 1.5757, "step": 690648 }, { "epoch": 59.58454106280193, "grad_norm": 0.2575676143169403, "learning_rate": 0.0001, "loss": 1.5794, "step": 690704 }, { "epoch": 59.589371980676326, "grad_norm": 0.9849643707275391, "learning_rate": 0.0001, "loss": 1.5782, "step": 690760 }, { "epoch": 59.594202898550726, "grad_norm": 0.4554312825202942, "learning_rate": 0.0001, "loss": 1.5794, "step": 690816 }, { "epoch": 59.59903381642512, "grad_norm": 0.3184286952018738, "learning_rate": 0.0001, "loss": 1.578, "step": 690872 }, { "epoch": 59.60386473429952, "grad_norm": 0.28708282113075256, "learning_rate": 0.0001, "loss": 1.5737, "step": 690928 }, { "epoch": 59.608695652173914, "grad_norm": 0.5002889037132263, "learning_rate": 0.0001, "loss": 1.5803, "step": 690984 }, { "epoch": 59.61352657004831, "grad_norm": 0.3920978307723999, "learning_rate": 0.0001, "loss": 1.5764, "step": 691040 }, { "epoch": 59.61835748792271, "grad_norm": 0.2972944974899292, "learning_rate": 0.0001, "loss": 1.5828, "step": 691096 }, { "epoch": 59.6231884057971, "grad_norm": 2.226684808731079, "learning_rate": 0.0001, "loss": 1.5803, "step": 691152 }, { "epoch": 59.628019323671495, "grad_norm": 0.24386470019817352, "learning_rate": 0.0001, "loss": 1.5825, "step": 691208 }, { "epoch": 59.632850241545896, "grad_norm": 0.289233535528183, "learning_rate": 0.0001, "loss": 1.5853, "step": 691264 }, { "epoch": 59.63768115942029, "grad_norm": 0.31486329436302185, "learning_rate": 0.0001, "loss": 1.5885, "step": 691320 }, { "epoch": 59.64251207729468, "grad_norm": 0.2357928603887558, "learning_rate": 0.0001, "loss": 1.5783, "step": 691376 }, { "epoch": 59.64734299516908, "grad_norm": 0.2875485420227051, "learning_rate": 0.0001, "loss": 1.5727, "step": 691432 }, { "epoch": 59.65217391304348, "grad_norm": 0.9742491841316223, "learning_rate": 0.0001, "loss": 1.5778, "step": 691488 }, { "epoch": 59.65700483091788, "grad_norm": 0.3086865544319153, "learning_rate": 0.0001, "loss": 1.5766, "step": 691544 }, { "epoch": 59.66183574879227, "grad_norm": 2.8799257278442383, "learning_rate": 0.0001, "loss": 1.5823, "step": 691600 }, { "epoch": 59.666666666666664, "grad_norm": 2.031182050704956, "learning_rate": 0.0001, "loss": 1.5852, "step": 691656 }, { "epoch": 59.671497584541065, "grad_norm": 0.39554327726364136, "learning_rate": 0.0001, "loss": 1.5802, "step": 691712 }, { "epoch": 59.67632850241546, "grad_norm": 5.398155212402344, "learning_rate": 0.0001, "loss": 1.5758, "step": 691768 }, { "epoch": 59.68115942028985, "grad_norm": 2.6095893383026123, "learning_rate": 0.0001, "loss": 1.5847, "step": 691824 }, { "epoch": 59.68599033816425, "grad_norm": 0.24330627918243408, "learning_rate": 0.0001, "loss": 1.586, "step": 691880 }, { "epoch": 59.690821256038646, "grad_norm": 0.8425537943840027, "learning_rate": 0.0001, "loss": 1.5852, "step": 691936 }, { "epoch": 59.69565217391305, "grad_norm": 0.31725358963012695, "learning_rate": 0.0001, "loss": 1.5845, "step": 691992 }, { "epoch": 59.70048309178744, "grad_norm": 0.2762819826602936, "learning_rate": 0.0001, "loss": 1.5855, "step": 692048 }, { "epoch": 59.70531400966183, "grad_norm": 0.6427628993988037, "learning_rate": 0.0001, "loss": 1.575, "step": 692104 }, { "epoch": 59.710144927536234, "grad_norm": 0.30668994784355164, "learning_rate": 0.0001, "loss": 1.5803, "step": 692160 }, { "epoch": 59.71497584541063, "grad_norm": 0.2652125358581543, "learning_rate": 0.0001, "loss": 1.5836, "step": 692216 }, { "epoch": 59.71980676328502, "grad_norm": 1.0040802955627441, "learning_rate": 0.0001, "loss": 1.5807, "step": 692272 }, { "epoch": 59.72463768115942, "grad_norm": 0.269000381231308, "learning_rate": 0.0001, "loss": 1.5808, "step": 692328 }, { "epoch": 59.729468599033815, "grad_norm": 0.3632833957672119, "learning_rate": 0.0001, "loss": 1.5836, "step": 692384 }, { "epoch": 59.734299516908216, "grad_norm": 0.2548156678676605, "learning_rate": 0.0001, "loss": 1.5837, "step": 692440 }, { "epoch": 59.73913043478261, "grad_norm": 0.7157008647918701, "learning_rate": 0.0001, "loss": 1.5787, "step": 692496 }, { "epoch": 59.743961352657, "grad_norm": 0.4770122468471527, "learning_rate": 0.0001, "loss": 1.5863, "step": 692552 }, { "epoch": 59.7487922705314, "grad_norm": 0.7277584671974182, "learning_rate": 0.0001, "loss": 1.5781, "step": 692608 }, { "epoch": 59.7536231884058, "grad_norm": 0.3695371448993683, "learning_rate": 0.0001, "loss": 1.5782, "step": 692664 }, { "epoch": 59.75845410628019, "grad_norm": 0.6567927598953247, "learning_rate": 0.0001, "loss": 1.5833, "step": 692720 }, { "epoch": 59.76328502415459, "grad_norm": 0.4213573932647705, "learning_rate": 0.0001, "loss": 1.585, "step": 692776 }, { "epoch": 59.768115942028984, "grad_norm": 1.556086540222168, "learning_rate": 0.0001, "loss": 1.5794, "step": 692832 }, { "epoch": 59.772946859903385, "grad_norm": 0.685319185256958, "learning_rate": 0.0001, "loss": 1.5846, "step": 692888 }, { "epoch": 59.77777777777778, "grad_norm": 0.24809466302394867, "learning_rate": 0.0001, "loss": 1.5831, "step": 692944 }, { "epoch": 59.78260869565217, "grad_norm": 0.33587440848350525, "learning_rate": 0.0001, "loss": 1.5754, "step": 693000 }, { "epoch": 59.78743961352657, "grad_norm": 0.23237693309783936, "learning_rate": 0.0001, "loss": 1.5816, "step": 693056 }, { "epoch": 59.792270531400966, "grad_norm": 0.3283630609512329, "learning_rate": 0.0001, "loss": 1.5855, "step": 693112 }, { "epoch": 59.79710144927536, "grad_norm": 0.28697702288627625, "learning_rate": 0.0001, "loss": 1.5844, "step": 693168 }, { "epoch": 59.80193236714976, "grad_norm": 8.34589958190918, "learning_rate": 0.0001, "loss": 1.5846, "step": 693224 }, { "epoch": 59.806763285024154, "grad_norm": 0.2744125723838806, "learning_rate": 0.0001, "loss": 1.5836, "step": 693280 }, { "epoch": 59.81159420289855, "grad_norm": 0.28326475620269775, "learning_rate": 0.0001, "loss": 1.5801, "step": 693336 }, { "epoch": 59.81642512077295, "grad_norm": 1.5065664052963257, "learning_rate": 0.0001, "loss": 1.588, "step": 693392 }, { "epoch": 59.82125603864734, "grad_norm": 29.41591453552246, "learning_rate": 0.0001, "loss": 1.5816, "step": 693448 }, { "epoch": 59.82608695652174, "grad_norm": 0.23397944867610931, "learning_rate": 0.0001, "loss": 1.5802, "step": 693504 }, { "epoch": 59.830917874396135, "grad_norm": 0.31295284628868103, "learning_rate": 0.0001, "loss": 1.5824, "step": 693560 }, { "epoch": 59.83574879227053, "grad_norm": 0.36243149638175964, "learning_rate": 0.0001, "loss": 1.5857, "step": 693616 }, { "epoch": 59.84057971014493, "grad_norm": 0.24212320148944855, "learning_rate": 0.0001, "loss": 1.5879, "step": 693672 }, { "epoch": 59.84541062801932, "grad_norm": 0.9146267771720886, "learning_rate": 0.0001, "loss": 1.5775, "step": 693728 }, { "epoch": 59.85024154589372, "grad_norm": 0.32866770029067993, "learning_rate": 0.0001, "loss": 1.5819, "step": 693784 }, { "epoch": 59.85507246376812, "grad_norm": 0.24476449191570282, "learning_rate": 0.0001, "loss": 1.5718, "step": 693840 }, { "epoch": 59.85990338164251, "grad_norm": 1.019330620765686, "learning_rate": 0.0001, "loss": 1.5837, "step": 693896 }, { "epoch": 59.86473429951691, "grad_norm": 31.8280086517334, "learning_rate": 0.0001, "loss": 1.5853, "step": 693952 }, { "epoch": 59.869565217391305, "grad_norm": 0.663215160369873, "learning_rate": 0.0001, "loss": 1.5823, "step": 694008 }, { "epoch": 59.8743961352657, "grad_norm": 1.517335295677185, "learning_rate": 0.0001, "loss": 1.5856, "step": 694064 }, { "epoch": 59.8792270531401, "grad_norm": 0.344287633895874, "learning_rate": 0.0001, "loss": 1.5804, "step": 694120 }, { "epoch": 59.88405797101449, "grad_norm": 0.2983733117580414, "learning_rate": 0.0001, "loss": 1.5821, "step": 694176 }, { "epoch": 59.888888888888886, "grad_norm": 0.2282552421092987, "learning_rate": 0.0001, "loss": 1.5886, "step": 694232 }, { "epoch": 59.893719806763286, "grad_norm": 0.25940120220184326, "learning_rate": 0.0001, "loss": 1.5858, "step": 694288 }, { "epoch": 59.89855072463768, "grad_norm": 0.2784886360168457, "learning_rate": 0.0001, "loss": 1.5846, "step": 694344 }, { "epoch": 59.90338164251208, "grad_norm": 2.8056018352508545, "learning_rate": 0.0001, "loss": 1.5748, "step": 694400 }, { "epoch": 59.908212560386474, "grad_norm": 0.24200503528118134, "learning_rate": 0.0001, "loss": 1.5846, "step": 694456 }, { "epoch": 59.91304347826087, "grad_norm": 0.25908395648002625, "learning_rate": 0.0001, "loss": 1.585, "step": 694512 }, { "epoch": 59.91787439613527, "grad_norm": 1.2265514135360718, "learning_rate": 0.0001, "loss": 1.5847, "step": 694568 }, { "epoch": 59.92270531400966, "grad_norm": 0.27906733751296997, "learning_rate": 0.0001, "loss": 1.5838, "step": 694624 }, { "epoch": 59.927536231884055, "grad_norm": 2.3226158618927, "learning_rate": 0.0001, "loss": 1.5848, "step": 694680 }, { "epoch": 59.932367149758456, "grad_norm": 4.921370506286621, "learning_rate": 0.0001, "loss": 1.5779, "step": 694736 }, { "epoch": 59.93719806763285, "grad_norm": 0.5084928274154663, "learning_rate": 0.0001, "loss": 1.583, "step": 694792 }, { "epoch": 59.94202898550725, "grad_norm": 0.25429216027259827, "learning_rate": 0.0001, "loss": 1.5868, "step": 694848 }, { "epoch": 59.94685990338164, "grad_norm": 0.25756266713142395, "learning_rate": 0.0001, "loss": 1.5783, "step": 694904 }, { "epoch": 59.95169082125604, "grad_norm": 0.2764509916305542, "learning_rate": 0.0001, "loss": 1.5864, "step": 694960 }, { "epoch": 59.95652173913044, "grad_norm": 1.283164381980896, "learning_rate": 0.0001, "loss": 1.5827, "step": 695016 }, { "epoch": 59.96135265700483, "grad_norm": 0.3314727544784546, "learning_rate": 0.0001, "loss": 1.579, "step": 695072 }, { "epoch": 59.966183574879224, "grad_norm": 0.5720407366752625, "learning_rate": 0.0001, "loss": 1.5837, "step": 695128 }, { "epoch": 59.971014492753625, "grad_norm": 0.2651757597923279, "learning_rate": 0.0001, "loss": 1.58, "step": 695184 }, { "epoch": 59.97584541062802, "grad_norm": 0.2496098130941391, "learning_rate": 0.0001, "loss": 1.5946, "step": 695240 }, { "epoch": 59.98067632850242, "grad_norm": 0.3280239403247833, "learning_rate": 0.0001, "loss": 1.586, "step": 695296 }, { "epoch": 59.98550724637681, "grad_norm": 0.2764151692390442, "learning_rate": 0.0001, "loss": 1.5858, "step": 695352 }, { "epoch": 59.990338164251206, "grad_norm": 1.5433892011642456, "learning_rate": 0.0001, "loss": 1.5793, "step": 695408 }, { "epoch": 59.99516908212561, "grad_norm": 0.2738545835018158, "learning_rate": 0.0001, "loss": 1.5868, "step": 695464 }, { "epoch": 60.0, "grad_norm": 0.2801104485988617, "learning_rate": 0.0001, "loss": 1.5824, "step": 695520 }, { "epoch": 60.00483091787439, "grad_norm": 0.2857520282268524, "learning_rate": 0.0001, "loss": 1.574, "step": 695576 }, { "epoch": 60.009661835748794, "grad_norm": 1.0743688344955444, "learning_rate": 0.0001, "loss": 1.5717, "step": 695632 }, { "epoch": 60.01449275362319, "grad_norm": 1.79491126537323, "learning_rate": 0.0001, "loss": 1.5742, "step": 695688 }, { "epoch": 60.01932367149758, "grad_norm": 0.34099647402763367, "learning_rate": 0.0001, "loss": 1.5737, "step": 695744 }, { "epoch": 60.02415458937198, "grad_norm": 0.2691984176635742, "learning_rate": 0.0001, "loss": 1.581, "step": 695800 }, { "epoch": 60.028985507246375, "grad_norm": 0.3307435214519501, "learning_rate": 0.0001, "loss": 1.5822, "step": 695856 }, { "epoch": 60.033816425120776, "grad_norm": 0.33421286940574646, "learning_rate": 0.0001, "loss": 1.5682, "step": 695912 }, { "epoch": 60.03864734299517, "grad_norm": 20.811918258666992, "learning_rate": 0.0001, "loss": 1.5764, "step": 695968 }, { "epoch": 60.04347826086956, "grad_norm": 0.230454221367836, "learning_rate": 0.0001, "loss": 1.5729, "step": 696024 }, { "epoch": 60.04830917874396, "grad_norm": 0.24860779941082, "learning_rate": 0.0001, "loss": 1.5761, "step": 696080 }, { "epoch": 60.05314009661836, "grad_norm": 0.47124120593070984, "learning_rate": 0.0001, "loss": 1.5681, "step": 696136 }, { "epoch": 60.05797101449275, "grad_norm": 0.7666196227073669, "learning_rate": 0.0001, "loss": 1.5722, "step": 696192 }, { "epoch": 60.06280193236715, "grad_norm": 0.30247679352760315, "learning_rate": 0.0001, "loss": 1.5805, "step": 696248 }, { "epoch": 60.067632850241544, "grad_norm": 0.2915549576282501, "learning_rate": 0.0001, "loss": 1.5736, "step": 696304 }, { "epoch": 60.072463768115945, "grad_norm": 0.5614500641822815, "learning_rate": 0.0001, "loss": 1.5736, "step": 696360 }, { "epoch": 60.07729468599034, "grad_norm": 0.29355961084365845, "learning_rate": 0.0001, "loss": 1.5777, "step": 696416 }, { "epoch": 60.08212560386473, "grad_norm": 0.46507272124290466, "learning_rate": 0.0001, "loss": 1.5767, "step": 696472 }, { "epoch": 60.08695652173913, "grad_norm": 0.3543570637702942, "learning_rate": 0.0001, "loss": 1.5801, "step": 696528 }, { "epoch": 60.091787439613526, "grad_norm": 0.3216313421726227, "learning_rate": 0.0001, "loss": 1.5745, "step": 696584 }, { "epoch": 60.09661835748792, "grad_norm": 0.24897031486034393, "learning_rate": 0.0001, "loss": 1.582, "step": 696640 }, { "epoch": 60.10144927536232, "grad_norm": 0.30684202909469604, "learning_rate": 0.0001, "loss": 1.5713, "step": 696696 }, { "epoch": 60.106280193236714, "grad_norm": 0.27969709038734436, "learning_rate": 0.0001, "loss": 1.5844, "step": 696752 }, { "epoch": 60.111111111111114, "grad_norm": 0.27083954215049744, "learning_rate": 0.0001, "loss": 1.5711, "step": 696808 }, { "epoch": 60.11594202898551, "grad_norm": 0.2981301546096802, "learning_rate": 0.0001, "loss": 1.5647, "step": 696864 }, { "epoch": 60.1207729468599, "grad_norm": 2.3170888423919678, "learning_rate": 0.0001, "loss": 1.5761, "step": 696920 }, { "epoch": 60.1256038647343, "grad_norm": 0.2816902697086334, "learning_rate": 0.0001, "loss": 1.5776, "step": 696976 }, { "epoch": 60.130434782608695, "grad_norm": 0.22933365404605865, "learning_rate": 0.0001, "loss": 1.5756, "step": 697032 }, { "epoch": 60.13526570048309, "grad_norm": 0.6917015314102173, "learning_rate": 0.0001, "loss": 1.5797, "step": 697088 }, { "epoch": 60.14009661835749, "grad_norm": 0.2562465965747833, "learning_rate": 0.0001, "loss": 1.574, "step": 697144 }, { "epoch": 60.14492753623188, "grad_norm": 0.3197854459285736, "learning_rate": 0.0001, "loss": 1.5805, "step": 697200 }, { "epoch": 60.14975845410628, "grad_norm": 3.074932098388672, "learning_rate": 0.0001, "loss": 1.5819, "step": 697256 }, { "epoch": 60.15458937198068, "grad_norm": 0.6191052198410034, "learning_rate": 0.0001, "loss": 1.5815, "step": 697312 }, { "epoch": 60.15942028985507, "grad_norm": 0.25801995396614075, "learning_rate": 0.0001, "loss": 1.5791, "step": 697368 }, { "epoch": 60.16425120772947, "grad_norm": 0.2604316473007202, "learning_rate": 0.0001, "loss": 1.5809, "step": 697424 }, { "epoch": 60.169082125603865, "grad_norm": 0.25910744071006775, "learning_rate": 0.0001, "loss": 1.5788, "step": 697480 }, { "epoch": 60.17391304347826, "grad_norm": 0.2584953308105469, "learning_rate": 0.0001, "loss": 1.5772, "step": 697536 }, { "epoch": 60.17874396135266, "grad_norm": 0.3602120280265808, "learning_rate": 0.0001, "loss": 1.5813, "step": 697592 }, { "epoch": 60.18357487922705, "grad_norm": 0.2698121666908264, "learning_rate": 0.0001, "loss": 1.5733, "step": 697648 }, { "epoch": 60.18840579710145, "grad_norm": 0.6010696887969971, "learning_rate": 0.0001, "loss": 1.5843, "step": 697704 }, { "epoch": 60.193236714975846, "grad_norm": 0.27337396144866943, "learning_rate": 0.0001, "loss": 1.5832, "step": 697760 }, { "epoch": 60.19806763285024, "grad_norm": 0.2853875160217285, "learning_rate": 0.0001, "loss": 1.5765, "step": 697816 }, { "epoch": 60.20289855072464, "grad_norm": 0.3413335978984833, "learning_rate": 0.0001, "loss": 1.5846, "step": 697872 }, { "epoch": 60.207729468599034, "grad_norm": 0.26946112513542175, "learning_rate": 0.0001, "loss": 1.5699, "step": 697928 }, { "epoch": 60.21256038647343, "grad_norm": 0.6045644879341125, "learning_rate": 0.0001, "loss": 1.5723, "step": 697984 }, { "epoch": 60.21739130434783, "grad_norm": 2.794064998626709, "learning_rate": 0.0001, "loss": 1.5738, "step": 698040 }, { "epoch": 60.22222222222222, "grad_norm": 0.26960599422454834, "learning_rate": 0.0001, "loss": 1.5732, "step": 698096 }, { "epoch": 60.227053140096615, "grad_norm": 0.24827370047569275, "learning_rate": 0.0001, "loss": 1.5741, "step": 698152 }, { "epoch": 60.231884057971016, "grad_norm": 0.2728942036628723, "learning_rate": 0.0001, "loss": 1.5795, "step": 698208 }, { "epoch": 60.23671497584541, "grad_norm": 0.24475567042827606, "learning_rate": 0.0001, "loss": 1.5751, "step": 698264 }, { "epoch": 60.24154589371981, "grad_norm": 0.5537627935409546, "learning_rate": 0.0001, "loss": 1.576, "step": 698320 }, { "epoch": 60.2463768115942, "grad_norm": 0.26496297121047974, "learning_rate": 0.0001, "loss": 1.5753, "step": 698376 }, { "epoch": 60.2512077294686, "grad_norm": 0.48235222697257996, "learning_rate": 0.0001, "loss": 1.5763, "step": 698432 }, { "epoch": 60.256038647343, "grad_norm": 0.3524574637413025, "learning_rate": 0.0001, "loss": 1.5751, "step": 698488 }, { "epoch": 60.26086956521739, "grad_norm": 0.35422977805137634, "learning_rate": 0.0001, "loss": 1.5701, "step": 698544 }, { "epoch": 60.265700483091784, "grad_norm": 0.6287068724632263, "learning_rate": 0.0001, "loss": 1.5739, "step": 698600 }, { "epoch": 60.270531400966185, "grad_norm": 0.24017618596553802, "learning_rate": 0.0001, "loss": 1.5746, "step": 698656 }, { "epoch": 60.27536231884058, "grad_norm": 0.2853739559650421, "learning_rate": 0.0001, "loss": 1.5728, "step": 698712 }, { "epoch": 60.28019323671498, "grad_norm": 0.9683747887611389, "learning_rate": 0.0001, "loss": 1.5774, "step": 698768 }, { "epoch": 60.28502415458937, "grad_norm": 0.2317504584789276, "learning_rate": 0.0001, "loss": 1.5766, "step": 698824 }, { "epoch": 60.289855072463766, "grad_norm": 0.26654016971588135, "learning_rate": 0.0001, "loss": 1.5727, "step": 698880 }, { "epoch": 60.29468599033817, "grad_norm": 0.2955748438835144, "learning_rate": 0.0001, "loss": 1.5726, "step": 698936 }, { "epoch": 60.29951690821256, "grad_norm": 0.3092181086540222, "learning_rate": 0.0001, "loss": 1.5771, "step": 698992 }, { "epoch": 60.30434782608695, "grad_norm": 0.3172142505645752, "learning_rate": 0.0001, "loss": 1.5796, "step": 699048 }, { "epoch": 60.309178743961354, "grad_norm": 0.29703933000564575, "learning_rate": 0.0001, "loss": 1.5793, "step": 699104 }, { "epoch": 60.31400966183575, "grad_norm": 0.26891663670539856, "learning_rate": 0.0001, "loss": 1.5753, "step": 699160 }, { "epoch": 60.31884057971015, "grad_norm": 2.070209503173828, "learning_rate": 0.0001, "loss": 1.5748, "step": 699216 }, { "epoch": 60.32367149758454, "grad_norm": 0.34885460138320923, "learning_rate": 0.0001, "loss": 1.5769, "step": 699272 }, { "epoch": 60.328502415458935, "grad_norm": 3.5126407146453857, "learning_rate": 0.0001, "loss": 1.5824, "step": 699328 }, { "epoch": 60.333333333333336, "grad_norm": 0.4002555012702942, "learning_rate": 0.0001, "loss": 1.5802, "step": 699384 }, { "epoch": 60.33816425120773, "grad_norm": 0.2590198516845703, "learning_rate": 0.0001, "loss": 1.571, "step": 699440 }, { "epoch": 60.34299516908212, "grad_norm": 0.3490011692047119, "learning_rate": 0.0001, "loss": 1.581, "step": 699496 }, { "epoch": 60.34782608695652, "grad_norm": 0.3233735263347626, "learning_rate": 0.0001, "loss": 1.5783, "step": 699552 }, { "epoch": 60.35265700483092, "grad_norm": 1.7230165004730225, "learning_rate": 0.0001, "loss": 1.5762, "step": 699608 }, { "epoch": 60.35748792270532, "grad_norm": 0.5987613201141357, "learning_rate": 0.0001, "loss": 1.5753, "step": 699664 }, { "epoch": 60.36231884057971, "grad_norm": 1.8417809009552002, "learning_rate": 0.0001, "loss": 1.5753, "step": 699720 }, { "epoch": 60.367149758454104, "grad_norm": 0.24969609081745148, "learning_rate": 0.0001, "loss": 1.577, "step": 699776 }, { "epoch": 60.371980676328505, "grad_norm": 0.3222193717956543, "learning_rate": 0.0001, "loss": 1.5776, "step": 699832 }, { "epoch": 60.3768115942029, "grad_norm": 0.4185481369495392, "learning_rate": 0.0001, "loss": 1.5742, "step": 699888 }, { "epoch": 60.38164251207729, "grad_norm": 0.6484152674674988, "learning_rate": 0.0001, "loss": 1.573, "step": 699944 }, { "epoch": 60.38647342995169, "grad_norm": 0.2997056245803833, "learning_rate": 0.0001, "loss": 1.5758, "step": 700000 }, { "epoch": 60.391304347826086, "grad_norm": 0.9676834344863892, "learning_rate": 0.0001, "loss": 1.5709, "step": 700056 }, { "epoch": 60.39613526570048, "grad_norm": 0.5560657978057861, "learning_rate": 0.0001, "loss": 1.5733, "step": 700112 }, { "epoch": 60.40096618357488, "grad_norm": 0.355456680059433, "learning_rate": 0.0001, "loss": 1.5773, "step": 700168 }, { "epoch": 60.405797101449274, "grad_norm": 1.2229633331298828, "learning_rate": 0.0001, "loss": 1.575, "step": 700224 }, { "epoch": 60.410628019323674, "grad_norm": 0.2738179564476013, "learning_rate": 0.0001, "loss": 1.5752, "step": 700280 }, { "epoch": 60.41545893719807, "grad_norm": 0.2679765820503235, "learning_rate": 0.0001, "loss": 1.5767, "step": 700336 }, { "epoch": 60.42028985507246, "grad_norm": 0.34425830841064453, "learning_rate": 0.0001, "loss": 1.5782, "step": 700392 }, { "epoch": 60.42512077294686, "grad_norm": 45.25291442871094, "learning_rate": 0.0001, "loss": 1.5757, "step": 700448 }, { "epoch": 60.429951690821255, "grad_norm": 0.4831262528896332, "learning_rate": 0.0001, "loss": 1.5747, "step": 700504 }, { "epoch": 60.43478260869565, "grad_norm": 0.27243492007255554, "learning_rate": 0.0001, "loss": 1.5761, "step": 700560 }, { "epoch": 60.43961352657005, "grad_norm": 0.36161792278289795, "learning_rate": 0.0001, "loss": 1.5801, "step": 700616 }, { "epoch": 60.44444444444444, "grad_norm": 0.3692052960395813, "learning_rate": 0.0001, "loss": 1.5737, "step": 700672 }, { "epoch": 60.44927536231884, "grad_norm": 1.4017276763916016, "learning_rate": 0.0001, "loss": 1.5821, "step": 700728 }, { "epoch": 60.45410628019324, "grad_norm": 0.3149808943271637, "learning_rate": 0.0001, "loss": 1.5778, "step": 700784 }, { "epoch": 60.45893719806763, "grad_norm": 14.724553108215332, "learning_rate": 0.0001, "loss": 1.5722, "step": 700840 }, { "epoch": 60.46376811594203, "grad_norm": 0.2763996422290802, "learning_rate": 0.0001, "loss": 1.576, "step": 700896 }, { "epoch": 60.468599033816425, "grad_norm": 0.7944520115852356, "learning_rate": 0.0001, "loss": 1.5781, "step": 700952 }, { "epoch": 60.47342995169082, "grad_norm": 0.23510867357254028, "learning_rate": 0.0001, "loss": 1.5723, "step": 701008 }, { "epoch": 60.47826086956522, "grad_norm": 0.31905364990234375, "learning_rate": 0.0001, "loss": 1.58, "step": 701064 }, { "epoch": 60.48309178743961, "grad_norm": 1.1589583158493042, "learning_rate": 0.0001, "loss": 1.5753, "step": 701120 }, { "epoch": 60.48792270531401, "grad_norm": 0.3990651071071625, "learning_rate": 0.0001, "loss": 1.5798, "step": 701176 }, { "epoch": 60.492753623188406, "grad_norm": 5.37070894241333, "learning_rate": 0.0001, "loss": 1.5777, "step": 701232 }, { "epoch": 60.4975845410628, "grad_norm": 0.23619192838668823, "learning_rate": 0.0001, "loss": 1.5791, "step": 701288 }, { "epoch": 60.5024154589372, "grad_norm": 1.6228036880493164, "learning_rate": 0.0001, "loss": 1.5827, "step": 701344 }, { "epoch": 60.507246376811594, "grad_norm": 0.2527296245098114, "learning_rate": 0.0001, "loss": 1.5836, "step": 701400 }, { "epoch": 60.51207729468599, "grad_norm": 0.7983850836753845, "learning_rate": 0.0001, "loss": 1.578, "step": 701456 }, { "epoch": 60.51690821256039, "grad_norm": 5.398256778717041, "learning_rate": 0.0001, "loss": 1.5744, "step": 701512 }, { "epoch": 60.52173913043478, "grad_norm": 0.35326850414276123, "learning_rate": 0.0001, "loss": 1.5752, "step": 701568 }, { "epoch": 60.52657004830918, "grad_norm": 0.3318440616130829, "learning_rate": 0.0001, "loss": 1.5679, "step": 701624 }, { "epoch": 60.531400966183575, "grad_norm": 0.36323273181915283, "learning_rate": 0.0001, "loss": 1.5799, "step": 701680 }, { "epoch": 60.53623188405797, "grad_norm": 0.5379390120506287, "learning_rate": 0.0001, "loss": 1.5822, "step": 701736 }, { "epoch": 60.54106280193237, "grad_norm": 0.31537649035453796, "learning_rate": 0.0001, "loss": 1.5779, "step": 701792 }, { "epoch": 60.54589371980676, "grad_norm": 0.26845961809158325, "learning_rate": 0.0001, "loss": 1.5794, "step": 701848 }, { "epoch": 60.55072463768116, "grad_norm": 0.8374590277671814, "learning_rate": 0.0001, "loss": 1.5771, "step": 701904 }, { "epoch": 60.55555555555556, "grad_norm": 5.551652908325195, "learning_rate": 0.0001, "loss": 1.5758, "step": 701960 }, { "epoch": 60.56038647342995, "grad_norm": 0.318589985370636, "learning_rate": 0.0001, "loss": 1.5782, "step": 702016 }, { "epoch": 60.56521739130435, "grad_norm": 0.6572666764259338, "learning_rate": 0.0001, "loss": 1.5788, "step": 702072 }, { "epoch": 60.570048309178745, "grad_norm": 4.907597064971924, "learning_rate": 0.0001, "loss": 1.58, "step": 702128 }, { "epoch": 60.57487922705314, "grad_norm": 0.24195681512355804, "learning_rate": 0.0001, "loss": 1.5724, "step": 702184 }, { "epoch": 60.57971014492754, "grad_norm": 0.36632591485977173, "learning_rate": 0.0001, "loss": 1.5773, "step": 702240 }, { "epoch": 60.58454106280193, "grad_norm": 0.5818426012992859, "learning_rate": 0.0001, "loss": 1.5825, "step": 702296 }, { "epoch": 60.589371980676326, "grad_norm": 0.2787812650203705, "learning_rate": 0.0001, "loss": 1.5822, "step": 702352 }, { "epoch": 60.594202898550726, "grad_norm": 0.8423380851745605, "learning_rate": 0.0001, "loss": 1.5737, "step": 702408 }, { "epoch": 60.59903381642512, "grad_norm": 0.35317331552505493, "learning_rate": 0.0001, "loss": 1.5753, "step": 702464 }, { "epoch": 60.60386473429952, "grad_norm": 0.2691987454891205, "learning_rate": 0.0001, "loss": 1.5834, "step": 702520 }, { "epoch": 60.608695652173914, "grad_norm": 0.27900025248527527, "learning_rate": 0.0001, "loss": 1.5887, "step": 702576 }, { "epoch": 60.61352657004831, "grad_norm": 0.28731805086135864, "learning_rate": 0.0001, "loss": 1.5818, "step": 702632 }, { "epoch": 60.61835748792271, "grad_norm": 9.03830623626709, "learning_rate": 0.0001, "loss": 1.5761, "step": 702688 }, { "epoch": 60.6231884057971, "grad_norm": 0.6311138868331909, "learning_rate": 0.0001, "loss": 1.5759, "step": 702744 }, { "epoch": 60.628019323671495, "grad_norm": 0.2666708528995514, "learning_rate": 0.0001, "loss": 1.5819, "step": 702800 }, { "epoch": 60.632850241545896, "grad_norm": 0.2911073565483093, "learning_rate": 0.0001, "loss": 1.5756, "step": 702856 }, { "epoch": 60.63768115942029, "grad_norm": 4.086987495422363, "learning_rate": 0.0001, "loss": 1.5808, "step": 702912 }, { "epoch": 60.64251207729468, "grad_norm": 0.2916426360607147, "learning_rate": 0.0001, "loss": 1.5847, "step": 702968 }, { "epoch": 60.64734299516908, "grad_norm": 0.3448326289653778, "learning_rate": 0.0001, "loss": 1.5784, "step": 703024 }, { "epoch": 60.65217391304348, "grad_norm": 0.26453959941864014, "learning_rate": 0.0001, "loss": 1.5808, "step": 703080 }, { "epoch": 60.65700483091788, "grad_norm": 0.2780526578426361, "learning_rate": 0.0001, "loss": 1.5756, "step": 703136 }, { "epoch": 60.66183574879227, "grad_norm": 0.3610136806964874, "learning_rate": 0.0001, "loss": 1.5796, "step": 703192 }, { "epoch": 60.666666666666664, "grad_norm": 0.6798877120018005, "learning_rate": 0.0001, "loss": 1.582, "step": 703248 }, { "epoch": 60.671497584541065, "grad_norm": 0.3715328276157379, "learning_rate": 0.0001, "loss": 1.5776, "step": 703304 }, { "epoch": 60.67632850241546, "grad_norm": 0.3307245075702667, "learning_rate": 0.0001, "loss": 1.5791, "step": 703360 }, { "epoch": 60.68115942028985, "grad_norm": 0.2726972997188568, "learning_rate": 0.0001, "loss": 1.5833, "step": 703416 }, { "epoch": 60.68599033816425, "grad_norm": 1.9366445541381836, "learning_rate": 0.0001, "loss": 1.5809, "step": 703472 }, { "epoch": 60.690821256038646, "grad_norm": 0.23848965764045715, "learning_rate": 0.0001, "loss": 1.5825, "step": 703528 }, { "epoch": 60.69565217391305, "grad_norm": 0.25545644760131836, "learning_rate": 0.0001, "loss": 1.5765, "step": 703584 }, { "epoch": 60.70048309178744, "grad_norm": 0.2560465931892395, "learning_rate": 0.0001, "loss": 1.581, "step": 703640 }, { "epoch": 60.70531400966183, "grad_norm": 0.4662501811981201, "learning_rate": 0.0001, "loss": 1.58, "step": 703696 }, { "epoch": 60.710144927536234, "grad_norm": 0.35691899061203003, "learning_rate": 0.0001, "loss": 1.5829, "step": 703752 }, { "epoch": 60.71497584541063, "grad_norm": 0.3274211883544922, "learning_rate": 0.0001, "loss": 1.5796, "step": 703808 }, { "epoch": 60.71980676328502, "grad_norm": 0.4629564583301544, "learning_rate": 0.0001, "loss": 1.5767, "step": 703864 }, { "epoch": 60.72463768115942, "grad_norm": 0.9337282776832581, "learning_rate": 0.0001, "loss": 1.5778, "step": 703920 }, { "epoch": 60.729468599033815, "grad_norm": 0.3152867555618286, "learning_rate": 0.0001, "loss": 1.5784, "step": 703976 }, { "epoch": 60.734299516908216, "grad_norm": 0.28810223937034607, "learning_rate": 0.0001, "loss": 1.5771, "step": 704032 }, { "epoch": 60.73913043478261, "grad_norm": 0.29190632700920105, "learning_rate": 0.0001, "loss": 1.5726, "step": 704088 }, { "epoch": 60.743961352657, "grad_norm": 0.38491880893707275, "learning_rate": 0.0001, "loss": 1.5767, "step": 704144 }, { "epoch": 60.7487922705314, "grad_norm": 0.4193209409713745, "learning_rate": 0.0001, "loss": 1.5825, "step": 704200 }, { "epoch": 60.7536231884058, "grad_norm": 0.8171809911727905, "learning_rate": 0.0001, "loss": 1.5797, "step": 704256 }, { "epoch": 60.75845410628019, "grad_norm": 0.739954948425293, "learning_rate": 0.0001, "loss": 1.5824, "step": 704312 }, { "epoch": 60.76328502415459, "grad_norm": 0.5175865888595581, "learning_rate": 0.0001, "loss": 1.5792, "step": 704368 }, { "epoch": 60.768115942028984, "grad_norm": 1.2149710655212402, "learning_rate": 0.0001, "loss": 1.5814, "step": 704424 }, { "epoch": 60.772946859903385, "grad_norm": 0.23068314790725708, "learning_rate": 0.0001, "loss": 1.5758, "step": 704480 }, { "epoch": 60.77777777777778, "grad_norm": 0.3206363618373871, "learning_rate": 0.0001, "loss": 1.5839, "step": 704536 }, { "epoch": 60.78260869565217, "grad_norm": 0.23973232507705688, "learning_rate": 0.0001, "loss": 1.5856, "step": 704592 }, { "epoch": 60.78743961352657, "grad_norm": 0.2820824384689331, "learning_rate": 0.0001, "loss": 1.5774, "step": 704648 }, { "epoch": 60.792270531400966, "grad_norm": 0.2807294726371765, "learning_rate": 0.0001, "loss": 1.5758, "step": 704704 }, { "epoch": 60.79710144927536, "grad_norm": 0.27001234889030457, "learning_rate": 0.0001, "loss": 1.5805, "step": 704760 }, { "epoch": 60.80193236714976, "grad_norm": 2.431659460067749, "learning_rate": 0.0001, "loss": 1.5792, "step": 704816 }, { "epoch": 60.806763285024154, "grad_norm": 0.30381497740745544, "learning_rate": 0.0001, "loss": 1.5821, "step": 704872 }, { "epoch": 60.81159420289855, "grad_norm": 0.28622183203697205, "learning_rate": 0.0001, "loss": 1.581, "step": 704928 }, { "epoch": 60.81642512077295, "grad_norm": 0.23376376926898956, "learning_rate": 0.0001, "loss": 1.5742, "step": 704984 }, { "epoch": 60.82125603864734, "grad_norm": 2.4151201248168945, "learning_rate": 0.0001, "loss": 1.5822, "step": 705040 }, { "epoch": 60.82608695652174, "grad_norm": 0.34188878536224365, "learning_rate": 0.0001, "loss": 1.5809, "step": 705096 }, { "epoch": 60.830917874396135, "grad_norm": 0.39257293939590454, "learning_rate": 0.0001, "loss": 1.58, "step": 705152 }, { "epoch": 60.83574879227053, "grad_norm": 0.3087775409221649, "learning_rate": 0.0001, "loss": 1.5765, "step": 705208 }, { "epoch": 60.84057971014493, "grad_norm": 0.25810956954956055, "learning_rate": 0.0001, "loss": 1.5773, "step": 705264 }, { "epoch": 60.84541062801932, "grad_norm": 2.080561637878418, "learning_rate": 0.0001, "loss": 1.5764, "step": 705320 }, { "epoch": 60.85024154589372, "grad_norm": 0.2681334316730499, "learning_rate": 0.0001, "loss": 1.5782, "step": 705376 }, { "epoch": 60.85507246376812, "grad_norm": 0.3120843172073364, "learning_rate": 0.0001, "loss": 1.5722, "step": 705432 }, { "epoch": 60.85990338164251, "grad_norm": 0.4115764796733856, "learning_rate": 0.0001, "loss": 1.5786, "step": 705488 }, { "epoch": 60.86473429951691, "grad_norm": 0.36883267760276794, "learning_rate": 0.0001, "loss": 1.5826, "step": 705544 }, { "epoch": 60.869565217391305, "grad_norm": 1.66441810131073, "learning_rate": 0.0001, "loss": 1.5773, "step": 705600 }, { "epoch": 60.8743961352657, "grad_norm": 0.26673856377601624, "learning_rate": 0.0001, "loss": 1.5841, "step": 705656 }, { "epoch": 60.8792270531401, "grad_norm": 0.24281026422977448, "learning_rate": 0.0001, "loss": 1.5835, "step": 705712 }, { "epoch": 60.88405797101449, "grad_norm": 1.2472904920578003, "learning_rate": 0.0001, "loss": 1.5794, "step": 705768 }, { "epoch": 60.888888888888886, "grad_norm": 0.31463244557380676, "learning_rate": 0.0001, "loss": 1.5798, "step": 705824 }, { "epoch": 60.893719806763286, "grad_norm": 3.4052040576934814, "learning_rate": 0.0001, "loss": 1.5743, "step": 705880 }, { "epoch": 60.89855072463768, "grad_norm": 0.2651376724243164, "learning_rate": 0.0001, "loss": 1.5797, "step": 705936 }, { "epoch": 60.90338164251208, "grad_norm": 0.4313299059867859, "learning_rate": 0.0001, "loss": 1.5819, "step": 705992 }, { "epoch": 60.908212560386474, "grad_norm": 0.28778496384620667, "learning_rate": 0.0001, "loss": 1.5841, "step": 706048 }, { "epoch": 60.91304347826087, "grad_norm": 0.9363234043121338, "learning_rate": 0.0001, "loss": 1.5838, "step": 706104 }, { "epoch": 60.91787439613527, "grad_norm": 0.2762107253074646, "learning_rate": 0.0001, "loss": 1.5806, "step": 706160 }, { "epoch": 60.92270531400966, "grad_norm": 1.1599273681640625, "learning_rate": 0.0001, "loss": 1.5831, "step": 706216 }, { "epoch": 60.927536231884055, "grad_norm": 1.6533840894699097, "learning_rate": 0.0001, "loss": 1.5827, "step": 706272 }, { "epoch": 60.932367149758456, "grad_norm": 0.304665207862854, "learning_rate": 0.0001, "loss": 1.5732, "step": 706328 }, { "epoch": 60.93719806763285, "grad_norm": 0.6969716548919678, "learning_rate": 0.0001, "loss": 1.5815, "step": 706384 }, { "epoch": 60.94202898550725, "grad_norm": 0.5837818384170532, "learning_rate": 0.0001, "loss": 1.5827, "step": 706440 }, { "epoch": 60.94685990338164, "grad_norm": 16.20887565612793, "learning_rate": 0.0001, "loss": 1.5785, "step": 706496 }, { "epoch": 60.95169082125604, "grad_norm": 0.48456811904907227, "learning_rate": 0.0001, "loss": 1.5765, "step": 706552 }, { "epoch": 60.95652173913044, "grad_norm": 0.30704331398010254, "learning_rate": 0.0001, "loss": 1.5809, "step": 706608 }, { "epoch": 60.96135265700483, "grad_norm": 0.4699413776397705, "learning_rate": 0.0001, "loss": 1.5865, "step": 706664 }, { "epoch": 60.966183574879224, "grad_norm": 77.42718505859375, "learning_rate": 0.0001, "loss": 1.573, "step": 706720 }, { "epoch": 60.971014492753625, "grad_norm": 0.2723563611507416, "learning_rate": 0.0001, "loss": 1.5882, "step": 706776 }, { "epoch": 60.97584541062802, "grad_norm": 0.25183504819869995, "learning_rate": 0.0001, "loss": 1.5826, "step": 706832 }, { "epoch": 60.98067632850242, "grad_norm": 0.2587428092956543, "learning_rate": 0.0001, "loss": 1.5744, "step": 706888 }, { "epoch": 60.98550724637681, "grad_norm": 15.342964172363281, "learning_rate": 0.0001, "loss": 1.5795, "step": 706944 }, { "epoch": 60.990338164251206, "grad_norm": 1.0619491338729858, "learning_rate": 0.0001, "loss": 1.5752, "step": 707000 }, { "epoch": 60.99516908212561, "grad_norm": 0.2839602530002594, "learning_rate": 0.0001, "loss": 1.579, "step": 707056 }, { "epoch": 61.0, "grad_norm": 3.0173633098602295, "learning_rate": 0.0001, "loss": 1.5735, "step": 707112 }, { "epoch": 61.00483091787439, "grad_norm": 3.925373077392578, "learning_rate": 0.0001, "loss": 1.572, "step": 707168 }, { "epoch": 61.009661835748794, "grad_norm": 4.558032512664795, "learning_rate": 0.0001, "loss": 1.575, "step": 707224 }, { "epoch": 61.01449275362319, "grad_norm": 0.5272940397262573, "learning_rate": 0.0001, "loss": 1.5715, "step": 707280 }, { "epoch": 61.01932367149758, "grad_norm": 0.26881831884384155, "learning_rate": 0.0001, "loss": 1.568, "step": 707336 }, { "epoch": 61.02415458937198, "grad_norm": 0.2460513412952423, "learning_rate": 0.0001, "loss": 1.5716, "step": 707392 }, { "epoch": 61.028985507246375, "grad_norm": 0.29725199937820435, "learning_rate": 0.0001, "loss": 1.5715, "step": 707448 }, { "epoch": 61.033816425120776, "grad_norm": 2.3977904319763184, "learning_rate": 0.0001, "loss": 1.5737, "step": 707504 }, { "epoch": 61.03864734299517, "grad_norm": 0.4584938883781433, "learning_rate": 0.0001, "loss": 1.5765, "step": 707560 }, { "epoch": 61.04347826086956, "grad_norm": 0.6319020390510559, "learning_rate": 0.0001, "loss": 1.5704, "step": 707616 }, { "epoch": 61.04830917874396, "grad_norm": 0.28821122646331787, "learning_rate": 0.0001, "loss": 1.5708, "step": 707672 }, { "epoch": 61.05314009661836, "grad_norm": 4.598287105560303, "learning_rate": 0.0001, "loss": 1.574, "step": 707728 }, { "epoch": 61.05797101449275, "grad_norm": 2.8723325729370117, "learning_rate": 0.0001, "loss": 1.5706, "step": 707784 }, { "epoch": 61.06280193236715, "grad_norm": 5.082941055297852, "learning_rate": 0.0001, "loss": 1.5749, "step": 707840 }, { "epoch": 61.067632850241544, "grad_norm": 0.2612135410308838, "learning_rate": 0.0001, "loss": 1.5771, "step": 707896 }, { "epoch": 61.072463768115945, "grad_norm": 0.4682893455028534, "learning_rate": 0.0001, "loss": 1.5714, "step": 707952 }, { "epoch": 61.07729468599034, "grad_norm": 0.3149164021015167, "learning_rate": 0.0001, "loss": 1.5686, "step": 708008 }, { "epoch": 61.08212560386473, "grad_norm": 0.29604437947273254, "learning_rate": 0.0001, "loss": 1.5713, "step": 708064 }, { "epoch": 61.08695652173913, "grad_norm": 0.35978588461875916, "learning_rate": 0.0001, "loss": 1.5689, "step": 708120 }, { "epoch": 61.091787439613526, "grad_norm": 0.37029239535331726, "learning_rate": 0.0001, "loss": 1.5765, "step": 708176 }, { "epoch": 61.09661835748792, "grad_norm": 1.2241863012313843, "learning_rate": 0.0001, "loss": 1.5733, "step": 708232 }, { "epoch": 61.10144927536232, "grad_norm": 0.2892577648162842, "learning_rate": 0.0001, "loss": 1.574, "step": 708288 }, { "epoch": 61.106280193236714, "grad_norm": 3.019963502883911, "learning_rate": 0.0001, "loss": 1.5706, "step": 708344 }, { "epoch": 61.111111111111114, "grad_norm": 4.545439720153809, "learning_rate": 0.0001, "loss": 1.5702, "step": 708400 }, { "epoch": 61.11594202898551, "grad_norm": 0.2560272812843323, "learning_rate": 0.0001, "loss": 1.5723, "step": 708456 }, { "epoch": 61.1207729468599, "grad_norm": 1.0144187211990356, "learning_rate": 0.0001, "loss": 1.575, "step": 708512 }, { "epoch": 61.1256038647343, "grad_norm": 1.125284194946289, "learning_rate": 0.0001, "loss": 1.5707, "step": 708568 }, { "epoch": 61.130434782608695, "grad_norm": 0.3748113512992859, "learning_rate": 0.0001, "loss": 1.5763, "step": 708624 }, { "epoch": 61.13526570048309, "grad_norm": 1.199535846710205, "learning_rate": 0.0001, "loss": 1.5738, "step": 708680 }, { "epoch": 61.14009661835749, "grad_norm": 0.28137844800949097, "learning_rate": 0.0001, "loss": 1.5716, "step": 708736 }, { "epoch": 61.14492753623188, "grad_norm": 0.25269901752471924, "learning_rate": 0.0001, "loss": 1.5772, "step": 708792 }, { "epoch": 61.14975845410628, "grad_norm": 0.241893470287323, "learning_rate": 0.0001, "loss": 1.5777, "step": 708848 }, { "epoch": 61.15458937198068, "grad_norm": 37.60060119628906, "learning_rate": 0.0001, "loss": 1.5678, "step": 708904 }, { "epoch": 61.15942028985507, "grad_norm": 0.23191361129283905, "learning_rate": 0.0001, "loss": 1.5728, "step": 708960 }, { "epoch": 61.16425120772947, "grad_norm": 2.8853492736816406, "learning_rate": 0.0001, "loss": 1.5729, "step": 709016 }, { "epoch": 61.169082125603865, "grad_norm": 0.3786466121673584, "learning_rate": 0.0001, "loss": 1.5778, "step": 709072 }, { "epoch": 61.17391304347826, "grad_norm": 0.2874002158641815, "learning_rate": 0.0001, "loss": 1.5759, "step": 709128 }, { "epoch": 61.17874396135266, "grad_norm": 0.28061333298683167, "learning_rate": 0.0001, "loss": 1.5778, "step": 709184 }, { "epoch": 61.18357487922705, "grad_norm": 0.7291356325149536, "learning_rate": 0.0001, "loss": 1.5707, "step": 709240 }, { "epoch": 61.18840579710145, "grad_norm": 1.8430068492889404, "learning_rate": 0.0001, "loss": 1.5742, "step": 709296 }, { "epoch": 61.193236714975846, "grad_norm": 0.2931986153125763, "learning_rate": 0.0001, "loss": 1.5775, "step": 709352 }, { "epoch": 61.19806763285024, "grad_norm": 0.254166841506958, "learning_rate": 0.0001, "loss": 1.572, "step": 709408 }, { "epoch": 61.20289855072464, "grad_norm": 0.27229824662208557, "learning_rate": 0.0001, "loss": 1.5728, "step": 709464 }, { "epoch": 61.207729468599034, "grad_norm": 0.3015446960926056, "learning_rate": 0.0001, "loss": 1.5726, "step": 709520 }, { "epoch": 61.21256038647343, "grad_norm": 1.6222108602523804, "learning_rate": 0.0001, "loss": 1.5713, "step": 709576 }, { "epoch": 61.21739130434783, "grad_norm": 0.2885836362838745, "learning_rate": 0.0001, "loss": 1.5756, "step": 709632 }, { "epoch": 61.22222222222222, "grad_norm": 0.7709008455276489, "learning_rate": 0.0001, "loss": 1.5703, "step": 709688 }, { "epoch": 61.227053140096615, "grad_norm": 0.3154696822166443, "learning_rate": 0.0001, "loss": 1.5762, "step": 709744 }, { "epoch": 61.231884057971016, "grad_norm": 0.2746739685535431, "learning_rate": 0.0001, "loss": 1.5714, "step": 709800 }, { "epoch": 61.23671497584541, "grad_norm": 0.2599153220653534, "learning_rate": 0.0001, "loss": 1.575, "step": 709856 }, { "epoch": 61.24154589371981, "grad_norm": 0.25850871205329895, "learning_rate": 0.0001, "loss": 1.5732, "step": 709912 }, { "epoch": 61.2463768115942, "grad_norm": 1.091483235359192, "learning_rate": 0.0001, "loss": 1.5724, "step": 709968 }, { "epoch": 61.2512077294686, "grad_norm": 2.1819355487823486, "learning_rate": 0.0001, "loss": 1.5679, "step": 710024 }, { "epoch": 61.256038647343, "grad_norm": 0.24929137527942657, "learning_rate": 0.0001, "loss": 1.5775, "step": 710080 }, { "epoch": 61.26086956521739, "grad_norm": 0.2785819172859192, "learning_rate": 0.0001, "loss": 1.5752, "step": 710136 }, { "epoch": 61.265700483091784, "grad_norm": 0.38679370284080505, "learning_rate": 0.0001, "loss": 1.5801, "step": 710192 }, { "epoch": 61.270531400966185, "grad_norm": 0.9100805521011353, "learning_rate": 0.0001, "loss": 1.5685, "step": 710248 }, { "epoch": 61.27536231884058, "grad_norm": 8.985306739807129, "learning_rate": 0.0001, "loss": 1.568, "step": 710304 }, { "epoch": 61.28019323671498, "grad_norm": 0.44843730330467224, "learning_rate": 0.0001, "loss": 1.5813, "step": 710360 }, { "epoch": 61.28502415458937, "grad_norm": 0.26209965348243713, "learning_rate": 0.0001, "loss": 1.5748, "step": 710416 }, { "epoch": 61.289855072463766, "grad_norm": 2.6083216667175293, "learning_rate": 0.0001, "loss": 1.5822, "step": 710472 }, { "epoch": 61.29468599033817, "grad_norm": 3.275996685028076, "learning_rate": 0.0001, "loss": 1.5799, "step": 710528 }, { "epoch": 61.29951690821256, "grad_norm": 0.29894179105758667, "learning_rate": 0.0001, "loss": 1.5728, "step": 710584 }, { "epoch": 61.30434782608695, "grad_norm": 2.273465394973755, "learning_rate": 0.0001, "loss": 1.5761, "step": 710640 }, { "epoch": 61.309178743961354, "grad_norm": 0.6388105154037476, "learning_rate": 0.0001, "loss": 1.5853, "step": 710696 }, { "epoch": 61.31400966183575, "grad_norm": 0.40667659044265747, "learning_rate": 0.0001, "loss": 1.5812, "step": 710752 }, { "epoch": 61.31884057971015, "grad_norm": 0.226955845952034, "learning_rate": 0.0001, "loss": 1.5789, "step": 710808 }, { "epoch": 61.32367149758454, "grad_norm": 1.377339243888855, "learning_rate": 0.0001, "loss": 1.5722, "step": 710864 }, { "epoch": 61.328502415458935, "grad_norm": 10.629037857055664, "learning_rate": 0.0001, "loss": 1.5684, "step": 710920 }, { "epoch": 61.333333333333336, "grad_norm": 1.2224481105804443, "learning_rate": 0.0001, "loss": 1.5691, "step": 710976 }, { "epoch": 61.33816425120773, "grad_norm": 0.44341856241226196, "learning_rate": 0.0001, "loss": 1.5772, "step": 711032 }, { "epoch": 61.34299516908212, "grad_norm": 0.5303725004196167, "learning_rate": 0.0001, "loss": 1.5737, "step": 711088 }, { "epoch": 61.34782608695652, "grad_norm": 1.0188984870910645, "learning_rate": 0.0001, "loss": 1.5788, "step": 711144 }, { "epoch": 61.35265700483092, "grad_norm": 0.8242328763008118, "learning_rate": 0.0001, "loss": 1.577, "step": 711200 }, { "epoch": 61.35748792270532, "grad_norm": 0.9852714538574219, "learning_rate": 0.0001, "loss": 1.576, "step": 711256 }, { "epoch": 61.36231884057971, "grad_norm": 0.457401305437088, "learning_rate": 0.0001, "loss": 1.5816, "step": 711312 }, { "epoch": 61.367149758454104, "grad_norm": 0.4954931139945984, "learning_rate": 0.0001, "loss": 1.5748, "step": 711368 }, { "epoch": 61.371980676328505, "grad_norm": 0.2956332266330719, "learning_rate": 0.0001, "loss": 1.5769, "step": 711424 }, { "epoch": 61.3768115942029, "grad_norm": 0.3747125267982483, "learning_rate": 0.0001, "loss": 1.5741, "step": 711480 }, { "epoch": 61.38164251207729, "grad_norm": 0.4140782654285431, "learning_rate": 0.0001, "loss": 1.577, "step": 711536 }, { "epoch": 61.38647342995169, "grad_norm": 0.27006375789642334, "learning_rate": 0.0001, "loss": 1.5807, "step": 711592 }, { "epoch": 61.391304347826086, "grad_norm": 0.2834433913230896, "learning_rate": 0.0001, "loss": 1.5771, "step": 711648 }, { "epoch": 61.39613526570048, "grad_norm": 0.3385052978992462, "learning_rate": 0.0001, "loss": 1.5796, "step": 711704 }, { "epoch": 61.40096618357488, "grad_norm": 0.5700408220291138, "learning_rate": 0.0001, "loss": 1.5765, "step": 711760 }, { "epoch": 61.405797101449274, "grad_norm": 0.296112596988678, "learning_rate": 0.0001, "loss": 1.5774, "step": 711816 }, { "epoch": 61.410628019323674, "grad_norm": 0.37918850779533386, "learning_rate": 0.0001, "loss": 1.5769, "step": 711872 }, { "epoch": 61.41545893719807, "grad_norm": 0.7240327000617981, "learning_rate": 0.0001, "loss": 1.5739, "step": 711928 }, { "epoch": 61.42028985507246, "grad_norm": 0.2588534951210022, "learning_rate": 0.0001, "loss": 1.5779, "step": 711984 }, { "epoch": 61.42512077294686, "grad_norm": 0.2852977216243744, "learning_rate": 0.0001, "loss": 1.5727, "step": 712040 }, { "epoch": 61.429951690821255, "grad_norm": 0.2521919310092926, "learning_rate": 0.0001, "loss": 1.579, "step": 712096 }, { "epoch": 61.43478260869565, "grad_norm": 0.5191546082496643, "learning_rate": 0.0001, "loss": 1.5783, "step": 712152 }, { "epoch": 61.43961352657005, "grad_norm": 0.4493212103843689, "learning_rate": 0.0001, "loss": 1.5759, "step": 712208 }, { "epoch": 61.44444444444444, "grad_norm": 1.0132029056549072, "learning_rate": 0.0001, "loss": 1.5735, "step": 712264 }, { "epoch": 61.44927536231884, "grad_norm": 0.25182828307151794, "learning_rate": 0.0001, "loss": 1.5714, "step": 712320 }, { "epoch": 61.45410628019324, "grad_norm": 0.4157980978488922, "learning_rate": 0.0001, "loss": 1.5781, "step": 712376 }, { "epoch": 61.45893719806763, "grad_norm": 0.27654898166656494, "learning_rate": 0.0001, "loss": 1.5811, "step": 712432 }, { "epoch": 61.46376811594203, "grad_norm": 0.4602123498916626, "learning_rate": 0.0001, "loss": 1.5746, "step": 712488 }, { "epoch": 61.468599033816425, "grad_norm": 0.31591328978538513, "learning_rate": 0.0001, "loss": 1.5831, "step": 712544 }, { "epoch": 61.47342995169082, "grad_norm": 0.7056344151496887, "learning_rate": 0.0001, "loss": 1.5805, "step": 712600 }, { "epoch": 61.47826086956522, "grad_norm": 16.0964412689209, "learning_rate": 0.0001, "loss": 1.5792, "step": 712656 }, { "epoch": 61.48309178743961, "grad_norm": 0.23835571110248566, "learning_rate": 0.0001, "loss": 1.5757, "step": 712712 }, { "epoch": 61.48792270531401, "grad_norm": 0.3656258285045624, "learning_rate": 0.0001, "loss": 1.5774, "step": 712768 }, { "epoch": 61.492753623188406, "grad_norm": 0.3629853427410126, "learning_rate": 0.0001, "loss": 1.5806, "step": 712824 }, { "epoch": 61.4975845410628, "grad_norm": 0.36367160081863403, "learning_rate": 0.0001, "loss": 1.5762, "step": 712880 }, { "epoch": 61.5024154589372, "grad_norm": 0.2584623694419861, "learning_rate": 0.0001, "loss": 1.577, "step": 712936 }, { "epoch": 61.507246376811594, "grad_norm": 0.25657474994659424, "learning_rate": 0.0001, "loss": 1.5735, "step": 712992 }, { "epoch": 61.51207729468599, "grad_norm": 0.30951741337776184, "learning_rate": 0.0001, "loss": 1.5738, "step": 713048 }, { "epoch": 61.51690821256039, "grad_norm": 0.27919116616249084, "learning_rate": 0.0001, "loss": 1.5782, "step": 713104 }, { "epoch": 61.52173913043478, "grad_norm": 0.31952840089797974, "learning_rate": 0.0001, "loss": 1.573, "step": 713160 }, { "epoch": 61.52657004830918, "grad_norm": 5.581448554992676, "learning_rate": 0.0001, "loss": 1.5755, "step": 713216 }, { "epoch": 61.531400966183575, "grad_norm": 0.8094971179962158, "learning_rate": 0.0001, "loss": 1.5813, "step": 713272 }, { "epoch": 61.53623188405797, "grad_norm": 1.05503511428833, "learning_rate": 0.0001, "loss": 1.575, "step": 713328 }, { "epoch": 61.54106280193237, "grad_norm": 0.22255708277225494, "learning_rate": 0.0001, "loss": 1.5704, "step": 713384 }, { "epoch": 61.54589371980676, "grad_norm": 0.2873416244983673, "learning_rate": 0.0001, "loss": 1.5753, "step": 713440 }, { "epoch": 61.55072463768116, "grad_norm": 0.32051974534988403, "learning_rate": 0.0001, "loss": 1.5789, "step": 713496 }, { "epoch": 61.55555555555556, "grad_norm": 4.6225175857543945, "learning_rate": 0.0001, "loss": 1.5754, "step": 713552 }, { "epoch": 61.56038647342995, "grad_norm": 0.2495705783367157, "learning_rate": 0.0001, "loss": 1.5806, "step": 713608 }, { "epoch": 61.56521739130435, "grad_norm": 0.22982977330684662, "learning_rate": 0.0001, "loss": 1.5764, "step": 713664 }, { "epoch": 61.570048309178745, "grad_norm": 0.30501580238342285, "learning_rate": 0.0001, "loss": 1.577, "step": 713720 }, { "epoch": 61.57487922705314, "grad_norm": 0.4799022078514099, "learning_rate": 0.0001, "loss": 1.5797, "step": 713776 }, { "epoch": 61.57971014492754, "grad_norm": 1.7759435176849365, "learning_rate": 0.0001, "loss": 1.5717, "step": 713832 }, { "epoch": 61.58454106280193, "grad_norm": 0.31036433577537537, "learning_rate": 0.0001, "loss": 1.5749, "step": 713888 }, { "epoch": 61.589371980676326, "grad_norm": 0.24876782298088074, "learning_rate": 0.0001, "loss": 1.5801, "step": 713944 }, { "epoch": 61.594202898550726, "grad_norm": 3.6069633960723877, "learning_rate": 0.0001, "loss": 1.5734, "step": 714000 }, { "epoch": 61.59903381642512, "grad_norm": 0.2292565256357193, "learning_rate": 0.0001, "loss": 1.5757, "step": 714056 }, { "epoch": 61.60386473429952, "grad_norm": 0.26956114172935486, "learning_rate": 0.0001, "loss": 1.5786, "step": 714112 }, { "epoch": 61.608695652173914, "grad_norm": 0.5024564266204834, "learning_rate": 0.0001, "loss": 1.57, "step": 714168 }, { "epoch": 61.61352657004831, "grad_norm": 0.3320198655128479, "learning_rate": 0.0001, "loss": 1.5836, "step": 714224 }, { "epoch": 61.61835748792271, "grad_norm": 0.24528321623802185, "learning_rate": 0.0001, "loss": 1.5828, "step": 714280 }, { "epoch": 61.6231884057971, "grad_norm": 0.3065892457962036, "learning_rate": 0.0001, "loss": 1.5717, "step": 714336 }, { "epoch": 61.628019323671495, "grad_norm": 0.2761060297489166, "learning_rate": 0.0001, "loss": 1.5784, "step": 714392 }, { "epoch": 61.632850241545896, "grad_norm": 1.2911356687545776, "learning_rate": 0.0001, "loss": 1.5793, "step": 714448 }, { "epoch": 61.63768115942029, "grad_norm": 0.5480738878250122, "learning_rate": 0.0001, "loss": 1.5762, "step": 714504 }, { "epoch": 61.64251207729468, "grad_norm": 0.31444698572158813, "learning_rate": 0.0001, "loss": 1.577, "step": 714560 }, { "epoch": 61.64734299516908, "grad_norm": 0.31863847374916077, "learning_rate": 0.0001, "loss": 1.5805, "step": 714616 }, { "epoch": 61.65217391304348, "grad_norm": 0.4323248565196991, "learning_rate": 0.0001, "loss": 1.5757, "step": 714672 }, { "epoch": 61.65700483091788, "grad_norm": 0.3030738830566406, "learning_rate": 0.0001, "loss": 1.5799, "step": 714728 }, { "epoch": 61.66183574879227, "grad_norm": 0.26754432916641235, "learning_rate": 0.0001, "loss": 1.5821, "step": 714784 }, { "epoch": 61.666666666666664, "grad_norm": 0.2535049319267273, "learning_rate": 0.0001, "loss": 1.5797, "step": 714840 }, { "epoch": 61.671497584541065, "grad_norm": 3.7473013401031494, "learning_rate": 0.0001, "loss": 1.5818, "step": 714896 }, { "epoch": 61.67632850241546, "grad_norm": 0.3863111436367035, "learning_rate": 0.0001, "loss": 1.5725, "step": 714952 }, { "epoch": 61.68115942028985, "grad_norm": 0.4501098096370697, "learning_rate": 0.0001, "loss": 1.5742, "step": 715008 }, { "epoch": 61.68599033816425, "grad_norm": 0.31327489018440247, "learning_rate": 0.0001, "loss": 1.5753, "step": 715064 }, { "epoch": 61.690821256038646, "grad_norm": 0.27620646357536316, "learning_rate": 0.0001, "loss": 1.5775, "step": 715120 }, { "epoch": 61.69565217391305, "grad_norm": 0.2712896466255188, "learning_rate": 0.0001, "loss": 1.5765, "step": 715176 }, { "epoch": 61.70048309178744, "grad_norm": 0.25367820262908936, "learning_rate": 0.0001, "loss": 1.5755, "step": 715232 }, { "epoch": 61.70531400966183, "grad_norm": 0.29599303007125854, "learning_rate": 0.0001, "loss": 1.573, "step": 715288 }, { "epoch": 61.710144927536234, "grad_norm": 0.3205218017101288, "learning_rate": 0.0001, "loss": 1.5755, "step": 715344 }, { "epoch": 61.71497584541063, "grad_norm": 0.24954386055469513, "learning_rate": 0.0001, "loss": 1.5738, "step": 715400 }, { "epoch": 61.71980676328502, "grad_norm": 0.2510959804058075, "learning_rate": 0.0001, "loss": 1.5777, "step": 715456 }, { "epoch": 61.72463768115942, "grad_norm": 0.31778275966644287, "learning_rate": 0.0001, "loss": 1.5735, "step": 715512 }, { "epoch": 61.729468599033815, "grad_norm": 0.32798612117767334, "learning_rate": 0.0001, "loss": 1.579, "step": 715568 }, { "epoch": 61.734299516908216, "grad_norm": 0.2992408871650696, "learning_rate": 0.0001, "loss": 1.5755, "step": 715624 }, { "epoch": 61.73913043478261, "grad_norm": 0.26027578115463257, "learning_rate": 0.0001, "loss": 1.5721, "step": 715680 }, { "epoch": 61.743961352657, "grad_norm": 0.24718870222568512, "learning_rate": 0.0001, "loss": 1.5778, "step": 715736 }, { "epoch": 61.7487922705314, "grad_norm": 0.34376052021980286, "learning_rate": 0.0001, "loss": 1.5876, "step": 715792 }, { "epoch": 61.7536231884058, "grad_norm": 0.2940681576728821, "learning_rate": 0.0001, "loss": 1.5763, "step": 715848 }, { "epoch": 61.75845410628019, "grad_norm": 0.4428504705429077, "learning_rate": 0.0001, "loss": 1.577, "step": 715904 }, { "epoch": 61.76328502415459, "grad_norm": 0.26935720443725586, "learning_rate": 0.0001, "loss": 1.5783, "step": 715960 }, { "epoch": 61.768115942028984, "grad_norm": 0.3200920522212982, "learning_rate": 0.0001, "loss": 1.5783, "step": 716016 }, { "epoch": 61.772946859903385, "grad_norm": 0.40254729986190796, "learning_rate": 0.0001, "loss": 1.5802, "step": 716072 }, { "epoch": 61.77777777777778, "grad_norm": 0.42070648074150085, "learning_rate": 0.0001, "loss": 1.5794, "step": 716128 }, { "epoch": 61.78260869565217, "grad_norm": 0.26394692063331604, "learning_rate": 0.0001, "loss": 1.5681, "step": 716184 }, { "epoch": 61.78743961352657, "grad_norm": 0.5077584385871887, "learning_rate": 0.0001, "loss": 1.5786, "step": 716240 }, { "epoch": 61.792270531400966, "grad_norm": 0.2613588273525238, "learning_rate": 0.0001, "loss": 1.5721, "step": 716296 }, { "epoch": 61.79710144927536, "grad_norm": 0.27209460735321045, "learning_rate": 0.0001, "loss": 1.5759, "step": 716352 }, { "epoch": 61.80193236714976, "grad_norm": 0.27411115169525146, "learning_rate": 0.0001, "loss": 1.5778, "step": 716408 }, { "epoch": 61.806763285024154, "grad_norm": 0.248151496052742, "learning_rate": 0.0001, "loss": 1.5741, "step": 716464 }, { "epoch": 61.81159420289855, "grad_norm": 3.373969078063965, "learning_rate": 0.0001, "loss": 1.571, "step": 716520 }, { "epoch": 61.81642512077295, "grad_norm": 1.1116524934768677, "learning_rate": 0.0001, "loss": 1.5775, "step": 716576 }, { "epoch": 61.82125603864734, "grad_norm": 0.6801126003265381, "learning_rate": 0.0001, "loss": 1.5736, "step": 716632 }, { "epoch": 61.82608695652174, "grad_norm": 0.26198506355285645, "learning_rate": 0.0001, "loss": 1.5765, "step": 716688 }, { "epoch": 61.830917874396135, "grad_norm": 0.3113771677017212, "learning_rate": 0.0001, "loss": 1.5784, "step": 716744 }, { "epoch": 61.83574879227053, "grad_norm": 0.4877435863018036, "learning_rate": 0.0001, "loss": 1.5736, "step": 716800 }, { "epoch": 61.84057971014493, "grad_norm": 0.33468589186668396, "learning_rate": 0.0001, "loss": 1.5823, "step": 716856 }, { "epoch": 61.84541062801932, "grad_norm": 0.2770255208015442, "learning_rate": 0.0001, "loss": 1.5788, "step": 716912 }, { "epoch": 61.85024154589372, "grad_norm": 1.8216781616210938, "learning_rate": 0.0001, "loss": 1.5828, "step": 716968 }, { "epoch": 61.85507246376812, "grad_norm": 0.3090316653251648, "learning_rate": 0.0001, "loss": 1.5701, "step": 717024 }, { "epoch": 61.85990338164251, "grad_norm": 0.4076203405857086, "learning_rate": 0.0001, "loss": 1.5754, "step": 717080 }, { "epoch": 61.86473429951691, "grad_norm": 0.5850328803062439, "learning_rate": 0.0001, "loss": 1.5724, "step": 717136 }, { "epoch": 61.869565217391305, "grad_norm": 0.3816438615322113, "learning_rate": 0.0001, "loss": 1.577, "step": 717192 }, { "epoch": 61.8743961352657, "grad_norm": 0.30891329050064087, "learning_rate": 0.0001, "loss": 1.5764, "step": 717248 }, { "epoch": 61.8792270531401, "grad_norm": 0.35230880975723267, "learning_rate": 0.0001, "loss": 1.572, "step": 717304 }, { "epoch": 61.88405797101449, "grad_norm": 0.27095702290534973, "learning_rate": 0.0001, "loss": 1.5765, "step": 717360 }, { "epoch": 61.888888888888886, "grad_norm": 0.3140602707862854, "learning_rate": 0.0001, "loss": 1.5773, "step": 717416 }, { "epoch": 61.893719806763286, "grad_norm": 0.3024456202983856, "learning_rate": 0.0001, "loss": 1.5782, "step": 717472 }, { "epoch": 61.89855072463768, "grad_norm": 0.24109508097171783, "learning_rate": 0.0001, "loss": 1.5723, "step": 717528 }, { "epoch": 61.90338164251208, "grad_norm": 0.37315553426742554, "learning_rate": 0.0001, "loss": 1.5753, "step": 717584 }, { "epoch": 61.908212560386474, "grad_norm": 0.37923529744148254, "learning_rate": 0.0001, "loss": 1.5724, "step": 717640 }, { "epoch": 61.91304347826087, "grad_norm": 0.2573399841785431, "learning_rate": 0.0001, "loss": 1.5718, "step": 717696 }, { "epoch": 61.91787439613527, "grad_norm": 1.2428532838821411, "learning_rate": 0.0001, "loss": 1.5817, "step": 717752 }, { "epoch": 61.92270531400966, "grad_norm": 0.26380500197410583, "learning_rate": 0.0001, "loss": 1.5821, "step": 717808 }, { "epoch": 61.927536231884055, "grad_norm": 1.6198300123214722, "learning_rate": 0.0001, "loss": 1.5822, "step": 717864 }, { "epoch": 61.932367149758456, "grad_norm": 0.5084480047225952, "learning_rate": 0.0001, "loss": 1.579, "step": 717920 }, { "epoch": 61.93719806763285, "grad_norm": 0.22791847586631775, "learning_rate": 0.0001, "loss": 1.5802, "step": 717976 }, { "epoch": 61.94202898550725, "grad_norm": 0.311413049697876, "learning_rate": 0.0001, "loss": 1.5828, "step": 718032 }, { "epoch": 61.94685990338164, "grad_norm": 0.2812856137752533, "learning_rate": 0.0001, "loss": 1.5816, "step": 718088 }, { "epoch": 61.95169082125604, "grad_norm": 16.810190200805664, "learning_rate": 0.0001, "loss": 1.5735, "step": 718144 }, { "epoch": 61.95652173913044, "grad_norm": 0.39504769444465637, "learning_rate": 0.0001, "loss": 1.5777, "step": 718200 }, { "epoch": 61.96135265700483, "grad_norm": 0.3207128643989563, "learning_rate": 0.0001, "loss": 1.5795, "step": 718256 }, { "epoch": 61.966183574879224, "grad_norm": 0.2892376780509949, "learning_rate": 0.0001, "loss": 1.5742, "step": 718312 }, { "epoch": 61.971014492753625, "grad_norm": 0.28309258818626404, "learning_rate": 0.0001, "loss": 1.5752, "step": 718368 }, { "epoch": 61.97584541062802, "grad_norm": 0.5236415863037109, "learning_rate": 0.0001, "loss": 1.5772, "step": 718424 }, { "epoch": 61.98067632850242, "grad_norm": 0.297164648771286, "learning_rate": 0.0001, "loss": 1.5714, "step": 718480 }, { "epoch": 61.98550724637681, "grad_norm": 0.38202664256095886, "learning_rate": 0.0001, "loss": 1.5754, "step": 718536 }, { "epoch": 61.990338164251206, "grad_norm": 0.27205079793930054, "learning_rate": 0.0001, "loss": 1.5743, "step": 718592 }, { "epoch": 61.99516908212561, "grad_norm": 0.3413431942462921, "learning_rate": 0.0001, "loss": 1.5772, "step": 718648 }, { "epoch": 62.0, "grad_norm": 0.26737019419670105, "learning_rate": 0.0001, "loss": 1.5755, "step": 718704 }, { "epoch": 62.00483091787439, "grad_norm": 0.23744308948516846, "learning_rate": 0.0001, "loss": 1.5734, "step": 718760 }, { "epoch": 62.009661835748794, "grad_norm": 0.30823537707328796, "learning_rate": 0.0001, "loss": 1.5751, "step": 718816 }, { "epoch": 62.01449275362319, "grad_norm": 0.4164966344833374, "learning_rate": 0.0001, "loss": 1.5731, "step": 718872 }, { "epoch": 62.01932367149758, "grad_norm": 0.29370516538619995, "learning_rate": 0.0001, "loss": 1.5636, "step": 718928 }, { "epoch": 62.02415458937198, "grad_norm": 0.3478502631187439, "learning_rate": 0.0001, "loss": 1.5705, "step": 718984 }, { "epoch": 62.028985507246375, "grad_norm": 0.4254664480686188, "learning_rate": 0.0001, "loss": 1.5662, "step": 719040 }, { "epoch": 62.033816425120776, "grad_norm": 0.35918867588043213, "learning_rate": 0.0001, "loss": 1.5663, "step": 719096 }, { "epoch": 62.03864734299517, "grad_norm": 0.2897569537162781, "learning_rate": 0.0001, "loss": 1.5702, "step": 719152 }, { "epoch": 62.04347826086956, "grad_norm": 2.839599370956421, "learning_rate": 0.0001, "loss": 1.5663, "step": 719208 }, { "epoch": 62.04830917874396, "grad_norm": 0.27170616388320923, "learning_rate": 0.0001, "loss": 1.5753, "step": 719264 }, { "epoch": 62.05314009661836, "grad_norm": 0.8673324584960938, "learning_rate": 0.0001, "loss": 1.5723, "step": 719320 }, { "epoch": 62.05797101449275, "grad_norm": 0.2595483958721161, "learning_rate": 0.0001, "loss": 1.5757, "step": 719376 }, { "epoch": 62.06280193236715, "grad_norm": 0.27956634759902954, "learning_rate": 0.0001, "loss": 1.5692, "step": 719432 }, { "epoch": 62.067632850241544, "grad_norm": 0.24846455454826355, "learning_rate": 0.0001, "loss": 1.5642, "step": 719488 }, { "epoch": 62.072463768115945, "grad_norm": 0.2939254641532898, "learning_rate": 0.0001, "loss": 1.5646, "step": 719544 }, { "epoch": 62.07729468599034, "grad_norm": 0.48062849044799805, "learning_rate": 0.0001, "loss": 1.5666, "step": 719600 }, { "epoch": 62.08212560386473, "grad_norm": 0.3272504508495331, "learning_rate": 0.0001, "loss": 1.5722, "step": 719656 }, { "epoch": 62.08695652173913, "grad_norm": 0.3451225161552429, "learning_rate": 0.0001, "loss": 1.5754, "step": 719712 }, { "epoch": 62.091787439613526, "grad_norm": 0.46895721554756165, "learning_rate": 0.0001, "loss": 1.5702, "step": 719768 }, { "epoch": 62.09661835748792, "grad_norm": 0.23571674525737762, "learning_rate": 0.0001, "loss": 1.5654, "step": 719824 }, { "epoch": 62.10144927536232, "grad_norm": 0.2887115776538849, "learning_rate": 0.0001, "loss": 1.5735, "step": 719880 }, { "epoch": 62.106280193236714, "grad_norm": 15.427064895629883, "learning_rate": 0.0001, "loss": 1.5718, "step": 719936 }, { "epoch": 62.111111111111114, "grad_norm": 0.2987514138221741, "learning_rate": 0.0001, "loss": 1.5758, "step": 719992 }, { "epoch": 62.11594202898551, "grad_norm": 0.6932117938995361, "learning_rate": 0.0001, "loss": 1.5698, "step": 720048 }, { "epoch": 62.1207729468599, "grad_norm": 0.2765071392059326, "learning_rate": 0.0001, "loss": 1.5743, "step": 720104 }, { "epoch": 62.1256038647343, "grad_norm": 0.3690530061721802, "learning_rate": 0.0001, "loss": 1.5715, "step": 720160 }, { "epoch": 62.130434782608695, "grad_norm": 6.731592178344727, "learning_rate": 0.0001, "loss": 1.574, "step": 720216 }, { "epoch": 62.13526570048309, "grad_norm": 0.25920456647872925, "learning_rate": 0.0001, "loss": 1.5697, "step": 720272 }, { "epoch": 62.14009661835749, "grad_norm": 0.3881968855857849, "learning_rate": 0.0001, "loss": 1.5718, "step": 720328 }, { "epoch": 62.14492753623188, "grad_norm": 0.26893243193626404, "learning_rate": 0.0001, "loss": 1.5705, "step": 720384 }, { "epoch": 62.14975845410628, "grad_norm": 0.9074289202690125, "learning_rate": 0.0001, "loss": 1.5711, "step": 720440 }, { "epoch": 62.15458937198068, "grad_norm": 0.6855112910270691, "learning_rate": 0.0001, "loss": 1.5704, "step": 720496 }, { "epoch": 62.15942028985507, "grad_norm": 0.4005833864212036, "learning_rate": 0.0001, "loss": 1.5678, "step": 720552 }, { "epoch": 62.16425120772947, "grad_norm": 0.24582967162132263, "learning_rate": 0.0001, "loss": 1.5739, "step": 720608 }, { "epoch": 62.169082125603865, "grad_norm": 1.3619827032089233, "learning_rate": 0.0001, "loss": 1.5742, "step": 720664 }, { "epoch": 62.17391304347826, "grad_norm": 0.26840364933013916, "learning_rate": 0.0001, "loss": 1.5689, "step": 720720 }, { "epoch": 62.17874396135266, "grad_norm": 0.2663675546646118, "learning_rate": 0.0001, "loss": 1.5725, "step": 720776 }, { "epoch": 62.18357487922705, "grad_norm": 0.2864399254322052, "learning_rate": 0.0001, "loss": 1.5646, "step": 720832 }, { "epoch": 62.18840579710145, "grad_norm": 0.2544513940811157, "learning_rate": 0.0001, "loss": 1.5767, "step": 720888 }, { "epoch": 62.193236714975846, "grad_norm": 0.35212525725364685, "learning_rate": 0.0001, "loss": 1.5741, "step": 720944 }, { "epoch": 62.19806763285024, "grad_norm": 0.32270529866218567, "learning_rate": 0.0001, "loss": 1.5723, "step": 721000 }, { "epoch": 62.20289855072464, "grad_norm": 0.27425700426101685, "learning_rate": 0.0001, "loss": 1.5774, "step": 721056 }, { "epoch": 62.207729468599034, "grad_norm": 0.2842741012573242, "learning_rate": 0.0001, "loss": 1.5717, "step": 721112 }, { "epoch": 62.21256038647343, "grad_norm": 0.2341836839914322, "learning_rate": 0.0001, "loss": 1.5709, "step": 721168 }, { "epoch": 62.21739130434783, "grad_norm": 0.6717098355293274, "learning_rate": 0.0001, "loss": 1.5657, "step": 721224 }, { "epoch": 62.22222222222222, "grad_norm": 0.24953006207942963, "learning_rate": 0.0001, "loss": 1.5719, "step": 721280 }, { "epoch": 62.227053140096615, "grad_norm": 0.4333614706993103, "learning_rate": 0.0001, "loss": 1.5688, "step": 721336 }, { "epoch": 62.231884057971016, "grad_norm": 0.3396976888179779, "learning_rate": 0.0001, "loss": 1.5698, "step": 721392 }, { "epoch": 62.23671497584541, "grad_norm": 0.4736870229244232, "learning_rate": 0.0001, "loss": 1.5764, "step": 721448 }, { "epoch": 62.24154589371981, "grad_norm": 0.25140857696533203, "learning_rate": 0.0001, "loss": 1.5738, "step": 721504 }, { "epoch": 62.2463768115942, "grad_norm": 0.6037200093269348, "learning_rate": 0.0001, "loss": 1.567, "step": 721560 }, { "epoch": 62.2512077294686, "grad_norm": 0.4943421185016632, "learning_rate": 0.0001, "loss": 1.5599, "step": 721616 }, { "epoch": 62.256038647343, "grad_norm": 0.3591087758541107, "learning_rate": 0.0001, "loss": 1.5715, "step": 721672 }, { "epoch": 62.26086956521739, "grad_norm": 0.27091488242149353, "learning_rate": 0.0001, "loss": 1.5629, "step": 721728 }, { "epoch": 62.265700483091784, "grad_norm": 0.24980884790420532, "learning_rate": 0.0001, "loss": 1.5684, "step": 721784 }, { "epoch": 62.270531400966185, "grad_norm": 0.27753156423568726, "learning_rate": 0.0001, "loss": 1.5642, "step": 721840 }, { "epoch": 62.27536231884058, "grad_norm": 0.24672089517116547, "learning_rate": 0.0001, "loss": 1.5687, "step": 721896 }, { "epoch": 62.28019323671498, "grad_norm": 0.2910483181476593, "learning_rate": 0.0001, "loss": 1.5723, "step": 721952 }, { "epoch": 62.28502415458937, "grad_norm": 0.9092201590538025, "learning_rate": 0.0001, "loss": 1.5726, "step": 722008 }, { "epoch": 62.289855072463766, "grad_norm": 0.27946457266807556, "learning_rate": 0.0001, "loss": 1.574, "step": 722064 }, { "epoch": 62.29468599033817, "grad_norm": 1.0597881078720093, "learning_rate": 0.0001, "loss": 1.5645, "step": 722120 }, { "epoch": 62.29951690821256, "grad_norm": 0.26028355956077576, "learning_rate": 0.0001, "loss": 1.5647, "step": 722176 }, { "epoch": 62.30434782608695, "grad_norm": 0.2621420919895172, "learning_rate": 0.0001, "loss": 1.5741, "step": 722232 }, { "epoch": 62.309178743961354, "grad_norm": 0.3190864026546478, "learning_rate": 0.0001, "loss": 1.5718, "step": 722288 }, { "epoch": 62.31400966183575, "grad_norm": 0.28275972604751587, "learning_rate": 0.0001, "loss": 1.5716, "step": 722344 }, { "epoch": 62.31884057971015, "grad_norm": 0.4138582944869995, "learning_rate": 0.0001, "loss": 1.5736, "step": 722400 }, { "epoch": 62.32367149758454, "grad_norm": 2.5288259983062744, "learning_rate": 0.0001, "loss": 1.5686, "step": 722456 }, { "epoch": 62.328502415458935, "grad_norm": 0.27203208208084106, "learning_rate": 0.0001, "loss": 1.5729, "step": 722512 }, { "epoch": 62.333333333333336, "grad_norm": 0.5166379809379578, "learning_rate": 0.0001, "loss": 1.5772, "step": 722568 }, { "epoch": 62.33816425120773, "grad_norm": 0.25468745827674866, "learning_rate": 0.0001, "loss": 1.5742, "step": 722624 }, { "epoch": 62.34299516908212, "grad_norm": 0.28255048394203186, "learning_rate": 0.0001, "loss": 1.5744, "step": 722680 }, { "epoch": 62.34782608695652, "grad_norm": 0.8507705926895142, "learning_rate": 0.0001, "loss": 1.5735, "step": 722736 }, { "epoch": 62.35265700483092, "grad_norm": 1.5568976402282715, "learning_rate": 0.0001, "loss": 1.5696, "step": 722792 }, { "epoch": 62.35748792270532, "grad_norm": 0.6761753559112549, "learning_rate": 0.0001, "loss": 1.5757, "step": 722848 }, { "epoch": 62.36231884057971, "grad_norm": 0.3762079179286957, "learning_rate": 0.0001, "loss": 1.5708, "step": 722904 }, { "epoch": 62.367149758454104, "grad_norm": 0.287078320980072, "learning_rate": 0.0001, "loss": 1.5787, "step": 722960 }, { "epoch": 62.371980676328505, "grad_norm": 1.3267771005630493, "learning_rate": 0.0001, "loss": 1.5741, "step": 723016 }, { "epoch": 62.3768115942029, "grad_norm": 1.1347860097885132, "learning_rate": 0.0001, "loss": 1.5687, "step": 723072 }, { "epoch": 62.38164251207729, "grad_norm": 0.3141985535621643, "learning_rate": 0.0001, "loss": 1.5696, "step": 723128 }, { "epoch": 62.38647342995169, "grad_norm": 1.009322166442871, "learning_rate": 0.0001, "loss": 1.5725, "step": 723184 }, { "epoch": 62.391304347826086, "grad_norm": 0.2980998456478119, "learning_rate": 0.0001, "loss": 1.5731, "step": 723240 }, { "epoch": 62.39613526570048, "grad_norm": 0.36813339591026306, "learning_rate": 0.0001, "loss": 1.5759, "step": 723296 }, { "epoch": 62.40096618357488, "grad_norm": 0.2782837748527527, "learning_rate": 0.0001, "loss": 1.5674, "step": 723352 }, { "epoch": 62.405797101449274, "grad_norm": 23.04176139831543, "learning_rate": 0.0001, "loss": 1.573, "step": 723408 }, { "epoch": 62.410628019323674, "grad_norm": 1.763614296913147, "learning_rate": 0.0001, "loss": 1.5767, "step": 723464 }, { "epoch": 62.41545893719807, "grad_norm": 0.2924918234348297, "learning_rate": 0.0001, "loss": 1.5738, "step": 723520 }, { "epoch": 62.42028985507246, "grad_norm": 0.31590038537979126, "learning_rate": 0.0001, "loss": 1.5707, "step": 723576 }, { "epoch": 62.42512077294686, "grad_norm": 0.41350075602531433, "learning_rate": 0.0001, "loss": 1.569, "step": 723632 }, { "epoch": 62.429951690821255, "grad_norm": 0.37031635642051697, "learning_rate": 0.0001, "loss": 1.5708, "step": 723688 }, { "epoch": 62.43478260869565, "grad_norm": 0.2953779101371765, "learning_rate": 0.0001, "loss": 1.573, "step": 723744 }, { "epoch": 62.43961352657005, "grad_norm": 1.0928354263305664, "learning_rate": 0.0001, "loss": 1.5735, "step": 723800 }, { "epoch": 62.44444444444444, "grad_norm": 0.29653748869895935, "learning_rate": 0.0001, "loss": 1.5676, "step": 723856 }, { "epoch": 62.44927536231884, "grad_norm": 5.613714218139648, "learning_rate": 0.0001, "loss": 1.5723, "step": 723912 }, { "epoch": 62.45410628019324, "grad_norm": 0.2396327704191208, "learning_rate": 0.0001, "loss": 1.5716, "step": 723968 }, { "epoch": 62.45893719806763, "grad_norm": 0.27971112728118896, "learning_rate": 0.0001, "loss": 1.573, "step": 724024 }, { "epoch": 62.46376811594203, "grad_norm": 0.4705822467803955, "learning_rate": 0.0001, "loss": 1.5705, "step": 724080 }, { "epoch": 62.468599033816425, "grad_norm": 0.34000322222709656, "learning_rate": 0.0001, "loss": 1.568, "step": 724136 }, { "epoch": 62.47342995169082, "grad_norm": 0.3107225000858307, "learning_rate": 0.0001, "loss": 1.5747, "step": 724192 }, { "epoch": 62.47826086956522, "grad_norm": 0.3259021043777466, "learning_rate": 0.0001, "loss": 1.5749, "step": 724248 }, { "epoch": 62.48309178743961, "grad_norm": 0.27540090680122375, "learning_rate": 0.0001, "loss": 1.5755, "step": 724304 }, { "epoch": 62.48792270531401, "grad_norm": 0.3806430995464325, "learning_rate": 0.0001, "loss": 1.5731, "step": 724360 }, { "epoch": 62.492753623188406, "grad_norm": 0.5679048895835876, "learning_rate": 0.0001, "loss": 1.5669, "step": 724416 }, { "epoch": 62.4975845410628, "grad_norm": 0.2653096616268158, "learning_rate": 0.0001, "loss": 1.571, "step": 724472 }, { "epoch": 62.5024154589372, "grad_norm": 0.5664370656013489, "learning_rate": 0.0001, "loss": 1.5724, "step": 724528 }, { "epoch": 62.507246376811594, "grad_norm": 0.35827577114105225, "learning_rate": 0.0001, "loss": 1.5759, "step": 724584 }, { "epoch": 62.51207729468599, "grad_norm": 0.28962960839271545, "learning_rate": 0.0001, "loss": 1.5709, "step": 724640 }, { "epoch": 62.51690821256039, "grad_norm": 0.28388622403144836, "learning_rate": 0.0001, "loss": 1.5726, "step": 724696 }, { "epoch": 62.52173913043478, "grad_norm": 0.5569143295288086, "learning_rate": 0.0001, "loss": 1.5732, "step": 724752 }, { "epoch": 62.52657004830918, "grad_norm": 1.334276795387268, "learning_rate": 0.0001, "loss": 1.57, "step": 724808 }, { "epoch": 62.531400966183575, "grad_norm": 0.31167083978652954, "learning_rate": 0.0001, "loss": 1.5704, "step": 724864 }, { "epoch": 62.53623188405797, "grad_norm": 0.28319841623306274, "learning_rate": 0.0001, "loss": 1.5783, "step": 724920 }, { "epoch": 62.54106280193237, "grad_norm": 0.3387158513069153, "learning_rate": 0.0001, "loss": 1.575, "step": 724976 }, { "epoch": 62.54589371980676, "grad_norm": 0.7180444598197937, "learning_rate": 0.0001, "loss": 1.5739, "step": 725032 }, { "epoch": 62.55072463768116, "grad_norm": 0.2707345187664032, "learning_rate": 0.0001, "loss": 1.5778, "step": 725088 }, { "epoch": 62.55555555555556, "grad_norm": 0.3770512640476227, "learning_rate": 0.0001, "loss": 1.5689, "step": 725144 }, { "epoch": 62.56038647342995, "grad_norm": 0.285092294216156, "learning_rate": 0.0001, "loss": 1.5732, "step": 725200 }, { "epoch": 62.56521739130435, "grad_norm": 0.5101979970932007, "learning_rate": 0.0001, "loss": 1.5697, "step": 725256 }, { "epoch": 62.570048309178745, "grad_norm": 0.25316736102104187, "learning_rate": 0.0001, "loss": 1.5748, "step": 725312 }, { "epoch": 62.57487922705314, "grad_norm": 0.3947499394416809, "learning_rate": 0.0001, "loss": 1.5695, "step": 725368 }, { "epoch": 62.57971014492754, "grad_norm": 0.2294870913028717, "learning_rate": 0.0001, "loss": 1.574, "step": 725424 }, { "epoch": 62.58454106280193, "grad_norm": 1.2710660696029663, "learning_rate": 0.0001, "loss": 1.5718, "step": 725480 }, { "epoch": 62.589371980676326, "grad_norm": 0.7106131911277771, "learning_rate": 0.0001, "loss": 1.5739, "step": 725536 }, { "epoch": 62.594202898550726, "grad_norm": 3.1924750804901123, "learning_rate": 0.0001, "loss": 1.5729, "step": 725592 }, { "epoch": 62.59903381642512, "grad_norm": 0.2459021508693695, "learning_rate": 0.0001, "loss": 1.5715, "step": 725648 }, { "epoch": 62.60386473429952, "grad_norm": 0.3116636872291565, "learning_rate": 0.0001, "loss": 1.5734, "step": 725704 }, { "epoch": 62.608695652173914, "grad_norm": 0.27598831057548523, "learning_rate": 0.0001, "loss": 1.5735, "step": 725760 }, { "epoch": 62.61352657004831, "grad_norm": 0.7767271995544434, "learning_rate": 0.0001, "loss": 1.5724, "step": 725816 }, { "epoch": 62.61835748792271, "grad_norm": 64.29972839355469, "learning_rate": 0.0001, "loss": 1.5652, "step": 725872 }, { "epoch": 62.6231884057971, "grad_norm": 0.2971838414669037, "learning_rate": 0.0001, "loss": 1.5644, "step": 725928 }, { "epoch": 62.628019323671495, "grad_norm": 0.2941811978816986, "learning_rate": 0.0001, "loss": 1.5749, "step": 725984 }, { "epoch": 62.632850241545896, "grad_norm": 1.9784293174743652, "learning_rate": 0.0001, "loss": 1.5702, "step": 726040 }, { "epoch": 62.63768115942029, "grad_norm": 1.4864757061004639, "learning_rate": 0.0001, "loss": 1.5755, "step": 726096 }, { "epoch": 62.64251207729468, "grad_norm": 0.302395224571228, "learning_rate": 0.0001, "loss": 1.5694, "step": 726152 }, { "epoch": 62.64734299516908, "grad_norm": 0.30319944024086, "learning_rate": 0.0001, "loss": 1.5769, "step": 726208 }, { "epoch": 62.65217391304348, "grad_norm": 2.1581859588623047, "learning_rate": 0.0001, "loss": 1.5754, "step": 726264 }, { "epoch": 62.65700483091788, "grad_norm": 0.2761234641075134, "learning_rate": 0.0001, "loss": 1.5761, "step": 726320 }, { "epoch": 62.66183574879227, "grad_norm": 0.295910120010376, "learning_rate": 0.0001, "loss": 1.5707, "step": 726376 }, { "epoch": 62.666666666666664, "grad_norm": 1.1831156015396118, "learning_rate": 0.0001, "loss": 1.5749, "step": 726432 }, { "epoch": 62.671497584541065, "grad_norm": 0.2515677511692047, "learning_rate": 0.0001, "loss": 1.5704, "step": 726488 }, { "epoch": 62.67632850241546, "grad_norm": 0.32965385913848877, "learning_rate": 0.0001, "loss": 1.5666, "step": 726544 }, { "epoch": 62.68115942028985, "grad_norm": 0.3025713562965393, "learning_rate": 0.0001, "loss": 1.5704, "step": 726600 }, { "epoch": 62.68599033816425, "grad_norm": 0.2572263479232788, "learning_rate": 0.0001, "loss": 1.5677, "step": 726656 }, { "epoch": 62.690821256038646, "grad_norm": 0.2883860766887665, "learning_rate": 0.0001, "loss": 1.5789, "step": 726712 }, { "epoch": 62.69565217391305, "grad_norm": 0.2594689130783081, "learning_rate": 0.0001, "loss": 1.5766, "step": 726768 }, { "epoch": 62.70048309178744, "grad_norm": 0.6017184257507324, "learning_rate": 0.0001, "loss": 1.5752, "step": 726824 }, { "epoch": 62.70531400966183, "grad_norm": 0.3372388482093811, "learning_rate": 0.0001, "loss": 1.5656, "step": 726880 }, { "epoch": 62.710144927536234, "grad_norm": 0.41932281851768494, "learning_rate": 0.0001, "loss": 1.5678, "step": 726936 }, { "epoch": 62.71497584541063, "grad_norm": 0.25688132643699646, "learning_rate": 0.0001, "loss": 1.574, "step": 726992 }, { "epoch": 62.71980676328502, "grad_norm": 0.2768692672252655, "learning_rate": 0.0001, "loss": 1.5736, "step": 727048 }, { "epoch": 62.72463768115942, "grad_norm": 21.747173309326172, "learning_rate": 0.0001, "loss": 1.5658, "step": 727104 }, { "epoch": 62.729468599033815, "grad_norm": 0.33917534351348877, "learning_rate": 0.0001, "loss": 1.5789, "step": 727160 }, { "epoch": 62.734299516908216, "grad_norm": 0.33717602491378784, "learning_rate": 0.0001, "loss": 1.5747, "step": 727216 }, { "epoch": 62.73913043478261, "grad_norm": 0.26417937874794006, "learning_rate": 0.0001, "loss": 1.5688, "step": 727272 }, { "epoch": 62.743961352657, "grad_norm": 0.4496406018733978, "learning_rate": 0.0001, "loss": 1.5732, "step": 727328 }, { "epoch": 62.7487922705314, "grad_norm": 0.2827538847923279, "learning_rate": 0.0001, "loss": 1.5774, "step": 727384 }, { "epoch": 62.7536231884058, "grad_norm": 0.2905906140804291, "learning_rate": 0.0001, "loss": 1.5777, "step": 727440 }, { "epoch": 62.75845410628019, "grad_norm": 0.3543965816497803, "learning_rate": 0.0001, "loss": 1.5803, "step": 727496 }, { "epoch": 62.76328502415459, "grad_norm": 0.4133850932121277, "learning_rate": 0.0001, "loss": 1.5701, "step": 727552 }, { "epoch": 62.768115942028984, "grad_norm": 0.7020771503448486, "learning_rate": 0.0001, "loss": 1.5729, "step": 727608 }, { "epoch": 62.772946859903385, "grad_norm": 0.24971868097782135, "learning_rate": 0.0001, "loss": 1.5692, "step": 727664 }, { "epoch": 62.77777777777778, "grad_norm": 0.24484284222126007, "learning_rate": 0.0001, "loss": 1.5732, "step": 727720 }, { "epoch": 62.78260869565217, "grad_norm": 0.28959810733795166, "learning_rate": 0.0001, "loss": 1.5732, "step": 727776 }, { "epoch": 62.78743961352657, "grad_norm": 15.7720308303833, "learning_rate": 0.0001, "loss": 1.5766, "step": 727832 }, { "epoch": 62.792270531400966, "grad_norm": 0.2975970208644867, "learning_rate": 0.0001, "loss": 1.5738, "step": 727888 }, { "epoch": 62.79710144927536, "grad_norm": 0.26386791467666626, "learning_rate": 0.0001, "loss": 1.5753, "step": 727944 }, { "epoch": 62.80193236714976, "grad_norm": 0.2563723921775818, "learning_rate": 0.0001, "loss": 1.5701, "step": 728000 }, { "epoch": 62.806763285024154, "grad_norm": 0.32487085461616516, "learning_rate": 0.0001, "loss": 1.5763, "step": 728056 }, { "epoch": 62.81159420289855, "grad_norm": 5.733213901519775, "learning_rate": 0.0001, "loss": 1.577, "step": 728112 }, { "epoch": 62.81642512077295, "grad_norm": 0.2563782334327698, "learning_rate": 0.0001, "loss": 1.5732, "step": 728168 }, { "epoch": 62.82125603864734, "grad_norm": 1.3346635103225708, "learning_rate": 0.0001, "loss": 1.5789, "step": 728224 }, { "epoch": 62.82608695652174, "grad_norm": 0.36824584007263184, "learning_rate": 0.0001, "loss": 1.5692, "step": 728280 }, { "epoch": 62.830917874396135, "grad_norm": 0.25935986638069153, "learning_rate": 0.0001, "loss": 1.5785, "step": 728336 }, { "epoch": 62.83574879227053, "grad_norm": 7.117404937744141, "learning_rate": 0.0001, "loss": 1.5624, "step": 728392 }, { "epoch": 62.84057971014493, "grad_norm": 0.28636881709098816, "learning_rate": 0.0001, "loss": 1.5744, "step": 728448 }, { "epoch": 62.84541062801932, "grad_norm": 0.2776661515235901, "learning_rate": 0.0001, "loss": 1.5736, "step": 728504 }, { "epoch": 62.85024154589372, "grad_norm": 0.3196359872817993, "learning_rate": 0.0001, "loss": 1.5784, "step": 728560 }, { "epoch": 62.85507246376812, "grad_norm": 0.3319043815135956, "learning_rate": 0.0001, "loss": 1.5712, "step": 728616 }, { "epoch": 62.85990338164251, "grad_norm": 0.22401300072669983, "learning_rate": 0.0001, "loss": 1.5642, "step": 728672 }, { "epoch": 62.86473429951691, "grad_norm": 0.268051415681839, "learning_rate": 0.0001, "loss": 1.5783, "step": 728728 }, { "epoch": 62.869565217391305, "grad_norm": 0.39764392375946045, "learning_rate": 0.0001, "loss": 1.576, "step": 728784 }, { "epoch": 62.8743961352657, "grad_norm": 0.3552161753177643, "learning_rate": 0.0001, "loss": 1.5715, "step": 728840 }, { "epoch": 62.8792270531401, "grad_norm": 0.37312474846839905, "learning_rate": 0.0001, "loss": 1.5772, "step": 728896 }, { "epoch": 62.88405797101449, "grad_norm": 5.982202529907227, "learning_rate": 0.0001, "loss": 1.5719, "step": 728952 }, { "epoch": 62.888888888888886, "grad_norm": 0.28168177604675293, "learning_rate": 0.0001, "loss": 1.5779, "step": 729008 }, { "epoch": 62.893719806763286, "grad_norm": 0.5119758248329163, "learning_rate": 0.0001, "loss": 1.5644, "step": 729064 }, { "epoch": 62.89855072463768, "grad_norm": 0.24650201201438904, "learning_rate": 0.0001, "loss": 1.5723, "step": 729120 }, { "epoch": 62.90338164251208, "grad_norm": 0.3829774558544159, "learning_rate": 0.0001, "loss": 1.578, "step": 729176 }, { "epoch": 62.908212560386474, "grad_norm": 0.31171712279319763, "learning_rate": 0.0001, "loss": 1.5741, "step": 729232 }, { "epoch": 62.91304347826087, "grad_norm": 0.4717906415462494, "learning_rate": 0.0001, "loss": 1.5732, "step": 729288 }, { "epoch": 62.91787439613527, "grad_norm": 0.44332364201545715, "learning_rate": 0.0001, "loss": 1.5737, "step": 729344 }, { "epoch": 62.92270531400966, "grad_norm": 0.272013396024704, "learning_rate": 0.0001, "loss": 1.5745, "step": 729400 }, { "epoch": 62.927536231884055, "grad_norm": 0.38605454564094543, "learning_rate": 0.0001, "loss": 1.568, "step": 729456 }, { "epoch": 62.932367149758456, "grad_norm": 0.44695958495140076, "learning_rate": 0.0001, "loss": 1.5757, "step": 729512 }, { "epoch": 62.93719806763285, "grad_norm": 0.28032365441322327, "learning_rate": 0.0001, "loss": 1.5785, "step": 729568 }, { "epoch": 62.94202898550725, "grad_norm": 25.361452102661133, "learning_rate": 0.0001, "loss": 1.5774, "step": 729624 }, { "epoch": 62.94685990338164, "grad_norm": 8.967512130737305, "learning_rate": 0.0001, "loss": 1.5778, "step": 729680 }, { "epoch": 62.95169082125604, "grad_norm": 0.31491655111312866, "learning_rate": 0.0001, "loss": 1.5732, "step": 729736 }, { "epoch": 62.95652173913044, "grad_norm": 0.8885500431060791, "learning_rate": 0.0001, "loss": 1.5755, "step": 729792 }, { "epoch": 62.96135265700483, "grad_norm": 0.3107333183288574, "learning_rate": 0.0001, "loss": 1.5757, "step": 729848 }, { "epoch": 62.966183574879224, "grad_norm": 0.34509849548339844, "learning_rate": 0.0001, "loss": 1.569, "step": 729904 }, { "epoch": 62.971014492753625, "grad_norm": 1.0076206922531128, "learning_rate": 0.0001, "loss": 1.5684, "step": 729960 }, { "epoch": 62.97584541062802, "grad_norm": 0.23483017086982727, "learning_rate": 0.0001, "loss": 1.5653, "step": 730016 }, { "epoch": 62.98067632850242, "grad_norm": 0.3078582286834717, "learning_rate": 0.0001, "loss": 1.5715, "step": 730072 }, { "epoch": 62.98550724637681, "grad_norm": 0.2710314393043518, "learning_rate": 0.0001, "loss": 1.5747, "step": 730128 }, { "epoch": 62.990338164251206, "grad_norm": 1.267478585243225, "learning_rate": 0.0001, "loss": 1.5771, "step": 730184 }, { "epoch": 62.99516908212561, "grad_norm": 0.24333450198173523, "learning_rate": 0.0001, "loss": 1.5748, "step": 730240 }, { "epoch": 63.0, "grad_norm": 0.28113898634910583, "learning_rate": 0.0001, "loss": 1.5713, "step": 730296 }, { "epoch": 63.00483091787439, "grad_norm": 0.47547677159309387, "learning_rate": 0.0001, "loss": 1.568, "step": 730352 }, { "epoch": 63.009661835748794, "grad_norm": 0.31084364652633667, "learning_rate": 0.0001, "loss": 1.5633, "step": 730408 }, { "epoch": 63.01449275362319, "grad_norm": 0.31600216031074524, "learning_rate": 0.0001, "loss": 1.5668, "step": 730464 }, { "epoch": 63.01932367149758, "grad_norm": 4.499281883239746, "learning_rate": 0.0001, "loss": 1.5679, "step": 730520 }, { "epoch": 63.02415458937198, "grad_norm": 0.2888919413089752, "learning_rate": 0.0001, "loss": 1.5585, "step": 730576 }, { "epoch": 63.028985507246375, "grad_norm": 0.2715318202972412, "learning_rate": 0.0001, "loss": 1.5678, "step": 730632 }, { "epoch": 63.033816425120776, "grad_norm": 1.219583511352539, "learning_rate": 0.0001, "loss": 1.5663, "step": 730688 }, { "epoch": 63.03864734299517, "grad_norm": 0.30026939511299133, "learning_rate": 0.0001, "loss": 1.572, "step": 730744 }, { "epoch": 63.04347826086956, "grad_norm": 0.3126368224620819, "learning_rate": 0.0001, "loss": 1.5638, "step": 730800 }, { "epoch": 63.04830917874396, "grad_norm": 13.72336196899414, "learning_rate": 0.0001, "loss": 1.5626, "step": 730856 }, { "epoch": 63.05314009661836, "grad_norm": 7.792726039886475, "learning_rate": 0.0001, "loss": 1.5695, "step": 730912 }, { "epoch": 63.05797101449275, "grad_norm": 0.3179454505443573, "learning_rate": 0.0001, "loss": 1.5631, "step": 730968 }, { "epoch": 63.06280193236715, "grad_norm": 0.5311191082000732, "learning_rate": 0.0001, "loss": 1.569, "step": 731024 }, { "epoch": 63.067632850241544, "grad_norm": 0.26377755403518677, "learning_rate": 0.0001, "loss": 1.5649, "step": 731080 }, { "epoch": 63.072463768115945, "grad_norm": 0.2552780508995056, "learning_rate": 0.0001, "loss": 1.5603, "step": 731136 }, { "epoch": 63.07729468599034, "grad_norm": 0.23113960027694702, "learning_rate": 0.0001, "loss": 1.5695, "step": 731192 }, { "epoch": 63.08212560386473, "grad_norm": 0.3738711476325989, "learning_rate": 0.0001, "loss": 1.5664, "step": 731248 }, { "epoch": 63.08695652173913, "grad_norm": 0.6125767230987549, "learning_rate": 0.0001, "loss": 1.5629, "step": 731304 }, { "epoch": 63.091787439613526, "grad_norm": 0.40638983249664307, "learning_rate": 0.0001, "loss": 1.5724, "step": 731360 }, { "epoch": 63.09661835748792, "grad_norm": 0.418361634016037, "learning_rate": 0.0001, "loss": 1.5677, "step": 731416 }, { "epoch": 63.10144927536232, "grad_norm": 0.2965411841869354, "learning_rate": 0.0001, "loss": 1.5692, "step": 731472 }, { "epoch": 63.106280193236714, "grad_norm": 13.978006362915039, "learning_rate": 0.0001, "loss": 1.5741, "step": 731528 }, { "epoch": 63.111111111111114, "grad_norm": 0.3471987247467041, "learning_rate": 0.0001, "loss": 1.5635, "step": 731584 }, { "epoch": 63.11594202898551, "grad_norm": 0.26076585054397583, "learning_rate": 0.0001, "loss": 1.571, "step": 731640 }, { "epoch": 63.1207729468599, "grad_norm": 0.2392660230398178, "learning_rate": 0.0001, "loss": 1.5688, "step": 731696 }, { "epoch": 63.1256038647343, "grad_norm": 0.23961885273456573, "learning_rate": 0.0001, "loss": 1.5589, "step": 731752 }, { "epoch": 63.130434782608695, "grad_norm": 0.2780132293701172, "learning_rate": 0.0001, "loss": 1.5613, "step": 731808 }, { "epoch": 63.13526570048309, "grad_norm": 0.8991456031799316, "learning_rate": 0.0001, "loss": 1.5693, "step": 731864 }, { "epoch": 63.14009661835749, "grad_norm": 0.22709132730960846, "learning_rate": 0.0001, "loss": 1.5694, "step": 731920 }, { "epoch": 63.14492753623188, "grad_norm": 0.4227042496204376, "learning_rate": 0.0001, "loss": 1.5631, "step": 731976 }, { "epoch": 63.14975845410628, "grad_norm": 0.24387165904045105, "learning_rate": 0.0001, "loss": 1.5652, "step": 732032 }, { "epoch": 63.15458937198068, "grad_norm": 2.2165582180023193, "learning_rate": 0.0001, "loss": 1.5721, "step": 732088 }, { "epoch": 63.15942028985507, "grad_norm": 6.512873649597168, "learning_rate": 0.0001, "loss": 1.5649, "step": 732144 }, { "epoch": 63.16425120772947, "grad_norm": 0.24220681190490723, "learning_rate": 0.0001, "loss": 1.5665, "step": 732200 }, { "epoch": 63.169082125603865, "grad_norm": 0.29070696234703064, "learning_rate": 0.0001, "loss": 1.5671, "step": 732256 }, { "epoch": 63.17391304347826, "grad_norm": 0.2518070638179779, "learning_rate": 0.0001, "loss": 1.5753, "step": 732312 }, { "epoch": 63.17874396135266, "grad_norm": 0.27027416229248047, "learning_rate": 0.0001, "loss": 1.5586, "step": 732368 }, { "epoch": 63.18357487922705, "grad_norm": 0.28442052006721497, "learning_rate": 0.0001, "loss": 1.5719, "step": 732424 }, { "epoch": 63.18840579710145, "grad_norm": 0.5259303450584412, "learning_rate": 0.0001, "loss": 1.5687, "step": 732480 }, { "epoch": 63.193236714975846, "grad_norm": 0.25152894854545593, "learning_rate": 0.0001, "loss": 1.5591, "step": 732536 }, { "epoch": 63.19806763285024, "grad_norm": 0.2665584683418274, "learning_rate": 0.0001, "loss": 1.5707, "step": 732592 }, { "epoch": 63.20289855072464, "grad_norm": 0.3286307752132416, "learning_rate": 0.0001, "loss": 1.5594, "step": 732648 }, { "epoch": 63.207729468599034, "grad_norm": 0.3297647535800934, "learning_rate": 0.0001, "loss": 1.561, "step": 732704 }, { "epoch": 63.21256038647343, "grad_norm": 0.3355868458747864, "learning_rate": 0.0001, "loss": 1.5634, "step": 732760 }, { "epoch": 63.21739130434783, "grad_norm": 0.25921422243118286, "learning_rate": 0.0001, "loss": 1.5746, "step": 732816 }, { "epoch": 63.22222222222222, "grad_norm": 0.3115796744823456, "learning_rate": 0.0001, "loss": 1.5734, "step": 732872 }, { "epoch": 63.227053140096615, "grad_norm": 0.2764767110347748, "learning_rate": 0.0001, "loss": 1.564, "step": 732928 }, { "epoch": 63.231884057971016, "grad_norm": 0.2950091063976288, "learning_rate": 0.0001, "loss": 1.5647, "step": 732984 }, { "epoch": 63.23671497584541, "grad_norm": 1.7026853561401367, "learning_rate": 0.0001, "loss": 1.5701, "step": 733040 }, { "epoch": 63.24154589371981, "grad_norm": 1.3242324590682983, "learning_rate": 0.0001, "loss": 1.5585, "step": 733096 }, { "epoch": 63.2463768115942, "grad_norm": 0.24653026461601257, "learning_rate": 0.0001, "loss": 1.572, "step": 733152 }, { "epoch": 63.2512077294686, "grad_norm": 0.2830531895160675, "learning_rate": 0.0001, "loss": 1.5628, "step": 733208 }, { "epoch": 63.256038647343, "grad_norm": 0.25864270329475403, "learning_rate": 0.0001, "loss": 1.5675, "step": 733264 }, { "epoch": 63.26086956521739, "grad_norm": 0.28365904092788696, "learning_rate": 0.0001, "loss": 1.5815, "step": 733320 }, { "epoch": 63.265700483091784, "grad_norm": 0.41176384687423706, "learning_rate": 0.0001, "loss": 1.5682, "step": 733376 }, { "epoch": 63.270531400966185, "grad_norm": 0.2887535095214844, "learning_rate": 0.0001, "loss": 1.5743, "step": 733432 }, { "epoch": 63.27536231884058, "grad_norm": 73.96841430664062, "learning_rate": 0.0001, "loss": 1.5675, "step": 733488 }, { "epoch": 63.28019323671498, "grad_norm": 0.2362847924232483, "learning_rate": 0.0001, "loss": 1.5696, "step": 733544 }, { "epoch": 63.28502415458937, "grad_norm": 0.8285291790962219, "learning_rate": 0.0001, "loss": 1.5671, "step": 733600 }, { "epoch": 63.289855072463766, "grad_norm": 0.2947680950164795, "learning_rate": 0.0001, "loss": 1.5666, "step": 733656 }, { "epoch": 63.29468599033817, "grad_norm": 0.5716466903686523, "learning_rate": 0.0001, "loss": 1.5706, "step": 733712 }, { "epoch": 63.29951690821256, "grad_norm": 0.27095967531204224, "learning_rate": 0.0001, "loss": 1.5655, "step": 733768 }, { "epoch": 63.30434782608695, "grad_norm": 0.2569930851459503, "learning_rate": 0.0001, "loss": 1.5711, "step": 733824 }, { "epoch": 63.309178743961354, "grad_norm": 0.2698161005973816, "learning_rate": 0.0001, "loss": 1.5715, "step": 733880 }, { "epoch": 63.31400966183575, "grad_norm": 1.0906389951705933, "learning_rate": 0.0001, "loss": 1.5726, "step": 733936 }, { "epoch": 63.31884057971015, "grad_norm": 0.2867649793624878, "learning_rate": 0.0001, "loss": 1.5616, "step": 733992 }, { "epoch": 63.32367149758454, "grad_norm": 0.27258583903312683, "learning_rate": 0.0001, "loss": 1.5732, "step": 734048 }, { "epoch": 63.328502415458935, "grad_norm": 0.31716597080230713, "learning_rate": 0.0001, "loss": 1.5602, "step": 734104 }, { "epoch": 63.333333333333336, "grad_norm": 0.33570927381515503, "learning_rate": 0.0001, "loss": 1.5674, "step": 734160 }, { "epoch": 63.33816425120773, "grad_norm": 0.23145237565040588, "learning_rate": 0.0001, "loss": 1.5717, "step": 734216 }, { "epoch": 63.34299516908212, "grad_norm": 0.3338344395160675, "learning_rate": 0.0001, "loss": 1.5724, "step": 734272 }, { "epoch": 63.34782608695652, "grad_norm": 0.3588441014289856, "learning_rate": 0.0001, "loss": 1.5743, "step": 734328 }, { "epoch": 63.35265700483092, "grad_norm": 0.31399065256118774, "learning_rate": 0.0001, "loss": 1.5633, "step": 734384 }, { "epoch": 63.35748792270532, "grad_norm": 0.2813735008239746, "learning_rate": 0.0001, "loss": 1.5678, "step": 734440 }, { "epoch": 63.36231884057971, "grad_norm": 0.3023267388343811, "learning_rate": 0.0001, "loss": 1.5678, "step": 734496 }, { "epoch": 63.367149758454104, "grad_norm": 0.3149886727333069, "learning_rate": 0.0001, "loss": 1.5686, "step": 734552 }, { "epoch": 63.371980676328505, "grad_norm": 0.24981655180454254, "learning_rate": 0.0001, "loss": 1.5741, "step": 734608 }, { "epoch": 63.3768115942029, "grad_norm": 0.23704344034194946, "learning_rate": 0.0001, "loss": 1.565, "step": 734664 }, { "epoch": 63.38164251207729, "grad_norm": 0.25516054034233093, "learning_rate": 0.0001, "loss": 1.569, "step": 734720 }, { "epoch": 63.38647342995169, "grad_norm": 0.35584303736686707, "learning_rate": 0.0001, "loss": 1.5669, "step": 734776 }, { "epoch": 63.391304347826086, "grad_norm": 4.821107387542725, "learning_rate": 0.0001, "loss": 1.5641, "step": 734832 }, { "epoch": 63.39613526570048, "grad_norm": 0.24041256308555603, "learning_rate": 0.0001, "loss": 1.5631, "step": 734888 }, { "epoch": 63.40096618357488, "grad_norm": 0.2828560173511505, "learning_rate": 0.0001, "loss": 1.5691, "step": 734944 }, { "epoch": 63.405797101449274, "grad_norm": 0.33003097772598267, "learning_rate": 0.0001, "loss": 1.5702, "step": 735000 }, { "epoch": 63.410628019323674, "grad_norm": 0.3950437903404236, "learning_rate": 0.0001, "loss": 1.568, "step": 735056 }, { "epoch": 63.41545893719807, "grad_norm": 0.32921433448791504, "learning_rate": 0.0001, "loss": 1.5675, "step": 735112 }, { "epoch": 63.42028985507246, "grad_norm": 0.3636189103126526, "learning_rate": 0.0001, "loss": 1.5683, "step": 735168 }, { "epoch": 63.42512077294686, "grad_norm": 0.3952851891517639, "learning_rate": 0.0001, "loss": 1.5702, "step": 735224 }, { "epoch": 63.429951690821255, "grad_norm": 0.25807344913482666, "learning_rate": 0.0001, "loss": 1.5765, "step": 735280 }, { "epoch": 63.43478260869565, "grad_norm": 0.27405884861946106, "learning_rate": 0.0001, "loss": 1.5662, "step": 735336 }, { "epoch": 63.43961352657005, "grad_norm": 1.2540326118469238, "learning_rate": 0.0001, "loss": 1.5695, "step": 735392 }, { "epoch": 63.44444444444444, "grad_norm": 0.30132073163986206, "learning_rate": 0.0001, "loss": 1.5624, "step": 735448 }, { "epoch": 63.44927536231884, "grad_norm": 18.523557662963867, "learning_rate": 0.0001, "loss": 1.5743, "step": 735504 }, { "epoch": 63.45410628019324, "grad_norm": 0.2521708309650421, "learning_rate": 0.0001, "loss": 1.5729, "step": 735560 }, { "epoch": 63.45893719806763, "grad_norm": 0.24642795324325562, "learning_rate": 0.0001, "loss": 1.5655, "step": 735616 }, { "epoch": 63.46376811594203, "grad_norm": 54.14277267456055, "learning_rate": 0.0001, "loss": 1.575, "step": 735672 }, { "epoch": 63.468599033816425, "grad_norm": 0.3921535015106201, "learning_rate": 0.0001, "loss": 1.5691, "step": 735728 }, { "epoch": 63.47342995169082, "grad_norm": 0.37491750717163086, "learning_rate": 0.0001, "loss": 1.5679, "step": 735784 }, { "epoch": 63.47826086956522, "grad_norm": 1.7025470733642578, "learning_rate": 0.0001, "loss": 1.5661, "step": 735840 }, { "epoch": 63.48309178743961, "grad_norm": 0.3617103695869446, "learning_rate": 0.0001, "loss": 1.5737, "step": 735896 }, { "epoch": 63.48792270531401, "grad_norm": 0.2953236997127533, "learning_rate": 0.0001, "loss": 1.5719, "step": 735952 }, { "epoch": 63.492753623188406, "grad_norm": 0.25271880626678467, "learning_rate": 0.0001, "loss": 1.5709, "step": 736008 }, { "epoch": 63.4975845410628, "grad_norm": 4.412477016448975, "learning_rate": 0.0001, "loss": 1.5712, "step": 736064 }, { "epoch": 63.5024154589372, "grad_norm": 0.9974247217178345, "learning_rate": 0.0001, "loss": 1.5677, "step": 736120 }, { "epoch": 63.507246376811594, "grad_norm": 0.25849851965904236, "learning_rate": 0.0001, "loss": 1.5768, "step": 736176 }, { "epoch": 63.51207729468599, "grad_norm": 0.6702790856361389, "learning_rate": 0.0001, "loss": 1.5733, "step": 736232 }, { "epoch": 63.51690821256039, "grad_norm": 0.6511969566345215, "learning_rate": 0.0001, "loss": 1.5706, "step": 736288 }, { "epoch": 63.52173913043478, "grad_norm": 0.4891354441642761, "learning_rate": 0.0001, "loss": 1.5692, "step": 736344 }, { "epoch": 63.52657004830918, "grad_norm": 0.31100359559059143, "learning_rate": 0.0001, "loss": 1.5674, "step": 736400 }, { "epoch": 63.531400966183575, "grad_norm": 0.35169920325279236, "learning_rate": 0.0001, "loss": 1.5783, "step": 736456 }, { "epoch": 63.53623188405797, "grad_norm": 0.2651878893375397, "learning_rate": 0.0001, "loss": 1.5703, "step": 736512 }, { "epoch": 63.54106280193237, "grad_norm": 0.2739761173725128, "learning_rate": 0.0001, "loss": 1.567, "step": 736568 }, { "epoch": 63.54589371980676, "grad_norm": 0.5964056253433228, "learning_rate": 0.0001, "loss": 1.574, "step": 736624 }, { "epoch": 63.55072463768116, "grad_norm": 0.32951539754867554, "learning_rate": 0.0001, "loss": 1.5665, "step": 736680 }, { "epoch": 63.55555555555556, "grad_norm": 0.2947358191013336, "learning_rate": 0.0001, "loss": 1.5639, "step": 736736 }, { "epoch": 63.56038647342995, "grad_norm": 0.25102686882019043, "learning_rate": 0.0001, "loss": 1.5689, "step": 736792 }, { "epoch": 63.56521739130435, "grad_norm": 0.37429606914520264, "learning_rate": 0.0001, "loss": 1.5642, "step": 736848 }, { "epoch": 63.570048309178745, "grad_norm": 0.36108696460723877, "learning_rate": 0.0001, "loss": 1.5648, "step": 736904 }, { "epoch": 63.57487922705314, "grad_norm": 0.3280046582221985, "learning_rate": 0.0001, "loss": 1.5666, "step": 736960 }, { "epoch": 63.57971014492754, "grad_norm": 0.33242204785346985, "learning_rate": 0.0001, "loss": 1.5639, "step": 737016 }, { "epoch": 63.58454106280193, "grad_norm": 0.2721671164035797, "learning_rate": 0.0001, "loss": 1.5719, "step": 737072 }, { "epoch": 63.589371980676326, "grad_norm": 0.2743692994117737, "learning_rate": 0.0001, "loss": 1.5761, "step": 737128 }, { "epoch": 63.594202898550726, "grad_norm": 3.0269014835357666, "learning_rate": 0.0001, "loss": 1.5674, "step": 737184 }, { "epoch": 63.59903381642512, "grad_norm": 0.26000115275382996, "learning_rate": 0.0001, "loss": 1.5704, "step": 737240 }, { "epoch": 63.60386473429952, "grad_norm": 0.24528621137142181, "learning_rate": 0.0001, "loss": 1.5727, "step": 737296 }, { "epoch": 63.608695652173914, "grad_norm": 0.3945212960243225, "learning_rate": 0.0001, "loss": 1.5664, "step": 737352 }, { "epoch": 63.61352657004831, "grad_norm": 0.40827736258506775, "learning_rate": 0.0001, "loss": 1.5627, "step": 737408 }, { "epoch": 63.61835748792271, "grad_norm": 0.3077528178691864, "learning_rate": 0.0001, "loss": 1.5694, "step": 737464 }, { "epoch": 63.6231884057971, "grad_norm": 0.36255231499671936, "learning_rate": 0.0001, "loss": 1.5688, "step": 737520 }, { "epoch": 63.628019323671495, "grad_norm": 0.27644020318984985, "learning_rate": 0.0001, "loss": 1.573, "step": 737576 }, { "epoch": 63.632850241545896, "grad_norm": 0.2931049168109894, "learning_rate": 0.0001, "loss": 1.5679, "step": 737632 }, { "epoch": 63.63768115942029, "grad_norm": 0.3155617117881775, "learning_rate": 0.0001, "loss": 1.5702, "step": 737688 }, { "epoch": 63.64251207729468, "grad_norm": 0.2707212269306183, "learning_rate": 0.0001, "loss": 1.5683, "step": 737744 }, { "epoch": 63.64734299516908, "grad_norm": 1.1334447860717773, "learning_rate": 0.0001, "loss": 1.5711, "step": 737800 }, { "epoch": 63.65217391304348, "grad_norm": 0.2913053333759308, "learning_rate": 0.0001, "loss": 1.5803, "step": 737856 }, { "epoch": 63.65700483091788, "grad_norm": 2.289947986602783, "learning_rate": 0.0001, "loss": 1.5703, "step": 737912 }, { "epoch": 63.66183574879227, "grad_norm": 0.3003033697605133, "learning_rate": 0.0001, "loss": 1.5727, "step": 737968 }, { "epoch": 63.666666666666664, "grad_norm": 0.7571542263031006, "learning_rate": 0.0001, "loss": 1.572, "step": 738024 }, { "epoch": 63.671497584541065, "grad_norm": 0.4357714354991913, "learning_rate": 0.0001, "loss": 1.5729, "step": 738080 }, { "epoch": 63.67632850241546, "grad_norm": 6.875448226928711, "learning_rate": 0.0001, "loss": 1.5686, "step": 738136 }, { "epoch": 63.68115942028985, "grad_norm": 0.33190369606018066, "learning_rate": 0.0001, "loss": 1.5709, "step": 738192 }, { "epoch": 63.68599033816425, "grad_norm": 0.27044519782066345, "learning_rate": 0.0001, "loss": 1.5709, "step": 738248 }, { "epoch": 63.690821256038646, "grad_norm": 0.37058401107788086, "learning_rate": 0.0001, "loss": 1.5687, "step": 738304 }, { "epoch": 63.69565217391305, "grad_norm": 0.2625899612903595, "learning_rate": 0.0001, "loss": 1.5634, "step": 738360 }, { "epoch": 63.70048309178744, "grad_norm": 0.32220780849456787, "learning_rate": 0.0001, "loss": 1.5688, "step": 738416 }, { "epoch": 63.70531400966183, "grad_norm": 0.8345224857330322, "learning_rate": 0.0001, "loss": 1.5664, "step": 738472 }, { "epoch": 63.710144927536234, "grad_norm": 1.8252661228179932, "learning_rate": 0.0001, "loss": 1.5658, "step": 738528 }, { "epoch": 63.71497584541063, "grad_norm": 0.3183533251285553, "learning_rate": 0.0001, "loss": 1.5729, "step": 738584 }, { "epoch": 63.71980676328502, "grad_norm": 0.24737201631069183, "learning_rate": 0.0001, "loss": 1.5689, "step": 738640 }, { "epoch": 63.72463768115942, "grad_norm": 0.26409950852394104, "learning_rate": 0.0001, "loss": 1.5696, "step": 738696 }, { "epoch": 63.729468599033815, "grad_norm": 0.30818185210227966, "learning_rate": 0.0001, "loss": 1.5588, "step": 738752 }, { "epoch": 63.734299516908216, "grad_norm": 0.32108986377716064, "learning_rate": 0.0001, "loss": 1.5707, "step": 738808 }, { "epoch": 63.73913043478261, "grad_norm": 0.2965286076068878, "learning_rate": 0.0001, "loss": 1.5682, "step": 738864 }, { "epoch": 63.743961352657, "grad_norm": 0.5109328627586365, "learning_rate": 0.0001, "loss": 1.5733, "step": 738920 }, { "epoch": 63.7487922705314, "grad_norm": 0.2801593542098999, "learning_rate": 0.0001, "loss": 1.5679, "step": 738976 }, { "epoch": 63.7536231884058, "grad_norm": 0.2718394994735718, "learning_rate": 0.0001, "loss": 1.5688, "step": 739032 }, { "epoch": 63.75845410628019, "grad_norm": 0.3147476613521576, "learning_rate": 0.0001, "loss": 1.5667, "step": 739088 }, { "epoch": 63.76328502415459, "grad_norm": 0.3560924232006073, "learning_rate": 0.0001, "loss": 1.5717, "step": 739144 }, { "epoch": 63.768115942028984, "grad_norm": 0.7241440415382385, "learning_rate": 0.0001, "loss": 1.5639, "step": 739200 }, { "epoch": 63.772946859903385, "grad_norm": 20.30845069885254, "learning_rate": 0.0001, "loss": 1.5683, "step": 739256 }, { "epoch": 63.77777777777778, "grad_norm": 0.34887170791625977, "learning_rate": 0.0001, "loss": 1.5649, "step": 739312 }, { "epoch": 63.78260869565217, "grad_norm": 0.8283113837242126, "learning_rate": 0.0001, "loss": 1.5762, "step": 739368 }, { "epoch": 63.78743961352657, "grad_norm": 0.25511613488197327, "learning_rate": 0.0001, "loss": 1.5654, "step": 739424 }, { "epoch": 63.792270531400966, "grad_norm": 0.32994627952575684, "learning_rate": 0.0001, "loss": 1.5718, "step": 739480 }, { "epoch": 63.79710144927536, "grad_norm": 0.6589224338531494, "learning_rate": 0.0001, "loss": 1.5696, "step": 739536 }, { "epoch": 63.80193236714976, "grad_norm": 0.39978811144828796, "learning_rate": 0.0001, "loss": 1.575, "step": 739592 }, { "epoch": 63.806763285024154, "grad_norm": 1.2771176099777222, "learning_rate": 0.0001, "loss": 1.5698, "step": 739648 }, { "epoch": 63.81159420289855, "grad_norm": 0.3366304934024811, "learning_rate": 0.0001, "loss": 1.5705, "step": 739704 }, { "epoch": 63.81642512077295, "grad_norm": 0.5164982676506042, "learning_rate": 0.0001, "loss": 1.577, "step": 739760 }, { "epoch": 63.82125603864734, "grad_norm": 0.3213403820991516, "learning_rate": 0.0001, "loss": 1.5721, "step": 739816 }, { "epoch": 63.82608695652174, "grad_norm": 0.7095179557800293, "learning_rate": 0.0001, "loss": 1.5702, "step": 739872 }, { "epoch": 63.830917874396135, "grad_norm": 0.5541601181030273, "learning_rate": 0.0001, "loss": 1.5699, "step": 739928 }, { "epoch": 63.83574879227053, "grad_norm": 0.3014202415943146, "learning_rate": 0.0001, "loss": 1.5693, "step": 739984 }, { "epoch": 63.84057971014493, "grad_norm": 0.25986289978027344, "learning_rate": 0.0001, "loss": 1.5724, "step": 740040 }, { "epoch": 63.84541062801932, "grad_norm": 0.27522605657577515, "learning_rate": 0.0001, "loss": 1.5711, "step": 740096 }, { "epoch": 63.85024154589372, "grad_norm": 0.9252640604972839, "learning_rate": 0.0001, "loss": 1.5718, "step": 740152 }, { "epoch": 63.85507246376812, "grad_norm": 0.5270540714263916, "learning_rate": 0.0001, "loss": 1.5708, "step": 740208 }, { "epoch": 63.85990338164251, "grad_norm": 0.3556101322174072, "learning_rate": 0.0001, "loss": 1.57, "step": 740264 }, { "epoch": 63.86473429951691, "grad_norm": 20.85801887512207, "learning_rate": 0.0001, "loss": 1.572, "step": 740320 }, { "epoch": 63.869565217391305, "grad_norm": 0.3831097185611725, "learning_rate": 0.0001, "loss": 1.5732, "step": 740376 }, { "epoch": 63.8743961352657, "grad_norm": 0.3140772581100464, "learning_rate": 0.0001, "loss": 1.5713, "step": 740432 }, { "epoch": 63.8792270531401, "grad_norm": 0.6439085006713867, "learning_rate": 0.0001, "loss": 1.5783, "step": 740488 }, { "epoch": 63.88405797101449, "grad_norm": 0.2804937958717346, "learning_rate": 0.0001, "loss": 1.5721, "step": 740544 }, { "epoch": 63.888888888888886, "grad_norm": 0.3860592842102051, "learning_rate": 0.0001, "loss": 1.5721, "step": 740600 }, { "epoch": 63.893719806763286, "grad_norm": 0.30732589960098267, "learning_rate": 0.0001, "loss": 1.5737, "step": 740656 }, { "epoch": 63.89855072463768, "grad_norm": 0.2824482023715973, "learning_rate": 0.0001, "loss": 1.5747, "step": 740712 }, { "epoch": 63.90338164251208, "grad_norm": 0.3442060053348541, "learning_rate": 0.0001, "loss": 1.5794, "step": 740768 }, { "epoch": 63.908212560386474, "grad_norm": 0.3591180443763733, "learning_rate": 0.0001, "loss": 1.5731, "step": 740824 }, { "epoch": 63.91304347826087, "grad_norm": 0.22759337723255157, "learning_rate": 0.0001, "loss": 1.571, "step": 740880 }, { "epoch": 63.91787439613527, "grad_norm": 0.2874988317489624, "learning_rate": 0.0001, "loss": 1.5653, "step": 740936 }, { "epoch": 63.92270531400966, "grad_norm": 0.2946223318576813, "learning_rate": 0.0001, "loss": 1.5745, "step": 740992 }, { "epoch": 63.927536231884055, "grad_norm": 0.3354147970676422, "learning_rate": 0.0001, "loss": 1.5757, "step": 741048 }, { "epoch": 63.932367149758456, "grad_norm": 0.22109240293502808, "learning_rate": 0.0001, "loss": 1.5767, "step": 741104 }, { "epoch": 63.93719806763285, "grad_norm": 0.2837904095649719, "learning_rate": 0.0001, "loss": 1.5682, "step": 741160 }, { "epoch": 63.94202898550725, "grad_norm": 0.2658074200153351, "learning_rate": 0.0001, "loss": 1.5693, "step": 741216 }, { "epoch": 63.94685990338164, "grad_norm": 0.2656523883342743, "learning_rate": 0.0001, "loss": 1.5662, "step": 741272 }, { "epoch": 63.95169082125604, "grad_norm": 0.5256099104881287, "learning_rate": 0.0001, "loss": 1.5718, "step": 741328 }, { "epoch": 63.95652173913044, "grad_norm": 0.2789287865161896, "learning_rate": 0.0001, "loss": 1.5797, "step": 741384 }, { "epoch": 63.96135265700483, "grad_norm": 4.384358882904053, "learning_rate": 0.0001, "loss": 1.573, "step": 741440 }, { "epoch": 63.966183574879224, "grad_norm": 6.135206699371338, "learning_rate": 0.0001, "loss": 1.5668, "step": 741496 }, { "epoch": 63.971014492753625, "grad_norm": 0.27514174580574036, "learning_rate": 0.0001, "loss": 1.5707, "step": 741552 }, { "epoch": 63.97584541062802, "grad_norm": 0.2813592851161957, "learning_rate": 0.0001, "loss": 1.5722, "step": 741608 }, { "epoch": 63.98067632850242, "grad_norm": 0.43265318870544434, "learning_rate": 0.0001, "loss": 1.5672, "step": 741664 }, { "epoch": 63.98550724637681, "grad_norm": 18.640073776245117, "learning_rate": 0.0001, "loss": 1.5686, "step": 741720 }, { "epoch": 63.990338164251206, "grad_norm": 0.29791995882987976, "learning_rate": 0.0001, "loss": 1.5731, "step": 741776 }, { "epoch": 63.99516908212561, "grad_norm": 0.36086925864219666, "learning_rate": 0.0001, "loss": 1.5694, "step": 741832 }, { "epoch": 64.0, "grad_norm": 0.3329020142555237, "learning_rate": 0.0001, "loss": 1.5661, "step": 741888 }, { "epoch": 64.0048309178744, "grad_norm": 0.3498096466064453, "learning_rate": 0.0001, "loss": 1.5656, "step": 741944 }, { "epoch": 64.00966183574879, "grad_norm": 0.2952572703361511, "learning_rate": 0.0001, "loss": 1.5594, "step": 742000 }, { "epoch": 64.01449275362319, "grad_norm": 0.33379441499710083, "learning_rate": 0.0001, "loss": 1.5672, "step": 742056 }, { "epoch": 64.01932367149759, "grad_norm": 0.3823011517524719, "learning_rate": 0.0001, "loss": 1.5653, "step": 742112 }, { "epoch": 64.02415458937197, "grad_norm": 0.40923020243644714, "learning_rate": 0.0001, "loss": 1.5601, "step": 742168 }, { "epoch": 64.02898550724638, "grad_norm": 1.0744661092758179, "learning_rate": 0.0001, "loss": 1.5559, "step": 742224 }, { "epoch": 64.03381642512078, "grad_norm": 0.3164548873901367, "learning_rate": 0.0001, "loss": 1.568, "step": 742280 }, { "epoch": 64.03864734299516, "grad_norm": 0.24004055559635162, "learning_rate": 0.0001, "loss": 1.5695, "step": 742336 }, { "epoch": 64.04347826086956, "grad_norm": 0.2812853157520294, "learning_rate": 0.0001, "loss": 1.5617, "step": 742392 }, { "epoch": 64.04830917874396, "grad_norm": 0.2608727514743805, "learning_rate": 0.0001, "loss": 1.567, "step": 742448 }, { "epoch": 64.05314009661836, "grad_norm": 0.26380518078804016, "learning_rate": 0.0001, "loss": 1.5619, "step": 742504 }, { "epoch": 64.05797101449275, "grad_norm": 0.2515946328639984, "learning_rate": 0.0001, "loss": 1.5727, "step": 742560 }, { "epoch": 64.06280193236715, "grad_norm": 0.3291115164756775, "learning_rate": 0.0001, "loss": 1.5669, "step": 742616 }, { "epoch": 64.06763285024155, "grad_norm": 0.24239470064640045, "learning_rate": 0.0001, "loss": 1.5621, "step": 742672 }, { "epoch": 64.07246376811594, "grad_norm": 0.2954666316509247, "learning_rate": 0.0001, "loss": 1.5647, "step": 742728 }, { "epoch": 64.07729468599034, "grad_norm": 0.3083321154117584, "learning_rate": 0.0001, "loss": 1.5625, "step": 742784 }, { "epoch": 64.08212560386474, "grad_norm": 0.24324579536914825, "learning_rate": 0.0001, "loss": 1.5673, "step": 742840 }, { "epoch": 64.08695652173913, "grad_norm": 0.21713155508041382, "learning_rate": 0.0001, "loss": 1.5633, "step": 742896 }, { "epoch": 64.09178743961353, "grad_norm": 0.32253918051719666, "learning_rate": 0.0001, "loss": 1.5728, "step": 742952 }, { "epoch": 64.09661835748793, "grad_norm": 0.4811483919620514, "learning_rate": 0.0001, "loss": 1.562, "step": 743008 }, { "epoch": 64.10144927536231, "grad_norm": 0.264586865901947, "learning_rate": 0.0001, "loss": 1.5642, "step": 743064 }, { "epoch": 64.10628019323671, "grad_norm": 0.4195355176925659, "learning_rate": 0.0001, "loss": 1.5704, "step": 743120 }, { "epoch": 64.11111111111111, "grad_norm": 0.3176027238368988, "learning_rate": 0.0001, "loss": 1.5661, "step": 743176 }, { "epoch": 64.1159420289855, "grad_norm": 0.2815641760826111, "learning_rate": 0.0001, "loss": 1.5737, "step": 743232 }, { "epoch": 64.1207729468599, "grad_norm": 0.28773167729377747, "learning_rate": 0.0001, "loss": 1.566, "step": 743288 }, { "epoch": 64.1256038647343, "grad_norm": 0.2503811717033386, "learning_rate": 0.0001, "loss": 1.5687, "step": 743344 }, { "epoch": 64.1304347826087, "grad_norm": 0.25287070870399475, "learning_rate": 0.0001, "loss": 1.5619, "step": 743400 }, { "epoch": 64.13526570048309, "grad_norm": 0.2678529620170593, "learning_rate": 0.0001, "loss": 1.5698, "step": 743456 }, { "epoch": 64.14009661835749, "grad_norm": 0.25955694913864136, "learning_rate": 0.0001, "loss": 1.5654, "step": 743512 }, { "epoch": 64.14492753623189, "grad_norm": 0.3534862995147705, "learning_rate": 0.0001, "loss": 1.5663, "step": 743568 }, { "epoch": 64.14975845410628, "grad_norm": 0.3846474885940552, "learning_rate": 0.0001, "loss": 1.5683, "step": 743624 }, { "epoch": 64.15458937198068, "grad_norm": 0.4261574447154999, "learning_rate": 0.0001, "loss": 1.5667, "step": 743680 }, { "epoch": 64.15942028985508, "grad_norm": 0.30949336290359497, "learning_rate": 0.0001, "loss": 1.5712, "step": 743736 }, { "epoch": 64.16425120772946, "grad_norm": 0.4109453558921814, "learning_rate": 0.0001, "loss": 1.565, "step": 743792 }, { "epoch": 64.16908212560386, "grad_norm": 0.2533850073814392, "learning_rate": 0.0001, "loss": 1.5685, "step": 743848 }, { "epoch": 64.17391304347827, "grad_norm": 0.2874692380428314, "learning_rate": 0.0001, "loss": 1.5647, "step": 743904 }, { "epoch": 64.17874396135265, "grad_norm": 0.30741870403289795, "learning_rate": 0.0001, "loss": 1.5578, "step": 743960 }, { "epoch": 64.18357487922705, "grad_norm": 26.571155548095703, "learning_rate": 0.0001, "loss": 1.5712, "step": 744016 }, { "epoch": 64.18840579710145, "grad_norm": 0.26519593596458435, "learning_rate": 0.0001, "loss": 1.565, "step": 744072 }, { "epoch": 64.19323671497584, "grad_norm": 0.3722441792488098, "learning_rate": 0.0001, "loss": 1.5718, "step": 744128 }, { "epoch": 64.19806763285024, "grad_norm": 0.3988434970378876, "learning_rate": 0.0001, "loss": 1.5676, "step": 744184 }, { "epoch": 64.20289855072464, "grad_norm": 0.37628892064094543, "learning_rate": 0.0001, "loss": 1.5681, "step": 744240 }, { "epoch": 64.20772946859903, "grad_norm": 1.0867174863815308, "learning_rate": 0.0001, "loss": 1.5617, "step": 744296 }, { "epoch": 64.21256038647343, "grad_norm": 0.3649153709411621, "learning_rate": 0.0001, "loss": 1.5672, "step": 744352 }, { "epoch": 64.21739130434783, "grad_norm": 0.27044451236724854, "learning_rate": 0.0001, "loss": 1.5596, "step": 744408 }, { "epoch": 64.22222222222223, "grad_norm": 0.26587337255477905, "learning_rate": 0.0001, "loss": 1.5653, "step": 744464 }, { "epoch": 64.22705314009661, "grad_norm": 0.29724836349487305, "learning_rate": 0.0001, "loss": 1.5677, "step": 744520 }, { "epoch": 64.23188405797102, "grad_norm": 5.395325183868408, "learning_rate": 0.0001, "loss": 1.5645, "step": 744576 }, { "epoch": 64.23671497584542, "grad_norm": 0.35108792781829834, "learning_rate": 0.0001, "loss": 1.5658, "step": 744632 }, { "epoch": 64.2415458937198, "grad_norm": 0.2865370810031891, "learning_rate": 0.0001, "loss": 1.5701, "step": 744688 }, { "epoch": 64.2463768115942, "grad_norm": 0.2518458664417267, "learning_rate": 0.0001, "loss": 1.56, "step": 744744 }, { "epoch": 64.2512077294686, "grad_norm": 0.3438859283924103, "learning_rate": 0.0001, "loss": 1.5738, "step": 744800 }, { "epoch": 64.25603864734299, "grad_norm": 0.2862623333930969, "learning_rate": 0.0001, "loss": 1.5615, "step": 744856 }, { "epoch": 64.26086956521739, "grad_norm": 0.2987564206123352, "learning_rate": 0.0001, "loss": 1.5632, "step": 744912 }, { "epoch": 64.26570048309179, "grad_norm": 0.32283860445022583, "learning_rate": 0.0001, "loss": 1.5636, "step": 744968 }, { "epoch": 64.27053140096618, "grad_norm": 0.25756344199180603, "learning_rate": 0.0001, "loss": 1.5665, "step": 745024 }, { "epoch": 64.27536231884058, "grad_norm": 0.408292293548584, "learning_rate": 0.0001, "loss": 1.567, "step": 745080 }, { "epoch": 64.28019323671498, "grad_norm": 0.2927461266517639, "learning_rate": 0.0001, "loss": 1.5626, "step": 745136 }, { "epoch": 64.28502415458937, "grad_norm": 0.23118941485881805, "learning_rate": 0.0001, "loss": 1.5624, "step": 745192 }, { "epoch": 64.28985507246377, "grad_norm": 0.37464773654937744, "learning_rate": 0.0001, "loss": 1.5616, "step": 745248 }, { "epoch": 64.29468599033817, "grad_norm": 1.9837226867675781, "learning_rate": 0.0001, "loss": 1.563, "step": 745304 }, { "epoch": 64.29951690821257, "grad_norm": 0.26776793599128723, "learning_rate": 0.0001, "loss": 1.5645, "step": 745360 }, { "epoch": 64.30434782608695, "grad_norm": 0.4233185350894928, "learning_rate": 0.0001, "loss": 1.5694, "step": 745416 }, { "epoch": 64.30917874396135, "grad_norm": 0.29308998584747314, "learning_rate": 0.0001, "loss": 1.5639, "step": 745472 }, { "epoch": 64.31400966183575, "grad_norm": 0.34067434072494507, "learning_rate": 0.0001, "loss": 1.5672, "step": 745528 }, { "epoch": 64.31884057971014, "grad_norm": 12.765238761901855, "learning_rate": 0.0001, "loss": 1.5703, "step": 745584 }, { "epoch": 64.32367149758454, "grad_norm": 0.3085225820541382, "learning_rate": 0.0001, "loss": 1.5665, "step": 745640 }, { "epoch": 64.32850241545894, "grad_norm": 0.259596049785614, "learning_rate": 0.0001, "loss": 1.5697, "step": 745696 }, { "epoch": 64.33333333333333, "grad_norm": 0.25375524163246155, "learning_rate": 0.0001, "loss": 1.5616, "step": 745752 }, { "epoch": 64.33816425120773, "grad_norm": 0.2916201651096344, "learning_rate": 0.0001, "loss": 1.5653, "step": 745808 }, { "epoch": 64.34299516908213, "grad_norm": 0.3049803078174591, "learning_rate": 0.0001, "loss": 1.5694, "step": 745864 }, { "epoch": 64.34782608695652, "grad_norm": 0.2501409947872162, "learning_rate": 0.0001, "loss": 1.5691, "step": 745920 }, { "epoch": 64.35265700483092, "grad_norm": 1.0479083061218262, "learning_rate": 0.0001, "loss": 1.5627, "step": 745976 }, { "epoch": 64.35748792270532, "grad_norm": 0.46135058999061584, "learning_rate": 0.0001, "loss": 1.575, "step": 746032 }, { "epoch": 64.3623188405797, "grad_norm": 0.37425950169563293, "learning_rate": 0.0001, "loss": 1.5625, "step": 746088 }, { "epoch": 64.3671497584541, "grad_norm": 0.3259718418121338, "learning_rate": 0.0001, "loss": 1.5638, "step": 746144 }, { "epoch": 64.3719806763285, "grad_norm": 0.3488105535507202, "learning_rate": 0.0001, "loss": 1.5672, "step": 746200 }, { "epoch": 64.3768115942029, "grad_norm": 0.37250933051109314, "learning_rate": 0.0001, "loss": 1.5729, "step": 746256 }, { "epoch": 64.38164251207729, "grad_norm": 0.2919609546661377, "learning_rate": 0.0001, "loss": 1.5631, "step": 746312 }, { "epoch": 64.38647342995169, "grad_norm": 0.3266714811325073, "learning_rate": 0.0001, "loss": 1.568, "step": 746368 }, { "epoch": 64.3913043478261, "grad_norm": 0.2407822459936142, "learning_rate": 0.0001, "loss": 1.563, "step": 746424 }, { "epoch": 64.39613526570048, "grad_norm": 0.49791550636291504, "learning_rate": 0.0001, "loss": 1.5737, "step": 746480 }, { "epoch": 64.40096618357488, "grad_norm": 0.27864712476730347, "learning_rate": 0.0001, "loss": 1.5728, "step": 746536 }, { "epoch": 64.40579710144928, "grad_norm": 0.3841218650341034, "learning_rate": 0.0001, "loss": 1.5685, "step": 746592 }, { "epoch": 64.41062801932367, "grad_norm": 0.25327828526496887, "learning_rate": 0.0001, "loss": 1.5669, "step": 746648 }, { "epoch": 64.41545893719807, "grad_norm": 0.4484589695930481, "learning_rate": 0.0001, "loss": 1.5691, "step": 746704 }, { "epoch": 64.42028985507247, "grad_norm": 0.46656733751296997, "learning_rate": 0.0001, "loss": 1.565, "step": 746760 }, { "epoch": 64.42512077294685, "grad_norm": 0.2791946232318878, "learning_rate": 0.0001, "loss": 1.571, "step": 746816 }, { "epoch": 64.42995169082126, "grad_norm": 0.2750720977783203, "learning_rate": 0.0001, "loss": 1.5724, "step": 746872 }, { "epoch": 64.43478260869566, "grad_norm": 0.2912481129169464, "learning_rate": 0.0001, "loss": 1.5615, "step": 746928 }, { "epoch": 64.43961352657004, "grad_norm": 0.8285535573959351, "learning_rate": 0.0001, "loss": 1.5703, "step": 746984 }, { "epoch": 64.44444444444444, "grad_norm": 0.2747577428817749, "learning_rate": 0.0001, "loss": 1.5718, "step": 747040 }, { "epoch": 64.44927536231884, "grad_norm": 0.6413703560829163, "learning_rate": 0.0001, "loss": 1.569, "step": 747096 }, { "epoch": 64.45410628019323, "grad_norm": 0.29654955863952637, "learning_rate": 0.0001, "loss": 1.568, "step": 747152 }, { "epoch": 64.45893719806763, "grad_norm": 0.3709215521812439, "learning_rate": 0.0001, "loss": 1.5635, "step": 747208 }, { "epoch": 64.46376811594203, "grad_norm": 1.9019427299499512, "learning_rate": 0.0001, "loss": 1.5615, "step": 747264 }, { "epoch": 64.46859903381643, "grad_norm": 0.29361867904663086, "learning_rate": 0.0001, "loss": 1.5739, "step": 747320 }, { "epoch": 64.47342995169082, "grad_norm": 0.28778448700904846, "learning_rate": 0.0001, "loss": 1.5729, "step": 747376 }, { "epoch": 64.47826086956522, "grad_norm": 0.29585957527160645, "learning_rate": 0.0001, "loss": 1.5649, "step": 747432 }, { "epoch": 64.48309178743962, "grad_norm": 0.4334616959095001, "learning_rate": 0.0001, "loss": 1.5645, "step": 747488 }, { "epoch": 64.487922705314, "grad_norm": 0.23616832494735718, "learning_rate": 0.0001, "loss": 1.5721, "step": 747544 }, { "epoch": 64.4927536231884, "grad_norm": 0.2677697241306305, "learning_rate": 0.0001, "loss": 1.5677, "step": 747600 }, { "epoch": 64.4975845410628, "grad_norm": 0.2460099458694458, "learning_rate": 0.0001, "loss": 1.5693, "step": 747656 }, { "epoch": 64.5024154589372, "grad_norm": 0.28257009387016296, "learning_rate": 0.0001, "loss": 1.5747, "step": 747712 }, { "epoch": 64.5072463768116, "grad_norm": 1.187709927558899, "learning_rate": 0.0001, "loss": 1.577, "step": 747768 }, { "epoch": 64.512077294686, "grad_norm": 0.3981636166572571, "learning_rate": 0.0001, "loss": 1.5652, "step": 747824 }, { "epoch": 64.51690821256038, "grad_norm": 0.2293919026851654, "learning_rate": 0.0001, "loss": 1.5722, "step": 747880 }, { "epoch": 64.52173913043478, "grad_norm": 0.25797998905181885, "learning_rate": 0.0001, "loss": 1.5744, "step": 747936 }, { "epoch": 64.52657004830918, "grad_norm": 0.24310515820980072, "learning_rate": 0.0001, "loss": 1.568, "step": 747992 }, { "epoch": 64.53140096618357, "grad_norm": 0.36769258975982666, "learning_rate": 0.0001, "loss": 1.5659, "step": 748048 }, { "epoch": 64.53623188405797, "grad_norm": 0.26201292872428894, "learning_rate": 0.0001, "loss": 1.5634, "step": 748104 }, { "epoch": 64.54106280193237, "grad_norm": 0.2488633394241333, "learning_rate": 0.0001, "loss": 1.5627, "step": 748160 }, { "epoch": 64.54589371980677, "grad_norm": 0.3997940719127655, "learning_rate": 0.0001, "loss": 1.562, "step": 748216 }, { "epoch": 64.55072463768116, "grad_norm": 0.3789123594760895, "learning_rate": 0.0001, "loss": 1.5705, "step": 748272 }, { "epoch": 64.55555555555556, "grad_norm": 2.1552975177764893, "learning_rate": 0.0001, "loss": 1.5615, "step": 748328 }, { "epoch": 64.56038647342996, "grad_norm": 0.295379638671875, "learning_rate": 0.0001, "loss": 1.5569, "step": 748384 }, { "epoch": 64.56521739130434, "grad_norm": 0.35163938999176025, "learning_rate": 0.0001, "loss": 1.5658, "step": 748440 }, { "epoch": 64.57004830917874, "grad_norm": 0.3130193054676056, "learning_rate": 0.0001, "loss": 1.5668, "step": 748496 }, { "epoch": 64.57487922705315, "grad_norm": 0.3306048810482025, "learning_rate": 0.0001, "loss": 1.5645, "step": 748552 }, { "epoch": 64.57971014492753, "grad_norm": 0.24234512448310852, "learning_rate": 0.0001, "loss": 1.568, "step": 748608 }, { "epoch": 64.58454106280193, "grad_norm": 0.4807204008102417, "learning_rate": 0.0001, "loss": 1.5646, "step": 748664 }, { "epoch": 64.58937198067633, "grad_norm": 0.2645256221294403, "learning_rate": 0.0001, "loss": 1.5712, "step": 748720 }, { "epoch": 64.59420289855072, "grad_norm": 0.3527630865573883, "learning_rate": 0.0001, "loss": 1.5618, "step": 748776 }, { "epoch": 64.59903381642512, "grad_norm": 0.3257485628128052, "learning_rate": 0.0001, "loss": 1.5692, "step": 748832 }, { "epoch": 64.60386473429952, "grad_norm": 0.3806562125682831, "learning_rate": 0.0001, "loss": 1.5669, "step": 748888 }, { "epoch": 64.6086956521739, "grad_norm": 0.28791698813438416, "learning_rate": 0.0001, "loss": 1.5643, "step": 748944 }, { "epoch": 64.61352657004831, "grad_norm": 0.2506769895553589, "learning_rate": 0.0001, "loss": 1.5708, "step": 749000 }, { "epoch": 64.61835748792271, "grad_norm": 0.32007282972335815, "learning_rate": 0.0001, "loss": 1.5705, "step": 749056 }, { "epoch": 64.6231884057971, "grad_norm": 0.267305463552475, "learning_rate": 0.0001, "loss": 1.5692, "step": 749112 }, { "epoch": 64.6280193236715, "grad_norm": 0.33030202984809875, "learning_rate": 0.0001, "loss": 1.5647, "step": 749168 }, { "epoch": 64.6328502415459, "grad_norm": 0.2548978328704834, "learning_rate": 0.0001, "loss": 1.5674, "step": 749224 }, { "epoch": 64.6376811594203, "grad_norm": 0.6835223436355591, "learning_rate": 0.0001, "loss": 1.5676, "step": 749280 }, { "epoch": 64.64251207729468, "grad_norm": 0.2773696184158325, "learning_rate": 0.0001, "loss": 1.5722, "step": 749336 }, { "epoch": 64.64734299516908, "grad_norm": 0.253262460231781, "learning_rate": 0.0001, "loss": 1.5686, "step": 749392 }, { "epoch": 64.65217391304348, "grad_norm": 10.12911319732666, "learning_rate": 0.0001, "loss": 1.5688, "step": 749448 }, { "epoch": 64.65700483091787, "grad_norm": 0.2590923309326172, "learning_rate": 0.0001, "loss": 1.5651, "step": 749504 }, { "epoch": 64.66183574879227, "grad_norm": 0.4954107701778412, "learning_rate": 0.0001, "loss": 1.5709, "step": 749560 }, { "epoch": 64.66666666666667, "grad_norm": 0.23425675928592682, "learning_rate": 0.0001, "loss": 1.5746, "step": 749616 }, { "epoch": 64.67149758454106, "grad_norm": 0.2735257148742676, "learning_rate": 0.0001, "loss": 1.5692, "step": 749672 }, { "epoch": 64.67632850241546, "grad_norm": 0.37387508153915405, "learning_rate": 0.0001, "loss": 1.5585, "step": 749728 }, { "epoch": 64.68115942028986, "grad_norm": 0.32058435678482056, "learning_rate": 0.0001, "loss": 1.5713, "step": 749784 }, { "epoch": 64.68599033816425, "grad_norm": 0.34344765543937683, "learning_rate": 0.0001, "loss": 1.5587, "step": 749840 }, { "epoch": 64.69082125603865, "grad_norm": 0.28823211789131165, "learning_rate": 0.0001, "loss": 1.5707, "step": 749896 }, { "epoch": 64.69565217391305, "grad_norm": 0.476360023021698, "learning_rate": 0.0001, "loss": 1.5701, "step": 749952 }, { "epoch": 64.70048309178743, "grad_norm": 0.7747220396995544, "learning_rate": 0.0001, "loss": 1.5719, "step": 750008 }, { "epoch": 64.70531400966183, "grad_norm": 0.37567463517189026, "learning_rate": 0.0001, "loss": 1.5719, "step": 750064 }, { "epoch": 64.71014492753623, "grad_norm": 0.3014237880706787, "learning_rate": 0.0001, "loss": 1.5691, "step": 750120 }, { "epoch": 64.71497584541063, "grad_norm": 0.2723066210746765, "learning_rate": 0.0001, "loss": 1.5777, "step": 750176 }, { "epoch": 64.71980676328502, "grad_norm": 0.30208149552345276, "learning_rate": 0.0001, "loss": 1.5716, "step": 750232 }, { "epoch": 64.72463768115942, "grad_norm": 0.26470687985420227, "learning_rate": 0.0001, "loss": 1.5621, "step": 750288 }, { "epoch": 64.72946859903382, "grad_norm": 1.0270676612854004, "learning_rate": 0.0001, "loss": 1.5637, "step": 750344 }, { "epoch": 64.73429951690821, "grad_norm": 0.2945767939090729, "learning_rate": 0.0001, "loss": 1.5679, "step": 750400 }, { "epoch": 64.73913043478261, "grad_norm": 0.28738638758659363, "learning_rate": 0.0001, "loss": 1.5698, "step": 750456 }, { "epoch": 64.74396135265701, "grad_norm": 1.222340703010559, "learning_rate": 0.0001, "loss": 1.5741, "step": 750512 }, { "epoch": 64.7487922705314, "grad_norm": 0.37524712085723877, "learning_rate": 0.0001, "loss": 1.5706, "step": 750568 }, { "epoch": 64.7536231884058, "grad_norm": 0.27672144770622253, "learning_rate": 0.0001, "loss": 1.5727, "step": 750624 }, { "epoch": 64.7584541062802, "grad_norm": 0.31923967599868774, "learning_rate": 0.0001, "loss": 1.5607, "step": 750680 }, { "epoch": 64.76328502415458, "grad_norm": 0.22505097091197968, "learning_rate": 0.0001, "loss": 1.5601, "step": 750736 }, { "epoch": 64.76811594202898, "grad_norm": 0.8988355398178101, "learning_rate": 0.0001, "loss": 1.5692, "step": 750792 }, { "epoch": 64.77294685990339, "grad_norm": 1.7415374517440796, "learning_rate": 0.0001, "loss": 1.5733, "step": 750848 }, { "epoch": 64.77777777777777, "grad_norm": 0.23996078968048096, "learning_rate": 0.0001, "loss": 1.5659, "step": 750904 }, { "epoch": 64.78260869565217, "grad_norm": 0.3895362913608551, "learning_rate": 0.0001, "loss": 1.5739, "step": 750960 }, { "epoch": 64.78743961352657, "grad_norm": 0.4408572316169739, "learning_rate": 0.0001, "loss": 1.5641, "step": 751016 }, { "epoch": 64.79227053140096, "grad_norm": 3.1274659633636475, "learning_rate": 0.0001, "loss": 1.5636, "step": 751072 }, { "epoch": 64.79710144927536, "grad_norm": 1.385522723197937, "learning_rate": 0.0001, "loss": 1.5727, "step": 751128 }, { "epoch": 64.80193236714976, "grad_norm": 0.2570280134677887, "learning_rate": 0.0001, "loss": 1.5736, "step": 751184 }, { "epoch": 64.80676328502416, "grad_norm": 0.38472673296928406, "learning_rate": 0.0001, "loss": 1.5712, "step": 751240 }, { "epoch": 64.81159420289855, "grad_norm": 0.26611942052841187, "learning_rate": 0.0001, "loss": 1.566, "step": 751296 }, { "epoch": 64.81642512077295, "grad_norm": 0.3763500154018402, "learning_rate": 0.0001, "loss": 1.5748, "step": 751352 }, { "epoch": 64.82125603864735, "grad_norm": 0.27065035700798035, "learning_rate": 0.0001, "loss": 1.5708, "step": 751408 }, { "epoch": 64.82608695652173, "grad_norm": 0.3223312795162201, "learning_rate": 0.0001, "loss": 1.5739, "step": 751464 }, { "epoch": 64.83091787439614, "grad_norm": 0.30655017495155334, "learning_rate": 0.0001, "loss": 1.5737, "step": 751520 }, { "epoch": 64.83574879227054, "grad_norm": 0.3576255440711975, "learning_rate": 0.0001, "loss": 1.5688, "step": 751576 }, { "epoch": 64.84057971014492, "grad_norm": 0.2659268081188202, "learning_rate": 0.0001, "loss": 1.5731, "step": 751632 }, { "epoch": 64.84541062801932, "grad_norm": 0.31237339973449707, "learning_rate": 0.0001, "loss": 1.5685, "step": 751688 }, { "epoch": 64.85024154589372, "grad_norm": 0.2606436014175415, "learning_rate": 0.0001, "loss": 1.5725, "step": 751744 }, { "epoch": 64.85507246376811, "grad_norm": 0.3091509938240051, "learning_rate": 0.0001, "loss": 1.5725, "step": 751800 }, { "epoch": 64.85990338164251, "grad_norm": 0.2831288278102875, "learning_rate": 0.0001, "loss": 1.5728, "step": 751856 }, { "epoch": 64.86473429951691, "grad_norm": 0.3455502688884735, "learning_rate": 0.0001, "loss": 1.5713, "step": 751912 }, { "epoch": 64.8695652173913, "grad_norm": 0.5452723503112793, "learning_rate": 0.0001, "loss": 1.5698, "step": 751968 }, { "epoch": 64.8743961352657, "grad_norm": 0.7126954793930054, "learning_rate": 0.0001, "loss": 1.5692, "step": 752024 }, { "epoch": 64.8792270531401, "grad_norm": 0.7290889024734497, "learning_rate": 0.0001, "loss": 1.5759, "step": 752080 }, { "epoch": 64.8840579710145, "grad_norm": 0.36290356516838074, "learning_rate": 0.0001, "loss": 1.5705, "step": 752136 }, { "epoch": 64.88888888888889, "grad_norm": 0.24656616151332855, "learning_rate": 0.0001, "loss": 1.5751, "step": 752192 }, { "epoch": 64.89371980676329, "grad_norm": 0.27369144558906555, "learning_rate": 0.0001, "loss": 1.5669, "step": 752248 }, { "epoch": 64.89855072463769, "grad_norm": 0.23196829855442047, "learning_rate": 0.0001, "loss": 1.5655, "step": 752304 }, { "epoch": 64.90338164251207, "grad_norm": 0.5586740374565125, "learning_rate": 0.0001, "loss": 1.5718, "step": 752360 }, { "epoch": 64.90821256038647, "grad_norm": 5.1856303215026855, "learning_rate": 0.0001, "loss": 1.5736, "step": 752416 }, { "epoch": 64.91304347826087, "grad_norm": 0.23558734357357025, "learning_rate": 0.0001, "loss": 1.5694, "step": 752472 }, { "epoch": 64.91787439613526, "grad_norm": 0.29972147941589355, "learning_rate": 0.0001, "loss": 1.5695, "step": 752528 }, { "epoch": 64.92270531400966, "grad_norm": 0.345343679189682, "learning_rate": 0.0001, "loss": 1.5673, "step": 752584 }, { "epoch": 64.92753623188406, "grad_norm": 0.2713644206523895, "learning_rate": 0.0001, "loss": 1.5686, "step": 752640 }, { "epoch": 64.93236714975845, "grad_norm": 0.28017306327819824, "learning_rate": 0.0001, "loss": 1.5708, "step": 752696 }, { "epoch": 64.93719806763285, "grad_norm": 1.2060009241104126, "learning_rate": 0.0001, "loss": 1.5724, "step": 752752 }, { "epoch": 64.94202898550725, "grad_norm": 0.2323039472103119, "learning_rate": 0.0001, "loss": 1.5644, "step": 752808 }, { "epoch": 64.94685990338164, "grad_norm": 0.3144837021827698, "learning_rate": 0.0001, "loss": 1.5743, "step": 752864 }, { "epoch": 64.95169082125604, "grad_norm": 0.2886348068714142, "learning_rate": 0.0001, "loss": 1.5687, "step": 752920 }, { "epoch": 64.95652173913044, "grad_norm": 0.30109214782714844, "learning_rate": 0.0001, "loss": 1.5662, "step": 752976 }, { "epoch": 64.96135265700484, "grad_norm": 3.9912731647491455, "learning_rate": 0.0001, "loss": 1.5707, "step": 753032 }, { "epoch": 64.96618357487922, "grad_norm": 0.4060533344745636, "learning_rate": 0.0001, "loss": 1.57, "step": 753088 }, { "epoch": 64.97101449275362, "grad_norm": 0.37225571274757385, "learning_rate": 0.0001, "loss": 1.5697, "step": 753144 }, { "epoch": 64.97584541062803, "grad_norm": 0.2739839553833008, "learning_rate": 0.0001, "loss": 1.5708, "step": 753200 }, { "epoch": 64.98067632850241, "grad_norm": 1.4083092212677002, "learning_rate": 0.0001, "loss": 1.577, "step": 753256 }, { "epoch": 64.98550724637681, "grad_norm": 0.47691023349761963, "learning_rate": 0.0001, "loss": 1.5693, "step": 753312 }, { "epoch": 64.99033816425121, "grad_norm": 0.3300624489784241, "learning_rate": 0.0001, "loss": 1.5668, "step": 753368 }, { "epoch": 64.9951690821256, "grad_norm": 0.848706066608429, "learning_rate": 0.0001, "loss": 1.5737, "step": 753424 }, { "epoch": 65.0, "grad_norm": 0.25603339076042175, "learning_rate": 0.0001, "loss": 1.5762, "step": 753480 }, { "epoch": 65.0048309178744, "grad_norm": 0.321577787399292, "learning_rate": 0.0001, "loss": 1.5632, "step": 753536 }, { "epoch": 65.00966183574879, "grad_norm": 0.3302834630012512, "learning_rate": 0.0001, "loss": 1.5651, "step": 753592 }, { "epoch": 65.01449275362319, "grad_norm": 2.662656784057617, "learning_rate": 0.0001, "loss": 1.5623, "step": 753648 }, { "epoch": 65.01932367149759, "grad_norm": 0.3603155314922333, "learning_rate": 0.0001, "loss": 1.5664, "step": 753704 }, { "epoch": 65.02415458937197, "grad_norm": 0.2728820741176605, "learning_rate": 0.0001, "loss": 1.562, "step": 753760 }, { "epoch": 65.02898550724638, "grad_norm": 0.2829191982746124, "learning_rate": 0.0001, "loss": 1.5663, "step": 753816 }, { "epoch": 65.03381642512078, "grad_norm": 0.30690792202949524, "learning_rate": 0.0001, "loss": 1.5675, "step": 753872 }, { "epoch": 65.03864734299516, "grad_norm": 0.5989208221435547, "learning_rate": 0.0001, "loss": 1.5631, "step": 753928 }, { "epoch": 65.04347826086956, "grad_norm": 0.28339630365371704, "learning_rate": 0.0001, "loss": 1.5712, "step": 753984 }, { "epoch": 65.04830917874396, "grad_norm": 0.8489905595779419, "learning_rate": 0.0001, "loss": 1.5614, "step": 754040 }, { "epoch": 65.05314009661836, "grad_norm": 0.41959378123283386, "learning_rate": 0.0001, "loss": 1.5725, "step": 754096 }, { "epoch": 65.05797101449275, "grad_norm": 0.26918864250183105, "learning_rate": 0.0001, "loss": 1.5696, "step": 754152 }, { "epoch": 65.06280193236715, "grad_norm": 0.2494177371263504, "learning_rate": 0.0001, "loss": 1.5671, "step": 754208 }, { "epoch": 65.06763285024155, "grad_norm": 0.2502475678920746, "learning_rate": 0.0001, "loss": 1.5647, "step": 754264 }, { "epoch": 65.07246376811594, "grad_norm": 0.2533220052719116, "learning_rate": 0.0001, "loss": 1.5654, "step": 754320 }, { "epoch": 65.07729468599034, "grad_norm": 0.32272613048553467, "learning_rate": 0.0001, "loss": 1.5704, "step": 754376 }, { "epoch": 65.08212560386474, "grad_norm": 0.33496391773223877, "learning_rate": 0.0001, "loss": 1.5638, "step": 754432 }, { "epoch": 65.08695652173913, "grad_norm": 0.3891160190105438, "learning_rate": 0.0001, "loss": 1.5572, "step": 754488 }, { "epoch": 65.09178743961353, "grad_norm": 0.3485589027404785, "learning_rate": 0.0001, "loss": 1.5654, "step": 754544 }, { "epoch": 65.09661835748793, "grad_norm": 0.36372697353363037, "learning_rate": 0.0001, "loss": 1.567, "step": 754600 }, { "epoch": 65.10144927536231, "grad_norm": 0.35620468854904175, "learning_rate": 0.0001, "loss": 1.5578, "step": 754656 }, { "epoch": 65.10628019323671, "grad_norm": 1.0777480602264404, "learning_rate": 0.0001, "loss": 1.5693, "step": 754712 }, { "epoch": 65.11111111111111, "grad_norm": 0.4588122069835663, "learning_rate": 0.0001, "loss": 1.5562, "step": 754768 }, { "epoch": 65.1159420289855, "grad_norm": 1.148769736289978, "learning_rate": 0.0001, "loss": 1.5648, "step": 754824 }, { "epoch": 65.1207729468599, "grad_norm": 22.41375160217285, "learning_rate": 0.0001, "loss": 1.5639, "step": 754880 }, { "epoch": 65.1256038647343, "grad_norm": 1.667398452758789, "learning_rate": 0.0001, "loss": 1.5633, "step": 754936 }, { "epoch": 65.1304347826087, "grad_norm": 0.763707160949707, "learning_rate": 0.0001, "loss": 1.5623, "step": 754992 }, { "epoch": 65.13526570048309, "grad_norm": 0.27680841088294983, "learning_rate": 0.0001, "loss": 1.5656, "step": 755048 }, { "epoch": 65.14009661835749, "grad_norm": 1.3278406858444214, "learning_rate": 0.0001, "loss": 1.5695, "step": 755104 }, { "epoch": 65.14492753623189, "grad_norm": 0.29387181997299194, "learning_rate": 0.0001, "loss": 1.5646, "step": 755160 }, { "epoch": 65.14975845410628, "grad_norm": 0.25877800583839417, "learning_rate": 0.0001, "loss": 1.5629, "step": 755216 }, { "epoch": 65.15458937198068, "grad_norm": 0.34677207469940186, "learning_rate": 0.0001, "loss": 1.569, "step": 755272 }, { "epoch": 65.15942028985508, "grad_norm": 0.2910681366920471, "learning_rate": 0.0001, "loss": 1.5677, "step": 755328 }, { "epoch": 65.16425120772946, "grad_norm": 0.3450247049331665, "learning_rate": 0.0001, "loss": 1.5605, "step": 755384 }, { "epoch": 65.16908212560386, "grad_norm": 0.3260599374771118, "learning_rate": 0.0001, "loss": 1.5642, "step": 755440 }, { "epoch": 65.17391304347827, "grad_norm": 0.2980875074863434, "learning_rate": 0.0001, "loss": 1.5626, "step": 755496 }, { "epoch": 65.17874396135265, "grad_norm": 0.3689132630825043, "learning_rate": 0.0001, "loss": 1.5637, "step": 755552 }, { "epoch": 65.18357487922705, "grad_norm": 0.2579410970211029, "learning_rate": 0.0001, "loss": 1.5645, "step": 755608 }, { "epoch": 65.18840579710145, "grad_norm": 0.3051004409790039, "learning_rate": 0.0001, "loss": 1.5638, "step": 755664 }, { "epoch": 65.19323671497584, "grad_norm": 0.3647564649581909, "learning_rate": 0.0001, "loss": 1.5598, "step": 755720 }, { "epoch": 65.19806763285024, "grad_norm": 0.27326592803001404, "learning_rate": 0.0001, "loss": 1.5582, "step": 755776 }, { "epoch": 65.20289855072464, "grad_norm": 0.5404766201972961, "learning_rate": 0.0001, "loss": 1.5664, "step": 755832 }, { "epoch": 65.20772946859903, "grad_norm": 0.27613720297813416, "learning_rate": 0.0001, "loss": 1.5586, "step": 755888 }, { "epoch": 65.21256038647343, "grad_norm": 0.3174060881137848, "learning_rate": 0.0001, "loss": 1.5586, "step": 755944 }, { "epoch": 65.21739130434783, "grad_norm": 0.2816508412361145, "learning_rate": 0.0001, "loss": 1.5616, "step": 756000 }, { "epoch": 65.22222222222223, "grad_norm": 0.31043508648872375, "learning_rate": 0.0001, "loss": 1.565, "step": 756056 }, { "epoch": 65.22705314009661, "grad_norm": 0.31107884645462036, "learning_rate": 0.0001, "loss": 1.5637, "step": 756112 }, { "epoch": 65.23188405797102, "grad_norm": 0.3938499391078949, "learning_rate": 0.0001, "loss": 1.5641, "step": 756168 }, { "epoch": 65.23671497584542, "grad_norm": 0.2659676671028137, "learning_rate": 0.0001, "loss": 1.5617, "step": 756224 }, { "epoch": 65.2415458937198, "grad_norm": 0.3660803735256195, "learning_rate": 0.0001, "loss": 1.5615, "step": 756280 }, { "epoch": 65.2463768115942, "grad_norm": 0.25417375564575195, "learning_rate": 0.0001, "loss": 1.5595, "step": 756336 }, { "epoch": 65.2512077294686, "grad_norm": 4.452216625213623, "learning_rate": 0.0001, "loss": 1.5618, "step": 756392 }, { "epoch": 65.25603864734299, "grad_norm": 0.3459450304508209, "learning_rate": 0.0001, "loss": 1.5637, "step": 756448 }, { "epoch": 65.26086956521739, "grad_norm": 0.3070867359638214, "learning_rate": 0.0001, "loss": 1.5616, "step": 756504 }, { "epoch": 65.26570048309179, "grad_norm": 0.31357911229133606, "learning_rate": 0.0001, "loss": 1.5671, "step": 756560 }, { "epoch": 65.27053140096618, "grad_norm": 0.31082621216773987, "learning_rate": 0.0001, "loss": 1.5669, "step": 756616 }, { "epoch": 65.27536231884058, "grad_norm": 19.791730880737305, "learning_rate": 0.0001, "loss": 1.5692, "step": 756672 }, { "epoch": 65.28019323671498, "grad_norm": 4.677754878997803, "learning_rate": 0.0001, "loss": 1.563, "step": 756728 }, { "epoch": 65.28502415458937, "grad_norm": 0.2757240831851959, "learning_rate": 0.0001, "loss": 1.5692, "step": 756784 }, { "epoch": 65.28985507246377, "grad_norm": 0.33657506108283997, "learning_rate": 0.0001, "loss": 1.5633, "step": 756840 }, { "epoch": 65.29468599033817, "grad_norm": 1.3593143224716187, "learning_rate": 0.0001, "loss": 1.5715, "step": 756896 }, { "epoch": 65.29951690821257, "grad_norm": 0.29490721225738525, "learning_rate": 0.0001, "loss": 1.5692, "step": 756952 }, { "epoch": 65.30434782608695, "grad_norm": 0.2774712145328522, "learning_rate": 0.0001, "loss": 1.5655, "step": 757008 }, { "epoch": 65.30917874396135, "grad_norm": 0.41362911462783813, "learning_rate": 0.0001, "loss": 1.5617, "step": 757064 }, { "epoch": 65.31400966183575, "grad_norm": 0.28417572379112244, "learning_rate": 0.0001, "loss": 1.5603, "step": 757120 }, { "epoch": 65.31884057971014, "grad_norm": 0.24836571514606476, "learning_rate": 0.0001, "loss": 1.5632, "step": 757176 }, { "epoch": 65.32367149758454, "grad_norm": 4.954866409301758, "learning_rate": 0.0001, "loss": 1.5599, "step": 757232 }, { "epoch": 65.32850241545894, "grad_norm": 0.3920716345310211, "learning_rate": 0.0001, "loss": 1.5579, "step": 757288 }, { "epoch": 65.33333333333333, "grad_norm": 0.3206993341445923, "learning_rate": 0.0001, "loss": 1.5597, "step": 757344 }, { "epoch": 65.33816425120773, "grad_norm": 8.639791488647461, "learning_rate": 0.0001, "loss": 1.5607, "step": 757400 }, { "epoch": 65.34299516908213, "grad_norm": 0.2821206748485565, "learning_rate": 0.0001, "loss": 1.5707, "step": 757456 }, { "epoch": 65.34782608695652, "grad_norm": 0.32966965436935425, "learning_rate": 0.0001, "loss": 1.5631, "step": 757512 }, { "epoch": 65.35265700483092, "grad_norm": 0.31244954466819763, "learning_rate": 0.0001, "loss": 1.5675, "step": 757568 }, { "epoch": 65.35748792270532, "grad_norm": 0.316843718290329, "learning_rate": 0.0001, "loss": 1.5661, "step": 757624 }, { "epoch": 65.3623188405797, "grad_norm": 0.284454882144928, "learning_rate": 0.0001, "loss": 1.5681, "step": 757680 }, { "epoch": 65.3671497584541, "grad_norm": 0.22955681383609772, "learning_rate": 0.0001, "loss": 1.5619, "step": 757736 }, { "epoch": 65.3719806763285, "grad_norm": 17.78631591796875, "learning_rate": 0.0001, "loss": 1.5662, "step": 757792 }, { "epoch": 65.3768115942029, "grad_norm": 0.2811257839202881, "learning_rate": 0.0001, "loss": 1.562, "step": 757848 }, { "epoch": 65.38164251207729, "grad_norm": 0.38654470443725586, "learning_rate": 0.0001, "loss": 1.5652, "step": 757904 }, { "epoch": 65.38647342995169, "grad_norm": 1.4204473495483398, "learning_rate": 0.0001, "loss": 1.5614, "step": 757960 }, { "epoch": 65.3913043478261, "grad_norm": 0.30883902311325073, "learning_rate": 0.0001, "loss": 1.5715, "step": 758016 }, { "epoch": 65.39613526570048, "grad_norm": 0.29755425453186035, "learning_rate": 0.0001, "loss": 1.5616, "step": 758072 }, { "epoch": 65.40096618357488, "grad_norm": 22.493778228759766, "learning_rate": 0.0001, "loss": 1.5657, "step": 758128 }, { "epoch": 65.40579710144928, "grad_norm": 0.27127158641815186, "learning_rate": 0.0001, "loss": 1.5681, "step": 758184 }, { "epoch": 65.41062801932367, "grad_norm": 0.26343101263046265, "learning_rate": 0.0001, "loss": 1.5619, "step": 758240 }, { "epoch": 65.41545893719807, "grad_norm": 0.3590954542160034, "learning_rate": 0.0001, "loss": 1.5651, "step": 758296 }, { "epoch": 65.42028985507247, "grad_norm": 0.5739070773124695, "learning_rate": 0.0001, "loss": 1.5696, "step": 758352 }, { "epoch": 65.42512077294685, "grad_norm": 0.2842254936695099, "learning_rate": 0.0001, "loss": 1.5612, "step": 758408 }, { "epoch": 65.42995169082126, "grad_norm": 0.3246716260910034, "learning_rate": 0.0001, "loss": 1.5607, "step": 758464 }, { "epoch": 65.43478260869566, "grad_norm": 0.26981788873672485, "learning_rate": 0.0001, "loss": 1.5695, "step": 758520 }, { "epoch": 65.43961352657004, "grad_norm": 0.27210697531700134, "learning_rate": 0.0001, "loss": 1.5696, "step": 758576 }, { "epoch": 65.44444444444444, "grad_norm": 0.305817186832428, "learning_rate": 0.0001, "loss": 1.5678, "step": 758632 }, { "epoch": 65.44927536231884, "grad_norm": 0.27315637469291687, "learning_rate": 0.0001, "loss": 1.5647, "step": 758688 }, { "epoch": 65.45410628019323, "grad_norm": 3.1590018272399902, "learning_rate": 0.0001, "loss": 1.5627, "step": 758744 }, { "epoch": 65.45893719806763, "grad_norm": 0.7622895836830139, "learning_rate": 0.0001, "loss": 1.5649, "step": 758800 }, { "epoch": 65.46376811594203, "grad_norm": 1.1339725255966187, "learning_rate": 0.0001, "loss": 1.5674, "step": 758856 }, { "epoch": 65.46859903381643, "grad_norm": 0.31277814507484436, "learning_rate": 0.0001, "loss": 1.579, "step": 758912 }, { "epoch": 65.47342995169082, "grad_norm": 8.501861572265625, "learning_rate": 0.0001, "loss": 1.5647, "step": 758968 }, { "epoch": 65.47826086956522, "grad_norm": 0.3834956884384155, "learning_rate": 0.0001, "loss": 1.5643, "step": 759024 }, { "epoch": 65.48309178743962, "grad_norm": 23.349061965942383, "learning_rate": 0.0001, "loss": 1.5637, "step": 759080 }, { "epoch": 65.487922705314, "grad_norm": 0.2721394896507263, "learning_rate": 0.0001, "loss": 1.5715, "step": 759136 }, { "epoch": 65.4927536231884, "grad_norm": 14.878941535949707, "learning_rate": 0.0001, "loss": 1.5671, "step": 759192 }, { "epoch": 65.4975845410628, "grad_norm": 0.4069483280181885, "learning_rate": 0.0001, "loss": 1.5723, "step": 759248 }, { "epoch": 65.5024154589372, "grad_norm": 0.3082132935523987, "learning_rate": 0.0001, "loss": 1.5747, "step": 759304 }, { "epoch": 65.5072463768116, "grad_norm": 0.2733650207519531, "learning_rate": 0.0001, "loss": 1.5611, "step": 759360 }, { "epoch": 65.512077294686, "grad_norm": 0.26898306608200073, "learning_rate": 0.0001, "loss": 1.5648, "step": 759416 }, { "epoch": 65.51690821256038, "grad_norm": 0.2577162981033325, "learning_rate": 0.0001, "loss": 1.5683, "step": 759472 }, { "epoch": 65.52173913043478, "grad_norm": 0.34289443492889404, "learning_rate": 0.0001, "loss": 1.5668, "step": 759528 }, { "epoch": 65.52657004830918, "grad_norm": 0.30483537912368774, "learning_rate": 0.0001, "loss": 1.5611, "step": 759584 }, { "epoch": 65.53140096618357, "grad_norm": 0.29410961270332336, "learning_rate": 0.0001, "loss": 1.5684, "step": 759640 }, { "epoch": 65.53623188405797, "grad_norm": 0.42968323826789856, "learning_rate": 0.0001, "loss": 1.5664, "step": 759696 }, { "epoch": 65.54106280193237, "grad_norm": 0.2422635704278946, "learning_rate": 0.0001, "loss": 1.5648, "step": 759752 }, { "epoch": 65.54589371980677, "grad_norm": 0.32057294249534607, "learning_rate": 0.0001, "loss": 1.5588, "step": 759808 }, { "epoch": 65.55072463768116, "grad_norm": 0.2674073874950409, "learning_rate": 0.0001, "loss": 1.5753, "step": 759864 }, { "epoch": 65.55555555555556, "grad_norm": 0.24342548847198486, "learning_rate": 0.0001, "loss": 1.5611, "step": 759920 }, { "epoch": 65.56038647342996, "grad_norm": 0.26104769110679626, "learning_rate": 0.0001, "loss": 1.5666, "step": 759976 }, { "epoch": 65.56521739130434, "grad_norm": 0.3460283875465393, "learning_rate": 0.0001, "loss": 1.5664, "step": 760032 }, { "epoch": 65.57004830917874, "grad_norm": 0.3970549404621124, "learning_rate": 0.0001, "loss": 1.5651, "step": 760088 }, { "epoch": 65.57487922705315, "grad_norm": 0.35186126828193665, "learning_rate": 0.0001, "loss": 1.5624, "step": 760144 }, { "epoch": 65.57971014492753, "grad_norm": 0.4592183828353882, "learning_rate": 0.0001, "loss": 1.5699, "step": 760200 }, { "epoch": 65.58454106280193, "grad_norm": 0.2835994064807892, "learning_rate": 0.0001, "loss": 1.5634, "step": 760256 }, { "epoch": 65.58937198067633, "grad_norm": 0.37213578820228577, "learning_rate": 0.0001, "loss": 1.5671, "step": 760312 }, { "epoch": 65.59420289855072, "grad_norm": 0.33897069096565247, "learning_rate": 0.0001, "loss": 1.559, "step": 760368 }, { "epoch": 65.59903381642512, "grad_norm": 0.314315527677536, "learning_rate": 0.0001, "loss": 1.562, "step": 760424 }, { "epoch": 65.60386473429952, "grad_norm": 0.24713730812072754, "learning_rate": 0.0001, "loss": 1.5682, "step": 760480 }, { "epoch": 65.6086956521739, "grad_norm": 0.3315476179122925, "learning_rate": 0.0001, "loss": 1.566, "step": 760536 }, { "epoch": 65.61352657004831, "grad_norm": 0.3474685251712799, "learning_rate": 0.0001, "loss": 1.5643, "step": 760592 }, { "epoch": 65.61835748792271, "grad_norm": 24.092727661132812, "learning_rate": 0.0001, "loss": 1.5641, "step": 760648 }, { "epoch": 65.6231884057971, "grad_norm": 2.0175561904907227, "learning_rate": 0.0001, "loss": 1.5664, "step": 760704 }, { "epoch": 65.6280193236715, "grad_norm": 0.6867895126342773, "learning_rate": 0.0001, "loss": 1.5644, "step": 760760 }, { "epoch": 65.6328502415459, "grad_norm": 0.2910241186618805, "learning_rate": 0.0001, "loss": 1.5658, "step": 760816 }, { "epoch": 65.6376811594203, "grad_norm": 0.23299676179885864, "learning_rate": 0.0001, "loss": 1.5664, "step": 760872 }, { "epoch": 65.64251207729468, "grad_norm": 0.30295389890670776, "learning_rate": 0.0001, "loss": 1.5682, "step": 760928 }, { "epoch": 65.64734299516908, "grad_norm": 0.377191424369812, "learning_rate": 0.0001, "loss": 1.5633, "step": 760984 }, { "epoch": 65.65217391304348, "grad_norm": 1.1991280317306519, "learning_rate": 0.0001, "loss": 1.5637, "step": 761040 }, { "epoch": 65.65700483091787, "grad_norm": 0.2983400225639343, "learning_rate": 0.0001, "loss": 1.5619, "step": 761096 }, { "epoch": 65.66183574879227, "grad_norm": 32.30929183959961, "learning_rate": 0.0001, "loss": 1.5594, "step": 761152 }, { "epoch": 65.66666666666667, "grad_norm": 0.41170889139175415, "learning_rate": 0.0001, "loss": 1.5661, "step": 761208 }, { "epoch": 65.67149758454106, "grad_norm": 0.34200048446655273, "learning_rate": 0.0001, "loss": 1.5709, "step": 761264 }, { "epoch": 65.67632850241546, "grad_norm": 0.29555243253707886, "learning_rate": 0.0001, "loss": 1.5621, "step": 761320 }, { "epoch": 65.68115942028986, "grad_norm": 0.33945366740226746, "learning_rate": 0.0001, "loss": 1.5705, "step": 761376 }, { "epoch": 65.68599033816425, "grad_norm": 0.30149444937705994, "learning_rate": 0.0001, "loss": 1.5672, "step": 761432 }, { "epoch": 65.69082125603865, "grad_norm": 0.234300896525383, "learning_rate": 0.0001, "loss": 1.5633, "step": 761488 }, { "epoch": 65.69565217391305, "grad_norm": 0.5533701777458191, "learning_rate": 0.0001, "loss": 1.5649, "step": 761544 }, { "epoch": 65.70048309178743, "grad_norm": 0.5896756052970886, "learning_rate": 0.0001, "loss": 1.5633, "step": 761600 }, { "epoch": 65.70531400966183, "grad_norm": 0.30221840739250183, "learning_rate": 0.0001, "loss": 1.5608, "step": 761656 }, { "epoch": 65.71014492753623, "grad_norm": 0.2640894949436188, "learning_rate": 0.0001, "loss": 1.5679, "step": 761712 }, { "epoch": 65.71497584541063, "grad_norm": 0.31356096267700195, "learning_rate": 0.0001, "loss": 1.5621, "step": 761768 }, { "epoch": 65.71980676328502, "grad_norm": 0.3306295871734619, "learning_rate": 0.0001, "loss": 1.5692, "step": 761824 }, { "epoch": 65.72463768115942, "grad_norm": 0.32122841477394104, "learning_rate": 0.0001, "loss": 1.5655, "step": 761880 }, { "epoch": 65.72946859903382, "grad_norm": 0.26876741647720337, "learning_rate": 0.0001, "loss": 1.5626, "step": 761936 }, { "epoch": 65.73429951690821, "grad_norm": 0.29240256547927856, "learning_rate": 0.0001, "loss": 1.5549, "step": 761992 }, { "epoch": 65.73913043478261, "grad_norm": 0.2740503251552582, "learning_rate": 0.0001, "loss": 1.5669, "step": 762048 }, { "epoch": 65.74396135265701, "grad_norm": 0.29549115896224976, "learning_rate": 0.0001, "loss": 1.5612, "step": 762104 }, { "epoch": 65.7487922705314, "grad_norm": 0.2696676254272461, "learning_rate": 0.0001, "loss": 1.5664, "step": 762160 }, { "epoch": 65.7536231884058, "grad_norm": 0.3849197030067444, "learning_rate": 0.0001, "loss": 1.5609, "step": 762216 }, { "epoch": 65.7584541062802, "grad_norm": 0.26438507437705994, "learning_rate": 0.0001, "loss": 1.5646, "step": 762272 }, { "epoch": 65.76328502415458, "grad_norm": 0.6821528673171997, "learning_rate": 0.0001, "loss": 1.5716, "step": 762328 }, { "epoch": 65.76811594202898, "grad_norm": 0.2856484651565552, "learning_rate": 0.0001, "loss": 1.567, "step": 762384 }, { "epoch": 65.77294685990339, "grad_norm": 0.309221088886261, "learning_rate": 0.0001, "loss": 1.5644, "step": 762440 }, { "epoch": 65.77777777777777, "grad_norm": 0.2628379166126251, "learning_rate": 0.0001, "loss": 1.5615, "step": 762496 }, { "epoch": 65.78260869565217, "grad_norm": 0.3739776909351349, "learning_rate": 0.0001, "loss": 1.5661, "step": 762552 }, { "epoch": 65.78743961352657, "grad_norm": 0.36333420872688293, "learning_rate": 0.0001, "loss": 1.5639, "step": 762608 }, { "epoch": 65.79227053140096, "grad_norm": 0.31493887305259705, "learning_rate": 0.0001, "loss": 1.5619, "step": 762664 }, { "epoch": 65.79710144927536, "grad_norm": 0.5347508192062378, "learning_rate": 0.0001, "loss": 1.569, "step": 762720 }, { "epoch": 65.80193236714976, "grad_norm": 0.25016093254089355, "learning_rate": 0.0001, "loss": 1.5654, "step": 762776 }, { "epoch": 65.80676328502416, "grad_norm": 0.691175103187561, "learning_rate": 0.0001, "loss": 1.5661, "step": 762832 }, { "epoch": 65.81159420289855, "grad_norm": 0.3461848497390747, "learning_rate": 0.0001, "loss": 1.5658, "step": 762888 }, { "epoch": 65.81642512077295, "grad_norm": 0.49480387568473816, "learning_rate": 0.0001, "loss": 1.567, "step": 762944 }, { "epoch": 65.82125603864735, "grad_norm": 0.2591111361980438, "learning_rate": 0.0001, "loss": 1.5688, "step": 763000 }, { "epoch": 65.82608695652173, "grad_norm": 0.43944188952445984, "learning_rate": 0.0001, "loss": 1.5614, "step": 763056 }, { "epoch": 65.83091787439614, "grad_norm": 0.8201513290405273, "learning_rate": 0.0001, "loss": 1.5642, "step": 763112 }, { "epoch": 65.83574879227054, "grad_norm": 1.3611624240875244, "learning_rate": 0.0001, "loss": 1.5629, "step": 763168 }, { "epoch": 65.84057971014492, "grad_norm": 0.3612276613712311, "learning_rate": 0.0001, "loss": 1.5684, "step": 763224 }, { "epoch": 65.84541062801932, "grad_norm": 0.36534884572029114, "learning_rate": 0.0001, "loss": 1.5644, "step": 763280 }, { "epoch": 65.85024154589372, "grad_norm": 0.38576048612594604, "learning_rate": 0.0001, "loss": 1.5618, "step": 763336 }, { "epoch": 65.85507246376811, "grad_norm": 1.3462353944778442, "learning_rate": 0.0001, "loss": 1.5683, "step": 763392 }, { "epoch": 65.85990338164251, "grad_norm": 0.28043726086616516, "learning_rate": 0.0001, "loss": 1.5672, "step": 763448 }, { "epoch": 65.86473429951691, "grad_norm": 0.24759472906589508, "learning_rate": 0.0001, "loss": 1.5676, "step": 763504 }, { "epoch": 65.8695652173913, "grad_norm": 0.3531445264816284, "learning_rate": 0.0001, "loss": 1.5654, "step": 763560 }, { "epoch": 65.8743961352657, "grad_norm": 1.3340014219284058, "learning_rate": 0.0001, "loss": 1.5692, "step": 763616 }, { "epoch": 65.8792270531401, "grad_norm": 0.26638931035995483, "learning_rate": 0.0001, "loss": 1.5703, "step": 763672 }, { "epoch": 65.8840579710145, "grad_norm": 0.5166969895362854, "learning_rate": 0.0001, "loss": 1.5702, "step": 763728 }, { "epoch": 65.88888888888889, "grad_norm": 0.3111715316772461, "learning_rate": 0.0001, "loss": 1.5678, "step": 763784 }, { "epoch": 65.89371980676329, "grad_norm": 0.6437698602676392, "learning_rate": 0.0001, "loss": 1.5718, "step": 763840 }, { "epoch": 65.89855072463769, "grad_norm": 0.26165738701820374, "learning_rate": 0.0001, "loss": 1.5711, "step": 763896 }, { "epoch": 65.90338164251207, "grad_norm": 0.29472431540489197, "learning_rate": 0.0001, "loss": 1.566, "step": 763952 }, { "epoch": 65.90821256038647, "grad_norm": 0.29940956830978394, "learning_rate": 0.0001, "loss": 1.5751, "step": 764008 }, { "epoch": 65.91304347826087, "grad_norm": 0.491241991519928, "learning_rate": 0.0001, "loss": 1.5671, "step": 764064 }, { "epoch": 65.91787439613526, "grad_norm": 0.3038044273853302, "learning_rate": 0.0001, "loss": 1.566, "step": 764120 }, { "epoch": 65.92270531400966, "grad_norm": 0.2811158001422882, "learning_rate": 0.0001, "loss": 1.5699, "step": 764176 }, { "epoch": 65.92753623188406, "grad_norm": 0.2670927047729492, "learning_rate": 0.0001, "loss": 1.5667, "step": 764232 }, { "epoch": 65.93236714975845, "grad_norm": 0.3794791102409363, "learning_rate": 0.0001, "loss": 1.5683, "step": 764288 }, { "epoch": 65.93719806763285, "grad_norm": 0.3079412877559662, "learning_rate": 0.0001, "loss": 1.5657, "step": 764344 }, { "epoch": 65.94202898550725, "grad_norm": 0.30686554312705994, "learning_rate": 0.0001, "loss": 1.5644, "step": 764400 }, { "epoch": 65.94685990338164, "grad_norm": 0.698809027671814, "learning_rate": 0.0001, "loss": 1.5735, "step": 764456 }, { "epoch": 65.95169082125604, "grad_norm": 1.726287841796875, "learning_rate": 0.0001, "loss": 1.5643, "step": 764512 }, { "epoch": 65.95652173913044, "grad_norm": 0.580233633518219, "learning_rate": 0.0001, "loss": 1.5666, "step": 764568 }, { "epoch": 65.96135265700484, "grad_norm": 0.46957483887672424, "learning_rate": 0.0001, "loss": 1.5697, "step": 764624 }, { "epoch": 65.96618357487922, "grad_norm": 1.1239022016525269, "learning_rate": 0.0001, "loss": 1.5654, "step": 764680 }, { "epoch": 65.97101449275362, "grad_norm": 0.35959452390670776, "learning_rate": 0.0001, "loss": 1.5661, "step": 764736 }, { "epoch": 65.97584541062803, "grad_norm": 0.3647146224975586, "learning_rate": 0.0001, "loss": 1.5684, "step": 764792 }, { "epoch": 65.98067632850241, "grad_norm": 1.123530626296997, "learning_rate": 0.0001, "loss": 1.568, "step": 764848 }, { "epoch": 65.98550724637681, "grad_norm": 0.40050074458122253, "learning_rate": 0.0001, "loss": 1.5595, "step": 764904 }, { "epoch": 65.99033816425121, "grad_norm": 0.28689029812812805, "learning_rate": 0.0001, "loss": 1.5618, "step": 764960 }, { "epoch": 65.9951690821256, "grad_norm": 0.26310133934020996, "learning_rate": 0.0001, "loss": 1.5679, "step": 765016 }, { "epoch": 66.0, "grad_norm": 0.5856367349624634, "learning_rate": 0.0001, "loss": 1.5721, "step": 765072 }, { "epoch": 66.0048309178744, "grad_norm": 0.2563001215457916, "learning_rate": 0.0001, "loss": 1.5631, "step": 765128 }, { "epoch": 66.00966183574879, "grad_norm": 0.3006897270679474, "learning_rate": 0.0001, "loss": 1.5583, "step": 765184 }, { "epoch": 66.01449275362319, "grad_norm": 0.2845304012298584, "learning_rate": 0.0001, "loss": 1.5666, "step": 765240 }, { "epoch": 66.01932367149759, "grad_norm": 0.39421936869621277, "learning_rate": 0.0001, "loss": 1.5588, "step": 765296 }, { "epoch": 66.02415458937197, "grad_norm": 0.30983638763427734, "learning_rate": 0.0001, "loss": 1.5505, "step": 765352 }, { "epoch": 66.02898550724638, "grad_norm": 0.3367575407028198, "learning_rate": 0.0001, "loss": 1.5549, "step": 765408 }, { "epoch": 66.03381642512078, "grad_norm": 39.64958953857422, "learning_rate": 0.0001, "loss": 1.5664, "step": 765464 }, { "epoch": 66.03864734299516, "grad_norm": 0.2754492461681366, "learning_rate": 0.0001, "loss": 1.5639, "step": 765520 }, { "epoch": 66.04347826086956, "grad_norm": 0.43371161818504333, "learning_rate": 0.0001, "loss": 1.5602, "step": 765576 }, { "epoch": 66.04830917874396, "grad_norm": 0.2664428651332855, "learning_rate": 0.0001, "loss": 1.5556, "step": 765632 }, { "epoch": 66.05314009661836, "grad_norm": 0.6307640671730042, "learning_rate": 0.0001, "loss": 1.5589, "step": 765688 }, { "epoch": 66.05797101449275, "grad_norm": 0.7190828323364258, "learning_rate": 0.0001, "loss": 1.565, "step": 765744 }, { "epoch": 66.06280193236715, "grad_norm": 4.76002311706543, "learning_rate": 0.0001, "loss": 1.5563, "step": 765800 }, { "epoch": 66.06763285024155, "grad_norm": 0.3329600393772125, "learning_rate": 0.0001, "loss": 1.5597, "step": 765856 }, { "epoch": 66.07246376811594, "grad_norm": 0.2572633922100067, "learning_rate": 0.0001, "loss": 1.5579, "step": 765912 }, { "epoch": 66.07729468599034, "grad_norm": 0.28197842836380005, "learning_rate": 0.0001, "loss": 1.5601, "step": 765968 }, { "epoch": 66.08212560386474, "grad_norm": 0.34686148166656494, "learning_rate": 0.0001, "loss": 1.5639, "step": 766024 }, { "epoch": 66.08695652173913, "grad_norm": 0.2984161674976349, "learning_rate": 0.0001, "loss": 1.5602, "step": 766080 }, { "epoch": 66.09178743961353, "grad_norm": 0.3820868730545044, "learning_rate": 0.0001, "loss": 1.5675, "step": 766136 }, { "epoch": 66.09661835748793, "grad_norm": 0.261527955532074, "learning_rate": 0.0001, "loss": 1.559, "step": 766192 }, { "epoch": 66.10144927536231, "grad_norm": 2.700469732284546, "learning_rate": 0.0001, "loss": 1.5639, "step": 766248 }, { "epoch": 66.10628019323671, "grad_norm": 0.24000781774520874, "learning_rate": 0.0001, "loss": 1.5654, "step": 766304 }, { "epoch": 66.11111111111111, "grad_norm": 0.4684176743030548, "learning_rate": 0.0001, "loss": 1.5599, "step": 766360 }, { "epoch": 66.1159420289855, "grad_norm": 0.23709455132484436, "learning_rate": 0.0001, "loss": 1.5615, "step": 766416 }, { "epoch": 66.1207729468599, "grad_norm": 0.2759253680706024, "learning_rate": 0.0001, "loss": 1.557, "step": 766472 }, { "epoch": 66.1256038647343, "grad_norm": 3.278449296951294, "learning_rate": 0.0001, "loss": 1.5549, "step": 766528 }, { "epoch": 66.1304347826087, "grad_norm": 0.4254584312438965, "learning_rate": 0.0001, "loss": 1.5543, "step": 766584 }, { "epoch": 66.13526570048309, "grad_norm": 18.568620681762695, "learning_rate": 0.0001, "loss": 1.5686, "step": 766640 }, { "epoch": 66.14009661835749, "grad_norm": 0.4054301977157593, "learning_rate": 0.0001, "loss": 1.563, "step": 766696 }, { "epoch": 66.14492753623189, "grad_norm": 0.40708285570144653, "learning_rate": 0.0001, "loss": 1.5616, "step": 766752 }, { "epoch": 66.14975845410628, "grad_norm": 0.2982594966888428, "learning_rate": 0.0001, "loss": 1.557, "step": 766808 }, { "epoch": 66.15458937198068, "grad_norm": 0.37242957949638367, "learning_rate": 0.0001, "loss": 1.5554, "step": 766864 }, { "epoch": 66.15942028985508, "grad_norm": 0.24564525485038757, "learning_rate": 0.0001, "loss": 1.5663, "step": 766920 }, { "epoch": 66.16425120772946, "grad_norm": 0.29112040996551514, "learning_rate": 0.0001, "loss": 1.5478, "step": 766976 }, { "epoch": 66.16908212560386, "grad_norm": 29.102691650390625, "learning_rate": 0.0001, "loss": 1.5639, "step": 767032 }, { "epoch": 66.17391304347827, "grad_norm": 0.3397343158721924, "learning_rate": 0.0001, "loss": 1.5628, "step": 767088 }, { "epoch": 66.17874396135265, "grad_norm": 0.7636525630950928, "learning_rate": 0.0001, "loss": 1.5561, "step": 767144 }, { "epoch": 66.18357487922705, "grad_norm": 0.3689013421535492, "learning_rate": 0.0001, "loss": 1.5583, "step": 767200 }, { "epoch": 66.18840579710145, "grad_norm": 0.5007786750793457, "learning_rate": 0.0001, "loss": 1.5661, "step": 767256 }, { "epoch": 66.19323671497584, "grad_norm": 1.301956295967102, "learning_rate": 0.0001, "loss": 1.5591, "step": 767312 }, { "epoch": 66.19806763285024, "grad_norm": 0.2689502239227295, "learning_rate": 0.0001, "loss": 1.5629, "step": 767368 }, { "epoch": 66.20289855072464, "grad_norm": 0.2749817669391632, "learning_rate": 0.0001, "loss": 1.5704, "step": 767424 }, { "epoch": 66.20772946859903, "grad_norm": 0.36555102467536926, "learning_rate": 0.0001, "loss": 1.5548, "step": 767480 }, { "epoch": 66.21256038647343, "grad_norm": 0.25406187772750854, "learning_rate": 0.0001, "loss": 1.561, "step": 767536 }, { "epoch": 66.21739130434783, "grad_norm": 0.5487571954727173, "learning_rate": 0.0001, "loss": 1.5621, "step": 767592 }, { "epoch": 66.22222222222223, "grad_norm": 0.8515563011169434, "learning_rate": 0.0001, "loss": 1.5582, "step": 767648 }, { "epoch": 66.22705314009661, "grad_norm": 0.3968696892261505, "learning_rate": 0.0001, "loss": 1.5558, "step": 767704 }, { "epoch": 66.23188405797102, "grad_norm": 0.316398948431015, "learning_rate": 0.0001, "loss": 1.5699, "step": 767760 }, { "epoch": 66.23671497584542, "grad_norm": 0.5930464267730713, "learning_rate": 0.0001, "loss": 1.5592, "step": 767816 }, { "epoch": 66.2415458937198, "grad_norm": 0.3174757957458496, "learning_rate": 0.0001, "loss": 1.5557, "step": 767872 }, { "epoch": 66.2463768115942, "grad_norm": 0.5203148722648621, "learning_rate": 0.0001, "loss": 1.5582, "step": 767928 }, { "epoch": 66.2512077294686, "grad_norm": 0.39381128549575806, "learning_rate": 0.0001, "loss": 1.5668, "step": 767984 }, { "epoch": 66.25603864734299, "grad_norm": 0.29857659339904785, "learning_rate": 0.0001, "loss": 1.5614, "step": 768040 }, { "epoch": 66.26086956521739, "grad_norm": 0.24370655417442322, "learning_rate": 0.0001, "loss": 1.5659, "step": 768096 }, { "epoch": 66.26570048309179, "grad_norm": 5.171236038208008, "learning_rate": 0.0001, "loss": 1.5607, "step": 768152 }, { "epoch": 66.27053140096618, "grad_norm": 0.3161377012729645, "learning_rate": 0.0001, "loss": 1.5595, "step": 768208 }, { "epoch": 66.27536231884058, "grad_norm": 0.29709291458129883, "learning_rate": 0.0001, "loss": 1.5594, "step": 768264 }, { "epoch": 66.28019323671498, "grad_norm": 0.25580716133117676, "learning_rate": 0.0001, "loss": 1.5658, "step": 768320 }, { "epoch": 66.28502415458937, "grad_norm": 0.27786415815353394, "learning_rate": 0.0001, "loss": 1.561, "step": 768376 }, { "epoch": 66.28985507246377, "grad_norm": 0.28798848390579224, "learning_rate": 0.0001, "loss": 1.5619, "step": 768432 }, { "epoch": 66.29468599033817, "grad_norm": 0.5494320392608643, "learning_rate": 0.0001, "loss": 1.5603, "step": 768488 }, { "epoch": 66.29951690821257, "grad_norm": 0.4275902211666107, "learning_rate": 0.0001, "loss": 1.5576, "step": 768544 }, { "epoch": 66.30434782608695, "grad_norm": 0.23549358546733856, "learning_rate": 0.0001, "loss": 1.5686, "step": 768600 }, { "epoch": 66.30917874396135, "grad_norm": 0.4464161694049835, "learning_rate": 0.0001, "loss": 1.5668, "step": 768656 }, { "epoch": 66.31400966183575, "grad_norm": 0.24914662539958954, "learning_rate": 0.0001, "loss": 1.558, "step": 768712 }, { "epoch": 66.31884057971014, "grad_norm": 0.3065078854560852, "learning_rate": 0.0001, "loss": 1.5542, "step": 768768 }, { "epoch": 66.32367149758454, "grad_norm": 0.23981496691703796, "learning_rate": 0.0001, "loss": 1.5607, "step": 768824 }, { "epoch": 66.32850241545894, "grad_norm": 21.957721710205078, "learning_rate": 0.0001, "loss": 1.563, "step": 768880 }, { "epoch": 66.33333333333333, "grad_norm": 2.4587221145629883, "learning_rate": 0.0001, "loss": 1.5553, "step": 768936 }, { "epoch": 66.33816425120773, "grad_norm": 0.45057645440101624, "learning_rate": 0.0001, "loss": 1.5618, "step": 768992 }, { "epoch": 66.34299516908213, "grad_norm": 0.2848086953163147, "learning_rate": 0.0001, "loss": 1.5686, "step": 769048 }, { "epoch": 66.34782608695652, "grad_norm": 0.5729213356971741, "learning_rate": 0.0001, "loss": 1.561, "step": 769104 }, { "epoch": 66.35265700483092, "grad_norm": 4.3968915939331055, "learning_rate": 0.0001, "loss": 1.5618, "step": 769160 }, { "epoch": 66.35748792270532, "grad_norm": 0.8145719766616821, "learning_rate": 0.0001, "loss": 1.5554, "step": 769216 }, { "epoch": 66.3623188405797, "grad_norm": 0.2933526933193207, "learning_rate": 0.0001, "loss": 1.5637, "step": 769272 }, { "epoch": 66.3671497584541, "grad_norm": 0.5098117589950562, "learning_rate": 0.0001, "loss": 1.5529, "step": 769328 }, { "epoch": 66.3719806763285, "grad_norm": 0.2591201663017273, "learning_rate": 0.0001, "loss": 1.563, "step": 769384 }, { "epoch": 66.3768115942029, "grad_norm": 0.2683529555797577, "learning_rate": 0.0001, "loss": 1.56, "step": 769440 }, { "epoch": 66.38164251207729, "grad_norm": 0.35332444310188293, "learning_rate": 0.0001, "loss": 1.5572, "step": 769496 }, { "epoch": 66.38647342995169, "grad_norm": 1.2254807949066162, "learning_rate": 0.0001, "loss": 1.5637, "step": 769552 }, { "epoch": 66.3913043478261, "grad_norm": 0.4708338677883148, "learning_rate": 0.0001, "loss": 1.5635, "step": 769608 }, { "epoch": 66.39613526570048, "grad_norm": 0.7337239980697632, "learning_rate": 0.0001, "loss": 1.5561, "step": 769664 }, { "epoch": 66.40096618357488, "grad_norm": 0.3129578232765198, "learning_rate": 0.0001, "loss": 1.5639, "step": 769720 }, { "epoch": 66.40579710144928, "grad_norm": 0.2823856770992279, "learning_rate": 0.0001, "loss": 1.5605, "step": 769776 }, { "epoch": 66.41062801932367, "grad_norm": 0.24489352107048035, "learning_rate": 0.0001, "loss": 1.5594, "step": 769832 }, { "epoch": 66.41545893719807, "grad_norm": 0.35319992899894714, "learning_rate": 0.0001, "loss": 1.5643, "step": 769888 }, { "epoch": 66.42028985507247, "grad_norm": 0.24001237750053406, "learning_rate": 0.0001, "loss": 1.5572, "step": 769944 }, { "epoch": 66.42512077294685, "grad_norm": 0.3354353606700897, "learning_rate": 0.0001, "loss": 1.5586, "step": 770000 }, { "epoch": 66.42995169082126, "grad_norm": 0.27334338426589966, "learning_rate": 0.0001, "loss": 1.5653, "step": 770056 }, { "epoch": 66.43478260869566, "grad_norm": 0.35372093319892883, "learning_rate": 0.0001, "loss": 1.5635, "step": 770112 }, { "epoch": 66.43961352657004, "grad_norm": 0.37665238976478577, "learning_rate": 0.0001, "loss": 1.5663, "step": 770168 }, { "epoch": 66.44444444444444, "grad_norm": 0.2823256254196167, "learning_rate": 0.0001, "loss": 1.5637, "step": 770224 }, { "epoch": 66.44927536231884, "grad_norm": 0.38564005494117737, "learning_rate": 0.0001, "loss": 1.5685, "step": 770280 }, { "epoch": 66.45410628019323, "grad_norm": 1.4676063060760498, "learning_rate": 0.0001, "loss": 1.5674, "step": 770336 }, { "epoch": 66.45893719806763, "grad_norm": 0.3660421073436737, "learning_rate": 0.0001, "loss": 1.56, "step": 770392 }, { "epoch": 66.46376811594203, "grad_norm": 0.8738279938697815, "learning_rate": 0.0001, "loss": 1.5607, "step": 770448 }, { "epoch": 66.46859903381643, "grad_norm": 2.2517924308776855, "learning_rate": 0.0001, "loss": 1.5608, "step": 770504 }, { "epoch": 66.47342995169082, "grad_norm": 0.24801276624202728, "learning_rate": 0.0001, "loss": 1.5607, "step": 770560 }, { "epoch": 66.47826086956522, "grad_norm": 0.3785662353038788, "learning_rate": 0.0001, "loss": 1.5653, "step": 770616 }, { "epoch": 66.48309178743962, "grad_norm": 1.7208539247512817, "learning_rate": 0.0001, "loss": 1.5661, "step": 770672 }, { "epoch": 66.487922705314, "grad_norm": 0.26990121603012085, "learning_rate": 0.0001, "loss": 1.5694, "step": 770728 }, { "epoch": 66.4927536231884, "grad_norm": 0.2755604386329651, "learning_rate": 0.0001, "loss": 1.5649, "step": 770784 }, { "epoch": 66.4975845410628, "grad_norm": 0.24632267653942108, "learning_rate": 0.0001, "loss": 1.5597, "step": 770840 }, { "epoch": 66.5024154589372, "grad_norm": 0.4564986526966095, "learning_rate": 0.0001, "loss": 1.5612, "step": 770896 }, { "epoch": 66.5072463768116, "grad_norm": 0.29019200801849365, "learning_rate": 0.0001, "loss": 1.5648, "step": 770952 }, { "epoch": 66.512077294686, "grad_norm": 0.2082933634519577, "learning_rate": 0.0001, "loss": 1.558, "step": 771008 }, { "epoch": 66.51690821256038, "grad_norm": 0.29252296686172485, "learning_rate": 0.0001, "loss": 1.5622, "step": 771064 }, { "epoch": 66.52173913043478, "grad_norm": 0.24897262454032898, "learning_rate": 0.0001, "loss": 1.5632, "step": 771120 }, { "epoch": 66.52657004830918, "grad_norm": 1.9589807987213135, "learning_rate": 0.0001, "loss": 1.5597, "step": 771176 }, { "epoch": 66.53140096618357, "grad_norm": 0.2432938814163208, "learning_rate": 0.0001, "loss": 1.5693, "step": 771232 }, { "epoch": 66.53623188405797, "grad_norm": 1.4863018989562988, "learning_rate": 0.0001, "loss": 1.5703, "step": 771288 }, { "epoch": 66.54106280193237, "grad_norm": 0.356643408536911, "learning_rate": 0.0001, "loss": 1.5678, "step": 771344 }, { "epoch": 66.54589371980677, "grad_norm": 0.3247665464878082, "learning_rate": 0.0001, "loss": 1.5632, "step": 771400 }, { "epoch": 66.55072463768116, "grad_norm": 0.4967484772205353, "learning_rate": 0.0001, "loss": 1.5648, "step": 771456 }, { "epoch": 66.55555555555556, "grad_norm": 1.015754222869873, "learning_rate": 0.0001, "loss": 1.5572, "step": 771512 }, { "epoch": 66.56038647342996, "grad_norm": 0.2708740234375, "learning_rate": 0.0001, "loss": 1.5613, "step": 771568 }, { "epoch": 66.56521739130434, "grad_norm": 0.5621724724769592, "learning_rate": 0.0001, "loss": 1.5602, "step": 771624 }, { "epoch": 66.57004830917874, "grad_norm": 0.4278220534324646, "learning_rate": 0.0001, "loss": 1.5612, "step": 771680 }, { "epoch": 66.57487922705315, "grad_norm": 3.0510900020599365, "learning_rate": 0.0001, "loss": 1.5628, "step": 771736 }, { "epoch": 66.57971014492753, "grad_norm": 0.30085489153862, "learning_rate": 0.0001, "loss": 1.5623, "step": 771792 }, { "epoch": 66.58454106280193, "grad_norm": 0.23028452694416046, "learning_rate": 0.0001, "loss": 1.5662, "step": 771848 }, { "epoch": 66.58937198067633, "grad_norm": 3.0194573402404785, "learning_rate": 0.0001, "loss": 1.5616, "step": 771904 }, { "epoch": 66.59420289855072, "grad_norm": 0.2727173864841461, "learning_rate": 0.0001, "loss": 1.563, "step": 771960 }, { "epoch": 66.59903381642512, "grad_norm": 0.33669769763946533, "learning_rate": 0.0001, "loss": 1.5594, "step": 772016 }, { "epoch": 66.60386473429952, "grad_norm": 0.2346484512090683, "learning_rate": 0.0001, "loss": 1.5603, "step": 772072 }, { "epoch": 66.6086956521739, "grad_norm": 0.8767399787902832, "learning_rate": 0.0001, "loss": 1.5652, "step": 772128 }, { "epoch": 66.61352657004831, "grad_norm": 0.8701591491699219, "learning_rate": 0.0001, "loss": 1.5682, "step": 772184 }, { "epoch": 66.61835748792271, "grad_norm": 1.3740191459655762, "learning_rate": 0.0001, "loss": 1.5612, "step": 772240 }, { "epoch": 66.6231884057971, "grad_norm": 1.417009711265564, "learning_rate": 0.0001, "loss": 1.5657, "step": 772296 }, { "epoch": 66.6280193236715, "grad_norm": 0.285256952047348, "learning_rate": 0.0001, "loss": 1.5618, "step": 772352 }, { "epoch": 66.6328502415459, "grad_norm": 0.2641623914241791, "learning_rate": 0.0001, "loss": 1.5674, "step": 772408 }, { "epoch": 66.6376811594203, "grad_norm": 0.27014872431755066, "learning_rate": 0.0001, "loss": 1.5622, "step": 772464 }, { "epoch": 66.64251207729468, "grad_norm": 0.32357797026634216, "learning_rate": 0.0001, "loss": 1.5613, "step": 772520 }, { "epoch": 66.64734299516908, "grad_norm": 0.40125998854637146, "learning_rate": 0.0001, "loss": 1.5572, "step": 772576 }, { "epoch": 66.65217391304348, "grad_norm": 0.3150024712085724, "learning_rate": 0.0001, "loss": 1.5664, "step": 772632 }, { "epoch": 66.65700483091787, "grad_norm": 0.620135486125946, "learning_rate": 0.0001, "loss": 1.5611, "step": 772688 }, { "epoch": 66.66183574879227, "grad_norm": 0.25242945551872253, "learning_rate": 0.0001, "loss": 1.563, "step": 772744 }, { "epoch": 66.66666666666667, "grad_norm": 0.3337790071964264, "learning_rate": 0.0001, "loss": 1.5618, "step": 772800 }, { "epoch": 66.67149758454106, "grad_norm": 0.3339521586894989, "learning_rate": 0.0001, "loss": 1.56, "step": 772856 }, { "epoch": 66.67632850241546, "grad_norm": 1.3777198791503906, "learning_rate": 0.0001, "loss": 1.5632, "step": 772912 }, { "epoch": 66.68115942028986, "grad_norm": 0.2438710480928421, "learning_rate": 0.0001, "loss": 1.5697, "step": 772968 }, { "epoch": 66.68599033816425, "grad_norm": 0.6909959316253662, "learning_rate": 0.0001, "loss": 1.5647, "step": 773024 }, { "epoch": 66.69082125603865, "grad_norm": 0.3006516396999359, "learning_rate": 0.0001, "loss": 1.5624, "step": 773080 }, { "epoch": 66.69565217391305, "grad_norm": 0.30514782667160034, "learning_rate": 0.0001, "loss": 1.5704, "step": 773136 }, { "epoch": 66.70048309178743, "grad_norm": 0.7524892687797546, "learning_rate": 0.0001, "loss": 1.5658, "step": 773192 }, { "epoch": 66.70531400966183, "grad_norm": 0.29407835006713867, "learning_rate": 0.0001, "loss": 1.5602, "step": 773248 }, { "epoch": 66.71014492753623, "grad_norm": 0.3209315240383148, "learning_rate": 0.0001, "loss": 1.5679, "step": 773304 }, { "epoch": 66.71497584541063, "grad_norm": 0.3324458599090576, "learning_rate": 0.0001, "loss": 1.5707, "step": 773360 }, { "epoch": 66.71980676328502, "grad_norm": 1.275874376296997, "learning_rate": 0.0001, "loss": 1.5688, "step": 773416 }, { "epoch": 66.72463768115942, "grad_norm": 0.8195192217826843, "learning_rate": 0.0001, "loss": 1.5606, "step": 773472 }, { "epoch": 66.72946859903382, "grad_norm": 0.36563345789909363, "learning_rate": 0.0001, "loss": 1.5622, "step": 773528 }, { "epoch": 66.73429951690821, "grad_norm": 0.31431862711906433, "learning_rate": 0.0001, "loss": 1.5614, "step": 773584 }, { "epoch": 66.73913043478261, "grad_norm": 0.2661501169204712, "learning_rate": 0.0001, "loss": 1.5606, "step": 773640 }, { "epoch": 66.74396135265701, "grad_norm": 0.6076523661613464, "learning_rate": 0.0001, "loss": 1.5655, "step": 773696 }, { "epoch": 66.7487922705314, "grad_norm": 0.23039893805980682, "learning_rate": 0.0001, "loss": 1.5575, "step": 773752 }, { "epoch": 66.7536231884058, "grad_norm": 0.3131767511367798, "learning_rate": 0.0001, "loss": 1.5688, "step": 773808 }, { "epoch": 66.7584541062802, "grad_norm": 0.2764017879962921, "learning_rate": 0.0001, "loss": 1.5656, "step": 773864 }, { "epoch": 66.76328502415458, "grad_norm": 3.2011115550994873, "learning_rate": 0.0001, "loss": 1.5591, "step": 773920 }, { "epoch": 66.76811594202898, "grad_norm": 0.22888420522212982, "learning_rate": 0.0001, "loss": 1.5656, "step": 773976 }, { "epoch": 66.77294685990339, "grad_norm": 0.42215481400489807, "learning_rate": 0.0001, "loss": 1.5675, "step": 774032 }, { "epoch": 66.77777777777777, "grad_norm": 0.3258195221424103, "learning_rate": 0.0001, "loss": 1.5646, "step": 774088 }, { "epoch": 66.78260869565217, "grad_norm": 0.3301403820514679, "learning_rate": 0.0001, "loss": 1.5641, "step": 774144 }, { "epoch": 66.78743961352657, "grad_norm": 0.34604698419570923, "learning_rate": 0.0001, "loss": 1.5636, "step": 774200 }, { "epoch": 66.79227053140096, "grad_norm": 0.4341176450252533, "learning_rate": 0.0001, "loss": 1.5653, "step": 774256 }, { "epoch": 66.79710144927536, "grad_norm": 0.3203357756137848, "learning_rate": 0.0001, "loss": 1.5704, "step": 774312 }, { "epoch": 66.80193236714976, "grad_norm": 1.390061378479004, "learning_rate": 0.0001, "loss": 1.5629, "step": 774368 }, { "epoch": 66.80676328502416, "grad_norm": 8.754069328308105, "learning_rate": 0.0001, "loss": 1.5578, "step": 774424 }, { "epoch": 66.81159420289855, "grad_norm": 0.7871268391609192, "learning_rate": 0.0001, "loss": 1.5694, "step": 774480 }, { "epoch": 66.81642512077295, "grad_norm": 0.3393017649650574, "learning_rate": 0.0001, "loss": 1.5665, "step": 774536 }, { "epoch": 66.82125603864735, "grad_norm": 0.27746349573135376, "learning_rate": 0.0001, "loss": 1.572, "step": 774592 }, { "epoch": 66.82608695652173, "grad_norm": 0.4757489860057831, "learning_rate": 0.0001, "loss": 1.5608, "step": 774648 }, { "epoch": 66.83091787439614, "grad_norm": 0.32391443848609924, "learning_rate": 0.0001, "loss": 1.563, "step": 774704 }, { "epoch": 66.83574879227054, "grad_norm": 0.2934685945510864, "learning_rate": 0.0001, "loss": 1.5738, "step": 774760 }, { "epoch": 66.84057971014492, "grad_norm": 2.0113978385925293, "learning_rate": 0.0001, "loss": 1.5702, "step": 774816 }, { "epoch": 66.84541062801932, "grad_norm": 0.24427548050880432, "learning_rate": 0.0001, "loss": 1.5644, "step": 774872 }, { "epoch": 66.85024154589372, "grad_norm": 0.2432553470134735, "learning_rate": 0.0001, "loss": 1.5738, "step": 774928 }, { "epoch": 66.85507246376811, "grad_norm": 0.24999961256980896, "learning_rate": 0.0001, "loss": 1.5692, "step": 774984 }, { "epoch": 66.85990338164251, "grad_norm": 0.2504385709762573, "learning_rate": 0.0001, "loss": 1.5676, "step": 775040 }, { "epoch": 66.86473429951691, "grad_norm": 0.3970105051994324, "learning_rate": 0.0001, "loss": 1.5643, "step": 775096 }, { "epoch": 66.8695652173913, "grad_norm": 0.2495032548904419, "learning_rate": 0.0001, "loss": 1.5683, "step": 775152 }, { "epoch": 66.8743961352657, "grad_norm": 0.2822336256504059, "learning_rate": 0.0001, "loss": 1.5709, "step": 775208 }, { "epoch": 66.8792270531401, "grad_norm": 0.6935181617736816, "learning_rate": 0.0001, "loss": 1.5624, "step": 775264 }, { "epoch": 66.8840579710145, "grad_norm": 0.25061461329460144, "learning_rate": 0.0001, "loss": 1.5685, "step": 775320 }, { "epoch": 66.88888888888889, "grad_norm": 0.31532782316207886, "learning_rate": 0.0001, "loss": 1.573, "step": 775376 }, { "epoch": 66.89371980676329, "grad_norm": 0.25265154242515564, "learning_rate": 0.0001, "loss": 1.5675, "step": 775432 }, { "epoch": 66.89855072463769, "grad_norm": 0.3132772445678711, "learning_rate": 0.0001, "loss": 1.5631, "step": 775488 }, { "epoch": 66.90338164251207, "grad_norm": 0.444654643535614, "learning_rate": 0.0001, "loss": 1.5653, "step": 775544 }, { "epoch": 66.90821256038647, "grad_norm": 3.0097455978393555, "learning_rate": 0.0001, "loss": 1.5732, "step": 775600 }, { "epoch": 66.91304347826087, "grad_norm": 1.6191519498825073, "learning_rate": 0.0001, "loss": 1.5687, "step": 775656 }, { "epoch": 66.91787439613526, "grad_norm": 0.3094693720340729, "learning_rate": 0.0001, "loss": 1.5653, "step": 775712 }, { "epoch": 66.92270531400966, "grad_norm": 6.22520112991333, "learning_rate": 0.0001, "loss": 1.565, "step": 775768 }, { "epoch": 66.92753623188406, "grad_norm": 0.26441413164138794, "learning_rate": 0.0001, "loss": 1.5635, "step": 775824 }, { "epoch": 66.93236714975845, "grad_norm": 0.2803055942058563, "learning_rate": 0.0001, "loss": 1.5592, "step": 775880 }, { "epoch": 66.93719806763285, "grad_norm": 0.276589959859848, "learning_rate": 0.0001, "loss": 1.5644, "step": 775936 }, { "epoch": 66.94202898550725, "grad_norm": 0.3354303538799286, "learning_rate": 0.0001, "loss": 1.5683, "step": 775992 }, { "epoch": 66.94685990338164, "grad_norm": 0.2568884491920471, "learning_rate": 0.0001, "loss": 1.5631, "step": 776048 }, { "epoch": 66.95169082125604, "grad_norm": 0.3716515600681305, "learning_rate": 0.0001, "loss": 1.5599, "step": 776104 }, { "epoch": 66.95652173913044, "grad_norm": 0.3209345042705536, "learning_rate": 0.0001, "loss": 1.5591, "step": 776160 }, { "epoch": 66.96135265700484, "grad_norm": 0.3166658580303192, "learning_rate": 0.0001, "loss": 1.5638, "step": 776216 }, { "epoch": 66.96618357487922, "grad_norm": 0.3459225594997406, "learning_rate": 0.0001, "loss": 1.5618, "step": 776272 }, { "epoch": 66.97101449275362, "grad_norm": 0.6360331177711487, "learning_rate": 0.0001, "loss": 1.566, "step": 776328 }, { "epoch": 66.97584541062803, "grad_norm": 0.411372572183609, "learning_rate": 0.0001, "loss": 1.5693, "step": 776384 }, { "epoch": 66.98067632850241, "grad_norm": 0.37370941042900085, "learning_rate": 0.0001, "loss": 1.5662, "step": 776440 }, { "epoch": 66.98550724637681, "grad_norm": 0.6281160712242126, "learning_rate": 0.0001, "loss": 1.5631, "step": 776496 }, { "epoch": 66.99033816425121, "grad_norm": 0.7079773545265198, "learning_rate": 0.0001, "loss": 1.5701, "step": 776552 }, { "epoch": 66.9951690821256, "grad_norm": 0.3305188715457916, "learning_rate": 0.0001, "loss": 1.5656, "step": 776608 }, { "epoch": 67.0, "grad_norm": 0.29210224747657776, "learning_rate": 0.0001, "loss": 1.5687, "step": 776664 }, { "epoch": 67.0048309178744, "grad_norm": 0.30581042170524597, "learning_rate": 0.0001, "loss": 1.5549, "step": 776720 }, { "epoch": 67.00966183574879, "grad_norm": 0.2627394199371338, "learning_rate": 0.0001, "loss": 1.556, "step": 776776 }, { "epoch": 67.01449275362319, "grad_norm": 0.40947309136390686, "learning_rate": 0.0001, "loss": 1.5584, "step": 776832 }, { "epoch": 67.01932367149759, "grad_norm": 0.35372576117515564, "learning_rate": 0.0001, "loss": 1.561, "step": 776888 }, { "epoch": 67.02415458937197, "grad_norm": 0.4217323362827301, "learning_rate": 0.0001, "loss": 1.5574, "step": 776944 }, { "epoch": 67.02898550724638, "grad_norm": 0.44810840487480164, "learning_rate": 0.0001, "loss": 1.5593, "step": 777000 }, { "epoch": 67.03381642512078, "grad_norm": 0.3228624761104584, "learning_rate": 0.0001, "loss": 1.5561, "step": 777056 }, { "epoch": 67.03864734299516, "grad_norm": 0.29724839329719543, "learning_rate": 0.0001, "loss": 1.557, "step": 777112 }, { "epoch": 67.04347826086956, "grad_norm": 0.2530393898487091, "learning_rate": 0.0001, "loss": 1.5578, "step": 777168 }, { "epoch": 67.04830917874396, "grad_norm": 0.24618010222911835, "learning_rate": 0.0001, "loss": 1.5585, "step": 777224 }, { "epoch": 67.05314009661836, "grad_norm": 0.2444380521774292, "learning_rate": 0.0001, "loss": 1.558, "step": 777280 }, { "epoch": 67.05797101449275, "grad_norm": 0.5522968769073486, "learning_rate": 0.0001, "loss": 1.564, "step": 777336 }, { "epoch": 67.06280193236715, "grad_norm": 0.3003110885620117, "learning_rate": 0.0001, "loss": 1.5601, "step": 777392 }, { "epoch": 67.06763285024155, "grad_norm": 1.5124810934066772, "learning_rate": 0.0001, "loss": 1.5571, "step": 777448 }, { "epoch": 67.07246376811594, "grad_norm": 2.125720739364624, "learning_rate": 0.0001, "loss": 1.5553, "step": 777504 }, { "epoch": 67.07729468599034, "grad_norm": 0.31976374983787537, "learning_rate": 0.0001, "loss": 1.5619, "step": 777560 }, { "epoch": 67.08212560386474, "grad_norm": 0.4126104712486267, "learning_rate": 0.0001, "loss": 1.5591, "step": 777616 }, { "epoch": 67.08695652173913, "grad_norm": 0.24778181314468384, "learning_rate": 0.0001, "loss": 1.552, "step": 777672 }, { "epoch": 67.09178743961353, "grad_norm": 0.27900707721710205, "learning_rate": 0.0001, "loss": 1.5493, "step": 777728 }, { "epoch": 67.09661835748793, "grad_norm": 16.071624755859375, "learning_rate": 0.0001, "loss": 1.5611, "step": 777784 }, { "epoch": 67.10144927536231, "grad_norm": 0.3376804292201996, "learning_rate": 0.0001, "loss": 1.555, "step": 777840 }, { "epoch": 67.10628019323671, "grad_norm": 0.30923476815223694, "learning_rate": 0.0001, "loss": 1.5598, "step": 777896 }, { "epoch": 67.11111111111111, "grad_norm": 1.5166842937469482, "learning_rate": 0.0001, "loss": 1.5573, "step": 777952 }, { "epoch": 67.1159420289855, "grad_norm": 0.25632116198539734, "learning_rate": 0.0001, "loss": 1.5591, "step": 778008 }, { "epoch": 67.1207729468599, "grad_norm": 0.243098646402359, "learning_rate": 0.0001, "loss": 1.5522, "step": 778064 }, { "epoch": 67.1256038647343, "grad_norm": 0.27407947182655334, "learning_rate": 0.0001, "loss": 1.5547, "step": 778120 }, { "epoch": 67.1304347826087, "grad_norm": 0.3376550078392029, "learning_rate": 0.0001, "loss": 1.5584, "step": 778176 }, { "epoch": 67.13526570048309, "grad_norm": 0.29460954666137695, "learning_rate": 0.0001, "loss": 1.5618, "step": 778232 }, { "epoch": 67.14009661835749, "grad_norm": 0.36293184757232666, "learning_rate": 0.0001, "loss": 1.5549, "step": 778288 }, { "epoch": 67.14492753623189, "grad_norm": 0.30952152609825134, "learning_rate": 0.0001, "loss": 1.5586, "step": 778344 }, { "epoch": 67.14975845410628, "grad_norm": 0.2550649642944336, "learning_rate": 0.0001, "loss": 1.565, "step": 778400 }, { "epoch": 67.15458937198068, "grad_norm": 0.3760794997215271, "learning_rate": 0.0001, "loss": 1.5529, "step": 778456 }, { "epoch": 67.15942028985508, "grad_norm": 0.47641903162002563, "learning_rate": 0.0001, "loss": 1.5538, "step": 778512 }, { "epoch": 67.16425120772946, "grad_norm": 1.2024073600769043, "learning_rate": 0.0001, "loss": 1.5537, "step": 778568 }, { "epoch": 67.16908212560386, "grad_norm": 0.34184807538986206, "learning_rate": 0.0001, "loss": 1.5608, "step": 778624 }, { "epoch": 67.17391304347827, "grad_norm": 0.3431274890899658, "learning_rate": 0.0001, "loss": 1.5603, "step": 778680 }, { "epoch": 67.17874396135265, "grad_norm": 0.3198308050632477, "learning_rate": 0.0001, "loss": 1.5594, "step": 778736 }, { "epoch": 67.18357487922705, "grad_norm": 0.3809530436992645, "learning_rate": 0.0001, "loss": 1.5606, "step": 778792 }, { "epoch": 67.18840579710145, "grad_norm": 6.364351749420166, "learning_rate": 0.0001, "loss": 1.5611, "step": 778848 }, { "epoch": 67.19323671497584, "grad_norm": 0.8670615553855896, "learning_rate": 0.0001, "loss": 1.5608, "step": 778904 }, { "epoch": 67.19806763285024, "grad_norm": 10.748159408569336, "learning_rate": 0.0001, "loss": 1.5603, "step": 778960 }, { "epoch": 67.20289855072464, "grad_norm": 0.5010509490966797, "learning_rate": 0.0001, "loss": 1.5561, "step": 779016 }, { "epoch": 67.20772946859903, "grad_norm": 0.31256285309791565, "learning_rate": 0.0001, "loss": 1.5668, "step": 779072 }, { "epoch": 67.21256038647343, "grad_norm": 0.3588590919971466, "learning_rate": 0.0001, "loss": 1.5588, "step": 779128 }, { "epoch": 67.21739130434783, "grad_norm": 1.5610178709030151, "learning_rate": 0.0001, "loss": 1.5623, "step": 779184 }, { "epoch": 67.22222222222223, "grad_norm": 0.48424220085144043, "learning_rate": 0.0001, "loss": 1.5544, "step": 779240 }, { "epoch": 67.22705314009661, "grad_norm": 0.7582911849021912, "learning_rate": 0.0001, "loss": 1.56, "step": 779296 }, { "epoch": 67.23188405797102, "grad_norm": 0.6182879209518433, "learning_rate": 0.0001, "loss": 1.557, "step": 779352 }, { "epoch": 67.23671497584542, "grad_norm": 5.505654811859131, "learning_rate": 0.0001, "loss": 1.5598, "step": 779408 }, { "epoch": 67.2415458937198, "grad_norm": 0.27350637316703796, "learning_rate": 0.0001, "loss": 1.5648, "step": 779464 }, { "epoch": 67.2463768115942, "grad_norm": 0.23837755620479584, "learning_rate": 0.0001, "loss": 1.5586, "step": 779520 }, { "epoch": 67.2512077294686, "grad_norm": 0.2539164125919342, "learning_rate": 0.0001, "loss": 1.5627, "step": 779576 }, { "epoch": 67.25603864734299, "grad_norm": 2.3915157318115234, "learning_rate": 0.0001, "loss": 1.5604, "step": 779632 }, { "epoch": 67.26086956521739, "grad_norm": 0.4453781545162201, "learning_rate": 0.0001, "loss": 1.5592, "step": 779688 }, { "epoch": 67.26570048309179, "grad_norm": 0.36298999190330505, "learning_rate": 0.0001, "loss": 1.5579, "step": 779744 }, { "epoch": 67.27053140096618, "grad_norm": 0.2785531282424927, "learning_rate": 0.0001, "loss": 1.554, "step": 779800 }, { "epoch": 67.27536231884058, "grad_norm": 0.3996225893497467, "learning_rate": 0.0001, "loss": 1.5583, "step": 779856 }, { "epoch": 67.28019323671498, "grad_norm": 0.2722395360469818, "learning_rate": 0.0001, "loss": 1.5544, "step": 779912 }, { "epoch": 67.28502415458937, "grad_norm": 0.2511376142501831, "learning_rate": 0.0001, "loss": 1.5579, "step": 779968 }, { "epoch": 67.28985507246377, "grad_norm": 0.825631856918335, "learning_rate": 0.0001, "loss": 1.567, "step": 780024 }, { "epoch": 67.29468599033817, "grad_norm": 0.2788703441619873, "learning_rate": 0.0001, "loss": 1.5572, "step": 780080 }, { "epoch": 67.29951690821257, "grad_norm": 0.7371350526809692, "learning_rate": 0.0001, "loss": 1.5658, "step": 780136 }, { "epoch": 67.30434782608695, "grad_norm": 0.2886565327644348, "learning_rate": 0.0001, "loss": 1.561, "step": 780192 }, { "epoch": 67.30917874396135, "grad_norm": 0.4417482316493988, "learning_rate": 0.0001, "loss": 1.5631, "step": 780248 }, { "epoch": 67.31400966183575, "grad_norm": 1.0383872985839844, "learning_rate": 0.0001, "loss": 1.5607, "step": 780304 }, { "epoch": 67.31884057971014, "grad_norm": 0.25415676832199097, "learning_rate": 0.0001, "loss": 1.5573, "step": 780360 }, { "epoch": 67.32367149758454, "grad_norm": 0.3387380540370941, "learning_rate": 0.0001, "loss": 1.5595, "step": 780416 }, { "epoch": 67.32850241545894, "grad_norm": 1.3380576372146606, "learning_rate": 0.0001, "loss": 1.5567, "step": 780472 }, { "epoch": 67.33333333333333, "grad_norm": 0.36171653866767883, "learning_rate": 0.0001, "loss": 1.5555, "step": 780528 }, { "epoch": 67.33816425120773, "grad_norm": 0.3426547646522522, "learning_rate": 0.0001, "loss": 1.5578, "step": 780584 }, { "epoch": 67.34299516908213, "grad_norm": 0.34136831760406494, "learning_rate": 0.0001, "loss": 1.5545, "step": 780640 }, { "epoch": 67.34782608695652, "grad_norm": 0.3165171444416046, "learning_rate": 0.0001, "loss": 1.5621, "step": 780696 }, { "epoch": 67.35265700483092, "grad_norm": 0.4817773103713989, "learning_rate": 0.0001, "loss": 1.5602, "step": 780752 }, { "epoch": 67.35748792270532, "grad_norm": 0.9132673740386963, "learning_rate": 0.0001, "loss": 1.5575, "step": 780808 }, { "epoch": 67.3623188405797, "grad_norm": 0.9060377478599548, "learning_rate": 0.0001, "loss": 1.5574, "step": 780864 }, { "epoch": 67.3671497584541, "grad_norm": 0.7518261671066284, "learning_rate": 0.0001, "loss": 1.5655, "step": 780920 }, { "epoch": 67.3719806763285, "grad_norm": 0.6328599452972412, "learning_rate": 0.0001, "loss": 1.5587, "step": 780976 }, { "epoch": 67.3768115942029, "grad_norm": 0.30402642488479614, "learning_rate": 0.0001, "loss": 1.5508, "step": 781032 }, { "epoch": 67.38164251207729, "grad_norm": 0.31505194306373596, "learning_rate": 0.0001, "loss": 1.5632, "step": 781088 }, { "epoch": 67.38647342995169, "grad_norm": 0.4347954988479614, "learning_rate": 0.0001, "loss": 1.562, "step": 781144 }, { "epoch": 67.3913043478261, "grad_norm": 0.25671446323394775, "learning_rate": 0.0001, "loss": 1.5595, "step": 781200 }, { "epoch": 67.39613526570048, "grad_norm": 0.8326221108436584, "learning_rate": 0.0001, "loss": 1.5573, "step": 781256 }, { "epoch": 67.40096618357488, "grad_norm": 0.23811033368110657, "learning_rate": 0.0001, "loss": 1.5617, "step": 781312 }, { "epoch": 67.40579710144928, "grad_norm": 0.7216591238975525, "learning_rate": 0.0001, "loss": 1.5605, "step": 781368 }, { "epoch": 67.41062801932367, "grad_norm": 0.35203438997268677, "learning_rate": 0.0001, "loss": 1.5587, "step": 781424 }, { "epoch": 67.41545893719807, "grad_norm": 2.506413698196411, "learning_rate": 0.0001, "loss": 1.5567, "step": 781480 }, { "epoch": 67.42028985507247, "grad_norm": 0.2816193997859955, "learning_rate": 0.0001, "loss": 1.5609, "step": 781536 }, { "epoch": 67.42512077294685, "grad_norm": 0.32644417881965637, "learning_rate": 0.0001, "loss": 1.552, "step": 781592 }, { "epoch": 67.42995169082126, "grad_norm": 1.9828438758850098, "learning_rate": 0.0001, "loss": 1.5612, "step": 781648 }, { "epoch": 67.43478260869566, "grad_norm": 0.3002889156341553, "learning_rate": 0.0001, "loss": 1.5576, "step": 781704 }, { "epoch": 67.43961352657004, "grad_norm": 0.3506823182106018, "learning_rate": 0.0001, "loss": 1.5557, "step": 781760 }, { "epoch": 67.44444444444444, "grad_norm": 0.26033779978752136, "learning_rate": 0.0001, "loss": 1.564, "step": 781816 }, { "epoch": 67.44927536231884, "grad_norm": 0.5046219229698181, "learning_rate": 0.0001, "loss": 1.5623, "step": 781872 }, { "epoch": 67.45410628019323, "grad_norm": 0.30137303471565247, "learning_rate": 0.0001, "loss": 1.5629, "step": 781928 }, { "epoch": 67.45893719806763, "grad_norm": 0.8527369499206543, "learning_rate": 0.0001, "loss": 1.5639, "step": 781984 }, { "epoch": 67.46376811594203, "grad_norm": 2.0265250205993652, "learning_rate": 0.0001, "loss": 1.5626, "step": 782040 }, { "epoch": 67.46859903381643, "grad_norm": 0.352023720741272, "learning_rate": 0.0001, "loss": 1.5665, "step": 782096 }, { "epoch": 67.47342995169082, "grad_norm": 0.28390976786613464, "learning_rate": 0.0001, "loss": 1.5644, "step": 782152 }, { "epoch": 67.47826086956522, "grad_norm": 2.6231868267059326, "learning_rate": 0.0001, "loss": 1.5561, "step": 782208 }, { "epoch": 67.48309178743962, "grad_norm": 2.0206055641174316, "learning_rate": 0.0001, "loss": 1.5629, "step": 782264 }, { "epoch": 67.487922705314, "grad_norm": 1.6541680097579956, "learning_rate": 0.0001, "loss": 1.5622, "step": 782320 }, { "epoch": 67.4927536231884, "grad_norm": 0.23876070976257324, "learning_rate": 0.0001, "loss": 1.5576, "step": 782376 }, { "epoch": 67.4975845410628, "grad_norm": 0.45835989713668823, "learning_rate": 0.0001, "loss": 1.5665, "step": 782432 }, { "epoch": 67.5024154589372, "grad_norm": 0.26139646768569946, "learning_rate": 0.0001, "loss": 1.5638, "step": 782488 }, { "epoch": 67.5072463768116, "grad_norm": 0.8322675824165344, "learning_rate": 0.0001, "loss": 1.5615, "step": 782544 }, { "epoch": 67.512077294686, "grad_norm": 0.548595666885376, "learning_rate": 0.0001, "loss": 1.5599, "step": 782600 }, { "epoch": 67.51690821256038, "grad_norm": 0.33043262362480164, "learning_rate": 0.0001, "loss": 1.5601, "step": 782656 }, { "epoch": 67.52173913043478, "grad_norm": 0.9363154768943787, "learning_rate": 0.0001, "loss": 1.562, "step": 782712 }, { "epoch": 67.52657004830918, "grad_norm": 0.5105632543563843, "learning_rate": 0.0001, "loss": 1.564, "step": 782768 }, { "epoch": 67.53140096618357, "grad_norm": 0.26477935910224915, "learning_rate": 0.0001, "loss": 1.5531, "step": 782824 }, { "epoch": 67.53623188405797, "grad_norm": 0.2895054221153259, "learning_rate": 0.0001, "loss": 1.5583, "step": 782880 }, { "epoch": 67.54106280193237, "grad_norm": 0.24038605391979218, "learning_rate": 0.0001, "loss": 1.564, "step": 782936 }, { "epoch": 67.54589371980677, "grad_norm": 0.3095158636569977, "learning_rate": 0.0001, "loss": 1.5605, "step": 782992 }, { "epoch": 67.55072463768116, "grad_norm": 1.2643038034439087, "learning_rate": 0.0001, "loss": 1.5586, "step": 783048 }, { "epoch": 67.55555555555556, "grad_norm": 3.2022714614868164, "learning_rate": 0.0001, "loss": 1.57, "step": 783104 }, { "epoch": 67.56038647342996, "grad_norm": 0.31175366044044495, "learning_rate": 0.0001, "loss": 1.5686, "step": 783160 }, { "epoch": 67.56521739130434, "grad_norm": 1.680616855621338, "learning_rate": 0.0001, "loss": 1.5582, "step": 783216 }, { "epoch": 67.57004830917874, "grad_norm": 0.30984535813331604, "learning_rate": 0.0001, "loss": 1.5642, "step": 783272 }, { "epoch": 67.57487922705315, "grad_norm": 3.308011770248413, "learning_rate": 0.0001, "loss": 1.5502, "step": 783328 }, { "epoch": 67.57971014492753, "grad_norm": 0.7993625402450562, "learning_rate": 0.0001, "loss": 1.5619, "step": 783384 }, { "epoch": 67.58454106280193, "grad_norm": 0.4262431859970093, "learning_rate": 0.0001, "loss": 1.5594, "step": 783440 }, { "epoch": 67.58937198067633, "grad_norm": 0.4762856364250183, "learning_rate": 0.0001, "loss": 1.5652, "step": 783496 }, { "epoch": 67.59420289855072, "grad_norm": 0.29757317900657654, "learning_rate": 0.0001, "loss": 1.5665, "step": 783552 }, { "epoch": 67.59903381642512, "grad_norm": 0.424176961183548, "learning_rate": 0.0001, "loss": 1.5617, "step": 783608 }, { "epoch": 67.60386473429952, "grad_norm": 3.3731420040130615, "learning_rate": 0.0001, "loss": 1.5641, "step": 783664 }, { "epoch": 67.6086956521739, "grad_norm": 0.37526920437812805, "learning_rate": 0.0001, "loss": 1.558, "step": 783720 }, { "epoch": 67.61352657004831, "grad_norm": 0.30215010046958923, "learning_rate": 0.0001, "loss": 1.5642, "step": 783776 }, { "epoch": 67.61835748792271, "grad_norm": 0.7338246703147888, "learning_rate": 0.0001, "loss": 1.5576, "step": 783832 }, { "epoch": 67.6231884057971, "grad_norm": 0.22710512578487396, "learning_rate": 0.0001, "loss": 1.5535, "step": 783888 }, { "epoch": 67.6280193236715, "grad_norm": 1.1746764183044434, "learning_rate": 0.0001, "loss": 1.5622, "step": 783944 }, { "epoch": 67.6328502415459, "grad_norm": 0.38129574060440063, "learning_rate": 0.0001, "loss": 1.558, "step": 784000 }, { "epoch": 67.6376811594203, "grad_norm": 0.35181260108947754, "learning_rate": 0.0001, "loss": 1.5676, "step": 784056 }, { "epoch": 67.64251207729468, "grad_norm": 0.24725821614265442, "learning_rate": 0.0001, "loss": 1.5604, "step": 784112 }, { "epoch": 67.64734299516908, "grad_norm": 0.2717941701412201, "learning_rate": 0.0001, "loss": 1.5697, "step": 784168 }, { "epoch": 67.65217391304348, "grad_norm": 0.6320924162864685, "learning_rate": 0.0001, "loss": 1.5644, "step": 784224 }, { "epoch": 67.65700483091787, "grad_norm": 0.2541438043117523, "learning_rate": 0.0001, "loss": 1.5653, "step": 784280 }, { "epoch": 67.66183574879227, "grad_norm": 0.25502267479896545, "learning_rate": 0.0001, "loss": 1.5619, "step": 784336 }, { "epoch": 67.66666666666667, "grad_norm": 0.31662148237228394, "learning_rate": 0.0001, "loss": 1.5608, "step": 784392 }, { "epoch": 67.67149758454106, "grad_norm": 0.6432024836540222, "learning_rate": 0.0001, "loss": 1.5611, "step": 784448 }, { "epoch": 67.67632850241546, "grad_norm": 2.1575911045074463, "learning_rate": 0.0001, "loss": 1.5643, "step": 784504 }, { "epoch": 67.68115942028986, "grad_norm": 6.063177108764648, "learning_rate": 0.0001, "loss": 1.5647, "step": 784560 }, { "epoch": 67.68599033816425, "grad_norm": 0.5237758159637451, "learning_rate": 0.0001, "loss": 1.5584, "step": 784616 }, { "epoch": 67.69082125603865, "grad_norm": 0.34024277329444885, "learning_rate": 0.0001, "loss": 1.5654, "step": 784672 }, { "epoch": 67.69565217391305, "grad_norm": 0.2996928095817566, "learning_rate": 0.0001, "loss": 1.5663, "step": 784728 }, { "epoch": 67.70048309178743, "grad_norm": 0.39986443519592285, "learning_rate": 0.0001, "loss": 1.5572, "step": 784784 }, { "epoch": 67.70531400966183, "grad_norm": 0.4187985956668854, "learning_rate": 0.0001, "loss": 1.5643, "step": 784840 }, { "epoch": 67.71014492753623, "grad_norm": 0.2995889186859131, "learning_rate": 0.0001, "loss": 1.5631, "step": 784896 }, { "epoch": 67.71497584541063, "grad_norm": 0.29746973514556885, "learning_rate": 0.0001, "loss": 1.5573, "step": 784952 }, { "epoch": 67.71980676328502, "grad_norm": 0.937144935131073, "learning_rate": 0.0001, "loss": 1.5667, "step": 785008 }, { "epoch": 67.72463768115942, "grad_norm": 0.22879347205162048, "learning_rate": 0.0001, "loss": 1.554, "step": 785064 }, { "epoch": 67.72946859903382, "grad_norm": 0.3096133768558502, "learning_rate": 0.0001, "loss": 1.5568, "step": 785120 }, { "epoch": 67.73429951690821, "grad_norm": 0.27728596329689026, "learning_rate": 0.0001, "loss": 1.5588, "step": 785176 }, { "epoch": 67.73913043478261, "grad_norm": 4.729928970336914, "learning_rate": 0.0001, "loss": 1.5591, "step": 785232 }, { "epoch": 67.74396135265701, "grad_norm": 0.27011290192604065, "learning_rate": 0.0001, "loss": 1.5675, "step": 785288 }, { "epoch": 67.7487922705314, "grad_norm": 0.3967490792274475, "learning_rate": 0.0001, "loss": 1.5596, "step": 785344 }, { "epoch": 67.7536231884058, "grad_norm": 0.25681933760643005, "learning_rate": 0.0001, "loss": 1.562, "step": 785400 }, { "epoch": 67.7584541062802, "grad_norm": 18.304765701293945, "learning_rate": 0.0001, "loss": 1.5646, "step": 785456 }, { "epoch": 67.76328502415458, "grad_norm": 0.4689170718193054, "learning_rate": 0.0001, "loss": 1.5681, "step": 785512 }, { "epoch": 67.76811594202898, "grad_norm": 0.5522106885910034, "learning_rate": 0.0001, "loss": 1.5649, "step": 785568 }, { "epoch": 67.77294685990339, "grad_norm": 0.27110788226127625, "learning_rate": 0.0001, "loss": 1.5578, "step": 785624 }, { "epoch": 67.77777777777777, "grad_norm": 0.3414964973926544, "learning_rate": 0.0001, "loss": 1.5667, "step": 785680 }, { "epoch": 67.78260869565217, "grad_norm": 0.38626572489738464, "learning_rate": 0.0001, "loss": 1.5698, "step": 785736 }, { "epoch": 67.78743961352657, "grad_norm": 0.28024497628211975, "learning_rate": 0.0001, "loss": 1.5637, "step": 785792 }, { "epoch": 67.79227053140096, "grad_norm": 0.2856641411781311, "learning_rate": 0.0001, "loss": 1.5669, "step": 785848 }, { "epoch": 67.79710144927536, "grad_norm": 0.5061808824539185, "learning_rate": 0.0001, "loss": 1.5643, "step": 785904 }, { "epoch": 67.80193236714976, "grad_norm": 0.2820097506046295, "learning_rate": 0.0001, "loss": 1.5585, "step": 785960 }, { "epoch": 67.80676328502416, "grad_norm": 0.3176916539669037, "learning_rate": 0.0001, "loss": 1.5704, "step": 786016 }, { "epoch": 67.81159420289855, "grad_norm": 0.3941981792449951, "learning_rate": 0.0001, "loss": 1.564, "step": 786072 }, { "epoch": 67.81642512077295, "grad_norm": 0.30960604548454285, "learning_rate": 0.0001, "loss": 1.5577, "step": 786128 }, { "epoch": 67.82125603864735, "grad_norm": 0.24033129215240479, "learning_rate": 0.0001, "loss": 1.5664, "step": 786184 }, { "epoch": 67.82608695652173, "grad_norm": 0.2878323197364807, "learning_rate": 0.0001, "loss": 1.564, "step": 786240 }, { "epoch": 67.83091787439614, "grad_norm": 0.34883037209510803, "learning_rate": 0.0001, "loss": 1.5665, "step": 786296 }, { "epoch": 67.83574879227054, "grad_norm": 0.26839205622673035, "learning_rate": 0.0001, "loss": 1.565, "step": 786352 }, { "epoch": 67.84057971014492, "grad_norm": 0.2756737470626831, "learning_rate": 0.0001, "loss": 1.5666, "step": 786408 }, { "epoch": 67.84541062801932, "grad_norm": 0.5124763250350952, "learning_rate": 0.0001, "loss": 1.57, "step": 786464 }, { "epoch": 67.85024154589372, "grad_norm": 0.2994832396507263, "learning_rate": 0.0001, "loss": 1.5635, "step": 786520 }, { "epoch": 67.85507246376811, "grad_norm": 0.3569243550300598, "learning_rate": 0.0001, "loss": 1.5647, "step": 786576 }, { "epoch": 67.85990338164251, "grad_norm": 0.27064549922943115, "learning_rate": 0.0001, "loss": 1.5595, "step": 786632 }, { "epoch": 67.86473429951691, "grad_norm": 0.4939839243888855, "learning_rate": 0.0001, "loss": 1.5576, "step": 786688 }, { "epoch": 67.8695652173913, "grad_norm": 0.29873520135879517, "learning_rate": 0.0001, "loss": 1.5622, "step": 786744 }, { "epoch": 67.8743961352657, "grad_norm": 0.3261673152446747, "learning_rate": 0.0001, "loss": 1.567, "step": 786800 }, { "epoch": 67.8792270531401, "grad_norm": 0.2753426134586334, "learning_rate": 0.0001, "loss": 1.569, "step": 786856 }, { "epoch": 67.8840579710145, "grad_norm": 0.2460440695285797, "learning_rate": 0.0001, "loss": 1.5588, "step": 786912 }, { "epoch": 67.88888888888889, "grad_norm": 0.5286543369293213, "learning_rate": 0.0001, "loss": 1.568, "step": 786968 }, { "epoch": 67.89371980676329, "grad_norm": 0.2882510721683502, "learning_rate": 0.0001, "loss": 1.561, "step": 787024 }, { "epoch": 67.89855072463769, "grad_norm": 0.7511845231056213, "learning_rate": 0.0001, "loss": 1.5638, "step": 787080 }, { "epoch": 67.90338164251207, "grad_norm": 1.013218879699707, "learning_rate": 0.0001, "loss": 1.558, "step": 787136 }, { "epoch": 67.90821256038647, "grad_norm": 0.2383406162261963, "learning_rate": 0.0001, "loss": 1.5579, "step": 787192 }, { "epoch": 67.91304347826087, "grad_norm": 0.3620454967021942, "learning_rate": 0.0001, "loss": 1.5608, "step": 787248 }, { "epoch": 67.91787439613526, "grad_norm": 4.983066082000732, "learning_rate": 0.0001, "loss": 1.563, "step": 787304 }, { "epoch": 67.92270531400966, "grad_norm": 0.35393026471138, "learning_rate": 0.0001, "loss": 1.5636, "step": 787360 }, { "epoch": 67.92753623188406, "grad_norm": 0.2764950394630432, "learning_rate": 0.0001, "loss": 1.5652, "step": 787416 }, { "epoch": 67.93236714975845, "grad_norm": 0.30593541264533997, "learning_rate": 0.0001, "loss": 1.5622, "step": 787472 }, { "epoch": 67.93719806763285, "grad_norm": 1.9493488073349, "learning_rate": 0.0001, "loss": 1.5613, "step": 787528 }, { "epoch": 67.94202898550725, "grad_norm": 0.31284964084625244, "learning_rate": 0.0001, "loss": 1.5635, "step": 787584 }, { "epoch": 67.94685990338164, "grad_norm": 0.2664720118045807, "learning_rate": 0.0001, "loss": 1.5668, "step": 787640 }, { "epoch": 67.95169082125604, "grad_norm": 0.28953900933265686, "learning_rate": 0.0001, "loss": 1.5653, "step": 787696 }, { "epoch": 67.95652173913044, "grad_norm": 0.3318665623664856, "learning_rate": 0.0001, "loss": 1.563, "step": 787752 }, { "epoch": 67.96135265700484, "grad_norm": 0.2583514451980591, "learning_rate": 0.0001, "loss": 1.5665, "step": 787808 }, { "epoch": 67.96618357487922, "grad_norm": 1.5513015985488892, "learning_rate": 0.0001, "loss": 1.5572, "step": 787864 }, { "epoch": 67.97101449275362, "grad_norm": 0.3164113461971283, "learning_rate": 0.0001, "loss": 1.5649, "step": 787920 }, { "epoch": 67.97584541062803, "grad_norm": 0.3410484194755554, "learning_rate": 0.0001, "loss": 1.5534, "step": 787976 }, { "epoch": 67.98067632850241, "grad_norm": 0.79530268907547, "learning_rate": 0.0001, "loss": 1.5699, "step": 788032 }, { "epoch": 67.98550724637681, "grad_norm": 0.36380359530448914, "learning_rate": 0.0001, "loss": 1.5614, "step": 788088 }, { "epoch": 67.99033816425121, "grad_norm": 2.993191719055176, "learning_rate": 0.0001, "loss": 1.5588, "step": 788144 }, { "epoch": 67.9951690821256, "grad_norm": 0.3633680045604706, "learning_rate": 0.0001, "loss": 1.5637, "step": 788200 }, { "epoch": 68.0, "grad_norm": 0.27089348435401917, "learning_rate": 0.0001, "loss": 1.5602, "step": 788256 }, { "epoch": 68.0048309178744, "grad_norm": 0.27152717113494873, "learning_rate": 0.0001, "loss": 1.5616, "step": 788312 }, { "epoch": 68.00966183574879, "grad_norm": 1.367233395576477, "learning_rate": 0.0001, "loss": 1.5547, "step": 788368 }, { "epoch": 68.01449275362319, "grad_norm": 0.9392129778862, "learning_rate": 0.0001, "loss": 1.5549, "step": 788424 }, { "epoch": 68.01932367149759, "grad_norm": 0.44873321056365967, "learning_rate": 0.0001, "loss": 1.5577, "step": 788480 }, { "epoch": 68.02415458937197, "grad_norm": 0.2638939917087555, "learning_rate": 0.0001, "loss": 1.5529, "step": 788536 }, { "epoch": 68.02898550724638, "grad_norm": 0.2568410336971283, "learning_rate": 0.0001, "loss": 1.5546, "step": 788592 }, { "epoch": 68.03381642512078, "grad_norm": 0.4023662805557251, "learning_rate": 0.0001, "loss": 1.5493, "step": 788648 }, { "epoch": 68.03864734299516, "grad_norm": 0.7378989458084106, "learning_rate": 0.0001, "loss": 1.5498, "step": 788704 }, { "epoch": 68.04347826086956, "grad_norm": 0.32525715231895447, "learning_rate": 0.0001, "loss": 1.5573, "step": 788760 }, { "epoch": 68.04830917874396, "grad_norm": 44.344844818115234, "learning_rate": 0.0001, "loss": 1.5537, "step": 788816 }, { "epoch": 68.05314009661836, "grad_norm": 0.2841314673423767, "learning_rate": 0.0001, "loss": 1.5575, "step": 788872 }, { "epoch": 68.05797101449275, "grad_norm": 0.28423011302948, "learning_rate": 0.0001, "loss": 1.5586, "step": 788928 }, { "epoch": 68.06280193236715, "grad_norm": 21.928083419799805, "learning_rate": 0.0001, "loss": 1.5539, "step": 788984 }, { "epoch": 68.06763285024155, "grad_norm": 0.2863818407058716, "learning_rate": 0.0001, "loss": 1.5491, "step": 789040 }, { "epoch": 68.07246376811594, "grad_norm": 0.6721529960632324, "learning_rate": 0.0001, "loss": 1.5544, "step": 789096 }, { "epoch": 68.07729468599034, "grad_norm": 0.2573480010032654, "learning_rate": 0.0001, "loss": 1.5547, "step": 789152 }, { "epoch": 68.08212560386474, "grad_norm": 1.3271445035934448, "learning_rate": 0.0001, "loss": 1.5508, "step": 789208 }, { "epoch": 68.08695652173913, "grad_norm": 0.270881712436676, "learning_rate": 0.0001, "loss": 1.5609, "step": 789264 }, { "epoch": 68.09178743961353, "grad_norm": 3.025317430496216, "learning_rate": 0.0001, "loss": 1.5532, "step": 789320 }, { "epoch": 68.09661835748793, "grad_norm": 0.30860060453414917, "learning_rate": 0.0001, "loss": 1.5558, "step": 789376 }, { "epoch": 68.10144927536231, "grad_norm": 0.5371059775352478, "learning_rate": 0.0001, "loss": 1.5576, "step": 789432 }, { "epoch": 68.10628019323671, "grad_norm": 0.29943960905075073, "learning_rate": 0.0001, "loss": 1.556, "step": 789488 }, { "epoch": 68.11111111111111, "grad_norm": 0.5397753715515137, "learning_rate": 0.0001, "loss": 1.5485, "step": 789544 }, { "epoch": 68.1159420289855, "grad_norm": 0.42325878143310547, "learning_rate": 0.0001, "loss": 1.5492, "step": 789600 }, { "epoch": 68.1207729468599, "grad_norm": 0.9706488847732544, "learning_rate": 0.0001, "loss": 1.5605, "step": 789656 }, { "epoch": 68.1256038647343, "grad_norm": 0.3359469771385193, "learning_rate": 0.0001, "loss": 1.557, "step": 789712 }, { "epoch": 68.1304347826087, "grad_norm": 0.2915772795677185, "learning_rate": 0.0001, "loss": 1.5612, "step": 789768 }, { "epoch": 68.13526570048309, "grad_norm": 0.2968873381614685, "learning_rate": 0.0001, "loss": 1.5578, "step": 789824 }, { "epoch": 68.14009661835749, "grad_norm": 0.2412489950656891, "learning_rate": 0.0001, "loss": 1.5575, "step": 789880 }, { "epoch": 68.14492753623189, "grad_norm": 0.28346341848373413, "learning_rate": 0.0001, "loss": 1.5548, "step": 789936 }, { "epoch": 68.14975845410628, "grad_norm": 0.24604012072086334, "learning_rate": 0.0001, "loss": 1.5483, "step": 789992 }, { "epoch": 68.15458937198068, "grad_norm": 0.24148643016815186, "learning_rate": 0.0001, "loss": 1.5537, "step": 790048 }, { "epoch": 68.15942028985508, "grad_norm": 0.4229241609573364, "learning_rate": 0.0001, "loss": 1.5583, "step": 790104 }, { "epoch": 68.16425120772946, "grad_norm": 9.834256172180176, "learning_rate": 0.0001, "loss": 1.5636, "step": 790160 }, { "epoch": 68.16908212560386, "grad_norm": 0.2960294783115387, "learning_rate": 0.0001, "loss": 1.5609, "step": 790216 }, { "epoch": 68.17391304347827, "grad_norm": 0.2905453145503998, "learning_rate": 0.0001, "loss": 1.5565, "step": 790272 }, { "epoch": 68.17874396135265, "grad_norm": 0.27898430824279785, "learning_rate": 0.0001, "loss": 1.5519, "step": 790328 }, { "epoch": 68.18357487922705, "grad_norm": 0.2417907565832138, "learning_rate": 0.0001, "loss": 1.5571, "step": 790384 }, { "epoch": 68.18840579710145, "grad_norm": 0.5107095837593079, "learning_rate": 0.0001, "loss": 1.5539, "step": 790440 }, { "epoch": 68.19323671497584, "grad_norm": 0.25954702496528625, "learning_rate": 0.0001, "loss": 1.5558, "step": 790496 }, { "epoch": 68.19806763285024, "grad_norm": 0.48099687695503235, "learning_rate": 0.0001, "loss": 1.5597, "step": 790552 }, { "epoch": 68.20289855072464, "grad_norm": 0.5027490854263306, "learning_rate": 0.0001, "loss": 1.5559, "step": 790608 }, { "epoch": 68.20772946859903, "grad_norm": 0.4367044270038605, "learning_rate": 0.0001, "loss": 1.5631, "step": 790664 }, { "epoch": 68.21256038647343, "grad_norm": 0.9419441819190979, "learning_rate": 0.0001, "loss": 1.5595, "step": 790720 }, { "epoch": 68.21739130434783, "grad_norm": 0.25286006927490234, "learning_rate": 0.0001, "loss": 1.556, "step": 790776 }, { "epoch": 68.22222222222223, "grad_norm": 0.3956034779548645, "learning_rate": 0.0001, "loss": 1.5624, "step": 790832 }, { "epoch": 68.22705314009661, "grad_norm": 0.42753249406814575, "learning_rate": 0.0001, "loss": 1.56, "step": 790888 }, { "epoch": 68.23188405797102, "grad_norm": 0.2955209016799927, "learning_rate": 0.0001, "loss": 1.5603, "step": 790944 }, { "epoch": 68.23671497584542, "grad_norm": 0.45253798365592957, "learning_rate": 0.0001, "loss": 1.5571, "step": 791000 }, { "epoch": 68.2415458937198, "grad_norm": 0.6489685773849487, "learning_rate": 0.0001, "loss": 1.5622, "step": 791056 }, { "epoch": 68.2463768115942, "grad_norm": 0.27369457483291626, "learning_rate": 0.0001, "loss": 1.5577, "step": 791112 }, { "epoch": 68.2512077294686, "grad_norm": 0.35671183466911316, "learning_rate": 0.0001, "loss": 1.5521, "step": 791168 }, { "epoch": 68.25603864734299, "grad_norm": 0.41854429244995117, "learning_rate": 0.0001, "loss": 1.5628, "step": 791224 }, { "epoch": 68.26086956521739, "grad_norm": 0.3102894723415375, "learning_rate": 0.0001, "loss": 1.5546, "step": 791280 }, { "epoch": 68.26570048309179, "grad_norm": 0.25807133316993713, "learning_rate": 0.0001, "loss": 1.5581, "step": 791336 }, { "epoch": 68.27053140096618, "grad_norm": 0.34775397181510925, "learning_rate": 0.0001, "loss": 1.5553, "step": 791392 }, { "epoch": 68.27536231884058, "grad_norm": 2.447864055633545, "learning_rate": 0.0001, "loss": 1.5598, "step": 791448 }, { "epoch": 68.28019323671498, "grad_norm": 2.0311219692230225, "learning_rate": 0.0001, "loss": 1.5579, "step": 791504 }, { "epoch": 68.28502415458937, "grad_norm": 0.3502989709377289, "learning_rate": 0.0001, "loss": 1.557, "step": 791560 }, { "epoch": 68.28985507246377, "grad_norm": 0.3684276044368744, "learning_rate": 0.0001, "loss": 1.5548, "step": 791616 }, { "epoch": 68.29468599033817, "grad_norm": 0.4266395568847656, "learning_rate": 0.0001, "loss": 1.5554, "step": 791672 }, { "epoch": 68.29951690821257, "grad_norm": 0.43275150656700134, "learning_rate": 0.0001, "loss": 1.5529, "step": 791728 }, { "epoch": 68.30434782608695, "grad_norm": 49.87490463256836, "learning_rate": 0.0001, "loss": 1.5596, "step": 791784 }, { "epoch": 68.30917874396135, "grad_norm": 0.37953439354896545, "learning_rate": 0.0001, "loss": 1.5611, "step": 791840 }, { "epoch": 68.31400966183575, "grad_norm": 4.60984992980957, "learning_rate": 0.0001, "loss": 1.5604, "step": 791896 }, { "epoch": 68.31884057971014, "grad_norm": 1.0964692831039429, "learning_rate": 0.0001, "loss": 1.5557, "step": 791952 }, { "epoch": 68.32367149758454, "grad_norm": 0.6619576215744019, "learning_rate": 0.0001, "loss": 1.5509, "step": 792008 }, { "epoch": 68.32850241545894, "grad_norm": 0.2443152666091919, "learning_rate": 0.0001, "loss": 1.5573, "step": 792064 }, { "epoch": 68.33333333333333, "grad_norm": 0.2745153307914734, "learning_rate": 0.0001, "loss": 1.5637, "step": 792120 }, { "epoch": 68.33816425120773, "grad_norm": 0.2557649314403534, "learning_rate": 0.0001, "loss": 1.5616, "step": 792176 }, { "epoch": 68.34299516908213, "grad_norm": 3.177886486053467, "learning_rate": 0.0001, "loss": 1.558, "step": 792232 }, { "epoch": 68.34782608695652, "grad_norm": 0.4558207094669342, "learning_rate": 0.0001, "loss": 1.5578, "step": 792288 }, { "epoch": 68.35265700483092, "grad_norm": 0.5797086358070374, "learning_rate": 0.0001, "loss": 1.5583, "step": 792344 }, { "epoch": 68.35748792270532, "grad_norm": 0.4355900287628174, "learning_rate": 0.0001, "loss": 1.5555, "step": 792400 }, { "epoch": 68.3623188405797, "grad_norm": 0.3543908894062042, "learning_rate": 0.0001, "loss": 1.5615, "step": 792456 }, { "epoch": 68.3671497584541, "grad_norm": 0.29733526706695557, "learning_rate": 0.0001, "loss": 1.5613, "step": 792512 }, { "epoch": 68.3719806763285, "grad_norm": 0.5967655777931213, "learning_rate": 0.0001, "loss": 1.5607, "step": 792568 }, { "epoch": 68.3768115942029, "grad_norm": 0.26772740483283997, "learning_rate": 0.0001, "loss": 1.5568, "step": 792624 }, { "epoch": 68.38164251207729, "grad_norm": 0.47712644934654236, "learning_rate": 0.0001, "loss": 1.5575, "step": 792680 }, { "epoch": 68.38647342995169, "grad_norm": 0.30798202753067017, "learning_rate": 0.0001, "loss": 1.551, "step": 792736 }, { "epoch": 68.3913043478261, "grad_norm": 0.24888701736927032, "learning_rate": 0.0001, "loss": 1.5604, "step": 792792 }, { "epoch": 68.39613526570048, "grad_norm": 1.2731107473373413, "learning_rate": 0.0001, "loss": 1.5584, "step": 792848 }, { "epoch": 68.40096618357488, "grad_norm": 0.4785417318344116, "learning_rate": 0.0001, "loss": 1.5591, "step": 792904 }, { "epoch": 68.40579710144928, "grad_norm": 0.2914532423019409, "learning_rate": 0.0001, "loss": 1.5546, "step": 792960 }, { "epoch": 68.41062801932367, "grad_norm": 0.3692094385623932, "learning_rate": 0.0001, "loss": 1.5602, "step": 793016 }, { "epoch": 68.41545893719807, "grad_norm": 0.26443275809288025, "learning_rate": 0.0001, "loss": 1.5579, "step": 793072 }, { "epoch": 68.42028985507247, "grad_norm": 0.29946693778038025, "learning_rate": 0.0001, "loss": 1.5633, "step": 793128 }, { "epoch": 68.42512077294685, "grad_norm": 1.6558536291122437, "learning_rate": 0.0001, "loss": 1.5586, "step": 793184 }, { "epoch": 68.42995169082126, "grad_norm": 0.31111419200897217, "learning_rate": 0.0001, "loss": 1.5587, "step": 793240 }, { "epoch": 68.43478260869566, "grad_norm": 0.4950283169746399, "learning_rate": 0.0001, "loss": 1.5509, "step": 793296 }, { "epoch": 68.43961352657004, "grad_norm": 0.3995665907859802, "learning_rate": 0.0001, "loss": 1.5548, "step": 793352 }, { "epoch": 68.44444444444444, "grad_norm": 19.655277252197266, "learning_rate": 0.0001, "loss": 1.5599, "step": 793408 }, { "epoch": 68.44927536231884, "grad_norm": 4.045124530792236, "learning_rate": 0.0001, "loss": 1.5585, "step": 793464 }, { "epoch": 68.45410628019323, "grad_norm": 0.22397993505001068, "learning_rate": 0.0001, "loss": 1.5568, "step": 793520 }, { "epoch": 68.45893719806763, "grad_norm": 2.174450159072876, "learning_rate": 0.0001, "loss": 1.56, "step": 793576 }, { "epoch": 68.46376811594203, "grad_norm": 0.2637239694595337, "learning_rate": 0.0001, "loss": 1.5556, "step": 793632 }, { "epoch": 68.46859903381643, "grad_norm": 0.5109177231788635, "learning_rate": 0.0001, "loss": 1.5608, "step": 793688 }, { "epoch": 68.47342995169082, "grad_norm": 0.9912487268447876, "learning_rate": 0.0001, "loss": 1.56, "step": 793744 }, { "epoch": 68.47826086956522, "grad_norm": 0.40513718128204346, "learning_rate": 0.0001, "loss": 1.5606, "step": 793800 }, { "epoch": 68.48309178743962, "grad_norm": 0.24072009325027466, "learning_rate": 0.0001, "loss": 1.5565, "step": 793856 }, { "epoch": 68.487922705314, "grad_norm": 13.641082763671875, "learning_rate": 0.0001, "loss": 1.5568, "step": 793912 }, { "epoch": 68.4927536231884, "grad_norm": 1.4652987718582153, "learning_rate": 0.0001, "loss": 1.556, "step": 793968 }, { "epoch": 68.4975845410628, "grad_norm": 0.3166112005710602, "learning_rate": 0.0001, "loss": 1.5624, "step": 794024 }, { "epoch": 68.5024154589372, "grad_norm": 0.25467196106910706, "learning_rate": 0.0001, "loss": 1.5658, "step": 794080 }, { "epoch": 68.5072463768116, "grad_norm": 2.8783962726593018, "learning_rate": 0.0001, "loss": 1.5546, "step": 794136 }, { "epoch": 68.512077294686, "grad_norm": 49.63637161254883, "learning_rate": 0.0001, "loss": 1.5589, "step": 794192 }, { "epoch": 68.51690821256038, "grad_norm": 1.6706703901290894, "learning_rate": 0.0001, "loss": 1.5593, "step": 794248 }, { "epoch": 68.52173913043478, "grad_norm": 0.3068375289440155, "learning_rate": 0.0001, "loss": 1.5582, "step": 794304 }, { "epoch": 68.52657004830918, "grad_norm": 0.28072500228881836, "learning_rate": 0.0001, "loss": 1.5616, "step": 794360 }, { "epoch": 68.53140096618357, "grad_norm": 0.3639604151248932, "learning_rate": 0.0001, "loss": 1.5582, "step": 794416 }, { "epoch": 68.53623188405797, "grad_norm": 0.32647255063056946, "learning_rate": 0.0001, "loss": 1.5564, "step": 794472 }, { "epoch": 68.54106280193237, "grad_norm": 0.4786968529224396, "learning_rate": 0.0001, "loss": 1.5606, "step": 794528 }, { "epoch": 68.54589371980677, "grad_norm": 0.5883485674858093, "learning_rate": 0.0001, "loss": 1.5501, "step": 794584 }, { "epoch": 68.55072463768116, "grad_norm": 0.7386552095413208, "learning_rate": 0.0001, "loss": 1.5582, "step": 794640 }, { "epoch": 68.55555555555556, "grad_norm": 0.3578093945980072, "learning_rate": 0.0001, "loss": 1.5573, "step": 794696 }, { "epoch": 68.56038647342996, "grad_norm": 0.2798082232475281, "learning_rate": 0.0001, "loss": 1.5516, "step": 794752 }, { "epoch": 68.56521739130434, "grad_norm": 0.28475579619407654, "learning_rate": 0.0001, "loss": 1.5602, "step": 794808 }, { "epoch": 68.57004830917874, "grad_norm": 0.9150302410125732, "learning_rate": 0.0001, "loss": 1.5648, "step": 794864 }, { "epoch": 68.57487922705315, "grad_norm": 0.5980296730995178, "learning_rate": 0.0001, "loss": 1.5609, "step": 794920 }, { "epoch": 68.57971014492753, "grad_norm": 0.30095329880714417, "learning_rate": 0.0001, "loss": 1.5599, "step": 794976 }, { "epoch": 68.58454106280193, "grad_norm": 0.39560043811798096, "learning_rate": 0.0001, "loss": 1.5646, "step": 795032 }, { "epoch": 68.58937198067633, "grad_norm": 0.6875603199005127, "learning_rate": 0.0001, "loss": 1.554, "step": 795088 }, { "epoch": 68.59420289855072, "grad_norm": 0.4028678834438324, "learning_rate": 0.0001, "loss": 1.5588, "step": 795144 }, { "epoch": 68.59903381642512, "grad_norm": 0.300575315952301, "learning_rate": 0.0001, "loss": 1.5604, "step": 795200 }, { "epoch": 68.60386473429952, "grad_norm": 0.2829127609729767, "learning_rate": 0.0001, "loss": 1.561, "step": 795256 }, { "epoch": 68.6086956521739, "grad_norm": 0.33208122849464417, "learning_rate": 0.0001, "loss": 1.557, "step": 795312 }, { "epoch": 68.61352657004831, "grad_norm": 0.32908791303634644, "learning_rate": 0.0001, "loss": 1.5563, "step": 795368 }, { "epoch": 68.61835748792271, "grad_norm": 0.3069375455379486, "learning_rate": 0.0001, "loss": 1.5577, "step": 795424 }, { "epoch": 68.6231884057971, "grad_norm": 0.8781052231788635, "learning_rate": 0.0001, "loss": 1.5617, "step": 795480 }, { "epoch": 68.6280193236715, "grad_norm": 0.3203296363353729, "learning_rate": 0.0001, "loss": 1.5667, "step": 795536 }, { "epoch": 68.6328502415459, "grad_norm": 0.24590271711349487, "learning_rate": 0.0001, "loss": 1.5592, "step": 795592 }, { "epoch": 68.6376811594203, "grad_norm": 0.29121580719947815, "learning_rate": 0.0001, "loss": 1.5599, "step": 795648 }, { "epoch": 68.64251207729468, "grad_norm": 0.5339531302452087, "learning_rate": 0.0001, "loss": 1.562, "step": 795704 }, { "epoch": 68.64734299516908, "grad_norm": 0.4929146468639374, "learning_rate": 0.0001, "loss": 1.5689, "step": 795760 }, { "epoch": 68.65217391304348, "grad_norm": 0.2924222946166992, "learning_rate": 0.0001, "loss": 1.5621, "step": 795816 }, { "epoch": 68.65700483091787, "grad_norm": 0.31004172563552856, "learning_rate": 0.0001, "loss": 1.5542, "step": 795872 }, { "epoch": 68.66183574879227, "grad_norm": 1.4079060554504395, "learning_rate": 0.0001, "loss": 1.5617, "step": 795928 }, { "epoch": 68.66666666666667, "grad_norm": 0.33831751346588135, "learning_rate": 0.0001, "loss": 1.5554, "step": 795984 }, { "epoch": 68.67149758454106, "grad_norm": 0.25011828541755676, "learning_rate": 0.0001, "loss": 1.5623, "step": 796040 }, { "epoch": 68.67632850241546, "grad_norm": 1.5211440324783325, "learning_rate": 0.0001, "loss": 1.554, "step": 796096 }, { "epoch": 68.68115942028986, "grad_norm": 0.34833988547325134, "learning_rate": 0.0001, "loss": 1.5569, "step": 796152 }, { "epoch": 68.68599033816425, "grad_norm": 0.2483135312795639, "learning_rate": 0.0001, "loss": 1.5618, "step": 796208 }, { "epoch": 68.69082125603865, "grad_norm": 1.3013445138931274, "learning_rate": 0.0001, "loss": 1.5618, "step": 796264 }, { "epoch": 68.69565217391305, "grad_norm": 0.2582094967365265, "learning_rate": 0.0001, "loss": 1.5666, "step": 796320 }, { "epoch": 68.70048309178743, "grad_norm": 1.0529043674468994, "learning_rate": 0.0001, "loss": 1.5594, "step": 796376 }, { "epoch": 68.70531400966183, "grad_norm": 0.3360380530357361, "learning_rate": 0.0001, "loss": 1.5622, "step": 796432 }, { "epoch": 68.71014492753623, "grad_norm": 0.2535970211029053, "learning_rate": 0.0001, "loss": 1.5637, "step": 796488 }, { "epoch": 68.71497584541063, "grad_norm": 0.2857963740825653, "learning_rate": 0.0001, "loss": 1.5637, "step": 796544 }, { "epoch": 68.71980676328502, "grad_norm": 0.3707467317581177, "learning_rate": 0.0001, "loss": 1.5619, "step": 796600 }, { "epoch": 68.72463768115942, "grad_norm": 0.2437407374382019, "learning_rate": 0.0001, "loss": 1.5658, "step": 796656 }, { "epoch": 68.72946859903382, "grad_norm": 0.6863292455673218, "learning_rate": 0.0001, "loss": 1.5616, "step": 796712 }, { "epoch": 68.73429951690821, "grad_norm": 0.2818121016025543, "learning_rate": 0.0001, "loss": 1.5591, "step": 796768 }, { "epoch": 68.73913043478261, "grad_norm": 0.29435741901397705, "learning_rate": 0.0001, "loss": 1.5599, "step": 796824 }, { "epoch": 68.74396135265701, "grad_norm": 1.217456579208374, "learning_rate": 0.0001, "loss": 1.5619, "step": 796880 }, { "epoch": 68.7487922705314, "grad_norm": 0.35616791248321533, "learning_rate": 0.0001, "loss": 1.5604, "step": 796936 }, { "epoch": 68.7536231884058, "grad_norm": 0.28842437267303467, "learning_rate": 0.0001, "loss": 1.5589, "step": 796992 }, { "epoch": 68.7584541062802, "grad_norm": 0.34215566515922546, "learning_rate": 0.0001, "loss": 1.5611, "step": 797048 }, { "epoch": 68.76328502415458, "grad_norm": 3.1355230808258057, "learning_rate": 0.0001, "loss": 1.5577, "step": 797104 }, { "epoch": 68.76811594202898, "grad_norm": 1.1190294027328491, "learning_rate": 0.0001, "loss": 1.5626, "step": 797160 }, { "epoch": 68.77294685990339, "grad_norm": 0.24017155170440674, "learning_rate": 0.0001, "loss": 1.5586, "step": 797216 }, { "epoch": 68.77777777777777, "grad_norm": 0.4113256335258484, "learning_rate": 0.0001, "loss": 1.5599, "step": 797272 }, { "epoch": 68.78260869565217, "grad_norm": 1.0426725149154663, "learning_rate": 0.0001, "loss": 1.5538, "step": 797328 }, { "epoch": 68.78743961352657, "grad_norm": 2.441333293914795, "learning_rate": 0.0001, "loss": 1.5606, "step": 797384 }, { "epoch": 68.79227053140096, "grad_norm": 0.3080378770828247, "learning_rate": 0.0001, "loss": 1.5662, "step": 797440 }, { "epoch": 68.79710144927536, "grad_norm": 0.25594377517700195, "learning_rate": 0.0001, "loss": 1.5577, "step": 797496 }, { "epoch": 68.80193236714976, "grad_norm": 0.5845853090286255, "learning_rate": 0.0001, "loss": 1.5617, "step": 797552 }, { "epoch": 68.80676328502416, "grad_norm": 0.3517843186855316, "learning_rate": 0.0001, "loss": 1.5527, "step": 797608 }, { "epoch": 68.81159420289855, "grad_norm": 0.22031386196613312, "learning_rate": 0.0001, "loss": 1.5612, "step": 797664 }, { "epoch": 68.81642512077295, "grad_norm": 1.5137550830841064, "learning_rate": 0.0001, "loss": 1.5612, "step": 797720 }, { "epoch": 68.82125603864735, "grad_norm": 0.27588391304016113, "learning_rate": 0.0001, "loss": 1.5572, "step": 797776 }, { "epoch": 68.82608695652173, "grad_norm": 0.31342560052871704, "learning_rate": 0.0001, "loss": 1.5593, "step": 797832 }, { "epoch": 68.83091787439614, "grad_norm": 0.41754043102264404, "learning_rate": 0.0001, "loss": 1.5585, "step": 797888 }, { "epoch": 68.83574879227054, "grad_norm": 1.0762580633163452, "learning_rate": 0.0001, "loss": 1.56, "step": 797944 }, { "epoch": 68.84057971014492, "grad_norm": 0.24768052995204926, "learning_rate": 0.0001, "loss": 1.5584, "step": 798000 }, { "epoch": 68.84541062801932, "grad_norm": 0.33129122853279114, "learning_rate": 0.0001, "loss": 1.5588, "step": 798056 }, { "epoch": 68.85024154589372, "grad_norm": 0.45529407262802124, "learning_rate": 0.0001, "loss": 1.557, "step": 798112 }, { "epoch": 68.85507246376811, "grad_norm": 0.25355902314186096, "learning_rate": 0.0001, "loss": 1.5571, "step": 798168 }, { "epoch": 68.85990338164251, "grad_norm": 0.3427985608577728, "learning_rate": 0.0001, "loss": 1.5584, "step": 798224 }, { "epoch": 68.86473429951691, "grad_norm": 1.9014004468917847, "learning_rate": 0.0001, "loss": 1.5559, "step": 798280 }, { "epoch": 68.8695652173913, "grad_norm": 0.3155386745929718, "learning_rate": 0.0001, "loss": 1.5573, "step": 798336 }, { "epoch": 68.8743961352657, "grad_norm": 0.372397243976593, "learning_rate": 0.0001, "loss": 1.565, "step": 798392 }, { "epoch": 68.8792270531401, "grad_norm": 0.3285730481147766, "learning_rate": 0.0001, "loss": 1.5568, "step": 798448 }, { "epoch": 68.8840579710145, "grad_norm": 0.36532673239707947, "learning_rate": 0.0001, "loss": 1.5603, "step": 798504 }, { "epoch": 68.88888888888889, "grad_norm": 0.2950141727924347, "learning_rate": 0.0001, "loss": 1.5519, "step": 798560 }, { "epoch": 68.89371980676329, "grad_norm": 0.3430154323577881, "learning_rate": 0.0001, "loss": 1.562, "step": 798616 }, { "epoch": 68.89855072463769, "grad_norm": 0.2646428048610687, "learning_rate": 0.0001, "loss": 1.5531, "step": 798672 }, { "epoch": 68.90338164251207, "grad_norm": 0.2745956480503082, "learning_rate": 0.0001, "loss": 1.5543, "step": 798728 }, { "epoch": 68.90821256038647, "grad_norm": 1.068819284439087, "learning_rate": 0.0001, "loss": 1.5535, "step": 798784 }, { "epoch": 68.91304347826087, "grad_norm": 0.30358782410621643, "learning_rate": 0.0001, "loss": 1.5592, "step": 798840 }, { "epoch": 68.91787439613526, "grad_norm": 8.718852043151855, "learning_rate": 0.0001, "loss": 1.5611, "step": 798896 }, { "epoch": 68.92270531400966, "grad_norm": 0.3160240650177002, "learning_rate": 0.0001, "loss": 1.5631, "step": 798952 }, { "epoch": 68.92753623188406, "grad_norm": 0.2883756756782532, "learning_rate": 0.0001, "loss": 1.5533, "step": 799008 }, { "epoch": 68.93236714975845, "grad_norm": 0.7460474967956543, "learning_rate": 0.0001, "loss": 1.5647, "step": 799064 }, { "epoch": 68.93719806763285, "grad_norm": 0.3212554454803467, "learning_rate": 0.0001, "loss": 1.5578, "step": 799120 }, { "epoch": 68.94202898550725, "grad_norm": 0.2809494137763977, "learning_rate": 0.0001, "loss": 1.5573, "step": 799176 }, { "epoch": 68.94685990338164, "grad_norm": 0.3663082718849182, "learning_rate": 0.0001, "loss": 1.5726, "step": 799232 }, { "epoch": 68.95169082125604, "grad_norm": 0.30242690443992615, "learning_rate": 0.0001, "loss": 1.5626, "step": 799288 }, { "epoch": 68.95652173913044, "grad_norm": 0.24551771581172943, "learning_rate": 0.0001, "loss": 1.5597, "step": 799344 }, { "epoch": 68.96135265700484, "grad_norm": 0.2935114800930023, "learning_rate": 0.0001, "loss": 1.5629, "step": 799400 }, { "epoch": 68.96618357487922, "grad_norm": 0.8228194713592529, "learning_rate": 0.0001, "loss": 1.5662, "step": 799456 }, { "epoch": 68.97101449275362, "grad_norm": 0.2755443751811981, "learning_rate": 0.0001, "loss": 1.5639, "step": 799512 }, { "epoch": 68.97584541062803, "grad_norm": 0.7189322710037231, "learning_rate": 0.0001, "loss": 1.5563, "step": 799568 }, { "epoch": 68.98067632850241, "grad_norm": 0.30731481313705444, "learning_rate": 0.0001, "loss": 1.5634, "step": 799624 }, { "epoch": 68.98550724637681, "grad_norm": 4.682941436767578, "learning_rate": 0.0001, "loss": 1.5629, "step": 799680 }, { "epoch": 68.99033816425121, "grad_norm": 0.2905234396457672, "learning_rate": 0.0001, "loss": 1.558, "step": 799736 }, { "epoch": 68.9951690821256, "grad_norm": 0.27792888879776, "learning_rate": 0.0001, "loss": 1.5609, "step": 799792 }, { "epoch": 69.0, "grad_norm": 0.2612062990665436, "learning_rate": 0.0001, "loss": 1.5681, "step": 799848 }, { "epoch": 69.0048309178744, "grad_norm": 0.2730284631252289, "learning_rate": 0.0001, "loss": 1.5561, "step": 799904 }, { "epoch": 69.00966183574879, "grad_norm": 0.3350675404071808, "learning_rate": 0.0001, "loss": 1.5523, "step": 799960 }, { "epoch": 69.01449275362319, "grad_norm": 0.28823158144950867, "learning_rate": 0.0001, "loss": 1.5524, "step": 800016 }, { "epoch": 69.01932367149759, "grad_norm": 0.30617231130599976, "learning_rate": 0.0001, "loss": 1.5533, "step": 800072 }, { "epoch": 69.02415458937197, "grad_norm": 0.2897290885448456, "learning_rate": 0.0001, "loss": 1.5511, "step": 800128 }, { "epoch": 69.02898550724638, "grad_norm": 0.5674495100975037, "learning_rate": 0.0001, "loss": 1.5513, "step": 800184 }, { "epoch": 69.03381642512078, "grad_norm": 0.6598460078239441, "learning_rate": 0.0001, "loss": 1.5538, "step": 800240 }, { "epoch": 69.03864734299516, "grad_norm": 0.31663739681243896, "learning_rate": 0.0001, "loss": 1.5555, "step": 800296 }, { "epoch": 69.04347826086956, "grad_norm": 0.32841211557388306, "learning_rate": 0.0001, "loss": 1.5615, "step": 800352 }, { "epoch": 69.04830917874396, "grad_norm": 0.3477230370044708, "learning_rate": 0.0001, "loss": 1.5544, "step": 800408 }, { "epoch": 69.05314009661836, "grad_norm": 0.4784669578075409, "learning_rate": 0.0001, "loss": 1.5565, "step": 800464 }, { "epoch": 69.05797101449275, "grad_norm": 5.770514965057373, "learning_rate": 0.0001, "loss": 1.5536, "step": 800520 }, { "epoch": 69.06280193236715, "grad_norm": 5.5318708419799805, "learning_rate": 0.0001, "loss": 1.5562, "step": 800576 }, { "epoch": 69.06763285024155, "grad_norm": 0.30796369910240173, "learning_rate": 0.0001, "loss": 1.5546, "step": 800632 }, { "epoch": 69.07246376811594, "grad_norm": 0.857586681842804, "learning_rate": 0.0001, "loss": 1.5546, "step": 800688 }, { "epoch": 69.07729468599034, "grad_norm": 0.2463316172361374, "learning_rate": 0.0001, "loss": 1.5575, "step": 800744 }, { "epoch": 69.08212560386474, "grad_norm": 0.4980921745300293, "learning_rate": 0.0001, "loss": 1.5541, "step": 800800 }, { "epoch": 69.08695652173913, "grad_norm": 0.7193477749824524, "learning_rate": 0.0001, "loss": 1.5534, "step": 800856 }, { "epoch": 69.09178743961353, "grad_norm": 0.29389849305152893, "learning_rate": 0.0001, "loss": 1.5622, "step": 800912 }, { "epoch": 69.09661835748793, "grad_norm": 0.358846515417099, "learning_rate": 0.0001, "loss": 1.5577, "step": 800968 }, { "epoch": 69.10144927536231, "grad_norm": 0.3268831670284271, "learning_rate": 0.0001, "loss": 1.5567, "step": 801024 }, { "epoch": 69.10628019323671, "grad_norm": 0.35183414816856384, "learning_rate": 0.0001, "loss": 1.5562, "step": 801080 }, { "epoch": 69.11111111111111, "grad_norm": 0.5219362378120422, "learning_rate": 0.0001, "loss": 1.5596, "step": 801136 }, { "epoch": 69.1159420289855, "grad_norm": 0.5097180008888245, "learning_rate": 0.0001, "loss": 1.5468, "step": 801192 }, { "epoch": 69.1207729468599, "grad_norm": 0.3383094370365143, "learning_rate": 0.0001, "loss": 1.5622, "step": 801248 }, { "epoch": 69.1256038647343, "grad_norm": 0.3377127945423126, "learning_rate": 0.0001, "loss": 1.5613, "step": 801304 }, { "epoch": 69.1304347826087, "grad_norm": 0.23020382225513458, "learning_rate": 0.0001, "loss": 1.5542, "step": 801360 }, { "epoch": 69.13526570048309, "grad_norm": 0.36575940251350403, "learning_rate": 0.0001, "loss": 1.56, "step": 801416 }, { "epoch": 69.14009661835749, "grad_norm": 0.7285922169685364, "learning_rate": 0.0001, "loss": 1.5524, "step": 801472 }, { "epoch": 69.14492753623189, "grad_norm": 6.944097518920898, "learning_rate": 0.0001, "loss": 1.5543, "step": 801528 }, { "epoch": 69.14975845410628, "grad_norm": 0.28297117352485657, "learning_rate": 0.0001, "loss": 1.5554, "step": 801584 }, { "epoch": 69.15458937198068, "grad_norm": 0.281621515750885, "learning_rate": 0.0001, "loss": 1.5514, "step": 801640 }, { "epoch": 69.15942028985508, "grad_norm": 0.298454612493515, "learning_rate": 0.0001, "loss": 1.5558, "step": 801696 }, { "epoch": 69.16425120772946, "grad_norm": 0.2826274633407593, "learning_rate": 0.0001, "loss": 1.5519, "step": 801752 }, { "epoch": 69.16908212560386, "grad_norm": 0.296562522649765, "learning_rate": 0.0001, "loss": 1.5494, "step": 801808 }, { "epoch": 69.17391304347827, "grad_norm": 1.3661155700683594, "learning_rate": 0.0001, "loss": 1.5539, "step": 801864 }, { "epoch": 69.17874396135265, "grad_norm": 1.3785653114318848, "learning_rate": 0.0001, "loss": 1.5531, "step": 801920 }, { "epoch": 69.18357487922705, "grad_norm": 0.3040103316307068, "learning_rate": 0.0001, "loss": 1.5621, "step": 801976 }, { "epoch": 69.18840579710145, "grad_norm": 0.2673148512840271, "learning_rate": 0.0001, "loss": 1.553, "step": 802032 }, { "epoch": 69.19323671497584, "grad_norm": 1.5242770910263062, "learning_rate": 0.0001, "loss": 1.5615, "step": 802088 }, { "epoch": 69.19806763285024, "grad_norm": 0.41085776686668396, "learning_rate": 0.0001, "loss": 1.5558, "step": 802144 }, { "epoch": 69.20289855072464, "grad_norm": 0.37716197967529297, "learning_rate": 0.0001, "loss": 1.5621, "step": 802200 }, { "epoch": 69.20772946859903, "grad_norm": 0.29936447739601135, "learning_rate": 0.0001, "loss": 1.5547, "step": 802256 }, { "epoch": 69.21256038647343, "grad_norm": 0.25021493434906006, "learning_rate": 0.0001, "loss": 1.5534, "step": 802312 }, { "epoch": 69.21739130434783, "grad_norm": 0.42010655999183655, "learning_rate": 0.0001, "loss": 1.5552, "step": 802368 }, { "epoch": 69.22222222222223, "grad_norm": 0.5392991304397583, "learning_rate": 0.0001, "loss": 1.5514, "step": 802424 }, { "epoch": 69.22705314009661, "grad_norm": 0.9801837801933289, "learning_rate": 0.0001, "loss": 1.5659, "step": 802480 }, { "epoch": 69.23188405797102, "grad_norm": 0.8812358379364014, "learning_rate": 0.0001, "loss": 1.561, "step": 802536 }, { "epoch": 69.23671497584542, "grad_norm": 0.2346494197845459, "learning_rate": 0.0001, "loss": 1.5587, "step": 802592 }, { "epoch": 69.2415458937198, "grad_norm": 1.0300824642181396, "learning_rate": 0.0001, "loss": 1.5545, "step": 802648 }, { "epoch": 69.2463768115942, "grad_norm": 0.25142356753349304, "learning_rate": 0.0001, "loss": 1.5586, "step": 802704 }, { "epoch": 69.2512077294686, "grad_norm": 2.4841582775115967, "learning_rate": 0.0001, "loss": 1.5494, "step": 802760 }, { "epoch": 69.25603864734299, "grad_norm": 0.4604091942310333, "learning_rate": 0.0001, "loss": 1.5598, "step": 802816 }, { "epoch": 69.26086956521739, "grad_norm": 0.3293761909008026, "learning_rate": 0.0001, "loss": 1.5576, "step": 802872 }, { "epoch": 69.26570048309179, "grad_norm": 0.3093249499797821, "learning_rate": 0.0001, "loss": 1.5611, "step": 802928 }, { "epoch": 69.27053140096618, "grad_norm": 0.2931654453277588, "learning_rate": 0.0001, "loss": 1.5632, "step": 802984 }, { "epoch": 69.27536231884058, "grad_norm": 0.34274938702583313, "learning_rate": 0.0001, "loss": 1.5528, "step": 803040 }, { "epoch": 69.28019323671498, "grad_norm": 0.33271563053131104, "learning_rate": 0.0001, "loss": 1.5595, "step": 803096 }, { "epoch": 69.28502415458937, "grad_norm": 0.2898225486278534, "learning_rate": 0.0001, "loss": 1.5593, "step": 803152 }, { "epoch": 69.28985507246377, "grad_norm": 0.3068314492702484, "learning_rate": 0.0001, "loss": 1.5601, "step": 803208 }, { "epoch": 69.29468599033817, "grad_norm": 2.5399577617645264, "learning_rate": 0.0001, "loss": 1.5652, "step": 803264 }, { "epoch": 69.29951690821257, "grad_norm": 0.36476677656173706, "learning_rate": 0.0001, "loss": 1.5577, "step": 803320 }, { "epoch": 69.30434782608695, "grad_norm": 0.5707525610923767, "learning_rate": 0.0001, "loss": 1.5584, "step": 803376 }, { "epoch": 69.30917874396135, "grad_norm": 0.5795260667800903, "learning_rate": 0.0001, "loss": 1.5584, "step": 803432 }, { "epoch": 69.31400966183575, "grad_norm": 1.1687572002410889, "learning_rate": 0.0001, "loss": 1.5553, "step": 803488 }, { "epoch": 69.31884057971014, "grad_norm": 1.4705692529678345, "learning_rate": 0.0001, "loss": 1.5562, "step": 803544 }, { "epoch": 69.32367149758454, "grad_norm": 7.746151924133301, "learning_rate": 0.0001, "loss": 1.5574, "step": 803600 }, { "epoch": 69.32850241545894, "grad_norm": 1.2478731870651245, "learning_rate": 0.0001, "loss": 1.5509, "step": 803656 }, { "epoch": 69.33333333333333, "grad_norm": 14.875003814697266, "learning_rate": 0.0001, "loss": 1.5542, "step": 803712 }, { "epoch": 69.33816425120773, "grad_norm": 0.5110794305801392, "learning_rate": 0.0001, "loss": 1.5615, "step": 803768 }, { "epoch": 69.34299516908213, "grad_norm": 9.357985496520996, "learning_rate": 0.0001, "loss": 1.5551, "step": 803824 }, { "epoch": 69.34782608695652, "grad_norm": 0.598272442817688, "learning_rate": 0.0001, "loss": 1.5583, "step": 803880 }, { "epoch": 69.35265700483092, "grad_norm": 0.432780385017395, "learning_rate": 0.0001, "loss": 1.5574, "step": 803936 }, { "epoch": 69.35748792270532, "grad_norm": 4.839487075805664, "learning_rate": 0.0001, "loss": 1.5536, "step": 803992 }, { "epoch": 69.3623188405797, "grad_norm": 0.4204079806804657, "learning_rate": 0.0001, "loss": 1.5597, "step": 804048 }, { "epoch": 69.3671497584541, "grad_norm": 0.372854620218277, "learning_rate": 0.0001, "loss": 1.5569, "step": 804104 }, { "epoch": 69.3719806763285, "grad_norm": 10.55038833618164, "learning_rate": 0.0001, "loss": 1.5628, "step": 804160 }, { "epoch": 69.3768115942029, "grad_norm": 0.6290101408958435, "learning_rate": 0.0001, "loss": 1.5626, "step": 804216 }, { "epoch": 69.38164251207729, "grad_norm": 8.949373245239258, "learning_rate": 0.0001, "loss": 1.554, "step": 804272 }, { "epoch": 69.38647342995169, "grad_norm": 35.040313720703125, "learning_rate": 0.0001, "loss": 1.5539, "step": 804328 }, { "epoch": 69.3913043478261, "grad_norm": 0.3509450852870941, "learning_rate": 0.0001, "loss": 1.5556, "step": 804384 }, { "epoch": 69.39613526570048, "grad_norm": 0.2516057789325714, "learning_rate": 0.0001, "loss": 1.5654, "step": 804440 }, { "epoch": 69.40096618357488, "grad_norm": 0.27630895376205444, "learning_rate": 0.0001, "loss": 1.5601, "step": 804496 }, { "epoch": 69.40579710144928, "grad_norm": 0.7191358208656311, "learning_rate": 0.0001, "loss": 1.5616, "step": 804552 }, { "epoch": 69.41062801932367, "grad_norm": 0.2780303359031677, "learning_rate": 0.0001, "loss": 1.5572, "step": 804608 }, { "epoch": 69.41545893719807, "grad_norm": 2.470360040664673, "learning_rate": 0.0001, "loss": 1.5551, "step": 804664 }, { "epoch": 69.42028985507247, "grad_norm": 0.38274458050727844, "learning_rate": 0.0001, "loss": 1.5605, "step": 804720 }, { "epoch": 69.42512077294685, "grad_norm": 0.29293471574783325, "learning_rate": 0.0001, "loss": 1.5614, "step": 804776 }, { "epoch": 69.42995169082126, "grad_norm": 0.38836365938186646, "learning_rate": 0.0001, "loss": 1.5527, "step": 804832 }, { "epoch": 69.43478260869566, "grad_norm": 0.26968857645988464, "learning_rate": 0.0001, "loss": 1.5602, "step": 804888 }, { "epoch": 69.43961352657004, "grad_norm": 0.25222861766815186, "learning_rate": 0.0001, "loss": 1.5584, "step": 804944 }, { "epoch": 69.44444444444444, "grad_norm": 0.30720534920692444, "learning_rate": 0.0001, "loss": 1.5565, "step": 805000 }, { "epoch": 69.44927536231884, "grad_norm": 7.032383918762207, "learning_rate": 0.0001, "loss": 1.5572, "step": 805056 }, { "epoch": 69.45410628019323, "grad_norm": 0.2732642590999603, "learning_rate": 0.0001, "loss": 1.5524, "step": 805112 }, { "epoch": 69.45893719806763, "grad_norm": 1.229728102684021, "learning_rate": 0.0001, "loss": 1.5621, "step": 805168 }, { "epoch": 69.46376811594203, "grad_norm": 1.5470482110977173, "learning_rate": 0.0001, "loss": 1.558, "step": 805224 }, { "epoch": 69.46859903381643, "grad_norm": 0.3577076494693756, "learning_rate": 0.0001, "loss": 1.5498, "step": 805280 }, { "epoch": 69.47342995169082, "grad_norm": 0.46161115169525146, "learning_rate": 0.0001, "loss": 1.5566, "step": 805336 }, { "epoch": 69.47826086956522, "grad_norm": 0.2644154131412506, "learning_rate": 0.0001, "loss": 1.5597, "step": 805392 }, { "epoch": 69.48309178743962, "grad_norm": 0.6079645156860352, "learning_rate": 0.0001, "loss": 1.5552, "step": 805448 }, { "epoch": 69.487922705314, "grad_norm": 0.21290619671344757, "learning_rate": 0.0001, "loss": 1.5599, "step": 805504 }, { "epoch": 69.4927536231884, "grad_norm": 0.48403653502464294, "learning_rate": 0.0001, "loss": 1.5566, "step": 805560 }, { "epoch": 69.4975845410628, "grad_norm": 0.2345712035894394, "learning_rate": 0.0001, "loss": 1.5538, "step": 805616 }, { "epoch": 69.5024154589372, "grad_norm": 0.30945438146591187, "learning_rate": 0.0001, "loss": 1.5561, "step": 805672 }, { "epoch": 69.5072463768116, "grad_norm": 0.25704699754714966, "learning_rate": 0.0001, "loss": 1.56, "step": 805728 }, { "epoch": 69.512077294686, "grad_norm": 0.26683419942855835, "learning_rate": 0.0001, "loss": 1.5646, "step": 805784 }, { "epoch": 69.51690821256038, "grad_norm": 0.33997687697410583, "learning_rate": 0.0001, "loss": 1.555, "step": 805840 }, { "epoch": 69.52173913043478, "grad_norm": 1.065147042274475, "learning_rate": 0.0001, "loss": 1.5543, "step": 805896 }, { "epoch": 69.52657004830918, "grad_norm": 0.24582892656326294, "learning_rate": 0.0001, "loss": 1.5563, "step": 805952 }, { "epoch": 69.53140096618357, "grad_norm": 0.2958937883377075, "learning_rate": 0.0001, "loss": 1.5577, "step": 806008 }, { "epoch": 69.53623188405797, "grad_norm": 0.26785537600517273, "learning_rate": 0.0001, "loss": 1.5588, "step": 806064 }, { "epoch": 69.54106280193237, "grad_norm": 0.3193325102329254, "learning_rate": 0.0001, "loss": 1.5552, "step": 806120 }, { "epoch": 69.54589371980677, "grad_norm": 4.0463643074035645, "learning_rate": 0.0001, "loss": 1.5542, "step": 806176 }, { "epoch": 69.55072463768116, "grad_norm": 1.1608684062957764, "learning_rate": 0.0001, "loss": 1.557, "step": 806232 }, { "epoch": 69.55555555555556, "grad_norm": 0.28952309489250183, "learning_rate": 0.0001, "loss": 1.56, "step": 806288 }, { "epoch": 69.56038647342996, "grad_norm": 0.47442883253097534, "learning_rate": 0.0001, "loss": 1.5564, "step": 806344 }, { "epoch": 69.56521739130434, "grad_norm": 0.3121059536933899, "learning_rate": 0.0001, "loss": 1.557, "step": 806400 }, { "epoch": 69.57004830917874, "grad_norm": 8.231475830078125, "learning_rate": 0.0001, "loss": 1.5586, "step": 806456 }, { "epoch": 69.57487922705315, "grad_norm": 1.6505810022354126, "learning_rate": 0.0001, "loss": 1.5521, "step": 806512 }, { "epoch": 69.57971014492753, "grad_norm": 9.31320571899414, "learning_rate": 0.0001, "loss": 1.5529, "step": 806568 }, { "epoch": 69.58454106280193, "grad_norm": 0.3511636257171631, "learning_rate": 0.0001, "loss": 1.5557, "step": 806624 }, { "epoch": 69.58937198067633, "grad_norm": 0.7172993421554565, "learning_rate": 0.0001, "loss": 1.5517, "step": 806680 }, { "epoch": 69.59420289855072, "grad_norm": 0.3458469808101654, "learning_rate": 0.0001, "loss": 1.5493, "step": 806736 }, { "epoch": 69.59903381642512, "grad_norm": 0.29406219720840454, "learning_rate": 0.0001, "loss": 1.5538, "step": 806792 }, { "epoch": 69.60386473429952, "grad_norm": 0.2586846947669983, "learning_rate": 0.0001, "loss": 1.5562, "step": 806848 }, { "epoch": 69.6086956521739, "grad_norm": 0.4136974513530731, "learning_rate": 0.0001, "loss": 1.5586, "step": 806904 }, { "epoch": 69.61352657004831, "grad_norm": 0.24435944855213165, "learning_rate": 0.0001, "loss": 1.5557, "step": 806960 }, { "epoch": 69.61835748792271, "grad_norm": 0.3186863362789154, "learning_rate": 0.0001, "loss": 1.5571, "step": 807016 }, { "epoch": 69.6231884057971, "grad_norm": 1.3270446062088013, "learning_rate": 0.0001, "loss": 1.5554, "step": 807072 }, { "epoch": 69.6280193236715, "grad_norm": 0.2667897045612335, "learning_rate": 0.0001, "loss": 1.5551, "step": 807128 }, { "epoch": 69.6328502415459, "grad_norm": 0.3495778441429138, "learning_rate": 0.0001, "loss": 1.5627, "step": 807184 }, { "epoch": 69.6376811594203, "grad_norm": 1.3649673461914062, "learning_rate": 0.0001, "loss": 1.5529, "step": 807240 }, { "epoch": 69.64251207729468, "grad_norm": 0.2968643605709076, "learning_rate": 0.0001, "loss": 1.5644, "step": 807296 }, { "epoch": 69.64734299516908, "grad_norm": 0.6178998351097107, "learning_rate": 0.0001, "loss": 1.5573, "step": 807352 }, { "epoch": 69.65217391304348, "grad_norm": 0.28605204820632935, "learning_rate": 0.0001, "loss": 1.5535, "step": 807408 }, { "epoch": 69.65700483091787, "grad_norm": 20.584022521972656, "learning_rate": 0.0001, "loss": 1.5578, "step": 807464 }, { "epoch": 69.66183574879227, "grad_norm": 0.30489596724510193, "learning_rate": 0.0001, "loss": 1.5526, "step": 807520 }, { "epoch": 69.66666666666667, "grad_norm": 14.810226440429688, "learning_rate": 0.0001, "loss": 1.5636, "step": 807576 }, { "epoch": 69.67149758454106, "grad_norm": 0.30845701694488525, "learning_rate": 0.0001, "loss": 1.5615, "step": 807632 }, { "epoch": 69.67632850241546, "grad_norm": 1.1614267826080322, "learning_rate": 0.0001, "loss": 1.565, "step": 807688 }, { "epoch": 69.68115942028986, "grad_norm": 0.3348759412765503, "learning_rate": 0.0001, "loss": 1.5555, "step": 807744 }, { "epoch": 69.68599033816425, "grad_norm": 0.324116051197052, "learning_rate": 0.0001, "loss": 1.5572, "step": 807800 }, { "epoch": 69.69082125603865, "grad_norm": 0.41812682151794434, "learning_rate": 0.0001, "loss": 1.5581, "step": 807856 }, { "epoch": 69.69565217391305, "grad_norm": 2.6054961681365967, "learning_rate": 0.0001, "loss": 1.5518, "step": 807912 }, { "epoch": 69.70048309178743, "grad_norm": 1.853570580482483, "learning_rate": 0.0001, "loss": 1.5546, "step": 807968 }, { "epoch": 69.70531400966183, "grad_norm": 0.3155389428138733, "learning_rate": 0.0001, "loss": 1.5633, "step": 808024 }, { "epoch": 69.71014492753623, "grad_norm": 0.3152922987937927, "learning_rate": 0.0001, "loss": 1.5595, "step": 808080 }, { "epoch": 69.71497584541063, "grad_norm": 0.7278767824172974, "learning_rate": 0.0001, "loss": 1.5603, "step": 808136 }, { "epoch": 69.71980676328502, "grad_norm": 0.33098745346069336, "learning_rate": 0.0001, "loss": 1.5625, "step": 808192 }, { "epoch": 69.72463768115942, "grad_norm": 0.25863927602767944, "learning_rate": 0.0001, "loss": 1.5617, "step": 808248 }, { "epoch": 69.72946859903382, "grad_norm": 0.3745333254337311, "learning_rate": 0.0001, "loss": 1.5612, "step": 808304 }, { "epoch": 69.73429951690821, "grad_norm": 0.6905719637870789, "learning_rate": 0.0001, "loss": 1.553, "step": 808360 }, { "epoch": 69.73913043478261, "grad_norm": 1.9485361576080322, "learning_rate": 0.0001, "loss": 1.5571, "step": 808416 }, { "epoch": 69.74396135265701, "grad_norm": 0.29890015721321106, "learning_rate": 0.0001, "loss": 1.5568, "step": 808472 }, { "epoch": 69.7487922705314, "grad_norm": 0.41853368282318115, "learning_rate": 0.0001, "loss": 1.5504, "step": 808528 }, { "epoch": 69.7536231884058, "grad_norm": 0.3627220392227173, "learning_rate": 0.0001, "loss": 1.5655, "step": 808584 }, { "epoch": 69.7584541062802, "grad_norm": 0.4229997992515564, "learning_rate": 0.0001, "loss": 1.5663, "step": 808640 }, { "epoch": 69.76328502415458, "grad_norm": 0.33495035767555237, "learning_rate": 0.0001, "loss": 1.5544, "step": 808696 }, { "epoch": 69.76811594202898, "grad_norm": 0.6387436389923096, "learning_rate": 0.0001, "loss": 1.5574, "step": 808752 }, { "epoch": 69.77294685990339, "grad_norm": 0.45740365982055664, "learning_rate": 0.0001, "loss": 1.5589, "step": 808808 }, { "epoch": 69.77777777777777, "grad_norm": 0.3347523808479309, "learning_rate": 0.0001, "loss": 1.5551, "step": 808864 }, { "epoch": 69.78260869565217, "grad_norm": 0.31392666697502136, "learning_rate": 0.0001, "loss": 1.5628, "step": 808920 }, { "epoch": 69.78743961352657, "grad_norm": 0.3518419563770294, "learning_rate": 0.0001, "loss": 1.5518, "step": 808976 }, { "epoch": 69.79227053140096, "grad_norm": 0.30357593297958374, "learning_rate": 0.0001, "loss": 1.5611, "step": 809032 }, { "epoch": 69.79710144927536, "grad_norm": 0.3116656541824341, "learning_rate": 0.0001, "loss": 1.5632, "step": 809088 }, { "epoch": 69.80193236714976, "grad_norm": 0.3977534770965576, "learning_rate": 0.0001, "loss": 1.5628, "step": 809144 }, { "epoch": 69.80676328502416, "grad_norm": 0.5039286017417908, "learning_rate": 0.0001, "loss": 1.5568, "step": 809200 }, { "epoch": 69.81159420289855, "grad_norm": 0.44152650237083435, "learning_rate": 0.0001, "loss": 1.556, "step": 809256 }, { "epoch": 69.81642512077295, "grad_norm": 0.31236448884010315, "learning_rate": 0.0001, "loss": 1.5591, "step": 809312 }, { "epoch": 69.82125603864735, "grad_norm": 0.2879325747489929, "learning_rate": 0.0001, "loss": 1.5592, "step": 809368 }, { "epoch": 69.82608695652173, "grad_norm": 0.27765747904777527, "learning_rate": 0.0001, "loss": 1.5688, "step": 809424 }, { "epoch": 69.83091787439614, "grad_norm": 0.33685633540153503, "learning_rate": 0.0001, "loss": 1.5542, "step": 809480 }, { "epoch": 69.83574879227054, "grad_norm": 0.29439058899879456, "learning_rate": 0.0001, "loss": 1.5595, "step": 809536 }, { "epoch": 69.84057971014492, "grad_norm": 0.45822104811668396, "learning_rate": 0.0001, "loss": 1.5563, "step": 809592 }, { "epoch": 69.84541062801932, "grad_norm": 0.3133896589279175, "learning_rate": 0.0001, "loss": 1.5596, "step": 809648 }, { "epoch": 69.85024154589372, "grad_norm": 0.5898823142051697, "learning_rate": 0.0001, "loss": 1.5607, "step": 809704 }, { "epoch": 69.85507246376811, "grad_norm": 6.3573689460754395, "learning_rate": 0.0001, "loss": 1.5512, "step": 809760 }, { "epoch": 69.85990338164251, "grad_norm": 0.39622727036476135, "learning_rate": 0.0001, "loss": 1.5526, "step": 809816 }, { "epoch": 69.86473429951691, "grad_norm": 0.29059702157974243, "learning_rate": 0.0001, "loss": 1.5611, "step": 809872 }, { "epoch": 69.8695652173913, "grad_norm": 1.1604644060134888, "learning_rate": 0.0001, "loss": 1.5601, "step": 809928 }, { "epoch": 69.8743961352657, "grad_norm": 0.24732649326324463, "learning_rate": 0.0001, "loss": 1.5541, "step": 809984 }, { "epoch": 69.8792270531401, "grad_norm": 0.2658132016658783, "learning_rate": 0.0001, "loss": 1.5577, "step": 810040 }, { "epoch": 69.8840579710145, "grad_norm": 0.3943793773651123, "learning_rate": 0.0001, "loss": 1.5588, "step": 810096 }, { "epoch": 69.88888888888889, "grad_norm": 2.455183744430542, "learning_rate": 0.0001, "loss": 1.553, "step": 810152 }, { "epoch": 69.89371980676329, "grad_norm": 0.5563138723373413, "learning_rate": 0.0001, "loss": 1.5556, "step": 810208 }, { "epoch": 69.89855072463769, "grad_norm": 0.3932478725910187, "learning_rate": 0.0001, "loss": 1.5565, "step": 810264 }, { "epoch": 69.90338164251207, "grad_norm": 1.2407420873641968, "learning_rate": 0.0001, "loss": 1.5569, "step": 810320 }, { "epoch": 69.90821256038647, "grad_norm": 0.4498031735420227, "learning_rate": 0.0001, "loss": 1.556, "step": 810376 }, { "epoch": 69.91304347826087, "grad_norm": 0.6108095645904541, "learning_rate": 0.0001, "loss": 1.5563, "step": 810432 }, { "epoch": 69.91787439613526, "grad_norm": 0.23706670105457306, "learning_rate": 0.0001, "loss": 1.5641, "step": 810488 }, { "epoch": 69.92270531400966, "grad_norm": 0.25769636034965515, "learning_rate": 0.0001, "loss": 1.5582, "step": 810544 }, { "epoch": 69.92753623188406, "grad_norm": 0.8166965246200562, "learning_rate": 0.0001, "loss": 1.5558, "step": 810600 }, { "epoch": 69.93236714975845, "grad_norm": 2.317169666290283, "learning_rate": 0.0001, "loss": 1.5488, "step": 810656 }, { "epoch": 69.93719806763285, "grad_norm": 0.39092016220092773, "learning_rate": 0.0001, "loss": 1.5658, "step": 810712 }, { "epoch": 69.94202898550725, "grad_norm": 0.2785685360431671, "learning_rate": 0.0001, "loss": 1.5626, "step": 810768 }, { "epoch": 69.94685990338164, "grad_norm": 0.3826298117637634, "learning_rate": 0.0001, "loss": 1.5594, "step": 810824 }, { "epoch": 69.95169082125604, "grad_norm": 0.30225107073783875, "learning_rate": 0.0001, "loss": 1.5521, "step": 810880 }, { "epoch": 69.95652173913044, "grad_norm": 2.309183120727539, "learning_rate": 0.0001, "loss": 1.561, "step": 810936 }, { "epoch": 69.96135265700484, "grad_norm": 1.1254518032073975, "learning_rate": 0.0001, "loss": 1.556, "step": 810992 }, { "epoch": 69.96618357487922, "grad_norm": 0.3435673713684082, "learning_rate": 0.0001, "loss": 1.5538, "step": 811048 }, { "epoch": 69.97101449275362, "grad_norm": 0.3303617238998413, "learning_rate": 0.0001, "loss": 1.5562, "step": 811104 }, { "epoch": 69.97584541062803, "grad_norm": 0.31808018684387207, "learning_rate": 0.0001, "loss": 1.5661, "step": 811160 }, { "epoch": 69.98067632850241, "grad_norm": 0.4949994385242462, "learning_rate": 0.0001, "loss": 1.5516, "step": 811216 }, { "epoch": 69.98550724637681, "grad_norm": 0.5249282121658325, "learning_rate": 0.0001, "loss": 1.5665, "step": 811272 }, { "epoch": 69.99033816425121, "grad_norm": 0.8560518622398376, "learning_rate": 0.0001, "loss": 1.5592, "step": 811328 }, { "epoch": 69.9951690821256, "grad_norm": 11.331794738769531, "learning_rate": 0.0001, "loss": 1.5595, "step": 811384 }, { "epoch": 70.0, "grad_norm": 2.78851318359375, "learning_rate": 0.0001, "loss": 1.5583, "step": 811440 }, { "epoch": 70.0048309178744, "grad_norm": 2.0961437225341797, "learning_rate": 0.0001, "loss": 1.5529, "step": 811496 }, { "epoch": 70.00966183574879, "grad_norm": 0.3936871588230133, "learning_rate": 0.0001, "loss": 1.5565, "step": 811552 }, { "epoch": 70.01449275362319, "grad_norm": 0.47907960414886475, "learning_rate": 0.0001, "loss": 1.5546, "step": 811608 }, { "epoch": 70.01932367149759, "grad_norm": 0.2684059143066406, "learning_rate": 0.0001, "loss": 1.5579, "step": 811664 }, { "epoch": 70.02415458937197, "grad_norm": 0.3313027322292328, "learning_rate": 0.0001, "loss": 1.5538, "step": 811720 }, { "epoch": 70.02898550724638, "grad_norm": 0.7679251432418823, "learning_rate": 0.0001, "loss": 1.5482, "step": 811776 }, { "epoch": 70.03381642512078, "grad_norm": 0.3015922009944916, "learning_rate": 0.0001, "loss": 1.5555, "step": 811832 }, { "epoch": 70.03864734299516, "grad_norm": 0.4584186375141144, "learning_rate": 0.0001, "loss": 1.5516, "step": 811888 }, { "epoch": 70.04347826086956, "grad_norm": 0.7768761515617371, "learning_rate": 0.0001, "loss": 1.5492, "step": 811944 }, { "epoch": 70.04830917874396, "grad_norm": 0.6243045330047607, "learning_rate": 0.0001, "loss": 1.5468, "step": 812000 }, { "epoch": 70.05314009661836, "grad_norm": 0.27819857001304626, "learning_rate": 0.0001, "loss": 1.5481, "step": 812056 }, { "epoch": 70.05797101449275, "grad_norm": 0.2896891236305237, "learning_rate": 0.0001, "loss": 1.5521, "step": 812112 }, { "epoch": 70.06280193236715, "grad_norm": 0.24421674013137817, "learning_rate": 0.0001, "loss": 1.55, "step": 812168 }, { "epoch": 70.06763285024155, "grad_norm": 0.272579550743103, "learning_rate": 0.0001, "loss": 1.5545, "step": 812224 }, { "epoch": 70.07246376811594, "grad_norm": 0.8198105692863464, "learning_rate": 0.0001, "loss": 1.5554, "step": 812280 }, { "epoch": 70.07729468599034, "grad_norm": 0.396050363779068, "learning_rate": 0.0001, "loss": 1.5552, "step": 812336 }, { "epoch": 70.08212560386474, "grad_norm": 0.34419673681259155, "learning_rate": 0.0001, "loss": 1.5565, "step": 812392 }, { "epoch": 70.08695652173913, "grad_norm": 0.2777276337146759, "learning_rate": 0.0001, "loss": 1.5486, "step": 812448 }, { "epoch": 70.09178743961353, "grad_norm": 0.3400641083717346, "learning_rate": 0.0001, "loss": 1.5487, "step": 812504 }, { "epoch": 70.09661835748793, "grad_norm": 0.2717764675617218, "learning_rate": 0.0001, "loss": 1.553, "step": 812560 }, { "epoch": 70.10144927536231, "grad_norm": 0.24095909297466278, "learning_rate": 0.0001, "loss": 1.5535, "step": 812616 }, { "epoch": 70.10628019323671, "grad_norm": 0.2888641655445099, "learning_rate": 0.0001, "loss": 1.5567, "step": 812672 }, { "epoch": 70.11111111111111, "grad_norm": 0.2651875913143158, "learning_rate": 0.0001, "loss": 1.5484, "step": 812728 }, { "epoch": 70.1159420289855, "grad_norm": 0.3372345566749573, "learning_rate": 0.0001, "loss": 1.5534, "step": 812784 }, { "epoch": 70.1207729468599, "grad_norm": 0.27934765815734863, "learning_rate": 0.0001, "loss": 1.5483, "step": 812840 }, { "epoch": 70.1256038647343, "grad_norm": 0.5796911716461182, "learning_rate": 0.0001, "loss": 1.5512, "step": 812896 }, { "epoch": 70.1304347826087, "grad_norm": 1.803030014038086, "learning_rate": 0.0001, "loss": 1.5552, "step": 812952 }, { "epoch": 70.13526570048309, "grad_norm": 0.2336534857749939, "learning_rate": 0.0001, "loss": 1.5546, "step": 813008 }, { "epoch": 70.14009661835749, "grad_norm": 0.3946218192577362, "learning_rate": 0.0001, "loss": 1.5504, "step": 813064 }, { "epoch": 70.14492753623189, "grad_norm": 0.250273734331131, "learning_rate": 0.0001, "loss": 1.5485, "step": 813120 }, { "epoch": 70.14975845410628, "grad_norm": 0.2668542265892029, "learning_rate": 0.0001, "loss": 1.5545, "step": 813176 }, { "epoch": 70.15458937198068, "grad_norm": 0.31593361496925354, "learning_rate": 0.0001, "loss": 1.5491, "step": 813232 }, { "epoch": 70.15942028985508, "grad_norm": 0.32707926630973816, "learning_rate": 0.0001, "loss": 1.5515, "step": 813288 }, { "epoch": 70.16425120772946, "grad_norm": 0.94158935546875, "learning_rate": 0.0001, "loss": 1.5474, "step": 813344 }, { "epoch": 70.16908212560386, "grad_norm": 0.2837890088558197, "learning_rate": 0.0001, "loss": 1.5564, "step": 813400 }, { "epoch": 70.17391304347827, "grad_norm": 0.25728437304496765, "learning_rate": 0.0001, "loss": 1.5522, "step": 813456 }, { "epoch": 70.17874396135265, "grad_norm": 0.3171633183956146, "learning_rate": 0.0001, "loss": 1.5487, "step": 813512 }, { "epoch": 70.18357487922705, "grad_norm": 0.2875058054924011, "learning_rate": 0.0001, "loss": 1.5448, "step": 813568 }, { "epoch": 70.18840579710145, "grad_norm": 0.22457918524742126, "learning_rate": 0.0001, "loss": 1.5546, "step": 813624 }, { "epoch": 70.19323671497584, "grad_norm": 0.23383252322673798, "learning_rate": 0.0001, "loss": 1.5492, "step": 813680 }, { "epoch": 70.19806763285024, "grad_norm": 0.2755691111087799, "learning_rate": 0.0001, "loss": 1.5531, "step": 813736 }, { "epoch": 70.20289855072464, "grad_norm": 0.3390747010707855, "learning_rate": 0.0001, "loss": 1.5538, "step": 813792 }, { "epoch": 70.20772946859903, "grad_norm": 1.0072535276412964, "learning_rate": 0.0001, "loss": 1.5613, "step": 813848 }, { "epoch": 70.21256038647343, "grad_norm": 0.2622012794017792, "learning_rate": 0.0001, "loss": 1.5541, "step": 813904 }, { "epoch": 70.21739130434783, "grad_norm": 0.411728173494339, "learning_rate": 0.0001, "loss": 1.5501, "step": 813960 }, { "epoch": 70.22222222222223, "grad_norm": 0.26576393842697144, "learning_rate": 0.0001, "loss": 1.5506, "step": 814016 }, { "epoch": 70.22705314009661, "grad_norm": 0.4428793787956238, "learning_rate": 0.0001, "loss": 1.5556, "step": 814072 }, { "epoch": 70.23188405797102, "grad_norm": 0.30938956141471863, "learning_rate": 0.0001, "loss": 1.5528, "step": 814128 }, { "epoch": 70.23671497584542, "grad_norm": 0.25954732298851013, "learning_rate": 0.0001, "loss": 1.5521, "step": 814184 }, { "epoch": 70.2415458937198, "grad_norm": 0.42917218804359436, "learning_rate": 0.0001, "loss": 1.5543, "step": 814240 }, { "epoch": 70.2463768115942, "grad_norm": 0.31577450037002563, "learning_rate": 0.0001, "loss": 1.547, "step": 814296 }, { "epoch": 70.2512077294686, "grad_norm": 1.4533360004425049, "learning_rate": 0.0001, "loss": 1.5489, "step": 814352 }, { "epoch": 70.25603864734299, "grad_norm": 0.30785441398620605, "learning_rate": 0.0001, "loss": 1.5552, "step": 814408 }, { "epoch": 70.26086956521739, "grad_norm": 0.24599690735340118, "learning_rate": 0.0001, "loss": 1.5513, "step": 814464 }, { "epoch": 70.26570048309179, "grad_norm": 1.0146836042404175, "learning_rate": 0.0001, "loss": 1.5541, "step": 814520 }, { "epoch": 70.27053140096618, "grad_norm": 0.26256483793258667, "learning_rate": 0.0001, "loss": 1.5509, "step": 814576 }, { "epoch": 70.27536231884058, "grad_norm": 0.8857872486114502, "learning_rate": 0.0001, "loss": 1.5514, "step": 814632 }, { "epoch": 70.28019323671498, "grad_norm": 0.24917148053646088, "learning_rate": 0.0001, "loss": 1.5564, "step": 814688 }, { "epoch": 70.28502415458937, "grad_norm": 0.47303855419158936, "learning_rate": 0.0001, "loss": 1.5481, "step": 814744 }, { "epoch": 70.28985507246377, "grad_norm": 0.24015569686889648, "learning_rate": 0.0001, "loss": 1.5558, "step": 814800 }, { "epoch": 70.29468599033817, "grad_norm": 0.3669865131378174, "learning_rate": 0.0001, "loss": 1.5544, "step": 814856 }, { "epoch": 70.29951690821257, "grad_norm": 0.4408866763114929, "learning_rate": 0.0001, "loss": 1.5518, "step": 814912 }, { "epoch": 70.30434782608695, "grad_norm": 1.5703964233398438, "learning_rate": 0.0001, "loss": 1.5556, "step": 814968 }, { "epoch": 70.30917874396135, "grad_norm": 0.36596181988716125, "learning_rate": 0.0001, "loss": 1.5535, "step": 815024 }, { "epoch": 70.31400966183575, "grad_norm": 0.6596453785896301, "learning_rate": 0.0001, "loss": 1.5566, "step": 815080 }, { "epoch": 70.31884057971014, "grad_norm": 0.43241119384765625, "learning_rate": 0.0001, "loss": 1.5547, "step": 815136 }, { "epoch": 70.32367149758454, "grad_norm": 0.26685500144958496, "learning_rate": 0.0001, "loss": 1.555, "step": 815192 }, { "epoch": 70.32850241545894, "grad_norm": 0.2845839560031891, "learning_rate": 0.0001, "loss": 1.5546, "step": 815248 }, { "epoch": 70.33333333333333, "grad_norm": 1.3903741836547852, "learning_rate": 0.0001, "loss": 1.5611, "step": 815304 }, { "epoch": 70.33816425120773, "grad_norm": 0.28660935163497925, "learning_rate": 0.0001, "loss": 1.5503, "step": 815360 }, { "epoch": 70.34299516908213, "grad_norm": 0.25324565172195435, "learning_rate": 0.0001, "loss": 1.5585, "step": 815416 }, { "epoch": 70.34782608695652, "grad_norm": 0.26190632581710815, "learning_rate": 0.0001, "loss": 1.553, "step": 815472 }, { "epoch": 70.35265700483092, "grad_norm": 0.31742483377456665, "learning_rate": 0.0001, "loss": 1.5556, "step": 815528 }, { "epoch": 70.35748792270532, "grad_norm": 0.3930263817310333, "learning_rate": 0.0001, "loss": 1.5534, "step": 815584 }, { "epoch": 70.3623188405797, "grad_norm": 1.0839576721191406, "learning_rate": 0.0001, "loss": 1.5599, "step": 815640 }, { "epoch": 70.3671497584541, "grad_norm": 0.5922160744667053, "learning_rate": 0.0001, "loss": 1.555, "step": 815696 }, { "epoch": 70.3719806763285, "grad_norm": 0.3333873450756073, "learning_rate": 0.0001, "loss": 1.5507, "step": 815752 }, { "epoch": 70.3768115942029, "grad_norm": 0.9195272326469421, "learning_rate": 0.0001, "loss": 1.5524, "step": 815808 }, { "epoch": 70.38164251207729, "grad_norm": 1.033447027206421, "learning_rate": 0.0001, "loss": 1.5554, "step": 815864 }, { "epoch": 70.38647342995169, "grad_norm": 0.3684085011482239, "learning_rate": 0.0001, "loss": 1.5529, "step": 815920 }, { "epoch": 70.3913043478261, "grad_norm": 0.27772775292396545, "learning_rate": 0.0001, "loss": 1.559, "step": 815976 }, { "epoch": 70.39613526570048, "grad_norm": 0.40551894903182983, "learning_rate": 0.0001, "loss": 1.5567, "step": 816032 }, { "epoch": 70.40096618357488, "grad_norm": 0.23984041810035706, "learning_rate": 0.0001, "loss": 1.5579, "step": 816088 }, { "epoch": 70.40579710144928, "grad_norm": 0.34981808066368103, "learning_rate": 0.0001, "loss": 1.5581, "step": 816144 }, { "epoch": 70.41062801932367, "grad_norm": 0.2661125957965851, "learning_rate": 0.0001, "loss": 1.5566, "step": 816200 }, { "epoch": 70.41545893719807, "grad_norm": 0.38045382499694824, "learning_rate": 0.0001, "loss": 1.5628, "step": 816256 }, { "epoch": 70.42028985507247, "grad_norm": 5.247325420379639, "learning_rate": 0.0001, "loss": 1.5587, "step": 816312 }, { "epoch": 70.42512077294685, "grad_norm": 0.24089756608009338, "learning_rate": 0.0001, "loss": 1.5624, "step": 816368 }, { "epoch": 70.42995169082126, "grad_norm": 0.2997238337993622, "learning_rate": 0.0001, "loss": 1.5573, "step": 816424 }, { "epoch": 70.43478260869566, "grad_norm": 0.2756384015083313, "learning_rate": 0.0001, "loss": 1.5585, "step": 816480 }, { "epoch": 70.43961352657004, "grad_norm": 0.2620161771774292, "learning_rate": 0.0001, "loss": 1.5588, "step": 816536 }, { "epoch": 70.44444444444444, "grad_norm": 0.2821025848388672, "learning_rate": 0.0001, "loss": 1.5543, "step": 816592 }, { "epoch": 70.44927536231884, "grad_norm": 0.29247841238975525, "learning_rate": 0.0001, "loss": 1.5502, "step": 816648 }, { "epoch": 70.45410628019323, "grad_norm": 0.3372337818145752, "learning_rate": 0.0001, "loss": 1.555, "step": 816704 }, { "epoch": 70.45893719806763, "grad_norm": 0.35794270038604736, "learning_rate": 0.0001, "loss": 1.5493, "step": 816760 }, { "epoch": 70.46376811594203, "grad_norm": 0.2637154757976532, "learning_rate": 0.0001, "loss": 1.5462, "step": 816816 }, { "epoch": 70.46859903381643, "grad_norm": 0.35766705870628357, "learning_rate": 0.0001, "loss": 1.5557, "step": 816872 }, { "epoch": 70.47342995169082, "grad_norm": 0.43640220165252686, "learning_rate": 0.0001, "loss": 1.5503, "step": 816928 }, { "epoch": 70.47826086956522, "grad_norm": 0.397434800863266, "learning_rate": 0.0001, "loss": 1.5469, "step": 816984 }, { "epoch": 70.48309178743962, "grad_norm": 0.3797900378704071, "learning_rate": 0.0001, "loss": 1.5562, "step": 817040 }, { "epoch": 70.487922705314, "grad_norm": 0.7130241394042969, "learning_rate": 0.0001, "loss": 1.5559, "step": 817096 }, { "epoch": 70.4927536231884, "grad_norm": 0.30047607421875, "learning_rate": 0.0001, "loss": 1.5527, "step": 817152 }, { "epoch": 70.4975845410628, "grad_norm": 3.8636295795440674, "learning_rate": 0.0001, "loss": 1.5526, "step": 817208 }, { "epoch": 70.5024154589372, "grad_norm": 1.2976999282836914, "learning_rate": 0.0001, "loss": 1.559, "step": 817264 }, { "epoch": 70.5072463768116, "grad_norm": 0.2900172472000122, "learning_rate": 0.0001, "loss": 1.5551, "step": 817320 }, { "epoch": 70.512077294686, "grad_norm": 0.40500402450561523, "learning_rate": 0.0001, "loss": 1.5572, "step": 817376 }, { "epoch": 70.51690821256038, "grad_norm": 0.25883105397224426, "learning_rate": 0.0001, "loss": 1.5576, "step": 817432 }, { "epoch": 70.52173913043478, "grad_norm": 0.33761245012283325, "learning_rate": 0.0001, "loss": 1.5538, "step": 817488 }, { "epoch": 70.52657004830918, "grad_norm": 0.24697698652744293, "learning_rate": 0.0001, "loss": 1.5549, "step": 817544 }, { "epoch": 70.53140096618357, "grad_norm": 0.7540317177772522, "learning_rate": 0.0001, "loss": 1.5561, "step": 817600 }, { "epoch": 70.53623188405797, "grad_norm": 0.8883280754089355, "learning_rate": 0.0001, "loss": 1.5537, "step": 817656 }, { "epoch": 70.54106280193237, "grad_norm": 0.5103464126586914, "learning_rate": 0.0001, "loss": 1.5534, "step": 817712 }, { "epoch": 70.54589371980677, "grad_norm": 0.49295923113822937, "learning_rate": 0.0001, "loss": 1.5516, "step": 817768 }, { "epoch": 70.55072463768116, "grad_norm": 0.24342314898967743, "learning_rate": 0.0001, "loss": 1.5536, "step": 817824 }, { "epoch": 70.55555555555556, "grad_norm": 0.22756119072437286, "learning_rate": 0.0001, "loss": 1.5583, "step": 817880 }, { "epoch": 70.56038647342996, "grad_norm": 0.3810202479362488, "learning_rate": 0.0001, "loss": 1.5504, "step": 817936 }, { "epoch": 70.56521739130434, "grad_norm": 0.2899780571460724, "learning_rate": 0.0001, "loss": 1.5477, "step": 817992 }, { "epoch": 70.57004830917874, "grad_norm": 0.27928560972213745, "learning_rate": 0.0001, "loss": 1.5594, "step": 818048 }, { "epoch": 70.57487922705315, "grad_norm": 22.454254150390625, "learning_rate": 0.0001, "loss": 1.5496, "step": 818104 }, { "epoch": 70.57971014492753, "grad_norm": 0.3158833086490631, "learning_rate": 0.0001, "loss": 1.5528, "step": 818160 }, { "epoch": 70.58454106280193, "grad_norm": 1.3450095653533936, "learning_rate": 0.0001, "loss": 1.5569, "step": 818216 }, { "epoch": 70.58937198067633, "grad_norm": 0.3581647276878357, "learning_rate": 0.0001, "loss": 1.5543, "step": 818272 }, { "epoch": 70.59420289855072, "grad_norm": 0.24625170230865479, "learning_rate": 0.0001, "loss": 1.553, "step": 818328 }, { "epoch": 70.59903381642512, "grad_norm": 0.22748620808124542, "learning_rate": 0.0001, "loss": 1.5569, "step": 818384 }, { "epoch": 70.60386473429952, "grad_norm": 0.562282145023346, "learning_rate": 0.0001, "loss": 1.5585, "step": 818440 }, { "epoch": 70.6086956521739, "grad_norm": 0.27562472224235535, "learning_rate": 0.0001, "loss": 1.551, "step": 818496 }, { "epoch": 70.61352657004831, "grad_norm": 0.9998619556427002, "learning_rate": 0.0001, "loss": 1.5588, "step": 818552 }, { "epoch": 70.61835748792271, "grad_norm": 3.722966194152832, "learning_rate": 0.0001, "loss": 1.5483, "step": 818608 }, { "epoch": 70.6231884057971, "grad_norm": 1.3413459062576294, "learning_rate": 0.0001, "loss": 1.556, "step": 818664 }, { "epoch": 70.6280193236715, "grad_norm": 0.3151610195636749, "learning_rate": 0.0001, "loss": 1.5571, "step": 818720 }, { "epoch": 70.6328502415459, "grad_norm": 0.3900704085826874, "learning_rate": 0.0001, "loss": 1.5552, "step": 818776 }, { "epoch": 70.6376811594203, "grad_norm": 0.37634867429733276, "learning_rate": 0.0001, "loss": 1.5619, "step": 818832 }, { "epoch": 70.64251207729468, "grad_norm": 0.27196839451789856, "learning_rate": 0.0001, "loss": 1.5562, "step": 818888 }, { "epoch": 70.64734299516908, "grad_norm": 40.23036575317383, "learning_rate": 0.0001, "loss": 1.5614, "step": 818944 }, { "epoch": 70.65217391304348, "grad_norm": 0.5630510449409485, "learning_rate": 0.0001, "loss": 1.563, "step": 819000 }, { "epoch": 70.65700483091787, "grad_norm": 0.6599827408790588, "learning_rate": 0.0001, "loss": 1.5578, "step": 819056 }, { "epoch": 70.66183574879227, "grad_norm": 0.298686683177948, "learning_rate": 0.0001, "loss": 1.5603, "step": 819112 }, { "epoch": 70.66666666666667, "grad_norm": 0.34765875339508057, "learning_rate": 0.0001, "loss": 1.5615, "step": 819168 }, { "epoch": 70.67149758454106, "grad_norm": 0.28737226128578186, "learning_rate": 0.0001, "loss": 1.5549, "step": 819224 }, { "epoch": 70.67632850241546, "grad_norm": 0.30529946088790894, "learning_rate": 0.0001, "loss": 1.5468, "step": 819280 }, { "epoch": 70.68115942028986, "grad_norm": 0.3746245205402374, "learning_rate": 0.0001, "loss": 1.5544, "step": 819336 }, { "epoch": 70.68599033816425, "grad_norm": 0.2997240126132965, "learning_rate": 0.0001, "loss": 1.5535, "step": 819392 }, { "epoch": 70.69082125603865, "grad_norm": 0.3456723988056183, "learning_rate": 0.0001, "loss": 1.5618, "step": 819448 }, { "epoch": 70.69565217391305, "grad_norm": 0.26553818583488464, "learning_rate": 0.0001, "loss": 1.5521, "step": 819504 }, { "epoch": 70.70048309178743, "grad_norm": 0.2731158435344696, "learning_rate": 0.0001, "loss": 1.56, "step": 819560 }, { "epoch": 70.70531400966183, "grad_norm": 0.2725643813610077, "learning_rate": 0.0001, "loss": 1.5587, "step": 819616 }, { "epoch": 70.71014492753623, "grad_norm": 0.2979659140110016, "learning_rate": 0.0001, "loss": 1.5536, "step": 819672 }, { "epoch": 70.71497584541063, "grad_norm": 0.23610128462314606, "learning_rate": 0.0001, "loss": 1.5544, "step": 819728 }, { "epoch": 70.71980676328502, "grad_norm": 0.2894839942455292, "learning_rate": 0.0001, "loss": 1.5632, "step": 819784 }, { "epoch": 70.72463768115942, "grad_norm": 0.626234233379364, "learning_rate": 0.0001, "loss": 1.5527, "step": 819840 }, { "epoch": 70.72946859903382, "grad_norm": 0.25930848717689514, "learning_rate": 0.0001, "loss": 1.5642, "step": 819896 }, { "epoch": 70.73429951690821, "grad_norm": 0.2553984224796295, "learning_rate": 0.0001, "loss": 1.5582, "step": 819952 }, { "epoch": 70.73913043478261, "grad_norm": 0.3574540913105011, "learning_rate": 0.0001, "loss": 1.5545, "step": 820008 }, { "epoch": 70.74396135265701, "grad_norm": 0.278501957654953, "learning_rate": 0.0001, "loss": 1.5541, "step": 820064 }, { "epoch": 70.7487922705314, "grad_norm": 0.35150620341300964, "learning_rate": 0.0001, "loss": 1.5538, "step": 820120 }, { "epoch": 70.7536231884058, "grad_norm": 0.30935248732566833, "learning_rate": 0.0001, "loss": 1.5567, "step": 820176 }, { "epoch": 70.7584541062802, "grad_norm": 0.29781532287597656, "learning_rate": 0.0001, "loss": 1.5586, "step": 820232 }, { "epoch": 70.76328502415458, "grad_norm": 0.603242039680481, "learning_rate": 0.0001, "loss": 1.5589, "step": 820288 }, { "epoch": 70.76811594202898, "grad_norm": 0.3421213924884796, "learning_rate": 0.0001, "loss": 1.5538, "step": 820344 }, { "epoch": 70.77294685990339, "grad_norm": 0.3188979923725128, "learning_rate": 0.0001, "loss": 1.5563, "step": 820400 }, { "epoch": 70.77777777777777, "grad_norm": 0.33020728826522827, "learning_rate": 0.0001, "loss": 1.5457, "step": 820456 }, { "epoch": 70.78260869565217, "grad_norm": 1.5000014305114746, "learning_rate": 0.0001, "loss": 1.5578, "step": 820512 }, { "epoch": 70.78743961352657, "grad_norm": 1.3173646926879883, "learning_rate": 0.0001, "loss": 1.5598, "step": 820568 }, { "epoch": 70.79227053140096, "grad_norm": 0.43402382731437683, "learning_rate": 0.0001, "loss": 1.5542, "step": 820624 }, { "epoch": 70.79710144927536, "grad_norm": 0.6824092864990234, "learning_rate": 0.0001, "loss": 1.5585, "step": 820680 }, { "epoch": 70.80193236714976, "grad_norm": 0.27182072401046753, "learning_rate": 0.0001, "loss": 1.5599, "step": 820736 }, { "epoch": 70.80676328502416, "grad_norm": 0.3243178129196167, "learning_rate": 0.0001, "loss": 1.5607, "step": 820792 }, { "epoch": 70.81159420289855, "grad_norm": 0.2501881420612335, "learning_rate": 0.0001, "loss": 1.5588, "step": 820848 }, { "epoch": 70.81642512077295, "grad_norm": 0.3565293848514557, "learning_rate": 0.0001, "loss": 1.5545, "step": 820904 }, { "epoch": 70.82125603864735, "grad_norm": 0.34448373317718506, "learning_rate": 0.0001, "loss": 1.5517, "step": 820960 }, { "epoch": 70.82608695652173, "grad_norm": 0.22687311470508575, "learning_rate": 0.0001, "loss": 1.5543, "step": 821016 }, { "epoch": 70.83091787439614, "grad_norm": 0.2750020921230316, "learning_rate": 0.0001, "loss": 1.5604, "step": 821072 }, { "epoch": 70.83574879227054, "grad_norm": 0.44297394156455994, "learning_rate": 0.0001, "loss": 1.5556, "step": 821128 }, { "epoch": 70.84057971014492, "grad_norm": 0.3208041489124298, "learning_rate": 0.0001, "loss": 1.5556, "step": 821184 }, { "epoch": 70.84541062801932, "grad_norm": 21.09697151184082, "learning_rate": 0.0001, "loss": 1.5576, "step": 821240 }, { "epoch": 70.85024154589372, "grad_norm": 0.7843339443206787, "learning_rate": 0.0001, "loss": 1.558, "step": 821296 }, { "epoch": 70.85507246376811, "grad_norm": 0.8338590860366821, "learning_rate": 0.0001, "loss": 1.5619, "step": 821352 }, { "epoch": 70.85990338164251, "grad_norm": 0.34885573387145996, "learning_rate": 0.0001, "loss": 1.558, "step": 821408 }, { "epoch": 70.86473429951691, "grad_norm": 0.36314406991004944, "learning_rate": 0.0001, "loss": 1.5586, "step": 821464 }, { "epoch": 70.8695652173913, "grad_norm": 0.26877084374427795, "learning_rate": 0.0001, "loss": 1.5588, "step": 821520 }, { "epoch": 70.8743961352657, "grad_norm": 0.2790859639644623, "learning_rate": 0.0001, "loss": 1.5589, "step": 821576 }, { "epoch": 70.8792270531401, "grad_norm": 0.34788718819618225, "learning_rate": 0.0001, "loss": 1.5621, "step": 821632 }, { "epoch": 70.8840579710145, "grad_norm": 0.32740816473960876, "learning_rate": 0.0001, "loss": 1.5565, "step": 821688 }, { "epoch": 70.88888888888889, "grad_norm": 0.3323192000389099, "learning_rate": 0.0001, "loss": 1.5578, "step": 821744 }, { "epoch": 70.89371980676329, "grad_norm": 0.7556242942810059, "learning_rate": 0.0001, "loss": 1.5563, "step": 821800 }, { "epoch": 70.89855072463769, "grad_norm": 0.5470066666603088, "learning_rate": 0.0001, "loss": 1.5556, "step": 821856 }, { "epoch": 70.90338164251207, "grad_norm": 0.26571518182754517, "learning_rate": 0.0001, "loss": 1.5636, "step": 821912 }, { "epoch": 70.90821256038647, "grad_norm": 0.35192951560020447, "learning_rate": 0.0001, "loss": 1.5612, "step": 821968 }, { "epoch": 70.91304347826087, "grad_norm": 0.28798532485961914, "learning_rate": 0.0001, "loss": 1.5593, "step": 822024 }, { "epoch": 70.91787439613526, "grad_norm": 0.48565804958343506, "learning_rate": 0.0001, "loss": 1.5565, "step": 822080 }, { "epoch": 70.92270531400966, "grad_norm": 1.262198567390442, "learning_rate": 0.0001, "loss": 1.5573, "step": 822136 }, { "epoch": 70.92753623188406, "grad_norm": 0.544151782989502, "learning_rate": 0.0001, "loss": 1.5554, "step": 822192 }, { "epoch": 70.93236714975845, "grad_norm": 0.615493893623352, "learning_rate": 0.0001, "loss": 1.5551, "step": 822248 }, { "epoch": 70.93719806763285, "grad_norm": 0.2844991683959961, "learning_rate": 0.0001, "loss": 1.5573, "step": 822304 }, { "epoch": 70.94202898550725, "grad_norm": 0.3484768569469452, "learning_rate": 0.0001, "loss": 1.5613, "step": 822360 }, { "epoch": 70.94685990338164, "grad_norm": 0.47870829701423645, "learning_rate": 0.0001, "loss": 1.5566, "step": 822416 }, { "epoch": 70.95169082125604, "grad_norm": 1.136504054069519, "learning_rate": 0.0001, "loss": 1.5578, "step": 822472 }, { "epoch": 70.95652173913044, "grad_norm": 0.6887720823287964, "learning_rate": 0.0001, "loss": 1.5613, "step": 822528 }, { "epoch": 70.96135265700484, "grad_norm": 0.27526193857192993, "learning_rate": 0.0001, "loss": 1.5568, "step": 822584 }, { "epoch": 70.96618357487922, "grad_norm": 20.013296127319336, "learning_rate": 0.0001, "loss": 1.5508, "step": 822640 }, { "epoch": 70.97101449275362, "grad_norm": 1.9697186946868896, "learning_rate": 0.0001, "loss": 1.5606, "step": 822696 }, { "epoch": 70.97584541062803, "grad_norm": 0.4128214418888092, "learning_rate": 0.0001, "loss": 1.5531, "step": 822752 }, { "epoch": 70.98067632850241, "grad_norm": 0.5451667904853821, "learning_rate": 0.0001, "loss": 1.5576, "step": 822808 }, { "epoch": 70.98550724637681, "grad_norm": 5.387852191925049, "learning_rate": 0.0001, "loss": 1.5517, "step": 822864 }, { "epoch": 70.99033816425121, "grad_norm": 0.2927531599998474, "learning_rate": 0.0001, "loss": 1.5574, "step": 822920 }, { "epoch": 70.9951690821256, "grad_norm": 0.30295678973197937, "learning_rate": 0.0001, "loss": 1.553, "step": 822976 }, { "epoch": 71.0, "grad_norm": 0.32333001494407654, "learning_rate": 0.0001, "loss": 1.5588, "step": 823032 }, { "epoch": 71.0048309178744, "grad_norm": 0.28914928436279297, "learning_rate": 0.0001, "loss": 1.5536, "step": 823088 }, { "epoch": 71.00966183574879, "grad_norm": 0.2452630251646042, "learning_rate": 0.0001, "loss": 1.5484, "step": 823144 }, { "epoch": 71.01449275362319, "grad_norm": 0.49073755741119385, "learning_rate": 0.0001, "loss": 1.5483, "step": 823200 }, { "epoch": 71.01932367149759, "grad_norm": 0.4740998446941376, "learning_rate": 0.0001, "loss": 1.5507, "step": 823256 }, { "epoch": 71.02415458937197, "grad_norm": 0.2583577632904053, "learning_rate": 0.0001, "loss": 1.5516, "step": 823312 }, { "epoch": 71.02898550724638, "grad_norm": 0.7794737815856934, "learning_rate": 0.0001, "loss": 1.5493, "step": 823368 }, { "epoch": 71.03381642512078, "grad_norm": 0.3148103952407837, "learning_rate": 0.0001, "loss": 1.5527, "step": 823424 }, { "epoch": 71.03864734299516, "grad_norm": 0.9655210375785828, "learning_rate": 0.0001, "loss": 1.5508, "step": 823480 }, { "epoch": 71.04347826086956, "grad_norm": 0.2564266324043274, "learning_rate": 0.0001, "loss": 1.5468, "step": 823536 }, { "epoch": 71.04830917874396, "grad_norm": 1.0624359846115112, "learning_rate": 0.0001, "loss": 1.5509, "step": 823592 }, { "epoch": 71.05314009661836, "grad_norm": 0.28926214575767517, "learning_rate": 0.0001, "loss": 1.5468, "step": 823648 }, { "epoch": 71.05797101449275, "grad_norm": 0.39653751254081726, "learning_rate": 0.0001, "loss": 1.5494, "step": 823704 }, { "epoch": 71.06280193236715, "grad_norm": 0.29699191451072693, "learning_rate": 0.0001, "loss": 1.5435, "step": 823760 }, { "epoch": 71.06763285024155, "grad_norm": 0.23443464934825897, "learning_rate": 0.0001, "loss": 1.5469, "step": 823816 }, { "epoch": 71.07246376811594, "grad_norm": 0.3828304708003998, "learning_rate": 0.0001, "loss": 1.5448, "step": 823872 }, { "epoch": 71.07729468599034, "grad_norm": 0.265653133392334, "learning_rate": 0.0001, "loss": 1.5473, "step": 823928 }, { "epoch": 71.08212560386474, "grad_norm": 0.32923176884651184, "learning_rate": 0.0001, "loss": 1.5488, "step": 823984 }, { "epoch": 71.08695652173913, "grad_norm": 0.3217480182647705, "learning_rate": 0.0001, "loss": 1.5474, "step": 824040 }, { "epoch": 71.09178743961353, "grad_norm": 1.3854742050170898, "learning_rate": 0.0001, "loss": 1.5498, "step": 824096 }, { "epoch": 71.09661835748793, "grad_norm": 8.628397941589355, "learning_rate": 0.0001, "loss": 1.5567, "step": 824152 }, { "epoch": 71.10144927536231, "grad_norm": 0.2977035939693451, "learning_rate": 0.0001, "loss": 1.5436, "step": 824208 }, { "epoch": 71.10628019323671, "grad_norm": 0.25961658358573914, "learning_rate": 0.0001, "loss": 1.5489, "step": 824264 }, { "epoch": 71.11111111111111, "grad_norm": 0.26048341393470764, "learning_rate": 0.0001, "loss": 1.5557, "step": 824320 }, { "epoch": 71.1159420289855, "grad_norm": 0.37404799461364746, "learning_rate": 0.0001, "loss": 1.5515, "step": 824376 }, { "epoch": 71.1207729468599, "grad_norm": 0.331145316362381, "learning_rate": 0.0001, "loss": 1.5515, "step": 824432 }, { "epoch": 71.1256038647343, "grad_norm": 1.6805713176727295, "learning_rate": 0.0001, "loss": 1.5506, "step": 824488 }, { "epoch": 71.1304347826087, "grad_norm": 0.37767505645751953, "learning_rate": 0.0001, "loss": 1.5498, "step": 824544 }, { "epoch": 71.13526570048309, "grad_norm": 0.26098498702049255, "learning_rate": 0.0001, "loss": 1.5505, "step": 824600 }, { "epoch": 71.14009661835749, "grad_norm": 0.48982393741607666, "learning_rate": 0.0001, "loss": 1.5584, "step": 824656 }, { "epoch": 71.14492753623189, "grad_norm": 0.28632301092147827, "learning_rate": 0.0001, "loss": 1.5511, "step": 824712 }, { "epoch": 71.14975845410628, "grad_norm": 0.5456331968307495, "learning_rate": 0.0001, "loss": 1.5536, "step": 824768 }, { "epoch": 71.15458937198068, "grad_norm": 0.32651451230049133, "learning_rate": 0.0001, "loss": 1.5511, "step": 824824 }, { "epoch": 71.15942028985508, "grad_norm": 0.31857749819755554, "learning_rate": 0.0001, "loss": 1.5518, "step": 824880 }, { "epoch": 71.16425120772946, "grad_norm": 0.30754634737968445, "learning_rate": 0.0001, "loss": 1.5493, "step": 824936 }, { "epoch": 71.16908212560386, "grad_norm": 0.42745333909988403, "learning_rate": 0.0001, "loss": 1.5502, "step": 824992 }, { "epoch": 71.17391304347827, "grad_norm": 0.2510411739349365, "learning_rate": 0.0001, "loss": 1.5478, "step": 825048 }, { "epoch": 71.17874396135265, "grad_norm": 0.4312749207019806, "learning_rate": 0.0001, "loss": 1.55, "step": 825104 }, { "epoch": 71.18357487922705, "grad_norm": 2.250779628753662, "learning_rate": 0.0001, "loss": 1.5555, "step": 825160 }, { "epoch": 71.18840579710145, "grad_norm": 0.3261016607284546, "learning_rate": 0.0001, "loss": 1.5573, "step": 825216 }, { "epoch": 71.19323671497584, "grad_norm": 0.21854349970817566, "learning_rate": 0.0001, "loss": 1.5449, "step": 825272 }, { "epoch": 71.19806763285024, "grad_norm": 0.34747785329818726, "learning_rate": 0.0001, "loss": 1.5526, "step": 825328 }, { "epoch": 71.20289855072464, "grad_norm": 0.34846732020378113, "learning_rate": 0.0001, "loss": 1.5533, "step": 825384 }, { "epoch": 71.20772946859903, "grad_norm": 0.509559154510498, "learning_rate": 0.0001, "loss": 1.5517, "step": 825440 }, { "epoch": 71.21256038647343, "grad_norm": 0.3505825996398926, "learning_rate": 0.0001, "loss": 1.5527, "step": 825496 }, { "epoch": 71.21739130434783, "grad_norm": 1.0284171104431152, "learning_rate": 0.0001, "loss": 1.5563, "step": 825552 }, { "epoch": 71.22222222222223, "grad_norm": 0.5627572536468506, "learning_rate": 0.0001, "loss": 1.5502, "step": 825608 }, { "epoch": 71.22705314009661, "grad_norm": 0.2645014226436615, "learning_rate": 0.0001, "loss": 1.553, "step": 825664 }, { "epoch": 71.23188405797102, "grad_norm": 0.2839333117008209, "learning_rate": 0.0001, "loss": 1.5506, "step": 825720 }, { "epoch": 71.23671497584542, "grad_norm": 1.8716135025024414, "learning_rate": 0.0001, "loss": 1.5606, "step": 825776 }, { "epoch": 71.2415458937198, "grad_norm": 0.9104961156845093, "learning_rate": 0.0001, "loss": 1.5467, "step": 825832 }, { "epoch": 71.2463768115942, "grad_norm": 0.3312029540538788, "learning_rate": 0.0001, "loss": 1.5536, "step": 825888 }, { "epoch": 71.2512077294686, "grad_norm": 0.24511252343654633, "learning_rate": 0.0001, "loss": 1.5557, "step": 825944 }, { "epoch": 71.25603864734299, "grad_norm": 0.6051290035247803, "learning_rate": 0.0001, "loss": 1.5482, "step": 826000 }, { "epoch": 71.26086956521739, "grad_norm": 0.2981499433517456, "learning_rate": 0.0001, "loss": 1.5596, "step": 826056 }, { "epoch": 71.26570048309179, "grad_norm": 0.38824349641799927, "learning_rate": 0.0001, "loss": 1.5529, "step": 826112 }, { "epoch": 71.27053140096618, "grad_norm": 0.24986425042152405, "learning_rate": 0.0001, "loss": 1.5551, "step": 826168 }, { "epoch": 71.27536231884058, "grad_norm": 1.8874067068099976, "learning_rate": 0.0001, "loss": 1.5539, "step": 826224 }, { "epoch": 71.28019323671498, "grad_norm": 0.3161029517650604, "learning_rate": 0.0001, "loss": 1.5574, "step": 826280 }, { "epoch": 71.28502415458937, "grad_norm": 0.35912683606147766, "learning_rate": 0.0001, "loss": 1.5557, "step": 826336 }, { "epoch": 71.28985507246377, "grad_norm": 0.2444460690021515, "learning_rate": 0.0001, "loss": 1.5519, "step": 826392 }, { "epoch": 71.29468599033817, "grad_norm": 0.5176471471786499, "learning_rate": 0.0001, "loss": 1.5529, "step": 826448 }, { "epoch": 71.29951690821257, "grad_norm": 0.3209226727485657, "learning_rate": 0.0001, "loss": 1.5493, "step": 826504 }, { "epoch": 71.30434782608695, "grad_norm": 0.6535325646400452, "learning_rate": 0.0001, "loss": 1.5523, "step": 826560 }, { "epoch": 71.30917874396135, "grad_norm": 0.34479662775993347, "learning_rate": 0.0001, "loss": 1.5486, "step": 826616 }, { "epoch": 71.31400966183575, "grad_norm": 0.2997021973133087, "learning_rate": 0.0001, "loss": 1.5481, "step": 826672 }, { "epoch": 71.31884057971014, "grad_norm": 3.9269566535949707, "learning_rate": 0.0001, "loss": 1.5559, "step": 826728 }, { "epoch": 71.32367149758454, "grad_norm": 0.4109722971916199, "learning_rate": 0.0001, "loss": 1.5484, "step": 826784 }, { "epoch": 71.32850241545894, "grad_norm": 0.2697111964225769, "learning_rate": 0.0001, "loss": 1.5538, "step": 826840 }, { "epoch": 71.33333333333333, "grad_norm": 0.966960072517395, "learning_rate": 0.0001, "loss": 1.5529, "step": 826896 }, { "epoch": 71.33816425120773, "grad_norm": 0.5317843556404114, "learning_rate": 0.0001, "loss": 1.5487, "step": 826952 }, { "epoch": 71.34299516908213, "grad_norm": 0.2851557731628418, "learning_rate": 0.0001, "loss": 1.5502, "step": 827008 }, { "epoch": 71.34782608695652, "grad_norm": 0.3426188826560974, "learning_rate": 0.0001, "loss": 1.55, "step": 827064 }, { "epoch": 71.35265700483092, "grad_norm": 0.34784752130508423, "learning_rate": 0.0001, "loss": 1.5562, "step": 827120 }, { "epoch": 71.35748792270532, "grad_norm": 0.22904427349567413, "learning_rate": 0.0001, "loss": 1.5538, "step": 827176 }, { "epoch": 71.3623188405797, "grad_norm": 0.3851616382598877, "learning_rate": 0.0001, "loss": 1.5532, "step": 827232 }, { "epoch": 71.3671497584541, "grad_norm": 0.2514403760433197, "learning_rate": 0.0001, "loss": 1.5541, "step": 827288 }, { "epoch": 71.3719806763285, "grad_norm": 0.407419890165329, "learning_rate": 0.0001, "loss": 1.5509, "step": 827344 }, { "epoch": 71.3768115942029, "grad_norm": 0.2693115770816803, "learning_rate": 0.0001, "loss": 1.553, "step": 827400 }, { "epoch": 71.38164251207729, "grad_norm": 0.2500169277191162, "learning_rate": 0.0001, "loss": 1.5498, "step": 827456 }, { "epoch": 71.38647342995169, "grad_norm": 0.5439067482948303, "learning_rate": 0.0001, "loss": 1.5526, "step": 827512 }, { "epoch": 71.3913043478261, "grad_norm": 0.2964686453342438, "learning_rate": 0.0001, "loss": 1.5488, "step": 827568 }, { "epoch": 71.39613526570048, "grad_norm": 0.4743329584598541, "learning_rate": 0.0001, "loss": 1.5514, "step": 827624 }, { "epoch": 71.40096618357488, "grad_norm": 1.133446455001831, "learning_rate": 0.0001, "loss": 1.5515, "step": 827680 }, { "epoch": 71.40579710144928, "grad_norm": 0.3236153721809387, "learning_rate": 0.0001, "loss": 1.5533, "step": 827736 }, { "epoch": 71.41062801932367, "grad_norm": 0.2895698845386505, "learning_rate": 0.0001, "loss": 1.5552, "step": 827792 }, { "epoch": 71.41545893719807, "grad_norm": 0.28076040744781494, "learning_rate": 0.0001, "loss": 1.5493, "step": 827848 }, { "epoch": 71.42028985507247, "grad_norm": 0.5365455746650696, "learning_rate": 0.0001, "loss": 1.5544, "step": 827904 }, { "epoch": 71.42512077294685, "grad_norm": 0.27370724081993103, "learning_rate": 0.0001, "loss": 1.5597, "step": 827960 }, { "epoch": 71.42995169082126, "grad_norm": 0.26971206068992615, "learning_rate": 0.0001, "loss": 1.5505, "step": 828016 }, { "epoch": 71.43478260869566, "grad_norm": 0.6966723203659058, "learning_rate": 0.0001, "loss": 1.5556, "step": 828072 }, { "epoch": 71.43961352657004, "grad_norm": 0.3386363685131073, "learning_rate": 0.0001, "loss": 1.5459, "step": 828128 }, { "epoch": 71.44444444444444, "grad_norm": 0.28074121475219727, "learning_rate": 0.0001, "loss": 1.552, "step": 828184 }, { "epoch": 71.44927536231884, "grad_norm": 0.28701117634773254, "learning_rate": 0.0001, "loss": 1.5564, "step": 828240 }, { "epoch": 71.45410628019323, "grad_norm": 0.49166473746299744, "learning_rate": 0.0001, "loss": 1.5504, "step": 828296 }, { "epoch": 71.45893719806763, "grad_norm": 0.2775537669658661, "learning_rate": 0.0001, "loss": 1.5537, "step": 828352 }, { "epoch": 71.46376811594203, "grad_norm": 0.9039533734321594, "learning_rate": 0.0001, "loss": 1.5506, "step": 828408 }, { "epoch": 71.46859903381643, "grad_norm": 0.25875601172447205, "learning_rate": 0.0001, "loss": 1.5543, "step": 828464 }, { "epoch": 71.47342995169082, "grad_norm": 0.31424108147621155, "learning_rate": 0.0001, "loss": 1.5519, "step": 828520 }, { "epoch": 71.47826086956522, "grad_norm": 4.848052024841309, "learning_rate": 0.0001, "loss": 1.5496, "step": 828576 }, { "epoch": 71.48309178743962, "grad_norm": 0.24934153258800507, "learning_rate": 0.0001, "loss": 1.5547, "step": 828632 }, { "epoch": 71.487922705314, "grad_norm": 0.3008538484573364, "learning_rate": 0.0001, "loss": 1.5532, "step": 828688 }, { "epoch": 71.4927536231884, "grad_norm": 0.37092164158821106, "learning_rate": 0.0001, "loss": 1.5511, "step": 828744 }, { "epoch": 71.4975845410628, "grad_norm": 0.3318462073802948, "learning_rate": 0.0001, "loss": 1.5543, "step": 828800 }, { "epoch": 71.5024154589372, "grad_norm": 1.545053482055664, "learning_rate": 0.0001, "loss": 1.5528, "step": 828856 }, { "epoch": 71.5072463768116, "grad_norm": 0.564694881439209, "learning_rate": 0.0001, "loss": 1.5608, "step": 828912 }, { "epoch": 71.512077294686, "grad_norm": 2.3454933166503906, "learning_rate": 0.0001, "loss": 1.5531, "step": 828968 }, { "epoch": 71.51690821256038, "grad_norm": 0.3083861172199249, "learning_rate": 0.0001, "loss": 1.5519, "step": 829024 }, { "epoch": 71.52173913043478, "grad_norm": 0.26832315325737, "learning_rate": 0.0001, "loss": 1.5624, "step": 829080 }, { "epoch": 71.52657004830918, "grad_norm": 0.32879456877708435, "learning_rate": 0.0001, "loss": 1.5556, "step": 829136 }, { "epoch": 71.53140096618357, "grad_norm": 0.32423657178878784, "learning_rate": 0.0001, "loss": 1.5502, "step": 829192 }, { "epoch": 71.53623188405797, "grad_norm": 0.6479806900024414, "learning_rate": 0.0001, "loss": 1.5525, "step": 829248 }, { "epoch": 71.54106280193237, "grad_norm": 0.309197336435318, "learning_rate": 0.0001, "loss": 1.5559, "step": 829304 }, { "epoch": 71.54589371980677, "grad_norm": 0.6306284666061401, "learning_rate": 0.0001, "loss": 1.5513, "step": 829360 }, { "epoch": 71.55072463768116, "grad_norm": 0.3203956186771393, "learning_rate": 0.0001, "loss": 1.5639, "step": 829416 }, { "epoch": 71.55555555555556, "grad_norm": 0.2707810401916504, "learning_rate": 0.0001, "loss": 1.5649, "step": 829472 }, { "epoch": 71.56038647342996, "grad_norm": 0.9632209539413452, "learning_rate": 0.0001, "loss": 1.5546, "step": 829528 }, { "epoch": 71.56521739130434, "grad_norm": 0.9708321690559387, "learning_rate": 0.0001, "loss": 1.5593, "step": 829584 }, { "epoch": 71.57004830917874, "grad_norm": 0.37804192304611206, "learning_rate": 0.0001, "loss": 1.5498, "step": 829640 }, { "epoch": 71.57487922705315, "grad_norm": 0.263376921415329, "learning_rate": 0.0001, "loss": 1.5535, "step": 829696 }, { "epoch": 71.57971014492753, "grad_norm": 0.4392019510269165, "learning_rate": 0.0001, "loss": 1.5514, "step": 829752 }, { "epoch": 71.58454106280193, "grad_norm": 0.31562840938568115, "learning_rate": 0.0001, "loss": 1.5578, "step": 829808 }, { "epoch": 71.58937198067633, "grad_norm": 0.2952810525894165, "learning_rate": 0.0001, "loss": 1.5552, "step": 829864 }, { "epoch": 71.59420289855072, "grad_norm": 0.3035182058811188, "learning_rate": 0.0001, "loss": 1.5578, "step": 829920 }, { "epoch": 71.59903381642512, "grad_norm": 0.36545926332473755, "learning_rate": 0.0001, "loss": 1.554, "step": 829976 }, { "epoch": 71.60386473429952, "grad_norm": 0.31055548787117004, "learning_rate": 0.0001, "loss": 1.5441, "step": 830032 }, { "epoch": 71.6086956521739, "grad_norm": 0.318833589553833, "learning_rate": 0.0001, "loss": 1.5494, "step": 830088 }, { "epoch": 71.61352657004831, "grad_norm": 0.2559443712234497, "learning_rate": 0.0001, "loss": 1.555, "step": 830144 }, { "epoch": 71.61835748792271, "grad_norm": 0.2972756624221802, "learning_rate": 0.0001, "loss": 1.5495, "step": 830200 }, { "epoch": 71.6231884057971, "grad_norm": 0.27523377537727356, "learning_rate": 0.0001, "loss": 1.5514, "step": 830256 }, { "epoch": 71.6280193236715, "grad_norm": 0.7767321467399597, "learning_rate": 0.0001, "loss": 1.5561, "step": 830312 }, { "epoch": 71.6328502415459, "grad_norm": 0.5115237236022949, "learning_rate": 0.0001, "loss": 1.5577, "step": 830368 }, { "epoch": 71.6376811594203, "grad_norm": 0.5266006588935852, "learning_rate": 0.0001, "loss": 1.5545, "step": 830424 }, { "epoch": 71.64251207729468, "grad_norm": 0.31367623805999756, "learning_rate": 0.0001, "loss": 1.5509, "step": 830480 }, { "epoch": 71.64734299516908, "grad_norm": 0.46496209502220154, "learning_rate": 0.0001, "loss": 1.5517, "step": 830536 }, { "epoch": 71.65217391304348, "grad_norm": 0.2507486343383789, "learning_rate": 0.0001, "loss": 1.5523, "step": 830592 }, { "epoch": 71.65700483091787, "grad_norm": 0.30094999074935913, "learning_rate": 0.0001, "loss": 1.5555, "step": 830648 }, { "epoch": 71.66183574879227, "grad_norm": 0.36215734481811523, "learning_rate": 0.0001, "loss": 1.5546, "step": 830704 }, { "epoch": 71.66666666666667, "grad_norm": 0.23066917061805725, "learning_rate": 0.0001, "loss": 1.5522, "step": 830760 }, { "epoch": 71.67149758454106, "grad_norm": 0.25294044613838196, "learning_rate": 0.0001, "loss": 1.5585, "step": 830816 }, { "epoch": 71.67632850241546, "grad_norm": 0.3486888110637665, "learning_rate": 0.0001, "loss": 1.5547, "step": 830872 }, { "epoch": 71.68115942028986, "grad_norm": 0.4751327931880951, "learning_rate": 0.0001, "loss": 1.554, "step": 830928 }, { "epoch": 71.68599033816425, "grad_norm": 0.25968149304389954, "learning_rate": 0.0001, "loss": 1.5568, "step": 830984 }, { "epoch": 71.69082125603865, "grad_norm": 0.3191193640232086, "learning_rate": 0.0001, "loss": 1.5507, "step": 831040 }, { "epoch": 71.69565217391305, "grad_norm": 0.3060542047023773, "learning_rate": 0.0001, "loss": 1.5557, "step": 831096 }, { "epoch": 71.70048309178743, "grad_norm": 0.3748762905597687, "learning_rate": 0.0001, "loss": 1.5587, "step": 831152 }, { "epoch": 71.70531400966183, "grad_norm": 1.5450035333633423, "learning_rate": 0.0001, "loss": 1.5553, "step": 831208 }, { "epoch": 71.71014492753623, "grad_norm": 0.2952014207839966, "learning_rate": 0.0001, "loss": 1.5497, "step": 831264 }, { "epoch": 71.71497584541063, "grad_norm": 0.3562396466732025, "learning_rate": 0.0001, "loss": 1.5522, "step": 831320 }, { "epoch": 71.71980676328502, "grad_norm": 0.3686712980270386, "learning_rate": 0.0001, "loss": 1.5546, "step": 831376 }, { "epoch": 71.72463768115942, "grad_norm": 0.29803216457366943, "learning_rate": 0.0001, "loss": 1.5582, "step": 831432 }, { "epoch": 71.72946859903382, "grad_norm": 0.5348162055015564, "learning_rate": 0.0001, "loss": 1.5562, "step": 831488 }, { "epoch": 71.73429951690821, "grad_norm": 0.41689813137054443, "learning_rate": 0.0001, "loss": 1.5574, "step": 831544 }, { "epoch": 71.73913043478261, "grad_norm": 0.2912014424800873, "learning_rate": 0.0001, "loss": 1.557, "step": 831600 }, { "epoch": 71.74396135265701, "grad_norm": 0.305317223072052, "learning_rate": 0.0001, "loss": 1.5485, "step": 831656 }, { "epoch": 71.7487922705314, "grad_norm": 1.5410258769989014, "learning_rate": 0.0001, "loss": 1.5543, "step": 831712 }, { "epoch": 71.7536231884058, "grad_norm": 0.22977742552757263, "learning_rate": 0.0001, "loss": 1.5562, "step": 831768 }, { "epoch": 71.7584541062802, "grad_norm": 0.2858276665210724, "learning_rate": 0.0001, "loss": 1.5538, "step": 831824 }, { "epoch": 71.76328502415458, "grad_norm": 0.35024574398994446, "learning_rate": 0.0001, "loss": 1.5567, "step": 831880 }, { "epoch": 71.76811594202898, "grad_norm": 0.6612334251403809, "learning_rate": 0.0001, "loss": 1.5497, "step": 831936 }, { "epoch": 71.77294685990339, "grad_norm": 0.25982120633125305, "learning_rate": 0.0001, "loss": 1.5545, "step": 831992 }, { "epoch": 71.77777777777777, "grad_norm": 0.27606216073036194, "learning_rate": 0.0001, "loss": 1.5516, "step": 832048 }, { "epoch": 71.78260869565217, "grad_norm": 0.3429277837276459, "learning_rate": 0.0001, "loss": 1.5512, "step": 832104 }, { "epoch": 71.78743961352657, "grad_norm": 0.27177587151527405, "learning_rate": 0.0001, "loss": 1.5535, "step": 832160 }, { "epoch": 71.79227053140096, "grad_norm": 4.127712249755859, "learning_rate": 0.0001, "loss": 1.56, "step": 832216 }, { "epoch": 71.79710144927536, "grad_norm": 0.2582075595855713, "learning_rate": 0.0001, "loss": 1.5498, "step": 832272 }, { "epoch": 71.80193236714976, "grad_norm": 0.35958898067474365, "learning_rate": 0.0001, "loss": 1.5595, "step": 832328 }, { "epoch": 71.80676328502416, "grad_norm": 0.4435732960700989, "learning_rate": 0.0001, "loss": 1.5553, "step": 832384 }, { "epoch": 71.81159420289855, "grad_norm": 0.282431036233902, "learning_rate": 0.0001, "loss": 1.554, "step": 832440 }, { "epoch": 71.81642512077295, "grad_norm": 0.28314492106437683, "learning_rate": 0.0001, "loss": 1.5557, "step": 832496 }, { "epoch": 71.82125603864735, "grad_norm": 0.42712587118148804, "learning_rate": 0.0001, "loss": 1.5584, "step": 832552 }, { "epoch": 71.82608695652173, "grad_norm": 0.3562917411327362, "learning_rate": 0.0001, "loss": 1.5547, "step": 832608 }, { "epoch": 71.83091787439614, "grad_norm": 0.22666805982589722, "learning_rate": 0.0001, "loss": 1.5511, "step": 832664 }, { "epoch": 71.83574879227054, "grad_norm": 0.30916616320610046, "learning_rate": 0.0001, "loss": 1.5564, "step": 832720 }, { "epoch": 71.84057971014492, "grad_norm": 0.4761633276939392, "learning_rate": 0.0001, "loss": 1.5524, "step": 832776 }, { "epoch": 71.84541062801932, "grad_norm": 0.6814051270484924, "learning_rate": 0.0001, "loss": 1.5602, "step": 832832 }, { "epoch": 71.85024154589372, "grad_norm": 0.39639046788215637, "learning_rate": 0.0001, "loss": 1.5587, "step": 832888 }, { "epoch": 71.85507246376811, "grad_norm": 0.3707043528556824, "learning_rate": 0.0001, "loss": 1.5514, "step": 832944 }, { "epoch": 71.85990338164251, "grad_norm": 0.30012837052345276, "learning_rate": 0.0001, "loss": 1.5503, "step": 833000 }, { "epoch": 71.86473429951691, "grad_norm": 0.9174394011497498, "learning_rate": 0.0001, "loss": 1.5534, "step": 833056 }, { "epoch": 71.8695652173913, "grad_norm": 0.48659077286720276, "learning_rate": 0.0001, "loss": 1.5547, "step": 833112 }, { "epoch": 71.8743961352657, "grad_norm": 0.34674033522605896, "learning_rate": 0.0001, "loss": 1.5518, "step": 833168 }, { "epoch": 71.8792270531401, "grad_norm": 0.2815556228160858, "learning_rate": 0.0001, "loss": 1.5525, "step": 833224 }, { "epoch": 71.8840579710145, "grad_norm": 0.3398805856704712, "learning_rate": 0.0001, "loss": 1.5598, "step": 833280 }, { "epoch": 71.88888888888889, "grad_norm": 0.2584531903266907, "learning_rate": 0.0001, "loss": 1.5514, "step": 833336 }, { "epoch": 71.89371980676329, "grad_norm": 4.889510154724121, "learning_rate": 0.0001, "loss": 1.5535, "step": 833392 }, { "epoch": 71.89855072463769, "grad_norm": 0.6254977583885193, "learning_rate": 0.0001, "loss": 1.5553, "step": 833448 }, { "epoch": 71.90338164251207, "grad_norm": 1.2520248889923096, "learning_rate": 0.0001, "loss": 1.5547, "step": 833504 }, { "epoch": 71.90821256038647, "grad_norm": 0.2687070965766907, "learning_rate": 0.0001, "loss": 1.5574, "step": 833560 }, { "epoch": 71.91304347826087, "grad_norm": 0.3191376328468323, "learning_rate": 0.0001, "loss": 1.5669, "step": 833616 }, { "epoch": 71.91787439613526, "grad_norm": 0.39158663153648376, "learning_rate": 0.0001, "loss": 1.5542, "step": 833672 }, { "epoch": 71.92270531400966, "grad_norm": 0.24094487726688385, "learning_rate": 0.0001, "loss": 1.5595, "step": 833728 }, { "epoch": 71.92753623188406, "grad_norm": 2.383126735687256, "learning_rate": 0.0001, "loss": 1.5584, "step": 833784 }, { "epoch": 71.93236714975845, "grad_norm": 0.26794707775115967, "learning_rate": 0.0001, "loss": 1.5572, "step": 833840 }, { "epoch": 71.93719806763285, "grad_norm": 0.5466254353523254, "learning_rate": 0.0001, "loss": 1.5546, "step": 833896 }, { "epoch": 71.94202898550725, "grad_norm": 0.4928216338157654, "learning_rate": 0.0001, "loss": 1.5492, "step": 833952 }, { "epoch": 71.94685990338164, "grad_norm": 0.3302038908004761, "learning_rate": 0.0001, "loss": 1.5499, "step": 834008 }, { "epoch": 71.95169082125604, "grad_norm": 0.8308007717132568, "learning_rate": 0.0001, "loss": 1.5594, "step": 834064 }, { "epoch": 71.95652173913044, "grad_norm": 0.6636524796485901, "learning_rate": 0.0001, "loss": 1.5552, "step": 834120 }, { "epoch": 71.96135265700484, "grad_norm": 0.2300470769405365, "learning_rate": 0.0001, "loss": 1.5598, "step": 834176 }, { "epoch": 71.96618357487922, "grad_norm": 0.29052987694740295, "learning_rate": 0.0001, "loss": 1.5492, "step": 834232 }, { "epoch": 71.97101449275362, "grad_norm": 0.2446851283311844, "learning_rate": 0.0001, "loss": 1.5568, "step": 834288 }, { "epoch": 71.97584541062803, "grad_norm": 0.27067986130714417, "learning_rate": 0.0001, "loss": 1.5568, "step": 834344 }, { "epoch": 71.98067632850241, "grad_norm": 0.27674269676208496, "learning_rate": 0.0001, "loss": 1.5572, "step": 834400 }, { "epoch": 71.98550724637681, "grad_norm": 0.3507527709007263, "learning_rate": 0.0001, "loss": 1.5672, "step": 834456 }, { "epoch": 71.99033816425121, "grad_norm": 0.2773164212703705, "learning_rate": 0.0001, "loss": 1.5604, "step": 834512 }, { "epoch": 71.9951690821256, "grad_norm": 0.3090800940990448, "learning_rate": 0.0001, "loss": 1.5613, "step": 834568 }, { "epoch": 72.0, "grad_norm": 0.35953980684280396, "learning_rate": 0.0001, "loss": 1.5551, "step": 834624 }, { "epoch": 72.0048309178744, "grad_norm": 0.32814374566078186, "learning_rate": 0.0001, "loss": 1.5547, "step": 834680 }, { "epoch": 72.00966183574879, "grad_norm": 0.26847466826438904, "learning_rate": 0.0001, "loss": 1.5541, "step": 834736 }, { "epoch": 72.01449275362319, "grad_norm": 1.0191328525543213, "learning_rate": 0.0001, "loss": 1.5467, "step": 834792 }, { "epoch": 72.01932367149759, "grad_norm": 0.5360642075538635, "learning_rate": 0.0001, "loss": 1.5475, "step": 834848 }, { "epoch": 72.02415458937197, "grad_norm": 0.38673123717308044, "learning_rate": 0.0001, "loss": 1.5496, "step": 834904 }, { "epoch": 72.02898550724638, "grad_norm": 0.461440771818161, "learning_rate": 0.0001, "loss": 1.5548, "step": 834960 }, { "epoch": 72.03381642512078, "grad_norm": 0.24573692679405212, "learning_rate": 0.0001, "loss": 1.5526, "step": 835016 }, { "epoch": 72.03864734299516, "grad_norm": 0.37867504358291626, "learning_rate": 0.0001, "loss": 1.5514, "step": 835072 }, { "epoch": 72.04347826086956, "grad_norm": 0.284641832113266, "learning_rate": 0.0001, "loss": 1.5524, "step": 835128 }, { "epoch": 72.04830917874396, "grad_norm": 0.6581095457077026, "learning_rate": 0.0001, "loss": 1.549, "step": 835184 }, { "epoch": 72.05314009661836, "grad_norm": 0.22937220335006714, "learning_rate": 0.0001, "loss": 1.5583, "step": 835240 }, { "epoch": 72.05797101449275, "grad_norm": 0.33185142278671265, "learning_rate": 0.0001, "loss": 1.5495, "step": 835296 }, { "epoch": 72.06280193236715, "grad_norm": 0.35150691866874695, "learning_rate": 0.0001, "loss": 1.551, "step": 835352 }, { "epoch": 72.06763285024155, "grad_norm": 0.9096566438674927, "learning_rate": 0.0001, "loss": 1.5504, "step": 835408 }, { "epoch": 72.07246376811594, "grad_norm": 0.31972160935401917, "learning_rate": 0.0001, "loss": 1.5522, "step": 835464 }, { "epoch": 72.07729468599034, "grad_norm": 0.29376864433288574, "learning_rate": 0.0001, "loss": 1.5478, "step": 835520 }, { "epoch": 72.08212560386474, "grad_norm": 0.2842343747615814, "learning_rate": 0.0001, "loss": 1.5483, "step": 835576 }, { "epoch": 72.08695652173913, "grad_norm": 1.2000670433044434, "learning_rate": 0.0001, "loss": 1.5515, "step": 835632 }, { "epoch": 72.09178743961353, "grad_norm": 0.34213927388191223, "learning_rate": 0.0001, "loss": 1.5483, "step": 835688 }, { "epoch": 72.09661835748793, "grad_norm": 0.3017626404762268, "learning_rate": 0.0001, "loss": 1.5537, "step": 835744 }, { "epoch": 72.10144927536231, "grad_norm": 0.48664042353630066, "learning_rate": 0.0001, "loss": 1.552, "step": 835800 }, { "epoch": 72.10628019323671, "grad_norm": 0.39358609914779663, "learning_rate": 0.0001, "loss": 1.5475, "step": 835856 }, { "epoch": 72.11111111111111, "grad_norm": 13.342547416687012, "learning_rate": 0.0001, "loss": 1.5446, "step": 835912 }, { "epoch": 72.1159420289855, "grad_norm": 1.134681224822998, "learning_rate": 0.0001, "loss": 1.5503, "step": 835968 }, { "epoch": 72.1207729468599, "grad_norm": 0.3203466534614563, "learning_rate": 0.0001, "loss": 1.5494, "step": 836024 }, { "epoch": 72.1256038647343, "grad_norm": 0.7495366334915161, "learning_rate": 0.0001, "loss": 1.5524, "step": 836080 }, { "epoch": 72.1304347826087, "grad_norm": 0.31295570731163025, "learning_rate": 0.0001, "loss": 1.5467, "step": 836136 }, { "epoch": 72.13526570048309, "grad_norm": 0.6146248579025269, "learning_rate": 0.0001, "loss": 1.5444, "step": 836192 }, { "epoch": 72.14009661835749, "grad_norm": 1.6044390201568604, "learning_rate": 0.0001, "loss": 1.5431, "step": 836248 }, { "epoch": 72.14492753623189, "grad_norm": 0.5860841274261475, "learning_rate": 0.0001, "loss": 1.55, "step": 836304 }, { "epoch": 72.14975845410628, "grad_norm": 0.27939167618751526, "learning_rate": 0.0001, "loss": 1.5502, "step": 836360 }, { "epoch": 72.15458937198068, "grad_norm": 0.4250093698501587, "learning_rate": 0.0001, "loss": 1.5512, "step": 836416 }, { "epoch": 72.15942028985508, "grad_norm": 0.3135227859020233, "learning_rate": 0.0001, "loss": 1.5462, "step": 836472 }, { "epoch": 72.16425120772946, "grad_norm": 0.2778961956501007, "learning_rate": 0.0001, "loss": 1.5532, "step": 836528 }, { "epoch": 72.16908212560386, "grad_norm": 2.547518014907837, "learning_rate": 0.0001, "loss": 1.5456, "step": 836584 }, { "epoch": 72.17391304347827, "grad_norm": 0.3651216924190521, "learning_rate": 0.0001, "loss": 1.5469, "step": 836640 }, { "epoch": 72.17874396135265, "grad_norm": 0.42274409532546997, "learning_rate": 0.0001, "loss": 1.5538, "step": 836696 }, { "epoch": 72.18357487922705, "grad_norm": 0.27706822752952576, "learning_rate": 0.0001, "loss": 1.5511, "step": 836752 }, { "epoch": 72.18840579710145, "grad_norm": 1.3330847024917603, "learning_rate": 0.0001, "loss": 1.5478, "step": 836808 }, { "epoch": 72.19323671497584, "grad_norm": 0.3398173153400421, "learning_rate": 0.0001, "loss": 1.551, "step": 836864 }, { "epoch": 72.19806763285024, "grad_norm": 0.453777939081192, "learning_rate": 0.0001, "loss": 1.5493, "step": 836920 }, { "epoch": 72.20289855072464, "grad_norm": 7.810399055480957, "learning_rate": 0.0001, "loss": 1.549, "step": 836976 }, { "epoch": 72.20772946859903, "grad_norm": 0.34179335832595825, "learning_rate": 0.0001, "loss": 1.5473, "step": 837032 }, { "epoch": 72.21256038647343, "grad_norm": 0.339743435382843, "learning_rate": 0.0001, "loss": 1.5466, "step": 837088 }, { "epoch": 72.21739130434783, "grad_norm": 0.3668128252029419, "learning_rate": 0.0001, "loss": 1.5519, "step": 837144 }, { "epoch": 72.22222222222223, "grad_norm": 0.3434917628765106, "learning_rate": 0.0001, "loss": 1.5479, "step": 837200 }, { "epoch": 72.22705314009661, "grad_norm": 0.4964594841003418, "learning_rate": 0.0001, "loss": 1.5521, "step": 837256 }, { "epoch": 72.23188405797102, "grad_norm": 0.3095363676548004, "learning_rate": 0.0001, "loss": 1.5458, "step": 837312 }, { "epoch": 72.23671497584542, "grad_norm": 0.6245014071464539, "learning_rate": 0.0001, "loss": 1.5475, "step": 837368 }, { "epoch": 72.2415458937198, "grad_norm": 0.25468623638153076, "learning_rate": 0.0001, "loss": 1.551, "step": 837424 }, { "epoch": 72.2463768115942, "grad_norm": 0.38944554328918457, "learning_rate": 0.0001, "loss": 1.5516, "step": 837480 }, { "epoch": 72.2512077294686, "grad_norm": 0.3329654037952423, "learning_rate": 0.0001, "loss": 1.5484, "step": 837536 }, { "epoch": 72.25603864734299, "grad_norm": 2.6757423877716064, "learning_rate": 0.0001, "loss": 1.5436, "step": 837592 }, { "epoch": 72.26086956521739, "grad_norm": 0.2844753563404083, "learning_rate": 0.0001, "loss": 1.5512, "step": 837648 }, { "epoch": 72.26570048309179, "grad_norm": 0.8288004398345947, "learning_rate": 0.0001, "loss": 1.5479, "step": 837704 }, { "epoch": 72.27053140096618, "grad_norm": 0.2754797339439392, "learning_rate": 0.0001, "loss": 1.5412, "step": 837760 }, { "epoch": 72.27536231884058, "grad_norm": 1.2470765113830566, "learning_rate": 0.0001, "loss": 1.5455, "step": 837816 }, { "epoch": 72.28019323671498, "grad_norm": 0.2688941955566406, "learning_rate": 0.0001, "loss": 1.553, "step": 837872 }, { "epoch": 72.28502415458937, "grad_norm": 0.3263843357563019, "learning_rate": 0.0001, "loss": 1.5468, "step": 837928 }, { "epoch": 72.28985507246377, "grad_norm": 0.24195288121700287, "learning_rate": 0.0001, "loss": 1.5495, "step": 837984 }, { "epoch": 72.29468599033817, "grad_norm": 0.2644932270050049, "learning_rate": 0.0001, "loss": 1.5484, "step": 838040 }, { "epoch": 72.29951690821257, "grad_norm": 26.913494110107422, "learning_rate": 0.0001, "loss": 1.5489, "step": 838096 }, { "epoch": 72.30434782608695, "grad_norm": 0.37195178866386414, "learning_rate": 0.0001, "loss": 1.5592, "step": 838152 }, { "epoch": 72.30917874396135, "grad_norm": 0.3422242999076843, "learning_rate": 0.0001, "loss": 1.5495, "step": 838208 }, { "epoch": 72.31400966183575, "grad_norm": 0.3404980003833771, "learning_rate": 0.0001, "loss": 1.5497, "step": 838264 }, { "epoch": 72.31884057971014, "grad_norm": 0.30812060832977295, "learning_rate": 0.0001, "loss": 1.5568, "step": 838320 }, { "epoch": 72.32367149758454, "grad_norm": 3.9588489532470703, "learning_rate": 0.0001, "loss": 1.5487, "step": 838376 }, { "epoch": 72.32850241545894, "grad_norm": 0.2856982350349426, "learning_rate": 0.0001, "loss": 1.5543, "step": 838432 }, { "epoch": 72.33333333333333, "grad_norm": 0.6957613229751587, "learning_rate": 0.0001, "loss": 1.5478, "step": 838488 }, { "epoch": 72.33816425120773, "grad_norm": 0.3088136911392212, "learning_rate": 0.0001, "loss": 1.5505, "step": 838544 }, { "epoch": 72.34299516908213, "grad_norm": 0.27046290040016174, "learning_rate": 0.0001, "loss": 1.5541, "step": 838600 }, { "epoch": 72.34782608695652, "grad_norm": 0.8680992126464844, "learning_rate": 0.0001, "loss": 1.554, "step": 838656 }, { "epoch": 72.35265700483092, "grad_norm": 0.5010822415351868, "learning_rate": 0.0001, "loss": 1.5431, "step": 838712 }, { "epoch": 72.35748792270532, "grad_norm": 0.2761306166648865, "learning_rate": 0.0001, "loss": 1.5557, "step": 838768 }, { "epoch": 72.3623188405797, "grad_norm": 0.42564070224761963, "learning_rate": 0.0001, "loss": 1.5556, "step": 838824 }, { "epoch": 72.3671497584541, "grad_norm": 0.23503753542900085, "learning_rate": 0.0001, "loss": 1.5543, "step": 838880 }, { "epoch": 72.3719806763285, "grad_norm": 0.9911259412765503, "learning_rate": 0.0001, "loss": 1.5472, "step": 838936 }, { "epoch": 72.3768115942029, "grad_norm": 1.018143653869629, "learning_rate": 0.0001, "loss": 1.5522, "step": 838992 }, { "epoch": 72.38164251207729, "grad_norm": 0.3652785122394562, "learning_rate": 0.0001, "loss": 1.5476, "step": 839048 }, { "epoch": 72.38647342995169, "grad_norm": 0.2894362509250641, "learning_rate": 0.0001, "loss": 1.5489, "step": 839104 }, { "epoch": 72.3913043478261, "grad_norm": 0.40981945395469666, "learning_rate": 0.0001, "loss": 1.5496, "step": 839160 }, { "epoch": 72.39613526570048, "grad_norm": 0.2622465491294861, "learning_rate": 0.0001, "loss": 1.559, "step": 839216 }, { "epoch": 72.40096618357488, "grad_norm": 0.5841796398162842, "learning_rate": 0.0001, "loss": 1.5528, "step": 839272 }, { "epoch": 72.40579710144928, "grad_norm": 1.7619293928146362, "learning_rate": 0.0001, "loss": 1.5566, "step": 839328 }, { "epoch": 72.41062801932367, "grad_norm": 0.264601469039917, "learning_rate": 0.0001, "loss": 1.5543, "step": 839384 }, { "epoch": 72.41545893719807, "grad_norm": 0.44461652636528015, "learning_rate": 0.0001, "loss": 1.5488, "step": 839440 }, { "epoch": 72.42028985507247, "grad_norm": 0.3854884207248688, "learning_rate": 0.0001, "loss": 1.5433, "step": 839496 }, { "epoch": 72.42512077294685, "grad_norm": 0.4055601954460144, "learning_rate": 0.0001, "loss": 1.5513, "step": 839552 }, { "epoch": 72.42995169082126, "grad_norm": 0.27510398626327515, "learning_rate": 0.0001, "loss": 1.5533, "step": 839608 }, { "epoch": 72.43478260869566, "grad_norm": 0.29437458515167236, "learning_rate": 0.0001, "loss": 1.5521, "step": 839664 }, { "epoch": 72.43961352657004, "grad_norm": 0.341201514005661, "learning_rate": 0.0001, "loss": 1.5481, "step": 839720 }, { "epoch": 72.44444444444444, "grad_norm": 0.5492062568664551, "learning_rate": 0.0001, "loss": 1.5502, "step": 839776 }, { "epoch": 72.44927536231884, "grad_norm": 0.37126731872558594, "learning_rate": 0.0001, "loss": 1.5491, "step": 839832 }, { "epoch": 72.45410628019323, "grad_norm": 2.423757791519165, "learning_rate": 0.0001, "loss": 1.5499, "step": 839888 }, { "epoch": 72.45893719806763, "grad_norm": 0.30034857988357544, "learning_rate": 0.0001, "loss": 1.5488, "step": 839944 }, { "epoch": 72.46376811594203, "grad_norm": 0.44225940108299255, "learning_rate": 0.0001, "loss": 1.5499, "step": 840000 }, { "epoch": 72.46859903381643, "grad_norm": 0.29799342155456543, "learning_rate": 0.0001, "loss": 1.5468, "step": 840056 }, { "epoch": 72.47342995169082, "grad_norm": 0.24036389589309692, "learning_rate": 0.0001, "loss": 1.5481, "step": 840112 }, { "epoch": 72.47826086956522, "grad_norm": 0.42659348249435425, "learning_rate": 0.0001, "loss": 1.551, "step": 840168 }, { "epoch": 72.48309178743962, "grad_norm": 0.2502802014350891, "learning_rate": 0.0001, "loss": 1.55, "step": 840224 }, { "epoch": 72.487922705314, "grad_norm": 0.46810442209243774, "learning_rate": 0.0001, "loss": 1.5565, "step": 840280 }, { "epoch": 72.4927536231884, "grad_norm": 0.2577829360961914, "learning_rate": 0.0001, "loss": 1.5516, "step": 840336 }, { "epoch": 72.4975845410628, "grad_norm": 0.3240218758583069, "learning_rate": 0.0001, "loss": 1.5536, "step": 840392 }, { "epoch": 72.5024154589372, "grad_norm": 0.29857200384140015, "learning_rate": 0.0001, "loss": 1.5501, "step": 840448 }, { "epoch": 72.5072463768116, "grad_norm": 0.3695286512374878, "learning_rate": 0.0001, "loss": 1.5524, "step": 840504 }, { "epoch": 72.512077294686, "grad_norm": 0.4378248155117035, "learning_rate": 0.0001, "loss": 1.5517, "step": 840560 }, { "epoch": 72.51690821256038, "grad_norm": 0.2675636112689972, "learning_rate": 0.0001, "loss": 1.5486, "step": 840616 }, { "epoch": 72.52173913043478, "grad_norm": 0.32714805006980896, "learning_rate": 0.0001, "loss": 1.5466, "step": 840672 }, { "epoch": 72.52657004830918, "grad_norm": 0.8586647510528564, "learning_rate": 0.0001, "loss": 1.5487, "step": 840728 }, { "epoch": 72.53140096618357, "grad_norm": 0.2639898657798767, "learning_rate": 0.0001, "loss": 1.5561, "step": 840784 }, { "epoch": 72.53623188405797, "grad_norm": 0.29924505949020386, "learning_rate": 0.0001, "loss": 1.5508, "step": 840840 }, { "epoch": 72.54106280193237, "grad_norm": 0.3926955759525299, "learning_rate": 0.0001, "loss": 1.5467, "step": 840896 }, { "epoch": 72.54589371980677, "grad_norm": 0.29023829102516174, "learning_rate": 0.0001, "loss": 1.5537, "step": 840952 }, { "epoch": 72.55072463768116, "grad_norm": 0.24517859518527985, "learning_rate": 0.0001, "loss": 1.5485, "step": 841008 }, { "epoch": 72.55555555555556, "grad_norm": 0.4778381288051605, "learning_rate": 0.0001, "loss": 1.5592, "step": 841064 }, { "epoch": 72.56038647342996, "grad_norm": 0.26084256172180176, "learning_rate": 0.0001, "loss": 1.5506, "step": 841120 }, { "epoch": 72.56521739130434, "grad_norm": 0.44175487756729126, "learning_rate": 0.0001, "loss": 1.5535, "step": 841176 }, { "epoch": 72.57004830917874, "grad_norm": 0.310092568397522, "learning_rate": 0.0001, "loss": 1.5529, "step": 841232 }, { "epoch": 72.57487922705315, "grad_norm": 0.5549047589302063, "learning_rate": 0.0001, "loss": 1.5522, "step": 841288 }, { "epoch": 72.57971014492753, "grad_norm": 0.40311089158058167, "learning_rate": 0.0001, "loss": 1.5459, "step": 841344 }, { "epoch": 72.58454106280193, "grad_norm": 0.28209346532821655, "learning_rate": 0.0001, "loss": 1.5521, "step": 841400 }, { "epoch": 72.58937198067633, "grad_norm": 6.7748823165893555, "learning_rate": 0.0001, "loss": 1.5555, "step": 841456 }, { "epoch": 72.59420289855072, "grad_norm": 0.28670307993888855, "learning_rate": 0.0001, "loss": 1.5538, "step": 841512 }, { "epoch": 72.59903381642512, "grad_norm": 0.30944859981536865, "learning_rate": 0.0001, "loss": 1.5578, "step": 841568 }, { "epoch": 72.60386473429952, "grad_norm": 0.2567039132118225, "learning_rate": 0.0001, "loss": 1.5508, "step": 841624 }, { "epoch": 72.6086956521739, "grad_norm": 0.3227902948856354, "learning_rate": 0.0001, "loss": 1.5562, "step": 841680 }, { "epoch": 72.61352657004831, "grad_norm": 0.24202997982501984, "learning_rate": 0.0001, "loss": 1.5527, "step": 841736 }, { "epoch": 72.61835748792271, "grad_norm": 0.3219892382621765, "learning_rate": 0.0001, "loss": 1.5493, "step": 841792 }, { "epoch": 72.6231884057971, "grad_norm": 0.3269314467906952, "learning_rate": 0.0001, "loss": 1.5566, "step": 841848 }, { "epoch": 72.6280193236715, "grad_norm": 0.339456707239151, "learning_rate": 0.0001, "loss": 1.5574, "step": 841904 }, { "epoch": 72.6328502415459, "grad_norm": 0.3421250879764557, "learning_rate": 0.0001, "loss": 1.5485, "step": 841960 }, { "epoch": 72.6376811594203, "grad_norm": 0.3063240349292755, "learning_rate": 0.0001, "loss": 1.5495, "step": 842016 }, { "epoch": 72.64251207729468, "grad_norm": 0.2658022940158844, "learning_rate": 0.0001, "loss": 1.5465, "step": 842072 }, { "epoch": 72.64734299516908, "grad_norm": 0.5766712427139282, "learning_rate": 0.0001, "loss": 1.5454, "step": 842128 }, { "epoch": 72.65217391304348, "grad_norm": 0.28645631670951843, "learning_rate": 0.0001, "loss": 1.5536, "step": 842184 }, { "epoch": 72.65700483091787, "grad_norm": 0.47816744446754456, "learning_rate": 0.0001, "loss": 1.5541, "step": 842240 }, { "epoch": 72.66183574879227, "grad_norm": 0.2917937636375427, "learning_rate": 0.0001, "loss": 1.5524, "step": 842296 }, { "epoch": 72.66666666666667, "grad_norm": 0.3040211796760559, "learning_rate": 0.0001, "loss": 1.5569, "step": 842352 }, { "epoch": 72.67149758454106, "grad_norm": 0.4475165009498596, "learning_rate": 0.0001, "loss": 1.559, "step": 842408 }, { "epoch": 72.67632850241546, "grad_norm": 0.43888676166534424, "learning_rate": 0.0001, "loss": 1.5495, "step": 842464 }, { "epoch": 72.68115942028986, "grad_norm": 0.3920617401599884, "learning_rate": 0.0001, "loss": 1.5504, "step": 842520 }, { "epoch": 72.68599033816425, "grad_norm": 0.277005136013031, "learning_rate": 0.0001, "loss": 1.5479, "step": 842576 }, { "epoch": 72.69082125603865, "grad_norm": 1.0882834196090698, "learning_rate": 0.0001, "loss": 1.5511, "step": 842632 }, { "epoch": 72.69565217391305, "grad_norm": 0.34135764837265015, "learning_rate": 0.0001, "loss": 1.5496, "step": 842688 }, { "epoch": 72.70048309178743, "grad_norm": 0.3207785487174988, "learning_rate": 0.0001, "loss": 1.5465, "step": 842744 }, { "epoch": 72.70531400966183, "grad_norm": 0.2524976134300232, "learning_rate": 0.0001, "loss": 1.5509, "step": 842800 }, { "epoch": 72.71014492753623, "grad_norm": 3.4803240299224854, "learning_rate": 0.0001, "loss": 1.5537, "step": 842856 }, { "epoch": 72.71497584541063, "grad_norm": 0.24014084041118622, "learning_rate": 0.0001, "loss": 1.5478, "step": 842912 }, { "epoch": 72.71980676328502, "grad_norm": 1.2373350858688354, "learning_rate": 0.0001, "loss": 1.5556, "step": 842968 }, { "epoch": 72.72463768115942, "grad_norm": 0.30215632915496826, "learning_rate": 0.0001, "loss": 1.5529, "step": 843024 }, { "epoch": 72.72946859903382, "grad_norm": 0.28891897201538086, "learning_rate": 0.0001, "loss": 1.5489, "step": 843080 }, { "epoch": 72.73429951690821, "grad_norm": 51.99875259399414, "learning_rate": 0.0001, "loss": 1.5512, "step": 843136 }, { "epoch": 72.73913043478261, "grad_norm": 0.303107887506485, "learning_rate": 0.0001, "loss": 1.5561, "step": 843192 }, { "epoch": 72.74396135265701, "grad_norm": 0.9799423217773438, "learning_rate": 0.0001, "loss": 1.5531, "step": 843248 }, { "epoch": 72.7487922705314, "grad_norm": 0.2743590474128723, "learning_rate": 0.0001, "loss": 1.5521, "step": 843304 }, { "epoch": 72.7536231884058, "grad_norm": 0.42746081948280334, "learning_rate": 0.0001, "loss": 1.5558, "step": 843360 }, { "epoch": 72.7584541062802, "grad_norm": 0.8285321593284607, "learning_rate": 0.0001, "loss": 1.5593, "step": 843416 }, { "epoch": 72.76328502415458, "grad_norm": 0.4923485815525055, "learning_rate": 0.0001, "loss": 1.5545, "step": 843472 }, { "epoch": 72.76811594202898, "grad_norm": 0.3232939839363098, "learning_rate": 0.0001, "loss": 1.5564, "step": 843528 }, { "epoch": 72.77294685990339, "grad_norm": 0.36219197511672974, "learning_rate": 0.0001, "loss": 1.5489, "step": 843584 }, { "epoch": 72.77777777777777, "grad_norm": 1.2913596630096436, "learning_rate": 0.0001, "loss": 1.5545, "step": 843640 }, { "epoch": 72.78260869565217, "grad_norm": 0.28639650344848633, "learning_rate": 0.0001, "loss": 1.5511, "step": 843696 }, { "epoch": 72.78743961352657, "grad_norm": 0.252113401889801, "learning_rate": 0.0001, "loss": 1.5471, "step": 843752 }, { "epoch": 72.79227053140096, "grad_norm": 1.8531593084335327, "learning_rate": 0.0001, "loss": 1.5482, "step": 843808 }, { "epoch": 72.79710144927536, "grad_norm": 0.26130905747413635, "learning_rate": 0.0001, "loss": 1.5597, "step": 843864 }, { "epoch": 72.80193236714976, "grad_norm": 1.8889731168746948, "learning_rate": 0.0001, "loss": 1.5522, "step": 843920 }, { "epoch": 72.80676328502416, "grad_norm": 0.2384462207555771, "learning_rate": 0.0001, "loss": 1.553, "step": 843976 }, { "epoch": 72.81159420289855, "grad_norm": 0.34966060519218445, "learning_rate": 0.0001, "loss": 1.5538, "step": 844032 }, { "epoch": 72.81642512077295, "grad_norm": 0.259316086769104, "learning_rate": 0.0001, "loss": 1.5532, "step": 844088 }, { "epoch": 72.82125603864735, "grad_norm": 0.28322523832321167, "learning_rate": 0.0001, "loss": 1.552, "step": 844144 }, { "epoch": 72.82608695652173, "grad_norm": 0.3029020428657532, "learning_rate": 0.0001, "loss": 1.5461, "step": 844200 }, { "epoch": 72.83091787439614, "grad_norm": 0.45913779735565186, "learning_rate": 0.0001, "loss": 1.5581, "step": 844256 }, { "epoch": 72.83574879227054, "grad_norm": 0.291594922542572, "learning_rate": 0.0001, "loss": 1.5555, "step": 844312 }, { "epoch": 72.84057971014492, "grad_norm": 0.29291653633117676, "learning_rate": 0.0001, "loss": 1.5554, "step": 844368 }, { "epoch": 72.84541062801932, "grad_norm": 1.4945127964019775, "learning_rate": 0.0001, "loss": 1.5569, "step": 844424 }, { "epoch": 72.85024154589372, "grad_norm": 0.3263772130012512, "learning_rate": 0.0001, "loss": 1.5533, "step": 844480 }, { "epoch": 72.85507246376811, "grad_norm": 0.31991058588027954, "learning_rate": 0.0001, "loss": 1.551, "step": 844536 }, { "epoch": 72.85990338164251, "grad_norm": 9.667479515075684, "learning_rate": 0.0001, "loss": 1.5526, "step": 844592 }, { "epoch": 72.86473429951691, "grad_norm": 0.8311197757720947, "learning_rate": 0.0001, "loss": 1.5469, "step": 844648 }, { "epoch": 72.8695652173913, "grad_norm": 0.23989802598953247, "learning_rate": 0.0001, "loss": 1.5472, "step": 844704 }, { "epoch": 72.8743961352657, "grad_norm": 0.47071513533592224, "learning_rate": 0.0001, "loss": 1.5498, "step": 844760 }, { "epoch": 72.8792270531401, "grad_norm": 0.31598615646362305, "learning_rate": 0.0001, "loss": 1.5579, "step": 844816 }, { "epoch": 72.8840579710145, "grad_norm": 0.3496675193309784, "learning_rate": 0.0001, "loss": 1.5581, "step": 844872 }, { "epoch": 72.88888888888889, "grad_norm": 0.31200072169303894, "learning_rate": 0.0001, "loss": 1.5607, "step": 844928 }, { "epoch": 72.89371980676329, "grad_norm": 0.3001238703727722, "learning_rate": 0.0001, "loss": 1.5601, "step": 844984 }, { "epoch": 72.89855072463769, "grad_norm": 8.895811080932617, "learning_rate": 0.0001, "loss": 1.554, "step": 845040 }, { "epoch": 72.90338164251207, "grad_norm": 0.274355411529541, "learning_rate": 0.0001, "loss": 1.5496, "step": 845096 }, { "epoch": 72.90821256038647, "grad_norm": 0.3016550838947296, "learning_rate": 0.0001, "loss": 1.5508, "step": 845152 }, { "epoch": 72.91304347826087, "grad_norm": 0.28458696603775024, "learning_rate": 0.0001, "loss": 1.5504, "step": 845208 }, { "epoch": 72.91787439613526, "grad_norm": 0.3171004354953766, "learning_rate": 0.0001, "loss": 1.5537, "step": 845264 }, { "epoch": 72.92270531400966, "grad_norm": 0.2973271310329437, "learning_rate": 0.0001, "loss": 1.5481, "step": 845320 }, { "epoch": 72.92753623188406, "grad_norm": 0.32609817385673523, "learning_rate": 0.0001, "loss": 1.5538, "step": 845376 }, { "epoch": 72.93236714975845, "grad_norm": 0.9841088056564331, "learning_rate": 0.0001, "loss": 1.5535, "step": 845432 }, { "epoch": 72.93719806763285, "grad_norm": 0.43862253427505493, "learning_rate": 0.0001, "loss": 1.5456, "step": 845488 }, { "epoch": 72.94202898550725, "grad_norm": 0.32709747552871704, "learning_rate": 0.0001, "loss": 1.5553, "step": 845544 }, { "epoch": 72.94685990338164, "grad_norm": 7.712766170501709, "learning_rate": 0.0001, "loss": 1.5542, "step": 845600 }, { "epoch": 72.95169082125604, "grad_norm": 0.2334909439086914, "learning_rate": 0.0001, "loss": 1.5582, "step": 845656 }, { "epoch": 72.95652173913044, "grad_norm": 52.6143798828125, "learning_rate": 0.0001, "loss": 1.5478, "step": 845712 }, { "epoch": 72.96135265700484, "grad_norm": 0.7402064204216003, "learning_rate": 0.0001, "loss": 1.5465, "step": 845768 }, { "epoch": 72.96618357487922, "grad_norm": 0.3037840723991394, "learning_rate": 0.0001, "loss": 1.5525, "step": 845824 }, { "epoch": 72.97101449275362, "grad_norm": 0.9925898909568787, "learning_rate": 0.0001, "loss": 1.5471, "step": 845880 }, { "epoch": 72.97584541062803, "grad_norm": 0.29266777634620667, "learning_rate": 0.0001, "loss": 1.5482, "step": 845936 }, { "epoch": 72.98067632850241, "grad_norm": 0.3656380772590637, "learning_rate": 0.0001, "loss": 1.5648, "step": 845992 }, { "epoch": 72.98550724637681, "grad_norm": 0.29998937249183655, "learning_rate": 0.0001, "loss": 1.5562, "step": 846048 }, { "epoch": 72.99033816425121, "grad_norm": 1.3944538831710815, "learning_rate": 0.0001, "loss": 1.5479, "step": 846104 }, { "epoch": 72.9951690821256, "grad_norm": 0.8594692349433899, "learning_rate": 0.0001, "loss": 1.5515, "step": 846160 }, { "epoch": 73.0, "grad_norm": 0.2886667251586914, "learning_rate": 0.0001, "loss": 1.5541, "step": 846216 }, { "epoch": 73.0048309178744, "grad_norm": 0.3342727720737457, "learning_rate": 0.0001, "loss": 1.5515, "step": 846272 }, { "epoch": 73.00966183574879, "grad_norm": 0.23807571828365326, "learning_rate": 0.0001, "loss": 1.5468, "step": 846328 }, { "epoch": 73.01449275362319, "grad_norm": 1.4529787302017212, "learning_rate": 0.0001, "loss": 1.5431, "step": 846384 }, { "epoch": 73.01932367149759, "grad_norm": 0.3464507758617401, "learning_rate": 0.0001, "loss": 1.5475, "step": 846440 }, { "epoch": 73.02415458937197, "grad_norm": 0.5008220672607422, "learning_rate": 0.0001, "loss": 1.5454, "step": 846496 }, { "epoch": 73.02898550724638, "grad_norm": 1.8189488649368286, "learning_rate": 0.0001, "loss": 1.5518, "step": 846552 }, { "epoch": 73.03381642512078, "grad_norm": 0.23797225952148438, "learning_rate": 0.0001, "loss": 1.5485, "step": 846608 }, { "epoch": 73.03864734299516, "grad_norm": 0.29767748713493347, "learning_rate": 0.0001, "loss": 1.5439, "step": 846664 }, { "epoch": 73.04347826086956, "grad_norm": 0.3292091190814972, "learning_rate": 0.0001, "loss": 1.5477, "step": 846720 }, { "epoch": 73.04830917874396, "grad_norm": 0.36063265800476074, "learning_rate": 0.0001, "loss": 1.5461, "step": 846776 }, { "epoch": 73.05314009661836, "grad_norm": 2.5195610523223877, "learning_rate": 0.0001, "loss": 1.5539, "step": 846832 }, { "epoch": 73.05797101449275, "grad_norm": 0.2624399960041046, "learning_rate": 0.0001, "loss": 1.5478, "step": 846888 }, { "epoch": 73.06280193236715, "grad_norm": 0.4145580530166626, "learning_rate": 0.0001, "loss": 1.5486, "step": 846944 }, { "epoch": 73.06763285024155, "grad_norm": 0.3048096001148224, "learning_rate": 0.0001, "loss": 1.5483, "step": 847000 }, { "epoch": 73.07246376811594, "grad_norm": 0.3313409984111786, "learning_rate": 0.0001, "loss": 1.5471, "step": 847056 }, { "epoch": 73.07729468599034, "grad_norm": 0.315876305103302, "learning_rate": 0.0001, "loss": 1.5491, "step": 847112 }, { "epoch": 73.08212560386474, "grad_norm": 0.37242037057876587, "learning_rate": 0.0001, "loss": 1.5517, "step": 847168 }, { "epoch": 73.08695652173913, "grad_norm": 0.26840928196907043, "learning_rate": 0.0001, "loss": 1.5462, "step": 847224 }, { "epoch": 73.09178743961353, "grad_norm": 1.038604497909546, "learning_rate": 0.0001, "loss": 1.5482, "step": 847280 }, { "epoch": 73.09661835748793, "grad_norm": 0.35145097970962524, "learning_rate": 0.0001, "loss": 1.546, "step": 847336 }, { "epoch": 73.10144927536231, "grad_norm": 0.5660275816917419, "learning_rate": 0.0001, "loss": 1.5499, "step": 847392 }, { "epoch": 73.10628019323671, "grad_norm": 0.33224543929100037, "learning_rate": 0.0001, "loss": 1.5428, "step": 847448 }, { "epoch": 73.11111111111111, "grad_norm": 0.5422766804695129, "learning_rate": 0.0001, "loss": 1.5428, "step": 847504 }, { "epoch": 73.1159420289855, "grad_norm": 1.097558856010437, "learning_rate": 0.0001, "loss": 1.5483, "step": 847560 }, { "epoch": 73.1207729468599, "grad_norm": 0.29943761229515076, "learning_rate": 0.0001, "loss": 1.5484, "step": 847616 }, { "epoch": 73.1256038647343, "grad_norm": 0.24222709238529205, "learning_rate": 0.0001, "loss": 1.5439, "step": 847672 }, { "epoch": 73.1304347826087, "grad_norm": 0.33847108483314514, "learning_rate": 0.0001, "loss": 1.5535, "step": 847728 }, { "epoch": 73.13526570048309, "grad_norm": 7.867316246032715, "learning_rate": 0.0001, "loss": 1.5536, "step": 847784 }, { "epoch": 73.14009661835749, "grad_norm": 0.8502141833305359, "learning_rate": 0.0001, "loss": 1.5542, "step": 847840 }, { "epoch": 73.14492753623189, "grad_norm": 0.3264757990837097, "learning_rate": 0.0001, "loss": 1.5476, "step": 847896 }, { "epoch": 73.14975845410628, "grad_norm": 0.2917115390300751, "learning_rate": 0.0001, "loss": 1.5502, "step": 847952 }, { "epoch": 73.15458937198068, "grad_norm": 0.8911092877388, "learning_rate": 0.0001, "loss": 1.5505, "step": 848008 }, { "epoch": 73.15942028985508, "grad_norm": 0.29623836278915405, "learning_rate": 0.0001, "loss": 1.5503, "step": 848064 }, { "epoch": 73.16425120772946, "grad_norm": 1.4374678134918213, "learning_rate": 0.0001, "loss": 1.5444, "step": 848120 }, { "epoch": 73.16908212560386, "grad_norm": 0.502798318862915, "learning_rate": 0.0001, "loss": 1.5505, "step": 848176 }, { "epoch": 73.17391304347827, "grad_norm": 0.31436845660209656, "learning_rate": 0.0001, "loss": 1.5495, "step": 848232 }, { "epoch": 73.17874396135265, "grad_norm": 0.6839659214019775, "learning_rate": 0.0001, "loss": 1.5455, "step": 848288 }, { "epoch": 73.18357487922705, "grad_norm": 0.2720988392829895, "learning_rate": 0.0001, "loss": 1.5472, "step": 848344 }, { "epoch": 73.18840579710145, "grad_norm": 0.27825456857681274, "learning_rate": 0.0001, "loss": 1.5477, "step": 848400 }, { "epoch": 73.19323671497584, "grad_norm": 0.2837628722190857, "learning_rate": 0.0001, "loss": 1.542, "step": 848456 }, { "epoch": 73.19806763285024, "grad_norm": 0.4184974431991577, "learning_rate": 0.0001, "loss": 1.5543, "step": 848512 }, { "epoch": 73.20289855072464, "grad_norm": 0.3355271518230438, "learning_rate": 0.0001, "loss": 1.5422, "step": 848568 }, { "epoch": 73.20772946859903, "grad_norm": 0.27956268191337585, "learning_rate": 0.0001, "loss": 1.5434, "step": 848624 }, { "epoch": 73.21256038647343, "grad_norm": 0.3262493312358856, "learning_rate": 0.0001, "loss": 1.5504, "step": 848680 }, { "epoch": 73.21739130434783, "grad_norm": 0.5495017170906067, "learning_rate": 0.0001, "loss": 1.5514, "step": 848736 }, { "epoch": 73.22222222222223, "grad_norm": 0.29844969511032104, "learning_rate": 0.0001, "loss": 1.5475, "step": 848792 }, { "epoch": 73.22705314009661, "grad_norm": 0.2889877259731293, "learning_rate": 0.0001, "loss": 1.5514, "step": 848848 }, { "epoch": 73.23188405797102, "grad_norm": 0.3141089975833893, "learning_rate": 0.0001, "loss": 1.5508, "step": 848904 }, { "epoch": 73.23671497584542, "grad_norm": 0.3702518343925476, "learning_rate": 0.0001, "loss": 1.5449, "step": 848960 }, { "epoch": 73.2415458937198, "grad_norm": 1.4239581823349, "learning_rate": 0.0001, "loss": 1.5492, "step": 849016 }, { "epoch": 73.2463768115942, "grad_norm": 0.30002984404563904, "learning_rate": 0.0001, "loss": 1.5441, "step": 849072 }, { "epoch": 73.2512077294686, "grad_norm": 0.2732536196708679, "learning_rate": 0.0001, "loss": 1.5459, "step": 849128 }, { "epoch": 73.25603864734299, "grad_norm": 5.805105209350586, "learning_rate": 0.0001, "loss": 1.5427, "step": 849184 }, { "epoch": 73.26086956521739, "grad_norm": 0.6782453060150146, "learning_rate": 0.0001, "loss": 1.5557, "step": 849240 }, { "epoch": 73.26570048309179, "grad_norm": 0.2221088409423828, "learning_rate": 0.0001, "loss": 1.5487, "step": 849296 }, { "epoch": 73.27053140096618, "grad_norm": 0.27847999334335327, "learning_rate": 0.0001, "loss": 1.5493, "step": 849352 }, { "epoch": 73.27536231884058, "grad_norm": 0.25769495964050293, "learning_rate": 0.0001, "loss": 1.55, "step": 849408 }, { "epoch": 73.28019323671498, "grad_norm": 0.2518981695175171, "learning_rate": 0.0001, "loss": 1.5429, "step": 849464 }, { "epoch": 73.28502415458937, "grad_norm": 1.9355548620224, "learning_rate": 0.0001, "loss": 1.5486, "step": 849520 }, { "epoch": 73.28985507246377, "grad_norm": 0.29548773169517517, "learning_rate": 0.0001, "loss": 1.5486, "step": 849576 }, { "epoch": 73.29468599033817, "grad_norm": 0.29705116152763367, "learning_rate": 0.0001, "loss": 1.5479, "step": 849632 }, { "epoch": 73.29951690821257, "grad_norm": 0.36460891366004944, "learning_rate": 0.0001, "loss": 1.5432, "step": 849688 }, { "epoch": 73.30434782608695, "grad_norm": 0.29434719681739807, "learning_rate": 0.0001, "loss": 1.5544, "step": 849744 }, { "epoch": 73.30917874396135, "grad_norm": 0.25269004702568054, "learning_rate": 0.0001, "loss": 1.5454, "step": 849800 }, { "epoch": 73.31400966183575, "grad_norm": 1.7586222887039185, "learning_rate": 0.0001, "loss": 1.5506, "step": 849856 }, { "epoch": 73.31884057971014, "grad_norm": 0.32532384991645813, "learning_rate": 0.0001, "loss": 1.5508, "step": 849912 }, { "epoch": 73.32367149758454, "grad_norm": 0.29834797978401184, "learning_rate": 0.0001, "loss": 1.5516, "step": 849968 }, { "epoch": 73.32850241545894, "grad_norm": 0.35595783591270447, "learning_rate": 0.0001, "loss": 1.5546, "step": 850024 }, { "epoch": 73.33333333333333, "grad_norm": 0.3062031865119934, "learning_rate": 0.0001, "loss": 1.5477, "step": 850080 }, { "epoch": 73.33816425120773, "grad_norm": 0.30281734466552734, "learning_rate": 0.0001, "loss": 1.5529, "step": 850136 }, { "epoch": 73.34299516908213, "grad_norm": 0.3866454064846039, "learning_rate": 0.0001, "loss": 1.5491, "step": 850192 }, { "epoch": 73.34782608695652, "grad_norm": 0.39158007502555847, "learning_rate": 0.0001, "loss": 1.5482, "step": 850248 }, { "epoch": 73.35265700483092, "grad_norm": 1.7838627099990845, "learning_rate": 0.0001, "loss": 1.5515, "step": 850304 }, { "epoch": 73.35748792270532, "grad_norm": 0.35728663206100464, "learning_rate": 0.0001, "loss": 1.5514, "step": 850360 }, { "epoch": 73.3623188405797, "grad_norm": 2.7505786418914795, "learning_rate": 0.0001, "loss": 1.551, "step": 850416 }, { "epoch": 73.3671497584541, "grad_norm": 0.40380364656448364, "learning_rate": 0.0001, "loss": 1.5465, "step": 850472 }, { "epoch": 73.3719806763285, "grad_norm": 0.48626908659935, "learning_rate": 0.0001, "loss": 1.5551, "step": 850528 }, { "epoch": 73.3768115942029, "grad_norm": 0.2859211564064026, "learning_rate": 0.0001, "loss": 1.5491, "step": 850584 }, { "epoch": 73.38164251207729, "grad_norm": 1.311699628829956, "learning_rate": 0.0001, "loss": 1.5452, "step": 850640 }, { "epoch": 73.38647342995169, "grad_norm": 0.4344375729560852, "learning_rate": 0.0001, "loss": 1.5464, "step": 850696 }, { "epoch": 73.3913043478261, "grad_norm": 0.2842482030391693, "learning_rate": 0.0001, "loss": 1.5489, "step": 850752 }, { "epoch": 73.39613526570048, "grad_norm": 0.27468761801719666, "learning_rate": 0.0001, "loss": 1.5541, "step": 850808 }, { "epoch": 73.40096618357488, "grad_norm": 0.3265385627746582, "learning_rate": 0.0001, "loss": 1.551, "step": 850864 }, { "epoch": 73.40579710144928, "grad_norm": 0.26323962211608887, "learning_rate": 0.0001, "loss": 1.55, "step": 850920 }, { "epoch": 73.41062801932367, "grad_norm": 0.3538817763328552, "learning_rate": 0.0001, "loss": 1.5493, "step": 850976 }, { "epoch": 73.41545893719807, "grad_norm": 0.32812267541885376, "learning_rate": 0.0001, "loss": 1.553, "step": 851032 }, { "epoch": 73.42028985507247, "grad_norm": 0.4578169584274292, "learning_rate": 0.0001, "loss": 1.5497, "step": 851088 }, { "epoch": 73.42512077294685, "grad_norm": 0.7521981596946716, "learning_rate": 0.0001, "loss": 1.548, "step": 851144 }, { "epoch": 73.42995169082126, "grad_norm": 0.2649959921836853, "learning_rate": 0.0001, "loss": 1.5552, "step": 851200 }, { "epoch": 73.43478260869566, "grad_norm": 0.2731609642505646, "learning_rate": 0.0001, "loss": 1.5503, "step": 851256 }, { "epoch": 73.43961352657004, "grad_norm": 0.4078466296195984, "learning_rate": 0.0001, "loss": 1.5504, "step": 851312 }, { "epoch": 73.44444444444444, "grad_norm": 1.0814080238342285, "learning_rate": 0.0001, "loss": 1.5457, "step": 851368 }, { "epoch": 73.44927536231884, "grad_norm": 0.32753852009773254, "learning_rate": 0.0001, "loss": 1.5523, "step": 851424 }, { "epoch": 73.45410628019323, "grad_norm": 0.43813303112983704, "learning_rate": 0.0001, "loss": 1.5553, "step": 851480 }, { "epoch": 73.45893719806763, "grad_norm": 0.36521923542022705, "learning_rate": 0.0001, "loss": 1.5538, "step": 851536 }, { "epoch": 73.46376811594203, "grad_norm": 1.328196406364441, "learning_rate": 0.0001, "loss": 1.5483, "step": 851592 }, { "epoch": 73.46859903381643, "grad_norm": 0.31597378849983215, "learning_rate": 0.0001, "loss": 1.5503, "step": 851648 }, { "epoch": 73.47342995169082, "grad_norm": 0.3497198224067688, "learning_rate": 0.0001, "loss": 1.5441, "step": 851704 }, { "epoch": 73.47826086956522, "grad_norm": 0.28276026248931885, "learning_rate": 0.0001, "loss": 1.5438, "step": 851760 }, { "epoch": 73.48309178743962, "grad_norm": 0.23667562007904053, "learning_rate": 0.0001, "loss": 1.5493, "step": 851816 }, { "epoch": 73.487922705314, "grad_norm": 0.2818549871444702, "learning_rate": 0.0001, "loss": 1.5429, "step": 851872 }, { "epoch": 73.4927536231884, "grad_norm": 0.33312085270881653, "learning_rate": 0.0001, "loss": 1.5531, "step": 851928 }, { "epoch": 73.4975845410628, "grad_norm": 0.2503253221511841, "learning_rate": 0.0001, "loss": 1.5438, "step": 851984 }, { "epoch": 73.5024154589372, "grad_norm": 0.39553919434547424, "learning_rate": 0.0001, "loss": 1.551, "step": 852040 }, { "epoch": 73.5072463768116, "grad_norm": 2.005678653717041, "learning_rate": 0.0001, "loss": 1.5502, "step": 852096 }, { "epoch": 73.512077294686, "grad_norm": 0.5719006061553955, "learning_rate": 0.0001, "loss": 1.5552, "step": 852152 }, { "epoch": 73.51690821256038, "grad_norm": 0.4028954803943634, "learning_rate": 0.0001, "loss": 1.541, "step": 852208 }, { "epoch": 73.52173913043478, "grad_norm": 2.7290236949920654, "learning_rate": 0.0001, "loss": 1.5448, "step": 852264 }, { "epoch": 73.52657004830918, "grad_norm": 0.35923880338668823, "learning_rate": 0.0001, "loss": 1.5474, "step": 852320 }, { "epoch": 73.53140096618357, "grad_norm": 0.3156774938106537, "learning_rate": 0.0001, "loss": 1.5551, "step": 852376 }, { "epoch": 73.53623188405797, "grad_norm": 0.3138878047466278, "learning_rate": 0.0001, "loss": 1.556, "step": 852432 }, { "epoch": 73.54106280193237, "grad_norm": 0.3132822513580322, "learning_rate": 0.0001, "loss": 1.5497, "step": 852488 }, { "epoch": 73.54589371980677, "grad_norm": 0.41331833600997925, "learning_rate": 0.0001, "loss": 1.5521, "step": 852544 }, { "epoch": 73.55072463768116, "grad_norm": 2.813166856765747, "learning_rate": 0.0001, "loss": 1.5433, "step": 852600 }, { "epoch": 73.55555555555556, "grad_norm": 0.2859540283679962, "learning_rate": 0.0001, "loss": 1.5493, "step": 852656 }, { "epoch": 73.56038647342996, "grad_norm": 0.28540313243865967, "learning_rate": 0.0001, "loss": 1.548, "step": 852712 }, { "epoch": 73.56521739130434, "grad_norm": 0.2984123229980469, "learning_rate": 0.0001, "loss": 1.5581, "step": 852768 }, { "epoch": 73.57004830917874, "grad_norm": 0.3682372272014618, "learning_rate": 0.0001, "loss": 1.5462, "step": 852824 }, { "epoch": 73.57487922705315, "grad_norm": 0.4391799569129944, "learning_rate": 0.0001, "loss": 1.5504, "step": 852880 }, { "epoch": 73.57971014492753, "grad_norm": 0.2774272561073303, "learning_rate": 0.0001, "loss": 1.5524, "step": 852936 }, { "epoch": 73.58454106280193, "grad_norm": 0.27927398681640625, "learning_rate": 0.0001, "loss": 1.5555, "step": 852992 }, { "epoch": 73.58937198067633, "grad_norm": 0.47602349519729614, "learning_rate": 0.0001, "loss": 1.546, "step": 853048 }, { "epoch": 73.59420289855072, "grad_norm": 0.40173888206481934, "learning_rate": 0.0001, "loss": 1.5514, "step": 853104 }, { "epoch": 73.59903381642512, "grad_norm": 0.7387980222702026, "learning_rate": 0.0001, "loss": 1.554, "step": 853160 }, { "epoch": 73.60386473429952, "grad_norm": 0.3611734211444855, "learning_rate": 0.0001, "loss": 1.554, "step": 853216 }, { "epoch": 73.6086956521739, "grad_norm": 0.28485605120658875, "learning_rate": 0.0001, "loss": 1.5534, "step": 853272 }, { "epoch": 73.61352657004831, "grad_norm": 1.270798683166504, "learning_rate": 0.0001, "loss": 1.5525, "step": 853328 }, { "epoch": 73.61835748792271, "grad_norm": 1.0681484937667847, "learning_rate": 0.0001, "loss": 1.5442, "step": 853384 }, { "epoch": 73.6231884057971, "grad_norm": 0.26418551802635193, "learning_rate": 0.0001, "loss": 1.5524, "step": 853440 }, { "epoch": 73.6280193236715, "grad_norm": 0.2798893451690674, "learning_rate": 0.0001, "loss": 1.5492, "step": 853496 }, { "epoch": 73.6328502415459, "grad_norm": 0.36323726177215576, "learning_rate": 0.0001, "loss": 1.5505, "step": 853552 }, { "epoch": 73.6376811594203, "grad_norm": 0.640393078327179, "learning_rate": 0.0001, "loss": 1.5563, "step": 853608 }, { "epoch": 73.64251207729468, "grad_norm": 13.781241416931152, "learning_rate": 0.0001, "loss": 1.5537, "step": 853664 }, { "epoch": 73.64734299516908, "grad_norm": 0.38151609897613525, "learning_rate": 0.0001, "loss": 1.5437, "step": 853720 }, { "epoch": 73.65217391304348, "grad_norm": 0.5119112133979797, "learning_rate": 0.0001, "loss": 1.5432, "step": 853776 }, { "epoch": 73.65700483091787, "grad_norm": 0.31418856978416443, "learning_rate": 0.0001, "loss": 1.5488, "step": 853832 }, { "epoch": 73.66183574879227, "grad_norm": 0.3534928858280182, "learning_rate": 0.0001, "loss": 1.5546, "step": 853888 }, { "epoch": 73.66666666666667, "grad_norm": 0.2550116181373596, "learning_rate": 0.0001, "loss": 1.5516, "step": 853944 }, { "epoch": 73.67149758454106, "grad_norm": 0.37160858511924744, "learning_rate": 0.0001, "loss": 1.5399, "step": 854000 }, { "epoch": 73.67632850241546, "grad_norm": 0.3409365117549896, "learning_rate": 0.0001, "loss": 1.5511, "step": 854056 }, { "epoch": 73.68115942028986, "grad_norm": 0.2531895041465759, "learning_rate": 0.0001, "loss": 1.5488, "step": 854112 }, { "epoch": 73.68599033816425, "grad_norm": 0.3186221718788147, "learning_rate": 0.0001, "loss": 1.5513, "step": 854168 }, { "epoch": 73.69082125603865, "grad_norm": 0.3278068006038666, "learning_rate": 0.0001, "loss": 1.5533, "step": 854224 }, { "epoch": 73.69565217391305, "grad_norm": 1.2750880718231201, "learning_rate": 0.0001, "loss": 1.5592, "step": 854280 }, { "epoch": 73.70048309178743, "grad_norm": 0.2929232120513916, "learning_rate": 0.0001, "loss": 1.5523, "step": 854336 }, { "epoch": 73.70531400966183, "grad_norm": 0.7976210117340088, "learning_rate": 0.0001, "loss": 1.5525, "step": 854392 }, { "epoch": 73.71014492753623, "grad_norm": 0.7127354741096497, "learning_rate": 0.0001, "loss": 1.5467, "step": 854448 }, { "epoch": 73.71497584541063, "grad_norm": 0.302817702293396, "learning_rate": 0.0001, "loss": 1.5527, "step": 854504 }, { "epoch": 73.71980676328502, "grad_norm": 0.9640470743179321, "learning_rate": 0.0001, "loss": 1.5551, "step": 854560 }, { "epoch": 73.72463768115942, "grad_norm": 0.8056087493896484, "learning_rate": 0.0001, "loss": 1.5602, "step": 854616 }, { "epoch": 73.72946859903382, "grad_norm": 0.27168408036231995, "learning_rate": 0.0001, "loss": 1.557, "step": 854672 }, { "epoch": 73.73429951690821, "grad_norm": 0.2554932236671448, "learning_rate": 0.0001, "loss": 1.5492, "step": 854728 }, { "epoch": 73.73913043478261, "grad_norm": 0.24079729616641998, "learning_rate": 0.0001, "loss": 1.5486, "step": 854784 }, { "epoch": 73.74396135265701, "grad_norm": 0.2747451961040497, "learning_rate": 0.0001, "loss": 1.5469, "step": 854840 }, { "epoch": 73.7487922705314, "grad_norm": 0.3200933635234833, "learning_rate": 0.0001, "loss": 1.5516, "step": 854896 }, { "epoch": 73.7536231884058, "grad_norm": 0.546423614025116, "learning_rate": 0.0001, "loss": 1.5529, "step": 854952 }, { "epoch": 73.7584541062802, "grad_norm": 0.2598802149295807, "learning_rate": 0.0001, "loss": 1.5492, "step": 855008 }, { "epoch": 73.76328502415458, "grad_norm": 0.3405876159667969, "learning_rate": 0.0001, "loss": 1.5478, "step": 855064 }, { "epoch": 73.76811594202898, "grad_norm": 0.4002784490585327, "learning_rate": 0.0001, "loss": 1.549, "step": 855120 }, { "epoch": 73.77294685990339, "grad_norm": 0.25425219535827637, "learning_rate": 0.0001, "loss": 1.5476, "step": 855176 }, { "epoch": 73.77777777777777, "grad_norm": 0.6482347249984741, "learning_rate": 0.0001, "loss": 1.5455, "step": 855232 }, { "epoch": 73.78260869565217, "grad_norm": 0.3467003107070923, "learning_rate": 0.0001, "loss": 1.5514, "step": 855288 }, { "epoch": 73.78743961352657, "grad_norm": 0.26797622442245483, "learning_rate": 0.0001, "loss": 1.5518, "step": 855344 }, { "epoch": 73.79227053140096, "grad_norm": 0.2806653678417206, "learning_rate": 0.0001, "loss": 1.5484, "step": 855400 }, { "epoch": 73.79710144927536, "grad_norm": 0.4003015160560608, "learning_rate": 0.0001, "loss": 1.5474, "step": 855456 }, { "epoch": 73.80193236714976, "grad_norm": 0.5048342347145081, "learning_rate": 0.0001, "loss": 1.5565, "step": 855512 }, { "epoch": 73.80676328502416, "grad_norm": 0.25381210446357727, "learning_rate": 0.0001, "loss": 1.5526, "step": 855568 }, { "epoch": 73.81159420289855, "grad_norm": 0.27808886766433716, "learning_rate": 0.0001, "loss": 1.5533, "step": 855624 }, { "epoch": 73.81642512077295, "grad_norm": 0.2587437629699707, "learning_rate": 0.0001, "loss": 1.5476, "step": 855680 }, { "epoch": 73.82125603864735, "grad_norm": 0.8244941830635071, "learning_rate": 0.0001, "loss": 1.5526, "step": 855736 }, { "epoch": 73.82608695652173, "grad_norm": 0.27357402443885803, "learning_rate": 0.0001, "loss": 1.5522, "step": 855792 }, { "epoch": 73.83091787439614, "grad_norm": 0.2701512575149536, "learning_rate": 0.0001, "loss": 1.5529, "step": 855848 }, { "epoch": 73.83574879227054, "grad_norm": 0.40763118863105774, "learning_rate": 0.0001, "loss": 1.5505, "step": 855904 }, { "epoch": 73.84057971014492, "grad_norm": 0.48203980922698975, "learning_rate": 0.0001, "loss": 1.5539, "step": 855960 }, { "epoch": 73.84541062801932, "grad_norm": 0.46093282103538513, "learning_rate": 0.0001, "loss": 1.5479, "step": 856016 }, { "epoch": 73.85024154589372, "grad_norm": 0.3958704173564911, "learning_rate": 0.0001, "loss": 1.5488, "step": 856072 }, { "epoch": 73.85507246376811, "grad_norm": 1.2288455963134766, "learning_rate": 0.0001, "loss": 1.5466, "step": 856128 }, { "epoch": 73.85990338164251, "grad_norm": 0.5311572551727295, "learning_rate": 0.0001, "loss": 1.5504, "step": 856184 }, { "epoch": 73.86473429951691, "grad_norm": 0.3133232891559601, "learning_rate": 0.0001, "loss": 1.5457, "step": 856240 }, { "epoch": 73.8695652173913, "grad_norm": 0.3448738157749176, "learning_rate": 0.0001, "loss": 1.5469, "step": 856296 }, { "epoch": 73.8743961352657, "grad_norm": 0.27147945761680603, "learning_rate": 0.0001, "loss": 1.5512, "step": 856352 }, { "epoch": 73.8792270531401, "grad_norm": 0.2587440311908722, "learning_rate": 0.0001, "loss": 1.5465, "step": 856408 }, { "epoch": 73.8840579710145, "grad_norm": 0.3234604597091675, "learning_rate": 0.0001, "loss": 1.5427, "step": 856464 }, { "epoch": 73.88888888888889, "grad_norm": 0.33123788237571716, "learning_rate": 0.0001, "loss": 1.5562, "step": 856520 }, { "epoch": 73.89371980676329, "grad_norm": 0.3446468412876129, "learning_rate": 0.0001, "loss": 1.557, "step": 856576 }, { "epoch": 73.89855072463769, "grad_norm": 0.3553003668785095, "learning_rate": 0.0001, "loss": 1.5559, "step": 856632 }, { "epoch": 73.90338164251207, "grad_norm": 0.247635617852211, "learning_rate": 0.0001, "loss": 1.5437, "step": 856688 }, { "epoch": 73.90821256038647, "grad_norm": 0.45188724994659424, "learning_rate": 0.0001, "loss": 1.5495, "step": 856744 }, { "epoch": 73.91304347826087, "grad_norm": 0.334075927734375, "learning_rate": 0.0001, "loss": 1.5615, "step": 856800 }, { "epoch": 73.91787439613526, "grad_norm": 0.48419979214668274, "learning_rate": 0.0001, "loss": 1.5469, "step": 856856 }, { "epoch": 73.92270531400966, "grad_norm": 0.4273416996002197, "learning_rate": 0.0001, "loss": 1.5594, "step": 856912 }, { "epoch": 73.92753623188406, "grad_norm": 0.2681576609611511, "learning_rate": 0.0001, "loss": 1.554, "step": 856968 }, { "epoch": 73.93236714975845, "grad_norm": 0.9888299107551575, "learning_rate": 0.0001, "loss": 1.5562, "step": 857024 }, { "epoch": 73.93719806763285, "grad_norm": 0.7716449499130249, "learning_rate": 0.0001, "loss": 1.5529, "step": 857080 }, { "epoch": 73.94202898550725, "grad_norm": 0.2398664653301239, "learning_rate": 0.0001, "loss": 1.5483, "step": 857136 }, { "epoch": 73.94685990338164, "grad_norm": 0.2589481472969055, "learning_rate": 0.0001, "loss": 1.5513, "step": 857192 }, { "epoch": 73.95169082125604, "grad_norm": 0.46153363585472107, "learning_rate": 0.0001, "loss": 1.5505, "step": 857248 }, { "epoch": 73.95652173913044, "grad_norm": 0.3381863534450531, "learning_rate": 0.0001, "loss": 1.5547, "step": 857304 }, { "epoch": 73.96135265700484, "grad_norm": 0.2445976734161377, "learning_rate": 0.0001, "loss": 1.5506, "step": 857360 }, { "epoch": 73.96618357487922, "grad_norm": 0.3679485619068146, "learning_rate": 0.0001, "loss": 1.5447, "step": 857416 }, { "epoch": 73.97101449275362, "grad_norm": 3.1271889209747314, "learning_rate": 0.0001, "loss": 1.5456, "step": 857472 }, { "epoch": 73.97584541062803, "grad_norm": 0.31884220242500305, "learning_rate": 0.0001, "loss": 1.5618, "step": 857528 }, { "epoch": 73.98067632850241, "grad_norm": 0.4890647828578949, "learning_rate": 0.0001, "loss": 1.5571, "step": 857584 }, { "epoch": 73.98550724637681, "grad_norm": 0.35520827770233154, "learning_rate": 0.0001, "loss": 1.5414, "step": 857640 }, { "epoch": 73.99033816425121, "grad_norm": 0.25845152139663696, "learning_rate": 0.0001, "loss": 1.5496, "step": 857696 }, { "epoch": 73.9951690821256, "grad_norm": 0.45664331316947937, "learning_rate": 0.0001, "loss": 1.5457, "step": 857752 }, { "epoch": 74.0, "grad_norm": 2.9754679203033447, "learning_rate": 0.0001, "loss": 1.552, "step": 857808 }, { "epoch": 74.0048309178744, "grad_norm": 0.3042506277561188, "learning_rate": 0.0001, "loss": 1.5429, "step": 857864 }, { "epoch": 74.00966183574879, "grad_norm": 0.5835771560668945, "learning_rate": 0.0001, "loss": 1.55, "step": 857920 }, { "epoch": 74.01449275362319, "grad_norm": 0.25140780210494995, "learning_rate": 0.0001, "loss": 1.5471, "step": 857976 }, { "epoch": 74.01932367149759, "grad_norm": 0.34957408905029297, "learning_rate": 0.0001, "loss": 1.546, "step": 858032 }, { "epoch": 74.02415458937197, "grad_norm": 0.3019605875015259, "learning_rate": 0.0001, "loss": 1.5407, "step": 858088 }, { "epoch": 74.02898550724638, "grad_norm": 0.3012160360813141, "learning_rate": 0.0001, "loss": 1.5437, "step": 858144 }, { "epoch": 74.03381642512078, "grad_norm": 0.3331177234649658, "learning_rate": 0.0001, "loss": 1.5428, "step": 858200 }, { "epoch": 74.03864734299516, "grad_norm": 0.23923343420028687, "learning_rate": 0.0001, "loss": 1.5413, "step": 858256 }, { "epoch": 74.04347826086956, "grad_norm": 0.9811349511146545, "learning_rate": 0.0001, "loss": 1.5484, "step": 858312 }, { "epoch": 74.04830917874396, "grad_norm": 0.22264984250068665, "learning_rate": 0.0001, "loss": 1.5488, "step": 858368 }, { "epoch": 74.05314009661836, "grad_norm": 0.3794899880886078, "learning_rate": 0.0001, "loss": 1.5464, "step": 858424 }, { "epoch": 74.05797101449275, "grad_norm": 17.27873420715332, "learning_rate": 0.0001, "loss": 1.5475, "step": 858480 }, { "epoch": 74.06280193236715, "grad_norm": 1.3471873998641968, "learning_rate": 0.0001, "loss": 1.5455, "step": 858536 }, { "epoch": 74.06763285024155, "grad_norm": 0.3193117380142212, "learning_rate": 0.0001, "loss": 1.5456, "step": 858592 }, { "epoch": 74.07246376811594, "grad_norm": 0.28078749775886536, "learning_rate": 0.0001, "loss": 1.5425, "step": 858648 }, { "epoch": 74.07729468599034, "grad_norm": 0.6458261609077454, "learning_rate": 0.0001, "loss": 1.544, "step": 858704 }, { "epoch": 74.08212560386474, "grad_norm": 0.30348363518714905, "learning_rate": 0.0001, "loss": 1.5448, "step": 858760 }, { "epoch": 74.08695652173913, "grad_norm": 3.225843667984009, "learning_rate": 0.0001, "loss": 1.5382, "step": 858816 }, { "epoch": 74.09178743961353, "grad_norm": 0.28603610396385193, "learning_rate": 0.0001, "loss": 1.5519, "step": 858872 }, { "epoch": 74.09661835748793, "grad_norm": 0.33560609817504883, "learning_rate": 0.0001, "loss": 1.5466, "step": 858928 }, { "epoch": 74.10144927536231, "grad_norm": 0.258551687002182, "learning_rate": 0.0001, "loss": 1.5504, "step": 858984 }, { "epoch": 74.10628019323671, "grad_norm": 0.2682439386844635, "learning_rate": 0.0001, "loss": 1.5495, "step": 859040 }, { "epoch": 74.11111111111111, "grad_norm": 0.6913463473320007, "learning_rate": 0.0001, "loss": 1.5379, "step": 859096 }, { "epoch": 74.1159420289855, "grad_norm": 0.4954454004764557, "learning_rate": 0.0001, "loss": 1.549, "step": 859152 }, { "epoch": 74.1207729468599, "grad_norm": 0.3558264970779419, "learning_rate": 0.0001, "loss": 1.5471, "step": 859208 }, { "epoch": 74.1256038647343, "grad_norm": 2.359910249710083, "learning_rate": 0.0001, "loss": 1.5466, "step": 859264 }, { "epoch": 74.1304347826087, "grad_norm": 0.6537205576896667, "learning_rate": 0.0001, "loss": 1.5449, "step": 859320 }, { "epoch": 74.13526570048309, "grad_norm": 0.37438979744911194, "learning_rate": 0.0001, "loss": 1.5411, "step": 859376 }, { "epoch": 74.14009661835749, "grad_norm": 0.3693693280220032, "learning_rate": 0.0001, "loss": 1.5448, "step": 859432 }, { "epoch": 74.14492753623189, "grad_norm": 0.2947206497192383, "learning_rate": 0.0001, "loss": 1.5491, "step": 859488 }, { "epoch": 74.14975845410628, "grad_norm": 5.912968635559082, "learning_rate": 0.0001, "loss": 1.5422, "step": 859544 }, { "epoch": 74.15458937198068, "grad_norm": 0.432913601398468, "learning_rate": 0.0001, "loss": 1.5493, "step": 859600 }, { "epoch": 74.15942028985508, "grad_norm": 0.6974234580993652, "learning_rate": 0.0001, "loss": 1.5431, "step": 859656 }, { "epoch": 74.16425120772946, "grad_norm": 0.28780657052993774, "learning_rate": 0.0001, "loss": 1.5446, "step": 859712 }, { "epoch": 74.16908212560386, "grad_norm": 0.2846371531486511, "learning_rate": 0.0001, "loss": 1.5507, "step": 859768 }, { "epoch": 74.17391304347827, "grad_norm": 0.26367536187171936, "learning_rate": 0.0001, "loss": 1.5447, "step": 859824 }, { "epoch": 74.17874396135265, "grad_norm": 0.24742595851421356, "learning_rate": 0.0001, "loss": 1.5442, "step": 859880 }, { "epoch": 74.18357487922705, "grad_norm": 2.4072365760803223, "learning_rate": 0.0001, "loss": 1.5488, "step": 859936 }, { "epoch": 74.18840579710145, "grad_norm": 0.840711236000061, "learning_rate": 0.0001, "loss": 1.5487, "step": 859992 }, { "epoch": 74.19323671497584, "grad_norm": 1.0259121656417847, "learning_rate": 0.0001, "loss": 1.5441, "step": 860048 }, { "epoch": 74.19806763285024, "grad_norm": 0.7254974842071533, "learning_rate": 0.0001, "loss": 1.5408, "step": 860104 }, { "epoch": 74.20289855072464, "grad_norm": 0.26128339767456055, "learning_rate": 0.0001, "loss": 1.5432, "step": 860160 }, { "epoch": 74.20772946859903, "grad_norm": 23.522279739379883, "learning_rate": 0.0001, "loss": 1.5457, "step": 860216 }, { "epoch": 74.21256038647343, "grad_norm": 1.2463260889053345, "learning_rate": 0.0001, "loss": 1.5516, "step": 860272 }, { "epoch": 74.21739130434783, "grad_norm": 0.3177523612976074, "learning_rate": 0.0001, "loss": 1.5401, "step": 860328 }, { "epoch": 74.22222222222223, "grad_norm": 0.32615119218826294, "learning_rate": 0.0001, "loss": 1.5437, "step": 860384 }, { "epoch": 74.22705314009661, "grad_norm": 0.2953067421913147, "learning_rate": 0.0001, "loss": 1.5474, "step": 860440 }, { "epoch": 74.23188405797102, "grad_norm": 0.3839573562145233, "learning_rate": 0.0001, "loss": 1.5478, "step": 860496 }, { "epoch": 74.23671497584542, "grad_norm": 4.524076461791992, "learning_rate": 0.0001, "loss": 1.5471, "step": 860552 }, { "epoch": 74.2415458937198, "grad_norm": 3.841259717941284, "learning_rate": 0.0001, "loss": 1.5477, "step": 860608 }, { "epoch": 74.2463768115942, "grad_norm": 0.4044472575187683, "learning_rate": 0.0001, "loss": 1.5413, "step": 860664 }, { "epoch": 74.2512077294686, "grad_norm": 0.24968446791172028, "learning_rate": 0.0001, "loss": 1.5441, "step": 860720 }, { "epoch": 74.25603864734299, "grad_norm": 0.5169285535812378, "learning_rate": 0.0001, "loss": 1.544, "step": 860776 }, { "epoch": 74.26086956521739, "grad_norm": 0.402553915977478, "learning_rate": 0.0001, "loss": 1.5492, "step": 860832 }, { "epoch": 74.26570048309179, "grad_norm": 0.3312280774116516, "learning_rate": 0.0001, "loss": 1.5413, "step": 860888 }, { "epoch": 74.27053140096618, "grad_norm": 0.26268458366394043, "learning_rate": 0.0001, "loss": 1.5478, "step": 860944 }, { "epoch": 74.27536231884058, "grad_norm": 1.8704606294631958, "learning_rate": 0.0001, "loss": 1.5416, "step": 861000 }, { "epoch": 74.28019323671498, "grad_norm": 0.48986369371414185, "learning_rate": 0.0001, "loss": 1.5464, "step": 861056 }, { "epoch": 74.28502415458937, "grad_norm": 0.3303447365760803, "learning_rate": 0.0001, "loss": 1.5456, "step": 861112 }, { "epoch": 74.28985507246377, "grad_norm": 0.42689236998558044, "learning_rate": 0.0001, "loss": 1.5511, "step": 861168 }, { "epoch": 74.29468599033817, "grad_norm": 0.2701157033443451, "learning_rate": 0.0001, "loss": 1.5528, "step": 861224 }, { "epoch": 74.29951690821257, "grad_norm": 0.291719526052475, "learning_rate": 0.0001, "loss": 1.5451, "step": 861280 }, { "epoch": 74.30434782608695, "grad_norm": 0.33514705300331116, "learning_rate": 0.0001, "loss": 1.5438, "step": 861336 }, { "epoch": 74.30917874396135, "grad_norm": 0.5132144093513489, "learning_rate": 0.0001, "loss": 1.5492, "step": 861392 }, { "epoch": 74.31400966183575, "grad_norm": 0.34096285700798035, "learning_rate": 0.0001, "loss": 1.5518, "step": 861448 }, { "epoch": 74.31884057971014, "grad_norm": 0.8837071657180786, "learning_rate": 0.0001, "loss": 1.5484, "step": 861504 }, { "epoch": 74.32367149758454, "grad_norm": 0.3476924002170563, "learning_rate": 0.0001, "loss": 1.5491, "step": 861560 }, { "epoch": 74.32850241545894, "grad_norm": 0.2718266546726227, "learning_rate": 0.0001, "loss": 1.5504, "step": 861616 }, { "epoch": 74.33333333333333, "grad_norm": 0.32581281661987305, "learning_rate": 0.0001, "loss": 1.5456, "step": 861672 }, { "epoch": 74.33816425120773, "grad_norm": 0.3028445243835449, "learning_rate": 0.0001, "loss": 1.5473, "step": 861728 }, { "epoch": 74.34299516908213, "grad_norm": 0.4496018588542938, "learning_rate": 0.0001, "loss": 1.5451, "step": 861784 }, { "epoch": 74.34782608695652, "grad_norm": 0.24290770292282104, "learning_rate": 0.0001, "loss": 1.5412, "step": 861840 }, { "epoch": 74.35265700483092, "grad_norm": 0.3281551003456116, "learning_rate": 0.0001, "loss": 1.5455, "step": 861896 }, { "epoch": 74.35748792270532, "grad_norm": 0.6963595747947693, "learning_rate": 0.0001, "loss": 1.5498, "step": 861952 }, { "epoch": 74.3623188405797, "grad_norm": 0.2539660334587097, "learning_rate": 0.0001, "loss": 1.5501, "step": 862008 }, { "epoch": 74.3671497584541, "grad_norm": 0.2869892120361328, "learning_rate": 0.0001, "loss": 1.5475, "step": 862064 }, { "epoch": 74.3719806763285, "grad_norm": 0.3836989104747772, "learning_rate": 0.0001, "loss": 1.548, "step": 862120 }, { "epoch": 74.3768115942029, "grad_norm": 1.206839919090271, "learning_rate": 0.0001, "loss": 1.5472, "step": 862176 }, { "epoch": 74.38164251207729, "grad_norm": 0.3572497069835663, "learning_rate": 0.0001, "loss": 1.5498, "step": 862232 }, { "epoch": 74.38647342995169, "grad_norm": 0.49994027614593506, "learning_rate": 0.0001, "loss": 1.5464, "step": 862288 }, { "epoch": 74.3913043478261, "grad_norm": 0.2582208514213562, "learning_rate": 0.0001, "loss": 1.5493, "step": 862344 }, { "epoch": 74.39613526570048, "grad_norm": 0.23088325560092926, "learning_rate": 0.0001, "loss": 1.5476, "step": 862400 }, { "epoch": 74.40096618357488, "grad_norm": 4.379991054534912, "learning_rate": 0.0001, "loss": 1.5477, "step": 862456 }, { "epoch": 74.40579710144928, "grad_norm": 1.0844595432281494, "learning_rate": 0.0001, "loss": 1.5485, "step": 862512 }, { "epoch": 74.41062801932367, "grad_norm": 0.2795783579349518, "learning_rate": 0.0001, "loss": 1.5474, "step": 862568 }, { "epoch": 74.41545893719807, "grad_norm": 17.095630645751953, "learning_rate": 0.0001, "loss": 1.542, "step": 862624 }, { "epoch": 74.42028985507247, "grad_norm": 0.3834109306335449, "learning_rate": 0.0001, "loss": 1.5551, "step": 862680 }, { "epoch": 74.42512077294685, "grad_norm": 0.38940319418907166, "learning_rate": 0.0001, "loss": 1.545, "step": 862736 }, { "epoch": 74.42995169082126, "grad_norm": 0.2931216061115265, "learning_rate": 0.0001, "loss": 1.5503, "step": 862792 }, { "epoch": 74.43478260869566, "grad_norm": 0.26779115200042725, "learning_rate": 0.0001, "loss": 1.545, "step": 862848 }, { "epoch": 74.43961352657004, "grad_norm": 1.0083365440368652, "learning_rate": 0.0001, "loss": 1.5505, "step": 862904 }, { "epoch": 74.44444444444444, "grad_norm": 7.876206398010254, "learning_rate": 0.0001, "loss": 1.5468, "step": 862960 }, { "epoch": 74.44927536231884, "grad_norm": 7.621323585510254, "learning_rate": 0.0001, "loss": 1.5461, "step": 863016 }, { "epoch": 74.45410628019323, "grad_norm": 0.2738022208213806, "learning_rate": 0.0001, "loss": 1.5451, "step": 863072 }, { "epoch": 74.45893719806763, "grad_norm": 0.36335489153862, "learning_rate": 0.0001, "loss": 1.5489, "step": 863128 }, { "epoch": 74.46376811594203, "grad_norm": 0.7231324315071106, "learning_rate": 0.0001, "loss": 1.5475, "step": 863184 }, { "epoch": 74.46859903381643, "grad_norm": 0.25773727893829346, "learning_rate": 0.0001, "loss": 1.5493, "step": 863240 }, { "epoch": 74.47342995169082, "grad_norm": 0.29874569177627563, "learning_rate": 0.0001, "loss": 1.5371, "step": 863296 }, { "epoch": 74.47826086956522, "grad_norm": 0.2483185976743698, "learning_rate": 0.0001, "loss": 1.5494, "step": 863352 }, { "epoch": 74.48309178743962, "grad_norm": 0.39931103587150574, "learning_rate": 0.0001, "loss": 1.5426, "step": 863408 }, { "epoch": 74.487922705314, "grad_norm": 0.34015390276908875, "learning_rate": 0.0001, "loss": 1.545, "step": 863464 }, { "epoch": 74.4927536231884, "grad_norm": 0.43103063106536865, "learning_rate": 0.0001, "loss": 1.5435, "step": 863520 }, { "epoch": 74.4975845410628, "grad_norm": 0.2870701849460602, "learning_rate": 0.0001, "loss": 1.5526, "step": 863576 }, { "epoch": 74.5024154589372, "grad_norm": 0.2968762516975403, "learning_rate": 0.0001, "loss": 1.5415, "step": 863632 }, { "epoch": 74.5072463768116, "grad_norm": 0.29906976222991943, "learning_rate": 0.0001, "loss": 1.5506, "step": 863688 }, { "epoch": 74.512077294686, "grad_norm": 0.4630171060562134, "learning_rate": 0.0001, "loss": 1.5472, "step": 863744 }, { "epoch": 74.51690821256038, "grad_norm": 0.3979451656341553, "learning_rate": 0.0001, "loss": 1.5494, "step": 863800 }, { "epoch": 74.52173913043478, "grad_norm": 0.43025049567222595, "learning_rate": 0.0001, "loss": 1.5484, "step": 863856 }, { "epoch": 74.52657004830918, "grad_norm": 11.891583442687988, "learning_rate": 0.0001, "loss": 1.5483, "step": 863912 }, { "epoch": 74.53140096618357, "grad_norm": 0.7433462738990784, "learning_rate": 0.0001, "loss": 1.5519, "step": 863968 }, { "epoch": 74.53623188405797, "grad_norm": 0.2794242799282074, "learning_rate": 0.0001, "loss": 1.5545, "step": 864024 }, { "epoch": 74.54106280193237, "grad_norm": 0.627196192741394, "learning_rate": 0.0001, "loss": 1.5482, "step": 864080 }, { "epoch": 74.54589371980677, "grad_norm": 0.3876979351043701, "learning_rate": 0.0001, "loss": 1.5523, "step": 864136 }, { "epoch": 74.55072463768116, "grad_norm": 0.27398380637168884, "learning_rate": 0.0001, "loss": 1.554, "step": 864192 }, { "epoch": 74.55555555555556, "grad_norm": 0.2734474539756775, "learning_rate": 0.0001, "loss": 1.5553, "step": 864248 }, { "epoch": 74.56038647342996, "grad_norm": 0.31494805216789246, "learning_rate": 0.0001, "loss": 1.5471, "step": 864304 }, { "epoch": 74.56521739130434, "grad_norm": 0.38375619053840637, "learning_rate": 0.0001, "loss": 1.5527, "step": 864360 }, { "epoch": 74.57004830917874, "grad_norm": 0.5469962954521179, "learning_rate": 0.0001, "loss": 1.5479, "step": 864416 }, { "epoch": 74.57487922705315, "grad_norm": 0.5324450731277466, "learning_rate": 0.0001, "loss": 1.546, "step": 864472 }, { "epoch": 74.57971014492753, "grad_norm": 5.153418064117432, "learning_rate": 0.0001, "loss": 1.5472, "step": 864528 }, { "epoch": 74.58454106280193, "grad_norm": 0.6745179295539856, "learning_rate": 0.0001, "loss": 1.548, "step": 864584 }, { "epoch": 74.58937198067633, "grad_norm": 0.3043243885040283, "learning_rate": 0.0001, "loss": 1.5491, "step": 864640 }, { "epoch": 74.59420289855072, "grad_norm": 35.30533981323242, "learning_rate": 0.0001, "loss": 1.5476, "step": 864696 }, { "epoch": 74.59903381642512, "grad_norm": 1.6491007804870605, "learning_rate": 0.0001, "loss": 1.5448, "step": 864752 }, { "epoch": 74.60386473429952, "grad_norm": 0.2873440980911255, "learning_rate": 0.0001, "loss": 1.5545, "step": 864808 }, { "epoch": 74.6086956521739, "grad_norm": 14.175415992736816, "learning_rate": 0.0001, "loss": 1.5477, "step": 864864 }, { "epoch": 74.61352657004831, "grad_norm": 0.3368375599384308, "learning_rate": 0.0001, "loss": 1.5483, "step": 864920 }, { "epoch": 74.61835748792271, "grad_norm": 0.5232054591178894, "learning_rate": 0.0001, "loss": 1.5484, "step": 864976 }, { "epoch": 74.6231884057971, "grad_norm": 0.7533140182495117, "learning_rate": 0.0001, "loss": 1.5483, "step": 865032 }, { "epoch": 74.6280193236715, "grad_norm": 0.23758140206336975, "learning_rate": 0.0001, "loss": 1.5471, "step": 865088 }, { "epoch": 74.6328502415459, "grad_norm": 0.2520284056663513, "learning_rate": 0.0001, "loss": 1.5471, "step": 865144 }, { "epoch": 74.6376811594203, "grad_norm": 0.3887334167957306, "learning_rate": 0.0001, "loss": 1.5535, "step": 865200 }, { "epoch": 74.64251207729468, "grad_norm": 0.37946945428848267, "learning_rate": 0.0001, "loss": 1.5434, "step": 865256 }, { "epoch": 74.64734299516908, "grad_norm": 1.268738865852356, "learning_rate": 0.0001, "loss": 1.5525, "step": 865312 }, { "epoch": 74.65217391304348, "grad_norm": 1.543115258216858, "learning_rate": 0.0001, "loss": 1.5489, "step": 865368 }, { "epoch": 74.65700483091787, "grad_norm": 0.3241463899612427, "learning_rate": 0.0001, "loss": 1.5518, "step": 865424 }, { "epoch": 74.66183574879227, "grad_norm": 3.3291728496551514, "learning_rate": 0.0001, "loss": 1.5501, "step": 865480 }, { "epoch": 74.66666666666667, "grad_norm": 0.39561182260513306, "learning_rate": 0.0001, "loss": 1.5517, "step": 865536 }, { "epoch": 74.67149758454106, "grad_norm": 0.7831647396087646, "learning_rate": 0.0001, "loss": 1.5581, "step": 865592 }, { "epoch": 74.67632850241546, "grad_norm": 0.4770269989967346, "learning_rate": 0.0001, "loss": 1.5495, "step": 865648 }, { "epoch": 74.68115942028986, "grad_norm": 0.331857830286026, "learning_rate": 0.0001, "loss": 1.5508, "step": 865704 }, { "epoch": 74.68599033816425, "grad_norm": 1.3973979949951172, "learning_rate": 0.0001, "loss": 1.5469, "step": 865760 }, { "epoch": 74.69082125603865, "grad_norm": 0.2651485800743103, "learning_rate": 0.0001, "loss": 1.5448, "step": 865816 }, { "epoch": 74.69565217391305, "grad_norm": 0.33752694725990295, "learning_rate": 0.0001, "loss": 1.5534, "step": 865872 }, { "epoch": 74.70048309178743, "grad_norm": 17.203079223632812, "learning_rate": 0.0001, "loss": 1.5541, "step": 865928 }, { "epoch": 74.70531400966183, "grad_norm": 0.9919813275337219, "learning_rate": 0.0001, "loss": 1.5522, "step": 865984 }, { "epoch": 74.71014492753623, "grad_norm": 0.3330995440483093, "learning_rate": 0.0001, "loss": 1.5525, "step": 866040 }, { "epoch": 74.71497584541063, "grad_norm": 0.3087797462940216, "learning_rate": 0.0001, "loss": 1.5425, "step": 866096 }, { "epoch": 74.71980676328502, "grad_norm": 0.26503416895866394, "learning_rate": 0.0001, "loss": 1.5419, "step": 866152 }, { "epoch": 74.72463768115942, "grad_norm": 1.860371708869934, "learning_rate": 0.0001, "loss": 1.5523, "step": 866208 }, { "epoch": 74.72946859903382, "grad_norm": 0.3464454114437103, "learning_rate": 0.0001, "loss": 1.5541, "step": 866264 }, { "epoch": 74.73429951690821, "grad_norm": 0.5248913764953613, "learning_rate": 0.0001, "loss": 1.5507, "step": 866320 }, { "epoch": 74.73913043478261, "grad_norm": 0.24214953184127808, "learning_rate": 0.0001, "loss": 1.5516, "step": 866376 }, { "epoch": 74.74396135265701, "grad_norm": 0.29688477516174316, "learning_rate": 0.0001, "loss": 1.5534, "step": 866432 }, { "epoch": 74.7487922705314, "grad_norm": 0.28681737184524536, "learning_rate": 0.0001, "loss": 1.5537, "step": 866488 }, { "epoch": 74.7536231884058, "grad_norm": 0.5022196769714355, "learning_rate": 0.0001, "loss": 1.5496, "step": 866544 }, { "epoch": 74.7584541062802, "grad_norm": 0.33167314529418945, "learning_rate": 0.0001, "loss": 1.5556, "step": 866600 }, { "epoch": 74.76328502415458, "grad_norm": 0.49795591831207275, "learning_rate": 0.0001, "loss": 1.5558, "step": 866656 }, { "epoch": 74.76811594202898, "grad_norm": 1.8333935737609863, "learning_rate": 0.0001, "loss": 1.5457, "step": 866712 }, { "epoch": 74.77294685990339, "grad_norm": 1.060968279838562, "learning_rate": 0.0001, "loss": 1.5465, "step": 866768 }, { "epoch": 74.77777777777777, "grad_norm": 0.35890093445777893, "learning_rate": 0.0001, "loss": 1.5555, "step": 866824 }, { "epoch": 74.78260869565217, "grad_norm": 0.4153282642364502, "learning_rate": 0.0001, "loss": 1.5499, "step": 866880 }, { "epoch": 74.78743961352657, "grad_norm": 1.134898066520691, "learning_rate": 0.0001, "loss": 1.5449, "step": 866936 }, { "epoch": 74.79227053140096, "grad_norm": 0.3000851273536682, "learning_rate": 0.0001, "loss": 1.546, "step": 866992 }, { "epoch": 74.79710144927536, "grad_norm": 0.32071754336357117, "learning_rate": 0.0001, "loss": 1.5487, "step": 867048 }, { "epoch": 74.80193236714976, "grad_norm": 0.23861561715602875, "learning_rate": 0.0001, "loss": 1.5549, "step": 867104 }, { "epoch": 74.80676328502416, "grad_norm": 0.467263787984848, "learning_rate": 0.0001, "loss": 1.5466, "step": 867160 }, { "epoch": 74.81159420289855, "grad_norm": 0.26086220145225525, "learning_rate": 0.0001, "loss": 1.5483, "step": 867216 }, { "epoch": 74.81642512077295, "grad_norm": 0.2643926739692688, "learning_rate": 0.0001, "loss": 1.5474, "step": 867272 }, { "epoch": 74.82125603864735, "grad_norm": 0.3317827880382538, "learning_rate": 0.0001, "loss": 1.5563, "step": 867328 }, { "epoch": 74.82608695652173, "grad_norm": 0.6867469549179077, "learning_rate": 0.0001, "loss": 1.5453, "step": 867384 }, { "epoch": 74.83091787439614, "grad_norm": 2.065959930419922, "learning_rate": 0.0001, "loss": 1.5499, "step": 867440 }, { "epoch": 74.83574879227054, "grad_norm": 0.2768230736255646, "learning_rate": 0.0001, "loss": 1.5473, "step": 867496 }, { "epoch": 74.84057971014492, "grad_norm": 0.2217167168855667, "learning_rate": 0.0001, "loss": 1.5548, "step": 867552 }, { "epoch": 74.84541062801932, "grad_norm": 1.1638630628585815, "learning_rate": 0.0001, "loss": 1.5478, "step": 867608 }, { "epoch": 74.85024154589372, "grad_norm": 0.2609773278236389, "learning_rate": 0.0001, "loss": 1.5515, "step": 867664 }, { "epoch": 74.85507246376811, "grad_norm": 2.4273297786712646, "learning_rate": 0.0001, "loss": 1.5517, "step": 867720 }, { "epoch": 74.85990338164251, "grad_norm": 0.29647570848464966, "learning_rate": 0.0001, "loss": 1.545, "step": 867776 }, { "epoch": 74.86473429951691, "grad_norm": 0.2688014507293701, "learning_rate": 0.0001, "loss": 1.5538, "step": 867832 }, { "epoch": 74.8695652173913, "grad_norm": 0.2494051158428192, "learning_rate": 0.0001, "loss": 1.544, "step": 867888 }, { "epoch": 74.8743961352657, "grad_norm": 0.2750936448574066, "learning_rate": 0.0001, "loss": 1.5466, "step": 867944 }, { "epoch": 74.8792270531401, "grad_norm": 0.38827186822891235, "learning_rate": 0.0001, "loss": 1.5515, "step": 868000 }, { "epoch": 74.8840579710145, "grad_norm": 0.3288438618183136, "learning_rate": 0.0001, "loss": 1.5454, "step": 868056 }, { "epoch": 74.88888888888889, "grad_norm": 0.7392421960830688, "learning_rate": 0.0001, "loss": 1.547, "step": 868112 }, { "epoch": 74.89371980676329, "grad_norm": 0.2817467451095581, "learning_rate": 0.0001, "loss": 1.5491, "step": 868168 }, { "epoch": 74.89855072463769, "grad_norm": 0.2815801799297333, "learning_rate": 0.0001, "loss": 1.5494, "step": 868224 }, { "epoch": 74.90338164251207, "grad_norm": 0.2741711139678955, "learning_rate": 0.0001, "loss": 1.5576, "step": 868280 }, { "epoch": 74.90821256038647, "grad_norm": 8.599843978881836, "learning_rate": 0.0001, "loss": 1.547, "step": 868336 }, { "epoch": 74.91304347826087, "grad_norm": 0.33618152141571045, "learning_rate": 0.0001, "loss": 1.5551, "step": 868392 }, { "epoch": 74.91787439613526, "grad_norm": 3.9268906116485596, "learning_rate": 0.0001, "loss": 1.5461, "step": 868448 }, { "epoch": 74.92270531400966, "grad_norm": 0.5365140438079834, "learning_rate": 0.0001, "loss": 1.5483, "step": 868504 }, { "epoch": 74.92753623188406, "grad_norm": 1.0226702690124512, "learning_rate": 0.0001, "loss": 1.5487, "step": 868560 }, { "epoch": 74.93236714975845, "grad_norm": 0.29717695713043213, "learning_rate": 0.0001, "loss": 1.5523, "step": 868616 }, { "epoch": 74.93719806763285, "grad_norm": 0.2439536154270172, "learning_rate": 0.0001, "loss": 1.551, "step": 868672 }, { "epoch": 74.94202898550725, "grad_norm": 0.39867785573005676, "learning_rate": 0.0001, "loss": 1.5467, "step": 868728 }, { "epoch": 74.94685990338164, "grad_norm": 0.5672454237937927, "learning_rate": 0.0001, "loss": 1.5417, "step": 868784 }, { "epoch": 74.95169082125604, "grad_norm": 0.6567291617393494, "learning_rate": 0.0001, "loss": 1.5454, "step": 868840 }, { "epoch": 74.95652173913044, "grad_norm": 0.26315537095069885, "learning_rate": 0.0001, "loss": 1.5506, "step": 868896 }, { "epoch": 74.96135265700484, "grad_norm": 0.2982213795185089, "learning_rate": 0.0001, "loss": 1.5545, "step": 868952 }, { "epoch": 74.96618357487922, "grad_norm": 0.39709794521331787, "learning_rate": 0.0001, "loss": 1.5499, "step": 869008 }, { "epoch": 74.97101449275362, "grad_norm": 0.2861427068710327, "learning_rate": 0.0001, "loss": 1.5559, "step": 869064 }, { "epoch": 74.97584541062803, "grad_norm": 0.3596062958240509, "learning_rate": 0.0001, "loss": 1.5516, "step": 869120 }, { "epoch": 74.98067632850241, "grad_norm": 0.33383315801620483, "learning_rate": 0.0001, "loss": 1.5529, "step": 869176 }, { "epoch": 74.98550724637681, "grad_norm": 0.31804850697517395, "learning_rate": 0.0001, "loss": 1.5517, "step": 869232 }, { "epoch": 74.99033816425121, "grad_norm": 0.30657339096069336, "learning_rate": 0.0001, "loss": 1.5535, "step": 869288 }, { "epoch": 74.9951690821256, "grad_norm": 0.3771638572216034, "learning_rate": 0.0001, "loss": 1.5448, "step": 869344 }, { "epoch": 75.0, "grad_norm": 0.27491244673728943, "learning_rate": 0.0001, "loss": 1.5472, "step": 869400 }, { "epoch": 75.0048309178744, "grad_norm": 0.30550751090049744, "learning_rate": 0.0001, "loss": 1.5516, "step": 869456 }, { "epoch": 75.00966183574879, "grad_norm": 0.9378640055656433, "learning_rate": 0.0001, "loss": 1.5414, "step": 869512 }, { "epoch": 75.01449275362319, "grad_norm": 0.5628950595855713, "learning_rate": 0.0001, "loss": 1.5432, "step": 869568 }, { "epoch": 75.01932367149759, "grad_norm": 0.27950963377952576, "learning_rate": 0.0001, "loss": 1.5445, "step": 869624 }, { "epoch": 75.02415458937197, "grad_norm": 0.32502028346061707, "learning_rate": 0.0001, "loss": 1.5434, "step": 869680 }, { "epoch": 75.02898550724638, "grad_norm": 0.38594478368759155, "learning_rate": 0.0001, "loss": 1.5427, "step": 869736 }, { "epoch": 75.03381642512078, "grad_norm": 0.29134446382522583, "learning_rate": 0.0001, "loss": 1.5352, "step": 869792 }, { "epoch": 75.03864734299516, "grad_norm": 0.2952409088611603, "learning_rate": 0.0001, "loss": 1.5383, "step": 869848 }, { "epoch": 75.04347826086956, "grad_norm": 0.2953055202960968, "learning_rate": 0.0001, "loss": 1.5444, "step": 869904 }, { "epoch": 75.04830917874396, "grad_norm": 0.2791944444179535, "learning_rate": 0.0001, "loss": 1.5413, "step": 869960 }, { "epoch": 75.05314009661836, "grad_norm": 4.054348468780518, "learning_rate": 0.0001, "loss": 1.5459, "step": 870016 }, { "epoch": 75.05797101449275, "grad_norm": 0.3122437000274658, "learning_rate": 0.0001, "loss": 1.5455, "step": 870072 }, { "epoch": 75.06280193236715, "grad_norm": 0.296504944562912, "learning_rate": 0.0001, "loss": 1.5425, "step": 870128 }, { "epoch": 75.06763285024155, "grad_norm": 0.371139794588089, "learning_rate": 0.0001, "loss": 1.5528, "step": 870184 }, { "epoch": 75.07246376811594, "grad_norm": 0.28821834921836853, "learning_rate": 0.0001, "loss": 1.543, "step": 870240 }, { "epoch": 75.07729468599034, "grad_norm": 0.9130119681358337, "learning_rate": 0.0001, "loss": 1.5465, "step": 870296 }, { "epoch": 75.08212560386474, "grad_norm": 0.6075076460838318, "learning_rate": 0.0001, "loss": 1.5391, "step": 870352 }, { "epoch": 75.08695652173913, "grad_norm": 0.3118010461330414, "learning_rate": 0.0001, "loss": 1.5443, "step": 870408 }, { "epoch": 75.09178743961353, "grad_norm": 0.3396507799625397, "learning_rate": 0.0001, "loss": 1.5383, "step": 870464 }, { "epoch": 75.09661835748793, "grad_norm": 0.42275547981262207, "learning_rate": 0.0001, "loss": 1.5427, "step": 870520 }, { "epoch": 75.10144927536231, "grad_norm": 0.2740899324417114, "learning_rate": 0.0001, "loss": 1.5463, "step": 870576 }, { "epoch": 75.10628019323671, "grad_norm": 0.2867985665798187, "learning_rate": 0.0001, "loss": 1.5411, "step": 870632 }, { "epoch": 75.11111111111111, "grad_norm": 0.27264583110809326, "learning_rate": 0.0001, "loss": 1.5461, "step": 870688 }, { "epoch": 75.1159420289855, "grad_norm": 0.3103075921535492, "learning_rate": 0.0001, "loss": 1.5438, "step": 870744 }, { "epoch": 75.1207729468599, "grad_norm": 0.4430798292160034, "learning_rate": 0.0001, "loss": 1.5439, "step": 870800 }, { "epoch": 75.1256038647343, "grad_norm": 0.3036194443702698, "learning_rate": 0.0001, "loss": 1.5458, "step": 870856 }, { "epoch": 75.1304347826087, "grad_norm": 0.3030773401260376, "learning_rate": 0.0001, "loss": 1.5381, "step": 870912 }, { "epoch": 75.13526570048309, "grad_norm": 0.3942970633506775, "learning_rate": 0.0001, "loss": 1.5427, "step": 870968 }, { "epoch": 75.14009661835749, "grad_norm": 5.360647678375244, "learning_rate": 0.0001, "loss": 1.5433, "step": 871024 }, { "epoch": 75.14492753623189, "grad_norm": 0.4593000113964081, "learning_rate": 0.0001, "loss": 1.5396, "step": 871080 }, { "epoch": 75.14975845410628, "grad_norm": 19.450178146362305, "learning_rate": 0.0001, "loss": 1.5513, "step": 871136 }, { "epoch": 75.15458937198068, "grad_norm": 0.3390761613845825, "learning_rate": 0.0001, "loss": 1.5429, "step": 871192 }, { "epoch": 75.15942028985508, "grad_norm": 0.6930444240570068, "learning_rate": 0.0001, "loss": 1.5504, "step": 871248 }, { "epoch": 75.16425120772946, "grad_norm": 0.41377031803131104, "learning_rate": 0.0001, "loss": 1.5451, "step": 871304 }, { "epoch": 75.16908212560386, "grad_norm": 0.270761102437973, "learning_rate": 0.0001, "loss": 1.5491, "step": 871360 }, { "epoch": 75.17391304347827, "grad_norm": 0.5526509284973145, "learning_rate": 0.0001, "loss": 1.544, "step": 871416 }, { "epoch": 75.17874396135265, "grad_norm": 0.26023536920547485, "learning_rate": 0.0001, "loss": 1.5511, "step": 871472 }, { "epoch": 75.18357487922705, "grad_norm": 0.5109171867370605, "learning_rate": 0.0001, "loss": 1.5477, "step": 871528 }, { "epoch": 75.18840579710145, "grad_norm": 0.3176673352718353, "learning_rate": 0.0001, "loss": 1.5457, "step": 871584 }, { "epoch": 75.19323671497584, "grad_norm": 0.23216387629508972, "learning_rate": 0.0001, "loss": 1.553, "step": 871640 }, { "epoch": 75.19806763285024, "grad_norm": 0.241106778383255, "learning_rate": 0.0001, "loss": 1.5466, "step": 871696 }, { "epoch": 75.20289855072464, "grad_norm": 0.26669248938560486, "learning_rate": 0.0001, "loss": 1.5478, "step": 871752 }, { "epoch": 75.20772946859903, "grad_norm": 12.895463943481445, "learning_rate": 0.0001, "loss": 1.5485, "step": 871808 }, { "epoch": 75.21256038647343, "grad_norm": 1.0060585737228394, "learning_rate": 0.0001, "loss": 1.5465, "step": 871864 }, { "epoch": 75.21739130434783, "grad_norm": 0.2664135694503784, "learning_rate": 0.0001, "loss": 1.545, "step": 871920 }, { "epoch": 75.22222222222223, "grad_norm": 1.6529768705368042, "learning_rate": 0.0001, "loss": 1.5464, "step": 871976 }, { "epoch": 75.22705314009661, "grad_norm": 0.2929820120334625, "learning_rate": 0.0001, "loss": 1.5486, "step": 872032 }, { "epoch": 75.23188405797102, "grad_norm": 0.26243939995765686, "learning_rate": 0.0001, "loss": 1.5471, "step": 872088 }, { "epoch": 75.23671497584542, "grad_norm": 0.321478009223938, "learning_rate": 0.0001, "loss": 1.5471, "step": 872144 }, { "epoch": 75.2415458937198, "grad_norm": 0.3501635193824768, "learning_rate": 0.0001, "loss": 1.5461, "step": 872200 }, { "epoch": 75.2463768115942, "grad_norm": 0.27230629324913025, "learning_rate": 0.0001, "loss": 1.5462, "step": 872256 }, { "epoch": 75.2512077294686, "grad_norm": 0.33901146054267883, "learning_rate": 0.0001, "loss": 1.5505, "step": 872312 }, { "epoch": 75.25603864734299, "grad_norm": 0.24202287197113037, "learning_rate": 0.0001, "loss": 1.5461, "step": 872368 }, { "epoch": 75.26086956521739, "grad_norm": 0.43070709705352783, "learning_rate": 0.0001, "loss": 1.5417, "step": 872424 }, { "epoch": 75.26570048309179, "grad_norm": 0.3531452417373657, "learning_rate": 0.0001, "loss": 1.5442, "step": 872480 }, { "epoch": 75.27053140096618, "grad_norm": 16.00021743774414, "learning_rate": 0.0001, "loss": 1.5467, "step": 872536 }, { "epoch": 75.27536231884058, "grad_norm": 0.2786702811717987, "learning_rate": 0.0001, "loss": 1.5483, "step": 872592 }, { "epoch": 75.28019323671498, "grad_norm": 0.2888874113559723, "learning_rate": 0.0001, "loss": 1.5464, "step": 872648 }, { "epoch": 75.28502415458937, "grad_norm": 0.2376817911863327, "learning_rate": 0.0001, "loss": 1.5428, "step": 872704 }, { "epoch": 75.28985507246377, "grad_norm": 0.35416701436042786, "learning_rate": 0.0001, "loss": 1.5388, "step": 872760 }, { "epoch": 75.29468599033817, "grad_norm": 0.3526116907596588, "learning_rate": 0.0001, "loss": 1.5422, "step": 872816 }, { "epoch": 75.29951690821257, "grad_norm": 1.2254382371902466, "learning_rate": 0.0001, "loss": 1.5435, "step": 872872 }, { "epoch": 75.30434782608695, "grad_norm": 0.30430862307548523, "learning_rate": 0.0001, "loss": 1.543, "step": 872928 }, { "epoch": 75.30917874396135, "grad_norm": 3.619002342224121, "learning_rate": 0.0001, "loss": 1.5397, "step": 872984 }, { "epoch": 75.31400966183575, "grad_norm": 0.2760641276836395, "learning_rate": 0.0001, "loss": 1.5472, "step": 873040 }, { "epoch": 75.31884057971014, "grad_norm": 0.2791801691055298, "learning_rate": 0.0001, "loss": 1.5465, "step": 873096 }, { "epoch": 75.32367149758454, "grad_norm": 0.5048266053199768, "learning_rate": 0.0001, "loss": 1.5414, "step": 873152 }, { "epoch": 75.32850241545894, "grad_norm": 0.24071769416332245, "learning_rate": 0.0001, "loss": 1.5481, "step": 873208 }, { "epoch": 75.33333333333333, "grad_norm": 0.32568687200546265, "learning_rate": 0.0001, "loss": 1.5444, "step": 873264 }, { "epoch": 75.33816425120773, "grad_norm": 0.27391576766967773, "learning_rate": 0.0001, "loss": 1.5413, "step": 873320 }, { "epoch": 75.34299516908213, "grad_norm": 0.350544810295105, "learning_rate": 0.0001, "loss": 1.5519, "step": 873376 }, { "epoch": 75.34782608695652, "grad_norm": 0.3293461799621582, "learning_rate": 0.0001, "loss": 1.5433, "step": 873432 }, { "epoch": 75.35265700483092, "grad_norm": 0.3223719894886017, "learning_rate": 0.0001, "loss": 1.5492, "step": 873488 }, { "epoch": 75.35748792270532, "grad_norm": 0.2575567662715912, "learning_rate": 0.0001, "loss": 1.5507, "step": 873544 }, { "epoch": 75.3623188405797, "grad_norm": 0.27295613288879395, "learning_rate": 0.0001, "loss": 1.5366, "step": 873600 }, { "epoch": 75.3671497584541, "grad_norm": 0.839709460735321, "learning_rate": 0.0001, "loss": 1.5498, "step": 873656 }, { "epoch": 75.3719806763285, "grad_norm": 0.24226512014865875, "learning_rate": 0.0001, "loss": 1.5454, "step": 873712 }, { "epoch": 75.3768115942029, "grad_norm": 0.2885420322418213, "learning_rate": 0.0001, "loss": 1.5487, "step": 873768 }, { "epoch": 75.38164251207729, "grad_norm": 0.3001272976398468, "learning_rate": 0.0001, "loss": 1.5469, "step": 873824 }, { "epoch": 75.38647342995169, "grad_norm": 0.30965176224708557, "learning_rate": 0.0001, "loss": 1.5473, "step": 873880 }, { "epoch": 75.3913043478261, "grad_norm": 0.24642500281333923, "learning_rate": 0.0001, "loss": 1.549, "step": 873936 }, { "epoch": 75.39613526570048, "grad_norm": 0.2612013816833496, "learning_rate": 0.0001, "loss": 1.5402, "step": 873992 }, { "epoch": 75.40096618357488, "grad_norm": 0.2224203497171402, "learning_rate": 0.0001, "loss": 1.5528, "step": 874048 }, { "epoch": 75.40579710144928, "grad_norm": 0.3277219533920288, "learning_rate": 0.0001, "loss": 1.5514, "step": 874104 }, { "epoch": 75.41062801932367, "grad_norm": 0.45191490650177, "learning_rate": 0.0001, "loss": 1.5517, "step": 874160 }, { "epoch": 75.41545893719807, "grad_norm": 0.4728807210922241, "learning_rate": 0.0001, "loss": 1.5385, "step": 874216 }, { "epoch": 75.42028985507247, "grad_norm": 0.3054945468902588, "learning_rate": 0.0001, "loss": 1.551, "step": 874272 }, { "epoch": 75.42512077294685, "grad_norm": 0.35907286405563354, "learning_rate": 0.0001, "loss": 1.547, "step": 874328 }, { "epoch": 75.42995169082126, "grad_norm": 0.33575430512428284, "learning_rate": 0.0001, "loss": 1.5544, "step": 874384 }, { "epoch": 75.43478260869566, "grad_norm": 0.41233888268470764, "learning_rate": 0.0001, "loss": 1.5467, "step": 874440 }, { "epoch": 75.43961352657004, "grad_norm": 14.559837341308594, "learning_rate": 0.0001, "loss": 1.5481, "step": 874496 }, { "epoch": 75.44444444444444, "grad_norm": 0.44702762365341187, "learning_rate": 0.0001, "loss": 1.5487, "step": 874552 }, { "epoch": 75.44927536231884, "grad_norm": 0.253591924905777, "learning_rate": 0.0001, "loss": 1.5448, "step": 874608 }, { "epoch": 75.45410628019323, "grad_norm": 0.2549905478954315, "learning_rate": 0.0001, "loss": 1.5462, "step": 874664 }, { "epoch": 75.45893719806763, "grad_norm": 0.2655353844165802, "learning_rate": 0.0001, "loss": 1.546, "step": 874720 }, { "epoch": 75.46376811594203, "grad_norm": 0.2741941511631012, "learning_rate": 0.0001, "loss": 1.5452, "step": 874776 }, { "epoch": 75.46859903381643, "grad_norm": 0.28552889823913574, "learning_rate": 0.0001, "loss": 1.5376, "step": 874832 }, { "epoch": 75.47342995169082, "grad_norm": 9.746400833129883, "learning_rate": 0.0001, "loss": 1.5507, "step": 874888 }, { "epoch": 75.47826086956522, "grad_norm": 0.833098292350769, "learning_rate": 0.0001, "loss": 1.5465, "step": 874944 }, { "epoch": 75.48309178743962, "grad_norm": 0.25142666697502136, "learning_rate": 0.0001, "loss": 1.5432, "step": 875000 }, { "epoch": 75.487922705314, "grad_norm": 0.2830803394317627, "learning_rate": 0.0001, "loss": 1.5436, "step": 875056 }, { "epoch": 75.4927536231884, "grad_norm": 0.27284833788871765, "learning_rate": 0.0001, "loss": 1.5451, "step": 875112 }, { "epoch": 75.4975845410628, "grad_norm": 15.150989532470703, "learning_rate": 0.0001, "loss": 1.5487, "step": 875168 }, { "epoch": 75.5024154589372, "grad_norm": 0.2702958285808563, "learning_rate": 0.0001, "loss": 1.5488, "step": 875224 }, { "epoch": 75.5072463768116, "grad_norm": 1.4669764041900635, "learning_rate": 0.0001, "loss": 1.5434, "step": 875280 }, { "epoch": 75.512077294686, "grad_norm": 0.49722930788993835, "learning_rate": 0.0001, "loss": 1.5512, "step": 875336 }, { "epoch": 75.51690821256038, "grad_norm": 2.1969261169433594, "learning_rate": 0.0001, "loss": 1.5523, "step": 875392 }, { "epoch": 75.52173913043478, "grad_norm": 0.3095143437385559, "learning_rate": 0.0001, "loss": 1.5484, "step": 875448 }, { "epoch": 75.52657004830918, "grad_norm": 0.2840661108493805, "learning_rate": 0.0001, "loss": 1.5405, "step": 875504 }, { "epoch": 75.53140096618357, "grad_norm": 0.2548658549785614, "learning_rate": 0.0001, "loss": 1.5505, "step": 875560 }, { "epoch": 75.53623188405797, "grad_norm": 1.4692379236221313, "learning_rate": 0.0001, "loss": 1.5443, "step": 875616 }, { "epoch": 75.54106280193237, "grad_norm": 0.2276950627565384, "learning_rate": 0.0001, "loss": 1.5478, "step": 875672 }, { "epoch": 75.54589371980677, "grad_norm": 0.2848412096500397, "learning_rate": 0.0001, "loss": 1.5499, "step": 875728 }, { "epoch": 75.55072463768116, "grad_norm": 0.2677062153816223, "learning_rate": 0.0001, "loss": 1.5447, "step": 875784 }, { "epoch": 75.55555555555556, "grad_norm": 0.5318085551261902, "learning_rate": 0.0001, "loss": 1.5415, "step": 875840 }, { "epoch": 75.56038647342996, "grad_norm": 0.31600168347358704, "learning_rate": 0.0001, "loss": 1.5446, "step": 875896 }, { "epoch": 75.56521739130434, "grad_norm": 0.33605146408081055, "learning_rate": 0.0001, "loss": 1.5489, "step": 875952 }, { "epoch": 75.57004830917874, "grad_norm": 26.252307891845703, "learning_rate": 0.0001, "loss": 1.5394, "step": 876008 }, { "epoch": 75.57487922705315, "grad_norm": 0.35619494318962097, "learning_rate": 0.0001, "loss": 1.5448, "step": 876064 }, { "epoch": 75.57971014492753, "grad_norm": 0.24316255748271942, "learning_rate": 0.0001, "loss": 1.5361, "step": 876120 }, { "epoch": 75.58454106280193, "grad_norm": 0.30187100172042847, "learning_rate": 0.0001, "loss": 1.5415, "step": 876176 }, { "epoch": 75.58937198067633, "grad_norm": 4.152764797210693, "learning_rate": 0.0001, "loss": 1.5526, "step": 876232 }, { "epoch": 75.59420289855072, "grad_norm": 0.2731647789478302, "learning_rate": 0.0001, "loss": 1.5476, "step": 876288 }, { "epoch": 75.59903381642512, "grad_norm": 0.31322813034057617, "learning_rate": 0.0001, "loss": 1.5397, "step": 876344 }, { "epoch": 75.60386473429952, "grad_norm": 0.2739797532558441, "learning_rate": 0.0001, "loss": 1.5464, "step": 876400 }, { "epoch": 75.6086956521739, "grad_norm": 0.3269583284854889, "learning_rate": 0.0001, "loss": 1.5478, "step": 876456 }, { "epoch": 75.61352657004831, "grad_norm": 0.5273131132125854, "learning_rate": 0.0001, "loss": 1.5411, "step": 876512 }, { "epoch": 75.61835748792271, "grad_norm": 0.5010157823562622, "learning_rate": 0.0001, "loss": 1.5447, "step": 876568 }, { "epoch": 75.6231884057971, "grad_norm": 0.8247702121734619, "learning_rate": 0.0001, "loss": 1.5507, "step": 876624 }, { "epoch": 75.6280193236715, "grad_norm": 0.3107677698135376, "learning_rate": 0.0001, "loss": 1.5452, "step": 876680 }, { "epoch": 75.6328502415459, "grad_norm": 0.34723061323165894, "learning_rate": 0.0001, "loss": 1.5413, "step": 876736 }, { "epoch": 75.6376811594203, "grad_norm": 0.5755276083946228, "learning_rate": 0.0001, "loss": 1.5435, "step": 876792 }, { "epoch": 75.64251207729468, "grad_norm": 0.29494231939315796, "learning_rate": 0.0001, "loss": 1.5426, "step": 876848 }, { "epoch": 75.64734299516908, "grad_norm": 0.47985443472862244, "learning_rate": 0.0001, "loss": 1.5459, "step": 876904 }, { "epoch": 75.65217391304348, "grad_norm": 0.29422709345817566, "learning_rate": 0.0001, "loss": 1.5515, "step": 876960 }, { "epoch": 75.65700483091787, "grad_norm": 0.26763251423835754, "learning_rate": 0.0001, "loss": 1.5537, "step": 877016 }, { "epoch": 75.66183574879227, "grad_norm": 0.5669326782226562, "learning_rate": 0.0001, "loss": 1.5442, "step": 877072 }, { "epoch": 75.66666666666667, "grad_norm": 0.3339022397994995, "learning_rate": 0.0001, "loss": 1.552, "step": 877128 }, { "epoch": 75.67149758454106, "grad_norm": 0.5008623003959656, "learning_rate": 0.0001, "loss": 1.543, "step": 877184 }, { "epoch": 75.67632850241546, "grad_norm": 0.35621926188468933, "learning_rate": 0.0001, "loss": 1.5414, "step": 877240 }, { "epoch": 75.68115942028986, "grad_norm": 0.2826424241065979, "learning_rate": 0.0001, "loss": 1.5449, "step": 877296 }, { "epoch": 75.68599033816425, "grad_norm": 0.2812899351119995, "learning_rate": 0.0001, "loss": 1.5484, "step": 877352 }, { "epoch": 75.69082125603865, "grad_norm": 0.2626188397407532, "learning_rate": 0.0001, "loss": 1.5468, "step": 877408 }, { "epoch": 75.69565217391305, "grad_norm": 1.8394206762313843, "learning_rate": 0.0001, "loss": 1.5491, "step": 877464 }, { "epoch": 75.70048309178743, "grad_norm": 0.2781457304954529, "learning_rate": 0.0001, "loss": 1.5442, "step": 877520 }, { "epoch": 75.70531400966183, "grad_norm": 0.3183233439922333, "learning_rate": 0.0001, "loss": 1.5436, "step": 877576 }, { "epoch": 75.71014492753623, "grad_norm": 0.31630197167396545, "learning_rate": 0.0001, "loss": 1.5456, "step": 877632 }, { "epoch": 75.71497584541063, "grad_norm": 0.3459679186344147, "learning_rate": 0.0001, "loss": 1.5435, "step": 877688 }, { "epoch": 75.71980676328502, "grad_norm": 0.2530309557914734, "learning_rate": 0.0001, "loss": 1.5538, "step": 877744 }, { "epoch": 75.72463768115942, "grad_norm": 1.5250476598739624, "learning_rate": 0.0001, "loss": 1.5478, "step": 877800 }, { "epoch": 75.72946859903382, "grad_norm": 0.32940441370010376, "learning_rate": 0.0001, "loss": 1.543, "step": 877856 }, { "epoch": 75.73429951690821, "grad_norm": 0.5773353576660156, "learning_rate": 0.0001, "loss": 1.5532, "step": 877912 }, { "epoch": 75.73913043478261, "grad_norm": 0.3159562051296234, "learning_rate": 0.0001, "loss": 1.5442, "step": 877968 }, { "epoch": 75.74396135265701, "grad_norm": 0.4361148476600647, "learning_rate": 0.0001, "loss": 1.5473, "step": 878024 }, { "epoch": 75.7487922705314, "grad_norm": 0.25758612155914307, "learning_rate": 0.0001, "loss": 1.5463, "step": 878080 }, { "epoch": 75.7536231884058, "grad_norm": 0.25703808665275574, "learning_rate": 0.0001, "loss": 1.55, "step": 878136 }, { "epoch": 75.7584541062802, "grad_norm": 0.34620165824890137, "learning_rate": 0.0001, "loss": 1.5501, "step": 878192 }, { "epoch": 75.76328502415458, "grad_norm": 0.3781526982784271, "learning_rate": 0.0001, "loss": 1.544, "step": 878248 }, { "epoch": 75.76811594202898, "grad_norm": 0.30678799748420715, "learning_rate": 0.0001, "loss": 1.5549, "step": 878304 }, { "epoch": 75.77294685990339, "grad_norm": 0.28564196825027466, "learning_rate": 0.0001, "loss": 1.5519, "step": 878360 }, { "epoch": 75.77777777777777, "grad_norm": 24.932905197143555, "learning_rate": 0.0001, "loss": 1.5501, "step": 878416 }, { "epoch": 75.78260869565217, "grad_norm": 0.287201464176178, "learning_rate": 0.0001, "loss": 1.5447, "step": 878472 }, { "epoch": 75.78743961352657, "grad_norm": 1.3108958005905151, "learning_rate": 0.0001, "loss": 1.5408, "step": 878528 }, { "epoch": 75.79227053140096, "grad_norm": 0.31970974802970886, "learning_rate": 0.0001, "loss": 1.5417, "step": 878584 }, { "epoch": 75.79710144927536, "grad_norm": 0.33653029799461365, "learning_rate": 0.0001, "loss": 1.5478, "step": 878640 }, { "epoch": 75.80193236714976, "grad_norm": 0.3229396343231201, "learning_rate": 0.0001, "loss": 1.5456, "step": 878696 }, { "epoch": 75.80676328502416, "grad_norm": 6.04819393157959, "learning_rate": 0.0001, "loss": 1.547, "step": 878752 }, { "epoch": 75.81159420289855, "grad_norm": 1.1188117265701294, "learning_rate": 0.0001, "loss": 1.5501, "step": 878808 }, { "epoch": 75.81642512077295, "grad_norm": 0.25698158144950867, "learning_rate": 0.0001, "loss": 1.5422, "step": 878864 }, { "epoch": 75.82125603864735, "grad_norm": 0.32428425550460815, "learning_rate": 0.0001, "loss": 1.5482, "step": 878920 }, { "epoch": 75.82608695652173, "grad_norm": 0.32023170590400696, "learning_rate": 0.0001, "loss": 1.5497, "step": 878976 }, { "epoch": 75.83091787439614, "grad_norm": 0.2976889908313751, "learning_rate": 0.0001, "loss": 1.5496, "step": 879032 }, { "epoch": 75.83574879227054, "grad_norm": 9.49255084991455, "learning_rate": 0.0001, "loss": 1.5431, "step": 879088 }, { "epoch": 75.84057971014492, "grad_norm": 0.30988919734954834, "learning_rate": 0.0001, "loss": 1.5455, "step": 879144 }, { "epoch": 75.84541062801932, "grad_norm": 0.34071823954582214, "learning_rate": 0.0001, "loss": 1.5504, "step": 879200 }, { "epoch": 75.85024154589372, "grad_norm": 0.5381593704223633, "learning_rate": 0.0001, "loss": 1.5425, "step": 879256 }, { "epoch": 75.85507246376811, "grad_norm": 0.2725801169872284, "learning_rate": 0.0001, "loss": 1.5478, "step": 879312 }, { "epoch": 75.85990338164251, "grad_norm": 0.360377699136734, "learning_rate": 0.0001, "loss": 1.5492, "step": 879368 }, { "epoch": 75.86473429951691, "grad_norm": 0.3161342144012451, "learning_rate": 0.0001, "loss": 1.5508, "step": 879424 }, { "epoch": 75.8695652173913, "grad_norm": 0.2971298098564148, "learning_rate": 0.0001, "loss": 1.5482, "step": 879480 }, { "epoch": 75.8743961352657, "grad_norm": 0.26947543025016785, "learning_rate": 0.0001, "loss": 1.5511, "step": 879536 }, { "epoch": 75.8792270531401, "grad_norm": 0.2758001685142517, "learning_rate": 0.0001, "loss": 1.5438, "step": 879592 }, { "epoch": 75.8840579710145, "grad_norm": 0.29606738686561584, "learning_rate": 0.0001, "loss": 1.5476, "step": 879648 }, { "epoch": 75.88888888888889, "grad_norm": 0.2888810336589813, "learning_rate": 0.0001, "loss": 1.5429, "step": 879704 }, { "epoch": 75.89371980676329, "grad_norm": 0.3939531743526459, "learning_rate": 0.0001, "loss": 1.5451, "step": 879760 }, { "epoch": 75.89855072463769, "grad_norm": 0.5656301379203796, "learning_rate": 0.0001, "loss": 1.5502, "step": 879816 }, { "epoch": 75.90338164251207, "grad_norm": 0.39469635486602783, "learning_rate": 0.0001, "loss": 1.5509, "step": 879872 }, { "epoch": 75.90821256038647, "grad_norm": 0.3055523931980133, "learning_rate": 0.0001, "loss": 1.5487, "step": 879928 }, { "epoch": 75.91304347826087, "grad_norm": 0.4613645672798157, "learning_rate": 0.0001, "loss": 1.5467, "step": 879984 }, { "epoch": 75.91787439613526, "grad_norm": 0.3154642581939697, "learning_rate": 0.0001, "loss": 1.546, "step": 880040 }, { "epoch": 75.92270531400966, "grad_norm": 0.502507746219635, "learning_rate": 0.0001, "loss": 1.5442, "step": 880096 }, { "epoch": 75.92753623188406, "grad_norm": 0.2908695936203003, "learning_rate": 0.0001, "loss": 1.5473, "step": 880152 }, { "epoch": 75.93236714975845, "grad_norm": 0.3368602693080902, "learning_rate": 0.0001, "loss": 1.5409, "step": 880208 }, { "epoch": 75.93719806763285, "grad_norm": 0.23832818865776062, "learning_rate": 0.0001, "loss": 1.551, "step": 880264 }, { "epoch": 75.94202898550725, "grad_norm": 0.27345654368400574, "learning_rate": 0.0001, "loss": 1.5552, "step": 880320 }, { "epoch": 75.94685990338164, "grad_norm": 0.6684137582778931, "learning_rate": 0.0001, "loss": 1.5448, "step": 880376 }, { "epoch": 75.95169082125604, "grad_norm": 0.22771592438220978, "learning_rate": 0.0001, "loss": 1.5409, "step": 880432 }, { "epoch": 75.95652173913044, "grad_norm": 0.30767086148262024, "learning_rate": 0.0001, "loss": 1.5466, "step": 880488 }, { "epoch": 75.96135265700484, "grad_norm": 0.27861136198043823, "learning_rate": 0.0001, "loss": 1.5485, "step": 880544 }, { "epoch": 75.96618357487922, "grad_norm": 0.3238072991371155, "learning_rate": 0.0001, "loss": 1.547, "step": 880600 }, { "epoch": 75.97101449275362, "grad_norm": 0.6486573815345764, "learning_rate": 0.0001, "loss": 1.5492, "step": 880656 }, { "epoch": 75.97584541062803, "grad_norm": 0.5957919955253601, "learning_rate": 0.0001, "loss": 1.5452, "step": 880712 }, { "epoch": 75.98067632850241, "grad_norm": 0.33002564311027527, "learning_rate": 0.0001, "loss": 1.5498, "step": 880768 }, { "epoch": 75.98550724637681, "grad_norm": 0.2702052891254425, "learning_rate": 0.0001, "loss": 1.5532, "step": 880824 }, { "epoch": 75.99033816425121, "grad_norm": 0.25966939330101013, "learning_rate": 0.0001, "loss": 1.5468, "step": 880880 }, { "epoch": 75.9951690821256, "grad_norm": 0.2717050015926361, "learning_rate": 0.0001, "loss": 1.5504, "step": 880936 }, { "epoch": 76.0, "grad_norm": 0.2785145342350006, "learning_rate": 0.0001, "loss": 1.5471, "step": 880992 }, { "epoch": 76.0048309178744, "grad_norm": 17.509103775024414, "learning_rate": 0.0001, "loss": 1.5385, "step": 881048 }, { "epoch": 76.00966183574879, "grad_norm": 0.4202020466327667, "learning_rate": 0.0001, "loss": 1.5397, "step": 881104 }, { "epoch": 76.01449275362319, "grad_norm": 1.6993886232376099, "learning_rate": 0.0001, "loss": 1.5473, "step": 881160 }, { "epoch": 76.01932367149759, "grad_norm": 0.3246864974498749, "learning_rate": 0.0001, "loss": 1.5417, "step": 881216 }, { "epoch": 76.02415458937197, "grad_norm": 0.2729562520980835, "learning_rate": 0.0001, "loss": 1.5481, "step": 881272 }, { "epoch": 76.02898550724638, "grad_norm": 0.32472681999206543, "learning_rate": 0.0001, "loss": 1.5421, "step": 881328 }, { "epoch": 76.03381642512078, "grad_norm": 0.2465248703956604, "learning_rate": 0.0001, "loss": 1.5358, "step": 881384 }, { "epoch": 76.03864734299516, "grad_norm": 0.24072110652923584, "learning_rate": 0.0001, "loss": 1.5399, "step": 881440 }, { "epoch": 76.04347826086956, "grad_norm": 0.3144630789756775, "learning_rate": 0.0001, "loss": 1.546, "step": 881496 }, { "epoch": 76.04830917874396, "grad_norm": 1.3697556257247925, "learning_rate": 0.0001, "loss": 1.5512, "step": 881552 }, { "epoch": 76.05314009661836, "grad_norm": 0.37025317549705505, "learning_rate": 0.0001, "loss": 1.5393, "step": 881608 }, { "epoch": 76.05797101449275, "grad_norm": 0.24238808453083038, "learning_rate": 0.0001, "loss": 1.5398, "step": 881664 }, { "epoch": 76.06280193236715, "grad_norm": 0.25177276134490967, "learning_rate": 0.0001, "loss": 1.5325, "step": 881720 }, { "epoch": 76.06763285024155, "grad_norm": 0.31002309918403625, "learning_rate": 0.0001, "loss": 1.5481, "step": 881776 }, { "epoch": 76.07246376811594, "grad_norm": 0.36887338757514954, "learning_rate": 0.0001, "loss": 1.5428, "step": 881832 }, { "epoch": 76.07729468599034, "grad_norm": 0.2814287841320038, "learning_rate": 0.0001, "loss": 1.5384, "step": 881888 }, { "epoch": 76.08212560386474, "grad_norm": 0.3171651363372803, "learning_rate": 0.0001, "loss": 1.5437, "step": 881944 }, { "epoch": 76.08695652173913, "grad_norm": 0.23429520428180695, "learning_rate": 0.0001, "loss": 1.5432, "step": 882000 }, { "epoch": 76.09178743961353, "grad_norm": 0.2407567799091339, "learning_rate": 0.0001, "loss": 1.5474, "step": 882056 }, { "epoch": 76.09661835748793, "grad_norm": 0.24385757744312286, "learning_rate": 0.0001, "loss": 1.5409, "step": 882112 }, { "epoch": 76.10144927536231, "grad_norm": 0.3652181327342987, "learning_rate": 0.0001, "loss": 1.5491, "step": 882168 }, { "epoch": 76.10628019323671, "grad_norm": 0.24167920649051666, "learning_rate": 0.0001, "loss": 1.5453, "step": 882224 }, { "epoch": 76.11111111111111, "grad_norm": 0.2754625976085663, "learning_rate": 0.0001, "loss": 1.5381, "step": 882280 }, { "epoch": 76.1159420289855, "grad_norm": 0.29277297854423523, "learning_rate": 0.0001, "loss": 1.5469, "step": 882336 }, { "epoch": 76.1207729468599, "grad_norm": 0.4007415771484375, "learning_rate": 0.0001, "loss": 1.5436, "step": 882392 }, { "epoch": 76.1256038647343, "grad_norm": 0.23513691127300262, "learning_rate": 0.0001, "loss": 1.539, "step": 882448 }, { "epoch": 76.1304347826087, "grad_norm": 0.26998183131217957, "learning_rate": 0.0001, "loss": 1.5398, "step": 882504 }, { "epoch": 76.13526570048309, "grad_norm": 5.430777549743652, "learning_rate": 0.0001, "loss": 1.5479, "step": 882560 }, { "epoch": 76.14009661835749, "grad_norm": 0.3250654339790344, "learning_rate": 0.0001, "loss": 1.546, "step": 882616 }, { "epoch": 76.14492753623189, "grad_norm": 0.3108275830745697, "learning_rate": 0.0001, "loss": 1.5403, "step": 882672 }, { "epoch": 76.14975845410628, "grad_norm": 0.3048650622367859, "learning_rate": 0.0001, "loss": 1.54, "step": 882728 }, { "epoch": 76.15458937198068, "grad_norm": 0.32402530312538147, "learning_rate": 0.0001, "loss": 1.5434, "step": 882784 }, { "epoch": 76.15942028985508, "grad_norm": 0.27845299243927, "learning_rate": 0.0001, "loss": 1.5431, "step": 882840 }, { "epoch": 76.16425120772946, "grad_norm": 0.38094329833984375, "learning_rate": 0.0001, "loss": 1.5442, "step": 882896 }, { "epoch": 76.16908212560386, "grad_norm": 0.3227974772453308, "learning_rate": 0.0001, "loss": 1.54, "step": 882952 }, { "epoch": 76.17391304347827, "grad_norm": 0.46744757890701294, "learning_rate": 0.0001, "loss": 1.541, "step": 883008 }, { "epoch": 76.17874396135265, "grad_norm": 0.2902345657348633, "learning_rate": 0.0001, "loss": 1.5405, "step": 883064 }, { "epoch": 76.18357487922705, "grad_norm": 1.1694716215133667, "learning_rate": 0.0001, "loss": 1.5347, "step": 883120 }, { "epoch": 76.18840579710145, "grad_norm": 1.6634471416473389, "learning_rate": 0.0001, "loss": 1.5441, "step": 883176 }, { "epoch": 76.19323671497584, "grad_norm": 2.520158529281616, "learning_rate": 0.0001, "loss": 1.5396, "step": 883232 }, { "epoch": 76.19806763285024, "grad_norm": 0.7906051278114319, "learning_rate": 0.0001, "loss": 1.5446, "step": 883288 }, { "epoch": 76.20289855072464, "grad_norm": 0.3348827362060547, "learning_rate": 0.0001, "loss": 1.5428, "step": 883344 }, { "epoch": 76.20772946859903, "grad_norm": 0.37884166836738586, "learning_rate": 0.0001, "loss": 1.5437, "step": 883400 }, { "epoch": 76.21256038647343, "grad_norm": 0.28352078795433044, "learning_rate": 0.0001, "loss": 1.5513, "step": 883456 }, { "epoch": 76.21739130434783, "grad_norm": 0.39173397421836853, "learning_rate": 0.0001, "loss": 1.5461, "step": 883512 }, { "epoch": 76.22222222222223, "grad_norm": 0.2553957402706146, "learning_rate": 0.0001, "loss": 1.5408, "step": 883568 }, { "epoch": 76.22705314009661, "grad_norm": 0.2925727367401123, "learning_rate": 0.0001, "loss": 1.5412, "step": 883624 }, { "epoch": 76.23188405797102, "grad_norm": 0.30755195021629333, "learning_rate": 0.0001, "loss": 1.5436, "step": 883680 }, { "epoch": 76.23671497584542, "grad_norm": 0.33085566759109497, "learning_rate": 0.0001, "loss": 1.5436, "step": 883736 }, { "epoch": 76.2415458937198, "grad_norm": 6.215447902679443, "learning_rate": 0.0001, "loss": 1.5423, "step": 883792 }, { "epoch": 76.2463768115942, "grad_norm": 0.2680525779724121, "learning_rate": 0.0001, "loss": 1.5378, "step": 883848 }, { "epoch": 76.2512077294686, "grad_norm": 0.2394089251756668, "learning_rate": 0.0001, "loss": 1.5384, "step": 883904 }, { "epoch": 76.25603864734299, "grad_norm": 0.3081532120704651, "learning_rate": 0.0001, "loss": 1.5412, "step": 883960 }, { "epoch": 76.26086956521739, "grad_norm": 0.35599109530448914, "learning_rate": 0.0001, "loss": 1.5366, "step": 884016 }, { "epoch": 76.26570048309179, "grad_norm": 0.31715127825737, "learning_rate": 0.0001, "loss": 1.5419, "step": 884072 }, { "epoch": 76.27053140096618, "grad_norm": 0.9623591899871826, "learning_rate": 0.0001, "loss": 1.5387, "step": 884128 }, { "epoch": 76.27536231884058, "grad_norm": 0.337245911359787, "learning_rate": 0.0001, "loss": 1.5394, "step": 884184 }, { "epoch": 76.28019323671498, "grad_norm": 0.287562757730484, "learning_rate": 0.0001, "loss": 1.5445, "step": 884240 }, { "epoch": 76.28502415458937, "grad_norm": 0.30816733837127686, "learning_rate": 0.0001, "loss": 1.54, "step": 884296 }, { "epoch": 76.28985507246377, "grad_norm": 1.1384440660476685, "learning_rate": 0.0001, "loss": 1.5428, "step": 884352 }, { "epoch": 76.29468599033817, "grad_norm": 0.30675897002220154, "learning_rate": 0.0001, "loss": 1.5407, "step": 884408 }, { "epoch": 76.29951690821257, "grad_norm": 0.3911447823047638, "learning_rate": 0.0001, "loss": 1.5408, "step": 884464 }, { "epoch": 76.30434782608695, "grad_norm": 0.9115587472915649, "learning_rate": 0.0001, "loss": 1.539, "step": 884520 }, { "epoch": 76.30917874396135, "grad_norm": 0.846619188785553, "learning_rate": 0.0001, "loss": 1.5425, "step": 884576 }, { "epoch": 76.31400966183575, "grad_norm": 0.31057503819465637, "learning_rate": 0.0001, "loss": 1.5411, "step": 884632 }, { "epoch": 76.31884057971014, "grad_norm": 0.8239922523498535, "learning_rate": 0.0001, "loss": 1.5385, "step": 884688 }, { "epoch": 76.32367149758454, "grad_norm": 2.3425867557525635, "learning_rate": 0.0001, "loss": 1.5356, "step": 884744 }, { "epoch": 76.32850241545894, "grad_norm": 0.4647968113422394, "learning_rate": 0.0001, "loss": 1.5465, "step": 884800 }, { "epoch": 76.33333333333333, "grad_norm": 0.23446136713027954, "learning_rate": 0.0001, "loss": 1.5384, "step": 884856 }, { "epoch": 76.33816425120773, "grad_norm": 0.2308267205953598, "learning_rate": 0.0001, "loss": 1.5342, "step": 884912 }, { "epoch": 76.34299516908213, "grad_norm": 0.27309876680374146, "learning_rate": 0.0001, "loss": 1.5371, "step": 884968 }, { "epoch": 76.34782608695652, "grad_norm": 0.26338934898376465, "learning_rate": 0.0001, "loss": 1.543, "step": 885024 }, { "epoch": 76.35265700483092, "grad_norm": 0.25777146220207214, "learning_rate": 0.0001, "loss": 1.5463, "step": 885080 }, { "epoch": 76.35748792270532, "grad_norm": 0.2806294858455658, "learning_rate": 0.0001, "loss": 1.545, "step": 885136 }, { "epoch": 76.3623188405797, "grad_norm": 0.3517148196697235, "learning_rate": 0.0001, "loss": 1.5425, "step": 885192 }, { "epoch": 76.3671497584541, "grad_norm": 0.3098033666610718, "learning_rate": 0.0001, "loss": 1.5439, "step": 885248 }, { "epoch": 76.3719806763285, "grad_norm": 13.87988567352295, "learning_rate": 0.0001, "loss": 1.5412, "step": 885304 }, { "epoch": 76.3768115942029, "grad_norm": 0.22567668557167053, "learning_rate": 0.0001, "loss": 1.5454, "step": 885360 }, { "epoch": 76.38164251207729, "grad_norm": 0.44934800267219543, "learning_rate": 0.0001, "loss": 1.5365, "step": 885416 }, { "epoch": 76.38647342995169, "grad_norm": 0.3803689479827881, "learning_rate": 0.0001, "loss": 1.5452, "step": 885472 }, { "epoch": 76.3913043478261, "grad_norm": 0.30743464827537537, "learning_rate": 0.0001, "loss": 1.5432, "step": 885528 }, { "epoch": 76.39613526570048, "grad_norm": 0.3427988588809967, "learning_rate": 0.0001, "loss": 1.5363, "step": 885584 }, { "epoch": 76.40096618357488, "grad_norm": 1.0735911130905151, "learning_rate": 0.0001, "loss": 1.551, "step": 885640 }, { "epoch": 76.40579710144928, "grad_norm": 0.3005915880203247, "learning_rate": 0.0001, "loss": 1.5445, "step": 885696 }, { "epoch": 76.41062801932367, "grad_norm": 0.38227444887161255, "learning_rate": 0.0001, "loss": 1.5409, "step": 885752 }, { "epoch": 76.41545893719807, "grad_norm": 1.0711023807525635, "learning_rate": 0.0001, "loss": 1.5449, "step": 885808 }, { "epoch": 76.42028985507247, "grad_norm": 0.3207992911338806, "learning_rate": 0.0001, "loss": 1.5437, "step": 885864 }, { "epoch": 76.42512077294685, "grad_norm": 0.3321024775505066, "learning_rate": 0.0001, "loss": 1.5501, "step": 885920 }, { "epoch": 76.42995169082126, "grad_norm": 0.9657902717590332, "learning_rate": 0.0001, "loss": 1.5484, "step": 885976 }, { "epoch": 76.43478260869566, "grad_norm": 0.3105214238166809, "learning_rate": 0.0001, "loss": 1.5453, "step": 886032 }, { "epoch": 76.43961352657004, "grad_norm": 0.34124520421028137, "learning_rate": 0.0001, "loss": 1.5415, "step": 886088 }, { "epoch": 76.44444444444444, "grad_norm": 0.38019201159477234, "learning_rate": 0.0001, "loss": 1.5497, "step": 886144 }, { "epoch": 76.44927536231884, "grad_norm": 0.5216482877731323, "learning_rate": 0.0001, "loss": 1.539, "step": 886200 }, { "epoch": 76.45410628019323, "grad_norm": 0.30478429794311523, "learning_rate": 0.0001, "loss": 1.5393, "step": 886256 }, { "epoch": 76.45893719806763, "grad_norm": 11.684835433959961, "learning_rate": 0.0001, "loss": 1.5402, "step": 886312 }, { "epoch": 76.46376811594203, "grad_norm": 3.400697708129883, "learning_rate": 0.0001, "loss": 1.5469, "step": 886368 }, { "epoch": 76.46859903381643, "grad_norm": 0.9653022885322571, "learning_rate": 0.0001, "loss": 1.5471, "step": 886424 }, { "epoch": 76.47342995169082, "grad_norm": 0.5980508327484131, "learning_rate": 0.0001, "loss": 1.5418, "step": 886480 }, { "epoch": 76.47826086956522, "grad_norm": 1.1004393100738525, "learning_rate": 0.0001, "loss": 1.5504, "step": 886536 }, { "epoch": 76.48309178743962, "grad_norm": 0.3187023997306824, "learning_rate": 0.0001, "loss": 1.5463, "step": 886592 }, { "epoch": 76.487922705314, "grad_norm": 0.26167652010917664, "learning_rate": 0.0001, "loss": 1.5501, "step": 886648 }, { "epoch": 76.4927536231884, "grad_norm": 0.3114655017852783, "learning_rate": 0.0001, "loss": 1.5434, "step": 886704 }, { "epoch": 76.4975845410628, "grad_norm": 0.6821562647819519, "learning_rate": 0.0001, "loss": 1.5517, "step": 886760 }, { "epoch": 76.5024154589372, "grad_norm": 1.050826907157898, "learning_rate": 0.0001, "loss": 1.5435, "step": 886816 }, { "epoch": 76.5072463768116, "grad_norm": 0.8368690609931946, "learning_rate": 0.0001, "loss": 1.5347, "step": 886872 }, { "epoch": 76.512077294686, "grad_norm": 0.6036837100982666, "learning_rate": 0.0001, "loss": 1.5443, "step": 886928 }, { "epoch": 76.51690821256038, "grad_norm": 0.3135110139846802, "learning_rate": 0.0001, "loss": 1.5504, "step": 886984 }, { "epoch": 76.52173913043478, "grad_norm": 0.590304434299469, "learning_rate": 0.0001, "loss": 1.5486, "step": 887040 }, { "epoch": 76.52657004830918, "grad_norm": 0.27466315031051636, "learning_rate": 0.0001, "loss": 1.551, "step": 887096 }, { "epoch": 76.53140096618357, "grad_norm": 0.30429109930992126, "learning_rate": 0.0001, "loss": 1.5431, "step": 887152 }, { "epoch": 76.53623188405797, "grad_norm": 0.844764769077301, "learning_rate": 0.0001, "loss": 1.5381, "step": 887208 }, { "epoch": 76.54106280193237, "grad_norm": 0.5797962546348572, "learning_rate": 0.0001, "loss": 1.5419, "step": 887264 }, { "epoch": 76.54589371980677, "grad_norm": 0.2486310750246048, "learning_rate": 0.0001, "loss": 1.5432, "step": 887320 }, { "epoch": 76.55072463768116, "grad_norm": 0.33988359570503235, "learning_rate": 0.0001, "loss": 1.5454, "step": 887376 }, { "epoch": 76.55555555555556, "grad_norm": 0.35853761434555054, "learning_rate": 0.0001, "loss": 1.5422, "step": 887432 }, { "epoch": 76.56038647342996, "grad_norm": 0.28681501746177673, "learning_rate": 0.0001, "loss": 1.5443, "step": 887488 }, { "epoch": 76.56521739130434, "grad_norm": 0.43025267124176025, "learning_rate": 0.0001, "loss": 1.5441, "step": 887544 }, { "epoch": 76.57004830917874, "grad_norm": 0.3586062490940094, "learning_rate": 0.0001, "loss": 1.5418, "step": 887600 }, { "epoch": 76.57487922705315, "grad_norm": 0.27319589257240295, "learning_rate": 0.0001, "loss": 1.5503, "step": 887656 }, { "epoch": 76.57971014492753, "grad_norm": 0.34221699833869934, "learning_rate": 0.0001, "loss": 1.5393, "step": 887712 }, { "epoch": 76.58454106280193, "grad_norm": 0.26851987838745117, "learning_rate": 0.0001, "loss": 1.5431, "step": 887768 }, { "epoch": 76.58937198067633, "grad_norm": 0.303510457277298, "learning_rate": 0.0001, "loss": 1.5547, "step": 887824 }, { "epoch": 76.59420289855072, "grad_norm": 0.3159205913543701, "learning_rate": 0.0001, "loss": 1.5518, "step": 887880 }, { "epoch": 76.59903381642512, "grad_norm": 16.154712677001953, "learning_rate": 0.0001, "loss": 1.5429, "step": 887936 }, { "epoch": 76.60386473429952, "grad_norm": 0.27912071347236633, "learning_rate": 0.0001, "loss": 1.547, "step": 887992 }, { "epoch": 76.6086956521739, "grad_norm": 0.28424662351608276, "learning_rate": 0.0001, "loss": 1.546, "step": 888048 }, { "epoch": 76.61352657004831, "grad_norm": 0.34707069396972656, "learning_rate": 0.0001, "loss": 1.5499, "step": 888104 }, { "epoch": 76.61835748792271, "grad_norm": 18.83921241760254, "learning_rate": 0.0001, "loss": 1.5463, "step": 888160 }, { "epoch": 76.6231884057971, "grad_norm": 0.23330742120742798, "learning_rate": 0.0001, "loss": 1.5535, "step": 888216 }, { "epoch": 76.6280193236715, "grad_norm": 0.3328731060028076, "learning_rate": 0.0001, "loss": 1.5493, "step": 888272 }, { "epoch": 76.6328502415459, "grad_norm": 0.2663881182670593, "learning_rate": 0.0001, "loss": 1.5454, "step": 888328 }, { "epoch": 76.6376811594203, "grad_norm": 0.29217568039894104, "learning_rate": 0.0001, "loss": 1.5487, "step": 888384 }, { "epoch": 76.64251207729468, "grad_norm": 0.30596697330474854, "learning_rate": 0.0001, "loss": 1.549, "step": 888440 }, { "epoch": 76.64734299516908, "grad_norm": 0.6659618616104126, "learning_rate": 0.0001, "loss": 1.5445, "step": 888496 }, { "epoch": 76.65217391304348, "grad_norm": 17.874103546142578, "learning_rate": 0.0001, "loss": 1.5425, "step": 888552 }, { "epoch": 76.65700483091787, "grad_norm": 0.28543412685394287, "learning_rate": 0.0001, "loss": 1.5488, "step": 888608 }, { "epoch": 76.66183574879227, "grad_norm": 0.3802967667579651, "learning_rate": 0.0001, "loss": 1.5435, "step": 888664 }, { "epoch": 76.66666666666667, "grad_norm": 0.3275357186794281, "learning_rate": 0.0001, "loss": 1.5429, "step": 888720 }, { "epoch": 76.67149758454106, "grad_norm": 0.2737198770046234, "learning_rate": 0.0001, "loss": 1.5415, "step": 888776 }, { "epoch": 76.67632850241546, "grad_norm": 0.2852764129638672, "learning_rate": 0.0001, "loss": 1.547, "step": 888832 }, { "epoch": 76.68115942028986, "grad_norm": 6.191061496734619, "learning_rate": 0.0001, "loss": 1.5479, "step": 888888 }, { "epoch": 76.68599033816425, "grad_norm": 0.36777907609939575, "learning_rate": 0.0001, "loss": 1.5426, "step": 888944 }, { "epoch": 76.69082125603865, "grad_norm": 0.2721315026283264, "learning_rate": 0.0001, "loss": 1.5494, "step": 889000 }, { "epoch": 76.69565217391305, "grad_norm": 0.4133915305137634, "learning_rate": 0.0001, "loss": 1.5416, "step": 889056 }, { "epoch": 76.70048309178743, "grad_norm": 0.28051525354385376, "learning_rate": 0.0001, "loss": 1.5451, "step": 889112 }, { "epoch": 76.70531400966183, "grad_norm": 0.2915217876434326, "learning_rate": 0.0001, "loss": 1.5444, "step": 889168 }, { "epoch": 76.71014492753623, "grad_norm": 0.8325273990631104, "learning_rate": 0.0001, "loss": 1.5458, "step": 889224 }, { "epoch": 76.71497584541063, "grad_norm": 0.26914194226264954, "learning_rate": 0.0001, "loss": 1.5475, "step": 889280 }, { "epoch": 76.71980676328502, "grad_norm": 0.3193332850933075, "learning_rate": 0.0001, "loss": 1.5401, "step": 889336 }, { "epoch": 76.72463768115942, "grad_norm": 1.004360318183899, "learning_rate": 0.0001, "loss": 1.5447, "step": 889392 }, { "epoch": 76.72946859903382, "grad_norm": 0.3334895670413971, "learning_rate": 0.0001, "loss": 1.5467, "step": 889448 }, { "epoch": 76.73429951690821, "grad_norm": 0.35001251101493835, "learning_rate": 0.0001, "loss": 1.5456, "step": 889504 }, { "epoch": 76.73913043478261, "grad_norm": 0.7158212065696716, "learning_rate": 0.0001, "loss": 1.5466, "step": 889560 }, { "epoch": 76.74396135265701, "grad_norm": 0.4624361991882324, "learning_rate": 0.0001, "loss": 1.5469, "step": 889616 }, { "epoch": 76.7487922705314, "grad_norm": 0.31355029344558716, "learning_rate": 0.0001, "loss": 1.5425, "step": 889672 }, { "epoch": 76.7536231884058, "grad_norm": 1.2393286228179932, "learning_rate": 0.0001, "loss": 1.5439, "step": 889728 }, { "epoch": 76.7584541062802, "grad_norm": 0.3503963351249695, "learning_rate": 0.0001, "loss": 1.5448, "step": 889784 }, { "epoch": 76.76328502415458, "grad_norm": 0.2926509380340576, "learning_rate": 0.0001, "loss": 1.5448, "step": 889840 }, { "epoch": 76.76811594202898, "grad_norm": 0.26317471265792847, "learning_rate": 0.0001, "loss": 1.5509, "step": 889896 }, { "epoch": 76.77294685990339, "grad_norm": 0.26333940029144287, "learning_rate": 0.0001, "loss": 1.5431, "step": 889952 }, { "epoch": 76.77777777777777, "grad_norm": 0.3144097924232483, "learning_rate": 0.0001, "loss": 1.5424, "step": 890008 }, { "epoch": 76.78260869565217, "grad_norm": 40.33753204345703, "learning_rate": 0.0001, "loss": 1.541, "step": 890064 }, { "epoch": 76.78743961352657, "grad_norm": 0.24389594793319702, "learning_rate": 0.0001, "loss": 1.5418, "step": 890120 }, { "epoch": 76.79227053140096, "grad_norm": 0.33138084411621094, "learning_rate": 0.0001, "loss": 1.5407, "step": 890176 }, { "epoch": 76.79710144927536, "grad_norm": 0.3146216571331024, "learning_rate": 0.0001, "loss": 1.5478, "step": 890232 }, { "epoch": 76.80193236714976, "grad_norm": 0.40616732835769653, "learning_rate": 0.0001, "loss": 1.5457, "step": 890288 }, { "epoch": 76.80676328502416, "grad_norm": 0.29346582293510437, "learning_rate": 0.0001, "loss": 1.5463, "step": 890344 }, { "epoch": 76.81159420289855, "grad_norm": 0.35836803913116455, "learning_rate": 0.0001, "loss": 1.5511, "step": 890400 }, { "epoch": 76.81642512077295, "grad_norm": 0.249020054936409, "learning_rate": 0.0001, "loss": 1.5484, "step": 890456 }, { "epoch": 76.82125603864735, "grad_norm": 0.30458930134773254, "learning_rate": 0.0001, "loss": 1.5458, "step": 890512 }, { "epoch": 76.82608695652173, "grad_norm": 1.9965858459472656, "learning_rate": 0.0001, "loss": 1.5488, "step": 890568 }, { "epoch": 76.83091787439614, "grad_norm": 1.5120283365249634, "learning_rate": 0.0001, "loss": 1.5488, "step": 890624 }, { "epoch": 76.83574879227054, "grad_norm": 0.3128375709056854, "learning_rate": 0.0001, "loss": 1.5434, "step": 890680 }, { "epoch": 76.84057971014492, "grad_norm": 0.4499654173851013, "learning_rate": 0.0001, "loss": 1.5428, "step": 890736 }, { "epoch": 76.84541062801932, "grad_norm": 0.30135607719421387, "learning_rate": 0.0001, "loss": 1.5465, "step": 890792 }, { "epoch": 76.85024154589372, "grad_norm": 0.2562079131603241, "learning_rate": 0.0001, "loss": 1.5468, "step": 890848 }, { "epoch": 76.85507246376811, "grad_norm": 0.8505415320396423, "learning_rate": 0.0001, "loss": 1.5463, "step": 890904 }, { "epoch": 76.85990338164251, "grad_norm": 0.2893083691596985, "learning_rate": 0.0001, "loss": 1.539, "step": 890960 }, { "epoch": 76.86473429951691, "grad_norm": 0.23727762699127197, "learning_rate": 0.0001, "loss": 1.5438, "step": 891016 }, { "epoch": 76.8695652173913, "grad_norm": 0.4809964597225189, "learning_rate": 0.0001, "loss": 1.5511, "step": 891072 }, { "epoch": 76.8743961352657, "grad_norm": 0.3172372579574585, "learning_rate": 0.0001, "loss": 1.5452, "step": 891128 }, { "epoch": 76.8792270531401, "grad_norm": 0.6929073333740234, "learning_rate": 0.0001, "loss": 1.5467, "step": 891184 }, { "epoch": 76.8840579710145, "grad_norm": 0.3517664968967438, "learning_rate": 0.0001, "loss": 1.5486, "step": 891240 }, { "epoch": 76.88888888888889, "grad_norm": 1.1323614120483398, "learning_rate": 0.0001, "loss": 1.5453, "step": 891296 }, { "epoch": 76.89371980676329, "grad_norm": 0.3235151469707489, "learning_rate": 0.0001, "loss": 1.5442, "step": 891352 }, { "epoch": 76.89855072463769, "grad_norm": 0.469760924577713, "learning_rate": 0.0001, "loss": 1.542, "step": 891408 }, { "epoch": 76.90338164251207, "grad_norm": 0.5204635858535767, "learning_rate": 0.0001, "loss": 1.548, "step": 891464 }, { "epoch": 76.90821256038647, "grad_norm": 0.3171195983886719, "learning_rate": 0.0001, "loss": 1.5489, "step": 891520 }, { "epoch": 76.91304347826087, "grad_norm": 0.24541881680488586, "learning_rate": 0.0001, "loss": 1.5453, "step": 891576 }, { "epoch": 76.91787439613526, "grad_norm": 0.3575242757797241, "learning_rate": 0.0001, "loss": 1.5499, "step": 891632 }, { "epoch": 76.92270531400966, "grad_norm": 0.2769855558872223, "learning_rate": 0.0001, "loss": 1.5532, "step": 891688 }, { "epoch": 76.92753623188406, "grad_norm": 0.3400444984436035, "learning_rate": 0.0001, "loss": 1.5424, "step": 891744 }, { "epoch": 76.93236714975845, "grad_norm": 0.31047841906547546, "learning_rate": 0.0001, "loss": 1.5428, "step": 891800 }, { "epoch": 76.93719806763285, "grad_norm": 0.25483885407447815, "learning_rate": 0.0001, "loss": 1.5474, "step": 891856 }, { "epoch": 76.94202898550725, "grad_norm": 0.32835453748703003, "learning_rate": 0.0001, "loss": 1.5481, "step": 891912 }, { "epoch": 76.94685990338164, "grad_norm": 0.2653012275695801, "learning_rate": 0.0001, "loss": 1.5495, "step": 891968 }, { "epoch": 76.95169082125604, "grad_norm": 0.28780636191368103, "learning_rate": 0.0001, "loss": 1.5508, "step": 892024 }, { "epoch": 76.95652173913044, "grad_norm": 0.32467854022979736, "learning_rate": 0.0001, "loss": 1.5476, "step": 892080 }, { "epoch": 76.96135265700484, "grad_norm": 1.2277270555496216, "learning_rate": 0.0001, "loss": 1.5486, "step": 892136 }, { "epoch": 76.96618357487922, "grad_norm": 0.3015211820602417, "learning_rate": 0.0001, "loss": 1.5468, "step": 892192 }, { "epoch": 76.97101449275362, "grad_norm": 0.3613453507423401, "learning_rate": 0.0001, "loss": 1.548, "step": 892248 }, { "epoch": 76.97584541062803, "grad_norm": 1.0194636583328247, "learning_rate": 0.0001, "loss": 1.5512, "step": 892304 }, { "epoch": 76.98067632850241, "grad_norm": 0.32174497842788696, "learning_rate": 0.0001, "loss": 1.5514, "step": 892360 }, { "epoch": 76.98550724637681, "grad_norm": 0.2283664047718048, "learning_rate": 0.0001, "loss": 1.5467, "step": 892416 }, { "epoch": 76.99033816425121, "grad_norm": 0.24132594466209412, "learning_rate": 0.0001, "loss": 1.5469, "step": 892472 }, { "epoch": 76.9951690821256, "grad_norm": 0.3979596495628357, "learning_rate": 0.0001, "loss": 1.5444, "step": 892528 }, { "epoch": 77.0, "grad_norm": 2.8958818912506104, "learning_rate": 0.0001, "loss": 1.5502, "step": 892584 }, { "epoch": 77.0048309178744, "grad_norm": 0.33290717005729675, "learning_rate": 0.0001, "loss": 1.5347, "step": 892640 }, { "epoch": 77.00966183574879, "grad_norm": 0.31586819887161255, "learning_rate": 0.0001, "loss": 1.5379, "step": 892696 }, { "epoch": 77.01449275362319, "grad_norm": 0.29623135924339294, "learning_rate": 0.0001, "loss": 1.5357, "step": 892752 }, { "epoch": 77.01932367149759, "grad_norm": 0.2562268078327179, "learning_rate": 0.0001, "loss": 1.5356, "step": 892808 }, { "epoch": 77.02415458937197, "grad_norm": 3.1662535667419434, "learning_rate": 0.0001, "loss": 1.5414, "step": 892864 }, { "epoch": 77.02898550724638, "grad_norm": 0.30531272292137146, "learning_rate": 0.0001, "loss": 1.5365, "step": 892920 }, { "epoch": 77.03381642512078, "grad_norm": 2.9569036960601807, "learning_rate": 0.0001, "loss": 1.5465, "step": 892976 }, { "epoch": 77.03864734299516, "grad_norm": 0.27660515904426575, "learning_rate": 0.0001, "loss": 1.5352, "step": 893032 }, { "epoch": 77.04347826086956, "grad_norm": 0.33799660205841064, "learning_rate": 0.0001, "loss": 1.5385, "step": 893088 }, { "epoch": 77.04830917874396, "grad_norm": 0.5071626305580139, "learning_rate": 0.0001, "loss": 1.5414, "step": 893144 }, { "epoch": 77.05314009661836, "grad_norm": 23.24966049194336, "learning_rate": 0.0001, "loss": 1.5458, "step": 893200 }, { "epoch": 77.05797101449275, "grad_norm": 3.375088691711426, "learning_rate": 0.0001, "loss": 1.5397, "step": 893256 }, { "epoch": 77.06280193236715, "grad_norm": 0.43373677134513855, "learning_rate": 0.0001, "loss": 1.5369, "step": 893312 }, { "epoch": 77.06763285024155, "grad_norm": 4.1627302169799805, "learning_rate": 0.0001, "loss": 1.5376, "step": 893368 }, { "epoch": 77.07246376811594, "grad_norm": 0.2615315318107605, "learning_rate": 0.0001, "loss": 1.5332, "step": 893424 }, { "epoch": 77.07729468599034, "grad_norm": 0.3285675644874573, "learning_rate": 0.0001, "loss": 1.5361, "step": 893480 }, { "epoch": 77.08212560386474, "grad_norm": 0.28919339179992676, "learning_rate": 0.0001, "loss": 1.5306, "step": 893536 }, { "epoch": 77.08695652173913, "grad_norm": 0.32138869166374207, "learning_rate": 0.0001, "loss": 1.5335, "step": 893592 }, { "epoch": 77.09178743961353, "grad_norm": 0.3273879587650299, "learning_rate": 0.0001, "loss": 1.5408, "step": 893648 }, { "epoch": 77.09661835748793, "grad_norm": 0.2722780704498291, "learning_rate": 0.0001, "loss": 1.5441, "step": 893704 }, { "epoch": 77.10144927536231, "grad_norm": 0.3228033483028412, "learning_rate": 0.0001, "loss": 1.5422, "step": 893760 }, { "epoch": 77.10628019323671, "grad_norm": 0.24491305649280548, "learning_rate": 0.0001, "loss": 1.5438, "step": 893816 }, { "epoch": 77.11111111111111, "grad_norm": 1.6466772556304932, "learning_rate": 0.0001, "loss": 1.5387, "step": 893872 }, { "epoch": 77.1159420289855, "grad_norm": 2.0251705646514893, "learning_rate": 0.0001, "loss": 1.5446, "step": 893928 }, { "epoch": 77.1207729468599, "grad_norm": 0.28950557112693787, "learning_rate": 0.0001, "loss": 1.5456, "step": 893984 }, { "epoch": 77.1256038647343, "grad_norm": 0.3835700452327728, "learning_rate": 0.0001, "loss": 1.5358, "step": 894040 }, { "epoch": 77.1304347826087, "grad_norm": 0.26751142740249634, "learning_rate": 0.0001, "loss": 1.5392, "step": 894096 }, { "epoch": 77.13526570048309, "grad_norm": 0.2468877136707306, "learning_rate": 0.0001, "loss": 1.5438, "step": 894152 }, { "epoch": 77.14009661835749, "grad_norm": 0.2901710569858551, "learning_rate": 0.0001, "loss": 1.542, "step": 894208 }, { "epoch": 77.14492753623189, "grad_norm": 0.27336329221725464, "learning_rate": 0.0001, "loss": 1.5446, "step": 894264 }, { "epoch": 77.14975845410628, "grad_norm": 0.24589508771896362, "learning_rate": 0.0001, "loss": 1.5498, "step": 894320 }, { "epoch": 77.15458937198068, "grad_norm": 0.3376100957393646, "learning_rate": 0.0001, "loss": 1.5347, "step": 894376 }, { "epoch": 77.15942028985508, "grad_norm": 0.2789279818534851, "learning_rate": 0.0001, "loss": 1.5422, "step": 894432 }, { "epoch": 77.16425120772946, "grad_norm": 0.4849938154220581, "learning_rate": 0.0001, "loss": 1.539, "step": 894488 }, { "epoch": 77.16908212560386, "grad_norm": 0.35239601135253906, "learning_rate": 0.0001, "loss": 1.542, "step": 894544 }, { "epoch": 77.17391304347827, "grad_norm": 1.4299936294555664, "learning_rate": 0.0001, "loss": 1.5371, "step": 894600 }, { "epoch": 77.17874396135265, "grad_norm": 0.3254808187484741, "learning_rate": 0.0001, "loss": 1.5409, "step": 894656 }, { "epoch": 77.18357487922705, "grad_norm": 1.275976300239563, "learning_rate": 0.0001, "loss": 1.5368, "step": 894712 }, { "epoch": 77.18840579710145, "grad_norm": 1.67021906375885, "learning_rate": 0.0001, "loss": 1.5429, "step": 894768 }, { "epoch": 77.19323671497584, "grad_norm": 0.27209797501564026, "learning_rate": 0.0001, "loss": 1.5377, "step": 894824 }, { "epoch": 77.19806763285024, "grad_norm": 0.552079975605011, "learning_rate": 0.0001, "loss": 1.5439, "step": 894880 }, { "epoch": 77.20289855072464, "grad_norm": 0.3377586305141449, "learning_rate": 0.0001, "loss": 1.5384, "step": 894936 }, { "epoch": 77.20772946859903, "grad_norm": 0.31091514229774475, "learning_rate": 0.0001, "loss": 1.5419, "step": 894992 }, { "epoch": 77.21256038647343, "grad_norm": 20.485462188720703, "learning_rate": 0.0001, "loss": 1.5381, "step": 895048 }, { "epoch": 77.21739130434783, "grad_norm": 0.5028026103973389, "learning_rate": 0.0001, "loss": 1.5431, "step": 895104 }, { "epoch": 77.22222222222223, "grad_norm": 29.418418884277344, "learning_rate": 0.0001, "loss": 1.5423, "step": 895160 }, { "epoch": 77.22705314009661, "grad_norm": 0.2757616937160492, "learning_rate": 0.0001, "loss": 1.5447, "step": 895216 }, { "epoch": 77.23188405797102, "grad_norm": 0.529567539691925, "learning_rate": 0.0001, "loss": 1.5465, "step": 895272 }, { "epoch": 77.23671497584542, "grad_norm": 0.27486392855644226, "learning_rate": 0.0001, "loss": 1.5409, "step": 895328 }, { "epoch": 77.2415458937198, "grad_norm": 0.32313814759254456, "learning_rate": 0.0001, "loss": 1.5309, "step": 895384 }, { "epoch": 77.2463768115942, "grad_norm": 0.2878094017505646, "learning_rate": 0.0001, "loss": 1.5369, "step": 895440 }, { "epoch": 77.2512077294686, "grad_norm": 0.29680588841438293, "learning_rate": 0.0001, "loss": 1.5394, "step": 895496 }, { "epoch": 77.25603864734299, "grad_norm": 0.42956483364105225, "learning_rate": 0.0001, "loss": 1.5412, "step": 895552 }, { "epoch": 77.26086956521739, "grad_norm": 0.28790903091430664, "learning_rate": 0.0001, "loss": 1.5393, "step": 895608 }, { "epoch": 77.26570048309179, "grad_norm": 0.26590147614479065, "learning_rate": 0.0001, "loss": 1.5428, "step": 895664 }, { "epoch": 77.27053140096618, "grad_norm": 0.25076979398727417, "learning_rate": 0.0001, "loss": 1.5369, "step": 895720 }, { "epoch": 77.27536231884058, "grad_norm": 0.40829506516456604, "learning_rate": 0.0001, "loss": 1.5481, "step": 895776 }, { "epoch": 77.28019323671498, "grad_norm": 0.6188421845436096, "learning_rate": 0.0001, "loss": 1.5448, "step": 895832 }, { "epoch": 77.28502415458937, "grad_norm": 2.6184794902801514, "learning_rate": 0.0001, "loss": 1.5433, "step": 895888 }, { "epoch": 77.28985507246377, "grad_norm": 0.44290241599082947, "learning_rate": 0.0001, "loss": 1.5506, "step": 895944 }, { "epoch": 77.29468599033817, "grad_norm": 0.2824206054210663, "learning_rate": 0.0001, "loss": 1.5448, "step": 896000 }, { "epoch": 77.29951690821257, "grad_norm": 0.46977439522743225, "learning_rate": 0.0001, "loss": 1.5458, "step": 896056 }, { "epoch": 77.30434782608695, "grad_norm": 0.41024237871170044, "learning_rate": 0.0001, "loss": 1.5462, "step": 896112 }, { "epoch": 77.30917874396135, "grad_norm": 0.2643563449382782, "learning_rate": 0.0001, "loss": 1.5502, "step": 896168 }, { "epoch": 77.31400966183575, "grad_norm": 0.3695909380912781, "learning_rate": 0.0001, "loss": 1.5456, "step": 896224 }, { "epoch": 77.31884057971014, "grad_norm": 0.4043790102005005, "learning_rate": 0.0001, "loss": 1.5451, "step": 896280 }, { "epoch": 77.32367149758454, "grad_norm": 0.5640071630477905, "learning_rate": 0.0001, "loss": 1.5472, "step": 896336 }, { "epoch": 77.32850241545894, "grad_norm": 0.3174602687358856, "learning_rate": 0.0001, "loss": 1.5393, "step": 896392 }, { "epoch": 77.33333333333333, "grad_norm": 0.2925054728984833, "learning_rate": 0.0001, "loss": 1.5437, "step": 896448 }, { "epoch": 77.33816425120773, "grad_norm": 4.581953048706055, "learning_rate": 0.0001, "loss": 1.5379, "step": 896504 }, { "epoch": 77.34299516908213, "grad_norm": 2.0670437812805176, "learning_rate": 0.0001, "loss": 1.5484, "step": 896560 }, { "epoch": 77.34782608695652, "grad_norm": 0.21982458233833313, "learning_rate": 0.0001, "loss": 1.5391, "step": 896616 }, { "epoch": 77.35265700483092, "grad_norm": 0.2862972915172577, "learning_rate": 0.0001, "loss": 1.5422, "step": 896672 }, { "epoch": 77.35748792270532, "grad_norm": 0.34445539116859436, "learning_rate": 0.0001, "loss": 1.5469, "step": 896728 }, { "epoch": 77.3623188405797, "grad_norm": 1.2311981916427612, "learning_rate": 0.0001, "loss": 1.5421, "step": 896784 }, { "epoch": 77.3671497584541, "grad_norm": 1.1050893068313599, "learning_rate": 0.0001, "loss": 1.5435, "step": 896840 }, { "epoch": 77.3719806763285, "grad_norm": 0.4309213161468506, "learning_rate": 0.0001, "loss": 1.5408, "step": 896896 }, { "epoch": 77.3768115942029, "grad_norm": 0.38114696741104126, "learning_rate": 0.0001, "loss": 1.5362, "step": 896952 }, { "epoch": 77.38164251207729, "grad_norm": 0.3538317382335663, "learning_rate": 0.0001, "loss": 1.5374, "step": 897008 }, { "epoch": 77.38647342995169, "grad_norm": 0.3507722318172455, "learning_rate": 0.0001, "loss": 1.5462, "step": 897064 }, { "epoch": 77.3913043478261, "grad_norm": 1.0935847759246826, "learning_rate": 0.0001, "loss": 1.5403, "step": 897120 }, { "epoch": 77.39613526570048, "grad_norm": 0.32229092717170715, "learning_rate": 0.0001, "loss": 1.5439, "step": 897176 }, { "epoch": 77.40096618357488, "grad_norm": 0.24583016335964203, "learning_rate": 0.0001, "loss": 1.541, "step": 897232 }, { "epoch": 77.40579710144928, "grad_norm": 3.3605504035949707, "learning_rate": 0.0001, "loss": 1.5521, "step": 897288 }, { "epoch": 77.41062801932367, "grad_norm": 0.32950589060783386, "learning_rate": 0.0001, "loss": 1.5397, "step": 897344 }, { "epoch": 77.41545893719807, "grad_norm": 60.00363540649414, "learning_rate": 0.0001, "loss": 1.5399, "step": 897400 }, { "epoch": 77.42028985507247, "grad_norm": 0.4006808400154114, "learning_rate": 0.0001, "loss": 1.5456, "step": 897456 }, { "epoch": 77.42512077294685, "grad_norm": 0.3252699077129364, "learning_rate": 0.0001, "loss": 1.5487, "step": 897512 }, { "epoch": 77.42995169082126, "grad_norm": 0.24723169207572937, "learning_rate": 0.0001, "loss": 1.552, "step": 897568 }, { "epoch": 77.43478260869566, "grad_norm": 0.2695583999156952, "learning_rate": 0.0001, "loss": 1.5468, "step": 897624 }, { "epoch": 77.43961352657004, "grad_norm": 0.30578914284706116, "learning_rate": 0.0001, "loss": 1.5412, "step": 897680 }, { "epoch": 77.44444444444444, "grad_norm": 0.23076440393924713, "learning_rate": 0.0001, "loss": 1.5486, "step": 897736 }, { "epoch": 77.44927536231884, "grad_norm": 0.4009498059749603, "learning_rate": 0.0001, "loss": 1.5467, "step": 897792 }, { "epoch": 77.45410628019323, "grad_norm": 0.26550033688545227, "learning_rate": 0.0001, "loss": 1.5437, "step": 897848 }, { "epoch": 77.45893719806763, "grad_norm": 0.30349400639533997, "learning_rate": 0.0001, "loss": 1.5432, "step": 897904 }, { "epoch": 77.46376811594203, "grad_norm": 0.28720518946647644, "learning_rate": 0.0001, "loss": 1.5416, "step": 897960 }, { "epoch": 77.46859903381643, "grad_norm": 8.202338218688965, "learning_rate": 0.0001, "loss": 1.5415, "step": 898016 }, { "epoch": 77.47342995169082, "grad_norm": 0.283584326505661, "learning_rate": 0.0001, "loss": 1.5452, "step": 898072 }, { "epoch": 77.47826086956522, "grad_norm": 0.33084040880203247, "learning_rate": 0.0001, "loss": 1.5414, "step": 898128 }, { "epoch": 77.48309178743962, "grad_norm": 0.3759298622608185, "learning_rate": 0.0001, "loss": 1.5509, "step": 898184 }, { "epoch": 77.487922705314, "grad_norm": 0.333644837141037, "learning_rate": 0.0001, "loss": 1.5418, "step": 898240 }, { "epoch": 77.4927536231884, "grad_norm": 0.40551045536994934, "learning_rate": 0.0001, "loss": 1.5542, "step": 898296 }, { "epoch": 77.4975845410628, "grad_norm": 1.3935898542404175, "learning_rate": 0.0001, "loss": 1.5465, "step": 898352 }, { "epoch": 77.5024154589372, "grad_norm": 0.3350406587123871, "learning_rate": 0.0001, "loss": 1.5402, "step": 898408 }, { "epoch": 77.5072463768116, "grad_norm": 0.3032197952270508, "learning_rate": 0.0001, "loss": 1.5438, "step": 898464 }, { "epoch": 77.512077294686, "grad_norm": 0.4709841012954712, "learning_rate": 0.0001, "loss": 1.5452, "step": 898520 }, { "epoch": 77.51690821256038, "grad_norm": 0.3427942991256714, "learning_rate": 0.0001, "loss": 1.5423, "step": 898576 }, { "epoch": 77.52173913043478, "grad_norm": 0.3285948932170868, "learning_rate": 0.0001, "loss": 1.5403, "step": 898632 }, { "epoch": 77.52657004830918, "grad_norm": 0.27721989154815674, "learning_rate": 0.0001, "loss": 1.5429, "step": 898688 }, { "epoch": 77.53140096618357, "grad_norm": 0.5090702176094055, "learning_rate": 0.0001, "loss": 1.5441, "step": 898744 }, { "epoch": 77.53623188405797, "grad_norm": 0.31896206736564636, "learning_rate": 0.0001, "loss": 1.5534, "step": 898800 }, { "epoch": 77.54106280193237, "grad_norm": 7.796575546264648, "learning_rate": 0.0001, "loss": 1.549, "step": 898856 }, { "epoch": 77.54589371980677, "grad_norm": 0.28479912877082825, "learning_rate": 0.0001, "loss": 1.5447, "step": 898912 }, { "epoch": 77.55072463768116, "grad_norm": 0.33950695395469666, "learning_rate": 0.0001, "loss": 1.5502, "step": 898968 }, { "epoch": 77.55555555555556, "grad_norm": 0.32679060101509094, "learning_rate": 0.0001, "loss": 1.5429, "step": 899024 }, { "epoch": 77.56038647342996, "grad_norm": 0.3070855140686035, "learning_rate": 0.0001, "loss": 1.5498, "step": 899080 }, { "epoch": 77.56521739130434, "grad_norm": 5.622460842132568, "learning_rate": 0.0001, "loss": 1.5468, "step": 899136 }, { "epoch": 77.57004830917874, "grad_norm": 1.3780114650726318, "learning_rate": 0.0001, "loss": 1.5459, "step": 899192 }, { "epoch": 77.57487922705315, "grad_norm": 0.25802090764045715, "learning_rate": 0.0001, "loss": 1.544, "step": 899248 }, { "epoch": 77.57971014492753, "grad_norm": 0.2651505470275879, "learning_rate": 0.0001, "loss": 1.5465, "step": 899304 }, { "epoch": 77.58454106280193, "grad_norm": 0.26064079999923706, "learning_rate": 0.0001, "loss": 1.5448, "step": 899360 }, { "epoch": 77.58937198067633, "grad_norm": 0.3571234941482544, "learning_rate": 0.0001, "loss": 1.5451, "step": 899416 }, { "epoch": 77.59420289855072, "grad_norm": 6.9121832847595215, "learning_rate": 0.0001, "loss": 1.5451, "step": 899472 }, { "epoch": 77.59903381642512, "grad_norm": 0.28921788930892944, "learning_rate": 0.0001, "loss": 1.5466, "step": 899528 }, { "epoch": 77.60386473429952, "grad_norm": 0.6692693829536438, "learning_rate": 0.0001, "loss": 1.5431, "step": 899584 }, { "epoch": 77.6086956521739, "grad_norm": 0.6374149322509766, "learning_rate": 0.0001, "loss": 1.5404, "step": 899640 }, { "epoch": 77.61352657004831, "grad_norm": 27.365150451660156, "learning_rate": 0.0001, "loss": 1.5512, "step": 899696 }, { "epoch": 77.61835748792271, "grad_norm": 0.3343762457370758, "learning_rate": 0.0001, "loss": 1.5427, "step": 899752 }, { "epoch": 77.6231884057971, "grad_norm": 0.32328835129737854, "learning_rate": 0.0001, "loss": 1.5456, "step": 899808 }, { "epoch": 77.6280193236715, "grad_norm": 0.26397332549095154, "learning_rate": 0.0001, "loss": 1.5446, "step": 899864 }, { "epoch": 77.6328502415459, "grad_norm": 0.582588791847229, "learning_rate": 0.0001, "loss": 1.5483, "step": 899920 }, { "epoch": 77.6376811594203, "grad_norm": 0.2605839669704437, "learning_rate": 0.0001, "loss": 1.539, "step": 899976 }, { "epoch": 77.64251207729468, "grad_norm": 0.28575390577316284, "learning_rate": 0.0001, "loss": 1.5427, "step": 900032 }, { "epoch": 77.64734299516908, "grad_norm": 0.2592625617980957, "learning_rate": 0.0001, "loss": 1.5434, "step": 900088 }, { "epoch": 77.65217391304348, "grad_norm": 1.5215011835098267, "learning_rate": 0.0001, "loss": 1.5461, "step": 900144 }, { "epoch": 77.65700483091787, "grad_norm": 2.8097610473632812, "learning_rate": 0.0001, "loss": 1.5498, "step": 900200 }, { "epoch": 77.66183574879227, "grad_norm": 0.26549577713012695, "learning_rate": 0.0001, "loss": 1.5454, "step": 900256 }, { "epoch": 77.66666666666667, "grad_norm": 0.3161364793777466, "learning_rate": 0.0001, "loss": 1.5385, "step": 900312 }, { "epoch": 77.67149758454106, "grad_norm": 0.26924729347229004, "learning_rate": 0.0001, "loss": 1.5407, "step": 900368 }, { "epoch": 77.67632850241546, "grad_norm": 0.3326740860939026, "learning_rate": 0.0001, "loss": 1.5441, "step": 900424 }, { "epoch": 77.68115942028986, "grad_norm": 2.7808287143707275, "learning_rate": 0.0001, "loss": 1.548, "step": 900480 }, { "epoch": 77.68599033816425, "grad_norm": 0.2711746096611023, "learning_rate": 0.0001, "loss": 1.5474, "step": 900536 }, { "epoch": 77.69082125603865, "grad_norm": 0.3349229097366333, "learning_rate": 0.0001, "loss": 1.5485, "step": 900592 }, { "epoch": 77.69565217391305, "grad_norm": 0.34686651825904846, "learning_rate": 0.0001, "loss": 1.5505, "step": 900648 }, { "epoch": 77.70048309178743, "grad_norm": 0.30728501081466675, "learning_rate": 0.0001, "loss": 1.5479, "step": 900704 }, { "epoch": 77.70531400966183, "grad_norm": 20.62008285522461, "learning_rate": 0.0001, "loss": 1.5479, "step": 900760 }, { "epoch": 77.71014492753623, "grad_norm": 0.25298774242401123, "learning_rate": 0.0001, "loss": 1.541, "step": 900816 }, { "epoch": 77.71497584541063, "grad_norm": 0.3070846199989319, "learning_rate": 0.0001, "loss": 1.5481, "step": 900872 }, { "epoch": 77.71980676328502, "grad_norm": 0.30153900384902954, "learning_rate": 0.0001, "loss": 1.5434, "step": 900928 }, { "epoch": 77.72463768115942, "grad_norm": 21.45399284362793, "learning_rate": 0.0001, "loss": 1.5454, "step": 900984 }, { "epoch": 77.72946859903382, "grad_norm": 0.2858729362487793, "learning_rate": 0.0001, "loss": 1.5483, "step": 901040 }, { "epoch": 77.73429951690821, "grad_norm": 0.29450950026512146, "learning_rate": 0.0001, "loss": 1.543, "step": 901096 }, { "epoch": 77.73913043478261, "grad_norm": 0.2764764130115509, "learning_rate": 0.0001, "loss": 1.5452, "step": 901152 }, { "epoch": 77.74396135265701, "grad_norm": 1.618788242340088, "learning_rate": 0.0001, "loss": 1.5486, "step": 901208 }, { "epoch": 77.7487922705314, "grad_norm": 0.3591453731060028, "learning_rate": 0.0001, "loss": 1.5493, "step": 901264 }, { "epoch": 77.7536231884058, "grad_norm": 0.30350545048713684, "learning_rate": 0.0001, "loss": 1.539, "step": 901320 }, { "epoch": 77.7584541062802, "grad_norm": 1.3370097875595093, "learning_rate": 0.0001, "loss": 1.5447, "step": 901376 }, { "epoch": 77.76328502415458, "grad_norm": 0.4716581106185913, "learning_rate": 0.0001, "loss": 1.545, "step": 901432 }, { "epoch": 77.76811594202898, "grad_norm": 0.327351450920105, "learning_rate": 0.0001, "loss": 1.5425, "step": 901488 }, { "epoch": 77.77294685990339, "grad_norm": 1.8966959714889526, "learning_rate": 0.0001, "loss": 1.5415, "step": 901544 }, { "epoch": 77.77777777777777, "grad_norm": 0.3900633454322815, "learning_rate": 0.0001, "loss": 1.5499, "step": 901600 }, { "epoch": 77.78260869565217, "grad_norm": 0.77599036693573, "learning_rate": 0.0001, "loss": 1.5414, "step": 901656 }, { "epoch": 77.78743961352657, "grad_norm": 0.2742416262626648, "learning_rate": 0.0001, "loss": 1.5451, "step": 901712 }, { "epoch": 77.79227053140096, "grad_norm": 0.2738041877746582, "learning_rate": 0.0001, "loss": 1.5432, "step": 901768 }, { "epoch": 77.79710144927536, "grad_norm": 0.35324880480766296, "learning_rate": 0.0001, "loss": 1.5454, "step": 901824 }, { "epoch": 77.80193236714976, "grad_norm": 4.963611602783203, "learning_rate": 0.0001, "loss": 1.5435, "step": 901880 }, { "epoch": 77.80676328502416, "grad_norm": 0.6359931230545044, "learning_rate": 0.0001, "loss": 1.5474, "step": 901936 }, { "epoch": 77.81159420289855, "grad_norm": 0.5247960090637207, "learning_rate": 0.0001, "loss": 1.5506, "step": 901992 }, { "epoch": 77.81642512077295, "grad_norm": 0.7400205731391907, "learning_rate": 0.0001, "loss": 1.543, "step": 902048 }, { "epoch": 77.82125603864735, "grad_norm": 0.43259143829345703, "learning_rate": 0.0001, "loss": 1.5431, "step": 902104 }, { "epoch": 77.82608695652173, "grad_norm": 0.2996666431427002, "learning_rate": 0.0001, "loss": 1.544, "step": 902160 }, { "epoch": 77.83091787439614, "grad_norm": 0.4399386942386627, "learning_rate": 0.0001, "loss": 1.55, "step": 902216 }, { "epoch": 77.83574879227054, "grad_norm": 0.28421491384506226, "learning_rate": 0.0001, "loss": 1.543, "step": 902272 }, { "epoch": 77.84057971014492, "grad_norm": 0.25974854826927185, "learning_rate": 0.0001, "loss": 1.5498, "step": 902328 }, { "epoch": 77.84541062801932, "grad_norm": 0.2537292242050171, "learning_rate": 0.0001, "loss": 1.5438, "step": 902384 }, { "epoch": 77.85024154589372, "grad_norm": 0.3952697515487671, "learning_rate": 0.0001, "loss": 1.5417, "step": 902440 }, { "epoch": 77.85507246376811, "grad_norm": 16.093338012695312, "learning_rate": 0.0001, "loss": 1.5446, "step": 902496 }, { "epoch": 77.85990338164251, "grad_norm": 0.3053652346134186, "learning_rate": 0.0001, "loss": 1.5447, "step": 902552 }, { "epoch": 77.86473429951691, "grad_norm": 0.3124948740005493, "learning_rate": 0.0001, "loss": 1.5412, "step": 902608 }, { "epoch": 77.8695652173913, "grad_norm": 1.4360259771347046, "learning_rate": 0.0001, "loss": 1.5435, "step": 902664 }, { "epoch": 77.8743961352657, "grad_norm": 0.2618696689605713, "learning_rate": 0.0001, "loss": 1.5486, "step": 902720 }, { "epoch": 77.8792270531401, "grad_norm": 0.29347309470176697, "learning_rate": 0.0001, "loss": 1.5465, "step": 902776 }, { "epoch": 77.8840579710145, "grad_norm": 0.3476647436618805, "learning_rate": 0.0001, "loss": 1.5473, "step": 902832 }, { "epoch": 77.88888888888889, "grad_norm": 0.6460204720497131, "learning_rate": 0.0001, "loss": 1.5494, "step": 902888 }, { "epoch": 77.89371980676329, "grad_norm": 0.2782568335533142, "learning_rate": 0.0001, "loss": 1.5458, "step": 902944 }, { "epoch": 77.89855072463769, "grad_norm": 0.22985932230949402, "learning_rate": 0.0001, "loss": 1.5395, "step": 903000 }, { "epoch": 77.90338164251207, "grad_norm": 0.48537078499794006, "learning_rate": 0.0001, "loss": 1.5462, "step": 903056 }, { "epoch": 77.90821256038647, "grad_norm": 0.3915448486804962, "learning_rate": 0.0001, "loss": 1.5471, "step": 903112 }, { "epoch": 77.91304347826087, "grad_norm": 1.216597557067871, "learning_rate": 0.0001, "loss": 1.545, "step": 903168 }, { "epoch": 77.91787439613526, "grad_norm": 0.44479531049728394, "learning_rate": 0.0001, "loss": 1.5431, "step": 903224 }, { "epoch": 77.92270531400966, "grad_norm": 0.28007349371910095, "learning_rate": 0.0001, "loss": 1.5451, "step": 903280 }, { "epoch": 77.92753623188406, "grad_norm": 0.7516322731971741, "learning_rate": 0.0001, "loss": 1.5442, "step": 903336 }, { "epoch": 77.93236714975845, "grad_norm": 0.7130716443061829, "learning_rate": 0.0001, "loss": 1.5497, "step": 903392 }, { "epoch": 77.93719806763285, "grad_norm": 0.3128926753997803, "learning_rate": 0.0001, "loss": 1.5487, "step": 903448 }, { "epoch": 77.94202898550725, "grad_norm": 1.851073145866394, "learning_rate": 0.0001, "loss": 1.5405, "step": 903504 }, { "epoch": 77.94685990338164, "grad_norm": 0.3096476197242737, "learning_rate": 0.0001, "loss": 1.5523, "step": 903560 }, { "epoch": 77.95169082125604, "grad_norm": 0.38477349281311035, "learning_rate": 0.0001, "loss": 1.5475, "step": 903616 }, { "epoch": 77.95652173913044, "grad_norm": 0.29839372634887695, "learning_rate": 0.0001, "loss": 1.5499, "step": 903672 }, { "epoch": 77.96135265700484, "grad_norm": 0.34852132201194763, "learning_rate": 0.0001, "loss": 1.5507, "step": 903728 }, { "epoch": 77.96618357487922, "grad_norm": 0.24572138488292694, "learning_rate": 0.0001, "loss": 1.5496, "step": 903784 }, { "epoch": 77.97101449275362, "grad_norm": 0.27830517292022705, "learning_rate": 0.0001, "loss": 1.5451, "step": 903840 }, { "epoch": 77.97584541062803, "grad_norm": 3.016322612762451, "learning_rate": 0.0001, "loss": 1.549, "step": 903896 }, { "epoch": 77.98067632850241, "grad_norm": 0.328317254781723, "learning_rate": 0.0001, "loss": 1.5419, "step": 903952 }, { "epoch": 77.98550724637681, "grad_norm": 0.2735985815525055, "learning_rate": 0.0001, "loss": 1.5444, "step": 904008 }, { "epoch": 77.99033816425121, "grad_norm": 0.34103718400001526, "learning_rate": 0.0001, "loss": 1.5475, "step": 904064 }, { "epoch": 77.9951690821256, "grad_norm": 0.919086754322052, "learning_rate": 0.0001, "loss": 1.5392, "step": 904120 }, { "epoch": 78.0, "grad_norm": 0.9196894764900208, "learning_rate": 0.0001, "loss": 1.5469, "step": 904176 }, { "epoch": 78.0048309178744, "grad_norm": 0.29140162467956543, "learning_rate": 0.0001, "loss": 1.5339, "step": 904232 }, { "epoch": 78.00966183574879, "grad_norm": 0.25236600637435913, "learning_rate": 0.0001, "loss": 1.5411, "step": 904288 }, { "epoch": 78.01449275362319, "grad_norm": 0.2661077678203583, "learning_rate": 0.0001, "loss": 1.536, "step": 904344 }, { "epoch": 78.01932367149759, "grad_norm": 0.2613813877105713, "learning_rate": 0.0001, "loss": 1.5433, "step": 904400 }, { "epoch": 78.02415458937197, "grad_norm": 1.0823627710342407, "learning_rate": 0.0001, "loss": 1.5358, "step": 904456 }, { "epoch": 78.02898550724638, "grad_norm": 0.7270811200141907, "learning_rate": 0.0001, "loss": 1.5438, "step": 904512 }, { "epoch": 78.03381642512078, "grad_norm": 0.3428058326244354, "learning_rate": 0.0001, "loss": 1.5414, "step": 904568 }, { "epoch": 78.03864734299516, "grad_norm": 0.37530553340911865, "learning_rate": 0.0001, "loss": 1.5417, "step": 904624 }, { "epoch": 78.04347826086956, "grad_norm": 0.35448306798934937, "learning_rate": 0.0001, "loss": 1.541, "step": 904680 }, { "epoch": 78.04830917874396, "grad_norm": 0.308420330286026, "learning_rate": 0.0001, "loss": 1.5423, "step": 904736 }, { "epoch": 78.05314009661836, "grad_norm": 0.3255378007888794, "learning_rate": 0.0001, "loss": 1.5422, "step": 904792 }, { "epoch": 78.05797101449275, "grad_norm": 0.26479873061180115, "learning_rate": 0.0001, "loss": 1.5391, "step": 904848 }, { "epoch": 78.06280193236715, "grad_norm": 0.30922970175743103, "learning_rate": 0.0001, "loss": 1.5384, "step": 904904 }, { "epoch": 78.06763285024155, "grad_norm": 0.8137607574462891, "learning_rate": 0.0001, "loss": 1.5378, "step": 904960 }, { "epoch": 78.07246376811594, "grad_norm": 0.3761061131954193, "learning_rate": 0.0001, "loss": 1.5374, "step": 905016 }, { "epoch": 78.07729468599034, "grad_norm": 0.3530772030353546, "learning_rate": 0.0001, "loss": 1.539, "step": 905072 }, { "epoch": 78.08212560386474, "grad_norm": 0.8135175108909607, "learning_rate": 0.0001, "loss": 1.5373, "step": 905128 }, { "epoch": 78.08695652173913, "grad_norm": 0.3686438798904419, "learning_rate": 0.0001, "loss": 1.5485, "step": 905184 }, { "epoch": 78.09178743961353, "grad_norm": 0.34080472588539124, "learning_rate": 0.0001, "loss": 1.5342, "step": 905240 }, { "epoch": 78.09661835748793, "grad_norm": 0.3933558464050293, "learning_rate": 0.0001, "loss": 1.5457, "step": 905296 }, { "epoch": 78.10144927536231, "grad_norm": 0.3942315876483917, "learning_rate": 0.0001, "loss": 1.5422, "step": 905352 }, { "epoch": 78.10628019323671, "grad_norm": 0.2575486898422241, "learning_rate": 0.0001, "loss": 1.5356, "step": 905408 }, { "epoch": 78.11111111111111, "grad_norm": 0.30359748005867004, "learning_rate": 0.0001, "loss": 1.5445, "step": 905464 }, { "epoch": 78.1159420289855, "grad_norm": 0.48156729340553284, "learning_rate": 0.0001, "loss": 1.5379, "step": 905520 }, { "epoch": 78.1207729468599, "grad_norm": 0.31163373589515686, "learning_rate": 0.0001, "loss": 1.5368, "step": 905576 }, { "epoch": 78.1256038647343, "grad_norm": 0.7852391004562378, "learning_rate": 0.0001, "loss": 1.5394, "step": 905632 }, { "epoch": 78.1304347826087, "grad_norm": 0.7259731888771057, "learning_rate": 0.0001, "loss": 1.5467, "step": 905688 }, { "epoch": 78.13526570048309, "grad_norm": 0.25255024433135986, "learning_rate": 0.0001, "loss": 1.5399, "step": 905744 }, { "epoch": 78.14009661835749, "grad_norm": 0.30980348587036133, "learning_rate": 0.0001, "loss": 1.5415, "step": 905800 }, { "epoch": 78.14492753623189, "grad_norm": 0.5042296648025513, "learning_rate": 0.0001, "loss": 1.5413, "step": 905856 }, { "epoch": 78.14975845410628, "grad_norm": 0.3039153516292572, "learning_rate": 0.0001, "loss": 1.539, "step": 905912 }, { "epoch": 78.15458937198068, "grad_norm": 0.7001309990882874, "learning_rate": 0.0001, "loss": 1.5387, "step": 905968 }, { "epoch": 78.15942028985508, "grad_norm": 0.36234450340270996, "learning_rate": 0.0001, "loss": 1.5362, "step": 906024 }, { "epoch": 78.16425120772946, "grad_norm": 0.4835905432701111, "learning_rate": 0.0001, "loss": 1.5369, "step": 906080 }, { "epoch": 78.16908212560386, "grad_norm": 0.2915676236152649, "learning_rate": 0.0001, "loss": 1.5332, "step": 906136 }, { "epoch": 78.17391304347827, "grad_norm": 0.4710419774055481, "learning_rate": 0.0001, "loss": 1.5423, "step": 906192 }, { "epoch": 78.17874396135265, "grad_norm": 3.5197019577026367, "learning_rate": 0.0001, "loss": 1.5421, "step": 906248 }, { "epoch": 78.18357487922705, "grad_norm": 0.5289636254310608, "learning_rate": 0.0001, "loss": 1.5427, "step": 906304 }, { "epoch": 78.18840579710145, "grad_norm": 0.7580848932266235, "learning_rate": 0.0001, "loss": 1.5365, "step": 906360 }, { "epoch": 78.19323671497584, "grad_norm": 0.3395281434059143, "learning_rate": 0.0001, "loss": 1.5425, "step": 906416 }, { "epoch": 78.19806763285024, "grad_norm": 0.25637292861938477, "learning_rate": 0.0001, "loss": 1.5445, "step": 906472 }, { "epoch": 78.20289855072464, "grad_norm": 0.266223669052124, "learning_rate": 0.0001, "loss": 1.5401, "step": 906528 }, { "epoch": 78.20772946859903, "grad_norm": 0.762747049331665, "learning_rate": 0.0001, "loss": 1.5448, "step": 906584 }, { "epoch": 78.21256038647343, "grad_norm": 1.6561857461929321, "learning_rate": 0.0001, "loss": 1.5393, "step": 906640 }, { "epoch": 78.21739130434783, "grad_norm": 0.33398064970970154, "learning_rate": 0.0001, "loss": 1.5365, "step": 906696 }, { "epoch": 78.22222222222223, "grad_norm": 0.6655439734458923, "learning_rate": 0.0001, "loss": 1.5366, "step": 906752 }, { "epoch": 78.22705314009661, "grad_norm": 0.362110435962677, "learning_rate": 0.0001, "loss": 1.5383, "step": 906808 }, { "epoch": 78.23188405797102, "grad_norm": 0.39455726742744446, "learning_rate": 0.0001, "loss": 1.5444, "step": 906864 }, { "epoch": 78.23671497584542, "grad_norm": 3.3720309734344482, "learning_rate": 0.0001, "loss": 1.5444, "step": 906920 }, { "epoch": 78.2415458937198, "grad_norm": 0.27210384607315063, "learning_rate": 0.0001, "loss": 1.54, "step": 906976 }, { "epoch": 78.2463768115942, "grad_norm": 0.4624921381473541, "learning_rate": 0.0001, "loss": 1.5463, "step": 907032 }, { "epoch": 78.2512077294686, "grad_norm": 0.49851787090301514, "learning_rate": 0.0001, "loss": 1.5428, "step": 907088 }, { "epoch": 78.25603864734299, "grad_norm": 0.4197843372821808, "learning_rate": 0.0001, "loss": 1.5362, "step": 907144 }, { "epoch": 78.26086956521739, "grad_norm": 0.757652759552002, "learning_rate": 0.0001, "loss": 1.5324, "step": 907200 }, { "epoch": 78.26570048309179, "grad_norm": 0.6700170040130615, "learning_rate": 0.0001, "loss": 1.5453, "step": 907256 }, { "epoch": 78.27053140096618, "grad_norm": 0.6350409388542175, "learning_rate": 0.0001, "loss": 1.535, "step": 907312 }, { "epoch": 78.27536231884058, "grad_norm": 0.32353800535202026, "learning_rate": 0.0001, "loss": 1.5502, "step": 907368 }, { "epoch": 78.28019323671498, "grad_norm": 0.4580133557319641, "learning_rate": 0.0001, "loss": 1.5471, "step": 907424 }, { "epoch": 78.28502415458937, "grad_norm": 0.4100892245769501, "learning_rate": 0.0001, "loss": 1.5445, "step": 907480 }, { "epoch": 78.28985507246377, "grad_norm": 0.2619897127151489, "learning_rate": 0.0001, "loss": 1.5463, "step": 907536 }, { "epoch": 78.29468599033817, "grad_norm": 0.2968263030052185, "learning_rate": 0.0001, "loss": 1.5417, "step": 907592 }, { "epoch": 78.29951690821257, "grad_norm": 0.28773611783981323, "learning_rate": 0.0001, "loss": 1.5428, "step": 907648 }, { "epoch": 78.30434782608695, "grad_norm": 0.26234033703804016, "learning_rate": 0.0001, "loss": 1.5472, "step": 907704 }, { "epoch": 78.30917874396135, "grad_norm": 0.3056105673313141, "learning_rate": 0.0001, "loss": 1.538, "step": 907760 }, { "epoch": 78.31400966183575, "grad_norm": 0.3171331584453583, "learning_rate": 0.0001, "loss": 1.537, "step": 907816 }, { "epoch": 78.31884057971014, "grad_norm": 0.3269898593425751, "learning_rate": 0.0001, "loss": 1.5419, "step": 907872 }, { "epoch": 78.32367149758454, "grad_norm": 0.37185806035995483, "learning_rate": 0.0001, "loss": 1.5387, "step": 907928 }, { "epoch": 78.32850241545894, "grad_norm": 0.3672376871109009, "learning_rate": 0.0001, "loss": 1.548, "step": 907984 }, { "epoch": 78.33333333333333, "grad_norm": 1.270997166633606, "learning_rate": 0.0001, "loss": 1.5472, "step": 908040 }, { "epoch": 78.33816425120773, "grad_norm": 0.31751489639282227, "learning_rate": 0.0001, "loss": 1.5341, "step": 908096 }, { "epoch": 78.34299516908213, "grad_norm": 0.3124522864818573, "learning_rate": 0.0001, "loss": 1.5437, "step": 908152 }, { "epoch": 78.34782608695652, "grad_norm": 0.23015572130680084, "learning_rate": 0.0001, "loss": 1.5419, "step": 908208 }, { "epoch": 78.35265700483092, "grad_norm": 0.3133036494255066, "learning_rate": 0.0001, "loss": 1.5389, "step": 908264 }, { "epoch": 78.35748792270532, "grad_norm": 0.3523850440979004, "learning_rate": 0.0001, "loss": 1.541, "step": 908320 }, { "epoch": 78.3623188405797, "grad_norm": 0.22553174197673798, "learning_rate": 0.0001, "loss": 1.5429, "step": 908376 }, { "epoch": 78.3671497584541, "grad_norm": 0.2611035406589508, "learning_rate": 0.0001, "loss": 1.5439, "step": 908432 }, { "epoch": 78.3719806763285, "grad_norm": 1.6962493658065796, "learning_rate": 0.0001, "loss": 1.5523, "step": 908488 }, { "epoch": 78.3768115942029, "grad_norm": 0.3836349546909332, "learning_rate": 0.0001, "loss": 1.5419, "step": 908544 }, { "epoch": 78.38164251207729, "grad_norm": 0.34536418318748474, "learning_rate": 0.0001, "loss": 1.5392, "step": 908600 }, { "epoch": 78.38647342995169, "grad_norm": 0.5743812322616577, "learning_rate": 0.0001, "loss": 1.5409, "step": 908656 }, { "epoch": 78.3913043478261, "grad_norm": 1.4740278720855713, "learning_rate": 0.0001, "loss": 1.5407, "step": 908712 }, { "epoch": 78.39613526570048, "grad_norm": 0.40846797823905945, "learning_rate": 0.0001, "loss": 1.5437, "step": 908768 }, { "epoch": 78.40096618357488, "grad_norm": 0.29711663722991943, "learning_rate": 0.0001, "loss": 1.539, "step": 908824 }, { "epoch": 78.40579710144928, "grad_norm": 0.423377126455307, "learning_rate": 0.0001, "loss": 1.5434, "step": 908880 }, { "epoch": 78.41062801932367, "grad_norm": 1.5631135702133179, "learning_rate": 0.0001, "loss": 1.5406, "step": 908936 }, { "epoch": 78.41545893719807, "grad_norm": 0.25251060724258423, "learning_rate": 0.0001, "loss": 1.5431, "step": 908992 }, { "epoch": 78.42028985507247, "grad_norm": 0.39739611744880676, "learning_rate": 0.0001, "loss": 1.5334, "step": 909048 }, { "epoch": 78.42512077294685, "grad_norm": 0.23244686424732208, "learning_rate": 0.0001, "loss": 1.547, "step": 909104 }, { "epoch": 78.42995169082126, "grad_norm": 0.42936623096466064, "learning_rate": 0.0001, "loss": 1.5428, "step": 909160 }, { "epoch": 78.43478260869566, "grad_norm": 0.3487823009490967, "learning_rate": 0.0001, "loss": 1.551, "step": 909216 }, { "epoch": 78.43961352657004, "grad_norm": 0.2873503863811493, "learning_rate": 0.0001, "loss": 1.5364, "step": 909272 }, { "epoch": 78.44444444444444, "grad_norm": 0.5146862864494324, "learning_rate": 0.0001, "loss": 1.5425, "step": 909328 }, { "epoch": 78.44927536231884, "grad_norm": 0.5034518837928772, "learning_rate": 0.0001, "loss": 1.5395, "step": 909384 }, { "epoch": 78.45410628019323, "grad_norm": 3.108351707458496, "learning_rate": 0.0001, "loss": 1.5408, "step": 909440 }, { "epoch": 78.45893719806763, "grad_norm": 1.2702144384384155, "learning_rate": 0.0001, "loss": 1.5442, "step": 909496 }, { "epoch": 78.46376811594203, "grad_norm": 1.7510607242584229, "learning_rate": 0.0001, "loss": 1.5369, "step": 909552 }, { "epoch": 78.46859903381643, "grad_norm": 0.446585088968277, "learning_rate": 0.0001, "loss": 1.542, "step": 909608 }, { "epoch": 78.47342995169082, "grad_norm": 7.182013988494873, "learning_rate": 0.0001, "loss": 1.5404, "step": 909664 }, { "epoch": 78.47826086956522, "grad_norm": 10.601664543151855, "learning_rate": 0.0001, "loss": 1.5366, "step": 909720 }, { "epoch": 78.48309178743962, "grad_norm": 0.24985235929489136, "learning_rate": 0.0001, "loss": 1.5347, "step": 909776 }, { "epoch": 78.487922705314, "grad_norm": 0.25700655579566956, "learning_rate": 0.0001, "loss": 1.5445, "step": 909832 }, { "epoch": 78.4927536231884, "grad_norm": 0.27763256430625916, "learning_rate": 0.0001, "loss": 1.5389, "step": 909888 }, { "epoch": 78.4975845410628, "grad_norm": 0.4109707772731781, "learning_rate": 0.0001, "loss": 1.5477, "step": 909944 }, { "epoch": 78.5024154589372, "grad_norm": 0.5046495795249939, "learning_rate": 0.0001, "loss": 1.5439, "step": 910000 }, { "epoch": 78.5072463768116, "grad_norm": 0.24482525885105133, "learning_rate": 0.0001, "loss": 1.5415, "step": 910056 }, { "epoch": 78.512077294686, "grad_norm": 0.28007373213768005, "learning_rate": 0.0001, "loss": 1.542, "step": 910112 }, { "epoch": 78.51690821256038, "grad_norm": 6.471556186676025, "learning_rate": 0.0001, "loss": 1.5454, "step": 910168 }, { "epoch": 78.52173913043478, "grad_norm": 0.2847007215023041, "learning_rate": 0.0001, "loss": 1.539, "step": 910224 }, { "epoch": 78.52657004830918, "grad_norm": 0.8587493896484375, "learning_rate": 0.0001, "loss": 1.5414, "step": 910280 }, { "epoch": 78.53140096618357, "grad_norm": 0.36581116914749146, "learning_rate": 0.0001, "loss": 1.5448, "step": 910336 }, { "epoch": 78.53623188405797, "grad_norm": 0.34533628821372986, "learning_rate": 0.0001, "loss": 1.5398, "step": 910392 }, { "epoch": 78.54106280193237, "grad_norm": 0.38374730944633484, "learning_rate": 0.0001, "loss": 1.5392, "step": 910448 }, { "epoch": 78.54589371980677, "grad_norm": 0.4388698637485504, "learning_rate": 0.0001, "loss": 1.5438, "step": 910504 }, { "epoch": 78.55072463768116, "grad_norm": 0.24388599395751953, "learning_rate": 0.0001, "loss": 1.544, "step": 910560 }, { "epoch": 78.55555555555556, "grad_norm": 0.3540874421596527, "learning_rate": 0.0001, "loss": 1.5451, "step": 910616 }, { "epoch": 78.56038647342996, "grad_norm": 0.6306494474411011, "learning_rate": 0.0001, "loss": 1.5388, "step": 910672 }, { "epoch": 78.56521739130434, "grad_norm": 0.47051018476486206, "learning_rate": 0.0001, "loss": 1.5389, "step": 910728 }, { "epoch": 78.57004830917874, "grad_norm": 0.4533426761627197, "learning_rate": 0.0001, "loss": 1.542, "step": 910784 }, { "epoch": 78.57487922705315, "grad_norm": 0.6730632185935974, "learning_rate": 0.0001, "loss": 1.5447, "step": 910840 }, { "epoch": 78.57971014492753, "grad_norm": 0.2676924467086792, "learning_rate": 0.0001, "loss": 1.5438, "step": 910896 }, { "epoch": 78.58454106280193, "grad_norm": 0.255460262298584, "learning_rate": 0.0001, "loss": 1.5375, "step": 910952 }, { "epoch": 78.58937198067633, "grad_norm": 1.1612517833709717, "learning_rate": 0.0001, "loss": 1.5414, "step": 911008 }, { "epoch": 78.59420289855072, "grad_norm": 0.3216797113418579, "learning_rate": 0.0001, "loss": 1.542, "step": 911064 }, { "epoch": 78.59903381642512, "grad_norm": 0.28924599289894104, "learning_rate": 0.0001, "loss": 1.5401, "step": 911120 }, { "epoch": 78.60386473429952, "grad_norm": 0.2999010682106018, "learning_rate": 0.0001, "loss": 1.5428, "step": 911176 }, { "epoch": 78.6086956521739, "grad_norm": 0.3491629958152771, "learning_rate": 0.0001, "loss": 1.5389, "step": 911232 }, { "epoch": 78.61352657004831, "grad_norm": 0.3935507833957672, "learning_rate": 0.0001, "loss": 1.5451, "step": 911288 }, { "epoch": 78.61835748792271, "grad_norm": 0.322223961353302, "learning_rate": 0.0001, "loss": 1.5382, "step": 911344 }, { "epoch": 78.6231884057971, "grad_norm": 77.91619110107422, "learning_rate": 0.0001, "loss": 1.5453, "step": 911400 }, { "epoch": 78.6280193236715, "grad_norm": 0.8664394021034241, "learning_rate": 0.0001, "loss": 1.5356, "step": 911456 }, { "epoch": 78.6328502415459, "grad_norm": 0.5470309257507324, "learning_rate": 0.0001, "loss": 1.5445, "step": 911512 }, { "epoch": 78.6376811594203, "grad_norm": 0.2579398453235626, "learning_rate": 0.0001, "loss": 1.5354, "step": 911568 }, { "epoch": 78.64251207729468, "grad_norm": 0.43783411383628845, "learning_rate": 0.0001, "loss": 1.5376, "step": 911624 }, { "epoch": 78.64734299516908, "grad_norm": 1.4666857719421387, "learning_rate": 0.0001, "loss": 1.5387, "step": 911680 }, { "epoch": 78.65217391304348, "grad_norm": 0.9575477838516235, "learning_rate": 0.0001, "loss": 1.5394, "step": 911736 }, { "epoch": 78.65700483091787, "grad_norm": 0.30318379402160645, "learning_rate": 0.0001, "loss": 1.5432, "step": 911792 }, { "epoch": 78.66183574879227, "grad_norm": 0.27660077810287476, "learning_rate": 0.0001, "loss": 1.5418, "step": 911848 }, { "epoch": 78.66666666666667, "grad_norm": 0.26516130566596985, "learning_rate": 0.0001, "loss": 1.5391, "step": 911904 }, { "epoch": 78.67149758454106, "grad_norm": 0.9699448347091675, "learning_rate": 0.0001, "loss": 1.5413, "step": 911960 }, { "epoch": 78.67632850241546, "grad_norm": 0.6295803785324097, "learning_rate": 0.0001, "loss": 1.5436, "step": 912016 }, { "epoch": 78.68115942028986, "grad_norm": 0.8389052152633667, "learning_rate": 0.0001, "loss": 1.5406, "step": 912072 }, { "epoch": 78.68599033816425, "grad_norm": 0.43181073665618896, "learning_rate": 0.0001, "loss": 1.5411, "step": 912128 }, { "epoch": 78.69082125603865, "grad_norm": 0.28395119309425354, "learning_rate": 0.0001, "loss": 1.5402, "step": 912184 }, { "epoch": 78.69565217391305, "grad_norm": 0.3898066580295563, "learning_rate": 0.0001, "loss": 1.5498, "step": 912240 }, { "epoch": 78.70048309178743, "grad_norm": 0.28744181990623474, "learning_rate": 0.0001, "loss": 1.5395, "step": 912296 }, { "epoch": 78.70531400966183, "grad_norm": 0.3756221830844879, "learning_rate": 0.0001, "loss": 1.5407, "step": 912352 }, { "epoch": 78.71014492753623, "grad_norm": 0.26291149854660034, "learning_rate": 0.0001, "loss": 1.5476, "step": 912408 }, { "epoch": 78.71497584541063, "grad_norm": 0.5427486896514893, "learning_rate": 0.0001, "loss": 1.5444, "step": 912464 }, { "epoch": 78.71980676328502, "grad_norm": 0.3775538206100464, "learning_rate": 0.0001, "loss": 1.5432, "step": 912520 }, { "epoch": 78.72463768115942, "grad_norm": 0.4303833544254303, "learning_rate": 0.0001, "loss": 1.5513, "step": 912576 }, { "epoch": 78.72946859903382, "grad_norm": 15.072853088378906, "learning_rate": 0.0001, "loss": 1.5416, "step": 912632 }, { "epoch": 78.73429951690821, "grad_norm": 0.4351203739643097, "learning_rate": 0.0001, "loss": 1.5442, "step": 912688 }, { "epoch": 78.73913043478261, "grad_norm": 0.34397491812705994, "learning_rate": 0.0001, "loss": 1.551, "step": 912744 }, { "epoch": 78.74396135265701, "grad_norm": 0.28510481119155884, "learning_rate": 0.0001, "loss": 1.5504, "step": 912800 }, { "epoch": 78.7487922705314, "grad_norm": 0.38958239555358887, "learning_rate": 0.0001, "loss": 1.5449, "step": 912856 }, { "epoch": 78.7536231884058, "grad_norm": 4.739225387573242, "learning_rate": 0.0001, "loss": 1.5388, "step": 912912 }, { "epoch": 78.7584541062802, "grad_norm": 0.2972474694252014, "learning_rate": 0.0001, "loss": 1.5408, "step": 912968 }, { "epoch": 78.76328502415458, "grad_norm": 0.2851087749004364, "learning_rate": 0.0001, "loss": 1.545, "step": 913024 }, { "epoch": 78.76811594202898, "grad_norm": 0.3086169362068176, "learning_rate": 0.0001, "loss": 1.5431, "step": 913080 }, { "epoch": 78.77294685990339, "grad_norm": 0.31409770250320435, "learning_rate": 0.0001, "loss": 1.5416, "step": 913136 }, { "epoch": 78.77777777777777, "grad_norm": 0.6832907199859619, "learning_rate": 0.0001, "loss": 1.5456, "step": 913192 }, { "epoch": 78.78260869565217, "grad_norm": 0.32797807455062866, "learning_rate": 0.0001, "loss": 1.5443, "step": 913248 }, { "epoch": 78.78743961352657, "grad_norm": 0.33668431639671326, "learning_rate": 0.0001, "loss": 1.5413, "step": 913304 }, { "epoch": 78.79227053140096, "grad_norm": 0.3527728021144867, "learning_rate": 0.0001, "loss": 1.545, "step": 913360 }, { "epoch": 78.79710144927536, "grad_norm": 0.2849292755126953, "learning_rate": 0.0001, "loss": 1.5372, "step": 913416 }, { "epoch": 78.80193236714976, "grad_norm": 0.28488001227378845, "learning_rate": 0.0001, "loss": 1.5486, "step": 913472 }, { "epoch": 78.80676328502416, "grad_norm": 0.29306039214134216, "learning_rate": 0.0001, "loss": 1.5396, "step": 913528 }, { "epoch": 78.81159420289855, "grad_norm": 0.2718256711959839, "learning_rate": 0.0001, "loss": 1.548, "step": 913584 }, { "epoch": 78.81642512077295, "grad_norm": 1.6053106784820557, "learning_rate": 0.0001, "loss": 1.5456, "step": 913640 }, { "epoch": 78.82125603864735, "grad_norm": 0.39855363965034485, "learning_rate": 0.0001, "loss": 1.5386, "step": 913696 }, { "epoch": 78.82608695652173, "grad_norm": 0.2511284351348877, "learning_rate": 0.0001, "loss": 1.538, "step": 913752 }, { "epoch": 78.83091787439614, "grad_norm": 0.41425076127052307, "learning_rate": 0.0001, "loss": 1.5448, "step": 913808 }, { "epoch": 78.83574879227054, "grad_norm": 2.1316707134246826, "learning_rate": 0.0001, "loss": 1.5441, "step": 913864 }, { "epoch": 78.84057971014492, "grad_norm": 0.26935893297195435, "learning_rate": 0.0001, "loss": 1.541, "step": 913920 }, { "epoch": 78.84541062801932, "grad_norm": 1.0191763639450073, "learning_rate": 0.0001, "loss": 1.5458, "step": 913976 }, { "epoch": 78.85024154589372, "grad_norm": 0.36918285489082336, "learning_rate": 0.0001, "loss": 1.5408, "step": 914032 }, { "epoch": 78.85507246376811, "grad_norm": 0.31536629796028137, "learning_rate": 0.0001, "loss": 1.5483, "step": 914088 }, { "epoch": 78.85990338164251, "grad_norm": 0.35902515053749084, "learning_rate": 0.0001, "loss": 1.5449, "step": 914144 }, { "epoch": 78.86473429951691, "grad_norm": 0.5193590521812439, "learning_rate": 0.0001, "loss": 1.548, "step": 914200 }, { "epoch": 78.8695652173913, "grad_norm": 0.2922074794769287, "learning_rate": 0.0001, "loss": 1.5507, "step": 914256 }, { "epoch": 78.8743961352657, "grad_norm": 0.25271716713905334, "learning_rate": 0.0001, "loss": 1.5406, "step": 914312 }, { "epoch": 78.8792270531401, "grad_norm": 0.2485070824623108, "learning_rate": 0.0001, "loss": 1.5387, "step": 914368 }, { "epoch": 78.8840579710145, "grad_norm": 0.24245615303516388, "learning_rate": 0.0001, "loss": 1.5396, "step": 914424 }, { "epoch": 78.88888888888889, "grad_norm": 0.7364509105682373, "learning_rate": 0.0001, "loss": 1.5465, "step": 914480 }, { "epoch": 78.89371980676329, "grad_norm": 0.35267379879951477, "learning_rate": 0.0001, "loss": 1.545, "step": 914536 }, { "epoch": 78.89855072463769, "grad_norm": 0.29491084814071655, "learning_rate": 0.0001, "loss": 1.5475, "step": 914592 }, { "epoch": 78.90338164251207, "grad_norm": 0.38616201281547546, "learning_rate": 0.0001, "loss": 1.5443, "step": 914648 }, { "epoch": 78.90821256038647, "grad_norm": 0.356711745262146, "learning_rate": 0.0001, "loss": 1.5467, "step": 914704 }, { "epoch": 78.91304347826087, "grad_norm": 0.27186912298202515, "learning_rate": 0.0001, "loss": 1.543, "step": 914760 }, { "epoch": 78.91787439613526, "grad_norm": 0.29479238390922546, "learning_rate": 0.0001, "loss": 1.546, "step": 914816 }, { "epoch": 78.92270531400966, "grad_norm": 1.471650242805481, "learning_rate": 0.0001, "loss": 1.5433, "step": 914872 }, { "epoch": 78.92753623188406, "grad_norm": 0.3403843641281128, "learning_rate": 0.0001, "loss": 1.5449, "step": 914928 }, { "epoch": 78.93236714975845, "grad_norm": 0.4091770052909851, "learning_rate": 0.0001, "loss": 1.5469, "step": 914984 }, { "epoch": 78.93719806763285, "grad_norm": 0.2863544225692749, "learning_rate": 0.0001, "loss": 1.5422, "step": 915040 }, { "epoch": 78.94202898550725, "grad_norm": 0.2918528914451599, "learning_rate": 0.0001, "loss": 1.5469, "step": 915096 }, { "epoch": 78.94685990338164, "grad_norm": 0.6461113691329956, "learning_rate": 0.0001, "loss": 1.5424, "step": 915152 }, { "epoch": 78.95169082125604, "grad_norm": 0.28026777505874634, "learning_rate": 0.0001, "loss": 1.541, "step": 915208 }, { "epoch": 78.95652173913044, "grad_norm": 0.28597375750541687, "learning_rate": 0.0001, "loss": 1.5421, "step": 915264 }, { "epoch": 78.96135265700484, "grad_norm": 0.3875376284122467, "learning_rate": 0.0001, "loss": 1.5428, "step": 915320 }, { "epoch": 78.96618357487922, "grad_norm": 0.30939415097236633, "learning_rate": 0.0001, "loss": 1.5373, "step": 915376 }, { "epoch": 78.97101449275362, "grad_norm": 0.2936078906059265, "learning_rate": 0.0001, "loss": 1.5469, "step": 915432 }, { "epoch": 78.97584541062803, "grad_norm": 0.44650909304618835, "learning_rate": 0.0001, "loss": 1.5414, "step": 915488 }, { "epoch": 78.98067632850241, "grad_norm": 0.4333311915397644, "learning_rate": 0.0001, "loss": 1.549, "step": 915544 }, { "epoch": 78.98550724637681, "grad_norm": 0.5504847764968872, "learning_rate": 0.0001, "loss": 1.5419, "step": 915600 }, { "epoch": 78.99033816425121, "grad_norm": 0.6100783348083496, "learning_rate": 0.0001, "loss": 1.5457, "step": 915656 }, { "epoch": 78.9951690821256, "grad_norm": 0.29325494170188904, "learning_rate": 0.0001, "loss": 1.5437, "step": 915712 }, { "epoch": 79.0, "grad_norm": 0.2830547094345093, "learning_rate": 0.0001, "loss": 1.5453, "step": 915768 }, { "epoch": 79.0048309178744, "grad_norm": 0.6336060166358948, "learning_rate": 0.0001, "loss": 1.5342, "step": 915824 }, { "epoch": 79.00966183574879, "grad_norm": 0.33932989835739136, "learning_rate": 0.0001, "loss": 1.5389, "step": 915880 }, { "epoch": 79.01449275362319, "grad_norm": 0.2684313654899597, "learning_rate": 0.0001, "loss": 1.5383, "step": 915936 }, { "epoch": 79.01932367149759, "grad_norm": 4.805475234985352, "learning_rate": 0.0001, "loss": 1.5394, "step": 915992 }, { "epoch": 79.02415458937197, "grad_norm": 0.738775908946991, "learning_rate": 0.0001, "loss": 1.5407, "step": 916048 }, { "epoch": 79.02898550724638, "grad_norm": 8.070571899414062, "learning_rate": 0.0001, "loss": 1.5315, "step": 916104 }, { "epoch": 79.03381642512078, "grad_norm": 0.6747612953186035, "learning_rate": 0.0001, "loss": 1.5311, "step": 916160 }, { "epoch": 79.03864734299516, "grad_norm": 0.2384030967950821, "learning_rate": 0.0001, "loss": 1.543, "step": 916216 }, { "epoch": 79.04347826086956, "grad_norm": 0.36710721254348755, "learning_rate": 0.0001, "loss": 1.5451, "step": 916272 }, { "epoch": 79.04830917874396, "grad_norm": 0.3165782690048218, "learning_rate": 0.0001, "loss": 1.5381, "step": 916328 }, { "epoch": 79.05314009661836, "grad_norm": 2.682419538497925, "learning_rate": 0.0001, "loss": 1.5309, "step": 916384 }, { "epoch": 79.05797101449275, "grad_norm": 0.5397142767906189, "learning_rate": 0.0001, "loss": 1.5365, "step": 916440 }, { "epoch": 79.06280193236715, "grad_norm": 0.27631711959838867, "learning_rate": 0.0001, "loss": 1.5381, "step": 916496 }, { "epoch": 79.06763285024155, "grad_norm": 0.5313124060630798, "learning_rate": 0.0001, "loss": 1.5389, "step": 916552 }, { "epoch": 79.07246376811594, "grad_norm": 0.3191685378551483, "learning_rate": 0.0001, "loss": 1.5407, "step": 916608 }, { "epoch": 79.07729468599034, "grad_norm": 0.4174763262271881, "learning_rate": 0.0001, "loss": 1.5317, "step": 916664 }, { "epoch": 79.08212560386474, "grad_norm": 0.9444323778152466, "learning_rate": 0.0001, "loss": 1.5446, "step": 916720 }, { "epoch": 79.08695652173913, "grad_norm": 0.30522146821022034, "learning_rate": 0.0001, "loss": 1.5389, "step": 916776 }, { "epoch": 79.09178743961353, "grad_norm": 0.6204567551612854, "learning_rate": 0.0001, "loss": 1.5433, "step": 916832 }, { "epoch": 79.09661835748793, "grad_norm": 0.3203633725643158, "learning_rate": 0.0001, "loss": 1.5396, "step": 916888 }, { "epoch": 79.10144927536231, "grad_norm": 0.33762767910957336, "learning_rate": 0.0001, "loss": 1.5392, "step": 916944 }, { "epoch": 79.10628019323671, "grad_norm": 0.3217253088951111, "learning_rate": 0.0001, "loss": 1.5403, "step": 917000 }, { "epoch": 79.11111111111111, "grad_norm": 3.220623731613159, "learning_rate": 0.0001, "loss": 1.5431, "step": 917056 }, { "epoch": 79.1159420289855, "grad_norm": 0.34848034381866455, "learning_rate": 0.0001, "loss": 1.5369, "step": 917112 }, { "epoch": 79.1207729468599, "grad_norm": 0.28418633341789246, "learning_rate": 0.0001, "loss": 1.5422, "step": 917168 }, { "epoch": 79.1256038647343, "grad_norm": 1.0650274753570557, "learning_rate": 0.0001, "loss": 1.5379, "step": 917224 }, { "epoch": 79.1304347826087, "grad_norm": 0.32894423604011536, "learning_rate": 0.0001, "loss": 1.542, "step": 917280 }, { "epoch": 79.13526570048309, "grad_norm": 0.5637495517730713, "learning_rate": 0.0001, "loss": 1.5358, "step": 917336 }, { "epoch": 79.14009661835749, "grad_norm": 0.9410009980201721, "learning_rate": 0.0001, "loss": 1.5347, "step": 917392 }, { "epoch": 79.14492753623189, "grad_norm": 0.37441959977149963, "learning_rate": 0.0001, "loss": 1.5448, "step": 917448 }, { "epoch": 79.14975845410628, "grad_norm": 0.3447473645210266, "learning_rate": 0.0001, "loss": 1.5391, "step": 917504 }, { "epoch": 79.15458937198068, "grad_norm": 0.5699714422225952, "learning_rate": 0.0001, "loss": 1.5378, "step": 917560 }, { "epoch": 79.15942028985508, "grad_norm": 10.1673583984375, "learning_rate": 0.0001, "loss": 1.5415, "step": 917616 }, { "epoch": 79.16425120772946, "grad_norm": 0.5444681644439697, "learning_rate": 0.0001, "loss": 1.5386, "step": 917672 }, { "epoch": 79.16908212560386, "grad_norm": 0.3977280557155609, "learning_rate": 0.0001, "loss": 1.5389, "step": 917728 }, { "epoch": 79.17391304347827, "grad_norm": 0.4107820987701416, "learning_rate": 0.0001, "loss": 1.5337, "step": 917784 }, { "epoch": 79.17874396135265, "grad_norm": 0.3455648720264435, "learning_rate": 0.0001, "loss": 1.5371, "step": 917840 }, { "epoch": 79.18357487922705, "grad_norm": 0.3824272155761719, "learning_rate": 0.0001, "loss": 1.5476, "step": 917896 }, { "epoch": 79.18840579710145, "grad_norm": 0.2675459086894989, "learning_rate": 0.0001, "loss": 1.5414, "step": 917952 }, { "epoch": 79.19323671497584, "grad_norm": 0.37730854749679565, "learning_rate": 0.0001, "loss": 1.5364, "step": 918008 }, { "epoch": 79.19806763285024, "grad_norm": 0.45737341046333313, "learning_rate": 0.0001, "loss": 1.5416, "step": 918064 }, { "epoch": 79.20289855072464, "grad_norm": 0.2514328360557556, "learning_rate": 0.0001, "loss": 1.5411, "step": 918120 }, { "epoch": 79.20772946859903, "grad_norm": 0.9005009531974792, "learning_rate": 0.0001, "loss": 1.5459, "step": 918176 }, { "epoch": 79.21256038647343, "grad_norm": 0.8920179009437561, "learning_rate": 0.0001, "loss": 1.5442, "step": 918232 }, { "epoch": 79.21739130434783, "grad_norm": 0.4717774987220764, "learning_rate": 0.0001, "loss": 1.5453, "step": 918288 }, { "epoch": 79.22222222222223, "grad_norm": 0.40128999948501587, "learning_rate": 0.0001, "loss": 1.5394, "step": 918344 }, { "epoch": 79.22705314009661, "grad_norm": 0.40500327944755554, "learning_rate": 0.0001, "loss": 1.5357, "step": 918400 }, { "epoch": 79.23188405797102, "grad_norm": 1.6248362064361572, "learning_rate": 0.0001, "loss": 1.5389, "step": 918456 }, { "epoch": 79.23671497584542, "grad_norm": 6.066866397857666, "learning_rate": 0.0001, "loss": 1.534, "step": 918512 }, { "epoch": 79.2415458937198, "grad_norm": 1.4554351568222046, "learning_rate": 0.0001, "loss": 1.5388, "step": 918568 }, { "epoch": 79.2463768115942, "grad_norm": 2.7451765537261963, "learning_rate": 0.0001, "loss": 1.5403, "step": 918624 }, { "epoch": 79.2512077294686, "grad_norm": 3.0488452911376953, "learning_rate": 0.0001, "loss": 1.5396, "step": 918680 }, { "epoch": 79.25603864734299, "grad_norm": 0.3622616231441498, "learning_rate": 0.0001, "loss": 1.5436, "step": 918736 }, { "epoch": 79.26086956521739, "grad_norm": 1.132912516593933, "learning_rate": 0.0001, "loss": 1.5417, "step": 918792 }, { "epoch": 79.26570048309179, "grad_norm": 0.22858107089996338, "learning_rate": 0.0001, "loss": 1.5429, "step": 918848 }, { "epoch": 79.27053140096618, "grad_norm": 0.3561394512653351, "learning_rate": 0.0001, "loss": 1.5366, "step": 918904 }, { "epoch": 79.27536231884058, "grad_norm": 0.43151408433914185, "learning_rate": 0.0001, "loss": 1.5374, "step": 918960 }, { "epoch": 79.28019323671498, "grad_norm": 0.37654903531074524, "learning_rate": 0.0001, "loss": 1.5391, "step": 919016 }, { "epoch": 79.28502415458937, "grad_norm": 0.26144951581954956, "learning_rate": 0.0001, "loss": 1.5384, "step": 919072 }, { "epoch": 79.28985507246377, "grad_norm": 1.8327221870422363, "learning_rate": 0.0001, "loss": 1.5376, "step": 919128 }, { "epoch": 79.29468599033817, "grad_norm": 0.3136448860168457, "learning_rate": 0.0001, "loss": 1.5407, "step": 919184 }, { "epoch": 79.29951690821257, "grad_norm": 0.26538440585136414, "learning_rate": 0.0001, "loss": 1.5446, "step": 919240 }, { "epoch": 79.30434782608695, "grad_norm": 0.2953964173793793, "learning_rate": 0.0001, "loss": 1.5408, "step": 919296 }, { "epoch": 79.30917874396135, "grad_norm": 0.4783855676651001, "learning_rate": 0.0001, "loss": 1.5347, "step": 919352 }, { "epoch": 79.31400966183575, "grad_norm": 1.150491714477539, "learning_rate": 0.0001, "loss": 1.5382, "step": 919408 }, { "epoch": 79.31884057971014, "grad_norm": 0.3517020046710968, "learning_rate": 0.0001, "loss": 1.5382, "step": 919464 }, { "epoch": 79.32367149758454, "grad_norm": 0.33420559763908386, "learning_rate": 0.0001, "loss": 1.5366, "step": 919520 }, { "epoch": 79.32850241545894, "grad_norm": 0.794965386390686, "learning_rate": 0.0001, "loss": 1.5419, "step": 919576 }, { "epoch": 79.33333333333333, "grad_norm": 0.2849220335483551, "learning_rate": 0.0001, "loss": 1.533, "step": 919632 }, { "epoch": 79.33816425120773, "grad_norm": 0.25828269124031067, "learning_rate": 0.0001, "loss": 1.5382, "step": 919688 }, { "epoch": 79.34299516908213, "grad_norm": 0.30719321966171265, "learning_rate": 0.0001, "loss": 1.5356, "step": 919744 }, { "epoch": 79.34782608695652, "grad_norm": 0.28618377447128296, "learning_rate": 0.0001, "loss": 1.531, "step": 919800 }, { "epoch": 79.35265700483092, "grad_norm": 0.35456913709640503, "learning_rate": 0.0001, "loss": 1.5398, "step": 919856 }, { "epoch": 79.35748792270532, "grad_norm": 0.29829373955726624, "learning_rate": 0.0001, "loss": 1.5379, "step": 919912 }, { "epoch": 79.3623188405797, "grad_norm": 0.25921696424484253, "learning_rate": 0.0001, "loss": 1.5351, "step": 919968 }, { "epoch": 79.3671497584541, "grad_norm": 0.34751981496810913, "learning_rate": 0.0001, "loss": 1.5434, "step": 920024 }, { "epoch": 79.3719806763285, "grad_norm": 0.3951878547668457, "learning_rate": 0.0001, "loss": 1.54, "step": 920080 }, { "epoch": 79.3768115942029, "grad_norm": 0.2772815227508545, "learning_rate": 0.0001, "loss": 1.5357, "step": 920136 }, { "epoch": 79.38164251207729, "grad_norm": 0.5573855638504028, "learning_rate": 0.0001, "loss": 1.5381, "step": 920192 }, { "epoch": 79.38647342995169, "grad_norm": 0.37169766426086426, "learning_rate": 0.0001, "loss": 1.5413, "step": 920248 }, { "epoch": 79.3913043478261, "grad_norm": 0.3448805809020996, "learning_rate": 0.0001, "loss": 1.5455, "step": 920304 }, { "epoch": 79.39613526570048, "grad_norm": 0.2640610337257385, "learning_rate": 0.0001, "loss": 1.5445, "step": 920360 }, { "epoch": 79.40096618357488, "grad_norm": 0.30498960614204407, "learning_rate": 0.0001, "loss": 1.5329, "step": 920416 }, { "epoch": 79.40579710144928, "grad_norm": 2.1403863430023193, "learning_rate": 0.0001, "loss": 1.5369, "step": 920472 }, { "epoch": 79.41062801932367, "grad_norm": 0.29000717401504517, "learning_rate": 0.0001, "loss": 1.538, "step": 920528 }, { "epoch": 79.41545893719807, "grad_norm": 0.31579843163490295, "learning_rate": 0.0001, "loss": 1.536, "step": 920584 }, { "epoch": 79.42028985507247, "grad_norm": 0.32541903853416443, "learning_rate": 0.0001, "loss": 1.543, "step": 920640 }, { "epoch": 79.42512077294685, "grad_norm": 0.3135448396205902, "learning_rate": 0.0001, "loss": 1.5463, "step": 920696 }, { "epoch": 79.42995169082126, "grad_norm": 0.3711724579334259, "learning_rate": 0.0001, "loss": 1.5445, "step": 920752 }, { "epoch": 79.43478260869566, "grad_norm": 0.4648873805999756, "learning_rate": 0.0001, "loss": 1.5325, "step": 920808 }, { "epoch": 79.43961352657004, "grad_norm": 0.3198508620262146, "learning_rate": 0.0001, "loss": 1.5417, "step": 920864 }, { "epoch": 79.44444444444444, "grad_norm": 0.2791030704975128, "learning_rate": 0.0001, "loss": 1.5352, "step": 920920 }, { "epoch": 79.44927536231884, "grad_norm": 0.3359648287296295, "learning_rate": 0.0001, "loss": 1.5415, "step": 920976 }, { "epoch": 79.45410628019323, "grad_norm": 0.2846261262893677, "learning_rate": 0.0001, "loss": 1.5429, "step": 921032 }, { "epoch": 79.45893719806763, "grad_norm": 1.151986837387085, "learning_rate": 0.0001, "loss": 1.538, "step": 921088 }, { "epoch": 79.46376811594203, "grad_norm": 0.31550079584121704, "learning_rate": 0.0001, "loss": 1.5433, "step": 921144 }, { "epoch": 79.46859903381643, "grad_norm": 0.29191359877586365, "learning_rate": 0.0001, "loss": 1.5424, "step": 921200 }, { "epoch": 79.47342995169082, "grad_norm": 0.2578412592411041, "learning_rate": 0.0001, "loss": 1.5389, "step": 921256 }, { "epoch": 79.47826086956522, "grad_norm": 0.5285391807556152, "learning_rate": 0.0001, "loss": 1.5404, "step": 921312 }, { "epoch": 79.48309178743962, "grad_norm": 0.3613726496696472, "learning_rate": 0.0001, "loss": 1.5325, "step": 921368 }, { "epoch": 79.487922705314, "grad_norm": 0.4365033507347107, "learning_rate": 0.0001, "loss": 1.5408, "step": 921424 }, { "epoch": 79.4927536231884, "grad_norm": 0.273699551820755, "learning_rate": 0.0001, "loss": 1.5369, "step": 921480 }, { "epoch": 79.4975845410628, "grad_norm": 0.38549697399139404, "learning_rate": 0.0001, "loss": 1.5411, "step": 921536 }, { "epoch": 79.5024154589372, "grad_norm": 0.3107045590877533, "learning_rate": 0.0001, "loss": 1.5369, "step": 921592 }, { "epoch": 79.5072463768116, "grad_norm": 0.7915700674057007, "learning_rate": 0.0001, "loss": 1.5415, "step": 921648 }, { "epoch": 79.512077294686, "grad_norm": 0.3865875005722046, "learning_rate": 0.0001, "loss": 1.543, "step": 921704 }, { "epoch": 79.51690821256038, "grad_norm": 0.3013225197792053, "learning_rate": 0.0001, "loss": 1.5318, "step": 921760 }, { "epoch": 79.52173913043478, "grad_norm": 0.4886389970779419, "learning_rate": 0.0001, "loss": 1.5454, "step": 921816 }, { "epoch": 79.52657004830918, "grad_norm": 0.5009304881095886, "learning_rate": 0.0001, "loss": 1.533, "step": 921872 }, { "epoch": 79.53140096618357, "grad_norm": 0.2859712839126587, "learning_rate": 0.0001, "loss": 1.542, "step": 921928 }, { "epoch": 79.53623188405797, "grad_norm": 0.5032116174697876, "learning_rate": 0.0001, "loss": 1.5385, "step": 921984 }, { "epoch": 79.54106280193237, "grad_norm": 0.29908883571624756, "learning_rate": 0.0001, "loss": 1.5411, "step": 922040 }, { "epoch": 79.54589371980677, "grad_norm": 0.39805397391319275, "learning_rate": 0.0001, "loss": 1.5367, "step": 922096 }, { "epoch": 79.55072463768116, "grad_norm": 0.4719856083393097, "learning_rate": 0.0001, "loss": 1.5371, "step": 922152 }, { "epoch": 79.55555555555556, "grad_norm": 0.3275960683822632, "learning_rate": 0.0001, "loss": 1.5401, "step": 922208 }, { "epoch": 79.56038647342996, "grad_norm": 1.1870392560958862, "learning_rate": 0.0001, "loss": 1.5453, "step": 922264 }, { "epoch": 79.56521739130434, "grad_norm": 0.26985740661621094, "learning_rate": 0.0001, "loss": 1.5462, "step": 922320 }, { "epoch": 79.57004830917874, "grad_norm": 0.3174995183944702, "learning_rate": 0.0001, "loss": 1.5383, "step": 922376 }, { "epoch": 79.57487922705315, "grad_norm": 0.2664487659931183, "learning_rate": 0.0001, "loss": 1.548, "step": 922432 }, { "epoch": 79.57971014492753, "grad_norm": 0.3053112328052521, "learning_rate": 0.0001, "loss": 1.5373, "step": 922488 }, { "epoch": 79.58454106280193, "grad_norm": 0.3103458285331726, "learning_rate": 0.0001, "loss": 1.5448, "step": 922544 }, { "epoch": 79.58937198067633, "grad_norm": 0.2801256477832794, "learning_rate": 0.0001, "loss": 1.5358, "step": 922600 }, { "epoch": 79.59420289855072, "grad_norm": 0.2911549508571625, "learning_rate": 0.0001, "loss": 1.5414, "step": 922656 }, { "epoch": 79.59903381642512, "grad_norm": 0.2819034159183502, "learning_rate": 0.0001, "loss": 1.5364, "step": 922712 }, { "epoch": 79.60386473429952, "grad_norm": 0.3472531735897064, "learning_rate": 0.0001, "loss": 1.5356, "step": 922768 }, { "epoch": 79.6086956521739, "grad_norm": 0.33419206738471985, "learning_rate": 0.0001, "loss": 1.5467, "step": 922824 }, { "epoch": 79.61352657004831, "grad_norm": 0.31783515214920044, "learning_rate": 0.0001, "loss": 1.5387, "step": 922880 }, { "epoch": 79.61835748792271, "grad_norm": 6.505009174346924, "learning_rate": 0.0001, "loss": 1.5425, "step": 922936 }, { "epoch": 79.6231884057971, "grad_norm": 0.3034021854400635, "learning_rate": 0.0001, "loss": 1.5422, "step": 922992 }, { "epoch": 79.6280193236715, "grad_norm": 0.3528486490249634, "learning_rate": 0.0001, "loss": 1.5435, "step": 923048 }, { "epoch": 79.6328502415459, "grad_norm": 0.2691405415534973, "learning_rate": 0.0001, "loss": 1.5395, "step": 923104 }, { "epoch": 79.6376811594203, "grad_norm": 0.4208216369152069, "learning_rate": 0.0001, "loss": 1.5376, "step": 923160 }, { "epoch": 79.64251207729468, "grad_norm": 0.24408522248268127, "learning_rate": 0.0001, "loss": 1.5432, "step": 923216 }, { "epoch": 79.64734299516908, "grad_norm": 0.38549163937568665, "learning_rate": 0.0001, "loss": 1.5422, "step": 923272 }, { "epoch": 79.65217391304348, "grad_norm": 2.4281792640686035, "learning_rate": 0.0001, "loss": 1.5374, "step": 923328 }, { "epoch": 79.65700483091787, "grad_norm": 25.483543395996094, "learning_rate": 0.0001, "loss": 1.5377, "step": 923384 }, { "epoch": 79.66183574879227, "grad_norm": 0.2673294246196747, "learning_rate": 0.0001, "loss": 1.5397, "step": 923440 }, { "epoch": 79.66666666666667, "grad_norm": 0.38171809911727905, "learning_rate": 0.0001, "loss": 1.5437, "step": 923496 }, { "epoch": 79.67149758454106, "grad_norm": 0.38681915402412415, "learning_rate": 0.0001, "loss": 1.5385, "step": 923552 }, { "epoch": 79.67632850241546, "grad_norm": 0.3477536141872406, "learning_rate": 0.0001, "loss": 1.5421, "step": 923608 }, { "epoch": 79.68115942028986, "grad_norm": 0.5152520537376404, "learning_rate": 0.0001, "loss": 1.5476, "step": 923664 }, { "epoch": 79.68599033816425, "grad_norm": 0.31521928310394287, "learning_rate": 0.0001, "loss": 1.5435, "step": 923720 }, { "epoch": 79.69082125603865, "grad_norm": 0.37572187185287476, "learning_rate": 0.0001, "loss": 1.5457, "step": 923776 }, { "epoch": 79.69565217391305, "grad_norm": 0.4137894809246063, "learning_rate": 0.0001, "loss": 1.5406, "step": 923832 }, { "epoch": 79.70048309178743, "grad_norm": 0.27317675948143005, "learning_rate": 0.0001, "loss": 1.5463, "step": 923888 }, { "epoch": 79.70531400966183, "grad_norm": 3.318833351135254, "learning_rate": 0.0001, "loss": 1.5322, "step": 923944 }, { "epoch": 79.71014492753623, "grad_norm": 0.26841503381729126, "learning_rate": 0.0001, "loss": 1.54, "step": 924000 }, { "epoch": 79.71497584541063, "grad_norm": 0.4088207185268402, "learning_rate": 0.0001, "loss": 1.5399, "step": 924056 }, { "epoch": 79.71980676328502, "grad_norm": 0.2991235852241516, "learning_rate": 0.0001, "loss": 1.5456, "step": 924112 }, { "epoch": 79.72463768115942, "grad_norm": 0.305090993642807, "learning_rate": 0.0001, "loss": 1.5384, "step": 924168 }, { "epoch": 79.72946859903382, "grad_norm": 0.2942723333835602, "learning_rate": 0.0001, "loss": 1.5462, "step": 924224 }, { "epoch": 79.73429951690821, "grad_norm": 0.3240179419517517, "learning_rate": 0.0001, "loss": 1.5388, "step": 924280 }, { "epoch": 79.73913043478261, "grad_norm": 0.748625636100769, "learning_rate": 0.0001, "loss": 1.5439, "step": 924336 }, { "epoch": 79.74396135265701, "grad_norm": 0.5292832851409912, "learning_rate": 0.0001, "loss": 1.5447, "step": 924392 }, { "epoch": 79.7487922705314, "grad_norm": 0.3771337866783142, "learning_rate": 0.0001, "loss": 1.5416, "step": 924448 }, { "epoch": 79.7536231884058, "grad_norm": 0.2538004517555237, "learning_rate": 0.0001, "loss": 1.5383, "step": 924504 }, { "epoch": 79.7584541062802, "grad_norm": 1.1661114692687988, "learning_rate": 0.0001, "loss": 1.5469, "step": 924560 }, { "epoch": 79.76328502415458, "grad_norm": 0.27930814027786255, "learning_rate": 0.0001, "loss": 1.544, "step": 924616 }, { "epoch": 79.76811594202898, "grad_norm": 0.6561813354492188, "learning_rate": 0.0001, "loss": 1.5431, "step": 924672 }, { "epoch": 79.77294685990339, "grad_norm": 0.29751601815223694, "learning_rate": 0.0001, "loss": 1.5424, "step": 924728 }, { "epoch": 79.77777777777777, "grad_norm": 8.773868560791016, "learning_rate": 0.0001, "loss": 1.533, "step": 924784 }, { "epoch": 79.78260869565217, "grad_norm": 0.2917194664478302, "learning_rate": 0.0001, "loss": 1.5397, "step": 924840 }, { "epoch": 79.78743961352657, "grad_norm": 0.41237661242485046, "learning_rate": 0.0001, "loss": 1.5423, "step": 924896 }, { "epoch": 79.79227053140096, "grad_norm": 4.401500701904297, "learning_rate": 0.0001, "loss": 1.5472, "step": 924952 }, { "epoch": 79.79710144927536, "grad_norm": 0.31098395586013794, "learning_rate": 0.0001, "loss": 1.5341, "step": 925008 }, { "epoch": 79.80193236714976, "grad_norm": 0.2764168977737427, "learning_rate": 0.0001, "loss": 1.5457, "step": 925064 }, { "epoch": 79.80676328502416, "grad_norm": 1.503452181816101, "learning_rate": 0.0001, "loss": 1.5455, "step": 925120 }, { "epoch": 79.81159420289855, "grad_norm": 0.5458923578262329, "learning_rate": 0.0001, "loss": 1.551, "step": 925176 }, { "epoch": 79.81642512077295, "grad_norm": 0.32571712136268616, "learning_rate": 0.0001, "loss": 1.5428, "step": 925232 }, { "epoch": 79.82125603864735, "grad_norm": 0.27954214811325073, "learning_rate": 0.0001, "loss": 1.5443, "step": 925288 }, { "epoch": 79.82608695652173, "grad_norm": 0.6152105331420898, "learning_rate": 0.0001, "loss": 1.548, "step": 925344 }, { "epoch": 79.83091787439614, "grad_norm": 0.3080536127090454, "learning_rate": 0.0001, "loss": 1.5426, "step": 925400 }, { "epoch": 79.83574879227054, "grad_norm": 0.32587021589279175, "learning_rate": 0.0001, "loss": 1.5385, "step": 925456 }, { "epoch": 79.84057971014492, "grad_norm": 0.28467077016830444, "learning_rate": 0.0001, "loss": 1.5372, "step": 925512 }, { "epoch": 79.84541062801932, "grad_norm": 0.35462743043899536, "learning_rate": 0.0001, "loss": 1.5512, "step": 925568 }, { "epoch": 79.85024154589372, "grad_norm": 0.4947749972343445, "learning_rate": 0.0001, "loss": 1.5468, "step": 925624 }, { "epoch": 79.85507246376811, "grad_norm": 0.43966180086135864, "learning_rate": 0.0001, "loss": 1.5448, "step": 925680 }, { "epoch": 79.85990338164251, "grad_norm": 0.7056536078453064, "learning_rate": 0.0001, "loss": 1.5454, "step": 925736 }, { "epoch": 79.86473429951691, "grad_norm": 0.3062213063240051, "learning_rate": 0.0001, "loss": 1.5439, "step": 925792 }, { "epoch": 79.8695652173913, "grad_norm": 0.3220975995063782, "learning_rate": 0.0001, "loss": 1.5411, "step": 925848 }, { "epoch": 79.8743961352657, "grad_norm": 0.25721633434295654, "learning_rate": 0.0001, "loss": 1.5395, "step": 925904 }, { "epoch": 79.8792270531401, "grad_norm": 0.2599981427192688, "learning_rate": 0.0001, "loss": 1.546, "step": 925960 }, { "epoch": 79.8840579710145, "grad_norm": 0.2914609909057617, "learning_rate": 0.0001, "loss": 1.546, "step": 926016 }, { "epoch": 79.88888888888889, "grad_norm": 2.432797908782959, "learning_rate": 0.0001, "loss": 1.5469, "step": 926072 }, { "epoch": 79.89371980676329, "grad_norm": 0.33943405747413635, "learning_rate": 0.0001, "loss": 1.5434, "step": 926128 }, { "epoch": 79.89855072463769, "grad_norm": 0.4187485873699188, "learning_rate": 0.0001, "loss": 1.5343, "step": 926184 }, { "epoch": 79.90338164251207, "grad_norm": 11.59826374053955, "learning_rate": 0.0001, "loss": 1.5376, "step": 926240 }, { "epoch": 79.90821256038647, "grad_norm": 11.873930931091309, "learning_rate": 0.0001, "loss": 1.5486, "step": 926296 }, { "epoch": 79.91304347826087, "grad_norm": 0.28213050961494446, "learning_rate": 0.0001, "loss": 1.5388, "step": 926352 }, { "epoch": 79.91787439613526, "grad_norm": 0.3300761282444, "learning_rate": 0.0001, "loss": 1.5391, "step": 926408 }, { "epoch": 79.92270531400966, "grad_norm": 0.414393812417984, "learning_rate": 0.0001, "loss": 1.5394, "step": 926464 }, { "epoch": 79.92753623188406, "grad_norm": 2.027982234954834, "learning_rate": 0.0001, "loss": 1.5456, "step": 926520 }, { "epoch": 79.93236714975845, "grad_norm": 0.5667633414268494, "learning_rate": 0.0001, "loss": 1.5377, "step": 926576 }, { "epoch": 79.93719806763285, "grad_norm": 0.27723410725593567, "learning_rate": 0.0001, "loss": 1.5456, "step": 926632 }, { "epoch": 79.94202898550725, "grad_norm": 0.32830744981765747, "learning_rate": 0.0001, "loss": 1.543, "step": 926688 }, { "epoch": 79.94685990338164, "grad_norm": 0.3356005847454071, "learning_rate": 0.0001, "loss": 1.5421, "step": 926744 }, { "epoch": 79.95169082125604, "grad_norm": 8.051163673400879, "learning_rate": 0.0001, "loss": 1.5449, "step": 926800 }, { "epoch": 79.95652173913044, "grad_norm": 0.3361421525478363, "learning_rate": 0.0001, "loss": 1.5448, "step": 926856 }, { "epoch": 79.96135265700484, "grad_norm": 0.2838037610054016, "learning_rate": 0.0001, "loss": 1.5452, "step": 926912 }, { "epoch": 79.96618357487922, "grad_norm": 0.24946697056293488, "learning_rate": 0.0001, "loss": 1.5392, "step": 926968 }, { "epoch": 79.97101449275362, "grad_norm": 0.2508333623409271, "learning_rate": 0.0001, "loss": 1.5447, "step": 927024 }, { "epoch": 79.97584541062803, "grad_norm": 0.32734090089797974, "learning_rate": 0.0001, "loss": 1.546, "step": 927080 }, { "epoch": 79.98067632850241, "grad_norm": 0.27906620502471924, "learning_rate": 0.0001, "loss": 1.5391, "step": 927136 }, { "epoch": 79.98550724637681, "grad_norm": 0.276443749666214, "learning_rate": 0.0001, "loss": 1.5466, "step": 927192 }, { "epoch": 79.99033816425121, "grad_norm": 0.28398895263671875, "learning_rate": 0.0001, "loss": 1.5453, "step": 927248 }, { "epoch": 79.9951690821256, "grad_norm": 0.28044652938842773, "learning_rate": 0.0001, "loss": 1.5436, "step": 927304 }, { "epoch": 80.0, "grad_norm": 0.42185837030410767, "learning_rate": 0.0001, "loss": 1.5452, "step": 927360 }, { "epoch": 80.0048309178744, "grad_norm": 0.3382316827774048, "learning_rate": 0.0001, "loss": 1.5347, "step": 927416 }, { "epoch": 80.00966183574879, "grad_norm": 0.44356080889701843, "learning_rate": 0.0001, "loss": 1.5355, "step": 927472 }, { "epoch": 80.01449275362319, "grad_norm": 0.27261993288993835, "learning_rate": 0.0001, "loss": 1.5311, "step": 927528 }, { "epoch": 80.01932367149759, "grad_norm": 0.25946661829948425, "learning_rate": 0.0001, "loss": 1.5345, "step": 927584 }, { "epoch": 80.02415458937197, "grad_norm": 0.32575130462646484, "learning_rate": 0.0001, "loss": 1.5419, "step": 927640 }, { "epoch": 80.02898550724638, "grad_norm": 0.2713773846626282, "learning_rate": 0.0001, "loss": 1.543, "step": 927696 }, { "epoch": 80.03381642512078, "grad_norm": 0.3853548467159271, "learning_rate": 0.0001, "loss": 1.5347, "step": 927752 }, { "epoch": 80.03864734299516, "grad_norm": 0.5714399814605713, "learning_rate": 0.0001, "loss": 1.5391, "step": 927808 }, { "epoch": 80.04347826086956, "grad_norm": 0.3214564919471741, "learning_rate": 0.0001, "loss": 1.5383, "step": 927864 }, { "epoch": 80.04830917874396, "grad_norm": 0.8356935381889343, "learning_rate": 0.0001, "loss": 1.5341, "step": 927920 }, { "epoch": 80.05314009661836, "grad_norm": 0.33444511890411377, "learning_rate": 0.0001, "loss": 1.5384, "step": 927976 }, { "epoch": 80.05797101449275, "grad_norm": 0.2631012499332428, "learning_rate": 0.0001, "loss": 1.5419, "step": 928032 }, { "epoch": 80.06280193236715, "grad_norm": 0.3375038802623749, "learning_rate": 0.0001, "loss": 1.5372, "step": 928088 }, { "epoch": 80.06763285024155, "grad_norm": 0.2888491153717041, "learning_rate": 0.0001, "loss": 1.5351, "step": 928144 }, { "epoch": 80.07246376811594, "grad_norm": 0.2596883177757263, "learning_rate": 0.0001, "loss": 1.5434, "step": 928200 }, { "epoch": 80.07729468599034, "grad_norm": 1.0182827711105347, "learning_rate": 0.0001, "loss": 1.5322, "step": 928256 }, { "epoch": 80.08212560386474, "grad_norm": 0.25316122174263, "learning_rate": 0.0001, "loss": 1.5349, "step": 928312 }, { "epoch": 80.08695652173913, "grad_norm": 0.2522345781326294, "learning_rate": 0.0001, "loss": 1.5369, "step": 928368 }, { "epoch": 80.09178743961353, "grad_norm": 0.33957964181900024, "learning_rate": 0.0001, "loss": 1.5364, "step": 928424 }, { "epoch": 80.09661835748793, "grad_norm": 0.21796543896198273, "learning_rate": 0.0001, "loss": 1.5341, "step": 928480 }, { "epoch": 80.10144927536231, "grad_norm": 0.3780405819416046, "learning_rate": 0.0001, "loss": 1.5391, "step": 928536 }, { "epoch": 80.10628019323671, "grad_norm": 0.3339178264141083, "learning_rate": 0.0001, "loss": 1.532, "step": 928592 }, { "epoch": 80.11111111111111, "grad_norm": 0.2681741416454315, "learning_rate": 0.0001, "loss": 1.5387, "step": 928648 }, { "epoch": 80.1159420289855, "grad_norm": 0.3113389015197754, "learning_rate": 0.0001, "loss": 1.5392, "step": 928704 }, { "epoch": 80.1207729468599, "grad_norm": 0.28852763772010803, "learning_rate": 0.0001, "loss": 1.535, "step": 928760 }, { "epoch": 80.1256038647343, "grad_norm": 0.3503413498401642, "learning_rate": 0.0001, "loss": 1.5429, "step": 928816 }, { "epoch": 80.1304347826087, "grad_norm": 0.27427294850349426, "learning_rate": 0.0001, "loss": 1.5386, "step": 928872 }, { "epoch": 80.13526570048309, "grad_norm": 0.37667685747146606, "learning_rate": 0.0001, "loss": 1.5319, "step": 928928 }, { "epoch": 80.14009661835749, "grad_norm": 0.3232547640800476, "learning_rate": 0.0001, "loss": 1.5422, "step": 928984 }, { "epoch": 80.14492753623189, "grad_norm": 0.33315569162368774, "learning_rate": 0.0001, "loss": 1.54, "step": 929040 }, { "epoch": 80.14975845410628, "grad_norm": 0.4545743763446808, "learning_rate": 0.0001, "loss": 1.5387, "step": 929096 }, { "epoch": 80.15458937198068, "grad_norm": 0.307144433259964, "learning_rate": 0.0001, "loss": 1.5413, "step": 929152 }, { "epoch": 80.15942028985508, "grad_norm": 0.36335840821266174, "learning_rate": 0.0001, "loss": 1.5342, "step": 929208 }, { "epoch": 80.16425120772946, "grad_norm": 0.24768990278244019, "learning_rate": 0.0001, "loss": 1.5354, "step": 929264 }, { "epoch": 80.16908212560386, "grad_norm": 0.317692369222641, "learning_rate": 0.0001, "loss": 1.5352, "step": 929320 }, { "epoch": 80.17391304347827, "grad_norm": 0.32017087936401367, "learning_rate": 0.0001, "loss": 1.5356, "step": 929376 }, { "epoch": 80.17874396135265, "grad_norm": 0.2681630253791809, "learning_rate": 0.0001, "loss": 1.5355, "step": 929432 }, { "epoch": 80.18357487922705, "grad_norm": 1.42319917678833, "learning_rate": 0.0001, "loss": 1.5426, "step": 929488 }, { "epoch": 80.18840579710145, "grad_norm": 0.3307180404663086, "learning_rate": 0.0001, "loss": 1.5425, "step": 929544 }, { "epoch": 80.19323671497584, "grad_norm": 0.4181622266769409, "learning_rate": 0.0001, "loss": 1.5366, "step": 929600 }, { "epoch": 80.19806763285024, "grad_norm": 0.3966149687767029, "learning_rate": 0.0001, "loss": 1.5407, "step": 929656 }, { "epoch": 80.20289855072464, "grad_norm": 0.2609166204929352, "learning_rate": 0.0001, "loss": 1.5391, "step": 929712 }, { "epoch": 80.20772946859903, "grad_norm": 0.2752124071121216, "learning_rate": 0.0001, "loss": 1.5393, "step": 929768 }, { "epoch": 80.21256038647343, "grad_norm": 0.2586921155452728, "learning_rate": 0.0001, "loss": 1.5435, "step": 929824 }, { "epoch": 80.21739130434783, "grad_norm": 0.9370951652526855, "learning_rate": 0.0001, "loss": 1.5289, "step": 929880 }, { "epoch": 80.22222222222223, "grad_norm": 0.425060898065567, "learning_rate": 0.0001, "loss": 1.5379, "step": 929936 }, { "epoch": 80.22705314009661, "grad_norm": 0.35000526905059814, "learning_rate": 0.0001, "loss": 1.5367, "step": 929992 }, { "epoch": 80.23188405797102, "grad_norm": 0.2834046483039856, "learning_rate": 0.0001, "loss": 1.5427, "step": 930048 }, { "epoch": 80.23671497584542, "grad_norm": 0.4054237902164459, "learning_rate": 0.0001, "loss": 1.5449, "step": 930104 }, { "epoch": 80.2415458937198, "grad_norm": 0.29351720213890076, "learning_rate": 0.0001, "loss": 1.5405, "step": 930160 }, { "epoch": 80.2463768115942, "grad_norm": 0.393742173910141, "learning_rate": 0.0001, "loss": 1.534, "step": 930216 }, { "epoch": 80.2512077294686, "grad_norm": 0.26636964082717896, "learning_rate": 0.0001, "loss": 1.5287, "step": 930272 }, { "epoch": 80.25603864734299, "grad_norm": 0.2934848964214325, "learning_rate": 0.0001, "loss": 1.5363, "step": 930328 }, { "epoch": 80.26086956521739, "grad_norm": 9.762383460998535, "learning_rate": 0.0001, "loss": 1.5385, "step": 930384 }, { "epoch": 80.26570048309179, "grad_norm": 0.3294130861759186, "learning_rate": 0.0001, "loss": 1.538, "step": 930440 }, { "epoch": 80.27053140096618, "grad_norm": 0.287813663482666, "learning_rate": 0.0001, "loss": 1.534, "step": 930496 }, { "epoch": 80.27536231884058, "grad_norm": 0.4863489866256714, "learning_rate": 0.0001, "loss": 1.5382, "step": 930552 }, { "epoch": 80.28019323671498, "grad_norm": 0.3204800486564636, "learning_rate": 0.0001, "loss": 1.5383, "step": 930608 }, { "epoch": 80.28502415458937, "grad_norm": 0.30556777119636536, "learning_rate": 0.0001, "loss": 1.538, "step": 930664 }, { "epoch": 80.28985507246377, "grad_norm": 5.5613179206848145, "learning_rate": 0.0001, "loss": 1.5358, "step": 930720 }, { "epoch": 80.29468599033817, "grad_norm": 0.29572346806526184, "learning_rate": 0.0001, "loss": 1.5335, "step": 930776 }, { "epoch": 80.29951690821257, "grad_norm": 0.35213226079940796, "learning_rate": 0.0001, "loss": 1.5362, "step": 930832 }, { "epoch": 80.30434782608695, "grad_norm": 1.0043221712112427, "learning_rate": 0.0001, "loss": 1.5372, "step": 930888 }, { "epoch": 80.30917874396135, "grad_norm": 0.29177939891815186, "learning_rate": 0.0001, "loss": 1.5336, "step": 930944 }, { "epoch": 80.31400966183575, "grad_norm": 0.42679721117019653, "learning_rate": 0.0001, "loss": 1.5445, "step": 931000 }, { "epoch": 80.31884057971014, "grad_norm": 0.4907686412334442, "learning_rate": 0.0001, "loss": 1.5363, "step": 931056 }, { "epoch": 80.32367149758454, "grad_norm": 0.27431967854499817, "learning_rate": 0.0001, "loss": 1.5367, "step": 931112 }, { "epoch": 80.32850241545894, "grad_norm": 0.3290030360221863, "learning_rate": 0.0001, "loss": 1.539, "step": 931168 }, { "epoch": 80.33333333333333, "grad_norm": 0.5224719047546387, "learning_rate": 0.0001, "loss": 1.5391, "step": 931224 }, { "epoch": 80.33816425120773, "grad_norm": 0.3125714063644409, "learning_rate": 0.0001, "loss": 1.5377, "step": 931280 }, { "epoch": 80.34299516908213, "grad_norm": 0.3669188320636749, "learning_rate": 0.0001, "loss": 1.5437, "step": 931336 }, { "epoch": 80.34782608695652, "grad_norm": 0.3034742474555969, "learning_rate": 0.0001, "loss": 1.5351, "step": 931392 }, { "epoch": 80.35265700483092, "grad_norm": 0.3150305449962616, "learning_rate": 0.0001, "loss": 1.5371, "step": 931448 }, { "epoch": 80.35748792270532, "grad_norm": 0.3134607672691345, "learning_rate": 0.0001, "loss": 1.5378, "step": 931504 }, { "epoch": 80.3623188405797, "grad_norm": 1.020257830619812, "learning_rate": 0.0001, "loss": 1.5409, "step": 931560 }, { "epoch": 80.3671497584541, "grad_norm": 0.6028734445571899, "learning_rate": 0.0001, "loss": 1.5369, "step": 931616 }, { "epoch": 80.3719806763285, "grad_norm": 1.7890183925628662, "learning_rate": 0.0001, "loss": 1.536, "step": 931672 }, { "epoch": 80.3768115942029, "grad_norm": 17.454708099365234, "learning_rate": 0.0001, "loss": 1.5369, "step": 931728 }, { "epoch": 80.38164251207729, "grad_norm": 0.30018430948257446, "learning_rate": 0.0001, "loss": 1.5416, "step": 931784 }, { "epoch": 80.38647342995169, "grad_norm": 0.5617804527282715, "learning_rate": 0.0001, "loss": 1.5434, "step": 931840 }, { "epoch": 80.3913043478261, "grad_norm": 0.6517516374588013, "learning_rate": 0.0001, "loss": 1.5367, "step": 931896 }, { "epoch": 80.39613526570048, "grad_norm": 1.2323055267333984, "learning_rate": 0.0001, "loss": 1.5359, "step": 931952 }, { "epoch": 80.40096618357488, "grad_norm": 0.34958529472351074, "learning_rate": 0.0001, "loss": 1.5465, "step": 932008 }, { "epoch": 80.40579710144928, "grad_norm": 0.6971215009689331, "learning_rate": 0.0001, "loss": 1.5374, "step": 932064 }, { "epoch": 80.41062801932367, "grad_norm": 0.302078515291214, "learning_rate": 0.0001, "loss": 1.5421, "step": 932120 }, { "epoch": 80.41545893719807, "grad_norm": 0.24417105317115784, "learning_rate": 0.0001, "loss": 1.5403, "step": 932176 }, { "epoch": 80.42028985507247, "grad_norm": 0.24878594279289246, "learning_rate": 0.0001, "loss": 1.5339, "step": 932232 }, { "epoch": 80.42512077294685, "grad_norm": 0.3253902792930603, "learning_rate": 0.0001, "loss": 1.5385, "step": 932288 }, { "epoch": 80.42995169082126, "grad_norm": 0.34174928069114685, "learning_rate": 0.0001, "loss": 1.5421, "step": 932344 }, { "epoch": 80.43478260869566, "grad_norm": 0.3925272822380066, "learning_rate": 0.0001, "loss": 1.5394, "step": 932400 }, { "epoch": 80.43961352657004, "grad_norm": 1.071459174156189, "learning_rate": 0.0001, "loss": 1.5416, "step": 932456 }, { "epoch": 80.44444444444444, "grad_norm": 0.3015718460083008, "learning_rate": 0.0001, "loss": 1.5357, "step": 932512 }, { "epoch": 80.44927536231884, "grad_norm": 0.2740243375301361, "learning_rate": 0.0001, "loss": 1.5435, "step": 932568 }, { "epoch": 80.45410628019323, "grad_norm": 1.0218946933746338, "learning_rate": 0.0001, "loss": 1.5448, "step": 932624 }, { "epoch": 80.45893719806763, "grad_norm": 0.2704409062862396, "learning_rate": 0.0001, "loss": 1.5375, "step": 932680 }, { "epoch": 80.46376811594203, "grad_norm": 7.074089527130127, "learning_rate": 0.0001, "loss": 1.54, "step": 932736 }, { "epoch": 80.46859903381643, "grad_norm": 0.3312263488769531, "learning_rate": 0.0001, "loss": 1.5312, "step": 932792 }, { "epoch": 80.47342995169082, "grad_norm": 0.4106878936290741, "learning_rate": 0.0001, "loss": 1.5374, "step": 932848 }, { "epoch": 80.47826086956522, "grad_norm": 0.34686920046806335, "learning_rate": 0.0001, "loss": 1.5306, "step": 932904 }, { "epoch": 80.48309178743962, "grad_norm": 0.2825300693511963, "learning_rate": 0.0001, "loss": 1.5386, "step": 932960 }, { "epoch": 80.487922705314, "grad_norm": 0.5066402554512024, "learning_rate": 0.0001, "loss": 1.5421, "step": 933016 }, { "epoch": 80.4927536231884, "grad_norm": 0.3182002305984497, "learning_rate": 0.0001, "loss": 1.5343, "step": 933072 }, { "epoch": 80.4975845410628, "grad_norm": 0.328473836183548, "learning_rate": 0.0001, "loss": 1.544, "step": 933128 }, { "epoch": 80.5024154589372, "grad_norm": 0.2629919648170471, "learning_rate": 0.0001, "loss": 1.5368, "step": 933184 }, { "epoch": 80.5072463768116, "grad_norm": 1.9220712184906006, "learning_rate": 0.0001, "loss": 1.5342, "step": 933240 }, { "epoch": 80.512077294686, "grad_norm": 0.2778517007827759, "learning_rate": 0.0001, "loss": 1.5355, "step": 933296 }, { "epoch": 80.51690821256038, "grad_norm": 8.786460876464844, "learning_rate": 0.0001, "loss": 1.532, "step": 933352 }, { "epoch": 80.52173913043478, "grad_norm": 0.25045129656791687, "learning_rate": 0.0001, "loss": 1.537, "step": 933408 }, { "epoch": 80.52657004830918, "grad_norm": 0.24173536896705627, "learning_rate": 0.0001, "loss": 1.5469, "step": 933464 }, { "epoch": 80.53140096618357, "grad_norm": 0.3440306782722473, "learning_rate": 0.0001, "loss": 1.5288, "step": 933520 }, { "epoch": 80.53623188405797, "grad_norm": 0.2933304011821747, "learning_rate": 0.0001, "loss": 1.5391, "step": 933576 }, { "epoch": 80.54106280193237, "grad_norm": 0.354073166847229, "learning_rate": 0.0001, "loss": 1.5445, "step": 933632 }, { "epoch": 80.54589371980677, "grad_norm": 0.3196357488632202, "learning_rate": 0.0001, "loss": 1.5354, "step": 933688 }, { "epoch": 80.55072463768116, "grad_norm": 0.8575426340103149, "learning_rate": 0.0001, "loss": 1.5428, "step": 933744 }, { "epoch": 80.55555555555556, "grad_norm": 0.29591476917266846, "learning_rate": 0.0001, "loss": 1.536, "step": 933800 }, { "epoch": 80.56038647342996, "grad_norm": 0.31455472111701965, "learning_rate": 0.0001, "loss": 1.5406, "step": 933856 }, { "epoch": 80.56521739130434, "grad_norm": 0.27774450182914734, "learning_rate": 0.0001, "loss": 1.5384, "step": 933912 }, { "epoch": 80.57004830917874, "grad_norm": 0.35974767804145813, "learning_rate": 0.0001, "loss": 1.5407, "step": 933968 }, { "epoch": 80.57487922705315, "grad_norm": 0.37977123260498047, "learning_rate": 0.0001, "loss": 1.5437, "step": 934024 }, { "epoch": 80.57971014492753, "grad_norm": 0.32496705651283264, "learning_rate": 0.0001, "loss": 1.5366, "step": 934080 }, { "epoch": 80.58454106280193, "grad_norm": 0.8308771848678589, "learning_rate": 0.0001, "loss": 1.5426, "step": 934136 }, { "epoch": 80.58937198067633, "grad_norm": 0.4355931878089905, "learning_rate": 0.0001, "loss": 1.5444, "step": 934192 }, { "epoch": 80.59420289855072, "grad_norm": 0.2425985038280487, "learning_rate": 0.0001, "loss": 1.5387, "step": 934248 }, { "epoch": 80.59903381642512, "grad_norm": 23.6328182220459, "learning_rate": 0.0001, "loss": 1.5348, "step": 934304 }, { "epoch": 80.60386473429952, "grad_norm": 0.286245197057724, "learning_rate": 0.0001, "loss": 1.5438, "step": 934360 }, { "epoch": 80.6086956521739, "grad_norm": 0.879637598991394, "learning_rate": 0.0001, "loss": 1.5315, "step": 934416 }, { "epoch": 80.61352657004831, "grad_norm": 0.6241676211357117, "learning_rate": 0.0001, "loss": 1.535, "step": 934472 }, { "epoch": 80.61835748792271, "grad_norm": 0.39013540744781494, "learning_rate": 0.0001, "loss": 1.5373, "step": 934528 }, { "epoch": 80.6231884057971, "grad_norm": 8.820347785949707, "learning_rate": 0.0001, "loss": 1.5333, "step": 934584 }, { "epoch": 80.6280193236715, "grad_norm": 0.2379237413406372, "learning_rate": 0.0001, "loss": 1.5422, "step": 934640 }, { "epoch": 80.6328502415459, "grad_norm": 0.35896041989326477, "learning_rate": 0.0001, "loss": 1.541, "step": 934696 }, { "epoch": 80.6376811594203, "grad_norm": 0.2906648516654968, "learning_rate": 0.0001, "loss": 1.5421, "step": 934752 }, { "epoch": 80.64251207729468, "grad_norm": 0.3905352056026459, "learning_rate": 0.0001, "loss": 1.547, "step": 934808 }, { "epoch": 80.64734299516908, "grad_norm": 7.828787803649902, "learning_rate": 0.0001, "loss": 1.5329, "step": 934864 }, { "epoch": 80.65217391304348, "grad_norm": 0.30419284105300903, "learning_rate": 0.0001, "loss": 1.5358, "step": 934920 }, { "epoch": 80.65700483091787, "grad_norm": 0.43782150745391846, "learning_rate": 0.0001, "loss": 1.5388, "step": 934976 }, { "epoch": 80.66183574879227, "grad_norm": 0.2734384834766388, "learning_rate": 0.0001, "loss": 1.537, "step": 935032 }, { "epoch": 80.66666666666667, "grad_norm": 0.3591497540473938, "learning_rate": 0.0001, "loss": 1.5337, "step": 935088 }, { "epoch": 80.67149758454106, "grad_norm": 19.108728408813477, "learning_rate": 0.0001, "loss": 1.5434, "step": 935144 }, { "epoch": 80.67632850241546, "grad_norm": 0.2864193916320801, "learning_rate": 0.0001, "loss": 1.5385, "step": 935200 }, { "epoch": 80.68115942028986, "grad_norm": 0.33435389399528503, "learning_rate": 0.0001, "loss": 1.5332, "step": 935256 }, { "epoch": 80.68599033816425, "grad_norm": 0.347965806722641, "learning_rate": 0.0001, "loss": 1.5357, "step": 935312 }, { "epoch": 80.69082125603865, "grad_norm": 0.29034146666526794, "learning_rate": 0.0001, "loss": 1.5457, "step": 935368 }, { "epoch": 80.69565217391305, "grad_norm": 0.3014856278896332, "learning_rate": 0.0001, "loss": 1.5433, "step": 935424 }, { "epoch": 80.70048309178743, "grad_norm": 0.2710544764995575, "learning_rate": 0.0001, "loss": 1.5408, "step": 935480 }, { "epoch": 80.70531400966183, "grad_norm": 0.4230332374572754, "learning_rate": 0.0001, "loss": 1.5455, "step": 935536 }, { "epoch": 80.71014492753623, "grad_norm": 0.30109813809394836, "learning_rate": 0.0001, "loss": 1.539, "step": 935592 }, { "epoch": 80.71497584541063, "grad_norm": 0.2792746424674988, "learning_rate": 0.0001, "loss": 1.5388, "step": 935648 }, { "epoch": 80.71980676328502, "grad_norm": 0.3233787417411804, "learning_rate": 0.0001, "loss": 1.5383, "step": 935704 }, { "epoch": 80.72463768115942, "grad_norm": 0.23955807089805603, "learning_rate": 0.0001, "loss": 1.541, "step": 935760 }, { "epoch": 80.72946859903382, "grad_norm": 0.28496840596199036, "learning_rate": 0.0001, "loss": 1.5377, "step": 935816 }, { "epoch": 80.73429951690821, "grad_norm": 0.2857365906238556, "learning_rate": 0.0001, "loss": 1.538, "step": 935872 }, { "epoch": 80.73913043478261, "grad_norm": 0.3087848424911499, "learning_rate": 0.0001, "loss": 1.54, "step": 935928 }, { "epoch": 80.74396135265701, "grad_norm": 2.694533109664917, "learning_rate": 0.0001, "loss": 1.5428, "step": 935984 }, { "epoch": 80.7487922705314, "grad_norm": 9.008414268493652, "learning_rate": 0.0001, "loss": 1.5406, "step": 936040 }, { "epoch": 80.7536231884058, "grad_norm": 0.35340866446495056, "learning_rate": 0.0001, "loss": 1.5346, "step": 936096 }, { "epoch": 80.7584541062802, "grad_norm": 4.402353763580322, "learning_rate": 0.0001, "loss": 1.535, "step": 936152 }, { "epoch": 80.76328502415458, "grad_norm": 0.33815890550613403, "learning_rate": 0.0001, "loss": 1.5425, "step": 936208 }, { "epoch": 80.76811594202898, "grad_norm": 0.2744450867176056, "learning_rate": 0.0001, "loss": 1.5392, "step": 936264 }, { "epoch": 80.77294685990339, "grad_norm": 0.4267714023590088, "learning_rate": 0.0001, "loss": 1.5385, "step": 936320 }, { "epoch": 80.77777777777777, "grad_norm": 1.4421250820159912, "learning_rate": 0.0001, "loss": 1.5462, "step": 936376 }, { "epoch": 80.78260869565217, "grad_norm": 0.46373382210731506, "learning_rate": 0.0001, "loss": 1.5403, "step": 936432 }, { "epoch": 80.78743961352657, "grad_norm": 0.29190388321876526, "learning_rate": 0.0001, "loss": 1.5399, "step": 936488 }, { "epoch": 80.79227053140096, "grad_norm": 5.404422760009766, "learning_rate": 0.0001, "loss": 1.5447, "step": 936544 }, { "epoch": 80.79710144927536, "grad_norm": 0.25130224227905273, "learning_rate": 0.0001, "loss": 1.5443, "step": 936600 }, { "epoch": 80.80193236714976, "grad_norm": 0.27810773253440857, "learning_rate": 0.0001, "loss": 1.5407, "step": 936656 }, { "epoch": 80.80676328502416, "grad_norm": 18.420507431030273, "learning_rate": 0.0001, "loss": 1.5407, "step": 936712 }, { "epoch": 80.81159420289855, "grad_norm": 0.2580792009830475, "learning_rate": 0.0001, "loss": 1.5413, "step": 936768 }, { "epoch": 80.81642512077295, "grad_norm": 30.252151489257812, "learning_rate": 0.0001, "loss": 1.5354, "step": 936824 }, { "epoch": 80.82125603864735, "grad_norm": 1.116931438446045, "learning_rate": 0.0001, "loss": 1.5388, "step": 936880 }, { "epoch": 80.82608695652173, "grad_norm": 0.5832847356796265, "learning_rate": 0.0001, "loss": 1.5347, "step": 936936 }, { "epoch": 80.83091787439614, "grad_norm": 0.2952418029308319, "learning_rate": 0.0001, "loss": 1.5405, "step": 936992 }, { "epoch": 80.83574879227054, "grad_norm": 0.24870319664478302, "learning_rate": 0.0001, "loss": 1.5391, "step": 937048 }, { "epoch": 80.84057971014492, "grad_norm": 0.25030583143234253, "learning_rate": 0.0001, "loss": 1.5454, "step": 937104 }, { "epoch": 80.84541062801932, "grad_norm": 0.3445553779602051, "learning_rate": 0.0001, "loss": 1.5379, "step": 937160 }, { "epoch": 80.85024154589372, "grad_norm": 0.2596765160560608, "learning_rate": 0.0001, "loss": 1.5497, "step": 937216 }, { "epoch": 80.85507246376811, "grad_norm": 0.31241339445114136, "learning_rate": 0.0001, "loss": 1.535, "step": 937272 }, { "epoch": 80.85990338164251, "grad_norm": 0.29034510254859924, "learning_rate": 0.0001, "loss": 1.5375, "step": 937328 }, { "epoch": 80.86473429951691, "grad_norm": 0.2656581699848175, "learning_rate": 0.0001, "loss": 1.5363, "step": 937384 }, { "epoch": 80.8695652173913, "grad_norm": 2.09741473197937, "learning_rate": 0.0001, "loss": 1.538, "step": 937440 }, { "epoch": 80.8743961352657, "grad_norm": 0.2853366732597351, "learning_rate": 0.0001, "loss": 1.5413, "step": 937496 }, { "epoch": 80.8792270531401, "grad_norm": 0.3900660276412964, "learning_rate": 0.0001, "loss": 1.5468, "step": 937552 }, { "epoch": 80.8840579710145, "grad_norm": 0.26375922560691833, "learning_rate": 0.0001, "loss": 1.543, "step": 937608 }, { "epoch": 80.88888888888889, "grad_norm": 0.3695529103279114, "learning_rate": 0.0001, "loss": 1.5409, "step": 937664 }, { "epoch": 80.89371980676329, "grad_norm": 0.3016757071018219, "learning_rate": 0.0001, "loss": 1.5426, "step": 937720 }, { "epoch": 80.89855072463769, "grad_norm": 0.27877894043922424, "learning_rate": 0.0001, "loss": 1.5378, "step": 937776 }, { "epoch": 80.90338164251207, "grad_norm": 0.3997453451156616, "learning_rate": 0.0001, "loss": 1.5418, "step": 937832 }, { "epoch": 80.90821256038647, "grad_norm": 0.31168627738952637, "learning_rate": 0.0001, "loss": 1.5394, "step": 937888 }, { "epoch": 80.91304347826087, "grad_norm": 0.47910061478614807, "learning_rate": 0.0001, "loss": 1.546, "step": 937944 }, { "epoch": 80.91787439613526, "grad_norm": 0.48696303367614746, "learning_rate": 0.0001, "loss": 1.5463, "step": 938000 }, { "epoch": 80.92270531400966, "grad_norm": 1.0626264810562134, "learning_rate": 0.0001, "loss": 1.5405, "step": 938056 }, { "epoch": 80.92753623188406, "grad_norm": 1.663933515548706, "learning_rate": 0.0001, "loss": 1.5395, "step": 938112 }, { "epoch": 80.93236714975845, "grad_norm": 0.2551517188549042, "learning_rate": 0.0001, "loss": 1.5401, "step": 938168 }, { "epoch": 80.93719806763285, "grad_norm": 0.8204693794250488, "learning_rate": 0.0001, "loss": 1.5392, "step": 938224 }, { "epoch": 80.94202898550725, "grad_norm": 0.32150766253471375, "learning_rate": 0.0001, "loss": 1.5382, "step": 938280 }, { "epoch": 80.94685990338164, "grad_norm": 0.4165453314781189, "learning_rate": 0.0001, "loss": 1.536, "step": 938336 }, { "epoch": 80.95169082125604, "grad_norm": 0.2641509175300598, "learning_rate": 0.0001, "loss": 1.5333, "step": 938392 }, { "epoch": 80.95652173913044, "grad_norm": 0.37112560868263245, "learning_rate": 0.0001, "loss": 1.5415, "step": 938448 }, { "epoch": 80.96135265700484, "grad_norm": 2.41756272315979, "learning_rate": 0.0001, "loss": 1.5316, "step": 938504 }, { "epoch": 80.96618357487922, "grad_norm": 0.26826775074005127, "learning_rate": 0.0001, "loss": 1.5401, "step": 938560 }, { "epoch": 80.97101449275362, "grad_norm": 0.32712602615356445, "learning_rate": 0.0001, "loss": 1.5355, "step": 938616 }, { "epoch": 80.97584541062803, "grad_norm": 0.3010948598384857, "learning_rate": 0.0001, "loss": 1.5425, "step": 938672 }, { "epoch": 80.98067632850241, "grad_norm": 0.27144619822502136, "learning_rate": 0.0001, "loss": 1.5382, "step": 938728 }, { "epoch": 80.98550724637681, "grad_norm": 0.5947315692901611, "learning_rate": 0.0001, "loss": 1.5354, "step": 938784 }, { "epoch": 80.99033816425121, "grad_norm": 0.3451215326786041, "learning_rate": 0.0001, "loss": 1.5393, "step": 938840 }, { "epoch": 80.9951690821256, "grad_norm": 0.40437331795692444, "learning_rate": 0.0001, "loss": 1.5378, "step": 938896 }, { "epoch": 81.0, "grad_norm": 0.3804194927215576, "learning_rate": 0.0001, "loss": 1.5414, "step": 938952 }, { "epoch": 81.0048309178744, "grad_norm": 0.3153477609157562, "learning_rate": 0.0001, "loss": 1.5383, "step": 939008 }, { "epoch": 81.00966183574879, "grad_norm": 0.26870089769363403, "learning_rate": 0.0001, "loss": 1.5404, "step": 939064 }, { "epoch": 81.01449275362319, "grad_norm": 0.2774100601673126, "learning_rate": 0.0001, "loss": 1.5277, "step": 939120 }, { "epoch": 81.01932367149759, "grad_norm": 0.6271528005599976, "learning_rate": 0.0001, "loss": 1.5375, "step": 939176 }, { "epoch": 81.02415458937197, "grad_norm": 0.42881977558135986, "learning_rate": 0.0001, "loss": 1.5349, "step": 939232 }, { "epoch": 81.02898550724638, "grad_norm": 0.3624206483364105, "learning_rate": 0.0001, "loss": 1.5385, "step": 939288 }, { "epoch": 81.03381642512078, "grad_norm": 0.2815920114517212, "learning_rate": 0.0001, "loss": 1.5301, "step": 939344 }, { "epoch": 81.03864734299516, "grad_norm": 0.6429088115692139, "learning_rate": 0.0001, "loss": 1.5337, "step": 939400 }, { "epoch": 81.04347826086956, "grad_norm": 0.36016765236854553, "learning_rate": 0.0001, "loss": 1.5315, "step": 939456 }, { "epoch": 81.04830917874396, "grad_norm": 0.4022619426250458, "learning_rate": 0.0001, "loss": 1.5342, "step": 939512 }, { "epoch": 81.05314009661836, "grad_norm": 3.701786518096924, "learning_rate": 0.0001, "loss": 1.5352, "step": 939568 }, { "epoch": 81.05797101449275, "grad_norm": 0.8266977667808533, "learning_rate": 0.0001, "loss": 1.533, "step": 939624 }, { "epoch": 81.06280193236715, "grad_norm": 0.3094256520271301, "learning_rate": 0.0001, "loss": 1.5304, "step": 939680 }, { "epoch": 81.06763285024155, "grad_norm": 1.2280044555664062, "learning_rate": 0.0001, "loss": 1.5352, "step": 939736 }, { "epoch": 81.07246376811594, "grad_norm": 0.26501166820526123, "learning_rate": 0.0001, "loss": 1.5444, "step": 939792 }, { "epoch": 81.07729468599034, "grad_norm": 0.36712583899497986, "learning_rate": 0.0001, "loss": 1.5418, "step": 939848 }, { "epoch": 81.08212560386474, "grad_norm": 0.25851720571517944, "learning_rate": 0.0001, "loss": 1.5367, "step": 939904 }, { "epoch": 81.08695652173913, "grad_norm": 1.093078374862671, "learning_rate": 0.0001, "loss": 1.5428, "step": 939960 }, { "epoch": 81.09178743961353, "grad_norm": 0.2626625895500183, "learning_rate": 0.0001, "loss": 1.5324, "step": 940016 }, { "epoch": 81.09661835748793, "grad_norm": 0.5779732465744019, "learning_rate": 0.0001, "loss": 1.5353, "step": 940072 }, { "epoch": 81.10144927536231, "grad_norm": 0.24168513715267181, "learning_rate": 0.0001, "loss": 1.5382, "step": 940128 }, { "epoch": 81.10628019323671, "grad_norm": 0.2468889355659485, "learning_rate": 0.0001, "loss": 1.5372, "step": 940184 }, { "epoch": 81.11111111111111, "grad_norm": 0.35534197092056274, "learning_rate": 0.0001, "loss": 1.5337, "step": 940240 }, { "epoch": 81.1159420289855, "grad_norm": 1.6470366716384888, "learning_rate": 0.0001, "loss": 1.5328, "step": 940296 }, { "epoch": 81.1207729468599, "grad_norm": 1.0884376764297485, "learning_rate": 0.0001, "loss": 1.5393, "step": 940352 }, { "epoch": 81.1256038647343, "grad_norm": 1.002964973449707, "learning_rate": 0.0001, "loss": 1.5382, "step": 940408 }, { "epoch": 81.1304347826087, "grad_norm": 0.29442206025123596, "learning_rate": 0.0001, "loss": 1.5342, "step": 940464 }, { "epoch": 81.13526570048309, "grad_norm": 0.31773871183395386, "learning_rate": 0.0001, "loss": 1.5413, "step": 940520 }, { "epoch": 81.14009661835749, "grad_norm": 0.2833757996559143, "learning_rate": 0.0001, "loss": 1.5338, "step": 940576 }, { "epoch": 81.14492753623189, "grad_norm": 0.39267733693122864, "learning_rate": 0.0001, "loss": 1.5381, "step": 940632 }, { "epoch": 81.14975845410628, "grad_norm": 0.33917567133903503, "learning_rate": 0.0001, "loss": 1.5363, "step": 940688 }, { "epoch": 81.15458937198068, "grad_norm": 6.416690349578857, "learning_rate": 0.0001, "loss": 1.5366, "step": 940744 }, { "epoch": 81.15942028985508, "grad_norm": 6.2120537757873535, "learning_rate": 0.0001, "loss": 1.5334, "step": 940800 }, { "epoch": 81.16425120772946, "grad_norm": 0.30875730514526367, "learning_rate": 0.0001, "loss": 1.5337, "step": 940856 }, { "epoch": 81.16908212560386, "grad_norm": 0.759405791759491, "learning_rate": 0.0001, "loss": 1.5296, "step": 940912 }, { "epoch": 81.17391304347827, "grad_norm": 0.3875877261161804, "learning_rate": 0.0001, "loss": 1.5348, "step": 940968 }, { "epoch": 81.17874396135265, "grad_norm": 0.2555858790874481, "learning_rate": 0.0001, "loss": 1.5346, "step": 941024 }, { "epoch": 81.18357487922705, "grad_norm": 0.3189179003238678, "learning_rate": 0.0001, "loss": 1.5279, "step": 941080 }, { "epoch": 81.18840579710145, "grad_norm": 0.3514600694179535, "learning_rate": 0.0001, "loss": 1.5358, "step": 941136 }, { "epoch": 81.19323671497584, "grad_norm": 0.29882320761680603, "learning_rate": 0.0001, "loss": 1.5363, "step": 941192 }, { "epoch": 81.19806763285024, "grad_norm": 0.2456207424402237, "learning_rate": 0.0001, "loss": 1.5394, "step": 941248 }, { "epoch": 81.20289855072464, "grad_norm": 4.724773406982422, "learning_rate": 0.0001, "loss": 1.5424, "step": 941304 }, { "epoch": 81.20772946859903, "grad_norm": 0.6116238236427307, "learning_rate": 0.0001, "loss": 1.5344, "step": 941360 }, { "epoch": 81.21256038647343, "grad_norm": 0.27025172114372253, "learning_rate": 0.0001, "loss": 1.5363, "step": 941416 }, { "epoch": 81.21739130434783, "grad_norm": 0.3743244707584381, "learning_rate": 0.0001, "loss": 1.5376, "step": 941472 }, { "epoch": 81.22222222222223, "grad_norm": 0.2685745358467102, "learning_rate": 0.0001, "loss": 1.5337, "step": 941528 }, { "epoch": 81.22705314009661, "grad_norm": 0.2828514873981476, "learning_rate": 0.0001, "loss": 1.5387, "step": 941584 }, { "epoch": 81.23188405797102, "grad_norm": 0.9745945930480957, "learning_rate": 0.0001, "loss": 1.5434, "step": 941640 }, { "epoch": 81.23671497584542, "grad_norm": 0.24535267055034637, "learning_rate": 0.0001, "loss": 1.536, "step": 941696 }, { "epoch": 81.2415458937198, "grad_norm": 0.36934512853622437, "learning_rate": 0.0001, "loss": 1.5363, "step": 941752 }, { "epoch": 81.2463768115942, "grad_norm": 0.28268179297447205, "learning_rate": 0.0001, "loss": 1.536, "step": 941808 }, { "epoch": 81.2512077294686, "grad_norm": 1.2822853326797485, "learning_rate": 0.0001, "loss": 1.5371, "step": 941864 }, { "epoch": 81.25603864734299, "grad_norm": 0.31416213512420654, "learning_rate": 0.0001, "loss": 1.5418, "step": 941920 }, { "epoch": 81.26086956521739, "grad_norm": 0.2688729465007782, "learning_rate": 0.0001, "loss": 1.5368, "step": 941976 }, { "epoch": 81.26570048309179, "grad_norm": 0.3567007780075073, "learning_rate": 0.0001, "loss": 1.5353, "step": 942032 }, { "epoch": 81.27053140096618, "grad_norm": 2.4475369453430176, "learning_rate": 0.0001, "loss": 1.5356, "step": 942088 }, { "epoch": 81.27536231884058, "grad_norm": 0.3381841480731964, "learning_rate": 0.0001, "loss": 1.5384, "step": 942144 }, { "epoch": 81.28019323671498, "grad_norm": 0.2791370153427124, "learning_rate": 0.0001, "loss": 1.5363, "step": 942200 }, { "epoch": 81.28502415458937, "grad_norm": 0.2938128411769867, "learning_rate": 0.0001, "loss": 1.5446, "step": 942256 }, { "epoch": 81.28985507246377, "grad_norm": 0.3091854453086853, "learning_rate": 0.0001, "loss": 1.5393, "step": 942312 }, { "epoch": 81.29468599033817, "grad_norm": 0.3311636745929718, "learning_rate": 0.0001, "loss": 1.5345, "step": 942368 }, { "epoch": 81.29951690821257, "grad_norm": 0.9158441424369812, "learning_rate": 0.0001, "loss": 1.5348, "step": 942424 }, { "epoch": 81.30434782608695, "grad_norm": 2.4922685623168945, "learning_rate": 0.0001, "loss": 1.5347, "step": 942480 }, { "epoch": 81.30917874396135, "grad_norm": 0.4703826308250427, "learning_rate": 0.0001, "loss": 1.5355, "step": 942536 }, { "epoch": 81.31400966183575, "grad_norm": 0.30327874422073364, "learning_rate": 0.0001, "loss": 1.5336, "step": 942592 }, { "epoch": 81.31884057971014, "grad_norm": 0.4644230306148529, "learning_rate": 0.0001, "loss": 1.5348, "step": 942648 }, { "epoch": 81.32367149758454, "grad_norm": 0.3866393566131592, "learning_rate": 0.0001, "loss": 1.535, "step": 942704 }, { "epoch": 81.32850241545894, "grad_norm": 0.2604726254940033, "learning_rate": 0.0001, "loss": 1.5333, "step": 942760 }, { "epoch": 81.33333333333333, "grad_norm": 0.2914869487285614, "learning_rate": 0.0001, "loss": 1.5332, "step": 942816 }, { "epoch": 81.33816425120773, "grad_norm": 0.8672446012496948, "learning_rate": 0.0001, "loss": 1.5321, "step": 942872 }, { "epoch": 81.34299516908213, "grad_norm": 0.3445504605770111, "learning_rate": 0.0001, "loss": 1.5398, "step": 942928 }, { "epoch": 81.34782608695652, "grad_norm": 0.3543800711631775, "learning_rate": 0.0001, "loss": 1.5349, "step": 942984 }, { "epoch": 81.35265700483092, "grad_norm": 0.3146056532859802, "learning_rate": 0.0001, "loss": 1.5408, "step": 943040 }, { "epoch": 81.35748792270532, "grad_norm": 0.2891468107700348, "learning_rate": 0.0001, "loss": 1.5353, "step": 943096 }, { "epoch": 81.3623188405797, "grad_norm": 0.23993074893951416, "learning_rate": 0.0001, "loss": 1.5374, "step": 943152 }, { "epoch": 81.3671497584541, "grad_norm": 0.38183143734931946, "learning_rate": 0.0001, "loss": 1.5356, "step": 943208 }, { "epoch": 81.3719806763285, "grad_norm": 0.3656521737575531, "learning_rate": 0.0001, "loss": 1.5406, "step": 943264 }, { "epoch": 81.3768115942029, "grad_norm": 0.39872345328330994, "learning_rate": 0.0001, "loss": 1.5389, "step": 943320 }, { "epoch": 81.38164251207729, "grad_norm": 0.269528865814209, "learning_rate": 0.0001, "loss": 1.536, "step": 943376 }, { "epoch": 81.38647342995169, "grad_norm": 0.2948266267776489, "learning_rate": 0.0001, "loss": 1.5358, "step": 943432 }, { "epoch": 81.3913043478261, "grad_norm": 0.26772236824035645, "learning_rate": 0.0001, "loss": 1.5362, "step": 943488 }, { "epoch": 81.39613526570048, "grad_norm": 0.3171555995941162, "learning_rate": 0.0001, "loss": 1.5373, "step": 943544 }, { "epoch": 81.40096618357488, "grad_norm": 0.419356107711792, "learning_rate": 0.0001, "loss": 1.5391, "step": 943600 }, { "epoch": 81.40579710144928, "grad_norm": 0.2949814796447754, "learning_rate": 0.0001, "loss": 1.5356, "step": 943656 }, { "epoch": 81.41062801932367, "grad_norm": 0.2600535452365875, "learning_rate": 0.0001, "loss": 1.5382, "step": 943712 }, { "epoch": 81.41545893719807, "grad_norm": 0.3172491788864136, "learning_rate": 0.0001, "loss": 1.5356, "step": 943768 }, { "epoch": 81.42028985507247, "grad_norm": 9.177350044250488, "learning_rate": 0.0001, "loss": 1.539, "step": 943824 }, { "epoch": 81.42512077294685, "grad_norm": 0.3137019872665405, "learning_rate": 0.0001, "loss": 1.5394, "step": 943880 }, { "epoch": 81.42995169082126, "grad_norm": 3.6258294582366943, "learning_rate": 0.0001, "loss": 1.5328, "step": 943936 }, { "epoch": 81.43478260869566, "grad_norm": 0.2687394917011261, "learning_rate": 0.0001, "loss": 1.5389, "step": 943992 }, { "epoch": 81.43961352657004, "grad_norm": 0.25012099742889404, "learning_rate": 0.0001, "loss": 1.5382, "step": 944048 }, { "epoch": 81.44444444444444, "grad_norm": 0.4234636723995209, "learning_rate": 0.0001, "loss": 1.5398, "step": 944104 }, { "epoch": 81.44927536231884, "grad_norm": 0.30379003286361694, "learning_rate": 0.0001, "loss": 1.5427, "step": 944160 }, { "epoch": 81.45410628019323, "grad_norm": 0.28029686212539673, "learning_rate": 0.0001, "loss": 1.5407, "step": 944216 }, { "epoch": 81.45893719806763, "grad_norm": 0.35967057943344116, "learning_rate": 0.0001, "loss": 1.5429, "step": 944272 }, { "epoch": 81.46376811594203, "grad_norm": 0.4029168486595154, "learning_rate": 0.0001, "loss": 1.5322, "step": 944328 }, { "epoch": 81.46859903381643, "grad_norm": 0.41641491651535034, "learning_rate": 0.0001, "loss": 1.5433, "step": 944384 }, { "epoch": 81.47342995169082, "grad_norm": 22.670351028442383, "learning_rate": 0.0001, "loss": 1.5337, "step": 944440 }, { "epoch": 81.47826086956522, "grad_norm": 20.19307518005371, "learning_rate": 0.0001, "loss": 1.5401, "step": 944496 }, { "epoch": 81.48309178743962, "grad_norm": 0.2968663275241852, "learning_rate": 0.0001, "loss": 1.5307, "step": 944552 }, { "epoch": 81.487922705314, "grad_norm": 0.740466833114624, "learning_rate": 0.0001, "loss": 1.5303, "step": 944608 }, { "epoch": 81.4927536231884, "grad_norm": 0.39736470580101013, "learning_rate": 0.0001, "loss": 1.5386, "step": 944664 }, { "epoch": 81.4975845410628, "grad_norm": 0.28907132148742676, "learning_rate": 0.0001, "loss": 1.5357, "step": 944720 }, { "epoch": 81.5024154589372, "grad_norm": 0.3203314244747162, "learning_rate": 0.0001, "loss": 1.5377, "step": 944776 }, { "epoch": 81.5072463768116, "grad_norm": 0.266140341758728, "learning_rate": 0.0001, "loss": 1.5358, "step": 944832 }, { "epoch": 81.512077294686, "grad_norm": 2.9396939277648926, "learning_rate": 0.0001, "loss": 1.5346, "step": 944888 }, { "epoch": 81.51690821256038, "grad_norm": 0.8323559165000916, "learning_rate": 0.0001, "loss": 1.5342, "step": 944944 }, { "epoch": 81.52173913043478, "grad_norm": 0.3245454430580139, "learning_rate": 0.0001, "loss": 1.5359, "step": 945000 }, { "epoch": 81.52657004830918, "grad_norm": 0.2687116265296936, "learning_rate": 0.0001, "loss": 1.5399, "step": 945056 }, { "epoch": 81.53140096618357, "grad_norm": 0.33481839299201965, "learning_rate": 0.0001, "loss": 1.5387, "step": 945112 }, { "epoch": 81.53623188405797, "grad_norm": 0.2852904498577118, "learning_rate": 0.0001, "loss": 1.5332, "step": 945168 }, { "epoch": 81.54106280193237, "grad_norm": 4.513950824737549, "learning_rate": 0.0001, "loss": 1.5368, "step": 945224 }, { "epoch": 81.54589371980677, "grad_norm": 1.0793753862380981, "learning_rate": 0.0001, "loss": 1.5358, "step": 945280 }, { "epoch": 81.55072463768116, "grad_norm": 0.2868187725543976, "learning_rate": 0.0001, "loss": 1.5429, "step": 945336 }, { "epoch": 81.55555555555556, "grad_norm": 0.344978004693985, "learning_rate": 0.0001, "loss": 1.5355, "step": 945392 }, { "epoch": 81.56038647342996, "grad_norm": 0.332023024559021, "learning_rate": 0.0001, "loss": 1.5357, "step": 945448 }, { "epoch": 81.56521739130434, "grad_norm": 0.22974321246147156, "learning_rate": 0.0001, "loss": 1.5373, "step": 945504 }, { "epoch": 81.57004830917874, "grad_norm": 0.280367910861969, "learning_rate": 0.0001, "loss": 1.5367, "step": 945560 }, { "epoch": 81.57487922705315, "grad_norm": 0.3002293109893799, "learning_rate": 0.0001, "loss": 1.5377, "step": 945616 }, { "epoch": 81.57971014492753, "grad_norm": 0.2977185547351837, "learning_rate": 0.0001, "loss": 1.5463, "step": 945672 }, { "epoch": 81.58454106280193, "grad_norm": 0.3539254665374756, "learning_rate": 0.0001, "loss": 1.5399, "step": 945728 }, { "epoch": 81.58937198067633, "grad_norm": 0.2624005675315857, "learning_rate": 0.0001, "loss": 1.5411, "step": 945784 }, { "epoch": 81.59420289855072, "grad_norm": 0.3235623240470886, "learning_rate": 0.0001, "loss": 1.541, "step": 945840 }, { "epoch": 81.59903381642512, "grad_norm": 0.4195101261138916, "learning_rate": 0.0001, "loss": 1.5302, "step": 945896 }, { "epoch": 81.60386473429952, "grad_norm": 0.29341286420822144, "learning_rate": 0.0001, "loss": 1.5371, "step": 945952 }, { "epoch": 81.6086956521739, "grad_norm": 0.2691452205181122, "learning_rate": 0.0001, "loss": 1.5351, "step": 946008 }, { "epoch": 81.61352657004831, "grad_norm": 0.36175933480262756, "learning_rate": 0.0001, "loss": 1.5352, "step": 946064 }, { "epoch": 81.61835748792271, "grad_norm": 0.4139655828475952, "learning_rate": 0.0001, "loss": 1.5383, "step": 946120 }, { "epoch": 81.6231884057971, "grad_norm": 0.30569663643836975, "learning_rate": 0.0001, "loss": 1.5315, "step": 946176 }, { "epoch": 81.6280193236715, "grad_norm": 0.23899231851100922, "learning_rate": 0.0001, "loss": 1.5349, "step": 946232 }, { "epoch": 81.6328502415459, "grad_norm": 0.314299613237381, "learning_rate": 0.0001, "loss": 1.5393, "step": 946288 }, { "epoch": 81.6376811594203, "grad_norm": 0.35985589027404785, "learning_rate": 0.0001, "loss": 1.5411, "step": 946344 }, { "epoch": 81.64251207729468, "grad_norm": 0.26839759945869446, "learning_rate": 0.0001, "loss": 1.5344, "step": 946400 }, { "epoch": 81.64734299516908, "grad_norm": 0.3341757357120514, "learning_rate": 0.0001, "loss": 1.5334, "step": 946456 }, { "epoch": 81.65217391304348, "grad_norm": 0.5696761608123779, "learning_rate": 0.0001, "loss": 1.5411, "step": 946512 }, { "epoch": 81.65700483091787, "grad_norm": 0.3786410689353943, "learning_rate": 0.0001, "loss": 1.5334, "step": 946568 }, { "epoch": 81.66183574879227, "grad_norm": 0.2734892666339874, "learning_rate": 0.0001, "loss": 1.5384, "step": 946624 }, { "epoch": 81.66666666666667, "grad_norm": 0.36961933970451355, "learning_rate": 0.0001, "loss": 1.5298, "step": 946680 }, { "epoch": 81.67149758454106, "grad_norm": 0.2937150001525879, "learning_rate": 0.0001, "loss": 1.5339, "step": 946736 }, { "epoch": 81.67632850241546, "grad_norm": 0.4322865307331085, "learning_rate": 0.0001, "loss": 1.5396, "step": 946792 }, { "epoch": 81.68115942028986, "grad_norm": 0.3754095733165741, "learning_rate": 0.0001, "loss": 1.5406, "step": 946848 }, { "epoch": 81.68599033816425, "grad_norm": 0.27480268478393555, "learning_rate": 0.0001, "loss": 1.5354, "step": 946904 }, { "epoch": 81.69082125603865, "grad_norm": 0.24256478250026703, "learning_rate": 0.0001, "loss": 1.542, "step": 946960 }, { "epoch": 81.69565217391305, "grad_norm": 0.26828476786613464, "learning_rate": 0.0001, "loss": 1.5364, "step": 947016 }, { "epoch": 81.70048309178743, "grad_norm": 15.947023391723633, "learning_rate": 0.0001, "loss": 1.537, "step": 947072 }, { "epoch": 81.70531400966183, "grad_norm": 0.2815791964530945, "learning_rate": 0.0001, "loss": 1.5364, "step": 947128 }, { "epoch": 81.71014492753623, "grad_norm": 0.37550994753837585, "learning_rate": 0.0001, "loss": 1.5431, "step": 947184 }, { "epoch": 81.71497584541063, "grad_norm": 0.3160410225391388, "learning_rate": 0.0001, "loss": 1.5423, "step": 947240 }, { "epoch": 81.71980676328502, "grad_norm": 0.30770525336265564, "learning_rate": 0.0001, "loss": 1.5341, "step": 947296 }, { "epoch": 81.72463768115942, "grad_norm": 0.4030595123767853, "learning_rate": 0.0001, "loss": 1.5399, "step": 947352 }, { "epoch": 81.72946859903382, "grad_norm": 0.28623393177986145, "learning_rate": 0.0001, "loss": 1.5355, "step": 947408 }, { "epoch": 81.73429951690821, "grad_norm": 0.25915631651878357, "learning_rate": 0.0001, "loss": 1.537, "step": 947464 }, { "epoch": 81.73913043478261, "grad_norm": 0.37595412135124207, "learning_rate": 0.0001, "loss": 1.5429, "step": 947520 }, { "epoch": 81.74396135265701, "grad_norm": 0.32736697793006897, "learning_rate": 0.0001, "loss": 1.5371, "step": 947576 }, { "epoch": 81.7487922705314, "grad_norm": 3.184401512145996, "learning_rate": 0.0001, "loss": 1.5463, "step": 947632 }, { "epoch": 81.7536231884058, "grad_norm": 8.790237426757812, "learning_rate": 0.0001, "loss": 1.5385, "step": 947688 }, { "epoch": 81.7584541062802, "grad_norm": 0.3066118061542511, "learning_rate": 0.0001, "loss": 1.5384, "step": 947744 }, { "epoch": 81.76328502415458, "grad_norm": 0.2986660897731781, "learning_rate": 0.0001, "loss": 1.5313, "step": 947800 }, { "epoch": 81.76811594202898, "grad_norm": 0.3669808506965637, "learning_rate": 0.0001, "loss": 1.5336, "step": 947856 }, { "epoch": 81.77294685990339, "grad_norm": 0.6747108101844788, "learning_rate": 0.0001, "loss": 1.5337, "step": 947912 }, { "epoch": 81.77777777777777, "grad_norm": 0.3187461495399475, "learning_rate": 0.0001, "loss": 1.5287, "step": 947968 }, { "epoch": 81.78260869565217, "grad_norm": 0.2625714838504791, "learning_rate": 0.0001, "loss": 1.5342, "step": 948024 }, { "epoch": 81.78743961352657, "grad_norm": 8.231473922729492, "learning_rate": 0.0001, "loss": 1.5367, "step": 948080 }, { "epoch": 81.79227053140096, "grad_norm": 0.4314301311969757, "learning_rate": 0.0001, "loss": 1.5351, "step": 948136 }, { "epoch": 81.79710144927536, "grad_norm": 0.3665993809700012, "learning_rate": 0.0001, "loss": 1.5399, "step": 948192 }, { "epoch": 81.80193236714976, "grad_norm": 0.38432759046554565, "learning_rate": 0.0001, "loss": 1.5368, "step": 948248 } ], "logging_steps": 56, "max_steps": 1159200, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 278, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5406829607430128e+19, "train_batch_size": 230, "trial_name": null, "trial_params": null }