{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5089621144652929, "eval_steps": 500, "global_step": 6215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.1892536518953e-05, "grad_norm": 0.1689453125, "learning_rate": 0.0, "loss": 1.2692375183105469, "step": 1 }, { "epoch": 0.000163785073037906, "grad_norm": 0.138671875, "learning_rate": 1.2244897959183673e-07, "loss": 0.7788365483283997, "step": 2 }, { "epoch": 0.000245677609556859, "grad_norm": 0.1474609375, "learning_rate": 2.4489795918367347e-07, "loss": 1.3062002658843994, "step": 3 }, { "epoch": 0.000327570146075812, "grad_norm": 0.11669921875, "learning_rate": 3.673469387755102e-07, "loss": 0.7250372767448425, "step": 4 }, { "epoch": 0.00040946268259476503, "grad_norm": 0.1337890625, "learning_rate": 4.897959183673469e-07, "loss": 0.961167573928833, "step": 5 }, { "epoch": 0.000491355219113718, "grad_norm": 0.16015625, "learning_rate": 6.122448979591837e-07, "loss": 1.0844104290008545, "step": 6 }, { "epoch": 0.000573247755632671, "grad_norm": 0.1455078125, "learning_rate": 7.346938775510204e-07, "loss": 0.9477209448814392, "step": 7 }, { "epoch": 0.000655140292151624, "grad_norm": 0.1484375, "learning_rate": 8.571428571428571e-07, "loss": 0.8315292000770569, "step": 8 }, { "epoch": 0.000737032828670577, "grad_norm": 0.142578125, "learning_rate": 9.795918367346939e-07, "loss": 0.8303043246269226, "step": 9 }, { "epoch": 0.0008189253651895301, "grad_norm": 0.1552734375, "learning_rate": 1.1020408163265306e-06, "loss": 0.748382568359375, "step": 10 }, { "epoch": 0.000900817901708483, "grad_norm": 0.1455078125, "learning_rate": 1.2244897959183673e-06, "loss": 0.7946436405181885, "step": 11 }, { "epoch": 0.000982710438227436, "grad_norm": 0.1484375, "learning_rate": 1.346938775510204e-06, "loss": 0.8571218848228455, "step": 12 }, { "epoch": 0.001064602974746389, "grad_norm": 0.1552734375, "learning_rate": 1.4693877551020408e-06, "loss": 0.8640573024749756, "step": 13 }, { "epoch": 0.001146495511265342, "grad_norm": 0.14453125, "learning_rate": 1.5918367346938777e-06, "loss": 0.8880600333213806, "step": 14 }, { "epoch": 0.0012283880477842951, "grad_norm": 0.158203125, "learning_rate": 1.7142857142857143e-06, "loss": 0.8768412470817566, "step": 15 }, { "epoch": 0.001310280584303248, "grad_norm": 0.1376953125, "learning_rate": 1.836734693877551e-06, "loss": 0.8432092070579529, "step": 16 }, { "epoch": 0.001392173120822201, "grad_norm": 0.12451171875, "learning_rate": 1.9591836734693877e-06, "loss": 0.7979586124420166, "step": 17 }, { "epoch": 0.001474065657341154, "grad_norm": 0.201171875, "learning_rate": 2.0816326530612247e-06, "loss": 1.0535919666290283, "step": 18 }, { "epoch": 0.001555958193860107, "grad_norm": 0.1689453125, "learning_rate": 2.204081632653061e-06, "loss": 0.7070658206939697, "step": 19 }, { "epoch": 0.0016378507303790601, "grad_norm": 0.1455078125, "learning_rate": 2.326530612244898e-06, "loss": 1.01751708984375, "step": 20 }, { "epoch": 0.0017197432668980132, "grad_norm": 0.154296875, "learning_rate": 2.4489795918367347e-06, "loss": 1.043867588043213, "step": 21 }, { "epoch": 0.001801635803416966, "grad_norm": 0.158203125, "learning_rate": 2.571428571428571e-06, "loss": 0.9232407808303833, "step": 22 }, { "epoch": 0.001883528339935919, "grad_norm": 0.1435546875, "learning_rate": 2.693877551020408e-06, "loss": 0.9462562203407288, "step": 23 }, { "epoch": 0.001965420876454872, "grad_norm": 0.1298828125, "learning_rate": 2.816326530612245e-06, "loss": 0.9328899383544922, "step": 24 }, { "epoch": 0.002047313412973825, "grad_norm": 0.1513671875, "learning_rate": 2.9387755102040816e-06, "loss": 0.8777925372123718, "step": 25 }, { "epoch": 0.002129205949492778, "grad_norm": 0.140625, "learning_rate": 3.0612244897959185e-06, "loss": 0.6303024888038635, "step": 26 }, { "epoch": 0.002211098486011731, "grad_norm": 0.1396484375, "learning_rate": 3.1836734693877555e-06, "loss": 0.9259619116783142, "step": 27 }, { "epoch": 0.002292991022530684, "grad_norm": 0.138671875, "learning_rate": 3.3061224489795916e-06, "loss": 0.8729074597358704, "step": 28 }, { "epoch": 0.0023748835590496373, "grad_norm": 0.1298828125, "learning_rate": 3.4285714285714285e-06, "loss": 0.5940540432929993, "step": 29 }, { "epoch": 0.0024567760955685903, "grad_norm": 0.142578125, "learning_rate": 3.5510204081632655e-06, "loss": 0.7300029397010803, "step": 30 }, { "epoch": 0.0025386686320875433, "grad_norm": 0.14453125, "learning_rate": 3.673469387755102e-06, "loss": 0.6736958026885986, "step": 31 }, { "epoch": 0.002620561168606496, "grad_norm": 0.1572265625, "learning_rate": 3.795918367346939e-06, "loss": 0.6989753842353821, "step": 32 }, { "epoch": 0.002702453705125449, "grad_norm": 0.146484375, "learning_rate": 3.9183673469387755e-06, "loss": 1.0111935138702393, "step": 33 }, { "epoch": 0.002784346241644402, "grad_norm": 0.13671875, "learning_rate": 4.040816326530612e-06, "loss": 0.7319263219833374, "step": 34 }, { "epoch": 0.002866238778163355, "grad_norm": 0.1337890625, "learning_rate": 4.163265306122449e-06, "loss": 0.8890162706375122, "step": 35 }, { "epoch": 0.002948131314682308, "grad_norm": 0.150390625, "learning_rate": 4.285714285714286e-06, "loss": 1.323777198791504, "step": 36 }, { "epoch": 0.003030023851201261, "grad_norm": 0.162109375, "learning_rate": 4.408163265306122e-06, "loss": 1.0636528730392456, "step": 37 }, { "epoch": 0.003111916387720214, "grad_norm": 0.1416015625, "learning_rate": 4.530612244897959e-06, "loss": 1.1280652284622192, "step": 38 }, { "epoch": 0.003193808924239167, "grad_norm": 0.1357421875, "learning_rate": 4.653061224489796e-06, "loss": 0.8183513879776001, "step": 39 }, { "epoch": 0.0032757014607581202, "grad_norm": 0.1328125, "learning_rate": 4.775510204081632e-06, "loss": 0.7616685032844543, "step": 40 }, { "epoch": 0.0033575939972770733, "grad_norm": 0.17578125, "learning_rate": 4.897959183673469e-06, "loss": 0.9517663717269897, "step": 41 }, { "epoch": 0.0034394865337960263, "grad_norm": 0.1767578125, "learning_rate": 5.020408163265306e-06, "loss": 0.9238744974136353, "step": 42 }, { "epoch": 0.0035213790703149794, "grad_norm": 0.1181640625, "learning_rate": 5.142857142857142e-06, "loss": 0.6035186648368835, "step": 43 }, { "epoch": 0.003603271606833932, "grad_norm": 0.146484375, "learning_rate": 5.265306122448979e-06, "loss": 0.504254937171936, "step": 44 }, { "epoch": 0.003685164143352885, "grad_norm": 0.1455078125, "learning_rate": 5.387755102040816e-06, "loss": 0.8422408699989319, "step": 45 }, { "epoch": 0.003767056679871838, "grad_norm": 0.1484375, "learning_rate": 5.510204081632653e-06, "loss": 0.7991898059844971, "step": 46 }, { "epoch": 0.003848949216390791, "grad_norm": 0.1455078125, "learning_rate": 5.63265306122449e-06, "loss": 0.6923357248306274, "step": 47 }, { "epoch": 0.003930841752909744, "grad_norm": 0.166015625, "learning_rate": 5.755102040816326e-06, "loss": 0.8725593090057373, "step": 48 }, { "epoch": 0.004012734289428698, "grad_norm": 0.1376953125, "learning_rate": 5.877551020408163e-06, "loss": 0.8000404238700867, "step": 49 }, { "epoch": 0.00409462682594765, "grad_norm": 0.1337890625, "learning_rate": 6e-06, "loss": 0.8268479108810425, "step": 50 }, { "epoch": 0.004176519362466603, "grad_norm": 0.1552734375, "learning_rate": 6.122448979591837e-06, "loss": 0.9731822609901428, "step": 51 }, { "epoch": 0.004258411898985556, "grad_norm": 0.126953125, "learning_rate": 6.244897959183674e-06, "loss": 0.6076136231422424, "step": 52 }, { "epoch": 0.004340304435504509, "grad_norm": 0.1533203125, "learning_rate": 6.367346938775511e-06, "loss": 0.8989383578300476, "step": 53 }, { "epoch": 0.004422196972023462, "grad_norm": 0.1318359375, "learning_rate": 6.489795918367348e-06, "loss": 1.2253038883209229, "step": 54 }, { "epoch": 0.004504089508542415, "grad_norm": 0.1416015625, "learning_rate": 6.612244897959183e-06, "loss": 1.0751113891601562, "step": 55 }, { "epoch": 0.004585982045061368, "grad_norm": 0.15625, "learning_rate": 6.73469387755102e-06, "loss": 0.976893961429596, "step": 56 }, { "epoch": 0.004667874581580321, "grad_norm": 0.1630859375, "learning_rate": 6.857142857142857e-06, "loss": 1.0483677387237549, "step": 57 }, { "epoch": 0.0047497671180992745, "grad_norm": 0.14453125, "learning_rate": 6.979591836734694e-06, "loss": 1.0369893312454224, "step": 58 }, { "epoch": 0.004831659654618227, "grad_norm": 0.1533203125, "learning_rate": 7.102040816326531e-06, "loss": 0.9358440637588501, "step": 59 }, { "epoch": 0.004913552191137181, "grad_norm": 0.1484375, "learning_rate": 7.224489795918368e-06, "loss": 0.8366637229919434, "step": 60 }, { "epoch": 0.004995444727656133, "grad_norm": 0.158203125, "learning_rate": 7.346938775510204e-06, "loss": 0.9385107755661011, "step": 61 }, { "epoch": 0.005077337264175087, "grad_norm": 0.1474609375, "learning_rate": 7.469387755102041e-06, "loss": 0.8058649301528931, "step": 62 }, { "epoch": 0.005159229800694039, "grad_norm": 0.1572265625, "learning_rate": 7.591836734693878e-06, "loss": 1.2202202081680298, "step": 63 }, { "epoch": 0.005241122337212992, "grad_norm": 0.1259765625, "learning_rate": 7.714285714285716e-06, "loss": 0.6693165898323059, "step": 64 }, { "epoch": 0.005323014873731945, "grad_norm": 0.1689453125, "learning_rate": 7.836734693877551e-06, "loss": 0.6848925948143005, "step": 65 }, { "epoch": 0.005404907410250898, "grad_norm": 0.126953125, "learning_rate": 7.959183673469388e-06, "loss": 0.5377231240272522, "step": 66 }, { "epoch": 0.005486799946769851, "grad_norm": 0.12890625, "learning_rate": 8.081632653061225e-06, "loss": 0.7365704774856567, "step": 67 }, { "epoch": 0.005568692483288804, "grad_norm": 0.142578125, "learning_rate": 8.204081632653062e-06, "loss": 0.8671019673347473, "step": 68 }, { "epoch": 0.0056505850198077575, "grad_norm": 0.1611328125, "learning_rate": 8.326530612244899e-06, "loss": 0.8263939023017883, "step": 69 }, { "epoch": 0.00573247755632671, "grad_norm": 0.1572265625, "learning_rate": 8.448979591836736e-06, "loss": 0.8309429883956909, "step": 70 }, { "epoch": 0.005814370092845664, "grad_norm": 0.142578125, "learning_rate": 8.571428571428573e-06, "loss": 0.868381917476654, "step": 71 }, { "epoch": 0.005896262629364616, "grad_norm": 0.1455078125, "learning_rate": 8.693877551020408e-06, "loss": 0.8100480437278748, "step": 72 }, { "epoch": 0.00597815516588357, "grad_norm": 0.154296875, "learning_rate": 8.816326530612245e-06, "loss": 0.8355422616004944, "step": 73 }, { "epoch": 0.006060047702402522, "grad_norm": 0.1767578125, "learning_rate": 8.938775510204082e-06, "loss": 0.6954861879348755, "step": 74 }, { "epoch": 0.006141940238921475, "grad_norm": 0.1826171875, "learning_rate": 9.061224489795919e-06, "loss": 1.3356058597564697, "step": 75 }, { "epoch": 0.006223832775440428, "grad_norm": 0.1533203125, "learning_rate": 9.183673469387756e-06, "loss": 0.8860036134719849, "step": 76 }, { "epoch": 0.006305725311959381, "grad_norm": 0.1474609375, "learning_rate": 9.306122448979593e-06, "loss": 0.8816828727722168, "step": 77 }, { "epoch": 0.006387617848478334, "grad_norm": 0.146484375, "learning_rate": 9.428571428571428e-06, "loss": 0.8622653484344482, "step": 78 }, { "epoch": 0.006469510384997287, "grad_norm": 0.150390625, "learning_rate": 9.551020408163265e-06, "loss": 1.005674123764038, "step": 79 }, { "epoch": 0.0065514029215162405, "grad_norm": 0.208984375, "learning_rate": 9.673469387755102e-06, "loss": 0.9369722604751587, "step": 80 }, { "epoch": 0.006633295458035193, "grad_norm": 0.130859375, "learning_rate": 9.795918367346939e-06, "loss": 0.881393551826477, "step": 81 }, { "epoch": 0.006715187994554147, "grad_norm": 0.197265625, "learning_rate": 9.918367346938776e-06, "loss": 0.801750898361206, "step": 82 }, { "epoch": 0.006797080531073099, "grad_norm": 0.1640625, "learning_rate": 1.0040816326530613e-05, "loss": 0.88079434633255, "step": 83 }, { "epoch": 0.006878973067592053, "grad_norm": 0.162109375, "learning_rate": 1.016326530612245e-05, "loss": 1.0499800443649292, "step": 84 }, { "epoch": 0.006960865604111005, "grad_norm": 0.162109375, "learning_rate": 1.0285714285714285e-05, "loss": 0.7700325846672058, "step": 85 }, { "epoch": 0.007042758140629959, "grad_norm": 0.150390625, "learning_rate": 1.0408163265306122e-05, "loss": 0.8462141156196594, "step": 86 }, { "epoch": 0.007124650677148911, "grad_norm": 0.1533203125, "learning_rate": 1.0530612244897959e-05, "loss": 0.9165534377098083, "step": 87 }, { "epoch": 0.007206543213667864, "grad_norm": 0.322265625, "learning_rate": 1.0653061224489796e-05, "loss": 0.8755977153778076, "step": 88 }, { "epoch": 0.007288435750186817, "grad_norm": 0.1513671875, "learning_rate": 1.0775510204081633e-05, "loss": 0.9935065507888794, "step": 89 }, { "epoch": 0.00737032828670577, "grad_norm": 0.1376953125, "learning_rate": 1.089795918367347e-05, "loss": 0.7393916845321655, "step": 90 }, { "epoch": 0.0074522208232247235, "grad_norm": 0.1376953125, "learning_rate": 1.1020408163265306e-05, "loss": 1.090970754623413, "step": 91 }, { "epoch": 0.007534113359743676, "grad_norm": 0.1591796875, "learning_rate": 1.1142857142857143e-05, "loss": 0.5876522064208984, "step": 92 }, { "epoch": 0.0076160058962626296, "grad_norm": 0.125, "learning_rate": 1.126530612244898e-05, "loss": 0.6235696077346802, "step": 93 }, { "epoch": 0.007697898432781582, "grad_norm": 0.419921875, "learning_rate": 1.1387755102040817e-05, "loss": 0.8901874423027039, "step": 94 }, { "epoch": 0.007779790969300536, "grad_norm": 0.169921875, "learning_rate": 1.1510204081632653e-05, "loss": 0.7616423964500427, "step": 95 }, { "epoch": 0.007861683505819488, "grad_norm": 0.193359375, "learning_rate": 1.163265306122449e-05, "loss": 1.2576884031295776, "step": 96 }, { "epoch": 0.00794357604233844, "grad_norm": 0.177734375, "learning_rate": 1.1755102040816326e-05, "loss": 0.6757850050926208, "step": 97 }, { "epoch": 0.008025468578857395, "grad_norm": 0.140625, "learning_rate": 1.1877551020408163e-05, "loss": 0.775367259979248, "step": 98 }, { "epoch": 0.008107361115376348, "grad_norm": 0.1484375, "learning_rate": 1.2e-05, "loss": 0.9876186847686768, "step": 99 }, { "epoch": 0.0081892536518953, "grad_norm": 0.1591796875, "learning_rate": 1.2122448979591836e-05, "loss": 0.8306399583816528, "step": 100 }, { "epoch": 0.008271146188414253, "grad_norm": 0.16796875, "learning_rate": 1.2244897959183674e-05, "loss": 0.8706358075141907, "step": 101 }, { "epoch": 0.008353038724933206, "grad_norm": 0.1611328125, "learning_rate": 1.236734693877551e-05, "loss": 0.7968311309814453, "step": 102 }, { "epoch": 0.00843493126145216, "grad_norm": 0.1318359375, "learning_rate": 1.2489795918367348e-05, "loss": 0.7418109178543091, "step": 103 }, { "epoch": 0.008516823797971113, "grad_norm": 0.1474609375, "learning_rate": 1.2612244897959183e-05, "loss": 0.8188655972480774, "step": 104 }, { "epoch": 0.008598716334490065, "grad_norm": 0.1533203125, "learning_rate": 1.2734693877551022e-05, "loss": 0.6840787529945374, "step": 105 }, { "epoch": 0.008680608871009018, "grad_norm": 0.16015625, "learning_rate": 1.2857142857142857e-05, "loss": 0.8916717767715454, "step": 106 }, { "epoch": 0.008762501407527972, "grad_norm": 0.1318359375, "learning_rate": 1.2979591836734696e-05, "loss": 0.7145904302597046, "step": 107 }, { "epoch": 0.008844393944046925, "grad_norm": 0.1572265625, "learning_rate": 1.3102040816326531e-05, "loss": 0.8699553608894348, "step": 108 }, { "epoch": 0.008926286480565877, "grad_norm": 0.177734375, "learning_rate": 1.3224489795918366e-05, "loss": 1.0870182514190674, "step": 109 }, { "epoch": 0.00900817901708483, "grad_norm": 0.1533203125, "learning_rate": 1.3346938775510205e-05, "loss": 0.830143392086029, "step": 110 }, { "epoch": 0.009090071553603784, "grad_norm": 0.3125, "learning_rate": 1.346938775510204e-05, "loss": 0.9132651090621948, "step": 111 }, { "epoch": 0.009171964090122737, "grad_norm": 0.1806640625, "learning_rate": 1.3591836734693879e-05, "loss": 0.7668812274932861, "step": 112 }, { "epoch": 0.00925385662664169, "grad_norm": 0.169921875, "learning_rate": 1.3714285714285714e-05, "loss": 0.7226609587669373, "step": 113 }, { "epoch": 0.009335749163160642, "grad_norm": 0.1748046875, "learning_rate": 1.3836734693877553e-05, "loss": 0.9787721633911133, "step": 114 }, { "epoch": 0.009417641699679595, "grad_norm": 0.1591796875, "learning_rate": 1.3959183673469388e-05, "loss": 0.8557654619216919, "step": 115 }, { "epoch": 0.009499534236198549, "grad_norm": 0.1650390625, "learning_rate": 1.4081632653061225e-05, "loss": 0.9546581506729126, "step": 116 }, { "epoch": 0.009581426772717502, "grad_norm": 0.146484375, "learning_rate": 1.4204081632653062e-05, "loss": 0.629300594329834, "step": 117 }, { "epoch": 0.009663319309236454, "grad_norm": 0.158203125, "learning_rate": 1.4326530612244899e-05, "loss": 1.1067485809326172, "step": 118 }, { "epoch": 0.009745211845755407, "grad_norm": 0.1806640625, "learning_rate": 1.4448979591836736e-05, "loss": 0.8721896409988403, "step": 119 }, { "epoch": 0.009827104382274361, "grad_norm": 0.1484375, "learning_rate": 1.4571428571428571e-05, "loss": 0.671083927154541, "step": 120 }, { "epoch": 0.009908996918793314, "grad_norm": 0.138671875, "learning_rate": 1.4693877551020408e-05, "loss": 1.0528693199157715, "step": 121 }, { "epoch": 0.009990889455312266, "grad_norm": 0.1962890625, "learning_rate": 1.4816326530612245e-05, "loss": 0.8432303667068481, "step": 122 }, { "epoch": 0.010072781991831219, "grad_norm": 0.1396484375, "learning_rate": 1.4938775510204082e-05, "loss": 0.7435109615325928, "step": 123 }, { "epoch": 0.010154674528350173, "grad_norm": 0.169921875, "learning_rate": 1.5061224489795919e-05, "loss": 0.7423120141029358, "step": 124 }, { "epoch": 0.010236567064869126, "grad_norm": 0.14453125, "learning_rate": 1.5183673469387756e-05, "loss": 0.9071717858314514, "step": 125 }, { "epoch": 0.010318459601388079, "grad_norm": 0.1748046875, "learning_rate": 1.530612244897959e-05, "loss": 0.7631561160087585, "step": 126 }, { "epoch": 0.010400352137907031, "grad_norm": 0.1796875, "learning_rate": 1.542857142857143e-05, "loss": 0.8060978651046753, "step": 127 }, { "epoch": 0.010482244674425984, "grad_norm": 0.1611328125, "learning_rate": 1.5551020408163265e-05, "loss": 0.7506223320960999, "step": 128 }, { "epoch": 0.010564137210944938, "grad_norm": 0.169921875, "learning_rate": 1.5673469387755102e-05, "loss": 0.9428765773773193, "step": 129 }, { "epoch": 0.01064602974746389, "grad_norm": 0.1689453125, "learning_rate": 1.579591836734694e-05, "loss": 0.5963538289070129, "step": 130 }, { "epoch": 0.010727922283982843, "grad_norm": 0.158203125, "learning_rate": 1.5918367346938776e-05, "loss": 0.7066556215286255, "step": 131 }, { "epoch": 0.010809814820501796, "grad_norm": 0.1484375, "learning_rate": 1.6040816326530613e-05, "loss": 0.882148265838623, "step": 132 }, { "epoch": 0.01089170735702075, "grad_norm": 0.205078125, "learning_rate": 1.616326530612245e-05, "loss": 0.797228991985321, "step": 133 }, { "epoch": 0.010973599893539703, "grad_norm": 0.173828125, "learning_rate": 1.6285714285714287e-05, "loss": 0.7989157438278198, "step": 134 }, { "epoch": 0.011055492430058655, "grad_norm": 0.1806640625, "learning_rate": 1.6408163265306124e-05, "loss": 1.149370789527893, "step": 135 }, { "epoch": 0.011137384966577608, "grad_norm": 0.1875, "learning_rate": 1.653061224489796e-05, "loss": 0.9384973049163818, "step": 136 }, { "epoch": 0.011219277503096562, "grad_norm": 0.1748046875, "learning_rate": 1.6653061224489797e-05, "loss": 0.6930122375488281, "step": 137 }, { "epoch": 0.011301170039615515, "grad_norm": 0.1904296875, "learning_rate": 1.6775510204081634e-05, "loss": 1.0194259881973267, "step": 138 }, { "epoch": 0.011383062576134468, "grad_norm": 0.1748046875, "learning_rate": 1.689795918367347e-05, "loss": 0.8852027058601379, "step": 139 }, { "epoch": 0.01146495511265342, "grad_norm": 0.1708984375, "learning_rate": 1.7020408163265305e-05, "loss": 0.8619977831840515, "step": 140 }, { "epoch": 0.011546847649172373, "grad_norm": 0.158203125, "learning_rate": 1.7142857142857145e-05, "loss": 0.7176647186279297, "step": 141 }, { "epoch": 0.011628740185691327, "grad_norm": 0.1953125, "learning_rate": 1.726530612244898e-05, "loss": 1.1816068887710571, "step": 142 }, { "epoch": 0.01171063272221028, "grad_norm": 0.1767578125, "learning_rate": 1.7387755102040816e-05, "loss": 0.9537625908851624, "step": 143 }, { "epoch": 0.011792525258729232, "grad_norm": 0.275390625, "learning_rate": 1.7510204081632653e-05, "loss": 0.7848352193832397, "step": 144 }, { "epoch": 0.011874417795248185, "grad_norm": 0.212890625, "learning_rate": 1.763265306122449e-05, "loss": 0.8986237049102783, "step": 145 }, { "epoch": 0.01195631033176714, "grad_norm": 0.2080078125, "learning_rate": 1.7755102040816327e-05, "loss": 0.7229043245315552, "step": 146 }, { "epoch": 0.012038202868286092, "grad_norm": 0.1875, "learning_rate": 1.7877551020408164e-05, "loss": 1.0595723390579224, "step": 147 }, { "epoch": 0.012120095404805045, "grad_norm": 0.19140625, "learning_rate": 1.8e-05, "loss": 0.9196676015853882, "step": 148 }, { "epoch": 0.012201987941323997, "grad_norm": 0.193359375, "learning_rate": 1.8122448979591837e-05, "loss": 0.7915199995040894, "step": 149 }, { "epoch": 0.01228388047784295, "grad_norm": 0.181640625, "learning_rate": 1.8244897959183674e-05, "loss": 0.683129072189331, "step": 150 }, { "epoch": 0.012365773014361904, "grad_norm": 0.162109375, "learning_rate": 1.836734693877551e-05, "loss": 0.8023064732551575, "step": 151 }, { "epoch": 0.012447665550880857, "grad_norm": 0.197265625, "learning_rate": 1.8489795918367348e-05, "loss": 0.6344118714332581, "step": 152 }, { "epoch": 0.01252955808739981, "grad_norm": 0.17578125, "learning_rate": 1.8612244897959185e-05, "loss": 0.7012781500816345, "step": 153 }, { "epoch": 0.012611450623918762, "grad_norm": 0.16796875, "learning_rate": 1.8734693877551022e-05, "loss": 0.9908215999603271, "step": 154 }, { "epoch": 0.012693343160437716, "grad_norm": 0.1796875, "learning_rate": 1.8857142857142856e-05, "loss": 0.8533449172973633, "step": 155 }, { "epoch": 0.012775235696956669, "grad_norm": 0.15625, "learning_rate": 1.8979591836734696e-05, "loss": 0.7644760012626648, "step": 156 }, { "epoch": 0.012857128233475621, "grad_norm": 0.162109375, "learning_rate": 1.910204081632653e-05, "loss": 1.0475883483886719, "step": 157 }, { "epoch": 0.012939020769994574, "grad_norm": 0.1826171875, "learning_rate": 1.9224489795918367e-05, "loss": 0.7506498098373413, "step": 158 }, { "epoch": 0.013020913306513528, "grad_norm": 0.177734375, "learning_rate": 1.9346938775510203e-05, "loss": 0.6886553764343262, "step": 159 }, { "epoch": 0.013102805843032481, "grad_norm": 0.189453125, "learning_rate": 1.946938775510204e-05, "loss": 0.8300869464874268, "step": 160 }, { "epoch": 0.013184698379551434, "grad_norm": 0.1767578125, "learning_rate": 1.9591836734693877e-05, "loss": 1.0316212177276611, "step": 161 }, { "epoch": 0.013266590916070386, "grad_norm": 0.1865234375, "learning_rate": 1.9714285714285714e-05, "loss": 0.7299474477767944, "step": 162 }, { "epoch": 0.013348483452589339, "grad_norm": 0.216796875, "learning_rate": 1.983673469387755e-05, "loss": 0.8153265714645386, "step": 163 }, { "epoch": 0.013430375989108293, "grad_norm": 0.1630859375, "learning_rate": 1.9959183673469388e-05, "loss": 0.9564380049705505, "step": 164 }, { "epoch": 0.013512268525627246, "grad_norm": 0.1728515625, "learning_rate": 2.0081632653061225e-05, "loss": 1.0247093439102173, "step": 165 }, { "epoch": 0.013594161062146198, "grad_norm": 0.169921875, "learning_rate": 2.0204081632653062e-05, "loss": 0.9854190349578857, "step": 166 }, { "epoch": 0.013676053598665151, "grad_norm": 0.232421875, "learning_rate": 2.03265306122449e-05, "loss": 0.8826335072517395, "step": 167 }, { "epoch": 0.013757946135184105, "grad_norm": 0.1767578125, "learning_rate": 2.0448979591836736e-05, "loss": 0.9545683264732361, "step": 168 }, { "epoch": 0.013839838671703058, "grad_norm": 0.2216796875, "learning_rate": 2.057142857142857e-05, "loss": 0.7048317790031433, "step": 169 }, { "epoch": 0.01392173120822201, "grad_norm": 0.1962890625, "learning_rate": 2.069387755102041e-05, "loss": 1.383650541305542, "step": 170 }, { "epoch": 0.014003623744740963, "grad_norm": 0.2490234375, "learning_rate": 2.0816326530612243e-05, "loss": 1.3275967836380005, "step": 171 }, { "epoch": 0.014085516281259917, "grad_norm": 0.1904296875, "learning_rate": 2.0938775510204084e-05, "loss": 0.9396587610244751, "step": 172 }, { "epoch": 0.01416740881777887, "grad_norm": 0.1845703125, "learning_rate": 2.1061224489795917e-05, "loss": 0.7036116123199463, "step": 173 }, { "epoch": 0.014249301354297823, "grad_norm": 0.20703125, "learning_rate": 2.1183673469387758e-05, "loss": 0.9278486967086792, "step": 174 }, { "epoch": 0.014331193890816775, "grad_norm": 0.1474609375, "learning_rate": 2.130612244897959e-05, "loss": 0.7104832530021667, "step": 175 }, { "epoch": 0.014413086427335728, "grad_norm": 0.177734375, "learning_rate": 2.142857142857143e-05, "loss": 0.9519690275192261, "step": 176 }, { "epoch": 0.014494978963854682, "grad_norm": 0.177734375, "learning_rate": 2.1551020408163265e-05, "loss": 0.84523606300354, "step": 177 }, { "epoch": 0.014576871500373635, "grad_norm": 0.1904296875, "learning_rate": 2.1673469387755102e-05, "loss": 0.9742757081985474, "step": 178 }, { "epoch": 0.014658764036892587, "grad_norm": 0.2265625, "learning_rate": 2.179591836734694e-05, "loss": 0.9848318099975586, "step": 179 }, { "epoch": 0.01474065657341154, "grad_norm": 0.18359375, "learning_rate": 2.1918367346938776e-05, "loss": 0.790285587310791, "step": 180 }, { "epoch": 0.014822549109930494, "grad_norm": 0.24609375, "learning_rate": 2.2040816326530613e-05, "loss": 1.0562388896942139, "step": 181 }, { "epoch": 0.014904441646449447, "grad_norm": 0.203125, "learning_rate": 2.216326530612245e-05, "loss": 0.6656460762023926, "step": 182 }, { "epoch": 0.0149863341829684, "grad_norm": 0.177734375, "learning_rate": 2.2285714285714287e-05, "loss": 0.7126745581626892, "step": 183 }, { "epoch": 0.015068226719487352, "grad_norm": 0.1494140625, "learning_rate": 2.2408163265306124e-05, "loss": 0.5595638751983643, "step": 184 }, { "epoch": 0.015150119256006307, "grad_norm": 0.1591796875, "learning_rate": 2.253061224489796e-05, "loss": 0.837664008140564, "step": 185 }, { "epoch": 0.015232011792525259, "grad_norm": 0.2333984375, "learning_rate": 2.2653061224489798e-05, "loss": 1.0549743175506592, "step": 186 }, { "epoch": 0.015313904329044212, "grad_norm": 0.158203125, "learning_rate": 2.2775510204081635e-05, "loss": 0.6522034406661987, "step": 187 }, { "epoch": 0.015395796865563164, "grad_norm": 0.181640625, "learning_rate": 2.289795918367347e-05, "loss": 0.88138747215271, "step": 188 }, { "epoch": 0.015477689402082117, "grad_norm": 0.23046875, "learning_rate": 2.3020408163265305e-05, "loss": 0.7675457000732422, "step": 189 }, { "epoch": 0.015559581938601071, "grad_norm": 0.1650390625, "learning_rate": 2.3142857142857145e-05, "loss": 0.7595466375350952, "step": 190 }, { "epoch": 0.015641474475120022, "grad_norm": 0.1572265625, "learning_rate": 2.326530612244898e-05, "loss": 0.6058135628700256, "step": 191 }, { "epoch": 0.015723367011638976, "grad_norm": 0.181640625, "learning_rate": 2.3387755102040816e-05, "loss": 0.8671406507492065, "step": 192 }, { "epoch": 0.01580525954815793, "grad_norm": 0.16796875, "learning_rate": 2.3510204081632653e-05, "loss": 0.9157660007476807, "step": 193 }, { "epoch": 0.01588715208467688, "grad_norm": 0.2197265625, "learning_rate": 2.363265306122449e-05, "loss": 0.879551112651825, "step": 194 }, { "epoch": 0.015969044621195836, "grad_norm": 0.28515625, "learning_rate": 2.3755102040816327e-05, "loss": 0.641154944896698, "step": 195 }, { "epoch": 0.01605093715771479, "grad_norm": 0.171875, "learning_rate": 2.3877551020408164e-05, "loss": 0.6814965605735779, "step": 196 }, { "epoch": 0.01613282969423374, "grad_norm": 0.1572265625, "learning_rate": 2.4e-05, "loss": 0.8545130491256714, "step": 197 }, { "epoch": 0.016214722230752696, "grad_norm": 0.1787109375, "learning_rate": 2.399999973921529e-05, "loss": 0.9945880174636841, "step": 198 }, { "epoch": 0.016296614767271646, "grad_norm": 0.1484375, "learning_rate": 2.3999998956861162e-05, "loss": 0.7373678088188171, "step": 199 }, { "epoch": 0.0163785073037906, "grad_norm": 0.1943359375, "learning_rate": 2.3999997652937657e-05, "loss": 0.8678381443023682, "step": 200 }, { "epoch": 0.016460399840309555, "grad_norm": 0.1884765625, "learning_rate": 2.3999995827444833e-05, "loss": 0.6554551720619202, "step": 201 }, { "epoch": 0.016542292376828506, "grad_norm": 0.134765625, "learning_rate": 2.3999993480382764e-05, "loss": 0.6440178751945496, "step": 202 }, { "epoch": 0.01662418491334746, "grad_norm": 0.1953125, "learning_rate": 2.3999990611751552e-05, "loss": 0.9317048788070679, "step": 203 }, { "epoch": 0.01670607744986641, "grad_norm": 0.21875, "learning_rate": 2.3999987221551325e-05, "loss": 0.949978232383728, "step": 204 }, { "epoch": 0.016787969986385366, "grad_norm": 0.185546875, "learning_rate": 2.3999983309782227e-05, "loss": 0.7016098499298096, "step": 205 }, { "epoch": 0.01686986252290432, "grad_norm": 0.1806640625, "learning_rate": 2.3999978876444435e-05, "loss": 0.8135285973548889, "step": 206 }, { "epoch": 0.01695175505942327, "grad_norm": 0.240234375, "learning_rate": 2.3999973921538135e-05, "loss": 0.6167044043540955, "step": 207 }, { "epoch": 0.017033647595942225, "grad_norm": 0.1865234375, "learning_rate": 2.3999968445063538e-05, "loss": 0.8787804841995239, "step": 208 }, { "epoch": 0.01711554013246118, "grad_norm": 0.193359375, "learning_rate": 2.3999962447020896e-05, "loss": 1.1545552015304565, "step": 209 }, { "epoch": 0.01719743266898013, "grad_norm": 0.1943359375, "learning_rate": 2.399995592741046e-05, "loss": 0.711759090423584, "step": 210 }, { "epoch": 0.017279325205499085, "grad_norm": 0.1923828125, "learning_rate": 2.3999948886232516e-05, "loss": 0.8914077877998352, "step": 211 }, { "epoch": 0.017361217742018036, "grad_norm": 0.1572265625, "learning_rate": 2.3999941323487364e-05, "loss": 0.8630354404449463, "step": 212 }, { "epoch": 0.01744311027853699, "grad_norm": 0.1923828125, "learning_rate": 2.3999933239175342e-05, "loss": 1.1325874328613281, "step": 213 }, { "epoch": 0.017525002815055944, "grad_norm": 0.1982421875, "learning_rate": 2.3999924633296796e-05, "loss": 0.9854510426521301, "step": 214 }, { "epoch": 0.017606895351574895, "grad_norm": 0.1884765625, "learning_rate": 2.3999915505852103e-05, "loss": 0.8229562044143677, "step": 215 }, { "epoch": 0.01768878788809385, "grad_norm": 0.1591796875, "learning_rate": 2.3999905856841657e-05, "loss": 1.047928810119629, "step": 216 }, { "epoch": 0.0177706804246128, "grad_norm": 0.16015625, "learning_rate": 2.399989568626588e-05, "loss": 0.8584483861923218, "step": 217 }, { "epoch": 0.017852572961131755, "grad_norm": 0.1669921875, "learning_rate": 2.399988499412521e-05, "loss": 0.7633299827575684, "step": 218 }, { "epoch": 0.01793446549765071, "grad_norm": 0.1484375, "learning_rate": 2.3999873780420115e-05, "loss": 0.6681854128837585, "step": 219 }, { "epoch": 0.01801635803416966, "grad_norm": 0.2294921875, "learning_rate": 2.3999862045151084e-05, "loss": 0.8275490999221802, "step": 220 }, { "epoch": 0.018098250570688614, "grad_norm": 0.2197265625, "learning_rate": 2.399984978831862e-05, "loss": 1.0392675399780273, "step": 221 }, { "epoch": 0.01818014310720757, "grad_norm": 0.1875, "learning_rate": 2.3999837009923263e-05, "loss": 0.5126798748970032, "step": 222 }, { "epoch": 0.01826203564372652, "grad_norm": 0.1787109375, "learning_rate": 2.3999823709965567e-05, "loss": 1.0829551219940186, "step": 223 }, { "epoch": 0.018343928180245474, "grad_norm": 0.169921875, "learning_rate": 2.399980988844611e-05, "loss": 0.9590635299682617, "step": 224 }, { "epoch": 0.018425820716764425, "grad_norm": 0.1875, "learning_rate": 2.3999795545365487e-05, "loss": 1.1667808294296265, "step": 225 }, { "epoch": 0.01850771325328338, "grad_norm": 0.203125, "learning_rate": 2.399978068072433e-05, "loss": 0.8245648145675659, "step": 226 }, { "epoch": 0.018589605789802333, "grad_norm": 0.1533203125, "learning_rate": 2.399976529452328e-05, "loss": 1.0823947191238403, "step": 227 }, { "epoch": 0.018671498326321284, "grad_norm": 0.1689453125, "learning_rate": 2.3999749386763007e-05, "loss": 0.8102518916130066, "step": 228 }, { "epoch": 0.01875339086284024, "grad_norm": 0.1875, "learning_rate": 2.39997329574442e-05, "loss": 0.6749840974807739, "step": 229 }, { "epoch": 0.01883528339935919, "grad_norm": 0.15625, "learning_rate": 2.399971600656758e-05, "loss": 0.8160229921340942, "step": 230 }, { "epoch": 0.018917175935878144, "grad_norm": 0.1572265625, "learning_rate": 2.3999698534133875e-05, "loss": 0.9988230466842651, "step": 231 }, { "epoch": 0.018999068472397098, "grad_norm": 0.1474609375, "learning_rate": 2.399968054014385e-05, "loss": 1.023828148841858, "step": 232 }, { "epoch": 0.01908096100891605, "grad_norm": 0.2001953125, "learning_rate": 2.3999662024598287e-05, "loss": 0.9382991790771484, "step": 233 }, { "epoch": 0.019162853545435003, "grad_norm": 0.173828125, "learning_rate": 2.3999642987497988e-05, "loss": 0.8369897603988647, "step": 234 }, { "epoch": 0.019244746081953958, "grad_norm": 0.1669921875, "learning_rate": 2.399962342884378e-05, "loss": 0.9899635314941406, "step": 235 }, { "epoch": 0.01932663861847291, "grad_norm": 0.240234375, "learning_rate": 2.399960334863652e-05, "loss": 1.175307035446167, "step": 236 }, { "epoch": 0.019408531154991863, "grad_norm": 0.2060546875, "learning_rate": 2.3999582746877072e-05, "loss": 0.9381195902824402, "step": 237 }, { "epoch": 0.019490423691510814, "grad_norm": 0.1904296875, "learning_rate": 2.3999561623566336e-05, "loss": 1.063532829284668, "step": 238 }, { "epoch": 0.019572316228029768, "grad_norm": 0.1513671875, "learning_rate": 2.399953997870523e-05, "loss": 0.6051982641220093, "step": 239 }, { "epoch": 0.019654208764548722, "grad_norm": 0.158203125, "learning_rate": 2.3999517812294692e-05, "loss": 0.8455670475959778, "step": 240 }, { "epoch": 0.019736101301067673, "grad_norm": 0.236328125, "learning_rate": 2.399949512433569e-05, "loss": 0.715160608291626, "step": 241 }, { "epoch": 0.019817993837586628, "grad_norm": 0.1787109375, "learning_rate": 2.3999471914829207e-05, "loss": 0.8958784341812134, "step": 242 }, { "epoch": 0.01989988637410558, "grad_norm": 0.17578125, "learning_rate": 2.399944818377625e-05, "loss": 0.6496281623840332, "step": 243 }, { "epoch": 0.019981778910624533, "grad_norm": 0.2177734375, "learning_rate": 2.3999423931177856e-05, "loss": 1.1493349075317383, "step": 244 }, { "epoch": 0.020063671447143487, "grad_norm": 0.1982421875, "learning_rate": 2.3999399157035073e-05, "loss": 0.5976084470748901, "step": 245 }, { "epoch": 0.020145563983662438, "grad_norm": 0.2109375, "learning_rate": 2.3999373861348982e-05, "loss": 0.9261276721954346, "step": 246 }, { "epoch": 0.020227456520181392, "grad_norm": 0.18359375, "learning_rate": 2.3999348044120683e-05, "loss": 1.0355850458145142, "step": 247 }, { "epoch": 0.020309349056700347, "grad_norm": 0.2451171875, "learning_rate": 2.3999321705351298e-05, "loss": 0.9628753066062927, "step": 248 }, { "epoch": 0.020391241593219298, "grad_norm": 0.1640625, "learning_rate": 2.3999294845041965e-05, "loss": 0.9781221151351929, "step": 249 }, { "epoch": 0.020473134129738252, "grad_norm": 0.1689453125, "learning_rate": 2.3999267463193857e-05, "loss": 0.9014031291007996, "step": 250 }, { "epoch": 0.020555026666257203, "grad_norm": 0.193359375, "learning_rate": 2.3999239559808167e-05, "loss": 0.7805596590042114, "step": 251 }, { "epoch": 0.020636919202776157, "grad_norm": 0.16796875, "learning_rate": 2.3999211134886103e-05, "loss": 0.9792352914810181, "step": 252 }, { "epoch": 0.02071881173929511, "grad_norm": 0.2041015625, "learning_rate": 2.39991821884289e-05, "loss": 0.6729113459587097, "step": 253 }, { "epoch": 0.020800704275814062, "grad_norm": 0.203125, "learning_rate": 2.399915272043782e-05, "loss": 0.7053857445716858, "step": 254 }, { "epoch": 0.020882596812333017, "grad_norm": 0.1640625, "learning_rate": 2.399912273091414e-05, "loss": 0.730910062789917, "step": 255 }, { "epoch": 0.020964489348851967, "grad_norm": 0.189453125, "learning_rate": 2.399909221985917e-05, "loss": 1.0160516500473022, "step": 256 }, { "epoch": 0.021046381885370922, "grad_norm": 0.1982421875, "learning_rate": 2.399906118727423e-05, "loss": 1.0672346353530884, "step": 257 }, { "epoch": 0.021128274421889876, "grad_norm": 0.1669921875, "learning_rate": 2.3999029633160666e-05, "loss": 0.8804312944412231, "step": 258 }, { "epoch": 0.021210166958408827, "grad_norm": 0.23828125, "learning_rate": 2.3998997557519854e-05, "loss": 0.6796444058418274, "step": 259 }, { "epoch": 0.02129205949492778, "grad_norm": 0.1875, "learning_rate": 2.3998964960353192e-05, "loss": 0.9091837406158447, "step": 260 }, { "epoch": 0.021373952031446736, "grad_norm": 0.248046875, "learning_rate": 2.3998931841662094e-05, "loss": 0.8179640173912048, "step": 261 }, { "epoch": 0.021455844567965687, "grad_norm": 0.201171875, "learning_rate": 2.3998898201447995e-05, "loss": 0.8405275940895081, "step": 262 }, { "epoch": 0.02153773710448464, "grad_norm": 0.14453125, "learning_rate": 2.3998864039712357e-05, "loss": 0.7459143400192261, "step": 263 }, { "epoch": 0.021619629641003592, "grad_norm": 0.17578125, "learning_rate": 2.3998829356456676e-05, "loss": 0.6243706345558167, "step": 264 }, { "epoch": 0.021701522177522546, "grad_norm": 0.16015625, "learning_rate": 2.3998794151682444e-05, "loss": 0.5267554521560669, "step": 265 }, { "epoch": 0.0217834147140415, "grad_norm": 0.1904296875, "learning_rate": 2.39987584253912e-05, "loss": 1.0524226427078247, "step": 266 }, { "epoch": 0.02186530725056045, "grad_norm": 0.1767578125, "learning_rate": 2.39987221775845e-05, "loss": 1.0620660781860352, "step": 267 }, { "epoch": 0.021947199787079406, "grad_norm": 0.158203125, "learning_rate": 2.3998685408263914e-05, "loss": 0.8309974074363708, "step": 268 }, { "epoch": 0.022029092323598357, "grad_norm": 0.19140625, "learning_rate": 2.399864811743104e-05, "loss": 0.8603475689888, "step": 269 }, { "epoch": 0.02211098486011731, "grad_norm": 0.1748046875, "learning_rate": 2.39986103050875e-05, "loss": 0.7075223922729492, "step": 270 }, { "epoch": 0.022192877396636265, "grad_norm": 0.19140625, "learning_rate": 2.3998571971234938e-05, "loss": 0.9101685285568237, "step": 271 }, { "epoch": 0.022274769933155216, "grad_norm": 0.18359375, "learning_rate": 2.399853311587502e-05, "loss": 0.6626906394958496, "step": 272 }, { "epoch": 0.02235666246967417, "grad_norm": 0.1826171875, "learning_rate": 2.3998493739009434e-05, "loss": 1.0679376125335693, "step": 273 }, { "epoch": 0.022438555006193125, "grad_norm": 0.169921875, "learning_rate": 2.399845384063989e-05, "loss": 0.9680445790290833, "step": 274 }, { "epoch": 0.022520447542712076, "grad_norm": 0.140625, "learning_rate": 2.3998413420768126e-05, "loss": 0.7406020164489746, "step": 275 }, { "epoch": 0.02260234007923103, "grad_norm": 0.216796875, "learning_rate": 2.3998372479395895e-05, "loss": 1.1959878206253052, "step": 276 }, { "epoch": 0.02268423261574998, "grad_norm": 0.2060546875, "learning_rate": 2.3998331016524984e-05, "loss": 0.8100699186325073, "step": 277 }, { "epoch": 0.022766125152268935, "grad_norm": 0.146484375, "learning_rate": 2.3998289032157183e-05, "loss": 0.9156461954116821, "step": 278 }, { "epoch": 0.02284801768878789, "grad_norm": 0.236328125, "learning_rate": 2.3998246526294326e-05, "loss": 0.8663290739059448, "step": 279 }, { "epoch": 0.02292991022530684, "grad_norm": 0.181640625, "learning_rate": 2.3998203498938257e-05, "loss": 0.6476192474365234, "step": 280 }, { "epoch": 0.023011802761825795, "grad_norm": 0.1767578125, "learning_rate": 2.399815995009085e-05, "loss": 0.7882580161094666, "step": 281 }, { "epoch": 0.023093695298344746, "grad_norm": 0.1669921875, "learning_rate": 2.399811587975399e-05, "loss": 0.760831892490387, "step": 282 }, { "epoch": 0.0231755878348637, "grad_norm": 0.18359375, "learning_rate": 2.39980712879296e-05, "loss": 0.7089680433273315, "step": 283 }, { "epoch": 0.023257480371382654, "grad_norm": 0.185546875, "learning_rate": 2.3998026174619617e-05, "loss": 0.902609646320343, "step": 284 }, { "epoch": 0.023339372907901605, "grad_norm": 0.142578125, "learning_rate": 2.3997980539825998e-05, "loss": 0.6368549466133118, "step": 285 }, { "epoch": 0.02342126544442056, "grad_norm": 0.1708984375, "learning_rate": 2.3997934383550727e-05, "loss": 1.0773712396621704, "step": 286 }, { "epoch": 0.02350315798093951, "grad_norm": 0.1806640625, "learning_rate": 2.3997887705795815e-05, "loss": 0.6845366358757019, "step": 287 }, { "epoch": 0.023585050517458465, "grad_norm": 0.1689453125, "learning_rate": 2.3997840506563286e-05, "loss": 0.8296812772750854, "step": 288 }, { "epoch": 0.02366694305397742, "grad_norm": 0.244140625, "learning_rate": 2.3997792785855194e-05, "loss": 0.964162290096283, "step": 289 }, { "epoch": 0.02374883559049637, "grad_norm": 0.220703125, "learning_rate": 2.3997744543673612e-05, "loss": 1.0193179845809937, "step": 290 }, { "epoch": 0.023830728127015324, "grad_norm": 0.15625, "learning_rate": 2.3997695780020636e-05, "loss": 0.8332913517951965, "step": 291 }, { "epoch": 0.02391262066353428, "grad_norm": 0.18359375, "learning_rate": 2.3997646494898384e-05, "loss": 0.930297315120697, "step": 292 }, { "epoch": 0.02399451320005323, "grad_norm": 0.181640625, "learning_rate": 2.3997596688309006e-05, "loss": 0.867175817489624, "step": 293 }, { "epoch": 0.024076405736572184, "grad_norm": 0.154296875, "learning_rate": 2.3997546360254664e-05, "loss": 0.875670313835144, "step": 294 }, { "epoch": 0.024158298273091135, "grad_norm": 0.2060546875, "learning_rate": 2.3997495510737535e-05, "loss": 0.8585609197616577, "step": 295 }, { "epoch": 0.02424019080961009, "grad_norm": 0.16796875, "learning_rate": 2.3997444139759842e-05, "loss": 0.7514534592628479, "step": 296 }, { "epoch": 0.024322083346129043, "grad_norm": 0.1884765625, "learning_rate": 2.399739224732381e-05, "loss": 0.8820545077323914, "step": 297 }, { "epoch": 0.024403975882647994, "grad_norm": 0.1787109375, "learning_rate": 2.39973398334317e-05, "loss": 0.8920679688453674, "step": 298 }, { "epoch": 0.02448586841916695, "grad_norm": 0.2099609375, "learning_rate": 2.3997286898085784e-05, "loss": 1.2037074565887451, "step": 299 }, { "epoch": 0.0245677609556859, "grad_norm": 0.1875, "learning_rate": 2.3997233441288368e-05, "loss": 0.6516916155815125, "step": 300 }, { "epoch": 0.024649653492204854, "grad_norm": 0.1552734375, "learning_rate": 2.3997179463041772e-05, "loss": 0.6991357803344727, "step": 301 }, { "epoch": 0.024731546028723808, "grad_norm": 0.18359375, "learning_rate": 2.399712496334835e-05, "loss": 0.7480301260948181, "step": 302 }, { "epoch": 0.02481343856524276, "grad_norm": 0.2099609375, "learning_rate": 2.3997069942210458e-05, "loss": 0.8925761580467224, "step": 303 }, { "epoch": 0.024895331101761713, "grad_norm": 0.1962890625, "learning_rate": 2.3997014399630496e-05, "loss": 0.838609516620636, "step": 304 }, { "epoch": 0.024977223638280668, "grad_norm": 0.138671875, "learning_rate": 2.3996958335610875e-05, "loss": 0.7044812440872192, "step": 305 }, { "epoch": 0.02505911617479962, "grad_norm": 0.1826171875, "learning_rate": 2.3996901750154035e-05, "loss": 0.78529292345047, "step": 306 }, { "epoch": 0.025141008711318573, "grad_norm": 0.19140625, "learning_rate": 2.3996844643262432e-05, "loss": 1.0998769998550415, "step": 307 }, { "epoch": 0.025222901247837524, "grad_norm": 0.2041015625, "learning_rate": 2.3996787014938552e-05, "loss": 0.8928024768829346, "step": 308 }, { "epoch": 0.025304793784356478, "grad_norm": 0.20703125, "learning_rate": 2.3996728865184897e-05, "loss": 0.8206113576889038, "step": 309 }, { "epoch": 0.025386686320875432, "grad_norm": 0.169921875, "learning_rate": 2.399667019400399e-05, "loss": 0.6971143484115601, "step": 310 }, { "epoch": 0.025468578857394383, "grad_norm": 0.1953125, "learning_rate": 2.399661100139839e-05, "loss": 0.9175295829772949, "step": 311 }, { "epoch": 0.025550471393913338, "grad_norm": 0.1630859375, "learning_rate": 2.399655128737066e-05, "loss": 0.6745611429214478, "step": 312 }, { "epoch": 0.02563236393043229, "grad_norm": 0.1943359375, "learning_rate": 2.3996491051923406e-05, "loss": 1.0425691604614258, "step": 313 }, { "epoch": 0.025714256466951243, "grad_norm": 0.232421875, "learning_rate": 2.399643029505924e-05, "loss": 0.807881236076355, "step": 314 }, { "epoch": 0.025796149003470197, "grad_norm": 0.169921875, "learning_rate": 2.39963690167808e-05, "loss": 0.9455503225326538, "step": 315 }, { "epoch": 0.025878041539989148, "grad_norm": 0.181640625, "learning_rate": 2.399630721709076e-05, "loss": 0.6920031309127808, "step": 316 }, { "epoch": 0.025959934076508102, "grad_norm": 0.1982421875, "learning_rate": 2.3996244895991797e-05, "loss": 1.0862457752227783, "step": 317 }, { "epoch": 0.026041826613027057, "grad_norm": 0.1298828125, "learning_rate": 2.399618205348662e-05, "loss": 0.6960668563842773, "step": 318 }, { "epoch": 0.026123719149546008, "grad_norm": 0.1904296875, "learning_rate": 2.3996118689577964e-05, "loss": 0.9928886294364929, "step": 319 }, { "epoch": 0.026205611686064962, "grad_norm": 0.1689453125, "learning_rate": 2.399605480426858e-05, "loss": 0.8675575256347656, "step": 320 }, { "epoch": 0.026287504222583913, "grad_norm": 0.1669921875, "learning_rate": 2.3995990397561248e-05, "loss": 0.8609463572502136, "step": 321 }, { "epoch": 0.026369396759102867, "grad_norm": 0.52734375, "learning_rate": 2.3995925469458765e-05, "loss": 0.8016886115074158, "step": 322 }, { "epoch": 0.02645128929562182, "grad_norm": 0.1728515625, "learning_rate": 2.399586001996395e-05, "loss": 0.7719072103500366, "step": 323 }, { "epoch": 0.026533181832140772, "grad_norm": 0.2080078125, "learning_rate": 2.3995794049079652e-05, "loss": 0.9414935111999512, "step": 324 }, { "epoch": 0.026615074368659727, "grad_norm": 0.1923828125, "learning_rate": 2.399572755680874e-05, "loss": 0.6840108633041382, "step": 325 }, { "epoch": 0.026696966905178678, "grad_norm": 0.1630859375, "learning_rate": 2.39956605431541e-05, "loss": 0.9565162658691406, "step": 326 }, { "epoch": 0.026778859441697632, "grad_norm": 0.19921875, "learning_rate": 2.3995593008118647e-05, "loss": 0.5880173444747925, "step": 327 }, { "epoch": 0.026860751978216586, "grad_norm": 0.1943359375, "learning_rate": 2.3995524951705315e-05, "loss": 1.1901681423187256, "step": 328 }, { "epoch": 0.026942644514735537, "grad_norm": 0.1982421875, "learning_rate": 2.3995456373917064e-05, "loss": 0.7729980945587158, "step": 329 }, { "epoch": 0.02702453705125449, "grad_norm": 0.228515625, "learning_rate": 2.3995387274756873e-05, "loss": 0.8738072514533997, "step": 330 }, { "epoch": 0.027106429587773446, "grad_norm": 0.1953125, "learning_rate": 2.3995317654227747e-05, "loss": 0.73838210105896, "step": 331 }, { "epoch": 0.027188322124292397, "grad_norm": 0.14453125, "learning_rate": 2.399524751233271e-05, "loss": 0.6781070828437805, "step": 332 }, { "epoch": 0.02727021466081135, "grad_norm": 0.201171875, "learning_rate": 2.399517684907481e-05, "loss": 1.1951383352279663, "step": 333 }, { "epoch": 0.027352107197330302, "grad_norm": 0.154296875, "learning_rate": 2.399510566445712e-05, "loss": 0.5185015201568604, "step": 334 }, { "epoch": 0.027433999733849256, "grad_norm": 0.1650390625, "learning_rate": 2.3995033958482734e-05, "loss": 0.7132613062858582, "step": 335 }, { "epoch": 0.02751589227036821, "grad_norm": 0.1494140625, "learning_rate": 2.3994961731154767e-05, "loss": 0.6685208678245544, "step": 336 }, { "epoch": 0.02759778480688716, "grad_norm": 0.1650390625, "learning_rate": 2.399488898247636e-05, "loss": 0.5686011910438538, "step": 337 }, { "epoch": 0.027679677343406116, "grad_norm": 0.19921875, "learning_rate": 2.3994815712450675e-05, "loss": 0.8996471762657166, "step": 338 }, { "epoch": 0.027761569879925067, "grad_norm": 0.1787109375, "learning_rate": 2.3994741921080894e-05, "loss": 0.5920256972312927, "step": 339 }, { "epoch": 0.02784346241644402, "grad_norm": 0.1533203125, "learning_rate": 2.399466760837023e-05, "loss": 0.7819945216178894, "step": 340 }, { "epoch": 0.027925354952962975, "grad_norm": 0.1650390625, "learning_rate": 2.3994592774321908e-05, "loss": 0.8822646141052246, "step": 341 }, { "epoch": 0.028007247489481926, "grad_norm": 0.3125, "learning_rate": 2.399451741893918e-05, "loss": 1.0437325239181519, "step": 342 }, { "epoch": 0.02808914002600088, "grad_norm": 0.1845703125, "learning_rate": 2.3994441542225326e-05, "loss": 0.7110456824302673, "step": 343 }, { "epoch": 0.028171032562519835, "grad_norm": 0.1875, "learning_rate": 2.399436514418364e-05, "loss": 1.2693532705307007, "step": 344 }, { "epoch": 0.028252925099038786, "grad_norm": 0.1943359375, "learning_rate": 2.3994288224817447e-05, "loss": 0.8693782091140747, "step": 345 }, { "epoch": 0.02833481763555774, "grad_norm": 0.2099609375, "learning_rate": 2.399421078413008e-05, "loss": 0.9717293381690979, "step": 346 }, { "epoch": 0.02841671017207669, "grad_norm": 0.1572265625, "learning_rate": 2.3994132822124916e-05, "loss": 0.9909393191337585, "step": 347 }, { "epoch": 0.028498602708595645, "grad_norm": 0.1552734375, "learning_rate": 2.399405433880534e-05, "loss": 0.7639890909194946, "step": 348 }, { "epoch": 0.0285804952451146, "grad_norm": 0.1708984375, "learning_rate": 2.399397533417476e-05, "loss": 0.8804298639297485, "step": 349 }, { "epoch": 0.02866238778163355, "grad_norm": 0.1494140625, "learning_rate": 2.3993895808236613e-05, "loss": 0.6749045848846436, "step": 350 }, { "epoch": 0.028744280318152505, "grad_norm": 0.173828125, "learning_rate": 2.3993815760994358e-05, "loss": 0.9385254383087158, "step": 351 }, { "epoch": 0.028826172854671456, "grad_norm": 0.154296875, "learning_rate": 2.3993735192451465e-05, "loss": 0.9184956550598145, "step": 352 }, { "epoch": 0.02890806539119041, "grad_norm": 0.19140625, "learning_rate": 2.399365410261145e-05, "loss": 0.8687407970428467, "step": 353 }, { "epoch": 0.028989957927709364, "grad_norm": 0.1650390625, "learning_rate": 2.399357249147782e-05, "loss": 0.7708573937416077, "step": 354 }, { "epoch": 0.029071850464228315, "grad_norm": 0.1796875, "learning_rate": 2.399349035905414e-05, "loss": 0.9318118691444397, "step": 355 }, { "epoch": 0.02915374300074727, "grad_norm": 0.166015625, "learning_rate": 2.3993407705343968e-05, "loss": 0.8875454664230347, "step": 356 }, { "epoch": 0.029235635537266224, "grad_norm": 0.2041015625, "learning_rate": 2.3993324530350898e-05, "loss": 0.8044502139091492, "step": 357 }, { "epoch": 0.029317528073785175, "grad_norm": 0.16796875, "learning_rate": 2.3993240834078553e-05, "loss": 0.6632757782936096, "step": 358 }, { "epoch": 0.02939942061030413, "grad_norm": 0.189453125, "learning_rate": 2.3993156616530558e-05, "loss": 0.9331107139587402, "step": 359 }, { "epoch": 0.02948131314682308, "grad_norm": 0.1806640625, "learning_rate": 2.3993071877710586e-05, "loss": 1.1078283786773682, "step": 360 }, { "epoch": 0.029563205683342034, "grad_norm": 0.177734375, "learning_rate": 2.399298661762231e-05, "loss": 1.0519784688949585, "step": 361 }, { "epoch": 0.02964509821986099, "grad_norm": 0.1591796875, "learning_rate": 2.3992900836269444e-05, "loss": 0.8882368803024292, "step": 362 }, { "epoch": 0.02972699075637994, "grad_norm": 0.203125, "learning_rate": 2.399281453365571e-05, "loss": 1.2736127376556396, "step": 363 }, { "epoch": 0.029808883292898894, "grad_norm": 0.177734375, "learning_rate": 2.399272770978486e-05, "loss": 1.0872962474822998, "step": 364 }, { "epoch": 0.029890775829417845, "grad_norm": 0.263671875, "learning_rate": 2.399264036466067e-05, "loss": 1.2390316724777222, "step": 365 }, { "epoch": 0.0299726683659368, "grad_norm": 0.2060546875, "learning_rate": 2.3992552498286938e-05, "loss": 0.9149156212806702, "step": 366 }, { "epoch": 0.030054560902455753, "grad_norm": 0.162109375, "learning_rate": 2.3992464110667476e-05, "loss": 0.7826298475265503, "step": 367 }, { "epoch": 0.030136453438974704, "grad_norm": 0.169921875, "learning_rate": 2.3992375201806138e-05, "loss": 0.9323840737342834, "step": 368 }, { "epoch": 0.03021834597549366, "grad_norm": 0.1513671875, "learning_rate": 2.3992285771706774e-05, "loss": 0.7545689344406128, "step": 369 }, { "epoch": 0.030300238512012613, "grad_norm": 0.1767578125, "learning_rate": 2.3992195820373283e-05, "loss": 0.6738094687461853, "step": 370 }, { "epoch": 0.030382131048531564, "grad_norm": 0.1787109375, "learning_rate": 2.3992105347809564e-05, "loss": 0.7620134949684143, "step": 371 }, { "epoch": 0.030464023585050518, "grad_norm": 0.146484375, "learning_rate": 2.399201435401956e-05, "loss": 0.6103556156158447, "step": 372 }, { "epoch": 0.03054591612156947, "grad_norm": 0.1806640625, "learning_rate": 2.399192283900722e-05, "loss": 0.6378960609436035, "step": 373 }, { "epoch": 0.030627808658088423, "grad_norm": 0.1728515625, "learning_rate": 2.3991830802776518e-05, "loss": 0.8057185411453247, "step": 374 }, { "epoch": 0.030709701194607378, "grad_norm": 0.1767578125, "learning_rate": 2.3991738245331465e-05, "loss": 0.9602435231208801, "step": 375 }, { "epoch": 0.03079159373112633, "grad_norm": 0.1787109375, "learning_rate": 2.3991645166676073e-05, "loss": 0.9772658348083496, "step": 376 }, { "epoch": 0.030873486267645283, "grad_norm": 0.18359375, "learning_rate": 2.3991551566814392e-05, "loss": 1.1092474460601807, "step": 377 }, { "epoch": 0.030955378804164234, "grad_norm": 0.177734375, "learning_rate": 2.399145744575049e-05, "loss": 0.8104967474937439, "step": 378 }, { "epoch": 0.031037271340683188, "grad_norm": 0.2060546875, "learning_rate": 2.399136280348846e-05, "loss": 1.0451630353927612, "step": 379 }, { "epoch": 0.031119163877202143, "grad_norm": 0.1689453125, "learning_rate": 2.3991267640032415e-05, "loss": 0.6408492922782898, "step": 380 }, { "epoch": 0.031201056413721093, "grad_norm": 0.1865234375, "learning_rate": 2.3991171955386485e-05, "loss": 1.005161166191101, "step": 381 }, { "epoch": 0.031282948950240044, "grad_norm": 0.1689453125, "learning_rate": 2.3991075749554837e-05, "loss": 0.7929934859275818, "step": 382 }, { "epoch": 0.031364841486759, "grad_norm": 0.2275390625, "learning_rate": 2.3990979022541648e-05, "loss": 0.9439051151275635, "step": 383 }, { "epoch": 0.03144673402327795, "grad_norm": 0.1787109375, "learning_rate": 2.3990881774351126e-05, "loss": 1.241767168045044, "step": 384 }, { "epoch": 0.03152862655979691, "grad_norm": 0.1396484375, "learning_rate": 2.3990784004987496e-05, "loss": 0.7989773750305176, "step": 385 }, { "epoch": 0.03161051909631586, "grad_norm": 0.166015625, "learning_rate": 2.3990685714455004e-05, "loss": 0.6661219596862793, "step": 386 }, { "epoch": 0.03169241163283481, "grad_norm": 0.1748046875, "learning_rate": 2.3990586902757926e-05, "loss": 0.8293935060501099, "step": 387 }, { "epoch": 0.03177430416935376, "grad_norm": 0.181640625, "learning_rate": 2.3990487569900553e-05, "loss": 0.7385613918304443, "step": 388 }, { "epoch": 0.03185619670587272, "grad_norm": 0.17578125, "learning_rate": 2.3990387715887206e-05, "loss": 1.1216137409210205, "step": 389 }, { "epoch": 0.03193808924239167, "grad_norm": 0.16796875, "learning_rate": 2.399028734072223e-05, "loss": 0.8001031875610352, "step": 390 }, { "epoch": 0.032019981778910626, "grad_norm": 0.23046875, "learning_rate": 2.3990186444409972e-05, "loss": 0.6830602288246155, "step": 391 }, { "epoch": 0.03210187431542958, "grad_norm": 0.1787109375, "learning_rate": 2.3990085026954836e-05, "loss": 0.9557156562805176, "step": 392 }, { "epoch": 0.03218376685194853, "grad_norm": 0.1826171875, "learning_rate": 2.3989983088361216e-05, "loss": 0.6188973188400269, "step": 393 }, { "epoch": 0.03226565938846748, "grad_norm": 0.2001953125, "learning_rate": 2.398988062863355e-05, "loss": 0.7792149782180786, "step": 394 }, { "epoch": 0.03234755192498644, "grad_norm": 0.1748046875, "learning_rate": 2.398977764777629e-05, "loss": 1.051107406616211, "step": 395 }, { "epoch": 0.03242944446150539, "grad_norm": 0.1767578125, "learning_rate": 2.3989674145793907e-05, "loss": 1.1782621145248413, "step": 396 }, { "epoch": 0.032511336998024346, "grad_norm": 0.1826171875, "learning_rate": 2.3989570122690906e-05, "loss": 0.9232417941093445, "step": 397 }, { "epoch": 0.03259322953454329, "grad_norm": 0.2080078125, "learning_rate": 2.3989465578471806e-05, "loss": 0.7142661213874817, "step": 398 }, { "epoch": 0.03267512207106225, "grad_norm": 0.18359375, "learning_rate": 2.398936051314115e-05, "loss": 0.8970451354980469, "step": 399 }, { "epoch": 0.0327570146075812, "grad_norm": 0.2041015625, "learning_rate": 2.398925492670351e-05, "loss": 0.8157063722610474, "step": 400 }, { "epoch": 0.032838907144100156, "grad_norm": 0.1474609375, "learning_rate": 2.3989148819163468e-05, "loss": 0.6992770433425903, "step": 401 }, { "epoch": 0.03292079968061911, "grad_norm": 0.1591796875, "learning_rate": 2.3989042190525637e-05, "loss": 0.5834121108055115, "step": 402 }, { "epoch": 0.03300269221713806, "grad_norm": 0.26953125, "learning_rate": 2.3988935040794654e-05, "loss": 0.6128764748573303, "step": 403 }, { "epoch": 0.03308458475365701, "grad_norm": 0.18359375, "learning_rate": 2.398882736997518e-05, "loss": 1.056875228881836, "step": 404 }, { "epoch": 0.033166477290175966, "grad_norm": 0.1689453125, "learning_rate": 2.3988719178071884e-05, "loss": 0.7617940902709961, "step": 405 }, { "epoch": 0.03324836982669492, "grad_norm": 0.1455078125, "learning_rate": 2.3988610465089474e-05, "loss": 0.6815392374992371, "step": 406 }, { "epoch": 0.033330262363213875, "grad_norm": 0.1806640625, "learning_rate": 2.398850123103268e-05, "loss": 0.9706563353538513, "step": 407 }, { "epoch": 0.03341215489973282, "grad_norm": 0.189453125, "learning_rate": 2.398839147590624e-05, "loss": 1.1680763959884644, "step": 408 }, { "epoch": 0.03349404743625178, "grad_norm": 0.1650390625, "learning_rate": 2.398828119971494e-05, "loss": 0.8144336938858032, "step": 409 }, { "epoch": 0.03357593997277073, "grad_norm": 0.208984375, "learning_rate": 2.3988170402463554e-05, "loss": 0.9499691128730774, "step": 410 }, { "epoch": 0.033657832509289685, "grad_norm": 0.125, "learning_rate": 2.398805908415691e-05, "loss": 0.4251038730144501, "step": 411 }, { "epoch": 0.03373972504580864, "grad_norm": 0.169921875, "learning_rate": 2.3987947244799843e-05, "loss": 0.8208865523338318, "step": 412 }, { "epoch": 0.03382161758232759, "grad_norm": 0.265625, "learning_rate": 2.398783488439721e-05, "loss": 0.991141140460968, "step": 413 }, { "epoch": 0.03390351011884654, "grad_norm": 0.2138671875, "learning_rate": 2.3987722002953903e-05, "loss": 1.2578606605529785, "step": 414 }, { "epoch": 0.033985402655365496, "grad_norm": 0.1630859375, "learning_rate": 2.3987608600474825e-05, "loss": 1.044303059577942, "step": 415 }, { "epoch": 0.03406729519188445, "grad_norm": 0.1669921875, "learning_rate": 2.39874946769649e-05, "loss": 0.9341923594474792, "step": 416 }, { "epoch": 0.034149187728403405, "grad_norm": 0.271484375, "learning_rate": 2.398738023242909e-05, "loss": 0.9305179119110107, "step": 417 }, { "epoch": 0.03423108026492236, "grad_norm": 0.19140625, "learning_rate": 2.3987265266872357e-05, "loss": 1.0452042818069458, "step": 418 }, { "epoch": 0.034312972801441306, "grad_norm": 0.171875, "learning_rate": 2.3987149780299704e-05, "loss": 0.7530025839805603, "step": 419 }, { "epoch": 0.03439486533796026, "grad_norm": 0.2001953125, "learning_rate": 2.398703377271615e-05, "loss": 0.7470859885215759, "step": 420 }, { "epoch": 0.034476757874479215, "grad_norm": 0.197265625, "learning_rate": 2.3986917244126736e-05, "loss": 0.7884257435798645, "step": 421 }, { "epoch": 0.03455865041099817, "grad_norm": 0.345703125, "learning_rate": 2.398680019453653e-05, "loss": 1.1965625286102295, "step": 422 }, { "epoch": 0.034640542947517124, "grad_norm": 0.2001953125, "learning_rate": 2.3986682623950618e-05, "loss": 1.0573604106903076, "step": 423 }, { "epoch": 0.03472243548403607, "grad_norm": 0.154296875, "learning_rate": 2.3986564532374112e-05, "loss": 0.8759225606918335, "step": 424 }, { "epoch": 0.034804328020555025, "grad_norm": 0.171875, "learning_rate": 2.3986445919812137e-05, "loss": 0.8650780916213989, "step": 425 }, { "epoch": 0.03488622055707398, "grad_norm": 0.1630859375, "learning_rate": 2.398632678626986e-05, "loss": 0.5486708879470825, "step": 426 }, { "epoch": 0.034968113093592934, "grad_norm": 0.2275390625, "learning_rate": 2.3986207131752448e-05, "loss": 1.3964512348175049, "step": 427 }, { "epoch": 0.03505000563011189, "grad_norm": 0.1845703125, "learning_rate": 2.3986086956265106e-05, "loss": 1.2789416313171387, "step": 428 }, { "epoch": 0.035131898166630836, "grad_norm": 0.1513671875, "learning_rate": 2.3985966259813063e-05, "loss": 1.0349560976028442, "step": 429 }, { "epoch": 0.03521379070314979, "grad_norm": 0.189453125, "learning_rate": 2.3985845042401554e-05, "loss": 0.845013439655304, "step": 430 }, { "epoch": 0.035295683239668744, "grad_norm": 0.1796875, "learning_rate": 2.3985723304035855e-05, "loss": 0.8173735737800598, "step": 431 }, { "epoch": 0.0353775757761877, "grad_norm": 0.1376953125, "learning_rate": 2.3985601044721255e-05, "loss": 0.6505873799324036, "step": 432 }, { "epoch": 0.03545946831270665, "grad_norm": 0.169921875, "learning_rate": 2.3985478264463072e-05, "loss": 0.8953864574432373, "step": 433 }, { "epoch": 0.0355413608492256, "grad_norm": 0.1455078125, "learning_rate": 2.3985354963266637e-05, "loss": 0.7566075921058655, "step": 434 }, { "epoch": 0.035623253385744555, "grad_norm": 0.19921875, "learning_rate": 2.3985231141137312e-05, "loss": 1.0244107246398926, "step": 435 }, { "epoch": 0.03570514592226351, "grad_norm": 0.2021484375, "learning_rate": 2.3985106798080475e-05, "loss": 0.9059413075447083, "step": 436 }, { "epoch": 0.035787038458782464, "grad_norm": 0.1552734375, "learning_rate": 2.3984981934101537e-05, "loss": 0.833506166934967, "step": 437 }, { "epoch": 0.03586893099530142, "grad_norm": 0.1787109375, "learning_rate": 2.398485654920592e-05, "loss": 0.8970056772232056, "step": 438 }, { "epoch": 0.035950823531820365, "grad_norm": 0.203125, "learning_rate": 2.3984730643399077e-05, "loss": 0.8806341886520386, "step": 439 }, { "epoch": 0.03603271606833932, "grad_norm": 0.177734375, "learning_rate": 2.398460421668648e-05, "loss": 0.8865904808044434, "step": 440 }, { "epoch": 0.036114608604858274, "grad_norm": 0.2080078125, "learning_rate": 2.3984477269073617e-05, "loss": 0.7775003910064697, "step": 441 }, { "epoch": 0.03619650114137723, "grad_norm": 0.16796875, "learning_rate": 2.3984349800566015e-05, "loss": 0.9406428337097168, "step": 442 }, { "epoch": 0.03627839367789618, "grad_norm": 0.166015625, "learning_rate": 2.398422181116921e-05, "loss": 0.9077646136283875, "step": 443 }, { "epoch": 0.03636028621441514, "grad_norm": 0.1767578125, "learning_rate": 2.3984093300888767e-05, "loss": 0.7018104791641235, "step": 444 }, { "epoch": 0.036442178750934084, "grad_norm": 0.220703125, "learning_rate": 2.3983964269730267e-05, "loss": 0.9777675867080688, "step": 445 }, { "epoch": 0.03652407128745304, "grad_norm": 0.1875, "learning_rate": 2.3983834717699326e-05, "loss": 0.7176735401153564, "step": 446 }, { "epoch": 0.03660596382397199, "grad_norm": 0.150390625, "learning_rate": 2.3983704644801568e-05, "loss": 0.6879844069480896, "step": 447 }, { "epoch": 0.03668785636049095, "grad_norm": 0.1650390625, "learning_rate": 2.398357405104265e-05, "loss": 0.7447598576545715, "step": 448 }, { "epoch": 0.0367697488970099, "grad_norm": 0.1875, "learning_rate": 2.3983442936428245e-05, "loss": 0.9589807987213135, "step": 449 }, { "epoch": 0.03685164143352885, "grad_norm": 0.189453125, "learning_rate": 2.3983311300964055e-05, "loss": 1.155102252960205, "step": 450 }, { "epoch": 0.036933533970047804, "grad_norm": 0.203125, "learning_rate": 2.3983179144655802e-05, "loss": 1.1781085729599, "step": 451 }, { "epoch": 0.03701542650656676, "grad_norm": 0.1943359375, "learning_rate": 2.3983046467509227e-05, "loss": 1.3224983215332031, "step": 452 }, { "epoch": 0.03709731904308571, "grad_norm": 0.21484375, "learning_rate": 2.3982913269530098e-05, "loss": 1.1346737146377563, "step": 453 }, { "epoch": 0.03717921157960467, "grad_norm": 0.1611328125, "learning_rate": 2.3982779550724203e-05, "loss": 0.901663064956665, "step": 454 }, { "epoch": 0.037261104116123614, "grad_norm": 0.1826171875, "learning_rate": 2.3982645311097356e-05, "loss": 0.7664777636528015, "step": 455 }, { "epoch": 0.03734299665264257, "grad_norm": 0.1640625, "learning_rate": 2.398251055065539e-05, "loss": 0.9744863510131836, "step": 456 }, { "epoch": 0.03742488918916152, "grad_norm": 0.1875, "learning_rate": 2.3982375269404166e-05, "loss": 1.1253559589385986, "step": 457 }, { "epoch": 0.03750678172568048, "grad_norm": 0.1865234375, "learning_rate": 2.3982239467349557e-05, "loss": 0.750821053981781, "step": 458 }, { "epoch": 0.03758867426219943, "grad_norm": 0.16015625, "learning_rate": 2.398210314449747e-05, "loss": 0.8490447998046875, "step": 459 }, { "epoch": 0.03767056679871838, "grad_norm": 0.189453125, "learning_rate": 2.3981966300853833e-05, "loss": 0.9658976793289185, "step": 460 }, { "epoch": 0.03775245933523733, "grad_norm": 0.1982421875, "learning_rate": 2.3981828936424588e-05, "loss": 0.8551173806190491, "step": 461 }, { "epoch": 0.03783435187175629, "grad_norm": 0.1767578125, "learning_rate": 2.398169105121571e-05, "loss": 1.0039170980453491, "step": 462 }, { "epoch": 0.03791624440827524, "grad_norm": 0.1962890625, "learning_rate": 2.398155264523319e-05, "loss": 0.6282235383987427, "step": 463 }, { "epoch": 0.037998136944794196, "grad_norm": 0.17578125, "learning_rate": 2.3981413718483042e-05, "loss": 0.8205994963645935, "step": 464 }, { "epoch": 0.03808002948131314, "grad_norm": 0.1796875, "learning_rate": 2.3981274270971307e-05, "loss": 0.8125453591346741, "step": 465 }, { "epoch": 0.0381619220178321, "grad_norm": 0.1640625, "learning_rate": 2.3981134302704045e-05, "loss": 1.2362008094787598, "step": 466 }, { "epoch": 0.03824381455435105, "grad_norm": 0.2275390625, "learning_rate": 2.3980993813687338e-05, "loss": 1.047678828239441, "step": 467 }, { "epoch": 0.038325707090870006, "grad_norm": 0.1748046875, "learning_rate": 2.3980852803927293e-05, "loss": 0.8089118003845215, "step": 468 }, { "epoch": 0.03840759962738896, "grad_norm": 0.193359375, "learning_rate": 2.398071127343004e-05, "loss": 1.0405510663986206, "step": 469 }, { "epoch": 0.038489492163907915, "grad_norm": 0.1865234375, "learning_rate": 2.3980569222201734e-05, "loss": 1.1824989318847656, "step": 470 }, { "epoch": 0.03857138470042686, "grad_norm": 0.55078125, "learning_rate": 2.398042665024854e-05, "loss": 0.8795514702796936, "step": 471 }, { "epoch": 0.03865327723694582, "grad_norm": 0.16015625, "learning_rate": 2.3980283557576664e-05, "loss": 0.7451978325843811, "step": 472 }, { "epoch": 0.03873516977346477, "grad_norm": 0.2431640625, "learning_rate": 2.3980139944192323e-05, "loss": 1.1166579723358154, "step": 473 }, { "epoch": 0.038817062309983726, "grad_norm": 0.173828125, "learning_rate": 2.397999581010175e-05, "loss": 0.8764607310295105, "step": 474 }, { "epoch": 0.03889895484650268, "grad_norm": 0.1904296875, "learning_rate": 2.3979851155311226e-05, "loss": 0.7599079608917236, "step": 475 }, { "epoch": 0.03898084738302163, "grad_norm": 0.197265625, "learning_rate": 2.3979705979827023e-05, "loss": 1.1147818565368652, "step": 476 }, { "epoch": 0.03906273991954058, "grad_norm": 0.1787109375, "learning_rate": 2.3979560283655458e-05, "loss": 0.979387104511261, "step": 477 }, { "epoch": 0.039144632456059536, "grad_norm": 0.1826171875, "learning_rate": 2.3979414066802864e-05, "loss": 0.7678375244140625, "step": 478 }, { "epoch": 0.03922652499257849, "grad_norm": 0.1748046875, "learning_rate": 2.3979267329275597e-05, "loss": 0.6357822418212891, "step": 479 }, { "epoch": 0.039308417529097445, "grad_norm": 0.1630859375, "learning_rate": 2.397912007108003e-05, "loss": 0.7360219955444336, "step": 480 }, { "epoch": 0.03939031006561639, "grad_norm": 0.18359375, "learning_rate": 2.397897229222257e-05, "loss": 0.8464394211769104, "step": 481 }, { "epoch": 0.039472202602135346, "grad_norm": 0.2001953125, "learning_rate": 2.3978823992709632e-05, "loss": 0.779076099395752, "step": 482 }, { "epoch": 0.0395540951386543, "grad_norm": 0.1875, "learning_rate": 2.397867517254767e-05, "loss": 0.8278973698616028, "step": 483 }, { "epoch": 0.039635987675173255, "grad_norm": 0.138671875, "learning_rate": 2.3978525831743145e-05, "loss": 0.6052179336547852, "step": 484 }, { "epoch": 0.03971788021169221, "grad_norm": 0.1591796875, "learning_rate": 2.3978375970302556e-05, "loss": 0.8245387673377991, "step": 485 }, { "epoch": 0.03979977274821116, "grad_norm": 0.1669921875, "learning_rate": 2.3978225588232407e-05, "loss": 0.7775510549545288, "step": 486 }, { "epoch": 0.03988166528473011, "grad_norm": 0.1318359375, "learning_rate": 2.397807468553924e-05, "loss": 0.786597490310669, "step": 487 }, { "epoch": 0.039963557821249066, "grad_norm": 0.169921875, "learning_rate": 2.3977923262229616e-05, "loss": 0.9905457496643066, "step": 488 }, { "epoch": 0.04004545035776802, "grad_norm": 0.1640625, "learning_rate": 2.3977771318310113e-05, "loss": 0.8521064519882202, "step": 489 }, { "epoch": 0.040127342894286974, "grad_norm": 0.1865234375, "learning_rate": 2.3977618853787332e-05, "loss": 0.7497962713241577, "step": 490 }, { "epoch": 0.04020923543080592, "grad_norm": 0.146484375, "learning_rate": 2.3977465868667906e-05, "loss": 0.6652951836585999, "step": 491 }, { "epoch": 0.040291127967324876, "grad_norm": 0.17578125, "learning_rate": 2.397731236295848e-05, "loss": 0.806548535823822, "step": 492 }, { "epoch": 0.04037302050384383, "grad_norm": 0.216796875, "learning_rate": 2.397715833666573e-05, "loss": 0.9164248704910278, "step": 493 }, { "epoch": 0.040454913040362785, "grad_norm": 0.16796875, "learning_rate": 2.397700378979635e-05, "loss": 1.0114997625350952, "step": 494 }, { "epoch": 0.04053680557688174, "grad_norm": 0.203125, "learning_rate": 2.3976848722357055e-05, "loss": 0.9673534631729126, "step": 495 }, { "epoch": 0.04061869811340069, "grad_norm": 0.2119140625, "learning_rate": 2.3976693134354582e-05, "loss": 1.0153312683105469, "step": 496 }, { "epoch": 0.04070059064991964, "grad_norm": 0.1787109375, "learning_rate": 2.39765370257957e-05, "loss": 0.7527498006820679, "step": 497 }, { "epoch": 0.040782483186438595, "grad_norm": 0.166015625, "learning_rate": 2.397638039668719e-05, "loss": 0.6898579597473145, "step": 498 }, { "epoch": 0.04086437572295755, "grad_norm": 0.1455078125, "learning_rate": 2.397622324703586e-05, "loss": 0.6689336895942688, "step": 499 }, { "epoch": 0.040946268259476504, "grad_norm": 0.2041015625, "learning_rate": 2.3976065576848543e-05, "loss": 0.9839563369750977, "step": 500 }, { "epoch": 0.04102816079599546, "grad_norm": 0.1875, "learning_rate": 2.397590738613209e-05, "loss": 0.7965925335884094, "step": 501 }, { "epoch": 0.041110053332514405, "grad_norm": 0.19921875, "learning_rate": 2.3975748674893375e-05, "loss": 0.7120146155357361, "step": 502 }, { "epoch": 0.04119194586903336, "grad_norm": 0.1591796875, "learning_rate": 2.39755894431393e-05, "loss": 0.9516551494598389, "step": 503 }, { "epoch": 0.041273838405552314, "grad_norm": 0.189453125, "learning_rate": 2.3975429690876786e-05, "loss": 1.1224391460418701, "step": 504 }, { "epoch": 0.04135573094207127, "grad_norm": 0.17578125, "learning_rate": 2.3975269418112772e-05, "loss": 1.016868233680725, "step": 505 }, { "epoch": 0.04143762347859022, "grad_norm": 0.17578125, "learning_rate": 2.3975108624854225e-05, "loss": 1.1816296577453613, "step": 506 }, { "epoch": 0.04151951601510917, "grad_norm": 0.16796875, "learning_rate": 2.3974947311108136e-05, "loss": 0.7898935675621033, "step": 507 }, { "epoch": 0.041601408551628125, "grad_norm": 0.1953125, "learning_rate": 2.3974785476881516e-05, "loss": 1.2441715002059937, "step": 508 }, { "epoch": 0.04168330108814708, "grad_norm": 0.166015625, "learning_rate": 2.39746231221814e-05, "loss": 0.5922395586967468, "step": 509 }, { "epoch": 0.04176519362466603, "grad_norm": 0.1806640625, "learning_rate": 2.397446024701484e-05, "loss": 0.7211483716964722, "step": 510 }, { "epoch": 0.04184708616118499, "grad_norm": 0.1962890625, "learning_rate": 2.3974296851388924e-05, "loss": 0.9107987880706787, "step": 511 }, { "epoch": 0.041928978697703935, "grad_norm": 0.150390625, "learning_rate": 2.3974132935310744e-05, "loss": 0.7573146820068359, "step": 512 }, { "epoch": 0.04201087123422289, "grad_norm": 0.16015625, "learning_rate": 2.3973968498787433e-05, "loss": 0.7425338625907898, "step": 513 }, { "epoch": 0.042092763770741844, "grad_norm": 0.158203125, "learning_rate": 2.3973803541826128e-05, "loss": 0.7786843776702881, "step": 514 }, { "epoch": 0.0421746563072608, "grad_norm": 0.193359375, "learning_rate": 2.3973638064434007e-05, "loss": 0.938799262046814, "step": 515 }, { "epoch": 0.04225654884377975, "grad_norm": 0.1611328125, "learning_rate": 2.3973472066618264e-05, "loss": 0.8231399059295654, "step": 516 }, { "epoch": 0.0423384413802987, "grad_norm": 0.16796875, "learning_rate": 2.3973305548386108e-05, "loss": 0.9845247864723206, "step": 517 }, { "epoch": 0.042420333916817654, "grad_norm": 0.150390625, "learning_rate": 2.3973138509744773e-05, "loss": 0.8089458346366882, "step": 518 }, { "epoch": 0.04250222645333661, "grad_norm": 0.1533203125, "learning_rate": 2.397297095070153e-05, "loss": 0.7149685621261597, "step": 519 }, { "epoch": 0.04258411898985556, "grad_norm": 0.1884765625, "learning_rate": 2.3972802871263654e-05, "loss": 0.9618757963180542, "step": 520 }, { "epoch": 0.04266601152637452, "grad_norm": 0.1806640625, "learning_rate": 2.397263427143845e-05, "loss": 0.8664198517799377, "step": 521 }, { "epoch": 0.04274790406289347, "grad_norm": 0.1513671875, "learning_rate": 2.3972465151233256e-05, "loss": 0.9404294490814209, "step": 522 }, { "epoch": 0.04282979659941242, "grad_norm": 0.171875, "learning_rate": 2.3972295510655412e-05, "loss": 0.8636691570281982, "step": 523 }, { "epoch": 0.04291168913593137, "grad_norm": 0.203125, "learning_rate": 2.3972125349712293e-05, "loss": 0.7196763753890991, "step": 524 }, { "epoch": 0.04299358167245033, "grad_norm": 0.1611328125, "learning_rate": 2.39719546684113e-05, "loss": 1.0419987440109253, "step": 525 }, { "epoch": 0.04307547420896928, "grad_norm": 0.203125, "learning_rate": 2.397178346675985e-05, "loss": 0.8712450861930847, "step": 526 }, { "epoch": 0.043157366745488236, "grad_norm": 0.162109375, "learning_rate": 2.3971611744765377e-05, "loss": 1.0688278675079346, "step": 527 }, { "epoch": 0.043239259282007184, "grad_norm": 0.1796875, "learning_rate": 2.3971439502435356e-05, "loss": 0.7815065979957581, "step": 528 }, { "epoch": 0.04332115181852614, "grad_norm": 0.1875, "learning_rate": 2.397126673977726e-05, "loss": 0.8646108508110046, "step": 529 }, { "epoch": 0.04340304435504509, "grad_norm": 0.1826171875, "learning_rate": 2.3971093456798613e-05, "loss": 0.6068559288978577, "step": 530 }, { "epoch": 0.04348493689156405, "grad_norm": 0.2265625, "learning_rate": 2.3970919653506932e-05, "loss": 0.8408427238464355, "step": 531 }, { "epoch": 0.043566829428083, "grad_norm": 0.1650390625, "learning_rate": 2.3970745329909782e-05, "loss": 0.7949510216712952, "step": 532 }, { "epoch": 0.04364872196460195, "grad_norm": 0.1728515625, "learning_rate": 2.397057048601474e-05, "loss": 1.1144850254058838, "step": 533 }, { "epoch": 0.0437306145011209, "grad_norm": 0.2275390625, "learning_rate": 2.3970395121829396e-05, "loss": 1.0043058395385742, "step": 534 }, { "epoch": 0.04381250703763986, "grad_norm": 0.1650390625, "learning_rate": 2.3970219237361375e-05, "loss": 0.9013270735740662, "step": 535 }, { "epoch": 0.04389439957415881, "grad_norm": 0.1796875, "learning_rate": 2.397004283261833e-05, "loss": 1.0792452096939087, "step": 536 }, { "epoch": 0.043976292110677766, "grad_norm": 0.185546875, "learning_rate": 2.396986590760792e-05, "loss": 0.8610565662384033, "step": 537 }, { "epoch": 0.04405818464719671, "grad_norm": 0.1650390625, "learning_rate": 2.396968846233784e-05, "loss": 1.0946379899978638, "step": 538 }, { "epoch": 0.04414007718371567, "grad_norm": 0.1826171875, "learning_rate": 2.3969510496815797e-05, "loss": 0.762466311454773, "step": 539 }, { "epoch": 0.04422196972023462, "grad_norm": 0.1904296875, "learning_rate": 2.396933201104953e-05, "loss": 0.9066336750984192, "step": 540 }, { "epoch": 0.044303862256753576, "grad_norm": 0.1865234375, "learning_rate": 2.3969153005046797e-05, "loss": 0.6592026948928833, "step": 541 }, { "epoch": 0.04438575479327253, "grad_norm": 0.21484375, "learning_rate": 2.3968973478815375e-05, "loss": 0.9944949150085449, "step": 542 }, { "epoch": 0.04446764732979148, "grad_norm": 0.2333984375, "learning_rate": 2.396879343236307e-05, "loss": 0.9634906649589539, "step": 543 }, { "epoch": 0.04454953986631043, "grad_norm": 0.208984375, "learning_rate": 2.3968612865697703e-05, "loss": 1.0275566577911377, "step": 544 }, { "epoch": 0.04463143240282939, "grad_norm": 0.2255859375, "learning_rate": 2.3968431778827134e-05, "loss": 0.8620836138725281, "step": 545 }, { "epoch": 0.04471332493934834, "grad_norm": 0.16796875, "learning_rate": 2.396825017175922e-05, "loss": 0.8265810608863831, "step": 546 }, { "epoch": 0.044795217475867295, "grad_norm": 0.1806640625, "learning_rate": 2.3968068044501862e-05, "loss": 0.8992477655410767, "step": 547 }, { "epoch": 0.04487711001238625, "grad_norm": 0.1845703125, "learning_rate": 2.396788539706297e-05, "loss": 0.7925514578819275, "step": 548 }, { "epoch": 0.0449590025489052, "grad_norm": 0.1826171875, "learning_rate": 2.396770222945049e-05, "loss": 0.9274776577949524, "step": 549 }, { "epoch": 0.04504089508542415, "grad_norm": 0.185546875, "learning_rate": 2.3967518541672375e-05, "loss": 0.8201966285705566, "step": 550 }, { "epoch": 0.045122787621943106, "grad_norm": 0.2138671875, "learning_rate": 2.3967334333736622e-05, "loss": 1.0400993824005127, "step": 551 }, { "epoch": 0.04520468015846206, "grad_norm": 0.181640625, "learning_rate": 2.3967149605651224e-05, "loss": 0.6288777589797974, "step": 552 }, { "epoch": 0.045286572694981014, "grad_norm": 0.162109375, "learning_rate": 2.3966964357424215e-05, "loss": 0.6736346483230591, "step": 553 }, { "epoch": 0.04536846523149996, "grad_norm": 0.1806640625, "learning_rate": 2.3966778589063648e-05, "loss": 0.7843412756919861, "step": 554 }, { "epoch": 0.045450357768018916, "grad_norm": 0.17578125, "learning_rate": 2.3966592300577593e-05, "loss": 0.6924692392349243, "step": 555 }, { "epoch": 0.04553225030453787, "grad_norm": 0.154296875, "learning_rate": 2.3966405491974156e-05, "loss": 0.7438194155693054, "step": 556 }, { "epoch": 0.045614142841056825, "grad_norm": 0.1494140625, "learning_rate": 2.3966218163261444e-05, "loss": 0.9577575922012329, "step": 557 }, { "epoch": 0.04569603537757578, "grad_norm": 0.1728515625, "learning_rate": 2.3966030314447608e-05, "loss": 0.9418614506721497, "step": 558 }, { "epoch": 0.045777927914094727, "grad_norm": 0.177734375, "learning_rate": 2.3965841945540808e-05, "loss": 0.8478980660438538, "step": 559 }, { "epoch": 0.04585982045061368, "grad_norm": 0.1259765625, "learning_rate": 2.396565305654924e-05, "loss": 0.4260137677192688, "step": 560 }, { "epoch": 0.045941712987132635, "grad_norm": 0.1826171875, "learning_rate": 2.39654636474811e-05, "loss": 0.9933167695999146, "step": 561 }, { "epoch": 0.04602360552365159, "grad_norm": 0.1669921875, "learning_rate": 2.396527371834463e-05, "loss": 1.0423835515975952, "step": 562 }, { "epoch": 0.046105498060170544, "grad_norm": 0.1884765625, "learning_rate": 2.3965083269148086e-05, "loss": 0.8101381063461304, "step": 563 }, { "epoch": 0.04618739059668949, "grad_norm": 0.1806640625, "learning_rate": 2.396489229989974e-05, "loss": 0.9623128175735474, "step": 564 }, { "epoch": 0.046269283133208446, "grad_norm": 0.1962890625, "learning_rate": 2.3964700810607892e-05, "loss": 1.0519846677780151, "step": 565 }, { "epoch": 0.0463511756697274, "grad_norm": 0.1728515625, "learning_rate": 2.396450880128087e-05, "loss": 0.9688785672187805, "step": 566 }, { "epoch": 0.046433068206246354, "grad_norm": 0.1630859375, "learning_rate": 2.3964316271927016e-05, "loss": 0.966496467590332, "step": 567 }, { "epoch": 0.04651496074276531, "grad_norm": 0.283203125, "learning_rate": 2.3964123222554704e-05, "loss": 0.8825667500495911, "step": 568 }, { "epoch": 0.046596853279284256, "grad_norm": 0.1923828125, "learning_rate": 2.3963929653172312e-05, "loss": 0.9471298456192017, "step": 569 }, { "epoch": 0.04667874581580321, "grad_norm": 0.181640625, "learning_rate": 2.3963735563788272e-05, "loss": 1.1921699047088623, "step": 570 }, { "epoch": 0.046760638352322165, "grad_norm": 0.1826171875, "learning_rate": 2.3963540954411004e-05, "loss": 0.9107367396354675, "step": 571 }, { "epoch": 0.04684253088884112, "grad_norm": 0.140625, "learning_rate": 2.3963345825048967e-05, "loss": 1.1056568622589111, "step": 572 }, { "epoch": 0.04692442342536007, "grad_norm": 0.158203125, "learning_rate": 2.3963150175710654e-05, "loss": 0.5379326939582825, "step": 573 }, { "epoch": 0.04700631596187902, "grad_norm": 0.1865234375, "learning_rate": 2.396295400640456e-05, "loss": 0.729265034198761, "step": 574 }, { "epoch": 0.047088208498397975, "grad_norm": 0.1865234375, "learning_rate": 2.3962757317139215e-05, "loss": 0.6542950868606567, "step": 575 }, { "epoch": 0.04717010103491693, "grad_norm": 0.15234375, "learning_rate": 2.3962560107923165e-05, "loss": 0.5564326047897339, "step": 576 }, { "epoch": 0.047251993571435884, "grad_norm": 0.2294921875, "learning_rate": 2.3962362378764977e-05, "loss": 0.9876377582550049, "step": 577 }, { "epoch": 0.04733388610795484, "grad_norm": 0.1513671875, "learning_rate": 2.3962164129673257e-05, "loss": 0.9433709383010864, "step": 578 }, { "epoch": 0.04741577864447379, "grad_norm": 0.1533203125, "learning_rate": 2.3961965360656613e-05, "loss": 0.6138058304786682, "step": 579 }, { "epoch": 0.04749767118099274, "grad_norm": 0.1962890625, "learning_rate": 2.3961766071723686e-05, "loss": 1.362326741218567, "step": 580 }, { "epoch": 0.047579563717511694, "grad_norm": 0.169921875, "learning_rate": 2.396156626288314e-05, "loss": 0.7150259613990784, "step": 581 }, { "epoch": 0.04766145625403065, "grad_norm": 0.1904296875, "learning_rate": 2.396136593414366e-05, "loss": 0.6978367567062378, "step": 582 }, { "epoch": 0.0477433487905496, "grad_norm": 0.1650390625, "learning_rate": 2.3961165085513947e-05, "loss": 0.8931869268417358, "step": 583 }, { "epoch": 0.04782524132706856, "grad_norm": 0.169921875, "learning_rate": 2.396096371700274e-05, "loss": 0.6849859356880188, "step": 584 }, { "epoch": 0.047907133863587505, "grad_norm": 0.25390625, "learning_rate": 2.3960761828618782e-05, "loss": 1.1862263679504395, "step": 585 }, { "epoch": 0.04798902640010646, "grad_norm": 0.228515625, "learning_rate": 2.3960559420370854e-05, "loss": 1.0873479843139648, "step": 586 }, { "epoch": 0.04807091893662541, "grad_norm": 0.1943359375, "learning_rate": 2.396035649226775e-05, "loss": 0.8785818219184875, "step": 587 }, { "epoch": 0.04815281147314437, "grad_norm": 0.1484375, "learning_rate": 2.3960153044318295e-05, "loss": 0.5999451279640198, "step": 588 }, { "epoch": 0.04823470400966332, "grad_norm": 0.177734375, "learning_rate": 2.3959949076531326e-05, "loss": 0.8909758925437927, "step": 589 }, { "epoch": 0.04831659654618227, "grad_norm": 0.1552734375, "learning_rate": 2.3959744588915714e-05, "loss": 0.7863945364952087, "step": 590 }, { "epoch": 0.048398489082701224, "grad_norm": 0.16796875, "learning_rate": 2.395953958148034e-05, "loss": 0.5463456511497498, "step": 591 }, { "epoch": 0.04848038161922018, "grad_norm": 0.1396484375, "learning_rate": 2.3959334054234116e-05, "loss": 0.6835671067237854, "step": 592 }, { "epoch": 0.04856227415573913, "grad_norm": 0.1787109375, "learning_rate": 2.395912800718598e-05, "loss": 1.0347270965576172, "step": 593 }, { "epoch": 0.04864416669225809, "grad_norm": 0.18359375, "learning_rate": 2.3958921440344887e-05, "loss": 0.8445790410041809, "step": 594 }, { "epoch": 0.048726059228777034, "grad_norm": 0.1806640625, "learning_rate": 2.395871435371981e-05, "loss": 0.8437964916229248, "step": 595 }, { "epoch": 0.04880795176529599, "grad_norm": 0.203125, "learning_rate": 2.3958506747319752e-05, "loss": 1.0122697353363037, "step": 596 }, { "epoch": 0.04888984430181494, "grad_norm": 0.185546875, "learning_rate": 2.3958298621153738e-05, "loss": 0.8762169480323792, "step": 597 }, { "epoch": 0.0489717368383339, "grad_norm": 0.205078125, "learning_rate": 2.3958089975230815e-05, "loss": 0.9854332804679871, "step": 598 }, { "epoch": 0.04905362937485285, "grad_norm": 0.162109375, "learning_rate": 2.395788080956005e-05, "loss": 0.8069448471069336, "step": 599 }, { "epoch": 0.0491355219113718, "grad_norm": 0.2255859375, "learning_rate": 2.395767112415053e-05, "loss": 1.1460633277893066, "step": 600 }, { "epoch": 0.04921741444789075, "grad_norm": 0.173828125, "learning_rate": 2.395746091901138e-05, "loss": 0.9800331592559814, "step": 601 }, { "epoch": 0.04929930698440971, "grad_norm": 0.1943359375, "learning_rate": 2.3957250194151724e-05, "loss": 0.7255781888961792, "step": 602 }, { "epoch": 0.04938119952092866, "grad_norm": 0.1796875, "learning_rate": 2.395703894958073e-05, "loss": 0.889350950717926, "step": 603 }, { "epoch": 0.049463092057447616, "grad_norm": 0.1953125, "learning_rate": 2.3956827185307574e-05, "loss": 0.8888030052185059, "step": 604 }, { "epoch": 0.04954498459396657, "grad_norm": 0.2001953125, "learning_rate": 2.395661490134146e-05, "loss": 0.9086771011352539, "step": 605 }, { "epoch": 0.04962687713048552, "grad_norm": 0.1611328125, "learning_rate": 2.395640209769162e-05, "loss": 0.7518740296363831, "step": 606 }, { "epoch": 0.04970876966700447, "grad_norm": 0.193359375, "learning_rate": 2.3956188774367295e-05, "loss": 0.9608128666877747, "step": 607 }, { "epoch": 0.04979066220352343, "grad_norm": 0.255859375, "learning_rate": 2.3955974931377765e-05, "loss": 0.8345894813537598, "step": 608 }, { "epoch": 0.04987255474004238, "grad_norm": 0.1796875, "learning_rate": 2.3955760568732323e-05, "loss": 0.9964596629142761, "step": 609 }, { "epoch": 0.049954447276561335, "grad_norm": 0.169921875, "learning_rate": 2.3955545686440282e-05, "loss": 0.8712890148162842, "step": 610 }, { "epoch": 0.05003633981308028, "grad_norm": 0.169921875, "learning_rate": 2.3955330284510987e-05, "loss": 1.1242917776107788, "step": 611 }, { "epoch": 0.05011823234959924, "grad_norm": 0.2138671875, "learning_rate": 2.3955114362953793e-05, "loss": 0.6739065051078796, "step": 612 }, { "epoch": 0.05020012488611819, "grad_norm": 0.1875, "learning_rate": 2.3954897921778094e-05, "loss": 1.101578712463379, "step": 613 }, { "epoch": 0.050282017422637146, "grad_norm": 0.197265625, "learning_rate": 2.3954680960993293e-05, "loss": 0.9087690711021423, "step": 614 }, { "epoch": 0.0503639099591561, "grad_norm": 0.1875, "learning_rate": 2.3954463480608816e-05, "loss": 0.6032750606536865, "step": 615 }, { "epoch": 0.05044580249567505, "grad_norm": 0.19140625, "learning_rate": 2.3954245480634123e-05, "loss": 0.7097981572151184, "step": 616 }, { "epoch": 0.050527695032194, "grad_norm": 0.16015625, "learning_rate": 2.395402696107868e-05, "loss": 0.7773026823997498, "step": 617 }, { "epoch": 0.050609587568712956, "grad_norm": 0.2333984375, "learning_rate": 2.3953807921951992e-05, "loss": 0.8052107691764832, "step": 618 }, { "epoch": 0.05069148010523191, "grad_norm": 0.1982421875, "learning_rate": 2.3953588363263582e-05, "loss": 0.8313415050506592, "step": 619 }, { "epoch": 0.050773372641750865, "grad_norm": 0.1748046875, "learning_rate": 2.3953368285022984e-05, "loss": 0.7332470417022705, "step": 620 }, { "epoch": 0.05085526517826981, "grad_norm": 0.1650390625, "learning_rate": 2.395314768723977e-05, "loss": 0.8336531519889832, "step": 621 }, { "epoch": 0.05093715771478877, "grad_norm": 0.16015625, "learning_rate": 2.3952926569923527e-05, "loss": 0.8546928763389587, "step": 622 }, { "epoch": 0.05101905025130772, "grad_norm": 0.1630859375, "learning_rate": 2.3952704933083864e-05, "loss": 0.8728529810905457, "step": 623 }, { "epoch": 0.051100942787826675, "grad_norm": 0.17578125, "learning_rate": 2.395248277673041e-05, "loss": 1.0662384033203125, "step": 624 }, { "epoch": 0.05118283532434563, "grad_norm": 0.2294921875, "learning_rate": 2.3952260100872833e-05, "loss": 0.9664002060890198, "step": 625 }, { "epoch": 0.05126472786086458, "grad_norm": 0.1533203125, "learning_rate": 2.39520369055208e-05, "loss": 0.8857104182243347, "step": 626 }, { "epoch": 0.05134662039738353, "grad_norm": 0.169921875, "learning_rate": 2.3951813190684022e-05, "loss": 0.742447555065155, "step": 627 }, { "epoch": 0.051428512933902486, "grad_norm": 0.1396484375, "learning_rate": 2.395158895637221e-05, "loss": 0.929764986038208, "step": 628 }, { "epoch": 0.05151040547042144, "grad_norm": 0.166015625, "learning_rate": 2.3951364202595123e-05, "loss": 0.9282650947570801, "step": 629 }, { "epoch": 0.051592298006940394, "grad_norm": 0.212890625, "learning_rate": 2.395113892936252e-05, "loss": 0.7192749381065369, "step": 630 }, { "epoch": 0.05167419054345935, "grad_norm": 0.1552734375, "learning_rate": 2.3950913136684196e-05, "loss": 0.9937841892242432, "step": 631 }, { "epoch": 0.051756083079978296, "grad_norm": 0.2060546875, "learning_rate": 2.395068682456996e-05, "loss": 1.1982215642929077, "step": 632 }, { "epoch": 0.05183797561649725, "grad_norm": 0.1552734375, "learning_rate": 2.3950459993029662e-05, "loss": 0.6705058217048645, "step": 633 }, { "epoch": 0.051919868153016205, "grad_norm": 0.134765625, "learning_rate": 2.395023264207315e-05, "loss": 0.8769830465316772, "step": 634 }, { "epoch": 0.05200176068953516, "grad_norm": 0.1875, "learning_rate": 2.3950004771710306e-05, "loss": 0.5416737794876099, "step": 635 }, { "epoch": 0.052083653226054114, "grad_norm": 0.1630859375, "learning_rate": 2.394977638195104e-05, "loss": 0.9033718705177307, "step": 636 }, { "epoch": 0.05216554576257306, "grad_norm": 0.1875, "learning_rate": 2.3949547472805267e-05, "loss": 1.1842782497406006, "step": 637 }, { "epoch": 0.052247438299092015, "grad_norm": 0.171875, "learning_rate": 2.3949318044282954e-05, "loss": 0.9973214268684387, "step": 638 }, { "epoch": 0.05232933083561097, "grad_norm": 0.1728515625, "learning_rate": 2.394908809639406e-05, "loss": 0.8814473152160645, "step": 639 }, { "epoch": 0.052411223372129924, "grad_norm": 0.177734375, "learning_rate": 2.3948857629148578e-05, "loss": 0.5170973539352417, "step": 640 }, { "epoch": 0.05249311590864888, "grad_norm": 0.220703125, "learning_rate": 2.3948626642556535e-05, "loss": 1.0104471445083618, "step": 641 }, { "epoch": 0.052575008445167826, "grad_norm": 0.142578125, "learning_rate": 2.394839513662796e-05, "loss": 0.7293509840965271, "step": 642 }, { "epoch": 0.05265690098168678, "grad_norm": 0.16015625, "learning_rate": 2.3948163111372928e-05, "loss": 0.982900857925415, "step": 643 }, { "epoch": 0.052738793518205734, "grad_norm": 0.1875, "learning_rate": 2.394793056680151e-05, "loss": 0.9158048033714294, "step": 644 }, { "epoch": 0.05282068605472469, "grad_norm": 0.212890625, "learning_rate": 2.394769750292382e-05, "loss": 0.7885652184486389, "step": 645 }, { "epoch": 0.05290257859124364, "grad_norm": 0.140625, "learning_rate": 2.394746391974999e-05, "loss": 0.6910034418106079, "step": 646 }, { "epoch": 0.05298447112776259, "grad_norm": 0.1640625, "learning_rate": 2.3947229817290167e-05, "loss": 0.9547855257987976, "step": 647 }, { "epoch": 0.053066363664281545, "grad_norm": 0.19140625, "learning_rate": 2.3946995195554533e-05, "loss": 0.8249313831329346, "step": 648 }, { "epoch": 0.0531482562008005, "grad_norm": 0.169921875, "learning_rate": 2.3946760054553282e-05, "loss": 0.5903275012969971, "step": 649 }, { "epoch": 0.05323014873731945, "grad_norm": 0.1845703125, "learning_rate": 2.3946524394296633e-05, "loss": 0.8317531943321228, "step": 650 }, { "epoch": 0.05331204127383841, "grad_norm": 0.1611328125, "learning_rate": 2.3946288214794828e-05, "loss": 0.6406201124191284, "step": 651 }, { "epoch": 0.053393933810357355, "grad_norm": 0.1865234375, "learning_rate": 2.3946051516058134e-05, "loss": 0.9876687526702881, "step": 652 }, { "epoch": 0.05347582634687631, "grad_norm": 0.1689453125, "learning_rate": 2.3945814298096842e-05, "loss": 0.7347517609596252, "step": 653 }, { "epoch": 0.053557718883395264, "grad_norm": 0.1572265625, "learning_rate": 2.3945576560921255e-05, "loss": 1.0370835065841675, "step": 654 }, { "epoch": 0.05363961141991422, "grad_norm": 0.1767578125, "learning_rate": 2.3945338304541714e-05, "loss": 1.0775206089019775, "step": 655 }, { "epoch": 0.05372150395643317, "grad_norm": 0.1650390625, "learning_rate": 2.3945099528968568e-05, "loss": 0.9059051871299744, "step": 656 }, { "epoch": 0.05380339649295213, "grad_norm": 0.171875, "learning_rate": 2.39448602342122e-05, "loss": 0.954623818397522, "step": 657 }, { "epoch": 0.053885289029471074, "grad_norm": 0.173828125, "learning_rate": 2.3944620420283013e-05, "loss": 1.0121254920959473, "step": 658 }, { "epoch": 0.05396718156599003, "grad_norm": 0.169921875, "learning_rate": 2.394438008719142e-05, "loss": 0.888215184211731, "step": 659 }, { "epoch": 0.05404907410250898, "grad_norm": 0.1787109375, "learning_rate": 2.3944139234947878e-05, "loss": 0.7328816056251526, "step": 660 }, { "epoch": 0.05413096663902794, "grad_norm": 0.1650390625, "learning_rate": 2.3943897863562844e-05, "loss": 0.934559166431427, "step": 661 }, { "epoch": 0.05421285917554689, "grad_norm": 0.1962890625, "learning_rate": 2.394365597304682e-05, "loss": 1.2699017524719238, "step": 662 }, { "epoch": 0.05429475171206584, "grad_norm": 0.1982421875, "learning_rate": 2.3943413563410315e-05, "loss": 0.9635473489761353, "step": 663 }, { "epoch": 0.05437664424858479, "grad_norm": 0.173828125, "learning_rate": 2.3943170634663867e-05, "loss": 0.8488468527793884, "step": 664 }, { "epoch": 0.05445853678510375, "grad_norm": 0.1904296875, "learning_rate": 2.3942927186818027e-05, "loss": 0.7763572931289673, "step": 665 }, { "epoch": 0.0545404293216227, "grad_norm": 0.1591796875, "learning_rate": 2.3942683219883387e-05, "loss": 0.8804049491882324, "step": 666 }, { "epoch": 0.054622321858141656, "grad_norm": 0.1826171875, "learning_rate": 2.3942438733870545e-05, "loss": 1.2234079837799072, "step": 667 }, { "epoch": 0.054704214394660604, "grad_norm": 0.181640625, "learning_rate": 2.394219372879013e-05, "loss": 0.9823880195617676, "step": 668 }, { "epoch": 0.05478610693117956, "grad_norm": 0.171875, "learning_rate": 2.3941948204652786e-05, "loss": 0.5647228360176086, "step": 669 }, { "epoch": 0.05486799946769851, "grad_norm": 0.1962890625, "learning_rate": 2.394170216146919e-05, "loss": 0.7954634428024292, "step": 670 }, { "epoch": 0.05494989200421747, "grad_norm": 0.1552734375, "learning_rate": 2.394145559925003e-05, "loss": 0.5313792824745178, "step": 671 }, { "epoch": 0.05503178454073642, "grad_norm": 0.2197265625, "learning_rate": 2.394120851800603e-05, "loss": 0.9468759298324585, "step": 672 }, { "epoch": 0.05511367707725537, "grad_norm": 0.1533203125, "learning_rate": 2.3940960917747924e-05, "loss": 0.6649528741836548, "step": 673 }, { "epoch": 0.05519556961377432, "grad_norm": 0.1845703125, "learning_rate": 2.3940712798486476e-05, "loss": 0.980419933795929, "step": 674 }, { "epoch": 0.05527746215029328, "grad_norm": 0.169921875, "learning_rate": 2.394046416023247e-05, "loss": 0.8429052233695984, "step": 675 }, { "epoch": 0.05535935468681223, "grad_norm": 0.169921875, "learning_rate": 2.3940215002996714e-05, "loss": 0.770273745059967, "step": 676 }, { "epoch": 0.055441247223331186, "grad_norm": 0.2177734375, "learning_rate": 2.3939965326790036e-05, "loss": 1.0326683521270752, "step": 677 }, { "epoch": 0.05552313975985013, "grad_norm": 0.19921875, "learning_rate": 2.3939715131623284e-05, "loss": 0.9016773700714111, "step": 678 }, { "epoch": 0.05560503229636909, "grad_norm": 0.1826171875, "learning_rate": 2.3939464417507336e-05, "loss": 1.0948452949523926, "step": 679 }, { "epoch": 0.05568692483288804, "grad_norm": 0.1357421875, "learning_rate": 2.3939213184453095e-05, "loss": 0.5703760385513306, "step": 680 }, { "epoch": 0.055768817369406996, "grad_norm": 0.14453125, "learning_rate": 2.393896143247147e-05, "loss": 0.8222994804382324, "step": 681 }, { "epoch": 0.05585070990592595, "grad_norm": 0.1474609375, "learning_rate": 2.393870916157341e-05, "loss": 0.7562463879585266, "step": 682 }, { "epoch": 0.055932602442444905, "grad_norm": 0.16015625, "learning_rate": 2.3938456371769877e-05, "loss": 1.0128675699234009, "step": 683 }, { "epoch": 0.05601449497896385, "grad_norm": 0.1630859375, "learning_rate": 2.393820306307186e-05, "loss": 0.7738261222839355, "step": 684 }, { "epoch": 0.05609638751548281, "grad_norm": 0.263671875, "learning_rate": 2.393794923549037e-05, "loss": 1.0237443447113037, "step": 685 }, { "epoch": 0.05617828005200176, "grad_norm": 0.185546875, "learning_rate": 2.3937694889036433e-05, "loss": 0.9787827134132385, "step": 686 }, { "epoch": 0.056260172588520715, "grad_norm": 0.1591796875, "learning_rate": 2.3937440023721112e-05, "loss": 0.935725748538971, "step": 687 }, { "epoch": 0.05634206512503967, "grad_norm": 0.2294921875, "learning_rate": 2.3937184639555477e-05, "loss": 1.0377476215362549, "step": 688 }, { "epoch": 0.05642395766155862, "grad_norm": 0.1513671875, "learning_rate": 2.3936928736550636e-05, "loss": 0.9346053600311279, "step": 689 }, { "epoch": 0.05650585019807757, "grad_norm": 0.2158203125, "learning_rate": 2.3936672314717705e-05, "loss": 0.8664288520812988, "step": 690 }, { "epoch": 0.056587742734596526, "grad_norm": 0.1630859375, "learning_rate": 2.3936415374067833e-05, "loss": 0.8344204425811768, "step": 691 }, { "epoch": 0.05666963527111548, "grad_norm": 0.15625, "learning_rate": 2.3936157914612182e-05, "loss": 0.6864413022994995, "step": 692 }, { "epoch": 0.056751527807634435, "grad_norm": 0.20703125, "learning_rate": 2.3935899936361956e-05, "loss": 1.0343800783157349, "step": 693 }, { "epoch": 0.05683342034415338, "grad_norm": 0.150390625, "learning_rate": 2.3935641439328353e-05, "loss": 1.025879144668579, "step": 694 }, { "epoch": 0.056915312880672336, "grad_norm": 0.1748046875, "learning_rate": 2.393538242352261e-05, "loss": 0.9210976362228394, "step": 695 }, { "epoch": 0.05699720541719129, "grad_norm": 0.17578125, "learning_rate": 2.3935122888955993e-05, "loss": 0.7017812132835388, "step": 696 }, { "epoch": 0.057079097953710245, "grad_norm": 0.201171875, "learning_rate": 2.3934862835639778e-05, "loss": 1.1410526037216187, "step": 697 }, { "epoch": 0.0571609904902292, "grad_norm": 0.2021484375, "learning_rate": 2.3934602263585267e-05, "loss": 0.8802720904350281, "step": 698 }, { "epoch": 0.05724288302674815, "grad_norm": 0.1435546875, "learning_rate": 2.393434117280379e-05, "loss": 0.6988977789878845, "step": 699 }, { "epoch": 0.0573247755632671, "grad_norm": 0.1591796875, "learning_rate": 2.3934079563306686e-05, "loss": 0.7477326989173889, "step": 700 }, { "epoch": 0.057406668099786055, "grad_norm": 0.1611328125, "learning_rate": 2.3933817435105335e-05, "loss": 0.8691118359565735, "step": 701 }, { "epoch": 0.05748856063630501, "grad_norm": 0.193359375, "learning_rate": 2.3933554788211125e-05, "loss": 1.1438881158828735, "step": 702 }, { "epoch": 0.057570453172823964, "grad_norm": 0.1787109375, "learning_rate": 2.3933291622635473e-05, "loss": 0.8003643751144409, "step": 703 }, { "epoch": 0.05765234570934291, "grad_norm": 0.1572265625, "learning_rate": 2.3933027938389818e-05, "loss": 0.9975863695144653, "step": 704 }, { "epoch": 0.057734238245861866, "grad_norm": 0.2001953125, "learning_rate": 2.393276373548562e-05, "loss": 0.8926045298576355, "step": 705 }, { "epoch": 0.05781613078238082, "grad_norm": 0.169921875, "learning_rate": 2.3932499013934365e-05, "loss": 0.9769608378410339, "step": 706 }, { "epoch": 0.057898023318899774, "grad_norm": 0.18359375, "learning_rate": 2.3932233773747556e-05, "loss": 0.958143413066864, "step": 707 }, { "epoch": 0.05797991585541873, "grad_norm": 0.177734375, "learning_rate": 2.3931968014936718e-05, "loss": 1.0230159759521484, "step": 708 }, { "epoch": 0.05806180839193768, "grad_norm": 0.1640625, "learning_rate": 2.393170173751341e-05, "loss": 0.9981197118759155, "step": 709 }, { "epoch": 0.05814370092845663, "grad_norm": 0.140625, "learning_rate": 2.39314349414892e-05, "loss": 0.7228063344955444, "step": 710 }, { "epoch": 0.058225593464975585, "grad_norm": 0.1728515625, "learning_rate": 2.3931167626875683e-05, "loss": 0.966167688369751, "step": 711 }, { "epoch": 0.05830748600149454, "grad_norm": 0.1962890625, "learning_rate": 2.3930899793684483e-05, "loss": 0.706882894039154, "step": 712 }, { "epoch": 0.058389378538013494, "grad_norm": 0.1728515625, "learning_rate": 2.3930631441927235e-05, "loss": 0.6703528165817261, "step": 713 }, { "epoch": 0.05847127107453245, "grad_norm": 0.17578125, "learning_rate": 2.393036257161561e-05, "loss": 0.7549928426742554, "step": 714 }, { "epoch": 0.058553163611051395, "grad_norm": 0.169921875, "learning_rate": 2.3930093182761288e-05, "loss": 0.7933093309402466, "step": 715 }, { "epoch": 0.05863505614757035, "grad_norm": 0.15625, "learning_rate": 2.3929823275375983e-05, "loss": 0.7844583988189697, "step": 716 }, { "epoch": 0.058716948684089304, "grad_norm": 0.1328125, "learning_rate": 2.392955284947142e-05, "loss": 0.3895062208175659, "step": 717 }, { "epoch": 0.05879884122060826, "grad_norm": 0.16796875, "learning_rate": 2.3929281905059355e-05, "loss": 1.131768822669983, "step": 718 }, { "epoch": 0.05888073375712721, "grad_norm": 0.1787109375, "learning_rate": 2.392901044215157e-05, "loss": 0.6950779557228088, "step": 719 }, { "epoch": 0.05896262629364616, "grad_norm": 0.1728515625, "learning_rate": 2.3928738460759852e-05, "loss": 0.7448866367340088, "step": 720 }, { "epoch": 0.059044518830165114, "grad_norm": 0.177734375, "learning_rate": 2.3928465960896036e-05, "loss": 0.8207625150680542, "step": 721 }, { "epoch": 0.05912641136668407, "grad_norm": 0.181640625, "learning_rate": 2.392819294257196e-05, "loss": 0.8510642647743225, "step": 722 }, { "epoch": 0.05920830390320302, "grad_norm": 0.1962890625, "learning_rate": 2.3927919405799486e-05, "loss": 1.0432260036468506, "step": 723 }, { "epoch": 0.05929019643972198, "grad_norm": 0.1845703125, "learning_rate": 2.3927645350590505e-05, "loss": 1.0727624893188477, "step": 724 }, { "epoch": 0.059372088976240925, "grad_norm": 0.1923828125, "learning_rate": 2.3927370776956938e-05, "loss": 0.7621020674705505, "step": 725 }, { "epoch": 0.05945398151275988, "grad_norm": 0.2216796875, "learning_rate": 2.3927095684910705e-05, "loss": 1.1959811449050903, "step": 726 }, { "epoch": 0.059535874049278834, "grad_norm": 0.1650390625, "learning_rate": 2.392682007446377e-05, "loss": 0.7877959609031677, "step": 727 }, { "epoch": 0.05961776658579779, "grad_norm": 0.1689453125, "learning_rate": 2.3926543945628117e-05, "loss": 1.078640341758728, "step": 728 }, { "epoch": 0.05969965912231674, "grad_norm": 0.166015625, "learning_rate": 2.392626729841574e-05, "loss": 1.136202096939087, "step": 729 }, { "epoch": 0.05978155165883569, "grad_norm": 0.171875, "learning_rate": 2.3925990132838663e-05, "loss": 0.8831365704536438, "step": 730 }, { "epoch": 0.059863444195354644, "grad_norm": 0.15234375, "learning_rate": 2.3925712448908936e-05, "loss": 0.8536401987075806, "step": 731 }, { "epoch": 0.0599453367318736, "grad_norm": 0.1611328125, "learning_rate": 2.3925434246638633e-05, "loss": 0.6902964115142822, "step": 732 }, { "epoch": 0.06002722926839255, "grad_norm": 0.203125, "learning_rate": 2.3925155526039833e-05, "loss": 0.9232505559921265, "step": 733 }, { "epoch": 0.06010912180491151, "grad_norm": 0.205078125, "learning_rate": 2.392487628712466e-05, "loss": 0.9984489679336548, "step": 734 }, { "epoch": 0.06019101434143046, "grad_norm": 0.1826171875, "learning_rate": 2.392459652990525e-05, "loss": 1.1143248081207275, "step": 735 }, { "epoch": 0.06027290687794941, "grad_norm": 0.1943359375, "learning_rate": 2.392431625439376e-05, "loss": 0.8914723992347717, "step": 736 }, { "epoch": 0.06035479941446836, "grad_norm": 0.173828125, "learning_rate": 2.3924035460602374e-05, "loss": 0.7896877527236938, "step": 737 }, { "epoch": 0.06043669195098732, "grad_norm": 0.1611328125, "learning_rate": 2.3923754148543294e-05, "loss": 0.6035254597663879, "step": 738 }, { "epoch": 0.06051858448750627, "grad_norm": 0.1669921875, "learning_rate": 2.392347231822875e-05, "loss": 0.8128514289855957, "step": 739 }, { "epoch": 0.060600477024025226, "grad_norm": 0.1533203125, "learning_rate": 2.3923189969670987e-05, "loss": 0.5372666120529175, "step": 740 }, { "epoch": 0.060682369560544173, "grad_norm": 0.1904296875, "learning_rate": 2.3922907102882282e-05, "loss": 0.9451065063476562, "step": 741 }, { "epoch": 0.06076426209706313, "grad_norm": 0.1865234375, "learning_rate": 2.3922623717874927e-05, "loss": 0.8032661080360413, "step": 742 }, { "epoch": 0.06084615463358208, "grad_norm": 0.197265625, "learning_rate": 2.392233981466124e-05, "loss": 0.7575138807296753, "step": 743 }, { "epoch": 0.060928047170101036, "grad_norm": 0.1494140625, "learning_rate": 2.3922055393253558e-05, "loss": 0.7567515969276428, "step": 744 }, { "epoch": 0.06100993970661999, "grad_norm": 0.1669921875, "learning_rate": 2.392177045366425e-05, "loss": 0.5902807712554932, "step": 745 }, { "epoch": 0.06109183224313894, "grad_norm": 0.1787109375, "learning_rate": 2.3921484995905694e-05, "loss": 0.9041367769241333, "step": 746 }, { "epoch": 0.06117372477965789, "grad_norm": 0.23828125, "learning_rate": 2.3921199019990297e-05, "loss": 1.1415441036224365, "step": 747 }, { "epoch": 0.06125561731617685, "grad_norm": 0.1572265625, "learning_rate": 2.392091252593049e-05, "loss": 0.8333556652069092, "step": 748 }, { "epoch": 0.0613375098526958, "grad_norm": 0.2021484375, "learning_rate": 2.392062551373873e-05, "loss": 0.7662279605865479, "step": 749 }, { "epoch": 0.061419402389214756, "grad_norm": 0.16015625, "learning_rate": 2.3920337983427485e-05, "loss": 0.8412419557571411, "step": 750 }, { "epoch": 0.0615012949257337, "grad_norm": 0.1845703125, "learning_rate": 2.3920049935009257e-05, "loss": 0.8625929355621338, "step": 751 }, { "epoch": 0.06158318746225266, "grad_norm": 0.1767578125, "learning_rate": 2.3919761368496562e-05, "loss": 0.8521045446395874, "step": 752 }, { "epoch": 0.06166507999877161, "grad_norm": 0.1845703125, "learning_rate": 2.391947228390194e-05, "loss": 0.9968081712722778, "step": 753 }, { "epoch": 0.061746972535290566, "grad_norm": 0.173828125, "learning_rate": 2.3919182681237968e-05, "loss": 0.8399487733840942, "step": 754 }, { "epoch": 0.06182886507180952, "grad_norm": 0.1943359375, "learning_rate": 2.391889256051722e-05, "loss": 0.8944022059440613, "step": 755 }, { "epoch": 0.06191075760832847, "grad_norm": 0.2041015625, "learning_rate": 2.3918601921752313e-05, "loss": 1.0813689231872559, "step": 756 }, { "epoch": 0.06199265014484742, "grad_norm": 0.208984375, "learning_rate": 2.3918310764955877e-05, "loss": 0.6755024194717407, "step": 757 }, { "epoch": 0.062074542681366376, "grad_norm": 0.140625, "learning_rate": 2.3918019090140568e-05, "loss": 0.8223787546157837, "step": 758 }, { "epoch": 0.06215643521788533, "grad_norm": 0.2099609375, "learning_rate": 2.3917726897319058e-05, "loss": 1.0436381101608276, "step": 759 }, { "epoch": 0.062238327754404285, "grad_norm": 0.2158203125, "learning_rate": 2.3917434186504057e-05, "loss": 1.1123673915863037, "step": 760 }, { "epoch": 0.06232022029092324, "grad_norm": 0.18359375, "learning_rate": 2.391714095770828e-05, "loss": 0.897057294845581, "step": 761 }, { "epoch": 0.06240211282744219, "grad_norm": 0.1708984375, "learning_rate": 2.3916847210944475e-05, "loss": 0.7659156918525696, "step": 762 }, { "epoch": 0.06248400536396114, "grad_norm": 0.1923828125, "learning_rate": 2.3916552946225407e-05, "loss": 0.9436648488044739, "step": 763 }, { "epoch": 0.06256589790048009, "grad_norm": 0.1689453125, "learning_rate": 2.3916258163563866e-05, "loss": 0.8617867231369019, "step": 764 }, { "epoch": 0.06264779043699904, "grad_norm": 0.1845703125, "learning_rate": 2.391596286297267e-05, "loss": 0.7458943724632263, "step": 765 }, { "epoch": 0.062729682973518, "grad_norm": 0.166015625, "learning_rate": 2.3915667044464647e-05, "loss": 0.8565216064453125, "step": 766 }, { "epoch": 0.06281157551003695, "grad_norm": 0.203125, "learning_rate": 2.3915370708052655e-05, "loss": 0.6604374647140503, "step": 767 }, { "epoch": 0.0628934680465559, "grad_norm": 0.21875, "learning_rate": 2.3915073853749577e-05, "loss": 1.0307912826538086, "step": 768 }, { "epoch": 0.06297536058307486, "grad_norm": 0.2099609375, "learning_rate": 2.3914776481568318e-05, "loss": 0.5363497734069824, "step": 769 }, { "epoch": 0.06305725311959381, "grad_norm": 0.16015625, "learning_rate": 2.3914478591521795e-05, "loss": 0.8559870719909668, "step": 770 }, { "epoch": 0.06313914565611277, "grad_norm": 0.14453125, "learning_rate": 2.3914180183622968e-05, "loss": 0.6360553503036499, "step": 771 }, { "epoch": 0.06322103819263172, "grad_norm": 0.16796875, "learning_rate": 2.3913881257884794e-05, "loss": 0.5966918468475342, "step": 772 }, { "epoch": 0.06330293072915068, "grad_norm": 0.18359375, "learning_rate": 2.391358181432027e-05, "loss": 1.0226327180862427, "step": 773 }, { "epoch": 0.06338482326566962, "grad_norm": 0.1611328125, "learning_rate": 2.3913281852942417e-05, "loss": 0.8963218331336975, "step": 774 }, { "epoch": 0.06346671580218857, "grad_norm": 0.1953125, "learning_rate": 2.391298137376426e-05, "loss": 0.7732521891593933, "step": 775 }, { "epoch": 0.06354860833870753, "grad_norm": 0.2001953125, "learning_rate": 2.391268037679887e-05, "loss": 0.5948144197463989, "step": 776 }, { "epoch": 0.06363050087522648, "grad_norm": 0.1650390625, "learning_rate": 2.3912378862059327e-05, "loss": 0.7968024015426636, "step": 777 }, { "epoch": 0.06371239341174544, "grad_norm": 0.19921875, "learning_rate": 2.3912076829558738e-05, "loss": 1.0155894756317139, "step": 778 }, { "epoch": 0.06379428594826439, "grad_norm": 0.1484375, "learning_rate": 2.391177427931022e-05, "loss": 0.8076337575912476, "step": 779 }, { "epoch": 0.06387617848478334, "grad_norm": 0.1884765625, "learning_rate": 2.3911471211326938e-05, "loss": 0.7400011420249939, "step": 780 }, { "epoch": 0.0639580710213023, "grad_norm": 0.1669921875, "learning_rate": 2.3911167625622054e-05, "loss": 1.0085256099700928, "step": 781 }, { "epoch": 0.06403996355782125, "grad_norm": 0.1806640625, "learning_rate": 2.3910863522208765e-05, "loss": 0.800769567489624, "step": 782 }, { "epoch": 0.06412185609434021, "grad_norm": 0.1572265625, "learning_rate": 2.3910558901100293e-05, "loss": 0.7707474827766418, "step": 783 }, { "epoch": 0.06420374863085916, "grad_norm": 0.19140625, "learning_rate": 2.3910253762309875e-05, "loss": 1.0052869319915771, "step": 784 }, { "epoch": 0.0642856411673781, "grad_norm": 0.154296875, "learning_rate": 2.3909948105850774e-05, "loss": 0.8430008888244629, "step": 785 }, { "epoch": 0.06436753370389706, "grad_norm": 0.2373046875, "learning_rate": 2.3909641931736274e-05, "loss": 0.8366373181343079, "step": 786 }, { "epoch": 0.06444942624041601, "grad_norm": 0.1591796875, "learning_rate": 2.390933523997968e-05, "loss": 1.0782530307769775, "step": 787 }, { "epoch": 0.06453131877693496, "grad_norm": 0.1611328125, "learning_rate": 2.390902803059433e-05, "loss": 0.7264035940170288, "step": 788 }, { "epoch": 0.06461321131345392, "grad_norm": 0.17578125, "learning_rate": 2.390872030359357e-05, "loss": 1.003858208656311, "step": 789 }, { "epoch": 0.06469510384997287, "grad_norm": 0.1884765625, "learning_rate": 2.390841205899078e-05, "loss": 1.1298493146896362, "step": 790 }, { "epoch": 0.06477699638649183, "grad_norm": 0.1943359375, "learning_rate": 2.3908103296799353e-05, "loss": 1.0049954652786255, "step": 791 }, { "epoch": 0.06485888892301078, "grad_norm": 0.205078125, "learning_rate": 2.390779401703271e-05, "loss": 0.8537121415138245, "step": 792 }, { "epoch": 0.06494078145952974, "grad_norm": 0.169921875, "learning_rate": 2.3907484219704294e-05, "loss": 0.6126849055290222, "step": 793 }, { "epoch": 0.06502267399604869, "grad_norm": 0.19921875, "learning_rate": 2.390717390482757e-05, "loss": 1.0838035345077515, "step": 794 }, { "epoch": 0.06510456653256763, "grad_norm": 0.1474609375, "learning_rate": 2.390686307241603e-05, "loss": 0.6581450700759888, "step": 795 }, { "epoch": 0.06518645906908659, "grad_norm": 0.1591796875, "learning_rate": 2.390655172248318e-05, "loss": 0.7470253705978394, "step": 796 }, { "epoch": 0.06526835160560554, "grad_norm": 0.1640625, "learning_rate": 2.390623985504255e-05, "loss": 0.6015428900718689, "step": 797 }, { "epoch": 0.0653502441421245, "grad_norm": 0.1640625, "learning_rate": 2.39059274701077e-05, "loss": 0.6400900483131409, "step": 798 }, { "epoch": 0.06543213667864345, "grad_norm": 0.1533203125, "learning_rate": 2.39056145676922e-05, "loss": 0.8458360433578491, "step": 799 }, { "epoch": 0.0655140292151624, "grad_norm": 0.1806640625, "learning_rate": 2.390530114780966e-05, "loss": 0.885083019733429, "step": 800 }, { "epoch": 0.06559592175168136, "grad_norm": 0.1572265625, "learning_rate": 2.3904987210473695e-05, "loss": 0.6624044179916382, "step": 801 }, { "epoch": 0.06567781428820031, "grad_norm": 0.1640625, "learning_rate": 2.3904672755697956e-05, "loss": 0.7437598705291748, "step": 802 }, { "epoch": 0.06575970682471927, "grad_norm": 0.19921875, "learning_rate": 2.3904357783496105e-05, "loss": 0.9272804260253906, "step": 803 }, { "epoch": 0.06584159936123822, "grad_norm": 0.1962890625, "learning_rate": 2.3904042293881833e-05, "loss": 0.8374510407447815, "step": 804 }, { "epoch": 0.06592349189775717, "grad_norm": 0.1328125, "learning_rate": 2.3903726286868857e-05, "loss": 0.8609854578971863, "step": 805 }, { "epoch": 0.06600538443427612, "grad_norm": 0.12353515625, "learning_rate": 2.3903409762470907e-05, "loss": 0.6509743928909302, "step": 806 }, { "epoch": 0.06608727697079507, "grad_norm": 0.19140625, "learning_rate": 2.390309272070174e-05, "loss": 0.9537597298622131, "step": 807 }, { "epoch": 0.06616916950731402, "grad_norm": 0.1708984375, "learning_rate": 2.3902775161575142e-05, "loss": 0.6923487782478333, "step": 808 }, { "epoch": 0.06625106204383298, "grad_norm": 0.19140625, "learning_rate": 2.390245708510491e-05, "loss": 0.62888503074646, "step": 809 }, { "epoch": 0.06633295458035193, "grad_norm": 0.1728515625, "learning_rate": 2.390213849130487e-05, "loss": 0.9218167662620544, "step": 810 }, { "epoch": 0.06641484711687089, "grad_norm": 0.18359375, "learning_rate": 2.3901819380188872e-05, "loss": 0.8725098371505737, "step": 811 }, { "epoch": 0.06649673965338984, "grad_norm": 0.1767578125, "learning_rate": 2.3901499751770782e-05, "loss": 0.9962688684463501, "step": 812 }, { "epoch": 0.0665786321899088, "grad_norm": 0.1533203125, "learning_rate": 2.390117960606449e-05, "loss": 0.6602803468704224, "step": 813 }, { "epoch": 0.06666052472642775, "grad_norm": 0.21875, "learning_rate": 2.3900858943083924e-05, "loss": 1.0366820096969604, "step": 814 }, { "epoch": 0.0667424172629467, "grad_norm": 0.1796875, "learning_rate": 2.3900537762843006e-05, "loss": 0.977749228477478, "step": 815 }, { "epoch": 0.06682430979946564, "grad_norm": 0.1640625, "learning_rate": 2.3900216065355703e-05, "loss": 0.6417954564094543, "step": 816 }, { "epoch": 0.0669062023359846, "grad_norm": 0.201171875, "learning_rate": 2.3899893850635997e-05, "loss": 1.2211062908172607, "step": 817 }, { "epoch": 0.06698809487250355, "grad_norm": 0.1552734375, "learning_rate": 2.3899571118697893e-05, "loss": 0.6863321661949158, "step": 818 }, { "epoch": 0.06706998740902251, "grad_norm": 0.1640625, "learning_rate": 2.3899247869555416e-05, "loss": 0.9974377155303955, "step": 819 }, { "epoch": 0.06715187994554146, "grad_norm": 0.1513671875, "learning_rate": 2.3898924103222618e-05, "loss": 0.7611660361289978, "step": 820 }, { "epoch": 0.06723377248206042, "grad_norm": 0.185546875, "learning_rate": 2.3898599819713572e-05, "loss": 0.8447750806808472, "step": 821 }, { "epoch": 0.06731566501857937, "grad_norm": 0.1650390625, "learning_rate": 2.389827501904237e-05, "loss": 0.9318820834159851, "step": 822 }, { "epoch": 0.06739755755509833, "grad_norm": 0.134765625, "learning_rate": 2.3897949701223128e-05, "loss": 0.5046913623809814, "step": 823 }, { "epoch": 0.06747945009161728, "grad_norm": 0.1650390625, "learning_rate": 2.389762386626999e-05, "loss": 0.8080394864082336, "step": 824 }, { "epoch": 0.06756134262813623, "grad_norm": 0.1630859375, "learning_rate": 2.3897297514197115e-05, "loss": 0.9606471061706543, "step": 825 }, { "epoch": 0.06764323516465517, "grad_norm": 0.1357421875, "learning_rate": 2.389697064501869e-05, "loss": 0.6995418071746826, "step": 826 }, { "epoch": 0.06772512770117413, "grad_norm": 0.1748046875, "learning_rate": 2.389664325874892e-05, "loss": 1.1075576543807983, "step": 827 }, { "epoch": 0.06780702023769308, "grad_norm": 0.2080078125, "learning_rate": 2.3896315355402032e-05, "loss": 0.8601889610290527, "step": 828 }, { "epoch": 0.06788891277421204, "grad_norm": 0.197265625, "learning_rate": 2.3895986934992285e-05, "loss": 0.7834513187408447, "step": 829 }, { "epoch": 0.06797080531073099, "grad_norm": 0.1728515625, "learning_rate": 2.3895657997533952e-05, "loss": 0.8533635139465332, "step": 830 }, { "epoch": 0.06805269784724995, "grad_norm": 0.2080078125, "learning_rate": 2.3895328543041325e-05, "loss": 0.8712145090103149, "step": 831 }, { "epoch": 0.0681345903837689, "grad_norm": 0.1865234375, "learning_rate": 2.3894998571528727e-05, "loss": 1.0655306577682495, "step": 832 }, { "epoch": 0.06821648292028785, "grad_norm": 0.2412109375, "learning_rate": 2.38946680830105e-05, "loss": 1.0326701402664185, "step": 833 }, { "epoch": 0.06829837545680681, "grad_norm": 0.1923828125, "learning_rate": 2.3894337077501005e-05, "loss": 1.2634947299957275, "step": 834 }, { "epoch": 0.06838026799332576, "grad_norm": 0.181640625, "learning_rate": 2.3894005555014634e-05, "loss": 0.8876171112060547, "step": 835 }, { "epoch": 0.06846216052984472, "grad_norm": 0.1328125, "learning_rate": 2.389367351556579e-05, "loss": 0.7812813520431519, "step": 836 }, { "epoch": 0.06854405306636366, "grad_norm": 0.2041015625, "learning_rate": 2.389334095916891e-05, "loss": 0.7355129718780518, "step": 837 }, { "epoch": 0.06862594560288261, "grad_norm": 0.146484375, "learning_rate": 2.389300788583845e-05, "loss": 0.6984882950782776, "step": 838 }, { "epoch": 0.06870783813940157, "grad_norm": 0.2041015625, "learning_rate": 2.3892674295588882e-05, "loss": 0.7929954528808594, "step": 839 }, { "epoch": 0.06878973067592052, "grad_norm": 0.1728515625, "learning_rate": 2.3892340188434705e-05, "loss": 0.8127511739730835, "step": 840 }, { "epoch": 0.06887162321243948, "grad_norm": 0.228515625, "learning_rate": 2.3892005564390444e-05, "loss": 0.9786326885223389, "step": 841 }, { "epoch": 0.06895351574895843, "grad_norm": 0.1513671875, "learning_rate": 2.3891670423470644e-05, "loss": 0.39185306429862976, "step": 842 }, { "epoch": 0.06903540828547738, "grad_norm": 0.1640625, "learning_rate": 2.3891334765689862e-05, "loss": 0.89034503698349, "step": 843 }, { "epoch": 0.06911730082199634, "grad_norm": 0.2373046875, "learning_rate": 2.3890998591062704e-05, "loss": 1.085848331451416, "step": 844 }, { "epoch": 0.06919919335851529, "grad_norm": 0.1708984375, "learning_rate": 2.3890661899603764e-05, "loss": 0.8302363157272339, "step": 845 }, { "epoch": 0.06928108589503425, "grad_norm": 0.1728515625, "learning_rate": 2.3890324691327685e-05, "loss": 1.2554987668991089, "step": 846 }, { "epoch": 0.06936297843155319, "grad_norm": 0.140625, "learning_rate": 2.3889986966249126e-05, "loss": 0.6674655079841614, "step": 847 }, { "epoch": 0.06944487096807214, "grad_norm": 0.2001953125, "learning_rate": 2.388964872438276e-05, "loss": 0.6139657497406006, "step": 848 }, { "epoch": 0.0695267635045911, "grad_norm": 0.1455078125, "learning_rate": 2.388930996574329e-05, "loss": 0.7831220626831055, "step": 849 }, { "epoch": 0.06960865604111005, "grad_norm": 0.2060546875, "learning_rate": 2.3888970690345437e-05, "loss": 0.8593570590019226, "step": 850 }, { "epoch": 0.069690548577629, "grad_norm": 0.1640625, "learning_rate": 2.3888630898203958e-05, "loss": 0.6840532422065735, "step": 851 }, { "epoch": 0.06977244111414796, "grad_norm": 0.1865234375, "learning_rate": 2.388829058933361e-05, "loss": 0.8797383308410645, "step": 852 }, { "epoch": 0.06985433365066691, "grad_norm": 0.1767578125, "learning_rate": 2.388794976374919e-05, "loss": 1.1468734741210938, "step": 853 }, { "epoch": 0.06993622618718587, "grad_norm": 0.1904296875, "learning_rate": 2.388760842146551e-05, "loss": 0.8519679307937622, "step": 854 }, { "epoch": 0.07001811872370482, "grad_norm": 0.171875, "learning_rate": 2.3887266562497407e-05, "loss": 0.9134557843208313, "step": 855 }, { "epoch": 0.07010001126022378, "grad_norm": 0.1845703125, "learning_rate": 2.388692418685974e-05, "loss": 0.7357765436172485, "step": 856 }, { "epoch": 0.07018190379674273, "grad_norm": 0.1943359375, "learning_rate": 2.3886581294567387e-05, "loss": 1.0959980487823486, "step": 857 }, { "epoch": 0.07026379633326167, "grad_norm": 0.166015625, "learning_rate": 2.3886237885635254e-05, "loss": 0.9201380014419556, "step": 858 }, { "epoch": 0.07034568886978063, "grad_norm": 0.2216796875, "learning_rate": 2.3885893960078268e-05, "loss": 0.7882974147796631, "step": 859 }, { "epoch": 0.07042758140629958, "grad_norm": 0.177734375, "learning_rate": 2.3885549517911375e-05, "loss": 0.9611315131187439, "step": 860 }, { "epoch": 0.07050947394281853, "grad_norm": 0.1689453125, "learning_rate": 2.388520455914955e-05, "loss": 0.9144834876060486, "step": 861 }, { "epoch": 0.07059136647933749, "grad_norm": 0.17578125, "learning_rate": 2.388485908380778e-05, "loss": 0.5752807855606079, "step": 862 }, { "epoch": 0.07067325901585644, "grad_norm": 0.1787109375, "learning_rate": 2.3884513091901083e-05, "loss": 0.6473537683486938, "step": 863 }, { "epoch": 0.0707551515523754, "grad_norm": 0.2080078125, "learning_rate": 2.38841665834445e-05, "loss": 0.733709990978241, "step": 864 }, { "epoch": 0.07083704408889435, "grad_norm": 0.2041015625, "learning_rate": 2.388381955845309e-05, "loss": 0.9901431798934937, "step": 865 }, { "epoch": 0.0709189366254133, "grad_norm": 0.171875, "learning_rate": 2.3883472016941936e-05, "loss": 0.6014519333839417, "step": 866 }, { "epoch": 0.07100082916193226, "grad_norm": 0.1572265625, "learning_rate": 2.3883123958926143e-05, "loss": 0.9459564685821533, "step": 867 }, { "epoch": 0.0710827216984512, "grad_norm": 0.1748046875, "learning_rate": 2.388277538442084e-05, "loss": 0.7435308694839478, "step": 868 }, { "epoch": 0.07116461423497016, "grad_norm": 0.224609375, "learning_rate": 2.3882426293441183e-05, "loss": 1.2223165035247803, "step": 869 }, { "epoch": 0.07124650677148911, "grad_norm": 0.1845703125, "learning_rate": 2.3882076686002332e-05, "loss": 0.7333842515945435, "step": 870 }, { "epoch": 0.07132839930800806, "grad_norm": 0.158203125, "learning_rate": 2.388172656211949e-05, "loss": 0.7209481000900269, "step": 871 }, { "epoch": 0.07141029184452702, "grad_norm": 0.1796875, "learning_rate": 2.388137592180788e-05, "loss": 0.8283629417419434, "step": 872 }, { "epoch": 0.07149218438104597, "grad_norm": 0.1708984375, "learning_rate": 2.388102476508273e-05, "loss": 0.9210089445114136, "step": 873 }, { "epoch": 0.07157407691756493, "grad_norm": 0.1513671875, "learning_rate": 2.3880673091959314e-05, "loss": 0.823883593082428, "step": 874 }, { "epoch": 0.07165596945408388, "grad_norm": 0.40625, "learning_rate": 2.3880320902452907e-05, "loss": 0.8558413982391357, "step": 875 }, { "epoch": 0.07173786199060284, "grad_norm": 0.1787109375, "learning_rate": 2.3879968196578828e-05, "loss": 0.925654411315918, "step": 876 }, { "epoch": 0.07181975452712179, "grad_norm": 0.2001953125, "learning_rate": 2.38796149743524e-05, "loss": 1.2575734853744507, "step": 877 }, { "epoch": 0.07190164706364073, "grad_norm": 0.185546875, "learning_rate": 2.3879261235788975e-05, "loss": 1.1645255088806152, "step": 878 }, { "epoch": 0.07198353960015969, "grad_norm": 0.19921875, "learning_rate": 2.387890698090393e-05, "loss": 0.667198121547699, "step": 879 }, { "epoch": 0.07206543213667864, "grad_norm": 0.2197265625, "learning_rate": 2.387855220971266e-05, "loss": 0.9587016105651855, "step": 880 }, { "epoch": 0.0721473246731976, "grad_norm": 0.166015625, "learning_rate": 2.387819692223059e-05, "loss": 0.8451201915740967, "step": 881 }, { "epoch": 0.07222921720971655, "grad_norm": 0.17578125, "learning_rate": 2.387784111847316e-05, "loss": 0.7909094095230103, "step": 882 }, { "epoch": 0.0723111097462355, "grad_norm": 0.15625, "learning_rate": 2.387748479845583e-05, "loss": 0.6922479271888733, "step": 883 }, { "epoch": 0.07239300228275446, "grad_norm": 0.1689453125, "learning_rate": 2.3877127962194093e-05, "loss": 0.7669557929039001, "step": 884 }, { "epoch": 0.07247489481927341, "grad_norm": 0.1728515625, "learning_rate": 2.3876770609703456e-05, "loss": 0.8714954853057861, "step": 885 }, { "epoch": 0.07255678735579237, "grad_norm": 0.216796875, "learning_rate": 2.3876412740999452e-05, "loss": 0.9960775375366211, "step": 886 }, { "epoch": 0.07263867989231132, "grad_norm": 0.20703125, "learning_rate": 2.3876054356097632e-05, "loss": 0.9100107550621033, "step": 887 }, { "epoch": 0.07272057242883027, "grad_norm": 0.185546875, "learning_rate": 2.387569545501358e-05, "loss": 1.038118839263916, "step": 888 }, { "epoch": 0.07280246496534921, "grad_norm": 0.150390625, "learning_rate": 2.387533603776289e-05, "loss": 0.5999362468719482, "step": 889 }, { "epoch": 0.07288435750186817, "grad_norm": 0.197265625, "learning_rate": 2.3874976104361187e-05, "loss": 0.780677318572998, "step": 890 }, { "epoch": 0.07296625003838712, "grad_norm": 0.1884765625, "learning_rate": 2.387461565482411e-05, "loss": 0.7013919353485107, "step": 891 }, { "epoch": 0.07304814257490608, "grad_norm": 0.166015625, "learning_rate": 2.3874254689167324e-05, "loss": 0.9123063683509827, "step": 892 }, { "epoch": 0.07313003511142503, "grad_norm": 0.16015625, "learning_rate": 2.387389320740653e-05, "loss": 1.3979144096374512, "step": 893 }, { "epoch": 0.07321192764794399, "grad_norm": 0.2080078125, "learning_rate": 2.387353120955743e-05, "loss": 0.8316360712051392, "step": 894 }, { "epoch": 0.07329382018446294, "grad_norm": 0.1982421875, "learning_rate": 2.387316869563576e-05, "loss": 0.9191043972969055, "step": 895 }, { "epoch": 0.0733757127209819, "grad_norm": 0.158203125, "learning_rate": 2.3872805665657274e-05, "loss": 0.7610026001930237, "step": 896 }, { "epoch": 0.07345760525750085, "grad_norm": 0.2080078125, "learning_rate": 2.3872442119637756e-05, "loss": 0.5807282328605652, "step": 897 }, { "epoch": 0.0735394977940198, "grad_norm": 0.1669921875, "learning_rate": 2.3872078057593007e-05, "loss": 1.0620054006576538, "step": 898 }, { "epoch": 0.07362139033053874, "grad_norm": 0.1513671875, "learning_rate": 2.3871713479538846e-05, "loss": 0.9009345173835754, "step": 899 }, { "epoch": 0.0737032828670577, "grad_norm": 0.294921875, "learning_rate": 2.387134838549112e-05, "loss": 0.8334758877754211, "step": 900 }, { "epoch": 0.07378517540357665, "grad_norm": 0.185546875, "learning_rate": 2.38709827754657e-05, "loss": 0.8208713531494141, "step": 901 }, { "epoch": 0.07386706794009561, "grad_norm": 0.1591796875, "learning_rate": 2.3870616649478472e-05, "loss": 1.0119149684906006, "step": 902 }, { "epoch": 0.07394896047661456, "grad_norm": 0.2119140625, "learning_rate": 2.387025000754536e-05, "loss": 1.1852924823760986, "step": 903 }, { "epoch": 0.07403085301313352, "grad_norm": 0.189453125, "learning_rate": 2.3869882849682287e-05, "loss": 0.9489538073539734, "step": 904 }, { "epoch": 0.07411274554965247, "grad_norm": 0.16015625, "learning_rate": 2.386951517590522e-05, "loss": 0.8638594150543213, "step": 905 }, { "epoch": 0.07419463808617142, "grad_norm": 0.17578125, "learning_rate": 2.3869146986230134e-05, "loss": 1.0569462776184082, "step": 906 }, { "epoch": 0.07427653062269038, "grad_norm": 0.1943359375, "learning_rate": 2.3868778280673035e-05, "loss": 0.7985172271728516, "step": 907 }, { "epoch": 0.07435842315920933, "grad_norm": 0.1982421875, "learning_rate": 2.386840905924995e-05, "loss": 0.7662058472633362, "step": 908 }, { "epoch": 0.07444031569572829, "grad_norm": 0.177734375, "learning_rate": 2.386803932197692e-05, "loss": 0.875137209892273, "step": 909 }, { "epoch": 0.07452220823224723, "grad_norm": 0.1435546875, "learning_rate": 2.3867669068870027e-05, "loss": 0.6429243683815002, "step": 910 }, { "epoch": 0.07460410076876618, "grad_norm": 0.1533203125, "learning_rate": 2.3867298299945353e-05, "loss": 0.6278123259544373, "step": 911 }, { "epoch": 0.07468599330528514, "grad_norm": 0.17578125, "learning_rate": 2.3866927015219018e-05, "loss": 0.5837377905845642, "step": 912 }, { "epoch": 0.07476788584180409, "grad_norm": 0.1484375, "learning_rate": 2.3866555214707158e-05, "loss": 0.752549946308136, "step": 913 }, { "epoch": 0.07484977837832305, "grad_norm": 0.1455078125, "learning_rate": 2.3866182898425935e-05, "loss": 0.6466978192329407, "step": 914 }, { "epoch": 0.074931670914842, "grad_norm": 0.2041015625, "learning_rate": 2.386581006639153e-05, "loss": 1.0016194581985474, "step": 915 }, { "epoch": 0.07501356345136095, "grad_norm": 0.177734375, "learning_rate": 2.3865436718620146e-05, "loss": 0.9586094617843628, "step": 916 }, { "epoch": 0.07509545598787991, "grad_norm": 0.166015625, "learning_rate": 2.3865062855128013e-05, "loss": 0.7151205539703369, "step": 917 }, { "epoch": 0.07517734852439886, "grad_norm": 0.25390625, "learning_rate": 2.386468847593138e-05, "loss": 0.8532623052597046, "step": 918 }, { "epoch": 0.07525924106091782, "grad_norm": 0.1748046875, "learning_rate": 2.3864313581046516e-05, "loss": 1.0382089614868164, "step": 919 }, { "epoch": 0.07534113359743676, "grad_norm": 0.1494140625, "learning_rate": 2.386393817048972e-05, "loss": 0.9049426913261414, "step": 920 }, { "epoch": 0.07542302613395571, "grad_norm": 0.19140625, "learning_rate": 2.386356224427731e-05, "loss": 0.6633367538452148, "step": 921 }, { "epoch": 0.07550491867047467, "grad_norm": 0.185546875, "learning_rate": 2.386318580242562e-05, "loss": 0.6424201726913452, "step": 922 }, { "epoch": 0.07558681120699362, "grad_norm": 0.162109375, "learning_rate": 2.3862808844951015e-05, "loss": 0.681705117225647, "step": 923 }, { "epoch": 0.07566870374351257, "grad_norm": 0.2197265625, "learning_rate": 2.3862431371869875e-05, "loss": 1.1234625577926636, "step": 924 }, { "epoch": 0.07575059628003153, "grad_norm": 0.21875, "learning_rate": 2.3862053383198615e-05, "loss": 1.153337836265564, "step": 925 }, { "epoch": 0.07583248881655048, "grad_norm": 0.18359375, "learning_rate": 2.3861674878953656e-05, "loss": 0.7771589756011963, "step": 926 }, { "epoch": 0.07591438135306944, "grad_norm": 0.1923828125, "learning_rate": 2.3861295859151452e-05, "loss": 0.824066162109375, "step": 927 }, { "epoch": 0.07599627388958839, "grad_norm": 0.1748046875, "learning_rate": 2.3860916323808476e-05, "loss": 0.779680073261261, "step": 928 }, { "epoch": 0.07607816642610735, "grad_norm": 0.15625, "learning_rate": 2.3860536272941227e-05, "loss": 0.7602167129516602, "step": 929 }, { "epoch": 0.07616005896262629, "grad_norm": 0.228515625, "learning_rate": 2.386015570656622e-05, "loss": 1.1875331401824951, "step": 930 }, { "epoch": 0.07624195149914524, "grad_norm": 0.1982421875, "learning_rate": 2.38597746247e-05, "loss": 1.1889960765838623, "step": 931 }, { "epoch": 0.0763238440356642, "grad_norm": 0.1611328125, "learning_rate": 2.3859393027359125e-05, "loss": 0.7926914095878601, "step": 932 }, { "epoch": 0.07640573657218315, "grad_norm": 0.220703125, "learning_rate": 2.3859010914560186e-05, "loss": 0.5850602984428406, "step": 933 }, { "epoch": 0.0764876291087021, "grad_norm": 0.1767578125, "learning_rate": 2.3858628286319788e-05, "loss": 0.9435917735099792, "step": 934 }, { "epoch": 0.07656952164522106, "grad_norm": 0.1748046875, "learning_rate": 2.3858245142654563e-05, "loss": 0.6421206593513489, "step": 935 }, { "epoch": 0.07665141418174001, "grad_norm": 0.197265625, "learning_rate": 2.3857861483581163e-05, "loss": 0.9105852246284485, "step": 936 }, { "epoch": 0.07673330671825897, "grad_norm": 0.19921875, "learning_rate": 2.3857477309116265e-05, "loss": 1.106433391571045, "step": 937 }, { "epoch": 0.07681519925477792, "grad_norm": 0.20703125, "learning_rate": 2.3857092619276566e-05, "loss": 1.0934832096099854, "step": 938 }, { "epoch": 0.07689709179129688, "grad_norm": 0.1572265625, "learning_rate": 2.3856707414078782e-05, "loss": 1.102327823638916, "step": 939 }, { "epoch": 0.07697898432781583, "grad_norm": 0.13671875, "learning_rate": 2.3856321693539662e-05, "loss": 0.7117345929145813, "step": 940 }, { "epoch": 0.07706087686433477, "grad_norm": 0.1787109375, "learning_rate": 2.385593545767597e-05, "loss": 1.048274040222168, "step": 941 }, { "epoch": 0.07714276940085373, "grad_norm": 0.2314453125, "learning_rate": 2.3855548706504493e-05, "loss": 0.7626367211341858, "step": 942 }, { "epoch": 0.07722466193737268, "grad_norm": 0.1611328125, "learning_rate": 2.3855161440042037e-05, "loss": 0.9599903225898743, "step": 943 }, { "epoch": 0.07730655447389163, "grad_norm": 0.1767578125, "learning_rate": 2.3854773658305438e-05, "loss": 0.9171708822250366, "step": 944 }, { "epoch": 0.07738844701041059, "grad_norm": 0.1943359375, "learning_rate": 2.385438536131155e-05, "loss": 0.8478205800056458, "step": 945 }, { "epoch": 0.07747033954692954, "grad_norm": 0.208984375, "learning_rate": 2.385399654907725e-05, "loss": 1.2198203802108765, "step": 946 }, { "epoch": 0.0775522320834485, "grad_norm": 0.1689453125, "learning_rate": 2.3853607221619436e-05, "loss": 0.9292618632316589, "step": 947 }, { "epoch": 0.07763412461996745, "grad_norm": 0.1513671875, "learning_rate": 2.385321737895503e-05, "loss": 0.9733217358589172, "step": 948 }, { "epoch": 0.0777160171564864, "grad_norm": 0.169921875, "learning_rate": 2.385282702110098e-05, "loss": 0.9776267409324646, "step": 949 }, { "epoch": 0.07779790969300536, "grad_norm": 0.1474609375, "learning_rate": 2.3852436148074245e-05, "loss": 0.7498472332954407, "step": 950 }, { "epoch": 0.0778798022295243, "grad_norm": 0.18359375, "learning_rate": 2.385204475989182e-05, "loss": 0.9233551621437073, "step": 951 }, { "epoch": 0.07796169476604325, "grad_norm": 0.287109375, "learning_rate": 2.3851652856570717e-05, "loss": 0.9215163588523865, "step": 952 }, { "epoch": 0.07804358730256221, "grad_norm": 0.1845703125, "learning_rate": 2.3851260438127965e-05, "loss": 0.8095278739929199, "step": 953 }, { "epoch": 0.07812547983908116, "grad_norm": 0.1923828125, "learning_rate": 2.3850867504580622e-05, "loss": 0.755857527256012, "step": 954 }, { "epoch": 0.07820737237560012, "grad_norm": 0.1494140625, "learning_rate": 2.3850474055945768e-05, "loss": 0.8303584456443787, "step": 955 }, { "epoch": 0.07828926491211907, "grad_norm": 0.169921875, "learning_rate": 2.3850080092240505e-05, "loss": 0.8198450207710266, "step": 956 }, { "epoch": 0.07837115744863803, "grad_norm": 0.19140625, "learning_rate": 2.3849685613481954e-05, "loss": 0.9797332882881165, "step": 957 }, { "epoch": 0.07845304998515698, "grad_norm": 0.234375, "learning_rate": 2.3849290619687256e-05, "loss": 0.7985044717788696, "step": 958 }, { "epoch": 0.07853494252167594, "grad_norm": 0.20703125, "learning_rate": 2.3848895110873588e-05, "loss": 0.9533881545066833, "step": 959 }, { "epoch": 0.07861683505819489, "grad_norm": 0.1875, "learning_rate": 2.384849908705814e-05, "loss": 0.9056169390678406, "step": 960 }, { "epoch": 0.07869872759471383, "grad_norm": 0.1962890625, "learning_rate": 2.3848102548258116e-05, "loss": 0.8116352558135986, "step": 961 }, { "epoch": 0.07878062013123278, "grad_norm": 0.1572265625, "learning_rate": 2.384770549449076e-05, "loss": 0.7232645750045776, "step": 962 }, { "epoch": 0.07886251266775174, "grad_norm": 0.1474609375, "learning_rate": 2.3847307925773323e-05, "loss": 0.8241935968399048, "step": 963 }, { "epoch": 0.07894440520427069, "grad_norm": 0.154296875, "learning_rate": 2.3846909842123085e-05, "loss": 0.7238634824752808, "step": 964 }, { "epoch": 0.07902629774078965, "grad_norm": 0.19921875, "learning_rate": 2.384651124355736e-05, "loss": 0.8749205470085144, "step": 965 }, { "epoch": 0.0791081902773086, "grad_norm": 0.150390625, "learning_rate": 2.384611213009346e-05, "loss": 0.6803001165390015, "step": 966 }, { "epoch": 0.07919008281382756, "grad_norm": 0.2021484375, "learning_rate": 2.3845712501748735e-05, "loss": 0.7880287766456604, "step": 967 }, { "epoch": 0.07927197535034651, "grad_norm": 0.166015625, "learning_rate": 2.3845312358540556e-05, "loss": 0.7550244927406311, "step": 968 }, { "epoch": 0.07935386788686546, "grad_norm": 0.1865234375, "learning_rate": 2.3844911700486316e-05, "loss": 0.7586873173713684, "step": 969 }, { "epoch": 0.07943576042338442, "grad_norm": 0.1689453125, "learning_rate": 2.3844510527603425e-05, "loss": 0.6517073512077332, "step": 970 }, { "epoch": 0.07951765295990337, "grad_norm": 0.205078125, "learning_rate": 2.384410883990933e-05, "loss": 0.7106707096099854, "step": 971 }, { "epoch": 0.07959954549642231, "grad_norm": 0.1748046875, "learning_rate": 2.3843706637421475e-05, "loss": 0.7612528800964355, "step": 972 }, { "epoch": 0.07968143803294127, "grad_norm": 0.1630859375, "learning_rate": 2.384330392015735e-05, "loss": 0.9213825464248657, "step": 973 }, { "epoch": 0.07976333056946022, "grad_norm": 0.154296875, "learning_rate": 2.3842900688134457e-05, "loss": 0.6322633028030396, "step": 974 }, { "epoch": 0.07984522310597918, "grad_norm": 0.16015625, "learning_rate": 2.3842496941370327e-05, "loss": 0.8157610893249512, "step": 975 }, { "epoch": 0.07992711564249813, "grad_norm": 0.1572265625, "learning_rate": 2.3842092679882503e-05, "loss": 0.8752837777137756, "step": 976 }, { "epoch": 0.08000900817901709, "grad_norm": 0.18359375, "learning_rate": 2.3841687903688555e-05, "loss": 0.6833584308624268, "step": 977 }, { "epoch": 0.08009090071553604, "grad_norm": 0.18359375, "learning_rate": 2.384128261280608e-05, "loss": 0.7934967279434204, "step": 978 }, { "epoch": 0.080172793252055, "grad_norm": 0.1748046875, "learning_rate": 2.384087680725269e-05, "loss": 0.8384382128715515, "step": 979 }, { "epoch": 0.08025468578857395, "grad_norm": 0.15234375, "learning_rate": 2.384047048704603e-05, "loss": 0.916492223739624, "step": 980 }, { "epoch": 0.0803365783250929, "grad_norm": 0.1962890625, "learning_rate": 2.384006365220375e-05, "loss": 1.0703872442245483, "step": 981 }, { "epoch": 0.08041847086161184, "grad_norm": 0.2060546875, "learning_rate": 2.383965630274354e-05, "loss": 1.0057543516159058, "step": 982 }, { "epoch": 0.0805003633981308, "grad_norm": 0.162109375, "learning_rate": 2.3839248438683102e-05, "loss": 0.7645152807235718, "step": 983 }, { "epoch": 0.08058225593464975, "grad_norm": 0.1640625, "learning_rate": 2.3838840060040164e-05, "loss": 0.6167287826538086, "step": 984 }, { "epoch": 0.0806641484711687, "grad_norm": 0.1552734375, "learning_rate": 2.3838431166832477e-05, "loss": 0.6900557279586792, "step": 985 }, { "epoch": 0.08074604100768766, "grad_norm": 0.1572265625, "learning_rate": 2.3838021759077814e-05, "loss": 0.6050208806991577, "step": 986 }, { "epoch": 0.08082793354420661, "grad_norm": 0.185546875, "learning_rate": 2.383761183679397e-05, "loss": 0.8370410799980164, "step": 987 }, { "epoch": 0.08090982608072557, "grad_norm": 0.2060546875, "learning_rate": 2.383720139999876e-05, "loss": 0.9643905758857727, "step": 988 }, { "epoch": 0.08099171861724452, "grad_norm": 0.169921875, "learning_rate": 2.3836790448710016e-05, "loss": 0.9964718222618103, "step": 989 }, { "epoch": 0.08107361115376348, "grad_norm": 0.13671875, "learning_rate": 2.3836378982945615e-05, "loss": 0.891491711139679, "step": 990 }, { "epoch": 0.08115550369028243, "grad_norm": 0.1875, "learning_rate": 2.3835967002723428e-05, "loss": 1.0418084859848022, "step": 991 }, { "epoch": 0.08123739622680139, "grad_norm": 0.1689453125, "learning_rate": 2.3835554508061367e-05, "loss": 0.9015802145004272, "step": 992 }, { "epoch": 0.08131928876332033, "grad_norm": 0.1552734375, "learning_rate": 2.3835141498977362e-05, "loss": 1.27971613407135, "step": 993 }, { "epoch": 0.08140118129983928, "grad_norm": 0.2236328125, "learning_rate": 2.383472797548936e-05, "loss": 0.9369308948516846, "step": 994 }, { "epoch": 0.08148307383635824, "grad_norm": 0.21484375, "learning_rate": 2.3834313937615337e-05, "loss": 0.7776637673377991, "step": 995 }, { "epoch": 0.08156496637287719, "grad_norm": 0.1630859375, "learning_rate": 2.383389938537329e-05, "loss": 1.026078224182129, "step": 996 }, { "epoch": 0.08164685890939614, "grad_norm": 0.2099609375, "learning_rate": 2.3833484318781234e-05, "loss": 1.4209386110305786, "step": 997 }, { "epoch": 0.0817287514459151, "grad_norm": 0.1669921875, "learning_rate": 2.3833068737857214e-05, "loss": 0.8527098894119263, "step": 998 }, { "epoch": 0.08181064398243405, "grad_norm": 0.1982421875, "learning_rate": 2.3832652642619284e-05, "loss": 0.8481400012969971, "step": 999 }, { "epoch": 0.08189253651895301, "grad_norm": 0.1826171875, "learning_rate": 2.3832236033085538e-05, "loss": 1.1074132919311523, "step": 1000 }, { "epoch": 0.08197442905547196, "grad_norm": 0.1435546875, "learning_rate": 2.3831818909274084e-05, "loss": 0.5550487637519836, "step": 1001 }, { "epoch": 0.08205632159199092, "grad_norm": 0.1904296875, "learning_rate": 2.3831401271203043e-05, "loss": 1.1143569946289062, "step": 1002 }, { "epoch": 0.08213821412850986, "grad_norm": 0.16796875, "learning_rate": 2.3830983118890574e-05, "loss": 0.7879875898361206, "step": 1003 }, { "epoch": 0.08222010666502881, "grad_norm": 0.1728515625, "learning_rate": 2.3830564452354852e-05, "loss": 0.770058810710907, "step": 1004 }, { "epoch": 0.08230199920154777, "grad_norm": 0.2041015625, "learning_rate": 2.383014527161407e-05, "loss": 1.0728634595870972, "step": 1005 }, { "epoch": 0.08238389173806672, "grad_norm": 0.1708984375, "learning_rate": 2.3829725576686456e-05, "loss": 0.9467970132827759, "step": 1006 }, { "epoch": 0.08246578427458567, "grad_norm": 0.1728515625, "learning_rate": 2.3829305367590242e-05, "loss": 1.0244967937469482, "step": 1007 }, { "epoch": 0.08254767681110463, "grad_norm": 0.177734375, "learning_rate": 2.3828884644343693e-05, "loss": 0.8292291164398193, "step": 1008 }, { "epoch": 0.08262956934762358, "grad_norm": 0.1748046875, "learning_rate": 2.3828463406965097e-05, "loss": 0.9327377080917358, "step": 1009 }, { "epoch": 0.08271146188414254, "grad_norm": 0.16796875, "learning_rate": 2.3828041655472765e-05, "loss": 0.5960700511932373, "step": 1010 }, { "epoch": 0.08279335442066149, "grad_norm": 0.1474609375, "learning_rate": 2.3827619389885028e-05, "loss": 0.7657983899116516, "step": 1011 }, { "epoch": 0.08287524695718045, "grad_norm": 0.1435546875, "learning_rate": 2.382719661022024e-05, "loss": 0.9336369633674622, "step": 1012 }, { "epoch": 0.08295713949369939, "grad_norm": 0.166015625, "learning_rate": 2.382677331649677e-05, "loss": 0.8051844835281372, "step": 1013 }, { "epoch": 0.08303903203021834, "grad_norm": 0.158203125, "learning_rate": 2.3826349508733023e-05, "loss": 0.6600877642631531, "step": 1014 }, { "epoch": 0.0831209245667373, "grad_norm": 0.208984375, "learning_rate": 2.3825925186947415e-05, "loss": 0.9509165287017822, "step": 1015 }, { "epoch": 0.08320281710325625, "grad_norm": 0.1689453125, "learning_rate": 2.3825500351158394e-05, "loss": 0.8809319734573364, "step": 1016 }, { "epoch": 0.0832847096397752, "grad_norm": 0.2060546875, "learning_rate": 2.382507500138442e-05, "loss": 1.0736415386199951, "step": 1017 }, { "epoch": 0.08336660217629416, "grad_norm": 0.166015625, "learning_rate": 2.3824649137643982e-05, "loss": 0.7450970411300659, "step": 1018 }, { "epoch": 0.08344849471281311, "grad_norm": 0.1640625, "learning_rate": 2.3824222759955594e-05, "loss": 0.7833691835403442, "step": 1019 }, { "epoch": 0.08353038724933207, "grad_norm": 0.2021484375, "learning_rate": 2.382379586833778e-05, "loss": 1.0517538785934448, "step": 1020 }, { "epoch": 0.08361227978585102, "grad_norm": 0.17578125, "learning_rate": 2.3823368462809104e-05, "loss": 0.8261633515357971, "step": 1021 }, { "epoch": 0.08369417232236998, "grad_norm": 0.1728515625, "learning_rate": 2.3822940543388134e-05, "loss": 0.952375054359436, "step": 1022 }, { "epoch": 0.08377606485888893, "grad_norm": 0.17578125, "learning_rate": 2.3822512110093474e-05, "loss": 0.7037366628646851, "step": 1023 }, { "epoch": 0.08385795739540787, "grad_norm": 0.1669921875, "learning_rate": 2.3822083162943745e-05, "loss": 0.8279221653938293, "step": 1024 }, { "epoch": 0.08393984993192682, "grad_norm": 0.125, "learning_rate": 2.3821653701957587e-05, "loss": 0.7880839109420776, "step": 1025 }, { "epoch": 0.08402174246844578, "grad_norm": 0.197265625, "learning_rate": 2.3821223727153673e-05, "loss": 0.8539780974388123, "step": 1026 }, { "epoch": 0.08410363500496473, "grad_norm": 0.1826171875, "learning_rate": 2.3820793238550683e-05, "loss": 1.090681791305542, "step": 1027 }, { "epoch": 0.08418552754148369, "grad_norm": 0.1494140625, "learning_rate": 2.3820362236167337e-05, "loss": 0.6790997982025146, "step": 1028 }, { "epoch": 0.08426742007800264, "grad_norm": 0.15625, "learning_rate": 2.3819930720022363e-05, "loss": 0.7699350118637085, "step": 1029 }, { "epoch": 0.0843493126145216, "grad_norm": 0.1474609375, "learning_rate": 2.3819498690134516e-05, "loss": 0.9489578008651733, "step": 1030 }, { "epoch": 0.08443120515104055, "grad_norm": 0.1689453125, "learning_rate": 2.381906614652258e-05, "loss": 0.7199457883834839, "step": 1031 }, { "epoch": 0.0845130976875595, "grad_norm": 0.259765625, "learning_rate": 2.3818633089205342e-05, "loss": 0.8059130311012268, "step": 1032 }, { "epoch": 0.08459499022407846, "grad_norm": 0.1513671875, "learning_rate": 2.3818199518201637e-05, "loss": 0.7351808547973633, "step": 1033 }, { "epoch": 0.0846768827605974, "grad_norm": 0.1572265625, "learning_rate": 2.3817765433530302e-05, "loss": 0.9748575091362, "step": 1034 }, { "epoch": 0.08475877529711635, "grad_norm": 0.2001953125, "learning_rate": 2.381733083521021e-05, "loss": 1.220763087272644, "step": 1035 }, { "epoch": 0.08484066783363531, "grad_norm": 0.173828125, "learning_rate": 2.381689572326025e-05, "loss": 0.856031596660614, "step": 1036 }, { "epoch": 0.08492256037015426, "grad_norm": 0.154296875, "learning_rate": 2.381646009769933e-05, "loss": 0.9350159168243408, "step": 1037 }, { "epoch": 0.08500445290667322, "grad_norm": 0.2041015625, "learning_rate": 2.3816023958546383e-05, "loss": 0.892288088798523, "step": 1038 }, { "epoch": 0.08508634544319217, "grad_norm": 0.16796875, "learning_rate": 2.3815587305820372e-05, "loss": 0.79413241147995, "step": 1039 }, { "epoch": 0.08516823797971113, "grad_norm": 0.1796875, "learning_rate": 2.381515013954027e-05, "loss": 0.9142088890075684, "step": 1040 }, { "epoch": 0.08525013051623008, "grad_norm": 0.1748046875, "learning_rate": 2.381471245972508e-05, "loss": 0.7598832249641418, "step": 1041 }, { "epoch": 0.08533202305274903, "grad_norm": 0.1728515625, "learning_rate": 2.3814274266393826e-05, "loss": 1.1087967157363892, "step": 1042 }, { "epoch": 0.08541391558926799, "grad_norm": 0.1640625, "learning_rate": 2.3813835559565552e-05, "loss": 0.9357991814613342, "step": 1043 }, { "epoch": 0.08549580812578694, "grad_norm": 0.1943359375, "learning_rate": 2.381339633925933e-05, "loss": 1.114556908607483, "step": 1044 }, { "epoch": 0.08557770066230588, "grad_norm": 0.13671875, "learning_rate": 2.3812956605494242e-05, "loss": 0.8096150755882263, "step": 1045 }, { "epoch": 0.08565959319882484, "grad_norm": 0.1943359375, "learning_rate": 2.381251635828941e-05, "loss": 0.8399940133094788, "step": 1046 }, { "epoch": 0.08574148573534379, "grad_norm": 0.1865234375, "learning_rate": 2.3812075597663964e-05, "loss": 0.6026678085327148, "step": 1047 }, { "epoch": 0.08582337827186275, "grad_norm": 0.17578125, "learning_rate": 2.381163432363706e-05, "loss": 0.8701160550117493, "step": 1048 }, { "epoch": 0.0859052708083817, "grad_norm": 0.185546875, "learning_rate": 2.3811192536227883e-05, "loss": 1.0040497779846191, "step": 1049 }, { "epoch": 0.08598716334490066, "grad_norm": 0.1923828125, "learning_rate": 2.381075023545563e-05, "loss": 1.1004680395126343, "step": 1050 }, { "epoch": 0.08606905588141961, "grad_norm": 0.1552734375, "learning_rate": 2.3810307421339525e-05, "loss": 0.7033898234367371, "step": 1051 }, { "epoch": 0.08615094841793856, "grad_norm": 0.1640625, "learning_rate": 2.380986409389882e-05, "loss": 0.9362397789955139, "step": 1052 }, { "epoch": 0.08623284095445752, "grad_norm": 0.146484375, "learning_rate": 2.380942025315278e-05, "loss": 0.812309205532074, "step": 1053 }, { "epoch": 0.08631473349097647, "grad_norm": 0.1787109375, "learning_rate": 2.3808975899120693e-05, "loss": 0.8131587505340576, "step": 1054 }, { "epoch": 0.08639662602749541, "grad_norm": 0.171875, "learning_rate": 2.3808531031821875e-05, "loss": 0.8315556049346924, "step": 1055 }, { "epoch": 0.08647851856401437, "grad_norm": 0.1748046875, "learning_rate": 2.3808085651275667e-05, "loss": 0.621181070804596, "step": 1056 }, { "epoch": 0.08656041110053332, "grad_norm": 0.162109375, "learning_rate": 2.380763975750142e-05, "loss": 0.7798931002616882, "step": 1057 }, { "epoch": 0.08664230363705228, "grad_norm": 0.1826171875, "learning_rate": 2.380719335051852e-05, "loss": 0.7550495266914368, "step": 1058 }, { "epoch": 0.08672419617357123, "grad_norm": 0.1767578125, "learning_rate": 2.380674643034636e-05, "loss": 0.8598673343658447, "step": 1059 }, { "epoch": 0.08680608871009018, "grad_norm": 0.2421875, "learning_rate": 2.380629899700438e-05, "loss": 1.1551227569580078, "step": 1060 }, { "epoch": 0.08688798124660914, "grad_norm": 0.1650390625, "learning_rate": 2.3805851050512018e-05, "loss": 0.6638979315757751, "step": 1061 }, { "epoch": 0.0869698737831281, "grad_norm": 0.15625, "learning_rate": 2.380540259088874e-05, "loss": 0.8854860067367554, "step": 1062 }, { "epoch": 0.08705176631964705, "grad_norm": 0.146484375, "learning_rate": 2.3804953618154045e-05, "loss": 0.9188386797904968, "step": 1063 }, { "epoch": 0.087133658856166, "grad_norm": 0.12890625, "learning_rate": 2.3804504132327444e-05, "loss": 0.6485326290130615, "step": 1064 }, { "epoch": 0.08721555139268494, "grad_norm": 0.2041015625, "learning_rate": 2.3804054133428474e-05, "loss": 0.8500991463661194, "step": 1065 }, { "epoch": 0.0872974439292039, "grad_norm": 0.166015625, "learning_rate": 2.3803603621476695e-05, "loss": 1.025068998336792, "step": 1066 }, { "epoch": 0.08737933646572285, "grad_norm": 0.162109375, "learning_rate": 2.380315259649169e-05, "loss": 0.7836132645606995, "step": 1067 }, { "epoch": 0.0874612290022418, "grad_norm": 0.1669921875, "learning_rate": 2.3802701058493054e-05, "loss": 0.718864917755127, "step": 1068 }, { "epoch": 0.08754312153876076, "grad_norm": 0.2021484375, "learning_rate": 2.380224900750042e-05, "loss": 1.11733877658844, "step": 1069 }, { "epoch": 0.08762501407527971, "grad_norm": 0.181640625, "learning_rate": 2.3801796443533436e-05, "loss": 1.039713978767395, "step": 1070 }, { "epoch": 0.08770690661179867, "grad_norm": 0.1611328125, "learning_rate": 2.3801343366611766e-05, "loss": 0.887320339679718, "step": 1071 }, { "epoch": 0.08778879914831762, "grad_norm": 0.1533203125, "learning_rate": 2.3800889776755113e-05, "loss": 0.954429030418396, "step": 1072 }, { "epoch": 0.08787069168483658, "grad_norm": 0.1806640625, "learning_rate": 2.3800435673983183e-05, "loss": 0.8651047945022583, "step": 1073 }, { "epoch": 0.08795258422135553, "grad_norm": 0.17578125, "learning_rate": 2.3799981058315715e-05, "loss": 0.969821572303772, "step": 1074 }, { "epoch": 0.08803447675787449, "grad_norm": 0.1796875, "learning_rate": 2.3799525929772474e-05, "loss": 1.157622218132019, "step": 1075 }, { "epoch": 0.08811636929439343, "grad_norm": 0.19140625, "learning_rate": 2.3799070288373233e-05, "loss": 0.49655666947364807, "step": 1076 }, { "epoch": 0.08819826183091238, "grad_norm": 0.21875, "learning_rate": 2.3798614134137802e-05, "loss": 0.9968538284301758, "step": 1077 }, { "epoch": 0.08828015436743133, "grad_norm": 0.1474609375, "learning_rate": 2.3798157467086003e-05, "loss": 1.0708034038543701, "step": 1078 }, { "epoch": 0.08836204690395029, "grad_norm": 0.189453125, "learning_rate": 2.3797700287237696e-05, "loss": 0.873045802116394, "step": 1079 }, { "epoch": 0.08844393944046924, "grad_norm": 0.1728515625, "learning_rate": 2.3797242594612736e-05, "loss": 0.841846227645874, "step": 1080 }, { "epoch": 0.0885258319769882, "grad_norm": 0.1591796875, "learning_rate": 2.3796784389231025e-05, "loss": 0.7304664850234985, "step": 1081 }, { "epoch": 0.08860772451350715, "grad_norm": 0.2021484375, "learning_rate": 2.3796325671112477e-05, "loss": 0.933490514755249, "step": 1082 }, { "epoch": 0.0886896170500261, "grad_norm": 0.177734375, "learning_rate": 2.379586644027703e-05, "loss": 1.2038507461547852, "step": 1083 }, { "epoch": 0.08877150958654506, "grad_norm": 0.1669921875, "learning_rate": 2.3795406696744646e-05, "loss": 0.8266881704330444, "step": 1084 }, { "epoch": 0.08885340212306402, "grad_norm": 0.1904296875, "learning_rate": 2.3794946440535304e-05, "loss": 0.8591686487197876, "step": 1085 }, { "epoch": 0.08893529465958296, "grad_norm": 0.1748046875, "learning_rate": 2.3794485671669005e-05, "loss": 0.9704241752624512, "step": 1086 }, { "epoch": 0.08901718719610191, "grad_norm": 0.1767578125, "learning_rate": 2.3794024390165786e-05, "loss": 0.8588113784790039, "step": 1087 }, { "epoch": 0.08909907973262086, "grad_norm": 0.1650390625, "learning_rate": 2.379356259604569e-05, "loss": 0.7976828217506409, "step": 1088 }, { "epoch": 0.08918097226913982, "grad_norm": 0.17578125, "learning_rate": 2.379310028932879e-05, "loss": 0.9168569445610046, "step": 1089 }, { "epoch": 0.08926286480565877, "grad_norm": 0.16015625, "learning_rate": 2.3792637470035175e-05, "loss": 0.6929162740707397, "step": 1090 }, { "epoch": 0.08934475734217773, "grad_norm": 0.173828125, "learning_rate": 2.3792174138184973e-05, "loss": 1.091790795326233, "step": 1091 }, { "epoch": 0.08942664987869668, "grad_norm": 0.1650390625, "learning_rate": 2.379171029379831e-05, "loss": 0.8192435503005981, "step": 1092 }, { "epoch": 0.08950854241521564, "grad_norm": 0.1689453125, "learning_rate": 2.379124593689535e-05, "loss": 0.9715936779975891, "step": 1093 }, { "epoch": 0.08959043495173459, "grad_norm": 0.1572265625, "learning_rate": 2.3790781067496282e-05, "loss": 1.0156961679458618, "step": 1094 }, { "epoch": 0.08967232748825354, "grad_norm": 0.1826171875, "learning_rate": 2.37903156856213e-05, "loss": 0.8466988205909729, "step": 1095 }, { "epoch": 0.0897542200247725, "grad_norm": 0.1650390625, "learning_rate": 2.3789849791290642e-05, "loss": 0.5893105864524841, "step": 1096 }, { "epoch": 0.08983611256129144, "grad_norm": 0.1572265625, "learning_rate": 2.378938338452455e-05, "loss": 0.7864755392074585, "step": 1097 }, { "epoch": 0.0899180050978104, "grad_norm": 0.1396484375, "learning_rate": 2.3788916465343302e-05, "loss": 0.7493928670883179, "step": 1098 }, { "epoch": 0.08999989763432935, "grad_norm": 0.1875, "learning_rate": 2.3788449033767187e-05, "loss": 1.1909775733947754, "step": 1099 }, { "epoch": 0.0900817901708483, "grad_norm": 0.185546875, "learning_rate": 2.3787981089816526e-05, "loss": 0.8320218324661255, "step": 1100 }, { "epoch": 0.09016368270736726, "grad_norm": 0.1640625, "learning_rate": 2.3787512633511654e-05, "loss": 0.8960492610931396, "step": 1101 }, { "epoch": 0.09024557524388621, "grad_norm": 0.166015625, "learning_rate": 2.3787043664872934e-05, "loss": 0.8856436014175415, "step": 1102 }, { "epoch": 0.09032746778040517, "grad_norm": 0.193359375, "learning_rate": 2.3786574183920752e-05, "loss": 0.8444043397903442, "step": 1103 }, { "epoch": 0.09040936031692412, "grad_norm": 0.1484375, "learning_rate": 2.3786104190675506e-05, "loss": 0.9387022256851196, "step": 1104 }, { "epoch": 0.09049125285344307, "grad_norm": 0.173828125, "learning_rate": 2.378563368515763e-05, "loss": 1.1504004001617432, "step": 1105 }, { "epoch": 0.09057314538996203, "grad_norm": 0.1435546875, "learning_rate": 2.3785162667387576e-05, "loss": 0.7708770632743835, "step": 1106 }, { "epoch": 0.09065503792648097, "grad_norm": 0.15234375, "learning_rate": 2.378469113738581e-05, "loss": 0.6220785975456238, "step": 1107 }, { "epoch": 0.09073693046299992, "grad_norm": 0.1513671875, "learning_rate": 2.378421909517283e-05, "loss": 0.8233803510665894, "step": 1108 }, { "epoch": 0.09081882299951888, "grad_norm": 0.1669921875, "learning_rate": 2.378374654076915e-05, "loss": 1.0616023540496826, "step": 1109 }, { "epoch": 0.09090071553603783, "grad_norm": 0.205078125, "learning_rate": 2.3783273474195315e-05, "loss": 0.6827281713485718, "step": 1110 }, { "epoch": 0.09098260807255679, "grad_norm": 0.15234375, "learning_rate": 2.378279989547188e-05, "loss": 0.7312248349189758, "step": 1111 }, { "epoch": 0.09106450060907574, "grad_norm": 0.181640625, "learning_rate": 2.3782325804619435e-05, "loss": 0.9664647579193115, "step": 1112 }, { "epoch": 0.0911463931455947, "grad_norm": 0.1865234375, "learning_rate": 2.378185120165858e-05, "loss": 0.8721472024917603, "step": 1113 }, { "epoch": 0.09122828568211365, "grad_norm": 0.166015625, "learning_rate": 2.378137608660995e-05, "loss": 0.8965354561805725, "step": 1114 }, { "epoch": 0.0913101782186326, "grad_norm": 0.1943359375, "learning_rate": 2.378090045949419e-05, "loss": 1.1307601928710938, "step": 1115 }, { "epoch": 0.09139207075515156, "grad_norm": 0.1689453125, "learning_rate": 2.378042432033197e-05, "loss": 0.9814367294311523, "step": 1116 }, { "epoch": 0.0914739632916705, "grad_norm": 0.1728515625, "learning_rate": 2.377994766914399e-05, "loss": 1.02003812789917, "step": 1117 }, { "epoch": 0.09155585582818945, "grad_norm": 0.1884765625, "learning_rate": 2.377947050595097e-05, "loss": 1.2087690830230713, "step": 1118 }, { "epoch": 0.09163774836470841, "grad_norm": 0.185546875, "learning_rate": 2.3778992830773644e-05, "loss": 0.6142762899398804, "step": 1119 }, { "epoch": 0.09171964090122736, "grad_norm": 0.16015625, "learning_rate": 2.377851464363278e-05, "loss": 0.8210044503211975, "step": 1120 }, { "epoch": 0.09180153343774632, "grad_norm": 0.1591796875, "learning_rate": 2.3778035944549148e-05, "loss": 0.7467665076255798, "step": 1121 }, { "epoch": 0.09188342597426527, "grad_norm": 0.18359375, "learning_rate": 2.377755673354357e-05, "loss": 0.7314620018005371, "step": 1122 }, { "epoch": 0.09196531851078422, "grad_norm": 0.1640625, "learning_rate": 2.3777077010636864e-05, "loss": 0.8658515214920044, "step": 1123 }, { "epoch": 0.09204721104730318, "grad_norm": 0.2138671875, "learning_rate": 2.377659677584989e-05, "loss": 0.8021686673164368, "step": 1124 }, { "epoch": 0.09212910358382213, "grad_norm": 0.1640625, "learning_rate": 2.3776116029203512e-05, "loss": 0.7749435901641846, "step": 1125 }, { "epoch": 0.09221099612034109, "grad_norm": 0.1923828125, "learning_rate": 2.3775634770718632e-05, "loss": 0.8898816704750061, "step": 1126 }, { "epoch": 0.09229288865686004, "grad_norm": 0.2109375, "learning_rate": 2.3775153000416165e-05, "loss": 1.1790481805801392, "step": 1127 }, { "epoch": 0.09237478119337898, "grad_norm": 0.173828125, "learning_rate": 2.3774670718317044e-05, "loss": 0.62465500831604, "step": 1128 }, { "epoch": 0.09245667372989794, "grad_norm": 0.2197265625, "learning_rate": 2.3774187924442243e-05, "loss": 0.9543071985244751, "step": 1129 }, { "epoch": 0.09253856626641689, "grad_norm": 0.1708984375, "learning_rate": 2.3773704618812737e-05, "loss": 0.697795569896698, "step": 1130 }, { "epoch": 0.09262045880293585, "grad_norm": 0.138671875, "learning_rate": 2.377322080144954e-05, "loss": 0.6742991209030151, "step": 1131 }, { "epoch": 0.0927023513394548, "grad_norm": 0.31640625, "learning_rate": 2.3772736472373675e-05, "loss": 1.0602946281433105, "step": 1132 }, { "epoch": 0.09278424387597375, "grad_norm": 0.15625, "learning_rate": 2.3772251631606192e-05, "loss": 0.9222228527069092, "step": 1133 }, { "epoch": 0.09286613641249271, "grad_norm": 0.2109375, "learning_rate": 2.377176627916817e-05, "loss": 0.825689435005188, "step": 1134 }, { "epoch": 0.09294802894901166, "grad_norm": 0.16796875, "learning_rate": 2.37712804150807e-05, "loss": 0.8277634382247925, "step": 1135 }, { "epoch": 0.09302992148553062, "grad_norm": 0.2119140625, "learning_rate": 2.37707940393649e-05, "loss": 0.9808722138404846, "step": 1136 }, { "epoch": 0.09311181402204957, "grad_norm": 0.1728515625, "learning_rate": 2.3770307152041913e-05, "loss": 0.6830477118492126, "step": 1137 }, { "epoch": 0.09319370655856851, "grad_norm": 0.1494140625, "learning_rate": 2.3769819753132897e-05, "loss": 0.9099923372268677, "step": 1138 }, { "epoch": 0.09327559909508747, "grad_norm": 0.16015625, "learning_rate": 2.376933184265904e-05, "loss": 0.8088845014572144, "step": 1139 }, { "epoch": 0.09335749163160642, "grad_norm": 0.205078125, "learning_rate": 2.3768843420641545e-05, "loss": 0.8877663612365723, "step": 1140 }, { "epoch": 0.09343938416812538, "grad_norm": 0.181640625, "learning_rate": 2.3768354487101646e-05, "loss": 1.0679798126220703, "step": 1141 }, { "epoch": 0.09352127670464433, "grad_norm": 0.1943359375, "learning_rate": 2.3767865042060584e-05, "loss": 1.0294158458709717, "step": 1142 }, { "epoch": 0.09360316924116328, "grad_norm": 0.1748046875, "learning_rate": 2.3767375085539645e-05, "loss": 0.6938620209693909, "step": 1143 }, { "epoch": 0.09368506177768224, "grad_norm": 0.1923828125, "learning_rate": 2.3766884617560115e-05, "loss": 1.0503660440444946, "step": 1144 }, { "epoch": 0.09376695431420119, "grad_norm": 0.140625, "learning_rate": 2.3766393638143317e-05, "loss": 0.6979154944419861, "step": 1145 }, { "epoch": 0.09384884685072015, "grad_norm": 0.185546875, "learning_rate": 2.376590214731059e-05, "loss": 1.1186442375183105, "step": 1146 }, { "epoch": 0.0939307393872391, "grad_norm": 0.1533203125, "learning_rate": 2.3765410145083295e-05, "loss": 1.0059125423431396, "step": 1147 }, { "epoch": 0.09401263192375804, "grad_norm": 0.1845703125, "learning_rate": 2.3764917631482815e-05, "loss": 0.8100786209106445, "step": 1148 }, { "epoch": 0.094094524460277, "grad_norm": 0.1689453125, "learning_rate": 2.376442460653056e-05, "loss": 1.0981206893920898, "step": 1149 }, { "epoch": 0.09417641699679595, "grad_norm": 0.2080078125, "learning_rate": 2.376393107024796e-05, "loss": 1.0711621046066284, "step": 1150 }, { "epoch": 0.0942583095333149, "grad_norm": 0.171875, "learning_rate": 2.3763437022656456e-05, "loss": 0.9385680556297302, "step": 1151 }, { "epoch": 0.09434020206983386, "grad_norm": 0.18359375, "learning_rate": 2.3762942463777536e-05, "loss": 1.0812402963638306, "step": 1152 }, { "epoch": 0.09442209460635281, "grad_norm": 0.1689453125, "learning_rate": 2.3762447393632687e-05, "loss": 0.9176828265190125, "step": 1153 }, { "epoch": 0.09450398714287177, "grad_norm": 0.173828125, "learning_rate": 2.3761951812243427e-05, "loss": 0.6775470972061157, "step": 1154 }, { "epoch": 0.09458587967939072, "grad_norm": 0.251953125, "learning_rate": 2.3761455719631297e-05, "loss": 1.1069231033325195, "step": 1155 }, { "epoch": 0.09466777221590968, "grad_norm": 0.1630859375, "learning_rate": 2.376095911581786e-05, "loss": 1.2215290069580078, "step": 1156 }, { "epoch": 0.09474966475242863, "grad_norm": 0.158203125, "learning_rate": 2.3760462000824702e-05, "loss": 0.8767337203025818, "step": 1157 }, { "epoch": 0.09483155728894758, "grad_norm": 0.228515625, "learning_rate": 2.3759964374673428e-05, "loss": 1.3536542654037476, "step": 1158 }, { "epoch": 0.09491344982546653, "grad_norm": 0.23828125, "learning_rate": 2.3759466237385665e-05, "loss": 0.8645602464675903, "step": 1159 }, { "epoch": 0.09499534236198548, "grad_norm": 0.2060546875, "learning_rate": 2.3758967588983066e-05, "loss": 0.7537268400192261, "step": 1160 }, { "epoch": 0.09507723489850443, "grad_norm": 0.21484375, "learning_rate": 2.3758468429487302e-05, "loss": 1.343405842781067, "step": 1161 }, { "epoch": 0.09515912743502339, "grad_norm": 0.1875, "learning_rate": 2.375796875892007e-05, "loss": 0.5895355939865112, "step": 1162 }, { "epoch": 0.09524101997154234, "grad_norm": 0.1708984375, "learning_rate": 2.375746857730309e-05, "loss": 0.7574297785758972, "step": 1163 }, { "epoch": 0.0953229125080613, "grad_norm": 0.1708984375, "learning_rate": 2.3756967884658103e-05, "loss": 1.0528451204299927, "step": 1164 }, { "epoch": 0.09540480504458025, "grad_norm": 0.1865234375, "learning_rate": 2.3756466681006865e-05, "loss": 1.221675992012024, "step": 1165 }, { "epoch": 0.0954866975810992, "grad_norm": 0.1708984375, "learning_rate": 2.3755964966371164e-05, "loss": 0.9941505193710327, "step": 1166 }, { "epoch": 0.09556859011761816, "grad_norm": 0.2138671875, "learning_rate": 2.375546274077281e-05, "loss": 0.8325309753417969, "step": 1167 }, { "epoch": 0.09565048265413711, "grad_norm": 0.208984375, "learning_rate": 2.3754960004233622e-05, "loss": 1.1821049451828003, "step": 1168 }, { "epoch": 0.09573237519065605, "grad_norm": 0.14453125, "learning_rate": 2.3754456756775458e-05, "loss": 0.973341703414917, "step": 1169 }, { "epoch": 0.09581426772717501, "grad_norm": 0.22265625, "learning_rate": 2.3753952998420193e-05, "loss": 0.8207471966743469, "step": 1170 }, { "epoch": 0.09589616026369396, "grad_norm": 0.1484375, "learning_rate": 2.375344872918972e-05, "loss": 0.7534990310668945, "step": 1171 }, { "epoch": 0.09597805280021292, "grad_norm": 0.18359375, "learning_rate": 2.3752943949105952e-05, "loss": 1.007485032081604, "step": 1172 }, { "epoch": 0.09605994533673187, "grad_norm": 0.1552734375, "learning_rate": 2.3752438658190838e-05, "loss": 0.6339713931083679, "step": 1173 }, { "epoch": 0.09614183787325083, "grad_norm": 0.2041015625, "learning_rate": 2.3751932856466336e-05, "loss": 0.8266494870185852, "step": 1174 }, { "epoch": 0.09622373040976978, "grad_norm": 0.15625, "learning_rate": 2.3751426543954424e-05, "loss": 1.0286155939102173, "step": 1175 }, { "epoch": 0.09630562294628874, "grad_norm": 0.169921875, "learning_rate": 2.375091972067712e-05, "loss": 0.9234880805015564, "step": 1176 }, { "epoch": 0.09638751548280769, "grad_norm": 0.1796875, "learning_rate": 2.3750412386656444e-05, "loss": 0.9872547388076782, "step": 1177 }, { "epoch": 0.09646940801932664, "grad_norm": 0.1572265625, "learning_rate": 2.3749904541914446e-05, "loss": 0.8976078629493713, "step": 1178 }, { "epoch": 0.0965513005558456, "grad_norm": 0.193359375, "learning_rate": 2.3749396186473205e-05, "loss": 1.085433006286621, "step": 1179 }, { "epoch": 0.09663319309236454, "grad_norm": 0.1640625, "learning_rate": 2.3748887320354813e-05, "loss": 0.7641810178756714, "step": 1180 }, { "epoch": 0.0967150856288835, "grad_norm": 0.1689453125, "learning_rate": 2.3748377943581386e-05, "loss": 0.6829699277877808, "step": 1181 }, { "epoch": 0.09679697816540245, "grad_norm": 0.2001953125, "learning_rate": 2.374786805617507e-05, "loss": 0.8693859577178955, "step": 1182 }, { "epoch": 0.0968788707019214, "grad_norm": 0.208984375, "learning_rate": 2.374735765815802e-05, "loss": 0.7369914650917053, "step": 1183 }, { "epoch": 0.09696076323844036, "grad_norm": 0.158203125, "learning_rate": 2.374684674955242e-05, "loss": 0.9452453255653381, "step": 1184 }, { "epoch": 0.09704265577495931, "grad_norm": 0.1708984375, "learning_rate": 2.3746335330380484e-05, "loss": 1.0259038209915161, "step": 1185 }, { "epoch": 0.09712454831147826, "grad_norm": 0.181640625, "learning_rate": 2.3745823400664435e-05, "loss": 0.7812591791152954, "step": 1186 }, { "epoch": 0.09720644084799722, "grad_norm": 0.1845703125, "learning_rate": 2.3745310960426517e-05, "loss": 0.6742737293243408, "step": 1187 }, { "epoch": 0.09728833338451617, "grad_norm": 0.1640625, "learning_rate": 2.3744798009689015e-05, "loss": 0.855651319026947, "step": 1188 }, { "epoch": 0.09737022592103513, "grad_norm": 0.1708984375, "learning_rate": 2.3744284548474215e-05, "loss": 0.994301974773407, "step": 1189 }, { "epoch": 0.09745211845755407, "grad_norm": 0.173828125, "learning_rate": 2.3743770576804435e-05, "loss": 0.9601473808288574, "step": 1190 }, { "epoch": 0.09753401099407302, "grad_norm": 0.17578125, "learning_rate": 2.374325609470202e-05, "loss": 0.5318287014961243, "step": 1191 }, { "epoch": 0.09761590353059198, "grad_norm": 0.220703125, "learning_rate": 2.3742741102189332e-05, "loss": 1.1253520250320435, "step": 1192 }, { "epoch": 0.09769779606711093, "grad_norm": 0.166015625, "learning_rate": 2.3742225599288746e-05, "loss": 0.7274142503738403, "step": 1193 }, { "epoch": 0.09777968860362989, "grad_norm": 0.1640625, "learning_rate": 2.3741709586022673e-05, "loss": 0.6838446855545044, "step": 1194 }, { "epoch": 0.09786158114014884, "grad_norm": 0.2109375, "learning_rate": 2.374119306241354e-05, "loss": 1.2360278367996216, "step": 1195 }, { "epoch": 0.0979434736766678, "grad_norm": 0.177734375, "learning_rate": 2.3740676028483803e-05, "loss": 0.8400553464889526, "step": 1196 }, { "epoch": 0.09802536621318675, "grad_norm": 0.19140625, "learning_rate": 2.3740158484255924e-05, "loss": 1.3896855115890503, "step": 1197 }, { "epoch": 0.0981072587497057, "grad_norm": 0.21875, "learning_rate": 2.3739640429752406e-05, "loss": 0.9722265005111694, "step": 1198 }, { "epoch": 0.09818915128622466, "grad_norm": 0.1611328125, "learning_rate": 2.3739121864995767e-05, "loss": 0.5556004047393799, "step": 1199 }, { "epoch": 0.0982710438227436, "grad_norm": 0.16796875, "learning_rate": 2.3738602790008533e-05, "loss": 0.684959888458252, "step": 1200 }, { "epoch": 0.09835293635926255, "grad_norm": 0.1982421875, "learning_rate": 2.373808320481328e-05, "loss": 0.9369453191757202, "step": 1201 }, { "epoch": 0.0984348288957815, "grad_norm": 0.1708984375, "learning_rate": 2.3737563109432584e-05, "loss": 0.9035078287124634, "step": 1202 }, { "epoch": 0.09851672143230046, "grad_norm": 0.17578125, "learning_rate": 2.373704250388905e-05, "loss": 0.9444288611412048, "step": 1203 }, { "epoch": 0.09859861396881942, "grad_norm": 0.1396484375, "learning_rate": 2.373652138820531e-05, "loss": 0.963218629360199, "step": 1204 }, { "epoch": 0.09868050650533837, "grad_norm": 0.162109375, "learning_rate": 2.373599976240401e-05, "loss": 0.8077481985092163, "step": 1205 }, { "epoch": 0.09876239904185732, "grad_norm": 0.1748046875, "learning_rate": 2.3735477626507825e-05, "loss": 0.8083269596099854, "step": 1206 }, { "epoch": 0.09884429157837628, "grad_norm": 0.1708984375, "learning_rate": 2.373495498053944e-05, "loss": 0.8624137043952942, "step": 1207 }, { "epoch": 0.09892618411489523, "grad_norm": 0.166015625, "learning_rate": 2.3734431824521585e-05, "loss": 0.7679412364959717, "step": 1208 }, { "epoch": 0.09900807665141419, "grad_norm": 0.1630859375, "learning_rate": 2.3733908158476996e-05, "loss": 0.7846046686172485, "step": 1209 }, { "epoch": 0.09908996918793314, "grad_norm": 0.177734375, "learning_rate": 2.3733383982428425e-05, "loss": 0.846065104007721, "step": 1210 }, { "epoch": 0.09917186172445208, "grad_norm": 0.1572265625, "learning_rate": 2.373285929639866e-05, "loss": 0.7402765154838562, "step": 1211 }, { "epoch": 0.09925375426097104, "grad_norm": 0.1484375, "learning_rate": 2.3732334100410507e-05, "loss": 0.6380136013031006, "step": 1212 }, { "epoch": 0.09933564679748999, "grad_norm": 0.1640625, "learning_rate": 2.373180839448679e-05, "loss": 0.767514705657959, "step": 1213 }, { "epoch": 0.09941753933400894, "grad_norm": 0.2197265625, "learning_rate": 2.3731282178650363e-05, "loss": 0.928498387336731, "step": 1214 }, { "epoch": 0.0994994318705279, "grad_norm": 0.1513671875, "learning_rate": 2.3730755452924096e-05, "loss": 0.8237026333808899, "step": 1215 }, { "epoch": 0.09958132440704685, "grad_norm": 0.16015625, "learning_rate": 2.373022821733088e-05, "loss": 0.9202016592025757, "step": 1216 }, { "epoch": 0.09966321694356581, "grad_norm": 0.1611328125, "learning_rate": 2.372970047189363e-05, "loss": 0.7350637912750244, "step": 1217 }, { "epoch": 0.09974510948008476, "grad_norm": 0.177734375, "learning_rate": 2.372917221663529e-05, "loss": 0.7856539487838745, "step": 1218 }, { "epoch": 0.09982700201660372, "grad_norm": 0.1982421875, "learning_rate": 2.3728643451578813e-05, "loss": 0.8335817456245422, "step": 1219 }, { "epoch": 0.09990889455312267, "grad_norm": 0.1494140625, "learning_rate": 2.372811417674719e-05, "loss": 0.7585546970367432, "step": 1220 }, { "epoch": 0.09999078708964161, "grad_norm": 0.20703125, "learning_rate": 2.3727584392163416e-05, "loss": 1.0586453676223755, "step": 1221 }, { "epoch": 0.10007267962616057, "grad_norm": 0.173828125, "learning_rate": 2.3727054097850523e-05, "loss": 0.8208510279655457, "step": 1222 }, { "epoch": 0.10015457216267952, "grad_norm": 0.1982421875, "learning_rate": 2.372652329383156e-05, "loss": 1.077556848526001, "step": 1223 }, { "epoch": 0.10023646469919847, "grad_norm": 0.1689453125, "learning_rate": 2.3725991980129597e-05, "loss": 1.1347525119781494, "step": 1224 }, { "epoch": 0.10031835723571743, "grad_norm": 0.20703125, "learning_rate": 2.3725460156767725e-05, "loss": 0.9661146402359009, "step": 1225 }, { "epoch": 0.10040024977223638, "grad_norm": 0.1787109375, "learning_rate": 2.372492782376906e-05, "loss": 0.8614739179611206, "step": 1226 }, { "epoch": 0.10048214230875534, "grad_norm": 0.1845703125, "learning_rate": 2.372439498115674e-05, "loss": 0.6785455346107483, "step": 1227 }, { "epoch": 0.10056403484527429, "grad_norm": 0.1650390625, "learning_rate": 2.372386162895393e-05, "loss": 0.767665445804596, "step": 1228 }, { "epoch": 0.10064592738179325, "grad_norm": 0.18359375, "learning_rate": 2.37233277671838e-05, "loss": 0.7161230444908142, "step": 1229 }, { "epoch": 0.1007278199183122, "grad_norm": 0.181640625, "learning_rate": 2.3722793395869566e-05, "loss": 1.0382130146026611, "step": 1230 }, { "epoch": 0.10080971245483115, "grad_norm": 0.1494140625, "learning_rate": 2.3722258515034443e-05, "loss": 0.7062875628471375, "step": 1231 }, { "epoch": 0.1008916049913501, "grad_norm": 0.15625, "learning_rate": 2.3721723124701688e-05, "loss": 0.8426028490066528, "step": 1232 }, { "epoch": 0.10097349752786905, "grad_norm": 0.1669921875, "learning_rate": 2.3721187224894566e-05, "loss": 0.9212101101875305, "step": 1233 }, { "epoch": 0.101055390064388, "grad_norm": 0.1650390625, "learning_rate": 2.3720650815636375e-05, "loss": 0.8535024523735046, "step": 1234 }, { "epoch": 0.10113728260090696, "grad_norm": 0.20703125, "learning_rate": 2.372011389695042e-05, "loss": 0.9604092836380005, "step": 1235 }, { "epoch": 0.10121917513742591, "grad_norm": 0.181640625, "learning_rate": 2.3719576468860048e-05, "loss": 0.9024224281311035, "step": 1236 }, { "epoch": 0.10130106767394487, "grad_norm": 0.1689453125, "learning_rate": 2.371903853138861e-05, "loss": 1.0288610458374023, "step": 1237 }, { "epoch": 0.10138296021046382, "grad_norm": 0.1591796875, "learning_rate": 2.3718500084559492e-05, "loss": 0.8394074440002441, "step": 1238 }, { "epoch": 0.10146485274698278, "grad_norm": 0.1669921875, "learning_rate": 2.3717961128396096e-05, "loss": 0.8096848726272583, "step": 1239 }, { "epoch": 0.10154674528350173, "grad_norm": 0.1396484375, "learning_rate": 2.3717421662921844e-05, "loss": 0.8051522970199585, "step": 1240 }, { "epoch": 0.10162863782002068, "grad_norm": 0.228515625, "learning_rate": 2.371688168816019e-05, "loss": 1.0528494119644165, "step": 1241 }, { "epoch": 0.10171053035653962, "grad_norm": 0.1669921875, "learning_rate": 2.3716341204134595e-05, "loss": 0.7388101816177368, "step": 1242 }, { "epoch": 0.10179242289305858, "grad_norm": 0.173828125, "learning_rate": 2.371580021086856e-05, "loss": 1.0895910263061523, "step": 1243 }, { "epoch": 0.10187431542957753, "grad_norm": 0.1669921875, "learning_rate": 2.3715258708385592e-05, "loss": 0.7570978999137878, "step": 1244 }, { "epoch": 0.10195620796609649, "grad_norm": 0.169921875, "learning_rate": 2.3714716696709226e-05, "loss": 0.8175119161605835, "step": 1245 }, { "epoch": 0.10203810050261544, "grad_norm": 0.146484375, "learning_rate": 2.3714174175863027e-05, "loss": 0.9203885197639465, "step": 1246 }, { "epoch": 0.1021199930391344, "grad_norm": 0.1845703125, "learning_rate": 2.371363114587057e-05, "loss": 0.7850099205970764, "step": 1247 }, { "epoch": 0.10220188557565335, "grad_norm": 0.15625, "learning_rate": 2.3713087606755456e-05, "loss": 0.9579271078109741, "step": 1248 }, { "epoch": 0.1022837781121723, "grad_norm": 0.177734375, "learning_rate": 2.371254355854132e-05, "loss": 0.795863687992096, "step": 1249 }, { "epoch": 0.10236567064869126, "grad_norm": 0.1630859375, "learning_rate": 2.371199900125179e-05, "loss": 0.7548266649246216, "step": 1250 }, { "epoch": 0.10244756318521021, "grad_norm": 0.16015625, "learning_rate": 2.3711453934910552e-05, "loss": 0.867897093296051, "step": 1251 }, { "epoch": 0.10252945572172915, "grad_norm": 0.173828125, "learning_rate": 2.371090835954129e-05, "loss": 0.845425546169281, "step": 1252 }, { "epoch": 0.10261134825824811, "grad_norm": 0.1748046875, "learning_rate": 2.3710362275167715e-05, "loss": 0.7724940776824951, "step": 1253 }, { "epoch": 0.10269324079476706, "grad_norm": 0.142578125, "learning_rate": 2.3709815681813562e-05, "loss": 0.6816731095314026, "step": 1254 }, { "epoch": 0.10277513333128602, "grad_norm": 0.166015625, "learning_rate": 2.370926857950259e-05, "loss": 0.802532970905304, "step": 1255 }, { "epoch": 0.10285702586780497, "grad_norm": 0.15234375, "learning_rate": 2.3708720968258582e-05, "loss": 0.8309116363525391, "step": 1256 }, { "epoch": 0.10293891840432393, "grad_norm": 0.193359375, "learning_rate": 2.3708172848105336e-05, "loss": 0.8417888879776001, "step": 1257 }, { "epoch": 0.10302081094084288, "grad_norm": 0.1591796875, "learning_rate": 2.3707624219066676e-05, "loss": 0.7619121670722961, "step": 1258 }, { "epoch": 0.10310270347736183, "grad_norm": 0.1396484375, "learning_rate": 2.3707075081166445e-05, "loss": 0.9109389781951904, "step": 1259 }, { "epoch": 0.10318459601388079, "grad_norm": 0.1953125, "learning_rate": 2.3706525434428514e-05, "loss": 0.9622306227684021, "step": 1260 }, { "epoch": 0.10326648855039974, "grad_norm": 0.1533203125, "learning_rate": 2.370597527887677e-05, "loss": 0.7471734285354614, "step": 1261 }, { "epoch": 0.1033483810869187, "grad_norm": 0.2021484375, "learning_rate": 2.370542461453513e-05, "loss": 1.0727462768554688, "step": 1262 }, { "epoch": 0.10343027362343764, "grad_norm": 0.166015625, "learning_rate": 2.3704873441427524e-05, "loss": 0.8078822493553162, "step": 1263 }, { "epoch": 0.10351216615995659, "grad_norm": 0.1748046875, "learning_rate": 2.370432175957791e-05, "loss": 0.8034855723381042, "step": 1264 }, { "epoch": 0.10359405869647555, "grad_norm": 0.29296875, "learning_rate": 2.3703769569010266e-05, "loss": 0.682559072971344, "step": 1265 }, { "epoch": 0.1036759512329945, "grad_norm": 0.212890625, "learning_rate": 2.3703216869748593e-05, "loss": 0.8307250142097473, "step": 1266 }, { "epoch": 0.10375784376951346, "grad_norm": 0.17578125, "learning_rate": 2.370266366181691e-05, "loss": 0.7775280475616455, "step": 1267 }, { "epoch": 0.10383973630603241, "grad_norm": 0.1962890625, "learning_rate": 2.3702109945239264e-05, "loss": 0.8794049024581909, "step": 1268 }, { "epoch": 0.10392162884255136, "grad_norm": 0.173828125, "learning_rate": 2.3701555720039724e-05, "loss": 0.8266291618347168, "step": 1269 }, { "epoch": 0.10400352137907032, "grad_norm": 0.19921875, "learning_rate": 2.3701000986242377e-05, "loss": 1.2334038019180298, "step": 1270 }, { "epoch": 0.10408541391558927, "grad_norm": 0.1806640625, "learning_rate": 2.3700445743871335e-05, "loss": 0.8541483283042908, "step": 1271 }, { "epoch": 0.10416730645210823, "grad_norm": 0.1474609375, "learning_rate": 2.369988999295073e-05, "loss": 0.869008481502533, "step": 1272 }, { "epoch": 0.10424919898862717, "grad_norm": 0.15625, "learning_rate": 2.369933373350472e-05, "loss": 0.8213968276977539, "step": 1273 }, { "epoch": 0.10433109152514612, "grad_norm": 0.171875, "learning_rate": 2.3698776965557477e-05, "loss": 1.0679702758789062, "step": 1274 }, { "epoch": 0.10441298406166508, "grad_norm": 0.162109375, "learning_rate": 2.3698219689133204e-05, "loss": 0.9498001933097839, "step": 1275 }, { "epoch": 0.10449487659818403, "grad_norm": 0.1708984375, "learning_rate": 2.369766190425612e-05, "loss": 0.806916356086731, "step": 1276 }, { "epoch": 0.10457676913470298, "grad_norm": 0.1796875, "learning_rate": 2.3697103610950474e-05, "loss": 0.8300641179084778, "step": 1277 }, { "epoch": 0.10465866167122194, "grad_norm": 0.1650390625, "learning_rate": 2.3696544809240522e-05, "loss": 0.7898098826408386, "step": 1278 }, { "epoch": 0.1047405542077409, "grad_norm": 0.1572265625, "learning_rate": 2.3695985499150567e-05, "loss": 0.6283451318740845, "step": 1279 }, { "epoch": 0.10482244674425985, "grad_norm": 0.1708984375, "learning_rate": 2.3695425680704904e-05, "loss": 0.8911735415458679, "step": 1280 }, { "epoch": 0.1049043392807788, "grad_norm": 0.1865234375, "learning_rate": 2.3694865353927875e-05, "loss": 0.8601976633071899, "step": 1281 }, { "epoch": 0.10498623181729776, "grad_norm": 0.1513671875, "learning_rate": 2.3694304518843827e-05, "loss": 0.7631255388259888, "step": 1282 }, { "epoch": 0.10506812435381671, "grad_norm": 0.1826171875, "learning_rate": 2.3693743175477136e-05, "loss": 0.90514075756073, "step": 1283 }, { "epoch": 0.10515001689033565, "grad_norm": 0.18359375, "learning_rate": 2.369318132385221e-05, "loss": 0.739728569984436, "step": 1284 }, { "epoch": 0.1052319094268546, "grad_norm": 0.181640625, "learning_rate": 2.369261896399346e-05, "loss": 1.0946121215820312, "step": 1285 }, { "epoch": 0.10531380196337356, "grad_norm": 0.18359375, "learning_rate": 2.3692056095925332e-05, "loss": 0.6571671366691589, "step": 1286 }, { "epoch": 0.10539569449989251, "grad_norm": 0.15234375, "learning_rate": 2.369149271967229e-05, "loss": 0.7671352624893188, "step": 1287 }, { "epoch": 0.10547758703641147, "grad_norm": 0.162109375, "learning_rate": 2.369092883525882e-05, "loss": 0.6566355228424072, "step": 1288 }, { "epoch": 0.10555947957293042, "grad_norm": 0.14453125, "learning_rate": 2.3690364442709435e-05, "loss": 0.6651175022125244, "step": 1289 }, { "epoch": 0.10564137210944938, "grad_norm": 0.171875, "learning_rate": 2.3689799542048657e-05, "loss": 0.940728485584259, "step": 1290 }, { "epoch": 0.10572326464596833, "grad_norm": 0.16796875, "learning_rate": 2.3689234133301048e-05, "loss": 0.833203911781311, "step": 1291 }, { "epoch": 0.10580515718248729, "grad_norm": 0.1728515625, "learning_rate": 2.3688668216491177e-05, "loss": 0.8607549071311951, "step": 1292 }, { "epoch": 0.10588704971900624, "grad_norm": 0.169921875, "learning_rate": 2.3688101791643644e-05, "loss": 0.9978910088539124, "step": 1293 }, { "epoch": 0.10596894225552518, "grad_norm": 0.15234375, "learning_rate": 2.368753485878307e-05, "loss": 0.6208826899528503, "step": 1294 }, { "epoch": 0.10605083479204414, "grad_norm": 0.1552734375, "learning_rate": 2.3686967417934088e-05, "loss": 0.9551267623901367, "step": 1295 }, { "epoch": 0.10613272732856309, "grad_norm": 0.1962890625, "learning_rate": 2.3686399469121373e-05, "loss": 0.8951752185821533, "step": 1296 }, { "epoch": 0.10621461986508204, "grad_norm": 0.2060546875, "learning_rate": 2.36858310123696e-05, "loss": 0.9852178692817688, "step": 1297 }, { "epoch": 0.106296512401601, "grad_norm": 0.181640625, "learning_rate": 2.368526204770348e-05, "loss": 0.9228842258453369, "step": 1298 }, { "epoch": 0.10637840493811995, "grad_norm": 0.21484375, "learning_rate": 2.3684692575147747e-05, "loss": 0.8692556619644165, "step": 1299 }, { "epoch": 0.1064602974746389, "grad_norm": 0.1552734375, "learning_rate": 2.3684122594727148e-05, "loss": 0.6508516669273376, "step": 1300 }, { "epoch": 0.10654219001115786, "grad_norm": 0.1474609375, "learning_rate": 2.3683552106466455e-05, "loss": 1.052425742149353, "step": 1301 }, { "epoch": 0.10662408254767682, "grad_norm": 0.1826171875, "learning_rate": 2.3682981110390468e-05, "loss": 0.9383931756019592, "step": 1302 }, { "epoch": 0.10670597508419577, "grad_norm": 0.1845703125, "learning_rate": 2.3682409606524003e-05, "loss": 0.6776193976402283, "step": 1303 }, { "epoch": 0.10678786762071471, "grad_norm": 0.1728515625, "learning_rate": 2.3681837594891902e-05, "loss": 0.697880744934082, "step": 1304 }, { "epoch": 0.10686976015723366, "grad_norm": 0.2001953125, "learning_rate": 2.3681265075519022e-05, "loss": 0.8639987111091614, "step": 1305 }, { "epoch": 0.10695165269375262, "grad_norm": 0.1826171875, "learning_rate": 2.3680692048430254e-05, "loss": 0.8437212109565735, "step": 1306 }, { "epoch": 0.10703354523027157, "grad_norm": 0.1826171875, "learning_rate": 2.3680118513650496e-05, "loss": 0.621090829372406, "step": 1307 }, { "epoch": 0.10711543776679053, "grad_norm": 0.162109375, "learning_rate": 2.3679544471204686e-05, "loss": 0.931085467338562, "step": 1308 }, { "epoch": 0.10719733030330948, "grad_norm": 0.1376953125, "learning_rate": 2.3678969921117766e-05, "loss": 0.8058827519416809, "step": 1309 }, { "epoch": 0.10727922283982844, "grad_norm": 0.1416015625, "learning_rate": 2.3678394863414707e-05, "loss": 0.6842740178108215, "step": 1310 }, { "epoch": 0.10736111537634739, "grad_norm": 0.1708984375, "learning_rate": 2.3677819298120514e-05, "loss": 0.8930715918540955, "step": 1311 }, { "epoch": 0.10744300791286635, "grad_norm": 0.1787109375, "learning_rate": 2.3677243225260195e-05, "loss": 0.765418529510498, "step": 1312 }, { "epoch": 0.1075249004493853, "grad_norm": 0.150390625, "learning_rate": 2.367666664485879e-05, "loss": 0.9842696189880371, "step": 1313 }, { "epoch": 0.10760679298590425, "grad_norm": 0.189453125, "learning_rate": 2.367608955694136e-05, "loss": 0.7024298906326294, "step": 1314 }, { "epoch": 0.1076886855224232, "grad_norm": 0.1708984375, "learning_rate": 2.3675511961532985e-05, "loss": 1.0522289276123047, "step": 1315 }, { "epoch": 0.10777057805894215, "grad_norm": 0.2099609375, "learning_rate": 2.3674933858658776e-05, "loss": 0.6449146270751953, "step": 1316 }, { "epoch": 0.1078524705954611, "grad_norm": 0.1875, "learning_rate": 2.367435524834385e-05, "loss": 1.0934807062149048, "step": 1317 }, { "epoch": 0.10793436313198006, "grad_norm": 0.193359375, "learning_rate": 2.367377613061337e-05, "loss": 0.9252569079399109, "step": 1318 }, { "epoch": 0.10801625566849901, "grad_norm": 0.1806640625, "learning_rate": 2.3673196505492493e-05, "loss": 0.7038392424583435, "step": 1319 }, { "epoch": 0.10809814820501797, "grad_norm": 0.17578125, "learning_rate": 2.367261637300642e-05, "loss": 0.7427034974098206, "step": 1320 }, { "epoch": 0.10818004074153692, "grad_norm": 0.158203125, "learning_rate": 2.3672035733180364e-05, "loss": 1.0501346588134766, "step": 1321 }, { "epoch": 0.10826193327805587, "grad_norm": 0.1884765625, "learning_rate": 2.367145458603956e-05, "loss": 0.9486469030380249, "step": 1322 }, { "epoch": 0.10834382581457483, "grad_norm": 0.2001953125, "learning_rate": 2.367087293160927e-05, "loss": 1.0136432647705078, "step": 1323 }, { "epoch": 0.10842571835109378, "grad_norm": 0.1884765625, "learning_rate": 2.3670290769914774e-05, "loss": 0.835607647895813, "step": 1324 }, { "epoch": 0.10850761088761272, "grad_norm": 0.1826171875, "learning_rate": 2.366970810098137e-05, "loss": 0.6189396977424622, "step": 1325 }, { "epoch": 0.10858950342413168, "grad_norm": 0.1533203125, "learning_rate": 2.366912492483439e-05, "loss": 0.5748917460441589, "step": 1326 }, { "epoch": 0.10867139596065063, "grad_norm": 0.1640625, "learning_rate": 2.366854124149918e-05, "loss": 0.8622034788131714, "step": 1327 }, { "epoch": 0.10875328849716959, "grad_norm": 0.1396484375, "learning_rate": 2.366795705100111e-05, "loss": 0.7574789524078369, "step": 1328 }, { "epoch": 0.10883518103368854, "grad_norm": 0.1650390625, "learning_rate": 2.3667372353365567e-05, "loss": 0.8984616994857788, "step": 1329 }, { "epoch": 0.1089170735702075, "grad_norm": 0.1875, "learning_rate": 2.3666787148617968e-05, "loss": 1.061842679977417, "step": 1330 }, { "epoch": 0.10899896610672645, "grad_norm": 0.1474609375, "learning_rate": 2.366620143678375e-05, "loss": 1.0523066520690918, "step": 1331 }, { "epoch": 0.1090808586432454, "grad_norm": 0.1875, "learning_rate": 2.3665615217888367e-05, "loss": 1.214852213859558, "step": 1332 }, { "epoch": 0.10916275117976436, "grad_norm": 0.1796875, "learning_rate": 2.3665028491957296e-05, "loss": 0.6970698237419128, "step": 1333 }, { "epoch": 0.10924464371628331, "grad_norm": 0.15625, "learning_rate": 2.3664441259016047e-05, "loss": 0.4861088991165161, "step": 1334 }, { "epoch": 0.10932653625280227, "grad_norm": 0.162109375, "learning_rate": 2.3663853519090136e-05, "loss": 0.9725649952888489, "step": 1335 }, { "epoch": 0.10940842878932121, "grad_norm": 0.1748046875, "learning_rate": 2.3663265272205112e-05, "loss": 1.0047404766082764, "step": 1336 }, { "epoch": 0.10949032132584016, "grad_norm": 0.1708984375, "learning_rate": 2.366267651838654e-05, "loss": 0.6580833196640015, "step": 1337 }, { "epoch": 0.10957221386235912, "grad_norm": 0.1650390625, "learning_rate": 2.3662087257660012e-05, "loss": 0.9446367025375366, "step": 1338 }, { "epoch": 0.10965410639887807, "grad_norm": 0.197265625, "learning_rate": 2.366149749005114e-05, "loss": 1.0608404874801636, "step": 1339 }, { "epoch": 0.10973599893539702, "grad_norm": 0.173828125, "learning_rate": 2.3660907215585557e-05, "loss": 1.1110117435455322, "step": 1340 }, { "epoch": 0.10981789147191598, "grad_norm": 0.173828125, "learning_rate": 2.3660316434288915e-05, "loss": 0.9198451638221741, "step": 1341 }, { "epoch": 0.10989978400843493, "grad_norm": 0.1689453125, "learning_rate": 2.36597251461869e-05, "loss": 0.7069704532623291, "step": 1342 }, { "epoch": 0.10998167654495389, "grad_norm": 0.2119140625, "learning_rate": 2.365913335130521e-05, "loss": 0.931777834892273, "step": 1343 }, { "epoch": 0.11006356908147284, "grad_norm": 0.177734375, "learning_rate": 2.3658541049669557e-05, "loss": 0.8208369016647339, "step": 1344 }, { "epoch": 0.1101454616179918, "grad_norm": 0.203125, "learning_rate": 2.3657948241305698e-05, "loss": 1.2157013416290283, "step": 1345 }, { "epoch": 0.11022735415451074, "grad_norm": 0.1650390625, "learning_rate": 2.3657354926239387e-05, "loss": 1.0143898725509644, "step": 1346 }, { "epoch": 0.11030924669102969, "grad_norm": 0.162109375, "learning_rate": 2.3656761104496422e-05, "loss": 1.0013749599456787, "step": 1347 }, { "epoch": 0.11039113922754865, "grad_norm": 0.1787109375, "learning_rate": 2.3656166776102607e-05, "loss": 0.8227722644805908, "step": 1348 }, { "epoch": 0.1104730317640676, "grad_norm": 0.1669921875, "learning_rate": 2.3655571941083777e-05, "loss": 0.7714188098907471, "step": 1349 }, { "epoch": 0.11055492430058655, "grad_norm": 0.1826171875, "learning_rate": 2.3654976599465783e-05, "loss": 1.1967484951019287, "step": 1350 }, { "epoch": 0.11063681683710551, "grad_norm": 0.16796875, "learning_rate": 2.3654380751274504e-05, "loss": 1.0309585332870483, "step": 1351 }, { "epoch": 0.11071870937362446, "grad_norm": 0.208984375, "learning_rate": 2.3653784396535835e-05, "loss": 0.666429340839386, "step": 1352 }, { "epoch": 0.11080060191014342, "grad_norm": 0.1552734375, "learning_rate": 2.3653187535275702e-05, "loss": 1.0589696168899536, "step": 1353 }, { "epoch": 0.11088249444666237, "grad_norm": 0.1689453125, "learning_rate": 2.3652590167520038e-05, "loss": 0.8405098915100098, "step": 1354 }, { "epoch": 0.11096438698318133, "grad_norm": 0.1923828125, "learning_rate": 2.3651992293294812e-05, "loss": 0.9821621179580688, "step": 1355 }, { "epoch": 0.11104627951970027, "grad_norm": 0.177734375, "learning_rate": 2.3651393912626013e-05, "loss": 1.0876461267471313, "step": 1356 }, { "epoch": 0.11112817205621922, "grad_norm": 0.150390625, "learning_rate": 2.3650795025539643e-05, "loss": 0.7555913329124451, "step": 1357 }, { "epoch": 0.11121006459273818, "grad_norm": 0.169921875, "learning_rate": 2.3650195632061737e-05, "loss": 0.8301786184310913, "step": 1358 }, { "epoch": 0.11129195712925713, "grad_norm": 0.18359375, "learning_rate": 2.3649595732218344e-05, "loss": 0.8340923190116882, "step": 1359 }, { "epoch": 0.11137384966577608, "grad_norm": 0.177734375, "learning_rate": 2.364899532603554e-05, "loss": 0.6152247786521912, "step": 1360 }, { "epoch": 0.11145574220229504, "grad_norm": 0.16796875, "learning_rate": 2.364839441353942e-05, "loss": 0.8651869893074036, "step": 1361 }, { "epoch": 0.11153763473881399, "grad_norm": 0.1611328125, "learning_rate": 2.3647792994756105e-05, "loss": 0.750312328338623, "step": 1362 }, { "epoch": 0.11161952727533295, "grad_norm": 0.171875, "learning_rate": 2.3647191069711728e-05, "loss": 0.6322492361068726, "step": 1363 }, { "epoch": 0.1117014198118519, "grad_norm": 0.23046875, "learning_rate": 2.364658863843246e-05, "loss": 0.7558035254478455, "step": 1364 }, { "epoch": 0.11178331234837086, "grad_norm": 0.2255859375, "learning_rate": 2.3645985700944483e-05, "loss": 0.9625349044799805, "step": 1365 }, { "epoch": 0.11186520488488981, "grad_norm": 0.17578125, "learning_rate": 2.3645382257273992e-05, "loss": 1.1611638069152832, "step": 1366 }, { "epoch": 0.11194709742140875, "grad_norm": 0.169921875, "learning_rate": 2.364477830744723e-05, "loss": 0.7579199075698853, "step": 1367 }, { "epoch": 0.1120289899579277, "grad_norm": 0.18359375, "learning_rate": 2.3644173851490444e-05, "loss": 0.9915123581886292, "step": 1368 }, { "epoch": 0.11211088249444666, "grad_norm": 0.169921875, "learning_rate": 2.36435688894299e-05, "loss": 0.8808772563934326, "step": 1369 }, { "epoch": 0.11219277503096561, "grad_norm": 0.1552734375, "learning_rate": 2.3642963421291897e-05, "loss": 0.7593473792076111, "step": 1370 }, { "epoch": 0.11227466756748457, "grad_norm": 0.1416015625, "learning_rate": 2.3642357447102748e-05, "loss": 0.8324772119522095, "step": 1371 }, { "epoch": 0.11235656010400352, "grad_norm": 0.2099609375, "learning_rate": 2.3641750966888795e-05, "loss": 0.93071049451828, "step": 1372 }, { "epoch": 0.11243845264052248, "grad_norm": 0.1513671875, "learning_rate": 2.36411439806764e-05, "loss": 1.1701622009277344, "step": 1373 }, { "epoch": 0.11252034517704143, "grad_norm": 0.1787109375, "learning_rate": 2.3640536488491934e-05, "loss": 1.3812800645828247, "step": 1374 }, { "epoch": 0.11260223771356039, "grad_norm": 0.16015625, "learning_rate": 2.3639928490361814e-05, "loss": 0.6663912534713745, "step": 1375 }, { "epoch": 0.11268413025007934, "grad_norm": 0.1943359375, "learning_rate": 2.3639319986312456e-05, "loss": 1.032552719116211, "step": 1376 }, { "epoch": 0.11276602278659828, "grad_norm": 0.15234375, "learning_rate": 2.3638710976370313e-05, "loss": 0.7712052464485168, "step": 1377 }, { "epoch": 0.11284791532311723, "grad_norm": 0.1669921875, "learning_rate": 2.3638101460561855e-05, "loss": 1.2373815774917603, "step": 1378 }, { "epoch": 0.11292980785963619, "grad_norm": 0.171875, "learning_rate": 2.3637491438913575e-05, "loss": 1.1107985973358154, "step": 1379 }, { "epoch": 0.11301170039615514, "grad_norm": 0.1806640625, "learning_rate": 2.3636880911451987e-05, "loss": 0.795857310295105, "step": 1380 }, { "epoch": 0.1130935929326741, "grad_norm": 0.1787109375, "learning_rate": 2.3636269878203622e-05, "loss": 0.660552978515625, "step": 1381 }, { "epoch": 0.11317548546919305, "grad_norm": 0.212890625, "learning_rate": 2.3635658339195045e-05, "loss": 0.8467762470245361, "step": 1382 }, { "epoch": 0.113257378005712, "grad_norm": 0.177734375, "learning_rate": 2.3635046294452833e-05, "loss": 0.7546990513801575, "step": 1383 }, { "epoch": 0.11333927054223096, "grad_norm": 0.134765625, "learning_rate": 2.3634433744003585e-05, "loss": 0.6038516759872437, "step": 1384 }, { "epoch": 0.11342116307874991, "grad_norm": 0.1728515625, "learning_rate": 2.363382068787393e-05, "loss": 0.716826319694519, "step": 1385 }, { "epoch": 0.11350305561526887, "grad_norm": 0.1650390625, "learning_rate": 2.3633207126090512e-05, "loss": 0.6720520257949829, "step": 1386 }, { "epoch": 0.11358494815178781, "grad_norm": 0.18359375, "learning_rate": 2.363259305868e-05, "loss": 0.9037716388702393, "step": 1387 }, { "epoch": 0.11366684068830676, "grad_norm": 0.2099609375, "learning_rate": 2.3631978485669078e-05, "loss": 0.9661259651184082, "step": 1388 }, { "epoch": 0.11374873322482572, "grad_norm": 0.1953125, "learning_rate": 2.3631363407084466e-05, "loss": 1.051241397857666, "step": 1389 }, { "epoch": 0.11383062576134467, "grad_norm": 0.1494140625, "learning_rate": 2.363074782295289e-05, "loss": 1.0183197259902954, "step": 1390 }, { "epoch": 0.11391251829786363, "grad_norm": 0.1650390625, "learning_rate": 2.3630131733301116e-05, "loss": 0.7491849660873413, "step": 1391 }, { "epoch": 0.11399441083438258, "grad_norm": 0.1708984375, "learning_rate": 2.3629515138155914e-05, "loss": 0.8053170442581177, "step": 1392 }, { "epoch": 0.11407630337090154, "grad_norm": 0.1552734375, "learning_rate": 2.3628898037544083e-05, "loss": 0.7001284956932068, "step": 1393 }, { "epoch": 0.11415819590742049, "grad_norm": 0.177734375, "learning_rate": 2.3628280431492448e-05, "loss": 0.6447217464447021, "step": 1394 }, { "epoch": 0.11424008844393944, "grad_norm": 0.1611328125, "learning_rate": 2.3627662320027855e-05, "loss": 0.7947399616241455, "step": 1395 }, { "epoch": 0.1143219809804584, "grad_norm": 0.255859375, "learning_rate": 2.3627043703177164e-05, "loss": 1.1202263832092285, "step": 1396 }, { "epoch": 0.11440387351697735, "grad_norm": 0.2109375, "learning_rate": 2.3626424580967264e-05, "loss": 0.8530346751213074, "step": 1397 }, { "epoch": 0.1144857660534963, "grad_norm": 0.185546875, "learning_rate": 2.362580495342507e-05, "loss": 0.8882307410240173, "step": 1398 }, { "epoch": 0.11456765859001525, "grad_norm": 0.1923828125, "learning_rate": 2.362518482057751e-05, "loss": 0.9850342869758606, "step": 1399 }, { "epoch": 0.1146495511265342, "grad_norm": 0.1787109375, "learning_rate": 2.3624564182451533e-05, "loss": 1.0179409980773926, "step": 1400 }, { "epoch": 0.11473144366305316, "grad_norm": 0.203125, "learning_rate": 2.3623943039074118e-05, "loss": 1.0570334196090698, "step": 1401 }, { "epoch": 0.11481333619957211, "grad_norm": 0.2021484375, "learning_rate": 2.3623321390472267e-05, "loss": 0.7722122669219971, "step": 1402 }, { "epoch": 0.11489522873609107, "grad_norm": 0.17578125, "learning_rate": 2.3622699236672993e-05, "loss": 1.1823748350143433, "step": 1403 }, { "epoch": 0.11497712127261002, "grad_norm": 0.1748046875, "learning_rate": 2.3622076577703344e-05, "loss": 0.8372278809547424, "step": 1404 }, { "epoch": 0.11505901380912897, "grad_norm": 0.1953125, "learning_rate": 2.3621453413590375e-05, "loss": 0.8335072994232178, "step": 1405 }, { "epoch": 0.11514090634564793, "grad_norm": 0.169921875, "learning_rate": 2.3620829744361175e-05, "loss": 1.0014448165893555, "step": 1406 }, { "epoch": 0.11522279888216688, "grad_norm": 0.2421875, "learning_rate": 2.362020557004285e-05, "loss": 0.9114251732826233, "step": 1407 }, { "epoch": 0.11530469141868582, "grad_norm": 0.14453125, "learning_rate": 2.3619580890662536e-05, "loss": 0.8452286124229431, "step": 1408 }, { "epoch": 0.11538658395520478, "grad_norm": 0.1572265625, "learning_rate": 2.3618955706247376e-05, "loss": 0.7000858187675476, "step": 1409 }, { "epoch": 0.11546847649172373, "grad_norm": 0.1474609375, "learning_rate": 2.3618330016824547e-05, "loss": 1.0321189165115356, "step": 1410 }, { "epoch": 0.11555036902824269, "grad_norm": 0.1494140625, "learning_rate": 2.361770382242124e-05, "loss": 0.6062124967575073, "step": 1411 }, { "epoch": 0.11563226156476164, "grad_norm": 0.1591796875, "learning_rate": 2.361707712306468e-05, "loss": 0.6719691753387451, "step": 1412 }, { "epoch": 0.1157141541012806, "grad_norm": 0.1513671875, "learning_rate": 2.3616449918782097e-05, "loss": 1.252720832824707, "step": 1413 }, { "epoch": 0.11579604663779955, "grad_norm": 0.21484375, "learning_rate": 2.361582220960076e-05, "loss": 0.6980074644088745, "step": 1414 }, { "epoch": 0.1158779391743185, "grad_norm": 0.201171875, "learning_rate": 2.3615193995547945e-05, "loss": 0.7396358847618103, "step": 1415 }, { "epoch": 0.11595983171083746, "grad_norm": 0.1455078125, "learning_rate": 2.361456527665096e-05, "loss": 0.527680516242981, "step": 1416 }, { "epoch": 0.11604172424735641, "grad_norm": 0.189453125, "learning_rate": 2.3613936052937133e-05, "loss": 1.0667195320129395, "step": 1417 }, { "epoch": 0.11612361678387537, "grad_norm": 0.1611328125, "learning_rate": 2.3613306324433805e-05, "loss": 0.7322046756744385, "step": 1418 }, { "epoch": 0.1162055093203943, "grad_norm": 0.1552734375, "learning_rate": 2.3612676091168358e-05, "loss": 0.7747830152511597, "step": 1419 }, { "epoch": 0.11628740185691326, "grad_norm": 0.1689453125, "learning_rate": 2.3612045353168175e-05, "loss": 0.8319998979568481, "step": 1420 }, { "epoch": 0.11636929439343222, "grad_norm": 0.2080078125, "learning_rate": 2.361141411046068e-05, "loss": 0.6736820340156555, "step": 1421 }, { "epoch": 0.11645118692995117, "grad_norm": 0.16015625, "learning_rate": 2.3610782363073302e-05, "loss": 0.9293571710586548, "step": 1422 }, { "epoch": 0.11653307946647012, "grad_norm": 0.1650390625, "learning_rate": 2.3610150111033496e-05, "loss": 0.8201375007629395, "step": 1423 }, { "epoch": 0.11661497200298908, "grad_norm": 0.1796875, "learning_rate": 2.360951735436875e-05, "loss": 1.0424195528030396, "step": 1424 }, { "epoch": 0.11669686453950803, "grad_norm": 0.2138671875, "learning_rate": 2.3608884093106566e-05, "loss": 0.950284481048584, "step": 1425 }, { "epoch": 0.11677875707602699, "grad_norm": 0.1669921875, "learning_rate": 2.3608250327274465e-05, "loss": 0.8470015525817871, "step": 1426 }, { "epoch": 0.11686064961254594, "grad_norm": 0.1728515625, "learning_rate": 2.3607616056899995e-05, "loss": 0.7615162134170532, "step": 1427 }, { "epoch": 0.1169425421490649, "grad_norm": 0.154296875, "learning_rate": 2.360698128201072e-05, "loss": 0.6562010049819946, "step": 1428 }, { "epoch": 0.11702443468558384, "grad_norm": 0.169921875, "learning_rate": 2.3606346002634232e-05, "loss": 0.917264461517334, "step": 1429 }, { "epoch": 0.11710632722210279, "grad_norm": 0.1728515625, "learning_rate": 2.360571021879815e-05, "loss": 0.8433929681777954, "step": 1430 }, { "epoch": 0.11718821975862175, "grad_norm": 0.1669921875, "learning_rate": 2.3605073930530096e-05, "loss": 1.0980688333511353, "step": 1431 }, { "epoch": 0.1172701122951407, "grad_norm": 0.16015625, "learning_rate": 2.360443713785773e-05, "loss": 0.9113644361495972, "step": 1432 }, { "epoch": 0.11735200483165965, "grad_norm": 0.1533203125, "learning_rate": 2.360379984080874e-05, "loss": 0.8511938452720642, "step": 1433 }, { "epoch": 0.11743389736817861, "grad_norm": 0.2021484375, "learning_rate": 2.3603162039410807e-05, "loss": 1.0827933549880981, "step": 1434 }, { "epoch": 0.11751578990469756, "grad_norm": 0.197265625, "learning_rate": 2.360252373369166e-05, "loss": 0.9591037034988403, "step": 1435 }, { "epoch": 0.11759768244121652, "grad_norm": 0.1728515625, "learning_rate": 2.3601884923679053e-05, "loss": 0.973788321018219, "step": 1436 }, { "epoch": 0.11767957497773547, "grad_norm": 0.1630859375, "learning_rate": 2.360124560940074e-05, "loss": 0.8529676795005798, "step": 1437 }, { "epoch": 0.11776146751425443, "grad_norm": 0.17578125, "learning_rate": 2.3600605790884508e-05, "loss": 1.0046101808547974, "step": 1438 }, { "epoch": 0.11784336005077337, "grad_norm": 0.2412109375, "learning_rate": 2.359996546815817e-05, "loss": 1.1490534543991089, "step": 1439 }, { "epoch": 0.11792525258729232, "grad_norm": 0.12890625, "learning_rate": 2.3599324641249557e-05, "loss": 0.5618139505386353, "step": 1440 }, { "epoch": 0.11800714512381127, "grad_norm": 0.1845703125, "learning_rate": 2.359868331018652e-05, "loss": 0.9995988607406616, "step": 1441 }, { "epoch": 0.11808903766033023, "grad_norm": 0.17578125, "learning_rate": 2.3598041474996936e-05, "loss": 0.8002933263778687, "step": 1442 }, { "epoch": 0.11817093019684918, "grad_norm": 0.1689453125, "learning_rate": 2.3597399135708698e-05, "loss": 0.6839928030967712, "step": 1443 }, { "epoch": 0.11825282273336814, "grad_norm": 0.173828125, "learning_rate": 2.359675629234973e-05, "loss": 0.6120706796646118, "step": 1444 }, { "epoch": 0.11833471526988709, "grad_norm": 0.1591796875, "learning_rate": 2.359611294494797e-05, "loss": 1.011182427406311, "step": 1445 }, { "epoch": 0.11841660780640605, "grad_norm": 0.1513671875, "learning_rate": 2.359546909353138e-05, "loss": 0.8566934466362, "step": 1446 }, { "epoch": 0.118498500342925, "grad_norm": 0.20703125, "learning_rate": 2.3594824738127945e-05, "loss": 0.9829684495925903, "step": 1447 }, { "epoch": 0.11858039287944395, "grad_norm": 0.185546875, "learning_rate": 2.359417987876567e-05, "loss": 0.8409010171890259, "step": 1448 }, { "epoch": 0.11866228541596291, "grad_norm": 0.166015625, "learning_rate": 2.3593534515472586e-05, "loss": 0.9676474332809448, "step": 1449 }, { "epoch": 0.11874417795248185, "grad_norm": 0.228515625, "learning_rate": 2.359288864827674e-05, "loss": 0.9093770384788513, "step": 1450 }, { "epoch": 0.1188260704890008, "grad_norm": 0.1689453125, "learning_rate": 2.359224227720621e-05, "loss": 0.7728246450424194, "step": 1451 }, { "epoch": 0.11890796302551976, "grad_norm": 0.177734375, "learning_rate": 2.359159540228908e-05, "loss": 0.9202204942703247, "step": 1452 }, { "epoch": 0.11898985556203871, "grad_norm": 0.1962890625, "learning_rate": 2.3590948023553475e-05, "loss": 1.076067566871643, "step": 1453 }, { "epoch": 0.11907174809855767, "grad_norm": 0.16796875, "learning_rate": 2.3590300141027527e-05, "loss": 0.8276640176773071, "step": 1454 }, { "epoch": 0.11915364063507662, "grad_norm": 0.15625, "learning_rate": 2.3589651754739402e-05, "loss": 0.7712468504905701, "step": 1455 }, { "epoch": 0.11923553317159558, "grad_norm": 0.1494140625, "learning_rate": 2.3589002864717273e-05, "loss": 0.5769593715667725, "step": 1456 }, { "epoch": 0.11931742570811453, "grad_norm": 0.1611328125, "learning_rate": 2.358835347098935e-05, "loss": 0.4050445556640625, "step": 1457 }, { "epoch": 0.11939931824463348, "grad_norm": 0.1455078125, "learning_rate": 2.3587703573583857e-05, "loss": 0.7761590480804443, "step": 1458 }, { "epoch": 0.11948121078115244, "grad_norm": 0.16015625, "learning_rate": 2.358705317252904e-05, "loss": 0.8701906800270081, "step": 1459 }, { "epoch": 0.11956310331767138, "grad_norm": 0.1630859375, "learning_rate": 2.3586402267853167e-05, "loss": 1.0924251079559326, "step": 1460 }, { "epoch": 0.11964499585419033, "grad_norm": 0.1865234375, "learning_rate": 2.3585750859584532e-05, "loss": 1.1683921813964844, "step": 1461 }, { "epoch": 0.11972688839070929, "grad_norm": 0.220703125, "learning_rate": 2.3585098947751446e-05, "loss": 0.8024615049362183, "step": 1462 }, { "epoch": 0.11980878092722824, "grad_norm": 0.1923828125, "learning_rate": 2.358444653238224e-05, "loss": 0.9534985423088074, "step": 1463 }, { "epoch": 0.1198906734637472, "grad_norm": 0.1455078125, "learning_rate": 2.3583793613505283e-05, "loss": 0.6281746029853821, "step": 1464 }, { "epoch": 0.11997256600026615, "grad_norm": 0.1806640625, "learning_rate": 2.3583140191148943e-05, "loss": 1.019468903541565, "step": 1465 }, { "epoch": 0.1200544585367851, "grad_norm": 0.1298828125, "learning_rate": 2.358248626534162e-05, "loss": 0.6304879188537598, "step": 1466 }, { "epoch": 0.12013635107330406, "grad_norm": 0.1669921875, "learning_rate": 2.358183183611174e-05, "loss": 0.7297646999359131, "step": 1467 }, { "epoch": 0.12021824360982301, "grad_norm": 0.1533203125, "learning_rate": 2.3581176903487746e-05, "loss": 0.6875846982002258, "step": 1468 }, { "epoch": 0.12030013614634197, "grad_norm": 0.1630859375, "learning_rate": 2.3580521467498107e-05, "loss": 0.662101149559021, "step": 1469 }, { "epoch": 0.12038202868286092, "grad_norm": 0.1748046875, "learning_rate": 2.3579865528171306e-05, "loss": 0.9831692576408386, "step": 1470 }, { "epoch": 0.12046392121937986, "grad_norm": 0.1513671875, "learning_rate": 2.3579209085535854e-05, "loss": 0.901297390460968, "step": 1471 }, { "epoch": 0.12054581375589882, "grad_norm": 0.1923828125, "learning_rate": 2.3578552139620286e-05, "loss": 0.7877477407455444, "step": 1472 }, { "epoch": 0.12062770629241777, "grad_norm": 0.1943359375, "learning_rate": 2.3577894690453154e-05, "loss": 0.7524563074111938, "step": 1473 }, { "epoch": 0.12070959882893673, "grad_norm": 0.2060546875, "learning_rate": 2.357723673806303e-05, "loss": 0.7991000413894653, "step": 1474 }, { "epoch": 0.12079149136545568, "grad_norm": 0.1689453125, "learning_rate": 2.3576578282478522e-05, "loss": 0.8455972671508789, "step": 1475 }, { "epoch": 0.12087338390197463, "grad_norm": 0.13671875, "learning_rate": 2.357591932372823e-05, "loss": 0.6629756689071655, "step": 1476 }, { "epoch": 0.12095527643849359, "grad_norm": 0.2314453125, "learning_rate": 2.3575259861840813e-05, "loss": 0.94914710521698, "step": 1477 }, { "epoch": 0.12103716897501254, "grad_norm": 0.1962890625, "learning_rate": 2.3574599896844926e-05, "loss": 1.237830400466919, "step": 1478 }, { "epoch": 0.1211190615115315, "grad_norm": 0.1884765625, "learning_rate": 2.3573939428769255e-05, "loss": 1.1077529191970825, "step": 1479 }, { "epoch": 0.12120095404805045, "grad_norm": 0.17578125, "learning_rate": 2.357327845764251e-05, "loss": 0.6586318016052246, "step": 1480 }, { "epoch": 0.12128284658456939, "grad_norm": 0.2158203125, "learning_rate": 2.3572616983493417e-05, "loss": 1.1534115076065063, "step": 1481 }, { "epoch": 0.12136473912108835, "grad_norm": 0.15234375, "learning_rate": 2.357195500635072e-05, "loss": 0.8297834992408752, "step": 1482 }, { "epoch": 0.1214466316576073, "grad_norm": 0.1611328125, "learning_rate": 2.35712925262432e-05, "loss": 0.5930123329162598, "step": 1483 }, { "epoch": 0.12152852419412626, "grad_norm": 0.1689453125, "learning_rate": 2.3570629543199648e-05, "loss": 0.9174181222915649, "step": 1484 }, { "epoch": 0.12161041673064521, "grad_norm": 0.2001953125, "learning_rate": 2.356996605724888e-05, "loss": 0.677579402923584, "step": 1485 }, { "epoch": 0.12169230926716416, "grad_norm": 0.1533203125, "learning_rate": 2.3569302068419734e-05, "loss": 0.5644012689590454, "step": 1486 }, { "epoch": 0.12177420180368312, "grad_norm": 0.171875, "learning_rate": 2.3568637576741072e-05, "loss": 0.8930041790008545, "step": 1487 }, { "epoch": 0.12185609434020207, "grad_norm": 0.197265625, "learning_rate": 2.356797258224177e-05, "loss": 1.1511998176574707, "step": 1488 }, { "epoch": 0.12193798687672103, "grad_norm": 0.1650390625, "learning_rate": 2.3567307084950736e-05, "loss": 1.2148027420043945, "step": 1489 }, { "epoch": 0.12201987941323998, "grad_norm": 0.142578125, "learning_rate": 2.3566641084896894e-05, "loss": 0.7131853103637695, "step": 1490 }, { "epoch": 0.12210177194975892, "grad_norm": 0.490234375, "learning_rate": 2.3565974582109186e-05, "loss": 1.1556023359298706, "step": 1491 }, { "epoch": 0.12218366448627788, "grad_norm": 0.2294921875, "learning_rate": 2.3565307576616593e-05, "loss": 1.1626863479614258, "step": 1492 }, { "epoch": 0.12226555702279683, "grad_norm": 0.1376953125, "learning_rate": 2.3564640068448096e-05, "loss": 1.0818698406219482, "step": 1493 }, { "epoch": 0.12234744955931579, "grad_norm": 0.1748046875, "learning_rate": 2.356397205763271e-05, "loss": 0.8868678212165833, "step": 1494 }, { "epoch": 0.12242934209583474, "grad_norm": 0.140625, "learning_rate": 2.356330354419947e-05, "loss": 1.000571608543396, "step": 1495 }, { "epoch": 0.1225112346323537, "grad_norm": 0.1787109375, "learning_rate": 2.3562634528177433e-05, "loss": 0.9135953783988953, "step": 1496 }, { "epoch": 0.12259312716887265, "grad_norm": 0.1279296875, "learning_rate": 2.3561965009595674e-05, "loss": 0.6179946064949036, "step": 1497 }, { "epoch": 0.1226750197053916, "grad_norm": 0.1796875, "learning_rate": 2.35612949884833e-05, "loss": 1.0819332599639893, "step": 1498 }, { "epoch": 0.12275691224191056, "grad_norm": 0.16015625, "learning_rate": 2.3560624464869427e-05, "loss": 0.7386307120323181, "step": 1499 }, { "epoch": 0.12283880477842951, "grad_norm": 0.15234375, "learning_rate": 2.3559953438783195e-05, "loss": 0.6335099935531616, "step": 1500 }, { "epoch": 0.12292069731494847, "grad_norm": 0.162109375, "learning_rate": 2.355928191025378e-05, "loss": 0.7543211579322815, "step": 1501 }, { "epoch": 0.1230025898514674, "grad_norm": 0.1396484375, "learning_rate": 2.355860987931036e-05, "loss": 0.7102307081222534, "step": 1502 }, { "epoch": 0.12308448238798636, "grad_norm": 0.1767578125, "learning_rate": 2.3557937345982152e-05, "loss": 0.8057283163070679, "step": 1503 }, { "epoch": 0.12316637492450531, "grad_norm": 0.16796875, "learning_rate": 2.355726431029838e-05, "loss": 1.0413039922714233, "step": 1504 }, { "epoch": 0.12324826746102427, "grad_norm": 0.1650390625, "learning_rate": 2.3556590772288304e-05, "loss": 0.7117171287536621, "step": 1505 }, { "epoch": 0.12333015999754322, "grad_norm": 0.16015625, "learning_rate": 2.3555916731981194e-05, "loss": 1.0694845914840698, "step": 1506 }, { "epoch": 0.12341205253406218, "grad_norm": 0.166015625, "learning_rate": 2.3555242189406344e-05, "loss": 0.8402178883552551, "step": 1507 }, { "epoch": 0.12349394507058113, "grad_norm": 0.1904296875, "learning_rate": 2.355456714459308e-05, "loss": 0.9071080684661865, "step": 1508 }, { "epoch": 0.12357583760710009, "grad_norm": 0.1552734375, "learning_rate": 2.3553891597570737e-05, "loss": 0.8198522329330444, "step": 1509 }, { "epoch": 0.12365773014361904, "grad_norm": 0.177734375, "learning_rate": 2.3553215548368677e-05, "loss": 0.4995543956756592, "step": 1510 }, { "epoch": 0.123739622680138, "grad_norm": 0.16015625, "learning_rate": 2.3552538997016284e-05, "loss": 0.8689622282981873, "step": 1511 }, { "epoch": 0.12382151521665694, "grad_norm": 0.2060546875, "learning_rate": 2.3551861943542967e-05, "loss": 1.2427992820739746, "step": 1512 }, { "epoch": 0.12390340775317589, "grad_norm": 0.169921875, "learning_rate": 2.355118438797815e-05, "loss": 0.8442955017089844, "step": 1513 }, { "epoch": 0.12398530028969484, "grad_norm": 0.162109375, "learning_rate": 2.3550506330351287e-05, "loss": 0.8814522624015808, "step": 1514 }, { "epoch": 0.1240671928262138, "grad_norm": 0.1845703125, "learning_rate": 2.354982777069184e-05, "loss": 0.9446765184402466, "step": 1515 }, { "epoch": 0.12414908536273275, "grad_norm": 0.1787109375, "learning_rate": 2.3549148709029314e-05, "loss": 0.8336957693099976, "step": 1516 }, { "epoch": 0.12423097789925171, "grad_norm": 0.203125, "learning_rate": 2.3548469145393214e-05, "loss": 1.1811503171920776, "step": 1517 }, { "epoch": 0.12431287043577066, "grad_norm": 0.1728515625, "learning_rate": 2.354778907981308e-05, "loss": 1.0255887508392334, "step": 1518 }, { "epoch": 0.12439476297228962, "grad_norm": 0.1533203125, "learning_rate": 2.3547108512318475e-05, "loss": 0.6583116054534912, "step": 1519 }, { "epoch": 0.12447665550880857, "grad_norm": 0.181640625, "learning_rate": 2.354642744293897e-05, "loss": 1.0843958854675293, "step": 1520 }, { "epoch": 0.12455854804532752, "grad_norm": 0.1796875, "learning_rate": 2.3545745871704175e-05, "loss": 0.955223798751831, "step": 1521 }, { "epoch": 0.12464044058184648, "grad_norm": 0.1826171875, "learning_rate": 2.3545063798643708e-05, "loss": 0.7975376844406128, "step": 1522 }, { "epoch": 0.12472233311836542, "grad_norm": 0.158203125, "learning_rate": 2.354438122378722e-05, "loss": 0.6243172883987427, "step": 1523 }, { "epoch": 0.12480422565488437, "grad_norm": 0.150390625, "learning_rate": 2.3543698147164376e-05, "loss": 0.5798383951187134, "step": 1524 }, { "epoch": 0.12488611819140333, "grad_norm": 0.154296875, "learning_rate": 2.3543014568804867e-05, "loss": 0.8330278396606445, "step": 1525 }, { "epoch": 0.12496801072792228, "grad_norm": 0.2041015625, "learning_rate": 2.35423304887384e-05, "loss": 0.6851657629013062, "step": 1526 }, { "epoch": 0.12504990326444124, "grad_norm": 0.166015625, "learning_rate": 2.354164590699471e-05, "loss": 1.025659203529358, "step": 1527 }, { "epoch": 0.12513179580096018, "grad_norm": 0.1640625, "learning_rate": 2.3540960823603553e-05, "loss": 0.8657687306404114, "step": 1528 }, { "epoch": 0.12521368833747915, "grad_norm": 0.15234375, "learning_rate": 2.3540275238594708e-05, "loss": 0.7867289781570435, "step": 1529 }, { "epoch": 0.12529558087399809, "grad_norm": 0.1748046875, "learning_rate": 2.353958915199797e-05, "loss": 1.0214639902114868, "step": 1530 }, { "epoch": 0.12537747341051705, "grad_norm": 0.1982421875, "learning_rate": 2.3538902563843153e-05, "loss": 1.1388967037200928, "step": 1531 }, { "epoch": 0.125459365947036, "grad_norm": 0.169921875, "learning_rate": 2.353821547416011e-05, "loss": 0.6637215614318848, "step": 1532 }, { "epoch": 0.12554125848355496, "grad_norm": 0.18359375, "learning_rate": 2.3537527882978698e-05, "loss": 0.7284050583839417, "step": 1533 }, { "epoch": 0.1256231510200739, "grad_norm": 0.1845703125, "learning_rate": 2.353683979032881e-05, "loss": 0.6624422073364258, "step": 1534 }, { "epoch": 0.12570504355659287, "grad_norm": 0.2041015625, "learning_rate": 2.3536151196240342e-05, "loss": 1.0671190023422241, "step": 1535 }, { "epoch": 0.1257869360931118, "grad_norm": 0.2138671875, "learning_rate": 2.3535462100743234e-05, "loss": 0.7406049370765686, "step": 1536 }, { "epoch": 0.12586882862963078, "grad_norm": 0.1884765625, "learning_rate": 2.353477250386743e-05, "loss": 0.8896205425262451, "step": 1537 }, { "epoch": 0.12595072116614972, "grad_norm": 0.1669921875, "learning_rate": 2.3534082405642902e-05, "loss": 0.6648122072219849, "step": 1538 }, { "epoch": 0.12603261370266866, "grad_norm": 0.189453125, "learning_rate": 2.353339180609965e-05, "loss": 1.1986068487167358, "step": 1539 }, { "epoch": 0.12611450623918763, "grad_norm": 0.16796875, "learning_rate": 2.3532700705267685e-05, "loss": 0.8703520894050598, "step": 1540 }, { "epoch": 0.12619639877570657, "grad_norm": 0.16796875, "learning_rate": 2.3532009103177052e-05, "loss": 0.8013082146644592, "step": 1541 }, { "epoch": 0.12627829131222554, "grad_norm": 0.1923828125, "learning_rate": 2.35313169998578e-05, "loss": 0.6455479860305786, "step": 1542 }, { "epoch": 0.12636018384874448, "grad_norm": 0.1611328125, "learning_rate": 2.3530624395340025e-05, "loss": 1.1092280149459839, "step": 1543 }, { "epoch": 0.12644207638526345, "grad_norm": 0.1650390625, "learning_rate": 2.3529931289653817e-05, "loss": 0.8820127248764038, "step": 1544 }, { "epoch": 0.1265239689217824, "grad_norm": 0.15625, "learning_rate": 2.3529237682829308e-05, "loss": 0.7131547927856445, "step": 1545 }, { "epoch": 0.12660586145830136, "grad_norm": 0.1923828125, "learning_rate": 2.3528543574896645e-05, "loss": 0.7285799980163574, "step": 1546 }, { "epoch": 0.1266877539948203, "grad_norm": 0.1767578125, "learning_rate": 2.3527848965885998e-05, "loss": 0.8894945383071899, "step": 1547 }, { "epoch": 0.12676964653133924, "grad_norm": 0.142578125, "learning_rate": 2.352715385582755e-05, "loss": 0.6435328722000122, "step": 1548 }, { "epoch": 0.1268515390678582, "grad_norm": 0.2001953125, "learning_rate": 2.3526458244751523e-05, "loss": 0.759693443775177, "step": 1549 }, { "epoch": 0.12693343160437714, "grad_norm": 0.185546875, "learning_rate": 2.3525762132688145e-05, "loss": 1.003343939781189, "step": 1550 }, { "epoch": 0.1270153241408961, "grad_norm": 0.1513671875, "learning_rate": 2.352506551966767e-05, "loss": 0.7220286130905151, "step": 1551 }, { "epoch": 0.12709721667741505, "grad_norm": 0.232421875, "learning_rate": 2.3524368405720383e-05, "loss": 0.8319891691207886, "step": 1552 }, { "epoch": 0.12717910921393402, "grad_norm": 0.2060546875, "learning_rate": 2.352367079087658e-05, "loss": 0.8849310874938965, "step": 1553 }, { "epoch": 0.12726100175045296, "grad_norm": 0.169921875, "learning_rate": 2.3522972675166583e-05, "loss": 0.9899094700813293, "step": 1554 }, { "epoch": 0.12734289428697193, "grad_norm": 0.201171875, "learning_rate": 2.3522274058620733e-05, "loss": 1.2100054025650024, "step": 1555 }, { "epoch": 0.12742478682349087, "grad_norm": 0.154296875, "learning_rate": 2.3521574941269395e-05, "loss": 0.9192343354225159, "step": 1556 }, { "epoch": 0.12750667936000984, "grad_norm": 0.2080078125, "learning_rate": 2.3520875323142956e-05, "loss": 0.6621610522270203, "step": 1557 }, { "epoch": 0.12758857189652878, "grad_norm": 0.1689453125, "learning_rate": 2.352017520427182e-05, "loss": 0.8334894776344299, "step": 1558 }, { "epoch": 0.12767046443304772, "grad_norm": 0.18359375, "learning_rate": 2.351947458468643e-05, "loss": 1.0577324628829956, "step": 1559 }, { "epoch": 0.1277523569695667, "grad_norm": 0.1787109375, "learning_rate": 2.3518773464417223e-05, "loss": 0.8690276145935059, "step": 1560 }, { "epoch": 0.12783424950608563, "grad_norm": 0.1708984375, "learning_rate": 2.3518071843494682e-05, "loss": 1.1270685195922852, "step": 1561 }, { "epoch": 0.1279161420426046, "grad_norm": 0.1533203125, "learning_rate": 2.3517369721949296e-05, "loss": 1.227945327758789, "step": 1562 }, { "epoch": 0.12799803457912354, "grad_norm": 0.17578125, "learning_rate": 2.351666709981159e-05, "loss": 0.7913717031478882, "step": 1563 }, { "epoch": 0.1280799271156425, "grad_norm": 0.1552734375, "learning_rate": 2.3515963977112097e-05, "loss": 0.8744975328445435, "step": 1564 }, { "epoch": 0.12816181965216145, "grad_norm": 0.212890625, "learning_rate": 2.3515260353881376e-05, "loss": 0.8258058428764343, "step": 1565 }, { "epoch": 0.12824371218868041, "grad_norm": 0.1669921875, "learning_rate": 2.3514556230150012e-05, "loss": 0.7909632325172424, "step": 1566 }, { "epoch": 0.12832560472519935, "grad_norm": 0.16796875, "learning_rate": 2.351385160594861e-05, "loss": 0.8148907423019409, "step": 1567 }, { "epoch": 0.12840749726171832, "grad_norm": 0.1728515625, "learning_rate": 2.35131464813078e-05, "loss": 0.9934531450271606, "step": 1568 }, { "epoch": 0.12848938979823726, "grad_norm": 0.173828125, "learning_rate": 2.3512440856258222e-05, "loss": 0.8845478892326355, "step": 1569 }, { "epoch": 0.1285712823347562, "grad_norm": 0.169921875, "learning_rate": 2.3511734730830544e-05, "loss": 0.9996358752250671, "step": 1570 }, { "epoch": 0.12865317487127517, "grad_norm": 0.1884765625, "learning_rate": 2.3511028105055463e-05, "loss": 0.890006422996521, "step": 1571 }, { "epoch": 0.1287350674077941, "grad_norm": 0.201171875, "learning_rate": 2.3510320978963694e-05, "loss": 0.8518478870391846, "step": 1572 }, { "epoch": 0.12881695994431308, "grad_norm": 0.16015625, "learning_rate": 2.3509613352585963e-05, "loss": 1.0976216793060303, "step": 1573 }, { "epoch": 0.12889885248083202, "grad_norm": 0.1630859375, "learning_rate": 2.3508905225953037e-05, "loss": 0.6429983973503113, "step": 1574 }, { "epoch": 0.128980745017351, "grad_norm": 0.271484375, "learning_rate": 2.3508196599095682e-05, "loss": 0.8002221584320068, "step": 1575 }, { "epoch": 0.12906263755386993, "grad_norm": 0.162109375, "learning_rate": 2.3507487472044705e-05, "loss": 0.7020823359489441, "step": 1576 }, { "epoch": 0.1291445300903889, "grad_norm": 0.1708984375, "learning_rate": 2.350677784483093e-05, "loss": 0.9947074055671692, "step": 1577 }, { "epoch": 0.12922642262690784, "grad_norm": 0.158203125, "learning_rate": 2.3506067717485192e-05, "loss": 0.9710113406181335, "step": 1578 }, { "epoch": 0.1293083151634268, "grad_norm": 0.169921875, "learning_rate": 2.3505357090038365e-05, "loss": 0.39010822772979736, "step": 1579 }, { "epoch": 0.12939020769994575, "grad_norm": 0.193359375, "learning_rate": 2.350464596252133e-05, "loss": 1.1331063508987427, "step": 1580 }, { "epoch": 0.1294721002364647, "grad_norm": 0.1728515625, "learning_rate": 2.3503934334964996e-05, "loss": 1.0230588912963867, "step": 1581 }, { "epoch": 0.12955399277298366, "grad_norm": 0.1875, "learning_rate": 2.35032222074003e-05, "loss": 0.8720943331718445, "step": 1582 }, { "epoch": 0.1296358853095026, "grad_norm": 0.203125, "learning_rate": 2.3502509579858185e-05, "loss": 0.8469151258468628, "step": 1583 }, { "epoch": 0.12971777784602156, "grad_norm": 0.185546875, "learning_rate": 2.3501796452369626e-05, "loss": 0.7596884369850159, "step": 1584 }, { "epoch": 0.1297996703825405, "grad_norm": 0.1591796875, "learning_rate": 2.3501082824965627e-05, "loss": 0.9370070695877075, "step": 1585 }, { "epoch": 0.12988156291905947, "grad_norm": 0.2099609375, "learning_rate": 2.350036869767719e-05, "loss": 0.9339257478713989, "step": 1586 }, { "epoch": 0.1299634554555784, "grad_norm": 0.181640625, "learning_rate": 2.349965407053537e-05, "loss": 0.961476743221283, "step": 1587 }, { "epoch": 0.13004534799209738, "grad_norm": 0.1591796875, "learning_rate": 2.349893894357122e-05, "loss": 0.7527668476104736, "step": 1588 }, { "epoch": 0.13012724052861632, "grad_norm": 0.203125, "learning_rate": 2.3498223316815822e-05, "loss": 1.0761133432388306, "step": 1589 }, { "epoch": 0.13020913306513526, "grad_norm": 0.1455078125, "learning_rate": 2.349750719030028e-05, "loss": 1.0514628887176514, "step": 1590 }, { "epoch": 0.13029102560165423, "grad_norm": 0.1455078125, "learning_rate": 2.349679056405572e-05, "loss": 0.8418586850166321, "step": 1591 }, { "epoch": 0.13037291813817317, "grad_norm": 0.177734375, "learning_rate": 2.349607343811329e-05, "loss": 1.0045831203460693, "step": 1592 }, { "epoch": 0.13045481067469214, "grad_norm": 0.1884765625, "learning_rate": 2.349535581250416e-05, "loss": 0.7268948554992676, "step": 1593 }, { "epoch": 0.13053670321121108, "grad_norm": 0.181640625, "learning_rate": 2.3494637687259526e-05, "loss": 1.1282360553741455, "step": 1594 }, { "epoch": 0.13061859574773005, "grad_norm": 0.1474609375, "learning_rate": 2.3493919062410588e-05, "loss": 0.6756092309951782, "step": 1595 }, { "epoch": 0.130700488284249, "grad_norm": 0.181640625, "learning_rate": 2.349319993798859e-05, "loss": 0.9167482852935791, "step": 1596 }, { "epoch": 0.13078238082076796, "grad_norm": 0.16796875, "learning_rate": 2.3492480314024785e-05, "loss": 0.6549324989318848, "step": 1597 }, { "epoch": 0.1308642733572869, "grad_norm": 0.1875, "learning_rate": 2.349176019055045e-05, "loss": 1.1990950107574463, "step": 1598 }, { "epoch": 0.13094616589380587, "grad_norm": 0.1806640625, "learning_rate": 2.3491039567596892e-05, "loss": 1.0907965898513794, "step": 1599 }, { "epoch": 0.1310280584303248, "grad_norm": 0.1513671875, "learning_rate": 2.349031844519542e-05, "loss": 0.7681134939193726, "step": 1600 }, { "epoch": 0.13110995096684375, "grad_norm": 0.150390625, "learning_rate": 2.348959682337739e-05, "loss": 0.991611123085022, "step": 1601 }, { "epoch": 0.13119184350336272, "grad_norm": 0.1474609375, "learning_rate": 2.3488874702174152e-05, "loss": 0.7560036182403564, "step": 1602 }, { "epoch": 0.13127373603988166, "grad_norm": 0.1865234375, "learning_rate": 2.3488152081617105e-05, "loss": 0.6427559852600098, "step": 1603 }, { "epoch": 0.13135562857640062, "grad_norm": 0.1962890625, "learning_rate": 2.3487428961737652e-05, "loss": 0.9197355508804321, "step": 1604 }, { "epoch": 0.13143752111291956, "grad_norm": 0.193359375, "learning_rate": 2.3486705342567222e-05, "loss": 1.0656557083129883, "step": 1605 }, { "epoch": 0.13151941364943853, "grad_norm": 0.1796875, "learning_rate": 2.3485981224137267e-05, "loss": 0.7695053219795227, "step": 1606 }, { "epoch": 0.13160130618595747, "grad_norm": 0.185546875, "learning_rate": 2.3485256606479263e-05, "loss": 0.8690658211708069, "step": 1607 }, { "epoch": 0.13168319872247644, "grad_norm": 0.17578125, "learning_rate": 2.3484531489624698e-05, "loss": 0.9840766787528992, "step": 1608 }, { "epoch": 0.13176509125899538, "grad_norm": 0.169921875, "learning_rate": 2.34838058736051e-05, "loss": 0.7369621992111206, "step": 1609 }, { "epoch": 0.13184698379551435, "grad_norm": 0.2001953125, "learning_rate": 2.3483079758451995e-05, "loss": 0.7512542009353638, "step": 1610 }, { "epoch": 0.1319288763320333, "grad_norm": 0.1806640625, "learning_rate": 2.348235314419695e-05, "loss": 0.789833128452301, "step": 1611 }, { "epoch": 0.13201076886855223, "grad_norm": 0.1484375, "learning_rate": 2.3481626030871546e-05, "loss": 0.7180898189544678, "step": 1612 }, { "epoch": 0.1320926614050712, "grad_norm": 0.19140625, "learning_rate": 2.3480898418507387e-05, "loss": 0.640289306640625, "step": 1613 }, { "epoch": 0.13217455394159014, "grad_norm": 0.1513671875, "learning_rate": 2.348017030713609e-05, "loss": 0.8316268920898438, "step": 1614 }, { "epoch": 0.1322564464781091, "grad_norm": 0.1796875, "learning_rate": 2.3479441696789312e-05, "loss": 0.8022192120552063, "step": 1615 }, { "epoch": 0.13233833901462805, "grad_norm": 0.1611328125, "learning_rate": 2.3478712587498717e-05, "loss": 0.8319164514541626, "step": 1616 }, { "epoch": 0.13242023155114702, "grad_norm": 0.1611328125, "learning_rate": 2.3477982979296e-05, "loss": 0.8941062092781067, "step": 1617 }, { "epoch": 0.13250212408766596, "grad_norm": 0.1943359375, "learning_rate": 2.347725287221286e-05, "loss": 0.8899974226951599, "step": 1618 }, { "epoch": 0.13258401662418492, "grad_norm": 0.1435546875, "learning_rate": 2.3476522266281044e-05, "loss": 0.8108752965927124, "step": 1619 }, { "epoch": 0.13266590916070387, "grad_norm": 0.1796875, "learning_rate": 2.3475791161532302e-05, "loss": 1.1495907306671143, "step": 1620 }, { "epoch": 0.1327478016972228, "grad_norm": 0.18359375, "learning_rate": 2.347505955799841e-05, "loss": 0.7595223784446716, "step": 1621 }, { "epoch": 0.13282969423374177, "grad_norm": 0.1865234375, "learning_rate": 2.3474327455711166e-05, "loss": 1.1076464653015137, "step": 1622 }, { "epoch": 0.13291158677026071, "grad_norm": 0.310546875, "learning_rate": 2.3473594854702392e-05, "loss": 1.1217509508132935, "step": 1623 }, { "epoch": 0.13299347930677968, "grad_norm": 0.177734375, "learning_rate": 2.3472861755003932e-05, "loss": 0.9420228004455566, "step": 1624 }, { "epoch": 0.13307537184329862, "grad_norm": 0.16015625, "learning_rate": 2.3472128156647646e-05, "loss": 0.9235535860061646, "step": 1625 }, { "epoch": 0.1331572643798176, "grad_norm": 0.1533203125, "learning_rate": 2.3471394059665418e-05, "loss": 0.6970453262329102, "step": 1626 }, { "epoch": 0.13323915691633653, "grad_norm": 0.177734375, "learning_rate": 2.347065946408916e-05, "loss": 0.8882928490638733, "step": 1627 }, { "epoch": 0.1333210494528555, "grad_norm": 0.2353515625, "learning_rate": 2.3469924369950794e-05, "loss": 1.0362622737884521, "step": 1628 }, { "epoch": 0.13340294198937444, "grad_norm": 0.1865234375, "learning_rate": 2.3469188777282274e-05, "loss": 1.0224241018295288, "step": 1629 }, { "epoch": 0.1334848345258934, "grad_norm": 0.1728515625, "learning_rate": 2.3468452686115575e-05, "loss": 0.7838633060455322, "step": 1630 }, { "epoch": 0.13356672706241235, "grad_norm": 0.1865234375, "learning_rate": 2.3467716096482683e-05, "loss": 1.1424044370651245, "step": 1631 }, { "epoch": 0.1336486195989313, "grad_norm": 0.1640625, "learning_rate": 2.3466979008415618e-05, "loss": 0.9850019812583923, "step": 1632 }, { "epoch": 0.13373051213545026, "grad_norm": 0.24609375, "learning_rate": 2.346624142194642e-05, "loss": 0.9179469347000122, "step": 1633 }, { "epoch": 0.1338124046719692, "grad_norm": 0.18359375, "learning_rate": 2.3465503337107142e-05, "loss": 0.7566598057746887, "step": 1634 }, { "epoch": 0.13389429720848817, "grad_norm": 0.181640625, "learning_rate": 2.3464764753929865e-05, "loss": 1.0667723417282104, "step": 1635 }, { "epoch": 0.1339761897450071, "grad_norm": 0.1748046875, "learning_rate": 2.3464025672446693e-05, "loss": 0.8108075857162476, "step": 1636 }, { "epoch": 0.13405808228152608, "grad_norm": 0.1962890625, "learning_rate": 2.3463286092689746e-05, "loss": 0.9261921644210815, "step": 1637 }, { "epoch": 0.13413997481804502, "grad_norm": 0.193359375, "learning_rate": 2.3462546014691177e-05, "loss": 0.6809033751487732, "step": 1638 }, { "epoch": 0.13422186735456398, "grad_norm": 0.177734375, "learning_rate": 2.3461805438483143e-05, "loss": 1.0155786275863647, "step": 1639 }, { "epoch": 0.13430375989108292, "grad_norm": 0.396484375, "learning_rate": 2.3461064364097837e-05, "loss": 0.8724837303161621, "step": 1640 }, { "epoch": 0.1343856524276019, "grad_norm": 0.2314453125, "learning_rate": 2.346032279156747e-05, "loss": 1.0166274309158325, "step": 1641 }, { "epoch": 0.13446754496412083, "grad_norm": 0.171875, "learning_rate": 2.3459580720924273e-05, "loss": 0.9664424061775208, "step": 1642 }, { "epoch": 0.13454943750063977, "grad_norm": 0.1806640625, "learning_rate": 2.3458838152200498e-05, "loss": 0.9100191593170166, "step": 1643 }, { "epoch": 0.13463133003715874, "grad_norm": 0.142578125, "learning_rate": 2.3458095085428427e-05, "loss": 0.658198356628418, "step": 1644 }, { "epoch": 0.13471322257367768, "grad_norm": 0.177734375, "learning_rate": 2.3457351520640344e-05, "loss": 1.0236546993255615, "step": 1645 }, { "epoch": 0.13479511511019665, "grad_norm": 0.166015625, "learning_rate": 2.3456607457868577e-05, "loss": 0.6518593430519104, "step": 1646 }, { "epoch": 0.1348770076467156, "grad_norm": 0.1611328125, "learning_rate": 2.3455862897145466e-05, "loss": 0.6210149526596069, "step": 1647 }, { "epoch": 0.13495890018323456, "grad_norm": 0.185546875, "learning_rate": 2.3455117838503368e-05, "loss": 0.9304356575012207, "step": 1648 }, { "epoch": 0.1350407927197535, "grad_norm": 0.216796875, "learning_rate": 2.3454372281974666e-05, "loss": 0.9017465710639954, "step": 1649 }, { "epoch": 0.13512268525627247, "grad_norm": 0.201171875, "learning_rate": 2.345362622759177e-05, "loss": 0.961761474609375, "step": 1650 }, { "epoch": 0.1352045777927914, "grad_norm": 0.205078125, "learning_rate": 2.3452879675387107e-05, "loss": 0.9097169637680054, "step": 1651 }, { "epoch": 0.13528647032931035, "grad_norm": 0.1728515625, "learning_rate": 2.345213262539312e-05, "loss": 0.7993609309196472, "step": 1652 }, { "epoch": 0.13536836286582932, "grad_norm": 0.17578125, "learning_rate": 2.3451385077642282e-05, "loss": 0.933063268661499, "step": 1653 }, { "epoch": 0.13545025540234826, "grad_norm": 0.1748046875, "learning_rate": 2.345063703216708e-05, "loss": 0.8823580741882324, "step": 1654 }, { "epoch": 0.13553214793886723, "grad_norm": 0.1787109375, "learning_rate": 2.3449888489000036e-05, "loss": 0.7844700813293457, "step": 1655 }, { "epoch": 0.13561404047538617, "grad_norm": 0.154296875, "learning_rate": 2.3449139448173673e-05, "loss": 0.8455648422241211, "step": 1656 }, { "epoch": 0.13569593301190513, "grad_norm": 0.14453125, "learning_rate": 2.3448389909720562e-05, "loss": 0.8499159812927246, "step": 1657 }, { "epoch": 0.13577782554842407, "grad_norm": 0.2119140625, "learning_rate": 2.3447639873673267e-05, "loss": 1.277330994606018, "step": 1658 }, { "epoch": 0.13585971808494304, "grad_norm": 0.19140625, "learning_rate": 2.3446889340064396e-05, "loss": 0.808635950088501, "step": 1659 }, { "epoch": 0.13594161062146198, "grad_norm": 0.19921875, "learning_rate": 2.3446138308926566e-05, "loss": 1.0380194187164307, "step": 1660 }, { "epoch": 0.13602350315798095, "grad_norm": 0.177734375, "learning_rate": 2.344538678029243e-05, "loss": 1.3031929731369019, "step": 1661 }, { "epoch": 0.1361053956944999, "grad_norm": 0.15625, "learning_rate": 2.344463475419463e-05, "loss": 0.9188863039016724, "step": 1662 }, { "epoch": 0.13618728823101883, "grad_norm": 0.1611328125, "learning_rate": 2.3443882230665877e-05, "loss": 0.836012065410614, "step": 1663 }, { "epoch": 0.1362691807675378, "grad_norm": 0.169921875, "learning_rate": 2.3443129209738868e-05, "loss": 1.0730888843536377, "step": 1664 }, { "epoch": 0.13635107330405674, "grad_norm": 0.181640625, "learning_rate": 2.3442375691446333e-05, "loss": 0.7292597889900208, "step": 1665 }, { "epoch": 0.1364329658405757, "grad_norm": 0.1767578125, "learning_rate": 2.344162167582102e-05, "loss": 0.8135591745376587, "step": 1666 }, { "epoch": 0.13651485837709465, "grad_norm": 0.181640625, "learning_rate": 2.3440867162895706e-05, "loss": 0.8758120536804199, "step": 1667 }, { "epoch": 0.13659675091361362, "grad_norm": 0.193359375, "learning_rate": 2.344011215270318e-05, "loss": 1.052804946899414, "step": 1668 }, { "epoch": 0.13667864345013256, "grad_norm": 0.1630859375, "learning_rate": 2.3439356645276266e-05, "loss": 0.7884631156921387, "step": 1669 }, { "epoch": 0.13676053598665153, "grad_norm": 0.1787109375, "learning_rate": 2.3438600640647796e-05, "loss": 0.9983245134353638, "step": 1670 }, { "epoch": 0.13684242852317047, "grad_norm": 0.1640625, "learning_rate": 2.3437844138850626e-05, "loss": 0.9895610213279724, "step": 1671 }, { "epoch": 0.13692432105968944, "grad_norm": 0.177734375, "learning_rate": 2.3437087139917646e-05, "loss": 1.0172215700149536, "step": 1672 }, { "epoch": 0.13700621359620838, "grad_norm": 0.177734375, "learning_rate": 2.343632964388175e-05, "loss": 0.8625044226646423, "step": 1673 }, { "epoch": 0.13708810613272732, "grad_norm": 0.169921875, "learning_rate": 2.3435571650775868e-05, "loss": 0.934339702129364, "step": 1674 }, { "epoch": 0.13716999866924628, "grad_norm": 0.1435546875, "learning_rate": 2.343481316063294e-05, "loss": 0.6338703036308289, "step": 1675 }, { "epoch": 0.13725189120576523, "grad_norm": 0.1640625, "learning_rate": 2.3434054173485935e-05, "loss": 0.9846989512443542, "step": 1676 }, { "epoch": 0.1373337837422842, "grad_norm": 0.1455078125, "learning_rate": 2.3433294689367846e-05, "loss": 0.834060788154602, "step": 1677 }, { "epoch": 0.13741567627880313, "grad_norm": 0.1669921875, "learning_rate": 2.3432534708311674e-05, "loss": 0.9962806701660156, "step": 1678 }, { "epoch": 0.1374975688153221, "grad_norm": 0.173828125, "learning_rate": 2.3431774230350458e-05, "loss": 0.7456172108650208, "step": 1679 }, { "epoch": 0.13757946135184104, "grad_norm": 0.1630859375, "learning_rate": 2.3431013255517252e-05, "loss": 1.105972170829773, "step": 1680 }, { "epoch": 0.13766135388836, "grad_norm": 0.1845703125, "learning_rate": 2.343025178384513e-05, "loss": 0.9536287188529968, "step": 1681 }, { "epoch": 0.13774324642487895, "grad_norm": 0.1845703125, "learning_rate": 2.3429489815367183e-05, "loss": 0.8721716403961182, "step": 1682 }, { "epoch": 0.13782513896139792, "grad_norm": 0.1572265625, "learning_rate": 2.3428727350116534e-05, "loss": 0.9927589893341064, "step": 1683 }, { "epoch": 0.13790703149791686, "grad_norm": 0.146484375, "learning_rate": 2.342796438812633e-05, "loss": 0.7557966709136963, "step": 1684 }, { "epoch": 0.1379889240344358, "grad_norm": 0.1513671875, "learning_rate": 2.342720092942972e-05, "loss": 0.960281491279602, "step": 1685 }, { "epoch": 0.13807081657095477, "grad_norm": 0.23828125, "learning_rate": 2.3426436974059895e-05, "loss": 1.0987720489501953, "step": 1686 }, { "epoch": 0.1381527091074737, "grad_norm": 0.1923828125, "learning_rate": 2.3425672522050056e-05, "loss": 0.8334921598434448, "step": 1687 }, { "epoch": 0.13823460164399268, "grad_norm": 0.177734375, "learning_rate": 2.3424907573433427e-05, "loss": 0.7242124676704407, "step": 1688 }, { "epoch": 0.13831649418051162, "grad_norm": 0.1611328125, "learning_rate": 2.342414212824326e-05, "loss": 0.5259591341018677, "step": 1689 }, { "epoch": 0.13839838671703059, "grad_norm": 0.13671875, "learning_rate": 2.3423376186512827e-05, "loss": 0.6272844672203064, "step": 1690 }, { "epoch": 0.13848027925354953, "grad_norm": 0.1953125, "learning_rate": 2.3422609748275414e-05, "loss": 0.8940479755401611, "step": 1691 }, { "epoch": 0.1385621717900685, "grad_norm": 0.19921875, "learning_rate": 2.3421842813564336e-05, "loss": 0.943976104259491, "step": 1692 }, { "epoch": 0.13864406432658744, "grad_norm": 0.1435546875, "learning_rate": 2.3421075382412923e-05, "loss": 0.7209089398384094, "step": 1693 }, { "epoch": 0.13872595686310638, "grad_norm": 0.1875, "learning_rate": 2.3420307454854537e-05, "loss": 0.6116722822189331, "step": 1694 }, { "epoch": 0.13880784939962534, "grad_norm": 0.1796875, "learning_rate": 2.3419539030922552e-05, "loss": 0.6617465615272522, "step": 1695 }, { "epoch": 0.13888974193614428, "grad_norm": 0.16015625, "learning_rate": 2.341877011065037e-05, "loss": 0.6561160683631897, "step": 1696 }, { "epoch": 0.13897163447266325, "grad_norm": 0.1552734375, "learning_rate": 2.3418000694071403e-05, "loss": 0.8326876759529114, "step": 1697 }, { "epoch": 0.1390535270091822, "grad_norm": 0.193359375, "learning_rate": 2.3417230781219096e-05, "loss": 1.0738868713378906, "step": 1698 }, { "epoch": 0.13913541954570116, "grad_norm": 0.1806640625, "learning_rate": 2.3416460372126923e-05, "loss": 0.8097984194755554, "step": 1699 }, { "epoch": 0.1392173120822201, "grad_norm": 0.169921875, "learning_rate": 2.3415689466828355e-05, "loss": 0.8695337772369385, "step": 1700 }, { "epoch": 0.13929920461873907, "grad_norm": 0.1767578125, "learning_rate": 2.3414918065356907e-05, "loss": 0.8281451463699341, "step": 1701 }, { "epoch": 0.139381097155258, "grad_norm": 0.1640625, "learning_rate": 2.341414616774611e-05, "loss": 0.8183345198631287, "step": 1702 }, { "epoch": 0.13946298969177698, "grad_norm": 0.1474609375, "learning_rate": 2.3413373774029503e-05, "loss": 0.7993346452713013, "step": 1703 }, { "epoch": 0.13954488222829592, "grad_norm": 0.1689453125, "learning_rate": 2.3412600884240665e-05, "loss": 0.8513830304145813, "step": 1704 }, { "epoch": 0.13962677476481486, "grad_norm": 0.1748046875, "learning_rate": 2.341182749841319e-05, "loss": 0.8494141101837158, "step": 1705 }, { "epoch": 0.13970866730133383, "grad_norm": 0.18359375, "learning_rate": 2.341105361658069e-05, "loss": 0.7807350158691406, "step": 1706 }, { "epoch": 0.13979055983785277, "grad_norm": 0.212890625, "learning_rate": 2.34102792387768e-05, "loss": 1.30607271194458, "step": 1707 }, { "epoch": 0.13987245237437174, "grad_norm": 0.2021484375, "learning_rate": 2.3409504365035175e-05, "loss": 0.9800342917442322, "step": 1708 }, { "epoch": 0.13995434491089068, "grad_norm": 0.1455078125, "learning_rate": 2.3408728995389502e-05, "loss": 0.996315598487854, "step": 1709 }, { "epoch": 0.14003623744740964, "grad_norm": 0.16796875, "learning_rate": 2.3407953129873476e-05, "loss": 0.7794680595397949, "step": 1710 }, { "epoch": 0.14011812998392859, "grad_norm": 0.205078125, "learning_rate": 2.3407176768520826e-05, "loss": 0.7499452829360962, "step": 1711 }, { "epoch": 0.14020002252044755, "grad_norm": 0.173828125, "learning_rate": 2.3406399911365283e-05, "loss": 0.5939576625823975, "step": 1712 }, { "epoch": 0.1402819150569665, "grad_norm": 0.171875, "learning_rate": 2.3405622558440628e-05, "loss": 0.9416583776473999, "step": 1713 }, { "epoch": 0.14036380759348546, "grad_norm": 0.1875, "learning_rate": 2.3404844709780633e-05, "loss": 0.7452306747436523, "step": 1714 }, { "epoch": 0.1404457001300044, "grad_norm": 0.158203125, "learning_rate": 2.340406636541912e-05, "loss": 0.5363136529922485, "step": 1715 }, { "epoch": 0.14052759266652334, "grad_norm": 0.1650390625, "learning_rate": 2.340328752538991e-05, "loss": 0.7687567472457886, "step": 1716 }, { "epoch": 0.1406094852030423, "grad_norm": 0.1787109375, "learning_rate": 2.3402508189726856e-05, "loss": 0.9629082679748535, "step": 1717 }, { "epoch": 0.14069137773956125, "grad_norm": 0.1796875, "learning_rate": 2.3401728358463835e-05, "loss": 0.8481153845787048, "step": 1718 }, { "epoch": 0.14077327027608022, "grad_norm": 0.14453125, "learning_rate": 2.340094803163474e-05, "loss": 0.6960976123809814, "step": 1719 }, { "epoch": 0.14085516281259916, "grad_norm": 0.1875, "learning_rate": 2.3400167209273486e-05, "loss": 0.7860400676727295, "step": 1720 }, { "epoch": 0.14093705534911813, "grad_norm": 0.2021484375, "learning_rate": 2.3399385891414008e-05, "loss": 0.9449538588523865, "step": 1721 }, { "epoch": 0.14101894788563707, "grad_norm": 0.1484375, "learning_rate": 2.339860407809027e-05, "loss": 0.8932732939720154, "step": 1722 }, { "epoch": 0.14110084042215604, "grad_norm": 0.1396484375, "learning_rate": 2.3397821769336254e-05, "loss": 0.7036584615707397, "step": 1723 }, { "epoch": 0.14118273295867498, "grad_norm": 0.177734375, "learning_rate": 2.3397038965185956e-05, "loss": 0.7544406056404114, "step": 1724 }, { "epoch": 0.14126462549519392, "grad_norm": 0.177734375, "learning_rate": 2.339625566567341e-05, "loss": 0.8169337511062622, "step": 1725 }, { "epoch": 0.1413465180317129, "grad_norm": 0.142578125, "learning_rate": 2.339547187083265e-05, "loss": 0.8005601763725281, "step": 1726 }, { "epoch": 0.14142841056823183, "grad_norm": 0.2236328125, "learning_rate": 2.3394687580697748e-05, "loss": 1.0061776638031006, "step": 1727 }, { "epoch": 0.1415103031047508, "grad_norm": 0.1982421875, "learning_rate": 2.339390279530279e-05, "loss": 1.0360643863677979, "step": 1728 }, { "epoch": 0.14159219564126974, "grad_norm": 0.1650390625, "learning_rate": 2.3393117514681896e-05, "loss": 0.5064568519592285, "step": 1729 }, { "epoch": 0.1416740881777887, "grad_norm": 0.2158203125, "learning_rate": 2.3392331738869182e-05, "loss": 1.1183488368988037, "step": 1730 }, { "epoch": 0.14175598071430764, "grad_norm": 0.1494140625, "learning_rate": 2.3391545467898816e-05, "loss": 0.9721965193748474, "step": 1731 }, { "epoch": 0.1418378732508266, "grad_norm": 0.201171875, "learning_rate": 2.3390758701804962e-05, "loss": 0.9406471252441406, "step": 1732 }, { "epoch": 0.14191976578734555, "grad_norm": 0.17578125, "learning_rate": 2.3389971440621815e-05, "loss": 0.7524545788764954, "step": 1733 }, { "epoch": 0.14200165832386452, "grad_norm": 0.1787109375, "learning_rate": 2.3389183684383604e-05, "loss": 0.8204085826873779, "step": 1734 }, { "epoch": 0.14208355086038346, "grad_norm": 0.1630859375, "learning_rate": 2.3388395433124562e-05, "loss": 0.5364327430725098, "step": 1735 }, { "epoch": 0.1421654433969024, "grad_norm": 0.1640625, "learning_rate": 2.3387606686878945e-05, "loss": 0.8509069085121155, "step": 1736 }, { "epoch": 0.14224733593342137, "grad_norm": 0.1474609375, "learning_rate": 2.3386817445681043e-05, "loss": 0.49502792954444885, "step": 1737 }, { "epoch": 0.1423292284699403, "grad_norm": 0.1689453125, "learning_rate": 2.3386027709565155e-05, "loss": 0.9358277916908264, "step": 1738 }, { "epoch": 0.14241112100645928, "grad_norm": 0.15625, "learning_rate": 2.3385237478565605e-05, "loss": 0.7964696884155273, "step": 1739 }, { "epoch": 0.14249301354297822, "grad_norm": 0.1552734375, "learning_rate": 2.3384446752716742e-05, "loss": 0.9974815845489502, "step": 1740 }, { "epoch": 0.1425749060794972, "grad_norm": 0.154296875, "learning_rate": 2.3383655532052936e-05, "loss": 0.7631921768188477, "step": 1741 }, { "epoch": 0.14265679861601613, "grad_norm": 0.2119140625, "learning_rate": 2.3382863816608576e-05, "loss": 0.6960386037826538, "step": 1742 }, { "epoch": 0.1427386911525351, "grad_norm": 0.1748046875, "learning_rate": 2.3382071606418065e-05, "loss": 0.689273476600647, "step": 1743 }, { "epoch": 0.14282058368905404, "grad_norm": 0.1865234375, "learning_rate": 2.338127890151585e-05, "loss": 0.9112082123756409, "step": 1744 }, { "epoch": 0.142902476225573, "grad_norm": 0.203125, "learning_rate": 2.3380485701936376e-05, "loss": 1.1433637142181396, "step": 1745 }, { "epoch": 0.14298436876209195, "grad_norm": 0.2001953125, "learning_rate": 2.3379692007714123e-05, "loss": 1.0371371507644653, "step": 1746 }, { "epoch": 0.1430662612986109, "grad_norm": 0.162109375, "learning_rate": 2.337889781888358e-05, "loss": 1.0123802423477173, "step": 1747 }, { "epoch": 0.14314815383512985, "grad_norm": 0.1767578125, "learning_rate": 2.3378103135479275e-05, "loss": 1.2243375778198242, "step": 1748 }, { "epoch": 0.1432300463716488, "grad_norm": 0.1640625, "learning_rate": 2.3377307957535744e-05, "loss": 0.7974395751953125, "step": 1749 }, { "epoch": 0.14331193890816776, "grad_norm": 0.1767578125, "learning_rate": 2.337651228508755e-05, "loss": 0.8220515251159668, "step": 1750 }, { "epoch": 0.1433938314446867, "grad_norm": 0.173828125, "learning_rate": 2.3375716118169276e-05, "loss": 0.5945915579795837, "step": 1751 }, { "epoch": 0.14347572398120567, "grad_norm": 0.1748046875, "learning_rate": 2.3374919456815525e-05, "loss": 1.0424771308898926, "step": 1752 }, { "epoch": 0.1435576165177246, "grad_norm": 0.1435546875, "learning_rate": 2.3374122301060924e-05, "loss": 0.6379466652870178, "step": 1753 }, { "epoch": 0.14363950905424358, "grad_norm": 0.1591796875, "learning_rate": 2.337332465094012e-05, "loss": 0.9607183933258057, "step": 1754 }, { "epoch": 0.14372140159076252, "grad_norm": 0.16796875, "learning_rate": 2.337252650648779e-05, "loss": 0.9077478647232056, "step": 1755 }, { "epoch": 0.14380329412728146, "grad_norm": 0.1767578125, "learning_rate": 2.3371727867738612e-05, "loss": 0.8585506677627563, "step": 1756 }, { "epoch": 0.14388518666380043, "grad_norm": 0.171875, "learning_rate": 2.337092873472731e-05, "loss": 1.0332348346710205, "step": 1757 }, { "epoch": 0.14396707920031937, "grad_norm": 0.189453125, "learning_rate": 2.3370129107488604e-05, "loss": 0.8863798975944519, "step": 1758 }, { "epoch": 0.14404897173683834, "grad_norm": 0.1396484375, "learning_rate": 2.336932898605726e-05, "loss": 0.9455370903015137, "step": 1759 }, { "epoch": 0.14413086427335728, "grad_norm": 0.1689453125, "learning_rate": 2.3368528370468055e-05, "loss": 0.8861120343208313, "step": 1760 }, { "epoch": 0.14421275680987625, "grad_norm": 0.22265625, "learning_rate": 2.336772726075578e-05, "loss": 1.1002259254455566, "step": 1761 }, { "epoch": 0.1442946493463952, "grad_norm": 0.1611328125, "learning_rate": 2.3366925656955257e-05, "loss": 0.558000922203064, "step": 1762 }, { "epoch": 0.14437654188291416, "grad_norm": 0.1494140625, "learning_rate": 2.336612355910133e-05, "loss": 0.7646887898445129, "step": 1763 }, { "epoch": 0.1444584344194331, "grad_norm": 0.2099609375, "learning_rate": 2.336532096722886e-05, "loss": 1.1245468854904175, "step": 1764 }, { "epoch": 0.14454032695595206, "grad_norm": 0.197265625, "learning_rate": 2.336451788137273e-05, "loss": 1.103338599205017, "step": 1765 }, { "epoch": 0.144622219492471, "grad_norm": 0.1796875, "learning_rate": 2.3363714301567846e-05, "loss": 0.978534996509552, "step": 1766 }, { "epoch": 0.14470411202898995, "grad_norm": 0.189453125, "learning_rate": 2.3362910227849136e-05, "loss": 0.890990138053894, "step": 1767 }, { "epoch": 0.1447860045655089, "grad_norm": 0.16796875, "learning_rate": 2.3362105660251545e-05, "loss": 0.8109412789344788, "step": 1768 }, { "epoch": 0.14486789710202785, "grad_norm": 0.166015625, "learning_rate": 2.3361300598810043e-05, "loss": 0.8064419031143188, "step": 1769 }, { "epoch": 0.14494978963854682, "grad_norm": 0.1669921875, "learning_rate": 2.3360495043559625e-05, "loss": 0.5979679822921753, "step": 1770 }, { "epoch": 0.14503168217506576, "grad_norm": 0.1416015625, "learning_rate": 2.3359688994535302e-05, "loss": 0.6205583214759827, "step": 1771 }, { "epoch": 0.14511357471158473, "grad_norm": 0.1611328125, "learning_rate": 2.335888245177211e-05, "loss": 0.8708716630935669, "step": 1772 }, { "epoch": 0.14519546724810367, "grad_norm": 0.203125, "learning_rate": 2.33580754153051e-05, "loss": 0.9297593832015991, "step": 1773 }, { "epoch": 0.14527735978462264, "grad_norm": 0.177734375, "learning_rate": 2.3357267885169356e-05, "loss": 0.9373528361320496, "step": 1774 }, { "epoch": 0.14535925232114158, "grad_norm": 0.181640625, "learning_rate": 2.335645986139997e-05, "loss": 0.5473214983940125, "step": 1775 }, { "epoch": 0.14544114485766055, "grad_norm": 0.173828125, "learning_rate": 2.3355651344032064e-05, "loss": 0.8224920034408569, "step": 1776 }, { "epoch": 0.1455230373941795, "grad_norm": 0.1640625, "learning_rate": 2.335484233310078e-05, "loss": 0.539886474609375, "step": 1777 }, { "epoch": 0.14560492993069843, "grad_norm": 0.177734375, "learning_rate": 2.335403282864128e-05, "loss": 0.6082117557525635, "step": 1778 }, { "epoch": 0.1456868224672174, "grad_norm": 0.19921875, "learning_rate": 2.3353222830688754e-05, "loss": 0.975936770439148, "step": 1779 }, { "epoch": 0.14576871500373634, "grad_norm": 0.1357421875, "learning_rate": 2.33524123392784e-05, "loss": 0.626113772392273, "step": 1780 }, { "epoch": 0.1458506075402553, "grad_norm": 0.228515625, "learning_rate": 2.3351601354445448e-05, "loss": 0.8247703909873962, "step": 1781 }, { "epoch": 0.14593250007677425, "grad_norm": 0.1611328125, "learning_rate": 2.335078987622515e-05, "loss": 0.652069628238678, "step": 1782 }, { "epoch": 0.14601439261329321, "grad_norm": 0.1591796875, "learning_rate": 2.334997790465277e-05, "loss": 0.8391812443733215, "step": 1783 }, { "epoch": 0.14609628514981216, "grad_norm": 0.1630859375, "learning_rate": 2.3349165439763607e-05, "loss": 0.991180419921875, "step": 1784 }, { "epoch": 0.14617817768633112, "grad_norm": 0.1669921875, "learning_rate": 2.3348352481592966e-05, "loss": 0.6333591938018799, "step": 1785 }, { "epoch": 0.14626007022285006, "grad_norm": 0.162109375, "learning_rate": 2.3347539030176185e-05, "loss": 1.0944880247116089, "step": 1786 }, { "epoch": 0.146341962759369, "grad_norm": 0.154296875, "learning_rate": 2.3346725085548626e-05, "loss": 0.6372475624084473, "step": 1787 }, { "epoch": 0.14642385529588797, "grad_norm": 0.1572265625, "learning_rate": 2.3345910647745658e-05, "loss": 0.8103689551353455, "step": 1788 }, { "epoch": 0.1465057478324069, "grad_norm": 0.1923828125, "learning_rate": 2.3345095716802686e-05, "loss": 0.9788298606872559, "step": 1789 }, { "epoch": 0.14658764036892588, "grad_norm": 0.2099609375, "learning_rate": 2.334428029275512e-05, "loss": 0.9469529390335083, "step": 1790 }, { "epoch": 0.14666953290544482, "grad_norm": 0.1953125, "learning_rate": 2.3343464375638417e-05, "loss": 1.1397786140441895, "step": 1791 }, { "epoch": 0.1467514254419638, "grad_norm": 0.1552734375, "learning_rate": 2.334264796548803e-05, "loss": 0.8082143068313599, "step": 1792 }, { "epoch": 0.14683331797848273, "grad_norm": 0.166015625, "learning_rate": 2.3341831062339442e-05, "loss": 1.1135767698287964, "step": 1793 }, { "epoch": 0.1469152105150017, "grad_norm": 0.18359375, "learning_rate": 2.3341013666228165e-05, "loss": 0.7053612470626831, "step": 1794 }, { "epoch": 0.14699710305152064, "grad_norm": 0.150390625, "learning_rate": 2.334019577718972e-05, "loss": 0.8920732736587524, "step": 1795 }, { "epoch": 0.1470789955880396, "grad_norm": 0.150390625, "learning_rate": 2.3339377395259664e-05, "loss": 0.9755126237869263, "step": 1796 }, { "epoch": 0.14716088812455855, "grad_norm": 0.162109375, "learning_rate": 2.3338558520473562e-05, "loss": 0.8654794692993164, "step": 1797 }, { "epoch": 0.1472427806610775, "grad_norm": 0.1962890625, "learning_rate": 2.333773915286701e-05, "loss": 0.8878124952316284, "step": 1798 }, { "epoch": 0.14732467319759646, "grad_norm": 0.33984375, "learning_rate": 2.333691929247561e-05, "loss": 0.8451465964317322, "step": 1799 }, { "epoch": 0.1474065657341154, "grad_norm": 0.1767578125, "learning_rate": 2.333609893933501e-05, "loss": 0.9521805644035339, "step": 1800 }, { "epoch": 0.14748845827063436, "grad_norm": 0.1796875, "learning_rate": 2.333527809348086e-05, "loss": 0.8027971386909485, "step": 1801 }, { "epoch": 0.1475703508071533, "grad_norm": 0.1708984375, "learning_rate": 2.3334456754948836e-05, "loss": 0.8663756251335144, "step": 1802 }, { "epoch": 0.14765224334367227, "grad_norm": 0.1650390625, "learning_rate": 2.3333634923774636e-05, "loss": 0.9972551465034485, "step": 1803 }, { "epoch": 0.14773413588019121, "grad_norm": 0.1640625, "learning_rate": 2.3332812599993986e-05, "loss": 1.2096611261367798, "step": 1804 }, { "epoch": 0.14781602841671018, "grad_norm": 0.1865234375, "learning_rate": 2.3331989783642624e-05, "loss": 1.4040031433105469, "step": 1805 }, { "epoch": 0.14789792095322912, "grad_norm": 0.1552734375, "learning_rate": 2.333116647475631e-05, "loss": 0.7512882351875305, "step": 1806 }, { "epoch": 0.1479798134897481, "grad_norm": 0.19140625, "learning_rate": 2.3330342673370835e-05, "loss": 0.9009363055229187, "step": 1807 }, { "epoch": 0.14806170602626703, "grad_norm": 0.16015625, "learning_rate": 2.3329518379521996e-05, "loss": 0.9386667013168335, "step": 1808 }, { "epoch": 0.14814359856278597, "grad_norm": 0.1806640625, "learning_rate": 2.3328693593245627e-05, "loss": 0.7612395286560059, "step": 1809 }, { "epoch": 0.14822549109930494, "grad_norm": 0.1396484375, "learning_rate": 2.3327868314577577e-05, "loss": 0.6654129028320312, "step": 1810 }, { "epoch": 0.14830738363582388, "grad_norm": 0.234375, "learning_rate": 2.3327042543553714e-05, "loss": 0.7936034798622131, "step": 1811 }, { "epoch": 0.14838927617234285, "grad_norm": 0.21484375, "learning_rate": 2.332621628020993e-05, "loss": 1.1812770366668701, "step": 1812 }, { "epoch": 0.1484711687088618, "grad_norm": 0.169921875, "learning_rate": 2.3325389524582136e-05, "loss": 0.7320436239242554, "step": 1813 }, { "epoch": 0.14855306124538076, "grad_norm": 0.16015625, "learning_rate": 2.3324562276706266e-05, "loss": 0.9512939453125, "step": 1814 }, { "epoch": 0.1486349537818997, "grad_norm": 0.1884765625, "learning_rate": 2.3323734536618277e-05, "loss": 0.6637625098228455, "step": 1815 }, { "epoch": 0.14871684631841867, "grad_norm": 0.193359375, "learning_rate": 2.332290630435415e-05, "loss": 0.9432358741760254, "step": 1816 }, { "epoch": 0.1487987388549376, "grad_norm": 0.1826171875, "learning_rate": 2.3322077579949875e-05, "loss": 0.852164089679718, "step": 1817 }, { "epoch": 0.14888063139145657, "grad_norm": 0.154296875, "learning_rate": 2.332124836344148e-05, "loss": 0.9127488136291504, "step": 1818 }, { "epoch": 0.14896252392797552, "grad_norm": 0.2001953125, "learning_rate": 2.3320418654865e-05, "loss": 0.9190155267715454, "step": 1819 }, { "epoch": 0.14904441646449446, "grad_norm": 0.2041015625, "learning_rate": 2.33195884542565e-05, "loss": 0.6365010738372803, "step": 1820 }, { "epoch": 0.14912630900101342, "grad_norm": 0.197265625, "learning_rate": 2.3318757761652064e-05, "loss": 1.0945080518722534, "step": 1821 }, { "epoch": 0.14920820153753236, "grad_norm": 0.177734375, "learning_rate": 2.3317926577087796e-05, "loss": 0.8778930902481079, "step": 1822 }, { "epoch": 0.14929009407405133, "grad_norm": 0.1845703125, "learning_rate": 2.3317094900599823e-05, "loss": 0.8118739724159241, "step": 1823 }, { "epoch": 0.14937198661057027, "grad_norm": 0.2001953125, "learning_rate": 2.3316262732224297e-05, "loss": 0.9301413893699646, "step": 1824 }, { "epoch": 0.14945387914708924, "grad_norm": 0.1845703125, "learning_rate": 2.3315430071997386e-05, "loss": 0.9562491774559021, "step": 1825 }, { "epoch": 0.14953577168360818, "grad_norm": 0.1611328125, "learning_rate": 2.3314596919955275e-05, "loss": 0.9944878816604614, "step": 1826 }, { "epoch": 0.14961766422012715, "grad_norm": 0.1611328125, "learning_rate": 2.3313763276134183e-05, "loss": 0.6779910922050476, "step": 1827 }, { "epoch": 0.1496995567566461, "grad_norm": 0.1826171875, "learning_rate": 2.3312929140570336e-05, "loss": 0.6554465293884277, "step": 1828 }, { "epoch": 0.14978144929316503, "grad_norm": 0.1748046875, "learning_rate": 2.3312094513299998e-05, "loss": 0.9440432190895081, "step": 1829 }, { "epoch": 0.149863341829684, "grad_norm": 0.154296875, "learning_rate": 2.3311259394359444e-05, "loss": 0.8022931814193726, "step": 1830 }, { "epoch": 0.14994523436620294, "grad_norm": 0.2060546875, "learning_rate": 2.3310423783784966e-05, "loss": 0.9430667161941528, "step": 1831 }, { "epoch": 0.1500271269027219, "grad_norm": 0.1484375, "learning_rate": 2.3309587681612887e-05, "loss": 0.9594816565513611, "step": 1832 }, { "epoch": 0.15010901943924085, "grad_norm": 0.1796875, "learning_rate": 2.330875108787955e-05, "loss": 0.988732099533081, "step": 1833 }, { "epoch": 0.15019091197575982, "grad_norm": 0.146484375, "learning_rate": 2.3307914002621305e-05, "loss": 0.845299243927002, "step": 1834 }, { "epoch": 0.15027280451227876, "grad_norm": 0.1767578125, "learning_rate": 2.3307076425874546e-05, "loss": 0.7803511023521423, "step": 1835 }, { "epoch": 0.15035469704879773, "grad_norm": 0.1630859375, "learning_rate": 2.330623835767568e-05, "loss": 0.8433563113212585, "step": 1836 }, { "epoch": 0.15043658958531667, "grad_norm": 0.1943359375, "learning_rate": 2.3305399798061124e-05, "loss": 1.142472505569458, "step": 1837 }, { "epoch": 0.15051848212183563, "grad_norm": 0.1884765625, "learning_rate": 2.3304560747067333e-05, "loss": 0.8218322992324829, "step": 1838 }, { "epoch": 0.15060037465835457, "grad_norm": 0.1767578125, "learning_rate": 2.3303721204730766e-05, "loss": 0.9977097511291504, "step": 1839 }, { "epoch": 0.15068226719487351, "grad_norm": 0.14453125, "learning_rate": 2.3302881171087924e-05, "loss": 0.8673079013824463, "step": 1840 }, { "epoch": 0.15076415973139248, "grad_norm": 0.1640625, "learning_rate": 2.330204064617531e-05, "loss": 0.9375929832458496, "step": 1841 }, { "epoch": 0.15084605226791142, "grad_norm": 0.16796875, "learning_rate": 2.330119963002946e-05, "loss": 0.613758385181427, "step": 1842 }, { "epoch": 0.1509279448044304, "grad_norm": 0.1650390625, "learning_rate": 2.3300358122686932e-05, "loss": 0.8373093008995056, "step": 1843 }, { "epoch": 0.15100983734094933, "grad_norm": 0.5390625, "learning_rate": 2.3299516124184293e-05, "loss": 0.7188383936882019, "step": 1844 }, { "epoch": 0.1510917298774683, "grad_norm": 0.1962890625, "learning_rate": 2.3298673634558145e-05, "loss": 0.8333261609077454, "step": 1845 }, { "epoch": 0.15117362241398724, "grad_norm": 0.1708984375, "learning_rate": 2.3297830653845103e-05, "loss": 0.7658025026321411, "step": 1846 }, { "epoch": 0.1512555149505062, "grad_norm": 0.1611328125, "learning_rate": 2.3296987182081813e-05, "loss": 0.8517516851425171, "step": 1847 }, { "epoch": 0.15133740748702515, "grad_norm": 0.2060546875, "learning_rate": 2.3296143219304927e-05, "loss": 0.8477591872215271, "step": 1848 }, { "epoch": 0.15141930002354412, "grad_norm": 0.1494140625, "learning_rate": 2.3295298765551134e-05, "loss": 0.8994050025939941, "step": 1849 }, { "epoch": 0.15150119256006306, "grad_norm": 0.171875, "learning_rate": 2.3294453820857136e-05, "loss": 0.7842777371406555, "step": 1850 }, { "epoch": 0.151583085096582, "grad_norm": 0.197265625, "learning_rate": 2.3293608385259653e-05, "loss": 0.8934636116027832, "step": 1851 }, { "epoch": 0.15166497763310097, "grad_norm": 0.1796875, "learning_rate": 2.3292762458795438e-05, "loss": 0.7866275310516357, "step": 1852 }, { "epoch": 0.1517468701696199, "grad_norm": 0.2021484375, "learning_rate": 2.329191604150125e-05, "loss": 0.9011574983596802, "step": 1853 }, { "epoch": 0.15182876270613888, "grad_norm": 0.15625, "learning_rate": 2.3291069133413886e-05, "loss": 0.8706033825874329, "step": 1854 }, { "epoch": 0.15191065524265782, "grad_norm": 0.21484375, "learning_rate": 2.329022173457015e-05, "loss": 1.3024297952651978, "step": 1855 }, { "epoch": 0.15199254777917678, "grad_norm": 0.1904296875, "learning_rate": 2.3289373845006882e-05, "loss": 0.9255737066268921, "step": 1856 }, { "epoch": 0.15207444031569572, "grad_norm": 0.177734375, "learning_rate": 2.3288525464760928e-05, "loss": 0.7151395082473755, "step": 1857 }, { "epoch": 0.1521563328522147, "grad_norm": 0.1689453125, "learning_rate": 2.328767659386916e-05, "loss": 0.8222572803497314, "step": 1858 }, { "epoch": 0.15223822538873363, "grad_norm": 0.173828125, "learning_rate": 2.328682723236848e-05, "loss": 0.912538468837738, "step": 1859 }, { "epoch": 0.15232011792525257, "grad_norm": 0.1689453125, "learning_rate": 2.32859773802958e-05, "loss": 1.010779857635498, "step": 1860 }, { "epoch": 0.15240201046177154, "grad_norm": 0.162109375, "learning_rate": 2.328512703768806e-05, "loss": 0.8114819526672363, "step": 1861 }, { "epoch": 0.15248390299829048, "grad_norm": 0.1689453125, "learning_rate": 2.328427620458222e-05, "loss": 0.6525090932846069, "step": 1862 }, { "epoch": 0.15256579553480945, "grad_norm": 0.1904296875, "learning_rate": 2.3283424881015256e-05, "loss": 1.1830549240112305, "step": 1863 }, { "epoch": 0.1526476880713284, "grad_norm": 0.1591796875, "learning_rate": 2.328257306702417e-05, "loss": 0.8467474579811096, "step": 1864 }, { "epoch": 0.15272958060784736, "grad_norm": 0.212890625, "learning_rate": 2.3281720762646e-05, "loss": 1.3070322275161743, "step": 1865 }, { "epoch": 0.1528114731443663, "grad_norm": 0.181640625, "learning_rate": 2.3280867967917775e-05, "loss": 0.8158149123191833, "step": 1866 }, { "epoch": 0.15289336568088527, "grad_norm": 0.1904296875, "learning_rate": 2.3280014682876562e-05, "loss": 1.0151132345199585, "step": 1867 }, { "epoch": 0.1529752582174042, "grad_norm": 0.173828125, "learning_rate": 2.3279160907559458e-05, "loss": 0.8197196125984192, "step": 1868 }, { "epoch": 0.15305715075392318, "grad_norm": 0.134765625, "learning_rate": 2.327830664200356e-05, "loss": 0.6891666650772095, "step": 1869 }, { "epoch": 0.15313904329044212, "grad_norm": 0.169921875, "learning_rate": 2.3277451886246007e-05, "loss": 0.8846073746681213, "step": 1870 }, { "epoch": 0.15322093582696106, "grad_norm": 0.16796875, "learning_rate": 2.3276596640323944e-05, "loss": 0.8270590305328369, "step": 1871 }, { "epoch": 0.15330282836348003, "grad_norm": 0.16796875, "learning_rate": 2.3275740904274547e-05, "loss": 0.9666038155555725, "step": 1872 }, { "epoch": 0.15338472089999897, "grad_norm": 0.1650390625, "learning_rate": 2.3274884678135007e-05, "loss": 0.5371145009994507, "step": 1873 }, { "epoch": 0.15346661343651793, "grad_norm": 0.1630859375, "learning_rate": 2.3274027961942546e-05, "loss": 0.48053568601608276, "step": 1874 }, { "epoch": 0.15354850597303688, "grad_norm": 0.1728515625, "learning_rate": 2.327317075573439e-05, "loss": 0.8310407400131226, "step": 1875 }, { "epoch": 0.15363039850955584, "grad_norm": 0.185546875, "learning_rate": 2.3272313059547805e-05, "loss": 0.9808354377746582, "step": 1876 }, { "epoch": 0.15371229104607478, "grad_norm": 0.1689453125, "learning_rate": 2.327145487342007e-05, "loss": 0.8298897743225098, "step": 1877 }, { "epoch": 0.15379418358259375, "grad_norm": 0.15625, "learning_rate": 2.327059619738848e-05, "loss": 0.7051181793212891, "step": 1878 }, { "epoch": 0.1538760761191127, "grad_norm": 0.1533203125, "learning_rate": 2.3269737031490357e-05, "loss": 0.9141343832015991, "step": 1879 }, { "epoch": 0.15395796865563166, "grad_norm": 0.15234375, "learning_rate": 2.3268877375763047e-05, "loss": 0.7681975364685059, "step": 1880 }, { "epoch": 0.1540398611921506, "grad_norm": 0.1513671875, "learning_rate": 2.3268017230243916e-05, "loss": 0.9187538027763367, "step": 1881 }, { "epoch": 0.15412175372866954, "grad_norm": 0.1689453125, "learning_rate": 2.3267156594970344e-05, "loss": 0.8955500721931458, "step": 1882 }, { "epoch": 0.1542036462651885, "grad_norm": 0.2138671875, "learning_rate": 2.3266295469979742e-05, "loss": 0.7968420386314392, "step": 1883 }, { "epoch": 0.15428553880170745, "grad_norm": 0.13671875, "learning_rate": 2.3265433855309536e-05, "loss": 0.7414544820785522, "step": 1884 }, { "epoch": 0.15436743133822642, "grad_norm": 0.158203125, "learning_rate": 2.3264571750997173e-05, "loss": 0.8185932636260986, "step": 1885 }, { "epoch": 0.15444932387474536, "grad_norm": 0.16015625, "learning_rate": 2.326370915708013e-05, "loss": 1.0999760627746582, "step": 1886 }, { "epoch": 0.15453121641126433, "grad_norm": 0.1826171875, "learning_rate": 2.3262846073595894e-05, "loss": 1.0285556316375732, "step": 1887 }, { "epoch": 0.15461310894778327, "grad_norm": 0.205078125, "learning_rate": 2.3261982500581977e-05, "loss": 0.9064048528671265, "step": 1888 }, { "epoch": 0.15469500148430224, "grad_norm": 0.154296875, "learning_rate": 2.326111843807592e-05, "loss": 0.8146162629127502, "step": 1889 }, { "epoch": 0.15477689402082118, "grad_norm": 0.1455078125, "learning_rate": 2.326025388611527e-05, "loss": 0.4510672390460968, "step": 1890 }, { "epoch": 0.15485878655734012, "grad_norm": 0.1728515625, "learning_rate": 2.325938884473761e-05, "loss": 0.7402224540710449, "step": 1891 }, { "epoch": 0.15494067909385908, "grad_norm": 0.16015625, "learning_rate": 2.325852331398054e-05, "loss": 0.5941665768623352, "step": 1892 }, { "epoch": 0.15502257163037803, "grad_norm": 0.1640625, "learning_rate": 2.3257657293881674e-05, "loss": 0.8247563242912292, "step": 1893 }, { "epoch": 0.155104464166897, "grad_norm": 0.1923828125, "learning_rate": 2.3256790784478653e-05, "loss": 0.8122414350509644, "step": 1894 }, { "epoch": 0.15518635670341593, "grad_norm": 0.1875, "learning_rate": 2.3255923785809147e-05, "loss": 1.1455626487731934, "step": 1895 }, { "epoch": 0.1552682492399349, "grad_norm": 0.1630859375, "learning_rate": 2.3255056297910825e-05, "loss": 0.6622958183288574, "step": 1896 }, { "epoch": 0.15535014177645384, "grad_norm": 0.19921875, "learning_rate": 2.3254188320821405e-05, "loss": 0.7829557061195374, "step": 1897 }, { "epoch": 0.1554320343129728, "grad_norm": 0.1904296875, "learning_rate": 2.325331985457861e-05, "loss": 0.9498754143714905, "step": 1898 }, { "epoch": 0.15551392684949175, "grad_norm": 0.2021484375, "learning_rate": 2.3252450899220183e-05, "loss": 1.2340171337127686, "step": 1899 }, { "epoch": 0.15559581938601072, "grad_norm": 0.146484375, "learning_rate": 2.3251581454783896e-05, "loss": 0.7974053621292114, "step": 1900 }, { "epoch": 0.15567771192252966, "grad_norm": 0.197265625, "learning_rate": 2.3250711521307533e-05, "loss": 1.127620816230774, "step": 1901 }, { "epoch": 0.1557596044590486, "grad_norm": 0.171875, "learning_rate": 2.3249841098828912e-05, "loss": 0.443057119846344, "step": 1902 }, { "epoch": 0.15584149699556757, "grad_norm": 0.1455078125, "learning_rate": 2.324897018738586e-05, "loss": 0.9318500757217407, "step": 1903 }, { "epoch": 0.1559233895320865, "grad_norm": 0.1513671875, "learning_rate": 2.3248098787016238e-05, "loss": 0.7802523374557495, "step": 1904 }, { "epoch": 0.15600528206860548, "grad_norm": 0.1416015625, "learning_rate": 2.324722689775791e-05, "loss": 0.7112146615982056, "step": 1905 }, { "epoch": 0.15608717460512442, "grad_norm": 0.1875, "learning_rate": 2.324635451964878e-05, "loss": 0.6115627884864807, "step": 1906 }, { "epoch": 0.1561690671416434, "grad_norm": 0.15234375, "learning_rate": 2.3245481652726758e-05, "loss": 0.9857809543609619, "step": 1907 }, { "epoch": 0.15625095967816233, "grad_norm": 0.1669921875, "learning_rate": 2.3244608297029792e-05, "loss": 0.6501709818840027, "step": 1908 }, { "epoch": 0.1563328522146813, "grad_norm": 0.1572265625, "learning_rate": 2.3243734452595832e-05, "loss": 0.507906973361969, "step": 1909 }, { "epoch": 0.15641474475120024, "grad_norm": 0.1689453125, "learning_rate": 2.3242860119462865e-05, "loss": 0.9455816149711609, "step": 1910 }, { "epoch": 0.1564966372877192, "grad_norm": 0.1787109375, "learning_rate": 2.324198529766889e-05, "loss": 0.889655590057373, "step": 1911 }, { "epoch": 0.15657852982423814, "grad_norm": 0.173828125, "learning_rate": 2.3241109987251935e-05, "loss": 0.8881437182426453, "step": 1912 }, { "epoch": 0.15666042236075708, "grad_norm": 0.1650390625, "learning_rate": 2.3240234188250038e-05, "loss": 0.5286319255828857, "step": 1913 }, { "epoch": 0.15674231489727605, "grad_norm": 0.1708984375, "learning_rate": 2.3239357900701272e-05, "loss": 0.9125378727912903, "step": 1914 }, { "epoch": 0.156824207433795, "grad_norm": 0.1650390625, "learning_rate": 2.3238481124643716e-05, "loss": 0.974899411201477, "step": 1915 }, { "epoch": 0.15690609997031396, "grad_norm": 0.1806640625, "learning_rate": 2.3237603860115484e-05, "loss": 0.9207587242126465, "step": 1916 }, { "epoch": 0.1569879925068329, "grad_norm": 0.1630859375, "learning_rate": 2.32367261071547e-05, "loss": 0.6555114984512329, "step": 1917 }, { "epoch": 0.15706988504335187, "grad_norm": 0.1845703125, "learning_rate": 2.3235847865799525e-05, "loss": 0.8852350115776062, "step": 1918 }, { "epoch": 0.1571517775798708, "grad_norm": 0.1669921875, "learning_rate": 2.323496913608812e-05, "loss": 0.6822836399078369, "step": 1919 }, { "epoch": 0.15723367011638978, "grad_norm": 0.1396484375, "learning_rate": 2.3234089918058688e-05, "loss": 0.6815710067749023, "step": 1920 }, { "epoch": 0.15731556265290872, "grad_norm": 0.1728515625, "learning_rate": 2.323321021174943e-05, "loss": 0.7130799293518066, "step": 1921 }, { "epoch": 0.15739745518942766, "grad_norm": 0.1630859375, "learning_rate": 2.3232330017198597e-05, "loss": 0.7802451848983765, "step": 1922 }, { "epoch": 0.15747934772594663, "grad_norm": 0.1669921875, "learning_rate": 2.3231449334444437e-05, "loss": 1.2767488956451416, "step": 1923 }, { "epoch": 0.15756124026246557, "grad_norm": 0.1669921875, "learning_rate": 2.3230568163525233e-05, "loss": 1.0317450761795044, "step": 1924 }, { "epoch": 0.15764313279898454, "grad_norm": 0.22265625, "learning_rate": 2.3229686504479275e-05, "loss": 1.0305235385894775, "step": 1925 }, { "epoch": 0.15772502533550348, "grad_norm": 0.18359375, "learning_rate": 2.3228804357344896e-05, "loss": 0.6864871382713318, "step": 1926 }, { "epoch": 0.15780691787202245, "grad_norm": 0.2060546875, "learning_rate": 2.322792172216043e-05, "loss": 0.9575198888778687, "step": 1927 }, { "epoch": 0.15788881040854139, "grad_norm": 0.171875, "learning_rate": 2.3227038598964238e-05, "loss": 1.0892506837844849, "step": 1928 }, { "epoch": 0.15797070294506035, "grad_norm": 0.1513671875, "learning_rate": 2.3226154987794707e-05, "loss": 1.0408154726028442, "step": 1929 }, { "epoch": 0.1580525954815793, "grad_norm": 0.1904296875, "learning_rate": 2.322527088869025e-05, "loss": 0.9348576068878174, "step": 1930 }, { "epoch": 0.15813448801809826, "grad_norm": 0.1962890625, "learning_rate": 2.3224386301689282e-05, "loss": 1.0929203033447266, "step": 1931 }, { "epoch": 0.1582163805546172, "grad_norm": 0.1787109375, "learning_rate": 2.3223501226830255e-05, "loss": 0.9947420358657837, "step": 1932 }, { "epoch": 0.15829827309113614, "grad_norm": 0.1982421875, "learning_rate": 2.322261566415164e-05, "loss": 0.7739285826683044, "step": 1933 }, { "epoch": 0.1583801656276551, "grad_norm": 0.1826171875, "learning_rate": 2.3221729613691926e-05, "loss": 0.5848917365074158, "step": 1934 }, { "epoch": 0.15846205816417405, "grad_norm": 0.1591796875, "learning_rate": 2.3220843075489623e-05, "loss": 0.6993266344070435, "step": 1935 }, { "epoch": 0.15854395070069302, "grad_norm": 0.181640625, "learning_rate": 2.3219956049583267e-05, "loss": 0.5696324706077576, "step": 1936 }, { "epoch": 0.15862584323721196, "grad_norm": 0.171875, "learning_rate": 2.3219068536011406e-05, "loss": 0.8747787475585938, "step": 1937 }, { "epoch": 0.15870773577373093, "grad_norm": 0.2041015625, "learning_rate": 2.321818053481262e-05, "loss": 0.93418288230896, "step": 1938 }, { "epoch": 0.15878962831024987, "grad_norm": 0.2021484375, "learning_rate": 2.3217292046025504e-05, "loss": 1.0705628395080566, "step": 1939 }, { "epoch": 0.15887152084676884, "grad_norm": 0.1640625, "learning_rate": 2.3216403069688675e-05, "loss": 1.0094441175460815, "step": 1940 }, { "epoch": 0.15895341338328778, "grad_norm": 0.2177734375, "learning_rate": 2.321551360584077e-05, "loss": 1.2365726232528687, "step": 1941 }, { "epoch": 0.15903530591980675, "grad_norm": 0.19140625, "learning_rate": 2.3214623654520454e-05, "loss": 0.779843270778656, "step": 1942 }, { "epoch": 0.1591171984563257, "grad_norm": 0.1943359375, "learning_rate": 2.32137332157664e-05, "loss": 0.9291587471961975, "step": 1943 }, { "epoch": 0.15919909099284463, "grad_norm": 0.1689453125, "learning_rate": 2.321284228961732e-05, "loss": 0.8547040820121765, "step": 1944 }, { "epoch": 0.1592809835293636, "grad_norm": 0.1767578125, "learning_rate": 2.3211950876111924e-05, "loss": 1.061223030090332, "step": 1945 }, { "epoch": 0.15936287606588254, "grad_norm": 0.2109375, "learning_rate": 2.321105897528897e-05, "loss": 0.9094790816307068, "step": 1946 }, { "epoch": 0.1594447686024015, "grad_norm": 0.1484375, "learning_rate": 2.3210166587187217e-05, "loss": 0.6813600063323975, "step": 1947 }, { "epoch": 0.15952666113892044, "grad_norm": 0.21484375, "learning_rate": 2.3209273711845453e-05, "loss": 0.809687077999115, "step": 1948 }, { "epoch": 0.1596085536754394, "grad_norm": 0.203125, "learning_rate": 2.3208380349302484e-05, "loss": 0.8648678064346313, "step": 1949 }, { "epoch": 0.15969044621195835, "grad_norm": 0.189453125, "learning_rate": 2.3207486499597142e-05, "loss": 1.147385835647583, "step": 1950 }, { "epoch": 0.15977233874847732, "grad_norm": 0.21484375, "learning_rate": 2.3206592162768278e-05, "loss": 0.9705239534378052, "step": 1951 }, { "epoch": 0.15985423128499626, "grad_norm": 0.166015625, "learning_rate": 2.320569733885476e-05, "loss": 1.0364453792572021, "step": 1952 }, { "epoch": 0.15993612382151523, "grad_norm": 0.1884765625, "learning_rate": 2.3204802027895486e-05, "loss": 0.8680760264396667, "step": 1953 }, { "epoch": 0.16001801635803417, "grad_norm": 0.15625, "learning_rate": 2.3203906229929363e-05, "loss": 0.6885971426963806, "step": 1954 }, { "epoch": 0.1600999088945531, "grad_norm": 0.181640625, "learning_rate": 2.320300994499533e-05, "loss": 0.9312105774879456, "step": 1955 }, { "epoch": 0.16018180143107208, "grad_norm": 0.1640625, "learning_rate": 2.3202113173132347e-05, "loss": 0.6682813167572021, "step": 1956 }, { "epoch": 0.16026369396759102, "grad_norm": 0.1630859375, "learning_rate": 2.3201215914379383e-05, "loss": 0.8751347661018372, "step": 1957 }, { "epoch": 0.16034558650411, "grad_norm": 0.1845703125, "learning_rate": 2.3200318168775443e-05, "loss": 1.0036970376968384, "step": 1958 }, { "epoch": 0.16042747904062893, "grad_norm": 0.1455078125, "learning_rate": 2.3199419936359543e-05, "loss": 0.7621043920516968, "step": 1959 }, { "epoch": 0.1605093715771479, "grad_norm": 0.19140625, "learning_rate": 2.319852121717073e-05, "loss": 0.9809600710868835, "step": 1960 }, { "epoch": 0.16059126411366684, "grad_norm": 0.154296875, "learning_rate": 2.3197622011248055e-05, "loss": 1.0563632249832153, "step": 1961 }, { "epoch": 0.1606731566501858, "grad_norm": 0.2041015625, "learning_rate": 2.3196722318630614e-05, "loss": 0.7657145857810974, "step": 1962 }, { "epoch": 0.16075504918670475, "grad_norm": 0.1552734375, "learning_rate": 2.3195822139357502e-05, "loss": 0.603901207447052, "step": 1963 }, { "epoch": 0.1608369417232237, "grad_norm": 0.142578125, "learning_rate": 2.3194921473467848e-05, "loss": 0.6035488843917847, "step": 1964 }, { "epoch": 0.16091883425974265, "grad_norm": 0.189453125, "learning_rate": 2.3194020321000798e-05, "loss": 0.8689208030700684, "step": 1965 }, { "epoch": 0.1610007267962616, "grad_norm": 0.181640625, "learning_rate": 2.3193118681995516e-05, "loss": 1.0088626146316528, "step": 1966 }, { "epoch": 0.16108261933278056, "grad_norm": 0.1650390625, "learning_rate": 2.3192216556491202e-05, "loss": 0.8131465315818787, "step": 1967 }, { "epoch": 0.1611645118692995, "grad_norm": 0.1611328125, "learning_rate": 2.3191313944527054e-05, "loss": 0.5557928681373596, "step": 1968 }, { "epoch": 0.16124640440581847, "grad_norm": 0.16015625, "learning_rate": 2.319041084614231e-05, "loss": 1.294734239578247, "step": 1969 }, { "epoch": 0.1613282969423374, "grad_norm": 0.1708984375, "learning_rate": 2.3189507261376222e-05, "loss": 0.7189170122146606, "step": 1970 }, { "epoch": 0.16141018947885638, "grad_norm": 0.1875, "learning_rate": 2.3188603190268064e-05, "loss": 1.066721796989441, "step": 1971 }, { "epoch": 0.16149208201537532, "grad_norm": 0.1865234375, "learning_rate": 2.3187698632857125e-05, "loss": 1.1146469116210938, "step": 1972 }, { "epoch": 0.1615739745518943, "grad_norm": 0.1796875, "learning_rate": 2.318679358918273e-05, "loss": 1.247765302658081, "step": 1973 }, { "epoch": 0.16165586708841323, "grad_norm": 0.1806640625, "learning_rate": 2.31858880592842e-05, "loss": 0.6710013747215271, "step": 1974 }, { "epoch": 0.16173775962493217, "grad_norm": 0.212890625, "learning_rate": 2.3184982043200913e-05, "loss": 0.9186426997184753, "step": 1975 }, { "epoch": 0.16181965216145114, "grad_norm": 0.1435546875, "learning_rate": 2.3184075540972234e-05, "loss": 0.8672297596931458, "step": 1976 }, { "epoch": 0.16190154469797008, "grad_norm": 0.1845703125, "learning_rate": 2.3183168552637574e-05, "loss": 0.8545089364051819, "step": 1977 }, { "epoch": 0.16198343723448905, "grad_norm": 0.15625, "learning_rate": 2.3182261078236344e-05, "loss": 0.7210851907730103, "step": 1978 }, { "epoch": 0.162065329771008, "grad_norm": 0.16796875, "learning_rate": 2.318135311780799e-05, "loss": 0.9063572287559509, "step": 1979 }, { "epoch": 0.16214722230752696, "grad_norm": 0.1796875, "learning_rate": 2.318044467139198e-05, "loss": 1.060003399848938, "step": 1980 }, { "epoch": 0.1622291148440459, "grad_norm": 0.171875, "learning_rate": 2.3179535739027796e-05, "loss": 0.8971522450447083, "step": 1981 }, { "epoch": 0.16231100738056486, "grad_norm": 0.1630859375, "learning_rate": 2.317862632075494e-05, "loss": 0.6887643337249756, "step": 1982 }, { "epoch": 0.1623928999170838, "grad_norm": 0.1708984375, "learning_rate": 2.317771641661295e-05, "loss": 1.041860580444336, "step": 1983 }, { "epoch": 0.16247479245360277, "grad_norm": 0.16796875, "learning_rate": 2.3176806026641356e-05, "loss": 0.9931774139404297, "step": 1984 }, { "epoch": 0.1625566849901217, "grad_norm": 0.177734375, "learning_rate": 2.3175895150879745e-05, "loss": 0.6979286670684814, "step": 1985 }, { "epoch": 0.16263857752664065, "grad_norm": 0.1826171875, "learning_rate": 2.31749837893677e-05, "loss": 0.9371703863143921, "step": 1986 }, { "epoch": 0.16272047006315962, "grad_norm": 0.1513671875, "learning_rate": 2.3174071942144833e-05, "loss": 0.6812039613723755, "step": 1987 }, { "epoch": 0.16280236259967856, "grad_norm": 0.1787109375, "learning_rate": 2.317315960925078e-05, "loss": 1.0110540390014648, "step": 1988 }, { "epoch": 0.16288425513619753, "grad_norm": 0.169921875, "learning_rate": 2.3172246790725186e-05, "loss": 0.8606452345848083, "step": 1989 }, { "epoch": 0.16296614767271647, "grad_norm": 0.1416015625, "learning_rate": 2.3171333486607733e-05, "loss": 0.5210959911346436, "step": 1990 }, { "epoch": 0.16304804020923544, "grad_norm": 0.1875, "learning_rate": 2.3170419696938116e-05, "loss": 0.7098227739334106, "step": 1991 }, { "epoch": 0.16312993274575438, "grad_norm": 0.1806640625, "learning_rate": 2.316950542175605e-05, "loss": 1.1202596426010132, "step": 1992 }, { "epoch": 0.16321182528227335, "grad_norm": 0.1572265625, "learning_rate": 2.3168590661101277e-05, "loss": 0.7595298290252686, "step": 1993 }, { "epoch": 0.1632937178187923, "grad_norm": 0.197265625, "learning_rate": 2.3167675415013554e-05, "loss": 0.8345801830291748, "step": 1994 }, { "epoch": 0.16337561035531123, "grad_norm": 0.140625, "learning_rate": 2.316675968353266e-05, "loss": 0.6969337463378906, "step": 1995 }, { "epoch": 0.1634575028918302, "grad_norm": 0.166015625, "learning_rate": 2.3165843466698395e-05, "loss": 0.8890387415885925, "step": 1996 }, { "epoch": 0.16353939542834914, "grad_norm": 0.1845703125, "learning_rate": 2.3164926764550587e-05, "loss": 0.8984854817390442, "step": 1997 }, { "epoch": 0.1636212879648681, "grad_norm": 0.2255859375, "learning_rate": 2.3164009577129075e-05, "loss": 1.393692135810852, "step": 1998 }, { "epoch": 0.16370318050138705, "grad_norm": 0.1904296875, "learning_rate": 2.3163091904473728e-05, "loss": 0.7264331579208374, "step": 1999 }, { "epoch": 0.16378507303790601, "grad_norm": 0.1630859375, "learning_rate": 2.316217374662443e-05, "loss": 1.0736693143844604, "step": 2000 }, { "epoch": 0.16386696557442496, "grad_norm": 0.146484375, "learning_rate": 2.3161255103621086e-05, "loss": 0.9192960262298584, "step": 2001 }, { "epoch": 0.16394885811094392, "grad_norm": 0.201171875, "learning_rate": 2.3160335975503622e-05, "loss": 0.9903260469436646, "step": 2002 }, { "epoch": 0.16403075064746286, "grad_norm": 0.1845703125, "learning_rate": 2.3159416362311998e-05, "loss": 0.739921510219574, "step": 2003 }, { "epoch": 0.16411264318398183, "grad_norm": 0.1533203125, "learning_rate": 2.315849626408617e-05, "loss": 1.1444851160049438, "step": 2004 }, { "epoch": 0.16419453572050077, "grad_norm": 0.2275390625, "learning_rate": 2.315757568086614e-05, "loss": 0.6804274320602417, "step": 2005 }, { "epoch": 0.1642764282570197, "grad_norm": 0.1806640625, "learning_rate": 2.3156654612691913e-05, "loss": 0.7933639287948608, "step": 2006 }, { "epoch": 0.16435832079353868, "grad_norm": 0.150390625, "learning_rate": 2.3155733059603526e-05, "loss": 0.7657653093338013, "step": 2007 }, { "epoch": 0.16444021333005762, "grad_norm": 0.1708984375, "learning_rate": 2.3154811021641035e-05, "loss": 0.8075305223464966, "step": 2008 }, { "epoch": 0.1645221058665766, "grad_norm": 0.201171875, "learning_rate": 2.3153888498844512e-05, "loss": 0.8470852375030518, "step": 2009 }, { "epoch": 0.16460399840309553, "grad_norm": 0.1455078125, "learning_rate": 2.315296549125406e-05, "loss": 0.584110677242279, "step": 2010 }, { "epoch": 0.1646858909396145, "grad_norm": 0.1806640625, "learning_rate": 2.3152041998909787e-05, "loss": 1.0222033262252808, "step": 2011 }, { "epoch": 0.16476778347613344, "grad_norm": 0.185546875, "learning_rate": 2.315111802185184e-05, "loss": 0.8398823142051697, "step": 2012 }, { "epoch": 0.1648496760126524, "grad_norm": 0.173828125, "learning_rate": 2.315019356012037e-05, "loss": 0.7560561895370483, "step": 2013 }, { "epoch": 0.16493156854917135, "grad_norm": 0.1943359375, "learning_rate": 2.314926861375557e-05, "loss": 0.7072266340255737, "step": 2014 }, { "epoch": 0.16501346108569032, "grad_norm": 0.169921875, "learning_rate": 2.3148343182797636e-05, "loss": 0.8676473498344421, "step": 2015 }, { "epoch": 0.16509535362220926, "grad_norm": 0.150390625, "learning_rate": 2.3147417267286784e-05, "loss": 0.8015260696411133, "step": 2016 }, { "epoch": 0.1651772461587282, "grad_norm": 0.1728515625, "learning_rate": 2.314649086726327e-05, "loss": 1.1094400882720947, "step": 2017 }, { "epoch": 0.16525913869524717, "grad_norm": 0.220703125, "learning_rate": 2.314556398276735e-05, "loss": 1.2217539548873901, "step": 2018 }, { "epoch": 0.1653410312317661, "grad_norm": 0.138671875, "learning_rate": 2.314463661383932e-05, "loss": 0.8388075828552246, "step": 2019 }, { "epoch": 0.16542292376828507, "grad_norm": 0.15625, "learning_rate": 2.3143708760519474e-05, "loss": 1.0679703950881958, "step": 2020 }, { "epoch": 0.16550481630480401, "grad_norm": 0.1923828125, "learning_rate": 2.314278042284815e-05, "loss": 0.9242013692855835, "step": 2021 }, { "epoch": 0.16558670884132298, "grad_norm": 0.197265625, "learning_rate": 2.3141851600865697e-05, "loss": 0.9729780554771423, "step": 2022 }, { "epoch": 0.16566860137784192, "grad_norm": 0.1826171875, "learning_rate": 2.3140922294612483e-05, "loss": 0.9754358530044556, "step": 2023 }, { "epoch": 0.1657504939143609, "grad_norm": 0.1767578125, "learning_rate": 2.31399925041289e-05, "loss": 0.9507209658622742, "step": 2024 }, { "epoch": 0.16583238645087983, "grad_norm": 0.1982421875, "learning_rate": 2.3139062229455363e-05, "loss": 0.9255710244178772, "step": 2025 }, { "epoch": 0.16591427898739877, "grad_norm": 0.142578125, "learning_rate": 2.31381314706323e-05, "loss": 0.7302979230880737, "step": 2026 }, { "epoch": 0.16599617152391774, "grad_norm": 0.177734375, "learning_rate": 2.3137200227700172e-05, "loss": 0.9877808094024658, "step": 2027 }, { "epoch": 0.16607806406043668, "grad_norm": 0.2041015625, "learning_rate": 2.3136268500699444e-05, "loss": 0.9927208423614502, "step": 2028 }, { "epoch": 0.16615995659695565, "grad_norm": 0.1494140625, "learning_rate": 2.3135336289670625e-05, "loss": 0.6989736557006836, "step": 2029 }, { "epoch": 0.1662418491334746, "grad_norm": 0.171875, "learning_rate": 2.3134403594654227e-05, "loss": 1.0436620712280273, "step": 2030 }, { "epoch": 0.16632374166999356, "grad_norm": 0.166015625, "learning_rate": 2.3133470415690793e-05, "loss": 0.9086163640022278, "step": 2031 }, { "epoch": 0.1664056342065125, "grad_norm": 0.203125, "learning_rate": 2.3132536752820875e-05, "loss": 0.8396445512771606, "step": 2032 }, { "epoch": 0.16648752674303147, "grad_norm": 0.150390625, "learning_rate": 2.313160260608506e-05, "loss": 0.8068231344223022, "step": 2033 }, { "epoch": 0.1665694192795504, "grad_norm": 0.1806640625, "learning_rate": 2.3130667975523947e-05, "loss": 0.8173387050628662, "step": 2034 }, { "epoch": 0.16665131181606938, "grad_norm": 0.173828125, "learning_rate": 2.312973286117816e-05, "loss": 0.9024599194526672, "step": 2035 }, { "epoch": 0.16673320435258832, "grad_norm": 0.1591796875, "learning_rate": 2.3128797263088345e-05, "loss": 0.817664623260498, "step": 2036 }, { "epoch": 0.16681509688910726, "grad_norm": 0.1689453125, "learning_rate": 2.3127861181295164e-05, "loss": 0.694021463394165, "step": 2037 }, { "epoch": 0.16689698942562622, "grad_norm": 0.16015625, "learning_rate": 2.3126924615839302e-05, "loss": 0.749485433101654, "step": 2038 }, { "epoch": 0.16697888196214516, "grad_norm": 0.166015625, "learning_rate": 2.3125987566761468e-05, "loss": 0.7049168348312378, "step": 2039 }, { "epoch": 0.16706077449866413, "grad_norm": 0.1572265625, "learning_rate": 2.312505003410239e-05, "loss": 0.8267319202423096, "step": 2040 }, { "epoch": 0.16714266703518307, "grad_norm": 0.193359375, "learning_rate": 2.3124112017902814e-05, "loss": 0.8198041915893555, "step": 2041 }, { "epoch": 0.16722455957170204, "grad_norm": 0.171875, "learning_rate": 2.312317351820352e-05, "loss": 0.8141278028488159, "step": 2042 }, { "epoch": 0.16730645210822098, "grad_norm": 0.150390625, "learning_rate": 2.3122234535045288e-05, "loss": 0.8274849057197571, "step": 2043 }, { "epoch": 0.16738834464473995, "grad_norm": 0.162109375, "learning_rate": 2.3121295068468934e-05, "loss": 0.9098456501960754, "step": 2044 }, { "epoch": 0.1674702371812589, "grad_norm": 0.1748046875, "learning_rate": 2.3120355118515288e-05, "loss": 0.9314104318618774, "step": 2045 }, { "epoch": 0.16755212971777786, "grad_norm": 0.18359375, "learning_rate": 2.311941468522521e-05, "loss": 1.144854187965393, "step": 2046 }, { "epoch": 0.1676340222542968, "grad_norm": 0.169921875, "learning_rate": 2.3118473768639575e-05, "loss": 1.1058868169784546, "step": 2047 }, { "epoch": 0.16771591479081574, "grad_norm": 0.2080078125, "learning_rate": 2.3117532368799275e-05, "loss": 1.0394481420516968, "step": 2048 }, { "epoch": 0.1677978073273347, "grad_norm": 0.1357421875, "learning_rate": 2.3116590485745224e-05, "loss": 0.7576707005500793, "step": 2049 }, { "epoch": 0.16787969986385365, "grad_norm": 0.1875, "learning_rate": 2.311564811951837e-05, "loss": 0.9654994606971741, "step": 2050 }, { "epoch": 0.16796159240037262, "grad_norm": 0.212890625, "learning_rate": 2.3114705270159666e-05, "loss": 1.1635290384292603, "step": 2051 }, { "epoch": 0.16804348493689156, "grad_norm": 0.1767578125, "learning_rate": 2.3113761937710093e-05, "loss": 0.7197125554084778, "step": 2052 }, { "epoch": 0.16812537747341053, "grad_norm": 0.1669921875, "learning_rate": 2.3112818122210648e-05, "loss": 0.8843101859092712, "step": 2053 }, { "epoch": 0.16820727000992947, "grad_norm": 0.14453125, "learning_rate": 2.311187382370236e-05, "loss": 0.8230028748512268, "step": 2054 }, { "epoch": 0.16828916254644843, "grad_norm": 0.2099609375, "learning_rate": 2.3110929042226268e-05, "loss": 1.0551960468292236, "step": 2055 }, { "epoch": 0.16837105508296737, "grad_norm": 0.1884765625, "learning_rate": 2.3109983777823438e-05, "loss": 0.9145382642745972, "step": 2056 }, { "epoch": 0.16845294761948634, "grad_norm": 0.1650390625, "learning_rate": 2.3109038030534954e-05, "loss": 0.8974514007568359, "step": 2057 }, { "epoch": 0.16853484015600528, "grad_norm": 0.1435546875, "learning_rate": 2.3108091800401922e-05, "loss": 0.6485768556594849, "step": 2058 }, { "epoch": 0.16861673269252422, "grad_norm": 0.1474609375, "learning_rate": 2.3107145087465473e-05, "loss": 0.7728451490402222, "step": 2059 }, { "epoch": 0.1686986252290432, "grad_norm": 0.1591796875, "learning_rate": 2.3106197891766747e-05, "loss": 0.5592920780181885, "step": 2060 }, { "epoch": 0.16878051776556213, "grad_norm": 0.173828125, "learning_rate": 2.3105250213346922e-05, "loss": 0.9258127212524414, "step": 2061 }, { "epoch": 0.1688624103020811, "grad_norm": 0.16015625, "learning_rate": 2.310430205224718e-05, "loss": 0.948998749256134, "step": 2062 }, { "epoch": 0.16894430283860004, "grad_norm": 0.1650390625, "learning_rate": 2.3103353408508738e-05, "loss": 0.9782514572143555, "step": 2063 }, { "epoch": 0.169026195375119, "grad_norm": 0.17578125, "learning_rate": 2.3102404282172823e-05, "loss": 0.9074375629425049, "step": 2064 }, { "epoch": 0.16910808791163795, "grad_norm": 0.203125, "learning_rate": 2.310145467328069e-05, "loss": 0.8254364132881165, "step": 2065 }, { "epoch": 0.16918998044815692, "grad_norm": 0.1708984375, "learning_rate": 2.3100504581873614e-05, "loss": 0.8852524757385254, "step": 2066 }, { "epoch": 0.16927187298467586, "grad_norm": 0.173828125, "learning_rate": 2.3099554007992893e-05, "loss": 0.6029716730117798, "step": 2067 }, { "epoch": 0.1693537655211948, "grad_norm": 0.1904296875, "learning_rate": 2.3098602951679834e-05, "loss": 0.631441593170166, "step": 2068 }, { "epoch": 0.16943565805771377, "grad_norm": 0.171875, "learning_rate": 2.309765141297578e-05, "loss": 0.7891640067100525, "step": 2069 }, { "epoch": 0.1695175505942327, "grad_norm": 0.1865234375, "learning_rate": 2.3096699391922086e-05, "loss": 0.8181204795837402, "step": 2070 }, { "epoch": 0.16959944313075168, "grad_norm": 0.16796875, "learning_rate": 2.309574688856014e-05, "loss": 0.7776691913604736, "step": 2071 }, { "epoch": 0.16968133566727062, "grad_norm": 0.15625, "learning_rate": 2.3094793902931327e-05, "loss": 1.044589877128601, "step": 2072 }, { "epoch": 0.16976322820378958, "grad_norm": 0.16796875, "learning_rate": 2.309384043507708e-05, "loss": 0.8124565482139587, "step": 2073 }, { "epoch": 0.16984512074030853, "grad_norm": 0.1796875, "learning_rate": 2.309288648503883e-05, "loss": 0.5685734748840332, "step": 2074 }, { "epoch": 0.1699270132768275, "grad_norm": 0.1767578125, "learning_rate": 2.309193205285805e-05, "loss": 0.5505271553993225, "step": 2075 }, { "epoch": 0.17000890581334643, "grad_norm": 0.1728515625, "learning_rate": 2.3090977138576213e-05, "loss": 0.7584421634674072, "step": 2076 }, { "epoch": 0.1700907983498654, "grad_norm": 0.138671875, "learning_rate": 2.3090021742234838e-05, "loss": 0.8737994432449341, "step": 2077 }, { "epoch": 0.17017269088638434, "grad_norm": 0.2138671875, "learning_rate": 2.3089065863875435e-05, "loss": 0.8054501414299011, "step": 2078 }, { "epoch": 0.17025458342290328, "grad_norm": 0.169921875, "learning_rate": 2.3088109503539557e-05, "loss": 0.7641791105270386, "step": 2079 }, { "epoch": 0.17033647595942225, "grad_norm": 0.193359375, "learning_rate": 2.3087152661268773e-05, "loss": 0.9865874648094177, "step": 2080 }, { "epoch": 0.1704183684959412, "grad_norm": 0.171875, "learning_rate": 2.3086195337104672e-05, "loss": 0.6361075043678284, "step": 2081 }, { "epoch": 0.17050026103246016, "grad_norm": 0.19140625, "learning_rate": 2.3085237531088857e-05, "loss": 0.8034785985946655, "step": 2082 }, { "epoch": 0.1705821535689791, "grad_norm": 0.1640625, "learning_rate": 2.3084279243262964e-05, "loss": 1.0324164628982544, "step": 2083 }, { "epoch": 0.17066404610549807, "grad_norm": 0.1904296875, "learning_rate": 2.308332047366864e-05, "loss": 0.8297885060310364, "step": 2084 }, { "epoch": 0.170745938642017, "grad_norm": 0.1796875, "learning_rate": 2.3082361222347563e-05, "loss": 0.7850766181945801, "step": 2085 }, { "epoch": 0.17082783117853598, "grad_norm": 0.1728515625, "learning_rate": 2.308140148934142e-05, "loss": 0.7645384669303894, "step": 2086 }, { "epoch": 0.17090972371505492, "grad_norm": 0.1845703125, "learning_rate": 2.308044127469193e-05, "loss": 1.186807632446289, "step": 2087 }, { "epoch": 0.17099161625157389, "grad_norm": 0.15234375, "learning_rate": 2.3079480578440823e-05, "loss": 0.5767953395843506, "step": 2088 }, { "epoch": 0.17107350878809283, "grad_norm": 0.1611328125, "learning_rate": 2.3078519400629858e-05, "loss": 0.839798092842102, "step": 2089 }, { "epoch": 0.17115540132461177, "grad_norm": 0.181640625, "learning_rate": 2.307755774130081e-05, "loss": 0.8485851287841797, "step": 2090 }, { "epoch": 0.17123729386113073, "grad_norm": 0.1943359375, "learning_rate": 2.3076595600495475e-05, "loss": 0.7233185172080994, "step": 2091 }, { "epoch": 0.17131918639764968, "grad_norm": 0.21484375, "learning_rate": 2.307563297825568e-05, "loss": 1.4081238508224487, "step": 2092 }, { "epoch": 0.17140107893416864, "grad_norm": 0.24609375, "learning_rate": 2.3074669874623253e-05, "loss": 0.9178886413574219, "step": 2093 }, { "epoch": 0.17148297147068758, "grad_norm": 0.1279296875, "learning_rate": 2.3073706289640062e-05, "loss": 0.9092115163803101, "step": 2094 }, { "epoch": 0.17156486400720655, "grad_norm": 0.1552734375, "learning_rate": 2.3072742223347987e-05, "loss": 1.0433051586151123, "step": 2095 }, { "epoch": 0.1716467565437255, "grad_norm": 0.173828125, "learning_rate": 2.307177767578893e-05, "loss": 0.6784330606460571, "step": 2096 }, { "epoch": 0.17172864908024446, "grad_norm": 0.1982421875, "learning_rate": 2.3070812647004815e-05, "loss": 1.0354896783828735, "step": 2097 }, { "epoch": 0.1718105416167634, "grad_norm": 0.1875, "learning_rate": 2.3069847137037584e-05, "loss": 1.0092010498046875, "step": 2098 }, { "epoch": 0.17189243415328234, "grad_norm": 0.173828125, "learning_rate": 2.3068881145929198e-05, "loss": 1.04713773727417, "step": 2099 }, { "epoch": 0.1719743266898013, "grad_norm": 0.18359375, "learning_rate": 2.3067914673721656e-05, "loss": 0.7055670619010925, "step": 2100 }, { "epoch": 0.17205621922632025, "grad_norm": 0.1689453125, "learning_rate": 2.306694772045695e-05, "loss": 1.066530466079712, "step": 2101 }, { "epoch": 0.17213811176283922, "grad_norm": 0.17578125, "learning_rate": 2.306598028617712e-05, "loss": 1.0950257778167725, "step": 2102 }, { "epoch": 0.17222000429935816, "grad_norm": 0.14453125, "learning_rate": 2.3065012370924204e-05, "loss": 0.7926771640777588, "step": 2103 }, { "epoch": 0.17230189683587713, "grad_norm": 0.14453125, "learning_rate": 2.3064043974740283e-05, "loss": 0.814128041267395, "step": 2104 }, { "epoch": 0.17238378937239607, "grad_norm": 0.169921875, "learning_rate": 2.3063075097667437e-05, "loss": 0.864211916923523, "step": 2105 }, { "epoch": 0.17246568190891504, "grad_norm": 0.16015625, "learning_rate": 2.3062105739747786e-05, "loss": 0.8817232251167297, "step": 2106 }, { "epoch": 0.17254757444543398, "grad_norm": 0.14453125, "learning_rate": 2.306113590102345e-05, "loss": 0.5957571268081665, "step": 2107 }, { "epoch": 0.17262946698195294, "grad_norm": 0.171875, "learning_rate": 2.30601655815366e-05, "loss": 0.9646713137626648, "step": 2108 }, { "epoch": 0.17271135951847189, "grad_norm": 0.169921875, "learning_rate": 2.3059194781329396e-05, "loss": 0.5499109029769897, "step": 2109 }, { "epoch": 0.17279325205499083, "grad_norm": 0.1875, "learning_rate": 2.3058223500444038e-05, "loss": 1.030017614364624, "step": 2110 }, { "epoch": 0.1728751445915098, "grad_norm": 0.17578125, "learning_rate": 2.305725173892274e-05, "loss": 0.7581807971000671, "step": 2111 }, { "epoch": 0.17295703712802873, "grad_norm": 0.162109375, "learning_rate": 2.3056279496807738e-05, "loss": 0.8523330688476562, "step": 2112 }, { "epoch": 0.1730389296645477, "grad_norm": 0.158203125, "learning_rate": 2.3055306774141296e-05, "loss": 0.8295431137084961, "step": 2113 }, { "epoch": 0.17312082220106664, "grad_norm": 0.158203125, "learning_rate": 2.3054333570965686e-05, "loss": 0.5596280694007874, "step": 2114 }, { "epoch": 0.1732027147375856, "grad_norm": 0.1455078125, "learning_rate": 2.305335988732321e-05, "loss": 0.8161397576332092, "step": 2115 }, { "epoch": 0.17328460727410455, "grad_norm": 0.1962890625, "learning_rate": 2.3052385723256187e-05, "loss": 0.608106791973114, "step": 2116 }, { "epoch": 0.17336649981062352, "grad_norm": 0.16015625, "learning_rate": 2.305141107880696e-05, "loss": 0.7428133487701416, "step": 2117 }, { "epoch": 0.17344839234714246, "grad_norm": 0.169921875, "learning_rate": 2.305043595401789e-05, "loss": 0.6883682608604431, "step": 2118 }, { "epoch": 0.17353028488366143, "grad_norm": 0.1708984375, "learning_rate": 2.3049460348931357e-05, "loss": 1.2211458683013916, "step": 2119 }, { "epoch": 0.17361217742018037, "grad_norm": 0.1728515625, "learning_rate": 2.3048484263589773e-05, "loss": 0.8047317862510681, "step": 2120 }, { "epoch": 0.1736940699566993, "grad_norm": 0.208984375, "learning_rate": 2.3047507698035557e-05, "loss": 1.1279809474945068, "step": 2121 }, { "epoch": 0.17377596249321828, "grad_norm": 0.1650390625, "learning_rate": 2.3046530652311154e-05, "loss": 0.9892963767051697, "step": 2122 }, { "epoch": 0.17385785502973722, "grad_norm": 0.169921875, "learning_rate": 2.304555312645903e-05, "loss": 1.1074419021606445, "step": 2123 }, { "epoch": 0.1739397475662562, "grad_norm": 0.201171875, "learning_rate": 2.3044575120521677e-05, "loss": 1.0455310344696045, "step": 2124 }, { "epoch": 0.17402164010277513, "grad_norm": 0.1630859375, "learning_rate": 2.3043596634541593e-05, "loss": 0.7842694520950317, "step": 2125 }, { "epoch": 0.1741035326392941, "grad_norm": 0.1845703125, "learning_rate": 2.304261766856132e-05, "loss": 0.927800714969635, "step": 2126 }, { "epoch": 0.17418542517581304, "grad_norm": 0.1708984375, "learning_rate": 2.3041638222623403e-05, "loss": 0.8272109031677246, "step": 2127 }, { "epoch": 0.174267317712332, "grad_norm": 0.1357421875, "learning_rate": 2.3040658296770407e-05, "loss": 0.5830202698707581, "step": 2128 }, { "epoch": 0.17434921024885094, "grad_norm": 0.1748046875, "learning_rate": 2.303967789104493e-05, "loss": 1.0474522113800049, "step": 2129 }, { "epoch": 0.17443110278536988, "grad_norm": 0.1572265625, "learning_rate": 2.3038697005489585e-05, "loss": 1.3157117366790771, "step": 2130 }, { "epoch": 0.17451299532188885, "grad_norm": 0.1494140625, "learning_rate": 2.3037715640147e-05, "loss": 0.7627755403518677, "step": 2131 }, { "epoch": 0.1745948878584078, "grad_norm": 0.189453125, "learning_rate": 2.3036733795059835e-05, "loss": 0.7738596796989441, "step": 2132 }, { "epoch": 0.17467678039492676, "grad_norm": 0.20703125, "learning_rate": 2.303575147027076e-05, "loss": 0.9290691614151001, "step": 2133 }, { "epoch": 0.1747586729314457, "grad_norm": 0.1435546875, "learning_rate": 2.3034768665822473e-05, "loss": 0.4888533651828766, "step": 2134 }, { "epoch": 0.17484056546796467, "grad_norm": 0.19140625, "learning_rate": 2.3033785381757694e-05, "loss": 1.016878604888916, "step": 2135 }, { "epoch": 0.1749224580044836, "grad_norm": 0.1591796875, "learning_rate": 2.3032801618119153e-05, "loss": 0.8429866433143616, "step": 2136 }, { "epoch": 0.17500435054100258, "grad_norm": 0.216796875, "learning_rate": 2.3031817374949615e-05, "loss": 0.8701666593551636, "step": 2137 }, { "epoch": 0.17508624307752152, "grad_norm": 0.1826171875, "learning_rate": 2.303083265229186e-05, "loss": 1.0822858810424805, "step": 2138 }, { "epoch": 0.1751681356140405, "grad_norm": 0.166015625, "learning_rate": 2.302984745018868e-05, "loss": 0.6886203289031982, "step": 2139 }, { "epoch": 0.17525002815055943, "grad_norm": 0.1875, "learning_rate": 2.3028861768682907e-05, "loss": 0.6356196403503418, "step": 2140 }, { "epoch": 0.17533192068707837, "grad_norm": 0.162109375, "learning_rate": 2.3027875607817374e-05, "loss": 0.5120158195495605, "step": 2141 }, { "epoch": 0.17541381322359734, "grad_norm": 0.1572265625, "learning_rate": 2.3026888967634947e-05, "loss": 0.7832841873168945, "step": 2142 }, { "epoch": 0.17549570576011628, "grad_norm": 0.1787109375, "learning_rate": 2.302590184817851e-05, "loss": 0.9659411311149597, "step": 2143 }, { "epoch": 0.17557759829663525, "grad_norm": 0.1455078125, "learning_rate": 2.302491424949096e-05, "loss": 0.6503680944442749, "step": 2144 }, { "epoch": 0.17565949083315419, "grad_norm": 0.16796875, "learning_rate": 2.3023926171615237e-05, "loss": 0.8655272722244263, "step": 2145 }, { "epoch": 0.17574138336967315, "grad_norm": 0.16796875, "learning_rate": 2.3022937614594276e-05, "loss": 0.7896318435668945, "step": 2146 }, { "epoch": 0.1758232759061921, "grad_norm": 0.1865234375, "learning_rate": 2.3021948578471043e-05, "loss": 0.9102089405059814, "step": 2147 }, { "epoch": 0.17590516844271106, "grad_norm": 0.1796875, "learning_rate": 2.3020959063288533e-05, "loss": 1.209578514099121, "step": 2148 }, { "epoch": 0.17598706097923, "grad_norm": 0.1826171875, "learning_rate": 2.301996906908975e-05, "loss": 0.948302686214447, "step": 2149 }, { "epoch": 0.17606895351574897, "grad_norm": 0.16015625, "learning_rate": 2.301897859591772e-05, "loss": 0.7943192720413208, "step": 2150 }, { "epoch": 0.1761508460522679, "grad_norm": 0.201171875, "learning_rate": 2.3017987643815496e-05, "loss": 0.7528172731399536, "step": 2151 }, { "epoch": 0.17623273858878685, "grad_norm": 0.1787109375, "learning_rate": 2.3016996212826148e-05, "loss": 0.8487266302108765, "step": 2152 }, { "epoch": 0.17631463112530582, "grad_norm": 0.177734375, "learning_rate": 2.3016004302992774e-05, "loss": 0.9192425608634949, "step": 2153 }, { "epoch": 0.17639652366182476, "grad_norm": 0.1640625, "learning_rate": 2.301501191435848e-05, "loss": 0.6583120226860046, "step": 2154 }, { "epoch": 0.17647841619834373, "grad_norm": 0.1826171875, "learning_rate": 2.30140190469664e-05, "loss": 0.8925849199295044, "step": 2155 }, { "epoch": 0.17656030873486267, "grad_norm": 0.171875, "learning_rate": 2.301302570085969e-05, "loss": 0.9125053286552429, "step": 2156 }, { "epoch": 0.17664220127138164, "grad_norm": 0.2080078125, "learning_rate": 2.3012031876081523e-05, "loss": 0.9876391291618347, "step": 2157 }, { "epoch": 0.17672409380790058, "grad_norm": 0.1923828125, "learning_rate": 2.301103757267509e-05, "loss": 1.21542227268219, "step": 2158 }, { "epoch": 0.17680598634441955, "grad_norm": 0.169921875, "learning_rate": 2.301004279068362e-05, "loss": 0.8203423619270325, "step": 2159 }, { "epoch": 0.1768878788809385, "grad_norm": 0.1875, "learning_rate": 2.300904753015034e-05, "loss": 0.9300917387008667, "step": 2160 }, { "epoch": 0.17696977141745743, "grad_norm": 0.189453125, "learning_rate": 2.300805179111851e-05, "loss": 1.0471628904342651, "step": 2161 }, { "epoch": 0.1770516639539764, "grad_norm": 0.1513671875, "learning_rate": 2.3007055573631416e-05, "loss": 0.694509744644165, "step": 2162 }, { "epoch": 0.17713355649049534, "grad_norm": 0.19140625, "learning_rate": 2.300605887773235e-05, "loss": 0.8566156625747681, "step": 2163 }, { "epoch": 0.1772154490270143, "grad_norm": 0.1708984375, "learning_rate": 2.300506170346463e-05, "loss": 1.1923434734344482, "step": 2164 }, { "epoch": 0.17729734156353325, "grad_norm": 0.15625, "learning_rate": 2.3004064050871607e-05, "loss": 0.7378466725349426, "step": 2165 }, { "epoch": 0.1773792341000522, "grad_norm": 0.1884765625, "learning_rate": 2.3003065919996637e-05, "loss": 0.9856939911842346, "step": 2166 }, { "epoch": 0.17746112663657115, "grad_norm": 0.1689453125, "learning_rate": 2.3002067310883102e-05, "loss": 0.8166617751121521, "step": 2167 }, { "epoch": 0.17754301917309012, "grad_norm": 0.197265625, "learning_rate": 2.3001068223574406e-05, "loss": 1.0910999774932861, "step": 2168 }, { "epoch": 0.17762491170960906, "grad_norm": 0.171875, "learning_rate": 2.300006865811398e-05, "loss": 0.8753263354301453, "step": 2169 }, { "epoch": 0.17770680424612803, "grad_norm": 0.1708984375, "learning_rate": 2.299906861454526e-05, "loss": 0.8544874787330627, "step": 2170 }, { "epoch": 0.17778869678264697, "grad_norm": 0.1640625, "learning_rate": 2.2998068092911717e-05, "loss": 0.8821544647216797, "step": 2171 }, { "epoch": 0.1778705893191659, "grad_norm": 0.19140625, "learning_rate": 2.299706709325684e-05, "loss": 0.6823170185089111, "step": 2172 }, { "epoch": 0.17795248185568488, "grad_norm": 0.17578125, "learning_rate": 2.2996065615624128e-05, "loss": 0.9480848908424377, "step": 2173 }, { "epoch": 0.17803437439220382, "grad_norm": 0.1884765625, "learning_rate": 2.299506366005712e-05, "loss": 0.7389025688171387, "step": 2174 }, { "epoch": 0.1781162669287228, "grad_norm": 0.119140625, "learning_rate": 2.299406122659936e-05, "loss": 0.6551054120063782, "step": 2175 }, { "epoch": 0.17819815946524173, "grad_norm": 0.1806640625, "learning_rate": 2.299305831529441e-05, "loss": 0.9082637429237366, "step": 2176 }, { "epoch": 0.1782800520017607, "grad_norm": 0.166015625, "learning_rate": 2.2992054926185876e-05, "loss": 0.8177830576896667, "step": 2177 }, { "epoch": 0.17836194453827964, "grad_norm": 0.173828125, "learning_rate": 2.299105105931736e-05, "loss": 1.126342535018921, "step": 2178 }, { "epoch": 0.1784438370747986, "grad_norm": 0.1982421875, "learning_rate": 2.2990046714732497e-05, "loss": 0.787146806716919, "step": 2179 }, { "epoch": 0.17852572961131755, "grad_norm": 0.173828125, "learning_rate": 2.2989041892474942e-05, "loss": 0.5844379663467407, "step": 2180 }, { "epoch": 0.17860762214783651, "grad_norm": 0.181640625, "learning_rate": 2.2988036592588362e-05, "loss": 0.7549855709075928, "step": 2181 }, { "epoch": 0.17868951468435545, "grad_norm": 0.146484375, "learning_rate": 2.2987030815116455e-05, "loss": 0.8013399243354797, "step": 2182 }, { "epoch": 0.1787714072208744, "grad_norm": 0.16796875, "learning_rate": 2.2986024560102937e-05, "loss": 0.8951451778411865, "step": 2183 }, { "epoch": 0.17885329975739336, "grad_norm": 0.16015625, "learning_rate": 2.2985017827591543e-05, "loss": 0.7120135426521301, "step": 2184 }, { "epoch": 0.1789351922939123, "grad_norm": 0.1650390625, "learning_rate": 2.2984010617626033e-05, "loss": 0.6407305002212524, "step": 2185 }, { "epoch": 0.17901708483043127, "grad_norm": 0.1708984375, "learning_rate": 2.298300293025018e-05, "loss": 0.7624015212059021, "step": 2186 }, { "epoch": 0.1790989773669502, "grad_norm": 0.1728515625, "learning_rate": 2.2981994765507783e-05, "loss": 0.6639080047607422, "step": 2187 }, { "epoch": 0.17918086990346918, "grad_norm": 0.1826171875, "learning_rate": 2.2980986123442662e-05, "loss": 0.7758611440658569, "step": 2188 }, { "epoch": 0.17926276243998812, "grad_norm": 0.19921875, "learning_rate": 2.2979977004098658e-05, "loss": 0.8872136473655701, "step": 2189 }, { "epoch": 0.1793446549765071, "grad_norm": 0.1728515625, "learning_rate": 2.2978967407519627e-05, "loss": 0.7779150009155273, "step": 2190 }, { "epoch": 0.17942654751302603, "grad_norm": 0.2294921875, "learning_rate": 2.2977957333749455e-05, "loss": 1.2085351943969727, "step": 2191 }, { "epoch": 0.179508440049545, "grad_norm": 0.173828125, "learning_rate": 2.2976946782832044e-05, "loss": 0.6555917859077454, "step": 2192 }, { "epoch": 0.17959033258606394, "grad_norm": 0.1416015625, "learning_rate": 2.2975935754811317e-05, "loss": 0.42303186655044556, "step": 2193 }, { "epoch": 0.17967222512258288, "grad_norm": 0.166015625, "learning_rate": 2.2974924249731213e-05, "loss": 0.8524506688117981, "step": 2194 }, { "epoch": 0.17975411765910185, "grad_norm": 0.1982421875, "learning_rate": 2.29739122676357e-05, "loss": 0.8667759895324707, "step": 2195 }, { "epoch": 0.1798360101956208, "grad_norm": 0.2158203125, "learning_rate": 2.297289980856876e-05, "loss": 0.5838576555252075, "step": 2196 }, { "epoch": 0.17991790273213976, "grad_norm": 0.169921875, "learning_rate": 2.29718868725744e-05, "loss": 0.7316719889640808, "step": 2197 }, { "epoch": 0.1799997952686587, "grad_norm": 0.1435546875, "learning_rate": 2.2970873459696646e-05, "loss": 0.9028429388999939, "step": 2198 }, { "epoch": 0.18008168780517766, "grad_norm": 0.220703125, "learning_rate": 2.2969859569979546e-05, "loss": 0.9240031242370605, "step": 2199 }, { "epoch": 0.1801635803416966, "grad_norm": 0.1689453125, "learning_rate": 2.296884520346717e-05, "loss": 0.8532719612121582, "step": 2200 }, { "epoch": 0.18024547287821557, "grad_norm": 0.2138671875, "learning_rate": 2.29678303602036e-05, "loss": 0.8681788444519043, "step": 2201 }, { "epoch": 0.18032736541473451, "grad_norm": 0.1484375, "learning_rate": 2.2966815040232953e-05, "loss": 0.6748262047767639, "step": 2202 }, { "epoch": 0.18040925795125345, "grad_norm": 0.173828125, "learning_rate": 2.2965799243599354e-05, "loss": 0.7944550514221191, "step": 2203 }, { "epoch": 0.18049115048777242, "grad_norm": 0.169921875, "learning_rate": 2.2964782970346956e-05, "loss": 0.8010057210922241, "step": 2204 }, { "epoch": 0.18057304302429136, "grad_norm": 0.1474609375, "learning_rate": 2.296376622051993e-05, "loss": 0.5805017352104187, "step": 2205 }, { "epoch": 0.18065493556081033, "grad_norm": 0.1787109375, "learning_rate": 2.296274899416247e-05, "loss": 0.8926481008529663, "step": 2206 }, { "epoch": 0.18073682809732927, "grad_norm": 0.1552734375, "learning_rate": 2.296173129131878e-05, "loss": 0.8309744596481323, "step": 2207 }, { "epoch": 0.18081872063384824, "grad_norm": 0.1611328125, "learning_rate": 2.29607131120331e-05, "loss": 0.669938862323761, "step": 2208 }, { "epoch": 0.18090061317036718, "grad_norm": 0.1591796875, "learning_rate": 2.295969445634969e-05, "loss": 1.0254864692687988, "step": 2209 }, { "epoch": 0.18098250570688615, "grad_norm": 0.1591796875, "learning_rate": 2.2958675324312814e-05, "loss": 0.7999016046524048, "step": 2210 }, { "epoch": 0.1810643982434051, "grad_norm": 0.244140625, "learning_rate": 2.2957655715966778e-05, "loss": 1.1470184326171875, "step": 2211 }, { "epoch": 0.18114629077992406, "grad_norm": 0.1533203125, "learning_rate": 2.295663563135589e-05, "loss": 0.7634882926940918, "step": 2212 }, { "epoch": 0.181228183316443, "grad_norm": 0.1875, "learning_rate": 2.2955615070524494e-05, "loss": 0.7822745442390442, "step": 2213 }, { "epoch": 0.18131007585296194, "grad_norm": 0.1669921875, "learning_rate": 2.2954594033516937e-05, "loss": 0.6191521883010864, "step": 2214 }, { "epoch": 0.1813919683894809, "grad_norm": 0.25390625, "learning_rate": 2.2953572520377612e-05, "loss": 0.9916181564331055, "step": 2215 }, { "epoch": 0.18147386092599985, "grad_norm": 0.1845703125, "learning_rate": 2.2952550531150906e-05, "loss": 0.9677515029907227, "step": 2216 }, { "epoch": 0.18155575346251882, "grad_norm": 0.1787109375, "learning_rate": 2.2951528065881245e-05, "loss": 0.91322922706604, "step": 2217 }, { "epoch": 0.18163764599903776, "grad_norm": 0.1640625, "learning_rate": 2.295050512461307e-05, "loss": 0.7749010324478149, "step": 2218 }, { "epoch": 0.18171953853555672, "grad_norm": 0.232421875, "learning_rate": 2.294948170739084e-05, "loss": 0.6981696486473083, "step": 2219 }, { "epoch": 0.18180143107207566, "grad_norm": 0.1875, "learning_rate": 2.2948457814259035e-05, "loss": 0.7711498141288757, "step": 2220 }, { "epoch": 0.18188332360859463, "grad_norm": 0.1767578125, "learning_rate": 2.294743344526216e-05, "loss": 1.117043137550354, "step": 2221 }, { "epoch": 0.18196521614511357, "grad_norm": 0.1982421875, "learning_rate": 2.294640860044474e-05, "loss": 0.9432753324508667, "step": 2222 }, { "epoch": 0.18204710868163254, "grad_norm": 0.1455078125, "learning_rate": 2.294538327985132e-05, "loss": 0.634803056716919, "step": 2223 }, { "epoch": 0.18212900121815148, "grad_norm": 0.1552734375, "learning_rate": 2.2944357483526456e-05, "loss": 0.6533849835395813, "step": 2224 }, { "epoch": 0.18221089375467042, "grad_norm": 0.1708984375, "learning_rate": 2.2943331211514744e-05, "loss": 0.7691883444786072, "step": 2225 }, { "epoch": 0.1822927862911894, "grad_norm": 0.1826171875, "learning_rate": 2.2942304463860785e-05, "loss": 1.0226714611053467, "step": 2226 }, { "epoch": 0.18237467882770833, "grad_norm": 0.1513671875, "learning_rate": 2.2941277240609203e-05, "loss": 0.8920809626579285, "step": 2227 }, { "epoch": 0.1824565713642273, "grad_norm": 0.2001953125, "learning_rate": 2.294024954180465e-05, "loss": 0.744800329208374, "step": 2228 }, { "epoch": 0.18253846390074624, "grad_norm": 0.1669921875, "learning_rate": 2.2939221367491793e-05, "loss": 0.8038849830627441, "step": 2229 }, { "epoch": 0.1826203564372652, "grad_norm": 0.154296875, "learning_rate": 2.293819271771532e-05, "loss": 0.8700615167617798, "step": 2230 }, { "epoch": 0.18270224897378415, "grad_norm": 0.1787109375, "learning_rate": 2.2937163592519936e-05, "loss": 0.7021470069885254, "step": 2231 }, { "epoch": 0.18278414151030312, "grad_norm": 0.18359375, "learning_rate": 2.293613399195038e-05, "loss": 1.1559795141220093, "step": 2232 }, { "epoch": 0.18286603404682206, "grad_norm": 0.1650390625, "learning_rate": 2.29351039160514e-05, "loss": 0.7895053625106812, "step": 2233 }, { "epoch": 0.182947926583341, "grad_norm": 0.1640625, "learning_rate": 2.2934073364867764e-05, "loss": 0.825248658657074, "step": 2234 }, { "epoch": 0.18302981911985997, "grad_norm": 0.150390625, "learning_rate": 2.2933042338444263e-05, "loss": 1.064957857131958, "step": 2235 }, { "epoch": 0.1831117116563789, "grad_norm": 0.1953125, "learning_rate": 2.293201083682571e-05, "loss": 1.0424407720565796, "step": 2236 }, { "epoch": 0.18319360419289787, "grad_norm": 0.1845703125, "learning_rate": 2.2930978860056944e-05, "loss": 0.9745092391967773, "step": 2237 }, { "epoch": 0.18327549672941681, "grad_norm": 0.1953125, "learning_rate": 2.2929946408182817e-05, "loss": 0.7348380088806152, "step": 2238 }, { "epoch": 0.18335738926593578, "grad_norm": 0.185546875, "learning_rate": 2.29289134812482e-05, "loss": 0.8879343867301941, "step": 2239 }, { "epoch": 0.18343928180245472, "grad_norm": 0.142578125, "learning_rate": 2.2927880079297988e-05, "loss": 0.7733120322227478, "step": 2240 }, { "epoch": 0.1835211743389737, "grad_norm": 0.1650390625, "learning_rate": 2.29268462023771e-05, "loss": 0.7583624720573425, "step": 2241 }, { "epoch": 0.18360306687549263, "grad_norm": 0.1640625, "learning_rate": 2.2925811850530475e-05, "loss": 0.9134156703948975, "step": 2242 }, { "epoch": 0.1836849594120116, "grad_norm": 0.1630859375, "learning_rate": 2.292477702380306e-05, "loss": 0.7790586948394775, "step": 2243 }, { "epoch": 0.18376685194853054, "grad_norm": 0.177734375, "learning_rate": 2.292374172223985e-05, "loss": 0.8823917508125305, "step": 2244 }, { "epoch": 0.18384874448504948, "grad_norm": 0.19921875, "learning_rate": 2.2922705945885826e-05, "loss": 0.747046947479248, "step": 2245 }, { "epoch": 0.18393063702156845, "grad_norm": 0.18359375, "learning_rate": 2.2921669694786015e-05, "loss": 0.8483587503433228, "step": 2246 }, { "epoch": 0.1840125295580874, "grad_norm": 0.203125, "learning_rate": 2.2920632968985457e-05, "loss": 0.7791857123374939, "step": 2247 }, { "epoch": 0.18409442209460636, "grad_norm": 0.1513671875, "learning_rate": 2.291959576852921e-05, "loss": 0.819450855255127, "step": 2248 }, { "epoch": 0.1841763146311253, "grad_norm": 0.138671875, "learning_rate": 2.2918558093462357e-05, "loss": 0.5116016268730164, "step": 2249 }, { "epoch": 0.18425820716764427, "grad_norm": 0.19140625, "learning_rate": 2.291751994383e-05, "loss": 0.9610041379928589, "step": 2250 }, { "epoch": 0.1843400997041632, "grad_norm": 0.171875, "learning_rate": 2.291648131967726e-05, "loss": 0.8992875814437866, "step": 2251 }, { "epoch": 0.18442199224068218, "grad_norm": 0.197265625, "learning_rate": 2.291544222104928e-05, "loss": 0.8930961489677429, "step": 2252 }, { "epoch": 0.18450388477720112, "grad_norm": 0.212890625, "learning_rate": 2.2914402647991224e-05, "loss": 1.131935715675354, "step": 2253 }, { "epoch": 0.18458577731372008, "grad_norm": 0.1630859375, "learning_rate": 2.2913362600548273e-05, "loss": 0.9759795665740967, "step": 2254 }, { "epoch": 0.18466766985023902, "grad_norm": 0.181640625, "learning_rate": 2.291232207876564e-05, "loss": 1.1040501594543457, "step": 2255 }, { "epoch": 0.18474956238675797, "grad_norm": 0.1953125, "learning_rate": 2.291128108268854e-05, "loss": 0.8537464141845703, "step": 2256 }, { "epoch": 0.18483145492327693, "grad_norm": 0.1416015625, "learning_rate": 2.2910239612362225e-05, "loss": 0.8829268217086792, "step": 2257 }, { "epoch": 0.18491334745979587, "grad_norm": 0.1669921875, "learning_rate": 2.2909197667831963e-05, "loss": 0.8605987429618835, "step": 2258 }, { "epoch": 0.18499523999631484, "grad_norm": 0.1845703125, "learning_rate": 2.2908155249143038e-05, "loss": 1.1137299537658691, "step": 2259 }, { "epoch": 0.18507713253283378, "grad_norm": 0.1923828125, "learning_rate": 2.2907112356340756e-05, "loss": 1.041411280632019, "step": 2260 }, { "epoch": 0.18515902506935275, "grad_norm": 0.2021484375, "learning_rate": 2.2906068989470448e-05, "loss": 0.9275497794151306, "step": 2261 }, { "epoch": 0.1852409176058717, "grad_norm": 0.169921875, "learning_rate": 2.2905025148577467e-05, "loss": 0.9737173318862915, "step": 2262 }, { "epoch": 0.18532281014239066, "grad_norm": 0.1884765625, "learning_rate": 2.2903980833707172e-05, "loss": 1.1345387697219849, "step": 2263 }, { "epoch": 0.1854047026789096, "grad_norm": 0.1376953125, "learning_rate": 2.2902936044904963e-05, "loss": 0.5863505601882935, "step": 2264 }, { "epoch": 0.18548659521542854, "grad_norm": 0.140625, "learning_rate": 2.2901890782216252e-05, "loss": 0.8046703338623047, "step": 2265 }, { "epoch": 0.1855684877519475, "grad_norm": 0.1982421875, "learning_rate": 2.290084504568646e-05, "loss": 0.929973840713501, "step": 2266 }, { "epoch": 0.18565038028846645, "grad_norm": 0.220703125, "learning_rate": 2.289979883536105e-05, "loss": 1.036879062652588, "step": 2267 }, { "epoch": 0.18573227282498542, "grad_norm": 0.14453125, "learning_rate": 2.2898752151285485e-05, "loss": 0.45054754614830017, "step": 2268 }, { "epoch": 0.18581416536150436, "grad_norm": 0.1533203125, "learning_rate": 2.2897704993505266e-05, "loss": 0.8195974230766296, "step": 2269 }, { "epoch": 0.18589605789802333, "grad_norm": 0.1484375, "learning_rate": 2.28966573620659e-05, "loss": 0.941970705986023, "step": 2270 }, { "epoch": 0.18597795043454227, "grad_norm": 0.19140625, "learning_rate": 2.2895609257012926e-05, "loss": 0.7935249209403992, "step": 2271 }, { "epoch": 0.18605984297106123, "grad_norm": 0.1845703125, "learning_rate": 2.28945606783919e-05, "loss": 0.9533289074897766, "step": 2272 }, { "epoch": 0.18614173550758017, "grad_norm": 0.1640625, "learning_rate": 2.2893511626248396e-05, "loss": 0.830804169178009, "step": 2273 }, { "epoch": 0.18622362804409914, "grad_norm": 0.296875, "learning_rate": 2.2892462100628006e-05, "loss": 0.9977461099624634, "step": 2274 }, { "epoch": 0.18630552058061808, "grad_norm": 0.1982421875, "learning_rate": 2.2891412101576356e-05, "loss": 0.6901416182518005, "step": 2275 }, { "epoch": 0.18638741311713702, "grad_norm": 0.1923828125, "learning_rate": 2.289036162913907e-05, "loss": 1.090146541595459, "step": 2276 }, { "epoch": 0.186469305653656, "grad_norm": 0.142578125, "learning_rate": 2.288931068336182e-05, "loss": 0.8273465633392334, "step": 2277 }, { "epoch": 0.18655119819017493, "grad_norm": 0.1806640625, "learning_rate": 2.288825926429028e-05, "loss": 0.8297091126441956, "step": 2278 }, { "epoch": 0.1866330907266939, "grad_norm": 0.1572265625, "learning_rate": 2.288720737197014e-05, "loss": 0.6991950273513794, "step": 2279 }, { "epoch": 0.18671498326321284, "grad_norm": 0.15625, "learning_rate": 2.2886155006447125e-05, "loss": 0.8491441607475281, "step": 2280 }, { "epoch": 0.1867968757997318, "grad_norm": 0.1572265625, "learning_rate": 2.2885102167766985e-05, "loss": 0.8676788806915283, "step": 2281 }, { "epoch": 0.18687876833625075, "grad_norm": 0.166015625, "learning_rate": 2.2884048855975465e-05, "loss": 0.8516773581504822, "step": 2282 }, { "epoch": 0.18696066087276972, "grad_norm": 0.1982421875, "learning_rate": 2.2882995071118357e-05, "loss": 1.1552330255508423, "step": 2283 }, { "epoch": 0.18704255340928866, "grad_norm": 0.173828125, "learning_rate": 2.288194081324146e-05, "loss": 0.8443632125854492, "step": 2284 }, { "epoch": 0.18712444594580763, "grad_norm": 0.181640625, "learning_rate": 2.2880886082390595e-05, "loss": 0.8985230922698975, "step": 2285 }, { "epoch": 0.18720633848232657, "grad_norm": 0.169921875, "learning_rate": 2.2879830878611606e-05, "loss": 0.9845249652862549, "step": 2286 }, { "epoch": 0.1872882310188455, "grad_norm": 0.17578125, "learning_rate": 2.2878775201950354e-05, "loss": 0.6851732134819031, "step": 2287 }, { "epoch": 0.18737012355536448, "grad_norm": 0.15234375, "learning_rate": 2.287771905245273e-05, "loss": 0.8991727828979492, "step": 2288 }, { "epoch": 0.18745201609188342, "grad_norm": 0.19921875, "learning_rate": 2.287666243016463e-05, "loss": 0.8521350622177124, "step": 2289 }, { "epoch": 0.18753390862840238, "grad_norm": 0.1455078125, "learning_rate": 2.2875605335131986e-05, "loss": 0.8241181969642639, "step": 2290 }, { "epoch": 0.18761580116492133, "grad_norm": 0.1943359375, "learning_rate": 2.287454776740074e-05, "loss": 1.0478544235229492, "step": 2291 }, { "epoch": 0.1876976937014403, "grad_norm": 0.1669921875, "learning_rate": 2.2873489727016856e-05, "loss": 0.9343612194061279, "step": 2292 }, { "epoch": 0.18777958623795923, "grad_norm": 0.2314453125, "learning_rate": 2.287243121402633e-05, "loss": 0.8143721222877502, "step": 2293 }, { "epoch": 0.1878614787744782, "grad_norm": 0.177734375, "learning_rate": 2.287137222847516e-05, "loss": 0.730394721031189, "step": 2294 }, { "epoch": 0.18794337131099714, "grad_norm": 0.1728515625, "learning_rate": 2.2870312770409374e-05, "loss": 0.9779607653617859, "step": 2295 }, { "epoch": 0.18802526384751608, "grad_norm": 0.1552734375, "learning_rate": 2.2869252839875024e-05, "loss": 0.9047526121139526, "step": 2296 }, { "epoch": 0.18810715638403505, "grad_norm": 0.16015625, "learning_rate": 2.2868192436918182e-05, "loss": 0.6098130941390991, "step": 2297 }, { "epoch": 0.188189048920554, "grad_norm": 0.2197265625, "learning_rate": 2.2867131561584933e-05, "loss": 1.2453804016113281, "step": 2298 }, { "epoch": 0.18827094145707296, "grad_norm": 0.1943359375, "learning_rate": 2.2866070213921385e-05, "loss": 1.0133761167526245, "step": 2299 }, { "epoch": 0.1883528339935919, "grad_norm": 0.1669921875, "learning_rate": 2.2865008393973676e-05, "loss": 0.9602872133255005, "step": 2300 }, { "epoch": 0.18843472653011087, "grad_norm": 0.1640625, "learning_rate": 2.2863946101787948e-05, "loss": 0.8568207621574402, "step": 2301 }, { "epoch": 0.1885166190666298, "grad_norm": 0.1513671875, "learning_rate": 2.2862883337410378e-05, "loss": 0.7341924905776978, "step": 2302 }, { "epoch": 0.18859851160314878, "grad_norm": 0.1513671875, "learning_rate": 2.286182010088716e-05, "loss": 0.8664823770523071, "step": 2303 }, { "epoch": 0.18868040413966772, "grad_norm": 0.146484375, "learning_rate": 2.28607563922645e-05, "loss": 0.6416254043579102, "step": 2304 }, { "epoch": 0.18876229667618669, "grad_norm": 0.15234375, "learning_rate": 2.285969221158864e-05, "loss": 0.9313453435897827, "step": 2305 }, { "epoch": 0.18884418921270563, "grad_norm": 0.1943359375, "learning_rate": 2.2858627558905822e-05, "loss": 1.1781384944915771, "step": 2306 }, { "epoch": 0.18892608174922457, "grad_norm": 0.1796875, "learning_rate": 2.2857562434262332e-05, "loss": 0.8042875528335571, "step": 2307 }, { "epoch": 0.18900797428574354, "grad_norm": 0.177734375, "learning_rate": 2.2856496837704458e-05, "loss": 0.8714444041252136, "step": 2308 }, { "epoch": 0.18908986682226248, "grad_norm": 0.16796875, "learning_rate": 2.285543076927852e-05, "loss": 0.8938601613044739, "step": 2309 }, { "epoch": 0.18917175935878144, "grad_norm": 0.197265625, "learning_rate": 2.2854364229030845e-05, "loss": 1.0221461057662964, "step": 2310 }, { "epoch": 0.18925365189530038, "grad_norm": 0.1787109375, "learning_rate": 2.28532972170078e-05, "loss": 0.9000333547592163, "step": 2311 }, { "epoch": 0.18933554443181935, "grad_norm": 0.177734375, "learning_rate": 2.2852229733255755e-05, "loss": 0.7862361669540405, "step": 2312 }, { "epoch": 0.1894174369683383, "grad_norm": 0.1953125, "learning_rate": 2.2851161777821105e-05, "loss": 0.8476905822753906, "step": 2313 }, { "epoch": 0.18949932950485726, "grad_norm": 0.203125, "learning_rate": 2.2850093350750276e-05, "loss": 0.9489936232566833, "step": 2314 }, { "epoch": 0.1895812220413762, "grad_norm": 0.16796875, "learning_rate": 2.2849024452089697e-05, "loss": 0.9766147136688232, "step": 2315 }, { "epoch": 0.18966311457789517, "grad_norm": 0.16796875, "learning_rate": 2.284795508188584e-05, "loss": 0.9581578969955444, "step": 2316 }, { "epoch": 0.1897450071144141, "grad_norm": 0.201171875, "learning_rate": 2.284688524018517e-05, "loss": 0.9424863457679749, "step": 2317 }, { "epoch": 0.18982689965093305, "grad_norm": 0.177734375, "learning_rate": 2.284581492703419e-05, "loss": 0.9911003112792969, "step": 2318 }, { "epoch": 0.18990879218745202, "grad_norm": 0.166015625, "learning_rate": 2.2844744142479424e-05, "loss": 0.9128723740577698, "step": 2319 }, { "epoch": 0.18999068472397096, "grad_norm": 0.2236328125, "learning_rate": 2.2843672886567413e-05, "loss": 0.8795434236526489, "step": 2320 }, { "epoch": 0.19007257726048993, "grad_norm": 0.2099609375, "learning_rate": 2.284260115934472e-05, "loss": 0.9829942584037781, "step": 2321 }, { "epoch": 0.19015446979700887, "grad_norm": 0.1533203125, "learning_rate": 2.2841528960857917e-05, "loss": 0.7064555883407593, "step": 2322 }, { "epoch": 0.19023636233352784, "grad_norm": 0.2099609375, "learning_rate": 2.2840456291153612e-05, "loss": 1.0536891222000122, "step": 2323 }, { "epoch": 0.19031825487004678, "grad_norm": 0.17578125, "learning_rate": 2.283938315027843e-05, "loss": 1.0481301546096802, "step": 2324 }, { "epoch": 0.19040014740656575, "grad_norm": 0.1943359375, "learning_rate": 2.2838309538279015e-05, "loss": 0.758525013923645, "step": 2325 }, { "epoch": 0.19048203994308469, "grad_norm": 0.1826171875, "learning_rate": 2.2837235455202026e-05, "loss": 0.871162474155426, "step": 2326 }, { "epoch": 0.19056393247960365, "grad_norm": 0.16796875, "learning_rate": 2.2836160901094148e-05, "loss": 0.9626050591468811, "step": 2327 }, { "epoch": 0.1906458250161226, "grad_norm": 0.1611328125, "learning_rate": 2.2835085876002084e-05, "loss": 0.7131202816963196, "step": 2328 }, { "epoch": 0.19072771755264153, "grad_norm": 0.1748046875, "learning_rate": 2.2834010379972566e-05, "loss": 0.6760917901992798, "step": 2329 }, { "epoch": 0.1908096100891605, "grad_norm": 0.181640625, "learning_rate": 2.2832934413052333e-05, "loss": 0.9866670966148376, "step": 2330 }, { "epoch": 0.19089150262567944, "grad_norm": 0.1806640625, "learning_rate": 2.2831857975288155e-05, "loss": 1.0584834814071655, "step": 2331 }, { "epoch": 0.1909733951621984, "grad_norm": 0.169921875, "learning_rate": 2.2830781066726816e-05, "loss": 1.0270410776138306, "step": 2332 }, { "epoch": 0.19105528769871735, "grad_norm": 0.1796875, "learning_rate": 2.282970368741512e-05, "loss": 1.1342506408691406, "step": 2333 }, { "epoch": 0.19113718023523632, "grad_norm": 0.1728515625, "learning_rate": 2.2828625837399898e-05, "loss": 0.8717266321182251, "step": 2334 }, { "epoch": 0.19121907277175526, "grad_norm": 0.166015625, "learning_rate": 2.2827547516727997e-05, "loss": 0.6416990160942078, "step": 2335 }, { "epoch": 0.19130096530827423, "grad_norm": 0.19921875, "learning_rate": 2.2826468725446285e-05, "loss": 0.7129364013671875, "step": 2336 }, { "epoch": 0.19138285784479317, "grad_norm": 0.1640625, "learning_rate": 2.2825389463601654e-05, "loss": 0.745063066482544, "step": 2337 }, { "epoch": 0.1914647503813121, "grad_norm": 0.1484375, "learning_rate": 2.2824309731241008e-05, "loss": 0.6407421231269836, "step": 2338 }, { "epoch": 0.19154664291783108, "grad_norm": 0.1396484375, "learning_rate": 2.282322952841128e-05, "loss": 0.6431829333305359, "step": 2339 }, { "epoch": 0.19162853545435002, "grad_norm": 0.1806640625, "learning_rate": 2.282214885515942e-05, "loss": 0.6795263886451721, "step": 2340 }, { "epoch": 0.191710427990869, "grad_norm": 0.169921875, "learning_rate": 2.28210677115324e-05, "loss": 0.9546639919281006, "step": 2341 }, { "epoch": 0.19179232052738793, "grad_norm": 0.1513671875, "learning_rate": 2.2819986097577203e-05, "loss": 0.6644640564918518, "step": 2342 }, { "epoch": 0.1918742130639069, "grad_norm": 0.16015625, "learning_rate": 2.2818904013340847e-05, "loss": 0.7293444275856018, "step": 2343 }, { "epoch": 0.19195610560042584, "grad_norm": 0.193359375, "learning_rate": 2.2817821458870366e-05, "loss": 1.1363742351531982, "step": 2344 }, { "epoch": 0.1920379981369448, "grad_norm": 0.1884765625, "learning_rate": 2.2816738434212808e-05, "loss": 0.7027411460876465, "step": 2345 }, { "epoch": 0.19211989067346374, "grad_norm": 0.166015625, "learning_rate": 2.2815654939415243e-05, "loss": 0.8657201528549194, "step": 2346 }, { "epoch": 0.1922017832099827, "grad_norm": 0.1962890625, "learning_rate": 2.2814570974524773e-05, "loss": 0.7708097696304321, "step": 2347 }, { "epoch": 0.19228367574650165, "grad_norm": 0.205078125, "learning_rate": 2.2813486539588505e-05, "loss": 1.0157450437545776, "step": 2348 }, { "epoch": 0.1923655682830206, "grad_norm": 0.16015625, "learning_rate": 2.2812401634653572e-05, "loss": 1.0896971225738525, "step": 2349 }, { "epoch": 0.19244746081953956, "grad_norm": 0.2216796875, "learning_rate": 2.2811316259767136e-05, "loss": 0.7974168658256531, "step": 2350 }, { "epoch": 0.1925293533560585, "grad_norm": 0.16015625, "learning_rate": 2.281023041497636e-05, "loss": 1.0025204420089722, "step": 2351 }, { "epoch": 0.19261124589257747, "grad_norm": 0.15234375, "learning_rate": 2.2809144100328452e-05, "loss": 1.0021412372589111, "step": 2352 }, { "epoch": 0.1926931384290964, "grad_norm": 0.1787109375, "learning_rate": 2.280805731587062e-05, "loss": 0.6389228105545044, "step": 2353 }, { "epoch": 0.19277503096561538, "grad_norm": 0.1708984375, "learning_rate": 2.2806970061650102e-05, "loss": 1.2039275169372559, "step": 2354 }, { "epoch": 0.19285692350213432, "grad_norm": 0.177734375, "learning_rate": 2.2805882337714152e-05, "loss": 0.743495523929596, "step": 2355 }, { "epoch": 0.1929388160386533, "grad_norm": 0.1982421875, "learning_rate": 2.280479414411005e-05, "loss": 0.9987788200378418, "step": 2356 }, { "epoch": 0.19302070857517223, "grad_norm": 0.169921875, "learning_rate": 2.2803705480885094e-05, "loss": 0.7725076079368591, "step": 2357 }, { "epoch": 0.1931026011116912, "grad_norm": 0.1728515625, "learning_rate": 2.2802616348086602e-05, "loss": 0.9377445578575134, "step": 2358 }, { "epoch": 0.19318449364821014, "grad_norm": 0.1953125, "learning_rate": 2.280152674576191e-05, "loss": 0.9206058382987976, "step": 2359 }, { "epoch": 0.19326638618472908, "grad_norm": 0.28515625, "learning_rate": 2.2800436673958378e-05, "loss": 0.8345106244087219, "step": 2360 }, { "epoch": 0.19334827872124805, "grad_norm": 0.140625, "learning_rate": 2.2799346132723384e-05, "loss": 0.7060814499855042, "step": 2361 }, { "epoch": 0.193430171257767, "grad_norm": 0.1845703125, "learning_rate": 2.2798255122104324e-05, "loss": 0.9959428310394287, "step": 2362 }, { "epoch": 0.19351206379428595, "grad_norm": 0.177734375, "learning_rate": 2.2797163642148628e-05, "loss": 0.9122245907783508, "step": 2363 }, { "epoch": 0.1935939563308049, "grad_norm": 0.1689453125, "learning_rate": 2.2796071692903726e-05, "loss": 0.9186090230941772, "step": 2364 }, { "epoch": 0.19367584886732386, "grad_norm": 0.16796875, "learning_rate": 2.2794979274417086e-05, "loss": 0.7945417761802673, "step": 2365 }, { "epoch": 0.1937577414038428, "grad_norm": 0.171875, "learning_rate": 2.279388638673618e-05, "loss": 0.7534151673316956, "step": 2366 }, { "epoch": 0.19383963394036177, "grad_norm": 0.1865234375, "learning_rate": 2.2792793029908517e-05, "loss": 0.8852794170379639, "step": 2367 }, { "epoch": 0.1939215264768807, "grad_norm": 0.171875, "learning_rate": 2.279169920398162e-05, "loss": 0.87395840883255, "step": 2368 }, { "epoch": 0.19400341901339965, "grad_norm": 0.173828125, "learning_rate": 2.2790604909003025e-05, "loss": 1.0682847499847412, "step": 2369 }, { "epoch": 0.19408531154991862, "grad_norm": 0.16796875, "learning_rate": 2.2789510145020297e-05, "loss": 0.6849828362464905, "step": 2370 }, { "epoch": 0.19416720408643756, "grad_norm": 0.19140625, "learning_rate": 2.2788414912081017e-05, "loss": 0.8124204874038696, "step": 2371 }, { "epoch": 0.19424909662295653, "grad_norm": 0.1611328125, "learning_rate": 2.2787319210232795e-05, "loss": 0.7958555221557617, "step": 2372 }, { "epoch": 0.19433098915947547, "grad_norm": 0.19921875, "learning_rate": 2.278622303952325e-05, "loss": 1.0223562717437744, "step": 2373 }, { "epoch": 0.19441288169599444, "grad_norm": 0.208984375, "learning_rate": 2.2785126400000028e-05, "loss": 0.8524122834205627, "step": 2374 }, { "epoch": 0.19449477423251338, "grad_norm": 0.150390625, "learning_rate": 2.2784029291710785e-05, "loss": 0.9349551200866699, "step": 2375 }, { "epoch": 0.19457666676903235, "grad_norm": 0.181640625, "learning_rate": 2.278293171470322e-05, "loss": 1.0573779344558716, "step": 2376 }, { "epoch": 0.1946585593055513, "grad_norm": 0.2119140625, "learning_rate": 2.2781833669025026e-05, "loss": 1.039783000946045, "step": 2377 }, { "epoch": 0.19474045184207026, "grad_norm": 0.1689453125, "learning_rate": 2.2780735154723937e-05, "loss": 1.1323715448379517, "step": 2378 }, { "epoch": 0.1948223443785892, "grad_norm": 0.1689453125, "learning_rate": 2.2779636171847694e-05, "loss": 0.8134557008743286, "step": 2379 }, { "epoch": 0.19490423691510814, "grad_norm": 0.1513671875, "learning_rate": 2.2778536720444067e-05, "loss": 0.6291362643241882, "step": 2380 }, { "epoch": 0.1949861294516271, "grad_norm": 0.1943359375, "learning_rate": 2.277743680056084e-05, "loss": 0.7305190563201904, "step": 2381 }, { "epoch": 0.19506802198814605, "grad_norm": 0.146484375, "learning_rate": 2.277633641224582e-05, "loss": 0.9288169145584106, "step": 2382 }, { "epoch": 0.195149914524665, "grad_norm": 0.19921875, "learning_rate": 2.2775235555546834e-05, "loss": 0.918010950088501, "step": 2383 }, { "epoch": 0.19523180706118395, "grad_norm": 0.1806640625, "learning_rate": 2.2774134230511728e-05, "loss": 0.7995038032531738, "step": 2384 }, { "epoch": 0.19531369959770292, "grad_norm": 0.1650390625, "learning_rate": 2.277303243718838e-05, "loss": 0.9856702089309692, "step": 2385 }, { "epoch": 0.19539559213422186, "grad_norm": 0.1923828125, "learning_rate": 2.2771930175624665e-05, "loss": 1.0170800685882568, "step": 2386 }, { "epoch": 0.19547748467074083, "grad_norm": 0.19140625, "learning_rate": 2.2770827445868504e-05, "loss": 0.7401501536369324, "step": 2387 }, { "epoch": 0.19555937720725977, "grad_norm": 0.173828125, "learning_rate": 2.2769724247967817e-05, "loss": 0.7248157858848572, "step": 2388 }, { "epoch": 0.19564126974377874, "grad_norm": 0.1611328125, "learning_rate": 2.2768620581970557e-05, "loss": 0.8135899901390076, "step": 2389 }, { "epoch": 0.19572316228029768, "grad_norm": 0.1552734375, "learning_rate": 2.2767516447924692e-05, "loss": 0.591747522354126, "step": 2390 }, { "epoch": 0.19580505481681662, "grad_norm": 0.1611328125, "learning_rate": 2.2766411845878217e-05, "loss": 0.9034044146537781, "step": 2391 }, { "epoch": 0.1958869473533356, "grad_norm": 0.1708984375, "learning_rate": 2.276530677587914e-05, "loss": 0.6724435091018677, "step": 2392 }, { "epoch": 0.19596883988985453, "grad_norm": 0.1923828125, "learning_rate": 2.2764201237975493e-05, "loss": 0.8551095724105835, "step": 2393 }, { "epoch": 0.1960507324263735, "grad_norm": 0.171875, "learning_rate": 2.276309523221532e-05, "loss": 0.6165861487388611, "step": 2394 }, { "epoch": 0.19613262496289244, "grad_norm": 0.166015625, "learning_rate": 2.2761988758646703e-05, "loss": 1.0519481897354126, "step": 2395 }, { "epoch": 0.1962145174994114, "grad_norm": 0.2216796875, "learning_rate": 2.276088181731773e-05, "loss": 0.8137823939323425, "step": 2396 }, { "epoch": 0.19629641003593035, "grad_norm": 0.16796875, "learning_rate": 2.275977440827651e-05, "loss": 1.119165301322937, "step": 2397 }, { "epoch": 0.19637830257244931, "grad_norm": 0.150390625, "learning_rate": 2.2758666531571176e-05, "loss": 0.9124765396118164, "step": 2398 }, { "epoch": 0.19646019510896826, "grad_norm": 0.193359375, "learning_rate": 2.2757558187249883e-05, "loss": 0.7939457893371582, "step": 2399 }, { "epoch": 0.1965420876454872, "grad_norm": 0.169921875, "learning_rate": 2.275644937536081e-05, "loss": 0.5007310509681702, "step": 2400 }, { "epoch": 0.19662398018200616, "grad_norm": 0.14453125, "learning_rate": 2.275534009595214e-05, "loss": 0.7983091473579407, "step": 2401 }, { "epoch": 0.1967058727185251, "grad_norm": 0.2412109375, "learning_rate": 2.275423034907209e-05, "loss": 0.6883283257484436, "step": 2402 }, { "epoch": 0.19678776525504407, "grad_norm": 0.2041015625, "learning_rate": 2.2753120134768897e-05, "loss": 0.8722920417785645, "step": 2403 }, { "epoch": 0.196869657791563, "grad_norm": 0.16796875, "learning_rate": 2.2752009453090815e-05, "loss": 0.9087334871292114, "step": 2404 }, { "epoch": 0.19695155032808198, "grad_norm": 0.1748046875, "learning_rate": 2.275089830408612e-05, "loss": 0.8070912957191467, "step": 2405 }, { "epoch": 0.19703344286460092, "grad_norm": 0.1806640625, "learning_rate": 2.2749786687803098e-05, "loss": 0.6768667697906494, "step": 2406 }, { "epoch": 0.1971153354011199, "grad_norm": 0.1884765625, "learning_rate": 2.2748674604290076e-05, "loss": 0.9066121578216553, "step": 2407 }, { "epoch": 0.19719722793763883, "grad_norm": 0.169921875, "learning_rate": 2.2747562053595388e-05, "loss": 0.799535870552063, "step": 2408 }, { "epoch": 0.1972791204741578, "grad_norm": 0.1640625, "learning_rate": 2.2746449035767383e-05, "loss": 0.7664024233818054, "step": 2409 }, { "epoch": 0.19736101301067674, "grad_norm": 0.203125, "learning_rate": 2.2745335550854443e-05, "loss": 0.8135346174240112, "step": 2410 }, { "epoch": 0.19744290554719568, "grad_norm": 0.2001953125, "learning_rate": 2.2744221598904965e-05, "loss": 1.1084506511688232, "step": 2411 }, { "epoch": 0.19752479808371465, "grad_norm": 0.154296875, "learning_rate": 2.274310717996736e-05, "loss": 0.977137565612793, "step": 2412 }, { "epoch": 0.1976066906202336, "grad_norm": 0.14453125, "learning_rate": 2.274199229409007e-05, "loss": 0.7611242532730103, "step": 2413 }, { "epoch": 0.19768858315675256, "grad_norm": 0.1767578125, "learning_rate": 2.2740876941321554e-05, "loss": 0.8063002824783325, "step": 2414 }, { "epoch": 0.1977704756932715, "grad_norm": 0.21484375, "learning_rate": 2.2739761121710288e-05, "loss": 0.8747677803039551, "step": 2415 }, { "epoch": 0.19785236822979047, "grad_norm": 0.1455078125, "learning_rate": 2.273864483530477e-05, "loss": 0.883658230304718, "step": 2416 }, { "epoch": 0.1979342607663094, "grad_norm": 0.1962890625, "learning_rate": 2.273752808215352e-05, "loss": 0.707085907459259, "step": 2417 }, { "epoch": 0.19801615330282837, "grad_norm": 0.1826171875, "learning_rate": 2.273641086230507e-05, "loss": 1.0529742240905762, "step": 2418 }, { "epoch": 0.19809804583934731, "grad_norm": 0.1806640625, "learning_rate": 2.2735293175807987e-05, "loss": 0.8960705995559692, "step": 2419 }, { "epoch": 0.19817993837586628, "grad_norm": 0.1748046875, "learning_rate": 2.2734175022710845e-05, "loss": 0.992230236530304, "step": 2420 }, { "epoch": 0.19826183091238522, "grad_norm": 0.1669921875, "learning_rate": 2.2733056403062245e-05, "loss": 0.6943228840827942, "step": 2421 }, { "epoch": 0.19834372344890416, "grad_norm": 0.1748046875, "learning_rate": 2.273193731691081e-05, "loss": 0.829267144203186, "step": 2422 }, { "epoch": 0.19842561598542313, "grad_norm": 0.166015625, "learning_rate": 2.2730817764305176e-05, "loss": 1.0084446668624878, "step": 2423 }, { "epoch": 0.19850750852194207, "grad_norm": 0.1787109375, "learning_rate": 2.2729697745294006e-05, "loss": 0.667164146900177, "step": 2424 }, { "epoch": 0.19858940105846104, "grad_norm": 0.1611328125, "learning_rate": 2.272857725992598e-05, "loss": 0.8183192014694214, "step": 2425 }, { "epoch": 0.19867129359497998, "grad_norm": 0.158203125, "learning_rate": 2.27274563082498e-05, "loss": 0.924652099609375, "step": 2426 }, { "epoch": 0.19875318613149895, "grad_norm": 0.1982421875, "learning_rate": 2.2726334890314182e-05, "loss": 0.9087849259376526, "step": 2427 }, { "epoch": 0.1988350786680179, "grad_norm": 0.1611328125, "learning_rate": 2.2725213006167873e-05, "loss": 1.170980453491211, "step": 2428 }, { "epoch": 0.19891697120453686, "grad_norm": 0.1962890625, "learning_rate": 2.272409065585963e-05, "loss": 0.8187077045440674, "step": 2429 }, { "epoch": 0.1989988637410558, "grad_norm": 0.1728515625, "learning_rate": 2.2722967839438242e-05, "loss": 0.9408527612686157, "step": 2430 }, { "epoch": 0.19908075627757477, "grad_norm": 0.404296875, "learning_rate": 2.2721844556952505e-05, "loss": 0.669274091720581, "step": 2431 }, { "epoch": 0.1991626488140937, "grad_norm": 0.1787109375, "learning_rate": 2.2720720808451246e-05, "loss": 0.7295891046524048, "step": 2432 }, { "epoch": 0.19924454135061265, "grad_norm": 0.169921875, "learning_rate": 2.2719596593983302e-05, "loss": 0.9046094417572021, "step": 2433 }, { "epoch": 0.19932643388713162, "grad_norm": 0.1640625, "learning_rate": 2.271847191359754e-05, "loss": 0.8711844682693481, "step": 2434 }, { "epoch": 0.19940832642365056, "grad_norm": 0.1845703125, "learning_rate": 2.2717346767342843e-05, "loss": 0.6914234757423401, "step": 2435 }, { "epoch": 0.19949021896016952, "grad_norm": 0.2158203125, "learning_rate": 2.2716221155268114e-05, "loss": 1.1778360605239868, "step": 2436 }, { "epoch": 0.19957211149668846, "grad_norm": 0.1767578125, "learning_rate": 2.271509507742228e-05, "loss": 0.7482850551605225, "step": 2437 }, { "epoch": 0.19965400403320743, "grad_norm": 0.1796875, "learning_rate": 2.2713968533854274e-05, "loss": 0.8816819787025452, "step": 2438 }, { "epoch": 0.19973589656972637, "grad_norm": 0.1591796875, "learning_rate": 2.2712841524613074e-05, "loss": 0.7633107900619507, "step": 2439 }, { "epoch": 0.19981778910624534, "grad_norm": 0.173828125, "learning_rate": 2.2711714049747658e-05, "loss": 0.8391861915588379, "step": 2440 }, { "epoch": 0.19989968164276428, "grad_norm": 0.142578125, "learning_rate": 2.271058610930703e-05, "loss": 0.6692081093788147, "step": 2441 }, { "epoch": 0.19998157417928322, "grad_norm": 0.173828125, "learning_rate": 2.2709457703340217e-05, "loss": 0.9222955107688904, "step": 2442 }, { "epoch": 0.2000634667158022, "grad_norm": 0.1591796875, "learning_rate": 2.270832883189626e-05, "loss": 0.5949663519859314, "step": 2443 }, { "epoch": 0.20014535925232113, "grad_norm": 0.1826171875, "learning_rate": 2.270719949502423e-05, "loss": 1.123129963874817, "step": 2444 }, { "epoch": 0.2002272517888401, "grad_norm": 0.1884765625, "learning_rate": 2.270606969277321e-05, "loss": 1.356101632118225, "step": 2445 }, { "epoch": 0.20030914432535904, "grad_norm": 0.173828125, "learning_rate": 2.2704939425192305e-05, "loss": 0.9444726705551147, "step": 2446 }, { "epoch": 0.200391036861878, "grad_norm": 0.1689453125, "learning_rate": 2.2703808692330643e-05, "loss": 0.8924837708473206, "step": 2447 }, { "epoch": 0.20047292939839695, "grad_norm": 0.1552734375, "learning_rate": 2.270267749423737e-05, "loss": 0.8812747597694397, "step": 2448 }, { "epoch": 0.20055482193491592, "grad_norm": 0.14453125, "learning_rate": 2.270154583096165e-05, "loss": 0.64066481590271, "step": 2449 }, { "epoch": 0.20063671447143486, "grad_norm": 0.1513671875, "learning_rate": 2.2700413702552673e-05, "loss": 0.7396572232246399, "step": 2450 }, { "epoch": 0.20071860700795383, "grad_norm": 0.1630859375, "learning_rate": 2.269928110905965e-05, "loss": 0.847211480140686, "step": 2451 }, { "epoch": 0.20080049954447277, "grad_norm": 0.1923828125, "learning_rate": 2.2698148050531794e-05, "loss": 0.7291325330734253, "step": 2452 }, { "epoch": 0.2008823920809917, "grad_norm": 0.1865234375, "learning_rate": 2.2697014527018365e-05, "loss": 0.7133866548538208, "step": 2453 }, { "epoch": 0.20096428461751067, "grad_norm": 0.162109375, "learning_rate": 2.269588053856863e-05, "loss": 0.984965980052948, "step": 2454 }, { "epoch": 0.20104617715402961, "grad_norm": 0.1845703125, "learning_rate": 2.269474608523187e-05, "loss": 0.935857355594635, "step": 2455 }, { "epoch": 0.20112806969054858, "grad_norm": 0.1904296875, "learning_rate": 2.2693611167057397e-05, "loss": 0.7755582928657532, "step": 2456 }, { "epoch": 0.20120996222706752, "grad_norm": 0.177734375, "learning_rate": 2.269247578409454e-05, "loss": 0.7424017190933228, "step": 2457 }, { "epoch": 0.2012918547635865, "grad_norm": 0.1708984375, "learning_rate": 2.2691339936392644e-05, "loss": 0.7800498604774475, "step": 2458 }, { "epoch": 0.20137374730010543, "grad_norm": 0.16796875, "learning_rate": 2.269020362400108e-05, "loss": 0.6788142919540405, "step": 2459 }, { "epoch": 0.2014556398366244, "grad_norm": 0.1728515625, "learning_rate": 2.2689066846969243e-05, "loss": 0.755730390548706, "step": 2460 }, { "epoch": 0.20153753237314334, "grad_norm": 0.20703125, "learning_rate": 2.268792960534653e-05, "loss": 0.8113637566566467, "step": 2461 }, { "epoch": 0.2016194249096623, "grad_norm": 0.1884765625, "learning_rate": 2.2686791899182377e-05, "loss": 1.0466136932373047, "step": 2462 }, { "epoch": 0.20170131744618125, "grad_norm": 0.15234375, "learning_rate": 2.268565372852623e-05, "loss": 0.8877158761024475, "step": 2463 }, { "epoch": 0.2017832099827002, "grad_norm": 0.2099609375, "learning_rate": 2.2684515093427565e-05, "loss": 0.8467962145805359, "step": 2464 }, { "epoch": 0.20186510251921916, "grad_norm": 0.1767578125, "learning_rate": 2.268337599393587e-05, "loss": 0.807767927646637, "step": 2465 }, { "epoch": 0.2019469950557381, "grad_norm": 0.2109375, "learning_rate": 2.268223643010065e-05, "loss": 0.7990671992301941, "step": 2466 }, { "epoch": 0.20202888759225707, "grad_norm": 0.1826171875, "learning_rate": 2.2681096401971433e-05, "loss": 0.8404882550239563, "step": 2467 }, { "epoch": 0.202110780128776, "grad_norm": 0.1640625, "learning_rate": 2.267995590959778e-05, "loss": 0.9492491483688354, "step": 2468 }, { "epoch": 0.20219267266529498, "grad_norm": 0.1650390625, "learning_rate": 2.2678814953029253e-05, "loss": 0.8310052156448364, "step": 2469 }, { "epoch": 0.20227456520181392, "grad_norm": 0.2109375, "learning_rate": 2.2677673532315448e-05, "loss": 0.7433747053146362, "step": 2470 }, { "epoch": 0.20235645773833288, "grad_norm": 0.1787109375, "learning_rate": 2.2676531647505968e-05, "loss": 0.8398047089576721, "step": 2471 }, { "epoch": 0.20243835027485182, "grad_norm": 0.1865234375, "learning_rate": 2.2675389298650454e-05, "loss": 0.6458292603492737, "step": 2472 }, { "epoch": 0.20252024281137077, "grad_norm": 0.208984375, "learning_rate": 2.2674246485798552e-05, "loss": 1.076598882675171, "step": 2473 }, { "epoch": 0.20260213534788973, "grad_norm": 0.267578125, "learning_rate": 2.267310320899993e-05, "loss": 0.8773399591445923, "step": 2474 }, { "epoch": 0.20268402788440867, "grad_norm": 0.1806640625, "learning_rate": 2.267195946830429e-05, "loss": 0.9287348389625549, "step": 2475 }, { "epoch": 0.20276592042092764, "grad_norm": 0.1787109375, "learning_rate": 2.2670815263761334e-05, "loss": 0.9702558517456055, "step": 2476 }, { "epoch": 0.20284781295744658, "grad_norm": 0.1787109375, "learning_rate": 2.2669670595420793e-05, "loss": 0.705794632434845, "step": 2477 }, { "epoch": 0.20292970549396555, "grad_norm": 0.1923828125, "learning_rate": 2.2668525463332426e-05, "loss": 1.1086801290512085, "step": 2478 }, { "epoch": 0.2030115980304845, "grad_norm": 0.1875, "learning_rate": 2.2667379867546003e-05, "loss": 0.732354998588562, "step": 2479 }, { "epoch": 0.20309349056700346, "grad_norm": 0.1865234375, "learning_rate": 2.266623380811131e-05, "loss": 0.9052433371543884, "step": 2480 }, { "epoch": 0.2031753831035224, "grad_norm": 0.1484375, "learning_rate": 2.2665087285078166e-05, "loss": 0.6810624599456787, "step": 2481 }, { "epoch": 0.20325727564004137, "grad_norm": 0.1708984375, "learning_rate": 2.2663940298496407e-05, "loss": 1.0754530429840088, "step": 2482 }, { "epoch": 0.2033391681765603, "grad_norm": 0.173828125, "learning_rate": 2.266279284841588e-05, "loss": 0.7184799909591675, "step": 2483 }, { "epoch": 0.20342106071307925, "grad_norm": 0.181640625, "learning_rate": 2.2661644934886456e-05, "loss": 0.6900075674057007, "step": 2484 }, { "epoch": 0.20350295324959822, "grad_norm": 0.166015625, "learning_rate": 2.2660496557958038e-05, "loss": 0.9767782092094421, "step": 2485 }, { "epoch": 0.20358484578611716, "grad_norm": 0.171875, "learning_rate": 2.2659347717680523e-05, "loss": 1.0323313474655151, "step": 2486 }, { "epoch": 0.20366673832263613, "grad_norm": 0.1953125, "learning_rate": 2.2658198414103858e-05, "loss": 1.0238064527511597, "step": 2487 }, { "epoch": 0.20374863085915507, "grad_norm": 0.158203125, "learning_rate": 2.2657048647277992e-05, "loss": 0.9866200685501099, "step": 2488 }, { "epoch": 0.20383052339567403, "grad_norm": 0.33203125, "learning_rate": 2.2655898417252898e-05, "loss": 0.993322491645813, "step": 2489 }, { "epoch": 0.20391241593219298, "grad_norm": 0.1796875, "learning_rate": 2.265474772407857e-05, "loss": 0.8741251826286316, "step": 2490 }, { "epoch": 0.20399430846871194, "grad_norm": 0.1669921875, "learning_rate": 2.2653596567805025e-05, "loss": 0.7923253178596497, "step": 2491 }, { "epoch": 0.20407620100523088, "grad_norm": 0.2412109375, "learning_rate": 2.2652444948482287e-05, "loss": 1.2638981342315674, "step": 2492 }, { "epoch": 0.20415809354174985, "grad_norm": 0.1513671875, "learning_rate": 2.2651292866160424e-05, "loss": 0.9013453125953674, "step": 2493 }, { "epoch": 0.2042399860782688, "grad_norm": 0.171875, "learning_rate": 2.2650140320889502e-05, "loss": 0.840977132320404, "step": 2494 }, { "epoch": 0.20432187861478773, "grad_norm": 0.1982421875, "learning_rate": 2.2648987312719618e-05, "loss": 0.810945987701416, "step": 2495 }, { "epoch": 0.2044037711513067, "grad_norm": 0.208984375, "learning_rate": 2.2647833841700886e-05, "loss": 0.9737492799758911, "step": 2496 }, { "epoch": 0.20448566368782564, "grad_norm": 0.1787109375, "learning_rate": 2.264667990788344e-05, "loss": 0.8026618361473083, "step": 2497 }, { "epoch": 0.2045675562243446, "grad_norm": 0.185546875, "learning_rate": 2.264552551131743e-05, "loss": 0.8607867956161499, "step": 2498 }, { "epoch": 0.20464944876086355, "grad_norm": 0.1728515625, "learning_rate": 2.264437065205304e-05, "loss": 0.7500647902488708, "step": 2499 }, { "epoch": 0.20473134129738252, "grad_norm": 0.1396484375, "learning_rate": 2.264321533014046e-05, "loss": 0.6665752530097961, "step": 2500 }, { "epoch": 0.20481323383390146, "grad_norm": 0.1787109375, "learning_rate": 2.2642059545629903e-05, "loss": 1.1682158708572388, "step": 2501 }, { "epoch": 0.20489512637042043, "grad_norm": 0.1591796875, "learning_rate": 2.2640903298571606e-05, "loss": 0.8135837912559509, "step": 2502 }, { "epoch": 0.20497701890693937, "grad_norm": 0.1982421875, "learning_rate": 2.2639746589015827e-05, "loss": 0.5252673029899597, "step": 2503 }, { "epoch": 0.2050589114434583, "grad_norm": 0.1689453125, "learning_rate": 2.263858941701284e-05, "loss": 0.734936535358429, "step": 2504 }, { "epoch": 0.20514080397997728, "grad_norm": 0.1474609375, "learning_rate": 2.263743178261294e-05, "loss": 0.7587753534317017, "step": 2505 }, { "epoch": 0.20522269651649622, "grad_norm": 0.2001953125, "learning_rate": 2.2636273685866445e-05, "loss": 0.9108928442001343, "step": 2506 }, { "epoch": 0.20530458905301519, "grad_norm": 0.185546875, "learning_rate": 2.2635115126823682e-05, "loss": 0.8398345708847046, "step": 2507 }, { "epoch": 0.20538648158953413, "grad_norm": 0.208984375, "learning_rate": 2.263395610553502e-05, "loss": 0.6165756583213806, "step": 2508 }, { "epoch": 0.2054683741260531, "grad_norm": 0.1708984375, "learning_rate": 2.263279662205082e-05, "loss": 1.147895336151123, "step": 2509 }, { "epoch": 0.20555026666257203, "grad_norm": 0.169921875, "learning_rate": 2.2631636676421488e-05, "loss": 0.7744183540344238, "step": 2510 }, { "epoch": 0.205632159199091, "grad_norm": 0.1640625, "learning_rate": 2.263047626869744e-05, "loss": 0.6623430252075195, "step": 2511 }, { "epoch": 0.20571405173560994, "grad_norm": 0.1669921875, "learning_rate": 2.2629315398929108e-05, "loss": 0.7683284282684326, "step": 2512 }, { "epoch": 0.2057959442721289, "grad_norm": 0.1669921875, "learning_rate": 2.262815406716695e-05, "loss": 1.0426021814346313, "step": 2513 }, { "epoch": 0.20587783680864785, "grad_norm": 0.1982421875, "learning_rate": 2.262699227346144e-05, "loss": 1.0416150093078613, "step": 2514 }, { "epoch": 0.2059597293451668, "grad_norm": 0.146484375, "learning_rate": 2.262583001786308e-05, "loss": 0.6190228462219238, "step": 2515 }, { "epoch": 0.20604162188168576, "grad_norm": 0.1865234375, "learning_rate": 2.262466730042238e-05, "loss": 0.8024833798408508, "step": 2516 }, { "epoch": 0.2061235144182047, "grad_norm": 0.1572265625, "learning_rate": 2.262350412118988e-05, "loss": 0.9323862195014954, "step": 2517 }, { "epoch": 0.20620540695472367, "grad_norm": 0.1650390625, "learning_rate": 2.262234048021614e-05, "loss": 0.9894610047340393, "step": 2518 }, { "epoch": 0.2062872994912426, "grad_norm": 0.1591796875, "learning_rate": 2.2621176377551726e-05, "loss": 0.9465169906616211, "step": 2519 }, { "epoch": 0.20636919202776158, "grad_norm": 0.2470703125, "learning_rate": 2.2620011813247246e-05, "loss": 0.8565041422843933, "step": 2520 }, { "epoch": 0.20645108456428052, "grad_norm": 0.1455078125, "learning_rate": 2.261884678735331e-05, "loss": 0.7899689674377441, "step": 2521 }, { "epoch": 0.2065329771007995, "grad_norm": 0.140625, "learning_rate": 2.2617681299920557e-05, "loss": 0.6901161670684814, "step": 2522 }, { "epoch": 0.20661486963731843, "grad_norm": 0.173828125, "learning_rate": 2.2616515350999642e-05, "loss": 1.0179699659347534, "step": 2523 }, { "epoch": 0.2066967621738374, "grad_norm": 0.2099609375, "learning_rate": 2.2615348940641246e-05, "loss": 1.3159328699111938, "step": 2524 }, { "epoch": 0.20677865471035634, "grad_norm": 0.18359375, "learning_rate": 2.2614182068896063e-05, "loss": 0.6894040703773499, "step": 2525 }, { "epoch": 0.20686054724687528, "grad_norm": 0.1787109375, "learning_rate": 2.261301473581481e-05, "loss": 0.9576308131217957, "step": 2526 }, { "epoch": 0.20694243978339424, "grad_norm": 0.169921875, "learning_rate": 2.2611846941448225e-05, "loss": 0.7722223997116089, "step": 2527 }, { "epoch": 0.20702433231991318, "grad_norm": 0.1806640625, "learning_rate": 2.2610678685847067e-05, "loss": 1.1833792924880981, "step": 2528 }, { "epoch": 0.20710622485643215, "grad_norm": 0.18359375, "learning_rate": 2.260950996906211e-05, "loss": 0.7832878232002258, "step": 2529 }, { "epoch": 0.2071881173929511, "grad_norm": 0.1640625, "learning_rate": 2.2608340791144153e-05, "loss": 1.098849892616272, "step": 2530 }, { "epoch": 0.20727000992947006, "grad_norm": 0.1640625, "learning_rate": 2.2607171152144007e-05, "loss": 0.6746349334716797, "step": 2531 }, { "epoch": 0.207351902465989, "grad_norm": 0.1728515625, "learning_rate": 2.2606001052112522e-05, "loss": 1.3599038124084473, "step": 2532 }, { "epoch": 0.20743379500250797, "grad_norm": 0.15234375, "learning_rate": 2.2604830491100544e-05, "loss": 1.0439919233322144, "step": 2533 }, { "epoch": 0.2075156875390269, "grad_norm": 0.166015625, "learning_rate": 2.260365946915896e-05, "loss": 0.6895688772201538, "step": 2534 }, { "epoch": 0.20759758007554585, "grad_norm": 0.1435546875, "learning_rate": 2.2602487986338656e-05, "loss": 0.6307955384254456, "step": 2535 }, { "epoch": 0.20767947261206482, "grad_norm": 0.15234375, "learning_rate": 2.260131604269056e-05, "loss": 0.7039069533348083, "step": 2536 }, { "epoch": 0.20776136514858376, "grad_norm": 0.1796875, "learning_rate": 2.2600143638265602e-05, "loss": 0.7854093909263611, "step": 2537 }, { "epoch": 0.20784325768510273, "grad_norm": 0.1923828125, "learning_rate": 2.2598970773114746e-05, "loss": 0.6994537115097046, "step": 2538 }, { "epoch": 0.20792515022162167, "grad_norm": 0.2041015625, "learning_rate": 2.2597797447288966e-05, "loss": 0.9157559871673584, "step": 2539 }, { "epoch": 0.20800704275814064, "grad_norm": 0.1552734375, "learning_rate": 2.2596623660839255e-05, "loss": 0.6570084095001221, "step": 2540 }, { "epoch": 0.20808893529465958, "grad_norm": 0.13671875, "learning_rate": 2.259544941381664e-05, "loss": 1.0261468887329102, "step": 2541 }, { "epoch": 0.20817082783117855, "grad_norm": 0.171875, "learning_rate": 2.2594274706272155e-05, "loss": 0.6482420563697815, "step": 2542 }, { "epoch": 0.20825272036769749, "grad_norm": 0.1669921875, "learning_rate": 2.2593099538256854e-05, "loss": 0.8905519843101501, "step": 2543 }, { "epoch": 0.20833461290421645, "grad_norm": 0.1953125, "learning_rate": 2.2591923909821815e-05, "loss": 0.8548463582992554, "step": 2544 }, { "epoch": 0.2084165054407354, "grad_norm": 0.1748046875, "learning_rate": 2.2590747821018142e-05, "loss": 0.8943751454353333, "step": 2545 }, { "epoch": 0.20849839797725433, "grad_norm": 0.1650390625, "learning_rate": 2.2589571271896948e-05, "loss": 1.1034404039382935, "step": 2546 }, { "epoch": 0.2085802905137733, "grad_norm": 0.1513671875, "learning_rate": 2.2588394262509374e-05, "loss": 0.9230262041091919, "step": 2547 }, { "epoch": 0.20866218305029224, "grad_norm": 0.171875, "learning_rate": 2.258721679290657e-05, "loss": 0.9269582033157349, "step": 2548 }, { "epoch": 0.2087440755868112, "grad_norm": 0.171875, "learning_rate": 2.258603886313972e-05, "loss": 0.9056403636932373, "step": 2549 }, { "epoch": 0.20882596812333015, "grad_norm": 0.14453125, "learning_rate": 2.2584860473260017e-05, "loss": 0.775119423866272, "step": 2550 }, { "epoch": 0.20890786065984912, "grad_norm": 0.1689453125, "learning_rate": 2.2583681623318688e-05, "loss": 0.5513975024223328, "step": 2551 }, { "epoch": 0.20898975319636806, "grad_norm": 0.1904296875, "learning_rate": 2.2582502313366962e-05, "loss": 0.8215306997299194, "step": 2552 }, { "epoch": 0.20907164573288703, "grad_norm": 0.173828125, "learning_rate": 2.25813225434561e-05, "loss": 0.9845231175422668, "step": 2553 }, { "epoch": 0.20915353826940597, "grad_norm": 0.1640625, "learning_rate": 2.2580142313637377e-05, "loss": 0.986456036567688, "step": 2554 }, { "epoch": 0.20923543080592494, "grad_norm": 0.1484375, "learning_rate": 2.2578961623962096e-05, "loss": 0.7501486539840698, "step": 2555 }, { "epoch": 0.20931732334244388, "grad_norm": 0.171875, "learning_rate": 2.257778047448157e-05, "loss": 1.0405055284500122, "step": 2556 }, { "epoch": 0.20939921587896282, "grad_norm": 0.1650390625, "learning_rate": 2.2576598865247135e-05, "loss": 1.2263362407684326, "step": 2557 }, { "epoch": 0.2094811084154818, "grad_norm": 0.146484375, "learning_rate": 2.2575416796310156e-05, "loss": 0.7137336730957031, "step": 2558 }, { "epoch": 0.20956300095200073, "grad_norm": 0.38671875, "learning_rate": 2.2574234267722003e-05, "loss": 1.294555425643921, "step": 2559 }, { "epoch": 0.2096448934885197, "grad_norm": 0.189453125, "learning_rate": 2.2573051279534078e-05, "loss": 0.9039717316627502, "step": 2560 }, { "epoch": 0.20972678602503864, "grad_norm": 0.197265625, "learning_rate": 2.2571867831797795e-05, "loss": 0.8533040881156921, "step": 2561 }, { "epoch": 0.2098086785615576, "grad_norm": 0.189453125, "learning_rate": 2.2570683924564598e-05, "loss": 0.8020855188369751, "step": 2562 }, { "epoch": 0.20989057109807654, "grad_norm": 0.1826171875, "learning_rate": 2.2569499557885937e-05, "loss": 1.0323879718780518, "step": 2563 }, { "epoch": 0.2099724636345955, "grad_norm": 0.1826171875, "learning_rate": 2.2568314731813292e-05, "loss": 0.9836834669113159, "step": 2564 }, { "epoch": 0.21005435617111445, "grad_norm": 0.1640625, "learning_rate": 2.2567129446398163e-05, "loss": 0.9234398603439331, "step": 2565 }, { "epoch": 0.21013624870763342, "grad_norm": 0.1982421875, "learning_rate": 2.2565943701692066e-05, "loss": 1.093058705329895, "step": 2566 }, { "epoch": 0.21021814124415236, "grad_norm": 0.1474609375, "learning_rate": 2.2564757497746535e-05, "loss": 0.7526820302009583, "step": 2567 }, { "epoch": 0.2103000337806713, "grad_norm": 0.181640625, "learning_rate": 2.2563570834613133e-05, "loss": 0.7566789984703064, "step": 2568 }, { "epoch": 0.21038192631719027, "grad_norm": 0.1533203125, "learning_rate": 2.2562383712343432e-05, "loss": 0.9776017665863037, "step": 2569 }, { "epoch": 0.2104638188537092, "grad_norm": 0.1591796875, "learning_rate": 2.2561196130989033e-05, "loss": 0.7597828507423401, "step": 2570 }, { "epoch": 0.21054571139022818, "grad_norm": 0.17578125, "learning_rate": 2.2560008090601554e-05, "loss": 0.7205164432525635, "step": 2571 }, { "epoch": 0.21062760392674712, "grad_norm": 0.1953125, "learning_rate": 2.2558819591232626e-05, "loss": 1.1665676832199097, "step": 2572 }, { "epoch": 0.2107094964632661, "grad_norm": 0.154296875, "learning_rate": 2.2557630632933912e-05, "loss": 0.7656850814819336, "step": 2573 }, { "epoch": 0.21079138899978503, "grad_norm": 0.17578125, "learning_rate": 2.2556441215757085e-05, "loss": 0.7946710586547852, "step": 2574 }, { "epoch": 0.210873281536304, "grad_norm": 0.1533203125, "learning_rate": 2.2555251339753846e-05, "loss": 0.8632849454879761, "step": 2575 }, { "epoch": 0.21095517407282294, "grad_norm": 0.1806640625, "learning_rate": 2.2554061004975907e-05, "loss": 1.0044519901275635, "step": 2576 }, { "epoch": 0.21103706660934188, "grad_norm": 0.1650390625, "learning_rate": 2.2552870211475013e-05, "loss": 0.8329063653945923, "step": 2577 }, { "epoch": 0.21111895914586085, "grad_norm": 0.1484375, "learning_rate": 2.2551678959302912e-05, "loss": 0.5952067971229553, "step": 2578 }, { "epoch": 0.2112008516823798, "grad_norm": 0.166015625, "learning_rate": 2.2550487248511384e-05, "loss": 0.7586578726768494, "step": 2579 }, { "epoch": 0.21128274421889875, "grad_norm": 0.17578125, "learning_rate": 2.2549295079152228e-05, "loss": 1.0566492080688477, "step": 2580 }, { "epoch": 0.2113646367554177, "grad_norm": 0.16015625, "learning_rate": 2.2548102451277254e-05, "loss": 0.7290369272232056, "step": 2581 }, { "epoch": 0.21144652929193666, "grad_norm": 0.173828125, "learning_rate": 2.2546909364938308e-05, "loss": 0.8774522542953491, "step": 2582 }, { "epoch": 0.2115284218284556, "grad_norm": 0.1572265625, "learning_rate": 2.254571582018724e-05, "loss": 0.6438607573509216, "step": 2583 }, { "epoch": 0.21161031436497457, "grad_norm": 0.1650390625, "learning_rate": 2.2544521817075928e-05, "loss": 1.0225238800048828, "step": 2584 }, { "epoch": 0.2116922069014935, "grad_norm": 0.1611328125, "learning_rate": 2.254332735565627e-05, "loss": 0.9594260454177856, "step": 2585 }, { "epoch": 0.21177409943801248, "grad_norm": 0.1396484375, "learning_rate": 2.2542132435980177e-05, "loss": 0.5366291403770447, "step": 2586 }, { "epoch": 0.21185599197453142, "grad_norm": 0.1669921875, "learning_rate": 2.2540937058099587e-05, "loss": 0.9230993986129761, "step": 2587 }, { "epoch": 0.21193788451105036, "grad_norm": 0.1669921875, "learning_rate": 2.253974122206646e-05, "loss": 0.7536305785179138, "step": 2588 }, { "epoch": 0.21201977704756933, "grad_norm": 0.1884765625, "learning_rate": 2.253854492793277e-05, "loss": 1.0587902069091797, "step": 2589 }, { "epoch": 0.21210166958408827, "grad_norm": 0.1513671875, "learning_rate": 2.253734817575051e-05, "loss": 0.7969832420349121, "step": 2590 }, { "epoch": 0.21218356212060724, "grad_norm": 0.146484375, "learning_rate": 2.2536150965571697e-05, "loss": 0.6274747848510742, "step": 2591 }, { "epoch": 0.21226545465712618, "grad_norm": 0.1962890625, "learning_rate": 2.2534953297448374e-05, "loss": 0.9164037704467773, "step": 2592 }, { "epoch": 0.21234734719364515, "grad_norm": 0.189453125, "learning_rate": 2.253375517143259e-05, "loss": 0.8369346857070923, "step": 2593 }, { "epoch": 0.2124292397301641, "grad_norm": 0.20703125, "learning_rate": 2.2532556587576413e-05, "loss": 1.0573561191558838, "step": 2594 }, { "epoch": 0.21251113226668306, "grad_norm": 0.18359375, "learning_rate": 2.2531357545931954e-05, "loss": 0.9581342935562134, "step": 2595 }, { "epoch": 0.212593024803202, "grad_norm": 0.1513671875, "learning_rate": 2.253015804655132e-05, "loss": 0.9424036741256714, "step": 2596 }, { "epoch": 0.21267491733972096, "grad_norm": 0.171875, "learning_rate": 2.252895808948664e-05, "loss": 0.8117066621780396, "step": 2597 }, { "epoch": 0.2127568098762399, "grad_norm": 0.1953125, "learning_rate": 2.2527757674790082e-05, "loss": 0.965522050857544, "step": 2598 }, { "epoch": 0.21283870241275885, "grad_norm": 0.16015625, "learning_rate": 2.2526556802513812e-05, "loss": 1.1489461660385132, "step": 2599 }, { "epoch": 0.2129205949492778, "grad_norm": 0.1708984375, "learning_rate": 2.2525355472710033e-05, "loss": 0.87331622838974, "step": 2600 }, { "epoch": 0.21300248748579675, "grad_norm": 0.19921875, "learning_rate": 2.252415368543095e-05, "loss": 0.8089415431022644, "step": 2601 }, { "epoch": 0.21308438002231572, "grad_norm": 0.2001953125, "learning_rate": 2.25229514407288e-05, "loss": 1.0025982856750488, "step": 2602 }, { "epoch": 0.21316627255883466, "grad_norm": 0.1728515625, "learning_rate": 2.2521748738655843e-05, "loss": 1.007644534111023, "step": 2603 }, { "epoch": 0.21324816509535363, "grad_norm": 0.1708984375, "learning_rate": 2.252054557926435e-05, "loss": 0.8380034565925598, "step": 2604 }, { "epoch": 0.21333005763187257, "grad_norm": 0.1640625, "learning_rate": 2.2519341962606614e-05, "loss": 0.7382635474205017, "step": 2605 }, { "epoch": 0.21341195016839154, "grad_norm": 0.177734375, "learning_rate": 2.251813788873495e-05, "loss": 1.073710322380066, "step": 2606 }, { "epoch": 0.21349384270491048, "grad_norm": 0.1376953125, "learning_rate": 2.251693335770169e-05, "loss": 0.6477277278900146, "step": 2607 }, { "epoch": 0.21357573524142942, "grad_norm": 0.15625, "learning_rate": 2.2515728369559196e-05, "loss": 0.7063530683517456, "step": 2608 }, { "epoch": 0.2136576277779484, "grad_norm": 0.2138671875, "learning_rate": 2.251452292435983e-05, "loss": 1.0643256902694702, "step": 2609 }, { "epoch": 0.21373952031446733, "grad_norm": 0.15234375, "learning_rate": 2.2513317022155997e-05, "loss": 0.8453289270401001, "step": 2610 }, { "epoch": 0.2138214128509863, "grad_norm": 0.2138671875, "learning_rate": 2.2512110663000103e-05, "loss": 0.8432967662811279, "step": 2611 }, { "epoch": 0.21390330538750524, "grad_norm": 0.2333984375, "learning_rate": 2.251090384694458e-05, "loss": 0.935516893863678, "step": 2612 }, { "epoch": 0.2139851979240242, "grad_norm": 0.2138671875, "learning_rate": 2.2509696574041887e-05, "loss": 1.1079294681549072, "step": 2613 }, { "epoch": 0.21406709046054315, "grad_norm": 0.171875, "learning_rate": 2.2508488844344495e-05, "loss": 0.8772408366203308, "step": 2614 }, { "epoch": 0.21414898299706212, "grad_norm": 0.1767578125, "learning_rate": 2.25072806579049e-05, "loss": 0.6262285113334656, "step": 2615 }, { "epoch": 0.21423087553358106, "grad_norm": 0.142578125, "learning_rate": 2.2506072014775607e-05, "loss": 0.6965766549110413, "step": 2616 }, { "epoch": 0.21431276807010002, "grad_norm": 0.1806640625, "learning_rate": 2.2504862915009155e-05, "loss": 0.9412310123443604, "step": 2617 }, { "epoch": 0.21439466060661896, "grad_norm": 0.1611328125, "learning_rate": 2.250365335865809e-05, "loss": 0.8304561972618103, "step": 2618 }, { "epoch": 0.2144765531431379, "grad_norm": 0.2109375, "learning_rate": 2.2502443345774994e-05, "loss": 1.1396684646606445, "step": 2619 }, { "epoch": 0.21455844567965687, "grad_norm": 0.1611328125, "learning_rate": 2.250123287641245e-05, "loss": 0.7396496534347534, "step": 2620 }, { "epoch": 0.2146403382161758, "grad_norm": 0.2080078125, "learning_rate": 2.2500021950623073e-05, "loss": 0.5160247087478638, "step": 2621 }, { "epoch": 0.21472223075269478, "grad_norm": 0.15625, "learning_rate": 2.2498810568459504e-05, "loss": 0.82952880859375, "step": 2622 }, { "epoch": 0.21480412328921372, "grad_norm": 0.1962890625, "learning_rate": 2.2497598729974377e-05, "loss": 0.7126262187957764, "step": 2623 }, { "epoch": 0.2148860158257327, "grad_norm": 0.2158203125, "learning_rate": 2.2496386435220377e-05, "loss": 0.997394323348999, "step": 2624 }, { "epoch": 0.21496790836225163, "grad_norm": 0.2001953125, "learning_rate": 2.249517368425019e-05, "loss": 0.6629370450973511, "step": 2625 }, { "epoch": 0.2150498008987706, "grad_norm": 0.1640625, "learning_rate": 2.2493960477116528e-05, "loss": 0.77293461561203, "step": 2626 }, { "epoch": 0.21513169343528954, "grad_norm": 0.1748046875, "learning_rate": 2.249274681387212e-05, "loss": 0.6796474456787109, "step": 2627 }, { "epoch": 0.2152135859718085, "grad_norm": 0.1904296875, "learning_rate": 2.2491532694569724e-05, "loss": 0.9979060292243958, "step": 2628 }, { "epoch": 0.21529547850832745, "grad_norm": 0.224609375, "learning_rate": 2.2490318119262107e-05, "loss": 1.1129047870635986, "step": 2629 }, { "epoch": 0.2153773710448464, "grad_norm": 0.1533203125, "learning_rate": 2.2489103088002054e-05, "loss": 0.9572575688362122, "step": 2630 }, { "epoch": 0.21545926358136536, "grad_norm": 0.16015625, "learning_rate": 2.2487887600842374e-05, "loss": 0.6022533774375916, "step": 2631 }, { "epoch": 0.2155411561178843, "grad_norm": 0.1728515625, "learning_rate": 2.2486671657835907e-05, "loss": 0.9832905530929565, "step": 2632 }, { "epoch": 0.21562304865440327, "grad_norm": 0.19140625, "learning_rate": 2.2485455259035503e-05, "loss": 0.8883419632911682, "step": 2633 }, { "epoch": 0.2157049411909222, "grad_norm": 0.2080078125, "learning_rate": 2.2484238404494022e-05, "loss": 0.985409140586853, "step": 2634 }, { "epoch": 0.21578683372744117, "grad_norm": 0.1748046875, "learning_rate": 2.248302109426436e-05, "loss": 0.9418313503265381, "step": 2635 }, { "epoch": 0.21586872626396011, "grad_norm": 0.193359375, "learning_rate": 2.2481803328399428e-05, "loss": 0.7812432050704956, "step": 2636 }, { "epoch": 0.21595061880047908, "grad_norm": 0.1826171875, "learning_rate": 2.2480585106952145e-05, "loss": 1.1868882179260254, "step": 2637 }, { "epoch": 0.21603251133699802, "grad_norm": 0.1689453125, "learning_rate": 2.2479366429975473e-05, "loss": 0.7767732739448547, "step": 2638 }, { "epoch": 0.21611440387351696, "grad_norm": 0.1748046875, "learning_rate": 2.2478147297522372e-05, "loss": 1.016821265220642, "step": 2639 }, { "epoch": 0.21619629641003593, "grad_norm": 0.1640625, "learning_rate": 2.247692770964583e-05, "loss": 0.6401596069335938, "step": 2640 }, { "epoch": 0.21627818894655487, "grad_norm": 0.201171875, "learning_rate": 2.2475707666398863e-05, "loss": 1.0144199132919312, "step": 2641 }, { "epoch": 0.21636008148307384, "grad_norm": 0.189453125, "learning_rate": 2.2474487167834494e-05, "loss": 0.8511896133422852, "step": 2642 }, { "epoch": 0.21644197401959278, "grad_norm": 0.2177734375, "learning_rate": 2.2473266214005772e-05, "loss": 0.9372079968452454, "step": 2643 }, { "epoch": 0.21652386655611175, "grad_norm": 0.185546875, "learning_rate": 2.2472044804965762e-05, "loss": 0.9081453680992126, "step": 2644 }, { "epoch": 0.2166057590926307, "grad_norm": 0.1806640625, "learning_rate": 2.2470822940767557e-05, "loss": 1.0545884370803833, "step": 2645 }, { "epoch": 0.21668765162914966, "grad_norm": 0.166015625, "learning_rate": 2.2469600621464257e-05, "loss": 0.745715320110321, "step": 2646 }, { "epoch": 0.2167695441656686, "grad_norm": 0.1552734375, "learning_rate": 2.2468377847108994e-05, "loss": 0.6323316097259521, "step": 2647 }, { "epoch": 0.21685143670218757, "grad_norm": 0.150390625, "learning_rate": 2.2467154617754913e-05, "loss": 0.6223617196083069, "step": 2648 }, { "epoch": 0.2169333292387065, "grad_norm": 0.1357421875, "learning_rate": 2.2465930933455183e-05, "loss": 0.5440846681594849, "step": 2649 }, { "epoch": 0.21701522177522545, "grad_norm": 0.2021484375, "learning_rate": 2.246470679426299e-05, "loss": 0.8357701897621155, "step": 2650 }, { "epoch": 0.21709711431174442, "grad_norm": 0.1484375, "learning_rate": 2.2463482200231535e-05, "loss": 0.6614250540733337, "step": 2651 }, { "epoch": 0.21717900684826336, "grad_norm": 0.15234375, "learning_rate": 2.2462257151414047e-05, "loss": 1.0901719331741333, "step": 2652 }, { "epoch": 0.21726089938478232, "grad_norm": 0.1728515625, "learning_rate": 2.2461031647863777e-05, "loss": 0.8307749032974243, "step": 2653 }, { "epoch": 0.21734279192130126, "grad_norm": 0.126953125, "learning_rate": 2.2459805689633983e-05, "loss": 0.6122291684150696, "step": 2654 }, { "epoch": 0.21742468445782023, "grad_norm": 0.2255859375, "learning_rate": 2.2458579276777954e-05, "loss": 0.8964424133300781, "step": 2655 }, { "epoch": 0.21750657699433917, "grad_norm": 0.185546875, "learning_rate": 2.2457352409348995e-05, "loss": 0.9349625706672668, "step": 2656 }, { "epoch": 0.21758846953085814, "grad_norm": 0.1630859375, "learning_rate": 2.2456125087400426e-05, "loss": 0.8891605734825134, "step": 2657 }, { "epoch": 0.21767036206737708, "grad_norm": 0.1630859375, "learning_rate": 2.2454897310985595e-05, "loss": 0.7247942686080933, "step": 2658 }, { "epoch": 0.21775225460389605, "grad_norm": 0.166015625, "learning_rate": 2.245366908015787e-05, "loss": 0.7170271873474121, "step": 2659 }, { "epoch": 0.217834147140415, "grad_norm": 0.197265625, "learning_rate": 2.245244039497063e-05, "loss": 0.9139279127120972, "step": 2660 }, { "epoch": 0.21791603967693393, "grad_norm": 0.154296875, "learning_rate": 2.2451211255477276e-05, "loss": 0.7368593811988831, "step": 2661 }, { "epoch": 0.2179979322134529, "grad_norm": 0.1611328125, "learning_rate": 2.244998166173124e-05, "loss": 0.7071901559829712, "step": 2662 }, { "epoch": 0.21807982474997184, "grad_norm": 0.1806640625, "learning_rate": 2.2448751613785963e-05, "loss": 0.9164714813232422, "step": 2663 }, { "epoch": 0.2181617172864908, "grad_norm": 0.166015625, "learning_rate": 2.24475211116949e-05, "loss": 0.9555971026420593, "step": 2664 }, { "epoch": 0.21824360982300975, "grad_norm": 0.1669921875, "learning_rate": 2.244629015551154e-05, "loss": 0.6782249808311462, "step": 2665 }, { "epoch": 0.21832550235952872, "grad_norm": 0.1796875, "learning_rate": 2.2445058745289387e-05, "loss": 0.7124421000480652, "step": 2666 }, { "epoch": 0.21840739489604766, "grad_norm": 0.1494140625, "learning_rate": 2.2443826881081962e-05, "loss": 0.8786941766738892, "step": 2667 }, { "epoch": 0.21848928743256663, "grad_norm": 0.1796875, "learning_rate": 2.2442594562942804e-05, "loss": 0.8248938918113708, "step": 2668 }, { "epoch": 0.21857117996908557, "grad_norm": 0.1796875, "learning_rate": 2.2441361790925476e-05, "loss": 0.6030952334403992, "step": 2669 }, { "epoch": 0.21865307250560453, "grad_norm": 0.15625, "learning_rate": 2.244012856508356e-05, "loss": 0.8378911018371582, "step": 2670 }, { "epoch": 0.21873496504212347, "grad_norm": 0.1884765625, "learning_rate": 2.2438894885470655e-05, "loss": 0.8841310143470764, "step": 2671 }, { "epoch": 0.21881685757864242, "grad_norm": 0.1611328125, "learning_rate": 2.2437660752140386e-05, "loss": 0.7605337500572205, "step": 2672 }, { "epoch": 0.21889875011516138, "grad_norm": 0.1728515625, "learning_rate": 2.2436426165146392e-05, "loss": 0.7846240401268005, "step": 2673 }, { "epoch": 0.21898064265168032, "grad_norm": 0.1669921875, "learning_rate": 2.243519112454233e-05, "loss": 0.8841848969459534, "step": 2674 }, { "epoch": 0.2190625351881993, "grad_norm": 0.2021484375, "learning_rate": 2.243395563038188e-05, "loss": 1.184723973274231, "step": 2675 }, { "epoch": 0.21914442772471823, "grad_norm": 0.171875, "learning_rate": 2.243271968271875e-05, "loss": 0.8364683985710144, "step": 2676 }, { "epoch": 0.2192263202612372, "grad_norm": 0.1455078125, "learning_rate": 2.243148328160665e-05, "loss": 1.0725886821746826, "step": 2677 }, { "epoch": 0.21930821279775614, "grad_norm": 0.1962890625, "learning_rate": 2.2430246427099325e-05, "loss": 0.9857078194618225, "step": 2678 }, { "epoch": 0.2193901053342751, "grad_norm": 0.1806640625, "learning_rate": 2.242900911925053e-05, "loss": 0.6420339345932007, "step": 2679 }, { "epoch": 0.21947199787079405, "grad_norm": 0.1787109375, "learning_rate": 2.2427771358114044e-05, "loss": 0.6804531812667847, "step": 2680 }, { "epoch": 0.219553890407313, "grad_norm": 0.1591796875, "learning_rate": 2.2426533143743666e-05, "loss": 0.732304573059082, "step": 2681 }, { "epoch": 0.21963578294383196, "grad_norm": 0.166015625, "learning_rate": 2.2425294476193213e-05, "loss": 1.0372018814086914, "step": 2682 }, { "epoch": 0.2197176754803509, "grad_norm": 0.1416015625, "learning_rate": 2.2424055355516528e-05, "loss": 0.6854705214500427, "step": 2683 }, { "epoch": 0.21979956801686987, "grad_norm": 0.19140625, "learning_rate": 2.2422815781767455e-05, "loss": 0.7514453530311584, "step": 2684 }, { "epoch": 0.2198814605533888, "grad_norm": 0.16796875, "learning_rate": 2.2421575754999887e-05, "loss": 0.917513906955719, "step": 2685 }, { "epoch": 0.21996335308990778, "grad_norm": 0.1650390625, "learning_rate": 2.242033527526771e-05, "loss": 0.6444403529167175, "step": 2686 }, { "epoch": 0.22004524562642672, "grad_norm": 0.15625, "learning_rate": 2.2419094342624845e-05, "loss": 0.8434231877326965, "step": 2687 }, { "epoch": 0.22012713816294568, "grad_norm": 0.1767578125, "learning_rate": 2.241785295712523e-05, "loss": 0.8348996043205261, "step": 2688 }, { "epoch": 0.22020903069946463, "grad_norm": 0.162109375, "learning_rate": 2.241661111882281e-05, "loss": 0.6523064970970154, "step": 2689 }, { "epoch": 0.2202909232359836, "grad_norm": 0.16015625, "learning_rate": 2.2415368827771575e-05, "loss": 0.6707596778869629, "step": 2690 }, { "epoch": 0.22037281577250253, "grad_norm": 0.16015625, "learning_rate": 2.241412608402551e-05, "loss": 0.8914279937744141, "step": 2691 }, { "epoch": 0.22045470830902147, "grad_norm": 0.2001953125, "learning_rate": 2.241288288763863e-05, "loss": 1.177140235900879, "step": 2692 }, { "epoch": 0.22053660084554044, "grad_norm": 0.162109375, "learning_rate": 2.2411639238664974e-05, "loss": 0.6267945170402527, "step": 2693 }, { "epoch": 0.22061849338205938, "grad_norm": 0.162109375, "learning_rate": 2.24103951371586e-05, "loss": 1.0307586193084717, "step": 2694 }, { "epoch": 0.22070038591857835, "grad_norm": 0.1826171875, "learning_rate": 2.240915058317357e-05, "loss": 1.268770694732666, "step": 2695 }, { "epoch": 0.2207822784550973, "grad_norm": 0.16015625, "learning_rate": 2.2407905576763985e-05, "loss": 0.752870500087738, "step": 2696 }, { "epoch": 0.22086417099161626, "grad_norm": 0.197265625, "learning_rate": 2.240666011798396e-05, "loss": 1.4423089027404785, "step": 2697 }, { "epoch": 0.2209460635281352, "grad_norm": 0.158203125, "learning_rate": 2.2405414206887617e-05, "loss": 0.8685122132301331, "step": 2698 }, { "epoch": 0.22102795606465417, "grad_norm": 0.1796875, "learning_rate": 2.240416784352912e-05, "loss": 0.9673736691474915, "step": 2699 }, { "epoch": 0.2211098486011731, "grad_norm": 0.212890625, "learning_rate": 2.2402921027962637e-05, "loss": 0.8751057982444763, "step": 2700 }, { "epoch": 0.22119174113769208, "grad_norm": 0.1806640625, "learning_rate": 2.240167376024236e-05, "loss": 0.9190973043441772, "step": 2701 }, { "epoch": 0.22127363367421102, "grad_norm": 0.2021484375, "learning_rate": 2.2400426040422497e-05, "loss": 0.921265721321106, "step": 2702 }, { "epoch": 0.22135552621072996, "grad_norm": 0.166015625, "learning_rate": 2.2399177868557285e-05, "loss": 0.7625551819801331, "step": 2703 }, { "epoch": 0.22143741874724893, "grad_norm": 0.158203125, "learning_rate": 2.239792924470097e-05, "loss": 0.8799278736114502, "step": 2704 }, { "epoch": 0.22151931128376787, "grad_norm": 0.1640625, "learning_rate": 2.2396680168907826e-05, "loss": 0.663055419921875, "step": 2705 }, { "epoch": 0.22160120382028684, "grad_norm": 0.1728515625, "learning_rate": 2.239543064123214e-05, "loss": 0.9900674819946289, "step": 2706 }, { "epoch": 0.22168309635680578, "grad_norm": 0.181640625, "learning_rate": 2.2394180661728222e-05, "loss": 1.0261987447738647, "step": 2707 }, { "epoch": 0.22176498889332474, "grad_norm": 0.1630859375, "learning_rate": 2.2392930230450403e-05, "loss": 1.237915277481079, "step": 2708 }, { "epoch": 0.22184688142984368, "grad_norm": 0.2197265625, "learning_rate": 2.239167934745303e-05, "loss": 0.6792721152305603, "step": 2709 }, { "epoch": 0.22192877396636265, "grad_norm": 0.193359375, "learning_rate": 2.2390428012790474e-05, "loss": 0.9879810810089111, "step": 2710 }, { "epoch": 0.2220106665028816, "grad_norm": 0.2255859375, "learning_rate": 2.2389176226517122e-05, "loss": 1.0300103425979614, "step": 2711 }, { "epoch": 0.22209255903940053, "grad_norm": 0.1611328125, "learning_rate": 2.238792398868738e-05, "loss": 0.7472534775733948, "step": 2712 }, { "epoch": 0.2221744515759195, "grad_norm": 0.15234375, "learning_rate": 2.2386671299355678e-05, "loss": 0.5477824211120605, "step": 2713 }, { "epoch": 0.22225634411243844, "grad_norm": 0.1875, "learning_rate": 2.2385418158576457e-05, "loss": 1.100843071937561, "step": 2714 }, { "epoch": 0.2223382366489574, "grad_norm": 0.1650390625, "learning_rate": 2.2384164566404196e-05, "loss": 0.5873090624809265, "step": 2715 }, { "epoch": 0.22242012918547635, "grad_norm": 0.1337890625, "learning_rate": 2.2382910522893367e-05, "loss": 1.061188817024231, "step": 2716 }, { "epoch": 0.22250202172199532, "grad_norm": 0.1650390625, "learning_rate": 2.2381656028098488e-05, "loss": 0.6065191626548767, "step": 2717 }, { "epoch": 0.22258391425851426, "grad_norm": 0.171875, "learning_rate": 2.2380401082074073e-05, "loss": 0.9351382255554199, "step": 2718 }, { "epoch": 0.22266580679503323, "grad_norm": 0.14453125, "learning_rate": 2.2379145684874677e-05, "loss": 0.9476308226585388, "step": 2719 }, { "epoch": 0.22274769933155217, "grad_norm": 0.283203125, "learning_rate": 2.237788983655486e-05, "loss": 0.9022799134254456, "step": 2720 }, { "epoch": 0.22282959186807114, "grad_norm": 0.1552734375, "learning_rate": 2.237663353716921e-05, "loss": 0.6997306942939758, "step": 2721 }, { "epoch": 0.22291148440459008, "grad_norm": 0.19140625, "learning_rate": 2.2375376786772323e-05, "loss": 1.0690778493881226, "step": 2722 }, { "epoch": 0.22299337694110902, "grad_norm": 0.197265625, "learning_rate": 2.2374119585418834e-05, "loss": 1.147035837173462, "step": 2723 }, { "epoch": 0.22307526947762799, "grad_norm": 0.154296875, "learning_rate": 2.2372861933163374e-05, "loss": 1.100911021232605, "step": 2724 }, { "epoch": 0.22315716201414693, "grad_norm": 0.1845703125, "learning_rate": 2.2371603830060615e-05, "loss": 0.7608529329299927, "step": 2725 }, { "epoch": 0.2232390545506659, "grad_norm": 0.1650390625, "learning_rate": 2.2370345276165238e-05, "loss": 0.9179092645645142, "step": 2726 }, { "epoch": 0.22332094708718483, "grad_norm": 0.18359375, "learning_rate": 2.236908627153194e-05, "loss": 0.5465078353881836, "step": 2727 }, { "epoch": 0.2234028396237038, "grad_norm": 0.1708984375, "learning_rate": 2.2367826816215446e-05, "loss": 1.1685466766357422, "step": 2728 }, { "epoch": 0.22348473216022274, "grad_norm": 0.1943359375, "learning_rate": 2.2366566910270496e-05, "loss": 0.9647210240364075, "step": 2729 }, { "epoch": 0.2235666246967417, "grad_norm": 0.1640625, "learning_rate": 2.2365306553751852e-05, "loss": 1.0556919574737549, "step": 2730 }, { "epoch": 0.22364851723326065, "grad_norm": 0.1494140625, "learning_rate": 2.2364045746714293e-05, "loss": 0.4899267852306366, "step": 2731 }, { "epoch": 0.22373040976977962, "grad_norm": 0.193359375, "learning_rate": 2.2362784489212622e-05, "loss": 1.1409335136413574, "step": 2732 }, { "epoch": 0.22381230230629856, "grad_norm": 0.1875, "learning_rate": 2.2361522781301652e-05, "loss": 1.1569693088531494, "step": 2733 }, { "epoch": 0.2238941948428175, "grad_norm": 0.1572265625, "learning_rate": 2.236026062303623e-05, "loss": 0.6331688165664673, "step": 2734 }, { "epoch": 0.22397608737933647, "grad_norm": 0.173828125, "learning_rate": 2.2358998014471207e-05, "loss": 0.7054038643836975, "step": 2735 }, { "epoch": 0.2240579799158554, "grad_norm": 0.169921875, "learning_rate": 2.2357734955661468e-05, "loss": 0.6985722780227661, "step": 2736 }, { "epoch": 0.22413987245237438, "grad_norm": 0.1640625, "learning_rate": 2.2356471446661906e-05, "loss": 0.8991851806640625, "step": 2737 }, { "epoch": 0.22422176498889332, "grad_norm": 0.1748046875, "learning_rate": 2.2355207487527443e-05, "loss": 1.0830262899398804, "step": 2738 }, { "epoch": 0.2243036575254123, "grad_norm": 0.1689453125, "learning_rate": 2.2353943078313004e-05, "loss": 0.8604142069816589, "step": 2739 }, { "epoch": 0.22438555006193123, "grad_norm": 0.1845703125, "learning_rate": 2.235267821907356e-05, "loss": 0.7714955806732178, "step": 2740 }, { "epoch": 0.2244674425984502, "grad_norm": 0.1865234375, "learning_rate": 2.2351412909864083e-05, "loss": 1.056908130645752, "step": 2741 }, { "epoch": 0.22454933513496914, "grad_norm": 0.244140625, "learning_rate": 2.2350147150739562e-05, "loss": 0.9743558168411255, "step": 2742 }, { "epoch": 0.22463122767148808, "grad_norm": 0.1923828125, "learning_rate": 2.2348880941755016e-05, "loss": 1.0073190927505493, "step": 2743 }, { "epoch": 0.22471312020800704, "grad_norm": 0.1708984375, "learning_rate": 2.2347614282965485e-05, "loss": 0.8281304836273193, "step": 2744 }, { "epoch": 0.22479501274452598, "grad_norm": 0.1494140625, "learning_rate": 2.2346347174426017e-05, "loss": 0.8440684676170349, "step": 2745 }, { "epoch": 0.22487690528104495, "grad_norm": 0.16015625, "learning_rate": 2.2345079616191685e-05, "loss": 0.7128986120223999, "step": 2746 }, { "epoch": 0.2249587978175639, "grad_norm": 0.203125, "learning_rate": 2.2343811608317587e-05, "loss": 0.8777107000350952, "step": 2747 }, { "epoch": 0.22504069035408286, "grad_norm": 0.1533203125, "learning_rate": 2.2342543150858834e-05, "loss": 0.9418197870254517, "step": 2748 }, { "epoch": 0.2251225828906018, "grad_norm": 0.357421875, "learning_rate": 2.2341274243870555e-05, "loss": 1.0588223934173584, "step": 2749 }, { "epoch": 0.22520447542712077, "grad_norm": 0.177734375, "learning_rate": 2.2340004887407908e-05, "loss": 0.9309899806976318, "step": 2750 }, { "epoch": 0.2252863679636397, "grad_norm": 0.177734375, "learning_rate": 2.233873508152606e-05, "loss": 0.6725599765777588, "step": 2751 }, { "epoch": 0.22536826050015868, "grad_norm": 0.1826171875, "learning_rate": 2.2337464826280202e-05, "loss": 0.6725819706916809, "step": 2752 }, { "epoch": 0.22545015303667762, "grad_norm": 0.1845703125, "learning_rate": 2.2336194121725548e-05, "loss": 1.0403375625610352, "step": 2753 }, { "epoch": 0.22553204557319656, "grad_norm": 0.232421875, "learning_rate": 2.2334922967917324e-05, "loss": 0.9659050703048706, "step": 2754 }, { "epoch": 0.22561393810971553, "grad_norm": 0.177734375, "learning_rate": 2.233365136491078e-05, "loss": 1.0475457906723022, "step": 2755 }, { "epoch": 0.22569583064623447, "grad_norm": 0.162109375, "learning_rate": 2.233237931276119e-05, "loss": 1.0404893159866333, "step": 2756 }, { "epoch": 0.22577772318275344, "grad_norm": 0.16015625, "learning_rate": 2.233110681152384e-05, "loss": 0.8932117223739624, "step": 2757 }, { "epoch": 0.22585961571927238, "grad_norm": 0.1591796875, "learning_rate": 2.2329833861254035e-05, "loss": 0.9532628059387207, "step": 2758 }, { "epoch": 0.22594150825579135, "grad_norm": 0.18359375, "learning_rate": 2.23285604620071e-05, "loss": 0.9125658273696899, "step": 2759 }, { "epoch": 0.2260234007923103, "grad_norm": 0.1865234375, "learning_rate": 2.2327286613838394e-05, "loss": 0.612433671951294, "step": 2760 }, { "epoch": 0.22610529332882925, "grad_norm": 0.189453125, "learning_rate": 2.2326012316803274e-05, "loss": 0.8008420467376709, "step": 2761 }, { "epoch": 0.2261871858653482, "grad_norm": 0.2041015625, "learning_rate": 2.232473757095713e-05, "loss": 0.9025201797485352, "step": 2762 }, { "epoch": 0.22626907840186716, "grad_norm": 0.1806640625, "learning_rate": 2.2323462376355366e-05, "loss": 0.9112869501113892, "step": 2763 }, { "epoch": 0.2263509709383861, "grad_norm": 0.189453125, "learning_rate": 2.2322186733053412e-05, "loss": 0.8256112337112427, "step": 2764 }, { "epoch": 0.22643286347490504, "grad_norm": 0.15234375, "learning_rate": 2.23209106411067e-05, "loss": 0.9586428999900818, "step": 2765 }, { "epoch": 0.226514756011424, "grad_norm": 0.201171875, "learning_rate": 2.231963410057071e-05, "loss": 0.8691222071647644, "step": 2766 }, { "epoch": 0.22659664854794295, "grad_norm": 0.201171875, "learning_rate": 2.2318357111500915e-05, "loss": 0.6286084055900574, "step": 2767 }, { "epoch": 0.22667854108446192, "grad_norm": 0.1484375, "learning_rate": 2.2317079673952823e-05, "loss": 0.7256220579147339, "step": 2768 }, { "epoch": 0.22676043362098086, "grad_norm": 0.2080078125, "learning_rate": 2.2315801787981957e-05, "loss": 0.8661752343177795, "step": 2769 }, { "epoch": 0.22684232615749983, "grad_norm": 0.1669921875, "learning_rate": 2.2314523453643858e-05, "loss": 0.6361514925956726, "step": 2770 }, { "epoch": 0.22692421869401877, "grad_norm": 0.140625, "learning_rate": 2.2313244670994085e-05, "loss": 0.5595276355743408, "step": 2771 }, { "epoch": 0.22700611123053774, "grad_norm": 0.1591796875, "learning_rate": 2.231196544008822e-05, "loss": 0.6491144299507141, "step": 2772 }, { "epoch": 0.22708800376705668, "grad_norm": 0.1796875, "learning_rate": 2.2310685760981863e-05, "loss": 0.8894050121307373, "step": 2773 }, { "epoch": 0.22716989630357562, "grad_norm": 0.1884765625, "learning_rate": 2.2309405633730644e-05, "loss": 1.0037997961044312, "step": 2774 }, { "epoch": 0.2272517888400946, "grad_norm": 0.1572265625, "learning_rate": 2.230812505839019e-05, "loss": 0.7675134539604187, "step": 2775 }, { "epoch": 0.22733368137661353, "grad_norm": 0.146484375, "learning_rate": 2.2306844035016164e-05, "loss": 0.9257134199142456, "step": 2776 }, { "epoch": 0.2274155739131325, "grad_norm": 0.1748046875, "learning_rate": 2.2305562563664253e-05, "loss": 1.1266851425170898, "step": 2777 }, { "epoch": 0.22749746644965144, "grad_norm": 0.1884765625, "learning_rate": 2.2304280644390138e-05, "loss": 1.0576081275939941, "step": 2778 }, { "epoch": 0.2275793589861704, "grad_norm": 0.138671875, "learning_rate": 2.230299827724955e-05, "loss": 0.9829504489898682, "step": 2779 }, { "epoch": 0.22766125152268935, "grad_norm": 0.158203125, "learning_rate": 2.230171546229822e-05, "loss": 0.7380350828170776, "step": 2780 }, { "epoch": 0.2277431440592083, "grad_norm": 0.1650390625, "learning_rate": 2.2300432199591908e-05, "loss": 1.1664187908172607, "step": 2781 }, { "epoch": 0.22782503659572725, "grad_norm": 0.15625, "learning_rate": 2.2299148489186392e-05, "loss": 0.8917802572250366, "step": 2782 }, { "epoch": 0.22790692913224622, "grad_norm": 0.267578125, "learning_rate": 2.229786433113746e-05, "loss": 0.7070808410644531, "step": 2783 }, { "epoch": 0.22798882166876516, "grad_norm": 0.2158203125, "learning_rate": 2.229657972550093e-05, "loss": 0.7713556289672852, "step": 2784 }, { "epoch": 0.2280707142052841, "grad_norm": 0.1728515625, "learning_rate": 2.2295294672332636e-05, "loss": 0.7796047329902649, "step": 2785 }, { "epoch": 0.22815260674180307, "grad_norm": 0.158203125, "learning_rate": 2.2294009171688433e-05, "loss": 0.9818745851516724, "step": 2786 }, { "epoch": 0.228234499278322, "grad_norm": 0.1904296875, "learning_rate": 2.2292723223624196e-05, "loss": 0.9776700735092163, "step": 2787 }, { "epoch": 0.22831639181484098, "grad_norm": 0.1708984375, "learning_rate": 2.2291436828195812e-05, "loss": 0.7744762301445007, "step": 2788 }, { "epoch": 0.22839828435135992, "grad_norm": 0.15625, "learning_rate": 2.2290149985459198e-05, "loss": 0.9199493527412415, "step": 2789 }, { "epoch": 0.2284801768878789, "grad_norm": 0.1962890625, "learning_rate": 2.2288862695470278e-05, "loss": 0.9536228179931641, "step": 2790 }, { "epoch": 0.22856206942439783, "grad_norm": 0.1953125, "learning_rate": 2.2287574958285015e-05, "loss": 1.0106019973754883, "step": 2791 }, { "epoch": 0.2286439619609168, "grad_norm": 0.1669921875, "learning_rate": 2.2286286773959373e-05, "loss": 0.7199668884277344, "step": 2792 }, { "epoch": 0.22872585449743574, "grad_norm": 0.2041015625, "learning_rate": 2.2284998142549335e-05, "loss": 0.8404042720794678, "step": 2793 }, { "epoch": 0.2288077470339547, "grad_norm": 0.1787109375, "learning_rate": 2.2283709064110924e-05, "loss": 0.9001615047454834, "step": 2794 }, { "epoch": 0.22888963957047365, "grad_norm": 0.16015625, "learning_rate": 2.2282419538700158e-05, "loss": 0.622333824634552, "step": 2795 }, { "epoch": 0.2289715321069926, "grad_norm": 0.16796875, "learning_rate": 2.228112956637309e-05, "loss": 0.6969611048698425, "step": 2796 }, { "epoch": 0.22905342464351156, "grad_norm": 0.16796875, "learning_rate": 2.2279839147185788e-05, "loss": 1.0442826747894287, "step": 2797 }, { "epoch": 0.2291353171800305, "grad_norm": 0.142578125, "learning_rate": 2.2278548281194333e-05, "loss": 0.7030993103981018, "step": 2798 }, { "epoch": 0.22921720971654946, "grad_norm": 0.1591796875, "learning_rate": 2.227725696845484e-05, "loss": 0.774620771408081, "step": 2799 }, { "epoch": 0.2292991022530684, "grad_norm": 0.171875, "learning_rate": 2.2275965209023427e-05, "loss": 0.902047872543335, "step": 2800 }, { "epoch": 0.22938099478958737, "grad_norm": 0.1572265625, "learning_rate": 2.2274673002956245e-05, "loss": 0.9336604475975037, "step": 2801 }, { "epoch": 0.2294628873261063, "grad_norm": 0.16015625, "learning_rate": 2.2273380350309453e-05, "loss": 0.5391343235969543, "step": 2802 }, { "epoch": 0.22954477986262528, "grad_norm": 0.169921875, "learning_rate": 2.2272087251139242e-05, "loss": 0.8729950189590454, "step": 2803 }, { "epoch": 0.22962667239914422, "grad_norm": 0.197265625, "learning_rate": 2.2270793705501807e-05, "loss": 0.7882866263389587, "step": 2804 }, { "epoch": 0.2297085649356632, "grad_norm": 0.154296875, "learning_rate": 2.2269499713453376e-05, "loss": 0.8446259498596191, "step": 2805 }, { "epoch": 0.22979045747218213, "grad_norm": 0.173828125, "learning_rate": 2.2268205275050194e-05, "loss": 0.7359898090362549, "step": 2806 }, { "epoch": 0.22987235000870107, "grad_norm": 0.16796875, "learning_rate": 2.2266910390348516e-05, "loss": 0.9246798157691956, "step": 2807 }, { "epoch": 0.22995424254522004, "grad_norm": 0.1611328125, "learning_rate": 2.2265615059404623e-05, "loss": 0.8838239908218384, "step": 2808 }, { "epoch": 0.23003613508173898, "grad_norm": 0.1552734375, "learning_rate": 2.2264319282274826e-05, "loss": 0.7450317144393921, "step": 2809 }, { "epoch": 0.23011802761825795, "grad_norm": 0.1591796875, "learning_rate": 2.226302305901543e-05, "loss": 0.9908334016799927, "step": 2810 }, { "epoch": 0.2301999201547769, "grad_norm": 0.1904296875, "learning_rate": 2.2261726389682788e-05, "loss": 0.7841396927833557, "step": 2811 }, { "epoch": 0.23028181269129586, "grad_norm": 0.1845703125, "learning_rate": 2.2260429274333247e-05, "loss": 0.5980224013328552, "step": 2812 }, { "epoch": 0.2303637052278148, "grad_norm": 0.1416015625, "learning_rate": 2.2259131713023197e-05, "loss": 0.7482721209526062, "step": 2813 }, { "epoch": 0.23044559776433376, "grad_norm": 0.2265625, "learning_rate": 2.2257833705809024e-05, "loss": 0.7392801642417908, "step": 2814 }, { "epoch": 0.2305274903008527, "grad_norm": 0.1484375, "learning_rate": 2.225653525274715e-05, "loss": 0.6493318676948547, "step": 2815 }, { "epoch": 0.23060938283737165, "grad_norm": 0.197265625, "learning_rate": 2.225523635389401e-05, "loss": 1.2641602754592896, "step": 2816 }, { "epoch": 0.23069127537389061, "grad_norm": 0.1728515625, "learning_rate": 2.2253937009306058e-05, "loss": 1.003706455230713, "step": 2817 }, { "epoch": 0.23077316791040955, "grad_norm": 0.193359375, "learning_rate": 2.225263721903977e-05, "loss": 0.8137313723564148, "step": 2818 }, { "epoch": 0.23085506044692852, "grad_norm": 0.15625, "learning_rate": 2.2251336983151643e-05, "loss": 0.8125075697898865, "step": 2819 }, { "epoch": 0.23093695298344746, "grad_norm": 0.1787109375, "learning_rate": 2.225003630169819e-05, "loss": 0.7304106950759888, "step": 2820 }, { "epoch": 0.23101884551996643, "grad_norm": 0.1923828125, "learning_rate": 2.224873517473594e-05, "loss": 0.6394543051719666, "step": 2821 }, { "epoch": 0.23110073805648537, "grad_norm": 0.1826171875, "learning_rate": 2.2247433602321453e-05, "loss": 0.91314297914505, "step": 2822 }, { "epoch": 0.23118263059300434, "grad_norm": 0.171875, "learning_rate": 2.224613158451129e-05, "loss": 1.0550345182418823, "step": 2823 }, { "epoch": 0.23126452312952328, "grad_norm": 0.17578125, "learning_rate": 2.224482912136205e-05, "loss": 1.013972282409668, "step": 2824 }, { "epoch": 0.23134641566604225, "grad_norm": 0.171875, "learning_rate": 2.224352621293034e-05, "loss": 0.8673343062400818, "step": 2825 }, { "epoch": 0.2314283082025612, "grad_norm": 0.1728515625, "learning_rate": 2.224222285927279e-05, "loss": 0.8091303706169128, "step": 2826 }, { "epoch": 0.23151020073908013, "grad_norm": 0.1884765625, "learning_rate": 2.224091906044605e-05, "loss": 0.9118584990501404, "step": 2827 }, { "epoch": 0.2315920932755991, "grad_norm": 0.203125, "learning_rate": 2.223961481650679e-05, "loss": 1.1091703176498413, "step": 2828 }, { "epoch": 0.23167398581211804, "grad_norm": 0.15625, "learning_rate": 2.2238310127511697e-05, "loss": 0.8564403057098389, "step": 2829 }, { "epoch": 0.231755878348637, "grad_norm": 0.1865234375, "learning_rate": 2.2237004993517475e-05, "loss": 0.7855836749076843, "step": 2830 }, { "epoch": 0.23183777088515595, "grad_norm": 0.177734375, "learning_rate": 2.2235699414580857e-05, "loss": 0.9884588718414307, "step": 2831 }, { "epoch": 0.23191966342167492, "grad_norm": 0.166015625, "learning_rate": 2.2234393390758578e-05, "loss": 0.9046828746795654, "step": 2832 }, { "epoch": 0.23200155595819386, "grad_norm": 0.2041015625, "learning_rate": 2.2233086922107416e-05, "loss": 0.9615907669067383, "step": 2833 }, { "epoch": 0.23208344849471282, "grad_norm": 0.1748046875, "learning_rate": 2.2231780008684146e-05, "loss": 0.8504703044891357, "step": 2834 }, { "epoch": 0.23216534103123176, "grad_norm": 0.1611328125, "learning_rate": 2.2230472650545573e-05, "loss": 0.9791901707649231, "step": 2835 }, { "epoch": 0.23224723356775073, "grad_norm": 0.130859375, "learning_rate": 2.2229164847748526e-05, "loss": 0.6011077165603638, "step": 2836 }, { "epoch": 0.23232912610426967, "grad_norm": 0.1533203125, "learning_rate": 2.2227856600349845e-05, "loss": 0.8247321844100952, "step": 2837 }, { "epoch": 0.2324110186407886, "grad_norm": 0.15625, "learning_rate": 2.222654790840639e-05, "loss": 0.8194366693496704, "step": 2838 }, { "epoch": 0.23249291117730758, "grad_norm": 0.15625, "learning_rate": 2.222523877197504e-05, "loss": 0.9967739582061768, "step": 2839 }, { "epoch": 0.23257480371382652, "grad_norm": 0.1533203125, "learning_rate": 2.2223929191112698e-05, "loss": 0.6677063703536987, "step": 2840 }, { "epoch": 0.2326566962503455, "grad_norm": 0.1943359375, "learning_rate": 2.222261916587629e-05, "loss": 0.6134150624275208, "step": 2841 }, { "epoch": 0.23273858878686443, "grad_norm": 0.1875, "learning_rate": 2.2221308696322745e-05, "loss": 0.6183879375457764, "step": 2842 }, { "epoch": 0.2328204813233834, "grad_norm": 0.1748046875, "learning_rate": 2.2219997782509026e-05, "loss": 0.8526945114135742, "step": 2843 }, { "epoch": 0.23290237385990234, "grad_norm": 0.1611328125, "learning_rate": 2.2218686424492106e-05, "loss": 0.6396515965461731, "step": 2844 }, { "epoch": 0.2329842663964213, "grad_norm": 0.1533203125, "learning_rate": 2.2217374622328995e-05, "loss": 1.0298415422439575, "step": 2845 }, { "epoch": 0.23306615893294025, "grad_norm": 0.16015625, "learning_rate": 2.2216062376076694e-05, "loss": 0.7058196663856506, "step": 2846 }, { "epoch": 0.2331480514694592, "grad_norm": 0.1787109375, "learning_rate": 2.2214749685792247e-05, "loss": 0.612147331237793, "step": 2847 }, { "epoch": 0.23322994400597816, "grad_norm": 0.15625, "learning_rate": 2.2213436551532708e-05, "loss": 0.9517743587493896, "step": 2848 }, { "epoch": 0.2333118365424971, "grad_norm": 0.138671875, "learning_rate": 2.221212297335515e-05, "loss": 0.6100751161575317, "step": 2849 }, { "epoch": 0.23339372907901607, "grad_norm": 0.189453125, "learning_rate": 2.221080895131667e-05, "loss": 0.8263170719146729, "step": 2850 }, { "epoch": 0.233475621615535, "grad_norm": 0.1650390625, "learning_rate": 2.220949448547437e-05, "loss": 0.7124947309494019, "step": 2851 }, { "epoch": 0.23355751415205397, "grad_norm": 0.1689453125, "learning_rate": 2.2208179575885393e-05, "loss": 0.7261850237846375, "step": 2852 }, { "epoch": 0.23363940668857291, "grad_norm": 0.150390625, "learning_rate": 2.2206864222606893e-05, "loss": 0.9134925007820129, "step": 2853 }, { "epoch": 0.23372129922509188, "grad_norm": 0.16796875, "learning_rate": 2.220554842569603e-05, "loss": 1.0967222452163696, "step": 2854 }, { "epoch": 0.23380319176161082, "grad_norm": 0.1669921875, "learning_rate": 2.220423218521e-05, "loss": 0.640552818775177, "step": 2855 }, { "epoch": 0.2338850842981298, "grad_norm": 0.236328125, "learning_rate": 2.2202915501206012e-05, "loss": 0.6707224249839783, "step": 2856 }, { "epoch": 0.23396697683464873, "grad_norm": 0.201171875, "learning_rate": 2.220159837374129e-05, "loss": 0.9944537281990051, "step": 2857 }, { "epoch": 0.23404886937116767, "grad_norm": 0.1748046875, "learning_rate": 2.2200280802873088e-05, "loss": 0.9138835668563843, "step": 2858 }, { "epoch": 0.23413076190768664, "grad_norm": 0.1806640625, "learning_rate": 2.219896278865867e-05, "loss": 0.8319474458694458, "step": 2859 }, { "epoch": 0.23421265444420558, "grad_norm": 0.1650390625, "learning_rate": 2.2197644331155323e-05, "loss": 1.066777229309082, "step": 2860 }, { "epoch": 0.23429454698072455, "grad_norm": 0.166015625, "learning_rate": 2.2196325430420353e-05, "loss": 0.8615840077400208, "step": 2861 }, { "epoch": 0.2343764395172435, "grad_norm": 0.1640625, "learning_rate": 2.2195006086511083e-05, "loss": 0.8138639330863953, "step": 2862 }, { "epoch": 0.23445833205376246, "grad_norm": 0.1865234375, "learning_rate": 2.2193686299484855e-05, "loss": 1.0833104848861694, "step": 2863 }, { "epoch": 0.2345402245902814, "grad_norm": 0.15625, "learning_rate": 2.219236606939904e-05, "loss": 0.9868308305740356, "step": 2864 }, { "epoch": 0.23462211712680037, "grad_norm": 0.1826171875, "learning_rate": 2.2191045396311017e-05, "loss": 0.7957302331924438, "step": 2865 }, { "epoch": 0.2347040096633193, "grad_norm": 0.189453125, "learning_rate": 2.2189724280278185e-05, "loss": 0.7406501770019531, "step": 2866 }, { "epoch": 0.23478590219983828, "grad_norm": 0.1845703125, "learning_rate": 2.2188402721357972e-05, "loss": 0.9510295987129211, "step": 2867 }, { "epoch": 0.23486779473635722, "grad_norm": 0.1650390625, "learning_rate": 2.218708071960781e-05, "loss": 0.681911051273346, "step": 2868 }, { "epoch": 0.23494968727287616, "grad_norm": 0.1650390625, "learning_rate": 2.218575827508516e-05, "loss": 1.0726261138916016, "step": 2869 }, { "epoch": 0.23503157980939512, "grad_norm": 0.193359375, "learning_rate": 2.2184435387847506e-05, "loss": 0.5477421283721924, "step": 2870 }, { "epoch": 0.23511347234591407, "grad_norm": 0.150390625, "learning_rate": 2.2183112057952345e-05, "loss": 0.6962782740592957, "step": 2871 }, { "epoch": 0.23519536488243303, "grad_norm": 0.1767578125, "learning_rate": 2.2181788285457193e-05, "loss": 0.9193621277809143, "step": 2872 }, { "epoch": 0.23527725741895197, "grad_norm": 0.1474609375, "learning_rate": 2.2180464070419582e-05, "loss": 0.9806286096572876, "step": 2873 }, { "epoch": 0.23535914995547094, "grad_norm": 0.1669921875, "learning_rate": 2.2179139412897075e-05, "loss": 0.944311797618866, "step": 2874 }, { "epoch": 0.23544104249198988, "grad_norm": 0.1630859375, "learning_rate": 2.2177814312947244e-05, "loss": 0.9483674764633179, "step": 2875 }, { "epoch": 0.23552293502850885, "grad_norm": 0.1611328125, "learning_rate": 2.2176488770627683e-05, "loss": 0.8573991060256958, "step": 2876 }, { "epoch": 0.2356048275650278, "grad_norm": 0.1767578125, "learning_rate": 2.2175162785996007e-05, "loss": 0.9898126125335693, "step": 2877 }, { "epoch": 0.23568672010154673, "grad_norm": 0.1845703125, "learning_rate": 2.2173836359109846e-05, "loss": 1.0606895685195923, "step": 2878 }, { "epoch": 0.2357686126380657, "grad_norm": 0.1708984375, "learning_rate": 2.217250949002686e-05, "loss": 0.8710831999778748, "step": 2879 }, { "epoch": 0.23585050517458464, "grad_norm": 0.1640625, "learning_rate": 2.2171182178804708e-05, "loss": 1.0073729753494263, "step": 2880 }, { "epoch": 0.2359323977111036, "grad_norm": 0.1630859375, "learning_rate": 2.216985442550109e-05, "loss": 0.7897433042526245, "step": 2881 }, { "epoch": 0.23601429024762255, "grad_norm": 0.1787109375, "learning_rate": 2.216852623017371e-05, "loss": 0.8032549619674683, "step": 2882 }, { "epoch": 0.23609618278414152, "grad_norm": 0.1630859375, "learning_rate": 2.2167197592880297e-05, "loss": 0.9509630799293518, "step": 2883 }, { "epoch": 0.23617807532066046, "grad_norm": 0.16796875, "learning_rate": 2.2165868513678604e-05, "loss": 0.8557263612747192, "step": 2884 }, { "epoch": 0.23625996785717943, "grad_norm": 0.1865234375, "learning_rate": 2.2164538992626393e-05, "loss": 0.7353136539459229, "step": 2885 }, { "epoch": 0.23634186039369837, "grad_norm": 0.173828125, "learning_rate": 2.2163209029781458e-05, "loss": 0.8399332165718079, "step": 2886 }, { "epoch": 0.23642375293021733, "grad_norm": 0.146484375, "learning_rate": 2.2161878625201593e-05, "loss": 0.6546846628189087, "step": 2887 }, { "epoch": 0.23650564546673628, "grad_norm": 0.1787109375, "learning_rate": 2.216054777894463e-05, "loss": 0.745204508304596, "step": 2888 }, { "epoch": 0.23658753800325522, "grad_norm": 0.197265625, "learning_rate": 2.2159216491068415e-05, "loss": 0.8619378209114075, "step": 2889 }, { "epoch": 0.23666943053977418, "grad_norm": 0.1728515625, "learning_rate": 2.2157884761630806e-05, "loss": 0.6461140513420105, "step": 2890 }, { "epoch": 0.23675132307629312, "grad_norm": 0.177734375, "learning_rate": 2.2156552590689694e-05, "loss": 0.6375608444213867, "step": 2891 }, { "epoch": 0.2368332156128121, "grad_norm": 0.193359375, "learning_rate": 2.215521997830297e-05, "loss": 0.5877677798271179, "step": 2892 }, { "epoch": 0.23691510814933103, "grad_norm": 0.2138671875, "learning_rate": 2.215388692452856e-05, "loss": 1.1882790327072144, "step": 2893 }, { "epoch": 0.23699700068585, "grad_norm": 0.271484375, "learning_rate": 2.2152553429424403e-05, "loss": 0.9810208678245544, "step": 2894 }, { "epoch": 0.23707889322236894, "grad_norm": 0.1708984375, "learning_rate": 2.2151219493048457e-05, "loss": 1.1147767305374146, "step": 2895 }, { "epoch": 0.2371607857588879, "grad_norm": 0.1572265625, "learning_rate": 2.2149885115458704e-05, "loss": 1.024065375328064, "step": 2896 }, { "epoch": 0.23724267829540685, "grad_norm": 0.15234375, "learning_rate": 2.2148550296713137e-05, "loss": 0.6614758968353271, "step": 2897 }, { "epoch": 0.23732457083192582, "grad_norm": 0.1474609375, "learning_rate": 2.214721503686978e-05, "loss": 0.6425355076789856, "step": 2898 }, { "epoch": 0.23740646336844476, "grad_norm": 0.189453125, "learning_rate": 2.2145879335986664e-05, "loss": 0.8511650562286377, "step": 2899 }, { "epoch": 0.2374883559049637, "grad_norm": 0.19140625, "learning_rate": 2.214454319412184e-05, "loss": 1.4089066982269287, "step": 2900 }, { "epoch": 0.23757024844148267, "grad_norm": 0.15234375, "learning_rate": 2.2143206611333387e-05, "loss": 0.752382218837738, "step": 2901 }, { "epoch": 0.2376521409780016, "grad_norm": 0.1953125, "learning_rate": 2.21418695876794e-05, "loss": 0.9225605130195618, "step": 2902 }, { "epoch": 0.23773403351452058, "grad_norm": 0.2041015625, "learning_rate": 2.214053212321799e-05, "loss": 1.0390173196792603, "step": 2903 }, { "epoch": 0.23781592605103952, "grad_norm": 0.1630859375, "learning_rate": 2.213919421800729e-05, "loss": 1.0006986856460571, "step": 2904 }, { "epoch": 0.23789781858755848, "grad_norm": 0.1708984375, "learning_rate": 2.213785587210544e-05, "loss": 0.6326524615287781, "step": 2905 }, { "epoch": 0.23797971112407743, "grad_norm": 0.1484375, "learning_rate": 2.213651708557063e-05, "loss": 0.7818308472633362, "step": 2906 }, { "epoch": 0.2380616036605964, "grad_norm": 0.1728515625, "learning_rate": 2.2135177858461033e-05, "loss": 1.0309195518493652, "step": 2907 }, { "epoch": 0.23814349619711533, "grad_norm": 0.1552734375, "learning_rate": 2.213383819083486e-05, "loss": 0.8918837308883667, "step": 2908 }, { "epoch": 0.23822538873363427, "grad_norm": 0.2099609375, "learning_rate": 2.2132498082750348e-05, "loss": 0.8845656514167786, "step": 2909 }, { "epoch": 0.23830728127015324, "grad_norm": 0.17578125, "learning_rate": 2.213115753426573e-05, "loss": 0.7136186957359314, "step": 2910 }, { "epoch": 0.23838917380667218, "grad_norm": 0.15625, "learning_rate": 2.212981654543928e-05, "loss": 0.569214940071106, "step": 2911 }, { "epoch": 0.23847106634319115, "grad_norm": 0.1845703125, "learning_rate": 2.2128475116329287e-05, "loss": 1.0568721294403076, "step": 2912 }, { "epoch": 0.2385529588797101, "grad_norm": 0.16796875, "learning_rate": 2.2127133246994046e-05, "loss": 1.0686583518981934, "step": 2913 }, { "epoch": 0.23863485141622906, "grad_norm": 0.1728515625, "learning_rate": 2.212579093749188e-05, "loss": 0.9004485607147217, "step": 2914 }, { "epoch": 0.238716743952748, "grad_norm": 0.1640625, "learning_rate": 2.2124448187881136e-05, "loss": 0.7355899810791016, "step": 2915 }, { "epoch": 0.23879863648926697, "grad_norm": 0.19140625, "learning_rate": 2.2123104998220176e-05, "loss": 1.1711084842681885, "step": 2916 }, { "epoch": 0.2388805290257859, "grad_norm": 0.166015625, "learning_rate": 2.212176136856738e-05, "loss": 0.8979333639144897, "step": 2917 }, { "epoch": 0.23896242156230488, "grad_norm": 0.17578125, "learning_rate": 2.2120417298981142e-05, "loss": 1.1877660751342773, "step": 2918 }, { "epoch": 0.23904431409882382, "grad_norm": 0.19921875, "learning_rate": 2.2119072789519888e-05, "loss": 0.8069250583648682, "step": 2919 }, { "epoch": 0.23912620663534276, "grad_norm": 0.14453125, "learning_rate": 2.211772784024205e-05, "loss": 0.7692654132843018, "step": 2920 }, { "epoch": 0.23920809917186173, "grad_norm": 0.158203125, "learning_rate": 2.211638245120609e-05, "loss": 0.5090773701667786, "step": 2921 }, { "epoch": 0.23928999170838067, "grad_norm": 0.1689453125, "learning_rate": 2.2115036622470485e-05, "loss": 0.6984267830848694, "step": 2922 }, { "epoch": 0.23937188424489964, "grad_norm": 0.1552734375, "learning_rate": 2.211369035409373e-05, "loss": 0.9082513451576233, "step": 2923 }, { "epoch": 0.23945377678141858, "grad_norm": 0.171875, "learning_rate": 2.211234364613433e-05, "loss": 1.2546738386154175, "step": 2924 }, { "epoch": 0.23953566931793754, "grad_norm": 0.1513671875, "learning_rate": 2.2110996498650825e-05, "loss": 1.1569148302078247, "step": 2925 }, { "epoch": 0.23961756185445648, "grad_norm": 0.19140625, "learning_rate": 2.2109648911701772e-05, "loss": 0.8320716619491577, "step": 2926 }, { "epoch": 0.23969945439097545, "grad_norm": 0.2333984375, "learning_rate": 2.2108300885345738e-05, "loss": 0.9242125749588013, "step": 2927 }, { "epoch": 0.2397813469274944, "grad_norm": 0.16796875, "learning_rate": 2.2106952419641317e-05, "loss": 1.0613276958465576, "step": 2928 }, { "epoch": 0.23986323946401336, "grad_norm": 0.154296875, "learning_rate": 2.2105603514647114e-05, "loss": 0.54240882396698, "step": 2929 }, { "epoch": 0.2399451320005323, "grad_norm": 0.162109375, "learning_rate": 2.210425417042176e-05, "loss": 0.6276845335960388, "step": 2930 }, { "epoch": 0.24002702453705124, "grad_norm": 0.134765625, "learning_rate": 2.2102904387023905e-05, "loss": 0.6514420509338379, "step": 2931 }, { "epoch": 0.2401089170735702, "grad_norm": 0.1904296875, "learning_rate": 2.2101554164512212e-05, "loss": 0.5951980352401733, "step": 2932 }, { "epoch": 0.24019080961008915, "grad_norm": 0.1630859375, "learning_rate": 2.210020350294537e-05, "loss": 0.647340714931488, "step": 2933 }, { "epoch": 0.24027270214660812, "grad_norm": 0.166015625, "learning_rate": 2.2098852402382085e-05, "loss": 0.6541335582733154, "step": 2934 }, { "epoch": 0.24035459468312706, "grad_norm": 0.1640625, "learning_rate": 2.209750086288108e-05, "loss": 0.8863160014152527, "step": 2935 }, { "epoch": 0.24043648721964603, "grad_norm": 0.169921875, "learning_rate": 2.20961488845011e-05, "loss": 1.0600528717041016, "step": 2936 }, { "epoch": 0.24051837975616497, "grad_norm": 0.208984375, "learning_rate": 2.2094796467300907e-05, "loss": 0.9757941365242004, "step": 2937 }, { "epoch": 0.24060027229268394, "grad_norm": 0.1865234375, "learning_rate": 2.2093443611339276e-05, "loss": 0.9362033605575562, "step": 2938 }, { "epoch": 0.24068216482920288, "grad_norm": 0.169921875, "learning_rate": 2.209209031667502e-05, "loss": 0.9061464667320251, "step": 2939 }, { "epoch": 0.24076405736572185, "grad_norm": 0.1669921875, "learning_rate": 2.2090736583366953e-05, "loss": 0.8506525754928589, "step": 2940 }, { "epoch": 0.24084594990224079, "grad_norm": 0.2138671875, "learning_rate": 2.208938241147391e-05, "loss": 0.760705828666687, "step": 2941 }, { "epoch": 0.24092784243875973, "grad_norm": 0.1806640625, "learning_rate": 2.208802780105476e-05, "loss": 1.064037561416626, "step": 2942 }, { "epoch": 0.2410097349752787, "grad_norm": 0.1689453125, "learning_rate": 2.2086672752168364e-05, "loss": 0.9620828628540039, "step": 2943 }, { "epoch": 0.24109162751179763, "grad_norm": 0.1796875, "learning_rate": 2.2085317264873626e-05, "loss": 0.9036626815795898, "step": 2944 }, { "epoch": 0.2411735200483166, "grad_norm": 0.17578125, "learning_rate": 2.2083961339229466e-05, "loss": 0.8733800649642944, "step": 2945 }, { "epoch": 0.24125541258483554, "grad_norm": 0.16796875, "learning_rate": 2.208260497529481e-05, "loss": 0.7264404892921448, "step": 2946 }, { "epoch": 0.2413373051213545, "grad_norm": 0.1572265625, "learning_rate": 2.2081248173128614e-05, "loss": 0.9964972734451294, "step": 2947 }, { "epoch": 0.24141919765787345, "grad_norm": 0.1826171875, "learning_rate": 2.2079890932789854e-05, "loss": 0.7170913219451904, "step": 2948 }, { "epoch": 0.24150109019439242, "grad_norm": 0.119140625, "learning_rate": 2.2078533254337518e-05, "loss": 0.8381007313728333, "step": 2949 }, { "epoch": 0.24158298273091136, "grad_norm": 0.185546875, "learning_rate": 2.2077175137830615e-05, "loss": 0.7747131586074829, "step": 2950 }, { "epoch": 0.2416648752674303, "grad_norm": 0.185546875, "learning_rate": 2.2075816583328175e-05, "loss": 1.1531018018722534, "step": 2951 }, { "epoch": 0.24174676780394927, "grad_norm": 0.1748046875, "learning_rate": 2.2074457590889245e-05, "loss": 1.0712947845458984, "step": 2952 }, { "epoch": 0.2418286603404682, "grad_norm": 0.1669921875, "learning_rate": 2.20730981605729e-05, "loss": 0.7124852538108826, "step": 2953 }, { "epoch": 0.24191055287698718, "grad_norm": 0.1826171875, "learning_rate": 2.2071738292438218e-05, "loss": 0.8471325635910034, "step": 2954 }, { "epoch": 0.24199244541350612, "grad_norm": 0.1796875, "learning_rate": 2.2070377986544307e-05, "loss": 0.7931849360466003, "step": 2955 }, { "epoch": 0.2420743379500251, "grad_norm": 0.1669921875, "learning_rate": 2.2069017242950294e-05, "loss": 0.8124366402626038, "step": 2956 }, { "epoch": 0.24215623048654403, "grad_norm": 0.197265625, "learning_rate": 2.2067656061715317e-05, "loss": 0.7925934195518494, "step": 2957 }, { "epoch": 0.242238123023063, "grad_norm": 0.15234375, "learning_rate": 2.2066294442898542e-05, "loss": 0.8002415299415588, "step": 2958 }, { "epoch": 0.24232001555958194, "grad_norm": 0.15234375, "learning_rate": 2.2064932386559156e-05, "loss": 0.7029062509536743, "step": 2959 }, { "epoch": 0.2424019080961009, "grad_norm": 0.1708984375, "learning_rate": 2.2063569892756347e-05, "loss": 0.9557847380638123, "step": 2960 }, { "epoch": 0.24248380063261984, "grad_norm": 0.1845703125, "learning_rate": 2.206220696154935e-05, "loss": 1.0886515378952026, "step": 2961 }, { "epoch": 0.24256569316913879, "grad_norm": 0.1533203125, "learning_rate": 2.2060843592997387e-05, "loss": 0.7827026844024658, "step": 2962 }, { "epoch": 0.24264758570565775, "grad_norm": 0.18359375, "learning_rate": 2.2059479787159726e-05, "loss": 0.9219954609870911, "step": 2963 }, { "epoch": 0.2427294782421767, "grad_norm": 0.154296875, "learning_rate": 2.205811554409564e-05, "loss": 0.9839540719985962, "step": 2964 }, { "epoch": 0.24281137077869566, "grad_norm": 0.1494140625, "learning_rate": 2.205675086386443e-05, "loss": 0.5516818165779114, "step": 2965 }, { "epoch": 0.2428932633152146, "grad_norm": 0.1904296875, "learning_rate": 2.2055385746525405e-05, "loss": 0.7227625846862793, "step": 2966 }, { "epoch": 0.24297515585173357, "grad_norm": 0.1494140625, "learning_rate": 2.2054020192137902e-05, "loss": 0.827283501625061, "step": 2967 }, { "epoch": 0.2430570483882525, "grad_norm": 0.1669921875, "learning_rate": 2.2052654200761273e-05, "loss": 1.0273233652114868, "step": 2968 }, { "epoch": 0.24313894092477148, "grad_norm": 0.2021484375, "learning_rate": 2.2051287772454886e-05, "loss": 0.8882022500038147, "step": 2969 }, { "epoch": 0.24322083346129042, "grad_norm": 0.1533203125, "learning_rate": 2.204992090727813e-05, "loss": 0.6083340644836426, "step": 2970 }, { "epoch": 0.2433027259978094, "grad_norm": 0.1953125, "learning_rate": 2.2048553605290424e-05, "loss": 0.6950846910476685, "step": 2971 }, { "epoch": 0.24338461853432833, "grad_norm": 0.236328125, "learning_rate": 2.2047185866551192e-05, "loss": 1.3416111469268799, "step": 2972 }, { "epoch": 0.24346651107084727, "grad_norm": 0.2080078125, "learning_rate": 2.204581769111988e-05, "loss": 1.0637092590332031, "step": 2973 }, { "epoch": 0.24354840360736624, "grad_norm": 0.19921875, "learning_rate": 2.2044449079055956e-05, "loss": 0.8769762516021729, "step": 2974 }, { "epoch": 0.24363029614388518, "grad_norm": 0.1982421875, "learning_rate": 2.2043080030418902e-05, "loss": 0.9633129835128784, "step": 2975 }, { "epoch": 0.24371218868040415, "grad_norm": 0.1884765625, "learning_rate": 2.2041710545268227e-05, "loss": 1.1436876058578491, "step": 2976 }, { "epoch": 0.2437940812169231, "grad_norm": 0.1630859375, "learning_rate": 2.2040340623663453e-05, "loss": 0.8332898616790771, "step": 2977 }, { "epoch": 0.24387597375344205, "grad_norm": 0.1767578125, "learning_rate": 2.2038970265664123e-05, "loss": 1.0950275659561157, "step": 2978 }, { "epoch": 0.243957866289961, "grad_norm": 0.1943359375, "learning_rate": 2.2037599471329797e-05, "loss": 0.9903711080551147, "step": 2979 }, { "epoch": 0.24403975882647996, "grad_norm": 0.1474609375, "learning_rate": 2.203622824072006e-05, "loss": 0.7225038409233093, "step": 2980 }, { "epoch": 0.2441216513629989, "grad_norm": 0.1787109375, "learning_rate": 2.20348565738945e-05, "loss": 1.003244400024414, "step": 2981 }, { "epoch": 0.24420354389951784, "grad_norm": 0.1494140625, "learning_rate": 2.2033484470912746e-05, "loss": 0.9090712070465088, "step": 2982 }, { "epoch": 0.2442854364360368, "grad_norm": 0.1728515625, "learning_rate": 2.2032111931834436e-05, "loss": 0.8148490190505981, "step": 2983 }, { "epoch": 0.24436732897255575, "grad_norm": 0.1572265625, "learning_rate": 2.203073895671922e-05, "loss": 0.9793577790260315, "step": 2984 }, { "epoch": 0.24444922150907472, "grad_norm": 0.146484375, "learning_rate": 2.202936554562677e-05, "loss": 1.031598687171936, "step": 2985 }, { "epoch": 0.24453111404559366, "grad_norm": 0.185546875, "learning_rate": 2.202799169861679e-05, "loss": 0.7654539346694946, "step": 2986 }, { "epoch": 0.24461300658211263, "grad_norm": 0.162109375, "learning_rate": 2.2026617415748992e-05, "loss": 0.817785382270813, "step": 2987 }, { "epoch": 0.24469489911863157, "grad_norm": 0.158203125, "learning_rate": 2.20252426970831e-05, "loss": 0.6804990172386169, "step": 2988 }, { "epoch": 0.24477679165515054, "grad_norm": 0.1708984375, "learning_rate": 2.202386754267887e-05, "loss": 0.7387655377388, "step": 2989 }, { "epoch": 0.24485868419166948, "grad_norm": 0.1552734375, "learning_rate": 2.202249195259607e-05, "loss": 0.7371788024902344, "step": 2990 }, { "epoch": 0.24494057672818845, "grad_norm": 0.1650390625, "learning_rate": 2.2021115926894493e-05, "loss": 0.8963190913200378, "step": 2991 }, { "epoch": 0.2450224692647074, "grad_norm": 0.1669921875, "learning_rate": 2.2019739465633942e-05, "loss": 0.8483158946037292, "step": 2992 }, { "epoch": 0.24510436180122633, "grad_norm": 0.1708984375, "learning_rate": 2.2018362568874248e-05, "loss": 1.0992926359176636, "step": 2993 }, { "epoch": 0.2451862543377453, "grad_norm": 0.173828125, "learning_rate": 2.201698523667525e-05, "loss": 0.8891171216964722, "step": 2994 }, { "epoch": 0.24526814687426424, "grad_norm": 0.177734375, "learning_rate": 2.2015607469096823e-05, "loss": 0.7438278794288635, "step": 2995 }, { "epoch": 0.2453500394107832, "grad_norm": 0.1748046875, "learning_rate": 2.2014229266198836e-05, "loss": 0.8359938859939575, "step": 2996 }, { "epoch": 0.24543193194730215, "grad_norm": 0.1708984375, "learning_rate": 2.2012850628041205e-05, "loss": 0.7275131940841675, "step": 2997 }, { "epoch": 0.2455138244838211, "grad_norm": 0.169921875, "learning_rate": 2.2011471554683844e-05, "loss": 0.9328632354736328, "step": 2998 }, { "epoch": 0.24559571702034005, "grad_norm": 0.1484375, "learning_rate": 2.2010092046186697e-05, "loss": 0.751015841960907, "step": 2999 }, { "epoch": 0.24567760955685902, "grad_norm": 0.1953125, "learning_rate": 2.2008712102609718e-05, "loss": 0.9326849579811096, "step": 3000 }, { "epoch": 0.24575950209337796, "grad_norm": 0.1669921875, "learning_rate": 2.200733172401289e-05, "loss": 0.8515874743461609, "step": 3001 }, { "epoch": 0.24584139462989693, "grad_norm": 0.181640625, "learning_rate": 2.200595091045621e-05, "loss": 0.753883421421051, "step": 3002 }, { "epoch": 0.24592328716641587, "grad_norm": 0.23046875, "learning_rate": 2.2004569661999685e-05, "loss": 1.2156012058258057, "step": 3003 }, { "epoch": 0.2460051797029348, "grad_norm": 0.1689453125, "learning_rate": 2.2003187978703362e-05, "loss": 0.8289654850959778, "step": 3004 }, { "epoch": 0.24608707223945378, "grad_norm": 0.1669921875, "learning_rate": 2.200180586062729e-05, "loss": 0.8761445879936218, "step": 3005 }, { "epoch": 0.24616896477597272, "grad_norm": 0.16796875, "learning_rate": 2.2000423307831543e-05, "loss": 0.9996823668479919, "step": 3006 }, { "epoch": 0.2462508573124917, "grad_norm": 0.1669921875, "learning_rate": 2.1999040320376206e-05, "loss": 0.8873587846755981, "step": 3007 }, { "epoch": 0.24633274984901063, "grad_norm": 0.1767578125, "learning_rate": 2.1997656898321393e-05, "loss": 0.8999268412590027, "step": 3008 }, { "epoch": 0.2464146423855296, "grad_norm": 0.1435546875, "learning_rate": 2.1996273041727236e-05, "loss": 0.7499046325683594, "step": 3009 }, { "epoch": 0.24649653492204854, "grad_norm": 0.171875, "learning_rate": 2.199488875065388e-05, "loss": 0.9111863374710083, "step": 3010 }, { "epoch": 0.2465784274585675, "grad_norm": 0.193359375, "learning_rate": 2.1993504025161493e-05, "loss": 0.8589242696762085, "step": 3011 }, { "epoch": 0.24666031999508645, "grad_norm": 0.1865234375, "learning_rate": 2.1992118865310263e-05, "loss": 0.8668841123580933, "step": 3012 }, { "epoch": 0.2467422125316054, "grad_norm": 0.220703125, "learning_rate": 2.199073327116039e-05, "loss": 0.8567924499511719, "step": 3013 }, { "epoch": 0.24682410506812436, "grad_norm": 0.1728515625, "learning_rate": 2.1989347242772102e-05, "loss": 0.8537012338638306, "step": 3014 }, { "epoch": 0.2469059976046433, "grad_norm": 0.1552734375, "learning_rate": 2.1987960780205637e-05, "loss": 0.9299254417419434, "step": 3015 }, { "epoch": 0.24698789014116226, "grad_norm": 0.1611328125, "learning_rate": 2.1986573883521263e-05, "loss": 0.9946805238723755, "step": 3016 }, { "epoch": 0.2470697826776812, "grad_norm": 0.1572265625, "learning_rate": 2.1985186552779254e-05, "loss": 0.880164384841919, "step": 3017 }, { "epoch": 0.24715167521420017, "grad_norm": 0.1689453125, "learning_rate": 2.1983798788039914e-05, "loss": 0.6238075494766235, "step": 3018 }, { "epoch": 0.2472335677507191, "grad_norm": 0.224609375, "learning_rate": 2.1982410589363555e-05, "loss": 0.8949953317642212, "step": 3019 }, { "epoch": 0.24731546028723808, "grad_norm": 0.220703125, "learning_rate": 2.198102195681052e-05, "loss": 0.963131308555603, "step": 3020 }, { "epoch": 0.24739735282375702, "grad_norm": 0.162109375, "learning_rate": 2.1979632890441156e-05, "loss": 1.1852781772613525, "step": 3021 }, { "epoch": 0.247479245360276, "grad_norm": 0.2109375, "learning_rate": 2.1978243390315852e-05, "loss": 1.0087637901306152, "step": 3022 }, { "epoch": 0.24756113789679493, "grad_norm": 0.1787109375, "learning_rate": 2.1976853456494988e-05, "loss": 0.9421628713607788, "step": 3023 }, { "epoch": 0.24764303043331387, "grad_norm": 0.1728515625, "learning_rate": 2.197546308903898e-05, "loss": 0.950789749622345, "step": 3024 }, { "epoch": 0.24772492296983284, "grad_norm": 0.173828125, "learning_rate": 2.1974072288008265e-05, "loss": 0.8576475977897644, "step": 3025 }, { "epoch": 0.24780681550635178, "grad_norm": 0.1923828125, "learning_rate": 2.1972681053463285e-05, "loss": 0.9681447148323059, "step": 3026 }, { "epoch": 0.24788870804287075, "grad_norm": 0.1767578125, "learning_rate": 2.197128938546451e-05, "loss": 0.7783282995223999, "step": 3027 }, { "epoch": 0.2479706005793897, "grad_norm": 0.1708984375, "learning_rate": 2.196989728407243e-05, "loss": 0.9873195290565491, "step": 3028 }, { "epoch": 0.24805249311590866, "grad_norm": 0.1728515625, "learning_rate": 2.1968504749347557e-05, "loss": 0.8307504057884216, "step": 3029 }, { "epoch": 0.2481343856524276, "grad_norm": 0.18359375, "learning_rate": 2.1967111781350406e-05, "loss": 1.3429138660430908, "step": 3030 }, { "epoch": 0.24821627818894657, "grad_norm": 0.1787109375, "learning_rate": 2.1965718380141524e-05, "loss": 0.9864434599876404, "step": 3031 }, { "epoch": 0.2482981707254655, "grad_norm": 0.1884765625, "learning_rate": 2.1964324545781476e-05, "loss": 1.0726772546768188, "step": 3032 }, { "epoch": 0.24838006326198447, "grad_norm": 0.2333984375, "learning_rate": 2.1962930278330847e-05, "loss": 0.9629580974578857, "step": 3033 }, { "epoch": 0.24846195579850341, "grad_norm": 0.16796875, "learning_rate": 2.1961535577850233e-05, "loss": 0.9671548008918762, "step": 3034 }, { "epoch": 0.24854384833502235, "grad_norm": 0.1923828125, "learning_rate": 2.196014044440025e-05, "loss": 1.1271796226501465, "step": 3035 }, { "epoch": 0.24862574087154132, "grad_norm": 0.1943359375, "learning_rate": 2.1958744878041543e-05, "loss": 1.174940824508667, "step": 3036 }, { "epoch": 0.24870763340806026, "grad_norm": 0.2001953125, "learning_rate": 2.1957348878834767e-05, "loss": 1.1124440431594849, "step": 3037 }, { "epoch": 0.24878952594457923, "grad_norm": 0.1806640625, "learning_rate": 2.1955952446840596e-05, "loss": 1.0802549123764038, "step": 3038 }, { "epoch": 0.24887141848109817, "grad_norm": 0.18359375, "learning_rate": 2.195455558211973e-05, "loss": 1.1630305051803589, "step": 3039 }, { "epoch": 0.24895331101761714, "grad_norm": 0.1796875, "learning_rate": 2.195315828473287e-05, "loss": 0.9395103454589844, "step": 3040 }, { "epoch": 0.24903520355413608, "grad_norm": 0.1591796875, "learning_rate": 2.1951760554740765e-05, "loss": 0.8728451728820801, "step": 3041 }, { "epoch": 0.24911709609065505, "grad_norm": 0.19140625, "learning_rate": 2.1950362392204154e-05, "loss": 0.980659008026123, "step": 3042 }, { "epoch": 0.249198988627174, "grad_norm": 0.2119140625, "learning_rate": 2.1948963797183813e-05, "loss": 0.9050116539001465, "step": 3043 }, { "epoch": 0.24928088116369296, "grad_norm": 0.1904296875, "learning_rate": 2.1947564769740528e-05, "loss": 1.059549331665039, "step": 3044 }, { "epoch": 0.2493627737002119, "grad_norm": 0.1591796875, "learning_rate": 2.1946165309935106e-05, "loss": 0.8213518261909485, "step": 3045 }, { "epoch": 0.24944466623673084, "grad_norm": 0.1865234375, "learning_rate": 2.1944765417828374e-05, "loss": 1.1724355220794678, "step": 3046 }, { "epoch": 0.2495265587732498, "grad_norm": 0.1953125, "learning_rate": 2.1943365093481183e-05, "loss": 0.6799591779708862, "step": 3047 }, { "epoch": 0.24960845130976875, "grad_norm": 0.162109375, "learning_rate": 2.1941964336954385e-05, "loss": 0.9837350845336914, "step": 3048 }, { "epoch": 0.24969034384628772, "grad_norm": 0.1953125, "learning_rate": 2.1940563148308872e-05, "loss": 0.8224217295646667, "step": 3049 }, { "epoch": 0.24977223638280666, "grad_norm": 0.185546875, "learning_rate": 2.1939161527605542e-05, "loss": 1.083338737487793, "step": 3050 }, { "epoch": 0.24985412891932562, "grad_norm": 0.19140625, "learning_rate": 2.1937759474905315e-05, "loss": 0.946805477142334, "step": 3051 }, { "epoch": 0.24993602145584456, "grad_norm": 0.1943359375, "learning_rate": 2.193635699026913e-05, "loss": 0.6161782741546631, "step": 3052 }, { "epoch": 0.2500179139923635, "grad_norm": 0.2021484375, "learning_rate": 2.1934954073757945e-05, "loss": 0.9599847793579102, "step": 3053 }, { "epoch": 0.2500998065288825, "grad_norm": 0.1494140625, "learning_rate": 2.193355072543274e-05, "loss": 0.5544872283935547, "step": 3054 }, { "epoch": 0.25018169906540144, "grad_norm": 0.2138671875, "learning_rate": 2.19321469453545e-05, "loss": 0.7686026096343994, "step": 3055 }, { "epoch": 0.25026359160192035, "grad_norm": 0.150390625, "learning_rate": 2.1930742733584256e-05, "loss": 0.689558207988739, "step": 3056 }, { "epoch": 0.2503454841384393, "grad_norm": 0.1484375, "learning_rate": 2.1929338090183026e-05, "loss": 0.9226515293121338, "step": 3057 }, { "epoch": 0.2504273766749583, "grad_norm": 0.1865234375, "learning_rate": 2.192793301521187e-05, "loss": 0.9194018244743347, "step": 3058 }, { "epoch": 0.25050926921147726, "grad_norm": 0.15625, "learning_rate": 2.192652750873185e-05, "loss": 1.065134048461914, "step": 3059 }, { "epoch": 0.25059116174799617, "grad_norm": 0.181640625, "learning_rate": 2.1925121570804063e-05, "loss": 1.0438369512557983, "step": 3060 }, { "epoch": 0.25067305428451514, "grad_norm": 0.19140625, "learning_rate": 2.192371520148961e-05, "loss": 0.9493274092674255, "step": 3061 }, { "epoch": 0.2507549468210341, "grad_norm": 0.1787109375, "learning_rate": 2.192230840084963e-05, "loss": 0.7314890623092651, "step": 3062 }, { "epoch": 0.2508368393575531, "grad_norm": 0.189453125, "learning_rate": 2.1920901168945253e-05, "loss": 0.7952975630760193, "step": 3063 }, { "epoch": 0.250918731894072, "grad_norm": 0.14453125, "learning_rate": 2.191949350583765e-05, "loss": 0.8662047386169434, "step": 3064 }, { "epoch": 0.25100062443059096, "grad_norm": 0.16015625, "learning_rate": 2.1918085411588005e-05, "loss": 0.8837013840675354, "step": 3065 }, { "epoch": 0.2510825169671099, "grad_norm": 0.16015625, "learning_rate": 2.191667688625752e-05, "loss": 1.0853338241577148, "step": 3066 }, { "epoch": 0.25116440950362884, "grad_norm": 0.203125, "learning_rate": 2.1915267929907413e-05, "loss": 0.8274083137512207, "step": 3067 }, { "epoch": 0.2512463020401478, "grad_norm": 0.16015625, "learning_rate": 2.1913858542598924e-05, "loss": 0.8375928401947021, "step": 3068 }, { "epoch": 0.2513281945766668, "grad_norm": 0.177734375, "learning_rate": 2.191244872439331e-05, "loss": 0.8726490139961243, "step": 3069 }, { "epoch": 0.25141008711318574, "grad_norm": 0.1572265625, "learning_rate": 2.191103847535185e-05, "loss": 0.7674068212509155, "step": 3070 }, { "epoch": 0.25149197964970466, "grad_norm": 0.1865234375, "learning_rate": 2.1909627795535833e-05, "loss": 0.8712756633758545, "step": 3071 }, { "epoch": 0.2515738721862236, "grad_norm": 0.1923828125, "learning_rate": 2.1908216685006578e-05, "loss": 0.7865182757377625, "step": 3072 }, { "epoch": 0.2516557647227426, "grad_norm": 0.208984375, "learning_rate": 2.1906805143825417e-05, "loss": 0.9612048864364624, "step": 3073 }, { "epoch": 0.25173765725926156, "grad_norm": 0.15234375, "learning_rate": 2.1905393172053706e-05, "loss": 0.7275028824806213, "step": 3074 }, { "epoch": 0.2518195497957805, "grad_norm": 0.20703125, "learning_rate": 2.1903980769752806e-05, "loss": 1.0381853580474854, "step": 3075 }, { "epoch": 0.25190144233229944, "grad_norm": 0.154296875, "learning_rate": 2.1902567936984115e-05, "loss": 0.6771314740180969, "step": 3076 }, { "epoch": 0.2519833348688184, "grad_norm": 0.1787109375, "learning_rate": 2.1901154673809028e-05, "loss": 0.7741199135780334, "step": 3077 }, { "epoch": 0.2520652274053373, "grad_norm": 0.208984375, "learning_rate": 2.1899740980288983e-05, "loss": 1.65373694896698, "step": 3078 }, { "epoch": 0.2521471199418563, "grad_norm": 0.19140625, "learning_rate": 2.1898326856485423e-05, "loss": 0.7512911558151245, "step": 3079 }, { "epoch": 0.25222901247837526, "grad_norm": 0.1875, "learning_rate": 2.189691230245981e-05, "loss": 0.8491021990776062, "step": 3080 }, { "epoch": 0.2523109050148942, "grad_norm": 0.171875, "learning_rate": 2.1895497318273622e-05, "loss": 1.0949782133102417, "step": 3081 }, { "epoch": 0.25239279755141314, "grad_norm": 0.17578125, "learning_rate": 2.1894081903988367e-05, "loss": 0.7864523530006409, "step": 3082 }, { "epoch": 0.2524746900879321, "grad_norm": 0.1689453125, "learning_rate": 2.1892666059665558e-05, "loss": 0.9498428106307983, "step": 3083 }, { "epoch": 0.2525565826244511, "grad_norm": 0.169921875, "learning_rate": 2.189124978536674e-05, "loss": 0.7808324098587036, "step": 3084 }, { "epoch": 0.25263847516097004, "grad_norm": 0.1611328125, "learning_rate": 2.1889833081153466e-05, "loss": 0.7288825511932373, "step": 3085 }, { "epoch": 0.25272036769748896, "grad_norm": 0.169921875, "learning_rate": 2.1888415947087315e-05, "loss": 0.7504899501800537, "step": 3086 }, { "epoch": 0.2528022602340079, "grad_norm": 0.1826171875, "learning_rate": 2.188699838322988e-05, "loss": 0.8437036871910095, "step": 3087 }, { "epoch": 0.2528841527705269, "grad_norm": 0.1650390625, "learning_rate": 2.1885580389642774e-05, "loss": 0.6429985761642456, "step": 3088 }, { "epoch": 0.2529660453070458, "grad_norm": 0.1826171875, "learning_rate": 2.1884161966387624e-05, "loss": 0.818250834941864, "step": 3089 }, { "epoch": 0.2530479378435648, "grad_norm": 0.1943359375, "learning_rate": 2.1882743113526085e-05, "loss": 1.0685276985168457, "step": 3090 }, { "epoch": 0.25312983038008374, "grad_norm": 0.1982421875, "learning_rate": 2.1881323831119834e-05, "loss": 0.7248176336288452, "step": 3091 }, { "epoch": 0.2532117229166027, "grad_norm": 0.173828125, "learning_rate": 2.1879904119230544e-05, "loss": 1.0803157091140747, "step": 3092 }, { "epoch": 0.2532936154531216, "grad_norm": 0.208984375, "learning_rate": 2.1878483977919928e-05, "loss": 1.0954208374023438, "step": 3093 }, { "epoch": 0.2533755079896406, "grad_norm": 0.1953125, "learning_rate": 2.1877063407249717e-05, "loss": 1.0476826429367065, "step": 3094 }, { "epoch": 0.25345740052615956, "grad_norm": 0.1904296875, "learning_rate": 2.1875642407281647e-05, "loss": 0.9073938131332397, "step": 3095 }, { "epoch": 0.2535392930626785, "grad_norm": 0.1923828125, "learning_rate": 2.1874220978077485e-05, "loss": 0.8722144365310669, "step": 3096 }, { "epoch": 0.25362118559919744, "grad_norm": 0.15625, "learning_rate": 2.187279911969901e-05, "loss": 0.6819531321525574, "step": 3097 }, { "epoch": 0.2537030781357164, "grad_norm": 0.1572265625, "learning_rate": 2.1871376832208017e-05, "loss": 1.0163159370422363, "step": 3098 }, { "epoch": 0.2537849706722354, "grad_norm": 0.2080078125, "learning_rate": 2.1869954115666337e-05, "loss": 1.041135549545288, "step": 3099 }, { "epoch": 0.2538668632087543, "grad_norm": 0.1630859375, "learning_rate": 2.1868530970135797e-05, "loss": 0.7326840758323669, "step": 3100 }, { "epoch": 0.25394875574527326, "grad_norm": 0.19921875, "learning_rate": 2.1867107395678253e-05, "loss": 1.2159595489501953, "step": 3101 }, { "epoch": 0.2540306482817922, "grad_norm": 0.1923828125, "learning_rate": 2.1865683392355585e-05, "loss": 0.9358042478561401, "step": 3102 }, { "epoch": 0.2541125408183112, "grad_norm": 0.203125, "learning_rate": 2.1864258960229683e-05, "loss": 0.8298425674438477, "step": 3103 }, { "epoch": 0.2541944333548301, "grad_norm": 0.1650390625, "learning_rate": 2.1862834099362457e-05, "loss": 0.7570986747741699, "step": 3104 }, { "epoch": 0.2542763258913491, "grad_norm": 0.1640625, "learning_rate": 2.186140880981584e-05, "loss": 1.0800713300704956, "step": 3105 }, { "epoch": 0.25435821842786804, "grad_norm": 0.18359375, "learning_rate": 2.1859983091651778e-05, "loss": 1.1672977209091187, "step": 3106 }, { "epoch": 0.25444011096438696, "grad_norm": 0.1884765625, "learning_rate": 2.185855694493224e-05, "loss": 1.0439587831497192, "step": 3107 }, { "epoch": 0.2545220035009059, "grad_norm": 0.1826171875, "learning_rate": 2.185713036971922e-05, "loss": 0.7559874653816223, "step": 3108 }, { "epoch": 0.2546038960374249, "grad_norm": 0.1767578125, "learning_rate": 2.1855703366074707e-05, "loss": 0.9461041688919067, "step": 3109 }, { "epoch": 0.25468578857394386, "grad_norm": 0.142578125, "learning_rate": 2.1854275934060738e-05, "loss": 0.9024447202682495, "step": 3110 }, { "epoch": 0.2547676811104628, "grad_norm": 0.181640625, "learning_rate": 2.1852848073739348e-05, "loss": 0.5854197144508362, "step": 3111 }, { "epoch": 0.25484957364698174, "grad_norm": 0.2109375, "learning_rate": 2.1851419785172604e-05, "loss": 0.9766862392425537, "step": 3112 }, { "epoch": 0.2549314661835007, "grad_norm": 0.166015625, "learning_rate": 2.1849991068422577e-05, "loss": 0.9357848763465881, "step": 3113 }, { "epoch": 0.2550133587200197, "grad_norm": 0.1533203125, "learning_rate": 2.184856192355137e-05, "loss": 0.65680330991745, "step": 3114 }, { "epoch": 0.2550952512565386, "grad_norm": 0.1826171875, "learning_rate": 2.1847132350621096e-05, "loss": 0.8248845338821411, "step": 3115 }, { "epoch": 0.25517714379305756, "grad_norm": 0.1650390625, "learning_rate": 2.1845702349693895e-05, "loss": 1.0158073902130127, "step": 3116 }, { "epoch": 0.2552590363295765, "grad_norm": 0.1708984375, "learning_rate": 2.184427192083192e-05, "loss": 0.9877614378929138, "step": 3117 }, { "epoch": 0.25534092886609544, "grad_norm": 0.19140625, "learning_rate": 2.184284106409734e-05, "loss": 0.9232885241508484, "step": 3118 }, { "epoch": 0.2554228214026144, "grad_norm": 0.205078125, "learning_rate": 2.1841409779552345e-05, "loss": 0.760655403137207, "step": 3119 }, { "epoch": 0.2555047139391334, "grad_norm": 0.1943359375, "learning_rate": 2.1839978067259153e-05, "loss": 0.9549834132194519, "step": 3120 }, { "epoch": 0.25558660647565234, "grad_norm": 0.1865234375, "learning_rate": 2.1838545927279982e-05, "loss": 0.7998074293136597, "step": 3121 }, { "epoch": 0.25566849901217126, "grad_norm": 0.154296875, "learning_rate": 2.1837113359677085e-05, "loss": 0.8158242702484131, "step": 3122 }, { "epoch": 0.2557503915486902, "grad_norm": 0.154296875, "learning_rate": 2.1835680364512725e-05, "loss": 1.0770370960235596, "step": 3123 }, { "epoch": 0.2558322840852092, "grad_norm": 0.212890625, "learning_rate": 2.1834246941849183e-05, "loss": 1.177646279335022, "step": 3124 }, { "epoch": 0.25591417662172816, "grad_norm": 0.1474609375, "learning_rate": 2.183281309174877e-05, "loss": 0.8634687662124634, "step": 3125 }, { "epoch": 0.2559960691582471, "grad_norm": 0.1943359375, "learning_rate": 2.1831378814273798e-05, "loss": 0.7518569231033325, "step": 3126 }, { "epoch": 0.25607796169476604, "grad_norm": 0.1630859375, "learning_rate": 2.182994410948661e-05, "loss": 0.9456021785736084, "step": 3127 }, { "epoch": 0.256159854231285, "grad_norm": 0.1552734375, "learning_rate": 2.1828508977449568e-05, "loss": 0.8693006038665771, "step": 3128 }, { "epoch": 0.2562417467678039, "grad_norm": 0.1787109375, "learning_rate": 2.182707341822504e-05, "loss": 0.92015540599823, "step": 3129 }, { "epoch": 0.2563236393043229, "grad_norm": 0.1796875, "learning_rate": 2.1825637431875433e-05, "loss": 0.8281747102737427, "step": 3130 }, { "epoch": 0.25640553184084186, "grad_norm": 0.1689453125, "learning_rate": 2.182420101846315e-05, "loss": 0.9466538429260254, "step": 3131 }, { "epoch": 0.25648742437736083, "grad_norm": 0.1669921875, "learning_rate": 2.1822764178050627e-05, "loss": 0.8807326555252075, "step": 3132 }, { "epoch": 0.25656931691387974, "grad_norm": 0.212890625, "learning_rate": 2.182132691070032e-05, "loss": 0.7819794416427612, "step": 3133 }, { "epoch": 0.2566512094503987, "grad_norm": 0.16796875, "learning_rate": 2.181988921647469e-05, "loss": 0.7207423448562622, "step": 3134 }, { "epoch": 0.2567331019869177, "grad_norm": 0.1748046875, "learning_rate": 2.181845109543623e-05, "loss": 0.744535505771637, "step": 3135 }, { "epoch": 0.25681499452343665, "grad_norm": 0.19140625, "learning_rate": 2.181701254764745e-05, "loss": 0.8818186521530151, "step": 3136 }, { "epoch": 0.25689688705995556, "grad_norm": 0.1767578125, "learning_rate": 2.181557357317087e-05, "loss": 0.8681089282035828, "step": 3137 }, { "epoch": 0.2569787795964745, "grad_norm": 0.1845703125, "learning_rate": 2.1814134172069032e-05, "loss": 0.8344646692276001, "step": 3138 }, { "epoch": 0.2570606721329935, "grad_norm": 0.173828125, "learning_rate": 2.1812694344404503e-05, "loss": 0.609687328338623, "step": 3139 }, { "epoch": 0.2571425646695124, "grad_norm": 0.18359375, "learning_rate": 2.181125409023986e-05, "loss": 0.777496874332428, "step": 3140 }, { "epoch": 0.2572244572060314, "grad_norm": 0.177734375, "learning_rate": 2.180981340963771e-05, "loss": 0.958476185798645, "step": 3141 }, { "epoch": 0.25730634974255034, "grad_norm": 0.17578125, "learning_rate": 2.1808372302660664e-05, "loss": 0.8258377313613892, "step": 3142 }, { "epoch": 0.2573882422790693, "grad_norm": 0.1513671875, "learning_rate": 2.1806930769371358e-05, "loss": 0.8648903965950012, "step": 3143 }, { "epoch": 0.2574701348155882, "grad_norm": 0.1552734375, "learning_rate": 2.180548880983245e-05, "loss": 0.9093597531318665, "step": 3144 }, { "epoch": 0.2575520273521072, "grad_norm": 0.173828125, "learning_rate": 2.180404642410661e-05, "loss": 1.144288182258606, "step": 3145 }, { "epoch": 0.25763391988862616, "grad_norm": 0.1796875, "learning_rate": 2.1802603612256534e-05, "loss": 0.6446361541748047, "step": 3146 }, { "epoch": 0.25771581242514513, "grad_norm": 0.1796875, "learning_rate": 2.1801160374344935e-05, "loss": 0.8613916635513306, "step": 3147 }, { "epoch": 0.25779770496166404, "grad_norm": 0.1865234375, "learning_rate": 2.1799716710434534e-05, "loss": 1.0618281364440918, "step": 3148 }, { "epoch": 0.257879597498183, "grad_norm": 0.1845703125, "learning_rate": 2.1798272620588085e-05, "loss": 0.9598224759101868, "step": 3149 }, { "epoch": 0.257961490034702, "grad_norm": 0.17578125, "learning_rate": 2.179682810486835e-05, "loss": 0.8121265172958374, "step": 3150 }, { "epoch": 0.2580433825712209, "grad_norm": 0.150390625, "learning_rate": 2.1795383163338115e-05, "loss": 0.9354348182678223, "step": 3151 }, { "epoch": 0.25812527510773986, "grad_norm": 0.203125, "learning_rate": 2.1793937796060184e-05, "loss": 0.9365136027336121, "step": 3152 }, { "epoch": 0.25820716764425883, "grad_norm": 0.1396484375, "learning_rate": 2.179249200309738e-05, "loss": 0.5169033408164978, "step": 3153 }, { "epoch": 0.2582890601807778, "grad_norm": 0.1865234375, "learning_rate": 2.1791045784512542e-05, "loss": 0.9792241454124451, "step": 3154 }, { "epoch": 0.2583709527172967, "grad_norm": 0.1884765625, "learning_rate": 2.1789599140368524e-05, "loss": 1.0530011653900146, "step": 3155 }, { "epoch": 0.2584528452538157, "grad_norm": 0.1494140625, "learning_rate": 2.1788152070728208e-05, "loss": 0.5000385642051697, "step": 3156 }, { "epoch": 0.25853473779033465, "grad_norm": 0.197265625, "learning_rate": 2.1786704575654487e-05, "loss": 0.9103628396987915, "step": 3157 }, { "epoch": 0.2586166303268536, "grad_norm": 0.1640625, "learning_rate": 2.178525665521028e-05, "loss": 0.8115824460983276, "step": 3158 }, { "epoch": 0.2586985228633725, "grad_norm": 0.197265625, "learning_rate": 2.1783808309458515e-05, "loss": 0.8792688846588135, "step": 3159 }, { "epoch": 0.2587804153998915, "grad_norm": 0.2001953125, "learning_rate": 2.1782359538462146e-05, "loss": 0.8507426381111145, "step": 3160 }, { "epoch": 0.25886230793641046, "grad_norm": 0.19140625, "learning_rate": 2.1780910342284138e-05, "loss": 0.9073144793510437, "step": 3161 }, { "epoch": 0.2589442004729294, "grad_norm": 0.18359375, "learning_rate": 2.1779460720987483e-05, "loss": 0.7104508876800537, "step": 3162 }, { "epoch": 0.25902609300944834, "grad_norm": 0.17578125, "learning_rate": 2.1778010674635188e-05, "loss": 0.9252887964248657, "step": 3163 }, { "epoch": 0.2591079855459673, "grad_norm": 0.2119140625, "learning_rate": 2.1776560203290274e-05, "loss": 0.7737255096435547, "step": 3164 }, { "epoch": 0.2591898780824863, "grad_norm": 0.173828125, "learning_rate": 2.177510930701579e-05, "loss": 0.8987854719161987, "step": 3165 }, { "epoch": 0.2592717706190052, "grad_norm": 0.15625, "learning_rate": 2.177365798587479e-05, "loss": 0.9809098243713379, "step": 3166 }, { "epoch": 0.25935366315552416, "grad_norm": 0.1767578125, "learning_rate": 2.177220623993036e-05, "loss": 0.9465204477310181, "step": 3167 }, { "epoch": 0.25943555569204313, "grad_norm": 0.1953125, "learning_rate": 2.1770754069245603e-05, "loss": 0.9769243001937866, "step": 3168 }, { "epoch": 0.25951744822856204, "grad_norm": 0.177734375, "learning_rate": 2.176930147388363e-05, "loss": 0.8874969482421875, "step": 3169 }, { "epoch": 0.259599340765081, "grad_norm": 0.1640625, "learning_rate": 2.1767848453907575e-05, "loss": 0.8695144653320312, "step": 3170 }, { "epoch": 0.2596812333016, "grad_norm": 0.193359375, "learning_rate": 2.1766395009380597e-05, "loss": 0.676611602306366, "step": 3171 }, { "epoch": 0.25976312583811895, "grad_norm": 0.1552734375, "learning_rate": 2.1764941140365866e-05, "loss": 0.8168504238128662, "step": 3172 }, { "epoch": 0.25984501837463786, "grad_norm": 0.1533203125, "learning_rate": 2.1763486846926574e-05, "loss": 0.8248706459999084, "step": 3173 }, { "epoch": 0.2599269109111568, "grad_norm": 0.166015625, "learning_rate": 2.176203212912593e-05, "loss": 0.7082149982452393, "step": 3174 }, { "epoch": 0.2600088034476758, "grad_norm": 0.1806640625, "learning_rate": 2.1760576987027165e-05, "loss": 0.6691633462905884, "step": 3175 }, { "epoch": 0.26009069598419476, "grad_norm": 0.1708984375, "learning_rate": 2.1759121420693523e-05, "loss": 0.9209831357002258, "step": 3176 }, { "epoch": 0.2601725885207137, "grad_norm": 0.17578125, "learning_rate": 2.1757665430188272e-05, "loss": 0.7841283082962036, "step": 3177 }, { "epoch": 0.26025448105723265, "grad_norm": 0.2578125, "learning_rate": 2.1756209015574688e-05, "loss": 1.1342308521270752, "step": 3178 }, { "epoch": 0.2603363735937516, "grad_norm": 0.193359375, "learning_rate": 2.175475217691608e-05, "loss": 0.902176022529602, "step": 3179 }, { "epoch": 0.2604182661302705, "grad_norm": 0.158203125, "learning_rate": 2.1753294914275763e-05, "loss": 0.7692538499832153, "step": 3180 }, { "epoch": 0.2605001586667895, "grad_norm": 0.1796875, "learning_rate": 2.175183722771708e-05, "loss": 0.8452116250991821, "step": 3181 }, { "epoch": 0.26058205120330846, "grad_norm": 0.169921875, "learning_rate": 2.1750379117303387e-05, "loss": 0.968145489692688, "step": 3182 }, { "epoch": 0.26066394373982743, "grad_norm": 0.150390625, "learning_rate": 2.1748920583098056e-05, "loss": 0.7973054647445679, "step": 3183 }, { "epoch": 0.26074583627634634, "grad_norm": 0.16015625, "learning_rate": 2.1747461625164486e-05, "loss": 0.5918578505516052, "step": 3184 }, { "epoch": 0.2608277288128653, "grad_norm": 0.177734375, "learning_rate": 2.1746002243566086e-05, "loss": 1.0115458965301514, "step": 3185 }, { "epoch": 0.2609096213493843, "grad_norm": 0.203125, "learning_rate": 2.1744542438366286e-05, "loss": 1.1336578130722046, "step": 3186 }, { "epoch": 0.26099151388590325, "grad_norm": 0.1953125, "learning_rate": 2.174308220962854e-05, "loss": 1.0043970346450806, "step": 3187 }, { "epoch": 0.26107340642242216, "grad_norm": 0.1806640625, "learning_rate": 2.174162155741631e-05, "loss": 0.6462467908859253, "step": 3188 }, { "epoch": 0.26115529895894113, "grad_norm": 0.193359375, "learning_rate": 2.1740160481793085e-05, "loss": 0.8580551743507385, "step": 3189 }, { "epoch": 0.2612371914954601, "grad_norm": 0.20703125, "learning_rate": 2.173869898282237e-05, "loss": 1.1722352504730225, "step": 3190 }, { "epoch": 0.261319084031979, "grad_norm": 0.1728515625, "learning_rate": 2.1737237060567688e-05, "loss": 0.743541419506073, "step": 3191 }, { "epoch": 0.261400976568498, "grad_norm": 0.173828125, "learning_rate": 2.173577471509257e-05, "loss": 0.8838053941726685, "step": 3192 }, { "epoch": 0.26148286910501695, "grad_norm": 0.17578125, "learning_rate": 2.173431194646059e-05, "loss": 1.0029171705245972, "step": 3193 }, { "epoch": 0.2615647616415359, "grad_norm": 0.1923828125, "learning_rate": 2.1732848754735322e-05, "loss": 1.2409414052963257, "step": 3194 }, { "epoch": 0.2616466541780548, "grad_norm": 0.169921875, "learning_rate": 2.173138513998036e-05, "loss": 0.846108078956604, "step": 3195 }, { "epoch": 0.2617285467145738, "grad_norm": 0.1787109375, "learning_rate": 2.1729921102259317e-05, "loss": 1.0626165866851807, "step": 3196 }, { "epoch": 0.26181043925109276, "grad_norm": 0.1875, "learning_rate": 2.172845664163583e-05, "loss": 1.053910732269287, "step": 3197 }, { "epoch": 0.26189233178761173, "grad_norm": 0.189453125, "learning_rate": 2.172699175817355e-05, "loss": 0.8863646388053894, "step": 3198 }, { "epoch": 0.26197422432413064, "grad_norm": 0.171875, "learning_rate": 2.1725526451936142e-05, "loss": 1.0099010467529297, "step": 3199 }, { "epoch": 0.2620561168606496, "grad_norm": 0.1787109375, "learning_rate": 2.17240607229873e-05, "loss": 0.9404041767120361, "step": 3200 }, { "epoch": 0.2621380093971686, "grad_norm": 0.1845703125, "learning_rate": 2.1722594571390728e-05, "loss": 0.5972029566764832, "step": 3201 }, { "epoch": 0.2622199019336875, "grad_norm": 0.1591796875, "learning_rate": 2.172112799721015e-05, "loss": 0.7316563129425049, "step": 3202 }, { "epoch": 0.26230179447020646, "grad_norm": 0.177734375, "learning_rate": 2.1719661000509316e-05, "loss": 1.034212589263916, "step": 3203 }, { "epoch": 0.26238368700672543, "grad_norm": 0.1650390625, "learning_rate": 2.1718193581351975e-05, "loss": 0.9521102905273438, "step": 3204 }, { "epoch": 0.2624655795432444, "grad_norm": 0.169921875, "learning_rate": 2.171672573980192e-05, "loss": 0.871894359588623, "step": 3205 }, { "epoch": 0.2625474720797633, "grad_norm": 0.150390625, "learning_rate": 2.1715257475922942e-05, "loss": 0.6355702877044678, "step": 3206 }, { "epoch": 0.2626293646162823, "grad_norm": 0.158203125, "learning_rate": 2.1713788789778862e-05, "loss": 0.8873429894447327, "step": 3207 }, { "epoch": 0.26271125715280125, "grad_norm": 0.1982421875, "learning_rate": 2.171231968143351e-05, "loss": 1.0372215509414673, "step": 3208 }, { "epoch": 0.2627931496893202, "grad_norm": 0.173828125, "learning_rate": 2.1710850150950747e-05, "loss": 0.7492622137069702, "step": 3209 }, { "epoch": 0.26287504222583913, "grad_norm": 0.1748046875, "learning_rate": 2.1709380198394437e-05, "loss": 0.9619871973991394, "step": 3210 }, { "epoch": 0.2629569347623581, "grad_norm": 0.1435546875, "learning_rate": 2.1707909823828473e-05, "loss": 0.6420440673828125, "step": 3211 }, { "epoch": 0.26303882729887706, "grad_norm": 0.1640625, "learning_rate": 2.1706439027316765e-05, "loss": 0.8716416954994202, "step": 3212 }, { "epoch": 0.263120719835396, "grad_norm": 0.1650390625, "learning_rate": 2.170496780892324e-05, "loss": 0.6702097058296204, "step": 3213 }, { "epoch": 0.26320261237191495, "grad_norm": 0.1640625, "learning_rate": 2.1703496168711842e-05, "loss": 1.0833449363708496, "step": 3214 }, { "epoch": 0.2632845049084339, "grad_norm": 0.1552734375, "learning_rate": 2.170202410674653e-05, "loss": 0.8423060178756714, "step": 3215 }, { "epoch": 0.2633663974449529, "grad_norm": 0.14453125, "learning_rate": 2.1700551623091296e-05, "loss": 0.9879197478294373, "step": 3216 }, { "epoch": 0.2634482899814718, "grad_norm": 0.1923828125, "learning_rate": 2.1699078717810133e-05, "loss": 0.6664204597473145, "step": 3217 }, { "epoch": 0.26353018251799076, "grad_norm": 0.1904296875, "learning_rate": 2.169760539096706e-05, "loss": 0.8965526819229126, "step": 3218 }, { "epoch": 0.26361207505450973, "grad_norm": 0.1279296875, "learning_rate": 2.169613164262612e-05, "loss": 0.7042847871780396, "step": 3219 }, { "epoch": 0.2636939675910287, "grad_norm": 0.16015625, "learning_rate": 2.1694657472851362e-05, "loss": 0.5074277520179749, "step": 3220 }, { "epoch": 0.2637758601275476, "grad_norm": 0.1796875, "learning_rate": 2.169318288170686e-05, "loss": 0.9434421062469482, "step": 3221 }, { "epoch": 0.2638577526640666, "grad_norm": 0.16796875, "learning_rate": 2.1691707869256703e-05, "loss": 1.2658205032348633, "step": 3222 }, { "epoch": 0.26393964520058555, "grad_norm": 0.1650390625, "learning_rate": 2.169023243556501e-05, "loss": 1.0743223428726196, "step": 3223 }, { "epoch": 0.26402153773710446, "grad_norm": 0.158203125, "learning_rate": 2.16887565806959e-05, "loss": 0.8824954628944397, "step": 3224 }, { "epoch": 0.26410343027362343, "grad_norm": 0.1591796875, "learning_rate": 2.168728030471353e-05, "loss": 0.6488144397735596, "step": 3225 }, { "epoch": 0.2641853228101424, "grad_norm": 0.1484375, "learning_rate": 2.1685803607682053e-05, "loss": 0.5821422934532166, "step": 3226 }, { "epoch": 0.26426721534666137, "grad_norm": 0.1943359375, "learning_rate": 2.168432648966566e-05, "loss": 0.9351921081542969, "step": 3227 }, { "epoch": 0.2643491078831803, "grad_norm": 0.2236328125, "learning_rate": 2.168284895072855e-05, "loss": 1.092302680015564, "step": 3228 }, { "epoch": 0.26443100041969925, "grad_norm": 0.1787109375, "learning_rate": 2.1681370990934944e-05, "loss": 0.7790848612785339, "step": 3229 }, { "epoch": 0.2645128929562182, "grad_norm": 0.197265625, "learning_rate": 2.1679892610349086e-05, "loss": 0.8729329109191895, "step": 3230 }, { "epoch": 0.2645947854927372, "grad_norm": 0.1767578125, "learning_rate": 2.1678413809035217e-05, "loss": 0.8801979422569275, "step": 3231 }, { "epoch": 0.2646766780292561, "grad_norm": 0.2216796875, "learning_rate": 2.1676934587057626e-05, "loss": 0.9245819449424744, "step": 3232 }, { "epoch": 0.26475857056577506, "grad_norm": 0.19140625, "learning_rate": 2.1675454944480605e-05, "loss": 0.9424863457679749, "step": 3233 }, { "epoch": 0.26484046310229403, "grad_norm": 0.1708984375, "learning_rate": 2.1673974881368456e-05, "loss": 0.9543589353561401, "step": 3234 }, { "epoch": 0.26492235563881295, "grad_norm": 0.1669921875, "learning_rate": 2.1672494397785517e-05, "loss": 0.6458380818367004, "step": 3235 }, { "epoch": 0.2650042481753319, "grad_norm": 0.208984375, "learning_rate": 2.1671013493796134e-05, "loss": 0.9891178011894226, "step": 3236 }, { "epoch": 0.2650861407118509, "grad_norm": 0.1337890625, "learning_rate": 2.1669532169464673e-05, "loss": 0.9154999852180481, "step": 3237 }, { "epoch": 0.26516803324836985, "grad_norm": 0.17578125, "learning_rate": 2.1668050424855518e-05, "loss": 0.8810940384864807, "step": 3238 }, { "epoch": 0.26524992578488876, "grad_norm": 0.18359375, "learning_rate": 2.166656826003307e-05, "loss": 0.8166366219520569, "step": 3239 }, { "epoch": 0.26533181832140773, "grad_norm": 0.1640625, "learning_rate": 2.166508567506175e-05, "loss": 0.7319273352622986, "step": 3240 }, { "epoch": 0.2654137108579267, "grad_norm": 0.1875, "learning_rate": 2.1663602670006e-05, "loss": 0.8784331679344177, "step": 3241 }, { "epoch": 0.2654956033944456, "grad_norm": 0.1455078125, "learning_rate": 2.1662119244930282e-05, "loss": 0.9117374420166016, "step": 3242 }, { "epoch": 0.2655774959309646, "grad_norm": 0.1787109375, "learning_rate": 2.166063539989906e-05, "loss": 0.9602220058441162, "step": 3243 }, { "epoch": 0.26565938846748355, "grad_norm": 0.185546875, "learning_rate": 2.1659151134976838e-05, "loss": 0.9221316576004028, "step": 3244 }, { "epoch": 0.2657412810040025, "grad_norm": 0.193359375, "learning_rate": 2.1657666450228123e-05, "loss": 1.1194803714752197, "step": 3245 }, { "epoch": 0.26582317354052143, "grad_norm": 0.1884765625, "learning_rate": 2.165618134571745e-05, "loss": 1.4498815536499023, "step": 3246 }, { "epoch": 0.2659050660770404, "grad_norm": 0.1298828125, "learning_rate": 2.1654695821509362e-05, "loss": 0.7133536338806152, "step": 3247 }, { "epoch": 0.26598695861355937, "grad_norm": 0.1875, "learning_rate": 2.1653209877668427e-05, "loss": 0.7237063050270081, "step": 3248 }, { "epoch": 0.26606885115007833, "grad_norm": 0.1689453125, "learning_rate": 2.1651723514259233e-05, "loss": 0.9974046349525452, "step": 3249 }, { "epoch": 0.26615074368659725, "grad_norm": 0.173828125, "learning_rate": 2.1650236731346383e-05, "loss": 0.8866168260574341, "step": 3250 }, { "epoch": 0.2662326362231162, "grad_norm": 0.169921875, "learning_rate": 2.16487495289945e-05, "loss": 0.8706800937652588, "step": 3251 }, { "epoch": 0.2663145287596352, "grad_norm": 0.1962890625, "learning_rate": 2.1647261907268223e-05, "loss": 1.0852530002593994, "step": 3252 }, { "epoch": 0.2663964212961541, "grad_norm": 0.193359375, "learning_rate": 2.1645773866232204e-05, "loss": 0.8357301950454712, "step": 3253 }, { "epoch": 0.26647831383267306, "grad_norm": 0.1669921875, "learning_rate": 2.164428540595113e-05, "loss": 0.9736103415489197, "step": 3254 }, { "epoch": 0.26656020636919203, "grad_norm": 0.2041015625, "learning_rate": 2.1642796526489688e-05, "loss": 0.9015145897865295, "step": 3255 }, { "epoch": 0.266642098905711, "grad_norm": 0.125, "learning_rate": 2.1641307227912593e-05, "loss": 0.7823876142501831, "step": 3256 }, { "epoch": 0.2667239914422299, "grad_norm": 0.2041015625, "learning_rate": 2.1639817510284576e-05, "loss": 1.0863451957702637, "step": 3257 }, { "epoch": 0.2668058839787489, "grad_norm": 0.1533203125, "learning_rate": 2.1638327373670387e-05, "loss": 0.5649192929267883, "step": 3258 }, { "epoch": 0.26688777651526785, "grad_norm": 0.224609375, "learning_rate": 2.1636836818134793e-05, "loss": 0.8872652053833008, "step": 3259 }, { "epoch": 0.2669696690517868, "grad_norm": 0.1572265625, "learning_rate": 2.1635345843742583e-05, "loss": 0.5855347514152527, "step": 3260 }, { "epoch": 0.26705156158830573, "grad_norm": 0.1884765625, "learning_rate": 2.1633854450558553e-05, "loss": 0.8995658159255981, "step": 3261 }, { "epoch": 0.2671334541248247, "grad_norm": 0.185546875, "learning_rate": 2.163236263864753e-05, "loss": 0.9249913096427917, "step": 3262 }, { "epoch": 0.26721534666134367, "grad_norm": 0.181640625, "learning_rate": 2.1630870408074356e-05, "loss": 0.9172536730766296, "step": 3263 }, { "epoch": 0.2672972391978626, "grad_norm": 0.193359375, "learning_rate": 2.1629377758903886e-05, "loss": 1.160362720489502, "step": 3264 }, { "epoch": 0.26737913173438155, "grad_norm": 0.1865234375, "learning_rate": 2.1627884691201e-05, "loss": 1.0079379081726074, "step": 3265 }, { "epoch": 0.2674610242709005, "grad_norm": 0.1962890625, "learning_rate": 2.162639120503059e-05, "loss": 0.8354660272598267, "step": 3266 }, { "epoch": 0.2675429168074195, "grad_norm": 0.1728515625, "learning_rate": 2.162489730045757e-05, "loss": 0.83490389585495, "step": 3267 }, { "epoch": 0.2676248093439384, "grad_norm": 0.177734375, "learning_rate": 2.1623402977546866e-05, "loss": 0.8092930316925049, "step": 3268 }, { "epoch": 0.26770670188045737, "grad_norm": 0.1572265625, "learning_rate": 2.162190823636344e-05, "loss": 0.947308361530304, "step": 3269 }, { "epoch": 0.26778859441697633, "grad_norm": 0.212890625, "learning_rate": 2.162041307697225e-05, "loss": 0.8496508598327637, "step": 3270 }, { "epoch": 0.2678704869534953, "grad_norm": 0.1689453125, "learning_rate": 2.1618917499438284e-05, "loss": 0.9197134971618652, "step": 3271 }, { "epoch": 0.2679523794900142, "grad_norm": 0.1484375, "learning_rate": 2.1617421503826545e-05, "loss": 0.6617064476013184, "step": 3272 }, { "epoch": 0.2680342720265332, "grad_norm": 0.18359375, "learning_rate": 2.161592509020206e-05, "loss": 0.714332640171051, "step": 3273 }, { "epoch": 0.26811616456305215, "grad_norm": 0.1474609375, "learning_rate": 2.161442825862986e-05, "loss": 0.6404594779014587, "step": 3274 }, { "epoch": 0.26819805709957106, "grad_norm": 0.1904296875, "learning_rate": 2.1612931009175013e-05, "loss": 0.7566066384315491, "step": 3275 }, { "epoch": 0.26827994963609003, "grad_norm": 0.158203125, "learning_rate": 2.161143334190259e-05, "loss": 0.8044634461402893, "step": 3276 }, { "epoch": 0.268361842172609, "grad_norm": 0.1806640625, "learning_rate": 2.1609935256877687e-05, "loss": 1.4273775815963745, "step": 3277 }, { "epoch": 0.26844373470912797, "grad_norm": 0.1494140625, "learning_rate": 2.1608436754165417e-05, "loss": 0.7233834266662598, "step": 3278 }, { "epoch": 0.2685256272456469, "grad_norm": 0.166015625, "learning_rate": 2.1606937833830915e-05, "loss": 0.8778788447380066, "step": 3279 }, { "epoch": 0.26860751978216585, "grad_norm": 0.1650390625, "learning_rate": 2.1605438495939324e-05, "loss": 1.1701123714447021, "step": 3280 }, { "epoch": 0.2686894123186848, "grad_norm": 0.16796875, "learning_rate": 2.1603938740555812e-05, "loss": 0.935649573802948, "step": 3281 }, { "epoch": 0.2687713048552038, "grad_norm": 0.2353515625, "learning_rate": 2.160243856774557e-05, "loss": 0.7498301863670349, "step": 3282 }, { "epoch": 0.2688531973917227, "grad_norm": 0.173828125, "learning_rate": 2.1600937977573796e-05, "loss": 0.7776157855987549, "step": 3283 }, { "epoch": 0.26893508992824167, "grad_norm": 0.1826171875, "learning_rate": 2.1599436970105717e-05, "loss": 0.9413073062896729, "step": 3284 }, { "epoch": 0.26901698246476063, "grad_norm": 0.2080078125, "learning_rate": 2.159793554540657e-05, "loss": 1.0632649660110474, "step": 3285 }, { "epoch": 0.26909887500127955, "grad_norm": 0.1689453125, "learning_rate": 2.1596433703541608e-05, "loss": 0.7193772792816162, "step": 3286 }, { "epoch": 0.2691807675377985, "grad_norm": 0.1865234375, "learning_rate": 2.1594931444576115e-05, "loss": 1.1338950395584106, "step": 3287 }, { "epoch": 0.2692626600743175, "grad_norm": 0.18359375, "learning_rate": 2.1593428768575388e-05, "loss": 0.944406270980835, "step": 3288 }, { "epoch": 0.26934455261083645, "grad_norm": 0.1708984375, "learning_rate": 2.1591925675604725e-05, "loss": 0.9453411102294922, "step": 3289 }, { "epoch": 0.26942644514735536, "grad_norm": 0.19921875, "learning_rate": 2.1590422165729473e-05, "loss": 0.635551929473877, "step": 3290 }, { "epoch": 0.26950833768387433, "grad_norm": 0.2265625, "learning_rate": 2.1588918239014973e-05, "loss": 0.8830382227897644, "step": 3291 }, { "epoch": 0.2695902302203933, "grad_norm": 0.158203125, "learning_rate": 2.158741389552659e-05, "loss": 0.906486451625824, "step": 3292 }, { "epoch": 0.26967212275691227, "grad_norm": 0.15625, "learning_rate": 2.1585909135329713e-05, "loss": 0.9953230023384094, "step": 3293 }, { "epoch": 0.2697540152934312, "grad_norm": 0.2265625, "learning_rate": 2.158440395848974e-05, "loss": 0.9503775238990784, "step": 3294 }, { "epoch": 0.26983590782995015, "grad_norm": 0.146484375, "learning_rate": 2.1582898365072102e-05, "loss": 0.5092971324920654, "step": 3295 }, { "epoch": 0.2699178003664691, "grad_norm": 0.173828125, "learning_rate": 2.1581392355142227e-05, "loss": 0.9471458196640015, "step": 3296 }, { "epoch": 0.26999969290298803, "grad_norm": 0.1572265625, "learning_rate": 2.157988592876558e-05, "loss": 0.4898902177810669, "step": 3297 }, { "epoch": 0.270081585439507, "grad_norm": 0.16015625, "learning_rate": 2.1578379086007633e-05, "loss": 0.6574862599372864, "step": 3298 }, { "epoch": 0.27016347797602597, "grad_norm": 0.1787109375, "learning_rate": 2.157687182693388e-05, "loss": 0.9062574505805969, "step": 3299 }, { "epoch": 0.27024537051254494, "grad_norm": 0.1904296875, "learning_rate": 2.1575364151609833e-05, "loss": 1.3011393547058105, "step": 3300 }, { "epoch": 0.27032726304906385, "grad_norm": 0.130859375, "learning_rate": 2.1573856060101023e-05, "loss": 0.8509037494659424, "step": 3301 }, { "epoch": 0.2704091555855828, "grad_norm": 0.16015625, "learning_rate": 2.1572347552472993e-05, "loss": 0.9384050369262695, "step": 3302 }, { "epoch": 0.2704910481221018, "grad_norm": 0.166015625, "learning_rate": 2.1570838628791318e-05, "loss": 0.9923960566520691, "step": 3303 }, { "epoch": 0.2705729406586207, "grad_norm": 0.19140625, "learning_rate": 2.1569329289121577e-05, "loss": 0.8370259404182434, "step": 3304 }, { "epoch": 0.27065483319513967, "grad_norm": 0.16796875, "learning_rate": 2.156781953352937e-05, "loss": 0.6945908069610596, "step": 3305 }, { "epoch": 0.27073672573165863, "grad_norm": 0.22265625, "learning_rate": 2.156630936208032e-05, "loss": 0.7941571474075317, "step": 3306 }, { "epoch": 0.2708186182681776, "grad_norm": 0.1708984375, "learning_rate": 2.156479877484006e-05, "loss": 1.0198163986206055, "step": 3307 }, { "epoch": 0.2709005108046965, "grad_norm": 0.1513671875, "learning_rate": 2.1563287771874255e-05, "loss": 0.6542173027992249, "step": 3308 }, { "epoch": 0.2709824033412155, "grad_norm": 0.1513671875, "learning_rate": 2.1561776353248574e-05, "loss": 0.743459165096283, "step": 3309 }, { "epoch": 0.27106429587773445, "grad_norm": 0.1708984375, "learning_rate": 2.156026451902871e-05, "loss": 0.837165892124176, "step": 3310 }, { "epoch": 0.2711461884142534, "grad_norm": 0.1611328125, "learning_rate": 2.155875226928038e-05, "loss": 0.7349922060966492, "step": 3311 }, { "epoch": 0.27122808095077233, "grad_norm": 0.1943359375, "learning_rate": 2.15572396040693e-05, "loss": 0.9216521382331848, "step": 3312 }, { "epoch": 0.2713099734872913, "grad_norm": 0.1640625, "learning_rate": 2.1555726523461224e-05, "loss": 0.7761582732200623, "step": 3313 }, { "epoch": 0.27139186602381027, "grad_norm": 0.2041015625, "learning_rate": 2.1554213027521916e-05, "loss": 0.8121665120124817, "step": 3314 }, { "epoch": 0.2714737585603292, "grad_norm": 0.1611328125, "learning_rate": 2.155269911631716e-05, "loss": 0.8760371804237366, "step": 3315 }, { "epoch": 0.27155565109684815, "grad_norm": 0.162109375, "learning_rate": 2.1551184789912756e-05, "loss": 0.5040208101272583, "step": 3316 }, { "epoch": 0.2716375436333671, "grad_norm": 0.1435546875, "learning_rate": 2.1549670048374522e-05, "loss": 0.5382332801818848, "step": 3317 }, { "epoch": 0.2717194361698861, "grad_norm": 0.18359375, "learning_rate": 2.1548154891768297e-05, "loss": 0.9676521420478821, "step": 3318 }, { "epoch": 0.271801328706405, "grad_norm": 0.1494140625, "learning_rate": 2.1546639320159933e-05, "loss": 0.6809881329536438, "step": 3319 }, { "epoch": 0.27188322124292397, "grad_norm": 0.1669921875, "learning_rate": 2.1545123333615305e-05, "loss": 0.9668429493904114, "step": 3320 }, { "epoch": 0.27196511377944294, "grad_norm": 0.17578125, "learning_rate": 2.1543606932200302e-05, "loss": 0.703383207321167, "step": 3321 }, { "epoch": 0.2720470063159619, "grad_norm": 0.2138671875, "learning_rate": 2.154209011598084e-05, "loss": 0.8019699454307556, "step": 3322 }, { "epoch": 0.2721288988524808, "grad_norm": 0.146484375, "learning_rate": 2.1540572885022834e-05, "loss": 0.679069995880127, "step": 3323 }, { "epoch": 0.2722107913889998, "grad_norm": 0.158203125, "learning_rate": 2.1539055239392238e-05, "loss": 0.9363023042678833, "step": 3324 }, { "epoch": 0.27229268392551875, "grad_norm": 0.1640625, "learning_rate": 2.1537537179155017e-05, "loss": 0.6437115669250488, "step": 3325 }, { "epoch": 0.27237457646203767, "grad_norm": 0.1533203125, "learning_rate": 2.153601870437714e-05, "loss": 0.7268803715705872, "step": 3326 }, { "epoch": 0.27245646899855663, "grad_norm": 0.162109375, "learning_rate": 2.153449981512462e-05, "loss": 0.8665785193443298, "step": 3327 }, { "epoch": 0.2725383615350756, "grad_norm": 0.1611328125, "learning_rate": 2.153298051146346e-05, "loss": 0.865010678768158, "step": 3328 }, { "epoch": 0.27262025407159457, "grad_norm": 0.1513671875, "learning_rate": 2.1531460793459715e-05, "loss": 0.7722593545913696, "step": 3329 }, { "epoch": 0.2727021466081135, "grad_norm": 0.1689453125, "learning_rate": 2.1529940661179422e-05, "loss": 0.8160887956619263, "step": 3330 }, { "epoch": 0.27278403914463245, "grad_norm": 0.1943359375, "learning_rate": 2.1528420114688654e-05, "loss": 0.9909271001815796, "step": 3331 }, { "epoch": 0.2728659316811514, "grad_norm": 0.1376953125, "learning_rate": 2.1526899154053507e-05, "loss": 0.8764643669128418, "step": 3332 }, { "epoch": 0.2729478242176704, "grad_norm": 0.193359375, "learning_rate": 2.1525377779340086e-05, "loss": 0.9621984958648682, "step": 3333 }, { "epoch": 0.2730297167541893, "grad_norm": 0.173828125, "learning_rate": 2.1523855990614508e-05, "loss": 0.8603249192237854, "step": 3334 }, { "epoch": 0.27311160929070827, "grad_norm": 0.1357421875, "learning_rate": 2.152233378794293e-05, "loss": 0.7865197658538818, "step": 3335 }, { "epoch": 0.27319350182722724, "grad_norm": 0.1796875, "learning_rate": 2.15208111713915e-05, "loss": 0.8992390036582947, "step": 3336 }, { "epoch": 0.27327539436374615, "grad_norm": 0.166015625, "learning_rate": 2.1519288141026405e-05, "loss": 0.921657919883728, "step": 3337 }, { "epoch": 0.2733572869002651, "grad_norm": 0.16796875, "learning_rate": 2.151776469691384e-05, "loss": 0.5226486325263977, "step": 3338 }, { "epoch": 0.2734391794367841, "grad_norm": 0.1943359375, "learning_rate": 2.151624083912002e-05, "loss": 0.7886949777603149, "step": 3339 }, { "epoch": 0.27352107197330305, "grad_norm": 0.15625, "learning_rate": 2.151471656771118e-05, "loss": 0.9264416098594666, "step": 3340 }, { "epoch": 0.27360296450982197, "grad_norm": 0.1796875, "learning_rate": 2.151319188275357e-05, "loss": 0.6940490007400513, "step": 3341 }, { "epoch": 0.27368485704634093, "grad_norm": 0.1943359375, "learning_rate": 2.151166678431346e-05, "loss": 0.7757836580276489, "step": 3342 }, { "epoch": 0.2737667495828599, "grad_norm": 0.1796875, "learning_rate": 2.1510141272457134e-05, "loss": 0.8390427827835083, "step": 3343 }, { "epoch": 0.27384864211937887, "grad_norm": 0.1650390625, "learning_rate": 2.1508615347250898e-05, "loss": 0.9607489109039307, "step": 3344 }, { "epoch": 0.2739305346558978, "grad_norm": 0.1884765625, "learning_rate": 2.1507089008761078e-05, "loss": 0.783923864364624, "step": 3345 }, { "epoch": 0.27401242719241675, "grad_norm": 0.1494140625, "learning_rate": 2.150556225705401e-05, "loss": 0.7999295592308044, "step": 3346 }, { "epoch": 0.2740943197289357, "grad_norm": 0.158203125, "learning_rate": 2.150403509219606e-05, "loss": 0.8167508244514465, "step": 3347 }, { "epoch": 0.27417621226545463, "grad_norm": 0.1875, "learning_rate": 2.1502507514253596e-05, "loss": 0.8116077184677124, "step": 3348 }, { "epoch": 0.2742581048019736, "grad_norm": 0.18359375, "learning_rate": 2.150097952329302e-05, "loss": 1.3164403438568115, "step": 3349 }, { "epoch": 0.27433999733849257, "grad_norm": 0.18359375, "learning_rate": 2.1499451119380745e-05, "loss": 0.670732855796814, "step": 3350 }, { "epoch": 0.27442188987501154, "grad_norm": 0.17578125, "learning_rate": 2.149792230258319e-05, "loss": 1.1000217199325562, "step": 3351 }, { "epoch": 0.27450378241153045, "grad_norm": 0.18359375, "learning_rate": 2.1496393072966824e-05, "loss": 0.8944618701934814, "step": 3352 }, { "epoch": 0.2745856749480494, "grad_norm": 0.17578125, "learning_rate": 2.1494863430598095e-05, "loss": 0.9926925301551819, "step": 3353 }, { "epoch": 0.2746675674845684, "grad_norm": 0.1640625, "learning_rate": 2.1493333375543495e-05, "loss": 0.9102782607078552, "step": 3354 }, { "epoch": 0.27474946002108735, "grad_norm": 0.154296875, "learning_rate": 2.149180290786953e-05, "loss": 1.0286027193069458, "step": 3355 }, { "epoch": 0.27483135255760627, "grad_norm": 0.15625, "learning_rate": 2.1490272027642717e-05, "loss": 0.6212396621704102, "step": 3356 }, { "epoch": 0.27491324509412524, "grad_norm": 0.1826171875, "learning_rate": 2.1488740734929593e-05, "loss": 0.8314988613128662, "step": 3357 }, { "epoch": 0.2749951376306442, "grad_norm": 0.1708984375, "learning_rate": 2.148720902979671e-05, "loss": 0.8606696128845215, "step": 3358 }, { "epoch": 0.2750770301671631, "grad_norm": 0.1884765625, "learning_rate": 2.1485676912310654e-05, "loss": 0.8349741697311401, "step": 3359 }, { "epoch": 0.2751589227036821, "grad_norm": 0.15234375, "learning_rate": 2.148414438253801e-05, "loss": 0.7126782536506653, "step": 3360 }, { "epoch": 0.27524081524020105, "grad_norm": 0.150390625, "learning_rate": 2.1482611440545387e-05, "loss": 0.8179392218589783, "step": 3361 }, { "epoch": 0.27532270777672, "grad_norm": 0.1650390625, "learning_rate": 2.1481078086399414e-05, "loss": 0.892266571521759, "step": 3362 }, { "epoch": 0.27540460031323893, "grad_norm": 0.173828125, "learning_rate": 2.1479544320166738e-05, "loss": 1.2144250869750977, "step": 3363 }, { "epoch": 0.2754864928497579, "grad_norm": 0.2021484375, "learning_rate": 2.1478010141914025e-05, "loss": 0.7363628149032593, "step": 3364 }, { "epoch": 0.27556838538627687, "grad_norm": 0.1640625, "learning_rate": 2.147647555170795e-05, "loss": 0.9002977013587952, "step": 3365 }, { "epoch": 0.27565027792279584, "grad_norm": 0.1689453125, "learning_rate": 2.1474940549615223e-05, "loss": 0.7352514863014221, "step": 3366 }, { "epoch": 0.27573217045931475, "grad_norm": 0.201171875, "learning_rate": 2.1473405135702553e-05, "loss": 0.6047722101211548, "step": 3367 }, { "epoch": 0.2758140629958337, "grad_norm": 0.1611328125, "learning_rate": 2.1471869310036673e-05, "loss": 0.7658945322036743, "step": 3368 }, { "epoch": 0.2758959555323527, "grad_norm": 0.1865234375, "learning_rate": 2.1470333072684345e-05, "loss": 0.7119348049163818, "step": 3369 }, { "epoch": 0.2759778480688716, "grad_norm": 0.1748046875, "learning_rate": 2.1468796423712337e-05, "loss": 1.169087529182434, "step": 3370 }, { "epoch": 0.27605974060539057, "grad_norm": 0.1767578125, "learning_rate": 2.1467259363187434e-05, "loss": 0.9668115973472595, "step": 3371 }, { "epoch": 0.27614163314190954, "grad_norm": 0.162109375, "learning_rate": 2.146572189117645e-05, "loss": 1.1241261959075928, "step": 3372 }, { "epoch": 0.2762235256784285, "grad_norm": 0.208984375, "learning_rate": 2.1464184007746204e-05, "loss": 1.1234874725341797, "step": 3373 }, { "epoch": 0.2763054182149474, "grad_norm": 0.19140625, "learning_rate": 2.1462645712963542e-05, "loss": 1.0917906761169434, "step": 3374 }, { "epoch": 0.2763873107514664, "grad_norm": 0.166015625, "learning_rate": 2.1461107006895322e-05, "loss": 0.7701463103294373, "step": 3375 }, { "epoch": 0.27646920328798535, "grad_norm": 0.197265625, "learning_rate": 2.1459567889608425e-05, "loss": 0.8444384336471558, "step": 3376 }, { "epoch": 0.27655109582450427, "grad_norm": 0.21484375, "learning_rate": 2.1458028361169746e-05, "loss": 1.0009405612945557, "step": 3377 }, { "epoch": 0.27663298836102324, "grad_norm": 0.162109375, "learning_rate": 2.1456488421646202e-05, "loss": 0.7138739824295044, "step": 3378 }, { "epoch": 0.2767148808975422, "grad_norm": 0.173828125, "learning_rate": 2.145494807110472e-05, "loss": 0.7219703793525696, "step": 3379 }, { "epoch": 0.27679677343406117, "grad_norm": 0.171875, "learning_rate": 2.1453407309612253e-05, "loss": 0.7128299474716187, "step": 3380 }, { "epoch": 0.2768786659705801, "grad_norm": 0.1640625, "learning_rate": 2.145186613723577e-05, "loss": 0.6667338609695435, "step": 3381 }, { "epoch": 0.27696055850709905, "grad_norm": 0.154296875, "learning_rate": 2.1450324554042254e-05, "loss": 0.8319555521011353, "step": 3382 }, { "epoch": 0.277042451043618, "grad_norm": 0.1630859375, "learning_rate": 2.144878256009871e-05, "loss": 0.8431136608123779, "step": 3383 }, { "epoch": 0.277124343580137, "grad_norm": 0.1748046875, "learning_rate": 2.144724015547216e-05, "loss": 0.7516739368438721, "step": 3384 }, { "epoch": 0.2772062361166559, "grad_norm": 0.1328125, "learning_rate": 2.144569734022964e-05, "loss": 0.5456576347351074, "step": 3385 }, { "epoch": 0.27728812865317487, "grad_norm": 0.16796875, "learning_rate": 2.1444154114438215e-05, "loss": 0.8720216155052185, "step": 3386 }, { "epoch": 0.27737002118969384, "grad_norm": 0.255859375, "learning_rate": 2.144261047816495e-05, "loss": 1.2837430238723755, "step": 3387 }, { "epoch": 0.27745191372621275, "grad_norm": 0.1669921875, "learning_rate": 2.1441066431476944e-05, "loss": 0.7659080624580383, "step": 3388 }, { "epoch": 0.2775338062627317, "grad_norm": 0.181640625, "learning_rate": 2.1439521974441306e-05, "loss": 1.1645841598510742, "step": 3389 }, { "epoch": 0.2776156987992507, "grad_norm": 0.2021484375, "learning_rate": 2.1437977107125166e-05, "loss": 0.9932535886764526, "step": 3390 }, { "epoch": 0.27769759133576966, "grad_norm": 0.166015625, "learning_rate": 2.1436431829595665e-05, "loss": 1.0030277967453003, "step": 3391 }, { "epoch": 0.27777948387228857, "grad_norm": 0.166015625, "learning_rate": 2.1434886141919973e-05, "loss": 0.6630159020423889, "step": 3392 }, { "epoch": 0.27786137640880754, "grad_norm": 0.1669921875, "learning_rate": 2.143334004416527e-05, "loss": 0.6942229270935059, "step": 3393 }, { "epoch": 0.2779432689453265, "grad_norm": 0.1826171875, "learning_rate": 2.1431793536398753e-05, "loss": 0.7576448917388916, "step": 3394 }, { "epoch": 0.2780251614818455, "grad_norm": 0.1923828125, "learning_rate": 2.1430246618687646e-05, "loss": 0.9480331540107727, "step": 3395 }, { "epoch": 0.2781070540183644, "grad_norm": 0.1552734375, "learning_rate": 2.1428699291099177e-05, "loss": 0.7499651908874512, "step": 3396 }, { "epoch": 0.27818894655488335, "grad_norm": 0.171875, "learning_rate": 2.1427151553700606e-05, "loss": 0.9190750122070312, "step": 3397 }, { "epoch": 0.2782708390914023, "grad_norm": 0.2060546875, "learning_rate": 2.1425603406559198e-05, "loss": 0.8124038577079773, "step": 3398 }, { "epoch": 0.27835273162792123, "grad_norm": 0.173828125, "learning_rate": 2.1424054849742248e-05, "loss": 1.122057318687439, "step": 3399 }, { "epoch": 0.2784346241644402, "grad_norm": 0.171875, "learning_rate": 2.1422505883317058e-05, "loss": 0.845874547958374, "step": 3400 }, { "epoch": 0.27851651670095917, "grad_norm": 0.1650390625, "learning_rate": 2.1420956507350953e-05, "loss": 0.9810129404067993, "step": 3401 }, { "epoch": 0.27859840923747814, "grad_norm": 0.1669921875, "learning_rate": 2.1419406721911277e-05, "loss": 0.7269851565361023, "step": 3402 }, { "epoch": 0.27868030177399705, "grad_norm": 0.1630859375, "learning_rate": 2.141785652706539e-05, "loss": 0.7321099042892456, "step": 3403 }, { "epoch": 0.278762194310516, "grad_norm": 0.158203125, "learning_rate": 2.141630592288067e-05, "loss": 0.5046508312225342, "step": 3404 }, { "epoch": 0.278844086847035, "grad_norm": 0.166015625, "learning_rate": 2.1414754909424508e-05, "loss": 0.6796903610229492, "step": 3405 }, { "epoch": 0.27892597938355396, "grad_norm": 0.1484375, "learning_rate": 2.1413203486764324e-05, "loss": 0.7862737774848938, "step": 3406 }, { "epoch": 0.27900787192007287, "grad_norm": 0.154296875, "learning_rate": 2.141165165496754e-05, "loss": 0.8339933156967163, "step": 3407 }, { "epoch": 0.27908976445659184, "grad_norm": 0.1494140625, "learning_rate": 2.141009941410162e-05, "loss": 0.9509305953979492, "step": 3408 }, { "epoch": 0.2791716569931108, "grad_norm": 0.1845703125, "learning_rate": 2.140854676423402e-05, "loss": 1.0637438297271729, "step": 3409 }, { "epoch": 0.2792535495296297, "grad_norm": 0.19140625, "learning_rate": 2.1406993705432225e-05, "loss": 1.1327433586120605, "step": 3410 }, { "epoch": 0.2793354420661487, "grad_norm": 0.181640625, "learning_rate": 2.1405440237763737e-05, "loss": 0.9173071384429932, "step": 3411 }, { "epoch": 0.27941733460266766, "grad_norm": 0.197265625, "learning_rate": 2.140388636129608e-05, "loss": 1.0329043865203857, "step": 3412 }, { "epoch": 0.2794992271391866, "grad_norm": 0.208984375, "learning_rate": 2.1402332076096794e-05, "loss": 1.0449938774108887, "step": 3413 }, { "epoch": 0.27958111967570554, "grad_norm": 0.150390625, "learning_rate": 2.1400777382233423e-05, "loss": 0.7545625567436218, "step": 3414 }, { "epoch": 0.2796630122122245, "grad_norm": 0.150390625, "learning_rate": 2.1399222279773553e-05, "loss": 0.8826322555541992, "step": 3415 }, { "epoch": 0.2797449047487435, "grad_norm": 0.1796875, "learning_rate": 2.1397666768784766e-05, "loss": 1.063607096672058, "step": 3416 }, { "epoch": 0.27982679728526244, "grad_norm": 0.16796875, "learning_rate": 2.1396110849334682e-05, "loss": 0.9113302826881409, "step": 3417 }, { "epoch": 0.27990868982178135, "grad_norm": 0.2333984375, "learning_rate": 2.1394554521490916e-05, "loss": 0.9686901569366455, "step": 3418 }, { "epoch": 0.2799905823583003, "grad_norm": 0.15234375, "learning_rate": 2.1392997785321117e-05, "loss": 0.7442489266395569, "step": 3419 }, { "epoch": 0.2800724748948193, "grad_norm": 0.1640625, "learning_rate": 2.139144064089295e-05, "loss": 0.9052310585975647, "step": 3420 }, { "epoch": 0.2801543674313382, "grad_norm": 0.1806640625, "learning_rate": 2.138988308827409e-05, "loss": 0.8613073229789734, "step": 3421 }, { "epoch": 0.28023625996785717, "grad_norm": 0.1806640625, "learning_rate": 2.1388325127532237e-05, "loss": 0.8392568826675415, "step": 3422 }, { "epoch": 0.28031815250437614, "grad_norm": 0.5545027256011963, "learning_rate": 2.138676675873511e-05, "loss": 1.0198066234588623, "step": 3423 }, { "epoch": 0.2804000450408951, "grad_norm": 0.4595646262168884, "learning_rate": 2.138520798195043e-05, "loss": 0.7996975779533386, "step": 3424 }, { "epoch": 0.280481937577414, "grad_norm": 0.30966249108314514, "learning_rate": 2.1383648797245964e-05, "loss": 1.255074381828308, "step": 3425 }, { "epoch": 0.280563830113933, "grad_norm": 0.3617246747016907, "learning_rate": 2.1382089204689472e-05, "loss": 0.9435317516326904, "step": 3426 }, { "epoch": 0.28064572265045196, "grad_norm": 0.4147423207759857, "learning_rate": 2.1380529204348737e-05, "loss": 1.2138372659683228, "step": 3427 }, { "epoch": 0.2807276151869709, "grad_norm": 0.3086487054824829, "learning_rate": 2.1378968796291573e-05, "loss": 1.2042549848556519, "step": 3428 }, { "epoch": 0.28080950772348984, "grad_norm": 0.2678821384906769, "learning_rate": 2.1377407980585793e-05, "loss": 1.2063226699829102, "step": 3429 }, { "epoch": 0.2808914002600088, "grad_norm": 0.2660142183303833, "learning_rate": 2.1375846757299238e-05, "loss": 1.0376510620117188, "step": 3430 }, { "epoch": 0.2809732927965278, "grad_norm": 0.3243228793144226, "learning_rate": 2.1374285126499768e-05, "loss": 1.0781488418579102, "step": 3431 }, { "epoch": 0.2810551853330467, "grad_norm": 0.2732582688331604, "learning_rate": 2.1372723088255255e-05, "loss": 1.0324527025222778, "step": 3432 }, { "epoch": 0.28113707786956565, "grad_norm": 0.2861688435077667, "learning_rate": 2.1371160642633597e-05, "loss": 0.7667874693870544, "step": 3433 }, { "epoch": 0.2812189704060846, "grad_norm": 0.292312890291214, "learning_rate": 2.1369597789702696e-05, "loss": 1.2269484996795654, "step": 3434 }, { "epoch": 0.2813008629426036, "grad_norm": 0.30376964807510376, "learning_rate": 2.1368034529530486e-05, "loss": 0.9459224939346313, "step": 3435 }, { "epoch": 0.2813827554791225, "grad_norm": 0.2555980682373047, "learning_rate": 2.136647086218491e-05, "loss": 1.2257999181747437, "step": 3436 }, { "epoch": 0.28146464801564147, "grad_norm": 0.3167591392993927, "learning_rate": 2.136490678773394e-05, "loss": 0.9251630306243896, "step": 3437 }, { "epoch": 0.28154654055216044, "grad_norm": 0.2513663172721863, "learning_rate": 2.1363342306245542e-05, "loss": 0.9764858484268188, "step": 3438 }, { "epoch": 0.28162843308867935, "grad_norm": 0.2653743624687195, "learning_rate": 2.1361777417787722e-05, "loss": 0.9525572061538696, "step": 3439 }, { "epoch": 0.2817103256251983, "grad_norm": 0.25401419401168823, "learning_rate": 2.1360212122428504e-05, "loss": 0.958200216293335, "step": 3440 }, { "epoch": 0.2817922181617173, "grad_norm": 0.34562209248542786, "learning_rate": 2.135864642023591e-05, "loss": 0.9576067924499512, "step": 3441 }, { "epoch": 0.28187411069823626, "grad_norm": 0.24991260468959808, "learning_rate": 2.1357080311278e-05, "loss": 0.7946743369102478, "step": 3442 }, { "epoch": 0.28195600323475517, "grad_norm": 0.31108707189559937, "learning_rate": 2.135551379562284e-05, "loss": 0.7807180881500244, "step": 3443 }, { "epoch": 0.28203789577127414, "grad_norm": 0.3037821352481842, "learning_rate": 2.1353946873338516e-05, "loss": 0.9542021751403809, "step": 3444 }, { "epoch": 0.2821197883077931, "grad_norm": 0.26082366704940796, "learning_rate": 2.1352379544493136e-05, "loss": 0.8835973143577576, "step": 3445 }, { "epoch": 0.2822016808443121, "grad_norm": 0.24040617048740387, "learning_rate": 2.135081180915482e-05, "loss": 0.9280560612678528, "step": 3446 }, { "epoch": 0.282283573380831, "grad_norm": 0.24738606810569763, "learning_rate": 2.134924366739171e-05, "loss": 0.9297424554824829, "step": 3447 }, { "epoch": 0.28236546591734996, "grad_norm": 0.2396710366010666, "learning_rate": 2.1347675119271963e-05, "loss": 0.9982947707176208, "step": 3448 }, { "epoch": 0.2824473584538689, "grad_norm": 0.22884425520896912, "learning_rate": 2.1346106164863757e-05, "loss": 0.8980881571769714, "step": 3449 }, { "epoch": 0.28252925099038784, "grad_norm": 0.24518036842346191, "learning_rate": 2.1344536804235283e-05, "loss": 0.8322001099586487, "step": 3450 }, { "epoch": 0.2826111435269068, "grad_norm": 0.20058652758598328, "learning_rate": 2.1342967037454752e-05, "loss": 0.9090523719787598, "step": 3451 }, { "epoch": 0.2826930360634258, "grad_norm": 0.20916317403316498, "learning_rate": 2.1341396864590393e-05, "loss": 1.0050197839736938, "step": 3452 }, { "epoch": 0.28277492859994474, "grad_norm": 0.1953408569097519, "learning_rate": 2.133982628571045e-05, "loss": 0.9561449289321899, "step": 3453 }, { "epoch": 0.28285682113646365, "grad_norm": 0.21356765925884247, "learning_rate": 2.133825530088319e-05, "loss": 1.0452392101287842, "step": 3454 }, { "epoch": 0.2829387136729826, "grad_norm": 0.24609620869159698, "learning_rate": 2.1336683910176894e-05, "loss": 1.0572271347045898, "step": 3455 }, { "epoch": 0.2830206062095016, "grad_norm": 0.22513464093208313, "learning_rate": 2.133511211365986e-05, "loss": 0.8402677178382874, "step": 3456 }, { "epoch": 0.28310249874602056, "grad_norm": 0.24858179688453674, "learning_rate": 2.1333539911400406e-05, "loss": 1.086310625076294, "step": 3457 }, { "epoch": 0.28318439128253947, "grad_norm": 0.22680412232875824, "learning_rate": 2.133196730346686e-05, "loss": 0.6389338970184326, "step": 3458 }, { "epoch": 0.28326628381905844, "grad_norm": 0.1937006264925003, "learning_rate": 2.1330394289927586e-05, "loss": 1.0341110229492188, "step": 3459 }, { "epoch": 0.2833481763555774, "grad_norm": 0.21183858811855316, "learning_rate": 2.1328820870850944e-05, "loss": 0.8572084307670593, "step": 3460 }, { "epoch": 0.2834300688920963, "grad_norm": 0.23052723705768585, "learning_rate": 2.1327247046305326e-05, "loss": 1.122206449508667, "step": 3461 }, { "epoch": 0.2835119614286153, "grad_norm": 0.2200106531381607, "learning_rate": 2.1325672816359135e-05, "loss": 0.8906905651092529, "step": 3462 }, { "epoch": 0.28359385396513426, "grad_norm": 0.24819497764110565, "learning_rate": 2.1324098181080793e-05, "loss": 1.2684714794158936, "step": 3463 }, { "epoch": 0.2836757465016532, "grad_norm": 0.17595423758029938, "learning_rate": 2.132252314053874e-05, "loss": 0.580949068069458, "step": 3464 }, { "epoch": 0.28375763903817214, "grad_norm": 0.20759359002113342, "learning_rate": 2.1320947694801438e-05, "loss": 1.0530781745910645, "step": 3465 }, { "epoch": 0.2838395315746911, "grad_norm": 0.164481982588768, "learning_rate": 2.1319371843937355e-05, "loss": 0.520302951335907, "step": 3466 }, { "epoch": 0.2839214241112101, "grad_norm": 0.21060939133167267, "learning_rate": 2.131779558801499e-05, "loss": 0.9476941227912903, "step": 3467 }, { "epoch": 0.28400331664772904, "grad_norm": 0.2568098306655884, "learning_rate": 2.131621892710285e-05, "loss": 0.8709134459495544, "step": 3468 }, { "epoch": 0.28408520918424796, "grad_norm": 0.20867551863193512, "learning_rate": 2.1314641861269463e-05, "loss": 0.9996675252914429, "step": 3469 }, { "epoch": 0.2841671017207669, "grad_norm": 0.19812415540218353, "learning_rate": 2.131306439058338e-05, "loss": 0.851952075958252, "step": 3470 }, { "epoch": 0.2842489942572859, "grad_norm": 0.19411563873291016, "learning_rate": 2.1311486515113162e-05, "loss": 0.6276647448539734, "step": 3471 }, { "epoch": 0.2843308867938048, "grad_norm": 0.24754750728607178, "learning_rate": 2.130990823492739e-05, "loss": 0.8451815247535706, "step": 3472 }, { "epoch": 0.2844127793303238, "grad_norm": 0.20604436099529266, "learning_rate": 2.1308329550094658e-05, "loss": 0.9173741340637207, "step": 3473 }, { "epoch": 0.28449467186684274, "grad_norm": 0.21023263037204742, "learning_rate": 2.1306750460683585e-05, "loss": 1.026425838470459, "step": 3474 }, { "epoch": 0.2845765644033617, "grad_norm": 0.19120372831821442, "learning_rate": 2.1305170966762805e-05, "loss": 0.8910130262374878, "step": 3475 }, { "epoch": 0.2846584569398806, "grad_norm": 0.18901297450065613, "learning_rate": 2.130359106840097e-05, "loss": 0.6302460432052612, "step": 3476 }, { "epoch": 0.2847403494763996, "grad_norm": 0.21456506848335266, "learning_rate": 2.130201076566675e-05, "loss": 1.1058099269866943, "step": 3477 }, { "epoch": 0.28482224201291856, "grad_norm": 0.18502090871334076, "learning_rate": 2.130043005862883e-05, "loss": 0.6995141506195068, "step": 3478 }, { "epoch": 0.2849041345494375, "grad_norm": 0.2526298761367798, "learning_rate": 2.1298848947355914e-05, "loss": 1.0816057920455933, "step": 3479 }, { "epoch": 0.28498602708595644, "grad_norm": 0.2092422991991043, "learning_rate": 2.1297267431916723e-05, "loss": 0.7715293169021606, "step": 3480 }, { "epoch": 0.2850679196224754, "grad_norm": 0.22245262563228607, "learning_rate": 2.1295685512379997e-05, "loss": 1.4129287004470825, "step": 3481 }, { "epoch": 0.2851498121589944, "grad_norm": 0.252566933631897, "learning_rate": 2.129410318881449e-05, "loss": 0.9743900299072266, "step": 3482 }, { "epoch": 0.2852317046955133, "grad_norm": 0.16718724370002747, "learning_rate": 2.1292520461288984e-05, "loss": 0.48105719685554504, "step": 3483 }, { "epoch": 0.28531359723203226, "grad_norm": 0.21920624375343323, "learning_rate": 2.1290937329872263e-05, "loss": 0.7706964015960693, "step": 3484 }, { "epoch": 0.2853954897685512, "grad_norm": 0.20677447319030762, "learning_rate": 2.128935379463314e-05, "loss": 1.0058112144470215, "step": 3485 }, { "epoch": 0.2854773823050702, "grad_norm": 0.20283395051956177, "learning_rate": 2.1287769855640437e-05, "loss": 0.8632333874702454, "step": 3486 }, { "epoch": 0.2855592748415891, "grad_norm": 0.20556992292404175, "learning_rate": 2.1286185512963006e-05, "loss": 0.729941725730896, "step": 3487 }, { "epoch": 0.2856411673781081, "grad_norm": 0.20022818446159363, "learning_rate": 2.1284600766669703e-05, "loss": 1.026996374130249, "step": 3488 }, { "epoch": 0.28572305991462704, "grad_norm": 0.1891862154006958, "learning_rate": 2.1283015616829413e-05, "loss": 0.8198191523551941, "step": 3489 }, { "epoch": 0.285804952451146, "grad_norm": 0.26199573278427124, "learning_rate": 2.128143006351103e-05, "loss": 0.8752697110176086, "step": 3490 }, { "epoch": 0.2858868449876649, "grad_norm": 0.25328126549720764, "learning_rate": 2.1279844106783468e-05, "loss": 0.7696465253829956, "step": 3491 }, { "epoch": 0.2859687375241839, "grad_norm": 0.1954895406961441, "learning_rate": 2.127825774671566e-05, "loss": 0.9152534008026123, "step": 3492 }, { "epoch": 0.28605063006070286, "grad_norm": 0.16714297235012054, "learning_rate": 2.1276670983376558e-05, "loss": 0.6931283473968506, "step": 3493 }, { "epoch": 0.2861325225972218, "grad_norm": 0.21672800183296204, "learning_rate": 2.1275083816835127e-05, "loss": 1.0445926189422607, "step": 3494 }, { "epoch": 0.28621441513374074, "grad_norm": 0.19939132034778595, "learning_rate": 2.127349624716035e-05, "loss": 0.9226009249687195, "step": 3495 }, { "epoch": 0.2862963076702597, "grad_norm": 0.17384614050388336, "learning_rate": 2.1271908274421233e-05, "loss": 0.6694759726524353, "step": 3496 }, { "epoch": 0.2863782002067787, "grad_norm": 0.21277102828025818, "learning_rate": 2.1270319898686792e-05, "loss": 0.7632806301116943, "step": 3497 }, { "epoch": 0.2864600927432976, "grad_norm": 0.18788889050483704, "learning_rate": 2.126873112002607e-05, "loss": 0.8672062158584595, "step": 3498 }, { "epoch": 0.28654198527981656, "grad_norm": 0.21760745346546173, "learning_rate": 2.1267141938508115e-05, "loss": 0.6594892740249634, "step": 3499 }, { "epoch": 0.2866238778163355, "grad_norm": 0.215936541557312, "learning_rate": 2.126555235420201e-05, "loss": 0.6994695067405701, "step": 3500 }, { "epoch": 0.2867057703528545, "grad_norm": 0.1899453103542328, "learning_rate": 2.126396236717683e-05, "loss": 1.0494346618652344, "step": 3501 }, { "epoch": 0.2867876628893734, "grad_norm": 0.19823096692562103, "learning_rate": 2.1262371977501692e-05, "loss": 0.7733740210533142, "step": 3502 }, { "epoch": 0.2868695554258924, "grad_norm": 0.22605086863040924, "learning_rate": 2.1260781185245722e-05, "loss": 0.7243752479553223, "step": 3503 }, { "epoch": 0.28695144796241134, "grad_norm": 0.16761215031147003, "learning_rate": 2.1259189990478053e-05, "loss": 0.7263807654380798, "step": 3504 }, { "epoch": 0.28703334049893026, "grad_norm": 0.21117354929447174, "learning_rate": 2.1257598393267856e-05, "loss": 0.916650652885437, "step": 3505 }, { "epoch": 0.2871152330354492, "grad_norm": 0.17035061120986938, "learning_rate": 2.12560063936843e-05, "loss": 0.5386192202568054, "step": 3506 }, { "epoch": 0.2871971255719682, "grad_norm": 0.20572909712791443, "learning_rate": 2.1254413991796587e-05, "loss": 0.774568498134613, "step": 3507 }, { "epoch": 0.28727901810848716, "grad_norm": 0.19865141808986664, "learning_rate": 2.1252821187673924e-05, "loss": 0.8318107724189758, "step": 3508 }, { "epoch": 0.2873609106450061, "grad_norm": 0.19003914296627045, "learning_rate": 2.1251227981385544e-05, "loss": 0.9233892560005188, "step": 3509 }, { "epoch": 0.28744280318152504, "grad_norm": 0.2344079613685608, "learning_rate": 2.124963437300069e-05, "loss": 0.9341983795166016, "step": 3510 }, { "epoch": 0.287524695718044, "grad_norm": 0.15803226828575134, "learning_rate": 2.1248040362588635e-05, "loss": 0.7519998550415039, "step": 3511 }, { "epoch": 0.2876065882545629, "grad_norm": 0.21238510310649872, "learning_rate": 2.1246445950218654e-05, "loss": 0.7486221790313721, "step": 3512 }, { "epoch": 0.2876884807910819, "grad_norm": 0.16170275211334229, "learning_rate": 2.1244851135960047e-05, "loss": 0.7832815647125244, "step": 3513 }, { "epoch": 0.28777037332760086, "grad_norm": 0.2384539246559143, "learning_rate": 2.1243255919882132e-05, "loss": 0.7580384016036987, "step": 3514 }, { "epoch": 0.2878522658641198, "grad_norm": 0.22442620992660522, "learning_rate": 2.1241660302054247e-05, "loss": 0.6989566087722778, "step": 3515 }, { "epoch": 0.28793415840063874, "grad_norm": 0.1991148591041565, "learning_rate": 2.124006428254574e-05, "loss": 0.9883272051811218, "step": 3516 }, { "epoch": 0.2880160509371577, "grad_norm": 0.20200927555561066, "learning_rate": 2.1238467861425985e-05, "loss": 0.7939639687538147, "step": 3517 }, { "epoch": 0.2880979434736767, "grad_norm": 0.22574682533740997, "learning_rate": 2.1236871038764362e-05, "loss": 0.850476861000061, "step": 3518 }, { "epoch": 0.28817983601019564, "grad_norm": 0.17249317467212677, "learning_rate": 2.123527381463028e-05, "loss": 0.5548695921897888, "step": 3519 }, { "epoch": 0.28826172854671456, "grad_norm": 0.1980728805065155, "learning_rate": 2.1233676189093164e-05, "loss": 0.8167330026626587, "step": 3520 }, { "epoch": 0.2883436210832335, "grad_norm": 0.1657007336616516, "learning_rate": 2.123207816222245e-05, "loss": 0.7155598402023315, "step": 3521 }, { "epoch": 0.2884255136197525, "grad_norm": 0.18101030588150024, "learning_rate": 2.123047973408759e-05, "loss": 0.6549408435821533, "step": 3522 }, { "epoch": 0.2885074061562714, "grad_norm": 0.20963455736637115, "learning_rate": 2.1228880904758068e-05, "loss": 0.7759702801704407, "step": 3523 }, { "epoch": 0.2885892986927904, "grad_norm": 0.39317113161087036, "learning_rate": 2.122728167430337e-05, "loss": 0.9718130826950073, "step": 3524 }, { "epoch": 0.28867119122930934, "grad_norm": 0.1974186897277832, "learning_rate": 2.1225682042793006e-05, "loss": 0.9817551374435425, "step": 3525 }, { "epoch": 0.2887530837658283, "grad_norm": 0.20485323667526245, "learning_rate": 2.1224082010296502e-05, "loss": 0.8142287135124207, "step": 3526 }, { "epoch": 0.2888349763023472, "grad_norm": 0.1927838921546936, "learning_rate": 2.1222481576883407e-05, "loss": 0.6998031139373779, "step": 3527 }, { "epoch": 0.2889168688388662, "grad_norm": 0.1966256946325302, "learning_rate": 2.1220880742623272e-05, "loss": 1.10746431350708, "step": 3528 }, { "epoch": 0.28899876137538516, "grad_norm": 0.2113838940858841, "learning_rate": 2.1219279507585684e-05, "loss": 0.7397045493125916, "step": 3529 }, { "epoch": 0.28908065391190413, "grad_norm": 0.20919625461101532, "learning_rate": 2.1217677871840237e-05, "loss": 1.1148093938827515, "step": 3530 }, { "epoch": 0.28916254644842304, "grad_norm": 0.19676482677459717, "learning_rate": 2.1216075835456545e-05, "loss": 0.7693652510643005, "step": 3531 }, { "epoch": 0.289244438984942, "grad_norm": 0.17656555771827698, "learning_rate": 2.121447339850424e-05, "loss": 0.5824464559555054, "step": 3532 }, { "epoch": 0.289326331521461, "grad_norm": 0.18711617588996887, "learning_rate": 2.1212870561052967e-05, "loss": 0.7225980758666992, "step": 3533 }, { "epoch": 0.2894082240579799, "grad_norm": 0.20248383283615112, "learning_rate": 2.1211267323172395e-05, "loss": 0.7111272215843201, "step": 3534 }, { "epoch": 0.28949011659449886, "grad_norm": 0.22039610147476196, "learning_rate": 2.120966368493221e-05, "loss": 0.7027662992477417, "step": 3535 }, { "epoch": 0.2895720091310178, "grad_norm": 0.22662951052188873, "learning_rate": 2.1208059646402103e-05, "loss": 0.7621035575866699, "step": 3536 }, { "epoch": 0.2896539016675368, "grad_norm": 0.17215535044670105, "learning_rate": 2.1206455207651805e-05, "loss": 0.866097629070282, "step": 3537 }, { "epoch": 0.2897357942040557, "grad_norm": 0.19153009355068207, "learning_rate": 2.120485036875104e-05, "loss": 0.8311400413513184, "step": 3538 }, { "epoch": 0.2898176867405747, "grad_norm": 0.2478572428226471, "learning_rate": 2.120324512976957e-05, "loss": 0.7503623366355896, "step": 3539 }, { "epoch": 0.28989957927709364, "grad_norm": 0.20078802108764648, "learning_rate": 2.120163949077716e-05, "loss": 0.5034589171409607, "step": 3540 }, { "epoch": 0.2899814718136126, "grad_norm": 0.22539538145065308, "learning_rate": 2.12000334518436e-05, "loss": 0.8971729874610901, "step": 3541 }, { "epoch": 0.2900633643501315, "grad_norm": 0.22430646419525146, "learning_rate": 2.119842701303869e-05, "loss": 1.1851565837860107, "step": 3542 }, { "epoch": 0.2901452568866505, "grad_norm": 0.20909461379051208, "learning_rate": 2.1196820174432264e-05, "loss": 0.5989973545074463, "step": 3543 }, { "epoch": 0.29022714942316946, "grad_norm": 0.1593896448612213, "learning_rate": 2.1195212936094148e-05, "loss": 0.6213828921318054, "step": 3544 }, { "epoch": 0.2903090419596884, "grad_norm": 0.18795660138130188, "learning_rate": 2.1193605298094208e-05, "loss": 0.515428900718689, "step": 3545 }, { "epoch": 0.29039093449620734, "grad_norm": 0.1951572149991989, "learning_rate": 2.1191997260502315e-05, "loss": 0.7568216919898987, "step": 3546 }, { "epoch": 0.2904728270327263, "grad_norm": 0.15674081444740295, "learning_rate": 2.119038882338836e-05, "loss": 0.6737725734710693, "step": 3547 }, { "epoch": 0.2905547195692453, "grad_norm": 0.18807759881019592, "learning_rate": 2.1188779986822256e-05, "loss": 0.9392871260643005, "step": 3548 }, { "epoch": 0.2906366121057642, "grad_norm": 0.2081177532672882, "learning_rate": 2.1187170750873932e-05, "loss": 0.9934238195419312, "step": 3549 }, { "epoch": 0.29071850464228316, "grad_norm": 0.21088671684265137, "learning_rate": 2.1185561115613322e-05, "loss": 0.9218080639839172, "step": 3550 }, { "epoch": 0.29080039717880213, "grad_norm": 0.208150252699852, "learning_rate": 2.1183951081110396e-05, "loss": 1.020330548286438, "step": 3551 }, { "epoch": 0.2908822897153211, "grad_norm": 0.19615116715431213, "learning_rate": 2.118234064743513e-05, "loss": 0.8098911046981812, "step": 3552 }, { "epoch": 0.29096418225184, "grad_norm": 0.16913655400276184, "learning_rate": 2.118072981465752e-05, "loss": 0.701073408126831, "step": 3553 }, { "epoch": 0.291046074788359, "grad_norm": 0.20514576137065887, "learning_rate": 2.117911858284758e-05, "loss": 0.9694563150405884, "step": 3554 }, { "epoch": 0.29112796732487795, "grad_norm": 0.1729450523853302, "learning_rate": 2.117750695207534e-05, "loss": 0.8312361836433411, "step": 3555 }, { "epoch": 0.29120985986139686, "grad_norm": 0.18179979920387268, "learning_rate": 2.117589492241085e-05, "loss": 0.7960910797119141, "step": 3556 }, { "epoch": 0.2912917523979158, "grad_norm": 0.2027096152305603, "learning_rate": 2.1174282493924172e-05, "loss": 0.5393216013908386, "step": 3557 }, { "epoch": 0.2913736449344348, "grad_norm": 0.45329728722572327, "learning_rate": 2.1172669666685392e-05, "loss": 0.8141299486160278, "step": 3558 }, { "epoch": 0.29145553747095376, "grad_norm": 0.1998831331729889, "learning_rate": 2.1171056440764606e-05, "loss": 1.109323263168335, "step": 3559 }, { "epoch": 0.2915374300074727, "grad_norm": 0.18283510208129883, "learning_rate": 2.116944281623194e-05, "loss": 0.7146959900856018, "step": 3560 }, { "epoch": 0.29161932254399164, "grad_norm": 0.17302998900413513, "learning_rate": 2.116782879315752e-05, "loss": 0.7244794368743896, "step": 3561 }, { "epoch": 0.2917012150805106, "grad_norm": 0.2004489153623581, "learning_rate": 2.1166214371611496e-05, "loss": 0.9209729433059692, "step": 3562 }, { "epoch": 0.2917831076170296, "grad_norm": 0.18157488107681274, "learning_rate": 2.116459955166405e-05, "loss": 0.8496153950691223, "step": 3563 }, { "epoch": 0.2918650001535485, "grad_norm": 0.1911879926919937, "learning_rate": 2.1162984333385358e-05, "loss": 1.1315703392028809, "step": 3564 }, { "epoch": 0.29194689269006746, "grad_norm": 0.1688978374004364, "learning_rate": 2.1161368716845626e-05, "loss": 0.8458208441734314, "step": 3565 }, { "epoch": 0.29202878522658643, "grad_norm": 0.20819826424121857, "learning_rate": 2.1159752702115075e-05, "loss": 0.7576515078544617, "step": 3566 }, { "epoch": 0.29211067776310534, "grad_norm": 0.23401206731796265, "learning_rate": 2.1158136289263952e-05, "loss": 1.089598298072815, "step": 3567 }, { "epoch": 0.2921925702996243, "grad_norm": 0.2130027562379837, "learning_rate": 2.1156519478362506e-05, "loss": 1.0247031450271606, "step": 3568 }, { "epoch": 0.2922744628361433, "grad_norm": 0.25619781017303467, "learning_rate": 2.1154902269481003e-05, "loss": 0.7694712281227112, "step": 3569 }, { "epoch": 0.29235635537266225, "grad_norm": 0.1922881156206131, "learning_rate": 2.1153284662689747e-05, "loss": 0.9210580587387085, "step": 3570 }, { "epoch": 0.29243824790918116, "grad_norm": 0.1944931447505951, "learning_rate": 2.115166665805904e-05, "loss": 0.9005422592163086, "step": 3571 }, { "epoch": 0.2925201404457001, "grad_norm": 0.23080092668533325, "learning_rate": 2.1150048255659206e-05, "loss": 0.7594001293182373, "step": 3572 }, { "epoch": 0.2926020329822191, "grad_norm": 0.19769082963466644, "learning_rate": 2.1148429455560586e-05, "loss": 0.9875636696815491, "step": 3573 }, { "epoch": 0.292683925518738, "grad_norm": 0.2345762848854065, "learning_rate": 2.1146810257833546e-05, "loss": 0.856637179851532, "step": 3574 }, { "epoch": 0.292765818055257, "grad_norm": 0.19932471215724945, "learning_rate": 2.1145190662548454e-05, "loss": 0.8109740614891052, "step": 3575 }, { "epoch": 0.29284771059177594, "grad_norm": 0.2150142788887024, "learning_rate": 2.1143570669775718e-05, "loss": 0.8369321823120117, "step": 3576 }, { "epoch": 0.2929296031282949, "grad_norm": 0.17657695710659027, "learning_rate": 2.1141950279585733e-05, "loss": 0.723195493221283, "step": 3577 }, { "epoch": 0.2930114956648138, "grad_norm": 0.1696932315826416, "learning_rate": 2.114032949204894e-05, "loss": 1.216187596321106, "step": 3578 }, { "epoch": 0.2930933882013328, "grad_norm": 0.199996218085289, "learning_rate": 2.113870830723578e-05, "loss": 0.9340181350708008, "step": 3579 }, { "epoch": 0.29317528073785176, "grad_norm": 0.15180617570877075, "learning_rate": 2.1137086725216717e-05, "loss": 0.624434769153595, "step": 3580 }, { "epoch": 0.29325717327437073, "grad_norm": 0.23232433199882507, "learning_rate": 2.113546474606223e-05, "loss": 1.0785369873046875, "step": 3581 }, { "epoch": 0.29333906581088964, "grad_norm": 0.20001237094402313, "learning_rate": 2.1133842369842827e-05, "loss": 0.8305016160011292, "step": 3582 }, { "epoch": 0.2934209583474086, "grad_norm": 0.19694243371486664, "learning_rate": 2.1132219596629004e-05, "loss": 0.7204241752624512, "step": 3583 }, { "epoch": 0.2935028508839276, "grad_norm": 0.18327634036540985, "learning_rate": 2.1130596426491314e-05, "loss": 0.6896508932113647, "step": 3584 }, { "epoch": 0.2935847434204465, "grad_norm": 0.19185766577720642, "learning_rate": 2.1128972859500294e-05, "loss": 0.9345558285713196, "step": 3585 }, { "epoch": 0.29366663595696546, "grad_norm": 0.19742132723331451, "learning_rate": 2.1127348895726515e-05, "loss": 0.6110600829124451, "step": 3586 }, { "epoch": 0.29374852849348443, "grad_norm": 0.1884407103061676, "learning_rate": 2.1125724535240555e-05, "loss": 0.8066259026527405, "step": 3587 }, { "epoch": 0.2938304210300034, "grad_norm": 0.17098784446716309, "learning_rate": 2.1124099778113024e-05, "loss": 0.6190654635429382, "step": 3588 }, { "epoch": 0.2939123135665223, "grad_norm": 0.2020869255065918, "learning_rate": 2.1122474624414537e-05, "loss": 0.826740562915802, "step": 3589 }, { "epoch": 0.2939942061030413, "grad_norm": 0.183569997549057, "learning_rate": 2.112084907421573e-05, "loss": 0.6702662110328674, "step": 3590 }, { "epoch": 0.29407609863956025, "grad_norm": 0.18634085357189178, "learning_rate": 2.1119223127587257e-05, "loss": 0.7662432193756104, "step": 3591 }, { "epoch": 0.2941579911760792, "grad_norm": 0.19652844965457916, "learning_rate": 2.1117596784599787e-05, "loss": 1.0405168533325195, "step": 3592 }, { "epoch": 0.2942398837125981, "grad_norm": 0.1693311482667923, "learning_rate": 2.111597004532401e-05, "loss": 1.054413080215454, "step": 3593 }, { "epoch": 0.2943217762491171, "grad_norm": 0.21019431948661804, "learning_rate": 2.111434290983063e-05, "loss": 0.9166066646575928, "step": 3594 }, { "epoch": 0.29440366878563606, "grad_norm": 0.1755557507276535, "learning_rate": 2.1112715378190358e-05, "loss": 1.0017973184585571, "step": 3595 }, { "epoch": 0.294485561322155, "grad_norm": 0.18864481151103973, "learning_rate": 2.1111087450473954e-05, "loss": 0.947195291519165, "step": 3596 }, { "epoch": 0.29456745385867394, "grad_norm": 0.20033425092697144, "learning_rate": 2.1109459126752158e-05, "loss": 0.9460005760192871, "step": 3597 }, { "epoch": 0.2946493463951929, "grad_norm": 0.18150371313095093, "learning_rate": 2.110783040709575e-05, "loss": 1.0964206457138062, "step": 3598 }, { "epoch": 0.2947312389317119, "grad_norm": 0.1885889321565628, "learning_rate": 2.110620129157552e-05, "loss": 0.8664576411247253, "step": 3599 }, { "epoch": 0.2948131314682308, "grad_norm": 0.1925138235092163, "learning_rate": 2.1104571780262277e-05, "loss": 1.0306211709976196, "step": 3600 }, { "epoch": 0.29489502400474976, "grad_norm": 0.22584065794944763, "learning_rate": 2.1102941873226848e-05, "loss": 0.9187113046646118, "step": 3601 }, { "epoch": 0.29497691654126873, "grad_norm": 0.1997673362493515, "learning_rate": 2.1101311570540073e-05, "loss": 0.8119727373123169, "step": 3602 }, { "epoch": 0.2950588090777877, "grad_norm": 0.20866994559764862, "learning_rate": 2.1099680872272807e-05, "loss": 0.817253828048706, "step": 3603 }, { "epoch": 0.2951407016143066, "grad_norm": 0.19118531048297882, "learning_rate": 2.1098049778495935e-05, "loss": 0.9325932264328003, "step": 3604 }, { "epoch": 0.2952225941508256, "grad_norm": 0.18405868113040924, "learning_rate": 2.109641828928035e-05, "loss": 0.5527405738830566, "step": 3605 }, { "epoch": 0.29530448668734455, "grad_norm": 0.18512536585330963, "learning_rate": 2.1094786404696955e-05, "loss": 0.5500252842903137, "step": 3606 }, { "epoch": 0.29538637922386346, "grad_norm": 0.1829407811164856, "learning_rate": 2.1093154124816682e-05, "loss": 0.7503373622894287, "step": 3607 }, { "epoch": 0.29546827176038243, "grad_norm": 0.3849031925201416, "learning_rate": 2.1091521449710487e-05, "loss": 0.8927099704742432, "step": 3608 }, { "epoch": 0.2955501642969014, "grad_norm": 0.19822444021701813, "learning_rate": 2.108988837944932e-05, "loss": 0.9564392566680908, "step": 3609 }, { "epoch": 0.29563205683342036, "grad_norm": 0.1835123598575592, "learning_rate": 2.1088254914104164e-05, "loss": 0.8965294361114502, "step": 3610 }, { "epoch": 0.2957139493699393, "grad_norm": 0.17707829177379608, "learning_rate": 2.1086621053746022e-05, "loss": 0.772913932800293, "step": 3611 }, { "epoch": 0.29579584190645825, "grad_norm": 0.23680436611175537, "learning_rate": 2.10849867984459e-05, "loss": 1.0605396032333374, "step": 3612 }, { "epoch": 0.2958777344429772, "grad_norm": 0.1721189171075821, "learning_rate": 2.1083352148274836e-05, "loss": 1.0507632493972778, "step": 3613 }, { "epoch": 0.2959596269794962, "grad_norm": 0.19446878135204315, "learning_rate": 2.1081717103303876e-05, "loss": 0.9577060341835022, "step": 3614 }, { "epoch": 0.2960415195160151, "grad_norm": 0.18839064240455627, "learning_rate": 2.1080081663604083e-05, "loss": 0.657987117767334, "step": 3615 }, { "epoch": 0.29612341205253406, "grad_norm": 0.17192865908145905, "learning_rate": 2.1078445829246544e-05, "loss": 0.7006998062133789, "step": 3616 }, { "epoch": 0.29620530458905303, "grad_norm": 0.20551976561546326, "learning_rate": 2.107680960030236e-05, "loss": 0.7701177597045898, "step": 3617 }, { "epoch": 0.29628719712557194, "grad_norm": 0.21339356899261475, "learning_rate": 2.1075172976842645e-05, "loss": 1.0517058372497559, "step": 3618 }, { "epoch": 0.2963690896620909, "grad_norm": 0.18560940027236938, "learning_rate": 2.1073535958938535e-05, "loss": 0.5469334721565247, "step": 3619 }, { "epoch": 0.2964509821986099, "grad_norm": 0.1744692623615265, "learning_rate": 2.107189854666118e-05, "loss": 0.8858669996261597, "step": 3620 }, { "epoch": 0.29653287473512885, "grad_norm": 0.21723338961601257, "learning_rate": 2.107026074008175e-05, "loss": 0.8496415019035339, "step": 3621 }, { "epoch": 0.29661476727164776, "grad_norm": 0.18160609900951385, "learning_rate": 2.106862253927143e-05, "loss": 0.8248187899589539, "step": 3622 }, { "epoch": 0.29669665980816673, "grad_norm": 0.19062523543834686, "learning_rate": 2.1066983944301424e-05, "loss": 1.081024408340454, "step": 3623 }, { "epoch": 0.2967785523446857, "grad_norm": 0.1949036568403244, "learning_rate": 2.1065344955242955e-05, "loss": 0.8662595748901367, "step": 3624 }, { "epoch": 0.29686044488120467, "grad_norm": 0.20386208593845367, "learning_rate": 2.106370557216725e-05, "loss": 0.7789850234985352, "step": 3625 }, { "epoch": 0.2969423374177236, "grad_norm": 0.19800038635730743, "learning_rate": 2.1062065795145576e-05, "loss": 0.7887215614318848, "step": 3626 }, { "epoch": 0.29702422995424255, "grad_norm": 0.18346603214740753, "learning_rate": 2.1060425624249195e-05, "loss": 0.8028406500816345, "step": 3627 }, { "epoch": 0.2971061224907615, "grad_norm": 0.21444019675254822, "learning_rate": 2.1058785059549402e-05, "loss": 0.9322720766067505, "step": 3628 }, { "epoch": 0.2971880150272804, "grad_norm": 0.20921628177165985, "learning_rate": 2.1057144101117498e-05, "loss": 0.8555570840835571, "step": 3629 }, { "epoch": 0.2972699075637994, "grad_norm": 0.21037790179252625, "learning_rate": 2.105550274902481e-05, "loss": 0.8249996900558472, "step": 3630 }, { "epoch": 0.29735180010031836, "grad_norm": 0.2035447657108307, "learning_rate": 2.105386100334267e-05, "loss": 0.8506997227668762, "step": 3631 }, { "epoch": 0.29743369263683733, "grad_norm": 0.1981995403766632, "learning_rate": 2.1052218864142448e-05, "loss": 1.1441645622253418, "step": 3632 }, { "epoch": 0.29751558517335625, "grad_norm": 0.18912583589553833, "learning_rate": 2.1050576331495508e-05, "loss": 0.7843077182769775, "step": 3633 }, { "epoch": 0.2975974777098752, "grad_norm": 0.20738671720027924, "learning_rate": 2.1048933405473243e-05, "loss": 1.1583847999572754, "step": 3634 }, { "epoch": 0.2976793702463942, "grad_norm": 0.2356564700603485, "learning_rate": 2.1047290086147066e-05, "loss": 0.7506566047668457, "step": 3635 }, { "epoch": 0.29776126278291315, "grad_norm": 0.2145264446735382, "learning_rate": 2.1045646373588394e-05, "loss": 0.8833611011505127, "step": 3636 }, { "epoch": 0.29784315531943206, "grad_norm": 0.2108660191297531, "learning_rate": 2.1044002267868677e-05, "loss": 0.9278150200843811, "step": 3637 }, { "epoch": 0.29792504785595103, "grad_norm": 0.21484282612800598, "learning_rate": 2.104235776905937e-05, "loss": 1.1076394319534302, "step": 3638 }, { "epoch": 0.29800694039247, "grad_norm": 0.20101384818553925, "learning_rate": 2.104071287723195e-05, "loss": 1.1551094055175781, "step": 3639 }, { "epoch": 0.2980888329289889, "grad_norm": 0.1831672042608261, "learning_rate": 2.1039067592457916e-05, "loss": 0.7470964789390564, "step": 3640 }, { "epoch": 0.2981707254655079, "grad_norm": 0.1978040337562561, "learning_rate": 2.1037421914808773e-05, "loss": 0.7266411781311035, "step": 3641 }, { "epoch": 0.29825261800202685, "grad_norm": 0.20666398108005524, "learning_rate": 2.1035775844356053e-05, "loss": 0.9648134708404541, "step": 3642 }, { "epoch": 0.2983345105385458, "grad_norm": 0.2071513831615448, "learning_rate": 2.1034129381171303e-05, "loss": 0.6932756304740906, "step": 3643 }, { "epoch": 0.29841640307506473, "grad_norm": 0.1790638267993927, "learning_rate": 2.1032482525326073e-05, "loss": 0.8512274026870728, "step": 3644 }, { "epoch": 0.2984982956115837, "grad_norm": 0.18330150842666626, "learning_rate": 2.1030835276891958e-05, "loss": 0.6236366033554077, "step": 3645 }, { "epoch": 0.29858018814810267, "grad_norm": 0.25310254096984863, "learning_rate": 2.102918763594054e-05, "loss": 0.9596943855285645, "step": 3646 }, { "epoch": 0.2986620806846216, "grad_norm": 0.16413013637065887, "learning_rate": 2.102753960254344e-05, "loss": 0.5448846220970154, "step": 3647 }, { "epoch": 0.29874397322114055, "grad_norm": 0.23257330060005188, "learning_rate": 2.102589117677229e-05, "loss": 0.9468790292739868, "step": 3648 }, { "epoch": 0.2988258657576595, "grad_norm": 0.20402218401432037, "learning_rate": 2.1024242358698734e-05, "loss": 0.9778248071670532, "step": 3649 }, { "epoch": 0.2989077582941785, "grad_norm": 0.19620724022388458, "learning_rate": 2.1022593148394438e-05, "loss": 0.7206214666366577, "step": 3650 }, { "epoch": 0.2989896508306974, "grad_norm": 0.1872878074645996, "learning_rate": 2.1020943545931083e-05, "loss": 1.1113299131393433, "step": 3651 }, { "epoch": 0.29907154336721636, "grad_norm": 0.1915750503540039, "learning_rate": 2.101929355138036e-05, "loss": 1.2483373880386353, "step": 3652 }, { "epoch": 0.29915343590373533, "grad_norm": 0.20338261127471924, "learning_rate": 2.1017643164813998e-05, "loss": 0.9897397756576538, "step": 3653 }, { "epoch": 0.2992353284402543, "grad_norm": 0.19959871470928192, "learning_rate": 2.101599238630372e-05, "loss": 0.6907796263694763, "step": 3654 }, { "epoch": 0.2993172209767732, "grad_norm": 0.22270812094211578, "learning_rate": 2.101434121592128e-05, "loss": 0.6984301209449768, "step": 3655 }, { "epoch": 0.2993991135132922, "grad_norm": 0.17777365446090698, "learning_rate": 2.1012689653738446e-05, "loss": 0.6999461054801941, "step": 3656 }, { "epoch": 0.29948100604981115, "grad_norm": 0.21583420038223267, "learning_rate": 2.1011037699826997e-05, "loss": 0.8415200710296631, "step": 3657 }, { "epoch": 0.29956289858633006, "grad_norm": 0.2374778836965561, "learning_rate": 2.1009385354258735e-05, "loss": 1.0429660081863403, "step": 3658 }, { "epoch": 0.29964479112284903, "grad_norm": 0.22632156312465668, "learning_rate": 2.1007732617105474e-05, "loss": 1.031368374824524, "step": 3659 }, { "epoch": 0.299726683659368, "grad_norm": 0.2669917643070221, "learning_rate": 2.100607948843906e-05, "loss": 0.9242194294929504, "step": 3660 }, { "epoch": 0.29980857619588697, "grad_norm": 0.24219609797000885, "learning_rate": 2.100442596833134e-05, "loss": 1.129044532775879, "step": 3661 }, { "epoch": 0.2998904687324059, "grad_norm": 0.2114255726337433, "learning_rate": 2.1002772056854174e-05, "loss": 1.0678277015686035, "step": 3662 }, { "epoch": 0.29997236126892485, "grad_norm": 0.1788359135389328, "learning_rate": 2.100111775407946e-05, "loss": 0.947293221950531, "step": 3663 }, { "epoch": 0.3000542538054438, "grad_norm": 0.20146743953227997, "learning_rate": 2.0999463060079095e-05, "loss": 1.0194687843322754, "step": 3664 }, { "epoch": 0.3001361463419628, "grad_norm": 0.1982983499765396, "learning_rate": 2.0997807974925e-05, "loss": 1.2301114797592163, "step": 3665 }, { "epoch": 0.3002180388784817, "grad_norm": 0.17931246757507324, "learning_rate": 2.0996152498689105e-05, "loss": 0.6765968203544617, "step": 3666 }, { "epoch": 0.30029993141500066, "grad_norm": 0.17129762470722198, "learning_rate": 2.0994496631443377e-05, "loss": 0.8984649181365967, "step": 3667 }, { "epoch": 0.30038182395151963, "grad_norm": 0.21193966269493103, "learning_rate": 2.0992840373259775e-05, "loss": 0.9020853042602539, "step": 3668 }, { "epoch": 0.30046371648803855, "grad_norm": 0.19254326820373535, "learning_rate": 2.0991183724210293e-05, "loss": 0.8749078512191772, "step": 3669 }, { "epoch": 0.3005456090245575, "grad_norm": 0.20751403272151947, "learning_rate": 2.098952668436693e-05, "loss": 1.054942011833191, "step": 3670 }, { "epoch": 0.3006275015610765, "grad_norm": 0.18410690128803253, "learning_rate": 2.0987869253801725e-05, "loss": 0.5626079440116882, "step": 3671 }, { "epoch": 0.30070939409759545, "grad_norm": 0.1679917722940445, "learning_rate": 2.0986211432586695e-05, "loss": 0.6299710273742676, "step": 3672 }, { "epoch": 0.30079128663411436, "grad_norm": 0.20177562534809113, "learning_rate": 2.0984553220793904e-05, "loss": 0.8676398992538452, "step": 3673 }, { "epoch": 0.30087317917063333, "grad_norm": 0.20765089988708496, "learning_rate": 2.0982894618495427e-05, "loss": 1.1479179859161377, "step": 3674 }, { "epoch": 0.3009550717071523, "grad_norm": 0.22143487632274628, "learning_rate": 2.098123562576335e-05, "loss": 0.6360337138175964, "step": 3675 }, { "epoch": 0.30103696424367127, "grad_norm": 0.21387112140655518, "learning_rate": 2.0979576242669785e-05, "loss": 0.9845086336135864, "step": 3676 }, { "epoch": 0.3011188567801902, "grad_norm": 0.20906053483486176, "learning_rate": 2.0977916469286853e-05, "loss": 0.8634927272796631, "step": 3677 }, { "epoch": 0.30120074931670915, "grad_norm": 0.19571611285209656, "learning_rate": 2.097625630568669e-05, "loss": 0.9353699684143066, "step": 3678 }, { "epoch": 0.3012826418532281, "grad_norm": 0.20345059037208557, "learning_rate": 2.097459575194146e-05, "loss": 0.9322031140327454, "step": 3679 }, { "epoch": 0.30136453438974703, "grad_norm": 0.20729608833789825, "learning_rate": 2.0972934808123334e-05, "loss": 0.9988180994987488, "step": 3680 }, { "epoch": 0.301446426926266, "grad_norm": 0.20294074714183807, "learning_rate": 2.0971273474304505e-05, "loss": 0.9455978870391846, "step": 3681 }, { "epoch": 0.30152831946278497, "grad_norm": 0.23743915557861328, "learning_rate": 2.0969611750557182e-05, "loss": 0.7566089034080505, "step": 3682 }, { "epoch": 0.30161021199930393, "grad_norm": 0.19040416181087494, "learning_rate": 2.0967949636953588e-05, "loss": 0.9789379835128784, "step": 3683 }, { "epoch": 0.30169210453582285, "grad_norm": 0.2011021375656128, "learning_rate": 2.0966287133565966e-05, "loss": 0.8621591925621033, "step": 3684 }, { "epoch": 0.3017739970723418, "grad_norm": 0.19810828566551208, "learning_rate": 2.0964624240466577e-05, "loss": 1.105165719985962, "step": 3685 }, { "epoch": 0.3018558896088608, "grad_norm": 0.21400846540927887, "learning_rate": 2.096296095772769e-05, "loss": 1.1357966661453247, "step": 3686 }, { "epoch": 0.30193778214537975, "grad_norm": 0.1794648915529251, "learning_rate": 2.0961297285421613e-05, "loss": 0.5741147994995117, "step": 3687 }, { "epoch": 0.30201967468189866, "grad_norm": 0.2044583112001419, "learning_rate": 2.095963322362064e-05, "loss": 0.6140956282615662, "step": 3688 }, { "epoch": 0.30210156721841763, "grad_norm": 0.19455432891845703, "learning_rate": 2.095796877239711e-05, "loss": 0.8320263624191284, "step": 3689 }, { "epoch": 0.3021834597549366, "grad_norm": 0.19657950103282928, "learning_rate": 2.0956303931823364e-05, "loss": 0.7934436798095703, "step": 3690 }, { "epoch": 0.3022653522914555, "grad_norm": 0.20718331634998322, "learning_rate": 2.0954638701971753e-05, "loss": 0.994743824005127, "step": 3691 }, { "epoch": 0.3023472448279745, "grad_norm": 0.18333777785301208, "learning_rate": 2.0952973082914666e-05, "loss": 0.963508129119873, "step": 3692 }, { "epoch": 0.30242913736449345, "grad_norm": 0.2397436797618866, "learning_rate": 2.0951307074724494e-05, "loss": 1.0450435876846313, "step": 3693 }, { "epoch": 0.3025110299010124, "grad_norm": 0.2019968330860138, "learning_rate": 2.094964067747365e-05, "loss": 0.9910508990287781, "step": 3694 }, { "epoch": 0.30259292243753133, "grad_norm": 0.2190040796995163, "learning_rate": 2.0947973891234565e-05, "loss": 0.8946284055709839, "step": 3695 }, { "epoch": 0.3026748149740503, "grad_norm": 0.17628934979438782, "learning_rate": 2.0946306716079674e-05, "loss": 0.9257820844650269, "step": 3696 }, { "epoch": 0.30275670751056927, "grad_norm": 0.1866961419582367, "learning_rate": 2.0944639152081448e-05, "loss": 0.6957502961158752, "step": 3697 }, { "epoch": 0.30283860004708824, "grad_norm": 0.20171265304088593, "learning_rate": 2.0942971199312368e-05, "loss": 0.9175857901573181, "step": 3698 }, { "epoch": 0.30292049258360715, "grad_norm": 0.213101327419281, "learning_rate": 2.0941302857844924e-05, "loss": 0.8888790011405945, "step": 3699 }, { "epoch": 0.3030023851201261, "grad_norm": 0.1841278076171875, "learning_rate": 2.0939634127751625e-05, "loss": 0.6397982835769653, "step": 3700 }, { "epoch": 0.3030842776566451, "grad_norm": 0.1682266741991043, "learning_rate": 2.0937965009105014e-05, "loss": 0.8855237364768982, "step": 3701 }, { "epoch": 0.303166170193164, "grad_norm": 0.2096906453371048, "learning_rate": 2.093629550197763e-05, "loss": 0.663582444190979, "step": 3702 }, { "epoch": 0.30324806272968297, "grad_norm": 0.16171486675739288, "learning_rate": 2.0934625606442037e-05, "loss": 0.4826012849807739, "step": 3703 }, { "epoch": 0.30332995526620193, "grad_norm": 0.20908123254776, "learning_rate": 2.093295532257082e-05, "loss": 1.1438027620315552, "step": 3704 }, { "epoch": 0.3034118478027209, "grad_norm": 0.18108218908309937, "learning_rate": 2.0931284650436567e-05, "loss": 0.6727376580238342, "step": 3705 }, { "epoch": 0.3034937403392398, "grad_norm": 0.24184975028038025, "learning_rate": 2.09296135901119e-05, "loss": 1.028395414352417, "step": 3706 }, { "epoch": 0.3035756328757588, "grad_norm": 0.23232777416706085, "learning_rate": 2.0927942141669443e-05, "loss": 1.176581859588623, "step": 3707 }, { "epoch": 0.30365752541227775, "grad_norm": 0.19576068222522736, "learning_rate": 2.092627030518186e-05, "loss": 0.8960968852043152, "step": 3708 }, { "epoch": 0.30373941794879666, "grad_norm": 0.1940532773733139, "learning_rate": 2.0924598080721797e-05, "loss": 0.7640482187271118, "step": 3709 }, { "epoch": 0.30382131048531563, "grad_norm": 0.2615938186645508, "learning_rate": 2.0922925468361946e-05, "loss": 0.803968071937561, "step": 3710 }, { "epoch": 0.3039032030218346, "grad_norm": 0.2292097806930542, "learning_rate": 2.0921252468175e-05, "loss": 0.8943720459938049, "step": 3711 }, { "epoch": 0.30398509555835357, "grad_norm": 0.1992519348859787, "learning_rate": 2.0919579080233683e-05, "loss": 1.2030646800994873, "step": 3712 }, { "epoch": 0.3040669880948725, "grad_norm": 0.19640801846981049, "learning_rate": 2.0917905304610716e-05, "loss": 0.9638792276382446, "step": 3713 }, { "epoch": 0.30414888063139145, "grad_norm": 0.18813134729862213, "learning_rate": 2.091623114137886e-05, "loss": 0.7159693241119385, "step": 3714 }, { "epoch": 0.3042307731679104, "grad_norm": 0.22801652550697327, "learning_rate": 2.091455659061087e-05, "loss": 0.8400739431381226, "step": 3715 }, { "epoch": 0.3043126657044294, "grad_norm": 0.20077286660671234, "learning_rate": 2.091288165237954e-05, "loss": 0.6092285513877869, "step": 3716 }, { "epoch": 0.3043945582409483, "grad_norm": 0.18133825063705444, "learning_rate": 2.091120632675766e-05, "loss": 0.5924459099769592, "step": 3717 }, { "epoch": 0.30447645077746727, "grad_norm": 0.21254850924015045, "learning_rate": 2.0909530613818054e-05, "loss": 1.1440659761428833, "step": 3718 }, { "epoch": 0.30455834331398624, "grad_norm": 0.18105019629001617, "learning_rate": 2.0907854513633547e-05, "loss": 0.7164005637168884, "step": 3719 }, { "epoch": 0.30464023585050515, "grad_norm": 0.21055519580841064, "learning_rate": 2.0906178026276998e-05, "loss": 0.9109532237052917, "step": 3720 }, { "epoch": 0.3047221283870241, "grad_norm": 0.20325630903244019, "learning_rate": 2.090450115182127e-05, "loss": 0.9352898597717285, "step": 3721 }, { "epoch": 0.3048040209235431, "grad_norm": 0.15850035846233368, "learning_rate": 2.0902823890339247e-05, "loss": 0.8019141554832458, "step": 3722 }, { "epoch": 0.30488591346006205, "grad_norm": 0.203294575214386, "learning_rate": 2.0901146241903826e-05, "loss": 0.9413224458694458, "step": 3723 }, { "epoch": 0.30496780599658097, "grad_norm": 0.19006268680095673, "learning_rate": 2.0899468206587932e-05, "loss": 0.7488620281219482, "step": 3724 }, { "epoch": 0.30504969853309993, "grad_norm": 0.20841634273529053, "learning_rate": 2.0897789784464492e-05, "loss": 0.8511260151863098, "step": 3725 }, { "epoch": 0.3051315910696189, "grad_norm": 0.21967197954654694, "learning_rate": 2.0896110975606462e-05, "loss": 1.0204687118530273, "step": 3726 }, { "epoch": 0.30521348360613787, "grad_norm": 0.1968212127685547, "learning_rate": 2.0894431780086813e-05, "loss": 0.8045575618743896, "step": 3727 }, { "epoch": 0.3052953761426568, "grad_norm": 0.1779811680316925, "learning_rate": 2.0892752197978518e-05, "loss": 0.6743385791778564, "step": 3728 }, { "epoch": 0.30537726867917575, "grad_norm": 0.20954355597496033, "learning_rate": 2.089107222935459e-05, "loss": 0.7055544257164001, "step": 3729 }, { "epoch": 0.3054591612156947, "grad_norm": 0.1805024892091751, "learning_rate": 2.0889391874288044e-05, "loss": 0.74539715051651, "step": 3730 }, { "epoch": 0.30554105375221363, "grad_norm": 0.196786031126976, "learning_rate": 2.0887711132851914e-05, "loss": 0.8997848033905029, "step": 3731 }, { "epoch": 0.3056229462887326, "grad_norm": 0.217910036444664, "learning_rate": 2.0886030005119252e-05, "loss": 0.81126469373703, "step": 3732 }, { "epoch": 0.30570483882525157, "grad_norm": 0.2062082588672638, "learning_rate": 2.0884348491163127e-05, "loss": 0.7422431707382202, "step": 3733 }, { "epoch": 0.30578673136177054, "grad_norm": 0.21547353267669678, "learning_rate": 2.0882666591056625e-05, "loss": 1.1990283727645874, "step": 3734 }, { "epoch": 0.30586862389828945, "grad_norm": 0.19358514249324799, "learning_rate": 2.0880984304872848e-05, "loss": 1.0718698501586914, "step": 3735 }, { "epoch": 0.3059505164348084, "grad_norm": 0.1743157058954239, "learning_rate": 2.0879301632684915e-05, "loss": 0.5294317007064819, "step": 3736 }, { "epoch": 0.3060324089713274, "grad_norm": 0.20120902359485626, "learning_rate": 2.087761857456596e-05, "loss": 1.0308455228805542, "step": 3737 }, { "epoch": 0.30611430150784635, "grad_norm": 0.23224692046642303, "learning_rate": 2.0875935130589136e-05, "loss": 0.7668212056159973, "step": 3738 }, { "epoch": 0.30619619404436527, "grad_norm": 0.21504060924053192, "learning_rate": 2.087425130082762e-05, "loss": 0.7705087065696716, "step": 3739 }, { "epoch": 0.30627808658088423, "grad_norm": 0.19296769797801971, "learning_rate": 2.087256708535459e-05, "loss": 0.999333381652832, "step": 3740 }, { "epoch": 0.3063599791174032, "grad_norm": 0.19944867491722107, "learning_rate": 2.087088248424325e-05, "loss": 0.952528715133667, "step": 3741 }, { "epoch": 0.3064418716539221, "grad_norm": 0.17591442167758942, "learning_rate": 2.086919749756682e-05, "loss": 0.48611778020858765, "step": 3742 }, { "epoch": 0.3065237641904411, "grad_norm": 0.1993882805109024, "learning_rate": 2.0867512125398538e-05, "loss": 0.6830252408981323, "step": 3743 }, { "epoch": 0.30660565672696005, "grad_norm": 0.19135212898254395, "learning_rate": 2.0865826367811654e-05, "loss": 0.7023112773895264, "step": 3744 }, { "epoch": 0.306687549263479, "grad_norm": 0.22780735790729523, "learning_rate": 2.0864140224879446e-05, "loss": 0.8729267120361328, "step": 3745 }, { "epoch": 0.30676944179999793, "grad_norm": 0.17214713990688324, "learning_rate": 2.086245369667519e-05, "loss": 0.7706936001777649, "step": 3746 }, { "epoch": 0.3068513343365169, "grad_norm": 0.1965707242488861, "learning_rate": 2.0860766783272195e-05, "loss": 0.8064629435539246, "step": 3747 }, { "epoch": 0.30693322687303587, "grad_norm": 0.20206986367702484, "learning_rate": 2.0859079484743783e-05, "loss": 0.8335772752761841, "step": 3748 }, { "epoch": 0.30701511940955484, "grad_norm": 0.18004991114139557, "learning_rate": 2.085739180116329e-05, "loss": 0.6776199340820312, "step": 3749 }, { "epoch": 0.30709701194607375, "grad_norm": 0.18720576167106628, "learning_rate": 2.0855703732604067e-05, "loss": 0.9954450726509094, "step": 3750 }, { "epoch": 0.3071789044825927, "grad_norm": 0.2131120264530182, "learning_rate": 2.0854015279139482e-05, "loss": 1.1569010019302368, "step": 3751 }, { "epoch": 0.3072607970191117, "grad_norm": 0.18281470239162445, "learning_rate": 2.0852326440842927e-05, "loss": 0.8434798121452332, "step": 3752 }, { "epoch": 0.3073426895556306, "grad_norm": 0.2585003972053528, "learning_rate": 2.0850637217787808e-05, "loss": 0.7501276731491089, "step": 3753 }, { "epoch": 0.30742458209214957, "grad_norm": 0.20072004199028015, "learning_rate": 2.0848947610047543e-05, "loss": 1.2636387348175049, "step": 3754 }, { "epoch": 0.30750647462866854, "grad_norm": 0.19847123324871063, "learning_rate": 2.0847257617695567e-05, "loss": 0.8386142253875732, "step": 3755 }, { "epoch": 0.3075883671651875, "grad_norm": 0.23981152474880219, "learning_rate": 2.0845567240805337e-05, "loss": 0.8591972589492798, "step": 3756 }, { "epoch": 0.3076702597017064, "grad_norm": 0.17332816123962402, "learning_rate": 2.084387647945032e-05, "loss": 0.805083155632019, "step": 3757 }, { "epoch": 0.3077521522382254, "grad_norm": 0.19180920720100403, "learning_rate": 2.084218533370401e-05, "loss": 0.8333313465118408, "step": 3758 }, { "epoch": 0.30783404477474435, "grad_norm": 0.17884868383407593, "learning_rate": 2.0840493803639902e-05, "loss": 1.0697728395462036, "step": 3759 }, { "epoch": 0.3079159373112633, "grad_norm": 0.1998935341835022, "learning_rate": 2.0838801889331527e-05, "loss": 0.9401984810829163, "step": 3760 }, { "epoch": 0.30799782984778223, "grad_norm": 0.17782531678676605, "learning_rate": 2.0837109590852412e-05, "loss": 0.8681778907775879, "step": 3761 }, { "epoch": 0.3080797223843012, "grad_norm": 0.18978998064994812, "learning_rate": 2.083541690827612e-05, "loss": 0.691510021686554, "step": 3762 }, { "epoch": 0.30816161492082017, "grad_norm": 0.19456076622009277, "learning_rate": 2.083372384167622e-05, "loss": 0.9550209045410156, "step": 3763 }, { "epoch": 0.3082435074573391, "grad_norm": 0.1908067762851715, "learning_rate": 2.0832030391126295e-05, "loss": 0.8028213977813721, "step": 3764 }, { "epoch": 0.30832539999385805, "grad_norm": 0.18947221338748932, "learning_rate": 2.0830336556699954e-05, "loss": 1.0443196296691895, "step": 3765 }, { "epoch": 0.308407292530377, "grad_norm": 0.20464542508125305, "learning_rate": 2.082864233847082e-05, "loss": 0.6871306896209717, "step": 3766 }, { "epoch": 0.308489185066896, "grad_norm": 0.21797701716423035, "learning_rate": 2.0826947736512523e-05, "loss": 0.6797203421592712, "step": 3767 }, { "epoch": 0.3085710776034149, "grad_norm": 0.2328663021326065, "learning_rate": 2.0825252750898723e-05, "loss": 0.907656192779541, "step": 3768 }, { "epoch": 0.30865297013993387, "grad_norm": 0.1911153495311737, "learning_rate": 2.0823557381703093e-05, "loss": 1.2358806133270264, "step": 3769 }, { "epoch": 0.30873486267645284, "grad_norm": 0.19534823298454285, "learning_rate": 2.0821861628999316e-05, "loss": 0.8565807342529297, "step": 3770 }, { "epoch": 0.3088167552129718, "grad_norm": 0.20858164131641388, "learning_rate": 2.0820165492861094e-05, "loss": 1.083613395690918, "step": 3771 }, { "epoch": 0.3088986477494907, "grad_norm": 0.18433508276939392, "learning_rate": 2.0818468973362158e-05, "loss": 0.7031400799751282, "step": 3772 }, { "epoch": 0.3089805402860097, "grad_norm": 0.21164171397686005, "learning_rate": 2.0816772070576236e-05, "loss": 0.7776554226875305, "step": 3773 }, { "epoch": 0.30906243282252865, "grad_norm": 0.19280529022216797, "learning_rate": 2.081507478457709e-05, "loss": 1.0052731037139893, "step": 3774 }, { "epoch": 0.30914432535904757, "grad_norm": 0.17771291732788086, "learning_rate": 2.0813377115438485e-05, "loss": 0.8894796371459961, "step": 3775 }, { "epoch": 0.30922621789556654, "grad_norm": 0.18012332916259766, "learning_rate": 2.0811679063234206e-05, "loss": 0.6829866766929626, "step": 3776 }, { "epoch": 0.3093081104320855, "grad_norm": 0.22205588221549988, "learning_rate": 2.080998062803807e-05, "loss": 0.8511006832122803, "step": 3777 }, { "epoch": 0.30939000296860447, "grad_norm": 0.187313973903656, "learning_rate": 2.0808281809923885e-05, "loss": 0.7881535291671753, "step": 3778 }, { "epoch": 0.3094718955051234, "grad_norm": 0.18435177206993103, "learning_rate": 2.0806582608965497e-05, "loss": 0.5894652009010315, "step": 3779 }, { "epoch": 0.30955378804164235, "grad_norm": 0.20513075590133667, "learning_rate": 2.0804883025236755e-05, "loss": 0.6846891641616821, "step": 3780 }, { "epoch": 0.3096356805781613, "grad_norm": 0.17659910023212433, "learning_rate": 2.080318305881153e-05, "loss": 0.6493246555328369, "step": 3781 }, { "epoch": 0.30971757311468023, "grad_norm": 0.171564519405365, "learning_rate": 2.080148270976372e-05, "loss": 0.939162015914917, "step": 3782 }, { "epoch": 0.3097994656511992, "grad_norm": 0.19607193768024445, "learning_rate": 2.0799781978167215e-05, "loss": 0.9123477935791016, "step": 3783 }, { "epoch": 0.30988135818771817, "grad_norm": 0.23267166316509247, "learning_rate": 2.079808086409594e-05, "loss": 0.977313756942749, "step": 3784 }, { "epoch": 0.30996325072423714, "grad_norm": 0.20389822125434875, "learning_rate": 2.0796379367623838e-05, "loss": 0.9569587707519531, "step": 3785 }, { "epoch": 0.31004514326075605, "grad_norm": 0.24494490027427673, "learning_rate": 2.0794677488824858e-05, "loss": 0.9752430319786072, "step": 3786 }, { "epoch": 0.310127035797275, "grad_norm": 0.21150928735733032, "learning_rate": 2.079297522777297e-05, "loss": 1.0910792350769043, "step": 3787 }, { "epoch": 0.310208928333794, "grad_norm": 0.17977339029312134, "learning_rate": 2.0791272584542162e-05, "loss": 0.7511414289474487, "step": 3788 }, { "epoch": 0.31029082087031296, "grad_norm": 0.17239928245544434, "learning_rate": 2.0789569559206442e-05, "loss": 0.7230886816978455, "step": 3789 }, { "epoch": 0.31037271340683187, "grad_norm": 0.1916889101266861, "learning_rate": 2.0787866151839826e-05, "loss": 0.8523291945457458, "step": 3790 }, { "epoch": 0.31045460594335084, "grad_norm": 0.19170795381069183, "learning_rate": 2.0786162362516353e-05, "loss": 0.6724187135696411, "step": 3791 }, { "epoch": 0.3105364984798698, "grad_norm": 0.19682304561138153, "learning_rate": 2.0784458191310074e-05, "loss": 1.1722692251205444, "step": 3792 }, { "epoch": 0.3106183910163887, "grad_norm": 0.16867810487747192, "learning_rate": 2.0782753638295066e-05, "loss": 0.7759914398193359, "step": 3793 }, { "epoch": 0.3107002835529077, "grad_norm": 0.1667218655347824, "learning_rate": 2.0781048703545408e-05, "loss": 0.884178638458252, "step": 3794 }, { "epoch": 0.31078217608942665, "grad_norm": 0.22587884962558746, "learning_rate": 2.077934338713521e-05, "loss": 0.9652723073959351, "step": 3795 }, { "epoch": 0.3108640686259456, "grad_norm": 0.24535419046878815, "learning_rate": 2.0777637689138583e-05, "loss": 0.8766284584999084, "step": 3796 }, { "epoch": 0.31094596116246453, "grad_norm": 0.1690375804901123, "learning_rate": 2.0775931609629673e-05, "loss": 0.848909318447113, "step": 3797 }, { "epoch": 0.3110278536989835, "grad_norm": 0.23776361346244812, "learning_rate": 2.077422514868263e-05, "loss": 0.9482001662254333, "step": 3798 }, { "epoch": 0.31110974623550247, "grad_norm": 0.18520626425743103, "learning_rate": 2.0772518306371625e-05, "loss": 1.0880141258239746, "step": 3799 }, { "epoch": 0.31119163877202144, "grad_norm": 0.17754656076431274, "learning_rate": 2.077081108277084e-05, "loss": 0.5987615585327148, "step": 3800 }, { "epoch": 0.31127353130854035, "grad_norm": 0.22625593841075897, "learning_rate": 2.0769103477954484e-05, "loss": 0.8761833310127258, "step": 3801 }, { "epoch": 0.3113554238450593, "grad_norm": 0.20020852982997894, "learning_rate": 2.076739549199677e-05, "loss": 0.9232507348060608, "step": 3802 }, { "epoch": 0.3114373163815783, "grad_norm": 0.21406102180480957, "learning_rate": 2.076568712497194e-05, "loss": 0.9071295857429504, "step": 3803 }, { "epoch": 0.3115192089180972, "grad_norm": 0.19963616132736206, "learning_rate": 2.0763978376954245e-05, "loss": 1.0720038414001465, "step": 3804 }, { "epoch": 0.31160110145461617, "grad_norm": 0.1983698308467865, "learning_rate": 2.0762269248017953e-05, "loss": 0.8587870001792908, "step": 3805 }, { "epoch": 0.31168299399113514, "grad_norm": 0.18734951317310333, "learning_rate": 2.076055973823735e-05, "loss": 1.0613579750061035, "step": 3806 }, { "epoch": 0.3117648865276541, "grad_norm": 0.20103240013122559, "learning_rate": 2.0758849847686737e-05, "loss": 0.6078776717185974, "step": 3807 }, { "epoch": 0.311846779064173, "grad_norm": 0.2544565796852112, "learning_rate": 2.075713957644044e-05, "loss": 0.8925833702087402, "step": 3808 }, { "epoch": 0.311928671600692, "grad_norm": 0.1899135559797287, "learning_rate": 2.0755428924572784e-05, "loss": 0.8773936629295349, "step": 3809 }, { "epoch": 0.31201056413721096, "grad_norm": 0.1791934221982956, "learning_rate": 2.0753717892158127e-05, "loss": 0.9249658584594727, "step": 3810 }, { "epoch": 0.3120924566737299, "grad_norm": 0.19495287537574768, "learning_rate": 2.075200647927084e-05, "loss": 0.6954169869422913, "step": 3811 }, { "epoch": 0.31217434921024884, "grad_norm": 0.18937784433364868, "learning_rate": 2.0750294685985296e-05, "loss": 0.9945257902145386, "step": 3812 }, { "epoch": 0.3122562417467678, "grad_norm": 0.20374411344528198, "learning_rate": 2.0748582512375916e-05, "loss": 0.9466389417648315, "step": 3813 }, { "epoch": 0.3123381342832868, "grad_norm": 0.21982556581497192, "learning_rate": 2.07468699585171e-05, "loss": 0.8534730076789856, "step": 3814 }, { "epoch": 0.3124200268198057, "grad_norm": 0.16985943913459778, "learning_rate": 2.0745157024483292e-05, "loss": 0.8691702485084534, "step": 3815 }, { "epoch": 0.31250191935632465, "grad_norm": 0.19603653252124786, "learning_rate": 2.0743443710348938e-05, "loss": 0.8510909080505371, "step": 3816 }, { "epoch": 0.3125838118928436, "grad_norm": 0.20499542355537415, "learning_rate": 2.0741730016188514e-05, "loss": 1.2135144472122192, "step": 3817 }, { "epoch": 0.3126657044293626, "grad_norm": 0.18448813259601593, "learning_rate": 2.0740015942076497e-05, "loss": 0.9232240319252014, "step": 3818 }, { "epoch": 0.3127475969658815, "grad_norm": 0.2162674516439438, "learning_rate": 2.0738301488087385e-05, "loss": 0.7483606934547424, "step": 3819 }, { "epoch": 0.31282948950240047, "grad_norm": 0.19455885887145996, "learning_rate": 2.0736586654295705e-05, "loss": 0.8170048594474792, "step": 3820 }, { "epoch": 0.31291138203891944, "grad_norm": 0.18474586308002472, "learning_rate": 2.0734871440775987e-05, "loss": 0.9258907437324524, "step": 3821 }, { "epoch": 0.3129932745754384, "grad_norm": 0.18167629837989807, "learning_rate": 2.0733155847602777e-05, "loss": 0.8377065658569336, "step": 3822 }, { "epoch": 0.3130751671119573, "grad_norm": 0.15868346393108368, "learning_rate": 2.0731439874850644e-05, "loss": 0.9797303080558777, "step": 3823 }, { "epoch": 0.3131570596484763, "grad_norm": 0.1682647466659546, "learning_rate": 2.0729723522594174e-05, "loss": 0.9187793731689453, "step": 3824 }, { "epoch": 0.31323895218499526, "grad_norm": 0.20399266481399536, "learning_rate": 2.0728006790907963e-05, "loss": 0.8983795642852783, "step": 3825 }, { "epoch": 0.31332084472151417, "grad_norm": 0.1870231032371521, "learning_rate": 2.072628967986663e-05, "loss": 0.8341652154922485, "step": 3826 }, { "epoch": 0.31340273725803314, "grad_norm": 0.21393652260303497, "learning_rate": 2.0724572189544806e-05, "loss": 0.8763335943222046, "step": 3827 }, { "epoch": 0.3134846297945521, "grad_norm": 0.20966021716594696, "learning_rate": 2.0722854320017144e-05, "loss": 0.9868144989013672, "step": 3828 }, { "epoch": 0.3135665223310711, "grad_norm": 0.20558182895183563, "learning_rate": 2.07211360713583e-05, "loss": 0.6942949295043945, "step": 3829 }, { "epoch": 0.31364841486759, "grad_norm": 0.21020591259002686, "learning_rate": 2.0719417443642966e-05, "loss": 0.9508772492408752, "step": 3830 }, { "epoch": 0.31373030740410895, "grad_norm": 0.20934821665287018, "learning_rate": 2.0717698436945836e-05, "loss": 0.877099335193634, "step": 3831 }, { "epoch": 0.3138121999406279, "grad_norm": 0.17882463335990906, "learning_rate": 2.071597905134163e-05, "loss": 0.8389298915863037, "step": 3832 }, { "epoch": 0.3138940924771469, "grad_norm": 0.16195642948150635, "learning_rate": 2.0714259286905072e-05, "loss": 0.5474529266357422, "step": 3833 }, { "epoch": 0.3139759850136658, "grad_norm": 0.18354681134223938, "learning_rate": 2.0712539143710912e-05, "loss": 0.7336528897285461, "step": 3834 }, { "epoch": 0.31405787755018477, "grad_norm": 0.19657662510871887, "learning_rate": 2.0710818621833925e-05, "loss": 0.8177025318145752, "step": 3835 }, { "epoch": 0.31413977008670374, "grad_norm": 0.1851317286491394, "learning_rate": 2.0709097721348877e-05, "loss": 0.8647388219833374, "step": 3836 }, { "epoch": 0.31422166262322265, "grad_norm": 0.20581874251365662, "learning_rate": 2.070737644233057e-05, "loss": 0.701156735420227, "step": 3837 }, { "epoch": 0.3143035551597416, "grad_norm": 0.18253971636295319, "learning_rate": 2.070565478485383e-05, "loss": 0.9010382294654846, "step": 3838 }, { "epoch": 0.3143854476962606, "grad_norm": 0.1823616474866867, "learning_rate": 2.0703932748993465e-05, "loss": 0.6611784100532532, "step": 3839 }, { "epoch": 0.31446734023277956, "grad_norm": 0.14296896755695343, "learning_rate": 2.0702210334824342e-05, "loss": 0.5418720245361328, "step": 3840 }, { "epoch": 0.31454923276929847, "grad_norm": 0.23105500638484955, "learning_rate": 2.070048754242131e-05, "loss": 1.025282382965088, "step": 3841 }, { "epoch": 0.31463112530581744, "grad_norm": 0.20904365181922913, "learning_rate": 2.069876437185926e-05, "loss": 0.7960243821144104, "step": 3842 }, { "epoch": 0.3147130178423364, "grad_norm": 0.18670852482318878, "learning_rate": 2.069704082321308e-05, "loss": 0.826521635055542, "step": 3843 }, { "epoch": 0.3147949103788553, "grad_norm": 0.17215362191200256, "learning_rate": 2.0695316896557685e-05, "loss": 0.7121895551681519, "step": 3844 }, { "epoch": 0.3148768029153743, "grad_norm": 0.22568632662296295, "learning_rate": 2.0693592591968006e-05, "loss": 0.8222243785858154, "step": 3845 }, { "epoch": 0.31495869545189326, "grad_norm": 0.2369709610939026, "learning_rate": 2.0691867909518985e-05, "loss": 1.3335106372833252, "step": 3846 }, { "epoch": 0.3150405879884122, "grad_norm": 0.2030957043170929, "learning_rate": 2.069014284928559e-05, "loss": 0.7807875871658325, "step": 3847 }, { "epoch": 0.31512248052493114, "grad_norm": 0.20485574007034302, "learning_rate": 2.068841741134279e-05, "loss": 0.9713330864906311, "step": 3848 }, { "epoch": 0.3152043730614501, "grad_norm": 0.20400424301624298, "learning_rate": 2.0686691595765585e-05, "loss": 1.037530779838562, "step": 3849 }, { "epoch": 0.3152862655979691, "grad_norm": 0.2002319097518921, "learning_rate": 2.0684965402628987e-05, "loss": 0.7449715733528137, "step": 3850 }, { "epoch": 0.31536815813448804, "grad_norm": 0.16794659197330475, "learning_rate": 2.068323883200802e-05, "loss": 0.9423976540565491, "step": 3851 }, { "epoch": 0.31545005067100695, "grad_norm": 0.18332268297672272, "learning_rate": 2.0681511883977728e-05, "loss": 0.615775465965271, "step": 3852 }, { "epoch": 0.3155319432075259, "grad_norm": 0.2267131358385086, "learning_rate": 2.0679784558613178e-05, "loss": 0.6362888813018799, "step": 3853 }, { "epoch": 0.3156138357440449, "grad_norm": 0.2197464555501938, "learning_rate": 2.0678056855989437e-05, "loss": 0.938831090927124, "step": 3854 }, { "epoch": 0.3156957282805638, "grad_norm": 0.21812167763710022, "learning_rate": 2.0676328776181606e-05, "loss": 0.7093120217323303, "step": 3855 }, { "epoch": 0.31577762081708277, "grad_norm": 0.15155361592769623, "learning_rate": 2.067460031926479e-05, "loss": 0.9182919263839722, "step": 3856 }, { "epoch": 0.31585951335360174, "grad_norm": 0.21244792640209198, "learning_rate": 2.0672871485314114e-05, "loss": 1.2241127490997314, "step": 3857 }, { "epoch": 0.3159414058901207, "grad_norm": 0.21129639446735382, "learning_rate": 2.0671142274404723e-05, "loss": 0.9119269251823425, "step": 3858 }, { "epoch": 0.3160232984266396, "grad_norm": 0.18024909496307373, "learning_rate": 2.066941268661178e-05, "loss": 0.7217423915863037, "step": 3859 }, { "epoch": 0.3161051909631586, "grad_norm": 0.2295406460762024, "learning_rate": 2.066768272201045e-05, "loss": 0.8601570725440979, "step": 3860 }, { "epoch": 0.31618708349967756, "grad_norm": 0.21038605272769928, "learning_rate": 2.0665952380675933e-05, "loss": 0.7318704724311829, "step": 3861 }, { "epoch": 0.3162689760361965, "grad_norm": 0.1876082569360733, "learning_rate": 2.066422166268343e-05, "loss": 0.858354389667511, "step": 3862 }, { "epoch": 0.31635086857271544, "grad_norm": 0.19325366616249084, "learning_rate": 2.066249056810817e-05, "loss": 0.6785359382629395, "step": 3863 }, { "epoch": 0.3164327611092344, "grad_norm": 0.19640369713306427, "learning_rate": 2.0660759097025386e-05, "loss": 0.927278459072113, "step": 3864 }, { "epoch": 0.3165146536457534, "grad_norm": 0.1848151832818985, "learning_rate": 2.065902724951035e-05, "loss": 0.558994710445404, "step": 3865 }, { "epoch": 0.3165965461822723, "grad_norm": 0.25601357221603394, "learning_rate": 2.065729502563832e-05, "loss": 1.0158015489578247, "step": 3866 }, { "epoch": 0.31667843871879126, "grad_norm": 0.17892087996006012, "learning_rate": 2.0655562425484594e-05, "loss": 0.7735381722450256, "step": 3867 }, { "epoch": 0.3167603312553102, "grad_norm": 0.2392985224723816, "learning_rate": 2.0653829449124473e-05, "loss": 1.0902955532073975, "step": 3868 }, { "epoch": 0.3168422237918292, "grad_norm": 0.1831737458705902, "learning_rate": 2.065209609663328e-05, "loss": 0.8795191645622253, "step": 3869 }, { "epoch": 0.3169241163283481, "grad_norm": 0.2295103520154953, "learning_rate": 2.065036236808636e-05, "loss": 0.6277364492416382, "step": 3870 }, { "epoch": 0.3170060088648671, "grad_norm": 0.19100867211818695, "learning_rate": 2.0648628263559063e-05, "loss": 0.8783537149429321, "step": 3871 }, { "epoch": 0.31708790140138604, "grad_norm": 0.19775338470935822, "learning_rate": 2.0646893783126756e-05, "loss": 0.7873702049255371, "step": 3872 }, { "epoch": 0.317169793937905, "grad_norm": 0.21302008628845215, "learning_rate": 2.0645158926864835e-05, "loss": 0.7487426996231079, "step": 3873 }, { "epoch": 0.3172516864744239, "grad_norm": 0.17428874969482422, "learning_rate": 2.06434236948487e-05, "loss": 0.8748592138290405, "step": 3874 }, { "epoch": 0.3173335790109429, "grad_norm": 0.1516123265028, "learning_rate": 2.0641688087153768e-05, "loss": 0.7990954518318176, "step": 3875 }, { "epoch": 0.31741547154746186, "grad_norm": 0.18156829476356506, "learning_rate": 2.0639952103855483e-05, "loss": 0.8907844424247742, "step": 3876 }, { "epoch": 0.31749736408398077, "grad_norm": 0.18587413430213928, "learning_rate": 2.0638215745029292e-05, "loss": 1.0132131576538086, "step": 3877 }, { "epoch": 0.31757925662049974, "grad_norm": 0.19086430966854095, "learning_rate": 2.0636479010750668e-05, "loss": 0.737686038017273, "step": 3878 }, { "epoch": 0.3176611491570187, "grad_norm": 0.1720389574766159, "learning_rate": 2.0634741901095096e-05, "loss": 0.8174468278884888, "step": 3879 }, { "epoch": 0.3177430416935377, "grad_norm": 0.17553453147411346, "learning_rate": 2.0633004416138076e-05, "loss": 0.9217116832733154, "step": 3880 }, { "epoch": 0.3178249342300566, "grad_norm": 0.1806309074163437, "learning_rate": 2.0631266555955127e-05, "loss": 0.7567261457443237, "step": 3881 }, { "epoch": 0.31790682676657556, "grad_norm": 0.21313153207302094, "learning_rate": 2.0629528320621784e-05, "loss": 1.0394160747528076, "step": 3882 }, { "epoch": 0.3179887193030945, "grad_norm": 0.18041832745075226, "learning_rate": 2.0627789710213598e-05, "loss": 0.8996294736862183, "step": 3883 }, { "epoch": 0.3180706118396135, "grad_norm": 0.18491987884044647, "learning_rate": 2.0626050724806136e-05, "loss": 0.6309583783149719, "step": 3884 }, { "epoch": 0.3181525043761324, "grad_norm": 0.21424968540668488, "learning_rate": 2.0624311364474982e-05, "loss": 0.8386402726173401, "step": 3885 }, { "epoch": 0.3182343969126514, "grad_norm": 0.20529955625534058, "learning_rate": 2.062257162929573e-05, "loss": 0.8438444137573242, "step": 3886 }, { "epoch": 0.31831628944917034, "grad_norm": 0.16787421703338623, "learning_rate": 2.062083151934401e-05, "loss": 0.8958845734596252, "step": 3887 }, { "epoch": 0.31839818198568925, "grad_norm": 0.18092255294322968, "learning_rate": 2.0619091034695438e-05, "loss": 1.0243592262268066, "step": 3888 }, { "epoch": 0.3184800745222082, "grad_norm": 0.18010175228118896, "learning_rate": 2.0617350175425677e-05, "loss": 0.8616993427276611, "step": 3889 }, { "epoch": 0.3185619670587272, "grad_norm": 0.20390157401561737, "learning_rate": 2.061560894161038e-05, "loss": 0.8038257360458374, "step": 3890 }, { "epoch": 0.31864385959524616, "grad_norm": 0.1707228720188141, "learning_rate": 2.0613867333325238e-05, "loss": 0.7912411093711853, "step": 3891 }, { "epoch": 0.31872575213176507, "grad_norm": 0.17128688097000122, "learning_rate": 2.0612125350645938e-05, "loss": 0.9279229044914246, "step": 3892 }, { "epoch": 0.31880764466828404, "grad_norm": 0.22608759999275208, "learning_rate": 2.0610382993648203e-05, "loss": 0.7735222578048706, "step": 3893 }, { "epoch": 0.318889537204803, "grad_norm": 0.19106890261173248, "learning_rate": 2.060864026240776e-05, "loss": 0.9058697819709778, "step": 3894 }, { "epoch": 0.318971429741322, "grad_norm": 0.18119873106479645, "learning_rate": 2.060689715700035e-05, "loss": 0.7403808832168579, "step": 3895 }, { "epoch": 0.3190533222778409, "grad_norm": 0.21368330717086792, "learning_rate": 2.0605153677501744e-05, "loss": 1.1984975337982178, "step": 3896 }, { "epoch": 0.31913521481435986, "grad_norm": 0.19462525844573975, "learning_rate": 2.0603409823987716e-05, "loss": 0.8307121992111206, "step": 3897 }, { "epoch": 0.3192171073508788, "grad_norm": 0.21561211347579956, "learning_rate": 2.0601665596534063e-05, "loss": 0.6790074110031128, "step": 3898 }, { "epoch": 0.31929899988739774, "grad_norm": 0.2133370041847229, "learning_rate": 2.0599920995216598e-05, "loss": 0.6817212104797363, "step": 3899 }, { "epoch": 0.3193808924239167, "grad_norm": 0.19220006465911865, "learning_rate": 2.0598176020111144e-05, "loss": 0.9359793663024902, "step": 3900 }, { "epoch": 0.3194627849604357, "grad_norm": 0.21502521634101868, "learning_rate": 2.0596430671293547e-05, "loss": 0.9597976803779602, "step": 3901 }, { "epoch": 0.31954467749695464, "grad_norm": 0.1860586553812027, "learning_rate": 2.0594684948839668e-05, "loss": 0.8201647996902466, "step": 3902 }, { "epoch": 0.31962657003347356, "grad_norm": 0.18704071640968323, "learning_rate": 2.059293885282538e-05, "loss": 0.7935498952865601, "step": 3903 }, { "epoch": 0.3197084625699925, "grad_norm": 0.19323976337909698, "learning_rate": 2.0591192383326578e-05, "loss": 0.8398987054824829, "step": 3904 }, { "epoch": 0.3197903551065115, "grad_norm": 0.20410950481891632, "learning_rate": 2.058944554041917e-05, "loss": 0.9792617559432983, "step": 3905 }, { "epoch": 0.31987224764303046, "grad_norm": 0.15536758303642273, "learning_rate": 2.0587698324179083e-05, "loss": 0.8749412894248962, "step": 3906 }, { "epoch": 0.3199541401795494, "grad_norm": 0.21461927890777588, "learning_rate": 2.0585950734682256e-05, "loss": 0.7866345643997192, "step": 3907 }, { "epoch": 0.32003603271606834, "grad_norm": 0.1821107566356659, "learning_rate": 2.0584202772004647e-05, "loss": 0.9668945670127869, "step": 3908 }, { "epoch": 0.3201179252525873, "grad_norm": 0.18682432174682617, "learning_rate": 2.058245443622223e-05, "loss": 0.9067835807800293, "step": 3909 }, { "epoch": 0.3201998177891062, "grad_norm": 0.18764114379882812, "learning_rate": 2.0580705727410995e-05, "loss": 0.723134458065033, "step": 3910 }, { "epoch": 0.3202817103256252, "grad_norm": 0.17350256443023682, "learning_rate": 2.0578956645646948e-05, "loss": 0.6418245434761047, "step": 3911 }, { "epoch": 0.32036360286214416, "grad_norm": 0.2288198322057724, "learning_rate": 2.057720719100611e-05, "loss": 0.856052577495575, "step": 3912 }, { "epoch": 0.3204454953986631, "grad_norm": 0.20863474905490875, "learning_rate": 2.0575457363564524e-05, "loss": 1.129311203956604, "step": 3913 }, { "epoch": 0.32052738793518204, "grad_norm": 0.1890038698911667, "learning_rate": 2.0573707163398238e-05, "loss": 0.9141160249710083, "step": 3914 }, { "epoch": 0.320609280471701, "grad_norm": 0.17243625223636627, "learning_rate": 2.0571956590583326e-05, "loss": 0.8704214692115784, "step": 3915 }, { "epoch": 0.32069117300822, "grad_norm": 0.17833374440670013, "learning_rate": 2.0570205645195874e-05, "loss": 0.4970870316028595, "step": 3916 }, { "epoch": 0.3207730655447389, "grad_norm": 0.1937279850244522, "learning_rate": 2.056845432731199e-05, "loss": 0.7491456270217896, "step": 3917 }, { "epoch": 0.32085495808125786, "grad_norm": 0.15782898664474487, "learning_rate": 2.0566702637007787e-05, "loss": 0.6254806518554688, "step": 3918 }, { "epoch": 0.3209368506177768, "grad_norm": 0.1783123016357422, "learning_rate": 2.0564950574359405e-05, "loss": 0.8593987226486206, "step": 3919 }, { "epoch": 0.3210187431542958, "grad_norm": 0.18524810671806335, "learning_rate": 2.0563198139442997e-05, "loss": 0.755302369594574, "step": 3920 }, { "epoch": 0.3211006356908147, "grad_norm": 0.16901125013828278, "learning_rate": 2.0561445332334724e-05, "loss": 0.7434175610542297, "step": 3921 }, { "epoch": 0.3211825282273337, "grad_norm": 0.18124042451381683, "learning_rate": 2.0559692153110777e-05, "loss": 0.6324748992919922, "step": 3922 }, { "epoch": 0.32126442076385264, "grad_norm": 0.21092143654823303, "learning_rate": 2.0557938601847357e-05, "loss": 0.7077627182006836, "step": 3923 }, { "epoch": 0.3213463133003716, "grad_norm": 0.18164531886577606, "learning_rate": 2.0556184678620674e-05, "loss": 0.9406149983406067, "step": 3924 }, { "epoch": 0.3214282058368905, "grad_norm": 0.20724418759346008, "learning_rate": 2.0554430383506967e-05, "loss": 0.947371780872345, "step": 3925 }, { "epoch": 0.3215100983734095, "grad_norm": 0.2119489163160324, "learning_rate": 2.0552675716582483e-05, "loss": 0.6114541292190552, "step": 3926 }, { "epoch": 0.32159199090992846, "grad_norm": 0.20484659075737, "learning_rate": 2.055092067792348e-05, "loss": 0.8839597702026367, "step": 3927 }, { "epoch": 0.3216738834464474, "grad_norm": 0.19731709361076355, "learning_rate": 2.0549165267606254e-05, "loss": 0.6500949859619141, "step": 3928 }, { "epoch": 0.32175577598296634, "grad_norm": 0.1871248185634613, "learning_rate": 2.054740948570709e-05, "loss": 0.7536279559135437, "step": 3929 }, { "epoch": 0.3218376685194853, "grad_norm": 0.21729592978954315, "learning_rate": 2.0545653332302305e-05, "loss": 0.8361296057701111, "step": 3930 }, { "epoch": 0.3219195610560043, "grad_norm": 0.17965973913669586, "learning_rate": 2.054389680746823e-05, "loss": 0.9434899091720581, "step": 3931 }, { "epoch": 0.3220014535925232, "grad_norm": 0.1846047192811966, "learning_rate": 2.054213991128121e-05, "loss": 0.8550084233283997, "step": 3932 }, { "epoch": 0.32208334612904216, "grad_norm": 0.22459964454174042, "learning_rate": 2.0540382643817612e-05, "loss": 1.0315570831298828, "step": 3933 }, { "epoch": 0.3221652386655611, "grad_norm": 0.1752089112997055, "learning_rate": 2.0538625005153804e-05, "loss": 0.9271630048751831, "step": 3934 }, { "epoch": 0.3222471312020801, "grad_norm": 0.24773702025413513, "learning_rate": 2.0536866995366184e-05, "loss": 0.8134306073188782, "step": 3935 }, { "epoch": 0.322329023738599, "grad_norm": 0.1779778003692627, "learning_rate": 2.0535108614531163e-05, "loss": 0.567272961139679, "step": 3936 }, { "epoch": 0.322410916275118, "grad_norm": 0.19251899421215057, "learning_rate": 2.053334986272517e-05, "loss": 0.641463577747345, "step": 3937 }, { "epoch": 0.32249280881163694, "grad_norm": 0.17036749422550201, "learning_rate": 2.0531590740024647e-05, "loss": 0.9544930458068848, "step": 3938 }, { "epoch": 0.32257470134815586, "grad_norm": 0.2248890995979309, "learning_rate": 2.052983124650605e-05, "loss": 0.8219738006591797, "step": 3939 }, { "epoch": 0.3226565938846748, "grad_norm": 0.2041727751493454, "learning_rate": 2.052807138224586e-05, "loss": 0.8931393623352051, "step": 3940 }, { "epoch": 0.3227384864211938, "grad_norm": 0.17000088095664978, "learning_rate": 2.0526311147320555e-05, "loss": 0.855679452419281, "step": 3941 }, { "epoch": 0.32282037895771276, "grad_norm": 0.2026071399450302, "learning_rate": 2.0524550541806654e-05, "loss": 0.7965473532676697, "step": 3942 }, { "epoch": 0.3229022714942317, "grad_norm": 0.18399696052074432, "learning_rate": 2.0522789565780678e-05, "loss": 0.803577721118927, "step": 3943 }, { "epoch": 0.32298416403075064, "grad_norm": 0.2059074491262436, "learning_rate": 2.0521028219319163e-05, "loss": 1.0486431121826172, "step": 3944 }, { "epoch": 0.3230660565672696, "grad_norm": 0.18338526785373688, "learning_rate": 2.0519266502498666e-05, "loss": 1.0016177892684937, "step": 3945 }, { "epoch": 0.3231479491037886, "grad_norm": 0.1801069974899292, "learning_rate": 2.0517504415395758e-05, "loss": 0.5547131896018982, "step": 3946 }, { "epoch": 0.3232298416403075, "grad_norm": 0.1768687665462494, "learning_rate": 2.0515741958087027e-05, "loss": 1.0042048692703247, "step": 3947 }, { "epoch": 0.32331173417682646, "grad_norm": 0.17749008536338806, "learning_rate": 2.051397913064908e-05, "loss": 0.6840186715126038, "step": 3948 }, { "epoch": 0.32339362671334543, "grad_norm": 0.1998671144247055, "learning_rate": 2.051221593315853e-05, "loss": 0.9369029998779297, "step": 3949 }, { "epoch": 0.32347551924986434, "grad_norm": 0.19199851155281067, "learning_rate": 2.051045236569202e-05, "loss": 0.8841372728347778, "step": 3950 }, { "epoch": 0.3235574117863833, "grad_norm": 0.22909732162952423, "learning_rate": 2.0508688428326194e-05, "loss": 0.6738160848617554, "step": 3951 }, { "epoch": 0.3236393043229023, "grad_norm": 0.1914035528898239, "learning_rate": 2.0506924121137723e-05, "loss": 0.7841907143592834, "step": 3952 }, { "epoch": 0.32372119685942125, "grad_norm": 0.18163597583770752, "learning_rate": 2.0505159444203294e-05, "loss": 1.0898945331573486, "step": 3953 }, { "epoch": 0.32380308939594016, "grad_norm": 0.21326129138469696, "learning_rate": 2.0503394397599608e-05, "loss": 0.7765082716941833, "step": 3954 }, { "epoch": 0.3238849819324591, "grad_norm": 0.1583007276058197, "learning_rate": 2.0501628981403374e-05, "loss": 0.7911266684532166, "step": 3955 }, { "epoch": 0.3239668744689781, "grad_norm": 0.17539963126182556, "learning_rate": 2.0499863195691335e-05, "loss": 0.9357081651687622, "step": 3956 }, { "epoch": 0.32404876700549706, "grad_norm": 0.18082186579704285, "learning_rate": 2.049809704054023e-05, "loss": 0.7464720606803894, "step": 3957 }, { "epoch": 0.324130659542016, "grad_norm": 0.1655406951904297, "learning_rate": 2.0496330516026828e-05, "loss": 0.8147081136703491, "step": 3958 }, { "epoch": 0.32421255207853494, "grad_norm": 0.2123180329799652, "learning_rate": 2.0494563622227903e-05, "loss": 0.8541117310523987, "step": 3959 }, { "epoch": 0.3242944446150539, "grad_norm": 0.22505545616149902, "learning_rate": 2.0492796359220263e-05, "loss": 0.960929274559021, "step": 3960 }, { "epoch": 0.3243763371515728, "grad_norm": 0.1861141473054886, "learning_rate": 2.049102872708071e-05, "loss": 0.8769974112510681, "step": 3961 }, { "epoch": 0.3244582296880918, "grad_norm": 0.18319188058376312, "learning_rate": 2.0489260725886075e-05, "loss": 0.9222878217697144, "step": 3962 }, { "epoch": 0.32454012222461076, "grad_norm": 0.16979198157787323, "learning_rate": 2.048749235571321e-05, "loss": 0.8676437139511108, "step": 3963 }, { "epoch": 0.32462201476112973, "grad_norm": 0.16637015342712402, "learning_rate": 2.0485723616638965e-05, "loss": 1.018117904663086, "step": 3964 }, { "epoch": 0.32470390729764864, "grad_norm": 0.1901896893978119, "learning_rate": 2.0483954508740223e-05, "loss": 1.0465023517608643, "step": 3965 }, { "epoch": 0.3247857998341676, "grad_norm": 0.19633741676807404, "learning_rate": 2.0482185032093873e-05, "loss": 0.6522077918052673, "step": 3966 }, { "epoch": 0.3248676923706866, "grad_norm": 0.2304004728794098, "learning_rate": 2.048041518677683e-05, "loss": 0.8279807567596436, "step": 3967 }, { "epoch": 0.32494958490720555, "grad_norm": 0.17468871176242828, "learning_rate": 2.0478644972866008e-05, "loss": 1.110076904296875, "step": 3968 }, { "epoch": 0.32503147744372446, "grad_norm": 0.1968289017677307, "learning_rate": 2.0476874390438362e-05, "loss": 0.671642541885376, "step": 3969 }, { "epoch": 0.3251133699802434, "grad_norm": 0.18486829102039337, "learning_rate": 2.047510343957084e-05, "loss": 0.7551819086074829, "step": 3970 }, { "epoch": 0.3251952625167624, "grad_norm": 0.26064807176589966, "learning_rate": 2.047333212034041e-05, "loss": 0.9243712425231934, "step": 3971 }, { "epoch": 0.3252771550532813, "grad_norm": 0.21018044650554657, "learning_rate": 2.047156043282407e-05, "loss": 0.6543718576431274, "step": 3972 }, { "epoch": 0.3253590475898003, "grad_norm": 0.18876391649246216, "learning_rate": 2.0469788377098826e-05, "loss": 0.82879638671875, "step": 3973 }, { "epoch": 0.32544094012631924, "grad_norm": 0.22995233535766602, "learning_rate": 2.0468015953241692e-05, "loss": 0.7684871554374695, "step": 3974 }, { "epoch": 0.3255228326628382, "grad_norm": 0.18712438642978668, "learning_rate": 2.046624316132971e-05, "loss": 0.9632517099380493, "step": 3975 }, { "epoch": 0.3256047251993571, "grad_norm": 0.18045209348201752, "learning_rate": 2.0464470001439927e-05, "loss": 0.7628248929977417, "step": 3976 }, { "epoch": 0.3256866177358761, "grad_norm": 0.18567217886447906, "learning_rate": 2.046269647364941e-05, "loss": 0.9612525105476379, "step": 3977 }, { "epoch": 0.32576851027239506, "grad_norm": 0.19326569139957428, "learning_rate": 2.046092257803526e-05, "loss": 0.9389368295669556, "step": 3978 }, { "epoch": 0.32585040280891403, "grad_norm": 0.17253535985946655, "learning_rate": 2.0459148314674563e-05, "loss": 0.7407574653625488, "step": 3979 }, { "epoch": 0.32593229534543294, "grad_norm": 0.18855491280555725, "learning_rate": 2.0457373683644437e-05, "loss": 1.1804454326629639, "step": 3980 }, { "epoch": 0.3260141878819519, "grad_norm": 0.2047351449728012, "learning_rate": 2.0455598685022018e-05, "loss": 0.9871070384979248, "step": 3981 }, { "epoch": 0.3260960804184709, "grad_norm": 0.15462957322597504, "learning_rate": 2.0453823318884452e-05, "loss": 0.6662690043449402, "step": 3982 }, { "epoch": 0.3261779729549898, "grad_norm": 0.14857029914855957, "learning_rate": 2.0452047585308913e-05, "loss": 0.3944564163684845, "step": 3983 }, { "epoch": 0.32625986549150876, "grad_norm": 0.20832253992557526, "learning_rate": 2.045027148437257e-05, "loss": 0.8589080572128296, "step": 3984 }, { "epoch": 0.32634175802802773, "grad_norm": 0.17122115194797516, "learning_rate": 2.044849501615262e-05, "loss": 0.9156578183174133, "step": 3985 }, { "epoch": 0.3264236505645467, "grad_norm": 0.21610112488269806, "learning_rate": 2.0446718180726288e-05, "loss": 0.9434451460838318, "step": 3986 }, { "epoch": 0.3265055431010656, "grad_norm": 0.22846412658691406, "learning_rate": 2.044494097817079e-05, "loss": 0.9660305380821228, "step": 3987 }, { "epoch": 0.3265874356375846, "grad_norm": 0.19854377210140228, "learning_rate": 2.0443163408563374e-05, "loss": 0.7821274995803833, "step": 3988 }, { "epoch": 0.32666932817410355, "grad_norm": 0.20556685328483582, "learning_rate": 2.0441385471981303e-05, "loss": 0.8567473888397217, "step": 3989 }, { "epoch": 0.32675122071062246, "grad_norm": 0.18805284798145294, "learning_rate": 2.0439607168501852e-05, "loss": 0.7281758785247803, "step": 3990 }, { "epoch": 0.3268331132471414, "grad_norm": 0.19593830406665802, "learning_rate": 2.043782849820231e-05, "loss": 0.6428591012954712, "step": 3991 }, { "epoch": 0.3269150057836604, "grad_norm": 0.22520758211612701, "learning_rate": 2.0436049461159993e-05, "loss": 0.7370777726173401, "step": 3992 }, { "epoch": 0.32699689832017936, "grad_norm": 0.17537952959537506, "learning_rate": 2.0434270057452222e-05, "loss": 0.5172303318977356, "step": 3993 }, { "epoch": 0.3270787908566983, "grad_norm": 0.19850336015224457, "learning_rate": 2.043249028715633e-05, "loss": 1.1505286693572998, "step": 3994 }, { "epoch": 0.32716068339321724, "grad_norm": 0.22050577402114868, "learning_rate": 2.0430710150349685e-05, "loss": 0.7051447033882141, "step": 3995 }, { "epoch": 0.3272425759297362, "grad_norm": 0.2753869593143463, "learning_rate": 2.042892964710965e-05, "loss": 0.7393182516098022, "step": 3996 }, { "epoch": 0.3273244684662552, "grad_norm": 0.22312431037425995, "learning_rate": 2.0427148777513617e-05, "loss": 1.0518414974212646, "step": 3997 }, { "epoch": 0.3274063610027741, "grad_norm": 0.19419540464878082, "learning_rate": 2.0425367541638988e-05, "loss": 0.9165040850639343, "step": 3998 }, { "epoch": 0.32748825353929306, "grad_norm": 0.20299820601940155, "learning_rate": 2.0423585939563187e-05, "loss": 0.8882592916488647, "step": 3999 }, { "epoch": 0.32757014607581203, "grad_norm": 0.16102394461631775, "learning_rate": 2.0421803971363645e-05, "loss": 1.0121883153915405, "step": 4000 }, { "epoch": 0.32765203861233094, "grad_norm": 0.17446939647197723, "learning_rate": 2.0420021637117815e-05, "loss": 0.6966046690940857, "step": 4001 }, { "epoch": 0.3277339311488499, "grad_norm": 0.21143308281898499, "learning_rate": 2.0418238936903168e-05, "loss": 1.127868413925171, "step": 4002 }, { "epoch": 0.3278158236853689, "grad_norm": 0.22681520879268646, "learning_rate": 2.0416455870797182e-05, "loss": 0.73637855052948, "step": 4003 }, { "epoch": 0.32789771622188785, "grad_norm": 0.189267098903656, "learning_rate": 2.041467243887736e-05, "loss": 0.8378512263298035, "step": 4004 }, { "epoch": 0.32797960875840676, "grad_norm": 0.16721610724925995, "learning_rate": 2.0412888641221217e-05, "loss": 1.018246054649353, "step": 4005 }, { "epoch": 0.32806150129492573, "grad_norm": 0.19538401067256927, "learning_rate": 2.041110447790628e-05, "loss": 1.0527546405792236, "step": 4006 }, { "epoch": 0.3281433938314447, "grad_norm": 0.18923735618591309, "learning_rate": 2.0409319949010103e-05, "loss": 0.7907548546791077, "step": 4007 }, { "epoch": 0.32822528636796366, "grad_norm": 0.1968170702457428, "learning_rate": 2.0407535054610243e-05, "loss": 1.136070728302002, "step": 4008 }, { "epoch": 0.3283071789044826, "grad_norm": 0.186052143573761, "learning_rate": 2.0405749794784287e-05, "loss": 0.8291460275650024, "step": 4009 }, { "epoch": 0.32838907144100155, "grad_norm": 0.195217564702034, "learning_rate": 2.0403964169609818e-05, "loss": 0.8916608095169067, "step": 4010 }, { "epoch": 0.3284709639775205, "grad_norm": 0.22029122710227966, "learning_rate": 2.040217817916446e-05, "loss": 0.8192560076713562, "step": 4011 }, { "epoch": 0.3285528565140394, "grad_norm": 0.17189323902130127, "learning_rate": 2.0400391823525825e-05, "loss": 0.9450331330299377, "step": 4012 }, { "epoch": 0.3286347490505584, "grad_norm": 0.1893395483493805, "learning_rate": 2.0398605102771563e-05, "loss": 0.7952911853790283, "step": 4013 }, { "epoch": 0.32871664158707736, "grad_norm": 0.18401770293712616, "learning_rate": 2.0396818016979335e-05, "loss": 0.881236732006073, "step": 4014 }, { "epoch": 0.32879853412359633, "grad_norm": 0.20696619153022766, "learning_rate": 2.039503056622681e-05, "loss": 1.0850868225097656, "step": 4015 }, { "epoch": 0.32888042666011524, "grad_norm": 0.18049083650112152, "learning_rate": 2.0393242750591685e-05, "loss": 0.9143643975257874, "step": 4016 }, { "epoch": 0.3289623191966342, "grad_norm": 0.19269223511219025, "learning_rate": 2.0391454570151657e-05, "loss": 0.9244289994239807, "step": 4017 }, { "epoch": 0.3290442117331532, "grad_norm": 0.2084731161594391, "learning_rate": 2.038966602498445e-05, "loss": 1.1961641311645508, "step": 4018 }, { "epoch": 0.32912610426967215, "grad_norm": 0.15834586322307587, "learning_rate": 2.0387877115167802e-05, "loss": 0.6730955839157104, "step": 4019 }, { "epoch": 0.32920799680619106, "grad_norm": 0.20531298220157623, "learning_rate": 2.038608784077947e-05, "loss": 0.70001220703125, "step": 4020 }, { "epoch": 0.32928988934271003, "grad_norm": 0.19275188446044922, "learning_rate": 2.0384298201897223e-05, "loss": 1.0028375387191772, "step": 4021 }, { "epoch": 0.329371781879229, "grad_norm": 0.20015354454517365, "learning_rate": 2.038250819859884e-05, "loss": 1.0834403038024902, "step": 4022 }, { "epoch": 0.3294536744157479, "grad_norm": 0.22932399809360504, "learning_rate": 2.0380717830962126e-05, "loss": 0.983782172203064, "step": 4023 }, { "epoch": 0.3295355669522669, "grad_norm": 0.16976185142993927, "learning_rate": 2.03789270990649e-05, "loss": 0.7869149446487427, "step": 4024 }, { "epoch": 0.32961745948878585, "grad_norm": 0.22935444116592407, "learning_rate": 2.037713600298499e-05, "loss": 0.7244135737419128, "step": 4025 }, { "epoch": 0.3296993520253048, "grad_norm": 0.2144642025232315, "learning_rate": 2.0375344542800246e-05, "loss": 1.1511712074279785, "step": 4026 }, { "epoch": 0.3297812445618237, "grad_norm": 0.19879969954490662, "learning_rate": 2.0373552718588535e-05, "loss": 1.047278881072998, "step": 4027 }, { "epoch": 0.3298631370983427, "grad_norm": 0.19602613151073456, "learning_rate": 2.0371760530427733e-05, "loss": 0.8940179347991943, "step": 4028 }, { "epoch": 0.32994502963486166, "grad_norm": 0.16749311983585358, "learning_rate": 2.036996797839574e-05, "loss": 0.3920304477214813, "step": 4029 }, { "epoch": 0.33002692217138063, "grad_norm": 0.2268643081188202, "learning_rate": 2.0368175062570464e-05, "loss": 0.7390793561935425, "step": 4030 }, { "epoch": 0.33010881470789954, "grad_norm": 0.2297021448612213, "learning_rate": 2.036638178302983e-05, "loss": 1.1392782926559448, "step": 4031 }, { "epoch": 0.3301907072444185, "grad_norm": 0.22649626433849335, "learning_rate": 2.0364588139851786e-05, "loss": 0.8550149202346802, "step": 4032 }, { "epoch": 0.3302725997809375, "grad_norm": 0.23461101949214935, "learning_rate": 2.0362794133114295e-05, "loss": 1.0357513427734375, "step": 4033 }, { "epoch": 0.3303544923174564, "grad_norm": 0.22288455069065094, "learning_rate": 2.0360999762895326e-05, "loss": 0.748895525932312, "step": 4034 }, { "epoch": 0.33043638485397536, "grad_norm": 0.17852593958377838, "learning_rate": 2.0359205029272866e-05, "loss": 0.6420503258705139, "step": 4035 }, { "epoch": 0.33051827739049433, "grad_norm": 0.18965943157672882, "learning_rate": 2.035740993232493e-05, "loss": 0.8011335730552673, "step": 4036 }, { "epoch": 0.3306001699270133, "grad_norm": 0.19641093909740448, "learning_rate": 2.0355614472129538e-05, "loss": 0.9267378449440002, "step": 4037 }, { "epoch": 0.3306820624635322, "grad_norm": 0.21265588700771332, "learning_rate": 2.0353818648764724e-05, "loss": 0.7502137422561646, "step": 4038 }, { "epoch": 0.3307639550000512, "grad_norm": 0.18382368981838226, "learning_rate": 2.0352022462308544e-05, "loss": 0.6006070375442505, "step": 4039 }, { "epoch": 0.33084584753657015, "grad_norm": 0.170362651348114, "learning_rate": 2.0350225912839075e-05, "loss": 0.8745612502098083, "step": 4040 }, { "epoch": 0.3309277400730891, "grad_norm": 0.1651085466146469, "learning_rate": 2.034842900043439e-05, "loss": 0.7015158534049988, "step": 4041 }, { "epoch": 0.33100963260960803, "grad_norm": 0.19304487109184265, "learning_rate": 2.0346631725172595e-05, "loss": 0.7266128659248352, "step": 4042 }, { "epoch": 0.331091525146127, "grad_norm": 0.1942589432001114, "learning_rate": 2.034483408713181e-05, "loss": 0.830193042755127, "step": 4043 }, { "epoch": 0.33117341768264597, "grad_norm": 0.23720845580101013, "learning_rate": 2.0343036086390168e-05, "loss": 1.2950538396835327, "step": 4044 }, { "epoch": 0.3312553102191649, "grad_norm": 0.20238645374774933, "learning_rate": 2.034123772302581e-05, "loss": 0.7765851616859436, "step": 4045 }, { "epoch": 0.33133720275568385, "grad_norm": 0.17884604632854462, "learning_rate": 2.033943899711691e-05, "loss": 0.44498249888420105, "step": 4046 }, { "epoch": 0.3314190952922028, "grad_norm": 0.18991456925868988, "learning_rate": 2.0337639908741642e-05, "loss": 0.8519623875617981, "step": 4047 }, { "epoch": 0.3315009878287218, "grad_norm": 0.19754613935947418, "learning_rate": 2.0335840457978204e-05, "loss": 0.9353293180465698, "step": 4048 }, { "epoch": 0.3315828803652407, "grad_norm": 0.18952597677707672, "learning_rate": 2.0334040644904802e-05, "loss": 1.0402045249938965, "step": 4049 }, { "epoch": 0.33166477290175966, "grad_norm": 0.23784281313419342, "learning_rate": 2.0332240469599678e-05, "loss": 0.7042875289916992, "step": 4050 }, { "epoch": 0.33174666543827863, "grad_norm": 0.19683462381362915, "learning_rate": 2.0330439932141057e-05, "loss": 0.8289764523506165, "step": 4051 }, { "epoch": 0.33182855797479754, "grad_norm": 0.18794338405132294, "learning_rate": 2.0328639032607208e-05, "loss": 0.8717424273490906, "step": 4052 }, { "epoch": 0.3319104505113165, "grad_norm": 0.4129798710346222, "learning_rate": 2.0326837771076407e-05, "loss": 0.9264035224914551, "step": 4053 }, { "epoch": 0.3319923430478355, "grad_norm": 0.17741082608699799, "learning_rate": 2.0325036147626938e-05, "loss": 0.8676615953445435, "step": 4054 }, { "epoch": 0.33207423558435445, "grad_norm": 0.17749322950839996, "learning_rate": 2.032323416233711e-05, "loss": 0.8169325590133667, "step": 4055 }, { "epoch": 0.33215612812087336, "grad_norm": 0.17200404405593872, "learning_rate": 2.0321431815285244e-05, "loss": 0.8413490056991577, "step": 4056 }, { "epoch": 0.33223802065739233, "grad_norm": 0.2325853407382965, "learning_rate": 2.0319629106549674e-05, "loss": 0.6615083813667297, "step": 4057 }, { "epoch": 0.3323199131939113, "grad_norm": 0.20090986788272858, "learning_rate": 2.0317826036208766e-05, "loss": 0.7788219451904297, "step": 4058 }, { "epoch": 0.33240180573043027, "grad_norm": 0.20628434419631958, "learning_rate": 2.031602260434087e-05, "loss": 0.8360165357589722, "step": 4059 }, { "epoch": 0.3324836982669492, "grad_norm": 0.1663960963487625, "learning_rate": 2.0314218811024386e-05, "loss": 0.861330509185791, "step": 4060 }, { "epoch": 0.33256559080346815, "grad_norm": 0.215473011136055, "learning_rate": 2.031241465633771e-05, "loss": 0.8846909999847412, "step": 4061 }, { "epoch": 0.3326474833399871, "grad_norm": 0.19832509756088257, "learning_rate": 2.0310610140359254e-05, "loss": 0.8285926580429077, "step": 4062 }, { "epoch": 0.33272937587650603, "grad_norm": 0.19998326897621155, "learning_rate": 2.030880526316745e-05, "loss": 0.7034660577774048, "step": 4063 }, { "epoch": 0.332811268413025, "grad_norm": 0.19860784709453583, "learning_rate": 2.0307000024840753e-05, "loss": 0.8173413872718811, "step": 4064 }, { "epoch": 0.33289316094954396, "grad_norm": 0.1582118570804596, "learning_rate": 2.0305194425457616e-05, "loss": 0.9139061570167542, "step": 4065 }, { "epoch": 0.33297505348606293, "grad_norm": 0.19425052404403687, "learning_rate": 2.0303388465096528e-05, "loss": 0.8305050134658813, "step": 4066 }, { "epoch": 0.33305694602258185, "grad_norm": 0.14408493041992188, "learning_rate": 2.030158214383598e-05, "loss": 0.5420559644699097, "step": 4067 }, { "epoch": 0.3331388385591008, "grad_norm": 0.19711323082447052, "learning_rate": 2.029977546175447e-05, "loss": 0.9916682839393616, "step": 4068 }, { "epoch": 0.3332207310956198, "grad_norm": 0.19299691915512085, "learning_rate": 2.029796841893054e-05, "loss": 0.8007975816726685, "step": 4069 }, { "epoch": 0.33330262363213875, "grad_norm": 0.20652566850185394, "learning_rate": 2.0296161015442726e-05, "loss": 0.8380779027938843, "step": 4070 }, { "epoch": 0.33338451616865766, "grad_norm": 0.18891799449920654, "learning_rate": 2.0294353251369585e-05, "loss": 0.5312878489494324, "step": 4071 }, { "epoch": 0.33346640870517663, "grad_norm": 0.20972566306591034, "learning_rate": 2.029254512678969e-05, "loss": 0.9175806045532227, "step": 4072 }, { "epoch": 0.3335483012416956, "grad_norm": 0.2145683616399765, "learning_rate": 2.0290736641781628e-05, "loss": 0.9316257238388062, "step": 4073 }, { "epoch": 0.3336301937782145, "grad_norm": 0.1590670794248581, "learning_rate": 2.0288927796424008e-05, "loss": 0.8267413973808289, "step": 4074 }, { "epoch": 0.3337120863147335, "grad_norm": 0.1610410511493683, "learning_rate": 2.0287118590795442e-05, "loss": 0.8455234169960022, "step": 4075 }, { "epoch": 0.33379397885125245, "grad_norm": 0.16455750167369843, "learning_rate": 2.028530902497457e-05, "loss": 0.7639331817626953, "step": 4076 }, { "epoch": 0.3338758713877714, "grad_norm": 0.19437019526958466, "learning_rate": 2.0283499099040046e-05, "loss": 0.6027877926826477, "step": 4077 }, { "epoch": 0.33395776392429033, "grad_norm": 0.1702795922756195, "learning_rate": 2.028168881307053e-05, "loss": 0.713958740234375, "step": 4078 }, { "epoch": 0.3340396564608093, "grad_norm": 0.19411027431488037, "learning_rate": 2.0279878167144712e-05, "loss": 0.7649436593055725, "step": 4079 }, { "epoch": 0.33412154899732827, "grad_norm": 0.17687830328941345, "learning_rate": 2.0278067161341285e-05, "loss": 1.0156811475753784, "step": 4080 }, { "epoch": 0.33420344153384723, "grad_norm": 0.20212875306606293, "learning_rate": 2.0276255795738968e-05, "loss": 0.8082864284515381, "step": 4081 }, { "epoch": 0.33428533407036615, "grad_norm": 0.20079933106899261, "learning_rate": 2.027444407041648e-05, "loss": 0.7874143123626709, "step": 4082 }, { "epoch": 0.3343672266068851, "grad_norm": 0.17478986084461212, "learning_rate": 2.0272631985452576e-05, "loss": 0.7269238829612732, "step": 4083 }, { "epoch": 0.3344491191434041, "grad_norm": 0.210065096616745, "learning_rate": 2.0270819540926015e-05, "loss": 0.6287314891815186, "step": 4084 }, { "epoch": 0.334531011679923, "grad_norm": 0.17534174025058746, "learning_rate": 2.0269006736915572e-05, "loss": 0.9051300287246704, "step": 4085 }, { "epoch": 0.33461290421644196, "grad_norm": 0.21370923519134521, "learning_rate": 2.0267193573500034e-05, "loss": 0.7044405937194824, "step": 4086 }, { "epoch": 0.33469479675296093, "grad_norm": 0.3106364607810974, "learning_rate": 2.026538005075822e-05, "loss": 0.7187976241111755, "step": 4087 }, { "epoch": 0.3347766892894799, "grad_norm": 0.1656503528356552, "learning_rate": 2.026356616876894e-05, "loss": 0.9378139972686768, "step": 4088 }, { "epoch": 0.3348585818259988, "grad_norm": 0.17929284274578094, "learning_rate": 2.026175192761104e-05, "loss": 1.0935980081558228, "step": 4089 }, { "epoch": 0.3349404743625178, "grad_norm": 0.16197103261947632, "learning_rate": 2.025993732736338e-05, "loss": 0.5480328798294067, "step": 4090 }, { "epoch": 0.33502236689903675, "grad_norm": 0.2683809697628021, "learning_rate": 2.0258122368104817e-05, "loss": 0.5010266900062561, "step": 4091 }, { "epoch": 0.3351042594355557, "grad_norm": 0.19227901101112366, "learning_rate": 2.0256307049914246e-05, "loss": 0.9019798040390015, "step": 4092 }, { "epoch": 0.33518615197207463, "grad_norm": 0.20224416255950928, "learning_rate": 2.0254491372870564e-05, "loss": 1.3310216665267944, "step": 4093 }, { "epoch": 0.3352680445085936, "grad_norm": 0.1752881556749344, "learning_rate": 2.0252675337052687e-05, "loss": 0.6946004629135132, "step": 4094 }, { "epoch": 0.33534993704511257, "grad_norm": 0.23102502524852753, "learning_rate": 2.0250858942539552e-05, "loss": 0.947023868560791, "step": 4095 }, { "epoch": 0.3354318295816315, "grad_norm": 0.19129011034965515, "learning_rate": 2.024904218941011e-05, "loss": 0.751158595085144, "step": 4096 }, { "epoch": 0.33551372211815045, "grad_norm": 0.2030688226222992, "learning_rate": 2.0247225077743313e-05, "loss": 0.8773525357246399, "step": 4097 }, { "epoch": 0.3355956146546694, "grad_norm": 0.1738300919532776, "learning_rate": 2.0245407607618146e-05, "loss": 0.8313312530517578, "step": 4098 }, { "epoch": 0.3356775071911884, "grad_norm": 0.1841152310371399, "learning_rate": 2.0243589779113604e-05, "loss": 0.7006422877311707, "step": 4099 }, { "epoch": 0.3357593997277073, "grad_norm": 0.19686107337474823, "learning_rate": 2.02417715923087e-05, "loss": 0.6052550077438354, "step": 4100 }, { "epoch": 0.33584129226422627, "grad_norm": 0.17831183969974518, "learning_rate": 2.0239953047282457e-05, "loss": 0.6575282216072083, "step": 4101 }, { "epoch": 0.33592318480074523, "grad_norm": 0.17297640442848206, "learning_rate": 2.0238134144113916e-05, "loss": 0.8445392847061157, "step": 4102 }, { "epoch": 0.3360050773372642, "grad_norm": 0.17780667543411255, "learning_rate": 2.0236314882882132e-05, "loss": 0.6708532571792603, "step": 4103 }, { "epoch": 0.3360869698737831, "grad_norm": 0.1817796379327774, "learning_rate": 2.0234495263666185e-05, "loss": 0.9366111159324646, "step": 4104 }, { "epoch": 0.3361688624103021, "grad_norm": 0.2038915604352951, "learning_rate": 2.0232675286545156e-05, "loss": 1.074502944946289, "step": 4105 }, { "epoch": 0.33625075494682105, "grad_norm": 0.21009889245033264, "learning_rate": 2.023085495159815e-05, "loss": 0.8993433713912964, "step": 4106 }, { "epoch": 0.33633264748333996, "grad_norm": 0.19389379024505615, "learning_rate": 2.0229034258904293e-05, "loss": 1.0217398405075073, "step": 4107 }, { "epoch": 0.33641454001985893, "grad_norm": 0.17575329542160034, "learning_rate": 2.0227213208542704e-05, "loss": 1.0826812982559204, "step": 4108 }, { "epoch": 0.3364964325563779, "grad_norm": 0.19492943584918976, "learning_rate": 2.0225391800592553e-05, "loss": 0.7999382019042969, "step": 4109 }, { "epoch": 0.33657832509289687, "grad_norm": 0.18974758684635162, "learning_rate": 2.022357003513299e-05, "loss": 0.6127067804336548, "step": 4110 }, { "epoch": 0.3366602176294158, "grad_norm": 0.17256681621074677, "learning_rate": 2.0221747912243207e-05, "loss": 0.6307836174964905, "step": 4111 }, { "epoch": 0.33674211016593475, "grad_norm": 0.2065284252166748, "learning_rate": 2.0219925432002396e-05, "loss": 1.0226571559906006, "step": 4112 }, { "epoch": 0.3368240027024537, "grad_norm": 0.16375331580638885, "learning_rate": 2.0218102594489773e-05, "loss": 0.6260685920715332, "step": 4113 }, { "epoch": 0.3369058952389727, "grad_norm": 0.16487957537174225, "learning_rate": 2.0216279399784556e-05, "loss": 0.9201445579528809, "step": 4114 }, { "epoch": 0.3369877877754916, "grad_norm": 0.18548017740249634, "learning_rate": 2.0214455847966003e-05, "loss": 0.8532046675682068, "step": 4115 }, { "epoch": 0.33706968031201057, "grad_norm": 0.2213117927312851, "learning_rate": 2.021263193911336e-05, "loss": 1.012744426727295, "step": 4116 }, { "epoch": 0.33715157284852953, "grad_norm": 0.2301885336637497, "learning_rate": 2.021080767330591e-05, "loss": 0.7633083462715149, "step": 4117 }, { "epoch": 0.33723346538504845, "grad_norm": 0.22310608625411987, "learning_rate": 2.0208983050622944e-05, "loss": 0.7867302894592285, "step": 4118 }, { "epoch": 0.3373153579215674, "grad_norm": 0.1906222403049469, "learning_rate": 2.0207158071143758e-05, "loss": 0.7465484142303467, "step": 4119 }, { "epoch": 0.3373972504580864, "grad_norm": 0.22309747338294983, "learning_rate": 2.0205332734947685e-05, "loss": 0.7260332703590393, "step": 4120 }, { "epoch": 0.33747914299460535, "grad_norm": 0.20883961021900177, "learning_rate": 2.0203507042114058e-05, "loss": 1.0104247331619263, "step": 4121 }, { "epoch": 0.33756103553112426, "grad_norm": 0.180904358625412, "learning_rate": 2.020168099272222e-05, "loss": 0.97743821144104, "step": 4122 }, { "epoch": 0.33764292806764323, "grad_norm": 0.19980546832084656, "learning_rate": 2.019985458685155e-05, "loss": 1.0802273750305176, "step": 4123 }, { "epoch": 0.3377248206041622, "grad_norm": 0.1654147505760193, "learning_rate": 2.0198027824581427e-05, "loss": 0.8144475221633911, "step": 4124 }, { "epoch": 0.3378067131406811, "grad_norm": 0.21910542249679565, "learning_rate": 2.0196200705991246e-05, "loss": 0.8999002575874329, "step": 4125 }, { "epoch": 0.3378886056772001, "grad_norm": 0.19500267505645752, "learning_rate": 2.0194373231160428e-05, "loss": 0.8315403461456299, "step": 4126 }, { "epoch": 0.33797049821371905, "grad_norm": 0.19605717062950134, "learning_rate": 2.0192545400168397e-05, "loss": 0.9278091192245483, "step": 4127 }, { "epoch": 0.338052390750238, "grad_norm": 0.17522062361240387, "learning_rate": 2.01907172130946e-05, "loss": 0.6893069744110107, "step": 4128 }, { "epoch": 0.33813428328675693, "grad_norm": 0.20571212470531464, "learning_rate": 2.0188888670018504e-05, "loss": 0.7394000291824341, "step": 4129 }, { "epoch": 0.3382161758232759, "grad_norm": 0.20247095823287964, "learning_rate": 2.0187059771019577e-05, "loss": 1.0518132448196411, "step": 4130 }, { "epoch": 0.33829806835979487, "grad_norm": 0.1958884447813034, "learning_rate": 2.0185230516177307e-05, "loss": 1.017464280128479, "step": 4131 }, { "epoch": 0.33837996089631384, "grad_norm": 0.1891353875398636, "learning_rate": 2.018340090557121e-05, "loss": 0.8115677833557129, "step": 4132 }, { "epoch": 0.33846185343283275, "grad_norm": 0.2231300175189972, "learning_rate": 2.0181570939280803e-05, "loss": 0.970905065536499, "step": 4133 }, { "epoch": 0.3385437459693517, "grad_norm": 0.19984278082847595, "learning_rate": 2.0179740617385627e-05, "loss": 0.7906361818313599, "step": 4134 }, { "epoch": 0.3386256385058707, "grad_norm": 0.2086818665266037, "learning_rate": 2.0177909939965235e-05, "loss": 0.7698701620101929, "step": 4135 }, { "epoch": 0.3387075310423896, "grad_norm": 0.2006273865699768, "learning_rate": 2.0176078907099192e-05, "loss": 1.41153085231781, "step": 4136 }, { "epoch": 0.33878942357890857, "grad_norm": 0.173396497964859, "learning_rate": 2.0174247518867087e-05, "loss": 0.6647032499313354, "step": 4137 }, { "epoch": 0.33887131611542753, "grad_norm": 0.1706957072019577, "learning_rate": 2.0172415775348513e-05, "loss": 0.5756266117095947, "step": 4138 }, { "epoch": 0.3389532086519465, "grad_norm": 0.2908274233341217, "learning_rate": 2.017058367662309e-05, "loss": 0.7330725193023682, "step": 4139 }, { "epoch": 0.3390351011884654, "grad_norm": 0.23696134984493256, "learning_rate": 2.0168751222770456e-05, "loss": 1.2401033639907837, "step": 4140 }, { "epoch": 0.3391169937249844, "grad_norm": 0.18230324983596802, "learning_rate": 2.0166918413870243e-05, "loss": 0.9775770902633667, "step": 4141 }, { "epoch": 0.33919888626150335, "grad_norm": 0.18045051395893097, "learning_rate": 2.0165085250002123e-05, "loss": 0.8250633478164673, "step": 4142 }, { "epoch": 0.3392807787980223, "grad_norm": 0.21234093606472015, "learning_rate": 2.0163251731245762e-05, "loss": 1.0050899982452393, "step": 4143 }, { "epoch": 0.33936267133454123, "grad_norm": 0.21815265715122223, "learning_rate": 2.0161417857680866e-05, "loss": 0.9787946939468384, "step": 4144 }, { "epoch": 0.3394445638710602, "grad_norm": 0.19218075275421143, "learning_rate": 2.015958362938713e-05, "loss": 0.8710106015205383, "step": 4145 }, { "epoch": 0.33952645640757917, "grad_norm": 0.1919543296098709, "learning_rate": 2.0157749046444286e-05, "loss": 0.8262546062469482, "step": 4146 }, { "epoch": 0.3396083489440981, "grad_norm": 0.20037543773651123, "learning_rate": 2.015591410893207e-05, "loss": 0.8349179625511169, "step": 4147 }, { "epoch": 0.33969024148061705, "grad_norm": 0.19892346858978271, "learning_rate": 2.015407881693023e-05, "loss": 0.5769320726394653, "step": 4148 }, { "epoch": 0.339772134017136, "grad_norm": 0.1886955201625824, "learning_rate": 2.0152243170518545e-05, "loss": 0.5961681008338928, "step": 4149 }, { "epoch": 0.339854026553655, "grad_norm": 0.21364907920360565, "learning_rate": 2.0150407169776792e-05, "loss": 0.9519136548042297, "step": 4150 }, { "epoch": 0.3399359190901739, "grad_norm": 0.21014325320720673, "learning_rate": 2.0148570814784777e-05, "loss": 0.8765990138053894, "step": 4151 }, { "epoch": 0.34001781162669287, "grad_norm": 0.1934913992881775, "learning_rate": 2.014673410562231e-05, "loss": 0.7026876211166382, "step": 4152 }, { "epoch": 0.34009970416321184, "grad_norm": 0.20816461741924286, "learning_rate": 2.0144897042369228e-05, "loss": 0.6527125239372253, "step": 4153 }, { "epoch": 0.3401815966997308, "grad_norm": 0.21145157516002655, "learning_rate": 2.014305962510537e-05, "loss": 0.9180698990821838, "step": 4154 }, { "epoch": 0.3402634892362497, "grad_norm": 0.21570849418640137, "learning_rate": 2.0141221853910608e-05, "loss": 1.093538761138916, "step": 4155 }, { "epoch": 0.3403453817727687, "grad_norm": 0.16500669717788696, "learning_rate": 2.013938372886481e-05, "loss": 1.1335633993148804, "step": 4156 }, { "epoch": 0.34042727430928765, "grad_norm": 0.18976552784442902, "learning_rate": 2.013754525004787e-05, "loss": 0.7708661556243896, "step": 4157 }, { "epoch": 0.34050916684580657, "grad_norm": 0.18272224068641663, "learning_rate": 2.0135706417539698e-05, "loss": 0.6988851428031921, "step": 4158 }, { "epoch": 0.34059105938232553, "grad_norm": 0.1677340567111969, "learning_rate": 2.0133867231420216e-05, "loss": 1.115917682647705, "step": 4159 }, { "epoch": 0.3406729519188445, "grad_norm": 0.20438788831233978, "learning_rate": 2.0132027691769365e-05, "loss": 0.7363069653511047, "step": 4160 }, { "epoch": 0.34075484445536347, "grad_norm": 0.20224568247795105, "learning_rate": 2.0130187798667095e-05, "loss": 0.9210011959075928, "step": 4161 }, { "epoch": 0.3408367369918824, "grad_norm": 0.17178243398666382, "learning_rate": 2.012834755219338e-05, "loss": 0.6549001932144165, "step": 4162 }, { "epoch": 0.34091862952840135, "grad_norm": 0.16897203028202057, "learning_rate": 2.01265069524282e-05, "loss": 1.1297110319137573, "step": 4163 }, { "epoch": 0.3410005220649203, "grad_norm": 0.19492314755916595, "learning_rate": 2.012466599945156e-05, "loss": 0.8482123017311096, "step": 4164 }, { "epoch": 0.3410824146014393, "grad_norm": 0.16053996980190277, "learning_rate": 2.012282469334347e-05, "loss": 0.8623709678649902, "step": 4165 }, { "epoch": 0.3411643071379582, "grad_norm": 0.21516868472099304, "learning_rate": 2.0120983034183966e-05, "loss": 0.9922193884849548, "step": 4166 }, { "epoch": 0.34124619967447717, "grad_norm": 0.2561231553554535, "learning_rate": 2.0119141022053093e-05, "loss": 0.7635040283203125, "step": 4167 }, { "epoch": 0.34132809221099614, "grad_norm": 0.1531168520450592, "learning_rate": 2.01172986570309e-05, "loss": 0.8474525213241577, "step": 4168 }, { "epoch": 0.34140998474751505, "grad_norm": 0.18329504132270813, "learning_rate": 2.0115455939197485e-05, "loss": 0.9704961180686951, "step": 4169 }, { "epoch": 0.341491877284034, "grad_norm": 0.416801393032074, "learning_rate": 2.0113612868632927e-05, "loss": 0.8646448254585266, "step": 4170 }, { "epoch": 0.341573769820553, "grad_norm": 0.16965526342391968, "learning_rate": 2.011176944541734e-05, "loss": 0.6681275367736816, "step": 4171 }, { "epoch": 0.34165566235707195, "grad_norm": 0.20634815096855164, "learning_rate": 2.0109925669630835e-05, "loss": 1.004655361175537, "step": 4172 }, { "epoch": 0.34173755489359087, "grad_norm": 0.17234119772911072, "learning_rate": 2.010808154135356e-05, "loss": 0.9319829940795898, "step": 4173 }, { "epoch": 0.34181944743010984, "grad_norm": 0.18674084544181824, "learning_rate": 2.010623706066567e-05, "loss": 0.9745858907699585, "step": 4174 }, { "epoch": 0.3419013399666288, "grad_norm": 0.1892077475786209, "learning_rate": 2.010439222764733e-05, "loss": 0.9484982490539551, "step": 4175 }, { "epoch": 0.34198323250314777, "grad_norm": 0.16572999954223633, "learning_rate": 2.0102547042378723e-05, "loss": 0.8517343401908875, "step": 4176 }, { "epoch": 0.3420651250396667, "grad_norm": 0.1920069009065628, "learning_rate": 2.010070150494005e-05, "loss": 0.9989929795265198, "step": 4177 }, { "epoch": 0.34214701757618565, "grad_norm": 0.19334693253040314, "learning_rate": 2.0098855615411525e-05, "loss": 0.9607858061790466, "step": 4178 }, { "epoch": 0.3422289101127046, "grad_norm": 0.18254682421684265, "learning_rate": 2.009700937387338e-05, "loss": 0.8767298460006714, "step": 4179 }, { "epoch": 0.34231080264922353, "grad_norm": 0.2217516452074051, "learning_rate": 2.0095162780405857e-05, "loss": 0.6976207494735718, "step": 4180 }, { "epoch": 0.3423926951857425, "grad_norm": 0.19698381423950195, "learning_rate": 2.0093315835089217e-05, "loss": 0.6450433731079102, "step": 4181 }, { "epoch": 0.34247458772226147, "grad_norm": 0.2059532254934311, "learning_rate": 2.009146853800374e-05, "loss": 0.5354965329170227, "step": 4182 }, { "epoch": 0.34255648025878044, "grad_norm": 0.1903630495071411, "learning_rate": 2.008962088922971e-05, "loss": 1.2999749183654785, "step": 4183 }, { "epoch": 0.34263837279529935, "grad_norm": 0.18949788808822632, "learning_rate": 2.0087772888847438e-05, "loss": 1.024554967880249, "step": 4184 }, { "epoch": 0.3427202653318183, "grad_norm": 0.177216038107872, "learning_rate": 2.0085924536937247e-05, "loss": 1.057919979095459, "step": 4185 }, { "epoch": 0.3428021578683373, "grad_norm": 0.18121379613876343, "learning_rate": 2.0084075833579473e-05, "loss": 0.9753561019897461, "step": 4186 }, { "epoch": 0.3428840504048562, "grad_norm": 0.2093733847141266, "learning_rate": 2.0082226778854466e-05, "loss": 0.8342843055725098, "step": 4187 }, { "epoch": 0.34296594294137517, "grad_norm": 0.1915922313928604, "learning_rate": 2.008037737284259e-05, "loss": 1.049103021621704, "step": 4188 }, { "epoch": 0.34304783547789414, "grad_norm": 0.16693562269210815, "learning_rate": 2.007852761562424e-05, "loss": 0.9955962896347046, "step": 4189 }, { "epoch": 0.3431297280144131, "grad_norm": 0.240909606218338, "learning_rate": 2.0076677507279803e-05, "loss": 1.1978267431259155, "step": 4190 }, { "epoch": 0.343211620550932, "grad_norm": 0.16977940499782562, "learning_rate": 2.0074827047889698e-05, "loss": 0.7878450751304626, "step": 4191 }, { "epoch": 0.343293513087451, "grad_norm": 0.18611086905002594, "learning_rate": 2.0072976237534354e-05, "loss": 0.9752344489097595, "step": 4192 }, { "epoch": 0.34337540562396995, "grad_norm": 0.22668877243995667, "learning_rate": 2.0071125076294206e-05, "loss": 0.7601072192192078, "step": 4193 }, { "epoch": 0.3434572981604889, "grad_norm": 0.17141306400299072, "learning_rate": 2.0069273564249724e-05, "loss": 0.6256403923034668, "step": 4194 }, { "epoch": 0.34353919069700783, "grad_norm": 0.1692526787519455, "learning_rate": 2.0067421701481382e-05, "loss": 0.5932313799858093, "step": 4195 }, { "epoch": 0.3436210832335268, "grad_norm": 0.17114360630512238, "learning_rate": 2.006556948806966e-05, "loss": 0.8519042134284973, "step": 4196 }, { "epoch": 0.34370297577004577, "grad_norm": 0.16610580682754517, "learning_rate": 2.0063716924095066e-05, "loss": 0.6596360206604004, "step": 4197 }, { "epoch": 0.3437848683065647, "grad_norm": 0.2002793848514557, "learning_rate": 2.0061864009638127e-05, "loss": 1.0652709007263184, "step": 4198 }, { "epoch": 0.34386676084308365, "grad_norm": 0.1663132905960083, "learning_rate": 2.0060010744779372e-05, "loss": 0.9694660305976868, "step": 4199 }, { "epoch": 0.3439486533796026, "grad_norm": 0.19559408724308014, "learning_rate": 2.0058157129599357e-05, "loss": 0.7791293263435364, "step": 4200 }, { "epoch": 0.3440305459161216, "grad_norm": 0.17432673275470734, "learning_rate": 2.005630316417864e-05, "loss": 0.8515185117721558, "step": 4201 }, { "epoch": 0.3441124384526405, "grad_norm": 0.16162970662117004, "learning_rate": 2.0054448848597807e-05, "loss": 0.542104959487915, "step": 4202 }, { "epoch": 0.34419433098915947, "grad_norm": 0.1894133985042572, "learning_rate": 2.0052594182937455e-05, "loss": 1.1690123081207275, "step": 4203 }, { "epoch": 0.34427622352567844, "grad_norm": 0.2007523626089096, "learning_rate": 2.0050739167278193e-05, "loss": 0.8925947546958923, "step": 4204 }, { "epoch": 0.3443581160621974, "grad_norm": 0.21960397064685822, "learning_rate": 2.0048883801700646e-05, "loss": 0.8110586404800415, "step": 4205 }, { "epoch": 0.3444400085987163, "grad_norm": 0.22228048741817474, "learning_rate": 2.0047028086285457e-05, "loss": 0.9946169853210449, "step": 4206 }, { "epoch": 0.3445219011352353, "grad_norm": 0.1810937523841858, "learning_rate": 2.0045172021113285e-05, "loss": 0.7024261355400085, "step": 4207 }, { "epoch": 0.34460379367175425, "grad_norm": 0.42427799105644226, "learning_rate": 2.0043315606264807e-05, "loss": 0.8590747714042664, "step": 4208 }, { "epoch": 0.34468568620827317, "grad_norm": 0.20287349820137024, "learning_rate": 2.0041458841820696e-05, "loss": 0.9089409708976746, "step": 4209 }, { "epoch": 0.34476757874479214, "grad_norm": 0.17215101420879364, "learning_rate": 2.003960172786167e-05, "loss": 0.555267333984375, "step": 4210 }, { "epoch": 0.3448494712813111, "grad_norm": 0.18266358971595764, "learning_rate": 2.0037744264468436e-05, "loss": 0.9355218410491943, "step": 4211 }, { "epoch": 0.3449313638178301, "grad_norm": 0.20984376966953278, "learning_rate": 2.0035886451721736e-05, "loss": 0.8429971933364868, "step": 4212 }, { "epoch": 0.345013256354349, "grad_norm": 0.18971949815750122, "learning_rate": 2.003402828970231e-05, "loss": 0.6979826092720032, "step": 4213 }, { "epoch": 0.34509514889086795, "grad_norm": 0.1872001439332962, "learning_rate": 2.0032169778490926e-05, "loss": 0.7802934050559998, "step": 4214 }, { "epoch": 0.3451770414273869, "grad_norm": 0.16957956552505493, "learning_rate": 2.003031091816836e-05, "loss": 0.6597304940223694, "step": 4215 }, { "epoch": 0.3452589339639059, "grad_norm": 0.16619089245796204, "learning_rate": 2.0028451708815408e-05, "loss": 0.5896395444869995, "step": 4216 }, { "epoch": 0.3453408265004248, "grad_norm": 0.18357901275157928, "learning_rate": 2.0026592150512876e-05, "loss": 0.9305190443992615, "step": 4217 }, { "epoch": 0.34542271903694377, "grad_norm": 0.19903860986232758, "learning_rate": 2.0024732243341594e-05, "loss": 0.7996610403060913, "step": 4218 }, { "epoch": 0.34550461157346274, "grad_norm": 0.23519916832447052, "learning_rate": 2.002287198738239e-05, "loss": 0.9352813959121704, "step": 4219 }, { "epoch": 0.34558650410998165, "grad_norm": 0.19062447547912598, "learning_rate": 2.0021011382716134e-05, "loss": 0.7166861891746521, "step": 4220 }, { "epoch": 0.3456683966465006, "grad_norm": 0.19423682987689972, "learning_rate": 2.0019150429423688e-05, "loss": 0.8326184153556824, "step": 4221 }, { "epoch": 0.3457502891830196, "grad_norm": 0.1745479851961136, "learning_rate": 2.0017289127585926e-05, "loss": 0.7536429762840271, "step": 4222 }, { "epoch": 0.34583218171953856, "grad_norm": 0.18591739237308502, "learning_rate": 2.0015427477283766e-05, "loss": 0.8680458068847656, "step": 4223 }, { "epoch": 0.34591407425605747, "grad_norm": 0.1790480762720108, "learning_rate": 2.001356547859811e-05, "loss": 0.7651387453079224, "step": 4224 }, { "epoch": 0.34599596679257644, "grad_norm": 0.20816820859909058, "learning_rate": 2.0011703131609897e-05, "loss": 0.9094093441963196, "step": 4225 }, { "epoch": 0.3460778593290954, "grad_norm": 0.15505242347717285, "learning_rate": 2.0009840436400067e-05, "loss": 0.9693399667739868, "step": 4226 }, { "epoch": 0.3461597518656144, "grad_norm": 0.2405937910079956, "learning_rate": 2.000797739304958e-05, "loss": 0.9961382150650024, "step": 4227 }, { "epoch": 0.3462416444021333, "grad_norm": 0.18022936582565308, "learning_rate": 2.000611400163941e-05, "loss": 0.822860598564148, "step": 4228 }, { "epoch": 0.34632353693865225, "grad_norm": 0.20053011178970337, "learning_rate": 2.000425026225056e-05, "loss": 1.0446475744247437, "step": 4229 }, { "epoch": 0.3464054294751712, "grad_norm": 0.27562493085861206, "learning_rate": 2.000238617496402e-05, "loss": 1.021561622619629, "step": 4230 }, { "epoch": 0.34648732201169014, "grad_norm": 0.19245405495166779, "learning_rate": 2.0000521739860816e-05, "loss": 0.8204619288444519, "step": 4231 }, { "epoch": 0.3465692145482091, "grad_norm": 0.16932040452957153, "learning_rate": 1.999865695702199e-05, "loss": 0.558012068271637, "step": 4232 }, { "epoch": 0.34665110708472807, "grad_norm": 0.2020881325006485, "learning_rate": 1.9996791826528584e-05, "loss": 1.2564321756362915, "step": 4233 }, { "epoch": 0.34673299962124704, "grad_norm": 0.19408632814884186, "learning_rate": 1.9994926348461675e-05, "loss": 0.592451810836792, "step": 4234 }, { "epoch": 0.34681489215776595, "grad_norm": 0.18360961973667145, "learning_rate": 1.9993060522902333e-05, "loss": 1.1483325958251953, "step": 4235 }, { "epoch": 0.3468967846942849, "grad_norm": 0.17517441511154175, "learning_rate": 1.999119434993166e-05, "loss": 0.7502737045288086, "step": 4236 }, { "epoch": 0.3469786772308039, "grad_norm": 0.18871162831783295, "learning_rate": 1.998932782963077e-05, "loss": 0.7700850367546082, "step": 4237 }, { "epoch": 0.34706056976732286, "grad_norm": 0.1738772690296173, "learning_rate": 1.9987460962080786e-05, "loss": 0.9592525362968445, "step": 4238 }, { "epoch": 0.34714246230384177, "grad_norm": 0.18718184530735016, "learning_rate": 1.998559374736285e-05, "loss": 0.575971245765686, "step": 4239 }, { "epoch": 0.34722435484036074, "grad_norm": 0.1919238418340683, "learning_rate": 1.9983726185558116e-05, "loss": 1.109427809715271, "step": 4240 }, { "epoch": 0.3473062473768797, "grad_norm": 0.15160447359085083, "learning_rate": 1.9981858276747766e-05, "loss": 0.6453830599784851, "step": 4241 }, { "epoch": 0.3473881399133986, "grad_norm": 0.22129864990711212, "learning_rate": 1.9979990021012978e-05, "loss": 0.8160969018936157, "step": 4242 }, { "epoch": 0.3474700324499176, "grad_norm": 0.1703474521636963, "learning_rate": 1.9978121418434952e-05, "loss": 1.0769004821777344, "step": 4243 }, { "epoch": 0.34755192498643656, "grad_norm": 0.19220326840877533, "learning_rate": 1.9976252469094916e-05, "loss": 0.6733195185661316, "step": 4244 }, { "epoch": 0.3476338175229555, "grad_norm": 0.15140116214752197, "learning_rate": 1.9974383173074093e-05, "loss": 0.8357107043266296, "step": 4245 }, { "epoch": 0.34771571005947444, "grad_norm": 0.21509481966495514, "learning_rate": 1.9972513530453734e-05, "loss": 0.8916155695915222, "step": 4246 }, { "epoch": 0.3477976025959934, "grad_norm": 0.2284659892320633, "learning_rate": 1.99706435413151e-05, "loss": 1.1279882192611694, "step": 4247 }, { "epoch": 0.3478794951325124, "grad_norm": 0.19866876304149628, "learning_rate": 1.996877320573947e-05, "loss": 0.9605868458747864, "step": 4248 }, { "epoch": 0.34796138766903134, "grad_norm": 0.19072696566581726, "learning_rate": 1.9966902523808135e-05, "loss": 0.9076762795448303, "step": 4249 }, { "epoch": 0.34804328020555025, "grad_norm": 0.20904573798179626, "learning_rate": 1.99650314956024e-05, "loss": 1.008750081062317, "step": 4250 }, { "epoch": 0.3481251727420692, "grad_norm": 0.20247584581375122, "learning_rate": 1.9963160121203594e-05, "loss": 0.8050978779792786, "step": 4251 }, { "epoch": 0.3482070652785882, "grad_norm": 0.19660364091396332, "learning_rate": 1.9961288400693054e-05, "loss": 0.8156879544258118, "step": 4252 }, { "epoch": 0.3482889578151071, "grad_norm": 0.1963547319173813, "learning_rate": 1.9959416334152127e-05, "loss": 0.7328289747238159, "step": 4253 }, { "epoch": 0.34837085035162607, "grad_norm": 0.1964053213596344, "learning_rate": 1.9957543921662182e-05, "loss": 0.6124880909919739, "step": 4254 }, { "epoch": 0.34845274288814504, "grad_norm": 0.19500236213207245, "learning_rate": 1.9955671163304605e-05, "loss": 1.0373597145080566, "step": 4255 }, { "epoch": 0.348534635424664, "grad_norm": 0.1970420926809311, "learning_rate": 1.9953798059160792e-05, "loss": 0.666071355342865, "step": 4256 }, { "epoch": 0.3486165279611829, "grad_norm": 0.1643259972333908, "learning_rate": 1.9951924609312158e-05, "loss": 0.6159888505935669, "step": 4257 }, { "epoch": 0.3486984204977019, "grad_norm": 0.15608172118663788, "learning_rate": 1.9950050813840125e-05, "loss": 0.8564125299453735, "step": 4258 }, { "epoch": 0.34878031303422086, "grad_norm": 0.17953112721443176, "learning_rate": 1.994817667282614e-05, "loss": 0.8252280950546265, "step": 4259 }, { "epoch": 0.34886220557073977, "grad_norm": 0.17098568379878998, "learning_rate": 1.9946302186351668e-05, "loss": 0.72845059633255, "step": 4260 }, { "epoch": 0.34894409810725874, "grad_norm": 0.1770675927400589, "learning_rate": 1.9944427354498168e-05, "loss": 0.7689481973648071, "step": 4261 }, { "epoch": 0.3490259906437777, "grad_norm": 0.1830819845199585, "learning_rate": 1.9942552177347134e-05, "loss": 0.869820237159729, "step": 4262 }, { "epoch": 0.3491078831802967, "grad_norm": 0.16418322920799255, "learning_rate": 1.9940676654980073e-05, "loss": 0.7928192019462585, "step": 4263 }, { "epoch": 0.3491897757168156, "grad_norm": 0.20262765884399414, "learning_rate": 1.9938800787478497e-05, "loss": 1.225271224975586, "step": 4264 }, { "epoch": 0.34927166825333456, "grad_norm": 0.2257627695798874, "learning_rate": 1.993692457492394e-05, "loss": 1.019595742225647, "step": 4265 }, { "epoch": 0.3493535607898535, "grad_norm": 0.18162813782691956, "learning_rate": 1.9935048017397954e-05, "loss": 0.9958438277244568, "step": 4266 }, { "epoch": 0.3494354533263725, "grad_norm": 0.16226407885551453, "learning_rate": 1.9933171114982095e-05, "loss": 0.7579657435417175, "step": 4267 }, { "epoch": 0.3495173458628914, "grad_norm": 0.21937301754951477, "learning_rate": 1.993129386775795e-05, "loss": 0.7065722346305847, "step": 4268 }, { "epoch": 0.3495992383994104, "grad_norm": 0.20830292999744415, "learning_rate": 1.9929416275807104e-05, "loss": 0.7484489679336548, "step": 4269 }, { "epoch": 0.34968113093592934, "grad_norm": 0.19231916964054108, "learning_rate": 1.9927538339211165e-05, "loss": 1.1127445697784424, "step": 4270 }, { "epoch": 0.34976302347244825, "grad_norm": 0.21205741167068481, "learning_rate": 1.9925660058051764e-05, "loss": 1.2230138778686523, "step": 4271 }, { "epoch": 0.3498449160089672, "grad_norm": 0.13818219304084778, "learning_rate": 1.9923781432410535e-05, "loss": 0.5718998908996582, "step": 4272 }, { "epoch": 0.3499268085454862, "grad_norm": 0.1705896109342575, "learning_rate": 1.9921902462369125e-05, "loss": 0.5857288837432861, "step": 4273 }, { "epoch": 0.35000870108200516, "grad_norm": 0.16927361488342285, "learning_rate": 1.9920023148009208e-05, "loss": 1.0685962438583374, "step": 4274 }, { "epoch": 0.35009059361852407, "grad_norm": 0.19389848411083221, "learning_rate": 1.9918143489412465e-05, "loss": 0.7640177607536316, "step": 4275 }, { "epoch": 0.35017248615504304, "grad_norm": 0.18173375725746155, "learning_rate": 1.9916263486660588e-05, "loss": 0.6967271566390991, "step": 4276 }, { "epoch": 0.350254378691562, "grad_norm": 0.18806962668895721, "learning_rate": 1.99143831398353e-05, "loss": 0.903894305229187, "step": 4277 }, { "epoch": 0.350336271228081, "grad_norm": 0.19347622990608215, "learning_rate": 1.9912502449018327e-05, "loss": 0.9440847635269165, "step": 4278 }, { "epoch": 0.3504181637645999, "grad_norm": 0.1341981738805771, "learning_rate": 1.99106214142914e-05, "loss": 0.7661923766136169, "step": 4279 }, { "epoch": 0.35050005630111886, "grad_norm": 0.20395943522453308, "learning_rate": 1.990874003573629e-05, "loss": 0.9385321140289307, "step": 4280 }, { "epoch": 0.3505819488376378, "grad_norm": 0.1743140071630478, "learning_rate": 1.9906858313434763e-05, "loss": 0.8520224094390869, "step": 4281 }, { "epoch": 0.35066384137415674, "grad_norm": 0.1934700608253479, "learning_rate": 1.990497624746861e-05, "loss": 1.0494107007980347, "step": 4282 }, { "epoch": 0.3507457339106757, "grad_norm": 0.19144435226917267, "learning_rate": 1.9903093837919632e-05, "loss": 0.962820291519165, "step": 4283 }, { "epoch": 0.3508276264471947, "grad_norm": 0.15662795305252075, "learning_rate": 1.9901211084869644e-05, "loss": 0.5281029939651489, "step": 4284 }, { "epoch": 0.35090951898371364, "grad_norm": 0.23295748233795166, "learning_rate": 1.9899327988400478e-05, "loss": 0.6705214381217957, "step": 4285 }, { "epoch": 0.35099141152023255, "grad_norm": 0.18341365456581116, "learning_rate": 1.9897444548593984e-05, "loss": 0.8179709911346436, "step": 4286 }, { "epoch": 0.3510733040567515, "grad_norm": 0.1692945808172226, "learning_rate": 1.9895560765532017e-05, "loss": 0.8284463286399841, "step": 4287 }, { "epoch": 0.3511551965932705, "grad_norm": 0.17492544651031494, "learning_rate": 1.9893676639296467e-05, "loss": 1.2347835302352905, "step": 4288 }, { "epoch": 0.35123708912978946, "grad_norm": 0.1503075808286667, "learning_rate": 1.989179216996922e-05, "loss": 0.7111825942993164, "step": 4289 }, { "epoch": 0.35131898166630837, "grad_norm": 0.16423186659812927, "learning_rate": 1.9889907357632174e-05, "loss": 0.8143547773361206, "step": 4290 }, { "epoch": 0.35140087420282734, "grad_norm": 0.16754703223705292, "learning_rate": 1.9888022202367264e-05, "loss": 0.9910504221916199, "step": 4291 }, { "epoch": 0.3514827667393463, "grad_norm": 0.17540523409843445, "learning_rate": 1.988613670425642e-05, "loss": 0.8069806098937988, "step": 4292 }, { "epoch": 0.3515646592758652, "grad_norm": 0.15714527666568756, "learning_rate": 1.988425086338159e-05, "loss": 0.7547283172607422, "step": 4293 }, { "epoch": 0.3516465518123842, "grad_norm": 0.21366292238235474, "learning_rate": 1.9882364679824747e-05, "loss": 0.6168365478515625, "step": 4294 }, { "epoch": 0.35172844434890316, "grad_norm": 0.2177937626838684, "learning_rate": 1.988047815366787e-05, "loss": 1.053797960281372, "step": 4295 }, { "epoch": 0.3518103368854221, "grad_norm": 0.1770176887512207, "learning_rate": 1.9878591284992955e-05, "loss": 0.7101513147354126, "step": 4296 }, { "epoch": 0.35189222942194104, "grad_norm": 0.1441406011581421, "learning_rate": 1.9876704073882015e-05, "loss": 0.7538225054740906, "step": 4297 }, { "epoch": 0.35197412195846, "grad_norm": 0.19140158593654633, "learning_rate": 1.9874816520417072e-05, "loss": 1.0232203006744385, "step": 4298 }, { "epoch": 0.352056014494979, "grad_norm": 0.16455036401748657, "learning_rate": 1.987292862468017e-05, "loss": 0.7590416073799133, "step": 4299 }, { "epoch": 0.35213790703149794, "grad_norm": 0.2422240972518921, "learning_rate": 1.9871040386753366e-05, "loss": 0.9051753282546997, "step": 4300 }, { "epoch": 0.35221979956801686, "grad_norm": 0.17255592346191406, "learning_rate": 1.9869151806718725e-05, "loss": 0.6966418623924255, "step": 4301 }, { "epoch": 0.3523016921045358, "grad_norm": 0.19271442294120789, "learning_rate": 1.9867262884658335e-05, "loss": 1.1196495294570923, "step": 4302 }, { "epoch": 0.3523835846410548, "grad_norm": 0.17691737413406372, "learning_rate": 1.9865373620654302e-05, "loss": 0.6582580208778381, "step": 4303 }, { "epoch": 0.3524654771775737, "grad_norm": 0.22815318405628204, "learning_rate": 1.986348401478873e-05, "loss": 0.7224811911582947, "step": 4304 }, { "epoch": 0.3525473697140927, "grad_norm": 0.19707545638084412, "learning_rate": 1.986159406714376e-05, "loss": 0.8755302429199219, "step": 4305 }, { "epoch": 0.35262926225061164, "grad_norm": 0.27530646324157715, "learning_rate": 1.9859703777801533e-05, "loss": 0.8659212589263916, "step": 4306 }, { "epoch": 0.3527111547871306, "grad_norm": 0.1753542423248291, "learning_rate": 1.9857813146844204e-05, "loss": 0.6943292021751404, "step": 4307 }, { "epoch": 0.3527930473236495, "grad_norm": 0.22514866292476654, "learning_rate": 1.9855922174353954e-05, "loss": 1.1258009672164917, "step": 4308 }, { "epoch": 0.3528749398601685, "grad_norm": 0.1806892454624176, "learning_rate": 1.9854030860412973e-05, "loss": 0.6063889861106873, "step": 4309 }, { "epoch": 0.35295683239668746, "grad_norm": 0.18263940513134003, "learning_rate": 1.9852139205103457e-05, "loss": 1.044600009918213, "step": 4310 }, { "epoch": 0.3530387249332064, "grad_norm": 0.22632959485054016, "learning_rate": 1.9850247208507633e-05, "loss": 1.0330491065979004, "step": 4311 }, { "epoch": 0.35312061746972534, "grad_norm": 0.2883365750312805, "learning_rate": 1.9848354870707734e-05, "loss": 0.7334637641906738, "step": 4312 }, { "epoch": 0.3532025100062443, "grad_norm": 0.1803656965494156, "learning_rate": 1.9846462191786003e-05, "loss": 0.9466259479522705, "step": 4313 }, { "epoch": 0.3532844025427633, "grad_norm": 0.18818654119968414, "learning_rate": 1.984456917182471e-05, "loss": 0.5185015797615051, "step": 4314 }, { "epoch": 0.3533662950792822, "grad_norm": 0.17948360741138458, "learning_rate": 1.9842675810906135e-05, "loss": 0.968687117099762, "step": 4315 }, { "epoch": 0.35344818761580116, "grad_norm": 0.15650972723960876, "learning_rate": 1.984078210911256e-05, "loss": 0.5491303205490112, "step": 4316 }, { "epoch": 0.3535300801523201, "grad_norm": 0.1698748767375946, "learning_rate": 1.9838888066526308e-05, "loss": 0.7467520236968994, "step": 4317 }, { "epoch": 0.3536119726888391, "grad_norm": 0.15565535426139832, "learning_rate": 1.983699368322969e-05, "loss": 0.8507865071296692, "step": 4318 }, { "epoch": 0.353693865225358, "grad_norm": 0.18987567722797394, "learning_rate": 1.9835098959305054e-05, "loss": 1.0501869916915894, "step": 4319 }, { "epoch": 0.353775757761877, "grad_norm": 0.18029314279556274, "learning_rate": 1.9833203894834743e-05, "loss": 0.7362135052680969, "step": 4320 }, { "epoch": 0.35385765029839594, "grad_norm": 0.17571952939033508, "learning_rate": 1.983130848990113e-05, "loss": 0.6698356866836548, "step": 4321 }, { "epoch": 0.35393954283491486, "grad_norm": 0.1656728982925415, "learning_rate": 1.9829412744586592e-05, "loss": 0.8587588667869568, "step": 4322 }, { "epoch": 0.3540214353714338, "grad_norm": 0.18742333352565765, "learning_rate": 1.982751665897353e-05, "loss": 0.9078229069709778, "step": 4323 }, { "epoch": 0.3541033279079528, "grad_norm": 0.17899560928344727, "learning_rate": 1.9825620233144355e-05, "loss": 0.7761216759681702, "step": 4324 }, { "epoch": 0.35418522044447176, "grad_norm": 0.18317992985248566, "learning_rate": 1.9823723467181494e-05, "loss": 1.0347492694854736, "step": 4325 }, { "epoch": 0.3542671129809907, "grad_norm": 0.16234563291072845, "learning_rate": 1.982182636116739e-05, "loss": 0.45347118377685547, "step": 4326 }, { "epoch": 0.35434900551750964, "grad_norm": 0.18607275187969208, "learning_rate": 1.9819928915184493e-05, "loss": 0.6735910773277283, "step": 4327 }, { "epoch": 0.3544308980540286, "grad_norm": 0.18664667010307312, "learning_rate": 1.981803112931528e-05, "loss": 0.9249265193939209, "step": 4328 }, { "epoch": 0.3545127905905476, "grad_norm": 0.17043016850948334, "learning_rate": 1.981613300364223e-05, "loss": 0.6687347888946533, "step": 4329 }, { "epoch": 0.3545946831270665, "grad_norm": 0.19587352871894836, "learning_rate": 1.981423453824785e-05, "loss": 1.0197314023971558, "step": 4330 }, { "epoch": 0.35467657566358546, "grad_norm": 0.17299440503120422, "learning_rate": 1.9812335733214652e-05, "loss": 0.6650671362876892, "step": 4331 }, { "epoch": 0.3547584682001044, "grad_norm": 0.18933191895484924, "learning_rate": 1.9810436588625168e-05, "loss": 1.0059159994125366, "step": 4332 }, { "epoch": 0.35484036073662334, "grad_norm": 0.18306343257427216, "learning_rate": 1.9808537104561938e-05, "loss": 0.7553528547286987, "step": 4333 }, { "epoch": 0.3549222532731423, "grad_norm": 0.1626724749803543, "learning_rate": 1.980663728110753e-05, "loss": 0.6667891144752502, "step": 4334 }, { "epoch": 0.3550041458096613, "grad_norm": 0.1766515076160431, "learning_rate": 1.9804737118344504e-05, "loss": 0.8149048686027527, "step": 4335 }, { "epoch": 0.35508603834618024, "grad_norm": 0.1662658303976059, "learning_rate": 1.9802836616355465e-05, "loss": 0.6821157336235046, "step": 4336 }, { "epoch": 0.35516793088269916, "grad_norm": 0.15897555649280548, "learning_rate": 1.9800935775223006e-05, "loss": 0.9281177520751953, "step": 4337 }, { "epoch": 0.3552498234192181, "grad_norm": 0.20516298711299896, "learning_rate": 1.9799034595029747e-05, "loss": 0.5188025236129761, "step": 4338 }, { "epoch": 0.3553317159557371, "grad_norm": 0.1998523324728012, "learning_rate": 1.9797133075858327e-05, "loss": 0.7048397660255432, "step": 4339 }, { "epoch": 0.35541360849225606, "grad_norm": 0.1746547520160675, "learning_rate": 1.9795231217791386e-05, "loss": 0.7627096772193909, "step": 4340 }, { "epoch": 0.355495501028775, "grad_norm": 0.21026314795017242, "learning_rate": 1.979332902091159e-05, "loss": 0.7111027836799622, "step": 4341 }, { "epoch": 0.35557739356529394, "grad_norm": 0.18885231018066406, "learning_rate": 1.9791426485301618e-05, "loss": 0.7639370560646057, "step": 4342 }, { "epoch": 0.3556592861018129, "grad_norm": 0.16572143137454987, "learning_rate": 1.978952361104416e-05, "loss": 0.914178192615509, "step": 4343 }, { "epoch": 0.3557411786383318, "grad_norm": 0.18069341778755188, "learning_rate": 1.9787620398221923e-05, "loss": 0.7422683238983154, "step": 4344 }, { "epoch": 0.3558230711748508, "grad_norm": 0.16477006673812866, "learning_rate": 1.9785716846917632e-05, "loss": 0.8486352562904358, "step": 4345 }, { "epoch": 0.35590496371136976, "grad_norm": 0.17174987494945526, "learning_rate": 1.9783812957214015e-05, "loss": 0.8172500133514404, "step": 4346 }, { "epoch": 0.3559868562478887, "grad_norm": 0.2293248474597931, "learning_rate": 1.978190872919383e-05, "loss": 0.970594048500061, "step": 4347 }, { "epoch": 0.35606874878440764, "grad_norm": 0.18764536082744598, "learning_rate": 1.978000416293984e-05, "loss": 0.905680239200592, "step": 4348 }, { "epoch": 0.3561506413209266, "grad_norm": 0.16522447764873505, "learning_rate": 1.9778099258534825e-05, "loss": 0.8441263437271118, "step": 4349 }, { "epoch": 0.3562325338574456, "grad_norm": 0.1701192557811737, "learning_rate": 1.977619401606158e-05, "loss": 0.6271524429321289, "step": 4350 }, { "epoch": 0.35631442639396455, "grad_norm": 0.19550827145576477, "learning_rate": 1.9774288435602917e-05, "loss": 0.8148967027664185, "step": 4351 }, { "epoch": 0.35639631893048346, "grad_norm": 0.2333151400089264, "learning_rate": 1.9772382517241653e-05, "loss": 0.8291774392127991, "step": 4352 }, { "epoch": 0.3564782114670024, "grad_norm": 0.20872347056865692, "learning_rate": 1.977047626106064e-05, "loss": 0.7301253080368042, "step": 4353 }, { "epoch": 0.3565601040035214, "grad_norm": 0.15973712503910065, "learning_rate": 1.9768569667142723e-05, "loss": 0.5338894128799438, "step": 4354 }, { "epoch": 0.3566419965400403, "grad_norm": 0.19325588643550873, "learning_rate": 1.9766662735570768e-05, "loss": 0.9150426983833313, "step": 4355 }, { "epoch": 0.3567238890765593, "grad_norm": 0.21037428081035614, "learning_rate": 1.9764755466427665e-05, "loss": 1.0235742330551147, "step": 4356 }, { "epoch": 0.35680578161307824, "grad_norm": 0.23164819180965424, "learning_rate": 1.976284785979631e-05, "loss": 0.7465975284576416, "step": 4357 }, { "epoch": 0.3568876741495972, "grad_norm": 0.19242756068706512, "learning_rate": 1.976093991575961e-05, "loss": 0.7502991557121277, "step": 4358 }, { "epoch": 0.3569695666861161, "grad_norm": 0.18773558735847473, "learning_rate": 1.9759031634400498e-05, "loss": 0.9565351009368896, "step": 4359 }, { "epoch": 0.3570514592226351, "grad_norm": 0.1774405837059021, "learning_rate": 1.975712301580192e-05, "loss": 0.9624481797218323, "step": 4360 }, { "epoch": 0.35713335175915406, "grad_norm": 0.18466518819332123, "learning_rate": 1.9755214060046818e-05, "loss": 0.6589597463607788, "step": 4361 }, { "epoch": 0.35721524429567303, "grad_norm": 0.19594885408878326, "learning_rate": 1.9753304767218178e-05, "loss": 0.8354603052139282, "step": 4362 }, { "epoch": 0.35729713683219194, "grad_norm": 0.19095203280448914, "learning_rate": 1.9751395137398974e-05, "loss": 0.655829131603241, "step": 4363 }, { "epoch": 0.3573790293687109, "grad_norm": 0.21325017511844635, "learning_rate": 1.9749485170672216e-05, "loss": 1.09444260597229, "step": 4364 }, { "epoch": 0.3574609219052299, "grad_norm": 0.1545410007238388, "learning_rate": 1.9747574867120912e-05, "loss": 0.6071938872337341, "step": 4365 }, { "epoch": 0.3575428144417488, "grad_norm": 0.1673203855752945, "learning_rate": 1.9745664226828096e-05, "loss": 0.7164135575294495, "step": 4366 }, { "epoch": 0.35762470697826776, "grad_norm": 0.1788136512041092, "learning_rate": 1.9743753249876807e-05, "loss": 0.7308321595191956, "step": 4367 }, { "epoch": 0.3577065995147867, "grad_norm": 0.189166858792305, "learning_rate": 1.9741841936350112e-05, "loss": 0.6530867218971252, "step": 4368 }, { "epoch": 0.3577884920513057, "grad_norm": 0.17598296701908112, "learning_rate": 1.9739930286331078e-05, "loss": 0.595264196395874, "step": 4369 }, { "epoch": 0.3578703845878246, "grad_norm": 0.1730663776397705, "learning_rate": 1.97380182999028e-05, "loss": 0.8073391914367676, "step": 4370 }, { "epoch": 0.3579522771243436, "grad_norm": 0.1832951158285141, "learning_rate": 1.9736105977148368e-05, "loss": 0.8449031710624695, "step": 4371 }, { "epoch": 0.35803416966086254, "grad_norm": 0.17041772603988647, "learning_rate": 1.9734193318150914e-05, "loss": 1.0381929874420166, "step": 4372 }, { "epoch": 0.3581160621973815, "grad_norm": 0.17107076942920685, "learning_rate": 1.9732280322993557e-05, "loss": 0.5129362940788269, "step": 4373 }, { "epoch": 0.3581979547339004, "grad_norm": 0.18035803735256195, "learning_rate": 1.9730366991759457e-05, "loss": 0.98234623670578, "step": 4374 }, { "epoch": 0.3582798472704194, "grad_norm": 0.19209909439086914, "learning_rate": 1.9728453324531764e-05, "loss": 1.0065691471099854, "step": 4375 }, { "epoch": 0.35836173980693836, "grad_norm": 0.21134227514266968, "learning_rate": 1.972653932139366e-05, "loss": 0.5568132996559143, "step": 4376 }, { "epoch": 0.3584436323434573, "grad_norm": 0.18635819852352142, "learning_rate": 1.9724624982428335e-05, "loss": 0.6490844488143921, "step": 4377 }, { "epoch": 0.35852552487997624, "grad_norm": 0.22073891758918762, "learning_rate": 1.972271030771899e-05, "loss": 1.0237884521484375, "step": 4378 }, { "epoch": 0.3586074174164952, "grad_norm": 0.22047628462314606, "learning_rate": 1.972079529734885e-05, "loss": 1.0606614351272583, "step": 4379 }, { "epoch": 0.3586893099530142, "grad_norm": 0.17419691383838654, "learning_rate": 1.9718879951401146e-05, "loss": 0.6295831799507141, "step": 4380 }, { "epoch": 0.3587712024895331, "grad_norm": 0.1864946484565735, "learning_rate": 1.9716964269959127e-05, "loss": 0.6515485644340515, "step": 4381 }, { "epoch": 0.35885309502605206, "grad_norm": 0.1874263733625412, "learning_rate": 1.9715048253106057e-05, "loss": 0.8942470550537109, "step": 4382 }, { "epoch": 0.35893498756257103, "grad_norm": 0.2038254737854004, "learning_rate": 1.9713131900925216e-05, "loss": 0.9094241261482239, "step": 4383 }, { "epoch": 0.35901688009909, "grad_norm": 0.23463955521583557, "learning_rate": 1.9711215213499892e-05, "loss": 1.0054821968078613, "step": 4384 }, { "epoch": 0.3590987726356089, "grad_norm": 0.19530893862247467, "learning_rate": 1.9709298190913395e-05, "loss": 0.6390146017074585, "step": 4385 }, { "epoch": 0.3591806651721279, "grad_norm": 0.2438758760690689, "learning_rate": 1.970738083324905e-05, "loss": 0.7766982316970825, "step": 4386 }, { "epoch": 0.35926255770864685, "grad_norm": 0.1736193299293518, "learning_rate": 1.9705463140590185e-05, "loss": 0.47715631127357483, "step": 4387 }, { "epoch": 0.35934445024516576, "grad_norm": 0.17889083921909332, "learning_rate": 1.9703545113020157e-05, "loss": 0.9706364274024963, "step": 4388 }, { "epoch": 0.3594263427816847, "grad_norm": 0.22794081270694733, "learning_rate": 1.970162675062233e-05, "loss": 0.8281793594360352, "step": 4389 }, { "epoch": 0.3595082353182037, "grad_norm": 0.18033763766288757, "learning_rate": 1.9699708053480087e-05, "loss": 0.8748568296432495, "step": 4390 }, { "epoch": 0.35959012785472266, "grad_norm": 0.19694048166275024, "learning_rate": 1.9697789021676816e-05, "loss": 0.6674535870552063, "step": 4391 }, { "epoch": 0.3596720203912416, "grad_norm": 0.19479872286319733, "learning_rate": 1.9695869655295927e-05, "loss": 0.9064968228340149, "step": 4392 }, { "epoch": 0.35975391292776054, "grad_norm": 0.18926383554935455, "learning_rate": 1.9693949954420847e-05, "loss": 0.7663837671279907, "step": 4393 }, { "epoch": 0.3598358054642795, "grad_norm": 0.18565207719802856, "learning_rate": 1.9692029919135014e-05, "loss": 0.8070425391197205, "step": 4394 }, { "epoch": 0.3599176980007984, "grad_norm": 0.1964593529701233, "learning_rate": 1.969010954952188e-05, "loss": 0.9323968887329102, "step": 4395 }, { "epoch": 0.3599995905373174, "grad_norm": 0.15867365896701813, "learning_rate": 1.968818884566491e-05, "loss": 0.7625018358230591, "step": 4396 }, { "epoch": 0.36008148307383636, "grad_norm": 0.17026787996292114, "learning_rate": 1.9686267807647594e-05, "loss": 0.9231011271476746, "step": 4397 }, { "epoch": 0.36016337561035533, "grad_norm": 0.2280600517988205, "learning_rate": 1.9684346435553415e-05, "loss": 0.7552457451820374, "step": 4398 }, { "epoch": 0.36024526814687424, "grad_norm": 0.18990188837051392, "learning_rate": 1.968242472946589e-05, "loss": 0.7399788498878479, "step": 4399 }, { "epoch": 0.3603271606833932, "grad_norm": 0.18415255844593048, "learning_rate": 1.9680502689468547e-05, "loss": 0.714137077331543, "step": 4400 }, { "epoch": 0.3604090532199122, "grad_norm": 0.1729525476694107, "learning_rate": 1.967858031564492e-05, "loss": 0.8887204527854919, "step": 4401 }, { "epoch": 0.36049094575643115, "grad_norm": 0.17705397307872772, "learning_rate": 1.9676657608078573e-05, "loss": 0.6605455279350281, "step": 4402 }, { "epoch": 0.36057283829295006, "grad_norm": 0.1925312727689743, "learning_rate": 1.9674734566853066e-05, "loss": 0.8454083204269409, "step": 4403 }, { "epoch": 0.36065473082946903, "grad_norm": 0.18488526344299316, "learning_rate": 1.967281119205198e-05, "loss": 1.1678919792175293, "step": 4404 }, { "epoch": 0.360736623365988, "grad_norm": 0.15734003484249115, "learning_rate": 1.9670887483758927e-05, "loss": 0.8140684366226196, "step": 4405 }, { "epoch": 0.3608185159025069, "grad_norm": 0.19680911302566528, "learning_rate": 1.9668963442057503e-05, "loss": 0.984260082244873, "step": 4406 }, { "epoch": 0.3609004084390259, "grad_norm": 0.19573622941970825, "learning_rate": 1.966703906703134e-05, "loss": 0.8262365460395813, "step": 4407 }, { "epoch": 0.36098230097554485, "grad_norm": 0.1551995426416397, "learning_rate": 1.9665114358764085e-05, "loss": 0.9407004714012146, "step": 4408 }, { "epoch": 0.3610641935120638, "grad_norm": 0.1732412725687027, "learning_rate": 1.966318931733939e-05, "loss": 0.9971442222595215, "step": 4409 }, { "epoch": 0.3611460860485827, "grad_norm": 0.18320736289024353, "learning_rate": 1.966126394284092e-05, "loss": 1.1332502365112305, "step": 4410 }, { "epoch": 0.3612279785851017, "grad_norm": 0.20795078575611115, "learning_rate": 1.965933823535237e-05, "loss": 0.8641313314437866, "step": 4411 }, { "epoch": 0.36130987112162066, "grad_norm": 0.22419123351573944, "learning_rate": 1.9657412194957432e-05, "loss": 0.7736263871192932, "step": 4412 }, { "epoch": 0.36139176365813963, "grad_norm": 0.16440878808498383, "learning_rate": 1.9655485821739823e-05, "loss": 0.8979436159133911, "step": 4413 }, { "epoch": 0.36147365619465854, "grad_norm": 0.21328333020210266, "learning_rate": 1.9653559115783267e-05, "loss": 0.7405194044113159, "step": 4414 }, { "epoch": 0.3615555487311775, "grad_norm": 0.21749716997146606, "learning_rate": 1.965163207717151e-05, "loss": 0.9242398738861084, "step": 4415 }, { "epoch": 0.3616374412676965, "grad_norm": 0.16800417006015778, "learning_rate": 1.9649704705988308e-05, "loss": 0.7356133460998535, "step": 4416 }, { "epoch": 0.3617193338042154, "grad_norm": 0.22920911014080048, "learning_rate": 1.9647777002317434e-05, "loss": 0.6302648782730103, "step": 4417 }, { "epoch": 0.36180122634073436, "grad_norm": 0.21627835929393768, "learning_rate": 1.9645848966242672e-05, "loss": 0.6307827830314636, "step": 4418 }, { "epoch": 0.36188311887725333, "grad_norm": 0.1857755482196808, "learning_rate": 1.9643920597847823e-05, "loss": 1.0630323886871338, "step": 4419 }, { "epoch": 0.3619650114137723, "grad_norm": 0.16455215215682983, "learning_rate": 1.96419918972167e-05, "loss": 0.6739237308502197, "step": 4420 }, { "epoch": 0.3620469039502912, "grad_norm": 0.17445345222949982, "learning_rate": 1.9640062864433136e-05, "loss": 0.5842790007591248, "step": 4421 }, { "epoch": 0.3621287964868102, "grad_norm": 0.20279544591903687, "learning_rate": 1.9638133499580972e-05, "loss": 0.8445159792900085, "step": 4422 }, { "epoch": 0.36221068902332915, "grad_norm": 0.18346132338047028, "learning_rate": 1.963620380274407e-05, "loss": 0.8005721569061279, "step": 4423 }, { "epoch": 0.3622925815598481, "grad_norm": 0.2000623345375061, "learning_rate": 1.9634273774006295e-05, "loss": 0.7876731157302856, "step": 4424 }, { "epoch": 0.362374474096367, "grad_norm": 0.17045186460018158, "learning_rate": 1.963234341345154e-05, "loss": 0.8150262832641602, "step": 4425 }, { "epoch": 0.362456366632886, "grad_norm": 0.20091471076011658, "learning_rate": 1.9630412721163706e-05, "loss": 0.7972959280014038, "step": 4426 }, { "epoch": 0.36253825916940496, "grad_norm": 0.1542266309261322, "learning_rate": 1.9628481697226705e-05, "loss": 0.7244962453842163, "step": 4427 }, { "epoch": 0.3626201517059239, "grad_norm": 0.16996188461780548, "learning_rate": 1.9626550341724474e-05, "loss": 0.5139568448066711, "step": 4428 }, { "epoch": 0.36270204424244284, "grad_norm": 0.1878669410943985, "learning_rate": 1.9624618654740955e-05, "loss": 0.7973442673683167, "step": 4429 }, { "epoch": 0.3627839367789618, "grad_norm": 0.18063956499099731, "learning_rate": 1.9622686636360098e-05, "loss": 1.0832916498184204, "step": 4430 }, { "epoch": 0.3628658293154808, "grad_norm": 0.16756555438041687, "learning_rate": 1.962075428666589e-05, "loss": 0.7238767743110657, "step": 4431 }, { "epoch": 0.3629477218519997, "grad_norm": 0.23344026505947113, "learning_rate": 1.961882160574231e-05, "loss": 0.8512411713600159, "step": 4432 }, { "epoch": 0.36302961438851866, "grad_norm": 0.2140486091375351, "learning_rate": 1.9616888593673367e-05, "loss": 0.9644138813018799, "step": 4433 }, { "epoch": 0.36311150692503763, "grad_norm": 0.1673780083656311, "learning_rate": 1.9614955250543074e-05, "loss": 0.7698075771331787, "step": 4434 }, { "epoch": 0.3631933994615566, "grad_norm": 0.1794469952583313, "learning_rate": 1.961302157643546e-05, "loss": 0.6335943341255188, "step": 4435 }, { "epoch": 0.3632752919980755, "grad_norm": 0.17696282267570496, "learning_rate": 1.961108757143457e-05, "loss": 0.7825096845626831, "step": 4436 }, { "epoch": 0.3633571845345945, "grad_norm": 0.1811041384935379, "learning_rate": 1.960915323562447e-05, "loss": 0.7318376302719116, "step": 4437 }, { "epoch": 0.36343907707111345, "grad_norm": 0.19609248638153076, "learning_rate": 1.960721856908923e-05, "loss": 0.7728772163391113, "step": 4438 }, { "epoch": 0.36352096960763236, "grad_norm": 0.19586695730686188, "learning_rate": 1.960528357191294e-05, "loss": 0.8522976636886597, "step": 4439 }, { "epoch": 0.36360286214415133, "grad_norm": 0.1879831999540329, "learning_rate": 1.96033482441797e-05, "loss": 0.9742370843887329, "step": 4440 }, { "epoch": 0.3636847546806703, "grad_norm": 0.2076287567615509, "learning_rate": 1.9601412585973633e-05, "loss": 0.8953176736831665, "step": 4441 }, { "epoch": 0.36376664721718927, "grad_norm": 0.20425009727478027, "learning_rate": 1.959947659737886e-05, "loss": 0.8315043449401855, "step": 4442 }, { "epoch": 0.3638485397537082, "grad_norm": 0.2228768914937973, "learning_rate": 1.959754027847954e-05, "loss": 0.5408908724784851, "step": 4443 }, { "epoch": 0.36393043229022715, "grad_norm": 0.19096027314662933, "learning_rate": 1.959560362935983e-05, "loss": 1.079308271408081, "step": 4444 }, { "epoch": 0.3640123248267461, "grad_norm": 0.20256954431533813, "learning_rate": 1.9593666650103895e-05, "loss": 0.8582855463027954, "step": 4445 }, { "epoch": 0.3640942173632651, "grad_norm": 0.14436964690685272, "learning_rate": 1.9591729340795937e-05, "loss": 0.6439790725708008, "step": 4446 }, { "epoch": 0.364176109899784, "grad_norm": 0.15410207211971283, "learning_rate": 1.9589791701520153e-05, "loss": 0.7378430962562561, "step": 4447 }, { "epoch": 0.36425800243630296, "grad_norm": 0.21897171437740326, "learning_rate": 1.9587853732360757e-05, "loss": 0.7293628454208374, "step": 4448 }, { "epoch": 0.36433989497282193, "grad_norm": 0.1672581285238266, "learning_rate": 1.958591543340199e-05, "loss": 0.8193011283874512, "step": 4449 }, { "epoch": 0.36442178750934084, "grad_norm": 0.20077411830425262, "learning_rate": 1.9583976804728095e-05, "loss": 0.538223385810852, "step": 4450 }, { "epoch": 0.3645036800458598, "grad_norm": 0.1502661556005478, "learning_rate": 1.958203784642333e-05, "loss": 0.9661727547645569, "step": 4451 }, { "epoch": 0.3645855725823788, "grad_norm": 0.2117607742547989, "learning_rate": 1.9580098558571974e-05, "loss": 1.007814884185791, "step": 4452 }, { "epoch": 0.36466746511889775, "grad_norm": 0.19347292184829712, "learning_rate": 1.957815894125831e-05, "loss": 1.0287089347839355, "step": 4453 }, { "epoch": 0.36474935765541666, "grad_norm": 0.164510115981102, "learning_rate": 1.9576218994566652e-05, "loss": 0.8123359680175781, "step": 4454 }, { "epoch": 0.36483125019193563, "grad_norm": 0.1980883628129959, "learning_rate": 1.957427871858131e-05, "loss": 0.6736574769020081, "step": 4455 }, { "epoch": 0.3649131427284546, "grad_norm": 0.20440638065338135, "learning_rate": 1.9572338113386615e-05, "loss": 0.7504818439483643, "step": 4456 }, { "epoch": 0.3649950352649735, "grad_norm": 0.1670597940683365, "learning_rate": 1.9570397179066922e-05, "loss": 0.8283258080482483, "step": 4457 }, { "epoch": 0.3650769278014925, "grad_norm": 0.20536115765571594, "learning_rate": 1.9568455915706585e-05, "loss": 1.1591969728469849, "step": 4458 }, { "epoch": 0.36515882033801145, "grad_norm": 0.2784768342971802, "learning_rate": 1.956651432338998e-05, "loss": 0.766333281993866, "step": 4459 }, { "epoch": 0.3652407128745304, "grad_norm": 0.2383124679327011, "learning_rate": 1.9564572402201505e-05, "loss": 1.1083064079284668, "step": 4460 }, { "epoch": 0.36532260541104933, "grad_norm": 0.2069420963525772, "learning_rate": 1.9562630152225553e-05, "loss": 0.6386774182319641, "step": 4461 }, { "epoch": 0.3654044979475683, "grad_norm": 0.20315736532211304, "learning_rate": 1.9560687573546546e-05, "loss": 1.005161166191101, "step": 4462 }, { "epoch": 0.36548639048408726, "grad_norm": 0.17940525710582733, "learning_rate": 1.9558744666248916e-05, "loss": 0.66644686460495, "step": 4463 }, { "epoch": 0.36556828302060623, "grad_norm": 0.17964328825473785, "learning_rate": 1.9556801430417114e-05, "loss": 1.230204463005066, "step": 4464 }, { "epoch": 0.36565017555712515, "grad_norm": 0.18787643313407898, "learning_rate": 1.9554857866135597e-05, "loss": 0.8045151233673096, "step": 4465 }, { "epoch": 0.3657320680936441, "grad_norm": 0.1781775951385498, "learning_rate": 1.955291397348884e-05, "loss": 0.8430500030517578, "step": 4466 }, { "epoch": 0.3658139606301631, "grad_norm": 0.16637390851974487, "learning_rate": 1.9550969752561335e-05, "loss": 0.8271950483322144, "step": 4467 }, { "epoch": 0.365895853166682, "grad_norm": 0.15370331704616547, "learning_rate": 1.954902520343758e-05, "loss": 0.8958649635314941, "step": 4468 }, { "epoch": 0.36597774570320096, "grad_norm": 0.1868627369403839, "learning_rate": 1.9547080326202098e-05, "loss": 0.6421324014663696, "step": 4469 }, { "epoch": 0.36605963823971993, "grad_norm": 0.17652401328086853, "learning_rate": 1.9545135120939425e-05, "loss": 0.6036357283592224, "step": 4470 }, { "epoch": 0.3661415307762389, "grad_norm": 0.18761026859283447, "learning_rate": 1.9543189587734105e-05, "loss": 0.7823152542114258, "step": 4471 }, { "epoch": 0.3662234233127578, "grad_norm": 0.15301331877708435, "learning_rate": 1.954124372667069e-05, "loss": 0.7712863087654114, "step": 4472 }, { "epoch": 0.3663053158492768, "grad_norm": 0.17974521219730377, "learning_rate": 1.9539297537833768e-05, "loss": 0.7622796297073364, "step": 4473 }, { "epoch": 0.36638720838579575, "grad_norm": 0.18023647367954254, "learning_rate": 1.9537351021307923e-05, "loss": 0.8440065383911133, "step": 4474 }, { "epoch": 0.3664691009223147, "grad_norm": 0.19292372465133667, "learning_rate": 1.9535404177177755e-05, "loss": 1.007076621055603, "step": 4475 }, { "epoch": 0.36655099345883363, "grad_norm": 0.18161523342132568, "learning_rate": 1.9533457005527887e-05, "loss": 0.9718837738037109, "step": 4476 }, { "epoch": 0.3666328859953526, "grad_norm": 0.13928084075450897, "learning_rate": 1.9531509506442953e-05, "loss": 0.7639694213867188, "step": 4477 }, { "epoch": 0.36671477853187157, "grad_norm": 0.2091611623764038, "learning_rate": 1.9529561680007588e-05, "loss": 1.0506787300109863, "step": 4478 }, { "epoch": 0.3667966710683905, "grad_norm": 0.2031441479921341, "learning_rate": 1.9527613526306468e-05, "loss": 0.8657310605049133, "step": 4479 }, { "epoch": 0.36687856360490945, "grad_norm": 0.1838437020778656, "learning_rate": 1.952566504542426e-05, "loss": 0.7657269835472107, "step": 4480 }, { "epoch": 0.3669604561414284, "grad_norm": 0.16421669721603394, "learning_rate": 1.9523716237445647e-05, "loss": 0.894124448299408, "step": 4481 }, { "epoch": 0.3670423486779474, "grad_norm": 0.1986609250307083, "learning_rate": 1.9521767102455343e-05, "loss": 0.8338479399681091, "step": 4482 }, { "epoch": 0.3671242412144663, "grad_norm": 0.1784464567899704, "learning_rate": 1.951981764053806e-05, "loss": 0.9913228154182434, "step": 4483 }, { "epoch": 0.36720613375098526, "grad_norm": 0.21472986042499542, "learning_rate": 1.951786785177853e-05, "loss": 0.8309758901596069, "step": 4484 }, { "epoch": 0.36728802628750423, "grad_norm": 0.15903311967849731, "learning_rate": 1.95159177362615e-05, "loss": 0.7856239676475525, "step": 4485 }, { "epoch": 0.3673699188240232, "grad_norm": 0.18653059005737305, "learning_rate": 1.9513967294071726e-05, "loss": 0.8931725025177002, "step": 4486 }, { "epoch": 0.3674518113605421, "grad_norm": 0.18549638986587524, "learning_rate": 1.951201652529399e-05, "loss": 0.727169394493103, "step": 4487 }, { "epoch": 0.3675337038970611, "grad_norm": 0.20123016834259033, "learning_rate": 1.9510065430013073e-05, "loss": 1.066023349761963, "step": 4488 }, { "epoch": 0.36761559643358005, "grad_norm": 0.22585538029670715, "learning_rate": 1.9508114008313783e-05, "loss": 0.7750298976898193, "step": 4489 }, { "epoch": 0.36769748897009896, "grad_norm": 0.16940414905548096, "learning_rate": 1.9506162260280935e-05, "loss": 0.4586896002292633, "step": 4490 }, { "epoch": 0.36777938150661793, "grad_norm": 0.16240467131137848, "learning_rate": 1.950421018599936e-05, "loss": 0.6740632057189941, "step": 4491 }, { "epoch": 0.3678612740431369, "grad_norm": 0.18858158588409424, "learning_rate": 1.95022577855539e-05, "loss": 0.8084611892700195, "step": 4492 }, { "epoch": 0.36794316657965587, "grad_norm": 0.18464578688144684, "learning_rate": 1.950030505902942e-05, "loss": 0.9965607523918152, "step": 4493 }, { "epoch": 0.3680250591161748, "grad_norm": 0.21004711091518402, "learning_rate": 1.9498352006510788e-05, "loss": 0.9133917093276978, "step": 4494 }, { "epoch": 0.36810695165269375, "grad_norm": 0.17623217403888702, "learning_rate": 1.9496398628082896e-05, "loss": 0.9391257762908936, "step": 4495 }, { "epoch": 0.3681888441892127, "grad_norm": 0.17982333898544312, "learning_rate": 1.9494444923830644e-05, "loss": 0.7609497308731079, "step": 4496 }, { "epoch": 0.3682707367257317, "grad_norm": 0.19167070090770721, "learning_rate": 1.9492490893838947e-05, "loss": 0.6060552597045898, "step": 4497 }, { "epoch": 0.3683526292622506, "grad_norm": 0.20592227578163147, "learning_rate": 1.9490536538192738e-05, "loss": 0.851462185382843, "step": 4498 }, { "epoch": 0.36843452179876957, "grad_norm": 0.17703038454055786, "learning_rate": 1.9488581856976958e-05, "loss": 0.7449119687080383, "step": 4499 }, { "epoch": 0.36851641433528853, "grad_norm": 0.18883945047855377, "learning_rate": 1.948662685027657e-05, "loss": 0.8299137353897095, "step": 4500 }, { "epoch": 0.36859830687180745, "grad_norm": 0.19982299208641052, "learning_rate": 1.948467151817654e-05, "loss": 0.7308027744293213, "step": 4501 }, { "epoch": 0.3686801994083264, "grad_norm": 0.19350045919418335, "learning_rate": 1.9482715860761864e-05, "loss": 0.8454387784004211, "step": 4502 }, { "epoch": 0.3687620919448454, "grad_norm": 0.1684250682592392, "learning_rate": 1.9480759878117535e-05, "loss": 0.6734793186187744, "step": 4503 }, { "epoch": 0.36884398448136435, "grad_norm": 0.18470391631126404, "learning_rate": 1.9478803570328566e-05, "loss": 0.9357970356941223, "step": 4504 }, { "epoch": 0.36892587701788326, "grad_norm": 0.21319517493247986, "learning_rate": 1.9476846937479995e-05, "loss": 1.3622959852218628, "step": 4505 }, { "epoch": 0.36900776955440223, "grad_norm": 0.15470333397388458, "learning_rate": 1.9474889979656862e-05, "loss": 0.7581462860107422, "step": 4506 }, { "epoch": 0.3690896620909212, "grad_norm": 0.18320509791374207, "learning_rate": 1.9472932696944227e-05, "loss": 0.8273270130157471, "step": 4507 }, { "epoch": 0.36917155462744017, "grad_norm": 0.22648395597934723, "learning_rate": 1.9470975089427155e-05, "loss": 0.8774992227554321, "step": 4508 }, { "epoch": 0.3692534471639591, "grad_norm": 0.22458767890930176, "learning_rate": 1.946901715719073e-05, "loss": 0.6364206671714783, "step": 4509 }, { "epoch": 0.36933533970047805, "grad_norm": 0.2132290154695511, "learning_rate": 1.9467058900320065e-05, "loss": 0.8736752271652222, "step": 4510 }, { "epoch": 0.369417232236997, "grad_norm": 0.17828500270843506, "learning_rate": 1.946510031890026e-05, "loss": 0.9607784748077393, "step": 4511 }, { "epoch": 0.36949912477351593, "grad_norm": 0.18401983380317688, "learning_rate": 1.946314141301645e-05, "loss": 1.117727518081665, "step": 4512 }, { "epoch": 0.3695810173100349, "grad_norm": 0.18003787100315094, "learning_rate": 1.9461182182753776e-05, "loss": 0.9633479118347168, "step": 4513 }, { "epoch": 0.36966290984655387, "grad_norm": 0.17685002088546753, "learning_rate": 1.9459222628197396e-05, "loss": 0.7344169020652771, "step": 4514 }, { "epoch": 0.36974480238307283, "grad_norm": 0.1823803335428238, "learning_rate": 1.9457262749432475e-05, "loss": 0.9310920238494873, "step": 4515 }, { "epoch": 0.36982669491959175, "grad_norm": 0.1701962649822235, "learning_rate": 1.9455302546544203e-05, "loss": 0.6976556777954102, "step": 4516 }, { "epoch": 0.3699085874561107, "grad_norm": 0.19401836395263672, "learning_rate": 1.9453342019617775e-05, "loss": 0.8871124982833862, "step": 4517 }, { "epoch": 0.3699904799926297, "grad_norm": 0.1735466569662094, "learning_rate": 1.9451381168738405e-05, "loss": 0.6942733526229858, "step": 4518 }, { "epoch": 0.37007237252914865, "grad_norm": 0.18954968452453613, "learning_rate": 1.944941999399132e-05, "loss": 0.8441452383995056, "step": 4519 }, { "epoch": 0.37015426506566756, "grad_norm": 0.1651827096939087, "learning_rate": 1.9447458495461763e-05, "loss": 0.7416768670082092, "step": 4520 }, { "epoch": 0.37023615760218653, "grad_norm": 0.19355501234531403, "learning_rate": 1.944549667323498e-05, "loss": 1.031585931777954, "step": 4521 }, { "epoch": 0.3703180501387055, "grad_norm": 0.19508761167526245, "learning_rate": 1.944353452739625e-05, "loss": 1.1424672603607178, "step": 4522 }, { "epoch": 0.3703999426752244, "grad_norm": 0.21336470544338226, "learning_rate": 1.9441572058030848e-05, "loss": 0.683007001876831, "step": 4523 }, { "epoch": 0.3704818352117434, "grad_norm": 0.175675168633461, "learning_rate": 1.9439609265224075e-05, "loss": 0.5875193476676941, "step": 4524 }, { "epoch": 0.37056372774826235, "grad_norm": 0.21545380353927612, "learning_rate": 1.9437646149061245e-05, "loss": 0.8703210949897766, "step": 4525 }, { "epoch": 0.3706456202847813, "grad_norm": 0.19088014960289001, "learning_rate": 1.943568270962768e-05, "loss": 0.7797892689704895, "step": 4526 }, { "epoch": 0.37072751282130023, "grad_norm": 0.14856427907943726, "learning_rate": 1.9433718947008716e-05, "loss": 0.9655696749687195, "step": 4527 }, { "epoch": 0.3708094053578192, "grad_norm": 0.1834736168384552, "learning_rate": 1.943175486128971e-05, "loss": 0.6576709747314453, "step": 4528 }, { "epoch": 0.37089129789433817, "grad_norm": 0.18833300471305847, "learning_rate": 1.9429790452556033e-05, "loss": 0.9876053333282471, "step": 4529 }, { "epoch": 0.3709731904308571, "grad_norm": 0.1802380532026291, "learning_rate": 1.942782572089306e-05, "loss": 0.8554468750953674, "step": 4530 }, { "epoch": 0.37105508296737605, "grad_norm": 0.2061423510313034, "learning_rate": 1.9425860666386187e-05, "loss": 0.9618027210235596, "step": 4531 }, { "epoch": 0.371136975503895, "grad_norm": 0.19142718613147736, "learning_rate": 1.9423895289120824e-05, "loss": 0.7149055600166321, "step": 4532 }, { "epoch": 0.371218868040414, "grad_norm": 0.18927709758281708, "learning_rate": 1.9421929589182393e-05, "loss": 0.773067831993103, "step": 4533 }, { "epoch": 0.3713007605769329, "grad_norm": 0.1648421436548233, "learning_rate": 1.941996356665634e-05, "loss": 0.8457905650138855, "step": 4534 }, { "epoch": 0.37138265311345187, "grad_norm": 0.18654143810272217, "learning_rate": 1.94179972216281e-05, "loss": 0.900816798210144, "step": 4535 }, { "epoch": 0.37146454564997083, "grad_norm": 0.20962196588516235, "learning_rate": 1.9416030554183155e-05, "loss": 0.6707956790924072, "step": 4536 }, { "epoch": 0.3715464381864898, "grad_norm": 0.1802428811788559, "learning_rate": 1.9414063564406977e-05, "loss": 0.8269150257110596, "step": 4537 }, { "epoch": 0.3716283307230087, "grad_norm": 0.19045491516590118, "learning_rate": 1.941209625238506e-05, "loss": 1.281597375869751, "step": 4538 }, { "epoch": 0.3717102232595277, "grad_norm": 0.21050530672073364, "learning_rate": 1.9410128618202913e-05, "loss": 0.8412893414497375, "step": 4539 }, { "epoch": 0.37179211579604665, "grad_norm": 0.18824219703674316, "learning_rate": 1.9408160661946052e-05, "loss": 1.096423864364624, "step": 4540 }, { "epoch": 0.37187400833256556, "grad_norm": 0.1591040939092636, "learning_rate": 1.940619238370002e-05, "loss": 0.9486916065216064, "step": 4541 }, { "epoch": 0.37195590086908453, "grad_norm": 0.1613236665725708, "learning_rate": 1.940422378355036e-05, "loss": 0.6722403764724731, "step": 4542 }, { "epoch": 0.3720377934056035, "grad_norm": 0.19576163589954376, "learning_rate": 1.9402254861582645e-05, "loss": 1.0907971858978271, "step": 4543 }, { "epoch": 0.37211968594212247, "grad_norm": 0.2036546766757965, "learning_rate": 1.940028561788244e-05, "loss": 1.0277293920516968, "step": 4544 }, { "epoch": 0.3722015784786414, "grad_norm": 0.20037733018398285, "learning_rate": 1.9398316052535346e-05, "loss": 0.9633432626724243, "step": 4545 }, { "epoch": 0.37228347101516035, "grad_norm": 0.19084274768829346, "learning_rate": 1.9396346165626964e-05, "loss": 0.6675033569335938, "step": 4546 }, { "epoch": 0.3723653635516793, "grad_norm": 0.16899316012859344, "learning_rate": 1.9394375957242915e-05, "loss": 0.7685231566429138, "step": 4547 }, { "epoch": 0.3724472560881983, "grad_norm": 0.1697678416967392, "learning_rate": 1.9392405427468832e-05, "loss": 0.4490240514278412, "step": 4548 }, { "epoch": 0.3725291486247172, "grad_norm": 0.2173643261194229, "learning_rate": 1.9390434576390355e-05, "loss": 1.0609660148620605, "step": 4549 }, { "epoch": 0.37261104116123617, "grad_norm": 0.2078549563884735, "learning_rate": 1.938846340409316e-05, "loss": 0.571779727935791, "step": 4550 }, { "epoch": 0.37269293369775514, "grad_norm": 0.16200584173202515, "learning_rate": 1.9386491910662916e-05, "loss": 0.8075960874557495, "step": 4551 }, { "epoch": 0.37277482623427405, "grad_norm": 0.16025245189666748, "learning_rate": 1.9384520096185306e-05, "loss": 0.9250539541244507, "step": 4552 }, { "epoch": 0.372856718770793, "grad_norm": 0.1940290778875351, "learning_rate": 1.9382547960746038e-05, "loss": 1.031877040863037, "step": 4553 }, { "epoch": 0.372938611307312, "grad_norm": 0.18438591063022614, "learning_rate": 1.938057550443083e-05, "loss": 0.6505870223045349, "step": 4554 }, { "epoch": 0.37302050384383095, "grad_norm": 0.14912840723991394, "learning_rate": 1.9378602727325416e-05, "loss": 0.5663623213768005, "step": 4555 }, { "epoch": 0.37310239638034987, "grad_norm": 0.19380109012126923, "learning_rate": 1.937662962951553e-05, "loss": 0.8810120224952698, "step": 4556 }, { "epoch": 0.37318428891686883, "grad_norm": 0.18831732869148254, "learning_rate": 1.9374656211086945e-05, "loss": 0.9577232599258423, "step": 4557 }, { "epoch": 0.3732661814533878, "grad_norm": 0.2217322140932083, "learning_rate": 1.9372682472125424e-05, "loss": 0.893231987953186, "step": 4558 }, { "epoch": 0.37334807398990677, "grad_norm": 0.203490749001503, "learning_rate": 1.9370708412716755e-05, "loss": 0.8800333142280579, "step": 4559 }, { "epoch": 0.3734299665264257, "grad_norm": 0.1817035973072052, "learning_rate": 1.9368734032946745e-05, "loss": 0.9259476661682129, "step": 4560 }, { "epoch": 0.37351185906294465, "grad_norm": 0.20660722255706787, "learning_rate": 1.93667593329012e-05, "loss": 0.9931461215019226, "step": 4561 }, { "epoch": 0.3735937515994636, "grad_norm": 0.17772835493087769, "learning_rate": 1.9364784312665957e-05, "loss": 0.7022066712379456, "step": 4562 }, { "epoch": 0.37367564413598253, "grad_norm": 0.1951800286769867, "learning_rate": 1.9362808972326853e-05, "loss": 0.8044015169143677, "step": 4563 }, { "epoch": 0.3737575366725015, "grad_norm": 0.18887825310230255, "learning_rate": 1.9360833311969742e-05, "loss": 0.7709892988204956, "step": 4564 }, { "epoch": 0.37383942920902047, "grad_norm": 0.20662137866020203, "learning_rate": 1.9358857331680505e-05, "loss": 0.8899393677711487, "step": 4565 }, { "epoch": 0.37392132174553944, "grad_norm": 0.1804330050945282, "learning_rate": 1.9356881031545017e-05, "loss": 0.6485239863395691, "step": 4566 }, { "epoch": 0.37400321428205835, "grad_norm": 0.23091301321983337, "learning_rate": 1.9354904411649174e-05, "loss": 0.7925410866737366, "step": 4567 }, { "epoch": 0.3740851068185773, "grad_norm": 0.19559821486473083, "learning_rate": 1.9352927472078898e-05, "loss": 0.8500974774360657, "step": 4568 }, { "epoch": 0.3741669993550963, "grad_norm": 0.15601348876953125, "learning_rate": 1.9350950212920107e-05, "loss": 0.9639931321144104, "step": 4569 }, { "epoch": 0.37424889189161525, "grad_norm": 0.2301827073097229, "learning_rate": 1.9348972634258746e-05, "loss": 0.8901113867759705, "step": 4570 }, { "epoch": 0.37433078442813417, "grad_norm": 0.20078466832637787, "learning_rate": 1.9346994736180763e-05, "loss": 0.9731923937797546, "step": 4571 }, { "epoch": 0.37441267696465313, "grad_norm": 0.20019513368606567, "learning_rate": 1.934501651877213e-05, "loss": 0.5382856726646423, "step": 4572 }, { "epoch": 0.3744945695011721, "grad_norm": 0.16807831823825836, "learning_rate": 1.9343037982118828e-05, "loss": 0.8273330926895142, "step": 4573 }, { "epoch": 0.374576462037691, "grad_norm": 0.19539517164230347, "learning_rate": 1.934105912630685e-05, "loss": 1.2040979862213135, "step": 4574 }, { "epoch": 0.37465835457421, "grad_norm": 0.17991510033607483, "learning_rate": 1.9339079951422208e-05, "loss": 0.7164191007614136, "step": 4575 }, { "epoch": 0.37474024711072895, "grad_norm": 0.21070437133312225, "learning_rate": 1.9337100457550927e-05, "loss": 0.8691115379333496, "step": 4576 }, { "epoch": 0.3748221396472479, "grad_norm": 0.17130343616008759, "learning_rate": 1.933512064477904e-05, "loss": 0.6696967482566833, "step": 4577 }, { "epoch": 0.37490403218376683, "grad_norm": 0.21378833055496216, "learning_rate": 1.933314051319259e-05, "loss": 0.6832566857337952, "step": 4578 }, { "epoch": 0.3749859247202858, "grad_norm": 0.15101757645606995, "learning_rate": 1.9331160062877657e-05, "loss": 0.6452093720436096, "step": 4579 }, { "epoch": 0.37506781725680477, "grad_norm": 0.22484837472438812, "learning_rate": 1.932917929392031e-05, "loss": 1.0838408470153809, "step": 4580 }, { "epoch": 0.37514970979332374, "grad_norm": 0.20229625701904297, "learning_rate": 1.932719820640665e-05, "loss": 1.0439257621765137, "step": 4581 }, { "epoch": 0.37523160232984265, "grad_norm": 0.18673290312290192, "learning_rate": 1.9325216800422774e-05, "loss": 0.7688818573951721, "step": 4582 }, { "epoch": 0.3753134948663616, "grad_norm": 0.1581125259399414, "learning_rate": 1.9323235076054804e-05, "loss": 0.7316241264343262, "step": 4583 }, { "epoch": 0.3753953874028806, "grad_norm": 0.2093316614627838, "learning_rate": 1.9321253033388876e-05, "loss": 0.7076972723007202, "step": 4584 }, { "epoch": 0.3754772799393995, "grad_norm": 0.18392010033130646, "learning_rate": 1.9319270672511137e-05, "loss": 0.8832443952560425, "step": 4585 }, { "epoch": 0.37555917247591847, "grad_norm": 0.16416677832603455, "learning_rate": 1.9317287993507753e-05, "loss": 0.6995161771774292, "step": 4586 }, { "epoch": 0.37564106501243744, "grad_norm": 0.17237384617328644, "learning_rate": 1.9315304996464893e-05, "loss": 0.7553730607032776, "step": 4587 }, { "epoch": 0.3757229575489564, "grad_norm": 0.18508848547935486, "learning_rate": 1.9313321681468748e-05, "loss": 0.8050664067268372, "step": 4588 }, { "epoch": 0.3758048500854753, "grad_norm": 0.21674981713294983, "learning_rate": 1.931133804860552e-05, "loss": 0.8104474544525146, "step": 4589 }, { "epoch": 0.3758867426219943, "grad_norm": 0.20441006124019623, "learning_rate": 1.9309354097961423e-05, "loss": 0.9267318248748779, "step": 4590 }, { "epoch": 0.37596863515851325, "grad_norm": 0.17830127477645874, "learning_rate": 1.93073698296227e-05, "loss": 0.8412303924560547, "step": 4591 }, { "epoch": 0.37605052769503217, "grad_norm": 0.19757241010665894, "learning_rate": 1.9305385243675582e-05, "loss": 0.6384515166282654, "step": 4592 }, { "epoch": 0.37613242023155113, "grad_norm": 0.21371901035308838, "learning_rate": 1.9303400340206334e-05, "loss": 0.6727374792098999, "step": 4593 }, { "epoch": 0.3762143127680701, "grad_norm": 0.1869383156299591, "learning_rate": 1.930141511930123e-05, "loss": 1.1401580572128296, "step": 4594 }, { "epoch": 0.37629620530458907, "grad_norm": 0.2347451001405716, "learning_rate": 1.929942958104655e-05, "loss": 0.909294068813324, "step": 4595 }, { "epoch": 0.376378097841108, "grad_norm": 0.15483404695987701, "learning_rate": 1.9297443725528595e-05, "loss": 0.9040797352790833, "step": 4596 }, { "epoch": 0.37645999037762695, "grad_norm": 0.21393480896949768, "learning_rate": 1.929545755283368e-05, "loss": 0.669298529624939, "step": 4597 }, { "epoch": 0.3765418829141459, "grad_norm": 0.16979260742664337, "learning_rate": 1.9293471063048133e-05, "loss": 0.9506065845489502, "step": 4598 }, { "epoch": 0.3766237754506649, "grad_norm": 0.21195824444293976, "learning_rate": 1.9291484256258295e-05, "loss": 0.9584797620773315, "step": 4599 }, { "epoch": 0.3767056679871838, "grad_norm": 0.1855274736881256, "learning_rate": 1.9289497132550516e-05, "loss": 0.8431603908538818, "step": 4600 }, { "epoch": 0.37678756052370277, "grad_norm": 0.1596568524837494, "learning_rate": 1.928750969201117e-05, "loss": 0.7100690603256226, "step": 4601 }, { "epoch": 0.37686945306022174, "grad_norm": 0.15324713289737701, "learning_rate": 1.928552193472664e-05, "loss": 0.7743615508079529, "step": 4602 }, { "epoch": 0.37695134559674065, "grad_norm": 0.16985802352428436, "learning_rate": 1.9283533860783317e-05, "loss": 0.7185080051422119, "step": 4603 }, { "epoch": 0.3770332381332596, "grad_norm": 0.2017555981874466, "learning_rate": 1.9281545470267615e-05, "loss": 0.7175133228302002, "step": 4604 }, { "epoch": 0.3771151306697786, "grad_norm": 0.13697998225688934, "learning_rate": 1.9279556763265956e-05, "loss": 0.5350152254104614, "step": 4605 }, { "epoch": 0.37719702320629755, "grad_norm": 0.20710907876491547, "learning_rate": 1.927756773986478e-05, "loss": 1.0026438236236572, "step": 4606 }, { "epoch": 0.37727891574281647, "grad_norm": 0.16106805205345154, "learning_rate": 1.9275578400150537e-05, "loss": 0.9975225329399109, "step": 4607 }, { "epoch": 0.37736080827933544, "grad_norm": 0.16031181812286377, "learning_rate": 1.927358874420969e-05, "loss": 0.6076956391334534, "step": 4608 }, { "epoch": 0.3774427008158544, "grad_norm": 0.18430650234222412, "learning_rate": 1.927159877212871e-05, "loss": 0.7946627140045166, "step": 4609 }, { "epoch": 0.37752459335237337, "grad_norm": 0.17545609176158905, "learning_rate": 1.926960848399411e-05, "loss": 0.6843754649162292, "step": 4610 }, { "epoch": 0.3776064858888923, "grad_norm": 0.19993361830711365, "learning_rate": 1.9267617879892376e-05, "loss": 0.6761384010314941, "step": 4611 }, { "epoch": 0.37768837842541125, "grad_norm": 0.18578436970710754, "learning_rate": 1.9265626959910037e-05, "loss": 0.8283423185348511, "step": 4612 }, { "epoch": 0.3777702709619302, "grad_norm": 0.19191323220729828, "learning_rate": 1.926363572413363e-05, "loss": 0.828531801700592, "step": 4613 }, { "epoch": 0.37785216349844913, "grad_norm": 0.19362658262252808, "learning_rate": 1.926164417264969e-05, "loss": 0.5216598510742188, "step": 4614 }, { "epoch": 0.3779340560349681, "grad_norm": 0.18186156451702118, "learning_rate": 1.925965230554479e-05, "loss": 0.49163955450057983, "step": 4615 }, { "epoch": 0.37801594857148707, "grad_norm": 0.1928747296333313, "learning_rate": 1.92576601229055e-05, "loss": 1.0166362524032593, "step": 4616 }, { "epoch": 0.37809784110800604, "grad_norm": 0.17852579057216644, "learning_rate": 1.925566762481841e-05, "loss": 0.7471522688865662, "step": 4617 }, { "epoch": 0.37817973364452495, "grad_norm": 0.21047402918338776, "learning_rate": 1.9253674811370123e-05, "loss": 0.6857478022575378, "step": 4618 }, { "epoch": 0.3782616261810439, "grad_norm": 0.17051324248313904, "learning_rate": 1.925168168264725e-05, "loss": 0.6565566062927246, "step": 4619 }, { "epoch": 0.3783435187175629, "grad_norm": 0.17616161704063416, "learning_rate": 1.9249688238736423e-05, "loss": 0.7236255407333374, "step": 4620 }, { "epoch": 0.37842541125408186, "grad_norm": 0.178658127784729, "learning_rate": 1.9247694479724288e-05, "loss": 0.8734599351882935, "step": 4621 }, { "epoch": 0.37850730379060077, "grad_norm": 0.17532174289226532, "learning_rate": 1.92457004056975e-05, "loss": 0.665255069732666, "step": 4622 }, { "epoch": 0.37858919632711974, "grad_norm": 0.20607908070087433, "learning_rate": 1.9243706016742728e-05, "loss": 0.9591212272644043, "step": 4623 }, { "epoch": 0.3786710888636387, "grad_norm": 0.2087327390909195, "learning_rate": 1.924171131294666e-05, "loss": 0.8994796276092529, "step": 4624 }, { "epoch": 0.3787529814001576, "grad_norm": 0.17072175443172455, "learning_rate": 1.923971629439599e-05, "loss": 0.6543944478034973, "step": 4625 }, { "epoch": 0.3788348739366766, "grad_norm": 0.22963057458400726, "learning_rate": 1.923772096117743e-05, "loss": 0.9494592547416687, "step": 4626 }, { "epoch": 0.37891676647319555, "grad_norm": 0.179121732711792, "learning_rate": 1.923572531337771e-05, "loss": 1.0978577136993408, "step": 4627 }, { "epoch": 0.3789986590097145, "grad_norm": 0.2187269777059555, "learning_rate": 1.9233729351083564e-05, "loss": 1.0316312313079834, "step": 4628 }, { "epoch": 0.37908055154623344, "grad_norm": 0.22152844071388245, "learning_rate": 1.923173307438175e-05, "loss": 0.7307385206222534, "step": 4629 }, { "epoch": 0.3791624440827524, "grad_norm": 0.16824860870838165, "learning_rate": 1.9229736483359028e-05, "loss": 0.7215285301208496, "step": 4630 }, { "epoch": 0.37924433661927137, "grad_norm": 0.19458675384521484, "learning_rate": 1.922773957810218e-05, "loss": 0.9631766676902771, "step": 4631 }, { "epoch": 0.37932622915579034, "grad_norm": 0.18557296693325043, "learning_rate": 1.922574235869801e-05, "loss": 0.732447624206543, "step": 4632 }, { "epoch": 0.37940812169230925, "grad_norm": 0.18249297142028809, "learning_rate": 1.9223744825233305e-05, "loss": 1.072929859161377, "step": 4633 }, { "epoch": 0.3794900142288282, "grad_norm": 0.23205098509788513, "learning_rate": 1.9221746977794904e-05, "loss": 0.9405452013015747, "step": 4634 }, { "epoch": 0.3795719067653472, "grad_norm": 0.1696392148733139, "learning_rate": 1.9219748816469633e-05, "loss": 0.612343430519104, "step": 4635 }, { "epoch": 0.3796537993018661, "grad_norm": 0.15711471438407898, "learning_rate": 1.9217750341344345e-05, "loss": 0.7110911011695862, "step": 4636 }, { "epoch": 0.37973569183838507, "grad_norm": 0.1825173795223236, "learning_rate": 1.9215751552505896e-05, "loss": 0.9842389225959778, "step": 4637 }, { "epoch": 0.37981758437490404, "grad_norm": 0.19044475257396698, "learning_rate": 1.9213752450041166e-05, "loss": 0.8584103584289551, "step": 4638 }, { "epoch": 0.379899476911423, "grad_norm": 0.15955668687820435, "learning_rate": 1.9211753034037046e-05, "loss": 0.6860961318016052, "step": 4639 }, { "epoch": 0.3799813694479419, "grad_norm": 0.1818350851535797, "learning_rate": 1.9209753304580432e-05, "loss": 0.6002848148345947, "step": 4640 }, { "epoch": 0.3800632619844609, "grad_norm": 0.23158234357833862, "learning_rate": 1.9207753261758247e-05, "loss": 1.1487622261047363, "step": 4641 }, { "epoch": 0.38014515452097986, "grad_norm": 0.18547779321670532, "learning_rate": 1.920575290565742e-05, "loss": 0.837938129901886, "step": 4642 }, { "epoch": 0.3802270470574988, "grad_norm": 0.20087409019470215, "learning_rate": 1.920375223636489e-05, "loss": 0.8864272236824036, "step": 4643 }, { "epoch": 0.38030893959401774, "grad_norm": 0.19217292964458466, "learning_rate": 1.920175125396762e-05, "loss": 0.9743700623512268, "step": 4644 }, { "epoch": 0.3803908321305367, "grad_norm": 0.17482365667819977, "learning_rate": 1.9199749958552576e-05, "loss": 0.7312459945678711, "step": 4645 }, { "epoch": 0.3804727246670557, "grad_norm": 0.19659122824668884, "learning_rate": 1.919774835020675e-05, "loss": 0.7501091957092285, "step": 4646 }, { "epoch": 0.3805546172035746, "grad_norm": 0.2303171157836914, "learning_rate": 1.9195746429017132e-05, "loss": 1.1049586534500122, "step": 4647 }, { "epoch": 0.38063650974009355, "grad_norm": 0.17912191152572632, "learning_rate": 1.919374419507074e-05, "loss": 0.6308512687683105, "step": 4648 }, { "epoch": 0.3807184022766125, "grad_norm": 0.1980789303779602, "learning_rate": 1.919174164845459e-05, "loss": 0.9254670143127441, "step": 4649 }, { "epoch": 0.3808002948131315, "grad_norm": 0.18950803577899933, "learning_rate": 1.9189738789255733e-05, "loss": 0.9841037392616272, "step": 4650 }, { "epoch": 0.3808821873496504, "grad_norm": 0.1781456172466278, "learning_rate": 1.9187735617561217e-05, "loss": 0.8448410034179688, "step": 4651 }, { "epoch": 0.38096407988616937, "grad_norm": 0.17821036279201508, "learning_rate": 1.9185732133458104e-05, "loss": 0.9637575149536133, "step": 4652 }, { "epoch": 0.38104597242268834, "grad_norm": 0.14005641639232635, "learning_rate": 1.9183728337033475e-05, "loss": 0.9429913759231567, "step": 4653 }, { "epoch": 0.3811278649592073, "grad_norm": 0.2177179902791977, "learning_rate": 1.9181724228374427e-05, "loss": 0.7686384916305542, "step": 4654 }, { "epoch": 0.3812097574957262, "grad_norm": 0.20252060890197754, "learning_rate": 1.9179719807568063e-05, "loss": 1.2005079984664917, "step": 4655 }, { "epoch": 0.3812916500322452, "grad_norm": 0.18166102468967438, "learning_rate": 1.9177715074701508e-05, "loss": 0.7227151989936829, "step": 4656 }, { "epoch": 0.38137354256876416, "grad_norm": 0.16207797825336456, "learning_rate": 1.917571002986189e-05, "loss": 1.0884654521942139, "step": 4657 }, { "epoch": 0.38145543510528307, "grad_norm": 0.15721853077411652, "learning_rate": 1.917370467313636e-05, "loss": 0.9087743759155273, "step": 4658 }, { "epoch": 0.38153732764180204, "grad_norm": 0.16655312478542328, "learning_rate": 1.9171699004612083e-05, "loss": 0.7234389781951904, "step": 4659 }, { "epoch": 0.381619220178321, "grad_norm": 0.19969238340854645, "learning_rate": 1.9169693024376224e-05, "loss": 0.7969765663146973, "step": 4660 }, { "epoch": 0.38170111271484, "grad_norm": 0.15778997540473938, "learning_rate": 1.9167686732515978e-05, "loss": 0.846939742565155, "step": 4661 }, { "epoch": 0.3817830052513589, "grad_norm": 0.1712370365858078, "learning_rate": 1.9165680129118545e-05, "loss": 0.7386664748191833, "step": 4662 }, { "epoch": 0.38186489778787785, "grad_norm": 0.17935208976268768, "learning_rate": 1.9163673214271143e-05, "loss": 0.8220565319061279, "step": 4663 }, { "epoch": 0.3819467903243968, "grad_norm": 0.22253113985061646, "learning_rate": 1.9161665988060996e-05, "loss": 0.7763327956199646, "step": 4664 }, { "epoch": 0.38202868286091574, "grad_norm": 0.2299795299768448, "learning_rate": 1.915965845057535e-05, "loss": 0.9015088081359863, "step": 4665 }, { "epoch": 0.3821105753974347, "grad_norm": 0.17065173387527466, "learning_rate": 1.9157650601901454e-05, "loss": 0.8723979592323303, "step": 4666 }, { "epoch": 0.3821924679339537, "grad_norm": 0.1794518083333969, "learning_rate": 1.9155642442126585e-05, "loss": 1.0887919664382935, "step": 4667 }, { "epoch": 0.38227436047047264, "grad_norm": 0.41670772433280945, "learning_rate": 1.9153633971338027e-05, "loss": 0.6485389471054077, "step": 4668 }, { "epoch": 0.38235625300699155, "grad_norm": 0.1808953881263733, "learning_rate": 1.915162518962307e-05, "loss": 0.7988380789756775, "step": 4669 }, { "epoch": 0.3824381455435105, "grad_norm": 0.18424618244171143, "learning_rate": 1.9149616097069028e-05, "loss": 1.096545934677124, "step": 4670 }, { "epoch": 0.3825200380800295, "grad_norm": 0.19670920073986053, "learning_rate": 1.9147606693763222e-05, "loss": 0.7661637663841248, "step": 4671 }, { "epoch": 0.38260193061654846, "grad_norm": 0.1931157261133194, "learning_rate": 1.914559697979299e-05, "loss": 1.1491210460662842, "step": 4672 }, { "epoch": 0.38268382315306737, "grad_norm": 0.20335866510868073, "learning_rate": 1.9143586955245685e-05, "loss": 0.9681087136268616, "step": 4673 }, { "epoch": 0.38276571568958634, "grad_norm": 0.18682818114757538, "learning_rate": 1.9141576620208665e-05, "loss": 0.642471194267273, "step": 4674 }, { "epoch": 0.3828476082261053, "grad_norm": 0.22111478447914124, "learning_rate": 1.9139565974769314e-05, "loss": 1.2741645574569702, "step": 4675 }, { "epoch": 0.3829295007626242, "grad_norm": 0.19419409334659576, "learning_rate": 1.913755501901502e-05, "loss": 0.922049343585968, "step": 4676 }, { "epoch": 0.3830113932991432, "grad_norm": 0.15282124280929565, "learning_rate": 1.9135543753033186e-05, "loss": 0.9016849398612976, "step": 4677 }, { "epoch": 0.38309328583566216, "grad_norm": 0.19066882133483887, "learning_rate": 1.913353217691123e-05, "loss": 0.6194227337837219, "step": 4678 }, { "epoch": 0.3831751783721811, "grad_norm": 0.18405954539775848, "learning_rate": 1.913152029073659e-05, "loss": 0.8733087182044983, "step": 4679 }, { "epoch": 0.38325707090870004, "grad_norm": 0.2081102877855301, "learning_rate": 1.91295080945967e-05, "loss": 1.0447919368743896, "step": 4680 }, { "epoch": 0.383338963445219, "grad_norm": 0.1806064248085022, "learning_rate": 1.912749558857903e-05, "loss": 0.8776271343231201, "step": 4681 }, { "epoch": 0.383420855981738, "grad_norm": 0.1844988465309143, "learning_rate": 1.912548277277104e-05, "loss": 0.9014125466346741, "step": 4682 }, { "epoch": 0.38350274851825694, "grad_norm": 0.157343789935112, "learning_rate": 1.9123469647260223e-05, "loss": 0.8187368512153625, "step": 4683 }, { "epoch": 0.38358464105477585, "grad_norm": 0.2054157257080078, "learning_rate": 1.9121456212134075e-05, "loss": 0.9915117025375366, "step": 4684 }, { "epoch": 0.3836665335912948, "grad_norm": 0.17829643189907074, "learning_rate": 1.9119442467480116e-05, "loss": 0.9117102026939392, "step": 4685 }, { "epoch": 0.3837484261278138, "grad_norm": 0.18098647892475128, "learning_rate": 1.9117428413385855e-05, "loss": 1.1551471948623657, "step": 4686 }, { "epoch": 0.3838303186643327, "grad_norm": 0.16925470530986786, "learning_rate": 1.911541404993884e-05, "loss": 0.6812540292739868, "step": 4687 }, { "epoch": 0.38391221120085167, "grad_norm": 0.23048144578933716, "learning_rate": 1.9113399377226634e-05, "loss": 1.3978283405303955, "step": 4688 }, { "epoch": 0.38399410373737064, "grad_norm": 0.2023683488368988, "learning_rate": 1.9111384395336783e-05, "loss": 0.897840142250061, "step": 4689 }, { "epoch": 0.3840759962738896, "grad_norm": 0.16986438632011414, "learning_rate": 1.9109369104356884e-05, "loss": 0.540365993976593, "step": 4690 }, { "epoch": 0.3841578888104085, "grad_norm": 0.2009609192609787, "learning_rate": 1.9107353504374517e-05, "loss": 1.0243202447891235, "step": 4691 }, { "epoch": 0.3842397813469275, "grad_norm": 0.1699782907962799, "learning_rate": 1.9105337595477297e-05, "loss": 0.7374356389045715, "step": 4692 }, { "epoch": 0.38432167388344646, "grad_norm": 0.17886433005332947, "learning_rate": 1.9103321377752837e-05, "loss": 0.9126213788986206, "step": 4693 }, { "epoch": 0.3844035664199654, "grad_norm": 0.17297737300395966, "learning_rate": 1.9101304851288777e-05, "loss": 0.576627254486084, "step": 4694 }, { "epoch": 0.38448545895648434, "grad_norm": 0.20151056349277496, "learning_rate": 1.9099288016172758e-05, "loss": 0.6510732173919678, "step": 4695 }, { "epoch": 0.3845673514930033, "grad_norm": 0.18612340092658997, "learning_rate": 1.9097270872492446e-05, "loss": 0.8395634889602661, "step": 4696 }, { "epoch": 0.3846492440295223, "grad_norm": 0.1726856529712677, "learning_rate": 1.9095253420335504e-05, "loss": 0.7868666648864746, "step": 4697 }, { "epoch": 0.3847311365660412, "grad_norm": 0.17768368124961853, "learning_rate": 1.9093235659789627e-05, "loss": 0.7823092341423035, "step": 4698 }, { "epoch": 0.38481302910256016, "grad_norm": 0.21562795341014862, "learning_rate": 1.9091217590942518e-05, "loss": 0.8404867053031921, "step": 4699 }, { "epoch": 0.3848949216390791, "grad_norm": 0.6321312785148621, "learning_rate": 1.908919921388188e-05, "loss": 1.3623120784759521, "step": 4700 }, { "epoch": 0.3849768141755981, "grad_norm": 0.19024516642093658, "learning_rate": 1.908718052869545e-05, "loss": 0.3984249532222748, "step": 4701 }, { "epoch": 0.385058706712117, "grad_norm": 0.17382001876831055, "learning_rate": 1.908516153547096e-05, "loss": 0.777703583240509, "step": 4702 }, { "epoch": 0.385140599248636, "grad_norm": 0.16616584360599518, "learning_rate": 1.9083142234296173e-05, "loss": 0.7914732098579407, "step": 4703 }, { "epoch": 0.38522249178515494, "grad_norm": 0.18576990067958832, "learning_rate": 1.9081122625258847e-05, "loss": 0.7434723377227783, "step": 4704 }, { "epoch": 0.3853043843216739, "grad_norm": 0.1827581822872162, "learning_rate": 1.907910270844677e-05, "loss": 0.790871798992157, "step": 4705 }, { "epoch": 0.3853862768581928, "grad_norm": 0.25097396969795227, "learning_rate": 1.9077082483947732e-05, "loss": 0.5377322435379028, "step": 4706 }, { "epoch": 0.3854681693947118, "grad_norm": 0.18662287294864655, "learning_rate": 1.907506195184954e-05, "loss": 0.8455860018730164, "step": 4707 }, { "epoch": 0.38555006193123076, "grad_norm": 0.1949547678232193, "learning_rate": 1.9073041112240015e-05, "loss": 0.7009474635124207, "step": 4708 }, { "epoch": 0.38563195446774967, "grad_norm": 0.2103237807750702, "learning_rate": 1.907101996520699e-05, "loss": 1.0106903314590454, "step": 4709 }, { "epoch": 0.38571384700426864, "grad_norm": 0.2117520123720169, "learning_rate": 1.9068998510838318e-05, "loss": 1.0766915082931519, "step": 4710 }, { "epoch": 0.3857957395407876, "grad_norm": 0.20286214351654053, "learning_rate": 1.9066976749221855e-05, "loss": 0.6734803915023804, "step": 4711 }, { "epoch": 0.3858776320773066, "grad_norm": 0.16865183413028717, "learning_rate": 1.9064954680445472e-05, "loss": 0.651240885257721, "step": 4712 }, { "epoch": 0.3859595246138255, "grad_norm": 0.1968625783920288, "learning_rate": 1.9062932304597064e-05, "loss": 0.8508142232894897, "step": 4713 }, { "epoch": 0.38604141715034446, "grad_norm": 0.1921038031578064, "learning_rate": 1.9060909621764525e-05, "loss": 1.035552978515625, "step": 4714 }, { "epoch": 0.3861233096868634, "grad_norm": 0.18270500004291534, "learning_rate": 1.9058886632035775e-05, "loss": 0.8478269577026367, "step": 4715 }, { "epoch": 0.3862052022233824, "grad_norm": 0.18830478191375732, "learning_rate": 1.9056863335498738e-05, "loss": 0.8543262481689453, "step": 4716 }, { "epoch": 0.3862870947599013, "grad_norm": 0.1982220858335495, "learning_rate": 1.905483973224135e-05, "loss": 0.832301914691925, "step": 4717 }, { "epoch": 0.3863689872964203, "grad_norm": 0.17266348004341125, "learning_rate": 1.905281582235158e-05, "loss": 0.6463779807090759, "step": 4718 }, { "epoch": 0.38645087983293924, "grad_norm": 0.18379566073417664, "learning_rate": 1.905079160591738e-05, "loss": 0.953780472278595, "step": 4719 }, { "epoch": 0.38653277236945816, "grad_norm": 0.17711329460144043, "learning_rate": 1.9048767083026738e-05, "loss": 0.8314529061317444, "step": 4720 }, { "epoch": 0.3866146649059771, "grad_norm": 0.16410169005393982, "learning_rate": 1.9046742253767646e-05, "loss": 0.8281599283218384, "step": 4721 }, { "epoch": 0.3866965574424961, "grad_norm": 0.17608238756656647, "learning_rate": 1.9044717118228114e-05, "loss": 0.8168250322341919, "step": 4722 }, { "epoch": 0.38677844997901506, "grad_norm": 0.20266146957874298, "learning_rate": 1.904269167649616e-05, "loss": 0.751883864402771, "step": 4723 }, { "epoch": 0.386860342515534, "grad_norm": 0.17009034752845764, "learning_rate": 1.9040665928659818e-05, "loss": 0.5345026254653931, "step": 4724 }, { "epoch": 0.38694223505205294, "grad_norm": 0.19455698132514954, "learning_rate": 1.9038639874807135e-05, "loss": 0.8817864656448364, "step": 4725 }, { "epoch": 0.3870241275885719, "grad_norm": 0.18844927847385406, "learning_rate": 1.903661351502618e-05, "loss": 1.080299973487854, "step": 4726 }, { "epoch": 0.3871060201250909, "grad_norm": 0.18124818801879883, "learning_rate": 1.903458684940501e-05, "loss": 1.2788214683532715, "step": 4727 }, { "epoch": 0.3871879126616098, "grad_norm": 0.17130951583385468, "learning_rate": 1.9032559878031727e-05, "loss": 0.8856240510940552, "step": 4728 }, { "epoch": 0.38726980519812876, "grad_norm": 0.17440466582775116, "learning_rate": 1.9030532600994428e-05, "loss": 1.0971019268035889, "step": 4729 }, { "epoch": 0.3873516977346477, "grad_norm": 0.1842314451932907, "learning_rate": 1.9028505018381224e-05, "loss": 0.704480767250061, "step": 4730 }, { "epoch": 0.38743359027116664, "grad_norm": 0.17579196393489838, "learning_rate": 1.9026477130280246e-05, "loss": 0.9885772466659546, "step": 4731 }, { "epoch": 0.3875154828076856, "grad_norm": 0.180941641330719, "learning_rate": 1.9024448936779627e-05, "loss": 0.8051034212112427, "step": 4732 }, { "epoch": 0.3875973753442046, "grad_norm": 0.15753179788589478, "learning_rate": 1.902242043796753e-05, "loss": 0.557755708694458, "step": 4733 }, { "epoch": 0.38767926788072354, "grad_norm": 0.2828572392463684, "learning_rate": 1.9020391633932116e-05, "loss": 0.7578437924385071, "step": 4734 }, { "epoch": 0.38776116041724246, "grad_norm": 0.24470220506191254, "learning_rate": 1.9018362524761566e-05, "loss": 1.21573805809021, "step": 4735 }, { "epoch": 0.3878430529537614, "grad_norm": 0.19364018738269806, "learning_rate": 1.9016333110544078e-05, "loss": 0.7295354604721069, "step": 4736 }, { "epoch": 0.3879249454902804, "grad_norm": 0.19651104509830475, "learning_rate": 1.901430339136785e-05, "loss": 0.8116263151168823, "step": 4737 }, { "epoch": 0.3880068380267993, "grad_norm": 0.17374886572360992, "learning_rate": 1.901227336732111e-05, "loss": 0.978779137134552, "step": 4738 }, { "epoch": 0.3880887305633183, "grad_norm": 0.1617298573255539, "learning_rate": 1.9010243038492088e-05, "loss": 0.7317852973937988, "step": 4739 }, { "epoch": 0.38817062309983724, "grad_norm": 0.18309368193149567, "learning_rate": 1.900821240496903e-05, "loss": 0.9934594035148621, "step": 4740 }, { "epoch": 0.3882525156363562, "grad_norm": 0.1813001185655594, "learning_rate": 1.9006181466840192e-05, "loss": 0.9230425953865051, "step": 4741 }, { "epoch": 0.3883344081728751, "grad_norm": 0.15505822002887726, "learning_rate": 1.900415022419386e-05, "loss": 0.7632986307144165, "step": 4742 }, { "epoch": 0.3884163007093941, "grad_norm": 0.1734817624092102, "learning_rate": 1.9002118677118302e-05, "loss": 0.8062170147895813, "step": 4743 }, { "epoch": 0.38849819324591306, "grad_norm": 0.18184848129749298, "learning_rate": 1.900008682570183e-05, "loss": 0.8907952308654785, "step": 4744 }, { "epoch": 0.388580085782432, "grad_norm": 0.1758464127779007, "learning_rate": 1.8998054670032756e-05, "loss": 0.7911707758903503, "step": 4745 }, { "epoch": 0.38866197831895094, "grad_norm": 0.19586613774299622, "learning_rate": 1.89960222101994e-05, "loss": 1.026349425315857, "step": 4746 }, { "epoch": 0.3887438708554699, "grad_norm": 0.17799249291419983, "learning_rate": 1.8993989446290104e-05, "loss": 0.5929654240608215, "step": 4747 }, { "epoch": 0.3888257633919889, "grad_norm": 0.1711270809173584, "learning_rate": 1.8991956378393223e-05, "loss": 0.7344236969947815, "step": 4748 }, { "epoch": 0.3889076559285078, "grad_norm": 0.19303074479103088, "learning_rate": 1.898992300659712e-05, "loss": 0.6305198073387146, "step": 4749 }, { "epoch": 0.38898954846502676, "grad_norm": 0.20798610150814056, "learning_rate": 1.8987889330990174e-05, "loss": 0.8729308247566223, "step": 4750 }, { "epoch": 0.3890714410015457, "grad_norm": 0.21643230319023132, "learning_rate": 1.8985855351660776e-05, "loss": 0.9534658789634705, "step": 4751 }, { "epoch": 0.3891533335380647, "grad_norm": 0.1691722720861435, "learning_rate": 1.8983821068697327e-05, "loss": 0.7010846734046936, "step": 4752 }, { "epoch": 0.3892352260745836, "grad_norm": 0.24139350652694702, "learning_rate": 1.8981786482188257e-05, "loss": 0.8685301542282104, "step": 4753 }, { "epoch": 0.3893171186111026, "grad_norm": 0.20303493738174438, "learning_rate": 1.8979751592221988e-05, "loss": 0.9798063039779663, "step": 4754 }, { "epoch": 0.38939901114762154, "grad_norm": 0.1991591602563858, "learning_rate": 1.8977716398886965e-05, "loss": 0.816617488861084, "step": 4755 }, { "epoch": 0.3894809036841405, "grad_norm": 0.16839754581451416, "learning_rate": 1.8975680902271653e-05, "loss": 0.70870041847229, "step": 4756 }, { "epoch": 0.3895627962206594, "grad_norm": 0.17853528261184692, "learning_rate": 1.8973645102464516e-05, "loss": 0.8785358667373657, "step": 4757 }, { "epoch": 0.3896446887571784, "grad_norm": 0.18125510215759277, "learning_rate": 1.8971608999554038e-05, "loss": 0.6509623527526855, "step": 4758 }, { "epoch": 0.38972658129369736, "grad_norm": 0.16728413105010986, "learning_rate": 1.896957259362872e-05, "loss": 0.9230778813362122, "step": 4759 }, { "epoch": 0.3898084738302163, "grad_norm": 0.178557351231575, "learning_rate": 1.8967535884777074e-05, "loss": 0.6155305504798889, "step": 4760 }, { "epoch": 0.38989036636673524, "grad_norm": 0.19686660170555115, "learning_rate": 1.896549887308762e-05, "loss": 0.825183629989624, "step": 4761 }, { "epoch": 0.3899722589032542, "grad_norm": 0.2109401822090149, "learning_rate": 1.8963461558648894e-05, "loss": 0.8403916954994202, "step": 4762 }, { "epoch": 0.3900541514397732, "grad_norm": 0.17190447449684143, "learning_rate": 1.896142394154945e-05, "loss": 0.8841496706008911, "step": 4763 }, { "epoch": 0.3901360439762921, "grad_norm": 0.18996299803256989, "learning_rate": 1.8959386021877853e-05, "loss": 0.8261072039604187, "step": 4764 }, { "epoch": 0.39021793651281106, "grad_norm": 0.1672254055738449, "learning_rate": 1.895734779972267e-05, "loss": 1.0292366743087769, "step": 4765 }, { "epoch": 0.39029982904933, "grad_norm": 0.1517544686794281, "learning_rate": 1.8955309275172503e-05, "loss": 0.5616741180419922, "step": 4766 }, { "epoch": 0.390381721585849, "grad_norm": 0.18326613306999207, "learning_rate": 1.8953270448315943e-05, "loss": 0.7764319181442261, "step": 4767 }, { "epoch": 0.3904636141223679, "grad_norm": 0.18155787885189056, "learning_rate": 1.895123131924161e-05, "loss": 0.9292797446250916, "step": 4768 }, { "epoch": 0.3905455066588869, "grad_norm": 0.21552330255508423, "learning_rate": 1.8949191888038134e-05, "loss": 1.0428134202957153, "step": 4769 }, { "epoch": 0.39062739919540584, "grad_norm": 0.19898544251918793, "learning_rate": 1.8947152154794162e-05, "loss": 0.5966057181358337, "step": 4770 }, { "epoch": 0.39070929173192476, "grad_norm": 0.1756182610988617, "learning_rate": 1.894511211959834e-05, "loss": 0.9428150057792664, "step": 4771 }, { "epoch": 0.3907911842684437, "grad_norm": 0.15361586213111877, "learning_rate": 1.8943071782539343e-05, "loss": 0.9658079743385315, "step": 4772 }, { "epoch": 0.3908730768049627, "grad_norm": 0.17854666709899902, "learning_rate": 1.894103114370585e-05, "loss": 0.7041339874267578, "step": 4773 }, { "epoch": 0.39095496934148166, "grad_norm": 0.19075332581996918, "learning_rate": 1.893899020318655e-05, "loss": 1.0539426803588867, "step": 4774 }, { "epoch": 0.3910368618780006, "grad_norm": 0.17244793474674225, "learning_rate": 1.893694896107016e-05, "loss": 0.6864275932312012, "step": 4775 }, { "epoch": 0.39111875441451954, "grad_norm": 0.1847013384103775, "learning_rate": 1.89349074174454e-05, "loss": 0.798041820526123, "step": 4776 }, { "epoch": 0.3912006469510385, "grad_norm": 0.201450377702713, "learning_rate": 1.8932865572400997e-05, "loss": 0.7214220762252808, "step": 4777 }, { "epoch": 0.3912825394875575, "grad_norm": 0.18268105387687683, "learning_rate": 1.89308234260257e-05, "loss": 1.269468903541565, "step": 4778 }, { "epoch": 0.3913644320240764, "grad_norm": 0.1874200701713562, "learning_rate": 1.8928780978408275e-05, "loss": 1.0264439582824707, "step": 4779 }, { "epoch": 0.39144632456059536, "grad_norm": 0.20627617835998535, "learning_rate": 1.892673822963749e-05, "loss": 0.5321992635726929, "step": 4780 }, { "epoch": 0.39152821709711433, "grad_norm": 0.1619804948568344, "learning_rate": 1.8924695179802135e-05, "loss": 0.5075188279151917, "step": 4781 }, { "epoch": 0.39161010963363324, "grad_norm": 0.188926100730896, "learning_rate": 1.8922651828991003e-05, "loss": 1.024842381477356, "step": 4782 }, { "epoch": 0.3916920021701522, "grad_norm": 0.15851546823978424, "learning_rate": 1.8920608177292908e-05, "loss": 0.6178660988807678, "step": 4783 }, { "epoch": 0.3917738947066712, "grad_norm": 0.18076932430267334, "learning_rate": 1.8918564224796686e-05, "loss": 0.6195169687271118, "step": 4784 }, { "epoch": 0.39185578724319015, "grad_norm": 0.22339071333408356, "learning_rate": 1.8916519971591158e-05, "loss": 0.7771727442741394, "step": 4785 }, { "epoch": 0.39193767977970906, "grad_norm": 0.26517030596733093, "learning_rate": 1.8914475417765193e-05, "loss": 0.6652868986129761, "step": 4786 }, { "epoch": 0.392019572316228, "grad_norm": 0.16020995378494263, "learning_rate": 1.8912430563407642e-05, "loss": 0.8639443516731262, "step": 4787 }, { "epoch": 0.392101464852747, "grad_norm": 0.15542225539684296, "learning_rate": 1.891038540860739e-05, "loss": 0.8990994691848755, "step": 4788 }, { "epoch": 0.39218335738926596, "grad_norm": 0.1965998113155365, "learning_rate": 1.8908339953453326e-05, "loss": 0.6839345097541809, "step": 4789 }, { "epoch": 0.3922652499257849, "grad_norm": 0.19546347856521606, "learning_rate": 1.8906294198034354e-05, "loss": 0.8501741290092468, "step": 4790 }, { "epoch": 0.39234714246230384, "grad_norm": 0.24416853487491608, "learning_rate": 1.8904248142439394e-05, "loss": 1.0741746425628662, "step": 4791 }, { "epoch": 0.3924290349988228, "grad_norm": 0.2053661048412323, "learning_rate": 1.890220178675737e-05, "loss": 0.8482263684272766, "step": 4792 }, { "epoch": 0.3925109275353417, "grad_norm": 0.1658380627632141, "learning_rate": 1.890015513107723e-05, "loss": 1.1489648818969727, "step": 4793 }, { "epoch": 0.3925928200718607, "grad_norm": 0.18956169486045837, "learning_rate": 1.8898108175487927e-05, "loss": 0.9440225958824158, "step": 4794 }, { "epoch": 0.39267471260837966, "grad_norm": 0.2088089883327484, "learning_rate": 1.8896060920078433e-05, "loss": 0.7082890868186951, "step": 4795 }, { "epoch": 0.39275660514489863, "grad_norm": 0.18618738651275635, "learning_rate": 1.889401336493773e-05, "loss": 0.7174160480499268, "step": 4796 }, { "epoch": 0.39283849768141754, "grad_norm": 0.2095162570476532, "learning_rate": 1.889196551015481e-05, "loss": 0.8309505581855774, "step": 4797 }, { "epoch": 0.3929203902179365, "grad_norm": 0.20121712982654572, "learning_rate": 1.888991735581868e-05, "loss": 0.8457232713699341, "step": 4798 }, { "epoch": 0.3930022827544555, "grad_norm": 0.21801941096782684, "learning_rate": 1.888786890201837e-05, "loss": 0.7142794132232666, "step": 4799 }, { "epoch": 0.3930841752909744, "grad_norm": 0.20744477212429047, "learning_rate": 1.8885820148842902e-05, "loss": 0.896957278251648, "step": 4800 }, { "epoch": 0.39316606782749336, "grad_norm": 0.1974206119775772, "learning_rate": 1.8883771096381336e-05, "loss": 1.000659465789795, "step": 4801 }, { "epoch": 0.39324796036401233, "grad_norm": 0.15925173461437225, "learning_rate": 1.888172174472272e-05, "loss": 0.7107816934585571, "step": 4802 }, { "epoch": 0.3933298529005313, "grad_norm": 0.15333260595798492, "learning_rate": 1.8879672093956142e-05, "loss": 0.7949489951133728, "step": 4803 }, { "epoch": 0.3934117454370502, "grad_norm": 0.19944031536579132, "learning_rate": 1.8877622144170673e-05, "loss": 0.9496010541915894, "step": 4804 }, { "epoch": 0.3934936379735692, "grad_norm": 0.19722013175487518, "learning_rate": 1.887557189545542e-05, "loss": 0.5283668041229248, "step": 4805 }, { "epoch": 0.39357553051008815, "grad_norm": 0.1944478154182434, "learning_rate": 1.8873521347899493e-05, "loss": 0.8928956985473633, "step": 4806 }, { "epoch": 0.3936574230466071, "grad_norm": 0.2396780103445053, "learning_rate": 1.887147050159202e-05, "loss": 1.3415393829345703, "step": 4807 }, { "epoch": 0.393739315583126, "grad_norm": 0.22172248363494873, "learning_rate": 1.8869419356622133e-05, "loss": 0.6701005101203918, "step": 4808 }, { "epoch": 0.393821208119645, "grad_norm": 0.18499638140201569, "learning_rate": 1.8867367913078994e-05, "loss": 0.6846961975097656, "step": 4809 }, { "epoch": 0.39390310065616396, "grad_norm": 0.15962941944599152, "learning_rate": 1.886531617105176e-05, "loss": 0.82304847240448, "step": 4810 }, { "epoch": 0.3939849931926829, "grad_norm": 0.1600765585899353, "learning_rate": 1.8863264130629602e-05, "loss": 0.8350710272789001, "step": 4811 }, { "epoch": 0.39406688572920184, "grad_norm": 0.16347472369670868, "learning_rate": 1.8861211791901726e-05, "loss": 1.1845346689224243, "step": 4812 }, { "epoch": 0.3941487782657208, "grad_norm": 0.2014016956090927, "learning_rate": 1.885915915495732e-05, "loss": 1.119883418083191, "step": 4813 }, { "epoch": 0.3942306708022398, "grad_norm": 0.1755836457014084, "learning_rate": 1.8857106219885612e-05, "loss": 0.8172536492347717, "step": 4814 }, { "epoch": 0.3943125633387587, "grad_norm": 0.17551127076148987, "learning_rate": 1.885505298677582e-05, "loss": 1.0441190004348755, "step": 4815 }, { "epoch": 0.39439445587527766, "grad_norm": 0.17074929177761078, "learning_rate": 1.8852999455717195e-05, "loss": 0.8962393999099731, "step": 4816 }, { "epoch": 0.39447634841179663, "grad_norm": 0.20979388058185577, "learning_rate": 1.8850945626798986e-05, "loss": 0.9057430624961853, "step": 4817 }, { "epoch": 0.3945582409483156, "grad_norm": 0.22108319401741028, "learning_rate": 1.8848891500110465e-05, "loss": 0.6513183116912842, "step": 4818 }, { "epoch": 0.3946401334848345, "grad_norm": 0.22447112202644348, "learning_rate": 1.884683707574091e-05, "loss": 0.9307804703712463, "step": 4819 }, { "epoch": 0.3947220260213535, "grad_norm": 0.19502268731594086, "learning_rate": 1.8844782353779614e-05, "loss": 0.6807905435562134, "step": 4820 }, { "epoch": 0.39480391855787245, "grad_norm": 0.1621302217245102, "learning_rate": 1.8842727334315887e-05, "loss": 0.8481922149658203, "step": 4821 }, { "epoch": 0.39488581109439136, "grad_norm": 0.1866615116596222, "learning_rate": 1.8840672017439047e-05, "loss": 1.0245755910873413, "step": 4822 }, { "epoch": 0.3949677036309103, "grad_norm": 0.1874581277370453, "learning_rate": 1.8838616403238427e-05, "loss": 0.7871371507644653, "step": 4823 }, { "epoch": 0.3950495961674293, "grad_norm": 0.16598296165466309, "learning_rate": 1.883656049180337e-05, "loss": 0.7117646932601929, "step": 4824 }, { "epoch": 0.39513148870394826, "grad_norm": 0.2599584758281708, "learning_rate": 1.8834504283223238e-05, "loss": 0.8345670700073242, "step": 4825 }, { "epoch": 0.3952133812404672, "grad_norm": 0.1995057612657547, "learning_rate": 1.88324477775874e-05, "loss": 0.7166072726249695, "step": 4826 }, { "epoch": 0.39529527377698614, "grad_norm": 0.1992579698562622, "learning_rate": 1.8830390974985244e-05, "loss": 0.6348736882209778, "step": 4827 }, { "epoch": 0.3953771663135051, "grad_norm": 0.21752823889255524, "learning_rate": 1.8828333875506157e-05, "loss": 0.8026854991912842, "step": 4828 }, { "epoch": 0.3954590588500241, "grad_norm": 0.19239723682403564, "learning_rate": 1.882627647923956e-05, "loss": 1.0187113285064697, "step": 4829 }, { "epoch": 0.395540951386543, "grad_norm": 0.17154355347156525, "learning_rate": 1.8824218786274872e-05, "loss": 0.5761831998825073, "step": 4830 }, { "epoch": 0.39562284392306196, "grad_norm": 0.17544662952423096, "learning_rate": 1.882216079670153e-05, "loss": 0.6359361410140991, "step": 4831 }, { "epoch": 0.39570473645958093, "grad_norm": 0.19478167593479156, "learning_rate": 1.882010251060898e-05, "loss": 0.9323430061340332, "step": 4832 }, { "epoch": 0.39578662899609984, "grad_norm": 0.22067810595035553, "learning_rate": 1.881804392808668e-05, "loss": 0.7781283855438232, "step": 4833 }, { "epoch": 0.3958685215326188, "grad_norm": 0.19773642718791962, "learning_rate": 1.8815985049224113e-05, "loss": 1.1480196714401245, "step": 4834 }, { "epoch": 0.3959504140691378, "grad_norm": 0.19072774052619934, "learning_rate": 1.8813925874110766e-05, "loss": 0.6420587301254272, "step": 4835 }, { "epoch": 0.39603230660565675, "grad_norm": 0.18430760502815247, "learning_rate": 1.8811866402836136e-05, "loss": 0.8467979431152344, "step": 4836 }, { "epoch": 0.39611419914217566, "grad_norm": 0.17710460722446442, "learning_rate": 1.8809806635489732e-05, "loss": 0.6361441612243652, "step": 4837 }, { "epoch": 0.39619609167869463, "grad_norm": 0.1872129589319229, "learning_rate": 1.8807746572161085e-05, "loss": 0.5884761214256287, "step": 4838 }, { "epoch": 0.3962779842152136, "grad_norm": 0.17545996606349945, "learning_rate": 1.8805686212939733e-05, "loss": 1.0176620483398438, "step": 4839 }, { "epoch": 0.39635987675173256, "grad_norm": 0.18100392818450928, "learning_rate": 1.880362555791523e-05, "loss": 0.8562783598899841, "step": 4840 }, { "epoch": 0.3964417692882515, "grad_norm": 0.15924102067947388, "learning_rate": 1.8801564607177138e-05, "loss": 0.672008216381073, "step": 4841 }, { "epoch": 0.39652366182477045, "grad_norm": 0.19947659969329834, "learning_rate": 1.8799503360815033e-05, "loss": 0.7233729958534241, "step": 4842 }, { "epoch": 0.3966055543612894, "grad_norm": 0.18508578836917877, "learning_rate": 1.8797441818918507e-05, "loss": 1.0334047079086304, "step": 4843 }, { "epoch": 0.3966874468978083, "grad_norm": 0.17228274047374725, "learning_rate": 1.8795379981577164e-05, "loss": 0.5747145414352417, "step": 4844 }, { "epoch": 0.3967693394343273, "grad_norm": 0.1550668627023697, "learning_rate": 1.8793317848880616e-05, "loss": 1.0177005529403687, "step": 4845 }, { "epoch": 0.39685123197084626, "grad_norm": 0.22186262905597687, "learning_rate": 1.8791255420918496e-05, "loss": 0.7089474201202393, "step": 4846 }, { "epoch": 0.39693312450736523, "grad_norm": 0.29002153873443604, "learning_rate": 1.8789192697780447e-05, "loss": 1.2491763830184937, "step": 4847 }, { "epoch": 0.39701501704388414, "grad_norm": 0.1888546496629715, "learning_rate": 1.8787129679556117e-05, "loss": 0.7779920101165771, "step": 4848 }, { "epoch": 0.3970969095804031, "grad_norm": 0.21422606706619263, "learning_rate": 1.878506636633518e-05, "loss": 1.074955940246582, "step": 4849 }, { "epoch": 0.3971788021169221, "grad_norm": 0.19364796578884125, "learning_rate": 1.8783002758207308e-05, "loss": 0.6064584255218506, "step": 4850 }, { "epoch": 0.39726069465344105, "grad_norm": 0.17778833210468292, "learning_rate": 1.8780938855262203e-05, "loss": 1.032112717628479, "step": 4851 }, { "epoch": 0.39734258718995996, "grad_norm": 0.2084885835647583, "learning_rate": 1.877887465758957e-05, "loss": 0.8352402448654175, "step": 4852 }, { "epoch": 0.39742447972647893, "grad_norm": 0.15326200425624847, "learning_rate": 1.877681016527912e-05, "loss": 0.6935020089149475, "step": 4853 }, { "epoch": 0.3975063722629979, "grad_norm": 0.19249418377876282, "learning_rate": 1.8774745378420587e-05, "loss": 0.7612571120262146, "step": 4854 }, { "epoch": 0.3975882647995168, "grad_norm": 0.22246062755584717, "learning_rate": 1.8772680297103717e-05, "loss": 1.0527467727661133, "step": 4855 }, { "epoch": 0.3976701573360358, "grad_norm": 0.19392932951450348, "learning_rate": 1.877061492141827e-05, "loss": 0.985914945602417, "step": 4856 }, { "epoch": 0.39775204987255475, "grad_norm": 0.19676290452480316, "learning_rate": 1.876854925145401e-05, "loss": 1.0880986452102661, "step": 4857 }, { "epoch": 0.3978339424090737, "grad_norm": 0.19492173194885254, "learning_rate": 1.8766483287300725e-05, "loss": 0.8911033868789673, "step": 4858 }, { "epoch": 0.39791583494559263, "grad_norm": 0.20369091629981995, "learning_rate": 1.87644170290482e-05, "loss": 0.6007199883460999, "step": 4859 }, { "epoch": 0.3979977274821116, "grad_norm": 0.19511395692825317, "learning_rate": 1.876235047678626e-05, "loss": 0.8877812623977661, "step": 4860 }, { "epoch": 0.39807962001863056, "grad_norm": 0.18075543642044067, "learning_rate": 1.876028363060471e-05, "loss": 0.954290509223938, "step": 4861 }, { "epoch": 0.39816151255514953, "grad_norm": 0.18347269296646118, "learning_rate": 1.8758216490593392e-05, "loss": 0.9476075172424316, "step": 4862 }, { "epoch": 0.39824340509166845, "grad_norm": 0.29638779163360596, "learning_rate": 1.875614905684215e-05, "loss": 0.7463136911392212, "step": 4863 }, { "epoch": 0.3983252976281874, "grad_norm": 0.201890230178833, "learning_rate": 1.8754081329440843e-05, "loss": 1.0599281787872314, "step": 4864 }, { "epoch": 0.3984071901647064, "grad_norm": 0.18756718933582306, "learning_rate": 1.875201330847934e-05, "loss": 0.9535694122314453, "step": 4865 }, { "epoch": 0.3984890827012253, "grad_norm": 0.17451325058937073, "learning_rate": 1.8749944994047535e-05, "loss": 0.9142393469810486, "step": 4866 }, { "epoch": 0.39857097523774426, "grad_norm": 0.16986796259880066, "learning_rate": 1.874787638623532e-05, "loss": 0.7894538044929504, "step": 4867 }, { "epoch": 0.39865286777426323, "grad_norm": 0.1724662333726883, "learning_rate": 1.87458074851326e-05, "loss": 0.6923216581344604, "step": 4868 }, { "epoch": 0.3987347603107822, "grad_norm": 0.18401780724525452, "learning_rate": 1.8743738290829307e-05, "loss": 1.099830985069275, "step": 4869 }, { "epoch": 0.3988166528473011, "grad_norm": 0.1732831746339798, "learning_rate": 1.8741668803415368e-05, "loss": 0.976685643196106, "step": 4870 }, { "epoch": 0.3988985453838201, "grad_norm": 0.20462198555469513, "learning_rate": 1.873959902298074e-05, "loss": 0.8694584369659424, "step": 4871 }, { "epoch": 0.39898043792033905, "grad_norm": 0.20911945402622223, "learning_rate": 1.8737528949615378e-05, "loss": 1.0574796199798584, "step": 4872 }, { "epoch": 0.39906233045685796, "grad_norm": 0.16782048344612122, "learning_rate": 1.873545858340926e-05, "loss": 0.9791169166564941, "step": 4873 }, { "epoch": 0.39914422299337693, "grad_norm": 0.16992226243019104, "learning_rate": 1.8733387924452365e-05, "loss": 0.8472546339035034, "step": 4874 }, { "epoch": 0.3992261155298959, "grad_norm": 0.1652415543794632, "learning_rate": 1.8731316972834703e-05, "loss": 0.5662095546722412, "step": 4875 }, { "epoch": 0.39930800806641487, "grad_norm": 0.1790178269147873, "learning_rate": 1.872924572864628e-05, "loss": 0.48817360401153564, "step": 4876 }, { "epoch": 0.3993899006029338, "grad_norm": 0.20772720873355865, "learning_rate": 1.8727174191977122e-05, "loss": 0.9243919849395752, "step": 4877 }, { "epoch": 0.39947179313945275, "grad_norm": 0.167026087641716, "learning_rate": 1.8725102362917267e-05, "loss": 0.7697576284408569, "step": 4878 }, { "epoch": 0.3995536856759717, "grad_norm": 0.2021031528711319, "learning_rate": 1.8723030241556763e-05, "loss": 0.9909370541572571, "step": 4879 }, { "epoch": 0.3996355782124907, "grad_norm": 0.18447256088256836, "learning_rate": 1.8720957827985675e-05, "loss": 0.8108887672424316, "step": 4880 }, { "epoch": 0.3997174707490096, "grad_norm": 0.16770540177822113, "learning_rate": 1.8718885122294082e-05, "loss": 0.8009738922119141, "step": 4881 }, { "epoch": 0.39979936328552856, "grad_norm": 0.15608267486095428, "learning_rate": 1.871681212457206e-05, "loss": 0.4319993257522583, "step": 4882 }, { "epoch": 0.39988125582204753, "grad_norm": 0.19232289493083954, "learning_rate": 1.8714738834909727e-05, "loss": 0.6601657271385193, "step": 4883 }, { "epoch": 0.39996314835856644, "grad_norm": 0.19573992490768433, "learning_rate": 1.8712665253397186e-05, "loss": 1.050797700881958, "step": 4884 }, { "epoch": 0.4000450408950854, "grad_norm": 0.20168934762477875, "learning_rate": 1.8710591380124564e-05, "loss": 1.1181604862213135, "step": 4885 }, { "epoch": 0.4001269334316044, "grad_norm": 0.1804691106081009, "learning_rate": 1.8708517215182004e-05, "loss": 0.9934780597686768, "step": 4886 }, { "epoch": 0.40020882596812335, "grad_norm": 0.18792091310024261, "learning_rate": 1.8706442758659654e-05, "loss": 0.9834442138671875, "step": 4887 }, { "epoch": 0.40029071850464226, "grad_norm": 0.5209029316902161, "learning_rate": 1.8704368010647678e-05, "loss": 1.3392693996429443, "step": 4888 }, { "epoch": 0.40037261104116123, "grad_norm": 0.18379555642604828, "learning_rate": 1.870229297123626e-05, "loss": 0.7264520525932312, "step": 4889 }, { "epoch": 0.4004545035776802, "grad_norm": 0.17947925627231598, "learning_rate": 1.8700217640515582e-05, "loss": 0.8933248519897461, "step": 4890 }, { "epoch": 0.40053639611419917, "grad_norm": 0.20687562227249146, "learning_rate": 1.8698142018575844e-05, "loss": 0.6400417685508728, "step": 4891 }, { "epoch": 0.4006182886507181, "grad_norm": 0.17644000053405762, "learning_rate": 1.8696066105507273e-05, "loss": 1.0227482318878174, "step": 4892 }, { "epoch": 0.40070018118723705, "grad_norm": 0.19244930148124695, "learning_rate": 1.8693989901400086e-05, "loss": 1.3238481283187866, "step": 4893 }, { "epoch": 0.400782073723756, "grad_norm": 0.19676131010055542, "learning_rate": 1.8691913406344533e-05, "loss": 0.8338460922241211, "step": 4894 }, { "epoch": 0.40086396626027493, "grad_norm": 0.17388290166854858, "learning_rate": 1.8689836620430855e-05, "loss": 0.8799936771392822, "step": 4895 }, { "epoch": 0.4009458587967939, "grad_norm": 0.1800016611814499, "learning_rate": 1.868775954374933e-05, "loss": 0.6964021325111389, "step": 4896 }, { "epoch": 0.40102775133331287, "grad_norm": 0.20619326829910278, "learning_rate": 1.8685682176390224e-05, "loss": 1.0306931734085083, "step": 4897 }, { "epoch": 0.40110964386983183, "grad_norm": 0.27117466926574707, "learning_rate": 1.868360451844384e-05, "loss": 0.8637059926986694, "step": 4898 }, { "epoch": 0.40119153640635075, "grad_norm": 0.1894320249557495, "learning_rate": 1.868152657000047e-05, "loss": 1.009373664855957, "step": 4899 }, { "epoch": 0.4012734289428697, "grad_norm": 0.1927165985107422, "learning_rate": 1.8679448331150442e-05, "loss": 0.8266853094100952, "step": 4900 }, { "epoch": 0.4013553214793887, "grad_norm": 0.1999220848083496, "learning_rate": 1.8677369801984076e-05, "loss": 0.8183196187019348, "step": 4901 }, { "epoch": 0.40143721401590765, "grad_norm": 0.1815393567085266, "learning_rate": 1.8675290982591717e-05, "loss": 0.6955007910728455, "step": 4902 }, { "epoch": 0.40151910655242656, "grad_norm": 0.21682676672935486, "learning_rate": 1.8673211873063716e-05, "loss": 1.1195605993270874, "step": 4903 }, { "epoch": 0.40160099908894553, "grad_norm": 0.16370873153209686, "learning_rate": 1.8671132473490448e-05, "loss": 0.8337005972862244, "step": 4904 }, { "epoch": 0.4016828916254645, "grad_norm": 0.14623259007930756, "learning_rate": 1.866905278396228e-05, "loss": 0.6407696604728699, "step": 4905 }, { "epoch": 0.4017647841619834, "grad_norm": 0.16124309599399567, "learning_rate": 1.8666972804569613e-05, "loss": 0.6979441046714783, "step": 4906 }, { "epoch": 0.4018466766985024, "grad_norm": 0.17841477692127228, "learning_rate": 1.8664892535402848e-05, "loss": 0.7008439898490906, "step": 4907 }, { "epoch": 0.40192856923502135, "grad_norm": 0.15273445844650269, "learning_rate": 1.8662811976552403e-05, "loss": 0.541215717792511, "step": 4908 }, { "epoch": 0.4020104617715403, "grad_norm": 0.17031942307949066, "learning_rate": 1.866073112810871e-05, "loss": 0.8181204199790955, "step": 4909 }, { "epoch": 0.40209235430805923, "grad_norm": 0.16850562393665314, "learning_rate": 1.8658649990162205e-05, "loss": 0.7695010900497437, "step": 4910 }, { "epoch": 0.4021742468445782, "grad_norm": 0.18291139602661133, "learning_rate": 1.8656568562803347e-05, "loss": 1.1851997375488281, "step": 4911 }, { "epoch": 0.40225613938109717, "grad_norm": 0.16229744255542755, "learning_rate": 1.8654486846122604e-05, "loss": 0.8543393611907959, "step": 4912 }, { "epoch": 0.40233803191761613, "grad_norm": 0.18266651034355164, "learning_rate": 1.8652404840210455e-05, "loss": 0.6875848174095154, "step": 4913 }, { "epoch": 0.40241992445413505, "grad_norm": 0.19899767637252808, "learning_rate": 1.8650322545157388e-05, "loss": 0.8558641672134399, "step": 4914 }, { "epoch": 0.402501816990654, "grad_norm": 0.1952270269393921, "learning_rate": 1.8648239961053918e-05, "loss": 0.7933339476585388, "step": 4915 }, { "epoch": 0.402583709527173, "grad_norm": 0.18190136551856995, "learning_rate": 1.8646157087990555e-05, "loss": 0.976254403591156, "step": 4916 }, { "epoch": 0.4026656020636919, "grad_norm": 0.1763625293970108, "learning_rate": 1.864407392605783e-05, "loss": 0.9067272543907166, "step": 4917 }, { "epoch": 0.40274749460021086, "grad_norm": 0.17689669132232666, "learning_rate": 1.8641990475346292e-05, "loss": 0.8053146004676819, "step": 4918 }, { "epoch": 0.40282938713672983, "grad_norm": 0.211796835064888, "learning_rate": 1.8639906735946488e-05, "loss": 0.9738070964813232, "step": 4919 }, { "epoch": 0.4029112796732488, "grad_norm": 0.17949987947940826, "learning_rate": 1.8637822707948992e-05, "loss": 0.8623453974723816, "step": 4920 }, { "epoch": 0.4029931722097677, "grad_norm": 0.17677032947540283, "learning_rate": 1.863573839144438e-05, "loss": 0.9536089897155762, "step": 4921 }, { "epoch": 0.4030750647462867, "grad_norm": 0.18126145005226135, "learning_rate": 1.8633653786523247e-05, "loss": 0.7138976454734802, "step": 4922 }, { "epoch": 0.40315695728280565, "grad_norm": 0.17649222910404205, "learning_rate": 1.8631568893276198e-05, "loss": 0.7391239404678345, "step": 4923 }, { "epoch": 0.4032388498193246, "grad_norm": 0.1705232858657837, "learning_rate": 1.862948371179385e-05, "loss": 0.9113979339599609, "step": 4924 }, { "epoch": 0.40332074235584353, "grad_norm": 0.1746300309896469, "learning_rate": 1.8627398242166837e-05, "loss": 0.8079941272735596, "step": 4925 }, { "epoch": 0.4034026348923625, "grad_norm": 0.18022660911083221, "learning_rate": 1.8625312484485802e-05, "loss": 1.0175292491912842, "step": 4926 }, { "epoch": 0.40348452742888147, "grad_norm": 0.20292019844055176, "learning_rate": 1.8623226438841396e-05, "loss": 0.9352402687072754, "step": 4927 }, { "epoch": 0.4035664199654004, "grad_norm": 0.19311539828777313, "learning_rate": 1.8621140105324288e-05, "loss": 0.7845450043678284, "step": 4928 }, { "epoch": 0.40364831250191935, "grad_norm": 0.22226370871067047, "learning_rate": 1.8619053484025167e-05, "loss": 0.8938077092170715, "step": 4929 }, { "epoch": 0.4037302050384383, "grad_norm": 0.19201205670833588, "learning_rate": 1.861696657503472e-05, "loss": 0.7553132772445679, "step": 4930 }, { "epoch": 0.4038120975749573, "grad_norm": 0.19559839367866516, "learning_rate": 1.861487937844364e-05, "loss": 1.0512011051177979, "step": 4931 }, { "epoch": 0.4038939901114762, "grad_norm": 0.1607467383146286, "learning_rate": 1.861279189434267e-05, "loss": 0.6537278294563293, "step": 4932 }, { "epoch": 0.40397588264799517, "grad_norm": 0.20775189995765686, "learning_rate": 1.8610704122822524e-05, "loss": 0.6359946131706238, "step": 4933 }, { "epoch": 0.40405777518451413, "grad_norm": 0.21767739951610565, "learning_rate": 1.860861606397395e-05, "loss": 0.8081493377685547, "step": 4934 }, { "epoch": 0.40413966772103305, "grad_norm": 0.1786140650510788, "learning_rate": 1.86065277178877e-05, "loss": 1.2672450542449951, "step": 4935 }, { "epoch": 0.404221560257552, "grad_norm": 0.2459297925233841, "learning_rate": 1.8604439084654547e-05, "loss": 0.7733134627342224, "step": 4936 }, { "epoch": 0.404303452794071, "grad_norm": 0.20963795483112335, "learning_rate": 1.860235016436527e-05, "loss": 0.8020097017288208, "step": 4937 }, { "epoch": 0.40438534533058995, "grad_norm": 0.21937990188598633, "learning_rate": 1.8600260957110665e-05, "loss": 0.5839344263076782, "step": 4938 }, { "epoch": 0.40446723786710886, "grad_norm": 0.16929015517234802, "learning_rate": 1.859817146298153e-05, "loss": 0.9136120080947876, "step": 4939 }, { "epoch": 0.40454913040362783, "grad_norm": 0.16853730380535126, "learning_rate": 1.8596081682068688e-05, "loss": 0.883001446723938, "step": 4940 }, { "epoch": 0.4046310229401468, "grad_norm": 0.18358813226222992, "learning_rate": 1.859399161446297e-05, "loss": 0.6945838332176208, "step": 4941 }, { "epoch": 0.40471291547666577, "grad_norm": 0.2062784880399704, "learning_rate": 1.8591901260255217e-05, "loss": 0.7648672461509705, "step": 4942 }, { "epoch": 0.4047948080131847, "grad_norm": 0.19497710466384888, "learning_rate": 1.8589810619536286e-05, "loss": 0.6843498349189758, "step": 4943 }, { "epoch": 0.40487670054970365, "grad_norm": 0.1966712474822998, "learning_rate": 1.8587719692397044e-05, "loss": 0.7911531329154968, "step": 4944 }, { "epoch": 0.4049585930862226, "grad_norm": 0.1806543469429016, "learning_rate": 1.8585628478928366e-05, "loss": 1.0327527523040771, "step": 4945 }, { "epoch": 0.40504048562274153, "grad_norm": 0.18884876370429993, "learning_rate": 1.858353697922116e-05, "loss": 0.9346252679824829, "step": 4946 }, { "epoch": 0.4051223781592605, "grad_norm": 0.1834551841020584, "learning_rate": 1.8581445193366313e-05, "loss": 0.8489859104156494, "step": 4947 }, { "epoch": 0.40520427069577947, "grad_norm": 0.1718529760837555, "learning_rate": 1.8579353121454755e-05, "loss": 0.8416170477867126, "step": 4948 }, { "epoch": 0.40528616323229844, "grad_norm": 0.20401811599731445, "learning_rate": 1.857726076357741e-05, "loss": 0.9367818832397461, "step": 4949 }, { "epoch": 0.40536805576881735, "grad_norm": 0.2048729956150055, "learning_rate": 1.8575168119825225e-05, "loss": 0.9329394102096558, "step": 4950 }, { "epoch": 0.4054499483053363, "grad_norm": 0.19837123155593872, "learning_rate": 1.857307519028915e-05, "loss": 1.1233603954315186, "step": 4951 }, { "epoch": 0.4055318408418553, "grad_norm": 0.18282349407672882, "learning_rate": 1.8570981975060155e-05, "loss": 0.6810227036476135, "step": 4952 }, { "epoch": 0.40561373337837425, "grad_norm": 0.15881504118442535, "learning_rate": 1.856888847422922e-05, "loss": 0.726540744304657, "step": 4953 }, { "epoch": 0.40569562591489317, "grad_norm": 0.22410526871681213, "learning_rate": 1.856679468788734e-05, "loss": 0.9761060476303101, "step": 4954 }, { "epoch": 0.40577751845141213, "grad_norm": 0.1948442906141281, "learning_rate": 1.8564700616125513e-05, "loss": 0.8350445032119751, "step": 4955 }, { "epoch": 0.4058594109879311, "grad_norm": 0.1629355400800705, "learning_rate": 1.8562606259034754e-05, "loss": 0.8301382064819336, "step": 4956 }, { "epoch": 0.40594130352445, "grad_norm": 0.16960804164409637, "learning_rate": 1.8560511616706108e-05, "loss": 0.7986688017845154, "step": 4957 }, { "epoch": 0.406023196060969, "grad_norm": 0.1749076098203659, "learning_rate": 1.85584166892306e-05, "loss": 0.8358690738677979, "step": 4958 }, { "epoch": 0.40610508859748795, "grad_norm": 0.16591525077819824, "learning_rate": 1.8556321476699292e-05, "loss": 0.7329019904136658, "step": 4959 }, { "epoch": 0.4061869811340069, "grad_norm": 0.1741303652524948, "learning_rate": 1.8554225979203248e-05, "loss": 0.6649019718170166, "step": 4960 }, { "epoch": 0.40626887367052583, "grad_norm": 0.1892394870519638, "learning_rate": 1.8552130196833547e-05, "loss": 0.8816577196121216, "step": 4961 }, { "epoch": 0.4063507662070448, "grad_norm": 0.199670672416687, "learning_rate": 1.8550034129681286e-05, "loss": 0.8123025298118591, "step": 4962 }, { "epoch": 0.40643265874356377, "grad_norm": 0.19663968682289124, "learning_rate": 1.854793777783756e-05, "loss": 0.9683864712715149, "step": 4963 }, { "epoch": 0.40651455128008274, "grad_norm": 0.21916139125823975, "learning_rate": 1.8545841141393492e-05, "loss": 0.884762704372406, "step": 4964 }, { "epoch": 0.40659644381660165, "grad_norm": 0.20365190505981445, "learning_rate": 1.8543744220440206e-05, "loss": 0.9140419960021973, "step": 4965 }, { "epoch": 0.4066783363531206, "grad_norm": 0.22361786663532257, "learning_rate": 1.8541647015068846e-05, "loss": 1.1997884511947632, "step": 4966 }, { "epoch": 0.4067602288896396, "grad_norm": 0.2001107633113861, "learning_rate": 1.853954952537056e-05, "loss": 0.769279956817627, "step": 4967 }, { "epoch": 0.4068421214261585, "grad_norm": 0.19657249748706818, "learning_rate": 1.853745175143652e-05, "loss": 0.7329741716384888, "step": 4968 }, { "epoch": 0.40692401396267747, "grad_norm": 0.2194504588842392, "learning_rate": 1.85353536933579e-05, "loss": 0.9167615175247192, "step": 4969 }, { "epoch": 0.40700590649919643, "grad_norm": 0.19511324167251587, "learning_rate": 1.8533255351225892e-05, "loss": 0.9569521546363831, "step": 4970 }, { "epoch": 0.4070877990357154, "grad_norm": 0.18368768692016602, "learning_rate": 1.85311567251317e-05, "loss": 0.924453616142273, "step": 4971 }, { "epoch": 0.4071696915722343, "grad_norm": 0.1920098513364792, "learning_rate": 1.8529057815166535e-05, "loss": 0.9231491088867188, "step": 4972 }, { "epoch": 0.4072515841087533, "grad_norm": 0.20510846376419067, "learning_rate": 1.852695862142162e-05, "loss": 0.7535781860351562, "step": 4973 }, { "epoch": 0.40733347664527225, "grad_norm": 0.2008444368839264, "learning_rate": 1.852485914398821e-05, "loss": 0.9512158632278442, "step": 4974 }, { "epoch": 0.4074153691817912, "grad_norm": 0.17124027013778687, "learning_rate": 1.8522759382957547e-05, "loss": 0.7773808836936951, "step": 4975 }, { "epoch": 0.40749726171831013, "grad_norm": 0.18196497857570648, "learning_rate": 1.8520659338420892e-05, "loss": 0.8907803893089294, "step": 4976 }, { "epoch": 0.4075791542548291, "grad_norm": 0.1609843522310257, "learning_rate": 1.8518559010469528e-05, "loss": 1.0434606075286865, "step": 4977 }, { "epoch": 0.40766104679134807, "grad_norm": 0.178365558385849, "learning_rate": 1.851645839919474e-05, "loss": 0.8374481797218323, "step": 4978 }, { "epoch": 0.407742939327867, "grad_norm": 0.15459269285202026, "learning_rate": 1.851435750468783e-05, "loss": 0.7163717150688171, "step": 4979 }, { "epoch": 0.40782483186438595, "grad_norm": 0.2024887651205063, "learning_rate": 1.8512256327040114e-05, "loss": 0.6893372535705566, "step": 4980 }, { "epoch": 0.4079067244009049, "grad_norm": 0.18016675114631653, "learning_rate": 1.8510154866342915e-05, "loss": 0.45104050636291504, "step": 4981 }, { "epoch": 0.4079886169374239, "grad_norm": 0.18132303655147552, "learning_rate": 1.850805312268757e-05, "loss": 0.6311023235321045, "step": 4982 }, { "epoch": 0.4080705094739428, "grad_norm": 0.17496518790721893, "learning_rate": 1.8505951096165434e-05, "loss": 0.9249585270881653, "step": 4983 }, { "epoch": 0.40815240201046177, "grad_norm": 0.17304642498493195, "learning_rate": 1.850384878686787e-05, "loss": 0.7712814211845398, "step": 4984 }, { "epoch": 0.40823429454698074, "grad_norm": 0.22274519503116608, "learning_rate": 1.8501746194886244e-05, "loss": 0.6546491980552673, "step": 4985 }, { "epoch": 0.4083161870834997, "grad_norm": 0.24287113547325134, "learning_rate": 1.8499643320311956e-05, "loss": 0.8947634100914001, "step": 4986 }, { "epoch": 0.4083980796200186, "grad_norm": 0.18471942842006683, "learning_rate": 1.8497540163236395e-05, "loss": 0.9118187427520752, "step": 4987 }, { "epoch": 0.4084799721565376, "grad_norm": 0.21892130374908447, "learning_rate": 1.849543672375098e-05, "loss": 1.0773539543151855, "step": 4988 }, { "epoch": 0.40856186469305655, "grad_norm": 0.19448097050189972, "learning_rate": 1.849333300194713e-05, "loss": 0.8315266370773315, "step": 4989 }, { "epoch": 0.40864375722957547, "grad_norm": 0.18658170104026794, "learning_rate": 1.8491228997916285e-05, "loss": 1.0829107761383057, "step": 4990 }, { "epoch": 0.40872564976609443, "grad_norm": 0.21908164024353027, "learning_rate": 1.848912471174989e-05, "loss": 0.9035988450050354, "step": 4991 }, { "epoch": 0.4088075423026134, "grad_norm": 0.1565021425485611, "learning_rate": 1.8487020143539412e-05, "loss": 0.7623292207717896, "step": 4992 }, { "epoch": 0.40888943483913237, "grad_norm": 0.1700085550546646, "learning_rate": 1.848491529337632e-05, "loss": 1.0076258182525635, "step": 4993 }, { "epoch": 0.4089713273756513, "grad_norm": 0.1764584481716156, "learning_rate": 1.8482810161352095e-05, "loss": 1.2546398639678955, "step": 4994 }, { "epoch": 0.40905321991217025, "grad_norm": 0.13372963666915894, "learning_rate": 1.8480704747558247e-05, "loss": 0.6615644693374634, "step": 4995 }, { "epoch": 0.4091351124486892, "grad_norm": 0.1648058444261551, "learning_rate": 1.8478599052086272e-05, "loss": 1.0163546800613403, "step": 4996 }, { "epoch": 0.4092170049852082, "grad_norm": 0.2006029635667801, "learning_rate": 1.84764930750277e-05, "loss": 1.329639196395874, "step": 4997 }, { "epoch": 0.4092988975217271, "grad_norm": 0.1914386749267578, "learning_rate": 1.8474386816474068e-05, "loss": 0.6329330801963806, "step": 4998 }, { "epoch": 0.40938079005824607, "grad_norm": 0.17491298913955688, "learning_rate": 1.8472280276516916e-05, "loss": 1.0190798044204712, "step": 4999 }, { "epoch": 0.40946268259476504, "grad_norm": 0.1948796510696411, "learning_rate": 1.8470173455247806e-05, "loss": 0.9776408672332764, "step": 5000 }, { "epoch": 0.40954457513128395, "grad_norm": 0.1562151461839676, "learning_rate": 1.846806635275831e-05, "loss": 0.9279755353927612, "step": 5001 }, { "epoch": 0.4096264676678029, "grad_norm": 0.1939138025045395, "learning_rate": 1.8465958969140008e-05, "loss": 0.7593216896057129, "step": 5002 }, { "epoch": 0.4097083602043219, "grad_norm": 0.17962585389614105, "learning_rate": 1.8463851304484503e-05, "loss": 0.8716633319854736, "step": 5003 }, { "epoch": 0.40979025274084085, "grad_norm": 0.20440377295017242, "learning_rate": 1.846174335888339e-05, "loss": 1.3017947673797607, "step": 5004 }, { "epoch": 0.40987214527735977, "grad_norm": 0.2181839495897293, "learning_rate": 1.84596351324283e-05, "loss": 0.6828992962837219, "step": 5005 }, { "epoch": 0.40995403781387874, "grad_norm": 0.19064843654632568, "learning_rate": 1.8457526625210867e-05, "loss": 0.7747812271118164, "step": 5006 }, { "epoch": 0.4100359303503977, "grad_norm": 0.1992618441581726, "learning_rate": 1.8455417837322724e-05, "loss": 0.8848176002502441, "step": 5007 }, { "epoch": 0.4101178228869166, "grad_norm": 0.2140759378671646, "learning_rate": 1.8453308768855533e-05, "loss": 1.458719253540039, "step": 5008 }, { "epoch": 0.4101997154234356, "grad_norm": 0.1962522268295288, "learning_rate": 1.8451199419900972e-05, "loss": 0.8300123810768127, "step": 5009 }, { "epoch": 0.41028160795995455, "grad_norm": 0.15152139961719513, "learning_rate": 1.8449089790550706e-05, "loss": 0.6351271867752075, "step": 5010 }, { "epoch": 0.4103635004964735, "grad_norm": 0.22866128385066986, "learning_rate": 1.8446979880896437e-05, "loss": 0.7984527349472046, "step": 5011 }, { "epoch": 0.41044539303299243, "grad_norm": 0.16357088088989258, "learning_rate": 1.8444869691029874e-05, "loss": 0.8507169485092163, "step": 5012 }, { "epoch": 0.4105272855695114, "grad_norm": 0.18283525109291077, "learning_rate": 1.844275922104272e-05, "loss": 1.0137220621109009, "step": 5013 }, { "epoch": 0.41060917810603037, "grad_norm": 0.17136558890342712, "learning_rate": 1.8440648471026727e-05, "loss": 0.794072687625885, "step": 5014 }, { "epoch": 0.41069107064254934, "grad_norm": 0.1886119395494461, "learning_rate": 1.8438537441073614e-05, "loss": 0.8772546052932739, "step": 5015 }, { "epoch": 0.41077296317906825, "grad_norm": 0.1752847284078598, "learning_rate": 1.8436426131275148e-05, "loss": 0.895545244216919, "step": 5016 }, { "epoch": 0.4108548557155872, "grad_norm": 0.17248259484767914, "learning_rate": 1.84343145417231e-05, "loss": 0.953667402267456, "step": 5017 }, { "epoch": 0.4109367482521062, "grad_norm": 0.17045527696609497, "learning_rate": 1.8432202672509234e-05, "loss": 0.9176346659660339, "step": 5018 }, { "epoch": 0.4110186407886251, "grad_norm": 0.1872190684080124, "learning_rate": 1.8430090523725348e-05, "loss": 0.91285640001297, "step": 5019 }, { "epoch": 0.41110053332514407, "grad_norm": 0.18455931544303894, "learning_rate": 1.8427978095463243e-05, "loss": 0.7770208120346069, "step": 5020 }, { "epoch": 0.41118242586166304, "grad_norm": 0.15735073387622833, "learning_rate": 1.842586538781474e-05, "loss": 0.6984356641769409, "step": 5021 }, { "epoch": 0.411264318398182, "grad_norm": 0.1825786828994751, "learning_rate": 1.8423752400871654e-05, "loss": 0.833448052406311, "step": 5022 }, { "epoch": 0.4113462109347009, "grad_norm": 0.17885322868824005, "learning_rate": 1.8421639134725835e-05, "loss": 0.6901897192001343, "step": 5023 }, { "epoch": 0.4114281034712199, "grad_norm": 0.18414925038814545, "learning_rate": 1.841952558946913e-05, "loss": 0.7179949283599854, "step": 5024 }, { "epoch": 0.41150999600773885, "grad_norm": 0.17916403710842133, "learning_rate": 1.8417411765193408e-05, "loss": 1.1610106229782104, "step": 5025 }, { "epoch": 0.4115918885442578, "grad_norm": 0.18813838064670563, "learning_rate": 1.841529766199053e-05, "loss": 0.8597434759140015, "step": 5026 }, { "epoch": 0.41167378108077674, "grad_norm": 0.1834438592195511, "learning_rate": 1.8413183279952397e-05, "loss": 0.8743181824684143, "step": 5027 }, { "epoch": 0.4117556736172957, "grad_norm": 0.18378406763076782, "learning_rate": 1.8411068619170906e-05, "loss": 0.8814966082572937, "step": 5028 }, { "epoch": 0.41183756615381467, "grad_norm": 0.18118485808372498, "learning_rate": 1.8408953679737968e-05, "loss": 0.9799172282218933, "step": 5029 }, { "epoch": 0.4119194586903336, "grad_norm": 0.16234096884727478, "learning_rate": 1.8406838461745505e-05, "loss": 0.8437620997428894, "step": 5030 }, { "epoch": 0.41200135122685255, "grad_norm": 0.1751297265291214, "learning_rate": 1.8404722965285456e-05, "loss": 0.6059399247169495, "step": 5031 }, { "epoch": 0.4120832437633715, "grad_norm": 0.181343674659729, "learning_rate": 1.8402607190449766e-05, "loss": 0.7560007572174072, "step": 5032 }, { "epoch": 0.4121651362998905, "grad_norm": 0.16960038244724274, "learning_rate": 1.84004911373304e-05, "loss": 0.7235829830169678, "step": 5033 }, { "epoch": 0.4122470288364094, "grad_norm": 0.16263945400714874, "learning_rate": 1.8398374806019323e-05, "loss": 0.6315873265266418, "step": 5034 }, { "epoch": 0.41232892137292837, "grad_norm": 0.16230539977550507, "learning_rate": 1.8396258196608524e-05, "loss": 0.940045952796936, "step": 5035 }, { "epoch": 0.41241081390944734, "grad_norm": 0.17629235982894897, "learning_rate": 1.8394141309190003e-05, "loss": 0.7686565518379211, "step": 5036 }, { "epoch": 0.4124927064459663, "grad_norm": 0.14415593445301056, "learning_rate": 1.8392024143855764e-05, "loss": 0.6679525971412659, "step": 5037 }, { "epoch": 0.4125745989824852, "grad_norm": 0.22276516258716583, "learning_rate": 1.8389906700697832e-05, "loss": 0.7229623794555664, "step": 5038 }, { "epoch": 0.4126564915190042, "grad_norm": 0.19961333274841309, "learning_rate": 1.8387788979808232e-05, "loss": 0.7958537936210632, "step": 5039 }, { "epoch": 0.41273838405552316, "grad_norm": 0.1747620701789856, "learning_rate": 1.8385670981279017e-05, "loss": 0.9156761169433594, "step": 5040 }, { "epoch": 0.41282027659204207, "grad_norm": 0.20118634402751923, "learning_rate": 1.8383552705202237e-05, "loss": 0.9581171870231628, "step": 5041 }, { "epoch": 0.41290216912856104, "grad_norm": 0.1832311451435089, "learning_rate": 1.8381434151669966e-05, "loss": 0.8034359216690063, "step": 5042 }, { "epoch": 0.41298406166508, "grad_norm": 0.17819732427597046, "learning_rate": 1.8379315320774284e-05, "loss": 1.0520960092544556, "step": 5043 }, { "epoch": 0.413065954201599, "grad_norm": 0.15707549452781677, "learning_rate": 1.8377196212607286e-05, "loss": 0.8873629570007324, "step": 5044 }, { "epoch": 0.4131478467381179, "grad_norm": 0.14208269119262695, "learning_rate": 1.837507682726107e-05, "loss": 0.8027006983757019, "step": 5045 }, { "epoch": 0.41322973927463685, "grad_norm": 0.21803416311740875, "learning_rate": 1.837295716482776e-05, "loss": 1.0342291593551636, "step": 5046 }, { "epoch": 0.4133116318111558, "grad_norm": 0.1716996133327484, "learning_rate": 1.8370837225399484e-05, "loss": 0.7716652750968933, "step": 5047 }, { "epoch": 0.4133935243476748, "grad_norm": 0.17768065631389618, "learning_rate": 1.836871700906838e-05, "loss": 0.9595019221305847, "step": 5048 }, { "epoch": 0.4134754168841937, "grad_norm": 0.1855856031179428, "learning_rate": 1.8366596515926606e-05, "loss": 0.9740675687789917, "step": 5049 }, { "epoch": 0.41355730942071267, "grad_norm": 0.20895196497440338, "learning_rate": 1.8364475746066327e-05, "loss": 1.0079102516174316, "step": 5050 }, { "epoch": 0.41363920195723164, "grad_norm": 0.20975518226623535, "learning_rate": 1.836235469957972e-05, "loss": 0.8894243836402893, "step": 5051 }, { "epoch": 0.41372109449375055, "grad_norm": 0.19174417853355408, "learning_rate": 1.8360233376558967e-05, "loss": 0.8962051868438721, "step": 5052 }, { "epoch": 0.4138029870302695, "grad_norm": 0.1846431940793991, "learning_rate": 1.835811177709628e-05, "loss": 0.6075901389122009, "step": 5053 }, { "epoch": 0.4138848795667885, "grad_norm": 0.18730808794498444, "learning_rate": 1.8355989901283868e-05, "loss": 0.981726348400116, "step": 5054 }, { "epoch": 0.41396677210330746, "grad_norm": 0.198908731341362, "learning_rate": 1.8353867749213958e-05, "loss": 0.7639051675796509, "step": 5055 }, { "epoch": 0.41404866463982637, "grad_norm": 0.15319058299064636, "learning_rate": 1.835174532097878e-05, "loss": 0.7714362144470215, "step": 5056 }, { "epoch": 0.41413055717634534, "grad_norm": 0.19342046976089478, "learning_rate": 1.8349622616670595e-05, "loss": 0.8694853782653809, "step": 5057 }, { "epoch": 0.4142124497128643, "grad_norm": 0.19516195356845856, "learning_rate": 1.8347499636381658e-05, "loss": 0.5231842398643494, "step": 5058 }, { "epoch": 0.4142943422493833, "grad_norm": 0.1792737990617752, "learning_rate": 1.834537638020424e-05, "loss": 0.9029706120491028, "step": 5059 }, { "epoch": 0.4143762347859022, "grad_norm": 0.19359585642814636, "learning_rate": 1.834325284823063e-05, "loss": 0.8035160303115845, "step": 5060 }, { "epoch": 0.41445812732242115, "grad_norm": 0.17062629759311676, "learning_rate": 1.834112904055313e-05, "loss": 0.9138513803482056, "step": 5061 }, { "epoch": 0.4145400198589401, "grad_norm": 0.23629418015480042, "learning_rate": 1.8339004957264042e-05, "loss": 1.0429738759994507, "step": 5062 }, { "epoch": 0.41462191239545904, "grad_norm": 0.2184596061706543, "learning_rate": 1.8336880598455692e-05, "loss": 0.7987755537033081, "step": 5063 }, { "epoch": 0.414703804931978, "grad_norm": 0.20879052579402924, "learning_rate": 1.8334755964220406e-05, "loss": 0.8705446720123291, "step": 5064 }, { "epoch": 0.41478569746849697, "grad_norm": 0.21428747475147247, "learning_rate": 1.8332631054650538e-05, "loss": 0.7276716232299805, "step": 5065 }, { "epoch": 0.41486759000501594, "grad_norm": 0.18660998344421387, "learning_rate": 1.8330505869838446e-05, "loss": 0.7780908942222595, "step": 5066 }, { "epoch": 0.41494948254153485, "grad_norm": 0.201173335313797, "learning_rate": 1.8328380409876493e-05, "loss": 0.9767258167266846, "step": 5067 }, { "epoch": 0.4150313750780538, "grad_norm": 0.1836206167936325, "learning_rate": 1.8326254674857057e-05, "loss": 0.7083567976951599, "step": 5068 }, { "epoch": 0.4151132676145728, "grad_norm": 0.18724898993968964, "learning_rate": 1.8324128664872545e-05, "loss": 0.7906497120857239, "step": 5069 }, { "epoch": 0.4151951601510917, "grad_norm": 0.17939847707748413, "learning_rate": 1.832200238001535e-05, "loss": 0.8438933491706848, "step": 5070 }, { "epoch": 0.41527705268761067, "grad_norm": 0.1575000137090683, "learning_rate": 1.8319875820377897e-05, "loss": 1.034073829650879, "step": 5071 }, { "epoch": 0.41535894522412964, "grad_norm": 0.18241645395755768, "learning_rate": 1.8317748986052606e-05, "loss": 0.7690894603729248, "step": 5072 }, { "epoch": 0.4154408377606486, "grad_norm": 0.17777486145496368, "learning_rate": 1.8315621877131926e-05, "loss": 0.932884693145752, "step": 5073 }, { "epoch": 0.4155227302971675, "grad_norm": 0.1905587762594223, "learning_rate": 1.8313494493708305e-05, "loss": 0.8105591535568237, "step": 5074 }, { "epoch": 0.4156046228336865, "grad_norm": 0.18785883486270905, "learning_rate": 1.8311366835874212e-05, "loss": 0.9248709678649902, "step": 5075 }, { "epoch": 0.41568651537020546, "grad_norm": 0.17590418457984924, "learning_rate": 1.830923890372212e-05, "loss": 0.7090432643890381, "step": 5076 }, { "epoch": 0.4157684079067244, "grad_norm": 0.19362188875675201, "learning_rate": 1.830711069734452e-05, "loss": 0.9690373539924622, "step": 5077 }, { "epoch": 0.41585030044324334, "grad_norm": 0.19288070499897003, "learning_rate": 1.8304982216833915e-05, "loss": 0.9549434781074524, "step": 5078 }, { "epoch": 0.4159321929797623, "grad_norm": 0.17955997586250305, "learning_rate": 1.8302853462282812e-05, "loss": 1.018237829208374, "step": 5079 }, { "epoch": 0.4160140855162813, "grad_norm": 0.18209630250930786, "learning_rate": 1.8300724433783738e-05, "loss": 0.9378691911697388, "step": 5080 }, { "epoch": 0.4160959780528002, "grad_norm": 0.17391470074653625, "learning_rate": 1.8298595131429228e-05, "loss": 0.947184681892395, "step": 5081 }, { "epoch": 0.41617787058931915, "grad_norm": 0.19187791645526886, "learning_rate": 1.8296465555311833e-05, "loss": 1.1741151809692383, "step": 5082 }, { "epoch": 0.4162597631258381, "grad_norm": 0.19200529158115387, "learning_rate": 1.8294335705524114e-05, "loss": 0.744033932685852, "step": 5083 }, { "epoch": 0.4163416556623571, "grad_norm": 0.18049094080924988, "learning_rate": 1.8292205582158634e-05, "loss": 0.6440449953079224, "step": 5084 }, { "epoch": 0.416423548198876, "grad_norm": 0.18438820540905, "learning_rate": 1.829007518530799e-05, "loss": 0.6096867918968201, "step": 5085 }, { "epoch": 0.41650544073539497, "grad_norm": 0.17387816309928894, "learning_rate": 1.828794451506477e-05, "loss": 0.8521718978881836, "step": 5086 }, { "epoch": 0.41658733327191394, "grad_norm": 0.21966509521007538, "learning_rate": 1.828581357152158e-05, "loss": 0.903580367565155, "step": 5087 }, { "epoch": 0.4166692258084329, "grad_norm": 0.1836947500705719, "learning_rate": 1.828368235477105e-05, "loss": 0.8803563117980957, "step": 5088 }, { "epoch": 0.4167511183449518, "grad_norm": 0.1807958036661148, "learning_rate": 1.8281550864905798e-05, "loss": 1.0072920322418213, "step": 5089 }, { "epoch": 0.4168330108814708, "grad_norm": 0.1803923100233078, "learning_rate": 1.8279419102018476e-05, "loss": 0.8528308272361755, "step": 5090 }, { "epoch": 0.41691490341798976, "grad_norm": 0.20516814291477203, "learning_rate": 1.8277287066201737e-05, "loss": 0.9267112016677856, "step": 5091 }, { "epoch": 0.41699679595450867, "grad_norm": 0.19773732125759125, "learning_rate": 1.8275154757548245e-05, "loss": 0.809674084186554, "step": 5092 }, { "epoch": 0.41707868849102764, "grad_norm": 0.1936405599117279, "learning_rate": 1.8273022176150686e-05, "loss": 0.9244376420974731, "step": 5093 }, { "epoch": 0.4171605810275466, "grad_norm": 0.18972590565681458, "learning_rate": 1.8270889322101742e-05, "loss": 0.7975389361381531, "step": 5094 }, { "epoch": 0.4172424735640656, "grad_norm": 0.18869681656360626, "learning_rate": 1.8268756195494123e-05, "loss": 0.9766507744789124, "step": 5095 }, { "epoch": 0.4173243661005845, "grad_norm": 0.21369363367557526, "learning_rate": 1.8266622796420542e-05, "loss": 0.9319756627082825, "step": 5096 }, { "epoch": 0.41740625863710346, "grad_norm": 0.20810289680957794, "learning_rate": 1.8264489124973722e-05, "loss": 1.1251598596572876, "step": 5097 }, { "epoch": 0.4174881511736224, "grad_norm": 0.17675898969173431, "learning_rate": 1.8262355181246406e-05, "loss": 0.7165467739105225, "step": 5098 }, { "epoch": 0.4175700437101414, "grad_norm": 0.16627661883831024, "learning_rate": 1.8260220965331334e-05, "loss": 0.7169197797775269, "step": 5099 }, { "epoch": 0.4176519362466603, "grad_norm": 0.1938011795282364, "learning_rate": 1.825808647732128e-05, "loss": 0.8337584733963013, "step": 5100 }, { "epoch": 0.4177338287831793, "grad_norm": 0.16278977692127228, "learning_rate": 1.8255951717309013e-05, "loss": 0.844434380531311, "step": 5101 }, { "epoch": 0.41781572131969824, "grad_norm": 0.2081368863582611, "learning_rate": 1.825381668538732e-05, "loss": 0.7591282725334167, "step": 5102 }, { "epoch": 0.41789761385621715, "grad_norm": 0.17814123630523682, "learning_rate": 1.825168138164899e-05, "loss": 1.009148120880127, "step": 5103 }, { "epoch": 0.4179795063927361, "grad_norm": 0.1793467104434967, "learning_rate": 1.824954580618684e-05, "loss": 0.8043340444564819, "step": 5104 }, { "epoch": 0.4180613989292551, "grad_norm": 0.16902127861976624, "learning_rate": 1.824740995909369e-05, "loss": 0.5436157584190369, "step": 5105 }, { "epoch": 0.41814329146577406, "grad_norm": 0.20043110847473145, "learning_rate": 1.8245273840462373e-05, "loss": 0.7089431881904602, "step": 5106 }, { "epoch": 0.41822518400229297, "grad_norm": 0.21063454449176788, "learning_rate": 1.8243137450385734e-05, "loss": 0.7714178562164307, "step": 5107 }, { "epoch": 0.41830707653881194, "grad_norm": 0.17908529937267303, "learning_rate": 1.8241000788956625e-05, "loss": 0.6990824937820435, "step": 5108 }, { "epoch": 0.4183889690753309, "grad_norm": 0.1587780863046646, "learning_rate": 1.823886385626792e-05, "loss": 0.8366127610206604, "step": 5109 }, { "epoch": 0.4184708616118499, "grad_norm": 0.19029811024665833, "learning_rate": 1.823672665241249e-05, "loss": 0.8395830392837524, "step": 5110 }, { "epoch": 0.4185527541483688, "grad_norm": 0.17339667677879333, "learning_rate": 1.8234589177483238e-05, "loss": 1.0161033868789673, "step": 5111 }, { "epoch": 0.41863464668488776, "grad_norm": 0.2131277322769165, "learning_rate": 1.823245143157306e-05, "loss": 0.7818050384521484, "step": 5112 }, { "epoch": 0.4187165392214067, "grad_norm": 0.17857001721858978, "learning_rate": 1.8230313414774873e-05, "loss": 0.858956515789032, "step": 5113 }, { "epoch": 0.41879843175792564, "grad_norm": 0.18848836421966553, "learning_rate": 1.8228175127181605e-05, "loss": 0.7782373428344727, "step": 5114 }, { "epoch": 0.4188803242944446, "grad_norm": 0.2088644951581955, "learning_rate": 1.8226036568886196e-05, "loss": 0.6854217648506165, "step": 5115 }, { "epoch": 0.4189622168309636, "grad_norm": 0.17247751355171204, "learning_rate": 1.8223897739981588e-05, "loss": 0.6193524599075317, "step": 5116 }, { "epoch": 0.41904410936748254, "grad_norm": 0.20458747446537018, "learning_rate": 1.8221758640560756e-05, "loss": 0.8224255442619324, "step": 5117 }, { "epoch": 0.41912600190400146, "grad_norm": 0.1811729073524475, "learning_rate": 1.8219619270716666e-05, "loss": 0.7989381551742554, "step": 5118 }, { "epoch": 0.4192078944405204, "grad_norm": 0.15227505564689636, "learning_rate": 1.8217479630542306e-05, "loss": 0.6109708547592163, "step": 5119 }, { "epoch": 0.4192897869770394, "grad_norm": 0.17618891596794128, "learning_rate": 1.8215339720130673e-05, "loss": 1.030480146408081, "step": 5120 }, { "epoch": 0.41937167951355836, "grad_norm": 0.17340907454490662, "learning_rate": 1.8213199539574778e-05, "loss": 0.7121025919914246, "step": 5121 }, { "epoch": 0.4194535720500773, "grad_norm": 0.19075623154640198, "learning_rate": 1.8211059088967635e-05, "loss": 0.9103667736053467, "step": 5122 }, { "epoch": 0.41953546458659624, "grad_norm": 0.18273910880088806, "learning_rate": 1.820891836840229e-05, "loss": 0.9746288657188416, "step": 5123 }, { "epoch": 0.4196173571231152, "grad_norm": 0.20412300527095795, "learning_rate": 1.8206777377971777e-05, "loss": 0.6894181370735168, "step": 5124 }, { "epoch": 0.4196992496596341, "grad_norm": 0.19168631732463837, "learning_rate": 1.8204636117769154e-05, "loss": 0.6336537003517151, "step": 5125 }, { "epoch": 0.4197811421961531, "grad_norm": 0.18541893362998962, "learning_rate": 1.8202494587887494e-05, "loss": 0.8271810412406921, "step": 5126 }, { "epoch": 0.41986303473267206, "grad_norm": 0.16419188678264618, "learning_rate": 1.8200352788419868e-05, "loss": 1.0343014001846313, "step": 5127 }, { "epoch": 0.419944927269191, "grad_norm": 0.1834053099155426, "learning_rate": 1.8198210719459376e-05, "loss": 0.6832797527313232, "step": 5128 }, { "epoch": 0.42002681980570994, "grad_norm": 0.15223999321460724, "learning_rate": 1.819606838109912e-05, "loss": 0.9008648991584778, "step": 5129 }, { "epoch": 0.4201087123422289, "grad_norm": 0.16065111756324768, "learning_rate": 1.8193925773432205e-05, "loss": 0.7688698768615723, "step": 5130 }, { "epoch": 0.4201906048787479, "grad_norm": 0.1646135151386261, "learning_rate": 1.819178289655177e-05, "loss": 0.9256729483604431, "step": 5131 }, { "epoch": 0.42027249741526684, "grad_norm": 0.1854640245437622, "learning_rate": 1.8189639750550947e-05, "loss": 0.9076009392738342, "step": 5132 }, { "epoch": 0.42035438995178576, "grad_norm": 0.15605102479457855, "learning_rate": 1.8187496335522887e-05, "loss": 1.2095659971237183, "step": 5133 }, { "epoch": 0.4204362824883047, "grad_norm": 0.18665140867233276, "learning_rate": 1.8185352651560754e-05, "loss": 0.6672487854957581, "step": 5134 }, { "epoch": 0.4205181750248237, "grad_norm": 0.16953518986701965, "learning_rate": 1.818320869875772e-05, "loss": 0.7401173710823059, "step": 5135 }, { "epoch": 0.4206000675613426, "grad_norm": 0.18107870221138, "learning_rate": 1.8181064477206964e-05, "loss": 0.9411282539367676, "step": 5136 }, { "epoch": 0.4206819600978616, "grad_norm": 0.22464562952518463, "learning_rate": 1.817891998700169e-05, "loss": 1.0147037506103516, "step": 5137 }, { "epoch": 0.42076385263438054, "grad_norm": 0.18624348938465118, "learning_rate": 1.8176775228235105e-05, "loss": 0.776630163192749, "step": 5138 }, { "epoch": 0.4208457451708995, "grad_norm": 0.20334188640117645, "learning_rate": 1.8174630201000426e-05, "loss": 0.8975825905799866, "step": 5139 }, { "epoch": 0.4209276377074184, "grad_norm": 0.20415005087852478, "learning_rate": 1.8172484905390894e-05, "loss": 1.018853783607483, "step": 5140 }, { "epoch": 0.4210095302439374, "grad_norm": 0.1865503340959549, "learning_rate": 1.8170339341499738e-05, "loss": 0.5734263062477112, "step": 5141 }, { "epoch": 0.42109142278045636, "grad_norm": 0.15297000110149384, "learning_rate": 1.816819350942022e-05, "loss": 1.0201913118362427, "step": 5142 }, { "epoch": 0.42117331531697527, "grad_norm": 0.23985448479652405, "learning_rate": 1.8166047409245612e-05, "loss": 0.9153088331222534, "step": 5143 }, { "epoch": 0.42125520785349424, "grad_norm": 0.159892737865448, "learning_rate": 1.8163901041069185e-05, "loss": 0.6670908331871033, "step": 5144 }, { "epoch": 0.4213371003900132, "grad_norm": 0.19944003224372864, "learning_rate": 1.8161754404984237e-05, "loss": 0.7307386994361877, "step": 5145 }, { "epoch": 0.4214189929265322, "grad_norm": 0.1786630004644394, "learning_rate": 1.815960750108406e-05, "loss": 0.8546532988548279, "step": 5146 }, { "epoch": 0.4215008854630511, "grad_norm": 0.17497839033603668, "learning_rate": 1.8157460329461966e-05, "loss": 0.9227883815765381, "step": 5147 }, { "epoch": 0.42158277799957006, "grad_norm": 0.15835429728031158, "learning_rate": 1.815531289021129e-05, "loss": 0.6922209858894348, "step": 5148 }, { "epoch": 0.421664670536089, "grad_norm": 0.18352779746055603, "learning_rate": 1.8153165183425366e-05, "loss": 0.6159884333610535, "step": 5149 }, { "epoch": 0.421746563072608, "grad_norm": 0.15086840093135834, "learning_rate": 1.8151017209197537e-05, "loss": 0.6185681223869324, "step": 5150 }, { "epoch": 0.4218284556091269, "grad_norm": 0.27050885558128357, "learning_rate": 1.814886896762117e-05, "loss": 0.9331297874450684, "step": 5151 }, { "epoch": 0.4219103481456459, "grad_norm": 0.18095380067825317, "learning_rate": 1.8146720458789628e-05, "loss": 0.8188208937644958, "step": 5152 }, { "epoch": 0.42199224068216484, "grad_norm": 0.2031712681055069, "learning_rate": 1.81445716827963e-05, "loss": 0.8433481454849243, "step": 5153 }, { "epoch": 0.42207413321868376, "grad_norm": 0.19730126857757568, "learning_rate": 1.8142422639734577e-05, "loss": 0.6817410588264465, "step": 5154 }, { "epoch": 0.4221560257552027, "grad_norm": 0.20351417362689972, "learning_rate": 1.8140273329697867e-05, "loss": 0.9335296154022217, "step": 5155 }, { "epoch": 0.4222379182917217, "grad_norm": 0.23672421276569366, "learning_rate": 1.8138123752779595e-05, "loss": 0.847127377986908, "step": 5156 }, { "epoch": 0.42231981082824066, "grad_norm": 0.15737968683242798, "learning_rate": 1.813597390907318e-05, "loss": 1.0522080659866333, "step": 5157 }, { "epoch": 0.4224017033647596, "grad_norm": 0.19796104729175568, "learning_rate": 1.813382379867206e-05, "loss": 1.0770142078399658, "step": 5158 }, { "epoch": 0.42248359590127854, "grad_norm": 0.2659813165664673, "learning_rate": 1.8131673421669702e-05, "loss": 0.826482355594635, "step": 5159 }, { "epoch": 0.4225654884377975, "grad_norm": 0.16272585093975067, "learning_rate": 1.812952277815956e-05, "loss": 1.1627418994903564, "step": 5160 }, { "epoch": 0.4226473809743165, "grad_norm": 0.168051615357399, "learning_rate": 1.8127371868235116e-05, "loss": 1.096964955329895, "step": 5161 }, { "epoch": 0.4227292735108354, "grad_norm": 0.16542716324329376, "learning_rate": 1.812522069198985e-05, "loss": 0.7036202549934387, "step": 5162 }, { "epoch": 0.42281116604735436, "grad_norm": 0.18782448768615723, "learning_rate": 1.8123069249517263e-05, "loss": 1.015702724456787, "step": 5163 }, { "epoch": 0.4228930585838733, "grad_norm": 0.19121849536895752, "learning_rate": 1.812091754091087e-05, "loss": 0.7503167390823364, "step": 5164 }, { "epoch": 0.42297495112039224, "grad_norm": 0.17587770521640778, "learning_rate": 1.8118765566264193e-05, "loss": 0.5838730335235596, "step": 5165 }, { "epoch": 0.4230568436569112, "grad_norm": 0.20884041488170624, "learning_rate": 1.8116613325670762e-05, "loss": 0.634531557559967, "step": 5166 }, { "epoch": 0.4231387361934302, "grad_norm": 0.1666855663061142, "learning_rate": 1.811446081922412e-05, "loss": 0.7154521942138672, "step": 5167 }, { "epoch": 0.42322062872994914, "grad_norm": 0.14254380762577057, "learning_rate": 1.8112308047017828e-05, "loss": 0.6127246618270874, "step": 5168 }, { "epoch": 0.42330252126646806, "grad_norm": 0.15958784520626068, "learning_rate": 1.8110155009145457e-05, "loss": 0.8781707286834717, "step": 5169 }, { "epoch": 0.423384413802987, "grad_norm": 0.16516119241714478, "learning_rate": 1.810800170570058e-05, "loss": 1.0303021669387817, "step": 5170 }, { "epoch": 0.423466306339506, "grad_norm": 0.18929176032543182, "learning_rate": 1.8105848136776793e-05, "loss": 0.9102661609649658, "step": 5171 }, { "epoch": 0.42354819887602496, "grad_norm": 0.14856991171836853, "learning_rate": 1.8103694302467698e-05, "loss": 0.766535222530365, "step": 5172 }, { "epoch": 0.4236300914125439, "grad_norm": 0.19249460101127625, "learning_rate": 1.810154020286691e-05, "loss": 1.1857414245605469, "step": 5173 }, { "epoch": 0.42371198394906284, "grad_norm": 0.168783038854599, "learning_rate": 1.8099385838068053e-05, "loss": 0.7672098875045776, "step": 5174 }, { "epoch": 0.4237938764855818, "grad_norm": 0.18971367180347443, "learning_rate": 1.8097231208164766e-05, "loss": 0.7717016339302063, "step": 5175 }, { "epoch": 0.4238757690221007, "grad_norm": 0.18256278336048126, "learning_rate": 1.80950763132507e-05, "loss": 0.900749921798706, "step": 5176 }, { "epoch": 0.4239576615586197, "grad_norm": 0.20790952444076538, "learning_rate": 1.8092921153419512e-05, "loss": 0.8541547060012817, "step": 5177 }, { "epoch": 0.42403955409513866, "grad_norm": 0.1926247924566269, "learning_rate": 1.8090765728764875e-05, "loss": 1.0725033283233643, "step": 5178 }, { "epoch": 0.42412144663165763, "grad_norm": 0.18151718378067017, "learning_rate": 1.8088610039380475e-05, "loss": 0.728358805179596, "step": 5179 }, { "epoch": 0.42420333916817654, "grad_norm": 0.1894334852695465, "learning_rate": 1.8086454085360004e-05, "loss": 0.9969973564147949, "step": 5180 }, { "epoch": 0.4242852317046955, "grad_norm": 0.17966806888580322, "learning_rate": 1.808429786679717e-05, "loss": 0.793081521987915, "step": 5181 }, { "epoch": 0.4243671242412145, "grad_norm": 0.15464387834072113, "learning_rate": 1.808214138378569e-05, "loss": 0.534359335899353, "step": 5182 }, { "epoch": 0.42444901677773345, "grad_norm": 0.16161811351776123, "learning_rate": 1.80799846364193e-05, "loss": 0.654643714427948, "step": 5183 }, { "epoch": 0.42453090931425236, "grad_norm": 0.16348862648010254, "learning_rate": 1.807782762479173e-05, "loss": 0.5545058250427246, "step": 5184 }, { "epoch": 0.4246128018507713, "grad_norm": 0.18262702226638794, "learning_rate": 1.8075670348996746e-05, "loss": 0.6415174007415771, "step": 5185 }, { "epoch": 0.4246946943872903, "grad_norm": 0.1428273618221283, "learning_rate": 1.80735128091281e-05, "loss": 0.6356614828109741, "step": 5186 }, { "epoch": 0.4247765869238092, "grad_norm": 0.18506789207458496, "learning_rate": 1.807135500527957e-05, "loss": 1.241161584854126, "step": 5187 }, { "epoch": 0.4248584794603282, "grad_norm": 0.21634933352470398, "learning_rate": 1.8069196937544952e-05, "loss": 0.7496830821037292, "step": 5188 }, { "epoch": 0.42494037199684714, "grad_norm": 0.19860310852527618, "learning_rate": 1.8067038606018036e-05, "loss": 0.9316244721412659, "step": 5189 }, { "epoch": 0.4250222645333661, "grad_norm": 0.1529722362756729, "learning_rate": 1.806488001079263e-05, "loss": 0.8022573590278625, "step": 5190 }, { "epoch": 0.425104157069885, "grad_norm": 0.24162186682224274, "learning_rate": 1.8062721151962562e-05, "loss": 1.0487141609191895, "step": 5191 }, { "epoch": 0.425186049606404, "grad_norm": 0.2286836802959442, "learning_rate": 1.8060562029621664e-05, "loss": 1.109235405921936, "step": 5192 }, { "epoch": 0.42526794214292296, "grad_norm": 0.19610245525836945, "learning_rate": 1.8058402643863772e-05, "loss": 0.9658858180046082, "step": 5193 }, { "epoch": 0.42534983467944193, "grad_norm": 0.2430948168039322, "learning_rate": 1.8056242994782754e-05, "loss": 1.4655661582946777, "step": 5194 }, { "epoch": 0.42543172721596084, "grad_norm": 0.1682863086462021, "learning_rate": 1.805408308247247e-05, "loss": 0.6658735871315002, "step": 5195 }, { "epoch": 0.4255136197524798, "grad_norm": 0.19950735569000244, "learning_rate": 1.80519229070268e-05, "loss": 0.9865747690200806, "step": 5196 }, { "epoch": 0.4255955122889988, "grad_norm": 0.15903159976005554, "learning_rate": 1.804976246853964e-05, "loss": 0.7308022975921631, "step": 5197 }, { "epoch": 0.4256774048255177, "grad_norm": 0.20391543209552765, "learning_rate": 1.8047601767104878e-05, "loss": 1.0937438011169434, "step": 5198 }, { "epoch": 0.42575929736203666, "grad_norm": 0.2078532576560974, "learning_rate": 1.8045440802816436e-05, "loss": 1.2011927366256714, "step": 5199 }, { "epoch": 0.4258411898985556, "grad_norm": 0.20719796419143677, "learning_rate": 1.8043279575768243e-05, "loss": 0.8820013999938965, "step": 5200 }, { "epoch": 0.4259230824350746, "grad_norm": 0.1719372719526291, "learning_rate": 1.8041118086054223e-05, "loss": 0.7421293258666992, "step": 5201 }, { "epoch": 0.4260049749715935, "grad_norm": 0.17443060874938965, "learning_rate": 1.8038956333768336e-05, "loss": 0.9194854497909546, "step": 5202 }, { "epoch": 0.4260868675081125, "grad_norm": 0.1749100685119629, "learning_rate": 1.803679431900453e-05, "loss": 0.8144937753677368, "step": 5203 }, { "epoch": 0.42616876004463145, "grad_norm": 0.16796676814556122, "learning_rate": 1.8034632041856776e-05, "loss": 0.9969911575317383, "step": 5204 }, { "epoch": 0.42625065258115036, "grad_norm": 0.20621591806411743, "learning_rate": 1.8032469502419066e-05, "loss": 0.8193541169166565, "step": 5205 }, { "epoch": 0.4263325451176693, "grad_norm": 0.18196383118629456, "learning_rate": 1.8030306700785383e-05, "loss": 0.9046236872673035, "step": 5206 }, { "epoch": 0.4264144376541883, "grad_norm": 0.21178656816482544, "learning_rate": 1.8028143637049728e-05, "loss": 0.8544080257415771, "step": 5207 }, { "epoch": 0.42649633019070726, "grad_norm": 0.1592809408903122, "learning_rate": 1.802598031130613e-05, "loss": 0.7195807695388794, "step": 5208 }, { "epoch": 0.4265782227272262, "grad_norm": 0.14795784652233124, "learning_rate": 1.8023816723648604e-05, "loss": 0.8261533975601196, "step": 5209 }, { "epoch": 0.42666011526374514, "grad_norm": 0.18316765129566193, "learning_rate": 1.802165287417119e-05, "loss": 1.1495039463043213, "step": 5210 }, { "epoch": 0.4267420078002641, "grad_norm": 0.1899278163909912, "learning_rate": 1.801948876296795e-05, "loss": 0.6749752163887024, "step": 5211 }, { "epoch": 0.4268239003367831, "grad_norm": 0.17670032382011414, "learning_rate": 1.8017324390132927e-05, "loss": 0.6554186344146729, "step": 5212 }, { "epoch": 0.426905792873302, "grad_norm": 0.20175275206565857, "learning_rate": 1.801515975576021e-05, "loss": 0.5411480069160461, "step": 5213 }, { "epoch": 0.42698768540982096, "grad_norm": 0.2086649239063263, "learning_rate": 1.801299485994387e-05, "loss": 0.9328161478042603, "step": 5214 }, { "epoch": 0.42706957794633993, "grad_norm": 0.17418532073497772, "learning_rate": 1.8010829702778008e-05, "loss": 0.6841269135475159, "step": 5215 }, { "epoch": 0.42715147048285884, "grad_norm": 0.17946818470954895, "learning_rate": 1.8008664284356734e-05, "loss": 0.774822473526001, "step": 5216 }, { "epoch": 0.4272333630193778, "grad_norm": 0.19635938107967377, "learning_rate": 1.8006498604774158e-05, "loss": 1.1369215250015259, "step": 5217 }, { "epoch": 0.4273152555558968, "grad_norm": 0.22370025515556335, "learning_rate": 1.8004332664124416e-05, "loss": 1.0345852375030518, "step": 5218 }, { "epoch": 0.42739714809241575, "grad_norm": 0.22223836183547974, "learning_rate": 1.800216646250165e-05, "loss": 0.8757798075675964, "step": 5219 }, { "epoch": 0.42747904062893466, "grad_norm": 0.16669832170009613, "learning_rate": 1.8e-05, "loss": 0.723968505859375, "step": 5220 }, { "epoch": 0.4275609331654536, "grad_norm": 0.22766569256782532, "learning_rate": 1.7997833276713642e-05, "loss": 0.668658971786499, "step": 5221 }, { "epoch": 0.4276428257019726, "grad_norm": 0.1738009750843048, "learning_rate": 1.799566629273675e-05, "loss": 1.0621371269226074, "step": 5222 }, { "epoch": 0.42772471823849156, "grad_norm": 0.1554383486509323, "learning_rate": 1.7993499048163503e-05, "loss": 0.7264142036437988, "step": 5223 }, { "epoch": 0.4278066107750105, "grad_norm": 0.17040498554706573, "learning_rate": 1.79913315430881e-05, "loss": 0.8015718460083008, "step": 5224 }, { "epoch": 0.42788850331152944, "grad_norm": 0.21791017055511475, "learning_rate": 1.7989163777604756e-05, "loss": 0.9434969425201416, "step": 5225 }, { "epoch": 0.4279703958480484, "grad_norm": 0.2375526875257492, "learning_rate": 1.7986995751807686e-05, "loss": 0.8470356464385986, "step": 5226 }, { "epoch": 0.4280522883845673, "grad_norm": 0.184286966919899, "learning_rate": 1.7984827465791117e-05, "loss": 0.8053234219551086, "step": 5227 }, { "epoch": 0.4281341809210863, "grad_norm": 0.18212683498859406, "learning_rate": 1.79826589196493e-05, "loss": 0.8717324137687683, "step": 5228 }, { "epoch": 0.42821607345760526, "grad_norm": 0.22836649417877197, "learning_rate": 1.798049011347649e-05, "loss": 1.044623613357544, "step": 5229 }, { "epoch": 0.42829796599412423, "grad_norm": 0.17592276632785797, "learning_rate": 1.797832104736694e-05, "loss": 0.6819548606872559, "step": 5230 }, { "epoch": 0.42837985853064314, "grad_norm": 0.20201385021209717, "learning_rate": 1.7976151721414937e-05, "loss": 0.9130160212516785, "step": 5231 }, { "epoch": 0.4284617510671621, "grad_norm": 0.24081097543239594, "learning_rate": 1.7973982135714768e-05, "loss": 0.9813926815986633, "step": 5232 }, { "epoch": 0.4285436436036811, "grad_norm": 0.17714235186576843, "learning_rate": 1.7971812290360732e-05, "loss": 0.773067057132721, "step": 5233 }, { "epoch": 0.42862553614020005, "grad_norm": 0.18605205416679382, "learning_rate": 1.7969642185447133e-05, "loss": 0.7597056031227112, "step": 5234 }, { "epoch": 0.42870742867671896, "grad_norm": 0.22878563404083252, "learning_rate": 1.7967471821068302e-05, "loss": 0.9603187441825867, "step": 5235 }, { "epoch": 0.42878932121323793, "grad_norm": 0.19997872412204742, "learning_rate": 1.7965301197318563e-05, "loss": 0.9045911431312561, "step": 5236 }, { "epoch": 0.4288712137497569, "grad_norm": 0.2051328718662262, "learning_rate": 1.796313031429227e-05, "loss": 1.0697424411773682, "step": 5237 }, { "epoch": 0.4289531062862758, "grad_norm": 0.15966010093688965, "learning_rate": 1.796095917208377e-05, "loss": 0.7443054914474487, "step": 5238 }, { "epoch": 0.4290349988227948, "grad_norm": 0.21561506390571594, "learning_rate": 1.795878777078743e-05, "loss": 0.9222891926765442, "step": 5239 }, { "epoch": 0.42911689135931375, "grad_norm": 0.16359250247478485, "learning_rate": 1.7956616110497637e-05, "loss": 0.7263847589492798, "step": 5240 }, { "epoch": 0.4291987838958327, "grad_norm": 0.18921780586242676, "learning_rate": 1.7954444191308773e-05, "loss": 0.8206148147583008, "step": 5241 }, { "epoch": 0.4292806764323516, "grad_norm": 0.22242067754268646, "learning_rate": 1.7952272013315237e-05, "loss": 1.5340698957443237, "step": 5242 }, { "epoch": 0.4293625689688706, "grad_norm": 0.1659547984600067, "learning_rate": 1.7950099576611445e-05, "loss": 0.644098162651062, "step": 5243 }, { "epoch": 0.42944446150538956, "grad_norm": 0.16177275776863098, "learning_rate": 1.794792688129182e-05, "loss": 0.7638753652572632, "step": 5244 }, { "epoch": 0.42952635404190853, "grad_norm": 0.15325792133808136, "learning_rate": 1.7945753927450796e-05, "loss": 0.7638987302780151, "step": 5245 }, { "epoch": 0.42960824657842744, "grad_norm": 0.14615435898303986, "learning_rate": 1.7943580715182813e-05, "loss": 0.7906946539878845, "step": 5246 }, { "epoch": 0.4296901391149464, "grad_norm": 0.1929064393043518, "learning_rate": 1.7941407244582335e-05, "loss": 0.9481295347213745, "step": 5247 }, { "epoch": 0.4297720316514654, "grad_norm": 0.19395139813423157, "learning_rate": 1.7939233515743826e-05, "loss": 0.9269990921020508, "step": 5248 }, { "epoch": 0.4298539241879843, "grad_norm": 0.19673453271389008, "learning_rate": 1.793705952876177e-05, "loss": 0.9051194190979004, "step": 5249 }, { "epoch": 0.42993581672450326, "grad_norm": 0.19067056477069855, "learning_rate": 1.793488528373065e-05, "loss": 1.0638176202774048, "step": 5250 }, { "epoch": 0.43001770926102223, "grad_norm": 0.19593147933483124, "learning_rate": 1.7932710780744975e-05, "loss": 0.9559282064437866, "step": 5251 }, { "epoch": 0.4300996017975412, "grad_norm": 0.19393077492713928, "learning_rate": 1.793053601989925e-05, "loss": 0.7309958934783936, "step": 5252 }, { "epoch": 0.4301814943340601, "grad_norm": 0.1893109530210495, "learning_rate": 1.7928361001288008e-05, "loss": 0.653713047504425, "step": 5253 }, { "epoch": 0.4302633868705791, "grad_norm": 0.25740256905555725, "learning_rate": 1.792618572500578e-05, "loss": 0.8142497539520264, "step": 5254 }, { "epoch": 0.43034527940709805, "grad_norm": 0.19734449684619904, "learning_rate": 1.7924010191147107e-05, "loss": 1.0365540981292725, "step": 5255 }, { "epoch": 0.430427171943617, "grad_norm": 0.2064865082502365, "learning_rate": 1.7921834399806556e-05, "loss": 0.9764235019683838, "step": 5256 }, { "epoch": 0.43050906448013593, "grad_norm": 0.17908696830272675, "learning_rate": 1.791965835107869e-05, "loss": 0.6567720174789429, "step": 5257 }, { "epoch": 0.4305909570166549, "grad_norm": 0.16605861485004425, "learning_rate": 1.791748204505809e-05, "loss": 0.7692967057228088, "step": 5258 }, { "epoch": 0.43067284955317386, "grad_norm": 0.17985761165618896, "learning_rate": 1.791530548183935e-05, "loss": 0.6868066191673279, "step": 5259 }, { "epoch": 0.4307547420896928, "grad_norm": 0.17758452892303467, "learning_rate": 1.791312866151707e-05, "loss": 0.9222252368927002, "step": 5260 }, { "epoch": 0.43083663462621175, "grad_norm": 0.16254809498786926, "learning_rate": 1.7910951584185865e-05, "loss": 0.5568316578865051, "step": 5261 }, { "epoch": 0.4309185271627307, "grad_norm": 0.18595828115940094, "learning_rate": 1.7908774249940357e-05, "loss": 0.7172431349754333, "step": 5262 }, { "epoch": 0.4310004196992497, "grad_norm": 0.19052202999591827, "learning_rate": 1.7906596658875188e-05, "loss": 0.9660304188728333, "step": 5263 }, { "epoch": 0.4310823122357686, "grad_norm": 0.17811733484268188, "learning_rate": 1.790441881108499e-05, "loss": 0.8613092303276062, "step": 5264 }, { "epoch": 0.43116420477228756, "grad_norm": 0.19627846777439117, "learning_rate": 1.7902240706664442e-05, "loss": 0.8554889559745789, "step": 5265 }, { "epoch": 0.43124609730880653, "grad_norm": 0.17916545271873474, "learning_rate": 1.79000623457082e-05, "loss": 1.122860074043274, "step": 5266 }, { "epoch": 0.4313279898453255, "grad_norm": 0.18732695281505585, "learning_rate": 1.789788372831095e-05, "loss": 0.9140031337738037, "step": 5267 }, { "epoch": 0.4314098823818444, "grad_norm": 0.19233152270317078, "learning_rate": 1.789570485456738e-05, "loss": 0.9495373964309692, "step": 5268 }, { "epoch": 0.4314917749183634, "grad_norm": 0.17256949841976166, "learning_rate": 1.7893525724572192e-05, "loss": 0.8867323994636536, "step": 5269 }, { "epoch": 0.43157366745488235, "grad_norm": 0.21394529938697815, "learning_rate": 1.7891346338420103e-05, "loss": 0.8856604099273682, "step": 5270 }, { "epoch": 0.43165555999140126, "grad_norm": 0.19805340468883514, "learning_rate": 1.788916669620584e-05, "loss": 0.8674989342689514, "step": 5271 }, { "epoch": 0.43173745252792023, "grad_norm": 0.20426052808761597, "learning_rate": 1.7886986798024138e-05, "loss": 0.5844146609306335, "step": 5272 }, { "epoch": 0.4318193450644392, "grad_norm": 0.21376396715641022, "learning_rate": 1.788480664396974e-05, "loss": 0.7621214985847473, "step": 5273 }, { "epoch": 0.43190123760095817, "grad_norm": 0.19605118036270142, "learning_rate": 1.7882626234137408e-05, "loss": 0.8814871907234192, "step": 5274 }, { "epoch": 0.4319831301374771, "grad_norm": 0.22234995663166046, "learning_rate": 1.788044556862191e-05, "loss": 1.3437964916229248, "step": 5275 }, { "epoch": 0.43206502267399605, "grad_norm": 0.1702258288860321, "learning_rate": 1.787826464751803e-05, "loss": 0.7519149780273438, "step": 5276 }, { "epoch": 0.432146915210515, "grad_norm": 0.20419761538505554, "learning_rate": 1.7876083470920557e-05, "loss": 1.1047908067703247, "step": 5277 }, { "epoch": 0.4322288077470339, "grad_norm": 0.16860273480415344, "learning_rate": 1.7873902038924292e-05, "loss": 0.9084750413894653, "step": 5278 }, { "epoch": 0.4323107002835529, "grad_norm": 0.20364035665988922, "learning_rate": 1.7871720351624056e-05, "loss": 0.8341184258460999, "step": 5279 }, { "epoch": 0.43239259282007186, "grad_norm": 0.16052241623401642, "learning_rate": 1.7869538409114667e-05, "loss": 0.7386298179626465, "step": 5280 }, { "epoch": 0.43247448535659083, "grad_norm": 0.19251850247383118, "learning_rate": 1.786735621149096e-05, "loss": 0.7212740182876587, "step": 5281 }, { "epoch": 0.43255637789310974, "grad_norm": 0.1846843957901001, "learning_rate": 1.7865173758847797e-05, "loss": 0.924606442451477, "step": 5282 }, { "epoch": 0.4326382704296287, "grad_norm": 0.19072383642196655, "learning_rate": 1.786299105128002e-05, "loss": 1.2306139469146729, "step": 5283 }, { "epoch": 0.4327201629661477, "grad_norm": 0.18118098378181458, "learning_rate": 1.78608080888825e-05, "loss": 0.679756224155426, "step": 5284 }, { "epoch": 0.43280205550266665, "grad_norm": 0.20475806295871735, "learning_rate": 1.7858624871750125e-05, "loss": 0.6254398822784424, "step": 5285 }, { "epoch": 0.43288394803918556, "grad_norm": 0.1910928636789322, "learning_rate": 1.785644139997778e-05, "loss": 0.7806975245475769, "step": 5286 }, { "epoch": 0.43296584057570453, "grad_norm": 0.2629445791244507, "learning_rate": 1.785425767366038e-05, "loss": 0.6889681816101074, "step": 5287 }, { "epoch": 0.4330477331122235, "grad_norm": 0.226944699883461, "learning_rate": 1.785207369289282e-05, "loss": 0.8932806849479675, "step": 5288 }, { "epoch": 0.4331296256487424, "grad_norm": 0.1821507215499878, "learning_rate": 1.7849889457770035e-05, "loss": 0.6528094410896301, "step": 5289 }, { "epoch": 0.4332115181852614, "grad_norm": 0.1574363261461258, "learning_rate": 1.7847704968386964e-05, "loss": 0.8717193603515625, "step": 5290 }, { "epoch": 0.43329341072178035, "grad_norm": 0.21368974447250366, "learning_rate": 1.7845520224838552e-05, "loss": 0.8764718174934387, "step": 5291 }, { "epoch": 0.4333753032582993, "grad_norm": 0.16383175551891327, "learning_rate": 1.7843335227219747e-05, "loss": 0.7858221530914307, "step": 5292 }, { "epoch": 0.43345719579481823, "grad_norm": 0.19847773015499115, "learning_rate": 1.7841149975625534e-05, "loss": 0.740852952003479, "step": 5293 }, { "epoch": 0.4335390883313372, "grad_norm": 0.1982891857624054, "learning_rate": 1.7838964470150884e-05, "loss": 0.8177998661994934, "step": 5294 }, { "epoch": 0.43362098086785617, "grad_norm": 0.18227092921733856, "learning_rate": 1.7836778710890787e-05, "loss": 0.918961763381958, "step": 5295 }, { "epoch": 0.43370287340437513, "grad_norm": 0.2009030431509018, "learning_rate": 1.7834592697940246e-05, "loss": 0.8913455605506897, "step": 5296 }, { "epoch": 0.43378476594089405, "grad_norm": 0.2048221230506897, "learning_rate": 1.783240643139428e-05, "loss": 0.6601479053497314, "step": 5297 }, { "epoch": 0.433866658477413, "grad_norm": 0.19115330278873444, "learning_rate": 1.7830219911347904e-05, "loss": 0.8063157796859741, "step": 5298 }, { "epoch": 0.433948551013932, "grad_norm": 0.15937402844429016, "learning_rate": 1.782803313789616e-05, "loss": 0.8701393008232117, "step": 5299 }, { "epoch": 0.4340304435504509, "grad_norm": 0.18525800108909607, "learning_rate": 1.782584611113409e-05, "loss": 0.9214959144592285, "step": 5300 }, { "epoch": 0.43411233608696986, "grad_norm": 0.20054341852664948, "learning_rate": 1.7823658831156757e-05, "loss": 0.7140713334083557, "step": 5301 }, { "epoch": 0.43419422862348883, "grad_norm": 0.20579785108566284, "learning_rate": 1.782147129805922e-05, "loss": 1.3348990678787231, "step": 5302 }, { "epoch": 0.4342761211600078, "grad_norm": 0.17758409678936005, "learning_rate": 1.781928351193657e-05, "loss": 0.8599410653114319, "step": 5303 }, { "epoch": 0.4343580136965267, "grad_norm": 0.16102300584316254, "learning_rate": 1.7817095472883887e-05, "loss": 0.8594273328781128, "step": 5304 }, { "epoch": 0.4344399062330457, "grad_norm": 0.18289902806282043, "learning_rate": 1.7814907180996274e-05, "loss": 0.8032743334770203, "step": 5305 }, { "epoch": 0.43452179876956465, "grad_norm": 0.18315783143043518, "learning_rate": 1.781271863636885e-05, "loss": 1.0232023000717163, "step": 5306 }, { "epoch": 0.4346036913060836, "grad_norm": 0.21194827556610107, "learning_rate": 1.7810529839096727e-05, "loss": 0.8434994220733643, "step": 5307 }, { "epoch": 0.43468558384260253, "grad_norm": 0.15675370395183563, "learning_rate": 1.780834078927505e-05, "loss": 1.1023461818695068, "step": 5308 }, { "epoch": 0.4347674763791215, "grad_norm": 0.17738156020641327, "learning_rate": 1.780615148699896e-05, "loss": 0.7819367051124573, "step": 5309 }, { "epoch": 0.43484936891564047, "grad_norm": 0.19961993396282196, "learning_rate": 1.780396193236361e-05, "loss": 0.856561541557312, "step": 5310 }, { "epoch": 0.4349312614521594, "grad_norm": 0.19377334415912628, "learning_rate": 1.780177212546417e-05, "loss": 1.0451115369796753, "step": 5311 }, { "epoch": 0.43501315398867835, "grad_norm": 0.17293603718280792, "learning_rate": 1.7799582066395818e-05, "loss": 0.6771226525306702, "step": 5312 }, { "epoch": 0.4350950465251973, "grad_norm": 0.19175678491592407, "learning_rate": 1.7797391755253745e-05, "loss": 0.770937979221344, "step": 5313 }, { "epoch": 0.4351769390617163, "grad_norm": 0.2649477422237396, "learning_rate": 1.7795201192133146e-05, "loss": 0.7696906328201294, "step": 5314 }, { "epoch": 0.4352588315982352, "grad_norm": 0.20499393343925476, "learning_rate": 1.7793010377129234e-05, "loss": 0.5918949246406555, "step": 5315 }, { "epoch": 0.43534072413475416, "grad_norm": 0.16641917824745178, "learning_rate": 1.7790819310337233e-05, "loss": 0.7682873010635376, "step": 5316 }, { "epoch": 0.43542261667127313, "grad_norm": 0.1669362485408783, "learning_rate": 1.7788627991852374e-05, "loss": 0.7710197567939758, "step": 5317 }, { "epoch": 0.4355045092077921, "grad_norm": 0.19260668754577637, "learning_rate": 1.77864364217699e-05, "loss": 0.8183823227882385, "step": 5318 }, { "epoch": 0.435586401744311, "grad_norm": 0.17928192019462585, "learning_rate": 1.7784244600185065e-05, "loss": 0.8545870184898376, "step": 5319 }, { "epoch": 0.43566829428083, "grad_norm": 0.2326846420764923, "learning_rate": 1.778205252719314e-05, "loss": 0.7696012258529663, "step": 5320 }, { "epoch": 0.43575018681734895, "grad_norm": 0.18279686570167542, "learning_rate": 1.777986020288939e-05, "loss": 0.7747607231140137, "step": 5321 }, { "epoch": 0.43583207935386786, "grad_norm": 0.17589135468006134, "learning_rate": 1.777766762736912e-05, "loss": 0.7990815043449402, "step": 5322 }, { "epoch": 0.43591397189038683, "grad_norm": 0.18682031333446503, "learning_rate": 1.7775474800727613e-05, "loss": 1.2742195129394531, "step": 5323 }, { "epoch": 0.4359958644269058, "grad_norm": 0.19327767193317413, "learning_rate": 1.7773281723060185e-05, "loss": 0.8878986239433289, "step": 5324 }, { "epoch": 0.43607775696342477, "grad_norm": 0.15939031541347504, "learning_rate": 1.7771088394462156e-05, "loss": 0.8430910110473633, "step": 5325 }, { "epoch": 0.4361596494999437, "grad_norm": 0.21649105846881866, "learning_rate": 1.7768894815028853e-05, "loss": 0.8194905519485474, "step": 5326 }, { "epoch": 0.43624154203646265, "grad_norm": 0.18161314725875854, "learning_rate": 1.7766700984855623e-05, "loss": 0.7152822613716125, "step": 5327 }, { "epoch": 0.4363234345729816, "grad_norm": 0.18267078697681427, "learning_rate": 1.7764506904037816e-05, "loss": 0.6546453237533569, "step": 5328 }, { "epoch": 0.4364053271095006, "grad_norm": 0.23010072112083435, "learning_rate": 1.77623125726708e-05, "loss": 1.086508870124817, "step": 5329 }, { "epoch": 0.4364872196460195, "grad_norm": 0.17149338126182556, "learning_rate": 1.7760117990849942e-05, "loss": 0.9295289516448975, "step": 5330 }, { "epoch": 0.43656911218253847, "grad_norm": 0.20218384265899658, "learning_rate": 1.7757923158670638e-05, "loss": 0.9519931077957153, "step": 5331 }, { "epoch": 0.43665100471905743, "grad_norm": 0.17656703293323517, "learning_rate": 1.775572807622827e-05, "loss": 0.6793431043624878, "step": 5332 }, { "epoch": 0.43673289725557635, "grad_norm": 0.1682281643152237, "learning_rate": 1.7753532743618257e-05, "loss": 0.6486058831214905, "step": 5333 }, { "epoch": 0.4368147897920953, "grad_norm": 0.20148693025112152, "learning_rate": 1.7751337160936018e-05, "loss": 0.82770836353302, "step": 5334 }, { "epoch": 0.4368966823286143, "grad_norm": 0.19946426153182983, "learning_rate": 1.774914132827697e-05, "loss": 0.576486349105835, "step": 5335 }, { "epoch": 0.43697857486513325, "grad_norm": 0.19758914411067963, "learning_rate": 1.7746945245736568e-05, "loss": 0.6773539185523987, "step": 5336 }, { "epoch": 0.43706046740165216, "grad_norm": 0.1711030751466751, "learning_rate": 1.7744748913410255e-05, "loss": 0.8180829882621765, "step": 5337 }, { "epoch": 0.43714235993817113, "grad_norm": 0.18911796808242798, "learning_rate": 1.774255233139349e-05, "loss": 0.8503258228302002, "step": 5338 }, { "epoch": 0.4372242524746901, "grad_norm": 0.2244829684495926, "learning_rate": 1.774035549978175e-05, "loss": 1.099334478378296, "step": 5339 }, { "epoch": 0.43730614501120907, "grad_norm": 0.17577069997787476, "learning_rate": 1.7738158418670518e-05, "loss": 0.7109920382499695, "step": 5340 }, { "epoch": 0.437388037547728, "grad_norm": 0.17887598276138306, "learning_rate": 1.7735961088155287e-05, "loss": 0.9608672857284546, "step": 5341 }, { "epoch": 0.43746993008424695, "grad_norm": 0.21850259602069855, "learning_rate": 1.773376350833156e-05, "loss": 1.1691210269927979, "step": 5342 }, { "epoch": 0.4375518226207659, "grad_norm": 0.17714087665081024, "learning_rate": 1.773156567929486e-05, "loss": 0.7391451597213745, "step": 5343 }, { "epoch": 0.43763371515728483, "grad_norm": 0.15036965906620026, "learning_rate": 1.7729367601140705e-05, "loss": 0.685166597366333, "step": 5344 }, { "epoch": 0.4377156076938038, "grad_norm": 0.17373575270175934, "learning_rate": 1.772716927396464e-05, "loss": 1.000604510307312, "step": 5345 }, { "epoch": 0.43779750023032277, "grad_norm": 0.16224631667137146, "learning_rate": 1.7724970697862207e-05, "loss": 0.9363677501678467, "step": 5346 }, { "epoch": 0.43787939276684174, "grad_norm": 0.18499144911766052, "learning_rate": 1.772277187292897e-05, "loss": 0.8159418106079102, "step": 5347 }, { "epoch": 0.43796128530336065, "grad_norm": 0.15708468854427338, "learning_rate": 1.7720572799260497e-05, "loss": 0.6539720296859741, "step": 5348 }, { "epoch": 0.4380431778398796, "grad_norm": 0.17841537296772003, "learning_rate": 1.7718373476952366e-05, "loss": 0.7083767056465149, "step": 5349 }, { "epoch": 0.4381250703763986, "grad_norm": 0.1555212289094925, "learning_rate": 1.7716173906100178e-05, "loss": 0.6411643624305725, "step": 5350 }, { "epoch": 0.4382069629129175, "grad_norm": 0.23363284766674042, "learning_rate": 1.771397408679952e-05, "loss": 1.0165244340896606, "step": 5351 }, { "epoch": 0.43828885544943647, "grad_norm": 0.16776911914348602, "learning_rate": 1.771177401914602e-05, "loss": 0.7910541296005249, "step": 5352 }, { "epoch": 0.43837074798595543, "grad_norm": 0.19235385954380035, "learning_rate": 1.7709573703235295e-05, "loss": 0.7911033630371094, "step": 5353 }, { "epoch": 0.4384526405224744, "grad_norm": 0.1787407398223877, "learning_rate": 1.7707373139162982e-05, "loss": 0.7406383156776428, "step": 5354 }, { "epoch": 0.4385345330589933, "grad_norm": 0.17045576870441437, "learning_rate": 1.7705172327024718e-05, "loss": 0.9196993708610535, "step": 5355 }, { "epoch": 0.4386164255955123, "grad_norm": 0.18317103385925293, "learning_rate": 1.7702971266916176e-05, "loss": 0.8093180656433105, "step": 5356 }, { "epoch": 0.43869831813203125, "grad_norm": 0.18082581460475922, "learning_rate": 1.770076995893301e-05, "loss": 0.9671875238418579, "step": 5357 }, { "epoch": 0.4387802106685502, "grad_norm": 0.16281995177268982, "learning_rate": 1.7698568403170904e-05, "loss": 0.7753483057022095, "step": 5358 }, { "epoch": 0.43886210320506913, "grad_norm": 0.19645413756370544, "learning_rate": 1.769636659972554e-05, "loss": 0.899834394454956, "step": 5359 }, { "epoch": 0.4389439957415881, "grad_norm": 0.18177786469459534, "learning_rate": 1.7694164548692624e-05, "loss": 0.7106770873069763, "step": 5360 }, { "epoch": 0.43902588827810707, "grad_norm": 0.17229938507080078, "learning_rate": 1.7691962250167866e-05, "loss": 0.7530112266540527, "step": 5361 }, { "epoch": 0.439107780814626, "grad_norm": 0.17109821736812592, "learning_rate": 1.7689759704246983e-05, "loss": 0.7759050726890564, "step": 5362 }, { "epoch": 0.43918967335114495, "grad_norm": 0.2030181735754013, "learning_rate": 1.7687556911025707e-05, "loss": 0.8982888460159302, "step": 5363 }, { "epoch": 0.4392715658876639, "grad_norm": 0.16997826099395752, "learning_rate": 1.768535387059979e-05, "loss": 0.6584606766700745, "step": 5364 }, { "epoch": 0.4393534584241829, "grad_norm": 0.18718430399894714, "learning_rate": 1.7683150583064972e-05, "loss": 0.6908299326896667, "step": 5365 }, { "epoch": 0.4394353509607018, "grad_norm": 0.1918424814939499, "learning_rate": 1.768094704851702e-05, "loss": 0.8388704061508179, "step": 5366 }, { "epoch": 0.43951724349722077, "grad_norm": 0.16878333687782288, "learning_rate": 1.767874326705172e-05, "loss": 0.7723832130432129, "step": 5367 }, { "epoch": 0.43959913603373973, "grad_norm": 0.2200322151184082, "learning_rate": 1.7676539238764838e-05, "loss": 0.9158124923706055, "step": 5368 }, { "epoch": 0.4396810285702587, "grad_norm": 0.19098779559135437, "learning_rate": 1.7674334963752187e-05, "loss": 0.6910420656204224, "step": 5369 }, { "epoch": 0.4397629211067776, "grad_norm": 0.19668938219547272, "learning_rate": 1.7672130442109567e-05, "loss": 0.6618331670761108, "step": 5370 }, { "epoch": 0.4398448136432966, "grad_norm": 0.18889592587947845, "learning_rate": 1.7669925673932796e-05, "loss": 0.82016921043396, "step": 5371 }, { "epoch": 0.43992670617981555, "grad_norm": 0.22577214241027832, "learning_rate": 1.76677206593177e-05, "loss": 0.8170837163925171, "step": 5372 }, { "epoch": 0.44000859871633446, "grad_norm": 0.1758626103401184, "learning_rate": 1.7665515398360124e-05, "loss": 0.7099708318710327, "step": 5373 }, { "epoch": 0.44009049125285343, "grad_norm": 0.18815259635448456, "learning_rate": 1.7663309891155915e-05, "loss": 0.7074691653251648, "step": 5374 }, { "epoch": 0.4401723837893724, "grad_norm": 0.19174188375473022, "learning_rate": 1.7661104137800928e-05, "loss": 0.6769420504570007, "step": 5375 }, { "epoch": 0.44025427632589137, "grad_norm": 0.18055561184883118, "learning_rate": 1.765889813839104e-05, "loss": 0.7461521029472351, "step": 5376 }, { "epoch": 0.4403361688624103, "grad_norm": 0.18693168461322784, "learning_rate": 1.7656691893022136e-05, "loss": 0.799895167350769, "step": 5377 }, { "epoch": 0.44041806139892925, "grad_norm": 0.20090514421463013, "learning_rate": 1.76544854017901e-05, "loss": 0.7830310463905334, "step": 5378 }, { "epoch": 0.4404999539354482, "grad_norm": 0.20648477971553802, "learning_rate": 1.7652278664790837e-05, "loss": 1.1128127574920654, "step": 5379 }, { "epoch": 0.4405818464719672, "grad_norm": 0.16559775173664093, "learning_rate": 1.765007168212027e-05, "loss": 1.059472918510437, "step": 5380 }, { "epoch": 0.4406637390084861, "grad_norm": 0.2078838050365448, "learning_rate": 1.7647864453874313e-05, "loss": 0.8263754844665527, "step": 5381 }, { "epoch": 0.44074563154500507, "grad_norm": 0.17898233234882355, "learning_rate": 1.7645656980148904e-05, "loss": 0.944334864616394, "step": 5382 }, { "epoch": 0.44082752408152404, "grad_norm": 0.1673971712589264, "learning_rate": 1.764344926103999e-05, "loss": 0.7917730212211609, "step": 5383 }, { "epoch": 0.44090941661804295, "grad_norm": 0.1558142751455307, "learning_rate": 1.764124129664353e-05, "loss": 0.8971219062805176, "step": 5384 }, { "epoch": 0.4409913091545619, "grad_norm": 0.17135639488697052, "learning_rate": 1.7639033087055487e-05, "loss": 1.0078907012939453, "step": 5385 }, { "epoch": 0.4410732016910809, "grad_norm": 0.19785155355930328, "learning_rate": 1.7636824632371843e-05, "loss": 0.7111561894416809, "step": 5386 }, { "epoch": 0.44115509422759985, "grad_norm": 0.15664352476596832, "learning_rate": 1.763461593268858e-05, "loss": 0.6788076758384705, "step": 5387 }, { "epoch": 0.44123698676411877, "grad_norm": 0.17624083161354065, "learning_rate": 1.7632406988101704e-05, "loss": 1.0577363967895508, "step": 5388 }, { "epoch": 0.44131887930063773, "grad_norm": 0.15697833895683289, "learning_rate": 1.763019779870722e-05, "loss": 0.9716111421585083, "step": 5389 }, { "epoch": 0.4414007718371567, "grad_norm": 0.19322696328163147, "learning_rate": 1.7627988364601153e-05, "loss": 0.8862764239311218, "step": 5390 }, { "epoch": 0.44148266437367567, "grad_norm": 0.17831338942050934, "learning_rate": 1.7625778685879528e-05, "loss": 0.606127142906189, "step": 5391 }, { "epoch": 0.4415645569101946, "grad_norm": 0.1701916754245758, "learning_rate": 1.7623568762638394e-05, "loss": 0.8640361428260803, "step": 5392 }, { "epoch": 0.44164644944671355, "grad_norm": 0.2113039195537567, "learning_rate": 1.76213585949738e-05, "loss": 1.2156221866607666, "step": 5393 }, { "epoch": 0.4417283419832325, "grad_norm": 0.1668146550655365, "learning_rate": 1.761914818298181e-05, "loss": 1.0389198064804077, "step": 5394 }, { "epoch": 0.44181023451975143, "grad_norm": 0.15076585114002228, "learning_rate": 1.761693752675849e-05, "loss": 0.7940069437026978, "step": 5395 }, { "epoch": 0.4418921270562704, "grad_norm": 0.15243412554264069, "learning_rate": 1.7614726626399937e-05, "loss": 0.6890599131584167, "step": 5396 }, { "epoch": 0.44197401959278937, "grad_norm": 0.16983623802661896, "learning_rate": 1.7612515482002237e-05, "loss": 0.9388861656188965, "step": 5397 }, { "epoch": 0.44205591212930834, "grad_norm": 0.2105804979801178, "learning_rate": 1.76103040936615e-05, "loss": 0.8224899768829346, "step": 5398 }, { "epoch": 0.44213780466582725, "grad_norm": 0.19148819148540497, "learning_rate": 1.7608092461473832e-05, "loss": 0.6029199957847595, "step": 5399 }, { "epoch": 0.4422196972023462, "grad_norm": 0.22187601029872894, "learning_rate": 1.7605880585535376e-05, "loss": 0.6557669639587402, "step": 5400 }, { "epoch": 0.4423015897388652, "grad_norm": 0.1849789023399353, "learning_rate": 1.7603668465942258e-05, "loss": 0.8905231356620789, "step": 5401 }, { "epoch": 0.44238348227538415, "grad_norm": 0.2168682962656021, "learning_rate": 1.7601456102790633e-05, "loss": 0.831391453742981, "step": 5402 }, { "epoch": 0.44246537481190307, "grad_norm": 0.17690175771713257, "learning_rate": 1.7599243496176647e-05, "loss": 1.0622131824493408, "step": 5403 }, { "epoch": 0.44254726734842204, "grad_norm": 0.19221307337284088, "learning_rate": 1.759703064619648e-05, "loss": 0.6370087265968323, "step": 5404 }, { "epoch": 0.442629159884941, "grad_norm": 0.22120662033557892, "learning_rate": 1.7594817552946313e-05, "loss": 1.1419450044631958, "step": 5405 }, { "epoch": 0.4427110524214599, "grad_norm": 0.20749647915363312, "learning_rate": 1.759260421652233e-05, "loss": 0.8344279527664185, "step": 5406 }, { "epoch": 0.4427929449579789, "grad_norm": 0.21582792699337006, "learning_rate": 1.759039063702073e-05, "loss": 0.8805184960365295, "step": 5407 }, { "epoch": 0.44287483749449785, "grad_norm": 0.2117430567741394, "learning_rate": 1.758817681453773e-05, "loss": 0.9094138741493225, "step": 5408 }, { "epoch": 0.4429567300310168, "grad_norm": 0.15981557965278625, "learning_rate": 1.758596274916955e-05, "loss": 0.8580504059791565, "step": 5409 }, { "epoch": 0.44303862256753573, "grad_norm": 0.23026731610298157, "learning_rate": 1.758374844101242e-05, "loss": 0.9361943602561951, "step": 5410 }, { "epoch": 0.4431205151040547, "grad_norm": 0.17902237176895142, "learning_rate": 1.758153389016259e-05, "loss": 0.7177950739860535, "step": 5411 }, { "epoch": 0.44320240764057367, "grad_norm": 0.1778019368648529, "learning_rate": 1.75793190967163e-05, "loss": 0.8256241083145142, "step": 5412 }, { "epoch": 0.4432843001770926, "grad_norm": 0.20447474718093872, "learning_rate": 1.7577104060769832e-05, "loss": 0.7965594530105591, "step": 5413 }, { "epoch": 0.44336619271361155, "grad_norm": 0.19354255497455597, "learning_rate": 1.7574888782419445e-05, "loss": 0.749048113822937, "step": 5414 }, { "epoch": 0.4434480852501305, "grad_norm": 0.15867304801940918, "learning_rate": 1.757267326176143e-05, "loss": 0.7198055386543274, "step": 5415 }, { "epoch": 0.4435299777866495, "grad_norm": 0.19383947551250458, "learning_rate": 1.757045749889209e-05, "loss": 0.9032157063484192, "step": 5416 }, { "epoch": 0.4436118703231684, "grad_norm": 0.1326582431793213, "learning_rate": 1.7568241493907715e-05, "loss": 0.9449827671051025, "step": 5417 }, { "epoch": 0.44369376285968737, "grad_norm": 0.17711769044399261, "learning_rate": 1.7566025246904635e-05, "loss": 0.7158915996551514, "step": 5418 }, { "epoch": 0.44377565539620634, "grad_norm": 0.17684563994407654, "learning_rate": 1.7563808757979174e-05, "loss": 1.0480120182037354, "step": 5419 }, { "epoch": 0.4438575479327253, "grad_norm": 0.18076738715171814, "learning_rate": 1.7561592027227664e-05, "loss": 0.6801307201385498, "step": 5420 }, { "epoch": 0.4439394404692442, "grad_norm": 0.1546599268913269, "learning_rate": 1.755937505474646e-05, "loss": 0.6293601393699646, "step": 5421 }, { "epoch": 0.4440213330057632, "grad_norm": 0.19035924971103668, "learning_rate": 1.7557157840631924e-05, "loss": 1.0910961627960205, "step": 5422 }, { "epoch": 0.44410322554228215, "grad_norm": 0.17091119289398193, "learning_rate": 1.755494038498041e-05, "loss": 0.6681796908378601, "step": 5423 }, { "epoch": 0.44418511807880107, "grad_norm": 0.18414147198200226, "learning_rate": 1.7552722687888317e-05, "loss": 0.8092550039291382, "step": 5424 }, { "epoch": 0.44426701061532003, "grad_norm": 0.18572527170181274, "learning_rate": 1.7550504749452023e-05, "loss": 1.1223559379577637, "step": 5425 }, { "epoch": 0.444348903151839, "grad_norm": 0.15737557411193848, "learning_rate": 1.754828656976793e-05, "loss": 0.8473818898200989, "step": 5426 }, { "epoch": 0.44443079568835797, "grad_norm": 0.15985092520713806, "learning_rate": 1.7546068148932455e-05, "loss": 0.8165673017501831, "step": 5427 }, { "epoch": 0.4445126882248769, "grad_norm": 0.16474726796150208, "learning_rate": 1.7543849487042014e-05, "loss": 0.8353172540664673, "step": 5428 }, { "epoch": 0.44459458076139585, "grad_norm": 0.19072523713111877, "learning_rate": 1.7541630584193037e-05, "loss": 1.0249396562576294, "step": 5429 }, { "epoch": 0.4446764732979148, "grad_norm": 0.17066937685012817, "learning_rate": 1.7539411440481975e-05, "loss": 1.0296144485473633, "step": 5430 }, { "epoch": 0.4447583658344338, "grad_norm": 0.17020459473133087, "learning_rate": 1.7537192056005273e-05, "loss": 0.8971514701843262, "step": 5431 }, { "epoch": 0.4448402583709527, "grad_norm": 0.19096539914608002, "learning_rate": 1.7534972430859403e-05, "loss": 0.6164766550064087, "step": 5432 }, { "epoch": 0.44492215090747167, "grad_norm": 0.17912890017032623, "learning_rate": 1.7532752565140834e-05, "loss": 0.792995810508728, "step": 5433 }, { "epoch": 0.44500404344399064, "grad_norm": 0.1713162660598755, "learning_rate": 1.7530532458946044e-05, "loss": 0.8437955379486084, "step": 5434 }, { "epoch": 0.44508593598050955, "grad_norm": 0.18425898253917694, "learning_rate": 1.752831211237154e-05, "loss": 0.7909479737281799, "step": 5435 }, { "epoch": 0.4451678285170285, "grad_norm": 0.18475206196308136, "learning_rate": 1.7526091525513823e-05, "loss": 1.0152255296707153, "step": 5436 }, { "epoch": 0.4452497210535475, "grad_norm": 0.1779736876487732, "learning_rate": 1.7523870698469404e-05, "loss": 0.7136324644088745, "step": 5437 }, { "epoch": 0.44533161359006646, "grad_norm": 0.1955915242433548, "learning_rate": 1.7521649631334817e-05, "loss": 0.7692753672599792, "step": 5438 }, { "epoch": 0.44541350612658537, "grad_norm": 0.16643686592578888, "learning_rate": 1.751942832420659e-05, "loss": 0.778534471988678, "step": 5439 }, { "epoch": 0.44549539866310434, "grad_norm": 0.16394299268722534, "learning_rate": 1.7517206777181283e-05, "loss": 0.9160162806510925, "step": 5440 }, { "epoch": 0.4455772911996233, "grad_norm": 0.20852093398571014, "learning_rate": 1.7514984990355442e-05, "loss": 0.9404916763305664, "step": 5441 }, { "epoch": 0.4456591837361423, "grad_norm": 0.16706232726573944, "learning_rate": 1.751276296382564e-05, "loss": 0.6478914618492126, "step": 5442 }, { "epoch": 0.4457410762726612, "grad_norm": 0.1882484257221222, "learning_rate": 1.751054069768845e-05, "loss": 0.9441010355949402, "step": 5443 }, { "epoch": 0.44582296880918015, "grad_norm": 0.19298788905143738, "learning_rate": 1.7508318192040467e-05, "loss": 0.6870664954185486, "step": 5444 }, { "epoch": 0.4459048613456991, "grad_norm": 0.17786552011966705, "learning_rate": 1.7506095446978288e-05, "loss": 0.7045921087265015, "step": 5445 }, { "epoch": 0.44598675388221803, "grad_norm": 0.18361935019493103, "learning_rate": 1.7503872462598523e-05, "loss": 0.8420767188072205, "step": 5446 }, { "epoch": 0.446068646418737, "grad_norm": 0.17545048892498016, "learning_rate": 1.7501649238997792e-05, "loss": 1.03066885471344, "step": 5447 }, { "epoch": 0.44615053895525597, "grad_norm": 0.21919608116149902, "learning_rate": 1.749942577627273e-05, "loss": 0.8753542304039001, "step": 5448 }, { "epoch": 0.44623243149177494, "grad_norm": 0.15587112307548523, "learning_rate": 1.7497202074519968e-05, "loss": 0.7061967253684998, "step": 5449 }, { "epoch": 0.44631432402829385, "grad_norm": 0.15762439370155334, "learning_rate": 1.7494978133836163e-05, "loss": 0.7688755393028259, "step": 5450 }, { "epoch": 0.4463962165648128, "grad_norm": 0.16541148722171783, "learning_rate": 1.749275395431798e-05, "loss": 0.5245738625526428, "step": 5451 }, { "epoch": 0.4464781091013318, "grad_norm": 0.24459873139858246, "learning_rate": 1.7490529536062084e-05, "loss": 0.7518802881240845, "step": 5452 }, { "epoch": 0.44656000163785076, "grad_norm": 0.17605768144130707, "learning_rate": 1.748830487916516e-05, "loss": 0.7318469285964966, "step": 5453 }, { "epoch": 0.44664189417436967, "grad_norm": 0.17240917682647705, "learning_rate": 1.74860799837239e-05, "loss": 0.8843234181404114, "step": 5454 }, { "epoch": 0.44672378671088864, "grad_norm": 0.20060975849628448, "learning_rate": 1.748385484983501e-05, "loss": 0.8388319611549377, "step": 5455 }, { "epoch": 0.4468056792474076, "grad_norm": 0.19646620750427246, "learning_rate": 1.7481629477595202e-05, "loss": 0.9793956279754639, "step": 5456 }, { "epoch": 0.4468875717839265, "grad_norm": 0.20142483711242676, "learning_rate": 1.7479403867101203e-05, "loss": 0.6185078024864197, "step": 5457 }, { "epoch": 0.4469694643204455, "grad_norm": 0.18595372140407562, "learning_rate": 1.747717801844974e-05, "loss": 0.8611507415771484, "step": 5458 }, { "epoch": 0.44705135685696445, "grad_norm": 0.20564210414886475, "learning_rate": 1.747495193173756e-05, "loss": 0.8688211441040039, "step": 5459 }, { "epoch": 0.4471332493934834, "grad_norm": 0.15971340239048004, "learning_rate": 1.7472725607061427e-05, "loss": 0.7364599704742432, "step": 5460 }, { "epoch": 0.44721514193000234, "grad_norm": 0.18550996482372284, "learning_rate": 1.7470499044518092e-05, "loss": 1.013221263885498, "step": 5461 }, { "epoch": 0.4472970344665213, "grad_norm": 0.19798415899276733, "learning_rate": 1.746827224420434e-05, "loss": 1.0060904026031494, "step": 5462 }, { "epoch": 0.44737892700304027, "grad_norm": 0.17262700200080872, "learning_rate": 1.746604520621695e-05, "loss": 0.599404513835907, "step": 5463 }, { "epoch": 0.44746081953955924, "grad_norm": 0.1654191017150879, "learning_rate": 1.7463817930652724e-05, "loss": 1.036536693572998, "step": 5464 }, { "epoch": 0.44754271207607815, "grad_norm": 0.19327780604362488, "learning_rate": 1.746159041760847e-05, "loss": 1.0583151578903198, "step": 5465 }, { "epoch": 0.4476246046125971, "grad_norm": 0.18376433849334717, "learning_rate": 1.7459362667181e-05, "loss": 0.8896461725234985, "step": 5466 }, { "epoch": 0.4477064971491161, "grad_norm": 0.1908615082502365, "learning_rate": 1.7457134679467142e-05, "loss": 0.7980520725250244, "step": 5467 }, { "epoch": 0.447788389685635, "grad_norm": 0.17626892030239105, "learning_rate": 1.745490645456374e-05, "loss": 0.6143047213554382, "step": 5468 }, { "epoch": 0.44787028222215397, "grad_norm": 0.2045426070690155, "learning_rate": 1.7452677992567628e-05, "loss": 0.8465481400489807, "step": 5469 }, { "epoch": 0.44795217475867294, "grad_norm": 0.29870834946632385, "learning_rate": 1.7450449293575676e-05, "loss": 1.0538989305496216, "step": 5470 }, { "epoch": 0.4480340672951919, "grad_norm": 0.17277467250823975, "learning_rate": 1.7448220357684753e-05, "loss": 0.7371391654014587, "step": 5471 }, { "epoch": 0.4481159598317108, "grad_norm": 0.17069286108016968, "learning_rate": 1.7445991184991725e-05, "loss": 0.868395209312439, "step": 5472 }, { "epoch": 0.4481978523682298, "grad_norm": 0.190364271402359, "learning_rate": 1.7443761775593494e-05, "loss": 0.8595293760299683, "step": 5473 }, { "epoch": 0.44827974490474876, "grad_norm": 0.18257489800453186, "learning_rate": 1.744153212958696e-05, "loss": 0.6958825588226318, "step": 5474 }, { "epoch": 0.4483616374412677, "grad_norm": 0.17307493090629578, "learning_rate": 1.743930224706902e-05, "loss": 0.6610575914382935, "step": 5475 }, { "epoch": 0.44844352997778664, "grad_norm": 0.17977136373519897, "learning_rate": 1.7437072128136606e-05, "loss": 0.628240168094635, "step": 5476 }, { "epoch": 0.4485254225143056, "grad_norm": 0.16960987448692322, "learning_rate": 1.743484177288664e-05, "loss": 0.872008740901947, "step": 5477 }, { "epoch": 0.4486073150508246, "grad_norm": 0.20718859136104584, "learning_rate": 1.743261118141607e-05, "loss": 0.9913686513900757, "step": 5478 }, { "epoch": 0.4486892075873435, "grad_norm": 0.17606183886528015, "learning_rate": 1.7430380353821844e-05, "loss": 0.9771310091018677, "step": 5479 }, { "epoch": 0.44877110012386245, "grad_norm": 0.1913255751132965, "learning_rate": 1.7428149290200915e-05, "loss": 0.6810815334320068, "step": 5480 }, { "epoch": 0.4488529926603814, "grad_norm": 0.1763249933719635, "learning_rate": 1.742591799065027e-05, "loss": 1.047243595123291, "step": 5481 }, { "epoch": 0.4489348851969004, "grad_norm": 0.17235812544822693, "learning_rate": 1.7423686455266876e-05, "loss": 0.7062926292419434, "step": 5482 }, { "epoch": 0.4490167777334193, "grad_norm": 0.14848990738391876, "learning_rate": 1.7421454684147725e-05, "loss": 0.6057683229446411, "step": 5483 }, { "epoch": 0.44909867026993827, "grad_norm": 0.20214088261127472, "learning_rate": 1.7419222677389833e-05, "loss": 0.7344767451286316, "step": 5484 }, { "epoch": 0.44918056280645724, "grad_norm": 0.19846497476100922, "learning_rate": 1.7416990435090202e-05, "loss": 1.2643386125564575, "step": 5485 }, { "epoch": 0.44926245534297615, "grad_norm": 0.18715107440948486, "learning_rate": 1.741475795734585e-05, "loss": 0.8277486562728882, "step": 5486 }, { "epoch": 0.4493443478794951, "grad_norm": 0.19467340409755707, "learning_rate": 1.7412525244253824e-05, "loss": 0.7172595858573914, "step": 5487 }, { "epoch": 0.4494262404160141, "grad_norm": 0.17791521549224854, "learning_rate": 1.741029229591115e-05, "loss": 0.9489541053771973, "step": 5488 }, { "epoch": 0.44950813295253306, "grad_norm": 0.19396749138832092, "learning_rate": 1.7408059112414894e-05, "loss": 0.8113859295845032, "step": 5489 }, { "epoch": 0.44959002548905197, "grad_norm": 0.17106521129608154, "learning_rate": 1.7405825693862113e-05, "loss": 0.8235581517219543, "step": 5490 }, { "epoch": 0.44967191802557094, "grad_norm": 0.18694078922271729, "learning_rate": 1.7403592040349884e-05, "loss": 0.8287842869758606, "step": 5491 }, { "epoch": 0.4497538105620899, "grad_norm": 0.20243921875953674, "learning_rate": 1.740135815197529e-05, "loss": 0.7463896870613098, "step": 5492 }, { "epoch": 0.4498357030986089, "grad_norm": 0.18900007009506226, "learning_rate": 1.739912402883542e-05, "loss": 0.5805867910385132, "step": 5493 }, { "epoch": 0.4499175956351278, "grad_norm": 0.1829969435930252, "learning_rate": 1.739688967102738e-05, "loss": 0.800578773021698, "step": 5494 }, { "epoch": 0.44999948817164676, "grad_norm": 0.19881930947303772, "learning_rate": 1.7394655078648298e-05, "loss": 0.780738353729248, "step": 5495 }, { "epoch": 0.4500813807081657, "grad_norm": 0.20201973617076874, "learning_rate": 1.7392420251795283e-05, "loss": 0.6185060739517212, "step": 5496 }, { "epoch": 0.45016327324468464, "grad_norm": 0.14701874554157257, "learning_rate": 1.7390185190565466e-05, "loss": 0.804574728012085, "step": 5497 }, { "epoch": 0.4502451657812036, "grad_norm": 0.1929609626531601, "learning_rate": 1.7387949895056006e-05, "loss": 0.9140502214431763, "step": 5498 }, { "epoch": 0.4503270583177226, "grad_norm": 0.19172126054763794, "learning_rate": 1.7385714365364054e-05, "loss": 0.8545142412185669, "step": 5499 }, { "epoch": 0.45040895085424154, "grad_norm": 0.2117965817451477, "learning_rate": 1.738347860158677e-05, "loss": 0.7460088133811951, "step": 5500 }, { "epoch": 0.45049084339076045, "grad_norm": 0.1671985387802124, "learning_rate": 1.738124260382134e-05, "loss": 0.9331665635108948, "step": 5501 }, { "epoch": 0.4505727359272794, "grad_norm": 0.15618084371089935, "learning_rate": 1.7379006372164936e-05, "loss": 0.5510960221290588, "step": 5502 }, { "epoch": 0.4506546284637984, "grad_norm": 0.1842808872461319, "learning_rate": 1.737676990671476e-05, "loss": 0.8737232685089111, "step": 5503 }, { "epoch": 0.45073652100031736, "grad_norm": 0.18284103274345398, "learning_rate": 1.737453320756802e-05, "loss": 0.6817474365234375, "step": 5504 }, { "epoch": 0.45081841353683627, "grad_norm": 0.1913343071937561, "learning_rate": 1.7372296274821936e-05, "loss": 0.9730151891708374, "step": 5505 }, { "epoch": 0.45090030607335524, "grad_norm": 0.19930996000766754, "learning_rate": 1.7370059108573723e-05, "loss": 0.9446821808815002, "step": 5506 }, { "epoch": 0.4509821986098742, "grad_norm": 0.17180867493152618, "learning_rate": 1.7367821708920624e-05, "loss": 0.9238617420196533, "step": 5507 }, { "epoch": 0.4510640911463931, "grad_norm": 0.20669066905975342, "learning_rate": 1.7365584075959887e-05, "loss": 1.3154327869415283, "step": 5508 }, { "epoch": 0.4511459836829121, "grad_norm": 0.18532808125019073, "learning_rate": 1.7363346209788766e-05, "loss": 0.6891070008277893, "step": 5509 }, { "epoch": 0.45122787621943106, "grad_norm": 0.18500258028507233, "learning_rate": 1.736110811050453e-05, "loss": 0.7747712731361389, "step": 5510 }, { "epoch": 0.45130976875595, "grad_norm": 0.18326996266841888, "learning_rate": 1.7358869778204452e-05, "loss": 0.7281123995780945, "step": 5511 }, { "epoch": 0.45139166129246894, "grad_norm": 0.1820676326751709, "learning_rate": 1.7356631212985822e-05, "loss": 0.813267707824707, "step": 5512 }, { "epoch": 0.4514735538289879, "grad_norm": 0.18251243233680725, "learning_rate": 1.7354392414945936e-05, "loss": 0.6807146668434143, "step": 5513 }, { "epoch": 0.4515554463655069, "grad_norm": 0.17788632214069366, "learning_rate": 1.7352153384182108e-05, "loss": 0.6701048016548157, "step": 5514 }, { "epoch": 0.45163733890202584, "grad_norm": 0.20719458162784576, "learning_rate": 1.7349914120791648e-05, "loss": 0.870582103729248, "step": 5515 }, { "epoch": 0.45171923143854475, "grad_norm": 0.15191584825515747, "learning_rate": 1.734767462487188e-05, "loss": 0.5300018787384033, "step": 5516 }, { "epoch": 0.4518011239750637, "grad_norm": 0.15818405151367188, "learning_rate": 1.7345434896520154e-05, "loss": 1.1534440517425537, "step": 5517 }, { "epoch": 0.4518830165115827, "grad_norm": 0.20795802772045135, "learning_rate": 1.734319493583381e-05, "loss": 1.1220030784606934, "step": 5518 }, { "epoch": 0.4519649090481016, "grad_norm": 0.17918804287910461, "learning_rate": 1.7340954742910206e-05, "loss": 0.7341663241386414, "step": 5519 }, { "epoch": 0.4520468015846206, "grad_norm": 0.20755669474601746, "learning_rate": 1.733871431784671e-05, "loss": 0.7412256002426147, "step": 5520 }, { "epoch": 0.45212869412113954, "grad_norm": 0.19410134851932526, "learning_rate": 1.7336473660740704e-05, "loss": 0.8384029269218445, "step": 5521 }, { "epoch": 0.4522105866576585, "grad_norm": 0.17224611341953278, "learning_rate": 1.733423277168957e-05, "loss": 0.631521463394165, "step": 5522 }, { "epoch": 0.4522924791941774, "grad_norm": 0.21063853800296783, "learning_rate": 1.7331991650790712e-05, "loss": 0.8545302748680115, "step": 5523 }, { "epoch": 0.4523743717306964, "grad_norm": 0.20307908952236176, "learning_rate": 1.7329750298141535e-05, "loss": 1.1925686597824097, "step": 5524 }, { "epoch": 0.45245626426721536, "grad_norm": 0.18799248337745667, "learning_rate": 1.732750871383946e-05, "loss": 0.9018441438674927, "step": 5525 }, { "epoch": 0.4525381568037343, "grad_norm": 0.19797302782535553, "learning_rate": 1.732526689798191e-05, "loss": 1.0830812454223633, "step": 5526 }, { "epoch": 0.45262004934025324, "grad_norm": 0.19762778282165527, "learning_rate": 1.732302485066633e-05, "loss": 1.1677149534225464, "step": 5527 }, { "epoch": 0.4527019418767722, "grad_norm": 0.19650253653526306, "learning_rate": 1.7320782571990168e-05, "loss": 0.7618950605392456, "step": 5528 }, { "epoch": 0.4527838344132912, "grad_norm": 0.16138139367103577, "learning_rate": 1.7318540062050882e-05, "loss": 0.7819635272026062, "step": 5529 }, { "epoch": 0.4528657269498101, "grad_norm": 0.17745046317577362, "learning_rate": 1.7316297320945934e-05, "loss": 0.8661229014396667, "step": 5530 }, { "epoch": 0.45294761948632906, "grad_norm": 0.17848540842533112, "learning_rate": 1.7314054348772807e-05, "loss": 0.6610631942749023, "step": 5531 }, { "epoch": 0.453029512022848, "grad_norm": 0.18103136122226715, "learning_rate": 1.7311811145628998e-05, "loss": 0.5063185095787048, "step": 5532 }, { "epoch": 0.453111404559367, "grad_norm": 0.17355968058109283, "learning_rate": 1.7309567711611997e-05, "loss": 0.7535468935966492, "step": 5533 }, { "epoch": 0.4531932970958859, "grad_norm": 0.19797727465629578, "learning_rate": 1.730732404681931e-05, "loss": 0.8034313917160034, "step": 5534 }, { "epoch": 0.4532751896324049, "grad_norm": 0.17570658028125763, "learning_rate": 1.7305080151348467e-05, "loss": 0.6996821165084839, "step": 5535 }, { "epoch": 0.45335708216892384, "grad_norm": 0.17188256978988647, "learning_rate": 1.7302836025296986e-05, "loss": 0.6018949151039124, "step": 5536 }, { "epoch": 0.4534389747054428, "grad_norm": 0.1995725929737091, "learning_rate": 1.7300591668762413e-05, "loss": 0.7266729474067688, "step": 5537 }, { "epoch": 0.4535208672419617, "grad_norm": 0.19049639999866486, "learning_rate": 1.7298347081842294e-05, "loss": 0.6787266135215759, "step": 5538 }, { "epoch": 0.4536027597784807, "grad_norm": 0.1832939237356186, "learning_rate": 1.7296102264634188e-05, "loss": 0.9585368037223816, "step": 5539 }, { "epoch": 0.45368465231499966, "grad_norm": 0.2039623260498047, "learning_rate": 1.7293857217235663e-05, "loss": 0.7218517065048218, "step": 5540 }, { "epoch": 0.45376654485151857, "grad_norm": 0.2135409265756607, "learning_rate": 1.7291611939744304e-05, "loss": 1.1587527990341187, "step": 5541 }, { "epoch": 0.45384843738803754, "grad_norm": 0.18465997278690338, "learning_rate": 1.7289366432257696e-05, "loss": 1.21940016746521, "step": 5542 }, { "epoch": 0.4539303299245565, "grad_norm": 0.1953372210264206, "learning_rate": 1.728712069487343e-05, "loss": 0.9770494103431702, "step": 5543 }, { "epoch": 0.4540122224610755, "grad_norm": 0.16749964654445648, "learning_rate": 1.7284874727689125e-05, "loss": 0.468506783246994, "step": 5544 }, { "epoch": 0.4540941149975944, "grad_norm": 0.32112085819244385, "learning_rate": 1.7282628530802405e-05, "loss": 0.8787829279899597, "step": 5545 }, { "epoch": 0.45417600753411336, "grad_norm": 0.15989165008068085, "learning_rate": 1.728038210431088e-05, "loss": 0.7138000130653381, "step": 5546 }, { "epoch": 0.4542579000706323, "grad_norm": 0.19676348567008972, "learning_rate": 1.727813544831221e-05, "loss": 1.0367257595062256, "step": 5547 }, { "epoch": 0.45433979260715124, "grad_norm": 0.17124904692173004, "learning_rate": 1.7275888562904028e-05, "loss": 0.49154144525527954, "step": 5548 }, { "epoch": 0.4544216851436702, "grad_norm": 0.20187829434871674, "learning_rate": 1.7273641448184003e-05, "loss": 1.0354478359222412, "step": 5549 }, { "epoch": 0.4545035776801892, "grad_norm": 0.19685757160186768, "learning_rate": 1.7271394104249802e-05, "loss": 1.0775749683380127, "step": 5550 }, { "epoch": 0.45458547021670814, "grad_norm": 0.2088436633348465, "learning_rate": 1.7269146531199098e-05, "loss": 0.9345431327819824, "step": 5551 }, { "epoch": 0.45466736275322706, "grad_norm": 0.23142242431640625, "learning_rate": 1.7266898729129583e-05, "loss": 1.1239397525787354, "step": 5552 }, { "epoch": 0.454749255289746, "grad_norm": 0.19443278014659882, "learning_rate": 1.7264650698138962e-05, "loss": 0.7231912016868591, "step": 5553 }, { "epoch": 0.454831147826265, "grad_norm": 0.2733670473098755, "learning_rate": 1.7262402438324932e-05, "loss": 0.8453428149223328, "step": 5554 }, { "epoch": 0.45491304036278396, "grad_norm": 0.18593311309814453, "learning_rate": 1.726015394978522e-05, "loss": 0.5910945534706116, "step": 5555 }, { "epoch": 0.4549949328993029, "grad_norm": 0.17861849069595337, "learning_rate": 1.7257905232617555e-05, "loss": 0.8169873952865601, "step": 5556 }, { "epoch": 0.45507682543582184, "grad_norm": 0.1759481579065323, "learning_rate": 1.7255656286919666e-05, "loss": 0.9164767861366272, "step": 5557 }, { "epoch": 0.4551587179723408, "grad_norm": 0.16161689162254333, "learning_rate": 1.7253407112789317e-05, "loss": 0.5934125781059265, "step": 5558 }, { "epoch": 0.4552406105088597, "grad_norm": 0.1739482581615448, "learning_rate": 1.7251157710324253e-05, "loss": 1.0104881525039673, "step": 5559 }, { "epoch": 0.4553225030453787, "grad_norm": 0.1943245381116867, "learning_rate": 1.7248908079622244e-05, "loss": 0.9934820532798767, "step": 5560 }, { "epoch": 0.45540439558189766, "grad_norm": 0.14801955223083496, "learning_rate": 1.724665822078108e-05, "loss": 0.9609352946281433, "step": 5561 }, { "epoch": 0.4554862881184166, "grad_norm": 0.19583968818187714, "learning_rate": 1.724440813389853e-05, "loss": 0.9021040797233582, "step": 5562 }, { "epoch": 0.45556818065493554, "grad_norm": 0.15777328610420227, "learning_rate": 1.7242157819072408e-05, "loss": 0.7732074856758118, "step": 5563 }, { "epoch": 0.4556500731914545, "grad_norm": 0.18235963582992554, "learning_rate": 1.7239907276400518e-05, "loss": 0.8612820506095886, "step": 5564 }, { "epoch": 0.4557319657279735, "grad_norm": 0.21064628660678864, "learning_rate": 1.7237656505980673e-05, "loss": 0.9134030938148499, "step": 5565 }, { "epoch": 0.45581385826449244, "grad_norm": 0.17954738438129425, "learning_rate": 1.7235405507910704e-05, "loss": 0.796614944934845, "step": 5566 }, { "epoch": 0.45589575080101136, "grad_norm": 0.1477358639240265, "learning_rate": 1.7233154282288448e-05, "loss": 0.8662889003753662, "step": 5567 }, { "epoch": 0.4559776433375303, "grad_norm": 0.18856821954250336, "learning_rate": 1.7230902829211754e-05, "loss": 0.7465298771858215, "step": 5568 }, { "epoch": 0.4560595358740493, "grad_norm": 0.2203916311264038, "learning_rate": 1.722865114877848e-05, "loss": 0.7384775280952454, "step": 5569 }, { "epoch": 0.4561414284105682, "grad_norm": 0.15738825500011444, "learning_rate": 1.7226399241086494e-05, "loss": 0.8732999563217163, "step": 5570 }, { "epoch": 0.4562233209470872, "grad_norm": 0.22765879333019257, "learning_rate": 1.7224147106233665e-05, "loss": 0.8452392220497131, "step": 5571 }, { "epoch": 0.45630521348360614, "grad_norm": 0.1592656821012497, "learning_rate": 1.722189474431789e-05, "loss": 0.8674609661102295, "step": 5572 }, { "epoch": 0.4563871060201251, "grad_norm": 0.19468270242214203, "learning_rate": 1.721964215543706e-05, "loss": 0.8810535073280334, "step": 5573 }, { "epoch": 0.456468998556644, "grad_norm": 0.22282390296459198, "learning_rate": 1.7217389339689088e-05, "loss": 0.9202957153320312, "step": 5574 }, { "epoch": 0.456550891093163, "grad_norm": 0.2271028757095337, "learning_rate": 1.721513629717188e-05, "loss": 0.8471079468727112, "step": 5575 }, { "epoch": 0.45663278362968196, "grad_norm": 0.19008708000183105, "learning_rate": 1.7212883027983373e-05, "loss": 1.0594571828842163, "step": 5576 }, { "epoch": 0.45671467616620093, "grad_norm": 0.21355175971984863, "learning_rate": 1.7210629532221497e-05, "loss": 0.9036674499511719, "step": 5577 }, { "epoch": 0.45679656870271984, "grad_norm": 0.1982448697090149, "learning_rate": 1.7208375809984205e-05, "loss": 0.7601367831230164, "step": 5578 }, { "epoch": 0.4568784612392388, "grad_norm": 0.1790967732667923, "learning_rate": 1.7206121861369443e-05, "loss": 1.2467598915100098, "step": 5579 }, { "epoch": 0.4569603537757578, "grad_norm": 0.2072426825761795, "learning_rate": 1.7203867686475185e-05, "loss": 0.8452096581459045, "step": 5580 }, { "epoch": 0.4570422463122767, "grad_norm": 0.17102275788784027, "learning_rate": 1.7201613285399404e-05, "loss": 0.7410465478897095, "step": 5581 }, { "epoch": 0.45712413884879566, "grad_norm": 0.2763693928718567, "learning_rate": 1.719935865824009e-05, "loss": 1.0754187107086182, "step": 5582 }, { "epoch": 0.4572060313853146, "grad_norm": 0.20731182396411896, "learning_rate": 1.7197103805095228e-05, "loss": 0.8688687086105347, "step": 5583 }, { "epoch": 0.4572879239218336, "grad_norm": 0.18402840197086334, "learning_rate": 1.719484872606283e-05, "loss": 0.9625867605209351, "step": 5584 }, { "epoch": 0.4573698164583525, "grad_norm": 0.17972294986248016, "learning_rate": 1.7192593421240912e-05, "loss": 0.6844893097877502, "step": 5585 }, { "epoch": 0.4574517089948715, "grad_norm": 0.17459367215633392, "learning_rate": 1.71903378907275e-05, "loss": 0.9083195924758911, "step": 5586 }, { "epoch": 0.45753360153139044, "grad_norm": 0.20023025572299957, "learning_rate": 1.718808213462062e-05, "loss": 0.7861866354942322, "step": 5587 }, { "epoch": 0.4576154940679094, "grad_norm": 0.17141178250312805, "learning_rate": 1.7185826153018327e-05, "loss": 0.8535504341125488, "step": 5588 }, { "epoch": 0.4576973866044283, "grad_norm": 0.18199318647384644, "learning_rate": 1.7183569946018665e-05, "loss": 0.9447701573371887, "step": 5589 }, { "epoch": 0.4577792791409473, "grad_norm": 0.1726444810628891, "learning_rate": 1.718131351371971e-05, "loss": 0.6782296895980835, "step": 5590 }, { "epoch": 0.45786117167746626, "grad_norm": 0.1802307367324829, "learning_rate": 1.7179056856219527e-05, "loss": 0.9088866710662842, "step": 5591 }, { "epoch": 0.4579430642139852, "grad_norm": 0.17682746052742004, "learning_rate": 1.71767999736162e-05, "loss": 1.0102368593215942, "step": 5592 }, { "epoch": 0.45802495675050414, "grad_norm": 0.1846659928560257, "learning_rate": 1.7174542866007825e-05, "loss": 1.1352025270462036, "step": 5593 }, { "epoch": 0.4581068492870231, "grad_norm": 0.16514235734939575, "learning_rate": 1.7172285533492508e-05, "loss": 0.5929916501045227, "step": 5594 }, { "epoch": 0.4581887418235421, "grad_norm": 0.2367272675037384, "learning_rate": 1.7170027976168355e-05, "loss": 1.0209989547729492, "step": 5595 }, { "epoch": 0.458270634360061, "grad_norm": 0.20493969321250916, "learning_rate": 1.7167770194133494e-05, "loss": 0.7963019013404846, "step": 5596 }, { "epoch": 0.45835252689657996, "grad_norm": 0.17345423996448517, "learning_rate": 1.7165512187486057e-05, "loss": 0.6873565316200256, "step": 5597 }, { "epoch": 0.4584344194330989, "grad_norm": 0.22013291716575623, "learning_rate": 1.7163253956324182e-05, "loss": 0.8781787753105164, "step": 5598 }, { "epoch": 0.4585163119696179, "grad_norm": 0.18288859724998474, "learning_rate": 1.7160995500746028e-05, "loss": 0.5488592386245728, "step": 5599 }, { "epoch": 0.4585982045061368, "grad_norm": 0.18942083418369293, "learning_rate": 1.715873682084975e-05, "loss": 0.783503532409668, "step": 5600 }, { "epoch": 0.4586800970426558, "grad_norm": 0.17771703004837036, "learning_rate": 1.7156477916733525e-05, "loss": 0.5279858112335205, "step": 5601 }, { "epoch": 0.45876198957917474, "grad_norm": 0.21360014379024506, "learning_rate": 1.715421878849553e-05, "loss": 1.20832097530365, "step": 5602 }, { "epoch": 0.45884388211569366, "grad_norm": 0.23653362691402435, "learning_rate": 1.7151959436233957e-05, "loss": 0.8957186937332153, "step": 5603 }, { "epoch": 0.4589257746522126, "grad_norm": 0.1886117309331894, "learning_rate": 1.714969986004701e-05, "loss": 0.8974544405937195, "step": 5604 }, { "epoch": 0.4590076671887316, "grad_norm": 0.17192848026752472, "learning_rate": 1.7147440060032895e-05, "loss": 0.7700780630111694, "step": 5605 }, { "epoch": 0.45908955972525056, "grad_norm": 0.20000216364860535, "learning_rate": 1.7145180036289834e-05, "loss": 0.8034606575965881, "step": 5606 }, { "epoch": 0.4591714522617695, "grad_norm": 0.14850115776062012, "learning_rate": 1.714291978891606e-05, "loss": 0.6440001130104065, "step": 5607 }, { "epoch": 0.45925334479828844, "grad_norm": 0.19381895661354065, "learning_rate": 1.7140659318009807e-05, "loss": 1.0006682872772217, "step": 5608 }, { "epoch": 0.4593352373348074, "grad_norm": 0.19166319072246552, "learning_rate": 1.7138398623669325e-05, "loss": 0.8180659413337708, "step": 5609 }, { "epoch": 0.4594171298713264, "grad_norm": 0.16670072078704834, "learning_rate": 1.7136137705992883e-05, "loss": 0.6220978498458862, "step": 5610 }, { "epoch": 0.4594990224078453, "grad_norm": 0.1902817338705063, "learning_rate": 1.7133876565078735e-05, "loss": 1.195241093635559, "step": 5611 }, { "epoch": 0.45958091494436426, "grad_norm": 0.16342461109161377, "learning_rate": 1.7131615201025165e-05, "loss": 0.7688427567481995, "step": 5612 }, { "epoch": 0.45966280748088323, "grad_norm": 0.16556203365325928, "learning_rate": 1.7129353613930473e-05, "loss": 0.8004022240638733, "step": 5613 }, { "epoch": 0.45974470001740214, "grad_norm": 0.15656988322734833, "learning_rate": 1.712709180389294e-05, "loss": 0.6066978573799133, "step": 5614 }, { "epoch": 0.4598265925539211, "grad_norm": 0.19695082306861877, "learning_rate": 1.7124829771010878e-05, "loss": 0.5805996060371399, "step": 5615 }, { "epoch": 0.4599084850904401, "grad_norm": 0.17101480066776276, "learning_rate": 1.7122567515382612e-05, "loss": 0.7877641320228577, "step": 5616 }, { "epoch": 0.45999037762695905, "grad_norm": 0.2078593224287033, "learning_rate": 1.7120305037106457e-05, "loss": 0.965295135974884, "step": 5617 }, { "epoch": 0.46007227016347796, "grad_norm": 0.16212765872478485, "learning_rate": 1.7118042336280763e-05, "loss": 0.6437591910362244, "step": 5618 }, { "epoch": 0.4601541626999969, "grad_norm": 0.16204874217510223, "learning_rate": 1.7115779413003865e-05, "loss": 0.5001888275146484, "step": 5619 }, { "epoch": 0.4602360552365159, "grad_norm": 0.18134404718875885, "learning_rate": 1.7113516267374124e-05, "loss": 0.8329024314880371, "step": 5620 }, { "epoch": 0.4603179477730348, "grad_norm": 0.1889573633670807, "learning_rate": 1.711125289948991e-05, "loss": 0.7223650217056274, "step": 5621 }, { "epoch": 0.4603998403095538, "grad_norm": 0.18908809125423431, "learning_rate": 1.7108989309449587e-05, "loss": 1.00648033618927, "step": 5622 }, { "epoch": 0.46048173284607274, "grad_norm": 0.19665831327438354, "learning_rate": 1.7106725497351548e-05, "loss": 1.105470895767212, "step": 5623 }, { "epoch": 0.4605636253825917, "grad_norm": 0.19255506992340088, "learning_rate": 1.7104461463294186e-05, "loss": 1.028418779373169, "step": 5624 }, { "epoch": 0.4606455179191106, "grad_norm": 0.1941685527563095, "learning_rate": 1.71021972073759e-05, "loss": 0.7433605194091797, "step": 5625 }, { "epoch": 0.4607274104556296, "grad_norm": 0.18795400857925415, "learning_rate": 1.7099932729695116e-05, "loss": 0.7060457468032837, "step": 5626 }, { "epoch": 0.46080930299214856, "grad_norm": 0.17160651087760925, "learning_rate": 1.7097668030350254e-05, "loss": 0.7491191625595093, "step": 5627 }, { "epoch": 0.46089119552866753, "grad_norm": 0.23182187974452972, "learning_rate": 1.7095403109439734e-05, "loss": 0.8939904570579529, "step": 5628 }, { "epoch": 0.46097308806518644, "grad_norm": 0.1814865916967392, "learning_rate": 1.7093137967062014e-05, "loss": 1.0917574167251587, "step": 5629 }, { "epoch": 0.4610549806017054, "grad_norm": 0.19662444293498993, "learning_rate": 1.709087260331554e-05, "loss": 0.9021788835525513, "step": 5630 }, { "epoch": 0.4611368731382244, "grad_norm": 0.2308211475610733, "learning_rate": 1.708860701829877e-05, "loss": 0.8778523206710815, "step": 5631 }, { "epoch": 0.4612187656747433, "grad_norm": 0.21362243592739105, "learning_rate": 1.708634121211019e-05, "loss": 0.9810387492179871, "step": 5632 }, { "epoch": 0.46130065821126226, "grad_norm": 0.18830111622810364, "learning_rate": 1.7084075184848267e-05, "loss": 0.7273983955383301, "step": 5633 }, { "epoch": 0.46138255074778123, "grad_norm": 0.18773406744003296, "learning_rate": 1.7081808936611494e-05, "loss": 0.7527506351470947, "step": 5634 }, { "epoch": 0.4614644432843002, "grad_norm": 0.2082962542772293, "learning_rate": 1.7079542467498378e-05, "loss": 0.8878594040870667, "step": 5635 }, { "epoch": 0.4615463358208191, "grad_norm": 0.16195878386497498, "learning_rate": 1.7077275777607425e-05, "loss": 0.5821598768234253, "step": 5636 }, { "epoch": 0.4616282283573381, "grad_norm": 0.16819487512111664, "learning_rate": 1.7075008867037152e-05, "loss": 0.715498685836792, "step": 5637 }, { "epoch": 0.46171012089385705, "grad_norm": 0.199930340051651, "learning_rate": 1.7072741735886096e-05, "loss": 0.9459577202796936, "step": 5638 }, { "epoch": 0.461792013430376, "grad_norm": 0.1911356896162033, "learning_rate": 1.7070474384252784e-05, "loss": 0.6491736173629761, "step": 5639 }, { "epoch": 0.4618739059668949, "grad_norm": 0.18258215487003326, "learning_rate": 1.706820681223578e-05, "loss": 0.8887325525283813, "step": 5640 }, { "epoch": 0.4619557985034139, "grad_norm": 0.17445173859596252, "learning_rate": 1.7065939019933628e-05, "loss": 0.9699308276176453, "step": 5641 }, { "epoch": 0.46203769103993286, "grad_norm": 0.1519145667552948, "learning_rate": 1.70636710074449e-05, "loss": 0.7765964269638062, "step": 5642 }, { "epoch": 0.4621195835764518, "grad_norm": 0.19676879048347473, "learning_rate": 1.7061402774868177e-05, "loss": 0.6092524528503418, "step": 5643 }, { "epoch": 0.46220147611297074, "grad_norm": 0.18010647594928741, "learning_rate": 1.7059134322302044e-05, "loss": 0.8814293742179871, "step": 5644 }, { "epoch": 0.4622833686494897, "grad_norm": 0.15104055404663086, "learning_rate": 1.7056865649845094e-05, "loss": 0.6018142700195312, "step": 5645 }, { "epoch": 0.4623652611860087, "grad_norm": 0.1672787368297577, "learning_rate": 1.7054596757595938e-05, "loss": 0.796288251876831, "step": 5646 }, { "epoch": 0.4624471537225276, "grad_norm": 0.16131895780563354, "learning_rate": 1.705232764565318e-05, "loss": 0.5134752988815308, "step": 5647 }, { "epoch": 0.46252904625904656, "grad_norm": 0.17613138258457184, "learning_rate": 1.7050058314115466e-05, "loss": 0.7516793012619019, "step": 5648 }, { "epoch": 0.46261093879556553, "grad_norm": 0.15342296659946442, "learning_rate": 1.704778876308141e-05, "loss": 0.6599679589271545, "step": 5649 }, { "epoch": 0.4626928313320845, "grad_norm": 0.18254119157791138, "learning_rate": 1.704551899264967e-05, "loss": 0.901342511177063, "step": 5650 }, { "epoch": 0.4627747238686034, "grad_norm": 0.20758965611457825, "learning_rate": 1.7043249002918886e-05, "loss": 0.4898245334625244, "step": 5651 }, { "epoch": 0.4628566164051224, "grad_norm": 0.16814488172531128, "learning_rate": 1.704097879398773e-05, "loss": 0.6414906978607178, "step": 5652 }, { "epoch": 0.46293850894164135, "grad_norm": 0.266051322221756, "learning_rate": 1.7038708365954878e-05, "loss": 0.7771191000938416, "step": 5653 }, { "epoch": 0.46302040147816026, "grad_norm": 0.17315742373466492, "learning_rate": 1.7036437718919007e-05, "loss": 0.5392952561378479, "step": 5654 }, { "epoch": 0.4631022940146792, "grad_norm": 0.195817768573761, "learning_rate": 1.7034166852978807e-05, "loss": 0.5648343563079834, "step": 5655 }, { "epoch": 0.4631841865511982, "grad_norm": 0.18534733355045319, "learning_rate": 1.703189576823298e-05, "loss": 0.9492453336715698, "step": 5656 }, { "epoch": 0.46326607908771716, "grad_norm": 0.18725791573524475, "learning_rate": 1.702962446478024e-05, "loss": 0.7943506836891174, "step": 5657 }, { "epoch": 0.4633479716242361, "grad_norm": 0.1605147421360016, "learning_rate": 1.7027352942719306e-05, "loss": 0.99833744764328, "step": 5658 }, { "epoch": 0.46342986416075505, "grad_norm": 0.21395263075828552, "learning_rate": 1.7025081202148908e-05, "loss": 0.856096625328064, "step": 5659 }, { "epoch": 0.463511756697274, "grad_norm": 0.18935652077198029, "learning_rate": 1.702280924316778e-05, "loss": 0.6701532602310181, "step": 5660 }, { "epoch": 0.463593649233793, "grad_norm": 0.17860884964466095, "learning_rate": 1.702053706587468e-05, "loss": 0.583470344543457, "step": 5661 }, { "epoch": 0.4636755417703119, "grad_norm": 0.1616896390914917, "learning_rate": 1.7018264670368357e-05, "loss": 0.6640766263008118, "step": 5662 }, { "epoch": 0.46375743430683086, "grad_norm": 0.17915141582489014, "learning_rate": 1.7015992056747585e-05, "loss": 0.8074891567230225, "step": 5663 }, { "epoch": 0.46383932684334983, "grad_norm": 0.21121381223201752, "learning_rate": 1.7013719225111137e-05, "loss": 0.6954071521759033, "step": 5664 }, { "epoch": 0.46392121937986874, "grad_norm": 0.21897628903388977, "learning_rate": 1.70114461755578e-05, "loss": 0.7080338001251221, "step": 5665 }, { "epoch": 0.4640031119163877, "grad_norm": 0.20881624519824982, "learning_rate": 1.7009172908186374e-05, "loss": 0.9644142389297485, "step": 5666 }, { "epoch": 0.4640850044529067, "grad_norm": 0.19718126952648163, "learning_rate": 1.7006899423095663e-05, "loss": 0.7552162408828735, "step": 5667 }, { "epoch": 0.46416689698942565, "grad_norm": 0.19201596081256866, "learning_rate": 1.700462572038448e-05, "loss": 0.8494425415992737, "step": 5668 }, { "epoch": 0.46424878952594456, "grad_norm": 0.1904914379119873, "learning_rate": 1.700235180015165e-05, "loss": 0.888371467590332, "step": 5669 }, { "epoch": 0.46433068206246353, "grad_norm": 0.21081507205963135, "learning_rate": 1.700007766249601e-05, "loss": 0.7260885238647461, "step": 5670 }, { "epoch": 0.4644125745989825, "grad_norm": 0.1953665018081665, "learning_rate": 1.69978033075164e-05, "loss": 0.6683377027511597, "step": 5671 }, { "epoch": 0.46449446713550147, "grad_norm": 0.1951061487197876, "learning_rate": 1.699552873531167e-05, "loss": 0.7700653672218323, "step": 5672 }, { "epoch": 0.4645763596720204, "grad_norm": 0.15980330109596252, "learning_rate": 1.6993253945980694e-05, "loss": 0.7222982048988342, "step": 5673 }, { "epoch": 0.46465825220853935, "grad_norm": 0.1792273223400116, "learning_rate": 1.6990978939622326e-05, "loss": 0.9708443880081177, "step": 5674 }, { "epoch": 0.4647401447450583, "grad_norm": 0.18301260471343994, "learning_rate": 1.6988703716335466e-05, "loss": 0.99620521068573, "step": 5675 }, { "epoch": 0.4648220372815772, "grad_norm": 0.19610466063022614, "learning_rate": 1.6986428276218986e-05, "loss": 0.6621044278144836, "step": 5676 }, { "epoch": 0.4649039298180962, "grad_norm": 0.21389372646808624, "learning_rate": 1.6984152619371804e-05, "loss": 1.09792160987854, "step": 5677 }, { "epoch": 0.46498582235461516, "grad_norm": 0.16702988743782043, "learning_rate": 1.6981876745892817e-05, "loss": 0.8485398888587952, "step": 5678 }, { "epoch": 0.46506771489113413, "grad_norm": 0.18003708124160767, "learning_rate": 1.697960065588095e-05, "loss": 0.8555956482887268, "step": 5679 }, { "epoch": 0.46514960742765304, "grad_norm": 0.19652913510799408, "learning_rate": 1.6977324349435128e-05, "loss": 1.2005982398986816, "step": 5680 }, { "epoch": 0.465231499964172, "grad_norm": 0.211979478597641, "learning_rate": 1.697504782665429e-05, "loss": 0.6962770819664001, "step": 5681 }, { "epoch": 0.465313392500691, "grad_norm": 0.1556617021560669, "learning_rate": 1.697277108763738e-05, "loss": 0.7614181041717529, "step": 5682 }, { "epoch": 0.4653952850372099, "grad_norm": 0.1847931295633316, "learning_rate": 1.6970494132483363e-05, "loss": 0.6512038707733154, "step": 5683 }, { "epoch": 0.46547717757372886, "grad_norm": 0.19451624155044556, "learning_rate": 1.6968216961291195e-05, "loss": 1.0497111082077026, "step": 5684 }, { "epoch": 0.46555907011024783, "grad_norm": 0.19757571816444397, "learning_rate": 1.6965939574159857e-05, "loss": 0.7682152986526489, "step": 5685 }, { "epoch": 0.4656409626467668, "grad_norm": 0.17299173772335052, "learning_rate": 1.6963661971188328e-05, "loss": 0.9950641989707947, "step": 5686 }, { "epoch": 0.4657228551832857, "grad_norm": 0.191226065158844, "learning_rate": 1.696138415247561e-05, "loss": 1.1259377002716064, "step": 5687 }, { "epoch": 0.4658047477198047, "grad_norm": 0.17694751918315887, "learning_rate": 1.6959106118120697e-05, "loss": 0.8161852359771729, "step": 5688 }, { "epoch": 0.46588664025632365, "grad_norm": 0.1892276257276535, "learning_rate": 1.6956827868222615e-05, "loss": 0.6322022080421448, "step": 5689 }, { "epoch": 0.4659685327928426, "grad_norm": 0.16336172819137573, "learning_rate": 1.6954549402880375e-05, "loss": 0.6846575736999512, "step": 5690 }, { "epoch": 0.46605042532936153, "grad_norm": 0.17966033518314362, "learning_rate": 1.6952270722193008e-05, "loss": 0.9478905200958252, "step": 5691 }, { "epoch": 0.4661323178658805, "grad_norm": 0.21429374814033508, "learning_rate": 1.6949991826259567e-05, "loss": 0.5821859240531921, "step": 5692 }, { "epoch": 0.46621421040239946, "grad_norm": 0.23263147473335266, "learning_rate": 1.6947712715179086e-05, "loss": 0.950603723526001, "step": 5693 }, { "epoch": 0.4662961029389184, "grad_norm": 0.20973782241344452, "learning_rate": 1.694543338905063e-05, "loss": 0.8668786883354187, "step": 5694 }, { "epoch": 0.46637799547543735, "grad_norm": 0.2305840700864792, "learning_rate": 1.6943153847973278e-05, "loss": 0.8857128620147705, "step": 5695 }, { "epoch": 0.4664598880119563, "grad_norm": 0.16598130762577057, "learning_rate": 1.6940874092046092e-05, "loss": 0.8672669529914856, "step": 5696 }, { "epoch": 0.4665417805484753, "grad_norm": 0.17495568096637726, "learning_rate": 1.6938594121368174e-05, "loss": 0.7202290892601013, "step": 5697 }, { "epoch": 0.4666236730849942, "grad_norm": 0.18416143953800201, "learning_rate": 1.6936313936038615e-05, "loss": 0.9972740411758423, "step": 5698 }, { "epoch": 0.46670556562151316, "grad_norm": 0.12858416140079498, "learning_rate": 1.693403353615652e-05, "loss": 0.362326055765152, "step": 5699 }, { "epoch": 0.46678745815803213, "grad_norm": 0.20268410444259644, "learning_rate": 1.6931752921821008e-05, "loss": 1.0605305433273315, "step": 5700 }, { "epoch": 0.4668693506945511, "grad_norm": 0.17314472794532776, "learning_rate": 1.69294720931312e-05, "loss": 0.800609827041626, "step": 5701 }, { "epoch": 0.46695124323107, "grad_norm": 0.21764276921749115, "learning_rate": 1.6927191050186226e-05, "loss": 0.7787314653396606, "step": 5702 }, { "epoch": 0.467033135767589, "grad_norm": 0.1762946993112564, "learning_rate": 1.6924909793085245e-05, "loss": 0.8782187700271606, "step": 5703 }, { "epoch": 0.46711502830410795, "grad_norm": 0.16525842249393463, "learning_rate": 1.6922628321927394e-05, "loss": 0.9047735929489136, "step": 5704 }, { "epoch": 0.46719692084062686, "grad_norm": 0.17772705852985382, "learning_rate": 1.6920346636811842e-05, "loss": 0.5702717900276184, "step": 5705 }, { "epoch": 0.46727881337714583, "grad_norm": 0.1556197851896286, "learning_rate": 1.691806473783776e-05, "loss": 0.9687420725822449, "step": 5706 }, { "epoch": 0.4673607059136648, "grad_norm": 0.15444719791412354, "learning_rate": 1.691578262510433e-05, "loss": 0.7855178117752075, "step": 5707 }, { "epoch": 0.46744259845018377, "grad_norm": 0.19381767511367798, "learning_rate": 1.691350029871074e-05, "loss": 0.8316521644592285, "step": 5708 }, { "epoch": 0.4675244909867027, "grad_norm": 0.17090266942977905, "learning_rate": 1.6911217758756188e-05, "loss": 0.9096817970275879, "step": 5709 }, { "epoch": 0.46760638352322165, "grad_norm": 0.17258812487125397, "learning_rate": 1.6908935005339888e-05, "loss": 0.8648214340209961, "step": 5710 }, { "epoch": 0.4676882760597406, "grad_norm": 0.19979976117610931, "learning_rate": 1.690665203856105e-05, "loss": 1.2726770639419556, "step": 5711 }, { "epoch": 0.4677701685962596, "grad_norm": 0.16230051219463348, "learning_rate": 1.690436885851891e-05, "loss": 0.7734116911888123, "step": 5712 }, { "epoch": 0.4678520611327785, "grad_norm": 0.21505683660507202, "learning_rate": 1.6902085465312692e-05, "loss": 0.830539882183075, "step": 5713 }, { "epoch": 0.46793395366929746, "grad_norm": 0.1595895141363144, "learning_rate": 1.689980185904166e-05, "loss": 0.5812140703201294, "step": 5714 }, { "epoch": 0.46801584620581643, "grad_norm": 0.21550410985946655, "learning_rate": 1.689751803980505e-05, "loss": 1.1381564140319824, "step": 5715 }, { "epoch": 0.46809773874233535, "grad_norm": 0.21947431564331055, "learning_rate": 1.6895234007702135e-05, "loss": 0.8519974946975708, "step": 5716 }, { "epoch": 0.4681796312788543, "grad_norm": 0.1863469034433365, "learning_rate": 1.6892949762832193e-05, "loss": 0.9654135704040527, "step": 5717 }, { "epoch": 0.4682615238153733, "grad_norm": 0.16923269629478455, "learning_rate": 1.68906653052945e-05, "loss": 1.1130108833312988, "step": 5718 }, { "epoch": 0.46834341635189225, "grad_norm": 0.17674696445465088, "learning_rate": 1.688838063518835e-05, "loss": 0.6391989588737488, "step": 5719 }, { "epoch": 0.46842530888841116, "grad_norm": 0.14777280390262604, "learning_rate": 1.6886095752613043e-05, "loss": 0.776767373085022, "step": 5720 }, { "epoch": 0.46850720142493013, "grad_norm": 0.21793873608112335, "learning_rate": 1.688381065766789e-05, "loss": 1.19598388671875, "step": 5721 }, { "epoch": 0.4685890939614491, "grad_norm": 0.16271845996379852, "learning_rate": 1.688152535045221e-05, "loss": 0.8221518397331238, "step": 5722 }, { "epoch": 0.46867098649796807, "grad_norm": 0.16399644315242767, "learning_rate": 1.6879239831065332e-05, "loss": 0.6849920749664307, "step": 5723 }, { "epoch": 0.468752879034487, "grad_norm": 0.17152543365955353, "learning_rate": 1.68769540996066e-05, "loss": 0.8478953242301941, "step": 5724 }, { "epoch": 0.46883477157100595, "grad_norm": 0.2091539055109024, "learning_rate": 1.687466815617535e-05, "loss": 1.0456154346466064, "step": 5725 }, { "epoch": 0.4689166641075249, "grad_norm": 0.18918949365615845, "learning_rate": 1.687238200087095e-05, "loss": 0.692163348197937, "step": 5726 }, { "epoch": 0.46899855664404383, "grad_norm": 0.18937453627586365, "learning_rate": 1.6870095633792758e-05, "loss": 0.8134610056877136, "step": 5727 }, { "epoch": 0.4690804491805628, "grad_norm": 0.17877361178398132, "learning_rate": 1.686780905504015e-05, "loss": 0.9310188293457031, "step": 5728 }, { "epoch": 0.46916234171708177, "grad_norm": 0.1685734987258911, "learning_rate": 1.686552226471251e-05, "loss": 0.940948486328125, "step": 5729 }, { "epoch": 0.46924423425360073, "grad_norm": 0.2175823301076889, "learning_rate": 1.6863235262909236e-05, "loss": 0.9955415725708008, "step": 5730 }, { "epoch": 0.46932612679011965, "grad_norm": 0.17485475540161133, "learning_rate": 1.6860948049729726e-05, "loss": 0.9868797659873962, "step": 5731 }, { "epoch": 0.4694080193266386, "grad_norm": 0.19364407658576965, "learning_rate": 1.6858660625273395e-05, "loss": 0.7460997104644775, "step": 5732 }, { "epoch": 0.4694899118631576, "grad_norm": 0.1785038411617279, "learning_rate": 1.6856372989639662e-05, "loss": 0.9369797706604004, "step": 5733 }, { "epoch": 0.46957180439967655, "grad_norm": 0.1772373914718628, "learning_rate": 1.6854085142927954e-05, "loss": 0.7768234610557556, "step": 5734 }, { "epoch": 0.46965369693619546, "grad_norm": 0.19427774846553802, "learning_rate": 1.6851797085237717e-05, "loss": 0.7026744484901428, "step": 5735 }, { "epoch": 0.46973558947271443, "grad_norm": 0.20748481154441833, "learning_rate": 1.6849508816668392e-05, "loss": 1.211838722229004, "step": 5736 }, { "epoch": 0.4698174820092334, "grad_norm": 0.20712335407733917, "learning_rate": 1.6847220337319438e-05, "loss": 0.8668811321258545, "step": 5737 }, { "epoch": 0.4698993745457523, "grad_norm": 0.21387356519699097, "learning_rate": 1.6844931647290327e-05, "loss": 1.122694492340088, "step": 5738 }, { "epoch": 0.4699812670822713, "grad_norm": 0.19650539755821228, "learning_rate": 1.684264274668053e-05, "loss": 1.053371787071228, "step": 5739 }, { "epoch": 0.47006315961879025, "grad_norm": 0.17744415998458862, "learning_rate": 1.684035363558954e-05, "loss": 0.8609519004821777, "step": 5740 }, { "epoch": 0.4701450521553092, "grad_norm": 0.16463683545589447, "learning_rate": 1.683806431411684e-05, "loss": 0.5769405364990234, "step": 5741 }, { "epoch": 0.47022694469182813, "grad_norm": 0.2522983253002167, "learning_rate": 1.683577478236194e-05, "loss": 0.9162659645080566, "step": 5742 }, { "epoch": 0.4703088372283471, "grad_norm": 0.21955548226833344, "learning_rate": 1.683348504042435e-05, "loss": 0.8730915784835815, "step": 5743 }, { "epoch": 0.47039072976486607, "grad_norm": 0.20450858771800995, "learning_rate": 1.6831195088403594e-05, "loss": 1.0019282102584839, "step": 5744 }, { "epoch": 0.47047262230138504, "grad_norm": 0.20428945124149323, "learning_rate": 1.6828904926399198e-05, "loss": 0.8124568462371826, "step": 5745 }, { "epoch": 0.47055451483790395, "grad_norm": 0.1979932337999344, "learning_rate": 1.6826614554510706e-05, "loss": 1.0529217720031738, "step": 5746 }, { "epoch": 0.4706364073744229, "grad_norm": 0.1792546957731247, "learning_rate": 1.6824323972837673e-05, "loss": 0.7455668449401855, "step": 5747 }, { "epoch": 0.4707182999109419, "grad_norm": 0.15432637929916382, "learning_rate": 1.6822033181479638e-05, "loss": 0.9391785860061646, "step": 5748 }, { "epoch": 0.4708001924474608, "grad_norm": 0.2170325368642807, "learning_rate": 1.6819742180536188e-05, "loss": 0.7375652194023132, "step": 5749 }, { "epoch": 0.47088208498397977, "grad_norm": 0.1708453744649887, "learning_rate": 1.6817450970106895e-05, "loss": 0.6354907751083374, "step": 5750 }, { "epoch": 0.47096397752049873, "grad_norm": 0.1957627683877945, "learning_rate": 1.681515955029133e-05, "loss": 0.7261526584625244, "step": 5751 }, { "epoch": 0.4710458700570177, "grad_norm": 0.1713513880968094, "learning_rate": 1.681286792118911e-05, "loss": 1.0553860664367676, "step": 5752 }, { "epoch": 0.4711277625935366, "grad_norm": 0.20723366737365723, "learning_rate": 1.6810576082899818e-05, "loss": 0.8606569170951843, "step": 5753 }, { "epoch": 0.4712096551300556, "grad_norm": 0.16217076778411865, "learning_rate": 1.6808284035523084e-05, "loss": 0.7416989207267761, "step": 5754 }, { "epoch": 0.47129154766657455, "grad_norm": 0.20789554715156555, "learning_rate": 1.680599177915852e-05, "loss": 0.7503676414489746, "step": 5755 }, { "epoch": 0.47137344020309346, "grad_norm": 0.17819589376449585, "learning_rate": 1.6803699313905755e-05, "loss": 0.6370301842689514, "step": 5756 }, { "epoch": 0.47145533273961243, "grad_norm": 0.20768415927886963, "learning_rate": 1.6801406639864434e-05, "loss": 0.9824140071868896, "step": 5757 }, { "epoch": 0.4715372252761314, "grad_norm": 0.18644043803215027, "learning_rate": 1.6799113757134212e-05, "loss": 0.907228946685791, "step": 5758 }, { "epoch": 0.47161911781265037, "grad_norm": 0.22746238112449646, "learning_rate": 1.679682066581473e-05, "loss": 0.6297433376312256, "step": 5759 }, { "epoch": 0.4717010103491693, "grad_norm": 0.16416217386722565, "learning_rate": 1.679452736600567e-05, "loss": 0.8742093443870544, "step": 5760 }, { "epoch": 0.47178290288568825, "grad_norm": 0.19041581451892853, "learning_rate": 1.6792233857806707e-05, "loss": 0.989783763885498, "step": 5761 }, { "epoch": 0.4718647954222072, "grad_norm": 0.18986694514751434, "learning_rate": 1.6789940141317516e-05, "loss": 0.7801942825317383, "step": 5762 }, { "epoch": 0.4719466879587262, "grad_norm": 0.15892240405082703, "learning_rate": 1.67876462166378e-05, "loss": 0.6128737330436707, "step": 5763 }, { "epoch": 0.4720285804952451, "grad_norm": 0.21373392641544342, "learning_rate": 1.6785352083867262e-05, "loss": 0.9701625108718872, "step": 5764 }, { "epoch": 0.47211047303176407, "grad_norm": 0.21070343255996704, "learning_rate": 1.678305774310561e-05, "loss": 1.1207671165466309, "step": 5765 }, { "epoch": 0.47219236556828303, "grad_norm": 0.1858547031879425, "learning_rate": 1.6780763194452577e-05, "loss": 0.6909077167510986, "step": 5766 }, { "epoch": 0.47227425810480195, "grad_norm": 0.18076090514659882, "learning_rate": 1.6778468438007877e-05, "loss": 0.9301475882530212, "step": 5767 }, { "epoch": 0.4723561506413209, "grad_norm": 0.17048142850399017, "learning_rate": 1.6776173473871257e-05, "loss": 0.8735812902450562, "step": 5768 }, { "epoch": 0.4724380431778399, "grad_norm": 0.19194333255290985, "learning_rate": 1.677387830214247e-05, "loss": 1.2091397047042847, "step": 5769 }, { "epoch": 0.47251993571435885, "grad_norm": 0.1814650595188141, "learning_rate": 1.677158292292127e-05, "loss": 0.6549610495567322, "step": 5770 }, { "epoch": 0.47260182825087776, "grad_norm": 0.16812385618686676, "learning_rate": 1.6769287336307425e-05, "loss": 0.7203570604324341, "step": 5771 }, { "epoch": 0.47268372078739673, "grad_norm": 0.1686863750219345, "learning_rate": 1.6766991542400705e-05, "loss": 0.73825603723526, "step": 5772 }, { "epoch": 0.4727656133239157, "grad_norm": 0.1793726533651352, "learning_rate": 1.67646955413009e-05, "loss": 0.5963665246963501, "step": 5773 }, { "epoch": 0.47284750586043467, "grad_norm": 0.1557624638080597, "learning_rate": 1.6762399333107803e-05, "loss": 0.6332284212112427, "step": 5774 }, { "epoch": 0.4729293983969536, "grad_norm": 0.19557465612888336, "learning_rate": 1.6760102917921217e-05, "loss": 0.7497131824493408, "step": 5775 }, { "epoch": 0.47301129093347255, "grad_norm": 0.1806601881980896, "learning_rate": 1.6757806295840948e-05, "loss": 0.675439715385437, "step": 5776 }, { "epoch": 0.4730931834699915, "grad_norm": 0.18669982254505157, "learning_rate": 1.6755509466966828e-05, "loss": 1.1132562160491943, "step": 5777 }, { "epoch": 0.47317507600651043, "grad_norm": 0.194931298494339, "learning_rate": 1.6753212431398675e-05, "loss": 0.9010046124458313, "step": 5778 }, { "epoch": 0.4732569685430294, "grad_norm": 0.2079889327287674, "learning_rate": 1.6750915189236336e-05, "loss": 0.8490778803825378, "step": 5779 }, { "epoch": 0.47333886107954837, "grad_norm": 0.20925164222717285, "learning_rate": 1.6748617740579653e-05, "loss": 1.3072772026062012, "step": 5780 }, { "epoch": 0.47342075361606734, "grad_norm": 0.18591450154781342, "learning_rate": 1.6746320085528485e-05, "loss": 0.7093144655227661, "step": 5781 }, { "epoch": 0.47350264615258625, "grad_norm": 0.19523046910762787, "learning_rate": 1.67440222241827e-05, "loss": 0.8174944519996643, "step": 5782 }, { "epoch": 0.4735845386891052, "grad_norm": 0.17547807097434998, "learning_rate": 1.6741724156642167e-05, "loss": 0.8408262133598328, "step": 5783 }, { "epoch": 0.4736664312256242, "grad_norm": 0.18496105074882507, "learning_rate": 1.6739425883006775e-05, "loss": 1.0224480628967285, "step": 5784 }, { "epoch": 0.47374832376214315, "grad_norm": 0.15358321368694305, "learning_rate": 1.6737127403376412e-05, "loss": 0.6119422912597656, "step": 5785 }, { "epoch": 0.47383021629866207, "grad_norm": 0.19539202749729156, "learning_rate": 1.673482871785098e-05, "loss": 0.880734384059906, "step": 5786 }, { "epoch": 0.47391210883518103, "grad_norm": 0.1827968955039978, "learning_rate": 1.6732529826530395e-05, "loss": 0.4837223291397095, "step": 5787 }, { "epoch": 0.4739940013717, "grad_norm": 0.20449529588222504, "learning_rate": 1.673023072951457e-05, "loss": 0.9290793538093567, "step": 5788 }, { "epoch": 0.4740758939082189, "grad_norm": 0.20017297565937042, "learning_rate": 1.6727931426903434e-05, "loss": 0.9493145942687988, "step": 5789 }, { "epoch": 0.4741577864447379, "grad_norm": 0.20536845922470093, "learning_rate": 1.6725631918796925e-05, "loss": 0.7183416485786438, "step": 5790 }, { "epoch": 0.47423967898125685, "grad_norm": 0.12783238291740417, "learning_rate": 1.672333220529499e-05, "loss": 0.4309667646884918, "step": 5791 }, { "epoch": 0.4743215715177758, "grad_norm": 0.17433913052082062, "learning_rate": 1.672103228649758e-05, "loss": 0.7464715838432312, "step": 5792 }, { "epoch": 0.47440346405429473, "grad_norm": 0.19139544665813446, "learning_rate": 1.6718732162504667e-05, "loss": 0.9179016947746277, "step": 5793 }, { "epoch": 0.4744853565908137, "grad_norm": 0.198594868183136, "learning_rate": 1.6716431833416218e-05, "loss": 0.780087947845459, "step": 5794 }, { "epoch": 0.47456724912733267, "grad_norm": 0.17975525557994843, "learning_rate": 1.671413129933222e-05, "loss": 0.650503396987915, "step": 5795 }, { "epoch": 0.47464914166385164, "grad_norm": 0.1643456369638443, "learning_rate": 1.671183056035265e-05, "loss": 0.8135290741920471, "step": 5796 }, { "epoch": 0.47473103420037055, "grad_norm": 0.15091553330421448, "learning_rate": 1.6709529616577525e-05, "loss": 0.5742231607437134, "step": 5797 }, { "epoch": 0.4748129267368895, "grad_norm": 0.17277908325195312, "learning_rate": 1.670722846810684e-05, "loss": 0.9630283117294312, "step": 5798 }, { "epoch": 0.4748948192734085, "grad_norm": 0.2050812989473343, "learning_rate": 1.6704927115040627e-05, "loss": 0.9857592582702637, "step": 5799 }, { "epoch": 0.4749767118099274, "grad_norm": 0.15594175457954407, "learning_rate": 1.6702625557478898e-05, "loss": 0.9345353841781616, "step": 5800 }, { "epoch": 0.47505860434644637, "grad_norm": 0.14824168384075165, "learning_rate": 1.6700323795521692e-05, "loss": 0.648530900478363, "step": 5801 }, { "epoch": 0.47514049688296534, "grad_norm": 0.16983556747436523, "learning_rate": 1.6698021829269056e-05, "loss": 0.7262153625488281, "step": 5802 }, { "epoch": 0.4752223894194843, "grad_norm": 0.14108283817768097, "learning_rate": 1.6695719658821043e-05, "loss": 0.41172972321510315, "step": 5803 }, { "epoch": 0.4753042819560032, "grad_norm": 0.19198612868785858, "learning_rate": 1.669341728427771e-05, "loss": 0.7691149711608887, "step": 5804 }, { "epoch": 0.4753861744925222, "grad_norm": 0.22505192458629608, "learning_rate": 1.6691114705739132e-05, "loss": 0.998029351234436, "step": 5805 }, { "epoch": 0.47546806702904115, "grad_norm": 0.1786547303199768, "learning_rate": 1.668881192330539e-05, "loss": 0.8285019397735596, "step": 5806 }, { "epoch": 0.4755499595655601, "grad_norm": 0.21969948709011078, "learning_rate": 1.668650893707657e-05, "loss": 0.9418721199035645, "step": 5807 }, { "epoch": 0.47563185210207903, "grad_norm": 0.17640012502670288, "learning_rate": 1.6684205747152766e-05, "loss": 0.6732139587402344, "step": 5808 }, { "epoch": 0.475713744638598, "grad_norm": 0.17091907560825348, "learning_rate": 1.668190235363409e-05, "loss": 0.7789372801780701, "step": 5809 }, { "epoch": 0.47579563717511697, "grad_norm": 0.21132904291152954, "learning_rate": 1.6679598756620654e-05, "loss": 0.6547149419784546, "step": 5810 }, { "epoch": 0.4758775297116359, "grad_norm": 0.15671876072883606, "learning_rate": 1.6677294956212584e-05, "loss": 0.8701981902122498, "step": 5811 }, { "epoch": 0.47595942224815485, "grad_norm": 0.17224308848381042, "learning_rate": 1.6674990952510008e-05, "loss": 0.6987871527671814, "step": 5812 }, { "epoch": 0.4760413147846738, "grad_norm": 0.15286237001419067, "learning_rate": 1.667268674561307e-05, "loss": 0.7890997529029846, "step": 5813 }, { "epoch": 0.4761232073211928, "grad_norm": 0.18529218435287476, "learning_rate": 1.6670382335621924e-05, "loss": 0.9518860578536987, "step": 5814 }, { "epoch": 0.4762050998577117, "grad_norm": 0.17764198780059814, "learning_rate": 1.6668077722636724e-05, "loss": 0.8647974729537964, "step": 5815 }, { "epoch": 0.47628699239423067, "grad_norm": 0.1884479820728302, "learning_rate": 1.6665772906757636e-05, "loss": 0.9043370485305786, "step": 5816 }, { "epoch": 0.47636888493074964, "grad_norm": 0.1891394853591919, "learning_rate": 1.6663467888084843e-05, "loss": 1.0571242570877075, "step": 5817 }, { "epoch": 0.47645077746726855, "grad_norm": 0.1889127939939499, "learning_rate": 1.6661162666718534e-05, "loss": 0.6697296500205994, "step": 5818 }, { "epoch": 0.4765326700037875, "grad_norm": 0.1529742181301117, "learning_rate": 1.665885724275889e-05, "loss": 0.6400282979011536, "step": 5819 }, { "epoch": 0.4766145625403065, "grad_norm": 0.1753159463405609, "learning_rate": 1.6656551616306124e-05, "loss": 0.9439696073532104, "step": 5820 }, { "epoch": 0.47669645507682545, "grad_norm": 0.18142856657505035, "learning_rate": 1.665424578746045e-05, "loss": 0.8795878887176514, "step": 5821 }, { "epoch": 0.47677834761334437, "grad_norm": 0.1863868534564972, "learning_rate": 1.6651939756322077e-05, "loss": 0.8876465559005737, "step": 5822 }, { "epoch": 0.47686024014986333, "grad_norm": 0.18236854672431946, "learning_rate": 1.6649633522991248e-05, "loss": 0.970358669757843, "step": 5823 }, { "epoch": 0.4769421326863823, "grad_norm": 0.1758289635181427, "learning_rate": 1.6647327087568193e-05, "loss": 0.8860161900520325, "step": 5824 }, { "epoch": 0.47702402522290127, "grad_norm": 0.21044903993606567, "learning_rate": 1.664502045015316e-05, "loss": 0.9439597129821777, "step": 5825 }, { "epoch": 0.4771059177594202, "grad_norm": 0.18804138898849487, "learning_rate": 1.6642713610846414e-05, "loss": 0.6427783966064453, "step": 5826 }, { "epoch": 0.47718781029593915, "grad_norm": 0.15707151591777802, "learning_rate": 1.6640406569748205e-05, "loss": 0.9073885679244995, "step": 5827 }, { "epoch": 0.4772697028324581, "grad_norm": 0.21776331961154938, "learning_rate": 1.663809932695882e-05, "loss": 1.0187879800796509, "step": 5828 }, { "epoch": 0.47735159536897703, "grad_norm": 0.19416797161102295, "learning_rate": 1.6635791882578535e-05, "loss": 0.7169744968414307, "step": 5829 }, { "epoch": 0.477433487905496, "grad_norm": 0.2072611153125763, "learning_rate": 1.6633484236707635e-05, "loss": 1.0421209335327148, "step": 5830 }, { "epoch": 0.47751538044201497, "grad_norm": 0.21846184134483337, "learning_rate": 1.6631176389446432e-05, "loss": 0.8848848342895508, "step": 5831 }, { "epoch": 0.47759727297853394, "grad_norm": 0.1447957307100296, "learning_rate": 1.662886834089523e-05, "loss": 0.4993537664413452, "step": 5832 }, { "epoch": 0.47767916551505285, "grad_norm": 0.18340003490447998, "learning_rate": 1.6626560091154337e-05, "loss": 0.6100228428840637, "step": 5833 }, { "epoch": 0.4777610580515718, "grad_norm": 0.19292204082012177, "learning_rate": 1.6624251640324093e-05, "loss": 0.5712056756019592, "step": 5834 }, { "epoch": 0.4778429505880908, "grad_norm": 0.16753728687763214, "learning_rate": 1.6621942988504826e-05, "loss": 0.730812132358551, "step": 5835 }, { "epoch": 0.47792484312460976, "grad_norm": 0.1869751363992691, "learning_rate": 1.6619634135796877e-05, "loss": 0.8702684044837952, "step": 5836 }, { "epoch": 0.47800673566112867, "grad_norm": 0.17108774185180664, "learning_rate": 1.6617325082300607e-05, "loss": 0.6573414206504822, "step": 5837 }, { "epoch": 0.47808862819764764, "grad_norm": 0.19015993177890778, "learning_rate": 1.661501582811637e-05, "loss": 0.8195396065711975, "step": 5838 }, { "epoch": 0.4781705207341666, "grad_norm": 0.17832225561141968, "learning_rate": 1.6612706373344534e-05, "loss": 0.7135940790176392, "step": 5839 }, { "epoch": 0.4782524132706855, "grad_norm": 0.17596618831157684, "learning_rate": 1.661039671808548e-05, "loss": 0.9052462577819824, "step": 5840 }, { "epoch": 0.4783343058072045, "grad_norm": 0.21407197415828705, "learning_rate": 1.6608086862439602e-05, "loss": 0.8048447966575623, "step": 5841 }, { "epoch": 0.47841619834372345, "grad_norm": 0.19464482367038727, "learning_rate": 1.6605776806507287e-05, "loss": 1.0843486785888672, "step": 5842 }, { "epoch": 0.4784980908802424, "grad_norm": 0.16307486593723297, "learning_rate": 1.6603466550388943e-05, "loss": 0.8358691334724426, "step": 5843 }, { "epoch": 0.47857998341676133, "grad_norm": 0.1839357316493988, "learning_rate": 1.6601156094184977e-05, "loss": 1.0746279954910278, "step": 5844 }, { "epoch": 0.4786618759532803, "grad_norm": 0.17272637784481049, "learning_rate": 1.659884543799582e-05, "loss": 0.7889348268508911, "step": 5845 }, { "epoch": 0.47874376848979927, "grad_norm": 0.20222406089305878, "learning_rate": 1.6596534581921903e-05, "loss": 0.8294821977615356, "step": 5846 }, { "epoch": 0.47882566102631824, "grad_norm": 0.15591055154800415, "learning_rate": 1.6594223526063653e-05, "loss": 0.7688875794410706, "step": 5847 }, { "epoch": 0.47890755356283715, "grad_norm": 0.18662510812282562, "learning_rate": 1.6591912270521536e-05, "loss": 0.7065189480781555, "step": 5848 }, { "epoch": 0.4789894460993561, "grad_norm": 0.1918013095855713, "learning_rate": 1.6589600815395993e-05, "loss": 0.7625279426574707, "step": 5849 }, { "epoch": 0.4790713386358751, "grad_norm": 0.20438404381275177, "learning_rate": 1.6587289160787493e-05, "loss": 0.9446308016777039, "step": 5850 }, { "epoch": 0.479153231172394, "grad_norm": 0.24744661152362823, "learning_rate": 1.6584977306796518e-05, "loss": 0.7864745855331421, "step": 5851 }, { "epoch": 0.47923512370891297, "grad_norm": 0.18516987562179565, "learning_rate": 1.6582665253523545e-05, "loss": 1.0437281131744385, "step": 5852 }, { "epoch": 0.47931701624543194, "grad_norm": 0.16550351679325104, "learning_rate": 1.6580353001069063e-05, "loss": 0.8916025757789612, "step": 5853 }, { "epoch": 0.4793989087819509, "grad_norm": 0.19893646240234375, "learning_rate": 1.6578040549533576e-05, "loss": 0.7225375175476074, "step": 5854 }, { "epoch": 0.4794808013184698, "grad_norm": 0.20850993692874908, "learning_rate": 1.657572789901759e-05, "loss": 0.7167333364486694, "step": 5855 }, { "epoch": 0.4795626938549888, "grad_norm": 0.1947079598903656, "learning_rate": 1.6573415049621623e-05, "loss": 0.9935597777366638, "step": 5856 }, { "epoch": 0.47964458639150775, "grad_norm": 0.2638479173183441, "learning_rate": 1.65711020014462e-05, "loss": 0.7906663417816162, "step": 5857 }, { "epoch": 0.4797264789280267, "grad_norm": 0.19740994274616241, "learning_rate": 1.656878875459186e-05, "loss": 0.8551731109619141, "step": 5858 }, { "epoch": 0.47980837146454564, "grad_norm": 0.19004499912261963, "learning_rate": 1.656647530915914e-05, "loss": 0.8458139896392822, "step": 5859 }, { "epoch": 0.4798902640010646, "grad_norm": 0.15888270735740662, "learning_rate": 1.65641616652486e-05, "loss": 0.9409599304199219, "step": 5860 }, { "epoch": 0.47997215653758357, "grad_norm": 0.1668064445257187, "learning_rate": 1.6561847822960792e-05, "loss": 1.0055943727493286, "step": 5861 }, { "epoch": 0.4800540490741025, "grad_norm": 0.17890961468219757, "learning_rate": 1.6559533782396293e-05, "loss": 0.997800886631012, "step": 5862 }, { "epoch": 0.48013594161062145, "grad_norm": 0.18552571535110474, "learning_rate": 1.6557219543655673e-05, "loss": 0.9187594652175903, "step": 5863 }, { "epoch": 0.4802178341471404, "grad_norm": 0.1885429322719574, "learning_rate": 1.6554905106839518e-05, "loss": 0.7258826494216919, "step": 5864 }, { "epoch": 0.4802997266836594, "grad_norm": 0.1855742633342743, "learning_rate": 1.6552590472048433e-05, "loss": 0.7395814061164856, "step": 5865 }, { "epoch": 0.4803816192201783, "grad_norm": 0.15087305009365082, "learning_rate": 1.655027563938301e-05, "loss": 0.761139452457428, "step": 5866 }, { "epoch": 0.48046351175669727, "grad_norm": 0.2607874870300293, "learning_rate": 1.654796060894387e-05, "loss": 0.7329815030097961, "step": 5867 }, { "epoch": 0.48054540429321624, "grad_norm": 0.17394457757472992, "learning_rate": 1.6545645380831633e-05, "loss": 0.8681674003601074, "step": 5868 }, { "epoch": 0.4806272968297352, "grad_norm": 0.2048947811126709, "learning_rate": 1.6543329955146922e-05, "loss": 1.0877737998962402, "step": 5869 }, { "epoch": 0.4807091893662541, "grad_norm": 0.21914289891719818, "learning_rate": 1.654101433199038e-05, "loss": 0.6633110642433167, "step": 5870 }, { "epoch": 0.4807910819027731, "grad_norm": 0.1721796989440918, "learning_rate": 1.653869851146265e-05, "loss": 1.1039314270019531, "step": 5871 }, { "epoch": 0.48087297443929206, "grad_norm": 0.22088006138801575, "learning_rate": 1.6536382493664392e-05, "loss": 0.8358103036880493, "step": 5872 }, { "epoch": 0.48095486697581097, "grad_norm": 0.21586157381534576, "learning_rate": 1.6534066278696264e-05, "loss": 0.7305868864059448, "step": 5873 }, { "epoch": 0.48103675951232994, "grad_norm": 0.18070310354232788, "learning_rate": 1.6531749866658945e-05, "loss": 0.6932225227355957, "step": 5874 }, { "epoch": 0.4811186520488489, "grad_norm": 0.21230816841125488, "learning_rate": 1.6529433257653106e-05, "loss": 0.614865243434906, "step": 5875 }, { "epoch": 0.4812005445853679, "grad_norm": 0.1732427477836609, "learning_rate": 1.6527116451779446e-05, "loss": 0.8121309280395508, "step": 5876 }, { "epoch": 0.4812824371218868, "grad_norm": 0.20355747640132904, "learning_rate": 1.652479944913866e-05, "loss": 1.0225627422332764, "step": 5877 }, { "epoch": 0.48136432965840575, "grad_norm": 0.19230307638645172, "learning_rate": 1.6522482249831454e-05, "loss": 0.5575873255729675, "step": 5878 }, { "epoch": 0.4814462221949247, "grad_norm": 0.180848628282547, "learning_rate": 1.652016485395854e-05, "loss": 0.7529196739196777, "step": 5879 }, { "epoch": 0.4815281147314437, "grad_norm": 0.21735979616641998, "learning_rate": 1.6517847261620642e-05, "loss": 1.1100118160247803, "step": 5880 }, { "epoch": 0.4816100072679626, "grad_norm": 0.22147461771965027, "learning_rate": 1.65155294729185e-05, "loss": 1.0712127685546875, "step": 5881 }, { "epoch": 0.48169189980448157, "grad_norm": 0.1745114028453827, "learning_rate": 1.6513211487952848e-05, "loss": 0.7644980549812317, "step": 5882 }, { "epoch": 0.48177379234100054, "grad_norm": 0.18079325556755066, "learning_rate": 1.6510893306824433e-05, "loss": 0.843749463558197, "step": 5883 }, { "epoch": 0.48185568487751945, "grad_norm": 0.19780589640140533, "learning_rate": 1.6508574929634017e-05, "loss": 0.8462681770324707, "step": 5884 }, { "epoch": 0.4819375774140384, "grad_norm": 0.4819859564304352, "learning_rate": 1.6506256356482365e-05, "loss": 0.9043001532554626, "step": 5885 }, { "epoch": 0.4820194699505574, "grad_norm": 0.22115559875965118, "learning_rate": 1.6503937587470255e-05, "loss": 0.8394163846969604, "step": 5886 }, { "epoch": 0.48210136248707636, "grad_norm": 0.1958402693271637, "learning_rate": 1.6501618622698464e-05, "loss": 1.0369101762771606, "step": 5887 }, { "epoch": 0.48218325502359527, "grad_norm": 0.16870145499706268, "learning_rate": 1.6499299462267787e-05, "loss": 0.6988822221755981, "step": 5888 }, { "epoch": 0.48226514756011424, "grad_norm": 0.184292271733284, "learning_rate": 1.6496980106279026e-05, "loss": 0.884005606174469, "step": 5889 }, { "epoch": 0.4823470400966332, "grad_norm": 0.20188480615615845, "learning_rate": 1.6494660554832985e-05, "loss": 0.6285704374313354, "step": 5890 }, { "epoch": 0.4824289326331521, "grad_norm": 0.1911008656024933, "learning_rate": 1.6492340808030486e-05, "loss": 0.9541065692901611, "step": 5891 }, { "epoch": 0.4825108251696711, "grad_norm": 0.21020163595676422, "learning_rate": 1.6490020865972354e-05, "loss": 0.7494750022888184, "step": 5892 }, { "epoch": 0.48259271770619006, "grad_norm": 0.15996553003787994, "learning_rate": 1.6487700728759417e-05, "loss": 0.8565782308578491, "step": 5893 }, { "epoch": 0.482674610242709, "grad_norm": 0.197597473859787, "learning_rate": 1.6485380396492526e-05, "loss": 0.6839019060134888, "step": 5894 }, { "epoch": 0.48275650277922794, "grad_norm": 0.20415787398815155, "learning_rate": 1.6483059869272532e-05, "loss": 0.912388026714325, "step": 5895 }, { "epoch": 0.4828383953157469, "grad_norm": 0.19238954782485962, "learning_rate": 1.648073914720029e-05, "loss": 0.7359582185745239, "step": 5896 }, { "epoch": 0.4829202878522659, "grad_norm": 0.16258898377418518, "learning_rate": 1.647841823037667e-05, "loss": 0.9700924754142761, "step": 5897 }, { "epoch": 0.48300218038878484, "grad_norm": 0.2144785076379776, "learning_rate": 1.6476097118902546e-05, "loss": 1.0964922904968262, "step": 5898 }, { "epoch": 0.48308407292530375, "grad_norm": 0.2112424671649933, "learning_rate": 1.6473775812878806e-05, "loss": 0.6591051816940308, "step": 5899 }, { "epoch": 0.4831659654618227, "grad_norm": 0.20651821792125702, "learning_rate": 1.6471454312406348e-05, "loss": 0.708734929561615, "step": 5900 }, { "epoch": 0.4832478579983417, "grad_norm": 0.19638611376285553, "learning_rate": 1.6469132617586063e-05, "loss": 1.0208868980407715, "step": 5901 }, { "epoch": 0.4833297505348606, "grad_norm": 0.19778789579868317, "learning_rate": 1.646681072851887e-05, "loss": 0.6759640574455261, "step": 5902 }, { "epoch": 0.48341164307137957, "grad_norm": 0.1592661738395691, "learning_rate": 1.6464488645305683e-05, "loss": 0.716952383518219, "step": 5903 }, { "epoch": 0.48349353560789854, "grad_norm": 0.19048267602920532, "learning_rate": 1.6462166368047434e-05, "loss": 0.9284117221832275, "step": 5904 }, { "epoch": 0.4835754281444175, "grad_norm": 0.18087686598300934, "learning_rate": 1.645984389684506e-05, "loss": 1.0534253120422363, "step": 5905 }, { "epoch": 0.4836573206809364, "grad_norm": 0.19676411151885986, "learning_rate": 1.6457521231799494e-05, "loss": 0.9921011328697205, "step": 5906 }, { "epoch": 0.4837392132174554, "grad_norm": 0.17191162705421448, "learning_rate": 1.64551983730117e-05, "loss": 0.6204790472984314, "step": 5907 }, { "epoch": 0.48382110575397436, "grad_norm": 0.18426401913166046, "learning_rate": 1.6452875320582636e-05, "loss": 0.5094653964042664, "step": 5908 }, { "epoch": 0.4839029982904933, "grad_norm": 0.218369260430336, "learning_rate": 1.645055207461327e-05, "loss": 0.7832961082458496, "step": 5909 }, { "epoch": 0.48398489082701224, "grad_norm": 0.15537796914577484, "learning_rate": 1.6448228635204576e-05, "loss": 0.7167912721633911, "step": 5910 }, { "epoch": 0.4840667833635312, "grad_norm": 0.1970459520816803, "learning_rate": 1.644590500245755e-05, "loss": 0.8766236305236816, "step": 5911 }, { "epoch": 0.4841486759000502, "grad_norm": 0.19825111329555511, "learning_rate": 1.644358117647318e-05, "loss": 0.7676331996917725, "step": 5912 }, { "epoch": 0.4842305684365691, "grad_norm": 0.2296890765428543, "learning_rate": 1.6441257157352467e-05, "loss": 0.8500493764877319, "step": 5913 }, { "epoch": 0.48431246097308805, "grad_norm": 0.19898927211761475, "learning_rate": 1.643893294519643e-05, "loss": 0.8490474224090576, "step": 5914 }, { "epoch": 0.484394353509607, "grad_norm": 0.18297028541564941, "learning_rate": 1.6436608540106083e-05, "loss": 1.0119972229003906, "step": 5915 }, { "epoch": 0.484476246046126, "grad_norm": 0.184550940990448, "learning_rate": 1.6434283942182455e-05, "loss": 0.7259731292724609, "step": 5916 }, { "epoch": 0.4845581385826449, "grad_norm": 0.20221969485282898, "learning_rate": 1.6431959151526584e-05, "loss": 0.7339967489242554, "step": 5917 }, { "epoch": 0.48464003111916387, "grad_norm": 0.1757216602563858, "learning_rate": 1.6429634168239513e-05, "loss": 0.9319389462471008, "step": 5918 }, { "epoch": 0.48472192365568284, "grad_norm": 0.17842978239059448, "learning_rate": 1.64273089924223e-05, "loss": 0.7654030919075012, "step": 5919 }, { "epoch": 0.4848038161922018, "grad_norm": 0.1859051138162613, "learning_rate": 1.6424983624176e-05, "loss": 0.6247894763946533, "step": 5920 }, { "epoch": 0.4848857087287207, "grad_norm": 0.16781671345233917, "learning_rate": 1.6422658063601688e-05, "loss": 0.8171154260635376, "step": 5921 }, { "epoch": 0.4849676012652397, "grad_norm": 0.21774910390377045, "learning_rate": 1.6420332310800444e-05, "loss": 0.9938828349113464, "step": 5922 }, { "epoch": 0.48504949380175866, "grad_norm": 0.1769893914461136, "learning_rate": 1.6418006365873348e-05, "loss": 0.866245687007904, "step": 5923 }, { "epoch": 0.48513138633827757, "grad_norm": 0.19895203411579132, "learning_rate": 1.6415680228921497e-05, "loss": 0.7402900457382202, "step": 5924 }, { "epoch": 0.48521327887479654, "grad_norm": 0.14952559769153595, "learning_rate": 1.6413353900046e-05, "loss": 0.8994572758674622, "step": 5925 }, { "epoch": 0.4852951714113155, "grad_norm": 0.16222989559173584, "learning_rate": 1.6411027379347964e-05, "loss": 1.1332429647445679, "step": 5926 }, { "epoch": 0.4853770639478345, "grad_norm": 0.21819794178009033, "learning_rate": 1.6408700666928512e-05, "loss": 0.8343693614006042, "step": 5927 }, { "epoch": 0.4854589564843534, "grad_norm": 0.17140400409698486, "learning_rate": 1.640637376288877e-05, "loss": 0.7677515149116516, "step": 5928 }, { "epoch": 0.48554084902087236, "grad_norm": 0.2273983359336853, "learning_rate": 1.6404046667329876e-05, "loss": 1.0695781707763672, "step": 5929 }, { "epoch": 0.4856227415573913, "grad_norm": 0.18006975948810577, "learning_rate": 1.6401719380352973e-05, "loss": 1.282604694366455, "step": 5930 }, { "epoch": 0.4857046340939103, "grad_norm": 0.17954321205615997, "learning_rate": 1.639939190205922e-05, "loss": 1.0202034711837769, "step": 5931 }, { "epoch": 0.4857865266304292, "grad_norm": 0.15555952489376068, "learning_rate": 1.6397064232549773e-05, "loss": 0.7616036534309387, "step": 5932 }, { "epoch": 0.4858684191669482, "grad_norm": 0.16191786527633667, "learning_rate": 1.6394736371925803e-05, "loss": 0.546235978603363, "step": 5933 }, { "epoch": 0.48595031170346714, "grad_norm": 0.18401175737380981, "learning_rate": 1.6392408320288488e-05, "loss": 0.7593172788619995, "step": 5934 }, { "epoch": 0.48603220423998605, "grad_norm": 0.16298557817935944, "learning_rate": 1.639008007773902e-05, "loss": 1.0414432287216187, "step": 5935 }, { "epoch": 0.486114096776505, "grad_norm": 0.15804295241832733, "learning_rate": 1.638775164437859e-05, "loss": 0.6644487977027893, "step": 5936 }, { "epoch": 0.486195989313024, "grad_norm": 0.17623047530651093, "learning_rate": 1.63854230203084e-05, "loss": 0.7886113524436951, "step": 5937 }, { "epoch": 0.48627788184954296, "grad_norm": 0.17648865282535553, "learning_rate": 1.6383094205629665e-05, "loss": 0.6907986402511597, "step": 5938 }, { "epoch": 0.48635977438606187, "grad_norm": 0.2065678834915161, "learning_rate": 1.6380765200443603e-05, "loss": 0.9648787975311279, "step": 5939 }, { "epoch": 0.48644166692258084, "grad_norm": 0.19891603291034698, "learning_rate": 1.6378436004851446e-05, "loss": 0.8552531003952026, "step": 5940 }, { "epoch": 0.4865235594590998, "grad_norm": 0.1907181739807129, "learning_rate": 1.6376106618954422e-05, "loss": 0.825438916683197, "step": 5941 }, { "epoch": 0.4866054519956188, "grad_norm": 0.17743533849716187, "learning_rate": 1.637377704285378e-05, "loss": 0.9448757767677307, "step": 5942 }, { "epoch": 0.4866873445321377, "grad_norm": 0.21974827349185944, "learning_rate": 1.6371447276650775e-05, "loss": 0.7243618369102478, "step": 5943 }, { "epoch": 0.48676923706865666, "grad_norm": 0.18077319860458374, "learning_rate": 1.6369117320446667e-05, "loss": 1.0222158432006836, "step": 5944 }, { "epoch": 0.4868511296051756, "grad_norm": 0.2170586735010147, "learning_rate": 1.6366787174342723e-05, "loss": 0.8983923196792603, "step": 5945 }, { "epoch": 0.48693302214169454, "grad_norm": 0.16678480803966522, "learning_rate": 1.6364456838440227e-05, "loss": 0.7031627893447876, "step": 5946 }, { "epoch": 0.4870149146782135, "grad_norm": 0.17797419428825378, "learning_rate": 1.636212631284046e-05, "loss": 0.7951322793960571, "step": 5947 }, { "epoch": 0.4870968072147325, "grad_norm": 0.19243201613426208, "learning_rate": 1.6359795597644715e-05, "loss": 0.9492900967597961, "step": 5948 }, { "epoch": 0.48717869975125144, "grad_norm": 0.16273166239261627, "learning_rate": 1.63574646929543e-05, "loss": 0.8243293166160583, "step": 5949 }, { "epoch": 0.48726059228777036, "grad_norm": 0.17069761455059052, "learning_rate": 1.6355133598870514e-05, "loss": 0.7924271821975708, "step": 5950 }, { "epoch": 0.4873424848242893, "grad_norm": 0.17256031930446625, "learning_rate": 1.6352802315494693e-05, "loss": 0.8493505716323853, "step": 5951 }, { "epoch": 0.4874243773608083, "grad_norm": 0.1741335541009903, "learning_rate": 1.6350470842928154e-05, "loss": 0.755685567855835, "step": 5952 }, { "epoch": 0.4875062698973272, "grad_norm": 0.18250291049480438, "learning_rate": 1.634813918127223e-05, "loss": 0.6969490051269531, "step": 5953 }, { "epoch": 0.4875881624338462, "grad_norm": 0.21383148431777954, "learning_rate": 1.6345807330628274e-05, "loss": 0.8579533100128174, "step": 5954 }, { "epoch": 0.48767005497036514, "grad_norm": 0.17530924081802368, "learning_rate": 1.6343475291097626e-05, "loss": 0.6964641213417053, "step": 5955 }, { "epoch": 0.4877519475068841, "grad_norm": 0.16725848615169525, "learning_rate": 1.6341143062781658e-05, "loss": 0.7544265389442444, "step": 5956 }, { "epoch": 0.487833840043403, "grad_norm": 0.19130340218544006, "learning_rate": 1.6338810645781728e-05, "loss": 0.9070687294006348, "step": 5957 }, { "epoch": 0.487915732579922, "grad_norm": 0.1846885085105896, "learning_rate": 1.633647804019922e-05, "loss": 0.8739230036735535, "step": 5958 }, { "epoch": 0.48799762511644096, "grad_norm": 0.2216329425573349, "learning_rate": 1.6334145246135516e-05, "loss": 1.321541428565979, "step": 5959 }, { "epoch": 0.4880795176529599, "grad_norm": 0.16341957449913025, "learning_rate": 1.633181226369201e-05, "loss": 0.6577097773551941, "step": 5960 }, { "epoch": 0.48816141018947884, "grad_norm": 0.19821658730506897, "learning_rate": 1.63294790929701e-05, "loss": 0.713863730430603, "step": 5961 }, { "epoch": 0.4882433027259978, "grad_norm": 0.18818292021751404, "learning_rate": 1.632714573407119e-05, "loss": 0.9049433469772339, "step": 5962 }, { "epoch": 0.4883251952625168, "grad_norm": 0.18383146822452545, "learning_rate": 1.6324812187096716e-05, "loss": 0.6146726012229919, "step": 5963 }, { "epoch": 0.4884070877990357, "grad_norm": 0.1608864665031433, "learning_rate": 1.6322478452148083e-05, "loss": 0.6927241683006287, "step": 5964 }, { "epoch": 0.48848898033555466, "grad_norm": 0.20579831302165985, "learning_rate": 1.632014452932674e-05, "loss": 0.6864463090896606, "step": 5965 }, { "epoch": 0.4885708728720736, "grad_norm": 0.2145933210849762, "learning_rate": 1.6317810418734118e-05, "loss": 1.2520948648452759, "step": 5966 }, { "epoch": 0.4886527654085926, "grad_norm": 0.20621420443058014, "learning_rate": 1.631547612047167e-05, "loss": 0.7455288171768188, "step": 5967 }, { "epoch": 0.4887346579451115, "grad_norm": 0.21172888576984406, "learning_rate": 1.6313141634640862e-05, "loss": 0.8102445602416992, "step": 5968 }, { "epoch": 0.4888165504816305, "grad_norm": 0.17395006120204926, "learning_rate": 1.631080696134315e-05, "loss": 0.6828479170799255, "step": 5969 }, { "epoch": 0.48889844301814944, "grad_norm": 0.20230428874492645, "learning_rate": 1.630847210068001e-05, "loss": 0.5908797979354858, "step": 5970 }, { "epoch": 0.4889803355546684, "grad_norm": 0.1880623996257782, "learning_rate": 1.630613705275293e-05, "loss": 0.6746910214424133, "step": 5971 }, { "epoch": 0.4890622280911873, "grad_norm": 0.14637719094753265, "learning_rate": 1.6303801817663402e-05, "loss": 0.5278902649879456, "step": 5972 }, { "epoch": 0.4891441206277063, "grad_norm": 0.18227972090244293, "learning_rate": 1.630146639551291e-05, "loss": 0.8327375054359436, "step": 5973 }, { "epoch": 0.48922601316422526, "grad_norm": 0.15522220730781555, "learning_rate": 1.6299130786402983e-05, "loss": 0.7231636047363281, "step": 5974 }, { "epoch": 0.4893079057007442, "grad_norm": 0.1990952342748642, "learning_rate": 1.629679499043512e-05, "loss": 1.0277854204177856, "step": 5975 }, { "epoch": 0.48938979823726314, "grad_norm": 0.17304225265979767, "learning_rate": 1.6294459007710852e-05, "loss": 0.7476375102996826, "step": 5976 }, { "epoch": 0.4894716907737821, "grad_norm": 0.18287530541419983, "learning_rate": 1.6292122838331706e-05, "loss": 0.6991099119186401, "step": 5977 }, { "epoch": 0.4895535833103011, "grad_norm": 0.18304510414600372, "learning_rate": 1.6289786482399222e-05, "loss": 0.9188651442527771, "step": 5978 }, { "epoch": 0.48963547584682, "grad_norm": 0.18915781378746033, "learning_rate": 1.6287449940014952e-05, "loss": 0.8288341164588928, "step": 5979 }, { "epoch": 0.48971736838333896, "grad_norm": 0.188652902841568, "learning_rate": 1.628511321128045e-05, "loss": 0.4175039529800415, "step": 5980 }, { "epoch": 0.4897992609198579, "grad_norm": 0.18770867586135864, "learning_rate": 1.6282776296297275e-05, "loss": 0.7712852954864502, "step": 5981 }, { "epoch": 0.4898811534563769, "grad_norm": 0.20129351317882538, "learning_rate": 1.6280439195167007e-05, "loss": 0.9338968992233276, "step": 5982 }, { "epoch": 0.4899630459928958, "grad_norm": 0.22080492973327637, "learning_rate": 1.627810190799122e-05, "loss": 0.8265432119369507, "step": 5983 }, { "epoch": 0.4900449385294148, "grad_norm": 0.20745952427387238, "learning_rate": 1.6275764434871498e-05, "loss": 0.830575168132782, "step": 5984 }, { "epoch": 0.49012683106593374, "grad_norm": 0.18632270395755768, "learning_rate": 1.627342677590945e-05, "loss": 1.0338287353515625, "step": 5985 }, { "epoch": 0.49020872360245266, "grad_norm": 0.22307045757770538, "learning_rate": 1.6271088931206675e-05, "loss": 0.8989386558532715, "step": 5986 }, { "epoch": 0.4902906161389716, "grad_norm": 0.1769031137228012, "learning_rate": 1.6268750900864777e-05, "loss": 0.8107023239135742, "step": 5987 }, { "epoch": 0.4903725086754906, "grad_norm": 0.192116379737854, "learning_rate": 1.6266412684985387e-05, "loss": 0.6837422251701355, "step": 5988 }, { "epoch": 0.49045440121200956, "grad_norm": 0.17722377181053162, "learning_rate": 1.626407428367013e-05, "loss": 1.1288189888000488, "step": 5989 }, { "epoch": 0.4905362937485285, "grad_norm": 0.26542580127716064, "learning_rate": 1.626173569702064e-05, "loss": 0.7757213711738586, "step": 5990 }, { "epoch": 0.49061818628504744, "grad_norm": 0.20512759685516357, "learning_rate": 1.6259396925138568e-05, "loss": 0.848060131072998, "step": 5991 }, { "epoch": 0.4907000788215664, "grad_norm": 0.20065143704414368, "learning_rate": 1.6257057968125556e-05, "loss": 0.8630199432373047, "step": 5992 }, { "epoch": 0.4907819713580854, "grad_norm": 0.19146764278411865, "learning_rate": 1.6254718826083276e-05, "loss": 0.7795783877372742, "step": 5993 }, { "epoch": 0.4908638638946043, "grad_norm": 0.15905913710594177, "learning_rate": 1.625237949911339e-05, "loss": 0.5010542273521423, "step": 5994 }, { "epoch": 0.49094575643112326, "grad_norm": 0.18451066315174103, "learning_rate": 1.6250039987317577e-05, "loss": 0.7587258219718933, "step": 5995 }, { "epoch": 0.4910276489676422, "grad_norm": 0.1795945018529892, "learning_rate": 1.6247700290797524e-05, "loss": 0.681980311870575, "step": 5996 }, { "epoch": 0.49110954150416114, "grad_norm": 0.23591294884681702, "learning_rate": 1.6245360409654918e-05, "loss": 1.0770446062088013, "step": 5997 }, { "epoch": 0.4911914340406801, "grad_norm": 0.16469796001911163, "learning_rate": 1.6243020343991466e-05, "loss": 1.0984690189361572, "step": 5998 }, { "epoch": 0.4912733265771991, "grad_norm": 0.16598966717720032, "learning_rate": 1.6240680093908873e-05, "loss": 0.7699282169342041, "step": 5999 }, { "epoch": 0.49135521911371804, "grad_norm": 0.1616978943347931, "learning_rate": 1.6238339659508855e-05, "loss": 0.7049899101257324, "step": 6000 }, { "epoch": 0.49143711165023696, "grad_norm": 0.15934009850025177, "learning_rate": 1.623599904089314e-05, "loss": 1.1693589687347412, "step": 6001 }, { "epoch": 0.4915190041867559, "grad_norm": 0.1892499327659607, "learning_rate": 1.6233658238163464e-05, "loss": 0.9363872408866882, "step": 6002 }, { "epoch": 0.4916008967232749, "grad_norm": 0.19122911989688873, "learning_rate": 1.6231317251421556e-05, "loss": 1.0013247728347778, "step": 6003 }, { "epoch": 0.49168278925979386, "grad_norm": 0.19167572259902954, "learning_rate": 1.622897608076918e-05, "loss": 0.7464023232460022, "step": 6004 }, { "epoch": 0.4917646817963128, "grad_norm": 0.18945056200027466, "learning_rate": 1.6226634726308083e-05, "loss": 0.9319173097610474, "step": 6005 }, { "epoch": 0.49184657433283174, "grad_norm": 0.2046397477388382, "learning_rate": 1.622429318814003e-05, "loss": 0.6826932430267334, "step": 6006 }, { "epoch": 0.4919284668693507, "grad_norm": 0.16344715654850006, "learning_rate": 1.6221951466366797e-05, "loss": 0.6544229388237, "step": 6007 }, { "epoch": 0.4920103594058696, "grad_norm": 0.16131818294525146, "learning_rate": 1.621960956109017e-05, "loss": 0.7040161490440369, "step": 6008 }, { "epoch": 0.4920922519423886, "grad_norm": 0.18988215923309326, "learning_rate": 1.6217267472411926e-05, "loss": 0.7467964291572571, "step": 6009 }, { "epoch": 0.49217414447890756, "grad_norm": 0.1994233876466751, "learning_rate": 1.6214925200433872e-05, "loss": 0.6349900960922241, "step": 6010 }, { "epoch": 0.49225603701542653, "grad_norm": 0.21836267411708832, "learning_rate": 1.6212582745257807e-05, "loss": 0.8732305765151978, "step": 6011 }, { "epoch": 0.49233792955194544, "grad_norm": 0.15567655861377716, "learning_rate": 1.6210240106985546e-05, "loss": 0.5584937930107117, "step": 6012 }, { "epoch": 0.4924198220884644, "grad_norm": 0.17866085469722748, "learning_rate": 1.6207897285718907e-05, "loss": 0.8120579719543457, "step": 6013 }, { "epoch": 0.4925017146249834, "grad_norm": 0.2261458784341812, "learning_rate": 1.6205554281559725e-05, "loss": 1.1751279830932617, "step": 6014 }, { "epoch": 0.49258360716150235, "grad_norm": 0.1887982189655304, "learning_rate": 1.620321109460983e-05, "loss": 1.096996545791626, "step": 6015 }, { "epoch": 0.49266549969802126, "grad_norm": 0.23590917885303497, "learning_rate": 1.6200867724971067e-05, "loss": 0.8173369765281677, "step": 6016 }, { "epoch": 0.4927473922345402, "grad_norm": 0.17784301936626434, "learning_rate": 1.619852417274529e-05, "loss": 0.6741865873336792, "step": 6017 }, { "epoch": 0.4928292847710592, "grad_norm": 0.22401024401187897, "learning_rate": 1.6196180438034365e-05, "loss": 0.9108845591545105, "step": 6018 }, { "epoch": 0.4929111773075781, "grad_norm": 0.18459883332252502, "learning_rate": 1.6193836520940156e-05, "loss": 0.8867147564888, "step": 6019 }, { "epoch": 0.4929930698440971, "grad_norm": 0.1768030822277069, "learning_rate": 1.6191492421564538e-05, "loss": 0.7810871005058289, "step": 6020 }, { "epoch": 0.49307496238061604, "grad_norm": 0.1696414351463318, "learning_rate": 1.618914814000939e-05, "loss": 1.021082878112793, "step": 6021 }, { "epoch": 0.493156854917135, "grad_norm": 0.1930042803287506, "learning_rate": 1.6186803676376615e-05, "loss": 1.0277472734451294, "step": 6022 }, { "epoch": 0.4932387474536539, "grad_norm": 0.2281750589609146, "learning_rate": 1.6184459030768108e-05, "loss": 1.2067933082580566, "step": 6023 }, { "epoch": 0.4933206399901729, "grad_norm": 0.20835436880588531, "learning_rate": 1.6182114203285773e-05, "loss": 0.739664614200592, "step": 6024 }, { "epoch": 0.49340253252669186, "grad_norm": 0.18414336442947388, "learning_rate": 1.6179769194031534e-05, "loss": 0.885077953338623, "step": 6025 }, { "epoch": 0.4934844250632108, "grad_norm": 0.17989236116409302, "learning_rate": 1.617742400310731e-05, "loss": 0.983352780342102, "step": 6026 }, { "epoch": 0.49356631759972974, "grad_norm": 0.20264087617397308, "learning_rate": 1.617507863061503e-05, "loss": 0.8508093357086182, "step": 6027 }, { "epoch": 0.4936482101362487, "grad_norm": 0.19364526867866516, "learning_rate": 1.6172733076656643e-05, "loss": 0.9638767838478088, "step": 6028 }, { "epoch": 0.4937301026727677, "grad_norm": 0.22690469026565552, "learning_rate": 1.6170387341334086e-05, "loss": 0.705052375793457, "step": 6029 }, { "epoch": 0.4938119952092866, "grad_norm": 0.37671831250190735, "learning_rate": 1.6168041424749317e-05, "loss": 0.7582945823669434, "step": 6030 }, { "epoch": 0.49389388774580556, "grad_norm": 0.2004888653755188, "learning_rate": 1.6165695327004306e-05, "loss": 0.8920019268989563, "step": 6031 }, { "epoch": 0.49397578028232453, "grad_norm": 0.18018996715545654, "learning_rate": 1.6163349048201012e-05, "loss": 0.7740724086761475, "step": 6032 }, { "epoch": 0.4940576728188435, "grad_norm": 0.19254964590072632, "learning_rate": 1.6161002588441424e-05, "loss": 0.8580701351165771, "step": 6033 }, { "epoch": 0.4941395653553624, "grad_norm": 0.1991758644580841, "learning_rate": 1.615865594782753e-05, "loss": 0.9248565435409546, "step": 6034 }, { "epoch": 0.4942214578918814, "grad_norm": 0.18530799448490143, "learning_rate": 1.6156309126461315e-05, "loss": 0.6842769980430603, "step": 6035 }, { "epoch": 0.49430335042840035, "grad_norm": 0.15058189630508423, "learning_rate": 1.615396212444479e-05, "loss": 0.9477306604385376, "step": 6036 }, { "epoch": 0.49438524296491926, "grad_norm": 0.17723846435546875, "learning_rate": 1.615161494187996e-05, "loss": 0.8458277583122253, "step": 6037 }, { "epoch": 0.4944671355014382, "grad_norm": 0.206573486328125, "learning_rate": 1.6149267578868843e-05, "loss": 0.9696130752563477, "step": 6038 }, { "epoch": 0.4945490280379572, "grad_norm": 0.16157765686511993, "learning_rate": 1.6146920035513473e-05, "loss": 0.7796776294708252, "step": 6039 }, { "epoch": 0.49463092057447616, "grad_norm": 0.18003542721271515, "learning_rate": 1.6144572311915876e-05, "loss": 0.8854595422744751, "step": 6040 }, { "epoch": 0.4947128131109951, "grad_norm": 0.17743580043315887, "learning_rate": 1.6142224408178094e-05, "loss": 0.7067782878875732, "step": 6041 }, { "epoch": 0.49479470564751404, "grad_norm": 0.17366071045398712, "learning_rate": 1.6139876324402183e-05, "loss": 0.9901339411735535, "step": 6042 }, { "epoch": 0.494876598184033, "grad_norm": 0.18295088410377502, "learning_rate": 1.613752806069019e-05, "loss": 0.8676550388336182, "step": 6043 }, { "epoch": 0.494958490720552, "grad_norm": 0.1947259157896042, "learning_rate": 1.613517961714419e-05, "loss": 0.6863188147544861, "step": 6044 }, { "epoch": 0.4950403832570709, "grad_norm": 0.20279835164546967, "learning_rate": 1.6132830993866256e-05, "loss": 0.9753381013870239, "step": 6045 }, { "epoch": 0.49512227579358986, "grad_norm": 0.19291162490844727, "learning_rate": 1.613048219095846e-05, "loss": 0.9472445249557495, "step": 6046 }, { "epoch": 0.49520416833010883, "grad_norm": 0.20757408440113068, "learning_rate": 1.6128133208522892e-05, "loss": 0.9282315969467163, "step": 6047 }, { "epoch": 0.49528606086662774, "grad_norm": 0.18038591742515564, "learning_rate": 1.6125784046661663e-05, "loss": 0.5307304859161377, "step": 6048 }, { "epoch": 0.4953679534031467, "grad_norm": 0.1814199984073639, "learning_rate": 1.6123434705476857e-05, "loss": 0.6789615750312805, "step": 6049 }, { "epoch": 0.4954498459396657, "grad_norm": 0.17542913556098938, "learning_rate": 1.61210851850706e-05, "loss": 0.7129001617431641, "step": 6050 }, { "epoch": 0.49553173847618465, "grad_norm": 0.25318071246147156, "learning_rate": 1.611873548554501e-05, "loss": 0.9080148935317993, "step": 6051 }, { "epoch": 0.49561363101270356, "grad_norm": 0.1979486644268036, "learning_rate": 1.6116385607002204e-05, "loss": 1.0141981840133667, "step": 6052 }, { "epoch": 0.4956955235492225, "grad_norm": 0.19000868499279022, "learning_rate": 1.611403554954433e-05, "loss": 0.8312104344367981, "step": 6053 }, { "epoch": 0.4957774160857415, "grad_norm": 0.16280408203601837, "learning_rate": 1.611168531327353e-05, "loss": 0.7266126871109009, "step": 6054 }, { "epoch": 0.49585930862226046, "grad_norm": 0.2032080590724945, "learning_rate": 1.6109334898291944e-05, "loss": 1.0277180671691895, "step": 6055 }, { "epoch": 0.4959412011587794, "grad_norm": 0.22894930839538574, "learning_rate": 1.6106984304701746e-05, "loss": 0.9807442426681519, "step": 6056 }, { "epoch": 0.49602309369529834, "grad_norm": 0.17532365024089813, "learning_rate": 1.6104633532605086e-05, "loss": 0.845416784286499, "step": 6057 }, { "epoch": 0.4961049862318173, "grad_norm": 0.18398912250995636, "learning_rate": 1.6102282582104156e-05, "loss": 0.9250243902206421, "step": 6058 }, { "epoch": 0.4961868787683362, "grad_norm": 0.1566864401102066, "learning_rate": 1.6099931453301123e-05, "loss": 0.9628044962882996, "step": 6059 }, { "epoch": 0.4962687713048552, "grad_norm": 0.17723681032657623, "learning_rate": 1.6097580146298184e-05, "loss": 0.9886734485626221, "step": 6060 }, { "epoch": 0.49635066384137416, "grad_norm": 0.20652930438518524, "learning_rate": 1.6095228661197535e-05, "loss": 0.7692238092422485, "step": 6061 }, { "epoch": 0.49643255637789313, "grad_norm": 0.18562577664852142, "learning_rate": 1.6092876998101383e-05, "loss": 0.7086588144302368, "step": 6062 }, { "epoch": 0.49651444891441204, "grad_norm": 0.17545676231384277, "learning_rate": 1.6090525157111936e-05, "loss": 0.729061484336853, "step": 6063 }, { "epoch": 0.496596341450931, "grad_norm": 0.18465320765972137, "learning_rate": 1.6088173138331423e-05, "loss": 0.8835002183914185, "step": 6064 }, { "epoch": 0.49667823398745, "grad_norm": 0.19601015746593475, "learning_rate": 1.6085820941862063e-05, "loss": 0.9176339507102966, "step": 6065 }, { "epoch": 0.49676012652396895, "grad_norm": 0.148952454328537, "learning_rate": 1.60834685678061e-05, "loss": 0.7268451452255249, "step": 6066 }, { "epoch": 0.49684201906048786, "grad_norm": 0.15964043140411377, "learning_rate": 1.608111601626577e-05, "loss": 0.730918824672699, "step": 6067 }, { "epoch": 0.49692391159700683, "grad_norm": 0.14792898297309875, "learning_rate": 1.6078763287343332e-05, "loss": 0.8943265080451965, "step": 6068 }, { "epoch": 0.4970058041335258, "grad_norm": 0.18148761987686157, "learning_rate": 1.6076410381141043e-05, "loss": 0.8143479228019714, "step": 6069 }, { "epoch": 0.4970876966700447, "grad_norm": 0.15110190212726593, "learning_rate": 1.6074057297761166e-05, "loss": 0.6135390996932983, "step": 6070 }, { "epoch": 0.4971695892065637, "grad_norm": 0.18802663683891296, "learning_rate": 1.6071704037305982e-05, "loss": 0.5304161906242371, "step": 6071 }, { "epoch": 0.49725148174308265, "grad_norm": 0.21430660784244537, "learning_rate": 1.606935059987777e-05, "loss": 1.1035454273223877, "step": 6072 }, { "epoch": 0.4973333742796016, "grad_norm": 0.1921183168888092, "learning_rate": 1.6066996985578823e-05, "loss": 0.8215813040733337, "step": 6073 }, { "epoch": 0.4974152668161205, "grad_norm": 0.20075133442878723, "learning_rate": 1.6064643194511433e-05, "loss": 0.8783615231513977, "step": 6074 }, { "epoch": 0.4974971593526395, "grad_norm": 0.18515415489673615, "learning_rate": 1.6062289226777907e-05, "loss": 0.6204794049263, "step": 6075 }, { "epoch": 0.49757905188915846, "grad_norm": 0.16364100575447083, "learning_rate": 1.6059935082480563e-05, "loss": 1.0888397693634033, "step": 6076 }, { "epoch": 0.49766094442567743, "grad_norm": 0.17873060703277588, "learning_rate": 1.605758076172172e-05, "loss": 0.8292990922927856, "step": 6077 }, { "epoch": 0.49774283696219634, "grad_norm": 0.17903932929039001, "learning_rate": 1.60552262646037e-05, "loss": 1.021253228187561, "step": 6078 }, { "epoch": 0.4978247294987153, "grad_norm": 0.19135256111621857, "learning_rate": 1.605287159122885e-05, "loss": 0.8693904876708984, "step": 6079 }, { "epoch": 0.4979066220352343, "grad_norm": 0.16468168795108795, "learning_rate": 1.6050516741699503e-05, "loss": 0.8124088048934937, "step": 6080 }, { "epoch": 0.4979885145717532, "grad_norm": 0.18407782912254333, "learning_rate": 1.604816171611802e-05, "loss": 0.6734189987182617, "step": 6081 }, { "epoch": 0.49807040710827216, "grad_norm": 0.16637873649597168, "learning_rate": 1.604580651458675e-05, "loss": 0.5516297221183777, "step": 6082 }, { "epoch": 0.49815229964479113, "grad_norm": 0.20353186130523682, "learning_rate": 1.6043451137208074e-05, "loss": 0.8016135096549988, "step": 6083 }, { "epoch": 0.4982341921813101, "grad_norm": 0.1805795580148697, "learning_rate": 1.604109558408435e-05, "loss": 0.6310434937477112, "step": 6084 }, { "epoch": 0.498316084717829, "grad_norm": 0.19343794882297516, "learning_rate": 1.6038739855317972e-05, "loss": 1.0442055463790894, "step": 6085 }, { "epoch": 0.498397977254348, "grad_norm": 0.16567005217075348, "learning_rate": 1.6036383951011325e-05, "loss": 0.7219021916389465, "step": 6086 }, { "epoch": 0.49847986979086695, "grad_norm": 0.1630488485097885, "learning_rate": 1.6034027871266804e-05, "loss": 0.5867639183998108, "step": 6087 }, { "epoch": 0.4985617623273859, "grad_norm": 0.1810281127691269, "learning_rate": 1.6031671616186818e-05, "loss": 0.7500725388526917, "step": 6088 }, { "epoch": 0.49864365486390483, "grad_norm": 0.17640671133995056, "learning_rate": 1.602931518587378e-05, "loss": 0.7010160684585571, "step": 6089 }, { "epoch": 0.4987255474004238, "grad_norm": 0.17068596184253693, "learning_rate": 1.602695858043011e-05, "loss": 0.7685654759407043, "step": 6090 }, { "epoch": 0.49880743993694276, "grad_norm": 0.1880425065755844, "learning_rate": 1.6024601799958232e-05, "loss": 0.7995885014533997, "step": 6091 }, { "epoch": 0.4988893324734617, "grad_norm": 0.15210434794425964, "learning_rate": 1.602224484456058e-05, "loss": 0.7728027105331421, "step": 6092 }, { "epoch": 0.49897122500998065, "grad_norm": 0.1873197704553604, "learning_rate": 1.6019887714339605e-05, "loss": 1.0509437322616577, "step": 6093 }, { "epoch": 0.4990531175464996, "grad_norm": 0.16454248130321503, "learning_rate": 1.6017530409397758e-05, "loss": 0.784302830696106, "step": 6094 }, { "epoch": 0.4991350100830186, "grad_norm": 0.19081909954547882, "learning_rate": 1.6015172929837484e-05, "loss": 0.8579769730567932, "step": 6095 }, { "epoch": 0.4992169026195375, "grad_norm": 0.18529193103313446, "learning_rate": 1.601281527576126e-05, "loss": 0.7277228832244873, "step": 6096 }, { "epoch": 0.49929879515605646, "grad_norm": 0.19794096052646637, "learning_rate": 1.601045744727156e-05, "loss": 0.6129662990570068, "step": 6097 }, { "epoch": 0.49938068769257543, "grad_norm": 0.18975374102592468, "learning_rate": 1.6008099444470853e-05, "loss": 1.054952621459961, "step": 6098 }, { "epoch": 0.49946258022909434, "grad_norm": 0.28653237223625183, "learning_rate": 1.600574126746164e-05, "loss": 0.8132591843605042, "step": 6099 }, { "epoch": 0.4995444727656133, "grad_norm": 0.16355295479297638, "learning_rate": 1.6003382916346416e-05, "loss": 0.8547933101654053, "step": 6100 }, { "epoch": 0.4996263653021323, "grad_norm": 0.21802574396133423, "learning_rate": 1.600102439122768e-05, "loss": 0.9124488234519958, "step": 6101 }, { "epoch": 0.49970825783865125, "grad_norm": 0.203663632273674, "learning_rate": 1.5998665692207946e-05, "loss": 0.9423521161079407, "step": 6102 }, { "epoch": 0.49979015037517016, "grad_norm": 0.18875637650489807, "learning_rate": 1.5996306819389733e-05, "loss": 0.7195621132850647, "step": 6103 }, { "epoch": 0.49987204291168913, "grad_norm": 0.183464914560318, "learning_rate": 1.599394777287556e-05, "loss": 0.8054258227348328, "step": 6104 }, { "epoch": 0.4999539354482081, "grad_norm": 0.22302785515785217, "learning_rate": 1.5991588552767974e-05, "loss": 1.0078012943267822, "step": 6105 }, { "epoch": 0.500035827984727, "grad_norm": 0.18861261010169983, "learning_rate": 1.59892291591695e-05, "loss": 1.0852680206298828, "step": 6106 }, { "epoch": 0.500117720521246, "grad_norm": 0.1930169314146042, "learning_rate": 1.5986869592182705e-05, "loss": 0.7914626002311707, "step": 6107 }, { "epoch": 0.500199613057765, "grad_norm": 0.203404039144516, "learning_rate": 1.5984509851910136e-05, "loss": 0.9395484328269958, "step": 6108 }, { "epoch": 0.5002815055942839, "grad_norm": 0.19660070538520813, "learning_rate": 1.5982149938454353e-05, "loss": 0.9900261163711548, "step": 6109 }, { "epoch": 0.5003633981308029, "grad_norm": 0.23250092566013336, "learning_rate": 1.5979789851917938e-05, "loss": 1.376842737197876, "step": 6110 }, { "epoch": 0.5004452906673218, "grad_norm": 0.1906135380268097, "learning_rate": 1.5977429592403462e-05, "loss": 0.8129264116287231, "step": 6111 }, { "epoch": 0.5005271832038407, "grad_norm": 0.1819225698709488, "learning_rate": 1.5975069160013513e-05, "loss": 0.8175798058509827, "step": 6112 }, { "epoch": 0.5006090757403597, "grad_norm": 0.17959755659103394, "learning_rate": 1.597270855485069e-05, "loss": 0.7669265866279602, "step": 6113 }, { "epoch": 0.5006909682768786, "grad_norm": 0.15156373381614685, "learning_rate": 1.5970347777017587e-05, "loss": 0.8157228231430054, "step": 6114 }, { "epoch": 0.5007728608133977, "grad_norm": 0.17712171375751495, "learning_rate": 1.5967986826616812e-05, "loss": 0.9094599485397339, "step": 6115 }, { "epoch": 0.5008547533499166, "grad_norm": 0.1868724524974823, "learning_rate": 1.5965625703751e-05, "loss": 0.9826311469078064, "step": 6116 }, { "epoch": 0.5009366458864355, "grad_norm": 0.1626458317041397, "learning_rate": 1.5963264408522752e-05, "loss": 0.6231788992881775, "step": 6117 }, { "epoch": 0.5010185384229545, "grad_norm": 0.16001787781715393, "learning_rate": 1.596090294103471e-05, "loss": 0.5353693962097168, "step": 6118 }, { "epoch": 0.5011004309594734, "grad_norm": 0.1653135120868683, "learning_rate": 1.5958541301389514e-05, "loss": 0.7741868495941162, "step": 6119 }, { "epoch": 0.5011823234959923, "grad_norm": 0.18304568529129028, "learning_rate": 1.595617948968981e-05, "loss": 0.8161888718605042, "step": 6120 }, { "epoch": 0.5012642160325114, "grad_norm": 0.20770657062530518, "learning_rate": 1.5953817506038248e-05, "loss": 1.0122888088226318, "step": 6121 }, { "epoch": 0.5013461085690303, "grad_norm": 0.16320550441741943, "learning_rate": 1.5951455350537495e-05, "loss": 0.6598148941993713, "step": 6122 }, { "epoch": 0.5014280011055492, "grad_norm": 0.15605279803276062, "learning_rate": 1.5949093023290215e-05, "loss": 0.6082609295845032, "step": 6123 }, { "epoch": 0.5015098936420682, "grad_norm": 0.14467981457710266, "learning_rate": 1.5946730524399093e-05, "loss": 0.5528988242149353, "step": 6124 }, { "epoch": 0.5015917861785871, "grad_norm": 0.20720174908638, "learning_rate": 1.5944367853966802e-05, "loss": 0.8028239607810974, "step": 6125 }, { "epoch": 0.5016736787151062, "grad_norm": 0.17479228973388672, "learning_rate": 1.5942005012096038e-05, "loss": 0.7395589351654053, "step": 6126 }, { "epoch": 0.5017555712516251, "grad_norm": 0.17119121551513672, "learning_rate": 1.5939641998889503e-05, "loss": 0.805945098400116, "step": 6127 }, { "epoch": 0.501837463788144, "grad_norm": 0.19484971463680267, "learning_rate": 1.59372788144499e-05, "loss": 1.003394603729248, "step": 6128 }, { "epoch": 0.501919356324663, "grad_norm": 0.17105597257614136, "learning_rate": 1.5934915458879943e-05, "loss": 0.9606291055679321, "step": 6129 }, { "epoch": 0.5020012488611819, "grad_norm": 0.16642872989177704, "learning_rate": 1.5932551932282354e-05, "loss": 0.9622793197631836, "step": 6130 }, { "epoch": 0.5020831413977008, "grad_norm": 0.19118767976760864, "learning_rate": 1.593018823475986e-05, "loss": 1.0155919790267944, "step": 6131 }, { "epoch": 0.5021650339342199, "grad_norm": 0.2053527981042862, "learning_rate": 1.5927824366415197e-05, "loss": 0.7105567455291748, "step": 6132 }, { "epoch": 0.5022469264707388, "grad_norm": 0.15834151208400726, "learning_rate": 1.5925460327351115e-05, "loss": 1.0442135334014893, "step": 6133 }, { "epoch": 0.5023288190072577, "grad_norm": 0.1950991451740265, "learning_rate": 1.592309611767035e-05, "loss": 0.7112772464752197, "step": 6134 }, { "epoch": 0.5024107115437767, "grad_norm": 0.19311010837554932, "learning_rate": 1.5920731737475676e-05, "loss": 0.5621709227561951, "step": 6135 }, { "epoch": 0.5024926040802956, "grad_norm": 0.17039185762405396, "learning_rate": 1.591836718686985e-05, "loss": 0.8528304100036621, "step": 6136 }, { "epoch": 0.5025744966168146, "grad_norm": 0.19568243622779846, "learning_rate": 1.591600246595565e-05, "loss": 0.9229130148887634, "step": 6137 }, { "epoch": 0.5026563891533335, "grad_norm": 0.19473113119602203, "learning_rate": 1.591363757483585e-05, "loss": 0.8811407089233398, "step": 6138 }, { "epoch": 0.5027382816898525, "grad_norm": 0.1736166924238205, "learning_rate": 1.591127251361325e-05, "loss": 0.41226258873939514, "step": 6139 }, { "epoch": 0.5028201742263715, "grad_norm": 0.16802465915679932, "learning_rate": 1.590890728239063e-05, "loss": 0.7620505690574646, "step": 6140 }, { "epoch": 0.5029020667628904, "grad_norm": 0.16849781572818756, "learning_rate": 1.59065418812708e-05, "loss": 0.5825456976890564, "step": 6141 }, { "epoch": 0.5029839592994093, "grad_norm": 0.17279082536697388, "learning_rate": 1.5904176310356572e-05, "loss": 0.9386383295059204, "step": 6142 }, { "epoch": 0.5030658518359283, "grad_norm": 0.18946796655654907, "learning_rate": 1.590181056975076e-05, "loss": 0.6538257002830505, "step": 6143 }, { "epoch": 0.5031477443724472, "grad_norm": 0.17477157711982727, "learning_rate": 1.5899444659556193e-05, "loss": 0.5137530565261841, "step": 6144 }, { "epoch": 0.5032296369089662, "grad_norm": 0.17897672951221466, "learning_rate": 1.58970785798757e-05, "loss": 0.9241603016853333, "step": 6145 }, { "epoch": 0.5033115294454852, "grad_norm": 0.18752330541610718, "learning_rate": 1.5894712330812117e-05, "loss": 0.9729989767074585, "step": 6146 }, { "epoch": 0.5033934219820041, "grad_norm": 0.1529158353805542, "learning_rate": 1.58923459124683e-05, "loss": 0.7252431511878967, "step": 6147 }, { "epoch": 0.5034753145185231, "grad_norm": 0.18967436254024506, "learning_rate": 1.5889979324947095e-05, "loss": 0.9941389560699463, "step": 6148 }, { "epoch": 0.503557207055042, "grad_norm": 0.20673413574695587, "learning_rate": 1.588761256835137e-05, "loss": 0.694774329662323, "step": 6149 }, { "epoch": 0.503639099591561, "grad_norm": 0.18518875539302826, "learning_rate": 1.5885245642783984e-05, "loss": 0.9380534291267395, "step": 6150 }, { "epoch": 0.50372099212808, "grad_norm": 0.17920972406864166, "learning_rate": 1.588287854834783e-05, "loss": 0.68118816614151, "step": 6151 }, { "epoch": 0.5038028846645989, "grad_norm": 0.18359778821468353, "learning_rate": 1.5880511285145776e-05, "loss": 1.0915721654891968, "step": 6152 }, { "epoch": 0.5038847772011178, "grad_norm": 0.1891583949327469, "learning_rate": 1.587814385328072e-05, "loss": 0.7668443918228149, "step": 6153 }, { "epoch": 0.5039666697376368, "grad_norm": 0.19533923268318176, "learning_rate": 1.587577625285556e-05, "loss": 0.8623557686805725, "step": 6154 }, { "epoch": 0.5040485622741557, "grad_norm": 0.18547093868255615, "learning_rate": 1.5873408483973198e-05, "loss": 0.8885098695755005, "step": 6155 }, { "epoch": 0.5041304548106746, "grad_norm": 0.17513704299926758, "learning_rate": 1.5871040546736552e-05, "loss": 0.6870269179344177, "step": 6156 }, { "epoch": 0.5042123473471937, "grad_norm": 0.22231684625148773, "learning_rate": 1.5868672441248542e-05, "loss": 0.9237499833106995, "step": 6157 }, { "epoch": 0.5042942398837126, "grad_norm": 0.1796477735042572, "learning_rate": 1.586630416761209e-05, "loss": 0.657782256603241, "step": 6158 }, { "epoch": 0.5043761324202316, "grad_norm": 0.19057083129882812, "learning_rate": 1.5863935725930138e-05, "loss": 0.8185007572174072, "step": 6159 }, { "epoch": 0.5044580249567505, "grad_norm": 0.20670312643051147, "learning_rate": 1.586156711630562e-05, "loss": 0.9608175158500671, "step": 6160 }, { "epoch": 0.5045399174932694, "grad_norm": 0.25205540657043457, "learning_rate": 1.5859198338841494e-05, "loss": 0.7630547881126404, "step": 6161 }, { "epoch": 0.5046218100297885, "grad_norm": 0.21782465279102325, "learning_rate": 1.5856829393640717e-05, "loss": 0.9521205425262451, "step": 6162 }, { "epoch": 0.5047037025663074, "grad_norm": 0.18899668753147125, "learning_rate": 1.5854460280806244e-05, "loss": 1.1117991209030151, "step": 6163 }, { "epoch": 0.5047855951028263, "grad_norm": 0.17173950374126434, "learning_rate": 1.5852091000441056e-05, "loss": 0.8057806491851807, "step": 6164 }, { "epoch": 0.5048674876393453, "grad_norm": 0.18325428664684296, "learning_rate": 1.5849721552648126e-05, "loss": 0.8163033723831177, "step": 6165 }, { "epoch": 0.5049493801758642, "grad_norm": 0.1738894134759903, "learning_rate": 1.5847351937530438e-05, "loss": 0.7633187770843506, "step": 6166 }, { "epoch": 0.5050312727123831, "grad_norm": 0.1836252510547638, "learning_rate": 1.5844982155190994e-05, "loss": 0.7511736154556274, "step": 6167 }, { "epoch": 0.5051131652489022, "grad_norm": 0.24098731577396393, "learning_rate": 1.5842612205732786e-05, "loss": 0.8980438709259033, "step": 6168 }, { "epoch": 0.5051950577854211, "grad_norm": 0.18256351351737976, "learning_rate": 1.5840242089258824e-05, "loss": 0.966129720211029, "step": 6169 }, { "epoch": 0.5052769503219401, "grad_norm": 0.20424871146678925, "learning_rate": 1.5837871805872127e-05, "loss": 1.0273600816726685, "step": 6170 }, { "epoch": 0.505358842858459, "grad_norm": 0.19612838327884674, "learning_rate": 1.5835501355675715e-05, "loss": 0.8313472867012024, "step": 6171 }, { "epoch": 0.5054407353949779, "grad_norm": 0.20804281532764435, "learning_rate": 1.583313073877261e-05, "loss": 0.7504562139511108, "step": 6172 }, { "epoch": 0.5055226279314969, "grad_norm": 0.1730232834815979, "learning_rate": 1.5830759955265864e-05, "loss": 1.065096139907837, "step": 6173 }, { "epoch": 0.5056045204680159, "grad_norm": 0.1905190646648407, "learning_rate": 1.5828389005258508e-05, "loss": 0.5174404978752136, "step": 6174 }, { "epoch": 0.5056864130045348, "grad_norm": 0.1818682700395584, "learning_rate": 1.5826017888853597e-05, "loss": 0.9148801565170288, "step": 6175 }, { "epoch": 0.5057683055410538, "grad_norm": 0.18313413858413696, "learning_rate": 1.5823646606154193e-05, "loss": 0.9049778580665588, "step": 6176 }, { "epoch": 0.5058501980775727, "grad_norm": 0.1969725638628006, "learning_rate": 1.582127515726336e-05, "loss": 1.2798010110855103, "step": 6177 }, { "epoch": 0.5059320906140916, "grad_norm": 0.1933370679616928, "learning_rate": 1.5818903542284164e-05, "loss": 1.0624165534973145, "step": 6178 }, { "epoch": 0.5060139831506106, "grad_norm": 0.16406072676181793, "learning_rate": 1.58165317613197e-05, "loss": 0.6419641375541687, "step": 6179 }, { "epoch": 0.5060958756871295, "grad_norm": 0.17887158691883087, "learning_rate": 1.5814159814473038e-05, "loss": 0.7151022553443909, "step": 6180 }, { "epoch": 0.5061777682236486, "grad_norm": 0.1604248285293579, "learning_rate": 1.5811787701847286e-05, "loss": 0.9685138463973999, "step": 6181 }, { "epoch": 0.5062596607601675, "grad_norm": 0.14742308855056763, "learning_rate": 1.580941542354554e-05, "loss": 0.6921091675758362, "step": 6182 }, { "epoch": 0.5063415532966864, "grad_norm": 0.20879127085208893, "learning_rate": 1.5807042979670908e-05, "loss": 0.9967896938323975, "step": 6183 }, { "epoch": 0.5064234458332054, "grad_norm": 0.1870083510875702, "learning_rate": 1.580467037032651e-05, "loss": 0.8751665949821472, "step": 6184 }, { "epoch": 0.5065053383697243, "grad_norm": 0.17805488407611847, "learning_rate": 1.5802297595615467e-05, "loss": 0.9428232312202454, "step": 6185 }, { "epoch": 0.5065872309062432, "grad_norm": 0.20805010199546814, "learning_rate": 1.579992465564091e-05, "loss": 0.7506993412971497, "step": 6186 }, { "epoch": 0.5066691234427623, "grad_norm": 0.1952597200870514, "learning_rate": 1.579755155050598e-05, "loss": 0.8962850570678711, "step": 6187 }, { "epoch": 0.5067510159792812, "grad_norm": 0.1840689331293106, "learning_rate": 1.5795178280313815e-05, "loss": 0.6820369362831116, "step": 6188 }, { "epoch": 0.5068329085158001, "grad_norm": 0.19097687304019928, "learning_rate": 1.5792804845167567e-05, "loss": 1.0595669746398926, "step": 6189 }, { "epoch": 0.5069148010523191, "grad_norm": 0.20877598226070404, "learning_rate": 1.5790431245170405e-05, "loss": 0.9497317671775818, "step": 6190 }, { "epoch": 0.506996693588838, "grad_norm": 0.19021402299404144, "learning_rate": 1.578805748042549e-05, "loss": 0.7152024507522583, "step": 6191 }, { "epoch": 0.507078586125357, "grad_norm": 0.15502318739891052, "learning_rate": 1.5785683551035993e-05, "loss": 0.6683006882667542, "step": 6192 }, { "epoch": 0.507160478661876, "grad_norm": 0.1646219789981842, "learning_rate": 1.5783309457105097e-05, "loss": 0.8411416411399841, "step": 6193 }, { "epoch": 0.5072423711983949, "grad_norm": 0.15915091335773468, "learning_rate": 1.578093519873599e-05, "loss": 0.8426781296730042, "step": 6194 }, { "epoch": 0.5073242637349139, "grad_norm": 0.1753385066986084, "learning_rate": 1.5778560776031863e-05, "loss": 0.8337968587875366, "step": 6195 }, { "epoch": 0.5074061562714328, "grad_norm": 0.17402473092079163, "learning_rate": 1.5776186189095926e-05, "loss": 1.0381420850753784, "step": 6196 }, { "epoch": 0.5074880488079517, "grad_norm": 0.19346854090690613, "learning_rate": 1.577381143803138e-05, "loss": 0.7190743088722229, "step": 6197 }, { "epoch": 0.5075699413444708, "grad_norm": 0.20574545860290527, "learning_rate": 1.5771436522941453e-05, "loss": 0.9691641926765442, "step": 6198 }, { "epoch": 0.5076518338809897, "grad_norm": 0.20631082355976105, "learning_rate": 1.5769061443929356e-05, "loss": 1.0389184951782227, "step": 6199 }, { "epoch": 0.5077337264175086, "grad_norm": 0.1436527967453003, "learning_rate": 1.5766686201098325e-05, "loss": 0.7563503384590149, "step": 6200 }, { "epoch": 0.5078156189540276, "grad_norm": 0.2222694456577301, "learning_rate": 1.57643107945516e-05, "loss": 0.796898365020752, "step": 6201 }, { "epoch": 0.5078975114905465, "grad_norm": 0.2159685492515564, "learning_rate": 1.5761935224392422e-05, "loss": 1.2416822910308838, "step": 6202 }, { "epoch": 0.5079794040270654, "grad_norm": 0.23369912803173065, "learning_rate": 1.5759559490724044e-05, "loss": 0.5414125323295593, "step": 6203 }, { "epoch": 0.5080612965635845, "grad_norm": 0.1802685260772705, "learning_rate": 1.5757183593649728e-05, "loss": 0.9305001497268677, "step": 6204 }, { "epoch": 0.5081431891001034, "grad_norm": 0.16473180055618286, "learning_rate": 1.575480753327274e-05, "loss": 0.8268470764160156, "step": 6205 }, { "epoch": 0.5082250816366224, "grad_norm": 0.2181403934955597, "learning_rate": 1.575243130969635e-05, "loss": 0.9849451780319214, "step": 6206 }, { "epoch": 0.5083069741731413, "grad_norm": 0.1859103888273239, "learning_rate": 1.575005492302384e-05, "loss": 0.775029718875885, "step": 6207 }, { "epoch": 0.5083888667096602, "grad_norm": 0.19782453775405884, "learning_rate": 1.5747678373358498e-05, "loss": 0.9369567632675171, "step": 6208 }, { "epoch": 0.5084707592461792, "grad_norm": 0.19090162217617035, "learning_rate": 1.5745301660803615e-05, "loss": 0.8635470867156982, "step": 6209 }, { "epoch": 0.5085526517826982, "grad_norm": 0.16779080033302307, "learning_rate": 1.57429247854625e-05, "loss": 1.0011975765228271, "step": 6210 }, { "epoch": 0.5086345443192171, "grad_norm": 0.18283754587173462, "learning_rate": 1.5740547747438456e-05, "loss": 1.3412443399429321, "step": 6211 }, { "epoch": 0.5087164368557361, "grad_norm": 0.1835862249135971, "learning_rate": 1.5738170546834804e-05, "loss": 1.0479602813720703, "step": 6212 }, { "epoch": 0.508798329392255, "grad_norm": 0.16380038857460022, "learning_rate": 1.573579318375486e-05, "loss": 0.5879664421081543, "step": 6213 }, { "epoch": 0.5088802219287739, "grad_norm": 0.17093974351882935, "learning_rate": 1.5733415658301957e-05, "loss": 0.5378586649894714, "step": 6214 }, { "epoch": 0.5089621144652929, "grad_norm": 0.18705593049526215, "learning_rate": 1.5731037970579435e-05, "loss": 0.8711770176887512, "step": 6215 } ], "logging_steps": 1, "max_steps": 15265, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4580, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.7325972803557335e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }