{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8188890405350076, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001364815067558346, "grad_norm": 5.499263763427734, "learning_rate": 2.666666666666667e-07, "loss": 2.2291, "step": 1 }, { "epoch": 0.0002729630135116692, "grad_norm": 4.660139560699463, "learning_rate": 5.333333333333335e-07, "loss": 2.16, "step": 2 }, { "epoch": 0.00040944452026750374, "grad_norm": 4.5329132080078125, "learning_rate": 8.000000000000001e-07, "loss": 2.2355, "step": 3 }, { "epoch": 0.0005459260270233384, "grad_norm": 4.985339641571045, "learning_rate": 1.066666666666667e-06, "loss": 2.1168, "step": 4 }, { "epoch": 0.0006824075337791729, "grad_norm": 4.656001567840576, "learning_rate": 1.3333333333333334e-06, "loss": 2.1413, "step": 5 }, { "epoch": 0.0008188890405350075, "grad_norm": 3.8532602787017822, "learning_rate": 1.6000000000000001e-06, "loss": 2.0836, "step": 6 }, { "epoch": 0.0009553705472908421, "grad_norm": 5.339655876159668, "learning_rate": 1.8666666666666669e-06, "loss": 2.1081, "step": 7 }, { "epoch": 0.0010918520540466768, "grad_norm": 5.364240646362305, "learning_rate": 2.133333333333334e-06, "loss": 2.1882, "step": 8 }, { "epoch": 0.0012283335608025113, "grad_norm": 4.23295783996582, "learning_rate": 2.4000000000000003e-06, "loss": 2.1973, "step": 9 }, { "epoch": 0.0013648150675583458, "grad_norm": 3.9411795139312744, "learning_rate": 2.666666666666667e-06, "loss": 2.0814, "step": 10 }, { "epoch": 0.0015012965743141805, "grad_norm": 3.924015760421753, "learning_rate": 2.9333333333333338e-06, "loss": 2.046, "step": 11 }, { "epoch": 0.001637778081070015, "grad_norm": 3.311981201171875, "learning_rate": 3.2000000000000003e-06, "loss": 2.1344, "step": 12 }, { "epoch": 0.0017742595878258497, "grad_norm": 3.360774040222168, "learning_rate": 3.4666666666666672e-06, "loss": 2.0675, "step": 13 }, { "epoch": 0.0019107410945816842, "grad_norm": 2.448817253112793, "learning_rate": 3.7333333333333337e-06, "loss": 2.1028, "step": 14 }, { "epoch": 0.002047222601337519, "grad_norm": 2.544743299484253, "learning_rate": 4.000000000000001e-06, "loss": 2.1193, "step": 15 }, { "epoch": 0.0021837041080933536, "grad_norm": 2.36195969581604, "learning_rate": 4.266666666666668e-06, "loss": 2.0871, "step": 16 }, { "epoch": 0.002320185614849188, "grad_norm": 1.793238878250122, "learning_rate": 4.533333333333334e-06, "loss": 1.8511, "step": 17 }, { "epoch": 0.0024566671216050225, "grad_norm": 2.0045647621154785, "learning_rate": 4.800000000000001e-06, "loss": 2.1472, "step": 18 }, { "epoch": 0.0025931486283608573, "grad_norm": 1.3631515502929688, "learning_rate": 5.0666666666666676e-06, "loss": 1.9385, "step": 19 }, { "epoch": 0.0027296301351166915, "grad_norm": 1.7018771171569824, "learning_rate": 5.333333333333334e-06, "loss": 1.9948, "step": 20 }, { "epoch": 0.0028661116418725262, "grad_norm": 1.1106555461883545, "learning_rate": 5.600000000000001e-06, "loss": 1.9604, "step": 21 }, { "epoch": 0.003002593148628361, "grad_norm": 1.2149659395217896, "learning_rate": 5.8666666666666675e-06, "loss": 2.0106, "step": 22 }, { "epoch": 0.0031390746553841956, "grad_norm": 0.8920502066612244, "learning_rate": 6.133333333333334e-06, "loss": 1.7102, "step": 23 }, { "epoch": 0.00327555616214003, "grad_norm": 0.8421320915222168, "learning_rate": 6.4000000000000006e-06, "loss": 1.9706, "step": 24 }, { "epoch": 0.0034120376688958646, "grad_norm": 0.8898251056671143, "learning_rate": 6.666666666666667e-06, "loss": 1.9256, "step": 25 }, { "epoch": 0.0035485191756516993, "grad_norm": 0.8477884531021118, "learning_rate": 6.9333333333333344e-06, "loss": 2.0185, "step": 26 }, { "epoch": 0.0036850006824075336, "grad_norm": 0.6731047630310059, "learning_rate": 7.2000000000000005e-06, "loss": 1.8752, "step": 27 }, { "epoch": 0.0038214821891633683, "grad_norm": 0.8444198369979858, "learning_rate": 7.4666666666666675e-06, "loss": 1.9105, "step": 28 }, { "epoch": 0.003957963695919203, "grad_norm": 0.6602134704589844, "learning_rate": 7.733333333333334e-06, "loss": 1.888, "step": 29 }, { "epoch": 0.004094445202675038, "grad_norm": 0.5404819846153259, "learning_rate": 8.000000000000001e-06, "loss": 1.9087, "step": 30 }, { "epoch": 0.004230926709430872, "grad_norm": 0.5722994804382324, "learning_rate": 8.266666666666667e-06, "loss": 1.8945, "step": 31 }, { "epoch": 0.004367408216186707, "grad_norm": 0.5562062859535217, "learning_rate": 8.533333333333335e-06, "loss": 1.8278, "step": 32 }, { "epoch": 0.004503889722942541, "grad_norm": 0.5615333914756775, "learning_rate": 8.8e-06, "loss": 1.956, "step": 33 }, { "epoch": 0.004640371229698376, "grad_norm": 0.5378018021583557, "learning_rate": 9.066666666666667e-06, "loss": 1.8295, "step": 34 }, { "epoch": 0.004776852736454211, "grad_norm": 0.47099870443344116, "learning_rate": 9.333333333333334e-06, "loss": 1.8913, "step": 35 }, { "epoch": 0.004913334243210045, "grad_norm": 0.4328675866127014, "learning_rate": 9.600000000000001e-06, "loss": 1.8578, "step": 36 }, { "epoch": 0.005049815749965879, "grad_norm": 0.5030441284179688, "learning_rate": 9.866666666666668e-06, "loss": 1.8735, "step": 37 }, { "epoch": 0.0051862972567217145, "grad_norm": 0.43279239535331726, "learning_rate": 1.0133333333333335e-05, "loss": 1.8315, "step": 38 }, { "epoch": 0.005322778763477549, "grad_norm": 0.4185083210468292, "learning_rate": 1.04e-05, "loss": 1.8237, "step": 39 }, { "epoch": 0.005459260270233383, "grad_norm": 0.4204055368900299, "learning_rate": 1.0666666666666667e-05, "loss": 1.8742, "step": 40 }, { "epoch": 0.005595741776989218, "grad_norm": 0.32618430256843567, "learning_rate": 1.0933333333333334e-05, "loss": 1.809, "step": 41 }, { "epoch": 0.0057322232837450525, "grad_norm": 0.35566413402557373, "learning_rate": 1.1200000000000001e-05, "loss": 1.7725, "step": 42 }, { "epoch": 0.005868704790500887, "grad_norm": 0.29983246326446533, "learning_rate": 1.1466666666666668e-05, "loss": 1.9016, "step": 43 }, { "epoch": 0.006005186297256722, "grad_norm": 0.2848670482635498, "learning_rate": 1.1733333333333335e-05, "loss": 1.8058, "step": 44 }, { "epoch": 0.006141667804012556, "grad_norm": 0.2734982967376709, "learning_rate": 1.2e-05, "loss": 1.7248, "step": 45 }, { "epoch": 0.006278149310768391, "grad_norm": 0.27347004413604736, "learning_rate": 1.2266666666666667e-05, "loss": 1.8156, "step": 46 }, { "epoch": 0.0064146308175242256, "grad_norm": 0.2598482668399811, "learning_rate": 1.2533333333333336e-05, "loss": 1.7959, "step": 47 }, { "epoch": 0.00655111232428006, "grad_norm": 0.23635509610176086, "learning_rate": 1.2800000000000001e-05, "loss": 1.7623, "step": 48 }, { "epoch": 0.006687593831035895, "grad_norm": 0.24684293568134308, "learning_rate": 1.3066666666666668e-05, "loss": 1.845, "step": 49 }, { "epoch": 0.006824075337791729, "grad_norm": 0.23189635574817657, "learning_rate": 1.3333333333333333e-05, "loss": 1.8112, "step": 50 }, { "epoch": 0.0069605568445475635, "grad_norm": 0.2241699993610382, "learning_rate": 1.3600000000000002e-05, "loss": 1.7468, "step": 51 }, { "epoch": 0.007097038351303399, "grad_norm": 0.24107544124126434, "learning_rate": 1.3866666666666669e-05, "loss": 1.8191, "step": 52 }, { "epoch": 0.007233519858059233, "grad_norm": 0.22175419330596924, "learning_rate": 1.4133333333333334e-05, "loss": 1.7544, "step": 53 }, { "epoch": 0.007370001364815067, "grad_norm": 0.2288978397846222, "learning_rate": 1.4400000000000001e-05, "loss": 1.7702, "step": 54 }, { "epoch": 0.007506482871570902, "grad_norm": 0.22638070583343506, "learning_rate": 1.4666666666666666e-05, "loss": 1.7895, "step": 55 }, { "epoch": 0.007642964378326737, "grad_norm": 0.248519629240036, "learning_rate": 1.4933333333333335e-05, "loss": 1.8377, "step": 56 }, { "epoch": 0.007779445885082571, "grad_norm": 0.2243175208568573, "learning_rate": 1.5200000000000002e-05, "loss": 1.7965, "step": 57 }, { "epoch": 0.007915927391838405, "grad_norm": 0.2314777821302414, "learning_rate": 1.546666666666667e-05, "loss": 1.7646, "step": 58 }, { "epoch": 0.008052408898594241, "grad_norm": 0.22540760040283203, "learning_rate": 1.5733333333333334e-05, "loss": 1.791, "step": 59 }, { "epoch": 0.008188890405350075, "grad_norm": 0.24623164534568787, "learning_rate": 1.6000000000000003e-05, "loss": 1.7741, "step": 60 }, { "epoch": 0.00832537191210591, "grad_norm": 0.24007107317447662, "learning_rate": 1.6266666666666668e-05, "loss": 1.7735, "step": 61 }, { "epoch": 0.008461853418861744, "grad_norm": 0.2574295699596405, "learning_rate": 1.6533333333333333e-05, "loss": 1.7881, "step": 62 }, { "epoch": 0.008598334925617578, "grad_norm": 0.2421531081199646, "learning_rate": 1.6800000000000002e-05, "loss": 1.7364, "step": 63 }, { "epoch": 0.008734816432373414, "grad_norm": 0.2307588905096054, "learning_rate": 1.706666666666667e-05, "loss": 1.727, "step": 64 }, { "epoch": 0.008871297939129249, "grad_norm": 0.238815039396286, "learning_rate": 1.7333333333333336e-05, "loss": 1.7139, "step": 65 }, { "epoch": 0.009007779445885083, "grad_norm": 0.22701887786388397, "learning_rate": 1.76e-05, "loss": 1.773, "step": 66 }, { "epoch": 0.009144260952640917, "grad_norm": 0.22581444680690765, "learning_rate": 1.7866666666666666e-05, "loss": 1.7459, "step": 67 }, { "epoch": 0.009280742459396751, "grad_norm": 0.21733753383159637, "learning_rate": 1.8133333333333335e-05, "loss": 1.7576, "step": 68 }, { "epoch": 0.009417223966152586, "grad_norm": 0.24274277687072754, "learning_rate": 1.8400000000000003e-05, "loss": 1.7336, "step": 69 }, { "epoch": 0.009553705472908422, "grad_norm": 0.20616181194782257, "learning_rate": 1.866666666666667e-05, "loss": 1.7586, "step": 70 }, { "epoch": 0.009690186979664256, "grad_norm": 0.205729678273201, "learning_rate": 1.8933333333333334e-05, "loss": 1.7318, "step": 71 }, { "epoch": 0.00982666848642009, "grad_norm": 0.20966161787509918, "learning_rate": 1.9200000000000003e-05, "loss": 1.7637, "step": 72 }, { "epoch": 0.009963149993175924, "grad_norm": 0.20736359059810638, "learning_rate": 1.9466666666666668e-05, "loss": 1.7353, "step": 73 }, { "epoch": 0.010099631499931759, "grad_norm": 0.20281726121902466, "learning_rate": 1.9733333333333336e-05, "loss": 1.7322, "step": 74 }, { "epoch": 0.010236113006687593, "grad_norm": 0.20042139291763306, "learning_rate": 2e-05, "loss": 1.7195, "step": 75 }, { "epoch": 0.010372594513443429, "grad_norm": 0.21634063124656677, "learning_rate": 2.026666666666667e-05, "loss": 1.7943, "step": 76 }, { "epoch": 0.010509076020199263, "grad_norm": 0.2153872698545456, "learning_rate": 2.0533333333333336e-05, "loss": 1.763, "step": 77 }, { "epoch": 0.010645557526955098, "grad_norm": 0.2081228494644165, "learning_rate": 2.08e-05, "loss": 1.7792, "step": 78 }, { "epoch": 0.010782039033710932, "grad_norm": 0.2098853439092636, "learning_rate": 2.1066666666666666e-05, "loss": 1.7918, "step": 79 }, { "epoch": 0.010918520540466766, "grad_norm": 0.20721176266670227, "learning_rate": 2.1333333333333335e-05, "loss": 1.7064, "step": 80 }, { "epoch": 0.011055002047222602, "grad_norm": 0.2197636067867279, "learning_rate": 2.1600000000000003e-05, "loss": 1.7002, "step": 81 }, { "epoch": 0.011191483553978436, "grad_norm": 0.19892142713069916, "learning_rate": 2.186666666666667e-05, "loss": 1.6049, "step": 82 }, { "epoch": 0.01132796506073427, "grad_norm": 0.20634345710277557, "learning_rate": 2.2133333333333337e-05, "loss": 1.7028, "step": 83 }, { "epoch": 0.011464446567490105, "grad_norm": 0.213628351688385, "learning_rate": 2.2400000000000002e-05, "loss": 1.711, "step": 84 }, { "epoch": 0.01160092807424594, "grad_norm": 0.20134010910987854, "learning_rate": 2.2666666666666668e-05, "loss": 1.672, "step": 85 }, { "epoch": 0.011737409581001773, "grad_norm": 0.21955366432666779, "learning_rate": 2.2933333333333336e-05, "loss": 1.8046, "step": 86 }, { "epoch": 0.01187389108775761, "grad_norm": 0.22468940913677216, "learning_rate": 2.32e-05, "loss": 1.763, "step": 87 }, { "epoch": 0.012010372594513444, "grad_norm": 0.20560674369335175, "learning_rate": 2.346666666666667e-05, "loss": 1.7117, "step": 88 }, { "epoch": 0.012146854101269278, "grad_norm": 0.21587899327278137, "learning_rate": 2.373333333333334e-05, "loss": 1.8043, "step": 89 }, { "epoch": 0.012283335608025112, "grad_norm": 0.210346519947052, "learning_rate": 2.4e-05, "loss": 1.6873, "step": 90 }, { "epoch": 0.012419817114780947, "grad_norm": 0.22895421087741852, "learning_rate": 2.426666666666667e-05, "loss": 1.7526, "step": 91 }, { "epoch": 0.012556298621536783, "grad_norm": 0.21360772848129272, "learning_rate": 2.4533333333333334e-05, "loss": 1.7118, "step": 92 }, { "epoch": 0.012692780128292617, "grad_norm": 0.23913121223449707, "learning_rate": 2.4800000000000003e-05, "loss": 1.7234, "step": 93 }, { "epoch": 0.012829261635048451, "grad_norm": 0.2257775068283081, "learning_rate": 2.5066666666666672e-05, "loss": 1.7052, "step": 94 }, { "epoch": 0.012965743141804285, "grad_norm": 0.22123000025749207, "learning_rate": 2.5333333333333334e-05, "loss": 1.7659, "step": 95 }, { "epoch": 0.01310222464856012, "grad_norm": 0.25808942317962646, "learning_rate": 2.5600000000000002e-05, "loss": 1.7974, "step": 96 }, { "epoch": 0.013238706155315954, "grad_norm": 0.22531723976135254, "learning_rate": 2.5866666666666667e-05, "loss": 1.7703, "step": 97 }, { "epoch": 0.01337518766207179, "grad_norm": 0.22541013360023499, "learning_rate": 2.6133333333333336e-05, "loss": 1.6882, "step": 98 }, { "epoch": 0.013511669168827624, "grad_norm": 0.24326053261756897, "learning_rate": 2.6400000000000005e-05, "loss": 1.7636, "step": 99 }, { "epoch": 0.013648150675583458, "grad_norm": 0.22722408175468445, "learning_rate": 2.6666666666666667e-05, "loss": 1.7421, "step": 100 }, { "epoch": 0.013784632182339293, "grad_norm": 0.23277103900909424, "learning_rate": 2.6933333333333335e-05, "loss": 1.7752, "step": 101 }, { "epoch": 0.013921113689095127, "grad_norm": 0.2395719438791275, "learning_rate": 2.7200000000000004e-05, "loss": 1.762, "step": 102 }, { "epoch": 0.014057595195850963, "grad_norm": 0.21451711654663086, "learning_rate": 2.746666666666667e-05, "loss": 1.7415, "step": 103 }, { "epoch": 0.014194076702606797, "grad_norm": 0.24826586246490479, "learning_rate": 2.7733333333333338e-05, "loss": 1.8362, "step": 104 }, { "epoch": 0.014330558209362632, "grad_norm": 0.24056163430213928, "learning_rate": 2.8e-05, "loss": 1.801, "step": 105 }, { "epoch": 0.014467039716118466, "grad_norm": 0.24722066521644592, "learning_rate": 2.8266666666666668e-05, "loss": 1.727, "step": 106 }, { "epoch": 0.0146035212228743, "grad_norm": 0.23071065545082092, "learning_rate": 2.8533333333333337e-05, "loss": 1.7135, "step": 107 }, { "epoch": 0.014740002729630134, "grad_norm": 0.234844371676445, "learning_rate": 2.8800000000000002e-05, "loss": 1.8161, "step": 108 }, { "epoch": 0.01487648423638597, "grad_norm": 0.2340114414691925, "learning_rate": 2.906666666666667e-05, "loss": 1.7293, "step": 109 }, { "epoch": 0.015012965743141805, "grad_norm": 0.22716818749904633, "learning_rate": 2.9333333333333333e-05, "loss": 1.7176, "step": 110 }, { "epoch": 0.015149447249897639, "grad_norm": 0.2477683275938034, "learning_rate": 2.96e-05, "loss": 1.6629, "step": 111 }, { "epoch": 0.015285928756653473, "grad_norm": 0.2254948765039444, "learning_rate": 2.986666666666667e-05, "loss": 1.6964, "step": 112 }, { "epoch": 0.015422410263409307, "grad_norm": 0.23815442621707916, "learning_rate": 3.0133333333333335e-05, "loss": 1.7543, "step": 113 }, { "epoch": 0.015558891770165142, "grad_norm": 0.22915583848953247, "learning_rate": 3.0400000000000004e-05, "loss": 1.7556, "step": 114 }, { "epoch": 0.015695373276920978, "grad_norm": 0.24844741821289062, "learning_rate": 3.066666666666667e-05, "loss": 1.7621, "step": 115 }, { "epoch": 0.01583185478367681, "grad_norm": 0.22810101509094238, "learning_rate": 3.093333333333334e-05, "loss": 1.7511, "step": 116 }, { "epoch": 0.015968336290432646, "grad_norm": 0.2491244375705719, "learning_rate": 3.1200000000000006e-05, "loss": 1.7414, "step": 117 }, { "epoch": 0.016104817797188482, "grad_norm": 0.22312700748443604, "learning_rate": 3.146666666666667e-05, "loss": 1.7253, "step": 118 }, { "epoch": 0.016241299303944315, "grad_norm": 0.22729012370109558, "learning_rate": 3.173333333333334e-05, "loss": 1.6379, "step": 119 }, { "epoch": 0.01637778081070015, "grad_norm": 0.2469879686832428, "learning_rate": 3.2000000000000005e-05, "loss": 1.7275, "step": 120 }, { "epoch": 0.016514262317455983, "grad_norm": 0.23909714818000793, "learning_rate": 3.226666666666667e-05, "loss": 1.692, "step": 121 }, { "epoch": 0.01665074382421182, "grad_norm": 0.25035664439201355, "learning_rate": 3.2533333333333336e-05, "loss": 1.7314, "step": 122 }, { "epoch": 0.016787225330967655, "grad_norm": 0.23064374923706055, "learning_rate": 3.28e-05, "loss": 1.8229, "step": 123 }, { "epoch": 0.016923706837723488, "grad_norm": 0.22864677011966705, "learning_rate": 3.3066666666666666e-05, "loss": 1.6719, "step": 124 }, { "epoch": 0.017060188344479324, "grad_norm": 0.23785266280174255, "learning_rate": 3.3333333333333335e-05, "loss": 1.7707, "step": 125 }, { "epoch": 0.017196669851235157, "grad_norm": 0.2404264658689499, "learning_rate": 3.3600000000000004e-05, "loss": 1.7706, "step": 126 }, { "epoch": 0.017333151357990993, "grad_norm": 0.24038049578666687, "learning_rate": 3.386666666666667e-05, "loss": 1.8063, "step": 127 }, { "epoch": 0.01746963286474683, "grad_norm": 0.26162853837013245, "learning_rate": 3.413333333333334e-05, "loss": 1.7752, "step": 128 }, { "epoch": 0.01760611437150266, "grad_norm": 0.22979912161827087, "learning_rate": 3.44e-05, "loss": 1.7798, "step": 129 }, { "epoch": 0.017742595878258497, "grad_norm": 0.23631471395492554, "learning_rate": 3.466666666666667e-05, "loss": 1.8091, "step": 130 }, { "epoch": 0.01787907738501433, "grad_norm": 0.230385422706604, "learning_rate": 3.493333333333333e-05, "loss": 1.6328, "step": 131 }, { "epoch": 0.018015558891770166, "grad_norm": 0.24715574085712433, "learning_rate": 3.52e-05, "loss": 1.6432, "step": 132 }, { "epoch": 0.018152040398525998, "grad_norm": 0.25098592042922974, "learning_rate": 3.546666666666667e-05, "loss": 1.6562, "step": 133 }, { "epoch": 0.018288521905281834, "grad_norm": 0.2581266760826111, "learning_rate": 3.573333333333333e-05, "loss": 1.7382, "step": 134 }, { "epoch": 0.01842500341203767, "grad_norm": 0.24009093642234802, "learning_rate": 3.6e-05, "loss": 1.7733, "step": 135 }, { "epoch": 0.018561484918793503, "grad_norm": 0.23203186690807343, "learning_rate": 3.626666666666667e-05, "loss": 1.672, "step": 136 }, { "epoch": 0.01869796642554934, "grad_norm": 0.24924978613853455, "learning_rate": 3.653333333333334e-05, "loss": 1.7233, "step": 137 }, { "epoch": 0.01883444793230517, "grad_norm": 0.2568417489528656, "learning_rate": 3.680000000000001e-05, "loss": 1.6306, "step": 138 }, { "epoch": 0.018970929439061007, "grad_norm": 0.25758302211761475, "learning_rate": 3.706666666666667e-05, "loss": 1.7771, "step": 139 }, { "epoch": 0.019107410945816843, "grad_norm": 0.23922881484031677, "learning_rate": 3.733333333333334e-05, "loss": 1.768, "step": 140 }, { "epoch": 0.019243892452572676, "grad_norm": 0.22760292887687683, "learning_rate": 3.76e-05, "loss": 1.6672, "step": 141 }, { "epoch": 0.019380373959328512, "grad_norm": 0.25242963433265686, "learning_rate": 3.786666666666667e-05, "loss": 1.7868, "step": 142 }, { "epoch": 0.019516855466084344, "grad_norm": 0.2641962766647339, "learning_rate": 3.8133333333333336e-05, "loss": 1.6787, "step": 143 }, { "epoch": 0.01965333697284018, "grad_norm": 0.2402966320514679, "learning_rate": 3.8400000000000005e-05, "loss": 1.6341, "step": 144 }, { "epoch": 0.019789818479596016, "grad_norm": 0.3131767809391022, "learning_rate": 3.866666666666667e-05, "loss": 1.7194, "step": 145 }, { "epoch": 0.01992629998635185, "grad_norm": 0.2589942514896393, "learning_rate": 3.8933333333333336e-05, "loss": 1.7461, "step": 146 }, { "epoch": 0.020062781493107685, "grad_norm": 0.30445966124534607, "learning_rate": 3.9200000000000004e-05, "loss": 1.7237, "step": 147 }, { "epoch": 0.020199262999863517, "grad_norm": 0.2719636559486389, "learning_rate": 3.946666666666667e-05, "loss": 1.71, "step": 148 }, { "epoch": 0.020335744506619353, "grad_norm": 0.23952992260456085, "learning_rate": 3.9733333333333335e-05, "loss": 1.8032, "step": 149 }, { "epoch": 0.020472226013375186, "grad_norm": 0.2376592457294464, "learning_rate": 4e-05, "loss": 1.7425, "step": 150 }, { "epoch": 0.020608707520131022, "grad_norm": 0.24733857810497284, "learning_rate": 4.0266666666666665e-05, "loss": 1.7476, "step": 151 }, { "epoch": 0.020745189026886858, "grad_norm": 0.23061153292655945, "learning_rate": 4.053333333333334e-05, "loss": 1.6733, "step": 152 }, { "epoch": 0.02088167053364269, "grad_norm": 0.2736116051673889, "learning_rate": 4.08e-05, "loss": 1.6989, "step": 153 }, { "epoch": 0.021018152040398527, "grad_norm": 0.2521119713783264, "learning_rate": 4.106666666666667e-05, "loss": 1.6959, "step": 154 }, { "epoch": 0.02115463354715436, "grad_norm": 0.25929921865463257, "learning_rate": 4.133333333333334e-05, "loss": 1.6956, "step": 155 }, { "epoch": 0.021291115053910195, "grad_norm": 0.2587325870990753, "learning_rate": 4.16e-05, "loss": 1.7962, "step": 156 }, { "epoch": 0.02142759656066603, "grad_norm": 0.23272058367729187, "learning_rate": 4.186666666666667e-05, "loss": 1.6473, "step": 157 }, { "epoch": 0.021564078067421864, "grad_norm": 0.2319646179676056, "learning_rate": 4.213333333333333e-05, "loss": 1.6893, "step": 158 }, { "epoch": 0.0217005595741777, "grad_norm": 0.23524560034275055, "learning_rate": 4.240000000000001e-05, "loss": 1.718, "step": 159 }, { "epoch": 0.021837041080933532, "grad_norm": 0.23512865602970123, "learning_rate": 4.266666666666667e-05, "loss": 1.7011, "step": 160 }, { "epoch": 0.021973522587689368, "grad_norm": 0.21862873435020447, "learning_rate": 4.293333333333333e-05, "loss": 1.6088, "step": 161 }, { "epoch": 0.022110004094445204, "grad_norm": 0.22855187952518463, "learning_rate": 4.3200000000000007e-05, "loss": 1.7111, "step": 162 }, { "epoch": 0.022246485601201037, "grad_norm": 0.23267142474651337, "learning_rate": 4.346666666666667e-05, "loss": 1.6642, "step": 163 }, { "epoch": 0.022382967107956873, "grad_norm": 0.244045227766037, "learning_rate": 4.373333333333334e-05, "loss": 1.6515, "step": 164 }, { "epoch": 0.022519448614712705, "grad_norm": 0.2407236099243164, "learning_rate": 4.4000000000000006e-05, "loss": 1.7264, "step": 165 }, { "epoch": 0.02265593012146854, "grad_norm": 0.26442793011665344, "learning_rate": 4.4266666666666674e-05, "loss": 1.7056, "step": 166 }, { "epoch": 0.022792411628224377, "grad_norm": 0.21957506239414215, "learning_rate": 4.4533333333333336e-05, "loss": 1.6498, "step": 167 }, { "epoch": 0.02292889313498021, "grad_norm": 0.2429881989955902, "learning_rate": 4.4800000000000005e-05, "loss": 1.7154, "step": 168 }, { "epoch": 0.023065374641736046, "grad_norm": 0.2509872615337372, "learning_rate": 4.506666666666667e-05, "loss": 1.7395, "step": 169 }, { "epoch": 0.02320185614849188, "grad_norm": 0.2709554433822632, "learning_rate": 4.5333333333333335e-05, "loss": 1.7147, "step": 170 }, { "epoch": 0.023338337655247714, "grad_norm": 0.22771143913269043, "learning_rate": 4.56e-05, "loss": 1.6264, "step": 171 }, { "epoch": 0.023474819162003547, "grad_norm": 0.2200254499912262, "learning_rate": 4.586666666666667e-05, "loss": 1.6607, "step": 172 }, { "epoch": 0.023611300668759383, "grad_norm": 0.2651680111885071, "learning_rate": 4.6133333333333334e-05, "loss": 1.6919, "step": 173 }, { "epoch": 0.02374778217551522, "grad_norm": 0.2326844334602356, "learning_rate": 4.64e-05, "loss": 1.6351, "step": 174 }, { "epoch": 0.02388426368227105, "grad_norm": 0.26194167137145996, "learning_rate": 4.666666666666667e-05, "loss": 1.7389, "step": 175 }, { "epoch": 0.024020745189026887, "grad_norm": 0.22865556180477142, "learning_rate": 4.693333333333334e-05, "loss": 1.7029, "step": 176 }, { "epoch": 0.02415722669578272, "grad_norm": 0.27646172046661377, "learning_rate": 4.72e-05, "loss": 1.6935, "step": 177 }, { "epoch": 0.024293708202538556, "grad_norm": 0.24295304715633392, "learning_rate": 4.746666666666668e-05, "loss": 1.6951, "step": 178 }, { "epoch": 0.024430189709294392, "grad_norm": 0.2156314253807068, "learning_rate": 4.773333333333334e-05, "loss": 1.648, "step": 179 }, { "epoch": 0.024566671216050225, "grad_norm": 0.22737029194831848, "learning_rate": 4.8e-05, "loss": 1.6679, "step": 180 }, { "epoch": 0.02470315272280606, "grad_norm": 0.248259037733078, "learning_rate": 4.826666666666668e-05, "loss": 1.6591, "step": 181 }, { "epoch": 0.024839634229561893, "grad_norm": 0.22868888080120087, "learning_rate": 4.853333333333334e-05, "loss": 1.7137, "step": 182 }, { "epoch": 0.02497611573631773, "grad_norm": 0.25324422121047974, "learning_rate": 4.88e-05, "loss": 1.7322, "step": 183 }, { "epoch": 0.025112597243073565, "grad_norm": 0.24201399087905884, "learning_rate": 4.906666666666667e-05, "loss": 1.651, "step": 184 }, { "epoch": 0.025249078749829398, "grad_norm": 0.2308133840560913, "learning_rate": 4.933333333333334e-05, "loss": 1.7445, "step": 185 }, { "epoch": 0.025385560256585234, "grad_norm": 0.2358778715133667, "learning_rate": 4.9600000000000006e-05, "loss": 1.6765, "step": 186 }, { "epoch": 0.025522041763341066, "grad_norm": 0.2414613515138626, "learning_rate": 4.986666666666667e-05, "loss": 1.6198, "step": 187 }, { "epoch": 0.025658523270096902, "grad_norm": 0.26769500970840454, "learning_rate": 5.0133333333333343e-05, "loss": 1.7696, "step": 188 }, { "epoch": 0.025795004776852735, "grad_norm": 0.2725449502468109, "learning_rate": 5.0400000000000005e-05, "loss": 1.7405, "step": 189 }, { "epoch": 0.02593148628360857, "grad_norm": 0.2680228054523468, "learning_rate": 5.066666666666667e-05, "loss": 1.6777, "step": 190 }, { "epoch": 0.026067967790364407, "grad_norm": 0.2760530412197113, "learning_rate": 5.093333333333334e-05, "loss": 1.651, "step": 191 }, { "epoch": 0.02620444929712024, "grad_norm": 0.24931032955646515, "learning_rate": 5.1200000000000004e-05, "loss": 1.7195, "step": 192 }, { "epoch": 0.026340930803876075, "grad_norm": 0.28645068407058716, "learning_rate": 5.1466666666666666e-05, "loss": 1.6904, "step": 193 }, { "epoch": 0.026477412310631908, "grad_norm": 0.2453315705060959, "learning_rate": 5.1733333333333335e-05, "loss": 1.6433, "step": 194 }, { "epoch": 0.026613893817387744, "grad_norm": 0.2835664451122284, "learning_rate": 5.2000000000000004e-05, "loss": 1.6562, "step": 195 }, { "epoch": 0.02675037532414358, "grad_norm": 0.26578426361083984, "learning_rate": 5.226666666666667e-05, "loss": 1.6577, "step": 196 }, { "epoch": 0.026886856830899412, "grad_norm": 0.32209882140159607, "learning_rate": 5.2533333333333334e-05, "loss": 1.7582, "step": 197 }, { "epoch": 0.02702333833765525, "grad_norm": 0.2664225399494171, "learning_rate": 5.280000000000001e-05, "loss": 1.687, "step": 198 }, { "epoch": 0.02715981984441108, "grad_norm": 0.29573750495910645, "learning_rate": 5.306666666666667e-05, "loss": 1.677, "step": 199 }, { "epoch": 0.027296301351166917, "grad_norm": 0.2537699341773987, "learning_rate": 5.333333333333333e-05, "loss": 1.6774, "step": 200 }, { "epoch": 0.027432782857922753, "grad_norm": 0.3485255539417267, "learning_rate": 5.360000000000001e-05, "loss": 1.7344, "step": 201 }, { "epoch": 0.027569264364678586, "grad_norm": 0.21835796535015106, "learning_rate": 5.386666666666667e-05, "loss": 1.7329, "step": 202 }, { "epoch": 0.02770574587143442, "grad_norm": 0.26980435848236084, "learning_rate": 5.413333333333333e-05, "loss": 1.7202, "step": 203 }, { "epoch": 0.027842227378190254, "grad_norm": 0.24076232314109802, "learning_rate": 5.440000000000001e-05, "loss": 1.7953, "step": 204 }, { "epoch": 0.02797870888494609, "grad_norm": 0.25215744972229004, "learning_rate": 5.466666666666667e-05, "loss": 1.7256, "step": 205 }, { "epoch": 0.028115190391701926, "grad_norm": 0.24413655698299408, "learning_rate": 5.493333333333334e-05, "loss": 1.6929, "step": 206 }, { "epoch": 0.02825167189845776, "grad_norm": 0.22124704718589783, "learning_rate": 5.52e-05, "loss": 1.7886, "step": 207 }, { "epoch": 0.028388153405213595, "grad_norm": 0.21877169609069824, "learning_rate": 5.5466666666666675e-05, "loss": 1.6658, "step": 208 }, { "epoch": 0.028524634911969427, "grad_norm": 0.2238980531692505, "learning_rate": 5.573333333333334e-05, "loss": 1.7004, "step": 209 }, { "epoch": 0.028661116418725263, "grad_norm": 0.21076123416423798, "learning_rate": 5.6e-05, "loss": 1.7027, "step": 210 }, { "epoch": 0.028797597925481096, "grad_norm": 0.21802055835723877, "learning_rate": 5.6266666666666675e-05, "loss": 1.7392, "step": 211 }, { "epoch": 0.02893407943223693, "grad_norm": 0.212049663066864, "learning_rate": 5.6533333333333336e-05, "loss": 1.6786, "step": 212 }, { "epoch": 0.029070560938992768, "grad_norm": 0.22911016643047333, "learning_rate": 5.6800000000000005e-05, "loss": 1.6352, "step": 213 }, { "epoch": 0.0292070424457486, "grad_norm": 0.2185114473104477, "learning_rate": 5.7066666666666674e-05, "loss": 1.5794, "step": 214 }, { "epoch": 0.029343523952504436, "grad_norm": 0.253623366355896, "learning_rate": 5.7333333333333336e-05, "loss": 1.6806, "step": 215 }, { "epoch": 0.02948000545926027, "grad_norm": 0.251220703125, "learning_rate": 5.7600000000000004e-05, "loss": 1.648, "step": 216 }, { "epoch": 0.029616486966016105, "grad_norm": 0.24054937064647675, "learning_rate": 5.786666666666667e-05, "loss": 1.7466, "step": 217 }, { "epoch": 0.02975296847277194, "grad_norm": 0.23784852027893066, "learning_rate": 5.813333333333334e-05, "loss": 1.6687, "step": 218 }, { "epoch": 0.029889449979527773, "grad_norm": 0.25286269187927246, "learning_rate": 5.84e-05, "loss": 1.6895, "step": 219 }, { "epoch": 0.03002593148628361, "grad_norm": 0.22769373655319214, "learning_rate": 5.8666666666666665e-05, "loss": 1.6349, "step": 220 }, { "epoch": 0.030162412993039442, "grad_norm": 0.24668146669864655, "learning_rate": 5.893333333333334e-05, "loss": 1.7242, "step": 221 }, { "epoch": 0.030298894499795278, "grad_norm": 0.2401885688304901, "learning_rate": 5.92e-05, "loss": 1.8042, "step": 222 }, { "epoch": 0.030435376006551114, "grad_norm": 0.25869131088256836, "learning_rate": 5.946666666666667e-05, "loss": 1.7239, "step": 223 }, { "epoch": 0.030571857513306946, "grad_norm": 0.23416532576084137, "learning_rate": 5.973333333333334e-05, "loss": 1.6702, "step": 224 }, { "epoch": 0.030708339020062782, "grad_norm": 0.2530698776245117, "learning_rate": 6.000000000000001e-05, "loss": 1.5992, "step": 225 }, { "epoch": 0.030844820526818615, "grad_norm": 0.24807973206043243, "learning_rate": 6.026666666666667e-05, "loss": 1.6091, "step": 226 }, { "epoch": 0.03098130203357445, "grad_norm": 0.2626461088657379, "learning_rate": 6.053333333333334e-05, "loss": 1.7514, "step": 227 }, { "epoch": 0.031117783540330284, "grad_norm": 0.25457173585891724, "learning_rate": 6.080000000000001e-05, "loss": 1.6573, "step": 228 }, { "epoch": 0.03125426504708612, "grad_norm": 0.21489518880844116, "learning_rate": 6.106666666666667e-05, "loss": 1.6021, "step": 229 }, { "epoch": 0.031390746553841956, "grad_norm": 0.2610913813114166, "learning_rate": 6.133333333333334e-05, "loss": 1.7039, "step": 230 }, { "epoch": 0.03152722806059779, "grad_norm": 0.21213537454605103, "learning_rate": 6.16e-05, "loss": 1.6823, "step": 231 }, { "epoch": 0.03166370956735362, "grad_norm": 0.20221945643424988, "learning_rate": 6.186666666666668e-05, "loss": 1.6794, "step": 232 }, { "epoch": 0.03180019107410946, "grad_norm": 0.22009679675102234, "learning_rate": 6.213333333333333e-05, "loss": 1.7155, "step": 233 }, { "epoch": 0.03193667258086529, "grad_norm": 0.20505869388580322, "learning_rate": 6.240000000000001e-05, "loss": 1.6534, "step": 234 }, { "epoch": 0.032073154087621125, "grad_norm": 0.2528541684150696, "learning_rate": 6.266666666666667e-05, "loss": 1.6833, "step": 235 }, { "epoch": 0.032209635594376965, "grad_norm": 0.2703731954097748, "learning_rate": 6.293333333333334e-05, "loss": 1.68, "step": 236 }, { "epoch": 0.0323461171011328, "grad_norm": 0.20772017538547516, "learning_rate": 6.32e-05, "loss": 1.7331, "step": 237 }, { "epoch": 0.03248259860788863, "grad_norm": 0.21209511160850525, "learning_rate": 6.346666666666667e-05, "loss": 1.5974, "step": 238 }, { "epoch": 0.03261908011464446, "grad_norm": 0.25466233491897583, "learning_rate": 6.373333333333334e-05, "loss": 1.7179, "step": 239 }, { "epoch": 0.0327555616214003, "grad_norm": 0.19708532094955444, "learning_rate": 6.400000000000001e-05, "loss": 1.6266, "step": 240 }, { "epoch": 0.032892043128156134, "grad_norm": 0.2628290057182312, "learning_rate": 6.426666666666668e-05, "loss": 1.7026, "step": 241 }, { "epoch": 0.03302852463491197, "grad_norm": 0.22539164125919342, "learning_rate": 6.453333333333333e-05, "loss": 1.6611, "step": 242 }, { "epoch": 0.033165006141667806, "grad_norm": 0.21600398421287537, "learning_rate": 6.48e-05, "loss": 1.6971, "step": 243 }, { "epoch": 0.03330148764842364, "grad_norm": 0.2313871830701828, "learning_rate": 6.506666666666667e-05, "loss": 1.6648, "step": 244 }, { "epoch": 0.03343796915517947, "grad_norm": 0.22528918087482452, "learning_rate": 6.533333333333334e-05, "loss": 1.6178, "step": 245 }, { "epoch": 0.03357445066193531, "grad_norm": 0.2156699150800705, "learning_rate": 6.56e-05, "loss": 1.7208, "step": 246 }, { "epoch": 0.03371093216869114, "grad_norm": 0.2433416247367859, "learning_rate": 6.586666666666668e-05, "loss": 1.7256, "step": 247 }, { "epoch": 0.033847413675446976, "grad_norm": 0.2248915582895279, "learning_rate": 6.613333333333333e-05, "loss": 1.6664, "step": 248 }, { "epoch": 0.03398389518220281, "grad_norm": 0.23145483434200287, "learning_rate": 6.64e-05, "loss": 1.6347, "step": 249 }, { "epoch": 0.03412037668895865, "grad_norm": 0.19734616577625275, "learning_rate": 6.666666666666667e-05, "loss": 1.5771, "step": 250 }, { "epoch": 0.03425685819571448, "grad_norm": 0.2143116295337677, "learning_rate": 6.693333333333334e-05, "loss": 1.6928, "step": 251 }, { "epoch": 0.03439333970247031, "grad_norm": 0.22231534123420715, "learning_rate": 6.720000000000001e-05, "loss": 1.6467, "step": 252 }, { "epoch": 0.03452982120922615, "grad_norm": 0.20797906816005707, "learning_rate": 6.746666666666668e-05, "loss": 1.5702, "step": 253 }, { "epoch": 0.034666302715981985, "grad_norm": 0.24098797142505646, "learning_rate": 6.773333333333334e-05, "loss": 1.6262, "step": 254 }, { "epoch": 0.03480278422273782, "grad_norm": 0.2452200949192047, "learning_rate": 6.8e-05, "loss": 1.7232, "step": 255 }, { "epoch": 0.03493926572949366, "grad_norm": 0.2327650785446167, "learning_rate": 6.826666666666668e-05, "loss": 1.7224, "step": 256 }, { "epoch": 0.03507574723624949, "grad_norm": 0.2117166817188263, "learning_rate": 6.853333333333334e-05, "loss": 1.6533, "step": 257 }, { "epoch": 0.03521222874300532, "grad_norm": 0.24029532074928284, "learning_rate": 6.88e-05, "loss": 1.6022, "step": 258 }, { "epoch": 0.035348710249761155, "grad_norm": 0.24144768714904785, "learning_rate": 6.906666666666667e-05, "loss": 1.705, "step": 259 }, { "epoch": 0.035485191756516994, "grad_norm": 0.2214532494544983, "learning_rate": 6.933333333333334e-05, "loss": 1.7441, "step": 260 }, { "epoch": 0.03562167326327283, "grad_norm": 0.21449518203735352, "learning_rate": 6.960000000000001e-05, "loss": 1.6511, "step": 261 }, { "epoch": 0.03575815477002866, "grad_norm": 0.22649604082107544, "learning_rate": 6.986666666666667e-05, "loss": 1.6649, "step": 262 }, { "epoch": 0.0358946362767845, "grad_norm": 0.2125379592180252, "learning_rate": 7.013333333333335e-05, "loss": 1.6957, "step": 263 }, { "epoch": 0.03603111778354033, "grad_norm": 0.23201030492782593, "learning_rate": 7.04e-05, "loss": 1.6779, "step": 264 }, { "epoch": 0.036167599290296164, "grad_norm": 0.20203745365142822, "learning_rate": 7.066666666666667e-05, "loss": 1.6503, "step": 265 }, { "epoch": 0.036304080797051996, "grad_norm": 0.23546801507472992, "learning_rate": 7.093333333333334e-05, "loss": 1.6396, "step": 266 }, { "epoch": 0.036440562303807836, "grad_norm": 0.20308132469654083, "learning_rate": 7.120000000000001e-05, "loss": 1.7182, "step": 267 }, { "epoch": 0.03657704381056367, "grad_norm": 0.20700064301490784, "learning_rate": 7.146666666666666e-05, "loss": 1.7149, "step": 268 }, { "epoch": 0.0367135253173195, "grad_norm": 0.2265264242887497, "learning_rate": 7.173333333333333e-05, "loss": 1.7122, "step": 269 }, { "epoch": 0.03685000682407534, "grad_norm": 0.21799160540103912, "learning_rate": 7.2e-05, "loss": 1.6572, "step": 270 }, { "epoch": 0.03698648833083117, "grad_norm": 0.19743822515010834, "learning_rate": 7.226666666666667e-05, "loss": 1.6004, "step": 271 }, { "epoch": 0.037122969837587005, "grad_norm": 0.19909922778606415, "learning_rate": 7.253333333333334e-05, "loss": 1.5925, "step": 272 }, { "epoch": 0.037259451344342845, "grad_norm": 0.20454707741737366, "learning_rate": 7.280000000000001e-05, "loss": 1.7318, "step": 273 }, { "epoch": 0.03739593285109868, "grad_norm": 0.19905130565166473, "learning_rate": 7.306666666666668e-05, "loss": 1.6399, "step": 274 }, { "epoch": 0.03753241435785451, "grad_norm": 0.22123555839061737, "learning_rate": 7.333333333333333e-05, "loss": 1.6797, "step": 275 }, { "epoch": 0.03766889586461034, "grad_norm": 0.2101968228816986, "learning_rate": 7.360000000000001e-05, "loss": 1.6601, "step": 276 }, { "epoch": 0.03780537737136618, "grad_norm": 0.2130962312221527, "learning_rate": 7.386666666666667e-05, "loss": 1.734, "step": 277 }, { "epoch": 0.037941858878122015, "grad_norm": 0.21536697447299957, "learning_rate": 7.413333333333334e-05, "loss": 1.7315, "step": 278 }, { "epoch": 0.03807834038487785, "grad_norm": 0.21114717423915863, "learning_rate": 7.44e-05, "loss": 1.6886, "step": 279 }, { "epoch": 0.03821482189163369, "grad_norm": 0.24319419264793396, "learning_rate": 7.466666666666667e-05, "loss": 1.7375, "step": 280 }, { "epoch": 0.03835130339838952, "grad_norm": 0.19769835472106934, "learning_rate": 7.493333333333334e-05, "loss": 1.7083, "step": 281 }, { "epoch": 0.03848778490514535, "grad_norm": 0.22592894732952118, "learning_rate": 7.52e-05, "loss": 1.6407, "step": 282 }, { "epoch": 0.038624266411901184, "grad_norm": 0.20605631172657013, "learning_rate": 7.546666666666668e-05, "loss": 1.6803, "step": 283 }, { "epoch": 0.038760747918657024, "grad_norm": 0.21297262609004974, "learning_rate": 7.573333333333334e-05, "loss": 1.6462, "step": 284 }, { "epoch": 0.038897229425412856, "grad_norm": 0.20373934507369995, "learning_rate": 7.6e-05, "loss": 1.6578, "step": 285 }, { "epoch": 0.03903371093216869, "grad_norm": 0.2054784893989563, "learning_rate": 7.626666666666667e-05, "loss": 1.7021, "step": 286 }, { "epoch": 0.03917019243892453, "grad_norm": 0.20568403601646423, "learning_rate": 7.653333333333334e-05, "loss": 1.7562, "step": 287 }, { "epoch": 0.03930667394568036, "grad_norm": 0.19960707426071167, "learning_rate": 7.680000000000001e-05, "loss": 1.8018, "step": 288 }, { "epoch": 0.03944315545243619, "grad_norm": 0.2213381826877594, "learning_rate": 7.706666666666668e-05, "loss": 1.6819, "step": 289 }, { "epoch": 0.03957963695919203, "grad_norm": 0.21227072179317474, "learning_rate": 7.733333333333333e-05, "loss": 1.718, "step": 290 }, { "epoch": 0.039716118465947865, "grad_norm": 0.21815580129623413, "learning_rate": 7.76e-05, "loss": 1.7343, "step": 291 }, { "epoch": 0.0398525999727037, "grad_norm": 0.2301357537508011, "learning_rate": 7.786666666666667e-05, "loss": 1.6573, "step": 292 }, { "epoch": 0.03998908147945953, "grad_norm": 0.21771447360515594, "learning_rate": 7.813333333333334e-05, "loss": 1.7112, "step": 293 }, { "epoch": 0.04012556298621537, "grad_norm": 0.196007639169693, "learning_rate": 7.840000000000001e-05, "loss": 1.6526, "step": 294 }, { "epoch": 0.0402620444929712, "grad_norm": 0.21248257160186768, "learning_rate": 7.866666666666666e-05, "loss": 1.7492, "step": 295 }, { "epoch": 0.040398525999727035, "grad_norm": 0.20819489657878876, "learning_rate": 7.893333333333335e-05, "loss": 1.6847, "step": 296 }, { "epoch": 0.040535007506482874, "grad_norm": 0.20487208664417267, "learning_rate": 7.92e-05, "loss": 1.6649, "step": 297 }, { "epoch": 0.04067148901323871, "grad_norm": 0.21150562167167664, "learning_rate": 7.946666666666667e-05, "loss": 1.6469, "step": 298 }, { "epoch": 0.04080797051999454, "grad_norm": 0.24073141813278198, "learning_rate": 7.973333333333334e-05, "loss": 1.6623, "step": 299 }, { "epoch": 0.04094445202675037, "grad_norm": 0.24552752077579498, "learning_rate": 8e-05, "loss": 1.6755, "step": 300 }, { "epoch": 0.04108093353350621, "grad_norm": 0.21898119151592255, "learning_rate": 8e-05, "loss": 1.7248, "step": 301 }, { "epoch": 0.041217415040262044, "grad_norm": 0.21144811809062958, "learning_rate": 8e-05, "loss": 1.649, "step": 302 }, { "epoch": 0.04135389654701788, "grad_norm": 0.20077790319919586, "learning_rate": 8e-05, "loss": 1.6616, "step": 303 }, { "epoch": 0.041490378053773716, "grad_norm": 0.21370148658752441, "learning_rate": 8e-05, "loss": 1.6461, "step": 304 }, { "epoch": 0.04162685956052955, "grad_norm": 0.19295500218868256, "learning_rate": 8e-05, "loss": 1.6885, "step": 305 }, { "epoch": 0.04176334106728538, "grad_norm": 0.19603842496871948, "learning_rate": 8e-05, "loss": 1.5694, "step": 306 }, { "epoch": 0.04189982257404122, "grad_norm": 0.22422997653484344, "learning_rate": 8e-05, "loss": 1.6848, "step": 307 }, { "epoch": 0.04203630408079705, "grad_norm": 0.20466266572475433, "learning_rate": 8e-05, "loss": 1.6764, "step": 308 }, { "epoch": 0.042172785587552886, "grad_norm": 0.2288111001253128, "learning_rate": 8e-05, "loss": 1.7092, "step": 309 }, { "epoch": 0.04230926709430872, "grad_norm": 0.2034597247838974, "learning_rate": 8e-05, "loss": 1.6341, "step": 310 }, { "epoch": 0.04244574860106456, "grad_norm": 0.20546230673789978, "learning_rate": 8e-05, "loss": 1.7258, "step": 311 }, { "epoch": 0.04258223010782039, "grad_norm": 0.22445982694625854, "learning_rate": 8e-05, "loss": 1.7117, "step": 312 }, { "epoch": 0.04271871161457622, "grad_norm": 0.20283575356006622, "learning_rate": 8e-05, "loss": 1.7253, "step": 313 }, { "epoch": 0.04285519312133206, "grad_norm": 0.2159210741519928, "learning_rate": 8e-05, "loss": 1.699, "step": 314 }, { "epoch": 0.042991674628087895, "grad_norm": 0.20850765705108643, "learning_rate": 8e-05, "loss": 1.6698, "step": 315 }, { "epoch": 0.04312815613484373, "grad_norm": 0.20244640111923218, "learning_rate": 8e-05, "loss": 1.665, "step": 316 }, { "epoch": 0.04326463764159956, "grad_norm": 0.1934514194726944, "learning_rate": 8e-05, "loss": 1.5591, "step": 317 }, { "epoch": 0.0434011191483554, "grad_norm": 0.1918637752532959, "learning_rate": 8e-05, "loss": 1.6103, "step": 318 }, { "epoch": 0.04353760065511123, "grad_norm": 0.20331163704395294, "learning_rate": 8e-05, "loss": 1.7058, "step": 319 }, { "epoch": 0.043674082161867064, "grad_norm": 0.19439738988876343, "learning_rate": 8e-05, "loss": 1.6387, "step": 320 }, { "epoch": 0.043810563668622904, "grad_norm": 0.20036499202251434, "learning_rate": 8e-05, "loss": 1.7089, "step": 321 }, { "epoch": 0.043947045175378736, "grad_norm": 0.1999746859073639, "learning_rate": 8e-05, "loss": 1.6931, "step": 322 }, { "epoch": 0.04408352668213457, "grad_norm": 0.18918439745903015, "learning_rate": 8e-05, "loss": 1.5734, "step": 323 }, { "epoch": 0.04422000818889041, "grad_norm": 0.19035042822360992, "learning_rate": 8e-05, "loss": 1.6692, "step": 324 }, { "epoch": 0.04435648969564624, "grad_norm": 0.21209511160850525, "learning_rate": 8e-05, "loss": 1.6569, "step": 325 }, { "epoch": 0.044492971202402073, "grad_norm": 0.20302721858024597, "learning_rate": 8e-05, "loss": 1.7535, "step": 326 }, { "epoch": 0.044629452709157906, "grad_norm": 0.21007706224918365, "learning_rate": 8e-05, "loss": 1.6325, "step": 327 }, { "epoch": 0.044765934215913745, "grad_norm": 0.1938336044549942, "learning_rate": 8e-05, "loss": 1.6244, "step": 328 }, { "epoch": 0.04490241572266958, "grad_norm": 0.2358740270137787, "learning_rate": 8e-05, "loss": 1.6867, "step": 329 }, { "epoch": 0.04503889722942541, "grad_norm": 0.20879460871219635, "learning_rate": 8e-05, "loss": 1.658, "step": 330 }, { "epoch": 0.04517537873618125, "grad_norm": 0.19917111098766327, "learning_rate": 8e-05, "loss": 1.6262, "step": 331 }, { "epoch": 0.04531186024293708, "grad_norm": 0.19454148411750793, "learning_rate": 8e-05, "loss": 1.6908, "step": 332 }, { "epoch": 0.045448341749692915, "grad_norm": 0.2147936373949051, "learning_rate": 8e-05, "loss": 1.6701, "step": 333 }, { "epoch": 0.045584823256448755, "grad_norm": 0.2066538780927658, "learning_rate": 8e-05, "loss": 1.6621, "step": 334 }, { "epoch": 0.04572130476320459, "grad_norm": 0.1907842755317688, "learning_rate": 8e-05, "loss": 1.6841, "step": 335 }, { "epoch": 0.04585778626996042, "grad_norm": 0.19874846935272217, "learning_rate": 8e-05, "loss": 1.6558, "step": 336 }, { "epoch": 0.04599426777671625, "grad_norm": 0.18912295997142792, "learning_rate": 8e-05, "loss": 1.6683, "step": 337 }, { "epoch": 0.04613074928347209, "grad_norm": 0.20593127608299255, "learning_rate": 8e-05, "loss": 1.6934, "step": 338 }, { "epoch": 0.046267230790227924, "grad_norm": 0.22252845764160156, "learning_rate": 8e-05, "loss": 1.6167, "step": 339 }, { "epoch": 0.04640371229698376, "grad_norm": 0.18263423442840576, "learning_rate": 8e-05, "loss": 1.6565, "step": 340 }, { "epoch": 0.046540193803739596, "grad_norm": 0.23404349386692047, "learning_rate": 8e-05, "loss": 1.6804, "step": 341 }, { "epoch": 0.04667667531049543, "grad_norm": 0.19697609543800354, "learning_rate": 8e-05, "loss": 1.685, "step": 342 }, { "epoch": 0.04681315681725126, "grad_norm": 0.22630178928375244, "learning_rate": 8e-05, "loss": 1.6312, "step": 343 }, { "epoch": 0.046949638324007094, "grad_norm": 0.21530590951442719, "learning_rate": 8e-05, "loss": 1.6038, "step": 344 }, { "epoch": 0.04708611983076293, "grad_norm": 0.2517217695713043, "learning_rate": 8e-05, "loss": 1.7455, "step": 345 }, { "epoch": 0.047222601337518766, "grad_norm": 0.20627780258655548, "learning_rate": 8e-05, "loss": 1.6001, "step": 346 }, { "epoch": 0.0473590828442746, "grad_norm": 0.25718629360198975, "learning_rate": 8e-05, "loss": 1.6864, "step": 347 }, { "epoch": 0.04749556435103044, "grad_norm": 0.20818424224853516, "learning_rate": 8e-05, "loss": 1.6876, "step": 348 }, { "epoch": 0.04763204585778627, "grad_norm": 0.24214956164360046, "learning_rate": 8e-05, "loss": 1.6171, "step": 349 }, { "epoch": 0.0477685273645421, "grad_norm": 0.19357354938983917, "learning_rate": 8e-05, "loss": 1.5822, "step": 350 }, { "epoch": 0.04790500887129794, "grad_norm": 0.24596109986305237, "learning_rate": 8e-05, "loss": 1.7334, "step": 351 }, { "epoch": 0.048041490378053775, "grad_norm": 0.19316093623638153, "learning_rate": 8e-05, "loss": 1.6899, "step": 352 }, { "epoch": 0.04817797188480961, "grad_norm": 0.21627448499202728, "learning_rate": 8e-05, "loss": 1.6796, "step": 353 }, { "epoch": 0.04831445339156544, "grad_norm": 0.19135534763336182, "learning_rate": 8e-05, "loss": 1.5976, "step": 354 }, { "epoch": 0.04845093489832128, "grad_norm": 0.22228562831878662, "learning_rate": 8e-05, "loss": 1.6874, "step": 355 }, { "epoch": 0.04858741640507711, "grad_norm": 0.2129838615655899, "learning_rate": 8e-05, "loss": 1.6597, "step": 356 }, { "epoch": 0.048723897911832945, "grad_norm": 0.20773866772651672, "learning_rate": 8e-05, "loss": 1.6435, "step": 357 }, { "epoch": 0.048860379418588784, "grad_norm": 0.23375366628170013, "learning_rate": 8e-05, "loss": 1.6342, "step": 358 }, { "epoch": 0.04899686092534462, "grad_norm": 0.20398132503032684, "learning_rate": 8e-05, "loss": 1.6864, "step": 359 }, { "epoch": 0.04913334243210045, "grad_norm": 0.2301444411277771, "learning_rate": 8e-05, "loss": 1.5876, "step": 360 }, { "epoch": 0.04926982393885628, "grad_norm": 0.19762130081653595, "learning_rate": 8e-05, "loss": 1.6802, "step": 361 }, { "epoch": 0.04940630544561212, "grad_norm": 0.2054339498281479, "learning_rate": 8e-05, "loss": 1.5758, "step": 362 }, { "epoch": 0.049542786952367954, "grad_norm": 0.20791882276535034, "learning_rate": 8e-05, "loss": 1.635, "step": 363 }, { "epoch": 0.049679268459123786, "grad_norm": 0.2073269635438919, "learning_rate": 8e-05, "loss": 1.5745, "step": 364 }, { "epoch": 0.049815749965879626, "grad_norm": 0.2151254564523697, "learning_rate": 8e-05, "loss": 1.7144, "step": 365 }, { "epoch": 0.04995223147263546, "grad_norm": 0.22530192136764526, "learning_rate": 8e-05, "loss": 1.7093, "step": 366 }, { "epoch": 0.05008871297939129, "grad_norm": 0.1957068145275116, "learning_rate": 8e-05, "loss": 1.6484, "step": 367 }, { "epoch": 0.05022519448614713, "grad_norm": 0.18599842488765717, "learning_rate": 8e-05, "loss": 1.6179, "step": 368 }, { "epoch": 0.05036167599290296, "grad_norm": 0.20138153433799744, "learning_rate": 8e-05, "loss": 1.6834, "step": 369 }, { "epoch": 0.050498157499658795, "grad_norm": 0.18490669131278992, "learning_rate": 8e-05, "loss": 1.6518, "step": 370 }, { "epoch": 0.05063463900641463, "grad_norm": 0.225370392203331, "learning_rate": 8e-05, "loss": 1.5999, "step": 371 }, { "epoch": 0.05077112051317047, "grad_norm": 0.18147234618663788, "learning_rate": 8e-05, "loss": 1.5972, "step": 372 }, { "epoch": 0.0509076020199263, "grad_norm": 0.26411259174346924, "learning_rate": 8e-05, "loss": 1.6782, "step": 373 }, { "epoch": 0.05104408352668213, "grad_norm": 0.18885082006454468, "learning_rate": 8e-05, "loss": 1.6966, "step": 374 }, { "epoch": 0.05118056503343797, "grad_norm": 0.23470209538936615, "learning_rate": 8e-05, "loss": 1.6684, "step": 375 }, { "epoch": 0.051317046540193804, "grad_norm": 0.1856028288602829, "learning_rate": 8e-05, "loss": 1.5842, "step": 376 }, { "epoch": 0.05145352804694964, "grad_norm": 0.1803678572177887, "learning_rate": 8e-05, "loss": 1.6273, "step": 377 }, { "epoch": 0.05159000955370547, "grad_norm": 0.20237888395786285, "learning_rate": 8e-05, "loss": 1.6027, "step": 378 }, { "epoch": 0.05172649106046131, "grad_norm": 0.18280954658985138, "learning_rate": 8e-05, "loss": 1.6077, "step": 379 }, { "epoch": 0.05186297256721714, "grad_norm": 0.22067183256149292, "learning_rate": 8e-05, "loss": 1.6794, "step": 380 }, { "epoch": 0.051999454073972974, "grad_norm": 0.2018761783838272, "learning_rate": 8e-05, "loss": 1.6902, "step": 381 }, { "epoch": 0.052135935580728814, "grad_norm": 0.22056815028190613, "learning_rate": 8e-05, "loss": 1.6127, "step": 382 }, { "epoch": 0.052272417087484646, "grad_norm": 0.18608184158802032, "learning_rate": 8e-05, "loss": 1.605, "step": 383 }, { "epoch": 0.05240889859424048, "grad_norm": 0.21474720537662506, "learning_rate": 8e-05, "loss": 1.7027, "step": 384 }, { "epoch": 0.05254538010099632, "grad_norm": 0.1835620254278183, "learning_rate": 8e-05, "loss": 1.6522, "step": 385 }, { "epoch": 0.05268186160775215, "grad_norm": 0.1864248812198639, "learning_rate": 8e-05, "loss": 1.709, "step": 386 }, { "epoch": 0.05281834311450798, "grad_norm": 0.17684119939804077, "learning_rate": 8e-05, "loss": 1.652, "step": 387 }, { "epoch": 0.052954824621263816, "grad_norm": 0.19974467158317566, "learning_rate": 8e-05, "loss": 1.6851, "step": 388 }, { "epoch": 0.053091306128019655, "grad_norm": 0.22409911453723907, "learning_rate": 8e-05, "loss": 1.5977, "step": 389 }, { "epoch": 0.05322778763477549, "grad_norm": 0.19997046887874603, "learning_rate": 8e-05, "loss": 1.6593, "step": 390 }, { "epoch": 0.05336426914153132, "grad_norm": 0.24276980757713318, "learning_rate": 8e-05, "loss": 1.5949, "step": 391 }, { "epoch": 0.05350075064828716, "grad_norm": 0.19641022384166718, "learning_rate": 8e-05, "loss": 1.6682, "step": 392 }, { "epoch": 0.05363723215504299, "grad_norm": 0.2161017209291458, "learning_rate": 8e-05, "loss": 1.6506, "step": 393 }, { "epoch": 0.053773713661798825, "grad_norm": 0.21501685678958893, "learning_rate": 8e-05, "loss": 1.6629, "step": 394 }, { "epoch": 0.05391019516855466, "grad_norm": 0.19006696343421936, "learning_rate": 8e-05, "loss": 1.6657, "step": 395 }, { "epoch": 0.0540466766753105, "grad_norm": 0.21929454803466797, "learning_rate": 8e-05, "loss": 1.6503, "step": 396 }, { "epoch": 0.05418315818206633, "grad_norm": 0.1907109171152115, "learning_rate": 8e-05, "loss": 1.6453, "step": 397 }, { "epoch": 0.05431963968882216, "grad_norm": 0.19140149652957916, "learning_rate": 8e-05, "loss": 1.6601, "step": 398 }, { "epoch": 0.054456121195578, "grad_norm": 0.20610398054122925, "learning_rate": 8e-05, "loss": 1.7, "step": 399 }, { "epoch": 0.054592602702333834, "grad_norm": 0.19578817486763, "learning_rate": 8e-05, "loss": 1.5467, "step": 400 }, { "epoch": 0.054729084209089666, "grad_norm": 0.18162508308887482, "learning_rate": 8e-05, "loss": 1.6109, "step": 401 }, { "epoch": 0.054865565715845506, "grad_norm": 0.18938252329826355, "learning_rate": 8e-05, "loss": 1.6709, "step": 402 }, { "epoch": 0.05500204722260134, "grad_norm": 0.18807187676429749, "learning_rate": 8e-05, "loss": 1.639, "step": 403 }, { "epoch": 0.05513852872935717, "grad_norm": 0.19044460356235504, "learning_rate": 8e-05, "loss": 1.6816, "step": 404 }, { "epoch": 0.055275010236113004, "grad_norm": 0.18897344172000885, "learning_rate": 8e-05, "loss": 1.5969, "step": 405 }, { "epoch": 0.05541149174286884, "grad_norm": 0.187645822763443, "learning_rate": 8e-05, "loss": 1.6859, "step": 406 }, { "epoch": 0.055547973249624676, "grad_norm": 0.192600280046463, "learning_rate": 8e-05, "loss": 1.6359, "step": 407 }, { "epoch": 0.05568445475638051, "grad_norm": 0.1796467900276184, "learning_rate": 8e-05, "loss": 1.6123, "step": 408 }, { "epoch": 0.05582093626313635, "grad_norm": 0.19574713706970215, "learning_rate": 8e-05, "loss": 1.602, "step": 409 }, { "epoch": 0.05595741776989218, "grad_norm": 0.19001144170761108, "learning_rate": 8e-05, "loss": 1.6127, "step": 410 }, { "epoch": 0.05609389927664801, "grad_norm": 0.18304583430290222, "learning_rate": 8e-05, "loss": 1.5418, "step": 411 }, { "epoch": 0.05623038078340385, "grad_norm": 0.17753589153289795, "learning_rate": 8e-05, "loss": 1.6737, "step": 412 }, { "epoch": 0.056366862290159685, "grad_norm": 0.19941185414791107, "learning_rate": 8e-05, "loss": 1.7627, "step": 413 }, { "epoch": 0.05650334379691552, "grad_norm": 0.20377519726753235, "learning_rate": 8e-05, "loss": 1.6124, "step": 414 }, { "epoch": 0.05663982530367135, "grad_norm": 0.1892452836036682, "learning_rate": 8e-05, "loss": 1.6369, "step": 415 }, { "epoch": 0.05677630681042719, "grad_norm": 0.18668115139007568, "learning_rate": 8e-05, "loss": 1.7098, "step": 416 }, { "epoch": 0.05691278831718302, "grad_norm": 0.19333577156066895, "learning_rate": 8e-05, "loss": 1.6651, "step": 417 }, { "epoch": 0.057049269823938854, "grad_norm": 0.18673676252365112, "learning_rate": 8e-05, "loss": 1.673, "step": 418 }, { "epoch": 0.057185751330694694, "grad_norm": 0.18598119914531708, "learning_rate": 8e-05, "loss": 1.7121, "step": 419 }, { "epoch": 0.057322232837450526, "grad_norm": 0.18558545410633087, "learning_rate": 8e-05, "loss": 1.682, "step": 420 }, { "epoch": 0.05745871434420636, "grad_norm": 0.18987078964710236, "learning_rate": 8e-05, "loss": 1.7683, "step": 421 }, { "epoch": 0.05759519585096219, "grad_norm": 0.18435579538345337, "learning_rate": 8e-05, "loss": 1.6526, "step": 422 }, { "epoch": 0.05773167735771803, "grad_norm": 0.182847261428833, "learning_rate": 8e-05, "loss": 1.6763, "step": 423 }, { "epoch": 0.05786815886447386, "grad_norm": 0.1804504543542862, "learning_rate": 8e-05, "loss": 1.6196, "step": 424 }, { "epoch": 0.058004640371229696, "grad_norm": 0.18960043787956238, "learning_rate": 8e-05, "loss": 1.716, "step": 425 }, { "epoch": 0.058141121877985535, "grad_norm": 0.19457489252090454, "learning_rate": 8e-05, "loss": 1.7394, "step": 426 }, { "epoch": 0.05827760338474137, "grad_norm": 0.19392524659633636, "learning_rate": 8e-05, "loss": 1.6727, "step": 427 }, { "epoch": 0.0584140848914972, "grad_norm": 0.18370689451694489, "learning_rate": 8e-05, "loss": 1.6537, "step": 428 }, { "epoch": 0.05855056639825304, "grad_norm": 0.19954009354114532, "learning_rate": 8e-05, "loss": 1.6842, "step": 429 }, { "epoch": 0.05868704790500887, "grad_norm": 0.18143226206302643, "learning_rate": 8e-05, "loss": 1.6629, "step": 430 }, { "epoch": 0.058823529411764705, "grad_norm": 0.19837979972362518, "learning_rate": 8e-05, "loss": 1.6453, "step": 431 }, { "epoch": 0.05896001091852054, "grad_norm": 0.1801684945821762, "learning_rate": 8e-05, "loss": 1.6465, "step": 432 }, { "epoch": 0.05909649242527638, "grad_norm": 0.18215817213058472, "learning_rate": 8e-05, "loss": 1.6294, "step": 433 }, { "epoch": 0.05923297393203221, "grad_norm": 0.1929459571838379, "learning_rate": 8e-05, "loss": 1.7075, "step": 434 }, { "epoch": 0.05936945543878804, "grad_norm": 0.1846276819705963, "learning_rate": 8e-05, "loss": 1.6005, "step": 435 }, { "epoch": 0.05950593694554388, "grad_norm": 0.18948525190353394, "learning_rate": 8e-05, "loss": 1.713, "step": 436 }, { "epoch": 0.059642418452299714, "grad_norm": 0.18023772537708282, "learning_rate": 8e-05, "loss": 1.5822, "step": 437 }, { "epoch": 0.05977889995905555, "grad_norm": 0.18806883692741394, "learning_rate": 8e-05, "loss": 1.6338, "step": 438 }, { "epoch": 0.05991538146581138, "grad_norm": 0.18786229193210602, "learning_rate": 8e-05, "loss": 1.6116, "step": 439 }, { "epoch": 0.06005186297256722, "grad_norm": 0.18303732573986053, "learning_rate": 8e-05, "loss": 1.6841, "step": 440 }, { "epoch": 0.06018834447932305, "grad_norm": 0.1819573938846588, "learning_rate": 8e-05, "loss": 1.6379, "step": 441 }, { "epoch": 0.060324825986078884, "grad_norm": 0.1820116937160492, "learning_rate": 8e-05, "loss": 1.61, "step": 442 }, { "epoch": 0.06046130749283472, "grad_norm": 0.1831539124250412, "learning_rate": 8e-05, "loss": 1.6973, "step": 443 }, { "epoch": 0.060597788999590556, "grad_norm": 0.189011350274086, "learning_rate": 8e-05, "loss": 1.6912, "step": 444 }, { "epoch": 0.06073427050634639, "grad_norm": 0.19471584260463715, "learning_rate": 8e-05, "loss": 1.6684, "step": 445 }, { "epoch": 0.06087075201310223, "grad_norm": 0.18301334977149963, "learning_rate": 8e-05, "loss": 1.6273, "step": 446 }, { "epoch": 0.06100723351985806, "grad_norm": 0.17497512698173523, "learning_rate": 8e-05, "loss": 1.5936, "step": 447 }, { "epoch": 0.06114371502661389, "grad_norm": 0.19188492000102997, "learning_rate": 8e-05, "loss": 1.6072, "step": 448 }, { "epoch": 0.061280196533369725, "grad_norm": 0.18681305646896362, "learning_rate": 8e-05, "loss": 1.7043, "step": 449 }, { "epoch": 0.061416678040125565, "grad_norm": 0.18630671501159668, "learning_rate": 8e-05, "loss": 1.671, "step": 450 }, { "epoch": 0.0615531595468814, "grad_norm": 0.17838414013385773, "learning_rate": 8e-05, "loss": 1.6463, "step": 451 }, { "epoch": 0.06168964105363723, "grad_norm": 0.1982869654893875, "learning_rate": 8e-05, "loss": 1.6796, "step": 452 }, { "epoch": 0.06182612256039307, "grad_norm": 0.18733154237270355, "learning_rate": 8e-05, "loss": 1.6762, "step": 453 }, { "epoch": 0.0619626040671489, "grad_norm": 0.1786513477563858, "learning_rate": 8e-05, "loss": 1.6243, "step": 454 }, { "epoch": 0.062099085573904735, "grad_norm": 0.17923349142074585, "learning_rate": 8e-05, "loss": 1.6431, "step": 455 }, { "epoch": 0.06223556708066057, "grad_norm": 0.18081489205360413, "learning_rate": 8e-05, "loss": 1.6823, "step": 456 }, { "epoch": 0.06237204858741641, "grad_norm": 0.1793363094329834, "learning_rate": 8e-05, "loss": 1.6245, "step": 457 }, { "epoch": 0.06250853009417225, "grad_norm": 0.17861542105674744, "learning_rate": 8e-05, "loss": 1.6321, "step": 458 }, { "epoch": 0.06264501160092807, "grad_norm": 0.1745217889547348, "learning_rate": 8e-05, "loss": 1.6596, "step": 459 }, { "epoch": 0.06278149310768391, "grad_norm": 0.19002461433410645, "learning_rate": 8e-05, "loss": 1.6861, "step": 460 }, { "epoch": 0.06291797461443974, "grad_norm": 0.18763870000839233, "learning_rate": 8e-05, "loss": 1.7062, "step": 461 }, { "epoch": 0.06305445612119558, "grad_norm": 0.18333010375499725, "learning_rate": 8e-05, "loss": 1.6824, "step": 462 }, { "epoch": 0.06319093762795142, "grad_norm": 0.17597143352031708, "learning_rate": 8e-05, "loss": 1.5862, "step": 463 }, { "epoch": 0.06332741913470724, "grad_norm": 0.18492625653743744, "learning_rate": 8e-05, "loss": 1.679, "step": 464 }, { "epoch": 0.06346390064146308, "grad_norm": 0.17959477007389069, "learning_rate": 8e-05, "loss": 1.6545, "step": 465 }, { "epoch": 0.06360038214821892, "grad_norm": 0.19852040708065033, "learning_rate": 8e-05, "loss": 1.6661, "step": 466 }, { "epoch": 0.06373686365497475, "grad_norm": 0.17028813064098358, "learning_rate": 8e-05, "loss": 1.5626, "step": 467 }, { "epoch": 0.06387334516173059, "grad_norm": 0.2159101963043213, "learning_rate": 8e-05, "loss": 1.6208, "step": 468 }, { "epoch": 0.06400982666848642, "grad_norm": 0.18861988186836243, "learning_rate": 8e-05, "loss": 1.729, "step": 469 }, { "epoch": 0.06414630817524225, "grad_norm": 0.19807402789592743, "learning_rate": 8e-05, "loss": 1.665, "step": 470 }, { "epoch": 0.06428278968199809, "grad_norm": 0.19683124125003815, "learning_rate": 8e-05, "loss": 1.6051, "step": 471 }, { "epoch": 0.06441927118875393, "grad_norm": 0.22598282992839813, "learning_rate": 8e-05, "loss": 1.7043, "step": 472 }, { "epoch": 0.06455575269550975, "grad_norm": 0.19435809552669525, "learning_rate": 8e-05, "loss": 1.6718, "step": 473 }, { "epoch": 0.0646922342022656, "grad_norm": 0.19861872494220734, "learning_rate": 8e-05, "loss": 1.6778, "step": 474 }, { "epoch": 0.06482871570902143, "grad_norm": 0.19188061356544495, "learning_rate": 8e-05, "loss": 1.6291, "step": 475 }, { "epoch": 0.06496519721577726, "grad_norm": 0.18942494690418243, "learning_rate": 8e-05, "loss": 1.6663, "step": 476 }, { "epoch": 0.0651016787225331, "grad_norm": 0.21525967121124268, "learning_rate": 8e-05, "loss": 1.6194, "step": 477 }, { "epoch": 0.06523816022928892, "grad_norm": 0.19176410138607025, "learning_rate": 8e-05, "loss": 1.607, "step": 478 }, { "epoch": 0.06537464173604476, "grad_norm": 0.20893029868602753, "learning_rate": 8e-05, "loss": 1.6575, "step": 479 }, { "epoch": 0.0655111232428006, "grad_norm": 0.22374695539474487, "learning_rate": 8e-05, "loss": 1.6471, "step": 480 }, { "epoch": 0.06564760474955643, "grad_norm": 0.18589884042739868, "learning_rate": 8e-05, "loss": 1.6891, "step": 481 }, { "epoch": 0.06578408625631227, "grad_norm": 0.23982296884059906, "learning_rate": 8e-05, "loss": 1.6712, "step": 482 }, { "epoch": 0.06592056776306811, "grad_norm": 0.1724051833152771, "learning_rate": 8e-05, "loss": 1.5632, "step": 483 }, { "epoch": 0.06605704926982393, "grad_norm": 0.2297288030385971, "learning_rate": 8e-05, "loss": 1.7348, "step": 484 }, { "epoch": 0.06619353077657977, "grad_norm": 0.19763821363449097, "learning_rate": 8e-05, "loss": 1.654, "step": 485 }, { "epoch": 0.06633001228333561, "grad_norm": 0.21263867616653442, "learning_rate": 8e-05, "loss": 1.649, "step": 486 }, { "epoch": 0.06646649379009144, "grad_norm": 0.22302626073360443, "learning_rate": 8e-05, "loss": 1.6925, "step": 487 }, { "epoch": 0.06660297529684728, "grad_norm": 0.18238939344882965, "learning_rate": 8e-05, "loss": 1.5917, "step": 488 }, { "epoch": 0.06673945680360312, "grad_norm": 0.22283275425434113, "learning_rate": 8e-05, "loss": 1.554, "step": 489 }, { "epoch": 0.06687593831035894, "grad_norm": 0.18292540311813354, "learning_rate": 8e-05, "loss": 1.6418, "step": 490 }, { "epoch": 0.06701241981711478, "grad_norm": 0.21135155856609344, "learning_rate": 8e-05, "loss": 1.6997, "step": 491 }, { "epoch": 0.06714890132387062, "grad_norm": 0.18980979919433594, "learning_rate": 8e-05, "loss": 1.6658, "step": 492 }, { "epoch": 0.06728538283062645, "grad_norm": 0.17444701492786407, "learning_rate": 8e-05, "loss": 1.5922, "step": 493 }, { "epoch": 0.06742186433738229, "grad_norm": 0.19433224201202393, "learning_rate": 8e-05, "loss": 1.6231, "step": 494 }, { "epoch": 0.06755834584413811, "grad_norm": 0.18066327273845673, "learning_rate": 8e-05, "loss": 1.7068, "step": 495 }, { "epoch": 0.06769482735089395, "grad_norm": 0.17841613292694092, "learning_rate": 8e-05, "loss": 1.5933, "step": 496 }, { "epoch": 0.06783130885764979, "grad_norm": 0.1893261969089508, "learning_rate": 8e-05, "loss": 1.6119, "step": 497 }, { "epoch": 0.06796779036440562, "grad_norm": 0.1816590577363968, "learning_rate": 8e-05, "loss": 1.6872, "step": 498 }, { "epoch": 0.06810427187116146, "grad_norm": 0.18823958933353424, "learning_rate": 8e-05, "loss": 1.5941, "step": 499 }, { "epoch": 0.0682407533779173, "grad_norm": 0.19860830903053284, "learning_rate": 8e-05, "loss": 1.6583, "step": 500 }, { "epoch": 0.06837723488467312, "grad_norm": 0.18241938948631287, "learning_rate": 8e-05, "loss": 1.6137, "step": 501 }, { "epoch": 0.06851371639142896, "grad_norm": 0.20241890847682953, "learning_rate": 8e-05, "loss": 1.6397, "step": 502 }, { "epoch": 0.0686501978981848, "grad_norm": 0.1916690170764923, "learning_rate": 8e-05, "loss": 1.688, "step": 503 }, { "epoch": 0.06878667940494063, "grad_norm": 0.17784589529037476, "learning_rate": 8e-05, "loss": 1.6673, "step": 504 }, { "epoch": 0.06892316091169647, "grad_norm": 0.18047195672988892, "learning_rate": 8e-05, "loss": 1.6797, "step": 505 }, { "epoch": 0.0690596424184523, "grad_norm": 0.1743379384279251, "learning_rate": 8e-05, "loss": 1.6622, "step": 506 }, { "epoch": 0.06919612392520813, "grad_norm": 0.1787814497947693, "learning_rate": 8e-05, "loss": 1.6185, "step": 507 }, { "epoch": 0.06933260543196397, "grad_norm": 0.17678914964199066, "learning_rate": 8e-05, "loss": 1.6039, "step": 508 }, { "epoch": 0.06946908693871981, "grad_norm": 0.19351589679718018, "learning_rate": 8e-05, "loss": 1.6361, "step": 509 }, { "epoch": 0.06960556844547564, "grad_norm": 0.1810530722141266, "learning_rate": 8e-05, "loss": 1.6316, "step": 510 }, { "epoch": 0.06974204995223147, "grad_norm": 0.1901681274175644, "learning_rate": 8e-05, "loss": 1.6253, "step": 511 }, { "epoch": 0.06987853145898731, "grad_norm": 0.18215714395046234, "learning_rate": 8e-05, "loss": 1.7158, "step": 512 }, { "epoch": 0.07001501296574314, "grad_norm": 0.1821824312210083, "learning_rate": 8e-05, "loss": 1.5911, "step": 513 }, { "epoch": 0.07015149447249898, "grad_norm": 0.1817176342010498, "learning_rate": 8e-05, "loss": 1.5968, "step": 514 }, { "epoch": 0.0702879759792548, "grad_norm": 0.189289852976799, "learning_rate": 8e-05, "loss": 1.6603, "step": 515 }, { "epoch": 0.07042445748601064, "grad_norm": 0.19207283854484558, "learning_rate": 8e-05, "loss": 1.5982, "step": 516 }, { "epoch": 0.07056093899276648, "grad_norm": 0.1871204674243927, "learning_rate": 8e-05, "loss": 1.5979, "step": 517 }, { "epoch": 0.07069742049952231, "grad_norm": 0.2023966759443283, "learning_rate": 8e-05, "loss": 1.6025, "step": 518 }, { "epoch": 0.07083390200627815, "grad_norm": 0.19608986377716064, "learning_rate": 8e-05, "loss": 1.6508, "step": 519 }, { "epoch": 0.07097038351303399, "grad_norm": 0.2101517766714096, "learning_rate": 8e-05, "loss": 1.6785, "step": 520 }, { "epoch": 0.07110686501978981, "grad_norm": 0.1760852038860321, "learning_rate": 8e-05, "loss": 1.5946, "step": 521 }, { "epoch": 0.07124334652654565, "grad_norm": 0.2021082639694214, "learning_rate": 8e-05, "loss": 1.6436, "step": 522 }, { "epoch": 0.07137982803330149, "grad_norm": 0.18128426373004913, "learning_rate": 8e-05, "loss": 1.5831, "step": 523 }, { "epoch": 0.07151630954005732, "grad_norm": 0.18913501501083374, "learning_rate": 8e-05, "loss": 1.6757, "step": 524 }, { "epoch": 0.07165279104681316, "grad_norm": 0.1942007690668106, "learning_rate": 8e-05, "loss": 1.7017, "step": 525 }, { "epoch": 0.071789272553569, "grad_norm": 0.18365976214408875, "learning_rate": 8e-05, "loss": 1.6474, "step": 526 }, { "epoch": 0.07192575406032482, "grad_norm": 0.19202566146850586, "learning_rate": 8e-05, "loss": 1.6855, "step": 527 }, { "epoch": 0.07206223556708066, "grad_norm": 0.1858273595571518, "learning_rate": 8e-05, "loss": 1.6574, "step": 528 }, { "epoch": 0.0721987170738365, "grad_norm": 0.17584769427776337, "learning_rate": 8e-05, "loss": 1.6969, "step": 529 }, { "epoch": 0.07233519858059233, "grad_norm": 0.18194857239723206, "learning_rate": 8e-05, "loss": 1.6564, "step": 530 }, { "epoch": 0.07247168008734817, "grad_norm": 0.18180221319198608, "learning_rate": 8e-05, "loss": 1.6502, "step": 531 }, { "epoch": 0.07260816159410399, "grad_norm": 0.19153547286987305, "learning_rate": 8e-05, "loss": 1.6868, "step": 532 }, { "epoch": 0.07274464310085983, "grad_norm": 0.1788415163755417, "learning_rate": 8e-05, "loss": 1.6516, "step": 533 }, { "epoch": 0.07288112460761567, "grad_norm": 0.19157685339450836, "learning_rate": 8e-05, "loss": 1.6113, "step": 534 }, { "epoch": 0.0730176061143715, "grad_norm": 0.18316860496997833, "learning_rate": 8e-05, "loss": 1.5985, "step": 535 }, { "epoch": 0.07315408762112734, "grad_norm": 0.17753863334655762, "learning_rate": 8e-05, "loss": 1.6421, "step": 536 }, { "epoch": 0.07329056912788318, "grad_norm": 0.19196350872516632, "learning_rate": 8e-05, "loss": 1.6801, "step": 537 }, { "epoch": 0.073427050634639, "grad_norm": 0.18240304291248322, "learning_rate": 8e-05, "loss": 1.6567, "step": 538 }, { "epoch": 0.07356353214139484, "grad_norm": 0.1779155284166336, "learning_rate": 8e-05, "loss": 1.693, "step": 539 }, { "epoch": 0.07370001364815068, "grad_norm": 0.17845813930034637, "learning_rate": 8e-05, "loss": 1.6092, "step": 540 }, { "epoch": 0.0738364951549065, "grad_norm": 0.18534943461418152, "learning_rate": 8e-05, "loss": 1.6085, "step": 541 }, { "epoch": 0.07397297666166235, "grad_norm": 0.17992828786373138, "learning_rate": 8e-05, "loss": 1.6104, "step": 542 }, { "epoch": 0.07410945816841819, "grad_norm": 0.19209209084510803, "learning_rate": 8e-05, "loss": 1.6199, "step": 543 }, { "epoch": 0.07424593967517401, "grad_norm": 0.18622608482837677, "learning_rate": 8e-05, "loss": 1.6336, "step": 544 }, { "epoch": 0.07438242118192985, "grad_norm": 0.1822274625301361, "learning_rate": 8e-05, "loss": 1.6934, "step": 545 }, { "epoch": 0.07451890268868569, "grad_norm": 0.19041560590267181, "learning_rate": 8e-05, "loss": 1.6646, "step": 546 }, { "epoch": 0.07465538419544152, "grad_norm": 0.18942587077617645, "learning_rate": 8e-05, "loss": 1.6672, "step": 547 }, { "epoch": 0.07479186570219735, "grad_norm": 0.1978999376296997, "learning_rate": 8e-05, "loss": 1.7197, "step": 548 }, { "epoch": 0.07492834720895318, "grad_norm": 0.19729527831077576, "learning_rate": 8e-05, "loss": 1.6574, "step": 549 }, { "epoch": 0.07506482871570902, "grad_norm": 0.20745132863521576, "learning_rate": 8e-05, "loss": 1.6587, "step": 550 }, { "epoch": 0.07520131022246486, "grad_norm": 0.17871299386024475, "learning_rate": 8e-05, "loss": 1.6033, "step": 551 }, { "epoch": 0.07533779172922069, "grad_norm": 0.2038271129131317, "learning_rate": 8e-05, "loss": 1.6117, "step": 552 }, { "epoch": 0.07547427323597652, "grad_norm": 0.17945469915866852, "learning_rate": 8e-05, "loss": 1.6201, "step": 553 }, { "epoch": 0.07561075474273236, "grad_norm": 0.19727687537670135, "learning_rate": 8e-05, "loss": 1.6736, "step": 554 }, { "epoch": 0.07574723624948819, "grad_norm": 0.1985924392938614, "learning_rate": 8e-05, "loss": 1.6352, "step": 555 }, { "epoch": 0.07588371775624403, "grad_norm": 0.18431121110916138, "learning_rate": 8e-05, "loss": 1.5889, "step": 556 }, { "epoch": 0.07602019926299987, "grad_norm": 0.19703145325183868, "learning_rate": 8e-05, "loss": 1.6593, "step": 557 }, { "epoch": 0.0761566807697557, "grad_norm": 0.1927427351474762, "learning_rate": 8e-05, "loss": 1.6977, "step": 558 }, { "epoch": 0.07629316227651153, "grad_norm": 0.20637810230255127, "learning_rate": 8e-05, "loss": 1.6604, "step": 559 }, { "epoch": 0.07642964378326737, "grad_norm": 0.19992417097091675, "learning_rate": 8e-05, "loss": 1.6228, "step": 560 }, { "epoch": 0.0765661252900232, "grad_norm": 0.18012253940105438, "learning_rate": 8e-05, "loss": 1.6103, "step": 561 }, { "epoch": 0.07670260679677904, "grad_norm": 0.194854736328125, "learning_rate": 8e-05, "loss": 1.6051, "step": 562 }, { "epoch": 0.07683908830353488, "grad_norm": 0.18546807765960693, "learning_rate": 8e-05, "loss": 1.7072, "step": 563 }, { "epoch": 0.0769755698102907, "grad_norm": 0.18692143261432648, "learning_rate": 8e-05, "loss": 1.69, "step": 564 }, { "epoch": 0.07711205131704654, "grad_norm": 0.20474644005298615, "learning_rate": 8e-05, "loss": 1.6859, "step": 565 }, { "epoch": 0.07724853282380237, "grad_norm": 0.16820207238197327, "learning_rate": 8e-05, "loss": 1.6029, "step": 566 }, { "epoch": 0.07738501433055821, "grad_norm": 0.19693829119205475, "learning_rate": 8e-05, "loss": 1.6249, "step": 567 }, { "epoch": 0.07752149583731405, "grad_norm": 0.1874043047428131, "learning_rate": 8e-05, "loss": 1.6213, "step": 568 }, { "epoch": 0.07765797734406987, "grad_norm": 0.18235719203948975, "learning_rate": 8e-05, "loss": 1.6472, "step": 569 }, { "epoch": 0.07779445885082571, "grad_norm": 0.18116523325443268, "learning_rate": 8e-05, "loss": 1.63, "step": 570 }, { "epoch": 0.07793094035758155, "grad_norm": 0.18194203078746796, "learning_rate": 8e-05, "loss": 1.6328, "step": 571 }, { "epoch": 0.07806742186433738, "grad_norm": 0.17890216410160065, "learning_rate": 8e-05, "loss": 1.5576, "step": 572 }, { "epoch": 0.07820390337109322, "grad_norm": 0.21269476413726807, "learning_rate": 8e-05, "loss": 1.6526, "step": 573 }, { "epoch": 0.07834038487784906, "grad_norm": 0.17812982201576233, "learning_rate": 8e-05, "loss": 1.6321, "step": 574 }, { "epoch": 0.07847686638460488, "grad_norm": 0.20546594262123108, "learning_rate": 8e-05, "loss": 1.6051, "step": 575 }, { "epoch": 0.07861334789136072, "grad_norm": 0.1874275803565979, "learning_rate": 8e-05, "loss": 1.6454, "step": 576 }, { "epoch": 0.07874982939811656, "grad_norm": 0.17828841507434845, "learning_rate": 8e-05, "loss": 1.589, "step": 577 }, { "epoch": 0.07888631090487239, "grad_norm": 0.19493460655212402, "learning_rate": 8e-05, "loss": 1.6244, "step": 578 }, { "epoch": 0.07902279241162823, "grad_norm": 0.17887650430202484, "learning_rate": 8e-05, "loss": 1.6464, "step": 579 }, { "epoch": 0.07915927391838407, "grad_norm": 0.18878933787345886, "learning_rate": 8e-05, "loss": 1.6293, "step": 580 }, { "epoch": 0.07929575542513989, "grad_norm": 0.17782963812351227, "learning_rate": 8e-05, "loss": 1.6023, "step": 581 }, { "epoch": 0.07943223693189573, "grad_norm": 0.18788425624370575, "learning_rate": 8e-05, "loss": 1.6826, "step": 582 }, { "epoch": 0.07956871843865156, "grad_norm": 0.1840958595275879, "learning_rate": 8e-05, "loss": 1.6839, "step": 583 }, { "epoch": 0.0797051999454074, "grad_norm": 0.1809142827987671, "learning_rate": 8e-05, "loss": 1.6705, "step": 584 }, { "epoch": 0.07984168145216324, "grad_norm": 0.17688719928264618, "learning_rate": 8e-05, "loss": 1.6396, "step": 585 }, { "epoch": 0.07997816295891906, "grad_norm": 0.178343266248703, "learning_rate": 8e-05, "loss": 1.6275, "step": 586 }, { "epoch": 0.0801146444656749, "grad_norm": 0.19121424853801727, "learning_rate": 8e-05, "loss": 1.623, "step": 587 }, { "epoch": 0.08025112597243074, "grad_norm": 0.1846960335969925, "learning_rate": 8e-05, "loss": 1.6091, "step": 588 }, { "epoch": 0.08038760747918657, "grad_norm": 0.1875857263803482, "learning_rate": 8e-05, "loss": 1.7452, "step": 589 }, { "epoch": 0.0805240889859424, "grad_norm": 0.18118062615394592, "learning_rate": 8e-05, "loss": 1.707, "step": 590 }, { "epoch": 0.08066057049269824, "grad_norm": 0.17859064042568207, "learning_rate": 8e-05, "loss": 1.5662, "step": 591 }, { "epoch": 0.08079705199945407, "grad_norm": 0.19599363207817078, "learning_rate": 8e-05, "loss": 1.6767, "step": 592 }, { "epoch": 0.08093353350620991, "grad_norm": 0.17053189873695374, "learning_rate": 8e-05, "loss": 1.6022, "step": 593 }, { "epoch": 0.08107001501296575, "grad_norm": 0.17487818002700806, "learning_rate": 8e-05, "loss": 1.5905, "step": 594 }, { "epoch": 0.08120649651972157, "grad_norm": 0.17549864947795868, "learning_rate": 8e-05, "loss": 1.6034, "step": 595 }, { "epoch": 0.08134297802647741, "grad_norm": 0.17001301050186157, "learning_rate": 8e-05, "loss": 1.6029, "step": 596 }, { "epoch": 0.08147945953323325, "grad_norm": 0.17742592096328735, "learning_rate": 8e-05, "loss": 1.6001, "step": 597 }, { "epoch": 0.08161594103998908, "grad_norm": 0.18993912637233734, "learning_rate": 8e-05, "loss": 1.6296, "step": 598 }, { "epoch": 0.08175242254674492, "grad_norm": 0.18070772290229797, "learning_rate": 8e-05, "loss": 1.7273, "step": 599 }, { "epoch": 0.08188890405350074, "grad_norm": 0.17393943667411804, "learning_rate": 8e-05, "loss": 1.5863, "step": 600 }, { "epoch": 0.08202538556025658, "grad_norm": 0.18577931821346283, "learning_rate": 8e-05, "loss": 1.6604, "step": 601 }, { "epoch": 0.08216186706701242, "grad_norm": 0.1849592924118042, "learning_rate": 8e-05, "loss": 1.6337, "step": 602 }, { "epoch": 0.08229834857376825, "grad_norm": 0.17174462974071503, "learning_rate": 8e-05, "loss": 1.5765, "step": 603 }, { "epoch": 0.08243483008052409, "grad_norm": 0.17975455522537231, "learning_rate": 8e-05, "loss": 1.6085, "step": 604 }, { "epoch": 0.08257131158727993, "grad_norm": 0.18230432271957397, "learning_rate": 8e-05, "loss": 1.6576, "step": 605 }, { "epoch": 0.08270779309403575, "grad_norm": 0.18215805292129517, "learning_rate": 8e-05, "loss": 1.6604, "step": 606 }, { "epoch": 0.08284427460079159, "grad_norm": 0.17772619426250458, "learning_rate": 8e-05, "loss": 1.626, "step": 607 }, { "epoch": 0.08298075610754743, "grad_norm": 0.17819859087467194, "learning_rate": 8e-05, "loss": 1.648, "step": 608 }, { "epoch": 0.08311723761430326, "grad_norm": 0.18093952536582947, "learning_rate": 8e-05, "loss": 1.6612, "step": 609 }, { "epoch": 0.0832537191210591, "grad_norm": 0.17327451705932617, "learning_rate": 8e-05, "loss": 1.5269, "step": 610 }, { "epoch": 0.08339020062781494, "grad_norm": 0.18055465817451477, "learning_rate": 8e-05, "loss": 1.6117, "step": 611 }, { "epoch": 0.08352668213457076, "grad_norm": 0.17736971378326416, "learning_rate": 8e-05, "loss": 1.6308, "step": 612 }, { "epoch": 0.0836631636413266, "grad_norm": 0.17380128800868988, "learning_rate": 8e-05, "loss": 1.6427, "step": 613 }, { "epoch": 0.08379964514808244, "grad_norm": 0.18247854709625244, "learning_rate": 8e-05, "loss": 1.5935, "step": 614 }, { "epoch": 0.08393612665483827, "grad_norm": 0.17850947380065918, "learning_rate": 8e-05, "loss": 1.5802, "step": 615 }, { "epoch": 0.0840726081615941, "grad_norm": 0.1836286187171936, "learning_rate": 8e-05, "loss": 1.6556, "step": 616 }, { "epoch": 0.08420908966834993, "grad_norm": 0.18170252442359924, "learning_rate": 8e-05, "loss": 1.6419, "step": 617 }, { "epoch": 0.08434557117510577, "grad_norm": 0.17840580642223358, "learning_rate": 8e-05, "loss": 1.6947, "step": 618 }, { "epoch": 0.08448205268186161, "grad_norm": 0.1744961142539978, "learning_rate": 8e-05, "loss": 1.5932, "step": 619 }, { "epoch": 0.08461853418861744, "grad_norm": 0.17901203036308289, "learning_rate": 8e-05, "loss": 1.6444, "step": 620 }, { "epoch": 0.08475501569537328, "grad_norm": 0.17852306365966797, "learning_rate": 8e-05, "loss": 1.6794, "step": 621 }, { "epoch": 0.08489149720212912, "grad_norm": 0.18519847095012665, "learning_rate": 8e-05, "loss": 1.6977, "step": 622 }, { "epoch": 0.08502797870888494, "grad_norm": 0.19501279294490814, "learning_rate": 8e-05, "loss": 1.6459, "step": 623 }, { "epoch": 0.08516446021564078, "grad_norm": 0.18123270571231842, "learning_rate": 8e-05, "loss": 1.7063, "step": 624 }, { "epoch": 0.08530094172239662, "grad_norm": 0.1957874447107315, "learning_rate": 8e-05, "loss": 1.6561, "step": 625 }, { "epoch": 0.08543742322915245, "grad_norm": 0.18587222695350647, "learning_rate": 8e-05, "loss": 1.6923, "step": 626 }, { "epoch": 0.08557390473590828, "grad_norm": 0.18250805139541626, "learning_rate": 8e-05, "loss": 1.6973, "step": 627 }, { "epoch": 0.08571038624266412, "grad_norm": 0.184391051530838, "learning_rate": 8e-05, "loss": 1.5878, "step": 628 }, { "epoch": 0.08584686774941995, "grad_norm": 0.17343154549598694, "learning_rate": 8e-05, "loss": 1.6113, "step": 629 }, { "epoch": 0.08598334925617579, "grad_norm": 0.21289904415607452, "learning_rate": 8e-05, "loss": 1.6744, "step": 630 }, { "epoch": 0.08611983076293163, "grad_norm": 0.17811021208763123, "learning_rate": 8e-05, "loss": 1.5439, "step": 631 }, { "epoch": 0.08625631226968745, "grad_norm": 0.21290110051631927, "learning_rate": 8e-05, "loss": 1.7123, "step": 632 }, { "epoch": 0.0863927937764433, "grad_norm": 0.17709466814994812, "learning_rate": 8e-05, "loss": 1.629, "step": 633 }, { "epoch": 0.08652927528319912, "grad_norm": 0.20067048072814941, "learning_rate": 8e-05, "loss": 1.588, "step": 634 }, { "epoch": 0.08666575678995496, "grad_norm": 0.18714983761310577, "learning_rate": 8e-05, "loss": 1.6911, "step": 635 }, { "epoch": 0.0868022382967108, "grad_norm": 0.19722506403923035, "learning_rate": 8e-05, "loss": 1.6215, "step": 636 }, { "epoch": 0.08693871980346662, "grad_norm": 0.18553335964679718, "learning_rate": 8e-05, "loss": 1.5947, "step": 637 }, { "epoch": 0.08707520131022246, "grad_norm": 0.18402236700057983, "learning_rate": 8e-05, "loss": 1.6838, "step": 638 }, { "epoch": 0.0872116828169783, "grad_norm": 0.20606359839439392, "learning_rate": 8e-05, "loss": 1.7076, "step": 639 }, { "epoch": 0.08734816432373413, "grad_norm": 0.1979096233844757, "learning_rate": 8e-05, "loss": 1.6213, "step": 640 }, { "epoch": 0.08748464583048997, "grad_norm": 0.19418217241764069, "learning_rate": 8e-05, "loss": 1.663, "step": 641 }, { "epoch": 0.08762112733724581, "grad_norm": 0.17649537324905396, "learning_rate": 8e-05, "loss": 1.6426, "step": 642 }, { "epoch": 0.08775760884400163, "grad_norm": 0.18922159075737, "learning_rate": 8e-05, "loss": 1.6781, "step": 643 }, { "epoch": 0.08789409035075747, "grad_norm": 0.18971461057662964, "learning_rate": 8e-05, "loss": 1.6523, "step": 644 }, { "epoch": 0.08803057185751331, "grad_norm": 0.1711868941783905, "learning_rate": 8e-05, "loss": 1.5955, "step": 645 }, { "epoch": 0.08816705336426914, "grad_norm": 0.1809423565864563, "learning_rate": 8e-05, "loss": 1.6656, "step": 646 }, { "epoch": 0.08830353487102498, "grad_norm": 0.18164674937725067, "learning_rate": 8e-05, "loss": 1.6545, "step": 647 }, { "epoch": 0.08844001637778082, "grad_norm": 0.20286716520786285, "learning_rate": 8e-05, "loss": 1.7422, "step": 648 }, { "epoch": 0.08857649788453664, "grad_norm": 0.20841902494430542, "learning_rate": 8e-05, "loss": 1.6166, "step": 649 }, { "epoch": 0.08871297939129248, "grad_norm": 0.1873120218515396, "learning_rate": 8e-05, "loss": 1.6197, "step": 650 }, { "epoch": 0.08884946089804832, "grad_norm": 0.17084191739559174, "learning_rate": 8e-05, "loss": 1.5835, "step": 651 }, { "epoch": 0.08898594240480415, "grad_norm": 0.19740137457847595, "learning_rate": 8e-05, "loss": 1.6389, "step": 652 }, { "epoch": 0.08912242391155999, "grad_norm": 0.19413062930107117, "learning_rate": 8e-05, "loss": 1.6391, "step": 653 }, { "epoch": 0.08925890541831581, "grad_norm": 0.18452408909797668, "learning_rate": 8e-05, "loss": 1.654, "step": 654 }, { "epoch": 0.08939538692507165, "grad_norm": 0.22501632571220398, "learning_rate": 8e-05, "loss": 1.7583, "step": 655 }, { "epoch": 0.08953186843182749, "grad_norm": 0.1789039820432663, "learning_rate": 8e-05, "loss": 1.5751, "step": 656 }, { "epoch": 0.08966834993858332, "grad_norm": 0.19610245525836945, "learning_rate": 8e-05, "loss": 1.6414, "step": 657 }, { "epoch": 0.08980483144533916, "grad_norm": 0.17572997510433197, "learning_rate": 8e-05, "loss": 1.5355, "step": 658 }, { "epoch": 0.089941312952095, "grad_norm": 0.19441260397434235, "learning_rate": 8e-05, "loss": 1.6019, "step": 659 }, { "epoch": 0.09007779445885082, "grad_norm": 0.2057199776172638, "learning_rate": 8e-05, "loss": 1.669, "step": 660 }, { "epoch": 0.09021427596560666, "grad_norm": 0.1797274947166443, "learning_rate": 8e-05, "loss": 1.5548, "step": 661 }, { "epoch": 0.0903507574723625, "grad_norm": 0.19070973992347717, "learning_rate": 8e-05, "loss": 1.6114, "step": 662 }, { "epoch": 0.09048723897911833, "grad_norm": 0.17818237841129303, "learning_rate": 8e-05, "loss": 1.5809, "step": 663 }, { "epoch": 0.09062372048587417, "grad_norm": 0.17819111049175262, "learning_rate": 8e-05, "loss": 1.6134, "step": 664 }, { "epoch": 0.09076020199263, "grad_norm": 0.20394210517406464, "learning_rate": 8e-05, "loss": 1.653, "step": 665 }, { "epoch": 0.09089668349938583, "grad_norm": 0.18936486542224884, "learning_rate": 8e-05, "loss": 1.6626, "step": 666 }, { "epoch": 0.09103316500614167, "grad_norm": 0.1895633190870285, "learning_rate": 8e-05, "loss": 1.604, "step": 667 }, { "epoch": 0.09116964651289751, "grad_norm": 0.1924016922712326, "learning_rate": 8e-05, "loss": 1.6529, "step": 668 }, { "epoch": 0.09130612801965333, "grad_norm": 0.18355004489421844, "learning_rate": 8e-05, "loss": 1.5788, "step": 669 }, { "epoch": 0.09144260952640917, "grad_norm": 0.20524947345256805, "learning_rate": 8e-05, "loss": 1.6324, "step": 670 }, { "epoch": 0.091579091033165, "grad_norm": 0.17362678050994873, "learning_rate": 8e-05, "loss": 1.5638, "step": 671 }, { "epoch": 0.09171557253992084, "grad_norm": 0.1757184863090515, "learning_rate": 8e-05, "loss": 1.6031, "step": 672 }, { "epoch": 0.09185205404667668, "grad_norm": 0.20039305090904236, "learning_rate": 8e-05, "loss": 1.5806, "step": 673 }, { "epoch": 0.0919885355534325, "grad_norm": 0.17184749245643616, "learning_rate": 8e-05, "loss": 1.6395, "step": 674 }, { "epoch": 0.09212501706018834, "grad_norm": 0.18741856515407562, "learning_rate": 8e-05, "loss": 1.6868, "step": 675 }, { "epoch": 0.09226149856694418, "grad_norm": 0.19409061968326569, "learning_rate": 8e-05, "loss": 1.5463, "step": 676 }, { "epoch": 0.09239798007370001, "grad_norm": 0.17097792029380798, "learning_rate": 8e-05, "loss": 1.5881, "step": 677 }, { "epoch": 0.09253446158045585, "grad_norm": 0.1736527979373932, "learning_rate": 8e-05, "loss": 1.6135, "step": 678 }, { "epoch": 0.09267094308721169, "grad_norm": 0.18509618937969208, "learning_rate": 8e-05, "loss": 1.6609, "step": 679 }, { "epoch": 0.09280742459396751, "grad_norm": 0.17875801026821136, "learning_rate": 8e-05, "loss": 1.648, "step": 680 }, { "epoch": 0.09294390610072335, "grad_norm": 0.16205383837223053, "learning_rate": 8e-05, "loss": 1.4986, "step": 681 }, { "epoch": 0.09308038760747919, "grad_norm": 0.17233863472938538, "learning_rate": 8e-05, "loss": 1.6139, "step": 682 }, { "epoch": 0.09321686911423502, "grad_norm": 0.17813430726528168, "learning_rate": 8e-05, "loss": 1.6024, "step": 683 }, { "epoch": 0.09335335062099086, "grad_norm": 0.1721436083316803, "learning_rate": 8e-05, "loss": 1.5504, "step": 684 }, { "epoch": 0.0934898321277467, "grad_norm": 0.16886721551418304, "learning_rate": 8e-05, "loss": 1.5518, "step": 685 }, { "epoch": 0.09362631363450252, "grad_norm": 0.17673741281032562, "learning_rate": 8e-05, "loss": 1.6614, "step": 686 }, { "epoch": 0.09376279514125836, "grad_norm": 0.18254360556602478, "learning_rate": 8e-05, "loss": 1.6435, "step": 687 }, { "epoch": 0.09389927664801419, "grad_norm": 0.1704188734292984, "learning_rate": 8e-05, "loss": 1.5971, "step": 688 }, { "epoch": 0.09403575815477003, "grad_norm": 0.1748236119747162, "learning_rate": 8e-05, "loss": 1.6403, "step": 689 }, { "epoch": 0.09417223966152587, "grad_norm": 0.18408794701099396, "learning_rate": 8e-05, "loss": 1.7105, "step": 690 }, { "epoch": 0.09430872116828169, "grad_norm": 0.17438945174217224, "learning_rate": 8e-05, "loss": 1.6163, "step": 691 }, { "epoch": 0.09444520267503753, "grad_norm": 0.17807906866073608, "learning_rate": 8e-05, "loss": 1.6521, "step": 692 }, { "epoch": 0.09458168418179337, "grad_norm": 0.17539091408252716, "learning_rate": 8e-05, "loss": 1.613, "step": 693 }, { "epoch": 0.0947181656885492, "grad_norm": 0.17115943133831024, "learning_rate": 8e-05, "loss": 1.575, "step": 694 }, { "epoch": 0.09485464719530504, "grad_norm": 0.18094389140605927, "learning_rate": 8e-05, "loss": 1.6639, "step": 695 }, { "epoch": 0.09499112870206088, "grad_norm": 0.16728800535202026, "learning_rate": 8e-05, "loss": 1.5564, "step": 696 }, { "epoch": 0.0951276102088167, "grad_norm": 0.17686955630779266, "learning_rate": 8e-05, "loss": 1.5704, "step": 697 }, { "epoch": 0.09526409171557254, "grad_norm": 0.1698547601699829, "learning_rate": 8e-05, "loss": 1.6361, "step": 698 }, { "epoch": 0.09540057322232838, "grad_norm": 0.17156901955604553, "learning_rate": 8e-05, "loss": 1.6179, "step": 699 }, { "epoch": 0.0955370547290842, "grad_norm": 0.16910618543624878, "learning_rate": 8e-05, "loss": 1.6377, "step": 700 }, { "epoch": 0.09567353623584005, "grad_norm": 0.1788112223148346, "learning_rate": 8e-05, "loss": 1.6383, "step": 701 }, { "epoch": 0.09581001774259588, "grad_norm": 0.1680026799440384, "learning_rate": 8e-05, "loss": 1.5867, "step": 702 }, { "epoch": 0.09594649924935171, "grad_norm": 0.17168425023555756, "learning_rate": 8e-05, "loss": 1.5692, "step": 703 }, { "epoch": 0.09608298075610755, "grad_norm": 0.1807289719581604, "learning_rate": 8e-05, "loss": 1.656, "step": 704 }, { "epoch": 0.09621946226286338, "grad_norm": 0.1735907942056656, "learning_rate": 8e-05, "loss": 1.5859, "step": 705 }, { "epoch": 0.09635594376961922, "grad_norm": 0.1873815804719925, "learning_rate": 8e-05, "loss": 1.7093, "step": 706 }, { "epoch": 0.09649242527637505, "grad_norm": 0.17822548747062683, "learning_rate": 8e-05, "loss": 1.6805, "step": 707 }, { "epoch": 0.09662890678313088, "grad_norm": 0.17290425300598145, "learning_rate": 8e-05, "loss": 1.615, "step": 708 }, { "epoch": 0.09676538828988672, "grad_norm": 0.19037611782550812, "learning_rate": 8e-05, "loss": 1.6254, "step": 709 }, { "epoch": 0.09690186979664256, "grad_norm": 0.18396949768066406, "learning_rate": 8e-05, "loss": 1.6543, "step": 710 }, { "epoch": 0.09703835130339838, "grad_norm": 0.19072912633419037, "learning_rate": 8e-05, "loss": 1.5653, "step": 711 }, { "epoch": 0.09717483281015422, "grad_norm": 0.1756344735622406, "learning_rate": 8e-05, "loss": 1.6244, "step": 712 }, { "epoch": 0.09731131431691006, "grad_norm": 0.17327451705932617, "learning_rate": 8e-05, "loss": 1.5827, "step": 713 }, { "epoch": 0.09744779582366589, "grad_norm": 0.17609016597270966, "learning_rate": 8e-05, "loss": 1.5889, "step": 714 }, { "epoch": 0.09758427733042173, "grad_norm": 0.17689797282218933, "learning_rate": 8e-05, "loss": 1.6015, "step": 715 }, { "epoch": 0.09772075883717757, "grad_norm": 0.18054063618183136, "learning_rate": 8e-05, "loss": 1.6543, "step": 716 }, { "epoch": 0.0978572403439334, "grad_norm": 0.17576654255390167, "learning_rate": 8e-05, "loss": 1.5521, "step": 717 }, { "epoch": 0.09799372185068923, "grad_norm": 0.18059571087360382, "learning_rate": 8e-05, "loss": 1.638, "step": 718 }, { "epoch": 0.09813020335744507, "grad_norm": 0.18142476677894592, "learning_rate": 8e-05, "loss": 1.6408, "step": 719 }, { "epoch": 0.0982666848642009, "grad_norm": 0.18520772457122803, "learning_rate": 8e-05, "loss": 1.6612, "step": 720 }, { "epoch": 0.09840316637095674, "grad_norm": 0.18081459403038025, "learning_rate": 8e-05, "loss": 1.6467, "step": 721 }, { "epoch": 0.09853964787771256, "grad_norm": 0.18610171973705292, "learning_rate": 8e-05, "loss": 1.7697, "step": 722 }, { "epoch": 0.0986761293844684, "grad_norm": 0.17639221251010895, "learning_rate": 8e-05, "loss": 1.6672, "step": 723 }, { "epoch": 0.09881261089122424, "grad_norm": 0.17549027502536774, "learning_rate": 8e-05, "loss": 1.6471, "step": 724 }, { "epoch": 0.09894909239798007, "grad_norm": 0.18425285816192627, "learning_rate": 8e-05, "loss": 1.6699, "step": 725 }, { "epoch": 0.09908557390473591, "grad_norm": 0.17593851685523987, "learning_rate": 8e-05, "loss": 1.5275, "step": 726 }, { "epoch": 0.09922205541149175, "grad_norm": 0.18171188235282898, "learning_rate": 8e-05, "loss": 1.586, "step": 727 }, { "epoch": 0.09935853691824757, "grad_norm": 0.18206368386745453, "learning_rate": 8e-05, "loss": 1.593, "step": 728 }, { "epoch": 0.09949501842500341, "grad_norm": 0.18444791436195374, "learning_rate": 8e-05, "loss": 1.615, "step": 729 }, { "epoch": 0.09963149993175925, "grad_norm": 0.19138328731060028, "learning_rate": 8e-05, "loss": 1.5999, "step": 730 }, { "epoch": 0.09976798143851508, "grad_norm": 0.1718161404132843, "learning_rate": 8e-05, "loss": 1.5442, "step": 731 }, { "epoch": 0.09990446294527092, "grad_norm": 0.18484805524349213, "learning_rate": 8e-05, "loss": 1.64, "step": 732 }, { "epoch": 0.10004094445202676, "grad_norm": 0.18484321236610413, "learning_rate": 8e-05, "loss": 1.5446, "step": 733 }, { "epoch": 0.10017742595878258, "grad_norm": 0.1844317615032196, "learning_rate": 8e-05, "loss": 1.6799, "step": 734 }, { "epoch": 0.10031390746553842, "grad_norm": 0.17879654467105865, "learning_rate": 8e-05, "loss": 1.6757, "step": 735 }, { "epoch": 0.10045038897229426, "grad_norm": 0.1806444376707077, "learning_rate": 8e-05, "loss": 1.6026, "step": 736 }, { "epoch": 0.10058687047905009, "grad_norm": 0.17929135262966156, "learning_rate": 8e-05, "loss": 1.6354, "step": 737 }, { "epoch": 0.10072335198580593, "grad_norm": 0.1743006408214569, "learning_rate": 8e-05, "loss": 1.5847, "step": 738 }, { "epoch": 0.10085983349256175, "grad_norm": 0.18439467251300812, "learning_rate": 8e-05, "loss": 1.6701, "step": 739 }, { "epoch": 0.10099631499931759, "grad_norm": 0.18599484860897064, "learning_rate": 8e-05, "loss": 1.6073, "step": 740 }, { "epoch": 0.10113279650607343, "grad_norm": 0.1775076687335968, "learning_rate": 8e-05, "loss": 1.5868, "step": 741 }, { "epoch": 0.10126927801282926, "grad_norm": 0.18221992254257202, "learning_rate": 8e-05, "loss": 1.5985, "step": 742 }, { "epoch": 0.1014057595195851, "grad_norm": 0.19071851670742035, "learning_rate": 8e-05, "loss": 1.6711, "step": 743 }, { "epoch": 0.10154224102634093, "grad_norm": 0.1738080382347107, "learning_rate": 8e-05, "loss": 1.5843, "step": 744 }, { "epoch": 0.10167872253309676, "grad_norm": 0.18030865490436554, "learning_rate": 8e-05, "loss": 1.6202, "step": 745 }, { "epoch": 0.1018152040398526, "grad_norm": 0.18551528453826904, "learning_rate": 8e-05, "loss": 1.5975, "step": 746 }, { "epoch": 0.10195168554660844, "grad_norm": 0.1839742213487625, "learning_rate": 8e-05, "loss": 1.7085, "step": 747 }, { "epoch": 0.10208816705336426, "grad_norm": 0.16842563450336456, "learning_rate": 8e-05, "loss": 1.5614, "step": 748 }, { "epoch": 0.1022246485601201, "grad_norm": 0.18717491626739502, "learning_rate": 8e-05, "loss": 1.6272, "step": 749 }, { "epoch": 0.10236113006687594, "grad_norm": 0.1846681535243988, "learning_rate": 8e-05, "loss": 1.6348, "step": 750 }, { "epoch": 0.10249761157363177, "grad_norm": 0.19289425015449524, "learning_rate": 8e-05, "loss": 1.6517, "step": 751 }, { "epoch": 0.10263409308038761, "grad_norm": 0.1740955263376236, "learning_rate": 8e-05, "loss": 1.5315, "step": 752 }, { "epoch": 0.10277057458714345, "grad_norm": 0.18069280683994293, "learning_rate": 8e-05, "loss": 1.6629, "step": 753 }, { "epoch": 0.10290705609389927, "grad_norm": 0.17482346296310425, "learning_rate": 8e-05, "loss": 1.6385, "step": 754 }, { "epoch": 0.10304353760065511, "grad_norm": 0.1850505769252777, "learning_rate": 8e-05, "loss": 1.6082, "step": 755 }, { "epoch": 0.10318001910741094, "grad_norm": 0.17505916953086853, "learning_rate": 8e-05, "loss": 1.6234, "step": 756 }, { "epoch": 0.10331650061416678, "grad_norm": 0.17290765047073364, "learning_rate": 8e-05, "loss": 1.6003, "step": 757 }, { "epoch": 0.10345298212092262, "grad_norm": 0.17706461250782013, "learning_rate": 8e-05, "loss": 1.5897, "step": 758 }, { "epoch": 0.10358946362767844, "grad_norm": 0.18021456897258759, "learning_rate": 8e-05, "loss": 1.5928, "step": 759 }, { "epoch": 0.10372594513443428, "grad_norm": 0.176732137799263, "learning_rate": 8e-05, "loss": 1.605, "step": 760 }, { "epoch": 0.10386242664119012, "grad_norm": 0.17645902931690216, "learning_rate": 8e-05, "loss": 1.6628, "step": 761 }, { "epoch": 0.10399890814794595, "grad_norm": 0.17248065769672394, "learning_rate": 8e-05, "loss": 1.6413, "step": 762 }, { "epoch": 0.10413538965470179, "grad_norm": 0.18161921203136444, "learning_rate": 8e-05, "loss": 1.6437, "step": 763 }, { "epoch": 0.10427187116145763, "grad_norm": 0.1725803017616272, "learning_rate": 8e-05, "loss": 1.6634, "step": 764 }, { "epoch": 0.10440835266821345, "grad_norm": 0.18166513741016388, "learning_rate": 8e-05, "loss": 1.6684, "step": 765 }, { "epoch": 0.10454483417496929, "grad_norm": 0.17848937213420868, "learning_rate": 8e-05, "loss": 1.6001, "step": 766 }, { "epoch": 0.10468131568172513, "grad_norm": 0.1779319941997528, "learning_rate": 8e-05, "loss": 1.6762, "step": 767 }, { "epoch": 0.10481779718848096, "grad_norm": 0.17400763928890228, "learning_rate": 8e-05, "loss": 1.5908, "step": 768 }, { "epoch": 0.1049542786952368, "grad_norm": 0.1692686676979065, "learning_rate": 8e-05, "loss": 1.5829, "step": 769 }, { "epoch": 0.10509076020199264, "grad_norm": 0.18350636959075928, "learning_rate": 8e-05, "loss": 1.6285, "step": 770 }, { "epoch": 0.10522724170874846, "grad_norm": 0.1727180778980255, "learning_rate": 8e-05, "loss": 1.6248, "step": 771 }, { "epoch": 0.1053637232155043, "grad_norm": 0.19921256601810455, "learning_rate": 8e-05, "loss": 1.5664, "step": 772 }, { "epoch": 0.10550020472226013, "grad_norm": 0.1811063140630722, "learning_rate": 8e-05, "loss": 1.6635, "step": 773 }, { "epoch": 0.10563668622901597, "grad_norm": 0.171713188290596, "learning_rate": 8e-05, "loss": 1.6085, "step": 774 }, { "epoch": 0.1057731677357718, "grad_norm": 0.17383509874343872, "learning_rate": 8e-05, "loss": 1.6016, "step": 775 }, { "epoch": 0.10590964924252763, "grad_norm": 0.1733373999595642, "learning_rate": 8e-05, "loss": 1.5695, "step": 776 }, { "epoch": 0.10604613074928347, "grad_norm": 0.1787233203649521, "learning_rate": 8e-05, "loss": 1.6467, "step": 777 }, { "epoch": 0.10618261225603931, "grad_norm": 0.17274242639541626, "learning_rate": 8e-05, "loss": 1.5419, "step": 778 }, { "epoch": 0.10631909376279514, "grad_norm": 0.17590732872486115, "learning_rate": 8e-05, "loss": 1.6402, "step": 779 }, { "epoch": 0.10645557526955098, "grad_norm": 0.1764359325170517, "learning_rate": 8e-05, "loss": 1.5906, "step": 780 }, { "epoch": 0.10659205677630681, "grad_norm": 0.1733657717704773, "learning_rate": 8e-05, "loss": 1.6303, "step": 781 }, { "epoch": 0.10672853828306264, "grad_norm": 0.183291494846344, "learning_rate": 8e-05, "loss": 1.6155, "step": 782 }, { "epoch": 0.10686501978981848, "grad_norm": 0.1736065298318863, "learning_rate": 8e-05, "loss": 1.5983, "step": 783 }, { "epoch": 0.10700150129657432, "grad_norm": 0.17475703358650208, "learning_rate": 8e-05, "loss": 1.6154, "step": 784 }, { "epoch": 0.10713798280333015, "grad_norm": 0.17683954536914825, "learning_rate": 8e-05, "loss": 1.5617, "step": 785 }, { "epoch": 0.10727446431008598, "grad_norm": 0.17981061339378357, "learning_rate": 8e-05, "loss": 1.6209, "step": 786 }, { "epoch": 0.10741094581684182, "grad_norm": 0.17566846311092377, "learning_rate": 8e-05, "loss": 1.6507, "step": 787 }, { "epoch": 0.10754742732359765, "grad_norm": 0.18363140523433685, "learning_rate": 8e-05, "loss": 1.5931, "step": 788 }, { "epoch": 0.10768390883035349, "grad_norm": 0.18192534148693085, "learning_rate": 8e-05, "loss": 1.6672, "step": 789 }, { "epoch": 0.10782039033710931, "grad_norm": 0.17498360574245453, "learning_rate": 8e-05, "loss": 1.579, "step": 790 }, { "epoch": 0.10795687184386515, "grad_norm": 0.17999783158302307, "learning_rate": 8e-05, "loss": 1.6491, "step": 791 }, { "epoch": 0.108093353350621, "grad_norm": 0.1754375696182251, "learning_rate": 8e-05, "loss": 1.6647, "step": 792 }, { "epoch": 0.10822983485737682, "grad_norm": 0.17585621774196625, "learning_rate": 8e-05, "loss": 1.6693, "step": 793 }, { "epoch": 0.10836631636413266, "grad_norm": 0.16878916323184967, "learning_rate": 8e-05, "loss": 1.5616, "step": 794 }, { "epoch": 0.1085027978708885, "grad_norm": 0.18098954856395721, "learning_rate": 8e-05, "loss": 1.6234, "step": 795 }, { "epoch": 0.10863927937764432, "grad_norm": 0.17510724067687988, "learning_rate": 8e-05, "loss": 1.6338, "step": 796 }, { "epoch": 0.10877576088440016, "grad_norm": 0.17434807121753693, "learning_rate": 8e-05, "loss": 1.6585, "step": 797 }, { "epoch": 0.108912242391156, "grad_norm": 0.1847684234380722, "learning_rate": 8e-05, "loss": 1.6667, "step": 798 }, { "epoch": 0.10904872389791183, "grad_norm": 0.17530272901058197, "learning_rate": 8e-05, "loss": 1.5606, "step": 799 }, { "epoch": 0.10918520540466767, "grad_norm": 0.18760628998279572, "learning_rate": 8e-05, "loss": 1.6785, "step": 800 }, { "epoch": 0.10932168691142351, "grad_norm": 0.18097880482673645, "learning_rate": 8e-05, "loss": 1.6786, "step": 801 }, { "epoch": 0.10945816841817933, "grad_norm": 0.18548883497714996, "learning_rate": 8e-05, "loss": 1.6102, "step": 802 }, { "epoch": 0.10959464992493517, "grad_norm": 0.18543604016304016, "learning_rate": 8e-05, "loss": 1.6156, "step": 803 }, { "epoch": 0.10973113143169101, "grad_norm": 0.17286135256290436, "learning_rate": 8e-05, "loss": 1.5581, "step": 804 }, { "epoch": 0.10986761293844684, "grad_norm": 0.1733119934797287, "learning_rate": 8e-05, "loss": 1.644, "step": 805 }, { "epoch": 0.11000409444520268, "grad_norm": 0.17915892601013184, "learning_rate": 8e-05, "loss": 1.5881, "step": 806 }, { "epoch": 0.11014057595195852, "grad_norm": 0.1865999549627304, "learning_rate": 8e-05, "loss": 1.5852, "step": 807 }, { "epoch": 0.11027705745871434, "grad_norm": 0.18712003529071808, "learning_rate": 8e-05, "loss": 1.6569, "step": 808 }, { "epoch": 0.11041353896547018, "grad_norm": 0.18837514519691467, "learning_rate": 8e-05, "loss": 1.6171, "step": 809 }, { "epoch": 0.11055002047222601, "grad_norm": 0.19650830328464508, "learning_rate": 8e-05, "loss": 1.6427, "step": 810 }, { "epoch": 0.11068650197898185, "grad_norm": 0.1765844076871872, "learning_rate": 8e-05, "loss": 1.648, "step": 811 }, { "epoch": 0.11082298348573769, "grad_norm": 0.19629192352294922, "learning_rate": 8e-05, "loss": 1.6166, "step": 812 }, { "epoch": 0.11095946499249351, "grad_norm": 0.17397604882717133, "learning_rate": 8e-05, "loss": 1.6121, "step": 813 }, { "epoch": 0.11109594649924935, "grad_norm": 0.18724505603313446, "learning_rate": 8e-05, "loss": 1.5744, "step": 814 }, { "epoch": 0.11123242800600519, "grad_norm": 0.1819848269224167, "learning_rate": 8e-05, "loss": 1.5901, "step": 815 }, { "epoch": 0.11136890951276102, "grad_norm": 0.17821615934371948, "learning_rate": 8e-05, "loss": 1.562, "step": 816 }, { "epoch": 0.11150539101951686, "grad_norm": 0.1906854659318924, "learning_rate": 8e-05, "loss": 1.6737, "step": 817 }, { "epoch": 0.1116418725262727, "grad_norm": 0.17341342568397522, "learning_rate": 8e-05, "loss": 1.613, "step": 818 }, { "epoch": 0.11177835403302852, "grad_norm": 0.1838037222623825, "learning_rate": 8e-05, "loss": 1.6355, "step": 819 }, { "epoch": 0.11191483553978436, "grad_norm": 0.17821507155895233, "learning_rate": 8e-05, "loss": 1.608, "step": 820 }, { "epoch": 0.1120513170465402, "grad_norm": 0.17655165493488312, "learning_rate": 8e-05, "loss": 1.6209, "step": 821 }, { "epoch": 0.11218779855329603, "grad_norm": 0.19318488240242004, "learning_rate": 8e-05, "loss": 1.7248, "step": 822 }, { "epoch": 0.11232428006005186, "grad_norm": 0.19512103497982025, "learning_rate": 8e-05, "loss": 1.6075, "step": 823 }, { "epoch": 0.1124607615668077, "grad_norm": 0.17627279460430145, "learning_rate": 8e-05, "loss": 1.5919, "step": 824 }, { "epoch": 0.11259724307356353, "grad_norm": 0.17704002559185028, "learning_rate": 8e-05, "loss": 1.6296, "step": 825 }, { "epoch": 0.11273372458031937, "grad_norm": 0.1744222790002823, "learning_rate": 8e-05, "loss": 1.5609, "step": 826 }, { "epoch": 0.1128702060870752, "grad_norm": 0.18518079817295074, "learning_rate": 8e-05, "loss": 1.5868, "step": 827 }, { "epoch": 0.11300668759383103, "grad_norm": 0.1840139776468277, "learning_rate": 8e-05, "loss": 1.6527, "step": 828 }, { "epoch": 0.11314316910058687, "grad_norm": 0.18673016130924225, "learning_rate": 8e-05, "loss": 1.6941, "step": 829 }, { "epoch": 0.1132796506073427, "grad_norm": 0.1809215247631073, "learning_rate": 8e-05, "loss": 1.6359, "step": 830 }, { "epoch": 0.11341613211409854, "grad_norm": 0.17370539903640747, "learning_rate": 8e-05, "loss": 1.6307, "step": 831 }, { "epoch": 0.11355261362085438, "grad_norm": 0.1796826869249344, "learning_rate": 8e-05, "loss": 1.6541, "step": 832 }, { "epoch": 0.1136890951276102, "grad_norm": 0.18027593195438385, "learning_rate": 8e-05, "loss": 1.5867, "step": 833 }, { "epoch": 0.11382557663436604, "grad_norm": 0.17990680038928986, "learning_rate": 8e-05, "loss": 1.6446, "step": 834 }, { "epoch": 0.11396205814112188, "grad_norm": 0.1757950484752655, "learning_rate": 8e-05, "loss": 1.6375, "step": 835 }, { "epoch": 0.11409853964787771, "grad_norm": 0.17497842013835907, "learning_rate": 8e-05, "loss": 1.6421, "step": 836 }, { "epoch": 0.11423502115463355, "grad_norm": 0.17865315079689026, "learning_rate": 8e-05, "loss": 1.6132, "step": 837 }, { "epoch": 0.11437150266138939, "grad_norm": 0.171797513961792, "learning_rate": 8e-05, "loss": 1.5798, "step": 838 }, { "epoch": 0.11450798416814521, "grad_norm": 0.18378469347953796, "learning_rate": 8e-05, "loss": 1.6889, "step": 839 }, { "epoch": 0.11464446567490105, "grad_norm": 0.17329350113868713, "learning_rate": 8e-05, "loss": 1.6126, "step": 840 }, { "epoch": 0.11478094718165689, "grad_norm": 0.17543116211891174, "learning_rate": 8e-05, "loss": 1.6117, "step": 841 }, { "epoch": 0.11491742868841272, "grad_norm": 0.17282266914844513, "learning_rate": 8e-05, "loss": 1.5835, "step": 842 }, { "epoch": 0.11505391019516856, "grad_norm": 0.1784832775592804, "learning_rate": 8e-05, "loss": 1.6578, "step": 843 }, { "epoch": 0.11519039170192438, "grad_norm": 0.17148315906524658, "learning_rate": 8e-05, "loss": 1.5482, "step": 844 }, { "epoch": 0.11532687320868022, "grad_norm": 0.17613743245601654, "learning_rate": 8e-05, "loss": 1.627, "step": 845 }, { "epoch": 0.11546335471543606, "grad_norm": 0.18533572554588318, "learning_rate": 8e-05, "loss": 1.7499, "step": 846 }, { "epoch": 0.11559983622219189, "grad_norm": 0.1787489503622055, "learning_rate": 8e-05, "loss": 1.6186, "step": 847 }, { "epoch": 0.11573631772894773, "grad_norm": 0.1767359972000122, "learning_rate": 8e-05, "loss": 1.6634, "step": 848 }, { "epoch": 0.11587279923570357, "grad_norm": 0.1792907565832138, "learning_rate": 8e-05, "loss": 1.6119, "step": 849 }, { "epoch": 0.11600928074245939, "grad_norm": 0.18009668588638306, "learning_rate": 8e-05, "loss": 1.5623, "step": 850 }, { "epoch": 0.11614576224921523, "grad_norm": 0.18194961547851562, "learning_rate": 8e-05, "loss": 1.6383, "step": 851 }, { "epoch": 0.11628224375597107, "grad_norm": 0.18576505780220032, "learning_rate": 8e-05, "loss": 1.6717, "step": 852 }, { "epoch": 0.1164187252627269, "grad_norm": 0.18106262385845184, "learning_rate": 8e-05, "loss": 1.7024, "step": 853 }, { "epoch": 0.11655520676948274, "grad_norm": 0.17341932654380798, "learning_rate": 8e-05, "loss": 1.6119, "step": 854 }, { "epoch": 0.11669168827623858, "grad_norm": 0.183492511510849, "learning_rate": 8e-05, "loss": 1.6422, "step": 855 }, { "epoch": 0.1168281697829944, "grad_norm": 0.17791801691055298, "learning_rate": 8e-05, "loss": 1.5885, "step": 856 }, { "epoch": 0.11696465128975024, "grad_norm": 0.184437096118927, "learning_rate": 8e-05, "loss": 1.6706, "step": 857 }, { "epoch": 0.11710113279650608, "grad_norm": 0.17684046924114227, "learning_rate": 8e-05, "loss": 1.6077, "step": 858 }, { "epoch": 0.1172376143032619, "grad_norm": 0.1801089197397232, "learning_rate": 8e-05, "loss": 1.6257, "step": 859 }, { "epoch": 0.11737409581001775, "grad_norm": 0.1937098205089569, "learning_rate": 8e-05, "loss": 1.5849, "step": 860 }, { "epoch": 0.11751057731677357, "grad_norm": 0.18046562373638153, "learning_rate": 8e-05, "loss": 1.5483, "step": 861 }, { "epoch": 0.11764705882352941, "grad_norm": 0.17740978300571442, "learning_rate": 8e-05, "loss": 1.6336, "step": 862 }, { "epoch": 0.11778354033028525, "grad_norm": 0.1771164983510971, "learning_rate": 8e-05, "loss": 1.5904, "step": 863 }, { "epoch": 0.11792002183704108, "grad_norm": 0.17637817561626434, "learning_rate": 8e-05, "loss": 1.5997, "step": 864 }, { "epoch": 0.11805650334379691, "grad_norm": 0.17993707954883575, "learning_rate": 8e-05, "loss": 1.6397, "step": 865 }, { "epoch": 0.11819298485055275, "grad_norm": 0.18234601616859436, "learning_rate": 8e-05, "loss": 1.6226, "step": 866 }, { "epoch": 0.11832946635730858, "grad_norm": 0.18139328062534332, "learning_rate": 8e-05, "loss": 1.677, "step": 867 }, { "epoch": 0.11846594786406442, "grad_norm": 0.17290136218070984, "learning_rate": 8e-05, "loss": 1.5314, "step": 868 }, { "epoch": 0.11860242937082026, "grad_norm": 0.1816997528076172, "learning_rate": 8e-05, "loss": 1.6601, "step": 869 }, { "epoch": 0.11873891087757608, "grad_norm": 0.18639543652534485, "learning_rate": 8e-05, "loss": 1.6538, "step": 870 }, { "epoch": 0.11887539238433192, "grad_norm": 0.17828713357448578, "learning_rate": 8e-05, "loss": 1.6684, "step": 871 }, { "epoch": 0.11901187389108776, "grad_norm": 0.1788337528705597, "learning_rate": 8e-05, "loss": 1.6158, "step": 872 }, { "epoch": 0.11914835539784359, "grad_norm": 0.17844010889530182, "learning_rate": 8e-05, "loss": 1.5794, "step": 873 }, { "epoch": 0.11928483690459943, "grad_norm": 0.1788933128118515, "learning_rate": 8e-05, "loss": 1.6747, "step": 874 }, { "epoch": 0.11942131841135527, "grad_norm": 0.18061116337776184, "learning_rate": 8e-05, "loss": 1.6569, "step": 875 }, { "epoch": 0.1195577999181111, "grad_norm": 0.1775013953447342, "learning_rate": 8e-05, "loss": 1.5957, "step": 876 }, { "epoch": 0.11969428142486693, "grad_norm": 0.17435961961746216, "learning_rate": 8e-05, "loss": 1.5344, "step": 877 }, { "epoch": 0.11983076293162276, "grad_norm": 0.17335893213748932, "learning_rate": 8e-05, "loss": 1.471, "step": 878 }, { "epoch": 0.1199672444383786, "grad_norm": 0.20797888934612274, "learning_rate": 8e-05, "loss": 1.5969, "step": 879 }, { "epoch": 0.12010372594513444, "grad_norm": 0.18705587089061737, "learning_rate": 8e-05, "loss": 1.5795, "step": 880 }, { "epoch": 0.12024020745189026, "grad_norm": 0.20102541148662567, "learning_rate": 8e-05, "loss": 1.6551, "step": 881 }, { "epoch": 0.1203766889586461, "grad_norm": 0.18595711886882782, "learning_rate": 8e-05, "loss": 1.5661, "step": 882 }, { "epoch": 0.12051317046540194, "grad_norm": 0.18247805535793304, "learning_rate": 8e-05, "loss": 1.6687, "step": 883 }, { "epoch": 0.12064965197215777, "grad_norm": 0.18416698276996613, "learning_rate": 8e-05, "loss": 1.6469, "step": 884 }, { "epoch": 0.12078613347891361, "grad_norm": 0.17978128790855408, "learning_rate": 8e-05, "loss": 1.5843, "step": 885 }, { "epoch": 0.12092261498566945, "grad_norm": 0.18981236219406128, "learning_rate": 8e-05, "loss": 1.6247, "step": 886 }, { "epoch": 0.12105909649242527, "grad_norm": 0.17937511205673218, "learning_rate": 8e-05, "loss": 1.5445, "step": 887 }, { "epoch": 0.12119557799918111, "grad_norm": 0.18684163689613342, "learning_rate": 8e-05, "loss": 1.6342, "step": 888 }, { "epoch": 0.12133205950593695, "grad_norm": 0.17635121941566467, "learning_rate": 8e-05, "loss": 1.5577, "step": 889 }, { "epoch": 0.12146854101269278, "grad_norm": 0.1840089112520218, "learning_rate": 8e-05, "loss": 1.6777, "step": 890 }, { "epoch": 0.12160502251944862, "grad_norm": 0.1795346438884735, "learning_rate": 8e-05, "loss": 1.6696, "step": 891 }, { "epoch": 0.12174150402620446, "grad_norm": 0.176628977060318, "learning_rate": 8e-05, "loss": 1.6765, "step": 892 }, { "epoch": 0.12187798553296028, "grad_norm": 0.1766405552625656, "learning_rate": 8e-05, "loss": 1.522, "step": 893 }, { "epoch": 0.12201446703971612, "grad_norm": 0.1855059564113617, "learning_rate": 8e-05, "loss": 1.5793, "step": 894 }, { "epoch": 0.12215094854647195, "grad_norm": 0.16692188382148743, "learning_rate": 8e-05, "loss": 1.5426, "step": 895 }, { "epoch": 0.12228743005322779, "grad_norm": 0.18360888957977295, "learning_rate": 8e-05, "loss": 1.656, "step": 896 }, { "epoch": 0.12242391155998363, "grad_norm": 0.19752137362957, "learning_rate": 8e-05, "loss": 1.6744, "step": 897 }, { "epoch": 0.12256039306673945, "grad_norm": 0.17349641025066376, "learning_rate": 8e-05, "loss": 1.5743, "step": 898 }, { "epoch": 0.12269687457349529, "grad_norm": 0.1835305094718933, "learning_rate": 8e-05, "loss": 1.6503, "step": 899 }, { "epoch": 0.12283335608025113, "grad_norm": 0.17954355478286743, "learning_rate": 8e-05, "loss": 1.6242, "step": 900 }, { "epoch": 0.12296983758700696, "grad_norm": 0.17774495482444763, "learning_rate": 8e-05, "loss": 1.569, "step": 901 }, { "epoch": 0.1231063190937628, "grad_norm": 0.18133053183555603, "learning_rate": 8e-05, "loss": 1.6259, "step": 902 }, { "epoch": 0.12324280060051863, "grad_norm": 0.1818680465221405, "learning_rate": 8e-05, "loss": 1.6475, "step": 903 }, { "epoch": 0.12337928210727446, "grad_norm": 0.17560385167598724, "learning_rate": 8e-05, "loss": 1.5604, "step": 904 }, { "epoch": 0.1235157636140303, "grad_norm": 0.17599783837795258, "learning_rate": 8e-05, "loss": 1.556, "step": 905 }, { "epoch": 0.12365224512078614, "grad_norm": 0.18171408772468567, "learning_rate": 8e-05, "loss": 1.6056, "step": 906 }, { "epoch": 0.12378872662754196, "grad_norm": 0.190260112285614, "learning_rate": 8e-05, "loss": 1.6413, "step": 907 }, { "epoch": 0.1239252081342978, "grad_norm": 0.18249057233333588, "learning_rate": 8e-05, "loss": 1.6704, "step": 908 }, { "epoch": 0.12406168964105364, "grad_norm": 0.1811327487230301, "learning_rate": 8e-05, "loss": 1.5384, "step": 909 }, { "epoch": 0.12419817114780947, "grad_norm": 0.18575896322727203, "learning_rate": 8e-05, "loss": 1.6392, "step": 910 }, { "epoch": 0.12433465265456531, "grad_norm": 0.20230241119861603, "learning_rate": 8e-05, "loss": 1.615, "step": 911 }, { "epoch": 0.12447113416132113, "grad_norm": 0.1710241734981537, "learning_rate": 8e-05, "loss": 1.4948, "step": 912 }, { "epoch": 0.12460761566807697, "grad_norm": 0.21191109716892242, "learning_rate": 8e-05, "loss": 1.6232, "step": 913 }, { "epoch": 0.12474409717483281, "grad_norm": 0.1764707863330841, "learning_rate": 8e-05, "loss": 1.5538, "step": 914 }, { "epoch": 0.12488057868158864, "grad_norm": 0.1901218146085739, "learning_rate": 8e-05, "loss": 1.6123, "step": 915 }, { "epoch": 0.1250170601883445, "grad_norm": 0.17722094058990479, "learning_rate": 8e-05, "loss": 1.6695, "step": 916 }, { "epoch": 0.12515354169510032, "grad_norm": 0.1798778921365738, "learning_rate": 8e-05, "loss": 1.6525, "step": 917 }, { "epoch": 0.12529002320185614, "grad_norm": 0.17167997360229492, "learning_rate": 8e-05, "loss": 1.5566, "step": 918 }, { "epoch": 0.125426504708612, "grad_norm": 0.17248013615608215, "learning_rate": 8e-05, "loss": 1.5774, "step": 919 }, { "epoch": 0.12556298621536782, "grad_norm": 0.194889098405838, "learning_rate": 8e-05, "loss": 1.6303, "step": 920 }, { "epoch": 0.12569946772212365, "grad_norm": 0.17680509388446808, "learning_rate": 8e-05, "loss": 1.6224, "step": 921 }, { "epoch": 0.12583594922887947, "grad_norm": 0.18244650959968567, "learning_rate": 8e-05, "loss": 1.6834, "step": 922 }, { "epoch": 0.12597243073563533, "grad_norm": 0.17713268101215363, "learning_rate": 8e-05, "loss": 1.6018, "step": 923 }, { "epoch": 0.12610891224239115, "grad_norm": 0.17911219596862793, "learning_rate": 8e-05, "loss": 1.6386, "step": 924 }, { "epoch": 0.12624539374914698, "grad_norm": 0.17873425781726837, "learning_rate": 8e-05, "loss": 1.5517, "step": 925 }, { "epoch": 0.12638187525590283, "grad_norm": 0.1766316145658493, "learning_rate": 8e-05, "loss": 1.6051, "step": 926 }, { "epoch": 0.12651835676265866, "grad_norm": 0.17669962346553802, "learning_rate": 8e-05, "loss": 1.619, "step": 927 }, { "epoch": 0.12665483826941448, "grad_norm": 0.18439628183841705, "learning_rate": 8e-05, "loss": 1.6189, "step": 928 }, { "epoch": 0.12679131977617034, "grad_norm": 0.1775929480791092, "learning_rate": 8e-05, "loss": 1.5436, "step": 929 }, { "epoch": 0.12692780128292616, "grad_norm": 0.18298010528087616, "learning_rate": 8e-05, "loss": 1.5582, "step": 930 }, { "epoch": 0.127064282789682, "grad_norm": 0.1873892992734909, "learning_rate": 8e-05, "loss": 1.619, "step": 931 }, { "epoch": 0.12720076429643784, "grad_norm": 0.17163720726966858, "learning_rate": 8e-05, "loss": 1.5191, "step": 932 }, { "epoch": 0.12733724580319367, "grad_norm": 0.18349236249923706, "learning_rate": 8e-05, "loss": 1.5994, "step": 933 }, { "epoch": 0.1274737273099495, "grad_norm": 0.1902233362197876, "learning_rate": 8e-05, "loss": 1.6826, "step": 934 }, { "epoch": 0.12761020881670534, "grad_norm": 0.1762106865644455, "learning_rate": 8e-05, "loss": 1.5892, "step": 935 }, { "epoch": 0.12774669032346117, "grad_norm": 0.18263155221939087, "learning_rate": 8e-05, "loss": 1.6569, "step": 936 }, { "epoch": 0.127883171830217, "grad_norm": 0.1742410808801651, "learning_rate": 8e-05, "loss": 1.634, "step": 937 }, { "epoch": 0.12801965333697285, "grad_norm": 0.1760226935148239, "learning_rate": 8e-05, "loss": 1.5318, "step": 938 }, { "epoch": 0.12815613484372868, "grad_norm": 0.17952662706375122, "learning_rate": 8e-05, "loss": 1.5442, "step": 939 }, { "epoch": 0.1282926163504845, "grad_norm": 0.1722107231616974, "learning_rate": 8e-05, "loss": 1.6329, "step": 940 }, { "epoch": 0.12842909785724035, "grad_norm": 0.19019272923469543, "learning_rate": 8e-05, "loss": 1.6392, "step": 941 }, { "epoch": 0.12856557936399618, "grad_norm": 0.1751847267150879, "learning_rate": 8e-05, "loss": 1.6177, "step": 942 }, { "epoch": 0.128702060870752, "grad_norm": 0.17907164990901947, "learning_rate": 8e-05, "loss": 1.5831, "step": 943 }, { "epoch": 0.12883854237750786, "grad_norm": 0.18604299426078796, "learning_rate": 8e-05, "loss": 1.5701, "step": 944 }, { "epoch": 0.12897502388426368, "grad_norm": 0.18538393080234528, "learning_rate": 8e-05, "loss": 1.6417, "step": 945 }, { "epoch": 0.1291115053910195, "grad_norm": 0.19442118704319, "learning_rate": 8e-05, "loss": 1.6245, "step": 946 }, { "epoch": 0.12924798689777536, "grad_norm": 0.17790429294109344, "learning_rate": 8e-05, "loss": 1.5531, "step": 947 }, { "epoch": 0.1293844684045312, "grad_norm": 0.2003907859325409, "learning_rate": 8e-05, "loss": 1.6179, "step": 948 }, { "epoch": 0.12952094991128701, "grad_norm": 0.17197178304195404, "learning_rate": 8e-05, "loss": 1.5862, "step": 949 }, { "epoch": 0.12965743141804287, "grad_norm": 0.17345573008060455, "learning_rate": 8e-05, "loss": 1.5621, "step": 950 }, { "epoch": 0.1297939129247987, "grad_norm": 0.18658950924873352, "learning_rate": 8e-05, "loss": 1.6306, "step": 951 }, { "epoch": 0.12993039443155452, "grad_norm": 0.18130122125148773, "learning_rate": 8e-05, "loss": 1.6065, "step": 952 }, { "epoch": 0.13006687593831037, "grad_norm": 0.19098636507987976, "learning_rate": 8e-05, "loss": 1.642, "step": 953 }, { "epoch": 0.1302033574450662, "grad_norm": 0.187925323843956, "learning_rate": 8e-05, "loss": 1.6444, "step": 954 }, { "epoch": 0.13033983895182202, "grad_norm": 0.1866169571876526, "learning_rate": 8e-05, "loss": 1.6347, "step": 955 }, { "epoch": 0.13047632045857785, "grad_norm": 0.18686704337596893, "learning_rate": 8e-05, "loss": 1.6261, "step": 956 }, { "epoch": 0.1306128019653337, "grad_norm": 0.17064644396305084, "learning_rate": 8e-05, "loss": 1.6082, "step": 957 }, { "epoch": 0.13074928347208953, "grad_norm": 0.199337437748909, "learning_rate": 8e-05, "loss": 1.5889, "step": 958 }, { "epoch": 0.13088576497884535, "grad_norm": 0.17565563321113586, "learning_rate": 8e-05, "loss": 1.595, "step": 959 }, { "epoch": 0.1310222464856012, "grad_norm": 0.18454402685165405, "learning_rate": 8e-05, "loss": 1.5688, "step": 960 }, { "epoch": 0.13115872799235703, "grad_norm": 0.17383253574371338, "learning_rate": 8e-05, "loss": 1.5263, "step": 961 }, { "epoch": 0.13129520949911286, "grad_norm": 0.17539046704769135, "learning_rate": 8e-05, "loss": 1.533, "step": 962 }, { "epoch": 0.1314316910058687, "grad_norm": 0.18956957757472992, "learning_rate": 8e-05, "loss": 1.6146, "step": 963 }, { "epoch": 0.13156817251262454, "grad_norm": 0.1752173900604248, "learning_rate": 8e-05, "loss": 1.6185, "step": 964 }, { "epoch": 0.13170465401938036, "grad_norm": 0.17612940073013306, "learning_rate": 8e-05, "loss": 1.632, "step": 965 }, { "epoch": 0.13184113552613622, "grad_norm": 0.1937987357378006, "learning_rate": 8e-05, "loss": 1.6381, "step": 966 }, { "epoch": 0.13197761703289204, "grad_norm": 0.17771616578102112, "learning_rate": 8e-05, "loss": 1.606, "step": 967 }, { "epoch": 0.13211409853964787, "grad_norm": 0.17480716109275818, "learning_rate": 8e-05, "loss": 1.4751, "step": 968 }, { "epoch": 0.13225058004640372, "grad_norm": 0.17813922464847565, "learning_rate": 8e-05, "loss": 1.5524, "step": 969 }, { "epoch": 0.13238706155315955, "grad_norm": 0.18162702023983002, "learning_rate": 8e-05, "loss": 1.5679, "step": 970 }, { "epoch": 0.13252354305991537, "grad_norm": 0.18407806754112244, "learning_rate": 8e-05, "loss": 1.6155, "step": 971 }, { "epoch": 0.13266002456667123, "grad_norm": 0.18449129164218903, "learning_rate": 8e-05, "loss": 1.5685, "step": 972 }, { "epoch": 0.13279650607342705, "grad_norm": 0.20189370214939117, "learning_rate": 8e-05, "loss": 1.6208, "step": 973 }, { "epoch": 0.13293298758018288, "grad_norm": 0.17681576311588287, "learning_rate": 8e-05, "loss": 1.61, "step": 974 }, { "epoch": 0.13306946908693873, "grad_norm": 0.1731061041355133, "learning_rate": 8e-05, "loss": 1.5359, "step": 975 }, { "epoch": 0.13320595059369456, "grad_norm": 0.18673290312290192, "learning_rate": 8e-05, "loss": 1.6002, "step": 976 }, { "epoch": 0.13334243210045038, "grad_norm": 0.18171891570091248, "learning_rate": 8e-05, "loss": 1.6136, "step": 977 }, { "epoch": 0.13347891360720623, "grad_norm": 0.1831137239933014, "learning_rate": 8e-05, "loss": 1.608, "step": 978 }, { "epoch": 0.13361539511396206, "grad_norm": 0.17454639077186584, "learning_rate": 8e-05, "loss": 1.5923, "step": 979 }, { "epoch": 0.13375187662071789, "grad_norm": 0.18279410898685455, "learning_rate": 8e-05, "loss": 1.6134, "step": 980 }, { "epoch": 0.13388835812747374, "grad_norm": 0.17709580063819885, "learning_rate": 8e-05, "loss": 1.54, "step": 981 }, { "epoch": 0.13402483963422956, "grad_norm": 0.1750289499759674, "learning_rate": 8e-05, "loss": 1.5738, "step": 982 }, { "epoch": 0.1341613211409854, "grad_norm": 0.17871804535388947, "learning_rate": 8e-05, "loss": 1.6181, "step": 983 }, { "epoch": 0.13429780264774124, "grad_norm": 0.17914311587810516, "learning_rate": 8e-05, "loss": 1.6717, "step": 984 }, { "epoch": 0.13443428415449707, "grad_norm": 0.18388959765434265, "learning_rate": 8e-05, "loss": 1.6198, "step": 985 }, { "epoch": 0.1345707656612529, "grad_norm": 0.18308115005493164, "learning_rate": 8e-05, "loss": 1.6229, "step": 986 }, { "epoch": 0.13470724716800875, "grad_norm": 0.1827654242515564, "learning_rate": 8e-05, "loss": 1.661, "step": 987 }, { "epoch": 0.13484372867476457, "grad_norm": 0.17717143893241882, "learning_rate": 8e-05, "loss": 1.6604, "step": 988 }, { "epoch": 0.1349802101815204, "grad_norm": 0.1805904507637024, "learning_rate": 8e-05, "loss": 1.6351, "step": 989 }, { "epoch": 0.13511669168827622, "grad_norm": 0.17695428431034088, "learning_rate": 8e-05, "loss": 1.6024, "step": 990 }, { "epoch": 0.13525317319503208, "grad_norm": 0.17206792533397675, "learning_rate": 8e-05, "loss": 1.5984, "step": 991 }, { "epoch": 0.1353896547017879, "grad_norm": 0.1792447865009308, "learning_rate": 8e-05, "loss": 1.6037, "step": 992 }, { "epoch": 0.13552613620854373, "grad_norm": 0.18265284597873688, "learning_rate": 8e-05, "loss": 1.5422, "step": 993 }, { "epoch": 0.13566261771529958, "grad_norm": 0.17521649599075317, "learning_rate": 8e-05, "loss": 1.6209, "step": 994 }, { "epoch": 0.1357990992220554, "grad_norm": 0.1735079139471054, "learning_rate": 8e-05, "loss": 1.6155, "step": 995 }, { "epoch": 0.13593558072881123, "grad_norm": 0.18370352685451508, "learning_rate": 8e-05, "loss": 1.6391, "step": 996 }, { "epoch": 0.1360720622355671, "grad_norm": 0.18222841620445251, "learning_rate": 8e-05, "loss": 1.557, "step": 997 }, { "epoch": 0.1362085437423229, "grad_norm": 0.18838071823120117, "learning_rate": 8e-05, "loss": 1.6012, "step": 998 }, { "epoch": 0.13634502524907874, "grad_norm": 0.17374905943870544, "learning_rate": 8e-05, "loss": 1.4881, "step": 999 }, { "epoch": 0.1364815067558346, "grad_norm": 0.18086311221122742, "learning_rate": 8e-05, "loss": 1.5805, "step": 1000 }, { "epoch": 0.13661798826259042, "grad_norm": 0.19914259016513824, "learning_rate": 8e-05, "loss": 1.6356, "step": 1001 }, { "epoch": 0.13675446976934624, "grad_norm": 0.18456502258777618, "learning_rate": 8e-05, "loss": 1.687, "step": 1002 }, { "epoch": 0.1368909512761021, "grad_norm": 0.1976194530725479, "learning_rate": 8e-05, "loss": 1.6784, "step": 1003 }, { "epoch": 0.13702743278285792, "grad_norm": 0.1925603747367859, "learning_rate": 8e-05, "loss": 1.6308, "step": 1004 }, { "epoch": 0.13716391428961375, "grad_norm": 0.17244145274162292, "learning_rate": 8e-05, "loss": 1.4991, "step": 1005 }, { "epoch": 0.1373003957963696, "grad_norm": 0.19100700318813324, "learning_rate": 8e-05, "loss": 1.5489, "step": 1006 }, { "epoch": 0.13743687730312543, "grad_norm": 0.17784613370895386, "learning_rate": 8e-05, "loss": 1.654, "step": 1007 }, { "epoch": 0.13757335880988125, "grad_norm": 0.17631614208221436, "learning_rate": 8e-05, "loss": 1.5725, "step": 1008 }, { "epoch": 0.1377098403166371, "grad_norm": 0.18062086403369904, "learning_rate": 8e-05, "loss": 1.622, "step": 1009 }, { "epoch": 0.13784632182339293, "grad_norm": 0.16928671300411224, "learning_rate": 8e-05, "loss": 1.5581, "step": 1010 }, { "epoch": 0.13798280333014876, "grad_norm": 0.17381703853607178, "learning_rate": 8e-05, "loss": 1.5035, "step": 1011 }, { "epoch": 0.1381192848369046, "grad_norm": 0.17410334944725037, "learning_rate": 8e-05, "loss": 1.5857, "step": 1012 }, { "epoch": 0.13825576634366044, "grad_norm": 0.1883874237537384, "learning_rate": 8e-05, "loss": 1.6435, "step": 1013 }, { "epoch": 0.13839224785041626, "grad_norm": 0.1813386082649231, "learning_rate": 8e-05, "loss": 1.6014, "step": 1014 }, { "epoch": 0.13852872935717211, "grad_norm": 0.1805773824453354, "learning_rate": 8e-05, "loss": 1.6754, "step": 1015 }, { "epoch": 0.13866521086392794, "grad_norm": 0.17270179092884064, "learning_rate": 8e-05, "loss": 1.5897, "step": 1016 }, { "epoch": 0.13880169237068377, "grad_norm": 0.1726042479276657, "learning_rate": 8e-05, "loss": 1.5009, "step": 1017 }, { "epoch": 0.13893817387743962, "grad_norm": 0.17762930691242218, "learning_rate": 8e-05, "loss": 1.5906, "step": 1018 }, { "epoch": 0.13907465538419544, "grad_norm": 0.1860552728176117, "learning_rate": 8e-05, "loss": 1.6389, "step": 1019 }, { "epoch": 0.13921113689095127, "grad_norm": 0.1832042634487152, "learning_rate": 8e-05, "loss": 1.5953, "step": 1020 }, { "epoch": 0.13934761839770712, "grad_norm": 0.1861710250377655, "learning_rate": 8e-05, "loss": 1.6036, "step": 1021 }, { "epoch": 0.13948409990446295, "grad_norm": 0.1767587512731552, "learning_rate": 8e-05, "loss": 1.5687, "step": 1022 }, { "epoch": 0.13962058141121877, "grad_norm": 0.18185380101203918, "learning_rate": 8e-05, "loss": 1.6492, "step": 1023 }, { "epoch": 0.13975706291797463, "grad_norm": 0.1820162683725357, "learning_rate": 8e-05, "loss": 1.5917, "step": 1024 }, { "epoch": 0.13989354442473045, "grad_norm": 0.17909790575504303, "learning_rate": 8e-05, "loss": 1.5004, "step": 1025 }, { "epoch": 0.14003002593148628, "grad_norm": 0.1793469786643982, "learning_rate": 8e-05, "loss": 1.553, "step": 1026 }, { "epoch": 0.1401665074382421, "grad_norm": 0.18177662789821625, "learning_rate": 8e-05, "loss": 1.5983, "step": 1027 }, { "epoch": 0.14030298894499796, "grad_norm": 0.17323514819145203, "learning_rate": 8e-05, "loss": 1.5768, "step": 1028 }, { "epoch": 0.14043947045175378, "grad_norm": 0.18185874819755554, "learning_rate": 8e-05, "loss": 1.5988, "step": 1029 }, { "epoch": 0.1405759519585096, "grad_norm": 0.1708773374557495, "learning_rate": 8e-05, "loss": 1.5021, "step": 1030 }, { "epoch": 0.14071243346526546, "grad_norm": 0.18672265112400055, "learning_rate": 8e-05, "loss": 1.6696, "step": 1031 }, { "epoch": 0.1408489149720213, "grad_norm": 0.18373003602027893, "learning_rate": 8e-05, "loss": 1.6313, "step": 1032 }, { "epoch": 0.14098539647877711, "grad_norm": 0.18454372882843018, "learning_rate": 8e-05, "loss": 1.685, "step": 1033 }, { "epoch": 0.14112187798553297, "grad_norm": 0.1784476488828659, "learning_rate": 8e-05, "loss": 1.5652, "step": 1034 }, { "epoch": 0.1412583594922888, "grad_norm": 0.17644748091697693, "learning_rate": 8e-05, "loss": 1.5939, "step": 1035 }, { "epoch": 0.14139484099904462, "grad_norm": 0.1777205467224121, "learning_rate": 8e-05, "loss": 1.619, "step": 1036 }, { "epoch": 0.14153132250580047, "grad_norm": 0.18302205204963684, "learning_rate": 8e-05, "loss": 1.6365, "step": 1037 }, { "epoch": 0.1416678040125563, "grad_norm": 0.1734844446182251, "learning_rate": 8e-05, "loss": 1.584, "step": 1038 }, { "epoch": 0.14180428551931212, "grad_norm": 0.18382154405117035, "learning_rate": 8e-05, "loss": 1.6426, "step": 1039 }, { "epoch": 0.14194076702606798, "grad_norm": 0.18865056335926056, "learning_rate": 8e-05, "loss": 1.5651, "step": 1040 }, { "epoch": 0.1420772485328238, "grad_norm": 0.17658798396587372, "learning_rate": 8e-05, "loss": 1.5401, "step": 1041 }, { "epoch": 0.14221373003957963, "grad_norm": 0.19382302463054657, "learning_rate": 8e-05, "loss": 1.5658, "step": 1042 }, { "epoch": 0.14235021154633548, "grad_norm": 0.18951554596424103, "learning_rate": 8e-05, "loss": 1.5464, "step": 1043 }, { "epoch": 0.1424866930530913, "grad_norm": 0.17096170783042908, "learning_rate": 8e-05, "loss": 1.559, "step": 1044 }, { "epoch": 0.14262317455984713, "grad_norm": 0.18710759282112122, "learning_rate": 8e-05, "loss": 1.6514, "step": 1045 }, { "epoch": 0.14275965606660299, "grad_norm": 0.17742778360843658, "learning_rate": 8e-05, "loss": 1.6381, "step": 1046 }, { "epoch": 0.1428961375733588, "grad_norm": 0.17701713740825653, "learning_rate": 8e-05, "loss": 1.604, "step": 1047 }, { "epoch": 0.14303261908011464, "grad_norm": 0.17899398505687714, "learning_rate": 8e-05, "loss": 1.633, "step": 1048 }, { "epoch": 0.1431691005868705, "grad_norm": 0.17460735142230988, "learning_rate": 8e-05, "loss": 1.5702, "step": 1049 }, { "epoch": 0.14330558209362632, "grad_norm": 0.18164092302322388, "learning_rate": 8e-05, "loss": 1.5929, "step": 1050 }, { "epoch": 0.14344206360038214, "grad_norm": 0.1788603514432907, "learning_rate": 8e-05, "loss": 1.5617, "step": 1051 }, { "epoch": 0.143578545107138, "grad_norm": 0.18057799339294434, "learning_rate": 8e-05, "loss": 1.5827, "step": 1052 }, { "epoch": 0.14371502661389382, "grad_norm": 0.17332379519939423, "learning_rate": 8e-05, "loss": 1.5671, "step": 1053 }, { "epoch": 0.14385150812064965, "grad_norm": 0.18277396261692047, "learning_rate": 8e-05, "loss": 1.5798, "step": 1054 }, { "epoch": 0.1439879896274055, "grad_norm": 0.1774376928806305, "learning_rate": 8e-05, "loss": 1.5576, "step": 1055 }, { "epoch": 0.14412447113416132, "grad_norm": 0.17968496680259705, "learning_rate": 8e-05, "loss": 1.5896, "step": 1056 }, { "epoch": 0.14426095264091715, "grad_norm": 0.18069961667060852, "learning_rate": 8e-05, "loss": 1.6002, "step": 1057 }, { "epoch": 0.144397434147673, "grad_norm": 0.17182733118534088, "learning_rate": 8e-05, "loss": 1.5373, "step": 1058 }, { "epoch": 0.14453391565442883, "grad_norm": 0.17266228795051575, "learning_rate": 8e-05, "loss": 1.6098, "step": 1059 }, { "epoch": 0.14467039716118466, "grad_norm": 0.17059507966041565, "learning_rate": 8e-05, "loss": 1.5563, "step": 1060 }, { "epoch": 0.14480687866794048, "grad_norm": 0.17278394103050232, "learning_rate": 8e-05, "loss": 1.5623, "step": 1061 }, { "epoch": 0.14494336017469633, "grad_norm": 0.17384158074855804, "learning_rate": 8e-05, "loss": 1.5201, "step": 1062 }, { "epoch": 0.14507984168145216, "grad_norm": 0.1728159636259079, "learning_rate": 8e-05, "loss": 1.6348, "step": 1063 }, { "epoch": 0.14521632318820799, "grad_norm": 0.1760879009962082, "learning_rate": 8e-05, "loss": 1.563, "step": 1064 }, { "epoch": 0.14535280469496384, "grad_norm": 0.17579421401023865, "learning_rate": 8e-05, "loss": 1.593, "step": 1065 }, { "epoch": 0.14548928620171966, "grad_norm": 0.17799431085586548, "learning_rate": 8e-05, "loss": 1.6455, "step": 1066 }, { "epoch": 0.1456257677084755, "grad_norm": 0.17833682894706726, "learning_rate": 8e-05, "loss": 1.5442, "step": 1067 }, { "epoch": 0.14576224921523134, "grad_norm": 0.1798112988471985, "learning_rate": 8e-05, "loss": 1.5715, "step": 1068 }, { "epoch": 0.14589873072198717, "grad_norm": 0.1862846314907074, "learning_rate": 8e-05, "loss": 1.6284, "step": 1069 }, { "epoch": 0.146035212228743, "grad_norm": 0.1751265525817871, "learning_rate": 8e-05, "loss": 1.5103, "step": 1070 }, { "epoch": 0.14617169373549885, "grad_norm": 0.17471760511398315, "learning_rate": 8e-05, "loss": 1.6167, "step": 1071 }, { "epoch": 0.14630817524225467, "grad_norm": 0.18728645145893097, "learning_rate": 8e-05, "loss": 1.6578, "step": 1072 }, { "epoch": 0.1464446567490105, "grad_norm": 0.18203704059123993, "learning_rate": 8e-05, "loss": 1.6135, "step": 1073 }, { "epoch": 0.14658113825576635, "grad_norm": 0.1761341243982315, "learning_rate": 8e-05, "loss": 1.6291, "step": 1074 }, { "epoch": 0.14671761976252218, "grad_norm": 0.17968878149986267, "learning_rate": 8e-05, "loss": 1.6392, "step": 1075 }, { "epoch": 0.146854101269278, "grad_norm": 0.18649674952030182, "learning_rate": 8e-05, "loss": 1.6194, "step": 1076 }, { "epoch": 0.14699058277603386, "grad_norm": 0.18363752961158752, "learning_rate": 8e-05, "loss": 1.5694, "step": 1077 }, { "epoch": 0.14712706428278968, "grad_norm": 0.17442214488983154, "learning_rate": 8e-05, "loss": 1.5561, "step": 1078 }, { "epoch": 0.1472635457895455, "grad_norm": 0.17706051468849182, "learning_rate": 8e-05, "loss": 1.5519, "step": 1079 }, { "epoch": 0.14740002729630136, "grad_norm": 0.17640429735183716, "learning_rate": 8e-05, "loss": 1.5352, "step": 1080 }, { "epoch": 0.1475365088030572, "grad_norm": 0.18479065597057343, "learning_rate": 8e-05, "loss": 1.704, "step": 1081 }, { "epoch": 0.147672990309813, "grad_norm": 0.17930950224399567, "learning_rate": 8e-05, "loss": 1.5157, "step": 1082 }, { "epoch": 0.14780947181656887, "grad_norm": 0.18708758056163788, "learning_rate": 8e-05, "loss": 1.6156, "step": 1083 }, { "epoch": 0.1479459533233247, "grad_norm": 0.17782355844974518, "learning_rate": 8e-05, "loss": 1.5782, "step": 1084 }, { "epoch": 0.14808243483008052, "grad_norm": 0.19263522326946259, "learning_rate": 8e-05, "loss": 1.5718, "step": 1085 }, { "epoch": 0.14821891633683637, "grad_norm": 0.18563781678676605, "learning_rate": 8e-05, "loss": 1.6418, "step": 1086 }, { "epoch": 0.1483553978435922, "grad_norm": 0.18000678718090057, "learning_rate": 8e-05, "loss": 1.4983, "step": 1087 }, { "epoch": 0.14849187935034802, "grad_norm": 0.19443407654762268, "learning_rate": 8e-05, "loss": 1.6139, "step": 1088 }, { "epoch": 0.14862836085710388, "grad_norm": 0.1867537498474121, "learning_rate": 8e-05, "loss": 1.5969, "step": 1089 }, { "epoch": 0.1487648423638597, "grad_norm": 0.21880941092967987, "learning_rate": 8e-05, "loss": 1.5525, "step": 1090 }, { "epoch": 0.14890132387061553, "grad_norm": 0.1861274689435959, "learning_rate": 8e-05, "loss": 1.5813, "step": 1091 }, { "epoch": 0.14903780537737138, "grad_norm": 0.17828530073165894, "learning_rate": 8e-05, "loss": 1.5915, "step": 1092 }, { "epoch": 0.1491742868841272, "grad_norm": 0.1942828744649887, "learning_rate": 8e-05, "loss": 1.6211, "step": 1093 }, { "epoch": 0.14931076839088303, "grad_norm": 0.1772737354040146, "learning_rate": 8e-05, "loss": 1.5944, "step": 1094 }, { "epoch": 0.14944724989763886, "grad_norm": 0.1927632838487625, "learning_rate": 8e-05, "loss": 1.6124, "step": 1095 }, { "epoch": 0.1495837314043947, "grad_norm": 0.1839390993118286, "learning_rate": 8e-05, "loss": 1.5653, "step": 1096 }, { "epoch": 0.14972021291115054, "grad_norm": 0.1866489201784134, "learning_rate": 8e-05, "loss": 1.6096, "step": 1097 }, { "epoch": 0.14985669441790636, "grad_norm": 0.19737054407596588, "learning_rate": 8e-05, "loss": 1.6586, "step": 1098 }, { "epoch": 0.14999317592466221, "grad_norm": 0.20163214206695557, "learning_rate": 8e-05, "loss": 1.6707, "step": 1099 }, { "epoch": 0.15012965743141804, "grad_norm": 0.1980806142091751, "learning_rate": 8e-05, "loss": 1.5811, "step": 1100 }, { "epoch": 0.15026613893817387, "grad_norm": 0.19414465129375458, "learning_rate": 8e-05, "loss": 1.5875, "step": 1101 }, { "epoch": 0.15040262044492972, "grad_norm": 0.1710263043642044, "learning_rate": 8e-05, "loss": 1.5526, "step": 1102 }, { "epoch": 0.15053910195168554, "grad_norm": 0.19318680465221405, "learning_rate": 8e-05, "loss": 1.6141, "step": 1103 }, { "epoch": 0.15067558345844137, "grad_norm": 0.1879231184720993, "learning_rate": 8e-05, "loss": 1.5401, "step": 1104 }, { "epoch": 0.15081206496519722, "grad_norm": 0.1749289333820343, "learning_rate": 8e-05, "loss": 1.4879, "step": 1105 }, { "epoch": 0.15094854647195305, "grad_norm": 0.18455862998962402, "learning_rate": 8e-05, "loss": 1.5904, "step": 1106 }, { "epoch": 0.15108502797870887, "grad_norm": 0.18272286653518677, "learning_rate": 8e-05, "loss": 1.5967, "step": 1107 }, { "epoch": 0.15122150948546473, "grad_norm": 0.18324877321720123, "learning_rate": 8e-05, "loss": 1.5726, "step": 1108 }, { "epoch": 0.15135799099222055, "grad_norm": 0.19138990342617035, "learning_rate": 8e-05, "loss": 1.5911, "step": 1109 }, { "epoch": 0.15149447249897638, "grad_norm": 0.18146942555904388, "learning_rate": 8e-05, "loss": 1.6572, "step": 1110 }, { "epoch": 0.15163095400573223, "grad_norm": 0.18672722578048706, "learning_rate": 8e-05, "loss": 1.6006, "step": 1111 }, { "epoch": 0.15176743551248806, "grad_norm": 0.17490947246551514, "learning_rate": 8e-05, "loss": 1.633, "step": 1112 }, { "epoch": 0.15190391701924388, "grad_norm": 0.18168552219867706, "learning_rate": 8e-05, "loss": 1.5323, "step": 1113 }, { "epoch": 0.15204039852599974, "grad_norm": 0.18998941779136658, "learning_rate": 8e-05, "loss": 1.6606, "step": 1114 }, { "epoch": 0.15217688003275556, "grad_norm": 0.17711807787418365, "learning_rate": 8e-05, "loss": 1.5634, "step": 1115 }, { "epoch": 0.1523133615395114, "grad_norm": 0.19640128314495087, "learning_rate": 8e-05, "loss": 1.6648, "step": 1116 }, { "epoch": 0.15244984304626724, "grad_norm": 0.18015168607234955, "learning_rate": 8e-05, "loss": 1.5841, "step": 1117 }, { "epoch": 0.15258632455302307, "grad_norm": 0.19263866543769836, "learning_rate": 8e-05, "loss": 1.6062, "step": 1118 }, { "epoch": 0.1527228060597789, "grad_norm": 0.183291494846344, "learning_rate": 8e-05, "loss": 1.6171, "step": 1119 }, { "epoch": 0.15285928756653475, "grad_norm": 0.18031246960163116, "learning_rate": 8e-05, "loss": 1.5428, "step": 1120 }, { "epoch": 0.15299576907329057, "grad_norm": 0.20082776248455048, "learning_rate": 8e-05, "loss": 1.6154, "step": 1121 }, { "epoch": 0.1531322505800464, "grad_norm": 0.1699168086051941, "learning_rate": 8e-05, "loss": 1.555, "step": 1122 }, { "epoch": 0.15326873208680225, "grad_norm": 0.19546018540859222, "learning_rate": 8e-05, "loss": 1.6221, "step": 1123 }, { "epoch": 0.15340521359355808, "grad_norm": 0.18226487934589386, "learning_rate": 8e-05, "loss": 1.6237, "step": 1124 }, { "epoch": 0.1535416951003139, "grad_norm": 0.1791285276412964, "learning_rate": 8e-05, "loss": 1.6126, "step": 1125 }, { "epoch": 0.15367817660706976, "grad_norm": 0.18853293359279633, "learning_rate": 8e-05, "loss": 1.5212, "step": 1126 }, { "epoch": 0.15381465811382558, "grad_norm": 0.1931140422821045, "learning_rate": 8e-05, "loss": 1.612, "step": 1127 }, { "epoch": 0.1539511396205814, "grad_norm": 0.19364455342292786, "learning_rate": 8e-05, "loss": 1.6224, "step": 1128 }, { "epoch": 0.15408762112733723, "grad_norm": 0.19626347720623016, "learning_rate": 8e-05, "loss": 1.6153, "step": 1129 }, { "epoch": 0.15422410263409309, "grad_norm": 0.1855393946170807, "learning_rate": 8e-05, "loss": 1.7106, "step": 1130 }, { "epoch": 0.1543605841408489, "grad_norm": 0.1744105964899063, "learning_rate": 8e-05, "loss": 1.5318, "step": 1131 }, { "epoch": 0.15449706564760474, "grad_norm": 0.18494953215122223, "learning_rate": 8e-05, "loss": 1.5704, "step": 1132 }, { "epoch": 0.1546335471543606, "grad_norm": 0.1748018115758896, "learning_rate": 8e-05, "loss": 1.5554, "step": 1133 }, { "epoch": 0.15477002866111642, "grad_norm": 0.1800168752670288, "learning_rate": 8e-05, "loss": 1.5526, "step": 1134 }, { "epoch": 0.15490651016787224, "grad_norm": 0.18198733031749725, "learning_rate": 8e-05, "loss": 1.5644, "step": 1135 }, { "epoch": 0.1550429916746281, "grad_norm": 0.1802837997674942, "learning_rate": 8e-05, "loss": 1.602, "step": 1136 }, { "epoch": 0.15517947318138392, "grad_norm": 0.1845933347940445, "learning_rate": 8e-05, "loss": 1.6427, "step": 1137 }, { "epoch": 0.15531595468813975, "grad_norm": 0.19058282673358917, "learning_rate": 8e-05, "loss": 1.5956, "step": 1138 }, { "epoch": 0.1554524361948956, "grad_norm": 0.18110552430152893, "learning_rate": 8e-05, "loss": 1.6011, "step": 1139 }, { "epoch": 0.15558891770165142, "grad_norm": 0.18208782374858856, "learning_rate": 8e-05, "loss": 1.5625, "step": 1140 }, { "epoch": 0.15572539920840725, "grad_norm": 0.19271939992904663, "learning_rate": 8e-05, "loss": 1.5715, "step": 1141 }, { "epoch": 0.1558618807151631, "grad_norm": 0.18664176762104034, "learning_rate": 8e-05, "loss": 1.6075, "step": 1142 }, { "epoch": 0.15599836222191893, "grad_norm": 0.195692241191864, "learning_rate": 8e-05, "loss": 1.6713, "step": 1143 }, { "epoch": 0.15613484372867475, "grad_norm": 0.1937275528907776, "learning_rate": 8e-05, "loss": 1.5908, "step": 1144 }, { "epoch": 0.1562713252354306, "grad_norm": 0.16840621829032898, "learning_rate": 8e-05, "loss": 1.4969, "step": 1145 }, { "epoch": 0.15640780674218643, "grad_norm": 0.1681431531906128, "learning_rate": 8e-05, "loss": 1.5064, "step": 1146 }, { "epoch": 0.15654428824894226, "grad_norm": 0.18363936245441437, "learning_rate": 8e-05, "loss": 1.6139, "step": 1147 }, { "epoch": 0.1566807697556981, "grad_norm": 0.17105446755886078, "learning_rate": 8e-05, "loss": 1.5472, "step": 1148 }, { "epoch": 0.15681725126245394, "grad_norm": 0.177464559674263, "learning_rate": 8e-05, "loss": 1.5689, "step": 1149 }, { "epoch": 0.15695373276920976, "grad_norm": 0.17929357290267944, "learning_rate": 8e-05, "loss": 1.5316, "step": 1150 }, { "epoch": 0.15709021427596562, "grad_norm": 0.17444851994514465, "learning_rate": 8e-05, "loss": 1.5878, "step": 1151 }, { "epoch": 0.15722669578272144, "grad_norm": 0.18968383967876434, "learning_rate": 8e-05, "loss": 1.653, "step": 1152 }, { "epoch": 0.15736317728947727, "grad_norm": 0.17355993390083313, "learning_rate": 8e-05, "loss": 1.4527, "step": 1153 }, { "epoch": 0.15749965879623312, "grad_norm": 0.17717935144901276, "learning_rate": 8e-05, "loss": 1.5335, "step": 1154 }, { "epoch": 0.15763614030298895, "grad_norm": 0.18725042045116425, "learning_rate": 8e-05, "loss": 1.5996, "step": 1155 }, { "epoch": 0.15777262180974477, "grad_norm": 0.17206521332263947, "learning_rate": 8e-05, "loss": 1.596, "step": 1156 }, { "epoch": 0.15790910331650063, "grad_norm": 0.17939825356006622, "learning_rate": 8e-05, "loss": 1.6106, "step": 1157 }, { "epoch": 0.15804558482325645, "grad_norm": 0.1848825365304947, "learning_rate": 8e-05, "loss": 1.5789, "step": 1158 }, { "epoch": 0.15818206633001228, "grad_norm": 0.17308993637561798, "learning_rate": 8e-05, "loss": 1.5632, "step": 1159 }, { "epoch": 0.15831854783676813, "grad_norm": 0.17614364624023438, "learning_rate": 8e-05, "loss": 1.6281, "step": 1160 }, { "epoch": 0.15845502934352396, "grad_norm": 0.18215034902095795, "learning_rate": 8e-05, "loss": 1.6151, "step": 1161 }, { "epoch": 0.15859151085027978, "grad_norm": 0.19466570019721985, "learning_rate": 8e-05, "loss": 1.644, "step": 1162 }, { "epoch": 0.15872799235703564, "grad_norm": 0.17851008474826813, "learning_rate": 8e-05, "loss": 1.6262, "step": 1163 }, { "epoch": 0.15886447386379146, "grad_norm": 0.19332507252693176, "learning_rate": 8e-05, "loss": 1.5907, "step": 1164 }, { "epoch": 0.1590009553705473, "grad_norm": 0.18777388334274292, "learning_rate": 8e-05, "loss": 1.586, "step": 1165 }, { "epoch": 0.1591374368773031, "grad_norm": 0.17552988231182098, "learning_rate": 8e-05, "loss": 1.5109, "step": 1166 }, { "epoch": 0.15927391838405897, "grad_norm": 0.2035273313522339, "learning_rate": 8e-05, "loss": 1.64, "step": 1167 }, { "epoch": 0.1594103998908148, "grad_norm": 0.18196377158164978, "learning_rate": 8e-05, "loss": 1.6123, "step": 1168 }, { "epoch": 0.15954688139757062, "grad_norm": 0.19131863117218018, "learning_rate": 8e-05, "loss": 1.5559, "step": 1169 }, { "epoch": 0.15968336290432647, "grad_norm": 0.20446893572807312, "learning_rate": 8e-05, "loss": 1.5598, "step": 1170 }, { "epoch": 0.1598198444110823, "grad_norm": 0.17946723103523254, "learning_rate": 8e-05, "loss": 1.5476, "step": 1171 }, { "epoch": 0.15995632591783812, "grad_norm": 0.1865733414888382, "learning_rate": 8e-05, "loss": 1.5754, "step": 1172 }, { "epoch": 0.16009280742459397, "grad_norm": 0.21724280714988708, "learning_rate": 8e-05, "loss": 1.5898, "step": 1173 }, { "epoch": 0.1602292889313498, "grad_norm": 0.1725117415189743, "learning_rate": 8e-05, "loss": 1.6274, "step": 1174 }, { "epoch": 0.16036577043810563, "grad_norm": 0.20278382301330566, "learning_rate": 8e-05, "loss": 1.556, "step": 1175 }, { "epoch": 0.16050225194486148, "grad_norm": 0.2073919028043747, "learning_rate": 8e-05, "loss": 1.5558, "step": 1176 }, { "epoch": 0.1606387334516173, "grad_norm": 0.17578734457492828, "learning_rate": 8e-05, "loss": 1.5469, "step": 1177 }, { "epoch": 0.16077521495837313, "grad_norm": 0.20177391171455383, "learning_rate": 8e-05, "loss": 1.6901, "step": 1178 }, { "epoch": 0.16091169646512898, "grad_norm": 0.191424161195755, "learning_rate": 8e-05, "loss": 1.5903, "step": 1179 }, { "epoch": 0.1610481779718848, "grad_norm": 0.17382898926734924, "learning_rate": 8e-05, "loss": 1.6036, "step": 1180 }, { "epoch": 0.16118465947864064, "grad_norm": 0.1912289559841156, "learning_rate": 8e-05, "loss": 1.583, "step": 1181 }, { "epoch": 0.1613211409853965, "grad_norm": 0.1886594593524933, "learning_rate": 8e-05, "loss": 1.5413, "step": 1182 }, { "epoch": 0.16145762249215231, "grad_norm": 0.18339189887046814, "learning_rate": 8e-05, "loss": 1.6444, "step": 1183 }, { "epoch": 0.16159410399890814, "grad_norm": 0.19257821142673492, "learning_rate": 8e-05, "loss": 1.5236, "step": 1184 }, { "epoch": 0.161730585505664, "grad_norm": 0.1859150528907776, "learning_rate": 8e-05, "loss": 1.62, "step": 1185 }, { "epoch": 0.16186706701241982, "grad_norm": 0.19414544105529785, "learning_rate": 8e-05, "loss": 1.5491, "step": 1186 }, { "epoch": 0.16200354851917564, "grad_norm": 0.18056927621364594, "learning_rate": 8e-05, "loss": 1.5462, "step": 1187 }, { "epoch": 0.1621400300259315, "grad_norm": 0.18426348268985748, "learning_rate": 8e-05, "loss": 1.6781, "step": 1188 }, { "epoch": 0.16227651153268732, "grad_norm": 0.18608851730823517, "learning_rate": 8e-05, "loss": 1.6254, "step": 1189 }, { "epoch": 0.16241299303944315, "grad_norm": 0.1818545013666153, "learning_rate": 8e-05, "loss": 1.6091, "step": 1190 }, { "epoch": 0.162549474546199, "grad_norm": 0.18049830198287964, "learning_rate": 8e-05, "loss": 1.6309, "step": 1191 }, { "epoch": 0.16268595605295483, "grad_norm": 0.17946931719779968, "learning_rate": 8e-05, "loss": 1.5668, "step": 1192 }, { "epoch": 0.16282243755971065, "grad_norm": 0.1907290369272232, "learning_rate": 8e-05, "loss": 1.7258, "step": 1193 }, { "epoch": 0.1629589190664665, "grad_norm": 0.18133649230003357, "learning_rate": 8e-05, "loss": 1.6859, "step": 1194 }, { "epoch": 0.16309540057322233, "grad_norm": 0.19502057135105133, "learning_rate": 8e-05, "loss": 1.5835, "step": 1195 }, { "epoch": 0.16323188207997816, "grad_norm": 0.18882198631763458, "learning_rate": 8e-05, "loss": 1.5295, "step": 1196 }, { "epoch": 0.163368363586734, "grad_norm": 0.1806669384241104, "learning_rate": 8e-05, "loss": 1.6159, "step": 1197 }, { "epoch": 0.16350484509348984, "grad_norm": 0.18993724882602692, "learning_rate": 8e-05, "loss": 1.6031, "step": 1198 }, { "epoch": 0.16364132660024566, "grad_norm": 0.18915489315986633, "learning_rate": 8e-05, "loss": 1.5938, "step": 1199 }, { "epoch": 0.1637778081070015, "grad_norm": 0.17887987196445465, "learning_rate": 8e-05, "loss": 1.5922, "step": 1200 }, { "epoch": 0.16391428961375734, "grad_norm": 0.22455234825611115, "learning_rate": 8e-05, "loss": 1.589, "step": 1201 }, { "epoch": 0.16405077112051317, "grad_norm": 0.18593254685401917, "learning_rate": 8e-05, "loss": 1.5684, "step": 1202 }, { "epoch": 0.164187252627269, "grad_norm": 0.18921193480491638, "learning_rate": 8e-05, "loss": 1.5405, "step": 1203 }, { "epoch": 0.16432373413402485, "grad_norm": 0.21956679224967957, "learning_rate": 8e-05, "loss": 1.6019, "step": 1204 }, { "epoch": 0.16446021564078067, "grad_norm": 0.18349327147006989, "learning_rate": 8e-05, "loss": 1.6218, "step": 1205 }, { "epoch": 0.1645966971475365, "grad_norm": 0.19205377995967865, "learning_rate": 8e-05, "loss": 1.6074, "step": 1206 }, { "epoch": 0.16473317865429235, "grad_norm": 0.1899394392967224, "learning_rate": 8e-05, "loss": 1.5794, "step": 1207 }, { "epoch": 0.16486966016104818, "grad_norm": 0.18048936128616333, "learning_rate": 8e-05, "loss": 1.6396, "step": 1208 }, { "epoch": 0.165006141667804, "grad_norm": 0.1869986206293106, "learning_rate": 8e-05, "loss": 1.6498, "step": 1209 }, { "epoch": 0.16514262317455985, "grad_norm": 0.18800325691699982, "learning_rate": 8e-05, "loss": 1.5452, "step": 1210 }, { "epoch": 0.16527910468131568, "grad_norm": 0.1765373796224594, "learning_rate": 8e-05, "loss": 1.6174, "step": 1211 }, { "epoch": 0.1654155861880715, "grad_norm": 0.17699959874153137, "learning_rate": 8e-05, "loss": 1.5607, "step": 1212 }, { "epoch": 0.16555206769482736, "grad_norm": 0.18276195228099823, "learning_rate": 8e-05, "loss": 1.5191, "step": 1213 }, { "epoch": 0.16568854920158319, "grad_norm": 0.18691441416740417, "learning_rate": 8e-05, "loss": 1.6696, "step": 1214 }, { "epoch": 0.165825030708339, "grad_norm": 0.17904722690582275, "learning_rate": 8e-05, "loss": 1.5763, "step": 1215 }, { "epoch": 0.16596151221509486, "grad_norm": 0.17879913747310638, "learning_rate": 8e-05, "loss": 1.6084, "step": 1216 }, { "epoch": 0.1660979937218507, "grad_norm": 0.17948831617832184, "learning_rate": 8e-05, "loss": 1.6256, "step": 1217 }, { "epoch": 0.16623447522860652, "grad_norm": 0.18168850243091583, "learning_rate": 8e-05, "loss": 1.6366, "step": 1218 }, { "epoch": 0.16637095673536237, "grad_norm": 0.17248106002807617, "learning_rate": 8e-05, "loss": 1.5605, "step": 1219 }, { "epoch": 0.1665074382421182, "grad_norm": 0.18305379152297974, "learning_rate": 8e-05, "loss": 1.525, "step": 1220 }, { "epoch": 0.16664391974887402, "grad_norm": 0.1820470541715622, "learning_rate": 8e-05, "loss": 1.6237, "step": 1221 }, { "epoch": 0.16678040125562987, "grad_norm": 0.18057158589363098, "learning_rate": 8e-05, "loss": 1.6108, "step": 1222 }, { "epoch": 0.1669168827623857, "grad_norm": 0.190862238407135, "learning_rate": 8e-05, "loss": 1.5501, "step": 1223 }, { "epoch": 0.16705336426914152, "grad_norm": 0.18769824504852295, "learning_rate": 8e-05, "loss": 1.5897, "step": 1224 }, { "epoch": 0.16718984577589738, "grad_norm": 0.18200239539146423, "learning_rate": 8e-05, "loss": 1.6346, "step": 1225 }, { "epoch": 0.1673263272826532, "grad_norm": 0.18543142080307007, "learning_rate": 8e-05, "loss": 1.6309, "step": 1226 }, { "epoch": 0.16746280878940903, "grad_norm": 0.1815391331911087, "learning_rate": 8e-05, "loss": 1.5868, "step": 1227 }, { "epoch": 0.16759929029616488, "grad_norm": 0.17922349274158478, "learning_rate": 8e-05, "loss": 1.5171, "step": 1228 }, { "epoch": 0.1677357718029207, "grad_norm": 0.17526231706142426, "learning_rate": 8e-05, "loss": 1.5778, "step": 1229 }, { "epoch": 0.16787225330967653, "grad_norm": 0.17742040753364563, "learning_rate": 8e-05, "loss": 1.5731, "step": 1230 }, { "epoch": 0.1680087348164324, "grad_norm": 0.1920635998249054, "learning_rate": 8e-05, "loss": 1.602, "step": 1231 }, { "epoch": 0.1681452163231882, "grad_norm": 0.17591023445129395, "learning_rate": 8e-05, "loss": 1.5963, "step": 1232 }, { "epoch": 0.16828169782994404, "grad_norm": 0.18112918734550476, "learning_rate": 8e-05, "loss": 1.5914, "step": 1233 }, { "epoch": 0.16841817933669986, "grad_norm": 0.17961393296718597, "learning_rate": 8e-05, "loss": 1.5291, "step": 1234 }, { "epoch": 0.16855466084345572, "grad_norm": 0.17470215260982513, "learning_rate": 8e-05, "loss": 1.6568, "step": 1235 }, { "epoch": 0.16869114235021154, "grad_norm": 0.1790974885225296, "learning_rate": 8e-05, "loss": 1.5562, "step": 1236 }, { "epoch": 0.16882762385696737, "grad_norm": 0.180716410279274, "learning_rate": 8e-05, "loss": 1.5557, "step": 1237 }, { "epoch": 0.16896410536372322, "grad_norm": 0.1740095019340515, "learning_rate": 8e-05, "loss": 1.5266, "step": 1238 }, { "epoch": 0.16910058687047905, "grad_norm": 0.17068733274936676, "learning_rate": 8e-05, "loss": 1.5597, "step": 1239 }, { "epoch": 0.16923706837723487, "grad_norm": 0.19151034951210022, "learning_rate": 8e-05, "loss": 1.6622, "step": 1240 }, { "epoch": 0.16937354988399073, "grad_norm": 0.17609147727489471, "learning_rate": 8e-05, "loss": 1.529, "step": 1241 }, { "epoch": 0.16951003139074655, "grad_norm": 0.17521561682224274, "learning_rate": 8e-05, "loss": 1.5431, "step": 1242 }, { "epoch": 0.16964651289750238, "grad_norm": 0.18549153208732605, "learning_rate": 8e-05, "loss": 1.5748, "step": 1243 }, { "epoch": 0.16978299440425823, "grad_norm": 0.1770140379667282, "learning_rate": 8e-05, "loss": 1.5427, "step": 1244 }, { "epoch": 0.16991947591101406, "grad_norm": 0.1812058389186859, "learning_rate": 8e-05, "loss": 1.5732, "step": 1245 }, { "epoch": 0.17005595741776988, "grad_norm": 0.18413046002388, "learning_rate": 8e-05, "loss": 1.5803, "step": 1246 }, { "epoch": 0.17019243892452574, "grad_norm": 0.18512527644634247, "learning_rate": 8e-05, "loss": 1.5742, "step": 1247 }, { "epoch": 0.17032892043128156, "grad_norm": 0.18638159334659576, "learning_rate": 8e-05, "loss": 1.6277, "step": 1248 }, { "epoch": 0.1704654019380374, "grad_norm": 0.1843906044960022, "learning_rate": 8e-05, "loss": 1.6835, "step": 1249 }, { "epoch": 0.17060188344479324, "grad_norm": 0.1919764131307602, "learning_rate": 8e-05, "loss": 1.6332, "step": 1250 }, { "epoch": 0.17073836495154907, "grad_norm": 0.18232044577598572, "learning_rate": 8e-05, "loss": 1.5453, "step": 1251 }, { "epoch": 0.1708748464583049, "grad_norm": 0.1758478581905365, "learning_rate": 8e-05, "loss": 1.6234, "step": 1252 }, { "epoch": 0.17101132796506074, "grad_norm": 0.18299835920333862, "learning_rate": 8e-05, "loss": 1.5974, "step": 1253 }, { "epoch": 0.17114780947181657, "grad_norm": 0.17757123708724976, "learning_rate": 8e-05, "loss": 1.5759, "step": 1254 }, { "epoch": 0.1712842909785724, "grad_norm": 0.18284830451011658, "learning_rate": 8e-05, "loss": 1.6488, "step": 1255 }, { "epoch": 0.17142077248532825, "grad_norm": 0.17563997209072113, "learning_rate": 8e-05, "loss": 1.5768, "step": 1256 }, { "epoch": 0.17155725399208407, "grad_norm": 0.1784861832857132, "learning_rate": 8e-05, "loss": 1.5842, "step": 1257 }, { "epoch": 0.1716937354988399, "grad_norm": 0.17500747740268707, "learning_rate": 8e-05, "loss": 1.5705, "step": 1258 }, { "epoch": 0.17183021700559575, "grad_norm": 0.17861445248126984, "learning_rate": 8e-05, "loss": 1.6016, "step": 1259 }, { "epoch": 0.17196669851235158, "grad_norm": 0.18527667224407196, "learning_rate": 8e-05, "loss": 1.656, "step": 1260 }, { "epoch": 0.1721031800191074, "grad_norm": 0.1847296804189682, "learning_rate": 8e-05, "loss": 1.6229, "step": 1261 }, { "epoch": 0.17223966152586326, "grad_norm": 0.18138012290000916, "learning_rate": 8e-05, "loss": 1.6551, "step": 1262 }, { "epoch": 0.17237614303261908, "grad_norm": 0.1790272295475006, "learning_rate": 8e-05, "loss": 1.5923, "step": 1263 }, { "epoch": 0.1725126245393749, "grad_norm": 0.18722257018089294, "learning_rate": 8e-05, "loss": 1.599, "step": 1264 }, { "epoch": 0.17264910604613076, "grad_norm": 0.17761902511119843, "learning_rate": 8e-05, "loss": 1.5586, "step": 1265 }, { "epoch": 0.1727855875528866, "grad_norm": 0.1800829917192459, "learning_rate": 8e-05, "loss": 1.6066, "step": 1266 }, { "epoch": 0.1729220690596424, "grad_norm": 0.18426303565502167, "learning_rate": 8e-05, "loss": 1.6557, "step": 1267 }, { "epoch": 0.17305855056639824, "grad_norm": 0.18017345666885376, "learning_rate": 8e-05, "loss": 1.6678, "step": 1268 }, { "epoch": 0.1731950320731541, "grad_norm": 0.1756056249141693, "learning_rate": 8e-05, "loss": 1.6008, "step": 1269 }, { "epoch": 0.17333151357990992, "grad_norm": 0.1743742674589157, "learning_rate": 8e-05, "loss": 1.605, "step": 1270 }, { "epoch": 0.17346799508666574, "grad_norm": 0.17890498042106628, "learning_rate": 8e-05, "loss": 1.5899, "step": 1271 }, { "epoch": 0.1736044765934216, "grad_norm": 0.18561498820781708, "learning_rate": 8e-05, "loss": 1.5761, "step": 1272 }, { "epoch": 0.17374095810017742, "grad_norm": 0.1821655035018921, "learning_rate": 8e-05, "loss": 1.6609, "step": 1273 }, { "epoch": 0.17387743960693325, "grad_norm": 0.1756887286901474, "learning_rate": 8e-05, "loss": 1.5573, "step": 1274 }, { "epoch": 0.1740139211136891, "grad_norm": 0.17907758057117462, "learning_rate": 8e-05, "loss": 1.609, "step": 1275 }, { "epoch": 0.17415040262044493, "grad_norm": 0.17647773027420044, "learning_rate": 8e-05, "loss": 1.5732, "step": 1276 }, { "epoch": 0.17428688412720075, "grad_norm": 0.18217086791992188, "learning_rate": 8e-05, "loss": 1.6304, "step": 1277 }, { "epoch": 0.1744233656339566, "grad_norm": 0.1770544797182083, "learning_rate": 8e-05, "loss": 1.5869, "step": 1278 }, { "epoch": 0.17455984714071243, "grad_norm": 0.1738310307264328, "learning_rate": 8e-05, "loss": 1.5463, "step": 1279 }, { "epoch": 0.17469632864746826, "grad_norm": 0.1832553595304489, "learning_rate": 8e-05, "loss": 1.628, "step": 1280 }, { "epoch": 0.1748328101542241, "grad_norm": 0.18110144138336182, "learning_rate": 8e-05, "loss": 1.5282, "step": 1281 }, { "epoch": 0.17496929166097994, "grad_norm": 0.191071555018425, "learning_rate": 8e-05, "loss": 1.5891, "step": 1282 }, { "epoch": 0.17510577316773576, "grad_norm": 0.18352948129177094, "learning_rate": 8e-05, "loss": 1.6483, "step": 1283 }, { "epoch": 0.17524225467449162, "grad_norm": 0.1845688819885254, "learning_rate": 8e-05, "loss": 1.6153, "step": 1284 }, { "epoch": 0.17537873618124744, "grad_norm": 0.17512784898281097, "learning_rate": 8e-05, "loss": 1.5424, "step": 1285 }, { "epoch": 0.17551521768800327, "grad_norm": 0.18274016678333282, "learning_rate": 8e-05, "loss": 1.5847, "step": 1286 }, { "epoch": 0.17565169919475912, "grad_norm": 0.18950589001178741, "learning_rate": 8e-05, "loss": 1.616, "step": 1287 }, { "epoch": 0.17578818070151495, "grad_norm": 0.1836417019367218, "learning_rate": 8e-05, "loss": 1.6508, "step": 1288 }, { "epoch": 0.17592466220827077, "grad_norm": 0.18523946404457092, "learning_rate": 8e-05, "loss": 1.6194, "step": 1289 }, { "epoch": 0.17606114371502662, "grad_norm": 0.17753663659095764, "learning_rate": 8e-05, "loss": 1.5809, "step": 1290 }, { "epoch": 0.17619762522178245, "grad_norm": 0.18328975141048431, "learning_rate": 8e-05, "loss": 1.563, "step": 1291 }, { "epoch": 0.17633410672853828, "grad_norm": 0.1872308999300003, "learning_rate": 8e-05, "loss": 1.5452, "step": 1292 }, { "epoch": 0.17647058823529413, "grad_norm": 0.1936081498861313, "learning_rate": 8e-05, "loss": 1.5831, "step": 1293 }, { "epoch": 0.17660706974204995, "grad_norm": 0.18803086876869202, "learning_rate": 8e-05, "loss": 1.632, "step": 1294 }, { "epoch": 0.17674355124880578, "grad_norm": 0.1898953914642334, "learning_rate": 8e-05, "loss": 1.5856, "step": 1295 }, { "epoch": 0.17688003275556163, "grad_norm": 0.19058725237846375, "learning_rate": 8e-05, "loss": 1.5651, "step": 1296 }, { "epoch": 0.17701651426231746, "grad_norm": 0.17649631202220917, "learning_rate": 8e-05, "loss": 1.536, "step": 1297 }, { "epoch": 0.17715299576907328, "grad_norm": 0.18933144211769104, "learning_rate": 8e-05, "loss": 1.6062, "step": 1298 }, { "epoch": 0.17728947727582914, "grad_norm": 0.19089755415916443, "learning_rate": 8e-05, "loss": 1.5853, "step": 1299 }, { "epoch": 0.17742595878258496, "grad_norm": 0.17789152264595032, "learning_rate": 8e-05, "loss": 1.5872, "step": 1300 }, { "epoch": 0.1775624402893408, "grad_norm": 0.18564368784427643, "learning_rate": 8e-05, "loss": 1.582, "step": 1301 }, { "epoch": 0.17769892179609664, "grad_norm": 0.17861820757389069, "learning_rate": 8e-05, "loss": 1.5708, "step": 1302 }, { "epoch": 0.17783540330285247, "grad_norm": 0.17816072702407837, "learning_rate": 8e-05, "loss": 1.5364, "step": 1303 }, { "epoch": 0.1779718848096083, "grad_norm": 0.181040957570076, "learning_rate": 8e-05, "loss": 1.6433, "step": 1304 }, { "epoch": 0.17810836631636412, "grad_norm": 0.19521668553352356, "learning_rate": 8e-05, "loss": 1.5515, "step": 1305 }, { "epoch": 0.17824484782311997, "grad_norm": 0.17776930332183838, "learning_rate": 8e-05, "loss": 1.6037, "step": 1306 }, { "epoch": 0.1783813293298758, "grad_norm": 0.18215477466583252, "learning_rate": 8e-05, "loss": 1.5833, "step": 1307 }, { "epoch": 0.17851781083663162, "grad_norm": 0.19597412645816803, "learning_rate": 8e-05, "loss": 1.5776, "step": 1308 }, { "epoch": 0.17865429234338748, "grad_norm": 0.17734640836715698, "learning_rate": 8e-05, "loss": 1.5731, "step": 1309 }, { "epoch": 0.1787907738501433, "grad_norm": 0.18767887353897095, "learning_rate": 8e-05, "loss": 1.5646, "step": 1310 }, { "epoch": 0.17892725535689913, "grad_norm": 0.18095117807388306, "learning_rate": 8e-05, "loss": 1.59, "step": 1311 }, { "epoch": 0.17906373686365498, "grad_norm": 0.1826968640089035, "learning_rate": 8e-05, "loss": 1.5734, "step": 1312 }, { "epoch": 0.1792002183704108, "grad_norm": 0.1729998141527176, "learning_rate": 8e-05, "loss": 1.5843, "step": 1313 }, { "epoch": 0.17933669987716663, "grad_norm": 0.1798374056816101, "learning_rate": 8e-05, "loss": 1.6466, "step": 1314 }, { "epoch": 0.1794731813839225, "grad_norm": 0.18414777517318726, "learning_rate": 8e-05, "loss": 1.6257, "step": 1315 }, { "epoch": 0.1796096628906783, "grad_norm": 0.17512325942516327, "learning_rate": 8e-05, "loss": 1.5287, "step": 1316 }, { "epoch": 0.17974614439743414, "grad_norm": 0.17899727821350098, "learning_rate": 8e-05, "loss": 1.5239, "step": 1317 }, { "epoch": 0.17988262590419, "grad_norm": 0.17925399541854858, "learning_rate": 8e-05, "loss": 1.5803, "step": 1318 }, { "epoch": 0.18001910741094582, "grad_norm": 0.17679765820503235, "learning_rate": 8e-05, "loss": 1.5521, "step": 1319 }, { "epoch": 0.18015558891770164, "grad_norm": 0.17553366720676422, "learning_rate": 8e-05, "loss": 1.5214, "step": 1320 }, { "epoch": 0.1802920704244575, "grad_norm": 0.18355882167816162, "learning_rate": 8e-05, "loss": 1.5951, "step": 1321 }, { "epoch": 0.18042855193121332, "grad_norm": 0.18068662285804749, "learning_rate": 8e-05, "loss": 1.5647, "step": 1322 }, { "epoch": 0.18056503343796915, "grad_norm": 0.18069930374622345, "learning_rate": 8e-05, "loss": 1.6196, "step": 1323 }, { "epoch": 0.180701514944725, "grad_norm": 0.17888805270195007, "learning_rate": 8e-05, "loss": 1.5438, "step": 1324 }, { "epoch": 0.18083799645148083, "grad_norm": 0.17886215448379517, "learning_rate": 8e-05, "loss": 1.5614, "step": 1325 }, { "epoch": 0.18097447795823665, "grad_norm": 0.18087361752986908, "learning_rate": 8e-05, "loss": 1.6352, "step": 1326 }, { "epoch": 0.1811109594649925, "grad_norm": 0.17912723124027252, "learning_rate": 8e-05, "loss": 1.5454, "step": 1327 }, { "epoch": 0.18124744097174833, "grad_norm": 0.1830485761165619, "learning_rate": 8e-05, "loss": 1.6326, "step": 1328 }, { "epoch": 0.18138392247850416, "grad_norm": 0.1840202957391739, "learning_rate": 8e-05, "loss": 1.5615, "step": 1329 }, { "epoch": 0.18152040398526, "grad_norm": 0.17745667695999146, "learning_rate": 8e-05, "loss": 1.5846, "step": 1330 }, { "epoch": 0.18165688549201583, "grad_norm": 0.17821282148361206, "learning_rate": 8e-05, "loss": 1.619, "step": 1331 }, { "epoch": 0.18179336699877166, "grad_norm": 0.1944548338651657, "learning_rate": 8e-05, "loss": 1.6365, "step": 1332 }, { "epoch": 0.1819298485055275, "grad_norm": 0.18436874449253082, "learning_rate": 8e-05, "loss": 1.6429, "step": 1333 }, { "epoch": 0.18206633001228334, "grad_norm": 0.17645595967769623, "learning_rate": 8e-05, "loss": 1.5708, "step": 1334 }, { "epoch": 0.18220281151903917, "grad_norm": 0.18822963535785675, "learning_rate": 8e-05, "loss": 1.6372, "step": 1335 }, { "epoch": 0.18233929302579502, "grad_norm": 0.1763017475605011, "learning_rate": 8e-05, "loss": 1.5917, "step": 1336 }, { "epoch": 0.18247577453255084, "grad_norm": 0.1805952787399292, "learning_rate": 8e-05, "loss": 1.6374, "step": 1337 }, { "epoch": 0.18261225603930667, "grad_norm": 0.17240872979164124, "learning_rate": 8e-05, "loss": 1.5551, "step": 1338 }, { "epoch": 0.1827487375460625, "grad_norm": 0.17618606984615326, "learning_rate": 8e-05, "loss": 1.5382, "step": 1339 }, { "epoch": 0.18288521905281835, "grad_norm": 0.17841020226478577, "learning_rate": 8e-05, "loss": 1.6514, "step": 1340 }, { "epoch": 0.18302170055957417, "grad_norm": 0.17210696637630463, "learning_rate": 8e-05, "loss": 1.5366, "step": 1341 }, { "epoch": 0.18315818206633, "grad_norm": 0.1769982874393463, "learning_rate": 8e-05, "loss": 1.5067, "step": 1342 }, { "epoch": 0.18329466357308585, "grad_norm": 0.18421314656734467, "learning_rate": 8e-05, "loss": 1.6176, "step": 1343 }, { "epoch": 0.18343114507984168, "grad_norm": 0.1886877715587616, "learning_rate": 8e-05, "loss": 1.6538, "step": 1344 }, { "epoch": 0.1835676265865975, "grad_norm": 0.17790721356868744, "learning_rate": 8e-05, "loss": 1.5944, "step": 1345 }, { "epoch": 0.18370410809335336, "grad_norm": 0.18201424181461334, "learning_rate": 8e-05, "loss": 1.5556, "step": 1346 }, { "epoch": 0.18384058960010918, "grad_norm": 0.1772538274526596, "learning_rate": 8e-05, "loss": 1.5366, "step": 1347 }, { "epoch": 0.183977071106865, "grad_norm": 0.18448364734649658, "learning_rate": 8e-05, "loss": 1.6209, "step": 1348 }, { "epoch": 0.18411355261362086, "grad_norm": 0.17663274705410004, "learning_rate": 8e-05, "loss": 1.6001, "step": 1349 }, { "epoch": 0.1842500341203767, "grad_norm": 0.18552815914154053, "learning_rate": 8e-05, "loss": 1.6226, "step": 1350 }, { "epoch": 0.1843865156271325, "grad_norm": 0.17485670745372772, "learning_rate": 8e-05, "loss": 1.5477, "step": 1351 }, { "epoch": 0.18452299713388837, "grad_norm": 0.17874416708946228, "learning_rate": 8e-05, "loss": 1.6272, "step": 1352 }, { "epoch": 0.1846594786406442, "grad_norm": 0.19542373716831207, "learning_rate": 8e-05, "loss": 1.6181, "step": 1353 }, { "epoch": 0.18479596014740002, "grad_norm": 0.18012094497680664, "learning_rate": 8e-05, "loss": 1.6576, "step": 1354 }, { "epoch": 0.18493244165415587, "grad_norm": 0.18003003299236298, "learning_rate": 8e-05, "loss": 1.5485, "step": 1355 }, { "epoch": 0.1850689231609117, "grad_norm": 0.19407962262630463, "learning_rate": 8e-05, "loss": 1.6061, "step": 1356 }, { "epoch": 0.18520540466766752, "grad_norm": 0.1789020597934723, "learning_rate": 8e-05, "loss": 1.5852, "step": 1357 }, { "epoch": 0.18534188617442338, "grad_norm": 0.20204922556877136, "learning_rate": 8e-05, "loss": 1.6413, "step": 1358 }, { "epoch": 0.1854783676811792, "grad_norm": 0.1791933923959732, "learning_rate": 8e-05, "loss": 1.5622, "step": 1359 }, { "epoch": 0.18561484918793503, "grad_norm": 0.19147218763828278, "learning_rate": 8e-05, "loss": 1.5751, "step": 1360 }, { "epoch": 0.18575133069469088, "grad_norm": 0.18052275478839874, "learning_rate": 8e-05, "loss": 1.5237, "step": 1361 }, { "epoch": 0.1858878122014467, "grad_norm": 0.17625939846038818, "learning_rate": 8e-05, "loss": 1.5768, "step": 1362 }, { "epoch": 0.18602429370820253, "grad_norm": 0.17764948308467865, "learning_rate": 8e-05, "loss": 1.5714, "step": 1363 }, { "epoch": 0.18616077521495838, "grad_norm": 0.18250398337841034, "learning_rate": 8e-05, "loss": 1.5763, "step": 1364 }, { "epoch": 0.1862972567217142, "grad_norm": 0.18556956946849823, "learning_rate": 8e-05, "loss": 1.5832, "step": 1365 }, { "epoch": 0.18643373822847004, "grad_norm": 0.18151864409446716, "learning_rate": 8e-05, "loss": 1.5757, "step": 1366 }, { "epoch": 0.1865702197352259, "grad_norm": 0.17581619322299957, "learning_rate": 8e-05, "loss": 1.5534, "step": 1367 }, { "epoch": 0.18670670124198172, "grad_norm": 0.17195409536361694, "learning_rate": 8e-05, "loss": 1.5554, "step": 1368 }, { "epoch": 0.18684318274873754, "grad_norm": 0.18987657129764557, "learning_rate": 8e-05, "loss": 1.5606, "step": 1369 }, { "epoch": 0.1869796642554934, "grad_norm": 0.18373243510723114, "learning_rate": 8e-05, "loss": 1.6091, "step": 1370 }, { "epoch": 0.18711614576224922, "grad_norm": 0.18615441024303436, "learning_rate": 8e-05, "loss": 1.594, "step": 1371 }, { "epoch": 0.18725262726900505, "grad_norm": 0.1862824410200119, "learning_rate": 8e-05, "loss": 1.5754, "step": 1372 }, { "epoch": 0.18738910877576087, "grad_norm": 0.18179596960544586, "learning_rate": 8e-05, "loss": 1.5886, "step": 1373 }, { "epoch": 0.18752559028251672, "grad_norm": 0.17452482879161835, "learning_rate": 8e-05, "loss": 1.5953, "step": 1374 }, { "epoch": 0.18766207178927255, "grad_norm": 0.18409357964992523, "learning_rate": 8e-05, "loss": 1.6741, "step": 1375 }, { "epoch": 0.18779855329602838, "grad_norm": 0.17680881917476654, "learning_rate": 8e-05, "loss": 1.5384, "step": 1376 }, { "epoch": 0.18793503480278423, "grad_norm": 0.1769135594367981, "learning_rate": 8e-05, "loss": 1.5392, "step": 1377 }, { "epoch": 0.18807151630954005, "grad_norm": 0.18122313916683197, "learning_rate": 8e-05, "loss": 1.6071, "step": 1378 }, { "epoch": 0.18820799781629588, "grad_norm": 0.18079566955566406, "learning_rate": 8e-05, "loss": 1.6187, "step": 1379 }, { "epoch": 0.18834447932305173, "grad_norm": 0.1818685680627823, "learning_rate": 8e-05, "loss": 1.6198, "step": 1380 }, { "epoch": 0.18848096082980756, "grad_norm": 0.19014553725719452, "learning_rate": 8e-05, "loss": 1.5558, "step": 1381 }, { "epoch": 0.18861744233656338, "grad_norm": 0.17578458786010742, "learning_rate": 8e-05, "loss": 1.5983, "step": 1382 }, { "epoch": 0.18875392384331924, "grad_norm": 0.18871986865997314, "learning_rate": 8e-05, "loss": 1.5878, "step": 1383 }, { "epoch": 0.18889040535007506, "grad_norm": 0.18032406270503998, "learning_rate": 8e-05, "loss": 1.5184, "step": 1384 }, { "epoch": 0.1890268868568309, "grad_norm": 0.18657973408699036, "learning_rate": 8e-05, "loss": 1.5795, "step": 1385 }, { "epoch": 0.18916336836358674, "grad_norm": 0.18356724083423615, "learning_rate": 8e-05, "loss": 1.6399, "step": 1386 }, { "epoch": 0.18929984987034257, "grad_norm": 0.17990297079086304, "learning_rate": 8e-05, "loss": 1.5812, "step": 1387 }, { "epoch": 0.1894363313770984, "grad_norm": 0.17911188304424286, "learning_rate": 8e-05, "loss": 1.5895, "step": 1388 }, { "epoch": 0.18957281288385425, "grad_norm": 0.1806691288948059, "learning_rate": 8e-05, "loss": 1.4952, "step": 1389 }, { "epoch": 0.18970929439061007, "grad_norm": 0.18714362382888794, "learning_rate": 8e-05, "loss": 1.5709, "step": 1390 }, { "epoch": 0.1898457758973659, "grad_norm": 0.19810731709003448, "learning_rate": 8e-05, "loss": 1.6054, "step": 1391 }, { "epoch": 0.18998225740412175, "grad_norm": 0.1779014766216278, "learning_rate": 8e-05, "loss": 1.5369, "step": 1392 }, { "epoch": 0.19011873891087758, "grad_norm": 0.19377335906028748, "learning_rate": 8e-05, "loss": 1.5488, "step": 1393 }, { "epoch": 0.1902552204176334, "grad_norm": 0.17974470555782318, "learning_rate": 8e-05, "loss": 1.5464, "step": 1394 }, { "epoch": 0.19039170192438926, "grad_norm": 0.19470670819282532, "learning_rate": 8e-05, "loss": 1.7032, "step": 1395 }, { "epoch": 0.19052818343114508, "grad_norm": 0.1894291341304779, "learning_rate": 8e-05, "loss": 1.5906, "step": 1396 }, { "epoch": 0.1906646649379009, "grad_norm": 0.17871151864528656, "learning_rate": 8e-05, "loss": 1.5975, "step": 1397 }, { "epoch": 0.19080114644465676, "grad_norm": 0.19233465194702148, "learning_rate": 8e-05, "loss": 1.5793, "step": 1398 }, { "epoch": 0.1909376279514126, "grad_norm": 0.18489322066307068, "learning_rate": 8e-05, "loss": 1.5832, "step": 1399 }, { "epoch": 0.1910741094581684, "grad_norm": 0.17781518399715424, "learning_rate": 8e-05, "loss": 1.4896, "step": 1400 }, { "epoch": 0.19121059096492427, "grad_norm": 0.18936896324157715, "learning_rate": 8e-05, "loss": 1.6037, "step": 1401 }, { "epoch": 0.1913470724716801, "grad_norm": 0.19243223965168, "learning_rate": 8e-05, "loss": 1.6017, "step": 1402 }, { "epoch": 0.19148355397843592, "grad_norm": 0.18140558898448944, "learning_rate": 8e-05, "loss": 1.5482, "step": 1403 }, { "epoch": 0.19162003548519177, "grad_norm": 0.1888154149055481, "learning_rate": 8e-05, "loss": 1.5025, "step": 1404 }, { "epoch": 0.1917565169919476, "grad_norm": 0.17819145321846008, "learning_rate": 8e-05, "loss": 1.546, "step": 1405 }, { "epoch": 0.19189299849870342, "grad_norm": 0.1893412172794342, "learning_rate": 8e-05, "loss": 1.5782, "step": 1406 }, { "epoch": 0.19202948000545925, "grad_norm": 0.18471944332122803, "learning_rate": 8e-05, "loss": 1.5434, "step": 1407 }, { "epoch": 0.1921659615122151, "grad_norm": 0.185714453458786, "learning_rate": 8e-05, "loss": 1.5985, "step": 1408 }, { "epoch": 0.19230244301897093, "grad_norm": 0.18834368884563446, "learning_rate": 8e-05, "loss": 1.5472, "step": 1409 }, { "epoch": 0.19243892452572675, "grad_norm": 0.18776118755340576, "learning_rate": 8e-05, "loss": 1.5496, "step": 1410 }, { "epoch": 0.1925754060324826, "grad_norm": 0.18822896480560303, "learning_rate": 8e-05, "loss": 1.6111, "step": 1411 }, { "epoch": 0.19271188753923843, "grad_norm": 0.1927414983510971, "learning_rate": 8e-05, "loss": 1.6198, "step": 1412 }, { "epoch": 0.19284836904599426, "grad_norm": 0.17877045273780823, "learning_rate": 8e-05, "loss": 1.4872, "step": 1413 }, { "epoch": 0.1929848505527501, "grad_norm": 0.17581462860107422, "learning_rate": 8e-05, "loss": 1.6288, "step": 1414 }, { "epoch": 0.19312133205950593, "grad_norm": 0.18094876408576965, "learning_rate": 8e-05, "loss": 1.58, "step": 1415 }, { "epoch": 0.19325781356626176, "grad_norm": 0.1943579465150833, "learning_rate": 8e-05, "loss": 1.6584, "step": 1416 }, { "epoch": 0.1933942950730176, "grad_norm": 0.1700141727924347, "learning_rate": 8e-05, "loss": 1.452, "step": 1417 }, { "epoch": 0.19353077657977344, "grad_norm": 0.18419000506401062, "learning_rate": 8e-05, "loss": 1.5609, "step": 1418 }, { "epoch": 0.19366725808652926, "grad_norm": 0.17995068430900574, "learning_rate": 8e-05, "loss": 1.6087, "step": 1419 }, { "epoch": 0.19380373959328512, "grad_norm": 0.18322820961475372, "learning_rate": 8e-05, "loss": 1.5926, "step": 1420 }, { "epoch": 0.19394022110004094, "grad_norm": 0.18575672805309296, "learning_rate": 8e-05, "loss": 1.6346, "step": 1421 }, { "epoch": 0.19407670260679677, "grad_norm": 0.17758823931217194, "learning_rate": 8e-05, "loss": 1.6032, "step": 1422 }, { "epoch": 0.19421318411355262, "grad_norm": 0.17651227116584778, "learning_rate": 8e-05, "loss": 1.5394, "step": 1423 }, { "epoch": 0.19434966562030845, "grad_norm": 0.18368026614189148, "learning_rate": 8e-05, "loss": 1.5713, "step": 1424 }, { "epoch": 0.19448614712706427, "grad_norm": 0.18148313462734222, "learning_rate": 8e-05, "loss": 1.6103, "step": 1425 }, { "epoch": 0.19462262863382013, "grad_norm": 0.18397633731365204, "learning_rate": 8e-05, "loss": 1.5528, "step": 1426 }, { "epoch": 0.19475911014057595, "grad_norm": 0.18574011325836182, "learning_rate": 8e-05, "loss": 1.5982, "step": 1427 }, { "epoch": 0.19489559164733178, "grad_norm": 0.18528436124324799, "learning_rate": 8e-05, "loss": 1.5983, "step": 1428 }, { "epoch": 0.19503207315408763, "grad_norm": 0.18001489341259003, "learning_rate": 8e-05, "loss": 1.577, "step": 1429 }, { "epoch": 0.19516855466084346, "grad_norm": 0.1796899288892746, "learning_rate": 8e-05, "loss": 1.5733, "step": 1430 }, { "epoch": 0.19530503616759928, "grad_norm": 0.18439553678035736, "learning_rate": 8e-05, "loss": 1.6148, "step": 1431 }, { "epoch": 0.19544151767435514, "grad_norm": 0.18113921582698822, "learning_rate": 8e-05, "loss": 1.588, "step": 1432 }, { "epoch": 0.19557799918111096, "grad_norm": 0.19196051359176636, "learning_rate": 8e-05, "loss": 1.6416, "step": 1433 }, { "epoch": 0.1957144806878668, "grad_norm": 0.17708393931388855, "learning_rate": 8e-05, "loss": 1.5517, "step": 1434 }, { "epoch": 0.19585096219462264, "grad_norm": 0.18627843260765076, "learning_rate": 8e-05, "loss": 1.6045, "step": 1435 }, { "epoch": 0.19598744370137847, "grad_norm": 0.18533670902252197, "learning_rate": 8e-05, "loss": 1.5846, "step": 1436 }, { "epoch": 0.1961239252081343, "grad_norm": 0.18477529287338257, "learning_rate": 8e-05, "loss": 1.565, "step": 1437 }, { "epoch": 0.19626040671489015, "grad_norm": 0.1898396909236908, "learning_rate": 8e-05, "loss": 1.5633, "step": 1438 }, { "epoch": 0.19639688822164597, "grad_norm": 0.18457242846488953, "learning_rate": 8e-05, "loss": 1.6306, "step": 1439 }, { "epoch": 0.1965333697284018, "grad_norm": 0.19674451649188995, "learning_rate": 8e-05, "loss": 1.5266, "step": 1440 }, { "epoch": 0.19666985123515765, "grad_norm": 0.1788880079984665, "learning_rate": 8e-05, "loss": 1.5564, "step": 1441 }, { "epoch": 0.19680633274191348, "grad_norm": 0.19101595878601074, "learning_rate": 8e-05, "loss": 1.6423, "step": 1442 }, { "epoch": 0.1969428142486693, "grad_norm": 0.1784212589263916, "learning_rate": 8e-05, "loss": 1.5565, "step": 1443 }, { "epoch": 0.19707929575542513, "grad_norm": 0.18406438827514648, "learning_rate": 8e-05, "loss": 1.6147, "step": 1444 }, { "epoch": 0.19721577726218098, "grad_norm": 0.19459623098373413, "learning_rate": 8e-05, "loss": 1.6433, "step": 1445 }, { "epoch": 0.1973522587689368, "grad_norm": 0.17270679771900177, "learning_rate": 8e-05, "loss": 1.5593, "step": 1446 }, { "epoch": 0.19748874027569263, "grad_norm": 0.1803906261920929, "learning_rate": 8e-05, "loss": 1.5253, "step": 1447 }, { "epoch": 0.19762522178244848, "grad_norm": 0.1875525414943695, "learning_rate": 8e-05, "loss": 1.5995, "step": 1448 }, { "epoch": 0.1977617032892043, "grad_norm": 0.19411605596542358, "learning_rate": 8e-05, "loss": 1.6321, "step": 1449 }, { "epoch": 0.19789818479596014, "grad_norm": 0.18178801238536835, "learning_rate": 8e-05, "loss": 1.5548, "step": 1450 }, { "epoch": 0.198034666302716, "grad_norm": 0.18365299701690674, "learning_rate": 8e-05, "loss": 1.5959, "step": 1451 }, { "epoch": 0.19817114780947181, "grad_norm": 0.18477144837379456, "learning_rate": 8e-05, "loss": 1.5584, "step": 1452 }, { "epoch": 0.19830762931622764, "grad_norm": 0.18778949975967407, "learning_rate": 8e-05, "loss": 1.6385, "step": 1453 }, { "epoch": 0.1984441108229835, "grad_norm": 0.1929863840341568, "learning_rate": 8e-05, "loss": 1.6051, "step": 1454 }, { "epoch": 0.19858059232973932, "grad_norm": 0.1916200965642929, "learning_rate": 8e-05, "loss": 1.5693, "step": 1455 }, { "epoch": 0.19871707383649515, "grad_norm": 0.18464729189872742, "learning_rate": 8e-05, "loss": 1.6588, "step": 1456 }, { "epoch": 0.198853555343251, "grad_norm": 0.1821947544813156, "learning_rate": 8e-05, "loss": 1.6237, "step": 1457 }, { "epoch": 0.19899003685000682, "grad_norm": 0.19039857387542725, "learning_rate": 8e-05, "loss": 1.6034, "step": 1458 }, { "epoch": 0.19912651835676265, "grad_norm": 0.1781628131866455, "learning_rate": 8e-05, "loss": 1.503, "step": 1459 }, { "epoch": 0.1992629998635185, "grad_norm": 0.18593347072601318, "learning_rate": 8e-05, "loss": 1.5386, "step": 1460 }, { "epoch": 0.19939948137027433, "grad_norm": 0.19762296974658966, "learning_rate": 8e-05, "loss": 1.6506, "step": 1461 }, { "epoch": 0.19953596287703015, "grad_norm": 0.1734614372253418, "learning_rate": 8e-05, "loss": 1.4897, "step": 1462 }, { "epoch": 0.199672444383786, "grad_norm": 0.18353652954101562, "learning_rate": 8e-05, "loss": 1.4828, "step": 1463 }, { "epoch": 0.19980892589054183, "grad_norm": 0.18647031486034393, "learning_rate": 8e-05, "loss": 1.6421, "step": 1464 }, { "epoch": 0.19994540739729766, "grad_norm": 0.17637072503566742, "learning_rate": 8e-05, "loss": 1.5307, "step": 1465 }, { "epoch": 0.2000818889040535, "grad_norm": 0.18160459399223328, "learning_rate": 8e-05, "loss": 1.6175, "step": 1466 }, { "epoch": 0.20021837041080934, "grad_norm": 0.185297429561615, "learning_rate": 8e-05, "loss": 1.6306, "step": 1467 }, { "epoch": 0.20035485191756516, "grad_norm": 0.18620960414409637, "learning_rate": 8e-05, "loss": 1.5991, "step": 1468 }, { "epoch": 0.20049133342432102, "grad_norm": 0.17840619385242462, "learning_rate": 8e-05, "loss": 1.559, "step": 1469 }, { "epoch": 0.20062781493107684, "grad_norm": 0.1843876987695694, "learning_rate": 8e-05, "loss": 1.5255, "step": 1470 }, { "epoch": 0.20076429643783267, "grad_norm": 0.18618100881576538, "learning_rate": 8e-05, "loss": 1.5604, "step": 1471 }, { "epoch": 0.20090077794458852, "grad_norm": 0.19297049939632416, "learning_rate": 8e-05, "loss": 1.5528, "step": 1472 }, { "epoch": 0.20103725945134435, "grad_norm": 0.1964980512857437, "learning_rate": 8e-05, "loss": 1.6083, "step": 1473 }, { "epoch": 0.20117374095810017, "grad_norm": 0.17532451450824738, "learning_rate": 8e-05, "loss": 1.6164, "step": 1474 }, { "epoch": 0.20131022246485603, "grad_norm": 0.19287341833114624, "learning_rate": 8e-05, "loss": 1.5363, "step": 1475 }, { "epoch": 0.20144670397161185, "grad_norm": 0.18532688915729523, "learning_rate": 8e-05, "loss": 1.5219, "step": 1476 }, { "epoch": 0.20158318547836768, "grad_norm": 0.18044430017471313, "learning_rate": 8e-05, "loss": 1.5565, "step": 1477 }, { "epoch": 0.2017196669851235, "grad_norm": 0.19123688340187073, "learning_rate": 8e-05, "loss": 1.5733, "step": 1478 }, { "epoch": 0.20185614849187936, "grad_norm": 0.18336538970470428, "learning_rate": 8e-05, "loss": 1.5159, "step": 1479 }, { "epoch": 0.20199262999863518, "grad_norm": 0.1817123144865036, "learning_rate": 8e-05, "loss": 1.5417, "step": 1480 }, { "epoch": 0.202129111505391, "grad_norm": 0.19311976432800293, "learning_rate": 8e-05, "loss": 1.5999, "step": 1481 }, { "epoch": 0.20226559301214686, "grad_norm": 0.1777079701423645, "learning_rate": 8e-05, "loss": 1.589, "step": 1482 }, { "epoch": 0.20240207451890269, "grad_norm": 0.17433413863182068, "learning_rate": 8e-05, "loss": 1.5651, "step": 1483 }, { "epoch": 0.2025385560256585, "grad_norm": 0.17603716254234314, "learning_rate": 8e-05, "loss": 1.5548, "step": 1484 }, { "epoch": 0.20267503753241436, "grad_norm": 0.17388878762722015, "learning_rate": 8e-05, "loss": 1.5054, "step": 1485 }, { "epoch": 0.2028115190391702, "grad_norm": 0.17641131579875946, "learning_rate": 8e-05, "loss": 1.5715, "step": 1486 }, { "epoch": 0.20294800054592602, "grad_norm": 0.1832215040922165, "learning_rate": 8e-05, "loss": 1.6561, "step": 1487 }, { "epoch": 0.20308448205268187, "grad_norm": 0.183164581656456, "learning_rate": 8e-05, "loss": 1.5961, "step": 1488 }, { "epoch": 0.2032209635594377, "grad_norm": 0.18779879808425903, "learning_rate": 8e-05, "loss": 1.6263, "step": 1489 }, { "epoch": 0.20335744506619352, "grad_norm": 0.17913123965263367, "learning_rate": 8e-05, "loss": 1.5698, "step": 1490 }, { "epoch": 0.20349392657294937, "grad_norm": 0.17858372628688812, "learning_rate": 8e-05, "loss": 1.5861, "step": 1491 }, { "epoch": 0.2036304080797052, "grad_norm": 0.18160991370677948, "learning_rate": 8e-05, "loss": 1.6053, "step": 1492 }, { "epoch": 0.20376688958646103, "grad_norm": 0.18251852691173553, "learning_rate": 8e-05, "loss": 1.6425, "step": 1493 }, { "epoch": 0.20390337109321688, "grad_norm": 0.17210716009140015, "learning_rate": 8e-05, "loss": 1.5618, "step": 1494 }, { "epoch": 0.2040398525999727, "grad_norm": 0.1758878231048584, "learning_rate": 8e-05, "loss": 1.5821, "step": 1495 }, { "epoch": 0.20417633410672853, "grad_norm": 0.18337933719158173, "learning_rate": 8e-05, "loss": 1.6693, "step": 1496 }, { "epoch": 0.20431281561348438, "grad_norm": 0.17764587700366974, "learning_rate": 8e-05, "loss": 1.4911, "step": 1497 }, { "epoch": 0.2044492971202402, "grad_norm": 0.18335145711898804, "learning_rate": 8e-05, "loss": 1.6109, "step": 1498 }, { "epoch": 0.20458577862699603, "grad_norm": 0.1805732250213623, "learning_rate": 8e-05, "loss": 1.5671, "step": 1499 }, { "epoch": 0.2047222601337519, "grad_norm": 0.1846403032541275, "learning_rate": 8e-05, "loss": 1.5964, "step": 1500 }, { "epoch": 0.2048587416405077, "grad_norm": 0.18115833401679993, "learning_rate": 8e-05, "loss": 1.5455, "step": 1501 }, { "epoch": 0.20499522314726354, "grad_norm": 0.18207481503486633, "learning_rate": 8e-05, "loss": 1.5806, "step": 1502 }, { "epoch": 0.2051317046540194, "grad_norm": 0.19865632057189941, "learning_rate": 8e-05, "loss": 1.579, "step": 1503 }, { "epoch": 0.20526818616077522, "grad_norm": 0.18002501130104065, "learning_rate": 8e-05, "loss": 1.6192, "step": 1504 }, { "epoch": 0.20540466766753104, "grad_norm": 0.19746966660022736, "learning_rate": 8e-05, "loss": 1.6286, "step": 1505 }, { "epoch": 0.2055411491742869, "grad_norm": 0.1856650859117508, "learning_rate": 8e-05, "loss": 1.6451, "step": 1506 }, { "epoch": 0.20567763068104272, "grad_norm": 0.18077273666858673, "learning_rate": 8e-05, "loss": 1.5799, "step": 1507 }, { "epoch": 0.20581411218779855, "grad_norm": 0.1812048852443695, "learning_rate": 8e-05, "loss": 1.5351, "step": 1508 }, { "epoch": 0.2059505936945544, "grad_norm": 0.1826622486114502, "learning_rate": 8e-05, "loss": 1.5731, "step": 1509 }, { "epoch": 0.20608707520131023, "grad_norm": 0.18820056319236755, "learning_rate": 8e-05, "loss": 1.5687, "step": 1510 }, { "epoch": 0.20622355670806605, "grad_norm": 0.17998521029949188, "learning_rate": 8e-05, "loss": 1.5749, "step": 1511 }, { "epoch": 0.20636003821482188, "grad_norm": 0.17936627566814423, "learning_rate": 8e-05, "loss": 1.5751, "step": 1512 }, { "epoch": 0.20649651972157773, "grad_norm": 0.17615634202957153, "learning_rate": 8e-05, "loss": 1.5345, "step": 1513 }, { "epoch": 0.20663300122833356, "grad_norm": 0.18781231343746185, "learning_rate": 8e-05, "loss": 1.6013, "step": 1514 }, { "epoch": 0.20676948273508938, "grad_norm": 0.174053356051445, "learning_rate": 8e-05, "loss": 1.5677, "step": 1515 }, { "epoch": 0.20690596424184524, "grad_norm": 0.1739943027496338, "learning_rate": 8e-05, "loss": 1.5526, "step": 1516 }, { "epoch": 0.20704244574860106, "grad_norm": 0.19110843539237976, "learning_rate": 8e-05, "loss": 1.5806, "step": 1517 }, { "epoch": 0.2071789272553569, "grad_norm": 0.18332581222057343, "learning_rate": 8e-05, "loss": 1.6264, "step": 1518 }, { "epoch": 0.20731540876211274, "grad_norm": 0.17619259655475616, "learning_rate": 8e-05, "loss": 1.5669, "step": 1519 }, { "epoch": 0.20745189026886857, "grad_norm": 0.18023639917373657, "learning_rate": 8e-05, "loss": 1.6248, "step": 1520 }, { "epoch": 0.2075883717756244, "grad_norm": 0.18323586881160736, "learning_rate": 8e-05, "loss": 1.536, "step": 1521 }, { "epoch": 0.20772485328238025, "grad_norm": 0.17915664613246918, "learning_rate": 8e-05, "loss": 1.5775, "step": 1522 }, { "epoch": 0.20786133478913607, "grad_norm": 0.1844443380832672, "learning_rate": 8e-05, "loss": 1.5854, "step": 1523 }, { "epoch": 0.2079978162958919, "grad_norm": 0.1829414814710617, "learning_rate": 8e-05, "loss": 1.5234, "step": 1524 }, { "epoch": 0.20813429780264775, "grad_norm": 0.18842652440071106, "learning_rate": 8e-05, "loss": 1.6359, "step": 1525 }, { "epoch": 0.20827077930940358, "grad_norm": 0.19175831973552704, "learning_rate": 8e-05, "loss": 1.5753, "step": 1526 }, { "epoch": 0.2084072608161594, "grad_norm": 0.18748562037944794, "learning_rate": 8e-05, "loss": 1.6429, "step": 1527 }, { "epoch": 0.20854374232291525, "grad_norm": 0.18532401323318481, "learning_rate": 8e-05, "loss": 1.5994, "step": 1528 }, { "epoch": 0.20868022382967108, "grad_norm": 0.18803122639656067, "learning_rate": 8e-05, "loss": 1.5104, "step": 1529 }, { "epoch": 0.2088167053364269, "grad_norm": 0.1791721135377884, "learning_rate": 8e-05, "loss": 1.5574, "step": 1530 }, { "epoch": 0.20895318684318276, "grad_norm": 0.1932784765958786, "learning_rate": 8e-05, "loss": 1.6243, "step": 1531 }, { "epoch": 0.20908966834993858, "grad_norm": 0.18730276823043823, "learning_rate": 8e-05, "loss": 1.6431, "step": 1532 }, { "epoch": 0.2092261498566944, "grad_norm": 0.17879685759544373, "learning_rate": 8e-05, "loss": 1.4858, "step": 1533 }, { "epoch": 0.20936263136345026, "grad_norm": 0.1810392588376999, "learning_rate": 8e-05, "loss": 1.5718, "step": 1534 }, { "epoch": 0.2094991128702061, "grad_norm": 0.1829460859298706, "learning_rate": 8e-05, "loss": 1.5643, "step": 1535 }, { "epoch": 0.20963559437696191, "grad_norm": 0.1807374358177185, "learning_rate": 8e-05, "loss": 1.5539, "step": 1536 }, { "epoch": 0.20977207588371777, "grad_norm": 0.18087249994277954, "learning_rate": 8e-05, "loss": 1.5843, "step": 1537 }, { "epoch": 0.2099085573904736, "grad_norm": 0.17787086963653564, "learning_rate": 8e-05, "loss": 1.5498, "step": 1538 }, { "epoch": 0.21004503889722942, "grad_norm": 0.18385079503059387, "learning_rate": 8e-05, "loss": 1.6346, "step": 1539 }, { "epoch": 0.21018152040398527, "grad_norm": 0.1800709366798401, "learning_rate": 8e-05, "loss": 1.6858, "step": 1540 }, { "epoch": 0.2103180019107411, "grad_norm": 0.17953120172023773, "learning_rate": 8e-05, "loss": 1.5423, "step": 1541 }, { "epoch": 0.21045448341749692, "grad_norm": 0.18363916873931885, "learning_rate": 8e-05, "loss": 1.6179, "step": 1542 }, { "epoch": 0.21059096492425278, "grad_norm": 0.18143697082996368, "learning_rate": 8e-05, "loss": 1.5613, "step": 1543 }, { "epoch": 0.2107274464310086, "grad_norm": 0.18854306638240814, "learning_rate": 8e-05, "loss": 1.5614, "step": 1544 }, { "epoch": 0.21086392793776443, "grad_norm": 0.18977026641368866, "learning_rate": 8e-05, "loss": 1.5859, "step": 1545 }, { "epoch": 0.21100040944452025, "grad_norm": 0.18302160501480103, "learning_rate": 8e-05, "loss": 1.62, "step": 1546 }, { "epoch": 0.2111368909512761, "grad_norm": 0.18566632270812988, "learning_rate": 8e-05, "loss": 1.6041, "step": 1547 }, { "epoch": 0.21127337245803193, "grad_norm": 0.18153445422649384, "learning_rate": 8e-05, "loss": 1.5318, "step": 1548 }, { "epoch": 0.21140985396478776, "grad_norm": 0.18662768602371216, "learning_rate": 8e-05, "loss": 1.5883, "step": 1549 }, { "epoch": 0.2115463354715436, "grad_norm": 0.18222737312316895, "learning_rate": 8e-05, "loss": 1.568, "step": 1550 }, { "epoch": 0.21168281697829944, "grad_norm": 0.18449488282203674, "learning_rate": 8e-05, "loss": 1.5504, "step": 1551 }, { "epoch": 0.21181929848505526, "grad_norm": 0.17821605503559113, "learning_rate": 8e-05, "loss": 1.5229, "step": 1552 }, { "epoch": 0.21195577999181112, "grad_norm": 0.18590347468852997, "learning_rate": 8e-05, "loss": 1.5472, "step": 1553 }, { "epoch": 0.21209226149856694, "grad_norm": 0.18014498054981232, "learning_rate": 8e-05, "loss": 1.5367, "step": 1554 }, { "epoch": 0.21222874300532277, "grad_norm": 0.19307273626327515, "learning_rate": 8e-05, "loss": 1.5766, "step": 1555 }, { "epoch": 0.21236522451207862, "grad_norm": 0.18647314608097076, "learning_rate": 8e-05, "loss": 1.6199, "step": 1556 }, { "epoch": 0.21250170601883445, "grad_norm": 0.18725787103176117, "learning_rate": 8e-05, "loss": 1.6083, "step": 1557 }, { "epoch": 0.21263818752559027, "grad_norm": 0.1864556521177292, "learning_rate": 8e-05, "loss": 1.5702, "step": 1558 }, { "epoch": 0.21277466903234613, "grad_norm": 0.17538176476955414, "learning_rate": 8e-05, "loss": 1.5471, "step": 1559 }, { "epoch": 0.21291115053910195, "grad_norm": 0.1799948811531067, "learning_rate": 8e-05, "loss": 1.4613, "step": 1560 }, { "epoch": 0.21304763204585778, "grad_norm": 0.18626180291175842, "learning_rate": 8e-05, "loss": 1.5782, "step": 1561 }, { "epoch": 0.21318411355261363, "grad_norm": 0.1895618587732315, "learning_rate": 8e-05, "loss": 1.4661, "step": 1562 }, { "epoch": 0.21332059505936946, "grad_norm": 0.20060861110687256, "learning_rate": 8e-05, "loss": 1.6723, "step": 1563 }, { "epoch": 0.21345707656612528, "grad_norm": 0.1764329969882965, "learning_rate": 8e-05, "loss": 1.5655, "step": 1564 }, { "epoch": 0.21359355807288113, "grad_norm": 0.185940220952034, "learning_rate": 8e-05, "loss": 1.5785, "step": 1565 }, { "epoch": 0.21373003957963696, "grad_norm": 0.18389998376369476, "learning_rate": 8e-05, "loss": 1.5572, "step": 1566 }, { "epoch": 0.21386652108639279, "grad_norm": 0.1898891031742096, "learning_rate": 8e-05, "loss": 1.6052, "step": 1567 }, { "epoch": 0.21400300259314864, "grad_norm": 0.18268591165542603, "learning_rate": 8e-05, "loss": 1.6123, "step": 1568 }, { "epoch": 0.21413948409990446, "grad_norm": 0.18448932468891144, "learning_rate": 8e-05, "loss": 1.5656, "step": 1569 }, { "epoch": 0.2142759656066603, "grad_norm": 0.1885816603899002, "learning_rate": 8e-05, "loss": 1.6011, "step": 1570 }, { "epoch": 0.21441244711341614, "grad_norm": 0.19512155652046204, "learning_rate": 8e-05, "loss": 1.6871, "step": 1571 }, { "epoch": 0.21454892862017197, "grad_norm": 0.18985328078269958, "learning_rate": 8e-05, "loss": 1.6215, "step": 1572 }, { "epoch": 0.2146854101269278, "grad_norm": 0.19342605769634247, "learning_rate": 8e-05, "loss": 1.6147, "step": 1573 }, { "epoch": 0.21482189163368365, "grad_norm": 0.184814453125, "learning_rate": 8e-05, "loss": 1.5641, "step": 1574 }, { "epoch": 0.21495837314043947, "grad_norm": 0.1896548569202423, "learning_rate": 8e-05, "loss": 1.5218, "step": 1575 }, { "epoch": 0.2150948546471953, "grad_norm": 0.18027977645397186, "learning_rate": 8e-05, "loss": 1.5824, "step": 1576 }, { "epoch": 0.21523133615395115, "grad_norm": 0.17841024696826935, "learning_rate": 8e-05, "loss": 1.5568, "step": 1577 }, { "epoch": 0.21536781766070698, "grad_norm": 0.18876133859157562, "learning_rate": 8e-05, "loss": 1.6052, "step": 1578 }, { "epoch": 0.2155042991674628, "grad_norm": 0.19057638943195343, "learning_rate": 8e-05, "loss": 1.6539, "step": 1579 }, { "epoch": 0.21564078067421863, "grad_norm": 0.18067972362041473, "learning_rate": 8e-05, "loss": 1.5582, "step": 1580 }, { "epoch": 0.21577726218097448, "grad_norm": 0.19346143305301666, "learning_rate": 8e-05, "loss": 1.6287, "step": 1581 }, { "epoch": 0.2159137436877303, "grad_norm": 0.1792355626821518, "learning_rate": 8e-05, "loss": 1.6181, "step": 1582 }, { "epoch": 0.21605022519448613, "grad_norm": 0.19466540217399597, "learning_rate": 8e-05, "loss": 1.5735, "step": 1583 }, { "epoch": 0.216186706701242, "grad_norm": 0.1889699399471283, "learning_rate": 8e-05, "loss": 1.4937, "step": 1584 }, { "epoch": 0.2163231882079978, "grad_norm": 0.1760951727628708, "learning_rate": 8e-05, "loss": 1.5711, "step": 1585 }, { "epoch": 0.21645966971475364, "grad_norm": 0.20095914602279663, "learning_rate": 8e-05, "loss": 1.5902, "step": 1586 }, { "epoch": 0.2165961512215095, "grad_norm": 0.18775932490825653, "learning_rate": 8e-05, "loss": 1.4952, "step": 1587 }, { "epoch": 0.21673263272826532, "grad_norm": 0.18992333114147186, "learning_rate": 8e-05, "loss": 1.6199, "step": 1588 }, { "epoch": 0.21686911423502114, "grad_norm": 0.18613582849502563, "learning_rate": 8e-05, "loss": 1.5408, "step": 1589 }, { "epoch": 0.217005595741777, "grad_norm": 0.19453835487365723, "learning_rate": 8e-05, "loss": 1.6378, "step": 1590 }, { "epoch": 0.21714207724853282, "grad_norm": 0.17925287783145905, "learning_rate": 8e-05, "loss": 1.5504, "step": 1591 }, { "epoch": 0.21727855875528865, "grad_norm": 0.19244016706943512, "learning_rate": 8e-05, "loss": 1.623, "step": 1592 }, { "epoch": 0.2174150402620445, "grad_norm": 0.18168294429779053, "learning_rate": 8e-05, "loss": 1.5767, "step": 1593 }, { "epoch": 0.21755152176880033, "grad_norm": 0.18998073041439056, "learning_rate": 8e-05, "loss": 1.5795, "step": 1594 }, { "epoch": 0.21768800327555615, "grad_norm": 0.18540899455547333, "learning_rate": 8e-05, "loss": 1.5832, "step": 1595 }, { "epoch": 0.217824484782312, "grad_norm": 0.179458886384964, "learning_rate": 8e-05, "loss": 1.5175, "step": 1596 }, { "epoch": 0.21796096628906783, "grad_norm": 0.185092031955719, "learning_rate": 8e-05, "loss": 1.5238, "step": 1597 }, { "epoch": 0.21809744779582366, "grad_norm": 0.18470264971256256, "learning_rate": 8e-05, "loss": 1.5739, "step": 1598 }, { "epoch": 0.2182339293025795, "grad_norm": 0.1853020042181015, "learning_rate": 8e-05, "loss": 1.5878, "step": 1599 }, { "epoch": 0.21837041080933534, "grad_norm": 0.1860482096672058, "learning_rate": 8e-05, "loss": 1.5906, "step": 1600 }, { "epoch": 0.21850689231609116, "grad_norm": 0.18579822778701782, "learning_rate": 8e-05, "loss": 1.597, "step": 1601 }, { "epoch": 0.21864337382284701, "grad_norm": 0.17685216665267944, "learning_rate": 8e-05, "loss": 1.586, "step": 1602 }, { "epoch": 0.21877985532960284, "grad_norm": 0.1827811300754547, "learning_rate": 8e-05, "loss": 1.5486, "step": 1603 }, { "epoch": 0.21891633683635867, "grad_norm": 0.18281586468219757, "learning_rate": 8e-05, "loss": 1.5609, "step": 1604 }, { "epoch": 0.21905281834311452, "grad_norm": 0.18391123414039612, "learning_rate": 8e-05, "loss": 1.6639, "step": 1605 }, { "epoch": 0.21918929984987034, "grad_norm": 0.17738930881023407, "learning_rate": 8e-05, "loss": 1.5435, "step": 1606 }, { "epoch": 0.21932578135662617, "grad_norm": 0.18947827816009521, "learning_rate": 8e-05, "loss": 1.5785, "step": 1607 }, { "epoch": 0.21946226286338202, "grad_norm": 0.1755830943584442, "learning_rate": 8e-05, "loss": 1.5044, "step": 1608 }, { "epoch": 0.21959874437013785, "grad_norm": 0.18073181807994843, "learning_rate": 8e-05, "loss": 1.5401, "step": 1609 }, { "epoch": 0.21973522587689368, "grad_norm": 0.1794690489768982, "learning_rate": 8e-05, "loss": 1.5518, "step": 1610 }, { "epoch": 0.21987170738364953, "grad_norm": 0.1837550550699234, "learning_rate": 8e-05, "loss": 1.6088, "step": 1611 }, { "epoch": 0.22000818889040535, "grad_norm": 0.18821464478969574, "learning_rate": 8e-05, "loss": 1.5675, "step": 1612 }, { "epoch": 0.22014467039716118, "grad_norm": 0.1863572895526886, "learning_rate": 8e-05, "loss": 1.5586, "step": 1613 }, { "epoch": 0.22028115190391703, "grad_norm": 0.17708463966846466, "learning_rate": 8e-05, "loss": 1.5177, "step": 1614 }, { "epoch": 0.22041763341067286, "grad_norm": 0.17648367583751678, "learning_rate": 8e-05, "loss": 1.5344, "step": 1615 }, { "epoch": 0.22055411491742868, "grad_norm": 0.18008606135845184, "learning_rate": 8e-05, "loss": 1.5645, "step": 1616 }, { "epoch": 0.2206905964241845, "grad_norm": 0.17940223217010498, "learning_rate": 8e-05, "loss": 1.541, "step": 1617 }, { "epoch": 0.22082707793094036, "grad_norm": 0.18638667464256287, "learning_rate": 8e-05, "loss": 1.579, "step": 1618 }, { "epoch": 0.2209635594376962, "grad_norm": 0.18122965097427368, "learning_rate": 8e-05, "loss": 1.5054, "step": 1619 }, { "epoch": 0.22110004094445201, "grad_norm": 0.17930862307548523, "learning_rate": 8e-05, "loss": 1.4994, "step": 1620 }, { "epoch": 0.22123652245120787, "grad_norm": 0.18551065027713776, "learning_rate": 8e-05, "loss": 1.5883, "step": 1621 }, { "epoch": 0.2213730039579637, "grad_norm": 0.18968766927719116, "learning_rate": 8e-05, "loss": 1.6192, "step": 1622 }, { "epoch": 0.22150948546471952, "grad_norm": 0.18144211173057556, "learning_rate": 8e-05, "loss": 1.6039, "step": 1623 }, { "epoch": 0.22164596697147537, "grad_norm": 0.19915767014026642, "learning_rate": 8e-05, "loss": 1.6332, "step": 1624 }, { "epoch": 0.2217824484782312, "grad_norm": 0.18128690123558044, "learning_rate": 8e-05, "loss": 1.5864, "step": 1625 }, { "epoch": 0.22191892998498702, "grad_norm": 0.18224012851715088, "learning_rate": 8e-05, "loss": 1.5499, "step": 1626 }, { "epoch": 0.22205541149174288, "grad_norm": 0.18859682977199554, "learning_rate": 8e-05, "loss": 1.5924, "step": 1627 }, { "epoch": 0.2221918929984987, "grad_norm": 0.1913030445575714, "learning_rate": 8e-05, "loss": 1.6293, "step": 1628 }, { "epoch": 0.22232837450525453, "grad_norm": 0.1801142692565918, "learning_rate": 8e-05, "loss": 1.5826, "step": 1629 }, { "epoch": 0.22246485601201038, "grad_norm": 0.18184086680412292, "learning_rate": 8e-05, "loss": 1.5923, "step": 1630 }, { "epoch": 0.2226013375187662, "grad_norm": 0.18206322193145752, "learning_rate": 8e-05, "loss": 1.6629, "step": 1631 }, { "epoch": 0.22273781902552203, "grad_norm": 0.18237844109535217, "learning_rate": 8e-05, "loss": 1.5355, "step": 1632 }, { "epoch": 0.22287430053227789, "grad_norm": 0.18598276376724243, "learning_rate": 8e-05, "loss": 1.6273, "step": 1633 }, { "epoch": 0.2230107820390337, "grad_norm": 0.18124960362911224, "learning_rate": 8e-05, "loss": 1.6666, "step": 1634 }, { "epoch": 0.22314726354578954, "grad_norm": 0.18319760262966156, "learning_rate": 8e-05, "loss": 1.5884, "step": 1635 }, { "epoch": 0.2232837450525454, "grad_norm": 0.1866164356470108, "learning_rate": 8e-05, "loss": 1.6187, "step": 1636 }, { "epoch": 0.22342022655930122, "grad_norm": 0.1916627287864685, "learning_rate": 8e-05, "loss": 1.6515, "step": 1637 }, { "epoch": 0.22355670806605704, "grad_norm": 0.19214844703674316, "learning_rate": 8e-05, "loss": 1.6045, "step": 1638 }, { "epoch": 0.2236931895728129, "grad_norm": 0.189527690410614, "learning_rate": 8e-05, "loss": 1.5885, "step": 1639 }, { "epoch": 0.22382967107956872, "grad_norm": 0.19260188937187195, "learning_rate": 8e-05, "loss": 1.6103, "step": 1640 }, { "epoch": 0.22396615258632455, "grad_norm": 0.18151207268238068, "learning_rate": 8e-05, "loss": 1.5554, "step": 1641 }, { "epoch": 0.2241026340930804, "grad_norm": 0.18015263974666595, "learning_rate": 8e-05, "loss": 1.543, "step": 1642 }, { "epoch": 0.22423911559983623, "grad_norm": 0.17515723407268524, "learning_rate": 8e-05, "loss": 1.4826, "step": 1643 }, { "epoch": 0.22437559710659205, "grad_norm": 0.2210821658372879, "learning_rate": 8e-05, "loss": 1.6734, "step": 1644 }, { "epoch": 0.2245120786133479, "grad_norm": 0.17541536688804626, "learning_rate": 8e-05, "loss": 1.4722, "step": 1645 }, { "epoch": 0.22464856012010373, "grad_norm": 0.18410120904445648, "learning_rate": 8e-05, "loss": 1.5478, "step": 1646 }, { "epoch": 0.22478504162685956, "grad_norm": 0.18484385311603546, "learning_rate": 8e-05, "loss": 1.5784, "step": 1647 }, { "epoch": 0.2249215231336154, "grad_norm": 0.18690799176692963, "learning_rate": 8e-05, "loss": 1.5366, "step": 1648 }, { "epoch": 0.22505800464037123, "grad_norm": 0.18821720778942108, "learning_rate": 8e-05, "loss": 1.6194, "step": 1649 }, { "epoch": 0.22519448614712706, "grad_norm": 0.1808300018310547, "learning_rate": 8e-05, "loss": 1.5815, "step": 1650 }, { "epoch": 0.22533096765388289, "grad_norm": 0.18520498275756836, "learning_rate": 8e-05, "loss": 1.5865, "step": 1651 }, { "epoch": 0.22546744916063874, "grad_norm": 0.1838509738445282, "learning_rate": 8e-05, "loss": 1.5299, "step": 1652 }, { "epoch": 0.22560393066739456, "grad_norm": 0.18807122111320496, "learning_rate": 8e-05, "loss": 1.6612, "step": 1653 }, { "epoch": 0.2257404121741504, "grad_norm": 0.18304729461669922, "learning_rate": 8e-05, "loss": 1.5478, "step": 1654 }, { "epoch": 0.22587689368090624, "grad_norm": 0.17820486426353455, "learning_rate": 8e-05, "loss": 1.5461, "step": 1655 }, { "epoch": 0.22601337518766207, "grad_norm": 0.19268783926963806, "learning_rate": 8e-05, "loss": 1.6725, "step": 1656 }, { "epoch": 0.2261498566944179, "grad_norm": 0.17507818341255188, "learning_rate": 8e-05, "loss": 1.5209, "step": 1657 }, { "epoch": 0.22628633820117375, "grad_norm": 0.18350698053836823, "learning_rate": 8e-05, "loss": 1.5497, "step": 1658 }, { "epoch": 0.22642281970792957, "grad_norm": 0.18189282715320587, "learning_rate": 8e-05, "loss": 1.5553, "step": 1659 }, { "epoch": 0.2265593012146854, "grad_norm": 0.18441444635391235, "learning_rate": 8e-05, "loss": 1.562, "step": 1660 }, { "epoch": 0.22669578272144125, "grad_norm": 0.18575315177440643, "learning_rate": 8e-05, "loss": 1.5492, "step": 1661 }, { "epoch": 0.22683226422819708, "grad_norm": 0.18621037900447845, "learning_rate": 8e-05, "loss": 1.5294, "step": 1662 }, { "epoch": 0.2269687457349529, "grad_norm": 0.18469302356243134, "learning_rate": 8e-05, "loss": 1.5702, "step": 1663 }, { "epoch": 0.22710522724170876, "grad_norm": 0.1926560252904892, "learning_rate": 8e-05, "loss": 1.5972, "step": 1664 }, { "epoch": 0.22724170874846458, "grad_norm": 0.1905696988105774, "learning_rate": 8e-05, "loss": 1.6119, "step": 1665 }, { "epoch": 0.2273781902552204, "grad_norm": 0.1913861632347107, "learning_rate": 8e-05, "loss": 1.5963, "step": 1666 }, { "epoch": 0.22751467176197626, "grad_norm": 0.19802449643611908, "learning_rate": 8e-05, "loss": 1.5716, "step": 1667 }, { "epoch": 0.2276511532687321, "grad_norm": 0.18814772367477417, "learning_rate": 8e-05, "loss": 1.5731, "step": 1668 }, { "epoch": 0.2277876347754879, "grad_norm": 0.19677011668682098, "learning_rate": 8e-05, "loss": 1.5641, "step": 1669 }, { "epoch": 0.22792411628224377, "grad_norm": 0.19284853339195251, "learning_rate": 8e-05, "loss": 1.6204, "step": 1670 }, { "epoch": 0.2280605977889996, "grad_norm": 0.17543110251426697, "learning_rate": 8e-05, "loss": 1.5528, "step": 1671 }, { "epoch": 0.22819707929575542, "grad_norm": 0.1940460354089737, "learning_rate": 8e-05, "loss": 1.5794, "step": 1672 }, { "epoch": 0.22833356080251127, "grad_norm": 0.19870352745056152, "learning_rate": 8e-05, "loss": 1.5732, "step": 1673 }, { "epoch": 0.2284700423092671, "grad_norm": 0.19045959413051605, "learning_rate": 8e-05, "loss": 1.5823, "step": 1674 }, { "epoch": 0.22860652381602292, "grad_norm": 0.19584600627422333, "learning_rate": 8e-05, "loss": 1.6097, "step": 1675 }, { "epoch": 0.22874300532277878, "grad_norm": 0.1913304328918457, "learning_rate": 8e-05, "loss": 1.6193, "step": 1676 }, { "epoch": 0.2288794868295346, "grad_norm": 0.17441055178642273, "learning_rate": 8e-05, "loss": 1.5261, "step": 1677 }, { "epoch": 0.22901596833629043, "grad_norm": 0.19493237137794495, "learning_rate": 8e-05, "loss": 1.5439, "step": 1678 }, { "epoch": 0.22915244984304628, "grad_norm": 0.1975114345550537, "learning_rate": 8e-05, "loss": 1.6389, "step": 1679 }, { "epoch": 0.2292889313498021, "grad_norm": 0.18074315786361694, "learning_rate": 8e-05, "loss": 1.5748, "step": 1680 }, { "epoch": 0.22942541285655793, "grad_norm": 0.19960017502307892, "learning_rate": 8e-05, "loss": 1.6028, "step": 1681 }, { "epoch": 0.22956189436331378, "grad_norm": 0.17777031660079956, "learning_rate": 8e-05, "loss": 1.5553, "step": 1682 }, { "epoch": 0.2296983758700696, "grad_norm": 0.18749453127384186, "learning_rate": 8e-05, "loss": 1.5826, "step": 1683 }, { "epoch": 0.22983485737682544, "grad_norm": 0.18549850583076477, "learning_rate": 8e-05, "loss": 1.6022, "step": 1684 }, { "epoch": 0.22997133888358126, "grad_norm": 0.18856392800807953, "learning_rate": 8e-05, "loss": 1.5913, "step": 1685 }, { "epoch": 0.23010782039033711, "grad_norm": 0.18631874024868011, "learning_rate": 8e-05, "loss": 1.6017, "step": 1686 }, { "epoch": 0.23024430189709294, "grad_norm": 0.18141521513462067, "learning_rate": 8e-05, "loss": 1.5771, "step": 1687 }, { "epoch": 0.23038078340384877, "grad_norm": 0.18833263218402863, "learning_rate": 8e-05, "loss": 1.5988, "step": 1688 }, { "epoch": 0.23051726491060462, "grad_norm": 0.17912141978740692, "learning_rate": 8e-05, "loss": 1.5363, "step": 1689 }, { "epoch": 0.23065374641736044, "grad_norm": 0.17798754572868347, "learning_rate": 8e-05, "loss": 1.5971, "step": 1690 }, { "epoch": 0.23079022792411627, "grad_norm": 0.192842498421669, "learning_rate": 8e-05, "loss": 1.6131, "step": 1691 }, { "epoch": 0.23092670943087212, "grad_norm": 0.18777163326740265, "learning_rate": 8e-05, "loss": 1.6236, "step": 1692 }, { "epoch": 0.23106319093762795, "grad_norm": 0.2039753496646881, "learning_rate": 8e-05, "loss": 1.6215, "step": 1693 }, { "epoch": 0.23119967244438377, "grad_norm": 0.1855362504720688, "learning_rate": 8e-05, "loss": 1.5367, "step": 1694 }, { "epoch": 0.23133615395113963, "grad_norm": 0.18319478631019592, "learning_rate": 8e-05, "loss": 1.6745, "step": 1695 }, { "epoch": 0.23147263545789545, "grad_norm": 0.19037805497646332, "learning_rate": 8e-05, "loss": 1.5054, "step": 1696 }, { "epoch": 0.23160911696465128, "grad_norm": 0.19100148975849152, "learning_rate": 8e-05, "loss": 1.5849, "step": 1697 }, { "epoch": 0.23174559847140713, "grad_norm": 0.18612314760684967, "learning_rate": 8e-05, "loss": 1.6026, "step": 1698 }, { "epoch": 0.23188207997816296, "grad_norm": 0.1895783245563507, "learning_rate": 8e-05, "loss": 1.5919, "step": 1699 }, { "epoch": 0.23201856148491878, "grad_norm": 0.18185438215732574, "learning_rate": 8e-05, "loss": 1.4859, "step": 1700 }, { "epoch": 0.23215504299167464, "grad_norm": 0.18597343564033508, "learning_rate": 8e-05, "loss": 1.5924, "step": 1701 }, { "epoch": 0.23229152449843046, "grad_norm": 0.18556249141693115, "learning_rate": 8e-05, "loss": 1.6016, "step": 1702 }, { "epoch": 0.2324280060051863, "grad_norm": 0.17800599336624146, "learning_rate": 8e-05, "loss": 1.485, "step": 1703 }, { "epoch": 0.23256448751194214, "grad_norm": 0.1902007907629013, "learning_rate": 8e-05, "loss": 1.561, "step": 1704 }, { "epoch": 0.23270096901869797, "grad_norm": 0.18084144592285156, "learning_rate": 8e-05, "loss": 1.4766, "step": 1705 }, { "epoch": 0.2328374505254538, "grad_norm": 0.18228298425674438, "learning_rate": 8e-05, "loss": 1.6214, "step": 1706 }, { "epoch": 0.23297393203220965, "grad_norm": 0.18154548108577728, "learning_rate": 8e-05, "loss": 1.559, "step": 1707 }, { "epoch": 0.23311041353896547, "grad_norm": 0.19297222793102264, "learning_rate": 8e-05, "loss": 1.5947, "step": 1708 }, { "epoch": 0.2332468950457213, "grad_norm": 0.18998752534389496, "learning_rate": 8e-05, "loss": 1.6204, "step": 1709 }, { "epoch": 0.23338337655247715, "grad_norm": 0.18729344010353088, "learning_rate": 8e-05, "loss": 1.5876, "step": 1710 }, { "epoch": 0.23351985805923298, "grad_norm": 0.1814780831336975, "learning_rate": 8e-05, "loss": 1.5308, "step": 1711 }, { "epoch": 0.2336563395659888, "grad_norm": 0.1783227175474167, "learning_rate": 8e-05, "loss": 1.4944, "step": 1712 }, { "epoch": 0.23379282107274466, "grad_norm": 0.17750617861747742, "learning_rate": 8e-05, "loss": 1.5669, "step": 1713 }, { "epoch": 0.23392930257950048, "grad_norm": 0.18692000210285187, "learning_rate": 8e-05, "loss": 1.581, "step": 1714 }, { "epoch": 0.2340657840862563, "grad_norm": 0.19738642871379852, "learning_rate": 8e-05, "loss": 1.658, "step": 1715 }, { "epoch": 0.23420226559301216, "grad_norm": 0.18085099756717682, "learning_rate": 8e-05, "loss": 1.5419, "step": 1716 }, { "epoch": 0.23433874709976799, "grad_norm": 0.18681450188159943, "learning_rate": 8e-05, "loss": 1.5564, "step": 1717 }, { "epoch": 0.2344752286065238, "grad_norm": 0.18711727857589722, "learning_rate": 8e-05, "loss": 1.5845, "step": 1718 }, { "epoch": 0.23461171011327964, "grad_norm": 0.17836855351924896, "learning_rate": 8e-05, "loss": 1.5297, "step": 1719 }, { "epoch": 0.2347481916200355, "grad_norm": 0.18290987610816956, "learning_rate": 8e-05, "loss": 1.5947, "step": 1720 }, { "epoch": 0.23488467312679132, "grad_norm": 0.1832231879234314, "learning_rate": 8e-05, "loss": 1.5779, "step": 1721 }, { "epoch": 0.23502115463354714, "grad_norm": 0.18527549505233765, "learning_rate": 8e-05, "loss": 1.5402, "step": 1722 }, { "epoch": 0.235157636140303, "grad_norm": 0.18735694885253906, "learning_rate": 8e-05, "loss": 1.6025, "step": 1723 }, { "epoch": 0.23529411764705882, "grad_norm": 0.1856762021780014, "learning_rate": 8e-05, "loss": 1.5542, "step": 1724 }, { "epoch": 0.23543059915381465, "grad_norm": 0.18207597732543945, "learning_rate": 8e-05, "loss": 1.5709, "step": 1725 }, { "epoch": 0.2355670806605705, "grad_norm": 0.18174079060554504, "learning_rate": 8e-05, "loss": 1.5285, "step": 1726 }, { "epoch": 0.23570356216732632, "grad_norm": 0.17819462716579437, "learning_rate": 8e-05, "loss": 1.5778, "step": 1727 }, { "epoch": 0.23584004367408215, "grad_norm": 0.1815337985754013, "learning_rate": 8e-05, "loss": 1.5385, "step": 1728 }, { "epoch": 0.235976525180838, "grad_norm": 0.18723762035369873, "learning_rate": 8e-05, "loss": 1.5462, "step": 1729 }, { "epoch": 0.23611300668759383, "grad_norm": 0.18837349116802216, "learning_rate": 8e-05, "loss": 1.5454, "step": 1730 }, { "epoch": 0.23624948819434965, "grad_norm": 0.1874261498451233, "learning_rate": 8e-05, "loss": 1.556, "step": 1731 }, { "epoch": 0.2363859697011055, "grad_norm": 0.1893012374639511, "learning_rate": 8e-05, "loss": 1.6241, "step": 1732 }, { "epoch": 0.23652245120786133, "grad_norm": 0.18179252743721008, "learning_rate": 8e-05, "loss": 1.5481, "step": 1733 }, { "epoch": 0.23665893271461716, "grad_norm": 0.18713226914405823, "learning_rate": 8e-05, "loss": 1.6427, "step": 1734 }, { "epoch": 0.236795414221373, "grad_norm": 0.1828542947769165, "learning_rate": 8e-05, "loss": 1.5939, "step": 1735 }, { "epoch": 0.23693189572812884, "grad_norm": 0.1856721192598343, "learning_rate": 8e-05, "loss": 1.6004, "step": 1736 }, { "epoch": 0.23706837723488466, "grad_norm": 0.17282211780548096, "learning_rate": 8e-05, "loss": 1.4907, "step": 1737 }, { "epoch": 0.23720485874164052, "grad_norm": 0.18401271104812622, "learning_rate": 8e-05, "loss": 1.5894, "step": 1738 }, { "epoch": 0.23734134024839634, "grad_norm": 0.18857260048389435, "learning_rate": 8e-05, "loss": 1.5991, "step": 1739 }, { "epoch": 0.23747782175515217, "grad_norm": 0.1815832406282425, "learning_rate": 8e-05, "loss": 1.5199, "step": 1740 }, { "epoch": 0.23761430326190802, "grad_norm": 0.18803003430366516, "learning_rate": 8e-05, "loss": 1.6241, "step": 1741 }, { "epoch": 0.23775078476866385, "grad_norm": 0.18170347809791565, "learning_rate": 8e-05, "loss": 1.5681, "step": 1742 }, { "epoch": 0.23788726627541967, "grad_norm": 0.18565987050533295, "learning_rate": 8e-05, "loss": 1.6161, "step": 1743 }, { "epoch": 0.23802374778217553, "grad_norm": 0.1861896961927414, "learning_rate": 8e-05, "loss": 1.5383, "step": 1744 }, { "epoch": 0.23816022928893135, "grad_norm": 0.18452812731266022, "learning_rate": 8e-05, "loss": 1.598, "step": 1745 }, { "epoch": 0.23829671079568718, "grad_norm": 0.18803074955940247, "learning_rate": 8e-05, "loss": 1.5837, "step": 1746 }, { "epoch": 0.23843319230244303, "grad_norm": 0.18187682330608368, "learning_rate": 8e-05, "loss": 1.5535, "step": 1747 }, { "epoch": 0.23856967380919886, "grad_norm": 0.1811991035938263, "learning_rate": 8e-05, "loss": 1.5395, "step": 1748 }, { "epoch": 0.23870615531595468, "grad_norm": 0.18209730088710785, "learning_rate": 8e-05, "loss": 1.5702, "step": 1749 }, { "epoch": 0.23884263682271054, "grad_norm": 0.17959095537662506, "learning_rate": 8e-05, "loss": 1.4725, "step": 1750 }, { "epoch": 0.23897911832946636, "grad_norm": 0.18598006665706635, "learning_rate": 8e-05, "loss": 1.6384, "step": 1751 }, { "epoch": 0.2391155998362222, "grad_norm": 0.1867654174566269, "learning_rate": 8e-05, "loss": 1.5633, "step": 1752 }, { "epoch": 0.23925208134297804, "grad_norm": 0.18545357882976532, "learning_rate": 8e-05, "loss": 1.5358, "step": 1753 }, { "epoch": 0.23938856284973387, "grad_norm": 0.1776314377784729, "learning_rate": 8e-05, "loss": 1.5031, "step": 1754 }, { "epoch": 0.2395250443564897, "grad_norm": 0.18356305360794067, "learning_rate": 8e-05, "loss": 1.5502, "step": 1755 }, { "epoch": 0.23966152586324552, "grad_norm": 0.19067412614822388, "learning_rate": 8e-05, "loss": 1.6561, "step": 1756 }, { "epoch": 0.23979800737000137, "grad_norm": 0.18596555292606354, "learning_rate": 8e-05, "loss": 1.5643, "step": 1757 }, { "epoch": 0.2399344888767572, "grad_norm": 0.1766899973154068, "learning_rate": 8e-05, "loss": 1.5439, "step": 1758 }, { "epoch": 0.24007097038351302, "grad_norm": 0.18611909449100494, "learning_rate": 8e-05, "loss": 1.5079, "step": 1759 }, { "epoch": 0.24020745189026887, "grad_norm": 0.2000034898519516, "learning_rate": 8e-05, "loss": 1.6163, "step": 1760 }, { "epoch": 0.2403439333970247, "grad_norm": 0.18233288824558258, "learning_rate": 8e-05, "loss": 1.5896, "step": 1761 }, { "epoch": 0.24048041490378053, "grad_norm": 0.18243840336799622, "learning_rate": 8e-05, "loss": 1.5346, "step": 1762 }, { "epoch": 0.24061689641053638, "grad_norm": 0.18466459214687347, "learning_rate": 8e-05, "loss": 1.6264, "step": 1763 }, { "epoch": 0.2407533779172922, "grad_norm": 0.1838509738445282, "learning_rate": 8e-05, "loss": 1.5906, "step": 1764 }, { "epoch": 0.24088985942404803, "grad_norm": 0.18624763190746307, "learning_rate": 8e-05, "loss": 1.5161, "step": 1765 }, { "epoch": 0.24102634093080388, "grad_norm": 0.18495598435401917, "learning_rate": 8e-05, "loss": 1.5565, "step": 1766 }, { "epoch": 0.2411628224375597, "grad_norm": 0.177808940410614, "learning_rate": 8e-05, "loss": 1.4451, "step": 1767 }, { "epoch": 0.24129930394431554, "grad_norm": 0.1832917183637619, "learning_rate": 8e-05, "loss": 1.5571, "step": 1768 }, { "epoch": 0.2414357854510714, "grad_norm": 0.18603256344795227, "learning_rate": 8e-05, "loss": 1.5763, "step": 1769 }, { "epoch": 0.24157226695782721, "grad_norm": 0.19211837649345398, "learning_rate": 8e-05, "loss": 1.6222, "step": 1770 }, { "epoch": 0.24170874846458304, "grad_norm": 0.18810877203941345, "learning_rate": 8e-05, "loss": 1.5149, "step": 1771 }, { "epoch": 0.2418452299713389, "grad_norm": 0.1866840422153473, "learning_rate": 8e-05, "loss": 1.6311, "step": 1772 }, { "epoch": 0.24198171147809472, "grad_norm": 0.18082091212272644, "learning_rate": 8e-05, "loss": 1.4942, "step": 1773 }, { "epoch": 0.24211819298485054, "grad_norm": 0.18563398718833923, "learning_rate": 8e-05, "loss": 1.503, "step": 1774 }, { "epoch": 0.2422546744916064, "grad_norm": 0.1880609095096588, "learning_rate": 8e-05, "loss": 1.564, "step": 1775 }, { "epoch": 0.24239115599836222, "grad_norm": 0.19007337093353271, "learning_rate": 8e-05, "loss": 1.5747, "step": 1776 }, { "epoch": 0.24252763750511805, "grad_norm": 0.19348379969596863, "learning_rate": 8e-05, "loss": 1.6002, "step": 1777 }, { "epoch": 0.2426641190118739, "grad_norm": 0.18118761479854584, "learning_rate": 8e-05, "loss": 1.559, "step": 1778 }, { "epoch": 0.24280060051862973, "grad_norm": 0.17686647176742554, "learning_rate": 8e-05, "loss": 1.5131, "step": 1779 }, { "epoch": 0.24293708202538555, "grad_norm": 0.17891229689121246, "learning_rate": 8e-05, "loss": 1.517, "step": 1780 }, { "epoch": 0.2430735635321414, "grad_norm": 0.18298952281475067, "learning_rate": 8e-05, "loss": 1.5231, "step": 1781 }, { "epoch": 0.24321004503889723, "grad_norm": 0.18165840208530426, "learning_rate": 8e-05, "loss": 1.6075, "step": 1782 }, { "epoch": 0.24334652654565306, "grad_norm": 0.18146726489067078, "learning_rate": 8e-05, "loss": 1.5803, "step": 1783 }, { "epoch": 0.2434830080524089, "grad_norm": 0.18730725347995758, "learning_rate": 8e-05, "loss": 1.5268, "step": 1784 }, { "epoch": 0.24361948955916474, "grad_norm": 0.18791694939136505, "learning_rate": 8e-05, "loss": 1.5828, "step": 1785 }, { "epoch": 0.24375597106592056, "grad_norm": 0.1907023787498474, "learning_rate": 8e-05, "loss": 1.5374, "step": 1786 }, { "epoch": 0.24389245257267642, "grad_norm": 0.18179954588413239, "learning_rate": 8e-05, "loss": 1.558, "step": 1787 }, { "epoch": 0.24402893407943224, "grad_norm": 0.1894650012254715, "learning_rate": 8e-05, "loss": 1.5712, "step": 1788 }, { "epoch": 0.24416541558618807, "grad_norm": 0.18077702820301056, "learning_rate": 8e-05, "loss": 1.5108, "step": 1789 }, { "epoch": 0.2443018970929439, "grad_norm": 0.18770000338554382, "learning_rate": 8e-05, "loss": 1.6286, "step": 1790 }, { "epoch": 0.24443837859969975, "grad_norm": 0.18682396411895752, "learning_rate": 8e-05, "loss": 1.5702, "step": 1791 }, { "epoch": 0.24457486010645557, "grad_norm": 0.18906524777412415, "learning_rate": 8e-05, "loss": 1.5516, "step": 1792 }, { "epoch": 0.2447113416132114, "grad_norm": 0.18724487721920013, "learning_rate": 8e-05, "loss": 1.5224, "step": 1793 }, { "epoch": 0.24484782311996725, "grad_norm": 0.1839170902967453, "learning_rate": 8e-05, "loss": 1.5458, "step": 1794 }, { "epoch": 0.24498430462672308, "grad_norm": 0.18446402251720428, "learning_rate": 8e-05, "loss": 1.5778, "step": 1795 }, { "epoch": 0.2451207861334789, "grad_norm": 0.185521200299263, "learning_rate": 8e-05, "loss": 1.5664, "step": 1796 }, { "epoch": 0.24525726764023476, "grad_norm": 0.18370167911052704, "learning_rate": 8e-05, "loss": 1.5303, "step": 1797 }, { "epoch": 0.24539374914699058, "grad_norm": 0.17842985689640045, "learning_rate": 8e-05, "loss": 1.5075, "step": 1798 }, { "epoch": 0.2455302306537464, "grad_norm": 0.1852072924375534, "learning_rate": 8e-05, "loss": 1.6184, "step": 1799 }, { "epoch": 0.24566671216050226, "grad_norm": 0.18539951741695404, "learning_rate": 8e-05, "loss": 1.5343, "step": 1800 }, { "epoch": 0.24580319366725809, "grad_norm": 0.18143805861473083, "learning_rate": 8e-05, "loss": 1.5167, "step": 1801 }, { "epoch": 0.2459396751740139, "grad_norm": 0.1851867288351059, "learning_rate": 8e-05, "loss": 1.6343, "step": 1802 }, { "epoch": 0.24607615668076976, "grad_norm": 0.18274852633476257, "learning_rate": 8e-05, "loss": 1.5456, "step": 1803 }, { "epoch": 0.2462126381875256, "grad_norm": 0.19242529571056366, "learning_rate": 8e-05, "loss": 1.6031, "step": 1804 }, { "epoch": 0.24634911969428142, "grad_norm": 0.20002421736717224, "learning_rate": 8e-05, "loss": 1.64, "step": 1805 }, { "epoch": 0.24648560120103727, "grad_norm": 0.18668735027313232, "learning_rate": 8e-05, "loss": 1.581, "step": 1806 }, { "epoch": 0.2466220827077931, "grad_norm": 0.20423966646194458, "learning_rate": 8e-05, "loss": 1.5301, "step": 1807 }, { "epoch": 0.24675856421454892, "grad_norm": 0.19704192876815796, "learning_rate": 8e-05, "loss": 1.6062, "step": 1808 }, { "epoch": 0.24689504572130477, "grad_norm": 0.19407778978347778, "learning_rate": 8e-05, "loss": 1.5928, "step": 1809 }, { "epoch": 0.2470315272280606, "grad_norm": 0.19340352714061737, "learning_rate": 8e-05, "loss": 1.5411, "step": 1810 }, { "epoch": 0.24716800873481642, "grad_norm": 0.18367774784564972, "learning_rate": 8e-05, "loss": 1.5603, "step": 1811 }, { "epoch": 0.24730449024157228, "grad_norm": 0.19014742970466614, "learning_rate": 8e-05, "loss": 1.5895, "step": 1812 }, { "epoch": 0.2474409717483281, "grad_norm": 0.1854240447282791, "learning_rate": 8e-05, "loss": 1.6484, "step": 1813 }, { "epoch": 0.24757745325508393, "grad_norm": 0.18356232345104218, "learning_rate": 8e-05, "loss": 1.5342, "step": 1814 }, { "epoch": 0.24771393476183978, "grad_norm": 0.18884854018688202, "learning_rate": 8e-05, "loss": 1.5875, "step": 1815 }, { "epoch": 0.2478504162685956, "grad_norm": 0.19298192858695984, "learning_rate": 8e-05, "loss": 1.6354, "step": 1816 }, { "epoch": 0.24798689777535143, "grad_norm": 0.1852366030216217, "learning_rate": 8e-05, "loss": 1.5895, "step": 1817 }, { "epoch": 0.2481233792821073, "grad_norm": 0.18575924634933472, "learning_rate": 8e-05, "loss": 1.6003, "step": 1818 }, { "epoch": 0.2482598607888631, "grad_norm": 0.18977604806423187, "learning_rate": 8e-05, "loss": 1.5989, "step": 1819 }, { "epoch": 0.24839634229561894, "grad_norm": 0.1818329095840454, "learning_rate": 8e-05, "loss": 1.588, "step": 1820 }, { "epoch": 0.2485328238023748, "grad_norm": 0.1889045685529709, "learning_rate": 8e-05, "loss": 1.5503, "step": 1821 }, { "epoch": 0.24866930530913062, "grad_norm": 0.18959684669971466, "learning_rate": 8e-05, "loss": 1.557, "step": 1822 }, { "epoch": 0.24880578681588644, "grad_norm": 0.18398132920265198, "learning_rate": 8e-05, "loss": 1.6111, "step": 1823 }, { "epoch": 0.24894226832264227, "grad_norm": 0.1816786378622055, "learning_rate": 8e-05, "loss": 1.5566, "step": 1824 }, { "epoch": 0.24907874982939812, "grad_norm": 0.18479779362678528, "learning_rate": 8e-05, "loss": 1.634, "step": 1825 }, { "epoch": 0.24921523133615395, "grad_norm": 0.19588877260684967, "learning_rate": 8e-05, "loss": 1.5449, "step": 1826 }, { "epoch": 0.24935171284290977, "grad_norm": 0.18671122193336487, "learning_rate": 8e-05, "loss": 1.6456, "step": 1827 }, { "epoch": 0.24948819434966563, "grad_norm": 0.1876567304134369, "learning_rate": 8e-05, "loss": 1.5394, "step": 1828 }, { "epoch": 0.24962467585642145, "grad_norm": 0.1924601048231125, "learning_rate": 8e-05, "loss": 1.6051, "step": 1829 }, { "epoch": 0.24976115736317728, "grad_norm": 0.17967765033245087, "learning_rate": 8e-05, "loss": 1.542, "step": 1830 }, { "epoch": 0.24989763886993313, "grad_norm": 0.1899469792842865, "learning_rate": 8e-05, "loss": 1.6332, "step": 1831 }, { "epoch": 0.250034120376689, "grad_norm": 0.1814192235469818, "learning_rate": 8e-05, "loss": 1.5524, "step": 1832 }, { "epoch": 0.2501706018834448, "grad_norm": 0.18947488069534302, "learning_rate": 8e-05, "loss": 1.5592, "step": 1833 }, { "epoch": 0.25030708339020064, "grad_norm": 0.1923573613166809, "learning_rate": 8e-05, "loss": 1.5693, "step": 1834 }, { "epoch": 0.25044356489695646, "grad_norm": 0.19979293644428253, "learning_rate": 8e-05, "loss": 1.6527, "step": 1835 }, { "epoch": 0.2505800464037123, "grad_norm": 0.1897367388010025, "learning_rate": 8e-05, "loss": 1.5894, "step": 1836 }, { "epoch": 0.2507165279104681, "grad_norm": 0.194015771150589, "learning_rate": 8e-05, "loss": 1.6288, "step": 1837 }, { "epoch": 0.250853009417224, "grad_norm": 0.1856689304113388, "learning_rate": 8e-05, "loss": 1.4927, "step": 1838 }, { "epoch": 0.2509894909239798, "grad_norm": 0.1950702965259552, "learning_rate": 8e-05, "loss": 1.6373, "step": 1839 }, { "epoch": 0.25112597243073564, "grad_norm": 0.18865469098091125, "learning_rate": 8e-05, "loss": 1.5705, "step": 1840 }, { "epoch": 0.25126245393749147, "grad_norm": 0.18866439163684845, "learning_rate": 8e-05, "loss": 1.59, "step": 1841 }, { "epoch": 0.2513989354442473, "grad_norm": 0.18741683661937714, "learning_rate": 8e-05, "loss": 1.5315, "step": 1842 }, { "epoch": 0.2515354169510031, "grad_norm": 0.18092559278011322, "learning_rate": 8e-05, "loss": 1.5799, "step": 1843 }, { "epoch": 0.25167189845775895, "grad_norm": 0.1817786991596222, "learning_rate": 8e-05, "loss": 1.5454, "step": 1844 }, { "epoch": 0.25180837996451483, "grad_norm": 0.18489402532577515, "learning_rate": 8e-05, "loss": 1.594, "step": 1845 }, { "epoch": 0.25194486147127065, "grad_norm": 0.18681347370147705, "learning_rate": 8e-05, "loss": 1.5142, "step": 1846 }, { "epoch": 0.2520813429780265, "grad_norm": 0.1880158931016922, "learning_rate": 8e-05, "loss": 1.6058, "step": 1847 }, { "epoch": 0.2522178244847823, "grad_norm": 0.18516340851783752, "learning_rate": 8e-05, "loss": 1.5813, "step": 1848 }, { "epoch": 0.25235430599153813, "grad_norm": 0.18830637633800507, "learning_rate": 8e-05, "loss": 1.5738, "step": 1849 }, { "epoch": 0.25249078749829396, "grad_norm": 0.17914266884326935, "learning_rate": 8e-05, "loss": 1.4799, "step": 1850 }, { "epoch": 0.25262726900504984, "grad_norm": 0.1884596347808838, "learning_rate": 8e-05, "loss": 1.5647, "step": 1851 }, { "epoch": 0.25276375051180566, "grad_norm": 0.18159277737140656, "learning_rate": 8e-05, "loss": 1.5272, "step": 1852 }, { "epoch": 0.2529002320185615, "grad_norm": 0.18422812223434448, "learning_rate": 8e-05, "loss": 1.5541, "step": 1853 }, { "epoch": 0.2530367135253173, "grad_norm": 0.191286101937294, "learning_rate": 8e-05, "loss": 1.5673, "step": 1854 }, { "epoch": 0.25317319503207314, "grad_norm": 0.18754881620407104, "learning_rate": 8e-05, "loss": 1.5608, "step": 1855 }, { "epoch": 0.25330967653882897, "grad_norm": 0.18286220729351044, "learning_rate": 8e-05, "loss": 1.5371, "step": 1856 }, { "epoch": 0.25344615804558485, "grad_norm": 0.17880402505397797, "learning_rate": 8e-05, "loss": 1.5517, "step": 1857 }, { "epoch": 0.25358263955234067, "grad_norm": 0.18703843653202057, "learning_rate": 8e-05, "loss": 1.5652, "step": 1858 }, { "epoch": 0.2537191210590965, "grad_norm": 0.1764143854379654, "learning_rate": 8e-05, "loss": 1.512, "step": 1859 }, { "epoch": 0.2538556025658523, "grad_norm": 0.1798507571220398, "learning_rate": 8e-05, "loss": 1.5014, "step": 1860 }, { "epoch": 0.25399208407260815, "grad_norm": 0.1905338168144226, "learning_rate": 8e-05, "loss": 1.5993, "step": 1861 }, { "epoch": 0.254128565579364, "grad_norm": 0.18906225264072418, "learning_rate": 8e-05, "loss": 1.6017, "step": 1862 }, { "epoch": 0.25426504708611986, "grad_norm": 0.18377509713172913, "learning_rate": 8e-05, "loss": 1.5638, "step": 1863 }, { "epoch": 0.2544015285928757, "grad_norm": 0.18969833850860596, "learning_rate": 8e-05, "loss": 1.5632, "step": 1864 }, { "epoch": 0.2545380100996315, "grad_norm": 0.17398011684417725, "learning_rate": 8e-05, "loss": 1.5176, "step": 1865 }, { "epoch": 0.25467449160638733, "grad_norm": 0.18954981863498688, "learning_rate": 8e-05, "loss": 1.5695, "step": 1866 }, { "epoch": 0.25481097311314316, "grad_norm": 0.18197040259838104, "learning_rate": 8e-05, "loss": 1.5118, "step": 1867 }, { "epoch": 0.254947454619899, "grad_norm": 0.18149541318416595, "learning_rate": 8e-05, "loss": 1.549, "step": 1868 }, { "epoch": 0.25508393612665486, "grad_norm": 0.1875280886888504, "learning_rate": 8e-05, "loss": 1.5445, "step": 1869 }, { "epoch": 0.2552204176334107, "grad_norm": 0.19960227608680725, "learning_rate": 8e-05, "loss": 1.5625, "step": 1870 }, { "epoch": 0.2553568991401665, "grad_norm": 0.1862519383430481, "learning_rate": 8e-05, "loss": 1.5746, "step": 1871 }, { "epoch": 0.25549338064692234, "grad_norm": 0.20189553499221802, "learning_rate": 8e-05, "loss": 1.6515, "step": 1872 }, { "epoch": 0.25562986215367817, "grad_norm": 0.18987150490283966, "learning_rate": 8e-05, "loss": 1.5846, "step": 1873 }, { "epoch": 0.255766343660434, "grad_norm": 0.19546480476856232, "learning_rate": 8e-05, "loss": 1.5187, "step": 1874 }, { "epoch": 0.2559028251671899, "grad_norm": 0.19442078471183777, "learning_rate": 8e-05, "loss": 1.5456, "step": 1875 }, { "epoch": 0.2560393066739457, "grad_norm": 0.1887025535106659, "learning_rate": 8e-05, "loss": 1.5502, "step": 1876 }, { "epoch": 0.2561757881807015, "grad_norm": 0.20818020403385162, "learning_rate": 8e-05, "loss": 1.6106, "step": 1877 }, { "epoch": 0.25631226968745735, "grad_norm": 0.18358318507671356, "learning_rate": 8e-05, "loss": 1.5764, "step": 1878 }, { "epoch": 0.2564487511942132, "grad_norm": 0.18465085327625275, "learning_rate": 8e-05, "loss": 1.5487, "step": 1879 }, { "epoch": 0.256585232700969, "grad_norm": 0.18243341147899628, "learning_rate": 8e-05, "loss": 1.5826, "step": 1880 }, { "epoch": 0.2567217142077248, "grad_norm": 0.18170012533664703, "learning_rate": 8e-05, "loss": 1.4825, "step": 1881 }, { "epoch": 0.2568581957144807, "grad_norm": 0.18276526033878326, "learning_rate": 8e-05, "loss": 1.5194, "step": 1882 }, { "epoch": 0.25699467722123653, "grad_norm": 0.1819353848695755, "learning_rate": 8e-05, "loss": 1.534, "step": 1883 }, { "epoch": 0.25713115872799236, "grad_norm": 0.18505947291851044, "learning_rate": 8e-05, "loss": 1.4915, "step": 1884 }, { "epoch": 0.2572676402347482, "grad_norm": 0.18670497834682465, "learning_rate": 8e-05, "loss": 1.5531, "step": 1885 }, { "epoch": 0.257404121741504, "grad_norm": 0.1910771280527115, "learning_rate": 8e-05, "loss": 1.5833, "step": 1886 }, { "epoch": 0.25754060324825984, "grad_norm": 0.18426668643951416, "learning_rate": 8e-05, "loss": 1.4876, "step": 1887 }, { "epoch": 0.2576770847550157, "grad_norm": 0.1860811412334442, "learning_rate": 8e-05, "loss": 1.5694, "step": 1888 }, { "epoch": 0.25781356626177154, "grad_norm": 0.18157057464122772, "learning_rate": 8e-05, "loss": 1.5061, "step": 1889 }, { "epoch": 0.25795004776852737, "grad_norm": 0.18245695531368256, "learning_rate": 8e-05, "loss": 1.5004, "step": 1890 }, { "epoch": 0.2580865292752832, "grad_norm": 0.1781587302684784, "learning_rate": 8e-05, "loss": 1.4823, "step": 1891 }, { "epoch": 0.258223010782039, "grad_norm": 0.18184123933315277, "learning_rate": 8e-05, "loss": 1.5435, "step": 1892 }, { "epoch": 0.25835949228879485, "grad_norm": 0.1817796677350998, "learning_rate": 8e-05, "loss": 1.5381, "step": 1893 }, { "epoch": 0.2584959737955507, "grad_norm": 0.18690018355846405, "learning_rate": 8e-05, "loss": 1.6565, "step": 1894 }, { "epoch": 0.25863245530230655, "grad_norm": 0.1951090395450592, "learning_rate": 8e-05, "loss": 1.5865, "step": 1895 }, { "epoch": 0.2587689368090624, "grad_norm": 0.17705518007278442, "learning_rate": 8e-05, "loss": 1.5145, "step": 1896 }, { "epoch": 0.2589054183158182, "grad_norm": 0.18770405650138855, "learning_rate": 8e-05, "loss": 1.514, "step": 1897 }, { "epoch": 0.25904189982257403, "grad_norm": 0.1851692795753479, "learning_rate": 8e-05, "loss": 1.5676, "step": 1898 }, { "epoch": 0.25917838132932985, "grad_norm": 0.1832483559846878, "learning_rate": 8e-05, "loss": 1.5865, "step": 1899 }, { "epoch": 0.25931486283608574, "grad_norm": 0.1904088258743286, "learning_rate": 8e-05, "loss": 1.5513, "step": 1900 }, { "epoch": 0.25945134434284156, "grad_norm": 0.1859157830476761, "learning_rate": 8e-05, "loss": 1.5848, "step": 1901 }, { "epoch": 0.2595878258495974, "grad_norm": 0.18166902661323547, "learning_rate": 8e-05, "loss": 1.5155, "step": 1902 }, { "epoch": 0.2597243073563532, "grad_norm": 0.18620115518569946, "learning_rate": 8e-05, "loss": 1.5589, "step": 1903 }, { "epoch": 0.25986078886310904, "grad_norm": 0.1801372766494751, "learning_rate": 8e-05, "loss": 1.5704, "step": 1904 }, { "epoch": 0.25999727036986486, "grad_norm": 0.19733835756778717, "learning_rate": 8e-05, "loss": 1.6516, "step": 1905 }, { "epoch": 0.26013375187662074, "grad_norm": 0.19153082370758057, "learning_rate": 8e-05, "loss": 1.536, "step": 1906 }, { "epoch": 0.26027023338337657, "grad_norm": 0.18736200034618378, "learning_rate": 8e-05, "loss": 1.5959, "step": 1907 }, { "epoch": 0.2604067148901324, "grad_norm": 0.18396945297718048, "learning_rate": 8e-05, "loss": 1.5616, "step": 1908 }, { "epoch": 0.2605431963968882, "grad_norm": 0.18430747091770172, "learning_rate": 8e-05, "loss": 1.5906, "step": 1909 }, { "epoch": 0.26067967790364405, "grad_norm": 0.18824610114097595, "learning_rate": 8e-05, "loss": 1.6472, "step": 1910 }, { "epoch": 0.2608161594103999, "grad_norm": 0.174940288066864, "learning_rate": 8e-05, "loss": 1.4856, "step": 1911 }, { "epoch": 0.2609526409171557, "grad_norm": 0.1898488998413086, "learning_rate": 8e-05, "loss": 1.6297, "step": 1912 }, { "epoch": 0.2610891224239116, "grad_norm": 0.188020721077919, "learning_rate": 8e-05, "loss": 1.5217, "step": 1913 }, { "epoch": 0.2612256039306674, "grad_norm": 0.19418668746948242, "learning_rate": 8e-05, "loss": 1.5922, "step": 1914 }, { "epoch": 0.26136208543742323, "grad_norm": 0.1825471967458725, "learning_rate": 8e-05, "loss": 1.5454, "step": 1915 }, { "epoch": 0.26149856694417906, "grad_norm": 0.18769685924053192, "learning_rate": 8e-05, "loss": 1.5708, "step": 1916 }, { "epoch": 0.2616350484509349, "grad_norm": 0.1926618069410324, "learning_rate": 8e-05, "loss": 1.6287, "step": 1917 }, { "epoch": 0.2617715299576907, "grad_norm": 0.1830102503299713, "learning_rate": 8e-05, "loss": 1.6395, "step": 1918 }, { "epoch": 0.2619080114644466, "grad_norm": 0.1960916668176651, "learning_rate": 8e-05, "loss": 1.5714, "step": 1919 }, { "epoch": 0.2620444929712024, "grad_norm": 0.17569278180599213, "learning_rate": 8e-05, "loss": 1.511, "step": 1920 }, { "epoch": 0.26218097447795824, "grad_norm": 0.19341592490673065, "learning_rate": 8e-05, "loss": 1.5864, "step": 1921 }, { "epoch": 0.26231745598471407, "grad_norm": 0.19142979383468628, "learning_rate": 8e-05, "loss": 1.5923, "step": 1922 }, { "epoch": 0.2624539374914699, "grad_norm": 0.20391963422298431, "learning_rate": 8e-05, "loss": 1.4985, "step": 1923 }, { "epoch": 0.2625904189982257, "grad_norm": 0.18116265535354614, "learning_rate": 8e-05, "loss": 1.5303, "step": 1924 }, { "epoch": 0.2627269005049816, "grad_norm": 0.19184935092926025, "learning_rate": 8e-05, "loss": 1.5891, "step": 1925 }, { "epoch": 0.2628633820117374, "grad_norm": 0.18622086942195892, "learning_rate": 8e-05, "loss": 1.5305, "step": 1926 }, { "epoch": 0.26299986351849325, "grad_norm": 0.20018920302391052, "learning_rate": 8e-05, "loss": 1.5998, "step": 1927 }, { "epoch": 0.2631363450252491, "grad_norm": 0.20352910459041595, "learning_rate": 8e-05, "loss": 1.6103, "step": 1928 }, { "epoch": 0.2632728265320049, "grad_norm": 0.181611105799675, "learning_rate": 8e-05, "loss": 1.57, "step": 1929 }, { "epoch": 0.2634093080387607, "grad_norm": 0.18917249143123627, "learning_rate": 8e-05, "loss": 1.5988, "step": 1930 }, { "epoch": 0.2635457895455166, "grad_norm": 0.19118757545948029, "learning_rate": 8e-05, "loss": 1.5691, "step": 1931 }, { "epoch": 0.26368227105227243, "grad_norm": 0.18285700678825378, "learning_rate": 8e-05, "loss": 1.5444, "step": 1932 }, { "epoch": 0.26381875255902826, "grad_norm": 0.18428394198417664, "learning_rate": 8e-05, "loss": 1.5476, "step": 1933 }, { "epoch": 0.2639552340657841, "grad_norm": 0.18672525882720947, "learning_rate": 8e-05, "loss": 1.5277, "step": 1934 }, { "epoch": 0.2640917155725399, "grad_norm": 0.1911453753709793, "learning_rate": 8e-05, "loss": 1.4905, "step": 1935 }, { "epoch": 0.26422819707929573, "grad_norm": 0.18629948794841766, "learning_rate": 8e-05, "loss": 1.526, "step": 1936 }, { "epoch": 0.2643646785860516, "grad_norm": 0.18960405886173248, "learning_rate": 8e-05, "loss": 1.5701, "step": 1937 }, { "epoch": 0.26450116009280744, "grad_norm": 0.19084347784519196, "learning_rate": 8e-05, "loss": 1.6068, "step": 1938 }, { "epoch": 0.26463764159956327, "grad_norm": 0.18919622898101807, "learning_rate": 8e-05, "loss": 1.5063, "step": 1939 }, { "epoch": 0.2647741231063191, "grad_norm": 0.18236729502677917, "learning_rate": 8e-05, "loss": 1.4601, "step": 1940 }, { "epoch": 0.2649106046130749, "grad_norm": 0.18604658544063568, "learning_rate": 8e-05, "loss": 1.5463, "step": 1941 }, { "epoch": 0.26504708611983074, "grad_norm": 0.1972835212945938, "learning_rate": 8e-05, "loss": 1.6216, "step": 1942 }, { "epoch": 0.2651835676265866, "grad_norm": 0.18087676167488098, "learning_rate": 8e-05, "loss": 1.5253, "step": 1943 }, { "epoch": 0.26532004913334245, "grad_norm": 0.18288066983222961, "learning_rate": 8e-05, "loss": 1.5598, "step": 1944 }, { "epoch": 0.2654565306400983, "grad_norm": 0.19971388578414917, "learning_rate": 8e-05, "loss": 1.5837, "step": 1945 }, { "epoch": 0.2655930121468541, "grad_norm": 0.19132724404335022, "learning_rate": 8e-05, "loss": 1.5639, "step": 1946 }, { "epoch": 0.2657294936536099, "grad_norm": 0.18988388776779175, "learning_rate": 8e-05, "loss": 1.5873, "step": 1947 }, { "epoch": 0.26586597516036575, "grad_norm": 0.18902939558029175, "learning_rate": 8e-05, "loss": 1.5355, "step": 1948 }, { "epoch": 0.2660024566671216, "grad_norm": 0.18844576179981232, "learning_rate": 8e-05, "loss": 1.5817, "step": 1949 }, { "epoch": 0.26613893817387746, "grad_norm": 0.1893915832042694, "learning_rate": 8e-05, "loss": 1.5684, "step": 1950 }, { "epoch": 0.2662754196806333, "grad_norm": 0.19330920279026031, "learning_rate": 8e-05, "loss": 1.5569, "step": 1951 }, { "epoch": 0.2664119011873891, "grad_norm": 0.18004919588565826, "learning_rate": 8e-05, "loss": 1.593, "step": 1952 }, { "epoch": 0.26654838269414494, "grad_norm": 0.18775908648967743, "learning_rate": 8e-05, "loss": 1.4856, "step": 1953 }, { "epoch": 0.26668486420090076, "grad_norm": 0.19320730865001678, "learning_rate": 8e-05, "loss": 1.5666, "step": 1954 }, { "epoch": 0.2668213457076566, "grad_norm": 0.1863574981689453, "learning_rate": 8e-05, "loss": 1.5345, "step": 1955 }, { "epoch": 0.26695782721441247, "grad_norm": 0.1857500523328781, "learning_rate": 8e-05, "loss": 1.5215, "step": 1956 }, { "epoch": 0.2670943087211683, "grad_norm": 0.18506550788879395, "learning_rate": 8e-05, "loss": 1.5488, "step": 1957 }, { "epoch": 0.2672307902279241, "grad_norm": 0.181392103433609, "learning_rate": 8e-05, "loss": 1.4983, "step": 1958 }, { "epoch": 0.26736727173467995, "grad_norm": 0.19811207056045532, "learning_rate": 8e-05, "loss": 1.5972, "step": 1959 }, { "epoch": 0.26750375324143577, "grad_norm": 0.18951451778411865, "learning_rate": 8e-05, "loss": 1.589, "step": 1960 }, { "epoch": 0.2676402347481916, "grad_norm": 0.18197423219680786, "learning_rate": 8e-05, "loss": 1.5186, "step": 1961 }, { "epoch": 0.2677767162549475, "grad_norm": 0.1882587969303131, "learning_rate": 8e-05, "loss": 1.5531, "step": 1962 }, { "epoch": 0.2679131977617033, "grad_norm": 0.19073234498500824, "learning_rate": 8e-05, "loss": 1.5673, "step": 1963 }, { "epoch": 0.26804967926845913, "grad_norm": 0.19055069983005524, "learning_rate": 8e-05, "loss": 1.612, "step": 1964 }, { "epoch": 0.26818616077521495, "grad_norm": 0.1990155577659607, "learning_rate": 8e-05, "loss": 1.5585, "step": 1965 }, { "epoch": 0.2683226422819708, "grad_norm": 0.18501326441764832, "learning_rate": 8e-05, "loss": 1.5013, "step": 1966 }, { "epoch": 0.2684591237887266, "grad_norm": 0.1913275271654129, "learning_rate": 8e-05, "loss": 1.5332, "step": 1967 }, { "epoch": 0.2685956052954825, "grad_norm": 0.18184155225753784, "learning_rate": 8e-05, "loss": 1.5156, "step": 1968 }, { "epoch": 0.2687320868022383, "grad_norm": 0.1934329718351364, "learning_rate": 8e-05, "loss": 1.5777, "step": 1969 }, { "epoch": 0.26886856830899414, "grad_norm": 0.18897759914398193, "learning_rate": 8e-05, "loss": 1.5917, "step": 1970 }, { "epoch": 0.26900504981574996, "grad_norm": 0.18884557485580444, "learning_rate": 8e-05, "loss": 1.5869, "step": 1971 }, { "epoch": 0.2691415313225058, "grad_norm": 0.1875036358833313, "learning_rate": 8e-05, "loss": 1.5606, "step": 1972 }, { "epoch": 0.2692780128292616, "grad_norm": 0.18670396506786346, "learning_rate": 8e-05, "loss": 1.5546, "step": 1973 }, { "epoch": 0.2694144943360175, "grad_norm": 0.18138957023620605, "learning_rate": 8e-05, "loss": 1.5251, "step": 1974 }, { "epoch": 0.2695509758427733, "grad_norm": 0.1885509043931961, "learning_rate": 8e-05, "loss": 1.5715, "step": 1975 }, { "epoch": 0.26968745734952915, "grad_norm": 0.187229722738266, "learning_rate": 8e-05, "loss": 1.6568, "step": 1976 }, { "epoch": 0.269823938856285, "grad_norm": 0.19409427046775818, "learning_rate": 8e-05, "loss": 1.5599, "step": 1977 }, { "epoch": 0.2699604203630408, "grad_norm": 0.1868712157011032, "learning_rate": 8e-05, "loss": 1.5734, "step": 1978 }, { "epoch": 0.2700969018697966, "grad_norm": 0.19214344024658203, "learning_rate": 8e-05, "loss": 1.5954, "step": 1979 }, { "epoch": 0.27023338337655245, "grad_norm": 0.19068008661270142, "learning_rate": 8e-05, "loss": 1.6215, "step": 1980 }, { "epoch": 0.27036986488330833, "grad_norm": 0.18215326964855194, "learning_rate": 8e-05, "loss": 1.5185, "step": 1981 }, { "epoch": 0.27050634639006416, "grad_norm": 0.18147170543670654, "learning_rate": 8e-05, "loss": 1.5206, "step": 1982 }, { "epoch": 0.27064282789682, "grad_norm": 0.18436045944690704, "learning_rate": 8e-05, "loss": 1.5095, "step": 1983 }, { "epoch": 0.2707793094035758, "grad_norm": 0.18467599153518677, "learning_rate": 8e-05, "loss": 1.5196, "step": 1984 }, { "epoch": 0.27091579091033163, "grad_norm": 0.18867486715316772, "learning_rate": 8e-05, "loss": 1.5409, "step": 1985 }, { "epoch": 0.27105227241708746, "grad_norm": 0.18096183240413666, "learning_rate": 8e-05, "loss": 1.5082, "step": 1986 }, { "epoch": 0.27118875392384334, "grad_norm": 0.1838560700416565, "learning_rate": 8e-05, "loss": 1.5188, "step": 1987 }, { "epoch": 0.27132523543059917, "grad_norm": 0.1841355562210083, "learning_rate": 8e-05, "loss": 1.5545, "step": 1988 }, { "epoch": 0.271461716937355, "grad_norm": 0.18679086863994598, "learning_rate": 8e-05, "loss": 1.561, "step": 1989 }, { "epoch": 0.2715981984441108, "grad_norm": 0.18134665489196777, "learning_rate": 8e-05, "loss": 1.5034, "step": 1990 }, { "epoch": 0.27173467995086664, "grad_norm": 0.18904753029346466, "learning_rate": 8e-05, "loss": 1.5461, "step": 1991 }, { "epoch": 0.27187116145762247, "grad_norm": 0.18858948349952698, "learning_rate": 8e-05, "loss": 1.5919, "step": 1992 }, { "epoch": 0.27200764296437835, "grad_norm": 0.1885107159614563, "learning_rate": 8e-05, "loss": 1.5668, "step": 1993 }, { "epoch": 0.2721441244711342, "grad_norm": 0.19152052700519562, "learning_rate": 8e-05, "loss": 1.5104, "step": 1994 }, { "epoch": 0.27228060597789, "grad_norm": 0.18891370296478271, "learning_rate": 8e-05, "loss": 1.5267, "step": 1995 }, { "epoch": 0.2724170874846458, "grad_norm": 0.19039413332939148, "learning_rate": 8e-05, "loss": 1.5849, "step": 1996 }, { "epoch": 0.27255356899140165, "grad_norm": 0.18724073469638824, "learning_rate": 8e-05, "loss": 1.6407, "step": 1997 }, { "epoch": 0.2726900504981575, "grad_norm": 0.18726858496665955, "learning_rate": 8e-05, "loss": 1.6044, "step": 1998 }, { "epoch": 0.27282653200491336, "grad_norm": 0.17973366379737854, "learning_rate": 8e-05, "loss": 1.548, "step": 1999 }, { "epoch": 0.2729630135116692, "grad_norm": 0.18358615040779114, "learning_rate": 8e-05, "loss": 1.5915, "step": 2000 }, { "epoch": 0.273099495018425, "grad_norm": 0.18650315701961517, "learning_rate": 8e-05, "loss": 1.4898, "step": 2001 }, { "epoch": 0.27323597652518083, "grad_norm": 0.18275026977062225, "learning_rate": 8e-05, "loss": 1.5492, "step": 2002 }, { "epoch": 0.27337245803193666, "grad_norm": 0.1825461983680725, "learning_rate": 8e-05, "loss": 1.504, "step": 2003 }, { "epoch": 0.2735089395386925, "grad_norm": 0.18888992071151733, "learning_rate": 8e-05, "loss": 1.5453, "step": 2004 }, { "epoch": 0.27364542104544837, "grad_norm": 0.1880376785993576, "learning_rate": 8e-05, "loss": 1.5013, "step": 2005 }, { "epoch": 0.2737819025522042, "grad_norm": 0.18573841452598572, "learning_rate": 8e-05, "loss": 1.5665, "step": 2006 }, { "epoch": 0.27391838405896, "grad_norm": 0.18665440380573273, "learning_rate": 8e-05, "loss": 1.5353, "step": 2007 }, { "epoch": 0.27405486556571584, "grad_norm": 0.18374697864055634, "learning_rate": 8e-05, "loss": 1.5599, "step": 2008 }, { "epoch": 0.27419134707247167, "grad_norm": 0.1893589198589325, "learning_rate": 8e-05, "loss": 1.5302, "step": 2009 }, { "epoch": 0.2743278285792275, "grad_norm": 0.18848778307437897, "learning_rate": 8e-05, "loss": 1.4803, "step": 2010 }, { "epoch": 0.2744643100859834, "grad_norm": 0.18755941092967987, "learning_rate": 8e-05, "loss": 1.5679, "step": 2011 }, { "epoch": 0.2746007915927392, "grad_norm": 0.18306218087673187, "learning_rate": 8e-05, "loss": 1.5044, "step": 2012 }, { "epoch": 0.274737273099495, "grad_norm": 0.18841566145420074, "learning_rate": 8e-05, "loss": 1.5486, "step": 2013 }, { "epoch": 0.27487375460625085, "grad_norm": 0.18870852887630463, "learning_rate": 8e-05, "loss": 1.5111, "step": 2014 }, { "epoch": 0.2750102361130067, "grad_norm": 0.18545715510845184, "learning_rate": 8e-05, "loss": 1.5179, "step": 2015 }, { "epoch": 0.2751467176197625, "grad_norm": 0.1913764774799347, "learning_rate": 8e-05, "loss": 1.5866, "step": 2016 }, { "epoch": 0.27528319912651833, "grad_norm": 0.18971224129199982, "learning_rate": 8e-05, "loss": 1.5129, "step": 2017 }, { "epoch": 0.2754196806332742, "grad_norm": 0.18147705495357513, "learning_rate": 8e-05, "loss": 1.5084, "step": 2018 }, { "epoch": 0.27555616214003004, "grad_norm": 0.18673628568649292, "learning_rate": 8e-05, "loss": 1.5013, "step": 2019 }, { "epoch": 0.27569264364678586, "grad_norm": 0.1802338808774948, "learning_rate": 8e-05, "loss": 1.4617, "step": 2020 }, { "epoch": 0.2758291251535417, "grad_norm": 0.19209244847297668, "learning_rate": 8e-05, "loss": 1.5712, "step": 2021 }, { "epoch": 0.2759656066602975, "grad_norm": 0.20253556966781616, "learning_rate": 8e-05, "loss": 1.5397, "step": 2022 }, { "epoch": 0.27610208816705334, "grad_norm": 0.18339714407920837, "learning_rate": 8e-05, "loss": 1.5375, "step": 2023 }, { "epoch": 0.2762385696738092, "grad_norm": 0.18465369939804077, "learning_rate": 8e-05, "loss": 1.4795, "step": 2024 }, { "epoch": 0.27637505118056505, "grad_norm": 0.20187842845916748, "learning_rate": 8e-05, "loss": 1.5624, "step": 2025 }, { "epoch": 0.27651153268732087, "grad_norm": 0.18822062015533447, "learning_rate": 8e-05, "loss": 1.5789, "step": 2026 }, { "epoch": 0.2766480141940767, "grad_norm": 0.1913299262523651, "learning_rate": 8e-05, "loss": 1.5947, "step": 2027 }, { "epoch": 0.2767844957008325, "grad_norm": 0.19084617495536804, "learning_rate": 8e-05, "loss": 1.571, "step": 2028 }, { "epoch": 0.27692097720758835, "grad_norm": 0.19963937997817993, "learning_rate": 8e-05, "loss": 1.6228, "step": 2029 }, { "epoch": 0.27705745871434423, "grad_norm": 0.18904776871204376, "learning_rate": 8e-05, "loss": 1.5947, "step": 2030 }, { "epoch": 0.27719394022110005, "grad_norm": 0.1909492164850235, "learning_rate": 8e-05, "loss": 1.493, "step": 2031 }, { "epoch": 0.2773304217278559, "grad_norm": 0.18875887989997864, "learning_rate": 8e-05, "loss": 1.6363, "step": 2032 }, { "epoch": 0.2774669032346117, "grad_norm": 0.18893785774707794, "learning_rate": 8e-05, "loss": 1.5963, "step": 2033 }, { "epoch": 0.27760338474136753, "grad_norm": 0.18837638199329376, "learning_rate": 8e-05, "loss": 1.5717, "step": 2034 }, { "epoch": 0.27773986624812336, "grad_norm": 0.19914820790290833, "learning_rate": 8e-05, "loss": 1.6072, "step": 2035 }, { "epoch": 0.27787634775487924, "grad_norm": 0.18605150282382965, "learning_rate": 8e-05, "loss": 1.5508, "step": 2036 }, { "epoch": 0.27801282926163506, "grad_norm": 0.1822100281715393, "learning_rate": 8e-05, "loss": 1.5239, "step": 2037 }, { "epoch": 0.2781493107683909, "grad_norm": 0.1860920935869217, "learning_rate": 8e-05, "loss": 1.4875, "step": 2038 }, { "epoch": 0.2782857922751467, "grad_norm": 0.18487825989723206, "learning_rate": 8e-05, "loss": 1.5003, "step": 2039 }, { "epoch": 0.27842227378190254, "grad_norm": 0.21461714804172516, "learning_rate": 8e-05, "loss": 1.5303, "step": 2040 }, { "epoch": 0.27855875528865837, "grad_norm": 0.1883653998374939, "learning_rate": 8e-05, "loss": 1.6343, "step": 2041 }, { "epoch": 0.27869523679541425, "grad_norm": 0.18029989302158356, "learning_rate": 8e-05, "loss": 1.4902, "step": 2042 }, { "epoch": 0.2788317183021701, "grad_norm": 0.1968306452035904, "learning_rate": 8e-05, "loss": 1.5404, "step": 2043 }, { "epoch": 0.2789681998089259, "grad_norm": 0.1841675192117691, "learning_rate": 8e-05, "loss": 1.5112, "step": 2044 }, { "epoch": 0.2791046813156817, "grad_norm": 0.18856370449066162, "learning_rate": 8e-05, "loss": 1.4514, "step": 2045 }, { "epoch": 0.27924116282243755, "grad_norm": 0.19120898842811584, "learning_rate": 8e-05, "loss": 1.5324, "step": 2046 }, { "epoch": 0.2793776443291934, "grad_norm": 0.1992325335741043, "learning_rate": 8e-05, "loss": 1.617, "step": 2047 }, { "epoch": 0.27951412583594926, "grad_norm": 0.1858394593000412, "learning_rate": 8e-05, "loss": 1.562, "step": 2048 }, { "epoch": 0.2796506073427051, "grad_norm": 0.18444325029850006, "learning_rate": 8e-05, "loss": 1.5448, "step": 2049 }, { "epoch": 0.2797870888494609, "grad_norm": 0.18600699305534363, "learning_rate": 8e-05, "loss": 1.4986, "step": 2050 }, { "epoch": 0.27992357035621673, "grad_norm": 0.18486641347408295, "learning_rate": 8e-05, "loss": 1.6104, "step": 2051 }, { "epoch": 0.28006005186297256, "grad_norm": 0.18436729907989502, "learning_rate": 8e-05, "loss": 1.5727, "step": 2052 }, { "epoch": 0.2801965333697284, "grad_norm": 0.18976762890815735, "learning_rate": 8e-05, "loss": 1.584, "step": 2053 }, { "epoch": 0.2803330148764842, "grad_norm": 0.1834273636341095, "learning_rate": 8e-05, "loss": 1.5478, "step": 2054 }, { "epoch": 0.2804694963832401, "grad_norm": 0.1896577775478363, "learning_rate": 8e-05, "loss": 1.5514, "step": 2055 }, { "epoch": 0.2806059778899959, "grad_norm": 0.19044657051563263, "learning_rate": 8e-05, "loss": 1.5773, "step": 2056 }, { "epoch": 0.28074245939675174, "grad_norm": 0.19076402485370636, "learning_rate": 8e-05, "loss": 1.6496, "step": 2057 }, { "epoch": 0.28087894090350757, "grad_norm": 0.18252645432949066, "learning_rate": 8e-05, "loss": 1.555, "step": 2058 }, { "epoch": 0.2810154224102634, "grad_norm": 0.18642456829547882, "learning_rate": 8e-05, "loss": 1.5843, "step": 2059 }, { "epoch": 0.2811519039170192, "grad_norm": 0.198273703455925, "learning_rate": 8e-05, "loss": 1.5821, "step": 2060 }, { "epoch": 0.2812883854237751, "grad_norm": 0.188239187002182, "learning_rate": 8e-05, "loss": 1.5706, "step": 2061 }, { "epoch": 0.2814248669305309, "grad_norm": 0.18614868819713593, "learning_rate": 8e-05, "loss": 1.5624, "step": 2062 }, { "epoch": 0.28156134843728675, "grad_norm": 0.19303081929683685, "learning_rate": 8e-05, "loss": 1.5311, "step": 2063 }, { "epoch": 0.2816978299440426, "grad_norm": 0.18951718509197235, "learning_rate": 8e-05, "loss": 1.5813, "step": 2064 }, { "epoch": 0.2818343114507984, "grad_norm": 0.19406652450561523, "learning_rate": 8e-05, "loss": 1.6268, "step": 2065 }, { "epoch": 0.28197079295755423, "grad_norm": 0.19498033821582794, "learning_rate": 8e-05, "loss": 1.5763, "step": 2066 }, { "epoch": 0.2821072744643101, "grad_norm": 0.18559187650680542, "learning_rate": 8e-05, "loss": 1.486, "step": 2067 }, { "epoch": 0.28224375597106593, "grad_norm": 0.18782497942447662, "learning_rate": 8e-05, "loss": 1.4472, "step": 2068 }, { "epoch": 0.28238023747782176, "grad_norm": 0.1820422261953354, "learning_rate": 8e-05, "loss": 1.5141, "step": 2069 }, { "epoch": 0.2825167189845776, "grad_norm": 0.18505264818668365, "learning_rate": 8e-05, "loss": 1.599, "step": 2070 }, { "epoch": 0.2826532004913334, "grad_norm": 0.18905840814113617, "learning_rate": 8e-05, "loss": 1.5395, "step": 2071 }, { "epoch": 0.28278968199808924, "grad_norm": 0.19095966219902039, "learning_rate": 8e-05, "loss": 1.5983, "step": 2072 }, { "epoch": 0.2829261635048451, "grad_norm": 0.19496643543243408, "learning_rate": 8e-05, "loss": 1.6043, "step": 2073 }, { "epoch": 0.28306264501160094, "grad_norm": 0.19167523086071014, "learning_rate": 8e-05, "loss": 1.5845, "step": 2074 }, { "epoch": 0.28319912651835677, "grad_norm": 0.1842069774866104, "learning_rate": 8e-05, "loss": 1.5383, "step": 2075 }, { "epoch": 0.2833356080251126, "grad_norm": 0.19793909788131714, "learning_rate": 8e-05, "loss": 1.6075, "step": 2076 }, { "epoch": 0.2834720895318684, "grad_norm": 0.19219326972961426, "learning_rate": 8e-05, "loss": 1.5475, "step": 2077 }, { "epoch": 0.28360857103862425, "grad_norm": 0.18906018137931824, "learning_rate": 8e-05, "loss": 1.5779, "step": 2078 }, { "epoch": 0.2837450525453801, "grad_norm": 0.196896493434906, "learning_rate": 8e-05, "loss": 1.5992, "step": 2079 }, { "epoch": 0.28388153405213595, "grad_norm": 0.1978483647108078, "learning_rate": 8e-05, "loss": 1.583, "step": 2080 }, { "epoch": 0.2840180155588918, "grad_norm": 0.1934979259967804, "learning_rate": 8e-05, "loss": 1.5597, "step": 2081 }, { "epoch": 0.2841544970656476, "grad_norm": 0.19172784686088562, "learning_rate": 8e-05, "loss": 1.616, "step": 2082 }, { "epoch": 0.28429097857240343, "grad_norm": 0.19835710525512695, "learning_rate": 8e-05, "loss": 1.4582, "step": 2083 }, { "epoch": 0.28442746007915926, "grad_norm": 0.2018197625875473, "learning_rate": 8e-05, "loss": 1.6179, "step": 2084 }, { "epoch": 0.2845639415859151, "grad_norm": 0.1912423074245453, "learning_rate": 8e-05, "loss": 1.5217, "step": 2085 }, { "epoch": 0.28470042309267096, "grad_norm": 0.19440267980098724, "learning_rate": 8e-05, "loss": 1.508, "step": 2086 }, { "epoch": 0.2848369045994268, "grad_norm": 0.18047915399074554, "learning_rate": 8e-05, "loss": 1.4986, "step": 2087 }, { "epoch": 0.2849733861061826, "grad_norm": 0.2030009776353836, "learning_rate": 8e-05, "loss": 1.5616, "step": 2088 }, { "epoch": 0.28510986761293844, "grad_norm": 0.1893721967935562, "learning_rate": 8e-05, "loss": 1.5186, "step": 2089 }, { "epoch": 0.28524634911969426, "grad_norm": 0.2278795689344406, "learning_rate": 8e-05, "loss": 1.4578, "step": 2090 }, { "epoch": 0.2853828306264501, "grad_norm": 0.21405383944511414, "learning_rate": 8e-05, "loss": 1.5802, "step": 2091 }, { "epoch": 0.28551931213320597, "grad_norm": 0.1927763819694519, "learning_rate": 8e-05, "loss": 1.5062, "step": 2092 }, { "epoch": 0.2856557936399618, "grad_norm": 0.20548342168331146, "learning_rate": 8e-05, "loss": 1.5398, "step": 2093 }, { "epoch": 0.2857922751467176, "grad_norm": 0.193179190158844, "learning_rate": 8e-05, "loss": 1.567, "step": 2094 }, { "epoch": 0.28592875665347345, "grad_norm": 0.19033104181289673, "learning_rate": 8e-05, "loss": 1.5351, "step": 2095 }, { "epoch": 0.2860652381602293, "grad_norm": 0.19091038405895233, "learning_rate": 8e-05, "loss": 1.5134, "step": 2096 }, { "epoch": 0.2862017196669851, "grad_norm": 0.18988966941833496, "learning_rate": 8e-05, "loss": 1.5559, "step": 2097 }, { "epoch": 0.286338201173741, "grad_norm": 0.193196639418602, "learning_rate": 8e-05, "loss": 1.6583, "step": 2098 }, { "epoch": 0.2864746826804968, "grad_norm": 0.18785765767097473, "learning_rate": 8e-05, "loss": 1.5565, "step": 2099 }, { "epoch": 0.28661116418725263, "grad_norm": 0.1824287623167038, "learning_rate": 8e-05, "loss": 1.5245, "step": 2100 }, { "epoch": 0.28674764569400846, "grad_norm": 0.18832935392856598, "learning_rate": 8e-05, "loss": 1.5842, "step": 2101 }, { "epoch": 0.2868841272007643, "grad_norm": 0.1972111016511917, "learning_rate": 8e-05, "loss": 1.5565, "step": 2102 }, { "epoch": 0.2870206087075201, "grad_norm": 0.18275626003742218, "learning_rate": 8e-05, "loss": 1.5838, "step": 2103 }, { "epoch": 0.287157090214276, "grad_norm": 0.18729160726070404, "learning_rate": 8e-05, "loss": 1.5443, "step": 2104 }, { "epoch": 0.2872935717210318, "grad_norm": 0.18493196368217468, "learning_rate": 8e-05, "loss": 1.5415, "step": 2105 }, { "epoch": 0.28743005322778764, "grad_norm": 0.19562236964702606, "learning_rate": 8e-05, "loss": 1.534, "step": 2106 }, { "epoch": 0.28756653473454347, "grad_norm": 0.1914881318807602, "learning_rate": 8e-05, "loss": 1.508, "step": 2107 }, { "epoch": 0.2877030162412993, "grad_norm": 0.19250090420246124, "learning_rate": 8e-05, "loss": 1.6242, "step": 2108 }, { "epoch": 0.2878394977480551, "grad_norm": 0.19696591794490814, "learning_rate": 8e-05, "loss": 1.5742, "step": 2109 }, { "epoch": 0.287975979254811, "grad_norm": 0.19119641184806824, "learning_rate": 8e-05, "loss": 1.543, "step": 2110 }, { "epoch": 0.2881124607615668, "grad_norm": 0.1956411451101303, "learning_rate": 8e-05, "loss": 1.6082, "step": 2111 }, { "epoch": 0.28824894226832265, "grad_norm": 0.19782638549804688, "learning_rate": 8e-05, "loss": 1.5285, "step": 2112 }, { "epoch": 0.2883854237750785, "grad_norm": 0.193250373005867, "learning_rate": 8e-05, "loss": 1.571, "step": 2113 }, { "epoch": 0.2885219052818343, "grad_norm": 0.19214728474617004, "learning_rate": 8e-05, "loss": 1.5146, "step": 2114 }, { "epoch": 0.2886583867885901, "grad_norm": 0.19219325482845306, "learning_rate": 8e-05, "loss": 1.5628, "step": 2115 }, { "epoch": 0.288794868295346, "grad_norm": 0.18980087339878082, "learning_rate": 8e-05, "loss": 1.5773, "step": 2116 }, { "epoch": 0.28893134980210183, "grad_norm": 0.19851712882518768, "learning_rate": 8e-05, "loss": 1.5897, "step": 2117 }, { "epoch": 0.28906783130885766, "grad_norm": 0.18184475600719452, "learning_rate": 8e-05, "loss": 1.5243, "step": 2118 }, { "epoch": 0.2892043128156135, "grad_norm": 0.1804375797510147, "learning_rate": 8e-05, "loss": 1.4641, "step": 2119 }, { "epoch": 0.2893407943223693, "grad_norm": 0.1868881732225418, "learning_rate": 8e-05, "loss": 1.5258, "step": 2120 }, { "epoch": 0.28947727582912514, "grad_norm": 0.18092136085033417, "learning_rate": 8e-05, "loss": 1.4103, "step": 2121 }, { "epoch": 0.28961375733588096, "grad_norm": 0.19184724986553192, "learning_rate": 8e-05, "loss": 1.5465, "step": 2122 }, { "epoch": 0.28975023884263684, "grad_norm": 0.18744532763957977, "learning_rate": 8e-05, "loss": 1.5336, "step": 2123 }, { "epoch": 0.28988672034939267, "grad_norm": 0.18454964458942413, "learning_rate": 8e-05, "loss": 1.5272, "step": 2124 }, { "epoch": 0.2900232018561485, "grad_norm": 0.18746648728847504, "learning_rate": 8e-05, "loss": 1.5787, "step": 2125 }, { "epoch": 0.2901596833629043, "grad_norm": 0.1891246736049652, "learning_rate": 8e-05, "loss": 1.5275, "step": 2126 }, { "epoch": 0.29029616486966014, "grad_norm": 0.19264520704746246, "learning_rate": 8e-05, "loss": 1.6074, "step": 2127 }, { "epoch": 0.29043264637641597, "grad_norm": 0.1850605607032776, "learning_rate": 8e-05, "loss": 1.5125, "step": 2128 }, { "epoch": 0.29056912788317185, "grad_norm": 0.18765035271644592, "learning_rate": 8e-05, "loss": 1.4998, "step": 2129 }, { "epoch": 0.2907056093899277, "grad_norm": 0.18671131134033203, "learning_rate": 8e-05, "loss": 1.6026, "step": 2130 }, { "epoch": 0.2908420908966835, "grad_norm": 0.18650102615356445, "learning_rate": 8e-05, "loss": 1.5592, "step": 2131 }, { "epoch": 0.29097857240343933, "grad_norm": 0.1930171400308609, "learning_rate": 8e-05, "loss": 1.6365, "step": 2132 }, { "epoch": 0.29111505391019515, "grad_norm": 0.17904651165008545, "learning_rate": 8e-05, "loss": 1.4884, "step": 2133 }, { "epoch": 0.291251535416951, "grad_norm": 0.19207796454429626, "learning_rate": 8e-05, "loss": 1.5931, "step": 2134 }, { "epoch": 0.29138801692370686, "grad_norm": 0.1889646202325821, "learning_rate": 8e-05, "loss": 1.5802, "step": 2135 }, { "epoch": 0.2915244984304627, "grad_norm": 0.18964365124702454, "learning_rate": 8e-05, "loss": 1.5885, "step": 2136 }, { "epoch": 0.2916609799372185, "grad_norm": 0.19761215150356293, "learning_rate": 8e-05, "loss": 1.5285, "step": 2137 }, { "epoch": 0.29179746144397434, "grad_norm": 0.1873132437467575, "learning_rate": 8e-05, "loss": 1.5224, "step": 2138 }, { "epoch": 0.29193394295073016, "grad_norm": 0.18819749355316162, "learning_rate": 8e-05, "loss": 1.5988, "step": 2139 }, { "epoch": 0.292070424457486, "grad_norm": 0.1918659210205078, "learning_rate": 8e-05, "loss": 1.5342, "step": 2140 }, { "epoch": 0.29220690596424187, "grad_norm": 0.18802089989185333, "learning_rate": 8e-05, "loss": 1.5565, "step": 2141 }, { "epoch": 0.2923433874709977, "grad_norm": 0.19641587138175964, "learning_rate": 8e-05, "loss": 1.6525, "step": 2142 }, { "epoch": 0.2924798689777535, "grad_norm": 0.18942345678806305, "learning_rate": 8e-05, "loss": 1.5585, "step": 2143 }, { "epoch": 0.29261635048450935, "grad_norm": 0.19834423065185547, "learning_rate": 8e-05, "loss": 1.5201, "step": 2144 }, { "epoch": 0.29275283199126517, "grad_norm": 0.1920042335987091, "learning_rate": 8e-05, "loss": 1.5266, "step": 2145 }, { "epoch": 0.292889313498021, "grad_norm": 0.19429340958595276, "learning_rate": 8e-05, "loss": 1.5539, "step": 2146 }, { "epoch": 0.2930257950047769, "grad_norm": 0.19816112518310547, "learning_rate": 8e-05, "loss": 1.5476, "step": 2147 }, { "epoch": 0.2931622765115327, "grad_norm": 0.1952446848154068, "learning_rate": 8e-05, "loss": 1.493, "step": 2148 }, { "epoch": 0.29329875801828853, "grad_norm": 0.19394353032112122, "learning_rate": 8e-05, "loss": 1.5646, "step": 2149 }, { "epoch": 0.29343523952504436, "grad_norm": 0.19018422067165375, "learning_rate": 8e-05, "loss": 1.603, "step": 2150 }, { "epoch": 0.2935717210318002, "grad_norm": 0.20142124593257904, "learning_rate": 8e-05, "loss": 1.6244, "step": 2151 }, { "epoch": 0.293708202538556, "grad_norm": 0.18389956653118134, "learning_rate": 8e-05, "loss": 1.5624, "step": 2152 }, { "epoch": 0.29384468404531183, "grad_norm": 0.20680299401283264, "learning_rate": 8e-05, "loss": 1.6146, "step": 2153 }, { "epoch": 0.2939811655520677, "grad_norm": 0.1897064596414566, "learning_rate": 8e-05, "loss": 1.5477, "step": 2154 }, { "epoch": 0.29411764705882354, "grad_norm": 0.19357186555862427, "learning_rate": 8e-05, "loss": 1.5455, "step": 2155 }, { "epoch": 0.29425412856557936, "grad_norm": 0.1795782744884491, "learning_rate": 8e-05, "loss": 1.4079, "step": 2156 }, { "epoch": 0.2943906100723352, "grad_norm": 0.18702712655067444, "learning_rate": 8e-05, "loss": 1.4603, "step": 2157 }, { "epoch": 0.294527091579091, "grad_norm": 0.20010407269001007, "learning_rate": 8e-05, "loss": 1.5664, "step": 2158 }, { "epoch": 0.29466357308584684, "grad_norm": 0.1988273561000824, "learning_rate": 8e-05, "loss": 1.504, "step": 2159 }, { "epoch": 0.2948000545926027, "grad_norm": 0.19437240064144135, "learning_rate": 8e-05, "loss": 1.5527, "step": 2160 }, { "epoch": 0.29493653609935855, "grad_norm": 0.19792622327804565, "learning_rate": 8e-05, "loss": 1.5669, "step": 2161 }, { "epoch": 0.2950730176061144, "grad_norm": 0.2051275074481964, "learning_rate": 8e-05, "loss": 1.5655, "step": 2162 }, { "epoch": 0.2952094991128702, "grad_norm": 0.18476387858390808, "learning_rate": 8e-05, "loss": 1.506, "step": 2163 }, { "epoch": 0.295345980619626, "grad_norm": 0.18674908578395844, "learning_rate": 8e-05, "loss": 1.521, "step": 2164 }, { "epoch": 0.29548246212638185, "grad_norm": 0.19958634674549103, "learning_rate": 8e-05, "loss": 1.5931, "step": 2165 }, { "epoch": 0.29561894363313773, "grad_norm": 0.1926935911178589, "learning_rate": 8e-05, "loss": 1.4892, "step": 2166 }, { "epoch": 0.29575542513989356, "grad_norm": 0.19321037828922272, "learning_rate": 8e-05, "loss": 1.5844, "step": 2167 }, { "epoch": 0.2958919066466494, "grad_norm": 0.19959363341331482, "learning_rate": 8e-05, "loss": 1.5102, "step": 2168 }, { "epoch": 0.2960283881534052, "grad_norm": 0.19045016169548035, "learning_rate": 8e-05, "loss": 1.5389, "step": 2169 }, { "epoch": 0.29616486966016103, "grad_norm": 0.18875819444656372, "learning_rate": 8e-05, "loss": 1.5632, "step": 2170 }, { "epoch": 0.29630135116691686, "grad_norm": 0.19844792783260345, "learning_rate": 8e-05, "loss": 1.6383, "step": 2171 }, { "epoch": 0.29643783267367274, "grad_norm": 0.19257670640945435, "learning_rate": 8e-05, "loss": 1.4932, "step": 2172 }, { "epoch": 0.29657431418042857, "grad_norm": 0.20057834684848785, "learning_rate": 8e-05, "loss": 1.5459, "step": 2173 }, { "epoch": 0.2967107956871844, "grad_norm": 0.18506813049316406, "learning_rate": 8e-05, "loss": 1.4849, "step": 2174 }, { "epoch": 0.2968472771939402, "grad_norm": 0.20173443853855133, "learning_rate": 8e-05, "loss": 1.497, "step": 2175 }, { "epoch": 0.29698375870069604, "grad_norm": 0.19865339994430542, "learning_rate": 8e-05, "loss": 1.5678, "step": 2176 }, { "epoch": 0.29712024020745187, "grad_norm": 0.19193463027477264, "learning_rate": 8e-05, "loss": 1.5209, "step": 2177 }, { "epoch": 0.29725672171420775, "grad_norm": 0.19100703299045563, "learning_rate": 8e-05, "loss": 1.4982, "step": 2178 }, { "epoch": 0.2973932032209636, "grad_norm": 0.18851928412914276, "learning_rate": 8e-05, "loss": 1.4989, "step": 2179 }, { "epoch": 0.2975296847277194, "grad_norm": 0.19007621705532074, "learning_rate": 8e-05, "loss": 1.5471, "step": 2180 }, { "epoch": 0.2976661662344752, "grad_norm": 0.18618464469909668, "learning_rate": 8e-05, "loss": 1.582, "step": 2181 }, { "epoch": 0.29780264774123105, "grad_norm": 0.19135455787181854, "learning_rate": 8e-05, "loss": 1.5656, "step": 2182 }, { "epoch": 0.2979391292479869, "grad_norm": 0.19191507995128632, "learning_rate": 8e-05, "loss": 1.5291, "step": 2183 }, { "epoch": 0.29807561075474276, "grad_norm": 0.1900150030851364, "learning_rate": 8e-05, "loss": 1.5971, "step": 2184 }, { "epoch": 0.2982120922614986, "grad_norm": 0.18471696972846985, "learning_rate": 8e-05, "loss": 1.4987, "step": 2185 }, { "epoch": 0.2983485737682544, "grad_norm": 0.18377231061458588, "learning_rate": 8e-05, "loss": 1.5939, "step": 2186 }, { "epoch": 0.29848505527501024, "grad_norm": 0.18222413957118988, "learning_rate": 8e-05, "loss": 1.5185, "step": 2187 }, { "epoch": 0.29862153678176606, "grad_norm": 0.18538598716259003, "learning_rate": 8e-05, "loss": 1.5332, "step": 2188 }, { "epoch": 0.2987580182885219, "grad_norm": 0.18902656435966492, "learning_rate": 8e-05, "loss": 1.512, "step": 2189 }, { "epoch": 0.2988944997952777, "grad_norm": 0.19450733065605164, "learning_rate": 8e-05, "loss": 1.538, "step": 2190 }, { "epoch": 0.2990309813020336, "grad_norm": 0.19497349858283997, "learning_rate": 8e-05, "loss": 1.5361, "step": 2191 }, { "epoch": 0.2991674628087894, "grad_norm": 0.1885165125131607, "learning_rate": 8e-05, "loss": 1.573, "step": 2192 }, { "epoch": 0.29930394431554525, "grad_norm": 0.18749703466892242, "learning_rate": 8e-05, "loss": 1.5459, "step": 2193 }, { "epoch": 0.29944042582230107, "grad_norm": 0.19850268959999084, "learning_rate": 8e-05, "loss": 1.6713, "step": 2194 }, { "epoch": 0.2995769073290569, "grad_norm": 0.19352257251739502, "learning_rate": 8e-05, "loss": 1.5485, "step": 2195 }, { "epoch": 0.2997133888358127, "grad_norm": 0.1824529618024826, "learning_rate": 8e-05, "loss": 1.5188, "step": 2196 }, { "epoch": 0.2998498703425686, "grad_norm": 0.19440414011478424, "learning_rate": 8e-05, "loss": 1.5499, "step": 2197 }, { "epoch": 0.29998635184932443, "grad_norm": 0.2017410844564438, "learning_rate": 8e-05, "loss": 1.5798, "step": 2198 }, { "epoch": 0.30012283335608025, "grad_norm": 0.18070609867572784, "learning_rate": 8e-05, "loss": 1.5214, "step": 2199 }, { "epoch": 0.3002593148628361, "grad_norm": 0.1974911242723465, "learning_rate": 8e-05, "loss": 1.5846, "step": 2200 }, { "epoch": 0.3003957963695919, "grad_norm": 0.18513643741607666, "learning_rate": 8e-05, "loss": 1.5047, "step": 2201 }, { "epoch": 0.30053227787634773, "grad_norm": 0.1845792531967163, "learning_rate": 8e-05, "loss": 1.5012, "step": 2202 }, { "epoch": 0.3006687593831036, "grad_norm": 0.19717486202716827, "learning_rate": 8e-05, "loss": 1.5362, "step": 2203 }, { "epoch": 0.30080524088985944, "grad_norm": 0.18709112703800201, "learning_rate": 8e-05, "loss": 1.5471, "step": 2204 }, { "epoch": 0.30094172239661526, "grad_norm": 0.19449973106384277, "learning_rate": 8e-05, "loss": 1.5513, "step": 2205 }, { "epoch": 0.3010782039033711, "grad_norm": 0.1893892139196396, "learning_rate": 8e-05, "loss": 1.4917, "step": 2206 }, { "epoch": 0.3012146854101269, "grad_norm": 0.18763096630573273, "learning_rate": 8e-05, "loss": 1.5091, "step": 2207 }, { "epoch": 0.30135116691688274, "grad_norm": 0.20694661140441895, "learning_rate": 8e-05, "loss": 1.5444, "step": 2208 }, { "epoch": 0.3014876484236386, "grad_norm": 0.1910211741924286, "learning_rate": 8e-05, "loss": 1.6355, "step": 2209 }, { "epoch": 0.30162412993039445, "grad_norm": 0.1971670240163803, "learning_rate": 8e-05, "loss": 1.6306, "step": 2210 }, { "epoch": 0.3017606114371503, "grad_norm": 0.19217047095298767, "learning_rate": 8e-05, "loss": 1.5826, "step": 2211 }, { "epoch": 0.3018970929439061, "grad_norm": 0.18745028972625732, "learning_rate": 8e-05, "loss": 1.5301, "step": 2212 }, { "epoch": 0.3020335744506619, "grad_norm": 0.18405196070671082, "learning_rate": 8e-05, "loss": 1.4992, "step": 2213 }, { "epoch": 0.30217005595741775, "grad_norm": 0.18875133991241455, "learning_rate": 8e-05, "loss": 1.5238, "step": 2214 }, { "epoch": 0.30230653746417363, "grad_norm": 0.1918160319328308, "learning_rate": 8e-05, "loss": 1.6414, "step": 2215 }, { "epoch": 0.30244301897092946, "grad_norm": 0.18814332783222198, "learning_rate": 8e-05, "loss": 1.4846, "step": 2216 }, { "epoch": 0.3025795004776853, "grad_norm": 0.1866637021303177, "learning_rate": 8e-05, "loss": 1.5262, "step": 2217 }, { "epoch": 0.3027159819844411, "grad_norm": 0.18924444913864136, "learning_rate": 8e-05, "loss": 1.5816, "step": 2218 }, { "epoch": 0.30285246349119693, "grad_norm": 0.19841592013835907, "learning_rate": 8e-05, "loss": 1.5139, "step": 2219 }, { "epoch": 0.30298894499795276, "grad_norm": 0.19458287954330444, "learning_rate": 8e-05, "loss": 1.5963, "step": 2220 }, { "epoch": 0.30312542650470864, "grad_norm": 0.1910163015127182, "learning_rate": 8e-05, "loss": 1.5414, "step": 2221 }, { "epoch": 0.30326190801146446, "grad_norm": 0.19162598252296448, "learning_rate": 8e-05, "loss": 1.557, "step": 2222 }, { "epoch": 0.3033983895182203, "grad_norm": 0.17796890437602997, "learning_rate": 8e-05, "loss": 1.44, "step": 2223 }, { "epoch": 0.3035348710249761, "grad_norm": 0.1886792629957199, "learning_rate": 8e-05, "loss": 1.5535, "step": 2224 }, { "epoch": 0.30367135253173194, "grad_norm": 0.18323218822479248, "learning_rate": 8e-05, "loss": 1.4889, "step": 2225 }, { "epoch": 0.30380783403848777, "grad_norm": 0.18252995610237122, "learning_rate": 8e-05, "loss": 1.5004, "step": 2226 }, { "epoch": 0.3039443155452436, "grad_norm": 0.19099868834018707, "learning_rate": 8e-05, "loss": 1.5952, "step": 2227 }, { "epoch": 0.3040807970519995, "grad_norm": 0.19717931747436523, "learning_rate": 8e-05, "loss": 1.6147, "step": 2228 }, { "epoch": 0.3042172785587553, "grad_norm": 0.18331508338451385, "learning_rate": 8e-05, "loss": 1.5418, "step": 2229 }, { "epoch": 0.3043537600655111, "grad_norm": 0.18861477077007294, "learning_rate": 8e-05, "loss": 1.5587, "step": 2230 }, { "epoch": 0.30449024157226695, "grad_norm": 0.18971489369869232, "learning_rate": 8e-05, "loss": 1.4909, "step": 2231 }, { "epoch": 0.3046267230790228, "grad_norm": 0.18543824553489685, "learning_rate": 8e-05, "loss": 1.4928, "step": 2232 }, { "epoch": 0.3047632045857786, "grad_norm": 0.19533771276474, "learning_rate": 8e-05, "loss": 1.5926, "step": 2233 }, { "epoch": 0.3048996860925345, "grad_norm": 0.18951739370822906, "learning_rate": 8e-05, "loss": 1.6196, "step": 2234 }, { "epoch": 0.3050361675992903, "grad_norm": 0.19273875653743744, "learning_rate": 8e-05, "loss": 1.4793, "step": 2235 }, { "epoch": 0.30517264910604613, "grad_norm": 0.18404525518417358, "learning_rate": 8e-05, "loss": 1.4385, "step": 2236 }, { "epoch": 0.30530913061280196, "grad_norm": 0.18486222624778748, "learning_rate": 8e-05, "loss": 1.5183, "step": 2237 }, { "epoch": 0.3054456121195578, "grad_norm": 0.19355586171150208, "learning_rate": 8e-05, "loss": 1.5795, "step": 2238 }, { "epoch": 0.3055820936263136, "grad_norm": 0.19077162444591522, "learning_rate": 8e-05, "loss": 1.5393, "step": 2239 }, { "epoch": 0.3057185751330695, "grad_norm": 0.19492536783218384, "learning_rate": 8e-05, "loss": 1.6051, "step": 2240 }, { "epoch": 0.3058550566398253, "grad_norm": 0.1930067539215088, "learning_rate": 8e-05, "loss": 1.5448, "step": 2241 }, { "epoch": 0.30599153814658114, "grad_norm": 0.18975308537483215, "learning_rate": 8e-05, "loss": 1.4713, "step": 2242 }, { "epoch": 0.30612801965333697, "grad_norm": 0.1962203085422516, "learning_rate": 8e-05, "loss": 1.5324, "step": 2243 }, { "epoch": 0.3062645011600928, "grad_norm": 0.1905398666858673, "learning_rate": 8e-05, "loss": 1.64, "step": 2244 }, { "epoch": 0.3064009826668486, "grad_norm": 0.1920812577009201, "learning_rate": 8e-05, "loss": 1.5498, "step": 2245 }, { "epoch": 0.3065374641736045, "grad_norm": 0.18745557963848114, "learning_rate": 8e-05, "loss": 1.5659, "step": 2246 }, { "epoch": 0.3066739456803603, "grad_norm": 0.18615038692951202, "learning_rate": 8e-05, "loss": 1.5494, "step": 2247 }, { "epoch": 0.30681042718711615, "grad_norm": 0.19473129510879517, "learning_rate": 8e-05, "loss": 1.5131, "step": 2248 }, { "epoch": 0.306946908693872, "grad_norm": 0.19581641256809235, "learning_rate": 8e-05, "loss": 1.5295, "step": 2249 }, { "epoch": 0.3070833902006278, "grad_norm": 0.19018195569515228, "learning_rate": 8e-05, "loss": 1.5141, "step": 2250 }, { "epoch": 0.30721987170738363, "grad_norm": 0.1852266788482666, "learning_rate": 8e-05, "loss": 1.4906, "step": 2251 }, { "epoch": 0.3073563532141395, "grad_norm": 0.19332687556743622, "learning_rate": 8e-05, "loss": 1.5776, "step": 2252 }, { "epoch": 0.30749283472089534, "grad_norm": 0.1825764924287796, "learning_rate": 8e-05, "loss": 1.4772, "step": 2253 }, { "epoch": 0.30762931622765116, "grad_norm": 0.18047472834587097, "learning_rate": 8e-05, "loss": 1.473, "step": 2254 }, { "epoch": 0.307765797734407, "grad_norm": 0.1946837157011032, "learning_rate": 8e-05, "loss": 1.5735, "step": 2255 }, { "epoch": 0.3079022792411628, "grad_norm": 0.18675142526626587, "learning_rate": 8e-05, "loss": 1.5061, "step": 2256 }, { "epoch": 0.30803876074791864, "grad_norm": 0.19220958650112152, "learning_rate": 8e-05, "loss": 1.5736, "step": 2257 }, { "epoch": 0.30817524225467446, "grad_norm": 0.1952037811279297, "learning_rate": 8e-05, "loss": 1.6106, "step": 2258 }, { "epoch": 0.30831172376143035, "grad_norm": 0.19373290240764618, "learning_rate": 8e-05, "loss": 1.5981, "step": 2259 }, { "epoch": 0.30844820526818617, "grad_norm": 0.18846887350082397, "learning_rate": 8e-05, "loss": 1.5719, "step": 2260 }, { "epoch": 0.308584686774942, "grad_norm": 0.19393399357795715, "learning_rate": 8e-05, "loss": 1.5073, "step": 2261 }, { "epoch": 0.3087211682816978, "grad_norm": 0.1933368295431137, "learning_rate": 8e-05, "loss": 1.5388, "step": 2262 }, { "epoch": 0.30885764978845365, "grad_norm": 0.1938529908657074, "learning_rate": 8e-05, "loss": 1.5824, "step": 2263 }, { "epoch": 0.3089941312952095, "grad_norm": 0.19152048230171204, "learning_rate": 8e-05, "loss": 1.5858, "step": 2264 }, { "epoch": 0.30913061280196535, "grad_norm": 0.19202856719493866, "learning_rate": 8e-05, "loss": 1.5852, "step": 2265 }, { "epoch": 0.3092670943087212, "grad_norm": 0.18733921647071838, "learning_rate": 8e-05, "loss": 1.5532, "step": 2266 }, { "epoch": 0.309403575815477, "grad_norm": 0.1896551251411438, "learning_rate": 8e-05, "loss": 1.549, "step": 2267 }, { "epoch": 0.30954005732223283, "grad_norm": 0.19032728672027588, "learning_rate": 8e-05, "loss": 1.5213, "step": 2268 }, { "epoch": 0.30967653882898866, "grad_norm": 0.19229768216609955, "learning_rate": 8e-05, "loss": 1.5911, "step": 2269 }, { "epoch": 0.3098130203357445, "grad_norm": 0.19707682728767395, "learning_rate": 8e-05, "loss": 1.5564, "step": 2270 }, { "epoch": 0.30994950184250036, "grad_norm": 0.19649268686771393, "learning_rate": 8e-05, "loss": 1.6042, "step": 2271 }, { "epoch": 0.3100859833492562, "grad_norm": 0.18773022294044495, "learning_rate": 8e-05, "loss": 1.5052, "step": 2272 }, { "epoch": 0.310222464856012, "grad_norm": 0.1910863220691681, "learning_rate": 8e-05, "loss": 1.5785, "step": 2273 }, { "epoch": 0.31035894636276784, "grad_norm": 0.18872302770614624, "learning_rate": 8e-05, "loss": 1.5082, "step": 2274 }, { "epoch": 0.31049542786952367, "grad_norm": 0.19204914569854736, "learning_rate": 8e-05, "loss": 1.6146, "step": 2275 }, { "epoch": 0.3106319093762795, "grad_norm": 0.18933150172233582, "learning_rate": 8e-05, "loss": 1.5186, "step": 2276 }, { "epoch": 0.3107683908830354, "grad_norm": 0.18659311532974243, "learning_rate": 8e-05, "loss": 1.5215, "step": 2277 }, { "epoch": 0.3109048723897912, "grad_norm": 0.1954302191734314, "learning_rate": 8e-05, "loss": 1.5407, "step": 2278 }, { "epoch": 0.311041353896547, "grad_norm": 0.19257852435112, "learning_rate": 8e-05, "loss": 1.5342, "step": 2279 }, { "epoch": 0.31117783540330285, "grad_norm": 0.19051837921142578, "learning_rate": 8e-05, "loss": 1.5422, "step": 2280 }, { "epoch": 0.3113143169100587, "grad_norm": 0.1838407814502716, "learning_rate": 8e-05, "loss": 1.526, "step": 2281 }, { "epoch": 0.3114507984168145, "grad_norm": 0.19048361480236053, "learning_rate": 8e-05, "loss": 1.518, "step": 2282 }, { "epoch": 0.3115872799235704, "grad_norm": 0.19328182935714722, "learning_rate": 8e-05, "loss": 1.5502, "step": 2283 }, { "epoch": 0.3117237614303262, "grad_norm": 0.1882355958223343, "learning_rate": 8e-05, "loss": 1.4865, "step": 2284 }, { "epoch": 0.31186024293708203, "grad_norm": 0.1923254132270813, "learning_rate": 8e-05, "loss": 1.5941, "step": 2285 }, { "epoch": 0.31199672444383786, "grad_norm": 0.18768557906150818, "learning_rate": 8e-05, "loss": 1.4729, "step": 2286 }, { "epoch": 0.3121332059505937, "grad_norm": 0.18628625571727753, "learning_rate": 8e-05, "loss": 1.5578, "step": 2287 }, { "epoch": 0.3122696874573495, "grad_norm": 0.18642590939998627, "learning_rate": 8e-05, "loss": 1.5337, "step": 2288 }, { "epoch": 0.3124061689641054, "grad_norm": 0.19136059284210205, "learning_rate": 8e-05, "loss": 1.5309, "step": 2289 }, { "epoch": 0.3125426504708612, "grad_norm": 0.18829867243766785, "learning_rate": 8e-05, "loss": 1.6083, "step": 2290 }, { "epoch": 0.31267913197761704, "grad_norm": 0.18478061258792877, "learning_rate": 8e-05, "loss": 1.5176, "step": 2291 }, { "epoch": 0.31281561348437287, "grad_norm": 0.18714046478271484, "learning_rate": 8e-05, "loss": 1.5038, "step": 2292 }, { "epoch": 0.3129520949911287, "grad_norm": 0.19507615268230438, "learning_rate": 8e-05, "loss": 1.6387, "step": 2293 }, { "epoch": 0.3130885764978845, "grad_norm": 0.18988661468029022, "learning_rate": 8e-05, "loss": 1.5856, "step": 2294 }, { "epoch": 0.31322505800464034, "grad_norm": 0.19226372241973877, "learning_rate": 8e-05, "loss": 1.5508, "step": 2295 }, { "epoch": 0.3133615395113962, "grad_norm": 0.19245581328868866, "learning_rate": 8e-05, "loss": 1.5266, "step": 2296 }, { "epoch": 0.31349802101815205, "grad_norm": 0.18920768797397614, "learning_rate": 8e-05, "loss": 1.5928, "step": 2297 }, { "epoch": 0.3136345025249079, "grad_norm": 0.19054719805717468, "learning_rate": 8e-05, "loss": 1.5213, "step": 2298 }, { "epoch": 0.3137709840316637, "grad_norm": 0.18515238165855408, "learning_rate": 8e-05, "loss": 1.5969, "step": 2299 }, { "epoch": 0.31390746553841953, "grad_norm": 0.18925321102142334, "learning_rate": 8e-05, "loss": 1.5455, "step": 2300 }, { "epoch": 0.31404394704517535, "grad_norm": 0.19498296082019806, "learning_rate": 8e-05, "loss": 1.5079, "step": 2301 }, { "epoch": 0.31418042855193123, "grad_norm": 0.19261011481285095, "learning_rate": 8e-05, "loss": 1.539, "step": 2302 }, { "epoch": 0.31431691005868706, "grad_norm": 0.19898930191993713, "learning_rate": 8e-05, "loss": 1.4837, "step": 2303 }, { "epoch": 0.3144533915654429, "grad_norm": 0.1882028877735138, "learning_rate": 8e-05, "loss": 1.4938, "step": 2304 }, { "epoch": 0.3145898730721987, "grad_norm": 0.2005249708890915, "learning_rate": 8e-05, "loss": 1.615, "step": 2305 }, { "epoch": 0.31472635457895454, "grad_norm": 0.1920951008796692, "learning_rate": 8e-05, "loss": 1.5257, "step": 2306 }, { "epoch": 0.31486283608571036, "grad_norm": 0.19342833757400513, "learning_rate": 8e-05, "loss": 1.5924, "step": 2307 }, { "epoch": 0.31499931759246624, "grad_norm": 0.19189022481441498, "learning_rate": 8e-05, "loss": 1.5786, "step": 2308 }, { "epoch": 0.31513579909922207, "grad_norm": 0.18880152702331543, "learning_rate": 8e-05, "loss": 1.5164, "step": 2309 }, { "epoch": 0.3152722806059779, "grad_norm": 0.18937347829341888, "learning_rate": 8e-05, "loss": 1.5366, "step": 2310 }, { "epoch": 0.3154087621127337, "grad_norm": 0.1873825192451477, "learning_rate": 8e-05, "loss": 1.5834, "step": 2311 }, { "epoch": 0.31554524361948955, "grad_norm": 0.19152410328388214, "learning_rate": 8e-05, "loss": 1.4531, "step": 2312 }, { "epoch": 0.31568172512624537, "grad_norm": 0.18692977726459503, "learning_rate": 8e-05, "loss": 1.5603, "step": 2313 }, { "epoch": 0.31581820663300125, "grad_norm": 0.1933252066373825, "learning_rate": 8e-05, "loss": 1.5676, "step": 2314 }, { "epoch": 0.3159546881397571, "grad_norm": 0.194474995136261, "learning_rate": 8e-05, "loss": 1.5929, "step": 2315 }, { "epoch": 0.3160911696465129, "grad_norm": 0.19405238330364227, "learning_rate": 8e-05, "loss": 1.5608, "step": 2316 }, { "epoch": 0.31622765115326873, "grad_norm": 0.1938561648130417, "learning_rate": 8e-05, "loss": 1.537, "step": 2317 }, { "epoch": 0.31636413266002456, "grad_norm": 0.19599539041519165, "learning_rate": 8e-05, "loss": 1.5803, "step": 2318 }, { "epoch": 0.3165006141667804, "grad_norm": 0.18618819117546082, "learning_rate": 8e-05, "loss": 1.4821, "step": 2319 }, { "epoch": 0.31663709567353626, "grad_norm": 0.20246824622154236, "learning_rate": 8e-05, "loss": 1.5812, "step": 2320 }, { "epoch": 0.3167735771802921, "grad_norm": 0.19695766270160675, "learning_rate": 8e-05, "loss": 1.5596, "step": 2321 }, { "epoch": 0.3169100586870479, "grad_norm": 0.19561611115932465, "learning_rate": 8e-05, "loss": 1.528, "step": 2322 }, { "epoch": 0.31704654019380374, "grad_norm": 0.1915489286184311, "learning_rate": 8e-05, "loss": 1.6143, "step": 2323 }, { "epoch": 0.31718302170055956, "grad_norm": 0.18535694479942322, "learning_rate": 8e-05, "loss": 1.5131, "step": 2324 }, { "epoch": 0.3173195032073154, "grad_norm": 0.19556941092014313, "learning_rate": 8e-05, "loss": 1.5239, "step": 2325 }, { "epoch": 0.31745598471407127, "grad_norm": 0.1962941735982895, "learning_rate": 8e-05, "loss": 1.5594, "step": 2326 }, { "epoch": 0.3175924662208271, "grad_norm": 0.19477158784866333, "learning_rate": 8e-05, "loss": 1.5218, "step": 2327 }, { "epoch": 0.3177289477275829, "grad_norm": 0.1849786788225174, "learning_rate": 8e-05, "loss": 1.4542, "step": 2328 }, { "epoch": 0.31786542923433875, "grad_norm": 0.18511530756950378, "learning_rate": 8e-05, "loss": 1.4631, "step": 2329 }, { "epoch": 0.3180019107410946, "grad_norm": 0.19234132766723633, "learning_rate": 8e-05, "loss": 1.5254, "step": 2330 }, { "epoch": 0.3181383922478504, "grad_norm": 0.20037028193473816, "learning_rate": 8e-05, "loss": 1.5483, "step": 2331 }, { "epoch": 0.3182748737546062, "grad_norm": 0.20021872222423553, "learning_rate": 8e-05, "loss": 1.5144, "step": 2332 }, { "epoch": 0.3184113552613621, "grad_norm": 0.19810526072978973, "learning_rate": 8e-05, "loss": 1.5548, "step": 2333 }, { "epoch": 0.31854783676811793, "grad_norm": 0.19990825653076172, "learning_rate": 8e-05, "loss": 1.5508, "step": 2334 }, { "epoch": 0.31868431827487376, "grad_norm": 0.19812963902950287, "learning_rate": 8e-05, "loss": 1.6368, "step": 2335 }, { "epoch": 0.3188207997816296, "grad_norm": 0.19568397104740143, "learning_rate": 8e-05, "loss": 1.5568, "step": 2336 }, { "epoch": 0.3189572812883854, "grad_norm": 0.19830144941806793, "learning_rate": 8e-05, "loss": 1.5682, "step": 2337 }, { "epoch": 0.31909376279514123, "grad_norm": 0.19387203454971313, "learning_rate": 8e-05, "loss": 1.5488, "step": 2338 }, { "epoch": 0.3192302443018971, "grad_norm": 0.19514256715774536, "learning_rate": 8e-05, "loss": 1.5861, "step": 2339 }, { "epoch": 0.31936672580865294, "grad_norm": 0.19740474224090576, "learning_rate": 8e-05, "loss": 1.5362, "step": 2340 }, { "epoch": 0.31950320731540877, "grad_norm": 0.19206172227859497, "learning_rate": 8e-05, "loss": 1.489, "step": 2341 }, { "epoch": 0.3196396888221646, "grad_norm": 0.18451574444770813, "learning_rate": 8e-05, "loss": 1.542, "step": 2342 }, { "epoch": 0.3197761703289204, "grad_norm": 0.19501104950904846, "learning_rate": 8e-05, "loss": 1.5519, "step": 2343 }, { "epoch": 0.31991265183567624, "grad_norm": 0.1942450851202011, "learning_rate": 8e-05, "loss": 1.5689, "step": 2344 }, { "epoch": 0.3200491333424321, "grad_norm": 0.18815119564533234, "learning_rate": 8e-05, "loss": 1.5291, "step": 2345 }, { "epoch": 0.32018561484918795, "grad_norm": 0.20140774548053741, "learning_rate": 8e-05, "loss": 1.5365, "step": 2346 }, { "epoch": 0.3203220963559438, "grad_norm": 0.2070215940475464, "learning_rate": 8e-05, "loss": 1.581, "step": 2347 }, { "epoch": 0.3204585778626996, "grad_norm": 0.1885538399219513, "learning_rate": 8e-05, "loss": 1.5865, "step": 2348 }, { "epoch": 0.3205950593694554, "grad_norm": 0.19860891997814178, "learning_rate": 8e-05, "loss": 1.5713, "step": 2349 }, { "epoch": 0.32073154087621125, "grad_norm": 0.20438644289970398, "learning_rate": 8e-05, "loss": 1.5322, "step": 2350 }, { "epoch": 0.32086802238296713, "grad_norm": 0.1986912339925766, "learning_rate": 8e-05, "loss": 1.5103, "step": 2351 }, { "epoch": 0.32100450388972296, "grad_norm": 0.20232762396335602, "learning_rate": 8e-05, "loss": 1.5187, "step": 2352 }, { "epoch": 0.3211409853964788, "grad_norm": 0.20045018196105957, "learning_rate": 8e-05, "loss": 1.5956, "step": 2353 }, { "epoch": 0.3212774669032346, "grad_norm": 0.19569602608680725, "learning_rate": 8e-05, "loss": 1.5431, "step": 2354 }, { "epoch": 0.32141394840999044, "grad_norm": 0.1832921952009201, "learning_rate": 8e-05, "loss": 1.4396, "step": 2355 }, { "epoch": 0.32155042991674626, "grad_norm": 0.19435308873653412, "learning_rate": 8e-05, "loss": 1.5759, "step": 2356 }, { "epoch": 0.32168691142350214, "grad_norm": 0.205319344997406, "learning_rate": 8e-05, "loss": 1.6119, "step": 2357 }, { "epoch": 0.32182339293025797, "grad_norm": 0.19843775033950806, "learning_rate": 8e-05, "loss": 1.5019, "step": 2358 }, { "epoch": 0.3219598744370138, "grad_norm": 0.18857765197753906, "learning_rate": 8e-05, "loss": 1.469, "step": 2359 }, { "epoch": 0.3220963559437696, "grad_norm": 0.19354255497455597, "learning_rate": 8e-05, "loss": 1.5083, "step": 2360 }, { "epoch": 0.32223283745052544, "grad_norm": 0.19421203434467316, "learning_rate": 8e-05, "loss": 1.5682, "step": 2361 }, { "epoch": 0.32236931895728127, "grad_norm": 0.19062180817127228, "learning_rate": 8e-05, "loss": 1.5688, "step": 2362 }, { "epoch": 0.3225058004640371, "grad_norm": 0.19428598880767822, "learning_rate": 8e-05, "loss": 1.5789, "step": 2363 }, { "epoch": 0.322642281970793, "grad_norm": 0.19765454530715942, "learning_rate": 8e-05, "loss": 1.5719, "step": 2364 }, { "epoch": 0.3227787634775488, "grad_norm": 0.19465826451778412, "learning_rate": 8e-05, "loss": 1.5461, "step": 2365 }, { "epoch": 0.32291524498430463, "grad_norm": 0.1933916211128235, "learning_rate": 8e-05, "loss": 1.5787, "step": 2366 }, { "epoch": 0.32305172649106045, "grad_norm": 0.1956445574760437, "learning_rate": 8e-05, "loss": 1.5521, "step": 2367 }, { "epoch": 0.3231882079978163, "grad_norm": 0.1907869130373001, "learning_rate": 8e-05, "loss": 1.5517, "step": 2368 }, { "epoch": 0.3233246895045721, "grad_norm": 0.1929887980222702, "learning_rate": 8e-05, "loss": 1.6289, "step": 2369 }, { "epoch": 0.323461171011328, "grad_norm": 0.1880393922328949, "learning_rate": 8e-05, "loss": 1.4537, "step": 2370 }, { "epoch": 0.3235976525180838, "grad_norm": 0.1917516589164734, "learning_rate": 8e-05, "loss": 1.5209, "step": 2371 }, { "epoch": 0.32373413402483964, "grad_norm": 0.19497986137866974, "learning_rate": 8e-05, "loss": 1.5737, "step": 2372 }, { "epoch": 0.32387061553159546, "grad_norm": 0.1887574940919876, "learning_rate": 8e-05, "loss": 1.5907, "step": 2373 }, { "epoch": 0.3240070970383513, "grad_norm": 0.1917102038860321, "learning_rate": 8e-05, "loss": 1.5492, "step": 2374 }, { "epoch": 0.3241435785451071, "grad_norm": 0.1880488395690918, "learning_rate": 8e-05, "loss": 1.4248, "step": 2375 }, { "epoch": 0.324280060051863, "grad_norm": 0.1989101618528366, "learning_rate": 8e-05, "loss": 1.5044, "step": 2376 }, { "epoch": 0.3244165415586188, "grad_norm": 0.18295790255069733, "learning_rate": 8e-05, "loss": 1.4477, "step": 2377 }, { "epoch": 0.32455302306537465, "grad_norm": 0.20745578408241272, "learning_rate": 8e-05, "loss": 1.512, "step": 2378 }, { "epoch": 0.32468950457213047, "grad_norm": 0.19378052651882172, "learning_rate": 8e-05, "loss": 1.4894, "step": 2379 }, { "epoch": 0.3248259860788863, "grad_norm": 0.19062446057796478, "learning_rate": 8e-05, "loss": 1.5067, "step": 2380 }, { "epoch": 0.3249624675856421, "grad_norm": 0.19509540498256683, "learning_rate": 8e-05, "loss": 1.4433, "step": 2381 }, { "epoch": 0.325098949092398, "grad_norm": 0.18596932291984558, "learning_rate": 8e-05, "loss": 1.5445, "step": 2382 }, { "epoch": 0.32523543059915383, "grad_norm": 0.18645717203617096, "learning_rate": 8e-05, "loss": 1.5341, "step": 2383 }, { "epoch": 0.32537191210590966, "grad_norm": 0.19618059694766998, "learning_rate": 8e-05, "loss": 1.5238, "step": 2384 }, { "epoch": 0.3255083936126655, "grad_norm": 0.19547946751117706, "learning_rate": 8e-05, "loss": 1.5788, "step": 2385 }, { "epoch": 0.3256448751194213, "grad_norm": 0.19641852378845215, "learning_rate": 8e-05, "loss": 1.5098, "step": 2386 }, { "epoch": 0.32578135662617713, "grad_norm": 0.2065533697605133, "learning_rate": 8e-05, "loss": 1.4825, "step": 2387 }, { "epoch": 0.325917838132933, "grad_norm": 0.1875258833169937, "learning_rate": 8e-05, "loss": 1.4827, "step": 2388 }, { "epoch": 0.32605431963968884, "grad_norm": 0.19471386075019836, "learning_rate": 8e-05, "loss": 1.5525, "step": 2389 }, { "epoch": 0.32619080114644466, "grad_norm": 0.20052587985992432, "learning_rate": 8e-05, "loss": 1.5229, "step": 2390 }, { "epoch": 0.3263272826532005, "grad_norm": 0.188873291015625, "learning_rate": 8e-05, "loss": 1.511, "step": 2391 }, { "epoch": 0.3264637641599563, "grad_norm": 0.19497057795524597, "learning_rate": 8e-05, "loss": 1.5546, "step": 2392 }, { "epoch": 0.32660024566671214, "grad_norm": 0.19966381788253784, "learning_rate": 8e-05, "loss": 1.5482, "step": 2393 }, { "epoch": 0.326736727173468, "grad_norm": 0.1950003057718277, "learning_rate": 8e-05, "loss": 1.5545, "step": 2394 }, { "epoch": 0.32687320868022385, "grad_norm": 0.20484621822834015, "learning_rate": 8e-05, "loss": 1.5699, "step": 2395 }, { "epoch": 0.3270096901869797, "grad_norm": 0.19592083990573883, "learning_rate": 8e-05, "loss": 1.5922, "step": 2396 }, { "epoch": 0.3271461716937355, "grad_norm": 0.18600867688655853, "learning_rate": 8e-05, "loss": 1.4156, "step": 2397 }, { "epoch": 0.3272826532004913, "grad_norm": 0.19322341680526733, "learning_rate": 8e-05, "loss": 1.5328, "step": 2398 }, { "epoch": 0.32741913470724715, "grad_norm": 0.19816844165325165, "learning_rate": 8e-05, "loss": 1.4969, "step": 2399 }, { "epoch": 0.327555616214003, "grad_norm": 0.19557923078536987, "learning_rate": 8e-05, "loss": 1.5067, "step": 2400 }, { "epoch": 0.32769209772075886, "grad_norm": 0.19665782153606415, "learning_rate": 8e-05, "loss": 1.5693, "step": 2401 }, { "epoch": 0.3278285792275147, "grad_norm": 0.19420155882835388, "learning_rate": 8e-05, "loss": 1.5555, "step": 2402 }, { "epoch": 0.3279650607342705, "grad_norm": 0.19276310503482819, "learning_rate": 8e-05, "loss": 1.5953, "step": 2403 }, { "epoch": 0.32810154224102633, "grad_norm": 0.19227337837219238, "learning_rate": 8e-05, "loss": 1.5358, "step": 2404 }, { "epoch": 0.32823802374778216, "grad_norm": 0.190183624625206, "learning_rate": 8e-05, "loss": 1.5089, "step": 2405 }, { "epoch": 0.328374505254538, "grad_norm": 0.18371525406837463, "learning_rate": 8e-05, "loss": 1.4744, "step": 2406 }, { "epoch": 0.32851098676129387, "grad_norm": 0.19337286055088043, "learning_rate": 8e-05, "loss": 1.4853, "step": 2407 }, { "epoch": 0.3286474682680497, "grad_norm": 0.19246435165405273, "learning_rate": 8e-05, "loss": 1.5389, "step": 2408 }, { "epoch": 0.3287839497748055, "grad_norm": 0.19278155267238617, "learning_rate": 8e-05, "loss": 1.6183, "step": 2409 }, { "epoch": 0.32892043128156134, "grad_norm": 0.1896626204252243, "learning_rate": 8e-05, "loss": 1.6126, "step": 2410 }, { "epoch": 0.32905691278831717, "grad_norm": 0.19147953391075134, "learning_rate": 8e-05, "loss": 1.5556, "step": 2411 }, { "epoch": 0.329193394295073, "grad_norm": 0.18816566467285156, "learning_rate": 8e-05, "loss": 1.4865, "step": 2412 }, { "epoch": 0.3293298758018289, "grad_norm": 0.19373339414596558, "learning_rate": 8e-05, "loss": 1.5485, "step": 2413 }, { "epoch": 0.3294663573085847, "grad_norm": 0.19565635919570923, "learning_rate": 8e-05, "loss": 1.5824, "step": 2414 }, { "epoch": 0.3296028388153405, "grad_norm": 0.18949222564697266, "learning_rate": 8e-05, "loss": 1.5319, "step": 2415 }, { "epoch": 0.32973932032209635, "grad_norm": 0.18624359369277954, "learning_rate": 8e-05, "loss": 1.508, "step": 2416 }, { "epoch": 0.3298758018288522, "grad_norm": 0.1924353688955307, "learning_rate": 8e-05, "loss": 1.6101, "step": 2417 }, { "epoch": 0.330012283335608, "grad_norm": 0.18345238268375397, "learning_rate": 8e-05, "loss": 1.5016, "step": 2418 }, { "epoch": 0.3301487648423639, "grad_norm": 0.19848251342773438, "learning_rate": 8e-05, "loss": 1.5512, "step": 2419 }, { "epoch": 0.3302852463491197, "grad_norm": 0.1907501220703125, "learning_rate": 8e-05, "loss": 1.5866, "step": 2420 }, { "epoch": 0.33042172785587554, "grad_norm": 0.18231217563152313, "learning_rate": 8e-05, "loss": 1.5396, "step": 2421 }, { "epoch": 0.33055820936263136, "grad_norm": 0.1831728219985962, "learning_rate": 8e-05, "loss": 1.4766, "step": 2422 }, { "epoch": 0.3306946908693872, "grad_norm": 0.19155003130435944, "learning_rate": 8e-05, "loss": 1.5518, "step": 2423 }, { "epoch": 0.330831172376143, "grad_norm": 0.19622866809368134, "learning_rate": 8e-05, "loss": 1.5836, "step": 2424 }, { "epoch": 0.3309676538828989, "grad_norm": 0.18896323442459106, "learning_rate": 8e-05, "loss": 1.5218, "step": 2425 }, { "epoch": 0.3311041353896547, "grad_norm": 0.1942889243364334, "learning_rate": 8e-05, "loss": 1.56, "step": 2426 }, { "epoch": 0.33124061689641054, "grad_norm": 0.19754470884799957, "learning_rate": 8e-05, "loss": 1.5717, "step": 2427 }, { "epoch": 0.33137709840316637, "grad_norm": 0.19484834372997284, "learning_rate": 8e-05, "loss": 1.5322, "step": 2428 }, { "epoch": 0.3315135799099222, "grad_norm": 0.19175194203853607, "learning_rate": 8e-05, "loss": 1.5743, "step": 2429 }, { "epoch": 0.331650061416678, "grad_norm": 0.20060524344444275, "learning_rate": 8e-05, "loss": 1.6009, "step": 2430 }, { "epoch": 0.33178654292343385, "grad_norm": 0.19135399162769318, "learning_rate": 8e-05, "loss": 1.5493, "step": 2431 }, { "epoch": 0.33192302443018973, "grad_norm": 0.1867528259754181, "learning_rate": 8e-05, "loss": 1.5305, "step": 2432 }, { "epoch": 0.33205950593694555, "grad_norm": 0.19375182688236237, "learning_rate": 8e-05, "loss": 1.5204, "step": 2433 }, { "epoch": 0.3321959874437014, "grad_norm": 0.18884830176830292, "learning_rate": 8e-05, "loss": 1.489, "step": 2434 }, { "epoch": 0.3323324689504572, "grad_norm": 0.19130641222000122, "learning_rate": 8e-05, "loss": 1.577, "step": 2435 }, { "epoch": 0.33246895045721303, "grad_norm": 0.18871554732322693, "learning_rate": 8e-05, "loss": 1.5083, "step": 2436 }, { "epoch": 0.33260543196396886, "grad_norm": 0.19300872087478638, "learning_rate": 8e-05, "loss": 1.5553, "step": 2437 }, { "epoch": 0.33274191347072474, "grad_norm": 0.19003355503082275, "learning_rate": 8e-05, "loss": 1.5119, "step": 2438 }, { "epoch": 0.33287839497748056, "grad_norm": 0.18634119629859924, "learning_rate": 8e-05, "loss": 1.4546, "step": 2439 }, { "epoch": 0.3330148764842364, "grad_norm": 0.1876678168773651, "learning_rate": 8e-05, "loss": 1.4926, "step": 2440 }, { "epoch": 0.3331513579909922, "grad_norm": 0.19578248262405396, "learning_rate": 8e-05, "loss": 1.4987, "step": 2441 }, { "epoch": 0.33328783949774804, "grad_norm": 0.192447230219841, "learning_rate": 8e-05, "loss": 1.4824, "step": 2442 }, { "epoch": 0.33342432100450387, "grad_norm": 0.19464170932769775, "learning_rate": 8e-05, "loss": 1.5427, "step": 2443 }, { "epoch": 0.33356080251125975, "grad_norm": 0.20000219345092773, "learning_rate": 8e-05, "loss": 1.6517, "step": 2444 }, { "epoch": 0.33369728401801557, "grad_norm": 0.19101779162883759, "learning_rate": 8e-05, "loss": 1.4719, "step": 2445 }, { "epoch": 0.3338337655247714, "grad_norm": 0.19114328920841217, "learning_rate": 8e-05, "loss": 1.5422, "step": 2446 }, { "epoch": 0.3339702470315272, "grad_norm": 0.19326002895832062, "learning_rate": 8e-05, "loss": 1.4871, "step": 2447 }, { "epoch": 0.33410672853828305, "grad_norm": 0.1956133097410202, "learning_rate": 8e-05, "loss": 1.5692, "step": 2448 }, { "epoch": 0.3342432100450389, "grad_norm": 0.1967337727546692, "learning_rate": 8e-05, "loss": 1.6301, "step": 2449 }, { "epoch": 0.33437969155179476, "grad_norm": 0.1888309121131897, "learning_rate": 8e-05, "loss": 1.5053, "step": 2450 }, { "epoch": 0.3345161730585506, "grad_norm": 0.19888371229171753, "learning_rate": 8e-05, "loss": 1.5681, "step": 2451 }, { "epoch": 0.3346526545653064, "grad_norm": 0.19180575013160706, "learning_rate": 8e-05, "loss": 1.571, "step": 2452 }, { "epoch": 0.33478913607206223, "grad_norm": 0.19496522843837738, "learning_rate": 8e-05, "loss": 1.6027, "step": 2453 }, { "epoch": 0.33492561757881806, "grad_norm": 0.1954294741153717, "learning_rate": 8e-05, "loss": 1.5498, "step": 2454 }, { "epoch": 0.3350620990855739, "grad_norm": 0.194271057844162, "learning_rate": 8e-05, "loss": 1.5209, "step": 2455 }, { "epoch": 0.33519858059232976, "grad_norm": 0.19182570278644562, "learning_rate": 8e-05, "loss": 1.5984, "step": 2456 }, { "epoch": 0.3353350620990856, "grad_norm": 0.1884409636259079, "learning_rate": 8e-05, "loss": 1.5275, "step": 2457 }, { "epoch": 0.3354715436058414, "grad_norm": 0.1984720677137375, "learning_rate": 8e-05, "loss": 1.5801, "step": 2458 }, { "epoch": 0.33560802511259724, "grad_norm": 0.1877586543560028, "learning_rate": 8e-05, "loss": 1.5141, "step": 2459 }, { "epoch": 0.33574450661935307, "grad_norm": 0.1922798901796341, "learning_rate": 8e-05, "loss": 1.515, "step": 2460 }, { "epoch": 0.3358809881261089, "grad_norm": 0.18717573583126068, "learning_rate": 8e-05, "loss": 1.5466, "step": 2461 }, { "epoch": 0.3360174696328648, "grad_norm": 0.19478553533554077, "learning_rate": 8e-05, "loss": 1.592, "step": 2462 }, { "epoch": 0.3361539511396206, "grad_norm": 0.18772883713245392, "learning_rate": 8e-05, "loss": 1.5364, "step": 2463 }, { "epoch": 0.3362904326463764, "grad_norm": 0.19882705807685852, "learning_rate": 8e-05, "loss": 1.6195, "step": 2464 }, { "epoch": 0.33642691415313225, "grad_norm": 0.19613197445869446, "learning_rate": 8e-05, "loss": 1.4862, "step": 2465 }, { "epoch": 0.3365633956598881, "grad_norm": 0.20750534534454346, "learning_rate": 8e-05, "loss": 1.5163, "step": 2466 }, { "epoch": 0.3366998771666439, "grad_norm": 0.19459302723407745, "learning_rate": 8e-05, "loss": 1.5366, "step": 2467 }, { "epoch": 0.3368363586733997, "grad_norm": 0.18895673751831055, "learning_rate": 8e-05, "loss": 1.5137, "step": 2468 }, { "epoch": 0.3369728401801556, "grad_norm": 0.19213056564331055, "learning_rate": 8e-05, "loss": 1.5241, "step": 2469 }, { "epoch": 0.33710932168691143, "grad_norm": 0.1933610886335373, "learning_rate": 8e-05, "loss": 1.5422, "step": 2470 }, { "epoch": 0.33724580319366726, "grad_norm": 0.18792147934436798, "learning_rate": 8e-05, "loss": 1.5181, "step": 2471 }, { "epoch": 0.3373822847004231, "grad_norm": 0.18653547763824463, "learning_rate": 8e-05, "loss": 1.517, "step": 2472 }, { "epoch": 0.3375187662071789, "grad_norm": 0.19106873869895935, "learning_rate": 8e-05, "loss": 1.556, "step": 2473 }, { "epoch": 0.33765524771393474, "grad_norm": 0.19438928365707397, "learning_rate": 8e-05, "loss": 1.5411, "step": 2474 }, { "epoch": 0.3377917292206906, "grad_norm": 0.191884383559227, "learning_rate": 8e-05, "loss": 1.5381, "step": 2475 }, { "epoch": 0.33792821072744644, "grad_norm": 0.19358599185943604, "learning_rate": 8e-05, "loss": 1.5264, "step": 2476 }, { "epoch": 0.33806469223420227, "grad_norm": 0.1976454108953476, "learning_rate": 8e-05, "loss": 1.5746, "step": 2477 }, { "epoch": 0.3382011737409581, "grad_norm": 0.1958320587873459, "learning_rate": 8e-05, "loss": 1.5374, "step": 2478 }, { "epoch": 0.3383376552477139, "grad_norm": 0.1989540010690689, "learning_rate": 8e-05, "loss": 1.5225, "step": 2479 }, { "epoch": 0.33847413675446975, "grad_norm": 0.18735961616039276, "learning_rate": 8e-05, "loss": 1.5813, "step": 2480 }, { "epoch": 0.3386106182612256, "grad_norm": 0.18536771833896637, "learning_rate": 8e-05, "loss": 1.4572, "step": 2481 }, { "epoch": 0.33874709976798145, "grad_norm": 0.19066278636455536, "learning_rate": 8e-05, "loss": 1.5354, "step": 2482 }, { "epoch": 0.3388835812747373, "grad_norm": 0.19877173006534576, "learning_rate": 8e-05, "loss": 1.5845, "step": 2483 }, { "epoch": 0.3390200627814931, "grad_norm": 0.19461213052272797, "learning_rate": 8e-05, "loss": 1.5175, "step": 2484 }, { "epoch": 0.33915654428824893, "grad_norm": 0.19343744218349457, "learning_rate": 8e-05, "loss": 1.5634, "step": 2485 }, { "epoch": 0.33929302579500475, "grad_norm": 0.20381417870521545, "learning_rate": 8e-05, "loss": 1.5653, "step": 2486 }, { "epoch": 0.33942950730176064, "grad_norm": 0.19497017562389374, "learning_rate": 8e-05, "loss": 1.53, "step": 2487 }, { "epoch": 0.33956598880851646, "grad_norm": 0.198625385761261, "learning_rate": 8e-05, "loss": 1.5405, "step": 2488 }, { "epoch": 0.3397024703152723, "grad_norm": 0.19684846699237823, "learning_rate": 8e-05, "loss": 1.5069, "step": 2489 }, { "epoch": 0.3398389518220281, "grad_norm": 0.19157476723194122, "learning_rate": 8e-05, "loss": 1.4368, "step": 2490 }, { "epoch": 0.33997543332878394, "grad_norm": 0.2119031548500061, "learning_rate": 8e-05, "loss": 1.5561, "step": 2491 }, { "epoch": 0.34011191483553976, "grad_norm": 0.19901813566684723, "learning_rate": 8e-05, "loss": 1.5205, "step": 2492 }, { "epoch": 0.34024839634229564, "grad_norm": 0.18913337588310242, "learning_rate": 8e-05, "loss": 1.5537, "step": 2493 }, { "epoch": 0.34038487784905147, "grad_norm": 0.21063925325870514, "learning_rate": 8e-05, "loss": 1.5676, "step": 2494 }, { "epoch": 0.3405213593558073, "grad_norm": 0.18714042007923126, "learning_rate": 8e-05, "loss": 1.5346, "step": 2495 }, { "epoch": 0.3406578408625631, "grad_norm": 0.19401006400585175, "learning_rate": 8e-05, "loss": 1.5998, "step": 2496 }, { "epoch": 0.34079432236931895, "grad_norm": 0.20214806497097015, "learning_rate": 8e-05, "loss": 1.595, "step": 2497 }, { "epoch": 0.3409308038760748, "grad_norm": 0.19135676324367523, "learning_rate": 8e-05, "loss": 1.5814, "step": 2498 }, { "epoch": 0.34106728538283065, "grad_norm": 0.19156618416309357, "learning_rate": 8e-05, "loss": 1.4928, "step": 2499 }, { "epoch": 0.3412037668895865, "grad_norm": 0.2019028216600418, "learning_rate": 8e-05, "loss": 1.5715, "step": 2500 }, { "epoch": 0.3413402483963423, "grad_norm": 0.19800348579883575, "learning_rate": 8e-05, "loss": 1.5174, "step": 2501 }, { "epoch": 0.34147672990309813, "grad_norm": 0.19926117360591888, "learning_rate": 8e-05, "loss": 1.5157, "step": 2502 }, { "epoch": 0.34161321140985396, "grad_norm": 0.20413170754909515, "learning_rate": 8e-05, "loss": 1.5151, "step": 2503 }, { "epoch": 0.3417496929166098, "grad_norm": 0.19088317453861237, "learning_rate": 8e-05, "loss": 1.5033, "step": 2504 }, { "epoch": 0.3418861744233656, "grad_norm": 0.1939716935157776, "learning_rate": 8e-05, "loss": 1.5221, "step": 2505 }, { "epoch": 0.3420226559301215, "grad_norm": 0.2010374814271927, "learning_rate": 8e-05, "loss": 1.6089, "step": 2506 }, { "epoch": 0.3421591374368773, "grad_norm": 0.19489942491054535, "learning_rate": 8e-05, "loss": 1.5668, "step": 2507 }, { "epoch": 0.34229561894363314, "grad_norm": 0.19662658870220184, "learning_rate": 8e-05, "loss": 1.5532, "step": 2508 }, { "epoch": 0.34243210045038897, "grad_norm": 0.19633057713508606, "learning_rate": 8e-05, "loss": 1.5361, "step": 2509 }, { "epoch": 0.3425685819571448, "grad_norm": 0.1840714067220688, "learning_rate": 8e-05, "loss": 1.4578, "step": 2510 }, { "epoch": 0.3427050634639006, "grad_norm": 0.2019944041967392, "learning_rate": 8e-05, "loss": 1.5584, "step": 2511 }, { "epoch": 0.3428415449706565, "grad_norm": 0.1952175796031952, "learning_rate": 8e-05, "loss": 1.4928, "step": 2512 }, { "epoch": 0.3429780264774123, "grad_norm": 0.195237398147583, "learning_rate": 8e-05, "loss": 1.5375, "step": 2513 }, { "epoch": 0.34311450798416815, "grad_norm": 0.20003804564476013, "learning_rate": 8e-05, "loss": 1.5677, "step": 2514 }, { "epoch": 0.343250989490924, "grad_norm": 0.19378447532653809, "learning_rate": 8e-05, "loss": 1.4833, "step": 2515 }, { "epoch": 0.3433874709976798, "grad_norm": 0.19758976995944977, "learning_rate": 8e-05, "loss": 1.5546, "step": 2516 }, { "epoch": 0.3435239525044356, "grad_norm": 0.19025437533855438, "learning_rate": 8e-05, "loss": 1.4981, "step": 2517 }, { "epoch": 0.3436604340111915, "grad_norm": 0.18906742334365845, "learning_rate": 8e-05, "loss": 1.5527, "step": 2518 }, { "epoch": 0.34379691551794733, "grad_norm": 0.21416275203227997, "learning_rate": 8e-05, "loss": 1.5897, "step": 2519 }, { "epoch": 0.34393339702470316, "grad_norm": 0.19798676669597626, "learning_rate": 8e-05, "loss": 1.6096, "step": 2520 }, { "epoch": 0.344069878531459, "grad_norm": 0.21285538375377655, "learning_rate": 8e-05, "loss": 1.5735, "step": 2521 }, { "epoch": 0.3442063600382148, "grad_norm": 0.20711740851402283, "learning_rate": 8e-05, "loss": 1.6263, "step": 2522 }, { "epoch": 0.34434284154497063, "grad_norm": 0.20285910367965698, "learning_rate": 8e-05, "loss": 1.489, "step": 2523 }, { "epoch": 0.3444793230517265, "grad_norm": 0.19546018540859222, "learning_rate": 8e-05, "loss": 1.5503, "step": 2524 }, { "epoch": 0.34461580455848234, "grad_norm": 0.18902599811553955, "learning_rate": 8e-05, "loss": 1.5288, "step": 2525 }, { "epoch": 0.34475228606523817, "grad_norm": 0.19517406821250916, "learning_rate": 8e-05, "loss": 1.5003, "step": 2526 }, { "epoch": 0.344888767571994, "grad_norm": 0.19044962525367737, "learning_rate": 8e-05, "loss": 1.5595, "step": 2527 }, { "epoch": 0.3450252490787498, "grad_norm": 0.19572162628173828, "learning_rate": 8e-05, "loss": 1.5806, "step": 2528 }, { "epoch": 0.34516173058550564, "grad_norm": 0.19989044964313507, "learning_rate": 8e-05, "loss": 1.543, "step": 2529 }, { "epoch": 0.3452982120922615, "grad_norm": 0.1954621970653534, "learning_rate": 8e-05, "loss": 1.4734, "step": 2530 }, { "epoch": 0.34543469359901735, "grad_norm": 0.19431091845035553, "learning_rate": 8e-05, "loss": 1.5083, "step": 2531 }, { "epoch": 0.3455711751057732, "grad_norm": 0.20303569734096527, "learning_rate": 8e-05, "loss": 1.5556, "step": 2532 }, { "epoch": 0.345707656612529, "grad_norm": 0.1978529393672943, "learning_rate": 8e-05, "loss": 1.5374, "step": 2533 }, { "epoch": 0.3458441381192848, "grad_norm": 0.1847924143075943, "learning_rate": 8e-05, "loss": 1.4782, "step": 2534 }, { "epoch": 0.34598061962604065, "grad_norm": 0.19170251488685608, "learning_rate": 8e-05, "loss": 1.5808, "step": 2535 }, { "epoch": 0.3461171011327965, "grad_norm": 0.20397469401359558, "learning_rate": 8e-05, "loss": 1.4893, "step": 2536 }, { "epoch": 0.34625358263955236, "grad_norm": 0.19734856486320496, "learning_rate": 8e-05, "loss": 1.5895, "step": 2537 }, { "epoch": 0.3463900641463082, "grad_norm": 0.194797083735466, "learning_rate": 8e-05, "loss": 1.5999, "step": 2538 }, { "epoch": 0.346526545653064, "grad_norm": 0.19497369229793549, "learning_rate": 8e-05, "loss": 1.6216, "step": 2539 }, { "epoch": 0.34666302715981984, "grad_norm": 0.1882125437259674, "learning_rate": 8e-05, "loss": 1.5375, "step": 2540 }, { "epoch": 0.34679950866657566, "grad_norm": 0.19456225633621216, "learning_rate": 8e-05, "loss": 1.4856, "step": 2541 }, { "epoch": 0.3469359901733315, "grad_norm": 0.19356907904148102, "learning_rate": 8e-05, "loss": 1.5466, "step": 2542 }, { "epoch": 0.34707247168008737, "grad_norm": 0.19334271550178528, "learning_rate": 8e-05, "loss": 1.5215, "step": 2543 }, { "epoch": 0.3472089531868432, "grad_norm": 0.194456085562706, "learning_rate": 8e-05, "loss": 1.5894, "step": 2544 }, { "epoch": 0.347345434693599, "grad_norm": 0.19849246740341187, "learning_rate": 8e-05, "loss": 1.5318, "step": 2545 }, { "epoch": 0.34748191620035485, "grad_norm": 0.1903613805770874, "learning_rate": 8e-05, "loss": 1.5425, "step": 2546 }, { "epoch": 0.34761839770711067, "grad_norm": 0.2045399248600006, "learning_rate": 8e-05, "loss": 1.5604, "step": 2547 }, { "epoch": 0.3477548792138665, "grad_norm": 0.19214165210723877, "learning_rate": 8e-05, "loss": 1.5071, "step": 2548 }, { "epoch": 0.3478913607206224, "grad_norm": 0.19689619541168213, "learning_rate": 8e-05, "loss": 1.5792, "step": 2549 }, { "epoch": 0.3480278422273782, "grad_norm": 0.2017962783575058, "learning_rate": 8e-05, "loss": 1.5829, "step": 2550 }, { "epoch": 0.34816432373413403, "grad_norm": 0.1942274421453476, "learning_rate": 8e-05, "loss": 1.5532, "step": 2551 }, { "epoch": 0.34830080524088985, "grad_norm": 0.20240655541419983, "learning_rate": 8e-05, "loss": 1.5994, "step": 2552 }, { "epoch": 0.3484372867476457, "grad_norm": 0.1921750158071518, "learning_rate": 8e-05, "loss": 1.5286, "step": 2553 }, { "epoch": 0.3485737682544015, "grad_norm": 0.19392859935760498, "learning_rate": 8e-05, "loss": 1.5215, "step": 2554 }, { "epoch": 0.3487102497611574, "grad_norm": 0.19483475387096405, "learning_rate": 8e-05, "loss": 1.5094, "step": 2555 }, { "epoch": 0.3488467312679132, "grad_norm": 0.19146482646465302, "learning_rate": 8e-05, "loss": 1.5302, "step": 2556 }, { "epoch": 0.34898321277466904, "grad_norm": 0.19937308132648468, "learning_rate": 8e-05, "loss": 1.5062, "step": 2557 }, { "epoch": 0.34911969428142486, "grad_norm": 0.19719766080379486, "learning_rate": 8e-05, "loss": 1.5262, "step": 2558 }, { "epoch": 0.3492561757881807, "grad_norm": 0.1888270080089569, "learning_rate": 8e-05, "loss": 1.5084, "step": 2559 }, { "epoch": 0.3493926572949365, "grad_norm": 0.20241068303585052, "learning_rate": 8e-05, "loss": 1.5378, "step": 2560 }, { "epoch": 0.3495291388016924, "grad_norm": 0.1963377445936203, "learning_rate": 8e-05, "loss": 1.5937, "step": 2561 }, { "epoch": 0.3496656203084482, "grad_norm": 0.19205492734909058, "learning_rate": 8e-05, "loss": 1.4899, "step": 2562 }, { "epoch": 0.34980210181520405, "grad_norm": 0.2034924328327179, "learning_rate": 8e-05, "loss": 1.6164, "step": 2563 }, { "epoch": 0.3499385833219599, "grad_norm": 0.1940470039844513, "learning_rate": 8e-05, "loss": 1.6141, "step": 2564 }, { "epoch": 0.3500750648287157, "grad_norm": 0.20668810606002808, "learning_rate": 8e-05, "loss": 1.6064, "step": 2565 }, { "epoch": 0.3502115463354715, "grad_norm": 0.2019558995962143, "learning_rate": 8e-05, "loss": 1.5795, "step": 2566 }, { "epoch": 0.3503480278422274, "grad_norm": 0.19554457068443298, "learning_rate": 8e-05, "loss": 1.5758, "step": 2567 }, { "epoch": 0.35048450934898323, "grad_norm": 0.2013549506664276, "learning_rate": 8e-05, "loss": 1.6049, "step": 2568 }, { "epoch": 0.35062099085573906, "grad_norm": 0.18695232272148132, "learning_rate": 8e-05, "loss": 1.5296, "step": 2569 }, { "epoch": 0.3507574723624949, "grad_norm": 0.19889196753501892, "learning_rate": 8e-05, "loss": 1.5455, "step": 2570 }, { "epoch": 0.3508939538692507, "grad_norm": 0.20030449330806732, "learning_rate": 8e-05, "loss": 1.5883, "step": 2571 }, { "epoch": 0.35103043537600653, "grad_norm": 0.19805726408958435, "learning_rate": 8e-05, "loss": 1.6069, "step": 2572 }, { "epoch": 0.35116691688276236, "grad_norm": 0.1995544582605362, "learning_rate": 8e-05, "loss": 1.4945, "step": 2573 }, { "epoch": 0.35130339838951824, "grad_norm": 0.19294312596321106, "learning_rate": 8e-05, "loss": 1.4802, "step": 2574 }, { "epoch": 0.35143987989627407, "grad_norm": 0.19078242778778076, "learning_rate": 8e-05, "loss": 1.4875, "step": 2575 }, { "epoch": 0.3515763614030299, "grad_norm": 0.20172356069087982, "learning_rate": 8e-05, "loss": 1.509, "step": 2576 }, { "epoch": 0.3517128429097857, "grad_norm": 0.19706188142299652, "learning_rate": 8e-05, "loss": 1.5398, "step": 2577 }, { "epoch": 0.35184932441654154, "grad_norm": 0.19191832840442657, "learning_rate": 8e-05, "loss": 1.4459, "step": 2578 }, { "epoch": 0.35198580592329737, "grad_norm": 0.1973387449979782, "learning_rate": 8e-05, "loss": 1.5172, "step": 2579 }, { "epoch": 0.35212228743005325, "grad_norm": 0.19400425255298615, "learning_rate": 8e-05, "loss": 1.5599, "step": 2580 }, { "epoch": 0.3522587689368091, "grad_norm": 0.2051192671060562, "learning_rate": 8e-05, "loss": 1.5903, "step": 2581 }, { "epoch": 0.3523952504435649, "grad_norm": 0.18710793554782867, "learning_rate": 8e-05, "loss": 1.438, "step": 2582 }, { "epoch": 0.3525317319503207, "grad_norm": 0.18687210977077484, "learning_rate": 8e-05, "loss": 1.4444, "step": 2583 }, { "epoch": 0.35266821345707655, "grad_norm": 0.20916792750358582, "learning_rate": 8e-05, "loss": 1.579, "step": 2584 }, { "epoch": 0.3528046949638324, "grad_norm": 0.201734721660614, "learning_rate": 8e-05, "loss": 1.5089, "step": 2585 }, { "epoch": 0.35294117647058826, "grad_norm": 0.19195617735385895, "learning_rate": 8e-05, "loss": 1.5549, "step": 2586 }, { "epoch": 0.3530776579773441, "grad_norm": 0.1995304673910141, "learning_rate": 8e-05, "loss": 1.4883, "step": 2587 }, { "epoch": 0.3532141394840999, "grad_norm": 0.20530566573143005, "learning_rate": 8e-05, "loss": 1.5744, "step": 2588 }, { "epoch": 0.35335062099085573, "grad_norm": 0.1953418105840683, "learning_rate": 8e-05, "loss": 1.4764, "step": 2589 }, { "epoch": 0.35348710249761156, "grad_norm": 0.19232945144176483, "learning_rate": 8e-05, "loss": 1.4842, "step": 2590 }, { "epoch": 0.3536235840043674, "grad_norm": 0.1948445439338684, "learning_rate": 8e-05, "loss": 1.5719, "step": 2591 }, { "epoch": 0.35376006551112327, "grad_norm": 0.19241535663604736, "learning_rate": 8e-05, "loss": 1.5099, "step": 2592 }, { "epoch": 0.3538965470178791, "grad_norm": 0.19627416133880615, "learning_rate": 8e-05, "loss": 1.5361, "step": 2593 }, { "epoch": 0.3540330285246349, "grad_norm": 0.19273103773593903, "learning_rate": 8e-05, "loss": 1.555, "step": 2594 }, { "epoch": 0.35416951003139074, "grad_norm": 0.19843879342079163, "learning_rate": 8e-05, "loss": 1.5934, "step": 2595 }, { "epoch": 0.35430599153814657, "grad_norm": 0.19716261327266693, "learning_rate": 8e-05, "loss": 1.536, "step": 2596 }, { "epoch": 0.3544424730449024, "grad_norm": 0.1959685981273651, "learning_rate": 8e-05, "loss": 1.5541, "step": 2597 }, { "epoch": 0.3545789545516583, "grad_norm": 0.20762799680233002, "learning_rate": 8e-05, "loss": 1.5578, "step": 2598 }, { "epoch": 0.3547154360584141, "grad_norm": 0.19284506142139435, "learning_rate": 8e-05, "loss": 1.5113, "step": 2599 }, { "epoch": 0.3548519175651699, "grad_norm": 0.19372038543224335, "learning_rate": 8e-05, "loss": 1.5686, "step": 2600 }, { "epoch": 0.35498839907192575, "grad_norm": 0.1953934282064438, "learning_rate": 8e-05, "loss": 1.5098, "step": 2601 }, { "epoch": 0.3551248805786816, "grad_norm": 0.20578987896442413, "learning_rate": 8e-05, "loss": 1.5605, "step": 2602 }, { "epoch": 0.3552613620854374, "grad_norm": 0.20066507160663605, "learning_rate": 8e-05, "loss": 1.5113, "step": 2603 }, { "epoch": 0.3553978435921933, "grad_norm": 0.19259048998355865, "learning_rate": 8e-05, "loss": 1.5236, "step": 2604 }, { "epoch": 0.3555343250989491, "grad_norm": 0.2026747316122055, "learning_rate": 8e-05, "loss": 1.4962, "step": 2605 }, { "epoch": 0.35567080660570494, "grad_norm": 0.20289842784404755, "learning_rate": 8e-05, "loss": 1.5384, "step": 2606 }, { "epoch": 0.35580728811246076, "grad_norm": 0.19875353574752808, "learning_rate": 8e-05, "loss": 1.5426, "step": 2607 }, { "epoch": 0.3559437696192166, "grad_norm": 0.2012946903705597, "learning_rate": 8e-05, "loss": 1.5699, "step": 2608 }, { "epoch": 0.3560802511259724, "grad_norm": 0.2037465125322342, "learning_rate": 8e-05, "loss": 1.5519, "step": 2609 }, { "epoch": 0.35621673263272824, "grad_norm": 0.1972435861825943, "learning_rate": 8e-05, "loss": 1.4951, "step": 2610 }, { "epoch": 0.3563532141394841, "grad_norm": 0.19260697066783905, "learning_rate": 8e-05, "loss": 1.5142, "step": 2611 }, { "epoch": 0.35648969564623995, "grad_norm": 0.19583943486213684, "learning_rate": 8e-05, "loss": 1.5071, "step": 2612 }, { "epoch": 0.35662617715299577, "grad_norm": 0.19478709995746613, "learning_rate": 8e-05, "loss": 1.5742, "step": 2613 }, { "epoch": 0.3567626586597516, "grad_norm": 0.18682514131069183, "learning_rate": 8e-05, "loss": 1.5328, "step": 2614 }, { "epoch": 0.3568991401665074, "grad_norm": 0.19836457073688507, "learning_rate": 8e-05, "loss": 1.5571, "step": 2615 }, { "epoch": 0.35703562167326325, "grad_norm": 0.19091983139514923, "learning_rate": 8e-05, "loss": 1.5105, "step": 2616 }, { "epoch": 0.35717210318001913, "grad_norm": 0.197165384888649, "learning_rate": 8e-05, "loss": 1.5314, "step": 2617 }, { "epoch": 0.35730858468677495, "grad_norm": 0.18971925973892212, "learning_rate": 8e-05, "loss": 1.53, "step": 2618 }, { "epoch": 0.3574450661935308, "grad_norm": 0.18598932027816772, "learning_rate": 8e-05, "loss": 1.4239, "step": 2619 }, { "epoch": 0.3575815477002866, "grad_norm": 0.19195757806301117, "learning_rate": 8e-05, "loss": 1.5554, "step": 2620 }, { "epoch": 0.35771802920704243, "grad_norm": 0.19654802978038788, "learning_rate": 8e-05, "loss": 1.5656, "step": 2621 }, { "epoch": 0.35785451071379826, "grad_norm": 0.18688727915287018, "learning_rate": 8e-05, "loss": 1.5071, "step": 2622 }, { "epoch": 0.35799099222055414, "grad_norm": 0.20256516337394714, "learning_rate": 8e-05, "loss": 1.5267, "step": 2623 }, { "epoch": 0.35812747372730996, "grad_norm": 0.19448009133338928, "learning_rate": 8e-05, "loss": 1.4807, "step": 2624 }, { "epoch": 0.3582639552340658, "grad_norm": 0.19296544790267944, "learning_rate": 8e-05, "loss": 1.5376, "step": 2625 }, { "epoch": 0.3584004367408216, "grad_norm": 0.20121923089027405, "learning_rate": 8e-05, "loss": 1.595, "step": 2626 }, { "epoch": 0.35853691824757744, "grad_norm": 0.19750294089317322, "learning_rate": 8e-05, "loss": 1.4965, "step": 2627 }, { "epoch": 0.35867339975433327, "grad_norm": 0.19986854493618011, "learning_rate": 8e-05, "loss": 1.5536, "step": 2628 }, { "epoch": 0.35880988126108915, "grad_norm": 0.19554506242275238, "learning_rate": 8e-05, "loss": 1.5429, "step": 2629 }, { "epoch": 0.358946362767845, "grad_norm": 0.19883698225021362, "learning_rate": 8e-05, "loss": 1.4906, "step": 2630 }, { "epoch": 0.3590828442746008, "grad_norm": 0.18505001068115234, "learning_rate": 8e-05, "loss": 1.5208, "step": 2631 }, { "epoch": 0.3592193257813566, "grad_norm": 0.20315483212471008, "learning_rate": 8e-05, "loss": 1.5348, "step": 2632 }, { "epoch": 0.35935580728811245, "grad_norm": 0.18695324659347534, "learning_rate": 8e-05, "loss": 1.5729, "step": 2633 }, { "epoch": 0.3594922887948683, "grad_norm": 0.18792392313480377, "learning_rate": 8e-05, "loss": 1.4825, "step": 2634 }, { "epoch": 0.35962877030162416, "grad_norm": 0.1980956792831421, "learning_rate": 8e-05, "loss": 1.5276, "step": 2635 }, { "epoch": 0.35976525180838, "grad_norm": 0.20917940139770508, "learning_rate": 8e-05, "loss": 1.5744, "step": 2636 }, { "epoch": 0.3599017333151358, "grad_norm": 0.19428816437721252, "learning_rate": 8e-05, "loss": 1.5126, "step": 2637 }, { "epoch": 0.36003821482189163, "grad_norm": 0.20210137963294983, "learning_rate": 8e-05, "loss": 1.4561, "step": 2638 }, { "epoch": 0.36017469632864746, "grad_norm": 0.19680465757846832, "learning_rate": 8e-05, "loss": 1.55, "step": 2639 }, { "epoch": 0.3603111778354033, "grad_norm": 0.19674304127693176, "learning_rate": 8e-05, "loss": 1.5022, "step": 2640 }, { "epoch": 0.3604476593421591, "grad_norm": 0.20113793015480042, "learning_rate": 8e-05, "loss": 1.5228, "step": 2641 }, { "epoch": 0.360584140848915, "grad_norm": 0.19191408157348633, "learning_rate": 8e-05, "loss": 1.5396, "step": 2642 }, { "epoch": 0.3607206223556708, "grad_norm": 0.19506217539310455, "learning_rate": 8e-05, "loss": 1.4718, "step": 2643 }, { "epoch": 0.36085710386242664, "grad_norm": 0.20981590449810028, "learning_rate": 8e-05, "loss": 1.5621, "step": 2644 }, { "epoch": 0.36099358536918247, "grad_norm": 0.19451993703842163, "learning_rate": 8e-05, "loss": 1.563, "step": 2645 }, { "epoch": 0.3611300668759383, "grad_norm": 0.2060956358909607, "learning_rate": 8e-05, "loss": 1.4423, "step": 2646 }, { "epoch": 0.3612665483826941, "grad_norm": 0.19907425343990326, "learning_rate": 8e-05, "loss": 1.5191, "step": 2647 }, { "epoch": 0.36140302988945, "grad_norm": 0.19366858899593353, "learning_rate": 8e-05, "loss": 1.5272, "step": 2648 }, { "epoch": 0.3615395113962058, "grad_norm": 0.21619334816932678, "learning_rate": 8e-05, "loss": 1.5359, "step": 2649 }, { "epoch": 0.36167599290296165, "grad_norm": 0.1992226541042328, "learning_rate": 8e-05, "loss": 1.5773, "step": 2650 }, { "epoch": 0.3618124744097175, "grad_norm": 0.21054105460643768, "learning_rate": 8e-05, "loss": 1.4579, "step": 2651 }, { "epoch": 0.3619489559164733, "grad_norm": 0.19776053726673126, "learning_rate": 8e-05, "loss": 1.5641, "step": 2652 }, { "epoch": 0.36208543742322913, "grad_norm": 0.1988450139760971, "learning_rate": 8e-05, "loss": 1.5263, "step": 2653 }, { "epoch": 0.362221918929985, "grad_norm": 0.19162483513355255, "learning_rate": 8e-05, "loss": 1.522, "step": 2654 }, { "epoch": 0.36235840043674084, "grad_norm": 0.1950751692056656, "learning_rate": 8e-05, "loss": 1.5236, "step": 2655 }, { "epoch": 0.36249488194349666, "grad_norm": 0.1985386461019516, "learning_rate": 8e-05, "loss": 1.5067, "step": 2656 }, { "epoch": 0.3626313634502525, "grad_norm": 0.20346039533615112, "learning_rate": 8e-05, "loss": 1.4686, "step": 2657 }, { "epoch": 0.3627678449570083, "grad_norm": 0.19609668850898743, "learning_rate": 8e-05, "loss": 1.5484, "step": 2658 }, { "epoch": 0.36290432646376414, "grad_norm": 0.19500140845775604, "learning_rate": 8e-05, "loss": 1.5695, "step": 2659 }, { "epoch": 0.36304080797052, "grad_norm": 0.1938134878873825, "learning_rate": 8e-05, "loss": 1.5155, "step": 2660 }, { "epoch": 0.36317728947727584, "grad_norm": 0.19112981855869293, "learning_rate": 8e-05, "loss": 1.5469, "step": 2661 }, { "epoch": 0.36331377098403167, "grad_norm": 0.18887364864349365, "learning_rate": 8e-05, "loss": 1.4811, "step": 2662 }, { "epoch": 0.3634502524907875, "grad_norm": 0.20011357963085175, "learning_rate": 8e-05, "loss": 1.5197, "step": 2663 }, { "epoch": 0.3635867339975433, "grad_norm": 0.18942269682884216, "learning_rate": 8e-05, "loss": 1.4658, "step": 2664 }, { "epoch": 0.36372321550429915, "grad_norm": 0.19741171598434448, "learning_rate": 8e-05, "loss": 1.5095, "step": 2665 }, { "epoch": 0.363859697011055, "grad_norm": 0.1949249655008316, "learning_rate": 8e-05, "loss": 1.496, "step": 2666 }, { "epoch": 0.36399617851781085, "grad_norm": 0.1998748481273651, "learning_rate": 8e-05, "loss": 1.5745, "step": 2667 }, { "epoch": 0.3641326600245667, "grad_norm": 0.19141504168510437, "learning_rate": 8e-05, "loss": 1.5389, "step": 2668 }, { "epoch": 0.3642691415313225, "grad_norm": 0.19324477016925812, "learning_rate": 8e-05, "loss": 1.5638, "step": 2669 }, { "epoch": 0.36440562303807833, "grad_norm": 0.1996828317642212, "learning_rate": 8e-05, "loss": 1.5698, "step": 2670 }, { "epoch": 0.36454210454483416, "grad_norm": 0.195877805352211, "learning_rate": 8e-05, "loss": 1.5823, "step": 2671 }, { "epoch": 0.36467858605159004, "grad_norm": 0.19857576489448547, "learning_rate": 8e-05, "loss": 1.5508, "step": 2672 }, { "epoch": 0.36481506755834586, "grad_norm": 0.2116616815328598, "learning_rate": 8e-05, "loss": 1.5047, "step": 2673 }, { "epoch": 0.3649515490651017, "grad_norm": 0.20244544744491577, "learning_rate": 8e-05, "loss": 1.6045, "step": 2674 }, { "epoch": 0.3650880305718575, "grad_norm": 0.19361169636249542, "learning_rate": 8e-05, "loss": 1.586, "step": 2675 }, { "epoch": 0.36522451207861334, "grad_norm": 0.19096067547798157, "learning_rate": 8e-05, "loss": 1.5076, "step": 2676 }, { "epoch": 0.36536099358536916, "grad_norm": 0.20173348486423492, "learning_rate": 8e-05, "loss": 1.526, "step": 2677 }, { "epoch": 0.365497475092125, "grad_norm": 0.19586752355098724, "learning_rate": 8e-05, "loss": 1.4857, "step": 2678 }, { "epoch": 0.36563395659888087, "grad_norm": 0.23996679484844208, "learning_rate": 8e-05, "loss": 1.537, "step": 2679 }, { "epoch": 0.3657704381056367, "grad_norm": 0.1985531747341156, "learning_rate": 8e-05, "loss": 1.4903, "step": 2680 }, { "epoch": 0.3659069196123925, "grad_norm": 0.19754116237163544, "learning_rate": 8e-05, "loss": 1.5281, "step": 2681 }, { "epoch": 0.36604340111914835, "grad_norm": 0.20095433294773102, "learning_rate": 8e-05, "loss": 1.5987, "step": 2682 }, { "epoch": 0.3661798826259042, "grad_norm": 0.19450584053993225, "learning_rate": 8e-05, "loss": 1.498, "step": 2683 }, { "epoch": 0.36631636413266, "grad_norm": 0.19317109882831573, "learning_rate": 8e-05, "loss": 1.4674, "step": 2684 }, { "epoch": 0.3664528456394159, "grad_norm": 0.20266908407211304, "learning_rate": 8e-05, "loss": 1.5864, "step": 2685 }, { "epoch": 0.3665893271461717, "grad_norm": 0.20954738557338715, "learning_rate": 8e-05, "loss": 1.6096, "step": 2686 }, { "epoch": 0.36672580865292753, "grad_norm": 0.19602203369140625, "learning_rate": 8e-05, "loss": 1.5178, "step": 2687 }, { "epoch": 0.36686229015968336, "grad_norm": 0.18985100090503693, "learning_rate": 8e-05, "loss": 1.5467, "step": 2688 }, { "epoch": 0.3669987716664392, "grad_norm": 0.2018248736858368, "learning_rate": 8e-05, "loss": 1.5563, "step": 2689 }, { "epoch": 0.367135253173195, "grad_norm": 0.19734995067119598, "learning_rate": 8e-05, "loss": 1.5446, "step": 2690 }, { "epoch": 0.3672717346799509, "grad_norm": 0.19626644253730774, "learning_rate": 8e-05, "loss": 1.5736, "step": 2691 }, { "epoch": 0.3674082161867067, "grad_norm": 0.2009628862142563, "learning_rate": 8e-05, "loss": 1.5271, "step": 2692 }, { "epoch": 0.36754469769346254, "grad_norm": 0.1883717179298401, "learning_rate": 8e-05, "loss": 1.4464, "step": 2693 }, { "epoch": 0.36768117920021837, "grad_norm": 0.1963186413049698, "learning_rate": 8e-05, "loss": 1.5605, "step": 2694 }, { "epoch": 0.3678176607069742, "grad_norm": 0.1977759301662445, "learning_rate": 8e-05, "loss": 1.5988, "step": 2695 }, { "epoch": 0.36795414221373, "grad_norm": 0.1960255205631256, "learning_rate": 8e-05, "loss": 1.5898, "step": 2696 }, { "epoch": 0.3680906237204859, "grad_norm": 0.19921021163463593, "learning_rate": 8e-05, "loss": 1.5368, "step": 2697 }, { "epoch": 0.3682271052272417, "grad_norm": 0.19218194484710693, "learning_rate": 8e-05, "loss": 1.5088, "step": 2698 }, { "epoch": 0.36836358673399755, "grad_norm": 0.19653165340423584, "learning_rate": 8e-05, "loss": 1.5464, "step": 2699 }, { "epoch": 0.3685000682407534, "grad_norm": 0.19092877209186554, "learning_rate": 8e-05, "loss": 1.5098, "step": 2700 }, { "epoch": 0.3686365497475092, "grad_norm": 0.19032913446426392, "learning_rate": 8e-05, "loss": 1.5319, "step": 2701 }, { "epoch": 0.368773031254265, "grad_norm": 0.19808073341846466, "learning_rate": 8e-05, "loss": 1.5306, "step": 2702 }, { "epoch": 0.3689095127610209, "grad_norm": 0.1957745999097824, "learning_rate": 8e-05, "loss": 1.5417, "step": 2703 }, { "epoch": 0.36904599426777673, "grad_norm": 0.19573797285556793, "learning_rate": 8e-05, "loss": 1.5048, "step": 2704 }, { "epoch": 0.36918247577453256, "grad_norm": 0.19019901752471924, "learning_rate": 8e-05, "loss": 1.5143, "step": 2705 }, { "epoch": 0.3693189572812884, "grad_norm": 0.19325849413871765, "learning_rate": 8e-05, "loss": 1.5215, "step": 2706 }, { "epoch": 0.3694554387880442, "grad_norm": 0.19284597039222717, "learning_rate": 8e-05, "loss": 1.54, "step": 2707 }, { "epoch": 0.36959192029480004, "grad_norm": 0.20323938131332397, "learning_rate": 8e-05, "loss": 1.5677, "step": 2708 }, { "epoch": 0.36972840180155586, "grad_norm": 0.20294398069381714, "learning_rate": 8e-05, "loss": 1.5375, "step": 2709 }, { "epoch": 0.36986488330831174, "grad_norm": 0.19054478406906128, "learning_rate": 8e-05, "loss": 1.4954, "step": 2710 }, { "epoch": 0.37000136481506757, "grad_norm": 0.194930300116539, "learning_rate": 8e-05, "loss": 1.4625, "step": 2711 }, { "epoch": 0.3701378463218234, "grad_norm": 0.1962171345949173, "learning_rate": 8e-05, "loss": 1.5472, "step": 2712 }, { "epoch": 0.3702743278285792, "grad_norm": 0.1992320865392685, "learning_rate": 8e-05, "loss": 1.5354, "step": 2713 }, { "epoch": 0.37041080933533505, "grad_norm": 0.19754379987716675, "learning_rate": 8e-05, "loss": 1.5668, "step": 2714 }, { "epoch": 0.37054729084209087, "grad_norm": 0.19990310072898865, "learning_rate": 8e-05, "loss": 1.5396, "step": 2715 }, { "epoch": 0.37068377234884675, "grad_norm": 0.20341627299785614, "learning_rate": 8e-05, "loss": 1.5973, "step": 2716 }, { "epoch": 0.3708202538556026, "grad_norm": 0.19848500192165375, "learning_rate": 8e-05, "loss": 1.4709, "step": 2717 }, { "epoch": 0.3709567353623584, "grad_norm": 0.192295104265213, "learning_rate": 8e-05, "loss": 1.5477, "step": 2718 }, { "epoch": 0.37109321686911423, "grad_norm": 0.19476161897182465, "learning_rate": 8e-05, "loss": 1.4509, "step": 2719 }, { "epoch": 0.37122969837587005, "grad_norm": 0.19318239390850067, "learning_rate": 8e-05, "loss": 1.5363, "step": 2720 }, { "epoch": 0.3713661798826259, "grad_norm": 0.19200479984283447, "learning_rate": 8e-05, "loss": 1.5177, "step": 2721 }, { "epoch": 0.37150266138938176, "grad_norm": 0.19040045142173767, "learning_rate": 8e-05, "loss": 1.5039, "step": 2722 }, { "epoch": 0.3716391428961376, "grad_norm": 0.19674921035766602, "learning_rate": 8e-05, "loss": 1.5137, "step": 2723 }, { "epoch": 0.3717756244028934, "grad_norm": 0.1906231790781021, "learning_rate": 8e-05, "loss": 1.509, "step": 2724 }, { "epoch": 0.37191210590964924, "grad_norm": 0.19204051792621613, "learning_rate": 8e-05, "loss": 1.515, "step": 2725 }, { "epoch": 0.37204858741640506, "grad_norm": 0.19326713681221008, "learning_rate": 8e-05, "loss": 1.5186, "step": 2726 }, { "epoch": 0.3721850689231609, "grad_norm": 0.1923893839120865, "learning_rate": 8e-05, "loss": 1.4785, "step": 2727 }, { "epoch": 0.37232155042991677, "grad_norm": 0.19592903554439545, "learning_rate": 8e-05, "loss": 1.59, "step": 2728 }, { "epoch": 0.3724580319366726, "grad_norm": 0.19696220755577087, "learning_rate": 8e-05, "loss": 1.4988, "step": 2729 }, { "epoch": 0.3725945134434284, "grad_norm": 0.19765207171440125, "learning_rate": 8e-05, "loss": 1.4931, "step": 2730 }, { "epoch": 0.37273099495018425, "grad_norm": 0.19209659099578857, "learning_rate": 8e-05, "loss": 1.5018, "step": 2731 }, { "epoch": 0.3728674764569401, "grad_norm": 0.1987694799900055, "learning_rate": 8e-05, "loss": 1.5979, "step": 2732 }, { "epoch": 0.3730039579636959, "grad_norm": 0.20281600952148438, "learning_rate": 8e-05, "loss": 1.578, "step": 2733 }, { "epoch": 0.3731404394704518, "grad_norm": 0.2019796520471573, "learning_rate": 8e-05, "loss": 1.607, "step": 2734 }, { "epoch": 0.3732769209772076, "grad_norm": 0.19378289580345154, "learning_rate": 8e-05, "loss": 1.5195, "step": 2735 }, { "epoch": 0.37341340248396343, "grad_norm": 0.20103982090950012, "learning_rate": 8e-05, "loss": 1.4692, "step": 2736 }, { "epoch": 0.37354988399071926, "grad_norm": 0.1952180415391922, "learning_rate": 8e-05, "loss": 1.54, "step": 2737 }, { "epoch": 0.3736863654974751, "grad_norm": 0.19639243185520172, "learning_rate": 8e-05, "loss": 1.5822, "step": 2738 }, { "epoch": 0.3738228470042309, "grad_norm": 0.18987591564655304, "learning_rate": 8e-05, "loss": 1.4134, "step": 2739 }, { "epoch": 0.3739593285109868, "grad_norm": 0.20650343596935272, "learning_rate": 8e-05, "loss": 1.6275, "step": 2740 }, { "epoch": 0.3740958100177426, "grad_norm": 0.19434286653995514, "learning_rate": 8e-05, "loss": 1.5034, "step": 2741 }, { "epoch": 0.37423229152449844, "grad_norm": 0.20050287246704102, "learning_rate": 8e-05, "loss": 1.5615, "step": 2742 }, { "epoch": 0.37436877303125426, "grad_norm": 0.19849415123462677, "learning_rate": 8e-05, "loss": 1.5767, "step": 2743 }, { "epoch": 0.3745052545380101, "grad_norm": 0.18816018104553223, "learning_rate": 8e-05, "loss": 1.5101, "step": 2744 }, { "epoch": 0.3746417360447659, "grad_norm": 0.19758187234401703, "learning_rate": 8e-05, "loss": 1.5462, "step": 2745 }, { "epoch": 0.37477821755152174, "grad_norm": 0.1868588775396347, "learning_rate": 8e-05, "loss": 1.461, "step": 2746 }, { "epoch": 0.3749146990582776, "grad_norm": 0.18856370449066162, "learning_rate": 8e-05, "loss": 1.3578, "step": 2747 }, { "epoch": 0.37505118056503345, "grad_norm": 0.1910911649465561, "learning_rate": 8e-05, "loss": 1.506, "step": 2748 }, { "epoch": 0.3751876620717893, "grad_norm": 0.19655504822731018, "learning_rate": 8e-05, "loss": 1.5808, "step": 2749 }, { "epoch": 0.3753241435785451, "grad_norm": 0.196238175034523, "learning_rate": 8e-05, "loss": 1.5747, "step": 2750 }, { "epoch": 0.3754606250853009, "grad_norm": 0.20389612019062042, "learning_rate": 8e-05, "loss": 1.5365, "step": 2751 }, { "epoch": 0.37559710659205675, "grad_norm": 0.1911252737045288, "learning_rate": 8e-05, "loss": 1.5214, "step": 2752 }, { "epoch": 0.37573358809881263, "grad_norm": 0.18786859512329102, "learning_rate": 8e-05, "loss": 1.4832, "step": 2753 }, { "epoch": 0.37587006960556846, "grad_norm": 0.1957067847251892, "learning_rate": 8e-05, "loss": 1.4955, "step": 2754 }, { "epoch": 0.3760065511123243, "grad_norm": 0.19290585815906525, "learning_rate": 8e-05, "loss": 1.5131, "step": 2755 }, { "epoch": 0.3761430326190801, "grad_norm": 0.19869758188724518, "learning_rate": 8e-05, "loss": 1.5123, "step": 2756 }, { "epoch": 0.37627951412583593, "grad_norm": 0.19285526871681213, "learning_rate": 8e-05, "loss": 1.5992, "step": 2757 }, { "epoch": 0.37641599563259176, "grad_norm": 0.1974519044160843, "learning_rate": 8e-05, "loss": 1.5903, "step": 2758 }, { "epoch": 0.37655247713934764, "grad_norm": 0.2015846073627472, "learning_rate": 8e-05, "loss": 1.5061, "step": 2759 }, { "epoch": 0.37668895864610347, "grad_norm": 0.21167241036891937, "learning_rate": 8e-05, "loss": 1.5513, "step": 2760 }, { "epoch": 0.3768254401528593, "grad_norm": 0.1957661211490631, "learning_rate": 8e-05, "loss": 1.589, "step": 2761 }, { "epoch": 0.3769619216596151, "grad_norm": 0.20745979249477386, "learning_rate": 8e-05, "loss": 1.6164, "step": 2762 }, { "epoch": 0.37709840316637094, "grad_norm": 0.20360028743743896, "learning_rate": 8e-05, "loss": 1.5006, "step": 2763 }, { "epoch": 0.37723488467312677, "grad_norm": 0.2007840871810913, "learning_rate": 8e-05, "loss": 1.6025, "step": 2764 }, { "epoch": 0.37737136617988265, "grad_norm": 0.197735995054245, "learning_rate": 8e-05, "loss": 1.4611, "step": 2765 }, { "epoch": 0.3775078476866385, "grad_norm": 0.20469868183135986, "learning_rate": 8e-05, "loss": 1.5672, "step": 2766 }, { "epoch": 0.3776443291933943, "grad_norm": 0.19251105189323425, "learning_rate": 8e-05, "loss": 1.483, "step": 2767 }, { "epoch": 0.3777808107001501, "grad_norm": 0.19543355703353882, "learning_rate": 8e-05, "loss": 1.5771, "step": 2768 }, { "epoch": 0.37791729220690595, "grad_norm": 0.20735904574394226, "learning_rate": 8e-05, "loss": 1.5034, "step": 2769 }, { "epoch": 0.3780537737136618, "grad_norm": 0.19461093842983246, "learning_rate": 8e-05, "loss": 1.5222, "step": 2770 }, { "epoch": 0.37819025522041766, "grad_norm": 0.2013184279203415, "learning_rate": 8e-05, "loss": 1.5441, "step": 2771 }, { "epoch": 0.3783267367271735, "grad_norm": 0.19306708872318268, "learning_rate": 8e-05, "loss": 1.4824, "step": 2772 }, { "epoch": 0.3784632182339293, "grad_norm": 0.20058812201023102, "learning_rate": 8e-05, "loss": 1.5536, "step": 2773 }, { "epoch": 0.37859969974068514, "grad_norm": 0.19264377653598785, "learning_rate": 8e-05, "loss": 1.5083, "step": 2774 }, { "epoch": 0.37873618124744096, "grad_norm": 0.1952289491891861, "learning_rate": 8e-05, "loss": 1.5439, "step": 2775 }, { "epoch": 0.3788726627541968, "grad_norm": 0.19632276892662048, "learning_rate": 8e-05, "loss": 1.5479, "step": 2776 }, { "epoch": 0.37900914426095267, "grad_norm": 0.18710362911224365, "learning_rate": 8e-05, "loss": 1.5114, "step": 2777 }, { "epoch": 0.3791456257677085, "grad_norm": 0.19573870301246643, "learning_rate": 8e-05, "loss": 1.5084, "step": 2778 }, { "epoch": 0.3792821072744643, "grad_norm": 0.19529375433921814, "learning_rate": 8e-05, "loss": 1.5277, "step": 2779 }, { "epoch": 0.37941858878122015, "grad_norm": 0.19804953038692474, "learning_rate": 8e-05, "loss": 1.4959, "step": 2780 }, { "epoch": 0.37955507028797597, "grad_norm": 0.19311289489269257, "learning_rate": 8e-05, "loss": 1.5119, "step": 2781 }, { "epoch": 0.3796915517947318, "grad_norm": 0.1914122998714447, "learning_rate": 8e-05, "loss": 1.4941, "step": 2782 }, { "epoch": 0.3798280333014876, "grad_norm": 0.20707741379737854, "learning_rate": 8e-05, "loss": 1.6479, "step": 2783 }, { "epoch": 0.3799645148082435, "grad_norm": 0.19574078917503357, "learning_rate": 8e-05, "loss": 1.4836, "step": 2784 }, { "epoch": 0.38010099631499933, "grad_norm": 0.19648070633411407, "learning_rate": 8e-05, "loss": 1.5115, "step": 2785 }, { "epoch": 0.38023747782175515, "grad_norm": 0.19596077501773834, "learning_rate": 8e-05, "loss": 1.5651, "step": 2786 }, { "epoch": 0.380373959328511, "grad_norm": 0.19718605279922485, "learning_rate": 8e-05, "loss": 1.5551, "step": 2787 }, { "epoch": 0.3805104408352668, "grad_norm": 0.20196565985679626, "learning_rate": 8e-05, "loss": 1.5213, "step": 2788 }, { "epoch": 0.38064692234202263, "grad_norm": 0.19631926715373993, "learning_rate": 8e-05, "loss": 1.4902, "step": 2789 }, { "epoch": 0.3807834038487785, "grad_norm": 0.19461777806282043, "learning_rate": 8e-05, "loss": 1.5685, "step": 2790 }, { "epoch": 0.38091988535553434, "grad_norm": 0.19680659472942352, "learning_rate": 8e-05, "loss": 1.5474, "step": 2791 }, { "epoch": 0.38105636686229016, "grad_norm": 0.19592802226543427, "learning_rate": 8e-05, "loss": 1.5272, "step": 2792 }, { "epoch": 0.381192848369046, "grad_norm": 0.20139016211032867, "learning_rate": 8e-05, "loss": 1.5193, "step": 2793 }, { "epoch": 0.3813293298758018, "grad_norm": 0.20303493738174438, "learning_rate": 8e-05, "loss": 1.5786, "step": 2794 }, { "epoch": 0.38146581138255764, "grad_norm": 0.19889698922634125, "learning_rate": 8e-05, "loss": 1.5135, "step": 2795 }, { "epoch": 0.3816022928893135, "grad_norm": 0.19793474674224854, "learning_rate": 8e-05, "loss": 1.5852, "step": 2796 }, { "epoch": 0.38173877439606935, "grad_norm": 0.19721032679080963, "learning_rate": 8e-05, "loss": 1.5416, "step": 2797 }, { "epoch": 0.3818752559028252, "grad_norm": 0.1949787586927414, "learning_rate": 8e-05, "loss": 1.4854, "step": 2798 }, { "epoch": 0.382011737409581, "grad_norm": 0.20098304748535156, "learning_rate": 8e-05, "loss": 1.5066, "step": 2799 }, { "epoch": 0.3821482189163368, "grad_norm": 0.1979992389678955, "learning_rate": 8e-05, "loss": 1.5669, "step": 2800 }, { "epoch": 0.38228470042309265, "grad_norm": 0.19819101691246033, "learning_rate": 8e-05, "loss": 1.5275, "step": 2801 }, { "epoch": 0.38242118192984853, "grad_norm": 0.19251833856105804, "learning_rate": 8e-05, "loss": 1.4805, "step": 2802 }, { "epoch": 0.38255766343660436, "grad_norm": 0.19863919913768768, "learning_rate": 8e-05, "loss": 1.4531, "step": 2803 }, { "epoch": 0.3826941449433602, "grad_norm": 0.19388936460018158, "learning_rate": 8e-05, "loss": 1.4497, "step": 2804 }, { "epoch": 0.382830626450116, "grad_norm": 0.19460538029670715, "learning_rate": 8e-05, "loss": 1.4978, "step": 2805 }, { "epoch": 0.38296710795687183, "grad_norm": 0.1990063637495041, "learning_rate": 8e-05, "loss": 1.5652, "step": 2806 }, { "epoch": 0.38310358946362766, "grad_norm": 0.19004380702972412, "learning_rate": 8e-05, "loss": 1.4543, "step": 2807 }, { "epoch": 0.38324007097038354, "grad_norm": 0.20413538813591003, "learning_rate": 8e-05, "loss": 1.4504, "step": 2808 }, { "epoch": 0.38337655247713937, "grad_norm": 0.1971040517091751, "learning_rate": 8e-05, "loss": 1.517, "step": 2809 }, { "epoch": 0.3835130339838952, "grad_norm": 0.20226643979549408, "learning_rate": 8e-05, "loss": 1.5559, "step": 2810 }, { "epoch": 0.383649515490651, "grad_norm": 0.1960776299238205, "learning_rate": 8e-05, "loss": 1.5131, "step": 2811 }, { "epoch": 0.38378599699740684, "grad_norm": 0.20299096405506134, "learning_rate": 8e-05, "loss": 1.5796, "step": 2812 }, { "epoch": 0.38392247850416267, "grad_norm": 0.19002120196819305, "learning_rate": 8e-05, "loss": 1.5198, "step": 2813 }, { "epoch": 0.3840589600109185, "grad_norm": 0.1999003142118454, "learning_rate": 8e-05, "loss": 1.5724, "step": 2814 }, { "epoch": 0.3841954415176744, "grad_norm": 0.19764094054698944, "learning_rate": 8e-05, "loss": 1.5634, "step": 2815 }, { "epoch": 0.3843319230244302, "grad_norm": 0.19543305039405823, "learning_rate": 8e-05, "loss": 1.5514, "step": 2816 }, { "epoch": 0.384468404531186, "grad_norm": 0.19510267674922943, "learning_rate": 8e-05, "loss": 1.5052, "step": 2817 }, { "epoch": 0.38460488603794185, "grad_norm": 0.19557586312294006, "learning_rate": 8e-05, "loss": 1.5178, "step": 2818 }, { "epoch": 0.3847413675446977, "grad_norm": 0.19730573892593384, "learning_rate": 8e-05, "loss": 1.5469, "step": 2819 }, { "epoch": 0.3848778490514535, "grad_norm": 0.19240061938762665, "learning_rate": 8e-05, "loss": 1.4631, "step": 2820 }, { "epoch": 0.3850143305582094, "grad_norm": 0.19733519852161407, "learning_rate": 8e-05, "loss": 1.5521, "step": 2821 }, { "epoch": 0.3851508120649652, "grad_norm": 0.19816966354846954, "learning_rate": 8e-05, "loss": 1.5645, "step": 2822 }, { "epoch": 0.38528729357172103, "grad_norm": 0.20153246819972992, "learning_rate": 8e-05, "loss": 1.4874, "step": 2823 }, { "epoch": 0.38542377507847686, "grad_norm": 0.19149908423423767, "learning_rate": 8e-05, "loss": 1.4352, "step": 2824 }, { "epoch": 0.3855602565852327, "grad_norm": 0.19896867871284485, "learning_rate": 8e-05, "loss": 1.582, "step": 2825 }, { "epoch": 0.3856967380919885, "grad_norm": 0.1990615278482437, "learning_rate": 8e-05, "loss": 1.6214, "step": 2826 }, { "epoch": 0.3858332195987444, "grad_norm": 0.19365647435188293, "learning_rate": 8e-05, "loss": 1.5248, "step": 2827 }, { "epoch": 0.3859697011055002, "grad_norm": 0.20246310532093048, "learning_rate": 8e-05, "loss": 1.574, "step": 2828 }, { "epoch": 0.38610618261225604, "grad_norm": 0.18818911910057068, "learning_rate": 8e-05, "loss": 1.4901, "step": 2829 }, { "epoch": 0.38624266411901187, "grad_norm": 0.19576428830623627, "learning_rate": 8e-05, "loss": 1.5346, "step": 2830 }, { "epoch": 0.3863791456257677, "grad_norm": 0.1970338076353073, "learning_rate": 8e-05, "loss": 1.517, "step": 2831 }, { "epoch": 0.3865156271325235, "grad_norm": 0.19619084894657135, "learning_rate": 8e-05, "loss": 1.5264, "step": 2832 }, { "epoch": 0.3866521086392794, "grad_norm": 0.1897464543581009, "learning_rate": 8e-05, "loss": 1.4905, "step": 2833 }, { "epoch": 0.3867885901460352, "grad_norm": 0.20004330575466156, "learning_rate": 8e-05, "loss": 1.5773, "step": 2834 }, { "epoch": 0.38692507165279105, "grad_norm": 0.18595285713672638, "learning_rate": 8e-05, "loss": 1.4592, "step": 2835 }, { "epoch": 0.3870615531595469, "grad_norm": 0.1970224678516388, "learning_rate": 8e-05, "loss": 1.6325, "step": 2836 }, { "epoch": 0.3871980346663027, "grad_norm": 0.20294629037380219, "learning_rate": 8e-05, "loss": 1.5799, "step": 2837 }, { "epoch": 0.38733451617305853, "grad_norm": 0.193478062748909, "learning_rate": 8e-05, "loss": 1.4498, "step": 2838 }, { "epoch": 0.3874709976798144, "grad_norm": 0.20147845149040222, "learning_rate": 8e-05, "loss": 1.6247, "step": 2839 }, { "epoch": 0.38760747918657024, "grad_norm": 0.1943940818309784, "learning_rate": 8e-05, "loss": 1.477, "step": 2840 }, { "epoch": 0.38774396069332606, "grad_norm": 0.20720873773097992, "learning_rate": 8e-05, "loss": 1.5658, "step": 2841 }, { "epoch": 0.3878804422000819, "grad_norm": 0.19790019094944, "learning_rate": 8e-05, "loss": 1.5243, "step": 2842 }, { "epoch": 0.3880169237068377, "grad_norm": 0.1980382651090622, "learning_rate": 8e-05, "loss": 1.5056, "step": 2843 }, { "epoch": 0.38815340521359354, "grad_norm": 0.20592498779296875, "learning_rate": 8e-05, "loss": 1.5555, "step": 2844 }, { "epoch": 0.3882898867203494, "grad_norm": 0.20602920651435852, "learning_rate": 8e-05, "loss": 1.505, "step": 2845 }, { "epoch": 0.38842636822710525, "grad_norm": 0.19017846882343292, "learning_rate": 8e-05, "loss": 1.4984, "step": 2846 }, { "epoch": 0.38856284973386107, "grad_norm": 0.202789768576622, "learning_rate": 8e-05, "loss": 1.5236, "step": 2847 }, { "epoch": 0.3886993312406169, "grad_norm": 0.1992550641298294, "learning_rate": 8e-05, "loss": 1.4842, "step": 2848 }, { "epoch": 0.3888358127473727, "grad_norm": 0.19333533942699432, "learning_rate": 8e-05, "loss": 1.4712, "step": 2849 }, { "epoch": 0.38897229425412855, "grad_norm": 0.2050512284040451, "learning_rate": 8e-05, "loss": 1.5065, "step": 2850 }, { "epoch": 0.3891087757608844, "grad_norm": 0.20399445295333862, "learning_rate": 8e-05, "loss": 1.5624, "step": 2851 }, { "epoch": 0.38924525726764025, "grad_norm": 0.1994365155696869, "learning_rate": 8e-05, "loss": 1.5203, "step": 2852 }, { "epoch": 0.3893817387743961, "grad_norm": 0.1944725066423416, "learning_rate": 8e-05, "loss": 1.5247, "step": 2853 }, { "epoch": 0.3895182202811519, "grad_norm": 0.20120403170585632, "learning_rate": 8e-05, "loss": 1.565, "step": 2854 }, { "epoch": 0.38965470178790773, "grad_norm": 0.19254420697689056, "learning_rate": 8e-05, "loss": 1.5038, "step": 2855 }, { "epoch": 0.38979118329466356, "grad_norm": 0.19902515411376953, "learning_rate": 8e-05, "loss": 1.514, "step": 2856 }, { "epoch": 0.3899276648014194, "grad_norm": 0.2014210969209671, "learning_rate": 8e-05, "loss": 1.5368, "step": 2857 }, { "epoch": 0.39006414630817526, "grad_norm": 0.19931615889072418, "learning_rate": 8e-05, "loss": 1.5179, "step": 2858 }, { "epoch": 0.3902006278149311, "grad_norm": 0.19515490531921387, "learning_rate": 8e-05, "loss": 1.5191, "step": 2859 }, { "epoch": 0.3903371093216869, "grad_norm": 0.217337965965271, "learning_rate": 8e-05, "loss": 1.5818, "step": 2860 }, { "epoch": 0.39047359082844274, "grad_norm": 0.19456127285957336, "learning_rate": 8e-05, "loss": 1.5288, "step": 2861 }, { "epoch": 0.39061007233519857, "grad_norm": 0.19887696206569672, "learning_rate": 8e-05, "loss": 1.577, "step": 2862 }, { "epoch": 0.3907465538419544, "grad_norm": 0.19483307003974915, "learning_rate": 8e-05, "loss": 1.4973, "step": 2863 }, { "epoch": 0.3908830353487103, "grad_norm": 0.19631604850292206, "learning_rate": 8e-05, "loss": 1.4889, "step": 2864 }, { "epoch": 0.3910195168554661, "grad_norm": 0.19995272159576416, "learning_rate": 8e-05, "loss": 1.5028, "step": 2865 }, { "epoch": 0.3911559983622219, "grad_norm": 0.19609548151493073, "learning_rate": 8e-05, "loss": 1.4999, "step": 2866 }, { "epoch": 0.39129247986897775, "grad_norm": 0.19483570754528046, "learning_rate": 8e-05, "loss": 1.4794, "step": 2867 }, { "epoch": 0.3914289613757336, "grad_norm": 0.20292863249778748, "learning_rate": 8e-05, "loss": 1.5696, "step": 2868 }, { "epoch": 0.3915654428824894, "grad_norm": 0.2035442590713501, "learning_rate": 8e-05, "loss": 1.5346, "step": 2869 }, { "epoch": 0.3917019243892453, "grad_norm": 0.19722416996955872, "learning_rate": 8e-05, "loss": 1.5133, "step": 2870 }, { "epoch": 0.3918384058960011, "grad_norm": 0.1925741732120514, "learning_rate": 8e-05, "loss": 1.4329, "step": 2871 }, { "epoch": 0.39197488740275693, "grad_norm": 0.1932852864265442, "learning_rate": 8e-05, "loss": 1.5362, "step": 2872 }, { "epoch": 0.39211136890951276, "grad_norm": 0.21710823476314545, "learning_rate": 8e-05, "loss": 1.6275, "step": 2873 }, { "epoch": 0.3922478504162686, "grad_norm": 0.19802843034267426, "learning_rate": 8e-05, "loss": 1.5295, "step": 2874 }, { "epoch": 0.3923843319230244, "grad_norm": 0.19122307002544403, "learning_rate": 8e-05, "loss": 1.4646, "step": 2875 }, { "epoch": 0.3925208134297803, "grad_norm": 0.1979711800813675, "learning_rate": 8e-05, "loss": 1.4536, "step": 2876 }, { "epoch": 0.3926572949365361, "grad_norm": 0.19692827761173248, "learning_rate": 8e-05, "loss": 1.4841, "step": 2877 }, { "epoch": 0.39279377644329194, "grad_norm": 0.2021845132112503, "learning_rate": 8e-05, "loss": 1.5376, "step": 2878 }, { "epoch": 0.39293025795004777, "grad_norm": 0.19492624700069427, "learning_rate": 8e-05, "loss": 1.5264, "step": 2879 }, { "epoch": 0.3930667394568036, "grad_norm": 0.1994514763355255, "learning_rate": 8e-05, "loss": 1.5504, "step": 2880 }, { "epoch": 0.3932032209635594, "grad_norm": 0.19496627151966095, "learning_rate": 8e-05, "loss": 1.5816, "step": 2881 }, { "epoch": 0.3933397024703153, "grad_norm": 0.19734826683998108, "learning_rate": 8e-05, "loss": 1.504, "step": 2882 }, { "epoch": 0.3934761839770711, "grad_norm": 0.200940802693367, "learning_rate": 8e-05, "loss": 1.5165, "step": 2883 }, { "epoch": 0.39361266548382695, "grad_norm": 0.1994246244430542, "learning_rate": 8e-05, "loss": 1.5162, "step": 2884 }, { "epoch": 0.3937491469905828, "grad_norm": 0.19973237812519073, "learning_rate": 8e-05, "loss": 1.5419, "step": 2885 }, { "epoch": 0.3938856284973386, "grad_norm": 0.20020098984241486, "learning_rate": 8e-05, "loss": 1.4667, "step": 2886 }, { "epoch": 0.39402211000409443, "grad_norm": 0.20032110810279846, "learning_rate": 8e-05, "loss": 1.5067, "step": 2887 }, { "epoch": 0.39415859151085025, "grad_norm": 0.197679340839386, "learning_rate": 8e-05, "loss": 1.577, "step": 2888 }, { "epoch": 0.39429507301760613, "grad_norm": 0.19777023792266846, "learning_rate": 8e-05, "loss": 1.5246, "step": 2889 }, { "epoch": 0.39443155452436196, "grad_norm": 0.18900208175182343, "learning_rate": 8e-05, "loss": 1.4818, "step": 2890 }, { "epoch": 0.3945680360311178, "grad_norm": 0.20367109775543213, "learning_rate": 8e-05, "loss": 1.4749, "step": 2891 }, { "epoch": 0.3947045175378736, "grad_norm": 0.19022367894649506, "learning_rate": 8e-05, "loss": 1.4721, "step": 2892 }, { "epoch": 0.39484099904462944, "grad_norm": 0.1938842386007309, "learning_rate": 8e-05, "loss": 1.5289, "step": 2893 }, { "epoch": 0.39497748055138526, "grad_norm": 0.19535568356513977, "learning_rate": 8e-05, "loss": 1.5176, "step": 2894 }, { "epoch": 0.39511396205814114, "grad_norm": 0.19387255609035492, "learning_rate": 8e-05, "loss": 1.5186, "step": 2895 }, { "epoch": 0.39525044356489697, "grad_norm": 0.1997697800397873, "learning_rate": 8e-05, "loss": 1.4979, "step": 2896 }, { "epoch": 0.3953869250716528, "grad_norm": 0.19633899629116058, "learning_rate": 8e-05, "loss": 1.5448, "step": 2897 }, { "epoch": 0.3955234065784086, "grad_norm": 0.19447049498558044, "learning_rate": 8e-05, "loss": 1.4986, "step": 2898 }, { "epoch": 0.39565988808516445, "grad_norm": 0.19827061891555786, "learning_rate": 8e-05, "loss": 1.5021, "step": 2899 }, { "epoch": 0.39579636959192027, "grad_norm": 0.2008560746908188, "learning_rate": 8e-05, "loss": 1.5198, "step": 2900 }, { "epoch": 0.39593285109867615, "grad_norm": 0.20060548186302185, "learning_rate": 8e-05, "loss": 1.551, "step": 2901 }, { "epoch": 0.396069332605432, "grad_norm": 0.20607057213783264, "learning_rate": 8e-05, "loss": 1.5539, "step": 2902 }, { "epoch": 0.3962058141121878, "grad_norm": 0.19841592013835907, "learning_rate": 8e-05, "loss": 1.4703, "step": 2903 }, { "epoch": 0.39634229561894363, "grad_norm": 0.20533297955989838, "learning_rate": 8e-05, "loss": 1.6139, "step": 2904 }, { "epoch": 0.39647877712569946, "grad_norm": 0.19957555830478668, "learning_rate": 8e-05, "loss": 1.4912, "step": 2905 }, { "epoch": 0.3966152586324553, "grad_norm": 0.20700012147426605, "learning_rate": 8e-05, "loss": 1.5563, "step": 2906 }, { "epoch": 0.39675174013921116, "grad_norm": 0.1938125193119049, "learning_rate": 8e-05, "loss": 1.4911, "step": 2907 }, { "epoch": 0.396888221645967, "grad_norm": 0.1980900913476944, "learning_rate": 8e-05, "loss": 1.5737, "step": 2908 }, { "epoch": 0.3970247031527228, "grad_norm": 0.1976432502269745, "learning_rate": 8e-05, "loss": 1.5376, "step": 2909 }, { "epoch": 0.39716118465947864, "grad_norm": 0.19101516902446747, "learning_rate": 8e-05, "loss": 1.4588, "step": 2910 }, { "epoch": 0.39729766616623446, "grad_norm": 0.20482368767261505, "learning_rate": 8e-05, "loss": 1.595, "step": 2911 }, { "epoch": 0.3974341476729903, "grad_norm": 0.19892390072345734, "learning_rate": 8e-05, "loss": 1.5517, "step": 2912 }, { "epoch": 0.39757062917974617, "grad_norm": 0.1995740383863449, "learning_rate": 8e-05, "loss": 1.5065, "step": 2913 }, { "epoch": 0.397707110686502, "grad_norm": 0.20146548748016357, "learning_rate": 8e-05, "loss": 1.4846, "step": 2914 }, { "epoch": 0.3978435921932578, "grad_norm": 0.19357001781463623, "learning_rate": 8e-05, "loss": 1.4879, "step": 2915 }, { "epoch": 0.39798007370001365, "grad_norm": 0.19232109189033508, "learning_rate": 8e-05, "loss": 1.5136, "step": 2916 }, { "epoch": 0.3981165552067695, "grad_norm": 0.20586858689785004, "learning_rate": 8e-05, "loss": 1.5242, "step": 2917 }, { "epoch": 0.3982530367135253, "grad_norm": 0.2016007900238037, "learning_rate": 8e-05, "loss": 1.4635, "step": 2918 }, { "epoch": 0.3983895182202811, "grad_norm": 0.1968727707862854, "learning_rate": 8e-05, "loss": 1.505, "step": 2919 }, { "epoch": 0.398525999727037, "grad_norm": 0.1950712502002716, "learning_rate": 8e-05, "loss": 1.5726, "step": 2920 }, { "epoch": 0.39866248123379283, "grad_norm": 0.19620060920715332, "learning_rate": 8e-05, "loss": 1.5645, "step": 2921 }, { "epoch": 0.39879896274054866, "grad_norm": 0.19629189372062683, "learning_rate": 8e-05, "loss": 1.4995, "step": 2922 }, { "epoch": 0.3989354442473045, "grad_norm": 0.18881890177726746, "learning_rate": 8e-05, "loss": 1.5203, "step": 2923 }, { "epoch": 0.3990719257540603, "grad_norm": 0.2067987322807312, "learning_rate": 8e-05, "loss": 1.557, "step": 2924 }, { "epoch": 0.39920840726081613, "grad_norm": 0.20668795704841614, "learning_rate": 8e-05, "loss": 1.5181, "step": 2925 }, { "epoch": 0.399344888767572, "grad_norm": 0.1990017592906952, "learning_rate": 8e-05, "loss": 1.5297, "step": 2926 }, { "epoch": 0.39948137027432784, "grad_norm": 0.21066772937774658, "learning_rate": 8e-05, "loss": 1.4928, "step": 2927 }, { "epoch": 0.39961785178108367, "grad_norm": 0.2047015279531479, "learning_rate": 8e-05, "loss": 1.5481, "step": 2928 }, { "epoch": 0.3997543332878395, "grad_norm": 0.20325912535190582, "learning_rate": 8e-05, "loss": 1.5252, "step": 2929 }, { "epoch": 0.3998908147945953, "grad_norm": 0.19412583112716675, "learning_rate": 8e-05, "loss": 1.54, "step": 2930 }, { "epoch": 0.40002729630135114, "grad_norm": 0.18911629915237427, "learning_rate": 8e-05, "loss": 1.4648, "step": 2931 }, { "epoch": 0.400163777808107, "grad_norm": 0.19966891407966614, "learning_rate": 8e-05, "loss": 1.4965, "step": 2932 }, { "epoch": 0.40030025931486285, "grad_norm": 0.1976897418498993, "learning_rate": 8e-05, "loss": 1.5198, "step": 2933 }, { "epoch": 0.4004367408216187, "grad_norm": 0.20238204300403595, "learning_rate": 8e-05, "loss": 1.5553, "step": 2934 }, { "epoch": 0.4005732223283745, "grad_norm": 0.19850894808769226, "learning_rate": 8e-05, "loss": 1.5272, "step": 2935 }, { "epoch": 0.4007097038351303, "grad_norm": 0.19660159945487976, "learning_rate": 8e-05, "loss": 1.5494, "step": 2936 }, { "epoch": 0.40084618534188615, "grad_norm": 0.19738981127738953, "learning_rate": 8e-05, "loss": 1.5469, "step": 2937 }, { "epoch": 0.40098266684864203, "grad_norm": 0.20087642967700958, "learning_rate": 8e-05, "loss": 1.532, "step": 2938 }, { "epoch": 0.40111914835539786, "grad_norm": 0.2034512609243393, "learning_rate": 8e-05, "loss": 1.5646, "step": 2939 }, { "epoch": 0.4012556298621537, "grad_norm": 0.2034153938293457, "learning_rate": 8e-05, "loss": 1.5602, "step": 2940 }, { "epoch": 0.4013921113689095, "grad_norm": 0.2082194983959198, "learning_rate": 8e-05, "loss": 1.5926, "step": 2941 }, { "epoch": 0.40152859287566534, "grad_norm": 0.2005544900894165, "learning_rate": 8e-05, "loss": 1.5428, "step": 2942 }, { "epoch": 0.40166507438242116, "grad_norm": 0.1988706886768341, "learning_rate": 8e-05, "loss": 1.5231, "step": 2943 }, { "epoch": 0.40180155588917704, "grad_norm": 0.19264404475688934, "learning_rate": 8e-05, "loss": 1.479, "step": 2944 }, { "epoch": 0.40193803739593287, "grad_norm": 0.1900002509355545, "learning_rate": 8e-05, "loss": 1.4628, "step": 2945 }, { "epoch": 0.4020745189026887, "grad_norm": 0.19762036204338074, "learning_rate": 8e-05, "loss": 1.5035, "step": 2946 }, { "epoch": 0.4022110004094445, "grad_norm": 0.20186229050159454, "learning_rate": 8e-05, "loss": 1.5615, "step": 2947 }, { "epoch": 0.40234748191620034, "grad_norm": 0.19748632609844208, "learning_rate": 8e-05, "loss": 1.5256, "step": 2948 }, { "epoch": 0.40248396342295617, "grad_norm": 0.198756605386734, "learning_rate": 8e-05, "loss": 1.5257, "step": 2949 }, { "epoch": 0.40262044492971205, "grad_norm": 0.21259327232837677, "learning_rate": 8e-05, "loss": 1.5608, "step": 2950 }, { "epoch": 0.4027569264364679, "grad_norm": 0.2042684704065323, "learning_rate": 8e-05, "loss": 1.544, "step": 2951 }, { "epoch": 0.4028934079432237, "grad_norm": 0.2009604573249817, "learning_rate": 8e-05, "loss": 1.5201, "step": 2952 }, { "epoch": 0.40302988944997953, "grad_norm": 0.1993720531463623, "learning_rate": 8e-05, "loss": 1.4881, "step": 2953 }, { "epoch": 0.40316637095673535, "grad_norm": 0.20310159027576447, "learning_rate": 8e-05, "loss": 1.5998, "step": 2954 }, { "epoch": 0.4033028524634912, "grad_norm": 0.19351494312286377, "learning_rate": 8e-05, "loss": 1.501, "step": 2955 }, { "epoch": 0.403439333970247, "grad_norm": 0.20704621076583862, "learning_rate": 8e-05, "loss": 1.5394, "step": 2956 }, { "epoch": 0.4035758154770029, "grad_norm": 0.19627414643764496, "learning_rate": 8e-05, "loss": 1.5563, "step": 2957 }, { "epoch": 0.4037122969837587, "grad_norm": 0.1970394402742386, "learning_rate": 8e-05, "loss": 1.5328, "step": 2958 }, { "epoch": 0.40384877849051454, "grad_norm": 0.2014622986316681, "learning_rate": 8e-05, "loss": 1.5313, "step": 2959 }, { "epoch": 0.40398525999727036, "grad_norm": 0.20270584523677826, "learning_rate": 8e-05, "loss": 1.4908, "step": 2960 }, { "epoch": 0.4041217415040262, "grad_norm": 0.19967079162597656, "learning_rate": 8e-05, "loss": 1.4907, "step": 2961 }, { "epoch": 0.404258223010782, "grad_norm": 0.21107113361358643, "learning_rate": 8e-05, "loss": 1.5352, "step": 2962 }, { "epoch": 0.4043947045175379, "grad_norm": 0.19966532289981842, "learning_rate": 8e-05, "loss": 1.4921, "step": 2963 }, { "epoch": 0.4045311860242937, "grad_norm": 0.1987009048461914, "learning_rate": 8e-05, "loss": 1.5128, "step": 2964 }, { "epoch": 0.40466766753104955, "grad_norm": 0.1970715969800949, "learning_rate": 8e-05, "loss": 1.5314, "step": 2965 }, { "epoch": 0.40480414903780537, "grad_norm": 0.1987844705581665, "learning_rate": 8e-05, "loss": 1.5624, "step": 2966 }, { "epoch": 0.4049406305445612, "grad_norm": 0.2008032202720642, "learning_rate": 8e-05, "loss": 1.566, "step": 2967 }, { "epoch": 0.405077112051317, "grad_norm": 0.2003677785396576, "learning_rate": 8e-05, "loss": 1.5563, "step": 2968 }, { "epoch": 0.4052135935580729, "grad_norm": 0.19556944072246552, "learning_rate": 8e-05, "loss": 1.5453, "step": 2969 }, { "epoch": 0.40535007506482873, "grad_norm": 0.1971994936466217, "learning_rate": 8e-05, "loss": 1.4767, "step": 2970 }, { "epoch": 0.40548655657158456, "grad_norm": 0.1930166780948639, "learning_rate": 8e-05, "loss": 1.5198, "step": 2971 }, { "epoch": 0.4056230380783404, "grad_norm": 0.19386087357997894, "learning_rate": 8e-05, "loss": 1.4588, "step": 2972 }, { "epoch": 0.4057595195850962, "grad_norm": 0.2058456838130951, "learning_rate": 8e-05, "loss": 1.5577, "step": 2973 }, { "epoch": 0.40589600109185203, "grad_norm": 0.19446761906147003, "learning_rate": 8e-05, "loss": 1.4449, "step": 2974 }, { "epoch": 0.4060324825986079, "grad_norm": 0.1961190402507782, "learning_rate": 8e-05, "loss": 1.4753, "step": 2975 }, { "epoch": 0.40616896410536374, "grad_norm": 0.20437094569206238, "learning_rate": 8e-05, "loss": 1.6177, "step": 2976 }, { "epoch": 0.40630544561211956, "grad_norm": 0.19437143206596375, "learning_rate": 8e-05, "loss": 1.5454, "step": 2977 }, { "epoch": 0.4064419271188754, "grad_norm": 0.19836843013763428, "learning_rate": 8e-05, "loss": 1.5779, "step": 2978 }, { "epoch": 0.4065784086256312, "grad_norm": 0.206318661570549, "learning_rate": 8e-05, "loss": 1.5858, "step": 2979 }, { "epoch": 0.40671489013238704, "grad_norm": 0.1988915205001831, "learning_rate": 8e-05, "loss": 1.493, "step": 2980 }, { "epoch": 0.4068513716391429, "grad_norm": 0.20537327229976654, "learning_rate": 8e-05, "loss": 1.5248, "step": 2981 }, { "epoch": 0.40698785314589875, "grad_norm": 0.19781246781349182, "learning_rate": 8e-05, "loss": 1.5081, "step": 2982 }, { "epoch": 0.4071243346526546, "grad_norm": 0.20263095200061798, "learning_rate": 8e-05, "loss": 1.514, "step": 2983 }, { "epoch": 0.4072608161594104, "grad_norm": 0.20989759266376495, "learning_rate": 8e-05, "loss": 1.5565, "step": 2984 }, { "epoch": 0.4073972976661662, "grad_norm": 0.1970755010843277, "learning_rate": 8e-05, "loss": 1.5471, "step": 2985 }, { "epoch": 0.40753377917292205, "grad_norm": 0.2051224708557129, "learning_rate": 8e-05, "loss": 1.5323, "step": 2986 }, { "epoch": 0.4076702606796779, "grad_norm": 0.2038838118314743, "learning_rate": 8e-05, "loss": 1.538, "step": 2987 }, { "epoch": 0.40780674218643376, "grad_norm": 0.20259542763233185, "learning_rate": 8e-05, "loss": 1.5166, "step": 2988 }, { "epoch": 0.4079432236931896, "grad_norm": 0.2053993046283722, "learning_rate": 8e-05, "loss": 1.5617, "step": 2989 }, { "epoch": 0.4080797051999454, "grad_norm": 0.20370444655418396, "learning_rate": 8e-05, "loss": 1.5167, "step": 2990 }, { "epoch": 0.40821618670670123, "grad_norm": 0.20440323650836945, "learning_rate": 8e-05, "loss": 1.5729, "step": 2991 }, { "epoch": 0.40835266821345706, "grad_norm": 0.199214905500412, "learning_rate": 8e-05, "loss": 1.5572, "step": 2992 }, { "epoch": 0.4084891497202129, "grad_norm": 0.20004010200500488, "learning_rate": 8e-05, "loss": 1.5302, "step": 2993 }, { "epoch": 0.40862563122696877, "grad_norm": 0.19804321229457855, "learning_rate": 8e-05, "loss": 1.5475, "step": 2994 }, { "epoch": 0.4087621127337246, "grad_norm": 0.19671191275119781, "learning_rate": 8e-05, "loss": 1.5299, "step": 2995 }, { "epoch": 0.4088985942404804, "grad_norm": 0.19805675745010376, "learning_rate": 8e-05, "loss": 1.5643, "step": 2996 }, { "epoch": 0.40903507574723624, "grad_norm": 0.19692322611808777, "learning_rate": 8e-05, "loss": 1.5769, "step": 2997 }, { "epoch": 0.40917155725399207, "grad_norm": 0.20234470069408417, "learning_rate": 8e-05, "loss": 1.5546, "step": 2998 }, { "epoch": 0.4093080387607479, "grad_norm": 0.1951553076505661, "learning_rate": 8e-05, "loss": 1.5432, "step": 2999 }, { "epoch": 0.4094445202675038, "grad_norm": 0.20264188945293427, "learning_rate": 8e-05, "loss": 1.6206, "step": 3000 }, { "epoch": 0.4095810017742596, "grad_norm": 0.20976346731185913, "learning_rate": 8e-05, "loss": 1.5257, "step": 3001 }, { "epoch": 0.4097174832810154, "grad_norm": 0.19725334644317627, "learning_rate": 8e-05, "loss": 1.4706, "step": 3002 }, { "epoch": 0.40985396478777125, "grad_norm": 0.19273890554904938, "learning_rate": 8e-05, "loss": 1.5066, "step": 3003 }, { "epoch": 0.4099904462945271, "grad_norm": 0.1956668496131897, "learning_rate": 8e-05, "loss": 1.4799, "step": 3004 }, { "epoch": 0.4101269278012829, "grad_norm": 0.20118993520736694, "learning_rate": 8e-05, "loss": 1.4773, "step": 3005 }, { "epoch": 0.4102634093080388, "grad_norm": 0.20344021916389465, "learning_rate": 8e-05, "loss": 1.505, "step": 3006 }, { "epoch": 0.4103998908147946, "grad_norm": 0.19675013422966003, "learning_rate": 8e-05, "loss": 1.5045, "step": 3007 }, { "epoch": 0.41053637232155044, "grad_norm": 0.2034749686717987, "learning_rate": 8e-05, "loss": 1.5023, "step": 3008 }, { "epoch": 0.41067285382830626, "grad_norm": 0.20168781280517578, "learning_rate": 8e-05, "loss": 1.5403, "step": 3009 }, { "epoch": 0.4108093353350621, "grad_norm": 0.19701792299747467, "learning_rate": 8e-05, "loss": 1.424, "step": 3010 }, { "epoch": 0.4109458168418179, "grad_norm": 0.21370914578437805, "learning_rate": 8e-05, "loss": 1.5708, "step": 3011 }, { "epoch": 0.4110822983485738, "grad_norm": 0.19638139009475708, "learning_rate": 8e-05, "loss": 1.4563, "step": 3012 }, { "epoch": 0.4112187798553296, "grad_norm": 0.21868322789669037, "learning_rate": 8e-05, "loss": 1.5132, "step": 3013 }, { "epoch": 0.41135526136208544, "grad_norm": 0.20473575592041016, "learning_rate": 8e-05, "loss": 1.4399, "step": 3014 }, { "epoch": 0.41149174286884127, "grad_norm": 0.20193174481391907, "learning_rate": 8e-05, "loss": 1.5402, "step": 3015 }, { "epoch": 0.4116282243755971, "grad_norm": 0.21881115436553955, "learning_rate": 8e-05, "loss": 1.5054, "step": 3016 }, { "epoch": 0.4117647058823529, "grad_norm": 0.2003670632839203, "learning_rate": 8e-05, "loss": 1.5642, "step": 3017 }, { "epoch": 0.4119011873891088, "grad_norm": 0.1960163414478302, "learning_rate": 8e-05, "loss": 1.5517, "step": 3018 }, { "epoch": 0.41203766889586463, "grad_norm": 0.19543088972568512, "learning_rate": 8e-05, "loss": 1.517, "step": 3019 }, { "epoch": 0.41217415040262045, "grad_norm": 0.20008787512779236, "learning_rate": 8e-05, "loss": 1.5052, "step": 3020 }, { "epoch": 0.4123106319093763, "grad_norm": 0.19972200691699982, "learning_rate": 8e-05, "loss": 1.5241, "step": 3021 }, { "epoch": 0.4124471134161321, "grad_norm": 0.2094513475894928, "learning_rate": 8e-05, "loss": 1.5209, "step": 3022 }, { "epoch": 0.41258359492288793, "grad_norm": 0.1957295536994934, "learning_rate": 8e-05, "loss": 1.4843, "step": 3023 }, { "epoch": 0.41272007642964376, "grad_norm": 0.19742745161056519, "learning_rate": 8e-05, "loss": 1.408, "step": 3024 }, { "epoch": 0.41285655793639964, "grad_norm": 0.20839804410934448, "learning_rate": 8e-05, "loss": 1.5112, "step": 3025 }, { "epoch": 0.41299303944315546, "grad_norm": 0.19152940809726715, "learning_rate": 8e-05, "loss": 1.472, "step": 3026 }, { "epoch": 0.4131295209499113, "grad_norm": 0.2101462185382843, "learning_rate": 8e-05, "loss": 1.5703, "step": 3027 }, { "epoch": 0.4132660024566671, "grad_norm": 0.21248017251491547, "learning_rate": 8e-05, "loss": 1.6109, "step": 3028 }, { "epoch": 0.41340248396342294, "grad_norm": 0.2032170593738556, "learning_rate": 8e-05, "loss": 1.5317, "step": 3029 }, { "epoch": 0.41353896547017877, "grad_norm": 0.1994853913784027, "learning_rate": 8e-05, "loss": 1.5385, "step": 3030 }, { "epoch": 0.41367544697693465, "grad_norm": 0.19901813566684723, "learning_rate": 8e-05, "loss": 1.5269, "step": 3031 }, { "epoch": 0.41381192848369047, "grad_norm": 0.19795392453670502, "learning_rate": 8e-05, "loss": 1.491, "step": 3032 }, { "epoch": 0.4139484099904463, "grad_norm": 0.1975666731595993, "learning_rate": 8e-05, "loss": 1.4593, "step": 3033 }, { "epoch": 0.4140848914972021, "grad_norm": 0.20246610045433044, "learning_rate": 8e-05, "loss": 1.5156, "step": 3034 }, { "epoch": 0.41422137300395795, "grad_norm": 0.20654650032520294, "learning_rate": 8e-05, "loss": 1.5134, "step": 3035 }, { "epoch": 0.4143578545107138, "grad_norm": 0.19502303004264832, "learning_rate": 8e-05, "loss": 1.4803, "step": 3036 }, { "epoch": 0.41449433601746966, "grad_norm": 0.1982269287109375, "learning_rate": 8e-05, "loss": 1.5768, "step": 3037 }, { "epoch": 0.4146308175242255, "grad_norm": 0.202792227268219, "learning_rate": 8e-05, "loss": 1.524, "step": 3038 }, { "epoch": 0.4147672990309813, "grad_norm": 0.19526930153369904, "learning_rate": 8e-05, "loss": 1.5967, "step": 3039 }, { "epoch": 0.41490378053773713, "grad_norm": 0.19401291012763977, "learning_rate": 8e-05, "loss": 1.4669, "step": 3040 }, { "epoch": 0.41504026204449296, "grad_norm": 0.20006267726421356, "learning_rate": 8e-05, "loss": 1.5017, "step": 3041 }, { "epoch": 0.4151767435512488, "grad_norm": 0.20156654715538025, "learning_rate": 8e-05, "loss": 1.5939, "step": 3042 }, { "epoch": 0.41531322505800466, "grad_norm": 0.20333774387836456, "learning_rate": 8e-05, "loss": 1.4993, "step": 3043 }, { "epoch": 0.4154497065647605, "grad_norm": 0.20235462486743927, "learning_rate": 8e-05, "loss": 1.5341, "step": 3044 }, { "epoch": 0.4155861880715163, "grad_norm": 0.19902227818965912, "learning_rate": 8e-05, "loss": 1.488, "step": 3045 }, { "epoch": 0.41572266957827214, "grad_norm": 0.19731463491916656, "learning_rate": 8e-05, "loss": 1.5076, "step": 3046 }, { "epoch": 0.41585915108502797, "grad_norm": 0.20182174444198608, "learning_rate": 8e-05, "loss": 1.5478, "step": 3047 }, { "epoch": 0.4159956325917838, "grad_norm": 0.19677038490772247, "learning_rate": 8e-05, "loss": 1.4686, "step": 3048 }, { "epoch": 0.4161321140985397, "grad_norm": 0.2003851681947708, "learning_rate": 8e-05, "loss": 1.5079, "step": 3049 }, { "epoch": 0.4162685956052955, "grad_norm": 0.2017872929573059, "learning_rate": 8e-05, "loss": 1.5434, "step": 3050 }, { "epoch": 0.4164050771120513, "grad_norm": 0.18890967965126038, "learning_rate": 8e-05, "loss": 1.4969, "step": 3051 }, { "epoch": 0.41654155861880715, "grad_norm": 0.20344127714633942, "learning_rate": 8e-05, "loss": 1.5306, "step": 3052 }, { "epoch": 0.416678040125563, "grad_norm": 0.1985037922859192, "learning_rate": 8e-05, "loss": 1.5052, "step": 3053 }, { "epoch": 0.4168145216323188, "grad_norm": 0.20234611630439758, "learning_rate": 8e-05, "loss": 1.5208, "step": 3054 }, { "epoch": 0.4169510031390747, "grad_norm": 0.20751430094242096, "learning_rate": 8e-05, "loss": 1.5102, "step": 3055 }, { "epoch": 0.4170874846458305, "grad_norm": 0.19542154669761658, "learning_rate": 8e-05, "loss": 1.5077, "step": 3056 }, { "epoch": 0.41722396615258633, "grad_norm": 0.2110983282327652, "learning_rate": 8e-05, "loss": 1.541, "step": 3057 }, { "epoch": 0.41736044765934216, "grad_norm": 0.2074403464794159, "learning_rate": 8e-05, "loss": 1.467, "step": 3058 }, { "epoch": 0.417496929166098, "grad_norm": 0.19831030070781708, "learning_rate": 8e-05, "loss": 1.5048, "step": 3059 }, { "epoch": 0.4176334106728538, "grad_norm": 0.20088212192058563, "learning_rate": 8e-05, "loss": 1.4959, "step": 3060 }, { "epoch": 0.41776989217960964, "grad_norm": 0.2006976455450058, "learning_rate": 8e-05, "loss": 1.5641, "step": 3061 }, { "epoch": 0.4179063736863655, "grad_norm": 0.20028850436210632, "learning_rate": 8e-05, "loss": 1.5521, "step": 3062 }, { "epoch": 0.41804285519312134, "grad_norm": 0.20760108530521393, "learning_rate": 8e-05, "loss": 1.5519, "step": 3063 }, { "epoch": 0.41817933669987717, "grad_norm": 0.20036286115646362, "learning_rate": 8e-05, "loss": 1.457, "step": 3064 }, { "epoch": 0.418315818206633, "grad_norm": 0.20541973412036896, "learning_rate": 8e-05, "loss": 1.5338, "step": 3065 }, { "epoch": 0.4184522997133888, "grad_norm": 0.2129804939031601, "learning_rate": 8e-05, "loss": 1.5762, "step": 3066 }, { "epoch": 0.41858878122014465, "grad_norm": 0.2024850994348526, "learning_rate": 8e-05, "loss": 1.4776, "step": 3067 }, { "epoch": 0.4187252627269005, "grad_norm": 0.1978427767753601, "learning_rate": 8e-05, "loss": 1.5426, "step": 3068 }, { "epoch": 0.41886174423365635, "grad_norm": 0.2182529717683792, "learning_rate": 8e-05, "loss": 1.5776, "step": 3069 }, { "epoch": 0.4189982257404122, "grad_norm": 0.2051420360803604, "learning_rate": 8e-05, "loss": 1.5835, "step": 3070 }, { "epoch": 0.419134707247168, "grad_norm": 0.18921470642089844, "learning_rate": 8e-05, "loss": 1.3556, "step": 3071 }, { "epoch": 0.41927118875392383, "grad_norm": 0.19818145036697388, "learning_rate": 8e-05, "loss": 1.473, "step": 3072 }, { "epoch": 0.41940767026067965, "grad_norm": 0.20514588057994843, "learning_rate": 8e-05, "loss": 1.5518, "step": 3073 }, { "epoch": 0.41954415176743554, "grad_norm": 0.2012423723936081, "learning_rate": 8e-05, "loss": 1.5483, "step": 3074 }, { "epoch": 0.41968063327419136, "grad_norm": 0.19921784102916718, "learning_rate": 8e-05, "loss": 1.5449, "step": 3075 }, { "epoch": 0.4198171147809472, "grad_norm": 0.2028958648443222, "learning_rate": 8e-05, "loss": 1.5844, "step": 3076 }, { "epoch": 0.419953596287703, "grad_norm": 0.19983601570129395, "learning_rate": 8e-05, "loss": 1.4525, "step": 3077 }, { "epoch": 0.42009007779445884, "grad_norm": 0.19572323560714722, "learning_rate": 8e-05, "loss": 1.5365, "step": 3078 }, { "epoch": 0.42022655930121466, "grad_norm": 0.19779036939144135, "learning_rate": 8e-05, "loss": 1.5401, "step": 3079 }, { "epoch": 0.42036304080797054, "grad_norm": 0.20040729641914368, "learning_rate": 8e-05, "loss": 1.5868, "step": 3080 }, { "epoch": 0.42049952231472637, "grad_norm": 0.19912153482437134, "learning_rate": 8e-05, "loss": 1.5922, "step": 3081 }, { "epoch": 0.4206360038214822, "grad_norm": 0.18947699666023254, "learning_rate": 8e-05, "loss": 1.369, "step": 3082 }, { "epoch": 0.420772485328238, "grad_norm": 0.20511123538017273, "learning_rate": 8e-05, "loss": 1.5714, "step": 3083 }, { "epoch": 0.42090896683499385, "grad_norm": 0.20144593715667725, "learning_rate": 8e-05, "loss": 1.5145, "step": 3084 }, { "epoch": 0.4210454483417497, "grad_norm": 0.2135966420173645, "learning_rate": 8e-05, "loss": 1.6062, "step": 3085 }, { "epoch": 0.42118192984850555, "grad_norm": 0.20007885992527008, "learning_rate": 8e-05, "loss": 1.4969, "step": 3086 }, { "epoch": 0.4213184113552614, "grad_norm": 0.2059982866048813, "learning_rate": 8e-05, "loss": 1.5513, "step": 3087 }, { "epoch": 0.4214548928620172, "grad_norm": 0.20596514642238617, "learning_rate": 8e-05, "loss": 1.5241, "step": 3088 }, { "epoch": 0.42159137436877303, "grad_norm": 0.19343741238117218, "learning_rate": 8e-05, "loss": 1.5236, "step": 3089 }, { "epoch": 0.42172785587552886, "grad_norm": 0.21002456545829773, "learning_rate": 8e-05, "loss": 1.5842, "step": 3090 }, { "epoch": 0.4218643373822847, "grad_norm": 0.19752705097198486, "learning_rate": 8e-05, "loss": 1.556, "step": 3091 }, { "epoch": 0.4220008188890405, "grad_norm": 0.19870814681053162, "learning_rate": 8e-05, "loss": 1.5724, "step": 3092 }, { "epoch": 0.4221373003957964, "grad_norm": 0.20339490473270416, "learning_rate": 8e-05, "loss": 1.5534, "step": 3093 }, { "epoch": 0.4222737819025522, "grad_norm": 0.19840386509895325, "learning_rate": 8e-05, "loss": 1.5536, "step": 3094 }, { "epoch": 0.42241026340930804, "grad_norm": 0.19879502058029175, "learning_rate": 8e-05, "loss": 1.5096, "step": 3095 }, { "epoch": 0.42254674491606387, "grad_norm": 0.20178432762622833, "learning_rate": 8e-05, "loss": 1.5249, "step": 3096 }, { "epoch": 0.4226832264228197, "grad_norm": 0.20104891061782837, "learning_rate": 8e-05, "loss": 1.527, "step": 3097 }, { "epoch": 0.4228197079295755, "grad_norm": 0.2035837173461914, "learning_rate": 8e-05, "loss": 1.4853, "step": 3098 }, { "epoch": 0.4229561894363314, "grad_norm": 0.19916000962257385, "learning_rate": 8e-05, "loss": 1.469, "step": 3099 }, { "epoch": 0.4230926709430872, "grad_norm": 0.20036402344703674, "learning_rate": 8e-05, "loss": 1.4276, "step": 3100 }, { "epoch": 0.42322915244984305, "grad_norm": 0.21880033612251282, "learning_rate": 8e-05, "loss": 1.5131, "step": 3101 }, { "epoch": 0.4233656339565989, "grad_norm": 0.19910287857055664, "learning_rate": 8e-05, "loss": 1.5119, "step": 3102 }, { "epoch": 0.4235021154633547, "grad_norm": 0.1978716254234314, "learning_rate": 8e-05, "loss": 1.4507, "step": 3103 }, { "epoch": 0.4236385969701105, "grad_norm": 0.20327045023441315, "learning_rate": 8e-05, "loss": 1.5285, "step": 3104 }, { "epoch": 0.4237750784768664, "grad_norm": 0.1991620510816574, "learning_rate": 8e-05, "loss": 1.4018, "step": 3105 }, { "epoch": 0.42391155998362223, "grad_norm": 0.2157338559627533, "learning_rate": 8e-05, "loss": 1.5252, "step": 3106 }, { "epoch": 0.42404804149037806, "grad_norm": 0.197748064994812, "learning_rate": 8e-05, "loss": 1.5556, "step": 3107 }, { "epoch": 0.4241845229971339, "grad_norm": 0.19764789938926697, "learning_rate": 8e-05, "loss": 1.4718, "step": 3108 }, { "epoch": 0.4243210045038897, "grad_norm": 0.19249731302261353, "learning_rate": 8e-05, "loss": 1.4729, "step": 3109 }, { "epoch": 0.42445748601064553, "grad_norm": 0.200058713555336, "learning_rate": 8e-05, "loss": 1.5313, "step": 3110 }, { "epoch": 0.4245939675174014, "grad_norm": 0.20205599069595337, "learning_rate": 8e-05, "loss": 1.507, "step": 3111 }, { "epoch": 0.42473044902415724, "grad_norm": 0.20448777079582214, "learning_rate": 8e-05, "loss": 1.5568, "step": 3112 }, { "epoch": 0.42486693053091307, "grad_norm": 0.20205707848072052, "learning_rate": 8e-05, "loss": 1.5148, "step": 3113 }, { "epoch": 0.4250034120376689, "grad_norm": 0.20858852565288544, "learning_rate": 8e-05, "loss": 1.5147, "step": 3114 }, { "epoch": 0.4251398935444247, "grad_norm": 0.20042209327220917, "learning_rate": 8e-05, "loss": 1.5403, "step": 3115 }, { "epoch": 0.42527637505118054, "grad_norm": 0.19518731534481049, "learning_rate": 8e-05, "loss": 1.5434, "step": 3116 }, { "epoch": 0.4254128565579364, "grad_norm": 0.19998310506343842, "learning_rate": 8e-05, "loss": 1.5566, "step": 3117 }, { "epoch": 0.42554933806469225, "grad_norm": 0.19976671040058136, "learning_rate": 8e-05, "loss": 1.4858, "step": 3118 }, { "epoch": 0.4256858195714481, "grad_norm": 0.1955524981021881, "learning_rate": 8e-05, "loss": 1.4889, "step": 3119 }, { "epoch": 0.4258223010782039, "grad_norm": 0.209818497300148, "learning_rate": 8e-05, "loss": 1.5221, "step": 3120 }, { "epoch": 0.4259587825849597, "grad_norm": 0.19949398934841156, "learning_rate": 8e-05, "loss": 1.5753, "step": 3121 }, { "epoch": 0.42609526409171555, "grad_norm": 0.1976507008075714, "learning_rate": 8e-05, "loss": 1.5398, "step": 3122 }, { "epoch": 0.42623174559847143, "grad_norm": 0.19486378133296967, "learning_rate": 8e-05, "loss": 1.4543, "step": 3123 }, { "epoch": 0.42636822710522726, "grad_norm": 0.19989556074142456, "learning_rate": 8e-05, "loss": 1.5214, "step": 3124 }, { "epoch": 0.4265047086119831, "grad_norm": 0.19861209392547607, "learning_rate": 8e-05, "loss": 1.5327, "step": 3125 }, { "epoch": 0.4266411901187389, "grad_norm": 0.19279126822948456, "learning_rate": 8e-05, "loss": 1.4738, "step": 3126 }, { "epoch": 0.42677767162549474, "grad_norm": 0.19014571607112885, "learning_rate": 8e-05, "loss": 1.463, "step": 3127 }, { "epoch": 0.42691415313225056, "grad_norm": 0.19941695034503937, "learning_rate": 8e-05, "loss": 1.5012, "step": 3128 }, { "epoch": 0.4270506346390064, "grad_norm": 0.20443876087665558, "learning_rate": 8e-05, "loss": 1.4969, "step": 3129 }, { "epoch": 0.42718711614576227, "grad_norm": 0.20131316781044006, "learning_rate": 8e-05, "loss": 1.5353, "step": 3130 }, { "epoch": 0.4273235976525181, "grad_norm": 0.1999267339706421, "learning_rate": 8e-05, "loss": 1.5957, "step": 3131 }, { "epoch": 0.4274600791592739, "grad_norm": 0.20583860576152802, "learning_rate": 8e-05, "loss": 1.4848, "step": 3132 }, { "epoch": 0.42759656066602975, "grad_norm": 0.20933708548545837, "learning_rate": 8e-05, "loss": 1.5214, "step": 3133 }, { "epoch": 0.42773304217278557, "grad_norm": 0.19596390426158905, "learning_rate": 8e-05, "loss": 1.4283, "step": 3134 }, { "epoch": 0.4278695236795414, "grad_norm": 0.2023884356021881, "learning_rate": 8e-05, "loss": 1.5254, "step": 3135 }, { "epoch": 0.4280060051862973, "grad_norm": 0.2096606343984604, "learning_rate": 8e-05, "loss": 1.5316, "step": 3136 }, { "epoch": 0.4281424866930531, "grad_norm": 0.19961294531822205, "learning_rate": 8e-05, "loss": 1.5022, "step": 3137 }, { "epoch": 0.42827896819980893, "grad_norm": 0.205382838845253, "learning_rate": 8e-05, "loss": 1.5835, "step": 3138 }, { "epoch": 0.42841544970656475, "grad_norm": 0.21725013852119446, "learning_rate": 8e-05, "loss": 1.534, "step": 3139 }, { "epoch": 0.4285519312133206, "grad_norm": 0.19860441982746124, "learning_rate": 8e-05, "loss": 1.477, "step": 3140 }, { "epoch": 0.4286884127200764, "grad_norm": 0.20583562552928925, "learning_rate": 8e-05, "loss": 1.4873, "step": 3141 }, { "epoch": 0.4288248942268323, "grad_norm": 0.21374842524528503, "learning_rate": 8e-05, "loss": 1.4914, "step": 3142 }, { "epoch": 0.4289613757335881, "grad_norm": 0.20345541834831238, "learning_rate": 8e-05, "loss": 1.5, "step": 3143 }, { "epoch": 0.42909785724034394, "grad_norm": 0.20387053489685059, "learning_rate": 8e-05, "loss": 1.4675, "step": 3144 }, { "epoch": 0.42923433874709976, "grad_norm": 0.2053491473197937, "learning_rate": 8e-05, "loss": 1.5377, "step": 3145 }, { "epoch": 0.4293708202538556, "grad_norm": 0.2038285881280899, "learning_rate": 8e-05, "loss": 1.4863, "step": 3146 }, { "epoch": 0.4295073017606114, "grad_norm": 0.19712214171886444, "learning_rate": 8e-05, "loss": 1.4469, "step": 3147 }, { "epoch": 0.4296437832673673, "grad_norm": 0.20303410291671753, "learning_rate": 8e-05, "loss": 1.5351, "step": 3148 }, { "epoch": 0.4297802647741231, "grad_norm": 0.2036440223455429, "learning_rate": 8e-05, "loss": 1.4643, "step": 3149 }, { "epoch": 0.42991674628087895, "grad_norm": 0.2042427361011505, "learning_rate": 8e-05, "loss": 1.5281, "step": 3150 }, { "epoch": 0.4300532277876348, "grad_norm": 0.21023394167423248, "learning_rate": 8e-05, "loss": 1.5282, "step": 3151 }, { "epoch": 0.4301897092943906, "grad_norm": 0.19869254529476166, "learning_rate": 8e-05, "loss": 1.4866, "step": 3152 }, { "epoch": 0.4303261908011464, "grad_norm": 0.2015899270772934, "learning_rate": 8e-05, "loss": 1.5495, "step": 3153 }, { "epoch": 0.4304626723079023, "grad_norm": 0.20118282735347748, "learning_rate": 8e-05, "loss": 1.4714, "step": 3154 }, { "epoch": 0.43059915381465813, "grad_norm": 0.2037975788116455, "learning_rate": 8e-05, "loss": 1.4909, "step": 3155 }, { "epoch": 0.43073563532141396, "grad_norm": 0.19756707549095154, "learning_rate": 8e-05, "loss": 1.501, "step": 3156 }, { "epoch": 0.4308721168281698, "grad_norm": 0.20184838771820068, "learning_rate": 8e-05, "loss": 1.5175, "step": 3157 }, { "epoch": 0.4310085983349256, "grad_norm": 0.20584118366241455, "learning_rate": 8e-05, "loss": 1.5376, "step": 3158 }, { "epoch": 0.43114507984168143, "grad_norm": 0.2120056301355362, "learning_rate": 8e-05, "loss": 1.5982, "step": 3159 }, { "epoch": 0.43128156134843726, "grad_norm": 0.20122769474983215, "learning_rate": 8e-05, "loss": 1.5268, "step": 3160 }, { "epoch": 0.43141804285519314, "grad_norm": 0.20029057562351227, "learning_rate": 8e-05, "loss": 1.5738, "step": 3161 }, { "epoch": 0.43155452436194897, "grad_norm": 0.21150970458984375, "learning_rate": 8e-05, "loss": 1.4944, "step": 3162 }, { "epoch": 0.4316910058687048, "grad_norm": 0.20189321041107178, "learning_rate": 8e-05, "loss": 1.48, "step": 3163 }, { "epoch": 0.4318274873754606, "grad_norm": 0.1946241557598114, "learning_rate": 8e-05, "loss": 1.4505, "step": 3164 }, { "epoch": 0.43196396888221644, "grad_norm": 0.19614176452159882, "learning_rate": 8e-05, "loss": 1.4734, "step": 3165 }, { "epoch": 0.43210045038897227, "grad_norm": 0.19670329988002777, "learning_rate": 8e-05, "loss": 1.457, "step": 3166 }, { "epoch": 0.43223693189572815, "grad_norm": 0.1972903311252594, "learning_rate": 8e-05, "loss": 1.5028, "step": 3167 }, { "epoch": 0.432373413402484, "grad_norm": 0.208711639046669, "learning_rate": 8e-05, "loss": 1.5686, "step": 3168 }, { "epoch": 0.4325098949092398, "grad_norm": 0.20804809033870697, "learning_rate": 8e-05, "loss": 1.5582, "step": 3169 }, { "epoch": 0.4326463764159956, "grad_norm": 0.20560160279273987, "learning_rate": 8e-05, "loss": 1.5931, "step": 3170 }, { "epoch": 0.43278285792275145, "grad_norm": 0.20082396268844604, "learning_rate": 8e-05, "loss": 1.5562, "step": 3171 }, { "epoch": 0.4329193394295073, "grad_norm": 0.20229803025722504, "learning_rate": 8e-05, "loss": 1.5243, "step": 3172 }, { "epoch": 0.43305582093626316, "grad_norm": 0.19297587871551514, "learning_rate": 8e-05, "loss": 1.4546, "step": 3173 }, { "epoch": 0.433192302443019, "grad_norm": 0.20060156285762787, "learning_rate": 8e-05, "loss": 1.5067, "step": 3174 }, { "epoch": 0.4333287839497748, "grad_norm": 0.20060162246227264, "learning_rate": 8e-05, "loss": 1.5367, "step": 3175 }, { "epoch": 0.43346526545653064, "grad_norm": 0.1981133371591568, "learning_rate": 8e-05, "loss": 1.4664, "step": 3176 }, { "epoch": 0.43360174696328646, "grad_norm": 0.20163919031620026, "learning_rate": 8e-05, "loss": 1.4975, "step": 3177 }, { "epoch": 0.4337382284700423, "grad_norm": 0.19982893764972687, "learning_rate": 8e-05, "loss": 1.4712, "step": 3178 }, { "epoch": 0.43387470997679817, "grad_norm": 0.19722945988178253, "learning_rate": 8e-05, "loss": 1.5226, "step": 3179 }, { "epoch": 0.434011191483554, "grad_norm": 0.19788241386413574, "learning_rate": 8e-05, "loss": 1.4761, "step": 3180 }, { "epoch": 0.4341476729903098, "grad_norm": 0.19868123531341553, "learning_rate": 8e-05, "loss": 1.4864, "step": 3181 }, { "epoch": 0.43428415449706564, "grad_norm": 0.1970725655555725, "learning_rate": 8e-05, "loss": 1.515, "step": 3182 }, { "epoch": 0.43442063600382147, "grad_norm": 0.21243047714233398, "learning_rate": 8e-05, "loss": 1.5468, "step": 3183 }, { "epoch": 0.4345571175105773, "grad_norm": 0.19795763492584229, "learning_rate": 8e-05, "loss": 1.5324, "step": 3184 }, { "epoch": 0.4346935990173332, "grad_norm": 0.20356614887714386, "learning_rate": 8e-05, "loss": 1.4669, "step": 3185 }, { "epoch": 0.434830080524089, "grad_norm": 0.20835043489933014, "learning_rate": 8e-05, "loss": 1.623, "step": 3186 }, { "epoch": 0.4349665620308448, "grad_norm": 0.2024518996477127, "learning_rate": 8e-05, "loss": 1.5289, "step": 3187 }, { "epoch": 0.43510304353760065, "grad_norm": 0.204525887966156, "learning_rate": 8e-05, "loss": 1.4834, "step": 3188 }, { "epoch": 0.4352395250443565, "grad_norm": 0.20044328272342682, "learning_rate": 8e-05, "loss": 1.4579, "step": 3189 }, { "epoch": 0.4353760065511123, "grad_norm": 0.19587454199790955, "learning_rate": 8e-05, "loss": 1.4651, "step": 3190 }, { "epoch": 0.4355124880578682, "grad_norm": 0.20442837476730347, "learning_rate": 8e-05, "loss": 1.5035, "step": 3191 }, { "epoch": 0.435648969564624, "grad_norm": 0.1999359279870987, "learning_rate": 8e-05, "loss": 1.4941, "step": 3192 }, { "epoch": 0.43578545107137984, "grad_norm": 0.20011280477046967, "learning_rate": 8e-05, "loss": 1.5063, "step": 3193 }, { "epoch": 0.43592193257813566, "grad_norm": 0.20265041291713715, "learning_rate": 8e-05, "loss": 1.4994, "step": 3194 }, { "epoch": 0.4360584140848915, "grad_norm": 0.20954529941082, "learning_rate": 8e-05, "loss": 1.5897, "step": 3195 }, { "epoch": 0.4361948955916473, "grad_norm": 0.21581964194774628, "learning_rate": 8e-05, "loss": 1.5333, "step": 3196 }, { "epoch": 0.43633137709840314, "grad_norm": 0.21604877710342407, "learning_rate": 8e-05, "loss": 1.5659, "step": 3197 }, { "epoch": 0.436467858605159, "grad_norm": 0.20408225059509277, "learning_rate": 8e-05, "loss": 1.4925, "step": 3198 }, { "epoch": 0.43660434011191485, "grad_norm": 0.21442236006259918, "learning_rate": 8e-05, "loss": 1.5619, "step": 3199 }, { "epoch": 0.43674082161867067, "grad_norm": 0.19689717888832092, "learning_rate": 8e-05, "loss": 1.5474, "step": 3200 }, { "epoch": 0.4368773031254265, "grad_norm": 0.20440372824668884, "learning_rate": 8e-05, "loss": 1.5355, "step": 3201 }, { "epoch": 0.4370137846321823, "grad_norm": 0.20694170892238617, "learning_rate": 8e-05, "loss": 1.4718, "step": 3202 }, { "epoch": 0.43715026613893815, "grad_norm": 0.2014540135860443, "learning_rate": 8e-05, "loss": 1.5509, "step": 3203 }, { "epoch": 0.43728674764569403, "grad_norm": 0.19631561636924744, "learning_rate": 8e-05, "loss": 1.4357, "step": 3204 }, { "epoch": 0.43742322915244986, "grad_norm": 0.19948531687259674, "learning_rate": 8e-05, "loss": 1.4631, "step": 3205 }, { "epoch": 0.4375597106592057, "grad_norm": 0.20264436304569244, "learning_rate": 8e-05, "loss": 1.5159, "step": 3206 }, { "epoch": 0.4376961921659615, "grad_norm": 0.21580493450164795, "learning_rate": 8e-05, "loss": 1.5243, "step": 3207 }, { "epoch": 0.43783267367271733, "grad_norm": 0.20941375195980072, "learning_rate": 8e-05, "loss": 1.5043, "step": 3208 }, { "epoch": 0.43796915517947316, "grad_norm": 0.2025696188211441, "learning_rate": 8e-05, "loss": 1.4622, "step": 3209 }, { "epoch": 0.43810563668622904, "grad_norm": 0.20156575739383698, "learning_rate": 8e-05, "loss": 1.577, "step": 3210 }, { "epoch": 0.43824211819298486, "grad_norm": 0.20984598994255066, "learning_rate": 8e-05, "loss": 1.5591, "step": 3211 }, { "epoch": 0.4383785996997407, "grad_norm": 0.21122805774211884, "learning_rate": 8e-05, "loss": 1.6203, "step": 3212 }, { "epoch": 0.4385150812064965, "grad_norm": 0.1980714350938797, "learning_rate": 8e-05, "loss": 1.4614, "step": 3213 }, { "epoch": 0.43865156271325234, "grad_norm": 0.20132118463516235, "learning_rate": 8e-05, "loss": 1.5208, "step": 3214 }, { "epoch": 0.43878804422000817, "grad_norm": 0.1983589380979538, "learning_rate": 8e-05, "loss": 1.5559, "step": 3215 }, { "epoch": 0.43892452572676405, "grad_norm": 0.19686248898506165, "learning_rate": 8e-05, "loss": 1.4681, "step": 3216 }, { "epoch": 0.4390610072335199, "grad_norm": 0.2037227302789688, "learning_rate": 8e-05, "loss": 1.5326, "step": 3217 }, { "epoch": 0.4391974887402757, "grad_norm": 0.20650477707386017, "learning_rate": 8e-05, "loss": 1.5419, "step": 3218 }, { "epoch": 0.4393339702470315, "grad_norm": 0.2068139910697937, "learning_rate": 8e-05, "loss": 1.5189, "step": 3219 }, { "epoch": 0.43947045175378735, "grad_norm": 0.20991960167884827, "learning_rate": 8e-05, "loss": 1.4901, "step": 3220 }, { "epoch": 0.4396069332605432, "grad_norm": 0.20221896469593048, "learning_rate": 8e-05, "loss": 1.491, "step": 3221 }, { "epoch": 0.43974341476729906, "grad_norm": 0.20129981637001038, "learning_rate": 8e-05, "loss": 1.4262, "step": 3222 }, { "epoch": 0.4398798962740549, "grad_norm": 0.2061825692653656, "learning_rate": 8e-05, "loss": 1.514, "step": 3223 }, { "epoch": 0.4400163777808107, "grad_norm": 0.20163790881633759, "learning_rate": 8e-05, "loss": 1.5286, "step": 3224 }, { "epoch": 0.44015285928756653, "grad_norm": 0.20273712277412415, "learning_rate": 8e-05, "loss": 1.5555, "step": 3225 }, { "epoch": 0.44028934079432236, "grad_norm": 0.20562024414539337, "learning_rate": 8e-05, "loss": 1.5778, "step": 3226 }, { "epoch": 0.4404258223010782, "grad_norm": 0.19715231657028198, "learning_rate": 8e-05, "loss": 1.5121, "step": 3227 }, { "epoch": 0.44056230380783407, "grad_norm": 0.20362688601016998, "learning_rate": 8e-05, "loss": 1.4977, "step": 3228 }, { "epoch": 0.4406987853145899, "grad_norm": 0.20189505815505981, "learning_rate": 8e-05, "loss": 1.4321, "step": 3229 }, { "epoch": 0.4408352668213457, "grad_norm": 0.2063058763742447, "learning_rate": 8e-05, "loss": 1.4807, "step": 3230 }, { "epoch": 0.44097174832810154, "grad_norm": 0.20270198583602905, "learning_rate": 8e-05, "loss": 1.4638, "step": 3231 }, { "epoch": 0.44110822983485737, "grad_norm": 0.20998196303844452, "learning_rate": 8e-05, "loss": 1.5371, "step": 3232 }, { "epoch": 0.4412447113416132, "grad_norm": 0.20822195708751678, "learning_rate": 8e-05, "loss": 1.5656, "step": 3233 }, { "epoch": 0.441381192848369, "grad_norm": 0.20829296112060547, "learning_rate": 8e-05, "loss": 1.5192, "step": 3234 }, { "epoch": 0.4415176743551249, "grad_norm": 0.20105651021003723, "learning_rate": 8e-05, "loss": 1.5134, "step": 3235 }, { "epoch": 0.4416541558618807, "grad_norm": 0.1972811073064804, "learning_rate": 8e-05, "loss": 1.4473, "step": 3236 }, { "epoch": 0.44179063736863655, "grad_norm": 0.196736142039299, "learning_rate": 8e-05, "loss": 1.4542, "step": 3237 }, { "epoch": 0.4419271188753924, "grad_norm": 0.19351115822792053, "learning_rate": 8e-05, "loss": 1.487, "step": 3238 }, { "epoch": 0.4420636003821482, "grad_norm": 0.20947715640068054, "learning_rate": 8e-05, "loss": 1.5274, "step": 3239 }, { "epoch": 0.44220008188890403, "grad_norm": 0.20681138336658478, "learning_rate": 8e-05, "loss": 1.5999, "step": 3240 }, { "epoch": 0.4423365633956599, "grad_norm": 0.2098737210035324, "learning_rate": 8e-05, "loss": 1.5699, "step": 3241 }, { "epoch": 0.44247304490241574, "grad_norm": 0.21125026047229767, "learning_rate": 8e-05, "loss": 1.5756, "step": 3242 }, { "epoch": 0.44260952640917156, "grad_norm": 0.19590415060520172, "learning_rate": 8e-05, "loss": 1.5496, "step": 3243 }, { "epoch": 0.4427460079159274, "grad_norm": 0.21082034707069397, "learning_rate": 8e-05, "loss": 1.5465, "step": 3244 }, { "epoch": 0.4428824894226832, "grad_norm": 0.20047715306282043, "learning_rate": 8e-05, "loss": 1.4536, "step": 3245 }, { "epoch": 0.44301897092943904, "grad_norm": 0.19345968961715698, "learning_rate": 8e-05, "loss": 1.4444, "step": 3246 }, { "epoch": 0.4431554524361949, "grad_norm": 0.1998324692249298, "learning_rate": 8e-05, "loss": 1.5389, "step": 3247 }, { "epoch": 0.44329193394295074, "grad_norm": 0.20281356573104858, "learning_rate": 8e-05, "loss": 1.5028, "step": 3248 }, { "epoch": 0.44342841544970657, "grad_norm": 0.2019728124141693, "learning_rate": 8e-05, "loss": 1.5162, "step": 3249 }, { "epoch": 0.4435648969564624, "grad_norm": 0.20388513803482056, "learning_rate": 8e-05, "loss": 1.5196, "step": 3250 }, { "epoch": 0.4437013784632182, "grad_norm": 0.21065255999565125, "learning_rate": 8e-05, "loss": 1.5262, "step": 3251 }, { "epoch": 0.44383785996997405, "grad_norm": 0.20478476583957672, "learning_rate": 8e-05, "loss": 1.5718, "step": 3252 }, { "epoch": 0.44397434147672993, "grad_norm": 0.20145496726036072, "learning_rate": 8e-05, "loss": 1.4952, "step": 3253 }, { "epoch": 0.44411082298348575, "grad_norm": 0.20837576687335968, "learning_rate": 8e-05, "loss": 1.526, "step": 3254 }, { "epoch": 0.4442473044902416, "grad_norm": 0.20676518976688385, "learning_rate": 8e-05, "loss": 1.5352, "step": 3255 }, { "epoch": 0.4443837859969974, "grad_norm": 0.20005717873573303, "learning_rate": 8e-05, "loss": 1.5089, "step": 3256 }, { "epoch": 0.44452026750375323, "grad_norm": 0.20589189231395721, "learning_rate": 8e-05, "loss": 1.4985, "step": 3257 }, { "epoch": 0.44465674901050906, "grad_norm": 0.19458532333374023, "learning_rate": 8e-05, "loss": 1.4664, "step": 3258 }, { "epoch": 0.44479323051726494, "grad_norm": 0.2031342089176178, "learning_rate": 8e-05, "loss": 1.5389, "step": 3259 }, { "epoch": 0.44492971202402076, "grad_norm": 0.20186376571655273, "learning_rate": 8e-05, "loss": 1.5496, "step": 3260 }, { "epoch": 0.4450661935307766, "grad_norm": 0.21457123756408691, "learning_rate": 8e-05, "loss": 1.5478, "step": 3261 }, { "epoch": 0.4452026750375324, "grad_norm": 0.20058996975421906, "learning_rate": 8e-05, "loss": 1.5224, "step": 3262 }, { "epoch": 0.44533915654428824, "grad_norm": 0.19569219648838043, "learning_rate": 8e-05, "loss": 1.4615, "step": 3263 }, { "epoch": 0.44547563805104406, "grad_norm": 0.20526716113090515, "learning_rate": 8e-05, "loss": 1.5319, "step": 3264 }, { "epoch": 0.4456121195577999, "grad_norm": 0.2020854502916336, "learning_rate": 8e-05, "loss": 1.4831, "step": 3265 }, { "epoch": 0.44574860106455577, "grad_norm": 0.20917055010795593, "learning_rate": 8e-05, "loss": 1.5196, "step": 3266 }, { "epoch": 0.4458850825713116, "grad_norm": 0.19959941506385803, "learning_rate": 8e-05, "loss": 1.4752, "step": 3267 }, { "epoch": 0.4460215640780674, "grad_norm": 0.1964586079120636, "learning_rate": 8e-05, "loss": 1.4329, "step": 3268 }, { "epoch": 0.44615804558482325, "grad_norm": 0.20622660219669342, "learning_rate": 8e-05, "loss": 1.4935, "step": 3269 }, { "epoch": 0.4462945270915791, "grad_norm": 0.20728552341461182, "learning_rate": 8e-05, "loss": 1.4974, "step": 3270 }, { "epoch": 0.4464310085983349, "grad_norm": 0.20910215377807617, "learning_rate": 8e-05, "loss": 1.4808, "step": 3271 }, { "epoch": 0.4465674901050908, "grad_norm": 0.20853453874588013, "learning_rate": 8e-05, "loss": 1.4991, "step": 3272 }, { "epoch": 0.4467039716118466, "grad_norm": 0.20444753766059875, "learning_rate": 8e-05, "loss": 1.5367, "step": 3273 }, { "epoch": 0.44684045311860243, "grad_norm": 0.2087266594171524, "learning_rate": 8e-05, "loss": 1.5366, "step": 3274 }, { "epoch": 0.44697693462535826, "grad_norm": 0.2099228948354721, "learning_rate": 8e-05, "loss": 1.5106, "step": 3275 }, { "epoch": 0.4471134161321141, "grad_norm": 0.20607341825962067, "learning_rate": 8e-05, "loss": 1.5667, "step": 3276 }, { "epoch": 0.4472498976388699, "grad_norm": 0.2116592824459076, "learning_rate": 8e-05, "loss": 1.504, "step": 3277 }, { "epoch": 0.4473863791456258, "grad_norm": 0.20507965981960297, "learning_rate": 8e-05, "loss": 1.5422, "step": 3278 }, { "epoch": 0.4475228606523816, "grad_norm": 0.19957639276981354, "learning_rate": 8e-05, "loss": 1.4904, "step": 3279 }, { "epoch": 0.44765934215913744, "grad_norm": 0.20068909227848053, "learning_rate": 8e-05, "loss": 1.4915, "step": 3280 }, { "epoch": 0.44779582366589327, "grad_norm": 0.19837771356105804, "learning_rate": 8e-05, "loss": 1.5519, "step": 3281 }, { "epoch": 0.4479323051726491, "grad_norm": 0.20921039581298828, "learning_rate": 8e-05, "loss": 1.5068, "step": 3282 }, { "epoch": 0.4480687866794049, "grad_norm": 0.19557024538516998, "learning_rate": 8e-05, "loss": 1.5377, "step": 3283 }, { "epoch": 0.4482052681861608, "grad_norm": 0.2009022831916809, "learning_rate": 8e-05, "loss": 1.5053, "step": 3284 }, { "epoch": 0.4483417496929166, "grad_norm": 0.19988054037094116, "learning_rate": 8e-05, "loss": 1.5538, "step": 3285 }, { "epoch": 0.44847823119967245, "grad_norm": 0.20481422543525696, "learning_rate": 8e-05, "loss": 1.548, "step": 3286 }, { "epoch": 0.4486147127064283, "grad_norm": 0.20532605051994324, "learning_rate": 8e-05, "loss": 1.5212, "step": 3287 }, { "epoch": 0.4487511942131841, "grad_norm": 0.19546401500701904, "learning_rate": 8e-05, "loss": 1.4819, "step": 3288 }, { "epoch": 0.4488876757199399, "grad_norm": 0.2069421410560608, "learning_rate": 8e-05, "loss": 1.4446, "step": 3289 }, { "epoch": 0.4490241572266958, "grad_norm": 0.21116097271442413, "learning_rate": 8e-05, "loss": 1.5318, "step": 3290 }, { "epoch": 0.44916063873345163, "grad_norm": 0.20897267758846283, "learning_rate": 8e-05, "loss": 1.5194, "step": 3291 }, { "epoch": 0.44929712024020746, "grad_norm": 0.204427108168602, "learning_rate": 8e-05, "loss": 1.5079, "step": 3292 }, { "epoch": 0.4494336017469633, "grad_norm": 0.20322507619857788, "learning_rate": 8e-05, "loss": 1.5059, "step": 3293 }, { "epoch": 0.4495700832537191, "grad_norm": 0.20074470341205597, "learning_rate": 8e-05, "loss": 1.4903, "step": 3294 }, { "epoch": 0.44970656476047494, "grad_norm": 0.20163731276988983, "learning_rate": 8e-05, "loss": 1.5078, "step": 3295 }, { "epoch": 0.4498430462672308, "grad_norm": 0.20294135808944702, "learning_rate": 8e-05, "loss": 1.5126, "step": 3296 }, { "epoch": 0.44997952777398664, "grad_norm": 0.19917891919612885, "learning_rate": 8e-05, "loss": 1.5193, "step": 3297 }, { "epoch": 0.45011600928074247, "grad_norm": 0.20442698895931244, "learning_rate": 8e-05, "loss": 1.5257, "step": 3298 }, { "epoch": 0.4502524907874983, "grad_norm": 0.21351264417171478, "learning_rate": 8e-05, "loss": 1.5751, "step": 3299 }, { "epoch": 0.4503889722942541, "grad_norm": 0.20064903795719147, "learning_rate": 8e-05, "loss": 1.4876, "step": 3300 }, { "epoch": 0.45052545380100995, "grad_norm": 0.20760200917720795, "learning_rate": 8e-05, "loss": 1.5109, "step": 3301 }, { "epoch": 0.45066193530776577, "grad_norm": 0.21604247391223907, "learning_rate": 8e-05, "loss": 1.5034, "step": 3302 }, { "epoch": 0.45079841681452165, "grad_norm": 0.20299625396728516, "learning_rate": 8e-05, "loss": 1.5225, "step": 3303 }, { "epoch": 0.4509348983212775, "grad_norm": 0.20287269353866577, "learning_rate": 8e-05, "loss": 1.5259, "step": 3304 }, { "epoch": 0.4510713798280333, "grad_norm": 0.20292556285858154, "learning_rate": 8e-05, "loss": 1.5481, "step": 3305 }, { "epoch": 0.45120786133478913, "grad_norm": 0.20476415753364563, "learning_rate": 8e-05, "loss": 1.4859, "step": 3306 }, { "epoch": 0.45134434284154495, "grad_norm": 0.20652766525745392, "learning_rate": 8e-05, "loss": 1.5395, "step": 3307 }, { "epoch": 0.4514808243483008, "grad_norm": 0.1977231353521347, "learning_rate": 8e-05, "loss": 1.4379, "step": 3308 }, { "epoch": 0.45161730585505666, "grad_norm": 0.20224037766456604, "learning_rate": 8e-05, "loss": 1.5549, "step": 3309 }, { "epoch": 0.4517537873618125, "grad_norm": 0.20821750164031982, "learning_rate": 8e-05, "loss": 1.5043, "step": 3310 }, { "epoch": 0.4518902688685683, "grad_norm": 0.20505215227603912, "learning_rate": 8e-05, "loss": 1.5094, "step": 3311 }, { "epoch": 0.45202675037532414, "grad_norm": 0.20222346484661102, "learning_rate": 8e-05, "loss": 1.5545, "step": 3312 }, { "epoch": 0.45216323188207996, "grad_norm": 0.20393307507038116, "learning_rate": 8e-05, "loss": 1.5098, "step": 3313 }, { "epoch": 0.4522997133888358, "grad_norm": 0.20658648014068604, "learning_rate": 8e-05, "loss": 1.556, "step": 3314 }, { "epoch": 0.45243619489559167, "grad_norm": 0.2030039280653, "learning_rate": 8e-05, "loss": 1.5381, "step": 3315 }, { "epoch": 0.4525726764023475, "grad_norm": 0.19749687612056732, "learning_rate": 8e-05, "loss": 1.5239, "step": 3316 }, { "epoch": 0.4527091579091033, "grad_norm": 0.20338688790798187, "learning_rate": 8e-05, "loss": 1.5507, "step": 3317 }, { "epoch": 0.45284563941585915, "grad_norm": 0.20814357697963715, "learning_rate": 8e-05, "loss": 1.487, "step": 3318 }, { "epoch": 0.452982120922615, "grad_norm": 0.20874108374118805, "learning_rate": 8e-05, "loss": 1.5555, "step": 3319 }, { "epoch": 0.4531186024293708, "grad_norm": 0.1981109231710434, "learning_rate": 8e-05, "loss": 1.5383, "step": 3320 }, { "epoch": 0.4532550839361267, "grad_norm": 0.19380518794059753, "learning_rate": 8e-05, "loss": 1.3919, "step": 3321 }, { "epoch": 0.4533915654428825, "grad_norm": 0.19989387691020966, "learning_rate": 8e-05, "loss": 1.4703, "step": 3322 }, { "epoch": 0.45352804694963833, "grad_norm": 0.2048611342906952, "learning_rate": 8e-05, "loss": 1.497, "step": 3323 }, { "epoch": 0.45366452845639416, "grad_norm": 0.19949188828468323, "learning_rate": 8e-05, "loss": 1.5242, "step": 3324 }, { "epoch": 0.45380100996315, "grad_norm": 0.20602813363075256, "learning_rate": 8e-05, "loss": 1.4855, "step": 3325 }, { "epoch": 0.4539374914699058, "grad_norm": 0.2010207623243332, "learning_rate": 8e-05, "loss": 1.487, "step": 3326 }, { "epoch": 0.4540739729766617, "grad_norm": 0.19941088557243347, "learning_rate": 8e-05, "loss": 1.4818, "step": 3327 }, { "epoch": 0.4542104544834175, "grad_norm": 0.21096745133399963, "learning_rate": 8e-05, "loss": 1.5628, "step": 3328 }, { "epoch": 0.45434693599017334, "grad_norm": 0.20490902662277222, "learning_rate": 8e-05, "loss": 1.4486, "step": 3329 }, { "epoch": 0.45448341749692917, "grad_norm": 0.20350489020347595, "learning_rate": 8e-05, "loss": 1.4678, "step": 3330 }, { "epoch": 0.454619899003685, "grad_norm": 0.19724179804325104, "learning_rate": 8e-05, "loss": 1.4632, "step": 3331 }, { "epoch": 0.4547563805104408, "grad_norm": 0.20504145324230194, "learning_rate": 8e-05, "loss": 1.4499, "step": 3332 }, { "epoch": 0.4548928620171967, "grad_norm": 0.20698674023151398, "learning_rate": 8e-05, "loss": 1.5428, "step": 3333 }, { "epoch": 0.4550293435239525, "grad_norm": 0.20949898660182953, "learning_rate": 8e-05, "loss": 1.5215, "step": 3334 }, { "epoch": 0.45516582503070835, "grad_norm": 0.2054181545972824, "learning_rate": 8e-05, "loss": 1.4951, "step": 3335 }, { "epoch": 0.4553023065374642, "grad_norm": 0.20110256969928741, "learning_rate": 8e-05, "loss": 1.5476, "step": 3336 }, { "epoch": 0.45543878804422, "grad_norm": 0.21335738897323608, "learning_rate": 8e-05, "loss": 1.4973, "step": 3337 }, { "epoch": 0.4555752695509758, "grad_norm": 0.22037526965141296, "learning_rate": 8e-05, "loss": 1.5179, "step": 3338 }, { "epoch": 0.45571175105773165, "grad_norm": 0.2023409754037857, "learning_rate": 8e-05, "loss": 1.4262, "step": 3339 }, { "epoch": 0.45584823256448753, "grad_norm": 0.2147899866104126, "learning_rate": 8e-05, "loss": 1.5505, "step": 3340 }, { "epoch": 0.45598471407124336, "grad_norm": 0.20637552440166473, "learning_rate": 8e-05, "loss": 1.4571, "step": 3341 }, { "epoch": 0.4561211955779992, "grad_norm": 0.20200565457344055, "learning_rate": 8e-05, "loss": 1.5192, "step": 3342 }, { "epoch": 0.456257677084755, "grad_norm": 0.2167416363954544, "learning_rate": 8e-05, "loss": 1.4599, "step": 3343 }, { "epoch": 0.45639415859151083, "grad_norm": 0.2135801911354065, "learning_rate": 8e-05, "loss": 1.5743, "step": 3344 }, { "epoch": 0.45653064009826666, "grad_norm": 0.19715051352977753, "learning_rate": 8e-05, "loss": 1.4314, "step": 3345 }, { "epoch": 0.45666712160502254, "grad_norm": 0.21061591804027557, "learning_rate": 8e-05, "loss": 1.4466, "step": 3346 }, { "epoch": 0.45680360311177837, "grad_norm": 0.22038885951042175, "learning_rate": 8e-05, "loss": 1.4793, "step": 3347 }, { "epoch": 0.4569400846185342, "grad_norm": 0.21247152984142303, "learning_rate": 8e-05, "loss": 1.54, "step": 3348 }, { "epoch": 0.45707656612529, "grad_norm": 0.20761588215827942, "learning_rate": 8e-05, "loss": 1.4878, "step": 3349 }, { "epoch": 0.45721304763204584, "grad_norm": 0.20544196665287018, "learning_rate": 8e-05, "loss": 1.4723, "step": 3350 }, { "epoch": 0.45734952913880167, "grad_norm": 0.1954910159111023, "learning_rate": 8e-05, "loss": 1.5197, "step": 3351 }, { "epoch": 0.45748601064555755, "grad_norm": 0.21049875020980835, "learning_rate": 8e-05, "loss": 1.4816, "step": 3352 }, { "epoch": 0.4576224921523134, "grad_norm": 0.20720385015010834, "learning_rate": 8e-05, "loss": 1.5614, "step": 3353 }, { "epoch": 0.4577589736590692, "grad_norm": 0.19467219710350037, "learning_rate": 8e-05, "loss": 1.4886, "step": 3354 }, { "epoch": 0.457895455165825, "grad_norm": 0.21409712731838226, "learning_rate": 8e-05, "loss": 1.5023, "step": 3355 }, { "epoch": 0.45803193667258085, "grad_norm": 0.20801040530204773, "learning_rate": 8e-05, "loss": 1.5532, "step": 3356 }, { "epoch": 0.4581684181793367, "grad_norm": 0.2032715082168579, "learning_rate": 8e-05, "loss": 1.5405, "step": 3357 }, { "epoch": 0.45830489968609256, "grad_norm": 0.2053171992301941, "learning_rate": 8e-05, "loss": 1.49, "step": 3358 }, { "epoch": 0.4584413811928484, "grad_norm": 0.2031218409538269, "learning_rate": 8e-05, "loss": 1.4881, "step": 3359 }, { "epoch": 0.4585778626996042, "grad_norm": 0.20945163071155548, "learning_rate": 8e-05, "loss": 1.5482, "step": 3360 }, { "epoch": 0.45871434420636004, "grad_norm": 0.20190584659576416, "learning_rate": 8e-05, "loss": 1.5562, "step": 3361 }, { "epoch": 0.45885082571311586, "grad_norm": 0.21301424503326416, "learning_rate": 8e-05, "loss": 1.5705, "step": 3362 }, { "epoch": 0.4589873072198717, "grad_norm": 0.2128937691450119, "learning_rate": 8e-05, "loss": 1.549, "step": 3363 }, { "epoch": 0.45912378872662757, "grad_norm": 0.210130974650383, "learning_rate": 8e-05, "loss": 1.5656, "step": 3364 }, { "epoch": 0.4592602702333834, "grad_norm": 0.2120717614889145, "learning_rate": 8e-05, "loss": 1.5912, "step": 3365 }, { "epoch": 0.4593967517401392, "grad_norm": 0.19900810718536377, "learning_rate": 8e-05, "loss": 1.5107, "step": 3366 }, { "epoch": 0.45953323324689505, "grad_norm": 0.20568035542964935, "learning_rate": 8e-05, "loss": 1.426, "step": 3367 }, { "epoch": 0.45966971475365087, "grad_norm": 0.19921958446502686, "learning_rate": 8e-05, "loss": 1.4982, "step": 3368 }, { "epoch": 0.4598061962604067, "grad_norm": 0.19632309675216675, "learning_rate": 8e-05, "loss": 1.486, "step": 3369 }, { "epoch": 0.4599426777671625, "grad_norm": 0.202032670378685, "learning_rate": 8e-05, "loss": 1.5204, "step": 3370 }, { "epoch": 0.4600791592739184, "grad_norm": 0.20294281840324402, "learning_rate": 8e-05, "loss": 1.448, "step": 3371 }, { "epoch": 0.46021564078067423, "grad_norm": 0.204691082239151, "learning_rate": 8e-05, "loss": 1.5305, "step": 3372 }, { "epoch": 0.46035212228743005, "grad_norm": 0.223212331533432, "learning_rate": 8e-05, "loss": 1.538, "step": 3373 }, { "epoch": 0.4604886037941859, "grad_norm": 0.202362522482872, "learning_rate": 8e-05, "loss": 1.428, "step": 3374 }, { "epoch": 0.4606250853009417, "grad_norm": 0.20770061016082764, "learning_rate": 8e-05, "loss": 1.5372, "step": 3375 }, { "epoch": 0.46076156680769753, "grad_norm": 0.20081312954425812, "learning_rate": 8e-05, "loss": 1.4934, "step": 3376 }, { "epoch": 0.4608980483144534, "grad_norm": 0.21202722191810608, "learning_rate": 8e-05, "loss": 1.574, "step": 3377 }, { "epoch": 0.46103452982120924, "grad_norm": 0.20443178713321686, "learning_rate": 8e-05, "loss": 1.4857, "step": 3378 }, { "epoch": 0.46117101132796506, "grad_norm": 0.19714051485061646, "learning_rate": 8e-05, "loss": 1.4355, "step": 3379 }, { "epoch": 0.4613074928347209, "grad_norm": 0.20318607985973358, "learning_rate": 8e-05, "loss": 1.5038, "step": 3380 }, { "epoch": 0.4614439743414767, "grad_norm": 0.21267586946487427, "learning_rate": 8e-05, "loss": 1.5353, "step": 3381 }, { "epoch": 0.46158045584823254, "grad_norm": 0.21322016417980194, "learning_rate": 8e-05, "loss": 1.4911, "step": 3382 }, { "epoch": 0.4617169373549884, "grad_norm": 0.20762303471565247, "learning_rate": 8e-05, "loss": 1.5358, "step": 3383 }, { "epoch": 0.46185341886174425, "grad_norm": 0.19948901236057281, "learning_rate": 8e-05, "loss": 1.4694, "step": 3384 }, { "epoch": 0.4619899003685001, "grad_norm": 0.20969977974891663, "learning_rate": 8e-05, "loss": 1.5382, "step": 3385 }, { "epoch": 0.4621263818752559, "grad_norm": 0.20358628034591675, "learning_rate": 8e-05, "loss": 1.4935, "step": 3386 }, { "epoch": 0.4622628633820117, "grad_norm": 0.20342092216014862, "learning_rate": 8e-05, "loss": 1.4972, "step": 3387 }, { "epoch": 0.46239934488876755, "grad_norm": 0.20850619673728943, "learning_rate": 8e-05, "loss": 1.5141, "step": 3388 }, { "epoch": 0.46253582639552343, "grad_norm": 0.19653436541557312, "learning_rate": 8e-05, "loss": 1.4729, "step": 3389 }, { "epoch": 0.46267230790227926, "grad_norm": 0.2011420726776123, "learning_rate": 8e-05, "loss": 1.5085, "step": 3390 }, { "epoch": 0.4628087894090351, "grad_norm": 0.21052059531211853, "learning_rate": 8e-05, "loss": 1.5378, "step": 3391 }, { "epoch": 0.4629452709157909, "grad_norm": 0.21375620365142822, "learning_rate": 8e-05, "loss": 1.5275, "step": 3392 }, { "epoch": 0.46308175242254673, "grad_norm": 0.20165874063968658, "learning_rate": 8e-05, "loss": 1.4789, "step": 3393 }, { "epoch": 0.46321823392930256, "grad_norm": 0.21167610585689545, "learning_rate": 8e-05, "loss": 1.5844, "step": 3394 }, { "epoch": 0.46335471543605844, "grad_norm": 0.20535144209861755, "learning_rate": 8e-05, "loss": 1.5265, "step": 3395 }, { "epoch": 0.46349119694281427, "grad_norm": 0.20090624690055847, "learning_rate": 8e-05, "loss": 1.4745, "step": 3396 }, { "epoch": 0.4636276784495701, "grad_norm": 0.20968738198280334, "learning_rate": 8e-05, "loss": 1.5621, "step": 3397 }, { "epoch": 0.4637641599563259, "grad_norm": 0.20477834343910217, "learning_rate": 8e-05, "loss": 1.4472, "step": 3398 }, { "epoch": 0.46390064146308174, "grad_norm": 0.1990976184606552, "learning_rate": 8e-05, "loss": 1.433, "step": 3399 }, { "epoch": 0.46403712296983757, "grad_norm": 0.20355604588985443, "learning_rate": 8e-05, "loss": 1.5164, "step": 3400 }, { "epoch": 0.46417360447659345, "grad_norm": 0.2029486745595932, "learning_rate": 8e-05, "loss": 1.5304, "step": 3401 }, { "epoch": 0.4643100859833493, "grad_norm": 0.20830398797988892, "learning_rate": 8e-05, "loss": 1.6072, "step": 3402 }, { "epoch": 0.4644465674901051, "grad_norm": 0.20323744416236877, "learning_rate": 8e-05, "loss": 1.4365, "step": 3403 }, { "epoch": 0.4645830489968609, "grad_norm": 0.20358824729919434, "learning_rate": 8e-05, "loss": 1.5638, "step": 3404 }, { "epoch": 0.46471953050361675, "grad_norm": 0.2191862314939499, "learning_rate": 8e-05, "loss": 1.395, "step": 3405 }, { "epoch": 0.4648560120103726, "grad_norm": 0.2157006561756134, "learning_rate": 8e-05, "loss": 1.5885, "step": 3406 }, { "epoch": 0.4649924935171284, "grad_norm": 0.20152826607227325, "learning_rate": 8e-05, "loss": 1.5038, "step": 3407 }, { "epoch": 0.4651289750238843, "grad_norm": 0.23860777914524078, "learning_rate": 8e-05, "loss": 1.564, "step": 3408 }, { "epoch": 0.4652654565306401, "grad_norm": 0.21131755411624908, "learning_rate": 8e-05, "loss": 1.5317, "step": 3409 }, { "epoch": 0.46540193803739593, "grad_norm": 0.21757672727108002, "learning_rate": 8e-05, "loss": 1.4683, "step": 3410 }, { "epoch": 0.46553841954415176, "grad_norm": 0.22972005605697632, "learning_rate": 8e-05, "loss": 1.471, "step": 3411 }, { "epoch": 0.4656749010509076, "grad_norm": 0.21489542722702026, "learning_rate": 8e-05, "loss": 1.4452, "step": 3412 }, { "epoch": 0.4658113825576634, "grad_norm": 0.21335068345069885, "learning_rate": 8e-05, "loss": 1.5354, "step": 3413 }, { "epoch": 0.4659478640644193, "grad_norm": 0.23898380994796753, "learning_rate": 8e-05, "loss": 1.4957, "step": 3414 }, { "epoch": 0.4660843455711751, "grad_norm": 0.2167702615261078, "learning_rate": 8e-05, "loss": 1.5266, "step": 3415 }, { "epoch": 0.46622082707793094, "grad_norm": 0.22000667452812195, "learning_rate": 8e-05, "loss": 1.5579, "step": 3416 }, { "epoch": 0.46635730858468677, "grad_norm": 0.22867894172668457, "learning_rate": 8e-05, "loss": 1.4241, "step": 3417 }, { "epoch": 0.4664937900914426, "grad_norm": 0.20478494465351105, "learning_rate": 8e-05, "loss": 1.5135, "step": 3418 }, { "epoch": 0.4666302715981984, "grad_norm": 0.20832200348377228, "learning_rate": 8e-05, "loss": 1.4789, "step": 3419 }, { "epoch": 0.4667667531049543, "grad_norm": 0.23364369571208954, "learning_rate": 8e-05, "loss": 1.479, "step": 3420 }, { "epoch": 0.4669032346117101, "grad_norm": 0.20809462666511536, "learning_rate": 8e-05, "loss": 1.5149, "step": 3421 }, { "epoch": 0.46703971611846595, "grad_norm": 0.2016485035419464, "learning_rate": 8e-05, "loss": 1.5433, "step": 3422 }, { "epoch": 0.4671761976252218, "grad_norm": 0.2142065316438675, "learning_rate": 8e-05, "loss": 1.4763, "step": 3423 }, { "epoch": 0.4673126791319776, "grad_norm": 0.21977971494197845, "learning_rate": 8e-05, "loss": 1.4527, "step": 3424 }, { "epoch": 0.46744916063873343, "grad_norm": 0.21415258944034576, "learning_rate": 8e-05, "loss": 1.5198, "step": 3425 }, { "epoch": 0.4675856421454893, "grad_norm": 0.23099614679813385, "learning_rate": 8e-05, "loss": 1.4281, "step": 3426 }, { "epoch": 0.46772212365224514, "grad_norm": 0.2302975356578827, "learning_rate": 8e-05, "loss": 1.5825, "step": 3427 }, { "epoch": 0.46785860515900096, "grad_norm": 0.2032427042722702, "learning_rate": 8e-05, "loss": 1.423, "step": 3428 }, { "epoch": 0.4679950866657568, "grad_norm": 0.21455858647823334, "learning_rate": 8e-05, "loss": 1.4452, "step": 3429 }, { "epoch": 0.4681315681725126, "grad_norm": 0.22139950096607208, "learning_rate": 8e-05, "loss": 1.4743, "step": 3430 }, { "epoch": 0.46826804967926844, "grad_norm": 0.20664794743061066, "learning_rate": 8e-05, "loss": 1.5245, "step": 3431 }, { "epoch": 0.4684045311860243, "grad_norm": 0.21289144456386566, "learning_rate": 8e-05, "loss": 1.5559, "step": 3432 }, { "epoch": 0.46854101269278015, "grad_norm": 0.20248781144618988, "learning_rate": 8e-05, "loss": 1.5166, "step": 3433 }, { "epoch": 0.46867749419953597, "grad_norm": 0.20343711972236633, "learning_rate": 8e-05, "loss": 1.4952, "step": 3434 }, { "epoch": 0.4688139757062918, "grad_norm": 0.20081031322479248, "learning_rate": 8e-05, "loss": 1.4039, "step": 3435 }, { "epoch": 0.4689504572130476, "grad_norm": 0.2264920473098755, "learning_rate": 8e-05, "loss": 1.5469, "step": 3436 }, { "epoch": 0.46908693871980345, "grad_norm": 0.20996640622615814, "learning_rate": 8e-05, "loss": 1.5319, "step": 3437 }, { "epoch": 0.4692234202265593, "grad_norm": 0.2003016173839569, "learning_rate": 8e-05, "loss": 1.4972, "step": 3438 }, { "epoch": 0.46935990173331515, "grad_norm": 0.20201997458934784, "learning_rate": 8e-05, "loss": 1.4892, "step": 3439 }, { "epoch": 0.469496383240071, "grad_norm": 0.21266897022724152, "learning_rate": 8e-05, "loss": 1.5711, "step": 3440 }, { "epoch": 0.4696328647468268, "grad_norm": 0.20544934272766113, "learning_rate": 8e-05, "loss": 1.5186, "step": 3441 }, { "epoch": 0.46976934625358263, "grad_norm": 0.2060510665178299, "learning_rate": 8e-05, "loss": 1.4759, "step": 3442 }, { "epoch": 0.46990582776033846, "grad_norm": 0.20486944913864136, "learning_rate": 8e-05, "loss": 1.489, "step": 3443 }, { "epoch": 0.4700423092670943, "grad_norm": 0.20600253343582153, "learning_rate": 8e-05, "loss": 1.5459, "step": 3444 }, { "epoch": 0.47017879077385016, "grad_norm": 0.20423419773578644, "learning_rate": 8e-05, "loss": 1.4899, "step": 3445 }, { "epoch": 0.470315272280606, "grad_norm": 0.20719143748283386, "learning_rate": 8e-05, "loss": 1.4813, "step": 3446 }, { "epoch": 0.4704517537873618, "grad_norm": 0.20277182757854462, "learning_rate": 8e-05, "loss": 1.4333, "step": 3447 }, { "epoch": 0.47058823529411764, "grad_norm": 0.2142658680677414, "learning_rate": 8e-05, "loss": 1.5602, "step": 3448 }, { "epoch": 0.47072471680087347, "grad_norm": 0.20700852572917938, "learning_rate": 8e-05, "loss": 1.5091, "step": 3449 }, { "epoch": 0.4708611983076293, "grad_norm": 0.20282982289791107, "learning_rate": 8e-05, "loss": 1.4929, "step": 3450 }, { "epoch": 0.4709976798143852, "grad_norm": 0.21157848834991455, "learning_rate": 8e-05, "loss": 1.5719, "step": 3451 }, { "epoch": 0.471134161321141, "grad_norm": 0.20206362009048462, "learning_rate": 8e-05, "loss": 1.4907, "step": 3452 }, { "epoch": 0.4712706428278968, "grad_norm": 0.21035797894001007, "learning_rate": 8e-05, "loss": 1.5157, "step": 3453 }, { "epoch": 0.47140712433465265, "grad_norm": 0.2178732007741928, "learning_rate": 8e-05, "loss": 1.5156, "step": 3454 }, { "epoch": 0.4715436058414085, "grad_norm": 0.19992229342460632, "learning_rate": 8e-05, "loss": 1.4092, "step": 3455 }, { "epoch": 0.4716800873481643, "grad_norm": 0.20279943943023682, "learning_rate": 8e-05, "loss": 1.4586, "step": 3456 }, { "epoch": 0.4718165688549202, "grad_norm": 0.21036101877689362, "learning_rate": 8e-05, "loss": 1.5242, "step": 3457 }, { "epoch": 0.471953050361676, "grad_norm": 0.1957864910364151, "learning_rate": 8e-05, "loss": 1.4407, "step": 3458 }, { "epoch": 0.47208953186843183, "grad_norm": 0.2215675711631775, "learning_rate": 8e-05, "loss": 1.5938, "step": 3459 }, { "epoch": 0.47222601337518766, "grad_norm": 0.2071194052696228, "learning_rate": 8e-05, "loss": 1.4956, "step": 3460 }, { "epoch": 0.4723624948819435, "grad_norm": 0.20393699407577515, "learning_rate": 8e-05, "loss": 1.4817, "step": 3461 }, { "epoch": 0.4724989763886993, "grad_norm": 0.21055908501148224, "learning_rate": 8e-05, "loss": 1.5039, "step": 3462 }, { "epoch": 0.4726354578954552, "grad_norm": 0.20912297070026398, "learning_rate": 8e-05, "loss": 1.5247, "step": 3463 }, { "epoch": 0.472771939402211, "grad_norm": 0.2044554054737091, "learning_rate": 8e-05, "loss": 1.4924, "step": 3464 }, { "epoch": 0.47290842090896684, "grad_norm": 0.21440665423870087, "learning_rate": 8e-05, "loss": 1.474, "step": 3465 }, { "epoch": 0.47304490241572267, "grad_norm": 0.2028266042470932, "learning_rate": 8e-05, "loss": 1.5522, "step": 3466 }, { "epoch": 0.4731813839224785, "grad_norm": 0.20227700471878052, "learning_rate": 8e-05, "loss": 1.5004, "step": 3467 }, { "epoch": 0.4733178654292343, "grad_norm": 0.2044088989496231, "learning_rate": 8e-05, "loss": 1.5612, "step": 3468 }, { "epoch": 0.4734543469359902, "grad_norm": 0.21253597736358643, "learning_rate": 8e-05, "loss": 1.5493, "step": 3469 }, { "epoch": 0.473590828442746, "grad_norm": 0.21428847312927246, "learning_rate": 8e-05, "loss": 1.5311, "step": 3470 }, { "epoch": 0.47372730994950185, "grad_norm": 0.21106933057308197, "learning_rate": 8e-05, "loss": 1.5288, "step": 3471 }, { "epoch": 0.4738637914562577, "grad_norm": 0.20953063666820526, "learning_rate": 8e-05, "loss": 1.5253, "step": 3472 }, { "epoch": 0.4740002729630135, "grad_norm": 0.20550869405269623, "learning_rate": 8e-05, "loss": 1.5317, "step": 3473 }, { "epoch": 0.47413675446976933, "grad_norm": 0.20663829147815704, "learning_rate": 8e-05, "loss": 1.5143, "step": 3474 }, { "epoch": 0.47427323597652515, "grad_norm": 0.20458601415157318, "learning_rate": 8e-05, "loss": 1.5665, "step": 3475 }, { "epoch": 0.47440971748328103, "grad_norm": 0.21060355007648468, "learning_rate": 8e-05, "loss": 1.5437, "step": 3476 }, { "epoch": 0.47454619899003686, "grad_norm": 0.20915454626083374, "learning_rate": 8e-05, "loss": 1.431, "step": 3477 }, { "epoch": 0.4746826804967927, "grad_norm": 0.20820870995521545, "learning_rate": 8e-05, "loss": 1.515, "step": 3478 }, { "epoch": 0.4748191620035485, "grad_norm": 0.20602834224700928, "learning_rate": 8e-05, "loss": 1.5115, "step": 3479 }, { "epoch": 0.47495564351030434, "grad_norm": 0.20320291817188263, "learning_rate": 8e-05, "loss": 1.5297, "step": 3480 }, { "epoch": 0.47509212501706016, "grad_norm": 0.19657786190509796, "learning_rate": 8e-05, "loss": 1.4439, "step": 3481 }, { "epoch": 0.47522860652381604, "grad_norm": 0.2016376107931137, "learning_rate": 8e-05, "loss": 1.4443, "step": 3482 }, { "epoch": 0.47536508803057187, "grad_norm": 0.20283100008964539, "learning_rate": 8e-05, "loss": 1.5235, "step": 3483 }, { "epoch": 0.4755015695373277, "grad_norm": 0.20347493886947632, "learning_rate": 8e-05, "loss": 1.4969, "step": 3484 }, { "epoch": 0.4756380510440835, "grad_norm": 0.20947591960430145, "learning_rate": 8e-05, "loss": 1.52, "step": 3485 }, { "epoch": 0.47577453255083935, "grad_norm": 0.2025967836380005, "learning_rate": 8e-05, "loss": 1.5315, "step": 3486 }, { "epoch": 0.47591101405759517, "grad_norm": 0.20849086344242096, "learning_rate": 8e-05, "loss": 1.5033, "step": 3487 }, { "epoch": 0.47604749556435105, "grad_norm": 0.20108284056186676, "learning_rate": 8e-05, "loss": 1.5088, "step": 3488 }, { "epoch": 0.4761839770711069, "grad_norm": 0.1997194141149521, "learning_rate": 8e-05, "loss": 1.4483, "step": 3489 }, { "epoch": 0.4763204585778627, "grad_norm": 0.20717458426952362, "learning_rate": 8e-05, "loss": 1.5491, "step": 3490 }, { "epoch": 0.47645694008461853, "grad_norm": 0.198810875415802, "learning_rate": 8e-05, "loss": 1.504, "step": 3491 }, { "epoch": 0.47659342159137436, "grad_norm": 0.1989993304014206, "learning_rate": 8e-05, "loss": 1.4878, "step": 3492 }, { "epoch": 0.4767299030981302, "grad_norm": 0.19711193442344666, "learning_rate": 8e-05, "loss": 1.4602, "step": 3493 }, { "epoch": 0.47686638460488606, "grad_norm": 0.20564322173595428, "learning_rate": 8e-05, "loss": 1.5432, "step": 3494 }, { "epoch": 0.4770028661116419, "grad_norm": 0.2079828977584839, "learning_rate": 8e-05, "loss": 1.5008, "step": 3495 }, { "epoch": 0.4771393476183977, "grad_norm": 0.19164466857910156, "learning_rate": 8e-05, "loss": 1.3999, "step": 3496 }, { "epoch": 0.47727582912515354, "grad_norm": 0.2024288922548294, "learning_rate": 8e-05, "loss": 1.4604, "step": 3497 }, { "epoch": 0.47741231063190936, "grad_norm": 0.19840653240680695, "learning_rate": 8e-05, "loss": 1.4869, "step": 3498 }, { "epoch": 0.4775487921386652, "grad_norm": 0.20264117419719696, "learning_rate": 8e-05, "loss": 1.4893, "step": 3499 }, { "epoch": 0.47768527364542107, "grad_norm": 0.2120170295238495, "learning_rate": 8e-05, "loss": 1.6318, "step": 3500 }, { "epoch": 0.4778217551521769, "grad_norm": 0.2042725682258606, "learning_rate": 8e-05, "loss": 1.4688, "step": 3501 }, { "epoch": 0.4779582366589327, "grad_norm": 0.20023949444293976, "learning_rate": 8e-05, "loss": 1.4961, "step": 3502 }, { "epoch": 0.47809471816568855, "grad_norm": 0.2013224959373474, "learning_rate": 8e-05, "loss": 1.5127, "step": 3503 }, { "epoch": 0.4782311996724444, "grad_norm": 0.202875018119812, "learning_rate": 8e-05, "loss": 1.4423, "step": 3504 }, { "epoch": 0.4783676811792002, "grad_norm": 0.2079051434993744, "learning_rate": 8e-05, "loss": 1.527, "step": 3505 }, { "epoch": 0.4785041626859561, "grad_norm": 0.21132634580135345, "learning_rate": 8e-05, "loss": 1.526, "step": 3506 }, { "epoch": 0.4786406441927119, "grad_norm": 0.20469678938388824, "learning_rate": 8e-05, "loss": 1.4491, "step": 3507 }, { "epoch": 0.47877712569946773, "grad_norm": 0.20625104010105133, "learning_rate": 8e-05, "loss": 1.5204, "step": 3508 }, { "epoch": 0.47891360720622356, "grad_norm": 0.1978519707918167, "learning_rate": 8e-05, "loss": 1.4552, "step": 3509 }, { "epoch": 0.4790500887129794, "grad_norm": 0.20960263907909393, "learning_rate": 8e-05, "loss": 1.5739, "step": 3510 }, { "epoch": 0.4791865702197352, "grad_norm": 0.19807633757591248, "learning_rate": 8e-05, "loss": 1.5175, "step": 3511 }, { "epoch": 0.47932305172649103, "grad_norm": 0.20979318022727966, "learning_rate": 8e-05, "loss": 1.5485, "step": 3512 }, { "epoch": 0.4794595332332469, "grad_norm": 0.20573720335960388, "learning_rate": 8e-05, "loss": 1.5056, "step": 3513 }, { "epoch": 0.47959601474000274, "grad_norm": 0.19832788407802582, "learning_rate": 8e-05, "loss": 1.4183, "step": 3514 }, { "epoch": 0.47973249624675857, "grad_norm": 0.20368269085884094, "learning_rate": 8e-05, "loss": 1.5001, "step": 3515 }, { "epoch": 0.4798689777535144, "grad_norm": 0.20572879910469055, "learning_rate": 8e-05, "loss": 1.5071, "step": 3516 }, { "epoch": 0.4800054592602702, "grad_norm": 0.2067234367132187, "learning_rate": 8e-05, "loss": 1.5398, "step": 3517 }, { "epoch": 0.48014194076702604, "grad_norm": 0.21909917891025543, "learning_rate": 8e-05, "loss": 1.5965, "step": 3518 }, { "epoch": 0.4802784222737819, "grad_norm": 0.2101248800754547, "learning_rate": 8e-05, "loss": 1.5358, "step": 3519 }, { "epoch": 0.48041490378053775, "grad_norm": 0.2082640677690506, "learning_rate": 8e-05, "loss": 1.5453, "step": 3520 }, { "epoch": 0.4805513852872936, "grad_norm": 0.20257088541984558, "learning_rate": 8e-05, "loss": 1.5267, "step": 3521 }, { "epoch": 0.4806878667940494, "grad_norm": 0.20171722769737244, "learning_rate": 8e-05, "loss": 1.4967, "step": 3522 }, { "epoch": 0.4808243483008052, "grad_norm": 0.20478981733322144, "learning_rate": 8e-05, "loss": 1.4735, "step": 3523 }, { "epoch": 0.48096082980756105, "grad_norm": 0.21053843200206757, "learning_rate": 8e-05, "loss": 1.537, "step": 3524 }, { "epoch": 0.48109731131431693, "grad_norm": 0.1977248340845108, "learning_rate": 8e-05, "loss": 1.4505, "step": 3525 }, { "epoch": 0.48123379282107276, "grad_norm": 0.20767652988433838, "learning_rate": 8e-05, "loss": 1.5107, "step": 3526 }, { "epoch": 0.4813702743278286, "grad_norm": 0.21315601468086243, "learning_rate": 8e-05, "loss": 1.4999, "step": 3527 }, { "epoch": 0.4815067558345844, "grad_norm": 0.1964656114578247, "learning_rate": 8e-05, "loss": 1.5209, "step": 3528 }, { "epoch": 0.48164323734134024, "grad_norm": 0.217056006193161, "learning_rate": 8e-05, "loss": 1.536, "step": 3529 }, { "epoch": 0.48177971884809606, "grad_norm": 0.20948797464370728, "learning_rate": 8e-05, "loss": 1.5437, "step": 3530 }, { "epoch": 0.48191620035485194, "grad_norm": 0.20981857180595398, "learning_rate": 8e-05, "loss": 1.4925, "step": 3531 }, { "epoch": 0.48205268186160777, "grad_norm": 0.21023494005203247, "learning_rate": 8e-05, "loss": 1.5073, "step": 3532 }, { "epoch": 0.4821891633683636, "grad_norm": 0.21750009059906006, "learning_rate": 8e-05, "loss": 1.5158, "step": 3533 }, { "epoch": 0.4823256448751194, "grad_norm": 0.21467478573322296, "learning_rate": 8e-05, "loss": 1.474, "step": 3534 }, { "epoch": 0.48246212638187524, "grad_norm": 0.21620170772075653, "learning_rate": 8e-05, "loss": 1.4308, "step": 3535 }, { "epoch": 0.48259860788863107, "grad_norm": 0.21362650394439697, "learning_rate": 8e-05, "loss": 1.4961, "step": 3536 }, { "epoch": 0.48273508939538695, "grad_norm": 0.19977182149887085, "learning_rate": 8e-05, "loss": 1.3927, "step": 3537 }, { "epoch": 0.4828715709021428, "grad_norm": 0.21269384026527405, "learning_rate": 8e-05, "loss": 1.5823, "step": 3538 }, { "epoch": 0.4830080524088986, "grad_norm": 0.22291666269302368, "learning_rate": 8e-05, "loss": 1.4763, "step": 3539 }, { "epoch": 0.48314453391565443, "grad_norm": 0.20446087419986725, "learning_rate": 8e-05, "loss": 1.5659, "step": 3540 }, { "epoch": 0.48328101542241025, "grad_norm": 0.21411104500293732, "learning_rate": 8e-05, "loss": 1.5258, "step": 3541 }, { "epoch": 0.4834174969291661, "grad_norm": 0.20731037855148315, "learning_rate": 8e-05, "loss": 1.4386, "step": 3542 }, { "epoch": 0.4835539784359219, "grad_norm": 0.20868165791034698, "learning_rate": 8e-05, "loss": 1.5401, "step": 3543 }, { "epoch": 0.4836904599426778, "grad_norm": 0.21335147321224213, "learning_rate": 8e-05, "loss": 1.5942, "step": 3544 }, { "epoch": 0.4838269414494336, "grad_norm": 0.21525166928768158, "learning_rate": 8e-05, "loss": 1.4952, "step": 3545 }, { "epoch": 0.48396342295618944, "grad_norm": 0.21303965151309967, "learning_rate": 8e-05, "loss": 1.5396, "step": 3546 }, { "epoch": 0.48409990446294526, "grad_norm": 0.19725294411182404, "learning_rate": 8e-05, "loss": 1.4294, "step": 3547 }, { "epoch": 0.4842363859697011, "grad_norm": 0.20208528637886047, "learning_rate": 8e-05, "loss": 1.4196, "step": 3548 }, { "epoch": 0.4843728674764569, "grad_norm": 0.20230498909950256, "learning_rate": 8e-05, "loss": 1.4518, "step": 3549 }, { "epoch": 0.4845093489832128, "grad_norm": 0.20581379532814026, "learning_rate": 8e-05, "loss": 1.5385, "step": 3550 }, { "epoch": 0.4846458304899686, "grad_norm": 0.2000308632850647, "learning_rate": 8e-05, "loss": 1.5019, "step": 3551 }, { "epoch": 0.48478231199672445, "grad_norm": 0.20568643510341644, "learning_rate": 8e-05, "loss": 1.4417, "step": 3552 }, { "epoch": 0.48491879350348027, "grad_norm": 0.20863598585128784, "learning_rate": 8e-05, "loss": 1.4864, "step": 3553 }, { "epoch": 0.4850552750102361, "grad_norm": 0.208083838224411, "learning_rate": 8e-05, "loss": 1.4835, "step": 3554 }, { "epoch": 0.4851917565169919, "grad_norm": 0.20798850059509277, "learning_rate": 8e-05, "loss": 1.5361, "step": 3555 }, { "epoch": 0.4853282380237478, "grad_norm": 0.21109066903591156, "learning_rate": 8e-05, "loss": 1.4942, "step": 3556 }, { "epoch": 0.48546471953050363, "grad_norm": 0.2202380746603012, "learning_rate": 8e-05, "loss": 1.5101, "step": 3557 }, { "epoch": 0.48560120103725946, "grad_norm": 0.20535895228385925, "learning_rate": 8e-05, "loss": 1.5163, "step": 3558 }, { "epoch": 0.4857376825440153, "grad_norm": 0.2063886970281601, "learning_rate": 8e-05, "loss": 1.4956, "step": 3559 }, { "epoch": 0.4858741640507711, "grad_norm": 0.21430860459804535, "learning_rate": 8e-05, "loss": 1.5613, "step": 3560 }, { "epoch": 0.48601064555752693, "grad_norm": 0.20108100771903992, "learning_rate": 8e-05, "loss": 1.4646, "step": 3561 }, { "epoch": 0.4861471270642828, "grad_norm": 0.2038641721010208, "learning_rate": 8e-05, "loss": 1.5428, "step": 3562 }, { "epoch": 0.48628360857103864, "grad_norm": 0.21385547518730164, "learning_rate": 8e-05, "loss": 1.4901, "step": 3563 }, { "epoch": 0.48642009007779446, "grad_norm": 0.20411108434200287, "learning_rate": 8e-05, "loss": 1.5033, "step": 3564 }, { "epoch": 0.4865565715845503, "grad_norm": 0.20390279591083527, "learning_rate": 8e-05, "loss": 1.5523, "step": 3565 }, { "epoch": 0.4866930530913061, "grad_norm": 0.20540545880794525, "learning_rate": 8e-05, "loss": 1.5132, "step": 3566 }, { "epoch": 0.48682953459806194, "grad_norm": 0.20313748717308044, "learning_rate": 8e-05, "loss": 1.4881, "step": 3567 }, { "epoch": 0.4869660161048178, "grad_norm": 0.2098429799079895, "learning_rate": 8e-05, "loss": 1.5084, "step": 3568 }, { "epoch": 0.48710249761157365, "grad_norm": 0.20528361201286316, "learning_rate": 8e-05, "loss": 1.5273, "step": 3569 }, { "epoch": 0.4872389791183295, "grad_norm": 0.2076866179704666, "learning_rate": 8e-05, "loss": 1.551, "step": 3570 }, { "epoch": 0.4873754606250853, "grad_norm": 0.20142139494419098, "learning_rate": 8e-05, "loss": 1.5273, "step": 3571 }, { "epoch": 0.4875119421318411, "grad_norm": 0.2126702517271042, "learning_rate": 8e-05, "loss": 1.5234, "step": 3572 }, { "epoch": 0.48764842363859695, "grad_norm": 0.19976341724395752, "learning_rate": 8e-05, "loss": 1.4567, "step": 3573 }, { "epoch": 0.48778490514535283, "grad_norm": 0.20824775099754333, "learning_rate": 8e-05, "loss": 1.5165, "step": 3574 }, { "epoch": 0.48792138665210866, "grad_norm": 0.21170705556869507, "learning_rate": 8e-05, "loss": 1.4975, "step": 3575 }, { "epoch": 0.4880578681588645, "grad_norm": 0.20064543187618256, "learning_rate": 8e-05, "loss": 1.4591, "step": 3576 }, { "epoch": 0.4881943496656203, "grad_norm": 0.2066013514995575, "learning_rate": 8e-05, "loss": 1.5006, "step": 3577 }, { "epoch": 0.48833083117237613, "grad_norm": 0.20975522696971893, "learning_rate": 8e-05, "loss": 1.4598, "step": 3578 }, { "epoch": 0.48846731267913196, "grad_norm": 0.20584852993488312, "learning_rate": 8e-05, "loss": 1.5, "step": 3579 }, { "epoch": 0.4886037941858878, "grad_norm": 0.20562535524368286, "learning_rate": 8e-05, "loss": 1.5177, "step": 3580 }, { "epoch": 0.48874027569264367, "grad_norm": 0.221099853515625, "learning_rate": 8e-05, "loss": 1.5759, "step": 3581 }, { "epoch": 0.4888767571993995, "grad_norm": 0.2097145915031433, "learning_rate": 8e-05, "loss": 1.545, "step": 3582 }, { "epoch": 0.4890132387061553, "grad_norm": 0.2036169171333313, "learning_rate": 8e-05, "loss": 1.5385, "step": 3583 }, { "epoch": 0.48914972021291114, "grad_norm": 0.21238572895526886, "learning_rate": 8e-05, "loss": 1.4617, "step": 3584 }, { "epoch": 0.48928620171966697, "grad_norm": 0.2007947415113449, "learning_rate": 8e-05, "loss": 1.4473, "step": 3585 }, { "epoch": 0.4894226832264228, "grad_norm": 0.21158722043037415, "learning_rate": 8e-05, "loss": 1.4926, "step": 3586 }, { "epoch": 0.4895591647331787, "grad_norm": 0.21101102232933044, "learning_rate": 8e-05, "loss": 1.4622, "step": 3587 }, { "epoch": 0.4896956462399345, "grad_norm": 0.21405421197414398, "learning_rate": 8e-05, "loss": 1.5565, "step": 3588 }, { "epoch": 0.4898321277466903, "grad_norm": 0.21508747339248657, "learning_rate": 8e-05, "loss": 1.4814, "step": 3589 }, { "epoch": 0.48996860925344615, "grad_norm": 0.216883584856987, "learning_rate": 8e-05, "loss": 1.5504, "step": 3590 }, { "epoch": 0.490105090760202, "grad_norm": 0.20675033330917358, "learning_rate": 8e-05, "loss": 1.4823, "step": 3591 }, { "epoch": 0.4902415722669578, "grad_norm": 0.21256797015666962, "learning_rate": 8e-05, "loss": 1.455, "step": 3592 }, { "epoch": 0.4903780537737137, "grad_norm": 0.21640059351921082, "learning_rate": 8e-05, "loss": 1.5673, "step": 3593 }, { "epoch": 0.4905145352804695, "grad_norm": 0.21707874536514282, "learning_rate": 8e-05, "loss": 1.5362, "step": 3594 }, { "epoch": 0.49065101678722534, "grad_norm": 0.20597729086875916, "learning_rate": 8e-05, "loss": 1.4092, "step": 3595 }, { "epoch": 0.49078749829398116, "grad_norm": 0.20640929043293, "learning_rate": 8e-05, "loss": 1.5204, "step": 3596 }, { "epoch": 0.490923979800737, "grad_norm": 0.20196007192134857, "learning_rate": 8e-05, "loss": 1.4744, "step": 3597 }, { "epoch": 0.4910604613074928, "grad_norm": 0.21097774803638458, "learning_rate": 8e-05, "loss": 1.5503, "step": 3598 }, { "epoch": 0.4911969428142487, "grad_norm": 0.2047613561153412, "learning_rate": 8e-05, "loss": 1.5244, "step": 3599 }, { "epoch": 0.4913334243210045, "grad_norm": 0.20047776401042938, "learning_rate": 8e-05, "loss": 1.495, "step": 3600 }, { "epoch": 0.49146990582776034, "grad_norm": 0.20288142561912537, "learning_rate": 8e-05, "loss": 1.4614, "step": 3601 }, { "epoch": 0.49160638733451617, "grad_norm": 0.20047034323215485, "learning_rate": 8e-05, "loss": 1.5124, "step": 3602 }, { "epoch": 0.491742868841272, "grad_norm": 0.20609702169895172, "learning_rate": 8e-05, "loss": 1.5111, "step": 3603 }, { "epoch": 0.4918793503480278, "grad_norm": 0.20599691569805145, "learning_rate": 8e-05, "loss": 1.4717, "step": 3604 }, { "epoch": 0.4920158318547837, "grad_norm": 0.21311761438846588, "learning_rate": 8e-05, "loss": 1.5359, "step": 3605 }, { "epoch": 0.49215231336153953, "grad_norm": 0.20836657285690308, "learning_rate": 8e-05, "loss": 1.4744, "step": 3606 }, { "epoch": 0.49228879486829535, "grad_norm": 0.20738425850868225, "learning_rate": 8e-05, "loss": 1.5274, "step": 3607 }, { "epoch": 0.4924252763750512, "grad_norm": 0.20239634811878204, "learning_rate": 8e-05, "loss": 1.4545, "step": 3608 }, { "epoch": 0.492561757881807, "grad_norm": 0.20028546452522278, "learning_rate": 8e-05, "loss": 1.5029, "step": 3609 }, { "epoch": 0.49269823938856283, "grad_norm": 0.20458798110485077, "learning_rate": 8e-05, "loss": 1.4169, "step": 3610 }, { "epoch": 0.4928347208953187, "grad_norm": 0.21394610404968262, "learning_rate": 8e-05, "loss": 1.5116, "step": 3611 }, { "epoch": 0.49297120240207454, "grad_norm": 0.2124284952878952, "learning_rate": 8e-05, "loss": 1.5507, "step": 3612 }, { "epoch": 0.49310768390883036, "grad_norm": 0.20730768144130707, "learning_rate": 8e-05, "loss": 1.5256, "step": 3613 }, { "epoch": 0.4932441654155862, "grad_norm": 0.2099892944097519, "learning_rate": 8e-05, "loss": 1.5709, "step": 3614 }, { "epoch": 0.493380646922342, "grad_norm": 0.21305988729000092, "learning_rate": 8e-05, "loss": 1.5925, "step": 3615 }, { "epoch": 0.49351712842909784, "grad_norm": 0.21803003549575806, "learning_rate": 8e-05, "loss": 1.4666, "step": 3616 }, { "epoch": 0.49365360993585367, "grad_norm": 0.2166893631219864, "learning_rate": 8e-05, "loss": 1.5827, "step": 3617 }, { "epoch": 0.49379009144260955, "grad_norm": 0.20721480250358582, "learning_rate": 8e-05, "loss": 1.5001, "step": 3618 }, { "epoch": 0.4939265729493654, "grad_norm": 0.2038077861070633, "learning_rate": 8e-05, "loss": 1.4718, "step": 3619 }, { "epoch": 0.4940630544561212, "grad_norm": 0.20818711817264557, "learning_rate": 8e-05, "loss": 1.5357, "step": 3620 }, { "epoch": 0.494199535962877, "grad_norm": 0.20612770318984985, "learning_rate": 8e-05, "loss": 1.5785, "step": 3621 }, { "epoch": 0.49433601746963285, "grad_norm": 0.20260211825370789, "learning_rate": 8e-05, "loss": 1.4893, "step": 3622 }, { "epoch": 0.4944724989763887, "grad_norm": 0.20106421411037445, "learning_rate": 8e-05, "loss": 1.4803, "step": 3623 }, { "epoch": 0.49460898048314456, "grad_norm": 0.2095767706632614, "learning_rate": 8e-05, "loss": 1.5253, "step": 3624 }, { "epoch": 0.4947454619899004, "grad_norm": 0.20170405507087708, "learning_rate": 8e-05, "loss": 1.4637, "step": 3625 }, { "epoch": 0.4948819434966562, "grad_norm": 0.2099498212337494, "learning_rate": 8e-05, "loss": 1.5176, "step": 3626 }, { "epoch": 0.49501842500341203, "grad_norm": 0.20306894183158875, "learning_rate": 8e-05, "loss": 1.5288, "step": 3627 }, { "epoch": 0.49515490651016786, "grad_norm": 0.21352609992027283, "learning_rate": 8e-05, "loss": 1.4947, "step": 3628 }, { "epoch": 0.4952913880169237, "grad_norm": 0.20478996634483337, "learning_rate": 8e-05, "loss": 1.4511, "step": 3629 }, { "epoch": 0.49542786952367956, "grad_norm": 0.21442031860351562, "learning_rate": 8e-05, "loss": 1.5711, "step": 3630 }, { "epoch": 0.4955643510304354, "grad_norm": 0.21367311477661133, "learning_rate": 8e-05, "loss": 1.5754, "step": 3631 }, { "epoch": 0.4957008325371912, "grad_norm": 0.20358863472938538, "learning_rate": 8e-05, "loss": 1.5117, "step": 3632 }, { "epoch": 0.49583731404394704, "grad_norm": 0.21455514430999756, "learning_rate": 8e-05, "loss": 1.4692, "step": 3633 }, { "epoch": 0.49597379555070287, "grad_norm": 0.20517876744270325, "learning_rate": 8e-05, "loss": 1.4596, "step": 3634 }, { "epoch": 0.4961102770574587, "grad_norm": 0.20770393311977386, "learning_rate": 8e-05, "loss": 1.4801, "step": 3635 }, { "epoch": 0.4962467585642146, "grad_norm": 0.21083983778953552, "learning_rate": 8e-05, "loss": 1.4478, "step": 3636 }, { "epoch": 0.4963832400709704, "grad_norm": 0.20676255226135254, "learning_rate": 8e-05, "loss": 1.4813, "step": 3637 }, { "epoch": 0.4965197215777262, "grad_norm": 0.20775020122528076, "learning_rate": 8e-05, "loss": 1.4894, "step": 3638 }, { "epoch": 0.49665620308448205, "grad_norm": 0.20101529359817505, "learning_rate": 8e-05, "loss": 1.4659, "step": 3639 }, { "epoch": 0.4967926845912379, "grad_norm": 0.20888584852218628, "learning_rate": 8e-05, "loss": 1.4227, "step": 3640 }, { "epoch": 0.4969291660979937, "grad_norm": 0.20747052133083344, "learning_rate": 8e-05, "loss": 1.484, "step": 3641 }, { "epoch": 0.4970656476047496, "grad_norm": 0.19870388507843018, "learning_rate": 8e-05, "loss": 1.4293, "step": 3642 }, { "epoch": 0.4972021291115054, "grad_norm": 0.2139759659767151, "learning_rate": 8e-05, "loss": 1.5168, "step": 3643 }, { "epoch": 0.49733861061826123, "grad_norm": 0.20687726140022278, "learning_rate": 8e-05, "loss": 1.4944, "step": 3644 }, { "epoch": 0.49747509212501706, "grad_norm": 0.20504942536354065, "learning_rate": 8e-05, "loss": 1.4031, "step": 3645 }, { "epoch": 0.4976115736317729, "grad_norm": 0.2088628113269806, "learning_rate": 8e-05, "loss": 1.5369, "step": 3646 }, { "epoch": 0.4977480551385287, "grad_norm": 0.20516029000282288, "learning_rate": 8e-05, "loss": 1.4965, "step": 3647 }, { "epoch": 0.49788453664528454, "grad_norm": 0.20718146860599518, "learning_rate": 8e-05, "loss": 1.498, "step": 3648 }, { "epoch": 0.4980210181520404, "grad_norm": 0.20014266669750214, "learning_rate": 8e-05, "loss": 1.4571, "step": 3649 }, { "epoch": 0.49815749965879624, "grad_norm": 0.20117169618606567, "learning_rate": 8e-05, "loss": 1.4921, "step": 3650 }, { "epoch": 0.49829398116555207, "grad_norm": 0.21267274022102356, "learning_rate": 8e-05, "loss": 1.5599, "step": 3651 }, { "epoch": 0.4984304626723079, "grad_norm": 0.20519742369651794, "learning_rate": 8e-05, "loss": 1.5216, "step": 3652 }, { "epoch": 0.4985669441790637, "grad_norm": 0.20292620360851288, "learning_rate": 8e-05, "loss": 1.4599, "step": 3653 }, { "epoch": 0.49870342568581955, "grad_norm": 0.2203458547592163, "learning_rate": 8e-05, "loss": 1.5247, "step": 3654 }, { "epoch": 0.4988399071925754, "grad_norm": 0.2080739289522171, "learning_rate": 8e-05, "loss": 1.5376, "step": 3655 }, { "epoch": 0.49897638869933125, "grad_norm": 0.21375824511051178, "learning_rate": 8e-05, "loss": 1.5589, "step": 3656 }, { "epoch": 0.4991128702060871, "grad_norm": 0.20661212503910065, "learning_rate": 8e-05, "loss": 1.5196, "step": 3657 }, { "epoch": 0.4992493517128429, "grad_norm": 0.1998106688261032, "learning_rate": 8e-05, "loss": 1.5317, "step": 3658 }, { "epoch": 0.49938583321959873, "grad_norm": 0.20285949110984802, "learning_rate": 8e-05, "loss": 1.5179, "step": 3659 }, { "epoch": 0.49952231472635455, "grad_norm": 0.20221386849880219, "learning_rate": 8e-05, "loss": 1.5154, "step": 3660 }, { "epoch": 0.49965879623311044, "grad_norm": 0.19674454629421234, "learning_rate": 8e-05, "loss": 1.4935, "step": 3661 }, { "epoch": 0.49979527773986626, "grad_norm": 0.20864376425743103, "learning_rate": 8e-05, "loss": 1.5716, "step": 3662 }, { "epoch": 0.4999317592466221, "grad_norm": 0.20189069211483002, "learning_rate": 8e-05, "loss": 1.5073, "step": 3663 }, { "epoch": 0.500068240753378, "grad_norm": 0.20689326524734497, "learning_rate": 8e-05, "loss": 1.4801, "step": 3664 }, { "epoch": 0.5002047222601338, "grad_norm": 0.21195192635059357, "learning_rate": 8e-05, "loss": 1.5078, "step": 3665 }, { "epoch": 0.5003412037668896, "grad_norm": 0.20739254355430603, "learning_rate": 8e-05, "loss": 1.4884, "step": 3666 }, { "epoch": 0.5004776852736454, "grad_norm": 0.21032270789146423, "learning_rate": 8e-05, "loss": 1.525, "step": 3667 }, { "epoch": 0.5006141667804013, "grad_norm": 0.2124892622232437, "learning_rate": 8e-05, "loss": 1.5486, "step": 3668 }, { "epoch": 0.5007506482871571, "grad_norm": 0.21108348667621613, "learning_rate": 8e-05, "loss": 1.5302, "step": 3669 }, { "epoch": 0.5008871297939129, "grad_norm": 0.2078608274459839, "learning_rate": 8e-05, "loss": 1.4826, "step": 3670 }, { "epoch": 0.5010236113006687, "grad_norm": 0.20696720480918884, "learning_rate": 8e-05, "loss": 1.4785, "step": 3671 }, { "epoch": 0.5011600928074246, "grad_norm": 0.22207775712013245, "learning_rate": 8e-05, "loss": 1.4824, "step": 3672 }, { "epoch": 0.5012965743141804, "grad_norm": 0.20768852531909943, "learning_rate": 8e-05, "loss": 1.5411, "step": 3673 }, { "epoch": 0.5014330558209362, "grad_norm": 0.20559285581111908, "learning_rate": 8e-05, "loss": 1.5003, "step": 3674 }, { "epoch": 0.501569537327692, "grad_norm": 0.21189770102500916, "learning_rate": 8e-05, "loss": 1.5065, "step": 3675 }, { "epoch": 0.501706018834448, "grad_norm": 0.21071873605251312, "learning_rate": 8e-05, "loss": 1.5253, "step": 3676 }, { "epoch": 0.5018425003412038, "grad_norm": 0.19959363341331482, "learning_rate": 8e-05, "loss": 1.4835, "step": 3677 }, { "epoch": 0.5019789818479596, "grad_norm": 0.21146203577518463, "learning_rate": 8e-05, "loss": 1.5471, "step": 3678 }, { "epoch": 0.5021154633547155, "grad_norm": 0.21805863082408905, "learning_rate": 8e-05, "loss": 1.4555, "step": 3679 }, { "epoch": 0.5022519448614713, "grad_norm": 0.21758250892162323, "learning_rate": 8e-05, "loss": 1.5638, "step": 3680 }, { "epoch": 0.5023884263682271, "grad_norm": 0.21342754364013672, "learning_rate": 8e-05, "loss": 1.4852, "step": 3681 }, { "epoch": 0.5025249078749829, "grad_norm": 0.20522929728031158, "learning_rate": 8e-05, "loss": 1.5125, "step": 3682 }, { "epoch": 0.5026613893817388, "grad_norm": 0.19855450093746185, "learning_rate": 8e-05, "loss": 1.444, "step": 3683 }, { "epoch": 0.5027978708884946, "grad_norm": 0.2293214350938797, "learning_rate": 8e-05, "loss": 1.5841, "step": 3684 }, { "epoch": 0.5029343523952504, "grad_norm": 0.21070286631584167, "learning_rate": 8e-05, "loss": 1.5231, "step": 3685 }, { "epoch": 0.5030708339020062, "grad_norm": 0.19877244532108307, "learning_rate": 8e-05, "loss": 1.462, "step": 3686 }, { "epoch": 0.5032073154087621, "grad_norm": 0.202780619263649, "learning_rate": 8e-05, "loss": 1.5117, "step": 3687 }, { "epoch": 0.5033437969155179, "grad_norm": 0.21033748984336853, "learning_rate": 8e-05, "loss": 1.4226, "step": 3688 }, { "epoch": 0.5034802784222738, "grad_norm": 0.20581790804862976, "learning_rate": 8e-05, "loss": 1.525, "step": 3689 }, { "epoch": 0.5036167599290297, "grad_norm": 0.21243390440940857, "learning_rate": 8e-05, "loss": 1.5117, "step": 3690 }, { "epoch": 0.5037532414357855, "grad_norm": 0.2094869315624237, "learning_rate": 8e-05, "loss": 1.4767, "step": 3691 }, { "epoch": 0.5038897229425413, "grad_norm": 0.20795494318008423, "learning_rate": 8e-05, "loss": 1.5501, "step": 3692 }, { "epoch": 0.5040262044492971, "grad_norm": 0.2130076289176941, "learning_rate": 8e-05, "loss": 1.4421, "step": 3693 }, { "epoch": 0.504162685956053, "grad_norm": 0.21163424849510193, "learning_rate": 8e-05, "loss": 1.5637, "step": 3694 }, { "epoch": 0.5042991674628088, "grad_norm": 0.20242011547088623, "learning_rate": 8e-05, "loss": 1.507, "step": 3695 }, { "epoch": 0.5044356489695646, "grad_norm": 0.20719332993030548, "learning_rate": 8e-05, "loss": 1.4907, "step": 3696 }, { "epoch": 0.5045721304763204, "grad_norm": 0.213754802942276, "learning_rate": 8e-05, "loss": 1.4698, "step": 3697 }, { "epoch": 0.5047086119830763, "grad_norm": 0.2040412873029709, "learning_rate": 8e-05, "loss": 1.4053, "step": 3698 }, { "epoch": 0.5048450934898321, "grad_norm": 0.2151033878326416, "learning_rate": 8e-05, "loss": 1.516, "step": 3699 }, { "epoch": 0.5049815749965879, "grad_norm": 0.21601992845535278, "learning_rate": 8e-05, "loss": 1.4929, "step": 3700 }, { "epoch": 0.5051180565033438, "grad_norm": 0.2160337269306183, "learning_rate": 8e-05, "loss": 1.491, "step": 3701 }, { "epoch": 0.5052545380100997, "grad_norm": 0.21680496633052826, "learning_rate": 8e-05, "loss": 1.5039, "step": 3702 }, { "epoch": 0.5053910195168555, "grad_norm": 0.20526234805583954, "learning_rate": 8e-05, "loss": 1.4607, "step": 3703 }, { "epoch": 0.5055275010236113, "grad_norm": 0.2081543207168579, "learning_rate": 8e-05, "loss": 1.484, "step": 3704 }, { "epoch": 0.5056639825303672, "grad_norm": 0.2106073647737503, "learning_rate": 8e-05, "loss": 1.5081, "step": 3705 }, { "epoch": 0.505800464037123, "grad_norm": 0.21469458937644958, "learning_rate": 8e-05, "loss": 1.5262, "step": 3706 }, { "epoch": 0.5059369455438788, "grad_norm": 0.20671242475509644, "learning_rate": 8e-05, "loss": 1.4682, "step": 3707 }, { "epoch": 0.5060734270506346, "grad_norm": 0.20423923432826996, "learning_rate": 8e-05, "loss": 1.51, "step": 3708 }, { "epoch": 0.5062099085573905, "grad_norm": 0.21102315187454224, "learning_rate": 8e-05, "loss": 1.5235, "step": 3709 }, { "epoch": 0.5063463900641463, "grad_norm": 0.2006913423538208, "learning_rate": 8e-05, "loss": 1.4962, "step": 3710 }, { "epoch": 0.5064828715709021, "grad_norm": 0.2073921263217926, "learning_rate": 8e-05, "loss": 1.4356, "step": 3711 }, { "epoch": 0.5066193530776579, "grad_norm": 0.21121962368488312, "learning_rate": 8e-05, "loss": 1.5992, "step": 3712 }, { "epoch": 0.5067558345844139, "grad_norm": 0.20361319184303284, "learning_rate": 8e-05, "loss": 1.5119, "step": 3713 }, { "epoch": 0.5068923160911697, "grad_norm": 0.20557788014411926, "learning_rate": 8e-05, "loss": 1.5408, "step": 3714 }, { "epoch": 0.5070287975979255, "grad_norm": 0.20878492295742035, "learning_rate": 8e-05, "loss": 1.523, "step": 3715 }, { "epoch": 0.5071652791046813, "grad_norm": 0.20175319910049438, "learning_rate": 8e-05, "loss": 1.5275, "step": 3716 }, { "epoch": 0.5073017606114372, "grad_norm": 0.2210397571325302, "learning_rate": 8e-05, "loss": 1.4934, "step": 3717 }, { "epoch": 0.507438242118193, "grad_norm": 0.20550814270973206, "learning_rate": 8e-05, "loss": 1.4851, "step": 3718 }, { "epoch": 0.5075747236249488, "grad_norm": 0.20879559218883514, "learning_rate": 8e-05, "loss": 1.5177, "step": 3719 }, { "epoch": 0.5077112051317046, "grad_norm": 0.2052595466375351, "learning_rate": 8e-05, "loss": 1.4464, "step": 3720 }, { "epoch": 0.5078476866384605, "grad_norm": 0.21076737344264984, "learning_rate": 8e-05, "loss": 1.5255, "step": 3721 }, { "epoch": 0.5079841681452163, "grad_norm": 0.20953722298145294, "learning_rate": 8e-05, "loss": 1.5138, "step": 3722 }, { "epoch": 0.5081206496519721, "grad_norm": 0.20284907519817352, "learning_rate": 8e-05, "loss": 1.46, "step": 3723 }, { "epoch": 0.508257131158728, "grad_norm": 0.21095138788223267, "learning_rate": 8e-05, "loss": 1.4421, "step": 3724 }, { "epoch": 0.5083936126654838, "grad_norm": 0.20719744265079498, "learning_rate": 8e-05, "loss": 1.5167, "step": 3725 }, { "epoch": 0.5085300941722397, "grad_norm": 0.21564018726348877, "learning_rate": 8e-05, "loss": 1.4899, "step": 3726 }, { "epoch": 0.5086665756789955, "grad_norm": 0.20956425368785858, "learning_rate": 8e-05, "loss": 1.4464, "step": 3727 }, { "epoch": 0.5088030571857514, "grad_norm": 0.21330702304840088, "learning_rate": 8e-05, "loss": 1.474, "step": 3728 }, { "epoch": 0.5089395386925072, "grad_norm": 0.2078198343515396, "learning_rate": 8e-05, "loss": 1.5246, "step": 3729 }, { "epoch": 0.509076020199263, "grad_norm": 0.20376670360565186, "learning_rate": 8e-05, "loss": 1.5099, "step": 3730 }, { "epoch": 0.5092125017060188, "grad_norm": 0.206795334815979, "learning_rate": 8e-05, "loss": 1.5141, "step": 3731 }, { "epoch": 0.5093489832127747, "grad_norm": 0.20798687636852264, "learning_rate": 8e-05, "loss": 1.5274, "step": 3732 }, { "epoch": 0.5094854647195305, "grad_norm": 0.21329432725906372, "learning_rate": 8e-05, "loss": 1.4971, "step": 3733 }, { "epoch": 0.5096219462262863, "grad_norm": 0.20452523231506348, "learning_rate": 8e-05, "loss": 1.5064, "step": 3734 }, { "epoch": 0.5097584277330421, "grad_norm": 0.2125357836484909, "learning_rate": 8e-05, "loss": 1.4141, "step": 3735 }, { "epoch": 0.509894909239798, "grad_norm": 0.20367489755153656, "learning_rate": 8e-05, "loss": 1.4489, "step": 3736 }, { "epoch": 0.5100313907465538, "grad_norm": 0.2023291438817978, "learning_rate": 8e-05, "loss": 1.4869, "step": 3737 }, { "epoch": 0.5101678722533097, "grad_norm": 0.20648032426834106, "learning_rate": 8e-05, "loss": 1.493, "step": 3738 }, { "epoch": 0.5103043537600656, "grad_norm": 0.2120596319437027, "learning_rate": 8e-05, "loss": 1.5264, "step": 3739 }, { "epoch": 0.5104408352668214, "grad_norm": 0.20421461760997772, "learning_rate": 8e-05, "loss": 1.4898, "step": 3740 }, { "epoch": 0.5105773167735772, "grad_norm": 0.2109621912240982, "learning_rate": 8e-05, "loss": 1.4056, "step": 3741 }, { "epoch": 0.510713798280333, "grad_norm": 0.20625890791416168, "learning_rate": 8e-05, "loss": 1.4449, "step": 3742 }, { "epoch": 0.5108502797870889, "grad_norm": 0.21244002878665924, "learning_rate": 8e-05, "loss": 1.4772, "step": 3743 }, { "epoch": 0.5109867612938447, "grad_norm": 0.2075747400522232, "learning_rate": 8e-05, "loss": 1.4904, "step": 3744 }, { "epoch": 0.5111232428006005, "grad_norm": 0.22499270737171173, "learning_rate": 8e-05, "loss": 1.4971, "step": 3745 }, { "epoch": 0.5112597243073563, "grad_norm": 0.20978441834449768, "learning_rate": 8e-05, "loss": 1.5155, "step": 3746 }, { "epoch": 0.5113962058141122, "grad_norm": 0.215524822473526, "learning_rate": 8e-05, "loss": 1.4872, "step": 3747 }, { "epoch": 0.511532687320868, "grad_norm": 0.21619375050067902, "learning_rate": 8e-05, "loss": 1.4497, "step": 3748 }, { "epoch": 0.5116691688276238, "grad_norm": 0.20606496930122375, "learning_rate": 8e-05, "loss": 1.4718, "step": 3749 }, { "epoch": 0.5118056503343797, "grad_norm": 0.21071958541870117, "learning_rate": 8e-05, "loss": 1.483, "step": 3750 }, { "epoch": 0.5119421318411356, "grad_norm": 0.20513364672660828, "learning_rate": 8e-05, "loss": 1.3652, "step": 3751 }, { "epoch": 0.5120786133478914, "grad_norm": 0.2093500792980194, "learning_rate": 8e-05, "loss": 1.4881, "step": 3752 }, { "epoch": 0.5122150948546472, "grad_norm": 0.20986369252204895, "learning_rate": 8e-05, "loss": 1.5209, "step": 3753 }, { "epoch": 0.512351576361403, "grad_norm": 0.2090405821800232, "learning_rate": 8e-05, "loss": 1.5341, "step": 3754 }, { "epoch": 0.5124880578681589, "grad_norm": 0.20310235023498535, "learning_rate": 8e-05, "loss": 1.4269, "step": 3755 }, { "epoch": 0.5126245393749147, "grad_norm": 0.21172383427619934, "learning_rate": 8e-05, "loss": 1.4991, "step": 3756 }, { "epoch": 0.5127610208816705, "grad_norm": 0.21122828125953674, "learning_rate": 8e-05, "loss": 1.4362, "step": 3757 }, { "epoch": 0.5128975023884264, "grad_norm": 0.2103128582239151, "learning_rate": 8e-05, "loss": 1.4769, "step": 3758 }, { "epoch": 0.5130339838951822, "grad_norm": 0.21107755601406097, "learning_rate": 8e-05, "loss": 1.4483, "step": 3759 }, { "epoch": 0.513170465401938, "grad_norm": 0.20862391591072083, "learning_rate": 8e-05, "loss": 1.4979, "step": 3760 }, { "epoch": 0.5133069469086938, "grad_norm": 0.21597601473331451, "learning_rate": 8e-05, "loss": 1.4893, "step": 3761 }, { "epoch": 0.5134434284154497, "grad_norm": 0.2235720455646515, "learning_rate": 8e-05, "loss": 1.5069, "step": 3762 }, { "epoch": 0.5135799099222056, "grad_norm": 0.2106025516986847, "learning_rate": 8e-05, "loss": 1.4861, "step": 3763 }, { "epoch": 0.5137163914289614, "grad_norm": 0.19796723127365112, "learning_rate": 8e-05, "loss": 1.3393, "step": 3764 }, { "epoch": 0.5138528729357172, "grad_norm": 0.22598546743392944, "learning_rate": 8e-05, "loss": 1.4736, "step": 3765 }, { "epoch": 0.5139893544424731, "grad_norm": 0.21574561297893524, "learning_rate": 8e-05, "loss": 1.4597, "step": 3766 }, { "epoch": 0.5141258359492289, "grad_norm": 0.21665695309638977, "learning_rate": 8e-05, "loss": 1.4543, "step": 3767 }, { "epoch": 0.5142623174559847, "grad_norm": 0.2258956879377365, "learning_rate": 8e-05, "loss": 1.5133, "step": 3768 }, { "epoch": 0.5143987989627405, "grad_norm": 0.20740656554698944, "learning_rate": 8e-05, "loss": 1.4984, "step": 3769 }, { "epoch": 0.5145352804694964, "grad_norm": 0.2127566784620285, "learning_rate": 8e-05, "loss": 1.4984, "step": 3770 }, { "epoch": 0.5146717619762522, "grad_norm": 0.21286697685718536, "learning_rate": 8e-05, "loss": 1.4315, "step": 3771 }, { "epoch": 0.514808243483008, "grad_norm": 0.20938505232334137, "learning_rate": 8e-05, "loss": 1.4674, "step": 3772 }, { "epoch": 0.5149447249897638, "grad_norm": 0.2103201299905777, "learning_rate": 8e-05, "loss": 1.4769, "step": 3773 }, { "epoch": 0.5150812064965197, "grad_norm": 0.2165060192346573, "learning_rate": 8e-05, "loss": 1.4969, "step": 3774 }, { "epoch": 0.5152176880032756, "grad_norm": 0.20270776748657227, "learning_rate": 8e-05, "loss": 1.4778, "step": 3775 }, { "epoch": 0.5153541695100314, "grad_norm": 0.2126288115978241, "learning_rate": 8e-05, "loss": 1.5109, "step": 3776 }, { "epoch": 0.5154906510167873, "grad_norm": 0.21846488118171692, "learning_rate": 8e-05, "loss": 1.5282, "step": 3777 }, { "epoch": 0.5156271325235431, "grad_norm": 0.20854903757572174, "learning_rate": 8e-05, "loss": 1.5025, "step": 3778 }, { "epoch": 0.5157636140302989, "grad_norm": 0.194810152053833, "learning_rate": 8e-05, "loss": 1.4112, "step": 3779 }, { "epoch": 0.5159000955370547, "grad_norm": 0.21077163517475128, "learning_rate": 8e-05, "loss": 1.5123, "step": 3780 }, { "epoch": 0.5160365770438106, "grad_norm": 0.20316071808338165, "learning_rate": 8e-05, "loss": 1.4156, "step": 3781 }, { "epoch": 0.5161730585505664, "grad_norm": 0.21339119970798492, "learning_rate": 8e-05, "loss": 1.5224, "step": 3782 }, { "epoch": 0.5163095400573222, "grad_norm": 0.20145781338214874, "learning_rate": 8e-05, "loss": 1.4631, "step": 3783 }, { "epoch": 0.516446021564078, "grad_norm": 0.22384998202323914, "learning_rate": 8e-05, "loss": 1.5369, "step": 3784 }, { "epoch": 0.5165825030708339, "grad_norm": 0.2159966081380844, "learning_rate": 8e-05, "loss": 1.4336, "step": 3785 }, { "epoch": 0.5167189845775897, "grad_norm": 0.2209702730178833, "learning_rate": 8e-05, "loss": 1.5007, "step": 3786 }, { "epoch": 0.5168554660843455, "grad_norm": 0.23043642938137054, "learning_rate": 8e-05, "loss": 1.4736, "step": 3787 }, { "epoch": 0.5169919475911015, "grad_norm": 0.21323435008525848, "learning_rate": 8e-05, "loss": 1.4337, "step": 3788 }, { "epoch": 0.5171284290978573, "grad_norm": 0.21257047355175018, "learning_rate": 8e-05, "loss": 1.4677, "step": 3789 }, { "epoch": 0.5172649106046131, "grad_norm": 0.2186005413532257, "learning_rate": 8e-05, "loss": 1.4627, "step": 3790 }, { "epoch": 0.5174013921113689, "grad_norm": 0.22197014093399048, "learning_rate": 8e-05, "loss": 1.5594, "step": 3791 }, { "epoch": 0.5175378736181248, "grad_norm": 0.20388200879096985, "learning_rate": 8e-05, "loss": 1.421, "step": 3792 }, { "epoch": 0.5176743551248806, "grad_norm": 0.216207817196846, "learning_rate": 8e-05, "loss": 1.499, "step": 3793 }, { "epoch": 0.5178108366316364, "grad_norm": 0.22408992052078247, "learning_rate": 8e-05, "loss": 1.4913, "step": 3794 }, { "epoch": 0.5179473181383922, "grad_norm": 0.2174125611782074, "learning_rate": 8e-05, "loss": 1.5571, "step": 3795 }, { "epoch": 0.5180837996451481, "grad_norm": 0.20621757209300995, "learning_rate": 8e-05, "loss": 1.48, "step": 3796 }, { "epoch": 0.5182202811519039, "grad_norm": 0.21526406705379486, "learning_rate": 8e-05, "loss": 1.5889, "step": 3797 }, { "epoch": 0.5183567626586597, "grad_norm": 0.20705653727054596, "learning_rate": 8e-05, "loss": 1.5092, "step": 3798 }, { "epoch": 0.5184932441654155, "grad_norm": 0.21425952017307281, "learning_rate": 8e-05, "loss": 1.5011, "step": 3799 }, { "epoch": 0.5186297256721715, "grad_norm": 0.20879510045051575, "learning_rate": 8e-05, "loss": 1.4366, "step": 3800 }, { "epoch": 0.5187662071789273, "grad_norm": 0.21131199598312378, "learning_rate": 8e-05, "loss": 1.5479, "step": 3801 }, { "epoch": 0.5189026886856831, "grad_norm": 0.2092132568359375, "learning_rate": 8e-05, "loss": 1.4868, "step": 3802 }, { "epoch": 0.519039170192439, "grad_norm": 0.2116786539554596, "learning_rate": 8e-05, "loss": 1.532, "step": 3803 }, { "epoch": 0.5191756516991948, "grad_norm": 0.212627574801445, "learning_rate": 8e-05, "loss": 1.5068, "step": 3804 }, { "epoch": 0.5193121332059506, "grad_norm": 0.20355840027332306, "learning_rate": 8e-05, "loss": 1.4063, "step": 3805 }, { "epoch": 0.5194486147127064, "grad_norm": 0.21222664415836334, "learning_rate": 8e-05, "loss": 1.4489, "step": 3806 }, { "epoch": 0.5195850962194623, "grad_norm": 0.21173831820487976, "learning_rate": 8e-05, "loss": 1.5613, "step": 3807 }, { "epoch": 0.5197215777262181, "grad_norm": 0.21133606135845184, "learning_rate": 8e-05, "loss": 1.5506, "step": 3808 }, { "epoch": 0.5198580592329739, "grad_norm": 0.2097448706626892, "learning_rate": 8e-05, "loss": 1.4825, "step": 3809 }, { "epoch": 0.5199945407397297, "grad_norm": 0.2089434266090393, "learning_rate": 8e-05, "loss": 1.5649, "step": 3810 }, { "epoch": 0.5201310222464856, "grad_norm": 0.2001953423023224, "learning_rate": 8e-05, "loss": 1.4594, "step": 3811 }, { "epoch": 0.5202675037532415, "grad_norm": 0.20784500241279602, "learning_rate": 8e-05, "loss": 1.4969, "step": 3812 }, { "epoch": 0.5204039852599973, "grad_norm": 0.20602302253246307, "learning_rate": 8e-05, "loss": 1.4599, "step": 3813 }, { "epoch": 0.5205404667667531, "grad_norm": 0.21282580494880676, "learning_rate": 8e-05, "loss": 1.5503, "step": 3814 }, { "epoch": 0.520676948273509, "grad_norm": 0.20751439034938812, "learning_rate": 8e-05, "loss": 1.417, "step": 3815 }, { "epoch": 0.5208134297802648, "grad_norm": 0.20758989453315735, "learning_rate": 8e-05, "loss": 1.4878, "step": 3816 }, { "epoch": 0.5209499112870206, "grad_norm": 0.21621976792812347, "learning_rate": 8e-05, "loss": 1.5177, "step": 3817 }, { "epoch": 0.5210863927937764, "grad_norm": 0.21620725095272064, "learning_rate": 8e-05, "loss": 1.459, "step": 3818 }, { "epoch": 0.5212228743005323, "grad_norm": 0.2067711502313614, "learning_rate": 8e-05, "loss": 1.4068, "step": 3819 }, { "epoch": 0.5213593558072881, "grad_norm": 0.21770837903022766, "learning_rate": 8e-05, "loss": 1.5302, "step": 3820 }, { "epoch": 0.5214958373140439, "grad_norm": 0.21227087080478668, "learning_rate": 8e-05, "loss": 1.5182, "step": 3821 }, { "epoch": 0.5216323188207997, "grad_norm": 0.20959775149822235, "learning_rate": 8e-05, "loss": 1.5138, "step": 3822 }, { "epoch": 0.5217688003275556, "grad_norm": 0.2112349569797516, "learning_rate": 8e-05, "loss": 1.4204, "step": 3823 }, { "epoch": 0.5219052818343114, "grad_norm": 0.2108369767665863, "learning_rate": 8e-05, "loss": 1.5126, "step": 3824 }, { "epoch": 0.5220417633410673, "grad_norm": 0.21882425248622894, "learning_rate": 8e-05, "loss": 1.4691, "step": 3825 }, { "epoch": 0.5221782448478232, "grad_norm": 0.21501420438289642, "learning_rate": 8e-05, "loss": 1.5139, "step": 3826 }, { "epoch": 0.522314726354579, "grad_norm": 0.21300239861011505, "learning_rate": 8e-05, "loss": 1.4638, "step": 3827 }, { "epoch": 0.5224512078613348, "grad_norm": 0.22172772884368896, "learning_rate": 8e-05, "loss": 1.56, "step": 3828 }, { "epoch": 0.5225876893680906, "grad_norm": 0.22500614821910858, "learning_rate": 8e-05, "loss": 1.487, "step": 3829 }, { "epoch": 0.5227241708748465, "grad_norm": 0.21296440064907074, "learning_rate": 8e-05, "loss": 1.4943, "step": 3830 }, { "epoch": 0.5228606523816023, "grad_norm": 0.21398961544036865, "learning_rate": 8e-05, "loss": 1.497, "step": 3831 }, { "epoch": 0.5229971338883581, "grad_norm": 0.2164476215839386, "learning_rate": 8e-05, "loss": 1.4311, "step": 3832 }, { "epoch": 0.5231336153951139, "grad_norm": 0.2061043381690979, "learning_rate": 8e-05, "loss": 1.5311, "step": 3833 }, { "epoch": 0.5232700969018698, "grad_norm": 0.2058311253786087, "learning_rate": 8e-05, "loss": 1.5408, "step": 3834 }, { "epoch": 0.5234065784086256, "grad_norm": 0.21108292043209076, "learning_rate": 8e-05, "loss": 1.4624, "step": 3835 }, { "epoch": 0.5235430599153814, "grad_norm": 0.21557392179965973, "learning_rate": 8e-05, "loss": 1.5074, "step": 3836 }, { "epoch": 0.5236795414221374, "grad_norm": 0.21924269199371338, "learning_rate": 8e-05, "loss": 1.4889, "step": 3837 }, { "epoch": 0.5238160229288932, "grad_norm": 0.21423862874507904, "learning_rate": 8e-05, "loss": 1.4989, "step": 3838 }, { "epoch": 0.523952504435649, "grad_norm": 0.20681478083133698, "learning_rate": 8e-05, "loss": 1.5162, "step": 3839 }, { "epoch": 0.5240889859424048, "grad_norm": 0.2093067318201065, "learning_rate": 8e-05, "loss": 1.4896, "step": 3840 }, { "epoch": 0.5242254674491607, "grad_norm": 0.20795153081417084, "learning_rate": 8e-05, "loss": 1.4837, "step": 3841 }, { "epoch": 0.5243619489559165, "grad_norm": 0.20789562165737152, "learning_rate": 8e-05, "loss": 1.4681, "step": 3842 }, { "epoch": 0.5244984304626723, "grad_norm": 0.2139589488506317, "learning_rate": 8e-05, "loss": 1.5316, "step": 3843 }, { "epoch": 0.5246349119694281, "grad_norm": 0.2126191109418869, "learning_rate": 8e-05, "loss": 1.5012, "step": 3844 }, { "epoch": 0.524771393476184, "grad_norm": 0.2106030136346817, "learning_rate": 8e-05, "loss": 1.5367, "step": 3845 }, { "epoch": 0.5249078749829398, "grad_norm": 0.20340067148208618, "learning_rate": 8e-05, "loss": 1.4368, "step": 3846 }, { "epoch": 0.5250443564896956, "grad_norm": 0.20585723221302032, "learning_rate": 8e-05, "loss": 1.4946, "step": 3847 }, { "epoch": 0.5251808379964514, "grad_norm": 0.20665878057479858, "learning_rate": 8e-05, "loss": 1.3927, "step": 3848 }, { "epoch": 0.5253173195032074, "grad_norm": 0.20955102145671844, "learning_rate": 8e-05, "loss": 1.5592, "step": 3849 }, { "epoch": 0.5254538010099632, "grad_norm": 0.21391011774539948, "learning_rate": 8e-05, "loss": 1.5452, "step": 3850 }, { "epoch": 0.525590282516719, "grad_norm": 0.21142229437828064, "learning_rate": 8e-05, "loss": 1.4867, "step": 3851 }, { "epoch": 0.5257267640234748, "grad_norm": 0.20864613354206085, "learning_rate": 8e-05, "loss": 1.5329, "step": 3852 }, { "epoch": 0.5258632455302307, "grad_norm": 0.20696291327476501, "learning_rate": 8e-05, "loss": 1.4782, "step": 3853 }, { "epoch": 0.5259997270369865, "grad_norm": 0.20553968846797943, "learning_rate": 8e-05, "loss": 1.4263, "step": 3854 }, { "epoch": 0.5261362085437423, "grad_norm": 0.20468705892562866, "learning_rate": 8e-05, "loss": 1.4806, "step": 3855 }, { "epoch": 0.5262726900504981, "grad_norm": 0.20709489285945892, "learning_rate": 8e-05, "loss": 1.5025, "step": 3856 }, { "epoch": 0.526409171557254, "grad_norm": 0.20180247724056244, "learning_rate": 8e-05, "loss": 1.4691, "step": 3857 }, { "epoch": 0.5265456530640098, "grad_norm": 0.21098428964614868, "learning_rate": 8e-05, "loss": 1.5157, "step": 3858 }, { "epoch": 0.5266821345707656, "grad_norm": 0.20508822798728943, "learning_rate": 8e-05, "loss": 1.462, "step": 3859 }, { "epoch": 0.5268186160775215, "grad_norm": 0.221853107213974, "learning_rate": 8e-05, "loss": 1.6438, "step": 3860 }, { "epoch": 0.5269550975842773, "grad_norm": 0.2085074931383133, "learning_rate": 8e-05, "loss": 1.504, "step": 3861 }, { "epoch": 0.5270915790910332, "grad_norm": 0.210953027009964, "learning_rate": 8e-05, "loss": 1.5135, "step": 3862 }, { "epoch": 0.527228060597789, "grad_norm": 0.20656022429466248, "learning_rate": 8e-05, "loss": 1.4469, "step": 3863 }, { "epoch": 0.5273645421045449, "grad_norm": 0.22291792929172516, "learning_rate": 8e-05, "loss": 1.5327, "step": 3864 }, { "epoch": 0.5275010236113007, "grad_norm": 0.20535455644130707, "learning_rate": 8e-05, "loss": 1.4194, "step": 3865 }, { "epoch": 0.5276375051180565, "grad_norm": 0.2144753783941269, "learning_rate": 8e-05, "loss": 1.4931, "step": 3866 }, { "epoch": 0.5277739866248123, "grad_norm": 0.21257278323173523, "learning_rate": 8e-05, "loss": 1.4718, "step": 3867 }, { "epoch": 0.5279104681315682, "grad_norm": 0.2092253714799881, "learning_rate": 8e-05, "loss": 1.5194, "step": 3868 }, { "epoch": 0.528046949638324, "grad_norm": 0.21771551668643951, "learning_rate": 8e-05, "loss": 1.5127, "step": 3869 }, { "epoch": 0.5281834311450798, "grad_norm": 0.2243928760290146, "learning_rate": 8e-05, "loss": 1.5146, "step": 3870 }, { "epoch": 0.5283199126518356, "grad_norm": 0.21476124227046967, "learning_rate": 8e-05, "loss": 1.4115, "step": 3871 }, { "epoch": 0.5284563941585915, "grad_norm": 0.22355817258358002, "learning_rate": 8e-05, "loss": 1.5377, "step": 3872 }, { "epoch": 0.5285928756653473, "grad_norm": 0.20510250329971313, "learning_rate": 8e-05, "loss": 1.4873, "step": 3873 }, { "epoch": 0.5287293571721032, "grad_norm": 0.21323156356811523, "learning_rate": 8e-05, "loss": 1.491, "step": 3874 }, { "epoch": 0.5288658386788591, "grad_norm": 0.22806447744369507, "learning_rate": 8e-05, "loss": 1.5237, "step": 3875 }, { "epoch": 0.5290023201856149, "grad_norm": 0.21466396749019623, "learning_rate": 8e-05, "loss": 1.4813, "step": 3876 }, { "epoch": 0.5291388016923707, "grad_norm": 0.21021884679794312, "learning_rate": 8e-05, "loss": 1.4875, "step": 3877 }, { "epoch": 0.5292752831991265, "grad_norm": 0.217983677983284, "learning_rate": 8e-05, "loss": 1.5165, "step": 3878 }, { "epoch": 0.5294117647058824, "grad_norm": 0.21329325437545776, "learning_rate": 8e-05, "loss": 1.4848, "step": 3879 }, { "epoch": 0.5295482462126382, "grad_norm": 0.20736046135425568, "learning_rate": 8e-05, "loss": 1.4447, "step": 3880 }, { "epoch": 0.529684727719394, "grad_norm": 0.21129010617733002, "learning_rate": 8e-05, "loss": 1.5435, "step": 3881 }, { "epoch": 0.5298212092261498, "grad_norm": 0.21744251251220703, "learning_rate": 8e-05, "loss": 1.553, "step": 3882 }, { "epoch": 0.5299576907329057, "grad_norm": 0.19702544808387756, "learning_rate": 8e-05, "loss": 1.4878, "step": 3883 }, { "epoch": 0.5300941722396615, "grad_norm": 0.20643533766269684, "learning_rate": 8e-05, "loss": 1.4518, "step": 3884 }, { "epoch": 0.5302306537464173, "grad_norm": 0.20823663473129272, "learning_rate": 8e-05, "loss": 1.4465, "step": 3885 }, { "epoch": 0.5303671352531732, "grad_norm": 0.21344545483589172, "learning_rate": 8e-05, "loss": 1.4446, "step": 3886 }, { "epoch": 0.5305036167599291, "grad_norm": 0.21572035551071167, "learning_rate": 8e-05, "loss": 1.4988, "step": 3887 }, { "epoch": 0.5306400982666849, "grad_norm": 0.21610423922538757, "learning_rate": 8e-05, "loss": 1.486, "step": 3888 }, { "epoch": 0.5307765797734407, "grad_norm": 0.2159118950366974, "learning_rate": 8e-05, "loss": 1.4806, "step": 3889 }, { "epoch": 0.5309130612801966, "grad_norm": 0.2053048312664032, "learning_rate": 8e-05, "loss": 1.4155, "step": 3890 }, { "epoch": 0.5310495427869524, "grad_norm": 0.2087618112564087, "learning_rate": 8e-05, "loss": 1.4348, "step": 3891 }, { "epoch": 0.5311860242937082, "grad_norm": 0.21650056540966034, "learning_rate": 8e-05, "loss": 1.488, "step": 3892 }, { "epoch": 0.531322505800464, "grad_norm": 0.20754197239875793, "learning_rate": 8e-05, "loss": 1.4495, "step": 3893 }, { "epoch": 0.5314589873072199, "grad_norm": 0.20827847719192505, "learning_rate": 8e-05, "loss": 1.4734, "step": 3894 }, { "epoch": 0.5315954688139757, "grad_norm": 0.20556117594242096, "learning_rate": 8e-05, "loss": 1.4977, "step": 3895 }, { "epoch": 0.5317319503207315, "grad_norm": 0.22676751017570496, "learning_rate": 8e-05, "loss": 1.5263, "step": 3896 }, { "epoch": 0.5318684318274873, "grad_norm": 0.21132470667362213, "learning_rate": 8e-05, "loss": 1.5216, "step": 3897 }, { "epoch": 0.5320049133342432, "grad_norm": 0.2211170196533203, "learning_rate": 8e-05, "loss": 1.5794, "step": 3898 }, { "epoch": 0.5321413948409991, "grad_norm": 0.21029207110404968, "learning_rate": 8e-05, "loss": 1.4991, "step": 3899 }, { "epoch": 0.5322778763477549, "grad_norm": 0.20929358899593353, "learning_rate": 8e-05, "loss": 1.4556, "step": 3900 }, { "epoch": 0.5324143578545107, "grad_norm": 0.20441946387290955, "learning_rate": 8e-05, "loss": 1.461, "step": 3901 }, { "epoch": 0.5325508393612666, "grad_norm": 0.21051496267318726, "learning_rate": 8e-05, "loss": 1.5316, "step": 3902 }, { "epoch": 0.5326873208680224, "grad_norm": 0.20627525448799133, "learning_rate": 8e-05, "loss": 1.4385, "step": 3903 }, { "epoch": 0.5328238023747782, "grad_norm": 0.20343826711177826, "learning_rate": 8e-05, "loss": 1.4661, "step": 3904 }, { "epoch": 0.532960283881534, "grad_norm": 0.21207121014595032, "learning_rate": 8e-05, "loss": 1.474, "step": 3905 }, { "epoch": 0.5330967653882899, "grad_norm": 0.20420005917549133, "learning_rate": 8e-05, "loss": 1.4499, "step": 3906 }, { "epoch": 0.5332332468950457, "grad_norm": 0.22073200345039368, "learning_rate": 8e-05, "loss": 1.505, "step": 3907 }, { "epoch": 0.5333697284018015, "grad_norm": 0.21736709773540497, "learning_rate": 8e-05, "loss": 1.5949, "step": 3908 }, { "epoch": 0.5335062099085573, "grad_norm": 0.2219231128692627, "learning_rate": 8e-05, "loss": 1.5289, "step": 3909 }, { "epoch": 0.5336426914153132, "grad_norm": 0.20779363811016083, "learning_rate": 8e-05, "loss": 1.5031, "step": 3910 }, { "epoch": 0.5337791729220691, "grad_norm": 0.20677430927753448, "learning_rate": 8e-05, "loss": 1.4398, "step": 3911 }, { "epoch": 0.5339156544288249, "grad_norm": 0.20664069056510925, "learning_rate": 8e-05, "loss": 1.4434, "step": 3912 }, { "epoch": 0.5340521359355808, "grad_norm": 0.20454922318458557, "learning_rate": 8e-05, "loss": 1.4163, "step": 3913 }, { "epoch": 0.5341886174423366, "grad_norm": 0.2075737863779068, "learning_rate": 8e-05, "loss": 1.4388, "step": 3914 }, { "epoch": 0.5343250989490924, "grad_norm": 0.22747676074504852, "learning_rate": 8e-05, "loss": 1.5373, "step": 3915 }, { "epoch": 0.5344615804558482, "grad_norm": 0.20168982446193695, "learning_rate": 8e-05, "loss": 1.4578, "step": 3916 }, { "epoch": 0.5345980619626041, "grad_norm": 0.2137005627155304, "learning_rate": 8e-05, "loss": 1.5625, "step": 3917 }, { "epoch": 0.5347345434693599, "grad_norm": 0.20854716002941132, "learning_rate": 8e-05, "loss": 1.5014, "step": 3918 }, { "epoch": 0.5348710249761157, "grad_norm": 0.21440574526786804, "learning_rate": 8e-05, "loss": 1.4709, "step": 3919 }, { "epoch": 0.5350075064828715, "grad_norm": 0.21601231396198273, "learning_rate": 8e-05, "loss": 1.4124, "step": 3920 }, { "epoch": 0.5351439879896274, "grad_norm": 0.21279220283031464, "learning_rate": 8e-05, "loss": 1.49, "step": 3921 }, { "epoch": 0.5352804694963832, "grad_norm": 0.21807019412517548, "learning_rate": 8e-05, "loss": 1.5592, "step": 3922 }, { "epoch": 0.5354169510031391, "grad_norm": 0.21771401166915894, "learning_rate": 8e-05, "loss": 1.4738, "step": 3923 }, { "epoch": 0.535553432509895, "grad_norm": 0.22003212571144104, "learning_rate": 8e-05, "loss": 1.5016, "step": 3924 }, { "epoch": 0.5356899140166508, "grad_norm": 0.209725484251976, "learning_rate": 8e-05, "loss": 1.385, "step": 3925 }, { "epoch": 0.5358263955234066, "grad_norm": 0.21322984993457794, "learning_rate": 8e-05, "loss": 1.4744, "step": 3926 }, { "epoch": 0.5359628770301624, "grad_norm": 0.21876545250415802, "learning_rate": 8e-05, "loss": 1.4844, "step": 3927 }, { "epoch": 0.5360993585369183, "grad_norm": 0.21539653837680817, "learning_rate": 8e-05, "loss": 1.4864, "step": 3928 }, { "epoch": 0.5362358400436741, "grad_norm": 0.21845944225788116, "learning_rate": 8e-05, "loss": 1.4573, "step": 3929 }, { "epoch": 0.5363723215504299, "grad_norm": 0.21582849323749542, "learning_rate": 8e-05, "loss": 1.4575, "step": 3930 }, { "epoch": 0.5365088030571857, "grad_norm": 0.21010752022266388, "learning_rate": 8e-05, "loss": 1.519, "step": 3931 }, { "epoch": 0.5366452845639416, "grad_norm": 0.20294950902462006, "learning_rate": 8e-05, "loss": 1.4673, "step": 3932 }, { "epoch": 0.5367817660706974, "grad_norm": 0.21116815507411957, "learning_rate": 8e-05, "loss": 1.4432, "step": 3933 }, { "epoch": 0.5369182475774532, "grad_norm": 0.2148934304714203, "learning_rate": 8e-05, "loss": 1.468, "step": 3934 }, { "epoch": 0.537054729084209, "grad_norm": 0.21122683584690094, "learning_rate": 8e-05, "loss": 1.5065, "step": 3935 }, { "epoch": 0.537191210590965, "grad_norm": 0.2137981504201889, "learning_rate": 8e-05, "loss": 1.4897, "step": 3936 }, { "epoch": 0.5373276920977208, "grad_norm": 0.2208615243434906, "learning_rate": 8e-05, "loss": 1.5134, "step": 3937 }, { "epoch": 0.5374641736044766, "grad_norm": 0.21685846149921417, "learning_rate": 8e-05, "loss": 1.5534, "step": 3938 }, { "epoch": 0.5376006551112325, "grad_norm": 0.21427197754383087, "learning_rate": 8e-05, "loss": 1.5425, "step": 3939 }, { "epoch": 0.5377371366179883, "grad_norm": 0.2091689109802246, "learning_rate": 8e-05, "loss": 1.4375, "step": 3940 }, { "epoch": 0.5378736181247441, "grad_norm": 0.20809611678123474, "learning_rate": 8e-05, "loss": 1.4603, "step": 3941 }, { "epoch": 0.5380100996314999, "grad_norm": 0.20635966956615448, "learning_rate": 8e-05, "loss": 1.5201, "step": 3942 }, { "epoch": 0.5381465811382558, "grad_norm": 0.2120577096939087, "learning_rate": 8e-05, "loss": 1.4263, "step": 3943 }, { "epoch": 0.5382830626450116, "grad_norm": 0.20983166992664337, "learning_rate": 8e-05, "loss": 1.5175, "step": 3944 }, { "epoch": 0.5384195441517674, "grad_norm": 0.22868888080120087, "learning_rate": 8e-05, "loss": 1.5333, "step": 3945 }, { "epoch": 0.5385560256585232, "grad_norm": 0.22498992085456848, "learning_rate": 8e-05, "loss": 1.432, "step": 3946 }, { "epoch": 0.538692507165279, "grad_norm": 0.20812679827213287, "learning_rate": 8e-05, "loss": 1.4087, "step": 3947 }, { "epoch": 0.538828988672035, "grad_norm": 0.2148916870355606, "learning_rate": 8e-05, "loss": 1.4917, "step": 3948 }, { "epoch": 0.5389654701787908, "grad_norm": 0.22119343280792236, "learning_rate": 8e-05, "loss": 1.5319, "step": 3949 }, { "epoch": 0.5391019516855466, "grad_norm": 0.20873288810253143, "learning_rate": 8e-05, "loss": 1.421, "step": 3950 }, { "epoch": 0.5392384331923025, "grad_norm": 0.20993028581142426, "learning_rate": 8e-05, "loss": 1.4836, "step": 3951 }, { "epoch": 0.5393749146990583, "grad_norm": 0.21094287931919098, "learning_rate": 8e-05, "loss": 1.4693, "step": 3952 }, { "epoch": 0.5395113962058141, "grad_norm": 0.21700243651866913, "learning_rate": 8e-05, "loss": 1.5259, "step": 3953 }, { "epoch": 0.53964787771257, "grad_norm": 0.21250613033771515, "learning_rate": 8e-05, "loss": 1.5114, "step": 3954 }, { "epoch": 0.5397843592193258, "grad_norm": 0.23623088002204895, "learning_rate": 8e-05, "loss": 1.5033, "step": 3955 }, { "epoch": 0.5399208407260816, "grad_norm": 0.21170058846473694, "learning_rate": 8e-05, "loss": 1.4847, "step": 3956 }, { "epoch": 0.5400573222328374, "grad_norm": 0.21862830221652985, "learning_rate": 8e-05, "loss": 1.5265, "step": 3957 }, { "epoch": 0.5401938037395932, "grad_norm": 0.20765745639801025, "learning_rate": 8e-05, "loss": 1.431, "step": 3958 }, { "epoch": 0.5403302852463491, "grad_norm": 0.21233604848384857, "learning_rate": 8e-05, "loss": 1.5052, "step": 3959 }, { "epoch": 0.5404667667531049, "grad_norm": 0.208890900015831, "learning_rate": 8e-05, "loss": 1.4278, "step": 3960 }, { "epoch": 0.5406032482598608, "grad_norm": 0.20955172181129456, "learning_rate": 8e-05, "loss": 1.4627, "step": 3961 }, { "epoch": 0.5407397297666167, "grad_norm": 0.22223545610904694, "learning_rate": 8e-05, "loss": 1.5845, "step": 3962 }, { "epoch": 0.5408762112733725, "grad_norm": 0.20734697580337524, "learning_rate": 8e-05, "loss": 1.4233, "step": 3963 }, { "epoch": 0.5410126927801283, "grad_norm": 0.21651016175746918, "learning_rate": 8e-05, "loss": 1.5039, "step": 3964 }, { "epoch": 0.5411491742868841, "grad_norm": 0.21300671994686127, "learning_rate": 8e-05, "loss": 1.5127, "step": 3965 }, { "epoch": 0.54128565579364, "grad_norm": 0.21661721169948578, "learning_rate": 8e-05, "loss": 1.4919, "step": 3966 }, { "epoch": 0.5414221373003958, "grad_norm": 0.215090811252594, "learning_rate": 8e-05, "loss": 1.5335, "step": 3967 }, { "epoch": 0.5415586188071516, "grad_norm": 0.2087790071964264, "learning_rate": 8e-05, "loss": 1.4603, "step": 3968 }, { "epoch": 0.5416951003139074, "grad_norm": 0.20841914415359497, "learning_rate": 8e-05, "loss": 1.4815, "step": 3969 }, { "epoch": 0.5418315818206633, "grad_norm": 0.2188490480184555, "learning_rate": 8e-05, "loss": 1.4941, "step": 3970 }, { "epoch": 0.5419680633274191, "grad_norm": 0.20759616792201996, "learning_rate": 8e-05, "loss": 1.4166, "step": 3971 }, { "epoch": 0.5421045448341749, "grad_norm": 0.21219879388809204, "learning_rate": 8e-05, "loss": 1.4529, "step": 3972 }, { "epoch": 0.5422410263409309, "grad_norm": 0.20806318521499634, "learning_rate": 8e-05, "loss": 1.4983, "step": 3973 }, { "epoch": 0.5423775078476867, "grad_norm": 0.21870160102844238, "learning_rate": 8e-05, "loss": 1.4941, "step": 3974 }, { "epoch": 0.5425139893544425, "grad_norm": 0.21698282659053802, "learning_rate": 8e-05, "loss": 1.5125, "step": 3975 }, { "epoch": 0.5426504708611983, "grad_norm": 0.21326006948947906, "learning_rate": 8e-05, "loss": 1.4657, "step": 3976 }, { "epoch": 0.5427869523679542, "grad_norm": 0.2060515582561493, "learning_rate": 8e-05, "loss": 1.5235, "step": 3977 }, { "epoch": 0.54292343387471, "grad_norm": 0.20540425181388855, "learning_rate": 8e-05, "loss": 1.484, "step": 3978 }, { "epoch": 0.5430599153814658, "grad_norm": 0.2081085592508316, "learning_rate": 8e-05, "loss": 1.4229, "step": 3979 }, { "epoch": 0.5431963968882216, "grad_norm": 0.21043510735034943, "learning_rate": 8e-05, "loss": 1.468, "step": 3980 }, { "epoch": 0.5433328783949775, "grad_norm": 0.21225225925445557, "learning_rate": 8e-05, "loss": 1.5107, "step": 3981 }, { "epoch": 0.5434693599017333, "grad_norm": 0.20884309709072113, "learning_rate": 8e-05, "loss": 1.458, "step": 3982 }, { "epoch": 0.5436058414084891, "grad_norm": 0.2209782898426056, "learning_rate": 8e-05, "loss": 1.5313, "step": 3983 }, { "epoch": 0.5437423229152449, "grad_norm": 0.20940878987312317, "learning_rate": 8e-05, "loss": 1.4542, "step": 3984 }, { "epoch": 0.5438788044220009, "grad_norm": 0.20551931858062744, "learning_rate": 8e-05, "loss": 1.5271, "step": 3985 }, { "epoch": 0.5440152859287567, "grad_norm": 0.21722780168056488, "learning_rate": 8e-05, "loss": 1.5348, "step": 3986 }, { "epoch": 0.5441517674355125, "grad_norm": 0.21256360411643982, "learning_rate": 8e-05, "loss": 1.5125, "step": 3987 }, { "epoch": 0.5442882489422683, "grad_norm": 0.21507519483566284, "learning_rate": 8e-05, "loss": 1.4884, "step": 3988 }, { "epoch": 0.5444247304490242, "grad_norm": 0.20599912106990814, "learning_rate": 8e-05, "loss": 1.4976, "step": 3989 }, { "epoch": 0.54456121195578, "grad_norm": 0.20583081245422363, "learning_rate": 8e-05, "loss": 1.4143, "step": 3990 }, { "epoch": 0.5446976934625358, "grad_norm": 0.22008644044399261, "learning_rate": 8e-05, "loss": 1.5617, "step": 3991 }, { "epoch": 0.5448341749692917, "grad_norm": 0.20400090515613556, "learning_rate": 8e-05, "loss": 1.484, "step": 3992 }, { "epoch": 0.5449706564760475, "grad_norm": 0.2150939404964447, "learning_rate": 8e-05, "loss": 1.5551, "step": 3993 }, { "epoch": 0.5451071379828033, "grad_norm": 0.21547016501426697, "learning_rate": 8e-05, "loss": 1.5237, "step": 3994 }, { "epoch": 0.5452436194895591, "grad_norm": 0.2080121487379074, "learning_rate": 8e-05, "loss": 1.4607, "step": 3995 }, { "epoch": 0.545380100996315, "grad_norm": 0.2108195722103119, "learning_rate": 8e-05, "loss": 1.4577, "step": 3996 }, { "epoch": 0.5455165825030708, "grad_norm": 0.21768449246883392, "learning_rate": 8e-05, "loss": 1.4469, "step": 3997 }, { "epoch": 0.5456530640098267, "grad_norm": 0.21431384980678558, "learning_rate": 8e-05, "loss": 1.5173, "step": 3998 }, { "epoch": 0.5457895455165825, "grad_norm": 0.2129233330488205, "learning_rate": 8e-05, "loss": 1.463, "step": 3999 }, { "epoch": 0.5459260270233384, "grad_norm": 0.2187803089618683, "learning_rate": 8e-05, "loss": 1.5369, "step": 4000 }, { "epoch": 0.5460625085300942, "grad_norm": 0.21778717637062073, "learning_rate": 8e-05, "loss": 1.5528, "step": 4001 }, { "epoch": 0.54619899003685, "grad_norm": 0.2025071531534195, "learning_rate": 8e-05, "loss": 1.4788, "step": 4002 }, { "epoch": 0.5463354715436058, "grad_norm": 0.21569125354290009, "learning_rate": 8e-05, "loss": 1.5646, "step": 4003 }, { "epoch": 0.5464719530503617, "grad_norm": 0.21426476538181305, "learning_rate": 8e-05, "loss": 1.5413, "step": 4004 }, { "epoch": 0.5466084345571175, "grad_norm": 0.21295900642871857, "learning_rate": 8e-05, "loss": 1.4776, "step": 4005 }, { "epoch": 0.5467449160638733, "grad_norm": 0.2086125612258911, "learning_rate": 8e-05, "loss": 1.5145, "step": 4006 }, { "epoch": 0.5468813975706291, "grad_norm": 0.21766814589500427, "learning_rate": 8e-05, "loss": 1.5284, "step": 4007 }, { "epoch": 0.547017879077385, "grad_norm": 0.21663233637809753, "learning_rate": 8e-05, "loss": 1.5655, "step": 4008 }, { "epoch": 0.5471543605841408, "grad_norm": 0.21130946278572083, "learning_rate": 8e-05, "loss": 1.4747, "step": 4009 }, { "epoch": 0.5472908420908967, "grad_norm": 0.20893415808677673, "learning_rate": 8e-05, "loss": 1.4409, "step": 4010 }, { "epoch": 0.5474273235976526, "grad_norm": 0.21529194712638855, "learning_rate": 8e-05, "loss": 1.4994, "step": 4011 }, { "epoch": 0.5475638051044084, "grad_norm": 0.2148546427488327, "learning_rate": 8e-05, "loss": 1.5149, "step": 4012 }, { "epoch": 0.5477002866111642, "grad_norm": 0.21923306584358215, "learning_rate": 8e-05, "loss": 1.5096, "step": 4013 }, { "epoch": 0.54783676811792, "grad_norm": 0.21592554450035095, "learning_rate": 8e-05, "loss": 1.4935, "step": 4014 }, { "epoch": 0.5479732496246759, "grad_norm": 0.2120472490787506, "learning_rate": 8e-05, "loss": 1.4518, "step": 4015 }, { "epoch": 0.5481097311314317, "grad_norm": 0.21756690740585327, "learning_rate": 8e-05, "loss": 1.4899, "step": 4016 }, { "epoch": 0.5482462126381875, "grad_norm": 0.21122270822525024, "learning_rate": 8e-05, "loss": 1.5522, "step": 4017 }, { "epoch": 0.5483826941449433, "grad_norm": 0.2096116691827774, "learning_rate": 8e-05, "loss": 1.5042, "step": 4018 }, { "epoch": 0.5485191756516992, "grad_norm": 0.212167888879776, "learning_rate": 8e-05, "loss": 1.5004, "step": 4019 }, { "epoch": 0.548655657158455, "grad_norm": 0.2202005535364151, "learning_rate": 8e-05, "loss": 1.5873, "step": 4020 }, { "epoch": 0.5487921386652108, "grad_norm": 0.20982125401496887, "learning_rate": 8e-05, "loss": 1.4891, "step": 4021 }, { "epoch": 0.5489286201719668, "grad_norm": 0.21076913177967072, "learning_rate": 8e-05, "loss": 1.5513, "step": 4022 }, { "epoch": 0.5490651016787226, "grad_norm": 0.21276473999023438, "learning_rate": 8e-05, "loss": 1.4744, "step": 4023 }, { "epoch": 0.5492015831854784, "grad_norm": 0.2097943127155304, "learning_rate": 8e-05, "loss": 1.5022, "step": 4024 }, { "epoch": 0.5493380646922342, "grad_norm": 0.2082330286502838, "learning_rate": 8e-05, "loss": 1.513, "step": 4025 }, { "epoch": 0.54947454619899, "grad_norm": 0.21301335096359253, "learning_rate": 8e-05, "loss": 1.5428, "step": 4026 }, { "epoch": 0.5496110277057459, "grad_norm": 0.21643485128879547, "learning_rate": 8e-05, "loss": 1.4966, "step": 4027 }, { "epoch": 0.5497475092125017, "grad_norm": 0.20908930897712708, "learning_rate": 8e-05, "loss": 1.5502, "step": 4028 }, { "epoch": 0.5498839907192575, "grad_norm": 0.22110337018966675, "learning_rate": 8e-05, "loss": 1.5063, "step": 4029 }, { "epoch": 0.5500204722260134, "grad_norm": 0.21730099618434906, "learning_rate": 8e-05, "loss": 1.5338, "step": 4030 }, { "epoch": 0.5501569537327692, "grad_norm": 0.21115638315677643, "learning_rate": 8e-05, "loss": 1.4751, "step": 4031 }, { "epoch": 0.550293435239525, "grad_norm": 0.21456487476825714, "learning_rate": 8e-05, "loss": 1.4253, "step": 4032 }, { "epoch": 0.5504299167462808, "grad_norm": 0.21453270316123962, "learning_rate": 8e-05, "loss": 1.5473, "step": 4033 }, { "epoch": 0.5505663982530367, "grad_norm": 0.20871831476688385, "learning_rate": 8e-05, "loss": 1.4433, "step": 4034 }, { "epoch": 0.5507028797597926, "grad_norm": 0.21607492864131927, "learning_rate": 8e-05, "loss": 1.5027, "step": 4035 }, { "epoch": 0.5508393612665484, "grad_norm": 0.22087296843528748, "learning_rate": 8e-05, "loss": 1.557, "step": 4036 }, { "epoch": 0.5509758427733042, "grad_norm": 0.22039352357387543, "learning_rate": 8e-05, "loss": 1.5249, "step": 4037 }, { "epoch": 0.5511123242800601, "grad_norm": 0.20952408015727997, "learning_rate": 8e-05, "loss": 1.5491, "step": 4038 }, { "epoch": 0.5512488057868159, "grad_norm": 0.21672017872333527, "learning_rate": 8e-05, "loss": 1.5217, "step": 4039 }, { "epoch": 0.5513852872935717, "grad_norm": 0.2058047205209732, "learning_rate": 8e-05, "loss": 1.402, "step": 4040 }, { "epoch": 0.5515217688003275, "grad_norm": 0.21104593575000763, "learning_rate": 8e-05, "loss": 1.4816, "step": 4041 }, { "epoch": 0.5516582503070834, "grad_norm": 0.22632263600826263, "learning_rate": 8e-05, "loss": 1.5738, "step": 4042 }, { "epoch": 0.5517947318138392, "grad_norm": 0.2167060524225235, "learning_rate": 8e-05, "loss": 1.5209, "step": 4043 }, { "epoch": 0.551931213320595, "grad_norm": 0.2084483802318573, "learning_rate": 8e-05, "loss": 1.4777, "step": 4044 }, { "epoch": 0.5520676948273509, "grad_norm": 0.20652832090854645, "learning_rate": 8e-05, "loss": 1.4419, "step": 4045 }, { "epoch": 0.5522041763341067, "grad_norm": 0.20899629592895508, "learning_rate": 8e-05, "loss": 1.512, "step": 4046 }, { "epoch": 0.5523406578408626, "grad_norm": 0.22118784487247467, "learning_rate": 8e-05, "loss": 1.5536, "step": 4047 }, { "epoch": 0.5524771393476184, "grad_norm": 0.20944646000862122, "learning_rate": 8e-05, "loss": 1.4947, "step": 4048 }, { "epoch": 0.5526136208543743, "grad_norm": 0.2094104290008545, "learning_rate": 8e-05, "loss": 1.4742, "step": 4049 }, { "epoch": 0.5527501023611301, "grad_norm": 0.21074897050857544, "learning_rate": 8e-05, "loss": 1.4452, "step": 4050 }, { "epoch": 0.5528865838678859, "grad_norm": 0.21322181820869446, "learning_rate": 8e-05, "loss": 1.4864, "step": 4051 }, { "epoch": 0.5530230653746417, "grad_norm": 0.2102811634540558, "learning_rate": 8e-05, "loss": 1.4214, "step": 4052 }, { "epoch": 0.5531595468813976, "grad_norm": 0.20323963463306427, "learning_rate": 8e-05, "loss": 1.4517, "step": 4053 }, { "epoch": 0.5532960283881534, "grad_norm": 0.22281189262866974, "learning_rate": 8e-05, "loss": 1.5203, "step": 4054 }, { "epoch": 0.5534325098949092, "grad_norm": 0.21449768543243408, "learning_rate": 8e-05, "loss": 1.5186, "step": 4055 }, { "epoch": 0.553568991401665, "grad_norm": 0.22041170299053192, "learning_rate": 8e-05, "loss": 1.4704, "step": 4056 }, { "epoch": 0.5537054729084209, "grad_norm": 0.22577407956123352, "learning_rate": 8e-05, "loss": 1.5236, "step": 4057 }, { "epoch": 0.5538419544151767, "grad_norm": 0.20272588729858398, "learning_rate": 8e-05, "loss": 1.4564, "step": 4058 }, { "epoch": 0.5539784359219326, "grad_norm": 0.2118610143661499, "learning_rate": 8e-05, "loss": 1.4368, "step": 4059 }, { "epoch": 0.5541149174286885, "grad_norm": 0.213422030210495, "learning_rate": 8e-05, "loss": 1.4711, "step": 4060 }, { "epoch": 0.5542513989354443, "grad_norm": 0.2121119350194931, "learning_rate": 8e-05, "loss": 1.4682, "step": 4061 }, { "epoch": 0.5543878804422001, "grad_norm": 0.21609561145305634, "learning_rate": 8e-05, "loss": 1.5048, "step": 4062 }, { "epoch": 0.5545243619489559, "grad_norm": 0.21246649324893951, "learning_rate": 8e-05, "loss": 1.5378, "step": 4063 }, { "epoch": 0.5546608434557118, "grad_norm": 0.21132156252861023, "learning_rate": 8e-05, "loss": 1.4764, "step": 4064 }, { "epoch": 0.5547973249624676, "grad_norm": 0.21442018449306488, "learning_rate": 8e-05, "loss": 1.5307, "step": 4065 }, { "epoch": 0.5549338064692234, "grad_norm": 0.21475937962532043, "learning_rate": 8e-05, "loss": 1.47, "step": 4066 }, { "epoch": 0.5550702879759792, "grad_norm": 0.20682257413864136, "learning_rate": 8e-05, "loss": 1.3839, "step": 4067 }, { "epoch": 0.5552067694827351, "grad_norm": 0.21496626734733582, "learning_rate": 8e-05, "loss": 1.5106, "step": 4068 }, { "epoch": 0.5553432509894909, "grad_norm": 0.22642920911312103, "learning_rate": 8e-05, "loss": 1.5735, "step": 4069 }, { "epoch": 0.5554797324962467, "grad_norm": 0.21580594778060913, "learning_rate": 8e-05, "loss": 1.4999, "step": 4070 }, { "epoch": 0.5556162140030025, "grad_norm": 0.2172602415084839, "learning_rate": 8e-05, "loss": 1.4768, "step": 4071 }, { "epoch": 0.5557526955097585, "grad_norm": 0.20682387053966522, "learning_rate": 8e-05, "loss": 1.504, "step": 4072 }, { "epoch": 0.5558891770165143, "grad_norm": 0.21785330772399902, "learning_rate": 8e-05, "loss": 1.5085, "step": 4073 }, { "epoch": 0.5560256585232701, "grad_norm": 0.21788525581359863, "learning_rate": 8e-05, "loss": 1.4512, "step": 4074 }, { "epoch": 0.556162140030026, "grad_norm": 0.23095788061618805, "learning_rate": 8e-05, "loss": 1.4586, "step": 4075 }, { "epoch": 0.5562986215367818, "grad_norm": 0.2184412032365799, "learning_rate": 8e-05, "loss": 1.5157, "step": 4076 }, { "epoch": 0.5564351030435376, "grad_norm": 0.21445901691913605, "learning_rate": 8e-05, "loss": 1.4934, "step": 4077 }, { "epoch": 0.5565715845502934, "grad_norm": 0.22111648321151733, "learning_rate": 8e-05, "loss": 1.4721, "step": 4078 }, { "epoch": 0.5567080660570493, "grad_norm": 0.22030308842658997, "learning_rate": 8e-05, "loss": 1.4827, "step": 4079 }, { "epoch": 0.5568445475638051, "grad_norm": 0.21483373641967773, "learning_rate": 8e-05, "loss": 1.5023, "step": 4080 }, { "epoch": 0.5569810290705609, "grad_norm": 0.20193031430244446, "learning_rate": 8e-05, "loss": 1.4216, "step": 4081 }, { "epoch": 0.5571175105773167, "grad_norm": 0.2037355899810791, "learning_rate": 8e-05, "loss": 1.4589, "step": 4082 }, { "epoch": 0.5572539920840726, "grad_norm": 0.2091190218925476, "learning_rate": 8e-05, "loss": 1.4874, "step": 4083 }, { "epoch": 0.5573904735908285, "grad_norm": 0.2225044220685959, "learning_rate": 8e-05, "loss": 1.4981, "step": 4084 }, { "epoch": 0.5575269550975843, "grad_norm": 0.2126842588186264, "learning_rate": 8e-05, "loss": 1.3953, "step": 4085 }, { "epoch": 0.5576634366043401, "grad_norm": 0.2057109922170639, "learning_rate": 8e-05, "loss": 1.3893, "step": 4086 }, { "epoch": 0.557799918111096, "grad_norm": 0.21497681736946106, "learning_rate": 8e-05, "loss": 1.4469, "step": 4087 }, { "epoch": 0.5579363996178518, "grad_norm": 0.2119443118572235, "learning_rate": 8e-05, "loss": 1.4575, "step": 4088 }, { "epoch": 0.5580728811246076, "grad_norm": 0.2088376134634018, "learning_rate": 8e-05, "loss": 1.4457, "step": 4089 }, { "epoch": 0.5582093626313634, "grad_norm": 0.2167643904685974, "learning_rate": 8e-05, "loss": 1.5152, "step": 4090 }, { "epoch": 0.5583458441381193, "grad_norm": 0.22091422975063324, "learning_rate": 8e-05, "loss": 1.5714, "step": 4091 }, { "epoch": 0.5584823256448751, "grad_norm": 0.20456083118915558, "learning_rate": 8e-05, "loss": 1.3951, "step": 4092 }, { "epoch": 0.5586188071516309, "grad_norm": 0.2048211395740509, "learning_rate": 8e-05, "loss": 1.4266, "step": 4093 }, { "epoch": 0.5587552886583868, "grad_norm": 0.21812017261981964, "learning_rate": 8e-05, "loss": 1.5849, "step": 4094 }, { "epoch": 0.5588917701651426, "grad_norm": 0.21348117291927338, "learning_rate": 8e-05, "loss": 1.5439, "step": 4095 }, { "epoch": 0.5590282516718985, "grad_norm": 0.21337182819843292, "learning_rate": 8e-05, "loss": 1.5199, "step": 4096 }, { "epoch": 0.5591647331786543, "grad_norm": 0.2136903554201126, "learning_rate": 8e-05, "loss": 1.4224, "step": 4097 }, { "epoch": 0.5593012146854102, "grad_norm": 0.2061806470155716, "learning_rate": 8e-05, "loss": 1.443, "step": 4098 }, { "epoch": 0.559437696192166, "grad_norm": 0.2124415636062622, "learning_rate": 8e-05, "loss": 1.4358, "step": 4099 }, { "epoch": 0.5595741776989218, "grad_norm": 0.21656042337417603, "learning_rate": 8e-05, "loss": 1.4963, "step": 4100 }, { "epoch": 0.5597106592056776, "grad_norm": 0.21043917536735535, "learning_rate": 8e-05, "loss": 1.4818, "step": 4101 }, { "epoch": 0.5598471407124335, "grad_norm": 0.21233618259429932, "learning_rate": 8e-05, "loss": 1.4856, "step": 4102 }, { "epoch": 0.5599836222191893, "grad_norm": 0.2204185128211975, "learning_rate": 8e-05, "loss": 1.5205, "step": 4103 }, { "epoch": 0.5601201037259451, "grad_norm": 0.20545360445976257, "learning_rate": 8e-05, "loss": 1.4258, "step": 4104 }, { "epoch": 0.5602565852327009, "grad_norm": 0.21167825162410736, "learning_rate": 8e-05, "loss": 1.5446, "step": 4105 }, { "epoch": 0.5603930667394568, "grad_norm": 0.2064371109008789, "learning_rate": 8e-05, "loss": 1.3925, "step": 4106 }, { "epoch": 0.5605295482462126, "grad_norm": 0.22175613045692444, "learning_rate": 8e-05, "loss": 1.5344, "step": 4107 }, { "epoch": 0.5606660297529684, "grad_norm": 0.21652434766292572, "learning_rate": 8e-05, "loss": 1.4515, "step": 4108 }, { "epoch": 0.5608025112597244, "grad_norm": 0.21520711481571198, "learning_rate": 8e-05, "loss": 1.4471, "step": 4109 }, { "epoch": 0.5609389927664802, "grad_norm": 0.20460045337677002, "learning_rate": 8e-05, "loss": 1.429, "step": 4110 }, { "epoch": 0.561075474273236, "grad_norm": 0.2147601991891861, "learning_rate": 8e-05, "loss": 1.4549, "step": 4111 }, { "epoch": 0.5612119557799918, "grad_norm": 0.21812841296195984, "learning_rate": 8e-05, "loss": 1.4382, "step": 4112 }, { "epoch": 0.5613484372867477, "grad_norm": 0.22209259867668152, "learning_rate": 8e-05, "loss": 1.5323, "step": 4113 }, { "epoch": 0.5614849187935035, "grad_norm": 0.20632174611091614, "learning_rate": 8e-05, "loss": 1.428, "step": 4114 }, { "epoch": 0.5616214003002593, "grad_norm": 0.21677370369434357, "learning_rate": 8e-05, "loss": 1.5354, "step": 4115 }, { "epoch": 0.5617578818070151, "grad_norm": 0.21731743216514587, "learning_rate": 8e-05, "loss": 1.4925, "step": 4116 }, { "epoch": 0.561894363313771, "grad_norm": 0.2062983363866806, "learning_rate": 8e-05, "loss": 1.4425, "step": 4117 }, { "epoch": 0.5620308448205268, "grad_norm": 0.21958425641059875, "learning_rate": 8e-05, "loss": 1.5141, "step": 4118 }, { "epoch": 0.5621673263272826, "grad_norm": 0.2177102416753769, "learning_rate": 8e-05, "loss": 1.5378, "step": 4119 }, { "epoch": 0.5623038078340384, "grad_norm": 0.21377559006214142, "learning_rate": 8e-05, "loss": 1.522, "step": 4120 }, { "epoch": 0.5624402893407944, "grad_norm": 0.20855441689491272, "learning_rate": 8e-05, "loss": 1.4521, "step": 4121 }, { "epoch": 0.5625767708475502, "grad_norm": 0.2097448855638504, "learning_rate": 8e-05, "loss": 1.5261, "step": 4122 }, { "epoch": 0.562713252354306, "grad_norm": 0.21126492321491241, "learning_rate": 8e-05, "loss": 1.5251, "step": 4123 }, { "epoch": 0.5628497338610619, "grad_norm": 0.21582269668579102, "learning_rate": 8e-05, "loss": 1.4682, "step": 4124 }, { "epoch": 0.5629862153678177, "grad_norm": 0.21713784337043762, "learning_rate": 8e-05, "loss": 1.4589, "step": 4125 }, { "epoch": 0.5631226968745735, "grad_norm": 0.21080413460731506, "learning_rate": 8e-05, "loss": 1.5099, "step": 4126 }, { "epoch": 0.5632591783813293, "grad_norm": 0.20678511261940002, "learning_rate": 8e-05, "loss": 1.4305, "step": 4127 }, { "epoch": 0.5633956598880852, "grad_norm": 0.21512289345264435, "learning_rate": 8e-05, "loss": 1.5092, "step": 4128 }, { "epoch": 0.563532141394841, "grad_norm": 0.21501654386520386, "learning_rate": 8e-05, "loss": 1.5102, "step": 4129 }, { "epoch": 0.5636686229015968, "grad_norm": 0.21599584817886353, "learning_rate": 8e-05, "loss": 1.4619, "step": 4130 }, { "epoch": 0.5638051044083526, "grad_norm": 0.208168163895607, "learning_rate": 8e-05, "loss": 1.4876, "step": 4131 }, { "epoch": 0.5639415859151085, "grad_norm": 0.2146674245595932, "learning_rate": 8e-05, "loss": 1.5214, "step": 4132 }, { "epoch": 0.5640780674218643, "grad_norm": 0.21401694416999817, "learning_rate": 8e-05, "loss": 1.4308, "step": 4133 }, { "epoch": 0.5642145489286202, "grad_norm": 0.2089165896177292, "learning_rate": 8e-05, "loss": 1.431, "step": 4134 }, { "epoch": 0.564351030435376, "grad_norm": 0.20982258021831512, "learning_rate": 8e-05, "loss": 1.498, "step": 4135 }, { "epoch": 0.5644875119421319, "grad_norm": 0.2157997488975525, "learning_rate": 8e-05, "loss": 1.5365, "step": 4136 }, { "epoch": 0.5646239934488877, "grad_norm": 0.2020702064037323, "learning_rate": 8e-05, "loss": 1.4685, "step": 4137 }, { "epoch": 0.5647604749556435, "grad_norm": 0.21577925980091095, "learning_rate": 8e-05, "loss": 1.509, "step": 4138 }, { "epoch": 0.5648969564623993, "grad_norm": 0.21908605098724365, "learning_rate": 8e-05, "loss": 1.5158, "step": 4139 }, { "epoch": 0.5650334379691552, "grad_norm": 0.21253089606761932, "learning_rate": 8e-05, "loss": 1.515, "step": 4140 }, { "epoch": 0.565169919475911, "grad_norm": 0.222563698887825, "learning_rate": 8e-05, "loss": 1.5861, "step": 4141 }, { "epoch": 0.5653064009826668, "grad_norm": 0.2181646078824997, "learning_rate": 8e-05, "loss": 1.5006, "step": 4142 }, { "epoch": 0.5654428824894226, "grad_norm": 0.20826943218708038, "learning_rate": 8e-05, "loss": 1.4852, "step": 4143 }, { "epoch": 0.5655793639961785, "grad_norm": 0.2162502110004425, "learning_rate": 8e-05, "loss": 1.5087, "step": 4144 }, { "epoch": 0.5657158455029343, "grad_norm": 0.20929653942584991, "learning_rate": 8e-05, "loss": 1.5374, "step": 4145 }, { "epoch": 0.5658523270096902, "grad_norm": 0.21493981778621674, "learning_rate": 8e-05, "loss": 1.5394, "step": 4146 }, { "epoch": 0.5659888085164461, "grad_norm": 0.21158720552921295, "learning_rate": 8e-05, "loss": 1.53, "step": 4147 }, { "epoch": 0.5661252900232019, "grad_norm": 0.20743803679943085, "learning_rate": 8e-05, "loss": 1.4894, "step": 4148 }, { "epoch": 0.5662617715299577, "grad_norm": 0.22323064506053925, "learning_rate": 8e-05, "loss": 1.5149, "step": 4149 }, { "epoch": 0.5663982530367135, "grad_norm": 0.2157958298921585, "learning_rate": 8e-05, "loss": 1.4626, "step": 4150 }, { "epoch": 0.5665347345434694, "grad_norm": 0.2215644121170044, "learning_rate": 8e-05, "loss": 1.4799, "step": 4151 }, { "epoch": 0.5666712160502252, "grad_norm": 0.21179339289665222, "learning_rate": 8e-05, "loss": 1.5174, "step": 4152 }, { "epoch": 0.566807697556981, "grad_norm": 0.21576069295406342, "learning_rate": 8e-05, "loss": 1.4788, "step": 4153 }, { "epoch": 0.5669441790637368, "grad_norm": 0.21412812173366547, "learning_rate": 8e-05, "loss": 1.5011, "step": 4154 }, { "epoch": 0.5670806605704927, "grad_norm": 0.21336203813552856, "learning_rate": 8e-05, "loss": 1.5019, "step": 4155 }, { "epoch": 0.5672171420772485, "grad_norm": 0.20645149052143097, "learning_rate": 8e-05, "loss": 1.399, "step": 4156 }, { "epoch": 0.5673536235840043, "grad_norm": 0.21714763343334198, "learning_rate": 8e-05, "loss": 1.4449, "step": 4157 }, { "epoch": 0.5674901050907603, "grad_norm": 0.21368075907230377, "learning_rate": 8e-05, "loss": 1.4458, "step": 4158 }, { "epoch": 0.5676265865975161, "grad_norm": 0.20892494916915894, "learning_rate": 8e-05, "loss": 1.4508, "step": 4159 }, { "epoch": 0.5677630681042719, "grad_norm": 0.21361811459064484, "learning_rate": 8e-05, "loss": 1.4057, "step": 4160 }, { "epoch": 0.5678995496110277, "grad_norm": 0.2225876748561859, "learning_rate": 8e-05, "loss": 1.4793, "step": 4161 }, { "epoch": 0.5680360311177836, "grad_norm": 0.21273940801620483, "learning_rate": 8e-05, "loss": 1.5464, "step": 4162 }, { "epoch": 0.5681725126245394, "grad_norm": 0.21815603971481323, "learning_rate": 8e-05, "loss": 1.4599, "step": 4163 }, { "epoch": 0.5683089941312952, "grad_norm": 0.2130299061536789, "learning_rate": 8e-05, "loss": 1.4188, "step": 4164 }, { "epoch": 0.568445475638051, "grad_norm": 0.21651612222194672, "learning_rate": 8e-05, "loss": 1.5117, "step": 4165 }, { "epoch": 0.5685819571448069, "grad_norm": 0.21552126109600067, "learning_rate": 8e-05, "loss": 1.4394, "step": 4166 }, { "epoch": 0.5687184386515627, "grad_norm": 0.20757144689559937, "learning_rate": 8e-05, "loss": 1.4546, "step": 4167 }, { "epoch": 0.5688549201583185, "grad_norm": 0.22573474049568176, "learning_rate": 8e-05, "loss": 1.5136, "step": 4168 }, { "epoch": 0.5689914016650743, "grad_norm": 0.20971375703811646, "learning_rate": 8e-05, "loss": 1.4433, "step": 4169 }, { "epoch": 0.5691278831718302, "grad_norm": 0.21533392369747162, "learning_rate": 8e-05, "loss": 1.4536, "step": 4170 }, { "epoch": 0.5692643646785861, "grad_norm": 0.21626019477844238, "learning_rate": 8e-05, "loss": 1.5605, "step": 4171 }, { "epoch": 0.5694008461853419, "grad_norm": 0.20644056797027588, "learning_rate": 8e-05, "loss": 1.4657, "step": 4172 }, { "epoch": 0.5695373276920978, "grad_norm": 0.21214927732944489, "learning_rate": 8e-05, "loss": 1.4961, "step": 4173 }, { "epoch": 0.5696738091988536, "grad_norm": 0.20711253583431244, "learning_rate": 8e-05, "loss": 1.3842, "step": 4174 }, { "epoch": 0.5698102907056094, "grad_norm": 0.22512763738632202, "learning_rate": 8e-05, "loss": 1.4976, "step": 4175 }, { "epoch": 0.5699467722123652, "grad_norm": 0.20673586428165436, "learning_rate": 8e-05, "loss": 1.4443, "step": 4176 }, { "epoch": 0.570083253719121, "grad_norm": 0.21036086976528168, "learning_rate": 8e-05, "loss": 1.4698, "step": 4177 }, { "epoch": 0.5702197352258769, "grad_norm": 0.20807988941669464, "learning_rate": 8e-05, "loss": 1.5289, "step": 4178 }, { "epoch": 0.5703562167326327, "grad_norm": 0.20627886056900024, "learning_rate": 8e-05, "loss": 1.4361, "step": 4179 }, { "epoch": 0.5704926982393885, "grad_norm": 0.2068205326795578, "learning_rate": 8e-05, "loss": 1.4131, "step": 4180 }, { "epoch": 0.5706291797461444, "grad_norm": 0.2120501846075058, "learning_rate": 8e-05, "loss": 1.5532, "step": 4181 }, { "epoch": 0.5707656612529002, "grad_norm": 0.2079847753047943, "learning_rate": 8e-05, "loss": 1.5023, "step": 4182 }, { "epoch": 0.5709021427596561, "grad_norm": 0.21569515764713287, "learning_rate": 8e-05, "loss": 1.5044, "step": 4183 }, { "epoch": 0.5710386242664119, "grad_norm": 0.21743503212928772, "learning_rate": 8e-05, "loss": 1.4924, "step": 4184 }, { "epoch": 0.5711751057731678, "grad_norm": 0.21706555783748627, "learning_rate": 8e-05, "loss": 1.4576, "step": 4185 }, { "epoch": 0.5713115872799236, "grad_norm": 0.20865269005298615, "learning_rate": 8e-05, "loss": 1.4638, "step": 4186 }, { "epoch": 0.5714480687866794, "grad_norm": 0.21492353081703186, "learning_rate": 8e-05, "loss": 1.5075, "step": 4187 }, { "epoch": 0.5715845502934352, "grad_norm": 0.21789975464344025, "learning_rate": 8e-05, "loss": 1.5481, "step": 4188 }, { "epoch": 0.5717210318001911, "grad_norm": 0.21637997031211853, "learning_rate": 8e-05, "loss": 1.5046, "step": 4189 }, { "epoch": 0.5718575133069469, "grad_norm": 0.20876067876815796, "learning_rate": 8e-05, "loss": 1.4276, "step": 4190 }, { "epoch": 0.5719939948137027, "grad_norm": 0.21783484518527985, "learning_rate": 8e-05, "loss": 1.4654, "step": 4191 }, { "epoch": 0.5721304763204585, "grad_norm": 0.21375548839569092, "learning_rate": 8e-05, "loss": 1.4505, "step": 4192 }, { "epoch": 0.5722669578272144, "grad_norm": 0.2086593508720398, "learning_rate": 8e-05, "loss": 1.3673, "step": 4193 }, { "epoch": 0.5724034393339702, "grad_norm": 0.2099689394235611, "learning_rate": 8e-05, "loss": 1.4368, "step": 4194 }, { "epoch": 0.5725399208407261, "grad_norm": 0.21592223644256592, "learning_rate": 8e-05, "loss": 1.4757, "step": 4195 }, { "epoch": 0.572676402347482, "grad_norm": 0.21087807416915894, "learning_rate": 8e-05, "loss": 1.456, "step": 4196 }, { "epoch": 0.5728128838542378, "grad_norm": 0.21318086981773376, "learning_rate": 8e-05, "loss": 1.5611, "step": 4197 }, { "epoch": 0.5729493653609936, "grad_norm": 0.21953259408473969, "learning_rate": 8e-05, "loss": 1.4666, "step": 4198 }, { "epoch": 0.5730858468677494, "grad_norm": 0.21577037870883942, "learning_rate": 8e-05, "loss": 1.5368, "step": 4199 }, { "epoch": 0.5732223283745053, "grad_norm": 0.21646249294281006, "learning_rate": 8e-05, "loss": 1.4846, "step": 4200 }, { "epoch": 0.5733588098812611, "grad_norm": 0.21039555966854095, "learning_rate": 8e-05, "loss": 1.4803, "step": 4201 }, { "epoch": 0.5734952913880169, "grad_norm": 0.2150830179452896, "learning_rate": 8e-05, "loss": 1.4846, "step": 4202 }, { "epoch": 0.5736317728947727, "grad_norm": 0.21341609954833984, "learning_rate": 8e-05, "loss": 1.4447, "step": 4203 }, { "epoch": 0.5737682544015286, "grad_norm": 0.20764513313770294, "learning_rate": 8e-05, "loss": 1.4485, "step": 4204 }, { "epoch": 0.5739047359082844, "grad_norm": 0.21798859536647797, "learning_rate": 8e-05, "loss": 1.542, "step": 4205 }, { "epoch": 0.5740412174150402, "grad_norm": 0.21576344966888428, "learning_rate": 8e-05, "loss": 1.5083, "step": 4206 }, { "epoch": 0.574177698921796, "grad_norm": 0.21215452253818512, "learning_rate": 8e-05, "loss": 1.4298, "step": 4207 }, { "epoch": 0.574314180428552, "grad_norm": 0.2231324464082718, "learning_rate": 8e-05, "loss": 1.4565, "step": 4208 }, { "epoch": 0.5744506619353078, "grad_norm": 0.22044414281845093, "learning_rate": 8e-05, "loss": 1.4826, "step": 4209 }, { "epoch": 0.5745871434420636, "grad_norm": 0.22512578964233398, "learning_rate": 8e-05, "loss": 1.5456, "step": 4210 }, { "epoch": 0.5747236249488195, "grad_norm": 0.21472027897834778, "learning_rate": 8e-05, "loss": 1.4495, "step": 4211 }, { "epoch": 0.5748601064555753, "grad_norm": 0.2077253758907318, "learning_rate": 8e-05, "loss": 1.4634, "step": 4212 }, { "epoch": 0.5749965879623311, "grad_norm": 0.2077658623456955, "learning_rate": 8e-05, "loss": 1.48, "step": 4213 }, { "epoch": 0.5751330694690869, "grad_norm": 0.2147304117679596, "learning_rate": 8e-05, "loss": 1.5391, "step": 4214 }, { "epoch": 0.5752695509758428, "grad_norm": 0.2135041058063507, "learning_rate": 8e-05, "loss": 1.4851, "step": 4215 }, { "epoch": 0.5754060324825986, "grad_norm": 0.21538491547107697, "learning_rate": 8e-05, "loss": 1.4741, "step": 4216 }, { "epoch": 0.5755425139893544, "grad_norm": 0.21669740974903107, "learning_rate": 8e-05, "loss": 1.5089, "step": 4217 }, { "epoch": 0.5756789954961102, "grad_norm": 0.21382921934127808, "learning_rate": 8e-05, "loss": 1.5123, "step": 4218 }, { "epoch": 0.5758154770028661, "grad_norm": 0.2091756910085678, "learning_rate": 8e-05, "loss": 1.4511, "step": 4219 }, { "epoch": 0.575951958509622, "grad_norm": 0.22944357991218567, "learning_rate": 8e-05, "loss": 1.5292, "step": 4220 }, { "epoch": 0.5760884400163778, "grad_norm": 0.22602970898151398, "learning_rate": 8e-05, "loss": 1.5171, "step": 4221 }, { "epoch": 0.5762249215231336, "grad_norm": 0.2166919857263565, "learning_rate": 8e-05, "loss": 1.4535, "step": 4222 }, { "epoch": 0.5763614030298895, "grad_norm": 0.22732417285442352, "learning_rate": 8e-05, "loss": 1.4681, "step": 4223 }, { "epoch": 0.5764978845366453, "grad_norm": 0.21656647324562073, "learning_rate": 8e-05, "loss": 1.4863, "step": 4224 }, { "epoch": 0.5766343660434011, "grad_norm": 0.21638262271881104, "learning_rate": 8e-05, "loss": 1.4304, "step": 4225 }, { "epoch": 0.576770847550157, "grad_norm": 0.21608994901180267, "learning_rate": 8e-05, "loss": 1.4176, "step": 4226 }, { "epoch": 0.5769073290569128, "grad_norm": 0.21043655276298523, "learning_rate": 8e-05, "loss": 1.4525, "step": 4227 }, { "epoch": 0.5770438105636686, "grad_norm": 0.21401606500148773, "learning_rate": 8e-05, "loss": 1.4862, "step": 4228 }, { "epoch": 0.5771802920704244, "grad_norm": 0.22227658331394196, "learning_rate": 8e-05, "loss": 1.468, "step": 4229 }, { "epoch": 0.5773167735771803, "grad_norm": 0.22011655569076538, "learning_rate": 8e-05, "loss": 1.4769, "step": 4230 }, { "epoch": 0.5774532550839361, "grad_norm": 0.213987797498703, "learning_rate": 8e-05, "loss": 1.4764, "step": 4231 }, { "epoch": 0.577589736590692, "grad_norm": 0.2208341807126999, "learning_rate": 8e-05, "loss": 1.5738, "step": 4232 }, { "epoch": 0.5777262180974478, "grad_norm": 0.2314114272594452, "learning_rate": 8e-05, "loss": 1.5033, "step": 4233 }, { "epoch": 0.5778626996042037, "grad_norm": 0.2215908020734787, "learning_rate": 8e-05, "loss": 1.5022, "step": 4234 }, { "epoch": 0.5779991811109595, "grad_norm": 0.21595498919487, "learning_rate": 8e-05, "loss": 1.5087, "step": 4235 }, { "epoch": 0.5781356626177153, "grad_norm": 0.2144959270954132, "learning_rate": 8e-05, "loss": 1.5031, "step": 4236 }, { "epoch": 0.5782721441244711, "grad_norm": 0.20927192270755768, "learning_rate": 8e-05, "loss": 1.4652, "step": 4237 }, { "epoch": 0.578408625631227, "grad_norm": 0.22002463042736053, "learning_rate": 8e-05, "loss": 1.4242, "step": 4238 }, { "epoch": 0.5785451071379828, "grad_norm": 0.21876122057437897, "learning_rate": 8e-05, "loss": 1.5275, "step": 4239 }, { "epoch": 0.5786815886447386, "grad_norm": 0.22128397226333618, "learning_rate": 8e-05, "loss": 1.532, "step": 4240 }, { "epoch": 0.5788180701514944, "grad_norm": 0.20736324787139893, "learning_rate": 8e-05, "loss": 1.4651, "step": 4241 }, { "epoch": 0.5789545516582503, "grad_norm": 0.20933184027671814, "learning_rate": 8e-05, "loss": 1.42, "step": 4242 }, { "epoch": 0.5790910331650061, "grad_norm": 0.21193736791610718, "learning_rate": 8e-05, "loss": 1.3944, "step": 4243 }, { "epoch": 0.5792275146717619, "grad_norm": 0.21933895349502563, "learning_rate": 8e-05, "loss": 1.5118, "step": 4244 }, { "epoch": 0.5793639961785179, "grad_norm": 0.21755674481391907, "learning_rate": 8e-05, "loss": 1.5319, "step": 4245 }, { "epoch": 0.5795004776852737, "grad_norm": 0.21228887140750885, "learning_rate": 8e-05, "loss": 1.4707, "step": 4246 }, { "epoch": 0.5796369591920295, "grad_norm": 0.2192274034023285, "learning_rate": 8e-05, "loss": 1.5161, "step": 4247 }, { "epoch": 0.5797734406987853, "grad_norm": 0.21627812087535858, "learning_rate": 8e-05, "loss": 1.4516, "step": 4248 }, { "epoch": 0.5799099222055412, "grad_norm": 0.21966254711151123, "learning_rate": 8e-05, "loss": 1.4812, "step": 4249 }, { "epoch": 0.580046403712297, "grad_norm": 0.21831735968589783, "learning_rate": 8e-05, "loss": 1.4219, "step": 4250 }, { "epoch": 0.5801828852190528, "grad_norm": 0.21938075125217438, "learning_rate": 8e-05, "loss": 1.5152, "step": 4251 }, { "epoch": 0.5803193667258086, "grad_norm": 0.21802319586277008, "learning_rate": 8e-05, "loss": 1.4701, "step": 4252 }, { "epoch": 0.5804558482325645, "grad_norm": 0.21984423696994781, "learning_rate": 8e-05, "loss": 1.5009, "step": 4253 }, { "epoch": 0.5805923297393203, "grad_norm": 0.23234982788562775, "learning_rate": 8e-05, "loss": 1.4623, "step": 4254 }, { "epoch": 0.5807288112460761, "grad_norm": 0.22150513529777527, "learning_rate": 8e-05, "loss": 1.4837, "step": 4255 }, { "epoch": 0.5808652927528319, "grad_norm": 0.22058182954788208, "learning_rate": 8e-05, "loss": 1.5114, "step": 4256 }, { "epoch": 0.5810017742595879, "grad_norm": 0.21117989718914032, "learning_rate": 8e-05, "loss": 1.4165, "step": 4257 }, { "epoch": 0.5811382557663437, "grad_norm": 0.2250738888978958, "learning_rate": 8e-05, "loss": 1.5374, "step": 4258 }, { "epoch": 0.5812747372730995, "grad_norm": 0.2053462713956833, "learning_rate": 8e-05, "loss": 1.4029, "step": 4259 }, { "epoch": 0.5814112187798554, "grad_norm": 0.22288133203983307, "learning_rate": 8e-05, "loss": 1.5203, "step": 4260 }, { "epoch": 0.5815477002866112, "grad_norm": 0.21730506420135498, "learning_rate": 8e-05, "loss": 1.4327, "step": 4261 }, { "epoch": 0.581684181793367, "grad_norm": 0.21201132237911224, "learning_rate": 8e-05, "loss": 1.4785, "step": 4262 }, { "epoch": 0.5818206633001228, "grad_norm": 0.21976672112941742, "learning_rate": 8e-05, "loss": 1.4977, "step": 4263 }, { "epoch": 0.5819571448068787, "grad_norm": 0.21687081456184387, "learning_rate": 8e-05, "loss": 1.526, "step": 4264 }, { "epoch": 0.5820936263136345, "grad_norm": 0.21505995094776154, "learning_rate": 8e-05, "loss": 1.4763, "step": 4265 }, { "epoch": 0.5822301078203903, "grad_norm": 0.21913163363933563, "learning_rate": 8e-05, "loss": 1.5281, "step": 4266 }, { "epoch": 0.5823665893271461, "grad_norm": 0.220303475856781, "learning_rate": 8e-05, "loss": 1.4776, "step": 4267 }, { "epoch": 0.582503070833902, "grad_norm": 0.21774540841579437, "learning_rate": 8e-05, "loss": 1.5078, "step": 4268 }, { "epoch": 0.5826395523406579, "grad_norm": 0.22189544141292572, "learning_rate": 8e-05, "loss": 1.5697, "step": 4269 }, { "epoch": 0.5827760338474137, "grad_norm": 0.2093462198972702, "learning_rate": 8e-05, "loss": 1.4847, "step": 4270 }, { "epoch": 0.5829125153541695, "grad_norm": 0.22963126003742218, "learning_rate": 8e-05, "loss": 1.5603, "step": 4271 }, { "epoch": 0.5830489968609254, "grad_norm": 0.22692115604877472, "learning_rate": 8e-05, "loss": 1.532, "step": 4272 }, { "epoch": 0.5831854783676812, "grad_norm": 0.21378524601459503, "learning_rate": 8e-05, "loss": 1.4867, "step": 4273 }, { "epoch": 0.583321959874437, "grad_norm": 0.21572093665599823, "learning_rate": 8e-05, "loss": 1.4874, "step": 4274 }, { "epoch": 0.5834584413811928, "grad_norm": 0.21749195456504822, "learning_rate": 8e-05, "loss": 1.4915, "step": 4275 }, { "epoch": 0.5835949228879487, "grad_norm": 0.21253842115402222, "learning_rate": 8e-05, "loss": 1.4768, "step": 4276 }, { "epoch": 0.5837314043947045, "grad_norm": 0.2171137034893036, "learning_rate": 8e-05, "loss": 1.5198, "step": 4277 }, { "epoch": 0.5838678859014603, "grad_norm": 0.20603013038635254, "learning_rate": 8e-05, "loss": 1.4383, "step": 4278 }, { "epoch": 0.5840043674082162, "grad_norm": 0.20807403326034546, "learning_rate": 8e-05, "loss": 1.4516, "step": 4279 }, { "epoch": 0.584140848914972, "grad_norm": 0.21349146962165833, "learning_rate": 8e-05, "loss": 1.5423, "step": 4280 }, { "epoch": 0.5842773304217278, "grad_norm": 0.22212353348731995, "learning_rate": 8e-05, "loss": 1.5127, "step": 4281 }, { "epoch": 0.5844138119284837, "grad_norm": 0.2233966886997223, "learning_rate": 8e-05, "loss": 1.5029, "step": 4282 }, { "epoch": 0.5845502934352396, "grad_norm": 0.21821431815624237, "learning_rate": 8e-05, "loss": 1.4709, "step": 4283 }, { "epoch": 0.5846867749419954, "grad_norm": 0.20830968022346497, "learning_rate": 8e-05, "loss": 1.4511, "step": 4284 }, { "epoch": 0.5848232564487512, "grad_norm": 0.21082666516304016, "learning_rate": 8e-05, "loss": 1.4323, "step": 4285 }, { "epoch": 0.584959737955507, "grad_norm": 0.21043847501277924, "learning_rate": 8e-05, "loss": 1.4373, "step": 4286 }, { "epoch": 0.5850962194622629, "grad_norm": 0.2249893993139267, "learning_rate": 8e-05, "loss": 1.5647, "step": 4287 }, { "epoch": 0.5852327009690187, "grad_norm": 0.21635067462921143, "learning_rate": 8e-05, "loss": 1.4678, "step": 4288 }, { "epoch": 0.5853691824757745, "grad_norm": 0.21915218234062195, "learning_rate": 8e-05, "loss": 1.5016, "step": 4289 }, { "epoch": 0.5855056639825303, "grad_norm": 0.2193540632724762, "learning_rate": 8e-05, "loss": 1.3985, "step": 4290 }, { "epoch": 0.5856421454892862, "grad_norm": 0.22085200250148773, "learning_rate": 8e-05, "loss": 1.4823, "step": 4291 }, { "epoch": 0.585778626996042, "grad_norm": 0.22260814905166626, "learning_rate": 8e-05, "loss": 1.4219, "step": 4292 }, { "epoch": 0.5859151085027978, "grad_norm": 0.22045950591564178, "learning_rate": 8e-05, "loss": 1.4317, "step": 4293 }, { "epoch": 0.5860515900095538, "grad_norm": 0.21894249320030212, "learning_rate": 8e-05, "loss": 1.4805, "step": 4294 }, { "epoch": 0.5861880715163096, "grad_norm": 0.21819111704826355, "learning_rate": 8e-05, "loss": 1.4882, "step": 4295 }, { "epoch": 0.5863245530230654, "grad_norm": 0.21281464397907257, "learning_rate": 8e-05, "loss": 1.4893, "step": 4296 }, { "epoch": 0.5864610345298212, "grad_norm": 0.2163061797618866, "learning_rate": 8e-05, "loss": 1.469, "step": 4297 }, { "epoch": 0.5865975160365771, "grad_norm": 0.21715545654296875, "learning_rate": 8e-05, "loss": 1.5247, "step": 4298 }, { "epoch": 0.5867339975433329, "grad_norm": 0.2198237180709839, "learning_rate": 8e-05, "loss": 1.5236, "step": 4299 }, { "epoch": 0.5868704790500887, "grad_norm": 0.21444237232208252, "learning_rate": 8e-05, "loss": 1.4838, "step": 4300 }, { "epoch": 0.5870069605568445, "grad_norm": 0.21362698078155518, "learning_rate": 8e-05, "loss": 1.4141, "step": 4301 }, { "epoch": 0.5871434420636004, "grad_norm": 0.21045954525470734, "learning_rate": 8e-05, "loss": 1.4745, "step": 4302 }, { "epoch": 0.5872799235703562, "grad_norm": 0.22438058257102966, "learning_rate": 8e-05, "loss": 1.4639, "step": 4303 }, { "epoch": 0.587416405077112, "grad_norm": 0.22046953439712524, "learning_rate": 8e-05, "loss": 1.4867, "step": 4304 }, { "epoch": 0.5875528865838678, "grad_norm": 0.2153709977865219, "learning_rate": 8e-05, "loss": 1.4351, "step": 4305 }, { "epoch": 0.5876893680906237, "grad_norm": 0.22235837578773499, "learning_rate": 8e-05, "loss": 1.4856, "step": 4306 }, { "epoch": 0.5878258495973796, "grad_norm": 0.20900604128837585, "learning_rate": 8e-05, "loss": 1.436, "step": 4307 }, { "epoch": 0.5879623311041354, "grad_norm": 0.21753929555416107, "learning_rate": 8e-05, "loss": 1.4049, "step": 4308 }, { "epoch": 0.5880988126108913, "grad_norm": 0.21831852197647095, "learning_rate": 8e-05, "loss": 1.4733, "step": 4309 }, { "epoch": 0.5882352941176471, "grad_norm": 0.22192572057247162, "learning_rate": 8e-05, "loss": 1.5375, "step": 4310 }, { "epoch": 0.5883717756244029, "grad_norm": 0.21853183209896088, "learning_rate": 8e-05, "loss": 1.4609, "step": 4311 }, { "epoch": 0.5885082571311587, "grad_norm": 0.21943886578083038, "learning_rate": 8e-05, "loss": 1.4369, "step": 4312 }, { "epoch": 0.5886447386379146, "grad_norm": 0.21709951758384705, "learning_rate": 8e-05, "loss": 1.5777, "step": 4313 }, { "epoch": 0.5887812201446704, "grad_norm": 0.2150854915380478, "learning_rate": 8e-05, "loss": 1.5301, "step": 4314 }, { "epoch": 0.5889177016514262, "grad_norm": 0.21481004357337952, "learning_rate": 8e-05, "loss": 1.4756, "step": 4315 }, { "epoch": 0.589054183158182, "grad_norm": 0.20692256093025208, "learning_rate": 8e-05, "loss": 1.4759, "step": 4316 }, { "epoch": 0.5891906646649379, "grad_norm": 0.2155621349811554, "learning_rate": 8e-05, "loss": 1.4568, "step": 4317 }, { "epoch": 0.5893271461716937, "grad_norm": 0.22038252651691437, "learning_rate": 8e-05, "loss": 1.5423, "step": 4318 }, { "epoch": 0.5894636276784496, "grad_norm": 0.2143627107143402, "learning_rate": 8e-05, "loss": 1.4941, "step": 4319 }, { "epoch": 0.5896001091852054, "grad_norm": 0.21517311036586761, "learning_rate": 8e-05, "loss": 1.4454, "step": 4320 }, { "epoch": 0.5897365906919613, "grad_norm": 0.212874174118042, "learning_rate": 8e-05, "loss": 1.4637, "step": 4321 }, { "epoch": 0.5898730721987171, "grad_norm": 0.21814869344234467, "learning_rate": 8e-05, "loss": 1.4488, "step": 4322 }, { "epoch": 0.5900095537054729, "grad_norm": 0.21869415044784546, "learning_rate": 8e-05, "loss": 1.4721, "step": 4323 }, { "epoch": 0.5901460352122287, "grad_norm": 0.21262279152870178, "learning_rate": 8e-05, "loss": 1.3927, "step": 4324 }, { "epoch": 0.5902825167189846, "grad_norm": 0.20953911542892456, "learning_rate": 8e-05, "loss": 1.451, "step": 4325 }, { "epoch": 0.5904189982257404, "grad_norm": 0.21339575946331024, "learning_rate": 8e-05, "loss": 1.4435, "step": 4326 }, { "epoch": 0.5905554797324962, "grad_norm": 0.22168762981891632, "learning_rate": 8e-05, "loss": 1.5393, "step": 4327 }, { "epoch": 0.590691961239252, "grad_norm": 0.22402767837047577, "learning_rate": 8e-05, "loss": 1.4545, "step": 4328 }, { "epoch": 0.5908284427460079, "grad_norm": 0.21322095394134521, "learning_rate": 8e-05, "loss": 1.5347, "step": 4329 }, { "epoch": 0.5909649242527637, "grad_norm": 0.2209540754556656, "learning_rate": 8e-05, "loss": 1.5112, "step": 4330 }, { "epoch": 0.5911014057595196, "grad_norm": 0.2232252061367035, "learning_rate": 8e-05, "loss": 1.4771, "step": 4331 }, { "epoch": 0.5912378872662755, "grad_norm": 0.21956667304039001, "learning_rate": 8e-05, "loss": 1.4988, "step": 4332 }, { "epoch": 0.5913743687730313, "grad_norm": 0.2251167744398117, "learning_rate": 8e-05, "loss": 1.4661, "step": 4333 }, { "epoch": 0.5915108502797871, "grad_norm": 0.20992282032966614, "learning_rate": 8e-05, "loss": 1.5166, "step": 4334 }, { "epoch": 0.5916473317865429, "grad_norm": 0.20996713638305664, "learning_rate": 8e-05, "loss": 1.4228, "step": 4335 }, { "epoch": 0.5917838132932988, "grad_norm": 0.22183406352996826, "learning_rate": 8e-05, "loss": 1.4592, "step": 4336 }, { "epoch": 0.5919202948000546, "grad_norm": 0.21954160928726196, "learning_rate": 8e-05, "loss": 1.5288, "step": 4337 }, { "epoch": 0.5920567763068104, "grad_norm": 0.2262692004442215, "learning_rate": 8e-05, "loss": 1.4789, "step": 4338 }, { "epoch": 0.5921932578135662, "grad_norm": 0.2285604327917099, "learning_rate": 8e-05, "loss": 1.5031, "step": 4339 }, { "epoch": 0.5923297393203221, "grad_norm": 0.22797690331935883, "learning_rate": 8e-05, "loss": 1.5224, "step": 4340 }, { "epoch": 0.5924662208270779, "grad_norm": 0.2103288173675537, "learning_rate": 8e-05, "loss": 1.3893, "step": 4341 }, { "epoch": 0.5926027023338337, "grad_norm": 0.2192559391260147, "learning_rate": 8e-05, "loss": 1.4687, "step": 4342 }, { "epoch": 0.5927391838405895, "grad_norm": 0.21651481091976166, "learning_rate": 8e-05, "loss": 1.4397, "step": 4343 }, { "epoch": 0.5928756653473455, "grad_norm": 0.20992860198020935, "learning_rate": 8e-05, "loss": 1.4252, "step": 4344 }, { "epoch": 0.5930121468541013, "grad_norm": 0.22292640805244446, "learning_rate": 8e-05, "loss": 1.415, "step": 4345 }, { "epoch": 0.5931486283608571, "grad_norm": 0.2203783541917801, "learning_rate": 8e-05, "loss": 1.4843, "step": 4346 }, { "epoch": 0.593285109867613, "grad_norm": 0.20854653418064117, "learning_rate": 8e-05, "loss": 1.4086, "step": 4347 }, { "epoch": 0.5934215913743688, "grad_norm": 0.22053131461143494, "learning_rate": 8e-05, "loss": 1.5078, "step": 4348 }, { "epoch": 0.5935580728811246, "grad_norm": 0.21370857954025269, "learning_rate": 8e-05, "loss": 1.4635, "step": 4349 }, { "epoch": 0.5936945543878804, "grad_norm": 0.21468520164489746, "learning_rate": 8e-05, "loss": 1.5132, "step": 4350 }, { "epoch": 0.5938310358946363, "grad_norm": 0.20682238042354584, "learning_rate": 8e-05, "loss": 1.3926, "step": 4351 }, { "epoch": 0.5939675174013921, "grad_norm": 0.2136172503232956, "learning_rate": 8e-05, "loss": 1.5067, "step": 4352 }, { "epoch": 0.5941039989081479, "grad_norm": 0.21396876871585846, "learning_rate": 8e-05, "loss": 1.4278, "step": 4353 }, { "epoch": 0.5942404804149037, "grad_norm": 0.21121741831302643, "learning_rate": 8e-05, "loss": 1.4932, "step": 4354 }, { "epoch": 0.5943769619216596, "grad_norm": 0.22208641469478607, "learning_rate": 8e-05, "loss": 1.4828, "step": 4355 }, { "epoch": 0.5945134434284155, "grad_norm": 0.21309731900691986, "learning_rate": 8e-05, "loss": 1.4857, "step": 4356 }, { "epoch": 0.5946499249351713, "grad_norm": 0.21267130970954895, "learning_rate": 8e-05, "loss": 1.5325, "step": 4357 }, { "epoch": 0.5947864064419272, "grad_norm": 0.21058547496795654, "learning_rate": 8e-05, "loss": 1.4801, "step": 4358 }, { "epoch": 0.594922887948683, "grad_norm": 0.22066126763820648, "learning_rate": 8e-05, "loss": 1.5336, "step": 4359 }, { "epoch": 0.5950593694554388, "grad_norm": 0.21684888005256653, "learning_rate": 8e-05, "loss": 1.5217, "step": 4360 }, { "epoch": 0.5951958509621946, "grad_norm": 0.21596623957157135, "learning_rate": 8e-05, "loss": 1.3702, "step": 4361 }, { "epoch": 0.5953323324689505, "grad_norm": 0.21503545343875885, "learning_rate": 8e-05, "loss": 1.4561, "step": 4362 }, { "epoch": 0.5954688139757063, "grad_norm": 0.2201627790927887, "learning_rate": 8e-05, "loss": 1.4964, "step": 4363 }, { "epoch": 0.5956052954824621, "grad_norm": 0.21484950184822083, "learning_rate": 8e-05, "loss": 1.4513, "step": 4364 }, { "epoch": 0.5957417769892179, "grad_norm": 0.21879707276821136, "learning_rate": 8e-05, "loss": 1.4643, "step": 4365 }, { "epoch": 0.5958782584959738, "grad_norm": 0.21750949323177338, "learning_rate": 8e-05, "loss": 1.4572, "step": 4366 }, { "epoch": 0.5960147400027296, "grad_norm": 0.220553919672966, "learning_rate": 8e-05, "loss": 1.4999, "step": 4367 }, { "epoch": 0.5961512215094855, "grad_norm": 0.21693886816501617, "learning_rate": 8e-05, "loss": 1.4629, "step": 4368 }, { "epoch": 0.5962877030162413, "grad_norm": 0.22107239067554474, "learning_rate": 8e-05, "loss": 1.4695, "step": 4369 }, { "epoch": 0.5964241845229972, "grad_norm": 0.21459034085273743, "learning_rate": 8e-05, "loss": 1.5322, "step": 4370 }, { "epoch": 0.596560666029753, "grad_norm": 0.21887944638729095, "learning_rate": 8e-05, "loss": 1.461, "step": 4371 }, { "epoch": 0.5966971475365088, "grad_norm": 0.22586947679519653, "learning_rate": 8e-05, "loss": 1.4976, "step": 4372 }, { "epoch": 0.5968336290432646, "grad_norm": 0.21632890403270721, "learning_rate": 8e-05, "loss": 1.47, "step": 4373 }, { "epoch": 0.5969701105500205, "grad_norm": 0.22188736498355865, "learning_rate": 8e-05, "loss": 1.5414, "step": 4374 }, { "epoch": 0.5971065920567763, "grad_norm": 0.2110891044139862, "learning_rate": 8e-05, "loss": 1.4918, "step": 4375 }, { "epoch": 0.5972430735635321, "grad_norm": 0.21347954869270325, "learning_rate": 8e-05, "loss": 1.5443, "step": 4376 }, { "epoch": 0.597379555070288, "grad_norm": 0.21682076156139374, "learning_rate": 8e-05, "loss": 1.522, "step": 4377 }, { "epoch": 0.5975160365770438, "grad_norm": 0.22219955921173096, "learning_rate": 8e-05, "loss": 1.4958, "step": 4378 }, { "epoch": 0.5976525180837996, "grad_norm": 0.21607360243797302, "learning_rate": 8e-05, "loss": 1.4693, "step": 4379 }, { "epoch": 0.5977889995905554, "grad_norm": 0.21729676425457, "learning_rate": 8e-05, "loss": 1.4853, "step": 4380 }, { "epoch": 0.5979254810973114, "grad_norm": 0.2149355709552765, "learning_rate": 8e-05, "loss": 1.4832, "step": 4381 }, { "epoch": 0.5980619626040672, "grad_norm": 0.22193880379199982, "learning_rate": 8e-05, "loss": 1.4869, "step": 4382 }, { "epoch": 0.598198444110823, "grad_norm": 0.2167275995016098, "learning_rate": 8e-05, "loss": 1.4121, "step": 4383 }, { "epoch": 0.5983349256175788, "grad_norm": 0.22075915336608887, "learning_rate": 8e-05, "loss": 1.5372, "step": 4384 }, { "epoch": 0.5984714071243347, "grad_norm": 0.22093677520751953, "learning_rate": 8e-05, "loss": 1.5175, "step": 4385 }, { "epoch": 0.5986078886310905, "grad_norm": 0.2314719706773758, "learning_rate": 8e-05, "loss": 1.5699, "step": 4386 }, { "epoch": 0.5987443701378463, "grad_norm": 0.21571598947048187, "learning_rate": 8e-05, "loss": 1.4838, "step": 4387 }, { "epoch": 0.5988808516446021, "grad_norm": 0.21857315301895142, "learning_rate": 8e-05, "loss": 1.4812, "step": 4388 }, { "epoch": 0.599017333151358, "grad_norm": 0.2169128954410553, "learning_rate": 8e-05, "loss": 1.4476, "step": 4389 }, { "epoch": 0.5991538146581138, "grad_norm": 0.22191467881202698, "learning_rate": 8e-05, "loss": 1.3826, "step": 4390 }, { "epoch": 0.5992902961648696, "grad_norm": 0.2280043363571167, "learning_rate": 8e-05, "loss": 1.5041, "step": 4391 }, { "epoch": 0.5994267776716254, "grad_norm": 0.22332550585269928, "learning_rate": 8e-05, "loss": 1.4343, "step": 4392 }, { "epoch": 0.5995632591783814, "grad_norm": 0.2141445130109787, "learning_rate": 8e-05, "loss": 1.4765, "step": 4393 }, { "epoch": 0.5996997406851372, "grad_norm": 0.21590353548526764, "learning_rate": 8e-05, "loss": 1.498, "step": 4394 }, { "epoch": 0.599836222191893, "grad_norm": 0.22238901257514954, "learning_rate": 8e-05, "loss": 1.5007, "step": 4395 }, { "epoch": 0.5999727036986489, "grad_norm": 0.21094655990600586, "learning_rate": 8e-05, "loss": 1.4596, "step": 4396 }, { "epoch": 0.6001091852054047, "grad_norm": 0.22575178742408752, "learning_rate": 8e-05, "loss": 1.4867, "step": 4397 }, { "epoch": 0.6002456667121605, "grad_norm": 0.21845540404319763, "learning_rate": 8e-05, "loss": 1.4768, "step": 4398 }, { "epoch": 0.6003821482189163, "grad_norm": 0.21899642050266266, "learning_rate": 8e-05, "loss": 1.4704, "step": 4399 }, { "epoch": 0.6005186297256722, "grad_norm": 0.23280362784862518, "learning_rate": 8e-05, "loss": 1.479, "step": 4400 }, { "epoch": 0.600655111232428, "grad_norm": 0.21607975661754608, "learning_rate": 8e-05, "loss": 1.4399, "step": 4401 }, { "epoch": 0.6007915927391838, "grad_norm": 0.21461284160614014, "learning_rate": 8e-05, "loss": 1.4183, "step": 4402 }, { "epoch": 0.6009280742459396, "grad_norm": 0.22280895709991455, "learning_rate": 8e-05, "loss": 1.4653, "step": 4403 }, { "epoch": 0.6010645557526955, "grad_norm": 0.22206827998161316, "learning_rate": 8e-05, "loss": 1.4804, "step": 4404 }, { "epoch": 0.6012010372594514, "grad_norm": 0.20972760021686554, "learning_rate": 8e-05, "loss": 1.4289, "step": 4405 }, { "epoch": 0.6013375187662072, "grad_norm": 0.22166205942630768, "learning_rate": 8e-05, "loss": 1.4835, "step": 4406 }, { "epoch": 0.601474000272963, "grad_norm": 0.2218756526708603, "learning_rate": 8e-05, "loss": 1.4418, "step": 4407 }, { "epoch": 0.6016104817797189, "grad_norm": 0.20884943008422852, "learning_rate": 8e-05, "loss": 1.4343, "step": 4408 }, { "epoch": 0.6017469632864747, "grad_norm": 0.21617570519447327, "learning_rate": 8e-05, "loss": 1.4472, "step": 4409 }, { "epoch": 0.6018834447932305, "grad_norm": 0.2209385484457016, "learning_rate": 8e-05, "loss": 1.4672, "step": 4410 }, { "epoch": 0.6020199262999864, "grad_norm": 0.21622245013713837, "learning_rate": 8e-05, "loss": 1.4071, "step": 4411 }, { "epoch": 0.6021564078067422, "grad_norm": 0.21497902274131775, "learning_rate": 8e-05, "loss": 1.4801, "step": 4412 }, { "epoch": 0.602292889313498, "grad_norm": 0.2144337147474289, "learning_rate": 8e-05, "loss": 1.4656, "step": 4413 }, { "epoch": 0.6024293708202538, "grad_norm": 0.21735015511512756, "learning_rate": 8e-05, "loss": 1.5191, "step": 4414 }, { "epoch": 0.6025658523270097, "grad_norm": 0.2120286375284195, "learning_rate": 8e-05, "loss": 1.4712, "step": 4415 }, { "epoch": 0.6027023338337655, "grad_norm": 0.222615048289299, "learning_rate": 8e-05, "loss": 1.5126, "step": 4416 }, { "epoch": 0.6028388153405213, "grad_norm": 0.21790429949760437, "learning_rate": 8e-05, "loss": 1.4811, "step": 4417 }, { "epoch": 0.6029752968472772, "grad_norm": 0.2232406884431839, "learning_rate": 8e-05, "loss": 1.4829, "step": 4418 }, { "epoch": 0.6031117783540331, "grad_norm": 0.2130293995141983, "learning_rate": 8e-05, "loss": 1.4915, "step": 4419 }, { "epoch": 0.6032482598607889, "grad_norm": 0.2127717137336731, "learning_rate": 8e-05, "loss": 1.4441, "step": 4420 }, { "epoch": 0.6033847413675447, "grad_norm": 0.22728444635868073, "learning_rate": 8e-05, "loss": 1.5469, "step": 4421 }, { "epoch": 0.6035212228743005, "grad_norm": 0.21519602835178375, "learning_rate": 8e-05, "loss": 1.4317, "step": 4422 }, { "epoch": 0.6036577043810564, "grad_norm": 0.21472813189029694, "learning_rate": 8e-05, "loss": 1.4801, "step": 4423 }, { "epoch": 0.6037941858878122, "grad_norm": 0.2117292732000351, "learning_rate": 8e-05, "loss": 1.4238, "step": 4424 }, { "epoch": 0.603930667394568, "grad_norm": 0.21555130183696747, "learning_rate": 8e-05, "loss": 1.5036, "step": 4425 }, { "epoch": 0.6040671489013238, "grad_norm": 0.20432916283607483, "learning_rate": 8e-05, "loss": 1.4271, "step": 4426 }, { "epoch": 0.6042036304080797, "grad_norm": 0.22728495299816132, "learning_rate": 8e-05, "loss": 1.5311, "step": 4427 }, { "epoch": 0.6043401119148355, "grad_norm": 0.22419095039367676, "learning_rate": 8e-05, "loss": 1.458, "step": 4428 }, { "epoch": 0.6044765934215913, "grad_norm": 0.21420539915561676, "learning_rate": 8e-05, "loss": 1.4006, "step": 4429 }, { "epoch": 0.6046130749283473, "grad_norm": 0.21803054213523865, "learning_rate": 8e-05, "loss": 1.517, "step": 4430 }, { "epoch": 0.6047495564351031, "grad_norm": 0.22154615819454193, "learning_rate": 8e-05, "loss": 1.4865, "step": 4431 }, { "epoch": 0.6048860379418589, "grad_norm": 0.213043212890625, "learning_rate": 8e-05, "loss": 1.4594, "step": 4432 }, { "epoch": 0.6050225194486147, "grad_norm": 0.22341564297676086, "learning_rate": 8e-05, "loss": 1.4967, "step": 4433 }, { "epoch": 0.6051590009553706, "grad_norm": 0.21855106949806213, "learning_rate": 8e-05, "loss": 1.4474, "step": 4434 }, { "epoch": 0.6052954824621264, "grad_norm": 0.2241329550743103, "learning_rate": 8e-05, "loss": 1.4737, "step": 4435 }, { "epoch": 0.6054319639688822, "grad_norm": 0.21529099345207214, "learning_rate": 8e-05, "loss": 1.4422, "step": 4436 }, { "epoch": 0.605568445475638, "grad_norm": 0.22286510467529297, "learning_rate": 8e-05, "loss": 1.4814, "step": 4437 }, { "epoch": 0.6057049269823939, "grad_norm": 0.23753386735916138, "learning_rate": 8e-05, "loss": 1.59, "step": 4438 }, { "epoch": 0.6058414084891497, "grad_norm": 0.22039948403835297, "learning_rate": 8e-05, "loss": 1.4174, "step": 4439 }, { "epoch": 0.6059778899959055, "grad_norm": 0.22198425233364105, "learning_rate": 8e-05, "loss": 1.4452, "step": 4440 }, { "epoch": 0.6061143715026613, "grad_norm": 0.22369617223739624, "learning_rate": 8e-05, "loss": 1.4947, "step": 4441 }, { "epoch": 0.6062508530094173, "grad_norm": 0.22572718560695648, "learning_rate": 8e-05, "loss": 1.5466, "step": 4442 }, { "epoch": 0.6063873345161731, "grad_norm": 0.2198096364736557, "learning_rate": 8e-05, "loss": 1.4973, "step": 4443 }, { "epoch": 0.6065238160229289, "grad_norm": 0.22440186142921448, "learning_rate": 8e-05, "loss": 1.5005, "step": 4444 }, { "epoch": 0.6066602975296848, "grad_norm": 0.21684330701828003, "learning_rate": 8e-05, "loss": 1.447, "step": 4445 }, { "epoch": 0.6067967790364406, "grad_norm": 0.21544249355793, "learning_rate": 8e-05, "loss": 1.3919, "step": 4446 }, { "epoch": 0.6069332605431964, "grad_norm": 0.22006885707378387, "learning_rate": 8e-05, "loss": 1.5198, "step": 4447 }, { "epoch": 0.6070697420499522, "grad_norm": 0.2181139439344406, "learning_rate": 8e-05, "loss": 1.4701, "step": 4448 }, { "epoch": 0.6072062235567081, "grad_norm": 0.2256515622138977, "learning_rate": 8e-05, "loss": 1.4683, "step": 4449 }, { "epoch": 0.6073427050634639, "grad_norm": 0.2163325846195221, "learning_rate": 8e-05, "loss": 1.4897, "step": 4450 }, { "epoch": 0.6074791865702197, "grad_norm": 0.22249732911586761, "learning_rate": 8e-05, "loss": 1.488, "step": 4451 }, { "epoch": 0.6076156680769755, "grad_norm": 0.21261267364025116, "learning_rate": 8e-05, "loss": 1.3938, "step": 4452 }, { "epoch": 0.6077521495837314, "grad_norm": 0.22215509414672852, "learning_rate": 8e-05, "loss": 1.4832, "step": 4453 }, { "epoch": 0.6078886310904872, "grad_norm": 0.21968472003936768, "learning_rate": 8e-05, "loss": 1.5098, "step": 4454 }, { "epoch": 0.6080251125972431, "grad_norm": 0.22067271173000336, "learning_rate": 8e-05, "loss": 1.4487, "step": 4455 }, { "epoch": 0.608161594103999, "grad_norm": 0.21898362040519714, "learning_rate": 8e-05, "loss": 1.4703, "step": 4456 }, { "epoch": 0.6082980756107548, "grad_norm": 0.22021907567977905, "learning_rate": 8e-05, "loss": 1.4593, "step": 4457 }, { "epoch": 0.6084345571175106, "grad_norm": 0.22044025361537933, "learning_rate": 8e-05, "loss": 1.3641, "step": 4458 }, { "epoch": 0.6085710386242664, "grad_norm": 0.22148315608501434, "learning_rate": 8e-05, "loss": 1.4854, "step": 4459 }, { "epoch": 0.6087075201310223, "grad_norm": 0.2195834070444107, "learning_rate": 8e-05, "loss": 1.5054, "step": 4460 }, { "epoch": 0.6088440016377781, "grad_norm": 0.2089032232761383, "learning_rate": 8e-05, "loss": 1.3413, "step": 4461 }, { "epoch": 0.6089804831445339, "grad_norm": 0.21604180335998535, "learning_rate": 8e-05, "loss": 1.4623, "step": 4462 }, { "epoch": 0.6091169646512897, "grad_norm": 0.21705101430416107, "learning_rate": 8e-05, "loss": 1.4492, "step": 4463 }, { "epoch": 0.6092534461580456, "grad_norm": 0.22070030868053436, "learning_rate": 8e-05, "loss": 1.4829, "step": 4464 }, { "epoch": 0.6093899276648014, "grad_norm": 0.2307535707950592, "learning_rate": 8e-05, "loss": 1.58, "step": 4465 }, { "epoch": 0.6095264091715572, "grad_norm": 0.21350371837615967, "learning_rate": 8e-05, "loss": 1.4244, "step": 4466 }, { "epoch": 0.6096628906783131, "grad_norm": 0.21948514878749847, "learning_rate": 8e-05, "loss": 1.4769, "step": 4467 }, { "epoch": 0.609799372185069, "grad_norm": 0.2194800227880478, "learning_rate": 8e-05, "loss": 1.4951, "step": 4468 }, { "epoch": 0.6099358536918248, "grad_norm": 0.22375673055648804, "learning_rate": 8e-05, "loss": 1.5222, "step": 4469 }, { "epoch": 0.6100723351985806, "grad_norm": 0.22076460719108582, "learning_rate": 8e-05, "loss": 1.571, "step": 4470 }, { "epoch": 0.6102088167053364, "grad_norm": 0.21777185797691345, "learning_rate": 8e-05, "loss": 1.4867, "step": 4471 }, { "epoch": 0.6103452982120923, "grad_norm": 0.22119440138339996, "learning_rate": 8e-05, "loss": 1.4942, "step": 4472 }, { "epoch": 0.6104817797188481, "grad_norm": 0.22467420995235443, "learning_rate": 8e-05, "loss": 1.5233, "step": 4473 }, { "epoch": 0.6106182612256039, "grad_norm": 0.22084012627601624, "learning_rate": 8e-05, "loss": 1.4518, "step": 4474 }, { "epoch": 0.6107547427323597, "grad_norm": 0.21889568865299225, "learning_rate": 8e-05, "loss": 1.444, "step": 4475 }, { "epoch": 0.6108912242391156, "grad_norm": 0.20903992652893066, "learning_rate": 8e-05, "loss": 1.4685, "step": 4476 }, { "epoch": 0.6110277057458714, "grad_norm": 0.2155047059059143, "learning_rate": 8e-05, "loss": 1.503, "step": 4477 }, { "epoch": 0.6111641872526272, "grad_norm": 0.2194477915763855, "learning_rate": 8e-05, "loss": 1.4638, "step": 4478 }, { "epoch": 0.6113006687593832, "grad_norm": 0.2223316878080368, "learning_rate": 8e-05, "loss": 1.4389, "step": 4479 }, { "epoch": 0.611437150266139, "grad_norm": 0.2256895750761032, "learning_rate": 8e-05, "loss": 1.5353, "step": 4480 }, { "epoch": 0.6115736317728948, "grad_norm": 0.21991045773029327, "learning_rate": 8e-05, "loss": 1.4957, "step": 4481 }, { "epoch": 0.6117101132796506, "grad_norm": 0.24591347575187683, "learning_rate": 8e-05, "loss": 1.5205, "step": 4482 }, { "epoch": 0.6118465947864065, "grad_norm": 0.21895599365234375, "learning_rate": 8e-05, "loss": 1.5097, "step": 4483 }, { "epoch": 0.6119830762931623, "grad_norm": 0.21773697435855865, "learning_rate": 8e-05, "loss": 1.5073, "step": 4484 }, { "epoch": 0.6121195577999181, "grad_norm": 0.21382389962673187, "learning_rate": 8e-05, "loss": 1.5024, "step": 4485 }, { "epoch": 0.6122560393066739, "grad_norm": 0.21890582144260406, "learning_rate": 8e-05, "loss": 1.4283, "step": 4486 }, { "epoch": 0.6123925208134298, "grad_norm": 0.21945111453533173, "learning_rate": 8e-05, "loss": 1.5511, "step": 4487 }, { "epoch": 0.6125290023201856, "grad_norm": 0.2107498198747635, "learning_rate": 8e-05, "loss": 1.4393, "step": 4488 }, { "epoch": 0.6126654838269414, "grad_norm": 0.21651676297187805, "learning_rate": 8e-05, "loss": 1.4496, "step": 4489 }, { "epoch": 0.6128019653336972, "grad_norm": 0.21643996238708496, "learning_rate": 8e-05, "loss": 1.382, "step": 4490 }, { "epoch": 0.6129384468404531, "grad_norm": 0.23711667954921722, "learning_rate": 8e-05, "loss": 1.6005, "step": 4491 }, { "epoch": 0.613074928347209, "grad_norm": 0.22493483126163483, "learning_rate": 8e-05, "loss": 1.5201, "step": 4492 }, { "epoch": 0.6132114098539648, "grad_norm": 0.22030393779277802, "learning_rate": 8e-05, "loss": 1.4746, "step": 4493 }, { "epoch": 0.6133478913607207, "grad_norm": 0.218339741230011, "learning_rate": 8e-05, "loss": 1.5275, "step": 4494 }, { "epoch": 0.6134843728674765, "grad_norm": 0.21164961159229279, "learning_rate": 8e-05, "loss": 1.4615, "step": 4495 }, { "epoch": 0.6136208543742323, "grad_norm": 0.21988964080810547, "learning_rate": 8e-05, "loss": 1.4829, "step": 4496 }, { "epoch": 0.6137573358809881, "grad_norm": 0.21515649557113647, "learning_rate": 8e-05, "loss": 1.4605, "step": 4497 }, { "epoch": 0.613893817387744, "grad_norm": 0.22704173624515533, "learning_rate": 8e-05, "loss": 1.539, "step": 4498 }, { "epoch": 0.6140302988944998, "grad_norm": 0.21372933685779572, "learning_rate": 8e-05, "loss": 1.4904, "step": 4499 }, { "epoch": 0.6141667804012556, "grad_norm": 0.22876450419425964, "learning_rate": 8e-05, "loss": 1.5148, "step": 4500 }, { "epoch": 0.6143032619080114, "grad_norm": 0.21961663663387299, "learning_rate": 8e-05, "loss": 1.4927, "step": 4501 }, { "epoch": 0.6144397434147673, "grad_norm": 0.21080389618873596, "learning_rate": 8e-05, "loss": 1.4283, "step": 4502 }, { "epoch": 0.6145762249215231, "grad_norm": 0.2210008054971695, "learning_rate": 8e-05, "loss": 1.4788, "step": 4503 }, { "epoch": 0.614712706428279, "grad_norm": 0.21898935735225677, "learning_rate": 8e-05, "loss": 1.4297, "step": 4504 }, { "epoch": 0.6148491879350348, "grad_norm": 0.22737912833690643, "learning_rate": 8e-05, "loss": 1.5438, "step": 4505 }, { "epoch": 0.6149856694417907, "grad_norm": 0.22331592440605164, "learning_rate": 8e-05, "loss": 1.4847, "step": 4506 }, { "epoch": 0.6151221509485465, "grad_norm": 0.21980531513690948, "learning_rate": 8e-05, "loss": 1.4633, "step": 4507 }, { "epoch": 0.6152586324553023, "grad_norm": 0.22071319818496704, "learning_rate": 8e-05, "loss": 1.4865, "step": 4508 }, { "epoch": 0.6153951139620581, "grad_norm": 0.21640212833881378, "learning_rate": 8e-05, "loss": 1.4671, "step": 4509 }, { "epoch": 0.615531595468814, "grad_norm": 0.23226206004619598, "learning_rate": 8e-05, "loss": 1.5397, "step": 4510 }, { "epoch": 0.6156680769755698, "grad_norm": 0.21327665448188782, "learning_rate": 8e-05, "loss": 1.3976, "step": 4511 }, { "epoch": 0.6158045584823256, "grad_norm": 0.22597189247608185, "learning_rate": 8e-05, "loss": 1.4881, "step": 4512 }, { "epoch": 0.6159410399890815, "grad_norm": 0.21529574692249298, "learning_rate": 8e-05, "loss": 1.4985, "step": 4513 }, { "epoch": 0.6160775214958373, "grad_norm": 0.2108212113380432, "learning_rate": 8e-05, "loss": 1.4375, "step": 4514 }, { "epoch": 0.6162140030025931, "grad_norm": 0.23344916105270386, "learning_rate": 8e-05, "loss": 1.5552, "step": 4515 }, { "epoch": 0.6163504845093489, "grad_norm": 0.221527561545372, "learning_rate": 8e-05, "loss": 1.496, "step": 4516 }, { "epoch": 0.6164869660161049, "grad_norm": 0.2471136599779129, "learning_rate": 8e-05, "loss": 1.5501, "step": 4517 }, { "epoch": 0.6166234475228607, "grad_norm": 0.22684770822525024, "learning_rate": 8e-05, "loss": 1.4926, "step": 4518 }, { "epoch": 0.6167599290296165, "grad_norm": 0.2155984789133072, "learning_rate": 8e-05, "loss": 1.4549, "step": 4519 }, { "epoch": 0.6168964105363723, "grad_norm": 0.20664231479167938, "learning_rate": 8e-05, "loss": 1.4339, "step": 4520 }, { "epoch": 0.6170328920431282, "grad_norm": 0.21298521757125854, "learning_rate": 8e-05, "loss": 1.3996, "step": 4521 }, { "epoch": 0.617169373549884, "grad_norm": 0.21821504831314087, "learning_rate": 8e-05, "loss": 1.5086, "step": 4522 }, { "epoch": 0.6173058550566398, "grad_norm": 0.2191344052553177, "learning_rate": 8e-05, "loss": 1.443, "step": 4523 }, { "epoch": 0.6174423365633956, "grad_norm": 0.2219267189502716, "learning_rate": 8e-05, "loss": 1.4185, "step": 4524 }, { "epoch": 0.6175788180701515, "grad_norm": 0.23031525313854218, "learning_rate": 8e-05, "loss": 1.4623, "step": 4525 }, { "epoch": 0.6177152995769073, "grad_norm": 0.2170715481042862, "learning_rate": 8e-05, "loss": 1.4641, "step": 4526 }, { "epoch": 0.6178517810836631, "grad_norm": 0.22577206790447235, "learning_rate": 8e-05, "loss": 1.5006, "step": 4527 }, { "epoch": 0.617988262590419, "grad_norm": 0.21724019944667816, "learning_rate": 8e-05, "loss": 1.4165, "step": 4528 }, { "epoch": 0.6181247440971749, "grad_norm": 0.2155725508928299, "learning_rate": 8e-05, "loss": 1.4624, "step": 4529 }, { "epoch": 0.6182612256039307, "grad_norm": 0.2135869413614273, "learning_rate": 8e-05, "loss": 1.5414, "step": 4530 }, { "epoch": 0.6183977071106865, "grad_norm": 0.22273477911949158, "learning_rate": 8e-05, "loss": 1.4907, "step": 4531 }, { "epoch": 0.6185341886174424, "grad_norm": 0.22739556431770325, "learning_rate": 8e-05, "loss": 1.4969, "step": 4532 }, { "epoch": 0.6186706701241982, "grad_norm": 0.2174486368894577, "learning_rate": 8e-05, "loss": 1.3822, "step": 4533 }, { "epoch": 0.618807151630954, "grad_norm": 0.21750394999980927, "learning_rate": 8e-05, "loss": 1.4679, "step": 4534 }, { "epoch": 0.6189436331377098, "grad_norm": 0.21488697826862335, "learning_rate": 8e-05, "loss": 1.3563, "step": 4535 }, { "epoch": 0.6190801146444657, "grad_norm": 0.2237270623445511, "learning_rate": 8e-05, "loss": 1.5987, "step": 4536 }, { "epoch": 0.6192165961512215, "grad_norm": 0.22856594622135162, "learning_rate": 8e-05, "loss": 1.4875, "step": 4537 }, { "epoch": 0.6193530776579773, "grad_norm": 0.211374893784523, "learning_rate": 8e-05, "loss": 1.3905, "step": 4538 }, { "epoch": 0.6194895591647331, "grad_norm": 0.21623264253139496, "learning_rate": 8e-05, "loss": 1.5292, "step": 4539 }, { "epoch": 0.619626040671489, "grad_norm": 0.22048892080783844, "learning_rate": 8e-05, "loss": 1.4539, "step": 4540 }, { "epoch": 0.6197625221782449, "grad_norm": 0.2201317548751831, "learning_rate": 8e-05, "loss": 1.5519, "step": 4541 }, { "epoch": 0.6198990036850007, "grad_norm": 0.22852268815040588, "learning_rate": 8e-05, "loss": 1.5251, "step": 4542 }, { "epoch": 0.6200354851917566, "grad_norm": 0.2265300750732422, "learning_rate": 8e-05, "loss": 1.4819, "step": 4543 }, { "epoch": 0.6201719666985124, "grad_norm": 0.22713707387447357, "learning_rate": 8e-05, "loss": 1.5013, "step": 4544 }, { "epoch": 0.6203084482052682, "grad_norm": 0.21535217761993408, "learning_rate": 8e-05, "loss": 1.4675, "step": 4545 }, { "epoch": 0.620444929712024, "grad_norm": 0.21663063764572144, "learning_rate": 8e-05, "loss": 1.4429, "step": 4546 }, { "epoch": 0.6205814112187799, "grad_norm": 0.22075310349464417, "learning_rate": 8e-05, "loss": 1.482, "step": 4547 }, { "epoch": 0.6207178927255357, "grad_norm": 0.2360958307981491, "learning_rate": 8e-05, "loss": 1.4974, "step": 4548 }, { "epoch": 0.6208543742322915, "grad_norm": 0.22052042186260223, "learning_rate": 8e-05, "loss": 1.5452, "step": 4549 }, { "epoch": 0.6209908557390473, "grad_norm": 0.2116163820028305, "learning_rate": 8e-05, "loss": 1.4317, "step": 4550 }, { "epoch": 0.6211273372458032, "grad_norm": 0.2235938012599945, "learning_rate": 8e-05, "loss": 1.5809, "step": 4551 }, { "epoch": 0.621263818752559, "grad_norm": 0.21472059190273285, "learning_rate": 8e-05, "loss": 1.4508, "step": 4552 }, { "epoch": 0.6214003002593148, "grad_norm": 0.2278815060853958, "learning_rate": 8e-05, "loss": 1.5019, "step": 4553 }, { "epoch": 0.6215367817660707, "grad_norm": 0.20541225373744965, "learning_rate": 8e-05, "loss": 1.4077, "step": 4554 }, { "epoch": 0.6216732632728266, "grad_norm": 0.22905808687210083, "learning_rate": 8e-05, "loss": 1.4679, "step": 4555 }, { "epoch": 0.6218097447795824, "grad_norm": 0.2160603553056717, "learning_rate": 8e-05, "loss": 1.469, "step": 4556 }, { "epoch": 0.6219462262863382, "grad_norm": 0.22439983487129211, "learning_rate": 8e-05, "loss": 1.4951, "step": 4557 }, { "epoch": 0.622082707793094, "grad_norm": 0.22758635878562927, "learning_rate": 8e-05, "loss": 1.4272, "step": 4558 }, { "epoch": 0.6222191892998499, "grad_norm": 0.2183096706867218, "learning_rate": 8e-05, "loss": 1.406, "step": 4559 }, { "epoch": 0.6223556708066057, "grad_norm": 0.22571112215518951, "learning_rate": 8e-05, "loss": 1.471, "step": 4560 }, { "epoch": 0.6224921523133615, "grad_norm": 0.23306390643119812, "learning_rate": 8e-05, "loss": 1.5578, "step": 4561 }, { "epoch": 0.6226286338201173, "grad_norm": 0.22306998074054718, "learning_rate": 8e-05, "loss": 1.4923, "step": 4562 }, { "epoch": 0.6227651153268732, "grad_norm": 0.2155541181564331, "learning_rate": 8e-05, "loss": 1.5457, "step": 4563 }, { "epoch": 0.622901596833629, "grad_norm": 0.22404338419437408, "learning_rate": 8e-05, "loss": 1.4552, "step": 4564 }, { "epoch": 0.6230380783403848, "grad_norm": 0.21983791887760162, "learning_rate": 8e-05, "loss": 1.477, "step": 4565 }, { "epoch": 0.6231745598471408, "grad_norm": 0.22456729412078857, "learning_rate": 8e-05, "loss": 1.4743, "step": 4566 }, { "epoch": 0.6233110413538966, "grad_norm": 0.23818491399288177, "learning_rate": 8e-05, "loss": 1.4443, "step": 4567 }, { "epoch": 0.6234475228606524, "grad_norm": 0.22465217113494873, "learning_rate": 8e-05, "loss": 1.5176, "step": 4568 }, { "epoch": 0.6235840043674082, "grad_norm": 0.22717180848121643, "learning_rate": 8e-05, "loss": 1.5033, "step": 4569 }, { "epoch": 0.6237204858741641, "grad_norm": 0.21714508533477783, "learning_rate": 8e-05, "loss": 1.4591, "step": 4570 }, { "epoch": 0.6238569673809199, "grad_norm": 0.22139602899551392, "learning_rate": 8e-05, "loss": 1.4594, "step": 4571 }, { "epoch": 0.6239934488876757, "grad_norm": 0.2278294414281845, "learning_rate": 8e-05, "loss": 1.5099, "step": 4572 }, { "epoch": 0.6241299303944315, "grad_norm": 0.21526354551315308, "learning_rate": 8e-05, "loss": 1.4982, "step": 4573 }, { "epoch": 0.6242664119011874, "grad_norm": 0.22482097148895264, "learning_rate": 8e-05, "loss": 1.5189, "step": 4574 }, { "epoch": 0.6244028934079432, "grad_norm": 0.22274430096149445, "learning_rate": 8e-05, "loss": 1.3734, "step": 4575 }, { "epoch": 0.624539374914699, "grad_norm": 0.22055159509181976, "learning_rate": 8e-05, "loss": 1.3901, "step": 4576 }, { "epoch": 0.6246758564214548, "grad_norm": 0.223702535033226, "learning_rate": 8e-05, "loss": 1.4506, "step": 4577 }, { "epoch": 0.6248123379282108, "grad_norm": 0.22523395717144012, "learning_rate": 8e-05, "loss": 1.4929, "step": 4578 }, { "epoch": 0.6249488194349666, "grad_norm": 0.2230580747127533, "learning_rate": 8e-05, "loss": 1.428, "step": 4579 }, { "epoch": 0.6250853009417224, "grad_norm": 0.21592172980308533, "learning_rate": 8e-05, "loss": 1.4482, "step": 4580 }, { "epoch": 0.6252217824484783, "grad_norm": 0.21322357654571533, "learning_rate": 8e-05, "loss": 1.4407, "step": 4581 }, { "epoch": 0.6253582639552341, "grad_norm": 0.2294072061777115, "learning_rate": 8e-05, "loss": 1.4537, "step": 4582 }, { "epoch": 0.6254947454619899, "grad_norm": 0.2150132954120636, "learning_rate": 8e-05, "loss": 1.4684, "step": 4583 }, { "epoch": 0.6256312269687457, "grad_norm": 0.21548451483249664, "learning_rate": 8e-05, "loss": 1.5178, "step": 4584 }, { "epoch": 0.6257677084755016, "grad_norm": 0.2210894227027893, "learning_rate": 8e-05, "loss": 1.4739, "step": 4585 }, { "epoch": 0.6259041899822574, "grad_norm": 0.23256449401378632, "learning_rate": 8e-05, "loss": 1.5045, "step": 4586 }, { "epoch": 0.6260406714890132, "grad_norm": 0.2129555195569992, "learning_rate": 8e-05, "loss": 1.4096, "step": 4587 }, { "epoch": 0.626177152995769, "grad_norm": 0.22758153080940247, "learning_rate": 8e-05, "loss": 1.5288, "step": 4588 }, { "epoch": 0.6263136345025249, "grad_norm": 0.21438907086849213, "learning_rate": 8e-05, "loss": 1.4614, "step": 4589 }, { "epoch": 0.6264501160092807, "grad_norm": 0.2170381247997284, "learning_rate": 8e-05, "loss": 1.3728, "step": 4590 }, { "epoch": 0.6265865975160366, "grad_norm": 0.2122417390346527, "learning_rate": 8e-05, "loss": 1.4686, "step": 4591 }, { "epoch": 0.6267230790227925, "grad_norm": 0.2286798506975174, "learning_rate": 8e-05, "loss": 1.4809, "step": 4592 }, { "epoch": 0.6268595605295483, "grad_norm": 0.22717009484767914, "learning_rate": 8e-05, "loss": 1.4801, "step": 4593 }, { "epoch": 0.6269960420363041, "grad_norm": 0.22560113668441772, "learning_rate": 8e-05, "loss": 1.4603, "step": 4594 }, { "epoch": 0.6271325235430599, "grad_norm": 0.22476686537265778, "learning_rate": 8e-05, "loss": 1.4726, "step": 4595 }, { "epoch": 0.6272690050498158, "grad_norm": 0.21955826878547668, "learning_rate": 8e-05, "loss": 1.4864, "step": 4596 }, { "epoch": 0.6274054865565716, "grad_norm": 0.22009466588497162, "learning_rate": 8e-05, "loss": 1.4766, "step": 4597 }, { "epoch": 0.6275419680633274, "grad_norm": 0.22455346584320068, "learning_rate": 8e-05, "loss": 1.4874, "step": 4598 }, { "epoch": 0.6276784495700832, "grad_norm": 0.219157412648201, "learning_rate": 8e-05, "loss": 1.506, "step": 4599 }, { "epoch": 0.6278149310768391, "grad_norm": 0.2223764955997467, "learning_rate": 8e-05, "loss": 1.4518, "step": 4600 }, { "epoch": 0.6279514125835949, "grad_norm": 0.2246936410665512, "learning_rate": 8e-05, "loss": 1.454, "step": 4601 }, { "epoch": 0.6280878940903507, "grad_norm": 0.21355292201042175, "learning_rate": 8e-05, "loss": 1.4093, "step": 4602 }, { "epoch": 0.6282243755971066, "grad_norm": 0.2190273404121399, "learning_rate": 8e-05, "loss": 1.4644, "step": 4603 }, { "epoch": 0.6283608571038625, "grad_norm": 0.2212258130311966, "learning_rate": 8e-05, "loss": 1.4718, "step": 4604 }, { "epoch": 0.6284973386106183, "grad_norm": 0.22286319732666016, "learning_rate": 8e-05, "loss": 1.4929, "step": 4605 }, { "epoch": 0.6286338201173741, "grad_norm": 0.22692331671714783, "learning_rate": 8e-05, "loss": 1.3897, "step": 4606 }, { "epoch": 0.62877030162413, "grad_norm": 0.21866215765476227, "learning_rate": 8e-05, "loss": 1.4308, "step": 4607 }, { "epoch": 0.6289067831308858, "grad_norm": 0.2197587639093399, "learning_rate": 8e-05, "loss": 1.5291, "step": 4608 }, { "epoch": 0.6290432646376416, "grad_norm": 0.22517380118370056, "learning_rate": 8e-05, "loss": 1.5383, "step": 4609 }, { "epoch": 0.6291797461443974, "grad_norm": 0.21190588176250458, "learning_rate": 8e-05, "loss": 1.4744, "step": 4610 }, { "epoch": 0.6293162276511532, "grad_norm": 0.22436542809009552, "learning_rate": 8e-05, "loss": 1.445, "step": 4611 }, { "epoch": 0.6294527091579091, "grad_norm": 0.22565437853336334, "learning_rate": 8e-05, "loss": 1.5175, "step": 4612 }, { "epoch": 0.6295891906646649, "grad_norm": 0.21263591945171356, "learning_rate": 8e-05, "loss": 1.4412, "step": 4613 }, { "epoch": 0.6297256721714207, "grad_norm": 0.2213335782289505, "learning_rate": 8e-05, "loss": 1.4525, "step": 4614 }, { "epoch": 0.6298621536781767, "grad_norm": 0.22224274277687073, "learning_rate": 8e-05, "loss": 1.5425, "step": 4615 }, { "epoch": 0.6299986351849325, "grad_norm": 0.22343961894512177, "learning_rate": 8e-05, "loss": 1.5104, "step": 4616 }, { "epoch": 0.6301351166916883, "grad_norm": 0.22053085267543793, "learning_rate": 8e-05, "loss": 1.4877, "step": 4617 }, { "epoch": 0.6302715981984441, "grad_norm": 0.2179500311613083, "learning_rate": 8e-05, "loss": 1.4672, "step": 4618 }, { "epoch": 0.6304080797052, "grad_norm": 0.22118046879768372, "learning_rate": 8e-05, "loss": 1.5334, "step": 4619 }, { "epoch": 0.6305445612119558, "grad_norm": 0.2125585526227951, "learning_rate": 8e-05, "loss": 1.4509, "step": 4620 }, { "epoch": 0.6306810427187116, "grad_norm": 0.22199584543704987, "learning_rate": 8e-05, "loss": 1.4671, "step": 4621 }, { "epoch": 0.6308175242254674, "grad_norm": 0.22178277373313904, "learning_rate": 8e-05, "loss": 1.4565, "step": 4622 }, { "epoch": 0.6309540057322233, "grad_norm": 0.22410519421100616, "learning_rate": 8e-05, "loss": 1.3981, "step": 4623 }, { "epoch": 0.6310904872389791, "grad_norm": 0.2181062549352646, "learning_rate": 8e-05, "loss": 1.5205, "step": 4624 }, { "epoch": 0.6312269687457349, "grad_norm": 0.2156004160642624, "learning_rate": 8e-05, "loss": 1.4433, "step": 4625 }, { "epoch": 0.6313634502524907, "grad_norm": 0.22380991280078888, "learning_rate": 8e-05, "loss": 1.4649, "step": 4626 }, { "epoch": 0.6314999317592466, "grad_norm": 0.21817751228809357, "learning_rate": 8e-05, "loss": 1.4697, "step": 4627 }, { "epoch": 0.6316364132660025, "grad_norm": 0.21054239571094513, "learning_rate": 8e-05, "loss": 1.4228, "step": 4628 }, { "epoch": 0.6317728947727583, "grad_norm": 0.2192118763923645, "learning_rate": 8e-05, "loss": 1.5101, "step": 4629 }, { "epoch": 0.6319093762795142, "grad_norm": 0.2229197472333908, "learning_rate": 8e-05, "loss": 1.5175, "step": 4630 }, { "epoch": 0.63204585778627, "grad_norm": 0.2127682864665985, "learning_rate": 8e-05, "loss": 1.4435, "step": 4631 }, { "epoch": 0.6321823392930258, "grad_norm": 0.22397445142269135, "learning_rate": 8e-05, "loss": 1.441, "step": 4632 }, { "epoch": 0.6323188207997816, "grad_norm": 0.20974363386631012, "learning_rate": 8e-05, "loss": 1.4048, "step": 4633 }, { "epoch": 0.6324553023065375, "grad_norm": 0.22104448080062866, "learning_rate": 8e-05, "loss": 1.5217, "step": 4634 }, { "epoch": 0.6325917838132933, "grad_norm": 0.21387861669063568, "learning_rate": 8e-05, "loss": 1.4287, "step": 4635 }, { "epoch": 0.6327282653200491, "grad_norm": 0.21601299941539764, "learning_rate": 8e-05, "loss": 1.4591, "step": 4636 }, { "epoch": 0.6328647468268049, "grad_norm": 0.2263786792755127, "learning_rate": 8e-05, "loss": 1.4821, "step": 4637 }, { "epoch": 0.6330012283335608, "grad_norm": 0.22038711607456207, "learning_rate": 8e-05, "loss": 1.4461, "step": 4638 }, { "epoch": 0.6331377098403166, "grad_norm": 0.22404839098453522, "learning_rate": 8e-05, "loss": 1.4498, "step": 4639 }, { "epoch": 0.6332741913470725, "grad_norm": 0.21835957467556, "learning_rate": 8e-05, "loss": 1.4563, "step": 4640 }, { "epoch": 0.6334106728538283, "grad_norm": 0.21802298724651337, "learning_rate": 8e-05, "loss": 1.4689, "step": 4641 }, { "epoch": 0.6335471543605842, "grad_norm": 0.22251929342746735, "learning_rate": 8e-05, "loss": 1.4441, "step": 4642 }, { "epoch": 0.63368363586734, "grad_norm": 0.218058243393898, "learning_rate": 8e-05, "loss": 1.4409, "step": 4643 }, { "epoch": 0.6338201173740958, "grad_norm": 0.21673572063446045, "learning_rate": 8e-05, "loss": 1.3803, "step": 4644 }, { "epoch": 0.6339565988808517, "grad_norm": 0.21810711920261383, "learning_rate": 8e-05, "loss": 1.453, "step": 4645 }, { "epoch": 0.6340930803876075, "grad_norm": 0.22305771708488464, "learning_rate": 8e-05, "loss": 1.4995, "step": 4646 }, { "epoch": 0.6342295618943633, "grad_norm": 0.22756484150886536, "learning_rate": 8e-05, "loss": 1.5225, "step": 4647 }, { "epoch": 0.6343660434011191, "grad_norm": 0.2284999042749405, "learning_rate": 8e-05, "loss": 1.436, "step": 4648 }, { "epoch": 0.634502524907875, "grad_norm": 0.2279583066701889, "learning_rate": 8e-05, "loss": 1.4646, "step": 4649 }, { "epoch": 0.6346390064146308, "grad_norm": 0.22005701065063477, "learning_rate": 8e-05, "loss": 1.3923, "step": 4650 }, { "epoch": 0.6347754879213866, "grad_norm": 0.20984189212322235, "learning_rate": 8e-05, "loss": 1.3929, "step": 4651 }, { "epoch": 0.6349119694281425, "grad_norm": 0.22153371572494507, "learning_rate": 8e-05, "loss": 1.4896, "step": 4652 }, { "epoch": 0.6350484509348984, "grad_norm": 0.22544576227664948, "learning_rate": 8e-05, "loss": 1.4896, "step": 4653 }, { "epoch": 0.6351849324416542, "grad_norm": 0.22344543039798737, "learning_rate": 8e-05, "loss": 1.5057, "step": 4654 }, { "epoch": 0.63532141394841, "grad_norm": 0.22263257205486298, "learning_rate": 8e-05, "loss": 1.4561, "step": 4655 }, { "epoch": 0.6354578954551658, "grad_norm": 0.22542722523212433, "learning_rate": 8e-05, "loss": 1.4506, "step": 4656 }, { "epoch": 0.6355943769619217, "grad_norm": 0.22193047404289246, "learning_rate": 8e-05, "loss": 1.473, "step": 4657 }, { "epoch": 0.6357308584686775, "grad_norm": 0.22100095450878143, "learning_rate": 8e-05, "loss": 1.4293, "step": 4658 }, { "epoch": 0.6358673399754333, "grad_norm": 0.21931973099708557, "learning_rate": 8e-05, "loss": 1.4418, "step": 4659 }, { "epoch": 0.6360038214821891, "grad_norm": 0.2258632481098175, "learning_rate": 8e-05, "loss": 1.4678, "step": 4660 }, { "epoch": 0.636140302988945, "grad_norm": 0.22061054408550262, "learning_rate": 8e-05, "loss": 1.4211, "step": 4661 }, { "epoch": 0.6362767844957008, "grad_norm": 0.21946576237678528, "learning_rate": 8e-05, "loss": 1.4316, "step": 4662 }, { "epoch": 0.6364132660024566, "grad_norm": 0.23044532537460327, "learning_rate": 8e-05, "loss": 1.5135, "step": 4663 }, { "epoch": 0.6365497475092124, "grad_norm": 0.23592929542064667, "learning_rate": 8e-05, "loss": 1.4532, "step": 4664 }, { "epoch": 0.6366862290159684, "grad_norm": 0.21670588850975037, "learning_rate": 8e-05, "loss": 1.4584, "step": 4665 }, { "epoch": 0.6368227105227242, "grad_norm": 0.22482140362262726, "learning_rate": 8e-05, "loss": 1.519, "step": 4666 }, { "epoch": 0.63695919202948, "grad_norm": 0.22188323736190796, "learning_rate": 8e-05, "loss": 1.5304, "step": 4667 }, { "epoch": 0.6370956735362359, "grad_norm": 0.2268882840871811, "learning_rate": 8e-05, "loss": 1.4572, "step": 4668 }, { "epoch": 0.6372321550429917, "grad_norm": 0.21680812537670135, "learning_rate": 8e-05, "loss": 1.4597, "step": 4669 }, { "epoch": 0.6373686365497475, "grad_norm": 0.23040521144866943, "learning_rate": 8e-05, "loss": 1.505, "step": 4670 }, { "epoch": 0.6375051180565033, "grad_norm": 0.22414684295654297, "learning_rate": 8e-05, "loss": 1.4926, "step": 4671 }, { "epoch": 0.6376415995632592, "grad_norm": 0.2309904545545578, "learning_rate": 8e-05, "loss": 1.4142, "step": 4672 }, { "epoch": 0.637778081070015, "grad_norm": 0.23031876981258392, "learning_rate": 8e-05, "loss": 1.5281, "step": 4673 }, { "epoch": 0.6379145625767708, "grad_norm": 0.22511087357997894, "learning_rate": 8e-05, "loss": 1.466, "step": 4674 }, { "epoch": 0.6380510440835266, "grad_norm": 0.21712131798267365, "learning_rate": 8e-05, "loss": 1.4357, "step": 4675 }, { "epoch": 0.6381875255902825, "grad_norm": 0.2201714664697647, "learning_rate": 8e-05, "loss": 1.4442, "step": 4676 }, { "epoch": 0.6383240070970384, "grad_norm": 0.21700957417488098, "learning_rate": 8e-05, "loss": 1.4427, "step": 4677 }, { "epoch": 0.6384604886037942, "grad_norm": 0.2210516780614853, "learning_rate": 8e-05, "loss": 1.4643, "step": 4678 }, { "epoch": 0.63859697011055, "grad_norm": 0.21742641925811768, "learning_rate": 8e-05, "loss": 1.3438, "step": 4679 }, { "epoch": 0.6387334516173059, "grad_norm": 0.23449508845806122, "learning_rate": 8e-05, "loss": 1.4639, "step": 4680 }, { "epoch": 0.6388699331240617, "grad_norm": 0.22773809731006622, "learning_rate": 8e-05, "loss": 1.4779, "step": 4681 }, { "epoch": 0.6390064146308175, "grad_norm": 0.2365875095129013, "learning_rate": 8e-05, "loss": 1.4438, "step": 4682 }, { "epoch": 0.6391428961375734, "grad_norm": 0.21995630860328674, "learning_rate": 8e-05, "loss": 1.4766, "step": 4683 }, { "epoch": 0.6392793776443292, "grad_norm": 0.2281271070241928, "learning_rate": 8e-05, "loss": 1.5531, "step": 4684 }, { "epoch": 0.639415859151085, "grad_norm": 0.22183838486671448, "learning_rate": 8e-05, "loss": 1.4539, "step": 4685 }, { "epoch": 0.6395523406578408, "grad_norm": 0.21777133643627167, "learning_rate": 8e-05, "loss": 1.4944, "step": 4686 }, { "epoch": 0.6396888221645967, "grad_norm": 0.222051739692688, "learning_rate": 8e-05, "loss": 1.4529, "step": 4687 }, { "epoch": 0.6398253036713525, "grad_norm": 0.2328973412513733, "learning_rate": 8e-05, "loss": 1.4593, "step": 4688 }, { "epoch": 0.6399617851781083, "grad_norm": 0.22004377841949463, "learning_rate": 8e-05, "loss": 1.5062, "step": 4689 }, { "epoch": 0.6400982666848642, "grad_norm": 0.23322167992591858, "learning_rate": 8e-05, "loss": 1.5073, "step": 4690 }, { "epoch": 0.6402347481916201, "grad_norm": 0.22014138102531433, "learning_rate": 8e-05, "loss": 1.5008, "step": 4691 }, { "epoch": 0.6403712296983759, "grad_norm": 0.22135215997695923, "learning_rate": 8e-05, "loss": 1.4516, "step": 4692 }, { "epoch": 0.6405077112051317, "grad_norm": 0.22722910344600677, "learning_rate": 8e-05, "loss": 1.4994, "step": 4693 }, { "epoch": 0.6406441927118876, "grad_norm": 0.2225387543439865, "learning_rate": 8e-05, "loss": 1.487, "step": 4694 }, { "epoch": 0.6407806742186434, "grad_norm": 0.2370699942111969, "learning_rate": 8e-05, "loss": 1.5507, "step": 4695 }, { "epoch": 0.6409171557253992, "grad_norm": 0.23134349286556244, "learning_rate": 8e-05, "loss": 1.4559, "step": 4696 }, { "epoch": 0.641053637232155, "grad_norm": 0.21570952236652374, "learning_rate": 8e-05, "loss": 1.4273, "step": 4697 }, { "epoch": 0.6411901187389109, "grad_norm": 0.2156054973602295, "learning_rate": 8e-05, "loss": 1.4612, "step": 4698 }, { "epoch": 0.6413266002456667, "grad_norm": 0.23077009618282318, "learning_rate": 8e-05, "loss": 1.5153, "step": 4699 }, { "epoch": 0.6414630817524225, "grad_norm": 0.2185460478067398, "learning_rate": 8e-05, "loss": 1.4978, "step": 4700 }, { "epoch": 0.6415995632591783, "grad_norm": 0.22641827166080475, "learning_rate": 8e-05, "loss": 1.4825, "step": 4701 }, { "epoch": 0.6417360447659343, "grad_norm": 0.21381402015686035, "learning_rate": 8e-05, "loss": 1.438, "step": 4702 }, { "epoch": 0.6418725262726901, "grad_norm": 0.21371695399284363, "learning_rate": 8e-05, "loss": 1.4763, "step": 4703 }, { "epoch": 0.6420090077794459, "grad_norm": 0.21719419956207275, "learning_rate": 8e-05, "loss": 1.4404, "step": 4704 }, { "epoch": 0.6421454892862017, "grad_norm": 0.22064515948295593, "learning_rate": 8e-05, "loss": 1.484, "step": 4705 }, { "epoch": 0.6422819707929576, "grad_norm": 0.2249942421913147, "learning_rate": 8e-05, "loss": 1.4689, "step": 4706 }, { "epoch": 0.6424184522997134, "grad_norm": 0.22294658422470093, "learning_rate": 8e-05, "loss": 1.4685, "step": 4707 }, { "epoch": 0.6425549338064692, "grad_norm": 0.21578864753246307, "learning_rate": 8e-05, "loss": 1.4742, "step": 4708 }, { "epoch": 0.642691415313225, "grad_norm": 0.22582624852657318, "learning_rate": 8e-05, "loss": 1.4423, "step": 4709 }, { "epoch": 0.6428278968199809, "grad_norm": 0.22269436717033386, "learning_rate": 8e-05, "loss": 1.5331, "step": 4710 }, { "epoch": 0.6429643783267367, "grad_norm": 0.21985948085784912, "learning_rate": 8e-05, "loss": 1.5198, "step": 4711 }, { "epoch": 0.6431008598334925, "grad_norm": 0.21801301836967468, "learning_rate": 8e-05, "loss": 1.4797, "step": 4712 }, { "epoch": 0.6432373413402483, "grad_norm": 0.22284044325351715, "learning_rate": 8e-05, "loss": 1.4297, "step": 4713 }, { "epoch": 0.6433738228470043, "grad_norm": 0.21237695217132568, "learning_rate": 8e-05, "loss": 1.4998, "step": 4714 }, { "epoch": 0.6435103043537601, "grad_norm": 0.22127491235733032, "learning_rate": 8e-05, "loss": 1.4805, "step": 4715 }, { "epoch": 0.6436467858605159, "grad_norm": 0.2248886674642563, "learning_rate": 8e-05, "loss": 1.4469, "step": 4716 }, { "epoch": 0.6437832673672718, "grad_norm": 0.2306990623474121, "learning_rate": 8e-05, "loss": 1.5114, "step": 4717 }, { "epoch": 0.6439197488740276, "grad_norm": 0.2188940793275833, "learning_rate": 8e-05, "loss": 1.4716, "step": 4718 }, { "epoch": 0.6440562303807834, "grad_norm": 0.22470921277999878, "learning_rate": 8e-05, "loss": 1.4815, "step": 4719 }, { "epoch": 0.6441927118875392, "grad_norm": 0.21132726967334747, "learning_rate": 8e-05, "loss": 1.4207, "step": 4720 }, { "epoch": 0.6443291933942951, "grad_norm": 0.222573921084404, "learning_rate": 8e-05, "loss": 1.4544, "step": 4721 }, { "epoch": 0.6444656749010509, "grad_norm": 0.22469067573547363, "learning_rate": 8e-05, "loss": 1.5275, "step": 4722 }, { "epoch": 0.6446021564078067, "grad_norm": 0.21866431832313538, "learning_rate": 8e-05, "loss": 1.3667, "step": 4723 }, { "epoch": 0.6447386379145625, "grad_norm": 0.22790862619876862, "learning_rate": 8e-05, "loss": 1.3955, "step": 4724 }, { "epoch": 0.6448751194213184, "grad_norm": 0.22355790436267853, "learning_rate": 8e-05, "loss": 1.4696, "step": 4725 }, { "epoch": 0.6450116009280742, "grad_norm": 0.22280742228031158, "learning_rate": 8e-05, "loss": 1.467, "step": 4726 }, { "epoch": 0.6451480824348301, "grad_norm": 0.22323842346668243, "learning_rate": 8e-05, "loss": 1.4484, "step": 4727 }, { "epoch": 0.645284563941586, "grad_norm": 0.22016021609306335, "learning_rate": 8e-05, "loss": 1.477, "step": 4728 }, { "epoch": 0.6454210454483418, "grad_norm": 0.21709200739860535, "learning_rate": 8e-05, "loss": 1.462, "step": 4729 }, { "epoch": 0.6455575269550976, "grad_norm": 0.22512587904930115, "learning_rate": 8e-05, "loss": 1.5285, "step": 4730 }, { "epoch": 0.6456940084618534, "grad_norm": 0.22144664824008942, "learning_rate": 8e-05, "loss": 1.4605, "step": 4731 }, { "epoch": 0.6458304899686093, "grad_norm": 0.22873267531394958, "learning_rate": 8e-05, "loss": 1.5313, "step": 4732 }, { "epoch": 0.6459669714753651, "grad_norm": 0.227690652012825, "learning_rate": 8e-05, "loss": 1.478, "step": 4733 }, { "epoch": 0.6461034529821209, "grad_norm": 0.22937248647212982, "learning_rate": 8e-05, "loss": 1.4923, "step": 4734 }, { "epoch": 0.6462399344888767, "grad_norm": 0.22020775079727173, "learning_rate": 8e-05, "loss": 1.4465, "step": 4735 }, { "epoch": 0.6463764159956326, "grad_norm": 0.2242947220802307, "learning_rate": 8e-05, "loss": 1.5092, "step": 4736 }, { "epoch": 0.6465128975023884, "grad_norm": 0.2241961508989334, "learning_rate": 8e-05, "loss": 1.4654, "step": 4737 }, { "epoch": 0.6466493790091442, "grad_norm": 0.21888315677642822, "learning_rate": 8e-05, "loss": 1.5085, "step": 4738 }, { "epoch": 0.6467858605159001, "grad_norm": 0.2144775241613388, "learning_rate": 8e-05, "loss": 1.4598, "step": 4739 }, { "epoch": 0.646922342022656, "grad_norm": 0.22623242437839508, "learning_rate": 8e-05, "loss": 1.4952, "step": 4740 }, { "epoch": 0.6470588235294118, "grad_norm": 0.21717987954616547, "learning_rate": 8e-05, "loss": 1.4793, "step": 4741 }, { "epoch": 0.6471953050361676, "grad_norm": 0.24406373500823975, "learning_rate": 8e-05, "loss": 1.526, "step": 4742 }, { "epoch": 0.6473317865429234, "grad_norm": 0.22075706720352173, "learning_rate": 8e-05, "loss": 1.4362, "step": 4743 }, { "epoch": 0.6474682680496793, "grad_norm": 0.22959192097187042, "learning_rate": 8e-05, "loss": 1.5377, "step": 4744 }, { "epoch": 0.6476047495564351, "grad_norm": 0.22353392839431763, "learning_rate": 8e-05, "loss": 1.4983, "step": 4745 }, { "epoch": 0.6477412310631909, "grad_norm": 0.23261985182762146, "learning_rate": 8e-05, "loss": 1.5005, "step": 4746 }, { "epoch": 0.6478777125699468, "grad_norm": 0.24176758527755737, "learning_rate": 8e-05, "loss": 1.5186, "step": 4747 }, { "epoch": 0.6480141940767026, "grad_norm": 0.22109319269657135, "learning_rate": 8e-05, "loss": 1.479, "step": 4748 }, { "epoch": 0.6481506755834584, "grad_norm": 0.23933464288711548, "learning_rate": 8e-05, "loss": 1.5178, "step": 4749 }, { "epoch": 0.6482871570902142, "grad_norm": 0.2160293161869049, "learning_rate": 8e-05, "loss": 1.4483, "step": 4750 }, { "epoch": 0.6484236385969702, "grad_norm": 0.22686642408370972, "learning_rate": 8e-05, "loss": 1.4975, "step": 4751 }, { "epoch": 0.648560120103726, "grad_norm": 0.21621020138263702, "learning_rate": 8e-05, "loss": 1.4265, "step": 4752 }, { "epoch": 0.6486966016104818, "grad_norm": 0.23167496919631958, "learning_rate": 8e-05, "loss": 1.5256, "step": 4753 }, { "epoch": 0.6488330831172376, "grad_norm": 0.22529476881027222, "learning_rate": 8e-05, "loss": 1.4641, "step": 4754 }, { "epoch": 0.6489695646239935, "grad_norm": 0.2205408811569214, "learning_rate": 8e-05, "loss": 1.4231, "step": 4755 }, { "epoch": 0.6491060461307493, "grad_norm": 0.22217302024364471, "learning_rate": 8e-05, "loss": 1.4837, "step": 4756 }, { "epoch": 0.6492425276375051, "grad_norm": 0.22462791204452515, "learning_rate": 8e-05, "loss": 1.4787, "step": 4757 }, { "epoch": 0.6493790091442609, "grad_norm": 0.23155753314495087, "learning_rate": 8e-05, "loss": 1.5147, "step": 4758 }, { "epoch": 0.6495154906510168, "grad_norm": 0.22752583026885986, "learning_rate": 8e-05, "loss": 1.5061, "step": 4759 }, { "epoch": 0.6496519721577726, "grad_norm": 0.2195422500371933, "learning_rate": 8e-05, "loss": 1.4383, "step": 4760 }, { "epoch": 0.6497884536645284, "grad_norm": 0.21294036507606506, "learning_rate": 8e-05, "loss": 1.4774, "step": 4761 }, { "epoch": 0.6499249351712842, "grad_norm": 0.21663227677345276, "learning_rate": 8e-05, "loss": 1.453, "step": 4762 }, { "epoch": 0.6500614166780401, "grad_norm": 0.2227342128753662, "learning_rate": 8e-05, "loss": 1.4878, "step": 4763 }, { "epoch": 0.650197898184796, "grad_norm": 0.23289898037910461, "learning_rate": 8e-05, "loss": 1.5197, "step": 4764 }, { "epoch": 0.6503343796915518, "grad_norm": 0.22494392096996307, "learning_rate": 8e-05, "loss": 1.51, "step": 4765 }, { "epoch": 0.6504708611983077, "grad_norm": 0.23340477049350739, "learning_rate": 8e-05, "loss": 1.4856, "step": 4766 }, { "epoch": 0.6506073427050635, "grad_norm": 0.21949252486228943, "learning_rate": 8e-05, "loss": 1.5001, "step": 4767 }, { "epoch": 0.6507438242118193, "grad_norm": 0.2134321630001068, "learning_rate": 8e-05, "loss": 1.3509, "step": 4768 }, { "epoch": 0.6508803057185751, "grad_norm": 0.2202940136194229, "learning_rate": 8e-05, "loss": 1.481, "step": 4769 }, { "epoch": 0.651016787225331, "grad_norm": 0.22476086020469666, "learning_rate": 8e-05, "loss": 1.4897, "step": 4770 }, { "epoch": 0.6511532687320868, "grad_norm": 0.22764909267425537, "learning_rate": 8e-05, "loss": 1.5429, "step": 4771 }, { "epoch": 0.6512897502388426, "grad_norm": 0.22695961594581604, "learning_rate": 8e-05, "loss": 1.4858, "step": 4772 }, { "epoch": 0.6514262317455984, "grad_norm": 0.22650068998336792, "learning_rate": 8e-05, "loss": 1.4072, "step": 4773 }, { "epoch": 0.6515627132523543, "grad_norm": 0.21934522688388824, "learning_rate": 8e-05, "loss": 1.4634, "step": 4774 }, { "epoch": 0.6516991947591101, "grad_norm": 0.21516390144824982, "learning_rate": 8e-05, "loss": 1.3712, "step": 4775 }, { "epoch": 0.651835676265866, "grad_norm": 0.22440971434116364, "learning_rate": 8e-05, "loss": 1.4333, "step": 4776 }, { "epoch": 0.6519721577726219, "grad_norm": 0.23376627266407013, "learning_rate": 8e-05, "loss": 1.5338, "step": 4777 }, { "epoch": 0.6521086392793777, "grad_norm": 0.22055548429489136, "learning_rate": 8e-05, "loss": 1.4462, "step": 4778 }, { "epoch": 0.6522451207861335, "grad_norm": 0.2215365320444107, "learning_rate": 8e-05, "loss": 1.4301, "step": 4779 }, { "epoch": 0.6523816022928893, "grad_norm": 0.21756118535995483, "learning_rate": 8e-05, "loss": 1.4187, "step": 4780 }, { "epoch": 0.6525180837996452, "grad_norm": 0.2270948588848114, "learning_rate": 8e-05, "loss": 1.5665, "step": 4781 }, { "epoch": 0.652654565306401, "grad_norm": 0.23348069190979004, "learning_rate": 8e-05, "loss": 1.506, "step": 4782 }, { "epoch": 0.6527910468131568, "grad_norm": 0.2224881798028946, "learning_rate": 8e-05, "loss": 1.4738, "step": 4783 }, { "epoch": 0.6529275283199126, "grad_norm": 0.22335462272167206, "learning_rate": 8e-05, "loss": 1.4998, "step": 4784 }, { "epoch": 0.6530640098266685, "grad_norm": 0.22532108426094055, "learning_rate": 8e-05, "loss": 1.4779, "step": 4785 }, { "epoch": 0.6532004913334243, "grad_norm": 0.21094249188899994, "learning_rate": 8e-05, "loss": 1.4098, "step": 4786 }, { "epoch": 0.6533369728401801, "grad_norm": 0.22443215548992157, "learning_rate": 8e-05, "loss": 1.4953, "step": 4787 }, { "epoch": 0.653473454346936, "grad_norm": 0.21294505894184113, "learning_rate": 8e-05, "loss": 1.4089, "step": 4788 }, { "epoch": 0.6536099358536919, "grad_norm": 0.22449451684951782, "learning_rate": 8e-05, "loss": 1.5627, "step": 4789 }, { "epoch": 0.6537464173604477, "grad_norm": 0.22543059289455414, "learning_rate": 8e-05, "loss": 1.4943, "step": 4790 }, { "epoch": 0.6538828988672035, "grad_norm": 0.225270077586174, "learning_rate": 8e-05, "loss": 1.3823, "step": 4791 }, { "epoch": 0.6540193803739593, "grad_norm": 0.223805770277977, "learning_rate": 8e-05, "loss": 1.4371, "step": 4792 }, { "epoch": 0.6541558618807152, "grad_norm": 0.22868068516254425, "learning_rate": 8e-05, "loss": 1.5025, "step": 4793 }, { "epoch": 0.654292343387471, "grad_norm": 0.21598564088344574, "learning_rate": 8e-05, "loss": 1.4061, "step": 4794 }, { "epoch": 0.6544288248942268, "grad_norm": 0.21698512136936188, "learning_rate": 8e-05, "loss": 1.4803, "step": 4795 }, { "epoch": 0.6545653064009826, "grad_norm": 0.216933935880661, "learning_rate": 8e-05, "loss": 1.4623, "step": 4796 }, { "epoch": 0.6547017879077385, "grad_norm": 0.21854951977729797, "learning_rate": 8e-05, "loss": 1.4745, "step": 4797 }, { "epoch": 0.6548382694144943, "grad_norm": 0.21450889110565186, "learning_rate": 8e-05, "loss": 1.4108, "step": 4798 }, { "epoch": 0.6549747509212501, "grad_norm": 0.22637712955474854, "learning_rate": 8e-05, "loss": 1.4848, "step": 4799 }, { "epoch": 0.655111232428006, "grad_norm": 0.22508609294891357, "learning_rate": 8e-05, "loss": 1.4222, "step": 4800 }, { "epoch": 0.6552477139347619, "grad_norm": 0.22846727073192596, "learning_rate": 8e-05, "loss": 1.4767, "step": 4801 }, { "epoch": 0.6553841954415177, "grad_norm": 0.22929659485816956, "learning_rate": 8e-05, "loss": 1.4331, "step": 4802 }, { "epoch": 0.6555206769482735, "grad_norm": 0.22304792702198029, "learning_rate": 8e-05, "loss": 1.4908, "step": 4803 }, { "epoch": 0.6556571584550294, "grad_norm": 0.22466646134853363, "learning_rate": 8e-05, "loss": 1.4804, "step": 4804 }, { "epoch": 0.6557936399617852, "grad_norm": 0.226219043135643, "learning_rate": 8e-05, "loss": 1.5297, "step": 4805 }, { "epoch": 0.655930121468541, "grad_norm": 0.23656295239925385, "learning_rate": 8e-05, "loss": 1.5041, "step": 4806 }, { "epoch": 0.6560666029752968, "grad_norm": 0.2190861701965332, "learning_rate": 8e-05, "loss": 1.4372, "step": 4807 }, { "epoch": 0.6562030844820527, "grad_norm": 0.22630518674850464, "learning_rate": 8e-05, "loss": 1.4726, "step": 4808 }, { "epoch": 0.6563395659888085, "grad_norm": 0.21447741985321045, "learning_rate": 8e-05, "loss": 1.4116, "step": 4809 }, { "epoch": 0.6564760474955643, "grad_norm": 0.22013404965400696, "learning_rate": 8e-05, "loss": 1.445, "step": 4810 }, { "epoch": 0.6566125290023201, "grad_norm": 0.2186201810836792, "learning_rate": 8e-05, "loss": 1.4337, "step": 4811 }, { "epoch": 0.656749010509076, "grad_norm": 0.23519742488861084, "learning_rate": 8e-05, "loss": 1.5156, "step": 4812 }, { "epoch": 0.6568854920158319, "grad_norm": 0.21780787408351898, "learning_rate": 8e-05, "loss": 1.4833, "step": 4813 }, { "epoch": 0.6570219735225877, "grad_norm": 0.2263040691614151, "learning_rate": 8e-05, "loss": 1.4626, "step": 4814 }, { "epoch": 0.6571584550293436, "grad_norm": 0.2249605506658554, "learning_rate": 8e-05, "loss": 1.4738, "step": 4815 }, { "epoch": 0.6572949365360994, "grad_norm": 0.22105370461940765, "learning_rate": 8e-05, "loss": 1.5071, "step": 4816 }, { "epoch": 0.6574314180428552, "grad_norm": 0.2265872061252594, "learning_rate": 8e-05, "loss": 1.534, "step": 4817 }, { "epoch": 0.657567899549611, "grad_norm": 0.22143472731113434, "learning_rate": 8e-05, "loss": 1.4474, "step": 4818 }, { "epoch": 0.6577043810563669, "grad_norm": 0.21892483532428741, "learning_rate": 8e-05, "loss": 1.4338, "step": 4819 }, { "epoch": 0.6578408625631227, "grad_norm": 0.22374549508094788, "learning_rate": 8e-05, "loss": 1.4352, "step": 4820 }, { "epoch": 0.6579773440698785, "grad_norm": 0.22548454999923706, "learning_rate": 8e-05, "loss": 1.4356, "step": 4821 }, { "epoch": 0.6581138255766343, "grad_norm": 0.22463259100914001, "learning_rate": 8e-05, "loss": 1.4787, "step": 4822 }, { "epoch": 0.6582503070833902, "grad_norm": 0.22149738669395447, "learning_rate": 8e-05, "loss": 1.462, "step": 4823 }, { "epoch": 0.658386788590146, "grad_norm": 0.21920955181121826, "learning_rate": 8e-05, "loss": 1.4818, "step": 4824 }, { "epoch": 0.6585232700969019, "grad_norm": 0.22516049444675446, "learning_rate": 8e-05, "loss": 1.4442, "step": 4825 }, { "epoch": 0.6586597516036578, "grad_norm": 0.21563005447387695, "learning_rate": 8e-05, "loss": 1.4867, "step": 4826 }, { "epoch": 0.6587962331104136, "grad_norm": 0.22993803024291992, "learning_rate": 8e-05, "loss": 1.3885, "step": 4827 }, { "epoch": 0.6589327146171694, "grad_norm": 0.24025046825408936, "learning_rate": 8e-05, "loss": 1.4681, "step": 4828 }, { "epoch": 0.6590691961239252, "grad_norm": 0.22974734008312225, "learning_rate": 8e-05, "loss": 1.4857, "step": 4829 }, { "epoch": 0.659205677630681, "grad_norm": 0.2239166498184204, "learning_rate": 8e-05, "loss": 1.4518, "step": 4830 }, { "epoch": 0.6593421591374369, "grad_norm": 0.22037450969219208, "learning_rate": 8e-05, "loss": 1.3848, "step": 4831 }, { "epoch": 0.6594786406441927, "grad_norm": 0.22467266023159027, "learning_rate": 8e-05, "loss": 1.4436, "step": 4832 }, { "epoch": 0.6596151221509485, "grad_norm": 0.22956858575344086, "learning_rate": 8e-05, "loss": 1.4979, "step": 4833 }, { "epoch": 0.6597516036577044, "grad_norm": 0.23247022926807404, "learning_rate": 8e-05, "loss": 1.5004, "step": 4834 }, { "epoch": 0.6598880851644602, "grad_norm": 0.2317829430103302, "learning_rate": 8e-05, "loss": 1.5092, "step": 4835 }, { "epoch": 0.660024566671216, "grad_norm": 0.2160494178533554, "learning_rate": 8e-05, "loss": 1.3702, "step": 4836 }, { "epoch": 0.6601610481779718, "grad_norm": 0.22596830129623413, "learning_rate": 8e-05, "loss": 1.4501, "step": 4837 }, { "epoch": 0.6602975296847278, "grad_norm": 0.22802773118019104, "learning_rate": 8e-05, "loss": 1.4462, "step": 4838 }, { "epoch": 0.6604340111914836, "grad_norm": 0.21835008263587952, "learning_rate": 8e-05, "loss": 1.4373, "step": 4839 }, { "epoch": 0.6605704926982394, "grad_norm": 0.22111277282238007, "learning_rate": 8e-05, "loss": 1.4654, "step": 4840 }, { "epoch": 0.6607069742049952, "grad_norm": 0.22188541293144226, "learning_rate": 8e-05, "loss": 1.5038, "step": 4841 }, { "epoch": 0.6608434557117511, "grad_norm": 0.22060491144657135, "learning_rate": 8e-05, "loss": 1.5301, "step": 4842 }, { "epoch": 0.6609799372185069, "grad_norm": 0.22839945554733276, "learning_rate": 8e-05, "loss": 1.4606, "step": 4843 }, { "epoch": 0.6611164187252627, "grad_norm": 0.22106820344924927, "learning_rate": 8e-05, "loss": 1.4646, "step": 4844 }, { "epoch": 0.6612529002320185, "grad_norm": 0.22889405488967896, "learning_rate": 8e-05, "loss": 1.5057, "step": 4845 }, { "epoch": 0.6613893817387744, "grad_norm": 0.21413250267505646, "learning_rate": 8e-05, "loss": 1.4267, "step": 4846 }, { "epoch": 0.6615258632455302, "grad_norm": 0.2106185257434845, "learning_rate": 8e-05, "loss": 1.3816, "step": 4847 }, { "epoch": 0.661662344752286, "grad_norm": 0.22740787267684937, "learning_rate": 8e-05, "loss": 1.5045, "step": 4848 }, { "epoch": 0.6617988262590419, "grad_norm": 0.22585178911685944, "learning_rate": 8e-05, "loss": 1.468, "step": 4849 }, { "epoch": 0.6619353077657978, "grad_norm": 0.22686122357845306, "learning_rate": 8e-05, "loss": 1.4854, "step": 4850 }, { "epoch": 0.6620717892725536, "grad_norm": 0.21957051753997803, "learning_rate": 8e-05, "loss": 1.4433, "step": 4851 }, { "epoch": 0.6622082707793094, "grad_norm": 0.23281526565551758, "learning_rate": 8e-05, "loss": 1.5103, "step": 4852 }, { "epoch": 0.6623447522860653, "grad_norm": 0.2223740816116333, "learning_rate": 8e-05, "loss": 1.5012, "step": 4853 }, { "epoch": 0.6624812337928211, "grad_norm": 0.22596101462841034, "learning_rate": 8e-05, "loss": 1.476, "step": 4854 }, { "epoch": 0.6626177152995769, "grad_norm": 0.22248028218746185, "learning_rate": 8e-05, "loss": 1.4358, "step": 4855 }, { "epoch": 0.6627541968063327, "grad_norm": 0.22292497754096985, "learning_rate": 8e-05, "loss": 1.4639, "step": 4856 }, { "epoch": 0.6628906783130886, "grad_norm": 0.2175852358341217, "learning_rate": 8e-05, "loss": 1.4742, "step": 4857 }, { "epoch": 0.6630271598198444, "grad_norm": 0.22351761162281036, "learning_rate": 8e-05, "loss": 1.4057, "step": 4858 }, { "epoch": 0.6631636413266002, "grad_norm": 0.22377492487430573, "learning_rate": 8e-05, "loss": 1.4806, "step": 4859 }, { "epoch": 0.663300122833356, "grad_norm": 0.22567082941532135, "learning_rate": 8e-05, "loss": 1.4576, "step": 4860 }, { "epoch": 0.6634366043401119, "grad_norm": 0.2307164967060089, "learning_rate": 8e-05, "loss": 1.5273, "step": 4861 }, { "epoch": 0.6635730858468677, "grad_norm": 0.2237037569284439, "learning_rate": 8e-05, "loss": 1.439, "step": 4862 }, { "epoch": 0.6637095673536236, "grad_norm": 0.22361455857753754, "learning_rate": 8e-05, "loss": 1.4173, "step": 4863 }, { "epoch": 0.6638460488603795, "grad_norm": 0.22322291135787964, "learning_rate": 8e-05, "loss": 1.4929, "step": 4864 }, { "epoch": 0.6639825303671353, "grad_norm": 0.2231309711933136, "learning_rate": 8e-05, "loss": 1.4427, "step": 4865 }, { "epoch": 0.6641190118738911, "grad_norm": 0.22826983034610748, "learning_rate": 8e-05, "loss": 1.4418, "step": 4866 }, { "epoch": 0.6642554933806469, "grad_norm": 0.22191420197486877, "learning_rate": 8e-05, "loss": 1.4785, "step": 4867 }, { "epoch": 0.6643919748874028, "grad_norm": 0.22914348542690277, "learning_rate": 8e-05, "loss": 1.5048, "step": 4868 }, { "epoch": 0.6645284563941586, "grad_norm": 0.22400781512260437, "learning_rate": 8e-05, "loss": 1.4784, "step": 4869 }, { "epoch": 0.6646649379009144, "grad_norm": 0.22684012353420258, "learning_rate": 8e-05, "loss": 1.4736, "step": 4870 }, { "epoch": 0.6648014194076702, "grad_norm": 0.2189878672361374, "learning_rate": 8e-05, "loss": 1.4344, "step": 4871 }, { "epoch": 0.6649379009144261, "grad_norm": 0.21996714174747467, "learning_rate": 8e-05, "loss": 1.3952, "step": 4872 }, { "epoch": 0.6650743824211819, "grad_norm": 0.22384747862815857, "learning_rate": 8e-05, "loss": 1.4758, "step": 4873 }, { "epoch": 0.6652108639279377, "grad_norm": 0.22900566458702087, "learning_rate": 8e-05, "loss": 1.4313, "step": 4874 }, { "epoch": 0.6653473454346936, "grad_norm": 0.2260541319847107, "learning_rate": 8e-05, "loss": 1.4314, "step": 4875 }, { "epoch": 0.6654838269414495, "grad_norm": 0.22457273304462433, "learning_rate": 8e-05, "loss": 1.458, "step": 4876 }, { "epoch": 0.6656203084482053, "grad_norm": 0.2243596464395523, "learning_rate": 8e-05, "loss": 1.4496, "step": 4877 }, { "epoch": 0.6657567899549611, "grad_norm": 0.22972197830677032, "learning_rate": 8e-05, "loss": 1.4549, "step": 4878 }, { "epoch": 0.665893271461717, "grad_norm": 0.22231552004814148, "learning_rate": 8e-05, "loss": 1.495, "step": 4879 }, { "epoch": 0.6660297529684728, "grad_norm": 0.2262486070394516, "learning_rate": 8e-05, "loss": 1.4297, "step": 4880 }, { "epoch": 0.6661662344752286, "grad_norm": 0.22087006270885468, "learning_rate": 8e-05, "loss": 1.454, "step": 4881 }, { "epoch": 0.6663027159819844, "grad_norm": 0.2140192836523056, "learning_rate": 8e-05, "loss": 1.4475, "step": 4882 }, { "epoch": 0.6664391974887403, "grad_norm": 0.22473061084747314, "learning_rate": 8e-05, "loss": 1.4418, "step": 4883 }, { "epoch": 0.6665756789954961, "grad_norm": 0.22641408443450928, "learning_rate": 8e-05, "loss": 1.3948, "step": 4884 }, { "epoch": 0.6667121605022519, "grad_norm": 0.21617218852043152, "learning_rate": 8e-05, "loss": 1.4503, "step": 4885 }, { "epoch": 0.6668486420090077, "grad_norm": 0.2311301976442337, "learning_rate": 8e-05, "loss": 1.4791, "step": 4886 }, { "epoch": 0.6669851235157637, "grad_norm": 0.2306140512228012, "learning_rate": 8e-05, "loss": 1.4462, "step": 4887 }, { "epoch": 0.6671216050225195, "grad_norm": 0.2413230985403061, "learning_rate": 8e-05, "loss": 1.4906, "step": 4888 }, { "epoch": 0.6672580865292753, "grad_norm": 0.2357136607170105, "learning_rate": 8e-05, "loss": 1.4267, "step": 4889 }, { "epoch": 0.6673945680360311, "grad_norm": 0.23352450132369995, "learning_rate": 8e-05, "loss": 1.4838, "step": 4890 }, { "epoch": 0.667531049542787, "grad_norm": 0.22657707333564758, "learning_rate": 8e-05, "loss": 1.4608, "step": 4891 }, { "epoch": 0.6676675310495428, "grad_norm": 0.22659124433994293, "learning_rate": 8e-05, "loss": 1.452, "step": 4892 }, { "epoch": 0.6678040125562986, "grad_norm": 0.2184356302022934, "learning_rate": 8e-05, "loss": 1.4008, "step": 4893 }, { "epoch": 0.6679404940630544, "grad_norm": 0.22633616626262665, "learning_rate": 8e-05, "loss": 1.45, "step": 4894 }, { "epoch": 0.6680769755698103, "grad_norm": 0.2222093939781189, "learning_rate": 8e-05, "loss": 1.4278, "step": 4895 }, { "epoch": 0.6682134570765661, "grad_norm": 0.22351354360580444, "learning_rate": 8e-05, "loss": 1.429, "step": 4896 }, { "epoch": 0.6683499385833219, "grad_norm": 0.21014724671840668, "learning_rate": 8e-05, "loss": 1.3845, "step": 4897 }, { "epoch": 0.6684864200900777, "grad_norm": 0.2480313777923584, "learning_rate": 8e-05, "loss": 1.5126, "step": 4898 }, { "epoch": 0.6686229015968336, "grad_norm": 0.2141083925962448, "learning_rate": 8e-05, "loss": 1.3595, "step": 4899 }, { "epoch": 0.6687593831035895, "grad_norm": 0.22754399478435516, "learning_rate": 8e-05, "loss": 1.3802, "step": 4900 }, { "epoch": 0.6688958646103453, "grad_norm": 0.2352703958749771, "learning_rate": 8e-05, "loss": 1.5338, "step": 4901 }, { "epoch": 0.6690323461171012, "grad_norm": 0.22965887188911438, "learning_rate": 8e-05, "loss": 1.5009, "step": 4902 }, { "epoch": 0.669168827623857, "grad_norm": 0.22749994695186615, "learning_rate": 8e-05, "loss": 1.5276, "step": 4903 }, { "epoch": 0.6693053091306128, "grad_norm": 0.22773905098438263, "learning_rate": 8e-05, "loss": 1.4733, "step": 4904 }, { "epoch": 0.6694417906373686, "grad_norm": 0.22422143816947937, "learning_rate": 8e-05, "loss": 1.4213, "step": 4905 }, { "epoch": 0.6695782721441245, "grad_norm": 0.22762185335159302, "learning_rate": 8e-05, "loss": 1.4212, "step": 4906 }, { "epoch": 0.6697147536508803, "grad_norm": 0.22892232239246368, "learning_rate": 8e-05, "loss": 1.4378, "step": 4907 }, { "epoch": 0.6698512351576361, "grad_norm": 0.22859223186969757, "learning_rate": 8e-05, "loss": 1.4686, "step": 4908 }, { "epoch": 0.6699877166643919, "grad_norm": 0.2299458235502243, "learning_rate": 8e-05, "loss": 1.4645, "step": 4909 }, { "epoch": 0.6701241981711478, "grad_norm": 0.22010061144828796, "learning_rate": 8e-05, "loss": 1.4523, "step": 4910 }, { "epoch": 0.6702606796779036, "grad_norm": 0.22795507311820984, "learning_rate": 8e-05, "loss": 1.5295, "step": 4911 }, { "epoch": 0.6703971611846595, "grad_norm": 0.22864168882369995, "learning_rate": 8e-05, "loss": 1.4218, "step": 4912 }, { "epoch": 0.6705336426914154, "grad_norm": 0.2272016555070877, "learning_rate": 8e-05, "loss": 1.4777, "step": 4913 }, { "epoch": 0.6706701241981712, "grad_norm": 0.21866293251514435, "learning_rate": 8e-05, "loss": 1.4807, "step": 4914 }, { "epoch": 0.670806605704927, "grad_norm": 0.22426877915859222, "learning_rate": 8e-05, "loss": 1.5097, "step": 4915 }, { "epoch": 0.6709430872116828, "grad_norm": 0.22679133713245392, "learning_rate": 8e-05, "loss": 1.4773, "step": 4916 }, { "epoch": 0.6710795687184387, "grad_norm": 0.22114460170269012, "learning_rate": 8e-05, "loss": 1.4161, "step": 4917 }, { "epoch": 0.6712160502251945, "grad_norm": 0.23109637200832367, "learning_rate": 8e-05, "loss": 1.4926, "step": 4918 }, { "epoch": 0.6713525317319503, "grad_norm": 0.22195497155189514, "learning_rate": 8e-05, "loss": 1.5173, "step": 4919 }, { "epoch": 0.6714890132387061, "grad_norm": 0.23254257440567017, "learning_rate": 8e-05, "loss": 1.4935, "step": 4920 }, { "epoch": 0.671625494745462, "grad_norm": 0.21887019276618958, "learning_rate": 8e-05, "loss": 1.424, "step": 4921 }, { "epoch": 0.6717619762522178, "grad_norm": 0.23122921586036682, "learning_rate": 8e-05, "loss": 1.4609, "step": 4922 }, { "epoch": 0.6718984577589736, "grad_norm": 0.2219562530517578, "learning_rate": 8e-05, "loss": 1.4389, "step": 4923 }, { "epoch": 0.6720349392657295, "grad_norm": 0.22108280658721924, "learning_rate": 8e-05, "loss": 1.4785, "step": 4924 }, { "epoch": 0.6721714207724854, "grad_norm": 0.22000271081924438, "learning_rate": 8e-05, "loss": 1.4092, "step": 4925 }, { "epoch": 0.6723079022792412, "grad_norm": 0.2324644774198532, "learning_rate": 8e-05, "loss": 1.4544, "step": 4926 }, { "epoch": 0.672444383785997, "grad_norm": 0.22577349841594696, "learning_rate": 8e-05, "loss": 1.4262, "step": 4927 }, { "epoch": 0.6725808652927528, "grad_norm": 0.23456013202667236, "learning_rate": 8e-05, "loss": 1.5127, "step": 4928 }, { "epoch": 0.6727173467995087, "grad_norm": 0.21577303111553192, "learning_rate": 8e-05, "loss": 1.3793, "step": 4929 }, { "epoch": 0.6728538283062645, "grad_norm": 0.2322041094303131, "learning_rate": 8e-05, "loss": 1.4349, "step": 4930 }, { "epoch": 0.6729903098130203, "grad_norm": 0.23538881540298462, "learning_rate": 8e-05, "loss": 1.4448, "step": 4931 }, { "epoch": 0.6731267913197762, "grad_norm": 0.22795306146144867, "learning_rate": 8e-05, "loss": 1.5101, "step": 4932 }, { "epoch": 0.673263272826532, "grad_norm": 0.23400461673736572, "learning_rate": 8e-05, "loss": 1.4818, "step": 4933 }, { "epoch": 0.6733997543332878, "grad_norm": 0.2218569964170456, "learning_rate": 8e-05, "loss": 1.478, "step": 4934 }, { "epoch": 0.6735362358400436, "grad_norm": 0.22215279936790466, "learning_rate": 8e-05, "loss": 1.4377, "step": 4935 }, { "epoch": 0.6736727173467995, "grad_norm": 0.21808898448944092, "learning_rate": 8e-05, "loss": 1.4161, "step": 4936 }, { "epoch": 0.6738091988535554, "grad_norm": 0.22512227296829224, "learning_rate": 8e-05, "loss": 1.4086, "step": 4937 }, { "epoch": 0.6739456803603112, "grad_norm": 0.2279331088066101, "learning_rate": 8e-05, "loss": 1.5178, "step": 4938 }, { "epoch": 0.674082161867067, "grad_norm": 0.22706177830696106, "learning_rate": 8e-05, "loss": 1.4835, "step": 4939 }, { "epoch": 0.6742186433738229, "grad_norm": 0.22859534621238708, "learning_rate": 8e-05, "loss": 1.5402, "step": 4940 }, { "epoch": 0.6743551248805787, "grad_norm": 0.23102515935897827, "learning_rate": 8e-05, "loss": 1.4611, "step": 4941 }, { "epoch": 0.6744916063873345, "grad_norm": 0.22415749728679657, "learning_rate": 8e-05, "loss": 1.47, "step": 4942 }, { "epoch": 0.6746280878940903, "grad_norm": 0.22143179178237915, "learning_rate": 8e-05, "loss": 1.4665, "step": 4943 }, { "epoch": 0.6747645694008462, "grad_norm": 0.22009111940860748, "learning_rate": 8e-05, "loss": 1.4634, "step": 4944 }, { "epoch": 0.674901050907602, "grad_norm": 0.2190183848142624, "learning_rate": 8e-05, "loss": 1.4684, "step": 4945 }, { "epoch": 0.6750375324143578, "grad_norm": 0.2439507395029068, "learning_rate": 8e-05, "loss": 1.5442, "step": 4946 }, { "epoch": 0.6751740139211136, "grad_norm": 0.21957656741142273, "learning_rate": 8e-05, "loss": 1.4431, "step": 4947 }, { "epoch": 0.6753104954278695, "grad_norm": 0.23306095600128174, "learning_rate": 8e-05, "loss": 1.4892, "step": 4948 }, { "epoch": 0.6754469769346254, "grad_norm": 0.23035183548927307, "learning_rate": 8e-05, "loss": 1.4345, "step": 4949 }, { "epoch": 0.6755834584413812, "grad_norm": 0.2369430810213089, "learning_rate": 8e-05, "loss": 1.5298, "step": 4950 }, { "epoch": 0.6757199399481371, "grad_norm": 0.2293514609336853, "learning_rate": 8e-05, "loss": 1.478, "step": 4951 }, { "epoch": 0.6758564214548929, "grad_norm": 0.2301241159439087, "learning_rate": 8e-05, "loss": 1.4466, "step": 4952 }, { "epoch": 0.6759929029616487, "grad_norm": 0.2373674362897873, "learning_rate": 8e-05, "loss": 1.4985, "step": 4953 }, { "epoch": 0.6761293844684045, "grad_norm": 0.2303207814693451, "learning_rate": 8e-05, "loss": 1.4528, "step": 4954 }, { "epoch": 0.6762658659751604, "grad_norm": 0.22433367371559143, "learning_rate": 8e-05, "loss": 1.4345, "step": 4955 }, { "epoch": 0.6764023474819162, "grad_norm": 0.23255306482315063, "learning_rate": 8e-05, "loss": 1.4989, "step": 4956 }, { "epoch": 0.676538828988672, "grad_norm": 0.22652313113212585, "learning_rate": 8e-05, "loss": 1.4862, "step": 4957 }, { "epoch": 0.6766753104954278, "grad_norm": 0.23774616420269012, "learning_rate": 8e-05, "loss": 1.5009, "step": 4958 }, { "epoch": 0.6768117920021837, "grad_norm": 0.23445482552051544, "learning_rate": 8e-05, "loss": 1.5441, "step": 4959 }, { "epoch": 0.6769482735089395, "grad_norm": 0.22669482231140137, "learning_rate": 8e-05, "loss": 1.499, "step": 4960 }, { "epoch": 0.6770847550156954, "grad_norm": 0.2172665148973465, "learning_rate": 8e-05, "loss": 1.4124, "step": 4961 }, { "epoch": 0.6772212365224513, "grad_norm": 0.21616071462631226, "learning_rate": 8e-05, "loss": 1.4306, "step": 4962 }, { "epoch": 0.6773577180292071, "grad_norm": 0.2244592010974884, "learning_rate": 8e-05, "loss": 1.5063, "step": 4963 }, { "epoch": 0.6774941995359629, "grad_norm": 0.22665441036224365, "learning_rate": 8e-05, "loss": 1.4988, "step": 4964 }, { "epoch": 0.6776306810427187, "grad_norm": 0.2233804613351822, "learning_rate": 8e-05, "loss": 1.4369, "step": 4965 }, { "epoch": 0.6777671625494746, "grad_norm": 0.2426506131887436, "learning_rate": 8e-05, "loss": 1.5139, "step": 4966 }, { "epoch": 0.6779036440562304, "grad_norm": 0.23367524147033691, "learning_rate": 8e-05, "loss": 1.4667, "step": 4967 }, { "epoch": 0.6780401255629862, "grad_norm": 0.218913272023201, "learning_rate": 8e-05, "loss": 1.3863, "step": 4968 }, { "epoch": 0.678176607069742, "grad_norm": 0.2259284257888794, "learning_rate": 8e-05, "loss": 1.4675, "step": 4969 }, { "epoch": 0.6783130885764979, "grad_norm": 0.22640331089496613, "learning_rate": 8e-05, "loss": 1.4751, "step": 4970 }, { "epoch": 0.6784495700832537, "grad_norm": 0.2255599945783615, "learning_rate": 8e-05, "loss": 1.4622, "step": 4971 }, { "epoch": 0.6785860515900095, "grad_norm": 0.2304847240447998, "learning_rate": 8e-05, "loss": 1.4923, "step": 4972 }, { "epoch": 0.6787225330967653, "grad_norm": 0.2298392355442047, "learning_rate": 8e-05, "loss": 1.4948, "step": 4973 }, { "epoch": 0.6788590146035213, "grad_norm": 0.2209482342004776, "learning_rate": 8e-05, "loss": 1.4619, "step": 4974 }, { "epoch": 0.6789954961102771, "grad_norm": 0.22957704961299896, "learning_rate": 8e-05, "loss": 1.5004, "step": 4975 }, { "epoch": 0.6791319776170329, "grad_norm": 0.22390051186084747, "learning_rate": 8e-05, "loss": 1.4065, "step": 4976 }, { "epoch": 0.6792684591237887, "grad_norm": 0.22878144681453705, "learning_rate": 8e-05, "loss": 1.4588, "step": 4977 }, { "epoch": 0.6794049406305446, "grad_norm": 0.22576715052127838, "learning_rate": 8e-05, "loss": 1.3586, "step": 4978 }, { "epoch": 0.6795414221373004, "grad_norm": 0.23070695996284485, "learning_rate": 8e-05, "loss": 1.4443, "step": 4979 }, { "epoch": 0.6796779036440562, "grad_norm": 0.23301833868026733, "learning_rate": 8e-05, "loss": 1.4213, "step": 4980 }, { "epoch": 0.679814385150812, "grad_norm": 0.22595970332622528, "learning_rate": 8e-05, "loss": 1.3941, "step": 4981 }, { "epoch": 0.6799508666575679, "grad_norm": 0.2268526405096054, "learning_rate": 8e-05, "loss": 1.4951, "step": 4982 }, { "epoch": 0.6800873481643237, "grad_norm": 0.22498205304145813, "learning_rate": 8e-05, "loss": 1.4033, "step": 4983 }, { "epoch": 0.6802238296710795, "grad_norm": 0.22633042931556702, "learning_rate": 8e-05, "loss": 1.4526, "step": 4984 }, { "epoch": 0.6803603111778354, "grad_norm": 0.2367359846830368, "learning_rate": 8e-05, "loss": 1.4974, "step": 4985 }, { "epoch": 0.6804967926845913, "grad_norm": 0.22913554310798645, "learning_rate": 8e-05, "loss": 1.4443, "step": 4986 }, { "epoch": 0.6806332741913471, "grad_norm": 0.22574195265769958, "learning_rate": 8e-05, "loss": 1.4723, "step": 4987 }, { "epoch": 0.6807697556981029, "grad_norm": 0.2198648601770401, "learning_rate": 8e-05, "loss": 1.3365, "step": 4988 }, { "epoch": 0.6809062372048588, "grad_norm": 0.22332395613193512, "learning_rate": 8e-05, "loss": 1.5039, "step": 4989 }, { "epoch": 0.6810427187116146, "grad_norm": 0.2273492068052292, "learning_rate": 8e-05, "loss": 1.441, "step": 4990 }, { "epoch": 0.6811792002183704, "grad_norm": 0.22265857458114624, "learning_rate": 8e-05, "loss": 1.4529, "step": 4991 }, { "epoch": 0.6813156817251262, "grad_norm": 0.23119039833545685, "learning_rate": 8e-05, "loss": 1.5478, "step": 4992 }, { "epoch": 0.6814521632318821, "grad_norm": 0.23217029869556427, "learning_rate": 8e-05, "loss": 1.4716, "step": 4993 }, { "epoch": 0.6815886447386379, "grad_norm": 0.22838056087493896, "learning_rate": 8e-05, "loss": 1.4904, "step": 4994 }, { "epoch": 0.6817251262453937, "grad_norm": 0.22141489386558533, "learning_rate": 8e-05, "loss": 1.3976, "step": 4995 }, { "epoch": 0.6818616077521495, "grad_norm": 0.23647750914096832, "learning_rate": 8e-05, "loss": 1.5039, "step": 4996 }, { "epoch": 0.6819980892589054, "grad_norm": 0.22490087151527405, "learning_rate": 8e-05, "loss": 1.4621, "step": 4997 }, { "epoch": 0.6821345707656613, "grad_norm": 0.21920499205589294, "learning_rate": 8e-05, "loss": 1.3793, "step": 4998 }, { "epoch": 0.6822710522724171, "grad_norm": 0.23083849251270294, "learning_rate": 8e-05, "loss": 1.5515, "step": 4999 }, { "epoch": 0.682407533779173, "grad_norm": 0.2255854308605194, "learning_rate": 8e-05, "loss": 1.4975, "step": 5000 }, { "epoch": 0.6825440152859288, "grad_norm": 0.22592756152153015, "learning_rate": 8e-05, "loss": 1.4833, "step": 5001 }, { "epoch": 0.6826804967926846, "grad_norm": 0.22824306786060333, "learning_rate": 8e-05, "loss": 1.4807, "step": 5002 }, { "epoch": 0.6828169782994404, "grad_norm": 0.2275524139404297, "learning_rate": 8e-05, "loss": 1.4865, "step": 5003 }, { "epoch": 0.6829534598061963, "grad_norm": 0.22983388602733612, "learning_rate": 8e-05, "loss": 1.5549, "step": 5004 }, { "epoch": 0.6830899413129521, "grad_norm": 0.2244010865688324, "learning_rate": 8e-05, "loss": 1.4785, "step": 5005 }, { "epoch": 0.6832264228197079, "grad_norm": 0.2313825786113739, "learning_rate": 8e-05, "loss": 1.4259, "step": 5006 }, { "epoch": 0.6833629043264637, "grad_norm": 0.22144214808940887, "learning_rate": 8e-05, "loss": 1.3826, "step": 5007 }, { "epoch": 0.6834993858332196, "grad_norm": 0.23368731141090393, "learning_rate": 8e-05, "loss": 1.4919, "step": 5008 }, { "epoch": 0.6836358673399754, "grad_norm": 0.2234659492969513, "learning_rate": 8e-05, "loss": 1.4538, "step": 5009 }, { "epoch": 0.6837723488467312, "grad_norm": 0.2257426232099533, "learning_rate": 8e-05, "loss": 1.4588, "step": 5010 }, { "epoch": 0.6839088303534872, "grad_norm": 0.2614365220069885, "learning_rate": 8e-05, "loss": 1.6096, "step": 5011 }, { "epoch": 0.684045311860243, "grad_norm": 0.23928286135196686, "learning_rate": 8e-05, "loss": 1.5203, "step": 5012 }, { "epoch": 0.6841817933669988, "grad_norm": 0.23042115569114685, "learning_rate": 8e-05, "loss": 1.4579, "step": 5013 }, { "epoch": 0.6843182748737546, "grad_norm": 0.23792703449726105, "learning_rate": 8e-05, "loss": 1.4417, "step": 5014 }, { "epoch": 0.6844547563805105, "grad_norm": 0.22855287790298462, "learning_rate": 8e-05, "loss": 1.4811, "step": 5015 }, { "epoch": 0.6845912378872663, "grad_norm": 0.22891730070114136, "learning_rate": 8e-05, "loss": 1.439, "step": 5016 }, { "epoch": 0.6847277193940221, "grad_norm": 0.23642709851264954, "learning_rate": 8e-05, "loss": 1.4948, "step": 5017 }, { "epoch": 0.6848642009007779, "grad_norm": 0.22723734378814697, "learning_rate": 8e-05, "loss": 1.4127, "step": 5018 }, { "epoch": 0.6850006824075338, "grad_norm": 0.22933165729045868, "learning_rate": 8e-05, "loss": 1.4133, "step": 5019 }, { "epoch": 0.6851371639142896, "grad_norm": 0.2300548106431961, "learning_rate": 8e-05, "loss": 1.4386, "step": 5020 }, { "epoch": 0.6852736454210454, "grad_norm": 0.24369780719280243, "learning_rate": 8e-05, "loss": 1.4879, "step": 5021 }, { "epoch": 0.6854101269278012, "grad_norm": 0.2261851578950882, "learning_rate": 8e-05, "loss": 1.4612, "step": 5022 }, { "epoch": 0.6855466084345572, "grad_norm": 0.23346692323684692, "learning_rate": 8e-05, "loss": 1.4026, "step": 5023 }, { "epoch": 0.685683089941313, "grad_norm": 0.22375231981277466, "learning_rate": 8e-05, "loss": 1.4465, "step": 5024 }, { "epoch": 0.6858195714480688, "grad_norm": 0.22330830991268158, "learning_rate": 8e-05, "loss": 1.4014, "step": 5025 }, { "epoch": 0.6859560529548246, "grad_norm": 0.24908140301704407, "learning_rate": 8e-05, "loss": 1.5205, "step": 5026 }, { "epoch": 0.6860925344615805, "grad_norm": 0.22809389233589172, "learning_rate": 8e-05, "loss": 1.454, "step": 5027 }, { "epoch": 0.6862290159683363, "grad_norm": 0.22787150740623474, "learning_rate": 8e-05, "loss": 1.4948, "step": 5028 }, { "epoch": 0.6863654974750921, "grad_norm": 0.23242376744747162, "learning_rate": 8e-05, "loss": 1.4976, "step": 5029 }, { "epoch": 0.686501978981848, "grad_norm": 0.24245303869247437, "learning_rate": 8e-05, "loss": 1.5831, "step": 5030 }, { "epoch": 0.6866384604886038, "grad_norm": 0.23010410368442535, "learning_rate": 8e-05, "loss": 1.4847, "step": 5031 }, { "epoch": 0.6867749419953596, "grad_norm": 0.23047325015068054, "learning_rate": 8e-05, "loss": 1.493, "step": 5032 }, { "epoch": 0.6869114235021154, "grad_norm": 0.24399243295192719, "learning_rate": 8e-05, "loss": 1.475, "step": 5033 }, { "epoch": 0.6870479050088713, "grad_norm": 0.22931353747844696, "learning_rate": 8e-05, "loss": 1.4758, "step": 5034 }, { "epoch": 0.6871843865156272, "grad_norm": 0.22713154554367065, "learning_rate": 8e-05, "loss": 1.4207, "step": 5035 }, { "epoch": 0.687320868022383, "grad_norm": 0.23670606315135956, "learning_rate": 8e-05, "loss": 1.4731, "step": 5036 }, { "epoch": 0.6874573495291388, "grad_norm": 0.23109430074691772, "learning_rate": 8e-05, "loss": 1.4922, "step": 5037 }, { "epoch": 0.6875938310358947, "grad_norm": 0.2223534882068634, "learning_rate": 8e-05, "loss": 1.4225, "step": 5038 }, { "epoch": 0.6877303125426505, "grad_norm": 0.23939067125320435, "learning_rate": 8e-05, "loss": 1.4359, "step": 5039 }, { "epoch": 0.6878667940494063, "grad_norm": 0.23268179595470428, "learning_rate": 8e-05, "loss": 1.4389, "step": 5040 }, { "epoch": 0.6880032755561621, "grad_norm": 0.23072464764118195, "learning_rate": 8e-05, "loss": 1.5093, "step": 5041 }, { "epoch": 0.688139757062918, "grad_norm": 0.23465904593467712, "learning_rate": 8e-05, "loss": 1.4979, "step": 5042 }, { "epoch": 0.6882762385696738, "grad_norm": 0.23594097793102264, "learning_rate": 8e-05, "loss": 1.4126, "step": 5043 }, { "epoch": 0.6884127200764296, "grad_norm": 0.21398280560970306, "learning_rate": 8e-05, "loss": 1.3772, "step": 5044 }, { "epoch": 0.6885492015831854, "grad_norm": 0.23254543542861938, "learning_rate": 8e-05, "loss": 1.4277, "step": 5045 }, { "epoch": 0.6886856830899413, "grad_norm": 0.244389146566391, "learning_rate": 8e-05, "loss": 1.5489, "step": 5046 }, { "epoch": 0.6888221645966971, "grad_norm": 0.23840005695819855, "learning_rate": 8e-05, "loss": 1.4898, "step": 5047 }, { "epoch": 0.688958646103453, "grad_norm": 0.22709038853645325, "learning_rate": 8e-05, "loss": 1.4217, "step": 5048 }, { "epoch": 0.6890951276102089, "grad_norm": 0.23280467092990875, "learning_rate": 8e-05, "loss": 1.4706, "step": 5049 }, { "epoch": 0.6892316091169647, "grad_norm": 0.23505531251430511, "learning_rate": 8e-05, "loss": 1.449, "step": 5050 }, { "epoch": 0.6893680906237205, "grad_norm": 0.23104819655418396, "learning_rate": 8e-05, "loss": 1.5353, "step": 5051 }, { "epoch": 0.6895045721304763, "grad_norm": 0.2260597050189972, "learning_rate": 8e-05, "loss": 1.4626, "step": 5052 }, { "epoch": 0.6896410536372322, "grad_norm": 0.24535372853279114, "learning_rate": 8e-05, "loss": 1.49, "step": 5053 }, { "epoch": 0.689777535143988, "grad_norm": 0.2321591079235077, "learning_rate": 8e-05, "loss": 1.4402, "step": 5054 }, { "epoch": 0.6899140166507438, "grad_norm": 0.22221209108829498, "learning_rate": 8e-05, "loss": 1.3016, "step": 5055 }, { "epoch": 0.6900504981574996, "grad_norm": 0.23392553627490997, "learning_rate": 8e-05, "loss": 1.5213, "step": 5056 }, { "epoch": 0.6901869796642555, "grad_norm": 0.22481046617031097, "learning_rate": 8e-05, "loss": 1.4362, "step": 5057 }, { "epoch": 0.6903234611710113, "grad_norm": 0.23036149144172668, "learning_rate": 8e-05, "loss": 1.4001, "step": 5058 }, { "epoch": 0.6904599426777671, "grad_norm": 0.22406387329101562, "learning_rate": 8e-05, "loss": 1.4755, "step": 5059 }, { "epoch": 0.690596424184523, "grad_norm": 0.22814059257507324, "learning_rate": 8e-05, "loss": 1.5234, "step": 5060 }, { "epoch": 0.6907329056912789, "grad_norm": 0.24145053327083588, "learning_rate": 8e-05, "loss": 1.5114, "step": 5061 }, { "epoch": 0.6908693871980347, "grad_norm": 0.2258089929819107, "learning_rate": 8e-05, "loss": 1.3877, "step": 5062 }, { "epoch": 0.6910058687047905, "grad_norm": 0.22434434294700623, "learning_rate": 8e-05, "loss": 1.5118, "step": 5063 }, { "epoch": 0.6911423502115464, "grad_norm": 0.2281690090894699, "learning_rate": 8e-05, "loss": 1.4974, "step": 5064 }, { "epoch": 0.6912788317183022, "grad_norm": 0.23077541589736938, "learning_rate": 8e-05, "loss": 1.4985, "step": 5065 }, { "epoch": 0.691415313225058, "grad_norm": 0.22752626240253448, "learning_rate": 8e-05, "loss": 1.4852, "step": 5066 }, { "epoch": 0.6915517947318138, "grad_norm": 0.23466192185878754, "learning_rate": 8e-05, "loss": 1.4615, "step": 5067 }, { "epoch": 0.6916882762385697, "grad_norm": 0.23212094604969025, "learning_rate": 8e-05, "loss": 1.5266, "step": 5068 }, { "epoch": 0.6918247577453255, "grad_norm": 0.23421894013881683, "learning_rate": 8e-05, "loss": 1.4712, "step": 5069 }, { "epoch": 0.6919612392520813, "grad_norm": 0.2413264513015747, "learning_rate": 8e-05, "loss": 1.4996, "step": 5070 }, { "epoch": 0.6920977207588371, "grad_norm": 0.22626392543315887, "learning_rate": 8e-05, "loss": 1.4517, "step": 5071 }, { "epoch": 0.692234202265593, "grad_norm": 0.22106380760669708, "learning_rate": 8e-05, "loss": 1.3622, "step": 5072 }, { "epoch": 0.6923706837723489, "grad_norm": 0.22447755932807922, "learning_rate": 8e-05, "loss": 1.4501, "step": 5073 }, { "epoch": 0.6925071652791047, "grad_norm": 0.23246055841445923, "learning_rate": 8e-05, "loss": 1.4439, "step": 5074 }, { "epoch": 0.6926436467858605, "grad_norm": 0.23439981043338776, "learning_rate": 8e-05, "loss": 1.4852, "step": 5075 }, { "epoch": 0.6927801282926164, "grad_norm": 0.23108838498592377, "learning_rate": 8e-05, "loss": 1.491, "step": 5076 }, { "epoch": 0.6929166097993722, "grad_norm": 0.23152801394462585, "learning_rate": 8e-05, "loss": 1.4601, "step": 5077 }, { "epoch": 0.693053091306128, "grad_norm": 0.23705871403217316, "learning_rate": 8e-05, "loss": 1.5306, "step": 5078 }, { "epoch": 0.6931895728128838, "grad_norm": 0.2270851880311966, "learning_rate": 8e-05, "loss": 1.4094, "step": 5079 }, { "epoch": 0.6933260543196397, "grad_norm": 0.2301025688648224, "learning_rate": 8e-05, "loss": 1.4674, "step": 5080 }, { "epoch": 0.6934625358263955, "grad_norm": 0.22957973182201385, "learning_rate": 8e-05, "loss": 1.5299, "step": 5081 }, { "epoch": 0.6935990173331513, "grad_norm": 0.2330951988697052, "learning_rate": 8e-05, "loss": 1.4738, "step": 5082 }, { "epoch": 0.6937354988399071, "grad_norm": 0.2390143722295761, "learning_rate": 8e-05, "loss": 1.5102, "step": 5083 }, { "epoch": 0.693871980346663, "grad_norm": 0.22995489835739136, "learning_rate": 8e-05, "loss": 1.4799, "step": 5084 }, { "epoch": 0.6940084618534189, "grad_norm": 0.22315309941768646, "learning_rate": 8e-05, "loss": 1.4708, "step": 5085 }, { "epoch": 0.6941449433601747, "grad_norm": 0.23628775775432587, "learning_rate": 8e-05, "loss": 1.426, "step": 5086 }, { "epoch": 0.6942814248669306, "grad_norm": 0.22730235755443573, "learning_rate": 8e-05, "loss": 1.4846, "step": 5087 }, { "epoch": 0.6944179063736864, "grad_norm": 0.22582364082336426, "learning_rate": 8e-05, "loss": 1.3844, "step": 5088 }, { "epoch": 0.6945543878804422, "grad_norm": 0.22879810631275177, "learning_rate": 8e-05, "loss": 1.4317, "step": 5089 }, { "epoch": 0.694690869387198, "grad_norm": 0.232257679104805, "learning_rate": 8e-05, "loss": 1.4942, "step": 5090 }, { "epoch": 0.6948273508939539, "grad_norm": 0.23786935210227966, "learning_rate": 8e-05, "loss": 1.4452, "step": 5091 }, { "epoch": 0.6949638324007097, "grad_norm": 0.22878646850585938, "learning_rate": 8e-05, "loss": 1.4477, "step": 5092 }, { "epoch": 0.6951003139074655, "grad_norm": 0.22936205565929413, "learning_rate": 8e-05, "loss": 1.4595, "step": 5093 }, { "epoch": 0.6952367954142213, "grad_norm": 0.24095378816127777, "learning_rate": 8e-05, "loss": 1.4435, "step": 5094 }, { "epoch": 0.6953732769209772, "grad_norm": 0.23267251253128052, "learning_rate": 8e-05, "loss": 1.5348, "step": 5095 }, { "epoch": 0.695509758427733, "grad_norm": 0.2193121463060379, "learning_rate": 8e-05, "loss": 1.4508, "step": 5096 }, { "epoch": 0.6956462399344889, "grad_norm": 0.22852331399917603, "learning_rate": 8e-05, "loss": 1.4203, "step": 5097 }, { "epoch": 0.6957827214412448, "grad_norm": 0.23829145729541779, "learning_rate": 8e-05, "loss": 1.469, "step": 5098 }, { "epoch": 0.6959192029480006, "grad_norm": 0.22598522901535034, "learning_rate": 8e-05, "loss": 1.4179, "step": 5099 }, { "epoch": 0.6960556844547564, "grad_norm": 0.2247435748577118, "learning_rate": 8e-05, "loss": 1.4394, "step": 5100 }, { "epoch": 0.6961921659615122, "grad_norm": 0.23520605266094208, "learning_rate": 8e-05, "loss": 1.4892, "step": 5101 }, { "epoch": 0.6963286474682681, "grad_norm": 0.23005187511444092, "learning_rate": 8e-05, "loss": 1.4174, "step": 5102 }, { "epoch": 0.6964651289750239, "grad_norm": 0.2307494729757309, "learning_rate": 8e-05, "loss": 1.4501, "step": 5103 }, { "epoch": 0.6966016104817797, "grad_norm": 0.23237387835979462, "learning_rate": 8e-05, "loss": 1.404, "step": 5104 }, { "epoch": 0.6967380919885355, "grad_norm": 0.22133669257164001, "learning_rate": 8e-05, "loss": 1.4313, "step": 5105 }, { "epoch": 0.6968745734952914, "grad_norm": 0.22346685826778412, "learning_rate": 8e-05, "loss": 1.4596, "step": 5106 }, { "epoch": 0.6970110550020472, "grad_norm": 0.2341427057981491, "learning_rate": 8e-05, "loss": 1.4498, "step": 5107 }, { "epoch": 0.697147536508803, "grad_norm": 0.23348917067050934, "learning_rate": 8e-05, "loss": 1.5226, "step": 5108 }, { "epoch": 0.6972840180155588, "grad_norm": 0.23101168870925903, "learning_rate": 8e-05, "loss": 1.3767, "step": 5109 }, { "epoch": 0.6974204995223148, "grad_norm": 0.2369215339422226, "learning_rate": 8e-05, "loss": 1.467, "step": 5110 }, { "epoch": 0.6975569810290706, "grad_norm": 0.22682523727416992, "learning_rate": 8e-05, "loss": 1.3766, "step": 5111 }, { "epoch": 0.6976934625358264, "grad_norm": 0.23639442026615143, "learning_rate": 8e-05, "loss": 1.4254, "step": 5112 }, { "epoch": 0.6978299440425823, "grad_norm": 0.23330754041671753, "learning_rate": 8e-05, "loss": 1.4597, "step": 5113 }, { "epoch": 0.6979664255493381, "grad_norm": 0.22796671092510223, "learning_rate": 8e-05, "loss": 1.5055, "step": 5114 }, { "epoch": 0.6981029070560939, "grad_norm": 0.22095134854316711, "learning_rate": 8e-05, "loss": 1.4509, "step": 5115 }, { "epoch": 0.6982393885628497, "grad_norm": 0.23071900010108948, "learning_rate": 8e-05, "loss": 1.4699, "step": 5116 }, { "epoch": 0.6983758700696056, "grad_norm": 0.23079606890678406, "learning_rate": 8e-05, "loss": 1.4547, "step": 5117 }, { "epoch": 0.6985123515763614, "grad_norm": 0.2219523787498474, "learning_rate": 8e-05, "loss": 1.4046, "step": 5118 }, { "epoch": 0.6986488330831172, "grad_norm": 0.2280607372522354, "learning_rate": 8e-05, "loss": 1.4232, "step": 5119 }, { "epoch": 0.698785314589873, "grad_norm": 0.22948622703552246, "learning_rate": 8e-05, "loss": 1.4758, "step": 5120 }, { "epoch": 0.6989217960966289, "grad_norm": 0.22942166030406952, "learning_rate": 8e-05, "loss": 1.4399, "step": 5121 }, { "epoch": 0.6990582776033848, "grad_norm": 0.23508676886558533, "learning_rate": 8e-05, "loss": 1.4247, "step": 5122 }, { "epoch": 0.6991947591101406, "grad_norm": 0.23840664327144623, "learning_rate": 8e-05, "loss": 1.4924, "step": 5123 }, { "epoch": 0.6993312406168964, "grad_norm": 0.23543450236320496, "learning_rate": 8e-05, "loss": 1.4932, "step": 5124 }, { "epoch": 0.6994677221236523, "grad_norm": 0.2232559323310852, "learning_rate": 8e-05, "loss": 1.4551, "step": 5125 }, { "epoch": 0.6996042036304081, "grad_norm": 0.2291235327720642, "learning_rate": 8e-05, "loss": 1.4353, "step": 5126 }, { "epoch": 0.6997406851371639, "grad_norm": 0.22774386405944824, "learning_rate": 8e-05, "loss": 1.4892, "step": 5127 }, { "epoch": 0.6998771666439197, "grad_norm": 0.2351008653640747, "learning_rate": 8e-05, "loss": 1.5581, "step": 5128 }, { "epoch": 0.7000136481506756, "grad_norm": 0.23261822760105133, "learning_rate": 8e-05, "loss": 1.4796, "step": 5129 }, { "epoch": 0.7001501296574314, "grad_norm": 0.22495388984680176, "learning_rate": 8e-05, "loss": 1.4327, "step": 5130 }, { "epoch": 0.7002866111641872, "grad_norm": 0.22172313928604126, "learning_rate": 8e-05, "loss": 1.435, "step": 5131 }, { "epoch": 0.700423092670943, "grad_norm": 0.2331259548664093, "learning_rate": 8e-05, "loss": 1.5436, "step": 5132 }, { "epoch": 0.7005595741776989, "grad_norm": 0.2200707495212555, "learning_rate": 8e-05, "loss": 1.435, "step": 5133 }, { "epoch": 0.7006960556844548, "grad_norm": 0.235848069190979, "learning_rate": 8e-05, "loss": 1.4955, "step": 5134 }, { "epoch": 0.7008325371912106, "grad_norm": 0.23125916719436646, "learning_rate": 8e-05, "loss": 1.4108, "step": 5135 }, { "epoch": 0.7009690186979665, "grad_norm": 0.23091630637645721, "learning_rate": 8e-05, "loss": 1.4348, "step": 5136 }, { "epoch": 0.7011055002047223, "grad_norm": 0.22759422659873962, "learning_rate": 8e-05, "loss": 1.342, "step": 5137 }, { "epoch": 0.7012419817114781, "grad_norm": 0.2229062169790268, "learning_rate": 8e-05, "loss": 1.4567, "step": 5138 }, { "epoch": 0.7013784632182339, "grad_norm": 0.23501718044281006, "learning_rate": 8e-05, "loss": 1.5329, "step": 5139 }, { "epoch": 0.7015149447249898, "grad_norm": 0.22614824771881104, "learning_rate": 8e-05, "loss": 1.4227, "step": 5140 }, { "epoch": 0.7016514262317456, "grad_norm": 0.22627761960029602, "learning_rate": 8e-05, "loss": 1.5146, "step": 5141 }, { "epoch": 0.7017879077385014, "grad_norm": 0.23504354059696198, "learning_rate": 8e-05, "loss": 1.3903, "step": 5142 }, { "epoch": 0.7019243892452572, "grad_norm": 0.22979801893234253, "learning_rate": 8e-05, "loss": 1.4805, "step": 5143 }, { "epoch": 0.7020608707520131, "grad_norm": 0.2267935425043106, "learning_rate": 8e-05, "loss": 1.4591, "step": 5144 }, { "epoch": 0.7021973522587689, "grad_norm": 0.2312496155500412, "learning_rate": 8e-05, "loss": 1.4378, "step": 5145 }, { "epoch": 0.7023338337655247, "grad_norm": 0.23789344727993011, "learning_rate": 8e-05, "loss": 1.5066, "step": 5146 }, { "epoch": 0.7024703152722807, "grad_norm": 0.23923785984516144, "learning_rate": 8e-05, "loss": 1.5423, "step": 5147 }, { "epoch": 0.7026067967790365, "grad_norm": 0.22657272219657898, "learning_rate": 8e-05, "loss": 1.4685, "step": 5148 }, { "epoch": 0.7027432782857923, "grad_norm": 0.22614091634750366, "learning_rate": 8e-05, "loss": 1.4455, "step": 5149 }, { "epoch": 0.7028797597925481, "grad_norm": 0.2270379513502121, "learning_rate": 8e-05, "loss": 1.3821, "step": 5150 }, { "epoch": 0.703016241299304, "grad_norm": 0.2266760766506195, "learning_rate": 8e-05, "loss": 1.422, "step": 5151 }, { "epoch": 0.7031527228060598, "grad_norm": 0.2303757220506668, "learning_rate": 8e-05, "loss": 1.4953, "step": 5152 }, { "epoch": 0.7032892043128156, "grad_norm": 0.23265528678894043, "learning_rate": 8e-05, "loss": 1.4871, "step": 5153 }, { "epoch": 0.7034256858195714, "grad_norm": 0.23369629681110382, "learning_rate": 8e-05, "loss": 1.5423, "step": 5154 }, { "epoch": 0.7035621673263273, "grad_norm": 0.23297664523124695, "learning_rate": 8e-05, "loss": 1.4636, "step": 5155 }, { "epoch": 0.7036986488330831, "grad_norm": 0.23646555840969086, "learning_rate": 8e-05, "loss": 1.4958, "step": 5156 }, { "epoch": 0.7038351303398389, "grad_norm": 0.23214687407016754, "learning_rate": 8e-05, "loss": 1.54, "step": 5157 }, { "epoch": 0.7039716118465947, "grad_norm": 0.22202420234680176, "learning_rate": 8e-05, "loss": 1.3915, "step": 5158 }, { "epoch": 0.7041080933533507, "grad_norm": 0.23451454937458038, "learning_rate": 8e-05, "loss": 1.4219, "step": 5159 }, { "epoch": 0.7042445748601065, "grad_norm": 0.226332426071167, "learning_rate": 8e-05, "loss": 1.4657, "step": 5160 }, { "epoch": 0.7043810563668623, "grad_norm": 0.2316066324710846, "learning_rate": 8e-05, "loss": 1.4743, "step": 5161 }, { "epoch": 0.7045175378736181, "grad_norm": 0.22673174738883972, "learning_rate": 8e-05, "loss": 1.424, "step": 5162 }, { "epoch": 0.704654019380374, "grad_norm": 0.22019809484481812, "learning_rate": 8e-05, "loss": 1.4272, "step": 5163 }, { "epoch": 0.7047905008871298, "grad_norm": 0.2284545749425888, "learning_rate": 8e-05, "loss": 1.4517, "step": 5164 }, { "epoch": 0.7049269823938856, "grad_norm": 0.23567375540733337, "learning_rate": 8e-05, "loss": 1.4536, "step": 5165 }, { "epoch": 0.7050634639006415, "grad_norm": 0.23670494556427002, "learning_rate": 8e-05, "loss": 1.5239, "step": 5166 }, { "epoch": 0.7051999454073973, "grad_norm": 0.22246654331684113, "learning_rate": 8e-05, "loss": 1.453, "step": 5167 }, { "epoch": 0.7053364269141531, "grad_norm": 0.2293093353509903, "learning_rate": 8e-05, "loss": 1.4277, "step": 5168 }, { "epoch": 0.7054729084209089, "grad_norm": 0.23489068448543549, "learning_rate": 8e-05, "loss": 1.4116, "step": 5169 }, { "epoch": 0.7056093899276648, "grad_norm": 0.23213842511177063, "learning_rate": 8e-05, "loss": 1.546, "step": 5170 }, { "epoch": 0.7057458714344207, "grad_norm": 0.2341366857290268, "learning_rate": 8e-05, "loss": 1.5383, "step": 5171 }, { "epoch": 0.7058823529411765, "grad_norm": 0.22749823331832886, "learning_rate": 8e-05, "loss": 1.4477, "step": 5172 }, { "epoch": 0.7060188344479323, "grad_norm": 0.22021502256393433, "learning_rate": 8e-05, "loss": 1.4867, "step": 5173 }, { "epoch": 0.7061553159546882, "grad_norm": 0.22996768355369568, "learning_rate": 8e-05, "loss": 1.4815, "step": 5174 }, { "epoch": 0.706291797461444, "grad_norm": 0.2389015108346939, "learning_rate": 8e-05, "loss": 1.4699, "step": 5175 }, { "epoch": 0.7064282789681998, "grad_norm": 0.21842928230762482, "learning_rate": 8e-05, "loss": 1.4846, "step": 5176 }, { "epoch": 0.7065647604749556, "grad_norm": 0.22123534977436066, "learning_rate": 8e-05, "loss": 1.4186, "step": 5177 }, { "epoch": 0.7067012419817115, "grad_norm": 0.2291850745677948, "learning_rate": 8e-05, "loss": 1.4664, "step": 5178 }, { "epoch": 0.7068377234884673, "grad_norm": 0.22739991545677185, "learning_rate": 8e-05, "loss": 1.4828, "step": 5179 }, { "epoch": 0.7069742049952231, "grad_norm": 0.23837895691394806, "learning_rate": 8e-05, "loss": 1.4867, "step": 5180 }, { "epoch": 0.707110686501979, "grad_norm": 0.23669789731502533, "learning_rate": 8e-05, "loss": 1.484, "step": 5181 }, { "epoch": 0.7072471680087348, "grad_norm": 0.21973150968551636, "learning_rate": 8e-05, "loss": 1.4214, "step": 5182 }, { "epoch": 0.7073836495154906, "grad_norm": 0.22442220151424408, "learning_rate": 8e-05, "loss": 1.4484, "step": 5183 }, { "epoch": 0.7075201310222465, "grad_norm": 0.2263316661119461, "learning_rate": 8e-05, "loss": 1.4807, "step": 5184 }, { "epoch": 0.7076566125290024, "grad_norm": 0.2239358276128769, "learning_rate": 8e-05, "loss": 1.4181, "step": 5185 }, { "epoch": 0.7077930940357582, "grad_norm": 0.22882649302482605, "learning_rate": 8e-05, "loss": 1.4479, "step": 5186 }, { "epoch": 0.707929575542514, "grad_norm": 0.22912001609802246, "learning_rate": 8e-05, "loss": 1.4405, "step": 5187 }, { "epoch": 0.7080660570492698, "grad_norm": 0.22843018174171448, "learning_rate": 8e-05, "loss": 1.4517, "step": 5188 }, { "epoch": 0.7082025385560257, "grad_norm": 0.2302873283624649, "learning_rate": 8e-05, "loss": 1.4956, "step": 5189 }, { "epoch": 0.7083390200627815, "grad_norm": 0.22532938420772552, "learning_rate": 8e-05, "loss": 1.4246, "step": 5190 }, { "epoch": 0.7084755015695373, "grad_norm": 0.2349798083305359, "learning_rate": 8e-05, "loss": 1.5045, "step": 5191 }, { "epoch": 0.7086119830762931, "grad_norm": 0.22965744137763977, "learning_rate": 8e-05, "loss": 1.4447, "step": 5192 }, { "epoch": 0.708748464583049, "grad_norm": 0.22220103442668915, "learning_rate": 8e-05, "loss": 1.4446, "step": 5193 }, { "epoch": 0.7088849460898048, "grad_norm": 0.22468991577625275, "learning_rate": 8e-05, "loss": 1.4164, "step": 5194 }, { "epoch": 0.7090214275965606, "grad_norm": 0.22321423888206482, "learning_rate": 8e-05, "loss": 1.5125, "step": 5195 }, { "epoch": 0.7091579091033166, "grad_norm": 0.2211775928735733, "learning_rate": 8e-05, "loss": 1.4781, "step": 5196 }, { "epoch": 0.7092943906100724, "grad_norm": 0.2264275997877121, "learning_rate": 8e-05, "loss": 1.4121, "step": 5197 }, { "epoch": 0.7094308721168282, "grad_norm": 0.2266429215669632, "learning_rate": 8e-05, "loss": 1.5035, "step": 5198 }, { "epoch": 0.709567353623584, "grad_norm": 0.23563487827777863, "learning_rate": 8e-05, "loss": 1.4796, "step": 5199 }, { "epoch": 0.7097038351303399, "grad_norm": 0.22286291420459747, "learning_rate": 8e-05, "loss": 1.4827, "step": 5200 }, { "epoch": 0.7098403166370957, "grad_norm": 0.22390563786029816, "learning_rate": 8e-05, "loss": 1.432, "step": 5201 }, { "epoch": 0.7099767981438515, "grad_norm": 0.2372536063194275, "learning_rate": 8e-05, "loss": 1.5713, "step": 5202 }, { "epoch": 0.7101132796506073, "grad_norm": 0.22790288925170898, "learning_rate": 8e-05, "loss": 1.5238, "step": 5203 }, { "epoch": 0.7102497611573632, "grad_norm": 0.2222626954317093, "learning_rate": 8e-05, "loss": 1.4116, "step": 5204 }, { "epoch": 0.710386242664119, "grad_norm": 0.230938121676445, "learning_rate": 8e-05, "loss": 1.4707, "step": 5205 }, { "epoch": 0.7105227241708748, "grad_norm": 0.22708426415920258, "learning_rate": 8e-05, "loss": 1.4972, "step": 5206 }, { "epoch": 0.7106592056776306, "grad_norm": 0.2290227711200714, "learning_rate": 8e-05, "loss": 1.5229, "step": 5207 }, { "epoch": 0.7107956871843866, "grad_norm": 0.2210305631160736, "learning_rate": 8e-05, "loss": 1.4449, "step": 5208 }, { "epoch": 0.7109321686911424, "grad_norm": 0.22625501453876495, "learning_rate": 8e-05, "loss": 1.4549, "step": 5209 }, { "epoch": 0.7110686501978982, "grad_norm": 0.2209835946559906, "learning_rate": 8e-05, "loss": 1.474, "step": 5210 }, { "epoch": 0.711205131704654, "grad_norm": 0.23020268976688385, "learning_rate": 8e-05, "loss": 1.497, "step": 5211 }, { "epoch": 0.7113416132114099, "grad_norm": 0.22533197700977325, "learning_rate": 8e-05, "loss": 1.4067, "step": 5212 }, { "epoch": 0.7114780947181657, "grad_norm": 0.22457125782966614, "learning_rate": 8e-05, "loss": 1.4041, "step": 5213 }, { "epoch": 0.7116145762249215, "grad_norm": 0.22782807052135468, "learning_rate": 8e-05, "loss": 1.4318, "step": 5214 }, { "epoch": 0.7117510577316774, "grad_norm": 0.2307622879743576, "learning_rate": 8e-05, "loss": 1.4582, "step": 5215 }, { "epoch": 0.7118875392384332, "grad_norm": 0.23082606494426727, "learning_rate": 8e-05, "loss": 1.4382, "step": 5216 }, { "epoch": 0.712024020745189, "grad_norm": 0.23211349546909332, "learning_rate": 8e-05, "loss": 1.5229, "step": 5217 }, { "epoch": 0.7121605022519448, "grad_norm": 0.2375563681125641, "learning_rate": 8e-05, "loss": 1.4792, "step": 5218 }, { "epoch": 0.7122969837587007, "grad_norm": 0.22742272913455963, "learning_rate": 8e-05, "loss": 1.4712, "step": 5219 }, { "epoch": 0.7124334652654565, "grad_norm": 0.22805550694465637, "learning_rate": 8e-05, "loss": 1.4546, "step": 5220 }, { "epoch": 0.7125699467722124, "grad_norm": 0.22987519204616547, "learning_rate": 8e-05, "loss": 1.459, "step": 5221 }, { "epoch": 0.7127064282789682, "grad_norm": 0.22578541934490204, "learning_rate": 8e-05, "loss": 1.4734, "step": 5222 }, { "epoch": 0.7128429097857241, "grad_norm": 0.2283637672662735, "learning_rate": 8e-05, "loss": 1.4528, "step": 5223 }, { "epoch": 0.7129793912924799, "grad_norm": 0.2316460758447647, "learning_rate": 8e-05, "loss": 1.4108, "step": 5224 }, { "epoch": 0.7131158727992357, "grad_norm": 0.23687529563903809, "learning_rate": 8e-05, "loss": 1.5259, "step": 5225 }, { "epoch": 0.7132523543059915, "grad_norm": 0.2246427834033966, "learning_rate": 8e-05, "loss": 1.4126, "step": 5226 }, { "epoch": 0.7133888358127474, "grad_norm": 0.22945912182331085, "learning_rate": 8e-05, "loss": 1.4939, "step": 5227 }, { "epoch": 0.7135253173195032, "grad_norm": 0.23460642993450165, "learning_rate": 8e-05, "loss": 1.5016, "step": 5228 }, { "epoch": 0.713661798826259, "grad_norm": 0.23665134608745575, "learning_rate": 8e-05, "loss": 1.4426, "step": 5229 }, { "epoch": 0.7137982803330148, "grad_norm": 0.22802813351154327, "learning_rate": 8e-05, "loss": 1.5175, "step": 5230 }, { "epoch": 0.7139347618397707, "grad_norm": 0.2241210639476776, "learning_rate": 8e-05, "loss": 1.4899, "step": 5231 }, { "epoch": 0.7140712433465265, "grad_norm": 0.2219220995903015, "learning_rate": 8e-05, "loss": 1.4422, "step": 5232 }, { "epoch": 0.7142077248532824, "grad_norm": 0.22590434551239014, "learning_rate": 8e-05, "loss": 1.4024, "step": 5233 }, { "epoch": 0.7143442063600383, "grad_norm": 0.24560531973838806, "learning_rate": 8e-05, "loss": 1.4571, "step": 5234 }, { "epoch": 0.7144806878667941, "grad_norm": 0.22415179014205933, "learning_rate": 8e-05, "loss": 1.4628, "step": 5235 }, { "epoch": 0.7146171693735499, "grad_norm": 0.22969307005405426, "learning_rate": 8e-05, "loss": 1.3836, "step": 5236 }, { "epoch": 0.7147536508803057, "grad_norm": 0.22613166272640228, "learning_rate": 8e-05, "loss": 1.4849, "step": 5237 }, { "epoch": 0.7148901323870616, "grad_norm": 0.22589626908302307, "learning_rate": 8e-05, "loss": 1.4258, "step": 5238 }, { "epoch": 0.7150266138938174, "grad_norm": 0.2188994586467743, "learning_rate": 8e-05, "loss": 1.459, "step": 5239 }, { "epoch": 0.7151630954005732, "grad_norm": 0.2353687733411789, "learning_rate": 8e-05, "loss": 1.5046, "step": 5240 }, { "epoch": 0.715299576907329, "grad_norm": 0.22919295728206635, "learning_rate": 8e-05, "loss": 1.4929, "step": 5241 }, { "epoch": 0.7154360584140849, "grad_norm": 0.2287200391292572, "learning_rate": 8e-05, "loss": 1.4556, "step": 5242 }, { "epoch": 0.7155725399208407, "grad_norm": 0.22957560420036316, "learning_rate": 8e-05, "loss": 1.4795, "step": 5243 }, { "epoch": 0.7157090214275965, "grad_norm": 0.2190137803554535, "learning_rate": 8e-05, "loss": 1.4529, "step": 5244 }, { "epoch": 0.7158455029343523, "grad_norm": 0.2272428274154663, "learning_rate": 8e-05, "loss": 1.4394, "step": 5245 }, { "epoch": 0.7159819844411083, "grad_norm": 0.22814325988292694, "learning_rate": 8e-05, "loss": 1.4357, "step": 5246 }, { "epoch": 0.7161184659478641, "grad_norm": 0.22830340266227722, "learning_rate": 8e-05, "loss": 1.4706, "step": 5247 }, { "epoch": 0.7162549474546199, "grad_norm": 0.23010452091693878, "learning_rate": 8e-05, "loss": 1.4934, "step": 5248 }, { "epoch": 0.7163914289613758, "grad_norm": 0.2403780221939087, "learning_rate": 8e-05, "loss": 1.4173, "step": 5249 }, { "epoch": 0.7165279104681316, "grad_norm": 0.23053985834121704, "learning_rate": 8e-05, "loss": 1.4399, "step": 5250 }, { "epoch": 0.7166643919748874, "grad_norm": 0.22619538009166718, "learning_rate": 8e-05, "loss": 1.3881, "step": 5251 }, { "epoch": 0.7168008734816432, "grad_norm": 0.22301580011844635, "learning_rate": 8e-05, "loss": 1.4726, "step": 5252 }, { "epoch": 0.7169373549883991, "grad_norm": 0.23423010110855103, "learning_rate": 8e-05, "loss": 1.4662, "step": 5253 }, { "epoch": 0.7170738364951549, "grad_norm": 0.22831673920154572, "learning_rate": 8e-05, "loss": 1.4705, "step": 5254 }, { "epoch": 0.7172103180019107, "grad_norm": 0.22351470589637756, "learning_rate": 8e-05, "loss": 1.3727, "step": 5255 }, { "epoch": 0.7173467995086665, "grad_norm": 0.22754192352294922, "learning_rate": 8e-05, "loss": 1.461, "step": 5256 }, { "epoch": 0.7174832810154224, "grad_norm": 0.23520877957344055, "learning_rate": 8e-05, "loss": 1.4552, "step": 5257 }, { "epoch": 0.7176197625221783, "grad_norm": 0.22898343205451965, "learning_rate": 8e-05, "loss": 1.4936, "step": 5258 }, { "epoch": 0.7177562440289341, "grad_norm": 0.22621594369411469, "learning_rate": 8e-05, "loss": 1.428, "step": 5259 }, { "epoch": 0.71789272553569, "grad_norm": 0.2237178385257721, "learning_rate": 8e-05, "loss": 1.4543, "step": 5260 }, { "epoch": 0.7180292070424458, "grad_norm": 0.2254728525876999, "learning_rate": 8e-05, "loss": 1.4489, "step": 5261 }, { "epoch": 0.7181656885492016, "grad_norm": 0.22403603792190552, "learning_rate": 8e-05, "loss": 1.4413, "step": 5262 }, { "epoch": 0.7183021700559574, "grad_norm": 0.23477238416671753, "learning_rate": 8e-05, "loss": 1.4863, "step": 5263 }, { "epoch": 0.7184386515627132, "grad_norm": 0.23135671019554138, "learning_rate": 8e-05, "loss": 1.5085, "step": 5264 }, { "epoch": 0.7185751330694691, "grad_norm": 0.22708939015865326, "learning_rate": 8e-05, "loss": 1.4665, "step": 5265 }, { "epoch": 0.7187116145762249, "grad_norm": 0.2184794843196869, "learning_rate": 8e-05, "loss": 1.4556, "step": 5266 }, { "epoch": 0.7188480960829807, "grad_norm": 0.23387610912322998, "learning_rate": 8e-05, "loss": 1.5215, "step": 5267 }, { "epoch": 0.7189845775897366, "grad_norm": 0.23199856281280518, "learning_rate": 8e-05, "loss": 1.4596, "step": 5268 }, { "epoch": 0.7191210590964924, "grad_norm": 0.23771816492080688, "learning_rate": 8e-05, "loss": 1.506, "step": 5269 }, { "epoch": 0.7192575406032483, "grad_norm": 0.2327917218208313, "learning_rate": 8e-05, "loss": 1.5286, "step": 5270 }, { "epoch": 0.7193940221100041, "grad_norm": 0.2535395324230194, "learning_rate": 8e-05, "loss": 1.4092, "step": 5271 }, { "epoch": 0.71953050361676, "grad_norm": 0.23165638744831085, "learning_rate": 8e-05, "loss": 1.4898, "step": 5272 }, { "epoch": 0.7196669851235158, "grad_norm": 0.22785091400146484, "learning_rate": 8e-05, "loss": 1.4575, "step": 5273 }, { "epoch": 0.7198034666302716, "grad_norm": 0.22400890290737152, "learning_rate": 8e-05, "loss": 1.418, "step": 5274 }, { "epoch": 0.7199399481370274, "grad_norm": 0.23173034191131592, "learning_rate": 8e-05, "loss": 1.4596, "step": 5275 }, { "epoch": 0.7200764296437833, "grad_norm": 0.23364093899726868, "learning_rate": 8e-05, "loss": 1.5045, "step": 5276 }, { "epoch": 0.7202129111505391, "grad_norm": 0.2318466454744339, "learning_rate": 8e-05, "loss": 1.4517, "step": 5277 }, { "epoch": 0.7203493926572949, "grad_norm": 0.2363099604845047, "learning_rate": 8e-05, "loss": 1.4854, "step": 5278 }, { "epoch": 0.7204858741640507, "grad_norm": 0.22352993488311768, "learning_rate": 8e-05, "loss": 1.4525, "step": 5279 }, { "epoch": 0.7206223556708066, "grad_norm": 0.23048235476016998, "learning_rate": 8e-05, "loss": 1.4913, "step": 5280 }, { "epoch": 0.7207588371775624, "grad_norm": 0.23505954444408417, "learning_rate": 8e-05, "loss": 1.4876, "step": 5281 }, { "epoch": 0.7208953186843182, "grad_norm": 0.24082958698272705, "learning_rate": 8e-05, "loss": 1.482, "step": 5282 }, { "epoch": 0.7210318001910742, "grad_norm": 0.23025617003440857, "learning_rate": 8e-05, "loss": 1.4682, "step": 5283 }, { "epoch": 0.72116828169783, "grad_norm": 0.23293665051460266, "learning_rate": 8e-05, "loss": 1.4987, "step": 5284 }, { "epoch": 0.7213047632045858, "grad_norm": 0.22668147087097168, "learning_rate": 8e-05, "loss": 1.4206, "step": 5285 }, { "epoch": 0.7214412447113416, "grad_norm": 0.23604561388492584, "learning_rate": 8e-05, "loss": 1.4529, "step": 5286 }, { "epoch": 0.7215777262180975, "grad_norm": 0.22835572063922882, "learning_rate": 8e-05, "loss": 1.4464, "step": 5287 }, { "epoch": 0.7217142077248533, "grad_norm": 0.23009514808654785, "learning_rate": 8e-05, "loss": 1.4808, "step": 5288 }, { "epoch": 0.7218506892316091, "grad_norm": 0.228826105594635, "learning_rate": 8e-05, "loss": 1.3958, "step": 5289 }, { "epoch": 0.7219871707383649, "grad_norm": 0.23606547713279724, "learning_rate": 8e-05, "loss": 1.4428, "step": 5290 }, { "epoch": 0.7221236522451208, "grad_norm": 0.230474054813385, "learning_rate": 8e-05, "loss": 1.4446, "step": 5291 }, { "epoch": 0.7222601337518766, "grad_norm": 0.23692047595977783, "learning_rate": 8e-05, "loss": 1.4852, "step": 5292 }, { "epoch": 0.7223966152586324, "grad_norm": 0.2328113317489624, "learning_rate": 8e-05, "loss": 1.4437, "step": 5293 }, { "epoch": 0.7225330967653882, "grad_norm": 0.22825218737125397, "learning_rate": 8e-05, "loss": 1.3887, "step": 5294 }, { "epoch": 0.7226695782721442, "grad_norm": 0.23564966022968292, "learning_rate": 8e-05, "loss": 1.4058, "step": 5295 }, { "epoch": 0.7228060597789, "grad_norm": 0.23616553843021393, "learning_rate": 8e-05, "loss": 1.4228, "step": 5296 }, { "epoch": 0.7229425412856558, "grad_norm": 0.23622408509254456, "learning_rate": 8e-05, "loss": 1.4317, "step": 5297 }, { "epoch": 0.7230790227924117, "grad_norm": 0.2299550473690033, "learning_rate": 8e-05, "loss": 1.4817, "step": 5298 }, { "epoch": 0.7232155042991675, "grad_norm": 0.22760528326034546, "learning_rate": 8e-05, "loss": 1.4367, "step": 5299 }, { "epoch": 0.7233519858059233, "grad_norm": 0.2318352311849594, "learning_rate": 8e-05, "loss": 1.421, "step": 5300 }, { "epoch": 0.7234884673126791, "grad_norm": 0.23360735177993774, "learning_rate": 8e-05, "loss": 1.489, "step": 5301 }, { "epoch": 0.723624948819435, "grad_norm": 0.23550571501255035, "learning_rate": 8e-05, "loss": 1.4587, "step": 5302 }, { "epoch": 0.7237614303261908, "grad_norm": 0.23339222371578217, "learning_rate": 8e-05, "loss": 1.5198, "step": 5303 }, { "epoch": 0.7238979118329466, "grad_norm": 0.22132667899131775, "learning_rate": 8e-05, "loss": 1.4549, "step": 5304 }, { "epoch": 0.7240343933397024, "grad_norm": 0.22522932291030884, "learning_rate": 8e-05, "loss": 1.4377, "step": 5305 }, { "epoch": 0.7241708748464583, "grad_norm": 0.2330664098262787, "learning_rate": 8e-05, "loss": 1.5272, "step": 5306 }, { "epoch": 0.7243073563532142, "grad_norm": 0.24181297421455383, "learning_rate": 8e-05, "loss": 1.535, "step": 5307 }, { "epoch": 0.72444383785997, "grad_norm": 0.23423512279987335, "learning_rate": 8e-05, "loss": 1.5116, "step": 5308 }, { "epoch": 0.7245803193667258, "grad_norm": 0.22781367599964142, "learning_rate": 8e-05, "loss": 1.4697, "step": 5309 }, { "epoch": 0.7247168008734817, "grad_norm": 0.2365158647298813, "learning_rate": 8e-05, "loss": 1.3836, "step": 5310 }, { "epoch": 0.7248532823802375, "grad_norm": 0.22710949182510376, "learning_rate": 8e-05, "loss": 1.4236, "step": 5311 }, { "epoch": 0.7249897638869933, "grad_norm": 0.23008687794208527, "learning_rate": 8e-05, "loss": 1.5005, "step": 5312 }, { "epoch": 0.7251262453937491, "grad_norm": 0.23265910148620605, "learning_rate": 8e-05, "loss": 1.4328, "step": 5313 }, { "epoch": 0.725262726900505, "grad_norm": 0.24594861268997192, "learning_rate": 8e-05, "loss": 1.5129, "step": 5314 }, { "epoch": 0.7253992084072608, "grad_norm": 0.22055862843990326, "learning_rate": 8e-05, "loss": 1.414, "step": 5315 }, { "epoch": 0.7255356899140166, "grad_norm": 0.24171856045722961, "learning_rate": 8e-05, "loss": 1.5024, "step": 5316 }, { "epoch": 0.7256721714207724, "grad_norm": 0.23419013619422913, "learning_rate": 8e-05, "loss": 1.4917, "step": 5317 }, { "epoch": 0.7258086529275283, "grad_norm": 0.24209046363830566, "learning_rate": 8e-05, "loss": 1.4844, "step": 5318 }, { "epoch": 0.7259451344342841, "grad_norm": 0.2303965985774994, "learning_rate": 8e-05, "loss": 1.5032, "step": 5319 }, { "epoch": 0.72608161594104, "grad_norm": 0.22313524782657623, "learning_rate": 8e-05, "loss": 1.455, "step": 5320 }, { "epoch": 0.7262180974477959, "grad_norm": 0.24665449559688568, "learning_rate": 8e-05, "loss": 1.5182, "step": 5321 }, { "epoch": 0.7263545789545517, "grad_norm": 0.22862735390663147, "learning_rate": 8e-05, "loss": 1.427, "step": 5322 }, { "epoch": 0.7264910604613075, "grad_norm": 0.23640479147434235, "learning_rate": 8e-05, "loss": 1.4216, "step": 5323 }, { "epoch": 0.7266275419680633, "grad_norm": 0.23321279883384705, "learning_rate": 8e-05, "loss": 1.4868, "step": 5324 }, { "epoch": 0.7267640234748192, "grad_norm": 0.24157695472240448, "learning_rate": 8e-05, "loss": 1.488, "step": 5325 }, { "epoch": 0.726900504981575, "grad_norm": 0.2316184937953949, "learning_rate": 8e-05, "loss": 1.4296, "step": 5326 }, { "epoch": 0.7270369864883308, "grad_norm": 0.23264643549919128, "learning_rate": 8e-05, "loss": 1.4218, "step": 5327 }, { "epoch": 0.7271734679950866, "grad_norm": 0.23049168288707733, "learning_rate": 8e-05, "loss": 1.457, "step": 5328 }, { "epoch": 0.7273099495018425, "grad_norm": 0.23966392874717712, "learning_rate": 8e-05, "loss": 1.5126, "step": 5329 }, { "epoch": 0.7274464310085983, "grad_norm": 0.23343442380428314, "learning_rate": 8e-05, "loss": 1.5026, "step": 5330 }, { "epoch": 0.7275829125153541, "grad_norm": 0.2502121031284332, "learning_rate": 8e-05, "loss": 1.5157, "step": 5331 }, { "epoch": 0.72771939402211, "grad_norm": 0.2411840260028839, "learning_rate": 8e-05, "loss": 1.4821, "step": 5332 }, { "epoch": 0.7278558755288659, "grad_norm": 0.22902125120162964, "learning_rate": 8e-05, "loss": 1.4848, "step": 5333 }, { "epoch": 0.7279923570356217, "grad_norm": 0.23181374371051788, "learning_rate": 8e-05, "loss": 1.4741, "step": 5334 }, { "epoch": 0.7281288385423775, "grad_norm": 0.23361146450042725, "learning_rate": 8e-05, "loss": 1.4678, "step": 5335 }, { "epoch": 0.7282653200491334, "grad_norm": 0.22817663848400116, "learning_rate": 8e-05, "loss": 1.438, "step": 5336 }, { "epoch": 0.7284018015558892, "grad_norm": 0.23050130903720856, "learning_rate": 8e-05, "loss": 1.4782, "step": 5337 }, { "epoch": 0.728538283062645, "grad_norm": 0.22238372266292572, "learning_rate": 8e-05, "loss": 1.3356, "step": 5338 }, { "epoch": 0.7286747645694008, "grad_norm": 0.24139800667762756, "learning_rate": 8e-05, "loss": 1.4926, "step": 5339 }, { "epoch": 0.7288112460761567, "grad_norm": 0.2403285801410675, "learning_rate": 8e-05, "loss": 1.4118, "step": 5340 }, { "epoch": 0.7289477275829125, "grad_norm": 0.23192352056503296, "learning_rate": 8e-05, "loss": 1.4249, "step": 5341 }, { "epoch": 0.7290842090896683, "grad_norm": 0.23252302408218384, "learning_rate": 8e-05, "loss": 1.4756, "step": 5342 }, { "epoch": 0.7292206905964241, "grad_norm": 0.22691796720027924, "learning_rate": 8e-05, "loss": 1.4441, "step": 5343 }, { "epoch": 0.7293571721031801, "grad_norm": 0.23446527123451233, "learning_rate": 8e-05, "loss": 1.5143, "step": 5344 }, { "epoch": 0.7294936536099359, "grad_norm": 0.23514872789382935, "learning_rate": 8e-05, "loss": 1.4314, "step": 5345 }, { "epoch": 0.7296301351166917, "grad_norm": 0.23052138090133667, "learning_rate": 8e-05, "loss": 1.41, "step": 5346 }, { "epoch": 0.7297666166234476, "grad_norm": 0.22796691954135895, "learning_rate": 8e-05, "loss": 1.4602, "step": 5347 }, { "epoch": 0.7299030981302034, "grad_norm": 0.22415000200271606, "learning_rate": 8e-05, "loss": 1.472, "step": 5348 }, { "epoch": 0.7300395796369592, "grad_norm": 0.22221362590789795, "learning_rate": 8e-05, "loss": 1.5038, "step": 5349 }, { "epoch": 0.730176061143715, "grad_norm": 0.22684365510940552, "learning_rate": 8e-05, "loss": 1.4489, "step": 5350 }, { "epoch": 0.7303125426504709, "grad_norm": 0.2365785390138626, "learning_rate": 8e-05, "loss": 1.5258, "step": 5351 }, { "epoch": 0.7304490241572267, "grad_norm": 0.2281636893749237, "learning_rate": 8e-05, "loss": 1.4443, "step": 5352 }, { "epoch": 0.7305855056639825, "grad_norm": 0.23404833674430847, "learning_rate": 8e-05, "loss": 1.4003, "step": 5353 }, { "epoch": 0.7307219871707383, "grad_norm": 0.24050290882587433, "learning_rate": 8e-05, "loss": 1.4526, "step": 5354 }, { "epoch": 0.7308584686774942, "grad_norm": 0.24188768863677979, "learning_rate": 8e-05, "loss": 1.507, "step": 5355 }, { "epoch": 0.73099495018425, "grad_norm": 0.24535927176475525, "learning_rate": 8e-05, "loss": 1.5357, "step": 5356 }, { "epoch": 0.7311314316910059, "grad_norm": 0.22273485362529755, "learning_rate": 8e-05, "loss": 1.401, "step": 5357 }, { "epoch": 0.7312679131977617, "grad_norm": 0.22941969335079193, "learning_rate": 8e-05, "loss": 1.4562, "step": 5358 }, { "epoch": 0.7314043947045176, "grad_norm": 0.24198800325393677, "learning_rate": 8e-05, "loss": 1.4769, "step": 5359 }, { "epoch": 0.7315408762112734, "grad_norm": 0.22070355713367462, "learning_rate": 8e-05, "loss": 1.3686, "step": 5360 }, { "epoch": 0.7316773577180292, "grad_norm": 0.2219826877117157, "learning_rate": 8e-05, "loss": 1.436, "step": 5361 }, { "epoch": 0.731813839224785, "grad_norm": 0.23151229321956635, "learning_rate": 8e-05, "loss": 1.4796, "step": 5362 }, { "epoch": 0.7319503207315409, "grad_norm": 0.2341909110546112, "learning_rate": 8e-05, "loss": 1.4341, "step": 5363 }, { "epoch": 0.7320868022382967, "grad_norm": 0.23004098236560822, "learning_rate": 8e-05, "loss": 1.4468, "step": 5364 }, { "epoch": 0.7322232837450525, "grad_norm": 0.23812565207481384, "learning_rate": 8e-05, "loss": 1.499, "step": 5365 }, { "epoch": 0.7323597652518083, "grad_norm": 0.23968151211738586, "learning_rate": 8e-05, "loss": 1.4617, "step": 5366 }, { "epoch": 0.7324962467585642, "grad_norm": 0.25700920820236206, "learning_rate": 8e-05, "loss": 1.364, "step": 5367 }, { "epoch": 0.73263272826532, "grad_norm": 0.2526964545249939, "learning_rate": 8e-05, "loss": 1.4054, "step": 5368 }, { "epoch": 0.7327692097720759, "grad_norm": 0.22964175045490265, "learning_rate": 8e-05, "loss": 1.4354, "step": 5369 }, { "epoch": 0.7329056912788318, "grad_norm": 0.24745650589466095, "learning_rate": 8e-05, "loss": 1.4954, "step": 5370 }, { "epoch": 0.7330421727855876, "grad_norm": 0.23828831315040588, "learning_rate": 8e-05, "loss": 1.3966, "step": 5371 }, { "epoch": 0.7331786542923434, "grad_norm": 0.23327599465847015, "learning_rate": 8e-05, "loss": 1.5087, "step": 5372 }, { "epoch": 0.7333151357990992, "grad_norm": 0.24403904378414154, "learning_rate": 8e-05, "loss": 1.4735, "step": 5373 }, { "epoch": 0.7334516173058551, "grad_norm": 0.24699600040912628, "learning_rate": 8e-05, "loss": 1.4378, "step": 5374 }, { "epoch": 0.7335880988126109, "grad_norm": 0.23798972368240356, "learning_rate": 8e-05, "loss": 1.4612, "step": 5375 }, { "epoch": 0.7337245803193667, "grad_norm": 0.2450876235961914, "learning_rate": 8e-05, "loss": 1.4835, "step": 5376 }, { "epoch": 0.7338610618261225, "grad_norm": 0.24554628133773804, "learning_rate": 8e-05, "loss": 1.4479, "step": 5377 }, { "epoch": 0.7339975433328784, "grad_norm": 0.22485744953155518, "learning_rate": 8e-05, "loss": 1.4683, "step": 5378 }, { "epoch": 0.7341340248396342, "grad_norm": 0.23831279575824738, "learning_rate": 8e-05, "loss": 1.4735, "step": 5379 }, { "epoch": 0.73427050634639, "grad_norm": 0.22268791496753693, "learning_rate": 8e-05, "loss": 1.4448, "step": 5380 }, { "epoch": 0.734406987853146, "grad_norm": 0.23480865359306335, "learning_rate": 8e-05, "loss": 1.4468, "step": 5381 }, { "epoch": 0.7345434693599018, "grad_norm": 0.22699372470378876, "learning_rate": 8e-05, "loss": 1.4347, "step": 5382 }, { "epoch": 0.7346799508666576, "grad_norm": 0.23383373022079468, "learning_rate": 8e-05, "loss": 1.4088, "step": 5383 }, { "epoch": 0.7348164323734134, "grad_norm": 0.22804340720176697, "learning_rate": 8e-05, "loss": 1.5105, "step": 5384 }, { "epoch": 0.7349529138801693, "grad_norm": 0.22474032640457153, "learning_rate": 8e-05, "loss": 1.3943, "step": 5385 }, { "epoch": 0.7350893953869251, "grad_norm": 0.2366325855255127, "learning_rate": 8e-05, "loss": 1.4825, "step": 5386 }, { "epoch": 0.7352258768936809, "grad_norm": 0.2294752597808838, "learning_rate": 8e-05, "loss": 1.5151, "step": 5387 }, { "epoch": 0.7353623584004367, "grad_norm": 0.23280516266822815, "learning_rate": 8e-05, "loss": 1.4692, "step": 5388 }, { "epoch": 0.7354988399071926, "grad_norm": 0.23477290570735931, "learning_rate": 8e-05, "loss": 1.4601, "step": 5389 }, { "epoch": 0.7356353214139484, "grad_norm": 0.22921539843082428, "learning_rate": 8e-05, "loss": 1.3693, "step": 5390 }, { "epoch": 0.7357718029207042, "grad_norm": 0.23668691515922546, "learning_rate": 8e-05, "loss": 1.4744, "step": 5391 }, { "epoch": 0.73590828442746, "grad_norm": 0.22349058091640472, "learning_rate": 8e-05, "loss": 1.3654, "step": 5392 }, { "epoch": 0.7360447659342159, "grad_norm": 0.23499074578285217, "learning_rate": 8e-05, "loss": 1.4176, "step": 5393 }, { "epoch": 0.7361812474409718, "grad_norm": 0.23919731378555298, "learning_rate": 8e-05, "loss": 1.4556, "step": 5394 }, { "epoch": 0.7363177289477276, "grad_norm": 0.22556236386299133, "learning_rate": 8e-05, "loss": 1.4111, "step": 5395 }, { "epoch": 0.7364542104544834, "grad_norm": 0.2295866310596466, "learning_rate": 8e-05, "loss": 1.4284, "step": 5396 }, { "epoch": 0.7365906919612393, "grad_norm": 0.23728801310062408, "learning_rate": 8e-05, "loss": 1.4455, "step": 5397 }, { "epoch": 0.7367271734679951, "grad_norm": 0.2346675843000412, "learning_rate": 8e-05, "loss": 1.479, "step": 5398 }, { "epoch": 0.7368636549747509, "grad_norm": 0.2296382188796997, "learning_rate": 8e-05, "loss": 1.3871, "step": 5399 }, { "epoch": 0.7370001364815068, "grad_norm": 0.23015180230140686, "learning_rate": 8e-05, "loss": 1.4386, "step": 5400 }, { "epoch": 0.7371366179882626, "grad_norm": 0.22741682827472687, "learning_rate": 8e-05, "loss": 1.4526, "step": 5401 }, { "epoch": 0.7372730994950184, "grad_norm": 0.2277875393629074, "learning_rate": 8e-05, "loss": 1.457, "step": 5402 }, { "epoch": 0.7374095810017742, "grad_norm": 0.2306147962808609, "learning_rate": 8e-05, "loss": 1.4095, "step": 5403 }, { "epoch": 0.73754606250853, "grad_norm": 0.24104784429073334, "learning_rate": 8e-05, "loss": 1.5147, "step": 5404 }, { "epoch": 0.7376825440152859, "grad_norm": 0.22377142310142517, "learning_rate": 8e-05, "loss": 1.4198, "step": 5405 }, { "epoch": 0.7378190255220418, "grad_norm": 0.22960709035396576, "learning_rate": 8e-05, "loss": 1.4555, "step": 5406 }, { "epoch": 0.7379555070287976, "grad_norm": 0.24812842905521393, "learning_rate": 8e-05, "loss": 1.4761, "step": 5407 }, { "epoch": 0.7380919885355535, "grad_norm": 0.23960065841674805, "learning_rate": 8e-05, "loss": 1.4699, "step": 5408 }, { "epoch": 0.7382284700423093, "grad_norm": 0.23155266046524048, "learning_rate": 8e-05, "loss": 1.4911, "step": 5409 }, { "epoch": 0.7383649515490651, "grad_norm": 0.23349325358867645, "learning_rate": 8e-05, "loss": 1.4345, "step": 5410 }, { "epoch": 0.7385014330558209, "grad_norm": 0.23217438161373138, "learning_rate": 8e-05, "loss": 1.3821, "step": 5411 }, { "epoch": 0.7386379145625768, "grad_norm": 0.2326105386018753, "learning_rate": 8e-05, "loss": 1.3979, "step": 5412 }, { "epoch": 0.7387743960693326, "grad_norm": 0.23590391874313354, "learning_rate": 8e-05, "loss": 1.4757, "step": 5413 }, { "epoch": 0.7389108775760884, "grad_norm": 0.2365354746580124, "learning_rate": 8e-05, "loss": 1.4366, "step": 5414 }, { "epoch": 0.7390473590828442, "grad_norm": 0.2290075719356537, "learning_rate": 8e-05, "loss": 1.4807, "step": 5415 }, { "epoch": 0.7391838405896001, "grad_norm": 0.2313995510339737, "learning_rate": 8e-05, "loss": 1.4226, "step": 5416 }, { "epoch": 0.7393203220963559, "grad_norm": 0.23147158324718475, "learning_rate": 8e-05, "loss": 1.3896, "step": 5417 }, { "epoch": 0.7394568036031117, "grad_norm": 0.22454842925071716, "learning_rate": 8e-05, "loss": 1.403, "step": 5418 }, { "epoch": 0.7395932851098677, "grad_norm": 0.23005039989948273, "learning_rate": 8e-05, "loss": 1.461, "step": 5419 }, { "epoch": 0.7397297666166235, "grad_norm": 0.23498544096946716, "learning_rate": 8e-05, "loss": 1.4847, "step": 5420 }, { "epoch": 0.7398662481233793, "grad_norm": 0.2305397093296051, "learning_rate": 8e-05, "loss": 1.4183, "step": 5421 }, { "epoch": 0.7400027296301351, "grad_norm": 0.2323109656572342, "learning_rate": 8e-05, "loss": 1.477, "step": 5422 }, { "epoch": 0.740139211136891, "grad_norm": 0.2385823130607605, "learning_rate": 8e-05, "loss": 1.4767, "step": 5423 }, { "epoch": 0.7402756926436468, "grad_norm": 0.24378721415996552, "learning_rate": 8e-05, "loss": 1.5704, "step": 5424 }, { "epoch": 0.7404121741504026, "grad_norm": 0.22923608124256134, "learning_rate": 8e-05, "loss": 1.4379, "step": 5425 }, { "epoch": 0.7405486556571584, "grad_norm": 0.23621587455272675, "learning_rate": 8e-05, "loss": 1.4473, "step": 5426 }, { "epoch": 0.7406851371639143, "grad_norm": 0.22832398116588593, "learning_rate": 8e-05, "loss": 1.4601, "step": 5427 }, { "epoch": 0.7408216186706701, "grad_norm": 0.2290707677602768, "learning_rate": 8e-05, "loss": 1.3588, "step": 5428 }, { "epoch": 0.7409581001774259, "grad_norm": 0.22947391867637634, "learning_rate": 8e-05, "loss": 1.4978, "step": 5429 }, { "epoch": 0.7410945816841817, "grad_norm": 0.23131950199604034, "learning_rate": 8e-05, "loss": 1.4231, "step": 5430 }, { "epoch": 0.7412310631909377, "grad_norm": 0.23132729530334473, "learning_rate": 8e-05, "loss": 1.4631, "step": 5431 }, { "epoch": 0.7413675446976935, "grad_norm": 0.23361927270889282, "learning_rate": 8e-05, "loss": 1.3946, "step": 5432 }, { "epoch": 0.7415040262044493, "grad_norm": 0.23760920763015747, "learning_rate": 8e-05, "loss": 1.4802, "step": 5433 }, { "epoch": 0.7416405077112052, "grad_norm": 0.23963181674480438, "learning_rate": 8e-05, "loss": 1.5049, "step": 5434 }, { "epoch": 0.741776989217961, "grad_norm": 0.2447018325328827, "learning_rate": 8e-05, "loss": 1.5161, "step": 5435 }, { "epoch": 0.7419134707247168, "grad_norm": 0.2312150001525879, "learning_rate": 8e-05, "loss": 1.4247, "step": 5436 }, { "epoch": 0.7420499522314726, "grad_norm": 0.2264053374528885, "learning_rate": 8e-05, "loss": 1.4915, "step": 5437 }, { "epoch": 0.7421864337382285, "grad_norm": 0.22064529359340668, "learning_rate": 8e-05, "loss": 1.378, "step": 5438 }, { "epoch": 0.7423229152449843, "grad_norm": 0.2286280393600464, "learning_rate": 8e-05, "loss": 1.4908, "step": 5439 }, { "epoch": 0.7424593967517401, "grad_norm": 0.23819568753242493, "learning_rate": 8e-05, "loss": 1.492, "step": 5440 }, { "epoch": 0.7425958782584959, "grad_norm": 0.22619980573654175, "learning_rate": 8e-05, "loss": 1.3947, "step": 5441 }, { "epoch": 0.7427323597652518, "grad_norm": 0.23072820901870728, "learning_rate": 8e-05, "loss": 1.4505, "step": 5442 }, { "epoch": 0.7428688412720077, "grad_norm": 0.2268265187740326, "learning_rate": 8e-05, "loss": 1.4687, "step": 5443 }, { "epoch": 0.7430053227787635, "grad_norm": 0.23408031463623047, "learning_rate": 8e-05, "loss": 1.4624, "step": 5444 }, { "epoch": 0.7431418042855193, "grad_norm": 0.22525933384895325, "learning_rate": 8e-05, "loss": 1.4429, "step": 5445 }, { "epoch": 0.7432782857922752, "grad_norm": 0.2279914766550064, "learning_rate": 8e-05, "loss": 1.4176, "step": 5446 }, { "epoch": 0.743414767299031, "grad_norm": 0.22487708926200867, "learning_rate": 8e-05, "loss": 1.4733, "step": 5447 }, { "epoch": 0.7435512488057868, "grad_norm": 0.2260654866695404, "learning_rate": 8e-05, "loss": 1.4824, "step": 5448 }, { "epoch": 0.7436877303125426, "grad_norm": 0.23101775348186493, "learning_rate": 8e-05, "loss": 1.4132, "step": 5449 }, { "epoch": 0.7438242118192985, "grad_norm": 0.2334427684545517, "learning_rate": 8e-05, "loss": 1.474, "step": 5450 }, { "epoch": 0.7439606933260543, "grad_norm": 0.2328389436006546, "learning_rate": 8e-05, "loss": 1.5624, "step": 5451 }, { "epoch": 0.7440971748328101, "grad_norm": 0.21947164833545685, "learning_rate": 8e-05, "loss": 1.403, "step": 5452 }, { "epoch": 0.744233656339566, "grad_norm": 0.23512783646583557, "learning_rate": 8e-05, "loss": 1.4572, "step": 5453 }, { "epoch": 0.7443701378463218, "grad_norm": 0.2520940899848938, "learning_rate": 8e-05, "loss": 1.4301, "step": 5454 }, { "epoch": 0.7445066193530776, "grad_norm": 0.23764021694660187, "learning_rate": 8e-05, "loss": 1.4144, "step": 5455 }, { "epoch": 0.7446431008598335, "grad_norm": 0.24062025547027588, "learning_rate": 8e-05, "loss": 1.4733, "step": 5456 }, { "epoch": 0.7447795823665894, "grad_norm": 0.23779985308647156, "learning_rate": 8e-05, "loss": 1.4325, "step": 5457 }, { "epoch": 0.7449160638733452, "grad_norm": 0.22479653358459473, "learning_rate": 8e-05, "loss": 1.4707, "step": 5458 }, { "epoch": 0.745052545380101, "grad_norm": 0.22232404351234436, "learning_rate": 8e-05, "loss": 1.3752, "step": 5459 }, { "epoch": 0.7451890268868568, "grad_norm": 0.23947718739509583, "learning_rate": 8e-05, "loss": 1.4902, "step": 5460 }, { "epoch": 0.7453255083936127, "grad_norm": 0.24322591722011566, "learning_rate": 8e-05, "loss": 1.4603, "step": 5461 }, { "epoch": 0.7454619899003685, "grad_norm": 0.23918889462947845, "learning_rate": 8e-05, "loss": 1.4642, "step": 5462 }, { "epoch": 0.7455984714071243, "grad_norm": 0.22220833599567413, "learning_rate": 8e-05, "loss": 1.3353, "step": 5463 }, { "epoch": 0.7457349529138801, "grad_norm": 0.23220521211624146, "learning_rate": 8e-05, "loss": 1.4376, "step": 5464 }, { "epoch": 0.745871434420636, "grad_norm": 0.22554932534694672, "learning_rate": 8e-05, "loss": 1.4544, "step": 5465 }, { "epoch": 0.7460079159273918, "grad_norm": 0.2210371196269989, "learning_rate": 8e-05, "loss": 1.4403, "step": 5466 }, { "epoch": 0.7461443974341476, "grad_norm": 0.23625583946704865, "learning_rate": 8e-05, "loss": 1.4542, "step": 5467 }, { "epoch": 0.7462808789409036, "grad_norm": 0.23824359476566315, "learning_rate": 8e-05, "loss": 1.4148, "step": 5468 }, { "epoch": 0.7464173604476594, "grad_norm": 0.22984260320663452, "learning_rate": 8e-05, "loss": 1.4721, "step": 5469 }, { "epoch": 0.7465538419544152, "grad_norm": 0.23944777250289917, "learning_rate": 8e-05, "loss": 1.5524, "step": 5470 }, { "epoch": 0.746690323461171, "grad_norm": 0.2285875380039215, "learning_rate": 8e-05, "loss": 1.4443, "step": 5471 }, { "epoch": 0.7468268049679269, "grad_norm": 0.23165689408779144, "learning_rate": 8e-05, "loss": 1.4115, "step": 5472 }, { "epoch": 0.7469632864746827, "grad_norm": 0.2408268004655838, "learning_rate": 8e-05, "loss": 1.5446, "step": 5473 }, { "epoch": 0.7470997679814385, "grad_norm": 0.24573947489261627, "learning_rate": 8e-05, "loss": 1.3973, "step": 5474 }, { "epoch": 0.7472362494881943, "grad_norm": 0.22331851720809937, "learning_rate": 8e-05, "loss": 1.4241, "step": 5475 }, { "epoch": 0.7473727309949502, "grad_norm": 0.24434417486190796, "learning_rate": 8e-05, "loss": 1.529, "step": 5476 }, { "epoch": 0.747509212501706, "grad_norm": 0.2336418330669403, "learning_rate": 8e-05, "loss": 1.5074, "step": 5477 }, { "epoch": 0.7476456940084618, "grad_norm": 0.2304091900587082, "learning_rate": 8e-05, "loss": 1.4056, "step": 5478 }, { "epoch": 0.7477821755152176, "grad_norm": 0.2245134860277176, "learning_rate": 8e-05, "loss": 1.3748, "step": 5479 }, { "epoch": 0.7479186570219736, "grad_norm": 0.23465248942375183, "learning_rate": 8e-05, "loss": 1.4656, "step": 5480 }, { "epoch": 0.7480551385287294, "grad_norm": 0.2328892946243286, "learning_rate": 8e-05, "loss": 1.441, "step": 5481 }, { "epoch": 0.7481916200354852, "grad_norm": 0.24208420515060425, "learning_rate": 8e-05, "loss": 1.4848, "step": 5482 }, { "epoch": 0.748328101542241, "grad_norm": 0.2266128957271576, "learning_rate": 8e-05, "loss": 1.4948, "step": 5483 }, { "epoch": 0.7484645830489969, "grad_norm": 0.2226395457983017, "learning_rate": 8e-05, "loss": 1.4232, "step": 5484 }, { "epoch": 0.7486010645557527, "grad_norm": 0.22303183376789093, "learning_rate": 8e-05, "loss": 1.4097, "step": 5485 }, { "epoch": 0.7487375460625085, "grad_norm": 0.24082913994789124, "learning_rate": 8e-05, "loss": 1.5058, "step": 5486 }, { "epoch": 0.7488740275692644, "grad_norm": 0.22559095919132233, "learning_rate": 8e-05, "loss": 1.4065, "step": 5487 }, { "epoch": 0.7490105090760202, "grad_norm": 0.22808629274368286, "learning_rate": 8e-05, "loss": 1.3575, "step": 5488 }, { "epoch": 0.749146990582776, "grad_norm": 0.23182958364486694, "learning_rate": 8e-05, "loss": 1.4711, "step": 5489 }, { "epoch": 0.7492834720895318, "grad_norm": 0.22415268421173096, "learning_rate": 8e-05, "loss": 1.4239, "step": 5490 }, { "epoch": 0.7494199535962877, "grad_norm": 0.24841336905956268, "learning_rate": 8e-05, "loss": 1.4711, "step": 5491 }, { "epoch": 0.7495564351030435, "grad_norm": 0.23971742391586304, "learning_rate": 8e-05, "loss": 1.5148, "step": 5492 }, { "epoch": 0.7496929166097994, "grad_norm": 0.2354128062725067, "learning_rate": 8e-05, "loss": 1.4599, "step": 5493 }, { "epoch": 0.7498293981165552, "grad_norm": 0.2383018136024475, "learning_rate": 8e-05, "loss": 1.463, "step": 5494 }, { "epoch": 0.7499658796233111, "grad_norm": 0.2353178709745407, "learning_rate": 8e-05, "loss": 1.4126, "step": 5495 }, { "epoch": 0.7501023611300669, "grad_norm": 0.23781658709049225, "learning_rate": 8e-05, "loss": 1.478, "step": 5496 }, { "epoch": 0.7502388426368227, "grad_norm": 0.23770399391651154, "learning_rate": 8e-05, "loss": 1.4369, "step": 5497 }, { "epoch": 0.7503753241435785, "grad_norm": 0.23702123761177063, "learning_rate": 8e-05, "loss": 1.4261, "step": 5498 }, { "epoch": 0.7505118056503344, "grad_norm": 0.24078671634197235, "learning_rate": 8e-05, "loss": 1.4462, "step": 5499 }, { "epoch": 0.7506482871570902, "grad_norm": 0.2312447875738144, "learning_rate": 8e-05, "loss": 1.4066, "step": 5500 }, { "epoch": 0.750784768663846, "grad_norm": 0.24043422937393188, "learning_rate": 8e-05, "loss": 1.462, "step": 5501 }, { "epoch": 0.7509212501706019, "grad_norm": 0.23676154017448425, "learning_rate": 8e-05, "loss": 1.5009, "step": 5502 }, { "epoch": 0.7510577316773577, "grad_norm": 0.23995369672775269, "learning_rate": 8e-05, "loss": 1.5265, "step": 5503 }, { "epoch": 0.7511942131841135, "grad_norm": 0.2344968020915985, "learning_rate": 8e-05, "loss": 1.4269, "step": 5504 }, { "epoch": 0.7513306946908694, "grad_norm": 0.2372904270887375, "learning_rate": 8e-05, "loss": 1.3962, "step": 5505 }, { "epoch": 0.7514671761976253, "grad_norm": 0.22881458699703217, "learning_rate": 8e-05, "loss": 1.4453, "step": 5506 }, { "epoch": 0.7516036577043811, "grad_norm": 0.23556549847126007, "learning_rate": 8e-05, "loss": 1.4125, "step": 5507 }, { "epoch": 0.7517401392111369, "grad_norm": 0.23557212948799133, "learning_rate": 8e-05, "loss": 1.3881, "step": 5508 }, { "epoch": 0.7518766207178927, "grad_norm": 0.2407660335302353, "learning_rate": 8e-05, "loss": 1.4783, "step": 5509 }, { "epoch": 0.7520131022246486, "grad_norm": 0.23004890978336334, "learning_rate": 8e-05, "loss": 1.3628, "step": 5510 }, { "epoch": 0.7521495837314044, "grad_norm": 0.23470093309879303, "learning_rate": 8e-05, "loss": 1.4402, "step": 5511 }, { "epoch": 0.7522860652381602, "grad_norm": 0.23707321286201477, "learning_rate": 8e-05, "loss": 1.411, "step": 5512 }, { "epoch": 0.752422546744916, "grad_norm": 0.2390184849500656, "learning_rate": 8e-05, "loss": 1.4362, "step": 5513 }, { "epoch": 0.7525590282516719, "grad_norm": 0.22809483110904694, "learning_rate": 8e-05, "loss": 1.4173, "step": 5514 }, { "epoch": 0.7526955097584277, "grad_norm": 0.23081955313682556, "learning_rate": 8e-05, "loss": 1.4913, "step": 5515 }, { "epoch": 0.7528319912651835, "grad_norm": 0.24276885390281677, "learning_rate": 8e-05, "loss": 1.4564, "step": 5516 }, { "epoch": 0.7529684727719395, "grad_norm": 0.23636262118816376, "learning_rate": 8e-05, "loss": 1.4392, "step": 5517 }, { "epoch": 0.7531049542786953, "grad_norm": 0.232864648103714, "learning_rate": 8e-05, "loss": 1.4278, "step": 5518 }, { "epoch": 0.7532414357854511, "grad_norm": 0.23204819858074188, "learning_rate": 8e-05, "loss": 1.4283, "step": 5519 }, { "epoch": 0.7533779172922069, "grad_norm": 0.24035769701004028, "learning_rate": 8e-05, "loss": 1.4814, "step": 5520 }, { "epoch": 0.7535143987989628, "grad_norm": 0.2342885434627533, "learning_rate": 8e-05, "loss": 1.3729, "step": 5521 }, { "epoch": 0.7536508803057186, "grad_norm": 0.2284945398569107, "learning_rate": 8e-05, "loss": 1.436, "step": 5522 }, { "epoch": 0.7537873618124744, "grad_norm": 0.23148933053016663, "learning_rate": 8e-05, "loss": 1.4784, "step": 5523 }, { "epoch": 0.7539238433192302, "grad_norm": 0.23114363849163055, "learning_rate": 8e-05, "loss": 1.4384, "step": 5524 }, { "epoch": 0.7540603248259861, "grad_norm": 0.2480083405971527, "learning_rate": 8e-05, "loss": 1.4388, "step": 5525 }, { "epoch": 0.7541968063327419, "grad_norm": 0.22956795990467072, "learning_rate": 8e-05, "loss": 1.4084, "step": 5526 }, { "epoch": 0.7543332878394977, "grad_norm": 0.2277536690235138, "learning_rate": 8e-05, "loss": 1.497, "step": 5527 }, { "epoch": 0.7544697693462535, "grad_norm": 0.2351599633693695, "learning_rate": 8e-05, "loss": 1.4234, "step": 5528 }, { "epoch": 0.7546062508530094, "grad_norm": 0.24506905674934387, "learning_rate": 8e-05, "loss": 1.4337, "step": 5529 }, { "epoch": 0.7547427323597653, "grad_norm": 0.23541918396949768, "learning_rate": 8e-05, "loss": 1.4375, "step": 5530 }, { "epoch": 0.7548792138665211, "grad_norm": 0.23244623839855194, "learning_rate": 8e-05, "loss": 1.4278, "step": 5531 }, { "epoch": 0.755015695373277, "grad_norm": 0.23257069289684296, "learning_rate": 8e-05, "loss": 1.427, "step": 5532 }, { "epoch": 0.7551521768800328, "grad_norm": 0.24327880144119263, "learning_rate": 8e-05, "loss": 1.4318, "step": 5533 }, { "epoch": 0.7552886583867886, "grad_norm": 0.2355663925409317, "learning_rate": 8e-05, "loss": 1.4087, "step": 5534 }, { "epoch": 0.7554251398935444, "grad_norm": 0.22469916939735413, "learning_rate": 8e-05, "loss": 1.4106, "step": 5535 }, { "epoch": 0.7555616214003003, "grad_norm": 0.22179201245307922, "learning_rate": 8e-05, "loss": 1.5001, "step": 5536 }, { "epoch": 0.7556981029070561, "grad_norm": 0.2329602688550949, "learning_rate": 8e-05, "loss": 1.4299, "step": 5537 }, { "epoch": 0.7558345844138119, "grad_norm": 0.23746338486671448, "learning_rate": 8e-05, "loss": 1.4265, "step": 5538 }, { "epoch": 0.7559710659205677, "grad_norm": 0.2400079220533371, "learning_rate": 8e-05, "loss": 1.4602, "step": 5539 }, { "epoch": 0.7561075474273236, "grad_norm": 0.23185886442661285, "learning_rate": 8e-05, "loss": 1.4367, "step": 5540 }, { "epoch": 0.7562440289340794, "grad_norm": 0.23354509472846985, "learning_rate": 8e-05, "loss": 1.4401, "step": 5541 }, { "epoch": 0.7563805104408353, "grad_norm": 0.23393850028514862, "learning_rate": 8e-05, "loss": 1.4732, "step": 5542 }, { "epoch": 0.7565169919475911, "grad_norm": 0.24462071061134338, "learning_rate": 8e-05, "loss": 1.5128, "step": 5543 }, { "epoch": 0.756653473454347, "grad_norm": 0.23847441375255585, "learning_rate": 8e-05, "loss": 1.5163, "step": 5544 }, { "epoch": 0.7567899549611028, "grad_norm": 0.23977838456630707, "learning_rate": 8e-05, "loss": 1.4759, "step": 5545 }, { "epoch": 0.7569264364678586, "grad_norm": 0.22594425082206726, "learning_rate": 8e-05, "loss": 1.4486, "step": 5546 }, { "epoch": 0.7570629179746144, "grad_norm": 0.22592028975486755, "learning_rate": 8e-05, "loss": 1.4175, "step": 5547 }, { "epoch": 0.7571993994813703, "grad_norm": 0.23018395900726318, "learning_rate": 8e-05, "loss": 1.5055, "step": 5548 }, { "epoch": 0.7573358809881261, "grad_norm": 0.23036062717437744, "learning_rate": 8e-05, "loss": 1.3831, "step": 5549 }, { "epoch": 0.7574723624948819, "grad_norm": 0.2336152344942093, "learning_rate": 8e-05, "loss": 1.4317, "step": 5550 }, { "epoch": 0.7576088440016377, "grad_norm": 0.24702188372612, "learning_rate": 8e-05, "loss": 1.5849, "step": 5551 }, { "epoch": 0.7577453255083936, "grad_norm": 0.23044301569461823, "learning_rate": 8e-05, "loss": 1.4932, "step": 5552 }, { "epoch": 0.7578818070151494, "grad_norm": 0.23151962459087372, "learning_rate": 8e-05, "loss": 1.4686, "step": 5553 }, { "epoch": 0.7580182885219053, "grad_norm": 0.2313581109046936, "learning_rate": 8e-05, "loss": 1.5005, "step": 5554 }, { "epoch": 0.7581547700286612, "grad_norm": 0.2368733286857605, "learning_rate": 8e-05, "loss": 1.466, "step": 5555 }, { "epoch": 0.758291251535417, "grad_norm": 0.23817236721515656, "learning_rate": 8e-05, "loss": 1.4798, "step": 5556 }, { "epoch": 0.7584277330421728, "grad_norm": 0.22685439884662628, "learning_rate": 8e-05, "loss": 1.4202, "step": 5557 }, { "epoch": 0.7585642145489286, "grad_norm": 0.2357075810432434, "learning_rate": 8e-05, "loss": 1.4679, "step": 5558 }, { "epoch": 0.7587006960556845, "grad_norm": 0.22319969534873962, "learning_rate": 8e-05, "loss": 1.4108, "step": 5559 }, { "epoch": 0.7588371775624403, "grad_norm": 0.23965150117874146, "learning_rate": 8e-05, "loss": 1.4917, "step": 5560 }, { "epoch": 0.7589736590691961, "grad_norm": 0.2285008579492569, "learning_rate": 8e-05, "loss": 1.4787, "step": 5561 }, { "epoch": 0.7591101405759519, "grad_norm": 0.26351869106292725, "learning_rate": 8e-05, "loss": 1.5554, "step": 5562 }, { "epoch": 0.7592466220827078, "grad_norm": 0.23487111926078796, "learning_rate": 8e-05, "loss": 1.4344, "step": 5563 }, { "epoch": 0.7593831035894636, "grad_norm": 0.2296253740787506, "learning_rate": 8e-05, "loss": 1.3947, "step": 5564 }, { "epoch": 0.7595195850962194, "grad_norm": 0.234577476978302, "learning_rate": 8e-05, "loss": 1.4392, "step": 5565 }, { "epoch": 0.7596560666029752, "grad_norm": 0.23296037316322327, "learning_rate": 8e-05, "loss": 1.4815, "step": 5566 }, { "epoch": 0.7597925481097312, "grad_norm": 0.23680196702480316, "learning_rate": 8e-05, "loss": 1.4446, "step": 5567 }, { "epoch": 0.759929029616487, "grad_norm": 0.22917480766773224, "learning_rate": 8e-05, "loss": 1.4617, "step": 5568 }, { "epoch": 0.7600655111232428, "grad_norm": 0.2350766658782959, "learning_rate": 8e-05, "loss": 1.445, "step": 5569 }, { "epoch": 0.7602019926299987, "grad_norm": 0.2342926561832428, "learning_rate": 8e-05, "loss": 1.4788, "step": 5570 }, { "epoch": 0.7603384741367545, "grad_norm": 0.2328982949256897, "learning_rate": 8e-05, "loss": 1.4398, "step": 5571 }, { "epoch": 0.7604749556435103, "grad_norm": 0.24676910042762756, "learning_rate": 8e-05, "loss": 1.4355, "step": 5572 }, { "epoch": 0.7606114371502661, "grad_norm": 0.24163806438446045, "learning_rate": 8e-05, "loss": 1.5374, "step": 5573 }, { "epoch": 0.760747918657022, "grad_norm": 0.23059327900409698, "learning_rate": 8e-05, "loss": 1.3655, "step": 5574 }, { "epoch": 0.7608844001637778, "grad_norm": 0.2266145795583725, "learning_rate": 8e-05, "loss": 1.3812, "step": 5575 }, { "epoch": 0.7610208816705336, "grad_norm": 0.22920896112918854, "learning_rate": 8e-05, "loss": 1.4125, "step": 5576 }, { "epoch": 0.7611573631772894, "grad_norm": 0.23637108504772186, "learning_rate": 8e-05, "loss": 1.4725, "step": 5577 }, { "epoch": 0.7612938446840453, "grad_norm": 0.22649773955345154, "learning_rate": 8e-05, "loss": 1.4226, "step": 5578 }, { "epoch": 0.7614303261908012, "grad_norm": 0.23809078335762024, "learning_rate": 8e-05, "loss": 1.4471, "step": 5579 }, { "epoch": 0.761566807697557, "grad_norm": 0.23114313185214996, "learning_rate": 8e-05, "loss": 1.4383, "step": 5580 }, { "epoch": 0.7617032892043129, "grad_norm": 0.23516236245632172, "learning_rate": 8e-05, "loss": 1.4248, "step": 5581 }, { "epoch": 0.7618397707110687, "grad_norm": 0.24090434610843658, "learning_rate": 8e-05, "loss": 1.4772, "step": 5582 }, { "epoch": 0.7619762522178245, "grad_norm": 0.24062126874923706, "learning_rate": 8e-05, "loss": 1.4741, "step": 5583 }, { "epoch": 0.7621127337245803, "grad_norm": 0.2393125742673874, "learning_rate": 8e-05, "loss": 1.4725, "step": 5584 }, { "epoch": 0.7622492152313362, "grad_norm": 0.24033060669898987, "learning_rate": 8e-05, "loss": 1.4362, "step": 5585 }, { "epoch": 0.762385696738092, "grad_norm": 0.2344217449426651, "learning_rate": 8e-05, "loss": 1.4526, "step": 5586 }, { "epoch": 0.7625221782448478, "grad_norm": 0.22848385572433472, "learning_rate": 8e-05, "loss": 1.4073, "step": 5587 }, { "epoch": 0.7626586597516036, "grad_norm": 0.22943542897701263, "learning_rate": 8e-05, "loss": 1.4558, "step": 5588 }, { "epoch": 0.7627951412583595, "grad_norm": 0.24466146528720856, "learning_rate": 8e-05, "loss": 1.473, "step": 5589 }, { "epoch": 0.7629316227651153, "grad_norm": 0.23269988596439362, "learning_rate": 8e-05, "loss": 1.4473, "step": 5590 }, { "epoch": 0.7630681042718711, "grad_norm": 0.24303074181079865, "learning_rate": 8e-05, "loss": 1.4473, "step": 5591 }, { "epoch": 0.763204585778627, "grad_norm": 0.23157356679439545, "learning_rate": 8e-05, "loss": 1.3407, "step": 5592 }, { "epoch": 0.7633410672853829, "grad_norm": 0.24543976783752441, "learning_rate": 8e-05, "loss": 1.4786, "step": 5593 }, { "epoch": 0.7634775487921387, "grad_norm": 0.23173443973064423, "learning_rate": 8e-05, "loss": 1.4225, "step": 5594 }, { "epoch": 0.7636140302988945, "grad_norm": 0.23874112963676453, "learning_rate": 8e-05, "loss": 1.3724, "step": 5595 }, { "epoch": 0.7637505118056503, "grad_norm": 0.23418624699115753, "learning_rate": 8e-05, "loss": 1.3807, "step": 5596 }, { "epoch": 0.7638869933124062, "grad_norm": 0.23073336482048035, "learning_rate": 8e-05, "loss": 1.4673, "step": 5597 }, { "epoch": 0.764023474819162, "grad_norm": 0.23574082553386688, "learning_rate": 8e-05, "loss": 1.4335, "step": 5598 }, { "epoch": 0.7641599563259178, "grad_norm": 0.2334708720445633, "learning_rate": 8e-05, "loss": 1.4122, "step": 5599 }, { "epoch": 0.7642964378326736, "grad_norm": 0.2397107183933258, "learning_rate": 8e-05, "loss": 1.4233, "step": 5600 }, { "epoch": 0.7644329193394295, "grad_norm": 0.24064216017723083, "learning_rate": 8e-05, "loss": 1.4608, "step": 5601 }, { "epoch": 0.7645694008461853, "grad_norm": 0.24223579466342926, "learning_rate": 8e-05, "loss": 1.5542, "step": 5602 }, { "epoch": 0.7647058823529411, "grad_norm": 0.23448175191879272, "learning_rate": 8e-05, "loss": 1.4491, "step": 5603 }, { "epoch": 0.7648423638596971, "grad_norm": 0.2318779081106186, "learning_rate": 8e-05, "loss": 1.4085, "step": 5604 }, { "epoch": 0.7649788453664529, "grad_norm": 0.25499752163887024, "learning_rate": 8e-05, "loss": 1.4255, "step": 5605 }, { "epoch": 0.7651153268732087, "grad_norm": 0.2482333928346634, "learning_rate": 8e-05, "loss": 1.449, "step": 5606 }, { "epoch": 0.7652518083799645, "grad_norm": 0.23240843415260315, "learning_rate": 8e-05, "loss": 1.4588, "step": 5607 }, { "epoch": 0.7653882898867204, "grad_norm": 0.23425838351249695, "learning_rate": 8e-05, "loss": 1.4097, "step": 5608 }, { "epoch": 0.7655247713934762, "grad_norm": 0.22358208894729614, "learning_rate": 8e-05, "loss": 1.4147, "step": 5609 }, { "epoch": 0.765661252900232, "grad_norm": 0.2454998642206192, "learning_rate": 8e-05, "loss": 1.5202, "step": 5610 }, { "epoch": 0.7657977344069878, "grad_norm": 0.240730419754982, "learning_rate": 8e-05, "loss": 1.4494, "step": 5611 }, { "epoch": 0.7659342159137437, "grad_norm": 0.2335236817598343, "learning_rate": 8e-05, "loss": 1.5027, "step": 5612 }, { "epoch": 0.7660706974204995, "grad_norm": 0.23862071335315704, "learning_rate": 8e-05, "loss": 1.5036, "step": 5613 }, { "epoch": 0.7662071789272553, "grad_norm": 0.24564708769321442, "learning_rate": 8e-05, "loss": 1.4168, "step": 5614 }, { "epoch": 0.7663436604340111, "grad_norm": 0.23729662597179413, "learning_rate": 8e-05, "loss": 1.4141, "step": 5615 }, { "epoch": 0.7664801419407671, "grad_norm": 0.24129271507263184, "learning_rate": 8e-05, "loss": 1.3664, "step": 5616 }, { "epoch": 0.7666166234475229, "grad_norm": 0.23258242011070251, "learning_rate": 8e-05, "loss": 1.4111, "step": 5617 }, { "epoch": 0.7667531049542787, "grad_norm": 0.23705439269542694, "learning_rate": 8e-05, "loss": 1.4453, "step": 5618 }, { "epoch": 0.7668895864610346, "grad_norm": 0.2408711463212967, "learning_rate": 8e-05, "loss": 1.4681, "step": 5619 }, { "epoch": 0.7670260679677904, "grad_norm": 0.2217932790517807, "learning_rate": 8e-05, "loss": 1.3924, "step": 5620 }, { "epoch": 0.7671625494745462, "grad_norm": 0.23366065323352814, "learning_rate": 8e-05, "loss": 1.4182, "step": 5621 }, { "epoch": 0.767299030981302, "grad_norm": 0.24863748252391815, "learning_rate": 8e-05, "loss": 1.4476, "step": 5622 }, { "epoch": 0.7674355124880579, "grad_norm": 0.23968449234962463, "learning_rate": 8e-05, "loss": 1.4032, "step": 5623 }, { "epoch": 0.7675719939948137, "grad_norm": 0.23493170738220215, "learning_rate": 8e-05, "loss": 1.4457, "step": 5624 }, { "epoch": 0.7677084755015695, "grad_norm": 0.23007884621620178, "learning_rate": 8e-05, "loss": 1.5188, "step": 5625 }, { "epoch": 0.7678449570083253, "grad_norm": 0.23192553222179413, "learning_rate": 8e-05, "loss": 1.4293, "step": 5626 }, { "epoch": 0.7679814385150812, "grad_norm": 0.2348790019750595, "learning_rate": 8e-05, "loss": 1.4815, "step": 5627 }, { "epoch": 0.768117920021837, "grad_norm": 0.2361660897731781, "learning_rate": 8e-05, "loss": 1.4908, "step": 5628 }, { "epoch": 0.7682544015285929, "grad_norm": 0.2330189347267151, "learning_rate": 8e-05, "loss": 1.4654, "step": 5629 }, { "epoch": 0.7683908830353487, "grad_norm": 0.23572468757629395, "learning_rate": 8e-05, "loss": 1.4528, "step": 5630 }, { "epoch": 0.7685273645421046, "grad_norm": 0.238274484872818, "learning_rate": 8e-05, "loss": 1.4934, "step": 5631 }, { "epoch": 0.7686638460488604, "grad_norm": 0.23053410649299622, "learning_rate": 8e-05, "loss": 1.413, "step": 5632 }, { "epoch": 0.7688003275556162, "grad_norm": 0.23708906769752502, "learning_rate": 8e-05, "loss": 1.4098, "step": 5633 }, { "epoch": 0.768936809062372, "grad_norm": 0.2415619194507599, "learning_rate": 8e-05, "loss": 1.364, "step": 5634 }, { "epoch": 0.7690732905691279, "grad_norm": 0.24315476417541504, "learning_rate": 8e-05, "loss": 1.4573, "step": 5635 }, { "epoch": 0.7692097720758837, "grad_norm": 0.22907719016075134, "learning_rate": 8e-05, "loss": 1.4112, "step": 5636 }, { "epoch": 0.7693462535826395, "grad_norm": 0.2439599186182022, "learning_rate": 8e-05, "loss": 1.4219, "step": 5637 }, { "epoch": 0.7694827350893954, "grad_norm": 0.23360145092010498, "learning_rate": 8e-05, "loss": 1.4618, "step": 5638 }, { "epoch": 0.7696192165961512, "grad_norm": 0.2358836531639099, "learning_rate": 8e-05, "loss": 1.4917, "step": 5639 }, { "epoch": 0.769755698102907, "grad_norm": 0.22917979955673218, "learning_rate": 8e-05, "loss": 1.5067, "step": 5640 }, { "epoch": 0.7698921796096629, "grad_norm": 0.22038787603378296, "learning_rate": 8e-05, "loss": 1.4085, "step": 5641 }, { "epoch": 0.7700286611164188, "grad_norm": 0.23641133308410645, "learning_rate": 8e-05, "loss": 1.4813, "step": 5642 }, { "epoch": 0.7701651426231746, "grad_norm": 0.2468692809343338, "learning_rate": 8e-05, "loss": 1.474, "step": 5643 }, { "epoch": 0.7703016241299304, "grad_norm": 0.23734407126903534, "learning_rate": 8e-05, "loss": 1.4122, "step": 5644 }, { "epoch": 0.7704381056366862, "grad_norm": 0.24895983934402466, "learning_rate": 8e-05, "loss": 1.4075, "step": 5645 }, { "epoch": 0.7705745871434421, "grad_norm": 0.23009754717350006, "learning_rate": 8e-05, "loss": 1.3741, "step": 5646 }, { "epoch": 0.7707110686501979, "grad_norm": 0.23996686935424805, "learning_rate": 8e-05, "loss": 1.4986, "step": 5647 }, { "epoch": 0.7708475501569537, "grad_norm": 0.23327364027500153, "learning_rate": 8e-05, "loss": 1.4329, "step": 5648 }, { "epoch": 0.7709840316637095, "grad_norm": 0.23154279589653015, "learning_rate": 8e-05, "loss": 1.4216, "step": 5649 }, { "epoch": 0.7711205131704654, "grad_norm": 0.2311466485261917, "learning_rate": 8e-05, "loss": 1.4872, "step": 5650 }, { "epoch": 0.7712569946772212, "grad_norm": 0.2321779429912567, "learning_rate": 8e-05, "loss": 1.439, "step": 5651 }, { "epoch": 0.771393476183977, "grad_norm": 0.23593227565288544, "learning_rate": 8e-05, "loss": 1.4213, "step": 5652 }, { "epoch": 0.771529957690733, "grad_norm": 0.23477531969547272, "learning_rate": 8e-05, "loss": 1.4634, "step": 5653 }, { "epoch": 0.7716664391974888, "grad_norm": 0.2579036355018616, "learning_rate": 8e-05, "loss": 1.4445, "step": 5654 }, { "epoch": 0.7718029207042446, "grad_norm": 0.23362836241722107, "learning_rate": 8e-05, "loss": 1.418, "step": 5655 }, { "epoch": 0.7719394022110004, "grad_norm": 0.24268262088298798, "learning_rate": 8e-05, "loss": 1.4537, "step": 5656 }, { "epoch": 0.7720758837177563, "grad_norm": 0.2433621734380722, "learning_rate": 8e-05, "loss": 1.4536, "step": 5657 }, { "epoch": 0.7722123652245121, "grad_norm": 0.22754450142383575, "learning_rate": 8e-05, "loss": 1.4487, "step": 5658 }, { "epoch": 0.7723488467312679, "grad_norm": 0.24308529496192932, "learning_rate": 8e-05, "loss": 1.4534, "step": 5659 }, { "epoch": 0.7724853282380237, "grad_norm": 0.2411012500524521, "learning_rate": 8e-05, "loss": 1.4678, "step": 5660 }, { "epoch": 0.7726218097447796, "grad_norm": 0.24843473732471466, "learning_rate": 8e-05, "loss": 1.4702, "step": 5661 }, { "epoch": 0.7727582912515354, "grad_norm": 0.24544332921504974, "learning_rate": 8e-05, "loss": 1.4372, "step": 5662 }, { "epoch": 0.7728947727582912, "grad_norm": 0.22988121211528778, "learning_rate": 8e-05, "loss": 1.421, "step": 5663 }, { "epoch": 0.773031254265047, "grad_norm": 0.23389500379562378, "learning_rate": 8e-05, "loss": 1.3946, "step": 5664 }, { "epoch": 0.7731677357718029, "grad_norm": 0.24111759662628174, "learning_rate": 8e-05, "loss": 1.4396, "step": 5665 }, { "epoch": 0.7733042172785588, "grad_norm": 0.2369510531425476, "learning_rate": 8e-05, "loss": 1.4239, "step": 5666 }, { "epoch": 0.7734406987853146, "grad_norm": 0.237503319978714, "learning_rate": 8e-05, "loss": 1.436, "step": 5667 }, { "epoch": 0.7735771802920705, "grad_norm": 0.24653634428977966, "learning_rate": 8e-05, "loss": 1.4855, "step": 5668 }, { "epoch": 0.7737136617988263, "grad_norm": 0.23189572989940643, "learning_rate": 8e-05, "loss": 1.4411, "step": 5669 }, { "epoch": 0.7738501433055821, "grad_norm": 0.22882738709449768, "learning_rate": 8e-05, "loss": 1.4562, "step": 5670 }, { "epoch": 0.7739866248123379, "grad_norm": 0.23600970208644867, "learning_rate": 8e-05, "loss": 1.4833, "step": 5671 }, { "epoch": 0.7741231063190938, "grad_norm": 0.2194918394088745, "learning_rate": 8e-05, "loss": 1.3665, "step": 5672 }, { "epoch": 0.7742595878258496, "grad_norm": 0.2363158017396927, "learning_rate": 8e-05, "loss": 1.4552, "step": 5673 }, { "epoch": 0.7743960693326054, "grad_norm": 0.2377772182226181, "learning_rate": 8e-05, "loss": 1.4212, "step": 5674 }, { "epoch": 0.7745325508393612, "grad_norm": 0.24195361137390137, "learning_rate": 8e-05, "loss": 1.5101, "step": 5675 }, { "epoch": 0.7746690323461171, "grad_norm": 0.23967291414737701, "learning_rate": 8e-05, "loss": 1.4892, "step": 5676 }, { "epoch": 0.7748055138528729, "grad_norm": 0.24107171595096588, "learning_rate": 8e-05, "loss": 1.4645, "step": 5677 }, { "epoch": 0.7749419953596288, "grad_norm": 0.23616956174373627, "learning_rate": 8e-05, "loss": 1.4338, "step": 5678 }, { "epoch": 0.7750784768663846, "grad_norm": 0.24143461883068085, "learning_rate": 8e-05, "loss": 1.505, "step": 5679 }, { "epoch": 0.7752149583731405, "grad_norm": 0.22714421153068542, "learning_rate": 8e-05, "loss": 1.4414, "step": 5680 }, { "epoch": 0.7753514398798963, "grad_norm": 0.232874795794487, "learning_rate": 8e-05, "loss": 1.4379, "step": 5681 }, { "epoch": 0.7754879213866521, "grad_norm": 0.24673987925052643, "learning_rate": 8e-05, "loss": 1.4433, "step": 5682 }, { "epoch": 0.775624402893408, "grad_norm": 0.23305775225162506, "learning_rate": 8e-05, "loss": 1.4677, "step": 5683 }, { "epoch": 0.7757608844001638, "grad_norm": 0.22705726325511932, "learning_rate": 8e-05, "loss": 1.4363, "step": 5684 }, { "epoch": 0.7758973659069196, "grad_norm": 0.23369674384593964, "learning_rate": 8e-05, "loss": 1.398, "step": 5685 }, { "epoch": 0.7760338474136754, "grad_norm": 0.23789799213409424, "learning_rate": 8e-05, "loss": 1.4561, "step": 5686 }, { "epoch": 0.7761703289204313, "grad_norm": 0.2333407700061798, "learning_rate": 8e-05, "loss": 1.393, "step": 5687 }, { "epoch": 0.7763068104271871, "grad_norm": 0.2326601892709732, "learning_rate": 8e-05, "loss": 1.3852, "step": 5688 }, { "epoch": 0.7764432919339429, "grad_norm": 0.23092429339885712, "learning_rate": 8e-05, "loss": 1.4325, "step": 5689 }, { "epoch": 0.7765797734406988, "grad_norm": 0.24055393040180206, "learning_rate": 8e-05, "loss": 1.4251, "step": 5690 }, { "epoch": 0.7767162549474547, "grad_norm": 0.23387575149536133, "learning_rate": 8e-05, "loss": 1.4462, "step": 5691 }, { "epoch": 0.7768527364542105, "grad_norm": 0.2318478226661682, "learning_rate": 8e-05, "loss": 1.5058, "step": 5692 }, { "epoch": 0.7769892179609663, "grad_norm": 0.23904550075531006, "learning_rate": 8e-05, "loss": 1.5472, "step": 5693 }, { "epoch": 0.7771256994677221, "grad_norm": 0.23743632435798645, "learning_rate": 8e-05, "loss": 1.3661, "step": 5694 }, { "epoch": 0.777262180974478, "grad_norm": 0.23476843535900116, "learning_rate": 8e-05, "loss": 1.4702, "step": 5695 }, { "epoch": 0.7773986624812338, "grad_norm": 0.2407900094985962, "learning_rate": 8e-05, "loss": 1.4776, "step": 5696 }, { "epoch": 0.7775351439879896, "grad_norm": 0.2450813502073288, "learning_rate": 8e-05, "loss": 1.4641, "step": 5697 }, { "epoch": 0.7776716254947454, "grad_norm": 0.24269409477710724, "learning_rate": 8e-05, "loss": 1.4379, "step": 5698 }, { "epoch": 0.7778081070015013, "grad_norm": 0.23397736251354218, "learning_rate": 8e-05, "loss": 1.4238, "step": 5699 }, { "epoch": 0.7779445885082571, "grad_norm": 0.24252445995807648, "learning_rate": 8e-05, "loss": 1.4749, "step": 5700 }, { "epoch": 0.7780810700150129, "grad_norm": 0.23396548628807068, "learning_rate": 8e-05, "loss": 1.4336, "step": 5701 }, { "epoch": 0.7782175515217687, "grad_norm": 0.23370850086212158, "learning_rate": 8e-05, "loss": 1.4516, "step": 5702 }, { "epoch": 0.7783540330285247, "grad_norm": 0.23984333872795105, "learning_rate": 8e-05, "loss": 1.4726, "step": 5703 }, { "epoch": 0.7784905145352805, "grad_norm": 0.24317516386508942, "learning_rate": 8e-05, "loss": 1.4991, "step": 5704 }, { "epoch": 0.7786269960420363, "grad_norm": 0.2452613115310669, "learning_rate": 8e-05, "loss": 1.4498, "step": 5705 }, { "epoch": 0.7787634775487922, "grad_norm": 0.23710697889328003, "learning_rate": 8e-05, "loss": 1.3645, "step": 5706 }, { "epoch": 0.778899959055548, "grad_norm": 0.22427794337272644, "learning_rate": 8e-05, "loss": 1.4195, "step": 5707 }, { "epoch": 0.7790364405623038, "grad_norm": 0.23783913254737854, "learning_rate": 8e-05, "loss": 1.3569, "step": 5708 }, { "epoch": 0.7791729220690596, "grad_norm": 0.25410500168800354, "learning_rate": 8e-05, "loss": 1.3909, "step": 5709 }, { "epoch": 0.7793094035758155, "grad_norm": 0.24704043567180634, "learning_rate": 8e-05, "loss": 1.4314, "step": 5710 }, { "epoch": 0.7794458850825713, "grad_norm": 0.24660030007362366, "learning_rate": 8e-05, "loss": 1.4998, "step": 5711 }, { "epoch": 0.7795823665893271, "grad_norm": 0.23867273330688477, "learning_rate": 8e-05, "loss": 1.4869, "step": 5712 }, { "epoch": 0.7797188480960829, "grad_norm": 0.23602385818958282, "learning_rate": 8e-05, "loss": 1.4554, "step": 5713 }, { "epoch": 0.7798553296028388, "grad_norm": 0.23208419978618622, "learning_rate": 8e-05, "loss": 1.4558, "step": 5714 }, { "epoch": 0.7799918111095947, "grad_norm": 0.23959524929523468, "learning_rate": 8e-05, "loss": 1.4352, "step": 5715 }, { "epoch": 0.7801282926163505, "grad_norm": 0.24005185067653656, "learning_rate": 8e-05, "loss": 1.3903, "step": 5716 }, { "epoch": 0.7802647741231064, "grad_norm": 0.23749057948589325, "learning_rate": 8e-05, "loss": 1.4379, "step": 5717 }, { "epoch": 0.7804012556298622, "grad_norm": 0.23761336505413055, "learning_rate": 8e-05, "loss": 1.3876, "step": 5718 }, { "epoch": 0.780537737136618, "grad_norm": 0.2384224534034729, "learning_rate": 8e-05, "loss": 1.5117, "step": 5719 }, { "epoch": 0.7806742186433738, "grad_norm": 0.23048044741153717, "learning_rate": 8e-05, "loss": 1.3681, "step": 5720 }, { "epoch": 0.7808107001501297, "grad_norm": 0.24642930924892426, "learning_rate": 8e-05, "loss": 1.4614, "step": 5721 }, { "epoch": 0.7809471816568855, "grad_norm": 0.23711124062538147, "learning_rate": 8e-05, "loss": 1.4458, "step": 5722 }, { "epoch": 0.7810836631636413, "grad_norm": 0.2385333627462387, "learning_rate": 8e-05, "loss": 1.4467, "step": 5723 }, { "epoch": 0.7812201446703971, "grad_norm": 0.2434920370578766, "learning_rate": 8e-05, "loss": 1.4035, "step": 5724 }, { "epoch": 0.781356626177153, "grad_norm": 0.22612306475639343, "learning_rate": 8e-05, "loss": 1.4451, "step": 5725 }, { "epoch": 0.7814931076839088, "grad_norm": 0.23159319162368774, "learning_rate": 8e-05, "loss": 1.3625, "step": 5726 }, { "epoch": 0.7816295891906647, "grad_norm": 0.25096338987350464, "learning_rate": 8e-05, "loss": 1.4767, "step": 5727 }, { "epoch": 0.7817660706974205, "grad_norm": 0.245771586894989, "learning_rate": 8e-05, "loss": 1.4348, "step": 5728 }, { "epoch": 0.7819025522041764, "grad_norm": 0.25852423906326294, "learning_rate": 8e-05, "loss": 1.3989, "step": 5729 }, { "epoch": 0.7820390337109322, "grad_norm": 0.23343418538570404, "learning_rate": 8e-05, "loss": 1.4398, "step": 5730 }, { "epoch": 0.782175515217688, "grad_norm": 0.23580682277679443, "learning_rate": 8e-05, "loss": 1.4248, "step": 5731 }, { "epoch": 0.7823119967244438, "grad_norm": 0.24060769379138947, "learning_rate": 8e-05, "loss": 1.357, "step": 5732 }, { "epoch": 0.7824484782311997, "grad_norm": 0.2467980533838272, "learning_rate": 8e-05, "loss": 1.4721, "step": 5733 }, { "epoch": 0.7825849597379555, "grad_norm": 0.23783157765865326, "learning_rate": 8e-05, "loss": 1.4425, "step": 5734 }, { "epoch": 0.7827214412447113, "grad_norm": 0.2356017678976059, "learning_rate": 8e-05, "loss": 1.3779, "step": 5735 }, { "epoch": 0.7828579227514672, "grad_norm": 0.23383989930152893, "learning_rate": 8e-05, "loss": 1.4456, "step": 5736 }, { "epoch": 0.782994404258223, "grad_norm": 0.2379591017961502, "learning_rate": 8e-05, "loss": 1.4702, "step": 5737 }, { "epoch": 0.7831308857649788, "grad_norm": 0.23061588406562805, "learning_rate": 8e-05, "loss": 1.4843, "step": 5738 }, { "epoch": 0.7832673672717346, "grad_norm": 0.2344280630350113, "learning_rate": 8e-05, "loss": 1.4278, "step": 5739 }, { "epoch": 0.7834038487784906, "grad_norm": 0.2349577397108078, "learning_rate": 8e-05, "loss": 1.4269, "step": 5740 }, { "epoch": 0.7835403302852464, "grad_norm": 0.23601274192333221, "learning_rate": 8e-05, "loss": 1.4177, "step": 5741 }, { "epoch": 0.7836768117920022, "grad_norm": 0.2434464991092682, "learning_rate": 8e-05, "loss": 1.4496, "step": 5742 }, { "epoch": 0.783813293298758, "grad_norm": 0.2316821664571762, "learning_rate": 8e-05, "loss": 1.4508, "step": 5743 }, { "epoch": 0.7839497748055139, "grad_norm": 0.22899247705936432, "learning_rate": 8e-05, "loss": 1.4463, "step": 5744 }, { "epoch": 0.7840862563122697, "grad_norm": 0.24308471381664276, "learning_rate": 8e-05, "loss": 1.5011, "step": 5745 }, { "epoch": 0.7842227378190255, "grad_norm": 0.23294997215270996, "learning_rate": 8e-05, "loss": 1.4024, "step": 5746 }, { "epoch": 0.7843592193257813, "grad_norm": 0.2277994304895401, "learning_rate": 8e-05, "loss": 1.4456, "step": 5747 }, { "epoch": 0.7844957008325372, "grad_norm": 0.23825617134571075, "learning_rate": 8e-05, "loss": 1.3822, "step": 5748 }, { "epoch": 0.784632182339293, "grad_norm": 0.24636642634868622, "learning_rate": 8e-05, "loss": 1.4211, "step": 5749 }, { "epoch": 0.7847686638460488, "grad_norm": 0.2409563660621643, "learning_rate": 8e-05, "loss": 1.4095, "step": 5750 }, { "epoch": 0.7849051453528046, "grad_norm": 0.2474474310874939, "learning_rate": 8e-05, "loss": 1.4486, "step": 5751 }, { "epoch": 0.7850416268595606, "grad_norm": 0.24080826342105865, "learning_rate": 8e-05, "loss": 1.4531, "step": 5752 }, { "epoch": 0.7851781083663164, "grad_norm": 0.2336939126253128, "learning_rate": 8e-05, "loss": 1.4528, "step": 5753 }, { "epoch": 0.7853145898730722, "grad_norm": 0.23342564702033997, "learning_rate": 8e-05, "loss": 1.4825, "step": 5754 }, { "epoch": 0.7854510713798281, "grad_norm": 0.22775305807590485, "learning_rate": 8e-05, "loss": 1.3998, "step": 5755 }, { "epoch": 0.7855875528865839, "grad_norm": 0.2438337206840515, "learning_rate": 8e-05, "loss": 1.5008, "step": 5756 }, { "epoch": 0.7857240343933397, "grad_norm": 0.2393786758184433, "learning_rate": 8e-05, "loss": 1.4594, "step": 5757 }, { "epoch": 0.7858605159000955, "grad_norm": 0.23313851654529572, "learning_rate": 8e-05, "loss": 1.3964, "step": 5758 }, { "epoch": 0.7859969974068514, "grad_norm": 0.23362167179584503, "learning_rate": 8e-05, "loss": 1.4476, "step": 5759 }, { "epoch": 0.7861334789136072, "grad_norm": 0.24982048571109772, "learning_rate": 8e-05, "loss": 1.4717, "step": 5760 }, { "epoch": 0.786269960420363, "grad_norm": 0.24634385108947754, "learning_rate": 8e-05, "loss": 1.425, "step": 5761 }, { "epoch": 0.7864064419271188, "grad_norm": 0.24184654653072357, "learning_rate": 8e-05, "loss": 1.4611, "step": 5762 }, { "epoch": 0.7865429234338747, "grad_norm": 0.22984904050827026, "learning_rate": 8e-05, "loss": 1.4161, "step": 5763 }, { "epoch": 0.7866794049406306, "grad_norm": 0.2341347485780716, "learning_rate": 8e-05, "loss": 1.4024, "step": 5764 }, { "epoch": 0.7868158864473864, "grad_norm": 0.22549642622470856, "learning_rate": 8e-05, "loss": 1.3682, "step": 5765 }, { "epoch": 0.7869523679541423, "grad_norm": 0.2493850290775299, "learning_rate": 8e-05, "loss": 1.4396, "step": 5766 }, { "epoch": 0.7870888494608981, "grad_norm": 0.22926899790763855, "learning_rate": 8e-05, "loss": 1.3776, "step": 5767 }, { "epoch": 0.7872253309676539, "grad_norm": 0.2457343190908432, "learning_rate": 8e-05, "loss": 1.4704, "step": 5768 }, { "epoch": 0.7873618124744097, "grad_norm": 0.2367512583732605, "learning_rate": 8e-05, "loss": 1.3657, "step": 5769 }, { "epoch": 0.7874982939811656, "grad_norm": 0.23465313017368317, "learning_rate": 8e-05, "loss": 1.4309, "step": 5770 }, { "epoch": 0.7876347754879214, "grad_norm": 0.2475089132785797, "learning_rate": 8e-05, "loss": 1.5076, "step": 5771 }, { "epoch": 0.7877712569946772, "grad_norm": 0.2477979063987732, "learning_rate": 8e-05, "loss": 1.5706, "step": 5772 }, { "epoch": 0.787907738501433, "grad_norm": 0.22576430439949036, "learning_rate": 8e-05, "loss": 1.4507, "step": 5773 }, { "epoch": 0.7880442200081889, "grad_norm": 0.23471754789352417, "learning_rate": 8e-05, "loss": 1.4922, "step": 5774 }, { "epoch": 0.7881807015149447, "grad_norm": 0.24101491272449493, "learning_rate": 8e-05, "loss": 1.5019, "step": 5775 }, { "epoch": 0.7883171830217005, "grad_norm": 0.24841561913490295, "learning_rate": 8e-05, "loss": 1.4695, "step": 5776 }, { "epoch": 0.7884536645284564, "grad_norm": 0.25206390023231506, "learning_rate": 8e-05, "loss": 1.4677, "step": 5777 }, { "epoch": 0.7885901460352123, "grad_norm": 0.26844990253448486, "learning_rate": 8e-05, "loss": 1.5151, "step": 5778 }, { "epoch": 0.7887266275419681, "grad_norm": 0.22879324853420258, "learning_rate": 8e-05, "loss": 1.37, "step": 5779 }, { "epoch": 0.7888631090487239, "grad_norm": 0.23788219690322876, "learning_rate": 8e-05, "loss": 1.4148, "step": 5780 }, { "epoch": 0.7889995905554797, "grad_norm": 0.2271324247121811, "learning_rate": 8e-05, "loss": 1.4103, "step": 5781 }, { "epoch": 0.7891360720622356, "grad_norm": 0.24694578349590302, "learning_rate": 8e-05, "loss": 1.4345, "step": 5782 }, { "epoch": 0.7892725535689914, "grad_norm": 0.23529550433158875, "learning_rate": 8e-05, "loss": 1.4663, "step": 5783 }, { "epoch": 0.7894090350757472, "grad_norm": 0.24664567410945892, "learning_rate": 8e-05, "loss": 1.4482, "step": 5784 }, { "epoch": 0.789545516582503, "grad_norm": 0.2197350710630417, "learning_rate": 8e-05, "loss": 1.3375, "step": 5785 }, { "epoch": 0.7896819980892589, "grad_norm": 0.23956067860126495, "learning_rate": 8e-05, "loss": 1.4265, "step": 5786 }, { "epoch": 0.7898184795960147, "grad_norm": 0.23393844068050385, "learning_rate": 8e-05, "loss": 1.4376, "step": 5787 }, { "epoch": 0.7899549611027705, "grad_norm": 0.23544368147850037, "learning_rate": 8e-05, "loss": 1.4456, "step": 5788 }, { "epoch": 0.7900914426095265, "grad_norm": 0.2364151030778885, "learning_rate": 8e-05, "loss": 1.4842, "step": 5789 }, { "epoch": 0.7902279241162823, "grad_norm": 0.23336894810199738, "learning_rate": 8e-05, "loss": 1.4068, "step": 5790 }, { "epoch": 0.7903644056230381, "grad_norm": 0.23475079238414764, "learning_rate": 8e-05, "loss": 1.4086, "step": 5791 }, { "epoch": 0.7905008871297939, "grad_norm": 0.24072013795375824, "learning_rate": 8e-05, "loss": 1.4448, "step": 5792 }, { "epoch": 0.7906373686365498, "grad_norm": 0.24194838106632233, "learning_rate": 8e-05, "loss": 1.4594, "step": 5793 }, { "epoch": 0.7907738501433056, "grad_norm": 0.2304919958114624, "learning_rate": 8e-05, "loss": 1.432, "step": 5794 }, { "epoch": 0.7909103316500614, "grad_norm": 0.23333878815174103, "learning_rate": 8e-05, "loss": 1.4229, "step": 5795 }, { "epoch": 0.7910468131568172, "grad_norm": 0.22615814208984375, "learning_rate": 8e-05, "loss": 1.4414, "step": 5796 }, { "epoch": 0.7911832946635731, "grad_norm": 0.23355723917484283, "learning_rate": 8e-05, "loss": 1.4421, "step": 5797 }, { "epoch": 0.7913197761703289, "grad_norm": 0.25367406010627747, "learning_rate": 8e-05, "loss": 1.4863, "step": 5798 }, { "epoch": 0.7914562576770847, "grad_norm": 0.23473483324050903, "learning_rate": 8e-05, "loss": 1.4234, "step": 5799 }, { "epoch": 0.7915927391838405, "grad_norm": 0.23245465755462646, "learning_rate": 8e-05, "loss": 1.4088, "step": 5800 }, { "epoch": 0.7917292206905964, "grad_norm": 0.23365731537342072, "learning_rate": 8e-05, "loss": 1.4332, "step": 5801 }, { "epoch": 0.7918657021973523, "grad_norm": 0.24205321073532104, "learning_rate": 8e-05, "loss": 1.4949, "step": 5802 }, { "epoch": 0.7920021837041081, "grad_norm": 0.2506902515888214, "learning_rate": 8e-05, "loss": 1.5303, "step": 5803 }, { "epoch": 0.792138665210864, "grad_norm": 0.2402496486902237, "learning_rate": 8e-05, "loss": 1.4584, "step": 5804 }, { "epoch": 0.7922751467176198, "grad_norm": 0.240568608045578, "learning_rate": 8e-05, "loss": 1.4864, "step": 5805 }, { "epoch": 0.7924116282243756, "grad_norm": 0.2441534847021103, "learning_rate": 8e-05, "loss": 1.4719, "step": 5806 }, { "epoch": 0.7925481097311314, "grad_norm": 0.2453349232673645, "learning_rate": 8e-05, "loss": 1.5038, "step": 5807 }, { "epoch": 0.7926845912378873, "grad_norm": 0.22573626041412354, "learning_rate": 8e-05, "loss": 1.3912, "step": 5808 }, { "epoch": 0.7928210727446431, "grad_norm": 0.24056889116764069, "learning_rate": 8e-05, "loss": 1.4893, "step": 5809 }, { "epoch": 0.7929575542513989, "grad_norm": 0.24801048636436462, "learning_rate": 8e-05, "loss": 1.4677, "step": 5810 }, { "epoch": 0.7930940357581547, "grad_norm": 0.23811832070350647, "learning_rate": 8e-05, "loss": 1.3905, "step": 5811 }, { "epoch": 0.7932305172649106, "grad_norm": 0.23620720207691193, "learning_rate": 8e-05, "loss": 1.4383, "step": 5812 }, { "epoch": 0.7933669987716664, "grad_norm": 0.22912539541721344, "learning_rate": 8e-05, "loss": 1.4018, "step": 5813 }, { "epoch": 0.7935034802784223, "grad_norm": 0.24289178848266602, "learning_rate": 8e-05, "loss": 1.4613, "step": 5814 }, { "epoch": 0.7936399617851781, "grad_norm": 0.2459285706281662, "learning_rate": 8e-05, "loss": 1.4901, "step": 5815 }, { "epoch": 0.793776443291934, "grad_norm": 0.239617258310318, "learning_rate": 8e-05, "loss": 1.4626, "step": 5816 }, { "epoch": 0.7939129247986898, "grad_norm": 0.24111562967300415, "learning_rate": 8e-05, "loss": 1.4356, "step": 5817 }, { "epoch": 0.7940494063054456, "grad_norm": 0.23637214303016663, "learning_rate": 8e-05, "loss": 1.4121, "step": 5818 }, { "epoch": 0.7941858878122015, "grad_norm": 0.24557903409004211, "learning_rate": 8e-05, "loss": 1.4186, "step": 5819 }, { "epoch": 0.7943223693189573, "grad_norm": 0.24201151728630066, "learning_rate": 8e-05, "loss": 1.477, "step": 5820 }, { "epoch": 0.7944588508257131, "grad_norm": 0.23630541563034058, "learning_rate": 8e-05, "loss": 1.3717, "step": 5821 }, { "epoch": 0.7945953323324689, "grad_norm": 0.23564426600933075, "learning_rate": 8e-05, "loss": 1.4142, "step": 5822 }, { "epoch": 0.7947318138392248, "grad_norm": 0.24715179204940796, "learning_rate": 8e-05, "loss": 1.4402, "step": 5823 }, { "epoch": 0.7948682953459806, "grad_norm": 0.24091872572898865, "learning_rate": 8e-05, "loss": 1.4807, "step": 5824 }, { "epoch": 0.7950047768527364, "grad_norm": 0.2430531084537506, "learning_rate": 8e-05, "loss": 1.4111, "step": 5825 }, { "epoch": 0.7951412583594923, "grad_norm": 0.2375899702310562, "learning_rate": 8e-05, "loss": 1.4413, "step": 5826 }, { "epoch": 0.7952777398662482, "grad_norm": 0.25006020069122314, "learning_rate": 8e-05, "loss": 1.4227, "step": 5827 }, { "epoch": 0.795414221373004, "grad_norm": 0.23300251364707947, "learning_rate": 8e-05, "loss": 1.427, "step": 5828 }, { "epoch": 0.7955507028797598, "grad_norm": 0.23727747797966003, "learning_rate": 8e-05, "loss": 1.4795, "step": 5829 }, { "epoch": 0.7956871843865156, "grad_norm": 0.22807112336158752, "learning_rate": 8e-05, "loss": 1.3708, "step": 5830 }, { "epoch": 0.7958236658932715, "grad_norm": 0.22905097901821136, "learning_rate": 8e-05, "loss": 1.4038, "step": 5831 }, { "epoch": 0.7959601474000273, "grad_norm": 0.2557128071784973, "learning_rate": 8e-05, "loss": 1.4473, "step": 5832 }, { "epoch": 0.7960966289067831, "grad_norm": 0.23994258046150208, "learning_rate": 8e-05, "loss": 1.3798, "step": 5833 }, { "epoch": 0.796233110413539, "grad_norm": 0.24385660886764526, "learning_rate": 8e-05, "loss": 1.4311, "step": 5834 }, { "epoch": 0.7963695919202948, "grad_norm": 0.23541179299354553, "learning_rate": 8e-05, "loss": 1.392, "step": 5835 }, { "epoch": 0.7965060734270506, "grad_norm": 0.248274564743042, "learning_rate": 8e-05, "loss": 1.4952, "step": 5836 }, { "epoch": 0.7966425549338064, "grad_norm": 0.23888981342315674, "learning_rate": 8e-05, "loss": 1.4273, "step": 5837 }, { "epoch": 0.7967790364405622, "grad_norm": 0.24138200283050537, "learning_rate": 8e-05, "loss": 1.4854, "step": 5838 }, { "epoch": 0.7969155179473182, "grad_norm": 0.2429000735282898, "learning_rate": 8e-05, "loss": 1.5035, "step": 5839 }, { "epoch": 0.797051999454074, "grad_norm": 0.23733998835086823, "learning_rate": 8e-05, "loss": 1.4668, "step": 5840 }, { "epoch": 0.7971884809608298, "grad_norm": 0.23367184400558472, "learning_rate": 8e-05, "loss": 1.4765, "step": 5841 }, { "epoch": 0.7973249624675857, "grad_norm": 0.22999729216098785, "learning_rate": 8e-05, "loss": 1.412, "step": 5842 }, { "epoch": 0.7974614439743415, "grad_norm": 0.22672493755817413, "learning_rate": 8e-05, "loss": 1.3822, "step": 5843 }, { "epoch": 0.7975979254810973, "grad_norm": 0.23378343880176544, "learning_rate": 8e-05, "loss": 1.3647, "step": 5844 }, { "epoch": 0.7977344069878531, "grad_norm": 0.24148738384246826, "learning_rate": 8e-05, "loss": 1.4245, "step": 5845 }, { "epoch": 0.797870888494609, "grad_norm": 0.2250978648662567, "learning_rate": 8e-05, "loss": 1.3769, "step": 5846 }, { "epoch": 0.7980073700013648, "grad_norm": 0.2321162074804306, "learning_rate": 8e-05, "loss": 1.403, "step": 5847 }, { "epoch": 0.7981438515081206, "grad_norm": 0.24674364924430847, "learning_rate": 8e-05, "loss": 1.4469, "step": 5848 }, { "epoch": 0.7982803330148764, "grad_norm": 0.24724207818508148, "learning_rate": 8e-05, "loss": 1.4511, "step": 5849 }, { "epoch": 0.7984168145216323, "grad_norm": 0.23696810007095337, "learning_rate": 8e-05, "loss": 1.4158, "step": 5850 }, { "epoch": 0.7985532960283882, "grad_norm": 0.2390514463186264, "learning_rate": 8e-05, "loss": 1.5139, "step": 5851 }, { "epoch": 0.798689777535144, "grad_norm": 0.23427166044712067, "learning_rate": 8e-05, "loss": 1.3764, "step": 5852 }, { "epoch": 0.7988262590418999, "grad_norm": 0.23149864375591278, "learning_rate": 8e-05, "loss": 1.4326, "step": 5853 }, { "epoch": 0.7989627405486557, "grad_norm": 0.23544394969940186, "learning_rate": 8e-05, "loss": 1.5142, "step": 5854 }, { "epoch": 0.7990992220554115, "grad_norm": 0.2442428022623062, "learning_rate": 8e-05, "loss": 1.4347, "step": 5855 }, { "epoch": 0.7992357035621673, "grad_norm": 0.24647310376167297, "learning_rate": 8e-05, "loss": 1.4717, "step": 5856 }, { "epoch": 0.7993721850689232, "grad_norm": 0.23924070596694946, "learning_rate": 8e-05, "loss": 1.4641, "step": 5857 }, { "epoch": 0.799508666575679, "grad_norm": 0.236973837018013, "learning_rate": 8e-05, "loss": 1.3933, "step": 5858 }, { "epoch": 0.7996451480824348, "grad_norm": 0.25349777936935425, "learning_rate": 8e-05, "loss": 1.4755, "step": 5859 }, { "epoch": 0.7997816295891906, "grad_norm": 0.2330014854669571, "learning_rate": 8e-05, "loss": 1.486, "step": 5860 }, { "epoch": 0.7999181110959465, "grad_norm": 0.24783207476139069, "learning_rate": 8e-05, "loss": 1.4333, "step": 5861 }, { "epoch": 0.8000545926027023, "grad_norm": 0.2516767680644989, "learning_rate": 8e-05, "loss": 1.4884, "step": 5862 }, { "epoch": 0.8001910741094582, "grad_norm": 0.2340412139892578, "learning_rate": 8e-05, "loss": 1.4092, "step": 5863 }, { "epoch": 0.800327555616214, "grad_norm": 0.24123387038707733, "learning_rate": 8e-05, "loss": 1.3885, "step": 5864 }, { "epoch": 0.8004640371229699, "grad_norm": 0.2422478049993515, "learning_rate": 8e-05, "loss": 1.4936, "step": 5865 }, { "epoch": 0.8006005186297257, "grad_norm": 0.2396695464849472, "learning_rate": 8e-05, "loss": 1.4435, "step": 5866 }, { "epoch": 0.8007370001364815, "grad_norm": 0.23689812421798706, "learning_rate": 8e-05, "loss": 1.4554, "step": 5867 }, { "epoch": 0.8008734816432374, "grad_norm": 0.23886382579803467, "learning_rate": 8e-05, "loss": 1.4543, "step": 5868 }, { "epoch": 0.8010099631499932, "grad_norm": 0.23431850969791412, "learning_rate": 8e-05, "loss": 1.4788, "step": 5869 }, { "epoch": 0.801146444656749, "grad_norm": 0.2402949333190918, "learning_rate": 8e-05, "loss": 1.4972, "step": 5870 }, { "epoch": 0.8012829261635048, "grad_norm": 0.25060686469078064, "learning_rate": 8e-05, "loss": 1.4427, "step": 5871 }, { "epoch": 0.8014194076702607, "grad_norm": 0.23107130825519562, "learning_rate": 8e-05, "loss": 1.389, "step": 5872 }, { "epoch": 0.8015558891770165, "grad_norm": 0.24028481543064117, "learning_rate": 8e-05, "loss": 1.5149, "step": 5873 }, { "epoch": 0.8016923706837723, "grad_norm": 0.23434941470623016, "learning_rate": 8e-05, "loss": 1.4166, "step": 5874 }, { "epoch": 0.8018288521905281, "grad_norm": 0.2351999282836914, "learning_rate": 8e-05, "loss": 1.4357, "step": 5875 }, { "epoch": 0.8019653336972841, "grad_norm": 0.2402610331773758, "learning_rate": 8e-05, "loss": 1.415, "step": 5876 }, { "epoch": 0.8021018152040399, "grad_norm": 0.23841916024684906, "learning_rate": 8e-05, "loss": 1.4514, "step": 5877 }, { "epoch": 0.8022382967107957, "grad_norm": 0.24063760042190552, "learning_rate": 8e-05, "loss": 1.4285, "step": 5878 }, { "epoch": 0.8023747782175515, "grad_norm": 0.2387036234140396, "learning_rate": 8e-05, "loss": 1.4689, "step": 5879 }, { "epoch": 0.8025112597243074, "grad_norm": 0.23176898062229156, "learning_rate": 8e-05, "loss": 1.4389, "step": 5880 }, { "epoch": 0.8026477412310632, "grad_norm": 0.2307896763086319, "learning_rate": 8e-05, "loss": 1.4082, "step": 5881 }, { "epoch": 0.802784222737819, "grad_norm": 0.2440449446439743, "learning_rate": 8e-05, "loss": 1.4287, "step": 5882 }, { "epoch": 0.8029207042445748, "grad_norm": 0.23851165175437927, "learning_rate": 8e-05, "loss": 1.4477, "step": 5883 }, { "epoch": 0.8030571857513307, "grad_norm": 0.23855364322662354, "learning_rate": 8e-05, "loss": 1.4599, "step": 5884 }, { "epoch": 0.8031936672580865, "grad_norm": 0.24413608014583588, "learning_rate": 8e-05, "loss": 1.4607, "step": 5885 }, { "epoch": 0.8033301487648423, "grad_norm": 0.23794734477996826, "learning_rate": 8e-05, "loss": 1.4959, "step": 5886 }, { "epoch": 0.8034666302715981, "grad_norm": 0.237937331199646, "learning_rate": 8e-05, "loss": 1.466, "step": 5887 }, { "epoch": 0.8036031117783541, "grad_norm": 0.2399560958147049, "learning_rate": 8e-05, "loss": 1.4419, "step": 5888 }, { "epoch": 0.8037395932851099, "grad_norm": 0.23926006257534027, "learning_rate": 8e-05, "loss": 1.3714, "step": 5889 }, { "epoch": 0.8038760747918657, "grad_norm": 0.23486462235450745, "learning_rate": 8e-05, "loss": 1.4651, "step": 5890 }, { "epoch": 0.8040125562986216, "grad_norm": 0.24520274996757507, "learning_rate": 8e-05, "loss": 1.4521, "step": 5891 }, { "epoch": 0.8041490378053774, "grad_norm": 0.25203585624694824, "learning_rate": 8e-05, "loss": 1.4801, "step": 5892 }, { "epoch": 0.8042855193121332, "grad_norm": 0.24074327945709229, "learning_rate": 8e-05, "loss": 1.4297, "step": 5893 }, { "epoch": 0.804422000818889, "grad_norm": 0.23027200996875763, "learning_rate": 8e-05, "loss": 1.4871, "step": 5894 }, { "epoch": 0.8045584823256449, "grad_norm": 0.24325014650821686, "learning_rate": 8e-05, "loss": 1.4093, "step": 5895 }, { "epoch": 0.8046949638324007, "grad_norm": 0.23581823706626892, "learning_rate": 8e-05, "loss": 1.3517, "step": 5896 }, { "epoch": 0.8048314453391565, "grad_norm": 0.2278321087360382, "learning_rate": 8e-05, "loss": 1.4118, "step": 5897 }, { "epoch": 0.8049679268459123, "grad_norm": 0.2396267056465149, "learning_rate": 8e-05, "loss": 1.457, "step": 5898 }, { "epoch": 0.8051044083526682, "grad_norm": 0.2458927184343338, "learning_rate": 8e-05, "loss": 1.4593, "step": 5899 }, { "epoch": 0.8052408898594241, "grad_norm": 0.23562055826187134, "learning_rate": 8e-05, "loss": 1.4458, "step": 5900 }, { "epoch": 0.8053773713661799, "grad_norm": 0.237859308719635, "learning_rate": 8e-05, "loss": 1.4898, "step": 5901 }, { "epoch": 0.8055138528729358, "grad_norm": 0.2368531972169876, "learning_rate": 8e-05, "loss": 1.4266, "step": 5902 }, { "epoch": 0.8056503343796916, "grad_norm": 0.23780620098114014, "learning_rate": 8e-05, "loss": 1.4279, "step": 5903 }, { "epoch": 0.8057868158864474, "grad_norm": 0.2389354705810547, "learning_rate": 8e-05, "loss": 1.4008, "step": 5904 }, { "epoch": 0.8059232973932032, "grad_norm": 0.23608151078224182, "learning_rate": 8e-05, "loss": 1.4178, "step": 5905 }, { "epoch": 0.8060597788999591, "grad_norm": 0.23239582777023315, "learning_rate": 8e-05, "loss": 1.4277, "step": 5906 }, { "epoch": 0.8061962604067149, "grad_norm": 0.2311106026172638, "learning_rate": 8e-05, "loss": 1.4384, "step": 5907 }, { "epoch": 0.8063327419134707, "grad_norm": 0.23089265823364258, "learning_rate": 8e-05, "loss": 1.4571, "step": 5908 }, { "epoch": 0.8064692234202265, "grad_norm": 0.2423102706670761, "learning_rate": 8e-05, "loss": 1.4439, "step": 5909 }, { "epoch": 0.8066057049269824, "grad_norm": 0.23792174458503723, "learning_rate": 8e-05, "loss": 1.5063, "step": 5910 }, { "epoch": 0.8067421864337382, "grad_norm": 0.237611323595047, "learning_rate": 8e-05, "loss": 1.4733, "step": 5911 }, { "epoch": 0.806878667940494, "grad_norm": 0.23351748287677765, "learning_rate": 8e-05, "loss": 1.369, "step": 5912 }, { "epoch": 0.80701514944725, "grad_norm": 0.23891650140285492, "learning_rate": 8e-05, "loss": 1.4121, "step": 5913 }, { "epoch": 0.8071516309540058, "grad_norm": 0.25928664207458496, "learning_rate": 8e-05, "loss": 1.5029, "step": 5914 }, { "epoch": 0.8072881124607616, "grad_norm": 0.2522110044956207, "learning_rate": 8e-05, "loss": 1.4541, "step": 5915 }, { "epoch": 0.8074245939675174, "grad_norm": 0.23376616835594177, "learning_rate": 8e-05, "loss": 1.4315, "step": 5916 }, { "epoch": 0.8075610754742732, "grad_norm": 0.2309272289276123, "learning_rate": 8e-05, "loss": 1.485, "step": 5917 }, { "epoch": 0.8076975569810291, "grad_norm": 0.23657318949699402, "learning_rate": 8e-05, "loss": 1.4249, "step": 5918 }, { "epoch": 0.8078340384877849, "grad_norm": 0.24033544957637787, "learning_rate": 8e-05, "loss": 1.455, "step": 5919 }, { "epoch": 0.8079705199945407, "grad_norm": 0.22799037396907806, "learning_rate": 8e-05, "loss": 1.4172, "step": 5920 }, { "epoch": 0.8081070015012966, "grad_norm": 0.23210591077804565, "learning_rate": 8e-05, "loss": 1.407, "step": 5921 }, { "epoch": 0.8082434830080524, "grad_norm": 0.24740038812160492, "learning_rate": 8e-05, "loss": 1.4427, "step": 5922 }, { "epoch": 0.8083799645148082, "grad_norm": 0.25189754366874695, "learning_rate": 8e-05, "loss": 1.4061, "step": 5923 }, { "epoch": 0.808516446021564, "grad_norm": 0.24986734986305237, "learning_rate": 8e-05, "loss": 1.4796, "step": 5924 }, { "epoch": 0.80865292752832, "grad_norm": 0.24241511523723602, "learning_rate": 8e-05, "loss": 1.4593, "step": 5925 }, { "epoch": 0.8087894090350758, "grad_norm": 0.2458614856004715, "learning_rate": 8e-05, "loss": 1.4674, "step": 5926 }, { "epoch": 0.8089258905418316, "grad_norm": 0.24609558284282684, "learning_rate": 8e-05, "loss": 1.4593, "step": 5927 }, { "epoch": 0.8090623720485874, "grad_norm": 0.22759337723255157, "learning_rate": 8e-05, "loss": 1.3743, "step": 5928 }, { "epoch": 0.8091988535553433, "grad_norm": 0.23357951641082764, "learning_rate": 8e-05, "loss": 1.4003, "step": 5929 }, { "epoch": 0.8093353350620991, "grad_norm": 0.23718619346618652, "learning_rate": 8e-05, "loss": 1.3949, "step": 5930 }, { "epoch": 0.8094718165688549, "grad_norm": 0.23792606592178345, "learning_rate": 8e-05, "loss": 1.4581, "step": 5931 }, { "epoch": 0.8096082980756107, "grad_norm": 0.23944678902626038, "learning_rate": 8e-05, "loss": 1.4094, "step": 5932 }, { "epoch": 0.8097447795823666, "grad_norm": 0.24578560888767242, "learning_rate": 8e-05, "loss": 1.4737, "step": 5933 }, { "epoch": 0.8098812610891224, "grad_norm": 0.23896247148513794, "learning_rate": 8e-05, "loss": 1.3881, "step": 5934 }, { "epoch": 0.8100177425958782, "grad_norm": 0.2452411949634552, "learning_rate": 8e-05, "loss": 1.4772, "step": 5935 }, { "epoch": 0.810154224102634, "grad_norm": 0.2374831736087799, "learning_rate": 8e-05, "loss": 1.454, "step": 5936 }, { "epoch": 0.81029070560939, "grad_norm": 0.2370942085981369, "learning_rate": 8e-05, "loss": 1.3752, "step": 5937 }, { "epoch": 0.8104271871161458, "grad_norm": 0.23482942581176758, "learning_rate": 8e-05, "loss": 1.3415, "step": 5938 }, { "epoch": 0.8105636686229016, "grad_norm": 0.23110154271125793, "learning_rate": 8e-05, "loss": 1.3819, "step": 5939 }, { "epoch": 0.8107001501296575, "grad_norm": 0.2393108457326889, "learning_rate": 8e-05, "loss": 1.4367, "step": 5940 }, { "epoch": 0.8108366316364133, "grad_norm": 0.2416023164987564, "learning_rate": 8e-05, "loss": 1.509, "step": 5941 }, { "epoch": 0.8109731131431691, "grad_norm": 0.24556922912597656, "learning_rate": 8e-05, "loss": 1.4722, "step": 5942 }, { "epoch": 0.8111095946499249, "grad_norm": 0.24086499214172363, "learning_rate": 8e-05, "loss": 1.4327, "step": 5943 }, { "epoch": 0.8112460761566808, "grad_norm": 0.23886358737945557, "learning_rate": 8e-05, "loss": 1.411, "step": 5944 }, { "epoch": 0.8113825576634366, "grad_norm": 0.2562863826751709, "learning_rate": 8e-05, "loss": 1.4752, "step": 5945 }, { "epoch": 0.8115190391701924, "grad_norm": 0.2509612739086151, "learning_rate": 8e-05, "loss": 1.5043, "step": 5946 }, { "epoch": 0.8116555206769482, "grad_norm": 0.2386801689863205, "learning_rate": 8e-05, "loss": 1.4172, "step": 5947 }, { "epoch": 0.8117920021837041, "grad_norm": 0.2328961342573166, "learning_rate": 8e-05, "loss": 1.407, "step": 5948 }, { "epoch": 0.8119284836904599, "grad_norm": 0.23096033930778503, "learning_rate": 8e-05, "loss": 1.4256, "step": 5949 }, { "epoch": 0.8120649651972158, "grad_norm": 0.2541479170322418, "learning_rate": 8e-05, "loss": 1.5027, "step": 5950 }, { "epoch": 0.8122014467039717, "grad_norm": 0.24616189301013947, "learning_rate": 8e-05, "loss": 1.4782, "step": 5951 }, { "epoch": 0.8123379282107275, "grad_norm": 0.24321970343589783, "learning_rate": 8e-05, "loss": 1.3949, "step": 5952 }, { "epoch": 0.8124744097174833, "grad_norm": 0.24209776520729065, "learning_rate": 8e-05, "loss": 1.4533, "step": 5953 }, { "epoch": 0.8126108912242391, "grad_norm": 0.24842603504657745, "learning_rate": 8e-05, "loss": 1.423, "step": 5954 }, { "epoch": 0.812747372730995, "grad_norm": 0.2409479022026062, "learning_rate": 8e-05, "loss": 1.3904, "step": 5955 }, { "epoch": 0.8128838542377508, "grad_norm": 0.2370232343673706, "learning_rate": 8e-05, "loss": 1.466, "step": 5956 }, { "epoch": 0.8130203357445066, "grad_norm": 0.23967401683330536, "learning_rate": 8e-05, "loss": 1.4501, "step": 5957 }, { "epoch": 0.8131568172512624, "grad_norm": 0.23843559622764587, "learning_rate": 8e-05, "loss": 1.4027, "step": 5958 }, { "epoch": 0.8132932987580183, "grad_norm": 0.2317013293504715, "learning_rate": 8e-05, "loss": 1.4067, "step": 5959 }, { "epoch": 0.8134297802647741, "grad_norm": 0.2434590607881546, "learning_rate": 8e-05, "loss": 1.3923, "step": 5960 }, { "epoch": 0.8135662617715299, "grad_norm": 0.22869227826595306, "learning_rate": 8e-05, "loss": 1.3462, "step": 5961 }, { "epoch": 0.8137027432782858, "grad_norm": 0.2440468966960907, "learning_rate": 8e-05, "loss": 1.392, "step": 5962 }, { "epoch": 0.8138392247850417, "grad_norm": 0.2343759834766388, "learning_rate": 8e-05, "loss": 1.3712, "step": 5963 }, { "epoch": 0.8139757062917975, "grad_norm": 0.2400575429201126, "learning_rate": 8e-05, "loss": 1.4022, "step": 5964 }, { "epoch": 0.8141121877985533, "grad_norm": 0.23770837485790253, "learning_rate": 8e-05, "loss": 1.4469, "step": 5965 }, { "epoch": 0.8142486693053091, "grad_norm": 0.23036539554595947, "learning_rate": 8e-05, "loss": 1.4232, "step": 5966 }, { "epoch": 0.814385150812065, "grad_norm": 0.24349334836006165, "learning_rate": 8e-05, "loss": 1.4061, "step": 5967 }, { "epoch": 0.8145216323188208, "grad_norm": 0.24997101724147797, "learning_rate": 8e-05, "loss": 1.4756, "step": 5968 }, { "epoch": 0.8146581138255766, "grad_norm": 0.24676865339279175, "learning_rate": 8e-05, "loss": 1.4677, "step": 5969 }, { "epoch": 0.8147945953323324, "grad_norm": 0.2559300363063812, "learning_rate": 8e-05, "loss": 1.5051, "step": 5970 }, { "epoch": 0.8149310768390883, "grad_norm": 0.23717670142650604, "learning_rate": 8e-05, "loss": 1.4996, "step": 5971 }, { "epoch": 0.8150675583458441, "grad_norm": 0.24250823259353638, "learning_rate": 8e-05, "loss": 1.4873, "step": 5972 }, { "epoch": 0.8152040398525999, "grad_norm": 0.23389583826065063, "learning_rate": 8e-05, "loss": 1.455, "step": 5973 }, { "epoch": 0.8153405213593558, "grad_norm": 0.24449385702610016, "learning_rate": 8e-05, "loss": 1.4517, "step": 5974 }, { "epoch": 0.8154770028661117, "grad_norm": 0.2449869066476822, "learning_rate": 8e-05, "loss": 1.4597, "step": 5975 }, { "epoch": 0.8156134843728675, "grad_norm": 0.24148793518543243, "learning_rate": 8e-05, "loss": 1.4208, "step": 5976 }, { "epoch": 0.8157499658796233, "grad_norm": 0.24427126348018646, "learning_rate": 8e-05, "loss": 1.4333, "step": 5977 }, { "epoch": 0.8158864473863792, "grad_norm": 0.23902970552444458, "learning_rate": 8e-05, "loss": 1.4883, "step": 5978 }, { "epoch": 0.816022928893135, "grad_norm": 0.2562556564807892, "learning_rate": 8e-05, "loss": 1.4677, "step": 5979 }, { "epoch": 0.8161594103998908, "grad_norm": 0.2340439260005951, "learning_rate": 8e-05, "loss": 1.3745, "step": 5980 }, { "epoch": 0.8162958919066466, "grad_norm": 0.22747012972831726, "learning_rate": 8e-05, "loss": 1.4594, "step": 5981 }, { "epoch": 0.8164323734134025, "grad_norm": 0.2271149903535843, "learning_rate": 8e-05, "loss": 1.3993, "step": 5982 }, { "epoch": 0.8165688549201583, "grad_norm": 0.2450660765171051, "learning_rate": 8e-05, "loss": 1.414, "step": 5983 }, { "epoch": 0.8167053364269141, "grad_norm": 0.23845788836479187, "learning_rate": 8e-05, "loss": 1.4452, "step": 5984 }, { "epoch": 0.81684181793367, "grad_norm": 0.24085500836372375, "learning_rate": 8e-05, "loss": 1.4581, "step": 5985 }, { "epoch": 0.8169782994404258, "grad_norm": 0.24057720601558685, "learning_rate": 8e-05, "loss": 1.4643, "step": 5986 }, { "epoch": 0.8171147809471817, "grad_norm": 0.2450377643108368, "learning_rate": 8e-05, "loss": 1.49, "step": 5987 }, { "epoch": 0.8172512624539375, "grad_norm": 0.24217212200164795, "learning_rate": 8e-05, "loss": 1.4426, "step": 5988 }, { "epoch": 0.8173877439606934, "grad_norm": 0.23991000652313232, "learning_rate": 8e-05, "loss": 1.4512, "step": 5989 }, { "epoch": 0.8175242254674492, "grad_norm": 0.23082904517650604, "learning_rate": 8e-05, "loss": 1.4379, "step": 5990 }, { "epoch": 0.817660706974205, "grad_norm": 0.23655807971954346, "learning_rate": 8e-05, "loss": 1.376, "step": 5991 }, { "epoch": 0.8177971884809608, "grad_norm": 0.2400001734495163, "learning_rate": 8e-05, "loss": 1.4429, "step": 5992 }, { "epoch": 0.8179336699877167, "grad_norm": 0.24617899954319, "learning_rate": 8e-05, "loss": 1.5007, "step": 5993 }, { "epoch": 0.8180701514944725, "grad_norm": 0.24426016211509705, "learning_rate": 8e-05, "loss": 1.4215, "step": 5994 }, { "epoch": 0.8182066330012283, "grad_norm": 0.2322605848312378, "learning_rate": 8e-05, "loss": 1.4271, "step": 5995 }, { "epoch": 0.8183431145079841, "grad_norm": 0.23960435390472412, "learning_rate": 8e-05, "loss": 1.4642, "step": 5996 }, { "epoch": 0.81847959601474, "grad_norm": 0.23276013135910034, "learning_rate": 8e-05, "loss": 1.4466, "step": 5997 }, { "epoch": 0.8186160775214958, "grad_norm": 0.23943625390529633, "learning_rate": 8e-05, "loss": 1.4962, "step": 5998 }, { "epoch": 0.8187525590282517, "grad_norm": 0.23996341228485107, "learning_rate": 8e-05, "loss": 1.4147, "step": 5999 }, { "epoch": 0.8188890405350076, "grad_norm": 0.24511149525642395, "learning_rate": 8e-05, "loss": 1.3628, "step": 6000 } ], "logging_steps": 1, "max_steps": 7327, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.4159781870567424e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }