{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6249, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016002560409665546, "grad_norm": 11.34284496307373, "learning_rate": 5.000000000000001e-07, "loss": 2.1749, "step": 1 }, { "epoch": 0.0003200512081933109, "grad_norm": 10.574485778808594, "learning_rate": 1.0000000000000002e-06, "loss": 2.1855, "step": 2 }, { "epoch": 0.00048007681228996637, "grad_norm": 10.738248825073242, "learning_rate": 1.5e-06, "loss": 2.1209, "step": 3 }, { "epoch": 0.0006401024163866218, "grad_norm": 10.25924301147461, "learning_rate": 2.0000000000000003e-06, "loss": 2.1763, "step": 4 }, { "epoch": 0.0008001280204832773, "grad_norm": 7.499971866607666, "learning_rate": 2.5e-06, "loss": 2.0935, "step": 5 }, { "epoch": 0.0009601536245799327, "grad_norm": 10.077802658081055, "learning_rate": 3e-06, "loss": 2.2066, "step": 6 }, { "epoch": 0.0011201792286765883, "grad_norm": 8.050355911254883, "learning_rate": 3.5000000000000004e-06, "loss": 2.098, "step": 7 }, { "epoch": 0.0012802048327732437, "grad_norm": 7.47072172164917, "learning_rate": 4.000000000000001e-06, "loss": 2.0783, "step": 8 }, { "epoch": 0.0014402304368698992, "grad_norm": 6.722290992736816, "learning_rate": 4.5e-06, "loss": 2.0604, "step": 9 }, { "epoch": 0.0016002560409665546, "grad_norm": 4.592212677001953, "learning_rate": 5e-06, "loss": 2.0427, "step": 10 }, { "epoch": 0.0017602816450632101, "grad_norm": 2.971068859100342, "learning_rate": 5.500000000000001e-06, "loss": 1.9805, "step": 11 }, { "epoch": 0.0019203072491598655, "grad_norm": 2.5956528186798096, "learning_rate": 6e-06, "loss": 2.0171, "step": 12 }, { "epoch": 0.002080332853256521, "grad_norm": 2.343792200088501, "learning_rate": 6.5000000000000004e-06, "loss": 2.0078, "step": 13 }, { "epoch": 0.0022403584573531766, "grad_norm": 2.0429835319519043, "learning_rate": 7.000000000000001e-06, "loss": 1.9499, "step": 14 }, { "epoch": 0.002400384061449832, "grad_norm": 1.8754217624664307, "learning_rate": 7.5e-06, "loss": 1.9943, "step": 15 }, { "epoch": 0.0025604096655464873, "grad_norm": 1.4242690801620483, "learning_rate": 8.000000000000001e-06, "loss": 1.8418, "step": 16 }, { "epoch": 0.002720435269643143, "grad_norm": 1.2679270505905151, "learning_rate": 8.500000000000002e-06, "loss": 1.9199, "step": 17 }, { "epoch": 0.0028804608737397984, "grad_norm": 1.2673335075378418, "learning_rate": 9e-06, "loss": 1.8987, "step": 18 }, { "epoch": 0.003040486477836454, "grad_norm": 1.0493180751800537, "learning_rate": 9.5e-06, "loss": 1.8803, "step": 19 }, { "epoch": 0.003200512081933109, "grad_norm": 1.220038652420044, "learning_rate": 1e-05, "loss": 1.8639, "step": 20 }, { "epoch": 0.003360537686029765, "grad_norm": 1.1799148321151733, "learning_rate": 1.05e-05, "loss": 1.8885, "step": 21 }, { "epoch": 0.0035205632901264203, "grad_norm": 0.9612282514572144, "learning_rate": 1.1000000000000001e-05, "loss": 1.9135, "step": 22 }, { "epoch": 0.0036805888942230756, "grad_norm": 0.775345504283905, "learning_rate": 1.1500000000000002e-05, "loss": 1.7317, "step": 23 }, { "epoch": 0.003840614498319731, "grad_norm": 0.7976018190383911, "learning_rate": 1.2e-05, "loss": 1.7691, "step": 24 }, { "epoch": 0.004000640102416387, "grad_norm": 0.5619055032730103, "learning_rate": 1.25e-05, "loss": 1.7967, "step": 25 }, { "epoch": 0.004160665706513042, "grad_norm": 0.5536738634109497, "learning_rate": 1.3000000000000001e-05, "loss": 1.7197, "step": 26 }, { "epoch": 0.0043206913106096975, "grad_norm": 0.5844358801841736, "learning_rate": 1.3500000000000001e-05, "loss": 1.7827, "step": 27 }, { "epoch": 0.004480716914706353, "grad_norm": 0.5173511505126953, "learning_rate": 1.4000000000000001e-05, "loss": 1.8401, "step": 28 }, { "epoch": 0.004640742518803008, "grad_norm": 0.5080260038375854, "learning_rate": 1.45e-05, "loss": 1.8584, "step": 29 }, { "epoch": 0.004800768122899664, "grad_norm": 0.44315576553344727, "learning_rate": 1.5e-05, "loss": 1.7019, "step": 30 }, { "epoch": 0.00496079372699632, "grad_norm": 0.44345158338546753, "learning_rate": 1.55e-05, "loss": 1.8156, "step": 31 }, { "epoch": 0.005120819331092975, "grad_norm": 0.4278816878795624, "learning_rate": 1.6000000000000003e-05, "loss": 1.7656, "step": 32 }, { "epoch": 0.00528084493518963, "grad_norm": 0.4244326055049896, "learning_rate": 1.65e-05, "loss": 1.735, "step": 33 }, { "epoch": 0.005440870539286286, "grad_norm": 0.4457814693450928, "learning_rate": 1.7000000000000003e-05, "loss": 1.8361, "step": 34 }, { "epoch": 0.005600896143382941, "grad_norm": 0.4924764931201935, "learning_rate": 1.75e-05, "loss": 1.7414, "step": 35 }, { "epoch": 0.005760921747479597, "grad_norm": 0.46615612506866455, "learning_rate": 1.8e-05, "loss": 1.776, "step": 36 }, { "epoch": 0.005920947351576252, "grad_norm": 0.45521417260169983, "learning_rate": 1.85e-05, "loss": 1.7555, "step": 37 }, { "epoch": 0.006080972955672908, "grad_norm": 0.45014330744743347, "learning_rate": 1.9e-05, "loss": 1.7827, "step": 38 }, { "epoch": 0.006240998559769563, "grad_norm": 0.5036975741386414, "learning_rate": 1.9500000000000003e-05, "loss": 1.779, "step": 39 }, { "epoch": 0.006401024163866218, "grad_norm": 0.4792598485946655, "learning_rate": 2e-05, "loss": 1.7476, "step": 40 }, { "epoch": 0.006561049767962874, "grad_norm": 0.44314053654670715, "learning_rate": 2.05e-05, "loss": 1.7643, "step": 41 }, { "epoch": 0.00672107537205953, "grad_norm": 0.4028733968734741, "learning_rate": 2.1e-05, "loss": 1.7438, "step": 42 }, { "epoch": 0.006881100976156185, "grad_norm": 0.4061872959136963, "learning_rate": 2.15e-05, "loss": 1.7767, "step": 43 }, { "epoch": 0.0070411265802528405, "grad_norm": 0.4100809693336487, "learning_rate": 2.2000000000000003e-05, "loss": 1.7141, "step": 44 }, { "epoch": 0.007201152184349496, "grad_norm": 0.40247172117233276, "learning_rate": 2.25e-05, "loss": 1.7357, "step": 45 }, { "epoch": 0.007361177788446151, "grad_norm": 0.3977741003036499, "learning_rate": 2.3000000000000003e-05, "loss": 1.7251, "step": 46 }, { "epoch": 0.007521203392542807, "grad_norm": 0.41272786259651184, "learning_rate": 2.35e-05, "loss": 1.7491, "step": 47 }, { "epoch": 0.007681228996639462, "grad_norm": 0.3685744106769562, "learning_rate": 2.4e-05, "loss": 1.7486, "step": 48 }, { "epoch": 0.007841254600736118, "grad_norm": 0.3876553475856781, "learning_rate": 2.45e-05, "loss": 1.735, "step": 49 }, { "epoch": 0.008001280204832774, "grad_norm": 0.3493002653121948, "learning_rate": 2.5e-05, "loss": 1.7284, "step": 50 }, { "epoch": 0.00816130580892943, "grad_norm": 0.3616872727870941, "learning_rate": 2.5500000000000003e-05, "loss": 1.6993, "step": 51 }, { "epoch": 0.008321331413026083, "grad_norm": 0.3926169276237488, "learning_rate": 2.6000000000000002e-05, "loss": 1.6895, "step": 52 }, { "epoch": 0.008481357017122739, "grad_norm": 0.3967690169811249, "learning_rate": 2.6500000000000004e-05, "loss": 1.8314, "step": 53 }, { "epoch": 0.008641382621219395, "grad_norm": 0.37782537937164307, "learning_rate": 2.7000000000000002e-05, "loss": 1.6878, "step": 54 }, { "epoch": 0.00880140822531605, "grad_norm": 0.36874887347221375, "learning_rate": 2.7500000000000004e-05, "loss": 1.7651, "step": 55 }, { "epoch": 0.008961433829412706, "grad_norm": 0.34464073181152344, "learning_rate": 2.8000000000000003e-05, "loss": 1.6859, "step": 56 }, { "epoch": 0.009121459433509362, "grad_norm": 0.3537423610687256, "learning_rate": 2.8499999999999998e-05, "loss": 1.6514, "step": 57 }, { "epoch": 0.009281485037606016, "grad_norm": 0.4090113937854767, "learning_rate": 2.9e-05, "loss": 1.6612, "step": 58 }, { "epoch": 0.009441510641702672, "grad_norm": 0.36830559372901917, "learning_rate": 2.95e-05, "loss": 1.6359, "step": 59 }, { "epoch": 0.009601536245799328, "grad_norm": 0.4148927628993988, "learning_rate": 3e-05, "loss": 1.7846, "step": 60 }, { "epoch": 0.009761561849895984, "grad_norm": 0.3917708992958069, "learning_rate": 3.05e-05, "loss": 1.6903, "step": 61 }, { "epoch": 0.00992158745399264, "grad_norm": 0.38338273763656616, "learning_rate": 3.1e-05, "loss": 1.7197, "step": 62 }, { "epoch": 0.010081613058089293, "grad_norm": 0.36482760310173035, "learning_rate": 3.15e-05, "loss": 1.6927, "step": 63 }, { "epoch": 0.01024163866218595, "grad_norm": 0.3739733099937439, "learning_rate": 3.2000000000000005e-05, "loss": 1.6978, "step": 64 }, { "epoch": 0.010401664266282605, "grad_norm": 0.36455488204956055, "learning_rate": 3.2500000000000004e-05, "loss": 1.6662, "step": 65 }, { "epoch": 0.01056168987037926, "grad_norm": 0.366486519575119, "learning_rate": 3.3e-05, "loss": 1.6919, "step": 66 }, { "epoch": 0.010721715474475917, "grad_norm": 0.35902631282806396, "learning_rate": 3.35e-05, "loss": 1.7074, "step": 67 }, { "epoch": 0.010881741078572572, "grad_norm": 0.37250199913978577, "learning_rate": 3.4000000000000007e-05, "loss": 1.6608, "step": 68 }, { "epoch": 0.011041766682669226, "grad_norm": 0.3709702789783478, "learning_rate": 3.45e-05, "loss": 1.6521, "step": 69 }, { "epoch": 0.011201792286765882, "grad_norm": 0.3821541666984558, "learning_rate": 3.5e-05, "loss": 1.7547, "step": 70 }, { "epoch": 0.011361817890862538, "grad_norm": 0.3536457121372223, "learning_rate": 3.55e-05, "loss": 1.6463, "step": 71 }, { "epoch": 0.011521843494959194, "grad_norm": 0.3485606014728546, "learning_rate": 3.6e-05, "loss": 1.7201, "step": 72 }, { "epoch": 0.01168186909905585, "grad_norm": 0.3666270971298218, "learning_rate": 3.65e-05, "loss": 1.7478, "step": 73 }, { "epoch": 0.011841894703152504, "grad_norm": 0.3865877687931061, "learning_rate": 3.7e-05, "loss": 1.8086, "step": 74 }, { "epoch": 0.01200192030724916, "grad_norm": 0.37050506472587585, "learning_rate": 3.7500000000000003e-05, "loss": 1.7447, "step": 75 }, { "epoch": 0.012161945911345815, "grad_norm": 0.38805171847343445, "learning_rate": 3.8e-05, "loss": 1.6845, "step": 76 }, { "epoch": 0.012321971515442471, "grad_norm": 0.38227176666259766, "learning_rate": 3.85e-05, "loss": 1.7085, "step": 77 }, { "epoch": 0.012481997119539127, "grad_norm": 0.3793131709098816, "learning_rate": 3.9000000000000006e-05, "loss": 1.7084, "step": 78 }, { "epoch": 0.012642022723635783, "grad_norm": 0.3819984495639801, "learning_rate": 3.9500000000000005e-05, "loss": 1.7262, "step": 79 }, { "epoch": 0.012802048327732437, "grad_norm": 0.4131283760070801, "learning_rate": 4e-05, "loss": 1.7108, "step": 80 }, { "epoch": 0.012962073931829092, "grad_norm": 0.39372193813323975, "learning_rate": 4.05e-05, "loss": 1.6992, "step": 81 }, { "epoch": 0.013122099535925748, "grad_norm": 0.3659203350543976, "learning_rate": 4.1e-05, "loss": 1.732, "step": 82 }, { "epoch": 0.013282125140022404, "grad_norm": 0.35643014311790466, "learning_rate": 4.15e-05, "loss": 1.7575, "step": 83 }, { "epoch": 0.01344215074411906, "grad_norm": 0.36780306696891785, "learning_rate": 4.2e-05, "loss": 1.6059, "step": 84 }, { "epoch": 0.013602176348215714, "grad_norm": 0.3545387089252472, "learning_rate": 4.25e-05, "loss": 1.6921, "step": 85 }, { "epoch": 0.01376220195231237, "grad_norm": 0.379555344581604, "learning_rate": 4.3e-05, "loss": 1.7577, "step": 86 }, { "epoch": 0.013922227556409025, "grad_norm": 0.37808942794799805, "learning_rate": 4.35e-05, "loss": 1.6057, "step": 87 }, { "epoch": 0.014082253160505681, "grad_norm": 0.3477846682071686, "learning_rate": 4.4000000000000006e-05, "loss": 1.7359, "step": 88 }, { "epoch": 0.014242278764602337, "grad_norm": 0.42398858070373535, "learning_rate": 4.4500000000000004e-05, "loss": 1.7216, "step": 89 }, { "epoch": 0.014402304368698993, "grad_norm": 0.3676456809043884, "learning_rate": 4.5e-05, "loss": 1.6362, "step": 90 }, { "epoch": 0.014562329972795647, "grad_norm": 0.37227824330329895, "learning_rate": 4.55e-05, "loss": 1.6976, "step": 91 }, { "epoch": 0.014722355576892302, "grad_norm": 0.4170995056629181, "learning_rate": 4.600000000000001e-05, "loss": 1.7621, "step": 92 }, { "epoch": 0.014882381180988958, "grad_norm": 0.3852943480014801, "learning_rate": 4.6500000000000005e-05, "loss": 1.6702, "step": 93 }, { "epoch": 0.015042406785085614, "grad_norm": 0.35427019000053406, "learning_rate": 4.7e-05, "loss": 1.639, "step": 94 }, { "epoch": 0.01520243238918227, "grad_norm": 0.3681962490081787, "learning_rate": 4.75e-05, "loss": 1.7346, "step": 95 }, { "epoch": 0.015362457993278924, "grad_norm": 0.3738347291946411, "learning_rate": 4.8e-05, "loss": 1.7264, "step": 96 }, { "epoch": 0.01552248359737558, "grad_norm": 0.37646353244781494, "learning_rate": 4.85e-05, "loss": 1.6843, "step": 97 }, { "epoch": 0.015682509201472235, "grad_norm": 0.3784719705581665, "learning_rate": 4.9e-05, "loss": 1.6439, "step": 98 }, { "epoch": 0.01584253480556889, "grad_norm": 0.36535727977752686, "learning_rate": 4.9500000000000004e-05, "loss": 1.6856, "step": 99 }, { "epoch": 0.016002560409665547, "grad_norm": 0.3903757333755493, "learning_rate": 5e-05, "loss": 1.7105, "step": 100 }, { "epoch": 0.016162586013762203, "grad_norm": 0.4496576488018036, "learning_rate": 5e-05, "loss": 1.6356, "step": 101 }, { "epoch": 0.01632261161785886, "grad_norm": 0.4011246860027313, "learning_rate": 5e-05, "loss": 1.7016, "step": 102 }, { "epoch": 0.016482637221955514, "grad_norm": 0.4279324412345886, "learning_rate": 5e-05, "loss": 1.7469, "step": 103 }, { "epoch": 0.016642662826052167, "grad_norm": 0.3915554881095886, "learning_rate": 5e-05, "loss": 1.7445, "step": 104 }, { "epoch": 0.016802688430148822, "grad_norm": 0.3785167634487152, "learning_rate": 5e-05, "loss": 1.6853, "step": 105 }, { "epoch": 0.016962714034245478, "grad_norm": 0.3716273605823517, "learning_rate": 5e-05, "loss": 1.6693, "step": 106 }, { "epoch": 0.017122739638342134, "grad_norm": 0.3602806627750397, "learning_rate": 5e-05, "loss": 1.6379, "step": 107 }, { "epoch": 0.01728276524243879, "grad_norm": 0.4302051067352295, "learning_rate": 5e-05, "loss": 1.6835, "step": 108 }, { "epoch": 0.017442790846535446, "grad_norm": 0.35786888003349304, "learning_rate": 5e-05, "loss": 1.6942, "step": 109 }, { "epoch": 0.0176028164506321, "grad_norm": 0.44819769263267517, "learning_rate": 5e-05, "loss": 1.7461, "step": 110 }, { "epoch": 0.017762842054728757, "grad_norm": 0.4214605391025543, "learning_rate": 5e-05, "loss": 1.6472, "step": 111 }, { "epoch": 0.017922867658825413, "grad_norm": 0.367292582988739, "learning_rate": 5e-05, "loss": 1.7136, "step": 112 }, { "epoch": 0.01808289326292207, "grad_norm": 0.3890340030193329, "learning_rate": 5e-05, "loss": 1.6765, "step": 113 }, { "epoch": 0.018242918867018725, "grad_norm": 0.3374301791191101, "learning_rate": 5e-05, "loss": 1.6655, "step": 114 }, { "epoch": 0.018402944471115377, "grad_norm": 0.38433167338371277, "learning_rate": 5e-05, "loss": 1.6713, "step": 115 }, { "epoch": 0.018562970075212033, "grad_norm": 0.34837761521339417, "learning_rate": 5e-05, "loss": 1.7423, "step": 116 }, { "epoch": 0.01872299567930869, "grad_norm": 0.36625128984451294, "learning_rate": 5e-05, "loss": 1.6187, "step": 117 }, { "epoch": 0.018883021283405344, "grad_norm": 0.39271238446235657, "learning_rate": 5e-05, "loss": 1.6105, "step": 118 }, { "epoch": 0.019043046887502, "grad_norm": 0.3661741018295288, "learning_rate": 5e-05, "loss": 1.7193, "step": 119 }, { "epoch": 0.019203072491598656, "grad_norm": 0.35765039920806885, "learning_rate": 5e-05, "loss": 1.7392, "step": 120 }, { "epoch": 0.01936309809569531, "grad_norm": 0.3357962369918823, "learning_rate": 5e-05, "loss": 1.4883, "step": 121 }, { "epoch": 0.019523123699791967, "grad_norm": 0.39423447847366333, "learning_rate": 5e-05, "loss": 1.6994, "step": 122 }, { "epoch": 0.019683149303888623, "grad_norm": 0.3663979172706604, "learning_rate": 5e-05, "loss": 1.7044, "step": 123 }, { "epoch": 0.01984317490798528, "grad_norm": 0.41292327642440796, "learning_rate": 5e-05, "loss": 1.6097, "step": 124 }, { "epoch": 0.020003200512081935, "grad_norm": 0.3960838317871094, "learning_rate": 5e-05, "loss": 1.6835, "step": 125 }, { "epoch": 0.020163226116178587, "grad_norm": 0.3809219300746918, "learning_rate": 5e-05, "loss": 1.6625, "step": 126 }, { "epoch": 0.020323251720275243, "grad_norm": 0.4039958417415619, "learning_rate": 5e-05, "loss": 1.7089, "step": 127 }, { "epoch": 0.0204832773243719, "grad_norm": 0.3434881567955017, "learning_rate": 5e-05, "loss": 1.6627, "step": 128 }, { "epoch": 0.020643302928468554, "grad_norm": 0.3436790704727173, "learning_rate": 5e-05, "loss": 1.7586, "step": 129 }, { "epoch": 0.02080332853256521, "grad_norm": 0.3413067162036896, "learning_rate": 5e-05, "loss": 1.6805, "step": 130 }, { "epoch": 0.020963354136661866, "grad_norm": 0.32777804136276245, "learning_rate": 5e-05, "loss": 1.6233, "step": 131 }, { "epoch": 0.02112337974075852, "grad_norm": 0.40235379338264465, "learning_rate": 5e-05, "loss": 1.7209, "step": 132 }, { "epoch": 0.021283405344855177, "grad_norm": 0.33976101875305176, "learning_rate": 5e-05, "loss": 1.6511, "step": 133 }, { "epoch": 0.021443430948951833, "grad_norm": 0.33631324768066406, "learning_rate": 5e-05, "loss": 1.6556, "step": 134 }, { "epoch": 0.02160345655304849, "grad_norm": 0.3330569863319397, "learning_rate": 5e-05, "loss": 1.7171, "step": 135 }, { "epoch": 0.021763482157145145, "grad_norm": 0.35355818271636963, "learning_rate": 5e-05, "loss": 1.637, "step": 136 }, { "epoch": 0.021923507761241797, "grad_norm": 0.3647533357143402, "learning_rate": 5e-05, "loss": 1.665, "step": 137 }, { "epoch": 0.022083533365338453, "grad_norm": 0.357576459646225, "learning_rate": 5e-05, "loss": 1.7202, "step": 138 }, { "epoch": 0.02224355896943511, "grad_norm": 0.3431086540222168, "learning_rate": 5e-05, "loss": 1.6246, "step": 139 }, { "epoch": 0.022403584573531764, "grad_norm": 0.347624272108078, "learning_rate": 5e-05, "loss": 1.7358, "step": 140 }, { "epoch": 0.02256361017762842, "grad_norm": 0.33966419100761414, "learning_rate": 5e-05, "loss": 1.6414, "step": 141 }, { "epoch": 0.022723635781725076, "grad_norm": 0.3352697789669037, "learning_rate": 5e-05, "loss": 1.6095, "step": 142 }, { "epoch": 0.022883661385821732, "grad_norm": 0.3469352126121521, "learning_rate": 5e-05, "loss": 1.697, "step": 143 }, { "epoch": 0.023043686989918388, "grad_norm": 0.34578704833984375, "learning_rate": 5e-05, "loss": 1.7325, "step": 144 }, { "epoch": 0.023203712594015043, "grad_norm": 0.34347862005233765, "learning_rate": 5e-05, "loss": 1.6533, "step": 145 }, { "epoch": 0.0233637381981117, "grad_norm": 0.3345264792442322, "learning_rate": 5e-05, "loss": 1.6428, "step": 146 }, { "epoch": 0.023523763802208355, "grad_norm": 0.3277318477630615, "learning_rate": 5e-05, "loss": 1.6358, "step": 147 }, { "epoch": 0.023683789406305007, "grad_norm": 0.34119659662246704, "learning_rate": 5e-05, "loss": 1.6888, "step": 148 }, { "epoch": 0.023843815010401663, "grad_norm": 0.3489167392253876, "learning_rate": 5e-05, "loss": 1.7108, "step": 149 }, { "epoch": 0.02400384061449832, "grad_norm": 0.3618619740009308, "learning_rate": 5e-05, "loss": 1.7104, "step": 150 }, { "epoch": 0.024163866218594975, "grad_norm": 0.3380429744720459, "learning_rate": 5e-05, "loss": 1.7284, "step": 151 }, { "epoch": 0.02432389182269163, "grad_norm": 0.3560449779033661, "learning_rate": 5e-05, "loss": 1.6927, "step": 152 }, { "epoch": 0.024483917426788286, "grad_norm": 0.3437712490558624, "learning_rate": 5e-05, "loss": 1.6685, "step": 153 }, { "epoch": 0.024643943030884942, "grad_norm": 0.3391321897506714, "learning_rate": 5e-05, "loss": 1.5663, "step": 154 }, { "epoch": 0.024803968634981598, "grad_norm": 0.3310987949371338, "learning_rate": 5e-05, "loss": 1.5796, "step": 155 }, { "epoch": 0.024963994239078253, "grad_norm": 0.3305205702781677, "learning_rate": 5e-05, "loss": 1.6727, "step": 156 }, { "epoch": 0.02512401984317491, "grad_norm": 0.3292525112628937, "learning_rate": 5e-05, "loss": 1.7213, "step": 157 }, { "epoch": 0.025284045447271565, "grad_norm": 0.3747202157974243, "learning_rate": 5e-05, "loss": 1.6656, "step": 158 }, { "epoch": 0.025444071051368217, "grad_norm": 0.317796528339386, "learning_rate": 5e-05, "loss": 1.5193, "step": 159 }, { "epoch": 0.025604096655464873, "grad_norm": 0.33460667729377747, "learning_rate": 5e-05, "loss": 1.7167, "step": 160 }, { "epoch": 0.02576412225956153, "grad_norm": 0.323494553565979, "learning_rate": 5e-05, "loss": 1.6139, "step": 161 }, { "epoch": 0.025924147863658185, "grad_norm": 0.3482540249824524, "learning_rate": 5e-05, "loss": 1.6945, "step": 162 }, { "epoch": 0.02608417346775484, "grad_norm": 0.3281523585319519, "learning_rate": 5e-05, "loss": 1.6185, "step": 163 }, { "epoch": 0.026244199071851496, "grad_norm": 0.3313951790332794, "learning_rate": 5e-05, "loss": 1.7389, "step": 164 }, { "epoch": 0.026404224675948152, "grad_norm": 0.3326495587825775, "learning_rate": 5e-05, "loss": 1.6163, "step": 165 }, { "epoch": 0.026564250280044808, "grad_norm": 0.31191641092300415, "learning_rate": 5e-05, "loss": 1.5076, "step": 166 }, { "epoch": 0.026724275884141464, "grad_norm": 0.3224773406982422, "learning_rate": 5e-05, "loss": 1.5973, "step": 167 }, { "epoch": 0.02688430148823812, "grad_norm": 0.35688263177871704, "learning_rate": 5e-05, "loss": 1.6524, "step": 168 }, { "epoch": 0.027044327092334775, "grad_norm": 0.3634006977081299, "learning_rate": 5e-05, "loss": 1.7128, "step": 169 }, { "epoch": 0.027204352696431428, "grad_norm": 0.33243680000305176, "learning_rate": 5e-05, "loss": 1.6782, "step": 170 }, { "epoch": 0.027364378300528083, "grad_norm": 0.34032827615737915, "learning_rate": 5e-05, "loss": 1.5874, "step": 171 }, { "epoch": 0.02752440390462474, "grad_norm": 0.34151890873908997, "learning_rate": 5e-05, "loss": 1.6846, "step": 172 }, { "epoch": 0.027684429508721395, "grad_norm": 0.36432501673698425, "learning_rate": 5e-05, "loss": 1.7287, "step": 173 }, { "epoch": 0.02784445511281805, "grad_norm": 0.33415743708610535, "learning_rate": 5e-05, "loss": 1.717, "step": 174 }, { "epoch": 0.028004480716914706, "grad_norm": 0.3467123806476593, "learning_rate": 5e-05, "loss": 1.6413, "step": 175 }, { "epoch": 0.028164506321011362, "grad_norm": 0.32937970757484436, "learning_rate": 5e-05, "loss": 1.6299, "step": 176 }, { "epoch": 0.028324531925108018, "grad_norm": 0.34811872243881226, "learning_rate": 5e-05, "loss": 1.5807, "step": 177 }, { "epoch": 0.028484557529204674, "grad_norm": 0.33174753189086914, "learning_rate": 5e-05, "loss": 1.6411, "step": 178 }, { "epoch": 0.02864458313330133, "grad_norm": 0.3682177662849426, "learning_rate": 5e-05, "loss": 1.7035, "step": 179 }, { "epoch": 0.028804608737397985, "grad_norm": 0.31987297534942627, "learning_rate": 5e-05, "loss": 1.5939, "step": 180 }, { "epoch": 0.028964634341494638, "grad_norm": 0.4201255440711975, "learning_rate": 5e-05, "loss": 1.6341, "step": 181 }, { "epoch": 0.029124659945591293, "grad_norm": 0.33961376547813416, "learning_rate": 5e-05, "loss": 1.6448, "step": 182 }, { "epoch": 0.02928468554968795, "grad_norm": 0.3711942732334137, "learning_rate": 5e-05, "loss": 1.6252, "step": 183 }, { "epoch": 0.029444711153784605, "grad_norm": 0.31812700629234314, "learning_rate": 5e-05, "loss": 1.6579, "step": 184 }, { "epoch": 0.02960473675788126, "grad_norm": 0.3471561670303345, "learning_rate": 5e-05, "loss": 1.6989, "step": 185 }, { "epoch": 0.029764762361977917, "grad_norm": 0.3234966993331909, "learning_rate": 5e-05, "loss": 1.544, "step": 186 }, { "epoch": 0.029924787966074572, "grad_norm": 0.353284627199173, "learning_rate": 5e-05, "loss": 1.7661, "step": 187 }, { "epoch": 0.030084813570171228, "grad_norm": 0.33583977818489075, "learning_rate": 5e-05, "loss": 1.6425, "step": 188 }, { "epoch": 0.030244839174267884, "grad_norm": 0.32806122303009033, "learning_rate": 5e-05, "loss": 1.669, "step": 189 }, { "epoch": 0.03040486477836454, "grad_norm": 0.36670804023742676, "learning_rate": 5e-05, "loss": 1.7119, "step": 190 }, { "epoch": 0.030564890382461195, "grad_norm": 0.3343648910522461, "learning_rate": 5e-05, "loss": 1.5678, "step": 191 }, { "epoch": 0.030724915986557848, "grad_norm": 0.3759818375110626, "learning_rate": 5e-05, "loss": 1.7352, "step": 192 }, { "epoch": 0.030884941590654504, "grad_norm": 0.36617511510849, "learning_rate": 5e-05, "loss": 1.6593, "step": 193 }, { "epoch": 0.03104496719475116, "grad_norm": 0.3428725302219391, "learning_rate": 5e-05, "loss": 1.6987, "step": 194 }, { "epoch": 0.031204992798847815, "grad_norm": 0.32480713725090027, "learning_rate": 5e-05, "loss": 1.6212, "step": 195 }, { "epoch": 0.03136501840294447, "grad_norm": 0.4033461809158325, "learning_rate": 5e-05, "loss": 1.7463, "step": 196 }, { "epoch": 0.03152504400704113, "grad_norm": 0.3269805610179901, "learning_rate": 5e-05, "loss": 1.6575, "step": 197 }, { "epoch": 0.03168506961113778, "grad_norm": 0.3829464018344879, "learning_rate": 5e-05, "loss": 1.7374, "step": 198 }, { "epoch": 0.03184509521523444, "grad_norm": 0.3098875880241394, "learning_rate": 5e-05, "loss": 1.698, "step": 199 }, { "epoch": 0.032005120819331094, "grad_norm": 0.3744073808193207, "learning_rate": 5e-05, "loss": 1.6003, "step": 200 }, { "epoch": 0.03216514642342775, "grad_norm": 0.3105475604534149, "learning_rate": 5e-05, "loss": 1.5839, "step": 201 }, { "epoch": 0.032325172027524406, "grad_norm": 0.3548446595668793, "learning_rate": 5e-05, "loss": 1.7098, "step": 202 }, { "epoch": 0.03248519763162106, "grad_norm": 0.31813228130340576, "learning_rate": 5e-05, "loss": 1.5906, "step": 203 }, { "epoch": 0.03264522323571772, "grad_norm": 0.34269073605537415, "learning_rate": 5e-05, "loss": 1.6629, "step": 204 }, { "epoch": 0.03280524883981437, "grad_norm": 0.34181439876556396, "learning_rate": 5e-05, "loss": 1.6861, "step": 205 }, { "epoch": 0.03296527444391103, "grad_norm": 0.3270605504512787, "learning_rate": 5e-05, "loss": 1.6634, "step": 206 }, { "epoch": 0.033125300048007685, "grad_norm": 0.3475787043571472, "learning_rate": 5e-05, "loss": 1.7551, "step": 207 }, { "epoch": 0.03328532565210433, "grad_norm": 0.31430113315582275, "learning_rate": 5e-05, "loss": 1.6521, "step": 208 }, { "epoch": 0.03344535125620099, "grad_norm": 0.3291769027709961, "learning_rate": 5e-05, "loss": 1.5844, "step": 209 }, { "epoch": 0.033605376860297645, "grad_norm": 0.3159150779247284, "learning_rate": 5e-05, "loss": 1.6192, "step": 210 }, { "epoch": 0.0337654024643943, "grad_norm": 0.33565157651901245, "learning_rate": 5e-05, "loss": 1.6841, "step": 211 }, { "epoch": 0.033925428068490956, "grad_norm": 0.3138677179813385, "learning_rate": 5e-05, "loss": 1.64, "step": 212 }, { "epoch": 0.03408545367258761, "grad_norm": 0.3399653434753418, "learning_rate": 5e-05, "loss": 1.6352, "step": 213 }, { "epoch": 0.03424547927668427, "grad_norm": 0.324053019285202, "learning_rate": 5e-05, "loss": 1.6453, "step": 214 }, { "epoch": 0.034405504880780924, "grad_norm": 0.32958194613456726, "learning_rate": 5e-05, "loss": 1.7292, "step": 215 }, { "epoch": 0.03456553048487758, "grad_norm": 0.34939104318618774, "learning_rate": 5e-05, "loss": 1.6453, "step": 216 }, { "epoch": 0.034725556088974235, "grad_norm": 0.3107679784297943, "learning_rate": 5e-05, "loss": 1.5603, "step": 217 }, { "epoch": 0.03488558169307089, "grad_norm": 0.32663610577583313, "learning_rate": 5e-05, "loss": 1.6112, "step": 218 }, { "epoch": 0.03504560729716755, "grad_norm": 0.3358449339866638, "learning_rate": 5e-05, "loss": 1.644, "step": 219 }, { "epoch": 0.0352056329012642, "grad_norm": 0.3257986009120941, "learning_rate": 5e-05, "loss": 1.6668, "step": 220 }, { "epoch": 0.03536565850536086, "grad_norm": 0.3242127001285553, "learning_rate": 5e-05, "loss": 1.6919, "step": 221 }, { "epoch": 0.035525684109457514, "grad_norm": 0.33794260025024414, "learning_rate": 5e-05, "loss": 1.6677, "step": 222 }, { "epoch": 0.03568570971355417, "grad_norm": 0.3210519552230835, "learning_rate": 5e-05, "loss": 1.7321, "step": 223 }, { "epoch": 0.035845735317650826, "grad_norm": 0.33050450682640076, "learning_rate": 5e-05, "loss": 1.6363, "step": 224 }, { "epoch": 0.03600576092174748, "grad_norm": 0.32627537846565247, "learning_rate": 5e-05, "loss": 1.6248, "step": 225 }, { "epoch": 0.03616578652584414, "grad_norm": 0.3140614926815033, "learning_rate": 5e-05, "loss": 1.6354, "step": 226 }, { "epoch": 0.03632581212994079, "grad_norm": 0.35683590173721313, "learning_rate": 5e-05, "loss": 1.6823, "step": 227 }, { "epoch": 0.03648583773403745, "grad_norm": 0.32652178406715393, "learning_rate": 5e-05, "loss": 1.6335, "step": 228 }, { "epoch": 0.036645863338134105, "grad_norm": 0.31681597232818604, "learning_rate": 5e-05, "loss": 1.6285, "step": 229 }, { "epoch": 0.036805888942230754, "grad_norm": 0.31447356939315796, "learning_rate": 5e-05, "loss": 1.6082, "step": 230 }, { "epoch": 0.03696591454632741, "grad_norm": 0.3253583312034607, "learning_rate": 5e-05, "loss": 1.5927, "step": 231 }, { "epoch": 0.037125940150424065, "grad_norm": 0.3452536463737488, "learning_rate": 5e-05, "loss": 1.6473, "step": 232 }, { "epoch": 0.03728596575452072, "grad_norm": 0.3151145279407501, "learning_rate": 5e-05, "loss": 1.5993, "step": 233 }, { "epoch": 0.03744599135861738, "grad_norm": 0.35277920961380005, "learning_rate": 5e-05, "loss": 1.6779, "step": 234 }, { "epoch": 0.03760601696271403, "grad_norm": 0.3397216200828552, "learning_rate": 5e-05, "loss": 1.6502, "step": 235 }, { "epoch": 0.03776604256681069, "grad_norm": 0.30411258339881897, "learning_rate": 5e-05, "loss": 1.6277, "step": 236 }, { "epoch": 0.037926068170907344, "grad_norm": 0.33374807238578796, "learning_rate": 5e-05, "loss": 1.6809, "step": 237 }, { "epoch": 0.038086093775004, "grad_norm": 0.31379881501197815, "learning_rate": 5e-05, "loss": 1.6244, "step": 238 }, { "epoch": 0.038246119379100656, "grad_norm": 0.3143311142921448, "learning_rate": 5e-05, "loss": 1.6464, "step": 239 }, { "epoch": 0.03840614498319731, "grad_norm": 0.3256695866584778, "learning_rate": 5e-05, "loss": 1.6487, "step": 240 }, { "epoch": 0.03856617058729397, "grad_norm": 0.3251948952674866, "learning_rate": 5e-05, "loss": 1.6455, "step": 241 }, { "epoch": 0.03872619619139062, "grad_norm": 0.3229178786277771, "learning_rate": 5e-05, "loss": 1.6146, "step": 242 }, { "epoch": 0.03888622179548728, "grad_norm": 0.3351020812988281, "learning_rate": 5e-05, "loss": 1.6345, "step": 243 }, { "epoch": 0.039046247399583935, "grad_norm": 0.35118967294692993, "learning_rate": 5e-05, "loss": 1.6542, "step": 244 }, { "epoch": 0.03920627300368059, "grad_norm": 0.330517053604126, "learning_rate": 5e-05, "loss": 1.5898, "step": 245 }, { "epoch": 0.039366298607777246, "grad_norm": 0.30288243293762207, "learning_rate": 5e-05, "loss": 1.6069, "step": 246 }, { "epoch": 0.0395263242118739, "grad_norm": 0.3663911819458008, "learning_rate": 5e-05, "loss": 1.6141, "step": 247 }, { "epoch": 0.03968634981597056, "grad_norm": 0.33670511841773987, "learning_rate": 5e-05, "loss": 1.6744, "step": 248 }, { "epoch": 0.039846375420067214, "grad_norm": 0.35650548338890076, "learning_rate": 5e-05, "loss": 1.5122, "step": 249 }, { "epoch": 0.04000640102416387, "grad_norm": 0.3215617835521698, "learning_rate": 5e-05, "loss": 1.6679, "step": 250 }, { "epoch": 0.040166426628260525, "grad_norm": 0.34353116154670715, "learning_rate": 5e-05, "loss": 1.5894, "step": 251 }, { "epoch": 0.040326452232357174, "grad_norm": 0.33273983001708984, "learning_rate": 5e-05, "loss": 1.5854, "step": 252 }, { "epoch": 0.04048647783645383, "grad_norm": 0.33299216628074646, "learning_rate": 5e-05, "loss": 1.6466, "step": 253 }, { "epoch": 0.040646503440550485, "grad_norm": 0.3478671610355377, "learning_rate": 5e-05, "loss": 1.6517, "step": 254 }, { "epoch": 0.04080652904464714, "grad_norm": 0.3380471169948578, "learning_rate": 5e-05, "loss": 1.6996, "step": 255 }, { "epoch": 0.0409665546487438, "grad_norm": 0.36710235476493835, "learning_rate": 5e-05, "loss": 1.7168, "step": 256 }, { "epoch": 0.04112658025284045, "grad_norm": 0.3308304250240326, "learning_rate": 5e-05, "loss": 1.6307, "step": 257 }, { "epoch": 0.04128660585693711, "grad_norm": 0.32631850242614746, "learning_rate": 5e-05, "loss": 1.6671, "step": 258 }, { "epoch": 0.041446631461033764, "grad_norm": 0.3591870069503784, "learning_rate": 5e-05, "loss": 1.6059, "step": 259 }, { "epoch": 0.04160665706513042, "grad_norm": 0.3148445785045624, "learning_rate": 5e-05, "loss": 1.713, "step": 260 }, { "epoch": 0.041766682669227076, "grad_norm": 0.3876041769981384, "learning_rate": 5e-05, "loss": 1.7035, "step": 261 }, { "epoch": 0.04192670827332373, "grad_norm": 0.3441046178340912, "learning_rate": 5e-05, "loss": 1.6596, "step": 262 }, { "epoch": 0.04208673387742039, "grad_norm": 0.3760663568973541, "learning_rate": 5e-05, "loss": 1.6459, "step": 263 }, { "epoch": 0.04224675948151704, "grad_norm": 0.3414490520954132, "learning_rate": 5e-05, "loss": 1.6118, "step": 264 }, { "epoch": 0.0424067850856137, "grad_norm": 0.3461393117904663, "learning_rate": 5e-05, "loss": 1.5903, "step": 265 }, { "epoch": 0.042566810689710355, "grad_norm": 0.34326496720314026, "learning_rate": 5e-05, "loss": 1.6185, "step": 266 }, { "epoch": 0.04272683629380701, "grad_norm": 0.30942797660827637, "learning_rate": 5e-05, "loss": 1.6056, "step": 267 }, { "epoch": 0.042886861897903666, "grad_norm": 0.34404948353767395, "learning_rate": 5e-05, "loss": 1.6166, "step": 268 }, { "epoch": 0.04304688750200032, "grad_norm": 0.32068485021591187, "learning_rate": 5e-05, "loss": 1.6546, "step": 269 }, { "epoch": 0.04320691310609698, "grad_norm": 0.34230855107307434, "learning_rate": 5e-05, "loss": 1.667, "step": 270 }, { "epoch": 0.043366938710193634, "grad_norm": 0.2914317548274994, "learning_rate": 5e-05, "loss": 1.5541, "step": 271 }, { "epoch": 0.04352696431429029, "grad_norm": 0.2996683716773987, "learning_rate": 5e-05, "loss": 1.593, "step": 272 }, { "epoch": 0.043686989918386945, "grad_norm": 0.3135250508785248, "learning_rate": 5e-05, "loss": 1.6276, "step": 273 }, { "epoch": 0.043847015522483594, "grad_norm": 0.30590105056762695, "learning_rate": 5e-05, "loss": 1.6185, "step": 274 }, { "epoch": 0.04400704112658025, "grad_norm": 0.3158150017261505, "learning_rate": 5e-05, "loss": 1.7237, "step": 275 }, { "epoch": 0.044167066730676906, "grad_norm": 0.32625770568847656, "learning_rate": 5e-05, "loss": 1.71, "step": 276 }, { "epoch": 0.04432709233477356, "grad_norm": 0.299189954996109, "learning_rate": 5e-05, "loss": 1.5643, "step": 277 }, { "epoch": 0.04448711793887022, "grad_norm": 0.318710595369339, "learning_rate": 5e-05, "loss": 1.7693, "step": 278 }, { "epoch": 0.04464714354296687, "grad_norm": 0.31957748532295227, "learning_rate": 5e-05, "loss": 1.6687, "step": 279 }, { "epoch": 0.04480716914706353, "grad_norm": 0.3053012788295746, "learning_rate": 5e-05, "loss": 1.6546, "step": 280 }, { "epoch": 0.044967194751160185, "grad_norm": 0.31021547317504883, "learning_rate": 5e-05, "loss": 1.6177, "step": 281 }, { "epoch": 0.04512722035525684, "grad_norm": 0.316478967666626, "learning_rate": 5e-05, "loss": 1.5542, "step": 282 }, { "epoch": 0.045287245959353496, "grad_norm": 0.2990763783454895, "learning_rate": 5e-05, "loss": 1.592, "step": 283 }, { "epoch": 0.04544727156345015, "grad_norm": 0.3352183699607849, "learning_rate": 5e-05, "loss": 1.6444, "step": 284 }, { "epoch": 0.04560729716754681, "grad_norm": 0.3264731168746948, "learning_rate": 5e-05, "loss": 1.6737, "step": 285 }, { "epoch": 0.045767322771643464, "grad_norm": 0.3492686152458191, "learning_rate": 5e-05, "loss": 1.6478, "step": 286 }, { "epoch": 0.04592734837574012, "grad_norm": 0.32337069511413574, "learning_rate": 5e-05, "loss": 1.6279, "step": 287 }, { "epoch": 0.046087373979836775, "grad_norm": 0.3202260434627533, "learning_rate": 5e-05, "loss": 1.6296, "step": 288 }, { "epoch": 0.04624739958393343, "grad_norm": 0.30770745873451233, "learning_rate": 5e-05, "loss": 1.6169, "step": 289 }, { "epoch": 0.04640742518803009, "grad_norm": 0.3085482120513916, "learning_rate": 5e-05, "loss": 1.6024, "step": 290 }, { "epoch": 0.04656745079212674, "grad_norm": 0.30687415599823, "learning_rate": 5e-05, "loss": 1.6439, "step": 291 }, { "epoch": 0.0467274763962234, "grad_norm": 0.30679792165756226, "learning_rate": 5e-05, "loss": 1.5483, "step": 292 }, { "epoch": 0.046887502000320054, "grad_norm": 0.31988418102264404, "learning_rate": 5e-05, "loss": 1.7837, "step": 293 }, { "epoch": 0.04704752760441671, "grad_norm": 0.29938483238220215, "learning_rate": 5e-05, "loss": 1.5494, "step": 294 }, { "epoch": 0.04720755320851336, "grad_norm": 0.32384344935417175, "learning_rate": 5e-05, "loss": 1.6281, "step": 295 }, { "epoch": 0.047367578812610014, "grad_norm": 0.31696203351020813, "learning_rate": 5e-05, "loss": 1.5842, "step": 296 }, { "epoch": 0.04752760441670667, "grad_norm": 0.3400418162345886, "learning_rate": 5e-05, "loss": 1.6951, "step": 297 }, { "epoch": 0.047687630020803326, "grad_norm": 0.31100594997406006, "learning_rate": 5e-05, "loss": 1.6318, "step": 298 }, { "epoch": 0.04784765562489998, "grad_norm": 0.34002113342285156, "learning_rate": 5e-05, "loss": 1.782, "step": 299 }, { "epoch": 0.04800768122899664, "grad_norm": 0.3297704756259918, "learning_rate": 5e-05, "loss": 1.7131, "step": 300 }, { "epoch": 0.04816770683309329, "grad_norm": 0.3271639347076416, "learning_rate": 5e-05, "loss": 1.7288, "step": 301 }, { "epoch": 0.04832773243718995, "grad_norm": 0.31002476811408997, "learning_rate": 5e-05, "loss": 1.674, "step": 302 }, { "epoch": 0.048487758041286605, "grad_norm": 0.34394386410713196, "learning_rate": 5e-05, "loss": 1.6507, "step": 303 }, { "epoch": 0.04864778364538326, "grad_norm": 0.29716262221336365, "learning_rate": 5e-05, "loss": 1.5518, "step": 304 }, { "epoch": 0.048807809249479917, "grad_norm": 0.3297494649887085, "learning_rate": 5e-05, "loss": 1.7062, "step": 305 }, { "epoch": 0.04896783485357657, "grad_norm": 0.2993728816509247, "learning_rate": 5e-05, "loss": 1.5501, "step": 306 }, { "epoch": 0.04912786045767323, "grad_norm": 0.30640122294425964, "learning_rate": 5e-05, "loss": 1.6599, "step": 307 }, { "epoch": 0.049287886061769884, "grad_norm": 0.30699095129966736, "learning_rate": 5e-05, "loss": 1.608, "step": 308 }, { "epoch": 0.04944791166586654, "grad_norm": 0.3058755099773407, "learning_rate": 5e-05, "loss": 1.6743, "step": 309 }, { "epoch": 0.049607937269963195, "grad_norm": 0.34107232093811035, "learning_rate": 5e-05, "loss": 1.5594, "step": 310 }, { "epoch": 0.04976796287405985, "grad_norm": 0.2993842363357544, "learning_rate": 5e-05, "loss": 1.6153, "step": 311 }, { "epoch": 0.04992798847815651, "grad_norm": 0.32788172364234924, "learning_rate": 5e-05, "loss": 1.7052, "step": 312 }, { "epoch": 0.05008801408225316, "grad_norm": 0.31073957681655884, "learning_rate": 5e-05, "loss": 1.7071, "step": 313 }, { "epoch": 0.05024803968634982, "grad_norm": 0.31214576959609985, "learning_rate": 5e-05, "loss": 1.6562, "step": 314 }, { "epoch": 0.050408065290446474, "grad_norm": 0.3168680965900421, "learning_rate": 5e-05, "loss": 1.6346, "step": 315 }, { "epoch": 0.05056809089454313, "grad_norm": 0.3206835687160492, "learning_rate": 5e-05, "loss": 1.6136, "step": 316 }, { "epoch": 0.05072811649863978, "grad_norm": 0.3258902132511139, "learning_rate": 5e-05, "loss": 1.6188, "step": 317 }, { "epoch": 0.050888142102736435, "grad_norm": 0.3188284933567047, "learning_rate": 5e-05, "loss": 1.643, "step": 318 }, { "epoch": 0.05104816770683309, "grad_norm": 0.3232021629810333, "learning_rate": 5e-05, "loss": 1.6496, "step": 319 }, { "epoch": 0.051208193310929746, "grad_norm": 0.33951708674430847, "learning_rate": 5e-05, "loss": 1.7217, "step": 320 }, { "epoch": 0.0513682189150264, "grad_norm": 0.3004438877105713, "learning_rate": 5e-05, "loss": 1.5929, "step": 321 }, { "epoch": 0.05152824451912306, "grad_norm": 0.3212479054927826, "learning_rate": 5e-05, "loss": 1.7303, "step": 322 }, { "epoch": 0.051688270123219714, "grad_norm": 0.3147618770599365, "learning_rate": 5e-05, "loss": 1.6717, "step": 323 }, { "epoch": 0.05184829572731637, "grad_norm": 0.3240668773651123, "learning_rate": 5e-05, "loss": 1.7071, "step": 324 }, { "epoch": 0.052008321331413025, "grad_norm": 0.3183201551437378, "learning_rate": 5e-05, "loss": 1.6405, "step": 325 }, { "epoch": 0.05216834693550968, "grad_norm": 0.32996898889541626, "learning_rate": 5e-05, "loss": 1.6528, "step": 326 }, { "epoch": 0.05232837253960634, "grad_norm": 0.30380886793136597, "learning_rate": 5e-05, "loss": 1.6051, "step": 327 }, { "epoch": 0.05248839814370299, "grad_norm": 0.3298283517360687, "learning_rate": 5e-05, "loss": 1.711, "step": 328 }, { "epoch": 0.05264842374779965, "grad_norm": 0.3309849500656128, "learning_rate": 5e-05, "loss": 1.6799, "step": 329 }, { "epoch": 0.052808449351896304, "grad_norm": 0.3111005127429962, "learning_rate": 5e-05, "loss": 1.6162, "step": 330 }, { "epoch": 0.05296847495599296, "grad_norm": 0.3155026435852051, "learning_rate": 5e-05, "loss": 1.5553, "step": 331 }, { "epoch": 0.053128500560089616, "grad_norm": 0.31975892186164856, "learning_rate": 5e-05, "loss": 1.6698, "step": 332 }, { "epoch": 0.05328852616418627, "grad_norm": 0.30563557147979736, "learning_rate": 5e-05, "loss": 1.6222, "step": 333 }, { "epoch": 0.05344855176828293, "grad_norm": 0.3106932044029236, "learning_rate": 5e-05, "loss": 1.7329, "step": 334 }, { "epoch": 0.05360857737237958, "grad_norm": 0.3068872094154358, "learning_rate": 5e-05, "loss": 1.6391, "step": 335 }, { "epoch": 0.05376860297647624, "grad_norm": 0.30709147453308105, "learning_rate": 5e-05, "loss": 1.6324, "step": 336 }, { "epoch": 0.053928628580572895, "grad_norm": 0.3270563781261444, "learning_rate": 5e-05, "loss": 1.7256, "step": 337 }, { "epoch": 0.05408865418466955, "grad_norm": 0.2918051779270172, "learning_rate": 5e-05, "loss": 1.5987, "step": 338 }, { "epoch": 0.0542486797887662, "grad_norm": 0.3009319603443146, "learning_rate": 5e-05, "loss": 1.5495, "step": 339 }, { "epoch": 0.054408705392862855, "grad_norm": 0.30070760846138, "learning_rate": 5e-05, "loss": 1.6071, "step": 340 }, { "epoch": 0.05456873099695951, "grad_norm": 0.31355828046798706, "learning_rate": 5e-05, "loss": 1.669, "step": 341 }, { "epoch": 0.05472875660105617, "grad_norm": 0.3147980272769928, "learning_rate": 5e-05, "loss": 1.5778, "step": 342 }, { "epoch": 0.05488878220515282, "grad_norm": 0.31016767024993896, "learning_rate": 5e-05, "loss": 1.6241, "step": 343 }, { "epoch": 0.05504880780924948, "grad_norm": 0.3096460700035095, "learning_rate": 5e-05, "loss": 1.6729, "step": 344 }, { "epoch": 0.055208833413346134, "grad_norm": 0.30558985471725464, "learning_rate": 5e-05, "loss": 1.5989, "step": 345 }, { "epoch": 0.05536885901744279, "grad_norm": 0.30161720514297485, "learning_rate": 5e-05, "loss": 1.6207, "step": 346 }, { "epoch": 0.055528884621539445, "grad_norm": 0.356425404548645, "learning_rate": 5e-05, "loss": 1.7631, "step": 347 }, { "epoch": 0.0556889102256361, "grad_norm": 0.3047163486480713, "learning_rate": 5e-05, "loss": 1.6351, "step": 348 }, { "epoch": 0.05584893582973276, "grad_norm": 0.3016068637371063, "learning_rate": 5e-05, "loss": 1.5588, "step": 349 }, { "epoch": 0.05600896143382941, "grad_norm": 0.30792728066444397, "learning_rate": 5e-05, "loss": 1.6106, "step": 350 }, { "epoch": 0.05616898703792607, "grad_norm": 0.3088272213935852, "learning_rate": 5e-05, "loss": 1.6641, "step": 351 }, { "epoch": 0.056329012642022724, "grad_norm": 0.2982586622238159, "learning_rate": 5e-05, "loss": 1.6501, "step": 352 }, { "epoch": 0.05648903824611938, "grad_norm": 0.30695587396621704, "learning_rate": 5e-05, "loss": 1.6368, "step": 353 }, { "epoch": 0.056649063850216036, "grad_norm": 0.32044368982315063, "learning_rate": 5e-05, "loss": 1.6555, "step": 354 }, { "epoch": 0.05680908945431269, "grad_norm": 0.2913132607936859, "learning_rate": 5e-05, "loss": 1.6122, "step": 355 }, { "epoch": 0.05696911505840935, "grad_norm": 0.3355959951877594, "learning_rate": 5e-05, "loss": 1.5676, "step": 356 }, { "epoch": 0.057129140662506, "grad_norm": 0.31542283296585083, "learning_rate": 5e-05, "loss": 1.5311, "step": 357 }, { "epoch": 0.05728916626660266, "grad_norm": 0.30560043454170227, "learning_rate": 5e-05, "loss": 1.5822, "step": 358 }, { "epoch": 0.057449191870699315, "grad_norm": 0.3020707070827484, "learning_rate": 5e-05, "loss": 1.5806, "step": 359 }, { "epoch": 0.05760921747479597, "grad_norm": 0.3249915540218353, "learning_rate": 5e-05, "loss": 1.6592, "step": 360 }, { "epoch": 0.05776924307889262, "grad_norm": 0.36068636178970337, "learning_rate": 5e-05, "loss": 1.7081, "step": 361 }, { "epoch": 0.057929268682989275, "grad_norm": 0.3292378783226013, "learning_rate": 5e-05, "loss": 1.6903, "step": 362 }, { "epoch": 0.05808929428708593, "grad_norm": 0.30144280195236206, "learning_rate": 5e-05, "loss": 1.5554, "step": 363 }, { "epoch": 0.05824931989118259, "grad_norm": 0.33675411343574524, "learning_rate": 5e-05, "loss": 1.6147, "step": 364 }, { "epoch": 0.05840934549527924, "grad_norm": 0.3182026743888855, "learning_rate": 5e-05, "loss": 1.6112, "step": 365 }, { "epoch": 0.0585693710993759, "grad_norm": 0.34219294786453247, "learning_rate": 5e-05, "loss": 1.5925, "step": 366 }, { "epoch": 0.058729396703472554, "grad_norm": 0.3494414687156677, "learning_rate": 5e-05, "loss": 1.6066, "step": 367 }, { "epoch": 0.05888942230756921, "grad_norm": 0.3402416408061981, "learning_rate": 5e-05, "loss": 1.6209, "step": 368 }, { "epoch": 0.059049447911665866, "grad_norm": 0.31463149189949036, "learning_rate": 5e-05, "loss": 1.6382, "step": 369 }, { "epoch": 0.05920947351576252, "grad_norm": 0.3591335117816925, "learning_rate": 5e-05, "loss": 1.7039, "step": 370 }, { "epoch": 0.05936949911985918, "grad_norm": 0.3076695203781128, "learning_rate": 5e-05, "loss": 1.5914, "step": 371 }, { "epoch": 0.05952952472395583, "grad_norm": 0.3070884644985199, "learning_rate": 5e-05, "loss": 1.6106, "step": 372 }, { "epoch": 0.05968955032805249, "grad_norm": 0.3196181356906891, "learning_rate": 5e-05, "loss": 1.6541, "step": 373 }, { "epoch": 0.059849575932149145, "grad_norm": 0.30078333616256714, "learning_rate": 5e-05, "loss": 1.5829, "step": 374 }, { "epoch": 0.0600096015362458, "grad_norm": 0.30526861548423767, "learning_rate": 5e-05, "loss": 1.6671, "step": 375 }, { "epoch": 0.060169627140342456, "grad_norm": 0.30642178654670715, "learning_rate": 5e-05, "loss": 1.6319, "step": 376 }, { "epoch": 0.06032965274443911, "grad_norm": 0.2930714190006256, "learning_rate": 5e-05, "loss": 1.6262, "step": 377 }, { "epoch": 0.06048967834853577, "grad_norm": 0.31121599674224854, "learning_rate": 5e-05, "loss": 1.5891, "step": 378 }, { "epoch": 0.060649703952632424, "grad_norm": 0.2973695993423462, "learning_rate": 5e-05, "loss": 1.6216, "step": 379 }, { "epoch": 0.06080972955672908, "grad_norm": 0.3124559819698334, "learning_rate": 5e-05, "loss": 1.6127, "step": 380 }, { "epoch": 0.060969755160825735, "grad_norm": 0.33452746272087097, "learning_rate": 5e-05, "loss": 1.6193, "step": 381 }, { "epoch": 0.06112978076492239, "grad_norm": 0.3232598304748535, "learning_rate": 5e-05, "loss": 1.5383, "step": 382 }, { "epoch": 0.06128980636901904, "grad_norm": 0.34293997287750244, "learning_rate": 5e-05, "loss": 1.6845, "step": 383 }, { "epoch": 0.061449831973115696, "grad_norm": 0.29233554005622864, "learning_rate": 5e-05, "loss": 1.5832, "step": 384 }, { "epoch": 0.06160985757721235, "grad_norm": 0.3192179799079895, "learning_rate": 5e-05, "loss": 1.7132, "step": 385 }, { "epoch": 0.06176988318130901, "grad_norm": 0.2973754405975342, "learning_rate": 5e-05, "loss": 1.5886, "step": 386 }, { "epoch": 0.06192990878540566, "grad_norm": 0.3051092326641083, "learning_rate": 5e-05, "loss": 1.6297, "step": 387 }, { "epoch": 0.06208993438950232, "grad_norm": 0.30153220891952515, "learning_rate": 5e-05, "loss": 1.6352, "step": 388 }, { "epoch": 0.062249959993598974, "grad_norm": 0.3163852095603943, "learning_rate": 5e-05, "loss": 1.6411, "step": 389 }, { "epoch": 0.06240998559769563, "grad_norm": 0.3260617256164551, "learning_rate": 5e-05, "loss": 1.6296, "step": 390 }, { "epoch": 0.06257001120179229, "grad_norm": 0.30562207102775574, "learning_rate": 5e-05, "loss": 1.6166, "step": 391 }, { "epoch": 0.06273003680588894, "grad_norm": 0.3148517608642578, "learning_rate": 5e-05, "loss": 1.6651, "step": 392 }, { "epoch": 0.0628900624099856, "grad_norm": 0.29531407356262207, "learning_rate": 5e-05, "loss": 1.5874, "step": 393 }, { "epoch": 0.06305008801408225, "grad_norm": 0.3246009051799774, "learning_rate": 5e-05, "loss": 1.6542, "step": 394 }, { "epoch": 0.06321011361817891, "grad_norm": 0.30006006360054016, "learning_rate": 5e-05, "loss": 1.5734, "step": 395 }, { "epoch": 0.06337013922227556, "grad_norm": 0.30511030554771423, "learning_rate": 5e-05, "loss": 1.5887, "step": 396 }, { "epoch": 0.06353016482637222, "grad_norm": 0.29986414313316345, "learning_rate": 5e-05, "loss": 1.55, "step": 397 }, { "epoch": 0.06369019043046888, "grad_norm": 0.31718286871910095, "learning_rate": 5e-05, "loss": 1.6851, "step": 398 }, { "epoch": 0.06385021603456553, "grad_norm": 0.31064942479133606, "learning_rate": 5e-05, "loss": 1.5796, "step": 399 }, { "epoch": 0.06401024163866219, "grad_norm": 0.30833685398101807, "learning_rate": 5e-05, "loss": 1.6346, "step": 400 }, { "epoch": 0.06417026724275884, "grad_norm": 0.3086819350719452, "learning_rate": 5e-05, "loss": 1.5972, "step": 401 }, { "epoch": 0.0643302928468555, "grad_norm": 0.3278833329677582, "learning_rate": 5e-05, "loss": 1.704, "step": 402 }, { "epoch": 0.06449031845095216, "grad_norm": 0.30956339836120605, "learning_rate": 5e-05, "loss": 1.5315, "step": 403 }, { "epoch": 0.06465034405504881, "grad_norm": 0.3183640241622925, "learning_rate": 5e-05, "loss": 1.6798, "step": 404 }, { "epoch": 0.06481036965914547, "grad_norm": 0.3333209156990051, "learning_rate": 5e-05, "loss": 1.6651, "step": 405 }, { "epoch": 0.06497039526324212, "grad_norm": 0.32257547974586487, "learning_rate": 5e-05, "loss": 1.6474, "step": 406 }, { "epoch": 0.06513042086733878, "grad_norm": 0.3042536973953247, "learning_rate": 5e-05, "loss": 1.6542, "step": 407 }, { "epoch": 0.06529044647143543, "grad_norm": 0.3078743517398834, "learning_rate": 5e-05, "loss": 1.6469, "step": 408 }, { "epoch": 0.06545047207553209, "grad_norm": 0.3333624601364136, "learning_rate": 5e-05, "loss": 1.6423, "step": 409 }, { "epoch": 0.06561049767962875, "grad_norm": 0.31877976655960083, "learning_rate": 5e-05, "loss": 1.6481, "step": 410 }, { "epoch": 0.0657705232837254, "grad_norm": 0.33101609349250793, "learning_rate": 5e-05, "loss": 1.5614, "step": 411 }, { "epoch": 0.06593054888782206, "grad_norm": 0.3240489959716797, "learning_rate": 5e-05, "loss": 1.6541, "step": 412 }, { "epoch": 0.06609057449191871, "grad_norm": 0.3104690611362457, "learning_rate": 5e-05, "loss": 1.6436, "step": 413 }, { "epoch": 0.06625060009601537, "grad_norm": 0.35985425114631653, "learning_rate": 5e-05, "loss": 1.6126, "step": 414 }, { "epoch": 0.06641062570011202, "grad_norm": 0.3116972744464874, "learning_rate": 5e-05, "loss": 1.6612, "step": 415 }, { "epoch": 0.06657065130420867, "grad_norm": 0.32358917593955994, "learning_rate": 5e-05, "loss": 1.606, "step": 416 }, { "epoch": 0.06673067690830532, "grad_norm": 0.2943195104598999, "learning_rate": 5e-05, "loss": 1.514, "step": 417 }, { "epoch": 0.06689070251240198, "grad_norm": 0.30201664566993713, "learning_rate": 5e-05, "loss": 1.5935, "step": 418 }, { "epoch": 0.06705072811649863, "grad_norm": 0.3508095443248749, "learning_rate": 5e-05, "loss": 1.6032, "step": 419 }, { "epoch": 0.06721075372059529, "grad_norm": 0.3169856369495392, "learning_rate": 5e-05, "loss": 1.7004, "step": 420 }, { "epoch": 0.06737077932469195, "grad_norm": 0.33313778042793274, "learning_rate": 5e-05, "loss": 1.6565, "step": 421 }, { "epoch": 0.0675308049287886, "grad_norm": 0.2959515452384949, "learning_rate": 5e-05, "loss": 1.6154, "step": 422 }, { "epoch": 0.06769083053288526, "grad_norm": 0.3041284680366516, "learning_rate": 5e-05, "loss": 1.6075, "step": 423 }, { "epoch": 0.06785085613698191, "grad_norm": 0.30414479970932007, "learning_rate": 5e-05, "loss": 1.5803, "step": 424 }, { "epoch": 0.06801088174107857, "grad_norm": 0.30508407950401306, "learning_rate": 5e-05, "loss": 1.6435, "step": 425 }, { "epoch": 0.06817090734517522, "grad_norm": 0.29645824432373047, "learning_rate": 5e-05, "loss": 1.6091, "step": 426 }, { "epoch": 0.06833093294927188, "grad_norm": 0.30009499192237854, "learning_rate": 5e-05, "loss": 1.6792, "step": 427 }, { "epoch": 0.06849095855336854, "grad_norm": 0.3132977783679962, "learning_rate": 5e-05, "loss": 1.6559, "step": 428 }, { "epoch": 0.06865098415746519, "grad_norm": 0.3054090440273285, "learning_rate": 5e-05, "loss": 1.6253, "step": 429 }, { "epoch": 0.06881100976156185, "grad_norm": 0.32166406512260437, "learning_rate": 5e-05, "loss": 1.5814, "step": 430 }, { "epoch": 0.0689710353656585, "grad_norm": 0.2989833950996399, "learning_rate": 5e-05, "loss": 1.6052, "step": 431 }, { "epoch": 0.06913106096975516, "grad_norm": 0.3403615653514862, "learning_rate": 5e-05, "loss": 1.6598, "step": 432 }, { "epoch": 0.06929108657385182, "grad_norm": 0.2992939054965973, "learning_rate": 5e-05, "loss": 1.6522, "step": 433 }, { "epoch": 0.06945111217794847, "grad_norm": 0.3004363775253296, "learning_rate": 5e-05, "loss": 1.5697, "step": 434 }, { "epoch": 0.06961113778204513, "grad_norm": 0.31069472432136536, "learning_rate": 5e-05, "loss": 1.6652, "step": 435 }, { "epoch": 0.06977116338614178, "grad_norm": 0.2941669821739197, "learning_rate": 5e-05, "loss": 1.6096, "step": 436 }, { "epoch": 0.06993118899023844, "grad_norm": 0.3046569526195526, "learning_rate": 5e-05, "loss": 1.6028, "step": 437 }, { "epoch": 0.0700912145943351, "grad_norm": 0.30669739842414856, "learning_rate": 5e-05, "loss": 1.5825, "step": 438 }, { "epoch": 0.07025124019843175, "grad_norm": 0.29633277654647827, "learning_rate": 5e-05, "loss": 1.6279, "step": 439 }, { "epoch": 0.0704112658025284, "grad_norm": 0.31028085947036743, "learning_rate": 5e-05, "loss": 1.6134, "step": 440 }, { "epoch": 0.07057129140662506, "grad_norm": 0.3101019859313965, "learning_rate": 5e-05, "loss": 1.5338, "step": 441 }, { "epoch": 0.07073131701072172, "grad_norm": 0.30048006772994995, "learning_rate": 5e-05, "loss": 1.6276, "step": 442 }, { "epoch": 0.07089134261481837, "grad_norm": 0.3083195984363556, "learning_rate": 5e-05, "loss": 1.6523, "step": 443 }, { "epoch": 0.07105136821891503, "grad_norm": 0.30053335428237915, "learning_rate": 5e-05, "loss": 1.6176, "step": 444 }, { "epoch": 0.07121139382301168, "grad_norm": 0.3119806945323944, "learning_rate": 5e-05, "loss": 1.5309, "step": 445 }, { "epoch": 0.07137141942710834, "grad_norm": 0.30943524837493896, "learning_rate": 5e-05, "loss": 1.6201, "step": 446 }, { "epoch": 0.071531445031205, "grad_norm": 0.3038862347602844, "learning_rate": 5e-05, "loss": 1.651, "step": 447 }, { "epoch": 0.07169147063530165, "grad_norm": 0.3074871301651001, "learning_rate": 5e-05, "loss": 1.6256, "step": 448 }, { "epoch": 0.07185149623939831, "grad_norm": 0.29839396476745605, "learning_rate": 5e-05, "loss": 1.5858, "step": 449 }, { "epoch": 0.07201152184349496, "grad_norm": 0.3066733777523041, "learning_rate": 5e-05, "loss": 1.6321, "step": 450 }, { "epoch": 0.07217154744759162, "grad_norm": 0.30604153871536255, "learning_rate": 5e-05, "loss": 1.563, "step": 451 }, { "epoch": 0.07233157305168827, "grad_norm": 0.31282612681388855, "learning_rate": 5e-05, "loss": 1.6211, "step": 452 }, { "epoch": 0.07249159865578493, "grad_norm": 0.30069202184677124, "learning_rate": 5e-05, "loss": 1.5703, "step": 453 }, { "epoch": 0.07265162425988159, "grad_norm": 0.34988269209861755, "learning_rate": 5e-05, "loss": 1.6284, "step": 454 }, { "epoch": 0.07281164986397824, "grad_norm": 0.3280847370624542, "learning_rate": 5e-05, "loss": 1.6481, "step": 455 }, { "epoch": 0.0729716754680749, "grad_norm": 0.3227294385433197, "learning_rate": 5e-05, "loss": 1.6084, "step": 456 }, { "epoch": 0.07313170107217155, "grad_norm": 0.310819536447525, "learning_rate": 5e-05, "loss": 1.6123, "step": 457 }, { "epoch": 0.07329172667626821, "grad_norm": 0.3199651837348938, "learning_rate": 5e-05, "loss": 1.6401, "step": 458 }, { "epoch": 0.07345175228036485, "grad_norm": 0.3017299473285675, "learning_rate": 5e-05, "loss": 1.6517, "step": 459 }, { "epoch": 0.07361177788446151, "grad_norm": 0.29970991611480713, "learning_rate": 5e-05, "loss": 1.6238, "step": 460 }, { "epoch": 0.07377180348855816, "grad_norm": 0.3139737844467163, "learning_rate": 5e-05, "loss": 1.6195, "step": 461 }, { "epoch": 0.07393182909265482, "grad_norm": 0.30827587842941284, "learning_rate": 5e-05, "loss": 1.6969, "step": 462 }, { "epoch": 0.07409185469675147, "grad_norm": 0.2969421446323395, "learning_rate": 5e-05, "loss": 1.594, "step": 463 }, { "epoch": 0.07425188030084813, "grad_norm": 0.2972060441970825, "learning_rate": 5e-05, "loss": 1.5703, "step": 464 }, { "epoch": 0.07441190590494479, "grad_norm": 0.3118858337402344, "learning_rate": 5e-05, "loss": 1.6933, "step": 465 }, { "epoch": 0.07457193150904144, "grad_norm": 0.3136165738105774, "learning_rate": 5e-05, "loss": 1.6082, "step": 466 }, { "epoch": 0.0747319571131381, "grad_norm": 0.3064090609550476, "learning_rate": 5e-05, "loss": 1.6631, "step": 467 }, { "epoch": 0.07489198271723475, "grad_norm": 0.30154550075531006, "learning_rate": 5e-05, "loss": 1.6581, "step": 468 }, { "epoch": 0.07505200832133141, "grad_norm": 0.3014170825481415, "learning_rate": 5e-05, "loss": 1.6497, "step": 469 }, { "epoch": 0.07521203392542807, "grad_norm": 0.2906557619571686, "learning_rate": 5e-05, "loss": 1.6258, "step": 470 }, { "epoch": 0.07537205952952472, "grad_norm": 0.3035149574279785, "learning_rate": 5e-05, "loss": 1.7115, "step": 471 }, { "epoch": 0.07553208513362138, "grad_norm": 0.31138017773628235, "learning_rate": 5e-05, "loss": 1.5853, "step": 472 }, { "epoch": 0.07569211073771803, "grad_norm": 0.2973853647708893, "learning_rate": 5e-05, "loss": 1.6013, "step": 473 }, { "epoch": 0.07585213634181469, "grad_norm": 0.3112841248512268, "learning_rate": 5e-05, "loss": 1.6747, "step": 474 }, { "epoch": 0.07601216194591134, "grad_norm": 0.3140740990638733, "learning_rate": 5e-05, "loss": 1.6667, "step": 475 }, { "epoch": 0.076172187550008, "grad_norm": 0.30891337990760803, "learning_rate": 5e-05, "loss": 1.6049, "step": 476 }, { "epoch": 0.07633221315410466, "grad_norm": 0.2981501817703247, "learning_rate": 5e-05, "loss": 1.6267, "step": 477 }, { "epoch": 0.07649223875820131, "grad_norm": 0.33325743675231934, "learning_rate": 5e-05, "loss": 1.6724, "step": 478 }, { "epoch": 0.07665226436229797, "grad_norm": 0.307003915309906, "learning_rate": 5e-05, "loss": 1.6703, "step": 479 }, { "epoch": 0.07681228996639462, "grad_norm": 0.2963173985481262, "learning_rate": 5e-05, "loss": 1.6102, "step": 480 }, { "epoch": 0.07697231557049128, "grad_norm": 0.31343507766723633, "learning_rate": 5e-05, "loss": 1.6261, "step": 481 }, { "epoch": 0.07713234117458793, "grad_norm": 0.303345650434494, "learning_rate": 5e-05, "loss": 1.5945, "step": 482 }, { "epoch": 0.07729236677868459, "grad_norm": 0.3250291645526886, "learning_rate": 5e-05, "loss": 1.6167, "step": 483 }, { "epoch": 0.07745239238278125, "grad_norm": 0.29382604360580444, "learning_rate": 5e-05, "loss": 1.6638, "step": 484 }, { "epoch": 0.0776124179868779, "grad_norm": 0.30511409044265747, "learning_rate": 5e-05, "loss": 1.6173, "step": 485 }, { "epoch": 0.07777244359097456, "grad_norm": 0.3163903057575226, "learning_rate": 5e-05, "loss": 1.6619, "step": 486 }, { "epoch": 0.07793246919507121, "grad_norm": 0.30982688069343567, "learning_rate": 5e-05, "loss": 1.6769, "step": 487 }, { "epoch": 0.07809249479916787, "grad_norm": 0.30622681975364685, "learning_rate": 5e-05, "loss": 1.6238, "step": 488 }, { "epoch": 0.07825252040326452, "grad_norm": 0.32685887813568115, "learning_rate": 5e-05, "loss": 1.6483, "step": 489 }, { "epoch": 0.07841254600736118, "grad_norm": 0.31251615285873413, "learning_rate": 5e-05, "loss": 1.6226, "step": 490 }, { "epoch": 0.07857257161145784, "grad_norm": 0.3242247402667999, "learning_rate": 5e-05, "loss": 1.5925, "step": 491 }, { "epoch": 0.07873259721555449, "grad_norm": 0.3045639395713806, "learning_rate": 5e-05, "loss": 1.5839, "step": 492 }, { "epoch": 0.07889262281965115, "grad_norm": 0.295484334230423, "learning_rate": 5e-05, "loss": 1.6115, "step": 493 }, { "epoch": 0.0790526484237478, "grad_norm": 0.3142814040184021, "learning_rate": 5e-05, "loss": 1.6479, "step": 494 }, { "epoch": 0.07921267402784446, "grad_norm": 0.2923520505428314, "learning_rate": 5e-05, "loss": 1.6455, "step": 495 }, { "epoch": 0.07937269963194112, "grad_norm": 0.3190021812915802, "learning_rate": 5e-05, "loss": 1.7293, "step": 496 }, { "epoch": 0.07953272523603777, "grad_norm": 0.2929304838180542, "learning_rate": 5e-05, "loss": 1.5523, "step": 497 }, { "epoch": 0.07969275084013443, "grad_norm": 0.30301862955093384, "learning_rate": 5e-05, "loss": 1.5865, "step": 498 }, { "epoch": 0.07985277644423108, "grad_norm": 0.30172595381736755, "learning_rate": 5e-05, "loss": 1.6172, "step": 499 }, { "epoch": 0.08001280204832774, "grad_norm": 0.3047492504119873, "learning_rate": 5e-05, "loss": 1.6181, "step": 500 }, { "epoch": 0.0801728276524244, "grad_norm": 0.30405908823013306, "learning_rate": 5e-05, "loss": 1.6259, "step": 501 }, { "epoch": 0.08033285325652105, "grad_norm": 0.2988501787185669, "learning_rate": 5e-05, "loss": 1.5289, "step": 502 }, { "epoch": 0.08049287886061769, "grad_norm": 0.35212522745132446, "learning_rate": 5e-05, "loss": 1.6072, "step": 503 }, { "epoch": 0.08065290446471435, "grad_norm": 0.31318211555480957, "learning_rate": 5e-05, "loss": 1.622, "step": 504 }, { "epoch": 0.080812930068811, "grad_norm": 0.3376525342464447, "learning_rate": 5e-05, "loss": 1.5449, "step": 505 }, { "epoch": 0.08097295567290766, "grad_norm": 0.31171175837516785, "learning_rate": 5e-05, "loss": 1.6303, "step": 506 }, { "epoch": 0.08113298127700432, "grad_norm": 0.29472944140434265, "learning_rate": 5e-05, "loss": 1.4624, "step": 507 }, { "epoch": 0.08129300688110097, "grad_norm": 0.3490871787071228, "learning_rate": 5e-05, "loss": 1.6248, "step": 508 }, { "epoch": 0.08145303248519763, "grad_norm": 0.2955804169178009, "learning_rate": 5e-05, "loss": 1.576, "step": 509 }, { "epoch": 0.08161305808929428, "grad_norm": 0.3170541226863861, "learning_rate": 5e-05, "loss": 1.6904, "step": 510 }, { "epoch": 0.08177308369339094, "grad_norm": 0.3200487494468689, "learning_rate": 5e-05, "loss": 1.5178, "step": 511 }, { "epoch": 0.0819331092974876, "grad_norm": 0.3053659200668335, "learning_rate": 5e-05, "loss": 1.613, "step": 512 }, { "epoch": 0.08209313490158425, "grad_norm": 0.32590317726135254, "learning_rate": 5e-05, "loss": 1.5609, "step": 513 }, { "epoch": 0.0822531605056809, "grad_norm": 0.31826773285865784, "learning_rate": 5e-05, "loss": 1.6638, "step": 514 }, { "epoch": 0.08241318610977756, "grad_norm": 0.2962810695171356, "learning_rate": 5e-05, "loss": 1.6516, "step": 515 }, { "epoch": 0.08257321171387422, "grad_norm": 0.32144632935523987, "learning_rate": 5e-05, "loss": 1.6803, "step": 516 }, { "epoch": 0.08273323731797087, "grad_norm": 0.29181748628616333, "learning_rate": 5e-05, "loss": 1.5871, "step": 517 }, { "epoch": 0.08289326292206753, "grad_norm": 0.3102729022502899, "learning_rate": 5e-05, "loss": 1.6105, "step": 518 }, { "epoch": 0.08305328852616418, "grad_norm": 0.3095885217189789, "learning_rate": 5e-05, "loss": 1.5831, "step": 519 }, { "epoch": 0.08321331413026084, "grad_norm": 0.30283018946647644, "learning_rate": 5e-05, "loss": 1.6125, "step": 520 }, { "epoch": 0.0833733397343575, "grad_norm": 0.32937943935394287, "learning_rate": 5e-05, "loss": 1.6143, "step": 521 }, { "epoch": 0.08353336533845415, "grad_norm": 0.3034827411174774, "learning_rate": 5e-05, "loss": 1.6606, "step": 522 }, { "epoch": 0.08369339094255081, "grad_norm": 0.3071899116039276, "learning_rate": 5e-05, "loss": 1.5682, "step": 523 }, { "epoch": 0.08385341654664746, "grad_norm": 0.3082549571990967, "learning_rate": 5e-05, "loss": 1.6626, "step": 524 }, { "epoch": 0.08401344215074412, "grad_norm": 0.29751110076904297, "learning_rate": 5e-05, "loss": 1.6416, "step": 525 }, { "epoch": 0.08417346775484078, "grad_norm": 0.3091004192829132, "learning_rate": 5e-05, "loss": 1.6031, "step": 526 }, { "epoch": 0.08433349335893743, "grad_norm": 0.313174843788147, "learning_rate": 5e-05, "loss": 1.5892, "step": 527 }, { "epoch": 0.08449351896303409, "grad_norm": 0.29282379150390625, "learning_rate": 5e-05, "loss": 1.5948, "step": 528 }, { "epoch": 0.08465354456713074, "grad_norm": 0.2982713580131531, "learning_rate": 5e-05, "loss": 1.5118, "step": 529 }, { "epoch": 0.0848135701712274, "grad_norm": 0.3193838894367218, "learning_rate": 5e-05, "loss": 1.6381, "step": 530 }, { "epoch": 0.08497359577532405, "grad_norm": 0.29905033111572266, "learning_rate": 5e-05, "loss": 1.6063, "step": 531 }, { "epoch": 0.08513362137942071, "grad_norm": 0.29381588101387024, "learning_rate": 5e-05, "loss": 1.6288, "step": 532 }, { "epoch": 0.08529364698351737, "grad_norm": 0.3103218972682953, "learning_rate": 5e-05, "loss": 1.5908, "step": 533 }, { "epoch": 0.08545367258761402, "grad_norm": 0.3018602728843689, "learning_rate": 5e-05, "loss": 1.6307, "step": 534 }, { "epoch": 0.08561369819171068, "grad_norm": 0.2974036633968353, "learning_rate": 5e-05, "loss": 1.6026, "step": 535 }, { "epoch": 0.08577372379580733, "grad_norm": 0.30462729930877686, "learning_rate": 5e-05, "loss": 1.6214, "step": 536 }, { "epoch": 0.08593374939990399, "grad_norm": 0.3207927644252777, "learning_rate": 5e-05, "loss": 1.6573, "step": 537 }, { "epoch": 0.08609377500400064, "grad_norm": 0.2940155267715454, "learning_rate": 5e-05, "loss": 1.6607, "step": 538 }, { "epoch": 0.0862538006080973, "grad_norm": 0.2918504476547241, "learning_rate": 5e-05, "loss": 1.5579, "step": 539 }, { "epoch": 0.08641382621219396, "grad_norm": 0.31203946471214294, "learning_rate": 5e-05, "loss": 1.6469, "step": 540 }, { "epoch": 0.08657385181629061, "grad_norm": 0.2977239489555359, "learning_rate": 5e-05, "loss": 1.604, "step": 541 }, { "epoch": 0.08673387742038727, "grad_norm": 0.30379021167755127, "learning_rate": 5e-05, "loss": 1.6205, "step": 542 }, { "epoch": 0.08689390302448392, "grad_norm": 0.30764690041542053, "learning_rate": 5e-05, "loss": 1.6596, "step": 543 }, { "epoch": 0.08705392862858058, "grad_norm": 0.30596622824668884, "learning_rate": 5e-05, "loss": 1.5794, "step": 544 }, { "epoch": 0.08721395423267723, "grad_norm": 0.28839874267578125, "learning_rate": 5e-05, "loss": 1.5148, "step": 545 }, { "epoch": 0.08737397983677389, "grad_norm": 0.3119364380836487, "learning_rate": 5e-05, "loss": 1.6513, "step": 546 }, { "epoch": 0.08753400544087053, "grad_norm": 0.3030255138874054, "learning_rate": 5e-05, "loss": 1.5683, "step": 547 }, { "epoch": 0.08769403104496719, "grad_norm": 0.30642467737197876, "learning_rate": 5e-05, "loss": 1.6161, "step": 548 }, { "epoch": 0.08785405664906384, "grad_norm": 0.3138048052787781, "learning_rate": 5e-05, "loss": 1.5799, "step": 549 }, { "epoch": 0.0880140822531605, "grad_norm": 0.3135557770729065, "learning_rate": 5e-05, "loss": 1.6421, "step": 550 }, { "epoch": 0.08817410785725716, "grad_norm": 0.34371834993362427, "learning_rate": 5e-05, "loss": 1.6433, "step": 551 }, { "epoch": 0.08833413346135381, "grad_norm": 0.30437275767326355, "learning_rate": 5e-05, "loss": 1.6282, "step": 552 }, { "epoch": 0.08849415906545047, "grad_norm": 0.3078581690788269, "learning_rate": 5e-05, "loss": 1.7148, "step": 553 }, { "epoch": 0.08865418466954712, "grad_norm": 0.31156641244888306, "learning_rate": 5e-05, "loss": 1.6323, "step": 554 }, { "epoch": 0.08881421027364378, "grad_norm": 0.32717275619506836, "learning_rate": 5e-05, "loss": 1.6564, "step": 555 }, { "epoch": 0.08897423587774043, "grad_norm": 0.30805501341819763, "learning_rate": 5e-05, "loss": 1.6838, "step": 556 }, { "epoch": 0.08913426148183709, "grad_norm": 0.30430367588996887, "learning_rate": 5e-05, "loss": 1.6343, "step": 557 }, { "epoch": 0.08929428708593375, "grad_norm": 0.3103387653827667, "learning_rate": 5e-05, "loss": 1.6365, "step": 558 }, { "epoch": 0.0894543126900304, "grad_norm": 0.28941604495048523, "learning_rate": 5e-05, "loss": 1.5528, "step": 559 }, { "epoch": 0.08961433829412706, "grad_norm": 0.2936164438724518, "learning_rate": 5e-05, "loss": 1.5874, "step": 560 }, { "epoch": 0.08977436389822371, "grad_norm": 0.30763083696365356, "learning_rate": 5e-05, "loss": 1.6064, "step": 561 }, { "epoch": 0.08993438950232037, "grad_norm": 0.29869160056114197, "learning_rate": 5e-05, "loss": 1.5834, "step": 562 }, { "epoch": 0.09009441510641703, "grad_norm": 0.3143494725227356, "learning_rate": 5e-05, "loss": 1.6821, "step": 563 }, { "epoch": 0.09025444071051368, "grad_norm": 0.32525527477264404, "learning_rate": 5e-05, "loss": 1.6197, "step": 564 }, { "epoch": 0.09041446631461034, "grad_norm": 0.2968398332595825, "learning_rate": 5e-05, "loss": 1.5141, "step": 565 }, { "epoch": 0.09057449191870699, "grad_norm": 0.3077828884124756, "learning_rate": 5e-05, "loss": 1.6904, "step": 566 }, { "epoch": 0.09073451752280365, "grad_norm": 0.32664042711257935, "learning_rate": 5e-05, "loss": 1.6339, "step": 567 }, { "epoch": 0.0908945431269003, "grad_norm": 0.29486411809921265, "learning_rate": 5e-05, "loss": 1.5459, "step": 568 }, { "epoch": 0.09105456873099696, "grad_norm": 0.30034953355789185, "learning_rate": 5e-05, "loss": 1.6509, "step": 569 }, { "epoch": 0.09121459433509362, "grad_norm": 0.28983354568481445, "learning_rate": 5e-05, "loss": 1.5521, "step": 570 }, { "epoch": 0.09137461993919027, "grad_norm": 0.29198259115219116, "learning_rate": 5e-05, "loss": 1.6003, "step": 571 }, { "epoch": 0.09153464554328693, "grad_norm": 0.30958980321884155, "learning_rate": 5e-05, "loss": 1.6207, "step": 572 }, { "epoch": 0.09169467114738358, "grad_norm": 0.2983740568161011, "learning_rate": 5e-05, "loss": 1.5756, "step": 573 }, { "epoch": 0.09185469675148024, "grad_norm": 0.31183916330337524, "learning_rate": 5e-05, "loss": 1.6897, "step": 574 }, { "epoch": 0.0920147223555769, "grad_norm": 0.2985614240169525, "learning_rate": 5e-05, "loss": 1.4855, "step": 575 }, { "epoch": 0.09217474795967355, "grad_norm": 0.3104637563228607, "learning_rate": 5e-05, "loss": 1.5998, "step": 576 }, { "epoch": 0.0923347735637702, "grad_norm": 0.30364903807640076, "learning_rate": 5e-05, "loss": 1.6641, "step": 577 }, { "epoch": 0.09249479916786686, "grad_norm": 0.3011847138404846, "learning_rate": 5e-05, "loss": 1.5445, "step": 578 }, { "epoch": 0.09265482477196352, "grad_norm": 0.35061657428741455, "learning_rate": 5e-05, "loss": 1.5909, "step": 579 }, { "epoch": 0.09281485037606017, "grad_norm": 0.31468790769577026, "learning_rate": 5e-05, "loss": 1.6186, "step": 580 }, { "epoch": 0.09297487598015683, "grad_norm": 0.3174196779727936, "learning_rate": 5e-05, "loss": 1.5419, "step": 581 }, { "epoch": 0.09313490158425348, "grad_norm": 0.30751723051071167, "learning_rate": 5e-05, "loss": 1.7276, "step": 582 }, { "epoch": 0.09329492718835014, "grad_norm": 0.29205605387687683, "learning_rate": 5e-05, "loss": 1.6098, "step": 583 }, { "epoch": 0.0934549527924468, "grad_norm": 0.3095417320728302, "learning_rate": 5e-05, "loss": 1.5969, "step": 584 }, { "epoch": 0.09361497839654345, "grad_norm": 0.28721368312835693, "learning_rate": 5e-05, "loss": 1.6295, "step": 585 }, { "epoch": 0.09377500400064011, "grad_norm": 0.28687378764152527, "learning_rate": 5e-05, "loss": 1.5141, "step": 586 }, { "epoch": 0.09393502960473676, "grad_norm": 0.3082332909107208, "learning_rate": 5e-05, "loss": 1.6536, "step": 587 }, { "epoch": 0.09409505520883342, "grad_norm": 0.30476710200309753, "learning_rate": 5e-05, "loss": 1.6079, "step": 588 }, { "epoch": 0.09425508081293008, "grad_norm": 0.2939762473106384, "learning_rate": 5e-05, "loss": 1.5667, "step": 589 }, { "epoch": 0.09441510641702672, "grad_norm": 0.3143702447414398, "learning_rate": 5e-05, "loss": 1.6841, "step": 590 }, { "epoch": 0.09457513202112337, "grad_norm": 0.296735018491745, "learning_rate": 5e-05, "loss": 1.6316, "step": 591 }, { "epoch": 0.09473515762522003, "grad_norm": 0.3367016911506653, "learning_rate": 5e-05, "loss": 1.6322, "step": 592 }, { "epoch": 0.09489518322931668, "grad_norm": 0.30039671063423157, "learning_rate": 5e-05, "loss": 1.5797, "step": 593 }, { "epoch": 0.09505520883341334, "grad_norm": 0.30832764506340027, "learning_rate": 5e-05, "loss": 1.62, "step": 594 }, { "epoch": 0.09521523443751, "grad_norm": 0.31107205152511597, "learning_rate": 5e-05, "loss": 1.6837, "step": 595 }, { "epoch": 0.09537526004160665, "grad_norm": 0.30939701199531555, "learning_rate": 5e-05, "loss": 1.6519, "step": 596 }, { "epoch": 0.09553528564570331, "grad_norm": 0.30915677547454834, "learning_rate": 5e-05, "loss": 1.6411, "step": 597 }, { "epoch": 0.09569531124979996, "grad_norm": 0.3090711236000061, "learning_rate": 5e-05, "loss": 1.6878, "step": 598 }, { "epoch": 0.09585533685389662, "grad_norm": 0.31171825528144836, "learning_rate": 5e-05, "loss": 1.605, "step": 599 }, { "epoch": 0.09601536245799328, "grad_norm": 0.299827516078949, "learning_rate": 5e-05, "loss": 1.6267, "step": 600 }, { "epoch": 0.09617538806208993, "grad_norm": 0.30313318967819214, "learning_rate": 5e-05, "loss": 1.6849, "step": 601 }, { "epoch": 0.09633541366618659, "grad_norm": 0.29465293884277344, "learning_rate": 5e-05, "loss": 1.572, "step": 602 }, { "epoch": 0.09649543927028324, "grad_norm": 0.30671995878219604, "learning_rate": 5e-05, "loss": 1.6778, "step": 603 }, { "epoch": 0.0966554648743799, "grad_norm": 0.3070688843727112, "learning_rate": 5e-05, "loss": 1.6061, "step": 604 }, { "epoch": 0.09681549047847655, "grad_norm": 0.3005346357822418, "learning_rate": 5e-05, "loss": 1.6676, "step": 605 }, { "epoch": 0.09697551608257321, "grad_norm": 0.2895006239414215, "learning_rate": 5e-05, "loss": 1.5888, "step": 606 }, { "epoch": 0.09713554168666987, "grad_norm": 0.30605000257492065, "learning_rate": 5e-05, "loss": 1.6216, "step": 607 }, { "epoch": 0.09729556729076652, "grad_norm": 0.31832510232925415, "learning_rate": 5e-05, "loss": 1.6528, "step": 608 }, { "epoch": 0.09745559289486318, "grad_norm": 0.29988694190979004, "learning_rate": 5e-05, "loss": 1.5819, "step": 609 }, { "epoch": 0.09761561849895983, "grad_norm": 0.29925698041915894, "learning_rate": 5e-05, "loss": 1.6022, "step": 610 }, { "epoch": 0.09777564410305649, "grad_norm": 0.29428282380104065, "learning_rate": 5e-05, "loss": 1.6255, "step": 611 }, { "epoch": 0.09793566970715314, "grad_norm": 0.304863303899765, "learning_rate": 5e-05, "loss": 1.6488, "step": 612 }, { "epoch": 0.0980956953112498, "grad_norm": 0.2938240170478821, "learning_rate": 5e-05, "loss": 1.5946, "step": 613 }, { "epoch": 0.09825572091534646, "grad_norm": 0.3073767423629761, "learning_rate": 5e-05, "loss": 1.616, "step": 614 }, { "epoch": 0.09841574651944311, "grad_norm": 0.29406803846359253, "learning_rate": 5e-05, "loss": 1.606, "step": 615 }, { "epoch": 0.09857577212353977, "grad_norm": 0.29523563385009766, "learning_rate": 5e-05, "loss": 1.572, "step": 616 }, { "epoch": 0.09873579772763642, "grad_norm": 0.2984640598297119, "learning_rate": 5e-05, "loss": 1.5989, "step": 617 }, { "epoch": 0.09889582333173308, "grad_norm": 0.3199835419654846, "learning_rate": 5e-05, "loss": 1.574, "step": 618 }, { "epoch": 0.09905584893582974, "grad_norm": 0.3105626702308655, "learning_rate": 5e-05, "loss": 1.6198, "step": 619 }, { "epoch": 0.09921587453992639, "grad_norm": 0.3374694287776947, "learning_rate": 5e-05, "loss": 1.6926, "step": 620 }, { "epoch": 0.09937590014402305, "grad_norm": 0.3184521794319153, "learning_rate": 5e-05, "loss": 1.5477, "step": 621 }, { "epoch": 0.0995359257481197, "grad_norm": 0.3123311698436737, "learning_rate": 5e-05, "loss": 1.66, "step": 622 }, { "epoch": 0.09969595135221636, "grad_norm": 0.29385942220687866, "learning_rate": 5e-05, "loss": 1.5907, "step": 623 }, { "epoch": 0.09985597695631301, "grad_norm": 0.30987349152565, "learning_rate": 5e-05, "loss": 1.6403, "step": 624 }, { "epoch": 0.10001600256040967, "grad_norm": 0.2952224612236023, "learning_rate": 5e-05, "loss": 1.5891, "step": 625 }, { "epoch": 0.10017602816450633, "grad_norm": 0.32032114267349243, "learning_rate": 5e-05, "loss": 1.5793, "step": 626 }, { "epoch": 0.10033605376860298, "grad_norm": 0.3112108111381531, "learning_rate": 5e-05, "loss": 1.64, "step": 627 }, { "epoch": 0.10049607937269964, "grad_norm": 0.2905869483947754, "learning_rate": 5e-05, "loss": 1.6115, "step": 628 }, { "epoch": 0.10065610497679629, "grad_norm": 0.2984757125377655, "learning_rate": 5e-05, "loss": 1.5282, "step": 629 }, { "epoch": 0.10081613058089295, "grad_norm": 0.3027442693710327, "learning_rate": 5e-05, "loss": 1.6363, "step": 630 }, { "epoch": 0.1009761561849896, "grad_norm": 0.29338565468788147, "learning_rate": 5e-05, "loss": 1.6198, "step": 631 }, { "epoch": 0.10113618178908626, "grad_norm": 0.28945276141166687, "learning_rate": 5e-05, "loss": 1.5645, "step": 632 }, { "epoch": 0.10129620739318292, "grad_norm": 0.2931254208087921, "learning_rate": 5e-05, "loss": 1.5729, "step": 633 }, { "epoch": 0.10145623299727956, "grad_norm": 0.3042653799057007, "learning_rate": 5e-05, "loss": 1.6756, "step": 634 }, { "epoch": 0.10161625860137621, "grad_norm": 0.2983352243900299, "learning_rate": 5e-05, "loss": 1.5212, "step": 635 }, { "epoch": 0.10177628420547287, "grad_norm": 0.3023688793182373, "learning_rate": 5e-05, "loss": 1.6216, "step": 636 }, { "epoch": 0.10193630980956953, "grad_norm": 0.30821576714515686, "learning_rate": 5e-05, "loss": 1.5761, "step": 637 }, { "epoch": 0.10209633541366618, "grad_norm": 0.31406259536743164, "learning_rate": 5e-05, "loss": 1.6277, "step": 638 }, { "epoch": 0.10225636101776284, "grad_norm": 0.3031347990036011, "learning_rate": 5e-05, "loss": 1.5976, "step": 639 }, { "epoch": 0.10241638662185949, "grad_norm": 0.2982726991176605, "learning_rate": 5e-05, "loss": 1.604, "step": 640 }, { "epoch": 0.10257641222595615, "grad_norm": 0.2966136932373047, "learning_rate": 5e-05, "loss": 1.6223, "step": 641 }, { "epoch": 0.1027364378300528, "grad_norm": 0.29956188797950745, "learning_rate": 5e-05, "loss": 1.6142, "step": 642 }, { "epoch": 0.10289646343414946, "grad_norm": 0.3253490626811981, "learning_rate": 5e-05, "loss": 1.6347, "step": 643 }, { "epoch": 0.10305648903824612, "grad_norm": 0.30956318974494934, "learning_rate": 5e-05, "loss": 1.6119, "step": 644 }, { "epoch": 0.10321651464234277, "grad_norm": 0.3498069643974304, "learning_rate": 5e-05, "loss": 1.576, "step": 645 }, { "epoch": 0.10337654024643943, "grad_norm": 0.3074623644351959, "learning_rate": 5e-05, "loss": 1.6604, "step": 646 }, { "epoch": 0.10353656585053608, "grad_norm": 0.337668776512146, "learning_rate": 5e-05, "loss": 1.597, "step": 647 }, { "epoch": 0.10369659145463274, "grad_norm": 0.2870078980922699, "learning_rate": 5e-05, "loss": 1.4826, "step": 648 }, { "epoch": 0.1038566170587294, "grad_norm": 0.3022780418395996, "learning_rate": 5e-05, "loss": 1.5658, "step": 649 }, { "epoch": 0.10401664266282605, "grad_norm": 0.3061106204986572, "learning_rate": 5e-05, "loss": 1.5899, "step": 650 }, { "epoch": 0.1041766682669227, "grad_norm": 0.2957930266857147, "learning_rate": 5e-05, "loss": 1.5962, "step": 651 }, { "epoch": 0.10433669387101936, "grad_norm": 0.31133976578712463, "learning_rate": 5e-05, "loss": 1.5742, "step": 652 }, { "epoch": 0.10449671947511602, "grad_norm": 0.29941579699516296, "learning_rate": 5e-05, "loss": 1.6604, "step": 653 }, { "epoch": 0.10465674507921267, "grad_norm": 0.29717785120010376, "learning_rate": 5e-05, "loss": 1.6186, "step": 654 }, { "epoch": 0.10481677068330933, "grad_norm": 0.30876776576042175, "learning_rate": 5e-05, "loss": 1.5742, "step": 655 }, { "epoch": 0.10497679628740599, "grad_norm": 0.2806796133518219, "learning_rate": 5e-05, "loss": 1.4879, "step": 656 }, { "epoch": 0.10513682189150264, "grad_norm": 0.2898222506046295, "learning_rate": 5e-05, "loss": 1.6365, "step": 657 }, { "epoch": 0.1052968474955993, "grad_norm": 0.2970595359802246, "learning_rate": 5e-05, "loss": 1.6309, "step": 658 }, { "epoch": 0.10545687309969595, "grad_norm": 0.3056298792362213, "learning_rate": 5e-05, "loss": 1.5662, "step": 659 }, { "epoch": 0.10561689870379261, "grad_norm": 0.2842145562171936, "learning_rate": 5e-05, "loss": 1.5084, "step": 660 }, { "epoch": 0.10577692430788926, "grad_norm": 0.3115387558937073, "learning_rate": 5e-05, "loss": 1.5845, "step": 661 }, { "epoch": 0.10593694991198592, "grad_norm": 0.3064928352832794, "learning_rate": 5e-05, "loss": 1.6666, "step": 662 }, { "epoch": 0.10609697551608258, "grad_norm": 0.2945149838924408, "learning_rate": 5e-05, "loss": 1.5308, "step": 663 }, { "epoch": 0.10625700112017923, "grad_norm": 0.2981780767440796, "learning_rate": 5e-05, "loss": 1.5565, "step": 664 }, { "epoch": 0.10641702672427589, "grad_norm": 0.2928118109703064, "learning_rate": 5e-05, "loss": 1.5891, "step": 665 }, { "epoch": 0.10657705232837254, "grad_norm": 0.2896968424320221, "learning_rate": 5e-05, "loss": 1.5401, "step": 666 }, { "epoch": 0.1067370779324692, "grad_norm": 0.2826479971408844, "learning_rate": 5e-05, "loss": 1.4813, "step": 667 }, { "epoch": 0.10689710353656585, "grad_norm": 0.2900398075580597, "learning_rate": 5e-05, "loss": 1.565, "step": 668 }, { "epoch": 0.10705712914066251, "grad_norm": 0.29468464851379395, "learning_rate": 5e-05, "loss": 1.6203, "step": 669 }, { "epoch": 0.10721715474475917, "grad_norm": 0.29042181372642517, "learning_rate": 5e-05, "loss": 1.5911, "step": 670 }, { "epoch": 0.10737718034885582, "grad_norm": 0.2930859327316284, "learning_rate": 5e-05, "loss": 1.6065, "step": 671 }, { "epoch": 0.10753720595295248, "grad_norm": 0.3008847236633301, "learning_rate": 5e-05, "loss": 1.5952, "step": 672 }, { "epoch": 0.10769723155704913, "grad_norm": 0.2988526225090027, "learning_rate": 5e-05, "loss": 1.6416, "step": 673 }, { "epoch": 0.10785725716114579, "grad_norm": 0.3082142472267151, "learning_rate": 5e-05, "loss": 1.6634, "step": 674 }, { "epoch": 0.10801728276524244, "grad_norm": 0.3118653893470764, "learning_rate": 5e-05, "loss": 1.5896, "step": 675 }, { "epoch": 0.1081773083693391, "grad_norm": 0.30446675419807434, "learning_rate": 5e-05, "loss": 1.5896, "step": 676 }, { "epoch": 0.10833733397343576, "grad_norm": 0.31452280282974243, "learning_rate": 5e-05, "loss": 1.637, "step": 677 }, { "epoch": 0.1084973595775324, "grad_norm": 0.29650694131851196, "learning_rate": 5e-05, "loss": 1.5945, "step": 678 }, { "epoch": 0.10865738518162905, "grad_norm": 0.29441532492637634, "learning_rate": 5e-05, "loss": 1.6904, "step": 679 }, { "epoch": 0.10881741078572571, "grad_norm": 0.31301969289779663, "learning_rate": 5e-05, "loss": 1.6388, "step": 680 }, { "epoch": 0.10897743638982237, "grad_norm": 0.28702476620674133, "learning_rate": 5e-05, "loss": 1.5486, "step": 681 }, { "epoch": 0.10913746199391902, "grad_norm": 0.29066863656044006, "learning_rate": 5e-05, "loss": 1.6355, "step": 682 }, { "epoch": 0.10929748759801568, "grad_norm": 0.2930125892162323, "learning_rate": 5e-05, "loss": 1.5441, "step": 683 }, { "epoch": 0.10945751320211233, "grad_norm": 0.2908686697483063, "learning_rate": 5e-05, "loss": 1.5692, "step": 684 }, { "epoch": 0.10961753880620899, "grad_norm": 0.28955236077308655, "learning_rate": 5e-05, "loss": 1.51, "step": 685 }, { "epoch": 0.10977756441030564, "grad_norm": 0.28564968705177307, "learning_rate": 5e-05, "loss": 1.5401, "step": 686 }, { "epoch": 0.1099375900144023, "grad_norm": 0.2992635667324066, "learning_rate": 5e-05, "loss": 1.5633, "step": 687 }, { "epoch": 0.11009761561849896, "grad_norm": 0.29855698347091675, "learning_rate": 5e-05, "loss": 1.6371, "step": 688 }, { "epoch": 0.11025764122259561, "grad_norm": 0.2951795756816864, "learning_rate": 5e-05, "loss": 1.6485, "step": 689 }, { "epoch": 0.11041766682669227, "grad_norm": 0.3086166977882385, "learning_rate": 5e-05, "loss": 1.6089, "step": 690 }, { "epoch": 0.11057769243078892, "grad_norm": 0.3080406188964844, "learning_rate": 5e-05, "loss": 1.6254, "step": 691 }, { "epoch": 0.11073771803488558, "grad_norm": 0.28959015011787415, "learning_rate": 5e-05, "loss": 1.5745, "step": 692 }, { "epoch": 0.11089774363898224, "grad_norm": 0.2881214916706085, "learning_rate": 5e-05, "loss": 1.6313, "step": 693 }, { "epoch": 0.11105776924307889, "grad_norm": 0.30555641651153564, "learning_rate": 5e-05, "loss": 1.5766, "step": 694 }, { "epoch": 0.11121779484717555, "grad_norm": 0.2990405559539795, "learning_rate": 5e-05, "loss": 1.658, "step": 695 }, { "epoch": 0.1113778204512722, "grad_norm": 0.3000914454460144, "learning_rate": 5e-05, "loss": 1.6643, "step": 696 }, { "epoch": 0.11153784605536886, "grad_norm": 0.30618467926979065, "learning_rate": 5e-05, "loss": 1.5999, "step": 697 }, { "epoch": 0.11169787165946551, "grad_norm": 0.2855369448661804, "learning_rate": 5e-05, "loss": 1.5335, "step": 698 }, { "epoch": 0.11185789726356217, "grad_norm": 0.29679492115974426, "learning_rate": 5e-05, "loss": 1.6099, "step": 699 }, { "epoch": 0.11201792286765883, "grad_norm": 0.2893508970737457, "learning_rate": 5e-05, "loss": 1.5598, "step": 700 }, { "epoch": 0.11217794847175548, "grad_norm": 0.30533483624458313, "learning_rate": 5e-05, "loss": 1.6124, "step": 701 }, { "epoch": 0.11233797407585214, "grad_norm": 0.29540079832077026, "learning_rate": 5e-05, "loss": 1.6273, "step": 702 }, { "epoch": 0.11249799967994879, "grad_norm": 0.2969230115413666, "learning_rate": 5e-05, "loss": 1.5945, "step": 703 }, { "epoch": 0.11265802528404545, "grad_norm": 0.2913186848163605, "learning_rate": 5e-05, "loss": 1.557, "step": 704 }, { "epoch": 0.1128180508881421, "grad_norm": 0.2982619106769562, "learning_rate": 5e-05, "loss": 1.6871, "step": 705 }, { "epoch": 0.11297807649223876, "grad_norm": 0.2953011691570282, "learning_rate": 5e-05, "loss": 1.594, "step": 706 }, { "epoch": 0.11313810209633542, "grad_norm": 0.2996205985546112, "learning_rate": 5e-05, "loss": 1.5939, "step": 707 }, { "epoch": 0.11329812770043207, "grad_norm": 0.2777642607688904, "learning_rate": 5e-05, "loss": 1.5182, "step": 708 }, { "epoch": 0.11345815330452873, "grad_norm": 0.30393123626708984, "learning_rate": 5e-05, "loss": 1.646, "step": 709 }, { "epoch": 0.11361817890862538, "grad_norm": 0.2883550226688385, "learning_rate": 5e-05, "loss": 1.5396, "step": 710 }, { "epoch": 0.11377820451272204, "grad_norm": 0.2965048551559448, "learning_rate": 5e-05, "loss": 1.5745, "step": 711 }, { "epoch": 0.1139382301168187, "grad_norm": 0.2891002297401428, "learning_rate": 5e-05, "loss": 1.5619, "step": 712 }, { "epoch": 0.11409825572091535, "grad_norm": 0.311901718378067, "learning_rate": 5e-05, "loss": 1.6352, "step": 713 }, { "epoch": 0.114258281325012, "grad_norm": 0.29202064871788025, "learning_rate": 5e-05, "loss": 1.573, "step": 714 }, { "epoch": 0.11441830692910866, "grad_norm": 0.29322075843811035, "learning_rate": 5e-05, "loss": 1.578, "step": 715 }, { "epoch": 0.11457833253320532, "grad_norm": 0.28959155082702637, "learning_rate": 5e-05, "loss": 1.4847, "step": 716 }, { "epoch": 0.11473835813730197, "grad_norm": 0.30818361043930054, "learning_rate": 5e-05, "loss": 1.6286, "step": 717 }, { "epoch": 0.11489838374139863, "grad_norm": 0.3014758825302124, "learning_rate": 5e-05, "loss": 1.6226, "step": 718 }, { "epoch": 0.11505840934549529, "grad_norm": 0.29019802808761597, "learning_rate": 5e-05, "loss": 1.5833, "step": 719 }, { "epoch": 0.11521843494959194, "grad_norm": 0.2899012267589569, "learning_rate": 5e-05, "loss": 1.5743, "step": 720 }, { "epoch": 0.1153784605536886, "grad_norm": 0.32968515157699585, "learning_rate": 5e-05, "loss": 1.657, "step": 721 }, { "epoch": 0.11553848615778524, "grad_norm": 0.3081330358982086, "learning_rate": 5e-05, "loss": 1.6077, "step": 722 }, { "epoch": 0.1156985117618819, "grad_norm": 0.34278762340545654, "learning_rate": 5e-05, "loss": 1.6815, "step": 723 }, { "epoch": 0.11585853736597855, "grad_norm": 0.28579992055892944, "learning_rate": 5e-05, "loss": 1.4749, "step": 724 }, { "epoch": 0.1160185629700752, "grad_norm": 0.31420305371284485, "learning_rate": 5e-05, "loss": 1.562, "step": 725 }, { "epoch": 0.11617858857417186, "grad_norm": 0.3153049349784851, "learning_rate": 5e-05, "loss": 1.6324, "step": 726 }, { "epoch": 0.11633861417826852, "grad_norm": 0.3118608593940735, "learning_rate": 5e-05, "loss": 1.6083, "step": 727 }, { "epoch": 0.11649863978236517, "grad_norm": 0.3220824897289276, "learning_rate": 5e-05, "loss": 1.6254, "step": 728 }, { "epoch": 0.11665866538646183, "grad_norm": 0.29637205600738525, "learning_rate": 5e-05, "loss": 1.5431, "step": 729 }, { "epoch": 0.11681869099055849, "grad_norm": 0.31325390934944153, "learning_rate": 5e-05, "loss": 1.5648, "step": 730 }, { "epoch": 0.11697871659465514, "grad_norm": 0.3007293939590454, "learning_rate": 5e-05, "loss": 1.5831, "step": 731 }, { "epoch": 0.1171387421987518, "grad_norm": 0.28535354137420654, "learning_rate": 5e-05, "loss": 1.6358, "step": 732 }, { "epoch": 0.11729876780284845, "grad_norm": 0.30612680315971375, "learning_rate": 5e-05, "loss": 1.6259, "step": 733 }, { "epoch": 0.11745879340694511, "grad_norm": 0.30608704686164856, "learning_rate": 5e-05, "loss": 1.5876, "step": 734 }, { "epoch": 0.11761881901104176, "grad_norm": 0.29007431864738464, "learning_rate": 5e-05, "loss": 1.5588, "step": 735 }, { "epoch": 0.11777884461513842, "grad_norm": 0.2955438792705536, "learning_rate": 5e-05, "loss": 1.5451, "step": 736 }, { "epoch": 0.11793887021923508, "grad_norm": 0.2995418906211853, "learning_rate": 5e-05, "loss": 1.5565, "step": 737 }, { "epoch": 0.11809889582333173, "grad_norm": 0.2892318665981293, "learning_rate": 5e-05, "loss": 1.5975, "step": 738 }, { "epoch": 0.11825892142742839, "grad_norm": 0.3019961714744568, "learning_rate": 5e-05, "loss": 1.5644, "step": 739 }, { "epoch": 0.11841894703152504, "grad_norm": 0.30186668038368225, "learning_rate": 5e-05, "loss": 1.5752, "step": 740 }, { "epoch": 0.1185789726356217, "grad_norm": 0.30555883049964905, "learning_rate": 5e-05, "loss": 1.5644, "step": 741 }, { "epoch": 0.11873899823971835, "grad_norm": 0.3046540915966034, "learning_rate": 5e-05, "loss": 1.6228, "step": 742 }, { "epoch": 0.11889902384381501, "grad_norm": 0.2895658314228058, "learning_rate": 5e-05, "loss": 1.6006, "step": 743 }, { "epoch": 0.11905904944791167, "grad_norm": 0.2996397912502289, "learning_rate": 5e-05, "loss": 1.6272, "step": 744 }, { "epoch": 0.11921907505200832, "grad_norm": 0.30764731764793396, "learning_rate": 5e-05, "loss": 1.613, "step": 745 }, { "epoch": 0.11937910065610498, "grad_norm": 0.2882448136806488, "learning_rate": 5e-05, "loss": 1.6215, "step": 746 }, { "epoch": 0.11953912626020163, "grad_norm": 0.2971187233924866, "learning_rate": 5e-05, "loss": 1.6052, "step": 747 }, { "epoch": 0.11969915186429829, "grad_norm": 0.30234378576278687, "learning_rate": 5e-05, "loss": 1.566, "step": 748 }, { "epoch": 0.11985917746839495, "grad_norm": 0.31240421533584595, "learning_rate": 5e-05, "loss": 1.6536, "step": 749 }, { "epoch": 0.1200192030724916, "grad_norm": 0.29134464263916016, "learning_rate": 5e-05, "loss": 1.6299, "step": 750 }, { "epoch": 0.12017922867658826, "grad_norm": 0.29125604033470154, "learning_rate": 5e-05, "loss": 1.531, "step": 751 }, { "epoch": 0.12033925428068491, "grad_norm": 0.2951525151729584, "learning_rate": 5e-05, "loss": 1.5862, "step": 752 }, { "epoch": 0.12049927988478157, "grad_norm": 0.2983773648738861, "learning_rate": 5e-05, "loss": 1.6107, "step": 753 }, { "epoch": 0.12065930548887822, "grad_norm": 0.295382559299469, "learning_rate": 5e-05, "loss": 1.5948, "step": 754 }, { "epoch": 0.12081933109297488, "grad_norm": 0.29173845052719116, "learning_rate": 5e-05, "loss": 1.5802, "step": 755 }, { "epoch": 0.12097935669707154, "grad_norm": 0.29330989718437195, "learning_rate": 5e-05, "loss": 1.5503, "step": 756 }, { "epoch": 0.12113938230116819, "grad_norm": 0.2795848548412323, "learning_rate": 5e-05, "loss": 1.5502, "step": 757 }, { "epoch": 0.12129940790526485, "grad_norm": 0.29250627756118774, "learning_rate": 5e-05, "loss": 1.5236, "step": 758 }, { "epoch": 0.1214594335093615, "grad_norm": 0.2763270437717438, "learning_rate": 5e-05, "loss": 1.4623, "step": 759 }, { "epoch": 0.12161945911345816, "grad_norm": 0.28894487023353577, "learning_rate": 5e-05, "loss": 1.5113, "step": 760 }, { "epoch": 0.12177948471755481, "grad_norm": 0.30236637592315674, "learning_rate": 5e-05, "loss": 1.6169, "step": 761 }, { "epoch": 0.12193951032165147, "grad_norm": 0.2951010465621948, "learning_rate": 5e-05, "loss": 1.6232, "step": 762 }, { "epoch": 0.12209953592574813, "grad_norm": 0.29431137442588806, "learning_rate": 5e-05, "loss": 1.6353, "step": 763 }, { "epoch": 0.12225956152984478, "grad_norm": 0.2841886281967163, "learning_rate": 5e-05, "loss": 1.604, "step": 764 }, { "epoch": 0.12241958713394142, "grad_norm": 0.29891103506088257, "learning_rate": 5e-05, "loss": 1.6652, "step": 765 }, { "epoch": 0.12257961273803808, "grad_norm": 0.2879895269870758, "learning_rate": 5e-05, "loss": 1.5427, "step": 766 }, { "epoch": 0.12273963834213474, "grad_norm": 0.2981695234775543, "learning_rate": 5e-05, "loss": 1.6471, "step": 767 }, { "epoch": 0.12289966394623139, "grad_norm": 0.2880483865737915, "learning_rate": 5e-05, "loss": 1.5831, "step": 768 }, { "epoch": 0.12305968955032805, "grad_norm": 0.2980557978153229, "learning_rate": 5e-05, "loss": 1.6157, "step": 769 }, { "epoch": 0.1232197151544247, "grad_norm": 0.31789547204971313, "learning_rate": 5e-05, "loss": 1.6314, "step": 770 }, { "epoch": 0.12337974075852136, "grad_norm": 0.3098287880420685, "learning_rate": 5e-05, "loss": 1.5834, "step": 771 }, { "epoch": 0.12353976636261801, "grad_norm": 0.2869642674922943, "learning_rate": 5e-05, "loss": 1.5646, "step": 772 }, { "epoch": 0.12369979196671467, "grad_norm": 0.3044302463531494, "learning_rate": 5e-05, "loss": 1.6829, "step": 773 }, { "epoch": 0.12385981757081133, "grad_norm": 0.31203973293304443, "learning_rate": 5e-05, "loss": 1.5656, "step": 774 }, { "epoch": 0.12401984317490798, "grad_norm": 0.3066349923610687, "learning_rate": 5e-05, "loss": 1.6318, "step": 775 }, { "epoch": 0.12417986877900464, "grad_norm": 0.303682804107666, "learning_rate": 5e-05, "loss": 1.5934, "step": 776 }, { "epoch": 0.1243398943831013, "grad_norm": 0.3064599335193634, "learning_rate": 5e-05, "loss": 1.6179, "step": 777 }, { "epoch": 0.12449991998719795, "grad_norm": 0.30702218413352966, "learning_rate": 5e-05, "loss": 1.628, "step": 778 }, { "epoch": 0.1246599455912946, "grad_norm": 0.2937961220741272, "learning_rate": 5e-05, "loss": 1.5216, "step": 779 }, { "epoch": 0.12481997119539126, "grad_norm": 0.3397887647151947, "learning_rate": 5e-05, "loss": 1.6696, "step": 780 }, { "epoch": 0.12497999679948792, "grad_norm": 0.3139263689517975, "learning_rate": 5e-05, "loss": 1.6172, "step": 781 }, { "epoch": 0.12514002240358457, "grad_norm": 0.33356645703315735, "learning_rate": 5e-05, "loss": 1.6397, "step": 782 }, { "epoch": 0.12530004800768124, "grad_norm": 0.30480900406837463, "learning_rate": 5e-05, "loss": 1.5878, "step": 783 }, { "epoch": 0.12546007361177788, "grad_norm": 0.30840229988098145, "learning_rate": 5e-05, "loss": 1.5228, "step": 784 }, { "epoch": 0.12562009921587455, "grad_norm": 0.3013967275619507, "learning_rate": 5e-05, "loss": 1.554, "step": 785 }, { "epoch": 0.1257801248199712, "grad_norm": 0.30597206950187683, "learning_rate": 5e-05, "loss": 1.5742, "step": 786 }, { "epoch": 0.12594015042406784, "grad_norm": 0.29931405186653137, "learning_rate": 5e-05, "loss": 1.5259, "step": 787 }, { "epoch": 0.1261001760281645, "grad_norm": 0.3003743886947632, "learning_rate": 5e-05, "loss": 1.7056, "step": 788 }, { "epoch": 0.12626020163226115, "grad_norm": 0.3024829030036926, "learning_rate": 5e-05, "loss": 1.5957, "step": 789 }, { "epoch": 0.12642022723635782, "grad_norm": 0.3044320046901703, "learning_rate": 5e-05, "loss": 1.6054, "step": 790 }, { "epoch": 0.12658025284045446, "grad_norm": 0.2835143506526947, "learning_rate": 5e-05, "loss": 1.5506, "step": 791 }, { "epoch": 0.12674027844455113, "grad_norm": 0.3127005994319916, "learning_rate": 5e-05, "loss": 1.5595, "step": 792 }, { "epoch": 0.12690030404864777, "grad_norm": 0.2912502884864807, "learning_rate": 5e-05, "loss": 1.5606, "step": 793 }, { "epoch": 0.12706032965274444, "grad_norm": 0.3090251386165619, "learning_rate": 5e-05, "loss": 1.5704, "step": 794 }, { "epoch": 0.12722035525684108, "grad_norm": 0.3312755227088928, "learning_rate": 5e-05, "loss": 1.6564, "step": 795 }, { "epoch": 0.12738038086093775, "grad_norm": 0.29105129837989807, "learning_rate": 5e-05, "loss": 1.6027, "step": 796 }, { "epoch": 0.1275404064650344, "grad_norm": 0.3544025719165802, "learning_rate": 5e-05, "loss": 1.5912, "step": 797 }, { "epoch": 0.12770043206913106, "grad_norm": 0.2974134385585785, "learning_rate": 5e-05, "loss": 1.6124, "step": 798 }, { "epoch": 0.1278604576732277, "grad_norm": 0.30576518177986145, "learning_rate": 5e-05, "loss": 1.5667, "step": 799 }, { "epoch": 0.12802048327732438, "grad_norm": 0.29515817761421204, "learning_rate": 5e-05, "loss": 1.62, "step": 800 }, { "epoch": 0.12818050888142102, "grad_norm": 0.2952745258808136, "learning_rate": 5e-05, "loss": 1.5783, "step": 801 }, { "epoch": 0.1283405344855177, "grad_norm": 0.30283021926879883, "learning_rate": 5e-05, "loss": 1.651, "step": 802 }, { "epoch": 0.12850056008961433, "grad_norm": 0.30170300602912903, "learning_rate": 5e-05, "loss": 1.6551, "step": 803 }, { "epoch": 0.128660585693711, "grad_norm": 0.2953396141529083, "learning_rate": 5e-05, "loss": 1.5682, "step": 804 }, { "epoch": 0.12882061129780764, "grad_norm": 0.30071738362312317, "learning_rate": 5e-05, "loss": 1.6154, "step": 805 }, { "epoch": 0.1289806369019043, "grad_norm": 0.28424420952796936, "learning_rate": 5e-05, "loss": 1.5711, "step": 806 }, { "epoch": 0.12914066250600095, "grad_norm": 0.2909202575683594, "learning_rate": 5e-05, "loss": 1.5469, "step": 807 }, { "epoch": 0.12930068811009762, "grad_norm": 0.29297202825546265, "learning_rate": 5e-05, "loss": 1.5744, "step": 808 }, { "epoch": 0.12946071371419426, "grad_norm": 0.29022109508514404, "learning_rate": 5e-05, "loss": 1.514, "step": 809 }, { "epoch": 0.12962073931829093, "grad_norm": 0.2986098527908325, "learning_rate": 5e-05, "loss": 1.5718, "step": 810 }, { "epoch": 0.12978076492238758, "grad_norm": 0.29181432723999023, "learning_rate": 5e-05, "loss": 1.6566, "step": 811 }, { "epoch": 0.12994079052648425, "grad_norm": 0.29088470339775085, "learning_rate": 5e-05, "loss": 1.6421, "step": 812 }, { "epoch": 0.1301008161305809, "grad_norm": 0.2819228768348694, "learning_rate": 5e-05, "loss": 1.5143, "step": 813 }, { "epoch": 0.13026084173467756, "grad_norm": 0.2886964976787567, "learning_rate": 5e-05, "loss": 1.5508, "step": 814 }, { "epoch": 0.1304208673387742, "grad_norm": 0.3035587966442108, "learning_rate": 5e-05, "loss": 1.7411, "step": 815 }, { "epoch": 0.13058089294287087, "grad_norm": 0.29772162437438965, "learning_rate": 5e-05, "loss": 1.629, "step": 816 }, { "epoch": 0.1307409185469675, "grad_norm": 0.3003164529800415, "learning_rate": 5e-05, "loss": 1.6101, "step": 817 }, { "epoch": 0.13090094415106418, "grad_norm": 0.2879394590854645, "learning_rate": 5e-05, "loss": 1.5448, "step": 818 }, { "epoch": 0.13106096975516082, "grad_norm": 0.2995832860469818, "learning_rate": 5e-05, "loss": 1.6254, "step": 819 }, { "epoch": 0.1312209953592575, "grad_norm": 0.2897169291973114, "learning_rate": 5e-05, "loss": 1.6109, "step": 820 }, { "epoch": 0.13138102096335413, "grad_norm": 0.2930617928504944, "learning_rate": 5e-05, "loss": 1.6559, "step": 821 }, { "epoch": 0.1315410465674508, "grad_norm": 0.2968486547470093, "learning_rate": 5e-05, "loss": 1.5826, "step": 822 }, { "epoch": 0.13170107217154745, "grad_norm": 0.29531988501548767, "learning_rate": 5e-05, "loss": 1.6138, "step": 823 }, { "epoch": 0.13186109777564411, "grad_norm": 0.30193498730659485, "learning_rate": 5e-05, "loss": 1.5944, "step": 824 }, { "epoch": 0.13202112337974076, "grad_norm": 0.2871190309524536, "learning_rate": 5e-05, "loss": 1.5257, "step": 825 }, { "epoch": 0.13218114898383743, "grad_norm": 0.30401256680488586, "learning_rate": 5e-05, "loss": 1.5902, "step": 826 }, { "epoch": 0.13234117458793407, "grad_norm": 0.3081870675086975, "learning_rate": 5e-05, "loss": 1.6119, "step": 827 }, { "epoch": 0.13250120019203074, "grad_norm": 0.2903107702732086, "learning_rate": 5e-05, "loss": 1.5261, "step": 828 }, { "epoch": 0.13266122579612738, "grad_norm": 0.29801952838897705, "learning_rate": 5e-05, "loss": 1.6308, "step": 829 }, { "epoch": 0.13282125140022405, "grad_norm": 0.3231615722179413, "learning_rate": 5e-05, "loss": 1.6364, "step": 830 }, { "epoch": 0.1329812770043207, "grad_norm": 0.3014492988586426, "learning_rate": 5e-05, "loss": 1.5941, "step": 831 }, { "epoch": 0.13314130260841733, "grad_norm": 0.28752249479293823, "learning_rate": 5e-05, "loss": 1.5466, "step": 832 }, { "epoch": 0.133301328212514, "grad_norm": 0.3118731379508972, "learning_rate": 5e-05, "loss": 1.5889, "step": 833 }, { "epoch": 0.13346135381661065, "grad_norm": 0.28982195258140564, "learning_rate": 5e-05, "loss": 1.4927, "step": 834 }, { "epoch": 0.13362137942070731, "grad_norm": 0.2962459623813629, "learning_rate": 5e-05, "loss": 1.6138, "step": 835 }, { "epoch": 0.13378140502480396, "grad_norm": 0.2937276065349579, "learning_rate": 5e-05, "loss": 1.5745, "step": 836 }, { "epoch": 0.13394143062890063, "grad_norm": 0.2969258427619934, "learning_rate": 5e-05, "loss": 1.591, "step": 837 }, { "epoch": 0.13410145623299727, "grad_norm": 0.30178165435791016, "learning_rate": 5e-05, "loss": 1.5554, "step": 838 }, { "epoch": 0.13426148183709394, "grad_norm": 0.29791152477264404, "learning_rate": 5e-05, "loss": 1.5243, "step": 839 }, { "epoch": 0.13442150744119058, "grad_norm": 0.2972292900085449, "learning_rate": 5e-05, "loss": 1.6026, "step": 840 }, { "epoch": 0.13458153304528725, "grad_norm": 0.29828283190727234, "learning_rate": 5e-05, "loss": 1.5961, "step": 841 }, { "epoch": 0.1347415586493839, "grad_norm": 0.30477845668792725, "learning_rate": 5e-05, "loss": 1.6745, "step": 842 }, { "epoch": 0.13490158425348056, "grad_norm": 0.2896265387535095, "learning_rate": 5e-05, "loss": 1.5347, "step": 843 }, { "epoch": 0.1350616098575772, "grad_norm": 0.32642999291419983, "learning_rate": 5e-05, "loss": 1.6042, "step": 844 }, { "epoch": 0.13522163546167387, "grad_norm": 0.27565091848373413, "learning_rate": 5e-05, "loss": 1.5124, "step": 845 }, { "epoch": 0.13538166106577051, "grad_norm": 0.31965798139572144, "learning_rate": 5e-05, "loss": 1.5863, "step": 846 }, { "epoch": 0.13554168666986718, "grad_norm": 0.2980991303920746, "learning_rate": 5e-05, "loss": 1.5438, "step": 847 }, { "epoch": 0.13570171227396383, "grad_norm": 0.29740655422210693, "learning_rate": 5e-05, "loss": 1.5818, "step": 848 }, { "epoch": 0.1358617378780605, "grad_norm": 0.3485603630542755, "learning_rate": 5e-05, "loss": 1.6229, "step": 849 }, { "epoch": 0.13602176348215714, "grad_norm": 0.30064839124679565, "learning_rate": 5e-05, "loss": 1.5892, "step": 850 }, { "epoch": 0.1361817890862538, "grad_norm": 0.3122445046901703, "learning_rate": 5e-05, "loss": 1.6038, "step": 851 }, { "epoch": 0.13634181469035045, "grad_norm": 0.3221918046474457, "learning_rate": 5e-05, "loss": 1.5125, "step": 852 }, { "epoch": 0.13650184029444712, "grad_norm": 0.2970459759235382, "learning_rate": 5e-05, "loss": 1.5852, "step": 853 }, { "epoch": 0.13666186589854376, "grad_norm": 0.3400065004825592, "learning_rate": 5e-05, "loss": 1.5491, "step": 854 }, { "epoch": 0.13682189150264043, "grad_norm": 0.30669090151786804, "learning_rate": 5e-05, "loss": 1.6558, "step": 855 }, { "epoch": 0.13698191710673707, "grad_norm": 0.32468393445014954, "learning_rate": 5e-05, "loss": 1.6962, "step": 856 }, { "epoch": 0.13714194271083374, "grad_norm": 0.2993735373020172, "learning_rate": 5e-05, "loss": 1.554, "step": 857 }, { "epoch": 0.13730196831493038, "grad_norm": 0.29629039764404297, "learning_rate": 5e-05, "loss": 1.6031, "step": 858 }, { "epoch": 0.13746199391902705, "grad_norm": 0.3271230459213257, "learning_rate": 5e-05, "loss": 1.6797, "step": 859 }, { "epoch": 0.1376220195231237, "grad_norm": 0.304486483335495, "learning_rate": 5e-05, "loss": 1.6302, "step": 860 }, { "epoch": 0.13778204512722037, "grad_norm": 0.3000296950340271, "learning_rate": 5e-05, "loss": 1.598, "step": 861 }, { "epoch": 0.137942070731317, "grad_norm": 0.30352818965911865, "learning_rate": 5e-05, "loss": 1.6153, "step": 862 }, { "epoch": 0.13810209633541368, "grad_norm": 0.3007512390613556, "learning_rate": 5e-05, "loss": 1.6453, "step": 863 }, { "epoch": 0.13826212193951032, "grad_norm": 0.3052937984466553, "learning_rate": 5e-05, "loss": 1.6407, "step": 864 }, { "epoch": 0.138422147543607, "grad_norm": 0.29754966497421265, "learning_rate": 5e-05, "loss": 1.6198, "step": 865 }, { "epoch": 0.13858217314770363, "grad_norm": 0.29007822275161743, "learning_rate": 5e-05, "loss": 1.5957, "step": 866 }, { "epoch": 0.1387421987518003, "grad_norm": 0.3013879954814911, "learning_rate": 5e-05, "loss": 1.5982, "step": 867 }, { "epoch": 0.13890222435589694, "grad_norm": 0.29954999685287476, "learning_rate": 5e-05, "loss": 1.6088, "step": 868 }, { "epoch": 0.1390622499599936, "grad_norm": 0.2969076335430145, "learning_rate": 5e-05, "loss": 1.6152, "step": 869 }, { "epoch": 0.13922227556409025, "grad_norm": 0.2877527177333832, "learning_rate": 5e-05, "loss": 1.5851, "step": 870 }, { "epoch": 0.13938230116818692, "grad_norm": 0.2938113510608673, "learning_rate": 5e-05, "loss": 1.6389, "step": 871 }, { "epoch": 0.13954232677228356, "grad_norm": 0.30200934410095215, "learning_rate": 5e-05, "loss": 1.6202, "step": 872 }, { "epoch": 0.13970235237638023, "grad_norm": 0.2979181408882141, "learning_rate": 5e-05, "loss": 1.6135, "step": 873 }, { "epoch": 0.13986237798047688, "grad_norm": 0.29870763421058655, "learning_rate": 5e-05, "loss": 1.6318, "step": 874 }, { "epoch": 0.14002240358457352, "grad_norm": 0.2865467965602875, "learning_rate": 5e-05, "loss": 1.4631, "step": 875 }, { "epoch": 0.1401824291886702, "grad_norm": 0.2885511815547943, "learning_rate": 5e-05, "loss": 1.5004, "step": 876 }, { "epoch": 0.14034245479276683, "grad_norm": 0.3141051232814789, "learning_rate": 5e-05, "loss": 1.5293, "step": 877 }, { "epoch": 0.1405024803968635, "grad_norm": 0.30619579553604126, "learning_rate": 5e-05, "loss": 1.6674, "step": 878 }, { "epoch": 0.14066250600096014, "grad_norm": 0.3047814965248108, "learning_rate": 5e-05, "loss": 1.6311, "step": 879 }, { "epoch": 0.1408225316050568, "grad_norm": 0.29048269987106323, "learning_rate": 5e-05, "loss": 1.5966, "step": 880 }, { "epoch": 0.14098255720915345, "grad_norm": 0.2772173583507538, "learning_rate": 5e-05, "loss": 1.4642, "step": 881 }, { "epoch": 0.14114258281325012, "grad_norm": 0.29815253615379333, "learning_rate": 5e-05, "loss": 1.6504, "step": 882 }, { "epoch": 0.14130260841734676, "grad_norm": 0.2992915213108063, "learning_rate": 5e-05, "loss": 1.6715, "step": 883 }, { "epoch": 0.14146263402144343, "grad_norm": 0.2984220087528229, "learning_rate": 5e-05, "loss": 1.6368, "step": 884 }, { "epoch": 0.14162265962554008, "grad_norm": 0.30514660477638245, "learning_rate": 5e-05, "loss": 1.583, "step": 885 }, { "epoch": 0.14178268522963675, "grad_norm": 0.3005625307559967, "learning_rate": 5e-05, "loss": 1.6196, "step": 886 }, { "epoch": 0.1419427108337334, "grad_norm": 0.302401065826416, "learning_rate": 5e-05, "loss": 1.6236, "step": 887 }, { "epoch": 0.14210273643783006, "grad_norm": 0.29878750443458557, "learning_rate": 5e-05, "loss": 1.5393, "step": 888 }, { "epoch": 0.1422627620419267, "grad_norm": 0.29336288571357727, "learning_rate": 5e-05, "loss": 1.5683, "step": 889 }, { "epoch": 0.14242278764602337, "grad_norm": 0.33210518956184387, "learning_rate": 5e-05, "loss": 1.6144, "step": 890 }, { "epoch": 0.14258281325012, "grad_norm": 0.311034232378006, "learning_rate": 5e-05, "loss": 1.6227, "step": 891 }, { "epoch": 0.14274283885421668, "grad_norm": 0.31558841466903687, "learning_rate": 5e-05, "loss": 1.6307, "step": 892 }, { "epoch": 0.14290286445831332, "grad_norm": 0.3196813762187958, "learning_rate": 5e-05, "loss": 1.5915, "step": 893 }, { "epoch": 0.14306289006241, "grad_norm": 0.2888175845146179, "learning_rate": 5e-05, "loss": 1.5719, "step": 894 }, { "epoch": 0.14322291566650663, "grad_norm": 0.331696093082428, "learning_rate": 5e-05, "loss": 1.5677, "step": 895 }, { "epoch": 0.1433829412706033, "grad_norm": 0.3146438002586365, "learning_rate": 5e-05, "loss": 1.6412, "step": 896 }, { "epoch": 0.14354296687469995, "grad_norm": 0.3150005638599396, "learning_rate": 5e-05, "loss": 1.6497, "step": 897 }, { "epoch": 0.14370299247879662, "grad_norm": 0.3296453654766083, "learning_rate": 5e-05, "loss": 1.5527, "step": 898 }, { "epoch": 0.14386301808289326, "grad_norm": 0.30586546659469604, "learning_rate": 5e-05, "loss": 1.6051, "step": 899 }, { "epoch": 0.14402304368698993, "grad_norm": 0.29913848638534546, "learning_rate": 5e-05, "loss": 1.6035, "step": 900 }, { "epoch": 0.14418306929108657, "grad_norm": 0.30532097816467285, "learning_rate": 5e-05, "loss": 1.6081, "step": 901 }, { "epoch": 0.14434309489518324, "grad_norm": 0.29329797625541687, "learning_rate": 5e-05, "loss": 1.5894, "step": 902 }, { "epoch": 0.14450312049927988, "grad_norm": 0.2930368185043335, "learning_rate": 5e-05, "loss": 1.5103, "step": 903 }, { "epoch": 0.14466314610337655, "grad_norm": 0.30382710695266724, "learning_rate": 5e-05, "loss": 1.5839, "step": 904 }, { "epoch": 0.1448231717074732, "grad_norm": 0.2909093499183655, "learning_rate": 5e-05, "loss": 1.5627, "step": 905 }, { "epoch": 0.14498319731156986, "grad_norm": 0.29637444019317627, "learning_rate": 5e-05, "loss": 1.5339, "step": 906 }, { "epoch": 0.1451432229156665, "grad_norm": 0.3020568788051605, "learning_rate": 5e-05, "loss": 1.5835, "step": 907 }, { "epoch": 0.14530324851976317, "grad_norm": 0.2940908372402191, "learning_rate": 5e-05, "loss": 1.6191, "step": 908 }, { "epoch": 0.14546327412385981, "grad_norm": 0.3071933090686798, "learning_rate": 5e-05, "loss": 1.6458, "step": 909 }, { "epoch": 0.14562329972795648, "grad_norm": 0.3176635801792145, "learning_rate": 5e-05, "loss": 1.6202, "step": 910 }, { "epoch": 0.14578332533205313, "grad_norm": 0.29852572083473206, "learning_rate": 5e-05, "loss": 1.5665, "step": 911 }, { "epoch": 0.1459433509361498, "grad_norm": 0.3266286551952362, "learning_rate": 5e-05, "loss": 1.5666, "step": 912 }, { "epoch": 0.14610337654024644, "grad_norm": 0.2765955924987793, "learning_rate": 5e-05, "loss": 1.4805, "step": 913 }, { "epoch": 0.1462634021443431, "grad_norm": 0.3100742697715759, "learning_rate": 5e-05, "loss": 1.5707, "step": 914 }, { "epoch": 0.14642342774843975, "grad_norm": 0.30468034744262695, "learning_rate": 5e-05, "loss": 1.6005, "step": 915 }, { "epoch": 0.14658345335253642, "grad_norm": 0.30679404735565186, "learning_rate": 5e-05, "loss": 1.6439, "step": 916 }, { "epoch": 0.14674347895663306, "grad_norm": 0.27446678280830383, "learning_rate": 5e-05, "loss": 1.4754, "step": 917 }, { "epoch": 0.1469035045607297, "grad_norm": 0.2999933063983917, "learning_rate": 5e-05, "loss": 1.6413, "step": 918 }, { "epoch": 0.14706353016482637, "grad_norm": 0.29735299944877625, "learning_rate": 5e-05, "loss": 1.645, "step": 919 }, { "epoch": 0.14722355576892301, "grad_norm": 0.29471054673194885, "learning_rate": 5e-05, "loss": 1.4986, "step": 920 }, { "epoch": 0.14738358137301968, "grad_norm": 0.29777848720550537, "learning_rate": 5e-05, "loss": 1.5595, "step": 921 }, { "epoch": 0.14754360697711633, "grad_norm": 0.2944524884223938, "learning_rate": 5e-05, "loss": 1.5582, "step": 922 }, { "epoch": 0.147703632581213, "grad_norm": 0.2921058237552643, "learning_rate": 5e-05, "loss": 1.5549, "step": 923 }, { "epoch": 0.14786365818530964, "grad_norm": 0.29313722252845764, "learning_rate": 5e-05, "loss": 1.6473, "step": 924 }, { "epoch": 0.1480236837894063, "grad_norm": 0.30857476592063904, "learning_rate": 5e-05, "loss": 1.5956, "step": 925 }, { "epoch": 0.14818370939350295, "grad_norm": 0.2884410321712494, "learning_rate": 5e-05, "loss": 1.5622, "step": 926 }, { "epoch": 0.14834373499759962, "grad_norm": 0.3324461579322815, "learning_rate": 5e-05, "loss": 1.5783, "step": 927 }, { "epoch": 0.14850376060169626, "grad_norm": 0.2976686656475067, "learning_rate": 5e-05, "loss": 1.558, "step": 928 }, { "epoch": 0.14866378620579293, "grad_norm": 0.2867680490016937, "learning_rate": 5e-05, "loss": 1.5941, "step": 929 }, { "epoch": 0.14882381180988957, "grad_norm": 0.2962500751018524, "learning_rate": 5e-05, "loss": 1.5677, "step": 930 }, { "epoch": 0.14898383741398624, "grad_norm": 0.2945684492588043, "learning_rate": 5e-05, "loss": 1.5231, "step": 931 }, { "epoch": 0.14914386301808288, "grad_norm": 0.29288578033447266, "learning_rate": 5e-05, "loss": 1.5758, "step": 932 }, { "epoch": 0.14930388862217955, "grad_norm": 0.31851452589035034, "learning_rate": 5e-05, "loss": 1.7168, "step": 933 }, { "epoch": 0.1494639142262762, "grad_norm": 0.2827620208263397, "learning_rate": 5e-05, "loss": 1.5343, "step": 934 }, { "epoch": 0.14962393983037287, "grad_norm": 0.29435211420059204, "learning_rate": 5e-05, "loss": 1.5541, "step": 935 }, { "epoch": 0.1497839654344695, "grad_norm": 0.29314184188842773, "learning_rate": 5e-05, "loss": 1.5364, "step": 936 }, { "epoch": 0.14994399103856618, "grad_norm": 0.30073368549346924, "learning_rate": 5e-05, "loss": 1.5935, "step": 937 }, { "epoch": 0.15010401664266282, "grad_norm": 0.28710225224494934, "learning_rate": 5e-05, "loss": 1.5599, "step": 938 }, { "epoch": 0.1502640422467595, "grad_norm": 0.30174529552459717, "learning_rate": 5e-05, "loss": 1.5688, "step": 939 }, { "epoch": 0.15042406785085613, "grad_norm": 0.29771339893341064, "learning_rate": 5e-05, "loss": 1.5468, "step": 940 }, { "epoch": 0.1505840934549528, "grad_norm": 0.3065602481365204, "learning_rate": 5e-05, "loss": 1.5876, "step": 941 }, { "epoch": 0.15074411905904944, "grad_norm": 0.3068706691265106, "learning_rate": 5e-05, "loss": 1.5831, "step": 942 }, { "epoch": 0.1509041446631461, "grad_norm": 0.31022870540618896, "learning_rate": 5e-05, "loss": 1.617, "step": 943 }, { "epoch": 0.15106417026724275, "grad_norm": 0.2933255732059479, "learning_rate": 5e-05, "loss": 1.5208, "step": 944 }, { "epoch": 0.15122419587133942, "grad_norm": 0.2900463938713074, "learning_rate": 5e-05, "loss": 1.5081, "step": 945 }, { "epoch": 0.15138422147543606, "grad_norm": 0.3127414882183075, "learning_rate": 5e-05, "loss": 1.6059, "step": 946 }, { "epoch": 0.15154424707953273, "grad_norm": 0.2887090742588043, "learning_rate": 5e-05, "loss": 1.558, "step": 947 }, { "epoch": 0.15170427268362938, "grad_norm": 0.2976473867893219, "learning_rate": 5e-05, "loss": 1.6086, "step": 948 }, { "epoch": 0.15186429828772605, "grad_norm": 0.29422783851623535, "learning_rate": 5e-05, "loss": 1.5477, "step": 949 }, { "epoch": 0.1520243238918227, "grad_norm": 0.29546162486076355, "learning_rate": 5e-05, "loss": 1.5305, "step": 950 }, { "epoch": 0.15218434949591936, "grad_norm": 0.29477888345718384, "learning_rate": 5e-05, "loss": 1.5799, "step": 951 }, { "epoch": 0.152344375100016, "grad_norm": 0.312374472618103, "learning_rate": 5e-05, "loss": 1.6442, "step": 952 }, { "epoch": 0.15250440070411267, "grad_norm": 0.2929285764694214, "learning_rate": 5e-05, "loss": 1.6077, "step": 953 }, { "epoch": 0.1526644263082093, "grad_norm": 0.2860204875469208, "learning_rate": 5e-05, "loss": 1.56, "step": 954 }, { "epoch": 0.15282445191230598, "grad_norm": 0.2989216446876526, "learning_rate": 5e-05, "loss": 1.5497, "step": 955 }, { "epoch": 0.15298447751640262, "grad_norm": 0.29368624091148376, "learning_rate": 5e-05, "loss": 1.575, "step": 956 }, { "epoch": 0.1531445031204993, "grad_norm": 0.31186172366142273, "learning_rate": 5e-05, "loss": 1.5637, "step": 957 }, { "epoch": 0.15330452872459593, "grad_norm": 0.29648932814598083, "learning_rate": 5e-05, "loss": 1.5488, "step": 958 }, { "epoch": 0.1534645543286926, "grad_norm": 0.2895539700984955, "learning_rate": 5e-05, "loss": 1.5249, "step": 959 }, { "epoch": 0.15362457993278925, "grad_norm": 0.30408424139022827, "learning_rate": 5e-05, "loss": 1.6046, "step": 960 }, { "epoch": 0.15378460553688592, "grad_norm": 0.30127936601638794, "learning_rate": 5e-05, "loss": 1.6543, "step": 961 }, { "epoch": 0.15394463114098256, "grad_norm": 0.30784302949905396, "learning_rate": 5e-05, "loss": 1.6335, "step": 962 }, { "epoch": 0.1541046567450792, "grad_norm": 0.3184601962566376, "learning_rate": 5e-05, "loss": 1.7322, "step": 963 }, { "epoch": 0.15426468234917587, "grad_norm": 0.3028116822242737, "learning_rate": 5e-05, "loss": 1.5512, "step": 964 }, { "epoch": 0.1544247079532725, "grad_norm": 0.2948959171772003, "learning_rate": 5e-05, "loss": 1.5622, "step": 965 }, { "epoch": 0.15458473355736918, "grad_norm": 0.30607032775878906, "learning_rate": 5e-05, "loss": 1.5493, "step": 966 }, { "epoch": 0.15474475916146582, "grad_norm": 0.2958053946495056, "learning_rate": 5e-05, "loss": 1.5717, "step": 967 }, { "epoch": 0.1549047847655625, "grad_norm": 0.28853267431259155, "learning_rate": 5e-05, "loss": 1.5386, "step": 968 }, { "epoch": 0.15506481036965913, "grad_norm": 0.2870332896709442, "learning_rate": 5e-05, "loss": 1.6132, "step": 969 }, { "epoch": 0.1552248359737558, "grad_norm": 0.3075844645500183, "learning_rate": 5e-05, "loss": 1.5118, "step": 970 }, { "epoch": 0.15538486157785245, "grad_norm": 0.29530927538871765, "learning_rate": 5e-05, "loss": 1.5529, "step": 971 }, { "epoch": 0.15554488718194912, "grad_norm": 0.3139267563819885, "learning_rate": 5e-05, "loss": 1.6548, "step": 972 }, { "epoch": 0.15570491278604576, "grad_norm": 0.2976396381855011, "learning_rate": 5e-05, "loss": 1.6189, "step": 973 }, { "epoch": 0.15586493839014243, "grad_norm": 0.3263811469078064, "learning_rate": 5e-05, "loss": 1.6047, "step": 974 }, { "epoch": 0.15602496399423907, "grad_norm": 0.30079320073127747, "learning_rate": 5e-05, "loss": 1.5617, "step": 975 }, { "epoch": 0.15618498959833574, "grad_norm": 0.2936108112335205, "learning_rate": 5e-05, "loss": 1.4817, "step": 976 }, { "epoch": 0.15634501520243238, "grad_norm": 0.3151743412017822, "learning_rate": 5e-05, "loss": 1.564, "step": 977 }, { "epoch": 0.15650504080652905, "grad_norm": 0.3024202287197113, "learning_rate": 5e-05, "loss": 1.5681, "step": 978 }, { "epoch": 0.1566650664106257, "grad_norm": 0.296190470457077, "learning_rate": 5e-05, "loss": 1.5482, "step": 979 }, { "epoch": 0.15682509201472236, "grad_norm": 0.30220407247543335, "learning_rate": 5e-05, "loss": 1.5871, "step": 980 }, { "epoch": 0.156985117618819, "grad_norm": 0.31003767251968384, "learning_rate": 5e-05, "loss": 1.5818, "step": 981 }, { "epoch": 0.15714514322291567, "grad_norm": 0.3098604679107666, "learning_rate": 5e-05, "loss": 1.5964, "step": 982 }, { "epoch": 0.15730516882701231, "grad_norm": 0.3050302565097809, "learning_rate": 5e-05, "loss": 1.5803, "step": 983 }, { "epoch": 0.15746519443110898, "grad_norm": 0.2958250343799591, "learning_rate": 5e-05, "loss": 1.5765, "step": 984 }, { "epoch": 0.15762522003520563, "grad_norm": 0.2967265546321869, "learning_rate": 5e-05, "loss": 1.5148, "step": 985 }, { "epoch": 0.1577852456393023, "grad_norm": 0.3025534451007843, "learning_rate": 5e-05, "loss": 1.5837, "step": 986 }, { "epoch": 0.15794527124339894, "grad_norm": 0.2992275655269623, "learning_rate": 5e-05, "loss": 1.5306, "step": 987 }, { "epoch": 0.1581052968474956, "grad_norm": 0.3188266456127167, "learning_rate": 5e-05, "loss": 1.5581, "step": 988 }, { "epoch": 0.15826532245159225, "grad_norm": 0.2885044515132904, "learning_rate": 5e-05, "loss": 1.4997, "step": 989 }, { "epoch": 0.15842534805568892, "grad_norm": 0.3375054895877838, "learning_rate": 5e-05, "loss": 1.5854, "step": 990 }, { "epoch": 0.15858537365978556, "grad_norm": 0.29627370834350586, "learning_rate": 5e-05, "loss": 1.5813, "step": 991 }, { "epoch": 0.15874539926388223, "grad_norm": 0.31443554162979126, "learning_rate": 5e-05, "loss": 1.5612, "step": 992 }, { "epoch": 0.15890542486797887, "grad_norm": 0.2929462492465973, "learning_rate": 5e-05, "loss": 1.6, "step": 993 }, { "epoch": 0.15906545047207554, "grad_norm": 0.293769896030426, "learning_rate": 5e-05, "loss": 1.585, "step": 994 }, { "epoch": 0.15922547607617218, "grad_norm": 0.3095178008079529, "learning_rate": 5e-05, "loss": 1.5889, "step": 995 }, { "epoch": 0.15938550168026885, "grad_norm": 0.2965770661830902, "learning_rate": 5e-05, "loss": 1.5698, "step": 996 }, { "epoch": 0.1595455272843655, "grad_norm": 0.2989773750305176, "learning_rate": 5e-05, "loss": 1.6073, "step": 997 }, { "epoch": 0.15970555288846217, "grad_norm": 0.28702905774116516, "learning_rate": 5e-05, "loss": 1.5027, "step": 998 }, { "epoch": 0.1598655784925588, "grad_norm": 0.31437382102012634, "learning_rate": 5e-05, "loss": 1.5696, "step": 999 }, { "epoch": 0.16002560409665548, "grad_norm": 0.3292289674282074, "learning_rate": 5e-05, "loss": 1.6253, "step": 1000 }, { "epoch": 0.16018562970075212, "grad_norm": 0.30471065640449524, "learning_rate": 5e-05, "loss": 1.5357, "step": 1001 }, { "epoch": 0.1603456553048488, "grad_norm": 0.33436208963394165, "learning_rate": 5e-05, "loss": 1.5446, "step": 1002 }, { "epoch": 0.16050568090894543, "grad_norm": 0.30622902512550354, "learning_rate": 5e-05, "loss": 1.5698, "step": 1003 }, { "epoch": 0.1606657065130421, "grad_norm": 0.3146175742149353, "learning_rate": 5e-05, "loss": 1.5992, "step": 1004 }, { "epoch": 0.16082573211713874, "grad_norm": 0.32538101077079773, "learning_rate": 5e-05, "loss": 1.5639, "step": 1005 }, { "epoch": 0.16098575772123538, "grad_norm": 0.30118420720100403, "learning_rate": 5e-05, "loss": 1.5631, "step": 1006 }, { "epoch": 0.16114578332533205, "grad_norm": 0.31093305349349976, "learning_rate": 5e-05, "loss": 1.6619, "step": 1007 }, { "epoch": 0.1613058089294287, "grad_norm": 0.29856178164482117, "learning_rate": 5e-05, "loss": 1.5925, "step": 1008 }, { "epoch": 0.16146583453352537, "grad_norm": 0.297318696975708, "learning_rate": 5e-05, "loss": 1.5569, "step": 1009 }, { "epoch": 0.161625860137622, "grad_norm": 0.30855631828308105, "learning_rate": 5e-05, "loss": 1.5844, "step": 1010 }, { "epoch": 0.16178588574171868, "grad_norm": 0.2999917268753052, "learning_rate": 5e-05, "loss": 1.5798, "step": 1011 }, { "epoch": 0.16194591134581532, "grad_norm": 0.3039539158344269, "learning_rate": 5e-05, "loss": 1.6355, "step": 1012 }, { "epoch": 0.162105936949912, "grad_norm": 0.29482075572013855, "learning_rate": 5e-05, "loss": 1.5884, "step": 1013 }, { "epoch": 0.16226596255400863, "grad_norm": 0.2951284646987915, "learning_rate": 5e-05, "loss": 1.6014, "step": 1014 }, { "epoch": 0.1624259881581053, "grad_norm": 0.3414249122142792, "learning_rate": 5e-05, "loss": 1.6569, "step": 1015 }, { "epoch": 0.16258601376220194, "grad_norm": 0.2938511371612549, "learning_rate": 5e-05, "loss": 1.6041, "step": 1016 }, { "epoch": 0.1627460393662986, "grad_norm": 0.28433966636657715, "learning_rate": 5e-05, "loss": 1.5402, "step": 1017 }, { "epoch": 0.16290606497039525, "grad_norm": 0.2893889844417572, "learning_rate": 5e-05, "loss": 1.573, "step": 1018 }, { "epoch": 0.16306609057449192, "grad_norm": 0.2977442145347595, "learning_rate": 5e-05, "loss": 1.6659, "step": 1019 }, { "epoch": 0.16322611617858857, "grad_norm": 0.29594242572784424, "learning_rate": 5e-05, "loss": 1.6083, "step": 1020 }, { "epoch": 0.16338614178268523, "grad_norm": 0.3069380223751068, "learning_rate": 5e-05, "loss": 1.6372, "step": 1021 }, { "epoch": 0.16354616738678188, "grad_norm": 0.283553808927536, "learning_rate": 5e-05, "loss": 1.5232, "step": 1022 }, { "epoch": 0.16370619299087855, "grad_norm": 0.304360032081604, "learning_rate": 5e-05, "loss": 1.5656, "step": 1023 }, { "epoch": 0.1638662185949752, "grad_norm": 0.2971641421318054, "learning_rate": 5e-05, "loss": 1.5558, "step": 1024 }, { "epoch": 0.16402624419907186, "grad_norm": 0.30860450863838196, "learning_rate": 5e-05, "loss": 1.61, "step": 1025 }, { "epoch": 0.1641862698031685, "grad_norm": 0.2964167296886444, "learning_rate": 5e-05, "loss": 1.5829, "step": 1026 }, { "epoch": 0.16434629540726517, "grad_norm": 0.29301807284355164, "learning_rate": 5e-05, "loss": 1.5697, "step": 1027 }, { "epoch": 0.1645063210113618, "grad_norm": 0.2849937081336975, "learning_rate": 5e-05, "loss": 1.5078, "step": 1028 }, { "epoch": 0.16466634661545848, "grad_norm": 0.29735931754112244, "learning_rate": 5e-05, "loss": 1.5914, "step": 1029 }, { "epoch": 0.16482637221955512, "grad_norm": 0.3012527823448181, "learning_rate": 5e-05, "loss": 1.6154, "step": 1030 }, { "epoch": 0.1649863978236518, "grad_norm": 0.2989341914653778, "learning_rate": 5e-05, "loss": 1.5597, "step": 1031 }, { "epoch": 0.16514642342774843, "grad_norm": 0.298776239156723, "learning_rate": 5e-05, "loss": 1.5318, "step": 1032 }, { "epoch": 0.1653064490318451, "grad_norm": 0.29288366436958313, "learning_rate": 5e-05, "loss": 1.5667, "step": 1033 }, { "epoch": 0.16546647463594175, "grad_norm": 0.28644201159477234, "learning_rate": 5e-05, "loss": 1.5656, "step": 1034 }, { "epoch": 0.16562650024003842, "grad_norm": 0.2953053414821625, "learning_rate": 5e-05, "loss": 1.5684, "step": 1035 }, { "epoch": 0.16578652584413506, "grad_norm": 0.30549904704093933, "learning_rate": 5e-05, "loss": 1.5665, "step": 1036 }, { "epoch": 0.16594655144823173, "grad_norm": 0.3063546121120453, "learning_rate": 5e-05, "loss": 1.6347, "step": 1037 }, { "epoch": 0.16610657705232837, "grad_norm": 0.29316794872283936, "learning_rate": 5e-05, "loss": 1.6095, "step": 1038 }, { "epoch": 0.16626660265642504, "grad_norm": 0.3040464520454407, "learning_rate": 5e-05, "loss": 1.6067, "step": 1039 }, { "epoch": 0.16642662826052168, "grad_norm": 0.3050750494003296, "learning_rate": 5e-05, "loss": 1.5765, "step": 1040 }, { "epoch": 0.16658665386461835, "grad_norm": 0.29614749550819397, "learning_rate": 5e-05, "loss": 1.5291, "step": 1041 }, { "epoch": 0.166746679468715, "grad_norm": 0.3062516450881958, "learning_rate": 5e-05, "loss": 1.5689, "step": 1042 }, { "epoch": 0.16690670507281166, "grad_norm": 0.2911951541900635, "learning_rate": 5e-05, "loss": 1.5246, "step": 1043 }, { "epoch": 0.1670667306769083, "grad_norm": 0.2950044274330139, "learning_rate": 5e-05, "loss": 1.5488, "step": 1044 }, { "epoch": 0.16722675628100497, "grad_norm": 0.2955586612224579, "learning_rate": 5e-05, "loss": 1.5984, "step": 1045 }, { "epoch": 0.16738678188510162, "grad_norm": 0.28795915842056274, "learning_rate": 5e-05, "loss": 1.5547, "step": 1046 }, { "epoch": 0.16754680748919829, "grad_norm": 0.29268673062324524, "learning_rate": 5e-05, "loss": 1.557, "step": 1047 }, { "epoch": 0.16770683309329493, "grad_norm": 0.3074418008327484, "learning_rate": 5e-05, "loss": 1.6324, "step": 1048 }, { "epoch": 0.16786685869739157, "grad_norm": 0.2933124303817749, "learning_rate": 5e-05, "loss": 1.5413, "step": 1049 }, { "epoch": 0.16802688430148824, "grad_norm": 0.2878250479698181, "learning_rate": 5e-05, "loss": 1.5802, "step": 1050 }, { "epoch": 0.16818690990558488, "grad_norm": 0.2962215840816498, "learning_rate": 5e-05, "loss": 1.5634, "step": 1051 }, { "epoch": 0.16834693550968155, "grad_norm": 0.2919735610485077, "learning_rate": 5e-05, "loss": 1.5325, "step": 1052 }, { "epoch": 0.1685069611137782, "grad_norm": 0.2901112139225006, "learning_rate": 5e-05, "loss": 1.5802, "step": 1053 }, { "epoch": 0.16866698671787486, "grad_norm": 0.28626832365989685, "learning_rate": 5e-05, "loss": 1.5496, "step": 1054 }, { "epoch": 0.1688270123219715, "grad_norm": 0.2976236045360565, "learning_rate": 5e-05, "loss": 1.5465, "step": 1055 }, { "epoch": 0.16898703792606817, "grad_norm": 0.30493640899658203, "learning_rate": 5e-05, "loss": 1.6414, "step": 1056 }, { "epoch": 0.16914706353016482, "grad_norm": 0.301885724067688, "learning_rate": 5e-05, "loss": 1.6452, "step": 1057 }, { "epoch": 0.16930708913426148, "grad_norm": 0.3276175260543823, "learning_rate": 5e-05, "loss": 1.5379, "step": 1058 }, { "epoch": 0.16946711473835813, "grad_norm": 0.29485493898391724, "learning_rate": 5e-05, "loss": 1.5732, "step": 1059 }, { "epoch": 0.1696271403424548, "grad_norm": 0.29838475584983826, "learning_rate": 5e-05, "loss": 1.5963, "step": 1060 }, { "epoch": 0.16978716594655144, "grad_norm": 0.29808688163757324, "learning_rate": 5e-05, "loss": 1.5657, "step": 1061 }, { "epoch": 0.1699471915506481, "grad_norm": 0.298367977142334, "learning_rate": 5e-05, "loss": 1.5694, "step": 1062 }, { "epoch": 0.17010721715474475, "grad_norm": 0.30127599835395813, "learning_rate": 5e-05, "loss": 1.5951, "step": 1063 }, { "epoch": 0.17026724275884142, "grad_norm": 0.308538019657135, "learning_rate": 5e-05, "loss": 1.6015, "step": 1064 }, { "epoch": 0.17042726836293806, "grad_norm": 0.28714418411254883, "learning_rate": 5e-05, "loss": 1.5362, "step": 1065 }, { "epoch": 0.17058729396703473, "grad_norm": 0.2957923114299774, "learning_rate": 5e-05, "loss": 1.53, "step": 1066 }, { "epoch": 0.17074731957113137, "grad_norm": 0.29974180459976196, "learning_rate": 5e-05, "loss": 1.5881, "step": 1067 }, { "epoch": 0.17090734517522804, "grad_norm": 0.302999883890152, "learning_rate": 5e-05, "loss": 1.5743, "step": 1068 }, { "epoch": 0.17106737077932468, "grad_norm": 0.29199862480163574, "learning_rate": 5e-05, "loss": 1.5264, "step": 1069 }, { "epoch": 0.17122739638342135, "grad_norm": 0.2953805923461914, "learning_rate": 5e-05, "loss": 1.5542, "step": 1070 }, { "epoch": 0.171387421987518, "grad_norm": 0.3178240656852722, "learning_rate": 5e-05, "loss": 1.6136, "step": 1071 }, { "epoch": 0.17154744759161467, "grad_norm": 0.30779746174812317, "learning_rate": 5e-05, "loss": 1.5876, "step": 1072 }, { "epoch": 0.1717074731957113, "grad_norm": 0.30030834674835205, "learning_rate": 5e-05, "loss": 1.5404, "step": 1073 }, { "epoch": 0.17186749879980798, "grad_norm": 0.2952796518802643, "learning_rate": 5e-05, "loss": 1.5993, "step": 1074 }, { "epoch": 0.17202752440390462, "grad_norm": 0.3088020086288452, "learning_rate": 5e-05, "loss": 1.6946, "step": 1075 }, { "epoch": 0.1721875500080013, "grad_norm": 0.2951776087284088, "learning_rate": 5e-05, "loss": 1.5771, "step": 1076 }, { "epoch": 0.17234757561209793, "grad_norm": 0.2999538779258728, "learning_rate": 5e-05, "loss": 1.5324, "step": 1077 }, { "epoch": 0.1725076012161946, "grad_norm": 0.30317166447639465, "learning_rate": 5e-05, "loss": 1.6192, "step": 1078 }, { "epoch": 0.17266762682029124, "grad_norm": 0.2976607382297516, "learning_rate": 5e-05, "loss": 1.5963, "step": 1079 }, { "epoch": 0.1728276524243879, "grad_norm": 0.2981988489627838, "learning_rate": 5e-05, "loss": 1.5949, "step": 1080 }, { "epoch": 0.17298767802848455, "grad_norm": 0.30605265498161316, "learning_rate": 5e-05, "loss": 1.5628, "step": 1081 }, { "epoch": 0.17314770363258122, "grad_norm": 0.3027060031890869, "learning_rate": 5e-05, "loss": 1.6181, "step": 1082 }, { "epoch": 0.17330772923667787, "grad_norm": 0.3114129602909088, "learning_rate": 5e-05, "loss": 1.5802, "step": 1083 }, { "epoch": 0.17346775484077454, "grad_norm": 0.30327197909355164, "learning_rate": 5e-05, "loss": 1.5613, "step": 1084 }, { "epoch": 0.17362778044487118, "grad_norm": 0.31283026933670044, "learning_rate": 5e-05, "loss": 1.6301, "step": 1085 }, { "epoch": 0.17378780604896785, "grad_norm": 0.2964191436767578, "learning_rate": 5e-05, "loss": 1.6228, "step": 1086 }, { "epoch": 0.1739478316530645, "grad_norm": 0.31281593441963196, "learning_rate": 5e-05, "loss": 1.5755, "step": 1087 }, { "epoch": 0.17410785725716116, "grad_norm": 0.2982264757156372, "learning_rate": 5e-05, "loss": 1.6507, "step": 1088 }, { "epoch": 0.1742678828612578, "grad_norm": 0.2937644124031067, "learning_rate": 5e-05, "loss": 1.5903, "step": 1089 }, { "epoch": 0.17442790846535447, "grad_norm": 0.2947675883769989, "learning_rate": 5e-05, "loss": 1.6266, "step": 1090 }, { "epoch": 0.1745879340694511, "grad_norm": 0.3068002462387085, "learning_rate": 5e-05, "loss": 1.7188, "step": 1091 }, { "epoch": 0.17474795967354778, "grad_norm": 0.3001297414302826, "learning_rate": 5e-05, "loss": 1.6354, "step": 1092 }, { "epoch": 0.17490798527764442, "grad_norm": 0.2978498637676239, "learning_rate": 5e-05, "loss": 1.6096, "step": 1093 }, { "epoch": 0.17506801088174107, "grad_norm": 0.2984963059425354, "learning_rate": 5e-05, "loss": 1.5409, "step": 1094 }, { "epoch": 0.17522803648583773, "grad_norm": 0.298242449760437, "learning_rate": 5e-05, "loss": 1.6172, "step": 1095 }, { "epoch": 0.17538806208993438, "grad_norm": 0.30051949620246887, "learning_rate": 5e-05, "loss": 1.594, "step": 1096 }, { "epoch": 0.17554808769403105, "grad_norm": 0.2998136579990387, "learning_rate": 5e-05, "loss": 1.6308, "step": 1097 }, { "epoch": 0.1757081132981277, "grad_norm": 0.2994934022426605, "learning_rate": 5e-05, "loss": 1.5679, "step": 1098 }, { "epoch": 0.17586813890222436, "grad_norm": 0.3056764602661133, "learning_rate": 5e-05, "loss": 1.6097, "step": 1099 }, { "epoch": 0.176028164506321, "grad_norm": 0.28988921642303467, "learning_rate": 5e-05, "loss": 1.5366, "step": 1100 }, { "epoch": 0.17618819011041767, "grad_norm": 0.2981385886669159, "learning_rate": 5e-05, "loss": 1.6523, "step": 1101 }, { "epoch": 0.1763482157145143, "grad_norm": 0.3138214349746704, "learning_rate": 5e-05, "loss": 1.6151, "step": 1102 }, { "epoch": 0.17650824131861098, "grad_norm": 0.299723356962204, "learning_rate": 5e-05, "loss": 1.5582, "step": 1103 }, { "epoch": 0.17666826692270762, "grad_norm": 0.30953729152679443, "learning_rate": 5e-05, "loss": 1.6742, "step": 1104 }, { "epoch": 0.1768282925268043, "grad_norm": 0.3005373179912567, "learning_rate": 5e-05, "loss": 1.5576, "step": 1105 }, { "epoch": 0.17698831813090093, "grad_norm": 0.2952139973640442, "learning_rate": 5e-05, "loss": 1.55, "step": 1106 }, { "epoch": 0.1771483437349976, "grad_norm": 0.298130065202713, "learning_rate": 5e-05, "loss": 1.5797, "step": 1107 }, { "epoch": 0.17730836933909425, "grad_norm": 0.2882578372955322, "learning_rate": 5e-05, "loss": 1.458, "step": 1108 }, { "epoch": 0.17746839494319092, "grad_norm": 0.28535670042037964, "learning_rate": 5e-05, "loss": 1.4825, "step": 1109 }, { "epoch": 0.17762842054728756, "grad_norm": 0.28435376286506653, "learning_rate": 5e-05, "loss": 1.566, "step": 1110 }, { "epoch": 0.17778844615138423, "grad_norm": 0.29097697138786316, "learning_rate": 5e-05, "loss": 1.5453, "step": 1111 }, { "epoch": 0.17794847175548087, "grad_norm": 0.3069499135017395, "learning_rate": 5e-05, "loss": 1.6276, "step": 1112 }, { "epoch": 0.17810849735957754, "grad_norm": 0.2860214114189148, "learning_rate": 5e-05, "loss": 1.5218, "step": 1113 }, { "epoch": 0.17826852296367418, "grad_norm": 0.2865912616252899, "learning_rate": 5e-05, "loss": 1.5424, "step": 1114 }, { "epoch": 0.17842854856777085, "grad_norm": 0.33480870723724365, "learning_rate": 5e-05, "loss": 1.6406, "step": 1115 }, { "epoch": 0.1785885741718675, "grad_norm": 0.2938961982727051, "learning_rate": 5e-05, "loss": 1.5416, "step": 1116 }, { "epoch": 0.17874859977596416, "grad_norm": 0.30156105756759644, "learning_rate": 5e-05, "loss": 1.5255, "step": 1117 }, { "epoch": 0.1789086253800608, "grad_norm": 0.3117532432079315, "learning_rate": 5e-05, "loss": 1.5786, "step": 1118 }, { "epoch": 0.17906865098415747, "grad_norm": 0.3073011338710785, "learning_rate": 5e-05, "loss": 1.6107, "step": 1119 }, { "epoch": 0.17922867658825412, "grad_norm": 0.3079906105995178, "learning_rate": 5e-05, "loss": 1.6496, "step": 1120 }, { "epoch": 0.17938870219235079, "grad_norm": 0.2923447787761688, "learning_rate": 5e-05, "loss": 1.5928, "step": 1121 }, { "epoch": 0.17954872779644743, "grad_norm": 0.2948479652404785, "learning_rate": 5e-05, "loss": 1.5839, "step": 1122 }, { "epoch": 0.1797087534005441, "grad_norm": 0.2980300188064575, "learning_rate": 5e-05, "loss": 1.5701, "step": 1123 }, { "epoch": 0.17986877900464074, "grad_norm": 0.31195637583732605, "learning_rate": 5e-05, "loss": 1.6878, "step": 1124 }, { "epoch": 0.1800288046087374, "grad_norm": 0.2911701798439026, "learning_rate": 5e-05, "loss": 1.5584, "step": 1125 }, { "epoch": 0.18018883021283405, "grad_norm": 0.2954866588115692, "learning_rate": 5e-05, "loss": 1.6066, "step": 1126 }, { "epoch": 0.18034885581693072, "grad_norm": 0.30376237630844116, "learning_rate": 5e-05, "loss": 1.5906, "step": 1127 }, { "epoch": 0.18050888142102736, "grad_norm": 0.29941076040267944, "learning_rate": 5e-05, "loss": 1.4931, "step": 1128 }, { "epoch": 0.18066890702512403, "grad_norm": 0.2894861102104187, "learning_rate": 5e-05, "loss": 1.545, "step": 1129 }, { "epoch": 0.18082893262922067, "grad_norm": 0.2940540909767151, "learning_rate": 5e-05, "loss": 1.5741, "step": 1130 }, { "epoch": 0.18098895823331734, "grad_norm": 0.2970993220806122, "learning_rate": 5e-05, "loss": 1.5673, "step": 1131 }, { "epoch": 0.18114898383741398, "grad_norm": 0.3087823688983917, "learning_rate": 5e-05, "loss": 1.641, "step": 1132 }, { "epoch": 0.18130900944151065, "grad_norm": 0.2970801591873169, "learning_rate": 5e-05, "loss": 1.5497, "step": 1133 }, { "epoch": 0.1814690350456073, "grad_norm": 0.29819682240486145, "learning_rate": 5e-05, "loss": 1.5084, "step": 1134 }, { "epoch": 0.18162906064970397, "grad_norm": 0.29993534088134766, "learning_rate": 5e-05, "loss": 1.5546, "step": 1135 }, { "epoch": 0.1817890862538006, "grad_norm": 0.2968157231807709, "learning_rate": 5e-05, "loss": 1.5732, "step": 1136 }, { "epoch": 0.18194911185789725, "grad_norm": 0.28577253222465515, "learning_rate": 5e-05, "loss": 1.5125, "step": 1137 }, { "epoch": 0.18210913746199392, "grad_norm": 0.2955007553100586, "learning_rate": 5e-05, "loss": 1.5813, "step": 1138 }, { "epoch": 0.18226916306609056, "grad_norm": 0.3063058853149414, "learning_rate": 5e-05, "loss": 1.596, "step": 1139 }, { "epoch": 0.18242918867018723, "grad_norm": 0.3027929365634918, "learning_rate": 5e-05, "loss": 1.6422, "step": 1140 }, { "epoch": 0.18258921427428387, "grad_norm": 0.3012959659099579, "learning_rate": 5e-05, "loss": 1.5283, "step": 1141 }, { "epoch": 0.18274923987838054, "grad_norm": 0.3153810501098633, "learning_rate": 5e-05, "loss": 1.6432, "step": 1142 }, { "epoch": 0.18290926548247718, "grad_norm": 0.29712262749671936, "learning_rate": 5e-05, "loss": 1.5484, "step": 1143 }, { "epoch": 0.18306929108657385, "grad_norm": 0.2904387414455414, "learning_rate": 5e-05, "loss": 1.5558, "step": 1144 }, { "epoch": 0.1832293166906705, "grad_norm": 0.3028683662414551, "learning_rate": 5e-05, "loss": 1.621, "step": 1145 }, { "epoch": 0.18338934229476717, "grad_norm": 0.31077513098716736, "learning_rate": 5e-05, "loss": 1.5512, "step": 1146 }, { "epoch": 0.1835493678988638, "grad_norm": 0.2966853976249695, "learning_rate": 5e-05, "loss": 1.6222, "step": 1147 }, { "epoch": 0.18370939350296048, "grad_norm": 0.3113940358161926, "learning_rate": 5e-05, "loss": 1.6632, "step": 1148 }, { "epoch": 0.18386941910705712, "grad_norm": 0.29160481691360474, "learning_rate": 5e-05, "loss": 1.5654, "step": 1149 }, { "epoch": 0.1840294447111538, "grad_norm": 0.28976142406463623, "learning_rate": 5e-05, "loss": 1.5867, "step": 1150 }, { "epoch": 0.18418947031525043, "grad_norm": 0.3012159466743469, "learning_rate": 5e-05, "loss": 1.5751, "step": 1151 }, { "epoch": 0.1843494959193471, "grad_norm": 0.2904215455055237, "learning_rate": 5e-05, "loss": 1.5945, "step": 1152 }, { "epoch": 0.18450952152344374, "grad_norm": 0.2922844886779785, "learning_rate": 5e-05, "loss": 1.6045, "step": 1153 }, { "epoch": 0.1846695471275404, "grad_norm": 0.3160574734210968, "learning_rate": 5e-05, "loss": 1.6015, "step": 1154 }, { "epoch": 0.18482957273163705, "grad_norm": 0.29779672622680664, "learning_rate": 5e-05, "loss": 1.5995, "step": 1155 }, { "epoch": 0.18498959833573372, "grad_norm": 0.30463308095932007, "learning_rate": 5e-05, "loss": 1.5235, "step": 1156 }, { "epoch": 0.18514962393983037, "grad_norm": 0.3039882183074951, "learning_rate": 5e-05, "loss": 1.5568, "step": 1157 }, { "epoch": 0.18530964954392704, "grad_norm": 0.30027326941490173, "learning_rate": 5e-05, "loss": 1.5124, "step": 1158 }, { "epoch": 0.18546967514802368, "grad_norm": 0.3155013620853424, "learning_rate": 5e-05, "loss": 1.5375, "step": 1159 }, { "epoch": 0.18562970075212035, "grad_norm": 0.2857028543949127, "learning_rate": 5e-05, "loss": 1.5091, "step": 1160 }, { "epoch": 0.185789726356217, "grad_norm": 0.3376767933368683, "learning_rate": 5e-05, "loss": 1.6188, "step": 1161 }, { "epoch": 0.18594975196031366, "grad_norm": 0.29391995072364807, "learning_rate": 5e-05, "loss": 1.6002, "step": 1162 }, { "epoch": 0.1861097775644103, "grad_norm": 0.3060718774795532, "learning_rate": 5e-05, "loss": 1.6175, "step": 1163 }, { "epoch": 0.18626980316850697, "grad_norm": 0.2930746078491211, "learning_rate": 5e-05, "loss": 1.4879, "step": 1164 }, { "epoch": 0.1864298287726036, "grad_norm": 0.2839565575122833, "learning_rate": 5e-05, "loss": 1.5693, "step": 1165 }, { "epoch": 0.18658985437670028, "grad_norm": 0.3042803406715393, "learning_rate": 5e-05, "loss": 1.6255, "step": 1166 }, { "epoch": 0.18674987998079692, "grad_norm": 0.3133370578289032, "learning_rate": 5e-05, "loss": 1.6463, "step": 1167 }, { "epoch": 0.1869099055848936, "grad_norm": 0.31637677550315857, "learning_rate": 5e-05, "loss": 1.5516, "step": 1168 }, { "epoch": 0.18706993118899023, "grad_norm": 0.2995264232158661, "learning_rate": 5e-05, "loss": 1.6353, "step": 1169 }, { "epoch": 0.1872299567930869, "grad_norm": 0.3154732286930084, "learning_rate": 5e-05, "loss": 1.5332, "step": 1170 }, { "epoch": 0.18738998239718355, "grad_norm": 0.3089159429073334, "learning_rate": 5e-05, "loss": 1.5942, "step": 1171 }, { "epoch": 0.18755000800128022, "grad_norm": 0.2980691194534302, "learning_rate": 5e-05, "loss": 1.6543, "step": 1172 }, { "epoch": 0.18771003360537686, "grad_norm": 0.32349273562431335, "learning_rate": 5e-05, "loss": 1.6502, "step": 1173 }, { "epoch": 0.18787005920947353, "grad_norm": 0.32439109683036804, "learning_rate": 5e-05, "loss": 1.6284, "step": 1174 }, { "epoch": 0.18803008481357017, "grad_norm": 0.3012238144874573, "learning_rate": 5e-05, "loss": 1.6256, "step": 1175 }, { "epoch": 0.18819011041766684, "grad_norm": 0.3167943060398102, "learning_rate": 5e-05, "loss": 1.5462, "step": 1176 }, { "epoch": 0.18835013602176348, "grad_norm": 0.32704421877861023, "learning_rate": 5e-05, "loss": 1.6194, "step": 1177 }, { "epoch": 0.18851016162586015, "grad_norm": 0.2914808392524719, "learning_rate": 5e-05, "loss": 1.5619, "step": 1178 }, { "epoch": 0.1886701872299568, "grad_norm": 0.34439119696617126, "learning_rate": 5e-05, "loss": 1.4933, "step": 1179 }, { "epoch": 0.18883021283405343, "grad_norm": 0.31080159544944763, "learning_rate": 5e-05, "loss": 1.566, "step": 1180 }, { "epoch": 0.1889902384381501, "grad_norm": 0.3068443834781647, "learning_rate": 5e-05, "loss": 1.5417, "step": 1181 }, { "epoch": 0.18915026404224675, "grad_norm": 0.32386428117752075, "learning_rate": 5e-05, "loss": 1.539, "step": 1182 }, { "epoch": 0.18931028964634342, "grad_norm": 0.2970333397388458, "learning_rate": 5e-05, "loss": 1.5869, "step": 1183 }, { "epoch": 0.18947031525044006, "grad_norm": 0.31644365191459656, "learning_rate": 5e-05, "loss": 1.5409, "step": 1184 }, { "epoch": 0.18963034085453673, "grad_norm": 0.30193033814430237, "learning_rate": 5e-05, "loss": 1.6494, "step": 1185 }, { "epoch": 0.18979036645863337, "grad_norm": 0.2950356602668762, "learning_rate": 5e-05, "loss": 1.5592, "step": 1186 }, { "epoch": 0.18995039206273004, "grad_norm": 0.32257238030433655, "learning_rate": 5e-05, "loss": 1.6081, "step": 1187 }, { "epoch": 0.19011041766682668, "grad_norm": 0.28882643580436707, "learning_rate": 5e-05, "loss": 1.5148, "step": 1188 }, { "epoch": 0.19027044327092335, "grad_norm": 0.2864942252635956, "learning_rate": 5e-05, "loss": 1.5465, "step": 1189 }, { "epoch": 0.19043046887502, "grad_norm": 0.3054693043231964, "learning_rate": 5e-05, "loss": 1.4739, "step": 1190 }, { "epoch": 0.19059049447911666, "grad_norm": 0.2944018542766571, "learning_rate": 5e-05, "loss": 1.5521, "step": 1191 }, { "epoch": 0.1907505200832133, "grad_norm": 0.2880883514881134, "learning_rate": 5e-05, "loss": 1.5137, "step": 1192 }, { "epoch": 0.19091054568730997, "grad_norm": 0.32736432552337646, "learning_rate": 5e-05, "loss": 1.6219, "step": 1193 }, { "epoch": 0.19107057129140662, "grad_norm": 0.2905898988246918, "learning_rate": 5e-05, "loss": 1.5651, "step": 1194 }, { "epoch": 0.19123059689550329, "grad_norm": 0.29816877841949463, "learning_rate": 5e-05, "loss": 1.6157, "step": 1195 }, { "epoch": 0.19139062249959993, "grad_norm": 0.3119134306907654, "learning_rate": 5e-05, "loss": 1.6024, "step": 1196 }, { "epoch": 0.1915506481036966, "grad_norm": 0.2953163683414459, "learning_rate": 5e-05, "loss": 1.5806, "step": 1197 }, { "epoch": 0.19171067370779324, "grad_norm": 0.3047119677066803, "learning_rate": 5e-05, "loss": 1.6347, "step": 1198 }, { "epoch": 0.1918706993118899, "grad_norm": 0.29928988218307495, "learning_rate": 5e-05, "loss": 1.6246, "step": 1199 }, { "epoch": 0.19203072491598655, "grad_norm": 0.3007999658584595, "learning_rate": 5e-05, "loss": 1.6184, "step": 1200 }, { "epoch": 0.19219075052008322, "grad_norm": 0.30223548412323, "learning_rate": 5e-05, "loss": 1.6144, "step": 1201 }, { "epoch": 0.19235077612417986, "grad_norm": 0.3031928241252899, "learning_rate": 5e-05, "loss": 1.6127, "step": 1202 }, { "epoch": 0.19251080172827653, "grad_norm": 0.31080520153045654, "learning_rate": 5e-05, "loss": 1.6113, "step": 1203 }, { "epoch": 0.19267082733237317, "grad_norm": 0.29017946124076843, "learning_rate": 5e-05, "loss": 1.5382, "step": 1204 }, { "epoch": 0.19283085293646984, "grad_norm": 0.2907261252403259, "learning_rate": 5e-05, "loss": 1.5288, "step": 1205 }, { "epoch": 0.19299087854056649, "grad_norm": 0.2962374985218048, "learning_rate": 5e-05, "loss": 1.5932, "step": 1206 }, { "epoch": 0.19315090414466315, "grad_norm": 0.2992134690284729, "learning_rate": 5e-05, "loss": 1.5453, "step": 1207 }, { "epoch": 0.1933109297487598, "grad_norm": 0.3066418468952179, "learning_rate": 5e-05, "loss": 1.5717, "step": 1208 }, { "epoch": 0.19347095535285647, "grad_norm": 0.28798234462738037, "learning_rate": 5e-05, "loss": 1.5099, "step": 1209 }, { "epoch": 0.1936309809569531, "grad_norm": 0.3023149371147156, "learning_rate": 5e-05, "loss": 1.5996, "step": 1210 }, { "epoch": 0.19379100656104978, "grad_norm": 0.30185413360595703, "learning_rate": 5e-05, "loss": 1.5259, "step": 1211 }, { "epoch": 0.19395103216514642, "grad_norm": 0.2976469099521637, "learning_rate": 5e-05, "loss": 1.5886, "step": 1212 }, { "epoch": 0.1941110577692431, "grad_norm": 0.2882872223854065, "learning_rate": 5e-05, "loss": 1.5261, "step": 1213 }, { "epoch": 0.19427108337333973, "grad_norm": 0.3022424280643463, "learning_rate": 5e-05, "loss": 1.6237, "step": 1214 }, { "epoch": 0.1944311089774364, "grad_norm": 0.29402363300323486, "learning_rate": 5e-05, "loss": 1.6009, "step": 1215 }, { "epoch": 0.19459113458153304, "grad_norm": 0.29722121357917786, "learning_rate": 5e-05, "loss": 1.567, "step": 1216 }, { "epoch": 0.1947511601856297, "grad_norm": 0.29514080286026, "learning_rate": 5e-05, "loss": 1.5616, "step": 1217 }, { "epoch": 0.19491118578972635, "grad_norm": 0.2880895435810089, "learning_rate": 5e-05, "loss": 1.5374, "step": 1218 }, { "epoch": 0.19507121139382302, "grad_norm": 0.3024745583534241, "learning_rate": 5e-05, "loss": 1.5261, "step": 1219 }, { "epoch": 0.19523123699791967, "grad_norm": 0.29425063729286194, "learning_rate": 5e-05, "loss": 1.5062, "step": 1220 }, { "epoch": 0.19539126260201634, "grad_norm": 0.29768994450569153, "learning_rate": 5e-05, "loss": 1.5684, "step": 1221 }, { "epoch": 0.19555128820611298, "grad_norm": 0.30393245816230774, "learning_rate": 5e-05, "loss": 1.5385, "step": 1222 }, { "epoch": 0.19571131381020965, "grad_norm": 0.29537031054496765, "learning_rate": 5e-05, "loss": 1.5621, "step": 1223 }, { "epoch": 0.1958713394143063, "grad_norm": 0.304607629776001, "learning_rate": 5e-05, "loss": 1.5475, "step": 1224 }, { "epoch": 0.19603136501840293, "grad_norm": 0.2982870936393738, "learning_rate": 5e-05, "loss": 1.5777, "step": 1225 }, { "epoch": 0.1961913906224996, "grad_norm": 0.3117285966873169, "learning_rate": 5e-05, "loss": 1.6576, "step": 1226 }, { "epoch": 0.19635141622659624, "grad_norm": 0.2975005805492401, "learning_rate": 5e-05, "loss": 1.5356, "step": 1227 }, { "epoch": 0.1965114418306929, "grad_norm": 0.29676786065101624, "learning_rate": 5e-05, "loss": 1.6165, "step": 1228 }, { "epoch": 0.19667146743478955, "grad_norm": 0.3009687066078186, "learning_rate": 5e-05, "loss": 1.5001, "step": 1229 }, { "epoch": 0.19683149303888622, "grad_norm": 0.29598820209503174, "learning_rate": 5e-05, "loss": 1.5176, "step": 1230 }, { "epoch": 0.19699151864298287, "grad_norm": 0.30506402254104614, "learning_rate": 5e-05, "loss": 1.6997, "step": 1231 }, { "epoch": 0.19715154424707954, "grad_norm": 0.301980584859848, "learning_rate": 5e-05, "loss": 1.4953, "step": 1232 }, { "epoch": 0.19731156985117618, "grad_norm": 0.2963476777076721, "learning_rate": 5e-05, "loss": 1.5949, "step": 1233 }, { "epoch": 0.19747159545527285, "grad_norm": 0.32059457898139954, "learning_rate": 5e-05, "loss": 1.6266, "step": 1234 }, { "epoch": 0.1976316210593695, "grad_norm": 0.2837987244129181, "learning_rate": 5e-05, "loss": 1.5036, "step": 1235 }, { "epoch": 0.19779164666346616, "grad_norm": 0.3018515706062317, "learning_rate": 5e-05, "loss": 1.6067, "step": 1236 }, { "epoch": 0.1979516722675628, "grad_norm": 0.29596585035324097, "learning_rate": 5e-05, "loss": 1.547, "step": 1237 }, { "epoch": 0.19811169787165947, "grad_norm": 0.29295432567596436, "learning_rate": 5e-05, "loss": 1.5644, "step": 1238 }, { "epoch": 0.1982717234757561, "grad_norm": 0.30167484283447266, "learning_rate": 5e-05, "loss": 1.5748, "step": 1239 }, { "epoch": 0.19843174907985278, "grad_norm": 0.2953035533428192, "learning_rate": 5e-05, "loss": 1.6217, "step": 1240 }, { "epoch": 0.19859177468394942, "grad_norm": 0.29786416888237, "learning_rate": 5e-05, "loss": 1.5595, "step": 1241 }, { "epoch": 0.1987518002880461, "grad_norm": 0.2976440191268921, "learning_rate": 5e-05, "loss": 1.5498, "step": 1242 }, { "epoch": 0.19891182589214274, "grad_norm": 0.2952960729598999, "learning_rate": 5e-05, "loss": 1.5651, "step": 1243 }, { "epoch": 0.1990718514962394, "grad_norm": 0.2933177053928375, "learning_rate": 5e-05, "loss": 1.5556, "step": 1244 }, { "epoch": 0.19923187710033605, "grad_norm": 0.2831862270832062, "learning_rate": 5e-05, "loss": 1.5144, "step": 1245 }, { "epoch": 0.19939190270443272, "grad_norm": 0.29591530561447144, "learning_rate": 5e-05, "loss": 1.6066, "step": 1246 }, { "epoch": 0.19955192830852936, "grad_norm": 0.29409804940223694, "learning_rate": 5e-05, "loss": 1.5897, "step": 1247 }, { "epoch": 0.19971195391262603, "grad_norm": 0.2888944745063782, "learning_rate": 5e-05, "loss": 1.4995, "step": 1248 }, { "epoch": 0.19987197951672267, "grad_norm": 0.321322500705719, "learning_rate": 5e-05, "loss": 1.6369, "step": 1249 }, { "epoch": 0.20003200512081934, "grad_norm": 0.3115699887275696, "learning_rate": 5e-05, "loss": 1.5889, "step": 1250 }, { "epoch": 0.20019203072491598, "grad_norm": 0.2856740951538086, "learning_rate": 5e-05, "loss": 1.485, "step": 1251 }, { "epoch": 0.20035205632901265, "grad_norm": 0.2998482584953308, "learning_rate": 5e-05, "loss": 1.6188, "step": 1252 }, { "epoch": 0.2005120819331093, "grad_norm": 0.2985326647758484, "learning_rate": 5e-05, "loss": 1.6292, "step": 1253 }, { "epoch": 0.20067210753720596, "grad_norm": 0.3067973256111145, "learning_rate": 5e-05, "loss": 1.6271, "step": 1254 }, { "epoch": 0.2008321331413026, "grad_norm": 0.31100475788116455, "learning_rate": 5e-05, "loss": 1.6344, "step": 1255 }, { "epoch": 0.20099215874539927, "grad_norm": 0.3043568730354309, "learning_rate": 5e-05, "loss": 1.5548, "step": 1256 }, { "epoch": 0.20115218434949592, "grad_norm": 0.30596473813056946, "learning_rate": 5e-05, "loss": 1.6711, "step": 1257 }, { "epoch": 0.20131220995359259, "grad_norm": 0.29619261622428894, "learning_rate": 5e-05, "loss": 1.6122, "step": 1258 }, { "epoch": 0.20147223555768923, "grad_norm": 0.3001739978790283, "learning_rate": 5e-05, "loss": 1.6687, "step": 1259 }, { "epoch": 0.2016322611617859, "grad_norm": 0.29693329334259033, "learning_rate": 5e-05, "loss": 1.5294, "step": 1260 }, { "epoch": 0.20179228676588254, "grad_norm": 0.29976528882980347, "learning_rate": 5e-05, "loss": 1.614, "step": 1261 }, { "epoch": 0.2019523123699792, "grad_norm": 0.29993847012519836, "learning_rate": 5e-05, "loss": 1.629, "step": 1262 }, { "epoch": 0.20211233797407585, "grad_norm": 0.29361671209335327, "learning_rate": 5e-05, "loss": 1.5861, "step": 1263 }, { "epoch": 0.20227236357817252, "grad_norm": 0.3242625594139099, "learning_rate": 5e-05, "loss": 1.6728, "step": 1264 }, { "epoch": 0.20243238918226916, "grad_norm": 0.29489022493362427, "learning_rate": 5e-05, "loss": 1.517, "step": 1265 }, { "epoch": 0.20259241478636583, "grad_norm": 0.3074529469013214, "learning_rate": 5e-05, "loss": 1.5063, "step": 1266 }, { "epoch": 0.20275244039046247, "grad_norm": 0.2943486273288727, "learning_rate": 5e-05, "loss": 1.5326, "step": 1267 }, { "epoch": 0.20291246599455912, "grad_norm": 0.29383543133735657, "learning_rate": 5e-05, "loss": 1.5568, "step": 1268 }, { "epoch": 0.20307249159865579, "grad_norm": 0.3208611309528351, "learning_rate": 5e-05, "loss": 1.5386, "step": 1269 }, { "epoch": 0.20323251720275243, "grad_norm": 0.29640820622444153, "learning_rate": 5e-05, "loss": 1.6038, "step": 1270 }, { "epoch": 0.2033925428068491, "grad_norm": 0.2931867837905884, "learning_rate": 5e-05, "loss": 1.5149, "step": 1271 }, { "epoch": 0.20355256841094574, "grad_norm": 0.30305272340774536, "learning_rate": 5e-05, "loss": 1.5676, "step": 1272 }, { "epoch": 0.2037125940150424, "grad_norm": 0.2854253053665161, "learning_rate": 5e-05, "loss": 1.5002, "step": 1273 }, { "epoch": 0.20387261961913905, "grad_norm": 0.2918570935726166, "learning_rate": 5e-05, "loss": 1.5666, "step": 1274 }, { "epoch": 0.20403264522323572, "grad_norm": 0.2955687940120697, "learning_rate": 5e-05, "loss": 1.5419, "step": 1275 }, { "epoch": 0.20419267082733236, "grad_norm": 0.2989639937877655, "learning_rate": 5e-05, "loss": 1.6422, "step": 1276 }, { "epoch": 0.20435269643142903, "grad_norm": 0.2988419532775879, "learning_rate": 5e-05, "loss": 1.5549, "step": 1277 }, { "epoch": 0.20451272203552567, "grad_norm": 0.300787091255188, "learning_rate": 5e-05, "loss": 1.562, "step": 1278 }, { "epoch": 0.20467274763962234, "grad_norm": 0.30282825231552124, "learning_rate": 5e-05, "loss": 1.5503, "step": 1279 }, { "epoch": 0.20483277324371899, "grad_norm": 0.2986745238304138, "learning_rate": 5e-05, "loss": 1.5267, "step": 1280 }, { "epoch": 0.20499279884781565, "grad_norm": 0.3240320682525635, "learning_rate": 5e-05, "loss": 1.5868, "step": 1281 }, { "epoch": 0.2051528244519123, "grad_norm": 0.29566481709480286, "learning_rate": 5e-05, "loss": 1.5989, "step": 1282 }, { "epoch": 0.20531285005600897, "grad_norm": 0.33579903841018677, "learning_rate": 5e-05, "loss": 1.5734, "step": 1283 }, { "epoch": 0.2054728756601056, "grad_norm": 0.31056761741638184, "learning_rate": 5e-05, "loss": 1.5788, "step": 1284 }, { "epoch": 0.20563290126420228, "grad_norm": 0.31065860390663147, "learning_rate": 5e-05, "loss": 1.5377, "step": 1285 }, { "epoch": 0.20579292686829892, "grad_norm": 0.3068505525588989, "learning_rate": 5e-05, "loss": 1.5747, "step": 1286 }, { "epoch": 0.2059529524723956, "grad_norm": 0.30178138613700867, "learning_rate": 5e-05, "loss": 1.5661, "step": 1287 }, { "epoch": 0.20611297807649223, "grad_norm": 0.32617101073265076, "learning_rate": 5e-05, "loss": 1.623, "step": 1288 }, { "epoch": 0.2062730036805889, "grad_norm": 0.32754620909690857, "learning_rate": 5e-05, "loss": 1.6296, "step": 1289 }, { "epoch": 0.20643302928468554, "grad_norm": 0.33476248383522034, "learning_rate": 5e-05, "loss": 1.6237, "step": 1290 }, { "epoch": 0.2065930548887822, "grad_norm": 0.31779924035072327, "learning_rate": 5e-05, "loss": 1.4491, "step": 1291 }, { "epoch": 0.20675308049287885, "grad_norm": 0.2881567180156708, "learning_rate": 5e-05, "loss": 1.5806, "step": 1292 }, { "epoch": 0.20691310609697552, "grad_norm": 0.3138653039932251, "learning_rate": 5e-05, "loss": 1.519, "step": 1293 }, { "epoch": 0.20707313170107217, "grad_norm": 0.3145190179347992, "learning_rate": 5e-05, "loss": 1.5628, "step": 1294 }, { "epoch": 0.20723315730516884, "grad_norm": 0.2971087694168091, "learning_rate": 5e-05, "loss": 1.5497, "step": 1295 }, { "epoch": 0.20739318290926548, "grad_norm": 0.31070786714553833, "learning_rate": 5e-05, "loss": 1.4976, "step": 1296 }, { "epoch": 0.20755320851336215, "grad_norm": 0.30396556854248047, "learning_rate": 5e-05, "loss": 1.5667, "step": 1297 }, { "epoch": 0.2077132341174588, "grad_norm": 0.29824355244636536, "learning_rate": 5e-05, "loss": 1.5679, "step": 1298 }, { "epoch": 0.20787325972155546, "grad_norm": 0.3129658102989197, "learning_rate": 5e-05, "loss": 1.6235, "step": 1299 }, { "epoch": 0.2080332853256521, "grad_norm": 0.29002708196640015, "learning_rate": 5e-05, "loss": 1.4776, "step": 1300 }, { "epoch": 0.20819331092974877, "grad_norm": 0.2909204065799713, "learning_rate": 5e-05, "loss": 1.5523, "step": 1301 }, { "epoch": 0.2083533365338454, "grad_norm": 0.29671943187713623, "learning_rate": 5e-05, "loss": 1.5475, "step": 1302 }, { "epoch": 0.20851336213794208, "grad_norm": 0.2957269847393036, "learning_rate": 5e-05, "loss": 1.4926, "step": 1303 }, { "epoch": 0.20867338774203872, "grad_norm": 0.2849496304988861, "learning_rate": 5e-05, "loss": 1.543, "step": 1304 }, { "epoch": 0.2088334133461354, "grad_norm": 0.29064252972602844, "learning_rate": 5e-05, "loss": 1.5583, "step": 1305 }, { "epoch": 0.20899343895023204, "grad_norm": 0.3113333284854889, "learning_rate": 5e-05, "loss": 1.6151, "step": 1306 }, { "epoch": 0.2091534645543287, "grad_norm": 0.30049052834510803, "learning_rate": 5e-05, "loss": 1.639, "step": 1307 }, { "epoch": 0.20931349015842535, "grad_norm": 0.320516437292099, "learning_rate": 5e-05, "loss": 1.6296, "step": 1308 }, { "epoch": 0.20947351576252202, "grad_norm": 0.29562416672706604, "learning_rate": 5e-05, "loss": 1.5642, "step": 1309 }, { "epoch": 0.20963354136661866, "grad_norm": 0.2853359282016754, "learning_rate": 5e-05, "loss": 1.4848, "step": 1310 }, { "epoch": 0.2097935669707153, "grad_norm": 0.29760631918907166, "learning_rate": 5e-05, "loss": 1.5299, "step": 1311 }, { "epoch": 0.20995359257481197, "grad_norm": 0.301455557346344, "learning_rate": 5e-05, "loss": 1.5605, "step": 1312 }, { "epoch": 0.2101136181789086, "grad_norm": 0.29930737614631653, "learning_rate": 5e-05, "loss": 1.6242, "step": 1313 }, { "epoch": 0.21027364378300528, "grad_norm": 0.3080472946166992, "learning_rate": 5e-05, "loss": 1.5315, "step": 1314 }, { "epoch": 0.21043366938710192, "grad_norm": 0.29600974917411804, "learning_rate": 5e-05, "loss": 1.5328, "step": 1315 }, { "epoch": 0.2105936949911986, "grad_norm": 0.3060094118118286, "learning_rate": 5e-05, "loss": 1.5213, "step": 1316 }, { "epoch": 0.21075372059529524, "grad_norm": 0.3008503019809723, "learning_rate": 5e-05, "loss": 1.5926, "step": 1317 }, { "epoch": 0.2109137461993919, "grad_norm": 0.29357415437698364, "learning_rate": 5e-05, "loss": 1.5476, "step": 1318 }, { "epoch": 0.21107377180348855, "grad_norm": 0.30868977308273315, "learning_rate": 5e-05, "loss": 1.5572, "step": 1319 }, { "epoch": 0.21123379740758522, "grad_norm": 0.2932281196117401, "learning_rate": 5e-05, "loss": 1.5218, "step": 1320 }, { "epoch": 0.21139382301168186, "grad_norm": 0.29214203357696533, "learning_rate": 5e-05, "loss": 1.4833, "step": 1321 }, { "epoch": 0.21155384861577853, "grad_norm": 0.30604392290115356, "learning_rate": 5e-05, "loss": 1.5458, "step": 1322 }, { "epoch": 0.21171387421987517, "grad_norm": 0.29203537106513977, "learning_rate": 5e-05, "loss": 1.5519, "step": 1323 }, { "epoch": 0.21187389982397184, "grad_norm": 0.29807090759277344, "learning_rate": 5e-05, "loss": 1.6158, "step": 1324 }, { "epoch": 0.21203392542806848, "grad_norm": 0.30180245637893677, "learning_rate": 5e-05, "loss": 1.5244, "step": 1325 }, { "epoch": 0.21219395103216515, "grad_norm": 0.30048221349716187, "learning_rate": 5e-05, "loss": 1.5608, "step": 1326 }, { "epoch": 0.2123539766362618, "grad_norm": 0.30392372608184814, "learning_rate": 5e-05, "loss": 1.6643, "step": 1327 }, { "epoch": 0.21251400224035846, "grad_norm": 0.30164769291877747, "learning_rate": 5e-05, "loss": 1.5933, "step": 1328 }, { "epoch": 0.2126740278444551, "grad_norm": 0.30103734135627747, "learning_rate": 5e-05, "loss": 1.603, "step": 1329 }, { "epoch": 0.21283405344855177, "grad_norm": 0.31148555874824524, "learning_rate": 5e-05, "loss": 1.6387, "step": 1330 }, { "epoch": 0.21299407905264842, "grad_norm": 0.30127984285354614, "learning_rate": 5e-05, "loss": 1.5962, "step": 1331 }, { "epoch": 0.21315410465674509, "grad_norm": 0.28900760412216187, "learning_rate": 5e-05, "loss": 1.493, "step": 1332 }, { "epoch": 0.21331413026084173, "grad_norm": 0.30539894104003906, "learning_rate": 5e-05, "loss": 1.6006, "step": 1333 }, { "epoch": 0.2134741558649384, "grad_norm": 0.29735109210014343, "learning_rate": 5e-05, "loss": 1.5638, "step": 1334 }, { "epoch": 0.21363418146903504, "grad_norm": 0.2975774109363556, "learning_rate": 5e-05, "loss": 1.5823, "step": 1335 }, { "epoch": 0.2137942070731317, "grad_norm": 0.28984421491622925, "learning_rate": 5e-05, "loss": 1.5222, "step": 1336 }, { "epoch": 0.21395423267722835, "grad_norm": 0.3133431077003479, "learning_rate": 5e-05, "loss": 1.6109, "step": 1337 }, { "epoch": 0.21411425828132502, "grad_norm": 0.2977990508079529, "learning_rate": 5e-05, "loss": 1.5672, "step": 1338 }, { "epoch": 0.21427428388542166, "grad_norm": 0.30226385593414307, "learning_rate": 5e-05, "loss": 1.645, "step": 1339 }, { "epoch": 0.21443430948951833, "grad_norm": 0.3038788437843323, "learning_rate": 5e-05, "loss": 1.5885, "step": 1340 }, { "epoch": 0.21459433509361497, "grad_norm": 0.2975189983844757, "learning_rate": 5e-05, "loss": 1.5623, "step": 1341 }, { "epoch": 0.21475436069771164, "grad_norm": 0.28685811161994934, "learning_rate": 5e-05, "loss": 1.4904, "step": 1342 }, { "epoch": 0.21491438630180829, "grad_norm": 0.30303385853767395, "learning_rate": 5e-05, "loss": 1.529, "step": 1343 }, { "epoch": 0.21507441190590496, "grad_norm": 0.2993103861808777, "learning_rate": 5e-05, "loss": 1.5224, "step": 1344 }, { "epoch": 0.2152344375100016, "grad_norm": 0.28797754645347595, "learning_rate": 5e-05, "loss": 1.486, "step": 1345 }, { "epoch": 0.21539446311409827, "grad_norm": 0.310903400182724, "learning_rate": 5e-05, "loss": 1.6154, "step": 1346 }, { "epoch": 0.2155544887181949, "grad_norm": 0.3204765319824219, "learning_rate": 5e-05, "loss": 1.5536, "step": 1347 }, { "epoch": 0.21571451432229158, "grad_norm": 0.30022215843200684, "learning_rate": 5e-05, "loss": 1.5327, "step": 1348 }, { "epoch": 0.21587453992638822, "grad_norm": 0.30144253373146057, "learning_rate": 5e-05, "loss": 1.4762, "step": 1349 }, { "epoch": 0.2160345655304849, "grad_norm": 0.29885414242744446, "learning_rate": 5e-05, "loss": 1.5741, "step": 1350 }, { "epoch": 0.21619459113458153, "grad_norm": 0.30484768748283386, "learning_rate": 5e-05, "loss": 1.5908, "step": 1351 }, { "epoch": 0.2163546167386782, "grad_norm": 0.32925936579704285, "learning_rate": 5e-05, "loss": 1.5483, "step": 1352 }, { "epoch": 0.21651464234277484, "grad_norm": 0.27648186683654785, "learning_rate": 5e-05, "loss": 1.4943, "step": 1353 }, { "epoch": 0.2166746679468715, "grad_norm": 0.30007287859916687, "learning_rate": 5e-05, "loss": 1.5243, "step": 1354 }, { "epoch": 0.21683469355096816, "grad_norm": 0.3179704248905182, "learning_rate": 5e-05, "loss": 1.6292, "step": 1355 }, { "epoch": 0.2169947191550648, "grad_norm": 0.2966255843639374, "learning_rate": 5e-05, "loss": 1.5417, "step": 1356 }, { "epoch": 0.21715474475916147, "grad_norm": 0.31061851978302, "learning_rate": 5e-05, "loss": 1.5661, "step": 1357 }, { "epoch": 0.2173147703632581, "grad_norm": 0.3256315588951111, "learning_rate": 5e-05, "loss": 1.5625, "step": 1358 }, { "epoch": 0.21747479596735478, "grad_norm": 0.29561862349510193, "learning_rate": 5e-05, "loss": 1.5525, "step": 1359 }, { "epoch": 0.21763482157145142, "grad_norm": 0.3228697180747986, "learning_rate": 5e-05, "loss": 1.5928, "step": 1360 }, { "epoch": 0.2177948471755481, "grad_norm": 0.29971837997436523, "learning_rate": 5e-05, "loss": 1.5549, "step": 1361 }, { "epoch": 0.21795487277964473, "grad_norm": 0.3038996160030365, "learning_rate": 5e-05, "loss": 1.5705, "step": 1362 }, { "epoch": 0.2181148983837414, "grad_norm": 0.30714812874794006, "learning_rate": 5e-05, "loss": 1.5664, "step": 1363 }, { "epoch": 0.21827492398783804, "grad_norm": 0.3047005236148834, "learning_rate": 5e-05, "loss": 1.6392, "step": 1364 }, { "epoch": 0.2184349495919347, "grad_norm": 0.28832703828811646, "learning_rate": 5e-05, "loss": 1.515, "step": 1365 }, { "epoch": 0.21859497519603135, "grad_norm": 0.31584230065345764, "learning_rate": 5e-05, "loss": 1.623, "step": 1366 }, { "epoch": 0.21875500080012802, "grad_norm": 0.30012354254722595, "learning_rate": 5e-05, "loss": 1.5162, "step": 1367 }, { "epoch": 0.21891502640422467, "grad_norm": 0.28000783920288086, "learning_rate": 5e-05, "loss": 1.4278, "step": 1368 }, { "epoch": 0.21907505200832134, "grad_norm": 0.30415216088294983, "learning_rate": 5e-05, "loss": 1.5863, "step": 1369 }, { "epoch": 0.21923507761241798, "grad_norm": 0.2943955659866333, "learning_rate": 5e-05, "loss": 1.5068, "step": 1370 }, { "epoch": 0.21939510321651465, "grad_norm": 0.3011736273765564, "learning_rate": 5e-05, "loss": 1.5973, "step": 1371 }, { "epoch": 0.2195551288206113, "grad_norm": 0.2956729233264923, "learning_rate": 5e-05, "loss": 1.4621, "step": 1372 }, { "epoch": 0.21971515442470796, "grad_norm": 0.311553955078125, "learning_rate": 5e-05, "loss": 1.5922, "step": 1373 }, { "epoch": 0.2198751800288046, "grad_norm": 0.306313693523407, "learning_rate": 5e-05, "loss": 1.5435, "step": 1374 }, { "epoch": 0.22003520563290127, "grad_norm": 0.3385516405105591, "learning_rate": 5e-05, "loss": 1.6349, "step": 1375 }, { "epoch": 0.2201952312369979, "grad_norm": 0.31191331148147583, "learning_rate": 5e-05, "loss": 1.6169, "step": 1376 }, { "epoch": 0.22035525684109458, "grad_norm": 0.302223265171051, "learning_rate": 5e-05, "loss": 1.5578, "step": 1377 }, { "epoch": 0.22051528244519122, "grad_norm": 0.3055708110332489, "learning_rate": 5e-05, "loss": 1.58, "step": 1378 }, { "epoch": 0.2206753080492879, "grad_norm": 0.3000025451183319, "learning_rate": 5e-05, "loss": 1.5567, "step": 1379 }, { "epoch": 0.22083533365338454, "grad_norm": 0.3075072467327118, "learning_rate": 5e-05, "loss": 1.6075, "step": 1380 }, { "epoch": 0.2209953592574812, "grad_norm": 0.3068839907646179, "learning_rate": 5e-05, "loss": 1.5755, "step": 1381 }, { "epoch": 0.22115538486157785, "grad_norm": 0.29431480169296265, "learning_rate": 5e-05, "loss": 1.5836, "step": 1382 }, { "epoch": 0.22131541046567452, "grad_norm": 0.28961440920829773, "learning_rate": 5e-05, "loss": 1.4792, "step": 1383 }, { "epoch": 0.22147543606977116, "grad_norm": 0.29808947443962097, "learning_rate": 5e-05, "loss": 1.6109, "step": 1384 }, { "epoch": 0.22163546167386783, "grad_norm": 0.293643981218338, "learning_rate": 5e-05, "loss": 1.5229, "step": 1385 }, { "epoch": 0.22179548727796447, "grad_norm": 0.3002239167690277, "learning_rate": 5e-05, "loss": 1.6201, "step": 1386 }, { "epoch": 0.22195551288206114, "grad_norm": 0.2977261245250702, "learning_rate": 5e-05, "loss": 1.6104, "step": 1387 }, { "epoch": 0.22211553848615778, "grad_norm": 0.29939955472946167, "learning_rate": 5e-05, "loss": 1.587, "step": 1388 }, { "epoch": 0.22227556409025445, "grad_norm": 0.29114970564842224, "learning_rate": 5e-05, "loss": 1.6003, "step": 1389 }, { "epoch": 0.2224355896943511, "grad_norm": 0.3065704107284546, "learning_rate": 5e-05, "loss": 1.6846, "step": 1390 }, { "epoch": 0.22259561529844776, "grad_norm": 0.29792240262031555, "learning_rate": 5e-05, "loss": 1.6477, "step": 1391 }, { "epoch": 0.2227556409025444, "grad_norm": 0.29480570554733276, "learning_rate": 5e-05, "loss": 1.601, "step": 1392 }, { "epoch": 0.22291566650664107, "grad_norm": 0.2946320176124573, "learning_rate": 5e-05, "loss": 1.5724, "step": 1393 }, { "epoch": 0.22307569211073772, "grad_norm": 0.3028450906276703, "learning_rate": 5e-05, "loss": 1.5386, "step": 1394 }, { "epoch": 0.2232357177148344, "grad_norm": 0.2929145097732544, "learning_rate": 5e-05, "loss": 1.5006, "step": 1395 }, { "epoch": 0.22339574331893103, "grad_norm": 0.31036311388015747, "learning_rate": 5e-05, "loss": 1.673, "step": 1396 }, { "epoch": 0.2235557689230277, "grad_norm": 0.2999400198459625, "learning_rate": 5e-05, "loss": 1.5386, "step": 1397 }, { "epoch": 0.22371579452712434, "grad_norm": 0.33066654205322266, "learning_rate": 5e-05, "loss": 1.5824, "step": 1398 }, { "epoch": 0.22387582013122098, "grad_norm": 0.30478712916374207, "learning_rate": 5e-05, "loss": 1.6035, "step": 1399 }, { "epoch": 0.22403584573531765, "grad_norm": 0.32496532797813416, "learning_rate": 5e-05, "loss": 1.571, "step": 1400 }, { "epoch": 0.2241958713394143, "grad_norm": 0.3022894859313965, "learning_rate": 5e-05, "loss": 1.6185, "step": 1401 }, { "epoch": 0.22435589694351096, "grad_norm": 0.3188036382198334, "learning_rate": 5e-05, "loss": 1.6444, "step": 1402 }, { "epoch": 0.2245159225476076, "grad_norm": 0.29860231280326843, "learning_rate": 5e-05, "loss": 1.5615, "step": 1403 }, { "epoch": 0.22467594815170427, "grad_norm": 0.2902033030986786, "learning_rate": 5e-05, "loss": 1.5805, "step": 1404 }, { "epoch": 0.22483597375580092, "grad_norm": 0.3210452198982239, "learning_rate": 5e-05, "loss": 1.5842, "step": 1405 }, { "epoch": 0.22499599935989759, "grad_norm": 0.310249924659729, "learning_rate": 5e-05, "loss": 1.6345, "step": 1406 }, { "epoch": 0.22515602496399423, "grad_norm": 0.3051820695400238, "learning_rate": 5e-05, "loss": 1.5182, "step": 1407 }, { "epoch": 0.2253160505680909, "grad_norm": 0.31045296788215637, "learning_rate": 5e-05, "loss": 1.575, "step": 1408 }, { "epoch": 0.22547607617218754, "grad_norm": 0.30092862248420715, "learning_rate": 5e-05, "loss": 1.6148, "step": 1409 }, { "epoch": 0.2256361017762842, "grad_norm": 0.3162960410118103, "learning_rate": 5e-05, "loss": 1.5141, "step": 1410 }, { "epoch": 0.22579612738038085, "grad_norm": 0.2952965795993805, "learning_rate": 5e-05, "loss": 1.5295, "step": 1411 }, { "epoch": 0.22595615298447752, "grad_norm": 0.3131481409072876, "learning_rate": 5e-05, "loss": 1.5995, "step": 1412 }, { "epoch": 0.22611617858857416, "grad_norm": 0.3006008267402649, "learning_rate": 5e-05, "loss": 1.5714, "step": 1413 }, { "epoch": 0.22627620419267083, "grad_norm": 0.29855453968048096, "learning_rate": 5e-05, "loss": 1.5224, "step": 1414 }, { "epoch": 0.22643622979676747, "grad_norm": 0.33085474371910095, "learning_rate": 5e-05, "loss": 1.6274, "step": 1415 }, { "epoch": 0.22659625540086414, "grad_norm": 0.3013611435890198, "learning_rate": 5e-05, "loss": 1.6339, "step": 1416 }, { "epoch": 0.22675628100496079, "grad_norm": 0.3059428632259369, "learning_rate": 5e-05, "loss": 1.5557, "step": 1417 }, { "epoch": 0.22691630660905746, "grad_norm": 0.31104522943496704, "learning_rate": 5e-05, "loss": 1.5923, "step": 1418 }, { "epoch": 0.2270763322131541, "grad_norm": 0.29741406440734863, "learning_rate": 5e-05, "loss": 1.5732, "step": 1419 }, { "epoch": 0.22723635781725077, "grad_norm": 0.3036835789680481, "learning_rate": 5e-05, "loss": 1.5972, "step": 1420 }, { "epoch": 0.2273963834213474, "grad_norm": 0.2870164215564728, "learning_rate": 5e-05, "loss": 1.5281, "step": 1421 }, { "epoch": 0.22755640902544408, "grad_norm": 0.3081519901752472, "learning_rate": 5e-05, "loss": 1.5842, "step": 1422 }, { "epoch": 0.22771643462954072, "grad_norm": 0.300648957490921, "learning_rate": 5e-05, "loss": 1.578, "step": 1423 }, { "epoch": 0.2278764602336374, "grad_norm": 0.29954373836517334, "learning_rate": 5e-05, "loss": 1.5073, "step": 1424 }, { "epoch": 0.22803648583773403, "grad_norm": 0.29284656047821045, "learning_rate": 5e-05, "loss": 1.488, "step": 1425 }, { "epoch": 0.2281965114418307, "grad_norm": 0.2950717508792877, "learning_rate": 5e-05, "loss": 1.5286, "step": 1426 }, { "epoch": 0.22835653704592734, "grad_norm": 0.2975453734397888, "learning_rate": 5e-05, "loss": 1.5674, "step": 1427 }, { "epoch": 0.228516562650024, "grad_norm": 0.3063991367816925, "learning_rate": 5e-05, "loss": 1.5161, "step": 1428 }, { "epoch": 0.22867658825412066, "grad_norm": 0.30574050545692444, "learning_rate": 5e-05, "loss": 1.5628, "step": 1429 }, { "epoch": 0.22883661385821732, "grad_norm": 0.3024362623691559, "learning_rate": 5e-05, "loss": 1.5718, "step": 1430 }, { "epoch": 0.22899663946231397, "grad_norm": 0.2965330481529236, "learning_rate": 5e-05, "loss": 1.5372, "step": 1431 }, { "epoch": 0.22915666506641064, "grad_norm": 0.2996644079685211, "learning_rate": 5e-05, "loss": 1.6401, "step": 1432 }, { "epoch": 0.22931669067050728, "grad_norm": 0.31137412786483765, "learning_rate": 5e-05, "loss": 1.5821, "step": 1433 }, { "epoch": 0.22947671627460395, "grad_norm": 0.29568761587142944, "learning_rate": 5e-05, "loss": 1.5311, "step": 1434 }, { "epoch": 0.2296367418787006, "grad_norm": 0.3068240284919739, "learning_rate": 5e-05, "loss": 1.5815, "step": 1435 }, { "epoch": 0.22979676748279726, "grad_norm": 0.29314476251602173, "learning_rate": 5e-05, "loss": 1.6038, "step": 1436 }, { "epoch": 0.2299567930868939, "grad_norm": 0.304151326417923, "learning_rate": 5e-05, "loss": 1.6303, "step": 1437 }, { "epoch": 0.23011681869099057, "grad_norm": 0.30328768491744995, "learning_rate": 5e-05, "loss": 1.6103, "step": 1438 }, { "epoch": 0.2302768442950872, "grad_norm": 0.32788631319999695, "learning_rate": 5e-05, "loss": 1.6045, "step": 1439 }, { "epoch": 0.23043686989918388, "grad_norm": 0.3019898533821106, "learning_rate": 5e-05, "loss": 1.5226, "step": 1440 }, { "epoch": 0.23059689550328052, "grad_norm": 0.2989579141139984, "learning_rate": 5e-05, "loss": 1.5575, "step": 1441 }, { "epoch": 0.2307569211073772, "grad_norm": 0.3013453781604767, "learning_rate": 5e-05, "loss": 1.4923, "step": 1442 }, { "epoch": 0.23091694671147384, "grad_norm": 0.3042789697647095, "learning_rate": 5e-05, "loss": 1.6035, "step": 1443 }, { "epoch": 0.23107697231557048, "grad_norm": 0.30362647771835327, "learning_rate": 5e-05, "loss": 1.6102, "step": 1444 }, { "epoch": 0.23123699791966715, "grad_norm": 0.306547075510025, "learning_rate": 5e-05, "loss": 1.5961, "step": 1445 }, { "epoch": 0.2313970235237638, "grad_norm": 0.3162720501422882, "learning_rate": 5e-05, "loss": 1.6078, "step": 1446 }, { "epoch": 0.23155704912786046, "grad_norm": 0.294113427400589, "learning_rate": 5e-05, "loss": 1.5288, "step": 1447 }, { "epoch": 0.2317170747319571, "grad_norm": 0.2963263690471649, "learning_rate": 5e-05, "loss": 1.5036, "step": 1448 }, { "epoch": 0.23187710033605377, "grad_norm": 0.29369068145751953, "learning_rate": 5e-05, "loss": 1.5096, "step": 1449 }, { "epoch": 0.2320371259401504, "grad_norm": 0.28956955671310425, "learning_rate": 5e-05, "loss": 1.4919, "step": 1450 }, { "epoch": 0.23219715154424708, "grad_norm": 0.3000887930393219, "learning_rate": 5e-05, "loss": 1.5031, "step": 1451 }, { "epoch": 0.23235717714834372, "grad_norm": 0.3063856065273285, "learning_rate": 5e-05, "loss": 1.5895, "step": 1452 }, { "epoch": 0.2325172027524404, "grad_norm": 0.297762393951416, "learning_rate": 5e-05, "loss": 1.5569, "step": 1453 }, { "epoch": 0.23267722835653704, "grad_norm": 0.31455403566360474, "learning_rate": 5e-05, "loss": 1.5674, "step": 1454 }, { "epoch": 0.2328372539606337, "grad_norm": 0.30671030282974243, "learning_rate": 5e-05, "loss": 1.5883, "step": 1455 }, { "epoch": 0.23299727956473035, "grad_norm": 0.30811765789985657, "learning_rate": 5e-05, "loss": 1.5942, "step": 1456 }, { "epoch": 0.23315730516882702, "grad_norm": 0.29651880264282227, "learning_rate": 5e-05, "loss": 1.5106, "step": 1457 }, { "epoch": 0.23331733077292366, "grad_norm": 0.30946725606918335, "learning_rate": 5e-05, "loss": 1.5591, "step": 1458 }, { "epoch": 0.23347735637702033, "grad_norm": 0.3187018930912018, "learning_rate": 5e-05, "loss": 1.6615, "step": 1459 }, { "epoch": 0.23363738198111697, "grad_norm": 0.3002423644065857, "learning_rate": 5e-05, "loss": 1.5603, "step": 1460 }, { "epoch": 0.23379740758521364, "grad_norm": 0.29936403036117554, "learning_rate": 5e-05, "loss": 1.5643, "step": 1461 }, { "epoch": 0.23395743318931028, "grad_norm": 0.28847193717956543, "learning_rate": 5e-05, "loss": 1.5507, "step": 1462 }, { "epoch": 0.23411745879340695, "grad_norm": 0.3033243417739868, "learning_rate": 5e-05, "loss": 1.6154, "step": 1463 }, { "epoch": 0.2342774843975036, "grad_norm": 0.3074936866760254, "learning_rate": 5e-05, "loss": 1.5911, "step": 1464 }, { "epoch": 0.23443751000160026, "grad_norm": 0.30404433608055115, "learning_rate": 5e-05, "loss": 1.6057, "step": 1465 }, { "epoch": 0.2345975356056969, "grad_norm": 0.30692410469055176, "learning_rate": 5e-05, "loss": 1.5879, "step": 1466 }, { "epoch": 0.23475756120979357, "grad_norm": 0.29965680837631226, "learning_rate": 5e-05, "loss": 1.6268, "step": 1467 }, { "epoch": 0.23491758681389022, "grad_norm": 0.3024367094039917, "learning_rate": 5e-05, "loss": 1.588, "step": 1468 }, { "epoch": 0.2350776124179869, "grad_norm": 0.3017753064632416, "learning_rate": 5e-05, "loss": 1.5932, "step": 1469 }, { "epoch": 0.23523763802208353, "grad_norm": 0.28892576694488525, "learning_rate": 5e-05, "loss": 1.5177, "step": 1470 }, { "epoch": 0.2353976636261802, "grad_norm": 0.30469927191734314, "learning_rate": 5e-05, "loss": 1.6232, "step": 1471 }, { "epoch": 0.23555768923027684, "grad_norm": 0.3137419521808624, "learning_rate": 5e-05, "loss": 1.6679, "step": 1472 }, { "epoch": 0.2357177148343735, "grad_norm": 0.30301788449287415, "learning_rate": 5e-05, "loss": 1.5232, "step": 1473 }, { "epoch": 0.23587774043847015, "grad_norm": 0.30490273237228394, "learning_rate": 5e-05, "loss": 1.5274, "step": 1474 }, { "epoch": 0.23603776604256682, "grad_norm": 0.2978001832962036, "learning_rate": 5e-05, "loss": 1.595, "step": 1475 }, { "epoch": 0.23619779164666346, "grad_norm": 0.30811017751693726, "learning_rate": 5e-05, "loss": 1.6111, "step": 1476 }, { "epoch": 0.23635781725076013, "grad_norm": 0.30926039814949036, "learning_rate": 5e-05, "loss": 1.5864, "step": 1477 }, { "epoch": 0.23651784285485677, "grad_norm": 0.30314144492149353, "learning_rate": 5e-05, "loss": 1.6066, "step": 1478 }, { "epoch": 0.23667786845895344, "grad_norm": 0.29017603397369385, "learning_rate": 5e-05, "loss": 1.538, "step": 1479 }, { "epoch": 0.2368378940630501, "grad_norm": 0.3013891577720642, "learning_rate": 5e-05, "loss": 1.5562, "step": 1480 }, { "epoch": 0.23699791966714676, "grad_norm": 0.32689568400382996, "learning_rate": 5e-05, "loss": 1.501, "step": 1481 }, { "epoch": 0.2371579452712434, "grad_norm": 0.3047487735748291, "learning_rate": 5e-05, "loss": 1.588, "step": 1482 }, { "epoch": 0.23731797087534007, "grad_norm": 0.3027651309967041, "learning_rate": 5e-05, "loss": 1.5847, "step": 1483 }, { "epoch": 0.2374779964794367, "grad_norm": 0.2879652678966522, "learning_rate": 5e-05, "loss": 1.4674, "step": 1484 }, { "epoch": 0.23763802208353338, "grad_norm": 0.29947197437286377, "learning_rate": 5e-05, "loss": 1.5547, "step": 1485 }, { "epoch": 0.23779804768763002, "grad_norm": 0.297069787979126, "learning_rate": 5e-05, "loss": 1.6099, "step": 1486 }, { "epoch": 0.23795807329172666, "grad_norm": 0.28699880838394165, "learning_rate": 5e-05, "loss": 1.565, "step": 1487 }, { "epoch": 0.23811809889582333, "grad_norm": 0.30674871802330017, "learning_rate": 5e-05, "loss": 1.6017, "step": 1488 }, { "epoch": 0.23827812449991997, "grad_norm": 0.2934042811393738, "learning_rate": 5e-05, "loss": 1.5839, "step": 1489 }, { "epoch": 0.23843815010401664, "grad_norm": 0.2947152256965637, "learning_rate": 5e-05, "loss": 1.4993, "step": 1490 }, { "epoch": 0.23859817570811329, "grad_norm": 0.28676971793174744, "learning_rate": 5e-05, "loss": 1.5737, "step": 1491 }, { "epoch": 0.23875820131220996, "grad_norm": 0.30298617482185364, "learning_rate": 5e-05, "loss": 1.5825, "step": 1492 }, { "epoch": 0.2389182269163066, "grad_norm": 0.30327603220939636, "learning_rate": 5e-05, "loss": 1.5571, "step": 1493 }, { "epoch": 0.23907825252040327, "grad_norm": 0.29310286045074463, "learning_rate": 5e-05, "loss": 1.5534, "step": 1494 }, { "epoch": 0.2392382781244999, "grad_norm": 0.30473411083221436, "learning_rate": 5e-05, "loss": 1.5759, "step": 1495 }, { "epoch": 0.23939830372859658, "grad_norm": 0.30647024512290955, "learning_rate": 5e-05, "loss": 1.5558, "step": 1496 }, { "epoch": 0.23955832933269322, "grad_norm": 0.29942235350608826, "learning_rate": 5e-05, "loss": 1.5134, "step": 1497 }, { "epoch": 0.2397183549367899, "grad_norm": 0.30347704887390137, "learning_rate": 5e-05, "loss": 1.5468, "step": 1498 }, { "epoch": 0.23987838054088653, "grad_norm": 0.29638412594795227, "learning_rate": 5e-05, "loss": 1.5299, "step": 1499 }, { "epoch": 0.2400384061449832, "grad_norm": 0.31247520446777344, "learning_rate": 5e-05, "loss": 1.582, "step": 1500 }, { "epoch": 0.24019843174907984, "grad_norm": 0.31760865449905396, "learning_rate": 5e-05, "loss": 1.5633, "step": 1501 }, { "epoch": 0.2403584573531765, "grad_norm": 0.3131832480430603, "learning_rate": 5e-05, "loss": 1.5915, "step": 1502 }, { "epoch": 0.24051848295727316, "grad_norm": 0.3031702935695648, "learning_rate": 5e-05, "loss": 1.5677, "step": 1503 }, { "epoch": 0.24067850856136982, "grad_norm": 0.30864161252975464, "learning_rate": 5e-05, "loss": 1.6383, "step": 1504 }, { "epoch": 0.24083853416546647, "grad_norm": 0.30577728152275085, "learning_rate": 5e-05, "loss": 1.5788, "step": 1505 }, { "epoch": 0.24099855976956314, "grad_norm": 0.31231361627578735, "learning_rate": 5e-05, "loss": 1.5592, "step": 1506 }, { "epoch": 0.24115858537365978, "grad_norm": 0.3181326985359192, "learning_rate": 5e-05, "loss": 1.609, "step": 1507 }, { "epoch": 0.24131861097775645, "grad_norm": 0.2925111651420593, "learning_rate": 5e-05, "loss": 1.459, "step": 1508 }, { "epoch": 0.2414786365818531, "grad_norm": 0.30936625599861145, "learning_rate": 5e-05, "loss": 1.5605, "step": 1509 }, { "epoch": 0.24163866218594976, "grad_norm": 0.3103073239326477, "learning_rate": 5e-05, "loss": 1.6425, "step": 1510 }, { "epoch": 0.2417986877900464, "grad_norm": 0.29026201367378235, "learning_rate": 5e-05, "loss": 1.5399, "step": 1511 }, { "epoch": 0.24195871339414307, "grad_norm": 0.3180742561817169, "learning_rate": 5e-05, "loss": 1.5508, "step": 1512 }, { "epoch": 0.2421187389982397, "grad_norm": 0.2955387532711029, "learning_rate": 5e-05, "loss": 1.6109, "step": 1513 }, { "epoch": 0.24227876460233638, "grad_norm": 0.31199026107788086, "learning_rate": 5e-05, "loss": 1.5981, "step": 1514 }, { "epoch": 0.24243879020643302, "grad_norm": 0.29972174763679504, "learning_rate": 5e-05, "loss": 1.5452, "step": 1515 }, { "epoch": 0.2425988158105297, "grad_norm": 0.29521873593330383, "learning_rate": 5e-05, "loss": 1.6017, "step": 1516 }, { "epoch": 0.24275884141462634, "grad_norm": 0.29942452907562256, "learning_rate": 5e-05, "loss": 1.5876, "step": 1517 }, { "epoch": 0.242918867018723, "grad_norm": 0.2926692068576813, "learning_rate": 5e-05, "loss": 1.516, "step": 1518 }, { "epoch": 0.24307889262281965, "grad_norm": 0.29538047313690186, "learning_rate": 5e-05, "loss": 1.5269, "step": 1519 }, { "epoch": 0.24323891822691632, "grad_norm": 0.3015592694282532, "learning_rate": 5e-05, "loss": 1.4757, "step": 1520 }, { "epoch": 0.24339894383101296, "grad_norm": 0.3004153072834015, "learning_rate": 5e-05, "loss": 1.6356, "step": 1521 }, { "epoch": 0.24355896943510963, "grad_norm": 0.311160683631897, "learning_rate": 5e-05, "loss": 1.586, "step": 1522 }, { "epoch": 0.24371899503920627, "grad_norm": 0.31041479110717773, "learning_rate": 5e-05, "loss": 1.6095, "step": 1523 }, { "epoch": 0.24387902064330294, "grad_norm": 0.3020724654197693, "learning_rate": 5e-05, "loss": 1.5814, "step": 1524 }, { "epoch": 0.24403904624739958, "grad_norm": 0.29845038056373596, "learning_rate": 5e-05, "loss": 1.5596, "step": 1525 }, { "epoch": 0.24419907185149625, "grad_norm": 0.30452394485473633, "learning_rate": 5e-05, "loss": 1.5564, "step": 1526 }, { "epoch": 0.2443590974555929, "grad_norm": 0.29602178931236267, "learning_rate": 5e-05, "loss": 1.5607, "step": 1527 }, { "epoch": 0.24451912305968956, "grad_norm": 0.298834890127182, "learning_rate": 5e-05, "loss": 1.5634, "step": 1528 }, { "epoch": 0.2446791486637862, "grad_norm": 0.3143480122089386, "learning_rate": 5e-05, "loss": 1.5542, "step": 1529 }, { "epoch": 0.24483917426788285, "grad_norm": 0.29382622241973877, "learning_rate": 5e-05, "loss": 1.4842, "step": 1530 }, { "epoch": 0.24499919987197952, "grad_norm": 0.30634576082229614, "learning_rate": 5e-05, "loss": 1.5165, "step": 1531 }, { "epoch": 0.24515922547607616, "grad_norm": 0.3074406385421753, "learning_rate": 5e-05, "loss": 1.4937, "step": 1532 }, { "epoch": 0.24531925108017283, "grad_norm": 0.3054557740688324, "learning_rate": 5e-05, "loss": 1.6077, "step": 1533 }, { "epoch": 0.24547927668426947, "grad_norm": 0.32040590047836304, "learning_rate": 5e-05, "loss": 1.6154, "step": 1534 }, { "epoch": 0.24563930228836614, "grad_norm": 0.3008425533771515, "learning_rate": 5e-05, "loss": 1.5561, "step": 1535 }, { "epoch": 0.24579932789246278, "grad_norm": 0.3052842319011688, "learning_rate": 5e-05, "loss": 1.4961, "step": 1536 }, { "epoch": 0.24595935349655945, "grad_norm": 0.29577067494392395, "learning_rate": 5e-05, "loss": 1.5174, "step": 1537 }, { "epoch": 0.2461193791006561, "grad_norm": 0.3046630322933197, "learning_rate": 5e-05, "loss": 1.5389, "step": 1538 }, { "epoch": 0.24627940470475276, "grad_norm": 0.3009239435195923, "learning_rate": 5e-05, "loss": 1.5274, "step": 1539 }, { "epoch": 0.2464394303088494, "grad_norm": 0.30920150876045227, "learning_rate": 5e-05, "loss": 1.6345, "step": 1540 }, { "epoch": 0.24659945591294608, "grad_norm": 0.3013177514076233, "learning_rate": 5e-05, "loss": 1.5566, "step": 1541 }, { "epoch": 0.24675948151704272, "grad_norm": 0.3012579679489136, "learning_rate": 5e-05, "loss": 1.5821, "step": 1542 }, { "epoch": 0.2469195071211394, "grad_norm": 0.30040761828422546, "learning_rate": 5e-05, "loss": 1.5889, "step": 1543 }, { "epoch": 0.24707953272523603, "grad_norm": 0.3017534911632538, "learning_rate": 5e-05, "loss": 1.5684, "step": 1544 }, { "epoch": 0.2472395583293327, "grad_norm": 0.3079117238521576, "learning_rate": 5e-05, "loss": 1.6272, "step": 1545 }, { "epoch": 0.24739958393342934, "grad_norm": 0.3110067546367645, "learning_rate": 5e-05, "loss": 1.5662, "step": 1546 }, { "epoch": 0.247559609537526, "grad_norm": 0.30174338817596436, "learning_rate": 5e-05, "loss": 1.5347, "step": 1547 }, { "epoch": 0.24771963514162265, "grad_norm": 0.30210280418395996, "learning_rate": 5e-05, "loss": 1.6103, "step": 1548 }, { "epoch": 0.24787966074571932, "grad_norm": 0.29327860474586487, "learning_rate": 5e-05, "loss": 1.4963, "step": 1549 }, { "epoch": 0.24803968634981596, "grad_norm": 0.3039913773536682, "learning_rate": 5e-05, "loss": 1.5568, "step": 1550 }, { "epoch": 0.24819971195391263, "grad_norm": 0.3023831844329834, "learning_rate": 5e-05, "loss": 1.6183, "step": 1551 }, { "epoch": 0.24835973755800927, "grad_norm": 0.2893945872783661, "learning_rate": 5e-05, "loss": 1.4925, "step": 1552 }, { "epoch": 0.24851976316210594, "grad_norm": 0.2977551221847534, "learning_rate": 5e-05, "loss": 1.5374, "step": 1553 }, { "epoch": 0.2486797887662026, "grad_norm": 0.29477477073669434, "learning_rate": 5e-05, "loss": 1.5348, "step": 1554 }, { "epoch": 0.24883981437029926, "grad_norm": 0.3067284822463989, "learning_rate": 5e-05, "loss": 1.5109, "step": 1555 }, { "epoch": 0.2489998399743959, "grad_norm": 0.2934640645980835, "learning_rate": 5e-05, "loss": 1.6137, "step": 1556 }, { "epoch": 0.24915986557849257, "grad_norm": 0.29530173540115356, "learning_rate": 5e-05, "loss": 1.507, "step": 1557 }, { "epoch": 0.2493198911825892, "grad_norm": 0.2946379780769348, "learning_rate": 5e-05, "loss": 1.4936, "step": 1558 }, { "epoch": 0.24947991678668588, "grad_norm": 0.30363598465919495, "learning_rate": 5e-05, "loss": 1.5337, "step": 1559 }, { "epoch": 0.24963994239078252, "grad_norm": 0.2943868041038513, "learning_rate": 5e-05, "loss": 1.4753, "step": 1560 }, { "epoch": 0.2497999679948792, "grad_norm": 0.29767051339149475, "learning_rate": 5e-05, "loss": 1.5338, "step": 1561 }, { "epoch": 0.24995999359897583, "grad_norm": 0.28518304228782654, "learning_rate": 5e-05, "loss": 1.5165, "step": 1562 }, { "epoch": 0.2501200192030725, "grad_norm": 0.3036326766014099, "learning_rate": 5e-05, "loss": 1.4986, "step": 1563 }, { "epoch": 0.25028004480716914, "grad_norm": 0.31450164318084717, "learning_rate": 5e-05, "loss": 1.6287, "step": 1564 }, { "epoch": 0.2504400704112658, "grad_norm": 0.2983885705471039, "learning_rate": 5e-05, "loss": 1.4221, "step": 1565 }, { "epoch": 0.2506000960153625, "grad_norm": 0.3055410385131836, "learning_rate": 5e-05, "loss": 1.5325, "step": 1566 }, { "epoch": 0.2507601216194591, "grad_norm": 0.30306753516197205, "learning_rate": 5e-05, "loss": 1.5454, "step": 1567 }, { "epoch": 0.25092014722355577, "grad_norm": 0.30742359161376953, "learning_rate": 5e-05, "loss": 1.5413, "step": 1568 }, { "epoch": 0.25108017282765244, "grad_norm": 0.31537798047065735, "learning_rate": 5e-05, "loss": 1.4615, "step": 1569 }, { "epoch": 0.2512401984317491, "grad_norm": 0.3375808894634247, "learning_rate": 5e-05, "loss": 1.5175, "step": 1570 }, { "epoch": 0.2514002240358457, "grad_norm": 0.29661935567855835, "learning_rate": 5e-05, "loss": 1.4748, "step": 1571 }, { "epoch": 0.2515602496399424, "grad_norm": 0.3437850773334503, "learning_rate": 5e-05, "loss": 1.5471, "step": 1572 }, { "epoch": 0.25172027524403906, "grad_norm": 0.30951762199401855, "learning_rate": 5e-05, "loss": 1.6157, "step": 1573 }, { "epoch": 0.2518803008481357, "grad_norm": 0.3174629509449005, "learning_rate": 5e-05, "loss": 1.5474, "step": 1574 }, { "epoch": 0.25204032645223234, "grad_norm": 0.3354226350784302, "learning_rate": 5e-05, "loss": 1.5643, "step": 1575 }, { "epoch": 0.252200352056329, "grad_norm": 0.29176488518714905, "learning_rate": 5e-05, "loss": 1.4735, "step": 1576 }, { "epoch": 0.2523603776604257, "grad_norm": 0.298865407705307, "learning_rate": 5e-05, "loss": 1.541, "step": 1577 }, { "epoch": 0.2525204032645223, "grad_norm": 0.3111476004123688, "learning_rate": 5e-05, "loss": 1.555, "step": 1578 }, { "epoch": 0.25268042886861897, "grad_norm": 0.305764377117157, "learning_rate": 5e-05, "loss": 1.623, "step": 1579 }, { "epoch": 0.25284045447271564, "grad_norm": 0.3039332330226898, "learning_rate": 5e-05, "loss": 1.6233, "step": 1580 }, { "epoch": 0.2530004800768123, "grad_norm": 0.2960338592529297, "learning_rate": 5e-05, "loss": 1.5759, "step": 1581 }, { "epoch": 0.2531605056809089, "grad_norm": 0.30645254254341125, "learning_rate": 5e-05, "loss": 1.5967, "step": 1582 }, { "epoch": 0.2533205312850056, "grad_norm": 0.29562586545944214, "learning_rate": 5e-05, "loss": 1.5213, "step": 1583 }, { "epoch": 0.25348055688910226, "grad_norm": 0.2997507154941559, "learning_rate": 5e-05, "loss": 1.55, "step": 1584 }, { "epoch": 0.25364058249319893, "grad_norm": 0.2952207624912262, "learning_rate": 5e-05, "loss": 1.523, "step": 1585 }, { "epoch": 0.25380060809729554, "grad_norm": 0.2937643229961395, "learning_rate": 5e-05, "loss": 1.5785, "step": 1586 }, { "epoch": 0.2539606337013922, "grad_norm": 0.3020572364330292, "learning_rate": 5e-05, "loss": 1.6338, "step": 1587 }, { "epoch": 0.2541206593054889, "grad_norm": 0.30620771646499634, "learning_rate": 5e-05, "loss": 1.5542, "step": 1588 }, { "epoch": 0.25428068490958555, "grad_norm": 0.30165940523147583, "learning_rate": 5e-05, "loss": 1.5781, "step": 1589 }, { "epoch": 0.25444071051368217, "grad_norm": 0.3078227639198303, "learning_rate": 5e-05, "loss": 1.506, "step": 1590 }, { "epoch": 0.25460073611777884, "grad_norm": 0.2867743968963623, "learning_rate": 5e-05, "loss": 1.5608, "step": 1591 }, { "epoch": 0.2547607617218755, "grad_norm": 0.2983227074146271, "learning_rate": 5e-05, "loss": 1.4348, "step": 1592 }, { "epoch": 0.2549207873259722, "grad_norm": 0.2999253273010254, "learning_rate": 5e-05, "loss": 1.4901, "step": 1593 }, { "epoch": 0.2550808129300688, "grad_norm": 0.2969807982444763, "learning_rate": 5e-05, "loss": 1.5241, "step": 1594 }, { "epoch": 0.25524083853416546, "grad_norm": 0.2995159924030304, "learning_rate": 5e-05, "loss": 1.5034, "step": 1595 }, { "epoch": 0.25540086413826213, "grad_norm": 0.2930397689342499, "learning_rate": 5e-05, "loss": 1.5051, "step": 1596 }, { "epoch": 0.2555608897423588, "grad_norm": 0.30661141872406006, "learning_rate": 5e-05, "loss": 1.5681, "step": 1597 }, { "epoch": 0.2557209153464554, "grad_norm": 0.30392569303512573, "learning_rate": 5e-05, "loss": 1.6379, "step": 1598 }, { "epoch": 0.2558809409505521, "grad_norm": 0.3029404580593109, "learning_rate": 5e-05, "loss": 1.5109, "step": 1599 }, { "epoch": 0.25604096655464875, "grad_norm": 0.3079161047935486, "learning_rate": 5e-05, "loss": 1.5755, "step": 1600 }, { "epoch": 0.2562009921587454, "grad_norm": 0.2990780770778656, "learning_rate": 5e-05, "loss": 1.5526, "step": 1601 }, { "epoch": 0.25636101776284204, "grad_norm": 0.298225075006485, "learning_rate": 5e-05, "loss": 1.5841, "step": 1602 }, { "epoch": 0.2565210433669387, "grad_norm": 0.3007179796695709, "learning_rate": 5e-05, "loss": 1.5241, "step": 1603 }, { "epoch": 0.2566810689710354, "grad_norm": 0.31191161274909973, "learning_rate": 5e-05, "loss": 1.6392, "step": 1604 }, { "epoch": 0.25684109457513205, "grad_norm": 0.29776501655578613, "learning_rate": 5e-05, "loss": 1.4846, "step": 1605 }, { "epoch": 0.25700112017922866, "grad_norm": 0.31003159284591675, "learning_rate": 5e-05, "loss": 1.5127, "step": 1606 }, { "epoch": 0.25716114578332533, "grad_norm": 0.31376418471336365, "learning_rate": 5e-05, "loss": 1.6114, "step": 1607 }, { "epoch": 0.257321171387422, "grad_norm": 0.3014332056045532, "learning_rate": 5e-05, "loss": 1.5727, "step": 1608 }, { "epoch": 0.25748119699151867, "grad_norm": 0.30526530742645264, "learning_rate": 5e-05, "loss": 1.558, "step": 1609 }, { "epoch": 0.2576412225956153, "grad_norm": 0.30233103036880493, "learning_rate": 5e-05, "loss": 1.5556, "step": 1610 }, { "epoch": 0.25780124819971195, "grad_norm": 0.29824742674827576, "learning_rate": 5e-05, "loss": 1.5519, "step": 1611 }, { "epoch": 0.2579612738038086, "grad_norm": 0.299706369638443, "learning_rate": 5e-05, "loss": 1.6343, "step": 1612 }, { "epoch": 0.2581212994079053, "grad_norm": 0.30547165870666504, "learning_rate": 5e-05, "loss": 1.5544, "step": 1613 }, { "epoch": 0.2582813250120019, "grad_norm": 0.31102168560028076, "learning_rate": 5e-05, "loss": 1.6353, "step": 1614 }, { "epoch": 0.2584413506160986, "grad_norm": 0.29281726479530334, "learning_rate": 5e-05, "loss": 1.4646, "step": 1615 }, { "epoch": 0.25860137622019524, "grad_norm": 0.30995628237724304, "learning_rate": 5e-05, "loss": 1.6133, "step": 1616 }, { "epoch": 0.25876140182429186, "grad_norm": 0.2974042296409607, "learning_rate": 5e-05, "loss": 1.4928, "step": 1617 }, { "epoch": 0.25892142742838853, "grad_norm": 0.2950752079486847, "learning_rate": 5e-05, "loss": 1.5357, "step": 1618 }, { "epoch": 0.2590814530324852, "grad_norm": 0.30785810947418213, "learning_rate": 5e-05, "loss": 1.6054, "step": 1619 }, { "epoch": 0.25924147863658187, "grad_norm": 0.2991347908973694, "learning_rate": 5e-05, "loss": 1.5211, "step": 1620 }, { "epoch": 0.2594015042406785, "grad_norm": 0.317604660987854, "learning_rate": 5e-05, "loss": 1.6159, "step": 1621 }, { "epoch": 0.25956152984477515, "grad_norm": 0.3074653744697571, "learning_rate": 5e-05, "loss": 1.5776, "step": 1622 }, { "epoch": 0.2597215554488718, "grad_norm": 0.3057837188243866, "learning_rate": 5e-05, "loss": 1.5499, "step": 1623 }, { "epoch": 0.2598815810529685, "grad_norm": 0.3110387325286865, "learning_rate": 5e-05, "loss": 1.5099, "step": 1624 }, { "epoch": 0.2600416066570651, "grad_norm": 0.30642229318618774, "learning_rate": 5e-05, "loss": 1.6133, "step": 1625 }, { "epoch": 0.2602016322611618, "grad_norm": 0.3067636489868164, "learning_rate": 5e-05, "loss": 1.5813, "step": 1626 }, { "epoch": 0.26036165786525844, "grad_norm": 0.3098999559879303, "learning_rate": 5e-05, "loss": 1.6307, "step": 1627 }, { "epoch": 0.2605216834693551, "grad_norm": 0.29907241463661194, "learning_rate": 5e-05, "loss": 1.5406, "step": 1628 }, { "epoch": 0.26068170907345173, "grad_norm": 0.3015841245651245, "learning_rate": 5e-05, "loss": 1.5224, "step": 1629 }, { "epoch": 0.2608417346775484, "grad_norm": 0.29414910078048706, "learning_rate": 5e-05, "loss": 1.5904, "step": 1630 }, { "epoch": 0.26100176028164507, "grad_norm": 0.31289151310920715, "learning_rate": 5e-05, "loss": 1.5292, "step": 1631 }, { "epoch": 0.26116178588574174, "grad_norm": 0.2958604395389557, "learning_rate": 5e-05, "loss": 1.5045, "step": 1632 }, { "epoch": 0.26132181148983835, "grad_norm": 0.30103185772895813, "learning_rate": 5e-05, "loss": 1.5676, "step": 1633 }, { "epoch": 0.261481837093935, "grad_norm": 0.2904116213321686, "learning_rate": 5e-05, "loss": 1.4921, "step": 1634 }, { "epoch": 0.2616418626980317, "grad_norm": 0.2973178029060364, "learning_rate": 5e-05, "loss": 1.5261, "step": 1635 }, { "epoch": 0.26180188830212836, "grad_norm": 0.29228851199150085, "learning_rate": 5e-05, "loss": 1.5681, "step": 1636 }, { "epoch": 0.261961913906225, "grad_norm": 0.3037562966346741, "learning_rate": 5e-05, "loss": 1.5632, "step": 1637 }, { "epoch": 0.26212193951032164, "grad_norm": 0.2995026707649231, "learning_rate": 5e-05, "loss": 1.5078, "step": 1638 }, { "epoch": 0.2622819651144183, "grad_norm": 0.3081519603729248, "learning_rate": 5e-05, "loss": 1.5756, "step": 1639 }, { "epoch": 0.262441990718515, "grad_norm": 0.32055437564849854, "learning_rate": 5e-05, "loss": 1.5627, "step": 1640 }, { "epoch": 0.2626020163226116, "grad_norm": 0.2999257743358612, "learning_rate": 5e-05, "loss": 1.5659, "step": 1641 }, { "epoch": 0.26276204192670827, "grad_norm": 0.29763633012771606, "learning_rate": 5e-05, "loss": 1.5065, "step": 1642 }, { "epoch": 0.26292206753080494, "grad_norm": 0.3081358075141907, "learning_rate": 5e-05, "loss": 1.6267, "step": 1643 }, { "epoch": 0.2630820931349016, "grad_norm": 0.29147204756736755, "learning_rate": 5e-05, "loss": 1.5192, "step": 1644 }, { "epoch": 0.2632421187389982, "grad_norm": 0.320878803730011, "learning_rate": 5e-05, "loss": 1.6061, "step": 1645 }, { "epoch": 0.2634021443430949, "grad_norm": 0.2901761829853058, "learning_rate": 5e-05, "loss": 1.5689, "step": 1646 }, { "epoch": 0.26356216994719156, "grad_norm": 0.31641268730163574, "learning_rate": 5e-05, "loss": 1.5288, "step": 1647 }, { "epoch": 0.26372219555128823, "grad_norm": 0.29591622948646545, "learning_rate": 5e-05, "loss": 1.5545, "step": 1648 }, { "epoch": 0.26388222115538484, "grad_norm": 0.30583205819129944, "learning_rate": 5e-05, "loss": 1.5325, "step": 1649 }, { "epoch": 0.2640422467594815, "grad_norm": 0.3066059947013855, "learning_rate": 5e-05, "loss": 1.6002, "step": 1650 }, { "epoch": 0.2642022723635782, "grad_norm": 0.30427202582359314, "learning_rate": 5e-05, "loss": 1.5981, "step": 1651 }, { "epoch": 0.26436229796767485, "grad_norm": 0.29595649242401123, "learning_rate": 5e-05, "loss": 1.618, "step": 1652 }, { "epoch": 0.26452232357177147, "grad_norm": 0.2969624996185303, "learning_rate": 5e-05, "loss": 1.477, "step": 1653 }, { "epoch": 0.26468234917586814, "grad_norm": 0.3058716654777527, "learning_rate": 5e-05, "loss": 1.6493, "step": 1654 }, { "epoch": 0.2648423747799648, "grad_norm": 0.2988014817237854, "learning_rate": 5e-05, "loss": 1.5476, "step": 1655 }, { "epoch": 0.2650024003840615, "grad_norm": 0.307029128074646, "learning_rate": 5e-05, "loss": 1.5803, "step": 1656 }, { "epoch": 0.2651624259881581, "grad_norm": 0.2922937870025635, "learning_rate": 5e-05, "loss": 1.5357, "step": 1657 }, { "epoch": 0.26532245159225476, "grad_norm": 0.2954445779323578, "learning_rate": 5e-05, "loss": 1.5348, "step": 1658 }, { "epoch": 0.26548247719635143, "grad_norm": 0.3050667643547058, "learning_rate": 5e-05, "loss": 1.5169, "step": 1659 }, { "epoch": 0.2656425028004481, "grad_norm": 0.3044993579387665, "learning_rate": 5e-05, "loss": 1.5756, "step": 1660 }, { "epoch": 0.2658025284045447, "grad_norm": 0.28738123178482056, "learning_rate": 5e-05, "loss": 1.4575, "step": 1661 }, { "epoch": 0.2659625540086414, "grad_norm": 0.3041124641895294, "learning_rate": 5e-05, "loss": 1.5423, "step": 1662 }, { "epoch": 0.26612257961273805, "grad_norm": 0.30530276894569397, "learning_rate": 5e-05, "loss": 1.5077, "step": 1663 }, { "epoch": 0.26628260521683467, "grad_norm": 0.29950183629989624, "learning_rate": 5e-05, "loss": 1.5045, "step": 1664 }, { "epoch": 0.26644263082093134, "grad_norm": 0.299526572227478, "learning_rate": 5e-05, "loss": 1.5192, "step": 1665 }, { "epoch": 0.266602656425028, "grad_norm": 0.30052587389945984, "learning_rate": 5e-05, "loss": 1.6393, "step": 1666 }, { "epoch": 0.2667626820291247, "grad_norm": 0.2995973229408264, "learning_rate": 5e-05, "loss": 1.5734, "step": 1667 }, { "epoch": 0.2669227076332213, "grad_norm": 0.291433721780777, "learning_rate": 5e-05, "loss": 1.4185, "step": 1668 }, { "epoch": 0.26708273323731796, "grad_norm": 0.29910749197006226, "learning_rate": 5e-05, "loss": 1.5205, "step": 1669 }, { "epoch": 0.26724275884141463, "grad_norm": 0.30191606283187866, "learning_rate": 5e-05, "loss": 1.5482, "step": 1670 }, { "epoch": 0.2674027844455113, "grad_norm": 0.3187236785888672, "learning_rate": 5e-05, "loss": 1.6108, "step": 1671 }, { "epoch": 0.2675628100496079, "grad_norm": 0.3049510717391968, "learning_rate": 5e-05, "loss": 1.6275, "step": 1672 }, { "epoch": 0.2677228356537046, "grad_norm": 0.3215326964855194, "learning_rate": 5e-05, "loss": 1.5356, "step": 1673 }, { "epoch": 0.26788286125780125, "grad_norm": 0.3028148412704468, "learning_rate": 5e-05, "loss": 1.4439, "step": 1674 }, { "epoch": 0.2680428868618979, "grad_norm": 0.3027764558792114, "learning_rate": 5e-05, "loss": 1.5724, "step": 1675 }, { "epoch": 0.26820291246599454, "grad_norm": 0.30808836221694946, "learning_rate": 5e-05, "loss": 1.4918, "step": 1676 }, { "epoch": 0.2683629380700912, "grad_norm": 0.30464717745780945, "learning_rate": 5e-05, "loss": 1.5534, "step": 1677 }, { "epoch": 0.2685229636741879, "grad_norm": 0.2975577116012573, "learning_rate": 5e-05, "loss": 1.5266, "step": 1678 }, { "epoch": 0.26868298927828455, "grad_norm": 0.3006643056869507, "learning_rate": 5e-05, "loss": 1.5064, "step": 1679 }, { "epoch": 0.26884301488238116, "grad_norm": 0.29671382904052734, "learning_rate": 5e-05, "loss": 1.5128, "step": 1680 }, { "epoch": 0.26900304048647783, "grad_norm": 0.3011617958545685, "learning_rate": 5e-05, "loss": 1.539, "step": 1681 }, { "epoch": 0.2691630660905745, "grad_norm": 0.3197185695171356, "learning_rate": 5e-05, "loss": 1.5244, "step": 1682 }, { "epoch": 0.26932309169467117, "grad_norm": 0.303560346364975, "learning_rate": 5e-05, "loss": 1.5294, "step": 1683 }, { "epoch": 0.2694831172987678, "grad_norm": 0.3239825963973999, "learning_rate": 5e-05, "loss": 1.4993, "step": 1684 }, { "epoch": 0.26964314290286445, "grad_norm": 0.30686086416244507, "learning_rate": 5e-05, "loss": 1.5792, "step": 1685 }, { "epoch": 0.2698031685069611, "grad_norm": 0.3202161192893982, "learning_rate": 5e-05, "loss": 1.5562, "step": 1686 }, { "epoch": 0.2699631941110578, "grad_norm": 0.31433114409446716, "learning_rate": 5e-05, "loss": 1.5998, "step": 1687 }, { "epoch": 0.2701232197151544, "grad_norm": 0.3005744218826294, "learning_rate": 5e-05, "loss": 1.5914, "step": 1688 }, { "epoch": 0.2702832453192511, "grad_norm": 0.31427791714668274, "learning_rate": 5e-05, "loss": 1.5482, "step": 1689 }, { "epoch": 0.27044327092334774, "grad_norm": 0.3047752380371094, "learning_rate": 5e-05, "loss": 1.5297, "step": 1690 }, { "epoch": 0.2706032965274444, "grad_norm": 0.3526361882686615, "learning_rate": 5e-05, "loss": 1.5633, "step": 1691 }, { "epoch": 0.27076332213154103, "grad_norm": 0.30182257294654846, "learning_rate": 5e-05, "loss": 1.4886, "step": 1692 }, { "epoch": 0.2709233477356377, "grad_norm": 0.30072999000549316, "learning_rate": 5e-05, "loss": 1.5218, "step": 1693 }, { "epoch": 0.27108337333973437, "grad_norm": 0.28958019614219666, "learning_rate": 5e-05, "loss": 1.5537, "step": 1694 }, { "epoch": 0.27124339894383104, "grad_norm": 0.34957224130630493, "learning_rate": 5e-05, "loss": 1.6713, "step": 1695 }, { "epoch": 0.27140342454792765, "grad_norm": 0.29706063866615295, "learning_rate": 5e-05, "loss": 1.4943, "step": 1696 }, { "epoch": 0.2715634501520243, "grad_norm": 0.3400128483772278, "learning_rate": 5e-05, "loss": 1.5523, "step": 1697 }, { "epoch": 0.271723475756121, "grad_norm": 0.31338152289390564, "learning_rate": 5e-05, "loss": 1.5532, "step": 1698 }, { "epoch": 0.27188350136021766, "grad_norm": 0.3065105080604553, "learning_rate": 5e-05, "loss": 1.5142, "step": 1699 }, { "epoch": 0.2720435269643143, "grad_norm": 0.33471158146858215, "learning_rate": 5e-05, "loss": 1.6335, "step": 1700 }, { "epoch": 0.27220355256841094, "grad_norm": 0.31506675481796265, "learning_rate": 5e-05, "loss": 1.6449, "step": 1701 }, { "epoch": 0.2723635781725076, "grad_norm": 0.3106222450733185, "learning_rate": 5e-05, "loss": 1.6042, "step": 1702 }, { "epoch": 0.2725236037766043, "grad_norm": 0.3135363757610321, "learning_rate": 5e-05, "loss": 1.5929, "step": 1703 }, { "epoch": 0.2726836293807009, "grad_norm": 0.31398290395736694, "learning_rate": 5e-05, "loss": 1.5722, "step": 1704 }, { "epoch": 0.27284365498479757, "grad_norm": 0.31570306420326233, "learning_rate": 5e-05, "loss": 1.5335, "step": 1705 }, { "epoch": 0.27300368058889424, "grad_norm": 0.3086223006248474, "learning_rate": 5e-05, "loss": 1.5802, "step": 1706 }, { "epoch": 0.27316370619299085, "grad_norm": 0.3353649377822876, "learning_rate": 5e-05, "loss": 1.6653, "step": 1707 }, { "epoch": 0.2733237317970875, "grad_norm": 0.2970983684062958, "learning_rate": 5e-05, "loss": 1.5445, "step": 1708 }, { "epoch": 0.2734837574011842, "grad_norm": 0.2949451804161072, "learning_rate": 5e-05, "loss": 1.569, "step": 1709 }, { "epoch": 0.27364378300528086, "grad_norm": 0.29194319248199463, "learning_rate": 5e-05, "loss": 1.564, "step": 1710 }, { "epoch": 0.2738038086093775, "grad_norm": 0.3184705674648285, "learning_rate": 5e-05, "loss": 1.5683, "step": 1711 }, { "epoch": 0.27396383421347414, "grad_norm": 0.30423587560653687, "learning_rate": 5e-05, "loss": 1.6353, "step": 1712 }, { "epoch": 0.2741238598175708, "grad_norm": 0.3203389346599579, "learning_rate": 5e-05, "loss": 1.5856, "step": 1713 }, { "epoch": 0.2742838854216675, "grad_norm": 0.3038877546787262, "learning_rate": 5e-05, "loss": 1.5334, "step": 1714 }, { "epoch": 0.2744439110257641, "grad_norm": 0.29973357915878296, "learning_rate": 5e-05, "loss": 1.4883, "step": 1715 }, { "epoch": 0.27460393662986077, "grad_norm": 0.2937779128551483, "learning_rate": 5e-05, "loss": 1.5371, "step": 1716 }, { "epoch": 0.27476396223395744, "grad_norm": 0.3085375428199768, "learning_rate": 5e-05, "loss": 1.5478, "step": 1717 }, { "epoch": 0.2749239878380541, "grad_norm": 0.3018428385257721, "learning_rate": 5e-05, "loss": 1.5602, "step": 1718 }, { "epoch": 0.2750840134421507, "grad_norm": 0.306317001581192, "learning_rate": 5e-05, "loss": 1.6422, "step": 1719 }, { "epoch": 0.2752440390462474, "grad_norm": 0.294790655374527, "learning_rate": 5e-05, "loss": 1.518, "step": 1720 }, { "epoch": 0.27540406465034406, "grad_norm": 0.3178292512893677, "learning_rate": 5e-05, "loss": 1.5985, "step": 1721 }, { "epoch": 0.27556409025444073, "grad_norm": 0.2943338453769684, "learning_rate": 5e-05, "loss": 1.5391, "step": 1722 }, { "epoch": 0.27572411585853734, "grad_norm": 0.3106151223182678, "learning_rate": 5e-05, "loss": 1.5701, "step": 1723 }, { "epoch": 0.275884141462634, "grad_norm": 0.29549136757850647, "learning_rate": 5e-05, "loss": 1.4875, "step": 1724 }, { "epoch": 0.2760441670667307, "grad_norm": 0.30323734879493713, "learning_rate": 5e-05, "loss": 1.5218, "step": 1725 }, { "epoch": 0.27620419267082735, "grad_norm": 0.3040730357170105, "learning_rate": 5e-05, "loss": 1.5636, "step": 1726 }, { "epoch": 0.27636421827492397, "grad_norm": 0.29262569546699524, "learning_rate": 5e-05, "loss": 1.5075, "step": 1727 }, { "epoch": 0.27652424387902064, "grad_norm": 0.30650684237480164, "learning_rate": 5e-05, "loss": 1.5817, "step": 1728 }, { "epoch": 0.2766842694831173, "grad_norm": 0.31993353366851807, "learning_rate": 5e-05, "loss": 1.5444, "step": 1729 }, { "epoch": 0.276844295087214, "grad_norm": 0.3009777069091797, "learning_rate": 5e-05, "loss": 1.5804, "step": 1730 }, { "epoch": 0.2770043206913106, "grad_norm": 0.2986879050731659, "learning_rate": 5e-05, "loss": 1.5837, "step": 1731 }, { "epoch": 0.27716434629540726, "grad_norm": 0.29270419478416443, "learning_rate": 5e-05, "loss": 1.4827, "step": 1732 }, { "epoch": 0.27732437189950393, "grad_norm": 0.30841585993766785, "learning_rate": 5e-05, "loss": 1.5156, "step": 1733 }, { "epoch": 0.2774843975036006, "grad_norm": 0.3155304193496704, "learning_rate": 5e-05, "loss": 1.6648, "step": 1734 }, { "epoch": 0.2776444231076972, "grad_norm": 0.3031212389469147, "learning_rate": 5e-05, "loss": 1.5595, "step": 1735 }, { "epoch": 0.2778044487117939, "grad_norm": 0.30141717195510864, "learning_rate": 5e-05, "loss": 1.5385, "step": 1736 }, { "epoch": 0.27796447431589055, "grad_norm": 0.29881566762924194, "learning_rate": 5e-05, "loss": 1.5263, "step": 1737 }, { "epoch": 0.2781244999199872, "grad_norm": 0.3102821409702301, "learning_rate": 5e-05, "loss": 1.6362, "step": 1738 }, { "epoch": 0.27828452552408384, "grad_norm": 0.3022639751434326, "learning_rate": 5e-05, "loss": 1.6035, "step": 1739 }, { "epoch": 0.2784445511281805, "grad_norm": 0.3025323152542114, "learning_rate": 5e-05, "loss": 1.5588, "step": 1740 }, { "epoch": 0.2786045767322772, "grad_norm": 0.2979627251625061, "learning_rate": 5e-05, "loss": 1.5605, "step": 1741 }, { "epoch": 0.27876460233637385, "grad_norm": 0.30737602710723877, "learning_rate": 5e-05, "loss": 1.5684, "step": 1742 }, { "epoch": 0.27892462794047046, "grad_norm": 0.2986236810684204, "learning_rate": 5e-05, "loss": 1.5505, "step": 1743 }, { "epoch": 0.27908465354456713, "grad_norm": 0.30772191286087036, "learning_rate": 5e-05, "loss": 1.5163, "step": 1744 }, { "epoch": 0.2792446791486638, "grad_norm": 0.31047603487968445, "learning_rate": 5e-05, "loss": 1.5417, "step": 1745 }, { "epoch": 0.27940470475276047, "grad_norm": 0.3083723783493042, "learning_rate": 5e-05, "loss": 1.6189, "step": 1746 }, { "epoch": 0.2795647303568571, "grad_norm": 0.2941843271255493, "learning_rate": 5e-05, "loss": 1.5087, "step": 1747 }, { "epoch": 0.27972475596095375, "grad_norm": 0.30214542150497437, "learning_rate": 5e-05, "loss": 1.5632, "step": 1748 }, { "epoch": 0.2798847815650504, "grad_norm": 0.3032545745372772, "learning_rate": 5e-05, "loss": 1.5517, "step": 1749 }, { "epoch": 0.28004480716914704, "grad_norm": 0.31186801195144653, "learning_rate": 5e-05, "loss": 1.6248, "step": 1750 }, { "epoch": 0.2802048327732437, "grad_norm": 0.32421791553497314, "learning_rate": 5e-05, "loss": 1.6055, "step": 1751 }, { "epoch": 0.2803648583773404, "grad_norm": 0.3022795617580414, "learning_rate": 5e-05, "loss": 1.5804, "step": 1752 }, { "epoch": 0.28052488398143705, "grad_norm": 0.3064497411251068, "learning_rate": 5e-05, "loss": 1.5847, "step": 1753 }, { "epoch": 0.28068490958553366, "grad_norm": 0.31060606241226196, "learning_rate": 5e-05, "loss": 1.5503, "step": 1754 }, { "epoch": 0.28084493518963033, "grad_norm": 0.3037862479686737, "learning_rate": 5e-05, "loss": 1.5407, "step": 1755 }, { "epoch": 0.281004960793727, "grad_norm": 0.2899343967437744, "learning_rate": 5e-05, "loss": 1.5236, "step": 1756 }, { "epoch": 0.28116498639782367, "grad_norm": 0.30347853899002075, "learning_rate": 5e-05, "loss": 1.5152, "step": 1757 }, { "epoch": 0.2813250120019203, "grad_norm": 0.3005552887916565, "learning_rate": 5e-05, "loss": 1.5434, "step": 1758 }, { "epoch": 0.28148503760601695, "grad_norm": 0.3021591007709503, "learning_rate": 5e-05, "loss": 1.5787, "step": 1759 }, { "epoch": 0.2816450632101136, "grad_norm": 0.3098880350589752, "learning_rate": 5e-05, "loss": 1.5904, "step": 1760 }, { "epoch": 0.2818050888142103, "grad_norm": 0.297989159822464, "learning_rate": 5e-05, "loss": 1.5519, "step": 1761 }, { "epoch": 0.2819651144183069, "grad_norm": 0.2956577241420746, "learning_rate": 5e-05, "loss": 1.5344, "step": 1762 }, { "epoch": 0.2821251400224036, "grad_norm": 0.3137606680393219, "learning_rate": 5e-05, "loss": 1.6195, "step": 1763 }, { "epoch": 0.28228516562650025, "grad_norm": 0.3130006194114685, "learning_rate": 5e-05, "loss": 1.6327, "step": 1764 }, { "epoch": 0.2824451912305969, "grad_norm": 0.2953270375728607, "learning_rate": 5e-05, "loss": 1.4892, "step": 1765 }, { "epoch": 0.28260521683469353, "grad_norm": 0.3012668490409851, "learning_rate": 5e-05, "loss": 1.5333, "step": 1766 }, { "epoch": 0.2827652424387902, "grad_norm": 0.31056973338127136, "learning_rate": 5e-05, "loss": 1.6412, "step": 1767 }, { "epoch": 0.28292526804288687, "grad_norm": 0.29409414529800415, "learning_rate": 5e-05, "loss": 1.5155, "step": 1768 }, { "epoch": 0.28308529364698354, "grad_norm": 0.30823636054992676, "learning_rate": 5e-05, "loss": 1.5733, "step": 1769 }, { "epoch": 0.28324531925108015, "grad_norm": 0.309935986995697, "learning_rate": 5e-05, "loss": 1.5661, "step": 1770 }, { "epoch": 0.2834053448551768, "grad_norm": 0.3132413327693939, "learning_rate": 5e-05, "loss": 1.6388, "step": 1771 }, { "epoch": 0.2835653704592735, "grad_norm": 0.31359803676605225, "learning_rate": 5e-05, "loss": 1.6183, "step": 1772 }, { "epoch": 0.28372539606337016, "grad_norm": 0.3128315210342407, "learning_rate": 5e-05, "loss": 1.62, "step": 1773 }, { "epoch": 0.2838854216674668, "grad_norm": 0.30925604701042175, "learning_rate": 5e-05, "loss": 1.5514, "step": 1774 }, { "epoch": 0.28404544727156344, "grad_norm": 0.30224499106407166, "learning_rate": 5e-05, "loss": 1.546, "step": 1775 }, { "epoch": 0.2842054728756601, "grad_norm": 0.2963906526565552, "learning_rate": 5e-05, "loss": 1.504, "step": 1776 }, { "epoch": 0.2843654984797568, "grad_norm": 0.30368736386299133, "learning_rate": 5e-05, "loss": 1.5623, "step": 1777 }, { "epoch": 0.2845255240838534, "grad_norm": 0.30151236057281494, "learning_rate": 5e-05, "loss": 1.5745, "step": 1778 }, { "epoch": 0.28468554968795007, "grad_norm": 0.309548556804657, "learning_rate": 5e-05, "loss": 1.5711, "step": 1779 }, { "epoch": 0.28484557529204674, "grad_norm": 0.3189665973186493, "learning_rate": 5e-05, "loss": 1.6594, "step": 1780 }, { "epoch": 0.2850056008961434, "grad_norm": 0.2959572970867157, "learning_rate": 5e-05, "loss": 1.4922, "step": 1781 }, { "epoch": 0.28516562650024, "grad_norm": 0.3052579462528229, "learning_rate": 5e-05, "loss": 1.4875, "step": 1782 }, { "epoch": 0.2853256521043367, "grad_norm": 0.31563860177993774, "learning_rate": 5e-05, "loss": 1.6685, "step": 1783 }, { "epoch": 0.28548567770843336, "grad_norm": 0.3016045391559601, "learning_rate": 5e-05, "loss": 1.5589, "step": 1784 }, { "epoch": 0.28564570331253003, "grad_norm": 0.3079080581665039, "learning_rate": 5e-05, "loss": 1.5546, "step": 1785 }, { "epoch": 0.28580572891662664, "grad_norm": 0.3016943037509918, "learning_rate": 5e-05, "loss": 1.5532, "step": 1786 }, { "epoch": 0.2859657545207233, "grad_norm": 0.29519766569137573, "learning_rate": 5e-05, "loss": 1.5351, "step": 1787 }, { "epoch": 0.28612578012482, "grad_norm": 0.303500235080719, "learning_rate": 5e-05, "loss": 1.4848, "step": 1788 }, { "epoch": 0.28628580572891665, "grad_norm": 0.2995098829269409, "learning_rate": 5e-05, "loss": 1.6131, "step": 1789 }, { "epoch": 0.28644583133301327, "grad_norm": 0.29714536666870117, "learning_rate": 5e-05, "loss": 1.5159, "step": 1790 }, { "epoch": 0.28660585693710994, "grad_norm": 0.30230921506881714, "learning_rate": 5e-05, "loss": 1.4945, "step": 1791 }, { "epoch": 0.2867658825412066, "grad_norm": 0.30441221594810486, "learning_rate": 5e-05, "loss": 1.6053, "step": 1792 }, { "epoch": 0.2869259081453032, "grad_norm": 0.29830554127693176, "learning_rate": 5e-05, "loss": 1.5005, "step": 1793 }, { "epoch": 0.2870859337493999, "grad_norm": 0.31724444031715393, "learning_rate": 5e-05, "loss": 1.5525, "step": 1794 }, { "epoch": 0.28724595935349656, "grad_norm": 0.3141976594924927, "learning_rate": 5e-05, "loss": 1.5357, "step": 1795 }, { "epoch": 0.28740598495759323, "grad_norm": 0.30220478773117065, "learning_rate": 5e-05, "loss": 1.4835, "step": 1796 }, { "epoch": 0.28756601056168984, "grad_norm": 0.32402873039245605, "learning_rate": 5e-05, "loss": 1.5758, "step": 1797 }, { "epoch": 0.2877260361657865, "grad_norm": 0.30831918120384216, "learning_rate": 5e-05, "loss": 1.5462, "step": 1798 }, { "epoch": 0.2878860617698832, "grad_norm": 0.30748894810676575, "learning_rate": 5e-05, "loss": 1.5162, "step": 1799 }, { "epoch": 0.28804608737397985, "grad_norm": 0.3061660826206207, "learning_rate": 5e-05, "loss": 1.4992, "step": 1800 }, { "epoch": 0.28820611297807647, "grad_norm": 0.3163347840309143, "learning_rate": 5e-05, "loss": 1.5801, "step": 1801 }, { "epoch": 0.28836613858217314, "grad_norm": 0.3114202916622162, "learning_rate": 5e-05, "loss": 1.5635, "step": 1802 }, { "epoch": 0.2885261641862698, "grad_norm": 0.2970971465110779, "learning_rate": 5e-05, "loss": 1.5041, "step": 1803 }, { "epoch": 0.2886861897903665, "grad_norm": 0.29949435591697693, "learning_rate": 5e-05, "loss": 1.5742, "step": 1804 }, { "epoch": 0.2888462153944631, "grad_norm": 0.307681679725647, "learning_rate": 5e-05, "loss": 1.5785, "step": 1805 }, { "epoch": 0.28900624099855976, "grad_norm": 0.3019464612007141, "learning_rate": 5e-05, "loss": 1.5536, "step": 1806 }, { "epoch": 0.28916626660265643, "grad_norm": 0.2956843972206116, "learning_rate": 5e-05, "loss": 1.5631, "step": 1807 }, { "epoch": 0.2893262922067531, "grad_norm": 0.29861706495285034, "learning_rate": 5e-05, "loss": 1.5432, "step": 1808 }, { "epoch": 0.2894863178108497, "grad_norm": 0.3040740489959717, "learning_rate": 5e-05, "loss": 1.4997, "step": 1809 }, { "epoch": 0.2896463434149464, "grad_norm": 0.3119968771934509, "learning_rate": 5e-05, "loss": 1.6498, "step": 1810 }, { "epoch": 0.28980636901904305, "grad_norm": 0.3023379445075989, "learning_rate": 5e-05, "loss": 1.565, "step": 1811 }, { "epoch": 0.2899663946231397, "grad_norm": 0.30104613304138184, "learning_rate": 5e-05, "loss": 1.5515, "step": 1812 }, { "epoch": 0.29012642022723634, "grad_norm": 0.2897384762763977, "learning_rate": 5e-05, "loss": 1.4714, "step": 1813 }, { "epoch": 0.290286445831333, "grad_norm": 0.3253358006477356, "learning_rate": 5e-05, "loss": 1.66, "step": 1814 }, { "epoch": 0.2904464714354297, "grad_norm": 0.310550719499588, "learning_rate": 5e-05, "loss": 1.5423, "step": 1815 }, { "epoch": 0.29060649703952635, "grad_norm": 0.2935306131839752, "learning_rate": 5e-05, "loss": 1.4647, "step": 1816 }, { "epoch": 0.29076652264362296, "grad_norm": 0.3121715188026428, "learning_rate": 5e-05, "loss": 1.6062, "step": 1817 }, { "epoch": 0.29092654824771963, "grad_norm": 0.3010578751564026, "learning_rate": 5e-05, "loss": 1.5472, "step": 1818 }, { "epoch": 0.2910865738518163, "grad_norm": 0.30876097083091736, "learning_rate": 5e-05, "loss": 1.5568, "step": 1819 }, { "epoch": 0.29124659945591297, "grad_norm": 0.3110181987285614, "learning_rate": 5e-05, "loss": 1.5256, "step": 1820 }, { "epoch": 0.2914066250600096, "grad_norm": 0.31361761689186096, "learning_rate": 5e-05, "loss": 1.6722, "step": 1821 }, { "epoch": 0.29156665066410625, "grad_norm": 0.3159146308898926, "learning_rate": 5e-05, "loss": 1.5439, "step": 1822 }, { "epoch": 0.2917266762682029, "grad_norm": 0.3071803152561188, "learning_rate": 5e-05, "loss": 1.562, "step": 1823 }, { "epoch": 0.2918867018722996, "grad_norm": 0.3108947277069092, "learning_rate": 5e-05, "loss": 1.5614, "step": 1824 }, { "epoch": 0.2920467274763962, "grad_norm": 0.32803311944007874, "learning_rate": 5e-05, "loss": 1.5648, "step": 1825 }, { "epoch": 0.2922067530804929, "grad_norm": 0.29947835206985474, "learning_rate": 5e-05, "loss": 1.5414, "step": 1826 }, { "epoch": 0.29236677868458955, "grad_norm": 0.30832916498184204, "learning_rate": 5e-05, "loss": 1.536, "step": 1827 }, { "epoch": 0.2925268042886862, "grad_norm": 0.30085667967796326, "learning_rate": 5e-05, "loss": 1.4903, "step": 1828 }, { "epoch": 0.29268682989278283, "grad_norm": 0.3064769506454468, "learning_rate": 5e-05, "loss": 1.5842, "step": 1829 }, { "epoch": 0.2928468554968795, "grad_norm": 0.31423863768577576, "learning_rate": 5e-05, "loss": 1.6239, "step": 1830 }, { "epoch": 0.29300688110097617, "grad_norm": 0.31698721647262573, "learning_rate": 5e-05, "loss": 1.5731, "step": 1831 }, { "epoch": 0.29316690670507284, "grad_norm": 0.30314916372299194, "learning_rate": 5e-05, "loss": 1.4525, "step": 1832 }, { "epoch": 0.29332693230916945, "grad_norm": 0.336005836725235, "learning_rate": 5e-05, "loss": 1.6285, "step": 1833 }, { "epoch": 0.2934869579132661, "grad_norm": 0.29950249195098877, "learning_rate": 5e-05, "loss": 1.515, "step": 1834 }, { "epoch": 0.2936469835173628, "grad_norm": 0.2976840138435364, "learning_rate": 5e-05, "loss": 1.5748, "step": 1835 }, { "epoch": 0.2938070091214594, "grad_norm": 0.35028982162475586, "learning_rate": 5e-05, "loss": 1.464, "step": 1836 }, { "epoch": 0.2939670347255561, "grad_norm": 0.3181176483631134, "learning_rate": 5e-05, "loss": 1.5958, "step": 1837 }, { "epoch": 0.29412706032965275, "grad_norm": 0.3068400025367737, "learning_rate": 5e-05, "loss": 1.6071, "step": 1838 }, { "epoch": 0.2942870859337494, "grad_norm": 0.31754663586616516, "learning_rate": 5e-05, "loss": 1.5054, "step": 1839 }, { "epoch": 0.29444711153784603, "grad_norm": 0.3249635398387909, "learning_rate": 5e-05, "loss": 1.6334, "step": 1840 }, { "epoch": 0.2946071371419427, "grad_norm": 0.2956801652908325, "learning_rate": 5e-05, "loss": 1.534, "step": 1841 }, { "epoch": 0.29476716274603937, "grad_norm": 0.31753334403038025, "learning_rate": 5e-05, "loss": 1.5227, "step": 1842 }, { "epoch": 0.29492718835013604, "grad_norm": 0.296264111995697, "learning_rate": 5e-05, "loss": 1.4989, "step": 1843 }, { "epoch": 0.29508721395423265, "grad_norm": 0.2918775975704193, "learning_rate": 5e-05, "loss": 1.4149, "step": 1844 }, { "epoch": 0.2952472395583293, "grad_norm": 0.29671502113342285, "learning_rate": 5e-05, "loss": 1.5215, "step": 1845 }, { "epoch": 0.295407265162426, "grad_norm": 0.30162933468818665, "learning_rate": 5e-05, "loss": 1.5473, "step": 1846 }, { "epoch": 0.29556729076652266, "grad_norm": 0.3215082287788391, "learning_rate": 5e-05, "loss": 1.5911, "step": 1847 }, { "epoch": 0.2957273163706193, "grad_norm": 0.3212738633155823, "learning_rate": 5e-05, "loss": 1.6242, "step": 1848 }, { "epoch": 0.29588734197471595, "grad_norm": 0.29421064257621765, "learning_rate": 5e-05, "loss": 1.5461, "step": 1849 }, { "epoch": 0.2960473675788126, "grad_norm": 0.3033580780029297, "learning_rate": 5e-05, "loss": 1.5662, "step": 1850 }, { "epoch": 0.2962073931829093, "grad_norm": 0.29910290241241455, "learning_rate": 5e-05, "loss": 1.4728, "step": 1851 }, { "epoch": 0.2963674187870059, "grad_norm": 0.3096688985824585, "learning_rate": 5e-05, "loss": 1.5684, "step": 1852 }, { "epoch": 0.29652744439110257, "grad_norm": 0.2985823154449463, "learning_rate": 5e-05, "loss": 1.5271, "step": 1853 }, { "epoch": 0.29668746999519924, "grad_norm": 0.3016878664493561, "learning_rate": 5e-05, "loss": 1.5257, "step": 1854 }, { "epoch": 0.2968474955992959, "grad_norm": 0.3092103600502014, "learning_rate": 5e-05, "loss": 1.5187, "step": 1855 }, { "epoch": 0.2970075212033925, "grad_norm": 0.32277390360832214, "learning_rate": 5e-05, "loss": 1.623, "step": 1856 }, { "epoch": 0.2971675468074892, "grad_norm": 0.29985007643699646, "learning_rate": 5e-05, "loss": 1.5079, "step": 1857 }, { "epoch": 0.29732757241158586, "grad_norm": 0.3118577003479004, "learning_rate": 5e-05, "loss": 1.6041, "step": 1858 }, { "epoch": 0.29748759801568253, "grad_norm": 0.3155111074447632, "learning_rate": 5e-05, "loss": 1.5453, "step": 1859 }, { "epoch": 0.29764762361977914, "grad_norm": 0.2994786500930786, "learning_rate": 5e-05, "loss": 1.465, "step": 1860 }, { "epoch": 0.2978076492238758, "grad_norm": 0.30371177196502686, "learning_rate": 5e-05, "loss": 1.5468, "step": 1861 }, { "epoch": 0.2979676748279725, "grad_norm": 0.3417801260948181, "learning_rate": 5e-05, "loss": 1.5654, "step": 1862 }, { "epoch": 0.29812770043206915, "grad_norm": 0.30755940079689026, "learning_rate": 5e-05, "loss": 1.5368, "step": 1863 }, { "epoch": 0.29828772603616577, "grad_norm": 0.34299176931381226, "learning_rate": 5e-05, "loss": 1.5833, "step": 1864 }, { "epoch": 0.29844775164026244, "grad_norm": 0.3337966501712799, "learning_rate": 5e-05, "loss": 1.6003, "step": 1865 }, { "epoch": 0.2986077772443591, "grad_norm": 0.31119775772094727, "learning_rate": 5e-05, "loss": 1.5927, "step": 1866 }, { "epoch": 0.2987678028484558, "grad_norm": 0.31704211235046387, "learning_rate": 5e-05, "loss": 1.5462, "step": 1867 }, { "epoch": 0.2989278284525524, "grad_norm": 0.33884987235069275, "learning_rate": 5e-05, "loss": 1.5898, "step": 1868 }, { "epoch": 0.29908785405664906, "grad_norm": 0.29982832074165344, "learning_rate": 5e-05, "loss": 1.5455, "step": 1869 }, { "epoch": 0.29924787966074573, "grad_norm": 0.3384244740009308, "learning_rate": 5e-05, "loss": 1.5731, "step": 1870 }, { "epoch": 0.2994079052648424, "grad_norm": 0.29453450441360474, "learning_rate": 5e-05, "loss": 1.4614, "step": 1871 }, { "epoch": 0.299567930868939, "grad_norm": 0.29789644479751587, "learning_rate": 5e-05, "loss": 1.5132, "step": 1872 }, { "epoch": 0.2997279564730357, "grad_norm": 0.3198714852333069, "learning_rate": 5e-05, "loss": 1.5068, "step": 1873 }, { "epoch": 0.29988798207713235, "grad_norm": 0.3325304388999939, "learning_rate": 5e-05, "loss": 1.5478, "step": 1874 }, { "epoch": 0.300048007681229, "grad_norm": 0.3212848901748657, "learning_rate": 5e-05, "loss": 1.5616, "step": 1875 }, { "epoch": 0.30020803328532564, "grad_norm": 0.32543572783470154, "learning_rate": 5e-05, "loss": 1.5829, "step": 1876 }, { "epoch": 0.3003680588894223, "grad_norm": 0.29737910628318787, "learning_rate": 5e-05, "loss": 1.4378, "step": 1877 }, { "epoch": 0.300528084493519, "grad_norm": 0.296651691198349, "learning_rate": 5e-05, "loss": 1.5362, "step": 1878 }, { "epoch": 0.30068811009761565, "grad_norm": 0.3066013753414154, "learning_rate": 5e-05, "loss": 1.4982, "step": 1879 }, { "epoch": 0.30084813570171226, "grad_norm": 0.32350602746009827, "learning_rate": 5e-05, "loss": 1.5573, "step": 1880 }, { "epoch": 0.30100816130580893, "grad_norm": 0.2940897047519684, "learning_rate": 5e-05, "loss": 1.5042, "step": 1881 }, { "epoch": 0.3011681869099056, "grad_norm": 0.31949958205223083, "learning_rate": 5e-05, "loss": 1.5499, "step": 1882 }, { "epoch": 0.3013282125140022, "grad_norm": 0.3191608786582947, "learning_rate": 5e-05, "loss": 1.4999, "step": 1883 }, { "epoch": 0.3014882381180989, "grad_norm": 0.31828728318214417, "learning_rate": 5e-05, "loss": 1.5559, "step": 1884 }, { "epoch": 0.30164826372219555, "grad_norm": 0.3136212229728699, "learning_rate": 5e-05, "loss": 1.5383, "step": 1885 }, { "epoch": 0.3018082893262922, "grad_norm": 0.3079538941383362, "learning_rate": 5e-05, "loss": 1.5775, "step": 1886 }, { "epoch": 0.30196831493038884, "grad_norm": 0.30446162819862366, "learning_rate": 5e-05, "loss": 1.5276, "step": 1887 }, { "epoch": 0.3021283405344855, "grad_norm": 0.2953200936317444, "learning_rate": 5e-05, "loss": 1.5185, "step": 1888 }, { "epoch": 0.3022883661385822, "grad_norm": 0.30795225501060486, "learning_rate": 5e-05, "loss": 1.5704, "step": 1889 }, { "epoch": 0.30244839174267885, "grad_norm": 0.3089234530925751, "learning_rate": 5e-05, "loss": 1.4692, "step": 1890 }, { "epoch": 0.30260841734677546, "grad_norm": 0.3098488748073578, "learning_rate": 5e-05, "loss": 1.6487, "step": 1891 }, { "epoch": 0.30276844295087213, "grad_norm": 0.2999376356601715, "learning_rate": 5e-05, "loss": 1.532, "step": 1892 }, { "epoch": 0.3029284685549688, "grad_norm": 0.30155014991760254, "learning_rate": 5e-05, "loss": 1.5245, "step": 1893 }, { "epoch": 0.30308849415906547, "grad_norm": 0.3130105137825012, "learning_rate": 5e-05, "loss": 1.4372, "step": 1894 }, { "epoch": 0.3032485197631621, "grad_norm": 0.3068635165691376, "learning_rate": 5e-05, "loss": 1.5279, "step": 1895 }, { "epoch": 0.30340854536725875, "grad_norm": 0.2931237518787384, "learning_rate": 5e-05, "loss": 1.4829, "step": 1896 }, { "epoch": 0.3035685709713554, "grad_norm": 0.31487178802490234, "learning_rate": 5e-05, "loss": 1.5925, "step": 1897 }, { "epoch": 0.3037285965754521, "grad_norm": 0.3013266324996948, "learning_rate": 5e-05, "loss": 1.5617, "step": 1898 }, { "epoch": 0.3038886221795487, "grad_norm": 0.3101295530796051, "learning_rate": 5e-05, "loss": 1.6193, "step": 1899 }, { "epoch": 0.3040486477836454, "grad_norm": 0.3193373680114746, "learning_rate": 5e-05, "loss": 1.5256, "step": 1900 }, { "epoch": 0.30420867338774205, "grad_norm": 0.310949444770813, "learning_rate": 5e-05, "loss": 1.603, "step": 1901 }, { "epoch": 0.3043686989918387, "grad_norm": 0.31643936038017273, "learning_rate": 5e-05, "loss": 1.6119, "step": 1902 }, { "epoch": 0.30452872459593533, "grad_norm": 0.30188313126564026, "learning_rate": 5e-05, "loss": 1.5378, "step": 1903 }, { "epoch": 0.304688750200032, "grad_norm": 0.30813291668891907, "learning_rate": 5e-05, "loss": 1.5409, "step": 1904 }, { "epoch": 0.30484877580412867, "grad_norm": 0.3205571472644806, "learning_rate": 5e-05, "loss": 1.577, "step": 1905 }, { "epoch": 0.30500880140822534, "grad_norm": 0.31205782294273376, "learning_rate": 5e-05, "loss": 1.5784, "step": 1906 }, { "epoch": 0.30516882701232195, "grad_norm": 0.3004149794578552, "learning_rate": 5e-05, "loss": 1.5887, "step": 1907 }, { "epoch": 0.3053288526164186, "grad_norm": 0.3270770311355591, "learning_rate": 5e-05, "loss": 1.5574, "step": 1908 }, { "epoch": 0.3054888782205153, "grad_norm": 0.3277081251144409, "learning_rate": 5e-05, "loss": 1.5903, "step": 1909 }, { "epoch": 0.30564890382461196, "grad_norm": 0.3099544942378998, "learning_rate": 5e-05, "loss": 1.6334, "step": 1910 }, { "epoch": 0.3058089294287086, "grad_norm": 0.31934767961502075, "learning_rate": 5e-05, "loss": 1.5472, "step": 1911 }, { "epoch": 0.30596895503280525, "grad_norm": 0.2999463379383087, "learning_rate": 5e-05, "loss": 1.5474, "step": 1912 }, { "epoch": 0.3061289806369019, "grad_norm": 0.32425636053085327, "learning_rate": 5e-05, "loss": 1.5546, "step": 1913 }, { "epoch": 0.3062890062409986, "grad_norm": 0.30661848187446594, "learning_rate": 5e-05, "loss": 1.508, "step": 1914 }, { "epoch": 0.3064490318450952, "grad_norm": 0.3198469579219818, "learning_rate": 5e-05, "loss": 1.6065, "step": 1915 }, { "epoch": 0.30660905744919187, "grad_norm": 0.2970849871635437, "learning_rate": 5e-05, "loss": 1.519, "step": 1916 }, { "epoch": 0.30676908305328854, "grad_norm": 0.30499204993247986, "learning_rate": 5e-05, "loss": 1.581, "step": 1917 }, { "epoch": 0.3069291086573852, "grad_norm": 0.312317818403244, "learning_rate": 5e-05, "loss": 1.545, "step": 1918 }, { "epoch": 0.3070891342614818, "grad_norm": 0.30423539876937866, "learning_rate": 5e-05, "loss": 1.5214, "step": 1919 }, { "epoch": 0.3072491598655785, "grad_norm": 0.3126371204853058, "learning_rate": 5e-05, "loss": 1.5793, "step": 1920 }, { "epoch": 0.30740918546967516, "grad_norm": 0.3074953258037567, "learning_rate": 5e-05, "loss": 1.5587, "step": 1921 }, { "epoch": 0.30756921107377183, "grad_norm": 0.3079175651073456, "learning_rate": 5e-05, "loss": 1.561, "step": 1922 }, { "epoch": 0.30772923667786845, "grad_norm": 0.29708626866340637, "learning_rate": 5e-05, "loss": 1.4417, "step": 1923 }, { "epoch": 0.3078892622819651, "grad_norm": 0.3144456148147583, "learning_rate": 5e-05, "loss": 1.4623, "step": 1924 }, { "epoch": 0.3080492878860618, "grad_norm": 0.31583109498023987, "learning_rate": 5e-05, "loss": 1.5634, "step": 1925 }, { "epoch": 0.3082093134901584, "grad_norm": 0.30498844385147095, "learning_rate": 5e-05, "loss": 1.5662, "step": 1926 }, { "epoch": 0.30836933909425507, "grad_norm": 0.30255046486854553, "learning_rate": 5e-05, "loss": 1.5186, "step": 1927 }, { "epoch": 0.30852936469835174, "grad_norm": 0.3211110234260559, "learning_rate": 5e-05, "loss": 1.5925, "step": 1928 }, { "epoch": 0.3086893903024484, "grad_norm": 0.2998898923397064, "learning_rate": 5e-05, "loss": 1.5677, "step": 1929 }, { "epoch": 0.308849415906545, "grad_norm": 0.32220613956451416, "learning_rate": 5e-05, "loss": 1.5927, "step": 1930 }, { "epoch": 0.3090094415106417, "grad_norm": 0.3042747974395752, "learning_rate": 5e-05, "loss": 1.5213, "step": 1931 }, { "epoch": 0.30916946711473836, "grad_norm": 0.29952046275138855, "learning_rate": 5e-05, "loss": 1.5199, "step": 1932 }, { "epoch": 0.30932949271883503, "grad_norm": 0.3080613315105438, "learning_rate": 5e-05, "loss": 1.6233, "step": 1933 }, { "epoch": 0.30948951832293164, "grad_norm": 0.3047448396682739, "learning_rate": 5e-05, "loss": 1.579, "step": 1934 }, { "epoch": 0.3096495439270283, "grad_norm": 0.298432856798172, "learning_rate": 5e-05, "loss": 1.4253, "step": 1935 }, { "epoch": 0.309809569531125, "grad_norm": 0.3022426664829254, "learning_rate": 5e-05, "loss": 1.5252, "step": 1936 }, { "epoch": 0.30996959513522165, "grad_norm": 0.3098442852497101, "learning_rate": 5e-05, "loss": 1.5238, "step": 1937 }, { "epoch": 0.31012962073931827, "grad_norm": 0.30174821615219116, "learning_rate": 5e-05, "loss": 1.5864, "step": 1938 }, { "epoch": 0.31028964634341494, "grad_norm": 0.3183569014072418, "learning_rate": 5e-05, "loss": 1.5413, "step": 1939 }, { "epoch": 0.3104496719475116, "grad_norm": 0.30597180128097534, "learning_rate": 5e-05, "loss": 1.5539, "step": 1940 }, { "epoch": 0.3106096975516083, "grad_norm": 0.30726519227027893, "learning_rate": 5e-05, "loss": 1.4973, "step": 1941 }, { "epoch": 0.3107697231557049, "grad_norm": 0.31120628118515015, "learning_rate": 5e-05, "loss": 1.5265, "step": 1942 }, { "epoch": 0.31092974875980156, "grad_norm": 0.30062225461006165, "learning_rate": 5e-05, "loss": 1.4777, "step": 1943 }, { "epoch": 0.31108977436389823, "grad_norm": 0.3211478590965271, "learning_rate": 5e-05, "loss": 1.499, "step": 1944 }, { "epoch": 0.3112497999679949, "grad_norm": 0.32656148076057434, "learning_rate": 5e-05, "loss": 1.5463, "step": 1945 }, { "epoch": 0.3114098255720915, "grad_norm": 0.30857813358306885, "learning_rate": 5e-05, "loss": 1.5259, "step": 1946 }, { "epoch": 0.3115698511761882, "grad_norm": 0.3110739588737488, "learning_rate": 5e-05, "loss": 1.5056, "step": 1947 }, { "epoch": 0.31172987678028485, "grad_norm": 0.3041478991508484, "learning_rate": 5e-05, "loss": 1.5276, "step": 1948 }, { "epoch": 0.3118899023843815, "grad_norm": 0.2995641827583313, "learning_rate": 5e-05, "loss": 1.5088, "step": 1949 }, { "epoch": 0.31204992798847814, "grad_norm": 0.32130229473114014, "learning_rate": 5e-05, "loss": 1.6219, "step": 1950 }, { "epoch": 0.3122099535925748, "grad_norm": 0.3090963661670685, "learning_rate": 5e-05, "loss": 1.5228, "step": 1951 }, { "epoch": 0.3123699791966715, "grad_norm": 0.31332963705062866, "learning_rate": 5e-05, "loss": 1.5309, "step": 1952 }, { "epoch": 0.31253000480076815, "grad_norm": 0.2997472286224365, "learning_rate": 5e-05, "loss": 1.5102, "step": 1953 }, { "epoch": 0.31269003040486476, "grad_norm": 0.315674751996994, "learning_rate": 5e-05, "loss": 1.5533, "step": 1954 }, { "epoch": 0.31285005600896143, "grad_norm": 0.32448050379753113, "learning_rate": 5e-05, "loss": 1.5364, "step": 1955 }, { "epoch": 0.3130100816130581, "grad_norm": 0.3055145740509033, "learning_rate": 5e-05, "loss": 1.4837, "step": 1956 }, { "epoch": 0.31317010721715477, "grad_norm": 0.3364216685295105, "learning_rate": 5e-05, "loss": 1.4902, "step": 1957 }, { "epoch": 0.3133301328212514, "grad_norm": 0.3434988260269165, "learning_rate": 5e-05, "loss": 1.5834, "step": 1958 }, { "epoch": 0.31349015842534805, "grad_norm": 0.31535762548446655, "learning_rate": 5e-05, "loss": 1.5807, "step": 1959 }, { "epoch": 0.3136501840294447, "grad_norm": 0.34372812509536743, "learning_rate": 5e-05, "loss": 1.572, "step": 1960 }, { "epoch": 0.3138102096335414, "grad_norm": 0.3206097185611725, "learning_rate": 5e-05, "loss": 1.485, "step": 1961 }, { "epoch": 0.313970235237638, "grad_norm": 0.31620100140571594, "learning_rate": 5e-05, "loss": 1.575, "step": 1962 }, { "epoch": 0.3141302608417347, "grad_norm": 0.33392414450645447, "learning_rate": 5e-05, "loss": 1.5162, "step": 1963 }, { "epoch": 0.31429028644583135, "grad_norm": 0.3114568591117859, "learning_rate": 5e-05, "loss": 1.5997, "step": 1964 }, { "epoch": 0.314450312049928, "grad_norm": 0.32009246945381165, "learning_rate": 5e-05, "loss": 1.5679, "step": 1965 }, { "epoch": 0.31461033765402463, "grad_norm": 0.3106965720653534, "learning_rate": 5e-05, "loss": 1.5483, "step": 1966 }, { "epoch": 0.3147703632581213, "grad_norm": 0.30734914541244507, "learning_rate": 5e-05, "loss": 1.5703, "step": 1967 }, { "epoch": 0.31493038886221797, "grad_norm": 0.30708372592926025, "learning_rate": 5e-05, "loss": 1.5138, "step": 1968 }, { "epoch": 0.3150904144663146, "grad_norm": 0.32574185729026794, "learning_rate": 5e-05, "loss": 1.5366, "step": 1969 }, { "epoch": 0.31525044007041125, "grad_norm": 0.3183721601963043, "learning_rate": 5e-05, "loss": 1.6092, "step": 1970 }, { "epoch": 0.3154104656745079, "grad_norm": 0.3209158182144165, "learning_rate": 5e-05, "loss": 1.6357, "step": 1971 }, { "epoch": 0.3155704912786046, "grad_norm": 0.31995564699172974, "learning_rate": 5e-05, "loss": 1.5031, "step": 1972 }, { "epoch": 0.3157305168827012, "grad_norm": 0.2944192886352539, "learning_rate": 5e-05, "loss": 1.4758, "step": 1973 }, { "epoch": 0.3158905424867979, "grad_norm": 0.311270534992218, "learning_rate": 5e-05, "loss": 1.5648, "step": 1974 }, { "epoch": 0.31605056809089455, "grad_norm": 0.32501959800720215, "learning_rate": 5e-05, "loss": 1.612, "step": 1975 }, { "epoch": 0.3162105936949912, "grad_norm": 0.30819767713546753, "learning_rate": 5e-05, "loss": 1.5421, "step": 1976 }, { "epoch": 0.31637061929908783, "grad_norm": 0.30482104420661926, "learning_rate": 5e-05, "loss": 1.4686, "step": 1977 }, { "epoch": 0.3165306449031845, "grad_norm": 0.30650684237480164, "learning_rate": 5e-05, "loss": 1.4889, "step": 1978 }, { "epoch": 0.31669067050728117, "grad_norm": 0.30760979652404785, "learning_rate": 5e-05, "loss": 1.5869, "step": 1979 }, { "epoch": 0.31685069611137784, "grad_norm": 0.3059599995613098, "learning_rate": 5e-05, "loss": 1.5637, "step": 1980 }, { "epoch": 0.31701072171547445, "grad_norm": 0.30346986651420593, "learning_rate": 5e-05, "loss": 1.5048, "step": 1981 }, { "epoch": 0.3171707473195711, "grad_norm": 0.3357836902141571, "learning_rate": 5e-05, "loss": 1.6485, "step": 1982 }, { "epoch": 0.3173307729236678, "grad_norm": 0.3042948842048645, "learning_rate": 5e-05, "loss": 1.471, "step": 1983 }, { "epoch": 0.31749079852776446, "grad_norm": 0.30463752150535583, "learning_rate": 5e-05, "loss": 1.5151, "step": 1984 }, { "epoch": 0.3176508241318611, "grad_norm": 0.30506083369255066, "learning_rate": 5e-05, "loss": 1.529, "step": 1985 }, { "epoch": 0.31781084973595775, "grad_norm": 0.3281608521938324, "learning_rate": 5e-05, "loss": 1.5824, "step": 1986 }, { "epoch": 0.3179708753400544, "grad_norm": 0.3034864366054535, "learning_rate": 5e-05, "loss": 1.5766, "step": 1987 }, { "epoch": 0.3181309009441511, "grad_norm": 0.29585161805152893, "learning_rate": 5e-05, "loss": 1.5097, "step": 1988 }, { "epoch": 0.3182909265482477, "grad_norm": 0.31066885590553284, "learning_rate": 5e-05, "loss": 1.6228, "step": 1989 }, { "epoch": 0.31845095215234437, "grad_norm": 0.303759902715683, "learning_rate": 5e-05, "loss": 1.5433, "step": 1990 }, { "epoch": 0.31861097775644104, "grad_norm": 0.2975401282310486, "learning_rate": 5e-05, "loss": 1.5448, "step": 1991 }, { "epoch": 0.3187710033605377, "grad_norm": 0.29701104760169983, "learning_rate": 5e-05, "loss": 1.4901, "step": 1992 }, { "epoch": 0.3189310289646343, "grad_norm": 0.3106205463409424, "learning_rate": 5e-05, "loss": 1.5674, "step": 1993 }, { "epoch": 0.319091054568731, "grad_norm": 0.3075954020023346, "learning_rate": 5e-05, "loss": 1.5597, "step": 1994 }, { "epoch": 0.31925108017282766, "grad_norm": 0.29921939969062805, "learning_rate": 5e-05, "loss": 1.4704, "step": 1995 }, { "epoch": 0.31941110577692433, "grad_norm": 0.3167702853679657, "learning_rate": 5e-05, "loss": 1.555, "step": 1996 }, { "epoch": 0.31957113138102095, "grad_norm": 0.30138763785362244, "learning_rate": 5e-05, "loss": 1.5415, "step": 1997 }, { "epoch": 0.3197311569851176, "grad_norm": 0.3137306272983551, "learning_rate": 5e-05, "loss": 1.5625, "step": 1998 }, { "epoch": 0.3198911825892143, "grad_norm": 0.30795714259147644, "learning_rate": 5e-05, "loss": 1.5864, "step": 1999 }, { "epoch": 0.32005120819331095, "grad_norm": 0.31262722611427307, "learning_rate": 5e-05, "loss": 1.5385, "step": 2000 }, { "epoch": 0.32021123379740757, "grad_norm": 0.293448269367218, "learning_rate": 5e-05, "loss": 1.5436, "step": 2001 }, { "epoch": 0.32037125940150424, "grad_norm": 0.3098105788230896, "learning_rate": 5e-05, "loss": 1.5525, "step": 2002 }, { "epoch": 0.3205312850056009, "grad_norm": 0.3005191385746002, "learning_rate": 5e-05, "loss": 1.5435, "step": 2003 }, { "epoch": 0.3206913106096976, "grad_norm": 0.3021060526371002, "learning_rate": 5e-05, "loss": 1.5387, "step": 2004 }, { "epoch": 0.3208513362137942, "grad_norm": 0.3126831352710724, "learning_rate": 5e-05, "loss": 1.6343, "step": 2005 }, { "epoch": 0.32101136181789086, "grad_norm": 0.30161044001579285, "learning_rate": 5e-05, "loss": 1.5201, "step": 2006 }, { "epoch": 0.32117138742198753, "grad_norm": 0.2965162694454193, "learning_rate": 5e-05, "loss": 1.4869, "step": 2007 }, { "epoch": 0.3213314130260842, "grad_norm": 0.31403782963752747, "learning_rate": 5e-05, "loss": 1.5212, "step": 2008 }, { "epoch": 0.3214914386301808, "grad_norm": 0.31644460558891296, "learning_rate": 5e-05, "loss": 1.5515, "step": 2009 }, { "epoch": 0.3216514642342775, "grad_norm": 0.3003852963447571, "learning_rate": 5e-05, "loss": 1.5427, "step": 2010 }, { "epoch": 0.32181148983837415, "grad_norm": 0.3112056851387024, "learning_rate": 5e-05, "loss": 1.6342, "step": 2011 }, { "epoch": 0.32197151544247077, "grad_norm": 0.3039713501930237, "learning_rate": 5e-05, "loss": 1.5364, "step": 2012 }, { "epoch": 0.32213154104656744, "grad_norm": 0.31421369314193726, "learning_rate": 5e-05, "loss": 1.4987, "step": 2013 }, { "epoch": 0.3222915666506641, "grad_norm": 0.3198636770248413, "learning_rate": 5e-05, "loss": 1.5735, "step": 2014 }, { "epoch": 0.3224515922547608, "grad_norm": 0.31381505727767944, "learning_rate": 5e-05, "loss": 1.563, "step": 2015 }, { "epoch": 0.3226116178588574, "grad_norm": 0.31550413370132446, "learning_rate": 5e-05, "loss": 1.5815, "step": 2016 }, { "epoch": 0.32277164346295406, "grad_norm": 0.30431443452835083, "learning_rate": 5e-05, "loss": 1.4959, "step": 2017 }, { "epoch": 0.32293166906705073, "grad_norm": 0.30613070726394653, "learning_rate": 5e-05, "loss": 1.4112, "step": 2018 }, { "epoch": 0.3230916946711474, "grad_norm": 0.313948392868042, "learning_rate": 5e-05, "loss": 1.527, "step": 2019 }, { "epoch": 0.323251720275244, "grad_norm": 0.3254380524158478, "learning_rate": 5e-05, "loss": 1.6059, "step": 2020 }, { "epoch": 0.3234117458793407, "grad_norm": 0.3159467279911041, "learning_rate": 5e-05, "loss": 1.5955, "step": 2021 }, { "epoch": 0.32357177148343735, "grad_norm": 0.2995958626270294, "learning_rate": 5e-05, "loss": 1.3973, "step": 2022 }, { "epoch": 0.323731797087534, "grad_norm": 0.32715287804603577, "learning_rate": 5e-05, "loss": 1.6242, "step": 2023 }, { "epoch": 0.32389182269163064, "grad_norm": 0.31614577770233154, "learning_rate": 5e-05, "loss": 1.5737, "step": 2024 }, { "epoch": 0.3240518482957273, "grad_norm": 0.30782297253608704, "learning_rate": 5e-05, "loss": 1.5217, "step": 2025 }, { "epoch": 0.324211873899824, "grad_norm": 0.3174534738063812, "learning_rate": 5e-05, "loss": 1.4509, "step": 2026 }, { "epoch": 0.32437189950392065, "grad_norm": 0.3188692331314087, "learning_rate": 5e-05, "loss": 1.5125, "step": 2027 }, { "epoch": 0.32453192510801726, "grad_norm": 0.3126909136772156, "learning_rate": 5e-05, "loss": 1.6235, "step": 2028 }, { "epoch": 0.32469195071211393, "grad_norm": 0.3338071405887604, "learning_rate": 5e-05, "loss": 1.5503, "step": 2029 }, { "epoch": 0.3248519763162106, "grad_norm": 0.31614160537719727, "learning_rate": 5e-05, "loss": 1.5103, "step": 2030 }, { "epoch": 0.32501200192030727, "grad_norm": 0.30492299795150757, "learning_rate": 5e-05, "loss": 1.5783, "step": 2031 }, { "epoch": 0.3251720275244039, "grad_norm": 0.33611413836479187, "learning_rate": 5e-05, "loss": 1.5216, "step": 2032 }, { "epoch": 0.32533205312850055, "grad_norm": 0.31121641397476196, "learning_rate": 5e-05, "loss": 1.5742, "step": 2033 }, { "epoch": 0.3254920787325972, "grad_norm": 0.3084391951560974, "learning_rate": 5e-05, "loss": 1.5846, "step": 2034 }, { "epoch": 0.3256521043366939, "grad_norm": 0.30020013451576233, "learning_rate": 5e-05, "loss": 1.5301, "step": 2035 }, { "epoch": 0.3258121299407905, "grad_norm": 0.31525155901908875, "learning_rate": 5e-05, "loss": 1.5671, "step": 2036 }, { "epoch": 0.3259721555448872, "grad_norm": 0.3104480504989624, "learning_rate": 5e-05, "loss": 1.6081, "step": 2037 }, { "epoch": 0.32613218114898385, "grad_norm": 0.3161766827106476, "learning_rate": 5e-05, "loss": 1.5611, "step": 2038 }, { "epoch": 0.3262922067530805, "grad_norm": 0.33173513412475586, "learning_rate": 5e-05, "loss": 1.5136, "step": 2039 }, { "epoch": 0.32645223235717713, "grad_norm": 0.29988446831703186, "learning_rate": 5e-05, "loss": 1.5371, "step": 2040 }, { "epoch": 0.3266122579612738, "grad_norm": 0.3250060975551605, "learning_rate": 5e-05, "loss": 1.5532, "step": 2041 }, { "epoch": 0.32677228356537047, "grad_norm": 0.32108956575393677, "learning_rate": 5e-05, "loss": 1.5582, "step": 2042 }, { "epoch": 0.32693230916946714, "grad_norm": 0.29773765802383423, "learning_rate": 5e-05, "loss": 1.5414, "step": 2043 }, { "epoch": 0.32709233477356375, "grad_norm": 0.332288533449173, "learning_rate": 5e-05, "loss": 1.5443, "step": 2044 }, { "epoch": 0.3272523603776604, "grad_norm": 0.3310232162475586, "learning_rate": 5e-05, "loss": 1.5838, "step": 2045 }, { "epoch": 0.3274123859817571, "grad_norm": 0.30281293392181396, "learning_rate": 5e-05, "loss": 1.4961, "step": 2046 }, { "epoch": 0.32757241158585376, "grad_norm": 0.3515472114086151, "learning_rate": 5e-05, "loss": 1.6105, "step": 2047 }, { "epoch": 0.3277324371899504, "grad_norm": 0.31368982791900635, "learning_rate": 5e-05, "loss": 1.5449, "step": 2048 }, { "epoch": 0.32789246279404705, "grad_norm": 0.29781195521354675, "learning_rate": 5e-05, "loss": 1.5225, "step": 2049 }, { "epoch": 0.3280524883981437, "grad_norm": 0.3289206624031067, "learning_rate": 5e-05, "loss": 1.5314, "step": 2050 }, { "epoch": 0.3282125140022404, "grad_norm": 0.307847261428833, "learning_rate": 5e-05, "loss": 1.5683, "step": 2051 }, { "epoch": 0.328372539606337, "grad_norm": 0.31061214208602905, "learning_rate": 5e-05, "loss": 1.5895, "step": 2052 }, { "epoch": 0.32853256521043367, "grad_norm": 0.3216710388660431, "learning_rate": 5e-05, "loss": 1.5227, "step": 2053 }, { "epoch": 0.32869259081453034, "grad_norm": 0.3025258481502533, "learning_rate": 5e-05, "loss": 1.5945, "step": 2054 }, { "epoch": 0.32885261641862695, "grad_norm": 0.30594968795776367, "learning_rate": 5e-05, "loss": 1.5815, "step": 2055 }, { "epoch": 0.3290126420227236, "grad_norm": 0.31824827194213867, "learning_rate": 5e-05, "loss": 1.5236, "step": 2056 }, { "epoch": 0.3291726676268203, "grad_norm": 0.29784950613975525, "learning_rate": 5e-05, "loss": 1.4909, "step": 2057 }, { "epoch": 0.32933269323091696, "grad_norm": 0.30502545833587646, "learning_rate": 5e-05, "loss": 1.5509, "step": 2058 }, { "epoch": 0.3294927188350136, "grad_norm": 0.29924193024635315, "learning_rate": 5e-05, "loss": 1.5066, "step": 2059 }, { "epoch": 0.32965274443911025, "grad_norm": 0.3022095561027527, "learning_rate": 5e-05, "loss": 1.5568, "step": 2060 }, { "epoch": 0.3298127700432069, "grad_norm": 0.29585424065589905, "learning_rate": 5e-05, "loss": 1.5295, "step": 2061 }, { "epoch": 0.3299727956473036, "grad_norm": 0.3109883964061737, "learning_rate": 5e-05, "loss": 1.5772, "step": 2062 }, { "epoch": 0.3301328212514002, "grad_norm": 0.3095335066318512, "learning_rate": 5e-05, "loss": 1.5599, "step": 2063 }, { "epoch": 0.33029284685549687, "grad_norm": 0.2939128279685974, "learning_rate": 5e-05, "loss": 1.4903, "step": 2064 }, { "epoch": 0.33045287245959354, "grad_norm": 0.31451040506362915, "learning_rate": 5e-05, "loss": 1.5392, "step": 2065 }, { "epoch": 0.3306128980636902, "grad_norm": 0.30409616231918335, "learning_rate": 5e-05, "loss": 1.5153, "step": 2066 }, { "epoch": 0.3307729236677868, "grad_norm": 0.30701354146003723, "learning_rate": 5e-05, "loss": 1.5189, "step": 2067 }, { "epoch": 0.3309329492718835, "grad_norm": 0.3340405523777008, "learning_rate": 5e-05, "loss": 1.6149, "step": 2068 }, { "epoch": 0.33109297487598016, "grad_norm": 0.32392650842666626, "learning_rate": 5e-05, "loss": 1.5146, "step": 2069 }, { "epoch": 0.33125300048007683, "grad_norm": 0.3274012506008148, "learning_rate": 5e-05, "loss": 1.6495, "step": 2070 }, { "epoch": 0.33141302608417345, "grad_norm": 0.33328044414520264, "learning_rate": 5e-05, "loss": 1.5596, "step": 2071 }, { "epoch": 0.3315730516882701, "grad_norm": 0.33060964941978455, "learning_rate": 5e-05, "loss": 1.5654, "step": 2072 }, { "epoch": 0.3317330772923668, "grad_norm": 0.2981410324573517, "learning_rate": 5e-05, "loss": 1.524, "step": 2073 }, { "epoch": 0.33189310289646345, "grad_norm": 0.32655516266822815, "learning_rate": 5e-05, "loss": 1.5943, "step": 2074 }, { "epoch": 0.33205312850056007, "grad_norm": 0.3289265036582947, "learning_rate": 5e-05, "loss": 1.6443, "step": 2075 }, { "epoch": 0.33221315410465674, "grad_norm": 0.3040659427642822, "learning_rate": 5e-05, "loss": 1.5785, "step": 2076 }, { "epoch": 0.3323731797087534, "grad_norm": 0.31940051913261414, "learning_rate": 5e-05, "loss": 1.5921, "step": 2077 }, { "epoch": 0.3325332053128501, "grad_norm": 0.3170227110385895, "learning_rate": 5e-05, "loss": 1.5573, "step": 2078 }, { "epoch": 0.3326932309169467, "grad_norm": 0.30822479724884033, "learning_rate": 5e-05, "loss": 1.5389, "step": 2079 }, { "epoch": 0.33285325652104336, "grad_norm": 0.32184043526649475, "learning_rate": 5e-05, "loss": 1.6445, "step": 2080 }, { "epoch": 0.33301328212514003, "grad_norm": 0.32435473799705505, "learning_rate": 5e-05, "loss": 1.6141, "step": 2081 }, { "epoch": 0.3331733077292367, "grad_norm": 0.29255643486976624, "learning_rate": 5e-05, "loss": 1.4664, "step": 2082 }, { "epoch": 0.3333333333333333, "grad_norm": 0.3102124333381653, "learning_rate": 5e-05, "loss": 1.4786, "step": 2083 }, { "epoch": 0.33349335893743, "grad_norm": 0.3162914216518402, "learning_rate": 5e-05, "loss": 1.5534, "step": 2084 }, { "epoch": 0.33365338454152665, "grad_norm": 0.3017425835132599, "learning_rate": 5e-05, "loss": 1.4823, "step": 2085 }, { "epoch": 0.3338134101456233, "grad_norm": 0.32203298807144165, "learning_rate": 5e-05, "loss": 1.5017, "step": 2086 }, { "epoch": 0.33397343574971994, "grad_norm": 0.32210466265678406, "learning_rate": 5e-05, "loss": 1.5201, "step": 2087 }, { "epoch": 0.3341334613538166, "grad_norm": 0.30898600816726685, "learning_rate": 5e-05, "loss": 1.4955, "step": 2088 }, { "epoch": 0.3342934869579133, "grad_norm": 0.307054728269577, "learning_rate": 5e-05, "loss": 1.4794, "step": 2089 }, { "epoch": 0.33445351256200995, "grad_norm": 0.3307175040245056, "learning_rate": 5e-05, "loss": 1.5874, "step": 2090 }, { "epoch": 0.33461353816610656, "grad_norm": 0.3183031380176544, "learning_rate": 5e-05, "loss": 1.5462, "step": 2091 }, { "epoch": 0.33477356377020323, "grad_norm": 0.3341115415096283, "learning_rate": 5e-05, "loss": 1.4962, "step": 2092 }, { "epoch": 0.3349335893742999, "grad_norm": 0.3173571527004242, "learning_rate": 5e-05, "loss": 1.6104, "step": 2093 }, { "epoch": 0.33509361497839657, "grad_norm": 0.30804443359375, "learning_rate": 5e-05, "loss": 1.5494, "step": 2094 }, { "epoch": 0.3352536405824932, "grad_norm": 0.31888332962989807, "learning_rate": 5e-05, "loss": 1.5916, "step": 2095 }, { "epoch": 0.33541366618658985, "grad_norm": 0.30258435010910034, "learning_rate": 5e-05, "loss": 1.5032, "step": 2096 }, { "epoch": 0.3355736917906865, "grad_norm": 0.309325635433197, "learning_rate": 5e-05, "loss": 1.4981, "step": 2097 }, { "epoch": 0.33573371739478314, "grad_norm": 0.32662051916122437, "learning_rate": 5e-05, "loss": 1.5669, "step": 2098 }, { "epoch": 0.3358937429988798, "grad_norm": 0.29798978567123413, "learning_rate": 5e-05, "loss": 1.4118, "step": 2099 }, { "epoch": 0.3360537686029765, "grad_norm": 0.31110554933547974, "learning_rate": 5e-05, "loss": 1.5515, "step": 2100 }, { "epoch": 0.33621379420707315, "grad_norm": 0.32194891571998596, "learning_rate": 5e-05, "loss": 1.5839, "step": 2101 }, { "epoch": 0.33637381981116976, "grad_norm": 0.3061939477920532, "learning_rate": 5e-05, "loss": 1.4987, "step": 2102 }, { "epoch": 0.33653384541526643, "grad_norm": 0.301114946603775, "learning_rate": 5e-05, "loss": 1.5617, "step": 2103 }, { "epoch": 0.3366938710193631, "grad_norm": 0.30640509724617004, "learning_rate": 5e-05, "loss": 1.5973, "step": 2104 }, { "epoch": 0.33685389662345977, "grad_norm": 0.3063596189022064, "learning_rate": 5e-05, "loss": 1.5476, "step": 2105 }, { "epoch": 0.3370139222275564, "grad_norm": 0.3124504089355469, "learning_rate": 5e-05, "loss": 1.5308, "step": 2106 }, { "epoch": 0.33717394783165305, "grad_norm": 0.31248989701271057, "learning_rate": 5e-05, "loss": 1.4571, "step": 2107 }, { "epoch": 0.3373339734357497, "grad_norm": 0.3113437592983246, "learning_rate": 5e-05, "loss": 1.6083, "step": 2108 }, { "epoch": 0.3374939990398464, "grad_norm": 0.31757962703704834, "learning_rate": 5e-05, "loss": 1.5693, "step": 2109 }, { "epoch": 0.337654024643943, "grad_norm": 0.31616583466529846, "learning_rate": 5e-05, "loss": 1.5855, "step": 2110 }, { "epoch": 0.3378140502480397, "grad_norm": 0.3096024990081787, "learning_rate": 5e-05, "loss": 1.4818, "step": 2111 }, { "epoch": 0.33797407585213635, "grad_norm": 0.3018946051597595, "learning_rate": 5e-05, "loss": 1.4596, "step": 2112 }, { "epoch": 0.338134101456233, "grad_norm": 0.2942415773868561, "learning_rate": 5e-05, "loss": 1.4957, "step": 2113 }, { "epoch": 0.33829412706032963, "grad_norm": 0.3026716709136963, "learning_rate": 5e-05, "loss": 1.5715, "step": 2114 }, { "epoch": 0.3384541526644263, "grad_norm": 0.3174715042114258, "learning_rate": 5e-05, "loss": 1.5384, "step": 2115 }, { "epoch": 0.33861417826852297, "grad_norm": 0.31389856338500977, "learning_rate": 5e-05, "loss": 1.5699, "step": 2116 }, { "epoch": 0.33877420387261964, "grad_norm": 0.31627678871154785, "learning_rate": 5e-05, "loss": 1.5356, "step": 2117 }, { "epoch": 0.33893422947671625, "grad_norm": 0.3049820065498352, "learning_rate": 5e-05, "loss": 1.5212, "step": 2118 }, { "epoch": 0.3390942550808129, "grad_norm": 0.31484583020210266, "learning_rate": 5e-05, "loss": 1.6004, "step": 2119 }, { "epoch": 0.3392542806849096, "grad_norm": 0.32356542348861694, "learning_rate": 5e-05, "loss": 1.599, "step": 2120 }, { "epoch": 0.33941430628900626, "grad_norm": 0.30075958371162415, "learning_rate": 5e-05, "loss": 1.4939, "step": 2121 }, { "epoch": 0.3395743318931029, "grad_norm": 0.299065500497818, "learning_rate": 5e-05, "loss": 1.468, "step": 2122 }, { "epoch": 0.33973435749719955, "grad_norm": 0.32122698426246643, "learning_rate": 5e-05, "loss": 1.6187, "step": 2123 }, { "epoch": 0.3398943831012962, "grad_norm": 0.3183058798313141, "learning_rate": 5e-05, "loss": 1.5913, "step": 2124 }, { "epoch": 0.3400544087053929, "grad_norm": 0.3043568730354309, "learning_rate": 5e-05, "loss": 1.5575, "step": 2125 }, { "epoch": 0.3402144343094895, "grad_norm": 0.31912410259246826, "learning_rate": 5e-05, "loss": 1.577, "step": 2126 }, { "epoch": 0.34037445991358617, "grad_norm": 0.2997113764286041, "learning_rate": 5e-05, "loss": 1.5125, "step": 2127 }, { "epoch": 0.34053448551768284, "grad_norm": 0.2942621409893036, "learning_rate": 5e-05, "loss": 1.4388, "step": 2128 }, { "epoch": 0.3406945111217795, "grad_norm": 0.319581001996994, "learning_rate": 5e-05, "loss": 1.5241, "step": 2129 }, { "epoch": 0.3408545367258761, "grad_norm": 0.32680201530456543, "learning_rate": 5e-05, "loss": 1.506, "step": 2130 }, { "epoch": 0.3410145623299728, "grad_norm": 0.3021116256713867, "learning_rate": 5e-05, "loss": 1.5157, "step": 2131 }, { "epoch": 0.34117458793406946, "grad_norm": 0.314227432012558, "learning_rate": 5e-05, "loss": 1.5603, "step": 2132 }, { "epoch": 0.34133461353816613, "grad_norm": 0.3142857551574707, "learning_rate": 5e-05, "loss": 1.532, "step": 2133 }, { "epoch": 0.34149463914226275, "grad_norm": 0.32570549845695496, "learning_rate": 5e-05, "loss": 1.6017, "step": 2134 }, { "epoch": 0.3416546647463594, "grad_norm": 0.3190460801124573, "learning_rate": 5e-05, "loss": 1.4833, "step": 2135 }, { "epoch": 0.3418146903504561, "grad_norm": 0.32196903228759766, "learning_rate": 5e-05, "loss": 1.5851, "step": 2136 }, { "epoch": 0.34197471595455275, "grad_norm": 0.30021417140960693, "learning_rate": 5e-05, "loss": 1.515, "step": 2137 }, { "epoch": 0.34213474155864937, "grad_norm": 0.3207353353500366, "learning_rate": 5e-05, "loss": 1.5573, "step": 2138 }, { "epoch": 0.34229476716274604, "grad_norm": 0.31601011753082275, "learning_rate": 5e-05, "loss": 1.595, "step": 2139 }, { "epoch": 0.3424547927668427, "grad_norm": 0.3062175512313843, "learning_rate": 5e-05, "loss": 1.5244, "step": 2140 }, { "epoch": 0.3426148183709394, "grad_norm": 0.2990437150001526, "learning_rate": 5e-05, "loss": 1.4849, "step": 2141 }, { "epoch": 0.342774843975036, "grad_norm": 0.32578471302986145, "learning_rate": 5e-05, "loss": 1.6531, "step": 2142 }, { "epoch": 0.34293486957913266, "grad_norm": 0.31402331590652466, "learning_rate": 5e-05, "loss": 1.605, "step": 2143 }, { "epoch": 0.34309489518322933, "grad_norm": 0.3056388199329376, "learning_rate": 5e-05, "loss": 1.5671, "step": 2144 }, { "epoch": 0.34325492078732595, "grad_norm": 0.3033358156681061, "learning_rate": 5e-05, "loss": 1.4963, "step": 2145 }, { "epoch": 0.3434149463914226, "grad_norm": 0.31762591004371643, "learning_rate": 5e-05, "loss": 1.6237, "step": 2146 }, { "epoch": 0.3435749719955193, "grad_norm": 0.3113551437854767, "learning_rate": 5e-05, "loss": 1.5433, "step": 2147 }, { "epoch": 0.34373499759961595, "grad_norm": 0.29870837926864624, "learning_rate": 5e-05, "loss": 1.4614, "step": 2148 }, { "epoch": 0.34389502320371257, "grad_norm": 0.30340576171875, "learning_rate": 5e-05, "loss": 1.4678, "step": 2149 }, { "epoch": 0.34405504880780924, "grad_norm": 0.3072715103626251, "learning_rate": 5e-05, "loss": 1.5224, "step": 2150 }, { "epoch": 0.3442150744119059, "grad_norm": 0.307522714138031, "learning_rate": 5e-05, "loss": 1.5043, "step": 2151 }, { "epoch": 0.3443751000160026, "grad_norm": 0.3151133358478546, "learning_rate": 5e-05, "loss": 1.5787, "step": 2152 }, { "epoch": 0.3445351256200992, "grad_norm": 0.30956339836120605, "learning_rate": 5e-05, "loss": 1.5765, "step": 2153 }, { "epoch": 0.34469515122419586, "grad_norm": 0.302448570728302, "learning_rate": 5e-05, "loss": 1.4951, "step": 2154 }, { "epoch": 0.34485517682829253, "grad_norm": 0.30666133761405945, "learning_rate": 5e-05, "loss": 1.5367, "step": 2155 }, { "epoch": 0.3450152024323892, "grad_norm": 0.299628347158432, "learning_rate": 5e-05, "loss": 1.5272, "step": 2156 }, { "epoch": 0.3451752280364858, "grad_norm": 0.3087063729763031, "learning_rate": 5e-05, "loss": 1.5705, "step": 2157 }, { "epoch": 0.3453352536405825, "grad_norm": 0.3044770359992981, "learning_rate": 5e-05, "loss": 1.5186, "step": 2158 }, { "epoch": 0.34549527924467915, "grad_norm": 0.3101055920124054, "learning_rate": 5e-05, "loss": 1.4924, "step": 2159 }, { "epoch": 0.3456553048487758, "grad_norm": 0.3111697733402252, "learning_rate": 5e-05, "loss": 1.5458, "step": 2160 }, { "epoch": 0.34581533045287244, "grad_norm": 0.3077961206436157, "learning_rate": 5e-05, "loss": 1.4657, "step": 2161 }, { "epoch": 0.3459753560569691, "grad_norm": 0.30569231510162354, "learning_rate": 5e-05, "loss": 1.5282, "step": 2162 }, { "epoch": 0.3461353816610658, "grad_norm": 0.3063148558139801, "learning_rate": 5e-05, "loss": 1.4961, "step": 2163 }, { "epoch": 0.34629540726516245, "grad_norm": 0.30519020557403564, "learning_rate": 5e-05, "loss": 1.5545, "step": 2164 }, { "epoch": 0.34645543286925906, "grad_norm": 0.30004578828811646, "learning_rate": 5e-05, "loss": 1.5284, "step": 2165 }, { "epoch": 0.34661545847335573, "grad_norm": 0.30326154828071594, "learning_rate": 5e-05, "loss": 1.4737, "step": 2166 }, { "epoch": 0.3467754840774524, "grad_norm": 0.3201731741428375, "learning_rate": 5e-05, "loss": 1.6427, "step": 2167 }, { "epoch": 0.34693550968154907, "grad_norm": 0.30452585220336914, "learning_rate": 5e-05, "loss": 1.5788, "step": 2168 }, { "epoch": 0.3470955352856457, "grad_norm": 0.30478695034980774, "learning_rate": 5e-05, "loss": 1.5649, "step": 2169 }, { "epoch": 0.34725556088974235, "grad_norm": 0.3037102222442627, "learning_rate": 5e-05, "loss": 1.5127, "step": 2170 }, { "epoch": 0.347415586493839, "grad_norm": 0.31219953298568726, "learning_rate": 5e-05, "loss": 1.4988, "step": 2171 }, { "epoch": 0.3475756120979357, "grad_norm": 0.30961865186691284, "learning_rate": 5e-05, "loss": 1.5133, "step": 2172 }, { "epoch": 0.3477356377020323, "grad_norm": 0.29491913318634033, "learning_rate": 5e-05, "loss": 1.4877, "step": 2173 }, { "epoch": 0.347895663306129, "grad_norm": 0.302359014749527, "learning_rate": 5e-05, "loss": 1.5727, "step": 2174 }, { "epoch": 0.34805568891022565, "grad_norm": 0.33450689911842346, "learning_rate": 5e-05, "loss": 1.674, "step": 2175 }, { "epoch": 0.3482157145143223, "grad_norm": 0.3085595369338989, "learning_rate": 5e-05, "loss": 1.4941, "step": 2176 }, { "epoch": 0.34837574011841893, "grad_norm": 0.30136609077453613, "learning_rate": 5e-05, "loss": 1.5192, "step": 2177 }, { "epoch": 0.3485357657225156, "grad_norm": 0.31133320927619934, "learning_rate": 5e-05, "loss": 1.5265, "step": 2178 }, { "epoch": 0.34869579132661227, "grad_norm": 0.31278979778289795, "learning_rate": 5e-05, "loss": 1.593, "step": 2179 }, { "epoch": 0.34885581693070894, "grad_norm": 0.31802481412887573, "learning_rate": 5e-05, "loss": 1.5612, "step": 2180 }, { "epoch": 0.34901584253480555, "grad_norm": 0.30887672305107117, "learning_rate": 5e-05, "loss": 1.5815, "step": 2181 }, { "epoch": 0.3491758681389022, "grad_norm": 0.3096591830253601, "learning_rate": 5e-05, "loss": 1.5155, "step": 2182 }, { "epoch": 0.3493358937429989, "grad_norm": 0.30120745301246643, "learning_rate": 5e-05, "loss": 1.5161, "step": 2183 }, { "epoch": 0.34949591934709556, "grad_norm": 0.30098089575767517, "learning_rate": 5e-05, "loss": 1.5464, "step": 2184 }, { "epoch": 0.3496559449511922, "grad_norm": 0.2969001829624176, "learning_rate": 5e-05, "loss": 1.4658, "step": 2185 }, { "epoch": 0.34981597055528885, "grad_norm": 0.30343058705329895, "learning_rate": 5e-05, "loss": 1.4802, "step": 2186 }, { "epoch": 0.3499759961593855, "grad_norm": 0.32156407833099365, "learning_rate": 5e-05, "loss": 1.5719, "step": 2187 }, { "epoch": 0.35013602176348213, "grad_norm": 0.311296671628952, "learning_rate": 5e-05, "loss": 1.578, "step": 2188 }, { "epoch": 0.3502960473675788, "grad_norm": 0.30137550830841064, "learning_rate": 5e-05, "loss": 1.468, "step": 2189 }, { "epoch": 0.35045607297167547, "grad_norm": 0.30954650044441223, "learning_rate": 5e-05, "loss": 1.4881, "step": 2190 }, { "epoch": 0.35061609857577214, "grad_norm": 0.3190652132034302, "learning_rate": 5e-05, "loss": 1.5324, "step": 2191 }, { "epoch": 0.35077612417986875, "grad_norm": 0.3189464807510376, "learning_rate": 5e-05, "loss": 1.5899, "step": 2192 }, { "epoch": 0.3509361497839654, "grad_norm": 0.3071178197860718, "learning_rate": 5e-05, "loss": 1.5713, "step": 2193 }, { "epoch": 0.3510961753880621, "grad_norm": 0.30754995346069336, "learning_rate": 5e-05, "loss": 1.5738, "step": 2194 }, { "epoch": 0.35125620099215876, "grad_norm": 0.2963769733905792, "learning_rate": 5e-05, "loss": 1.5288, "step": 2195 }, { "epoch": 0.3514162265962554, "grad_norm": 0.3127683401107788, "learning_rate": 5e-05, "loss": 1.502, "step": 2196 }, { "epoch": 0.35157625220035205, "grad_norm": 0.31411492824554443, "learning_rate": 5e-05, "loss": 1.5883, "step": 2197 }, { "epoch": 0.3517362778044487, "grad_norm": 0.32436510920524597, "learning_rate": 5e-05, "loss": 1.5755, "step": 2198 }, { "epoch": 0.3518963034085454, "grad_norm": 0.30595776438713074, "learning_rate": 5e-05, "loss": 1.454, "step": 2199 }, { "epoch": 0.352056329012642, "grad_norm": 0.3110528290271759, "learning_rate": 5e-05, "loss": 1.4949, "step": 2200 }, { "epoch": 0.35221635461673867, "grad_norm": 0.333076149225235, "learning_rate": 5e-05, "loss": 1.5856, "step": 2201 }, { "epoch": 0.35237638022083534, "grad_norm": 0.2975061237812042, "learning_rate": 5e-05, "loss": 1.4699, "step": 2202 }, { "epoch": 0.352536405824932, "grad_norm": 0.3208777606487274, "learning_rate": 5e-05, "loss": 1.5638, "step": 2203 }, { "epoch": 0.3526964314290286, "grad_norm": 0.3223569989204407, "learning_rate": 5e-05, "loss": 1.5938, "step": 2204 }, { "epoch": 0.3528564570331253, "grad_norm": 0.30810800194740295, "learning_rate": 5e-05, "loss": 1.529, "step": 2205 }, { "epoch": 0.35301648263722196, "grad_norm": 0.31921645998954773, "learning_rate": 5e-05, "loss": 1.6441, "step": 2206 }, { "epoch": 0.35317650824131863, "grad_norm": 0.3065279424190521, "learning_rate": 5e-05, "loss": 1.5135, "step": 2207 }, { "epoch": 0.35333653384541525, "grad_norm": 0.309926837682724, "learning_rate": 5e-05, "loss": 1.5202, "step": 2208 }, { "epoch": 0.3534965594495119, "grad_norm": 0.31500935554504395, "learning_rate": 5e-05, "loss": 1.3638, "step": 2209 }, { "epoch": 0.3536565850536086, "grad_norm": 0.3006497025489807, "learning_rate": 5e-05, "loss": 1.5342, "step": 2210 }, { "epoch": 0.35381661065770525, "grad_norm": 0.32201194763183594, "learning_rate": 5e-05, "loss": 1.5807, "step": 2211 }, { "epoch": 0.35397663626180187, "grad_norm": 0.3202427625656128, "learning_rate": 5e-05, "loss": 1.5654, "step": 2212 }, { "epoch": 0.35413666186589854, "grad_norm": 0.30676722526550293, "learning_rate": 5e-05, "loss": 1.497, "step": 2213 }, { "epoch": 0.3542966874699952, "grad_norm": 0.30670365691185, "learning_rate": 5e-05, "loss": 1.5462, "step": 2214 }, { "epoch": 0.3544567130740919, "grad_norm": 0.31446096301078796, "learning_rate": 5e-05, "loss": 1.5411, "step": 2215 }, { "epoch": 0.3546167386781885, "grad_norm": 0.3110874593257904, "learning_rate": 5e-05, "loss": 1.5765, "step": 2216 }, { "epoch": 0.35477676428228516, "grad_norm": 0.3155255615711212, "learning_rate": 5e-05, "loss": 1.6248, "step": 2217 }, { "epoch": 0.35493678988638183, "grad_norm": 0.3139932155609131, "learning_rate": 5e-05, "loss": 1.5316, "step": 2218 }, { "epoch": 0.3550968154904785, "grad_norm": 0.30235305428504944, "learning_rate": 5e-05, "loss": 1.5679, "step": 2219 }, { "epoch": 0.3552568410945751, "grad_norm": 0.3133471608161926, "learning_rate": 5e-05, "loss": 1.559, "step": 2220 }, { "epoch": 0.3554168666986718, "grad_norm": 0.3121718466281891, "learning_rate": 5e-05, "loss": 1.563, "step": 2221 }, { "epoch": 0.35557689230276845, "grad_norm": 0.30635902285575867, "learning_rate": 5e-05, "loss": 1.5636, "step": 2222 }, { "epoch": 0.3557369179068651, "grad_norm": 0.3210388720035553, "learning_rate": 5e-05, "loss": 1.5329, "step": 2223 }, { "epoch": 0.35589694351096174, "grad_norm": 0.3308223485946655, "learning_rate": 5e-05, "loss": 1.5532, "step": 2224 }, { "epoch": 0.3560569691150584, "grad_norm": 0.3066284954547882, "learning_rate": 5e-05, "loss": 1.463, "step": 2225 }, { "epoch": 0.3562169947191551, "grad_norm": 0.316307008266449, "learning_rate": 5e-05, "loss": 1.5476, "step": 2226 }, { "epoch": 0.35637702032325175, "grad_norm": 0.3026333153247833, "learning_rate": 5e-05, "loss": 1.4392, "step": 2227 }, { "epoch": 0.35653704592734836, "grad_norm": 0.31354689598083496, "learning_rate": 5e-05, "loss": 1.5676, "step": 2228 }, { "epoch": 0.35669707153144503, "grad_norm": 0.33986321091651917, "learning_rate": 5e-05, "loss": 1.5121, "step": 2229 }, { "epoch": 0.3568570971355417, "grad_norm": 0.3139965832233429, "learning_rate": 5e-05, "loss": 1.5801, "step": 2230 }, { "epoch": 0.3570171227396383, "grad_norm": 0.3108351528644562, "learning_rate": 5e-05, "loss": 1.5813, "step": 2231 }, { "epoch": 0.357177148343735, "grad_norm": 0.32162928581237793, "learning_rate": 5e-05, "loss": 1.5095, "step": 2232 }, { "epoch": 0.35733717394783165, "grad_norm": 0.3035792112350464, "learning_rate": 5e-05, "loss": 1.5266, "step": 2233 }, { "epoch": 0.3574971995519283, "grad_norm": 0.3214038014411926, "learning_rate": 5e-05, "loss": 1.5907, "step": 2234 }, { "epoch": 0.35765722515602494, "grad_norm": 0.33881446719169617, "learning_rate": 5e-05, "loss": 1.6622, "step": 2235 }, { "epoch": 0.3578172507601216, "grad_norm": 0.3036636710166931, "learning_rate": 5e-05, "loss": 1.5309, "step": 2236 }, { "epoch": 0.3579772763642183, "grad_norm": 0.3130498230457306, "learning_rate": 5e-05, "loss": 1.5445, "step": 2237 }, { "epoch": 0.35813730196831495, "grad_norm": 0.31873971223831177, "learning_rate": 5e-05, "loss": 1.4566, "step": 2238 }, { "epoch": 0.35829732757241156, "grad_norm": 0.31345194578170776, "learning_rate": 5e-05, "loss": 1.4261, "step": 2239 }, { "epoch": 0.35845735317650823, "grad_norm": 0.33310675621032715, "learning_rate": 5e-05, "loss": 1.5435, "step": 2240 }, { "epoch": 0.3586173787806049, "grad_norm": 0.33151471614837646, "learning_rate": 5e-05, "loss": 1.576, "step": 2241 }, { "epoch": 0.35877740438470157, "grad_norm": 0.30206140875816345, "learning_rate": 5e-05, "loss": 1.5182, "step": 2242 }, { "epoch": 0.3589374299887982, "grad_norm": 0.30907461047172546, "learning_rate": 5e-05, "loss": 1.4163, "step": 2243 }, { "epoch": 0.35909745559289485, "grad_norm": 0.32118839025497437, "learning_rate": 5e-05, "loss": 1.4943, "step": 2244 }, { "epoch": 0.3592574811969915, "grad_norm": 0.3208298087120056, "learning_rate": 5e-05, "loss": 1.6408, "step": 2245 }, { "epoch": 0.3594175068010882, "grad_norm": 0.3136759400367737, "learning_rate": 5e-05, "loss": 1.5087, "step": 2246 }, { "epoch": 0.3595775324051848, "grad_norm": 0.3306906521320343, "learning_rate": 5e-05, "loss": 1.4733, "step": 2247 }, { "epoch": 0.3597375580092815, "grad_norm": 0.31779178977012634, "learning_rate": 5e-05, "loss": 1.4591, "step": 2248 }, { "epoch": 0.35989758361337815, "grad_norm": 0.3259090483188629, "learning_rate": 5e-05, "loss": 1.4528, "step": 2249 }, { "epoch": 0.3600576092174748, "grad_norm": 0.33372265100479126, "learning_rate": 5e-05, "loss": 1.5561, "step": 2250 }, { "epoch": 0.36021763482157143, "grad_norm": 0.3014984726905823, "learning_rate": 5e-05, "loss": 1.4808, "step": 2251 }, { "epoch": 0.3603776604256681, "grad_norm": 0.3178817629814148, "learning_rate": 5e-05, "loss": 1.5038, "step": 2252 }, { "epoch": 0.36053768602976477, "grad_norm": 0.31380000710487366, "learning_rate": 5e-05, "loss": 1.5606, "step": 2253 }, { "epoch": 0.36069771163386144, "grad_norm": 0.32020658254623413, "learning_rate": 5e-05, "loss": 1.5161, "step": 2254 }, { "epoch": 0.36085773723795805, "grad_norm": 0.31528744101524353, "learning_rate": 5e-05, "loss": 1.5434, "step": 2255 }, { "epoch": 0.3610177628420547, "grad_norm": 0.32600516080856323, "learning_rate": 5e-05, "loss": 1.5081, "step": 2256 }, { "epoch": 0.3611777884461514, "grad_norm": 0.3214963376522064, "learning_rate": 5e-05, "loss": 1.6165, "step": 2257 }, { "epoch": 0.36133781405024806, "grad_norm": 0.32821202278137207, "learning_rate": 5e-05, "loss": 1.5832, "step": 2258 }, { "epoch": 0.3614978396543447, "grad_norm": 0.3188752233982086, "learning_rate": 5e-05, "loss": 1.4735, "step": 2259 }, { "epoch": 0.36165786525844135, "grad_norm": 0.3059481978416443, "learning_rate": 5e-05, "loss": 1.5002, "step": 2260 }, { "epoch": 0.361817890862538, "grad_norm": 0.307205468416214, "learning_rate": 5e-05, "loss": 1.546, "step": 2261 }, { "epoch": 0.3619779164666347, "grad_norm": 0.2955103814601898, "learning_rate": 5e-05, "loss": 1.4745, "step": 2262 }, { "epoch": 0.3621379420707313, "grad_norm": 0.3144378364086151, "learning_rate": 5e-05, "loss": 1.5579, "step": 2263 }, { "epoch": 0.36229796767482797, "grad_norm": 0.3203682601451874, "learning_rate": 5e-05, "loss": 1.5473, "step": 2264 }, { "epoch": 0.36245799327892464, "grad_norm": 0.3135104775428772, "learning_rate": 5e-05, "loss": 1.5098, "step": 2265 }, { "epoch": 0.3626180188830213, "grad_norm": 0.3084709644317627, "learning_rate": 5e-05, "loss": 1.5631, "step": 2266 }, { "epoch": 0.3627780444871179, "grad_norm": 0.30357539653778076, "learning_rate": 5e-05, "loss": 1.4687, "step": 2267 }, { "epoch": 0.3629380700912146, "grad_norm": 0.3193538188934326, "learning_rate": 5e-05, "loss": 1.5698, "step": 2268 }, { "epoch": 0.36309809569531126, "grad_norm": 0.321103036403656, "learning_rate": 5e-05, "loss": 1.5848, "step": 2269 }, { "epoch": 0.36325812129940793, "grad_norm": 0.311927855014801, "learning_rate": 5e-05, "loss": 1.4452, "step": 2270 }, { "epoch": 0.36341814690350455, "grad_norm": 0.3130384683609009, "learning_rate": 5e-05, "loss": 1.5788, "step": 2271 }, { "epoch": 0.3635781725076012, "grad_norm": 0.30563002824783325, "learning_rate": 5e-05, "loss": 1.5073, "step": 2272 }, { "epoch": 0.3637381981116979, "grad_norm": 0.3214343190193176, "learning_rate": 5e-05, "loss": 1.5524, "step": 2273 }, { "epoch": 0.3638982237157945, "grad_norm": 0.3093753159046173, "learning_rate": 5e-05, "loss": 1.4907, "step": 2274 }, { "epoch": 0.36405824931989117, "grad_norm": 0.3052224814891815, "learning_rate": 5e-05, "loss": 1.4966, "step": 2275 }, { "epoch": 0.36421827492398784, "grad_norm": 0.3066674470901489, "learning_rate": 5e-05, "loss": 1.4507, "step": 2276 }, { "epoch": 0.3643783005280845, "grad_norm": 0.30880385637283325, "learning_rate": 5e-05, "loss": 1.5834, "step": 2277 }, { "epoch": 0.3645383261321811, "grad_norm": 0.30470341444015503, "learning_rate": 5e-05, "loss": 1.5941, "step": 2278 }, { "epoch": 0.3646983517362778, "grad_norm": 0.32093536853790283, "learning_rate": 5e-05, "loss": 1.5413, "step": 2279 }, { "epoch": 0.36485837734037446, "grad_norm": 0.31534233689308167, "learning_rate": 5e-05, "loss": 1.562, "step": 2280 }, { "epoch": 0.36501840294447113, "grad_norm": 0.3038574457168579, "learning_rate": 5e-05, "loss": 1.4934, "step": 2281 }, { "epoch": 0.36517842854856775, "grad_norm": 0.31800633668899536, "learning_rate": 5e-05, "loss": 1.52, "step": 2282 }, { "epoch": 0.3653384541526644, "grad_norm": 0.313250869512558, "learning_rate": 5e-05, "loss": 1.5425, "step": 2283 }, { "epoch": 0.3654984797567611, "grad_norm": 0.31263595819473267, "learning_rate": 5e-05, "loss": 1.5525, "step": 2284 }, { "epoch": 0.36565850536085776, "grad_norm": 0.33264249563217163, "learning_rate": 5e-05, "loss": 1.5982, "step": 2285 }, { "epoch": 0.36581853096495437, "grad_norm": 0.3095126748085022, "learning_rate": 5e-05, "loss": 1.5159, "step": 2286 }, { "epoch": 0.36597855656905104, "grad_norm": 0.3194548487663269, "learning_rate": 5e-05, "loss": 1.5305, "step": 2287 }, { "epoch": 0.3661385821731477, "grad_norm": 0.3135497570037842, "learning_rate": 5e-05, "loss": 1.4806, "step": 2288 }, { "epoch": 0.3662986077772444, "grad_norm": 0.3056853413581848, "learning_rate": 5e-05, "loss": 1.5585, "step": 2289 }, { "epoch": 0.366458633381341, "grad_norm": 0.3068288564682007, "learning_rate": 5e-05, "loss": 1.5578, "step": 2290 }, { "epoch": 0.36661865898543766, "grad_norm": 0.31896427273750305, "learning_rate": 5e-05, "loss": 1.5428, "step": 2291 }, { "epoch": 0.36677868458953433, "grad_norm": 0.30912068486213684, "learning_rate": 5e-05, "loss": 1.5105, "step": 2292 }, { "epoch": 0.366938710193631, "grad_norm": 0.3030596077442169, "learning_rate": 5e-05, "loss": 1.5082, "step": 2293 }, { "epoch": 0.3670987357977276, "grad_norm": 0.31776225566864014, "learning_rate": 5e-05, "loss": 1.5873, "step": 2294 }, { "epoch": 0.3672587614018243, "grad_norm": 0.31687241792678833, "learning_rate": 5e-05, "loss": 1.5309, "step": 2295 }, { "epoch": 0.36741878700592095, "grad_norm": 0.308703750371933, "learning_rate": 5e-05, "loss": 1.5466, "step": 2296 }, { "epoch": 0.3675788126100176, "grad_norm": 0.3191680908203125, "learning_rate": 5e-05, "loss": 1.5744, "step": 2297 }, { "epoch": 0.36773883821411424, "grad_norm": 0.30864018201828003, "learning_rate": 5e-05, "loss": 1.5222, "step": 2298 }, { "epoch": 0.3678988638182109, "grad_norm": 0.31369608640670776, "learning_rate": 5e-05, "loss": 1.5301, "step": 2299 }, { "epoch": 0.3680588894223076, "grad_norm": 0.30604326725006104, "learning_rate": 5e-05, "loss": 1.4257, "step": 2300 }, { "epoch": 0.36821891502640425, "grad_norm": 0.3155694603919983, "learning_rate": 5e-05, "loss": 1.493, "step": 2301 }, { "epoch": 0.36837894063050086, "grad_norm": 0.31304022669792175, "learning_rate": 5e-05, "loss": 1.5557, "step": 2302 }, { "epoch": 0.36853896623459753, "grad_norm": 0.33267560601234436, "learning_rate": 5e-05, "loss": 1.5922, "step": 2303 }, { "epoch": 0.3686989918386942, "grad_norm": 0.3183545768260956, "learning_rate": 5e-05, "loss": 1.5256, "step": 2304 }, { "epoch": 0.36885901744279087, "grad_norm": 0.3148074448108673, "learning_rate": 5e-05, "loss": 1.5128, "step": 2305 }, { "epoch": 0.3690190430468875, "grad_norm": 0.3158217966556549, "learning_rate": 5e-05, "loss": 1.5648, "step": 2306 }, { "epoch": 0.36917906865098415, "grad_norm": 0.30615824460983276, "learning_rate": 5e-05, "loss": 1.4935, "step": 2307 }, { "epoch": 0.3693390942550808, "grad_norm": 0.3067065179347992, "learning_rate": 5e-05, "loss": 1.531, "step": 2308 }, { "epoch": 0.3694991198591775, "grad_norm": 0.32234570384025574, "learning_rate": 5e-05, "loss": 1.5479, "step": 2309 }, { "epoch": 0.3696591454632741, "grad_norm": 0.3045298159122467, "learning_rate": 5e-05, "loss": 1.5125, "step": 2310 }, { "epoch": 0.3698191710673708, "grad_norm": 0.3039029538631439, "learning_rate": 5e-05, "loss": 1.5021, "step": 2311 }, { "epoch": 0.36997919667146745, "grad_norm": 0.33098965883255005, "learning_rate": 5e-05, "loss": 1.512, "step": 2312 }, { "epoch": 0.3701392222755641, "grad_norm": 0.31184422969818115, "learning_rate": 5e-05, "loss": 1.5502, "step": 2313 }, { "epoch": 0.37029924787966073, "grad_norm": 0.3197455108165741, "learning_rate": 5e-05, "loss": 1.5645, "step": 2314 }, { "epoch": 0.3704592734837574, "grad_norm": 0.3290407061576843, "learning_rate": 5e-05, "loss": 1.493, "step": 2315 }, { "epoch": 0.37061929908785407, "grad_norm": 0.31650397181510925, "learning_rate": 5e-05, "loss": 1.5981, "step": 2316 }, { "epoch": 0.3707793246919507, "grad_norm": 0.3209233283996582, "learning_rate": 5e-05, "loss": 1.6077, "step": 2317 }, { "epoch": 0.37093935029604735, "grad_norm": 0.3217925429344177, "learning_rate": 5e-05, "loss": 1.5657, "step": 2318 }, { "epoch": 0.371099375900144, "grad_norm": 0.3137418329715729, "learning_rate": 5e-05, "loss": 1.5718, "step": 2319 }, { "epoch": 0.3712594015042407, "grad_norm": 0.30730047821998596, "learning_rate": 5e-05, "loss": 1.4904, "step": 2320 }, { "epoch": 0.3714194271083373, "grad_norm": 0.31276848912239075, "learning_rate": 5e-05, "loss": 1.4611, "step": 2321 }, { "epoch": 0.371579452712434, "grad_norm": 0.31845158338546753, "learning_rate": 5e-05, "loss": 1.553, "step": 2322 }, { "epoch": 0.37173947831653065, "grad_norm": 0.3057023286819458, "learning_rate": 5e-05, "loss": 1.5132, "step": 2323 }, { "epoch": 0.3718995039206273, "grad_norm": 0.3318576216697693, "learning_rate": 5e-05, "loss": 1.5871, "step": 2324 }, { "epoch": 0.37205952952472393, "grad_norm": 0.3231581151485443, "learning_rate": 5e-05, "loss": 1.546, "step": 2325 }, { "epoch": 0.3722195551288206, "grad_norm": 0.3072322607040405, "learning_rate": 5e-05, "loss": 1.522, "step": 2326 }, { "epoch": 0.37237958073291727, "grad_norm": 0.31682103872299194, "learning_rate": 5e-05, "loss": 1.486, "step": 2327 }, { "epoch": 0.37253960633701394, "grad_norm": 0.31661543250083923, "learning_rate": 5e-05, "loss": 1.5414, "step": 2328 }, { "epoch": 0.37269963194111055, "grad_norm": 0.30743059515953064, "learning_rate": 5e-05, "loss": 1.5041, "step": 2329 }, { "epoch": 0.3728596575452072, "grad_norm": 0.336043119430542, "learning_rate": 5e-05, "loss": 1.5084, "step": 2330 }, { "epoch": 0.3730196831493039, "grad_norm": 0.32432904839515686, "learning_rate": 5e-05, "loss": 1.4969, "step": 2331 }, { "epoch": 0.37317970875340056, "grad_norm": 0.30641019344329834, "learning_rate": 5e-05, "loss": 1.5402, "step": 2332 }, { "epoch": 0.3733397343574972, "grad_norm": 0.32752546668052673, "learning_rate": 5e-05, "loss": 1.5292, "step": 2333 }, { "epoch": 0.37349975996159385, "grad_norm": 0.31620800495147705, "learning_rate": 5e-05, "loss": 1.5734, "step": 2334 }, { "epoch": 0.3736597855656905, "grad_norm": 0.308728963136673, "learning_rate": 5e-05, "loss": 1.4795, "step": 2335 }, { "epoch": 0.3738198111697872, "grad_norm": 0.3132389485836029, "learning_rate": 5e-05, "loss": 1.5925, "step": 2336 }, { "epoch": 0.3739798367738838, "grad_norm": 0.31511086225509644, "learning_rate": 5e-05, "loss": 1.5688, "step": 2337 }, { "epoch": 0.37413986237798047, "grad_norm": 0.3068019151687622, "learning_rate": 5e-05, "loss": 1.5129, "step": 2338 }, { "epoch": 0.37429988798207714, "grad_norm": 0.30940911173820496, "learning_rate": 5e-05, "loss": 1.553, "step": 2339 }, { "epoch": 0.3744599135861738, "grad_norm": 0.31078317761421204, "learning_rate": 5e-05, "loss": 1.5262, "step": 2340 }, { "epoch": 0.3746199391902704, "grad_norm": 0.3154125213623047, "learning_rate": 5e-05, "loss": 1.5547, "step": 2341 }, { "epoch": 0.3747799647943671, "grad_norm": 0.3287148177623749, "learning_rate": 5e-05, "loss": 1.6024, "step": 2342 }, { "epoch": 0.37493999039846376, "grad_norm": 0.3085040748119354, "learning_rate": 5e-05, "loss": 1.522, "step": 2343 }, { "epoch": 0.37510001600256043, "grad_norm": 0.3208037316799164, "learning_rate": 5e-05, "loss": 1.5607, "step": 2344 }, { "epoch": 0.37526004160665705, "grad_norm": 0.31095606088638306, "learning_rate": 5e-05, "loss": 1.5231, "step": 2345 }, { "epoch": 0.3754200672107537, "grad_norm": 0.32056924700737, "learning_rate": 5e-05, "loss": 1.5777, "step": 2346 }, { "epoch": 0.3755800928148504, "grad_norm": 0.31637221574783325, "learning_rate": 5e-05, "loss": 1.5598, "step": 2347 }, { "epoch": 0.37574011841894706, "grad_norm": 0.31000784039497375, "learning_rate": 5e-05, "loss": 1.5691, "step": 2348 }, { "epoch": 0.37590014402304367, "grad_norm": 0.3208838403224945, "learning_rate": 5e-05, "loss": 1.5668, "step": 2349 }, { "epoch": 0.37606016962714034, "grad_norm": 0.307087779045105, "learning_rate": 5e-05, "loss": 1.5862, "step": 2350 }, { "epoch": 0.376220195231237, "grad_norm": 0.31705182790756226, "learning_rate": 5e-05, "loss": 1.5632, "step": 2351 }, { "epoch": 0.3763802208353337, "grad_norm": 0.31401345133781433, "learning_rate": 5e-05, "loss": 1.521, "step": 2352 }, { "epoch": 0.3765402464394303, "grad_norm": 0.31065046787261963, "learning_rate": 5e-05, "loss": 1.5349, "step": 2353 }, { "epoch": 0.37670027204352696, "grad_norm": 0.30738914012908936, "learning_rate": 5e-05, "loss": 1.4879, "step": 2354 }, { "epoch": 0.37686029764762363, "grad_norm": 0.32836201786994934, "learning_rate": 5e-05, "loss": 1.6365, "step": 2355 }, { "epoch": 0.3770203232517203, "grad_norm": 0.31614071130752563, "learning_rate": 5e-05, "loss": 1.4594, "step": 2356 }, { "epoch": 0.3771803488558169, "grad_norm": 0.3238954246044159, "learning_rate": 5e-05, "loss": 1.5712, "step": 2357 }, { "epoch": 0.3773403744599136, "grad_norm": 0.3119276463985443, "learning_rate": 5e-05, "loss": 1.5502, "step": 2358 }, { "epoch": 0.37750040006401026, "grad_norm": 0.3124185800552368, "learning_rate": 5e-05, "loss": 1.5474, "step": 2359 }, { "epoch": 0.37766042566810687, "grad_norm": 0.30990904569625854, "learning_rate": 5e-05, "loss": 1.5114, "step": 2360 }, { "epoch": 0.37782045127220354, "grad_norm": 0.3170519769191742, "learning_rate": 5e-05, "loss": 1.5805, "step": 2361 }, { "epoch": 0.3779804768763002, "grad_norm": 0.3102078437805176, "learning_rate": 5e-05, "loss": 1.4666, "step": 2362 }, { "epoch": 0.3781405024803969, "grad_norm": 0.3126901090145111, "learning_rate": 5e-05, "loss": 1.5591, "step": 2363 }, { "epoch": 0.3783005280844935, "grad_norm": 0.31534409523010254, "learning_rate": 5e-05, "loss": 1.4774, "step": 2364 }, { "epoch": 0.37846055368859016, "grad_norm": 0.3116934895515442, "learning_rate": 5e-05, "loss": 1.5231, "step": 2365 }, { "epoch": 0.37862057929268683, "grad_norm": 0.3091055452823639, "learning_rate": 5e-05, "loss": 1.4966, "step": 2366 }, { "epoch": 0.3787806048967835, "grad_norm": 0.31321606040000916, "learning_rate": 5e-05, "loss": 1.488, "step": 2367 }, { "epoch": 0.3789406305008801, "grad_norm": 0.3150392472743988, "learning_rate": 5e-05, "loss": 1.511, "step": 2368 }, { "epoch": 0.3791006561049768, "grad_norm": 0.3063483238220215, "learning_rate": 5e-05, "loss": 1.5037, "step": 2369 }, { "epoch": 0.37926068170907346, "grad_norm": 0.3119852542877197, "learning_rate": 5e-05, "loss": 1.475, "step": 2370 }, { "epoch": 0.3794207073131701, "grad_norm": 0.31852424144744873, "learning_rate": 5e-05, "loss": 1.5153, "step": 2371 }, { "epoch": 0.37958073291726674, "grad_norm": 0.29910534620285034, "learning_rate": 5e-05, "loss": 1.5294, "step": 2372 }, { "epoch": 0.3797407585213634, "grad_norm": 0.31781890988349915, "learning_rate": 5e-05, "loss": 1.5353, "step": 2373 }, { "epoch": 0.3799007841254601, "grad_norm": 0.318812757730484, "learning_rate": 5e-05, "loss": 1.5282, "step": 2374 }, { "epoch": 0.38006080972955675, "grad_norm": 0.2985791862010956, "learning_rate": 5e-05, "loss": 1.4543, "step": 2375 }, { "epoch": 0.38022083533365336, "grad_norm": 0.311247855424881, "learning_rate": 5e-05, "loss": 1.5562, "step": 2376 }, { "epoch": 0.38038086093775003, "grad_norm": 0.31447139382362366, "learning_rate": 5e-05, "loss": 1.5471, "step": 2377 }, { "epoch": 0.3805408865418467, "grad_norm": 0.3079669177532196, "learning_rate": 5e-05, "loss": 1.4787, "step": 2378 }, { "epoch": 0.38070091214594337, "grad_norm": 0.3036450445652008, "learning_rate": 5e-05, "loss": 1.5494, "step": 2379 }, { "epoch": 0.38086093775004, "grad_norm": 0.319537490606308, "learning_rate": 5e-05, "loss": 1.5278, "step": 2380 }, { "epoch": 0.38102096335413665, "grad_norm": 0.31433218717575073, "learning_rate": 5e-05, "loss": 1.4831, "step": 2381 }, { "epoch": 0.3811809889582333, "grad_norm": 0.33043280243873596, "learning_rate": 5e-05, "loss": 1.5037, "step": 2382 }, { "epoch": 0.38134101456233, "grad_norm": 0.32536351680755615, "learning_rate": 5e-05, "loss": 1.5306, "step": 2383 }, { "epoch": 0.3815010401664266, "grad_norm": 0.32166045904159546, "learning_rate": 5e-05, "loss": 1.5086, "step": 2384 }, { "epoch": 0.3816610657705233, "grad_norm": 0.332806259393692, "learning_rate": 5e-05, "loss": 1.5334, "step": 2385 }, { "epoch": 0.38182109137461995, "grad_norm": 0.33741310238838196, "learning_rate": 5e-05, "loss": 1.5686, "step": 2386 }, { "epoch": 0.3819811169787166, "grad_norm": 0.3111858367919922, "learning_rate": 5e-05, "loss": 1.5238, "step": 2387 }, { "epoch": 0.38214114258281323, "grad_norm": 0.2943975329399109, "learning_rate": 5e-05, "loss": 1.3972, "step": 2388 }, { "epoch": 0.3823011681869099, "grad_norm": 0.30659955739974976, "learning_rate": 5e-05, "loss": 1.4577, "step": 2389 }, { "epoch": 0.38246119379100657, "grad_norm": 0.3091545104980469, "learning_rate": 5e-05, "loss": 1.4916, "step": 2390 }, { "epoch": 0.38262121939510324, "grad_norm": 0.32747817039489746, "learning_rate": 5e-05, "loss": 1.6354, "step": 2391 }, { "epoch": 0.38278124499919985, "grad_norm": 0.32515472173690796, "learning_rate": 5e-05, "loss": 1.5902, "step": 2392 }, { "epoch": 0.3829412706032965, "grad_norm": 0.30734461545944214, "learning_rate": 5e-05, "loss": 1.5522, "step": 2393 }, { "epoch": 0.3831012962073932, "grad_norm": 0.3111885190010071, "learning_rate": 5e-05, "loss": 1.5596, "step": 2394 }, { "epoch": 0.38326132181148986, "grad_norm": 0.30471792817115784, "learning_rate": 5e-05, "loss": 1.4882, "step": 2395 }, { "epoch": 0.3834213474155865, "grad_norm": 0.31965571641921997, "learning_rate": 5e-05, "loss": 1.5813, "step": 2396 }, { "epoch": 0.38358137301968315, "grad_norm": 0.3146559000015259, "learning_rate": 5e-05, "loss": 1.6091, "step": 2397 }, { "epoch": 0.3837413986237798, "grad_norm": 0.31939589977264404, "learning_rate": 5e-05, "loss": 1.5826, "step": 2398 }, { "epoch": 0.3839014242278765, "grad_norm": 0.3082549571990967, "learning_rate": 5e-05, "loss": 1.5412, "step": 2399 }, { "epoch": 0.3840614498319731, "grad_norm": 0.3034780025482178, "learning_rate": 5e-05, "loss": 1.5755, "step": 2400 }, { "epoch": 0.38422147543606977, "grad_norm": 0.3071467876434326, "learning_rate": 5e-05, "loss": 1.4654, "step": 2401 }, { "epoch": 0.38438150104016644, "grad_norm": 0.30536314845085144, "learning_rate": 5e-05, "loss": 1.5258, "step": 2402 }, { "epoch": 0.3845415266442631, "grad_norm": 0.3139454126358032, "learning_rate": 5e-05, "loss": 1.5471, "step": 2403 }, { "epoch": 0.3847015522483597, "grad_norm": 0.30531251430511475, "learning_rate": 5e-05, "loss": 1.5845, "step": 2404 }, { "epoch": 0.3848615778524564, "grad_norm": 0.3166674077510834, "learning_rate": 5e-05, "loss": 1.5195, "step": 2405 }, { "epoch": 0.38502160345655306, "grad_norm": 0.30500736832618713, "learning_rate": 5e-05, "loss": 1.5113, "step": 2406 }, { "epoch": 0.3851816290606497, "grad_norm": 0.30301496386528015, "learning_rate": 5e-05, "loss": 1.4992, "step": 2407 }, { "epoch": 0.38534165466474635, "grad_norm": 0.32564371824264526, "learning_rate": 5e-05, "loss": 1.5626, "step": 2408 }, { "epoch": 0.385501680268843, "grad_norm": 0.31127068400382996, "learning_rate": 5e-05, "loss": 1.5364, "step": 2409 }, { "epoch": 0.3856617058729397, "grad_norm": 0.3213133215904236, "learning_rate": 5e-05, "loss": 1.6638, "step": 2410 }, { "epoch": 0.3858217314770363, "grad_norm": 0.298820823431015, "learning_rate": 5e-05, "loss": 1.4221, "step": 2411 }, { "epoch": 0.38598175708113297, "grad_norm": 0.3061271905899048, "learning_rate": 5e-05, "loss": 1.498, "step": 2412 }, { "epoch": 0.38614178268522964, "grad_norm": 0.3090594410896301, "learning_rate": 5e-05, "loss": 1.5234, "step": 2413 }, { "epoch": 0.3863018082893263, "grad_norm": 0.31434717774391174, "learning_rate": 5e-05, "loss": 1.5426, "step": 2414 }, { "epoch": 0.3864618338934229, "grad_norm": 0.30573752522468567, "learning_rate": 5e-05, "loss": 1.4646, "step": 2415 }, { "epoch": 0.3866218594975196, "grad_norm": 0.31171318888664246, "learning_rate": 5e-05, "loss": 1.533, "step": 2416 }, { "epoch": 0.38678188510161626, "grad_norm": 0.2985774576663971, "learning_rate": 5e-05, "loss": 1.5202, "step": 2417 }, { "epoch": 0.38694191070571293, "grad_norm": 0.31264305114746094, "learning_rate": 5e-05, "loss": 1.5732, "step": 2418 }, { "epoch": 0.38710193630980955, "grad_norm": 0.33004099130630493, "learning_rate": 5e-05, "loss": 1.5871, "step": 2419 }, { "epoch": 0.3872619619139062, "grad_norm": 0.3152855336666107, "learning_rate": 5e-05, "loss": 1.5919, "step": 2420 }, { "epoch": 0.3874219875180029, "grad_norm": 0.304760217666626, "learning_rate": 5e-05, "loss": 1.5121, "step": 2421 }, { "epoch": 0.38758201312209956, "grad_norm": 0.3308124542236328, "learning_rate": 5e-05, "loss": 1.6, "step": 2422 }, { "epoch": 0.38774203872619617, "grad_norm": 0.3149816691875458, "learning_rate": 5e-05, "loss": 1.536, "step": 2423 }, { "epoch": 0.38790206433029284, "grad_norm": 0.3160792291164398, "learning_rate": 5e-05, "loss": 1.4879, "step": 2424 }, { "epoch": 0.3880620899343895, "grad_norm": 0.3302178382873535, "learning_rate": 5e-05, "loss": 1.6201, "step": 2425 }, { "epoch": 0.3882221155384862, "grad_norm": 0.3094542920589447, "learning_rate": 5e-05, "loss": 1.5389, "step": 2426 }, { "epoch": 0.3883821411425828, "grad_norm": 0.30894505977630615, "learning_rate": 5e-05, "loss": 1.4989, "step": 2427 }, { "epoch": 0.38854216674667946, "grad_norm": 0.3160769045352936, "learning_rate": 5e-05, "loss": 1.506, "step": 2428 }, { "epoch": 0.38870219235077613, "grad_norm": 0.30666452646255493, "learning_rate": 5e-05, "loss": 1.5472, "step": 2429 }, { "epoch": 0.3888622179548728, "grad_norm": 0.3185930848121643, "learning_rate": 5e-05, "loss": 1.5075, "step": 2430 }, { "epoch": 0.3890222435589694, "grad_norm": 0.3182253837585449, "learning_rate": 5e-05, "loss": 1.5487, "step": 2431 }, { "epoch": 0.3891822691630661, "grad_norm": 0.31894177198410034, "learning_rate": 5e-05, "loss": 1.5676, "step": 2432 }, { "epoch": 0.38934229476716276, "grad_norm": 0.30778321623802185, "learning_rate": 5e-05, "loss": 1.527, "step": 2433 }, { "epoch": 0.3895023203712594, "grad_norm": 0.3001821041107178, "learning_rate": 5e-05, "loss": 1.4915, "step": 2434 }, { "epoch": 0.38966234597535604, "grad_norm": 0.3070475161075592, "learning_rate": 5e-05, "loss": 1.5168, "step": 2435 }, { "epoch": 0.3898223715794527, "grad_norm": 0.30263814330101013, "learning_rate": 5e-05, "loss": 1.4576, "step": 2436 }, { "epoch": 0.3899823971835494, "grad_norm": 0.314899742603302, "learning_rate": 5e-05, "loss": 1.5428, "step": 2437 }, { "epoch": 0.39014242278764605, "grad_norm": 0.31422221660614014, "learning_rate": 5e-05, "loss": 1.5432, "step": 2438 }, { "epoch": 0.39030244839174266, "grad_norm": 0.3076196312904358, "learning_rate": 5e-05, "loss": 1.4631, "step": 2439 }, { "epoch": 0.39046247399583933, "grad_norm": 0.3157544434070587, "learning_rate": 5e-05, "loss": 1.55, "step": 2440 }, { "epoch": 0.390622499599936, "grad_norm": 0.3105469346046448, "learning_rate": 5e-05, "loss": 1.5219, "step": 2441 }, { "epoch": 0.39078252520403267, "grad_norm": 0.3196718990802765, "learning_rate": 5e-05, "loss": 1.5469, "step": 2442 }, { "epoch": 0.3909425508081293, "grad_norm": 0.31365811824798584, "learning_rate": 5e-05, "loss": 1.5796, "step": 2443 }, { "epoch": 0.39110257641222596, "grad_norm": 0.3111330270767212, "learning_rate": 5e-05, "loss": 1.541, "step": 2444 }, { "epoch": 0.3912626020163226, "grad_norm": 0.3132492005825043, "learning_rate": 5e-05, "loss": 1.5853, "step": 2445 }, { "epoch": 0.3914226276204193, "grad_norm": 0.31502825021743774, "learning_rate": 5e-05, "loss": 1.5632, "step": 2446 }, { "epoch": 0.3915826532245159, "grad_norm": 0.31537333130836487, "learning_rate": 5e-05, "loss": 1.522, "step": 2447 }, { "epoch": 0.3917426788286126, "grad_norm": 0.302891343832016, "learning_rate": 5e-05, "loss": 1.5438, "step": 2448 }, { "epoch": 0.39190270443270925, "grad_norm": 0.3159264028072357, "learning_rate": 5e-05, "loss": 1.5264, "step": 2449 }, { "epoch": 0.39206273003680586, "grad_norm": 0.3149125576019287, "learning_rate": 5e-05, "loss": 1.4725, "step": 2450 }, { "epoch": 0.39222275564090253, "grad_norm": 0.31692662835121155, "learning_rate": 5e-05, "loss": 1.5646, "step": 2451 }, { "epoch": 0.3923827812449992, "grad_norm": 0.29956862330436707, "learning_rate": 5e-05, "loss": 1.4656, "step": 2452 }, { "epoch": 0.39254280684909587, "grad_norm": 0.30005311965942383, "learning_rate": 5e-05, "loss": 1.5117, "step": 2453 }, { "epoch": 0.3927028324531925, "grad_norm": 0.3093448281288147, "learning_rate": 5e-05, "loss": 1.5938, "step": 2454 }, { "epoch": 0.39286285805728915, "grad_norm": 0.315364807844162, "learning_rate": 5e-05, "loss": 1.4597, "step": 2455 }, { "epoch": 0.3930228836613858, "grad_norm": 0.3358585834503174, "learning_rate": 5e-05, "loss": 1.6423, "step": 2456 }, { "epoch": 0.3931829092654825, "grad_norm": 0.3262915015220642, "learning_rate": 5e-05, "loss": 1.5415, "step": 2457 }, { "epoch": 0.3933429348695791, "grad_norm": 0.32926347851753235, "learning_rate": 5e-05, "loss": 1.5182, "step": 2458 }, { "epoch": 0.3935029604736758, "grad_norm": 0.30288997292518616, "learning_rate": 5e-05, "loss": 1.4364, "step": 2459 }, { "epoch": 0.39366298607777245, "grad_norm": 0.3161441385746002, "learning_rate": 5e-05, "loss": 1.489, "step": 2460 }, { "epoch": 0.3938230116818691, "grad_norm": 0.3209574520587921, "learning_rate": 5e-05, "loss": 1.5566, "step": 2461 }, { "epoch": 0.39398303728596573, "grad_norm": 0.31100592017173767, "learning_rate": 5e-05, "loss": 1.5628, "step": 2462 }, { "epoch": 0.3941430628900624, "grad_norm": 0.3241320252418518, "learning_rate": 5e-05, "loss": 1.523, "step": 2463 }, { "epoch": 0.39430308849415907, "grad_norm": 0.32244178652763367, "learning_rate": 5e-05, "loss": 1.5129, "step": 2464 }, { "epoch": 0.39446311409825574, "grad_norm": 0.3131848871707916, "learning_rate": 5e-05, "loss": 1.5095, "step": 2465 }, { "epoch": 0.39462313970235235, "grad_norm": 0.3235829472541809, "learning_rate": 5e-05, "loss": 1.5351, "step": 2466 }, { "epoch": 0.394783165306449, "grad_norm": 0.3401525914669037, "learning_rate": 5e-05, "loss": 1.5414, "step": 2467 }, { "epoch": 0.3949431909105457, "grad_norm": 0.31661272048950195, "learning_rate": 5e-05, "loss": 1.5579, "step": 2468 }, { "epoch": 0.39510321651464236, "grad_norm": 0.31583544611930847, "learning_rate": 5e-05, "loss": 1.5019, "step": 2469 }, { "epoch": 0.395263242118739, "grad_norm": 0.3382437527179718, "learning_rate": 5e-05, "loss": 1.5446, "step": 2470 }, { "epoch": 0.39542326772283565, "grad_norm": 0.3197768032550812, "learning_rate": 5e-05, "loss": 1.5619, "step": 2471 }, { "epoch": 0.3955832933269323, "grad_norm": 0.3245241343975067, "learning_rate": 5e-05, "loss": 1.5966, "step": 2472 }, { "epoch": 0.395743318931029, "grad_norm": 0.3215691149234772, "learning_rate": 5e-05, "loss": 1.5376, "step": 2473 }, { "epoch": 0.3959033445351256, "grad_norm": 0.3186042904853821, "learning_rate": 5e-05, "loss": 1.5842, "step": 2474 }, { "epoch": 0.39606337013922227, "grad_norm": 0.30880770087242126, "learning_rate": 5e-05, "loss": 1.5224, "step": 2475 }, { "epoch": 0.39622339574331894, "grad_norm": 0.3410595953464508, "learning_rate": 5e-05, "loss": 1.4614, "step": 2476 }, { "epoch": 0.3963834213474156, "grad_norm": 0.3125253915786743, "learning_rate": 5e-05, "loss": 1.5529, "step": 2477 }, { "epoch": 0.3965434469515122, "grad_norm": 0.314743310213089, "learning_rate": 5e-05, "loss": 1.522, "step": 2478 }, { "epoch": 0.3967034725556089, "grad_norm": 0.3350593149662018, "learning_rate": 5e-05, "loss": 1.5467, "step": 2479 }, { "epoch": 0.39686349815970556, "grad_norm": 0.32156652212142944, "learning_rate": 5e-05, "loss": 1.5253, "step": 2480 }, { "epoch": 0.39702352376380223, "grad_norm": 0.31919464468955994, "learning_rate": 5e-05, "loss": 1.514, "step": 2481 }, { "epoch": 0.39718354936789885, "grad_norm": 0.3152759373188019, "learning_rate": 5e-05, "loss": 1.5281, "step": 2482 }, { "epoch": 0.3973435749719955, "grad_norm": 0.3139837682247162, "learning_rate": 5e-05, "loss": 1.5509, "step": 2483 }, { "epoch": 0.3975036005760922, "grad_norm": 0.3123772442340851, "learning_rate": 5e-05, "loss": 1.5634, "step": 2484 }, { "epoch": 0.39766362618018886, "grad_norm": 0.3214738965034485, "learning_rate": 5e-05, "loss": 1.5382, "step": 2485 }, { "epoch": 0.39782365178428547, "grad_norm": 0.31827208399772644, "learning_rate": 5e-05, "loss": 1.5106, "step": 2486 }, { "epoch": 0.39798367738838214, "grad_norm": 0.3209194540977478, "learning_rate": 5e-05, "loss": 1.4412, "step": 2487 }, { "epoch": 0.3981437029924788, "grad_norm": 0.31921204924583435, "learning_rate": 5e-05, "loss": 1.4651, "step": 2488 }, { "epoch": 0.3983037285965755, "grad_norm": 0.31541624665260315, "learning_rate": 5e-05, "loss": 1.4644, "step": 2489 }, { "epoch": 0.3984637542006721, "grad_norm": 0.31946036219596863, "learning_rate": 5e-05, "loss": 1.5366, "step": 2490 }, { "epoch": 0.39862377980476876, "grad_norm": 0.33022916316986084, "learning_rate": 5e-05, "loss": 1.5487, "step": 2491 }, { "epoch": 0.39878380540886543, "grad_norm": 0.3114371597766876, "learning_rate": 5e-05, "loss": 1.502, "step": 2492 }, { "epoch": 0.39894383101296205, "grad_norm": 0.32832813262939453, "learning_rate": 5e-05, "loss": 1.4547, "step": 2493 }, { "epoch": 0.3991038566170587, "grad_norm": 0.3281005620956421, "learning_rate": 5e-05, "loss": 1.5346, "step": 2494 }, { "epoch": 0.3992638822211554, "grad_norm": 0.3239437937736511, "learning_rate": 5e-05, "loss": 1.5269, "step": 2495 }, { "epoch": 0.39942390782525206, "grad_norm": 0.3296954333782196, "learning_rate": 5e-05, "loss": 1.5262, "step": 2496 }, { "epoch": 0.39958393342934867, "grad_norm": 0.3211473226547241, "learning_rate": 5e-05, "loss": 1.5977, "step": 2497 }, { "epoch": 0.39974395903344534, "grad_norm": 0.33362680673599243, "learning_rate": 5e-05, "loss": 1.5025, "step": 2498 }, { "epoch": 0.399903984637542, "grad_norm": 0.3184685707092285, "learning_rate": 5e-05, "loss": 1.5218, "step": 2499 }, { "epoch": 0.4000640102416387, "grad_norm": 0.3148418962955475, "learning_rate": 5e-05, "loss": 1.5207, "step": 2500 }, { "epoch": 0.4002240358457353, "grad_norm": 0.31432273983955383, "learning_rate": 5e-05, "loss": 1.4979, "step": 2501 }, { "epoch": 0.40038406144983196, "grad_norm": 0.32745757699012756, "learning_rate": 5e-05, "loss": 1.5134, "step": 2502 }, { "epoch": 0.40054408705392863, "grad_norm": 0.3169113099575043, "learning_rate": 5e-05, "loss": 1.5032, "step": 2503 }, { "epoch": 0.4007041126580253, "grad_norm": 0.3230629861354828, "learning_rate": 5e-05, "loss": 1.5764, "step": 2504 }, { "epoch": 0.4008641382621219, "grad_norm": 0.3182300329208374, "learning_rate": 5e-05, "loss": 1.5451, "step": 2505 }, { "epoch": 0.4010241638662186, "grad_norm": 0.3090181052684784, "learning_rate": 5e-05, "loss": 1.5174, "step": 2506 }, { "epoch": 0.40118418947031526, "grad_norm": 0.3307109475135803, "learning_rate": 5e-05, "loss": 1.5243, "step": 2507 }, { "epoch": 0.4013442150744119, "grad_norm": 0.3166615068912506, "learning_rate": 5e-05, "loss": 1.4781, "step": 2508 }, { "epoch": 0.40150424067850854, "grad_norm": 0.3275916874408722, "learning_rate": 5e-05, "loss": 1.6028, "step": 2509 }, { "epoch": 0.4016642662826052, "grad_norm": 0.3151150345802307, "learning_rate": 5e-05, "loss": 1.5812, "step": 2510 }, { "epoch": 0.4018242918867019, "grad_norm": 0.3008846342563629, "learning_rate": 5e-05, "loss": 1.4688, "step": 2511 }, { "epoch": 0.40198431749079855, "grad_norm": 0.31144607067108154, "learning_rate": 5e-05, "loss": 1.5441, "step": 2512 }, { "epoch": 0.40214434309489516, "grad_norm": 0.3152765929698944, "learning_rate": 5e-05, "loss": 1.4764, "step": 2513 }, { "epoch": 0.40230436869899183, "grad_norm": 0.3139379918575287, "learning_rate": 5e-05, "loss": 1.5051, "step": 2514 }, { "epoch": 0.4024643943030885, "grad_norm": 0.30677032470703125, "learning_rate": 5e-05, "loss": 1.5208, "step": 2515 }, { "epoch": 0.40262441990718517, "grad_norm": 0.3199198246002197, "learning_rate": 5e-05, "loss": 1.5575, "step": 2516 }, { "epoch": 0.4027844455112818, "grad_norm": 0.3221912384033203, "learning_rate": 5e-05, "loss": 1.5924, "step": 2517 }, { "epoch": 0.40294447111537846, "grad_norm": 0.3080616891384125, "learning_rate": 5e-05, "loss": 1.4813, "step": 2518 }, { "epoch": 0.4031044967194751, "grad_norm": 0.3073028028011322, "learning_rate": 5e-05, "loss": 1.4891, "step": 2519 }, { "epoch": 0.4032645223235718, "grad_norm": 0.3165455460548401, "learning_rate": 5e-05, "loss": 1.5048, "step": 2520 }, { "epoch": 0.4034245479276684, "grad_norm": 0.32055097818374634, "learning_rate": 5e-05, "loss": 1.5521, "step": 2521 }, { "epoch": 0.4035845735317651, "grad_norm": 0.3104937970638275, "learning_rate": 5e-05, "loss": 1.4483, "step": 2522 }, { "epoch": 0.40374459913586175, "grad_norm": 0.3194759488105774, "learning_rate": 5e-05, "loss": 1.5602, "step": 2523 }, { "epoch": 0.4039046247399584, "grad_norm": 0.3258310854434967, "learning_rate": 5e-05, "loss": 1.578, "step": 2524 }, { "epoch": 0.40406465034405503, "grad_norm": 0.30656155943870544, "learning_rate": 5e-05, "loss": 1.4851, "step": 2525 }, { "epoch": 0.4042246759481517, "grad_norm": 0.31642287969589233, "learning_rate": 5e-05, "loss": 1.6596, "step": 2526 }, { "epoch": 0.40438470155224837, "grad_norm": 0.31986889243125916, "learning_rate": 5e-05, "loss": 1.5479, "step": 2527 }, { "epoch": 0.40454472715634504, "grad_norm": 0.3122445344924927, "learning_rate": 5e-05, "loss": 1.5317, "step": 2528 }, { "epoch": 0.40470475276044166, "grad_norm": 0.31321465969085693, "learning_rate": 5e-05, "loss": 1.5442, "step": 2529 }, { "epoch": 0.4048647783645383, "grad_norm": 0.3072795867919922, "learning_rate": 5e-05, "loss": 1.4781, "step": 2530 }, { "epoch": 0.405024803968635, "grad_norm": 0.31946825981140137, "learning_rate": 5e-05, "loss": 1.4862, "step": 2531 }, { "epoch": 0.40518482957273166, "grad_norm": 0.31993159651756287, "learning_rate": 5e-05, "loss": 1.5683, "step": 2532 }, { "epoch": 0.4053448551768283, "grad_norm": 0.31489625573158264, "learning_rate": 5e-05, "loss": 1.4424, "step": 2533 }, { "epoch": 0.40550488078092495, "grad_norm": 0.33108022809028625, "learning_rate": 5e-05, "loss": 1.5618, "step": 2534 }, { "epoch": 0.4056649063850216, "grad_norm": 0.31765416264533997, "learning_rate": 5e-05, "loss": 1.4849, "step": 2535 }, { "epoch": 0.40582493198911823, "grad_norm": 0.32128509879112244, "learning_rate": 5e-05, "loss": 1.5671, "step": 2536 }, { "epoch": 0.4059849575932149, "grad_norm": 0.3172350823879242, "learning_rate": 5e-05, "loss": 1.5327, "step": 2537 }, { "epoch": 0.40614498319731157, "grad_norm": 0.3183475136756897, "learning_rate": 5e-05, "loss": 1.5534, "step": 2538 }, { "epoch": 0.40630500880140824, "grad_norm": 0.3176467716693878, "learning_rate": 5e-05, "loss": 1.4458, "step": 2539 }, { "epoch": 0.40646503440550485, "grad_norm": 0.31970295310020447, "learning_rate": 5e-05, "loss": 1.5606, "step": 2540 }, { "epoch": 0.4066250600096015, "grad_norm": 0.3192010223865509, "learning_rate": 5e-05, "loss": 1.5179, "step": 2541 }, { "epoch": 0.4067850856136982, "grad_norm": 0.3116588890552521, "learning_rate": 5e-05, "loss": 1.4748, "step": 2542 }, { "epoch": 0.40694511121779486, "grad_norm": 0.3256261646747589, "learning_rate": 5e-05, "loss": 1.5606, "step": 2543 }, { "epoch": 0.4071051368218915, "grad_norm": 0.31673145294189453, "learning_rate": 5e-05, "loss": 1.538, "step": 2544 }, { "epoch": 0.40726516242598815, "grad_norm": 0.31271347403526306, "learning_rate": 5e-05, "loss": 1.5478, "step": 2545 }, { "epoch": 0.4074251880300848, "grad_norm": 0.3117271959781647, "learning_rate": 5e-05, "loss": 1.5474, "step": 2546 }, { "epoch": 0.4075852136341815, "grad_norm": 0.3113075792789459, "learning_rate": 5e-05, "loss": 1.5369, "step": 2547 }, { "epoch": 0.4077452392382781, "grad_norm": 0.3116813600063324, "learning_rate": 5e-05, "loss": 1.4974, "step": 2548 }, { "epoch": 0.40790526484237477, "grad_norm": 0.3233676552772522, "learning_rate": 5e-05, "loss": 1.4899, "step": 2549 }, { "epoch": 0.40806529044647144, "grad_norm": 0.2936322093009949, "learning_rate": 5e-05, "loss": 1.3944, "step": 2550 }, { "epoch": 0.4082253160505681, "grad_norm": 0.3210178017616272, "learning_rate": 5e-05, "loss": 1.5277, "step": 2551 }, { "epoch": 0.4083853416546647, "grad_norm": 0.3101109564304352, "learning_rate": 5e-05, "loss": 1.5407, "step": 2552 }, { "epoch": 0.4085453672587614, "grad_norm": 0.3202590048313141, "learning_rate": 5e-05, "loss": 1.447, "step": 2553 }, { "epoch": 0.40870539286285806, "grad_norm": 0.31972789764404297, "learning_rate": 5e-05, "loss": 1.5708, "step": 2554 }, { "epoch": 0.40886541846695473, "grad_norm": 0.30041274428367615, "learning_rate": 5e-05, "loss": 1.3939, "step": 2555 }, { "epoch": 0.40902544407105135, "grad_norm": 0.3284178078174591, "learning_rate": 5e-05, "loss": 1.6069, "step": 2556 }, { "epoch": 0.409185469675148, "grad_norm": 0.30556970834732056, "learning_rate": 5e-05, "loss": 1.4821, "step": 2557 }, { "epoch": 0.4093454952792447, "grad_norm": 0.31600338220596313, "learning_rate": 5e-05, "loss": 1.5599, "step": 2558 }, { "epoch": 0.40950552088334136, "grad_norm": 0.4316157400608063, "learning_rate": 5e-05, "loss": 1.5533, "step": 2559 }, { "epoch": 0.40966554648743797, "grad_norm": 0.31210339069366455, "learning_rate": 5e-05, "loss": 1.5881, "step": 2560 }, { "epoch": 0.40982557209153464, "grad_norm": 0.31226587295532227, "learning_rate": 5e-05, "loss": 1.477, "step": 2561 }, { "epoch": 0.4099855976956313, "grad_norm": 0.3241964876651764, "learning_rate": 5e-05, "loss": 1.5534, "step": 2562 }, { "epoch": 0.410145623299728, "grad_norm": 0.33663204312324524, "learning_rate": 5e-05, "loss": 1.5843, "step": 2563 }, { "epoch": 0.4103056489038246, "grad_norm": 0.30508294701576233, "learning_rate": 5e-05, "loss": 1.4342, "step": 2564 }, { "epoch": 0.41046567450792126, "grad_norm": 0.32786911725997925, "learning_rate": 5e-05, "loss": 1.5017, "step": 2565 }, { "epoch": 0.41062570011201793, "grad_norm": 0.3276768624782562, "learning_rate": 5e-05, "loss": 1.5067, "step": 2566 }, { "epoch": 0.4107857257161146, "grad_norm": 0.3321877419948578, "learning_rate": 5e-05, "loss": 1.5647, "step": 2567 }, { "epoch": 0.4109457513202112, "grad_norm": 0.316996693611145, "learning_rate": 5e-05, "loss": 1.4833, "step": 2568 }, { "epoch": 0.4111057769243079, "grad_norm": 0.31366637349128723, "learning_rate": 5e-05, "loss": 1.5591, "step": 2569 }, { "epoch": 0.41126580252840456, "grad_norm": 0.32052120566368103, "learning_rate": 5e-05, "loss": 1.6043, "step": 2570 }, { "epoch": 0.4114258281325012, "grad_norm": 0.3171434700489044, "learning_rate": 5e-05, "loss": 1.512, "step": 2571 }, { "epoch": 0.41158585373659784, "grad_norm": 0.3150210976600647, "learning_rate": 5e-05, "loss": 1.4547, "step": 2572 }, { "epoch": 0.4117458793406945, "grad_norm": 0.34689903259277344, "learning_rate": 5e-05, "loss": 1.5427, "step": 2573 }, { "epoch": 0.4119059049447912, "grad_norm": 0.30883926153182983, "learning_rate": 5e-05, "loss": 1.5216, "step": 2574 }, { "epoch": 0.41206593054888785, "grad_norm": 0.31527814269065857, "learning_rate": 5e-05, "loss": 1.4809, "step": 2575 }, { "epoch": 0.41222595615298446, "grad_norm": 0.34035906195640564, "learning_rate": 5e-05, "loss": 1.492, "step": 2576 }, { "epoch": 0.41238598175708113, "grad_norm": 0.30906859040260315, "learning_rate": 5e-05, "loss": 1.499, "step": 2577 }, { "epoch": 0.4125460073611778, "grad_norm": 0.328313946723938, "learning_rate": 5e-05, "loss": 1.5605, "step": 2578 }, { "epoch": 0.4127060329652744, "grad_norm": 0.32618775963783264, "learning_rate": 5e-05, "loss": 1.5544, "step": 2579 }, { "epoch": 0.4128660585693711, "grad_norm": 0.30399930477142334, "learning_rate": 5e-05, "loss": 1.5231, "step": 2580 }, { "epoch": 0.41302608417346776, "grad_norm": 0.3142101466655731, "learning_rate": 5e-05, "loss": 1.5153, "step": 2581 }, { "epoch": 0.4131861097775644, "grad_norm": 0.3265177607536316, "learning_rate": 5e-05, "loss": 1.5554, "step": 2582 }, { "epoch": 0.41334613538166104, "grad_norm": 0.3113693594932556, "learning_rate": 5e-05, "loss": 1.5225, "step": 2583 }, { "epoch": 0.4135061609857577, "grad_norm": 0.31952881813049316, "learning_rate": 5e-05, "loss": 1.5476, "step": 2584 }, { "epoch": 0.4136661865898544, "grad_norm": 0.30865928530693054, "learning_rate": 5e-05, "loss": 1.5374, "step": 2585 }, { "epoch": 0.41382621219395105, "grad_norm": 0.3123105466365814, "learning_rate": 5e-05, "loss": 1.5164, "step": 2586 }, { "epoch": 0.41398623779804766, "grad_norm": 0.31665122509002686, "learning_rate": 5e-05, "loss": 1.5031, "step": 2587 }, { "epoch": 0.41414626340214433, "grad_norm": 0.3267775774002075, "learning_rate": 5e-05, "loss": 1.5016, "step": 2588 }, { "epoch": 0.414306289006241, "grad_norm": 0.31593093276023865, "learning_rate": 5e-05, "loss": 1.5486, "step": 2589 }, { "epoch": 0.41446631461033767, "grad_norm": 0.31786391139030457, "learning_rate": 5e-05, "loss": 1.5471, "step": 2590 }, { "epoch": 0.4146263402144343, "grad_norm": 0.31362733244895935, "learning_rate": 5e-05, "loss": 1.5296, "step": 2591 }, { "epoch": 0.41478636581853096, "grad_norm": 0.322169691324234, "learning_rate": 5e-05, "loss": 1.4918, "step": 2592 }, { "epoch": 0.4149463914226276, "grad_norm": 0.31663978099823, "learning_rate": 5e-05, "loss": 1.5122, "step": 2593 }, { "epoch": 0.4151064170267243, "grad_norm": 0.32707908749580383, "learning_rate": 5e-05, "loss": 1.5774, "step": 2594 }, { "epoch": 0.4152664426308209, "grad_norm": 0.309177428483963, "learning_rate": 5e-05, "loss": 1.4677, "step": 2595 }, { "epoch": 0.4154264682349176, "grad_norm": 0.31457075476646423, "learning_rate": 5e-05, "loss": 1.5279, "step": 2596 }, { "epoch": 0.41558649383901425, "grad_norm": 0.3172902464866638, "learning_rate": 5e-05, "loss": 1.5081, "step": 2597 }, { "epoch": 0.4157465194431109, "grad_norm": 0.31486478447914124, "learning_rate": 5e-05, "loss": 1.4604, "step": 2598 }, { "epoch": 0.41590654504720753, "grad_norm": 0.3200485110282898, "learning_rate": 5e-05, "loss": 1.5137, "step": 2599 }, { "epoch": 0.4160665706513042, "grad_norm": 0.30796557664871216, "learning_rate": 5e-05, "loss": 1.4719, "step": 2600 }, { "epoch": 0.41622659625540087, "grad_norm": 0.3364667296409607, "learning_rate": 5e-05, "loss": 1.6029, "step": 2601 }, { "epoch": 0.41638662185949754, "grad_norm": 0.31708595156669617, "learning_rate": 5e-05, "loss": 1.5874, "step": 2602 }, { "epoch": 0.41654664746359416, "grad_norm": 0.3087363839149475, "learning_rate": 5e-05, "loss": 1.5109, "step": 2603 }, { "epoch": 0.4167066730676908, "grad_norm": 0.3217846155166626, "learning_rate": 5e-05, "loss": 1.6102, "step": 2604 }, { "epoch": 0.4168666986717875, "grad_norm": 0.3235575556755066, "learning_rate": 5e-05, "loss": 1.522, "step": 2605 }, { "epoch": 0.41702672427588416, "grad_norm": 0.3092065155506134, "learning_rate": 5e-05, "loss": 1.5859, "step": 2606 }, { "epoch": 0.4171867498799808, "grad_norm": 0.3225102722644806, "learning_rate": 5e-05, "loss": 1.5034, "step": 2607 }, { "epoch": 0.41734677548407745, "grad_norm": 0.3119697868824005, "learning_rate": 5e-05, "loss": 1.5598, "step": 2608 }, { "epoch": 0.4175068010881741, "grad_norm": 0.3210020959377289, "learning_rate": 5e-05, "loss": 1.5249, "step": 2609 }, { "epoch": 0.4176668266922708, "grad_norm": 0.31582772731781006, "learning_rate": 5e-05, "loss": 1.5189, "step": 2610 }, { "epoch": 0.4178268522963674, "grad_norm": 0.325735479593277, "learning_rate": 5e-05, "loss": 1.5449, "step": 2611 }, { "epoch": 0.41798687790046407, "grad_norm": 0.3427794575691223, "learning_rate": 5e-05, "loss": 1.5906, "step": 2612 }, { "epoch": 0.41814690350456074, "grad_norm": 0.3175331652164459, "learning_rate": 5e-05, "loss": 1.5115, "step": 2613 }, { "epoch": 0.4183069291086574, "grad_norm": 0.3267695903778076, "learning_rate": 5e-05, "loss": 1.5449, "step": 2614 }, { "epoch": 0.418466954712754, "grad_norm": 0.3374026417732239, "learning_rate": 5e-05, "loss": 1.5719, "step": 2615 }, { "epoch": 0.4186269803168507, "grad_norm": 0.3167579174041748, "learning_rate": 5e-05, "loss": 1.5781, "step": 2616 }, { "epoch": 0.41878700592094736, "grad_norm": 0.3300137221813202, "learning_rate": 5e-05, "loss": 1.471, "step": 2617 }, { "epoch": 0.41894703152504403, "grad_norm": 0.33106666803359985, "learning_rate": 5e-05, "loss": 1.5382, "step": 2618 }, { "epoch": 0.41910705712914065, "grad_norm": 0.3126417398452759, "learning_rate": 5e-05, "loss": 1.486, "step": 2619 }, { "epoch": 0.4192670827332373, "grad_norm": 0.32972776889801025, "learning_rate": 5e-05, "loss": 1.5297, "step": 2620 }, { "epoch": 0.419427108337334, "grad_norm": 0.3124549090862274, "learning_rate": 5e-05, "loss": 1.5204, "step": 2621 }, { "epoch": 0.4195871339414306, "grad_norm": 0.314335435628891, "learning_rate": 5e-05, "loss": 1.4974, "step": 2622 }, { "epoch": 0.41974715954552727, "grad_norm": 0.3159481883049011, "learning_rate": 5e-05, "loss": 1.4863, "step": 2623 }, { "epoch": 0.41990718514962394, "grad_norm": 0.3204735517501831, "learning_rate": 5e-05, "loss": 1.5446, "step": 2624 }, { "epoch": 0.4200672107537206, "grad_norm": 0.31419554352760315, "learning_rate": 5e-05, "loss": 1.5294, "step": 2625 }, { "epoch": 0.4202272363578172, "grad_norm": 0.3029853105545044, "learning_rate": 5e-05, "loss": 1.5024, "step": 2626 }, { "epoch": 0.4203872619619139, "grad_norm": 0.30827420949935913, "learning_rate": 5e-05, "loss": 1.4905, "step": 2627 }, { "epoch": 0.42054728756601056, "grad_norm": 0.3138279318809509, "learning_rate": 5e-05, "loss": 1.5974, "step": 2628 }, { "epoch": 0.42070731317010723, "grad_norm": 0.31017887592315674, "learning_rate": 5e-05, "loss": 1.523, "step": 2629 }, { "epoch": 0.42086733877420385, "grad_norm": 0.3130991458892822, "learning_rate": 5e-05, "loss": 1.4728, "step": 2630 }, { "epoch": 0.4210273643783005, "grad_norm": 0.32156282663345337, "learning_rate": 5e-05, "loss": 1.5177, "step": 2631 }, { "epoch": 0.4211873899823972, "grad_norm": 0.32986587285995483, "learning_rate": 5e-05, "loss": 1.6137, "step": 2632 }, { "epoch": 0.42134741558649386, "grad_norm": 0.3186715245246887, "learning_rate": 5e-05, "loss": 1.5222, "step": 2633 }, { "epoch": 0.42150744119059047, "grad_norm": 0.31895533204078674, "learning_rate": 5e-05, "loss": 1.4643, "step": 2634 }, { "epoch": 0.42166746679468714, "grad_norm": 0.32787802815437317, "learning_rate": 5e-05, "loss": 1.6121, "step": 2635 }, { "epoch": 0.4218274923987838, "grad_norm": 0.31836146116256714, "learning_rate": 5e-05, "loss": 1.56, "step": 2636 }, { "epoch": 0.4219875180028805, "grad_norm": 0.3134704530239105, "learning_rate": 5e-05, "loss": 1.5027, "step": 2637 }, { "epoch": 0.4221475436069771, "grad_norm": 0.32567137479782104, "learning_rate": 5e-05, "loss": 1.5531, "step": 2638 }, { "epoch": 0.42230756921107376, "grad_norm": 0.3151669502258301, "learning_rate": 5e-05, "loss": 1.4745, "step": 2639 }, { "epoch": 0.42246759481517043, "grad_norm": 0.309103786945343, "learning_rate": 5e-05, "loss": 1.4573, "step": 2640 }, { "epoch": 0.4226276204192671, "grad_norm": 0.3183147609233856, "learning_rate": 5e-05, "loss": 1.5121, "step": 2641 }, { "epoch": 0.4227876460233637, "grad_norm": 0.30580851435661316, "learning_rate": 5e-05, "loss": 1.5133, "step": 2642 }, { "epoch": 0.4229476716274604, "grad_norm": 0.3177594244480133, "learning_rate": 5e-05, "loss": 1.5343, "step": 2643 }, { "epoch": 0.42310769723155706, "grad_norm": 0.3281997740268707, "learning_rate": 5e-05, "loss": 1.6026, "step": 2644 }, { "epoch": 0.4232677228356537, "grad_norm": 0.32528698444366455, "learning_rate": 5e-05, "loss": 1.5891, "step": 2645 }, { "epoch": 0.42342774843975034, "grad_norm": 0.3199455142021179, "learning_rate": 5e-05, "loss": 1.563, "step": 2646 }, { "epoch": 0.423587774043847, "grad_norm": 0.32649943232536316, "learning_rate": 5e-05, "loss": 1.6074, "step": 2647 }, { "epoch": 0.4237477996479437, "grad_norm": 0.32055848836898804, "learning_rate": 5e-05, "loss": 1.5858, "step": 2648 }, { "epoch": 0.42390782525204035, "grad_norm": 0.30284130573272705, "learning_rate": 5e-05, "loss": 1.5111, "step": 2649 }, { "epoch": 0.42406785085613696, "grad_norm": 0.32186877727508545, "learning_rate": 5e-05, "loss": 1.5456, "step": 2650 }, { "epoch": 0.42422787646023363, "grad_norm": 0.31584668159484863, "learning_rate": 5e-05, "loss": 1.5176, "step": 2651 }, { "epoch": 0.4243879020643303, "grad_norm": 0.3175991475582123, "learning_rate": 5e-05, "loss": 1.5529, "step": 2652 }, { "epoch": 0.42454792766842697, "grad_norm": 0.29653695225715637, "learning_rate": 5e-05, "loss": 1.4161, "step": 2653 }, { "epoch": 0.4247079532725236, "grad_norm": 0.32229647040367126, "learning_rate": 5e-05, "loss": 1.5579, "step": 2654 }, { "epoch": 0.42486797887662026, "grad_norm": 0.31348660588264465, "learning_rate": 5e-05, "loss": 1.5322, "step": 2655 }, { "epoch": 0.4250280044807169, "grad_norm": 0.32131248712539673, "learning_rate": 5e-05, "loss": 1.537, "step": 2656 }, { "epoch": 0.4251880300848136, "grad_norm": 0.30684882402420044, "learning_rate": 5e-05, "loss": 1.5053, "step": 2657 }, { "epoch": 0.4253480556889102, "grad_norm": 0.3243004381656647, "learning_rate": 5e-05, "loss": 1.5733, "step": 2658 }, { "epoch": 0.4255080812930069, "grad_norm": 0.3202950060367584, "learning_rate": 5e-05, "loss": 1.5523, "step": 2659 }, { "epoch": 0.42566810689710355, "grad_norm": 0.31372717022895813, "learning_rate": 5e-05, "loss": 1.519, "step": 2660 }, { "epoch": 0.4258281325012002, "grad_norm": 0.30873653292655945, "learning_rate": 5e-05, "loss": 1.416, "step": 2661 }, { "epoch": 0.42598815810529683, "grad_norm": 0.32357487082481384, "learning_rate": 5e-05, "loss": 1.5388, "step": 2662 }, { "epoch": 0.4261481837093935, "grad_norm": 0.3082636892795563, "learning_rate": 5e-05, "loss": 1.434, "step": 2663 }, { "epoch": 0.42630820931349017, "grad_norm": 0.3144287168979645, "learning_rate": 5e-05, "loss": 1.4841, "step": 2664 }, { "epoch": 0.42646823491758684, "grad_norm": 0.32046589255332947, "learning_rate": 5e-05, "loss": 1.5629, "step": 2665 }, { "epoch": 0.42662826052168346, "grad_norm": 0.3234570622444153, "learning_rate": 5e-05, "loss": 1.5813, "step": 2666 }, { "epoch": 0.4267882861257801, "grad_norm": 0.3293624520301819, "learning_rate": 5e-05, "loss": 1.5008, "step": 2667 }, { "epoch": 0.4269483117298768, "grad_norm": 0.31021854281425476, "learning_rate": 5e-05, "loss": 1.5113, "step": 2668 }, { "epoch": 0.4271083373339734, "grad_norm": 0.31475940346717834, "learning_rate": 5e-05, "loss": 1.4837, "step": 2669 }, { "epoch": 0.4272683629380701, "grad_norm": 0.313419371843338, "learning_rate": 5e-05, "loss": 1.4628, "step": 2670 }, { "epoch": 0.42742838854216675, "grad_norm": 0.31888312101364136, "learning_rate": 5e-05, "loss": 1.4913, "step": 2671 }, { "epoch": 0.4275884141462634, "grad_norm": 0.3133177161216736, "learning_rate": 5e-05, "loss": 1.5291, "step": 2672 }, { "epoch": 0.42774843975036003, "grad_norm": 0.3292069733142853, "learning_rate": 5e-05, "loss": 1.5591, "step": 2673 }, { "epoch": 0.4279084653544567, "grad_norm": 0.3277050852775574, "learning_rate": 5e-05, "loss": 1.6015, "step": 2674 }, { "epoch": 0.42806849095855337, "grad_norm": 0.3209400773048401, "learning_rate": 5e-05, "loss": 1.5874, "step": 2675 }, { "epoch": 0.42822851656265004, "grad_norm": 0.32008203864097595, "learning_rate": 5e-05, "loss": 1.5511, "step": 2676 }, { "epoch": 0.42838854216674666, "grad_norm": 0.3173503577709198, "learning_rate": 5e-05, "loss": 1.5887, "step": 2677 }, { "epoch": 0.4285485677708433, "grad_norm": 0.3167775869369507, "learning_rate": 5e-05, "loss": 1.5173, "step": 2678 }, { "epoch": 0.42870859337494, "grad_norm": 0.30267342925071716, "learning_rate": 5e-05, "loss": 1.4779, "step": 2679 }, { "epoch": 0.42886861897903666, "grad_norm": 0.30839863419532776, "learning_rate": 5e-05, "loss": 1.4734, "step": 2680 }, { "epoch": 0.4290286445831333, "grad_norm": 0.308302104473114, "learning_rate": 5e-05, "loss": 1.5528, "step": 2681 }, { "epoch": 0.42918867018722995, "grad_norm": 0.31460362672805786, "learning_rate": 5e-05, "loss": 1.4967, "step": 2682 }, { "epoch": 0.4293486957913266, "grad_norm": 0.31192389130592346, "learning_rate": 5e-05, "loss": 1.4836, "step": 2683 }, { "epoch": 0.4295087213954233, "grad_norm": 0.3126670718193054, "learning_rate": 5e-05, "loss": 1.5857, "step": 2684 }, { "epoch": 0.4296687469995199, "grad_norm": 0.30213695764541626, "learning_rate": 5e-05, "loss": 1.511, "step": 2685 }, { "epoch": 0.42982877260361657, "grad_norm": 0.328204870223999, "learning_rate": 5e-05, "loss": 1.5709, "step": 2686 }, { "epoch": 0.42998879820771324, "grad_norm": 0.31440845131874084, "learning_rate": 5e-05, "loss": 1.4767, "step": 2687 }, { "epoch": 0.4301488238118099, "grad_norm": 0.32561323046684265, "learning_rate": 5e-05, "loss": 1.5299, "step": 2688 }, { "epoch": 0.4303088494159065, "grad_norm": 0.33259886503219604, "learning_rate": 5e-05, "loss": 1.5856, "step": 2689 }, { "epoch": 0.4304688750200032, "grad_norm": 0.31010591983795166, "learning_rate": 5e-05, "loss": 1.4822, "step": 2690 }, { "epoch": 0.43062890062409986, "grad_norm": 0.3169402480125427, "learning_rate": 5e-05, "loss": 1.4515, "step": 2691 }, { "epoch": 0.43078892622819653, "grad_norm": 0.39166682958602905, "learning_rate": 5e-05, "loss": 1.531, "step": 2692 }, { "epoch": 0.43094895183229315, "grad_norm": 0.3186309337615967, "learning_rate": 5e-05, "loss": 1.4851, "step": 2693 }, { "epoch": 0.4311089774363898, "grad_norm": 0.31454670429229736, "learning_rate": 5e-05, "loss": 1.4603, "step": 2694 }, { "epoch": 0.4312690030404865, "grad_norm": 0.3168363571166992, "learning_rate": 5e-05, "loss": 1.5093, "step": 2695 }, { "epoch": 0.43142902864458316, "grad_norm": 0.31711333990097046, "learning_rate": 5e-05, "loss": 1.5527, "step": 2696 }, { "epoch": 0.43158905424867977, "grad_norm": 0.3129788637161255, "learning_rate": 5e-05, "loss": 1.5472, "step": 2697 }, { "epoch": 0.43174907985277644, "grad_norm": 0.3210759460926056, "learning_rate": 5e-05, "loss": 1.5022, "step": 2698 }, { "epoch": 0.4319091054568731, "grad_norm": 0.31321099400520325, "learning_rate": 5e-05, "loss": 1.527, "step": 2699 }, { "epoch": 0.4320691310609698, "grad_norm": 0.32034236192703247, "learning_rate": 5e-05, "loss": 1.5522, "step": 2700 }, { "epoch": 0.4322291566650664, "grad_norm": 0.33309316635131836, "learning_rate": 5e-05, "loss": 1.5007, "step": 2701 }, { "epoch": 0.43238918226916306, "grad_norm": 0.31473690271377563, "learning_rate": 5e-05, "loss": 1.5231, "step": 2702 }, { "epoch": 0.43254920787325973, "grad_norm": 0.32192525267601013, "learning_rate": 5e-05, "loss": 1.5175, "step": 2703 }, { "epoch": 0.4327092334773564, "grad_norm": 0.320441871881485, "learning_rate": 5e-05, "loss": 1.5264, "step": 2704 }, { "epoch": 0.432869259081453, "grad_norm": 0.31604912877082825, "learning_rate": 5e-05, "loss": 1.5343, "step": 2705 }, { "epoch": 0.4330292846855497, "grad_norm": 0.32134509086608887, "learning_rate": 5e-05, "loss": 1.4514, "step": 2706 }, { "epoch": 0.43318931028964636, "grad_norm": 0.2990448474884033, "learning_rate": 5e-05, "loss": 1.4803, "step": 2707 }, { "epoch": 0.433349335893743, "grad_norm": 0.3100762665271759, "learning_rate": 5e-05, "loss": 1.4834, "step": 2708 }, { "epoch": 0.43350936149783964, "grad_norm": 0.3294585943222046, "learning_rate": 5e-05, "loss": 1.561, "step": 2709 }, { "epoch": 0.4336693871019363, "grad_norm": 0.33837607502937317, "learning_rate": 5e-05, "loss": 1.5378, "step": 2710 }, { "epoch": 0.433829412706033, "grad_norm": 0.31680265069007874, "learning_rate": 5e-05, "loss": 1.4881, "step": 2711 }, { "epoch": 0.4339894383101296, "grad_norm": 0.306578129529953, "learning_rate": 5e-05, "loss": 1.5288, "step": 2712 }, { "epoch": 0.43414946391422626, "grad_norm": 0.3270615339279175, "learning_rate": 5e-05, "loss": 1.5601, "step": 2713 }, { "epoch": 0.43430948951832293, "grad_norm": 0.36801570653915405, "learning_rate": 5e-05, "loss": 1.6123, "step": 2714 }, { "epoch": 0.4344695151224196, "grad_norm": 0.3046894669532776, "learning_rate": 5e-05, "loss": 1.4734, "step": 2715 }, { "epoch": 0.4346295407265162, "grad_norm": 0.31834158301353455, "learning_rate": 5e-05, "loss": 1.4884, "step": 2716 }, { "epoch": 0.4347895663306129, "grad_norm": 0.29933416843414307, "learning_rate": 5e-05, "loss": 1.4058, "step": 2717 }, { "epoch": 0.43494959193470956, "grad_norm": 0.33082956075668335, "learning_rate": 5e-05, "loss": 1.5933, "step": 2718 }, { "epoch": 0.4351096175388062, "grad_norm": 0.31760188937187195, "learning_rate": 5e-05, "loss": 1.5093, "step": 2719 }, { "epoch": 0.43526964314290284, "grad_norm": 0.3360659182071686, "learning_rate": 5e-05, "loss": 1.5035, "step": 2720 }, { "epoch": 0.4354296687469995, "grad_norm": 0.32966428995132446, "learning_rate": 5e-05, "loss": 1.5508, "step": 2721 }, { "epoch": 0.4355896943510962, "grad_norm": 0.31568586826324463, "learning_rate": 5e-05, "loss": 1.5679, "step": 2722 }, { "epoch": 0.43574971995519285, "grad_norm": 0.3355703055858612, "learning_rate": 5e-05, "loss": 1.5716, "step": 2723 }, { "epoch": 0.43590974555928946, "grad_norm": 0.32929232716560364, "learning_rate": 5e-05, "loss": 1.4986, "step": 2724 }, { "epoch": 0.43606977116338613, "grad_norm": 0.3132035434246063, "learning_rate": 5e-05, "loss": 1.4651, "step": 2725 }, { "epoch": 0.4362297967674828, "grad_norm": 0.31028443574905396, "learning_rate": 5e-05, "loss": 1.4647, "step": 2726 }, { "epoch": 0.43638982237157947, "grad_norm": 0.33859366178512573, "learning_rate": 5e-05, "loss": 1.5479, "step": 2727 }, { "epoch": 0.4365498479756761, "grad_norm": 0.32067906856536865, "learning_rate": 5e-05, "loss": 1.5278, "step": 2728 }, { "epoch": 0.43670987357977276, "grad_norm": 0.31280750036239624, "learning_rate": 5e-05, "loss": 1.4907, "step": 2729 }, { "epoch": 0.4368698991838694, "grad_norm": 0.3316201865673065, "learning_rate": 5e-05, "loss": 1.5571, "step": 2730 }, { "epoch": 0.4370299247879661, "grad_norm": 0.3211338520050049, "learning_rate": 5e-05, "loss": 1.5233, "step": 2731 }, { "epoch": 0.4371899503920627, "grad_norm": 0.33255714178085327, "learning_rate": 5e-05, "loss": 1.55, "step": 2732 }, { "epoch": 0.4373499759961594, "grad_norm": 0.3213350474834442, "learning_rate": 5e-05, "loss": 1.6012, "step": 2733 }, { "epoch": 0.43751000160025605, "grad_norm": 0.3146001100540161, "learning_rate": 5e-05, "loss": 1.5048, "step": 2734 }, { "epoch": 0.4376700272043527, "grad_norm": 0.3220946192741394, "learning_rate": 5e-05, "loss": 1.5677, "step": 2735 }, { "epoch": 0.43783005280844933, "grad_norm": 0.31566858291625977, "learning_rate": 5e-05, "loss": 1.4691, "step": 2736 }, { "epoch": 0.437990078412546, "grad_norm": 0.3116326928138733, "learning_rate": 5e-05, "loss": 1.4779, "step": 2737 }, { "epoch": 0.43815010401664267, "grad_norm": 0.3157716989517212, "learning_rate": 5e-05, "loss": 1.4818, "step": 2738 }, { "epoch": 0.43831012962073934, "grad_norm": 0.32354825735092163, "learning_rate": 5e-05, "loss": 1.5571, "step": 2739 }, { "epoch": 0.43847015522483596, "grad_norm": 0.31809085607528687, "learning_rate": 5e-05, "loss": 1.5114, "step": 2740 }, { "epoch": 0.4386301808289326, "grad_norm": 0.3250500559806824, "learning_rate": 5e-05, "loss": 1.5981, "step": 2741 }, { "epoch": 0.4387902064330293, "grad_norm": 0.33287930488586426, "learning_rate": 5e-05, "loss": 1.5339, "step": 2742 }, { "epoch": 0.43895023203712596, "grad_norm": 0.31534770131111145, "learning_rate": 5e-05, "loss": 1.5284, "step": 2743 }, { "epoch": 0.4391102576412226, "grad_norm": 0.317556768655777, "learning_rate": 5e-05, "loss": 1.5485, "step": 2744 }, { "epoch": 0.43927028324531925, "grad_norm": 0.3138717710971832, "learning_rate": 5e-05, "loss": 1.5303, "step": 2745 }, { "epoch": 0.4394303088494159, "grad_norm": 0.3168152868747711, "learning_rate": 5e-05, "loss": 1.4865, "step": 2746 }, { "epoch": 0.4395903344535126, "grad_norm": 0.31952565908432007, "learning_rate": 5e-05, "loss": 1.4931, "step": 2747 }, { "epoch": 0.4397503600576092, "grad_norm": 0.32993242144584656, "learning_rate": 5e-05, "loss": 1.5728, "step": 2748 }, { "epoch": 0.43991038566170587, "grad_norm": 0.3320251405239105, "learning_rate": 5e-05, "loss": 1.5459, "step": 2749 }, { "epoch": 0.44007041126580254, "grad_norm": 0.326422780752182, "learning_rate": 5e-05, "loss": 1.5231, "step": 2750 }, { "epoch": 0.4402304368698992, "grad_norm": 0.31398338079452515, "learning_rate": 5e-05, "loss": 1.4962, "step": 2751 }, { "epoch": 0.4403904624739958, "grad_norm": 0.32526466250419617, "learning_rate": 5e-05, "loss": 1.5288, "step": 2752 }, { "epoch": 0.4405504880780925, "grad_norm": 0.32328227162361145, "learning_rate": 5e-05, "loss": 1.5188, "step": 2753 }, { "epoch": 0.44071051368218916, "grad_norm": 0.320406049489975, "learning_rate": 5e-05, "loss": 1.4931, "step": 2754 }, { "epoch": 0.4408705392862858, "grad_norm": 0.33195775747299194, "learning_rate": 5e-05, "loss": 1.6083, "step": 2755 }, { "epoch": 0.44103056489038245, "grad_norm": 0.3294637203216553, "learning_rate": 5e-05, "loss": 1.6048, "step": 2756 }, { "epoch": 0.4411905904944791, "grad_norm": 0.3191748559474945, "learning_rate": 5e-05, "loss": 1.5863, "step": 2757 }, { "epoch": 0.4413506160985758, "grad_norm": 0.3314332962036133, "learning_rate": 5e-05, "loss": 1.5373, "step": 2758 }, { "epoch": 0.4415106417026724, "grad_norm": 0.30522966384887695, "learning_rate": 5e-05, "loss": 1.4549, "step": 2759 }, { "epoch": 0.44167066730676907, "grad_norm": 0.3062548041343689, "learning_rate": 5e-05, "loss": 1.4869, "step": 2760 }, { "epoch": 0.44183069291086574, "grad_norm": 0.30388933420181274, "learning_rate": 5e-05, "loss": 1.4107, "step": 2761 }, { "epoch": 0.4419907185149624, "grad_norm": 0.31985002756118774, "learning_rate": 5e-05, "loss": 1.5849, "step": 2762 }, { "epoch": 0.442150744119059, "grad_norm": 0.31363996863365173, "learning_rate": 5e-05, "loss": 1.5027, "step": 2763 }, { "epoch": 0.4423107697231557, "grad_norm": 0.31693634390830994, "learning_rate": 5e-05, "loss": 1.4667, "step": 2764 }, { "epoch": 0.44247079532725236, "grad_norm": 0.3308369219303131, "learning_rate": 5e-05, "loss": 1.5741, "step": 2765 }, { "epoch": 0.44263082093134903, "grad_norm": 0.31027212738990784, "learning_rate": 5e-05, "loss": 1.4744, "step": 2766 }, { "epoch": 0.44279084653544565, "grad_norm": 0.3086203932762146, "learning_rate": 5e-05, "loss": 1.4652, "step": 2767 }, { "epoch": 0.4429508721395423, "grad_norm": 0.33682093024253845, "learning_rate": 5e-05, "loss": 1.5764, "step": 2768 }, { "epoch": 0.443110897743639, "grad_norm": 0.32897406816482544, "learning_rate": 5e-05, "loss": 1.5561, "step": 2769 }, { "epoch": 0.44327092334773566, "grad_norm": 0.32977840304374695, "learning_rate": 5e-05, "loss": 1.516, "step": 2770 }, { "epoch": 0.44343094895183227, "grad_norm": 0.3309125304222107, "learning_rate": 5e-05, "loss": 1.5658, "step": 2771 }, { "epoch": 0.44359097455592894, "grad_norm": 0.31492191553115845, "learning_rate": 5e-05, "loss": 1.4414, "step": 2772 }, { "epoch": 0.4437510001600256, "grad_norm": 0.3157287836074829, "learning_rate": 5e-05, "loss": 1.5203, "step": 2773 }, { "epoch": 0.4439110257641223, "grad_norm": 0.325032502412796, "learning_rate": 5e-05, "loss": 1.5592, "step": 2774 }, { "epoch": 0.4440710513682189, "grad_norm": 0.326984167098999, "learning_rate": 5e-05, "loss": 1.5126, "step": 2775 }, { "epoch": 0.44423107697231556, "grad_norm": 0.3167685866355896, "learning_rate": 5e-05, "loss": 1.4613, "step": 2776 }, { "epoch": 0.44439110257641223, "grad_norm": 0.344596803188324, "learning_rate": 5e-05, "loss": 1.5135, "step": 2777 }, { "epoch": 0.4445511281805089, "grad_norm": 0.31732428073883057, "learning_rate": 5e-05, "loss": 1.456, "step": 2778 }, { "epoch": 0.4447111537846055, "grad_norm": 0.31323543190956116, "learning_rate": 5e-05, "loss": 1.483, "step": 2779 }, { "epoch": 0.4448711793887022, "grad_norm": 0.3303591310977936, "learning_rate": 5e-05, "loss": 1.5168, "step": 2780 }, { "epoch": 0.44503120499279886, "grad_norm": 0.313514769077301, "learning_rate": 5e-05, "loss": 1.4602, "step": 2781 }, { "epoch": 0.4451912305968955, "grad_norm": 0.31031572818756104, "learning_rate": 5e-05, "loss": 1.4961, "step": 2782 }, { "epoch": 0.44535125620099214, "grad_norm": 0.33073022961616516, "learning_rate": 5e-05, "loss": 1.5252, "step": 2783 }, { "epoch": 0.4455112818050888, "grad_norm": 0.31981390714645386, "learning_rate": 5e-05, "loss": 1.4767, "step": 2784 }, { "epoch": 0.4456713074091855, "grad_norm": 0.3202761113643646, "learning_rate": 5e-05, "loss": 1.5447, "step": 2785 }, { "epoch": 0.44583133301328215, "grad_norm": 0.32949239015579224, "learning_rate": 5e-05, "loss": 1.4246, "step": 2786 }, { "epoch": 0.44599135861737876, "grad_norm": 0.32496848702430725, "learning_rate": 5e-05, "loss": 1.5524, "step": 2787 }, { "epoch": 0.44615138422147543, "grad_norm": 0.31923919916152954, "learning_rate": 5e-05, "loss": 1.5353, "step": 2788 }, { "epoch": 0.4463114098255721, "grad_norm": 0.32011306285858154, "learning_rate": 5e-05, "loss": 1.4052, "step": 2789 }, { "epoch": 0.4464714354296688, "grad_norm": 0.3086246848106384, "learning_rate": 5e-05, "loss": 1.4828, "step": 2790 }, { "epoch": 0.4466314610337654, "grad_norm": 0.3448430299758911, "learning_rate": 5e-05, "loss": 1.6203, "step": 2791 }, { "epoch": 0.44679148663786206, "grad_norm": 0.2976425588130951, "learning_rate": 5e-05, "loss": 1.4526, "step": 2792 }, { "epoch": 0.4469515122419587, "grad_norm": 0.31408563256263733, "learning_rate": 5e-05, "loss": 1.422, "step": 2793 }, { "epoch": 0.4471115378460554, "grad_norm": 0.31354790925979614, "learning_rate": 5e-05, "loss": 1.4649, "step": 2794 }, { "epoch": 0.447271563450152, "grad_norm": 0.3263112008571625, "learning_rate": 5e-05, "loss": 1.6079, "step": 2795 }, { "epoch": 0.4474315890542487, "grad_norm": 0.32918989658355713, "learning_rate": 5e-05, "loss": 1.5346, "step": 2796 }, { "epoch": 0.44759161465834535, "grad_norm": 0.3019700348377228, "learning_rate": 5e-05, "loss": 1.4989, "step": 2797 }, { "epoch": 0.44775164026244196, "grad_norm": 0.31725603342056274, "learning_rate": 5e-05, "loss": 1.5261, "step": 2798 }, { "epoch": 0.44791166586653863, "grad_norm": 0.32239022850990295, "learning_rate": 5e-05, "loss": 1.526, "step": 2799 }, { "epoch": 0.4480716914706353, "grad_norm": 0.3052259385585785, "learning_rate": 5e-05, "loss": 1.4478, "step": 2800 }, { "epoch": 0.448231717074732, "grad_norm": 0.3158658444881439, "learning_rate": 5e-05, "loss": 1.5086, "step": 2801 }, { "epoch": 0.4483917426788286, "grad_norm": 0.3201383948326111, "learning_rate": 5e-05, "loss": 1.5002, "step": 2802 }, { "epoch": 0.44855176828292526, "grad_norm": 0.3262096643447876, "learning_rate": 5e-05, "loss": 1.487, "step": 2803 }, { "epoch": 0.4487117938870219, "grad_norm": 0.31233832240104675, "learning_rate": 5e-05, "loss": 1.561, "step": 2804 }, { "epoch": 0.4488718194911186, "grad_norm": 0.3065159022808075, "learning_rate": 5e-05, "loss": 1.5021, "step": 2805 }, { "epoch": 0.4490318450952152, "grad_norm": 0.32738441228866577, "learning_rate": 5e-05, "loss": 1.531, "step": 2806 }, { "epoch": 0.4491918706993119, "grad_norm": 0.3253239095211029, "learning_rate": 5e-05, "loss": 1.5976, "step": 2807 }, { "epoch": 0.44935189630340855, "grad_norm": 0.3123715817928314, "learning_rate": 5e-05, "loss": 1.4819, "step": 2808 }, { "epoch": 0.4495119219075052, "grad_norm": 0.3275702893733978, "learning_rate": 5e-05, "loss": 1.5392, "step": 2809 }, { "epoch": 0.44967194751160183, "grad_norm": 0.3206008970737457, "learning_rate": 5e-05, "loss": 1.5487, "step": 2810 }, { "epoch": 0.4498319731156985, "grad_norm": 0.319986492395401, "learning_rate": 5e-05, "loss": 1.5123, "step": 2811 }, { "epoch": 0.44999199871979517, "grad_norm": 0.3174704909324646, "learning_rate": 5e-05, "loss": 1.4563, "step": 2812 }, { "epoch": 0.45015202432389184, "grad_norm": 0.3324163258075714, "learning_rate": 5e-05, "loss": 1.5771, "step": 2813 }, { "epoch": 0.45031204992798846, "grad_norm": 0.33286812901496887, "learning_rate": 5e-05, "loss": 1.6246, "step": 2814 }, { "epoch": 0.4504720755320851, "grad_norm": 0.32659146189689636, "learning_rate": 5e-05, "loss": 1.515, "step": 2815 }, { "epoch": 0.4506321011361818, "grad_norm": 0.32370373606681824, "learning_rate": 5e-05, "loss": 1.5313, "step": 2816 }, { "epoch": 0.45079212674027846, "grad_norm": 0.32498663663864136, "learning_rate": 5e-05, "loss": 1.5264, "step": 2817 }, { "epoch": 0.4509521523443751, "grad_norm": 0.32379743456840515, "learning_rate": 5e-05, "loss": 1.5564, "step": 2818 }, { "epoch": 0.45111217794847175, "grad_norm": 0.3192424476146698, "learning_rate": 5e-05, "loss": 1.5077, "step": 2819 }, { "epoch": 0.4512722035525684, "grad_norm": 0.3145209848880768, "learning_rate": 5e-05, "loss": 1.4919, "step": 2820 }, { "epoch": 0.4514322291566651, "grad_norm": 0.31290340423583984, "learning_rate": 5e-05, "loss": 1.5092, "step": 2821 }, { "epoch": 0.4515922547607617, "grad_norm": 0.32603511214256287, "learning_rate": 5e-05, "loss": 1.4934, "step": 2822 }, { "epoch": 0.45175228036485837, "grad_norm": 0.3116922080516815, "learning_rate": 5e-05, "loss": 1.4765, "step": 2823 }, { "epoch": 0.45191230596895504, "grad_norm": 0.3183179497718811, "learning_rate": 5e-05, "loss": 1.5811, "step": 2824 }, { "epoch": 0.4520723315730517, "grad_norm": 0.3164985775947571, "learning_rate": 5e-05, "loss": 1.5459, "step": 2825 }, { "epoch": 0.4522323571771483, "grad_norm": 0.3215639889240265, "learning_rate": 5e-05, "loss": 1.5474, "step": 2826 }, { "epoch": 0.452392382781245, "grad_norm": 0.32094934582710266, "learning_rate": 5e-05, "loss": 1.5364, "step": 2827 }, { "epoch": 0.45255240838534166, "grad_norm": 0.33001479506492615, "learning_rate": 5e-05, "loss": 1.5611, "step": 2828 }, { "epoch": 0.45271243398943833, "grad_norm": 0.31750407814979553, "learning_rate": 5e-05, "loss": 1.5327, "step": 2829 }, { "epoch": 0.45287245959353495, "grad_norm": 0.3151971399784088, "learning_rate": 5e-05, "loss": 1.5132, "step": 2830 }, { "epoch": 0.4530324851976316, "grad_norm": 0.33852705359458923, "learning_rate": 5e-05, "loss": 1.5414, "step": 2831 }, { "epoch": 0.4531925108017283, "grad_norm": 0.31552156805992126, "learning_rate": 5e-05, "loss": 1.4525, "step": 2832 }, { "epoch": 0.45335253640582496, "grad_norm": 0.3391796946525574, "learning_rate": 5e-05, "loss": 1.5548, "step": 2833 }, { "epoch": 0.45351256200992157, "grad_norm": 0.3079597055912018, "learning_rate": 5e-05, "loss": 1.5049, "step": 2834 }, { "epoch": 0.45367258761401824, "grad_norm": 0.3302728235721588, "learning_rate": 5e-05, "loss": 1.5649, "step": 2835 }, { "epoch": 0.4538326132181149, "grad_norm": 0.31863126158714294, "learning_rate": 5e-05, "loss": 1.526, "step": 2836 }, { "epoch": 0.4539926388222116, "grad_norm": 0.3213496506214142, "learning_rate": 5e-05, "loss": 1.5427, "step": 2837 }, { "epoch": 0.4541526644263082, "grad_norm": 0.30962470173835754, "learning_rate": 5e-05, "loss": 1.4753, "step": 2838 }, { "epoch": 0.45431269003040486, "grad_norm": 0.3197287321090698, "learning_rate": 5e-05, "loss": 1.5311, "step": 2839 }, { "epoch": 0.45447271563450153, "grad_norm": 0.3157151937484741, "learning_rate": 5e-05, "loss": 1.5345, "step": 2840 }, { "epoch": 0.45463274123859815, "grad_norm": 0.3020000159740448, "learning_rate": 5e-05, "loss": 1.4371, "step": 2841 }, { "epoch": 0.4547927668426948, "grad_norm": 0.32622647285461426, "learning_rate": 5e-05, "loss": 1.533, "step": 2842 }, { "epoch": 0.4549527924467915, "grad_norm": 0.3144678473472595, "learning_rate": 5e-05, "loss": 1.4092, "step": 2843 }, { "epoch": 0.45511281805088816, "grad_norm": 0.3256159722805023, "learning_rate": 5e-05, "loss": 1.4908, "step": 2844 }, { "epoch": 0.45527284365498477, "grad_norm": 0.3196593225002289, "learning_rate": 5e-05, "loss": 1.5132, "step": 2845 }, { "epoch": 0.45543286925908144, "grad_norm": 0.32406434416770935, "learning_rate": 5e-05, "loss": 1.5237, "step": 2846 }, { "epoch": 0.4555928948631781, "grad_norm": 0.3218875825405121, "learning_rate": 5e-05, "loss": 1.555, "step": 2847 }, { "epoch": 0.4557529204672748, "grad_norm": 0.3145506978034973, "learning_rate": 5e-05, "loss": 1.511, "step": 2848 }, { "epoch": 0.4559129460713714, "grad_norm": 0.32893726229667664, "learning_rate": 5e-05, "loss": 1.5555, "step": 2849 }, { "epoch": 0.45607297167546806, "grad_norm": 0.31977203488349915, "learning_rate": 5e-05, "loss": 1.477, "step": 2850 }, { "epoch": 0.45623299727956473, "grad_norm": 0.32526257634162903, "learning_rate": 5e-05, "loss": 1.561, "step": 2851 }, { "epoch": 0.4563930228836614, "grad_norm": 0.3266395330429077, "learning_rate": 5e-05, "loss": 1.5286, "step": 2852 }, { "epoch": 0.456553048487758, "grad_norm": 0.3242952525615692, "learning_rate": 5e-05, "loss": 1.5112, "step": 2853 }, { "epoch": 0.4567130740918547, "grad_norm": 0.3163132667541504, "learning_rate": 5e-05, "loss": 1.4657, "step": 2854 }, { "epoch": 0.45687309969595136, "grad_norm": 0.34300497174263, "learning_rate": 5e-05, "loss": 1.5089, "step": 2855 }, { "epoch": 0.457033125300048, "grad_norm": 0.33849847316741943, "learning_rate": 5e-05, "loss": 1.5101, "step": 2856 }, { "epoch": 0.45719315090414464, "grad_norm": 0.32710906863212585, "learning_rate": 5e-05, "loss": 1.5909, "step": 2857 }, { "epoch": 0.4573531765082413, "grad_norm": 0.342473566532135, "learning_rate": 5e-05, "loss": 1.5008, "step": 2858 }, { "epoch": 0.457513202112338, "grad_norm": 0.3182411193847656, "learning_rate": 5e-05, "loss": 1.5472, "step": 2859 }, { "epoch": 0.45767322771643465, "grad_norm": 0.32336920499801636, "learning_rate": 5e-05, "loss": 1.5253, "step": 2860 }, { "epoch": 0.45783325332053126, "grad_norm": 0.32765376567840576, "learning_rate": 5e-05, "loss": 1.5091, "step": 2861 }, { "epoch": 0.45799327892462793, "grad_norm": 0.32802140712738037, "learning_rate": 5e-05, "loss": 1.5256, "step": 2862 }, { "epoch": 0.4581533045287246, "grad_norm": 0.3298431932926178, "learning_rate": 5e-05, "loss": 1.5983, "step": 2863 }, { "epoch": 0.4583133301328213, "grad_norm": 0.3130180239677429, "learning_rate": 5e-05, "loss": 1.5348, "step": 2864 }, { "epoch": 0.4584733557369179, "grad_norm": 0.33739766478538513, "learning_rate": 5e-05, "loss": 1.6066, "step": 2865 }, { "epoch": 0.45863338134101456, "grad_norm": 0.3103930354118347, "learning_rate": 5e-05, "loss": 1.5042, "step": 2866 }, { "epoch": 0.4587934069451112, "grad_norm": 0.30197906494140625, "learning_rate": 5e-05, "loss": 1.4383, "step": 2867 }, { "epoch": 0.4589534325492079, "grad_norm": 0.3153356909751892, "learning_rate": 5e-05, "loss": 1.4736, "step": 2868 }, { "epoch": 0.4591134581533045, "grad_norm": 0.3293820917606354, "learning_rate": 5e-05, "loss": 1.4915, "step": 2869 }, { "epoch": 0.4592734837574012, "grad_norm": 0.3285522758960724, "learning_rate": 5e-05, "loss": 1.5593, "step": 2870 }, { "epoch": 0.45943350936149785, "grad_norm": 0.30902165174484253, "learning_rate": 5e-05, "loss": 1.4679, "step": 2871 }, { "epoch": 0.4595935349655945, "grad_norm": 0.3126634359359741, "learning_rate": 5e-05, "loss": 1.4615, "step": 2872 }, { "epoch": 0.45975356056969113, "grad_norm": 0.33417028188705444, "learning_rate": 5e-05, "loss": 1.4784, "step": 2873 }, { "epoch": 0.4599135861737878, "grad_norm": 0.3191852271556854, "learning_rate": 5e-05, "loss": 1.4373, "step": 2874 }, { "epoch": 0.4600736117778845, "grad_norm": 0.32366490364074707, "learning_rate": 5e-05, "loss": 1.514, "step": 2875 }, { "epoch": 0.46023363738198114, "grad_norm": 0.31304407119750977, "learning_rate": 5e-05, "loss": 1.5254, "step": 2876 }, { "epoch": 0.46039366298607776, "grad_norm": 0.32400697469711304, "learning_rate": 5e-05, "loss": 1.5604, "step": 2877 }, { "epoch": 0.4605536885901744, "grad_norm": 0.31771960854530334, "learning_rate": 5e-05, "loss": 1.5248, "step": 2878 }, { "epoch": 0.4607137141942711, "grad_norm": 0.3171881139278412, "learning_rate": 5e-05, "loss": 1.4778, "step": 2879 }, { "epoch": 0.46087373979836777, "grad_norm": 0.3087197542190552, "learning_rate": 5e-05, "loss": 1.4937, "step": 2880 }, { "epoch": 0.4610337654024644, "grad_norm": 0.32152411341667175, "learning_rate": 5e-05, "loss": 1.4666, "step": 2881 }, { "epoch": 0.46119379100656105, "grad_norm": 0.32887002825737, "learning_rate": 5e-05, "loss": 1.4888, "step": 2882 }, { "epoch": 0.4613538166106577, "grad_norm": 0.32844993472099304, "learning_rate": 5e-05, "loss": 1.5994, "step": 2883 }, { "epoch": 0.4615138422147544, "grad_norm": 0.31364238262176514, "learning_rate": 5e-05, "loss": 1.4755, "step": 2884 }, { "epoch": 0.461673867818851, "grad_norm": 0.3271302580833435, "learning_rate": 5e-05, "loss": 1.5964, "step": 2885 }, { "epoch": 0.46183389342294767, "grad_norm": 0.3085896670818329, "learning_rate": 5e-05, "loss": 1.4564, "step": 2886 }, { "epoch": 0.46199391902704434, "grad_norm": 0.3213440775871277, "learning_rate": 5e-05, "loss": 1.5475, "step": 2887 }, { "epoch": 0.46215394463114096, "grad_norm": 0.3053237497806549, "learning_rate": 5e-05, "loss": 1.5024, "step": 2888 }, { "epoch": 0.4623139702352376, "grad_norm": 0.3357420861721039, "learning_rate": 5e-05, "loss": 1.5372, "step": 2889 }, { "epoch": 0.4624739958393343, "grad_norm": 0.31948617100715637, "learning_rate": 5e-05, "loss": 1.5135, "step": 2890 }, { "epoch": 0.46263402144343097, "grad_norm": 0.31858280301094055, "learning_rate": 5e-05, "loss": 1.5264, "step": 2891 }, { "epoch": 0.4627940470475276, "grad_norm": 0.3161901533603668, "learning_rate": 5e-05, "loss": 1.4847, "step": 2892 }, { "epoch": 0.46295407265162425, "grad_norm": 0.31874439120292664, "learning_rate": 5e-05, "loss": 1.4971, "step": 2893 }, { "epoch": 0.4631140982557209, "grad_norm": 0.328444242477417, "learning_rate": 5e-05, "loss": 1.5257, "step": 2894 }, { "epoch": 0.4632741238598176, "grad_norm": 0.322666198015213, "learning_rate": 5e-05, "loss": 1.4996, "step": 2895 }, { "epoch": 0.4634341494639142, "grad_norm": 0.3136758804321289, "learning_rate": 5e-05, "loss": 1.4229, "step": 2896 }, { "epoch": 0.46359417506801087, "grad_norm": 0.32326480746269226, "learning_rate": 5e-05, "loss": 1.5402, "step": 2897 }, { "epoch": 0.46375420067210754, "grad_norm": 0.3256310224533081, "learning_rate": 5e-05, "loss": 1.4945, "step": 2898 }, { "epoch": 0.4639142262762042, "grad_norm": 0.3220905661582947, "learning_rate": 5e-05, "loss": 1.5536, "step": 2899 }, { "epoch": 0.4640742518803008, "grad_norm": 0.32475125789642334, "learning_rate": 5e-05, "loss": 1.5442, "step": 2900 }, { "epoch": 0.4642342774843975, "grad_norm": 0.3188819885253906, "learning_rate": 5e-05, "loss": 1.481, "step": 2901 }, { "epoch": 0.46439430308849416, "grad_norm": 0.3157956600189209, "learning_rate": 5e-05, "loss": 1.5404, "step": 2902 }, { "epoch": 0.46455432869259083, "grad_norm": 0.32594358921051025, "learning_rate": 5e-05, "loss": 1.5172, "step": 2903 }, { "epoch": 0.46471435429668745, "grad_norm": 0.3135848343372345, "learning_rate": 5e-05, "loss": 1.5247, "step": 2904 }, { "epoch": 0.4648743799007841, "grad_norm": 0.3175068497657776, "learning_rate": 5e-05, "loss": 1.5399, "step": 2905 }, { "epoch": 0.4650344055048808, "grad_norm": 0.3297857344150543, "learning_rate": 5e-05, "loss": 1.442, "step": 2906 }, { "epoch": 0.46519443110897746, "grad_norm": 0.3319510519504547, "learning_rate": 5e-05, "loss": 1.5951, "step": 2907 }, { "epoch": 0.46535445671307407, "grad_norm": 0.3114558160305023, "learning_rate": 5e-05, "loss": 1.5075, "step": 2908 }, { "epoch": 0.46551448231717074, "grad_norm": 0.3210623264312744, "learning_rate": 5e-05, "loss": 1.4921, "step": 2909 }, { "epoch": 0.4656745079212674, "grad_norm": 0.3292990028858185, "learning_rate": 5e-05, "loss": 1.5494, "step": 2910 }, { "epoch": 0.4658345335253641, "grad_norm": 0.33565667271614075, "learning_rate": 5e-05, "loss": 1.5176, "step": 2911 }, { "epoch": 0.4659945591294607, "grad_norm": 0.3036874830722809, "learning_rate": 5e-05, "loss": 1.4361, "step": 2912 }, { "epoch": 0.46615458473355736, "grad_norm": 0.33785805106163025, "learning_rate": 5e-05, "loss": 1.5587, "step": 2913 }, { "epoch": 0.46631461033765403, "grad_norm": 0.321012943983078, "learning_rate": 5e-05, "loss": 1.5129, "step": 2914 }, { "epoch": 0.4664746359417507, "grad_norm": 0.3329792022705078, "learning_rate": 5e-05, "loss": 1.466, "step": 2915 }, { "epoch": 0.4666346615458473, "grad_norm": 0.3234361708164215, "learning_rate": 5e-05, "loss": 1.5468, "step": 2916 }, { "epoch": 0.466794687149944, "grad_norm": 0.3194989562034607, "learning_rate": 5e-05, "loss": 1.4879, "step": 2917 }, { "epoch": 0.46695471275404066, "grad_norm": 0.320772647857666, "learning_rate": 5e-05, "loss": 1.4712, "step": 2918 }, { "epoch": 0.4671147383581373, "grad_norm": 0.32093170285224915, "learning_rate": 5e-05, "loss": 1.635, "step": 2919 }, { "epoch": 0.46727476396223394, "grad_norm": 0.3078162372112274, "learning_rate": 5e-05, "loss": 1.5134, "step": 2920 }, { "epoch": 0.4674347895663306, "grad_norm": 0.3082874119281769, "learning_rate": 5e-05, "loss": 1.5171, "step": 2921 }, { "epoch": 0.4675948151704273, "grad_norm": 0.3259470462799072, "learning_rate": 5e-05, "loss": 1.5777, "step": 2922 }, { "epoch": 0.46775484077452395, "grad_norm": 0.3143087327480316, "learning_rate": 5e-05, "loss": 1.5685, "step": 2923 }, { "epoch": 0.46791486637862056, "grad_norm": 0.3214273750782013, "learning_rate": 5e-05, "loss": 1.5031, "step": 2924 }, { "epoch": 0.46807489198271723, "grad_norm": 0.3235897719860077, "learning_rate": 5e-05, "loss": 1.5263, "step": 2925 }, { "epoch": 0.4682349175868139, "grad_norm": 0.3049080967903137, "learning_rate": 5e-05, "loss": 1.4693, "step": 2926 }, { "epoch": 0.4683949431909106, "grad_norm": 0.32741811871528625, "learning_rate": 5e-05, "loss": 1.5274, "step": 2927 }, { "epoch": 0.4685549687950072, "grad_norm": 0.3220342993736267, "learning_rate": 5e-05, "loss": 1.52, "step": 2928 }, { "epoch": 0.46871499439910386, "grad_norm": 0.3158935308456421, "learning_rate": 5e-05, "loss": 1.4369, "step": 2929 }, { "epoch": 0.4688750200032005, "grad_norm": 0.33141738176345825, "learning_rate": 5e-05, "loss": 1.5192, "step": 2930 }, { "epoch": 0.46903504560729714, "grad_norm": 0.3291212320327759, "learning_rate": 5e-05, "loss": 1.54, "step": 2931 }, { "epoch": 0.4691950712113938, "grad_norm": 0.31618237495422363, "learning_rate": 5e-05, "loss": 1.4095, "step": 2932 }, { "epoch": 0.4693550968154905, "grad_norm": 0.32138633728027344, "learning_rate": 5e-05, "loss": 1.4908, "step": 2933 }, { "epoch": 0.46951512241958715, "grad_norm": 0.3183984160423279, "learning_rate": 5e-05, "loss": 1.5531, "step": 2934 }, { "epoch": 0.46967514802368376, "grad_norm": 0.32084035873413086, "learning_rate": 5e-05, "loss": 1.498, "step": 2935 }, { "epoch": 0.46983517362778043, "grad_norm": 0.3239779472351074, "learning_rate": 5e-05, "loss": 1.4828, "step": 2936 }, { "epoch": 0.4699951992318771, "grad_norm": 0.3218679130077362, "learning_rate": 5e-05, "loss": 1.4547, "step": 2937 }, { "epoch": 0.4701552248359738, "grad_norm": 0.31925830245018005, "learning_rate": 5e-05, "loss": 1.5239, "step": 2938 }, { "epoch": 0.4703152504400704, "grad_norm": 0.3125882148742676, "learning_rate": 5e-05, "loss": 1.4721, "step": 2939 }, { "epoch": 0.47047527604416706, "grad_norm": 0.3248351812362671, "learning_rate": 5e-05, "loss": 1.5354, "step": 2940 }, { "epoch": 0.4706353016482637, "grad_norm": 0.3286011219024658, "learning_rate": 5e-05, "loss": 1.5172, "step": 2941 }, { "epoch": 0.4707953272523604, "grad_norm": 0.31296083331108093, "learning_rate": 5e-05, "loss": 1.4769, "step": 2942 }, { "epoch": 0.470955352856457, "grad_norm": 0.3251446783542633, "learning_rate": 5e-05, "loss": 1.519, "step": 2943 }, { "epoch": 0.4711153784605537, "grad_norm": 0.32130229473114014, "learning_rate": 5e-05, "loss": 1.517, "step": 2944 }, { "epoch": 0.47127540406465035, "grad_norm": 0.31106847524642944, "learning_rate": 5e-05, "loss": 1.4783, "step": 2945 }, { "epoch": 0.471435429668747, "grad_norm": 0.3230256140232086, "learning_rate": 5e-05, "loss": 1.5199, "step": 2946 }, { "epoch": 0.47159545527284363, "grad_norm": 0.333840012550354, "learning_rate": 5e-05, "loss": 1.5928, "step": 2947 }, { "epoch": 0.4717554808769403, "grad_norm": 0.3237602412700653, "learning_rate": 5e-05, "loss": 1.5653, "step": 2948 }, { "epoch": 0.471915506481037, "grad_norm": 0.3359051048755646, "learning_rate": 5e-05, "loss": 1.5565, "step": 2949 }, { "epoch": 0.47207553208513364, "grad_norm": 0.3142199218273163, "learning_rate": 5e-05, "loss": 1.4949, "step": 2950 }, { "epoch": 0.47223555768923026, "grad_norm": 0.31829559803009033, "learning_rate": 5e-05, "loss": 1.5258, "step": 2951 }, { "epoch": 0.4723955832933269, "grad_norm": 0.30946704745292664, "learning_rate": 5e-05, "loss": 1.5218, "step": 2952 }, { "epoch": 0.4725556088974236, "grad_norm": 0.3192158639431, "learning_rate": 5e-05, "loss": 1.4962, "step": 2953 }, { "epoch": 0.47271563450152027, "grad_norm": 0.327085018157959, "learning_rate": 5e-05, "loss": 1.5711, "step": 2954 }, { "epoch": 0.4728756601056169, "grad_norm": 0.3161799907684326, "learning_rate": 5e-05, "loss": 1.5516, "step": 2955 }, { "epoch": 0.47303568570971355, "grad_norm": 0.30569422245025635, "learning_rate": 5e-05, "loss": 1.4586, "step": 2956 }, { "epoch": 0.4731957113138102, "grad_norm": 0.33213379979133606, "learning_rate": 5e-05, "loss": 1.5887, "step": 2957 }, { "epoch": 0.4733557369179069, "grad_norm": 0.3450460135936737, "learning_rate": 5e-05, "loss": 1.5242, "step": 2958 }, { "epoch": 0.4735157625220035, "grad_norm": 0.3290388882160187, "learning_rate": 5e-05, "loss": 1.5854, "step": 2959 }, { "epoch": 0.4736757881261002, "grad_norm": 0.3774629235267639, "learning_rate": 5e-05, "loss": 1.6329, "step": 2960 }, { "epoch": 0.47383581373019684, "grad_norm": 0.3229564130306244, "learning_rate": 5e-05, "loss": 1.48, "step": 2961 }, { "epoch": 0.4739958393342935, "grad_norm": 0.3312984108924866, "learning_rate": 5e-05, "loss": 1.5823, "step": 2962 }, { "epoch": 0.4741558649383901, "grad_norm": 0.3486693203449249, "learning_rate": 5e-05, "loss": 1.5365, "step": 2963 }, { "epoch": 0.4743158905424868, "grad_norm": 0.32860344648361206, "learning_rate": 5e-05, "loss": 1.5328, "step": 2964 }, { "epoch": 0.47447591614658347, "grad_norm": 0.30507153272628784, "learning_rate": 5e-05, "loss": 1.4301, "step": 2965 }, { "epoch": 0.47463594175068013, "grad_norm": 0.3406248092651367, "learning_rate": 5e-05, "loss": 1.5734, "step": 2966 }, { "epoch": 0.47479596735477675, "grad_norm": 0.3351372480392456, "learning_rate": 5e-05, "loss": 1.5724, "step": 2967 }, { "epoch": 0.4749559929588734, "grad_norm": 0.32107657194137573, "learning_rate": 5e-05, "loss": 1.4207, "step": 2968 }, { "epoch": 0.4751160185629701, "grad_norm": 0.35369420051574707, "learning_rate": 5e-05, "loss": 1.5295, "step": 2969 }, { "epoch": 0.47527604416706676, "grad_norm": 0.3235504627227783, "learning_rate": 5e-05, "loss": 1.588, "step": 2970 }, { "epoch": 0.47543606977116337, "grad_norm": 0.3329419493675232, "learning_rate": 5e-05, "loss": 1.5425, "step": 2971 }, { "epoch": 0.47559609537526004, "grad_norm": 0.340305358171463, "learning_rate": 5e-05, "loss": 1.5148, "step": 2972 }, { "epoch": 0.4757561209793567, "grad_norm": 0.3282431364059448, "learning_rate": 5e-05, "loss": 1.5632, "step": 2973 }, { "epoch": 0.4759161465834533, "grad_norm": 0.31457290053367615, "learning_rate": 5e-05, "loss": 1.4609, "step": 2974 }, { "epoch": 0.47607617218755, "grad_norm": 0.3279787600040436, "learning_rate": 5e-05, "loss": 1.5401, "step": 2975 }, { "epoch": 0.47623619779164666, "grad_norm": 0.32480770349502563, "learning_rate": 5e-05, "loss": 1.4939, "step": 2976 }, { "epoch": 0.47639622339574333, "grad_norm": 0.30945855379104614, "learning_rate": 5e-05, "loss": 1.5069, "step": 2977 }, { "epoch": 0.47655624899983995, "grad_norm": 0.33707159757614136, "learning_rate": 5e-05, "loss": 1.5212, "step": 2978 }, { "epoch": 0.4767162746039366, "grad_norm": 0.33305972814559937, "learning_rate": 5e-05, "loss": 1.5181, "step": 2979 }, { "epoch": 0.4768763002080333, "grad_norm": 0.3264942169189453, "learning_rate": 5e-05, "loss": 1.5207, "step": 2980 }, { "epoch": 0.47703632581212996, "grad_norm": 0.34166181087493896, "learning_rate": 5e-05, "loss": 1.5296, "step": 2981 }, { "epoch": 0.47719635141622657, "grad_norm": 0.3141612708568573, "learning_rate": 5e-05, "loss": 1.447, "step": 2982 }, { "epoch": 0.47735637702032324, "grad_norm": 0.32458919286727905, "learning_rate": 5e-05, "loss": 1.5163, "step": 2983 }, { "epoch": 0.4775164026244199, "grad_norm": 0.3113027811050415, "learning_rate": 5e-05, "loss": 1.4936, "step": 2984 }, { "epoch": 0.4776764282285166, "grad_norm": 0.33470040559768677, "learning_rate": 5e-05, "loss": 1.5881, "step": 2985 }, { "epoch": 0.4778364538326132, "grad_norm": 0.3167709708213806, "learning_rate": 5e-05, "loss": 1.55, "step": 2986 }, { "epoch": 0.47799647943670986, "grad_norm": 0.3092174530029297, "learning_rate": 5e-05, "loss": 1.5507, "step": 2987 }, { "epoch": 0.47815650504080653, "grad_norm": 0.32105615735054016, "learning_rate": 5e-05, "loss": 1.5545, "step": 2988 }, { "epoch": 0.4783165306449032, "grad_norm": 0.3318346440792084, "learning_rate": 5e-05, "loss": 1.6147, "step": 2989 }, { "epoch": 0.4784765562489998, "grad_norm": 0.330137699842453, "learning_rate": 5e-05, "loss": 1.5355, "step": 2990 }, { "epoch": 0.4786365818530965, "grad_norm": 0.3285491168498993, "learning_rate": 5e-05, "loss": 1.5112, "step": 2991 }, { "epoch": 0.47879660745719316, "grad_norm": 0.33091211318969727, "learning_rate": 5e-05, "loss": 1.5905, "step": 2992 }, { "epoch": 0.4789566330612898, "grad_norm": 0.3195870518684387, "learning_rate": 5e-05, "loss": 1.5304, "step": 2993 }, { "epoch": 0.47911665866538644, "grad_norm": 0.32016271352767944, "learning_rate": 5e-05, "loss": 1.4844, "step": 2994 }, { "epoch": 0.4792766842694831, "grad_norm": 0.327475368976593, "learning_rate": 5e-05, "loss": 1.4345, "step": 2995 }, { "epoch": 0.4794367098735798, "grad_norm": 0.32520338892936707, "learning_rate": 5e-05, "loss": 1.4684, "step": 2996 }, { "epoch": 0.47959673547767645, "grad_norm": 0.3130253255367279, "learning_rate": 5e-05, "loss": 1.4441, "step": 2997 }, { "epoch": 0.47975676108177306, "grad_norm": 0.31870928406715393, "learning_rate": 5e-05, "loss": 1.5023, "step": 2998 }, { "epoch": 0.47991678668586973, "grad_norm": 0.34060654044151306, "learning_rate": 5e-05, "loss": 1.4877, "step": 2999 }, { "epoch": 0.4800768122899664, "grad_norm": 0.3381800651550293, "learning_rate": 5e-05, "loss": 1.5773, "step": 3000 }, { "epoch": 0.4802368378940631, "grad_norm": 0.31705376505851746, "learning_rate": 5e-05, "loss": 1.4588, "step": 3001 }, { "epoch": 0.4803968634981597, "grad_norm": 0.32966428995132446, "learning_rate": 5e-05, "loss": 1.5555, "step": 3002 }, { "epoch": 0.48055688910225636, "grad_norm": 0.31934836506843567, "learning_rate": 5e-05, "loss": 1.5088, "step": 3003 }, { "epoch": 0.480716914706353, "grad_norm": 0.31602102518081665, "learning_rate": 5e-05, "loss": 1.5448, "step": 3004 }, { "epoch": 0.4808769403104497, "grad_norm": 0.32086122035980225, "learning_rate": 5e-05, "loss": 1.5388, "step": 3005 }, { "epoch": 0.4810369659145463, "grad_norm": 0.3303581774234772, "learning_rate": 5e-05, "loss": 1.4869, "step": 3006 }, { "epoch": 0.481196991518643, "grad_norm": 0.32214051485061646, "learning_rate": 5e-05, "loss": 1.5268, "step": 3007 }, { "epoch": 0.48135701712273965, "grad_norm": 0.33968067169189453, "learning_rate": 5e-05, "loss": 1.5763, "step": 3008 }, { "epoch": 0.4815170427268363, "grad_norm": 0.3195759057998657, "learning_rate": 5e-05, "loss": 1.4479, "step": 3009 }, { "epoch": 0.48167706833093293, "grad_norm": 0.3125060498714447, "learning_rate": 5e-05, "loss": 1.4775, "step": 3010 }, { "epoch": 0.4818370939350296, "grad_norm": 0.31918391585350037, "learning_rate": 5e-05, "loss": 1.4749, "step": 3011 }, { "epoch": 0.4819971195391263, "grad_norm": 0.3141189217567444, "learning_rate": 5e-05, "loss": 1.4915, "step": 3012 }, { "epoch": 0.48215714514322294, "grad_norm": 0.3494149446487427, "learning_rate": 5e-05, "loss": 1.4884, "step": 3013 }, { "epoch": 0.48231717074731956, "grad_norm": 0.3306373357772827, "learning_rate": 5e-05, "loss": 1.5822, "step": 3014 }, { "epoch": 0.4824771963514162, "grad_norm": 0.31847089529037476, "learning_rate": 5e-05, "loss": 1.5246, "step": 3015 }, { "epoch": 0.4826372219555129, "grad_norm": 0.32818347215652466, "learning_rate": 5e-05, "loss": 1.4928, "step": 3016 }, { "epoch": 0.4827972475596095, "grad_norm": 0.3245554566383362, "learning_rate": 5e-05, "loss": 1.5164, "step": 3017 }, { "epoch": 0.4829572731637062, "grad_norm": 0.3325593173503876, "learning_rate": 5e-05, "loss": 1.4978, "step": 3018 }, { "epoch": 0.48311729876780285, "grad_norm": 0.3468886613845825, "learning_rate": 5e-05, "loss": 1.516, "step": 3019 }, { "epoch": 0.4832773243718995, "grad_norm": 0.32546111941337585, "learning_rate": 5e-05, "loss": 1.5525, "step": 3020 }, { "epoch": 0.48343734997599613, "grad_norm": 0.3269898593425751, "learning_rate": 5e-05, "loss": 1.5017, "step": 3021 }, { "epoch": 0.4835973755800928, "grad_norm": 0.33421143889427185, "learning_rate": 5e-05, "loss": 1.5259, "step": 3022 }, { "epoch": 0.4837574011841895, "grad_norm": 0.31147444248199463, "learning_rate": 5e-05, "loss": 1.4169, "step": 3023 }, { "epoch": 0.48391742678828614, "grad_norm": 0.3260347545146942, "learning_rate": 5e-05, "loss": 1.5314, "step": 3024 }, { "epoch": 0.48407745239238276, "grad_norm": 0.32647064328193665, "learning_rate": 5e-05, "loss": 1.5558, "step": 3025 }, { "epoch": 0.4842374779964794, "grad_norm": 0.32549914717674255, "learning_rate": 5e-05, "loss": 1.5631, "step": 3026 }, { "epoch": 0.4843975036005761, "grad_norm": 0.33187490701675415, "learning_rate": 5e-05, "loss": 1.5396, "step": 3027 }, { "epoch": 0.48455752920467277, "grad_norm": 0.32923269271850586, "learning_rate": 5e-05, "loss": 1.5423, "step": 3028 }, { "epoch": 0.4847175548087694, "grad_norm": 0.3266642987728119, "learning_rate": 5e-05, "loss": 1.5391, "step": 3029 }, { "epoch": 0.48487758041286605, "grad_norm": 0.336105614900589, "learning_rate": 5e-05, "loss": 1.4923, "step": 3030 }, { "epoch": 0.4850376060169627, "grad_norm": 0.32492661476135254, "learning_rate": 5e-05, "loss": 1.4388, "step": 3031 }, { "epoch": 0.4851976316210594, "grad_norm": 0.31145530939102173, "learning_rate": 5e-05, "loss": 1.4325, "step": 3032 }, { "epoch": 0.485357657225156, "grad_norm": 0.31226664781570435, "learning_rate": 5e-05, "loss": 1.4989, "step": 3033 }, { "epoch": 0.4855176828292527, "grad_norm": 0.32355278730392456, "learning_rate": 5e-05, "loss": 1.5581, "step": 3034 }, { "epoch": 0.48567770843334934, "grad_norm": 0.32196441292762756, "learning_rate": 5e-05, "loss": 1.5045, "step": 3035 }, { "epoch": 0.485837734037446, "grad_norm": 0.3216044008731842, "learning_rate": 5e-05, "loss": 1.5359, "step": 3036 }, { "epoch": 0.4859977596415426, "grad_norm": 0.30936679244041443, "learning_rate": 5e-05, "loss": 1.4785, "step": 3037 }, { "epoch": 0.4861577852456393, "grad_norm": 0.31330329179763794, "learning_rate": 5e-05, "loss": 1.493, "step": 3038 }, { "epoch": 0.48631781084973597, "grad_norm": 0.32136744260787964, "learning_rate": 5e-05, "loss": 1.5056, "step": 3039 }, { "epoch": 0.48647783645383263, "grad_norm": 0.3176264464855194, "learning_rate": 5e-05, "loss": 1.5047, "step": 3040 }, { "epoch": 0.48663786205792925, "grad_norm": 0.3343697488307953, "learning_rate": 5e-05, "loss": 1.4907, "step": 3041 }, { "epoch": 0.4867978876620259, "grad_norm": 0.32425954937934875, "learning_rate": 5e-05, "loss": 1.5051, "step": 3042 }, { "epoch": 0.4869579132661226, "grad_norm": 0.3197120428085327, "learning_rate": 5e-05, "loss": 1.4781, "step": 3043 }, { "epoch": 0.48711793887021926, "grad_norm": 0.3267155885696411, "learning_rate": 5e-05, "loss": 1.6069, "step": 3044 }, { "epoch": 0.48727796447431587, "grad_norm": 0.320871502161026, "learning_rate": 5e-05, "loss": 1.4495, "step": 3045 }, { "epoch": 0.48743799007841254, "grad_norm": 0.31301993131637573, "learning_rate": 5e-05, "loss": 1.527, "step": 3046 }, { "epoch": 0.4875980156825092, "grad_norm": 0.31360816955566406, "learning_rate": 5e-05, "loss": 1.4951, "step": 3047 }, { "epoch": 0.4877580412866059, "grad_norm": 0.3248850107192993, "learning_rate": 5e-05, "loss": 1.5592, "step": 3048 }, { "epoch": 0.4879180668907025, "grad_norm": 0.31572195887565613, "learning_rate": 5e-05, "loss": 1.4654, "step": 3049 }, { "epoch": 0.48807809249479917, "grad_norm": 0.3230098485946655, "learning_rate": 5e-05, "loss": 1.5213, "step": 3050 }, { "epoch": 0.48823811809889583, "grad_norm": 0.3120233416557312, "learning_rate": 5e-05, "loss": 1.5442, "step": 3051 }, { "epoch": 0.4883981437029925, "grad_norm": 0.3267976939678192, "learning_rate": 5e-05, "loss": 1.5846, "step": 3052 }, { "epoch": 0.4885581693070891, "grad_norm": 0.32308757305145264, "learning_rate": 5e-05, "loss": 1.5111, "step": 3053 }, { "epoch": 0.4887181949111858, "grad_norm": 0.30462196469306946, "learning_rate": 5e-05, "loss": 1.4485, "step": 3054 }, { "epoch": 0.48887822051528246, "grad_norm": 0.3177090287208557, "learning_rate": 5e-05, "loss": 1.4271, "step": 3055 }, { "epoch": 0.4890382461193791, "grad_norm": 0.32540300488471985, "learning_rate": 5e-05, "loss": 1.5299, "step": 3056 }, { "epoch": 0.48919827172347574, "grad_norm": 0.31518784165382385, "learning_rate": 5e-05, "loss": 1.4568, "step": 3057 }, { "epoch": 0.4893582973275724, "grad_norm": 0.3357296884059906, "learning_rate": 5e-05, "loss": 1.5209, "step": 3058 }, { "epoch": 0.4895183229316691, "grad_norm": 0.3200496733188629, "learning_rate": 5e-05, "loss": 1.5318, "step": 3059 }, { "epoch": 0.4896783485357657, "grad_norm": 0.32350587844848633, "learning_rate": 5e-05, "loss": 1.5099, "step": 3060 }, { "epoch": 0.48983837413986236, "grad_norm": 0.32303386926651, "learning_rate": 5e-05, "loss": 1.4882, "step": 3061 }, { "epoch": 0.48999839974395903, "grad_norm": 0.3302907347679138, "learning_rate": 5e-05, "loss": 1.5444, "step": 3062 }, { "epoch": 0.4901584253480557, "grad_norm": 0.3144305348396301, "learning_rate": 5e-05, "loss": 1.4602, "step": 3063 }, { "epoch": 0.4903184509521523, "grad_norm": 0.32994478940963745, "learning_rate": 5e-05, "loss": 1.5968, "step": 3064 }, { "epoch": 0.490478476556249, "grad_norm": 0.3266187906265259, "learning_rate": 5e-05, "loss": 1.5291, "step": 3065 }, { "epoch": 0.49063850216034566, "grad_norm": 0.3183164596557617, "learning_rate": 5e-05, "loss": 1.3902, "step": 3066 }, { "epoch": 0.4907985277644423, "grad_norm": 0.31479793787002563, "learning_rate": 5e-05, "loss": 1.491, "step": 3067 }, { "epoch": 0.49095855336853894, "grad_norm": 0.3144097924232483, "learning_rate": 5e-05, "loss": 1.4251, "step": 3068 }, { "epoch": 0.4911185789726356, "grad_norm": 0.3233608305454254, "learning_rate": 5e-05, "loss": 1.5079, "step": 3069 }, { "epoch": 0.4912786045767323, "grad_norm": 0.30764567852020264, "learning_rate": 5e-05, "loss": 1.4944, "step": 3070 }, { "epoch": 0.49143863018082895, "grad_norm": 0.32447779178619385, "learning_rate": 5e-05, "loss": 1.5707, "step": 3071 }, { "epoch": 0.49159865578492556, "grad_norm": 0.3195626735687256, "learning_rate": 5e-05, "loss": 1.4912, "step": 3072 }, { "epoch": 0.49175868138902223, "grad_norm": 0.32525256276130676, "learning_rate": 5e-05, "loss": 1.4973, "step": 3073 }, { "epoch": 0.4919187069931189, "grad_norm": 0.3162990212440491, "learning_rate": 5e-05, "loss": 1.4603, "step": 3074 }, { "epoch": 0.4920787325972156, "grad_norm": 0.31124886870384216, "learning_rate": 5e-05, "loss": 1.4566, "step": 3075 }, { "epoch": 0.4922387582013122, "grad_norm": 0.31020358204841614, "learning_rate": 5e-05, "loss": 1.5156, "step": 3076 }, { "epoch": 0.49239878380540886, "grad_norm": 0.3189070224761963, "learning_rate": 5e-05, "loss": 1.4909, "step": 3077 }, { "epoch": 0.4925588094095055, "grad_norm": 0.3160281777381897, "learning_rate": 5e-05, "loss": 1.4871, "step": 3078 }, { "epoch": 0.4927188350136022, "grad_norm": 0.32203781604766846, "learning_rate": 5e-05, "loss": 1.4912, "step": 3079 }, { "epoch": 0.4928788606176988, "grad_norm": 0.3290347754955292, "learning_rate": 5e-05, "loss": 1.4902, "step": 3080 }, { "epoch": 0.4930388862217955, "grad_norm": 0.3405555784702301, "learning_rate": 5e-05, "loss": 1.5468, "step": 3081 }, { "epoch": 0.49319891182589215, "grad_norm": 0.3255423307418823, "learning_rate": 5e-05, "loss": 1.4644, "step": 3082 }, { "epoch": 0.4933589374299888, "grad_norm": 0.3318719267845154, "learning_rate": 5e-05, "loss": 1.5251, "step": 3083 }, { "epoch": 0.49351896303408543, "grad_norm": 0.32673266530036926, "learning_rate": 5e-05, "loss": 1.4622, "step": 3084 }, { "epoch": 0.4936789886381821, "grad_norm": 0.32226428389549255, "learning_rate": 5e-05, "loss": 1.5168, "step": 3085 }, { "epoch": 0.4938390142422788, "grad_norm": 0.3197799324989319, "learning_rate": 5e-05, "loss": 1.432, "step": 3086 }, { "epoch": 0.49399903984637544, "grad_norm": 0.3227764964103699, "learning_rate": 5e-05, "loss": 1.4709, "step": 3087 }, { "epoch": 0.49415906545047206, "grad_norm": 0.31877946853637695, "learning_rate": 5e-05, "loss": 1.4656, "step": 3088 }, { "epoch": 0.4943190910545687, "grad_norm": 0.32793331146240234, "learning_rate": 5e-05, "loss": 1.4815, "step": 3089 }, { "epoch": 0.4944791166586654, "grad_norm": 0.31298989057540894, "learning_rate": 5e-05, "loss": 1.452, "step": 3090 }, { "epoch": 0.49463914226276207, "grad_norm": 0.34232309460639954, "learning_rate": 5e-05, "loss": 1.536, "step": 3091 }, { "epoch": 0.4947991678668587, "grad_norm": 0.3356248140335083, "learning_rate": 5e-05, "loss": 1.4928, "step": 3092 }, { "epoch": 0.49495919347095535, "grad_norm": 0.3287542760372162, "learning_rate": 5e-05, "loss": 1.5555, "step": 3093 }, { "epoch": 0.495119219075052, "grad_norm": 0.32796794176101685, "learning_rate": 5e-05, "loss": 1.495, "step": 3094 }, { "epoch": 0.4952792446791487, "grad_norm": 0.3194712996482849, "learning_rate": 5e-05, "loss": 1.4908, "step": 3095 }, { "epoch": 0.4954392702832453, "grad_norm": 0.3276445269584656, "learning_rate": 5e-05, "loss": 1.5241, "step": 3096 }, { "epoch": 0.495599295887342, "grad_norm": 0.3383883833885193, "learning_rate": 5e-05, "loss": 1.5683, "step": 3097 }, { "epoch": 0.49575932149143864, "grad_norm": 0.3213694989681244, "learning_rate": 5e-05, "loss": 1.5035, "step": 3098 }, { "epoch": 0.4959193470955353, "grad_norm": 0.3293825387954712, "learning_rate": 5e-05, "loss": 1.4854, "step": 3099 }, { "epoch": 0.4960793726996319, "grad_norm": 0.3212741017341614, "learning_rate": 5e-05, "loss": 1.4303, "step": 3100 }, { "epoch": 0.4962393983037286, "grad_norm": 0.3277633488178253, "learning_rate": 5e-05, "loss": 1.4858, "step": 3101 }, { "epoch": 0.49639942390782527, "grad_norm": 0.32029470801353455, "learning_rate": 5e-05, "loss": 1.5401, "step": 3102 }, { "epoch": 0.4965594495119219, "grad_norm": 0.34076452255249023, "learning_rate": 5e-05, "loss": 1.5607, "step": 3103 }, { "epoch": 0.49671947511601855, "grad_norm": 0.3339100182056427, "learning_rate": 5e-05, "loss": 1.4708, "step": 3104 }, { "epoch": 0.4968795007201152, "grad_norm": 0.3227928876876831, "learning_rate": 5e-05, "loss": 1.5125, "step": 3105 }, { "epoch": 0.4970395263242119, "grad_norm": 0.32770898938179016, "learning_rate": 5e-05, "loss": 1.5717, "step": 3106 }, { "epoch": 0.4971995519283085, "grad_norm": 0.32331618666648865, "learning_rate": 5e-05, "loss": 1.503, "step": 3107 }, { "epoch": 0.4973595775324052, "grad_norm": 0.3385186493396759, "learning_rate": 5e-05, "loss": 1.5324, "step": 3108 }, { "epoch": 0.49751960313650184, "grad_norm": 0.3089561462402344, "learning_rate": 5e-05, "loss": 1.4703, "step": 3109 }, { "epoch": 0.4976796287405985, "grad_norm": 0.3213292956352234, "learning_rate": 5e-05, "loss": 1.4794, "step": 3110 }, { "epoch": 0.4978396543446951, "grad_norm": 0.319109708070755, "learning_rate": 5e-05, "loss": 1.4411, "step": 3111 }, { "epoch": 0.4979996799487918, "grad_norm": 0.3314836621284485, "learning_rate": 5e-05, "loss": 1.4976, "step": 3112 }, { "epoch": 0.49815970555288847, "grad_norm": 0.3185053765773773, "learning_rate": 5e-05, "loss": 1.4246, "step": 3113 }, { "epoch": 0.49831973115698514, "grad_norm": 0.3209887444972992, "learning_rate": 5e-05, "loss": 1.4759, "step": 3114 }, { "epoch": 0.49847975676108175, "grad_norm": 0.3346211910247803, "learning_rate": 5e-05, "loss": 1.5211, "step": 3115 }, { "epoch": 0.4986397823651784, "grad_norm": 0.33026131987571716, "learning_rate": 5e-05, "loss": 1.451, "step": 3116 }, { "epoch": 0.4987998079692751, "grad_norm": 0.3316846489906311, "learning_rate": 5e-05, "loss": 1.5215, "step": 3117 }, { "epoch": 0.49895983357337176, "grad_norm": 0.33092841506004333, "learning_rate": 5e-05, "loss": 1.4757, "step": 3118 }, { "epoch": 0.4991198591774684, "grad_norm": 0.3375653028488159, "learning_rate": 5e-05, "loss": 1.5301, "step": 3119 }, { "epoch": 0.49927988478156504, "grad_norm": 0.3221588730812073, "learning_rate": 5e-05, "loss": 1.4636, "step": 3120 }, { "epoch": 0.4994399103856617, "grad_norm": 0.33880728483200073, "learning_rate": 5e-05, "loss": 1.4875, "step": 3121 }, { "epoch": 0.4995999359897584, "grad_norm": 0.3116314113140106, "learning_rate": 5e-05, "loss": 1.532, "step": 3122 }, { "epoch": 0.499759961593855, "grad_norm": 0.32921460270881653, "learning_rate": 5e-05, "loss": 1.4615, "step": 3123 }, { "epoch": 0.49991998719795167, "grad_norm": 0.33576518297195435, "learning_rate": 5e-05, "loss": 1.4488, "step": 3124 }, { "epoch": 0.5000800128020483, "grad_norm": 0.3084135055541992, "learning_rate": 5e-05, "loss": 1.4434, "step": 3125 }, { "epoch": 0.500240038406145, "grad_norm": 0.3275096118450165, "learning_rate": 5e-05, "loss": 1.5211, "step": 3126 }, { "epoch": 0.5004000640102416, "grad_norm": 0.32767266035079956, "learning_rate": 5e-05, "loss": 1.5268, "step": 3127 }, { "epoch": 0.5005600896143383, "grad_norm": 0.3190962076187134, "learning_rate": 5e-05, "loss": 1.5059, "step": 3128 }, { "epoch": 0.500720115218435, "grad_norm": 0.33198216557502747, "learning_rate": 5e-05, "loss": 1.4748, "step": 3129 }, { "epoch": 0.5008801408225316, "grad_norm": 0.33191636204719543, "learning_rate": 5e-05, "loss": 1.5846, "step": 3130 }, { "epoch": 0.5010401664266283, "grad_norm": 0.31920331716537476, "learning_rate": 5e-05, "loss": 1.4976, "step": 3131 }, { "epoch": 0.501200192030725, "grad_norm": 0.33464816212654114, "learning_rate": 5e-05, "loss": 1.5037, "step": 3132 }, { "epoch": 0.5013602176348215, "grad_norm": 0.3272404670715332, "learning_rate": 5e-05, "loss": 1.486, "step": 3133 }, { "epoch": 0.5015202432389182, "grad_norm": 0.3315878212451935, "learning_rate": 5e-05, "loss": 1.4833, "step": 3134 }, { "epoch": 0.5016802688430149, "grad_norm": 0.32489141821861267, "learning_rate": 5e-05, "loss": 1.5616, "step": 3135 }, { "epoch": 0.5018402944471115, "grad_norm": 0.32205134630203247, "learning_rate": 5e-05, "loss": 1.4861, "step": 3136 }, { "epoch": 0.5020003200512082, "grad_norm": 0.3257125914096832, "learning_rate": 5e-05, "loss": 1.559, "step": 3137 }, { "epoch": 0.5021603456553049, "grad_norm": 0.3377113938331604, "learning_rate": 5e-05, "loss": 1.564, "step": 3138 }, { "epoch": 0.5023203712594015, "grad_norm": 0.32932719588279724, "learning_rate": 5e-05, "loss": 1.5055, "step": 3139 }, { "epoch": 0.5024803968634982, "grad_norm": 0.32438957691192627, "learning_rate": 5e-05, "loss": 1.4604, "step": 3140 }, { "epoch": 0.5026404224675948, "grad_norm": 0.31944605708122253, "learning_rate": 5e-05, "loss": 1.5187, "step": 3141 }, { "epoch": 0.5028004480716914, "grad_norm": 0.3136952817440033, "learning_rate": 5e-05, "loss": 1.4958, "step": 3142 }, { "epoch": 0.5029604736757881, "grad_norm": 0.3158091604709625, "learning_rate": 5e-05, "loss": 1.5388, "step": 3143 }, { "epoch": 0.5031204992798848, "grad_norm": 0.32455602288246155, "learning_rate": 5e-05, "loss": 1.4626, "step": 3144 }, { "epoch": 0.5032805248839815, "grad_norm": 0.3204946219921112, "learning_rate": 5e-05, "loss": 1.5031, "step": 3145 }, { "epoch": 0.5034405504880781, "grad_norm": 0.32806602120399475, "learning_rate": 5e-05, "loss": 1.4796, "step": 3146 }, { "epoch": 0.5036005760921748, "grad_norm": 0.33311596512794495, "learning_rate": 5e-05, "loss": 1.4935, "step": 3147 }, { "epoch": 0.5037606016962713, "grad_norm": 0.3309608995914459, "learning_rate": 5e-05, "loss": 1.5608, "step": 3148 }, { "epoch": 0.503920627300368, "grad_norm": 0.31030237674713135, "learning_rate": 5e-05, "loss": 1.4166, "step": 3149 }, { "epoch": 0.5040806529044647, "grad_norm": 0.3192208409309387, "learning_rate": 5e-05, "loss": 1.526, "step": 3150 }, { "epoch": 0.5042406785085614, "grad_norm": 0.335277795791626, "learning_rate": 5e-05, "loss": 1.4437, "step": 3151 }, { "epoch": 0.504400704112658, "grad_norm": 0.3124740719795227, "learning_rate": 5e-05, "loss": 1.3962, "step": 3152 }, { "epoch": 0.5045607297167547, "grad_norm": 0.3444817066192627, "learning_rate": 5e-05, "loss": 1.5395, "step": 3153 }, { "epoch": 0.5047207553208514, "grad_norm": 0.32496964931488037, "learning_rate": 5e-05, "loss": 1.4734, "step": 3154 }, { "epoch": 0.504880780924948, "grad_norm": 0.3280503749847412, "learning_rate": 5e-05, "loss": 1.5224, "step": 3155 }, { "epoch": 0.5050408065290446, "grad_norm": 0.3214718699455261, "learning_rate": 5e-05, "loss": 1.5304, "step": 3156 }, { "epoch": 0.5052008321331413, "grad_norm": 0.33191531896591187, "learning_rate": 5e-05, "loss": 1.5192, "step": 3157 }, { "epoch": 0.5053608577372379, "grad_norm": 0.3235267698764801, "learning_rate": 5e-05, "loss": 1.5051, "step": 3158 }, { "epoch": 0.5055208833413346, "grad_norm": 0.32874053716659546, "learning_rate": 5e-05, "loss": 1.4461, "step": 3159 }, { "epoch": 0.5056809089454313, "grad_norm": 0.32418426871299744, "learning_rate": 5e-05, "loss": 1.4762, "step": 3160 }, { "epoch": 0.5058409345495279, "grad_norm": 0.3332507014274597, "learning_rate": 5e-05, "loss": 1.502, "step": 3161 }, { "epoch": 0.5060009601536246, "grad_norm": 0.3206436038017273, "learning_rate": 5e-05, "loss": 1.516, "step": 3162 }, { "epoch": 0.5061609857577213, "grad_norm": 0.3256309926509857, "learning_rate": 5e-05, "loss": 1.5086, "step": 3163 }, { "epoch": 0.5063210113618178, "grad_norm": 0.3360174596309662, "learning_rate": 5e-05, "loss": 1.5296, "step": 3164 }, { "epoch": 0.5064810369659145, "grad_norm": 0.3377054035663605, "learning_rate": 5e-05, "loss": 1.4904, "step": 3165 }, { "epoch": 0.5066410625700112, "grad_norm": 0.3562556803226471, "learning_rate": 5e-05, "loss": 1.5773, "step": 3166 }, { "epoch": 0.5068010881741078, "grad_norm": 0.3260129392147064, "learning_rate": 5e-05, "loss": 1.5135, "step": 3167 }, { "epoch": 0.5069611137782045, "grad_norm": 0.33310773968696594, "learning_rate": 5e-05, "loss": 1.4712, "step": 3168 }, { "epoch": 0.5071211393823012, "grad_norm": 0.3337329030036926, "learning_rate": 5e-05, "loss": 1.5542, "step": 3169 }, { "epoch": 0.5072811649863979, "grad_norm": 0.32173365354537964, "learning_rate": 5e-05, "loss": 1.4451, "step": 3170 }, { "epoch": 0.5074411905904945, "grad_norm": 0.33581990003585815, "learning_rate": 5e-05, "loss": 1.5436, "step": 3171 }, { "epoch": 0.5076012161945911, "grad_norm": 0.323356032371521, "learning_rate": 5e-05, "loss": 1.4949, "step": 3172 }, { "epoch": 0.5077612417986878, "grad_norm": 0.3209819793701172, "learning_rate": 5e-05, "loss": 1.5164, "step": 3173 }, { "epoch": 0.5079212674027844, "grad_norm": 0.34109869599342346, "learning_rate": 5e-05, "loss": 1.4901, "step": 3174 }, { "epoch": 0.5080812930068811, "grad_norm": 0.32823705673217773, "learning_rate": 5e-05, "loss": 1.4497, "step": 3175 }, { "epoch": 0.5082413186109778, "grad_norm": 0.32901859283447266, "learning_rate": 5e-05, "loss": 1.4722, "step": 3176 }, { "epoch": 0.5084013442150744, "grad_norm": 0.341512531042099, "learning_rate": 5e-05, "loss": 1.5665, "step": 3177 }, { "epoch": 0.5085613698191711, "grad_norm": 0.31593918800354004, "learning_rate": 5e-05, "loss": 1.4886, "step": 3178 }, { "epoch": 0.5087213954232678, "grad_norm": 0.329998254776001, "learning_rate": 5e-05, "loss": 1.529, "step": 3179 }, { "epoch": 0.5088814210273643, "grad_norm": 0.33089426159858704, "learning_rate": 5e-05, "loss": 1.5089, "step": 3180 }, { "epoch": 0.509041446631461, "grad_norm": 0.32422947883605957, "learning_rate": 5e-05, "loss": 1.456, "step": 3181 }, { "epoch": 0.5092014722355577, "grad_norm": 0.3226728141307831, "learning_rate": 5e-05, "loss": 1.4405, "step": 3182 }, { "epoch": 0.5093614978396543, "grad_norm": 0.3393298387527466, "learning_rate": 5e-05, "loss": 1.4652, "step": 3183 }, { "epoch": 0.509521523443751, "grad_norm": 0.3241175413131714, "learning_rate": 5e-05, "loss": 1.5118, "step": 3184 }, { "epoch": 0.5096815490478477, "grad_norm": 0.32050973176956177, "learning_rate": 5e-05, "loss": 1.4928, "step": 3185 }, { "epoch": 0.5098415746519444, "grad_norm": 0.3343556225299835, "learning_rate": 5e-05, "loss": 1.5495, "step": 3186 }, { "epoch": 0.510001600256041, "grad_norm": 0.34144628047943115, "learning_rate": 5e-05, "loss": 1.4994, "step": 3187 }, { "epoch": 0.5101616258601376, "grad_norm": 0.32137954235076904, "learning_rate": 5e-05, "loss": 1.5158, "step": 3188 }, { "epoch": 0.5103216514642342, "grad_norm": 0.33134615421295166, "learning_rate": 5e-05, "loss": 1.5065, "step": 3189 }, { "epoch": 0.5104816770683309, "grad_norm": 0.341030091047287, "learning_rate": 5e-05, "loss": 1.5417, "step": 3190 }, { "epoch": 0.5106417026724276, "grad_norm": 0.3215707838535309, "learning_rate": 5e-05, "loss": 1.503, "step": 3191 }, { "epoch": 0.5108017282765243, "grad_norm": 0.34073546528816223, "learning_rate": 5e-05, "loss": 1.6242, "step": 3192 }, { "epoch": 0.5109617538806209, "grad_norm": 0.3423992097377777, "learning_rate": 5e-05, "loss": 1.5188, "step": 3193 }, { "epoch": 0.5111217794847176, "grad_norm": 0.3456670343875885, "learning_rate": 5e-05, "loss": 1.6472, "step": 3194 }, { "epoch": 0.5112818050888142, "grad_norm": 0.315856009721756, "learning_rate": 5e-05, "loss": 1.4972, "step": 3195 }, { "epoch": 0.5114418306929108, "grad_norm": 0.3148317039012909, "learning_rate": 5e-05, "loss": 1.3974, "step": 3196 }, { "epoch": 0.5116018562970075, "grad_norm": 0.32875242829322815, "learning_rate": 5e-05, "loss": 1.4635, "step": 3197 }, { "epoch": 0.5117618819011042, "grad_norm": 0.31963926553726196, "learning_rate": 5e-05, "loss": 1.4115, "step": 3198 }, { "epoch": 0.5119219075052008, "grad_norm": 0.3200531005859375, "learning_rate": 5e-05, "loss": 1.4923, "step": 3199 }, { "epoch": 0.5120819331092975, "grad_norm": 0.3212439715862274, "learning_rate": 5e-05, "loss": 1.5234, "step": 3200 }, { "epoch": 0.5122419587133942, "grad_norm": 0.33300289511680603, "learning_rate": 5e-05, "loss": 1.4883, "step": 3201 }, { "epoch": 0.5124019843174908, "grad_norm": 0.3309611976146698, "learning_rate": 5e-05, "loss": 1.4883, "step": 3202 }, { "epoch": 0.5125620099215874, "grad_norm": 0.32228371500968933, "learning_rate": 5e-05, "loss": 1.501, "step": 3203 }, { "epoch": 0.5127220355256841, "grad_norm": 0.33037006855010986, "learning_rate": 5e-05, "loss": 1.4671, "step": 3204 }, { "epoch": 0.5128820611297807, "grad_norm": 0.32929086685180664, "learning_rate": 5e-05, "loss": 1.5337, "step": 3205 }, { "epoch": 0.5130420867338774, "grad_norm": 0.3320816457271576, "learning_rate": 5e-05, "loss": 1.5584, "step": 3206 }, { "epoch": 0.5132021123379741, "grad_norm": 0.3335808217525482, "learning_rate": 5e-05, "loss": 1.5245, "step": 3207 }, { "epoch": 0.5133621379420708, "grad_norm": 0.3172411322593689, "learning_rate": 5e-05, "loss": 1.4798, "step": 3208 }, { "epoch": 0.5135221635461674, "grad_norm": 0.3220973014831543, "learning_rate": 5e-05, "loss": 1.549, "step": 3209 }, { "epoch": 0.5136821891502641, "grad_norm": 0.32744401693344116, "learning_rate": 5e-05, "loss": 1.5057, "step": 3210 }, { "epoch": 0.5138422147543606, "grad_norm": 0.34558841586112976, "learning_rate": 5e-05, "loss": 1.5819, "step": 3211 }, { "epoch": 0.5140022403584573, "grad_norm": 0.31728169322013855, "learning_rate": 5e-05, "loss": 1.5342, "step": 3212 }, { "epoch": 0.514162265962554, "grad_norm": 0.3226095139980316, "learning_rate": 5e-05, "loss": 1.4508, "step": 3213 }, { "epoch": 0.5143222915666507, "grad_norm": 0.3391305208206177, "learning_rate": 5e-05, "loss": 1.5704, "step": 3214 }, { "epoch": 0.5144823171707473, "grad_norm": 0.3277122676372528, "learning_rate": 5e-05, "loss": 1.5589, "step": 3215 }, { "epoch": 0.514642342774844, "grad_norm": 0.3321702480316162, "learning_rate": 5e-05, "loss": 1.5004, "step": 3216 }, { "epoch": 0.5148023683789407, "grad_norm": 0.32103145122528076, "learning_rate": 5e-05, "loss": 1.5395, "step": 3217 }, { "epoch": 0.5149623939830373, "grad_norm": 0.321310818195343, "learning_rate": 5e-05, "loss": 1.4879, "step": 3218 }, { "epoch": 0.5151224195871339, "grad_norm": 0.3275834619998932, "learning_rate": 5e-05, "loss": 1.5456, "step": 3219 }, { "epoch": 0.5152824451912306, "grad_norm": 0.3167651295661926, "learning_rate": 5e-05, "loss": 1.4431, "step": 3220 }, { "epoch": 0.5154424707953272, "grad_norm": 0.3503282368183136, "learning_rate": 5e-05, "loss": 1.5721, "step": 3221 }, { "epoch": 0.5156024963994239, "grad_norm": 0.3224450349807739, "learning_rate": 5e-05, "loss": 1.4841, "step": 3222 }, { "epoch": 0.5157625220035206, "grad_norm": 0.33248016238212585, "learning_rate": 5e-05, "loss": 1.5318, "step": 3223 }, { "epoch": 0.5159225476076172, "grad_norm": 0.3333366811275482, "learning_rate": 5e-05, "loss": 1.4683, "step": 3224 }, { "epoch": 0.5160825732117139, "grad_norm": 0.3370986580848694, "learning_rate": 5e-05, "loss": 1.5332, "step": 3225 }, { "epoch": 0.5162425988158106, "grad_norm": 0.3124665319919586, "learning_rate": 5e-05, "loss": 1.5073, "step": 3226 }, { "epoch": 0.5164026244199071, "grad_norm": 0.34155797958374023, "learning_rate": 5e-05, "loss": 1.4559, "step": 3227 }, { "epoch": 0.5165626500240038, "grad_norm": 0.33211779594421387, "learning_rate": 5e-05, "loss": 1.4573, "step": 3228 }, { "epoch": 0.5167226756281005, "grad_norm": 0.33653712272644043, "learning_rate": 5e-05, "loss": 1.5379, "step": 3229 }, { "epoch": 0.5168827012321972, "grad_norm": 0.3266640901565552, "learning_rate": 5e-05, "loss": 1.5412, "step": 3230 }, { "epoch": 0.5170427268362938, "grad_norm": 0.3408679664134979, "learning_rate": 5e-05, "loss": 1.514, "step": 3231 }, { "epoch": 0.5172027524403905, "grad_norm": 0.33375823497772217, "learning_rate": 5e-05, "loss": 1.5237, "step": 3232 }, { "epoch": 0.5173627780444872, "grad_norm": 0.32451510429382324, "learning_rate": 5e-05, "loss": 1.5191, "step": 3233 }, { "epoch": 0.5175228036485837, "grad_norm": 0.32183319330215454, "learning_rate": 5e-05, "loss": 1.4947, "step": 3234 }, { "epoch": 0.5176828292526804, "grad_norm": 0.31340447068214417, "learning_rate": 5e-05, "loss": 1.4527, "step": 3235 }, { "epoch": 0.5178428548567771, "grad_norm": 0.3290766775608063, "learning_rate": 5e-05, "loss": 1.5059, "step": 3236 }, { "epoch": 0.5180028804608737, "grad_norm": 0.32842519879341125, "learning_rate": 5e-05, "loss": 1.5071, "step": 3237 }, { "epoch": 0.5181629060649704, "grad_norm": 0.32282769680023193, "learning_rate": 5e-05, "loss": 1.4798, "step": 3238 }, { "epoch": 0.5183229316690671, "grad_norm": 0.3189011514186859, "learning_rate": 5e-05, "loss": 1.4536, "step": 3239 }, { "epoch": 0.5184829572731637, "grad_norm": 0.40332260727882385, "learning_rate": 5e-05, "loss": 1.5163, "step": 3240 }, { "epoch": 0.5186429828772604, "grad_norm": 0.32846400141716003, "learning_rate": 5e-05, "loss": 1.5234, "step": 3241 }, { "epoch": 0.518803008481357, "grad_norm": 0.33376023173332214, "learning_rate": 5e-05, "loss": 1.5577, "step": 3242 }, { "epoch": 0.5189630340854536, "grad_norm": 0.3325027525424957, "learning_rate": 5e-05, "loss": 1.5727, "step": 3243 }, { "epoch": 0.5191230596895503, "grad_norm": 0.3401384949684143, "learning_rate": 5e-05, "loss": 1.497, "step": 3244 }, { "epoch": 0.519283085293647, "grad_norm": 0.31952592730522156, "learning_rate": 5e-05, "loss": 1.4631, "step": 3245 }, { "epoch": 0.5194431108977436, "grad_norm": 0.3222770690917969, "learning_rate": 5e-05, "loss": 1.4836, "step": 3246 }, { "epoch": 0.5196031365018403, "grad_norm": 0.31881383061408997, "learning_rate": 5e-05, "loss": 1.4166, "step": 3247 }, { "epoch": 0.519763162105937, "grad_norm": 0.32017451524734497, "learning_rate": 5e-05, "loss": 1.5284, "step": 3248 }, { "epoch": 0.5199231877100337, "grad_norm": 0.31976577639579773, "learning_rate": 5e-05, "loss": 1.4455, "step": 3249 }, { "epoch": 0.5200832133141302, "grad_norm": 0.3299669325351715, "learning_rate": 5e-05, "loss": 1.5211, "step": 3250 }, { "epoch": 0.5202432389182269, "grad_norm": 0.32312923669815063, "learning_rate": 5e-05, "loss": 1.5413, "step": 3251 }, { "epoch": 0.5204032645223235, "grad_norm": 0.3457547426223755, "learning_rate": 5e-05, "loss": 1.6634, "step": 3252 }, { "epoch": 0.5205632901264202, "grad_norm": 0.31551462411880493, "learning_rate": 5e-05, "loss": 1.4607, "step": 3253 }, { "epoch": 0.5207233157305169, "grad_norm": 0.3287838101387024, "learning_rate": 5e-05, "loss": 1.455, "step": 3254 }, { "epoch": 0.5208833413346136, "grad_norm": 0.32620492577552795, "learning_rate": 5e-05, "loss": 1.4835, "step": 3255 }, { "epoch": 0.5210433669387102, "grad_norm": 0.3149770200252533, "learning_rate": 5e-05, "loss": 1.5141, "step": 3256 }, { "epoch": 0.5212033925428069, "grad_norm": 0.336549311876297, "learning_rate": 5e-05, "loss": 1.5013, "step": 3257 }, { "epoch": 0.5213634181469035, "grad_norm": 0.34889930486679077, "learning_rate": 5e-05, "loss": 1.5775, "step": 3258 }, { "epoch": 0.5215234437510001, "grad_norm": 0.33330556750297546, "learning_rate": 5e-05, "loss": 1.4682, "step": 3259 }, { "epoch": 0.5216834693550968, "grad_norm": 0.33784154057502747, "learning_rate": 5e-05, "loss": 1.5069, "step": 3260 }, { "epoch": 0.5218434949591935, "grad_norm": 0.3221644461154938, "learning_rate": 5e-05, "loss": 1.4345, "step": 3261 }, { "epoch": 0.5220035205632901, "grad_norm": 0.33483684062957764, "learning_rate": 5e-05, "loss": 1.5898, "step": 3262 }, { "epoch": 0.5221635461673868, "grad_norm": 0.3252618908882141, "learning_rate": 5e-05, "loss": 1.5147, "step": 3263 }, { "epoch": 0.5223235717714835, "grad_norm": 0.3238481879234314, "learning_rate": 5e-05, "loss": 1.5057, "step": 3264 }, { "epoch": 0.5224835973755801, "grad_norm": 0.3304922878742218, "learning_rate": 5e-05, "loss": 1.4772, "step": 3265 }, { "epoch": 0.5226436229796767, "grad_norm": 0.31975027918815613, "learning_rate": 5e-05, "loss": 1.4842, "step": 3266 }, { "epoch": 0.5228036485837734, "grad_norm": 0.3296407461166382, "learning_rate": 5e-05, "loss": 1.5606, "step": 3267 }, { "epoch": 0.52296367418787, "grad_norm": 0.31786298751831055, "learning_rate": 5e-05, "loss": 1.4806, "step": 3268 }, { "epoch": 0.5231236997919667, "grad_norm": 0.31829896569252014, "learning_rate": 5e-05, "loss": 1.512, "step": 3269 }, { "epoch": 0.5232837253960634, "grad_norm": 0.5290788412094116, "learning_rate": 5e-05, "loss": 1.4645, "step": 3270 }, { "epoch": 0.52344375100016, "grad_norm": 0.3350558876991272, "learning_rate": 5e-05, "loss": 1.541, "step": 3271 }, { "epoch": 0.5236037766042567, "grad_norm": 0.33788779377937317, "learning_rate": 5e-05, "loss": 1.4519, "step": 3272 }, { "epoch": 0.5237638022083534, "grad_norm": 0.3164636194705963, "learning_rate": 5e-05, "loss": 1.4219, "step": 3273 }, { "epoch": 0.52392382781245, "grad_norm": 0.32854804396629333, "learning_rate": 5e-05, "loss": 1.5212, "step": 3274 }, { "epoch": 0.5240838534165466, "grad_norm": 0.36806172132492065, "learning_rate": 5e-05, "loss": 1.5515, "step": 3275 }, { "epoch": 0.5242438790206433, "grad_norm": 0.3477180004119873, "learning_rate": 5e-05, "loss": 1.5799, "step": 3276 }, { "epoch": 0.52440390462474, "grad_norm": 0.3238092064857483, "learning_rate": 5e-05, "loss": 1.536, "step": 3277 }, { "epoch": 0.5245639302288366, "grad_norm": 0.32570400834083557, "learning_rate": 5e-05, "loss": 1.4031, "step": 3278 }, { "epoch": 0.5247239558329333, "grad_norm": 0.31676438450813293, "learning_rate": 5e-05, "loss": 1.4498, "step": 3279 }, { "epoch": 0.52488398143703, "grad_norm": 0.3141888380050659, "learning_rate": 5e-05, "loss": 1.4348, "step": 3280 }, { "epoch": 0.5250440070411265, "grad_norm": 0.32681867480278015, "learning_rate": 5e-05, "loss": 1.433, "step": 3281 }, { "epoch": 0.5252040326452232, "grad_norm": 0.3198162019252777, "learning_rate": 5e-05, "loss": 1.5127, "step": 3282 }, { "epoch": 0.5253640582493199, "grad_norm": 0.3272474408149719, "learning_rate": 5e-05, "loss": 1.5238, "step": 3283 }, { "epoch": 0.5255240838534165, "grad_norm": 0.3248160779476166, "learning_rate": 5e-05, "loss": 1.4828, "step": 3284 }, { "epoch": 0.5256841094575132, "grad_norm": 0.3266875743865967, "learning_rate": 5e-05, "loss": 1.5177, "step": 3285 }, { "epoch": 0.5258441350616099, "grad_norm": 0.3236526846885681, "learning_rate": 5e-05, "loss": 1.4533, "step": 3286 }, { "epoch": 0.5260041606657065, "grad_norm": 0.3291437327861786, "learning_rate": 5e-05, "loss": 1.5482, "step": 3287 }, { "epoch": 0.5261641862698032, "grad_norm": 0.33180370926856995, "learning_rate": 5e-05, "loss": 1.4842, "step": 3288 }, { "epoch": 0.5263242118738998, "grad_norm": 0.3393414318561554, "learning_rate": 5e-05, "loss": 1.5347, "step": 3289 }, { "epoch": 0.5264842374779964, "grad_norm": 0.3196313679218292, "learning_rate": 5e-05, "loss": 1.4888, "step": 3290 }, { "epoch": 0.5266442630820931, "grad_norm": 0.316313773393631, "learning_rate": 5e-05, "loss": 1.5301, "step": 3291 }, { "epoch": 0.5268042886861898, "grad_norm": 0.32693377137184143, "learning_rate": 5e-05, "loss": 1.4728, "step": 3292 }, { "epoch": 0.5269643142902865, "grad_norm": 0.3311437666416168, "learning_rate": 5e-05, "loss": 1.5163, "step": 3293 }, { "epoch": 0.5271243398943831, "grad_norm": 0.3239712417125702, "learning_rate": 5e-05, "loss": 1.5286, "step": 3294 }, { "epoch": 0.5272843654984798, "grad_norm": 0.33545711636543274, "learning_rate": 5e-05, "loss": 1.5173, "step": 3295 }, { "epoch": 0.5274443911025765, "grad_norm": 0.33484190702438354, "learning_rate": 5e-05, "loss": 1.5158, "step": 3296 }, { "epoch": 0.527604416706673, "grad_norm": 0.3316732347011566, "learning_rate": 5e-05, "loss": 1.5686, "step": 3297 }, { "epoch": 0.5277644423107697, "grad_norm": 0.3118447959423065, "learning_rate": 5e-05, "loss": 1.4561, "step": 3298 }, { "epoch": 0.5279244679148664, "grad_norm": 0.336212694644928, "learning_rate": 5e-05, "loss": 1.5512, "step": 3299 }, { "epoch": 0.528084493518963, "grad_norm": 0.3343636393547058, "learning_rate": 5e-05, "loss": 1.5279, "step": 3300 }, { "epoch": 0.5282445191230597, "grad_norm": 0.3335268795490265, "learning_rate": 5e-05, "loss": 1.4811, "step": 3301 }, { "epoch": 0.5284045447271564, "grad_norm": 0.351222962141037, "learning_rate": 5e-05, "loss": 1.4833, "step": 3302 }, { "epoch": 0.528564570331253, "grad_norm": 0.32213839888572693, "learning_rate": 5e-05, "loss": 1.4453, "step": 3303 }, { "epoch": 0.5287245959353497, "grad_norm": 0.34285539388656616, "learning_rate": 5e-05, "loss": 1.5126, "step": 3304 }, { "epoch": 0.5288846215394463, "grad_norm": 0.3445136547088623, "learning_rate": 5e-05, "loss": 1.4496, "step": 3305 }, { "epoch": 0.5290446471435429, "grad_norm": 0.32961755990982056, "learning_rate": 5e-05, "loss": 1.5254, "step": 3306 }, { "epoch": 0.5292046727476396, "grad_norm": 0.326404333114624, "learning_rate": 5e-05, "loss": 1.4896, "step": 3307 }, { "epoch": 0.5293646983517363, "grad_norm": 0.32442039251327515, "learning_rate": 5e-05, "loss": 1.3775, "step": 3308 }, { "epoch": 0.5295247239558329, "grad_norm": 0.32487809658050537, "learning_rate": 5e-05, "loss": 1.4385, "step": 3309 }, { "epoch": 0.5296847495599296, "grad_norm": 0.3368893563747406, "learning_rate": 5e-05, "loss": 1.5081, "step": 3310 }, { "epoch": 0.5298447751640263, "grad_norm": 0.34587955474853516, "learning_rate": 5e-05, "loss": 1.4841, "step": 3311 }, { "epoch": 0.530004800768123, "grad_norm": 0.3356030285358429, "learning_rate": 5e-05, "loss": 1.5065, "step": 3312 }, { "epoch": 0.5301648263722195, "grad_norm": 0.3292701542377472, "learning_rate": 5e-05, "loss": 1.4571, "step": 3313 }, { "epoch": 0.5303248519763162, "grad_norm": 0.33711129426956177, "learning_rate": 5e-05, "loss": 1.4296, "step": 3314 }, { "epoch": 0.5304848775804129, "grad_norm": 0.3639545440673828, "learning_rate": 5e-05, "loss": 1.5131, "step": 3315 }, { "epoch": 0.5306449031845095, "grad_norm": 0.33431580662727356, "learning_rate": 5e-05, "loss": 1.5618, "step": 3316 }, { "epoch": 0.5308049287886062, "grad_norm": 0.35903698205947876, "learning_rate": 5e-05, "loss": 1.5271, "step": 3317 }, { "epoch": 0.5309649543927029, "grad_norm": 0.34122830629348755, "learning_rate": 5e-05, "loss": 1.5055, "step": 3318 }, { "epoch": 0.5311249799967995, "grad_norm": 0.3156604766845703, "learning_rate": 5e-05, "loss": 1.4041, "step": 3319 }, { "epoch": 0.5312850056008962, "grad_norm": 0.34415051341056824, "learning_rate": 5e-05, "loss": 1.4792, "step": 3320 }, { "epoch": 0.5314450312049928, "grad_norm": 0.34134697914123535, "learning_rate": 5e-05, "loss": 1.5124, "step": 3321 }, { "epoch": 0.5316050568090894, "grad_norm": 0.3204951286315918, "learning_rate": 5e-05, "loss": 1.5108, "step": 3322 }, { "epoch": 0.5317650824131861, "grad_norm": 0.3247896134853363, "learning_rate": 5e-05, "loss": 1.4278, "step": 3323 }, { "epoch": 0.5319251080172828, "grad_norm": 0.3327493369579315, "learning_rate": 5e-05, "loss": 1.4902, "step": 3324 }, { "epoch": 0.5320851336213794, "grad_norm": 0.3231356739997864, "learning_rate": 5e-05, "loss": 1.4961, "step": 3325 }, { "epoch": 0.5322451592254761, "grad_norm": 0.3338193893432617, "learning_rate": 5e-05, "loss": 1.518, "step": 3326 }, { "epoch": 0.5324051848295728, "grad_norm": 0.3297589421272278, "learning_rate": 5e-05, "loss": 1.4325, "step": 3327 }, { "epoch": 0.5325652104336693, "grad_norm": 0.32589268684387207, "learning_rate": 5e-05, "loss": 1.4846, "step": 3328 }, { "epoch": 0.532725236037766, "grad_norm": 0.32409751415252686, "learning_rate": 5e-05, "loss": 1.5625, "step": 3329 }, { "epoch": 0.5328852616418627, "grad_norm": 0.3220789134502411, "learning_rate": 5e-05, "loss": 1.5338, "step": 3330 }, { "epoch": 0.5330452872459593, "grad_norm": 0.32014551758766174, "learning_rate": 5e-05, "loss": 1.5293, "step": 3331 }, { "epoch": 0.533205312850056, "grad_norm": 0.3370777666568756, "learning_rate": 5e-05, "loss": 1.5806, "step": 3332 }, { "epoch": 0.5333653384541527, "grad_norm": 0.3204762637615204, "learning_rate": 5e-05, "loss": 1.5039, "step": 3333 }, { "epoch": 0.5335253640582494, "grad_norm": 0.3382134735584259, "learning_rate": 5e-05, "loss": 1.4953, "step": 3334 }, { "epoch": 0.533685389662346, "grad_norm": 0.3273461163043976, "learning_rate": 5e-05, "loss": 1.4989, "step": 3335 }, { "epoch": 0.5338454152664426, "grad_norm": 0.3197951018810272, "learning_rate": 5e-05, "loss": 1.4787, "step": 3336 }, { "epoch": 0.5340054408705392, "grad_norm": 0.3352622985839844, "learning_rate": 5e-05, "loss": 1.4842, "step": 3337 }, { "epoch": 0.5341654664746359, "grad_norm": 0.33826544880867004, "learning_rate": 5e-05, "loss": 1.4486, "step": 3338 }, { "epoch": 0.5343254920787326, "grad_norm": 0.3305824398994446, "learning_rate": 5e-05, "loss": 1.5154, "step": 3339 }, { "epoch": 0.5344855176828293, "grad_norm": 0.316943883895874, "learning_rate": 5e-05, "loss": 1.4871, "step": 3340 }, { "epoch": 0.5346455432869259, "grad_norm": 0.32590967416763306, "learning_rate": 5e-05, "loss": 1.4948, "step": 3341 }, { "epoch": 0.5348055688910226, "grad_norm": 0.33354705572128296, "learning_rate": 5e-05, "loss": 1.5576, "step": 3342 }, { "epoch": 0.5349655944951193, "grad_norm": 0.32498785853385925, "learning_rate": 5e-05, "loss": 1.4739, "step": 3343 }, { "epoch": 0.5351256200992158, "grad_norm": 0.3273136019706726, "learning_rate": 5e-05, "loss": 1.5155, "step": 3344 }, { "epoch": 0.5352856457033125, "grad_norm": 0.3339628577232361, "learning_rate": 5e-05, "loss": 1.4668, "step": 3345 }, { "epoch": 0.5354456713074092, "grad_norm": 0.323223352432251, "learning_rate": 5e-05, "loss": 1.4513, "step": 3346 }, { "epoch": 0.5356056969115058, "grad_norm": 0.3425917327404022, "learning_rate": 5e-05, "loss": 1.5308, "step": 3347 }, { "epoch": 0.5357657225156025, "grad_norm": 0.3519746661186218, "learning_rate": 5e-05, "loss": 1.5598, "step": 3348 }, { "epoch": 0.5359257481196992, "grad_norm": 0.3385586142539978, "learning_rate": 5e-05, "loss": 1.5405, "step": 3349 }, { "epoch": 0.5360857737237958, "grad_norm": 0.32584482431411743, "learning_rate": 5e-05, "loss": 1.5435, "step": 3350 }, { "epoch": 0.5362457993278925, "grad_norm": 0.31411346793174744, "learning_rate": 5e-05, "loss": 1.4698, "step": 3351 }, { "epoch": 0.5364058249319891, "grad_norm": 0.33031153678894043, "learning_rate": 5e-05, "loss": 1.5801, "step": 3352 }, { "epoch": 0.5365658505360857, "grad_norm": 0.3227485716342926, "learning_rate": 5e-05, "loss": 1.4449, "step": 3353 }, { "epoch": 0.5367258761401824, "grad_norm": 0.3113518953323364, "learning_rate": 5e-05, "loss": 1.4312, "step": 3354 }, { "epoch": 0.5368859017442791, "grad_norm": 0.32910382747650146, "learning_rate": 5e-05, "loss": 1.4571, "step": 3355 }, { "epoch": 0.5370459273483758, "grad_norm": 0.3280286490917206, "learning_rate": 5e-05, "loss": 1.5445, "step": 3356 }, { "epoch": 0.5372059529524724, "grad_norm": 0.3444271981716156, "learning_rate": 5e-05, "loss": 1.5016, "step": 3357 }, { "epoch": 0.5373659785565691, "grad_norm": 0.3269645869731903, "learning_rate": 5e-05, "loss": 1.4947, "step": 3358 }, { "epoch": 0.5375260041606658, "grad_norm": 0.32938310503959656, "learning_rate": 5e-05, "loss": 1.5328, "step": 3359 }, { "epoch": 0.5376860297647623, "grad_norm": 0.33136722445487976, "learning_rate": 5e-05, "loss": 1.6352, "step": 3360 }, { "epoch": 0.537846055368859, "grad_norm": 0.32099050283432007, "learning_rate": 5e-05, "loss": 1.4888, "step": 3361 }, { "epoch": 0.5380060809729557, "grad_norm": 0.33099114894866943, "learning_rate": 5e-05, "loss": 1.478, "step": 3362 }, { "epoch": 0.5381661065770523, "grad_norm": 0.3231077492237091, "learning_rate": 5e-05, "loss": 1.476, "step": 3363 }, { "epoch": 0.538326132181149, "grad_norm": 0.3276190161705017, "learning_rate": 5e-05, "loss": 1.5524, "step": 3364 }, { "epoch": 0.5384861577852457, "grad_norm": 0.3408270478248596, "learning_rate": 5e-05, "loss": 1.4628, "step": 3365 }, { "epoch": 0.5386461833893423, "grad_norm": 0.3353078067302704, "learning_rate": 5e-05, "loss": 1.485, "step": 3366 }, { "epoch": 0.5388062089934389, "grad_norm": 0.31531238555908203, "learning_rate": 5e-05, "loss": 1.4865, "step": 3367 }, { "epoch": 0.5389662345975356, "grad_norm": 0.3192320764064789, "learning_rate": 5e-05, "loss": 1.4447, "step": 3368 }, { "epoch": 0.5391262602016322, "grad_norm": 0.3515748977661133, "learning_rate": 5e-05, "loss": 1.5967, "step": 3369 }, { "epoch": 0.5392862858057289, "grad_norm": 0.3204725682735443, "learning_rate": 5e-05, "loss": 1.4737, "step": 3370 }, { "epoch": 0.5394463114098256, "grad_norm": 0.329241544008255, "learning_rate": 5e-05, "loss": 1.4408, "step": 3371 }, { "epoch": 0.5396063370139222, "grad_norm": 0.32793059945106506, "learning_rate": 5e-05, "loss": 1.5323, "step": 3372 }, { "epoch": 0.5397663626180189, "grad_norm": 0.34425097703933716, "learning_rate": 5e-05, "loss": 1.5695, "step": 3373 }, { "epoch": 0.5399263882221156, "grad_norm": 0.32867974042892456, "learning_rate": 5e-05, "loss": 1.4448, "step": 3374 }, { "epoch": 0.5400864138262121, "grad_norm": 0.32660460472106934, "learning_rate": 5e-05, "loss": 1.5273, "step": 3375 }, { "epoch": 0.5402464394303088, "grad_norm": 0.34861209988594055, "learning_rate": 5e-05, "loss": 1.5013, "step": 3376 }, { "epoch": 0.5404064650344055, "grad_norm": 0.3197218179702759, "learning_rate": 5e-05, "loss": 1.4901, "step": 3377 }, { "epoch": 0.5405664906385022, "grad_norm": 0.3328177332878113, "learning_rate": 5e-05, "loss": 1.5758, "step": 3378 }, { "epoch": 0.5407265162425988, "grad_norm": 0.31311312317848206, "learning_rate": 5e-05, "loss": 1.4331, "step": 3379 }, { "epoch": 0.5408865418466955, "grad_norm": 0.34344133734703064, "learning_rate": 5e-05, "loss": 1.4899, "step": 3380 }, { "epoch": 0.5410465674507922, "grad_norm": 0.32471901178359985, "learning_rate": 5e-05, "loss": 1.4906, "step": 3381 }, { "epoch": 0.5412065930548888, "grad_norm": 0.32571160793304443, "learning_rate": 5e-05, "loss": 1.4789, "step": 3382 }, { "epoch": 0.5413666186589854, "grad_norm": 0.3190106749534607, "learning_rate": 5e-05, "loss": 1.4433, "step": 3383 }, { "epoch": 0.5415266442630821, "grad_norm": 0.3267366588115692, "learning_rate": 5e-05, "loss": 1.5405, "step": 3384 }, { "epoch": 0.5416866698671787, "grad_norm": 0.33246222138404846, "learning_rate": 5e-05, "loss": 1.5216, "step": 3385 }, { "epoch": 0.5418466954712754, "grad_norm": 0.3370186686515808, "learning_rate": 5e-05, "loss": 1.5288, "step": 3386 }, { "epoch": 0.5420067210753721, "grad_norm": 0.32151034474372864, "learning_rate": 5e-05, "loss": 1.4739, "step": 3387 }, { "epoch": 0.5421667466794687, "grad_norm": 0.31990042328834534, "learning_rate": 5e-05, "loss": 1.4726, "step": 3388 }, { "epoch": 0.5423267722835654, "grad_norm": 0.3263079822063446, "learning_rate": 5e-05, "loss": 1.4779, "step": 3389 }, { "epoch": 0.5424867978876621, "grad_norm": 0.32746243476867676, "learning_rate": 5e-05, "loss": 1.4675, "step": 3390 }, { "epoch": 0.5426468234917586, "grad_norm": 0.3277892470359802, "learning_rate": 5e-05, "loss": 1.5471, "step": 3391 }, { "epoch": 0.5428068490958553, "grad_norm": 0.32927921414375305, "learning_rate": 5e-05, "loss": 1.4867, "step": 3392 }, { "epoch": 0.542966874699952, "grad_norm": 0.32425081729888916, "learning_rate": 5e-05, "loss": 1.4686, "step": 3393 }, { "epoch": 0.5431269003040486, "grad_norm": 0.3316580653190613, "learning_rate": 5e-05, "loss": 1.5431, "step": 3394 }, { "epoch": 0.5432869259081453, "grad_norm": 0.3219164311885834, "learning_rate": 5e-05, "loss": 1.4931, "step": 3395 }, { "epoch": 0.543446951512242, "grad_norm": 0.3098680377006531, "learning_rate": 5e-05, "loss": 1.3831, "step": 3396 }, { "epoch": 0.5436069771163387, "grad_norm": 0.3332037329673767, "learning_rate": 5e-05, "loss": 1.5314, "step": 3397 }, { "epoch": 0.5437670027204353, "grad_norm": 0.3254457116127014, "learning_rate": 5e-05, "loss": 1.5384, "step": 3398 }, { "epoch": 0.5439270283245319, "grad_norm": 0.3183578848838806, "learning_rate": 5e-05, "loss": 1.4606, "step": 3399 }, { "epoch": 0.5440870539286286, "grad_norm": 0.31734126806259155, "learning_rate": 5e-05, "loss": 1.4656, "step": 3400 }, { "epoch": 0.5442470795327252, "grad_norm": 0.3164272904396057, "learning_rate": 5e-05, "loss": 1.4298, "step": 3401 }, { "epoch": 0.5444071051368219, "grad_norm": 0.3209702968597412, "learning_rate": 5e-05, "loss": 1.3902, "step": 3402 }, { "epoch": 0.5445671307409186, "grad_norm": 0.3342978060245514, "learning_rate": 5e-05, "loss": 1.5424, "step": 3403 }, { "epoch": 0.5447271563450152, "grad_norm": 0.3324083089828491, "learning_rate": 5e-05, "loss": 1.5567, "step": 3404 }, { "epoch": 0.5448871819491119, "grad_norm": 0.3275960683822632, "learning_rate": 5e-05, "loss": 1.5283, "step": 3405 }, { "epoch": 0.5450472075532086, "grad_norm": 0.3174147307872772, "learning_rate": 5e-05, "loss": 1.481, "step": 3406 }, { "epoch": 0.5452072331573051, "grad_norm": 0.32688361406326294, "learning_rate": 5e-05, "loss": 1.4922, "step": 3407 }, { "epoch": 0.5453672587614018, "grad_norm": 0.3432449698448181, "learning_rate": 5e-05, "loss": 1.5986, "step": 3408 }, { "epoch": 0.5455272843654985, "grad_norm": 0.32665354013442993, "learning_rate": 5e-05, "loss": 1.4126, "step": 3409 }, { "epoch": 0.5456873099695951, "grad_norm": 0.32728666067123413, "learning_rate": 5e-05, "loss": 1.5166, "step": 3410 }, { "epoch": 0.5458473355736918, "grad_norm": 0.3235715925693512, "learning_rate": 5e-05, "loss": 1.4547, "step": 3411 }, { "epoch": 0.5460073611777885, "grad_norm": 0.32518312335014343, "learning_rate": 5e-05, "loss": 1.5021, "step": 3412 }, { "epoch": 0.5461673867818851, "grad_norm": 0.3230125904083252, "learning_rate": 5e-05, "loss": 1.4268, "step": 3413 }, { "epoch": 0.5463274123859817, "grad_norm": 0.32509955763816833, "learning_rate": 5e-05, "loss": 1.5186, "step": 3414 }, { "epoch": 0.5464874379900784, "grad_norm": 0.3062407076358795, "learning_rate": 5e-05, "loss": 1.4106, "step": 3415 }, { "epoch": 0.546647463594175, "grad_norm": 0.3305853307247162, "learning_rate": 5e-05, "loss": 1.5568, "step": 3416 }, { "epoch": 0.5468074891982717, "grad_norm": 0.3192126154899597, "learning_rate": 5e-05, "loss": 1.4942, "step": 3417 }, { "epoch": 0.5469675148023684, "grad_norm": 0.3354358971118927, "learning_rate": 5e-05, "loss": 1.4861, "step": 3418 }, { "epoch": 0.547127540406465, "grad_norm": 0.3268658220767975, "learning_rate": 5e-05, "loss": 1.5215, "step": 3419 }, { "epoch": 0.5472875660105617, "grad_norm": 0.34276652336120605, "learning_rate": 5e-05, "loss": 1.5236, "step": 3420 }, { "epoch": 0.5474475916146584, "grad_norm": 0.33142605423927307, "learning_rate": 5e-05, "loss": 1.5701, "step": 3421 }, { "epoch": 0.547607617218755, "grad_norm": 0.3186163306236267, "learning_rate": 5e-05, "loss": 1.4325, "step": 3422 }, { "epoch": 0.5477676428228516, "grad_norm": 0.3185320496559143, "learning_rate": 5e-05, "loss": 1.4547, "step": 3423 }, { "epoch": 0.5479276684269483, "grad_norm": 0.3373399078845978, "learning_rate": 5e-05, "loss": 1.4576, "step": 3424 }, { "epoch": 0.548087694031045, "grad_norm": 0.3256716728210449, "learning_rate": 5e-05, "loss": 1.4606, "step": 3425 }, { "epoch": 0.5482477196351416, "grad_norm": 0.32451319694519043, "learning_rate": 5e-05, "loss": 1.4898, "step": 3426 }, { "epoch": 0.5484077452392383, "grad_norm": 0.32744482159614563, "learning_rate": 5e-05, "loss": 1.5169, "step": 3427 }, { "epoch": 0.548567770843335, "grad_norm": 0.3461499810218811, "learning_rate": 5e-05, "loss": 1.5192, "step": 3428 }, { "epoch": 0.5487277964474316, "grad_norm": 0.33137577772140503, "learning_rate": 5e-05, "loss": 1.5229, "step": 3429 }, { "epoch": 0.5488878220515282, "grad_norm": 0.34072843194007874, "learning_rate": 5e-05, "loss": 1.5999, "step": 3430 }, { "epoch": 0.5490478476556249, "grad_norm": 0.33582207560539246, "learning_rate": 5e-05, "loss": 1.4696, "step": 3431 }, { "epoch": 0.5492078732597215, "grad_norm": 0.32067790627479553, "learning_rate": 5e-05, "loss": 1.4507, "step": 3432 }, { "epoch": 0.5493678988638182, "grad_norm": 0.33254745602607727, "learning_rate": 5e-05, "loss": 1.5272, "step": 3433 }, { "epoch": 0.5495279244679149, "grad_norm": 0.34343957901000977, "learning_rate": 5e-05, "loss": 1.4471, "step": 3434 }, { "epoch": 0.5496879500720115, "grad_norm": 0.3383028507232666, "learning_rate": 5e-05, "loss": 1.5334, "step": 3435 }, { "epoch": 0.5498479756761082, "grad_norm": 0.33467668294906616, "learning_rate": 5e-05, "loss": 1.5507, "step": 3436 }, { "epoch": 0.5500080012802049, "grad_norm": 0.31577569246292114, "learning_rate": 5e-05, "loss": 1.4711, "step": 3437 }, { "epoch": 0.5501680268843014, "grad_norm": 0.3365507125854492, "learning_rate": 5e-05, "loss": 1.4759, "step": 3438 }, { "epoch": 0.5503280524883981, "grad_norm": 0.32970184087753296, "learning_rate": 5e-05, "loss": 1.4396, "step": 3439 }, { "epoch": 0.5504880780924948, "grad_norm": 0.3193921744823456, "learning_rate": 5e-05, "loss": 1.4633, "step": 3440 }, { "epoch": 0.5506481036965915, "grad_norm": 0.3360198140144348, "learning_rate": 5e-05, "loss": 1.5773, "step": 3441 }, { "epoch": 0.5508081293006881, "grad_norm": 0.3214510381221771, "learning_rate": 5e-05, "loss": 1.4761, "step": 3442 }, { "epoch": 0.5509681549047848, "grad_norm": 0.31739187240600586, "learning_rate": 5e-05, "loss": 1.3786, "step": 3443 }, { "epoch": 0.5511281805088815, "grad_norm": 0.325328528881073, "learning_rate": 5e-05, "loss": 1.4568, "step": 3444 }, { "epoch": 0.5512882061129781, "grad_norm": 0.3246726095676422, "learning_rate": 5e-05, "loss": 1.446, "step": 3445 }, { "epoch": 0.5514482317170747, "grad_norm": 0.33218562602996826, "learning_rate": 5e-05, "loss": 1.4818, "step": 3446 }, { "epoch": 0.5516082573211714, "grad_norm": 0.3303738832473755, "learning_rate": 5e-05, "loss": 1.4623, "step": 3447 }, { "epoch": 0.551768282925268, "grad_norm": 0.33103692531585693, "learning_rate": 5e-05, "loss": 1.4633, "step": 3448 }, { "epoch": 0.5519283085293647, "grad_norm": 0.32679349184036255, "learning_rate": 5e-05, "loss": 1.4446, "step": 3449 }, { "epoch": 0.5520883341334614, "grad_norm": 0.3426879346370697, "learning_rate": 5e-05, "loss": 1.5271, "step": 3450 }, { "epoch": 0.552248359737558, "grad_norm": 0.3212858736515045, "learning_rate": 5e-05, "loss": 1.5126, "step": 3451 }, { "epoch": 0.5524083853416547, "grad_norm": 0.3254852890968323, "learning_rate": 5e-05, "loss": 1.5019, "step": 3452 }, { "epoch": 0.5525684109457513, "grad_norm": 0.33380648493766785, "learning_rate": 5e-05, "loss": 1.585, "step": 3453 }, { "epoch": 0.5527284365498479, "grad_norm": 0.33286136388778687, "learning_rate": 5e-05, "loss": 1.4848, "step": 3454 }, { "epoch": 0.5528884621539446, "grad_norm": 0.33155182003974915, "learning_rate": 5e-05, "loss": 1.5429, "step": 3455 }, { "epoch": 0.5530484877580413, "grad_norm": 0.3235933482646942, "learning_rate": 5e-05, "loss": 1.4619, "step": 3456 }, { "epoch": 0.5532085133621379, "grad_norm": 0.32536831498146057, "learning_rate": 5e-05, "loss": 1.5336, "step": 3457 }, { "epoch": 0.5533685389662346, "grad_norm": 0.34020695090293884, "learning_rate": 5e-05, "loss": 1.5164, "step": 3458 }, { "epoch": 0.5535285645703313, "grad_norm": 0.32615599036216736, "learning_rate": 5e-05, "loss": 1.5785, "step": 3459 }, { "epoch": 0.553688590174428, "grad_norm": 0.32528701424598694, "learning_rate": 5e-05, "loss": 1.4735, "step": 3460 }, { "epoch": 0.5538486157785245, "grad_norm": 0.3154878318309784, "learning_rate": 5e-05, "loss": 1.4419, "step": 3461 }, { "epoch": 0.5540086413826212, "grad_norm": 0.3235040307044983, "learning_rate": 5e-05, "loss": 1.4323, "step": 3462 }, { "epoch": 0.5541686669867179, "grad_norm": 0.345893532037735, "learning_rate": 5e-05, "loss": 1.5342, "step": 3463 }, { "epoch": 0.5543286925908145, "grad_norm": 0.3328620195388794, "learning_rate": 5e-05, "loss": 1.5455, "step": 3464 }, { "epoch": 0.5544887181949112, "grad_norm": 0.34063082933425903, "learning_rate": 5e-05, "loss": 1.563, "step": 3465 }, { "epoch": 0.5546487437990079, "grad_norm": 0.33568915724754333, "learning_rate": 5e-05, "loss": 1.494, "step": 3466 }, { "epoch": 0.5548087694031045, "grad_norm": 0.3294949233531952, "learning_rate": 5e-05, "loss": 1.4961, "step": 3467 }, { "epoch": 0.5549687950072012, "grad_norm": 0.32991930842399597, "learning_rate": 5e-05, "loss": 1.5426, "step": 3468 }, { "epoch": 0.5551288206112978, "grad_norm": 0.31794893741607666, "learning_rate": 5e-05, "loss": 1.5035, "step": 3469 }, { "epoch": 0.5552888462153944, "grad_norm": 0.3353326916694641, "learning_rate": 5e-05, "loss": 1.5353, "step": 3470 }, { "epoch": 0.5554488718194911, "grad_norm": 0.31940484046936035, "learning_rate": 5e-05, "loss": 1.4756, "step": 3471 }, { "epoch": 0.5556088974235878, "grad_norm": 0.3343173861503601, "learning_rate": 5e-05, "loss": 1.519, "step": 3472 }, { "epoch": 0.5557689230276844, "grad_norm": 0.31794309616088867, "learning_rate": 5e-05, "loss": 1.467, "step": 3473 }, { "epoch": 0.5559289486317811, "grad_norm": 0.3445911109447479, "learning_rate": 5e-05, "loss": 1.5307, "step": 3474 }, { "epoch": 0.5560889742358778, "grad_norm": 0.3261464238166809, "learning_rate": 5e-05, "loss": 1.465, "step": 3475 }, { "epoch": 0.5562489998399744, "grad_norm": 0.32046079635620117, "learning_rate": 5e-05, "loss": 1.4789, "step": 3476 }, { "epoch": 0.556409025444071, "grad_norm": 0.3239386975765228, "learning_rate": 5e-05, "loss": 1.4584, "step": 3477 }, { "epoch": 0.5565690510481677, "grad_norm": 0.3296128809452057, "learning_rate": 5e-05, "loss": 1.5141, "step": 3478 }, { "epoch": 0.5567290766522643, "grad_norm": 0.3178916275501251, "learning_rate": 5e-05, "loss": 1.4706, "step": 3479 }, { "epoch": 0.556889102256361, "grad_norm": 0.3329966068267822, "learning_rate": 5e-05, "loss": 1.5903, "step": 3480 }, { "epoch": 0.5570491278604577, "grad_norm": 0.3304685354232788, "learning_rate": 5e-05, "loss": 1.4993, "step": 3481 }, { "epoch": 0.5572091534645544, "grad_norm": 0.33270177245140076, "learning_rate": 5e-05, "loss": 1.4268, "step": 3482 }, { "epoch": 0.557369179068651, "grad_norm": 0.3146515190601349, "learning_rate": 5e-05, "loss": 1.4958, "step": 3483 }, { "epoch": 0.5575292046727477, "grad_norm": 0.3184296488761902, "learning_rate": 5e-05, "loss": 1.5029, "step": 3484 }, { "epoch": 0.5576892302768442, "grad_norm": 0.31336405873298645, "learning_rate": 5e-05, "loss": 1.4244, "step": 3485 }, { "epoch": 0.5578492558809409, "grad_norm": 0.3344501554965973, "learning_rate": 5e-05, "loss": 1.5884, "step": 3486 }, { "epoch": 0.5580092814850376, "grad_norm": 0.3396018147468567, "learning_rate": 5e-05, "loss": 1.539, "step": 3487 }, { "epoch": 0.5581693070891343, "grad_norm": 0.3281652629375458, "learning_rate": 5e-05, "loss": 1.4964, "step": 3488 }, { "epoch": 0.5583293326932309, "grad_norm": 0.33562609553337097, "learning_rate": 5e-05, "loss": 1.4988, "step": 3489 }, { "epoch": 0.5584893582973276, "grad_norm": 0.3352743983268738, "learning_rate": 5e-05, "loss": 1.5731, "step": 3490 }, { "epoch": 0.5586493839014243, "grad_norm": 0.34269192814826965, "learning_rate": 5e-05, "loss": 1.4985, "step": 3491 }, { "epoch": 0.5588094095055209, "grad_norm": 0.3426222503185272, "learning_rate": 5e-05, "loss": 1.5536, "step": 3492 }, { "epoch": 0.5589694351096175, "grad_norm": 0.33799025416374207, "learning_rate": 5e-05, "loss": 1.5771, "step": 3493 }, { "epoch": 0.5591294607137142, "grad_norm": 0.33869028091430664, "learning_rate": 5e-05, "loss": 1.5402, "step": 3494 }, { "epoch": 0.5592894863178108, "grad_norm": 0.33036738634109497, "learning_rate": 5e-05, "loss": 1.5183, "step": 3495 }, { "epoch": 0.5594495119219075, "grad_norm": 0.3237239420413971, "learning_rate": 5e-05, "loss": 1.4565, "step": 3496 }, { "epoch": 0.5596095375260042, "grad_norm": 0.32643401622772217, "learning_rate": 5e-05, "loss": 1.4971, "step": 3497 }, { "epoch": 0.5597695631301008, "grad_norm": 0.3247113525867462, "learning_rate": 5e-05, "loss": 1.4906, "step": 3498 }, { "epoch": 0.5599295887341975, "grad_norm": 0.3264262080192566, "learning_rate": 5e-05, "loss": 1.5791, "step": 3499 }, { "epoch": 0.5600896143382941, "grad_norm": 0.3300858736038208, "learning_rate": 5e-05, "loss": 1.4703, "step": 3500 }, { "epoch": 0.5602496399423907, "grad_norm": 0.32290229201316833, "learning_rate": 5e-05, "loss": 1.532, "step": 3501 }, { "epoch": 0.5604096655464874, "grad_norm": 0.33373406529426575, "learning_rate": 5e-05, "loss": 1.5902, "step": 3502 }, { "epoch": 0.5605696911505841, "grad_norm": 0.3405042290687561, "learning_rate": 5e-05, "loss": 1.527, "step": 3503 }, { "epoch": 0.5607297167546808, "grad_norm": 0.32992780208587646, "learning_rate": 5e-05, "loss": 1.5303, "step": 3504 }, { "epoch": 0.5608897423587774, "grad_norm": 0.32927173376083374, "learning_rate": 5e-05, "loss": 1.4981, "step": 3505 }, { "epoch": 0.5610497679628741, "grad_norm": 0.3554776906967163, "learning_rate": 5e-05, "loss": 1.5244, "step": 3506 }, { "epoch": 0.5612097935669708, "grad_norm": 0.3108518123626709, "learning_rate": 5e-05, "loss": 1.4758, "step": 3507 }, { "epoch": 0.5613698191710673, "grad_norm": 0.330291748046875, "learning_rate": 5e-05, "loss": 1.4367, "step": 3508 }, { "epoch": 0.561529844775164, "grad_norm": 0.33783674240112305, "learning_rate": 5e-05, "loss": 1.4286, "step": 3509 }, { "epoch": 0.5616898703792607, "grad_norm": 0.3321339786052704, "learning_rate": 5e-05, "loss": 1.54, "step": 3510 }, { "epoch": 0.5618498959833573, "grad_norm": 0.3334866166114807, "learning_rate": 5e-05, "loss": 1.4879, "step": 3511 }, { "epoch": 0.562009921587454, "grad_norm": 0.33982425928115845, "learning_rate": 5e-05, "loss": 1.5328, "step": 3512 }, { "epoch": 0.5621699471915507, "grad_norm": 0.3275550603866577, "learning_rate": 5e-05, "loss": 1.4839, "step": 3513 }, { "epoch": 0.5623299727956473, "grad_norm": 0.3388877213001251, "learning_rate": 5e-05, "loss": 1.4589, "step": 3514 }, { "epoch": 0.562489998399744, "grad_norm": 0.3394150733947754, "learning_rate": 5e-05, "loss": 1.5203, "step": 3515 }, { "epoch": 0.5626500240038406, "grad_norm": 0.3273770809173584, "learning_rate": 5e-05, "loss": 1.4842, "step": 3516 }, { "epoch": 0.5628100496079372, "grad_norm": 0.324716180562973, "learning_rate": 5e-05, "loss": 1.4327, "step": 3517 }, { "epoch": 0.5629700752120339, "grad_norm": 0.3270013928413391, "learning_rate": 5e-05, "loss": 1.4722, "step": 3518 }, { "epoch": 0.5631301008161306, "grad_norm": 0.3154605031013489, "learning_rate": 5e-05, "loss": 1.4004, "step": 3519 }, { "epoch": 0.5632901264202272, "grad_norm": 0.328081876039505, "learning_rate": 5e-05, "loss": 1.5617, "step": 3520 }, { "epoch": 0.5634501520243239, "grad_norm": 0.3316260874271393, "learning_rate": 5e-05, "loss": 1.5213, "step": 3521 }, { "epoch": 0.5636101776284206, "grad_norm": 0.33798909187316895, "learning_rate": 5e-05, "loss": 1.4122, "step": 3522 }, { "epoch": 0.5637702032325173, "grad_norm": 0.32749271392822266, "learning_rate": 5e-05, "loss": 1.5229, "step": 3523 }, { "epoch": 0.5639302288366138, "grad_norm": 0.33229389786720276, "learning_rate": 5e-05, "loss": 1.4612, "step": 3524 }, { "epoch": 0.5640902544407105, "grad_norm": 0.31780335307121277, "learning_rate": 5e-05, "loss": 1.4665, "step": 3525 }, { "epoch": 0.5642502800448072, "grad_norm": 0.3394085466861725, "learning_rate": 5e-05, "loss": 1.4637, "step": 3526 }, { "epoch": 0.5644103056489038, "grad_norm": 0.322571337223053, "learning_rate": 5e-05, "loss": 1.4708, "step": 3527 }, { "epoch": 0.5645703312530005, "grad_norm": 0.3237497806549072, "learning_rate": 5e-05, "loss": 1.4675, "step": 3528 }, { "epoch": 0.5647303568570972, "grad_norm": 0.3402816653251648, "learning_rate": 5e-05, "loss": 1.5234, "step": 3529 }, { "epoch": 0.5648903824611938, "grad_norm": 0.32412996888160706, "learning_rate": 5e-05, "loss": 1.5208, "step": 3530 }, { "epoch": 0.5650504080652905, "grad_norm": 0.3228725790977478, "learning_rate": 5e-05, "loss": 1.442, "step": 3531 }, { "epoch": 0.5652104336693871, "grad_norm": 0.3175171911716461, "learning_rate": 5e-05, "loss": 1.3853, "step": 3532 }, { "epoch": 0.5653704592734837, "grad_norm": 0.34156540036201477, "learning_rate": 5e-05, "loss": 1.4743, "step": 3533 }, { "epoch": 0.5655304848775804, "grad_norm": 0.3380386531352997, "learning_rate": 5e-05, "loss": 1.4858, "step": 3534 }, { "epoch": 0.5656905104816771, "grad_norm": 0.34613877534866333, "learning_rate": 5e-05, "loss": 1.554, "step": 3535 }, { "epoch": 0.5658505360857737, "grad_norm": 0.34907203912734985, "learning_rate": 5e-05, "loss": 1.4682, "step": 3536 }, { "epoch": 0.5660105616898704, "grad_norm": 0.3255923092365265, "learning_rate": 5e-05, "loss": 1.4629, "step": 3537 }, { "epoch": 0.5661705872939671, "grad_norm": 0.32764050364494324, "learning_rate": 5e-05, "loss": 1.4215, "step": 3538 }, { "epoch": 0.5663306128980637, "grad_norm": 0.32377883791923523, "learning_rate": 5e-05, "loss": 1.4427, "step": 3539 }, { "epoch": 0.5664906385021603, "grad_norm": 0.3183025121688843, "learning_rate": 5e-05, "loss": 1.472, "step": 3540 }, { "epoch": 0.566650664106257, "grad_norm": 0.3486681878566742, "learning_rate": 5e-05, "loss": 1.572, "step": 3541 }, { "epoch": 0.5668106897103536, "grad_norm": 0.3356749713420868, "learning_rate": 5e-05, "loss": 1.5118, "step": 3542 }, { "epoch": 0.5669707153144503, "grad_norm": 0.33334246277809143, "learning_rate": 5e-05, "loss": 1.5578, "step": 3543 }, { "epoch": 0.567130740918547, "grad_norm": 0.33155620098114014, "learning_rate": 5e-05, "loss": 1.439, "step": 3544 }, { "epoch": 0.5672907665226437, "grad_norm": 0.3297193646430969, "learning_rate": 5e-05, "loss": 1.4052, "step": 3545 }, { "epoch": 0.5674507921267403, "grad_norm": 0.3346495032310486, "learning_rate": 5e-05, "loss": 1.5229, "step": 3546 }, { "epoch": 0.5676108177308369, "grad_norm": 0.32081782817840576, "learning_rate": 5e-05, "loss": 1.4693, "step": 3547 }, { "epoch": 0.5677708433349336, "grad_norm": 0.32573118805885315, "learning_rate": 5e-05, "loss": 1.4891, "step": 3548 }, { "epoch": 0.5679308689390302, "grad_norm": 0.3309071660041809, "learning_rate": 5e-05, "loss": 1.5102, "step": 3549 }, { "epoch": 0.5680908945431269, "grad_norm": 0.3206639885902405, "learning_rate": 5e-05, "loss": 1.5042, "step": 3550 }, { "epoch": 0.5682509201472236, "grad_norm": 0.3253939747810364, "learning_rate": 5e-05, "loss": 1.4593, "step": 3551 }, { "epoch": 0.5684109457513202, "grad_norm": 0.32383978366851807, "learning_rate": 5e-05, "loss": 1.4421, "step": 3552 }, { "epoch": 0.5685709713554169, "grad_norm": 0.32853302359580994, "learning_rate": 5e-05, "loss": 1.4613, "step": 3553 }, { "epoch": 0.5687309969595136, "grad_norm": 0.3358592391014099, "learning_rate": 5e-05, "loss": 1.4615, "step": 3554 }, { "epoch": 0.5688910225636101, "grad_norm": 0.33493900299072266, "learning_rate": 5e-05, "loss": 1.4931, "step": 3555 }, { "epoch": 0.5690510481677068, "grad_norm": 0.33940914273262024, "learning_rate": 5e-05, "loss": 1.5276, "step": 3556 }, { "epoch": 0.5692110737718035, "grad_norm": 0.32768118381500244, "learning_rate": 5e-05, "loss": 1.4969, "step": 3557 }, { "epoch": 0.5693710993759001, "grad_norm": 0.33222848176956177, "learning_rate": 5e-05, "loss": 1.4493, "step": 3558 }, { "epoch": 0.5695311249799968, "grad_norm": 0.3299518823623657, "learning_rate": 5e-05, "loss": 1.4784, "step": 3559 }, { "epoch": 0.5696911505840935, "grad_norm": 0.3249110281467438, "learning_rate": 5e-05, "loss": 1.4547, "step": 3560 }, { "epoch": 0.5698511761881901, "grad_norm": 0.3353275954723358, "learning_rate": 5e-05, "loss": 1.4744, "step": 3561 }, { "epoch": 0.5700112017922868, "grad_norm": 0.3244372308254242, "learning_rate": 5e-05, "loss": 1.4364, "step": 3562 }, { "epoch": 0.5701712273963834, "grad_norm": 0.3240026533603668, "learning_rate": 5e-05, "loss": 1.4547, "step": 3563 }, { "epoch": 0.57033125300048, "grad_norm": 0.3401821255683899, "learning_rate": 5e-05, "loss": 1.5033, "step": 3564 }, { "epoch": 0.5704912786045767, "grad_norm": 0.3330542743206024, "learning_rate": 5e-05, "loss": 1.5107, "step": 3565 }, { "epoch": 0.5706513042086734, "grad_norm": 0.3259572684764862, "learning_rate": 5e-05, "loss": 1.5043, "step": 3566 }, { "epoch": 0.57081132981277, "grad_norm": 0.3444741368293762, "learning_rate": 5e-05, "loss": 1.4713, "step": 3567 }, { "epoch": 0.5709713554168667, "grad_norm": 0.3441907465457916, "learning_rate": 5e-05, "loss": 1.5418, "step": 3568 }, { "epoch": 0.5711313810209634, "grad_norm": 0.3194950520992279, "learning_rate": 5e-05, "loss": 1.4046, "step": 3569 }, { "epoch": 0.5712914066250601, "grad_norm": 0.3364381790161133, "learning_rate": 5e-05, "loss": 1.5364, "step": 3570 }, { "epoch": 0.5714514322291566, "grad_norm": 0.32338303327560425, "learning_rate": 5e-05, "loss": 1.4918, "step": 3571 }, { "epoch": 0.5716114578332533, "grad_norm": 0.3302915394306183, "learning_rate": 5e-05, "loss": 1.5095, "step": 3572 }, { "epoch": 0.57177148343735, "grad_norm": 0.3256651759147644, "learning_rate": 5e-05, "loss": 1.531, "step": 3573 }, { "epoch": 0.5719315090414466, "grad_norm": 0.3431777358055115, "learning_rate": 5e-05, "loss": 1.5487, "step": 3574 }, { "epoch": 0.5720915346455433, "grad_norm": 0.3228130340576172, "learning_rate": 5e-05, "loss": 1.4882, "step": 3575 }, { "epoch": 0.57225156024964, "grad_norm": 0.3279891610145569, "learning_rate": 5e-05, "loss": 1.5058, "step": 3576 }, { "epoch": 0.5724115858537366, "grad_norm": 0.3326508104801178, "learning_rate": 5e-05, "loss": 1.4074, "step": 3577 }, { "epoch": 0.5725716114578333, "grad_norm": 0.32937124371528625, "learning_rate": 5e-05, "loss": 1.4573, "step": 3578 }, { "epoch": 0.5727316370619299, "grad_norm": 0.3328566253185272, "learning_rate": 5e-05, "loss": 1.4858, "step": 3579 }, { "epoch": 0.5728916626660265, "grad_norm": 0.3386555314064026, "learning_rate": 5e-05, "loss": 1.4311, "step": 3580 }, { "epoch": 0.5730516882701232, "grad_norm": 0.34585005044937134, "learning_rate": 5e-05, "loss": 1.5936, "step": 3581 }, { "epoch": 0.5732117138742199, "grad_norm": 0.32410314679145813, "learning_rate": 5e-05, "loss": 1.4834, "step": 3582 }, { "epoch": 0.5733717394783165, "grad_norm": 0.324590802192688, "learning_rate": 5e-05, "loss": 1.4231, "step": 3583 }, { "epoch": 0.5735317650824132, "grad_norm": 0.32591477036476135, "learning_rate": 5e-05, "loss": 1.459, "step": 3584 }, { "epoch": 0.5736917906865099, "grad_norm": 0.33236032724380493, "learning_rate": 5e-05, "loss": 1.4782, "step": 3585 }, { "epoch": 0.5738518162906064, "grad_norm": 0.34564632177352905, "learning_rate": 5e-05, "loss": 1.5808, "step": 3586 }, { "epoch": 0.5740118418947031, "grad_norm": 0.3258468210697174, "learning_rate": 5e-05, "loss": 1.4801, "step": 3587 }, { "epoch": 0.5741718674987998, "grad_norm": 0.32314935326576233, "learning_rate": 5e-05, "loss": 1.5134, "step": 3588 }, { "epoch": 0.5743318931028965, "grad_norm": 0.3232041299343109, "learning_rate": 5e-05, "loss": 1.4476, "step": 3589 }, { "epoch": 0.5744919187069931, "grad_norm": 0.32265910506248474, "learning_rate": 5e-05, "loss": 1.4362, "step": 3590 }, { "epoch": 0.5746519443110898, "grad_norm": 0.3229037821292877, "learning_rate": 5e-05, "loss": 1.5014, "step": 3591 }, { "epoch": 0.5748119699151865, "grad_norm": 0.33635666966438293, "learning_rate": 5e-05, "loss": 1.4724, "step": 3592 }, { "epoch": 0.5749719955192831, "grad_norm": 0.32817932963371277, "learning_rate": 5e-05, "loss": 1.5635, "step": 3593 }, { "epoch": 0.5751320211233797, "grad_norm": 0.341739684343338, "learning_rate": 5e-05, "loss": 1.4862, "step": 3594 }, { "epoch": 0.5752920467274764, "grad_norm": 0.3418486714363098, "learning_rate": 5e-05, "loss": 1.479, "step": 3595 }, { "epoch": 0.575452072331573, "grad_norm": 0.330439031124115, "learning_rate": 5e-05, "loss": 1.4936, "step": 3596 }, { "epoch": 0.5756120979356697, "grad_norm": 0.3415486514568329, "learning_rate": 5e-05, "loss": 1.5478, "step": 3597 }, { "epoch": 0.5757721235397664, "grad_norm": 0.3458402156829834, "learning_rate": 5e-05, "loss": 1.5931, "step": 3598 }, { "epoch": 0.575932149143863, "grad_norm": 0.33714917302131653, "learning_rate": 5e-05, "loss": 1.4957, "step": 3599 }, { "epoch": 0.5760921747479597, "grad_norm": 0.32647889852523804, "learning_rate": 5e-05, "loss": 1.4471, "step": 3600 }, { "epoch": 0.5762522003520564, "grad_norm": 0.32426726818084717, "learning_rate": 5e-05, "loss": 1.4489, "step": 3601 }, { "epoch": 0.5764122259561529, "grad_norm": 0.34025493264198303, "learning_rate": 5e-05, "loss": 1.5862, "step": 3602 }, { "epoch": 0.5765722515602496, "grad_norm": 0.3430715799331665, "learning_rate": 5e-05, "loss": 1.5501, "step": 3603 }, { "epoch": 0.5767322771643463, "grad_norm": 0.34468650817871094, "learning_rate": 5e-05, "loss": 1.509, "step": 3604 }, { "epoch": 0.5768923027684429, "grad_norm": 0.34390056133270264, "learning_rate": 5e-05, "loss": 1.5828, "step": 3605 }, { "epoch": 0.5770523283725396, "grad_norm": 0.3336295187473297, "learning_rate": 5e-05, "loss": 1.5346, "step": 3606 }, { "epoch": 0.5772123539766363, "grad_norm": 0.33329781889915466, "learning_rate": 5e-05, "loss": 1.541, "step": 3607 }, { "epoch": 0.577372379580733, "grad_norm": 0.3313048183917999, "learning_rate": 5e-05, "loss": 1.5198, "step": 3608 }, { "epoch": 0.5775324051848296, "grad_norm": 0.3292047083377838, "learning_rate": 5e-05, "loss": 1.5334, "step": 3609 }, { "epoch": 0.5776924307889262, "grad_norm": 0.3251918852329254, "learning_rate": 5e-05, "loss": 1.4356, "step": 3610 }, { "epoch": 0.5778524563930229, "grad_norm": 0.334023654460907, "learning_rate": 5e-05, "loss": 1.5744, "step": 3611 }, { "epoch": 0.5780124819971195, "grad_norm": 0.32090502977371216, "learning_rate": 5e-05, "loss": 1.4577, "step": 3612 }, { "epoch": 0.5781725076012162, "grad_norm": 0.33085110783576965, "learning_rate": 5e-05, "loss": 1.4583, "step": 3613 }, { "epoch": 0.5783325332053129, "grad_norm": 0.3399093747138977, "learning_rate": 5e-05, "loss": 1.4697, "step": 3614 }, { "epoch": 0.5784925588094095, "grad_norm": 0.3223878741264343, "learning_rate": 5e-05, "loss": 1.514, "step": 3615 }, { "epoch": 0.5786525844135062, "grad_norm": 0.331816703081131, "learning_rate": 5e-05, "loss": 1.4857, "step": 3616 }, { "epoch": 0.5788126100176029, "grad_norm": 0.33864790201187134, "learning_rate": 5e-05, "loss": 1.5236, "step": 3617 }, { "epoch": 0.5789726356216994, "grad_norm": 0.3305520713329315, "learning_rate": 5e-05, "loss": 1.468, "step": 3618 }, { "epoch": 0.5791326612257961, "grad_norm": 0.3250275254249573, "learning_rate": 5e-05, "loss": 1.4594, "step": 3619 }, { "epoch": 0.5792926868298928, "grad_norm": 0.323312908411026, "learning_rate": 5e-05, "loss": 1.4128, "step": 3620 }, { "epoch": 0.5794527124339894, "grad_norm": 0.34194228053092957, "learning_rate": 5e-05, "loss": 1.5182, "step": 3621 }, { "epoch": 0.5796127380380861, "grad_norm": 0.3313743472099304, "learning_rate": 5e-05, "loss": 1.4964, "step": 3622 }, { "epoch": 0.5797727636421828, "grad_norm": 0.34830954670906067, "learning_rate": 5e-05, "loss": 1.5239, "step": 3623 }, { "epoch": 0.5799327892462794, "grad_norm": 0.3159560263156891, "learning_rate": 5e-05, "loss": 1.3817, "step": 3624 }, { "epoch": 0.5800928148503761, "grad_norm": 0.3209897577762604, "learning_rate": 5e-05, "loss": 1.4568, "step": 3625 }, { "epoch": 0.5802528404544727, "grad_norm": 0.32365134358406067, "learning_rate": 5e-05, "loss": 1.4529, "step": 3626 }, { "epoch": 0.5804128660585693, "grad_norm": 0.33953437209129333, "learning_rate": 5e-05, "loss": 1.4673, "step": 3627 }, { "epoch": 0.580572891662666, "grad_norm": 0.3171996772289276, "learning_rate": 5e-05, "loss": 1.4395, "step": 3628 }, { "epoch": 0.5807329172667627, "grad_norm": 0.3372490704059601, "learning_rate": 5e-05, "loss": 1.5467, "step": 3629 }, { "epoch": 0.5808929428708594, "grad_norm": 0.3242195248603821, "learning_rate": 5e-05, "loss": 1.454, "step": 3630 }, { "epoch": 0.581052968474956, "grad_norm": 0.3316873013973236, "learning_rate": 5e-05, "loss": 1.5144, "step": 3631 }, { "epoch": 0.5812129940790527, "grad_norm": 0.3333560824394226, "learning_rate": 5e-05, "loss": 1.5423, "step": 3632 }, { "epoch": 0.5813730196831493, "grad_norm": 0.3404422998428345, "learning_rate": 5e-05, "loss": 1.5149, "step": 3633 }, { "epoch": 0.5815330452872459, "grad_norm": 0.3297191560268402, "learning_rate": 5e-05, "loss": 1.4908, "step": 3634 }, { "epoch": 0.5816930708913426, "grad_norm": 0.33073872327804565, "learning_rate": 5e-05, "loss": 1.5583, "step": 3635 }, { "epoch": 0.5818530964954393, "grad_norm": 0.35302942991256714, "learning_rate": 5e-05, "loss": 1.5773, "step": 3636 }, { "epoch": 0.5820131220995359, "grad_norm": 0.3517844080924988, "learning_rate": 5e-05, "loss": 1.4465, "step": 3637 }, { "epoch": 0.5821731477036326, "grad_norm": 0.3291088044643402, "learning_rate": 5e-05, "loss": 1.4909, "step": 3638 }, { "epoch": 0.5823331733077293, "grad_norm": 0.32745909690856934, "learning_rate": 5e-05, "loss": 1.4294, "step": 3639 }, { "epoch": 0.5824931989118259, "grad_norm": 0.3536793291568756, "learning_rate": 5e-05, "loss": 1.5343, "step": 3640 }, { "epoch": 0.5826532245159225, "grad_norm": 0.3296791613101959, "learning_rate": 5e-05, "loss": 1.4854, "step": 3641 }, { "epoch": 0.5828132501200192, "grad_norm": 0.32799115777015686, "learning_rate": 5e-05, "loss": 1.5442, "step": 3642 }, { "epoch": 0.5829732757241158, "grad_norm": 0.3344719707965851, "learning_rate": 5e-05, "loss": 1.4928, "step": 3643 }, { "epoch": 0.5831333013282125, "grad_norm": 0.3264632523059845, "learning_rate": 5e-05, "loss": 1.5187, "step": 3644 }, { "epoch": 0.5832933269323092, "grad_norm": 0.3271355926990509, "learning_rate": 5e-05, "loss": 1.5564, "step": 3645 }, { "epoch": 0.5834533525364058, "grad_norm": 0.34366655349731445, "learning_rate": 5e-05, "loss": 1.5095, "step": 3646 }, { "epoch": 0.5836133781405025, "grad_norm": 0.3337094485759735, "learning_rate": 5e-05, "loss": 1.4273, "step": 3647 }, { "epoch": 0.5837734037445992, "grad_norm": 0.3351392447948456, "learning_rate": 5e-05, "loss": 1.5974, "step": 3648 }, { "epoch": 0.5839334293486957, "grad_norm": 0.319821298122406, "learning_rate": 5e-05, "loss": 1.4807, "step": 3649 }, { "epoch": 0.5840934549527924, "grad_norm": 0.33414456248283386, "learning_rate": 5e-05, "loss": 1.4933, "step": 3650 }, { "epoch": 0.5842534805568891, "grad_norm": 0.33294954895973206, "learning_rate": 5e-05, "loss": 1.4819, "step": 3651 }, { "epoch": 0.5844135061609858, "grad_norm": 0.3275805115699768, "learning_rate": 5e-05, "loss": 1.4269, "step": 3652 }, { "epoch": 0.5845735317650824, "grad_norm": 0.3291231095790863, "learning_rate": 5e-05, "loss": 1.423, "step": 3653 }, { "epoch": 0.5847335573691791, "grad_norm": 0.32276397943496704, "learning_rate": 5e-05, "loss": 1.4528, "step": 3654 }, { "epoch": 0.5848935829732758, "grad_norm": 0.3397728502750397, "learning_rate": 5e-05, "loss": 1.4901, "step": 3655 }, { "epoch": 0.5850536085773724, "grad_norm": 0.33446556329727173, "learning_rate": 5e-05, "loss": 1.4972, "step": 3656 }, { "epoch": 0.585213634181469, "grad_norm": 0.3316955864429474, "learning_rate": 5e-05, "loss": 1.4489, "step": 3657 }, { "epoch": 0.5853736597855657, "grad_norm": 0.33453646302223206, "learning_rate": 5e-05, "loss": 1.4921, "step": 3658 }, { "epoch": 0.5855336853896623, "grad_norm": 0.3313565254211426, "learning_rate": 5e-05, "loss": 1.5364, "step": 3659 }, { "epoch": 0.585693710993759, "grad_norm": 0.31842827796936035, "learning_rate": 5e-05, "loss": 1.4518, "step": 3660 }, { "epoch": 0.5858537365978557, "grad_norm": 0.34416458010673523, "learning_rate": 5e-05, "loss": 1.538, "step": 3661 }, { "epoch": 0.5860137622019523, "grad_norm": 0.341431587934494, "learning_rate": 5e-05, "loss": 1.5054, "step": 3662 }, { "epoch": 0.586173787806049, "grad_norm": 0.33085954189300537, "learning_rate": 5e-05, "loss": 1.5163, "step": 3663 }, { "epoch": 0.5863338134101457, "grad_norm": 0.32411086559295654, "learning_rate": 5e-05, "loss": 1.4706, "step": 3664 }, { "epoch": 0.5864938390142422, "grad_norm": 0.3261909484863281, "learning_rate": 5e-05, "loss": 1.4735, "step": 3665 }, { "epoch": 0.5866538646183389, "grad_norm": 0.3290809988975525, "learning_rate": 5e-05, "loss": 1.4559, "step": 3666 }, { "epoch": 0.5868138902224356, "grad_norm": 0.33938318490982056, "learning_rate": 5e-05, "loss": 1.4903, "step": 3667 }, { "epoch": 0.5869739158265322, "grad_norm": 0.3207468092441559, "learning_rate": 5e-05, "loss": 1.4604, "step": 3668 }, { "epoch": 0.5871339414306289, "grad_norm": 0.32467368245124817, "learning_rate": 5e-05, "loss": 1.4336, "step": 3669 }, { "epoch": 0.5872939670347256, "grad_norm": 0.33421051502227783, "learning_rate": 5e-05, "loss": 1.4912, "step": 3670 }, { "epoch": 0.5874539926388223, "grad_norm": 0.3330289423465729, "learning_rate": 5e-05, "loss": 1.5555, "step": 3671 }, { "epoch": 0.5876140182429188, "grad_norm": 0.3258289098739624, "learning_rate": 5e-05, "loss": 1.5429, "step": 3672 }, { "epoch": 0.5877740438470155, "grad_norm": 0.3194755017757416, "learning_rate": 5e-05, "loss": 1.4703, "step": 3673 }, { "epoch": 0.5879340694511122, "grad_norm": 0.3293653726577759, "learning_rate": 5e-05, "loss": 1.5181, "step": 3674 }, { "epoch": 0.5880940950552088, "grad_norm": 0.3239315450191498, "learning_rate": 5e-05, "loss": 1.4721, "step": 3675 }, { "epoch": 0.5882541206593055, "grad_norm": 0.3242948055267334, "learning_rate": 5e-05, "loss": 1.4166, "step": 3676 }, { "epoch": 0.5884141462634022, "grad_norm": 0.345174103975296, "learning_rate": 5e-05, "loss": 1.5224, "step": 3677 }, { "epoch": 0.5885741718674988, "grad_norm": 0.34471526741981506, "learning_rate": 5e-05, "loss": 1.4993, "step": 3678 }, { "epoch": 0.5887341974715955, "grad_norm": 0.3236566483974457, "learning_rate": 5e-05, "loss": 1.469, "step": 3679 }, { "epoch": 0.5888942230756921, "grad_norm": 0.3499841094017029, "learning_rate": 5e-05, "loss": 1.4803, "step": 3680 }, { "epoch": 0.5890542486797887, "grad_norm": 0.33926063776016235, "learning_rate": 5e-05, "loss": 1.4321, "step": 3681 }, { "epoch": 0.5892142742838854, "grad_norm": 0.33402615785598755, "learning_rate": 5e-05, "loss": 1.543, "step": 3682 }, { "epoch": 0.5893742998879821, "grad_norm": 0.3455529808998108, "learning_rate": 5e-05, "loss": 1.5191, "step": 3683 }, { "epoch": 0.5895343254920787, "grad_norm": 0.3421160578727722, "learning_rate": 5e-05, "loss": 1.4792, "step": 3684 }, { "epoch": 0.5896943510961754, "grad_norm": 0.3194412887096405, "learning_rate": 5e-05, "loss": 1.4104, "step": 3685 }, { "epoch": 0.5898543767002721, "grad_norm": 0.3494803309440613, "learning_rate": 5e-05, "loss": 1.4702, "step": 3686 }, { "epoch": 0.5900144023043687, "grad_norm": 0.3483862578868866, "learning_rate": 5e-05, "loss": 1.5063, "step": 3687 }, { "epoch": 0.5901744279084653, "grad_norm": 0.34799864888191223, "learning_rate": 5e-05, "loss": 1.4574, "step": 3688 }, { "epoch": 0.590334453512562, "grad_norm": 0.3426370918750763, "learning_rate": 5e-05, "loss": 1.5402, "step": 3689 }, { "epoch": 0.5904944791166586, "grad_norm": 0.34436002373695374, "learning_rate": 5e-05, "loss": 1.4865, "step": 3690 }, { "epoch": 0.5906545047207553, "grad_norm": 0.31878164410591125, "learning_rate": 5e-05, "loss": 1.4493, "step": 3691 }, { "epoch": 0.590814530324852, "grad_norm": 0.322559118270874, "learning_rate": 5e-05, "loss": 1.4504, "step": 3692 }, { "epoch": 0.5909745559289487, "grad_norm": 0.3317222595214844, "learning_rate": 5e-05, "loss": 1.4627, "step": 3693 }, { "epoch": 0.5911345815330453, "grad_norm": 0.3464432954788208, "learning_rate": 5e-05, "loss": 1.5412, "step": 3694 }, { "epoch": 0.591294607137142, "grad_norm": 0.3309410512447357, "learning_rate": 5e-05, "loss": 1.4762, "step": 3695 }, { "epoch": 0.5914546327412386, "grad_norm": 0.32848384976387024, "learning_rate": 5e-05, "loss": 1.4272, "step": 3696 }, { "epoch": 0.5916146583453352, "grad_norm": 0.33122649788856506, "learning_rate": 5e-05, "loss": 1.5216, "step": 3697 }, { "epoch": 0.5917746839494319, "grad_norm": 0.34627196192741394, "learning_rate": 5e-05, "loss": 1.5622, "step": 3698 }, { "epoch": 0.5919347095535286, "grad_norm": 0.34049129486083984, "learning_rate": 5e-05, "loss": 1.4938, "step": 3699 }, { "epoch": 0.5920947351576252, "grad_norm": 0.33826330304145813, "learning_rate": 5e-05, "loss": 1.4867, "step": 3700 }, { "epoch": 0.5922547607617219, "grad_norm": 0.3385780155658722, "learning_rate": 5e-05, "loss": 1.522, "step": 3701 }, { "epoch": 0.5924147863658186, "grad_norm": 0.33784058690071106, "learning_rate": 5e-05, "loss": 1.4633, "step": 3702 }, { "epoch": 0.5925748119699152, "grad_norm": 0.3377549350261688, "learning_rate": 5e-05, "loss": 1.3859, "step": 3703 }, { "epoch": 0.5927348375740118, "grad_norm": 0.3390604257583618, "learning_rate": 5e-05, "loss": 1.4783, "step": 3704 }, { "epoch": 0.5928948631781085, "grad_norm": 0.33594372868537903, "learning_rate": 5e-05, "loss": 1.4781, "step": 3705 }, { "epoch": 0.5930548887822051, "grad_norm": 0.3441244661808014, "learning_rate": 5e-05, "loss": 1.5, "step": 3706 }, { "epoch": 0.5932149143863018, "grad_norm": 0.33237114548683167, "learning_rate": 5e-05, "loss": 1.4935, "step": 3707 }, { "epoch": 0.5933749399903985, "grad_norm": 0.32647189497947693, "learning_rate": 5e-05, "loss": 1.4367, "step": 3708 }, { "epoch": 0.5935349655944951, "grad_norm": 0.32274872064590454, "learning_rate": 5e-05, "loss": 1.4395, "step": 3709 }, { "epoch": 0.5936949911985918, "grad_norm": 0.32576507329940796, "learning_rate": 5e-05, "loss": 1.4632, "step": 3710 }, { "epoch": 0.5938550168026885, "grad_norm": 0.3450562059879303, "learning_rate": 5e-05, "loss": 1.4603, "step": 3711 }, { "epoch": 0.594015042406785, "grad_norm": 0.3225429058074951, "learning_rate": 5e-05, "loss": 1.5265, "step": 3712 }, { "epoch": 0.5941750680108817, "grad_norm": 0.3236158788204193, "learning_rate": 5e-05, "loss": 1.4601, "step": 3713 }, { "epoch": 0.5943350936149784, "grad_norm": 0.34236589074134827, "learning_rate": 5e-05, "loss": 1.5034, "step": 3714 }, { "epoch": 0.594495119219075, "grad_norm": 0.3299331068992615, "learning_rate": 5e-05, "loss": 1.4901, "step": 3715 }, { "epoch": 0.5946551448231717, "grad_norm": 0.34156712889671326, "learning_rate": 5e-05, "loss": 1.5114, "step": 3716 }, { "epoch": 0.5948151704272684, "grad_norm": 0.33136987686157227, "learning_rate": 5e-05, "loss": 1.4665, "step": 3717 }, { "epoch": 0.5949751960313651, "grad_norm": 0.3415651023387909, "learning_rate": 5e-05, "loss": 1.5791, "step": 3718 }, { "epoch": 0.5951352216354616, "grad_norm": 0.3563084602355957, "learning_rate": 5e-05, "loss": 1.5341, "step": 3719 }, { "epoch": 0.5952952472395583, "grad_norm": 0.32968249917030334, "learning_rate": 5e-05, "loss": 1.4837, "step": 3720 }, { "epoch": 0.595455272843655, "grad_norm": 0.33696991205215454, "learning_rate": 5e-05, "loss": 1.5819, "step": 3721 }, { "epoch": 0.5956152984477516, "grad_norm": 0.33373114466667175, "learning_rate": 5e-05, "loss": 1.5373, "step": 3722 }, { "epoch": 0.5957753240518483, "grad_norm": 0.32624611258506775, "learning_rate": 5e-05, "loss": 1.4519, "step": 3723 }, { "epoch": 0.595935349655945, "grad_norm": 0.32251986861228943, "learning_rate": 5e-05, "loss": 1.3678, "step": 3724 }, { "epoch": 0.5960953752600416, "grad_norm": 0.31843340396881104, "learning_rate": 5e-05, "loss": 1.422, "step": 3725 }, { "epoch": 0.5962554008641383, "grad_norm": 0.3289612829685211, "learning_rate": 5e-05, "loss": 1.4626, "step": 3726 }, { "epoch": 0.5964154264682349, "grad_norm": 0.3495955765247345, "learning_rate": 5e-05, "loss": 1.5071, "step": 3727 }, { "epoch": 0.5965754520723315, "grad_norm": 0.35381853580474854, "learning_rate": 5e-05, "loss": 1.5551, "step": 3728 }, { "epoch": 0.5967354776764282, "grad_norm": 0.34112268686294556, "learning_rate": 5e-05, "loss": 1.5157, "step": 3729 }, { "epoch": 0.5968955032805249, "grad_norm": 0.3370564877986908, "learning_rate": 5e-05, "loss": 1.525, "step": 3730 }, { "epoch": 0.5970555288846215, "grad_norm": 0.3269806504249573, "learning_rate": 5e-05, "loss": 1.4944, "step": 3731 }, { "epoch": 0.5972155544887182, "grad_norm": 0.32856494188308716, "learning_rate": 5e-05, "loss": 1.5063, "step": 3732 }, { "epoch": 0.5973755800928149, "grad_norm": 0.34519344568252563, "learning_rate": 5e-05, "loss": 1.5531, "step": 3733 }, { "epoch": 0.5975356056969116, "grad_norm": 0.3477029800415039, "learning_rate": 5e-05, "loss": 1.6037, "step": 3734 }, { "epoch": 0.5976956313010081, "grad_norm": 0.3415643274784088, "learning_rate": 5e-05, "loss": 1.478, "step": 3735 }, { "epoch": 0.5978556569051048, "grad_norm": 0.3242512047290802, "learning_rate": 5e-05, "loss": 1.458, "step": 3736 }, { "epoch": 0.5980156825092015, "grad_norm": 0.3303132951259613, "learning_rate": 5e-05, "loss": 1.5208, "step": 3737 }, { "epoch": 0.5981757081132981, "grad_norm": 0.3308851718902588, "learning_rate": 5e-05, "loss": 1.4451, "step": 3738 }, { "epoch": 0.5983357337173948, "grad_norm": 0.3545796871185303, "learning_rate": 5e-05, "loss": 1.5733, "step": 3739 }, { "epoch": 0.5984957593214915, "grad_norm": 0.34401804208755493, "learning_rate": 5e-05, "loss": 1.544, "step": 3740 }, { "epoch": 0.5986557849255881, "grad_norm": 0.34189870953559875, "learning_rate": 5e-05, "loss": 1.545, "step": 3741 }, { "epoch": 0.5988158105296848, "grad_norm": 0.34099867939949036, "learning_rate": 5e-05, "loss": 1.5768, "step": 3742 }, { "epoch": 0.5989758361337814, "grad_norm": 0.34047931432724, "learning_rate": 5e-05, "loss": 1.4929, "step": 3743 }, { "epoch": 0.599135861737878, "grad_norm": 0.3427598178386688, "learning_rate": 5e-05, "loss": 1.551, "step": 3744 }, { "epoch": 0.5992958873419747, "grad_norm": 0.31637492775917053, "learning_rate": 5e-05, "loss": 1.4639, "step": 3745 }, { "epoch": 0.5994559129460714, "grad_norm": 0.3372688889503479, "learning_rate": 5e-05, "loss": 1.5055, "step": 3746 }, { "epoch": 0.599615938550168, "grad_norm": 0.3410218060016632, "learning_rate": 5e-05, "loss": 1.4897, "step": 3747 }, { "epoch": 0.5997759641542647, "grad_norm": 0.32779625058174133, "learning_rate": 5e-05, "loss": 1.433, "step": 3748 }, { "epoch": 0.5999359897583614, "grad_norm": 0.338044136762619, "learning_rate": 5e-05, "loss": 1.4977, "step": 3749 }, { "epoch": 0.600096015362458, "grad_norm": 0.3182641267776489, "learning_rate": 5e-05, "loss": 1.4428, "step": 3750 }, { "epoch": 0.6002560409665546, "grad_norm": 0.3363727033138275, "learning_rate": 5e-05, "loss": 1.4816, "step": 3751 }, { "epoch": 0.6004160665706513, "grad_norm": 0.35120999813079834, "learning_rate": 5e-05, "loss": 1.5274, "step": 3752 }, { "epoch": 0.6005760921747479, "grad_norm": 0.3411032557487488, "learning_rate": 5e-05, "loss": 1.5336, "step": 3753 }, { "epoch": 0.6007361177788446, "grad_norm": 0.33020010590553284, "learning_rate": 5e-05, "loss": 1.4137, "step": 3754 }, { "epoch": 0.6008961433829413, "grad_norm": 0.3465379774570465, "learning_rate": 5e-05, "loss": 1.5979, "step": 3755 }, { "epoch": 0.601056168987038, "grad_norm": 0.324796199798584, "learning_rate": 5e-05, "loss": 1.4046, "step": 3756 }, { "epoch": 0.6012161945911346, "grad_norm": 0.34822696447372437, "learning_rate": 5e-05, "loss": 1.5997, "step": 3757 }, { "epoch": 0.6013762201952313, "grad_norm": 0.3363582491874695, "learning_rate": 5e-05, "loss": 1.4922, "step": 3758 }, { "epoch": 0.6015362457993279, "grad_norm": 0.34314706921577454, "learning_rate": 5e-05, "loss": 1.4303, "step": 3759 }, { "epoch": 0.6016962714034245, "grad_norm": 0.34583580493927, "learning_rate": 5e-05, "loss": 1.5558, "step": 3760 }, { "epoch": 0.6018562970075212, "grad_norm": 0.3447376787662506, "learning_rate": 5e-05, "loss": 1.4701, "step": 3761 }, { "epoch": 0.6020163226116179, "grad_norm": 0.3343273103237152, "learning_rate": 5e-05, "loss": 1.5216, "step": 3762 }, { "epoch": 0.6021763482157145, "grad_norm": 0.33131757378578186, "learning_rate": 5e-05, "loss": 1.5068, "step": 3763 }, { "epoch": 0.6023363738198112, "grad_norm": 0.3478386104106903, "learning_rate": 5e-05, "loss": 1.4535, "step": 3764 }, { "epoch": 0.6024963994239079, "grad_norm": 0.3523014783859253, "learning_rate": 5e-05, "loss": 1.5157, "step": 3765 }, { "epoch": 0.6026564250280044, "grad_norm": 0.33180567622184753, "learning_rate": 5e-05, "loss": 1.4421, "step": 3766 }, { "epoch": 0.6028164506321011, "grad_norm": 0.3354893624782562, "learning_rate": 5e-05, "loss": 1.4871, "step": 3767 }, { "epoch": 0.6029764762361978, "grad_norm": 0.35503295063972473, "learning_rate": 5e-05, "loss": 1.5734, "step": 3768 }, { "epoch": 0.6031365018402944, "grad_norm": 0.3427967429161072, "learning_rate": 5e-05, "loss": 1.4832, "step": 3769 }, { "epoch": 0.6032965274443911, "grad_norm": 0.32320278882980347, "learning_rate": 5e-05, "loss": 1.4529, "step": 3770 }, { "epoch": 0.6034565530484878, "grad_norm": 0.34010475873947144, "learning_rate": 5e-05, "loss": 1.5188, "step": 3771 }, { "epoch": 0.6036165786525844, "grad_norm": 0.3293897211551666, "learning_rate": 5e-05, "loss": 1.5133, "step": 3772 }, { "epoch": 0.6037766042566811, "grad_norm": 0.32627132534980774, "learning_rate": 5e-05, "loss": 1.4835, "step": 3773 }, { "epoch": 0.6039366298607777, "grad_norm": 0.33172181248664856, "learning_rate": 5e-05, "loss": 1.5466, "step": 3774 }, { "epoch": 0.6040966554648743, "grad_norm": 0.3256419897079468, "learning_rate": 5e-05, "loss": 1.4991, "step": 3775 }, { "epoch": 0.604256681068971, "grad_norm": 0.32614272832870483, "learning_rate": 5e-05, "loss": 1.4842, "step": 3776 }, { "epoch": 0.6044167066730677, "grad_norm": 0.3345460891723633, "learning_rate": 5e-05, "loss": 1.4701, "step": 3777 }, { "epoch": 0.6045767322771644, "grad_norm": 0.32876020669937134, "learning_rate": 5e-05, "loss": 1.4398, "step": 3778 }, { "epoch": 0.604736757881261, "grad_norm": 0.3487277328968048, "learning_rate": 5e-05, "loss": 1.5725, "step": 3779 }, { "epoch": 0.6048967834853577, "grad_norm": 0.3397892713546753, "learning_rate": 5e-05, "loss": 1.5054, "step": 3780 }, { "epoch": 0.6050568090894544, "grad_norm": 0.340154767036438, "learning_rate": 5e-05, "loss": 1.4874, "step": 3781 }, { "epoch": 0.6052168346935509, "grad_norm": 0.3387327194213867, "learning_rate": 5e-05, "loss": 1.4754, "step": 3782 }, { "epoch": 0.6053768602976476, "grad_norm": 0.32650429010391235, "learning_rate": 5e-05, "loss": 1.4323, "step": 3783 }, { "epoch": 0.6055368859017443, "grad_norm": 0.3488690257072449, "learning_rate": 5e-05, "loss": 1.5491, "step": 3784 }, { "epoch": 0.6056969115058409, "grad_norm": 0.3209966719150543, "learning_rate": 5e-05, "loss": 1.3675, "step": 3785 }, { "epoch": 0.6058569371099376, "grad_norm": 0.34037742018699646, "learning_rate": 5e-05, "loss": 1.5521, "step": 3786 }, { "epoch": 0.6060169627140343, "grad_norm": 0.3330674171447754, "learning_rate": 5e-05, "loss": 1.5109, "step": 3787 }, { "epoch": 0.6061769883181309, "grad_norm": 0.32800647616386414, "learning_rate": 5e-05, "loss": 1.4657, "step": 3788 }, { "epoch": 0.6063370139222276, "grad_norm": 0.35303226113319397, "learning_rate": 5e-05, "loss": 1.5777, "step": 3789 }, { "epoch": 0.6064970395263242, "grad_norm": 0.3335198163986206, "learning_rate": 5e-05, "loss": 1.4865, "step": 3790 }, { "epoch": 0.6066570651304208, "grad_norm": 0.3234965205192566, "learning_rate": 5e-05, "loss": 1.4479, "step": 3791 }, { "epoch": 0.6068170907345175, "grad_norm": 0.33594799041748047, "learning_rate": 5e-05, "loss": 1.487, "step": 3792 }, { "epoch": 0.6069771163386142, "grad_norm": 0.3317270576953888, "learning_rate": 5e-05, "loss": 1.4617, "step": 3793 }, { "epoch": 0.6071371419427108, "grad_norm": 0.33819323778152466, "learning_rate": 5e-05, "loss": 1.4932, "step": 3794 }, { "epoch": 0.6072971675468075, "grad_norm": 0.32937905192375183, "learning_rate": 5e-05, "loss": 1.5138, "step": 3795 }, { "epoch": 0.6074571931509042, "grad_norm": 0.32958686351776123, "learning_rate": 5e-05, "loss": 1.5063, "step": 3796 }, { "epoch": 0.6076172187550009, "grad_norm": 0.34820419549942017, "learning_rate": 5e-05, "loss": 1.4725, "step": 3797 }, { "epoch": 0.6077772443590974, "grad_norm": 0.33938121795654297, "learning_rate": 5e-05, "loss": 1.4748, "step": 3798 }, { "epoch": 0.6079372699631941, "grad_norm": 0.325112521648407, "learning_rate": 5e-05, "loss": 1.4354, "step": 3799 }, { "epoch": 0.6080972955672908, "grad_norm": 0.3329426348209381, "learning_rate": 5e-05, "loss": 1.4525, "step": 3800 }, { "epoch": 0.6082573211713874, "grad_norm": 0.3432607054710388, "learning_rate": 5e-05, "loss": 1.5154, "step": 3801 }, { "epoch": 0.6084173467754841, "grad_norm": 0.3398005962371826, "learning_rate": 5e-05, "loss": 1.5392, "step": 3802 }, { "epoch": 0.6085773723795808, "grad_norm": 0.3429766595363617, "learning_rate": 5e-05, "loss": 1.487, "step": 3803 }, { "epoch": 0.6087373979836774, "grad_norm": 0.33880138397216797, "learning_rate": 5e-05, "loss": 1.5233, "step": 3804 }, { "epoch": 0.608897423587774, "grad_norm": 0.34501901268959045, "learning_rate": 5e-05, "loss": 1.5441, "step": 3805 }, { "epoch": 0.6090574491918707, "grad_norm": 0.34233608841896057, "learning_rate": 5e-05, "loss": 1.5109, "step": 3806 }, { "epoch": 0.6092174747959673, "grad_norm": 0.34552398324012756, "learning_rate": 5e-05, "loss": 1.4616, "step": 3807 }, { "epoch": 0.609377500400064, "grad_norm": 0.33082181215286255, "learning_rate": 5e-05, "loss": 1.5154, "step": 3808 }, { "epoch": 0.6095375260041607, "grad_norm": 0.34567344188690186, "learning_rate": 5e-05, "loss": 1.4836, "step": 3809 }, { "epoch": 0.6096975516082573, "grad_norm": 0.3306031823158264, "learning_rate": 5e-05, "loss": 1.435, "step": 3810 }, { "epoch": 0.609857577212354, "grad_norm": 0.3377089202404022, "learning_rate": 5e-05, "loss": 1.4801, "step": 3811 }, { "epoch": 0.6100176028164507, "grad_norm": 0.3304491639137268, "learning_rate": 5e-05, "loss": 1.4555, "step": 3812 }, { "epoch": 0.6101776284205472, "grad_norm": 0.33773043751716614, "learning_rate": 5e-05, "loss": 1.5036, "step": 3813 }, { "epoch": 0.6103376540246439, "grad_norm": 0.3466084599494934, "learning_rate": 5e-05, "loss": 1.4996, "step": 3814 }, { "epoch": 0.6104976796287406, "grad_norm": 0.32995811104774475, "learning_rate": 5e-05, "loss": 1.4581, "step": 3815 }, { "epoch": 0.6106577052328372, "grad_norm": 0.33925360441207886, "learning_rate": 5e-05, "loss": 1.4843, "step": 3816 }, { "epoch": 0.6108177308369339, "grad_norm": 0.33315300941467285, "learning_rate": 5e-05, "loss": 1.4964, "step": 3817 }, { "epoch": 0.6109777564410306, "grad_norm": 0.3295387029647827, "learning_rate": 5e-05, "loss": 1.4019, "step": 3818 }, { "epoch": 0.6111377820451273, "grad_norm": 0.3420364558696747, "learning_rate": 5e-05, "loss": 1.4831, "step": 3819 }, { "epoch": 0.6112978076492239, "grad_norm": 0.33758023381233215, "learning_rate": 5e-05, "loss": 1.4634, "step": 3820 }, { "epoch": 0.6114578332533205, "grad_norm": 0.33037877082824707, "learning_rate": 5e-05, "loss": 1.5169, "step": 3821 }, { "epoch": 0.6116178588574172, "grad_norm": 0.3223680257797241, "learning_rate": 5e-05, "loss": 1.4285, "step": 3822 }, { "epoch": 0.6117778844615138, "grad_norm": 0.35131293535232544, "learning_rate": 5e-05, "loss": 1.5259, "step": 3823 }, { "epoch": 0.6119379100656105, "grad_norm": 0.33952009677886963, "learning_rate": 5e-05, "loss": 1.4727, "step": 3824 }, { "epoch": 0.6120979356697072, "grad_norm": 0.34007853269577026, "learning_rate": 5e-05, "loss": 1.3604, "step": 3825 }, { "epoch": 0.6122579612738038, "grad_norm": 0.33405745029449463, "learning_rate": 5e-05, "loss": 1.4595, "step": 3826 }, { "epoch": 0.6124179868779005, "grad_norm": 0.33873945474624634, "learning_rate": 5e-05, "loss": 1.4463, "step": 3827 }, { "epoch": 0.6125780124819972, "grad_norm": 0.3455680310726166, "learning_rate": 5e-05, "loss": 1.5156, "step": 3828 }, { "epoch": 0.6127380380860937, "grad_norm": 0.33862510323524475, "learning_rate": 5e-05, "loss": 1.4919, "step": 3829 }, { "epoch": 0.6128980636901904, "grad_norm": 0.34471988677978516, "learning_rate": 5e-05, "loss": 1.4609, "step": 3830 }, { "epoch": 0.6130580892942871, "grad_norm": 0.34561532735824585, "learning_rate": 5e-05, "loss": 1.4448, "step": 3831 }, { "epoch": 0.6132181148983837, "grad_norm": 0.32716530561447144, "learning_rate": 5e-05, "loss": 1.4685, "step": 3832 }, { "epoch": 0.6133781405024804, "grad_norm": 0.34136927127838135, "learning_rate": 5e-05, "loss": 1.5507, "step": 3833 }, { "epoch": 0.6135381661065771, "grad_norm": 0.32774490118026733, "learning_rate": 5e-05, "loss": 1.419, "step": 3834 }, { "epoch": 0.6136981917106737, "grad_norm": 0.33289405703544617, "learning_rate": 5e-05, "loss": 1.4765, "step": 3835 }, { "epoch": 0.6138582173147704, "grad_norm": 0.33695584535598755, "learning_rate": 5e-05, "loss": 1.5092, "step": 3836 }, { "epoch": 0.614018242918867, "grad_norm": 0.33900243043899536, "learning_rate": 5e-05, "loss": 1.4296, "step": 3837 }, { "epoch": 0.6141782685229636, "grad_norm": 0.3384841978549957, "learning_rate": 5e-05, "loss": 1.5199, "step": 3838 }, { "epoch": 0.6143382941270603, "grad_norm": 0.329486608505249, "learning_rate": 5e-05, "loss": 1.5481, "step": 3839 }, { "epoch": 0.614498319731157, "grad_norm": 0.3438698947429657, "learning_rate": 5e-05, "loss": 1.5244, "step": 3840 }, { "epoch": 0.6146583453352537, "grad_norm": 0.3337792158126831, "learning_rate": 5e-05, "loss": 1.4439, "step": 3841 }, { "epoch": 0.6148183709393503, "grad_norm": 0.33399349451065063, "learning_rate": 5e-05, "loss": 1.47, "step": 3842 }, { "epoch": 0.614978396543447, "grad_norm": 0.32748401165008545, "learning_rate": 5e-05, "loss": 1.5064, "step": 3843 }, { "epoch": 0.6151384221475437, "grad_norm": 0.33989572525024414, "learning_rate": 5e-05, "loss": 1.5185, "step": 3844 }, { "epoch": 0.6152984477516402, "grad_norm": 0.3376239240169525, "learning_rate": 5e-05, "loss": 1.5226, "step": 3845 }, { "epoch": 0.6154584733557369, "grad_norm": 0.3364550471305847, "learning_rate": 5e-05, "loss": 1.465, "step": 3846 }, { "epoch": 0.6156184989598336, "grad_norm": 0.3437991142272949, "learning_rate": 5e-05, "loss": 1.5319, "step": 3847 }, { "epoch": 0.6157785245639302, "grad_norm": 0.35439372062683105, "learning_rate": 5e-05, "loss": 1.5071, "step": 3848 }, { "epoch": 0.6159385501680269, "grad_norm": 0.32071954011917114, "learning_rate": 5e-05, "loss": 1.4222, "step": 3849 }, { "epoch": 0.6160985757721236, "grad_norm": 0.3380834460258484, "learning_rate": 5e-05, "loss": 1.5157, "step": 3850 }, { "epoch": 0.6162586013762202, "grad_norm": 0.34858348965644836, "learning_rate": 5e-05, "loss": 1.4913, "step": 3851 }, { "epoch": 0.6164186269803168, "grad_norm": 0.3456399738788605, "learning_rate": 5e-05, "loss": 1.5453, "step": 3852 }, { "epoch": 0.6165786525844135, "grad_norm": 0.33331209421157837, "learning_rate": 5e-05, "loss": 1.486, "step": 3853 }, { "epoch": 0.6167386781885101, "grad_norm": 0.32862135767936707, "learning_rate": 5e-05, "loss": 1.4615, "step": 3854 }, { "epoch": 0.6168987037926068, "grad_norm": 0.3378203511238098, "learning_rate": 5e-05, "loss": 1.5038, "step": 3855 }, { "epoch": 0.6170587293967035, "grad_norm": 0.3345264792442322, "learning_rate": 5e-05, "loss": 1.5252, "step": 3856 }, { "epoch": 0.6172187550008001, "grad_norm": 0.33689072728157043, "learning_rate": 5e-05, "loss": 1.4697, "step": 3857 }, { "epoch": 0.6173787806048968, "grad_norm": 0.34930166602134705, "learning_rate": 5e-05, "loss": 1.5287, "step": 3858 }, { "epoch": 0.6175388062089935, "grad_norm": 0.3644125759601593, "learning_rate": 5e-05, "loss": 1.5445, "step": 3859 }, { "epoch": 0.61769883181309, "grad_norm": 0.327953577041626, "learning_rate": 5e-05, "loss": 1.4813, "step": 3860 }, { "epoch": 0.6178588574171867, "grad_norm": 0.3211265504360199, "learning_rate": 5e-05, "loss": 1.4893, "step": 3861 }, { "epoch": 0.6180188830212834, "grad_norm": 0.35845300555229187, "learning_rate": 5e-05, "loss": 1.4884, "step": 3862 }, { "epoch": 0.61817890862538, "grad_norm": 0.3306148946285248, "learning_rate": 5e-05, "loss": 1.5046, "step": 3863 }, { "epoch": 0.6183389342294767, "grad_norm": 0.33756890892982483, "learning_rate": 5e-05, "loss": 1.4488, "step": 3864 }, { "epoch": 0.6184989598335734, "grad_norm": 0.3312028646469116, "learning_rate": 5e-05, "loss": 1.4517, "step": 3865 }, { "epoch": 0.6186589854376701, "grad_norm": 0.3331134617328644, "learning_rate": 5e-05, "loss": 1.554, "step": 3866 }, { "epoch": 0.6188190110417667, "grad_norm": 0.33136337995529175, "learning_rate": 5e-05, "loss": 1.4432, "step": 3867 }, { "epoch": 0.6189790366458633, "grad_norm": 0.3290943503379822, "learning_rate": 5e-05, "loss": 1.4747, "step": 3868 }, { "epoch": 0.61913906224996, "grad_norm": 0.3260573744773865, "learning_rate": 5e-05, "loss": 1.5063, "step": 3869 }, { "epoch": 0.6192990878540566, "grad_norm": 0.3383466899394989, "learning_rate": 5e-05, "loss": 1.3931, "step": 3870 }, { "epoch": 0.6194591134581533, "grad_norm": 0.33449649810791016, "learning_rate": 5e-05, "loss": 1.4674, "step": 3871 }, { "epoch": 0.61961913906225, "grad_norm": 0.3471534550189972, "learning_rate": 5e-05, "loss": 1.4895, "step": 3872 }, { "epoch": 0.6197791646663466, "grad_norm": 0.3313058912754059, "learning_rate": 5e-05, "loss": 1.4946, "step": 3873 }, { "epoch": 0.6199391902704433, "grad_norm": 0.331498920917511, "learning_rate": 5e-05, "loss": 1.4941, "step": 3874 }, { "epoch": 0.62009921587454, "grad_norm": 0.33869510889053345, "learning_rate": 5e-05, "loss": 1.4823, "step": 3875 }, { "epoch": 0.6202592414786365, "grad_norm": 0.33016544580459595, "learning_rate": 5e-05, "loss": 1.5099, "step": 3876 }, { "epoch": 0.6204192670827332, "grad_norm": 0.32337531447410583, "learning_rate": 5e-05, "loss": 1.469, "step": 3877 }, { "epoch": 0.6205792926868299, "grad_norm": 0.33972790837287903, "learning_rate": 5e-05, "loss": 1.4084, "step": 3878 }, { "epoch": 0.6207393182909265, "grad_norm": 0.3315180540084839, "learning_rate": 5e-05, "loss": 1.4836, "step": 3879 }, { "epoch": 0.6208993438950232, "grad_norm": 0.32753080129623413, "learning_rate": 5e-05, "loss": 1.4335, "step": 3880 }, { "epoch": 0.6210593694991199, "grad_norm": 0.33087387681007385, "learning_rate": 5e-05, "loss": 1.4884, "step": 3881 }, { "epoch": 0.6212193951032166, "grad_norm": 0.3357695937156677, "learning_rate": 5e-05, "loss": 1.4678, "step": 3882 }, { "epoch": 0.6213794207073132, "grad_norm": 0.33625462651252747, "learning_rate": 5e-05, "loss": 1.5318, "step": 3883 }, { "epoch": 0.6215394463114098, "grad_norm": 0.34110769629478455, "learning_rate": 5e-05, "loss": 1.5161, "step": 3884 }, { "epoch": 0.6216994719155065, "grad_norm": 0.34054145216941833, "learning_rate": 5e-05, "loss": 1.4405, "step": 3885 }, { "epoch": 0.6218594975196031, "grad_norm": 0.33346959948539734, "learning_rate": 5e-05, "loss": 1.4967, "step": 3886 }, { "epoch": 0.6220195231236998, "grad_norm": 0.34076523780822754, "learning_rate": 5e-05, "loss": 1.5281, "step": 3887 }, { "epoch": 0.6221795487277965, "grad_norm": 0.3378116488456726, "learning_rate": 5e-05, "loss": 1.4312, "step": 3888 }, { "epoch": 0.6223395743318931, "grad_norm": 0.3428119421005249, "learning_rate": 5e-05, "loss": 1.4821, "step": 3889 }, { "epoch": 0.6224995999359898, "grad_norm": 0.33962222933769226, "learning_rate": 5e-05, "loss": 1.4659, "step": 3890 }, { "epoch": 0.6226596255400864, "grad_norm": 0.3358580470085144, "learning_rate": 5e-05, "loss": 1.5597, "step": 3891 }, { "epoch": 0.622819651144183, "grad_norm": 0.3463849127292633, "learning_rate": 5e-05, "loss": 1.5632, "step": 3892 }, { "epoch": 0.6229796767482797, "grad_norm": 0.3266831338405609, "learning_rate": 5e-05, "loss": 1.4271, "step": 3893 }, { "epoch": 0.6231397023523764, "grad_norm": 0.32580262422561646, "learning_rate": 5e-05, "loss": 1.4771, "step": 3894 }, { "epoch": 0.623299727956473, "grad_norm": 0.33856284618377686, "learning_rate": 5e-05, "loss": 1.5017, "step": 3895 }, { "epoch": 0.6234597535605697, "grad_norm": 0.33197352290153503, "learning_rate": 5e-05, "loss": 1.4196, "step": 3896 }, { "epoch": 0.6236197791646664, "grad_norm": 0.35254570841789246, "learning_rate": 5e-05, "loss": 1.5939, "step": 3897 }, { "epoch": 0.623779804768763, "grad_norm": 0.32944175601005554, "learning_rate": 5e-05, "loss": 1.471, "step": 3898 }, { "epoch": 0.6239398303728596, "grad_norm": 0.31548023223876953, "learning_rate": 5e-05, "loss": 1.4148, "step": 3899 }, { "epoch": 0.6240998559769563, "grad_norm": 0.3505900502204895, "learning_rate": 5e-05, "loss": 1.5535, "step": 3900 }, { "epoch": 0.6242598815810529, "grad_norm": 0.33270254731178284, "learning_rate": 5e-05, "loss": 1.4761, "step": 3901 }, { "epoch": 0.6244199071851496, "grad_norm": 0.3381953835487366, "learning_rate": 5e-05, "loss": 1.5526, "step": 3902 }, { "epoch": 0.6245799327892463, "grad_norm": 0.34588292241096497, "learning_rate": 5e-05, "loss": 1.4428, "step": 3903 }, { "epoch": 0.624739958393343, "grad_norm": 0.3382006883621216, "learning_rate": 5e-05, "loss": 1.5114, "step": 3904 }, { "epoch": 0.6248999839974396, "grad_norm": 0.34056752920150757, "learning_rate": 5e-05, "loss": 1.4575, "step": 3905 }, { "epoch": 0.6250600096015363, "grad_norm": 0.3518310785293579, "learning_rate": 5e-05, "loss": 1.5394, "step": 3906 }, { "epoch": 0.6252200352056329, "grad_norm": 0.34844496846199036, "learning_rate": 5e-05, "loss": 1.5266, "step": 3907 }, { "epoch": 0.6253800608097295, "grad_norm": 0.341852068901062, "learning_rate": 5e-05, "loss": 1.5241, "step": 3908 }, { "epoch": 0.6255400864138262, "grad_norm": 0.32696500420570374, "learning_rate": 5e-05, "loss": 1.4257, "step": 3909 }, { "epoch": 0.6257001120179229, "grad_norm": 0.33762845396995544, "learning_rate": 5e-05, "loss": 1.4091, "step": 3910 }, { "epoch": 0.6258601376220195, "grad_norm": 0.3374609351158142, "learning_rate": 5e-05, "loss": 1.4765, "step": 3911 }, { "epoch": 0.6260201632261162, "grad_norm": 0.33739015460014343, "learning_rate": 5e-05, "loss": 1.5017, "step": 3912 }, { "epoch": 0.6261801888302129, "grad_norm": 0.33828166127204895, "learning_rate": 5e-05, "loss": 1.4974, "step": 3913 }, { "epoch": 0.6263402144343095, "grad_norm": 0.33279111981391907, "learning_rate": 5e-05, "loss": 1.5238, "step": 3914 }, { "epoch": 0.6265002400384061, "grad_norm": 0.3351408839225769, "learning_rate": 5e-05, "loss": 1.4643, "step": 3915 }, { "epoch": 0.6266602656425028, "grad_norm": 0.3276762366294861, "learning_rate": 5e-05, "loss": 1.4422, "step": 3916 }, { "epoch": 0.6268202912465994, "grad_norm": 0.3302064538002014, "learning_rate": 5e-05, "loss": 1.4298, "step": 3917 }, { "epoch": 0.6269803168506961, "grad_norm": 0.35648515820503235, "learning_rate": 5e-05, "loss": 1.4529, "step": 3918 }, { "epoch": 0.6271403424547928, "grad_norm": 0.33735421299934387, "learning_rate": 5e-05, "loss": 1.5043, "step": 3919 }, { "epoch": 0.6273003680588894, "grad_norm": 0.3488411009311676, "learning_rate": 5e-05, "loss": 1.4761, "step": 3920 }, { "epoch": 0.6274603936629861, "grad_norm": 0.3270598351955414, "learning_rate": 5e-05, "loss": 1.4875, "step": 3921 }, { "epoch": 0.6276204192670828, "grad_norm": 0.3350156545639038, "learning_rate": 5e-05, "loss": 1.4756, "step": 3922 }, { "epoch": 0.6277804448711793, "grad_norm": 0.33899787068367004, "learning_rate": 5e-05, "loss": 1.5023, "step": 3923 }, { "epoch": 0.627940470475276, "grad_norm": 0.3322708308696747, "learning_rate": 5e-05, "loss": 1.481, "step": 3924 }, { "epoch": 0.6281004960793727, "grad_norm": 0.3222733736038208, "learning_rate": 5e-05, "loss": 1.446, "step": 3925 }, { "epoch": 0.6282605216834694, "grad_norm": 0.3414157032966614, "learning_rate": 5e-05, "loss": 1.5402, "step": 3926 }, { "epoch": 0.628420547287566, "grad_norm": 0.33347463607788086, "learning_rate": 5e-05, "loss": 1.4586, "step": 3927 }, { "epoch": 0.6285805728916627, "grad_norm": 0.3141883909702301, "learning_rate": 5e-05, "loss": 1.4428, "step": 3928 }, { "epoch": 0.6287405984957594, "grad_norm": 0.3401998281478882, "learning_rate": 5e-05, "loss": 1.4561, "step": 3929 }, { "epoch": 0.628900624099856, "grad_norm": 0.33827272057533264, "learning_rate": 5e-05, "loss": 1.4731, "step": 3930 }, { "epoch": 0.6290606497039526, "grad_norm": 0.3433549404144287, "learning_rate": 5e-05, "loss": 1.4844, "step": 3931 }, { "epoch": 0.6292206753080493, "grad_norm": 0.341951847076416, "learning_rate": 5e-05, "loss": 1.4981, "step": 3932 }, { "epoch": 0.6293807009121459, "grad_norm": 0.328674852848053, "learning_rate": 5e-05, "loss": 1.408, "step": 3933 }, { "epoch": 0.6295407265162426, "grad_norm": 0.3511683940887451, "learning_rate": 5e-05, "loss": 1.4868, "step": 3934 }, { "epoch": 0.6297007521203393, "grad_norm": 0.3338421881198883, "learning_rate": 5e-05, "loss": 1.5132, "step": 3935 }, { "epoch": 0.6298607777244359, "grad_norm": 0.33003783226013184, "learning_rate": 5e-05, "loss": 1.4764, "step": 3936 }, { "epoch": 0.6300208033285326, "grad_norm": 0.3442326784133911, "learning_rate": 5e-05, "loss": 1.4977, "step": 3937 }, { "epoch": 0.6301808289326292, "grad_norm": 0.33717039227485657, "learning_rate": 5e-05, "loss": 1.4573, "step": 3938 }, { "epoch": 0.6303408545367258, "grad_norm": 0.3296431005001068, "learning_rate": 5e-05, "loss": 1.5096, "step": 3939 }, { "epoch": 0.6305008801408225, "grad_norm": 0.3392716348171234, "learning_rate": 5e-05, "loss": 1.4924, "step": 3940 }, { "epoch": 0.6306609057449192, "grad_norm": 0.3336281478404999, "learning_rate": 5e-05, "loss": 1.4479, "step": 3941 }, { "epoch": 0.6308209313490158, "grad_norm": 0.32978883385658264, "learning_rate": 5e-05, "loss": 1.4401, "step": 3942 }, { "epoch": 0.6309809569531125, "grad_norm": 0.33548134565353394, "learning_rate": 5e-05, "loss": 1.4887, "step": 3943 }, { "epoch": 0.6311409825572092, "grad_norm": 0.3522873818874359, "learning_rate": 5e-05, "loss": 1.635, "step": 3944 }, { "epoch": 0.6313010081613059, "grad_norm": 0.34604424238204956, "learning_rate": 5e-05, "loss": 1.5556, "step": 3945 }, { "epoch": 0.6314610337654024, "grad_norm": 0.3281524181365967, "learning_rate": 5e-05, "loss": 1.4661, "step": 3946 }, { "epoch": 0.6316210593694991, "grad_norm": 0.33848461508750916, "learning_rate": 5e-05, "loss": 1.5151, "step": 3947 }, { "epoch": 0.6317810849735958, "grad_norm": 0.3386930823326111, "learning_rate": 5e-05, "loss": 1.3998, "step": 3948 }, { "epoch": 0.6319411105776924, "grad_norm": 0.33624914288520813, "learning_rate": 5e-05, "loss": 1.4306, "step": 3949 }, { "epoch": 0.6321011361817891, "grad_norm": 0.34310805797576904, "learning_rate": 5e-05, "loss": 1.5459, "step": 3950 }, { "epoch": 0.6322611617858858, "grad_norm": 0.33212903141975403, "learning_rate": 5e-05, "loss": 1.4466, "step": 3951 }, { "epoch": 0.6324211873899824, "grad_norm": 0.32701608538627625, "learning_rate": 5e-05, "loss": 1.5437, "step": 3952 }, { "epoch": 0.6325812129940791, "grad_norm": 0.3343145251274109, "learning_rate": 5e-05, "loss": 1.507, "step": 3953 }, { "epoch": 0.6327412385981757, "grad_norm": 0.3388446867465973, "learning_rate": 5e-05, "loss": 1.5034, "step": 3954 }, { "epoch": 0.6329012642022723, "grad_norm": 0.32914817333221436, "learning_rate": 5e-05, "loss": 1.5216, "step": 3955 }, { "epoch": 0.633061289806369, "grad_norm": 0.33665892481803894, "learning_rate": 5e-05, "loss": 1.5206, "step": 3956 }, { "epoch": 0.6332213154104657, "grad_norm": 0.33414602279663086, "learning_rate": 5e-05, "loss": 1.4151, "step": 3957 }, { "epoch": 0.6333813410145623, "grad_norm": 0.3421066701412201, "learning_rate": 5e-05, "loss": 1.532, "step": 3958 }, { "epoch": 0.633541366618659, "grad_norm": 0.3392653167247772, "learning_rate": 5e-05, "loss": 1.4837, "step": 3959 }, { "epoch": 0.6337013922227557, "grad_norm": 0.316677451133728, "learning_rate": 5e-05, "loss": 1.3872, "step": 3960 }, { "epoch": 0.6338614178268523, "grad_norm": 0.34353822469711304, "learning_rate": 5e-05, "loss": 1.4937, "step": 3961 }, { "epoch": 0.6340214434309489, "grad_norm": 0.3331367075443268, "learning_rate": 5e-05, "loss": 1.4666, "step": 3962 }, { "epoch": 0.6341814690350456, "grad_norm": 0.3480263352394104, "learning_rate": 5e-05, "loss": 1.4537, "step": 3963 }, { "epoch": 0.6343414946391422, "grad_norm": 0.3301531970500946, "learning_rate": 5e-05, "loss": 1.575, "step": 3964 }, { "epoch": 0.6345015202432389, "grad_norm": 0.34356027841567993, "learning_rate": 5e-05, "loss": 1.4199, "step": 3965 }, { "epoch": 0.6346615458473356, "grad_norm": 0.343982994556427, "learning_rate": 5e-05, "loss": 1.4332, "step": 3966 }, { "epoch": 0.6348215714514323, "grad_norm": 0.3429538607597351, "learning_rate": 5e-05, "loss": 1.4398, "step": 3967 }, { "epoch": 0.6349815970555289, "grad_norm": 0.3481583893299103, "learning_rate": 5e-05, "loss": 1.4893, "step": 3968 }, { "epoch": 0.6351416226596256, "grad_norm": 0.33702483773231506, "learning_rate": 5e-05, "loss": 1.5163, "step": 3969 }, { "epoch": 0.6353016482637222, "grad_norm": 0.3457375764846802, "learning_rate": 5e-05, "loss": 1.4687, "step": 3970 }, { "epoch": 0.6354616738678188, "grad_norm": 0.34237486124038696, "learning_rate": 5e-05, "loss": 1.5012, "step": 3971 }, { "epoch": 0.6356216994719155, "grad_norm": 0.35364794731140137, "learning_rate": 5e-05, "loss": 1.4763, "step": 3972 }, { "epoch": 0.6357817250760122, "grad_norm": 0.34716662764549255, "learning_rate": 5e-05, "loss": 1.4902, "step": 3973 }, { "epoch": 0.6359417506801088, "grad_norm": 0.3398972451686859, "learning_rate": 5e-05, "loss": 1.5587, "step": 3974 }, { "epoch": 0.6361017762842055, "grad_norm": 0.33349382877349854, "learning_rate": 5e-05, "loss": 1.4548, "step": 3975 }, { "epoch": 0.6362618018883022, "grad_norm": 0.33845633268356323, "learning_rate": 5e-05, "loss": 1.5075, "step": 3976 }, { "epoch": 0.6364218274923987, "grad_norm": 0.32620060443878174, "learning_rate": 5e-05, "loss": 1.409, "step": 3977 }, { "epoch": 0.6365818530964954, "grad_norm": 0.3352586627006531, "learning_rate": 5e-05, "loss": 1.5137, "step": 3978 }, { "epoch": 0.6367418787005921, "grad_norm": 0.33890292048454285, "learning_rate": 5e-05, "loss": 1.4032, "step": 3979 }, { "epoch": 0.6369019043046887, "grad_norm": 0.3520145118236542, "learning_rate": 5e-05, "loss": 1.4388, "step": 3980 }, { "epoch": 0.6370619299087854, "grad_norm": 0.3357267379760742, "learning_rate": 5e-05, "loss": 1.4674, "step": 3981 }, { "epoch": 0.6372219555128821, "grad_norm": 0.3471028208732605, "learning_rate": 5e-05, "loss": 1.4654, "step": 3982 }, { "epoch": 0.6373819811169787, "grad_norm": 0.3402099311351776, "learning_rate": 5e-05, "loss": 1.4555, "step": 3983 }, { "epoch": 0.6375420067210754, "grad_norm": 0.32378485798835754, "learning_rate": 5e-05, "loss": 1.4239, "step": 3984 }, { "epoch": 0.637702032325172, "grad_norm": 0.3426347076892853, "learning_rate": 5e-05, "loss": 1.4282, "step": 3985 }, { "epoch": 0.6378620579292686, "grad_norm": 0.32781079411506653, "learning_rate": 5e-05, "loss": 1.4291, "step": 3986 }, { "epoch": 0.6380220835333653, "grad_norm": 0.3618033826351166, "learning_rate": 5e-05, "loss": 1.5293, "step": 3987 }, { "epoch": 0.638182109137462, "grad_norm": 0.3438084125518799, "learning_rate": 5e-05, "loss": 1.3832, "step": 3988 }, { "epoch": 0.6383421347415587, "grad_norm": 0.34055477380752563, "learning_rate": 5e-05, "loss": 1.5552, "step": 3989 }, { "epoch": 0.6385021603456553, "grad_norm": 0.3574841022491455, "learning_rate": 5e-05, "loss": 1.5425, "step": 3990 }, { "epoch": 0.638662185949752, "grad_norm": 0.3416142463684082, "learning_rate": 5e-05, "loss": 1.4678, "step": 3991 }, { "epoch": 0.6388222115538487, "grad_norm": 0.36278557777404785, "learning_rate": 5e-05, "loss": 1.4937, "step": 3992 }, { "epoch": 0.6389822371579452, "grad_norm": 0.339561402797699, "learning_rate": 5e-05, "loss": 1.5134, "step": 3993 }, { "epoch": 0.6391422627620419, "grad_norm": 0.33963677287101746, "learning_rate": 5e-05, "loss": 1.4768, "step": 3994 }, { "epoch": 0.6393022883661386, "grad_norm": 0.35187217593193054, "learning_rate": 5e-05, "loss": 1.4877, "step": 3995 }, { "epoch": 0.6394623139702352, "grad_norm": 0.34004759788513184, "learning_rate": 5e-05, "loss": 1.4838, "step": 3996 }, { "epoch": 0.6396223395743319, "grad_norm": 0.3573174774646759, "learning_rate": 5e-05, "loss": 1.5841, "step": 3997 }, { "epoch": 0.6397823651784286, "grad_norm": 0.3516507148742676, "learning_rate": 5e-05, "loss": 1.558, "step": 3998 }, { "epoch": 0.6399423907825252, "grad_norm": 0.3445981442928314, "learning_rate": 5e-05, "loss": 1.5019, "step": 3999 }, { "epoch": 0.6401024163866219, "grad_norm": 0.33351898193359375, "learning_rate": 5e-05, "loss": 1.482, "step": 4000 }, { "epoch": 0.6402624419907185, "grad_norm": 0.3450971245765686, "learning_rate": 5e-05, "loss": 1.5599, "step": 4001 }, { "epoch": 0.6404224675948151, "grad_norm": 0.33089160919189453, "learning_rate": 5e-05, "loss": 1.4663, "step": 4002 }, { "epoch": 0.6405824931989118, "grad_norm": 0.3520176410675049, "learning_rate": 5e-05, "loss": 1.455, "step": 4003 }, { "epoch": 0.6407425188030085, "grad_norm": 0.34100499749183655, "learning_rate": 5e-05, "loss": 1.4824, "step": 4004 }, { "epoch": 0.6409025444071051, "grad_norm": 0.3273391127586365, "learning_rate": 5e-05, "loss": 1.4599, "step": 4005 }, { "epoch": 0.6410625700112018, "grad_norm": 0.3183225095272064, "learning_rate": 5e-05, "loss": 1.4651, "step": 4006 }, { "epoch": 0.6412225956152985, "grad_norm": 0.33961689472198486, "learning_rate": 5e-05, "loss": 1.4366, "step": 4007 }, { "epoch": 0.6413826212193952, "grad_norm": 0.3521779775619507, "learning_rate": 5e-05, "loss": 1.4604, "step": 4008 }, { "epoch": 0.6415426468234917, "grad_norm": 0.33432936668395996, "learning_rate": 5e-05, "loss": 1.4894, "step": 4009 }, { "epoch": 0.6417026724275884, "grad_norm": 0.33766549825668335, "learning_rate": 5e-05, "loss": 1.463, "step": 4010 }, { "epoch": 0.641862698031685, "grad_norm": 0.3329414129257202, "learning_rate": 5e-05, "loss": 1.4676, "step": 4011 }, { "epoch": 0.6420227236357817, "grad_norm": 0.34704670310020447, "learning_rate": 5e-05, "loss": 1.542, "step": 4012 }, { "epoch": 0.6421827492398784, "grad_norm": 0.3487148582935333, "learning_rate": 5e-05, "loss": 1.4756, "step": 4013 }, { "epoch": 0.6423427748439751, "grad_norm": 0.3493064343929291, "learning_rate": 5e-05, "loss": 1.5369, "step": 4014 }, { "epoch": 0.6425028004480717, "grad_norm": 0.3285278081893921, "learning_rate": 5e-05, "loss": 1.4271, "step": 4015 }, { "epoch": 0.6426628260521684, "grad_norm": 0.34908798336982727, "learning_rate": 5e-05, "loss": 1.5029, "step": 4016 }, { "epoch": 0.642822851656265, "grad_norm": 0.3421923518180847, "learning_rate": 5e-05, "loss": 1.4842, "step": 4017 }, { "epoch": 0.6429828772603616, "grad_norm": 0.33431801199913025, "learning_rate": 5e-05, "loss": 1.4673, "step": 4018 }, { "epoch": 0.6431429028644583, "grad_norm": 0.33869627118110657, "learning_rate": 5e-05, "loss": 1.4506, "step": 4019 }, { "epoch": 0.643302928468555, "grad_norm": 0.3393656015396118, "learning_rate": 5e-05, "loss": 1.4307, "step": 4020 }, { "epoch": 0.6434629540726516, "grad_norm": 0.33596932888031006, "learning_rate": 5e-05, "loss": 1.4312, "step": 4021 }, { "epoch": 0.6436229796767483, "grad_norm": 0.3402356803417206, "learning_rate": 5e-05, "loss": 1.4304, "step": 4022 }, { "epoch": 0.643783005280845, "grad_norm": 0.34586021304130554, "learning_rate": 5e-05, "loss": 1.4752, "step": 4023 }, { "epoch": 0.6439430308849415, "grad_norm": 0.3441677689552307, "learning_rate": 5e-05, "loss": 1.4933, "step": 4024 }, { "epoch": 0.6441030564890382, "grad_norm": 0.337537944316864, "learning_rate": 5e-05, "loss": 1.4317, "step": 4025 }, { "epoch": 0.6442630820931349, "grad_norm": 0.34302079677581787, "learning_rate": 5e-05, "loss": 1.5116, "step": 4026 }, { "epoch": 0.6444231076972315, "grad_norm": 0.33269184827804565, "learning_rate": 5e-05, "loss": 1.5346, "step": 4027 }, { "epoch": 0.6445831333013282, "grad_norm": 0.3402257263660431, "learning_rate": 5e-05, "loss": 1.5349, "step": 4028 }, { "epoch": 0.6447431589054249, "grad_norm": 0.3230588734149933, "learning_rate": 5e-05, "loss": 1.4287, "step": 4029 }, { "epoch": 0.6449031845095216, "grad_norm": 0.3390742242336273, "learning_rate": 5e-05, "loss": 1.4772, "step": 4030 }, { "epoch": 0.6450632101136182, "grad_norm": 0.3371604084968567, "learning_rate": 5e-05, "loss": 1.4596, "step": 4031 }, { "epoch": 0.6452232357177148, "grad_norm": 0.3441160023212433, "learning_rate": 5e-05, "loss": 1.4673, "step": 4032 }, { "epoch": 0.6453832613218115, "grad_norm": 0.34296366572380066, "learning_rate": 5e-05, "loss": 1.495, "step": 4033 }, { "epoch": 0.6455432869259081, "grad_norm": 0.32805633544921875, "learning_rate": 5e-05, "loss": 1.4431, "step": 4034 }, { "epoch": 0.6457033125300048, "grad_norm": 0.33818796277046204, "learning_rate": 5e-05, "loss": 1.5015, "step": 4035 }, { "epoch": 0.6458633381341015, "grad_norm": 0.35088494420051575, "learning_rate": 5e-05, "loss": 1.5511, "step": 4036 }, { "epoch": 0.6460233637381981, "grad_norm": 0.33855292201042175, "learning_rate": 5e-05, "loss": 1.4588, "step": 4037 }, { "epoch": 0.6461833893422948, "grad_norm": 0.3227682411670685, "learning_rate": 5e-05, "loss": 1.4676, "step": 4038 }, { "epoch": 0.6463434149463915, "grad_norm": 0.3209127187728882, "learning_rate": 5e-05, "loss": 1.4011, "step": 4039 }, { "epoch": 0.646503440550488, "grad_norm": 0.3242299556732178, "learning_rate": 5e-05, "loss": 1.451, "step": 4040 }, { "epoch": 0.6466634661545847, "grad_norm": 0.3388531804084778, "learning_rate": 5e-05, "loss": 1.4585, "step": 4041 }, { "epoch": 0.6468234917586814, "grad_norm": 0.34976926445961, "learning_rate": 5e-05, "loss": 1.5786, "step": 4042 }, { "epoch": 0.646983517362778, "grad_norm": 0.32594388723373413, "learning_rate": 5e-05, "loss": 1.4122, "step": 4043 }, { "epoch": 0.6471435429668747, "grad_norm": 0.3292999267578125, "learning_rate": 5e-05, "loss": 1.4943, "step": 4044 }, { "epoch": 0.6473035685709714, "grad_norm": 0.3458764851093292, "learning_rate": 5e-05, "loss": 1.5973, "step": 4045 }, { "epoch": 0.647463594175068, "grad_norm": 0.3376535177230835, "learning_rate": 5e-05, "loss": 1.4912, "step": 4046 }, { "epoch": 0.6476236197791647, "grad_norm": 0.3389747142791748, "learning_rate": 5e-05, "loss": 1.4469, "step": 4047 }, { "epoch": 0.6477836453832613, "grad_norm": 0.3361343741416931, "learning_rate": 5e-05, "loss": 1.441, "step": 4048 }, { "epoch": 0.647943670987358, "grad_norm": 0.3594515025615692, "learning_rate": 5e-05, "loss": 1.4339, "step": 4049 }, { "epoch": 0.6481036965914546, "grad_norm": 0.34556126594543457, "learning_rate": 5e-05, "loss": 1.4752, "step": 4050 }, { "epoch": 0.6482637221955513, "grad_norm": 0.3458104133605957, "learning_rate": 5e-05, "loss": 1.4513, "step": 4051 }, { "epoch": 0.648423747799648, "grad_norm": 0.3342300355434418, "learning_rate": 5e-05, "loss": 1.4871, "step": 4052 }, { "epoch": 0.6485837734037446, "grad_norm": 0.34524887800216675, "learning_rate": 5e-05, "loss": 1.5603, "step": 4053 }, { "epoch": 0.6487437990078413, "grad_norm": 0.33580687642097473, "learning_rate": 5e-05, "loss": 1.4413, "step": 4054 }, { "epoch": 0.648903824611938, "grad_norm": 0.33906957507133484, "learning_rate": 5e-05, "loss": 1.4812, "step": 4055 }, { "epoch": 0.6490638502160345, "grad_norm": 0.3351479470729828, "learning_rate": 5e-05, "loss": 1.4738, "step": 4056 }, { "epoch": 0.6492238758201312, "grad_norm": 0.32945793867111206, "learning_rate": 5e-05, "loss": 1.467, "step": 4057 }, { "epoch": 0.6493839014242279, "grad_norm": 0.3389502763748169, "learning_rate": 5e-05, "loss": 1.4985, "step": 4058 }, { "epoch": 0.6495439270283245, "grad_norm": 0.32925018668174744, "learning_rate": 5e-05, "loss": 1.4164, "step": 4059 }, { "epoch": 0.6497039526324212, "grad_norm": 0.33896517753601074, "learning_rate": 5e-05, "loss": 1.4419, "step": 4060 }, { "epoch": 0.6498639782365179, "grad_norm": 0.3321402072906494, "learning_rate": 5e-05, "loss": 1.4806, "step": 4061 }, { "epoch": 0.6500240038406145, "grad_norm": 0.3299635648727417, "learning_rate": 5e-05, "loss": 1.4959, "step": 4062 }, { "epoch": 0.6501840294447112, "grad_norm": 0.3324022591114044, "learning_rate": 5e-05, "loss": 1.4817, "step": 4063 }, { "epoch": 0.6503440550488078, "grad_norm": 0.3352898955345154, "learning_rate": 5e-05, "loss": 1.4327, "step": 4064 }, { "epoch": 0.6505040806529044, "grad_norm": 0.3294985890388489, "learning_rate": 5e-05, "loss": 1.4714, "step": 4065 }, { "epoch": 0.6506641062570011, "grad_norm": 0.3381156027317047, "learning_rate": 5e-05, "loss": 1.445, "step": 4066 }, { "epoch": 0.6508241318610978, "grad_norm": 0.3423173427581787, "learning_rate": 5e-05, "loss": 1.3983, "step": 4067 }, { "epoch": 0.6509841574651944, "grad_norm": 0.3440476655960083, "learning_rate": 5e-05, "loss": 1.4949, "step": 4068 }, { "epoch": 0.6511441830692911, "grad_norm": 0.3409244120121002, "learning_rate": 5e-05, "loss": 1.4694, "step": 4069 }, { "epoch": 0.6513042086733878, "grad_norm": 0.35360321402549744, "learning_rate": 5e-05, "loss": 1.4693, "step": 4070 }, { "epoch": 0.6514642342774843, "grad_norm": 0.33874601125717163, "learning_rate": 5e-05, "loss": 1.4754, "step": 4071 }, { "epoch": 0.651624259881581, "grad_norm": 0.358736515045166, "learning_rate": 5e-05, "loss": 1.4186, "step": 4072 }, { "epoch": 0.6517842854856777, "grad_norm": 0.34569427371025085, "learning_rate": 5e-05, "loss": 1.4612, "step": 4073 }, { "epoch": 0.6519443110897744, "grad_norm": 0.3397018015384674, "learning_rate": 5e-05, "loss": 1.5097, "step": 4074 }, { "epoch": 0.652104336693871, "grad_norm": 0.34207865595817566, "learning_rate": 5e-05, "loss": 1.4826, "step": 4075 }, { "epoch": 0.6522643622979677, "grad_norm": 0.3368932604789734, "learning_rate": 5e-05, "loss": 1.382, "step": 4076 }, { "epoch": 0.6524243879020644, "grad_norm": 0.34596389532089233, "learning_rate": 5e-05, "loss": 1.5008, "step": 4077 }, { "epoch": 0.652584413506161, "grad_norm": 0.3386182487010956, "learning_rate": 5e-05, "loss": 1.5032, "step": 4078 }, { "epoch": 0.6527444391102576, "grad_norm": 0.34459057450294495, "learning_rate": 5e-05, "loss": 1.492, "step": 4079 }, { "epoch": 0.6529044647143543, "grad_norm": 0.3344078063964844, "learning_rate": 5e-05, "loss": 1.4775, "step": 4080 }, { "epoch": 0.6530644903184509, "grad_norm": 0.3450385630130768, "learning_rate": 5e-05, "loss": 1.481, "step": 4081 }, { "epoch": 0.6532245159225476, "grad_norm": 0.35260581970214844, "learning_rate": 5e-05, "loss": 1.473, "step": 4082 }, { "epoch": 0.6533845415266443, "grad_norm": 0.33470773696899414, "learning_rate": 5e-05, "loss": 1.4018, "step": 4083 }, { "epoch": 0.6535445671307409, "grad_norm": 0.32322901487350464, "learning_rate": 5e-05, "loss": 1.3896, "step": 4084 }, { "epoch": 0.6537045927348376, "grad_norm": 0.33392462134361267, "learning_rate": 5e-05, "loss": 1.4478, "step": 4085 }, { "epoch": 0.6538646183389343, "grad_norm": 0.34922176599502563, "learning_rate": 5e-05, "loss": 1.5307, "step": 4086 }, { "epoch": 0.6540246439430308, "grad_norm": 0.3327694237232208, "learning_rate": 5e-05, "loss": 1.495, "step": 4087 }, { "epoch": 0.6541846695471275, "grad_norm": 0.35107484459877014, "learning_rate": 5e-05, "loss": 1.4871, "step": 4088 }, { "epoch": 0.6543446951512242, "grad_norm": 0.3396608531475067, "learning_rate": 5e-05, "loss": 1.4184, "step": 4089 }, { "epoch": 0.6545047207553208, "grad_norm": 0.3401212990283966, "learning_rate": 5e-05, "loss": 1.4791, "step": 4090 }, { "epoch": 0.6546647463594175, "grad_norm": 0.3411456048488617, "learning_rate": 5e-05, "loss": 1.423, "step": 4091 }, { "epoch": 0.6548247719635142, "grad_norm": 0.3398595154285431, "learning_rate": 5e-05, "loss": 1.4389, "step": 4092 }, { "epoch": 0.6549847975676109, "grad_norm": 0.34275752305984497, "learning_rate": 5e-05, "loss": 1.4688, "step": 4093 }, { "epoch": 0.6551448231717075, "grad_norm": 0.36323386430740356, "learning_rate": 5e-05, "loss": 1.5141, "step": 4094 }, { "epoch": 0.6553048487758041, "grad_norm": 0.3664456605911255, "learning_rate": 5e-05, "loss": 1.5555, "step": 4095 }, { "epoch": 0.6554648743799008, "grad_norm": 0.37222737073898315, "learning_rate": 5e-05, "loss": 1.5666, "step": 4096 }, { "epoch": 0.6556248999839974, "grad_norm": 0.35304367542266846, "learning_rate": 5e-05, "loss": 1.5638, "step": 4097 }, { "epoch": 0.6557849255880941, "grad_norm": 0.329935222864151, "learning_rate": 5e-05, "loss": 1.4766, "step": 4098 }, { "epoch": 0.6559449511921908, "grad_norm": 0.3353748619556427, "learning_rate": 5e-05, "loss": 1.475, "step": 4099 }, { "epoch": 0.6561049767962874, "grad_norm": 0.3397810161113739, "learning_rate": 5e-05, "loss": 1.4672, "step": 4100 }, { "epoch": 0.6562650024003841, "grad_norm": 0.3400084972381592, "learning_rate": 5e-05, "loss": 1.4644, "step": 4101 }, { "epoch": 0.6564250280044808, "grad_norm": 0.33537277579307556, "learning_rate": 5e-05, "loss": 1.4623, "step": 4102 }, { "epoch": 0.6565850536085773, "grad_norm": 0.3398067057132721, "learning_rate": 5e-05, "loss": 1.4938, "step": 4103 }, { "epoch": 0.656745079212674, "grad_norm": 0.34153786301612854, "learning_rate": 5e-05, "loss": 1.4881, "step": 4104 }, { "epoch": 0.6569051048167707, "grad_norm": 0.33683454990386963, "learning_rate": 5e-05, "loss": 1.5061, "step": 4105 }, { "epoch": 0.6570651304208673, "grad_norm": 0.3728537857532501, "learning_rate": 5e-05, "loss": 1.5332, "step": 4106 }, { "epoch": 0.657225156024964, "grad_norm": 0.33804216980934143, "learning_rate": 5e-05, "loss": 1.4416, "step": 4107 }, { "epoch": 0.6573851816290607, "grad_norm": 0.34206661581993103, "learning_rate": 5e-05, "loss": 1.5377, "step": 4108 }, { "epoch": 0.6575452072331573, "grad_norm": 0.3618203103542328, "learning_rate": 5e-05, "loss": 1.5339, "step": 4109 }, { "epoch": 0.6577052328372539, "grad_norm": 0.3461602032184601, "learning_rate": 5e-05, "loss": 1.4928, "step": 4110 }, { "epoch": 0.6578652584413506, "grad_norm": 0.3285312056541443, "learning_rate": 5e-05, "loss": 1.3956, "step": 4111 }, { "epoch": 0.6580252840454472, "grad_norm": 0.35140225291252136, "learning_rate": 5e-05, "loss": 1.5071, "step": 4112 }, { "epoch": 0.6581853096495439, "grad_norm": 0.34176409244537354, "learning_rate": 5e-05, "loss": 1.469, "step": 4113 }, { "epoch": 0.6583453352536406, "grad_norm": 0.3468891978263855, "learning_rate": 5e-05, "loss": 1.5169, "step": 4114 }, { "epoch": 0.6585053608577373, "grad_norm": 0.3474676311016083, "learning_rate": 5e-05, "loss": 1.448, "step": 4115 }, { "epoch": 0.6586653864618339, "grad_norm": 0.3334033787250519, "learning_rate": 5e-05, "loss": 1.4448, "step": 4116 }, { "epoch": 0.6588254120659306, "grad_norm": 0.33932608366012573, "learning_rate": 5e-05, "loss": 1.5014, "step": 4117 }, { "epoch": 0.6589854376700272, "grad_norm": 0.3422168493270874, "learning_rate": 5e-05, "loss": 1.4971, "step": 4118 }, { "epoch": 0.6591454632741238, "grad_norm": 0.33463528752326965, "learning_rate": 5e-05, "loss": 1.4501, "step": 4119 }, { "epoch": 0.6593054888782205, "grad_norm": 0.33869433403015137, "learning_rate": 5e-05, "loss": 1.495, "step": 4120 }, { "epoch": 0.6594655144823172, "grad_norm": 0.3467601537704468, "learning_rate": 5e-05, "loss": 1.5208, "step": 4121 }, { "epoch": 0.6596255400864138, "grad_norm": 0.33643820881843567, "learning_rate": 5e-05, "loss": 1.4249, "step": 4122 }, { "epoch": 0.6597855656905105, "grad_norm": 0.3294602632522583, "learning_rate": 5e-05, "loss": 1.4087, "step": 4123 }, { "epoch": 0.6599455912946072, "grad_norm": 0.3464765250682831, "learning_rate": 5e-05, "loss": 1.5257, "step": 4124 }, { "epoch": 0.6601056168987038, "grad_norm": 0.33355990052223206, "learning_rate": 5e-05, "loss": 1.4287, "step": 4125 }, { "epoch": 0.6602656425028004, "grad_norm": 0.339164137840271, "learning_rate": 5e-05, "loss": 1.482, "step": 4126 }, { "epoch": 0.6604256681068971, "grad_norm": 0.33272257447242737, "learning_rate": 5e-05, "loss": 1.5317, "step": 4127 }, { "epoch": 0.6605856937109937, "grad_norm": 0.345542848110199, "learning_rate": 5e-05, "loss": 1.5077, "step": 4128 }, { "epoch": 0.6607457193150904, "grad_norm": 0.34309783577919006, "learning_rate": 5e-05, "loss": 1.5349, "step": 4129 }, { "epoch": 0.6609057449191871, "grad_norm": 0.3284735083580017, "learning_rate": 5e-05, "loss": 1.4971, "step": 4130 }, { "epoch": 0.6610657705232837, "grad_norm": 0.33071449398994446, "learning_rate": 5e-05, "loss": 1.4787, "step": 4131 }, { "epoch": 0.6612257961273804, "grad_norm": 0.34958016872406006, "learning_rate": 5e-05, "loss": 1.4791, "step": 4132 }, { "epoch": 0.6613858217314771, "grad_norm": 0.34809303283691406, "learning_rate": 5e-05, "loss": 1.5153, "step": 4133 }, { "epoch": 0.6615458473355736, "grad_norm": 0.3459855318069458, "learning_rate": 5e-05, "loss": 1.5047, "step": 4134 }, { "epoch": 0.6617058729396703, "grad_norm": 0.33440908789634705, "learning_rate": 5e-05, "loss": 1.4588, "step": 4135 }, { "epoch": 0.661865898543767, "grad_norm": 0.33247676491737366, "learning_rate": 5e-05, "loss": 1.4493, "step": 4136 }, { "epoch": 0.6620259241478637, "grad_norm": 0.34082773327827454, "learning_rate": 5e-05, "loss": 1.5187, "step": 4137 }, { "epoch": 0.6621859497519603, "grad_norm": 0.3573744297027588, "learning_rate": 5e-05, "loss": 1.4327, "step": 4138 }, { "epoch": 0.662345975356057, "grad_norm": 0.3459263741970062, "learning_rate": 5e-05, "loss": 1.4587, "step": 4139 }, { "epoch": 0.6625060009601537, "grad_norm": 0.34600022435188293, "learning_rate": 5e-05, "loss": 1.5077, "step": 4140 }, { "epoch": 0.6626660265642503, "grad_norm": 0.35942786931991577, "learning_rate": 5e-05, "loss": 1.4904, "step": 4141 }, { "epoch": 0.6628260521683469, "grad_norm": 0.352949857711792, "learning_rate": 5e-05, "loss": 1.5537, "step": 4142 }, { "epoch": 0.6629860777724436, "grad_norm": 0.3465481698513031, "learning_rate": 5e-05, "loss": 1.5151, "step": 4143 }, { "epoch": 0.6631461033765402, "grad_norm": 0.34217146039009094, "learning_rate": 5e-05, "loss": 1.467, "step": 4144 }, { "epoch": 0.6633061289806369, "grad_norm": 0.355909526348114, "learning_rate": 5e-05, "loss": 1.45, "step": 4145 }, { "epoch": 0.6634661545847336, "grad_norm": 0.35389065742492676, "learning_rate": 5e-05, "loss": 1.4422, "step": 4146 }, { "epoch": 0.6636261801888302, "grad_norm": 0.3371725082397461, "learning_rate": 5e-05, "loss": 1.4526, "step": 4147 }, { "epoch": 0.6637862057929269, "grad_norm": 0.33926331996917725, "learning_rate": 5e-05, "loss": 1.4189, "step": 4148 }, { "epoch": 0.6639462313970236, "grad_norm": 0.35319972038269043, "learning_rate": 5e-05, "loss": 1.5378, "step": 4149 }, { "epoch": 0.6641062570011201, "grad_norm": 0.3480391800403595, "learning_rate": 5e-05, "loss": 1.5009, "step": 4150 }, { "epoch": 0.6642662826052168, "grad_norm": 0.3392060101032257, "learning_rate": 5e-05, "loss": 1.4132, "step": 4151 }, { "epoch": 0.6644263082093135, "grad_norm": 0.34661388397216797, "learning_rate": 5e-05, "loss": 1.5511, "step": 4152 }, { "epoch": 0.6645863338134101, "grad_norm": 0.3407416343688965, "learning_rate": 5e-05, "loss": 1.5163, "step": 4153 }, { "epoch": 0.6647463594175068, "grad_norm": 0.3535936772823334, "learning_rate": 5e-05, "loss": 1.509, "step": 4154 }, { "epoch": 0.6649063850216035, "grad_norm": 0.33645594120025635, "learning_rate": 5e-05, "loss": 1.3993, "step": 4155 }, { "epoch": 0.6650664106257002, "grad_norm": 0.34077778458595276, "learning_rate": 5e-05, "loss": 1.4154, "step": 4156 }, { "epoch": 0.6652264362297967, "grad_norm": 0.34393757581710815, "learning_rate": 5e-05, "loss": 1.4697, "step": 4157 }, { "epoch": 0.6653864618338934, "grad_norm": 0.3510162830352783, "learning_rate": 5e-05, "loss": 1.4776, "step": 4158 }, { "epoch": 0.66554648743799, "grad_norm": 0.3480668067932129, "learning_rate": 5e-05, "loss": 1.5597, "step": 4159 }, { "epoch": 0.6657065130420867, "grad_norm": 0.3372040390968323, "learning_rate": 5e-05, "loss": 1.5273, "step": 4160 }, { "epoch": 0.6658665386461834, "grad_norm": 0.3471468687057495, "learning_rate": 5e-05, "loss": 1.4602, "step": 4161 }, { "epoch": 0.6660265642502801, "grad_norm": 0.352130651473999, "learning_rate": 5e-05, "loss": 1.455, "step": 4162 }, { "epoch": 0.6661865898543767, "grad_norm": 0.34419599175453186, "learning_rate": 5e-05, "loss": 1.4893, "step": 4163 }, { "epoch": 0.6663466154584734, "grad_norm": 0.3575839102268219, "learning_rate": 5e-05, "loss": 1.5055, "step": 4164 }, { "epoch": 0.66650664106257, "grad_norm": 0.3397972583770752, "learning_rate": 5e-05, "loss": 1.456, "step": 4165 }, { "epoch": 0.6666666666666666, "grad_norm": 0.336105078458786, "learning_rate": 5e-05, "loss": 1.4409, "step": 4166 }, { "epoch": 0.6668266922707633, "grad_norm": 0.33200860023498535, "learning_rate": 5e-05, "loss": 1.502, "step": 4167 }, { "epoch": 0.66698671787486, "grad_norm": 0.38109076023101807, "learning_rate": 5e-05, "loss": 1.5107, "step": 4168 }, { "epoch": 0.6671467434789566, "grad_norm": 0.34390154480934143, "learning_rate": 5e-05, "loss": 1.5405, "step": 4169 }, { "epoch": 0.6673067690830533, "grad_norm": 0.34565696120262146, "learning_rate": 5e-05, "loss": 1.4691, "step": 4170 }, { "epoch": 0.66746679468715, "grad_norm": 0.35129526257514954, "learning_rate": 5e-05, "loss": 1.4462, "step": 4171 }, { "epoch": 0.6676268202912466, "grad_norm": 0.3461681306362152, "learning_rate": 5e-05, "loss": 1.5189, "step": 4172 }, { "epoch": 0.6677868458953432, "grad_norm": 0.3406469225883484, "learning_rate": 5e-05, "loss": 1.4866, "step": 4173 }, { "epoch": 0.6679468714994399, "grad_norm": 0.3449169993400574, "learning_rate": 5e-05, "loss": 1.4597, "step": 4174 }, { "epoch": 0.6681068971035365, "grad_norm": 0.33304208517074585, "learning_rate": 5e-05, "loss": 1.3753, "step": 4175 }, { "epoch": 0.6682669227076332, "grad_norm": 0.35783350467681885, "learning_rate": 5e-05, "loss": 1.5321, "step": 4176 }, { "epoch": 0.6684269483117299, "grad_norm": 0.35804614424705505, "learning_rate": 5e-05, "loss": 1.5233, "step": 4177 }, { "epoch": 0.6685869739158266, "grad_norm": 0.3478652536869049, "learning_rate": 5e-05, "loss": 1.5416, "step": 4178 }, { "epoch": 0.6687469995199232, "grad_norm": 0.34286755323410034, "learning_rate": 5e-05, "loss": 1.4171, "step": 4179 }, { "epoch": 0.6689070251240199, "grad_norm": 0.3269459009170532, "learning_rate": 5e-05, "loss": 1.3866, "step": 4180 }, { "epoch": 0.6690670507281165, "grad_norm": 0.3382801115512848, "learning_rate": 5e-05, "loss": 1.5147, "step": 4181 }, { "epoch": 0.6692270763322131, "grad_norm": 0.32167544960975647, "learning_rate": 5e-05, "loss": 1.3233, "step": 4182 }, { "epoch": 0.6693871019363098, "grad_norm": 0.3418739438056946, "learning_rate": 5e-05, "loss": 1.4947, "step": 4183 }, { "epoch": 0.6695471275404065, "grad_norm": 0.34033045172691345, "learning_rate": 5e-05, "loss": 1.4089, "step": 4184 }, { "epoch": 0.6697071531445031, "grad_norm": 0.35600510239601135, "learning_rate": 5e-05, "loss": 1.5102, "step": 4185 }, { "epoch": 0.6698671787485998, "grad_norm": 0.34539416432380676, "learning_rate": 5e-05, "loss": 1.4281, "step": 4186 }, { "epoch": 0.6700272043526965, "grad_norm": 0.3404105603694916, "learning_rate": 5e-05, "loss": 1.3985, "step": 4187 }, { "epoch": 0.6701872299567931, "grad_norm": 0.360730916261673, "learning_rate": 5e-05, "loss": 1.4983, "step": 4188 }, { "epoch": 0.6703472555608897, "grad_norm": 0.35233578085899353, "learning_rate": 5e-05, "loss": 1.5345, "step": 4189 }, { "epoch": 0.6705072811649864, "grad_norm": 0.3244589865207672, "learning_rate": 5e-05, "loss": 1.4199, "step": 4190 }, { "epoch": 0.670667306769083, "grad_norm": 0.35122230648994446, "learning_rate": 5e-05, "loss": 1.5486, "step": 4191 }, { "epoch": 0.6708273323731797, "grad_norm": 0.3401183784008026, "learning_rate": 5e-05, "loss": 1.4893, "step": 4192 }, { "epoch": 0.6709873579772764, "grad_norm": 0.3618021607398987, "learning_rate": 5e-05, "loss": 1.5372, "step": 4193 }, { "epoch": 0.671147383581373, "grad_norm": 0.33861854672431946, "learning_rate": 5e-05, "loss": 1.4559, "step": 4194 }, { "epoch": 0.6713074091854697, "grad_norm": 0.34954431653022766, "learning_rate": 5e-05, "loss": 1.4733, "step": 4195 }, { "epoch": 0.6714674347895663, "grad_norm": 0.34149742126464844, "learning_rate": 5e-05, "loss": 1.4566, "step": 4196 }, { "epoch": 0.671627460393663, "grad_norm": 0.36180710792541504, "learning_rate": 5e-05, "loss": 1.5626, "step": 4197 }, { "epoch": 0.6717874859977596, "grad_norm": 0.3416484296321869, "learning_rate": 5e-05, "loss": 1.4721, "step": 4198 }, { "epoch": 0.6719475116018563, "grad_norm": 0.341714084148407, "learning_rate": 5e-05, "loss": 1.5132, "step": 4199 }, { "epoch": 0.672107537205953, "grad_norm": 0.34057343006134033, "learning_rate": 5e-05, "loss": 1.5052, "step": 4200 }, { "epoch": 0.6722675628100496, "grad_norm": 0.34355035424232483, "learning_rate": 5e-05, "loss": 1.5071, "step": 4201 }, { "epoch": 0.6724275884141463, "grad_norm": 0.345603346824646, "learning_rate": 5e-05, "loss": 1.4958, "step": 4202 }, { "epoch": 0.672587614018243, "grad_norm": 0.3418787717819214, "learning_rate": 5e-05, "loss": 1.503, "step": 4203 }, { "epoch": 0.6727476396223395, "grad_norm": 0.33940255641937256, "learning_rate": 5e-05, "loss": 1.4726, "step": 4204 }, { "epoch": 0.6729076652264362, "grad_norm": 0.33993563055992126, "learning_rate": 5e-05, "loss": 1.4203, "step": 4205 }, { "epoch": 0.6730676908305329, "grad_norm": 0.33611035346984863, "learning_rate": 5e-05, "loss": 1.4406, "step": 4206 }, { "epoch": 0.6732277164346295, "grad_norm": 0.34655141830444336, "learning_rate": 5e-05, "loss": 1.4813, "step": 4207 }, { "epoch": 0.6733877420387262, "grad_norm": 0.34605729579925537, "learning_rate": 5e-05, "loss": 1.5087, "step": 4208 }, { "epoch": 0.6735477676428229, "grad_norm": 0.36496928334236145, "learning_rate": 5e-05, "loss": 1.4947, "step": 4209 }, { "epoch": 0.6737077932469195, "grad_norm": 0.34223470091819763, "learning_rate": 5e-05, "loss": 1.4442, "step": 4210 }, { "epoch": 0.6738678188510162, "grad_norm": 0.34704455733299255, "learning_rate": 5e-05, "loss": 1.4843, "step": 4211 }, { "epoch": 0.6740278444551128, "grad_norm": 0.3554282486438751, "learning_rate": 5e-05, "loss": 1.4927, "step": 4212 }, { "epoch": 0.6741878700592094, "grad_norm": 0.3635297417640686, "learning_rate": 5e-05, "loss": 1.4764, "step": 4213 }, { "epoch": 0.6743478956633061, "grad_norm": 0.34876367449760437, "learning_rate": 5e-05, "loss": 1.4502, "step": 4214 }, { "epoch": 0.6745079212674028, "grad_norm": 0.36181873083114624, "learning_rate": 5e-05, "loss": 1.4874, "step": 4215 }, { "epoch": 0.6746679468714994, "grad_norm": 0.33824846148490906, "learning_rate": 5e-05, "loss": 1.3716, "step": 4216 }, { "epoch": 0.6748279724755961, "grad_norm": 0.3340305685997009, "learning_rate": 5e-05, "loss": 1.4099, "step": 4217 }, { "epoch": 0.6749879980796928, "grad_norm": 0.3535272181034088, "learning_rate": 5e-05, "loss": 1.6004, "step": 4218 }, { "epoch": 0.6751480236837895, "grad_norm": 0.3431311845779419, "learning_rate": 5e-05, "loss": 1.4489, "step": 4219 }, { "epoch": 0.675308049287886, "grad_norm": 0.3392755687236786, "learning_rate": 5e-05, "loss": 1.448, "step": 4220 }, { "epoch": 0.6754680748919827, "grad_norm": 0.33757907152175903, "learning_rate": 5e-05, "loss": 1.448, "step": 4221 }, { "epoch": 0.6756281004960794, "grad_norm": 0.35143065452575684, "learning_rate": 5e-05, "loss": 1.3995, "step": 4222 }, { "epoch": 0.675788126100176, "grad_norm": 0.3623443841934204, "learning_rate": 5e-05, "loss": 1.5869, "step": 4223 }, { "epoch": 0.6759481517042727, "grad_norm": 0.35138237476348877, "learning_rate": 5e-05, "loss": 1.5162, "step": 4224 }, { "epoch": 0.6761081773083694, "grad_norm": 0.3554118871688843, "learning_rate": 5e-05, "loss": 1.4631, "step": 4225 }, { "epoch": 0.676268202912466, "grad_norm": 0.37894296646118164, "learning_rate": 5e-05, "loss": 1.5336, "step": 4226 }, { "epoch": 0.6764282285165627, "grad_norm": 0.3467561602592468, "learning_rate": 5e-05, "loss": 1.4748, "step": 4227 }, { "epoch": 0.6765882541206593, "grad_norm": 0.3624655604362488, "learning_rate": 5e-05, "loss": 1.4239, "step": 4228 }, { "epoch": 0.6767482797247559, "grad_norm": 0.3516625761985779, "learning_rate": 5e-05, "loss": 1.4471, "step": 4229 }, { "epoch": 0.6769083053288526, "grad_norm": 0.34908780455589294, "learning_rate": 5e-05, "loss": 1.4267, "step": 4230 }, { "epoch": 0.6770683309329493, "grad_norm": 0.34456050395965576, "learning_rate": 5e-05, "loss": 1.4396, "step": 4231 }, { "epoch": 0.6772283565370459, "grad_norm": 0.3572906255722046, "learning_rate": 5e-05, "loss": 1.5703, "step": 4232 }, { "epoch": 0.6773883821411426, "grad_norm": 0.3429291844367981, "learning_rate": 5e-05, "loss": 1.5722, "step": 4233 }, { "epoch": 0.6775484077452393, "grad_norm": 0.3481510281562805, "learning_rate": 5e-05, "loss": 1.4773, "step": 4234 }, { "epoch": 0.677708433349336, "grad_norm": 0.33978506922721863, "learning_rate": 5e-05, "loss": 1.4981, "step": 4235 }, { "epoch": 0.6778684589534325, "grad_norm": 0.3346719741821289, "learning_rate": 5e-05, "loss": 1.4995, "step": 4236 }, { "epoch": 0.6780284845575292, "grad_norm": 0.33790239691734314, "learning_rate": 5e-05, "loss": 1.4606, "step": 4237 }, { "epoch": 0.6781885101616258, "grad_norm": 0.340628981590271, "learning_rate": 5e-05, "loss": 1.5191, "step": 4238 }, { "epoch": 0.6783485357657225, "grad_norm": 0.3360389173030853, "learning_rate": 5e-05, "loss": 1.4497, "step": 4239 }, { "epoch": 0.6785085613698192, "grad_norm": 0.3366263210773468, "learning_rate": 5e-05, "loss": 1.4798, "step": 4240 }, { "epoch": 0.6786685869739159, "grad_norm": 0.33125630021095276, "learning_rate": 5e-05, "loss": 1.4787, "step": 4241 }, { "epoch": 0.6788286125780125, "grad_norm": 0.33464351296424866, "learning_rate": 5e-05, "loss": 1.4956, "step": 4242 }, { "epoch": 0.6789886381821091, "grad_norm": 0.3467884063720703, "learning_rate": 5e-05, "loss": 1.5176, "step": 4243 }, { "epoch": 0.6791486637862058, "grad_norm": 0.3493647873401642, "learning_rate": 5e-05, "loss": 1.4435, "step": 4244 }, { "epoch": 0.6793086893903024, "grad_norm": 0.3472617268562317, "learning_rate": 5e-05, "loss": 1.3904, "step": 4245 }, { "epoch": 0.6794687149943991, "grad_norm": 0.3479941189289093, "learning_rate": 5e-05, "loss": 1.5336, "step": 4246 }, { "epoch": 0.6796287405984958, "grad_norm": 0.326898992061615, "learning_rate": 5e-05, "loss": 1.4011, "step": 4247 }, { "epoch": 0.6797887662025924, "grad_norm": 0.34978368878364563, "learning_rate": 5e-05, "loss": 1.414, "step": 4248 }, { "epoch": 0.6799487918066891, "grad_norm": 0.34705618023872375, "learning_rate": 5e-05, "loss": 1.4963, "step": 4249 }, { "epoch": 0.6801088174107858, "grad_norm": 0.31858310103416443, "learning_rate": 5e-05, "loss": 1.3399, "step": 4250 }, { "epoch": 0.6802688430148823, "grad_norm": 0.34529951214790344, "learning_rate": 5e-05, "loss": 1.534, "step": 4251 }, { "epoch": 0.680428868618979, "grad_norm": 0.3525719940662384, "learning_rate": 5e-05, "loss": 1.5019, "step": 4252 }, { "epoch": 0.6805888942230757, "grad_norm": 0.3627413213253021, "learning_rate": 5e-05, "loss": 1.4916, "step": 4253 }, { "epoch": 0.6807489198271723, "grad_norm": 0.3324197828769684, "learning_rate": 5e-05, "loss": 1.4269, "step": 4254 }, { "epoch": 0.680908945431269, "grad_norm": 0.33620786666870117, "learning_rate": 5e-05, "loss": 1.4078, "step": 4255 }, { "epoch": 0.6810689710353657, "grad_norm": 0.3392347991466522, "learning_rate": 5e-05, "loss": 1.4339, "step": 4256 }, { "epoch": 0.6812289966394623, "grad_norm": 0.35288006067276, "learning_rate": 5e-05, "loss": 1.4454, "step": 4257 }, { "epoch": 0.681389022243559, "grad_norm": 0.3540213406085968, "learning_rate": 5e-05, "loss": 1.5352, "step": 4258 }, { "epoch": 0.6815490478476556, "grad_norm": 0.34548014402389526, "learning_rate": 5e-05, "loss": 1.4661, "step": 4259 }, { "epoch": 0.6817090734517522, "grad_norm": 0.3461606204509735, "learning_rate": 5e-05, "loss": 1.4866, "step": 4260 }, { "epoch": 0.6818690990558489, "grad_norm": 0.32562384009361267, "learning_rate": 5e-05, "loss": 1.4494, "step": 4261 }, { "epoch": 0.6820291246599456, "grad_norm": 0.34001997113227844, "learning_rate": 5e-05, "loss": 1.4291, "step": 4262 }, { "epoch": 0.6821891502640423, "grad_norm": 0.3430463969707489, "learning_rate": 5e-05, "loss": 1.415, "step": 4263 }, { "epoch": 0.6823491758681389, "grad_norm": 0.3413662910461426, "learning_rate": 5e-05, "loss": 1.5142, "step": 4264 }, { "epoch": 0.6825092014722356, "grad_norm": 0.3324680030345917, "learning_rate": 5e-05, "loss": 1.4765, "step": 4265 }, { "epoch": 0.6826692270763323, "grad_norm": 0.33064693212509155, "learning_rate": 5e-05, "loss": 1.434, "step": 4266 }, { "epoch": 0.6828292526804288, "grad_norm": 0.3515304625034332, "learning_rate": 5e-05, "loss": 1.5131, "step": 4267 }, { "epoch": 0.6829892782845255, "grad_norm": 0.3468967378139496, "learning_rate": 5e-05, "loss": 1.4064, "step": 4268 }, { "epoch": 0.6831493038886222, "grad_norm": 0.33902785181999207, "learning_rate": 5e-05, "loss": 1.4937, "step": 4269 }, { "epoch": 0.6833093294927188, "grad_norm": 0.3414340913295746, "learning_rate": 5e-05, "loss": 1.4365, "step": 4270 }, { "epoch": 0.6834693550968155, "grad_norm": 0.34814003109931946, "learning_rate": 5e-05, "loss": 1.4437, "step": 4271 }, { "epoch": 0.6836293807009122, "grad_norm": 0.3460850119590759, "learning_rate": 5e-05, "loss": 1.5119, "step": 4272 }, { "epoch": 0.6837894063050088, "grad_norm": 0.34284648299217224, "learning_rate": 5e-05, "loss": 1.47, "step": 4273 }, { "epoch": 0.6839494319091055, "grad_norm": 0.3465506136417389, "learning_rate": 5e-05, "loss": 1.4738, "step": 4274 }, { "epoch": 0.6841094575132021, "grad_norm": 0.3414619565010071, "learning_rate": 5e-05, "loss": 1.487, "step": 4275 }, { "epoch": 0.6842694831172987, "grad_norm": 0.3444170355796814, "learning_rate": 5e-05, "loss": 1.4667, "step": 4276 }, { "epoch": 0.6844295087213954, "grad_norm": 0.3498295247554779, "learning_rate": 5e-05, "loss": 1.4638, "step": 4277 }, { "epoch": 0.6845895343254921, "grad_norm": 0.3468032479286194, "learning_rate": 5e-05, "loss": 1.4997, "step": 4278 }, { "epoch": 0.6847495599295887, "grad_norm": 0.3576592803001404, "learning_rate": 5e-05, "loss": 1.5527, "step": 4279 }, { "epoch": 0.6849095855336854, "grad_norm": 0.3461296856403351, "learning_rate": 5e-05, "loss": 1.4681, "step": 4280 }, { "epoch": 0.6850696111377821, "grad_norm": 0.3436461389064789, "learning_rate": 5e-05, "loss": 1.4714, "step": 4281 }, { "epoch": 0.6852296367418788, "grad_norm": 0.3415345251560211, "learning_rate": 5e-05, "loss": 1.473, "step": 4282 }, { "epoch": 0.6853896623459753, "grad_norm": 0.34815332293510437, "learning_rate": 5e-05, "loss": 1.507, "step": 4283 }, { "epoch": 0.685549687950072, "grad_norm": 0.33466601371765137, "learning_rate": 5e-05, "loss": 1.4211, "step": 4284 }, { "epoch": 0.6857097135541687, "grad_norm": 0.34601762890815735, "learning_rate": 5e-05, "loss": 1.5347, "step": 4285 }, { "epoch": 0.6858697391582653, "grad_norm": 0.3464643657207489, "learning_rate": 5e-05, "loss": 1.5096, "step": 4286 }, { "epoch": 0.686029764762362, "grad_norm": 0.3582304120063782, "learning_rate": 5e-05, "loss": 1.5678, "step": 4287 }, { "epoch": 0.6861897903664587, "grad_norm": 0.32834944128990173, "learning_rate": 5e-05, "loss": 1.3954, "step": 4288 }, { "epoch": 0.6863498159705553, "grad_norm": 0.3522815704345703, "learning_rate": 5e-05, "loss": 1.4647, "step": 4289 }, { "epoch": 0.6865098415746519, "grad_norm": 0.3470248579978943, "learning_rate": 5e-05, "loss": 1.4991, "step": 4290 }, { "epoch": 0.6866698671787486, "grad_norm": 0.3461211919784546, "learning_rate": 5e-05, "loss": 1.525, "step": 4291 }, { "epoch": 0.6868298927828452, "grad_norm": 0.3582344949245453, "learning_rate": 5e-05, "loss": 1.514, "step": 4292 }, { "epoch": 0.6869899183869419, "grad_norm": 0.3658178746700287, "learning_rate": 5e-05, "loss": 1.4381, "step": 4293 }, { "epoch": 0.6871499439910386, "grad_norm": 0.3391474485397339, "learning_rate": 5e-05, "loss": 1.4803, "step": 4294 }, { "epoch": 0.6873099695951352, "grad_norm": 0.34426990151405334, "learning_rate": 5e-05, "loss": 1.4439, "step": 4295 }, { "epoch": 0.6874699951992319, "grad_norm": 0.3482383191585541, "learning_rate": 5e-05, "loss": 1.4415, "step": 4296 }, { "epoch": 0.6876300208033286, "grad_norm": 0.32806551456451416, "learning_rate": 5e-05, "loss": 1.3929, "step": 4297 }, { "epoch": 0.6877900464074251, "grad_norm": 0.33703598380088806, "learning_rate": 5e-05, "loss": 1.4785, "step": 4298 }, { "epoch": 0.6879500720115218, "grad_norm": 0.3492802083492279, "learning_rate": 5e-05, "loss": 1.5291, "step": 4299 }, { "epoch": 0.6881100976156185, "grad_norm": 0.3421228528022766, "learning_rate": 5e-05, "loss": 1.4686, "step": 4300 }, { "epoch": 0.6882701232197151, "grad_norm": 0.3445519208908081, "learning_rate": 5e-05, "loss": 1.4392, "step": 4301 }, { "epoch": 0.6884301488238118, "grad_norm": 0.34750840067863464, "learning_rate": 5e-05, "loss": 1.4485, "step": 4302 }, { "epoch": 0.6885901744279085, "grad_norm": 0.3298945426940918, "learning_rate": 5e-05, "loss": 1.4815, "step": 4303 }, { "epoch": 0.6887502000320052, "grad_norm": 0.3460943400859833, "learning_rate": 5e-05, "loss": 1.4062, "step": 4304 }, { "epoch": 0.6889102256361018, "grad_norm": 0.35118094086647034, "learning_rate": 5e-05, "loss": 1.4718, "step": 4305 }, { "epoch": 0.6890702512401984, "grad_norm": 0.4001816213130951, "learning_rate": 5e-05, "loss": 1.5285, "step": 4306 }, { "epoch": 0.689230276844295, "grad_norm": 0.33614957332611084, "learning_rate": 5e-05, "loss": 1.4531, "step": 4307 }, { "epoch": 0.6893903024483917, "grad_norm": 0.33794623613357544, "learning_rate": 5e-05, "loss": 1.4236, "step": 4308 }, { "epoch": 0.6895503280524884, "grad_norm": 0.3629724085330963, "learning_rate": 5e-05, "loss": 1.4795, "step": 4309 }, { "epoch": 0.6897103536565851, "grad_norm": 0.3693599998950958, "learning_rate": 5e-05, "loss": 1.5127, "step": 4310 }, { "epoch": 0.6898703792606817, "grad_norm": 0.33307522535324097, "learning_rate": 5e-05, "loss": 1.4686, "step": 4311 }, { "epoch": 0.6900304048647784, "grad_norm": 0.34174463152885437, "learning_rate": 5e-05, "loss": 1.4271, "step": 4312 }, { "epoch": 0.6901904304688751, "grad_norm": 0.33101487159729004, "learning_rate": 5e-05, "loss": 1.4902, "step": 4313 }, { "epoch": 0.6903504560729716, "grad_norm": 0.338594913482666, "learning_rate": 5e-05, "loss": 1.4826, "step": 4314 }, { "epoch": 0.6905104816770683, "grad_norm": 0.3365645706653595, "learning_rate": 5e-05, "loss": 1.4508, "step": 4315 }, { "epoch": 0.690670507281165, "grad_norm": 0.32929983735084534, "learning_rate": 5e-05, "loss": 1.4393, "step": 4316 }, { "epoch": 0.6908305328852616, "grad_norm": 0.3338644802570343, "learning_rate": 5e-05, "loss": 1.4948, "step": 4317 }, { "epoch": 0.6909905584893583, "grad_norm": 0.3454224169254303, "learning_rate": 5e-05, "loss": 1.4003, "step": 4318 }, { "epoch": 0.691150584093455, "grad_norm": 0.3396053910255432, "learning_rate": 5e-05, "loss": 1.487, "step": 4319 }, { "epoch": 0.6913106096975516, "grad_norm": 0.3474339544773102, "learning_rate": 5e-05, "loss": 1.441, "step": 4320 }, { "epoch": 0.6914706353016483, "grad_norm": 0.3592209219932556, "learning_rate": 5e-05, "loss": 1.4508, "step": 4321 }, { "epoch": 0.6916306609057449, "grad_norm": 0.33350494503974915, "learning_rate": 5e-05, "loss": 1.4905, "step": 4322 }, { "epoch": 0.6917906865098415, "grad_norm": 0.343294233083725, "learning_rate": 5e-05, "loss": 1.4681, "step": 4323 }, { "epoch": 0.6919507121139382, "grad_norm": 0.34399452805519104, "learning_rate": 5e-05, "loss": 1.5114, "step": 4324 }, { "epoch": 0.6921107377180349, "grad_norm": 0.3391386866569519, "learning_rate": 5e-05, "loss": 1.4407, "step": 4325 }, { "epoch": 0.6922707633221316, "grad_norm": 0.33858656883239746, "learning_rate": 5e-05, "loss": 1.4096, "step": 4326 }, { "epoch": 0.6924307889262282, "grad_norm": 0.3467167317867279, "learning_rate": 5e-05, "loss": 1.4261, "step": 4327 }, { "epoch": 0.6925908145303249, "grad_norm": 0.33605843782424927, "learning_rate": 5e-05, "loss": 1.3831, "step": 4328 }, { "epoch": 0.6927508401344215, "grad_norm": 0.3474723994731903, "learning_rate": 5e-05, "loss": 1.4608, "step": 4329 }, { "epoch": 0.6929108657385181, "grad_norm": 0.343982994556427, "learning_rate": 5e-05, "loss": 1.4388, "step": 4330 }, { "epoch": 0.6930708913426148, "grad_norm": 0.34826043248176575, "learning_rate": 5e-05, "loss": 1.4634, "step": 4331 }, { "epoch": 0.6932309169467115, "grad_norm": 0.3507176339626312, "learning_rate": 5e-05, "loss": 1.5756, "step": 4332 }, { "epoch": 0.6933909425508081, "grad_norm": 0.348564475774765, "learning_rate": 5e-05, "loss": 1.3257, "step": 4333 }, { "epoch": 0.6935509681549048, "grad_norm": 0.347167432308197, "learning_rate": 5e-05, "loss": 1.5166, "step": 4334 }, { "epoch": 0.6937109937590015, "grad_norm": 0.34548869729042053, "learning_rate": 5e-05, "loss": 1.5651, "step": 4335 }, { "epoch": 0.6938710193630981, "grad_norm": 0.3486281931400299, "learning_rate": 5e-05, "loss": 1.5107, "step": 4336 }, { "epoch": 0.6940310449671947, "grad_norm": 0.35270172357559204, "learning_rate": 5e-05, "loss": 1.4817, "step": 4337 }, { "epoch": 0.6941910705712914, "grad_norm": 0.3469998240470886, "learning_rate": 5e-05, "loss": 1.5141, "step": 4338 }, { "epoch": 0.694351096175388, "grad_norm": 0.3367080092430115, "learning_rate": 5e-05, "loss": 1.4931, "step": 4339 }, { "epoch": 0.6945111217794847, "grad_norm": 0.35010769963264465, "learning_rate": 5e-05, "loss": 1.4649, "step": 4340 }, { "epoch": 0.6946711473835814, "grad_norm": 0.3349698483943939, "learning_rate": 5e-05, "loss": 1.4933, "step": 4341 }, { "epoch": 0.694831172987678, "grad_norm": 0.3463591933250427, "learning_rate": 5e-05, "loss": 1.4991, "step": 4342 }, { "epoch": 0.6949911985917747, "grad_norm": 0.3341082036495209, "learning_rate": 5e-05, "loss": 1.4522, "step": 4343 }, { "epoch": 0.6951512241958714, "grad_norm": 0.33948951959609985, "learning_rate": 5e-05, "loss": 1.4186, "step": 4344 }, { "epoch": 0.695311249799968, "grad_norm": 0.34616997838020325, "learning_rate": 5e-05, "loss": 1.4863, "step": 4345 }, { "epoch": 0.6954712754040646, "grad_norm": 0.3494262397289276, "learning_rate": 5e-05, "loss": 1.4562, "step": 4346 }, { "epoch": 0.6956313010081613, "grad_norm": 0.35347265005111694, "learning_rate": 5e-05, "loss": 1.5075, "step": 4347 }, { "epoch": 0.695791326612258, "grad_norm": 0.3466571569442749, "learning_rate": 5e-05, "loss": 1.4205, "step": 4348 }, { "epoch": 0.6959513522163546, "grad_norm": 0.3477250039577484, "learning_rate": 5e-05, "loss": 1.4777, "step": 4349 }, { "epoch": 0.6961113778204513, "grad_norm": 0.33827027678489685, "learning_rate": 5e-05, "loss": 1.4629, "step": 4350 }, { "epoch": 0.696271403424548, "grad_norm": 0.3456898033618927, "learning_rate": 5e-05, "loss": 1.4801, "step": 4351 }, { "epoch": 0.6964314290286446, "grad_norm": 0.3364146947860718, "learning_rate": 5e-05, "loss": 1.4088, "step": 4352 }, { "epoch": 0.6965914546327412, "grad_norm": 0.3363203704357147, "learning_rate": 5e-05, "loss": 1.42, "step": 4353 }, { "epoch": 0.6967514802368379, "grad_norm": 0.34719064831733704, "learning_rate": 5e-05, "loss": 1.5179, "step": 4354 }, { "epoch": 0.6969115058409345, "grad_norm": 0.34938034415245056, "learning_rate": 5e-05, "loss": 1.4633, "step": 4355 }, { "epoch": 0.6970715314450312, "grad_norm": 0.3458994925022125, "learning_rate": 5e-05, "loss": 1.464, "step": 4356 }, { "epoch": 0.6972315570491279, "grad_norm": 0.34591367840766907, "learning_rate": 5e-05, "loss": 1.4598, "step": 4357 }, { "epoch": 0.6973915826532245, "grad_norm": 0.3628699481487274, "learning_rate": 5e-05, "loss": 1.5143, "step": 4358 }, { "epoch": 0.6975516082573212, "grad_norm": 0.34320953488349915, "learning_rate": 5e-05, "loss": 1.4706, "step": 4359 }, { "epoch": 0.6977116338614179, "grad_norm": 0.34830671548843384, "learning_rate": 5e-05, "loss": 1.5119, "step": 4360 }, { "epoch": 0.6978716594655144, "grad_norm": 0.3318207859992981, "learning_rate": 5e-05, "loss": 1.5062, "step": 4361 }, { "epoch": 0.6980316850696111, "grad_norm": 0.34465864300727844, "learning_rate": 5e-05, "loss": 1.5007, "step": 4362 }, { "epoch": 0.6981917106737078, "grad_norm": 0.3533013164997101, "learning_rate": 5e-05, "loss": 1.5874, "step": 4363 }, { "epoch": 0.6983517362778044, "grad_norm": 0.3527242839336395, "learning_rate": 5e-05, "loss": 1.5708, "step": 4364 }, { "epoch": 0.6985117618819011, "grad_norm": 0.36280614137649536, "learning_rate": 5e-05, "loss": 1.435, "step": 4365 }, { "epoch": 0.6986717874859978, "grad_norm": 0.34934157133102417, "learning_rate": 5e-05, "loss": 1.4706, "step": 4366 }, { "epoch": 0.6988318130900945, "grad_norm": 0.3526739478111267, "learning_rate": 5e-05, "loss": 1.5187, "step": 4367 }, { "epoch": 0.6989918386941911, "grad_norm": 0.35359156131744385, "learning_rate": 5e-05, "loss": 1.4757, "step": 4368 }, { "epoch": 0.6991518642982877, "grad_norm": 0.3516561686992645, "learning_rate": 5e-05, "loss": 1.4906, "step": 4369 }, { "epoch": 0.6993118899023844, "grad_norm": 0.34172523021698, "learning_rate": 5e-05, "loss": 1.4203, "step": 4370 }, { "epoch": 0.699471915506481, "grad_norm": 0.33531689643859863, "learning_rate": 5e-05, "loss": 1.4541, "step": 4371 }, { "epoch": 0.6996319411105777, "grad_norm": 0.3521783649921417, "learning_rate": 5e-05, "loss": 1.4959, "step": 4372 }, { "epoch": 0.6997919667146744, "grad_norm": 0.3459514081478119, "learning_rate": 5e-05, "loss": 1.4322, "step": 4373 }, { "epoch": 0.699951992318771, "grad_norm": 0.3410612940788269, "learning_rate": 5e-05, "loss": 1.5206, "step": 4374 }, { "epoch": 0.7001120179228677, "grad_norm": 0.35405561327934265, "learning_rate": 5e-05, "loss": 1.4494, "step": 4375 }, { "epoch": 0.7002720435269643, "grad_norm": 0.34155553579330444, "learning_rate": 5e-05, "loss": 1.4394, "step": 4376 }, { "epoch": 0.7004320691310609, "grad_norm": 0.3419945538043976, "learning_rate": 5e-05, "loss": 1.4569, "step": 4377 }, { "epoch": 0.7005920947351576, "grad_norm": 0.34979236125946045, "learning_rate": 5e-05, "loss": 1.4749, "step": 4378 }, { "epoch": 0.7007521203392543, "grad_norm": 0.3506731688976288, "learning_rate": 5e-05, "loss": 1.5369, "step": 4379 }, { "epoch": 0.7009121459433509, "grad_norm": 0.36868301033973694, "learning_rate": 5e-05, "loss": 1.518, "step": 4380 }, { "epoch": 0.7010721715474476, "grad_norm": 0.3561338484287262, "learning_rate": 5e-05, "loss": 1.4984, "step": 4381 }, { "epoch": 0.7012321971515443, "grad_norm": 0.3545253872871399, "learning_rate": 5e-05, "loss": 1.5193, "step": 4382 }, { "epoch": 0.701392222755641, "grad_norm": 0.3519073724746704, "learning_rate": 5e-05, "loss": 1.387, "step": 4383 }, { "epoch": 0.7015522483597375, "grad_norm": 0.3419506549835205, "learning_rate": 5e-05, "loss": 1.4603, "step": 4384 }, { "epoch": 0.7017122739638342, "grad_norm": 0.35148948431015015, "learning_rate": 5e-05, "loss": 1.4283, "step": 4385 }, { "epoch": 0.7018722995679308, "grad_norm": 0.35995784401893616, "learning_rate": 5e-05, "loss": 1.5122, "step": 4386 }, { "epoch": 0.7020323251720275, "grad_norm": 0.34827348589897156, "learning_rate": 5e-05, "loss": 1.4879, "step": 4387 }, { "epoch": 0.7021923507761242, "grad_norm": 0.3517261743545532, "learning_rate": 5e-05, "loss": 1.482, "step": 4388 }, { "epoch": 0.7023523763802209, "grad_norm": 0.3482716977596283, "learning_rate": 5e-05, "loss": 1.4378, "step": 4389 }, { "epoch": 0.7025124019843175, "grad_norm": 0.3476359248161316, "learning_rate": 5e-05, "loss": 1.4798, "step": 4390 }, { "epoch": 0.7026724275884142, "grad_norm": 0.3428530693054199, "learning_rate": 5e-05, "loss": 1.4207, "step": 4391 }, { "epoch": 0.7028324531925108, "grad_norm": 0.36789125204086304, "learning_rate": 5e-05, "loss": 1.5216, "step": 4392 }, { "epoch": 0.7029924787966074, "grad_norm": 0.3677557408809662, "learning_rate": 5e-05, "loss": 1.5465, "step": 4393 }, { "epoch": 0.7031525044007041, "grad_norm": 0.3523310720920563, "learning_rate": 5e-05, "loss": 1.5029, "step": 4394 }, { "epoch": 0.7033125300048008, "grad_norm": 0.34405526518821716, "learning_rate": 5e-05, "loss": 1.5011, "step": 4395 }, { "epoch": 0.7034725556088974, "grad_norm": 0.3732903301715851, "learning_rate": 5e-05, "loss": 1.5361, "step": 4396 }, { "epoch": 0.7036325812129941, "grad_norm": 0.3461301326751709, "learning_rate": 5e-05, "loss": 1.5401, "step": 4397 }, { "epoch": 0.7037926068170908, "grad_norm": 0.3524801433086395, "learning_rate": 5e-05, "loss": 1.4736, "step": 4398 }, { "epoch": 0.7039526324211874, "grad_norm": 0.36196738481521606, "learning_rate": 5e-05, "loss": 1.5351, "step": 4399 }, { "epoch": 0.704112658025284, "grad_norm": 0.34794142842292786, "learning_rate": 5e-05, "loss": 1.5266, "step": 4400 }, { "epoch": 0.7042726836293807, "grad_norm": 0.34277117252349854, "learning_rate": 5e-05, "loss": 1.4546, "step": 4401 }, { "epoch": 0.7044327092334773, "grad_norm": 0.34551307559013367, "learning_rate": 5e-05, "loss": 1.4367, "step": 4402 }, { "epoch": 0.704592734837574, "grad_norm": 0.3441026210784912, "learning_rate": 5e-05, "loss": 1.4514, "step": 4403 }, { "epoch": 0.7047527604416707, "grad_norm": 0.34364816546440125, "learning_rate": 5e-05, "loss": 1.4246, "step": 4404 }, { "epoch": 0.7049127860457673, "grad_norm": 0.3707164525985718, "learning_rate": 5e-05, "loss": 1.5412, "step": 4405 }, { "epoch": 0.705072811649864, "grad_norm": 0.3628157079219818, "learning_rate": 5e-05, "loss": 1.5651, "step": 4406 }, { "epoch": 0.7052328372539607, "grad_norm": 0.3396399915218353, "learning_rate": 5e-05, "loss": 1.4379, "step": 4407 }, { "epoch": 0.7053928628580572, "grad_norm": 0.34231138229370117, "learning_rate": 5e-05, "loss": 1.4021, "step": 4408 }, { "epoch": 0.7055528884621539, "grad_norm": 0.3383221924304962, "learning_rate": 5e-05, "loss": 1.4748, "step": 4409 }, { "epoch": 0.7057129140662506, "grad_norm": 0.33025336265563965, "learning_rate": 5e-05, "loss": 1.4991, "step": 4410 }, { "epoch": 0.7058729396703473, "grad_norm": 0.34573090076446533, "learning_rate": 5e-05, "loss": 1.5106, "step": 4411 }, { "epoch": 0.7060329652744439, "grad_norm": 0.34941670298576355, "learning_rate": 5e-05, "loss": 1.4475, "step": 4412 }, { "epoch": 0.7061929908785406, "grad_norm": 0.34622544050216675, "learning_rate": 5e-05, "loss": 1.5006, "step": 4413 }, { "epoch": 0.7063530164826373, "grad_norm": 0.3438316881656647, "learning_rate": 5e-05, "loss": 1.4865, "step": 4414 }, { "epoch": 0.7065130420867338, "grad_norm": 0.35101044178009033, "learning_rate": 5e-05, "loss": 1.4661, "step": 4415 }, { "epoch": 0.7066730676908305, "grad_norm": 0.3511585295200348, "learning_rate": 5e-05, "loss": 1.5211, "step": 4416 }, { "epoch": 0.7068330932949272, "grad_norm": 0.3446599543094635, "learning_rate": 5e-05, "loss": 1.484, "step": 4417 }, { "epoch": 0.7069931188990238, "grad_norm": 0.35470253229141235, "learning_rate": 5e-05, "loss": 1.5346, "step": 4418 }, { "epoch": 0.7071531445031205, "grad_norm": 0.3527895510196686, "learning_rate": 5e-05, "loss": 1.4446, "step": 4419 }, { "epoch": 0.7073131701072172, "grad_norm": 0.34776797890663147, "learning_rate": 5e-05, "loss": 1.4279, "step": 4420 }, { "epoch": 0.7074731957113138, "grad_norm": 0.3396187126636505, "learning_rate": 5e-05, "loss": 1.436, "step": 4421 }, { "epoch": 0.7076332213154105, "grad_norm": 0.34549039602279663, "learning_rate": 5e-05, "loss": 1.4752, "step": 4422 }, { "epoch": 0.7077932469195071, "grad_norm": 0.3471437692642212, "learning_rate": 5e-05, "loss": 1.4905, "step": 4423 }, { "epoch": 0.7079532725236037, "grad_norm": 0.36392977833747864, "learning_rate": 5e-05, "loss": 1.4879, "step": 4424 }, { "epoch": 0.7081132981277004, "grad_norm": 0.34649136662483215, "learning_rate": 5e-05, "loss": 1.5179, "step": 4425 }, { "epoch": 0.7082733237317971, "grad_norm": 0.3310205340385437, "learning_rate": 5e-05, "loss": 1.3718, "step": 4426 }, { "epoch": 0.7084333493358937, "grad_norm": 0.35429704189300537, "learning_rate": 5e-05, "loss": 1.5425, "step": 4427 }, { "epoch": 0.7085933749399904, "grad_norm": 0.34396126866340637, "learning_rate": 5e-05, "loss": 1.4147, "step": 4428 }, { "epoch": 0.7087534005440871, "grad_norm": 0.3515433967113495, "learning_rate": 5e-05, "loss": 1.4754, "step": 4429 }, { "epoch": 0.7089134261481838, "grad_norm": 0.3294734060764313, "learning_rate": 5e-05, "loss": 1.4356, "step": 4430 }, { "epoch": 0.7090734517522803, "grad_norm": 0.34790945053100586, "learning_rate": 5e-05, "loss": 1.5324, "step": 4431 }, { "epoch": 0.709233477356377, "grad_norm": 0.3559958040714264, "learning_rate": 5e-05, "loss": 1.4635, "step": 4432 }, { "epoch": 0.7093935029604737, "grad_norm": 0.3422231376171112, "learning_rate": 5e-05, "loss": 1.372, "step": 4433 }, { "epoch": 0.7095535285645703, "grad_norm": 0.34587860107421875, "learning_rate": 5e-05, "loss": 1.5008, "step": 4434 }, { "epoch": 0.709713554168667, "grad_norm": 0.3467773199081421, "learning_rate": 5e-05, "loss": 1.5143, "step": 4435 }, { "epoch": 0.7098735797727637, "grad_norm": 0.3415220081806183, "learning_rate": 5e-05, "loss": 1.4831, "step": 4436 }, { "epoch": 0.7100336053768603, "grad_norm": 0.3520839512348175, "learning_rate": 5e-05, "loss": 1.5042, "step": 4437 }, { "epoch": 0.710193630980957, "grad_norm": 0.35588201880455017, "learning_rate": 5e-05, "loss": 1.4934, "step": 4438 }, { "epoch": 0.7103536565850536, "grad_norm": 0.35287123918533325, "learning_rate": 5e-05, "loss": 1.484, "step": 4439 }, { "epoch": 0.7105136821891502, "grad_norm": 0.34318897128105164, "learning_rate": 5e-05, "loss": 1.3788, "step": 4440 }, { "epoch": 0.7106737077932469, "grad_norm": 0.3421129584312439, "learning_rate": 5e-05, "loss": 1.386, "step": 4441 }, { "epoch": 0.7108337333973436, "grad_norm": 0.3638809621334076, "learning_rate": 5e-05, "loss": 1.4925, "step": 4442 }, { "epoch": 0.7109937590014402, "grad_norm": 0.32769155502319336, "learning_rate": 5e-05, "loss": 1.4127, "step": 4443 }, { "epoch": 0.7111537846055369, "grad_norm": 0.34169459342956543, "learning_rate": 5e-05, "loss": 1.4105, "step": 4444 }, { "epoch": 0.7113138102096336, "grad_norm": 0.35125601291656494, "learning_rate": 5e-05, "loss": 1.4794, "step": 4445 }, { "epoch": 0.7114738358137302, "grad_norm": 0.3484952449798584, "learning_rate": 5e-05, "loss": 1.5176, "step": 4446 }, { "epoch": 0.7116338614178268, "grad_norm": 0.3619520366191864, "learning_rate": 5e-05, "loss": 1.5751, "step": 4447 }, { "epoch": 0.7117938870219235, "grad_norm": 0.3447616994380951, "learning_rate": 5e-05, "loss": 1.4675, "step": 4448 }, { "epoch": 0.7119539126260201, "grad_norm": 0.345340758562088, "learning_rate": 5e-05, "loss": 1.4561, "step": 4449 }, { "epoch": 0.7121139382301168, "grad_norm": 0.3281092345714569, "learning_rate": 5e-05, "loss": 1.4328, "step": 4450 }, { "epoch": 0.7122739638342135, "grad_norm": 0.3497835695743561, "learning_rate": 5e-05, "loss": 1.5111, "step": 4451 }, { "epoch": 0.7124339894383102, "grad_norm": 0.3477170169353485, "learning_rate": 5e-05, "loss": 1.4982, "step": 4452 }, { "epoch": 0.7125940150424068, "grad_norm": 0.3635721206665039, "learning_rate": 5e-05, "loss": 1.4467, "step": 4453 }, { "epoch": 0.7127540406465035, "grad_norm": 0.3377009928226471, "learning_rate": 5e-05, "loss": 1.3591, "step": 4454 }, { "epoch": 0.7129140662506, "grad_norm": 0.3312835991382599, "learning_rate": 5e-05, "loss": 1.4768, "step": 4455 }, { "epoch": 0.7130740918546967, "grad_norm": 0.3412044942378998, "learning_rate": 5e-05, "loss": 1.5044, "step": 4456 }, { "epoch": 0.7132341174587934, "grad_norm": 0.3478861153125763, "learning_rate": 5e-05, "loss": 1.4885, "step": 4457 }, { "epoch": 0.7133941430628901, "grad_norm": 0.35413891077041626, "learning_rate": 5e-05, "loss": 1.5283, "step": 4458 }, { "epoch": 0.7135541686669867, "grad_norm": 0.33624619245529175, "learning_rate": 5e-05, "loss": 1.5043, "step": 4459 }, { "epoch": 0.7137141942710834, "grad_norm": 0.3450375199317932, "learning_rate": 5e-05, "loss": 1.4781, "step": 4460 }, { "epoch": 0.7138742198751801, "grad_norm": 0.3542785346508026, "learning_rate": 5e-05, "loss": 1.5539, "step": 4461 }, { "epoch": 0.7140342454792766, "grad_norm": 0.3468513488769531, "learning_rate": 5e-05, "loss": 1.4896, "step": 4462 }, { "epoch": 0.7141942710833733, "grad_norm": 0.3385384976863861, "learning_rate": 5e-05, "loss": 1.4515, "step": 4463 }, { "epoch": 0.71435429668747, "grad_norm": 0.35314634442329407, "learning_rate": 5e-05, "loss": 1.3544, "step": 4464 }, { "epoch": 0.7145143222915666, "grad_norm": 0.354796826839447, "learning_rate": 5e-05, "loss": 1.4613, "step": 4465 }, { "epoch": 0.7146743478956633, "grad_norm": 0.33741286396980286, "learning_rate": 5e-05, "loss": 1.4269, "step": 4466 }, { "epoch": 0.71483437349976, "grad_norm": 0.35262635350227356, "learning_rate": 5e-05, "loss": 1.488, "step": 4467 }, { "epoch": 0.7149943991038566, "grad_norm": 0.3398866653442383, "learning_rate": 5e-05, "loss": 1.355, "step": 4468 }, { "epoch": 0.7151544247079533, "grad_norm": 0.3549768030643463, "learning_rate": 5e-05, "loss": 1.5107, "step": 4469 }, { "epoch": 0.7153144503120499, "grad_norm": 0.37236058712005615, "learning_rate": 5e-05, "loss": 1.4897, "step": 4470 }, { "epoch": 0.7154744759161465, "grad_norm": 0.33473634719848633, "learning_rate": 5e-05, "loss": 1.4794, "step": 4471 }, { "epoch": 0.7156345015202432, "grad_norm": 0.3618687391281128, "learning_rate": 5e-05, "loss": 1.4713, "step": 4472 }, { "epoch": 0.7157945271243399, "grad_norm": 0.3856068551540375, "learning_rate": 5e-05, "loss": 1.4889, "step": 4473 }, { "epoch": 0.7159545527284366, "grad_norm": 0.36486923694610596, "learning_rate": 5e-05, "loss": 1.4291, "step": 4474 }, { "epoch": 0.7161145783325332, "grad_norm": 0.3650929927825928, "learning_rate": 5e-05, "loss": 1.4595, "step": 4475 }, { "epoch": 0.7162746039366299, "grad_norm": 0.33690565824508667, "learning_rate": 5e-05, "loss": 1.4557, "step": 4476 }, { "epoch": 0.7164346295407266, "grad_norm": 0.365329772233963, "learning_rate": 5e-05, "loss": 1.5054, "step": 4477 }, { "epoch": 0.7165946551448231, "grad_norm": 0.35311800241470337, "learning_rate": 5e-05, "loss": 1.4593, "step": 4478 }, { "epoch": 0.7167546807489198, "grad_norm": 0.3589073121547699, "learning_rate": 5e-05, "loss": 1.5222, "step": 4479 }, { "epoch": 0.7169147063530165, "grad_norm": 0.365346759557724, "learning_rate": 5e-05, "loss": 1.5233, "step": 4480 }, { "epoch": 0.7170747319571131, "grad_norm": 0.33907315135002136, "learning_rate": 5e-05, "loss": 1.4581, "step": 4481 }, { "epoch": 0.7172347575612098, "grad_norm": 0.3369113802909851, "learning_rate": 5e-05, "loss": 1.4071, "step": 4482 }, { "epoch": 0.7173947831653065, "grad_norm": 0.3537786900997162, "learning_rate": 5e-05, "loss": 1.4445, "step": 4483 }, { "epoch": 0.7175548087694031, "grad_norm": 0.35264676809310913, "learning_rate": 5e-05, "loss": 1.4566, "step": 4484 }, { "epoch": 0.7177148343734998, "grad_norm": 0.3349469304084778, "learning_rate": 5e-05, "loss": 1.4217, "step": 4485 }, { "epoch": 0.7178748599775964, "grad_norm": 0.35039734840393066, "learning_rate": 5e-05, "loss": 1.5116, "step": 4486 }, { "epoch": 0.718034885581693, "grad_norm": 0.34318435192108154, "learning_rate": 5e-05, "loss": 1.4168, "step": 4487 }, { "epoch": 0.7181949111857897, "grad_norm": 0.3499082028865814, "learning_rate": 5e-05, "loss": 1.536, "step": 4488 }, { "epoch": 0.7183549367898864, "grad_norm": 0.3731105923652649, "learning_rate": 5e-05, "loss": 1.5407, "step": 4489 }, { "epoch": 0.718514962393983, "grad_norm": 0.3577236533164978, "learning_rate": 5e-05, "loss": 1.4588, "step": 4490 }, { "epoch": 0.7186749879980797, "grad_norm": 0.33901989459991455, "learning_rate": 5e-05, "loss": 1.4731, "step": 4491 }, { "epoch": 0.7188350136021764, "grad_norm": 0.3531322479248047, "learning_rate": 5e-05, "loss": 1.4639, "step": 4492 }, { "epoch": 0.7189950392062731, "grad_norm": 0.3409181237220764, "learning_rate": 5e-05, "loss": 1.4427, "step": 4493 }, { "epoch": 0.7191550648103696, "grad_norm": 0.3381254971027374, "learning_rate": 5e-05, "loss": 1.4208, "step": 4494 }, { "epoch": 0.7193150904144663, "grad_norm": 0.3582536578178406, "learning_rate": 5e-05, "loss": 1.5633, "step": 4495 }, { "epoch": 0.719475116018563, "grad_norm": 0.3435380458831787, "learning_rate": 5e-05, "loss": 1.4699, "step": 4496 }, { "epoch": 0.7196351416226596, "grad_norm": 0.33667927980422974, "learning_rate": 5e-05, "loss": 1.4353, "step": 4497 }, { "epoch": 0.7197951672267563, "grad_norm": 0.34592315554618835, "learning_rate": 5e-05, "loss": 1.5489, "step": 4498 }, { "epoch": 0.719955192830853, "grad_norm": 0.3459199368953705, "learning_rate": 5e-05, "loss": 1.4256, "step": 4499 }, { "epoch": 0.7201152184349496, "grad_norm": 0.3489871025085449, "learning_rate": 5e-05, "loss": 1.5269, "step": 4500 }, { "epoch": 0.7202752440390463, "grad_norm": 0.3342382311820984, "learning_rate": 5e-05, "loss": 1.4195, "step": 4501 }, { "epoch": 0.7204352696431429, "grad_norm": 0.3439064621925354, "learning_rate": 5e-05, "loss": 1.4409, "step": 4502 }, { "epoch": 0.7205952952472395, "grad_norm": 0.3563750982284546, "learning_rate": 5e-05, "loss": 1.5098, "step": 4503 }, { "epoch": 0.7207553208513362, "grad_norm": 0.3334384262561798, "learning_rate": 5e-05, "loss": 1.4435, "step": 4504 }, { "epoch": 0.7209153464554329, "grad_norm": 0.34540265798568726, "learning_rate": 5e-05, "loss": 1.4757, "step": 4505 }, { "epoch": 0.7210753720595295, "grad_norm": 0.3471420109272003, "learning_rate": 5e-05, "loss": 1.4698, "step": 4506 }, { "epoch": 0.7212353976636262, "grad_norm": 0.360795795917511, "learning_rate": 5e-05, "loss": 1.4652, "step": 4507 }, { "epoch": 0.7213954232677229, "grad_norm": 0.3635973334312439, "learning_rate": 5e-05, "loss": 1.5368, "step": 4508 }, { "epoch": 0.7215554488718194, "grad_norm": 0.3514442443847656, "learning_rate": 5e-05, "loss": 1.5073, "step": 4509 }, { "epoch": 0.7217154744759161, "grad_norm": 0.3336338996887207, "learning_rate": 5e-05, "loss": 1.4464, "step": 4510 }, { "epoch": 0.7218755000800128, "grad_norm": 0.3380991816520691, "learning_rate": 5e-05, "loss": 1.4774, "step": 4511 }, { "epoch": 0.7220355256841094, "grad_norm": 0.3502142131328583, "learning_rate": 5e-05, "loss": 1.4201, "step": 4512 }, { "epoch": 0.7221955512882061, "grad_norm": 0.3553318381309509, "learning_rate": 5e-05, "loss": 1.5003, "step": 4513 }, { "epoch": 0.7223555768923028, "grad_norm": 0.3374500572681427, "learning_rate": 5e-05, "loss": 1.4728, "step": 4514 }, { "epoch": 0.7225156024963995, "grad_norm": 0.34274566173553467, "learning_rate": 5e-05, "loss": 1.4369, "step": 4515 }, { "epoch": 0.7226756281004961, "grad_norm": 0.3494221568107605, "learning_rate": 5e-05, "loss": 1.4405, "step": 4516 }, { "epoch": 0.7228356537045927, "grad_norm": 0.35945212841033936, "learning_rate": 5e-05, "loss": 1.5301, "step": 4517 }, { "epoch": 0.7229956793086894, "grad_norm": 0.3438040018081665, "learning_rate": 5e-05, "loss": 1.487, "step": 4518 }, { "epoch": 0.723155704912786, "grad_norm": 0.3333112597465515, "learning_rate": 5e-05, "loss": 1.4684, "step": 4519 }, { "epoch": 0.7233157305168827, "grad_norm": 0.3533536195755005, "learning_rate": 5e-05, "loss": 1.4517, "step": 4520 }, { "epoch": 0.7234757561209794, "grad_norm": 0.34993401169776917, "learning_rate": 5e-05, "loss": 1.4501, "step": 4521 }, { "epoch": 0.723635781725076, "grad_norm": 0.34458762407302856, "learning_rate": 5e-05, "loss": 1.4393, "step": 4522 }, { "epoch": 0.7237958073291727, "grad_norm": 0.3563380539417267, "learning_rate": 5e-05, "loss": 1.4407, "step": 4523 }, { "epoch": 0.7239558329332694, "grad_norm": 0.3440999388694763, "learning_rate": 5e-05, "loss": 1.4187, "step": 4524 }, { "epoch": 0.7241158585373659, "grad_norm": 0.3605183959007263, "learning_rate": 5e-05, "loss": 1.5889, "step": 4525 }, { "epoch": 0.7242758841414626, "grad_norm": 0.34247806668281555, "learning_rate": 5e-05, "loss": 1.3865, "step": 4526 }, { "epoch": 0.7244359097455593, "grad_norm": 0.36131128668785095, "learning_rate": 5e-05, "loss": 1.5545, "step": 4527 }, { "epoch": 0.7245959353496559, "grad_norm": 0.33983680605888367, "learning_rate": 5e-05, "loss": 1.4847, "step": 4528 }, { "epoch": 0.7247559609537526, "grad_norm": 0.3513578474521637, "learning_rate": 5e-05, "loss": 1.4875, "step": 4529 }, { "epoch": 0.7249159865578493, "grad_norm": 0.343534380197525, "learning_rate": 5e-05, "loss": 1.4703, "step": 4530 }, { "epoch": 0.725076012161946, "grad_norm": 0.3426852822303772, "learning_rate": 5e-05, "loss": 1.4669, "step": 4531 }, { "epoch": 0.7252360377660426, "grad_norm": 0.3484141528606415, "learning_rate": 5e-05, "loss": 1.4514, "step": 4532 }, { "epoch": 0.7253960633701392, "grad_norm": 0.33460596203804016, "learning_rate": 5e-05, "loss": 1.4242, "step": 4533 }, { "epoch": 0.7255560889742358, "grad_norm": 0.34113818407058716, "learning_rate": 5e-05, "loss": 1.4056, "step": 4534 }, { "epoch": 0.7257161145783325, "grad_norm": 0.34336966276168823, "learning_rate": 5e-05, "loss": 1.4439, "step": 4535 }, { "epoch": 0.7258761401824292, "grad_norm": 0.36099570989608765, "learning_rate": 5e-05, "loss": 1.4982, "step": 4536 }, { "epoch": 0.7260361657865259, "grad_norm": 0.3553713858127594, "learning_rate": 5e-05, "loss": 1.4819, "step": 4537 }, { "epoch": 0.7261961913906225, "grad_norm": 0.33968791365623474, "learning_rate": 5e-05, "loss": 1.4896, "step": 4538 }, { "epoch": 0.7263562169947192, "grad_norm": 0.3356446921825409, "learning_rate": 5e-05, "loss": 1.4498, "step": 4539 }, { "epoch": 0.7265162425988159, "grad_norm": 0.36330097913742065, "learning_rate": 5e-05, "loss": 1.5766, "step": 4540 }, { "epoch": 0.7266762682029124, "grad_norm": 0.338742733001709, "learning_rate": 5e-05, "loss": 1.4549, "step": 4541 }, { "epoch": 0.7268362938070091, "grad_norm": 0.35401633381843567, "learning_rate": 5e-05, "loss": 1.4854, "step": 4542 }, { "epoch": 0.7269963194111058, "grad_norm": 0.344906747341156, "learning_rate": 5e-05, "loss": 1.4748, "step": 4543 }, { "epoch": 0.7271563450152024, "grad_norm": 0.3448941111564636, "learning_rate": 5e-05, "loss": 1.4965, "step": 4544 }, { "epoch": 0.7273163706192991, "grad_norm": 0.335244745016098, "learning_rate": 5e-05, "loss": 1.49, "step": 4545 }, { "epoch": 0.7274763962233958, "grad_norm": 0.34545549750328064, "learning_rate": 5e-05, "loss": 1.4779, "step": 4546 }, { "epoch": 0.7276364218274924, "grad_norm": 0.34462112188339233, "learning_rate": 5e-05, "loss": 1.4347, "step": 4547 }, { "epoch": 0.727796447431589, "grad_norm": 0.3492300510406494, "learning_rate": 5e-05, "loss": 1.4517, "step": 4548 }, { "epoch": 0.7279564730356857, "grad_norm": 0.35046494007110596, "learning_rate": 5e-05, "loss": 1.523, "step": 4549 }, { "epoch": 0.7281164986397823, "grad_norm": 0.3426229655742645, "learning_rate": 5e-05, "loss": 1.4591, "step": 4550 }, { "epoch": 0.728276524243879, "grad_norm": 0.34759098291397095, "learning_rate": 5e-05, "loss": 1.4773, "step": 4551 }, { "epoch": 0.7284365498479757, "grad_norm": 0.3389778435230255, "learning_rate": 5e-05, "loss": 1.4453, "step": 4552 }, { "epoch": 0.7285965754520723, "grad_norm": 0.3753826320171356, "learning_rate": 5e-05, "loss": 1.5732, "step": 4553 }, { "epoch": 0.728756601056169, "grad_norm": 0.34142622351646423, "learning_rate": 5e-05, "loss": 1.4935, "step": 4554 }, { "epoch": 0.7289166266602657, "grad_norm": 0.3442113697528839, "learning_rate": 5e-05, "loss": 1.5129, "step": 4555 }, { "epoch": 0.7290766522643622, "grad_norm": 0.3423462510108948, "learning_rate": 5e-05, "loss": 1.5333, "step": 4556 }, { "epoch": 0.7292366778684589, "grad_norm": 0.35084325075149536, "learning_rate": 5e-05, "loss": 1.4512, "step": 4557 }, { "epoch": 0.7293967034725556, "grad_norm": 0.3442279100418091, "learning_rate": 5e-05, "loss": 1.4432, "step": 4558 }, { "epoch": 0.7295567290766523, "grad_norm": 0.3615057170391083, "learning_rate": 5e-05, "loss": 1.5566, "step": 4559 }, { "epoch": 0.7297167546807489, "grad_norm": 0.3510792553424835, "learning_rate": 5e-05, "loss": 1.472, "step": 4560 }, { "epoch": 0.7298767802848456, "grad_norm": 0.3463803231716156, "learning_rate": 5e-05, "loss": 1.4073, "step": 4561 }, { "epoch": 0.7300368058889423, "grad_norm": 0.3602293133735657, "learning_rate": 5e-05, "loss": 1.5242, "step": 4562 }, { "epoch": 0.7301968314930389, "grad_norm": 0.3471282124519348, "learning_rate": 5e-05, "loss": 1.5067, "step": 4563 }, { "epoch": 0.7303568570971355, "grad_norm": 0.3732004463672638, "learning_rate": 5e-05, "loss": 1.5289, "step": 4564 }, { "epoch": 0.7305168827012322, "grad_norm": 0.34675174951553345, "learning_rate": 5e-05, "loss": 1.5163, "step": 4565 }, { "epoch": 0.7306769083053288, "grad_norm": 0.3405534625053406, "learning_rate": 5e-05, "loss": 1.4708, "step": 4566 }, { "epoch": 0.7308369339094255, "grad_norm": 0.36156490445137024, "learning_rate": 5e-05, "loss": 1.4743, "step": 4567 }, { "epoch": 0.7309969595135222, "grad_norm": 0.34700220823287964, "learning_rate": 5e-05, "loss": 1.4407, "step": 4568 }, { "epoch": 0.7311569851176188, "grad_norm": 0.3378351628780365, "learning_rate": 5e-05, "loss": 1.4204, "step": 4569 }, { "epoch": 0.7313170107217155, "grad_norm": 0.3430124521255493, "learning_rate": 5e-05, "loss": 1.4281, "step": 4570 }, { "epoch": 0.7314770363258122, "grad_norm": 0.3502504527568817, "learning_rate": 5e-05, "loss": 1.5033, "step": 4571 }, { "epoch": 0.7316370619299087, "grad_norm": 0.3538869023323059, "learning_rate": 5e-05, "loss": 1.482, "step": 4572 }, { "epoch": 0.7317970875340054, "grad_norm": 0.3525681495666504, "learning_rate": 5e-05, "loss": 1.4288, "step": 4573 }, { "epoch": 0.7319571131381021, "grad_norm": 0.3426726162433624, "learning_rate": 5e-05, "loss": 1.4295, "step": 4574 }, { "epoch": 0.7321171387421987, "grad_norm": 0.34291163086891174, "learning_rate": 5e-05, "loss": 1.4233, "step": 4575 }, { "epoch": 0.7322771643462954, "grad_norm": 0.35508212447166443, "learning_rate": 5e-05, "loss": 1.4401, "step": 4576 }, { "epoch": 0.7324371899503921, "grad_norm": 0.35250383615493774, "learning_rate": 5e-05, "loss": 1.4519, "step": 4577 }, { "epoch": 0.7325972155544888, "grad_norm": 0.3481490910053253, "learning_rate": 5e-05, "loss": 1.474, "step": 4578 }, { "epoch": 0.7327572411585854, "grad_norm": 0.33923766016960144, "learning_rate": 5e-05, "loss": 1.4371, "step": 4579 }, { "epoch": 0.732917266762682, "grad_norm": 0.3474624454975128, "learning_rate": 5e-05, "loss": 1.4556, "step": 4580 }, { "epoch": 0.7330772923667787, "grad_norm": 0.3387955129146576, "learning_rate": 5e-05, "loss": 1.4099, "step": 4581 }, { "epoch": 0.7332373179708753, "grad_norm": 0.34826406836509705, "learning_rate": 5e-05, "loss": 1.4763, "step": 4582 }, { "epoch": 0.733397343574972, "grad_norm": 0.360495388507843, "learning_rate": 5e-05, "loss": 1.4768, "step": 4583 }, { "epoch": 0.7335573691790687, "grad_norm": 0.3415737748146057, "learning_rate": 5e-05, "loss": 1.4112, "step": 4584 }, { "epoch": 0.7337173947831653, "grad_norm": 0.3421548008918762, "learning_rate": 5e-05, "loss": 1.448, "step": 4585 }, { "epoch": 0.733877420387262, "grad_norm": 0.3426019549369812, "learning_rate": 5e-05, "loss": 1.476, "step": 4586 }, { "epoch": 0.7340374459913587, "grad_norm": 0.345024436712265, "learning_rate": 5e-05, "loss": 1.5227, "step": 4587 }, { "epoch": 0.7341974715954552, "grad_norm": 0.3477115333080292, "learning_rate": 5e-05, "loss": 1.5192, "step": 4588 }, { "epoch": 0.7343574971995519, "grad_norm": 0.3569091856479645, "learning_rate": 5e-05, "loss": 1.4705, "step": 4589 }, { "epoch": 0.7345175228036486, "grad_norm": 0.3471601605415344, "learning_rate": 5e-05, "loss": 1.4348, "step": 4590 }, { "epoch": 0.7346775484077452, "grad_norm": 0.36130306124687195, "learning_rate": 5e-05, "loss": 1.4755, "step": 4591 }, { "epoch": 0.7348375740118419, "grad_norm": 0.35288381576538086, "learning_rate": 5e-05, "loss": 1.5263, "step": 4592 }, { "epoch": 0.7349975996159386, "grad_norm": 0.3397410213947296, "learning_rate": 5e-05, "loss": 1.4256, "step": 4593 }, { "epoch": 0.7351576252200352, "grad_norm": 0.34592902660369873, "learning_rate": 5e-05, "loss": 1.4109, "step": 4594 }, { "epoch": 0.7353176508241318, "grad_norm": 0.36285898089408875, "learning_rate": 5e-05, "loss": 1.4999, "step": 4595 }, { "epoch": 0.7354776764282285, "grad_norm": 0.3430301547050476, "learning_rate": 5e-05, "loss": 1.4895, "step": 4596 }, { "epoch": 0.7356377020323251, "grad_norm": 0.36469021439552307, "learning_rate": 5e-05, "loss": 1.5324, "step": 4597 }, { "epoch": 0.7357977276364218, "grad_norm": 0.3628327250480652, "learning_rate": 5e-05, "loss": 1.5456, "step": 4598 }, { "epoch": 0.7359577532405185, "grad_norm": 0.3503914177417755, "learning_rate": 5e-05, "loss": 1.4342, "step": 4599 }, { "epoch": 0.7361177788446152, "grad_norm": 0.3680550754070282, "learning_rate": 5e-05, "loss": 1.5768, "step": 4600 }, { "epoch": 0.7362778044487118, "grad_norm": 0.36465954780578613, "learning_rate": 5e-05, "loss": 1.5667, "step": 4601 }, { "epoch": 0.7364378300528085, "grad_norm": 0.3372700810432434, "learning_rate": 5e-05, "loss": 1.4066, "step": 4602 }, { "epoch": 0.736597855656905, "grad_norm": 0.37998950481414795, "learning_rate": 5e-05, "loss": 1.5226, "step": 4603 }, { "epoch": 0.7367578812610017, "grad_norm": 0.3507591485977173, "learning_rate": 5e-05, "loss": 1.4793, "step": 4604 }, { "epoch": 0.7369179068650984, "grad_norm": 0.3419710397720337, "learning_rate": 5e-05, "loss": 1.4983, "step": 4605 }, { "epoch": 0.7370779324691951, "grad_norm": 0.3647831678390503, "learning_rate": 5e-05, "loss": 1.4686, "step": 4606 }, { "epoch": 0.7372379580732917, "grad_norm": 0.35562583804130554, "learning_rate": 5e-05, "loss": 1.4256, "step": 4607 }, { "epoch": 0.7373979836773884, "grad_norm": 0.35274869203567505, "learning_rate": 5e-05, "loss": 1.459, "step": 4608 }, { "epoch": 0.7375580092814851, "grad_norm": 0.3620453476905823, "learning_rate": 5e-05, "loss": 1.4937, "step": 4609 }, { "epoch": 0.7377180348855817, "grad_norm": 0.3399403989315033, "learning_rate": 5e-05, "loss": 1.3695, "step": 4610 }, { "epoch": 0.7378780604896783, "grad_norm": 0.35985445976257324, "learning_rate": 5e-05, "loss": 1.5128, "step": 4611 }, { "epoch": 0.738038086093775, "grad_norm": 0.3474292755126953, "learning_rate": 5e-05, "loss": 1.4534, "step": 4612 }, { "epoch": 0.7381981116978716, "grad_norm": 0.3448972702026367, "learning_rate": 5e-05, "loss": 1.5033, "step": 4613 }, { "epoch": 0.7383581373019683, "grad_norm": 0.3449365794658661, "learning_rate": 5e-05, "loss": 1.488, "step": 4614 }, { "epoch": 0.738518162906065, "grad_norm": 0.3589400351047516, "learning_rate": 5e-05, "loss": 1.5533, "step": 4615 }, { "epoch": 0.7386781885101616, "grad_norm": 0.35095614194869995, "learning_rate": 5e-05, "loss": 1.5352, "step": 4616 }, { "epoch": 0.7388382141142583, "grad_norm": 0.34978383779525757, "learning_rate": 5e-05, "loss": 1.4226, "step": 4617 }, { "epoch": 0.738998239718355, "grad_norm": 0.3560124933719635, "learning_rate": 5e-05, "loss": 1.5361, "step": 4618 }, { "epoch": 0.7391582653224515, "grad_norm": 0.3498724102973938, "learning_rate": 5e-05, "loss": 1.4924, "step": 4619 }, { "epoch": 0.7393182909265482, "grad_norm": 0.3591344356536865, "learning_rate": 5e-05, "loss": 1.5016, "step": 4620 }, { "epoch": 0.7394783165306449, "grad_norm": 0.3657580316066742, "learning_rate": 5e-05, "loss": 1.4872, "step": 4621 }, { "epoch": 0.7396383421347416, "grad_norm": 0.3508482277393341, "learning_rate": 5e-05, "loss": 1.4658, "step": 4622 }, { "epoch": 0.7397983677388382, "grad_norm": 0.38137876987457275, "learning_rate": 5e-05, "loss": 1.5229, "step": 4623 }, { "epoch": 0.7399583933429349, "grad_norm": 0.3531438410282135, "learning_rate": 5e-05, "loss": 1.4372, "step": 4624 }, { "epoch": 0.7401184189470316, "grad_norm": 0.34934425354003906, "learning_rate": 5e-05, "loss": 1.4429, "step": 4625 }, { "epoch": 0.7402784445511282, "grad_norm": 0.34964051842689514, "learning_rate": 5e-05, "loss": 1.565, "step": 4626 }, { "epoch": 0.7404384701552248, "grad_norm": 0.34123530983924866, "learning_rate": 5e-05, "loss": 1.4611, "step": 4627 }, { "epoch": 0.7405984957593215, "grad_norm": 0.34678369760513306, "learning_rate": 5e-05, "loss": 1.453, "step": 4628 }, { "epoch": 0.7407585213634181, "grad_norm": 0.3451153039932251, "learning_rate": 5e-05, "loss": 1.475, "step": 4629 }, { "epoch": 0.7409185469675148, "grad_norm": 0.35634633898735046, "learning_rate": 5e-05, "loss": 1.4837, "step": 4630 }, { "epoch": 0.7410785725716115, "grad_norm": 0.3438986539840698, "learning_rate": 5e-05, "loss": 1.4564, "step": 4631 }, { "epoch": 0.7412385981757081, "grad_norm": 0.33940157294273376, "learning_rate": 5e-05, "loss": 1.4195, "step": 4632 }, { "epoch": 0.7413986237798048, "grad_norm": 0.35219520330429077, "learning_rate": 5e-05, "loss": 1.5687, "step": 4633 }, { "epoch": 0.7415586493839014, "grad_norm": 0.354271799325943, "learning_rate": 5e-05, "loss": 1.4541, "step": 4634 }, { "epoch": 0.741718674987998, "grad_norm": 0.34580814838409424, "learning_rate": 5e-05, "loss": 1.4049, "step": 4635 }, { "epoch": 0.7418787005920947, "grad_norm": 0.3601360023021698, "learning_rate": 5e-05, "loss": 1.5278, "step": 4636 }, { "epoch": 0.7420387261961914, "grad_norm": 0.35281848907470703, "learning_rate": 5e-05, "loss": 1.5146, "step": 4637 }, { "epoch": 0.742198751800288, "grad_norm": 0.35405591130256653, "learning_rate": 5e-05, "loss": 1.4888, "step": 4638 }, { "epoch": 0.7423587774043847, "grad_norm": 0.3427213728427887, "learning_rate": 5e-05, "loss": 1.5113, "step": 4639 }, { "epoch": 0.7425188030084814, "grad_norm": 0.3503250777721405, "learning_rate": 5e-05, "loss": 1.4817, "step": 4640 }, { "epoch": 0.7426788286125781, "grad_norm": 0.35608112812042236, "learning_rate": 5e-05, "loss": 1.482, "step": 4641 }, { "epoch": 0.7428388542166746, "grad_norm": 0.335163414478302, "learning_rate": 5e-05, "loss": 1.3747, "step": 4642 }, { "epoch": 0.7429988798207713, "grad_norm": 0.362606018781662, "learning_rate": 5e-05, "loss": 1.516, "step": 4643 }, { "epoch": 0.743158905424868, "grad_norm": 0.3773439824581146, "learning_rate": 5e-05, "loss": 1.5318, "step": 4644 }, { "epoch": 0.7433189310289646, "grad_norm": 0.35726767778396606, "learning_rate": 5e-05, "loss": 1.5414, "step": 4645 }, { "epoch": 0.7434789566330613, "grad_norm": 0.33984652161598206, "learning_rate": 5e-05, "loss": 1.4396, "step": 4646 }, { "epoch": 0.743638982237158, "grad_norm": 0.3396017253398895, "learning_rate": 5e-05, "loss": 1.4767, "step": 4647 }, { "epoch": 0.7437990078412546, "grad_norm": 0.3461008667945862, "learning_rate": 5e-05, "loss": 1.4022, "step": 4648 }, { "epoch": 0.7439590334453513, "grad_norm": 0.33280596137046814, "learning_rate": 5e-05, "loss": 1.3861, "step": 4649 }, { "epoch": 0.7441190590494479, "grad_norm": 0.3333042562007904, "learning_rate": 5e-05, "loss": 1.4051, "step": 4650 }, { "epoch": 0.7442790846535445, "grad_norm": 0.34136101603507996, "learning_rate": 5e-05, "loss": 1.4878, "step": 4651 }, { "epoch": 0.7444391102576412, "grad_norm": 0.3503913879394531, "learning_rate": 5e-05, "loss": 1.4239, "step": 4652 }, { "epoch": 0.7445991358617379, "grad_norm": 0.3469517230987549, "learning_rate": 5e-05, "loss": 1.386, "step": 4653 }, { "epoch": 0.7447591614658345, "grad_norm": 0.34631749987602234, "learning_rate": 5e-05, "loss": 1.4192, "step": 4654 }, { "epoch": 0.7449191870699312, "grad_norm": 0.3595203161239624, "learning_rate": 5e-05, "loss": 1.4665, "step": 4655 }, { "epoch": 0.7450792126740279, "grad_norm": 0.3407025933265686, "learning_rate": 5e-05, "loss": 1.4201, "step": 4656 }, { "epoch": 0.7452392382781245, "grad_norm": 0.3543248772621155, "learning_rate": 5e-05, "loss": 1.4914, "step": 4657 }, { "epoch": 0.7453992638822211, "grad_norm": 0.3532631993293762, "learning_rate": 5e-05, "loss": 1.5072, "step": 4658 }, { "epoch": 0.7455592894863178, "grad_norm": 0.3629884719848633, "learning_rate": 5e-05, "loss": 1.497, "step": 4659 }, { "epoch": 0.7457193150904144, "grad_norm": 0.3513434827327728, "learning_rate": 5e-05, "loss": 1.434, "step": 4660 }, { "epoch": 0.7458793406945111, "grad_norm": 0.35539987683296204, "learning_rate": 5e-05, "loss": 1.4791, "step": 4661 }, { "epoch": 0.7460393662986078, "grad_norm": 0.35378843545913696, "learning_rate": 5e-05, "loss": 1.4409, "step": 4662 }, { "epoch": 0.7461993919027045, "grad_norm": 0.3623867630958557, "learning_rate": 5e-05, "loss": 1.5311, "step": 4663 }, { "epoch": 0.7463594175068011, "grad_norm": 0.3596417307853699, "learning_rate": 5e-05, "loss": 1.4703, "step": 4664 }, { "epoch": 0.7465194431108978, "grad_norm": 0.356116384267807, "learning_rate": 5e-05, "loss": 1.5233, "step": 4665 }, { "epoch": 0.7466794687149944, "grad_norm": 0.34627804160118103, "learning_rate": 5e-05, "loss": 1.4842, "step": 4666 }, { "epoch": 0.746839494319091, "grad_norm": 0.33941546082496643, "learning_rate": 5e-05, "loss": 1.4862, "step": 4667 }, { "epoch": 0.7469995199231877, "grad_norm": 0.34081920981407166, "learning_rate": 5e-05, "loss": 1.4786, "step": 4668 }, { "epoch": 0.7471595455272844, "grad_norm": 0.3493928611278534, "learning_rate": 5e-05, "loss": 1.5074, "step": 4669 }, { "epoch": 0.747319571131381, "grad_norm": 0.34056225419044495, "learning_rate": 5e-05, "loss": 1.4335, "step": 4670 }, { "epoch": 0.7474795967354777, "grad_norm": 0.33701807260513306, "learning_rate": 5e-05, "loss": 1.3771, "step": 4671 }, { "epoch": 0.7476396223395744, "grad_norm": 0.3470149338245392, "learning_rate": 5e-05, "loss": 1.4328, "step": 4672 }, { "epoch": 0.747799647943671, "grad_norm": 0.36061057448387146, "learning_rate": 5e-05, "loss": 1.4537, "step": 4673 }, { "epoch": 0.7479596735477676, "grad_norm": 0.34232762455940247, "learning_rate": 5e-05, "loss": 1.4562, "step": 4674 }, { "epoch": 0.7481196991518643, "grad_norm": 0.3435361683368683, "learning_rate": 5e-05, "loss": 1.4257, "step": 4675 }, { "epoch": 0.7482797247559609, "grad_norm": 0.34722864627838135, "learning_rate": 5e-05, "loss": 1.4766, "step": 4676 }, { "epoch": 0.7484397503600576, "grad_norm": 0.3446466326713562, "learning_rate": 5e-05, "loss": 1.4617, "step": 4677 }, { "epoch": 0.7485997759641543, "grad_norm": 0.3419427275657654, "learning_rate": 5e-05, "loss": 1.4695, "step": 4678 }, { "epoch": 0.748759801568251, "grad_norm": 0.3639848828315735, "learning_rate": 5e-05, "loss": 1.5054, "step": 4679 }, { "epoch": 0.7489198271723476, "grad_norm": 0.35432571172714233, "learning_rate": 5e-05, "loss": 1.4921, "step": 4680 }, { "epoch": 0.7490798527764442, "grad_norm": 0.3603597581386566, "learning_rate": 5e-05, "loss": 1.4723, "step": 4681 }, { "epoch": 0.7492398783805408, "grad_norm": 0.35083797574043274, "learning_rate": 5e-05, "loss": 1.4681, "step": 4682 }, { "epoch": 0.7493999039846375, "grad_norm": 0.3568074703216553, "learning_rate": 5e-05, "loss": 1.4801, "step": 4683 }, { "epoch": 0.7495599295887342, "grad_norm": 0.3529399335384369, "learning_rate": 5e-05, "loss": 1.4401, "step": 4684 }, { "epoch": 0.7497199551928309, "grad_norm": 0.3417294919490814, "learning_rate": 5e-05, "loss": 1.4286, "step": 4685 }, { "epoch": 0.7498799807969275, "grad_norm": 0.3403018116950989, "learning_rate": 5e-05, "loss": 1.4325, "step": 4686 }, { "epoch": 0.7500400064010242, "grad_norm": 0.36578163504600525, "learning_rate": 5e-05, "loss": 1.4885, "step": 4687 }, { "epoch": 0.7502000320051209, "grad_norm": 0.3514179587364197, "learning_rate": 5e-05, "loss": 1.3805, "step": 4688 }, { "epoch": 0.7503600576092174, "grad_norm": 0.35062700510025024, "learning_rate": 5e-05, "loss": 1.5111, "step": 4689 }, { "epoch": 0.7505200832133141, "grad_norm": 0.346769243478775, "learning_rate": 5e-05, "loss": 1.5119, "step": 4690 }, { "epoch": 0.7506801088174108, "grad_norm": 0.3361099362373352, "learning_rate": 5e-05, "loss": 1.4028, "step": 4691 }, { "epoch": 0.7508401344215074, "grad_norm": 0.3734530806541443, "learning_rate": 5e-05, "loss": 1.449, "step": 4692 }, { "epoch": 0.7510001600256041, "grad_norm": 0.36615633964538574, "learning_rate": 5e-05, "loss": 1.5039, "step": 4693 }, { "epoch": 0.7511601856297008, "grad_norm": 0.338742196559906, "learning_rate": 5e-05, "loss": 1.4341, "step": 4694 }, { "epoch": 0.7513202112337974, "grad_norm": 0.35203054547309875, "learning_rate": 5e-05, "loss": 1.4922, "step": 4695 }, { "epoch": 0.7514802368378941, "grad_norm": 0.37136310338974, "learning_rate": 5e-05, "loss": 1.5459, "step": 4696 }, { "epoch": 0.7516402624419907, "grad_norm": 0.3636487126350403, "learning_rate": 5e-05, "loss": 1.5172, "step": 4697 }, { "epoch": 0.7518002880460873, "grad_norm": 0.3497549593448639, "learning_rate": 5e-05, "loss": 1.5168, "step": 4698 }, { "epoch": 0.751960313650184, "grad_norm": 0.3604191541671753, "learning_rate": 5e-05, "loss": 1.4477, "step": 4699 }, { "epoch": 0.7521203392542807, "grad_norm": 0.3466748297214508, "learning_rate": 5e-05, "loss": 1.5134, "step": 4700 }, { "epoch": 0.7522803648583773, "grad_norm": 0.3397122621536255, "learning_rate": 5e-05, "loss": 1.4495, "step": 4701 }, { "epoch": 0.752440390462474, "grad_norm": 0.35069718956947327, "learning_rate": 5e-05, "loss": 1.4305, "step": 4702 }, { "epoch": 0.7526004160665707, "grad_norm": 0.35799792408943176, "learning_rate": 5e-05, "loss": 1.4232, "step": 4703 }, { "epoch": 0.7527604416706674, "grad_norm": 0.34622520208358765, "learning_rate": 5e-05, "loss": 1.4187, "step": 4704 }, { "epoch": 0.7529204672747639, "grad_norm": 0.3479657769203186, "learning_rate": 5e-05, "loss": 1.4154, "step": 4705 }, { "epoch": 0.7530804928788606, "grad_norm": 0.35141655802726746, "learning_rate": 5e-05, "loss": 1.4793, "step": 4706 }, { "epoch": 0.7532405184829573, "grad_norm": 0.34730958938598633, "learning_rate": 5e-05, "loss": 1.4619, "step": 4707 }, { "epoch": 0.7534005440870539, "grad_norm": 0.35531407594680786, "learning_rate": 5e-05, "loss": 1.4392, "step": 4708 }, { "epoch": 0.7535605696911506, "grad_norm": 0.34594935178756714, "learning_rate": 5e-05, "loss": 1.397, "step": 4709 }, { "epoch": 0.7537205952952473, "grad_norm": 0.3398132026195526, "learning_rate": 5e-05, "loss": 1.4055, "step": 4710 }, { "epoch": 0.7538806208993439, "grad_norm": 0.3470761179924011, "learning_rate": 5e-05, "loss": 1.443, "step": 4711 }, { "epoch": 0.7540406465034406, "grad_norm": 0.3534603416919708, "learning_rate": 5e-05, "loss": 1.4868, "step": 4712 }, { "epoch": 0.7542006721075372, "grad_norm": 0.350361168384552, "learning_rate": 5e-05, "loss": 1.4924, "step": 4713 }, { "epoch": 0.7543606977116338, "grad_norm": 0.3481939435005188, "learning_rate": 5e-05, "loss": 1.4364, "step": 4714 }, { "epoch": 0.7545207233157305, "grad_norm": 0.33504584431648254, "learning_rate": 5e-05, "loss": 1.3981, "step": 4715 }, { "epoch": 0.7546807489198272, "grad_norm": 0.3378201723098755, "learning_rate": 5e-05, "loss": 1.4399, "step": 4716 }, { "epoch": 0.7548407745239238, "grad_norm": 0.3539527952671051, "learning_rate": 5e-05, "loss": 1.4832, "step": 4717 }, { "epoch": 0.7550008001280205, "grad_norm": 0.33827394247055054, "learning_rate": 5e-05, "loss": 1.4506, "step": 4718 }, { "epoch": 0.7551608257321172, "grad_norm": 0.341960072517395, "learning_rate": 5e-05, "loss": 1.4857, "step": 4719 }, { "epoch": 0.7553208513362137, "grad_norm": 0.3527398109436035, "learning_rate": 5e-05, "loss": 1.4721, "step": 4720 }, { "epoch": 0.7554808769403104, "grad_norm": 0.34360817074775696, "learning_rate": 5e-05, "loss": 1.4871, "step": 4721 }, { "epoch": 0.7556409025444071, "grad_norm": 0.3502247631549835, "learning_rate": 5e-05, "loss": 1.5393, "step": 4722 }, { "epoch": 0.7558009281485037, "grad_norm": 0.342166930437088, "learning_rate": 5e-05, "loss": 1.4924, "step": 4723 }, { "epoch": 0.7559609537526004, "grad_norm": 0.3511759340763092, "learning_rate": 5e-05, "loss": 1.5441, "step": 4724 }, { "epoch": 0.7561209793566971, "grad_norm": 0.35805705189704895, "learning_rate": 5e-05, "loss": 1.4464, "step": 4725 }, { "epoch": 0.7562810049607938, "grad_norm": 0.355903685092926, "learning_rate": 5e-05, "loss": 1.506, "step": 4726 }, { "epoch": 0.7564410305648904, "grad_norm": 0.3550124168395996, "learning_rate": 5e-05, "loss": 1.4906, "step": 4727 }, { "epoch": 0.756601056168987, "grad_norm": 0.3376818299293518, "learning_rate": 5e-05, "loss": 1.447, "step": 4728 }, { "epoch": 0.7567610817730837, "grad_norm": 0.34520280361175537, "learning_rate": 5e-05, "loss": 1.4528, "step": 4729 }, { "epoch": 0.7569211073771803, "grad_norm": 0.3435426652431488, "learning_rate": 5e-05, "loss": 1.4609, "step": 4730 }, { "epoch": 0.757081132981277, "grad_norm": 0.34733712673187256, "learning_rate": 5e-05, "loss": 1.4945, "step": 4731 }, { "epoch": 0.7572411585853737, "grad_norm": 0.35793739557266235, "learning_rate": 5e-05, "loss": 1.521, "step": 4732 }, { "epoch": 0.7574011841894703, "grad_norm": 0.35399124026298523, "learning_rate": 5e-05, "loss": 1.5263, "step": 4733 }, { "epoch": 0.757561209793567, "grad_norm": 0.3439674973487854, "learning_rate": 5e-05, "loss": 1.4743, "step": 4734 }, { "epoch": 0.7577212353976637, "grad_norm": 0.3539555072784424, "learning_rate": 5e-05, "loss": 1.4329, "step": 4735 }, { "epoch": 0.7578812610017602, "grad_norm": 0.3537921905517578, "learning_rate": 5e-05, "loss": 1.4713, "step": 4736 }, { "epoch": 0.7580412866058569, "grad_norm": 0.3537289500236511, "learning_rate": 5e-05, "loss": 1.5024, "step": 4737 }, { "epoch": 0.7582013122099536, "grad_norm": 0.3450542390346527, "learning_rate": 5e-05, "loss": 1.4953, "step": 4738 }, { "epoch": 0.7583613378140502, "grad_norm": 0.34086188673973083, "learning_rate": 5e-05, "loss": 1.4591, "step": 4739 }, { "epoch": 0.7585213634181469, "grad_norm": 0.33997654914855957, "learning_rate": 5e-05, "loss": 1.4608, "step": 4740 }, { "epoch": 0.7586813890222436, "grad_norm": 0.344372421503067, "learning_rate": 5e-05, "loss": 1.5027, "step": 4741 }, { "epoch": 0.7588414146263402, "grad_norm": 0.3519041836261749, "learning_rate": 5e-05, "loss": 1.467, "step": 4742 }, { "epoch": 0.7590014402304369, "grad_norm": 0.3448658287525177, "learning_rate": 5e-05, "loss": 1.428, "step": 4743 }, { "epoch": 0.7591614658345335, "grad_norm": 0.35573673248291016, "learning_rate": 5e-05, "loss": 1.5605, "step": 4744 }, { "epoch": 0.7593214914386301, "grad_norm": 0.3421025276184082, "learning_rate": 5e-05, "loss": 1.3995, "step": 4745 }, { "epoch": 0.7594815170427268, "grad_norm": 0.3642263412475586, "learning_rate": 5e-05, "loss": 1.5785, "step": 4746 }, { "epoch": 0.7596415426468235, "grad_norm": 0.34769749641418457, "learning_rate": 5e-05, "loss": 1.4283, "step": 4747 }, { "epoch": 0.7598015682509202, "grad_norm": 0.3425593972206116, "learning_rate": 5e-05, "loss": 1.381, "step": 4748 }, { "epoch": 0.7599615938550168, "grad_norm": 0.3503926992416382, "learning_rate": 5e-05, "loss": 1.4632, "step": 4749 }, { "epoch": 0.7601216194591135, "grad_norm": 0.3506636619567871, "learning_rate": 5e-05, "loss": 1.4374, "step": 4750 }, { "epoch": 0.7602816450632102, "grad_norm": 0.37090787291526794, "learning_rate": 5e-05, "loss": 1.4839, "step": 4751 }, { "epoch": 0.7604416706673067, "grad_norm": 0.33992716670036316, "learning_rate": 5e-05, "loss": 1.3925, "step": 4752 }, { "epoch": 0.7606016962714034, "grad_norm": 0.3562690317630768, "learning_rate": 5e-05, "loss": 1.4313, "step": 4753 }, { "epoch": 0.7607617218755001, "grad_norm": 0.35379013419151306, "learning_rate": 5e-05, "loss": 1.4583, "step": 4754 }, { "epoch": 0.7609217474795967, "grad_norm": 0.3507809042930603, "learning_rate": 5e-05, "loss": 1.4574, "step": 4755 }, { "epoch": 0.7610817730836934, "grad_norm": 0.3514275550842285, "learning_rate": 5e-05, "loss": 1.3455, "step": 4756 }, { "epoch": 0.7612417986877901, "grad_norm": 0.35596704483032227, "learning_rate": 5e-05, "loss": 1.4436, "step": 4757 }, { "epoch": 0.7614018242918867, "grad_norm": 0.3610241115093231, "learning_rate": 5e-05, "loss": 1.4289, "step": 4758 }, { "epoch": 0.7615618498959834, "grad_norm": 0.3396405875682831, "learning_rate": 5e-05, "loss": 1.4407, "step": 4759 }, { "epoch": 0.76172187550008, "grad_norm": 0.3520253896713257, "learning_rate": 5e-05, "loss": 1.4305, "step": 4760 }, { "epoch": 0.7618819011041766, "grad_norm": 0.34025341272354126, "learning_rate": 5e-05, "loss": 1.4422, "step": 4761 }, { "epoch": 0.7620419267082733, "grad_norm": 0.37708184123039246, "learning_rate": 5e-05, "loss": 1.5391, "step": 4762 }, { "epoch": 0.76220195231237, "grad_norm": 0.3440622389316559, "learning_rate": 5e-05, "loss": 1.4182, "step": 4763 }, { "epoch": 0.7623619779164666, "grad_norm": 0.3609725534915924, "learning_rate": 5e-05, "loss": 1.4245, "step": 4764 }, { "epoch": 0.7625220035205633, "grad_norm": 0.36972570419311523, "learning_rate": 5e-05, "loss": 1.5371, "step": 4765 }, { "epoch": 0.76268202912466, "grad_norm": 0.3583773076534271, "learning_rate": 5e-05, "loss": 1.4836, "step": 4766 }, { "epoch": 0.7628420547287565, "grad_norm": 0.3668471574783325, "learning_rate": 5e-05, "loss": 1.4621, "step": 4767 }, { "epoch": 0.7630020803328532, "grad_norm": 0.3493598699569702, "learning_rate": 5e-05, "loss": 1.4679, "step": 4768 }, { "epoch": 0.7631621059369499, "grad_norm": 0.3632495701313019, "learning_rate": 5e-05, "loss": 1.4874, "step": 4769 }, { "epoch": 0.7633221315410466, "grad_norm": 0.3498222231864929, "learning_rate": 5e-05, "loss": 1.5259, "step": 4770 }, { "epoch": 0.7634821571451432, "grad_norm": 0.3439981937408447, "learning_rate": 5e-05, "loss": 1.467, "step": 4771 }, { "epoch": 0.7636421827492399, "grad_norm": 0.336662232875824, "learning_rate": 5e-05, "loss": 1.4336, "step": 4772 }, { "epoch": 0.7638022083533366, "grad_norm": 0.3546782433986664, "learning_rate": 5e-05, "loss": 1.4855, "step": 4773 }, { "epoch": 0.7639622339574332, "grad_norm": 0.3564954400062561, "learning_rate": 5e-05, "loss": 1.4509, "step": 4774 }, { "epoch": 0.7641222595615298, "grad_norm": 0.3571676015853882, "learning_rate": 5e-05, "loss": 1.5235, "step": 4775 }, { "epoch": 0.7642822851656265, "grad_norm": 0.36210179328918457, "learning_rate": 5e-05, "loss": 1.5395, "step": 4776 }, { "epoch": 0.7644423107697231, "grad_norm": 0.3424374461174011, "learning_rate": 5e-05, "loss": 1.4629, "step": 4777 }, { "epoch": 0.7646023363738198, "grad_norm": 0.34878790378570557, "learning_rate": 5e-05, "loss": 1.4813, "step": 4778 }, { "epoch": 0.7647623619779165, "grad_norm": 0.3610369861125946, "learning_rate": 5e-05, "loss": 1.5123, "step": 4779 }, { "epoch": 0.7649223875820131, "grad_norm": 0.3509214520454407, "learning_rate": 5e-05, "loss": 1.4618, "step": 4780 }, { "epoch": 0.7650824131861098, "grad_norm": 0.35033082962036133, "learning_rate": 5e-05, "loss": 1.5222, "step": 4781 }, { "epoch": 0.7652424387902065, "grad_norm": 0.36265966296195984, "learning_rate": 5e-05, "loss": 1.5261, "step": 4782 }, { "epoch": 0.765402464394303, "grad_norm": 0.35773101449012756, "learning_rate": 5e-05, "loss": 1.5281, "step": 4783 }, { "epoch": 0.7655624899983997, "grad_norm": 0.34297850728034973, "learning_rate": 5e-05, "loss": 1.4228, "step": 4784 }, { "epoch": 0.7657225156024964, "grad_norm": 0.3331652581691742, "learning_rate": 5e-05, "loss": 1.4644, "step": 4785 }, { "epoch": 0.765882541206593, "grad_norm": 0.36343443393707275, "learning_rate": 5e-05, "loss": 1.5135, "step": 4786 }, { "epoch": 0.7660425668106897, "grad_norm": 0.3405134379863739, "learning_rate": 5e-05, "loss": 1.4436, "step": 4787 }, { "epoch": 0.7662025924147864, "grad_norm": 0.34237611293792725, "learning_rate": 5e-05, "loss": 1.4003, "step": 4788 }, { "epoch": 0.7663626180188831, "grad_norm": 0.3568064868450165, "learning_rate": 5e-05, "loss": 1.5261, "step": 4789 }, { "epoch": 0.7665226436229797, "grad_norm": 0.3638767898082733, "learning_rate": 5e-05, "loss": 1.5989, "step": 4790 }, { "epoch": 0.7666826692270763, "grad_norm": 0.34481629729270935, "learning_rate": 5e-05, "loss": 1.4089, "step": 4791 }, { "epoch": 0.766842694831173, "grad_norm": 0.3432199954986572, "learning_rate": 5e-05, "loss": 1.4854, "step": 4792 }, { "epoch": 0.7670027204352696, "grad_norm": 0.3412010669708252, "learning_rate": 5e-05, "loss": 1.4144, "step": 4793 }, { "epoch": 0.7671627460393663, "grad_norm": 0.34886568784713745, "learning_rate": 5e-05, "loss": 1.4455, "step": 4794 }, { "epoch": 0.767322771643463, "grad_norm": 0.36048445105552673, "learning_rate": 5e-05, "loss": 1.3875, "step": 4795 }, { "epoch": 0.7674827972475596, "grad_norm": 0.34030675888061523, "learning_rate": 5e-05, "loss": 1.5118, "step": 4796 }, { "epoch": 0.7676428228516563, "grad_norm": 0.34563082456588745, "learning_rate": 5e-05, "loss": 1.4209, "step": 4797 }, { "epoch": 0.767802848455753, "grad_norm": 0.35036543011665344, "learning_rate": 5e-05, "loss": 1.4195, "step": 4798 }, { "epoch": 0.7679628740598495, "grad_norm": 0.35970187187194824, "learning_rate": 5e-05, "loss": 1.4745, "step": 4799 }, { "epoch": 0.7681228996639462, "grad_norm": 0.34576383233070374, "learning_rate": 5e-05, "loss": 1.4432, "step": 4800 }, { "epoch": 0.7682829252680429, "grad_norm": 0.35840874910354614, "learning_rate": 5e-05, "loss": 1.4611, "step": 4801 }, { "epoch": 0.7684429508721395, "grad_norm": 0.3493810296058655, "learning_rate": 5e-05, "loss": 1.4447, "step": 4802 }, { "epoch": 0.7686029764762362, "grad_norm": 0.35400405526161194, "learning_rate": 5e-05, "loss": 1.5136, "step": 4803 }, { "epoch": 0.7687630020803329, "grad_norm": 0.33463096618652344, "learning_rate": 5e-05, "loss": 1.4327, "step": 4804 }, { "epoch": 0.7689230276844295, "grad_norm": 0.34775105118751526, "learning_rate": 5e-05, "loss": 1.4615, "step": 4805 }, { "epoch": 0.7690830532885262, "grad_norm": 0.3578675091266632, "learning_rate": 5e-05, "loss": 1.4692, "step": 4806 }, { "epoch": 0.7692430788926228, "grad_norm": 0.35560593008995056, "learning_rate": 5e-05, "loss": 1.3932, "step": 4807 }, { "epoch": 0.7694031044967194, "grad_norm": 0.3653237819671631, "learning_rate": 5e-05, "loss": 1.3691, "step": 4808 }, { "epoch": 0.7695631301008161, "grad_norm": 0.35532987117767334, "learning_rate": 5e-05, "loss": 1.4351, "step": 4809 }, { "epoch": 0.7697231557049128, "grad_norm": 0.35550862550735474, "learning_rate": 5e-05, "loss": 1.5434, "step": 4810 }, { "epoch": 0.7698831813090095, "grad_norm": 0.36836516857147217, "learning_rate": 5e-05, "loss": 1.4179, "step": 4811 }, { "epoch": 0.7700432069131061, "grad_norm": 0.3579255938529968, "learning_rate": 5e-05, "loss": 1.4757, "step": 4812 }, { "epoch": 0.7702032325172028, "grad_norm": 0.34142187237739563, "learning_rate": 5e-05, "loss": 1.4258, "step": 4813 }, { "epoch": 0.7703632581212994, "grad_norm": 0.34790629148483276, "learning_rate": 5e-05, "loss": 1.4523, "step": 4814 }, { "epoch": 0.770523283725396, "grad_norm": 0.35127633810043335, "learning_rate": 5e-05, "loss": 1.4877, "step": 4815 }, { "epoch": 0.7706833093294927, "grad_norm": 0.35193684697151184, "learning_rate": 5e-05, "loss": 1.4392, "step": 4816 }, { "epoch": 0.7708433349335894, "grad_norm": 0.33887672424316406, "learning_rate": 5e-05, "loss": 1.397, "step": 4817 }, { "epoch": 0.771003360537686, "grad_norm": 0.3426749110221863, "learning_rate": 5e-05, "loss": 1.466, "step": 4818 }, { "epoch": 0.7711633861417827, "grad_norm": 0.34764364361763, "learning_rate": 5e-05, "loss": 1.4418, "step": 4819 }, { "epoch": 0.7713234117458794, "grad_norm": 0.33981600403785706, "learning_rate": 5e-05, "loss": 1.412, "step": 4820 }, { "epoch": 0.771483437349976, "grad_norm": 0.3554960787296295, "learning_rate": 5e-05, "loss": 1.4941, "step": 4821 }, { "epoch": 0.7716434629540726, "grad_norm": 0.35116279125213623, "learning_rate": 5e-05, "loss": 1.4542, "step": 4822 }, { "epoch": 0.7718034885581693, "grad_norm": 0.3440268337726593, "learning_rate": 5e-05, "loss": 1.414, "step": 4823 }, { "epoch": 0.7719635141622659, "grad_norm": 0.3489988446235657, "learning_rate": 5e-05, "loss": 1.4311, "step": 4824 }, { "epoch": 0.7721235397663626, "grad_norm": 0.3439866602420807, "learning_rate": 5e-05, "loss": 1.4908, "step": 4825 }, { "epoch": 0.7722835653704593, "grad_norm": 0.342864453792572, "learning_rate": 5e-05, "loss": 1.5032, "step": 4826 }, { "epoch": 0.772443590974556, "grad_norm": 0.33803194761276245, "learning_rate": 5e-05, "loss": 1.418, "step": 4827 }, { "epoch": 0.7726036165786526, "grad_norm": 0.354448527097702, "learning_rate": 5e-05, "loss": 1.5116, "step": 4828 }, { "epoch": 0.7727636421827493, "grad_norm": 0.3494068384170532, "learning_rate": 5e-05, "loss": 1.4631, "step": 4829 }, { "epoch": 0.7729236677868458, "grad_norm": 0.3336520791053772, "learning_rate": 5e-05, "loss": 1.4069, "step": 4830 }, { "epoch": 0.7730836933909425, "grad_norm": 0.35881179571151733, "learning_rate": 5e-05, "loss": 1.4471, "step": 4831 }, { "epoch": 0.7732437189950392, "grad_norm": 0.3520847260951996, "learning_rate": 5e-05, "loss": 1.4553, "step": 4832 }, { "epoch": 0.7734037445991359, "grad_norm": 0.3536766469478607, "learning_rate": 5e-05, "loss": 1.472, "step": 4833 }, { "epoch": 0.7735637702032325, "grad_norm": 0.36149099469184875, "learning_rate": 5e-05, "loss": 1.5391, "step": 4834 }, { "epoch": 0.7737237958073292, "grad_norm": 0.3546740412712097, "learning_rate": 5e-05, "loss": 1.4829, "step": 4835 }, { "epoch": 0.7738838214114259, "grad_norm": 0.3447275757789612, "learning_rate": 5e-05, "loss": 1.3943, "step": 4836 }, { "epoch": 0.7740438470155225, "grad_norm": 0.33974239230155945, "learning_rate": 5e-05, "loss": 1.4747, "step": 4837 }, { "epoch": 0.7742038726196191, "grad_norm": 0.34427398443222046, "learning_rate": 5e-05, "loss": 1.4914, "step": 4838 }, { "epoch": 0.7743638982237158, "grad_norm": 0.35533690452575684, "learning_rate": 5e-05, "loss": 1.4541, "step": 4839 }, { "epoch": 0.7745239238278124, "grad_norm": 0.3471807539463043, "learning_rate": 5e-05, "loss": 1.4537, "step": 4840 }, { "epoch": 0.7746839494319091, "grad_norm": 0.34339961409568787, "learning_rate": 5e-05, "loss": 1.4742, "step": 4841 }, { "epoch": 0.7748439750360058, "grad_norm": 0.35110482573509216, "learning_rate": 5e-05, "loss": 1.4396, "step": 4842 }, { "epoch": 0.7750040006401024, "grad_norm": 0.349992036819458, "learning_rate": 5e-05, "loss": 1.4707, "step": 4843 }, { "epoch": 0.7751640262441991, "grad_norm": 0.35007229447364807, "learning_rate": 5e-05, "loss": 1.4398, "step": 4844 }, { "epoch": 0.7753240518482958, "grad_norm": 0.36266979575157166, "learning_rate": 5e-05, "loss": 1.5355, "step": 4845 }, { "epoch": 0.7754840774523923, "grad_norm": 0.3560570776462555, "learning_rate": 5e-05, "loss": 1.4811, "step": 4846 }, { "epoch": 0.775644103056489, "grad_norm": 0.34620770812034607, "learning_rate": 5e-05, "loss": 1.4406, "step": 4847 }, { "epoch": 0.7758041286605857, "grad_norm": 0.3478875756263733, "learning_rate": 5e-05, "loss": 1.5203, "step": 4848 }, { "epoch": 0.7759641542646823, "grad_norm": 0.3473489582538605, "learning_rate": 5e-05, "loss": 1.4976, "step": 4849 }, { "epoch": 0.776124179868779, "grad_norm": 0.35111743211746216, "learning_rate": 5e-05, "loss": 1.44, "step": 4850 }, { "epoch": 0.7762842054728757, "grad_norm": 0.35598182678222656, "learning_rate": 5e-05, "loss": 1.489, "step": 4851 }, { "epoch": 0.7764442310769724, "grad_norm": 0.35020875930786133, "learning_rate": 5e-05, "loss": 1.3876, "step": 4852 }, { "epoch": 0.7766042566810689, "grad_norm": 0.33782893419265747, "learning_rate": 5e-05, "loss": 1.4559, "step": 4853 }, { "epoch": 0.7767642822851656, "grad_norm": 0.34202712774276733, "learning_rate": 5e-05, "loss": 1.3885, "step": 4854 }, { "epoch": 0.7769243078892623, "grad_norm": 0.34491246938705444, "learning_rate": 5e-05, "loss": 1.4613, "step": 4855 }, { "epoch": 0.7770843334933589, "grad_norm": 0.345065712928772, "learning_rate": 5e-05, "loss": 1.4205, "step": 4856 }, { "epoch": 0.7772443590974556, "grad_norm": 0.34555935859680176, "learning_rate": 5e-05, "loss": 1.502, "step": 4857 }, { "epoch": 0.7774043847015523, "grad_norm": 0.33626511693000793, "learning_rate": 5e-05, "loss": 1.3538, "step": 4858 }, { "epoch": 0.7775644103056489, "grad_norm": 0.3499261736869812, "learning_rate": 5e-05, "loss": 1.4401, "step": 4859 }, { "epoch": 0.7777244359097456, "grad_norm": 0.3622293472290039, "learning_rate": 5e-05, "loss": 1.4363, "step": 4860 }, { "epoch": 0.7778844615138422, "grad_norm": 0.3697584867477417, "learning_rate": 5e-05, "loss": 1.5248, "step": 4861 }, { "epoch": 0.7780444871179388, "grad_norm": 0.3669179677963257, "learning_rate": 5e-05, "loss": 1.4982, "step": 4862 }, { "epoch": 0.7782045127220355, "grad_norm": 0.3650381565093994, "learning_rate": 5e-05, "loss": 1.5664, "step": 4863 }, { "epoch": 0.7783645383261322, "grad_norm": 0.3495618999004364, "learning_rate": 5e-05, "loss": 1.4739, "step": 4864 }, { "epoch": 0.7785245639302288, "grad_norm": 0.35322535037994385, "learning_rate": 5e-05, "loss": 1.5343, "step": 4865 }, { "epoch": 0.7786845895343255, "grad_norm": 0.37399107217788696, "learning_rate": 5e-05, "loss": 1.4768, "step": 4866 }, { "epoch": 0.7788446151384222, "grad_norm": 0.34369751811027527, "learning_rate": 5e-05, "loss": 1.5155, "step": 4867 }, { "epoch": 0.7790046407425189, "grad_norm": 0.337702214717865, "learning_rate": 5e-05, "loss": 1.4786, "step": 4868 }, { "epoch": 0.7791646663466154, "grad_norm": 0.36842599511146545, "learning_rate": 5e-05, "loss": 1.4947, "step": 4869 }, { "epoch": 0.7793246919507121, "grad_norm": 0.3590579628944397, "learning_rate": 5e-05, "loss": 1.4603, "step": 4870 }, { "epoch": 0.7794847175548087, "grad_norm": 0.3635481595993042, "learning_rate": 5e-05, "loss": 1.5387, "step": 4871 }, { "epoch": 0.7796447431589054, "grad_norm": 0.37072035670280457, "learning_rate": 5e-05, "loss": 1.5211, "step": 4872 }, { "epoch": 0.7798047687630021, "grad_norm": 0.34396669268608093, "learning_rate": 5e-05, "loss": 1.3963, "step": 4873 }, { "epoch": 0.7799647943670988, "grad_norm": 0.35032138228416443, "learning_rate": 5e-05, "loss": 1.4606, "step": 4874 }, { "epoch": 0.7801248199711954, "grad_norm": 0.35014432668685913, "learning_rate": 5e-05, "loss": 1.4383, "step": 4875 }, { "epoch": 0.7802848455752921, "grad_norm": 0.35493263602256775, "learning_rate": 5e-05, "loss": 1.456, "step": 4876 }, { "epoch": 0.7804448711793887, "grad_norm": 0.3582402467727661, "learning_rate": 5e-05, "loss": 1.5034, "step": 4877 }, { "epoch": 0.7806048967834853, "grad_norm": 0.3485454022884369, "learning_rate": 5e-05, "loss": 1.4573, "step": 4878 }, { "epoch": 0.780764922387582, "grad_norm": 0.3446756899356842, "learning_rate": 5e-05, "loss": 1.4444, "step": 4879 }, { "epoch": 0.7809249479916787, "grad_norm": 0.35781797766685486, "learning_rate": 5e-05, "loss": 1.5012, "step": 4880 }, { "epoch": 0.7810849735957753, "grad_norm": 0.3474263846874237, "learning_rate": 5e-05, "loss": 1.4906, "step": 4881 }, { "epoch": 0.781244999199872, "grad_norm": 0.3442699909210205, "learning_rate": 5e-05, "loss": 1.4395, "step": 4882 }, { "epoch": 0.7814050248039687, "grad_norm": 0.34434404969215393, "learning_rate": 5e-05, "loss": 1.4208, "step": 4883 }, { "epoch": 0.7815650504080653, "grad_norm": 0.3627853989601135, "learning_rate": 5e-05, "loss": 1.5041, "step": 4884 }, { "epoch": 0.7817250760121619, "grad_norm": 0.35127517580986023, "learning_rate": 5e-05, "loss": 1.4901, "step": 4885 }, { "epoch": 0.7818851016162586, "grad_norm": 0.33401212096214294, "learning_rate": 5e-05, "loss": 1.4127, "step": 4886 }, { "epoch": 0.7820451272203552, "grad_norm": 0.3462105989456177, "learning_rate": 5e-05, "loss": 1.4516, "step": 4887 }, { "epoch": 0.7822051528244519, "grad_norm": 0.35486021637916565, "learning_rate": 5e-05, "loss": 1.4483, "step": 4888 }, { "epoch": 0.7823651784285486, "grad_norm": 0.37827786803245544, "learning_rate": 5e-05, "loss": 1.5737, "step": 4889 }, { "epoch": 0.7825252040326452, "grad_norm": 0.3378944396972656, "learning_rate": 5e-05, "loss": 1.4602, "step": 4890 }, { "epoch": 0.7826852296367419, "grad_norm": 0.34119755029678345, "learning_rate": 5e-05, "loss": 1.469, "step": 4891 }, { "epoch": 0.7828452552408386, "grad_norm": 0.3467644453048706, "learning_rate": 5e-05, "loss": 1.4544, "step": 4892 }, { "epoch": 0.7830052808449351, "grad_norm": 0.3597489893436432, "learning_rate": 5e-05, "loss": 1.4717, "step": 4893 }, { "epoch": 0.7831653064490318, "grad_norm": 0.3523075580596924, "learning_rate": 5e-05, "loss": 1.4552, "step": 4894 }, { "epoch": 0.7833253320531285, "grad_norm": 0.3560878336429596, "learning_rate": 5e-05, "loss": 1.4604, "step": 4895 }, { "epoch": 0.7834853576572252, "grad_norm": 0.33417147397994995, "learning_rate": 5e-05, "loss": 1.4154, "step": 4896 }, { "epoch": 0.7836453832613218, "grad_norm": 0.3677634000778198, "learning_rate": 5e-05, "loss": 1.4959, "step": 4897 }, { "epoch": 0.7838054088654185, "grad_norm": 0.3346513509750366, "learning_rate": 5e-05, "loss": 1.442, "step": 4898 }, { "epoch": 0.7839654344695152, "grad_norm": 0.3507404029369354, "learning_rate": 5e-05, "loss": 1.5037, "step": 4899 }, { "epoch": 0.7841254600736117, "grad_norm": 0.36480605602264404, "learning_rate": 5e-05, "loss": 1.5131, "step": 4900 }, { "epoch": 0.7842854856777084, "grad_norm": 0.3585486114025116, "learning_rate": 5e-05, "loss": 1.4757, "step": 4901 }, { "epoch": 0.7844455112818051, "grad_norm": 0.353651225566864, "learning_rate": 5e-05, "loss": 1.3857, "step": 4902 }, { "epoch": 0.7846055368859017, "grad_norm": 0.3692576289176941, "learning_rate": 5e-05, "loss": 1.4285, "step": 4903 }, { "epoch": 0.7847655624899984, "grad_norm": 0.35537591576576233, "learning_rate": 5e-05, "loss": 1.475, "step": 4904 }, { "epoch": 0.7849255880940951, "grad_norm": 0.3613208830356598, "learning_rate": 5e-05, "loss": 1.4752, "step": 4905 }, { "epoch": 0.7850856136981917, "grad_norm": 0.33955124020576477, "learning_rate": 5e-05, "loss": 1.4282, "step": 4906 }, { "epoch": 0.7852456393022884, "grad_norm": 0.3719375729560852, "learning_rate": 5e-05, "loss": 1.521, "step": 4907 }, { "epoch": 0.785405664906385, "grad_norm": 0.3531760573387146, "learning_rate": 5e-05, "loss": 1.4841, "step": 4908 }, { "epoch": 0.7855656905104816, "grad_norm": 0.3601064085960388, "learning_rate": 5e-05, "loss": 1.4839, "step": 4909 }, { "epoch": 0.7857257161145783, "grad_norm": 0.35731059312820435, "learning_rate": 5e-05, "loss": 1.5147, "step": 4910 }, { "epoch": 0.785885741718675, "grad_norm": 0.3786014914512634, "learning_rate": 5e-05, "loss": 1.5372, "step": 4911 }, { "epoch": 0.7860457673227716, "grad_norm": 0.3466916084289551, "learning_rate": 5e-05, "loss": 1.3977, "step": 4912 }, { "epoch": 0.7862057929268683, "grad_norm": 0.3543183207511902, "learning_rate": 5e-05, "loss": 1.4287, "step": 4913 }, { "epoch": 0.786365818530965, "grad_norm": 0.37046146392822266, "learning_rate": 5e-05, "loss": 1.4699, "step": 4914 }, { "epoch": 0.7865258441350617, "grad_norm": 0.3643021583557129, "learning_rate": 5e-05, "loss": 1.4136, "step": 4915 }, { "epoch": 0.7866858697391582, "grad_norm": 0.3609178960323334, "learning_rate": 5e-05, "loss": 1.4612, "step": 4916 }, { "epoch": 0.7868458953432549, "grad_norm": 0.3557557761669159, "learning_rate": 5e-05, "loss": 1.4354, "step": 4917 }, { "epoch": 0.7870059209473516, "grad_norm": 0.35514530539512634, "learning_rate": 5e-05, "loss": 1.4377, "step": 4918 }, { "epoch": 0.7871659465514482, "grad_norm": 0.3487628102302551, "learning_rate": 5e-05, "loss": 1.464, "step": 4919 }, { "epoch": 0.7873259721555449, "grad_norm": 0.34588366746902466, "learning_rate": 5e-05, "loss": 1.376, "step": 4920 }, { "epoch": 0.7874859977596416, "grad_norm": 0.3640071153640747, "learning_rate": 5e-05, "loss": 1.4803, "step": 4921 }, { "epoch": 0.7876460233637382, "grad_norm": 0.35495877265930176, "learning_rate": 5e-05, "loss": 1.4053, "step": 4922 }, { "epoch": 0.7878060489678349, "grad_norm": 0.3602435886859894, "learning_rate": 5e-05, "loss": 1.4626, "step": 4923 }, { "epoch": 0.7879660745719315, "grad_norm": 0.36254602670669556, "learning_rate": 5e-05, "loss": 1.4355, "step": 4924 }, { "epoch": 0.7881261001760281, "grad_norm": 0.347912073135376, "learning_rate": 5e-05, "loss": 1.5055, "step": 4925 }, { "epoch": 0.7882861257801248, "grad_norm": 0.34468451142311096, "learning_rate": 5e-05, "loss": 1.5042, "step": 4926 }, { "epoch": 0.7884461513842215, "grad_norm": 0.35172274708747864, "learning_rate": 5e-05, "loss": 1.4343, "step": 4927 }, { "epoch": 0.7886061769883181, "grad_norm": 0.3575928509235382, "learning_rate": 5e-05, "loss": 1.3874, "step": 4928 }, { "epoch": 0.7887662025924148, "grad_norm": 0.3449403941631317, "learning_rate": 5e-05, "loss": 1.4671, "step": 4929 }, { "epoch": 0.7889262281965115, "grad_norm": 0.3473127484321594, "learning_rate": 5e-05, "loss": 1.4765, "step": 4930 }, { "epoch": 0.7890862538006082, "grad_norm": 0.3466867208480835, "learning_rate": 5e-05, "loss": 1.4779, "step": 4931 }, { "epoch": 0.7892462794047047, "grad_norm": 0.3483195900917053, "learning_rate": 5e-05, "loss": 1.4431, "step": 4932 }, { "epoch": 0.7894063050088014, "grad_norm": 0.3606315553188324, "learning_rate": 5e-05, "loss": 1.46, "step": 4933 }, { "epoch": 0.789566330612898, "grad_norm": 0.35364335775375366, "learning_rate": 5e-05, "loss": 1.4787, "step": 4934 }, { "epoch": 0.7897263562169947, "grad_norm": 0.36451274156570435, "learning_rate": 5e-05, "loss": 1.4651, "step": 4935 }, { "epoch": 0.7898863818210914, "grad_norm": 0.35480996966362, "learning_rate": 5e-05, "loss": 1.4366, "step": 4936 }, { "epoch": 0.7900464074251881, "grad_norm": 0.3455239236354828, "learning_rate": 5e-05, "loss": 1.3905, "step": 4937 }, { "epoch": 0.7902064330292847, "grad_norm": 0.34843236207962036, "learning_rate": 5e-05, "loss": 1.3631, "step": 4938 }, { "epoch": 0.7903664586333813, "grad_norm": 0.3474659323692322, "learning_rate": 5e-05, "loss": 1.4515, "step": 4939 }, { "epoch": 0.790526484237478, "grad_norm": 0.36707571148872375, "learning_rate": 5e-05, "loss": 1.5131, "step": 4940 }, { "epoch": 0.7906865098415746, "grad_norm": 0.34207406640052795, "learning_rate": 5e-05, "loss": 1.3946, "step": 4941 }, { "epoch": 0.7908465354456713, "grad_norm": 0.3393148183822632, "learning_rate": 5e-05, "loss": 1.4327, "step": 4942 }, { "epoch": 0.791006561049768, "grad_norm": 0.35478729009628296, "learning_rate": 5e-05, "loss": 1.4773, "step": 4943 }, { "epoch": 0.7911665866538646, "grad_norm": 0.3475869596004486, "learning_rate": 5e-05, "loss": 1.4091, "step": 4944 }, { "epoch": 0.7913266122579613, "grad_norm": 0.3713914752006531, "learning_rate": 5e-05, "loss": 1.5474, "step": 4945 }, { "epoch": 0.791486637862058, "grad_norm": 0.36070242524147034, "learning_rate": 5e-05, "loss": 1.4485, "step": 4946 }, { "epoch": 0.7916466634661545, "grad_norm": 0.3527773916721344, "learning_rate": 5e-05, "loss": 1.4277, "step": 4947 }, { "epoch": 0.7918066890702512, "grad_norm": 0.35514160990715027, "learning_rate": 5e-05, "loss": 1.4859, "step": 4948 }, { "epoch": 0.7919667146743479, "grad_norm": 0.36772873997688293, "learning_rate": 5e-05, "loss": 1.5795, "step": 4949 }, { "epoch": 0.7921267402784445, "grad_norm": 0.35147950053215027, "learning_rate": 5e-05, "loss": 1.4678, "step": 4950 }, { "epoch": 0.7922867658825412, "grad_norm": 0.3579823672771454, "learning_rate": 5e-05, "loss": 1.4812, "step": 4951 }, { "epoch": 0.7924467914866379, "grad_norm": 0.35972416400909424, "learning_rate": 5e-05, "loss": 1.5343, "step": 4952 }, { "epoch": 0.7926068170907346, "grad_norm": 0.3468903601169586, "learning_rate": 5e-05, "loss": 1.4286, "step": 4953 }, { "epoch": 0.7927668426948312, "grad_norm": 0.3639618754386902, "learning_rate": 5e-05, "loss": 1.491, "step": 4954 }, { "epoch": 0.7929268682989278, "grad_norm": 0.3537909686565399, "learning_rate": 5e-05, "loss": 1.4525, "step": 4955 }, { "epoch": 0.7930868939030244, "grad_norm": 0.36151671409606934, "learning_rate": 5e-05, "loss": 1.5007, "step": 4956 }, { "epoch": 0.7932469195071211, "grad_norm": 0.3503373861312866, "learning_rate": 5e-05, "loss": 1.4784, "step": 4957 }, { "epoch": 0.7934069451112178, "grad_norm": 0.37215059995651245, "learning_rate": 5e-05, "loss": 1.5091, "step": 4958 }, { "epoch": 0.7935669707153145, "grad_norm": 0.3502693772315979, "learning_rate": 5e-05, "loss": 1.4749, "step": 4959 }, { "epoch": 0.7937269963194111, "grad_norm": 0.33936843276023865, "learning_rate": 5e-05, "loss": 1.3805, "step": 4960 }, { "epoch": 0.7938870219235078, "grad_norm": 0.3460470139980316, "learning_rate": 5e-05, "loss": 1.4622, "step": 4961 }, { "epoch": 0.7940470475276045, "grad_norm": 0.3495754897594452, "learning_rate": 5e-05, "loss": 1.4986, "step": 4962 }, { "epoch": 0.794207073131701, "grad_norm": 0.35871997475624084, "learning_rate": 5e-05, "loss": 1.4848, "step": 4963 }, { "epoch": 0.7943670987357977, "grad_norm": 0.34916895627975464, "learning_rate": 5e-05, "loss": 1.4331, "step": 4964 }, { "epoch": 0.7945271243398944, "grad_norm": 0.34455999732017517, "learning_rate": 5e-05, "loss": 1.4619, "step": 4965 }, { "epoch": 0.794687149943991, "grad_norm": 0.3504277765750885, "learning_rate": 5e-05, "loss": 1.3948, "step": 4966 }, { "epoch": 0.7948471755480877, "grad_norm": 0.3631897270679474, "learning_rate": 5e-05, "loss": 1.4905, "step": 4967 }, { "epoch": 0.7950072011521844, "grad_norm": 0.35195139050483704, "learning_rate": 5e-05, "loss": 1.4712, "step": 4968 }, { "epoch": 0.795167226756281, "grad_norm": 0.35549360513687134, "learning_rate": 5e-05, "loss": 1.4498, "step": 4969 }, { "epoch": 0.7953272523603777, "grad_norm": 0.35201236605644226, "learning_rate": 5e-05, "loss": 1.5061, "step": 4970 }, { "epoch": 0.7954872779644743, "grad_norm": 0.3568252921104431, "learning_rate": 5e-05, "loss": 1.4123, "step": 4971 }, { "epoch": 0.7956473035685709, "grad_norm": 0.3556293547153473, "learning_rate": 5e-05, "loss": 1.4148, "step": 4972 }, { "epoch": 0.7958073291726676, "grad_norm": 0.3537136912345886, "learning_rate": 5e-05, "loss": 1.4682, "step": 4973 }, { "epoch": 0.7959673547767643, "grad_norm": 0.3600456118583679, "learning_rate": 5e-05, "loss": 1.4984, "step": 4974 }, { "epoch": 0.796127380380861, "grad_norm": 0.35518088936805725, "learning_rate": 5e-05, "loss": 1.4155, "step": 4975 }, { "epoch": 0.7962874059849576, "grad_norm": 0.34914204478263855, "learning_rate": 5e-05, "loss": 1.4545, "step": 4976 }, { "epoch": 0.7964474315890543, "grad_norm": 0.3656710386276245, "learning_rate": 5e-05, "loss": 1.4611, "step": 4977 }, { "epoch": 0.796607457193151, "grad_norm": 0.36087098717689514, "learning_rate": 5e-05, "loss": 1.4515, "step": 4978 }, { "epoch": 0.7967674827972475, "grad_norm": 0.36553797125816345, "learning_rate": 5e-05, "loss": 1.4764, "step": 4979 }, { "epoch": 0.7969275084013442, "grad_norm": 0.3640473186969757, "learning_rate": 5e-05, "loss": 1.4294, "step": 4980 }, { "epoch": 0.7970875340054409, "grad_norm": 0.3409501612186432, "learning_rate": 5e-05, "loss": 1.3942, "step": 4981 }, { "epoch": 0.7972475596095375, "grad_norm": 0.3434164226055145, "learning_rate": 5e-05, "loss": 1.4215, "step": 4982 }, { "epoch": 0.7974075852136342, "grad_norm": 0.3494533598423004, "learning_rate": 5e-05, "loss": 1.4501, "step": 4983 }, { "epoch": 0.7975676108177309, "grad_norm": 0.33652186393737793, "learning_rate": 5e-05, "loss": 1.3291, "step": 4984 }, { "epoch": 0.7977276364218275, "grad_norm": 0.3508197069168091, "learning_rate": 5e-05, "loss": 1.4838, "step": 4985 }, { "epoch": 0.7978876620259241, "grad_norm": 0.3562929928302765, "learning_rate": 5e-05, "loss": 1.4835, "step": 4986 }, { "epoch": 0.7980476876300208, "grad_norm": 0.3702389895915985, "learning_rate": 5e-05, "loss": 1.5351, "step": 4987 }, { "epoch": 0.7982077132341174, "grad_norm": 0.355113685131073, "learning_rate": 5e-05, "loss": 1.416, "step": 4988 }, { "epoch": 0.7983677388382141, "grad_norm": 0.3524247407913208, "learning_rate": 5e-05, "loss": 1.4275, "step": 4989 }, { "epoch": 0.7985277644423108, "grad_norm": 0.3610001504421234, "learning_rate": 5e-05, "loss": 1.5011, "step": 4990 }, { "epoch": 0.7986877900464074, "grad_norm": 0.35267430543899536, "learning_rate": 5e-05, "loss": 1.4398, "step": 4991 }, { "epoch": 0.7988478156505041, "grad_norm": 0.35079485177993774, "learning_rate": 5e-05, "loss": 1.4837, "step": 4992 }, { "epoch": 0.7990078412546008, "grad_norm": 0.3503734767436981, "learning_rate": 5e-05, "loss": 1.3552, "step": 4993 }, { "epoch": 0.7991678668586973, "grad_norm": 0.3443908989429474, "learning_rate": 5e-05, "loss": 1.4691, "step": 4994 }, { "epoch": 0.799327892462794, "grad_norm": 0.3409329652786255, "learning_rate": 5e-05, "loss": 1.4486, "step": 4995 }, { "epoch": 0.7994879180668907, "grad_norm": 0.3603549599647522, "learning_rate": 5e-05, "loss": 1.533, "step": 4996 }, { "epoch": 0.7996479436709873, "grad_norm": 0.3598098158836365, "learning_rate": 5e-05, "loss": 1.4159, "step": 4997 }, { "epoch": 0.799807969275084, "grad_norm": 0.3482305705547333, "learning_rate": 5e-05, "loss": 1.4274, "step": 4998 }, { "epoch": 0.7999679948791807, "grad_norm": 0.34587791562080383, "learning_rate": 5e-05, "loss": 1.4501, "step": 4999 }, { "epoch": 0.8001280204832774, "grad_norm": 0.3389081358909607, "learning_rate": 5e-05, "loss": 1.4118, "step": 5000 }, { "epoch": 0.800288046087374, "grad_norm": 0.34435999393463135, "learning_rate": 5e-05, "loss": 1.3972, "step": 5001 }, { "epoch": 0.8004480716914706, "grad_norm": 0.34801408648490906, "learning_rate": 5e-05, "loss": 1.4711, "step": 5002 }, { "epoch": 0.8006080972955673, "grad_norm": 0.35793405771255493, "learning_rate": 5e-05, "loss": 1.5238, "step": 5003 }, { "epoch": 0.8007681228996639, "grad_norm": 0.352497398853302, "learning_rate": 5e-05, "loss": 1.5423, "step": 5004 }, { "epoch": 0.8009281485037606, "grad_norm": 0.3536999523639679, "learning_rate": 5e-05, "loss": 1.5271, "step": 5005 }, { "epoch": 0.8010881741078573, "grad_norm": 0.35461777448654175, "learning_rate": 5e-05, "loss": 1.4204, "step": 5006 }, { "epoch": 0.8012481997119539, "grad_norm": 0.35003897547721863, "learning_rate": 5e-05, "loss": 1.4779, "step": 5007 }, { "epoch": 0.8014082253160506, "grad_norm": 0.3613746762275696, "learning_rate": 5e-05, "loss": 1.5082, "step": 5008 }, { "epoch": 0.8015682509201473, "grad_norm": 0.35601606965065, "learning_rate": 5e-05, "loss": 1.406, "step": 5009 }, { "epoch": 0.8017282765242438, "grad_norm": 0.3459944725036621, "learning_rate": 5e-05, "loss": 1.4781, "step": 5010 }, { "epoch": 0.8018883021283405, "grad_norm": 0.3500415086746216, "learning_rate": 5e-05, "loss": 1.411, "step": 5011 }, { "epoch": 0.8020483277324372, "grad_norm": 0.35882043838500977, "learning_rate": 5e-05, "loss": 1.4324, "step": 5012 }, { "epoch": 0.8022083533365338, "grad_norm": 0.3537096083164215, "learning_rate": 5e-05, "loss": 1.452, "step": 5013 }, { "epoch": 0.8023683789406305, "grad_norm": 0.35241180658340454, "learning_rate": 5e-05, "loss": 1.5181, "step": 5014 }, { "epoch": 0.8025284045447272, "grad_norm": 0.3514463007450104, "learning_rate": 5e-05, "loss": 1.4104, "step": 5015 }, { "epoch": 0.8026884301488239, "grad_norm": 0.37204068899154663, "learning_rate": 5e-05, "loss": 1.4397, "step": 5016 }, { "epoch": 0.8028484557529205, "grad_norm": 0.36575326323509216, "learning_rate": 5e-05, "loss": 1.5122, "step": 5017 }, { "epoch": 0.8030084813570171, "grad_norm": 0.3574649393558502, "learning_rate": 5e-05, "loss": 1.3353, "step": 5018 }, { "epoch": 0.8031685069611137, "grad_norm": 0.3630823493003845, "learning_rate": 5e-05, "loss": 1.4298, "step": 5019 }, { "epoch": 0.8033285325652104, "grad_norm": 0.34445399045944214, "learning_rate": 5e-05, "loss": 1.3626, "step": 5020 }, { "epoch": 0.8034885581693071, "grad_norm": 0.3515399098396301, "learning_rate": 5e-05, "loss": 1.4022, "step": 5021 }, { "epoch": 0.8036485837734038, "grad_norm": 0.35881826281547546, "learning_rate": 5e-05, "loss": 1.4855, "step": 5022 }, { "epoch": 0.8038086093775004, "grad_norm": 0.35770294070243835, "learning_rate": 5e-05, "loss": 1.4582, "step": 5023 }, { "epoch": 0.8039686349815971, "grad_norm": 0.3519250154495239, "learning_rate": 5e-05, "loss": 1.4348, "step": 5024 }, { "epoch": 0.8041286605856938, "grad_norm": 0.3568718135356903, "learning_rate": 5e-05, "loss": 1.5467, "step": 5025 }, { "epoch": 0.8042886861897903, "grad_norm": 0.35074883699417114, "learning_rate": 5e-05, "loss": 1.4062, "step": 5026 }, { "epoch": 0.804448711793887, "grad_norm": 0.34411582350730896, "learning_rate": 5e-05, "loss": 1.3217, "step": 5027 }, { "epoch": 0.8046087373979837, "grad_norm": 0.3639380633831024, "learning_rate": 5e-05, "loss": 1.4812, "step": 5028 }, { "epoch": 0.8047687630020803, "grad_norm": 0.3720206618309021, "learning_rate": 5e-05, "loss": 1.5487, "step": 5029 }, { "epoch": 0.804928788606177, "grad_norm": 0.3763556182384491, "learning_rate": 5e-05, "loss": 1.4892, "step": 5030 }, { "epoch": 0.8050888142102737, "grad_norm": 0.3621581196784973, "learning_rate": 5e-05, "loss": 1.5602, "step": 5031 }, { "epoch": 0.8052488398143703, "grad_norm": 0.3533172607421875, "learning_rate": 5e-05, "loss": 1.4419, "step": 5032 }, { "epoch": 0.8054088654184669, "grad_norm": 0.3494850695133209, "learning_rate": 5e-05, "loss": 1.4068, "step": 5033 }, { "epoch": 0.8055688910225636, "grad_norm": 0.3600940704345703, "learning_rate": 5e-05, "loss": 1.4647, "step": 5034 }, { "epoch": 0.8057289166266602, "grad_norm": 0.3656159043312073, "learning_rate": 5e-05, "loss": 1.5082, "step": 5035 }, { "epoch": 0.8058889422307569, "grad_norm": 0.3735465109348297, "learning_rate": 5e-05, "loss": 1.4651, "step": 5036 }, { "epoch": 0.8060489678348536, "grad_norm": 0.36744096875190735, "learning_rate": 5e-05, "loss": 1.5052, "step": 5037 }, { "epoch": 0.8062089934389502, "grad_norm": 0.3674490749835968, "learning_rate": 5e-05, "loss": 1.4706, "step": 5038 }, { "epoch": 0.8063690190430469, "grad_norm": 0.36619827151298523, "learning_rate": 5e-05, "loss": 1.4865, "step": 5039 }, { "epoch": 0.8065290446471436, "grad_norm": 0.36525243520736694, "learning_rate": 5e-05, "loss": 1.5012, "step": 5040 }, { "epoch": 0.8066890702512401, "grad_norm": 0.34786126017570496, "learning_rate": 5e-05, "loss": 1.4832, "step": 5041 }, { "epoch": 0.8068490958553368, "grad_norm": 0.34523653984069824, "learning_rate": 5e-05, "loss": 1.504, "step": 5042 }, { "epoch": 0.8070091214594335, "grad_norm": 0.35198327898979187, "learning_rate": 5e-05, "loss": 1.3766, "step": 5043 }, { "epoch": 0.8071691470635302, "grad_norm": 0.3462247848510742, "learning_rate": 5e-05, "loss": 1.4119, "step": 5044 }, { "epoch": 0.8073291726676268, "grad_norm": 0.3586856424808502, "learning_rate": 5e-05, "loss": 1.4978, "step": 5045 }, { "epoch": 0.8074891982717235, "grad_norm": 0.3552294075489044, "learning_rate": 5e-05, "loss": 1.447, "step": 5046 }, { "epoch": 0.8076492238758202, "grad_norm": 0.36094582080841064, "learning_rate": 5e-05, "loss": 1.4854, "step": 5047 }, { "epoch": 0.8078092494799168, "grad_norm": 0.3524182438850403, "learning_rate": 5e-05, "loss": 1.4455, "step": 5048 }, { "epoch": 0.8079692750840134, "grad_norm": 0.3529130518436432, "learning_rate": 5e-05, "loss": 1.4733, "step": 5049 }, { "epoch": 0.8081293006881101, "grad_norm": 0.36921241879463196, "learning_rate": 5e-05, "loss": 1.5355, "step": 5050 }, { "epoch": 0.8082893262922067, "grad_norm": 0.34956106543540955, "learning_rate": 5e-05, "loss": 1.3727, "step": 5051 }, { "epoch": 0.8084493518963034, "grad_norm": 0.3675379157066345, "learning_rate": 5e-05, "loss": 1.4901, "step": 5052 }, { "epoch": 0.8086093775004001, "grad_norm": 0.3505450487136841, "learning_rate": 5e-05, "loss": 1.4468, "step": 5053 }, { "epoch": 0.8087694031044967, "grad_norm": 0.35594838857650757, "learning_rate": 5e-05, "loss": 1.4433, "step": 5054 }, { "epoch": 0.8089294287085934, "grad_norm": 0.34537559747695923, "learning_rate": 5e-05, "loss": 1.4049, "step": 5055 }, { "epoch": 0.8090894543126901, "grad_norm": 0.35863417387008667, "learning_rate": 5e-05, "loss": 1.45, "step": 5056 }, { "epoch": 0.8092494799167866, "grad_norm": 0.3599192500114441, "learning_rate": 5e-05, "loss": 1.4621, "step": 5057 }, { "epoch": 0.8094095055208833, "grad_norm": 0.3640359938144684, "learning_rate": 5e-05, "loss": 1.4607, "step": 5058 }, { "epoch": 0.80956953112498, "grad_norm": 0.3646685481071472, "learning_rate": 5e-05, "loss": 1.4436, "step": 5059 }, { "epoch": 0.8097295567290766, "grad_norm": 0.35057252645492554, "learning_rate": 5e-05, "loss": 1.4362, "step": 5060 }, { "epoch": 0.8098895823331733, "grad_norm": 0.35211604833602905, "learning_rate": 5e-05, "loss": 1.4749, "step": 5061 }, { "epoch": 0.81004960793727, "grad_norm": 0.35940301418304443, "learning_rate": 5e-05, "loss": 1.4548, "step": 5062 }, { "epoch": 0.8102096335413667, "grad_norm": 0.35405227541923523, "learning_rate": 5e-05, "loss": 1.4328, "step": 5063 }, { "epoch": 0.8103696591454633, "grad_norm": 0.36108502745628357, "learning_rate": 5e-05, "loss": 1.4614, "step": 5064 }, { "epoch": 0.8105296847495599, "grad_norm": 0.363301157951355, "learning_rate": 5e-05, "loss": 1.4227, "step": 5065 }, { "epoch": 0.8106897103536566, "grad_norm": 0.35655519366264343, "learning_rate": 5e-05, "loss": 1.4707, "step": 5066 }, { "epoch": 0.8108497359577532, "grad_norm": 0.3623993694782257, "learning_rate": 5e-05, "loss": 1.518, "step": 5067 }, { "epoch": 0.8110097615618499, "grad_norm": 0.37817060947418213, "learning_rate": 5e-05, "loss": 1.4803, "step": 5068 }, { "epoch": 0.8111697871659466, "grad_norm": 0.34218743443489075, "learning_rate": 5e-05, "loss": 1.4172, "step": 5069 }, { "epoch": 0.8113298127700432, "grad_norm": 0.3667128384113312, "learning_rate": 5e-05, "loss": 1.4872, "step": 5070 }, { "epoch": 0.8114898383741399, "grad_norm": 0.37623611092567444, "learning_rate": 5e-05, "loss": 1.5068, "step": 5071 }, { "epoch": 0.8116498639782365, "grad_norm": 0.3643370270729065, "learning_rate": 5e-05, "loss": 1.4493, "step": 5072 }, { "epoch": 0.8118098895823331, "grad_norm": 0.34671735763549805, "learning_rate": 5e-05, "loss": 1.4286, "step": 5073 }, { "epoch": 0.8119699151864298, "grad_norm": 0.3606429696083069, "learning_rate": 5e-05, "loss": 1.493, "step": 5074 }, { "epoch": 0.8121299407905265, "grad_norm": 0.374139666557312, "learning_rate": 5e-05, "loss": 1.5137, "step": 5075 }, { "epoch": 0.8122899663946231, "grad_norm": 0.349357932806015, "learning_rate": 5e-05, "loss": 1.3988, "step": 5076 }, { "epoch": 0.8124499919987198, "grad_norm": 0.3585593104362488, "learning_rate": 5e-05, "loss": 1.4237, "step": 5077 }, { "epoch": 0.8126100176028165, "grad_norm": 0.35841548442840576, "learning_rate": 5e-05, "loss": 1.4062, "step": 5078 }, { "epoch": 0.8127700432069132, "grad_norm": 0.360673189163208, "learning_rate": 5e-05, "loss": 1.5378, "step": 5079 }, { "epoch": 0.8129300688110097, "grad_norm": 0.3507130444049835, "learning_rate": 5e-05, "loss": 1.4438, "step": 5080 }, { "epoch": 0.8130900944151064, "grad_norm": 0.3637235164642334, "learning_rate": 5e-05, "loss": 1.4799, "step": 5081 }, { "epoch": 0.813250120019203, "grad_norm": 0.3610769212245941, "learning_rate": 5e-05, "loss": 1.3848, "step": 5082 }, { "epoch": 0.8134101456232997, "grad_norm": 0.3616252541542053, "learning_rate": 5e-05, "loss": 1.4775, "step": 5083 }, { "epoch": 0.8135701712273964, "grad_norm": 0.3654696047306061, "learning_rate": 5e-05, "loss": 1.4975, "step": 5084 }, { "epoch": 0.8137301968314931, "grad_norm": 0.3641238510608673, "learning_rate": 5e-05, "loss": 1.4839, "step": 5085 }, { "epoch": 0.8138902224355897, "grad_norm": 0.36641326546669006, "learning_rate": 5e-05, "loss": 1.5473, "step": 5086 }, { "epoch": 0.8140502480396864, "grad_norm": 0.3448309302330017, "learning_rate": 5e-05, "loss": 1.4595, "step": 5087 }, { "epoch": 0.814210273643783, "grad_norm": 0.3727511763572693, "learning_rate": 5e-05, "loss": 1.4851, "step": 5088 }, { "epoch": 0.8143702992478796, "grad_norm": 0.35043808817863464, "learning_rate": 5e-05, "loss": 1.4262, "step": 5089 }, { "epoch": 0.8145303248519763, "grad_norm": 0.3612383306026459, "learning_rate": 5e-05, "loss": 1.4685, "step": 5090 }, { "epoch": 0.814690350456073, "grad_norm": 0.37092339992523193, "learning_rate": 5e-05, "loss": 1.4389, "step": 5091 }, { "epoch": 0.8148503760601696, "grad_norm": 0.35890647768974304, "learning_rate": 5e-05, "loss": 1.4305, "step": 5092 }, { "epoch": 0.8150104016642663, "grad_norm": 0.34175142645835876, "learning_rate": 5e-05, "loss": 1.403, "step": 5093 }, { "epoch": 0.815170427268363, "grad_norm": 0.36896467208862305, "learning_rate": 5e-05, "loss": 1.4613, "step": 5094 }, { "epoch": 0.8153304528724596, "grad_norm": 0.3526921272277832, "learning_rate": 5e-05, "loss": 1.4374, "step": 5095 }, { "epoch": 0.8154904784765562, "grad_norm": 0.35429346561431885, "learning_rate": 5e-05, "loss": 1.4645, "step": 5096 }, { "epoch": 0.8156505040806529, "grad_norm": 0.35338976979255676, "learning_rate": 5e-05, "loss": 1.3912, "step": 5097 }, { "epoch": 0.8158105296847495, "grad_norm": 0.34569063782691956, "learning_rate": 5e-05, "loss": 1.5163, "step": 5098 }, { "epoch": 0.8159705552888462, "grad_norm": 0.3487534523010254, "learning_rate": 5e-05, "loss": 1.4405, "step": 5099 }, { "epoch": 0.8161305808929429, "grad_norm": 0.35620081424713135, "learning_rate": 5e-05, "loss": 1.3712, "step": 5100 }, { "epoch": 0.8162906064970396, "grad_norm": 0.3740036189556122, "learning_rate": 5e-05, "loss": 1.4676, "step": 5101 }, { "epoch": 0.8164506321011362, "grad_norm": 0.34158337116241455, "learning_rate": 5e-05, "loss": 1.4029, "step": 5102 }, { "epoch": 0.8166106577052329, "grad_norm": 0.36661019921302795, "learning_rate": 5e-05, "loss": 1.4637, "step": 5103 }, { "epoch": 0.8167706833093294, "grad_norm": 0.3436126410961151, "learning_rate": 5e-05, "loss": 1.3873, "step": 5104 }, { "epoch": 0.8169307089134261, "grad_norm": 0.3598284125328064, "learning_rate": 5e-05, "loss": 1.41, "step": 5105 }, { "epoch": 0.8170907345175228, "grad_norm": 0.36112794280052185, "learning_rate": 5e-05, "loss": 1.4825, "step": 5106 }, { "epoch": 0.8172507601216195, "grad_norm": 0.3576867878437042, "learning_rate": 5e-05, "loss": 1.4757, "step": 5107 }, { "epoch": 0.8174107857257161, "grad_norm": 0.36558997631073, "learning_rate": 5e-05, "loss": 1.3983, "step": 5108 }, { "epoch": 0.8175708113298128, "grad_norm": 0.35430797934532166, "learning_rate": 5e-05, "loss": 1.4356, "step": 5109 }, { "epoch": 0.8177308369339095, "grad_norm": 0.37133821845054626, "learning_rate": 5e-05, "loss": 1.4486, "step": 5110 }, { "epoch": 0.8178908625380061, "grad_norm": 0.368447870016098, "learning_rate": 5e-05, "loss": 1.5068, "step": 5111 }, { "epoch": 0.8180508881421027, "grad_norm": 0.36742228269577026, "learning_rate": 5e-05, "loss": 1.4299, "step": 5112 }, { "epoch": 0.8182109137461994, "grad_norm": 0.3698078691959381, "learning_rate": 5e-05, "loss": 1.5308, "step": 5113 }, { "epoch": 0.818370939350296, "grad_norm": 0.35885897278785706, "learning_rate": 5e-05, "loss": 1.4576, "step": 5114 }, { "epoch": 0.8185309649543927, "grad_norm": 0.3426338732242584, "learning_rate": 5e-05, "loss": 1.4454, "step": 5115 }, { "epoch": 0.8186909905584894, "grad_norm": 0.3630627989768982, "learning_rate": 5e-05, "loss": 1.5296, "step": 5116 }, { "epoch": 0.818851016162586, "grad_norm": 0.3581996560096741, "learning_rate": 5e-05, "loss": 1.414, "step": 5117 }, { "epoch": 0.8190110417666827, "grad_norm": 0.3686769902706146, "learning_rate": 5e-05, "loss": 1.5273, "step": 5118 }, { "epoch": 0.8191710673707793, "grad_norm": 0.35767096281051636, "learning_rate": 5e-05, "loss": 1.4595, "step": 5119 }, { "epoch": 0.8193310929748759, "grad_norm": 0.35814565420150757, "learning_rate": 5e-05, "loss": 1.4621, "step": 5120 }, { "epoch": 0.8194911185789726, "grad_norm": 0.3411266803741455, "learning_rate": 5e-05, "loss": 1.3992, "step": 5121 }, { "epoch": 0.8196511441830693, "grad_norm": 0.3748932480812073, "learning_rate": 5e-05, "loss": 1.5085, "step": 5122 }, { "epoch": 0.819811169787166, "grad_norm": 0.34941673278808594, "learning_rate": 5e-05, "loss": 1.417, "step": 5123 }, { "epoch": 0.8199711953912626, "grad_norm": 0.35008347034454346, "learning_rate": 5e-05, "loss": 1.4225, "step": 5124 }, { "epoch": 0.8201312209953593, "grad_norm": 0.349375456571579, "learning_rate": 5e-05, "loss": 1.4262, "step": 5125 }, { "epoch": 0.820291246599456, "grad_norm": 0.35077333450317383, "learning_rate": 5e-05, "loss": 1.4801, "step": 5126 }, { "epoch": 0.8204512722035525, "grad_norm": 0.3536663055419922, "learning_rate": 5e-05, "loss": 1.4152, "step": 5127 }, { "epoch": 0.8206112978076492, "grad_norm": 0.3579009175300598, "learning_rate": 5e-05, "loss": 1.4407, "step": 5128 }, { "epoch": 0.8207713234117459, "grad_norm": 0.3688053786754608, "learning_rate": 5e-05, "loss": 1.5058, "step": 5129 }, { "epoch": 0.8209313490158425, "grad_norm": 0.36008045077323914, "learning_rate": 5e-05, "loss": 1.4804, "step": 5130 }, { "epoch": 0.8210913746199392, "grad_norm": 0.36511000990867615, "learning_rate": 5e-05, "loss": 1.4272, "step": 5131 }, { "epoch": 0.8212514002240359, "grad_norm": 0.3573305904865265, "learning_rate": 5e-05, "loss": 1.4661, "step": 5132 }, { "epoch": 0.8214114258281325, "grad_norm": 0.3599475026130676, "learning_rate": 5e-05, "loss": 1.5113, "step": 5133 }, { "epoch": 0.8215714514322292, "grad_norm": 0.36911725997924805, "learning_rate": 5e-05, "loss": 1.4775, "step": 5134 }, { "epoch": 0.8217314770363258, "grad_norm": 0.3602901101112366, "learning_rate": 5e-05, "loss": 1.5392, "step": 5135 }, { "epoch": 0.8218915026404224, "grad_norm": 0.3594895899295807, "learning_rate": 5e-05, "loss": 1.484, "step": 5136 }, { "epoch": 0.8220515282445191, "grad_norm": 0.3624972105026245, "learning_rate": 5e-05, "loss": 1.4412, "step": 5137 }, { "epoch": 0.8222115538486158, "grad_norm": 0.37280601263046265, "learning_rate": 5e-05, "loss": 1.4654, "step": 5138 }, { "epoch": 0.8223715794527124, "grad_norm": 0.3633151352405548, "learning_rate": 5e-05, "loss": 1.4875, "step": 5139 }, { "epoch": 0.8225316050568091, "grad_norm": 0.3567620813846588, "learning_rate": 5e-05, "loss": 1.4293, "step": 5140 }, { "epoch": 0.8226916306609058, "grad_norm": 0.384816974401474, "learning_rate": 5e-05, "loss": 1.525, "step": 5141 }, { "epoch": 0.8228516562650025, "grad_norm": 0.34340089559555054, "learning_rate": 5e-05, "loss": 1.4013, "step": 5142 }, { "epoch": 0.823011681869099, "grad_norm": 0.3715759813785553, "learning_rate": 5e-05, "loss": 1.4882, "step": 5143 }, { "epoch": 0.8231717074731957, "grad_norm": 0.3529515266418457, "learning_rate": 5e-05, "loss": 1.4605, "step": 5144 }, { "epoch": 0.8233317330772923, "grad_norm": 0.3672594726085663, "learning_rate": 5e-05, "loss": 1.4887, "step": 5145 }, { "epoch": 0.823491758681389, "grad_norm": 0.37153443694114685, "learning_rate": 5e-05, "loss": 1.5088, "step": 5146 }, { "epoch": 0.8236517842854857, "grad_norm": 0.3613296449184418, "learning_rate": 5e-05, "loss": 1.5146, "step": 5147 }, { "epoch": 0.8238118098895824, "grad_norm": 0.36203882098197937, "learning_rate": 5e-05, "loss": 1.4297, "step": 5148 }, { "epoch": 0.823971835493679, "grad_norm": 0.35104724764823914, "learning_rate": 5e-05, "loss": 1.538, "step": 5149 }, { "epoch": 0.8241318610977757, "grad_norm": 0.343633770942688, "learning_rate": 5e-05, "loss": 1.4653, "step": 5150 }, { "epoch": 0.8242918867018723, "grad_norm": 0.3543873727321625, "learning_rate": 5e-05, "loss": 1.5188, "step": 5151 }, { "epoch": 0.8244519123059689, "grad_norm": 0.34616386890411377, "learning_rate": 5e-05, "loss": 1.44, "step": 5152 }, { "epoch": 0.8246119379100656, "grad_norm": 0.3474304974079132, "learning_rate": 5e-05, "loss": 1.4313, "step": 5153 }, { "epoch": 0.8247719635141623, "grad_norm": 0.3649314343929291, "learning_rate": 5e-05, "loss": 1.5023, "step": 5154 }, { "epoch": 0.8249319891182589, "grad_norm": 0.3632870614528656, "learning_rate": 5e-05, "loss": 1.4666, "step": 5155 }, { "epoch": 0.8250920147223556, "grad_norm": 0.3646291196346283, "learning_rate": 5e-05, "loss": 1.4615, "step": 5156 }, { "epoch": 0.8252520403264523, "grad_norm": 0.3552020490169525, "learning_rate": 5e-05, "loss": 1.4671, "step": 5157 }, { "epoch": 0.8254120659305488, "grad_norm": 0.36806222796440125, "learning_rate": 5e-05, "loss": 1.408, "step": 5158 }, { "epoch": 0.8255720915346455, "grad_norm": 0.3647524118423462, "learning_rate": 5e-05, "loss": 1.4701, "step": 5159 }, { "epoch": 0.8257321171387422, "grad_norm": 0.34554675221443176, "learning_rate": 5e-05, "loss": 1.4947, "step": 5160 }, { "epoch": 0.8258921427428388, "grad_norm": 0.3427722454071045, "learning_rate": 5e-05, "loss": 1.4282, "step": 5161 }, { "epoch": 0.8260521683469355, "grad_norm": 0.3658667206764221, "learning_rate": 5e-05, "loss": 1.4868, "step": 5162 }, { "epoch": 0.8262121939510322, "grad_norm": 0.35796138644218445, "learning_rate": 5e-05, "loss": 1.5089, "step": 5163 }, { "epoch": 0.8263722195551289, "grad_norm": 0.36263230443000793, "learning_rate": 5e-05, "loss": 1.4316, "step": 5164 }, { "epoch": 0.8265322451592255, "grad_norm": 0.3749715983867645, "learning_rate": 5e-05, "loss": 1.4584, "step": 5165 }, { "epoch": 0.8266922707633221, "grad_norm": 0.34893471002578735, "learning_rate": 5e-05, "loss": 1.3929, "step": 5166 }, { "epoch": 0.8268522963674187, "grad_norm": 0.37323758006095886, "learning_rate": 5e-05, "loss": 1.4748, "step": 5167 }, { "epoch": 0.8270123219715154, "grad_norm": 0.35833460092544556, "learning_rate": 5e-05, "loss": 1.5062, "step": 5168 }, { "epoch": 0.8271723475756121, "grad_norm": 0.36242547631263733, "learning_rate": 5e-05, "loss": 1.4718, "step": 5169 }, { "epoch": 0.8273323731797088, "grad_norm": 0.36607009172439575, "learning_rate": 5e-05, "loss": 1.4471, "step": 5170 }, { "epoch": 0.8274923987838054, "grad_norm": 0.357923299074173, "learning_rate": 5e-05, "loss": 1.5341, "step": 5171 }, { "epoch": 0.8276524243879021, "grad_norm": 0.3691222667694092, "learning_rate": 5e-05, "loss": 1.4915, "step": 5172 }, { "epoch": 0.8278124499919988, "grad_norm": 0.3480401933193207, "learning_rate": 5e-05, "loss": 1.4399, "step": 5173 }, { "epoch": 0.8279724755960953, "grad_norm": 0.3604823052883148, "learning_rate": 5e-05, "loss": 1.4689, "step": 5174 }, { "epoch": 0.828132501200192, "grad_norm": 0.35022827982902527, "learning_rate": 5e-05, "loss": 1.4171, "step": 5175 }, { "epoch": 0.8282925268042887, "grad_norm": 0.3642093241214752, "learning_rate": 5e-05, "loss": 1.4238, "step": 5176 }, { "epoch": 0.8284525524083853, "grad_norm": 0.3675813376903534, "learning_rate": 5e-05, "loss": 1.4321, "step": 5177 }, { "epoch": 0.828612578012482, "grad_norm": 0.3577188551425934, "learning_rate": 5e-05, "loss": 1.4675, "step": 5178 }, { "epoch": 0.8287726036165787, "grad_norm": 0.3526245355606079, "learning_rate": 5e-05, "loss": 1.4566, "step": 5179 }, { "epoch": 0.8289326292206753, "grad_norm": 0.36081787943840027, "learning_rate": 5e-05, "loss": 1.4767, "step": 5180 }, { "epoch": 0.829092654824772, "grad_norm": 0.3720184564590454, "learning_rate": 5e-05, "loss": 1.4656, "step": 5181 }, { "epoch": 0.8292526804288686, "grad_norm": 0.358527809381485, "learning_rate": 5e-05, "loss": 1.4465, "step": 5182 }, { "epoch": 0.8294127060329652, "grad_norm": 0.37856829166412354, "learning_rate": 5e-05, "loss": 1.6155, "step": 5183 }, { "epoch": 0.8295727316370619, "grad_norm": 0.3522210419178009, "learning_rate": 5e-05, "loss": 1.4523, "step": 5184 }, { "epoch": 0.8297327572411586, "grad_norm": 0.3621343672275543, "learning_rate": 5e-05, "loss": 1.5086, "step": 5185 }, { "epoch": 0.8298927828452553, "grad_norm": 0.3691180348396301, "learning_rate": 5e-05, "loss": 1.5425, "step": 5186 }, { "epoch": 0.8300528084493519, "grad_norm": 0.3566018342971802, "learning_rate": 5e-05, "loss": 1.4394, "step": 5187 }, { "epoch": 0.8302128340534486, "grad_norm": 0.3568160831928253, "learning_rate": 5e-05, "loss": 1.3883, "step": 5188 }, { "epoch": 0.8303728596575453, "grad_norm": 0.36595675349235535, "learning_rate": 5e-05, "loss": 1.5608, "step": 5189 }, { "epoch": 0.8305328852616418, "grad_norm": 0.3713095188140869, "learning_rate": 5e-05, "loss": 1.4461, "step": 5190 }, { "epoch": 0.8306929108657385, "grad_norm": 0.35789772868156433, "learning_rate": 5e-05, "loss": 1.511, "step": 5191 }, { "epoch": 0.8308529364698352, "grad_norm": 0.3740064203739166, "learning_rate": 5e-05, "loss": 1.5625, "step": 5192 }, { "epoch": 0.8310129620739318, "grad_norm": 0.36720114946365356, "learning_rate": 5e-05, "loss": 1.3969, "step": 5193 }, { "epoch": 0.8311729876780285, "grad_norm": 0.345238596200943, "learning_rate": 5e-05, "loss": 1.4824, "step": 5194 }, { "epoch": 0.8313330132821252, "grad_norm": 0.3586617112159729, "learning_rate": 5e-05, "loss": 1.4006, "step": 5195 }, { "epoch": 0.8314930388862218, "grad_norm": 0.35949403047561646, "learning_rate": 5e-05, "loss": 1.4565, "step": 5196 }, { "epoch": 0.8316530644903185, "grad_norm": 0.3672981262207031, "learning_rate": 5e-05, "loss": 1.4927, "step": 5197 }, { "epoch": 0.8318130900944151, "grad_norm": 0.365782231092453, "learning_rate": 5e-05, "loss": 1.4747, "step": 5198 }, { "epoch": 0.8319731156985117, "grad_norm": 0.3693467080593109, "learning_rate": 5e-05, "loss": 1.4586, "step": 5199 }, { "epoch": 0.8321331413026084, "grad_norm": 0.36402103304862976, "learning_rate": 5e-05, "loss": 1.4548, "step": 5200 }, { "epoch": 0.8322931669067051, "grad_norm": 0.35586363077163696, "learning_rate": 5e-05, "loss": 1.4357, "step": 5201 }, { "epoch": 0.8324531925108017, "grad_norm": 0.3401472866535187, "learning_rate": 5e-05, "loss": 1.4469, "step": 5202 }, { "epoch": 0.8326132181148984, "grad_norm": 0.36376118659973145, "learning_rate": 5e-05, "loss": 1.4263, "step": 5203 }, { "epoch": 0.8327732437189951, "grad_norm": 0.3564225137233734, "learning_rate": 5e-05, "loss": 1.476, "step": 5204 }, { "epoch": 0.8329332693230916, "grad_norm": 0.3472265899181366, "learning_rate": 5e-05, "loss": 1.5137, "step": 5205 }, { "epoch": 0.8330932949271883, "grad_norm": 0.3584069609642029, "learning_rate": 5e-05, "loss": 1.4792, "step": 5206 }, { "epoch": 0.833253320531285, "grad_norm": 0.36366334557533264, "learning_rate": 5e-05, "loss": 1.4262, "step": 5207 }, { "epoch": 0.8334133461353816, "grad_norm": 0.35516607761383057, "learning_rate": 5e-05, "loss": 1.4764, "step": 5208 }, { "epoch": 0.8335733717394783, "grad_norm": 0.35912856459617615, "learning_rate": 5e-05, "loss": 1.3992, "step": 5209 }, { "epoch": 0.833733397343575, "grad_norm": 0.3609182834625244, "learning_rate": 5e-05, "loss": 1.459, "step": 5210 }, { "epoch": 0.8338934229476717, "grad_norm": 0.3546257019042969, "learning_rate": 5e-05, "loss": 1.4017, "step": 5211 }, { "epoch": 0.8340534485517683, "grad_norm": 0.3494754433631897, "learning_rate": 5e-05, "loss": 1.4637, "step": 5212 }, { "epoch": 0.8342134741558649, "grad_norm": 0.3560643792152405, "learning_rate": 5e-05, "loss": 1.5344, "step": 5213 }, { "epoch": 0.8343734997599616, "grad_norm": 0.36393171548843384, "learning_rate": 5e-05, "loss": 1.4641, "step": 5214 }, { "epoch": 0.8345335253640582, "grad_norm": 0.3549644351005554, "learning_rate": 5e-05, "loss": 1.5315, "step": 5215 }, { "epoch": 0.8346935509681549, "grad_norm": 0.3605934679508209, "learning_rate": 5e-05, "loss": 1.4589, "step": 5216 }, { "epoch": 0.8348535765722516, "grad_norm": 0.384985089302063, "learning_rate": 5e-05, "loss": 1.4551, "step": 5217 }, { "epoch": 0.8350136021763482, "grad_norm": 0.36740317940711975, "learning_rate": 5e-05, "loss": 1.4242, "step": 5218 }, { "epoch": 0.8351736277804449, "grad_norm": 0.361998051404953, "learning_rate": 5e-05, "loss": 1.3805, "step": 5219 }, { "epoch": 0.8353336533845416, "grad_norm": 0.36482614278793335, "learning_rate": 5e-05, "loss": 1.3784, "step": 5220 }, { "epoch": 0.8354936789886381, "grad_norm": 0.3660333752632141, "learning_rate": 5e-05, "loss": 1.4573, "step": 5221 }, { "epoch": 0.8356537045927348, "grad_norm": 0.361855685710907, "learning_rate": 5e-05, "loss": 1.4027, "step": 5222 }, { "epoch": 0.8358137301968315, "grad_norm": 0.3489514887332916, "learning_rate": 5e-05, "loss": 1.3742, "step": 5223 }, { "epoch": 0.8359737558009281, "grad_norm": 0.35403287410736084, "learning_rate": 5e-05, "loss": 1.4549, "step": 5224 }, { "epoch": 0.8361337814050248, "grad_norm": 0.35948821902275085, "learning_rate": 5e-05, "loss": 1.4702, "step": 5225 }, { "epoch": 0.8362938070091215, "grad_norm": 0.36498942971229553, "learning_rate": 5e-05, "loss": 1.5303, "step": 5226 }, { "epoch": 0.8364538326132182, "grad_norm": 0.3646329939365387, "learning_rate": 5e-05, "loss": 1.5136, "step": 5227 }, { "epoch": 0.8366138582173148, "grad_norm": 0.3733360171318054, "learning_rate": 5e-05, "loss": 1.437, "step": 5228 }, { "epoch": 0.8367738838214114, "grad_norm": 0.3574964106082916, "learning_rate": 5e-05, "loss": 1.4663, "step": 5229 }, { "epoch": 0.836933909425508, "grad_norm": 0.34568724036216736, "learning_rate": 5e-05, "loss": 1.4226, "step": 5230 }, { "epoch": 0.8370939350296047, "grad_norm": 0.3561563491821289, "learning_rate": 5e-05, "loss": 1.4081, "step": 5231 }, { "epoch": 0.8372539606337014, "grad_norm": 0.34554222226142883, "learning_rate": 5e-05, "loss": 1.465, "step": 5232 }, { "epoch": 0.8374139862377981, "grad_norm": 0.3542898893356323, "learning_rate": 5e-05, "loss": 1.3932, "step": 5233 }, { "epoch": 0.8375740118418947, "grad_norm": 0.37000250816345215, "learning_rate": 5e-05, "loss": 1.4864, "step": 5234 }, { "epoch": 0.8377340374459914, "grad_norm": 0.3713914453983307, "learning_rate": 5e-05, "loss": 1.4301, "step": 5235 }, { "epoch": 0.8378940630500881, "grad_norm": 0.3622399568557739, "learning_rate": 5e-05, "loss": 1.4992, "step": 5236 }, { "epoch": 0.8380540886541846, "grad_norm": 0.37647557258605957, "learning_rate": 5e-05, "loss": 1.4623, "step": 5237 }, { "epoch": 0.8382141142582813, "grad_norm": 0.35484346747398376, "learning_rate": 5e-05, "loss": 1.3665, "step": 5238 }, { "epoch": 0.838374139862378, "grad_norm": 0.3542073667049408, "learning_rate": 5e-05, "loss": 1.4488, "step": 5239 }, { "epoch": 0.8385341654664746, "grad_norm": 0.37434911727905273, "learning_rate": 5e-05, "loss": 1.591, "step": 5240 }, { "epoch": 0.8386941910705713, "grad_norm": 0.3775123953819275, "learning_rate": 5e-05, "loss": 1.4196, "step": 5241 }, { "epoch": 0.838854216674668, "grad_norm": 0.37076619267463684, "learning_rate": 5e-05, "loss": 1.4914, "step": 5242 }, { "epoch": 0.8390142422787646, "grad_norm": 0.3609943985939026, "learning_rate": 5e-05, "loss": 1.4926, "step": 5243 }, { "epoch": 0.8391742678828612, "grad_norm": 0.3391343951225281, "learning_rate": 5e-05, "loss": 1.3788, "step": 5244 }, { "epoch": 0.8393342934869579, "grad_norm": 0.3561720550060272, "learning_rate": 5e-05, "loss": 1.4454, "step": 5245 }, { "epoch": 0.8394943190910545, "grad_norm": 0.3489833474159241, "learning_rate": 5e-05, "loss": 1.4949, "step": 5246 }, { "epoch": 0.8396543446951512, "grad_norm": 0.3579457402229309, "learning_rate": 5e-05, "loss": 1.4429, "step": 5247 }, { "epoch": 0.8398143702992479, "grad_norm": 0.3562319874763489, "learning_rate": 5e-05, "loss": 1.4313, "step": 5248 }, { "epoch": 0.8399743959033446, "grad_norm": 0.3644692897796631, "learning_rate": 5e-05, "loss": 1.4701, "step": 5249 }, { "epoch": 0.8401344215074412, "grad_norm": 0.3655875325202942, "learning_rate": 5e-05, "loss": 1.4823, "step": 5250 }, { "epoch": 0.8402944471115379, "grad_norm": 0.3559204339981079, "learning_rate": 5e-05, "loss": 1.4102, "step": 5251 }, { "epoch": 0.8404544727156344, "grad_norm": 0.3651902675628662, "learning_rate": 5e-05, "loss": 1.4658, "step": 5252 }, { "epoch": 0.8406144983197311, "grad_norm": 0.3687346577644348, "learning_rate": 5e-05, "loss": 1.5231, "step": 5253 }, { "epoch": 0.8407745239238278, "grad_norm": 0.3522423505783081, "learning_rate": 5e-05, "loss": 1.4199, "step": 5254 }, { "epoch": 0.8409345495279245, "grad_norm": 0.36321133375167847, "learning_rate": 5e-05, "loss": 1.4482, "step": 5255 }, { "epoch": 0.8410945751320211, "grad_norm": 0.3570118248462677, "learning_rate": 5e-05, "loss": 1.4859, "step": 5256 }, { "epoch": 0.8412546007361178, "grad_norm": 0.3685033917427063, "learning_rate": 5e-05, "loss": 1.5259, "step": 5257 }, { "epoch": 0.8414146263402145, "grad_norm": 0.35301563143730164, "learning_rate": 5e-05, "loss": 1.3931, "step": 5258 }, { "epoch": 0.8415746519443111, "grad_norm": 0.3661206364631653, "learning_rate": 5e-05, "loss": 1.5815, "step": 5259 }, { "epoch": 0.8417346775484077, "grad_norm": 0.35706183314323425, "learning_rate": 5e-05, "loss": 1.3921, "step": 5260 }, { "epoch": 0.8418947031525044, "grad_norm": 0.3504597544670105, "learning_rate": 5e-05, "loss": 1.4571, "step": 5261 }, { "epoch": 0.842054728756601, "grad_norm": 0.35679566860198975, "learning_rate": 5e-05, "loss": 1.5081, "step": 5262 }, { "epoch": 0.8422147543606977, "grad_norm": 0.3516690731048584, "learning_rate": 5e-05, "loss": 1.5202, "step": 5263 }, { "epoch": 0.8423747799647944, "grad_norm": 0.35586968064308167, "learning_rate": 5e-05, "loss": 1.4368, "step": 5264 }, { "epoch": 0.842534805568891, "grad_norm": 0.3427407741546631, "learning_rate": 5e-05, "loss": 1.3926, "step": 5265 }, { "epoch": 0.8426948311729877, "grad_norm": 0.34954771399497986, "learning_rate": 5e-05, "loss": 1.3901, "step": 5266 }, { "epoch": 0.8428548567770844, "grad_norm": 0.3638693392276764, "learning_rate": 5e-05, "loss": 1.5538, "step": 5267 }, { "epoch": 0.8430148823811809, "grad_norm": 0.3596634864807129, "learning_rate": 5e-05, "loss": 1.5001, "step": 5268 }, { "epoch": 0.8431749079852776, "grad_norm": 0.35747745633125305, "learning_rate": 5e-05, "loss": 1.4182, "step": 5269 }, { "epoch": 0.8433349335893743, "grad_norm": 0.3563776910305023, "learning_rate": 5e-05, "loss": 1.504, "step": 5270 }, { "epoch": 0.843494959193471, "grad_norm": 0.3470782935619354, "learning_rate": 5e-05, "loss": 1.3743, "step": 5271 }, { "epoch": 0.8436549847975676, "grad_norm": 0.3555219769477844, "learning_rate": 5e-05, "loss": 1.4169, "step": 5272 }, { "epoch": 0.8438150104016643, "grad_norm": 0.3676205277442932, "learning_rate": 5e-05, "loss": 1.5048, "step": 5273 }, { "epoch": 0.843975036005761, "grad_norm": 0.358807772397995, "learning_rate": 5e-05, "loss": 1.3754, "step": 5274 }, { "epoch": 0.8441350616098576, "grad_norm": 0.3562138080596924, "learning_rate": 5e-05, "loss": 1.3699, "step": 5275 }, { "epoch": 0.8442950872139542, "grad_norm": 0.35900232195854187, "learning_rate": 5e-05, "loss": 1.461, "step": 5276 }, { "epoch": 0.8444551128180509, "grad_norm": 0.37212488055229187, "learning_rate": 5e-05, "loss": 1.5572, "step": 5277 }, { "epoch": 0.8446151384221475, "grad_norm": 0.3626863956451416, "learning_rate": 5e-05, "loss": 1.3766, "step": 5278 }, { "epoch": 0.8447751640262442, "grad_norm": 0.3582229018211365, "learning_rate": 5e-05, "loss": 1.4155, "step": 5279 }, { "epoch": 0.8449351896303409, "grad_norm": 0.3528406023979187, "learning_rate": 5e-05, "loss": 1.4418, "step": 5280 }, { "epoch": 0.8450952152344375, "grad_norm": 0.3690021336078644, "learning_rate": 5e-05, "loss": 1.4197, "step": 5281 }, { "epoch": 0.8452552408385342, "grad_norm": 0.3710613548755646, "learning_rate": 5e-05, "loss": 1.376, "step": 5282 }, { "epoch": 0.8454152664426309, "grad_norm": 0.35239189863204956, "learning_rate": 5e-05, "loss": 1.4712, "step": 5283 }, { "epoch": 0.8455752920467274, "grad_norm": 0.3498145639896393, "learning_rate": 5e-05, "loss": 1.4346, "step": 5284 }, { "epoch": 0.8457353176508241, "grad_norm": 0.3461039662361145, "learning_rate": 5e-05, "loss": 1.4304, "step": 5285 }, { "epoch": 0.8458953432549208, "grad_norm": 0.3650647699832916, "learning_rate": 5e-05, "loss": 1.4461, "step": 5286 }, { "epoch": 0.8460553688590174, "grad_norm": 0.3658352196216583, "learning_rate": 5e-05, "loss": 1.4298, "step": 5287 }, { "epoch": 0.8462153944631141, "grad_norm": 0.3692653477191925, "learning_rate": 5e-05, "loss": 1.4323, "step": 5288 }, { "epoch": 0.8463754200672108, "grad_norm": 0.3694380223751068, "learning_rate": 5e-05, "loss": 1.472, "step": 5289 }, { "epoch": 0.8465354456713075, "grad_norm": 0.3566053807735443, "learning_rate": 5e-05, "loss": 1.4847, "step": 5290 }, { "epoch": 0.846695471275404, "grad_norm": 0.36215853691101074, "learning_rate": 5e-05, "loss": 1.5106, "step": 5291 }, { "epoch": 0.8468554968795007, "grad_norm": 0.3562389016151428, "learning_rate": 5e-05, "loss": 1.5071, "step": 5292 }, { "epoch": 0.8470155224835973, "grad_norm": 0.36853328347206116, "learning_rate": 5e-05, "loss": 1.4589, "step": 5293 }, { "epoch": 0.847175548087694, "grad_norm": 0.37463459372520447, "learning_rate": 5e-05, "loss": 1.4314, "step": 5294 }, { "epoch": 0.8473355736917907, "grad_norm": 0.35951364040374756, "learning_rate": 5e-05, "loss": 1.4176, "step": 5295 }, { "epoch": 0.8474955992958874, "grad_norm": 0.3589048981666565, "learning_rate": 5e-05, "loss": 1.4087, "step": 5296 }, { "epoch": 0.847655624899984, "grad_norm": 0.3663347661495209, "learning_rate": 5e-05, "loss": 1.4912, "step": 5297 }, { "epoch": 0.8478156505040807, "grad_norm": 0.3551987409591675, "learning_rate": 5e-05, "loss": 1.4603, "step": 5298 }, { "epoch": 0.8479756761081773, "grad_norm": 0.3806306719779968, "learning_rate": 5e-05, "loss": 1.4964, "step": 5299 }, { "epoch": 0.8481357017122739, "grad_norm": 0.3748134672641754, "learning_rate": 5e-05, "loss": 1.4699, "step": 5300 }, { "epoch": 0.8482957273163706, "grad_norm": 0.3870297968387604, "learning_rate": 5e-05, "loss": 1.4706, "step": 5301 }, { "epoch": 0.8484557529204673, "grad_norm": 0.3608984649181366, "learning_rate": 5e-05, "loss": 1.5097, "step": 5302 }, { "epoch": 0.8486157785245639, "grad_norm": 0.3578592836856842, "learning_rate": 5e-05, "loss": 1.4665, "step": 5303 }, { "epoch": 0.8487758041286606, "grad_norm": 0.384848415851593, "learning_rate": 5e-05, "loss": 1.445, "step": 5304 }, { "epoch": 0.8489358297327573, "grad_norm": 0.3687969148159027, "learning_rate": 5e-05, "loss": 1.4532, "step": 5305 }, { "epoch": 0.8490958553368539, "grad_norm": 0.3668827414512634, "learning_rate": 5e-05, "loss": 1.455, "step": 5306 }, { "epoch": 0.8492558809409505, "grad_norm": 0.3656708598136902, "learning_rate": 5e-05, "loss": 1.4555, "step": 5307 }, { "epoch": 0.8494159065450472, "grad_norm": 0.34573373198509216, "learning_rate": 5e-05, "loss": 1.3677, "step": 5308 }, { "epoch": 0.8495759321491438, "grad_norm": 0.3529602289199829, "learning_rate": 5e-05, "loss": 1.4333, "step": 5309 }, { "epoch": 0.8497359577532405, "grad_norm": 0.3648768663406372, "learning_rate": 5e-05, "loss": 1.4703, "step": 5310 }, { "epoch": 0.8498959833573372, "grad_norm": 0.35324376821517944, "learning_rate": 5e-05, "loss": 1.4265, "step": 5311 }, { "epoch": 0.8500560089614339, "grad_norm": 0.3643975257873535, "learning_rate": 5e-05, "loss": 1.4747, "step": 5312 }, { "epoch": 0.8502160345655305, "grad_norm": 0.3591623604297638, "learning_rate": 5e-05, "loss": 1.5117, "step": 5313 }, { "epoch": 0.8503760601696272, "grad_norm": 0.3537569046020508, "learning_rate": 5e-05, "loss": 1.4263, "step": 5314 }, { "epoch": 0.8505360857737237, "grad_norm": 0.3828037679195404, "learning_rate": 5e-05, "loss": 1.4643, "step": 5315 }, { "epoch": 0.8506961113778204, "grad_norm": 0.35802027583122253, "learning_rate": 5e-05, "loss": 1.4228, "step": 5316 }, { "epoch": 0.8508561369819171, "grad_norm": 0.36527204513549805, "learning_rate": 5e-05, "loss": 1.5249, "step": 5317 }, { "epoch": 0.8510161625860138, "grad_norm": 0.35363054275512695, "learning_rate": 5e-05, "loss": 1.4443, "step": 5318 }, { "epoch": 0.8511761881901104, "grad_norm": 0.36613988876342773, "learning_rate": 5e-05, "loss": 1.4138, "step": 5319 }, { "epoch": 0.8513362137942071, "grad_norm": 0.3717835247516632, "learning_rate": 5e-05, "loss": 1.492, "step": 5320 }, { "epoch": 0.8514962393983038, "grad_norm": 0.35215842723846436, "learning_rate": 5e-05, "loss": 1.4665, "step": 5321 }, { "epoch": 0.8516562650024004, "grad_norm": 0.37919342517852783, "learning_rate": 5e-05, "loss": 1.4992, "step": 5322 }, { "epoch": 0.851816290606497, "grad_norm": 0.3783343434333801, "learning_rate": 5e-05, "loss": 1.4649, "step": 5323 }, { "epoch": 0.8519763162105937, "grad_norm": 0.3549504280090332, "learning_rate": 5e-05, "loss": 1.4808, "step": 5324 }, { "epoch": 0.8521363418146903, "grad_norm": 0.3485763370990753, "learning_rate": 5e-05, "loss": 1.3409, "step": 5325 }, { "epoch": 0.852296367418787, "grad_norm": 0.36079835891723633, "learning_rate": 5e-05, "loss": 1.4307, "step": 5326 }, { "epoch": 0.8524563930228837, "grad_norm": 0.37406617403030396, "learning_rate": 5e-05, "loss": 1.5038, "step": 5327 }, { "epoch": 0.8526164186269803, "grad_norm": 0.36061322689056396, "learning_rate": 5e-05, "loss": 1.4627, "step": 5328 }, { "epoch": 0.852776444231077, "grad_norm": 0.33806952834129333, "learning_rate": 5e-05, "loss": 1.4385, "step": 5329 }, { "epoch": 0.8529364698351737, "grad_norm": 0.36124855279922485, "learning_rate": 5e-05, "loss": 1.4211, "step": 5330 }, { "epoch": 0.8530964954392702, "grad_norm": 0.3614404797554016, "learning_rate": 5e-05, "loss": 1.3437, "step": 5331 }, { "epoch": 0.8532565210433669, "grad_norm": 0.3594167232513428, "learning_rate": 5e-05, "loss": 1.4527, "step": 5332 }, { "epoch": 0.8534165466474636, "grad_norm": 0.37302467226982117, "learning_rate": 5e-05, "loss": 1.4738, "step": 5333 }, { "epoch": 0.8535765722515603, "grad_norm": 0.36489415168762207, "learning_rate": 5e-05, "loss": 1.5267, "step": 5334 }, { "epoch": 0.8537365978556569, "grad_norm": 0.3588161766529083, "learning_rate": 5e-05, "loss": 1.4132, "step": 5335 }, { "epoch": 0.8538966234597536, "grad_norm": 0.3671915531158447, "learning_rate": 5e-05, "loss": 1.4933, "step": 5336 }, { "epoch": 0.8540566490638503, "grad_norm": 0.3648146092891693, "learning_rate": 5e-05, "loss": 1.4777, "step": 5337 }, { "epoch": 0.8542166746679468, "grad_norm": 0.34985488653182983, "learning_rate": 5e-05, "loss": 1.4181, "step": 5338 }, { "epoch": 0.8543767002720435, "grad_norm": 0.362783282995224, "learning_rate": 5e-05, "loss": 1.4383, "step": 5339 }, { "epoch": 0.8545367258761402, "grad_norm": 0.3686161935329437, "learning_rate": 5e-05, "loss": 1.4188, "step": 5340 }, { "epoch": 0.8546967514802368, "grad_norm": 0.3590852916240692, "learning_rate": 5e-05, "loss": 1.4733, "step": 5341 }, { "epoch": 0.8548567770843335, "grad_norm": 0.3498419523239136, "learning_rate": 5e-05, "loss": 1.4255, "step": 5342 }, { "epoch": 0.8550168026884302, "grad_norm": 0.3488388657569885, "learning_rate": 5e-05, "loss": 1.3881, "step": 5343 }, { "epoch": 0.8551768282925268, "grad_norm": 0.347364217042923, "learning_rate": 5e-05, "loss": 1.3899, "step": 5344 }, { "epoch": 0.8553368538966235, "grad_norm": 0.35646796226501465, "learning_rate": 5e-05, "loss": 1.4713, "step": 5345 }, { "epoch": 0.8554968795007201, "grad_norm": 0.35807493329048157, "learning_rate": 5e-05, "loss": 1.4506, "step": 5346 }, { "epoch": 0.8556569051048167, "grad_norm": 0.36347678303718567, "learning_rate": 5e-05, "loss": 1.3883, "step": 5347 }, { "epoch": 0.8558169307089134, "grad_norm": 0.3484257459640503, "learning_rate": 5e-05, "loss": 1.4028, "step": 5348 }, { "epoch": 0.8559769563130101, "grad_norm": 0.3615444600582123, "learning_rate": 5e-05, "loss": 1.3962, "step": 5349 }, { "epoch": 0.8561369819171067, "grad_norm": 0.36732426285743713, "learning_rate": 5e-05, "loss": 1.4427, "step": 5350 }, { "epoch": 0.8562970075212034, "grad_norm": 0.3638388514518738, "learning_rate": 5e-05, "loss": 1.4385, "step": 5351 }, { "epoch": 0.8564570331253001, "grad_norm": 0.3729099929332733, "learning_rate": 5e-05, "loss": 1.491, "step": 5352 }, { "epoch": 0.8566170587293968, "grad_norm": 0.3556510806083679, "learning_rate": 5e-05, "loss": 1.4245, "step": 5353 }, { "epoch": 0.8567770843334933, "grad_norm": 0.36932483315467834, "learning_rate": 5e-05, "loss": 1.4475, "step": 5354 }, { "epoch": 0.85693710993759, "grad_norm": 0.36708104610443115, "learning_rate": 5e-05, "loss": 1.4509, "step": 5355 }, { "epoch": 0.8570971355416866, "grad_norm": 0.3498094379901886, "learning_rate": 5e-05, "loss": 1.4159, "step": 5356 }, { "epoch": 0.8572571611457833, "grad_norm": 0.3813495934009552, "learning_rate": 5e-05, "loss": 1.4625, "step": 5357 }, { "epoch": 0.85741718674988, "grad_norm": 0.3726329505443573, "learning_rate": 5e-05, "loss": 1.4969, "step": 5358 }, { "epoch": 0.8575772123539767, "grad_norm": 0.3720811605453491, "learning_rate": 5e-05, "loss": 1.5025, "step": 5359 }, { "epoch": 0.8577372379580733, "grad_norm": 0.36397162079811096, "learning_rate": 5e-05, "loss": 1.4296, "step": 5360 }, { "epoch": 0.85789726356217, "grad_norm": 0.34996917843818665, "learning_rate": 5e-05, "loss": 1.3899, "step": 5361 }, { "epoch": 0.8580572891662666, "grad_norm": 0.35997116565704346, "learning_rate": 5e-05, "loss": 1.4315, "step": 5362 }, { "epoch": 0.8582173147703632, "grad_norm": 0.37560608983039856, "learning_rate": 5e-05, "loss": 1.4738, "step": 5363 }, { "epoch": 0.8583773403744599, "grad_norm": 0.3667141795158386, "learning_rate": 5e-05, "loss": 1.5082, "step": 5364 }, { "epoch": 0.8585373659785566, "grad_norm": 0.3753615915775299, "learning_rate": 5e-05, "loss": 1.5086, "step": 5365 }, { "epoch": 0.8586973915826532, "grad_norm": 0.37170058488845825, "learning_rate": 5e-05, "loss": 1.4157, "step": 5366 }, { "epoch": 0.8588574171867499, "grad_norm": 0.3752920627593994, "learning_rate": 5e-05, "loss": 1.4222, "step": 5367 }, { "epoch": 0.8590174427908466, "grad_norm": 0.3565499484539032, "learning_rate": 5e-05, "loss": 1.3296, "step": 5368 }, { "epoch": 0.8591774683949432, "grad_norm": 0.36472174525260925, "learning_rate": 5e-05, "loss": 1.4083, "step": 5369 }, { "epoch": 0.8593374939990398, "grad_norm": 0.38231131434440613, "learning_rate": 5e-05, "loss": 1.5069, "step": 5370 }, { "epoch": 0.8594975196031365, "grad_norm": 0.35191819071769714, "learning_rate": 5e-05, "loss": 1.3776, "step": 5371 }, { "epoch": 0.8596575452072331, "grad_norm": 0.3710957467556, "learning_rate": 5e-05, "loss": 1.3816, "step": 5372 }, { "epoch": 0.8598175708113298, "grad_norm": 0.36498939990997314, "learning_rate": 5e-05, "loss": 1.4576, "step": 5373 }, { "epoch": 0.8599775964154265, "grad_norm": 0.365134596824646, "learning_rate": 5e-05, "loss": 1.3915, "step": 5374 }, { "epoch": 0.8601376220195232, "grad_norm": 0.371830552816391, "learning_rate": 5e-05, "loss": 1.4928, "step": 5375 }, { "epoch": 0.8602976476236198, "grad_norm": 0.3617999851703644, "learning_rate": 5e-05, "loss": 1.4898, "step": 5376 }, { "epoch": 0.8604576732277164, "grad_norm": 0.3610784709453583, "learning_rate": 5e-05, "loss": 1.4622, "step": 5377 }, { "epoch": 0.860617698831813, "grad_norm": 0.3749624490737915, "learning_rate": 5e-05, "loss": 1.4786, "step": 5378 }, { "epoch": 0.8607777244359097, "grad_norm": 0.3607847988605499, "learning_rate": 5e-05, "loss": 1.4827, "step": 5379 }, { "epoch": 0.8609377500400064, "grad_norm": 0.3714104890823364, "learning_rate": 5e-05, "loss": 1.441, "step": 5380 }, { "epoch": 0.8610977756441031, "grad_norm": 0.3714657723903656, "learning_rate": 5e-05, "loss": 1.417, "step": 5381 }, { "epoch": 0.8612578012481997, "grad_norm": 0.35627150535583496, "learning_rate": 5e-05, "loss": 1.4518, "step": 5382 }, { "epoch": 0.8614178268522964, "grad_norm": 0.36096832156181335, "learning_rate": 5e-05, "loss": 1.4438, "step": 5383 }, { "epoch": 0.8615778524563931, "grad_norm": 0.3708482086658478, "learning_rate": 5e-05, "loss": 1.5169, "step": 5384 }, { "epoch": 0.8617378780604896, "grad_norm": 0.35747185349464417, "learning_rate": 5e-05, "loss": 1.4422, "step": 5385 }, { "epoch": 0.8618979036645863, "grad_norm": 0.3663867115974426, "learning_rate": 5e-05, "loss": 1.4313, "step": 5386 }, { "epoch": 0.862057929268683, "grad_norm": 0.3505150377750397, "learning_rate": 5e-05, "loss": 1.4307, "step": 5387 }, { "epoch": 0.8622179548727796, "grad_norm": 0.363861083984375, "learning_rate": 5e-05, "loss": 1.4818, "step": 5388 }, { "epoch": 0.8623779804768763, "grad_norm": 0.3663770854473114, "learning_rate": 5e-05, "loss": 1.4857, "step": 5389 }, { "epoch": 0.862538006080973, "grad_norm": 0.380825936794281, "learning_rate": 5e-05, "loss": 1.5244, "step": 5390 }, { "epoch": 0.8626980316850696, "grad_norm": 0.3643808960914612, "learning_rate": 5e-05, "loss": 1.408, "step": 5391 }, { "epoch": 0.8628580572891663, "grad_norm": 0.3629303276538849, "learning_rate": 5e-05, "loss": 1.5137, "step": 5392 }, { "epoch": 0.8630180828932629, "grad_norm": 0.3693138659000397, "learning_rate": 5e-05, "loss": 1.4881, "step": 5393 }, { "epoch": 0.8631781084973595, "grad_norm": 0.35358870029449463, "learning_rate": 5e-05, "loss": 1.4164, "step": 5394 }, { "epoch": 0.8633381341014562, "grad_norm": 0.35682550072669983, "learning_rate": 5e-05, "loss": 1.3923, "step": 5395 }, { "epoch": 0.8634981597055529, "grad_norm": 0.3683011829853058, "learning_rate": 5e-05, "loss": 1.4738, "step": 5396 }, { "epoch": 0.8636581853096496, "grad_norm": 0.365278959274292, "learning_rate": 5e-05, "loss": 1.4389, "step": 5397 }, { "epoch": 0.8638182109137462, "grad_norm": 0.3656463623046875, "learning_rate": 5e-05, "loss": 1.3911, "step": 5398 }, { "epoch": 0.8639782365178429, "grad_norm": 0.35248908400535583, "learning_rate": 5e-05, "loss": 1.4864, "step": 5399 }, { "epoch": 0.8641382621219396, "grad_norm": 0.36161333322525024, "learning_rate": 5e-05, "loss": 1.4466, "step": 5400 }, { "epoch": 0.8642982877260361, "grad_norm": 0.3595872223377228, "learning_rate": 5e-05, "loss": 1.4258, "step": 5401 }, { "epoch": 0.8644583133301328, "grad_norm": 0.36432337760925293, "learning_rate": 5e-05, "loss": 1.4834, "step": 5402 }, { "epoch": 0.8646183389342295, "grad_norm": 0.3717685043811798, "learning_rate": 5e-05, "loss": 1.4031, "step": 5403 }, { "epoch": 0.8647783645383261, "grad_norm": 0.36459559202194214, "learning_rate": 5e-05, "loss": 1.5074, "step": 5404 }, { "epoch": 0.8649383901424228, "grad_norm": 0.351589173078537, "learning_rate": 5e-05, "loss": 1.4438, "step": 5405 }, { "epoch": 0.8650984157465195, "grad_norm": 0.35971787571907043, "learning_rate": 5e-05, "loss": 1.4367, "step": 5406 }, { "epoch": 0.8652584413506161, "grad_norm": 0.38947317004203796, "learning_rate": 5e-05, "loss": 1.4905, "step": 5407 }, { "epoch": 0.8654184669547128, "grad_norm": 0.3689476251602173, "learning_rate": 5e-05, "loss": 1.4816, "step": 5408 }, { "epoch": 0.8655784925588094, "grad_norm": 0.3570307791233063, "learning_rate": 5e-05, "loss": 1.4355, "step": 5409 }, { "epoch": 0.865738518162906, "grad_norm": 0.3641497492790222, "learning_rate": 5e-05, "loss": 1.4586, "step": 5410 }, { "epoch": 0.8658985437670027, "grad_norm": 0.36207109689712524, "learning_rate": 5e-05, "loss": 1.4419, "step": 5411 }, { "epoch": 0.8660585693710994, "grad_norm": 0.3629544675350189, "learning_rate": 5e-05, "loss": 1.3946, "step": 5412 }, { "epoch": 0.866218594975196, "grad_norm": 0.37062376737594604, "learning_rate": 5e-05, "loss": 1.4567, "step": 5413 }, { "epoch": 0.8663786205792927, "grad_norm": 0.3500845730304718, "learning_rate": 5e-05, "loss": 1.3922, "step": 5414 }, { "epoch": 0.8665386461833894, "grad_norm": 0.36329856514930725, "learning_rate": 5e-05, "loss": 1.4626, "step": 5415 }, { "epoch": 0.866698671787486, "grad_norm": 0.38090386986732483, "learning_rate": 5e-05, "loss": 1.4316, "step": 5416 }, { "epoch": 0.8668586973915826, "grad_norm": 0.36937031149864197, "learning_rate": 5e-05, "loss": 1.4872, "step": 5417 }, { "epoch": 0.8670187229956793, "grad_norm": 0.3660701513290405, "learning_rate": 5e-05, "loss": 1.4196, "step": 5418 }, { "epoch": 0.867178748599776, "grad_norm": 0.360623300075531, "learning_rate": 5e-05, "loss": 1.452, "step": 5419 }, { "epoch": 0.8673387742038726, "grad_norm": 0.36296793818473816, "learning_rate": 5e-05, "loss": 1.5054, "step": 5420 }, { "epoch": 0.8674987998079693, "grad_norm": 0.35801494121551514, "learning_rate": 5e-05, "loss": 1.3477, "step": 5421 }, { "epoch": 0.867658825412066, "grad_norm": 0.34825631976127625, "learning_rate": 5e-05, "loss": 1.3303, "step": 5422 }, { "epoch": 0.8678188510161626, "grad_norm": 0.34995439648628235, "learning_rate": 5e-05, "loss": 1.415, "step": 5423 }, { "epoch": 0.8679788766202592, "grad_norm": 0.36726903915405273, "learning_rate": 5e-05, "loss": 1.4398, "step": 5424 }, { "epoch": 0.8681389022243559, "grad_norm": 0.385963499546051, "learning_rate": 5e-05, "loss": 1.4212, "step": 5425 }, { "epoch": 0.8682989278284525, "grad_norm": 0.3879255950450897, "learning_rate": 5e-05, "loss": 1.4507, "step": 5426 }, { "epoch": 0.8684589534325492, "grad_norm": 0.37203365564346313, "learning_rate": 5e-05, "loss": 1.3704, "step": 5427 }, { "epoch": 0.8686189790366459, "grad_norm": 0.36594414710998535, "learning_rate": 5e-05, "loss": 1.4749, "step": 5428 }, { "epoch": 0.8687790046407425, "grad_norm": 0.3579334616661072, "learning_rate": 5e-05, "loss": 1.4195, "step": 5429 }, { "epoch": 0.8689390302448392, "grad_norm": 0.35981303453445435, "learning_rate": 5e-05, "loss": 1.4271, "step": 5430 }, { "epoch": 0.8690990558489359, "grad_norm": 0.3814661502838135, "learning_rate": 5e-05, "loss": 1.4873, "step": 5431 }, { "epoch": 0.8692590814530324, "grad_norm": 0.3487297594547272, "learning_rate": 5e-05, "loss": 1.4461, "step": 5432 }, { "epoch": 0.8694191070571291, "grad_norm": 0.37123537063598633, "learning_rate": 5e-05, "loss": 1.4106, "step": 5433 }, { "epoch": 0.8695791326612258, "grad_norm": 0.3675425350666046, "learning_rate": 5e-05, "loss": 1.4846, "step": 5434 }, { "epoch": 0.8697391582653224, "grad_norm": 0.37528425455093384, "learning_rate": 5e-05, "loss": 1.4925, "step": 5435 }, { "epoch": 0.8698991838694191, "grad_norm": 0.3752754330635071, "learning_rate": 5e-05, "loss": 1.4926, "step": 5436 }, { "epoch": 0.8700592094735158, "grad_norm": 0.38778746128082275, "learning_rate": 5e-05, "loss": 1.5148, "step": 5437 }, { "epoch": 0.8702192350776125, "grad_norm": 0.3785780370235443, "learning_rate": 5e-05, "loss": 1.4024, "step": 5438 }, { "epoch": 0.8703792606817091, "grad_norm": 0.37157195806503296, "learning_rate": 5e-05, "loss": 1.4549, "step": 5439 }, { "epoch": 0.8705392862858057, "grad_norm": 0.3859073519706726, "learning_rate": 5e-05, "loss": 1.5173, "step": 5440 }, { "epoch": 0.8706993118899023, "grad_norm": 0.3754563629627228, "learning_rate": 5e-05, "loss": 1.5222, "step": 5441 }, { "epoch": 0.870859337493999, "grad_norm": 0.36296209692955017, "learning_rate": 5e-05, "loss": 1.4206, "step": 5442 }, { "epoch": 0.8710193630980957, "grad_norm": 0.3497907519340515, "learning_rate": 5e-05, "loss": 1.3933, "step": 5443 }, { "epoch": 0.8711793887021924, "grad_norm": 0.38572925329208374, "learning_rate": 5e-05, "loss": 1.4868, "step": 5444 }, { "epoch": 0.871339414306289, "grad_norm": 0.3682347536087036, "learning_rate": 5e-05, "loss": 1.4733, "step": 5445 }, { "epoch": 0.8714994399103857, "grad_norm": 0.3525097966194153, "learning_rate": 5e-05, "loss": 1.4482, "step": 5446 }, { "epoch": 0.8716594655144824, "grad_norm": 0.3881663680076599, "learning_rate": 5e-05, "loss": 1.4519, "step": 5447 }, { "epoch": 0.8718194911185789, "grad_norm": 0.37799328565597534, "learning_rate": 5e-05, "loss": 1.4666, "step": 5448 }, { "epoch": 0.8719795167226756, "grad_norm": 0.35877570509910583, "learning_rate": 5e-05, "loss": 1.5163, "step": 5449 }, { "epoch": 0.8721395423267723, "grad_norm": 0.3684101104736328, "learning_rate": 5e-05, "loss": 1.4447, "step": 5450 }, { "epoch": 0.8722995679308689, "grad_norm": 0.3599943220615387, "learning_rate": 5e-05, "loss": 1.4619, "step": 5451 }, { "epoch": 0.8724595935349656, "grad_norm": 0.35005804896354675, "learning_rate": 5e-05, "loss": 1.3582, "step": 5452 }, { "epoch": 0.8726196191390623, "grad_norm": 0.37475934624671936, "learning_rate": 5e-05, "loss": 1.4876, "step": 5453 }, { "epoch": 0.8727796447431589, "grad_norm": 0.3526343107223511, "learning_rate": 5e-05, "loss": 1.456, "step": 5454 }, { "epoch": 0.8729396703472556, "grad_norm": 0.35865694284439087, "learning_rate": 5e-05, "loss": 1.4176, "step": 5455 }, { "epoch": 0.8730996959513522, "grad_norm": 0.37823739647865295, "learning_rate": 5e-05, "loss": 1.453, "step": 5456 }, { "epoch": 0.8732597215554488, "grad_norm": 0.3658694326877594, "learning_rate": 5e-05, "loss": 1.4844, "step": 5457 }, { "epoch": 0.8734197471595455, "grad_norm": 0.355167418718338, "learning_rate": 5e-05, "loss": 1.4762, "step": 5458 }, { "epoch": 0.8735797727636422, "grad_norm": 0.35460159182548523, "learning_rate": 5e-05, "loss": 1.4174, "step": 5459 }, { "epoch": 0.8737397983677389, "grad_norm": 0.37118813395500183, "learning_rate": 5e-05, "loss": 1.4318, "step": 5460 }, { "epoch": 0.8738998239718355, "grad_norm": 0.3724638819694519, "learning_rate": 5e-05, "loss": 1.4951, "step": 5461 }, { "epoch": 0.8740598495759322, "grad_norm": 0.3903367221355438, "learning_rate": 5e-05, "loss": 1.4603, "step": 5462 }, { "epoch": 0.8742198751800287, "grad_norm": 0.3648114800453186, "learning_rate": 5e-05, "loss": 1.4783, "step": 5463 }, { "epoch": 0.8743799007841254, "grad_norm": 0.37189579010009766, "learning_rate": 5e-05, "loss": 1.4299, "step": 5464 }, { "epoch": 0.8745399263882221, "grad_norm": 0.3744593560695648, "learning_rate": 5e-05, "loss": 1.4795, "step": 5465 }, { "epoch": 0.8746999519923188, "grad_norm": 0.363701730966568, "learning_rate": 5e-05, "loss": 1.5235, "step": 5466 }, { "epoch": 0.8748599775964154, "grad_norm": 0.3670175075531006, "learning_rate": 5e-05, "loss": 1.4707, "step": 5467 }, { "epoch": 0.8750200032005121, "grad_norm": 0.3802044987678528, "learning_rate": 5e-05, "loss": 1.4618, "step": 5468 }, { "epoch": 0.8751800288046088, "grad_norm": 0.3632291853427887, "learning_rate": 5e-05, "loss": 1.4435, "step": 5469 }, { "epoch": 0.8753400544087054, "grad_norm": 0.37887436151504517, "learning_rate": 5e-05, "loss": 1.4419, "step": 5470 }, { "epoch": 0.875500080012802, "grad_norm": 0.3690086603164673, "learning_rate": 5e-05, "loss": 1.4251, "step": 5471 }, { "epoch": 0.8756601056168987, "grad_norm": 0.34979528188705444, "learning_rate": 5e-05, "loss": 1.4355, "step": 5472 }, { "epoch": 0.8758201312209953, "grad_norm": 0.3747919797897339, "learning_rate": 5e-05, "loss": 1.5282, "step": 5473 }, { "epoch": 0.875980156825092, "grad_norm": 0.37487491965293884, "learning_rate": 5e-05, "loss": 1.5014, "step": 5474 }, { "epoch": 0.8761401824291887, "grad_norm": 0.3906959593296051, "learning_rate": 5e-05, "loss": 1.5808, "step": 5475 }, { "epoch": 0.8763002080332853, "grad_norm": 0.3620370328426361, "learning_rate": 5e-05, "loss": 1.4845, "step": 5476 }, { "epoch": 0.876460233637382, "grad_norm": 0.35891109704971313, "learning_rate": 5e-05, "loss": 1.3998, "step": 5477 }, { "epoch": 0.8766202592414787, "grad_norm": 0.37405553460121155, "learning_rate": 5e-05, "loss": 1.5306, "step": 5478 }, { "epoch": 0.8767802848455752, "grad_norm": 0.3507932424545288, "learning_rate": 5e-05, "loss": 1.4399, "step": 5479 }, { "epoch": 0.8769403104496719, "grad_norm": 0.35850989818573, "learning_rate": 5e-05, "loss": 1.4315, "step": 5480 }, { "epoch": 0.8771003360537686, "grad_norm": 0.3612746596336365, "learning_rate": 5e-05, "loss": 1.4069, "step": 5481 }, { "epoch": 0.8772603616578653, "grad_norm": 0.3649585545063019, "learning_rate": 5e-05, "loss": 1.4881, "step": 5482 }, { "epoch": 0.8774203872619619, "grad_norm": 0.3779907822608948, "learning_rate": 5e-05, "loss": 1.4924, "step": 5483 }, { "epoch": 0.8775804128660586, "grad_norm": 0.3549347221851349, "learning_rate": 5e-05, "loss": 1.3715, "step": 5484 }, { "epoch": 0.8777404384701553, "grad_norm": 0.37235090136528015, "learning_rate": 5e-05, "loss": 1.4484, "step": 5485 }, { "epoch": 0.8779004640742519, "grad_norm": 0.36804521083831787, "learning_rate": 5e-05, "loss": 1.4374, "step": 5486 }, { "epoch": 0.8780604896783485, "grad_norm": 0.3674398362636566, "learning_rate": 5e-05, "loss": 1.4617, "step": 5487 }, { "epoch": 0.8782205152824452, "grad_norm": 0.37930434942245483, "learning_rate": 5e-05, "loss": 1.445, "step": 5488 }, { "epoch": 0.8783805408865418, "grad_norm": 0.3746415674686432, "learning_rate": 5e-05, "loss": 1.4716, "step": 5489 }, { "epoch": 0.8785405664906385, "grad_norm": 0.3723522424697876, "learning_rate": 5e-05, "loss": 1.4557, "step": 5490 }, { "epoch": 0.8787005920947352, "grad_norm": 0.3812197148799896, "learning_rate": 5e-05, "loss": 1.5476, "step": 5491 }, { "epoch": 0.8788606176988318, "grad_norm": 0.3785598576068878, "learning_rate": 5e-05, "loss": 1.4498, "step": 5492 }, { "epoch": 0.8790206433029285, "grad_norm": 0.37774133682250977, "learning_rate": 5e-05, "loss": 1.4679, "step": 5493 }, { "epoch": 0.8791806689070252, "grad_norm": 0.36786848306655884, "learning_rate": 5e-05, "loss": 1.3989, "step": 5494 }, { "epoch": 0.8793406945111217, "grad_norm": 0.3632245659828186, "learning_rate": 5e-05, "loss": 1.4679, "step": 5495 }, { "epoch": 0.8795007201152184, "grad_norm": 0.3523835837841034, "learning_rate": 5e-05, "loss": 1.4431, "step": 5496 }, { "epoch": 0.8796607457193151, "grad_norm": 0.3710480034351349, "learning_rate": 5e-05, "loss": 1.4786, "step": 5497 }, { "epoch": 0.8798207713234117, "grad_norm": 0.3600468635559082, "learning_rate": 5e-05, "loss": 1.4603, "step": 5498 }, { "epoch": 0.8799807969275084, "grad_norm": 0.356418251991272, "learning_rate": 5e-05, "loss": 1.4261, "step": 5499 }, { "epoch": 0.8801408225316051, "grad_norm": 0.39083701372146606, "learning_rate": 5e-05, "loss": 1.505, "step": 5500 }, { "epoch": 0.8803008481357018, "grad_norm": 0.35169678926467896, "learning_rate": 5e-05, "loss": 1.4047, "step": 5501 }, { "epoch": 0.8804608737397984, "grad_norm": 0.3602787256240845, "learning_rate": 5e-05, "loss": 1.453, "step": 5502 }, { "epoch": 0.880620899343895, "grad_norm": 0.3645155131816864, "learning_rate": 5e-05, "loss": 1.4447, "step": 5503 }, { "epoch": 0.8807809249479917, "grad_norm": 0.3860269784927368, "learning_rate": 5e-05, "loss": 1.4006, "step": 5504 }, { "epoch": 0.8809409505520883, "grad_norm": 0.3778201639652252, "learning_rate": 5e-05, "loss": 1.5722, "step": 5505 }, { "epoch": 0.881100976156185, "grad_norm": 0.3643614947795868, "learning_rate": 5e-05, "loss": 1.4235, "step": 5506 }, { "epoch": 0.8812610017602817, "grad_norm": 0.3609601855278015, "learning_rate": 5e-05, "loss": 1.4097, "step": 5507 }, { "epoch": 0.8814210273643783, "grad_norm": 0.3668358027935028, "learning_rate": 5e-05, "loss": 1.4183, "step": 5508 }, { "epoch": 0.881581052968475, "grad_norm": 0.3568459451198578, "learning_rate": 5e-05, "loss": 1.4164, "step": 5509 }, { "epoch": 0.8817410785725716, "grad_norm": 0.36220017075538635, "learning_rate": 5e-05, "loss": 1.4102, "step": 5510 }, { "epoch": 0.8819011041766682, "grad_norm": 0.3718683421611786, "learning_rate": 5e-05, "loss": 1.4927, "step": 5511 }, { "epoch": 0.8820611297807649, "grad_norm": 0.37051713466644287, "learning_rate": 5e-05, "loss": 1.5173, "step": 5512 }, { "epoch": 0.8822211553848616, "grad_norm": 0.36336347460746765, "learning_rate": 5e-05, "loss": 1.4374, "step": 5513 }, { "epoch": 0.8823811809889582, "grad_norm": 0.359122633934021, "learning_rate": 5e-05, "loss": 1.4354, "step": 5514 }, { "epoch": 0.8825412065930549, "grad_norm": 0.3720283508300781, "learning_rate": 5e-05, "loss": 1.4292, "step": 5515 }, { "epoch": 0.8827012321971516, "grad_norm": 0.3757420480251312, "learning_rate": 5e-05, "loss": 1.5402, "step": 5516 }, { "epoch": 0.8828612578012482, "grad_norm": 0.3644154965877533, "learning_rate": 5e-05, "loss": 1.4801, "step": 5517 }, { "epoch": 0.8830212834053448, "grad_norm": 0.35491475462913513, "learning_rate": 5e-05, "loss": 1.3552, "step": 5518 }, { "epoch": 0.8831813090094415, "grad_norm": 0.36735811829566956, "learning_rate": 5e-05, "loss": 1.4725, "step": 5519 }, { "epoch": 0.8833413346135381, "grad_norm": 0.36226344108581543, "learning_rate": 5e-05, "loss": 1.4863, "step": 5520 }, { "epoch": 0.8835013602176348, "grad_norm": 0.35378381609916687, "learning_rate": 5e-05, "loss": 1.3832, "step": 5521 }, { "epoch": 0.8836613858217315, "grad_norm": 0.361770898103714, "learning_rate": 5e-05, "loss": 1.4741, "step": 5522 }, { "epoch": 0.8838214114258282, "grad_norm": 0.36446383595466614, "learning_rate": 5e-05, "loss": 1.5416, "step": 5523 }, { "epoch": 0.8839814370299248, "grad_norm": 0.3580022156238556, "learning_rate": 5e-05, "loss": 1.4475, "step": 5524 }, { "epoch": 0.8841414626340215, "grad_norm": 0.37371453642845154, "learning_rate": 5e-05, "loss": 1.4696, "step": 5525 }, { "epoch": 0.884301488238118, "grad_norm": 0.371502161026001, "learning_rate": 5e-05, "loss": 1.4643, "step": 5526 }, { "epoch": 0.8844615138422147, "grad_norm": 0.3744927644729614, "learning_rate": 5e-05, "loss": 1.4976, "step": 5527 }, { "epoch": 0.8846215394463114, "grad_norm": 0.35214856266975403, "learning_rate": 5e-05, "loss": 1.458, "step": 5528 }, { "epoch": 0.8847815650504081, "grad_norm": 0.35957130789756775, "learning_rate": 5e-05, "loss": 1.3712, "step": 5529 }, { "epoch": 0.8849415906545047, "grad_norm": 0.35225602984428406, "learning_rate": 5e-05, "loss": 1.4296, "step": 5530 }, { "epoch": 0.8851016162586014, "grad_norm": 0.3622243404388428, "learning_rate": 5e-05, "loss": 1.4264, "step": 5531 }, { "epoch": 0.8852616418626981, "grad_norm": 0.36533528566360474, "learning_rate": 5e-05, "loss": 1.405, "step": 5532 }, { "epoch": 0.8854216674667947, "grad_norm": 0.37555453181266785, "learning_rate": 5e-05, "loss": 1.4734, "step": 5533 }, { "epoch": 0.8855816930708913, "grad_norm": 0.3641352653503418, "learning_rate": 5e-05, "loss": 1.3907, "step": 5534 }, { "epoch": 0.885741718674988, "grad_norm": 0.3681337535381317, "learning_rate": 5e-05, "loss": 1.4284, "step": 5535 }, { "epoch": 0.8859017442790846, "grad_norm": 0.36171793937683105, "learning_rate": 5e-05, "loss": 1.4655, "step": 5536 }, { "epoch": 0.8860617698831813, "grad_norm": 0.35104840993881226, "learning_rate": 5e-05, "loss": 1.4198, "step": 5537 }, { "epoch": 0.886221795487278, "grad_norm": 0.37125536799430847, "learning_rate": 5e-05, "loss": 1.4773, "step": 5538 }, { "epoch": 0.8863818210913746, "grad_norm": 0.3701370656490326, "learning_rate": 5e-05, "loss": 1.4687, "step": 5539 }, { "epoch": 0.8865418466954713, "grad_norm": 0.3570254147052765, "learning_rate": 5e-05, "loss": 1.4698, "step": 5540 }, { "epoch": 0.886701872299568, "grad_norm": 0.37418919801712036, "learning_rate": 5e-05, "loss": 1.4663, "step": 5541 }, { "epoch": 0.8868618979036645, "grad_norm": 0.35489651560783386, "learning_rate": 5e-05, "loss": 1.436, "step": 5542 }, { "epoch": 0.8870219235077612, "grad_norm": 0.35494813323020935, "learning_rate": 5e-05, "loss": 1.4558, "step": 5543 }, { "epoch": 0.8871819491118579, "grad_norm": 0.36279788613319397, "learning_rate": 5e-05, "loss": 1.3986, "step": 5544 }, { "epoch": 0.8873419747159546, "grad_norm": 0.36046457290649414, "learning_rate": 5e-05, "loss": 1.4205, "step": 5545 }, { "epoch": 0.8875020003200512, "grad_norm": 0.36607831716537476, "learning_rate": 5e-05, "loss": 1.4822, "step": 5546 }, { "epoch": 0.8876620259241479, "grad_norm": 0.377071350812912, "learning_rate": 5e-05, "loss": 1.4459, "step": 5547 }, { "epoch": 0.8878220515282446, "grad_norm": 0.35451918840408325, "learning_rate": 5e-05, "loss": 1.3832, "step": 5548 }, { "epoch": 0.8879820771323412, "grad_norm": 0.36378487944602966, "learning_rate": 5e-05, "loss": 1.4508, "step": 5549 }, { "epoch": 0.8881421027364378, "grad_norm": 0.3657391667366028, "learning_rate": 5e-05, "loss": 1.4439, "step": 5550 }, { "epoch": 0.8883021283405345, "grad_norm": 0.36289092898368835, "learning_rate": 5e-05, "loss": 1.4514, "step": 5551 }, { "epoch": 0.8884621539446311, "grad_norm": 0.37647107243537903, "learning_rate": 5e-05, "loss": 1.468, "step": 5552 }, { "epoch": 0.8886221795487278, "grad_norm": 0.3628205955028534, "learning_rate": 5e-05, "loss": 1.3846, "step": 5553 }, { "epoch": 0.8887822051528245, "grad_norm": 0.3531988859176636, "learning_rate": 5e-05, "loss": 1.4096, "step": 5554 }, { "epoch": 0.8889422307569211, "grad_norm": 0.36164960265159607, "learning_rate": 5e-05, "loss": 1.4336, "step": 5555 }, { "epoch": 0.8891022563610178, "grad_norm": 0.362061083316803, "learning_rate": 5e-05, "loss": 1.4292, "step": 5556 }, { "epoch": 0.8892622819651144, "grad_norm": 0.361611545085907, "learning_rate": 5e-05, "loss": 1.3746, "step": 5557 }, { "epoch": 0.889422307569211, "grad_norm": 0.38692858815193176, "learning_rate": 5e-05, "loss": 1.4632, "step": 5558 }, { "epoch": 0.8895823331733077, "grad_norm": 0.37850359082221985, "learning_rate": 5e-05, "loss": 1.4345, "step": 5559 }, { "epoch": 0.8897423587774044, "grad_norm": 0.38663649559020996, "learning_rate": 5e-05, "loss": 1.5228, "step": 5560 }, { "epoch": 0.889902384381501, "grad_norm": 0.38772863149642944, "learning_rate": 5e-05, "loss": 1.4651, "step": 5561 }, { "epoch": 0.8900624099855977, "grad_norm": 0.3750216066837311, "learning_rate": 5e-05, "loss": 1.496, "step": 5562 }, { "epoch": 0.8902224355896944, "grad_norm": 0.35943087935447693, "learning_rate": 5e-05, "loss": 1.4226, "step": 5563 }, { "epoch": 0.890382461193791, "grad_norm": 0.37144598364830017, "learning_rate": 5e-05, "loss": 1.4622, "step": 5564 }, { "epoch": 0.8905424867978876, "grad_norm": 0.37020787596702576, "learning_rate": 5e-05, "loss": 1.4068, "step": 5565 }, { "epoch": 0.8907025124019843, "grad_norm": 0.369259774684906, "learning_rate": 5e-05, "loss": 1.4384, "step": 5566 }, { "epoch": 0.890862538006081, "grad_norm": 0.3647801876068115, "learning_rate": 5e-05, "loss": 1.4702, "step": 5567 }, { "epoch": 0.8910225636101776, "grad_norm": 0.3740254044532776, "learning_rate": 5e-05, "loss": 1.4413, "step": 5568 }, { "epoch": 0.8911825892142743, "grad_norm": 0.38439133763313293, "learning_rate": 5e-05, "loss": 1.3724, "step": 5569 }, { "epoch": 0.891342614818371, "grad_norm": 0.378316193819046, "learning_rate": 5e-05, "loss": 1.4339, "step": 5570 }, { "epoch": 0.8915026404224676, "grad_norm": 0.3949359655380249, "learning_rate": 5e-05, "loss": 1.4347, "step": 5571 }, { "epoch": 0.8916626660265643, "grad_norm": 0.37877899408340454, "learning_rate": 5e-05, "loss": 1.4454, "step": 5572 }, { "epoch": 0.8918226916306609, "grad_norm": 0.3642374277114868, "learning_rate": 5e-05, "loss": 1.4131, "step": 5573 }, { "epoch": 0.8919827172347575, "grad_norm": 0.37167176604270935, "learning_rate": 5e-05, "loss": 1.4463, "step": 5574 }, { "epoch": 0.8921427428388542, "grad_norm": 0.3711163103580475, "learning_rate": 5e-05, "loss": 1.5219, "step": 5575 }, { "epoch": 0.8923027684429509, "grad_norm": 0.3554505407810211, "learning_rate": 5e-05, "loss": 1.5009, "step": 5576 }, { "epoch": 0.8924627940470475, "grad_norm": 0.3703921139240265, "learning_rate": 5e-05, "loss": 1.427, "step": 5577 }, { "epoch": 0.8926228196511442, "grad_norm": 0.35570028424263, "learning_rate": 5e-05, "loss": 1.4246, "step": 5578 }, { "epoch": 0.8927828452552409, "grad_norm": 0.388920396566391, "learning_rate": 5e-05, "loss": 1.4867, "step": 5579 }, { "epoch": 0.8929428708593375, "grad_norm": 0.34813597798347473, "learning_rate": 5e-05, "loss": 1.3291, "step": 5580 }, { "epoch": 0.8931028964634341, "grad_norm": 0.3603309392929077, "learning_rate": 5e-05, "loss": 1.4573, "step": 5581 }, { "epoch": 0.8932629220675308, "grad_norm": 0.35889413952827454, "learning_rate": 5e-05, "loss": 1.4584, "step": 5582 }, { "epoch": 0.8934229476716274, "grad_norm": 0.3679791986942291, "learning_rate": 5e-05, "loss": 1.4409, "step": 5583 }, { "epoch": 0.8935829732757241, "grad_norm": 0.36947324872016907, "learning_rate": 5e-05, "loss": 1.4536, "step": 5584 }, { "epoch": 0.8937429988798208, "grad_norm": 0.3630794584751129, "learning_rate": 5e-05, "loss": 1.4668, "step": 5585 }, { "epoch": 0.8939030244839175, "grad_norm": 0.3575204312801361, "learning_rate": 5e-05, "loss": 1.4312, "step": 5586 }, { "epoch": 0.8940630500880141, "grad_norm": 0.3653874695301056, "learning_rate": 5e-05, "loss": 1.4115, "step": 5587 }, { "epoch": 0.8942230756921108, "grad_norm": 0.3632062077522278, "learning_rate": 5e-05, "loss": 1.4546, "step": 5588 }, { "epoch": 0.8943831012962074, "grad_norm": 0.35567352175712585, "learning_rate": 5e-05, "loss": 1.4131, "step": 5589 }, { "epoch": 0.894543126900304, "grad_norm": 0.3855528235435486, "learning_rate": 5e-05, "loss": 1.4824, "step": 5590 }, { "epoch": 0.8947031525044007, "grad_norm": 0.36868157982826233, "learning_rate": 5e-05, "loss": 1.4547, "step": 5591 }, { "epoch": 0.8948631781084974, "grad_norm": 0.3677530884742737, "learning_rate": 5e-05, "loss": 1.4634, "step": 5592 }, { "epoch": 0.895023203712594, "grad_norm": 0.3751741349697113, "learning_rate": 5e-05, "loss": 1.4455, "step": 5593 }, { "epoch": 0.8951832293166907, "grad_norm": 0.36660531163215637, "learning_rate": 5e-05, "loss": 1.4223, "step": 5594 }, { "epoch": 0.8953432549207874, "grad_norm": 0.367941677570343, "learning_rate": 5e-05, "loss": 1.5078, "step": 5595 }, { "epoch": 0.8955032805248839, "grad_norm": 0.34723523259162903, "learning_rate": 5e-05, "loss": 1.4086, "step": 5596 }, { "epoch": 0.8956633061289806, "grad_norm": 0.35862258076667786, "learning_rate": 5e-05, "loss": 1.431, "step": 5597 }, { "epoch": 0.8958233317330773, "grad_norm": 0.366621196269989, "learning_rate": 5e-05, "loss": 1.4207, "step": 5598 }, { "epoch": 0.8959833573371739, "grad_norm": 0.3657248616218567, "learning_rate": 5e-05, "loss": 1.3953, "step": 5599 }, { "epoch": 0.8961433829412706, "grad_norm": 0.37775087356567383, "learning_rate": 5e-05, "loss": 1.5404, "step": 5600 }, { "epoch": 0.8963034085453673, "grad_norm": 0.3657645583152771, "learning_rate": 5e-05, "loss": 1.505, "step": 5601 }, { "epoch": 0.896463434149464, "grad_norm": 0.35897552967071533, "learning_rate": 5e-05, "loss": 1.472, "step": 5602 }, { "epoch": 0.8966234597535606, "grad_norm": 0.34784644842147827, "learning_rate": 5e-05, "loss": 1.4498, "step": 5603 }, { "epoch": 0.8967834853576572, "grad_norm": 0.3618248701095581, "learning_rate": 5e-05, "loss": 1.3599, "step": 5604 }, { "epoch": 0.8969435109617538, "grad_norm": 0.3719502091407776, "learning_rate": 5e-05, "loss": 1.4402, "step": 5605 }, { "epoch": 0.8971035365658505, "grad_norm": 0.37745144963264465, "learning_rate": 5e-05, "loss": 1.478, "step": 5606 }, { "epoch": 0.8972635621699472, "grad_norm": 0.38432231545448303, "learning_rate": 5e-05, "loss": 1.4487, "step": 5607 }, { "epoch": 0.8974235877740439, "grad_norm": 0.37823113799095154, "learning_rate": 5e-05, "loss": 1.452, "step": 5608 }, { "epoch": 0.8975836133781405, "grad_norm": 0.34320059418678284, "learning_rate": 5e-05, "loss": 1.3692, "step": 5609 }, { "epoch": 0.8977436389822372, "grad_norm": 0.3667503893375397, "learning_rate": 5e-05, "loss": 1.3976, "step": 5610 }, { "epoch": 0.8979036645863339, "grad_norm": 0.3669922351837158, "learning_rate": 5e-05, "loss": 1.4751, "step": 5611 }, { "epoch": 0.8980636901904304, "grad_norm": 0.37583616375923157, "learning_rate": 5e-05, "loss": 1.4557, "step": 5612 }, { "epoch": 0.8982237157945271, "grad_norm": 0.3622366189956665, "learning_rate": 5e-05, "loss": 1.4842, "step": 5613 }, { "epoch": 0.8983837413986238, "grad_norm": 0.3729687035083771, "learning_rate": 5e-05, "loss": 1.4797, "step": 5614 }, { "epoch": 0.8985437670027204, "grad_norm": 0.3894135057926178, "learning_rate": 5e-05, "loss": 1.4834, "step": 5615 }, { "epoch": 0.8987037926068171, "grad_norm": 0.3792852759361267, "learning_rate": 5e-05, "loss": 1.4598, "step": 5616 }, { "epoch": 0.8988638182109138, "grad_norm": 0.3694400489330292, "learning_rate": 5e-05, "loss": 1.4616, "step": 5617 }, { "epoch": 0.8990238438150104, "grad_norm": 0.3632679879665375, "learning_rate": 5e-05, "loss": 1.4978, "step": 5618 }, { "epoch": 0.8991838694191071, "grad_norm": 0.38021379709243774, "learning_rate": 5e-05, "loss": 1.4221, "step": 5619 }, { "epoch": 0.8993438950232037, "grad_norm": 0.39040881395339966, "learning_rate": 5e-05, "loss": 1.3586, "step": 5620 }, { "epoch": 0.8995039206273003, "grad_norm": 0.37264418601989746, "learning_rate": 5e-05, "loss": 1.3965, "step": 5621 }, { "epoch": 0.899663946231397, "grad_norm": 0.3685149550437927, "learning_rate": 5e-05, "loss": 1.4402, "step": 5622 }, { "epoch": 0.8998239718354937, "grad_norm": 0.3652861416339874, "learning_rate": 5e-05, "loss": 1.4147, "step": 5623 }, { "epoch": 0.8999839974395903, "grad_norm": 0.37393778562545776, "learning_rate": 5e-05, "loss": 1.4701, "step": 5624 }, { "epoch": 0.900144023043687, "grad_norm": 0.36308303475379944, "learning_rate": 5e-05, "loss": 1.41, "step": 5625 }, { "epoch": 0.9003040486477837, "grad_norm": 0.3684718906879425, "learning_rate": 5e-05, "loss": 1.4582, "step": 5626 }, { "epoch": 0.9004640742518804, "grad_norm": 0.36339321732521057, "learning_rate": 5e-05, "loss": 1.4717, "step": 5627 }, { "epoch": 0.9006240998559769, "grad_norm": 0.3556152284145355, "learning_rate": 5e-05, "loss": 1.4763, "step": 5628 }, { "epoch": 0.9007841254600736, "grad_norm": 0.3632141053676605, "learning_rate": 5e-05, "loss": 1.4126, "step": 5629 }, { "epoch": 0.9009441510641703, "grad_norm": 0.3743639886379242, "learning_rate": 5e-05, "loss": 1.4039, "step": 5630 }, { "epoch": 0.9011041766682669, "grad_norm": 0.3619968891143799, "learning_rate": 5e-05, "loss": 1.3873, "step": 5631 }, { "epoch": 0.9012642022723636, "grad_norm": 0.38414207100868225, "learning_rate": 5e-05, "loss": 1.5407, "step": 5632 }, { "epoch": 0.9014242278764603, "grad_norm": 0.3830493986606598, "learning_rate": 5e-05, "loss": 1.4906, "step": 5633 }, { "epoch": 0.9015842534805569, "grad_norm": 0.3691459000110626, "learning_rate": 5e-05, "loss": 1.3674, "step": 5634 }, { "epoch": 0.9017442790846536, "grad_norm": 0.34529224038124084, "learning_rate": 5e-05, "loss": 1.3559, "step": 5635 }, { "epoch": 0.9019043046887502, "grad_norm": 0.3663610816001892, "learning_rate": 5e-05, "loss": 1.4515, "step": 5636 }, { "epoch": 0.9020643302928468, "grad_norm": 0.37696337699890137, "learning_rate": 5e-05, "loss": 1.5336, "step": 5637 }, { "epoch": 0.9022243558969435, "grad_norm": 0.35528379678726196, "learning_rate": 5e-05, "loss": 1.3566, "step": 5638 }, { "epoch": 0.9023843815010402, "grad_norm": 0.3794311285018921, "learning_rate": 5e-05, "loss": 1.5387, "step": 5639 }, { "epoch": 0.9025444071051368, "grad_norm": 0.35959315299987793, "learning_rate": 5e-05, "loss": 1.4054, "step": 5640 }, { "epoch": 0.9027044327092335, "grad_norm": 0.36242911219596863, "learning_rate": 5e-05, "loss": 1.4913, "step": 5641 }, { "epoch": 0.9028644583133302, "grad_norm": 0.36538606882095337, "learning_rate": 5e-05, "loss": 1.5216, "step": 5642 }, { "epoch": 0.9030244839174267, "grad_norm": 0.3597790598869324, "learning_rate": 5e-05, "loss": 1.4747, "step": 5643 }, { "epoch": 0.9031845095215234, "grad_norm": 0.3586113452911377, "learning_rate": 5e-05, "loss": 1.4081, "step": 5644 }, { "epoch": 0.9033445351256201, "grad_norm": 0.3809299170970917, "learning_rate": 5e-05, "loss": 1.4743, "step": 5645 }, { "epoch": 0.9035045607297167, "grad_norm": 0.3883845806121826, "learning_rate": 5e-05, "loss": 1.5261, "step": 5646 }, { "epoch": 0.9036645863338134, "grad_norm": 0.35491305589675903, "learning_rate": 5e-05, "loss": 1.429, "step": 5647 }, { "epoch": 0.9038246119379101, "grad_norm": 0.35570067167282104, "learning_rate": 5e-05, "loss": 1.3252, "step": 5648 }, { "epoch": 0.9039846375420068, "grad_norm": 0.37430518865585327, "learning_rate": 5e-05, "loss": 1.4741, "step": 5649 }, { "epoch": 0.9041446631461034, "grad_norm": 0.37150299549102783, "learning_rate": 5e-05, "loss": 1.3784, "step": 5650 }, { "epoch": 0.9043046887502, "grad_norm": 0.36918672919273376, "learning_rate": 5e-05, "loss": 1.4532, "step": 5651 }, { "epoch": 0.9044647143542967, "grad_norm": 0.37651824951171875, "learning_rate": 5e-05, "loss": 1.4387, "step": 5652 }, { "epoch": 0.9046247399583933, "grad_norm": 0.3690677881240845, "learning_rate": 5e-05, "loss": 1.3821, "step": 5653 }, { "epoch": 0.90478476556249, "grad_norm": 0.37415724992752075, "learning_rate": 5e-05, "loss": 1.4637, "step": 5654 }, { "epoch": 0.9049447911665867, "grad_norm": 0.37088534235954285, "learning_rate": 5e-05, "loss": 1.4296, "step": 5655 }, { "epoch": 0.9051048167706833, "grad_norm": 0.3734617531299591, "learning_rate": 5e-05, "loss": 1.4285, "step": 5656 }, { "epoch": 0.90526484237478, "grad_norm": 0.36704713106155396, "learning_rate": 5e-05, "loss": 1.3793, "step": 5657 }, { "epoch": 0.9054248679788767, "grad_norm": 0.3766254186630249, "learning_rate": 5e-05, "loss": 1.4695, "step": 5658 }, { "epoch": 0.9055848935829732, "grad_norm": 0.3827187716960907, "learning_rate": 5e-05, "loss": 1.5925, "step": 5659 }, { "epoch": 0.9057449191870699, "grad_norm": 0.37188655138015747, "learning_rate": 5e-05, "loss": 1.4558, "step": 5660 }, { "epoch": 0.9059049447911666, "grad_norm": 0.3759700357913971, "learning_rate": 5e-05, "loss": 1.4597, "step": 5661 }, { "epoch": 0.9060649703952632, "grad_norm": 0.36566343903541565, "learning_rate": 5e-05, "loss": 1.4545, "step": 5662 }, { "epoch": 0.9062249959993599, "grad_norm": 0.3719598054885864, "learning_rate": 5e-05, "loss": 1.4515, "step": 5663 }, { "epoch": 0.9063850216034566, "grad_norm": 0.3644522428512573, "learning_rate": 5e-05, "loss": 1.4748, "step": 5664 }, { "epoch": 0.9065450472075532, "grad_norm": 0.38823434710502625, "learning_rate": 5e-05, "loss": 1.4688, "step": 5665 }, { "epoch": 0.9067050728116499, "grad_norm": 0.3702561855316162, "learning_rate": 5e-05, "loss": 1.4751, "step": 5666 }, { "epoch": 0.9068650984157465, "grad_norm": 0.3569093644618988, "learning_rate": 5e-05, "loss": 1.379, "step": 5667 }, { "epoch": 0.9070251240198431, "grad_norm": 0.38404524326324463, "learning_rate": 5e-05, "loss": 1.4536, "step": 5668 }, { "epoch": 0.9071851496239398, "grad_norm": 0.37146157026290894, "learning_rate": 5e-05, "loss": 1.4819, "step": 5669 }, { "epoch": 0.9073451752280365, "grad_norm": 0.36435171961784363, "learning_rate": 5e-05, "loss": 1.411, "step": 5670 }, { "epoch": 0.9075052008321332, "grad_norm": 0.3637303411960602, "learning_rate": 5e-05, "loss": 1.4278, "step": 5671 }, { "epoch": 0.9076652264362298, "grad_norm": 0.3697696626186371, "learning_rate": 5e-05, "loss": 1.42, "step": 5672 }, { "epoch": 0.9078252520403265, "grad_norm": 0.3579164445400238, "learning_rate": 5e-05, "loss": 1.4684, "step": 5673 }, { "epoch": 0.9079852776444232, "grad_norm": 0.3606559932231903, "learning_rate": 5e-05, "loss": 1.3482, "step": 5674 }, { "epoch": 0.9081453032485197, "grad_norm": 0.3622587025165558, "learning_rate": 5e-05, "loss": 1.4523, "step": 5675 }, { "epoch": 0.9083053288526164, "grad_norm": 0.3698752224445343, "learning_rate": 5e-05, "loss": 1.4762, "step": 5676 }, { "epoch": 0.9084653544567131, "grad_norm": 0.35771945118904114, "learning_rate": 5e-05, "loss": 1.4188, "step": 5677 }, { "epoch": 0.9086253800608097, "grad_norm": 0.3648870587348938, "learning_rate": 5e-05, "loss": 1.4063, "step": 5678 }, { "epoch": 0.9087854056649064, "grad_norm": 0.3725734353065491, "learning_rate": 5e-05, "loss": 1.4427, "step": 5679 }, { "epoch": 0.9089454312690031, "grad_norm": 0.35386985540390015, "learning_rate": 5e-05, "loss": 1.4195, "step": 5680 }, { "epoch": 0.9091054568730997, "grad_norm": 0.3598414659500122, "learning_rate": 5e-05, "loss": 1.4454, "step": 5681 }, { "epoch": 0.9092654824771963, "grad_norm": 0.37617433071136475, "learning_rate": 5e-05, "loss": 1.4721, "step": 5682 }, { "epoch": 0.909425508081293, "grad_norm": 0.35298147797584534, "learning_rate": 5e-05, "loss": 1.4208, "step": 5683 }, { "epoch": 0.9095855336853896, "grad_norm": 0.3694862425327301, "learning_rate": 5e-05, "loss": 1.5252, "step": 5684 }, { "epoch": 0.9097455592894863, "grad_norm": 0.3628523647785187, "learning_rate": 5e-05, "loss": 1.4573, "step": 5685 }, { "epoch": 0.909905584893583, "grad_norm": 0.38050174713134766, "learning_rate": 5e-05, "loss": 1.4894, "step": 5686 }, { "epoch": 0.9100656104976796, "grad_norm": 0.354182630777359, "learning_rate": 5e-05, "loss": 1.4494, "step": 5687 }, { "epoch": 0.9102256361017763, "grad_norm": 0.35752299427986145, "learning_rate": 5e-05, "loss": 1.4667, "step": 5688 }, { "epoch": 0.910385661705873, "grad_norm": 0.3639729917049408, "learning_rate": 5e-05, "loss": 1.3907, "step": 5689 }, { "epoch": 0.9105456873099695, "grad_norm": 0.383240669965744, "learning_rate": 5e-05, "loss": 1.4675, "step": 5690 }, { "epoch": 0.9107057129140662, "grad_norm": 0.36734142899513245, "learning_rate": 5e-05, "loss": 1.4673, "step": 5691 }, { "epoch": 0.9108657385181629, "grad_norm": 0.37509623169898987, "learning_rate": 5e-05, "loss": 1.4255, "step": 5692 }, { "epoch": 0.9110257641222596, "grad_norm": 0.3524489998817444, "learning_rate": 5e-05, "loss": 1.413, "step": 5693 }, { "epoch": 0.9111857897263562, "grad_norm": 0.351548969745636, "learning_rate": 5e-05, "loss": 1.3879, "step": 5694 }, { "epoch": 0.9113458153304529, "grad_norm": 0.35528257489204407, "learning_rate": 5e-05, "loss": 1.4275, "step": 5695 }, { "epoch": 0.9115058409345496, "grad_norm": 0.373381644487381, "learning_rate": 5e-05, "loss": 1.4658, "step": 5696 }, { "epoch": 0.9116658665386462, "grad_norm": 0.3751133978366852, "learning_rate": 5e-05, "loss": 1.4305, "step": 5697 }, { "epoch": 0.9118258921427428, "grad_norm": 0.3981589376926422, "learning_rate": 5e-05, "loss": 1.5399, "step": 5698 }, { "epoch": 0.9119859177468395, "grad_norm": 0.36710065603256226, "learning_rate": 5e-05, "loss": 1.4779, "step": 5699 }, { "epoch": 0.9121459433509361, "grad_norm": 0.3770950734615326, "learning_rate": 5e-05, "loss": 1.5268, "step": 5700 }, { "epoch": 0.9123059689550328, "grad_norm": 0.37433719635009766, "learning_rate": 5e-05, "loss": 1.4964, "step": 5701 }, { "epoch": 0.9124659945591295, "grad_norm": 0.3663307726383209, "learning_rate": 5e-05, "loss": 1.4975, "step": 5702 }, { "epoch": 0.9126260201632261, "grad_norm": 0.3657505512237549, "learning_rate": 5e-05, "loss": 1.4653, "step": 5703 }, { "epoch": 0.9127860457673228, "grad_norm": 0.37711939215660095, "learning_rate": 5e-05, "loss": 1.5159, "step": 5704 }, { "epoch": 0.9129460713714195, "grad_norm": 0.3837372958660126, "learning_rate": 5e-05, "loss": 1.4099, "step": 5705 }, { "epoch": 0.913106096975516, "grad_norm": 0.37067317962646484, "learning_rate": 5e-05, "loss": 1.4695, "step": 5706 }, { "epoch": 0.9132661225796127, "grad_norm": 0.3605775535106659, "learning_rate": 5e-05, "loss": 1.4473, "step": 5707 }, { "epoch": 0.9134261481837094, "grad_norm": 0.38009360432624817, "learning_rate": 5e-05, "loss": 1.4466, "step": 5708 }, { "epoch": 0.913586173787806, "grad_norm": 0.3669964671134949, "learning_rate": 5e-05, "loss": 1.4312, "step": 5709 }, { "epoch": 0.9137461993919027, "grad_norm": 0.385575532913208, "learning_rate": 5e-05, "loss": 1.4981, "step": 5710 }, { "epoch": 0.9139062249959994, "grad_norm": 0.36591607332229614, "learning_rate": 5e-05, "loss": 1.4218, "step": 5711 }, { "epoch": 0.914066250600096, "grad_norm": 0.3554375469684601, "learning_rate": 5e-05, "loss": 1.3978, "step": 5712 }, { "epoch": 0.9142262762041927, "grad_norm": 0.36857685446739197, "learning_rate": 5e-05, "loss": 1.3646, "step": 5713 }, { "epoch": 0.9143863018082893, "grad_norm": 0.3689711093902588, "learning_rate": 5e-05, "loss": 1.4339, "step": 5714 }, { "epoch": 0.914546327412386, "grad_norm": 0.36340218782424927, "learning_rate": 5e-05, "loss": 1.4945, "step": 5715 }, { "epoch": 0.9147063530164826, "grad_norm": 0.3549066483974457, "learning_rate": 5e-05, "loss": 1.4307, "step": 5716 }, { "epoch": 0.9148663786205793, "grad_norm": 0.3900928795337677, "learning_rate": 5e-05, "loss": 1.4713, "step": 5717 }, { "epoch": 0.915026404224676, "grad_norm": 0.3693355619907379, "learning_rate": 5e-05, "loss": 1.4903, "step": 5718 }, { "epoch": 0.9151864298287726, "grad_norm": 0.3612233102321625, "learning_rate": 5e-05, "loss": 1.4064, "step": 5719 }, { "epoch": 0.9153464554328693, "grad_norm": 0.368056982755661, "learning_rate": 5e-05, "loss": 1.5018, "step": 5720 }, { "epoch": 0.915506481036966, "grad_norm": 0.359126478433609, "learning_rate": 5e-05, "loss": 1.3941, "step": 5721 }, { "epoch": 0.9156665066410625, "grad_norm": 0.3622499704360962, "learning_rate": 5e-05, "loss": 1.4222, "step": 5722 }, { "epoch": 0.9158265322451592, "grad_norm": 0.36759844422340393, "learning_rate": 5e-05, "loss": 1.3735, "step": 5723 }, { "epoch": 0.9159865578492559, "grad_norm": 0.36537984013557434, "learning_rate": 5e-05, "loss": 1.4366, "step": 5724 }, { "epoch": 0.9161465834533525, "grad_norm": 0.3591057360172272, "learning_rate": 5e-05, "loss": 1.4544, "step": 5725 }, { "epoch": 0.9163066090574492, "grad_norm": 0.37569189071655273, "learning_rate": 5e-05, "loss": 1.4289, "step": 5726 }, { "epoch": 0.9164666346615459, "grad_norm": 0.3699096739292145, "learning_rate": 5e-05, "loss": 1.4102, "step": 5727 }, { "epoch": 0.9166266602656425, "grad_norm": 0.37283480167388916, "learning_rate": 5e-05, "loss": 1.4491, "step": 5728 }, { "epoch": 0.9167866858697391, "grad_norm": 0.3665213882923126, "learning_rate": 5e-05, "loss": 1.3917, "step": 5729 }, { "epoch": 0.9169467114738358, "grad_norm": 0.37518662214279175, "learning_rate": 5e-05, "loss": 1.4669, "step": 5730 }, { "epoch": 0.9171067370779324, "grad_norm": 0.3673880100250244, "learning_rate": 5e-05, "loss": 1.4517, "step": 5731 }, { "epoch": 0.9172667626820291, "grad_norm": 0.37013062834739685, "learning_rate": 5e-05, "loss": 1.4305, "step": 5732 }, { "epoch": 0.9174267882861258, "grad_norm": 0.36731448769569397, "learning_rate": 5e-05, "loss": 1.4044, "step": 5733 }, { "epoch": 0.9175868138902225, "grad_norm": 0.3453420400619507, "learning_rate": 5e-05, "loss": 1.3747, "step": 5734 }, { "epoch": 0.9177468394943191, "grad_norm": 0.38351255655288696, "learning_rate": 5e-05, "loss": 1.4851, "step": 5735 }, { "epoch": 0.9179068650984158, "grad_norm": 0.35550811886787415, "learning_rate": 5e-05, "loss": 1.3689, "step": 5736 }, { "epoch": 0.9180668907025124, "grad_norm": 0.3828578591346741, "learning_rate": 5e-05, "loss": 1.5038, "step": 5737 }, { "epoch": 0.918226916306609, "grad_norm": 0.3784162402153015, "learning_rate": 5e-05, "loss": 1.448, "step": 5738 }, { "epoch": 0.9183869419107057, "grad_norm": 0.36521589756011963, "learning_rate": 5e-05, "loss": 1.3855, "step": 5739 }, { "epoch": 0.9185469675148024, "grad_norm": 0.37187284231185913, "learning_rate": 5e-05, "loss": 1.4645, "step": 5740 }, { "epoch": 0.918706993118899, "grad_norm": 0.3649902641773224, "learning_rate": 5e-05, "loss": 1.386, "step": 5741 }, { "epoch": 0.9188670187229957, "grad_norm": 0.3597240746021271, "learning_rate": 5e-05, "loss": 1.4712, "step": 5742 }, { "epoch": 0.9190270443270924, "grad_norm": 0.3578983545303345, "learning_rate": 5e-05, "loss": 1.3476, "step": 5743 }, { "epoch": 0.919187069931189, "grad_norm": 0.3570944666862488, "learning_rate": 5e-05, "loss": 1.4032, "step": 5744 }, { "epoch": 0.9193470955352856, "grad_norm": 0.35794150829315186, "learning_rate": 5e-05, "loss": 1.4861, "step": 5745 }, { "epoch": 0.9195071211393823, "grad_norm": 0.38727447390556335, "learning_rate": 5e-05, "loss": 1.4888, "step": 5746 }, { "epoch": 0.9196671467434789, "grad_norm": 0.36816519498825073, "learning_rate": 5e-05, "loss": 1.4574, "step": 5747 }, { "epoch": 0.9198271723475756, "grad_norm": 0.39092469215393066, "learning_rate": 5e-05, "loss": 1.3915, "step": 5748 }, { "epoch": 0.9199871979516723, "grad_norm": 0.3838469088077545, "learning_rate": 5e-05, "loss": 1.4791, "step": 5749 }, { "epoch": 0.920147223555769, "grad_norm": 0.3699227273464203, "learning_rate": 5e-05, "loss": 1.454, "step": 5750 }, { "epoch": 0.9203072491598656, "grad_norm": 0.36369946599006653, "learning_rate": 5e-05, "loss": 1.4694, "step": 5751 }, { "epoch": 0.9204672747639623, "grad_norm": 0.3637795150279999, "learning_rate": 5e-05, "loss": 1.4309, "step": 5752 }, { "epoch": 0.9206273003680588, "grad_norm": 0.38611170649528503, "learning_rate": 5e-05, "loss": 1.4628, "step": 5753 }, { "epoch": 0.9207873259721555, "grad_norm": 0.3661850094795227, "learning_rate": 5e-05, "loss": 1.3836, "step": 5754 }, { "epoch": 0.9209473515762522, "grad_norm": 0.3870300352573395, "learning_rate": 5e-05, "loss": 1.3727, "step": 5755 }, { "epoch": 0.9211073771803489, "grad_norm": 0.3590182960033417, "learning_rate": 5e-05, "loss": 1.4595, "step": 5756 }, { "epoch": 0.9212674027844455, "grad_norm": 0.3936000168323517, "learning_rate": 5e-05, "loss": 1.4679, "step": 5757 }, { "epoch": 0.9214274283885422, "grad_norm": 0.3835376799106598, "learning_rate": 5e-05, "loss": 1.499, "step": 5758 }, { "epoch": 0.9215874539926389, "grad_norm": 0.37412503361701965, "learning_rate": 5e-05, "loss": 1.4496, "step": 5759 }, { "epoch": 0.9217474795967355, "grad_norm": 0.3747091293334961, "learning_rate": 5e-05, "loss": 1.4504, "step": 5760 }, { "epoch": 0.9219075052008321, "grad_norm": 0.37780073285102844, "learning_rate": 5e-05, "loss": 1.5021, "step": 5761 }, { "epoch": 0.9220675308049288, "grad_norm": 0.38974350690841675, "learning_rate": 5e-05, "loss": 1.4398, "step": 5762 }, { "epoch": 0.9222275564090254, "grad_norm": 0.3845173120498657, "learning_rate": 5e-05, "loss": 1.5405, "step": 5763 }, { "epoch": 0.9223875820131221, "grad_norm": 0.3610118627548218, "learning_rate": 5e-05, "loss": 1.4499, "step": 5764 }, { "epoch": 0.9225476076172188, "grad_norm": 0.3731516897678375, "learning_rate": 5e-05, "loss": 1.5132, "step": 5765 }, { "epoch": 0.9227076332213154, "grad_norm": 0.36876600980758667, "learning_rate": 5e-05, "loss": 1.4741, "step": 5766 }, { "epoch": 0.9228676588254121, "grad_norm": 0.3675035238265991, "learning_rate": 5e-05, "loss": 1.3542, "step": 5767 }, { "epoch": 0.9230276844295088, "grad_norm": 0.37250810861587524, "learning_rate": 5e-05, "loss": 1.4405, "step": 5768 }, { "epoch": 0.9231877100336053, "grad_norm": 0.3726073205471039, "learning_rate": 5e-05, "loss": 1.4576, "step": 5769 }, { "epoch": 0.923347735637702, "grad_norm": 0.37701141834259033, "learning_rate": 5e-05, "loss": 1.473, "step": 5770 }, { "epoch": 0.9235077612417987, "grad_norm": 0.3613712191581726, "learning_rate": 5e-05, "loss": 1.3893, "step": 5771 }, { "epoch": 0.9236677868458953, "grad_norm": 0.3740360736846924, "learning_rate": 5e-05, "loss": 1.4107, "step": 5772 }, { "epoch": 0.923827812449992, "grad_norm": 0.3515617251396179, "learning_rate": 5e-05, "loss": 1.4207, "step": 5773 }, { "epoch": 0.9239878380540887, "grad_norm": 0.3601888418197632, "learning_rate": 5e-05, "loss": 1.4327, "step": 5774 }, { "epoch": 0.9241478636581854, "grad_norm": 0.3810305595397949, "learning_rate": 5e-05, "loss": 1.4962, "step": 5775 }, { "epoch": 0.9243078892622819, "grad_norm": 0.35962367057800293, "learning_rate": 5e-05, "loss": 1.4443, "step": 5776 }, { "epoch": 0.9244679148663786, "grad_norm": 0.3727688789367676, "learning_rate": 5e-05, "loss": 1.4281, "step": 5777 }, { "epoch": 0.9246279404704753, "grad_norm": 0.3730946183204651, "learning_rate": 5e-05, "loss": 1.455, "step": 5778 }, { "epoch": 0.9247879660745719, "grad_norm": 0.37425822019577026, "learning_rate": 5e-05, "loss": 1.4365, "step": 5779 }, { "epoch": 0.9249479916786686, "grad_norm": 0.3738550841808319, "learning_rate": 5e-05, "loss": 1.3971, "step": 5780 }, { "epoch": 0.9251080172827653, "grad_norm": 0.3763563632965088, "learning_rate": 5e-05, "loss": 1.4524, "step": 5781 }, { "epoch": 0.9252680428868619, "grad_norm": 0.36747023463249207, "learning_rate": 5e-05, "loss": 1.4687, "step": 5782 }, { "epoch": 0.9254280684909586, "grad_norm": 0.37524595856666565, "learning_rate": 5e-05, "loss": 1.4962, "step": 5783 }, { "epoch": 0.9255880940950552, "grad_norm": 0.3677474856376648, "learning_rate": 5e-05, "loss": 1.4298, "step": 5784 }, { "epoch": 0.9257481196991518, "grad_norm": 0.355825275182724, "learning_rate": 5e-05, "loss": 1.4322, "step": 5785 }, { "epoch": 0.9259081453032485, "grad_norm": 0.35619881749153137, "learning_rate": 5e-05, "loss": 1.3943, "step": 5786 }, { "epoch": 0.9260681709073452, "grad_norm": 0.38543224334716797, "learning_rate": 5e-05, "loss": 1.4705, "step": 5787 }, { "epoch": 0.9262281965114418, "grad_norm": 0.36144089698791504, "learning_rate": 5e-05, "loss": 1.3246, "step": 5788 }, { "epoch": 0.9263882221155385, "grad_norm": 0.36428302526474, "learning_rate": 5e-05, "loss": 1.4498, "step": 5789 }, { "epoch": 0.9265482477196352, "grad_norm": 0.3625429570674896, "learning_rate": 5e-05, "loss": 1.3942, "step": 5790 }, { "epoch": 0.9267082733237318, "grad_norm": 0.3814019560813904, "learning_rate": 5e-05, "loss": 1.4652, "step": 5791 }, { "epoch": 0.9268682989278284, "grad_norm": 0.3823811709880829, "learning_rate": 5e-05, "loss": 1.396, "step": 5792 }, { "epoch": 0.9270283245319251, "grad_norm": 0.3859488070011139, "learning_rate": 5e-05, "loss": 1.434, "step": 5793 }, { "epoch": 0.9271883501360217, "grad_norm": 0.3779498338699341, "learning_rate": 5e-05, "loss": 1.4362, "step": 5794 }, { "epoch": 0.9273483757401184, "grad_norm": 0.3695049285888672, "learning_rate": 5e-05, "loss": 1.4444, "step": 5795 }, { "epoch": 0.9275084013442151, "grad_norm": 0.3659127950668335, "learning_rate": 5e-05, "loss": 1.4178, "step": 5796 }, { "epoch": 0.9276684269483118, "grad_norm": 0.38461169600486755, "learning_rate": 5e-05, "loss": 1.4509, "step": 5797 }, { "epoch": 0.9278284525524084, "grad_norm": 0.3659183979034424, "learning_rate": 5e-05, "loss": 1.46, "step": 5798 }, { "epoch": 0.9279884781565051, "grad_norm": 0.3627694845199585, "learning_rate": 5e-05, "loss": 1.4208, "step": 5799 }, { "epoch": 0.9281485037606017, "grad_norm": 0.3766348361968994, "learning_rate": 5e-05, "loss": 1.4032, "step": 5800 }, { "epoch": 0.9283085293646983, "grad_norm": 0.36252275109291077, "learning_rate": 5e-05, "loss": 1.4483, "step": 5801 }, { "epoch": 0.928468554968795, "grad_norm": 0.3832102119922638, "learning_rate": 5e-05, "loss": 1.4764, "step": 5802 }, { "epoch": 0.9286285805728917, "grad_norm": 0.36857467889785767, "learning_rate": 5e-05, "loss": 1.4525, "step": 5803 }, { "epoch": 0.9287886061769883, "grad_norm": 0.38970404863357544, "learning_rate": 5e-05, "loss": 1.4674, "step": 5804 }, { "epoch": 0.928948631781085, "grad_norm": 0.3750322163105011, "learning_rate": 5e-05, "loss": 1.4672, "step": 5805 }, { "epoch": 0.9291086573851817, "grad_norm": 0.359142005443573, "learning_rate": 5e-05, "loss": 1.4931, "step": 5806 }, { "epoch": 0.9292686829892783, "grad_norm": 0.3527638614177704, "learning_rate": 5e-05, "loss": 1.3945, "step": 5807 }, { "epoch": 0.9294287085933749, "grad_norm": 0.38031378388404846, "learning_rate": 5e-05, "loss": 1.4402, "step": 5808 }, { "epoch": 0.9295887341974716, "grad_norm": 0.3728049099445343, "learning_rate": 5e-05, "loss": 1.4929, "step": 5809 }, { "epoch": 0.9297487598015682, "grad_norm": 0.37456244230270386, "learning_rate": 5e-05, "loss": 1.4924, "step": 5810 }, { "epoch": 0.9299087854056649, "grad_norm": 0.373351514339447, "learning_rate": 5e-05, "loss": 1.4689, "step": 5811 }, { "epoch": 0.9300688110097616, "grad_norm": 0.3661538362503052, "learning_rate": 5e-05, "loss": 1.4023, "step": 5812 }, { "epoch": 0.9302288366138582, "grad_norm": 0.35582420229911804, "learning_rate": 5e-05, "loss": 1.3893, "step": 5813 }, { "epoch": 0.9303888622179549, "grad_norm": 0.3978792428970337, "learning_rate": 5e-05, "loss": 1.5115, "step": 5814 }, { "epoch": 0.9305488878220515, "grad_norm": 0.3657172918319702, "learning_rate": 5e-05, "loss": 1.4365, "step": 5815 }, { "epoch": 0.9307089134261481, "grad_norm": 0.3653806149959564, "learning_rate": 5e-05, "loss": 1.466, "step": 5816 }, { "epoch": 0.9308689390302448, "grad_norm": 0.38559216260910034, "learning_rate": 5e-05, "loss": 1.4839, "step": 5817 }, { "epoch": 0.9310289646343415, "grad_norm": 0.36272749304771423, "learning_rate": 5e-05, "loss": 1.3209, "step": 5818 }, { "epoch": 0.9311889902384382, "grad_norm": 0.37157171964645386, "learning_rate": 5e-05, "loss": 1.4997, "step": 5819 }, { "epoch": 0.9313490158425348, "grad_norm": 0.3624109923839569, "learning_rate": 5e-05, "loss": 1.4712, "step": 5820 }, { "epoch": 0.9315090414466315, "grad_norm": 0.3807007968425751, "learning_rate": 5e-05, "loss": 1.4542, "step": 5821 }, { "epoch": 0.9316690670507282, "grad_norm": 0.38073599338531494, "learning_rate": 5e-05, "loss": 1.51, "step": 5822 }, { "epoch": 0.9318290926548247, "grad_norm": 0.3686768412590027, "learning_rate": 5e-05, "loss": 1.4353, "step": 5823 }, { "epoch": 0.9319891182589214, "grad_norm": 0.3893921673297882, "learning_rate": 5e-05, "loss": 1.4218, "step": 5824 }, { "epoch": 0.9321491438630181, "grad_norm": 0.37082159519195557, "learning_rate": 5e-05, "loss": 1.4503, "step": 5825 }, { "epoch": 0.9323091694671147, "grad_norm": 0.3834281265735626, "learning_rate": 5e-05, "loss": 1.4418, "step": 5826 }, { "epoch": 0.9324691950712114, "grad_norm": 0.3729589581489563, "learning_rate": 5e-05, "loss": 1.4024, "step": 5827 }, { "epoch": 0.9326292206753081, "grad_norm": 0.3560665249824524, "learning_rate": 5e-05, "loss": 1.3651, "step": 5828 }, { "epoch": 0.9327892462794047, "grad_norm": 0.35766521096229553, "learning_rate": 5e-05, "loss": 1.4458, "step": 5829 }, { "epoch": 0.9329492718835014, "grad_norm": 0.3623954653739929, "learning_rate": 5e-05, "loss": 1.3821, "step": 5830 }, { "epoch": 0.933109297487598, "grad_norm": 0.3695722222328186, "learning_rate": 5e-05, "loss": 1.5372, "step": 5831 }, { "epoch": 0.9332693230916946, "grad_norm": 0.3687898814678192, "learning_rate": 5e-05, "loss": 1.4337, "step": 5832 }, { "epoch": 0.9334293486957913, "grad_norm": 0.372966468334198, "learning_rate": 5e-05, "loss": 1.502, "step": 5833 }, { "epoch": 0.933589374299888, "grad_norm": 0.37564119696617126, "learning_rate": 5e-05, "loss": 1.5348, "step": 5834 }, { "epoch": 0.9337493999039846, "grad_norm": 0.376010537147522, "learning_rate": 5e-05, "loss": 1.4462, "step": 5835 }, { "epoch": 0.9339094255080813, "grad_norm": 0.363605797290802, "learning_rate": 5e-05, "loss": 1.4718, "step": 5836 }, { "epoch": 0.934069451112178, "grad_norm": 0.3892132043838501, "learning_rate": 5e-05, "loss": 1.4807, "step": 5837 }, { "epoch": 0.9342294767162747, "grad_norm": 0.3904852271080017, "learning_rate": 5e-05, "loss": 1.442, "step": 5838 }, { "epoch": 0.9343895023203712, "grad_norm": 0.36489832401275635, "learning_rate": 5e-05, "loss": 1.3959, "step": 5839 }, { "epoch": 0.9345495279244679, "grad_norm": 0.36464250087738037, "learning_rate": 5e-05, "loss": 1.4008, "step": 5840 }, { "epoch": 0.9347095535285646, "grad_norm": 0.37507206201553345, "learning_rate": 5e-05, "loss": 1.388, "step": 5841 }, { "epoch": 0.9348695791326612, "grad_norm": 0.3832249343395233, "learning_rate": 5e-05, "loss": 1.5174, "step": 5842 }, { "epoch": 0.9350296047367579, "grad_norm": 0.3799232840538025, "learning_rate": 5e-05, "loss": 1.4622, "step": 5843 }, { "epoch": 0.9351896303408546, "grad_norm": 0.3693777024745941, "learning_rate": 5e-05, "loss": 1.3924, "step": 5844 }, { "epoch": 0.9353496559449512, "grad_norm": 0.3595154881477356, "learning_rate": 5e-05, "loss": 1.4317, "step": 5845 }, { "epoch": 0.9355096815490479, "grad_norm": 0.3741643726825714, "learning_rate": 5e-05, "loss": 1.4112, "step": 5846 }, { "epoch": 0.9356697071531445, "grad_norm": 0.3793279826641083, "learning_rate": 5e-05, "loss": 1.4861, "step": 5847 }, { "epoch": 0.9358297327572411, "grad_norm": 0.3671140670776367, "learning_rate": 5e-05, "loss": 1.4354, "step": 5848 }, { "epoch": 0.9359897583613378, "grad_norm": 0.38175031542778015, "learning_rate": 5e-05, "loss": 1.4497, "step": 5849 }, { "epoch": 0.9361497839654345, "grad_norm": 0.38044533133506775, "learning_rate": 5e-05, "loss": 1.4416, "step": 5850 }, { "epoch": 0.9363098095695311, "grad_norm": 0.3758181035518646, "learning_rate": 5e-05, "loss": 1.3512, "step": 5851 }, { "epoch": 0.9364698351736278, "grad_norm": 0.3709675371646881, "learning_rate": 5e-05, "loss": 1.4372, "step": 5852 }, { "epoch": 0.9366298607777245, "grad_norm": 0.38159939646720886, "learning_rate": 5e-05, "loss": 1.4595, "step": 5853 }, { "epoch": 0.9367898863818211, "grad_norm": 0.3639200031757355, "learning_rate": 5e-05, "loss": 1.3912, "step": 5854 }, { "epoch": 0.9369499119859177, "grad_norm": 0.3606773614883423, "learning_rate": 5e-05, "loss": 1.3896, "step": 5855 }, { "epoch": 0.9371099375900144, "grad_norm": 0.3757941722869873, "learning_rate": 5e-05, "loss": 1.391, "step": 5856 }, { "epoch": 0.937269963194111, "grad_norm": 0.37696364521980286, "learning_rate": 5e-05, "loss": 1.472, "step": 5857 }, { "epoch": 0.9374299887982077, "grad_norm": 0.37083807587623596, "learning_rate": 5e-05, "loss": 1.4665, "step": 5858 }, { "epoch": 0.9375900144023044, "grad_norm": 0.3755824863910675, "learning_rate": 5e-05, "loss": 1.4314, "step": 5859 }, { "epoch": 0.937750040006401, "grad_norm": 0.3737642467021942, "learning_rate": 5e-05, "loss": 1.4045, "step": 5860 }, { "epoch": 0.9379100656104977, "grad_norm": 0.3521997034549713, "learning_rate": 5e-05, "loss": 1.3672, "step": 5861 }, { "epoch": 0.9380700912145943, "grad_norm": 0.37780892848968506, "learning_rate": 5e-05, "loss": 1.4344, "step": 5862 }, { "epoch": 0.938230116818691, "grad_norm": 0.3581046462059021, "learning_rate": 5e-05, "loss": 1.4272, "step": 5863 }, { "epoch": 0.9383901424227876, "grad_norm": 0.3852851688861847, "learning_rate": 5e-05, "loss": 1.4717, "step": 5864 }, { "epoch": 0.9385501680268843, "grad_norm": 0.3606116473674774, "learning_rate": 5e-05, "loss": 1.444, "step": 5865 }, { "epoch": 0.938710193630981, "grad_norm": 0.3684239387512207, "learning_rate": 5e-05, "loss": 1.4658, "step": 5866 }, { "epoch": 0.9388702192350776, "grad_norm": 0.3603340983390808, "learning_rate": 5e-05, "loss": 1.3764, "step": 5867 }, { "epoch": 0.9390302448391743, "grad_norm": 0.3645889461040497, "learning_rate": 5e-05, "loss": 1.4391, "step": 5868 }, { "epoch": 0.939190270443271, "grad_norm": 0.36966249346733093, "learning_rate": 5e-05, "loss": 1.468, "step": 5869 }, { "epoch": 0.9393502960473675, "grad_norm": 0.366171270608902, "learning_rate": 5e-05, "loss": 1.3401, "step": 5870 }, { "epoch": 0.9395103216514642, "grad_norm": 0.3839876651763916, "learning_rate": 5e-05, "loss": 1.5261, "step": 5871 }, { "epoch": 0.9396703472555609, "grad_norm": 0.37282606959342957, "learning_rate": 5e-05, "loss": 1.4416, "step": 5872 }, { "epoch": 0.9398303728596575, "grad_norm": 0.3670550286769867, "learning_rate": 5e-05, "loss": 1.3773, "step": 5873 }, { "epoch": 0.9399903984637542, "grad_norm": 0.3772101104259491, "learning_rate": 5e-05, "loss": 1.3629, "step": 5874 }, { "epoch": 0.9401504240678509, "grad_norm": 0.397439569234848, "learning_rate": 5e-05, "loss": 1.5079, "step": 5875 }, { "epoch": 0.9403104496719475, "grad_norm": 0.362699031829834, "learning_rate": 5e-05, "loss": 1.4866, "step": 5876 }, { "epoch": 0.9404704752760442, "grad_norm": 0.37404298782348633, "learning_rate": 5e-05, "loss": 1.4817, "step": 5877 }, { "epoch": 0.9406305008801408, "grad_norm": 0.3885417580604553, "learning_rate": 5e-05, "loss": 1.4308, "step": 5878 }, { "epoch": 0.9407905264842374, "grad_norm": 0.3718070089817047, "learning_rate": 5e-05, "loss": 1.4704, "step": 5879 }, { "epoch": 0.9409505520883341, "grad_norm": 0.37582945823669434, "learning_rate": 5e-05, "loss": 1.4616, "step": 5880 }, { "epoch": 0.9411105776924308, "grad_norm": 0.37942808866500854, "learning_rate": 5e-05, "loss": 1.4456, "step": 5881 }, { "epoch": 0.9412706032965275, "grad_norm": 0.3925057351589203, "learning_rate": 5e-05, "loss": 1.4603, "step": 5882 }, { "epoch": 0.9414306289006241, "grad_norm": 0.37354809045791626, "learning_rate": 5e-05, "loss": 1.5057, "step": 5883 }, { "epoch": 0.9415906545047208, "grad_norm": 0.37078601121902466, "learning_rate": 5e-05, "loss": 1.3615, "step": 5884 }, { "epoch": 0.9417506801088175, "grad_norm": 0.395230770111084, "learning_rate": 5e-05, "loss": 1.4802, "step": 5885 }, { "epoch": 0.941910705712914, "grad_norm": 0.3616466522216797, "learning_rate": 5e-05, "loss": 1.3978, "step": 5886 }, { "epoch": 0.9420707313170107, "grad_norm": 0.3789432644844055, "learning_rate": 5e-05, "loss": 1.4239, "step": 5887 }, { "epoch": 0.9422307569211074, "grad_norm": 0.3780387043952942, "learning_rate": 5e-05, "loss": 1.4664, "step": 5888 }, { "epoch": 0.942390782525204, "grad_norm": 0.35808226466178894, "learning_rate": 5e-05, "loss": 1.417, "step": 5889 }, { "epoch": 0.9425508081293007, "grad_norm": 0.37226635217666626, "learning_rate": 5e-05, "loss": 1.4052, "step": 5890 }, { "epoch": 0.9427108337333974, "grad_norm": 0.3885803818702698, "learning_rate": 5e-05, "loss": 1.4685, "step": 5891 }, { "epoch": 0.942870859337494, "grad_norm": 0.3865169286727905, "learning_rate": 5e-05, "loss": 1.5317, "step": 5892 }, { "epoch": 0.9430308849415907, "grad_norm": 0.3578342795372009, "learning_rate": 5e-05, "loss": 1.3934, "step": 5893 }, { "epoch": 0.9431909105456873, "grad_norm": 0.37833356857299805, "learning_rate": 5e-05, "loss": 1.4812, "step": 5894 }, { "epoch": 0.9433509361497839, "grad_norm": 0.37521612644195557, "learning_rate": 5e-05, "loss": 1.526, "step": 5895 }, { "epoch": 0.9435109617538806, "grad_norm": 0.39369046688079834, "learning_rate": 5e-05, "loss": 1.4401, "step": 5896 }, { "epoch": 0.9436709873579773, "grad_norm": 0.3762677013874054, "learning_rate": 5e-05, "loss": 1.4978, "step": 5897 }, { "epoch": 0.943831012962074, "grad_norm": 0.381515771150589, "learning_rate": 5e-05, "loss": 1.4435, "step": 5898 }, { "epoch": 0.9439910385661706, "grad_norm": 0.37013933062553406, "learning_rate": 5e-05, "loss": 1.456, "step": 5899 }, { "epoch": 0.9441510641702673, "grad_norm": 0.35728615522384644, "learning_rate": 5e-05, "loss": 1.4325, "step": 5900 }, { "epoch": 0.9443110897743638, "grad_norm": 0.3814203143119812, "learning_rate": 5e-05, "loss": 1.4912, "step": 5901 }, { "epoch": 0.9444711153784605, "grad_norm": 0.38893139362335205, "learning_rate": 5e-05, "loss": 1.5011, "step": 5902 }, { "epoch": 0.9446311409825572, "grad_norm": 0.36302676796913147, "learning_rate": 5e-05, "loss": 1.4175, "step": 5903 }, { "epoch": 0.9447911665866539, "grad_norm": 0.3551945090293884, "learning_rate": 5e-05, "loss": 1.4286, "step": 5904 }, { "epoch": 0.9449511921907505, "grad_norm": 0.3795490860939026, "learning_rate": 5e-05, "loss": 1.4805, "step": 5905 }, { "epoch": 0.9451112177948472, "grad_norm": 0.37014341354370117, "learning_rate": 5e-05, "loss": 1.4672, "step": 5906 }, { "epoch": 0.9452712433989439, "grad_norm": 0.3646472692489624, "learning_rate": 5e-05, "loss": 1.422, "step": 5907 }, { "epoch": 0.9454312690030405, "grad_norm": 0.3797762095928192, "learning_rate": 5e-05, "loss": 1.4223, "step": 5908 }, { "epoch": 0.9455912946071371, "grad_norm": 0.3810356855392456, "learning_rate": 5e-05, "loss": 1.4806, "step": 5909 }, { "epoch": 0.9457513202112338, "grad_norm": 0.36301863193511963, "learning_rate": 5e-05, "loss": 1.3955, "step": 5910 }, { "epoch": 0.9459113458153304, "grad_norm": 0.3740304708480835, "learning_rate": 5e-05, "loss": 1.4278, "step": 5911 }, { "epoch": 0.9460713714194271, "grad_norm": 0.3735957145690918, "learning_rate": 5e-05, "loss": 1.4524, "step": 5912 }, { "epoch": 0.9462313970235238, "grad_norm": 0.36586520075798035, "learning_rate": 5e-05, "loss": 1.4315, "step": 5913 }, { "epoch": 0.9463914226276204, "grad_norm": 0.37345775961875916, "learning_rate": 5e-05, "loss": 1.413, "step": 5914 }, { "epoch": 0.9465514482317171, "grad_norm": 0.37153688073158264, "learning_rate": 5e-05, "loss": 1.4654, "step": 5915 }, { "epoch": 0.9467114738358138, "grad_norm": 0.37427666783332825, "learning_rate": 5e-05, "loss": 1.4341, "step": 5916 }, { "epoch": 0.9468714994399103, "grad_norm": 0.37450408935546875, "learning_rate": 5e-05, "loss": 1.4625, "step": 5917 }, { "epoch": 0.947031525044007, "grad_norm": 0.3661040961742401, "learning_rate": 5e-05, "loss": 1.4279, "step": 5918 }, { "epoch": 0.9471915506481037, "grad_norm": 0.36572912335395813, "learning_rate": 5e-05, "loss": 1.4069, "step": 5919 }, { "epoch": 0.9473515762522003, "grad_norm": 0.3580487072467804, "learning_rate": 5e-05, "loss": 1.3742, "step": 5920 }, { "epoch": 0.947511601856297, "grad_norm": 0.3779311776161194, "learning_rate": 5e-05, "loss": 1.481, "step": 5921 }, { "epoch": 0.9476716274603937, "grad_norm": 0.37705186009407043, "learning_rate": 5e-05, "loss": 1.4018, "step": 5922 }, { "epoch": 0.9478316530644904, "grad_norm": 0.3817201256752014, "learning_rate": 5e-05, "loss": 1.4563, "step": 5923 }, { "epoch": 0.947991678668587, "grad_norm": 0.36685657501220703, "learning_rate": 5e-05, "loss": 1.4674, "step": 5924 }, { "epoch": 0.9481517042726836, "grad_norm": 0.38319748640060425, "learning_rate": 5e-05, "loss": 1.4779, "step": 5925 }, { "epoch": 0.9483117298767803, "grad_norm": 0.367543488740921, "learning_rate": 5e-05, "loss": 1.4271, "step": 5926 }, { "epoch": 0.9484717554808769, "grad_norm": 0.38320833444595337, "learning_rate": 5e-05, "loss": 1.4425, "step": 5927 }, { "epoch": 0.9486317810849736, "grad_norm": 0.37112829089164734, "learning_rate": 5e-05, "loss": 1.4485, "step": 5928 }, { "epoch": 0.9487918066890703, "grad_norm": 0.37786972522735596, "learning_rate": 5e-05, "loss": 1.4349, "step": 5929 }, { "epoch": 0.9489518322931669, "grad_norm": 0.38604629039764404, "learning_rate": 5e-05, "loss": 1.5341, "step": 5930 }, { "epoch": 0.9491118578972636, "grad_norm": 0.370030015707016, "learning_rate": 5e-05, "loss": 1.4257, "step": 5931 }, { "epoch": 0.9492718835013603, "grad_norm": 0.3666172921657562, "learning_rate": 5e-05, "loss": 1.4504, "step": 5932 }, { "epoch": 0.9494319091054568, "grad_norm": 0.36541539430618286, "learning_rate": 5e-05, "loss": 1.3371, "step": 5933 }, { "epoch": 0.9495919347095535, "grad_norm": 0.3676756024360657, "learning_rate": 5e-05, "loss": 1.3994, "step": 5934 }, { "epoch": 0.9497519603136502, "grad_norm": 0.37123632431030273, "learning_rate": 5e-05, "loss": 1.4369, "step": 5935 }, { "epoch": 0.9499119859177468, "grad_norm": 0.3650367856025696, "learning_rate": 5e-05, "loss": 1.4292, "step": 5936 }, { "epoch": 0.9500720115218435, "grad_norm": 0.3730967938899994, "learning_rate": 5e-05, "loss": 1.4436, "step": 5937 }, { "epoch": 0.9502320371259402, "grad_norm": 0.37025687098503113, "learning_rate": 5e-05, "loss": 1.3378, "step": 5938 }, { "epoch": 0.9503920627300368, "grad_norm": 0.3811746835708618, "learning_rate": 5e-05, "loss": 1.5091, "step": 5939 }, { "epoch": 0.9505520883341335, "grad_norm": 0.36381545662879944, "learning_rate": 5e-05, "loss": 1.3703, "step": 5940 }, { "epoch": 0.9507121139382301, "grad_norm": 0.36580944061279297, "learning_rate": 5e-05, "loss": 1.3761, "step": 5941 }, { "epoch": 0.9508721395423267, "grad_norm": 0.3655485212802887, "learning_rate": 5e-05, "loss": 1.4113, "step": 5942 }, { "epoch": 0.9510321651464234, "grad_norm": 0.3606448471546173, "learning_rate": 5e-05, "loss": 1.429, "step": 5943 }, { "epoch": 0.9511921907505201, "grad_norm": 0.37247365713119507, "learning_rate": 5e-05, "loss": 1.4589, "step": 5944 }, { "epoch": 0.9513522163546168, "grad_norm": 0.3905913233757019, "learning_rate": 5e-05, "loss": 1.4899, "step": 5945 }, { "epoch": 0.9515122419587134, "grad_norm": 0.3661322295665741, "learning_rate": 5e-05, "loss": 1.4448, "step": 5946 }, { "epoch": 0.9516722675628101, "grad_norm": 0.36334025859832764, "learning_rate": 5e-05, "loss": 1.4453, "step": 5947 }, { "epoch": 0.9518322931669067, "grad_norm": 0.3767883777618408, "learning_rate": 5e-05, "loss": 1.4544, "step": 5948 }, { "epoch": 0.9519923187710033, "grad_norm": 0.3805292546749115, "learning_rate": 5e-05, "loss": 1.5146, "step": 5949 }, { "epoch": 0.9521523443751, "grad_norm": 0.370939165353775, "learning_rate": 5e-05, "loss": 1.4804, "step": 5950 }, { "epoch": 0.9523123699791967, "grad_norm": 0.39317643642425537, "learning_rate": 5e-05, "loss": 1.4674, "step": 5951 }, { "epoch": 0.9524723955832933, "grad_norm": 0.3658585846424103, "learning_rate": 5e-05, "loss": 1.4233, "step": 5952 }, { "epoch": 0.95263242118739, "grad_norm": 0.3734930753707886, "learning_rate": 5e-05, "loss": 1.45, "step": 5953 }, { "epoch": 0.9527924467914867, "grad_norm": 0.3913246691226959, "learning_rate": 5e-05, "loss": 1.4023, "step": 5954 }, { "epoch": 0.9529524723955833, "grad_norm": 0.361234575510025, "learning_rate": 5e-05, "loss": 1.3883, "step": 5955 }, { "epoch": 0.9531124979996799, "grad_norm": 0.3704299330711365, "learning_rate": 5e-05, "loss": 1.4778, "step": 5956 }, { "epoch": 0.9532725236037766, "grad_norm": 0.3745312988758087, "learning_rate": 5e-05, "loss": 1.3934, "step": 5957 }, { "epoch": 0.9534325492078732, "grad_norm": 0.383017361164093, "learning_rate": 5e-05, "loss": 1.5005, "step": 5958 }, { "epoch": 0.9535925748119699, "grad_norm": 0.3625771999359131, "learning_rate": 5e-05, "loss": 1.3933, "step": 5959 }, { "epoch": 0.9537526004160666, "grad_norm": 0.3669659197330475, "learning_rate": 5e-05, "loss": 1.4241, "step": 5960 }, { "epoch": 0.9539126260201632, "grad_norm": 0.36424800753593445, "learning_rate": 5e-05, "loss": 1.2995, "step": 5961 }, { "epoch": 0.9540726516242599, "grad_norm": 0.37712591886520386, "learning_rate": 5e-05, "loss": 1.5073, "step": 5962 }, { "epoch": 0.9542326772283566, "grad_norm": 0.374967485666275, "learning_rate": 5e-05, "loss": 1.4701, "step": 5963 }, { "epoch": 0.9543927028324531, "grad_norm": 0.39402228593826294, "learning_rate": 5e-05, "loss": 1.3955, "step": 5964 }, { "epoch": 0.9545527284365498, "grad_norm": 0.3697720170021057, "learning_rate": 5e-05, "loss": 1.4416, "step": 5965 }, { "epoch": 0.9547127540406465, "grad_norm": 0.3780404329299927, "learning_rate": 5e-05, "loss": 1.4483, "step": 5966 }, { "epoch": 0.9548727796447432, "grad_norm": 0.3691938519477844, "learning_rate": 5e-05, "loss": 1.499, "step": 5967 }, { "epoch": 0.9550328052488398, "grad_norm": 0.37551331520080566, "learning_rate": 5e-05, "loss": 1.3791, "step": 5968 }, { "epoch": 0.9551928308529365, "grad_norm": 0.3575998842716217, "learning_rate": 5e-05, "loss": 1.3448, "step": 5969 }, { "epoch": 0.9553528564570332, "grad_norm": 0.37165993452072144, "learning_rate": 5e-05, "loss": 1.41, "step": 5970 }, { "epoch": 0.9555128820611298, "grad_norm": 0.3711983859539032, "learning_rate": 5e-05, "loss": 1.5109, "step": 5971 }, { "epoch": 0.9556729076652264, "grad_norm": 0.39688512682914734, "learning_rate": 5e-05, "loss": 1.4595, "step": 5972 }, { "epoch": 0.9558329332693231, "grad_norm": 0.37833601236343384, "learning_rate": 5e-05, "loss": 1.482, "step": 5973 }, { "epoch": 0.9559929588734197, "grad_norm": 0.3517429530620575, "learning_rate": 5e-05, "loss": 1.3776, "step": 5974 }, { "epoch": 0.9561529844775164, "grad_norm": 0.38530707359313965, "learning_rate": 5e-05, "loss": 1.4672, "step": 5975 }, { "epoch": 0.9563130100816131, "grad_norm": 0.38447463512420654, "learning_rate": 5e-05, "loss": 1.4971, "step": 5976 }, { "epoch": 0.9564730356857097, "grad_norm": 0.36633729934692383, "learning_rate": 5e-05, "loss": 1.4595, "step": 5977 }, { "epoch": 0.9566330612898064, "grad_norm": 0.3727111220359802, "learning_rate": 5e-05, "loss": 1.4488, "step": 5978 }, { "epoch": 0.9567930868939031, "grad_norm": 0.3871779441833496, "learning_rate": 5e-05, "loss": 1.4765, "step": 5979 }, { "epoch": 0.9569531124979996, "grad_norm": 0.39155200123786926, "learning_rate": 5e-05, "loss": 1.4472, "step": 5980 }, { "epoch": 0.9571131381020963, "grad_norm": 0.3773864507675171, "learning_rate": 5e-05, "loss": 1.4797, "step": 5981 }, { "epoch": 0.957273163706193, "grad_norm": 0.3677498400211334, "learning_rate": 5e-05, "loss": 1.409, "step": 5982 }, { "epoch": 0.9574331893102896, "grad_norm": 0.3972966969013214, "learning_rate": 5e-05, "loss": 1.4921, "step": 5983 }, { "epoch": 0.9575932149143863, "grad_norm": 0.3876812756061554, "learning_rate": 5e-05, "loss": 1.497, "step": 5984 }, { "epoch": 0.957753240518483, "grad_norm": 0.3750854730606079, "learning_rate": 5e-05, "loss": 1.4033, "step": 5985 }, { "epoch": 0.9579132661225797, "grad_norm": 0.3847387433052063, "learning_rate": 5e-05, "loss": 1.4512, "step": 5986 }, { "epoch": 0.9580732917266762, "grad_norm": 0.3702529966831207, "learning_rate": 5e-05, "loss": 1.4771, "step": 5987 }, { "epoch": 0.9582333173307729, "grad_norm": 0.3712124824523926, "learning_rate": 5e-05, "loss": 1.405, "step": 5988 }, { "epoch": 0.9583933429348696, "grad_norm": 0.3749273419380188, "learning_rate": 5e-05, "loss": 1.412, "step": 5989 }, { "epoch": 0.9585533685389662, "grad_norm": 0.3729630708694458, "learning_rate": 5e-05, "loss": 1.4147, "step": 5990 }, { "epoch": 0.9587133941430629, "grad_norm": 0.4092143774032593, "learning_rate": 5e-05, "loss": 1.572, "step": 5991 }, { "epoch": 0.9588734197471596, "grad_norm": 0.38331276178359985, "learning_rate": 5e-05, "loss": 1.4529, "step": 5992 }, { "epoch": 0.9590334453512562, "grad_norm": 0.36233285069465637, "learning_rate": 5e-05, "loss": 1.3906, "step": 5993 }, { "epoch": 0.9591934709553529, "grad_norm": 0.3710869550704956, "learning_rate": 5e-05, "loss": 1.478, "step": 5994 }, { "epoch": 0.9593534965594495, "grad_norm": 0.3693365454673767, "learning_rate": 5e-05, "loss": 1.3749, "step": 5995 }, { "epoch": 0.9595135221635461, "grad_norm": 0.3726164996623993, "learning_rate": 5e-05, "loss": 1.4168, "step": 5996 }, { "epoch": 0.9596735477676428, "grad_norm": 0.3687114715576172, "learning_rate": 5e-05, "loss": 1.4524, "step": 5997 }, { "epoch": 0.9598335733717395, "grad_norm": 0.3816275894641876, "learning_rate": 5e-05, "loss": 1.4548, "step": 5998 }, { "epoch": 0.9599935989758361, "grad_norm": 0.36814549565315247, "learning_rate": 5e-05, "loss": 1.4246, "step": 5999 }, { "epoch": 0.9601536245799328, "grad_norm": 0.37464386224746704, "learning_rate": 5e-05, "loss": 1.4368, "step": 6000 }, { "epoch": 0.9603136501840295, "grad_norm": 0.38542068004608154, "learning_rate": 5e-05, "loss": 1.456, "step": 6001 }, { "epoch": 0.9604736757881261, "grad_norm": 0.39198926091194153, "learning_rate": 5e-05, "loss": 1.5084, "step": 6002 }, { "epoch": 0.9606337013922227, "grad_norm": 0.36808085441589355, "learning_rate": 5e-05, "loss": 1.39, "step": 6003 }, { "epoch": 0.9607937269963194, "grad_norm": 0.4074413776397705, "learning_rate": 5e-05, "loss": 1.507, "step": 6004 }, { "epoch": 0.960953752600416, "grad_norm": 0.38685309886932373, "learning_rate": 5e-05, "loss": 1.4737, "step": 6005 }, { "epoch": 0.9611137782045127, "grad_norm": 0.377895712852478, "learning_rate": 5e-05, "loss": 1.5098, "step": 6006 }, { "epoch": 0.9612738038086094, "grad_norm": 0.38168200850486755, "learning_rate": 5e-05, "loss": 1.4465, "step": 6007 }, { "epoch": 0.961433829412706, "grad_norm": 0.369026243686676, "learning_rate": 5e-05, "loss": 1.3896, "step": 6008 }, { "epoch": 0.9615938550168027, "grad_norm": 0.3752148747444153, "learning_rate": 5e-05, "loss": 1.4881, "step": 6009 }, { "epoch": 0.9617538806208994, "grad_norm": 0.3540462851524353, "learning_rate": 5e-05, "loss": 1.3564, "step": 6010 }, { "epoch": 0.961913906224996, "grad_norm": 0.3752222955226898, "learning_rate": 5e-05, "loss": 1.4846, "step": 6011 }, { "epoch": 0.9620739318290926, "grad_norm": 0.3663375675678253, "learning_rate": 5e-05, "loss": 1.448, "step": 6012 }, { "epoch": 0.9622339574331893, "grad_norm": 0.36980071663856506, "learning_rate": 5e-05, "loss": 1.4337, "step": 6013 }, { "epoch": 0.962393983037286, "grad_norm": 0.37141913175582886, "learning_rate": 5e-05, "loss": 1.3444, "step": 6014 }, { "epoch": 0.9625540086413826, "grad_norm": 0.37274184823036194, "learning_rate": 5e-05, "loss": 1.4845, "step": 6015 }, { "epoch": 0.9627140342454793, "grad_norm": 0.36425119638442993, "learning_rate": 5e-05, "loss": 1.3822, "step": 6016 }, { "epoch": 0.962874059849576, "grad_norm": 0.37859275937080383, "learning_rate": 5e-05, "loss": 1.4289, "step": 6017 }, { "epoch": 0.9630340854536726, "grad_norm": 0.3806598484516144, "learning_rate": 5e-05, "loss": 1.4584, "step": 6018 }, { "epoch": 0.9631941110577692, "grad_norm": 0.3581353724002838, "learning_rate": 5e-05, "loss": 1.3805, "step": 6019 }, { "epoch": 0.9633541366618659, "grad_norm": 0.38292187452316284, "learning_rate": 5e-05, "loss": 1.4524, "step": 6020 }, { "epoch": 0.9635141622659625, "grad_norm": 0.36553576588630676, "learning_rate": 5e-05, "loss": 1.4221, "step": 6021 }, { "epoch": 0.9636741878700592, "grad_norm": 0.36594444513320923, "learning_rate": 5e-05, "loss": 1.4104, "step": 6022 }, { "epoch": 0.9638342134741559, "grad_norm": 0.3697940409183502, "learning_rate": 5e-05, "loss": 1.4225, "step": 6023 }, { "epoch": 0.9639942390782525, "grad_norm": 0.3696502447128296, "learning_rate": 5e-05, "loss": 1.3289, "step": 6024 }, { "epoch": 0.9641542646823492, "grad_norm": 0.3744780421257019, "learning_rate": 5e-05, "loss": 1.4314, "step": 6025 }, { "epoch": 0.9643142902864459, "grad_norm": 0.38751283288002014, "learning_rate": 5e-05, "loss": 1.4078, "step": 6026 }, { "epoch": 0.9644743158905424, "grad_norm": 0.367973268032074, "learning_rate": 5e-05, "loss": 1.442, "step": 6027 }, { "epoch": 0.9646343414946391, "grad_norm": 0.37741366028785706, "learning_rate": 5e-05, "loss": 1.443, "step": 6028 }, { "epoch": 0.9647943670987358, "grad_norm": 0.3660295307636261, "learning_rate": 5e-05, "loss": 1.449, "step": 6029 }, { "epoch": 0.9649543927028325, "grad_norm": 0.39451226592063904, "learning_rate": 5e-05, "loss": 1.5338, "step": 6030 }, { "epoch": 0.9651144183069291, "grad_norm": 0.39010879397392273, "learning_rate": 5e-05, "loss": 1.4404, "step": 6031 }, { "epoch": 0.9652744439110258, "grad_norm": 0.365651935338974, "learning_rate": 5e-05, "loss": 1.3707, "step": 6032 }, { "epoch": 0.9654344695151225, "grad_norm": 0.37118086218833923, "learning_rate": 5e-05, "loss": 1.4112, "step": 6033 }, { "epoch": 0.965594495119219, "grad_norm": 0.3615556061267853, "learning_rate": 5e-05, "loss": 1.419, "step": 6034 }, { "epoch": 0.9657545207233157, "grad_norm": 0.37290486693382263, "learning_rate": 5e-05, "loss": 1.4096, "step": 6035 }, { "epoch": 0.9659145463274124, "grad_norm": 0.3736831247806549, "learning_rate": 5e-05, "loss": 1.4133, "step": 6036 }, { "epoch": 0.966074571931509, "grad_norm": 0.37524813413619995, "learning_rate": 5e-05, "loss": 1.5064, "step": 6037 }, { "epoch": 0.9662345975356057, "grad_norm": 0.3680674433708191, "learning_rate": 5e-05, "loss": 1.3552, "step": 6038 }, { "epoch": 0.9663946231397024, "grad_norm": 0.3650568425655365, "learning_rate": 5e-05, "loss": 1.4181, "step": 6039 }, { "epoch": 0.966554648743799, "grad_norm": 0.3702407777309418, "learning_rate": 5e-05, "loss": 1.4277, "step": 6040 }, { "epoch": 0.9667146743478957, "grad_norm": 0.36972999572753906, "learning_rate": 5e-05, "loss": 1.449, "step": 6041 }, { "epoch": 0.9668746999519923, "grad_norm": 0.37173527479171753, "learning_rate": 5e-05, "loss": 1.4621, "step": 6042 }, { "epoch": 0.9670347255560889, "grad_norm": 0.3855796456336975, "learning_rate": 5e-05, "loss": 1.4472, "step": 6043 }, { "epoch": 0.9671947511601856, "grad_norm": 0.38006851077079773, "learning_rate": 5e-05, "loss": 1.5317, "step": 6044 }, { "epoch": 0.9673547767642823, "grad_norm": 0.39085331559181213, "learning_rate": 5e-05, "loss": 1.4463, "step": 6045 }, { "epoch": 0.967514802368379, "grad_norm": 0.382097065448761, "learning_rate": 5e-05, "loss": 1.3838, "step": 6046 }, { "epoch": 0.9676748279724756, "grad_norm": 0.38162729144096375, "learning_rate": 5e-05, "loss": 1.4194, "step": 6047 }, { "epoch": 0.9678348535765723, "grad_norm": 0.38507184386253357, "learning_rate": 5e-05, "loss": 1.4527, "step": 6048 }, { "epoch": 0.967994879180669, "grad_norm": 0.3809961974620819, "learning_rate": 5e-05, "loss": 1.3835, "step": 6049 }, { "epoch": 0.9681549047847655, "grad_norm": 0.3912103474140167, "learning_rate": 5e-05, "loss": 1.4876, "step": 6050 }, { "epoch": 0.9683149303888622, "grad_norm": 0.37902137637138367, "learning_rate": 5e-05, "loss": 1.4624, "step": 6051 }, { "epoch": 0.9684749559929589, "grad_norm": 0.35793888568878174, "learning_rate": 5e-05, "loss": 1.4023, "step": 6052 }, { "epoch": 0.9686349815970555, "grad_norm": 0.39576613903045654, "learning_rate": 5e-05, "loss": 1.5466, "step": 6053 }, { "epoch": 0.9687950072011522, "grad_norm": 0.3839621841907501, "learning_rate": 5e-05, "loss": 1.3875, "step": 6054 }, { "epoch": 0.9689550328052489, "grad_norm": 0.37495100498199463, "learning_rate": 5e-05, "loss": 1.3539, "step": 6055 }, { "epoch": 0.9691150584093455, "grad_norm": 0.3644159436225891, "learning_rate": 5e-05, "loss": 1.4286, "step": 6056 }, { "epoch": 0.9692750840134422, "grad_norm": 0.41657498478889465, "learning_rate": 5e-05, "loss": 1.4101, "step": 6057 }, { "epoch": 0.9694351096175388, "grad_norm": 0.3680585026741028, "learning_rate": 5e-05, "loss": 1.4291, "step": 6058 }, { "epoch": 0.9695951352216354, "grad_norm": 0.38173478841781616, "learning_rate": 5e-05, "loss": 1.4625, "step": 6059 }, { "epoch": 0.9697551608257321, "grad_norm": 0.38632911443710327, "learning_rate": 5e-05, "loss": 1.4607, "step": 6060 }, { "epoch": 0.9699151864298288, "grad_norm": 0.3912810683250427, "learning_rate": 5e-05, "loss": 1.4625, "step": 6061 }, { "epoch": 0.9700752120339254, "grad_norm": 0.3887604773044586, "learning_rate": 5e-05, "loss": 1.4839, "step": 6062 }, { "epoch": 0.9702352376380221, "grad_norm": 0.3771521747112274, "learning_rate": 5e-05, "loss": 1.44, "step": 6063 }, { "epoch": 0.9703952632421188, "grad_norm": 0.3783925473690033, "learning_rate": 5e-05, "loss": 1.4751, "step": 6064 }, { "epoch": 0.9705552888462154, "grad_norm": 0.3732749819755554, "learning_rate": 5e-05, "loss": 1.4916, "step": 6065 }, { "epoch": 0.970715314450312, "grad_norm": 0.3951418101787567, "learning_rate": 5e-05, "loss": 1.5075, "step": 6066 }, { "epoch": 0.9708753400544087, "grad_norm": 0.36194753646850586, "learning_rate": 5e-05, "loss": 1.4531, "step": 6067 }, { "epoch": 0.9710353656585053, "grad_norm": 0.36083436012268066, "learning_rate": 5e-05, "loss": 1.4083, "step": 6068 }, { "epoch": 0.971195391262602, "grad_norm": 0.3778192400932312, "learning_rate": 5e-05, "loss": 1.4675, "step": 6069 }, { "epoch": 0.9713554168666987, "grad_norm": 0.36249589920043945, "learning_rate": 5e-05, "loss": 1.4245, "step": 6070 }, { "epoch": 0.9715154424707954, "grad_norm": 0.36254990100860596, "learning_rate": 5e-05, "loss": 1.3795, "step": 6071 }, { "epoch": 0.971675468074892, "grad_norm": 0.3845757842063904, "learning_rate": 5e-05, "loss": 1.4559, "step": 6072 }, { "epoch": 0.9718354936789887, "grad_norm": 0.372761070728302, "learning_rate": 5e-05, "loss": 1.4194, "step": 6073 }, { "epoch": 0.9719955192830853, "grad_norm": 0.37680602073669434, "learning_rate": 5e-05, "loss": 1.3517, "step": 6074 }, { "epoch": 0.9721555448871819, "grad_norm": 0.36337199807167053, "learning_rate": 5e-05, "loss": 1.3647, "step": 6075 }, { "epoch": 0.9723155704912786, "grad_norm": 0.36623451113700867, "learning_rate": 5e-05, "loss": 1.4545, "step": 6076 }, { "epoch": 0.9724755960953753, "grad_norm": 0.3681119382381439, "learning_rate": 5e-05, "loss": 1.4088, "step": 6077 }, { "epoch": 0.9726356216994719, "grad_norm": 0.3927222192287445, "learning_rate": 5e-05, "loss": 1.5256, "step": 6078 }, { "epoch": 0.9727956473035686, "grad_norm": 0.3802756071090698, "learning_rate": 5e-05, "loss": 1.4519, "step": 6079 }, { "epoch": 0.9729556729076653, "grad_norm": 0.3682069480419159, "learning_rate": 5e-05, "loss": 1.4532, "step": 6080 }, { "epoch": 0.9731156985117618, "grad_norm": 0.37601780891418457, "learning_rate": 5e-05, "loss": 1.4334, "step": 6081 }, { "epoch": 0.9732757241158585, "grad_norm": 0.3758496940135956, "learning_rate": 5e-05, "loss": 1.4601, "step": 6082 }, { "epoch": 0.9734357497199552, "grad_norm": 0.36144721508026123, "learning_rate": 5e-05, "loss": 1.412, "step": 6083 }, { "epoch": 0.9735957753240518, "grad_norm": 0.3657606840133667, "learning_rate": 5e-05, "loss": 1.4123, "step": 6084 }, { "epoch": 0.9737558009281485, "grad_norm": 0.3735799789428711, "learning_rate": 5e-05, "loss": 1.4373, "step": 6085 }, { "epoch": 0.9739158265322452, "grad_norm": 0.37441742420196533, "learning_rate": 5e-05, "loss": 1.4404, "step": 6086 }, { "epoch": 0.9740758521363418, "grad_norm": 0.36527395248413086, "learning_rate": 5e-05, "loss": 1.4235, "step": 6087 }, { "epoch": 0.9742358777404385, "grad_norm": 0.3777921795845032, "learning_rate": 5e-05, "loss": 1.4726, "step": 6088 }, { "epoch": 0.9743959033445351, "grad_norm": 0.3883718252182007, "learning_rate": 5e-05, "loss": 1.4335, "step": 6089 }, { "epoch": 0.9745559289486317, "grad_norm": 0.37389063835144043, "learning_rate": 5e-05, "loss": 1.4104, "step": 6090 }, { "epoch": 0.9747159545527284, "grad_norm": 0.39461550116539, "learning_rate": 5e-05, "loss": 1.4474, "step": 6091 }, { "epoch": 0.9748759801568251, "grad_norm": 0.3842222988605499, "learning_rate": 5e-05, "loss": 1.5079, "step": 6092 }, { "epoch": 0.9750360057609218, "grad_norm": 0.37743908166885376, "learning_rate": 5e-05, "loss": 1.4324, "step": 6093 }, { "epoch": 0.9751960313650184, "grad_norm": 0.38553908467292786, "learning_rate": 5e-05, "loss": 1.4965, "step": 6094 }, { "epoch": 0.9753560569691151, "grad_norm": 0.373360276222229, "learning_rate": 5e-05, "loss": 1.4193, "step": 6095 }, { "epoch": 0.9755160825732118, "grad_norm": 0.37242865562438965, "learning_rate": 5e-05, "loss": 1.4591, "step": 6096 }, { "epoch": 0.9756761081773083, "grad_norm": 0.38269075751304626, "learning_rate": 5e-05, "loss": 1.4522, "step": 6097 }, { "epoch": 0.975836133781405, "grad_norm": 0.37412410974502563, "learning_rate": 5e-05, "loss": 1.3966, "step": 6098 }, { "epoch": 0.9759961593855017, "grad_norm": 0.3825237452983856, "learning_rate": 5e-05, "loss": 1.5149, "step": 6099 }, { "epoch": 0.9761561849895983, "grad_norm": 0.3895252048969269, "learning_rate": 5e-05, "loss": 1.4224, "step": 6100 }, { "epoch": 0.976316210593695, "grad_norm": 0.38487476110458374, "learning_rate": 5e-05, "loss": 1.5326, "step": 6101 }, { "epoch": 0.9764762361977917, "grad_norm": 0.3818266987800598, "learning_rate": 5e-05, "loss": 1.4766, "step": 6102 }, { "epoch": 0.9766362618018883, "grad_norm": 0.38054758310317993, "learning_rate": 5e-05, "loss": 1.4402, "step": 6103 }, { "epoch": 0.976796287405985, "grad_norm": 0.3736788332462311, "learning_rate": 5e-05, "loss": 1.3947, "step": 6104 }, { "epoch": 0.9769563130100816, "grad_norm": 0.3678866922855377, "learning_rate": 5e-05, "loss": 1.3985, "step": 6105 }, { "epoch": 0.9771163386141782, "grad_norm": 0.37039443850517273, "learning_rate": 5e-05, "loss": 1.4199, "step": 6106 }, { "epoch": 0.9772763642182749, "grad_norm": 0.3769499957561493, "learning_rate": 5e-05, "loss": 1.4397, "step": 6107 }, { "epoch": 0.9774363898223716, "grad_norm": 0.3921883702278137, "learning_rate": 5e-05, "loss": 1.525, "step": 6108 }, { "epoch": 0.9775964154264682, "grad_norm": 0.40684735774993896, "learning_rate": 5e-05, "loss": 1.3501, "step": 6109 }, { "epoch": 0.9777564410305649, "grad_norm": 0.3835674226284027, "learning_rate": 5e-05, "loss": 1.476, "step": 6110 }, { "epoch": 0.9779164666346616, "grad_norm": 0.37056592106819153, "learning_rate": 5e-05, "loss": 1.4859, "step": 6111 }, { "epoch": 0.9780764922387583, "grad_norm": 0.3809880018234253, "learning_rate": 5e-05, "loss": 1.4421, "step": 6112 }, { "epoch": 0.9782365178428548, "grad_norm": 0.38007286190986633, "learning_rate": 5e-05, "loss": 1.4183, "step": 6113 }, { "epoch": 0.9783965434469515, "grad_norm": 0.37382301688194275, "learning_rate": 5e-05, "loss": 1.3995, "step": 6114 }, { "epoch": 0.9785565690510482, "grad_norm": 0.38647201657295227, "learning_rate": 5e-05, "loss": 1.46, "step": 6115 }, { "epoch": 0.9787165946551448, "grad_norm": 0.36921343207359314, "learning_rate": 5e-05, "loss": 1.4506, "step": 6116 }, { "epoch": 0.9788766202592415, "grad_norm": 0.3740736246109009, "learning_rate": 5e-05, "loss": 1.4581, "step": 6117 }, { "epoch": 0.9790366458633382, "grad_norm": 0.3701004683971405, "learning_rate": 5e-05, "loss": 1.3863, "step": 6118 }, { "epoch": 0.9791966714674348, "grad_norm": 0.38133350014686584, "learning_rate": 5e-05, "loss": 1.3869, "step": 6119 }, { "epoch": 0.9793566970715314, "grad_norm": 0.39086270332336426, "learning_rate": 5e-05, "loss": 1.4859, "step": 6120 }, { "epoch": 0.9795167226756281, "grad_norm": 0.3792276680469513, "learning_rate": 5e-05, "loss": 1.3999, "step": 6121 }, { "epoch": 0.9796767482797247, "grad_norm": 0.37423795461654663, "learning_rate": 5e-05, "loss": 1.4362, "step": 6122 }, { "epoch": 0.9798367738838214, "grad_norm": 0.36539238691329956, "learning_rate": 5e-05, "loss": 1.3991, "step": 6123 }, { "epoch": 0.9799967994879181, "grad_norm": 0.3957475423812866, "learning_rate": 5e-05, "loss": 1.4512, "step": 6124 }, { "epoch": 0.9801568250920147, "grad_norm": 0.37266337871551514, "learning_rate": 5e-05, "loss": 1.4416, "step": 6125 }, { "epoch": 0.9803168506961114, "grad_norm": 0.3613101840019226, "learning_rate": 5e-05, "loss": 1.4455, "step": 6126 }, { "epoch": 0.9804768763002081, "grad_norm": 0.37971529364585876, "learning_rate": 5e-05, "loss": 1.4463, "step": 6127 }, { "epoch": 0.9806369019043046, "grad_norm": 0.3784366548061371, "learning_rate": 5e-05, "loss": 1.496, "step": 6128 }, { "epoch": 0.9807969275084013, "grad_norm": 0.3801773190498352, "learning_rate": 5e-05, "loss": 1.4375, "step": 6129 }, { "epoch": 0.980956953112498, "grad_norm": 0.38145020604133606, "learning_rate": 5e-05, "loss": 1.4474, "step": 6130 }, { "epoch": 0.9811169787165946, "grad_norm": 0.38151973485946655, "learning_rate": 5e-05, "loss": 1.4104, "step": 6131 }, { "epoch": 0.9812770043206913, "grad_norm": 0.3814515769481659, "learning_rate": 5e-05, "loss": 1.5039, "step": 6132 }, { "epoch": 0.981437029924788, "grad_norm": 0.3548029363155365, "learning_rate": 5e-05, "loss": 1.3765, "step": 6133 }, { "epoch": 0.9815970555288847, "grad_norm": 0.3764757215976715, "learning_rate": 5e-05, "loss": 1.3564, "step": 6134 }, { "epoch": 0.9817570811329813, "grad_norm": 0.3816234767436981, "learning_rate": 5e-05, "loss": 1.4545, "step": 6135 }, { "epoch": 0.9819171067370779, "grad_norm": 0.3728843629360199, "learning_rate": 5e-05, "loss": 1.4134, "step": 6136 }, { "epoch": 0.9820771323411746, "grad_norm": 0.38683855533599854, "learning_rate": 5e-05, "loss": 1.4471, "step": 6137 }, { "epoch": 0.9822371579452712, "grad_norm": 0.39511850476264954, "learning_rate": 5e-05, "loss": 1.4511, "step": 6138 }, { "epoch": 0.9823971835493679, "grad_norm": 0.3833600878715515, "learning_rate": 5e-05, "loss": 1.4342, "step": 6139 }, { "epoch": 0.9825572091534646, "grad_norm": 0.38028061389923096, "learning_rate": 5e-05, "loss": 1.4212, "step": 6140 }, { "epoch": 0.9827172347575612, "grad_norm": 0.3854234516620636, "learning_rate": 5e-05, "loss": 1.4494, "step": 6141 }, { "epoch": 0.9828772603616579, "grad_norm": 0.3866237998008728, "learning_rate": 5e-05, "loss": 1.5037, "step": 6142 }, { "epoch": 0.9830372859657546, "grad_norm": 0.387195885181427, "learning_rate": 5e-05, "loss": 1.4144, "step": 6143 }, { "epoch": 0.9831973115698511, "grad_norm": 0.37367796897888184, "learning_rate": 5e-05, "loss": 1.3247, "step": 6144 }, { "epoch": 0.9833573371739478, "grad_norm": 0.37144237756729126, "learning_rate": 5e-05, "loss": 1.4167, "step": 6145 }, { "epoch": 0.9835173627780445, "grad_norm": 0.3718883693218231, "learning_rate": 5e-05, "loss": 1.434, "step": 6146 }, { "epoch": 0.9836773883821411, "grad_norm": 0.4019818902015686, "learning_rate": 5e-05, "loss": 1.4308, "step": 6147 }, { "epoch": 0.9838374139862378, "grad_norm": 0.36922696232795715, "learning_rate": 5e-05, "loss": 1.3839, "step": 6148 }, { "epoch": 0.9839974395903345, "grad_norm": 0.36813119053840637, "learning_rate": 5e-05, "loss": 1.4687, "step": 6149 }, { "epoch": 0.9841574651944311, "grad_norm": 0.37708309292793274, "learning_rate": 5e-05, "loss": 1.4979, "step": 6150 }, { "epoch": 0.9843174907985278, "grad_norm": 0.3675183653831482, "learning_rate": 5e-05, "loss": 1.4603, "step": 6151 }, { "epoch": 0.9844775164026244, "grad_norm": 0.3738596439361572, "learning_rate": 5e-05, "loss": 1.4396, "step": 6152 }, { "epoch": 0.984637542006721, "grad_norm": 0.38359224796295166, "learning_rate": 5e-05, "loss": 1.4284, "step": 6153 }, { "epoch": 0.9847975676108177, "grad_norm": 0.37225624918937683, "learning_rate": 5e-05, "loss": 1.4087, "step": 6154 }, { "epoch": 0.9849575932149144, "grad_norm": 0.3870382308959961, "learning_rate": 5e-05, "loss": 1.4235, "step": 6155 }, { "epoch": 0.985117618819011, "grad_norm": 0.3770749568939209, "learning_rate": 5e-05, "loss": 1.4025, "step": 6156 }, { "epoch": 0.9852776444231077, "grad_norm": 0.38449618220329285, "learning_rate": 5e-05, "loss": 1.429, "step": 6157 }, { "epoch": 0.9854376700272044, "grad_norm": 0.37109723687171936, "learning_rate": 5e-05, "loss": 1.3932, "step": 6158 }, { "epoch": 0.9855976956313011, "grad_norm": 0.38466671109199524, "learning_rate": 5e-05, "loss": 1.435, "step": 6159 }, { "epoch": 0.9857577212353976, "grad_norm": 0.36050736904144287, "learning_rate": 5e-05, "loss": 1.4098, "step": 6160 }, { "epoch": 0.9859177468394943, "grad_norm": 0.37629011273384094, "learning_rate": 5e-05, "loss": 1.4531, "step": 6161 }, { "epoch": 0.986077772443591, "grad_norm": 0.36327648162841797, "learning_rate": 5e-05, "loss": 1.4219, "step": 6162 }, { "epoch": 0.9862377980476876, "grad_norm": 0.3718591034412384, "learning_rate": 5e-05, "loss": 1.4198, "step": 6163 }, { "epoch": 0.9863978236517843, "grad_norm": 0.3705459535121918, "learning_rate": 5e-05, "loss": 1.3733, "step": 6164 }, { "epoch": 0.986557849255881, "grad_norm": 0.3764915466308594, "learning_rate": 5e-05, "loss": 1.4785, "step": 6165 }, { "epoch": 0.9867178748599776, "grad_norm": 0.3874961733818054, "learning_rate": 5e-05, "loss": 1.442, "step": 6166 }, { "epoch": 0.9868779004640742, "grad_norm": 0.37059906125068665, "learning_rate": 5e-05, "loss": 1.4236, "step": 6167 }, { "epoch": 0.9870379260681709, "grad_norm": 0.39059051871299744, "learning_rate": 5e-05, "loss": 1.3974, "step": 6168 }, { "epoch": 0.9871979516722675, "grad_norm": 0.3730117678642273, "learning_rate": 5e-05, "loss": 1.4386, "step": 6169 }, { "epoch": 0.9873579772763642, "grad_norm": 0.3907149136066437, "learning_rate": 5e-05, "loss": 1.472, "step": 6170 }, { "epoch": 0.9875180028804609, "grad_norm": 0.3788815438747406, "learning_rate": 5e-05, "loss": 1.4686, "step": 6171 }, { "epoch": 0.9876780284845575, "grad_norm": 0.37850454449653625, "learning_rate": 5e-05, "loss": 1.461, "step": 6172 }, { "epoch": 0.9878380540886542, "grad_norm": 0.3765132427215576, "learning_rate": 5e-05, "loss": 1.4278, "step": 6173 }, { "epoch": 0.9879980796927509, "grad_norm": 0.38723552227020264, "learning_rate": 5e-05, "loss": 1.4981, "step": 6174 }, { "epoch": 0.9881581052968474, "grad_norm": 0.39129868149757385, "learning_rate": 5e-05, "loss": 1.4942, "step": 6175 }, { "epoch": 0.9883181309009441, "grad_norm": 0.38095274567604065, "learning_rate": 5e-05, "loss": 1.4938, "step": 6176 }, { "epoch": 0.9884781565050408, "grad_norm": 0.37590572237968445, "learning_rate": 5e-05, "loss": 1.4414, "step": 6177 }, { "epoch": 0.9886381821091375, "grad_norm": 0.35846808552742004, "learning_rate": 5e-05, "loss": 1.387, "step": 6178 }, { "epoch": 0.9887982077132341, "grad_norm": 0.38178882002830505, "learning_rate": 5e-05, "loss": 1.4791, "step": 6179 }, { "epoch": 0.9889582333173308, "grad_norm": 0.36234167218208313, "learning_rate": 5e-05, "loss": 1.4262, "step": 6180 }, { "epoch": 0.9891182589214275, "grad_norm": 0.36625659465789795, "learning_rate": 5e-05, "loss": 1.379, "step": 6181 }, { "epoch": 0.9892782845255241, "grad_norm": 0.3763173222541809, "learning_rate": 5e-05, "loss": 1.4373, "step": 6182 }, { "epoch": 0.9894383101296207, "grad_norm": 0.3814091682434082, "learning_rate": 5e-05, "loss": 1.431, "step": 6183 }, { "epoch": 0.9895983357337174, "grad_norm": 0.3686288893222809, "learning_rate": 5e-05, "loss": 1.3946, "step": 6184 }, { "epoch": 0.989758361337814, "grad_norm": 0.36460641026496887, "learning_rate": 5e-05, "loss": 1.3034, "step": 6185 }, { "epoch": 0.9899183869419107, "grad_norm": 0.38847073912620544, "learning_rate": 5e-05, "loss": 1.3957, "step": 6186 }, { "epoch": 0.9900784125460074, "grad_norm": 0.37717345356941223, "learning_rate": 5e-05, "loss": 1.4098, "step": 6187 }, { "epoch": 0.990238438150104, "grad_norm": 0.3746647536754608, "learning_rate": 5e-05, "loss": 1.5069, "step": 6188 }, { "epoch": 0.9903984637542007, "grad_norm": 0.3821280598640442, "learning_rate": 5e-05, "loss": 1.3909, "step": 6189 }, { "epoch": 0.9905584893582974, "grad_norm": 0.37438106536865234, "learning_rate": 5e-05, "loss": 1.4091, "step": 6190 }, { "epoch": 0.9907185149623939, "grad_norm": 0.3775680363178253, "learning_rate": 5e-05, "loss": 1.4414, "step": 6191 }, { "epoch": 0.9908785405664906, "grad_norm": 0.38122522830963135, "learning_rate": 5e-05, "loss": 1.4052, "step": 6192 }, { "epoch": 0.9910385661705873, "grad_norm": 0.3957843780517578, "learning_rate": 5e-05, "loss": 1.4858, "step": 6193 }, { "epoch": 0.991198591774684, "grad_norm": 0.38548576831817627, "learning_rate": 5e-05, "loss": 1.4584, "step": 6194 }, { "epoch": 0.9913586173787806, "grad_norm": 0.3851167857646942, "learning_rate": 5e-05, "loss": 1.4827, "step": 6195 }, { "epoch": 0.9915186429828773, "grad_norm": 0.3777800500392914, "learning_rate": 5e-05, "loss": 1.4943, "step": 6196 }, { "epoch": 0.991678668586974, "grad_norm": 0.36973318457603455, "learning_rate": 5e-05, "loss": 1.4722, "step": 6197 }, { "epoch": 0.9918386941910706, "grad_norm": 0.3921651840209961, "learning_rate": 5e-05, "loss": 1.5072, "step": 6198 }, { "epoch": 0.9919987197951672, "grad_norm": 0.3687317669391632, "learning_rate": 5e-05, "loss": 1.4507, "step": 6199 }, { "epoch": 0.9921587453992639, "grad_norm": 0.36998608708381653, "learning_rate": 5e-05, "loss": 1.4193, "step": 6200 }, { "epoch": 0.9923187710033605, "grad_norm": 0.38468584418296814, "learning_rate": 5e-05, "loss": 1.4891, "step": 6201 }, { "epoch": 0.9924787966074572, "grad_norm": 0.3681560158729553, "learning_rate": 5e-05, "loss": 1.4372, "step": 6202 }, { "epoch": 0.9926388222115539, "grad_norm": 0.3657340407371521, "learning_rate": 5e-05, "loss": 1.3439, "step": 6203 }, { "epoch": 0.9927988478156505, "grad_norm": 0.36418864130973816, "learning_rate": 5e-05, "loss": 1.4321, "step": 6204 }, { "epoch": 0.9929588734197472, "grad_norm": 0.378925085067749, "learning_rate": 5e-05, "loss": 1.514, "step": 6205 }, { "epoch": 0.9931188990238438, "grad_norm": 0.3869413137435913, "learning_rate": 5e-05, "loss": 1.4659, "step": 6206 }, { "epoch": 0.9932789246279404, "grad_norm": 0.37501460313796997, "learning_rate": 5e-05, "loss": 1.473, "step": 6207 }, { "epoch": 0.9934389502320371, "grad_norm": 0.36853206157684326, "learning_rate": 5e-05, "loss": 1.4791, "step": 6208 }, { "epoch": 0.9935989758361338, "grad_norm": 0.3566480875015259, "learning_rate": 5e-05, "loss": 1.444, "step": 6209 }, { "epoch": 0.9937590014402304, "grad_norm": 0.3703502118587494, "learning_rate": 5e-05, "loss": 1.3565, "step": 6210 }, { "epoch": 0.9939190270443271, "grad_norm": 0.3924415707588196, "learning_rate": 5e-05, "loss": 1.4313, "step": 6211 }, { "epoch": 0.9940790526484238, "grad_norm": 0.38078469038009644, "learning_rate": 5e-05, "loss": 1.4269, "step": 6212 }, { "epoch": 0.9942390782525204, "grad_norm": 0.3639003038406372, "learning_rate": 5e-05, "loss": 1.4292, "step": 6213 }, { "epoch": 0.994399103856617, "grad_norm": 0.3721363842487335, "learning_rate": 5e-05, "loss": 1.4049, "step": 6214 }, { "epoch": 0.9945591294607137, "grad_norm": 0.368726521730423, "learning_rate": 5e-05, "loss": 1.4205, "step": 6215 }, { "epoch": 0.9947191550648103, "grad_norm": 0.3573141396045685, "learning_rate": 5e-05, "loss": 1.3771, "step": 6216 }, { "epoch": 0.994879180668907, "grad_norm": 0.38954958319664, "learning_rate": 5e-05, "loss": 1.4095, "step": 6217 }, { "epoch": 0.9950392062730037, "grad_norm": 0.3865293860435486, "learning_rate": 5e-05, "loss": 1.4251, "step": 6218 }, { "epoch": 0.9951992318771004, "grad_norm": 0.37363579869270325, "learning_rate": 5e-05, "loss": 1.4175, "step": 6219 }, { "epoch": 0.995359257481197, "grad_norm": 0.3840008080005646, "learning_rate": 5e-05, "loss": 1.4383, "step": 6220 }, { "epoch": 0.9955192830852937, "grad_norm": 0.3852651119232178, "learning_rate": 5e-05, "loss": 1.4143, "step": 6221 }, { "epoch": 0.9956793086893903, "grad_norm": 0.3795446455478668, "learning_rate": 5e-05, "loss": 1.4061, "step": 6222 }, { "epoch": 0.9958393342934869, "grad_norm": 0.3958345949649811, "learning_rate": 5e-05, "loss": 1.4804, "step": 6223 }, { "epoch": 0.9959993598975836, "grad_norm": 0.36895808577537537, "learning_rate": 5e-05, "loss": 1.3903, "step": 6224 }, { "epoch": 0.9961593855016803, "grad_norm": 0.366411030292511, "learning_rate": 5e-05, "loss": 1.4409, "step": 6225 }, { "epoch": 0.9963194111057769, "grad_norm": 0.38369104266166687, "learning_rate": 5e-05, "loss": 1.4449, "step": 6226 }, { "epoch": 0.9964794367098736, "grad_norm": 0.3787238895893097, "learning_rate": 5e-05, "loss": 1.4955, "step": 6227 }, { "epoch": 0.9966394623139703, "grad_norm": 0.38204413652420044, "learning_rate": 5e-05, "loss": 1.4669, "step": 6228 }, { "epoch": 0.9967994879180669, "grad_norm": 0.3689804673194885, "learning_rate": 5e-05, "loss": 1.4135, "step": 6229 }, { "epoch": 0.9969595135221635, "grad_norm": 0.3910772502422333, "learning_rate": 5e-05, "loss": 1.4229, "step": 6230 }, { "epoch": 0.9971195391262602, "grad_norm": 0.38142111897468567, "learning_rate": 5e-05, "loss": 1.4197, "step": 6231 }, { "epoch": 0.9972795647303568, "grad_norm": 0.37101536989212036, "learning_rate": 5e-05, "loss": 1.3743, "step": 6232 }, { "epoch": 0.9974395903344535, "grad_norm": 0.38951823115348816, "learning_rate": 5e-05, "loss": 1.5304, "step": 6233 }, { "epoch": 0.9975996159385502, "grad_norm": 0.3799716830253601, "learning_rate": 5e-05, "loss": 1.4344, "step": 6234 }, { "epoch": 0.9977596415426468, "grad_norm": 0.3829742968082428, "learning_rate": 5e-05, "loss": 1.4402, "step": 6235 }, { "epoch": 0.9979196671467435, "grad_norm": 0.3828054964542389, "learning_rate": 5e-05, "loss": 1.5071, "step": 6236 }, { "epoch": 0.9980796927508402, "grad_norm": 0.3755035698413849, "learning_rate": 5e-05, "loss": 1.4148, "step": 6237 }, { "epoch": 0.9982397183549367, "grad_norm": 0.38435012102127075, "learning_rate": 5e-05, "loss": 1.3972, "step": 6238 }, { "epoch": 0.9983997439590334, "grad_norm": 0.38449162244796753, "learning_rate": 5e-05, "loss": 1.5162, "step": 6239 }, { "epoch": 0.9985597695631301, "grad_norm": 0.38746777176856995, "learning_rate": 5e-05, "loss": 1.4634, "step": 6240 }, { "epoch": 0.9987197951672268, "grad_norm": 0.37957242131233215, "learning_rate": 5e-05, "loss": 1.3253, "step": 6241 }, { "epoch": 0.9988798207713234, "grad_norm": 0.3884211778640747, "learning_rate": 5e-05, "loss": 1.4563, "step": 6242 }, { "epoch": 0.9990398463754201, "grad_norm": 0.3816871643066406, "learning_rate": 5e-05, "loss": 1.4027, "step": 6243 }, { "epoch": 0.9991998719795168, "grad_norm": 0.3693656921386719, "learning_rate": 5e-05, "loss": 1.4169, "step": 6244 }, { "epoch": 0.9993598975836134, "grad_norm": 0.3594132959842682, "learning_rate": 5e-05, "loss": 1.3676, "step": 6245 }, { "epoch": 0.99951992318771, "grad_norm": 0.37141624093055725, "learning_rate": 5e-05, "loss": 1.4034, "step": 6246 }, { "epoch": 0.9996799487918067, "grad_norm": 0.38442063331604004, "learning_rate": 5e-05, "loss": 1.4023, "step": 6247 }, { "epoch": 0.9998399743959033, "grad_norm": 0.36132052540779114, "learning_rate": 5e-05, "loss": 1.4419, "step": 6248 }, { "epoch": 1.0, "grad_norm": 0.38786742091178894, "learning_rate": 5e-05, "loss": 1.4474, "step": 6249 }, { "epoch": 1.0, "step": 6249, "total_flos": 3.552588145698118e+19, "train_loss": 1.5197621280621636, "train_runtime": 76672.7845, "train_samples_per_second": 0.652, "train_steps_per_second": 0.082 } ], "logging_steps": 1, "max_steps": 6249, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 750, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.552588145698118e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }