{ "best_global_step": 18445, "best_metric": 67.47337717729287, "best_model_checkpoint": "whisper-tiny-bfloat16-sada/checkpoints/checkpoint-18445", "epoch": 1.2497459177451047, "eval_steps": 3689, "global_step": 18445, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.775526797208483e-05, "grad_norm": 132.70848083496094, "learning_rate": 0.0, "loss": 4.4561, "step": 1 }, { "epoch": 0.00013551053594416967, "grad_norm": 133.01596069335938, "learning_rate": 1.3333333333333334e-07, "loss": 4.8642, "step": 2 }, { "epoch": 0.0002032658039162545, "grad_norm": 106.03809356689453, "learning_rate": 2.6666666666666667e-07, "loss": 4.3778, "step": 3 }, { "epoch": 0.00027102107188833934, "grad_norm": 88.75433349609375, "learning_rate": 4.0000000000000003e-07, "loss": 4.0385, "step": 4 }, { "epoch": 0.00033877633986042414, "grad_norm": 101.19609832763672, "learning_rate": 5.333333333333333e-07, "loss": 3.9092, "step": 5 }, { "epoch": 0.000406531607832509, "grad_norm": 67.02723693847656, "learning_rate": 6.666666666666667e-07, "loss": 3.7556, "step": 6 }, { "epoch": 0.0004742868758045938, "grad_norm": 67.52012634277344, "learning_rate": 8.000000000000001e-07, "loss": 3.7119, "step": 7 }, { "epoch": 0.0005420421437766787, "grad_norm": 83.27348327636719, "learning_rate": 9.333333333333334e-07, "loss": 3.6125, "step": 8 }, { "epoch": 0.0006097974117487635, "grad_norm": 84.20647430419922, "learning_rate": 1.0666666666666667e-06, "loss": 4.1354, "step": 9 }, { "epoch": 0.0006775526797208483, "grad_norm": 110.12113952636719, "learning_rate": 1.2000000000000002e-06, "loss": 3.9594, "step": 10 }, { "epoch": 0.0007453079476929331, "grad_norm": 84.61614990234375, "learning_rate": 1.3333333333333334e-06, "loss": 4.1415, "step": 11 }, { "epoch": 0.000813063215665018, "grad_norm": 119.0740966796875, "learning_rate": 1.4666666666666667e-06, "loss": 4.0689, "step": 12 }, { "epoch": 0.0008808184836371028, "grad_norm": 122.41352081298828, "learning_rate": 1.6000000000000001e-06, "loss": 4.0834, "step": 13 }, { "epoch": 0.0009485737516091876, "grad_norm": 99.06126403808594, "learning_rate": 1.7333333333333334e-06, "loss": 4.4224, "step": 14 }, { "epoch": 0.0010163290195812724, "grad_norm": 113.89531707763672, "learning_rate": 1.8666666666666669e-06, "loss": 4.0188, "step": 15 }, { "epoch": 0.0010840842875533573, "grad_norm": 70.24198150634766, "learning_rate": 2.0000000000000003e-06, "loss": 3.3951, "step": 16 }, { "epoch": 0.001151839555525442, "grad_norm": 67.99130249023438, "learning_rate": 2.1333333333333334e-06, "loss": 3.7385, "step": 17 }, { "epoch": 0.001219594823497527, "grad_norm": 54.554779052734375, "learning_rate": 2.266666666666667e-06, "loss": 2.9009, "step": 18 }, { "epoch": 0.0012873500914696117, "grad_norm": 47.79149627685547, "learning_rate": 2.4000000000000003e-06, "loss": 3.0572, "step": 19 }, { "epoch": 0.0013551053594416966, "grad_norm": 50.05776596069336, "learning_rate": 2.5333333333333334e-06, "loss": 2.8678, "step": 20 }, { "epoch": 0.0014228606274137815, "grad_norm": 55.41701889038086, "learning_rate": 2.666666666666667e-06, "loss": 3.3981, "step": 21 }, { "epoch": 0.0014906158953858662, "grad_norm": 57.1912727355957, "learning_rate": 2.8000000000000003e-06, "loss": 3.3247, "step": 22 }, { "epoch": 0.001558371163357951, "grad_norm": 39.77119445800781, "learning_rate": 2.9333333333333333e-06, "loss": 3.1466, "step": 23 }, { "epoch": 0.001626126431330036, "grad_norm": 37.258392333984375, "learning_rate": 3.066666666666667e-06, "loss": 3.0137, "step": 24 }, { "epoch": 0.0016938816993021207, "grad_norm": 41.15428924560547, "learning_rate": 3.2000000000000003e-06, "loss": 2.9908, "step": 25 }, { "epoch": 0.0017616369672742056, "grad_norm": 44.59917449951172, "learning_rate": 3.3333333333333333e-06, "loss": 3.6947, "step": 26 }, { "epoch": 0.0018293922352462903, "grad_norm": 32.7044792175293, "learning_rate": 3.466666666666667e-06, "loss": 2.9421, "step": 27 }, { "epoch": 0.0018971475032183752, "grad_norm": 28.445444107055664, "learning_rate": 3.6e-06, "loss": 2.9849, "step": 28 }, { "epoch": 0.00196490277119046, "grad_norm": 30.879247665405273, "learning_rate": 3.7333333333333337e-06, "loss": 2.9007, "step": 29 }, { "epoch": 0.002032658039162545, "grad_norm": 22.723188400268555, "learning_rate": 3.866666666666667e-06, "loss": 2.3549, "step": 30 }, { "epoch": 0.0021004133071346296, "grad_norm": 23.59457778930664, "learning_rate": 4.000000000000001e-06, "loss": 2.8564, "step": 31 }, { "epoch": 0.0021681685751067147, "grad_norm": 23.313579559326172, "learning_rate": 4.133333333333333e-06, "loss": 2.453, "step": 32 }, { "epoch": 0.0022359238430787994, "grad_norm": 23.877405166625977, "learning_rate": 4.266666666666667e-06, "loss": 2.2042, "step": 33 }, { "epoch": 0.002303679111050884, "grad_norm": 29.79376792907715, "learning_rate": 4.4e-06, "loss": 2.6679, "step": 34 }, { "epoch": 0.002371434379022969, "grad_norm": 21.018878936767578, "learning_rate": 4.533333333333334e-06, "loss": 2.3983, "step": 35 }, { "epoch": 0.002439189646995054, "grad_norm": 30.22182846069336, "learning_rate": 4.666666666666667e-06, "loss": 2.8668, "step": 36 }, { "epoch": 0.0025069449149671386, "grad_norm": 24.619905471801758, "learning_rate": 4.800000000000001e-06, "loss": 2.5986, "step": 37 }, { "epoch": 0.0025747001829392233, "grad_norm": 25.470375061035156, "learning_rate": 4.933333333333333e-06, "loss": 2.4936, "step": 38 }, { "epoch": 0.0026424554509113084, "grad_norm": 19.872745513916016, "learning_rate": 5.066666666666667e-06, "loss": 2.1633, "step": 39 }, { "epoch": 0.002710210718883393, "grad_norm": 24.880964279174805, "learning_rate": 5.2e-06, "loss": 2.6798, "step": 40 }, { "epoch": 0.002777965986855478, "grad_norm": 22.790157318115234, "learning_rate": 5.333333333333334e-06, "loss": 2.487, "step": 41 }, { "epoch": 0.002845721254827563, "grad_norm": 21.404136657714844, "learning_rate": 5.466666666666667e-06, "loss": 2.3383, "step": 42 }, { "epoch": 0.0029134765227996477, "grad_norm": 22.80590057373047, "learning_rate": 5.600000000000001e-06, "loss": 2.6347, "step": 43 }, { "epoch": 0.0029812317907717324, "grad_norm": 20.625160217285156, "learning_rate": 5.733333333333333e-06, "loss": 2.066, "step": 44 }, { "epoch": 0.0030489870587438175, "grad_norm": 21.700828552246094, "learning_rate": 5.866666666666667e-06, "loss": 2.188, "step": 45 }, { "epoch": 0.003116742326715902, "grad_norm": 18.8945255279541, "learning_rate": 6e-06, "loss": 2.1531, "step": 46 }, { "epoch": 0.003184497594687987, "grad_norm": 20.238525390625, "learning_rate": 6.133333333333334e-06, "loss": 1.9306, "step": 47 }, { "epoch": 0.003252252862660072, "grad_norm": 23.74344825744629, "learning_rate": 6.266666666666666e-06, "loss": 2.3206, "step": 48 }, { "epoch": 0.0033200081306321567, "grad_norm": 22.429964065551758, "learning_rate": 6.4000000000000006e-06, "loss": 2.05, "step": 49 }, { "epoch": 0.0033877633986042414, "grad_norm": 22.078792572021484, "learning_rate": 6.533333333333333e-06, "loss": 2.1155, "step": 50 }, { "epoch": 0.003455518666576326, "grad_norm": 19.691747665405273, "learning_rate": 6.666666666666667e-06, "loss": 1.8719, "step": 51 }, { "epoch": 0.0035232739345484113, "grad_norm": 27.412866592407227, "learning_rate": 6.800000000000001e-06, "loss": 2.0943, "step": 52 }, { "epoch": 0.003591029202520496, "grad_norm": 21.631580352783203, "learning_rate": 6.933333333333334e-06, "loss": 2.246, "step": 53 }, { "epoch": 0.0036587844704925807, "grad_norm": 16.729095458984375, "learning_rate": 7.066666666666667e-06, "loss": 1.8101, "step": 54 }, { "epoch": 0.003726539738464666, "grad_norm": 18.381364822387695, "learning_rate": 7.2e-06, "loss": 1.8668, "step": 55 }, { "epoch": 0.0037942950064367505, "grad_norm": 19.661123275756836, "learning_rate": 7.333333333333334e-06, "loss": 2.1912, "step": 56 }, { "epoch": 0.003862050274408835, "grad_norm": 13.772311210632324, "learning_rate": 7.4666666666666675e-06, "loss": 1.7408, "step": 57 }, { "epoch": 0.00392980554238092, "grad_norm": 20.559425354003906, "learning_rate": 7.6e-06, "loss": 1.9717, "step": 58 }, { "epoch": 0.003997560810353005, "grad_norm": 17.385791778564453, "learning_rate": 7.733333333333334e-06, "loss": 1.7681, "step": 59 }, { "epoch": 0.00406531607832509, "grad_norm": 18.455888748168945, "learning_rate": 7.866666666666667e-06, "loss": 1.8822, "step": 60 }, { "epoch": 0.004133071346297174, "grad_norm": 17.53643798828125, "learning_rate": 8.000000000000001e-06, "loss": 1.8158, "step": 61 }, { "epoch": 0.004200826614269259, "grad_norm": 15.640396118164062, "learning_rate": 8.133333333333332e-06, "loss": 1.8786, "step": 62 }, { "epoch": 0.004268581882241345, "grad_norm": 67.14771270751953, "learning_rate": 8.266666666666667e-06, "loss": 1.845, "step": 63 }, { "epoch": 0.004336337150213429, "grad_norm": 16.778207778930664, "learning_rate": 8.400000000000001e-06, "loss": 1.7286, "step": 64 }, { "epoch": 0.004404092418185514, "grad_norm": 20.485218048095703, "learning_rate": 8.533333333333334e-06, "loss": 1.882, "step": 65 }, { "epoch": 0.004471847686157599, "grad_norm": 15.77828598022461, "learning_rate": 8.666666666666668e-06, "loss": 1.7788, "step": 66 }, { "epoch": 0.0045396029541296835, "grad_norm": 20.225608825683594, "learning_rate": 8.8e-06, "loss": 1.9842, "step": 67 }, { "epoch": 0.004607358222101768, "grad_norm": 16.230987548828125, "learning_rate": 8.933333333333333e-06, "loss": 1.4581, "step": 68 }, { "epoch": 0.004675113490073853, "grad_norm": 13.831804275512695, "learning_rate": 9.066666666666667e-06, "loss": 1.6173, "step": 69 }, { "epoch": 0.004742868758045938, "grad_norm": 16.4691104888916, "learning_rate": 9.2e-06, "loss": 1.6055, "step": 70 }, { "epoch": 0.004810624026018023, "grad_norm": 17.788105010986328, "learning_rate": 9.333333333333334e-06, "loss": 1.7996, "step": 71 }, { "epoch": 0.004878379293990108, "grad_norm": 20.043182373046875, "learning_rate": 9.466666666666667e-06, "loss": 1.7224, "step": 72 }, { "epoch": 0.0049461345619621925, "grad_norm": 14.918837547302246, "learning_rate": 9.600000000000001e-06, "loss": 1.4448, "step": 73 }, { "epoch": 0.005013889829934277, "grad_norm": 15.70170783996582, "learning_rate": 9.733333333333334e-06, "loss": 1.7469, "step": 74 }, { "epoch": 0.005081645097906362, "grad_norm": 19.128456115722656, "learning_rate": 9.866666666666667e-06, "loss": 1.9505, "step": 75 }, { "epoch": 0.005149400365878447, "grad_norm": 34.82925796508789, "learning_rate": 1e-05, "loss": 1.6543, "step": 76 }, { "epoch": 0.005217155633850532, "grad_norm": 19.61214256286621, "learning_rate": 1.0133333333333333e-05, "loss": 1.9051, "step": 77 }, { "epoch": 0.005284910901822617, "grad_norm": 20.190847396850586, "learning_rate": 1.0266666666666668e-05, "loss": 1.6016, "step": 78 }, { "epoch": 0.005352666169794702, "grad_norm": 15.873608589172363, "learning_rate": 1.04e-05, "loss": 1.8286, "step": 79 }, { "epoch": 0.005420421437766786, "grad_norm": 17.513385772705078, "learning_rate": 1.0533333333333335e-05, "loss": 1.7122, "step": 80 }, { "epoch": 0.005488176705738871, "grad_norm": 13.794294357299805, "learning_rate": 1.0666666666666667e-05, "loss": 1.5369, "step": 81 }, { "epoch": 0.005555931973710956, "grad_norm": 16.763822555541992, "learning_rate": 1.08e-05, "loss": 1.688, "step": 82 }, { "epoch": 0.005623687241683041, "grad_norm": 15.182608604431152, "learning_rate": 1.0933333333333334e-05, "loss": 1.9076, "step": 83 }, { "epoch": 0.005691442509655126, "grad_norm": 19.11581802368164, "learning_rate": 1.1066666666666667e-05, "loss": 1.6034, "step": 84 }, { "epoch": 0.005759197777627211, "grad_norm": 18.303098678588867, "learning_rate": 1.1200000000000001e-05, "loss": 1.8248, "step": 85 }, { "epoch": 0.005826953045599295, "grad_norm": 17.912731170654297, "learning_rate": 1.1333333333333334e-05, "loss": 1.737, "step": 86 }, { "epoch": 0.00589470831357138, "grad_norm": 18.4353084564209, "learning_rate": 1.1466666666666666e-05, "loss": 1.7141, "step": 87 }, { "epoch": 0.005962463581543465, "grad_norm": 17.006074905395508, "learning_rate": 1.16e-05, "loss": 1.6568, "step": 88 }, { "epoch": 0.0060302188495155494, "grad_norm": 15.084410667419434, "learning_rate": 1.1733333333333333e-05, "loss": 1.3369, "step": 89 }, { "epoch": 0.006097974117487635, "grad_norm": 14.984724044799805, "learning_rate": 1.1866666666666668e-05, "loss": 1.777, "step": 90 }, { "epoch": 0.00616572938545972, "grad_norm": 19.447574615478516, "learning_rate": 1.2e-05, "loss": 1.4225, "step": 91 }, { "epoch": 0.006233484653431804, "grad_norm": 15.212474822998047, "learning_rate": 1.2133333333333335e-05, "loss": 1.4896, "step": 92 }, { "epoch": 0.006301239921403889, "grad_norm": 17.97924041748047, "learning_rate": 1.2266666666666667e-05, "loss": 1.5947, "step": 93 }, { "epoch": 0.006368995189375974, "grad_norm": 13.695365905761719, "learning_rate": 1.24e-05, "loss": 1.471, "step": 94 }, { "epoch": 0.0064367504573480585, "grad_norm": 15.574562072753906, "learning_rate": 1.2533333333333332e-05, "loss": 1.5803, "step": 95 }, { "epoch": 0.006504505725320144, "grad_norm": 16.71872329711914, "learning_rate": 1.2666666666666668e-05, "loss": 1.7408, "step": 96 }, { "epoch": 0.006572260993292229, "grad_norm": 14.232036590576172, "learning_rate": 1.2800000000000001e-05, "loss": 1.4414, "step": 97 }, { "epoch": 0.0066400162612643135, "grad_norm": 14.430774688720703, "learning_rate": 1.2933333333333334e-05, "loss": 1.712, "step": 98 }, { "epoch": 0.006707771529236398, "grad_norm": 15.887603759765625, "learning_rate": 1.3066666666666666e-05, "loss": 1.6097, "step": 99 }, { "epoch": 0.006775526797208483, "grad_norm": 21.155643463134766, "learning_rate": 1.32e-05, "loss": 2.0581, "step": 100 }, { "epoch": 0.0068432820651805676, "grad_norm": 17.30326271057129, "learning_rate": 1.3333333333333333e-05, "loss": 1.2823, "step": 101 }, { "epoch": 0.006911037333152652, "grad_norm": 18.184707641601562, "learning_rate": 1.3466666666666666e-05, "loss": 1.5165, "step": 102 }, { "epoch": 0.006978792601124738, "grad_norm": 23.46563148498535, "learning_rate": 1.3600000000000002e-05, "loss": 1.406, "step": 103 }, { "epoch": 0.0070465478690968225, "grad_norm": 14.586087226867676, "learning_rate": 1.3733333333333335e-05, "loss": 1.8416, "step": 104 }, { "epoch": 0.007114303137068907, "grad_norm": 14.55811595916748, "learning_rate": 1.3866666666666667e-05, "loss": 1.5466, "step": 105 }, { "epoch": 0.007182058405040992, "grad_norm": 20.244586944580078, "learning_rate": 1.4000000000000001e-05, "loss": 1.755, "step": 106 }, { "epoch": 0.007249813673013077, "grad_norm": 11.932172775268555, "learning_rate": 1.4133333333333334e-05, "loss": 1.4543, "step": 107 }, { "epoch": 0.007317568940985161, "grad_norm": 17.406888961791992, "learning_rate": 1.4266666666666667e-05, "loss": 1.4436, "step": 108 }, { "epoch": 0.007385324208957246, "grad_norm": 14.591705322265625, "learning_rate": 1.44e-05, "loss": 1.4088, "step": 109 }, { "epoch": 0.007453079476929332, "grad_norm": 18.028337478637695, "learning_rate": 1.4533333333333335e-05, "loss": 1.6675, "step": 110 }, { "epoch": 0.007520834744901416, "grad_norm": 16.37979507446289, "learning_rate": 1.4666666666666668e-05, "loss": 1.6646, "step": 111 }, { "epoch": 0.007588590012873501, "grad_norm": 19.028308868408203, "learning_rate": 1.48e-05, "loss": 1.701, "step": 112 }, { "epoch": 0.007656345280845586, "grad_norm": 17.28181266784668, "learning_rate": 1.4933333333333335e-05, "loss": 1.3984, "step": 113 }, { "epoch": 0.00772410054881767, "grad_norm": 16.78407859802246, "learning_rate": 1.5066666666666668e-05, "loss": 1.6851, "step": 114 }, { "epoch": 0.007791855816789755, "grad_norm": 15.634642601013184, "learning_rate": 1.52e-05, "loss": 1.6484, "step": 115 }, { "epoch": 0.00785961108476184, "grad_norm": 13.879626274108887, "learning_rate": 1.5333333333333334e-05, "loss": 1.4609, "step": 116 }, { "epoch": 0.007927366352733925, "grad_norm": 16.247541427612305, "learning_rate": 1.546666666666667e-05, "loss": 1.5219, "step": 117 }, { "epoch": 0.00799512162070601, "grad_norm": 20.57497215270996, "learning_rate": 1.56e-05, "loss": 1.6038, "step": 118 }, { "epoch": 0.008062876888678095, "grad_norm": 16.32591438293457, "learning_rate": 1.5733333333333334e-05, "loss": 1.5315, "step": 119 }, { "epoch": 0.00813063215665018, "grad_norm": 18.236255645751953, "learning_rate": 1.586666666666667e-05, "loss": 1.6589, "step": 120 }, { "epoch": 0.008198387424622264, "grad_norm": 20.850723266601562, "learning_rate": 1.6000000000000003e-05, "loss": 1.5556, "step": 121 }, { "epoch": 0.008266142692594349, "grad_norm": 17.931947708129883, "learning_rate": 1.6133333333333334e-05, "loss": 1.8209, "step": 122 }, { "epoch": 0.008333897960566434, "grad_norm": 20.531381607055664, "learning_rate": 1.6266666666666665e-05, "loss": 1.6421, "step": 123 }, { "epoch": 0.008401653228538518, "grad_norm": 15.6729736328125, "learning_rate": 1.6400000000000002e-05, "loss": 1.5652, "step": 124 }, { "epoch": 0.008469408496510603, "grad_norm": 18.139949798583984, "learning_rate": 1.6533333333333333e-05, "loss": 1.5578, "step": 125 }, { "epoch": 0.00853716376448269, "grad_norm": 15.77291202545166, "learning_rate": 1.6666666666666667e-05, "loss": 1.6091, "step": 126 }, { "epoch": 0.008604919032454774, "grad_norm": 18.00165557861328, "learning_rate": 1.6800000000000002e-05, "loss": 1.4725, "step": 127 }, { "epoch": 0.008672674300426859, "grad_norm": 17.33936309814453, "learning_rate": 1.6933333333333333e-05, "loss": 1.4272, "step": 128 }, { "epoch": 0.008740429568398943, "grad_norm": 16.75677490234375, "learning_rate": 1.7066666666666667e-05, "loss": 1.602, "step": 129 }, { "epoch": 0.008808184836371028, "grad_norm": 17.761140823364258, "learning_rate": 1.7199999999999998e-05, "loss": 1.8523, "step": 130 }, { "epoch": 0.008875940104343113, "grad_norm": 15.888497352600098, "learning_rate": 1.7333333333333336e-05, "loss": 1.5594, "step": 131 }, { "epoch": 0.008943695372315198, "grad_norm": 19.62826156616211, "learning_rate": 1.7466666666666667e-05, "loss": 1.6909, "step": 132 }, { "epoch": 0.009011450640287282, "grad_norm": 15.62592887878418, "learning_rate": 1.76e-05, "loss": 1.5873, "step": 133 }, { "epoch": 0.009079205908259367, "grad_norm": 17.64893913269043, "learning_rate": 1.7733333333333335e-05, "loss": 1.5681, "step": 134 }, { "epoch": 0.009146961176231452, "grad_norm": 15.14854907989502, "learning_rate": 1.7866666666666666e-05, "loss": 1.3324, "step": 135 }, { "epoch": 0.009214716444203536, "grad_norm": 18.115419387817383, "learning_rate": 1.8e-05, "loss": 1.5512, "step": 136 }, { "epoch": 0.009282471712175621, "grad_norm": 18.2511043548584, "learning_rate": 1.8133333333333335e-05, "loss": 1.6405, "step": 137 }, { "epoch": 0.009350226980147706, "grad_norm": 13.296855926513672, "learning_rate": 1.826666666666667e-05, "loss": 1.2269, "step": 138 }, { "epoch": 0.009417982248119792, "grad_norm": 17.17320442199707, "learning_rate": 1.84e-05, "loss": 1.4031, "step": 139 }, { "epoch": 0.009485737516091877, "grad_norm": 15.399650573730469, "learning_rate": 1.8533333333333334e-05, "loss": 1.4793, "step": 140 }, { "epoch": 0.009553492784063962, "grad_norm": 15.0385160446167, "learning_rate": 1.866666666666667e-05, "loss": 1.4129, "step": 141 }, { "epoch": 0.009621248052036046, "grad_norm": 14.586984634399414, "learning_rate": 1.88e-05, "loss": 1.1959, "step": 142 }, { "epoch": 0.009689003320008131, "grad_norm": 14.939247131347656, "learning_rate": 1.8933333333333334e-05, "loss": 1.6728, "step": 143 }, { "epoch": 0.009756758587980216, "grad_norm": 18.1126651763916, "learning_rate": 1.9066666666666668e-05, "loss": 1.5035, "step": 144 }, { "epoch": 0.0098245138559523, "grad_norm": 16.0823974609375, "learning_rate": 1.9200000000000003e-05, "loss": 1.544, "step": 145 }, { "epoch": 0.009892269123924385, "grad_norm": 17.45062828063965, "learning_rate": 1.9333333333333333e-05, "loss": 1.4593, "step": 146 }, { "epoch": 0.00996002439189647, "grad_norm": 21.030731201171875, "learning_rate": 1.9466666666666668e-05, "loss": 1.6544, "step": 147 }, { "epoch": 0.010027779659868554, "grad_norm": 18.31300163269043, "learning_rate": 1.9600000000000002e-05, "loss": 1.7682, "step": 148 }, { "epoch": 0.01009553492784064, "grad_norm": 13.541064262390137, "learning_rate": 1.9733333333333333e-05, "loss": 1.333, "step": 149 }, { "epoch": 0.010163290195812724, "grad_norm": 13.538237571716309, "learning_rate": 1.9866666666666667e-05, "loss": 1.3629, "step": 150 }, { "epoch": 0.010231045463784809, "grad_norm": 18.393657684326172, "learning_rate": 2e-05, "loss": 1.4467, "step": 151 }, { "epoch": 0.010298800731756893, "grad_norm": 19.18985939025879, "learning_rate": 2.0133333333333336e-05, "loss": 1.612, "step": 152 }, { "epoch": 0.01036655599972898, "grad_norm": 14.230488777160645, "learning_rate": 2.0266666666666667e-05, "loss": 1.4047, "step": 153 }, { "epoch": 0.010434311267701064, "grad_norm": 13.807863235473633, "learning_rate": 2.04e-05, "loss": 1.4977, "step": 154 }, { "epoch": 0.010502066535673149, "grad_norm": 15.149568557739258, "learning_rate": 2.0533333333333336e-05, "loss": 1.3856, "step": 155 }, { "epoch": 0.010569821803645234, "grad_norm": 16.095308303833008, "learning_rate": 2.0666666666666666e-05, "loss": 1.3687, "step": 156 }, { "epoch": 0.010637577071617318, "grad_norm": 14.864961624145508, "learning_rate": 2.08e-05, "loss": 1.5707, "step": 157 }, { "epoch": 0.010705332339589403, "grad_norm": 15.9810152053833, "learning_rate": 2.0933333333333335e-05, "loss": 1.6841, "step": 158 }, { "epoch": 0.010773087607561488, "grad_norm": 16.706584930419922, "learning_rate": 2.106666666666667e-05, "loss": 1.5017, "step": 159 }, { "epoch": 0.010840842875533573, "grad_norm": 18.726655960083008, "learning_rate": 2.12e-05, "loss": 1.7177, "step": 160 }, { "epoch": 0.010908598143505657, "grad_norm": 17.22323989868164, "learning_rate": 2.1333333333333335e-05, "loss": 1.5452, "step": 161 }, { "epoch": 0.010976353411477742, "grad_norm": 14.823019027709961, "learning_rate": 2.146666666666667e-05, "loss": 1.2856, "step": 162 }, { "epoch": 0.011044108679449827, "grad_norm": 14.756085395812988, "learning_rate": 2.16e-05, "loss": 1.4942, "step": 163 }, { "epoch": 0.011111863947421911, "grad_norm": 18.523683547973633, "learning_rate": 2.1733333333333334e-05, "loss": 1.4832, "step": 164 }, { "epoch": 0.011179619215393996, "grad_norm": 16.75520896911621, "learning_rate": 2.186666666666667e-05, "loss": 1.1847, "step": 165 }, { "epoch": 0.011247374483366082, "grad_norm": 17.045665740966797, "learning_rate": 2.2000000000000003e-05, "loss": 1.6358, "step": 166 }, { "epoch": 0.011315129751338167, "grad_norm": 17.235095977783203, "learning_rate": 2.2133333333333334e-05, "loss": 1.6984, "step": 167 }, { "epoch": 0.011382885019310252, "grad_norm": 18.120485305786133, "learning_rate": 2.2266666666666668e-05, "loss": 1.5544, "step": 168 }, { "epoch": 0.011450640287282337, "grad_norm": 15.252264976501465, "learning_rate": 2.2400000000000002e-05, "loss": 1.4386, "step": 169 }, { "epoch": 0.011518395555254421, "grad_norm": 13.91727066040039, "learning_rate": 2.2533333333333333e-05, "loss": 1.2083, "step": 170 }, { "epoch": 0.011586150823226506, "grad_norm": 16.50945472717285, "learning_rate": 2.2666666666666668e-05, "loss": 1.5878, "step": 171 }, { "epoch": 0.01165390609119859, "grad_norm": 12.260165214538574, "learning_rate": 2.2800000000000002e-05, "loss": 1.4173, "step": 172 }, { "epoch": 0.011721661359170675, "grad_norm": 17.12419891357422, "learning_rate": 2.2933333333333333e-05, "loss": 1.3117, "step": 173 }, { "epoch": 0.01178941662714276, "grad_norm": 17.508407592773438, "learning_rate": 2.3066666666666667e-05, "loss": 1.4294, "step": 174 }, { "epoch": 0.011857171895114845, "grad_norm": 17.78769302368164, "learning_rate": 2.32e-05, "loss": 1.8437, "step": 175 }, { "epoch": 0.01192492716308693, "grad_norm": 13.79132080078125, "learning_rate": 2.3333333333333336e-05, "loss": 1.3969, "step": 176 }, { "epoch": 0.011992682431059014, "grad_norm": 16.519269943237305, "learning_rate": 2.3466666666666667e-05, "loss": 1.5833, "step": 177 }, { "epoch": 0.012060437699031099, "grad_norm": 15.920942306518555, "learning_rate": 2.36e-05, "loss": 1.4783, "step": 178 }, { "epoch": 0.012128192967003185, "grad_norm": 17.00782585144043, "learning_rate": 2.3733333333333335e-05, "loss": 1.4038, "step": 179 }, { "epoch": 0.01219594823497527, "grad_norm": 18.844655990600586, "learning_rate": 2.3866666666666666e-05, "loss": 1.653, "step": 180 }, { "epoch": 0.012263703502947355, "grad_norm": 18.05002784729004, "learning_rate": 2.4e-05, "loss": 1.4137, "step": 181 }, { "epoch": 0.01233145877091944, "grad_norm": 14.200697898864746, "learning_rate": 2.4133333333333335e-05, "loss": 1.3067, "step": 182 }, { "epoch": 0.012399214038891524, "grad_norm": 16.28152084350586, "learning_rate": 2.426666666666667e-05, "loss": 1.3425, "step": 183 }, { "epoch": 0.012466969306863609, "grad_norm": 18.691898345947266, "learning_rate": 2.44e-05, "loss": 1.5995, "step": 184 }, { "epoch": 0.012534724574835693, "grad_norm": 16.62773895263672, "learning_rate": 2.4533333333333334e-05, "loss": 1.5722, "step": 185 }, { "epoch": 0.012602479842807778, "grad_norm": 19.650226593017578, "learning_rate": 2.466666666666667e-05, "loss": 1.5445, "step": 186 }, { "epoch": 0.012670235110779863, "grad_norm": 14.582402229309082, "learning_rate": 2.48e-05, "loss": 1.4822, "step": 187 }, { "epoch": 0.012737990378751948, "grad_norm": 16.5295352935791, "learning_rate": 2.4933333333333334e-05, "loss": 1.5981, "step": 188 }, { "epoch": 0.012805745646724032, "grad_norm": 17.096031188964844, "learning_rate": 2.5066666666666665e-05, "loss": 1.2527, "step": 189 }, { "epoch": 0.012873500914696117, "grad_norm": 15.424555778503418, "learning_rate": 2.5200000000000003e-05, "loss": 1.5284, "step": 190 }, { "epoch": 0.012941256182668202, "grad_norm": 14.505122184753418, "learning_rate": 2.5333333333333337e-05, "loss": 1.5726, "step": 191 }, { "epoch": 0.013009011450640288, "grad_norm": 13.430007934570312, "learning_rate": 2.5466666666666668e-05, "loss": 1.5905, "step": 192 }, { "epoch": 0.013076766718612373, "grad_norm": 12.291808128356934, "learning_rate": 2.5600000000000002e-05, "loss": 1.3122, "step": 193 }, { "epoch": 0.013144521986584458, "grad_norm": 16.053747177124023, "learning_rate": 2.5733333333333337e-05, "loss": 1.3164, "step": 194 }, { "epoch": 0.013212277254556542, "grad_norm": 17.762910842895508, "learning_rate": 2.5866666666666667e-05, "loss": 1.4727, "step": 195 }, { "epoch": 0.013280032522528627, "grad_norm": 15.695332527160645, "learning_rate": 2.6000000000000002e-05, "loss": 1.5584, "step": 196 }, { "epoch": 0.013347787790500712, "grad_norm": 14.789410591125488, "learning_rate": 2.6133333333333333e-05, "loss": 1.3253, "step": 197 }, { "epoch": 0.013415543058472796, "grad_norm": 15.248231887817383, "learning_rate": 2.6266666666666667e-05, "loss": 1.3087, "step": 198 }, { "epoch": 0.013483298326444881, "grad_norm": 18.894878387451172, "learning_rate": 2.64e-05, "loss": 1.2755, "step": 199 }, { "epoch": 0.013551053594416966, "grad_norm": 16.611616134643555, "learning_rate": 2.6533333333333332e-05, "loss": 1.4218, "step": 200 }, { "epoch": 0.01361880886238905, "grad_norm": 17.056982040405273, "learning_rate": 2.6666666666666667e-05, "loss": 1.4376, "step": 201 }, { "epoch": 0.013686564130361135, "grad_norm": 15.91457748413086, "learning_rate": 2.6800000000000004e-05, "loss": 1.4836, "step": 202 }, { "epoch": 0.01375431939833322, "grad_norm": 16.371660232543945, "learning_rate": 2.6933333333333332e-05, "loss": 1.7374, "step": 203 }, { "epoch": 0.013822074666305304, "grad_norm": 14.45882511138916, "learning_rate": 2.706666666666667e-05, "loss": 1.3324, "step": 204 }, { "epoch": 0.013889829934277391, "grad_norm": 18.779190063476562, "learning_rate": 2.7200000000000004e-05, "loss": 1.4845, "step": 205 }, { "epoch": 0.013957585202249476, "grad_norm": 19.7078914642334, "learning_rate": 2.733333333333333e-05, "loss": 1.6681, "step": 206 }, { "epoch": 0.01402534047022156, "grad_norm": 20.498266220092773, "learning_rate": 2.746666666666667e-05, "loss": 1.559, "step": 207 }, { "epoch": 0.014093095738193645, "grad_norm": 20.2978515625, "learning_rate": 2.7600000000000003e-05, "loss": 1.5821, "step": 208 }, { "epoch": 0.01416085100616573, "grad_norm": 18.85460090637207, "learning_rate": 2.7733333333333334e-05, "loss": 1.3, "step": 209 }, { "epoch": 0.014228606274137814, "grad_norm": 13.912948608398438, "learning_rate": 2.786666666666667e-05, "loss": 1.4833, "step": 210 }, { "epoch": 0.014296361542109899, "grad_norm": 19.084806442260742, "learning_rate": 2.8000000000000003e-05, "loss": 1.3379, "step": 211 }, { "epoch": 0.014364116810081984, "grad_norm": 15.123217582702637, "learning_rate": 2.8133333333333334e-05, "loss": 1.354, "step": 212 }, { "epoch": 0.014431872078054069, "grad_norm": 14.127299308776855, "learning_rate": 2.8266666666666668e-05, "loss": 1.2341, "step": 213 }, { "epoch": 0.014499627346026153, "grad_norm": 15.609139442443848, "learning_rate": 2.84e-05, "loss": 1.3474, "step": 214 }, { "epoch": 0.014567382613998238, "grad_norm": 20.588394165039062, "learning_rate": 2.8533333333333333e-05, "loss": 1.4881, "step": 215 }, { "epoch": 0.014635137881970323, "grad_norm": 12.27170467376709, "learning_rate": 2.8666666666666668e-05, "loss": 1.2255, "step": 216 }, { "epoch": 0.014702893149942407, "grad_norm": 18.14453125, "learning_rate": 2.88e-05, "loss": 1.6328, "step": 217 }, { "epoch": 0.014770648417914492, "grad_norm": 17.722230911254883, "learning_rate": 2.8933333333333333e-05, "loss": 2.0172, "step": 218 }, { "epoch": 0.014838403685886578, "grad_norm": 14.454687118530273, "learning_rate": 2.906666666666667e-05, "loss": 1.2936, "step": 219 }, { "epoch": 0.014906158953858663, "grad_norm": 16.369625091552734, "learning_rate": 2.9199999999999998e-05, "loss": 1.6289, "step": 220 }, { "epoch": 0.014973914221830748, "grad_norm": 17.64832305908203, "learning_rate": 2.9333333333333336e-05, "loss": 1.3135, "step": 221 }, { "epoch": 0.015041669489802833, "grad_norm": 14.401778221130371, "learning_rate": 2.946666666666667e-05, "loss": 1.2614, "step": 222 }, { "epoch": 0.015109424757774917, "grad_norm": 17.145814895629883, "learning_rate": 2.96e-05, "loss": 1.3473, "step": 223 }, { "epoch": 0.015177180025747002, "grad_norm": 17.273954391479492, "learning_rate": 2.9733333333333336e-05, "loss": 1.3747, "step": 224 }, { "epoch": 0.015244935293719087, "grad_norm": 17.190818786621094, "learning_rate": 2.986666666666667e-05, "loss": 1.5452, "step": 225 }, { "epoch": 0.015312690561691171, "grad_norm": 19.573585510253906, "learning_rate": 3e-05, "loss": 1.4942, "step": 226 }, { "epoch": 0.015380445829663256, "grad_norm": 15.349540710449219, "learning_rate": 3.0133333333333335e-05, "loss": 1.4277, "step": 227 }, { "epoch": 0.01544820109763534, "grad_norm": 19.786211013793945, "learning_rate": 3.0266666666666666e-05, "loss": 1.5014, "step": 228 }, { "epoch": 0.015515956365607425, "grad_norm": 17.51229476928711, "learning_rate": 3.04e-05, "loss": 1.695, "step": 229 }, { "epoch": 0.01558371163357951, "grad_norm": 19.894756317138672, "learning_rate": 3.0533333333333335e-05, "loss": 1.4994, "step": 230 }, { "epoch": 0.015651466901551597, "grad_norm": 13.575235366821289, "learning_rate": 3.066666666666667e-05, "loss": 1.4037, "step": 231 }, { "epoch": 0.01571922216952368, "grad_norm": 18.516822814941406, "learning_rate": 3.08e-05, "loss": 1.5494, "step": 232 }, { "epoch": 0.015786977437495766, "grad_norm": 17.343368530273438, "learning_rate": 3.093333333333334e-05, "loss": 1.1708, "step": 233 }, { "epoch": 0.01585473270546785, "grad_norm": 18.78635597229004, "learning_rate": 3.1066666666666665e-05, "loss": 1.6543, "step": 234 }, { "epoch": 0.015922487973439935, "grad_norm": 16.219989776611328, "learning_rate": 3.12e-05, "loss": 1.381, "step": 235 }, { "epoch": 0.01599024324141202, "grad_norm": 15.987724304199219, "learning_rate": 3.1333333333333334e-05, "loss": 1.3981, "step": 236 }, { "epoch": 0.016057998509384105, "grad_norm": 17.251537322998047, "learning_rate": 3.146666666666667e-05, "loss": 1.8181, "step": 237 }, { "epoch": 0.01612575377735619, "grad_norm": 15.804817199707031, "learning_rate": 3.16e-05, "loss": 1.4755, "step": 238 }, { "epoch": 0.016193509045328274, "grad_norm": 16.508943557739258, "learning_rate": 3.173333333333334e-05, "loss": 1.2167, "step": 239 }, { "epoch": 0.01626126431330036, "grad_norm": 17.382783889770508, "learning_rate": 3.1866666666666664e-05, "loss": 1.6186, "step": 240 }, { "epoch": 0.016329019581272444, "grad_norm": 17.938161849975586, "learning_rate": 3.2000000000000005e-05, "loss": 1.3685, "step": 241 }, { "epoch": 0.016396774849244528, "grad_norm": 21.103981018066406, "learning_rate": 3.213333333333334e-05, "loss": 1.6275, "step": 242 }, { "epoch": 0.016464530117216613, "grad_norm": 15.145224571228027, "learning_rate": 3.226666666666667e-05, "loss": 1.2058, "step": 243 }, { "epoch": 0.016532285385188698, "grad_norm": 15.368849754333496, "learning_rate": 3.24e-05, "loss": 1.3436, "step": 244 }, { "epoch": 0.016600040653160782, "grad_norm": 16.95122718811035, "learning_rate": 3.253333333333333e-05, "loss": 1.4346, "step": 245 }, { "epoch": 0.016667795921132867, "grad_norm": 14.612030029296875, "learning_rate": 3.266666666666667e-05, "loss": 1.4759, "step": 246 }, { "epoch": 0.016735551189104952, "grad_norm": 17.6989688873291, "learning_rate": 3.2800000000000004e-05, "loss": 1.3612, "step": 247 }, { "epoch": 0.016803306457077036, "grad_norm": 19.985427856445312, "learning_rate": 3.293333333333333e-05, "loss": 1.2718, "step": 248 }, { "epoch": 0.01687106172504912, "grad_norm": 12.162117958068848, "learning_rate": 3.3066666666666666e-05, "loss": 1.2133, "step": 249 }, { "epoch": 0.016938816993021206, "grad_norm": 19.053850173950195, "learning_rate": 3.32e-05, "loss": 1.1345, "step": 250 }, { "epoch": 0.01700657226099329, "grad_norm": 15.729182243347168, "learning_rate": 3.3333333333333335e-05, "loss": 1.3079, "step": 251 }, { "epoch": 0.01707432752896538, "grad_norm": 18.984880447387695, "learning_rate": 3.346666666666667e-05, "loss": 1.5124, "step": 252 }, { "epoch": 0.017142082796937463, "grad_norm": 14.444266319274902, "learning_rate": 3.3600000000000004e-05, "loss": 1.4279, "step": 253 }, { "epoch": 0.017209838064909548, "grad_norm": 17.174089431762695, "learning_rate": 3.373333333333333e-05, "loss": 1.1192, "step": 254 }, { "epoch": 0.017277593332881633, "grad_norm": 15.885781288146973, "learning_rate": 3.3866666666666665e-05, "loss": 1.3918, "step": 255 }, { "epoch": 0.017345348600853717, "grad_norm": 14.934944152832031, "learning_rate": 3.4000000000000007e-05, "loss": 1.3348, "step": 256 }, { "epoch": 0.017413103868825802, "grad_norm": 17.977643966674805, "learning_rate": 3.4133333333333334e-05, "loss": 1.392, "step": 257 }, { "epoch": 0.017480859136797887, "grad_norm": 17.527130126953125, "learning_rate": 3.426666666666667e-05, "loss": 1.2939, "step": 258 }, { "epoch": 0.01754861440476997, "grad_norm": 17.823453903198242, "learning_rate": 3.4399999999999996e-05, "loss": 1.3118, "step": 259 }, { "epoch": 0.017616369672742056, "grad_norm": 15.746111869812012, "learning_rate": 3.453333333333334e-05, "loss": 1.3865, "step": 260 }, { "epoch": 0.01768412494071414, "grad_norm": 18.798080444335938, "learning_rate": 3.466666666666667e-05, "loss": 1.399, "step": 261 }, { "epoch": 0.017751880208686226, "grad_norm": 15.139904975891113, "learning_rate": 3.48e-05, "loss": 1.5248, "step": 262 }, { "epoch": 0.01781963547665831, "grad_norm": 16.150434494018555, "learning_rate": 3.493333333333333e-05, "loss": 1.5033, "step": 263 }, { "epoch": 0.017887390744630395, "grad_norm": 19.125490188598633, "learning_rate": 3.506666666666667e-05, "loss": 1.4695, "step": 264 }, { "epoch": 0.01795514601260248, "grad_norm": 15.708895683288574, "learning_rate": 3.52e-05, "loss": 1.6189, "step": 265 }, { "epoch": 0.018022901280574564, "grad_norm": 14.185934066772461, "learning_rate": 3.5333333333333336e-05, "loss": 1.3536, "step": 266 }, { "epoch": 0.01809065654854665, "grad_norm": 15.22371768951416, "learning_rate": 3.546666666666667e-05, "loss": 1.4652, "step": 267 }, { "epoch": 0.018158411816518734, "grad_norm": 15.854277610778809, "learning_rate": 3.56e-05, "loss": 1.7211, "step": 268 }, { "epoch": 0.01822616708449082, "grad_norm": 18.41203498840332, "learning_rate": 3.573333333333333e-05, "loss": 1.54, "step": 269 }, { "epoch": 0.018293922352462903, "grad_norm": 15.323198318481445, "learning_rate": 3.586666666666667e-05, "loss": 1.3582, "step": 270 }, { "epoch": 0.018361677620434988, "grad_norm": 15.056265830993652, "learning_rate": 3.6e-05, "loss": 1.2959, "step": 271 }, { "epoch": 0.018429432888407073, "grad_norm": 18.912555694580078, "learning_rate": 3.6133333333333335e-05, "loss": 1.5964, "step": 272 }, { "epoch": 0.018497188156379157, "grad_norm": 15.093083381652832, "learning_rate": 3.626666666666667e-05, "loss": 1.3403, "step": 273 }, { "epoch": 0.018564943424351242, "grad_norm": 17.38453483581543, "learning_rate": 3.6400000000000004e-05, "loss": 1.5082, "step": 274 }, { "epoch": 0.018632698692323327, "grad_norm": 16.53204345703125, "learning_rate": 3.653333333333334e-05, "loss": 1.2373, "step": 275 }, { "epoch": 0.01870045396029541, "grad_norm": 13.423516273498535, "learning_rate": 3.6666666666666666e-05, "loss": 1.3111, "step": 276 }, { "epoch": 0.018768209228267496, "grad_norm": 14.92001724243164, "learning_rate": 3.68e-05, "loss": 1.5767, "step": 277 }, { "epoch": 0.018835964496239584, "grad_norm": 15.229578971862793, "learning_rate": 3.6933333333333334e-05, "loss": 1.19, "step": 278 }, { "epoch": 0.01890371976421167, "grad_norm": 17.159273147583008, "learning_rate": 3.706666666666667e-05, "loss": 1.335, "step": 279 }, { "epoch": 0.018971475032183754, "grad_norm": 13.7230806350708, "learning_rate": 3.72e-05, "loss": 1.2587, "step": 280 }, { "epoch": 0.01903923030015584, "grad_norm": 15.375404357910156, "learning_rate": 3.733333333333334e-05, "loss": 1.2744, "step": 281 }, { "epoch": 0.019106985568127923, "grad_norm": 21.054534912109375, "learning_rate": 3.7466666666666665e-05, "loss": 1.5581, "step": 282 }, { "epoch": 0.019174740836100008, "grad_norm": 17.954307556152344, "learning_rate": 3.76e-05, "loss": 1.6617, "step": 283 }, { "epoch": 0.019242496104072093, "grad_norm": 17.954139709472656, "learning_rate": 3.773333333333334e-05, "loss": 1.7486, "step": 284 }, { "epoch": 0.019310251372044177, "grad_norm": 16.210704803466797, "learning_rate": 3.786666666666667e-05, "loss": 1.661, "step": 285 }, { "epoch": 0.019378006640016262, "grad_norm": 18.14916229248047, "learning_rate": 3.8e-05, "loss": 1.8154, "step": 286 }, { "epoch": 0.019445761907988347, "grad_norm": 16.372896194458008, "learning_rate": 3.8133333333333336e-05, "loss": 1.5464, "step": 287 }, { "epoch": 0.01951351717596043, "grad_norm": 16.4403076171875, "learning_rate": 3.8266666666666664e-05, "loss": 1.0815, "step": 288 }, { "epoch": 0.019581272443932516, "grad_norm": 16.612815856933594, "learning_rate": 3.8400000000000005e-05, "loss": 1.4338, "step": 289 }, { "epoch": 0.0196490277119046, "grad_norm": 17.0092716217041, "learning_rate": 3.853333333333334e-05, "loss": 1.5257, "step": 290 }, { "epoch": 0.019716782979876685, "grad_norm": 13.920248031616211, "learning_rate": 3.866666666666667e-05, "loss": 1.4595, "step": 291 }, { "epoch": 0.01978453824784877, "grad_norm": 12.699677467346191, "learning_rate": 3.88e-05, "loss": 1.2602, "step": 292 }, { "epoch": 0.019852293515820855, "grad_norm": 17.31343650817871, "learning_rate": 3.8933333333333336e-05, "loss": 1.2934, "step": 293 }, { "epoch": 0.01992004878379294, "grad_norm": 16.7819881439209, "learning_rate": 3.906666666666667e-05, "loss": 1.5385, "step": 294 }, { "epoch": 0.019987804051765024, "grad_norm": 14.175074577331543, "learning_rate": 3.9200000000000004e-05, "loss": 1.1644, "step": 295 }, { "epoch": 0.02005555931973711, "grad_norm": 22.863780975341797, "learning_rate": 3.933333333333333e-05, "loss": 1.5932, "step": 296 }, { "epoch": 0.020123314587709194, "grad_norm": 15.278692245483398, "learning_rate": 3.9466666666666666e-05, "loss": 1.3997, "step": 297 }, { "epoch": 0.02019106985568128, "grad_norm": 15.878965377807617, "learning_rate": 3.960000000000001e-05, "loss": 1.5992, "step": 298 }, { "epoch": 0.020258825123653363, "grad_norm": 17.444494247436523, "learning_rate": 3.9733333333333335e-05, "loss": 1.5224, "step": 299 }, { "epoch": 0.020326580391625448, "grad_norm": 15.011860847473145, "learning_rate": 3.986666666666667e-05, "loss": 1.2609, "step": 300 }, { "epoch": 0.020394335659597532, "grad_norm": 17.057064056396484, "learning_rate": 4e-05, "loss": 1.2724, "step": 301 }, { "epoch": 0.020462090927569617, "grad_norm": 14.946621894836426, "learning_rate": 4.013333333333333e-05, "loss": 1.273, "step": 302 }, { "epoch": 0.020529846195541702, "grad_norm": 15.072589874267578, "learning_rate": 4.026666666666667e-05, "loss": 1.2029, "step": 303 }, { "epoch": 0.020597601463513786, "grad_norm": 14.304384231567383, "learning_rate": 4.0400000000000006e-05, "loss": 1.3601, "step": 304 }, { "epoch": 0.020665356731485875, "grad_norm": 16.18627166748047, "learning_rate": 4.0533333333333334e-05, "loss": 1.5844, "step": 305 }, { "epoch": 0.02073311199945796, "grad_norm": 14.144999504089355, "learning_rate": 4.066666666666667e-05, "loss": 1.1638, "step": 306 }, { "epoch": 0.020800867267430044, "grad_norm": 13.793155670166016, "learning_rate": 4.08e-05, "loss": 1.2101, "step": 307 }, { "epoch": 0.02086862253540213, "grad_norm": 19.344085693359375, "learning_rate": 4.093333333333334e-05, "loss": 1.5899, "step": 308 }, { "epoch": 0.020936377803374213, "grad_norm": 13.670002937316895, "learning_rate": 4.106666666666667e-05, "loss": 1.4271, "step": 309 }, { "epoch": 0.021004133071346298, "grad_norm": 17.72041893005371, "learning_rate": 4.12e-05, "loss": 1.5276, "step": 310 }, { "epoch": 0.021071888339318383, "grad_norm": 14.103938102722168, "learning_rate": 4.133333333333333e-05, "loss": 1.2355, "step": 311 }, { "epoch": 0.021139643607290468, "grad_norm": 16.439529418945312, "learning_rate": 4.146666666666667e-05, "loss": 1.3393, "step": 312 }, { "epoch": 0.021207398875262552, "grad_norm": 13.821382522583008, "learning_rate": 4.16e-05, "loss": 1.275, "step": 313 }, { "epoch": 0.021275154143234637, "grad_norm": 19.987564086914062, "learning_rate": 4.1733333333333336e-05, "loss": 1.415, "step": 314 }, { "epoch": 0.02134290941120672, "grad_norm": 12.660404205322266, "learning_rate": 4.186666666666667e-05, "loss": 1.2378, "step": 315 }, { "epoch": 0.021410664679178806, "grad_norm": 14.009933471679688, "learning_rate": 4.2e-05, "loss": 1.1615, "step": 316 }, { "epoch": 0.02147841994715089, "grad_norm": 16.11932945251465, "learning_rate": 4.213333333333334e-05, "loss": 1.0653, "step": 317 }, { "epoch": 0.021546175215122976, "grad_norm": 15.943892478942871, "learning_rate": 4.226666666666667e-05, "loss": 1.6208, "step": 318 }, { "epoch": 0.02161393048309506, "grad_norm": 16.01548194885254, "learning_rate": 4.24e-05, "loss": 1.4052, "step": 319 }, { "epoch": 0.021681685751067145, "grad_norm": 15.703156471252441, "learning_rate": 4.2533333333333335e-05, "loss": 1.3736, "step": 320 }, { "epoch": 0.02174944101903923, "grad_norm": 20.367021560668945, "learning_rate": 4.266666666666667e-05, "loss": 1.5758, "step": 321 }, { "epoch": 0.021817196287011315, "grad_norm": 15.678637504577637, "learning_rate": 4.2800000000000004e-05, "loss": 1.4458, "step": 322 }, { "epoch": 0.0218849515549834, "grad_norm": 15.707669258117676, "learning_rate": 4.293333333333334e-05, "loss": 1.3065, "step": 323 }, { "epoch": 0.021952706822955484, "grad_norm": 16.07583999633789, "learning_rate": 4.3066666666666665e-05, "loss": 1.5875, "step": 324 }, { "epoch": 0.02202046209092757, "grad_norm": 15.248065948486328, "learning_rate": 4.32e-05, "loss": 1.5972, "step": 325 }, { "epoch": 0.022088217358899653, "grad_norm": 17.77379608154297, "learning_rate": 4.3333333333333334e-05, "loss": 1.3385, "step": 326 }, { "epoch": 0.022155972626871738, "grad_norm": 24.381973266601562, "learning_rate": 4.346666666666667e-05, "loss": 1.6428, "step": 327 }, { "epoch": 0.022223727894843823, "grad_norm": 17.3684024810791, "learning_rate": 4.36e-05, "loss": 1.3634, "step": 328 }, { "epoch": 0.022291483162815907, "grad_norm": 20.45623207092285, "learning_rate": 4.373333333333334e-05, "loss": 1.3947, "step": 329 }, { "epoch": 0.022359238430787992, "grad_norm": 17.434114456176758, "learning_rate": 4.3866666666666665e-05, "loss": 1.4269, "step": 330 }, { "epoch": 0.02242699369876008, "grad_norm": 13.574700355529785, "learning_rate": 4.4000000000000006e-05, "loss": 1.2807, "step": 331 }, { "epoch": 0.022494748966732165, "grad_norm": 16.404966354370117, "learning_rate": 4.413333333333334e-05, "loss": 1.4549, "step": 332 }, { "epoch": 0.02256250423470425, "grad_norm": 14.14947509765625, "learning_rate": 4.426666666666667e-05, "loss": 1.4187, "step": 333 }, { "epoch": 0.022630259502676334, "grad_norm": 17.632492065429688, "learning_rate": 4.44e-05, "loss": 1.3515, "step": 334 }, { "epoch": 0.02269801477064842, "grad_norm": 18.149261474609375, "learning_rate": 4.4533333333333336e-05, "loss": 1.4875, "step": 335 }, { "epoch": 0.022765770038620504, "grad_norm": 19.864049911499023, "learning_rate": 4.466666666666667e-05, "loss": 1.5622, "step": 336 }, { "epoch": 0.02283352530659259, "grad_norm": 16.7431583404541, "learning_rate": 4.4800000000000005e-05, "loss": 1.4344, "step": 337 }, { "epoch": 0.022901280574564673, "grad_norm": 16.53998565673828, "learning_rate": 4.493333333333333e-05, "loss": 1.1745, "step": 338 }, { "epoch": 0.022969035842536758, "grad_norm": 17.786945343017578, "learning_rate": 4.5066666666666667e-05, "loss": 1.4335, "step": 339 }, { "epoch": 0.023036791110508843, "grad_norm": 17.730606079101562, "learning_rate": 4.52e-05, "loss": 1.341, "step": 340 }, { "epoch": 0.023104546378480927, "grad_norm": 15.536660194396973, "learning_rate": 4.5333333333333335e-05, "loss": 1.4935, "step": 341 }, { "epoch": 0.023172301646453012, "grad_norm": 22.405893325805664, "learning_rate": 4.546666666666667e-05, "loss": 1.5538, "step": 342 }, { "epoch": 0.023240056914425097, "grad_norm": 15.963828086853027, "learning_rate": 4.5600000000000004e-05, "loss": 1.373, "step": 343 }, { "epoch": 0.02330781218239718, "grad_norm": 13.041223526000977, "learning_rate": 4.573333333333333e-05, "loss": 1.346, "step": 344 }, { "epoch": 0.023375567450369266, "grad_norm": 14.552989959716797, "learning_rate": 4.5866666666666666e-05, "loss": 1.2723, "step": 345 }, { "epoch": 0.02344332271834135, "grad_norm": 16.034404754638672, "learning_rate": 4.600000000000001e-05, "loss": 1.3671, "step": 346 }, { "epoch": 0.023511077986313435, "grad_norm": 17.5505428314209, "learning_rate": 4.6133333333333334e-05, "loss": 1.4553, "step": 347 }, { "epoch": 0.02357883325428552, "grad_norm": 14.029112815856934, "learning_rate": 4.626666666666667e-05, "loss": 1.3639, "step": 348 }, { "epoch": 0.023646588522257605, "grad_norm": 14.633806228637695, "learning_rate": 4.64e-05, "loss": 1.61, "step": 349 }, { "epoch": 0.02371434379022969, "grad_norm": 15.407472610473633, "learning_rate": 4.653333333333334e-05, "loss": 1.2449, "step": 350 }, { "epoch": 0.023782099058201774, "grad_norm": 13.801481246948242, "learning_rate": 4.666666666666667e-05, "loss": 1.3105, "step": 351 }, { "epoch": 0.02384985432617386, "grad_norm": 17.675159454345703, "learning_rate": 4.6800000000000006e-05, "loss": 1.3142, "step": 352 }, { "epoch": 0.023917609594145944, "grad_norm": 15.383625984191895, "learning_rate": 4.6933333333333333e-05, "loss": 1.2976, "step": 353 }, { "epoch": 0.02398536486211803, "grad_norm": 18.087081909179688, "learning_rate": 4.706666666666667e-05, "loss": 1.3391, "step": 354 }, { "epoch": 0.024053120130090113, "grad_norm": 15.790519714355469, "learning_rate": 4.72e-05, "loss": 1.7103, "step": 355 }, { "epoch": 0.024120875398062198, "grad_norm": 15.931246757507324, "learning_rate": 4.7333333333333336e-05, "loss": 1.4792, "step": 356 }, { "epoch": 0.024188630666034282, "grad_norm": 18.39167022705078, "learning_rate": 4.746666666666667e-05, "loss": 1.5085, "step": 357 }, { "epoch": 0.02425638593400637, "grad_norm": 13.518149375915527, "learning_rate": 4.76e-05, "loss": 1.2711, "step": 358 }, { "epoch": 0.024324141201978455, "grad_norm": 18.02762794494629, "learning_rate": 4.773333333333333e-05, "loss": 1.4113, "step": 359 }, { "epoch": 0.02439189646995054, "grad_norm": 15.260608673095703, "learning_rate": 4.7866666666666674e-05, "loss": 1.3286, "step": 360 }, { "epoch": 0.024459651737922625, "grad_norm": 16.26275062561035, "learning_rate": 4.8e-05, "loss": 1.4436, "step": 361 }, { "epoch": 0.02452740700589471, "grad_norm": 14.213051795959473, "learning_rate": 4.8133333333333336e-05, "loss": 1.1339, "step": 362 }, { "epoch": 0.024595162273866794, "grad_norm": 16.478811264038086, "learning_rate": 4.826666666666667e-05, "loss": 1.5534, "step": 363 }, { "epoch": 0.02466291754183888, "grad_norm": 13.24108600616455, "learning_rate": 4.8400000000000004e-05, "loss": 1.4767, "step": 364 }, { "epoch": 0.024730672809810963, "grad_norm": 15.167949676513672, "learning_rate": 4.853333333333334e-05, "loss": 1.1592, "step": 365 }, { "epoch": 0.024798428077783048, "grad_norm": 18.150367736816406, "learning_rate": 4.866666666666667e-05, "loss": 1.4172, "step": 366 }, { "epoch": 0.024866183345755133, "grad_norm": 13.250176429748535, "learning_rate": 4.88e-05, "loss": 1.5022, "step": 367 }, { "epoch": 0.024933938613727218, "grad_norm": 14.780989646911621, "learning_rate": 4.8933333333333335e-05, "loss": 1.3243, "step": 368 }, { "epoch": 0.025001693881699302, "grad_norm": 13.34748649597168, "learning_rate": 4.906666666666667e-05, "loss": 1.285, "step": 369 }, { "epoch": 0.025069449149671387, "grad_norm": 14.677448272705078, "learning_rate": 4.92e-05, "loss": 1.1982, "step": 370 }, { "epoch": 0.02513720441764347, "grad_norm": 18.211145401000977, "learning_rate": 4.933333333333334e-05, "loss": 1.4018, "step": 371 }, { "epoch": 0.025204959685615556, "grad_norm": 17.68402099609375, "learning_rate": 4.9466666666666665e-05, "loss": 1.6153, "step": 372 }, { "epoch": 0.02527271495358764, "grad_norm": 14.44299030303955, "learning_rate": 4.96e-05, "loss": 1.4092, "step": 373 }, { "epoch": 0.025340470221559726, "grad_norm": 15.467754364013672, "learning_rate": 4.973333333333334e-05, "loss": 1.5647, "step": 374 }, { "epoch": 0.02540822548953181, "grad_norm": 14.037840843200684, "learning_rate": 4.986666666666667e-05, "loss": 1.2106, "step": 375 }, { "epoch": 0.025475980757503895, "grad_norm": 15.72461223602295, "learning_rate": 5e-05, "loss": 1.1832, "step": 376 }, { "epoch": 0.02554373602547598, "grad_norm": 15.03637981414795, "learning_rate": 5.013333333333333e-05, "loss": 1.5803, "step": 377 }, { "epoch": 0.025611491293448065, "grad_norm": 15.995429992675781, "learning_rate": 5.026666666666667e-05, "loss": 1.3837, "step": 378 }, { "epoch": 0.02567924656142015, "grad_norm": 17.96516227722168, "learning_rate": 5.0400000000000005e-05, "loss": 1.5114, "step": 379 }, { "epoch": 0.025747001829392234, "grad_norm": 17.960783004760742, "learning_rate": 5.053333333333333e-05, "loss": 1.4012, "step": 380 }, { "epoch": 0.02581475709736432, "grad_norm": 14.399024963378906, "learning_rate": 5.0666666666666674e-05, "loss": 1.2906, "step": 381 }, { "epoch": 0.025882512365336403, "grad_norm": 14.268051147460938, "learning_rate": 5.08e-05, "loss": 1.2354, "step": 382 }, { "epoch": 0.025950267633308488, "grad_norm": 16.070646286010742, "learning_rate": 5.0933333333333336e-05, "loss": 1.272, "step": 383 }, { "epoch": 0.026018022901280576, "grad_norm": 18.698619842529297, "learning_rate": 5.106666666666668e-05, "loss": 1.352, "step": 384 }, { "epoch": 0.02608577816925266, "grad_norm": 13.932543754577637, "learning_rate": 5.1200000000000004e-05, "loss": 1.3249, "step": 385 }, { "epoch": 0.026153533437224746, "grad_norm": 14.76308822631836, "learning_rate": 5.133333333333333e-05, "loss": 1.1807, "step": 386 }, { "epoch": 0.02622128870519683, "grad_norm": 17.602182388305664, "learning_rate": 5.146666666666667e-05, "loss": 1.4017, "step": 387 }, { "epoch": 0.026289043973168915, "grad_norm": 13.589346885681152, "learning_rate": 5.16e-05, "loss": 1.4205, "step": 388 }, { "epoch": 0.026356799241141, "grad_norm": 16.37788200378418, "learning_rate": 5.1733333333333335e-05, "loss": 1.4255, "step": 389 }, { "epoch": 0.026424554509113084, "grad_norm": 16.24977684020996, "learning_rate": 5.1866666666666676e-05, "loss": 1.2526, "step": 390 }, { "epoch": 0.02649230977708517, "grad_norm": 15.02576732635498, "learning_rate": 5.2000000000000004e-05, "loss": 1.2147, "step": 391 }, { "epoch": 0.026560065045057254, "grad_norm": 16.754850387573242, "learning_rate": 5.213333333333333e-05, "loss": 1.5725, "step": 392 }, { "epoch": 0.02662782031302934, "grad_norm": 17.120729446411133, "learning_rate": 5.2266666666666665e-05, "loss": 1.351, "step": 393 }, { "epoch": 0.026695575581001423, "grad_norm": 18.83492660522461, "learning_rate": 5.2400000000000007e-05, "loss": 1.3044, "step": 394 }, { "epoch": 0.026763330848973508, "grad_norm": 18.66061019897461, "learning_rate": 5.2533333333333334e-05, "loss": 1.4889, "step": 395 }, { "epoch": 0.026831086116945593, "grad_norm": 18.493236541748047, "learning_rate": 5.266666666666666e-05, "loss": 1.5015, "step": 396 }, { "epoch": 0.026898841384917677, "grad_norm": 19.523067474365234, "learning_rate": 5.28e-05, "loss": 1.5179, "step": 397 }, { "epoch": 0.026966596652889762, "grad_norm": 18.207366943359375, "learning_rate": 5.293333333333334e-05, "loss": 1.374, "step": 398 }, { "epoch": 0.027034351920861847, "grad_norm": 13.021437644958496, "learning_rate": 5.3066666666666665e-05, "loss": 1.0259, "step": 399 }, { "epoch": 0.02710210718883393, "grad_norm": 14.108929634094238, "learning_rate": 5.3200000000000006e-05, "loss": 1.3914, "step": 400 }, { "epoch": 0.027169862456806016, "grad_norm": 15.087890625, "learning_rate": 5.333333333333333e-05, "loss": 1.3143, "step": 401 }, { "epoch": 0.0272376177247781, "grad_norm": 16.389965057373047, "learning_rate": 5.346666666666667e-05, "loss": 1.5286, "step": 402 }, { "epoch": 0.027305372992750186, "grad_norm": 17.855867385864258, "learning_rate": 5.360000000000001e-05, "loss": 1.4221, "step": 403 }, { "epoch": 0.02737312826072227, "grad_norm": 14.016682624816895, "learning_rate": 5.3733333333333336e-05, "loss": 1.0922, "step": 404 }, { "epoch": 0.027440883528694355, "grad_norm": 16.08883285522461, "learning_rate": 5.3866666666666664e-05, "loss": 1.4932, "step": 405 }, { "epoch": 0.02750863879666644, "grad_norm": 14.883580207824707, "learning_rate": 5.4000000000000005e-05, "loss": 1.0806, "step": 406 }, { "epoch": 0.027576394064638524, "grad_norm": 13.443008422851562, "learning_rate": 5.413333333333334e-05, "loss": 1.2653, "step": 407 }, { "epoch": 0.02764414933261061, "grad_norm": 13.391244888305664, "learning_rate": 5.4266666666666667e-05, "loss": 1.2788, "step": 408 }, { "epoch": 0.027711904600582694, "grad_norm": 13.449023246765137, "learning_rate": 5.440000000000001e-05, "loss": 1.133, "step": 409 }, { "epoch": 0.027779659868554782, "grad_norm": 13.777191162109375, "learning_rate": 5.4533333333333335e-05, "loss": 1.1509, "step": 410 }, { "epoch": 0.027847415136526867, "grad_norm": 19.04452896118164, "learning_rate": 5.466666666666666e-05, "loss": 1.3306, "step": 411 }, { "epoch": 0.02791517040449895, "grad_norm": 16.583112716674805, "learning_rate": 5.4800000000000004e-05, "loss": 1.3018, "step": 412 }, { "epoch": 0.027982925672471036, "grad_norm": 16.8561954498291, "learning_rate": 5.493333333333334e-05, "loss": 1.257, "step": 413 }, { "epoch": 0.02805068094044312, "grad_norm": 15.321064949035645, "learning_rate": 5.5066666666666666e-05, "loss": 1.2498, "step": 414 }, { "epoch": 0.028118436208415205, "grad_norm": 15.825779914855957, "learning_rate": 5.520000000000001e-05, "loss": 1.2177, "step": 415 }, { "epoch": 0.02818619147638729, "grad_norm": 15.665361404418945, "learning_rate": 5.5333333333333334e-05, "loss": 1.244, "step": 416 }, { "epoch": 0.028253946744359375, "grad_norm": 15.516046524047852, "learning_rate": 5.546666666666667e-05, "loss": 1.4957, "step": 417 }, { "epoch": 0.02832170201233146, "grad_norm": 19.148746490478516, "learning_rate": 5.560000000000001e-05, "loss": 1.4346, "step": 418 }, { "epoch": 0.028389457280303544, "grad_norm": 15.215538024902344, "learning_rate": 5.573333333333334e-05, "loss": 1.081, "step": 419 }, { "epoch": 0.02845721254827563, "grad_norm": 15.612996101379395, "learning_rate": 5.5866666666666665e-05, "loss": 1.1671, "step": 420 }, { "epoch": 0.028524967816247714, "grad_norm": 13.754039764404297, "learning_rate": 5.6000000000000006e-05, "loss": 1.2679, "step": 421 }, { "epoch": 0.028592723084219798, "grad_norm": 16.350305557250977, "learning_rate": 5.613333333333334e-05, "loss": 1.3034, "step": 422 }, { "epoch": 0.028660478352191883, "grad_norm": 16.560344696044922, "learning_rate": 5.626666666666667e-05, "loss": 1.3633, "step": 423 }, { "epoch": 0.028728233620163968, "grad_norm": 20.391889572143555, "learning_rate": 5.6399999999999995e-05, "loss": 1.3533, "step": 424 }, { "epoch": 0.028795988888136052, "grad_norm": 16.13326072692871, "learning_rate": 5.6533333333333336e-05, "loss": 1.3678, "step": 425 }, { "epoch": 0.028863744156108137, "grad_norm": 21.544612884521484, "learning_rate": 5.666666666666667e-05, "loss": 1.6275, "step": 426 }, { "epoch": 0.028931499424080222, "grad_norm": 17.097408294677734, "learning_rate": 5.68e-05, "loss": 1.6119, "step": 427 }, { "epoch": 0.028999254692052306, "grad_norm": 15.699347496032715, "learning_rate": 5.693333333333334e-05, "loss": 1.2644, "step": 428 }, { "epoch": 0.02906700996002439, "grad_norm": 16.643613815307617, "learning_rate": 5.706666666666667e-05, "loss": 1.3497, "step": 429 }, { "epoch": 0.029134765227996476, "grad_norm": 20.941123962402344, "learning_rate": 5.72e-05, "loss": 1.4232, "step": 430 }, { "epoch": 0.02920252049596856, "grad_norm": 17.549013137817383, "learning_rate": 5.7333333333333336e-05, "loss": 1.3429, "step": 431 }, { "epoch": 0.029270275763940645, "grad_norm": 19.702617645263672, "learning_rate": 5.746666666666667e-05, "loss": 1.7924, "step": 432 }, { "epoch": 0.02933803103191273, "grad_norm": 16.396209716796875, "learning_rate": 5.76e-05, "loss": 1.3296, "step": 433 }, { "epoch": 0.029405786299884815, "grad_norm": 15.823278427124023, "learning_rate": 5.773333333333334e-05, "loss": 1.4228, "step": 434 }, { "epoch": 0.0294735415678569, "grad_norm": 19.61952018737793, "learning_rate": 5.7866666666666666e-05, "loss": 1.3522, "step": 435 }, { "epoch": 0.029541296835828984, "grad_norm": 14.721433639526367, "learning_rate": 5.8e-05, "loss": 1.284, "step": 436 }, { "epoch": 0.029609052103801072, "grad_norm": 16.594276428222656, "learning_rate": 5.813333333333334e-05, "loss": 1.4636, "step": 437 }, { "epoch": 0.029676807371773157, "grad_norm": 15.98005199432373, "learning_rate": 5.826666666666667e-05, "loss": 1.2825, "step": 438 }, { "epoch": 0.02974456263974524, "grad_norm": 15.501729965209961, "learning_rate": 5.8399999999999997e-05, "loss": 1.3736, "step": 439 }, { "epoch": 0.029812317907717326, "grad_norm": 18.077552795410156, "learning_rate": 5.853333333333334e-05, "loss": 1.4115, "step": 440 }, { "epoch": 0.02988007317568941, "grad_norm": 16.016721725463867, "learning_rate": 5.866666666666667e-05, "loss": 1.3177, "step": 441 }, { "epoch": 0.029947828443661496, "grad_norm": 16.39783477783203, "learning_rate": 5.88e-05, "loss": 1.4465, "step": 442 }, { "epoch": 0.03001558371163358, "grad_norm": 13.970220565795898, "learning_rate": 5.893333333333334e-05, "loss": 1.2956, "step": 443 }, { "epoch": 0.030083338979605665, "grad_norm": 16.5622615814209, "learning_rate": 5.906666666666667e-05, "loss": 1.2144, "step": 444 }, { "epoch": 0.03015109424757775, "grad_norm": 18.191911697387695, "learning_rate": 5.92e-05, "loss": 1.623, "step": 445 }, { "epoch": 0.030218849515549834, "grad_norm": 15.483613967895508, "learning_rate": 5.9333333333333343e-05, "loss": 1.3232, "step": 446 }, { "epoch": 0.03028660478352192, "grad_norm": 15.633200645446777, "learning_rate": 5.946666666666667e-05, "loss": 1.544, "step": 447 }, { "epoch": 0.030354360051494004, "grad_norm": 16.025054931640625, "learning_rate": 5.96e-05, "loss": 1.4837, "step": 448 }, { "epoch": 0.03042211531946609, "grad_norm": 15.954922676086426, "learning_rate": 5.973333333333334e-05, "loss": 1.341, "step": 449 }, { "epoch": 0.030489870587438173, "grad_norm": 14.852401733398438, "learning_rate": 5.9866666666666674e-05, "loss": 1.2346, "step": 450 }, { "epoch": 0.030557625855410258, "grad_norm": 14.887676239013672, "learning_rate": 6e-05, "loss": 1.2789, "step": 451 }, { "epoch": 0.030625381123382343, "grad_norm": 17.25469970703125, "learning_rate": 6.013333333333334e-05, "loss": 1.4446, "step": 452 }, { "epoch": 0.030693136391354427, "grad_norm": 19.356597900390625, "learning_rate": 6.026666666666667e-05, "loss": 1.3765, "step": 453 }, { "epoch": 0.030760891659326512, "grad_norm": 14.16335391998291, "learning_rate": 6.04e-05, "loss": 1.4213, "step": 454 }, { "epoch": 0.030828646927298597, "grad_norm": 15.812528610229492, "learning_rate": 6.053333333333333e-05, "loss": 1.4028, "step": 455 }, { "epoch": 0.03089640219527068, "grad_norm": 15.10727596282959, "learning_rate": 6.066666666666667e-05, "loss": 1.3841, "step": 456 }, { "epoch": 0.030964157463242766, "grad_norm": 19.91744613647461, "learning_rate": 6.08e-05, "loss": 1.4706, "step": 457 }, { "epoch": 0.03103191273121485, "grad_norm": 14.597813606262207, "learning_rate": 6.093333333333333e-05, "loss": 1.2428, "step": 458 }, { "epoch": 0.031099667999186936, "grad_norm": 14.952363014221191, "learning_rate": 6.106666666666667e-05, "loss": 1.3431, "step": 459 }, { "epoch": 0.03116742326715902, "grad_norm": 16.519468307495117, "learning_rate": 6.12e-05, "loss": 1.5399, "step": 460 }, { "epoch": 0.031235178535131105, "grad_norm": 14.55786418914795, "learning_rate": 6.133333333333334e-05, "loss": 1.3844, "step": 461 }, { "epoch": 0.03130293380310319, "grad_norm": 12.52665901184082, "learning_rate": 6.146666666666668e-05, "loss": 1.2085, "step": 462 }, { "epoch": 0.03137068907107528, "grad_norm": 17.549148559570312, "learning_rate": 6.16e-05, "loss": 1.588, "step": 463 }, { "epoch": 0.03143844433904736, "grad_norm": 14.761232376098633, "learning_rate": 6.173333333333333e-05, "loss": 1.1685, "step": 464 }, { "epoch": 0.03150619960701945, "grad_norm": 13.1014404296875, "learning_rate": 6.186666666666668e-05, "loss": 1.1718, "step": 465 }, { "epoch": 0.03157395487499153, "grad_norm": 15.998059272766113, "learning_rate": 6.2e-05, "loss": 1.3177, "step": 466 }, { "epoch": 0.03164171014296362, "grad_norm": 16.402875900268555, "learning_rate": 6.213333333333333e-05, "loss": 1.4821, "step": 467 }, { "epoch": 0.0317094654109357, "grad_norm": 12.871467590332031, "learning_rate": 6.226666666666667e-05, "loss": 1.0176, "step": 468 }, { "epoch": 0.031777220678907786, "grad_norm": 15.602563858032227, "learning_rate": 6.24e-05, "loss": 1.3418, "step": 469 }, { "epoch": 0.03184497594687987, "grad_norm": 13.5369234085083, "learning_rate": 6.253333333333333e-05, "loss": 1.4051, "step": 470 }, { "epoch": 0.031912731214851955, "grad_norm": 16.841650009155273, "learning_rate": 6.266666666666667e-05, "loss": 1.5438, "step": 471 }, { "epoch": 0.03198048648282404, "grad_norm": 14.581981658935547, "learning_rate": 6.280000000000001e-05, "loss": 1.2767, "step": 472 }, { "epoch": 0.032048241750796125, "grad_norm": 14.084460258483887, "learning_rate": 6.293333333333334e-05, "loss": 1.2224, "step": 473 }, { "epoch": 0.03211599701876821, "grad_norm": 15.54316234588623, "learning_rate": 6.306666666666668e-05, "loss": 1.6084, "step": 474 }, { "epoch": 0.032183752286740294, "grad_norm": 15.498743057250977, "learning_rate": 6.32e-05, "loss": 1.3937, "step": 475 }, { "epoch": 0.03225150755471238, "grad_norm": 17.728227615356445, "learning_rate": 6.333333333333333e-05, "loss": 1.2996, "step": 476 }, { "epoch": 0.032319262822684464, "grad_norm": 13.956122398376465, "learning_rate": 6.346666666666667e-05, "loss": 1.4472, "step": 477 }, { "epoch": 0.03238701809065655, "grad_norm": 18.578060150146484, "learning_rate": 6.36e-05, "loss": 1.4659, "step": 478 }, { "epoch": 0.03245477335862863, "grad_norm": 14.055442810058594, "learning_rate": 6.373333333333333e-05, "loss": 1.1979, "step": 479 }, { "epoch": 0.03252252862660072, "grad_norm": 15.902195930480957, "learning_rate": 6.386666666666667e-05, "loss": 1.3367, "step": 480 }, { "epoch": 0.0325902838945728, "grad_norm": 14.212138175964355, "learning_rate": 6.400000000000001e-05, "loss": 1.2716, "step": 481 }, { "epoch": 0.03265803916254489, "grad_norm": 17.711475372314453, "learning_rate": 6.413333333333334e-05, "loss": 1.3016, "step": 482 }, { "epoch": 0.03272579443051697, "grad_norm": 13.068260192871094, "learning_rate": 6.426666666666668e-05, "loss": 1.1346, "step": 483 }, { "epoch": 0.032793549698489056, "grad_norm": 17.422321319580078, "learning_rate": 6.440000000000001e-05, "loss": 1.0931, "step": 484 }, { "epoch": 0.03286130496646114, "grad_norm": 14.98038101196289, "learning_rate": 6.453333333333333e-05, "loss": 1.22, "step": 485 }, { "epoch": 0.032929060234433226, "grad_norm": 13.300479888916016, "learning_rate": 6.466666666666666e-05, "loss": 1.2647, "step": 486 }, { "epoch": 0.03299681550240531, "grad_norm": 14.611360549926758, "learning_rate": 6.48e-05, "loss": 1.2478, "step": 487 }, { "epoch": 0.033064570770377395, "grad_norm": 19.244020462036133, "learning_rate": 6.493333333333333e-05, "loss": 1.2126, "step": 488 }, { "epoch": 0.03313232603834948, "grad_norm": 16.285396575927734, "learning_rate": 6.506666666666666e-05, "loss": 1.1694, "step": 489 }, { "epoch": 0.033200081306321565, "grad_norm": 19.613059997558594, "learning_rate": 6.52e-05, "loss": 1.2118, "step": 490 }, { "epoch": 0.03326783657429365, "grad_norm": 19.19630241394043, "learning_rate": 6.533333333333334e-05, "loss": 1.3093, "step": 491 }, { "epoch": 0.033335591842265734, "grad_norm": 12.653651237487793, "learning_rate": 6.546666666666667e-05, "loss": 1.1545, "step": 492 }, { "epoch": 0.03340334711023782, "grad_norm": 13.434443473815918, "learning_rate": 6.560000000000001e-05, "loss": 1.2543, "step": 493 }, { "epoch": 0.033471102378209903, "grad_norm": 18.893598556518555, "learning_rate": 6.573333333333334e-05, "loss": 1.3632, "step": 494 }, { "epoch": 0.03353885764618199, "grad_norm": 18.339479446411133, "learning_rate": 6.586666666666666e-05, "loss": 1.4369, "step": 495 }, { "epoch": 0.03360661291415407, "grad_norm": 12.303078651428223, "learning_rate": 6.6e-05, "loss": 1.4032, "step": 496 }, { "epoch": 0.03367436818212616, "grad_norm": 17.041015625, "learning_rate": 6.613333333333333e-05, "loss": 1.2395, "step": 497 }, { "epoch": 0.03374212345009824, "grad_norm": 14.035640716552734, "learning_rate": 6.626666666666666e-05, "loss": 1.3541, "step": 498 }, { "epoch": 0.03380987871807033, "grad_norm": 16.93412208557129, "learning_rate": 6.64e-05, "loss": 1.3503, "step": 499 }, { "epoch": 0.03387763398604241, "grad_norm": 16.02039909362793, "learning_rate": 6.653333333333334e-05, "loss": 1.3226, "step": 500 }, { "epoch": 0.033945389254014496, "grad_norm": 16.061542510986328, "learning_rate": 6.666666666666667e-05, "loss": 1.433, "step": 501 }, { "epoch": 0.03401314452198658, "grad_norm": 14.810320854187012, "learning_rate": 6.680000000000001e-05, "loss": 1.3715, "step": 502 }, { "epoch": 0.03408089978995867, "grad_norm": 15.132527351379395, "learning_rate": 6.693333333333334e-05, "loss": 1.0683, "step": 503 }, { "epoch": 0.03414865505793076, "grad_norm": 13.768165588378906, "learning_rate": 6.706666666666667e-05, "loss": 1.234, "step": 504 }, { "epoch": 0.03421641032590284, "grad_norm": 16.227920532226562, "learning_rate": 6.720000000000001e-05, "loss": 1.4181, "step": 505 }, { "epoch": 0.03428416559387493, "grad_norm": 14.205772399902344, "learning_rate": 6.733333333333333e-05, "loss": 1.4342, "step": 506 }, { "epoch": 0.03435192086184701, "grad_norm": 17.451099395751953, "learning_rate": 6.746666666666666e-05, "loss": 1.3338, "step": 507 }, { "epoch": 0.034419676129819096, "grad_norm": 13.7041015625, "learning_rate": 6.76e-05, "loss": 1.3972, "step": 508 }, { "epoch": 0.03448743139779118, "grad_norm": 13.206351280212402, "learning_rate": 6.773333333333333e-05, "loss": 1.4233, "step": 509 }, { "epoch": 0.034555186665763266, "grad_norm": 11.867975234985352, "learning_rate": 6.786666666666667e-05, "loss": 1.111, "step": 510 }, { "epoch": 0.03462294193373535, "grad_norm": 19.726648330688477, "learning_rate": 6.800000000000001e-05, "loss": 1.2918, "step": 511 }, { "epoch": 0.034690697201707435, "grad_norm": 18.914016723632812, "learning_rate": 6.813333333333334e-05, "loss": 1.5998, "step": 512 }, { "epoch": 0.03475845246967952, "grad_norm": 18.065937042236328, "learning_rate": 6.826666666666667e-05, "loss": 1.4952, "step": 513 }, { "epoch": 0.034826207737651604, "grad_norm": 16.42376136779785, "learning_rate": 6.840000000000001e-05, "loss": 1.2807, "step": 514 }, { "epoch": 0.03489396300562369, "grad_norm": 15.537372589111328, "learning_rate": 6.853333333333334e-05, "loss": 1.3524, "step": 515 }, { "epoch": 0.034961718273595774, "grad_norm": 18.395238876342773, "learning_rate": 6.866666666666666e-05, "loss": 1.3022, "step": 516 }, { "epoch": 0.03502947354156786, "grad_norm": 15.80531120300293, "learning_rate": 6.879999999999999e-05, "loss": 1.4201, "step": 517 }, { "epoch": 0.03509722880953994, "grad_norm": 15.361504554748535, "learning_rate": 6.893333333333333e-05, "loss": 1.4351, "step": 518 }, { "epoch": 0.03516498407751203, "grad_norm": 12.501819610595703, "learning_rate": 6.906666666666667e-05, "loss": 1.1894, "step": 519 }, { "epoch": 0.03523273934548411, "grad_norm": 14.53589916229248, "learning_rate": 6.92e-05, "loss": 1.3435, "step": 520 }, { "epoch": 0.0353004946134562, "grad_norm": 15.057633399963379, "learning_rate": 6.933333333333334e-05, "loss": 1.5467, "step": 521 }, { "epoch": 0.03536824988142828, "grad_norm": 16.27166175842285, "learning_rate": 6.946666666666667e-05, "loss": 1.5171, "step": 522 }, { "epoch": 0.03543600514940037, "grad_norm": 18.050413131713867, "learning_rate": 6.96e-05, "loss": 1.2246, "step": 523 }, { "epoch": 0.03550376041737245, "grad_norm": 15.316112518310547, "learning_rate": 6.973333333333334e-05, "loss": 1.2551, "step": 524 }, { "epoch": 0.035571515685344536, "grad_norm": 15.416853904724121, "learning_rate": 6.986666666666667e-05, "loss": 1.1895, "step": 525 }, { "epoch": 0.03563927095331662, "grad_norm": 15.019004821777344, "learning_rate": 7e-05, "loss": 1.5957, "step": 526 }, { "epoch": 0.035707026221288705, "grad_norm": 15.33927059173584, "learning_rate": 7.013333333333333e-05, "loss": 1.3536, "step": 527 }, { "epoch": 0.03577478148926079, "grad_norm": 16.403501510620117, "learning_rate": 7.026666666666668e-05, "loss": 1.6104, "step": 528 }, { "epoch": 0.035842536757232875, "grad_norm": 13.962714195251465, "learning_rate": 7.04e-05, "loss": 1.2122, "step": 529 }, { "epoch": 0.03591029202520496, "grad_norm": 14.530926704406738, "learning_rate": 7.053333333333334e-05, "loss": 1.2879, "step": 530 }, { "epoch": 0.035978047293177044, "grad_norm": 13.99305248260498, "learning_rate": 7.066666666666667e-05, "loss": 1.2063, "step": 531 }, { "epoch": 0.03604580256114913, "grad_norm": 17.54342269897461, "learning_rate": 7.08e-05, "loss": 1.2823, "step": 532 }, { "epoch": 0.036113557829121214, "grad_norm": 11.097670555114746, "learning_rate": 7.093333333333334e-05, "loss": 1.1658, "step": 533 }, { "epoch": 0.0361813130970933, "grad_norm": 19.069889068603516, "learning_rate": 7.106666666666667e-05, "loss": 1.3368, "step": 534 }, { "epoch": 0.03624906836506538, "grad_norm": 12.675487518310547, "learning_rate": 7.12e-05, "loss": 1.1586, "step": 535 }, { "epoch": 0.03631682363303747, "grad_norm": 16.971637725830078, "learning_rate": 7.133333333333334e-05, "loss": 1.2458, "step": 536 }, { "epoch": 0.03638457890100955, "grad_norm": 14.513422012329102, "learning_rate": 7.146666666666666e-05, "loss": 1.236, "step": 537 }, { "epoch": 0.03645233416898164, "grad_norm": 14.271836280822754, "learning_rate": 7.16e-05, "loss": 1.0951, "step": 538 }, { "epoch": 0.03652008943695372, "grad_norm": 14.286022186279297, "learning_rate": 7.173333333333335e-05, "loss": 1.2411, "step": 539 }, { "epoch": 0.03658784470492581, "grad_norm": 16.81059455871582, "learning_rate": 7.186666666666667e-05, "loss": 1.2453, "step": 540 }, { "epoch": 0.03665559997289789, "grad_norm": 16.818300247192383, "learning_rate": 7.2e-05, "loss": 1.2302, "step": 541 }, { "epoch": 0.036723355240869976, "grad_norm": 14.839925765991211, "learning_rate": 7.213333333333334e-05, "loss": 1.3445, "step": 542 }, { "epoch": 0.03679111050884206, "grad_norm": 14.917344093322754, "learning_rate": 7.226666666666667e-05, "loss": 1.3658, "step": 543 }, { "epoch": 0.036858865776814145, "grad_norm": 15.310625076293945, "learning_rate": 7.24e-05, "loss": 1.2565, "step": 544 }, { "epoch": 0.03692662104478623, "grad_norm": 16.7796630859375, "learning_rate": 7.253333333333334e-05, "loss": 1.265, "step": 545 }, { "epoch": 0.036994376312758315, "grad_norm": 19.4224796295166, "learning_rate": 7.266666666666667e-05, "loss": 1.6152, "step": 546 }, { "epoch": 0.0370621315807304, "grad_norm": 15.8001127243042, "learning_rate": 7.280000000000001e-05, "loss": 1.4313, "step": 547 }, { "epoch": 0.037129886848702484, "grad_norm": 13.597784042358398, "learning_rate": 7.293333333333334e-05, "loss": 1.3812, "step": 548 }, { "epoch": 0.03719764211667457, "grad_norm": 13.139778137207031, "learning_rate": 7.306666666666668e-05, "loss": 1.1888, "step": 549 }, { "epoch": 0.037265397384646654, "grad_norm": 17.083406448364258, "learning_rate": 7.32e-05, "loss": 1.1668, "step": 550 }, { "epoch": 0.03733315265261874, "grad_norm": 14.46076774597168, "learning_rate": 7.333333333333333e-05, "loss": 1.2394, "step": 551 }, { "epoch": 0.03740090792059082, "grad_norm": 16.217782974243164, "learning_rate": 7.346666666666667e-05, "loss": 1.3068, "step": 552 }, { "epoch": 0.03746866318856291, "grad_norm": 16.06308937072754, "learning_rate": 7.36e-05, "loss": 0.9678, "step": 553 }, { "epoch": 0.03753641845653499, "grad_norm": 14.652907371520996, "learning_rate": 7.373333333333333e-05, "loss": 1.5375, "step": 554 }, { "epoch": 0.03760417372450708, "grad_norm": 13.870722770690918, "learning_rate": 7.386666666666667e-05, "loss": 1.29, "step": 555 }, { "epoch": 0.03767192899247917, "grad_norm": 14.649571418762207, "learning_rate": 7.4e-05, "loss": 1.3588, "step": 556 }, { "epoch": 0.03773968426045125, "grad_norm": 13.698415756225586, "learning_rate": 7.413333333333334e-05, "loss": 1.0801, "step": 557 }, { "epoch": 0.03780743952842334, "grad_norm": 19.412424087524414, "learning_rate": 7.426666666666668e-05, "loss": 1.4474, "step": 558 }, { "epoch": 0.03787519479639542, "grad_norm": 18.58599853515625, "learning_rate": 7.44e-05, "loss": 1.3294, "step": 559 }, { "epoch": 0.03794295006436751, "grad_norm": 15.283289909362793, "learning_rate": 7.453333333333333e-05, "loss": 1.1554, "step": 560 }, { "epoch": 0.03801070533233959, "grad_norm": 16.289731979370117, "learning_rate": 7.466666666666667e-05, "loss": 1.222, "step": 561 }, { "epoch": 0.03807846060031168, "grad_norm": 25.105520248413086, "learning_rate": 7.48e-05, "loss": 1.2213, "step": 562 }, { "epoch": 0.03814621586828376, "grad_norm": 15.428693771362305, "learning_rate": 7.493333333333333e-05, "loss": 1.2165, "step": 563 }, { "epoch": 0.038213971136255846, "grad_norm": 18.79145622253418, "learning_rate": 7.506666666666667e-05, "loss": 1.4799, "step": 564 }, { "epoch": 0.03828172640422793, "grad_norm": 12.498862266540527, "learning_rate": 7.52e-05, "loss": 1.4348, "step": 565 }, { "epoch": 0.038349481672200016, "grad_norm": 19.302845001220703, "learning_rate": 7.533333333333334e-05, "loss": 1.2656, "step": 566 }, { "epoch": 0.0384172369401721, "grad_norm": 16.28862953186035, "learning_rate": 7.546666666666668e-05, "loss": 1.4779, "step": 567 }, { "epoch": 0.038484992208144185, "grad_norm": 13.972156524658203, "learning_rate": 7.560000000000001e-05, "loss": 1.4775, "step": 568 }, { "epoch": 0.03855274747611627, "grad_norm": 15.443683624267578, "learning_rate": 7.573333333333334e-05, "loss": 1.2671, "step": 569 }, { "epoch": 0.038620502744088354, "grad_norm": 15.203113555908203, "learning_rate": 7.586666666666668e-05, "loss": 1.4209, "step": 570 }, { "epoch": 0.03868825801206044, "grad_norm": 18.516462326049805, "learning_rate": 7.6e-05, "loss": 1.3315, "step": 571 }, { "epoch": 0.038756013280032524, "grad_norm": 13.403026580810547, "learning_rate": 7.613333333333333e-05, "loss": 1.364, "step": 572 }, { "epoch": 0.03882376854800461, "grad_norm": 12.339615821838379, "learning_rate": 7.626666666666667e-05, "loss": 1.2107, "step": 573 }, { "epoch": 0.03889152381597669, "grad_norm": 15.647716522216797, "learning_rate": 7.64e-05, "loss": 1.3165, "step": 574 }, { "epoch": 0.03895927908394878, "grad_norm": 15.854050636291504, "learning_rate": 7.653333333333333e-05, "loss": 1.517, "step": 575 }, { "epoch": 0.03902703435192086, "grad_norm": 11.765929222106934, "learning_rate": 7.666666666666667e-05, "loss": 1.2676, "step": 576 }, { "epoch": 0.03909478961989295, "grad_norm": 12.515352249145508, "learning_rate": 7.680000000000001e-05, "loss": 1.4472, "step": 577 }, { "epoch": 0.03916254488786503, "grad_norm": 11.72417163848877, "learning_rate": 7.693333333333334e-05, "loss": 1.1874, "step": 578 }, { "epoch": 0.03923030015583712, "grad_norm": 15.03148365020752, "learning_rate": 7.706666666666668e-05, "loss": 1.6381, "step": 579 }, { "epoch": 0.0392980554238092, "grad_norm": 14.0188570022583, "learning_rate": 7.72e-05, "loss": 1.4351, "step": 580 }, { "epoch": 0.039365810691781286, "grad_norm": 12.343233108520508, "learning_rate": 7.733333333333333e-05, "loss": 1.2293, "step": 581 }, { "epoch": 0.03943356595975337, "grad_norm": 15.358945846557617, "learning_rate": 7.746666666666666e-05, "loss": 1.307, "step": 582 }, { "epoch": 0.039501321227725456, "grad_norm": 14.131333351135254, "learning_rate": 7.76e-05, "loss": 1.0432, "step": 583 }, { "epoch": 0.03956907649569754, "grad_norm": 16.46926498413086, "learning_rate": 7.773333333333333e-05, "loss": 1.4126, "step": 584 }, { "epoch": 0.039636831763669625, "grad_norm": 15.209906578063965, "learning_rate": 7.786666666666667e-05, "loss": 1.3242, "step": 585 }, { "epoch": 0.03970458703164171, "grad_norm": 15.657282829284668, "learning_rate": 7.800000000000001e-05, "loss": 1.4967, "step": 586 }, { "epoch": 0.039772342299613794, "grad_norm": 11.466882705688477, "learning_rate": 7.813333333333334e-05, "loss": 1.3003, "step": 587 }, { "epoch": 0.03984009756758588, "grad_norm": 19.529300689697266, "learning_rate": 7.826666666666667e-05, "loss": 1.3835, "step": 588 }, { "epoch": 0.039907852835557964, "grad_norm": 17.22064971923828, "learning_rate": 7.840000000000001e-05, "loss": 1.2655, "step": 589 }, { "epoch": 0.03997560810353005, "grad_norm": 14.788103103637695, "learning_rate": 7.853333333333334e-05, "loss": 1.1981, "step": 590 }, { "epoch": 0.04004336337150213, "grad_norm": 13.705521583557129, "learning_rate": 7.866666666666666e-05, "loss": 1.2064, "step": 591 }, { "epoch": 0.04011111863947422, "grad_norm": 14.901930809020996, "learning_rate": 7.88e-05, "loss": 1.2632, "step": 592 }, { "epoch": 0.0401788739074463, "grad_norm": 18.520828247070312, "learning_rate": 7.893333333333333e-05, "loss": 1.4316, "step": 593 }, { "epoch": 0.04024662917541839, "grad_norm": 15.101353645324707, "learning_rate": 7.906666666666667e-05, "loss": 1.3935, "step": 594 }, { "epoch": 0.04031438444339047, "grad_norm": 13.961483001708984, "learning_rate": 7.920000000000001e-05, "loss": 1.1477, "step": 595 }, { "epoch": 0.04038213971136256, "grad_norm": 16.015554428100586, "learning_rate": 7.933333333333334e-05, "loss": 1.5029, "step": 596 }, { "epoch": 0.04044989497933464, "grad_norm": 15.009637832641602, "learning_rate": 7.946666666666667e-05, "loss": 1.3715, "step": 597 }, { "epoch": 0.040517650247306726, "grad_norm": 16.295202255249023, "learning_rate": 7.960000000000001e-05, "loss": 1.5129, "step": 598 }, { "epoch": 0.04058540551527881, "grad_norm": 88.3338623046875, "learning_rate": 7.973333333333334e-05, "loss": 1.2395, "step": 599 }, { "epoch": 0.040653160783250895, "grad_norm": 16.769424438476562, "learning_rate": 7.986666666666667e-05, "loss": 1.4258, "step": 600 }, { "epoch": 0.04072091605122298, "grad_norm": 16.044578552246094, "learning_rate": 8e-05, "loss": 1.365, "step": 601 }, { "epoch": 0.040788671319195065, "grad_norm": 15.282588958740234, "learning_rate": 8.013333333333333e-05, "loss": 1.3212, "step": 602 }, { "epoch": 0.04085642658716715, "grad_norm": 15.275490760803223, "learning_rate": 8.026666666666666e-05, "loss": 1.6048, "step": 603 }, { "epoch": 0.040924181855139234, "grad_norm": 12.952759742736816, "learning_rate": 8.04e-05, "loss": 1.1982, "step": 604 }, { "epoch": 0.04099193712311132, "grad_norm": 16.79343032836914, "learning_rate": 8.053333333333334e-05, "loss": 1.281, "step": 605 }, { "epoch": 0.041059692391083404, "grad_norm": 13.70421314239502, "learning_rate": 8.066666666666667e-05, "loss": 1.2664, "step": 606 }, { "epoch": 0.04112744765905549, "grad_norm": 14.420117378234863, "learning_rate": 8.080000000000001e-05, "loss": 1.2758, "step": 607 }, { "epoch": 0.04119520292702757, "grad_norm": 16.421335220336914, "learning_rate": 8.093333333333334e-05, "loss": 1.3105, "step": 608 }, { "epoch": 0.041262958194999665, "grad_norm": 16.96218490600586, "learning_rate": 8.106666666666667e-05, "loss": 1.4665, "step": 609 }, { "epoch": 0.04133071346297175, "grad_norm": 13.73725700378418, "learning_rate": 8.120000000000001e-05, "loss": 1.2109, "step": 610 }, { "epoch": 0.041398468730943834, "grad_norm": 14.718022346496582, "learning_rate": 8.133333333333334e-05, "loss": 1.2051, "step": 611 }, { "epoch": 0.04146622399891592, "grad_norm": 14.119061470031738, "learning_rate": 8.146666666666666e-05, "loss": 1.0731, "step": 612 }, { "epoch": 0.041533979266888, "grad_norm": 17.90053367614746, "learning_rate": 8.16e-05, "loss": 1.3804, "step": 613 }, { "epoch": 0.04160173453486009, "grad_norm": 12.727055549621582, "learning_rate": 8.173333333333335e-05, "loss": 1.0926, "step": 614 }, { "epoch": 0.04166948980283217, "grad_norm": 14.953054428100586, "learning_rate": 8.186666666666667e-05, "loss": 1.1974, "step": 615 }, { "epoch": 0.04173724507080426, "grad_norm": 16.05322265625, "learning_rate": 8.2e-05, "loss": 1.2526, "step": 616 }, { "epoch": 0.04180500033877634, "grad_norm": 15.028168678283691, "learning_rate": 8.213333333333334e-05, "loss": 1.3593, "step": 617 }, { "epoch": 0.04187275560674843, "grad_norm": 13.127458572387695, "learning_rate": 8.226666666666667e-05, "loss": 1.3981, "step": 618 }, { "epoch": 0.04194051087472051, "grad_norm": 17.323017120361328, "learning_rate": 8.24e-05, "loss": 1.4622, "step": 619 }, { "epoch": 0.042008266142692596, "grad_norm": 16.106731414794922, "learning_rate": 8.253333333333334e-05, "loss": 1.257, "step": 620 }, { "epoch": 0.04207602141066468, "grad_norm": 13.039103507995605, "learning_rate": 8.266666666666667e-05, "loss": 1.2905, "step": 621 }, { "epoch": 0.042143776678636766, "grad_norm": 13.661933898925781, "learning_rate": 8.28e-05, "loss": 1.2779, "step": 622 }, { "epoch": 0.04221153194660885, "grad_norm": 17.325756072998047, "learning_rate": 8.293333333333333e-05, "loss": 1.3934, "step": 623 }, { "epoch": 0.042279287214580935, "grad_norm": 13.898777961730957, "learning_rate": 8.306666666666668e-05, "loss": 1.3735, "step": 624 }, { "epoch": 0.04234704248255302, "grad_norm": 16.787601470947266, "learning_rate": 8.32e-05, "loss": 1.5228, "step": 625 }, { "epoch": 0.042414797750525104, "grad_norm": 13.54299259185791, "learning_rate": 8.333333333333334e-05, "loss": 1.0538, "step": 626 }, { "epoch": 0.04248255301849719, "grad_norm": 14.418194770812988, "learning_rate": 8.346666666666667e-05, "loss": 1.2932, "step": 627 }, { "epoch": 0.042550308286469274, "grad_norm": 13.39255142211914, "learning_rate": 8.36e-05, "loss": 1.1989, "step": 628 }, { "epoch": 0.04261806355444136, "grad_norm": 15.445747375488281, "learning_rate": 8.373333333333334e-05, "loss": 1.3557, "step": 629 }, { "epoch": 0.04268581882241344, "grad_norm": 13.414338111877441, "learning_rate": 8.386666666666667e-05, "loss": 1.1729, "step": 630 }, { "epoch": 0.04275357409038553, "grad_norm": 16.649137496948242, "learning_rate": 8.4e-05, "loss": 1.4212, "step": 631 }, { "epoch": 0.04282132935835761, "grad_norm": 17.583528518676758, "learning_rate": 8.413333333333334e-05, "loss": 1.4838, "step": 632 }, { "epoch": 0.0428890846263297, "grad_norm": 19.32307243347168, "learning_rate": 8.426666666666668e-05, "loss": 1.5958, "step": 633 }, { "epoch": 0.04295683989430178, "grad_norm": 12.703327178955078, "learning_rate": 8.44e-05, "loss": 1.2752, "step": 634 }, { "epoch": 0.04302459516227387, "grad_norm": 15.72768497467041, "learning_rate": 8.453333333333335e-05, "loss": 1.4059, "step": 635 }, { "epoch": 0.04309235043024595, "grad_norm": 13.532344818115234, "learning_rate": 8.466666666666667e-05, "loss": 1.2252, "step": 636 }, { "epoch": 0.043160105698218036, "grad_norm": 13.305481910705566, "learning_rate": 8.48e-05, "loss": 1.5084, "step": 637 }, { "epoch": 0.04322786096619012, "grad_norm": 11.986043930053711, "learning_rate": 8.493333333333334e-05, "loss": 1.2006, "step": 638 }, { "epoch": 0.043295616234162206, "grad_norm": 13.28003978729248, "learning_rate": 8.506666666666667e-05, "loss": 1.2442, "step": 639 }, { "epoch": 0.04336337150213429, "grad_norm": 15.835545539855957, "learning_rate": 8.52e-05, "loss": 1.2466, "step": 640 }, { "epoch": 0.043431126770106375, "grad_norm": 14.887584686279297, "learning_rate": 8.533333333333334e-05, "loss": 1.3828, "step": 641 }, { "epoch": 0.04349888203807846, "grad_norm": 14.740251541137695, "learning_rate": 8.546666666666667e-05, "loss": 1.4847, "step": 642 }, { "epoch": 0.043566637306050544, "grad_norm": 14.109213829040527, "learning_rate": 8.560000000000001e-05, "loss": 1.2251, "step": 643 }, { "epoch": 0.04363439257402263, "grad_norm": 17.33670425415039, "learning_rate": 8.573333333333333e-05, "loss": 1.3817, "step": 644 }, { "epoch": 0.043702147841994714, "grad_norm": 16.92241096496582, "learning_rate": 8.586666666666668e-05, "loss": 1.458, "step": 645 }, { "epoch": 0.0437699031099668, "grad_norm": 15.369187355041504, "learning_rate": 8.6e-05, "loss": 1.28, "step": 646 }, { "epoch": 0.04383765837793888, "grad_norm": 18.748065948486328, "learning_rate": 8.613333333333333e-05, "loss": 1.6275, "step": 647 }, { "epoch": 0.04390541364591097, "grad_norm": 15.650605201721191, "learning_rate": 8.626666666666667e-05, "loss": 1.3742, "step": 648 }, { "epoch": 0.04397316891388305, "grad_norm": 16.170730590820312, "learning_rate": 8.64e-05, "loss": 1.174, "step": 649 }, { "epoch": 0.04404092418185514, "grad_norm": 12.61952018737793, "learning_rate": 8.653333333333333e-05, "loss": 1.2178, "step": 650 }, { "epoch": 0.04410867944982722, "grad_norm": 18.86512565612793, "learning_rate": 8.666666666666667e-05, "loss": 1.4993, "step": 651 }, { "epoch": 0.04417643471779931, "grad_norm": 16.412899017333984, "learning_rate": 8.680000000000001e-05, "loss": 1.2663, "step": 652 }, { "epoch": 0.04424418998577139, "grad_norm": 14.11953353881836, "learning_rate": 8.693333333333334e-05, "loss": 1.3772, "step": 653 }, { "epoch": 0.044311945253743476, "grad_norm": 17.232633590698242, "learning_rate": 8.706666666666668e-05, "loss": 1.3252, "step": 654 }, { "epoch": 0.04437970052171556, "grad_norm": 13.833864212036133, "learning_rate": 8.72e-05, "loss": 1.279, "step": 655 }, { "epoch": 0.044447455789687645, "grad_norm": 11.546829223632812, "learning_rate": 8.733333333333333e-05, "loss": 1.382, "step": 656 }, { "epoch": 0.04451521105765973, "grad_norm": 13.163644790649414, "learning_rate": 8.746666666666667e-05, "loss": 1.2253, "step": 657 }, { "epoch": 0.044582966325631815, "grad_norm": 17.037311553955078, "learning_rate": 8.76e-05, "loss": 1.2727, "step": 658 }, { "epoch": 0.0446507215936039, "grad_norm": 14.610177040100098, "learning_rate": 8.773333333333333e-05, "loss": 1.3082, "step": 659 }, { "epoch": 0.044718476861575984, "grad_norm": 16.305557250976562, "learning_rate": 8.786666666666667e-05, "loss": 1.3948, "step": 660 }, { "epoch": 0.04478623212954807, "grad_norm": 13.207799911499023, "learning_rate": 8.800000000000001e-05, "loss": 1.2764, "step": 661 }, { "epoch": 0.04485398739752016, "grad_norm": 11.451075553894043, "learning_rate": 8.813333333333334e-05, "loss": 1.0831, "step": 662 }, { "epoch": 0.044921742665492245, "grad_norm": 13.555370330810547, "learning_rate": 8.826666666666668e-05, "loss": 1.4996, "step": 663 }, { "epoch": 0.04498949793346433, "grad_norm": 13.544769287109375, "learning_rate": 8.840000000000001e-05, "loss": 1.2277, "step": 664 }, { "epoch": 0.045057253201436415, "grad_norm": 18.05879783630371, "learning_rate": 8.853333333333333e-05, "loss": 1.3964, "step": 665 }, { "epoch": 0.0451250084694085, "grad_norm": 17.309839248657227, "learning_rate": 8.866666666666668e-05, "loss": 1.6233, "step": 666 }, { "epoch": 0.045192763737380584, "grad_norm": 12.732510566711426, "learning_rate": 8.88e-05, "loss": 1.3237, "step": 667 }, { "epoch": 0.04526051900535267, "grad_norm": 13.541101455688477, "learning_rate": 8.893333333333333e-05, "loss": 1.2523, "step": 668 }, { "epoch": 0.04532827427332475, "grad_norm": 17.54905891418457, "learning_rate": 8.906666666666667e-05, "loss": 1.5824, "step": 669 }, { "epoch": 0.04539602954129684, "grad_norm": 12.52578353881836, "learning_rate": 8.92e-05, "loss": 1.2966, "step": 670 }, { "epoch": 0.04546378480926892, "grad_norm": 13.279097557067871, "learning_rate": 8.933333333333334e-05, "loss": 1.4898, "step": 671 }, { "epoch": 0.04553154007724101, "grad_norm": 15.892850875854492, "learning_rate": 8.946666666666668e-05, "loss": 1.3757, "step": 672 }, { "epoch": 0.04559929534521309, "grad_norm": 14.108098983764648, "learning_rate": 8.960000000000001e-05, "loss": 1.1758, "step": 673 }, { "epoch": 0.04566705061318518, "grad_norm": 17.15204429626465, "learning_rate": 8.973333333333334e-05, "loss": 1.3876, "step": 674 }, { "epoch": 0.04573480588115726, "grad_norm": 14.453113555908203, "learning_rate": 8.986666666666666e-05, "loss": 1.2152, "step": 675 }, { "epoch": 0.045802561149129346, "grad_norm": 17.9672794342041, "learning_rate": 9e-05, "loss": 1.2962, "step": 676 }, { "epoch": 0.04587031641710143, "grad_norm": 19.810890197753906, "learning_rate": 9.013333333333333e-05, "loss": 1.3374, "step": 677 }, { "epoch": 0.045938071685073516, "grad_norm": 16.13353729248047, "learning_rate": 9.026666666666666e-05, "loss": 1.4125, "step": 678 }, { "epoch": 0.0460058269530456, "grad_norm": 15.257608413696289, "learning_rate": 9.04e-05, "loss": 1.1931, "step": 679 }, { "epoch": 0.046073582221017685, "grad_norm": 16.88699722290039, "learning_rate": 9.053333333333334e-05, "loss": 1.362, "step": 680 }, { "epoch": 0.04614133748898977, "grad_norm": 15.46777057647705, "learning_rate": 9.066666666666667e-05, "loss": 1.3522, "step": 681 }, { "epoch": 0.046209092756961855, "grad_norm": 13.584056854248047, "learning_rate": 9.080000000000001e-05, "loss": 1.1998, "step": 682 }, { "epoch": 0.04627684802493394, "grad_norm": 14.226449966430664, "learning_rate": 9.093333333333334e-05, "loss": 1.3312, "step": 683 }, { "epoch": 0.046344603292906024, "grad_norm": 15.157097816467285, "learning_rate": 9.106666666666667e-05, "loss": 1.3422, "step": 684 }, { "epoch": 0.04641235856087811, "grad_norm": 14.748275756835938, "learning_rate": 9.120000000000001e-05, "loss": 1.2902, "step": 685 }, { "epoch": 0.04648011382885019, "grad_norm": 13.730619430541992, "learning_rate": 9.133333333333334e-05, "loss": 1.3034, "step": 686 }, { "epoch": 0.04654786909682228, "grad_norm": 12.804062843322754, "learning_rate": 9.146666666666666e-05, "loss": 1.1484, "step": 687 }, { "epoch": 0.04661562436479436, "grad_norm": 16.587923049926758, "learning_rate": 9.16e-05, "loss": 1.4875, "step": 688 }, { "epoch": 0.04668337963276645, "grad_norm": 12.228131294250488, "learning_rate": 9.173333333333333e-05, "loss": 0.917, "step": 689 }, { "epoch": 0.04675113490073853, "grad_norm": 18.680187225341797, "learning_rate": 9.186666666666667e-05, "loss": 1.497, "step": 690 }, { "epoch": 0.04681889016871062, "grad_norm": 14.80630111694336, "learning_rate": 9.200000000000001e-05, "loss": 1.5623, "step": 691 }, { "epoch": 0.0468866454366827, "grad_norm": 13.754642486572266, "learning_rate": 9.213333333333334e-05, "loss": 1.3921, "step": 692 }, { "epoch": 0.046954400704654786, "grad_norm": 14.264236450195312, "learning_rate": 9.226666666666667e-05, "loss": 1.3615, "step": 693 }, { "epoch": 0.04702215597262687, "grad_norm": 15.883113861083984, "learning_rate": 9.240000000000001e-05, "loss": 1.4528, "step": 694 }, { "epoch": 0.047089911240598956, "grad_norm": 13.528610229492188, "learning_rate": 9.253333333333334e-05, "loss": 1.0995, "step": 695 }, { "epoch": 0.04715766650857104, "grad_norm": 15.945343971252441, "learning_rate": 9.266666666666666e-05, "loss": 1.272, "step": 696 }, { "epoch": 0.047225421776543125, "grad_norm": 14.378050804138184, "learning_rate": 9.28e-05, "loss": 1.3545, "step": 697 }, { "epoch": 0.04729317704451521, "grad_norm": 13.457077980041504, "learning_rate": 9.293333333333333e-05, "loss": 1.1751, "step": 698 }, { "epoch": 0.047360932312487294, "grad_norm": 19.285078048706055, "learning_rate": 9.306666666666667e-05, "loss": 1.4383, "step": 699 }, { "epoch": 0.04742868758045938, "grad_norm": 16.683856964111328, "learning_rate": 9.320000000000002e-05, "loss": 1.2499, "step": 700 }, { "epoch": 0.047496442848431464, "grad_norm": 13.818337440490723, "learning_rate": 9.333333333333334e-05, "loss": 1.2325, "step": 701 }, { "epoch": 0.04756419811640355, "grad_norm": 12.51142406463623, "learning_rate": 9.346666666666667e-05, "loss": 1.3317, "step": 702 }, { "epoch": 0.04763195338437563, "grad_norm": 14.697171211242676, "learning_rate": 9.360000000000001e-05, "loss": 1.6055, "step": 703 }, { "epoch": 0.04769970865234772, "grad_norm": 16.942562103271484, "learning_rate": 9.373333333333334e-05, "loss": 1.4279, "step": 704 }, { "epoch": 0.0477674639203198, "grad_norm": 16.739248275756836, "learning_rate": 9.386666666666667e-05, "loss": 1.2285, "step": 705 }, { "epoch": 0.04783521918829189, "grad_norm": 14.479548454284668, "learning_rate": 9.4e-05, "loss": 1.1984, "step": 706 }, { "epoch": 0.04790297445626397, "grad_norm": 14.384824752807617, "learning_rate": 9.413333333333334e-05, "loss": 1.2128, "step": 707 }, { "epoch": 0.04797072972423606, "grad_norm": 15.41884994506836, "learning_rate": 9.426666666666666e-05, "loss": 1.1627, "step": 708 }, { "epoch": 0.04803848499220814, "grad_norm": 13.267902374267578, "learning_rate": 9.44e-05, "loss": 1.2916, "step": 709 }, { "epoch": 0.048106240260180226, "grad_norm": 14.909900665283203, "learning_rate": 9.453333333333335e-05, "loss": 1.1579, "step": 710 }, { "epoch": 0.04817399552815231, "grad_norm": 12.639839172363281, "learning_rate": 9.466666666666667e-05, "loss": 1.2053, "step": 711 }, { "epoch": 0.048241750796124395, "grad_norm": 14.957659721374512, "learning_rate": 9.48e-05, "loss": 1.4556, "step": 712 }, { "epoch": 0.04830950606409648, "grad_norm": 14.008201599121094, "learning_rate": 9.493333333333334e-05, "loss": 1.1575, "step": 713 }, { "epoch": 0.048377261332068565, "grad_norm": 13.857501983642578, "learning_rate": 9.506666666666667e-05, "loss": 1.4121, "step": 714 }, { "epoch": 0.048445016600040657, "grad_norm": 13.867570877075195, "learning_rate": 9.52e-05, "loss": 1.4908, "step": 715 }, { "epoch": 0.04851277186801274, "grad_norm": 14.3545503616333, "learning_rate": 9.533333333333334e-05, "loss": 1.2939, "step": 716 }, { "epoch": 0.048580527135984826, "grad_norm": 15.173371315002441, "learning_rate": 9.546666666666667e-05, "loss": 1.3912, "step": 717 }, { "epoch": 0.04864828240395691, "grad_norm": 13.799453735351562, "learning_rate": 9.56e-05, "loss": 1.4219, "step": 718 }, { "epoch": 0.048716037671928995, "grad_norm": 18.264965057373047, "learning_rate": 9.573333333333335e-05, "loss": 1.4967, "step": 719 }, { "epoch": 0.04878379293990108, "grad_norm": 13.809319496154785, "learning_rate": 9.586666666666667e-05, "loss": 1.3527, "step": 720 }, { "epoch": 0.048851548207873165, "grad_norm": 13.931517601013184, "learning_rate": 9.6e-05, "loss": 1.4018, "step": 721 }, { "epoch": 0.04891930347584525, "grad_norm": 11.74387264251709, "learning_rate": 9.613333333333334e-05, "loss": 1.1981, "step": 722 }, { "epoch": 0.048987058743817334, "grad_norm": 14.960589408874512, "learning_rate": 9.626666666666667e-05, "loss": 1.3373, "step": 723 }, { "epoch": 0.04905481401178942, "grad_norm": 14.170279502868652, "learning_rate": 9.64e-05, "loss": 1.2702, "step": 724 }, { "epoch": 0.049122569279761504, "grad_norm": 15.532767295837402, "learning_rate": 9.653333333333334e-05, "loss": 1.1329, "step": 725 }, { "epoch": 0.04919032454773359, "grad_norm": 12.141284942626953, "learning_rate": 9.666666666666667e-05, "loss": 1.2694, "step": 726 }, { "epoch": 0.04925807981570567, "grad_norm": 12.379233360290527, "learning_rate": 9.680000000000001e-05, "loss": 1.1545, "step": 727 }, { "epoch": 0.04932583508367776, "grad_norm": 14.45605182647705, "learning_rate": 9.693333333333335e-05, "loss": 1.4151, "step": 728 }, { "epoch": 0.04939359035164984, "grad_norm": 15.656912803649902, "learning_rate": 9.706666666666668e-05, "loss": 1.3782, "step": 729 }, { "epoch": 0.04946134561962193, "grad_norm": 13.992748260498047, "learning_rate": 9.72e-05, "loss": 1.2731, "step": 730 }, { "epoch": 0.04952910088759401, "grad_norm": 16.253517150878906, "learning_rate": 9.733333333333335e-05, "loss": 1.4503, "step": 731 }, { "epoch": 0.049596856155566096, "grad_norm": 14.88161563873291, "learning_rate": 9.746666666666667e-05, "loss": 1.3909, "step": 732 }, { "epoch": 0.04966461142353818, "grad_norm": 14.60545539855957, "learning_rate": 9.76e-05, "loss": 1.1803, "step": 733 }, { "epoch": 0.049732366691510266, "grad_norm": 14.589803695678711, "learning_rate": 9.773333333333334e-05, "loss": 1.3844, "step": 734 }, { "epoch": 0.04980012195948235, "grad_norm": 14.781076431274414, "learning_rate": 9.786666666666667e-05, "loss": 1.2706, "step": 735 }, { "epoch": 0.049867877227454435, "grad_norm": 13.717072486877441, "learning_rate": 9.8e-05, "loss": 1.2311, "step": 736 }, { "epoch": 0.04993563249542652, "grad_norm": 17.14280891418457, "learning_rate": 9.813333333333334e-05, "loss": 1.3423, "step": 737 }, { "epoch": 0.050003387763398605, "grad_norm": 14.521373748779297, "learning_rate": 9.826666666666668e-05, "loss": 1.3118, "step": 738 }, { "epoch": 0.05007114303137069, "grad_norm": 14.79515266418457, "learning_rate": 9.84e-05, "loss": 1.3622, "step": 739 }, { "epoch": 0.050138898299342774, "grad_norm": 13.63962459564209, "learning_rate": 9.853333333333333e-05, "loss": 1.3856, "step": 740 }, { "epoch": 0.05020665356731486, "grad_norm": 13.788541793823242, "learning_rate": 9.866666666666668e-05, "loss": 1.3439, "step": 741 }, { "epoch": 0.05027440883528694, "grad_norm": 14.015862464904785, "learning_rate": 9.88e-05, "loss": 1.3263, "step": 742 }, { "epoch": 0.05034216410325903, "grad_norm": 15.407167434692383, "learning_rate": 9.893333333333333e-05, "loss": 1.5115, "step": 743 }, { "epoch": 0.05040991937123111, "grad_norm": 14.542003631591797, "learning_rate": 9.906666666666667e-05, "loss": 1.3934, "step": 744 }, { "epoch": 0.0504776746392032, "grad_norm": 16.733686447143555, "learning_rate": 9.92e-05, "loss": 1.2098, "step": 745 }, { "epoch": 0.05054542990717528, "grad_norm": 14.979568481445312, "learning_rate": 9.933333333333334e-05, "loss": 1.3234, "step": 746 }, { "epoch": 0.05061318517514737, "grad_norm": 13.699801445007324, "learning_rate": 9.946666666666668e-05, "loss": 1.3173, "step": 747 }, { "epoch": 0.05068094044311945, "grad_norm": 13.262088775634766, "learning_rate": 9.960000000000001e-05, "loss": 1.3459, "step": 748 }, { "epoch": 0.050748695711091536, "grad_norm": 14.694448471069336, "learning_rate": 9.973333333333334e-05, "loss": 1.3639, "step": 749 }, { "epoch": 0.05081645097906362, "grad_norm": 13.999008178710938, "learning_rate": 9.986666666666668e-05, "loss": 1.4275, "step": 750 }, { "epoch": 0.050884206247035706, "grad_norm": 11.588037490844727, "learning_rate": 0.0001, "loss": 1.2866, "step": 751 }, { "epoch": 0.05095196151500779, "grad_norm": 16.174110412597656, "learning_rate": 9.999863098090219e-05, "loss": 1.5338, "step": 752 }, { "epoch": 0.051019716782979875, "grad_norm": 17.8669376373291, "learning_rate": 9.999726196180437e-05, "loss": 1.3247, "step": 753 }, { "epoch": 0.05108747205095196, "grad_norm": 13.849075317382812, "learning_rate": 9.999589294270656e-05, "loss": 1.1817, "step": 754 }, { "epoch": 0.051155227318924044, "grad_norm": 14.888331413269043, "learning_rate": 9.999452392360874e-05, "loss": 1.4019, "step": 755 }, { "epoch": 0.05122298258689613, "grad_norm": 18.68206024169922, "learning_rate": 9.999315490451092e-05, "loss": 1.7115, "step": 756 }, { "epoch": 0.051290737854868214, "grad_norm": 14.762079238891602, "learning_rate": 9.99917858854131e-05, "loss": 1.2553, "step": 757 }, { "epoch": 0.0513584931228403, "grad_norm": 14.649972915649414, "learning_rate": 9.99904168663153e-05, "loss": 1.0807, "step": 758 }, { "epoch": 0.05142624839081238, "grad_norm": 13.172977447509766, "learning_rate": 9.998904784721747e-05, "loss": 1.4966, "step": 759 }, { "epoch": 0.05149400365878447, "grad_norm": 14.144796371459961, "learning_rate": 9.998767882811965e-05, "loss": 1.234, "step": 760 }, { "epoch": 0.05156175892675655, "grad_norm": 17.565507888793945, "learning_rate": 9.998630980902184e-05, "loss": 1.4061, "step": 761 }, { "epoch": 0.05162951419472864, "grad_norm": 17.205589294433594, "learning_rate": 9.998494078992402e-05, "loss": 1.6626, "step": 762 }, { "epoch": 0.05169726946270072, "grad_norm": 16.14542007446289, "learning_rate": 9.998357177082621e-05, "loss": 1.3145, "step": 763 }, { "epoch": 0.05176502473067281, "grad_norm": 14.692976951599121, "learning_rate": 9.998220275172839e-05, "loss": 1.2085, "step": 764 }, { "epoch": 0.05183277999864489, "grad_norm": 12.572774887084961, "learning_rate": 9.998083373263057e-05, "loss": 1.3743, "step": 765 }, { "epoch": 0.051900535266616976, "grad_norm": 15.904753684997559, "learning_rate": 9.997946471353275e-05, "loss": 1.3863, "step": 766 }, { "epoch": 0.05196829053458906, "grad_norm": 14.471494674682617, "learning_rate": 9.997809569443493e-05, "loss": 1.3615, "step": 767 }, { "epoch": 0.05203604580256115, "grad_norm": 13.82172966003418, "learning_rate": 9.997672667533712e-05, "loss": 1.2104, "step": 768 }, { "epoch": 0.05210380107053324, "grad_norm": 14.648073196411133, "learning_rate": 9.99753576562393e-05, "loss": 1.1166, "step": 769 }, { "epoch": 0.05217155633850532, "grad_norm": 16.776750564575195, "learning_rate": 9.997398863714149e-05, "loss": 1.4242, "step": 770 }, { "epoch": 0.05223931160647741, "grad_norm": 13.999717712402344, "learning_rate": 9.997261961804367e-05, "loss": 1.2493, "step": 771 }, { "epoch": 0.05230706687444949, "grad_norm": 13.238443374633789, "learning_rate": 9.997125059894586e-05, "loss": 1.0469, "step": 772 }, { "epoch": 0.052374822142421576, "grad_norm": 12.79283332824707, "learning_rate": 9.996988157984804e-05, "loss": 1.1975, "step": 773 }, { "epoch": 0.05244257741039366, "grad_norm": 15.577791213989258, "learning_rate": 9.996851256075022e-05, "loss": 1.4972, "step": 774 }, { "epoch": 0.052510332678365745, "grad_norm": 16.215747833251953, "learning_rate": 9.996714354165241e-05, "loss": 1.4285, "step": 775 }, { "epoch": 0.05257808794633783, "grad_norm": 14.68825626373291, "learning_rate": 9.99657745225546e-05, "loss": 1.2028, "step": 776 }, { "epoch": 0.052645843214309915, "grad_norm": 11.535130500793457, "learning_rate": 9.996440550345677e-05, "loss": 1.2466, "step": 777 }, { "epoch": 0.052713598482282, "grad_norm": 14.865918159484863, "learning_rate": 9.996303648435897e-05, "loss": 1.3903, "step": 778 }, { "epoch": 0.052781353750254084, "grad_norm": 13.626388549804688, "learning_rate": 9.996166746526115e-05, "loss": 1.5446, "step": 779 }, { "epoch": 0.05284910901822617, "grad_norm": 12.957473754882812, "learning_rate": 9.996029844616333e-05, "loss": 1.1641, "step": 780 }, { "epoch": 0.052916864286198254, "grad_norm": 14.928487777709961, "learning_rate": 9.995892942706552e-05, "loss": 1.0929, "step": 781 }, { "epoch": 0.05298461955417034, "grad_norm": 13.592952728271484, "learning_rate": 9.99575604079677e-05, "loss": 1.4684, "step": 782 }, { "epoch": 0.05305237482214242, "grad_norm": 13.836453437805176, "learning_rate": 9.995619138886988e-05, "loss": 1.0128, "step": 783 }, { "epoch": 0.05312013009011451, "grad_norm": 16.654394149780273, "learning_rate": 9.995482236977206e-05, "loss": 1.2464, "step": 784 }, { "epoch": 0.05318788535808659, "grad_norm": 14.4434232711792, "learning_rate": 9.995345335067424e-05, "loss": 1.3649, "step": 785 }, { "epoch": 0.05325564062605868, "grad_norm": 14.15592098236084, "learning_rate": 9.995208433157644e-05, "loss": 1.4426, "step": 786 }, { "epoch": 0.05332339589403076, "grad_norm": 12.97901439666748, "learning_rate": 9.995071531247862e-05, "loss": 1.227, "step": 787 }, { "epoch": 0.053391151162002846, "grad_norm": 13.167902946472168, "learning_rate": 9.99493462933808e-05, "loss": 1.2432, "step": 788 }, { "epoch": 0.05345890642997493, "grad_norm": 15.92297077178955, "learning_rate": 9.994797727428298e-05, "loss": 1.3345, "step": 789 }, { "epoch": 0.053526661697947016, "grad_norm": 13.30277156829834, "learning_rate": 9.994660825518517e-05, "loss": 1.2361, "step": 790 }, { "epoch": 0.0535944169659191, "grad_norm": 15.98779582977295, "learning_rate": 9.994523923608735e-05, "loss": 1.1202, "step": 791 }, { "epoch": 0.053662172233891185, "grad_norm": 16.414382934570312, "learning_rate": 9.994387021698953e-05, "loss": 1.448, "step": 792 }, { "epoch": 0.05372992750186327, "grad_norm": 14.491677284240723, "learning_rate": 9.994250119789171e-05, "loss": 1.0862, "step": 793 }, { "epoch": 0.053797682769835355, "grad_norm": 13.155410766601562, "learning_rate": 9.99411321787939e-05, "loss": 1.312, "step": 794 }, { "epoch": 0.05386543803780744, "grad_norm": 16.11139488220215, "learning_rate": 9.993976315969609e-05, "loss": 1.2043, "step": 795 }, { "epoch": 0.053933193305779524, "grad_norm": 14.259698867797852, "learning_rate": 9.993839414059827e-05, "loss": 1.2256, "step": 796 }, { "epoch": 0.05400094857375161, "grad_norm": 15.751099586486816, "learning_rate": 9.993702512150045e-05, "loss": 1.0732, "step": 797 }, { "epoch": 0.05406870384172369, "grad_norm": 12.766170501708984, "learning_rate": 9.993565610240263e-05, "loss": 1.0362, "step": 798 }, { "epoch": 0.05413645910969578, "grad_norm": 14.61483097076416, "learning_rate": 9.993428708330481e-05, "loss": 1.3107, "step": 799 }, { "epoch": 0.05420421437766786, "grad_norm": 14.571990013122559, "learning_rate": 9.9932918064207e-05, "loss": 1.3616, "step": 800 }, { "epoch": 0.05427196964563995, "grad_norm": 14.69124984741211, "learning_rate": 9.993154904510918e-05, "loss": 1.4273, "step": 801 }, { "epoch": 0.05433972491361203, "grad_norm": 13.834383010864258, "learning_rate": 9.993018002601136e-05, "loss": 1.2886, "step": 802 }, { "epoch": 0.05440748018158412, "grad_norm": 14.178943634033203, "learning_rate": 9.992881100691355e-05, "loss": 1.44, "step": 803 }, { "epoch": 0.0544752354495562, "grad_norm": 13.185267448425293, "learning_rate": 9.992744198781574e-05, "loss": 1.5609, "step": 804 }, { "epoch": 0.054542990717528286, "grad_norm": 15.211031913757324, "learning_rate": 9.992607296871792e-05, "loss": 1.8873, "step": 805 }, { "epoch": 0.05461074598550037, "grad_norm": 11.855447769165039, "learning_rate": 9.99247039496201e-05, "loss": 1.4063, "step": 806 }, { "epoch": 0.054678501253472456, "grad_norm": 14.352787017822266, "learning_rate": 9.992333493052228e-05, "loss": 1.56, "step": 807 }, { "epoch": 0.05474625652144454, "grad_norm": 14.339908599853516, "learning_rate": 9.992196591142446e-05, "loss": 1.1985, "step": 808 }, { "epoch": 0.054814011789416625, "grad_norm": 12.696648597717285, "learning_rate": 9.992059689232665e-05, "loss": 1.1807, "step": 809 }, { "epoch": 0.05488176705738871, "grad_norm": 14.083525657653809, "learning_rate": 9.991922787322883e-05, "loss": 1.4012, "step": 810 }, { "epoch": 0.054949522325360795, "grad_norm": 14.298514366149902, "learning_rate": 9.991785885413101e-05, "loss": 1.4047, "step": 811 }, { "epoch": 0.05501727759333288, "grad_norm": 13.391324996948242, "learning_rate": 9.99164898350332e-05, "loss": 1.2292, "step": 812 }, { "epoch": 0.055085032861304964, "grad_norm": 14.0011625289917, "learning_rate": 9.991512081593539e-05, "loss": 1.3194, "step": 813 }, { "epoch": 0.05515278812927705, "grad_norm": 14.726574897766113, "learning_rate": 9.991375179683757e-05, "loss": 1.3245, "step": 814 }, { "epoch": 0.05522054339724913, "grad_norm": 14.615242004394531, "learning_rate": 9.991238277773975e-05, "loss": 1.3704, "step": 815 }, { "epoch": 0.05528829866522122, "grad_norm": 11.06546688079834, "learning_rate": 9.991101375864193e-05, "loss": 1.0841, "step": 816 }, { "epoch": 0.0553560539331933, "grad_norm": 13.768633842468262, "learning_rate": 9.990964473954411e-05, "loss": 1.2632, "step": 817 }, { "epoch": 0.05542380920116539, "grad_norm": 15.054973602294922, "learning_rate": 9.99082757204463e-05, "loss": 1.2931, "step": 818 }, { "epoch": 0.05549156446913747, "grad_norm": 15.27096176147461, "learning_rate": 9.990690670134848e-05, "loss": 1.1789, "step": 819 }, { "epoch": 0.055559319737109564, "grad_norm": 13.228281021118164, "learning_rate": 9.990553768225067e-05, "loss": 1.4458, "step": 820 }, { "epoch": 0.05562707500508165, "grad_norm": 16.016782760620117, "learning_rate": 9.990416866315286e-05, "loss": 1.5121, "step": 821 }, { "epoch": 0.05569483027305373, "grad_norm": 14.15912914276123, "learning_rate": 9.990279964405504e-05, "loss": 1.3702, "step": 822 }, { "epoch": 0.05576258554102582, "grad_norm": 14.61017894744873, "learning_rate": 9.990143062495722e-05, "loss": 1.2774, "step": 823 }, { "epoch": 0.0558303408089979, "grad_norm": 13.241140365600586, "learning_rate": 9.990006160585941e-05, "loss": 1.3857, "step": 824 }, { "epoch": 0.05589809607696999, "grad_norm": 11.815064430236816, "learning_rate": 9.98986925867616e-05, "loss": 1.1053, "step": 825 }, { "epoch": 0.05596585134494207, "grad_norm": 13.179222106933594, "learning_rate": 9.989732356766377e-05, "loss": 1.4686, "step": 826 }, { "epoch": 0.05603360661291416, "grad_norm": 11.888179779052734, "learning_rate": 9.989595454856597e-05, "loss": 1.3191, "step": 827 }, { "epoch": 0.05610136188088624, "grad_norm": 15.43813419342041, "learning_rate": 9.989458552946815e-05, "loss": 0.9732, "step": 828 }, { "epoch": 0.056169117148858326, "grad_norm": 20.0246524810791, "learning_rate": 9.989321651037033e-05, "loss": 1.5398, "step": 829 }, { "epoch": 0.05623687241683041, "grad_norm": 14.247052192687988, "learning_rate": 9.989184749127251e-05, "loss": 1.5093, "step": 830 }, { "epoch": 0.056304627684802495, "grad_norm": 15.63775634765625, "learning_rate": 9.989047847217469e-05, "loss": 1.3488, "step": 831 }, { "epoch": 0.05637238295277458, "grad_norm": 10.582650184631348, "learning_rate": 9.988910945307688e-05, "loss": 1.3888, "step": 832 }, { "epoch": 0.056440138220746665, "grad_norm": 12.344864845275879, "learning_rate": 9.988774043397906e-05, "loss": 1.2876, "step": 833 }, { "epoch": 0.05650789348871875, "grad_norm": 13.95814323425293, "learning_rate": 9.988637141488124e-05, "loss": 1.2648, "step": 834 }, { "epoch": 0.056575648756690834, "grad_norm": 9.740105628967285, "learning_rate": 9.988500239578342e-05, "loss": 0.9777, "step": 835 }, { "epoch": 0.05664340402466292, "grad_norm": 16.16588020324707, "learning_rate": 9.988363337668562e-05, "loss": 1.5859, "step": 836 }, { "epoch": 0.056711159292635004, "grad_norm": 14.394731521606445, "learning_rate": 9.98822643575878e-05, "loss": 1.3395, "step": 837 }, { "epoch": 0.05677891456060709, "grad_norm": 11.256571769714355, "learning_rate": 9.988089533848998e-05, "loss": 1.0528, "step": 838 }, { "epoch": 0.05684666982857917, "grad_norm": 17.595510482788086, "learning_rate": 9.987952631939216e-05, "loss": 1.2678, "step": 839 }, { "epoch": 0.05691442509655126, "grad_norm": 14.132645606994629, "learning_rate": 9.987815730029434e-05, "loss": 1.4129, "step": 840 }, { "epoch": 0.05698218036452334, "grad_norm": 14.438119888305664, "learning_rate": 9.987678828119653e-05, "loss": 1.4141, "step": 841 }, { "epoch": 0.05704993563249543, "grad_norm": 15.983094215393066, "learning_rate": 9.987541926209871e-05, "loss": 1.1959, "step": 842 }, { "epoch": 0.05711769090046751, "grad_norm": 17.300403594970703, "learning_rate": 9.98740502430009e-05, "loss": 1.1613, "step": 843 }, { "epoch": 0.057185446168439596, "grad_norm": 15.150660514831543, "learning_rate": 9.987268122390307e-05, "loss": 1.2253, "step": 844 }, { "epoch": 0.05725320143641168, "grad_norm": 14.234586715698242, "learning_rate": 9.987131220480525e-05, "loss": 1.2517, "step": 845 }, { "epoch": 0.057320956704383766, "grad_norm": 18.31337547302246, "learning_rate": 9.986994318570745e-05, "loss": 1.7506, "step": 846 }, { "epoch": 0.05738871197235585, "grad_norm": 14.818669319152832, "learning_rate": 9.986857416660963e-05, "loss": 1.358, "step": 847 }, { "epoch": 0.057456467240327935, "grad_norm": 14.640913009643555, "learning_rate": 9.986720514751181e-05, "loss": 1.4481, "step": 848 }, { "epoch": 0.05752422250830002, "grad_norm": 12.277986526489258, "learning_rate": 9.986583612841399e-05, "loss": 1.3428, "step": 849 }, { "epoch": 0.057591977776272105, "grad_norm": 13.817851066589355, "learning_rate": 9.986446710931618e-05, "loss": 1.3514, "step": 850 }, { "epoch": 0.05765973304424419, "grad_norm": 13.706515312194824, "learning_rate": 9.986309809021836e-05, "loss": 1.4295, "step": 851 }, { "epoch": 0.057727488312216274, "grad_norm": 16.649917602539062, "learning_rate": 9.986172907112054e-05, "loss": 1.3895, "step": 852 }, { "epoch": 0.05779524358018836, "grad_norm": 13.659167289733887, "learning_rate": 9.986036005202272e-05, "loss": 1.1545, "step": 853 }, { "epoch": 0.057862998848160443, "grad_norm": 11.264912605285645, "learning_rate": 9.98589910329249e-05, "loss": 1.0766, "step": 854 }, { "epoch": 0.05793075411613253, "grad_norm": 12.930856704711914, "learning_rate": 9.98576220138271e-05, "loss": 1.1665, "step": 855 }, { "epoch": 0.05799850938410461, "grad_norm": 15.999971389770508, "learning_rate": 9.985625299472928e-05, "loss": 1.4595, "step": 856 }, { "epoch": 0.0580662646520767, "grad_norm": 14.566671371459961, "learning_rate": 9.985488397563146e-05, "loss": 1.283, "step": 857 }, { "epoch": 0.05813401992004878, "grad_norm": 16.106964111328125, "learning_rate": 9.985351495653364e-05, "loss": 1.2842, "step": 858 }, { "epoch": 0.05820177518802087, "grad_norm": 15.47492790222168, "learning_rate": 9.985214593743583e-05, "loss": 1.3949, "step": 859 }, { "epoch": 0.05826953045599295, "grad_norm": 10.692886352539062, "learning_rate": 9.985077691833801e-05, "loss": 1.2896, "step": 860 }, { "epoch": 0.058337285723965036, "grad_norm": 14.13198184967041, "learning_rate": 9.98494078992402e-05, "loss": 1.1566, "step": 861 }, { "epoch": 0.05840504099193712, "grad_norm": 14.455452919006348, "learning_rate": 9.984803888014237e-05, "loss": 1.1949, "step": 862 }, { "epoch": 0.058472796259909206, "grad_norm": 15.020733833312988, "learning_rate": 9.984666986104456e-05, "loss": 1.5202, "step": 863 }, { "epoch": 0.05854055152788129, "grad_norm": 21.089344024658203, "learning_rate": 9.984530084194675e-05, "loss": 1.5083, "step": 864 }, { "epoch": 0.058608306795853375, "grad_norm": 14.414257049560547, "learning_rate": 9.984393182284893e-05, "loss": 1.3977, "step": 865 }, { "epoch": 0.05867606206382546, "grad_norm": 15.667798042297363, "learning_rate": 9.984256280375111e-05, "loss": 1.4044, "step": 866 }, { "epoch": 0.058743817331797545, "grad_norm": 12.503005981445312, "learning_rate": 9.98411937846533e-05, "loss": 1.3369, "step": 867 }, { "epoch": 0.05881157259976963, "grad_norm": 13.27022933959961, "learning_rate": 9.983982476555548e-05, "loss": 1.4134, "step": 868 }, { "epoch": 0.058879327867741714, "grad_norm": 16.2034969329834, "learning_rate": 9.983845574645766e-05, "loss": 1.1196, "step": 869 }, { "epoch": 0.0589470831357138, "grad_norm": 9.868896484375, "learning_rate": 9.983708672735986e-05, "loss": 1.0248, "step": 870 }, { "epoch": 0.05901483840368588, "grad_norm": 15.588685989379883, "learning_rate": 9.983571770826204e-05, "loss": 1.5865, "step": 871 }, { "epoch": 0.05908259367165797, "grad_norm": 14.688246726989746, "learning_rate": 9.983434868916422e-05, "loss": 1.2959, "step": 872 }, { "epoch": 0.05915034893963006, "grad_norm": 14.252961158752441, "learning_rate": 9.983297967006641e-05, "loss": 1.3536, "step": 873 }, { "epoch": 0.059218104207602144, "grad_norm": 11.79800033569336, "learning_rate": 9.98316106509686e-05, "loss": 1.331, "step": 874 }, { "epoch": 0.05928585947557423, "grad_norm": 11.900074005126953, "learning_rate": 9.983024163187077e-05, "loss": 1.2241, "step": 875 }, { "epoch": 0.059353614743546314, "grad_norm": 10.696773529052734, "learning_rate": 9.982887261277295e-05, "loss": 1.4041, "step": 876 }, { "epoch": 0.0594213700115184, "grad_norm": 13.532305717468262, "learning_rate": 9.982750359367513e-05, "loss": 1.3621, "step": 877 }, { "epoch": 0.05948912527949048, "grad_norm": 14.107857704162598, "learning_rate": 9.982613457457733e-05, "loss": 1.6327, "step": 878 }, { "epoch": 0.05955688054746257, "grad_norm": 11.584097862243652, "learning_rate": 9.982476555547951e-05, "loss": 0.9606, "step": 879 }, { "epoch": 0.05962463581543465, "grad_norm": 14.240161895751953, "learning_rate": 9.982339653638169e-05, "loss": 1.263, "step": 880 }, { "epoch": 0.05969239108340674, "grad_norm": 14.461871147155762, "learning_rate": 9.982202751728387e-05, "loss": 1.4201, "step": 881 }, { "epoch": 0.05976014635137882, "grad_norm": 14.072705268859863, "learning_rate": 9.982065849818606e-05, "loss": 1.5977, "step": 882 }, { "epoch": 0.05982790161935091, "grad_norm": 14.928994178771973, "learning_rate": 9.981928947908824e-05, "loss": 1.3346, "step": 883 }, { "epoch": 0.05989565688732299, "grad_norm": 14.898951530456543, "learning_rate": 9.981792045999042e-05, "loss": 1.312, "step": 884 }, { "epoch": 0.059963412155295076, "grad_norm": 13.089646339416504, "learning_rate": 9.98165514408926e-05, "loss": 1.0833, "step": 885 }, { "epoch": 0.06003116742326716, "grad_norm": 15.768043518066406, "learning_rate": 9.981518242179478e-05, "loss": 1.1246, "step": 886 }, { "epoch": 0.060098922691239245, "grad_norm": 11.8709135055542, "learning_rate": 9.981381340269698e-05, "loss": 1.1822, "step": 887 }, { "epoch": 0.06016667795921133, "grad_norm": 15.698454856872559, "learning_rate": 9.981244438359916e-05, "loss": 1.3733, "step": 888 }, { "epoch": 0.060234433227183415, "grad_norm": 14.827208518981934, "learning_rate": 9.981107536450134e-05, "loss": 1.3121, "step": 889 }, { "epoch": 0.0603021884951555, "grad_norm": 12.522045135498047, "learning_rate": 9.980970634540352e-05, "loss": 1.5584, "step": 890 }, { "epoch": 0.060369943763127584, "grad_norm": 14.417738914489746, "learning_rate": 9.980833732630571e-05, "loss": 1.2389, "step": 891 }, { "epoch": 0.06043769903109967, "grad_norm": 14.761930465698242, "learning_rate": 9.98069683072079e-05, "loss": 1.5007, "step": 892 }, { "epoch": 0.060505454299071754, "grad_norm": 15.882668495178223, "learning_rate": 9.980559928811007e-05, "loss": 1.2651, "step": 893 }, { "epoch": 0.06057320956704384, "grad_norm": 13.605412483215332, "learning_rate": 9.980423026901225e-05, "loss": 1.5186, "step": 894 }, { "epoch": 0.06064096483501592, "grad_norm": 10.654335021972656, "learning_rate": 9.980286124991443e-05, "loss": 1.1177, "step": 895 }, { "epoch": 0.06070872010298801, "grad_norm": 12.37457275390625, "learning_rate": 9.980149223081663e-05, "loss": 1.2362, "step": 896 }, { "epoch": 0.06077647537096009, "grad_norm": 12.591222763061523, "learning_rate": 9.980012321171881e-05, "loss": 1.2509, "step": 897 }, { "epoch": 0.06084423063893218, "grad_norm": 14.337310791015625, "learning_rate": 9.979875419262099e-05, "loss": 1.2987, "step": 898 }, { "epoch": 0.06091198590690426, "grad_norm": 15.496018409729004, "learning_rate": 9.979738517352317e-05, "loss": 1.5307, "step": 899 }, { "epoch": 0.06097974117487635, "grad_norm": 13.730890274047852, "learning_rate": 9.979601615442535e-05, "loss": 1.2975, "step": 900 }, { "epoch": 0.06104749644284843, "grad_norm": 12.314823150634766, "learning_rate": 9.979464713532754e-05, "loss": 1.1916, "step": 901 }, { "epoch": 0.061115251710820516, "grad_norm": 13.761808395385742, "learning_rate": 9.979327811622972e-05, "loss": 1.3037, "step": 902 }, { "epoch": 0.0611830069787926, "grad_norm": 13.308722496032715, "learning_rate": 9.97919090971319e-05, "loss": 1.0598, "step": 903 }, { "epoch": 0.061250762246764685, "grad_norm": 13.121098518371582, "learning_rate": 9.979054007803408e-05, "loss": 1.3378, "step": 904 }, { "epoch": 0.06131851751473677, "grad_norm": 16.975666046142578, "learning_rate": 9.978917105893628e-05, "loss": 1.2826, "step": 905 }, { "epoch": 0.061386272782708855, "grad_norm": 14.529984474182129, "learning_rate": 9.978780203983846e-05, "loss": 1.3467, "step": 906 }, { "epoch": 0.06145402805068094, "grad_norm": 11.081110000610352, "learning_rate": 9.978643302074064e-05, "loss": 1.1906, "step": 907 }, { "epoch": 0.061521783318653024, "grad_norm": 12.871200561523438, "learning_rate": 9.978506400164282e-05, "loss": 1.1057, "step": 908 }, { "epoch": 0.06158953858662511, "grad_norm": 13.982168197631836, "learning_rate": 9.9783694982545e-05, "loss": 1.3824, "step": 909 }, { "epoch": 0.061657293854597194, "grad_norm": 13.076074600219727, "learning_rate": 9.97823259634472e-05, "loss": 1.2985, "step": 910 }, { "epoch": 0.06172504912256928, "grad_norm": 11.015650749206543, "learning_rate": 9.978095694434937e-05, "loss": 1.2406, "step": 911 }, { "epoch": 0.06179280439054136, "grad_norm": 13.6082763671875, "learning_rate": 9.977958792525155e-05, "loss": 1.3873, "step": 912 }, { "epoch": 0.06186055965851345, "grad_norm": 15.930809020996094, "learning_rate": 9.977821890615375e-05, "loss": 1.3542, "step": 913 }, { "epoch": 0.06192831492648553, "grad_norm": 10.710271835327148, "learning_rate": 9.977684988705593e-05, "loss": 1.1571, "step": 914 }, { "epoch": 0.06199607019445762, "grad_norm": 11.110217094421387, "learning_rate": 9.977548086795811e-05, "loss": 1.3255, "step": 915 }, { "epoch": 0.0620638254624297, "grad_norm": 11.451903343200684, "learning_rate": 9.97741118488603e-05, "loss": 1.3681, "step": 916 }, { "epoch": 0.062131580730401786, "grad_norm": 10.884252548217773, "learning_rate": 9.977274282976248e-05, "loss": 1.1988, "step": 917 }, { "epoch": 0.06219933599837387, "grad_norm": 11.031237602233887, "learning_rate": 9.977137381066466e-05, "loss": 1.26, "step": 918 }, { "epoch": 0.062267091266345956, "grad_norm": 11.585648536682129, "learning_rate": 9.977000479156686e-05, "loss": 1.3099, "step": 919 }, { "epoch": 0.06233484653431804, "grad_norm": 10.867992401123047, "learning_rate": 9.976863577246904e-05, "loss": 1.2624, "step": 920 }, { "epoch": 0.062402601802290125, "grad_norm": 14.552916526794434, "learning_rate": 9.976726675337122e-05, "loss": 1.2244, "step": 921 }, { "epoch": 0.06247035707026221, "grad_norm": 12.101760864257812, "learning_rate": 9.97658977342734e-05, "loss": 1.1925, "step": 922 }, { "epoch": 0.0625381123382343, "grad_norm": 14.113842010498047, "learning_rate": 9.976452871517559e-05, "loss": 1.3395, "step": 923 }, { "epoch": 0.06260586760620639, "grad_norm": 17.214614868164062, "learning_rate": 9.976315969607777e-05, "loss": 1.3943, "step": 924 }, { "epoch": 0.06267362287417846, "grad_norm": 13.43308162689209, "learning_rate": 9.976179067697995e-05, "loss": 1.1596, "step": 925 }, { "epoch": 0.06274137814215056, "grad_norm": 13.806952476501465, "learning_rate": 9.976042165788213e-05, "loss": 1.0354, "step": 926 }, { "epoch": 0.06280913341012263, "grad_norm": 15.638693809509277, "learning_rate": 9.975905263878431e-05, "loss": 1.3807, "step": 927 }, { "epoch": 0.06287688867809473, "grad_norm": 14.337742805480957, "learning_rate": 9.975768361968651e-05, "loss": 1.537, "step": 928 }, { "epoch": 0.0629446439460668, "grad_norm": 14.540297508239746, "learning_rate": 9.975631460058869e-05, "loss": 1.2522, "step": 929 }, { "epoch": 0.0630123992140389, "grad_norm": 15.991955757141113, "learning_rate": 9.975494558149087e-05, "loss": 1.3855, "step": 930 }, { "epoch": 0.06308015448201097, "grad_norm": 13.957479476928711, "learning_rate": 9.975357656239305e-05, "loss": 1.43, "step": 931 }, { "epoch": 0.06314790974998306, "grad_norm": 16.805377960205078, "learning_rate": 9.975220754329523e-05, "loss": 1.383, "step": 932 }, { "epoch": 0.06321566501795514, "grad_norm": 12.41854476928711, "learning_rate": 9.975083852419742e-05, "loss": 1.068, "step": 933 }, { "epoch": 0.06328342028592723, "grad_norm": 15.929006576538086, "learning_rate": 9.97494695050996e-05, "loss": 1.212, "step": 934 }, { "epoch": 0.06335117555389931, "grad_norm": 13.205544471740723, "learning_rate": 9.974810048600178e-05, "loss": 1.3682, "step": 935 }, { "epoch": 0.0634189308218714, "grad_norm": 12.105626106262207, "learning_rate": 9.974673146690396e-05, "loss": 1.3821, "step": 936 }, { "epoch": 0.06348668608984348, "grad_norm": 13.776711463928223, "learning_rate": 9.974536244780616e-05, "loss": 1.1073, "step": 937 }, { "epoch": 0.06355444135781557, "grad_norm": 12.227380752563477, "learning_rate": 9.974399342870834e-05, "loss": 1.2026, "step": 938 }, { "epoch": 0.06362219662578765, "grad_norm": 12.723440170288086, "learning_rate": 9.974262440961052e-05, "loss": 1.1325, "step": 939 }, { "epoch": 0.06368995189375974, "grad_norm": 13.943262100219727, "learning_rate": 9.97412553905127e-05, "loss": 1.1878, "step": 940 }, { "epoch": 0.06375770716173182, "grad_norm": 12.644627571105957, "learning_rate": 9.973988637141488e-05, "loss": 1.3097, "step": 941 }, { "epoch": 0.06382546242970391, "grad_norm": 12.108241081237793, "learning_rate": 9.973851735231707e-05, "loss": 1.1686, "step": 942 }, { "epoch": 0.06389321769767599, "grad_norm": 14.375092506408691, "learning_rate": 9.973714833321925e-05, "loss": 1.2721, "step": 943 }, { "epoch": 0.06396097296564808, "grad_norm": 13.439800262451172, "learning_rate": 9.973577931412143e-05, "loss": 1.3898, "step": 944 }, { "epoch": 0.06402872823362016, "grad_norm": 13.717879295349121, "learning_rate": 9.973441029502361e-05, "loss": 1.3823, "step": 945 }, { "epoch": 0.06409648350159225, "grad_norm": 12.745361328125, "learning_rate": 9.973304127592581e-05, "loss": 1.2396, "step": 946 }, { "epoch": 0.06416423876956433, "grad_norm": 11.784343719482422, "learning_rate": 9.973167225682799e-05, "loss": 1.214, "step": 947 }, { "epoch": 0.06423199403753642, "grad_norm": 14.205467224121094, "learning_rate": 9.973030323773017e-05, "loss": 1.2803, "step": 948 }, { "epoch": 0.0642997493055085, "grad_norm": 13.257532119750977, "learning_rate": 9.972893421863235e-05, "loss": 1.378, "step": 949 }, { "epoch": 0.06436750457348059, "grad_norm": 15.153338432312012, "learning_rate": 9.972756519953453e-05, "loss": 1.2854, "step": 950 }, { "epoch": 0.06443525984145267, "grad_norm": 16.765771865844727, "learning_rate": 9.972619618043672e-05, "loss": 1.3016, "step": 951 }, { "epoch": 0.06450301510942476, "grad_norm": 14.636106491088867, "learning_rate": 9.97248271613389e-05, "loss": 1.3803, "step": 952 }, { "epoch": 0.06457077037739685, "grad_norm": 13.87410831451416, "learning_rate": 9.972345814224108e-05, "loss": 1.4126, "step": 953 }, { "epoch": 0.06463852564536893, "grad_norm": 14.328899383544922, "learning_rate": 9.972208912314326e-05, "loss": 1.061, "step": 954 }, { "epoch": 0.06470628091334102, "grad_norm": 12.485203742980957, "learning_rate": 9.972072010404544e-05, "loss": 1.0985, "step": 955 }, { "epoch": 0.0647740361813131, "grad_norm": 13.77907943725586, "learning_rate": 9.971935108494764e-05, "loss": 1.3534, "step": 956 }, { "epoch": 0.06484179144928519, "grad_norm": 10.579590797424316, "learning_rate": 9.971798206584982e-05, "loss": 1.166, "step": 957 }, { "epoch": 0.06490954671725727, "grad_norm": 14.690185546875, "learning_rate": 9.9716613046752e-05, "loss": 1.3666, "step": 958 }, { "epoch": 0.06497730198522936, "grad_norm": 12.904786109924316, "learning_rate": 9.97152440276542e-05, "loss": 1.14, "step": 959 }, { "epoch": 0.06504505725320144, "grad_norm": 12.126219749450684, "learning_rate": 9.971387500855637e-05, "loss": 1.4157, "step": 960 }, { "epoch": 0.06511281252117353, "grad_norm": 13.747931480407715, "learning_rate": 9.971250598945855e-05, "loss": 1.4557, "step": 961 }, { "epoch": 0.0651805677891456, "grad_norm": 13.232327461242676, "learning_rate": 9.971113697036075e-05, "loss": 1.3471, "step": 962 }, { "epoch": 0.0652483230571177, "grad_norm": 14.886791229248047, "learning_rate": 9.970976795126293e-05, "loss": 1.1866, "step": 963 }, { "epoch": 0.06531607832508977, "grad_norm": 11.747659683227539, "learning_rate": 9.970839893216511e-05, "loss": 1.0443, "step": 964 }, { "epoch": 0.06538383359306187, "grad_norm": 11.181273460388184, "learning_rate": 9.97070299130673e-05, "loss": 1.1391, "step": 965 }, { "epoch": 0.06545158886103394, "grad_norm": 11.9672269821167, "learning_rate": 9.970566089396948e-05, "loss": 1.2847, "step": 966 }, { "epoch": 0.06551934412900604, "grad_norm": 15.825364112854004, "learning_rate": 9.970429187487166e-05, "loss": 1.3301, "step": 967 }, { "epoch": 0.06558709939697811, "grad_norm": 12.26963996887207, "learning_rate": 9.970292285577384e-05, "loss": 1.2524, "step": 968 }, { "epoch": 0.0656548546649502, "grad_norm": 11.440977096557617, "learning_rate": 9.970155383667604e-05, "loss": 1.229, "step": 969 }, { "epoch": 0.06572260993292228, "grad_norm": 10.704546928405762, "learning_rate": 9.970018481757822e-05, "loss": 0.9936, "step": 970 }, { "epoch": 0.06579036520089437, "grad_norm": 13.20880126953125, "learning_rate": 9.96988157984804e-05, "loss": 1.2353, "step": 971 }, { "epoch": 0.06585812046886645, "grad_norm": 13.101622581481934, "learning_rate": 9.969744677938258e-05, "loss": 1.2559, "step": 972 }, { "epoch": 0.06592587573683854, "grad_norm": 11.725826263427734, "learning_rate": 9.969607776028476e-05, "loss": 1.2051, "step": 973 }, { "epoch": 0.06599363100481062, "grad_norm": 11.890633583068848, "learning_rate": 9.969470874118695e-05, "loss": 1.1607, "step": 974 }, { "epoch": 0.06606138627278271, "grad_norm": 11.066970825195312, "learning_rate": 9.969333972208913e-05, "loss": 1.1031, "step": 975 }, { "epoch": 0.06612914154075479, "grad_norm": 12.26187515258789, "learning_rate": 9.969197070299131e-05, "loss": 1.309, "step": 976 }, { "epoch": 0.06619689680872688, "grad_norm": 13.490363121032715, "learning_rate": 9.96906016838935e-05, "loss": 1.3062, "step": 977 }, { "epoch": 0.06626465207669896, "grad_norm": 12.306289672851562, "learning_rate": 9.968923266479567e-05, "loss": 0.9784, "step": 978 }, { "epoch": 0.06633240734467105, "grad_norm": 10.699983596801758, "learning_rate": 9.968786364569787e-05, "loss": 1.0547, "step": 979 }, { "epoch": 0.06640016261264313, "grad_norm": 12.298179626464844, "learning_rate": 9.968649462660005e-05, "loss": 1.2496, "step": 980 }, { "epoch": 0.06646791788061522, "grad_norm": 15.239167213439941, "learning_rate": 9.968512560750223e-05, "loss": 1.2545, "step": 981 }, { "epoch": 0.0665356731485873, "grad_norm": 11.61802864074707, "learning_rate": 9.968375658840441e-05, "loss": 1.1668, "step": 982 }, { "epoch": 0.06660342841655939, "grad_norm": 12.804032325744629, "learning_rate": 9.96823875693066e-05, "loss": 1.3203, "step": 983 }, { "epoch": 0.06667118368453147, "grad_norm": 14.511723518371582, "learning_rate": 9.968101855020878e-05, "loss": 1.7519, "step": 984 }, { "epoch": 0.06673893895250356, "grad_norm": 12.442008018493652, "learning_rate": 9.967964953111096e-05, "loss": 1.3758, "step": 985 }, { "epoch": 0.06680669422047564, "grad_norm": 14.486754417419434, "learning_rate": 9.967828051201314e-05, "loss": 1.1924, "step": 986 }, { "epoch": 0.06687444948844773, "grad_norm": 13.529693603515625, "learning_rate": 9.967691149291532e-05, "loss": 1.2698, "step": 987 }, { "epoch": 0.06694220475641981, "grad_norm": 12.980225563049316, "learning_rate": 9.967554247381752e-05, "loss": 1.1896, "step": 988 }, { "epoch": 0.0670099600243919, "grad_norm": 15.495257377624512, "learning_rate": 9.96741734547197e-05, "loss": 1.286, "step": 989 }, { "epoch": 0.06707771529236398, "grad_norm": 12.67573070526123, "learning_rate": 9.967280443562188e-05, "loss": 1.1751, "step": 990 }, { "epoch": 0.06714547056033607, "grad_norm": 11.141845703125, "learning_rate": 9.967143541652406e-05, "loss": 1.5109, "step": 991 }, { "epoch": 0.06721322582830815, "grad_norm": 11.975769996643066, "learning_rate": 9.967006639742625e-05, "loss": 1.1579, "step": 992 }, { "epoch": 0.06728098109628024, "grad_norm": 13.872209548950195, "learning_rate": 9.966869737832843e-05, "loss": 1.1852, "step": 993 }, { "epoch": 0.06734873636425232, "grad_norm": 11.52573299407959, "learning_rate": 9.966732835923061e-05, "loss": 1.2319, "step": 994 }, { "epoch": 0.0674164916322244, "grad_norm": 12.90494155883789, "learning_rate": 9.96659593401328e-05, "loss": 1.2415, "step": 995 }, { "epoch": 0.06748424690019648, "grad_norm": 14.210317611694336, "learning_rate": 9.966459032103497e-05, "loss": 1.2901, "step": 996 }, { "epoch": 0.06755200216816858, "grad_norm": 12.600135803222656, "learning_rate": 9.966322130193717e-05, "loss": 1.4516, "step": 997 }, { "epoch": 0.06761975743614065, "grad_norm": 14.462118148803711, "learning_rate": 9.966185228283935e-05, "loss": 1.2205, "step": 998 }, { "epoch": 0.06768751270411275, "grad_norm": 12.870843887329102, "learning_rate": 9.966048326374153e-05, "loss": 1.179, "step": 999 }, { "epoch": 0.06775526797208482, "grad_norm": 16.4424991607666, "learning_rate": 9.965911424464371e-05, "loss": 1.5714, "step": 1000 }, { "epoch": 0.06782302324005691, "grad_norm": 12.902230262756348, "learning_rate": 9.96577452255459e-05, "loss": 1.2803, "step": 1001 }, { "epoch": 0.06789077850802899, "grad_norm": 11.469466209411621, "learning_rate": 9.965637620644808e-05, "loss": 1.0771, "step": 1002 }, { "epoch": 0.06795853377600108, "grad_norm": 13.96650505065918, "learning_rate": 9.965500718735026e-05, "loss": 1.2427, "step": 1003 }, { "epoch": 0.06802628904397316, "grad_norm": 11.55516242980957, "learning_rate": 9.965363816825244e-05, "loss": 1.0396, "step": 1004 }, { "epoch": 0.06809404431194525, "grad_norm": 13.34827709197998, "learning_rate": 9.965226914915462e-05, "loss": 1.181, "step": 1005 }, { "epoch": 0.06816179957991735, "grad_norm": 11.243910789489746, "learning_rate": 9.965090013005682e-05, "loss": 1.2875, "step": 1006 }, { "epoch": 0.06822955484788942, "grad_norm": 14.152894020080566, "learning_rate": 9.9649531110959e-05, "loss": 1.3125, "step": 1007 }, { "epoch": 0.06829731011586151, "grad_norm": 13.010010719299316, "learning_rate": 9.964816209186118e-05, "loss": 1.3213, "step": 1008 }, { "epoch": 0.06836506538383359, "grad_norm": 15.990034103393555, "learning_rate": 9.964679307276337e-05, "loss": 1.1878, "step": 1009 }, { "epoch": 0.06843282065180568, "grad_norm": 12.943589210510254, "learning_rate": 9.964542405366555e-05, "loss": 1.2443, "step": 1010 }, { "epoch": 0.06850057591977776, "grad_norm": 12.108896255493164, "learning_rate": 9.964405503456775e-05, "loss": 1.1719, "step": 1011 }, { "epoch": 0.06856833118774985, "grad_norm": 12.097951889038086, "learning_rate": 9.964268601546993e-05, "loss": 1.0653, "step": 1012 }, { "epoch": 0.06863608645572193, "grad_norm": 14.222228050231934, "learning_rate": 9.964131699637211e-05, "loss": 1.3379, "step": 1013 }, { "epoch": 0.06870384172369402, "grad_norm": 12.636894226074219, "learning_rate": 9.963994797727429e-05, "loss": 1.5559, "step": 1014 }, { "epoch": 0.0687715969916661, "grad_norm": 15.458481788635254, "learning_rate": 9.963857895817648e-05, "loss": 1.2542, "step": 1015 }, { "epoch": 0.06883935225963819, "grad_norm": 11.246847152709961, "learning_rate": 9.963720993907866e-05, "loss": 1.1944, "step": 1016 }, { "epoch": 0.06890710752761027, "grad_norm": 11.699065208435059, "learning_rate": 9.963584091998084e-05, "loss": 1.0739, "step": 1017 }, { "epoch": 0.06897486279558236, "grad_norm": 12.259678840637207, "learning_rate": 9.963447190088302e-05, "loss": 1.1365, "step": 1018 }, { "epoch": 0.06904261806355444, "grad_norm": 13.594696998596191, "learning_rate": 9.96331028817852e-05, "loss": 1.1418, "step": 1019 }, { "epoch": 0.06911037333152653, "grad_norm": 12.90888786315918, "learning_rate": 9.96317338626874e-05, "loss": 1.1987, "step": 1020 }, { "epoch": 0.06917812859949861, "grad_norm": 13.04245662689209, "learning_rate": 9.963036484358958e-05, "loss": 1.4288, "step": 1021 }, { "epoch": 0.0692458838674707, "grad_norm": 12.706077575683594, "learning_rate": 9.962899582449176e-05, "loss": 1.222, "step": 1022 }, { "epoch": 0.06931363913544278, "grad_norm": 14.205679893493652, "learning_rate": 9.962762680539394e-05, "loss": 1.3305, "step": 1023 }, { "epoch": 0.06938139440341487, "grad_norm": 17.09891128540039, "learning_rate": 9.962625778629613e-05, "loss": 1.47, "step": 1024 }, { "epoch": 0.06944914967138695, "grad_norm": 14.603500366210938, "learning_rate": 9.962488876719831e-05, "loss": 1.3887, "step": 1025 }, { "epoch": 0.06951690493935904, "grad_norm": 10.820066452026367, "learning_rate": 9.962351974810049e-05, "loss": 1.1543, "step": 1026 }, { "epoch": 0.06958466020733112, "grad_norm": 10.99889850616455, "learning_rate": 9.962215072900267e-05, "loss": 1.2908, "step": 1027 }, { "epoch": 0.06965241547530321, "grad_norm": 13.470711708068848, "learning_rate": 9.962078170990485e-05, "loss": 1.1422, "step": 1028 }, { "epoch": 0.06972017074327529, "grad_norm": 11.086441040039062, "learning_rate": 9.961941269080705e-05, "loss": 0.9835, "step": 1029 }, { "epoch": 0.06978792601124738, "grad_norm": 16.768535614013672, "learning_rate": 9.961804367170923e-05, "loss": 1.404, "step": 1030 }, { "epoch": 0.06985568127921946, "grad_norm": 15.851200103759766, "learning_rate": 9.961667465261141e-05, "loss": 1.4874, "step": 1031 }, { "epoch": 0.06992343654719155, "grad_norm": 11.995482444763184, "learning_rate": 9.961530563351359e-05, "loss": 1.1497, "step": 1032 }, { "epoch": 0.06999119181516363, "grad_norm": 13.591619491577148, "learning_rate": 9.961393661441577e-05, "loss": 1.4773, "step": 1033 }, { "epoch": 0.07005894708313572, "grad_norm": 16.878938674926758, "learning_rate": 9.961256759531796e-05, "loss": 1.2522, "step": 1034 }, { "epoch": 0.0701267023511078, "grad_norm": 11.901616096496582, "learning_rate": 9.961119857622014e-05, "loss": 1.2164, "step": 1035 }, { "epoch": 0.07019445761907989, "grad_norm": 14.935117721557617, "learning_rate": 9.960982955712232e-05, "loss": 1.1251, "step": 1036 }, { "epoch": 0.07026221288705196, "grad_norm": 12.380253791809082, "learning_rate": 9.96084605380245e-05, "loss": 1.3965, "step": 1037 }, { "epoch": 0.07032996815502406, "grad_norm": 11.645035743713379, "learning_rate": 9.96070915189267e-05, "loss": 1.266, "step": 1038 }, { "epoch": 0.07039772342299613, "grad_norm": 14.525420188903809, "learning_rate": 9.960572249982888e-05, "loss": 1.3991, "step": 1039 }, { "epoch": 0.07046547869096823, "grad_norm": 14.775094985961914, "learning_rate": 9.960435348073106e-05, "loss": 1.5958, "step": 1040 }, { "epoch": 0.0705332339589403, "grad_norm": 10.2192964553833, "learning_rate": 9.960298446163324e-05, "loss": 1.1793, "step": 1041 }, { "epoch": 0.0706009892269124, "grad_norm": 13.074480056762695, "learning_rate": 9.960161544253542e-05, "loss": 1.4243, "step": 1042 }, { "epoch": 0.07066874449488447, "grad_norm": 12.679484367370605, "learning_rate": 9.960024642343761e-05, "loss": 1.3398, "step": 1043 }, { "epoch": 0.07073649976285656, "grad_norm": 9.061332702636719, "learning_rate": 9.95988774043398e-05, "loss": 1.0036, "step": 1044 }, { "epoch": 0.07080425503082864, "grad_norm": 13.423661231994629, "learning_rate": 9.959750838524197e-05, "loss": 1.2767, "step": 1045 }, { "epoch": 0.07087201029880073, "grad_norm": 13.955148696899414, "learning_rate": 9.959613936614415e-05, "loss": 1.311, "step": 1046 }, { "epoch": 0.07093976556677281, "grad_norm": 12.746015548706055, "learning_rate": 9.959477034704635e-05, "loss": 1.42, "step": 1047 }, { "epoch": 0.0710075208347449, "grad_norm": 11.409982681274414, "learning_rate": 9.959340132794853e-05, "loss": 1.3051, "step": 1048 }, { "epoch": 0.07107527610271698, "grad_norm": 11.801681518554688, "learning_rate": 9.959203230885071e-05, "loss": 1.2315, "step": 1049 }, { "epoch": 0.07114303137068907, "grad_norm": 13.041158676147461, "learning_rate": 9.959066328975289e-05, "loss": 1.3487, "step": 1050 }, { "epoch": 0.07121078663866115, "grad_norm": 13.474900245666504, "learning_rate": 9.958929427065507e-05, "loss": 1.4542, "step": 1051 }, { "epoch": 0.07127854190663324, "grad_norm": 12.335237503051758, "learning_rate": 9.958792525155726e-05, "loss": 1.1841, "step": 1052 }, { "epoch": 0.07134629717460532, "grad_norm": 14.909475326538086, "learning_rate": 9.958655623245944e-05, "loss": 1.272, "step": 1053 }, { "epoch": 0.07141405244257741, "grad_norm": 13.449742317199707, "learning_rate": 9.958518721336162e-05, "loss": 1.4881, "step": 1054 }, { "epoch": 0.07148180771054949, "grad_norm": 12.2557954788208, "learning_rate": 9.958381819426382e-05, "loss": 1.2059, "step": 1055 }, { "epoch": 0.07154956297852158, "grad_norm": 13.71298885345459, "learning_rate": 9.9582449175166e-05, "loss": 1.2627, "step": 1056 }, { "epoch": 0.07161731824649366, "grad_norm": 15.4293212890625, "learning_rate": 9.958108015606818e-05, "loss": 1.4663, "step": 1057 }, { "epoch": 0.07168507351446575, "grad_norm": 13.665759086608887, "learning_rate": 9.957971113697037e-05, "loss": 1.4634, "step": 1058 }, { "epoch": 0.07175282878243784, "grad_norm": 13.064310073852539, "learning_rate": 9.957834211787255e-05, "loss": 1.2686, "step": 1059 }, { "epoch": 0.07182058405040992, "grad_norm": 15.21746826171875, "learning_rate": 9.957697309877473e-05, "loss": 1.4861, "step": 1060 }, { "epoch": 0.07188833931838201, "grad_norm": 12.499883651733398, "learning_rate": 9.957560407967693e-05, "loss": 1.5275, "step": 1061 }, { "epoch": 0.07195609458635409, "grad_norm": 9.355907440185547, "learning_rate": 9.957423506057911e-05, "loss": 1.1509, "step": 1062 }, { "epoch": 0.07202384985432618, "grad_norm": 10.428252220153809, "learning_rate": 9.957286604148129e-05, "loss": 1.0874, "step": 1063 }, { "epoch": 0.07209160512229826, "grad_norm": 11.351346015930176, "learning_rate": 9.957149702238347e-05, "loss": 1.0887, "step": 1064 }, { "epoch": 0.07215936039027035, "grad_norm": 15.271830558776855, "learning_rate": 9.957012800328565e-05, "loss": 1.2466, "step": 1065 }, { "epoch": 0.07222711565824243, "grad_norm": 11.172418594360352, "learning_rate": 9.956875898418784e-05, "loss": 1.0958, "step": 1066 }, { "epoch": 0.07229487092621452, "grad_norm": 14.221702575683594, "learning_rate": 9.956738996509002e-05, "loss": 1.2288, "step": 1067 }, { "epoch": 0.0723626261941866, "grad_norm": 12.167356491088867, "learning_rate": 9.95660209459922e-05, "loss": 1.1159, "step": 1068 }, { "epoch": 0.07243038146215869, "grad_norm": 12.607329368591309, "learning_rate": 9.956465192689438e-05, "loss": 1.2475, "step": 1069 }, { "epoch": 0.07249813673013077, "grad_norm": 10.59451675415039, "learning_rate": 9.956328290779658e-05, "loss": 1.1493, "step": 1070 }, { "epoch": 0.07256589199810286, "grad_norm": 11.190742492675781, "learning_rate": 9.956191388869876e-05, "loss": 1.4549, "step": 1071 }, { "epoch": 0.07263364726607494, "grad_norm": 11.225564956665039, "learning_rate": 9.956054486960094e-05, "loss": 1.0741, "step": 1072 }, { "epoch": 0.07270140253404703, "grad_norm": 13.648139953613281, "learning_rate": 9.955917585050312e-05, "loss": 1.2942, "step": 1073 }, { "epoch": 0.0727691578020191, "grad_norm": 14.606941223144531, "learning_rate": 9.95578068314053e-05, "loss": 1.253, "step": 1074 }, { "epoch": 0.0728369130699912, "grad_norm": 11.043729782104492, "learning_rate": 9.955643781230749e-05, "loss": 1.2279, "step": 1075 }, { "epoch": 0.07290466833796327, "grad_norm": 12.463634490966797, "learning_rate": 9.955506879320967e-05, "loss": 1.4399, "step": 1076 }, { "epoch": 0.07297242360593537, "grad_norm": 13.74101734161377, "learning_rate": 9.955369977411185e-05, "loss": 1.4323, "step": 1077 }, { "epoch": 0.07304017887390744, "grad_norm": 10.1694974899292, "learning_rate": 9.955233075501403e-05, "loss": 1.0113, "step": 1078 }, { "epoch": 0.07310793414187954, "grad_norm": 14.407991409301758, "learning_rate": 9.955096173591623e-05, "loss": 1.7102, "step": 1079 }, { "epoch": 0.07317568940985161, "grad_norm": 13.84760570526123, "learning_rate": 9.954959271681841e-05, "loss": 1.4491, "step": 1080 }, { "epoch": 0.0732434446778237, "grad_norm": 12.220841407775879, "learning_rate": 9.954822369772059e-05, "loss": 1.1826, "step": 1081 }, { "epoch": 0.07331119994579578, "grad_norm": 11.380377769470215, "learning_rate": 9.954685467862277e-05, "loss": 1.3537, "step": 1082 }, { "epoch": 0.07337895521376787, "grad_norm": 12.916484832763672, "learning_rate": 9.954548565952495e-05, "loss": 1.3406, "step": 1083 }, { "epoch": 0.07344671048173995, "grad_norm": 14.107590675354004, "learning_rate": 9.954411664042714e-05, "loss": 1.0531, "step": 1084 }, { "epoch": 0.07351446574971204, "grad_norm": 13.498798370361328, "learning_rate": 9.954274762132932e-05, "loss": 1.1546, "step": 1085 }, { "epoch": 0.07358222101768412, "grad_norm": 13.155747413635254, "learning_rate": 9.95413786022315e-05, "loss": 1.212, "step": 1086 }, { "epoch": 0.07364997628565621, "grad_norm": 14.135687828063965, "learning_rate": 9.954000958313368e-05, "loss": 1.2001, "step": 1087 }, { "epoch": 0.07371773155362829, "grad_norm": 13.123790740966797, "learning_rate": 9.953864056403586e-05, "loss": 1.2735, "step": 1088 }, { "epoch": 0.07378548682160038, "grad_norm": 14.591660499572754, "learning_rate": 9.953727154493806e-05, "loss": 1.2977, "step": 1089 }, { "epoch": 0.07385324208957246, "grad_norm": 13.30614948272705, "learning_rate": 9.953590252584024e-05, "loss": 1.2445, "step": 1090 }, { "epoch": 0.07392099735754455, "grad_norm": 14.542524337768555, "learning_rate": 9.953453350674242e-05, "loss": 1.2327, "step": 1091 }, { "epoch": 0.07398875262551663, "grad_norm": 12.926522254943848, "learning_rate": 9.95331644876446e-05, "loss": 1.2139, "step": 1092 }, { "epoch": 0.07405650789348872, "grad_norm": 11.622479438781738, "learning_rate": 9.953179546854679e-05, "loss": 1.2887, "step": 1093 }, { "epoch": 0.0741242631614608, "grad_norm": 14.87485122680664, "learning_rate": 9.953042644944897e-05, "loss": 1.3132, "step": 1094 }, { "epoch": 0.07419201842943289, "grad_norm": 13.096991539001465, "learning_rate": 9.952905743035115e-05, "loss": 1.0783, "step": 1095 }, { "epoch": 0.07425977369740497, "grad_norm": 14.676551818847656, "learning_rate": 9.952768841125333e-05, "loss": 1.3171, "step": 1096 }, { "epoch": 0.07432752896537706, "grad_norm": 10.399755477905273, "learning_rate": 9.952631939215551e-05, "loss": 1.0747, "step": 1097 }, { "epoch": 0.07439528423334914, "grad_norm": 11.052745819091797, "learning_rate": 9.952495037305771e-05, "loss": 1.1653, "step": 1098 }, { "epoch": 0.07446303950132123, "grad_norm": 13.014352798461914, "learning_rate": 9.952358135395989e-05, "loss": 1.3727, "step": 1099 }, { "epoch": 0.07453079476929331, "grad_norm": 13.202262878417969, "learning_rate": 9.952221233486207e-05, "loss": 1.5063, "step": 1100 }, { "epoch": 0.0745985500372654, "grad_norm": 14.272111892700195, "learning_rate": 9.952084331576426e-05, "loss": 1.0183, "step": 1101 }, { "epoch": 0.07466630530523748, "grad_norm": 14.23975658416748, "learning_rate": 9.951947429666644e-05, "loss": 1.3518, "step": 1102 }, { "epoch": 0.07473406057320957, "grad_norm": 12.293742179870605, "learning_rate": 9.951810527756862e-05, "loss": 1.4021, "step": 1103 }, { "epoch": 0.07480181584118165, "grad_norm": 16.026020050048828, "learning_rate": 9.951673625847082e-05, "loss": 1.3412, "step": 1104 }, { "epoch": 0.07486957110915374, "grad_norm": 11.120819091796875, "learning_rate": 9.9515367239373e-05, "loss": 1.331, "step": 1105 }, { "epoch": 0.07493732637712582, "grad_norm": 12.363526344299316, "learning_rate": 9.951399822027518e-05, "loss": 1.4284, "step": 1106 }, { "epoch": 0.0750050816450979, "grad_norm": 14.377492904663086, "learning_rate": 9.951262920117737e-05, "loss": 1.3686, "step": 1107 }, { "epoch": 0.07507283691306998, "grad_norm": 11.366288185119629, "learning_rate": 9.951126018207955e-05, "loss": 1.2329, "step": 1108 }, { "epoch": 0.07514059218104208, "grad_norm": 10.26131820678711, "learning_rate": 9.950989116298173e-05, "loss": 1.1091, "step": 1109 }, { "epoch": 0.07520834744901415, "grad_norm": 12.64631462097168, "learning_rate": 9.950852214388391e-05, "loss": 1.1231, "step": 1110 }, { "epoch": 0.07527610271698625, "grad_norm": 13.042781829833984, "learning_rate": 9.95071531247861e-05, "loss": 1.2338, "step": 1111 }, { "epoch": 0.07534385798495834, "grad_norm": 11.577115058898926, "learning_rate": 9.950578410568829e-05, "loss": 1.2143, "step": 1112 }, { "epoch": 0.07541161325293042, "grad_norm": 13.640811920166016, "learning_rate": 9.950441508659047e-05, "loss": 1.224, "step": 1113 }, { "epoch": 0.0754793685209025, "grad_norm": 10.271018981933594, "learning_rate": 9.950304606749265e-05, "loss": 1.0459, "step": 1114 }, { "epoch": 0.07554712378887458, "grad_norm": 12.053836822509766, "learning_rate": 9.950167704839483e-05, "loss": 1.2137, "step": 1115 }, { "epoch": 0.07561487905684668, "grad_norm": 10.022509574890137, "learning_rate": 9.950030802929702e-05, "loss": 1.092, "step": 1116 }, { "epoch": 0.07568263432481875, "grad_norm": 12.28339672088623, "learning_rate": 9.94989390101992e-05, "loss": 1.1616, "step": 1117 }, { "epoch": 0.07575038959279085, "grad_norm": 14.182686805725098, "learning_rate": 9.949756999110138e-05, "loss": 1.5135, "step": 1118 }, { "epoch": 0.07581814486076292, "grad_norm": 10.668661117553711, "learning_rate": 9.949620097200356e-05, "loss": 1.2412, "step": 1119 }, { "epoch": 0.07588590012873501, "grad_norm": 14.443583488464355, "learning_rate": 9.949483195290574e-05, "loss": 1.207, "step": 1120 }, { "epoch": 0.07595365539670709, "grad_norm": 12.418794631958008, "learning_rate": 9.949346293380794e-05, "loss": 1.1907, "step": 1121 }, { "epoch": 0.07602141066467918, "grad_norm": 12.429618835449219, "learning_rate": 9.949209391471012e-05, "loss": 1.4403, "step": 1122 }, { "epoch": 0.07608916593265126, "grad_norm": 9.524622917175293, "learning_rate": 9.94907248956123e-05, "loss": 1.139, "step": 1123 }, { "epoch": 0.07615692120062335, "grad_norm": 10.974812507629395, "learning_rate": 9.948935587651448e-05, "loss": 1.4055, "step": 1124 }, { "epoch": 0.07622467646859543, "grad_norm": 16.152681350708008, "learning_rate": 9.948798685741667e-05, "loss": 1.0874, "step": 1125 }, { "epoch": 0.07629243173656752, "grad_norm": 12.023541450500488, "learning_rate": 9.948661783831885e-05, "loss": 1.2378, "step": 1126 }, { "epoch": 0.0763601870045396, "grad_norm": 11.597234725952148, "learning_rate": 9.948524881922103e-05, "loss": 1.0104, "step": 1127 }, { "epoch": 0.07642794227251169, "grad_norm": 11.374302864074707, "learning_rate": 9.948387980012321e-05, "loss": 1.1099, "step": 1128 }, { "epoch": 0.07649569754048377, "grad_norm": 14.398423194885254, "learning_rate": 9.94825107810254e-05, "loss": 1.5533, "step": 1129 }, { "epoch": 0.07656345280845586, "grad_norm": 13.1026611328125, "learning_rate": 9.948114176192759e-05, "loss": 1.3948, "step": 1130 }, { "epoch": 0.07663120807642794, "grad_norm": 12.012560844421387, "learning_rate": 9.947977274282977e-05, "loss": 1.2156, "step": 1131 }, { "epoch": 0.07669896334440003, "grad_norm": 12.900229454040527, "learning_rate": 9.947840372373195e-05, "loss": 1.2658, "step": 1132 }, { "epoch": 0.07676671861237211, "grad_norm": 15.250492095947266, "learning_rate": 9.947703470463413e-05, "loss": 1.6102, "step": 1133 }, { "epoch": 0.0768344738803442, "grad_norm": 15.08134937286377, "learning_rate": 9.947566568553632e-05, "loss": 1.1947, "step": 1134 }, { "epoch": 0.07690222914831628, "grad_norm": 13.353601455688477, "learning_rate": 9.94742966664385e-05, "loss": 1.3211, "step": 1135 }, { "epoch": 0.07696998441628837, "grad_norm": 11.310175895690918, "learning_rate": 9.947292764734068e-05, "loss": 1.2223, "step": 1136 }, { "epoch": 0.07703773968426045, "grad_norm": 11.800848960876465, "learning_rate": 9.947155862824286e-05, "loss": 1.2131, "step": 1137 }, { "epoch": 0.07710549495223254, "grad_norm": 11.748014450073242, "learning_rate": 9.947018960914504e-05, "loss": 1.0734, "step": 1138 }, { "epoch": 0.07717325022020462, "grad_norm": 12.282258033752441, "learning_rate": 9.946882059004724e-05, "loss": 1.3257, "step": 1139 }, { "epoch": 0.07724100548817671, "grad_norm": 11.93818473815918, "learning_rate": 9.946745157094942e-05, "loss": 1.1452, "step": 1140 }, { "epoch": 0.07730876075614879, "grad_norm": 13.397029876708984, "learning_rate": 9.94660825518516e-05, "loss": 1.2625, "step": 1141 }, { "epoch": 0.07737651602412088, "grad_norm": 12.135769844055176, "learning_rate": 9.946471353275378e-05, "loss": 1.3624, "step": 1142 }, { "epoch": 0.07744427129209296, "grad_norm": 11.304028511047363, "learning_rate": 9.946334451365596e-05, "loss": 1.3019, "step": 1143 }, { "epoch": 0.07751202656006505, "grad_norm": 10.978137016296387, "learning_rate": 9.946197549455815e-05, "loss": 1.3137, "step": 1144 }, { "epoch": 0.07757978182803713, "grad_norm": 10.997323989868164, "learning_rate": 9.946060647546033e-05, "loss": 1.3573, "step": 1145 }, { "epoch": 0.07764753709600922, "grad_norm": 11.883647918701172, "learning_rate": 9.945923745636251e-05, "loss": 1.2587, "step": 1146 }, { "epoch": 0.0777152923639813, "grad_norm": 10.70753288269043, "learning_rate": 9.945786843726471e-05, "loss": 1.0866, "step": 1147 }, { "epoch": 0.07778304763195339, "grad_norm": 13.318743705749512, "learning_rate": 9.945649941816689e-05, "loss": 1.3356, "step": 1148 }, { "epoch": 0.07785080289992546, "grad_norm": 10.656171798706055, "learning_rate": 9.945513039906907e-05, "loss": 1.2155, "step": 1149 }, { "epoch": 0.07791855816789756, "grad_norm": 14.73982048034668, "learning_rate": 9.945376137997126e-05, "loss": 1.2276, "step": 1150 }, { "epoch": 0.07798631343586963, "grad_norm": 12.461714744567871, "learning_rate": 9.945239236087344e-05, "loss": 1.2999, "step": 1151 }, { "epoch": 0.07805406870384173, "grad_norm": 12.277376174926758, "learning_rate": 9.945102334177562e-05, "loss": 1.1131, "step": 1152 }, { "epoch": 0.0781218239718138, "grad_norm": 12.726540565490723, "learning_rate": 9.944965432267782e-05, "loss": 1.4181, "step": 1153 }, { "epoch": 0.0781895792397859, "grad_norm": 11.086180686950684, "learning_rate": 9.944828530358e-05, "loss": 1.3852, "step": 1154 }, { "epoch": 0.07825733450775797, "grad_norm": 12.80537223815918, "learning_rate": 9.944691628448218e-05, "loss": 1.1101, "step": 1155 }, { "epoch": 0.07832508977573006, "grad_norm": 11.960269927978516, "learning_rate": 9.944554726538436e-05, "loss": 1.2186, "step": 1156 }, { "epoch": 0.07839284504370214, "grad_norm": 11.732439041137695, "learning_rate": 9.944417824628655e-05, "loss": 0.9854, "step": 1157 }, { "epoch": 0.07846060031167423, "grad_norm": 12.578715324401855, "learning_rate": 9.944280922718873e-05, "loss": 1.0792, "step": 1158 }, { "epoch": 0.07852835557964631, "grad_norm": 13.779712677001953, "learning_rate": 9.944144020809091e-05, "loss": 1.2468, "step": 1159 }, { "epoch": 0.0785961108476184, "grad_norm": 13.95693588256836, "learning_rate": 9.944007118899309e-05, "loss": 1.234, "step": 1160 }, { "epoch": 0.07866386611559048, "grad_norm": 12.056897163391113, "learning_rate": 9.943870216989527e-05, "loss": 1.3034, "step": 1161 }, { "epoch": 0.07873162138356257, "grad_norm": 11.633442878723145, "learning_rate": 9.943733315079747e-05, "loss": 1.2744, "step": 1162 }, { "epoch": 0.07879937665153465, "grad_norm": 14.062381744384766, "learning_rate": 9.943596413169965e-05, "loss": 1.2535, "step": 1163 }, { "epoch": 0.07886713191950674, "grad_norm": 11.60498332977295, "learning_rate": 9.943459511260183e-05, "loss": 1.1578, "step": 1164 }, { "epoch": 0.07893488718747883, "grad_norm": 9.667806625366211, "learning_rate": 9.943322609350401e-05, "loss": 1.0804, "step": 1165 }, { "epoch": 0.07900264245545091, "grad_norm": 12.30827808380127, "learning_rate": 9.943185707440619e-05, "loss": 1.4167, "step": 1166 }, { "epoch": 0.079070397723423, "grad_norm": 10.196819305419922, "learning_rate": 9.943048805530838e-05, "loss": 1.0547, "step": 1167 }, { "epoch": 0.07913815299139508, "grad_norm": 10.029928207397461, "learning_rate": 9.942911903621056e-05, "loss": 1.1798, "step": 1168 }, { "epoch": 0.07920590825936717, "grad_norm": 11.782978057861328, "learning_rate": 9.942775001711274e-05, "loss": 1.1454, "step": 1169 }, { "epoch": 0.07927366352733925, "grad_norm": 12.25143814086914, "learning_rate": 9.942638099801492e-05, "loss": 1.2854, "step": 1170 }, { "epoch": 0.07934141879531134, "grad_norm": 8.47904109954834, "learning_rate": 9.942501197891712e-05, "loss": 0.9381, "step": 1171 }, { "epoch": 0.07940917406328342, "grad_norm": 13.698802947998047, "learning_rate": 9.94236429598193e-05, "loss": 1.6261, "step": 1172 }, { "epoch": 0.07947692933125551, "grad_norm": 10.885397911071777, "learning_rate": 9.942227394072148e-05, "loss": 1.0645, "step": 1173 }, { "epoch": 0.07954468459922759, "grad_norm": 13.274818420410156, "learning_rate": 9.942090492162366e-05, "loss": 1.3411, "step": 1174 }, { "epoch": 0.07961243986719968, "grad_norm": 14.061238288879395, "learning_rate": 9.941953590252584e-05, "loss": 1.1916, "step": 1175 }, { "epoch": 0.07968019513517176, "grad_norm": 10.074264526367188, "learning_rate": 9.941816688342803e-05, "loss": 1.092, "step": 1176 }, { "epoch": 0.07974795040314385, "grad_norm": 14.741287231445312, "learning_rate": 9.941679786433021e-05, "loss": 1.3774, "step": 1177 }, { "epoch": 0.07981570567111593, "grad_norm": 11.308422088623047, "learning_rate": 9.941542884523239e-05, "loss": 0.9298, "step": 1178 }, { "epoch": 0.07988346093908802, "grad_norm": 14.375280380249023, "learning_rate": 9.941405982613457e-05, "loss": 1.4525, "step": 1179 }, { "epoch": 0.0799512162070601, "grad_norm": 11.724523544311523, "learning_rate": 9.941269080703677e-05, "loss": 1.3616, "step": 1180 }, { "epoch": 0.08001897147503219, "grad_norm": 12.578176498413086, "learning_rate": 9.941132178793895e-05, "loss": 1.1513, "step": 1181 }, { "epoch": 0.08008672674300427, "grad_norm": 12.100804328918457, "learning_rate": 9.940995276884113e-05, "loss": 1.0656, "step": 1182 }, { "epoch": 0.08015448201097636, "grad_norm": 11.680248260498047, "learning_rate": 9.940858374974331e-05, "loss": 1.2358, "step": 1183 }, { "epoch": 0.08022223727894844, "grad_norm": 10.066198348999023, "learning_rate": 9.940721473064549e-05, "loss": 0.9219, "step": 1184 }, { "epoch": 0.08028999254692053, "grad_norm": 10.813334465026855, "learning_rate": 9.940584571154768e-05, "loss": 1.1456, "step": 1185 }, { "epoch": 0.0803577478148926, "grad_norm": 14.004862785339355, "learning_rate": 9.940447669244986e-05, "loss": 1.2375, "step": 1186 }, { "epoch": 0.0804255030828647, "grad_norm": 11.868766784667969, "learning_rate": 9.940310767335204e-05, "loss": 1.3027, "step": 1187 }, { "epoch": 0.08049325835083677, "grad_norm": 12.48153018951416, "learning_rate": 9.940173865425422e-05, "loss": 1.1192, "step": 1188 }, { "epoch": 0.08056101361880887, "grad_norm": 12.340612411499023, "learning_rate": 9.940036963515642e-05, "loss": 1.246, "step": 1189 }, { "epoch": 0.08062876888678094, "grad_norm": 12.205392837524414, "learning_rate": 9.93990006160586e-05, "loss": 1.1233, "step": 1190 }, { "epoch": 0.08069652415475304, "grad_norm": 12.69509220123291, "learning_rate": 9.939763159696078e-05, "loss": 1.2202, "step": 1191 }, { "epoch": 0.08076427942272511, "grad_norm": 12.40784740447998, "learning_rate": 9.939626257786296e-05, "loss": 1.1345, "step": 1192 }, { "epoch": 0.0808320346906972, "grad_norm": 11.678507804870605, "learning_rate": 9.939489355876515e-05, "loss": 1.4513, "step": 1193 }, { "epoch": 0.08089978995866928, "grad_norm": 11.649873733520508, "learning_rate": 9.939352453966733e-05, "loss": 1.3827, "step": 1194 }, { "epoch": 0.08096754522664137, "grad_norm": 12.378853797912598, "learning_rate": 9.939215552056951e-05, "loss": 1.3632, "step": 1195 }, { "epoch": 0.08103530049461345, "grad_norm": 11.023188591003418, "learning_rate": 9.93907865014717e-05, "loss": 1.3892, "step": 1196 }, { "epoch": 0.08110305576258554, "grad_norm": 13.111897468566895, "learning_rate": 9.938941748237389e-05, "loss": 1.1973, "step": 1197 }, { "epoch": 0.08117081103055762, "grad_norm": 10.171613693237305, "learning_rate": 9.938804846327607e-05, "loss": 1.3862, "step": 1198 }, { "epoch": 0.08123856629852971, "grad_norm": 13.327658653259277, "learning_rate": 9.938667944417826e-05, "loss": 1.3173, "step": 1199 }, { "epoch": 0.08130632156650179, "grad_norm": 11.715154647827148, "learning_rate": 9.938531042508044e-05, "loss": 1.0839, "step": 1200 }, { "epoch": 0.08137407683447388, "grad_norm": 11.521212577819824, "learning_rate": 9.938394140598262e-05, "loss": 1.0285, "step": 1201 }, { "epoch": 0.08144183210244596, "grad_norm": 12.024236679077148, "learning_rate": 9.93825723868848e-05, "loss": 1.3025, "step": 1202 }, { "epoch": 0.08150958737041805, "grad_norm": 10.245376586914062, "learning_rate": 9.9381203367787e-05, "loss": 1.1658, "step": 1203 }, { "epoch": 0.08157734263839013, "grad_norm": 10.731759071350098, "learning_rate": 9.937983434868918e-05, "loss": 1.0565, "step": 1204 }, { "epoch": 0.08164509790636222, "grad_norm": 16.714153289794922, "learning_rate": 9.937846532959136e-05, "loss": 1.2761, "step": 1205 }, { "epoch": 0.0817128531743343, "grad_norm": 11.172699928283691, "learning_rate": 9.937709631049354e-05, "loss": 1.1887, "step": 1206 }, { "epoch": 0.08178060844230639, "grad_norm": 11.384743690490723, "learning_rate": 9.937572729139572e-05, "loss": 0.9546, "step": 1207 }, { "epoch": 0.08184836371027847, "grad_norm": 11.163822174072266, "learning_rate": 9.937435827229791e-05, "loss": 1.1498, "step": 1208 }, { "epoch": 0.08191611897825056, "grad_norm": 10.657593727111816, "learning_rate": 9.937298925320009e-05, "loss": 0.9506, "step": 1209 }, { "epoch": 0.08198387424622264, "grad_norm": 10.6862211227417, "learning_rate": 9.937162023410227e-05, "loss": 1.2308, "step": 1210 }, { "epoch": 0.08205162951419473, "grad_norm": 10.649473190307617, "learning_rate": 9.937025121500445e-05, "loss": 1.2679, "step": 1211 }, { "epoch": 0.08211938478216681, "grad_norm": 9.298782348632812, "learning_rate": 9.936888219590665e-05, "loss": 1.1327, "step": 1212 }, { "epoch": 0.0821871400501389, "grad_norm": 11.448348045349121, "learning_rate": 9.936751317680883e-05, "loss": 1.2796, "step": 1213 }, { "epoch": 0.08225489531811098, "grad_norm": 12.141517639160156, "learning_rate": 9.936614415771101e-05, "loss": 1.1128, "step": 1214 }, { "epoch": 0.08232265058608307, "grad_norm": 11.799830436706543, "learning_rate": 9.936477513861319e-05, "loss": 1.0743, "step": 1215 }, { "epoch": 0.08239040585405515, "grad_norm": 11.952958106994629, "learning_rate": 9.936340611951537e-05, "loss": 1.1659, "step": 1216 }, { "epoch": 0.08245816112202724, "grad_norm": 11.870144844055176, "learning_rate": 9.936203710041756e-05, "loss": 1.1195, "step": 1217 }, { "epoch": 0.08252591638999933, "grad_norm": 11.970368385314941, "learning_rate": 9.936066808131974e-05, "loss": 1.2139, "step": 1218 }, { "epoch": 0.08259367165797141, "grad_norm": 11.211687088012695, "learning_rate": 9.935929906222192e-05, "loss": 1.2107, "step": 1219 }, { "epoch": 0.0826614269259435, "grad_norm": 12.987563133239746, "learning_rate": 9.93579300431241e-05, "loss": 1.2804, "step": 1220 }, { "epoch": 0.08272918219391558, "grad_norm": 12.337888717651367, "learning_rate": 9.935656102402628e-05, "loss": 0.8713, "step": 1221 }, { "epoch": 0.08279693746188767, "grad_norm": 11.717150688171387, "learning_rate": 9.935519200492848e-05, "loss": 1.2684, "step": 1222 }, { "epoch": 0.08286469272985975, "grad_norm": 12.658769607543945, "learning_rate": 9.935382298583066e-05, "loss": 1.0231, "step": 1223 }, { "epoch": 0.08293244799783184, "grad_norm": 10.526476860046387, "learning_rate": 9.935245396673284e-05, "loss": 1.0605, "step": 1224 }, { "epoch": 0.08300020326580392, "grad_norm": 10.575004577636719, "learning_rate": 9.935108494763502e-05, "loss": 1.2169, "step": 1225 }, { "epoch": 0.083067958533776, "grad_norm": 10.499407768249512, "learning_rate": 9.934971592853721e-05, "loss": 1.3064, "step": 1226 }, { "epoch": 0.08313571380174808, "grad_norm": 12.25387191772461, "learning_rate": 9.934834690943939e-05, "loss": 1.1554, "step": 1227 }, { "epoch": 0.08320346906972018, "grad_norm": 12.173775672912598, "learning_rate": 9.934697789034157e-05, "loss": 1.0917, "step": 1228 }, { "epoch": 0.08327122433769225, "grad_norm": 12.424721717834473, "learning_rate": 9.934560887124375e-05, "loss": 1.3679, "step": 1229 }, { "epoch": 0.08333897960566435, "grad_norm": 11.291987419128418, "learning_rate": 9.934423985214593e-05, "loss": 1.1439, "step": 1230 }, { "epoch": 0.08340673487363642, "grad_norm": 10.908637046813965, "learning_rate": 9.934287083304813e-05, "loss": 1.0487, "step": 1231 }, { "epoch": 0.08347449014160851, "grad_norm": 14.767544746398926, "learning_rate": 9.934150181395031e-05, "loss": 1.2951, "step": 1232 }, { "epoch": 0.08354224540958059, "grad_norm": 11.959871292114258, "learning_rate": 9.934013279485249e-05, "loss": 1.1244, "step": 1233 }, { "epoch": 0.08361000067755268, "grad_norm": 11.19450569152832, "learning_rate": 9.933876377575467e-05, "loss": 1.2802, "step": 1234 }, { "epoch": 0.08367775594552476, "grad_norm": 10.71377182006836, "learning_rate": 9.933739475665686e-05, "loss": 1.0376, "step": 1235 }, { "epoch": 0.08374551121349685, "grad_norm": 12.174454689025879, "learning_rate": 9.933602573755904e-05, "loss": 1.5779, "step": 1236 }, { "epoch": 0.08381326648146893, "grad_norm": 9.863836288452148, "learning_rate": 9.933465671846122e-05, "loss": 1.0443, "step": 1237 }, { "epoch": 0.08388102174944102, "grad_norm": 11.964838027954102, "learning_rate": 9.93332876993634e-05, "loss": 1.0613, "step": 1238 }, { "epoch": 0.0839487770174131, "grad_norm": 11.49203109741211, "learning_rate": 9.93319186802656e-05, "loss": 1.162, "step": 1239 }, { "epoch": 0.08401653228538519, "grad_norm": 12.548815727233887, "learning_rate": 9.933054966116778e-05, "loss": 1.0652, "step": 1240 }, { "epoch": 0.08408428755335727, "grad_norm": 13.37637996673584, "learning_rate": 9.932918064206996e-05, "loss": 1.2281, "step": 1241 }, { "epoch": 0.08415204282132936, "grad_norm": 12.59211254119873, "learning_rate": 9.932781162297215e-05, "loss": 1.0624, "step": 1242 }, { "epoch": 0.08421979808930144, "grad_norm": 13.386221885681152, "learning_rate": 9.932644260387433e-05, "loss": 1.4393, "step": 1243 }, { "epoch": 0.08428755335727353, "grad_norm": 12.647525787353516, "learning_rate": 9.932507358477651e-05, "loss": 1.2228, "step": 1244 }, { "epoch": 0.08435530862524561, "grad_norm": 12.039474487304688, "learning_rate": 9.93237045656787e-05, "loss": 1.1354, "step": 1245 }, { "epoch": 0.0844230638932177, "grad_norm": 11.373556137084961, "learning_rate": 9.932233554658089e-05, "loss": 1.2418, "step": 1246 }, { "epoch": 0.08449081916118978, "grad_norm": 10.944781303405762, "learning_rate": 9.932096652748307e-05, "loss": 1.1515, "step": 1247 }, { "epoch": 0.08455857442916187, "grad_norm": 12.174854278564453, "learning_rate": 9.931959750838525e-05, "loss": 1.2063, "step": 1248 }, { "epoch": 0.08462632969713395, "grad_norm": 8.846879005432129, "learning_rate": 9.931822848928744e-05, "loss": 0.9639, "step": 1249 }, { "epoch": 0.08469408496510604, "grad_norm": 13.793547630310059, "learning_rate": 9.931685947018962e-05, "loss": 1.1236, "step": 1250 }, { "epoch": 0.08476184023307812, "grad_norm": 14.486831665039062, "learning_rate": 9.93154904510918e-05, "loss": 1.2047, "step": 1251 }, { "epoch": 0.08482959550105021, "grad_norm": 13.262588500976562, "learning_rate": 9.931412143199398e-05, "loss": 1.4091, "step": 1252 }, { "epoch": 0.08489735076902229, "grad_norm": 13.289068222045898, "learning_rate": 9.931275241289616e-05, "loss": 1.5065, "step": 1253 }, { "epoch": 0.08496510603699438, "grad_norm": 10.22205638885498, "learning_rate": 9.931138339379836e-05, "loss": 1.1116, "step": 1254 }, { "epoch": 0.08503286130496646, "grad_norm": 13.141668319702148, "learning_rate": 9.931001437470054e-05, "loss": 1.4006, "step": 1255 }, { "epoch": 0.08510061657293855, "grad_norm": 11.817032814025879, "learning_rate": 9.930864535560272e-05, "loss": 1.2062, "step": 1256 }, { "epoch": 0.08516837184091063, "grad_norm": 10.814498901367188, "learning_rate": 9.93072763365049e-05, "loss": 1.138, "step": 1257 }, { "epoch": 0.08523612710888272, "grad_norm": 12.598155975341797, "learning_rate": 9.930590731740709e-05, "loss": 1.1959, "step": 1258 }, { "epoch": 0.0853038823768548, "grad_norm": 11.909974098205566, "learning_rate": 9.930453829830927e-05, "loss": 1.2891, "step": 1259 }, { "epoch": 0.08537163764482689, "grad_norm": 12.663064956665039, "learning_rate": 9.930316927921145e-05, "loss": 1.3017, "step": 1260 }, { "epoch": 0.08543939291279896, "grad_norm": 10.810627937316895, "learning_rate": 9.930180026011363e-05, "loss": 1.16, "step": 1261 }, { "epoch": 0.08550714818077106, "grad_norm": 9.182926177978516, "learning_rate": 9.930043124101581e-05, "loss": 1.2625, "step": 1262 }, { "epoch": 0.08557490344874313, "grad_norm": 12.73978042602539, "learning_rate": 9.9299062221918e-05, "loss": 0.9904, "step": 1263 }, { "epoch": 0.08564265871671523, "grad_norm": 10.346587181091309, "learning_rate": 9.929769320282019e-05, "loss": 1.0167, "step": 1264 }, { "epoch": 0.0857104139846873, "grad_norm": 12.145682334899902, "learning_rate": 9.929632418372237e-05, "loss": 1.4282, "step": 1265 }, { "epoch": 0.0857781692526594, "grad_norm": 11.515445709228516, "learning_rate": 9.929495516462455e-05, "loss": 1.0527, "step": 1266 }, { "epoch": 0.08584592452063147, "grad_norm": 10.979050636291504, "learning_rate": 9.929358614552674e-05, "loss": 1.2888, "step": 1267 }, { "epoch": 0.08591367978860356, "grad_norm": 10.248215675354004, "learning_rate": 9.929221712642892e-05, "loss": 1.4316, "step": 1268 }, { "epoch": 0.08598143505657564, "grad_norm": 13.743851661682129, "learning_rate": 9.92908481073311e-05, "loss": 1.5047, "step": 1269 }, { "epoch": 0.08604919032454773, "grad_norm": 9.919225692749023, "learning_rate": 9.928947908823328e-05, "loss": 0.9602, "step": 1270 }, { "epoch": 0.08611694559251983, "grad_norm": 9.795915603637695, "learning_rate": 9.928811006913546e-05, "loss": 1.3736, "step": 1271 }, { "epoch": 0.0861847008604919, "grad_norm": 13.108200073242188, "learning_rate": 9.928674105003766e-05, "loss": 1.2591, "step": 1272 }, { "epoch": 0.086252456128464, "grad_norm": 13.108073234558105, "learning_rate": 9.928537203093984e-05, "loss": 1.102, "step": 1273 }, { "epoch": 0.08632021139643607, "grad_norm": 15.177817344665527, "learning_rate": 9.928400301184202e-05, "loss": 1.3164, "step": 1274 }, { "epoch": 0.08638796666440816, "grad_norm": 13.048440933227539, "learning_rate": 9.92826339927442e-05, "loss": 1.3029, "step": 1275 }, { "epoch": 0.08645572193238024, "grad_norm": 10.982895851135254, "learning_rate": 9.928126497364638e-05, "loss": 1.1829, "step": 1276 }, { "epoch": 0.08652347720035233, "grad_norm": 10.901629447937012, "learning_rate": 9.927989595454857e-05, "loss": 1.2116, "step": 1277 }, { "epoch": 0.08659123246832441, "grad_norm": 12.924722671508789, "learning_rate": 9.927852693545075e-05, "loss": 1.6292, "step": 1278 }, { "epoch": 0.0866589877362965, "grad_norm": 12.572770118713379, "learning_rate": 9.927715791635293e-05, "loss": 1.1012, "step": 1279 }, { "epoch": 0.08672674300426858, "grad_norm": 12.803020477294922, "learning_rate": 9.927578889725511e-05, "loss": 1.276, "step": 1280 }, { "epoch": 0.08679449827224067, "grad_norm": 10.92810344696045, "learning_rate": 9.92744198781573e-05, "loss": 1.1876, "step": 1281 }, { "epoch": 0.08686225354021275, "grad_norm": 12.253180503845215, "learning_rate": 9.927305085905949e-05, "loss": 1.1807, "step": 1282 }, { "epoch": 0.08693000880818484, "grad_norm": 11.163126945495605, "learning_rate": 9.927168183996167e-05, "loss": 1.1671, "step": 1283 }, { "epoch": 0.08699776407615692, "grad_norm": 10.726607322692871, "learning_rate": 9.927031282086385e-05, "loss": 1.185, "step": 1284 }, { "epoch": 0.08706551934412901, "grad_norm": 13.265491485595703, "learning_rate": 9.926894380176603e-05, "loss": 1.2916, "step": 1285 }, { "epoch": 0.08713327461210109, "grad_norm": 14.559592247009277, "learning_rate": 9.926757478266822e-05, "loss": 1.288, "step": 1286 }, { "epoch": 0.08720102988007318, "grad_norm": 14.816813468933105, "learning_rate": 9.92662057635704e-05, "loss": 1.4397, "step": 1287 }, { "epoch": 0.08726878514804526, "grad_norm": 11.811420440673828, "learning_rate": 9.926483674447258e-05, "loss": 1.348, "step": 1288 }, { "epoch": 0.08733654041601735, "grad_norm": 10.920133590698242, "learning_rate": 9.926346772537478e-05, "loss": 1.4169, "step": 1289 }, { "epoch": 0.08740429568398943, "grad_norm": 11.690089225769043, "learning_rate": 9.926209870627696e-05, "loss": 1.1199, "step": 1290 }, { "epoch": 0.08747205095196152, "grad_norm": 9.411031723022461, "learning_rate": 9.926072968717914e-05, "loss": 1.031, "step": 1291 }, { "epoch": 0.0875398062199336, "grad_norm": 12.174457550048828, "learning_rate": 9.925936066808133e-05, "loss": 1.0622, "step": 1292 }, { "epoch": 0.08760756148790569, "grad_norm": 10.346089363098145, "learning_rate": 9.925799164898351e-05, "loss": 1.2777, "step": 1293 }, { "epoch": 0.08767531675587777, "grad_norm": 12.534863471984863, "learning_rate": 9.925662262988569e-05, "loss": 1.0093, "step": 1294 }, { "epoch": 0.08774307202384986, "grad_norm": 12.050302505493164, "learning_rate": 9.925525361078789e-05, "loss": 1.0591, "step": 1295 }, { "epoch": 0.08781082729182194, "grad_norm": 11.556166648864746, "learning_rate": 9.925388459169007e-05, "loss": 1.1452, "step": 1296 }, { "epoch": 0.08787858255979403, "grad_norm": 9.693270683288574, "learning_rate": 9.925251557259225e-05, "loss": 1.0332, "step": 1297 }, { "epoch": 0.0879463378277661, "grad_norm": 12.646526336669922, "learning_rate": 9.925114655349443e-05, "loss": 1.1894, "step": 1298 }, { "epoch": 0.0880140930957382, "grad_norm": 10.676809310913086, "learning_rate": 9.924977753439661e-05, "loss": 1.1727, "step": 1299 }, { "epoch": 0.08808184836371027, "grad_norm": 11.182327270507812, "learning_rate": 9.92484085152988e-05, "loss": 1.0145, "step": 1300 }, { "epoch": 0.08814960363168237, "grad_norm": 11.55026626586914, "learning_rate": 9.924703949620098e-05, "loss": 1.2187, "step": 1301 }, { "epoch": 0.08821735889965444, "grad_norm": 11.502679824829102, "learning_rate": 9.924567047710316e-05, "loss": 1.3116, "step": 1302 }, { "epoch": 0.08828511416762654, "grad_norm": 9.676247596740723, "learning_rate": 9.924430145800534e-05, "loss": 1.2869, "step": 1303 }, { "epoch": 0.08835286943559861, "grad_norm": 15.123950004577637, "learning_rate": 9.924293243890754e-05, "loss": 1.0225, "step": 1304 }, { "epoch": 0.0884206247035707, "grad_norm": 14.030994415283203, "learning_rate": 9.924156341980972e-05, "loss": 0.9829, "step": 1305 }, { "epoch": 0.08848837997154278, "grad_norm": 10.00402545928955, "learning_rate": 9.92401944007119e-05, "loss": 1.0946, "step": 1306 }, { "epoch": 0.08855613523951487, "grad_norm": 9.077853202819824, "learning_rate": 9.923882538161408e-05, "loss": 0.852, "step": 1307 }, { "epoch": 0.08862389050748695, "grad_norm": 12.777885437011719, "learning_rate": 9.923745636251626e-05, "loss": 1.0513, "step": 1308 }, { "epoch": 0.08869164577545904, "grad_norm": 10.686469078063965, "learning_rate": 9.923608734341845e-05, "loss": 1.0873, "step": 1309 }, { "epoch": 0.08875940104343112, "grad_norm": 11.51689338684082, "learning_rate": 9.923471832432063e-05, "loss": 1.0037, "step": 1310 }, { "epoch": 0.08882715631140321, "grad_norm": 13.259784698486328, "learning_rate": 9.923334930522281e-05, "loss": 1.2004, "step": 1311 }, { "epoch": 0.08889491157937529, "grad_norm": 10.63463306427002, "learning_rate": 9.923198028612499e-05, "loss": 1.2067, "step": 1312 }, { "epoch": 0.08896266684734738, "grad_norm": 10.838210105895996, "learning_rate": 9.923061126702719e-05, "loss": 1.2939, "step": 1313 }, { "epoch": 0.08903042211531946, "grad_norm": 12.058418273925781, "learning_rate": 9.922924224792937e-05, "loss": 1.3091, "step": 1314 }, { "epoch": 0.08909817738329155, "grad_norm": 10.619451522827148, "learning_rate": 9.922787322883155e-05, "loss": 1.1635, "step": 1315 }, { "epoch": 0.08916593265126363, "grad_norm": 9.37607192993164, "learning_rate": 9.922650420973373e-05, "loss": 1.2206, "step": 1316 }, { "epoch": 0.08923368791923572, "grad_norm": 12.119776725769043, "learning_rate": 9.922513519063591e-05, "loss": 1.2159, "step": 1317 }, { "epoch": 0.0893014431872078, "grad_norm": 14.013461112976074, "learning_rate": 9.92237661715381e-05, "loss": 1.1445, "step": 1318 }, { "epoch": 0.08936919845517989, "grad_norm": 11.560707092285156, "learning_rate": 9.922239715244028e-05, "loss": 1.3617, "step": 1319 }, { "epoch": 0.08943695372315197, "grad_norm": 11.817791938781738, "learning_rate": 9.922102813334246e-05, "loss": 1.1931, "step": 1320 }, { "epoch": 0.08950470899112406, "grad_norm": 11.544127464294434, "learning_rate": 9.921965911424464e-05, "loss": 1.0766, "step": 1321 }, { "epoch": 0.08957246425909614, "grad_norm": 10.67740249633789, "learning_rate": 9.921829009514684e-05, "loss": 1.2103, "step": 1322 }, { "epoch": 0.08964021952706823, "grad_norm": 11.774645805358887, "learning_rate": 9.921692107604902e-05, "loss": 1.1921, "step": 1323 }, { "epoch": 0.08970797479504032, "grad_norm": 12.594759941101074, "learning_rate": 9.92155520569512e-05, "loss": 0.906, "step": 1324 }, { "epoch": 0.0897757300630124, "grad_norm": 10.988224983215332, "learning_rate": 9.921418303785338e-05, "loss": 1.4195, "step": 1325 }, { "epoch": 0.08984348533098449, "grad_norm": 12.22718620300293, "learning_rate": 9.921281401875556e-05, "loss": 1.2461, "step": 1326 }, { "epoch": 0.08991124059895657, "grad_norm": 10.964727401733398, "learning_rate": 9.921144499965775e-05, "loss": 1.1254, "step": 1327 }, { "epoch": 0.08997899586692866, "grad_norm": 11.327523231506348, "learning_rate": 9.921007598055993e-05, "loss": 1.145, "step": 1328 }, { "epoch": 0.09004675113490074, "grad_norm": 9.870691299438477, "learning_rate": 9.920870696146211e-05, "loss": 1.2408, "step": 1329 }, { "epoch": 0.09011450640287283, "grad_norm": 11.02373218536377, "learning_rate": 9.920733794236429e-05, "loss": 1.1084, "step": 1330 }, { "epoch": 0.09018226167084491, "grad_norm": 12.300410270690918, "learning_rate": 9.920596892326647e-05, "loss": 1.1542, "step": 1331 }, { "epoch": 0.090250016938817, "grad_norm": 9.832919120788574, "learning_rate": 9.920459990416867e-05, "loss": 1.1195, "step": 1332 }, { "epoch": 0.09031777220678908, "grad_norm": 10.432522773742676, "learning_rate": 9.920323088507085e-05, "loss": 1.1825, "step": 1333 }, { "epoch": 0.09038552747476117, "grad_norm": 11.878792762756348, "learning_rate": 9.920186186597303e-05, "loss": 0.9647, "step": 1334 }, { "epoch": 0.09045328274273325, "grad_norm": 11.866320610046387, "learning_rate": 9.920049284687522e-05, "loss": 1.3598, "step": 1335 }, { "epoch": 0.09052103801070534, "grad_norm": 14.11543083190918, "learning_rate": 9.91991238277774e-05, "loss": 1.4172, "step": 1336 }, { "epoch": 0.09058879327867742, "grad_norm": 13.841622352600098, "learning_rate": 9.919775480867958e-05, "loss": 1.4119, "step": 1337 }, { "epoch": 0.0906565485466495, "grad_norm": 11.077167510986328, "learning_rate": 9.919638578958178e-05, "loss": 0.984, "step": 1338 }, { "epoch": 0.09072430381462158, "grad_norm": 10.904266357421875, "learning_rate": 9.919501677048396e-05, "loss": 1.0439, "step": 1339 }, { "epoch": 0.09079205908259368, "grad_norm": 11.623948097229004, "learning_rate": 9.919364775138614e-05, "loss": 1.0318, "step": 1340 }, { "epoch": 0.09085981435056575, "grad_norm": 10.893725395202637, "learning_rate": 9.919227873228833e-05, "loss": 1.2626, "step": 1341 }, { "epoch": 0.09092756961853785, "grad_norm": 10.064491271972656, "learning_rate": 9.919090971319051e-05, "loss": 1.1483, "step": 1342 }, { "epoch": 0.09099532488650992, "grad_norm": 9.854101181030273, "learning_rate": 9.918954069409269e-05, "loss": 1.1595, "step": 1343 }, { "epoch": 0.09106308015448202, "grad_norm": 11.682498931884766, "learning_rate": 9.918817167499487e-05, "loss": 1.2887, "step": 1344 }, { "epoch": 0.09113083542245409, "grad_norm": 10.484097480773926, "learning_rate": 9.918680265589707e-05, "loss": 1.2, "step": 1345 }, { "epoch": 0.09119859069042618, "grad_norm": 12.332358360290527, "learning_rate": 9.918543363679925e-05, "loss": 1.262, "step": 1346 }, { "epoch": 0.09126634595839826, "grad_norm": 13.706925392150879, "learning_rate": 9.918406461770143e-05, "loss": 1.2151, "step": 1347 }, { "epoch": 0.09133410122637035, "grad_norm": 13.918478965759277, "learning_rate": 9.91826955986036e-05, "loss": 1.4653, "step": 1348 }, { "epoch": 0.09140185649434243, "grad_norm": 11.970015525817871, "learning_rate": 9.918132657950579e-05, "loss": 1.2452, "step": 1349 }, { "epoch": 0.09146961176231452, "grad_norm": 9.698074340820312, "learning_rate": 9.917995756040798e-05, "loss": 0.9142, "step": 1350 }, { "epoch": 0.0915373670302866, "grad_norm": 9.225728988647461, "learning_rate": 9.917858854131016e-05, "loss": 1.0293, "step": 1351 }, { "epoch": 0.09160512229825869, "grad_norm": 10.77661418914795, "learning_rate": 9.917721952221234e-05, "loss": 1.1865, "step": 1352 }, { "epoch": 0.09167287756623077, "grad_norm": 10.45409870147705, "learning_rate": 9.917585050311452e-05, "loss": 1.1553, "step": 1353 }, { "epoch": 0.09174063283420286, "grad_norm": 10.44918441772461, "learning_rate": 9.91744814840167e-05, "loss": 1.1621, "step": 1354 }, { "epoch": 0.09180838810217494, "grad_norm": 14.769590377807617, "learning_rate": 9.91731124649189e-05, "loss": 1.3502, "step": 1355 }, { "epoch": 0.09187614337014703, "grad_norm": 12.733844757080078, "learning_rate": 9.917174344582108e-05, "loss": 1.1973, "step": 1356 }, { "epoch": 0.09194389863811911, "grad_norm": 13.365818977355957, "learning_rate": 9.917037442672326e-05, "loss": 1.5412, "step": 1357 }, { "epoch": 0.0920116539060912, "grad_norm": 11.163050651550293, "learning_rate": 9.916900540762544e-05, "loss": 1.3142, "step": 1358 }, { "epoch": 0.09207940917406328, "grad_norm": 11.420190811157227, "learning_rate": 9.916763638852763e-05, "loss": 1.2257, "step": 1359 }, { "epoch": 0.09214716444203537, "grad_norm": 9.66398811340332, "learning_rate": 9.916626736942981e-05, "loss": 1.0584, "step": 1360 }, { "epoch": 0.09221491971000745, "grad_norm": 13.643363952636719, "learning_rate": 9.916489835033199e-05, "loss": 1.3783, "step": 1361 }, { "epoch": 0.09228267497797954, "grad_norm": 11.658889770507812, "learning_rate": 9.916352933123417e-05, "loss": 1.2126, "step": 1362 }, { "epoch": 0.09235043024595162, "grad_norm": 11.2728271484375, "learning_rate": 9.916216031213635e-05, "loss": 1.2117, "step": 1363 }, { "epoch": 0.09241818551392371, "grad_norm": 13.576864242553711, "learning_rate": 9.916079129303855e-05, "loss": 1.2922, "step": 1364 }, { "epoch": 0.09248594078189579, "grad_norm": 10.950700759887695, "learning_rate": 9.915942227394073e-05, "loss": 1.1394, "step": 1365 }, { "epoch": 0.09255369604986788, "grad_norm": 11.638351440429688, "learning_rate": 9.91580532548429e-05, "loss": 1.3927, "step": 1366 }, { "epoch": 0.09262145131783996, "grad_norm": 12.355545043945312, "learning_rate": 9.915668423574509e-05, "loss": 1.1861, "step": 1367 }, { "epoch": 0.09268920658581205, "grad_norm": 11.543237686157227, "learning_rate": 9.915531521664728e-05, "loss": 0.9868, "step": 1368 }, { "epoch": 0.09275696185378413, "grad_norm": 11.684252738952637, "learning_rate": 9.915394619754946e-05, "loss": 1.3332, "step": 1369 }, { "epoch": 0.09282471712175622, "grad_norm": 10.775650024414062, "learning_rate": 9.915257717845164e-05, "loss": 1.1164, "step": 1370 }, { "epoch": 0.0928924723897283, "grad_norm": 11.649751663208008, "learning_rate": 9.915120815935382e-05, "loss": 1.0369, "step": 1371 }, { "epoch": 0.09296022765770039, "grad_norm": 9.741403579711914, "learning_rate": 9.9149839140256e-05, "loss": 1.0509, "step": 1372 }, { "epoch": 0.09302798292567246, "grad_norm": 13.804118156433105, "learning_rate": 9.91484701211582e-05, "loss": 1.4318, "step": 1373 }, { "epoch": 0.09309573819364456, "grad_norm": 10.939459800720215, "learning_rate": 9.914710110206038e-05, "loss": 1.218, "step": 1374 }, { "epoch": 0.09316349346161663, "grad_norm": 14.076252937316895, "learning_rate": 9.914573208296256e-05, "loss": 1.4699, "step": 1375 }, { "epoch": 0.09323124872958873, "grad_norm": 12.90072250366211, "learning_rate": 9.914436306386474e-05, "loss": 1.3418, "step": 1376 }, { "epoch": 0.09329900399756082, "grad_norm": 12.7711820602417, "learning_rate": 9.914299404476693e-05, "loss": 0.9866, "step": 1377 }, { "epoch": 0.0933667592655329, "grad_norm": 9.205671310424805, "learning_rate": 9.914162502566911e-05, "loss": 1.1345, "step": 1378 }, { "epoch": 0.09343451453350499, "grad_norm": 11.027194023132324, "learning_rate": 9.914025600657129e-05, "loss": 1.2637, "step": 1379 }, { "epoch": 0.09350226980147706, "grad_norm": 12.861044883728027, "learning_rate": 9.913888698747347e-05, "loss": 1.2348, "step": 1380 }, { "epoch": 0.09357002506944916, "grad_norm": 11.266969680786133, "learning_rate": 9.913751796837567e-05, "loss": 1.0773, "step": 1381 }, { "epoch": 0.09363778033742123, "grad_norm": 13.137110710144043, "learning_rate": 9.913614894927785e-05, "loss": 1.0537, "step": 1382 }, { "epoch": 0.09370553560539333, "grad_norm": 11.343362808227539, "learning_rate": 9.913477993018003e-05, "loss": 1.335, "step": 1383 }, { "epoch": 0.0937732908733654, "grad_norm": 11.472663879394531, "learning_rate": 9.913341091108222e-05, "loss": 1.1362, "step": 1384 }, { "epoch": 0.0938410461413375, "grad_norm": 8.441573143005371, "learning_rate": 9.91320418919844e-05, "loss": 1.1646, "step": 1385 }, { "epoch": 0.09390880140930957, "grad_norm": 12.570130348205566, "learning_rate": 9.913067287288658e-05, "loss": 1.3535, "step": 1386 }, { "epoch": 0.09397655667728166, "grad_norm": 11.671664237976074, "learning_rate": 9.912930385378877e-05, "loss": 1.1707, "step": 1387 }, { "epoch": 0.09404431194525374, "grad_norm": 12.638328552246094, "learning_rate": 9.912793483469096e-05, "loss": 1.057, "step": 1388 }, { "epoch": 0.09411206721322583, "grad_norm": 10.506028175354004, "learning_rate": 9.912656581559314e-05, "loss": 0.9279, "step": 1389 }, { "epoch": 0.09417982248119791, "grad_norm": 11.536858558654785, "learning_rate": 9.912519679649532e-05, "loss": 1.3351, "step": 1390 }, { "epoch": 0.09424757774917, "grad_norm": 12.692436218261719, "learning_rate": 9.912382777739751e-05, "loss": 1.164, "step": 1391 }, { "epoch": 0.09431533301714208, "grad_norm": 12.088066101074219, "learning_rate": 9.912245875829969e-05, "loss": 1.2971, "step": 1392 }, { "epoch": 0.09438308828511417, "grad_norm": 12.133123397827148, "learning_rate": 9.912108973920187e-05, "loss": 1.2742, "step": 1393 }, { "epoch": 0.09445084355308625, "grad_norm": 10.168001174926758, "learning_rate": 9.911972072010405e-05, "loss": 1.0964, "step": 1394 }, { "epoch": 0.09451859882105834, "grad_norm": 10.561311721801758, "learning_rate": 9.911835170100623e-05, "loss": 1.1828, "step": 1395 }, { "epoch": 0.09458635408903042, "grad_norm": 11.497330665588379, "learning_rate": 9.911698268190843e-05, "loss": 1.4699, "step": 1396 }, { "epoch": 0.09465410935700251, "grad_norm": 12.190573692321777, "learning_rate": 9.91156136628106e-05, "loss": 1.1601, "step": 1397 }, { "epoch": 0.09472186462497459, "grad_norm": 10.633028030395508, "learning_rate": 9.911424464371279e-05, "loss": 1.333, "step": 1398 }, { "epoch": 0.09478961989294668, "grad_norm": 12.262279510498047, "learning_rate": 9.911287562461497e-05, "loss": 1.2214, "step": 1399 }, { "epoch": 0.09485737516091876, "grad_norm": 11.506840705871582, "learning_rate": 9.911150660551716e-05, "loss": 1.1172, "step": 1400 }, { "epoch": 0.09492513042889085, "grad_norm": 11.453936576843262, "learning_rate": 9.911013758641934e-05, "loss": 1.2205, "step": 1401 }, { "epoch": 0.09499288569686293, "grad_norm": 9.980772972106934, "learning_rate": 9.910876856732152e-05, "loss": 1.1004, "step": 1402 }, { "epoch": 0.09506064096483502, "grad_norm": 11.775416374206543, "learning_rate": 9.91073995482237e-05, "loss": 1.1587, "step": 1403 }, { "epoch": 0.0951283962328071, "grad_norm": 8.840147972106934, "learning_rate": 9.910603052912588e-05, "loss": 0.8702, "step": 1404 }, { "epoch": 0.09519615150077919, "grad_norm": 10.938506126403809, "learning_rate": 9.910466151002808e-05, "loss": 1.085, "step": 1405 }, { "epoch": 0.09526390676875127, "grad_norm": 11.733402252197266, "learning_rate": 9.910329249093026e-05, "loss": 1.1202, "step": 1406 }, { "epoch": 0.09533166203672336, "grad_norm": 11.616521835327148, "learning_rate": 9.910192347183244e-05, "loss": 1.3483, "step": 1407 }, { "epoch": 0.09539941730469544, "grad_norm": 12.477338790893555, "learning_rate": 9.910055445273462e-05, "loss": 1.4798, "step": 1408 }, { "epoch": 0.09546717257266753, "grad_norm": 11.233193397521973, "learning_rate": 9.90991854336368e-05, "loss": 1.0304, "step": 1409 }, { "epoch": 0.0955349278406396, "grad_norm": 12.586124420166016, "learning_rate": 9.909781641453899e-05, "loss": 1.2216, "step": 1410 }, { "epoch": 0.0956026831086117, "grad_norm": 12.974738121032715, "learning_rate": 9.909644739544117e-05, "loss": 1.1495, "step": 1411 }, { "epoch": 0.09567043837658377, "grad_norm": 9.613628387451172, "learning_rate": 9.909507837634335e-05, "loss": 0.8326, "step": 1412 }, { "epoch": 0.09573819364455587, "grad_norm": 10.644312858581543, "learning_rate": 9.909370935724553e-05, "loss": 1.0182, "step": 1413 }, { "epoch": 0.09580594891252794, "grad_norm": 11.155874252319336, "learning_rate": 9.909234033814773e-05, "loss": 1.2887, "step": 1414 }, { "epoch": 0.09587370418050004, "grad_norm": 12.068909645080566, "learning_rate": 9.90909713190499e-05, "loss": 1.1697, "step": 1415 }, { "epoch": 0.09594145944847211, "grad_norm": 10.66831111907959, "learning_rate": 9.908960229995209e-05, "loss": 1.1275, "step": 1416 }, { "epoch": 0.0960092147164442, "grad_norm": 11.80036449432373, "learning_rate": 9.908823328085427e-05, "loss": 1.38, "step": 1417 }, { "epoch": 0.09607696998441628, "grad_norm": 11.677534103393555, "learning_rate": 9.908686426175645e-05, "loss": 1.075, "step": 1418 }, { "epoch": 0.09614472525238837, "grad_norm": 10.54027271270752, "learning_rate": 9.908549524265864e-05, "loss": 0.9617, "step": 1419 }, { "epoch": 0.09621248052036045, "grad_norm": 9.70718002319336, "learning_rate": 9.908412622356082e-05, "loss": 1.0395, "step": 1420 }, { "epoch": 0.09628023578833254, "grad_norm": 10.439559936523438, "learning_rate": 9.9082757204463e-05, "loss": 1.4112, "step": 1421 }, { "epoch": 0.09634799105630462, "grad_norm": 9.328675270080566, "learning_rate": 9.908138818536518e-05, "loss": 1.0481, "step": 1422 }, { "epoch": 0.09641574632427671, "grad_norm": 12.834508895874023, "learning_rate": 9.908001916626738e-05, "loss": 1.0863, "step": 1423 }, { "epoch": 0.09648350159224879, "grad_norm": 11.885201454162598, "learning_rate": 9.907865014716956e-05, "loss": 1.3509, "step": 1424 }, { "epoch": 0.09655125686022088, "grad_norm": 11.299174308776855, "learning_rate": 9.907728112807174e-05, "loss": 1.1328, "step": 1425 }, { "epoch": 0.09661901212819296, "grad_norm": 13.024226188659668, "learning_rate": 9.907591210897392e-05, "loss": 1.1495, "step": 1426 }, { "epoch": 0.09668676739616505, "grad_norm": 13.418682098388672, "learning_rate": 9.907454308987611e-05, "loss": 1.3827, "step": 1427 }, { "epoch": 0.09675452266413713, "grad_norm": 11.28375244140625, "learning_rate": 9.907317407077829e-05, "loss": 1.3658, "step": 1428 }, { "epoch": 0.09682227793210922, "grad_norm": 9.711199760437012, "learning_rate": 9.907180505168047e-05, "loss": 0.8743, "step": 1429 }, { "epoch": 0.09689003320008131, "grad_norm": 12.292948722839355, "learning_rate": 9.907043603258267e-05, "loss": 1.203, "step": 1430 }, { "epoch": 0.09695778846805339, "grad_norm": 13.195072174072266, "learning_rate": 9.906906701348485e-05, "loss": 1.0403, "step": 1431 }, { "epoch": 0.09702554373602548, "grad_norm": 11.45721435546875, "learning_rate": 9.906769799438703e-05, "loss": 1.2075, "step": 1432 }, { "epoch": 0.09709329900399756, "grad_norm": 10.477989196777344, "learning_rate": 9.906632897528922e-05, "loss": 1.1646, "step": 1433 }, { "epoch": 0.09716105427196965, "grad_norm": 12.572269439697266, "learning_rate": 9.90649599561914e-05, "loss": 1.3944, "step": 1434 }, { "epoch": 0.09722880953994173, "grad_norm": 9.37205982208252, "learning_rate": 9.906359093709358e-05, "loss": 0.9902, "step": 1435 }, { "epoch": 0.09729656480791382, "grad_norm": 11.590779304504395, "learning_rate": 9.906222191799576e-05, "loss": 1.2658, "step": 1436 }, { "epoch": 0.0973643200758859, "grad_norm": 10.35207748413086, "learning_rate": 9.906085289889795e-05, "loss": 0.7905, "step": 1437 }, { "epoch": 0.09743207534385799, "grad_norm": 9.993937492370605, "learning_rate": 9.905948387980013e-05, "loss": 1.1522, "step": 1438 }, { "epoch": 0.09749983061183007, "grad_norm": 9.865569114685059, "learning_rate": 9.905811486070232e-05, "loss": 1.2536, "step": 1439 }, { "epoch": 0.09756758587980216, "grad_norm": 12.836588859558105, "learning_rate": 9.90567458416045e-05, "loss": 1.2216, "step": 1440 }, { "epoch": 0.09763534114777424, "grad_norm": 10.062298774719238, "learning_rate": 9.905537682250668e-05, "loss": 1.1576, "step": 1441 }, { "epoch": 0.09770309641574633, "grad_norm": 10.897071838378906, "learning_rate": 9.905400780340887e-05, "loss": 1.1315, "step": 1442 }, { "epoch": 0.09777085168371841, "grad_norm": 10.366122245788574, "learning_rate": 9.905263878431105e-05, "loss": 1.274, "step": 1443 }, { "epoch": 0.0978386069516905, "grad_norm": 11.632966995239258, "learning_rate": 9.905126976521323e-05, "loss": 1.1427, "step": 1444 }, { "epoch": 0.09790636221966258, "grad_norm": 10.537737846374512, "learning_rate": 9.904990074611541e-05, "loss": 1.0913, "step": 1445 }, { "epoch": 0.09797411748763467, "grad_norm": 9.52363109588623, "learning_rate": 9.90485317270176e-05, "loss": 0.8677, "step": 1446 }, { "epoch": 0.09804187275560675, "grad_norm": 11.511491775512695, "learning_rate": 9.904716270791979e-05, "loss": 1.0381, "step": 1447 }, { "epoch": 0.09810962802357884, "grad_norm": 12.085793495178223, "learning_rate": 9.904579368882197e-05, "loss": 1.1415, "step": 1448 }, { "epoch": 0.09817738329155092, "grad_norm": 8.665430068969727, "learning_rate": 9.904442466972415e-05, "loss": 1.0367, "step": 1449 }, { "epoch": 0.09824513855952301, "grad_norm": 10.900618553161621, "learning_rate": 9.904305565062633e-05, "loss": 0.9835, "step": 1450 }, { "epoch": 0.09831289382749508, "grad_norm": 10.3113431930542, "learning_rate": 9.904168663152852e-05, "loss": 0.924, "step": 1451 }, { "epoch": 0.09838064909546718, "grad_norm": 10.001591682434082, "learning_rate": 9.90403176124307e-05, "loss": 1.0835, "step": 1452 }, { "epoch": 0.09844840436343925, "grad_norm": 11.333273887634277, "learning_rate": 9.903894859333288e-05, "loss": 1.072, "step": 1453 }, { "epoch": 0.09851615963141135, "grad_norm": 10.107904434204102, "learning_rate": 9.903757957423506e-05, "loss": 1.0848, "step": 1454 }, { "epoch": 0.09858391489938342, "grad_norm": 12.578730583190918, "learning_rate": 9.903621055513725e-05, "loss": 1.2735, "step": 1455 }, { "epoch": 0.09865167016735552, "grad_norm": 10.453478813171387, "learning_rate": 9.903484153603944e-05, "loss": 1.3141, "step": 1456 }, { "epoch": 0.09871942543532759, "grad_norm": 10.383566856384277, "learning_rate": 9.903347251694162e-05, "loss": 1.0992, "step": 1457 }, { "epoch": 0.09878718070329968, "grad_norm": 9.612902641296387, "learning_rate": 9.90321034978438e-05, "loss": 1.3103, "step": 1458 }, { "epoch": 0.09885493597127176, "grad_norm": 12.111359596252441, "learning_rate": 9.903073447874598e-05, "loss": 1.3102, "step": 1459 }, { "epoch": 0.09892269123924385, "grad_norm": 9.987195014953613, "learning_rate": 9.902936545964817e-05, "loss": 1.1961, "step": 1460 }, { "epoch": 0.09899044650721593, "grad_norm": 10.900408744812012, "learning_rate": 9.902799644055035e-05, "loss": 1.0208, "step": 1461 }, { "epoch": 0.09905820177518802, "grad_norm": 9.94915771484375, "learning_rate": 9.902662742145253e-05, "loss": 1.3347, "step": 1462 }, { "epoch": 0.0991259570431601, "grad_norm": 13.393661499023438, "learning_rate": 9.902525840235471e-05, "loss": 1.2903, "step": 1463 }, { "epoch": 0.09919371231113219, "grad_norm": 10.122967720031738, "learning_rate": 9.902388938325689e-05, "loss": 1.0229, "step": 1464 }, { "epoch": 0.09926146757910427, "grad_norm": 10.775031089782715, "learning_rate": 9.902252036415909e-05, "loss": 1.1669, "step": 1465 }, { "epoch": 0.09932922284707636, "grad_norm": 9.733497619628906, "learning_rate": 9.902115134506127e-05, "loss": 1.0903, "step": 1466 }, { "epoch": 0.09939697811504844, "grad_norm": 9.230277061462402, "learning_rate": 9.901978232596345e-05, "loss": 1.1738, "step": 1467 }, { "epoch": 0.09946473338302053, "grad_norm": 10.822884559631348, "learning_rate": 9.901841330686563e-05, "loss": 1.3392, "step": 1468 }, { "epoch": 0.09953248865099261, "grad_norm": 10.64195442199707, "learning_rate": 9.901704428776782e-05, "loss": 1.0823, "step": 1469 }, { "epoch": 0.0996002439189647, "grad_norm": 13.73645305633545, "learning_rate": 9.901567526867e-05, "loss": 1.1128, "step": 1470 }, { "epoch": 0.09966799918693678, "grad_norm": 11.361958503723145, "learning_rate": 9.901430624957218e-05, "loss": 1.083, "step": 1471 }, { "epoch": 0.09973575445490887, "grad_norm": 10.839045524597168, "learning_rate": 9.901293723047436e-05, "loss": 1.0288, "step": 1472 }, { "epoch": 0.09980350972288095, "grad_norm": 10.41995906829834, "learning_rate": 9.901156821137656e-05, "loss": 1.0631, "step": 1473 }, { "epoch": 0.09987126499085304, "grad_norm": 11.87709903717041, "learning_rate": 9.901019919227874e-05, "loss": 1.0715, "step": 1474 }, { "epoch": 0.09993902025882512, "grad_norm": 10.46670913696289, "learning_rate": 9.900883017318092e-05, "loss": 1.1684, "step": 1475 }, { "epoch": 0.10000677552679721, "grad_norm": 12.163457870483398, "learning_rate": 9.900746115408311e-05, "loss": 1.4416, "step": 1476 }, { "epoch": 0.10007453079476929, "grad_norm": 13.417581558227539, "learning_rate": 9.900609213498529e-05, "loss": 1.1876, "step": 1477 }, { "epoch": 0.10014228606274138, "grad_norm": 11.35722541809082, "learning_rate": 9.900472311588747e-05, "loss": 1.1389, "step": 1478 }, { "epoch": 0.10021004133071346, "grad_norm": 10.042820930480957, "learning_rate": 9.900335409678966e-05, "loss": 1.2156, "step": 1479 }, { "epoch": 0.10027779659868555, "grad_norm": 10.823782920837402, "learning_rate": 9.900198507769184e-05, "loss": 1.1915, "step": 1480 }, { "epoch": 0.10034555186665763, "grad_norm": 13.6808443069458, "learning_rate": 9.900061605859403e-05, "loss": 1.1724, "step": 1481 }, { "epoch": 0.10041330713462972, "grad_norm": 11.16846752166748, "learning_rate": 9.89992470394962e-05, "loss": 1.2093, "step": 1482 }, { "epoch": 0.10048106240260181, "grad_norm": 10.391450881958008, "learning_rate": 9.89978780203984e-05, "loss": 1.142, "step": 1483 }, { "epoch": 0.10054881767057389, "grad_norm": 9.324288368225098, "learning_rate": 9.899650900130058e-05, "loss": 1.0579, "step": 1484 }, { "epoch": 0.10061657293854598, "grad_norm": 12.601625442504883, "learning_rate": 9.899513998220276e-05, "loss": 1.2643, "step": 1485 }, { "epoch": 0.10068432820651806, "grad_norm": 14.270779609680176, "learning_rate": 9.899377096310494e-05, "loss": 1.2903, "step": 1486 }, { "epoch": 0.10075208347449015, "grad_norm": 11.521232604980469, "learning_rate": 9.899240194400712e-05, "loss": 1.2385, "step": 1487 }, { "epoch": 0.10081983874246223, "grad_norm": 10.76693344116211, "learning_rate": 9.899103292490931e-05, "loss": 1.1745, "step": 1488 }, { "epoch": 0.10088759401043432, "grad_norm": 9.091184616088867, "learning_rate": 9.89896639058115e-05, "loss": 0.9571, "step": 1489 }, { "epoch": 0.1009553492784064, "grad_norm": 11.930106163024902, "learning_rate": 9.898829488671368e-05, "loss": 1.1226, "step": 1490 }, { "epoch": 0.10102310454637849, "grad_norm": 10.90937614440918, "learning_rate": 9.898692586761586e-05, "loss": 1.0776, "step": 1491 }, { "epoch": 0.10109085981435056, "grad_norm": 10.618545532226562, "learning_rate": 9.898555684851805e-05, "loss": 1.1251, "step": 1492 }, { "epoch": 0.10115861508232266, "grad_norm": 10.228861808776855, "learning_rate": 9.898418782942023e-05, "loss": 1.1987, "step": 1493 }, { "epoch": 0.10122637035029473, "grad_norm": 8.807862281799316, "learning_rate": 9.898281881032241e-05, "loss": 0.9339, "step": 1494 }, { "epoch": 0.10129412561826683, "grad_norm": 11.24593448638916, "learning_rate": 9.898144979122459e-05, "loss": 1.1495, "step": 1495 }, { "epoch": 0.1013618808862389, "grad_norm": 11.192438125610352, "learning_rate": 9.898008077212677e-05, "loss": 1.1361, "step": 1496 }, { "epoch": 0.101429636154211, "grad_norm": 10.440075874328613, "learning_rate": 9.897871175302896e-05, "loss": 1.2273, "step": 1497 }, { "epoch": 0.10149739142218307, "grad_norm": 11.103675842285156, "learning_rate": 9.897734273393115e-05, "loss": 1.1308, "step": 1498 }, { "epoch": 0.10156514669015516, "grad_norm": 9.78297233581543, "learning_rate": 9.897597371483333e-05, "loss": 1.0907, "step": 1499 }, { "epoch": 0.10163290195812724, "grad_norm": 10.98086166381836, "learning_rate": 9.89746046957355e-05, "loss": 0.9743, "step": 1500 }, { "epoch": 0.10170065722609933, "grad_norm": 9.268783569335938, "learning_rate": 9.89732356766377e-05, "loss": 0.8917, "step": 1501 }, { "epoch": 0.10176841249407141, "grad_norm": 12.674605369567871, "learning_rate": 9.897186665753988e-05, "loss": 1.2247, "step": 1502 }, { "epoch": 0.1018361677620435, "grad_norm": 10.987565040588379, "learning_rate": 9.897049763844206e-05, "loss": 1.0688, "step": 1503 }, { "epoch": 0.10190392303001558, "grad_norm": 16.014053344726562, "learning_rate": 9.896912861934424e-05, "loss": 0.8334, "step": 1504 }, { "epoch": 0.10197167829798767, "grad_norm": 11.119991302490234, "learning_rate": 9.896775960024642e-05, "loss": 1.3176, "step": 1505 }, { "epoch": 0.10203943356595975, "grad_norm": 13.23279094696045, "learning_rate": 9.896639058114861e-05, "loss": 1.1625, "step": 1506 }, { "epoch": 0.10210718883393184, "grad_norm": 9.3678560256958, "learning_rate": 9.89650215620508e-05, "loss": 1.2774, "step": 1507 }, { "epoch": 0.10217494410190392, "grad_norm": 10.829100608825684, "learning_rate": 9.896365254295298e-05, "loss": 1.2423, "step": 1508 }, { "epoch": 0.10224269936987601, "grad_norm": 12.12694263458252, "learning_rate": 9.896228352385516e-05, "loss": 1.2276, "step": 1509 }, { "epoch": 0.10231045463784809, "grad_norm": 11.626548767089844, "learning_rate": 9.896091450475735e-05, "loss": 1.0871, "step": 1510 }, { "epoch": 0.10237820990582018, "grad_norm": 11.388608932495117, "learning_rate": 9.895954548565953e-05, "loss": 1.5582, "step": 1511 }, { "epoch": 0.10244596517379226, "grad_norm": 9.463730812072754, "learning_rate": 9.895817646656171e-05, "loss": 1.1066, "step": 1512 }, { "epoch": 0.10251372044176435, "grad_norm": 10.291573524475098, "learning_rate": 9.895680744746389e-05, "loss": 1.4026, "step": 1513 }, { "epoch": 0.10258147570973643, "grad_norm": 9.778963088989258, "learning_rate": 9.895543842836607e-05, "loss": 1.1109, "step": 1514 }, { "epoch": 0.10264923097770852, "grad_norm": 9.685966491699219, "learning_rate": 9.895406940926827e-05, "loss": 1.1633, "step": 1515 }, { "epoch": 0.1027169862456806, "grad_norm": 10.76310920715332, "learning_rate": 9.895270039017045e-05, "loss": 1.0813, "step": 1516 }, { "epoch": 0.10278474151365269, "grad_norm": 9.795347213745117, "learning_rate": 9.895133137107263e-05, "loss": 1.2079, "step": 1517 }, { "epoch": 0.10285249678162477, "grad_norm": 9.980990409851074, "learning_rate": 9.89499623519748e-05, "loss": 1.0701, "step": 1518 }, { "epoch": 0.10292025204959686, "grad_norm": 9.682209014892578, "learning_rate": 9.894859333287699e-05, "loss": 1.4133, "step": 1519 }, { "epoch": 0.10298800731756894, "grad_norm": 10.632065773010254, "learning_rate": 9.894722431377918e-05, "loss": 1.0631, "step": 1520 }, { "epoch": 0.10305576258554103, "grad_norm": 10.099474906921387, "learning_rate": 9.894585529468136e-05, "loss": 1.2015, "step": 1521 }, { "epoch": 0.1031235178535131, "grad_norm": 8.289199829101562, "learning_rate": 9.894448627558354e-05, "loss": 1.3009, "step": 1522 }, { "epoch": 0.1031912731214852, "grad_norm": 9.403796195983887, "learning_rate": 9.894311725648573e-05, "loss": 1.113, "step": 1523 }, { "epoch": 0.10325902838945727, "grad_norm": 12.6613130569458, "learning_rate": 9.894174823738792e-05, "loss": 0.9394, "step": 1524 }, { "epoch": 0.10332678365742937, "grad_norm": 9.85255241394043, "learning_rate": 9.894037921829011e-05, "loss": 1.1007, "step": 1525 }, { "epoch": 0.10339453892540144, "grad_norm": 11.918173789978027, "learning_rate": 9.893901019919229e-05, "loss": 1.1817, "step": 1526 }, { "epoch": 0.10346229419337354, "grad_norm": 9.994447708129883, "learning_rate": 9.893764118009447e-05, "loss": 1.2522, "step": 1527 }, { "epoch": 0.10353004946134561, "grad_norm": 9.879289627075195, "learning_rate": 9.893627216099665e-05, "loss": 1.152, "step": 1528 }, { "epoch": 0.1035978047293177, "grad_norm": 10.103482246398926, "learning_rate": 9.893490314189884e-05, "loss": 1.2227, "step": 1529 }, { "epoch": 0.10366555999728978, "grad_norm": 11.173476219177246, "learning_rate": 9.893353412280102e-05, "loss": 1.2188, "step": 1530 }, { "epoch": 0.10373331526526187, "grad_norm": 11.540877342224121, "learning_rate": 9.89321651037032e-05, "loss": 1.1669, "step": 1531 }, { "epoch": 0.10380107053323395, "grad_norm": 10.706154823303223, "learning_rate": 9.893079608460539e-05, "loss": 1.2771, "step": 1532 }, { "epoch": 0.10386882580120604, "grad_norm": 11.781739234924316, "learning_rate": 9.892942706550758e-05, "loss": 1.0501, "step": 1533 }, { "epoch": 0.10393658106917812, "grad_norm": 10.482099533081055, "learning_rate": 9.892805804640976e-05, "loss": 1.2327, "step": 1534 }, { "epoch": 0.10400433633715021, "grad_norm": 12.450867652893066, "learning_rate": 9.892668902731194e-05, "loss": 1.3575, "step": 1535 }, { "epoch": 0.1040720916051223, "grad_norm": 10.236811637878418, "learning_rate": 9.892532000821412e-05, "loss": 1.2559, "step": 1536 }, { "epoch": 0.10413984687309438, "grad_norm": 9.311124801635742, "learning_rate": 9.89239509891163e-05, "loss": 1.1143, "step": 1537 }, { "epoch": 0.10420760214106647, "grad_norm": 9.182706832885742, "learning_rate": 9.89225819700185e-05, "loss": 0.8875, "step": 1538 }, { "epoch": 0.10427535740903855, "grad_norm": 12.762700080871582, "learning_rate": 9.892121295092067e-05, "loss": 1.0552, "step": 1539 }, { "epoch": 0.10434311267701064, "grad_norm": 12.222203254699707, "learning_rate": 9.891984393182285e-05, "loss": 1.3185, "step": 1540 }, { "epoch": 0.10441086794498272, "grad_norm": 11.45807933807373, "learning_rate": 9.891847491272504e-05, "loss": 1.2952, "step": 1541 }, { "epoch": 0.10447862321295481, "grad_norm": 10.445068359375, "learning_rate": 9.891710589362722e-05, "loss": 1.1637, "step": 1542 }, { "epoch": 0.10454637848092689, "grad_norm": 11.758063316345215, "learning_rate": 9.891573687452941e-05, "loss": 1.3202, "step": 1543 }, { "epoch": 0.10461413374889898, "grad_norm": 10.176533699035645, "learning_rate": 9.891436785543159e-05, "loss": 1.1762, "step": 1544 }, { "epoch": 0.10468188901687106, "grad_norm": 10.133155822753906, "learning_rate": 9.891299883633377e-05, "loss": 1.2226, "step": 1545 }, { "epoch": 0.10474964428484315, "grad_norm": 9.883895874023438, "learning_rate": 9.891162981723595e-05, "loss": 0.9687, "step": 1546 }, { "epoch": 0.10481739955281523, "grad_norm": 13.175050735473633, "learning_rate": 9.891026079813814e-05, "loss": 1.2075, "step": 1547 }, { "epoch": 0.10488515482078732, "grad_norm": 11.183597564697266, "learning_rate": 9.890889177904032e-05, "loss": 1.0917, "step": 1548 }, { "epoch": 0.1049529100887594, "grad_norm": 10.135035514831543, "learning_rate": 9.89075227599425e-05, "loss": 1.0087, "step": 1549 }, { "epoch": 0.10502066535673149, "grad_norm": 16.02760887145996, "learning_rate": 9.890615374084469e-05, "loss": 1.2015, "step": 1550 }, { "epoch": 0.10508842062470357, "grad_norm": 11.255363464355469, "learning_rate": 9.890478472174687e-05, "loss": 1.1106, "step": 1551 }, { "epoch": 0.10515617589267566, "grad_norm": 10.740998268127441, "learning_rate": 9.890341570264906e-05, "loss": 1.1835, "step": 1552 }, { "epoch": 0.10522393116064774, "grad_norm": 11.84919548034668, "learning_rate": 9.890204668355124e-05, "loss": 1.1655, "step": 1553 }, { "epoch": 0.10529168642861983, "grad_norm": 12.041108131408691, "learning_rate": 9.890067766445342e-05, "loss": 1.2544, "step": 1554 }, { "epoch": 0.10535944169659191, "grad_norm": 8.966646194458008, "learning_rate": 9.88993086453556e-05, "loss": 1.098, "step": 1555 }, { "epoch": 0.105427196964564, "grad_norm": 11.838338851928711, "learning_rate": 9.88979396262578e-05, "loss": 1.1774, "step": 1556 }, { "epoch": 0.10549495223253608, "grad_norm": 11.186326026916504, "learning_rate": 9.889657060715997e-05, "loss": 1.1778, "step": 1557 }, { "epoch": 0.10556270750050817, "grad_norm": 9.448702812194824, "learning_rate": 9.889520158806216e-05, "loss": 1.0171, "step": 1558 }, { "epoch": 0.10563046276848025, "grad_norm": 10.931096076965332, "learning_rate": 9.889383256896434e-05, "loss": 1.1797, "step": 1559 }, { "epoch": 0.10569821803645234, "grad_norm": 10.293981552124023, "learning_rate": 9.889246354986652e-05, "loss": 1.0723, "step": 1560 }, { "epoch": 0.10576597330442442, "grad_norm": 10.052331924438477, "learning_rate": 9.889109453076871e-05, "loss": 0.831, "step": 1561 }, { "epoch": 0.10583372857239651, "grad_norm": 10.593210220336914, "learning_rate": 9.888972551167089e-05, "loss": 1.3415, "step": 1562 }, { "epoch": 0.10590148384036858, "grad_norm": 11.580954551696777, "learning_rate": 9.888835649257307e-05, "loss": 1.1522, "step": 1563 }, { "epoch": 0.10596923910834068, "grad_norm": 11.495551109313965, "learning_rate": 9.888698747347525e-05, "loss": 1.2515, "step": 1564 }, { "epoch": 0.10603699437631275, "grad_norm": 10.543874740600586, "learning_rate": 9.888561845437743e-05, "loss": 1.1579, "step": 1565 }, { "epoch": 0.10610474964428485, "grad_norm": 10.588164329528809, "learning_rate": 9.888424943527963e-05, "loss": 1.3825, "step": 1566 }, { "epoch": 0.10617250491225692, "grad_norm": 13.591666221618652, "learning_rate": 9.88828804161818e-05, "loss": 1.0941, "step": 1567 }, { "epoch": 0.10624026018022902, "grad_norm": 10.866951942443848, "learning_rate": 9.888151139708399e-05, "loss": 1.478, "step": 1568 }, { "epoch": 0.10630801544820109, "grad_norm": 9.627554893493652, "learning_rate": 9.888014237798618e-05, "loss": 1.004, "step": 1569 }, { "epoch": 0.10637577071617318, "grad_norm": 10.871118545532227, "learning_rate": 9.887877335888836e-05, "loss": 1.1997, "step": 1570 }, { "epoch": 0.10644352598414526, "grad_norm": 10.74503231048584, "learning_rate": 9.887740433979054e-05, "loss": 1.005, "step": 1571 }, { "epoch": 0.10651128125211735, "grad_norm": 11.119452476501465, "learning_rate": 9.887603532069273e-05, "loss": 1.1168, "step": 1572 }, { "epoch": 0.10657903652008943, "grad_norm": 10.601544380187988, "learning_rate": 9.887466630159491e-05, "loss": 1.0256, "step": 1573 }, { "epoch": 0.10664679178806152, "grad_norm": 10.329113006591797, "learning_rate": 9.88732972824971e-05, "loss": 1.3432, "step": 1574 }, { "epoch": 0.1067145470560336, "grad_norm": 9.973999977111816, "learning_rate": 9.887192826339929e-05, "loss": 0.9961, "step": 1575 }, { "epoch": 0.10678230232400569, "grad_norm": 10.981974601745605, "learning_rate": 9.887055924430147e-05, "loss": 1.3936, "step": 1576 }, { "epoch": 0.10685005759197777, "grad_norm": 10.953417778015137, "learning_rate": 9.886919022520365e-05, "loss": 0.9669, "step": 1577 }, { "epoch": 0.10691781285994986, "grad_norm": 10.656323432922363, "learning_rate": 9.886782120610583e-05, "loss": 1.1393, "step": 1578 }, { "epoch": 0.10698556812792194, "grad_norm": 9.434617042541504, "learning_rate": 9.886645218700802e-05, "loss": 1.1813, "step": 1579 }, { "epoch": 0.10705332339589403, "grad_norm": 12.334831237792969, "learning_rate": 9.88650831679102e-05, "loss": 1.453, "step": 1580 }, { "epoch": 0.10712107866386611, "grad_norm": 9.892403602600098, "learning_rate": 9.886371414881238e-05, "loss": 1.1373, "step": 1581 }, { "epoch": 0.1071888339318382, "grad_norm": 9.248678207397461, "learning_rate": 9.886234512971456e-05, "loss": 1.0734, "step": 1582 }, { "epoch": 0.10725658919981028, "grad_norm": 10.317010879516602, "learning_rate": 9.886097611061675e-05, "loss": 1.1252, "step": 1583 }, { "epoch": 0.10732434446778237, "grad_norm": 9.586435317993164, "learning_rate": 9.885960709151894e-05, "loss": 1.1705, "step": 1584 }, { "epoch": 0.10739209973575445, "grad_norm": 8.883166313171387, "learning_rate": 9.885823807242112e-05, "loss": 1.3164, "step": 1585 }, { "epoch": 0.10745985500372654, "grad_norm": 12.870014190673828, "learning_rate": 9.88568690533233e-05, "loss": 1.2187, "step": 1586 }, { "epoch": 0.10752761027169863, "grad_norm": 10.858057975769043, "learning_rate": 9.885550003422548e-05, "loss": 1.2334, "step": 1587 }, { "epoch": 0.10759536553967071, "grad_norm": 9.855050086975098, "learning_rate": 9.885413101512767e-05, "loss": 1.068, "step": 1588 }, { "epoch": 0.1076631208076428, "grad_norm": 12.256099700927734, "learning_rate": 9.885276199602985e-05, "loss": 1.2481, "step": 1589 }, { "epoch": 0.10773087607561488, "grad_norm": 9.756118774414062, "learning_rate": 9.885139297693203e-05, "loss": 1.2365, "step": 1590 }, { "epoch": 0.10779863134358697, "grad_norm": 11.727996826171875, "learning_rate": 9.885002395783421e-05, "loss": 1.3839, "step": 1591 }, { "epoch": 0.10786638661155905, "grad_norm": 11.58463191986084, "learning_rate": 9.88486549387364e-05, "loss": 0.937, "step": 1592 }, { "epoch": 0.10793414187953114, "grad_norm": 12.663015365600586, "learning_rate": 9.884728591963859e-05, "loss": 1.3453, "step": 1593 }, { "epoch": 0.10800189714750322, "grad_norm": 10.950531005859375, "learning_rate": 9.884591690054077e-05, "loss": 1.1345, "step": 1594 }, { "epoch": 0.10806965241547531, "grad_norm": 10.361424446105957, "learning_rate": 9.884454788144295e-05, "loss": 1.2576, "step": 1595 }, { "epoch": 0.10813740768344739, "grad_norm": 11.155044555664062, "learning_rate": 9.884317886234513e-05, "loss": 1.1253, "step": 1596 }, { "epoch": 0.10820516295141948, "grad_norm": 12.457600593566895, "learning_rate": 9.884180984324731e-05, "loss": 1.0959, "step": 1597 }, { "epoch": 0.10827291821939156, "grad_norm": 13.494983673095703, "learning_rate": 9.88404408241495e-05, "loss": 1.1671, "step": 1598 }, { "epoch": 0.10834067348736365, "grad_norm": 11.870648384094238, "learning_rate": 9.883907180505168e-05, "loss": 1.2121, "step": 1599 }, { "epoch": 0.10840842875533573, "grad_norm": 10.327920913696289, "learning_rate": 9.883770278595387e-05, "loss": 1.1319, "step": 1600 }, { "epoch": 0.10847618402330782, "grad_norm": 9.578879356384277, "learning_rate": 9.883633376685605e-05, "loss": 0.9064, "step": 1601 }, { "epoch": 0.1085439392912799, "grad_norm": 9.598779678344727, "learning_rate": 9.883496474775824e-05, "loss": 1.1119, "step": 1602 }, { "epoch": 0.10861169455925199, "grad_norm": 13.522628784179688, "learning_rate": 9.883359572866042e-05, "loss": 1.2549, "step": 1603 }, { "epoch": 0.10867944982722406, "grad_norm": 8.123127937316895, "learning_rate": 9.88322267095626e-05, "loss": 0.8778, "step": 1604 }, { "epoch": 0.10874720509519616, "grad_norm": 13.100455284118652, "learning_rate": 9.883085769046478e-05, "loss": 1.2023, "step": 1605 }, { "epoch": 0.10881496036316823, "grad_norm": 9.897802352905273, "learning_rate": 9.882948867136696e-05, "loss": 1.2094, "step": 1606 }, { "epoch": 0.10888271563114033, "grad_norm": 8.55170726776123, "learning_rate": 9.882811965226915e-05, "loss": 1.0407, "step": 1607 }, { "epoch": 0.1089504708991124, "grad_norm": 10.867438316345215, "learning_rate": 9.882675063317133e-05, "loss": 1.3021, "step": 1608 }, { "epoch": 0.1090182261670845, "grad_norm": 11.062238693237305, "learning_rate": 9.882538161407352e-05, "loss": 1.2627, "step": 1609 }, { "epoch": 0.10908598143505657, "grad_norm": 12.710458755493164, "learning_rate": 9.88240125949757e-05, "loss": 1.6196, "step": 1610 }, { "epoch": 0.10915373670302866, "grad_norm": 10.986543655395508, "learning_rate": 9.882264357587789e-05, "loss": 1.2021, "step": 1611 }, { "epoch": 0.10922149197100074, "grad_norm": 12.792850494384766, "learning_rate": 9.882127455678007e-05, "loss": 1.1246, "step": 1612 }, { "epoch": 0.10928924723897283, "grad_norm": 12.120078086853027, "learning_rate": 9.881990553768225e-05, "loss": 1.2522, "step": 1613 }, { "epoch": 0.10935700250694491, "grad_norm": 10.562193870544434, "learning_rate": 9.881853651858443e-05, "loss": 1.1791, "step": 1614 }, { "epoch": 0.109424757774917, "grad_norm": 13.017698287963867, "learning_rate": 9.881716749948662e-05, "loss": 1.0733, "step": 1615 }, { "epoch": 0.10949251304288908, "grad_norm": 10.904980659484863, "learning_rate": 9.88157984803888e-05, "loss": 1.0166, "step": 1616 }, { "epoch": 0.10956026831086117, "grad_norm": 12.332829475402832, "learning_rate": 9.881442946129099e-05, "loss": 1.0688, "step": 1617 }, { "epoch": 0.10962802357883325, "grad_norm": 12.238106727600098, "learning_rate": 9.881306044219318e-05, "loss": 0.9869, "step": 1618 }, { "epoch": 0.10969577884680534, "grad_norm": 12.75059986114502, "learning_rate": 9.881169142309536e-05, "loss": 1.5365, "step": 1619 }, { "epoch": 0.10976353411477742, "grad_norm": 11.019241333007812, "learning_rate": 9.881032240399754e-05, "loss": 1.2332, "step": 1620 }, { "epoch": 0.10983128938274951, "grad_norm": 11.867498397827148, "learning_rate": 9.880895338489973e-05, "loss": 1.1955, "step": 1621 }, { "epoch": 0.10989904465072159, "grad_norm": 9.685249328613281, "learning_rate": 9.880758436580191e-05, "loss": 1.1972, "step": 1622 }, { "epoch": 0.10996679991869368, "grad_norm": 11.80540657043457, "learning_rate": 9.88062153467041e-05, "loss": 0.9501, "step": 1623 }, { "epoch": 0.11003455518666576, "grad_norm": 9.719080924987793, "learning_rate": 9.880484632760627e-05, "loss": 0.9904, "step": 1624 }, { "epoch": 0.11010231045463785, "grad_norm": 10.906637191772461, "learning_rate": 9.880347730850847e-05, "loss": 1.2807, "step": 1625 }, { "epoch": 0.11017006572260993, "grad_norm": 10.40942668914795, "learning_rate": 9.880210828941065e-05, "loss": 1.2772, "step": 1626 }, { "epoch": 0.11023782099058202, "grad_norm": 10.265239715576172, "learning_rate": 9.880073927031283e-05, "loss": 1.1583, "step": 1627 }, { "epoch": 0.1103055762585541, "grad_norm": 12.158036231994629, "learning_rate": 9.879937025121501e-05, "loss": 1.3907, "step": 1628 }, { "epoch": 0.11037333152652619, "grad_norm": 10.128400802612305, "learning_rate": 9.879800123211719e-05, "loss": 0.8838, "step": 1629 }, { "epoch": 0.11044108679449827, "grad_norm": 9.536952018737793, "learning_rate": 9.879663221301938e-05, "loss": 1.0525, "step": 1630 }, { "epoch": 0.11050884206247036, "grad_norm": 10.826987266540527, "learning_rate": 9.879526319392156e-05, "loss": 1.2005, "step": 1631 }, { "epoch": 0.11057659733044244, "grad_norm": 10.08153247833252, "learning_rate": 9.879389417482374e-05, "loss": 1.1521, "step": 1632 }, { "epoch": 0.11064435259841453, "grad_norm": 10.642118453979492, "learning_rate": 9.879252515572592e-05, "loss": 1.2316, "step": 1633 }, { "epoch": 0.1107121078663866, "grad_norm": 9.595836639404297, "learning_rate": 9.879115613662812e-05, "loss": 1.0969, "step": 1634 }, { "epoch": 0.1107798631343587, "grad_norm": 8.954519271850586, "learning_rate": 9.87897871175303e-05, "loss": 0.9422, "step": 1635 }, { "epoch": 0.11084761840233077, "grad_norm": 9.143898010253906, "learning_rate": 9.878841809843248e-05, "loss": 0.8949, "step": 1636 }, { "epoch": 0.11091537367030287, "grad_norm": 11.656847953796387, "learning_rate": 9.878704907933466e-05, "loss": 0.9739, "step": 1637 }, { "epoch": 0.11098312893827494, "grad_norm": 11.996994018554688, "learning_rate": 9.878568006023684e-05, "loss": 1.3361, "step": 1638 }, { "epoch": 0.11105088420624704, "grad_norm": 11.478599548339844, "learning_rate": 9.878431104113903e-05, "loss": 1.0726, "step": 1639 }, { "epoch": 0.11111863947421913, "grad_norm": 14.594179153442383, "learning_rate": 9.878294202204121e-05, "loss": 1.3725, "step": 1640 }, { "epoch": 0.1111863947421912, "grad_norm": 12.15703296661377, "learning_rate": 9.87815730029434e-05, "loss": 1.1996, "step": 1641 }, { "epoch": 0.1112541500101633, "grad_norm": 9.384215354919434, "learning_rate": 9.878020398384557e-05, "loss": 1.1294, "step": 1642 }, { "epoch": 0.11132190527813537, "grad_norm": 11.131610870361328, "learning_rate": 9.877883496474777e-05, "loss": 1.0526, "step": 1643 }, { "epoch": 0.11138966054610747, "grad_norm": 9.628592491149902, "learning_rate": 9.877746594564995e-05, "loss": 1.0715, "step": 1644 }, { "epoch": 0.11145741581407954, "grad_norm": 10.876824378967285, "learning_rate": 9.877609692655213e-05, "loss": 1.3337, "step": 1645 }, { "epoch": 0.11152517108205164, "grad_norm": 10.489126205444336, "learning_rate": 9.877472790745431e-05, "loss": 1.1727, "step": 1646 }, { "epoch": 0.11159292635002371, "grad_norm": 8.737699508666992, "learning_rate": 9.877335888835649e-05, "loss": 1.1252, "step": 1647 }, { "epoch": 0.1116606816179958, "grad_norm": 9.8843355178833, "learning_rate": 9.877198986925868e-05, "loss": 0.9222, "step": 1648 }, { "epoch": 0.11172843688596788, "grad_norm": 7.930290699005127, "learning_rate": 9.877062085016086e-05, "loss": 1.0095, "step": 1649 }, { "epoch": 0.11179619215393997, "grad_norm": 9.721529006958008, "learning_rate": 9.876925183106304e-05, "loss": 1.0812, "step": 1650 }, { "epoch": 0.11186394742191205, "grad_norm": 10.844324111938477, "learning_rate": 9.876788281196523e-05, "loss": 1.2245, "step": 1651 }, { "epoch": 0.11193170268988414, "grad_norm": 8.750361442565918, "learning_rate": 9.87665137928674e-05, "loss": 0.9983, "step": 1652 }, { "epoch": 0.11199945795785622, "grad_norm": 8.716521263122559, "learning_rate": 9.87651447737696e-05, "loss": 0.8612, "step": 1653 }, { "epoch": 0.11206721322582831, "grad_norm": 10.06696891784668, "learning_rate": 9.876377575467178e-05, "loss": 1.0806, "step": 1654 }, { "epoch": 0.11213496849380039, "grad_norm": 10.526103019714355, "learning_rate": 9.876240673557396e-05, "loss": 1.0799, "step": 1655 }, { "epoch": 0.11220272376177248, "grad_norm": 11.108748435974121, "learning_rate": 9.876103771647614e-05, "loss": 1.3871, "step": 1656 }, { "epoch": 0.11227047902974456, "grad_norm": 10.120033264160156, "learning_rate": 9.875966869737833e-05, "loss": 1.0399, "step": 1657 }, { "epoch": 0.11233823429771665, "grad_norm": 10.589370727539062, "learning_rate": 9.875829967828051e-05, "loss": 1.3249, "step": 1658 }, { "epoch": 0.11240598956568873, "grad_norm": 8.751884460449219, "learning_rate": 9.87569306591827e-05, "loss": 0.9404, "step": 1659 }, { "epoch": 0.11247374483366082, "grad_norm": 9.923823356628418, "learning_rate": 9.875556164008488e-05, "loss": 1.067, "step": 1660 }, { "epoch": 0.1125415001016329, "grad_norm": 9.70035171508789, "learning_rate": 9.875419262098707e-05, "loss": 1.0079, "step": 1661 }, { "epoch": 0.11260925536960499, "grad_norm": 12.337713241577148, "learning_rate": 9.875282360188925e-05, "loss": 1.0962, "step": 1662 }, { "epoch": 0.11267701063757707, "grad_norm": 10.142355918884277, "learning_rate": 9.875145458279143e-05, "loss": 1.1916, "step": 1663 }, { "epoch": 0.11274476590554916, "grad_norm": 11.82714557647705, "learning_rate": 9.875008556369362e-05, "loss": 1.4582, "step": 1664 }, { "epoch": 0.11281252117352124, "grad_norm": 10.524566650390625, "learning_rate": 9.87487165445958e-05, "loss": 1.2156, "step": 1665 }, { "epoch": 0.11288027644149333, "grad_norm": 12.232719421386719, "learning_rate": 9.874734752549798e-05, "loss": 1.2152, "step": 1666 }, { "epoch": 0.11294803170946541, "grad_norm": 10.665773391723633, "learning_rate": 9.874597850640018e-05, "loss": 1.085, "step": 1667 }, { "epoch": 0.1130157869774375, "grad_norm": 10.0956449508667, "learning_rate": 9.874460948730236e-05, "loss": 1.1142, "step": 1668 }, { "epoch": 0.11308354224540958, "grad_norm": 9.27825927734375, "learning_rate": 9.874324046820454e-05, "loss": 1.0263, "step": 1669 }, { "epoch": 0.11315129751338167, "grad_norm": 11.171673774719238, "learning_rate": 9.874187144910672e-05, "loss": 1.5049, "step": 1670 }, { "epoch": 0.11321905278135375, "grad_norm": 10.442279815673828, "learning_rate": 9.874050243000891e-05, "loss": 1.0697, "step": 1671 }, { "epoch": 0.11328680804932584, "grad_norm": 11.563700675964355, "learning_rate": 9.87391334109111e-05, "loss": 1.003, "step": 1672 }, { "epoch": 0.11335456331729792, "grad_norm": 12.447733879089355, "learning_rate": 9.873776439181327e-05, "loss": 1.2279, "step": 1673 }, { "epoch": 0.11342231858527001, "grad_norm": 9.70698070526123, "learning_rate": 9.873639537271545e-05, "loss": 1.2208, "step": 1674 }, { "epoch": 0.11349007385324208, "grad_norm": 10.89858341217041, "learning_rate": 9.873502635361763e-05, "loss": 1.1676, "step": 1675 }, { "epoch": 0.11355782912121418, "grad_norm": 11.106192588806152, "learning_rate": 9.873365733451983e-05, "loss": 1.0918, "step": 1676 }, { "epoch": 0.11362558438918625, "grad_norm": 10.186528205871582, "learning_rate": 9.873228831542201e-05, "loss": 1.1651, "step": 1677 }, { "epoch": 0.11369333965715835, "grad_norm": 10.69977855682373, "learning_rate": 9.873091929632419e-05, "loss": 1.2867, "step": 1678 }, { "epoch": 0.11376109492513042, "grad_norm": 11.934000015258789, "learning_rate": 9.872955027722637e-05, "loss": 1.2051, "step": 1679 }, { "epoch": 0.11382885019310252, "grad_norm": 9.277094841003418, "learning_rate": 9.872818125812856e-05, "loss": 1.17, "step": 1680 }, { "epoch": 0.1138966054610746, "grad_norm": 11.35097885131836, "learning_rate": 9.872681223903074e-05, "loss": 1.2433, "step": 1681 }, { "epoch": 0.11396436072904668, "grad_norm": 11.362833023071289, "learning_rate": 9.872544321993292e-05, "loss": 1.3577, "step": 1682 }, { "epoch": 0.11403211599701876, "grad_norm": 10.944365501403809, "learning_rate": 9.87240742008351e-05, "loss": 1.0731, "step": 1683 }, { "epoch": 0.11409987126499085, "grad_norm": 10.292986869812012, "learning_rate": 9.872270518173728e-05, "loss": 1.0639, "step": 1684 }, { "epoch": 0.11416762653296293, "grad_norm": 10.8629150390625, "learning_rate": 9.872133616263948e-05, "loss": 1.3503, "step": 1685 }, { "epoch": 0.11423538180093502, "grad_norm": 10.277920722961426, "learning_rate": 9.871996714354166e-05, "loss": 1.1544, "step": 1686 }, { "epoch": 0.1143031370689071, "grad_norm": 8.98864459991455, "learning_rate": 9.871859812444384e-05, "loss": 1.3638, "step": 1687 }, { "epoch": 0.11437089233687919, "grad_norm": 8.126718521118164, "learning_rate": 9.871722910534602e-05, "loss": 1.0343, "step": 1688 }, { "epoch": 0.11443864760485127, "grad_norm": 9.229771614074707, "learning_rate": 9.871586008624821e-05, "loss": 1.0137, "step": 1689 }, { "epoch": 0.11450640287282336, "grad_norm": 10.554340362548828, "learning_rate": 9.87144910671504e-05, "loss": 0.9908, "step": 1690 }, { "epoch": 0.11457415814079544, "grad_norm": 11.857934951782227, "learning_rate": 9.871312204805257e-05, "loss": 1.29, "step": 1691 }, { "epoch": 0.11464191340876753, "grad_norm": 9.334272384643555, "learning_rate": 9.871175302895475e-05, "loss": 1.0449, "step": 1692 }, { "epoch": 0.11470966867673962, "grad_norm": 9.092185974121094, "learning_rate": 9.871038400985693e-05, "loss": 1.1386, "step": 1693 }, { "epoch": 0.1147774239447117, "grad_norm": 9.379465103149414, "learning_rate": 9.870901499075913e-05, "loss": 1.0964, "step": 1694 }, { "epoch": 0.11484517921268379, "grad_norm": 10.038492202758789, "learning_rate": 9.870764597166131e-05, "loss": 0.8512, "step": 1695 }, { "epoch": 0.11491293448065587, "grad_norm": 8.2230863571167, "learning_rate": 9.870627695256349e-05, "loss": 1.0646, "step": 1696 }, { "epoch": 0.11498068974862796, "grad_norm": 9.939510345458984, "learning_rate": 9.870490793346567e-05, "loss": 1.081, "step": 1697 }, { "epoch": 0.11504844501660004, "grad_norm": 11.864813804626465, "learning_rate": 9.870353891436786e-05, "loss": 1.2447, "step": 1698 }, { "epoch": 0.11511620028457213, "grad_norm": 11.517714500427246, "learning_rate": 9.870216989527004e-05, "loss": 1.1274, "step": 1699 }, { "epoch": 0.11518395555254421, "grad_norm": 12.172054290771484, "learning_rate": 9.870080087617222e-05, "loss": 1.3278, "step": 1700 }, { "epoch": 0.1152517108205163, "grad_norm": 9.915261268615723, "learning_rate": 9.86994318570744e-05, "loss": 1.0298, "step": 1701 }, { "epoch": 0.11531946608848838, "grad_norm": 10.937396049499512, "learning_rate": 9.869806283797659e-05, "loss": 1.363, "step": 1702 }, { "epoch": 0.11538722135646047, "grad_norm": 11.586840629577637, "learning_rate": 9.869669381887878e-05, "loss": 0.9152, "step": 1703 }, { "epoch": 0.11545497662443255, "grad_norm": 10.035614967346191, "learning_rate": 9.869532479978096e-05, "loss": 1.2152, "step": 1704 }, { "epoch": 0.11552273189240464, "grad_norm": 8.775728225708008, "learning_rate": 9.869395578068314e-05, "loss": 1.0552, "step": 1705 }, { "epoch": 0.11559048716037672, "grad_norm": 10.85958480834961, "learning_rate": 9.869258676158532e-05, "loss": 1.0473, "step": 1706 }, { "epoch": 0.11565824242834881, "grad_norm": 9.311616897583008, "learning_rate": 9.869121774248751e-05, "loss": 1.1704, "step": 1707 }, { "epoch": 0.11572599769632089, "grad_norm": 9.503780364990234, "learning_rate": 9.86898487233897e-05, "loss": 1.1375, "step": 1708 }, { "epoch": 0.11579375296429298, "grad_norm": 9.02602481842041, "learning_rate": 9.868847970429187e-05, "loss": 0.9955, "step": 1709 }, { "epoch": 0.11586150823226506, "grad_norm": 11.473995208740234, "learning_rate": 9.868711068519407e-05, "loss": 0.8842, "step": 1710 }, { "epoch": 0.11592926350023715, "grad_norm": 10.124822616577148, "learning_rate": 9.868574166609625e-05, "loss": 1.0458, "step": 1711 }, { "epoch": 0.11599701876820923, "grad_norm": 11.004744529724121, "learning_rate": 9.868437264699843e-05, "loss": 1.2136, "step": 1712 }, { "epoch": 0.11606477403618132, "grad_norm": 10.497210502624512, "learning_rate": 9.868300362790062e-05, "loss": 1.3457, "step": 1713 }, { "epoch": 0.1161325293041534, "grad_norm": 10.933736801147461, "learning_rate": 9.86816346088028e-05, "loss": 1.0931, "step": 1714 }, { "epoch": 0.11620028457212549, "grad_norm": 11.045526504516602, "learning_rate": 9.868026558970498e-05, "loss": 1.2338, "step": 1715 }, { "epoch": 0.11626803984009756, "grad_norm": 10.763197898864746, "learning_rate": 9.867889657060716e-05, "loss": 1.2325, "step": 1716 }, { "epoch": 0.11633579510806966, "grad_norm": 9.53070068359375, "learning_rate": 9.867752755150936e-05, "loss": 1.1268, "step": 1717 }, { "epoch": 0.11640355037604173, "grad_norm": 10.572071075439453, "learning_rate": 9.867615853241154e-05, "loss": 1.0118, "step": 1718 }, { "epoch": 0.11647130564401383, "grad_norm": 7.633776664733887, "learning_rate": 9.867478951331372e-05, "loss": 1.081, "step": 1719 }, { "epoch": 0.1165390609119859, "grad_norm": 8.915566444396973, "learning_rate": 9.86734204942159e-05, "loss": 1.1526, "step": 1720 }, { "epoch": 0.116606816179958, "grad_norm": 8.296719551086426, "learning_rate": 9.867205147511809e-05, "loss": 1.0571, "step": 1721 }, { "epoch": 0.11667457144793007, "grad_norm": 9.387847900390625, "learning_rate": 9.867068245602027e-05, "loss": 1.0447, "step": 1722 }, { "epoch": 0.11674232671590216, "grad_norm": 10.832168579101562, "learning_rate": 9.866931343692245e-05, "loss": 1.2601, "step": 1723 }, { "epoch": 0.11681008198387424, "grad_norm": 11.631181716918945, "learning_rate": 9.866794441782463e-05, "loss": 1.4507, "step": 1724 }, { "epoch": 0.11687783725184633, "grad_norm": 8.372645378112793, "learning_rate": 9.866657539872681e-05, "loss": 1.1191, "step": 1725 }, { "epoch": 0.11694559251981841, "grad_norm": 9.577934265136719, "learning_rate": 9.866520637962901e-05, "loss": 1.0336, "step": 1726 }, { "epoch": 0.1170133477877905, "grad_norm": 10.626317977905273, "learning_rate": 9.866383736053119e-05, "loss": 1.1595, "step": 1727 }, { "epoch": 0.11708110305576258, "grad_norm": 9.758353233337402, "learning_rate": 9.866246834143337e-05, "loss": 0.8803, "step": 1728 }, { "epoch": 0.11714885832373467, "grad_norm": 9.643457412719727, "learning_rate": 9.866109932233555e-05, "loss": 1.3213, "step": 1729 }, { "epoch": 0.11721661359170675, "grad_norm": 8.910233497619629, "learning_rate": 9.865973030323773e-05, "loss": 1.1123, "step": 1730 }, { "epoch": 0.11728436885967884, "grad_norm": 8.893741607666016, "learning_rate": 9.865836128413992e-05, "loss": 1.1059, "step": 1731 }, { "epoch": 0.11735212412765092, "grad_norm": 12.937616348266602, "learning_rate": 9.86569922650421e-05, "loss": 1.2488, "step": 1732 }, { "epoch": 0.11741987939562301, "grad_norm": 9.696173667907715, "learning_rate": 9.865562324594428e-05, "loss": 1.1076, "step": 1733 }, { "epoch": 0.11748763466359509, "grad_norm": 11.568793296813965, "learning_rate": 9.865425422684646e-05, "loss": 1.1023, "step": 1734 }, { "epoch": 0.11755538993156718, "grad_norm": 11.438271522521973, "learning_rate": 9.865288520774866e-05, "loss": 1.1014, "step": 1735 }, { "epoch": 0.11762314519953926, "grad_norm": 8.217238426208496, "learning_rate": 9.865151618865084e-05, "loss": 0.9566, "step": 1736 }, { "epoch": 0.11769090046751135, "grad_norm": 9.716591835021973, "learning_rate": 9.865014716955302e-05, "loss": 1.0478, "step": 1737 }, { "epoch": 0.11775865573548343, "grad_norm": 11.152593612670898, "learning_rate": 9.86487781504552e-05, "loss": 1.3552, "step": 1738 }, { "epoch": 0.11782641100345552, "grad_norm": 8.824272155761719, "learning_rate": 9.864740913135738e-05, "loss": 1.0463, "step": 1739 }, { "epoch": 0.1178941662714276, "grad_norm": 8.941548347473145, "learning_rate": 9.864604011225957e-05, "loss": 0.9552, "step": 1740 }, { "epoch": 0.11796192153939969, "grad_norm": 10.76177978515625, "learning_rate": 9.864467109316175e-05, "loss": 1.0076, "step": 1741 }, { "epoch": 0.11802967680737177, "grad_norm": 9.87431812286377, "learning_rate": 9.864330207406393e-05, "loss": 0.9905, "step": 1742 }, { "epoch": 0.11809743207534386, "grad_norm": 12.406320571899414, "learning_rate": 9.864193305496611e-05, "loss": 1.2839, "step": 1743 }, { "epoch": 0.11816518734331594, "grad_norm": 12.649428367614746, "learning_rate": 9.864056403586831e-05, "loss": 1.0806, "step": 1744 }, { "epoch": 0.11823294261128803, "grad_norm": 10.888890266418457, "learning_rate": 9.863919501677049e-05, "loss": 0.8805, "step": 1745 }, { "epoch": 0.11830069787926012, "grad_norm": 10.59262752532959, "learning_rate": 9.863782599767267e-05, "loss": 0.9977, "step": 1746 }, { "epoch": 0.1183684531472322, "grad_norm": 11.14206314086914, "learning_rate": 9.863645697857485e-05, "loss": 1.4569, "step": 1747 }, { "epoch": 0.11843620841520429, "grad_norm": 12.148449897766113, "learning_rate": 9.863508795947703e-05, "loss": 1.5222, "step": 1748 }, { "epoch": 0.11850396368317637, "grad_norm": 11.814643859863281, "learning_rate": 9.863371894037922e-05, "loss": 1.3888, "step": 1749 }, { "epoch": 0.11857171895114846, "grad_norm": 12.437151908874512, "learning_rate": 9.86323499212814e-05, "loss": 1.024, "step": 1750 }, { "epoch": 0.11863947421912054, "grad_norm": 9.314751625061035, "learning_rate": 9.863098090218358e-05, "loss": 1.064, "step": 1751 }, { "epoch": 0.11870722948709263, "grad_norm": 12.978782653808594, "learning_rate": 9.862961188308576e-05, "loss": 1.4048, "step": 1752 }, { "epoch": 0.1187749847550647, "grad_norm": 9.825428009033203, "learning_rate": 9.862824286398796e-05, "loss": 0.9631, "step": 1753 }, { "epoch": 0.1188427400230368, "grad_norm": 9.127273559570312, "learning_rate": 9.862687384489014e-05, "loss": 1.0364, "step": 1754 }, { "epoch": 0.11891049529100887, "grad_norm": 10.290020942687988, "learning_rate": 9.862550482579232e-05, "loss": 1.329, "step": 1755 }, { "epoch": 0.11897825055898097, "grad_norm": 9.023946762084961, "learning_rate": 9.862413580669451e-05, "loss": 1.186, "step": 1756 }, { "epoch": 0.11904600582695304, "grad_norm": 11.889911651611328, "learning_rate": 9.86227667875967e-05, "loss": 1.0338, "step": 1757 }, { "epoch": 0.11911376109492514, "grad_norm": 8.938654899597168, "learning_rate": 9.862139776849887e-05, "loss": 1.1933, "step": 1758 }, { "epoch": 0.11918151636289721, "grad_norm": 9.240127563476562, "learning_rate": 9.862002874940107e-05, "loss": 1.2336, "step": 1759 }, { "epoch": 0.1192492716308693, "grad_norm": 10.340953826904297, "learning_rate": 9.861865973030325e-05, "loss": 1.1097, "step": 1760 }, { "epoch": 0.11931702689884138, "grad_norm": 10.807507514953613, "learning_rate": 9.861729071120543e-05, "loss": 0.9127, "step": 1761 }, { "epoch": 0.11938478216681347, "grad_norm": 9.670829772949219, "learning_rate": 9.861592169210761e-05, "loss": 1.2296, "step": 1762 }, { "epoch": 0.11945253743478555, "grad_norm": 10.85981273651123, "learning_rate": 9.86145526730098e-05, "loss": 1.1341, "step": 1763 }, { "epoch": 0.11952029270275764, "grad_norm": 9.198482513427734, "learning_rate": 9.861318365391198e-05, "loss": 1.4397, "step": 1764 }, { "epoch": 0.11958804797072972, "grad_norm": 11.130392074584961, "learning_rate": 9.861181463481416e-05, "loss": 1.381, "step": 1765 }, { "epoch": 0.11965580323870181, "grad_norm": 9.568144798278809, "learning_rate": 9.861044561571634e-05, "loss": 1.0284, "step": 1766 }, { "epoch": 0.11972355850667389, "grad_norm": 9.049298286437988, "learning_rate": 9.860907659661854e-05, "loss": 1.0714, "step": 1767 }, { "epoch": 0.11979131377464598, "grad_norm": 8.948347091674805, "learning_rate": 9.860770757752072e-05, "loss": 1.0248, "step": 1768 }, { "epoch": 0.11985906904261806, "grad_norm": 10.836170196533203, "learning_rate": 9.86063385584229e-05, "loss": 1.0551, "step": 1769 }, { "epoch": 0.11992682431059015, "grad_norm": 11.044917106628418, "learning_rate": 9.860496953932508e-05, "loss": 1.1374, "step": 1770 }, { "epoch": 0.11999457957856223, "grad_norm": 10.975854873657227, "learning_rate": 9.860360052022726e-05, "loss": 1.2018, "step": 1771 }, { "epoch": 0.12006233484653432, "grad_norm": 9.752070426940918, "learning_rate": 9.860223150112945e-05, "loss": 1.0843, "step": 1772 }, { "epoch": 0.1201300901145064, "grad_norm": 9.662758827209473, "learning_rate": 9.860086248203163e-05, "loss": 0.8866, "step": 1773 }, { "epoch": 0.12019784538247849, "grad_norm": 11.268694877624512, "learning_rate": 9.859949346293381e-05, "loss": 1.2653, "step": 1774 }, { "epoch": 0.12026560065045057, "grad_norm": 9.201732635498047, "learning_rate": 9.8598124443836e-05, "loss": 1.1466, "step": 1775 }, { "epoch": 0.12033335591842266, "grad_norm": 12.577048301696777, "learning_rate": 9.859675542473819e-05, "loss": 1.2606, "step": 1776 }, { "epoch": 0.12040111118639474, "grad_norm": 10.051258087158203, "learning_rate": 9.859538640564037e-05, "loss": 1.1533, "step": 1777 }, { "epoch": 0.12046886645436683, "grad_norm": 9.358968734741211, "learning_rate": 9.859401738654255e-05, "loss": 1.0501, "step": 1778 }, { "epoch": 0.12053662172233891, "grad_norm": 11.277347564697266, "learning_rate": 9.859264836744473e-05, "loss": 0.9804, "step": 1779 }, { "epoch": 0.120604376990311, "grad_norm": 12.41299057006836, "learning_rate": 9.859127934834691e-05, "loss": 1.427, "step": 1780 }, { "epoch": 0.12067213225828308, "grad_norm": 10.081144332885742, "learning_rate": 9.85899103292491e-05, "loss": 1.1321, "step": 1781 }, { "epoch": 0.12073988752625517, "grad_norm": 11.469686508178711, "learning_rate": 9.858854131015128e-05, "loss": 1.1972, "step": 1782 }, { "epoch": 0.12080764279422725, "grad_norm": 8.608443260192871, "learning_rate": 9.858717229105346e-05, "loss": 0.9914, "step": 1783 }, { "epoch": 0.12087539806219934, "grad_norm": 11.673405647277832, "learning_rate": 9.858580327195564e-05, "loss": 1.3406, "step": 1784 }, { "epoch": 0.12094315333017142, "grad_norm": 9.533596992492676, "learning_rate": 9.858443425285782e-05, "loss": 1.2119, "step": 1785 }, { "epoch": 0.12101090859814351, "grad_norm": 7.792461395263672, "learning_rate": 9.858306523376002e-05, "loss": 1.167, "step": 1786 }, { "epoch": 0.12107866386611559, "grad_norm": 10.209978103637695, "learning_rate": 9.85816962146622e-05, "loss": 1.164, "step": 1787 }, { "epoch": 0.12114641913408768, "grad_norm": 11.591109275817871, "learning_rate": 9.858032719556438e-05, "loss": 1.2598, "step": 1788 }, { "epoch": 0.12121417440205975, "grad_norm": 10.562797546386719, "learning_rate": 9.857895817646656e-05, "loss": 1.3071, "step": 1789 }, { "epoch": 0.12128192967003185, "grad_norm": 9.419530868530273, "learning_rate": 9.857758915736875e-05, "loss": 0.9951, "step": 1790 }, { "epoch": 0.12134968493800392, "grad_norm": 9.525064468383789, "learning_rate": 9.857622013827093e-05, "loss": 1.1833, "step": 1791 }, { "epoch": 0.12141744020597602, "grad_norm": 9.140012741088867, "learning_rate": 9.857485111917311e-05, "loss": 1.0573, "step": 1792 }, { "epoch": 0.1214851954739481, "grad_norm": 10.88685417175293, "learning_rate": 9.85734821000753e-05, "loss": 1.1669, "step": 1793 }, { "epoch": 0.12155295074192018, "grad_norm": 8.63829231262207, "learning_rate": 9.857211308097747e-05, "loss": 1.1733, "step": 1794 }, { "epoch": 0.12162070600989226, "grad_norm": 9.90237808227539, "learning_rate": 9.857074406187967e-05, "loss": 0.9175, "step": 1795 }, { "epoch": 0.12168846127786435, "grad_norm": 10.068902969360352, "learning_rate": 9.856937504278185e-05, "loss": 0.9422, "step": 1796 }, { "epoch": 0.12175621654583643, "grad_norm": 7.482577800750732, "learning_rate": 9.856800602368403e-05, "loss": 1.0359, "step": 1797 }, { "epoch": 0.12182397181380852, "grad_norm": 10.847206115722656, "learning_rate": 9.856663700458621e-05, "loss": 1.2258, "step": 1798 }, { "epoch": 0.12189172708178062, "grad_norm": 11.122177124023438, "learning_rate": 9.85652679854884e-05, "loss": 1.1378, "step": 1799 }, { "epoch": 0.1219594823497527, "grad_norm": 9.994139671325684, "learning_rate": 9.856389896639058e-05, "loss": 1.2663, "step": 1800 }, { "epoch": 0.12202723761772478, "grad_norm": 10.687590599060059, "learning_rate": 9.856252994729276e-05, "loss": 1.1777, "step": 1801 }, { "epoch": 0.12209499288569686, "grad_norm": 8.403971672058105, "learning_rate": 9.856116092819494e-05, "loss": 0.9812, "step": 1802 }, { "epoch": 0.12216274815366895, "grad_norm": 9.77443790435791, "learning_rate": 9.855979190909714e-05, "loss": 1.0615, "step": 1803 }, { "epoch": 0.12223050342164103, "grad_norm": 10.772642135620117, "learning_rate": 9.855842288999932e-05, "loss": 1.1835, "step": 1804 }, { "epoch": 0.12229825868961312, "grad_norm": 10.509920120239258, "learning_rate": 9.85570538709015e-05, "loss": 1.0361, "step": 1805 }, { "epoch": 0.1223660139575852, "grad_norm": 11.14194393157959, "learning_rate": 9.855568485180369e-05, "loss": 0.9947, "step": 1806 }, { "epoch": 0.12243376922555729, "grad_norm": 11.018975257873535, "learning_rate": 9.855431583270587e-05, "loss": 1.2036, "step": 1807 }, { "epoch": 0.12250152449352937, "grad_norm": 12.099370002746582, "learning_rate": 9.855294681360805e-05, "loss": 1.2977, "step": 1808 }, { "epoch": 0.12256927976150146, "grad_norm": 8.841024398803711, "learning_rate": 9.855157779451025e-05, "loss": 1.2093, "step": 1809 }, { "epoch": 0.12263703502947354, "grad_norm": 9.149311065673828, "learning_rate": 9.855020877541243e-05, "loss": 1.0088, "step": 1810 }, { "epoch": 0.12270479029744563, "grad_norm": 10.53498649597168, "learning_rate": 9.854883975631461e-05, "loss": 0.9527, "step": 1811 }, { "epoch": 0.12277254556541771, "grad_norm": 9.230859756469727, "learning_rate": 9.854747073721679e-05, "loss": 0.9584, "step": 1812 }, { "epoch": 0.1228403008333898, "grad_norm": 11.702610969543457, "learning_rate": 9.854610171811898e-05, "loss": 1.2369, "step": 1813 }, { "epoch": 0.12290805610136188, "grad_norm": 9.912004470825195, "learning_rate": 9.854473269902116e-05, "loss": 1.2571, "step": 1814 }, { "epoch": 0.12297581136933397, "grad_norm": 10.071510314941406, "learning_rate": 9.854336367992334e-05, "loss": 1.1666, "step": 1815 }, { "epoch": 0.12304356663730605, "grad_norm": 10.642035484313965, "learning_rate": 9.854199466082552e-05, "loss": 1.0309, "step": 1816 }, { "epoch": 0.12311132190527814, "grad_norm": 11.135947227478027, "learning_rate": 9.85406256417277e-05, "loss": 1.1276, "step": 1817 }, { "epoch": 0.12317907717325022, "grad_norm": 11.575469017028809, "learning_rate": 9.85392566226299e-05, "loss": 1.1507, "step": 1818 }, { "epoch": 0.12324683244122231, "grad_norm": 10.470771789550781, "learning_rate": 9.853788760353208e-05, "loss": 1.3777, "step": 1819 }, { "epoch": 0.12331458770919439, "grad_norm": 9.77783489227295, "learning_rate": 9.853651858443426e-05, "loss": 1.0492, "step": 1820 }, { "epoch": 0.12338234297716648, "grad_norm": 10.132309913635254, "learning_rate": 9.853514956533644e-05, "loss": 1.1488, "step": 1821 }, { "epoch": 0.12345009824513856, "grad_norm": 11.159482955932617, "learning_rate": 9.853378054623863e-05, "loss": 1.2571, "step": 1822 }, { "epoch": 0.12351785351311065, "grad_norm": 11.599156379699707, "learning_rate": 9.853241152714081e-05, "loss": 1.1734, "step": 1823 }, { "epoch": 0.12358560878108273, "grad_norm": 10.278475761413574, "learning_rate": 9.853104250804299e-05, "loss": 1.0637, "step": 1824 }, { "epoch": 0.12365336404905482, "grad_norm": 12.126015663146973, "learning_rate": 9.852967348894517e-05, "loss": 1.0546, "step": 1825 }, { "epoch": 0.1237211193170269, "grad_norm": 12.094749450683594, "learning_rate": 9.852830446984735e-05, "loss": 1.374, "step": 1826 }, { "epoch": 0.12378887458499899, "grad_norm": 7.607821941375732, "learning_rate": 9.852693545074955e-05, "loss": 0.827, "step": 1827 }, { "epoch": 0.12385662985297106, "grad_norm": 12.086756706237793, "learning_rate": 9.852556643165173e-05, "loss": 1.1219, "step": 1828 }, { "epoch": 0.12392438512094316, "grad_norm": 10.215773582458496, "learning_rate": 9.852419741255391e-05, "loss": 1.0148, "step": 1829 }, { "epoch": 0.12399214038891523, "grad_norm": 10.438709259033203, "learning_rate": 9.852282839345609e-05, "loss": 1.0649, "step": 1830 }, { "epoch": 0.12405989565688733, "grad_norm": 10.564906120300293, "learning_rate": 9.852145937435828e-05, "loss": 1.4346, "step": 1831 }, { "epoch": 0.1241276509248594, "grad_norm": 9.83198356628418, "learning_rate": 9.852009035526046e-05, "loss": 0.847, "step": 1832 }, { "epoch": 0.1241954061928315, "grad_norm": 10.990026473999023, "learning_rate": 9.851872133616264e-05, "loss": 1.2358, "step": 1833 }, { "epoch": 0.12426316146080357, "grad_norm": 10.108382225036621, "learning_rate": 9.851735231706482e-05, "loss": 0.9943, "step": 1834 }, { "epoch": 0.12433091672877566, "grad_norm": 10.23820686340332, "learning_rate": 9.8515983297967e-05, "loss": 1.2464, "step": 1835 }, { "epoch": 0.12439867199674774, "grad_norm": 11.552473068237305, "learning_rate": 9.85146142788692e-05, "loss": 1.2175, "step": 1836 }, { "epoch": 0.12446642726471983, "grad_norm": 8.646978378295898, "learning_rate": 9.851324525977138e-05, "loss": 0.9975, "step": 1837 }, { "epoch": 0.12453418253269191, "grad_norm": 9.344541549682617, "learning_rate": 9.851187624067356e-05, "loss": 0.9463, "step": 1838 }, { "epoch": 0.124601937800664, "grad_norm": 12.645013809204102, "learning_rate": 9.851050722157574e-05, "loss": 1.0396, "step": 1839 }, { "epoch": 0.12466969306863608, "grad_norm": 10.093807220458984, "learning_rate": 9.850913820247792e-05, "loss": 1.2554, "step": 1840 }, { "epoch": 0.12473744833660817, "grad_norm": 8.792567253112793, "learning_rate": 9.850776918338011e-05, "loss": 0.9852, "step": 1841 }, { "epoch": 0.12480520360458025, "grad_norm": 10.448987007141113, "learning_rate": 9.85064001642823e-05, "loss": 1.168, "step": 1842 }, { "epoch": 0.12487295887255234, "grad_norm": 9.953516960144043, "learning_rate": 9.850503114518447e-05, "loss": 1.0116, "step": 1843 }, { "epoch": 0.12494071414052442, "grad_norm": 10.187036514282227, "learning_rate": 9.850366212608665e-05, "loss": 1.203, "step": 1844 }, { "epoch": 0.1250084694084965, "grad_norm": 8.890667915344238, "learning_rate": 9.850229310698885e-05, "loss": 1.0104, "step": 1845 }, { "epoch": 0.1250762246764686, "grad_norm": 11.916625022888184, "learning_rate": 9.850092408789103e-05, "loss": 1.0254, "step": 1846 }, { "epoch": 0.12514397994444068, "grad_norm": 10.934864044189453, "learning_rate": 9.849955506879321e-05, "loss": 0.975, "step": 1847 }, { "epoch": 0.12521173521241277, "grad_norm": 11.214954376220703, "learning_rate": 9.849818604969539e-05, "loss": 1.45, "step": 1848 }, { "epoch": 0.12527949048038484, "grad_norm": 8.801512718200684, "learning_rate": 9.849681703059758e-05, "loss": 1.0899, "step": 1849 }, { "epoch": 0.12534724574835693, "grad_norm": 10.340089797973633, "learning_rate": 9.849544801149976e-05, "loss": 1.1704, "step": 1850 }, { "epoch": 0.12541500101632902, "grad_norm": 9.204201698303223, "learning_rate": 9.849407899240194e-05, "loss": 1.0569, "step": 1851 }, { "epoch": 0.1254827562843011, "grad_norm": 9.43604564666748, "learning_rate": 9.849270997330414e-05, "loss": 1.0721, "step": 1852 }, { "epoch": 0.1255505115522732, "grad_norm": 10.110416412353516, "learning_rate": 9.849134095420632e-05, "loss": 1.1265, "step": 1853 }, { "epoch": 0.12561826682024527, "grad_norm": 10.630755424499512, "learning_rate": 9.84899719351085e-05, "loss": 1.1103, "step": 1854 }, { "epoch": 0.12568602208821736, "grad_norm": 10.888835906982422, "learning_rate": 9.848860291601069e-05, "loss": 1.347, "step": 1855 }, { "epoch": 0.12575377735618945, "grad_norm": 8.84623908996582, "learning_rate": 9.848723389691287e-05, "loss": 0.8531, "step": 1856 }, { "epoch": 0.12582153262416154, "grad_norm": 10.99928092956543, "learning_rate": 9.848586487781505e-05, "loss": 1.1747, "step": 1857 }, { "epoch": 0.1258892878921336, "grad_norm": 8.556151390075684, "learning_rate": 9.848449585871723e-05, "loss": 1.0207, "step": 1858 }, { "epoch": 0.1259570431601057, "grad_norm": 10.207547187805176, "learning_rate": 9.848312683961943e-05, "loss": 1.4782, "step": 1859 }, { "epoch": 0.1260247984280778, "grad_norm": 9.39505386352539, "learning_rate": 9.848175782052161e-05, "loss": 1.1415, "step": 1860 }, { "epoch": 0.12609255369604988, "grad_norm": 10.647768020629883, "learning_rate": 9.848038880142379e-05, "loss": 1.1439, "step": 1861 }, { "epoch": 0.12616030896402194, "grad_norm": 11.517707824707031, "learning_rate": 9.847901978232597e-05, "loss": 1.3181, "step": 1862 }, { "epoch": 0.12622806423199404, "grad_norm": 10.916093826293945, "learning_rate": 9.847765076322815e-05, "loss": 1.0708, "step": 1863 }, { "epoch": 0.12629581949996613, "grad_norm": 9.893363952636719, "learning_rate": 9.847628174413034e-05, "loss": 1.1932, "step": 1864 }, { "epoch": 0.12636357476793822, "grad_norm": 12.349174499511719, "learning_rate": 9.847491272503252e-05, "loss": 1.3161, "step": 1865 }, { "epoch": 0.12643133003591028, "grad_norm": 10.157081604003906, "learning_rate": 9.84735437059347e-05, "loss": 1.1325, "step": 1866 }, { "epoch": 0.12649908530388237, "grad_norm": 9.772073745727539, "learning_rate": 9.847217468683688e-05, "loss": 1.2739, "step": 1867 }, { "epoch": 0.12656684057185447, "grad_norm": 10.468371391296387, "learning_rate": 9.847080566773908e-05, "loss": 1.0279, "step": 1868 }, { "epoch": 0.12663459583982656, "grad_norm": 11.055033683776855, "learning_rate": 9.846943664864126e-05, "loss": 1.0584, "step": 1869 }, { "epoch": 0.12670235110779862, "grad_norm": 10.47987174987793, "learning_rate": 9.846806762954344e-05, "loss": 1.0385, "step": 1870 }, { "epoch": 0.1267701063757707, "grad_norm": 8.933801651000977, "learning_rate": 9.846669861044562e-05, "loss": 1.0259, "step": 1871 }, { "epoch": 0.1268378616437428, "grad_norm": 10.32228946685791, "learning_rate": 9.84653295913478e-05, "loss": 1.0472, "step": 1872 }, { "epoch": 0.1269056169117149, "grad_norm": 9.294051170349121, "learning_rate": 9.846396057224999e-05, "loss": 1.1638, "step": 1873 }, { "epoch": 0.12697337217968696, "grad_norm": 8.32187271118164, "learning_rate": 9.846259155315217e-05, "loss": 0.9865, "step": 1874 }, { "epoch": 0.12704112744765905, "grad_norm": 8.782264709472656, "learning_rate": 9.846122253405435e-05, "loss": 1.0868, "step": 1875 }, { "epoch": 0.12710888271563114, "grad_norm": 10.444697380065918, "learning_rate": 9.845985351495653e-05, "loss": 1.2137, "step": 1876 }, { "epoch": 0.12717663798360324, "grad_norm": 10.543922424316406, "learning_rate": 9.845848449585873e-05, "loss": 1.2337, "step": 1877 }, { "epoch": 0.1272443932515753, "grad_norm": 8.504612922668457, "learning_rate": 9.845711547676091e-05, "loss": 1.0234, "step": 1878 }, { "epoch": 0.1273121485195474, "grad_norm": 8.83178997039795, "learning_rate": 9.845574645766309e-05, "loss": 1.0611, "step": 1879 }, { "epoch": 0.12737990378751948, "grad_norm": 14.37713623046875, "learning_rate": 9.845437743856527e-05, "loss": 1.1704, "step": 1880 }, { "epoch": 0.12744765905549157, "grad_norm": 12.43575668334961, "learning_rate": 9.845300841946745e-05, "loss": 0.848, "step": 1881 }, { "epoch": 0.12751541432346364, "grad_norm": 11.625554084777832, "learning_rate": 9.845163940036964e-05, "loss": 1.2395, "step": 1882 }, { "epoch": 0.12758316959143573, "grad_norm": 7.8962082862854, "learning_rate": 9.845027038127182e-05, "loss": 0.9702, "step": 1883 }, { "epoch": 0.12765092485940782, "grad_norm": 11.689674377441406, "learning_rate": 9.8448901362174e-05, "loss": 1.4885, "step": 1884 }, { "epoch": 0.1277186801273799, "grad_norm": 11.037276268005371, "learning_rate": 9.844753234307618e-05, "loss": 1.1751, "step": 1885 }, { "epoch": 0.12778643539535198, "grad_norm": 11.115680694580078, "learning_rate": 9.844616332397836e-05, "loss": 1.2912, "step": 1886 }, { "epoch": 0.12785419066332407, "grad_norm": 10.553492546081543, "learning_rate": 9.844479430488056e-05, "loss": 1.1001, "step": 1887 }, { "epoch": 0.12792194593129616, "grad_norm": 9.92845630645752, "learning_rate": 9.844342528578274e-05, "loss": 1.1697, "step": 1888 }, { "epoch": 0.12798970119926825, "grad_norm": 9.33232307434082, "learning_rate": 9.844205626668492e-05, "loss": 1.0931, "step": 1889 }, { "epoch": 0.12805745646724032, "grad_norm": 12.35257339477539, "learning_rate": 9.84406872475871e-05, "loss": 0.9819, "step": 1890 }, { "epoch": 0.1281252117352124, "grad_norm": 9.913105010986328, "learning_rate": 9.843931822848929e-05, "loss": 1.2515, "step": 1891 }, { "epoch": 0.1281929670031845, "grad_norm": 11.218729019165039, "learning_rate": 9.843794920939147e-05, "loss": 1.0073, "step": 1892 }, { "epoch": 0.1282607222711566, "grad_norm": 9.97374153137207, "learning_rate": 9.843658019029365e-05, "loss": 1.0527, "step": 1893 }, { "epoch": 0.12832847753912865, "grad_norm": 8.837554931640625, "learning_rate": 9.843521117119583e-05, "loss": 1.0175, "step": 1894 }, { "epoch": 0.12839623280710075, "grad_norm": 9.207158088684082, "learning_rate": 9.843384215209803e-05, "loss": 1.0729, "step": 1895 }, { "epoch": 0.12846398807507284, "grad_norm": 10.960060119628906, "learning_rate": 9.843247313300021e-05, "loss": 1.0485, "step": 1896 }, { "epoch": 0.12853174334304493, "grad_norm": 9.724952697753906, "learning_rate": 9.843110411390239e-05, "loss": 1.2624, "step": 1897 }, { "epoch": 0.128599498611017, "grad_norm": 9.02813720703125, "learning_rate": 9.842973509480458e-05, "loss": 1.011, "step": 1898 }, { "epoch": 0.12866725387898909, "grad_norm": 8.507847785949707, "learning_rate": 9.842836607570676e-05, "loss": 0.9155, "step": 1899 }, { "epoch": 0.12873500914696118, "grad_norm": 10.609807968139648, "learning_rate": 9.842699705660894e-05, "loss": 1.1984, "step": 1900 }, { "epoch": 0.12880276441493327, "grad_norm": 10.144070625305176, "learning_rate": 9.842562803751114e-05, "loss": 1.2331, "step": 1901 }, { "epoch": 0.12887051968290533, "grad_norm": 11.719651222229004, "learning_rate": 9.842425901841332e-05, "loss": 1.2086, "step": 1902 }, { "epoch": 0.12893827495087742, "grad_norm": 10.689997673034668, "learning_rate": 9.84228899993155e-05, "loss": 1.3769, "step": 1903 }, { "epoch": 0.12900603021884952, "grad_norm": 10.7424955368042, "learning_rate": 9.842152098021768e-05, "loss": 1.1844, "step": 1904 }, { "epoch": 0.1290737854868216, "grad_norm": 11.207498550415039, "learning_rate": 9.842015196111987e-05, "loss": 1.2151, "step": 1905 }, { "epoch": 0.1291415407547937, "grad_norm": 10.689212799072266, "learning_rate": 9.841878294202205e-05, "loss": 1.1182, "step": 1906 }, { "epoch": 0.12920929602276576, "grad_norm": 9.708647727966309, "learning_rate": 9.841741392292423e-05, "loss": 1.2874, "step": 1907 }, { "epoch": 0.12927705129073785, "grad_norm": 9.809847831726074, "learning_rate": 9.841604490382641e-05, "loss": 1.1716, "step": 1908 }, { "epoch": 0.12934480655870995, "grad_norm": 8.170798301696777, "learning_rate": 9.84146758847286e-05, "loss": 1.1342, "step": 1909 }, { "epoch": 0.12941256182668204, "grad_norm": 9.12773609161377, "learning_rate": 9.841330686563079e-05, "loss": 1.1506, "step": 1910 }, { "epoch": 0.1294803170946541, "grad_norm": 13.67032241821289, "learning_rate": 9.841193784653297e-05, "loss": 1.2065, "step": 1911 }, { "epoch": 0.1295480723626262, "grad_norm": 9.35611629486084, "learning_rate": 9.841056882743515e-05, "loss": 1.281, "step": 1912 }, { "epoch": 0.12961582763059828, "grad_norm": 9.044548988342285, "learning_rate": 9.840919980833733e-05, "loss": 1.0285, "step": 1913 }, { "epoch": 0.12968358289857038, "grad_norm": 9.955796241760254, "learning_rate": 9.840783078923952e-05, "loss": 1.1742, "step": 1914 }, { "epoch": 0.12975133816654244, "grad_norm": 10.456665992736816, "learning_rate": 9.84064617701417e-05, "loss": 1.3452, "step": 1915 }, { "epoch": 0.12981909343451453, "grad_norm": 10.861869812011719, "learning_rate": 9.840509275104388e-05, "loss": 1.0397, "step": 1916 }, { "epoch": 0.12988684870248662, "grad_norm": 8.546277046203613, "learning_rate": 9.840372373194606e-05, "loss": 0.8762, "step": 1917 }, { "epoch": 0.12995460397045872, "grad_norm": 9.103103637695312, "learning_rate": 9.840235471284824e-05, "loss": 1.0998, "step": 1918 }, { "epoch": 0.13002235923843078, "grad_norm": 9.794631958007812, "learning_rate": 9.840098569375044e-05, "loss": 1.0644, "step": 1919 }, { "epoch": 0.13009011450640287, "grad_norm": 8.702750205993652, "learning_rate": 9.839961667465262e-05, "loss": 1.108, "step": 1920 }, { "epoch": 0.13015786977437496, "grad_norm": 11.108800888061523, "learning_rate": 9.83982476555548e-05, "loss": 1.375, "step": 1921 }, { "epoch": 0.13022562504234705, "grad_norm": 11.253337860107422, "learning_rate": 9.839687863645698e-05, "loss": 1.0246, "step": 1922 }, { "epoch": 0.13029338031031912, "grad_norm": 9.817541122436523, "learning_rate": 9.839550961735917e-05, "loss": 1.0222, "step": 1923 }, { "epoch": 0.1303611355782912, "grad_norm": 9.378199577331543, "learning_rate": 9.839414059826135e-05, "loss": 1.0387, "step": 1924 }, { "epoch": 0.1304288908462633, "grad_norm": 8.789336204528809, "learning_rate": 9.839277157916353e-05, "loss": 1.0635, "step": 1925 }, { "epoch": 0.1304966461142354, "grad_norm": 11.900403022766113, "learning_rate": 9.839140256006571e-05, "loss": 1.1224, "step": 1926 }, { "epoch": 0.13056440138220746, "grad_norm": 10.007912635803223, "learning_rate": 9.83900335409679e-05, "loss": 0.8988, "step": 1927 }, { "epoch": 0.13063215665017955, "grad_norm": 10.157328605651855, "learning_rate": 9.838866452187009e-05, "loss": 0.952, "step": 1928 }, { "epoch": 0.13069991191815164, "grad_norm": 8.763729095458984, "learning_rate": 9.838729550277227e-05, "loss": 0.9304, "step": 1929 }, { "epoch": 0.13076766718612373, "grad_norm": 10.75432300567627, "learning_rate": 9.838592648367445e-05, "loss": 1.0438, "step": 1930 }, { "epoch": 0.1308354224540958, "grad_norm": 8.840702056884766, "learning_rate": 9.838455746457663e-05, "loss": 0.9164, "step": 1931 }, { "epoch": 0.1309031777220679, "grad_norm": 9.526811599731445, "learning_rate": 9.838318844547882e-05, "loss": 1.2181, "step": 1932 }, { "epoch": 0.13097093299003998, "grad_norm": 12.827199935913086, "learning_rate": 9.8381819426381e-05, "loss": 1.223, "step": 1933 }, { "epoch": 0.13103868825801207, "grad_norm": 9.47105884552002, "learning_rate": 9.838045040728318e-05, "loss": 1.1014, "step": 1934 }, { "epoch": 0.13110644352598413, "grad_norm": 9.044878959655762, "learning_rate": 9.837908138818536e-05, "loss": 1.1451, "step": 1935 }, { "epoch": 0.13117419879395623, "grad_norm": 9.24599838256836, "learning_rate": 9.837771236908754e-05, "loss": 1.1144, "step": 1936 }, { "epoch": 0.13124195406192832, "grad_norm": 12.543252944946289, "learning_rate": 9.837634334998974e-05, "loss": 1.1233, "step": 1937 }, { "epoch": 0.1313097093299004, "grad_norm": 10.022245407104492, "learning_rate": 9.837497433089192e-05, "loss": 1.3569, "step": 1938 }, { "epoch": 0.13137746459787247, "grad_norm": 11.967863082885742, "learning_rate": 9.83736053117941e-05, "loss": 1.2086, "step": 1939 }, { "epoch": 0.13144521986584456, "grad_norm": 9.64406967163086, "learning_rate": 9.837223629269628e-05, "loss": 1.1647, "step": 1940 }, { "epoch": 0.13151297513381666, "grad_norm": 8.538762092590332, "learning_rate": 9.837086727359847e-05, "loss": 1.012, "step": 1941 }, { "epoch": 0.13158073040178875, "grad_norm": 10.889129638671875, "learning_rate": 9.836949825450065e-05, "loss": 1.1559, "step": 1942 }, { "epoch": 0.1316484856697608, "grad_norm": 8.407093048095703, "learning_rate": 9.836812923540283e-05, "loss": 0.9624, "step": 1943 }, { "epoch": 0.1317162409377329, "grad_norm": 9.175569534301758, "learning_rate": 9.836676021630503e-05, "loss": 0.9814, "step": 1944 }, { "epoch": 0.131783996205705, "grad_norm": 11.240396499633789, "learning_rate": 9.836539119720721e-05, "loss": 1.3059, "step": 1945 }, { "epoch": 0.1318517514736771, "grad_norm": 8.791098594665527, "learning_rate": 9.836402217810939e-05, "loss": 0.9891, "step": 1946 }, { "epoch": 0.13191950674164915, "grad_norm": 7.401971340179443, "learning_rate": 9.836265315901158e-05, "loss": 0.9706, "step": 1947 }, { "epoch": 0.13198726200962124, "grad_norm": 13.127768516540527, "learning_rate": 9.836128413991376e-05, "loss": 0.8924, "step": 1948 }, { "epoch": 0.13205501727759333, "grad_norm": 9.805618286132812, "learning_rate": 9.835991512081594e-05, "loss": 0.9969, "step": 1949 }, { "epoch": 0.13212277254556543, "grad_norm": 10.500420570373535, "learning_rate": 9.835854610171812e-05, "loss": 1.1045, "step": 1950 }, { "epoch": 0.1321905278135375, "grad_norm": 10.043769836425781, "learning_rate": 9.835717708262032e-05, "loss": 1.2475, "step": 1951 }, { "epoch": 0.13225828308150958, "grad_norm": 10.6277437210083, "learning_rate": 9.83558080635225e-05, "loss": 0.9285, "step": 1952 }, { "epoch": 0.13232603834948167, "grad_norm": 9.011415481567383, "learning_rate": 9.835443904442468e-05, "loss": 1.1895, "step": 1953 }, { "epoch": 0.13239379361745376, "grad_norm": 11.031267166137695, "learning_rate": 9.835307002532686e-05, "loss": 1.1781, "step": 1954 }, { "epoch": 0.13246154888542583, "grad_norm": 9.960331916809082, "learning_rate": 9.835170100622905e-05, "loss": 1.0972, "step": 1955 }, { "epoch": 0.13252930415339792, "grad_norm": 9.549619674682617, "learning_rate": 9.835033198713123e-05, "loss": 1.0918, "step": 1956 }, { "epoch": 0.13259705942137, "grad_norm": 9.780478477478027, "learning_rate": 9.834896296803341e-05, "loss": 1.1491, "step": 1957 }, { "epoch": 0.1326648146893421, "grad_norm": 8.948554992675781, "learning_rate": 9.834759394893559e-05, "loss": 1.0969, "step": 1958 }, { "epoch": 0.1327325699573142, "grad_norm": 9.165532112121582, "learning_rate": 9.834622492983777e-05, "loss": 0.9294, "step": 1959 }, { "epoch": 0.13280032522528626, "grad_norm": 8.738619804382324, "learning_rate": 9.834485591073997e-05, "loss": 1.3074, "step": 1960 }, { "epoch": 0.13286808049325835, "grad_norm": 9.442314147949219, "learning_rate": 9.834348689164215e-05, "loss": 1.1238, "step": 1961 }, { "epoch": 0.13293583576123044, "grad_norm": 12.411934852600098, "learning_rate": 9.834211787254433e-05, "loss": 1.1405, "step": 1962 }, { "epoch": 0.13300359102920253, "grad_norm": 9.911120414733887, "learning_rate": 9.834074885344651e-05, "loss": 1.2197, "step": 1963 }, { "epoch": 0.1330713462971746, "grad_norm": 9.619095802307129, "learning_rate": 9.83393798343487e-05, "loss": 1.0526, "step": 1964 }, { "epoch": 0.1331391015651467, "grad_norm": 10.163374900817871, "learning_rate": 9.833801081525088e-05, "loss": 0.8374, "step": 1965 }, { "epoch": 0.13320685683311878, "grad_norm": 9.342517852783203, "learning_rate": 9.833664179615306e-05, "loss": 1.1908, "step": 1966 }, { "epoch": 0.13327461210109087, "grad_norm": 10.419418334960938, "learning_rate": 9.833527277705524e-05, "loss": 1.1175, "step": 1967 }, { "epoch": 0.13334236736906294, "grad_norm": 9.5196533203125, "learning_rate": 9.833390375795742e-05, "loss": 1.2347, "step": 1968 }, { "epoch": 0.13341012263703503, "grad_norm": 9.242755889892578, "learning_rate": 9.833253473885962e-05, "loss": 1.2401, "step": 1969 }, { "epoch": 0.13347787790500712, "grad_norm": 10.243762969970703, "learning_rate": 9.83311657197618e-05, "loss": 1.3492, "step": 1970 }, { "epoch": 0.1335456331729792, "grad_norm": 9.169745445251465, "learning_rate": 9.832979670066398e-05, "loss": 0.9533, "step": 1971 }, { "epoch": 0.13361338844095128, "grad_norm": 10.292695999145508, "learning_rate": 9.832842768156616e-05, "loss": 1.2216, "step": 1972 }, { "epoch": 0.13368114370892337, "grad_norm": 9.25019645690918, "learning_rate": 9.832705866246834e-05, "loss": 1.1613, "step": 1973 }, { "epoch": 0.13374889897689546, "grad_norm": 8.518020629882812, "learning_rate": 9.832568964337053e-05, "loss": 1.0767, "step": 1974 }, { "epoch": 0.13381665424486755, "grad_norm": 11.824376106262207, "learning_rate": 9.832432062427271e-05, "loss": 1.1884, "step": 1975 }, { "epoch": 0.13388440951283961, "grad_norm": 10.544015884399414, "learning_rate": 9.832295160517489e-05, "loss": 0.9675, "step": 1976 }, { "epoch": 0.1339521647808117, "grad_norm": 9.495721817016602, "learning_rate": 9.832158258607707e-05, "loss": 1.0586, "step": 1977 }, { "epoch": 0.1340199200487838, "grad_norm": 10.378434181213379, "learning_rate": 9.832021356697927e-05, "loss": 1.176, "step": 1978 }, { "epoch": 0.1340876753167559, "grad_norm": 10.026887893676758, "learning_rate": 9.831884454788145e-05, "loss": 1.035, "step": 1979 }, { "epoch": 0.13415543058472795, "grad_norm": 8.878249168395996, "learning_rate": 9.831747552878363e-05, "loss": 0.8441, "step": 1980 }, { "epoch": 0.13422318585270004, "grad_norm": 9.637616157531738, "learning_rate": 9.831610650968581e-05, "loss": 1.1435, "step": 1981 }, { "epoch": 0.13429094112067214, "grad_norm": 10.153711318969727, "learning_rate": 9.831473749058799e-05, "loss": 1.0833, "step": 1982 }, { "epoch": 0.13435869638864423, "grad_norm": 8.48596477508545, "learning_rate": 9.831336847149018e-05, "loss": 0.7752, "step": 1983 }, { "epoch": 0.1344264516566163, "grad_norm": 8.454994201660156, "learning_rate": 9.831199945239236e-05, "loss": 0.8657, "step": 1984 }, { "epoch": 0.13449420692458838, "grad_norm": 10.607659339904785, "learning_rate": 9.831063043329454e-05, "loss": 1.1836, "step": 1985 }, { "epoch": 0.13456196219256047, "grad_norm": 8.560189247131348, "learning_rate": 9.830926141419672e-05, "loss": 0.7877, "step": 1986 }, { "epoch": 0.13462971746053257, "grad_norm": 10.687662124633789, "learning_rate": 9.830789239509892e-05, "loss": 1.1586, "step": 1987 }, { "epoch": 0.13469747272850463, "grad_norm": 9.725050926208496, "learning_rate": 9.83065233760011e-05, "loss": 1.214, "step": 1988 }, { "epoch": 0.13476522799647672, "grad_norm": 9.808280944824219, "learning_rate": 9.830515435690328e-05, "loss": 1.0532, "step": 1989 }, { "epoch": 0.1348329832644488, "grad_norm": 8.838544845581055, "learning_rate": 9.830378533780547e-05, "loss": 1.0872, "step": 1990 }, { "epoch": 0.1349007385324209, "grad_norm": 12.043976783752441, "learning_rate": 9.830241631870765e-05, "loss": 1.2248, "step": 1991 }, { "epoch": 0.13496849380039297, "grad_norm": 10.044602394104004, "learning_rate": 9.830104729960983e-05, "loss": 0.9746, "step": 1992 }, { "epoch": 0.13503624906836506, "grad_norm": 10.861641883850098, "learning_rate": 9.829967828051203e-05, "loss": 0.9868, "step": 1993 }, { "epoch": 0.13510400433633715, "grad_norm": 9.005234718322754, "learning_rate": 9.82983092614142e-05, "loss": 1.0619, "step": 1994 }, { "epoch": 0.13517175960430924, "grad_norm": 9.902874946594238, "learning_rate": 9.829694024231639e-05, "loss": 1.3176, "step": 1995 }, { "epoch": 0.1352395148722813, "grad_norm": 12.133747100830078, "learning_rate": 9.829557122321857e-05, "loss": 1.3634, "step": 1996 }, { "epoch": 0.1353072701402534, "grad_norm": 9.571062088012695, "learning_rate": 9.829420220412076e-05, "loss": 1.139, "step": 1997 }, { "epoch": 0.1353750254082255, "grad_norm": 8.269545555114746, "learning_rate": 9.829283318502294e-05, "loss": 1.1422, "step": 1998 }, { "epoch": 0.13544278067619758, "grad_norm": 9.963309288024902, "learning_rate": 9.829146416592512e-05, "loss": 1.0612, "step": 1999 }, { "epoch": 0.13551053594416965, "grad_norm": 7.434775352478027, "learning_rate": 9.82900951468273e-05, "loss": 1.0418, "step": 2000 }, { "epoch": 0.13557829121214174, "grad_norm": 8.89494800567627, "learning_rate": 9.82887261277295e-05, "loss": 1.0677, "step": 2001 }, { "epoch": 0.13564604648011383, "grad_norm": 9.349754333496094, "learning_rate": 9.828735710863168e-05, "loss": 0.8932, "step": 2002 }, { "epoch": 0.13571380174808592, "grad_norm": 7.574460506439209, "learning_rate": 9.828598808953386e-05, "loss": 0.7652, "step": 2003 }, { "epoch": 0.13578155701605799, "grad_norm": 10.191296577453613, "learning_rate": 9.828461907043604e-05, "loss": 0.9881, "step": 2004 }, { "epoch": 0.13584931228403008, "grad_norm": 11.235671043395996, "learning_rate": 9.828325005133822e-05, "loss": 1.1942, "step": 2005 }, { "epoch": 0.13591706755200217, "grad_norm": 9.97700023651123, "learning_rate": 9.828188103224041e-05, "loss": 1.154, "step": 2006 }, { "epoch": 0.13598482281997426, "grad_norm": 8.283380508422852, "learning_rate": 9.828051201314259e-05, "loss": 0.8207, "step": 2007 }, { "epoch": 0.13605257808794632, "grad_norm": 9.147144317626953, "learning_rate": 9.827914299404477e-05, "loss": 1.0751, "step": 2008 }, { "epoch": 0.13612033335591842, "grad_norm": 9.99758529663086, "learning_rate": 9.827777397494695e-05, "loss": 1.1166, "step": 2009 }, { "epoch": 0.1361880886238905, "grad_norm": 8.340705871582031, "learning_rate": 9.827640495584915e-05, "loss": 1.0311, "step": 2010 }, { "epoch": 0.1362558438918626, "grad_norm": 10.536659240722656, "learning_rate": 9.827503593675133e-05, "loss": 1.1704, "step": 2011 }, { "epoch": 0.1363235991598347, "grad_norm": 9.01259994506836, "learning_rate": 9.827366691765351e-05, "loss": 1.0531, "step": 2012 }, { "epoch": 0.13639135442780675, "grad_norm": 10.476259231567383, "learning_rate": 9.827229789855569e-05, "loss": 1.053, "step": 2013 }, { "epoch": 0.13645910969577885, "grad_norm": 8.496943473815918, "learning_rate": 9.827092887945787e-05, "loss": 0.836, "step": 2014 }, { "epoch": 0.13652686496375094, "grad_norm": 11.638651847839355, "learning_rate": 9.826955986036006e-05, "loss": 1.0886, "step": 2015 }, { "epoch": 0.13659462023172303, "grad_norm": 7.483241558074951, "learning_rate": 9.826819084126224e-05, "loss": 1.0406, "step": 2016 }, { "epoch": 0.1366623754996951, "grad_norm": 8.648175239562988, "learning_rate": 9.826682182216442e-05, "loss": 1.0442, "step": 2017 }, { "epoch": 0.13673013076766719, "grad_norm": 8.593596458435059, "learning_rate": 9.82654528030666e-05, "loss": 1.1184, "step": 2018 }, { "epoch": 0.13679788603563928, "grad_norm": 8.563817977905273, "learning_rate": 9.826408378396878e-05, "loss": 0.8645, "step": 2019 }, { "epoch": 0.13686564130361137, "grad_norm": 7.812311172485352, "learning_rate": 9.826271476487098e-05, "loss": 1.0605, "step": 2020 }, { "epoch": 0.13693339657158343, "grad_norm": 10.97168254852295, "learning_rate": 9.826134574577316e-05, "loss": 1.1474, "step": 2021 }, { "epoch": 0.13700115183955552, "grad_norm": 7.749011993408203, "learning_rate": 9.825997672667534e-05, "loss": 0.9836, "step": 2022 }, { "epoch": 0.13706890710752762, "grad_norm": 11.088539123535156, "learning_rate": 9.825860770757752e-05, "loss": 1.1456, "step": 2023 }, { "epoch": 0.1371366623754997, "grad_norm": 10.960288047790527, "learning_rate": 9.825723868847971e-05, "loss": 1.4315, "step": 2024 }, { "epoch": 0.13720441764347177, "grad_norm": 10.804458618164062, "learning_rate": 9.825586966938189e-05, "loss": 1.0388, "step": 2025 }, { "epoch": 0.13727217291144386, "grad_norm": 7.906947612762451, "learning_rate": 9.825450065028407e-05, "loss": 1.1306, "step": 2026 }, { "epoch": 0.13733992817941595, "grad_norm": 10.404504776000977, "learning_rate": 9.825313163118625e-05, "loss": 1.0907, "step": 2027 }, { "epoch": 0.13740768344738805, "grad_norm": 9.684488296508789, "learning_rate": 9.825176261208843e-05, "loss": 1.1007, "step": 2028 }, { "epoch": 0.1374754387153601, "grad_norm": 8.345703125, "learning_rate": 9.825039359299063e-05, "loss": 0.9342, "step": 2029 }, { "epoch": 0.1375431939833322, "grad_norm": 8.513103485107422, "learning_rate": 9.824902457389281e-05, "loss": 1.1139, "step": 2030 }, { "epoch": 0.1376109492513043, "grad_norm": 8.675403594970703, "learning_rate": 9.824765555479499e-05, "loss": 0.8499, "step": 2031 }, { "epoch": 0.13767870451927638, "grad_norm": 10.550504684448242, "learning_rate": 9.824628653569717e-05, "loss": 1.2563, "step": 2032 }, { "epoch": 0.13774645978724845, "grad_norm": 7.775172710418701, "learning_rate": 9.824491751659936e-05, "loss": 1.0394, "step": 2033 }, { "epoch": 0.13781421505522054, "grad_norm": 10.057134628295898, "learning_rate": 9.824354849750154e-05, "loss": 1.337, "step": 2034 }, { "epoch": 0.13788197032319263, "grad_norm": 10.658480644226074, "learning_rate": 9.824217947840372e-05, "loss": 1.1675, "step": 2035 }, { "epoch": 0.13794972559116472, "grad_norm": 10.499738693237305, "learning_rate": 9.82408104593059e-05, "loss": 1.1397, "step": 2036 }, { "epoch": 0.1380174808591368, "grad_norm": 12.853378295898438, "learning_rate": 9.82394414402081e-05, "loss": 1.2373, "step": 2037 }, { "epoch": 0.13808523612710888, "grad_norm": 8.66174602508545, "learning_rate": 9.823807242111028e-05, "loss": 0.9863, "step": 2038 }, { "epoch": 0.13815299139508097, "grad_norm": 10.246259689331055, "learning_rate": 9.823670340201246e-05, "loss": 1.0892, "step": 2039 }, { "epoch": 0.13822074666305306, "grad_norm": 11.880420684814453, "learning_rate": 9.823533438291465e-05, "loss": 1.069, "step": 2040 }, { "epoch": 0.13828850193102513, "grad_norm": 9.475419998168945, "learning_rate": 9.823396536381683e-05, "loss": 0.9276, "step": 2041 }, { "epoch": 0.13835625719899722, "grad_norm": 9.827219009399414, "learning_rate": 9.823259634471903e-05, "loss": 1.2536, "step": 2042 }, { "epoch": 0.1384240124669693, "grad_norm": 8.558990478515625, "learning_rate": 9.82312273256212e-05, "loss": 1.2779, "step": 2043 }, { "epoch": 0.1384917677349414, "grad_norm": 12.038803100585938, "learning_rate": 9.822985830652339e-05, "loss": 0.9019, "step": 2044 }, { "epoch": 0.13855952300291347, "grad_norm": 10.764846801757812, "learning_rate": 9.822848928742557e-05, "loss": 1.1803, "step": 2045 }, { "epoch": 0.13862727827088556, "grad_norm": 10.788616180419922, "learning_rate": 9.822712026832775e-05, "loss": 1.3469, "step": 2046 }, { "epoch": 0.13869503353885765, "grad_norm": 8.718049049377441, "learning_rate": 9.822575124922994e-05, "loss": 0.9135, "step": 2047 }, { "epoch": 0.13876278880682974, "grad_norm": 12.245726585388184, "learning_rate": 9.822438223013212e-05, "loss": 1.3114, "step": 2048 }, { "epoch": 0.1388305440748018, "grad_norm": 10.55252456665039, "learning_rate": 9.82230132110343e-05, "loss": 1.1303, "step": 2049 }, { "epoch": 0.1388982993427739, "grad_norm": 8.177289962768555, "learning_rate": 9.822164419193648e-05, "loss": 0.9796, "step": 2050 }, { "epoch": 0.138966054610746, "grad_norm": 10.708680152893066, "learning_rate": 9.822027517283866e-05, "loss": 1.1546, "step": 2051 }, { "epoch": 0.13903380987871808, "grad_norm": 8.631631851196289, "learning_rate": 9.821890615374086e-05, "loss": 1.0055, "step": 2052 }, { "epoch": 0.13910156514669014, "grad_norm": 8.778770446777344, "learning_rate": 9.821753713464304e-05, "loss": 0.9626, "step": 2053 }, { "epoch": 0.13916932041466223, "grad_norm": 10.004143714904785, "learning_rate": 9.821616811554522e-05, "loss": 1.1411, "step": 2054 }, { "epoch": 0.13923707568263433, "grad_norm": 9.47324275970459, "learning_rate": 9.82147990964474e-05, "loss": 1.0081, "step": 2055 }, { "epoch": 0.13930483095060642, "grad_norm": 10.014934539794922, "learning_rate": 9.821343007734959e-05, "loss": 1.3015, "step": 2056 }, { "epoch": 0.13937258621857848, "grad_norm": 10.589959144592285, "learning_rate": 9.821206105825177e-05, "loss": 1.128, "step": 2057 }, { "epoch": 0.13944034148655057, "grad_norm": 9.273834228515625, "learning_rate": 9.821069203915395e-05, "loss": 0.7743, "step": 2058 }, { "epoch": 0.13950809675452266, "grad_norm": 10.72019100189209, "learning_rate": 9.820932302005613e-05, "loss": 0.9524, "step": 2059 }, { "epoch": 0.13957585202249476, "grad_norm": 11.212404251098633, "learning_rate": 9.820795400095831e-05, "loss": 1.2044, "step": 2060 }, { "epoch": 0.13964360729046682, "grad_norm": 9.800296783447266, "learning_rate": 9.82065849818605e-05, "loss": 1.0819, "step": 2061 }, { "epoch": 0.1397113625584389, "grad_norm": 8.668676376342773, "learning_rate": 9.820521596276269e-05, "loss": 1.0778, "step": 2062 }, { "epoch": 0.139779117826411, "grad_norm": 10.854613304138184, "learning_rate": 9.820384694366487e-05, "loss": 1.1181, "step": 2063 }, { "epoch": 0.1398468730943831, "grad_norm": 12.019871711730957, "learning_rate": 9.820247792456705e-05, "loss": 1.2507, "step": 2064 }, { "epoch": 0.1399146283623552, "grad_norm": 8.835234642028809, "learning_rate": 9.820110890546924e-05, "loss": 1.1338, "step": 2065 }, { "epoch": 0.13998238363032725, "grad_norm": 8.57636547088623, "learning_rate": 9.819973988637142e-05, "loss": 0.957, "step": 2066 }, { "epoch": 0.14005013889829934, "grad_norm": 9.080939292907715, "learning_rate": 9.81983708672736e-05, "loss": 1.0035, "step": 2067 }, { "epoch": 0.14011789416627143, "grad_norm": 9.616862297058105, "learning_rate": 9.819700184817578e-05, "loss": 0.9045, "step": 2068 }, { "epoch": 0.14018564943424353, "grad_norm": 10.04652214050293, "learning_rate": 9.819563282907796e-05, "loss": 1.1202, "step": 2069 }, { "epoch": 0.1402534047022156, "grad_norm": 9.132543563842773, "learning_rate": 9.819426380998016e-05, "loss": 0.9918, "step": 2070 }, { "epoch": 0.14032115997018768, "grad_norm": 12.023603439331055, "learning_rate": 9.819289479088234e-05, "loss": 1.1271, "step": 2071 }, { "epoch": 0.14038891523815977, "grad_norm": 9.167064666748047, "learning_rate": 9.819152577178452e-05, "loss": 0.9136, "step": 2072 }, { "epoch": 0.14045667050613186, "grad_norm": 11.954336166381836, "learning_rate": 9.81901567526867e-05, "loss": 1.0073, "step": 2073 }, { "epoch": 0.14052442577410393, "grad_norm": 9.166129112243652, "learning_rate": 9.818878773358888e-05, "loss": 1.1101, "step": 2074 }, { "epoch": 0.14059218104207602, "grad_norm": 8.53992748260498, "learning_rate": 9.818741871449107e-05, "loss": 0.9556, "step": 2075 }, { "epoch": 0.1406599363100481, "grad_norm": 10.769463539123535, "learning_rate": 9.818604969539325e-05, "loss": 1.3009, "step": 2076 }, { "epoch": 0.1407276915780202, "grad_norm": 10.938043594360352, "learning_rate": 9.818468067629543e-05, "loss": 1.4299, "step": 2077 }, { "epoch": 0.14079544684599227, "grad_norm": 10.8585844039917, "learning_rate": 9.818331165719761e-05, "loss": 1.0804, "step": 2078 }, { "epoch": 0.14086320211396436, "grad_norm": 8.763557434082031, "learning_rate": 9.81819426380998e-05, "loss": 0.8321, "step": 2079 }, { "epoch": 0.14093095738193645, "grad_norm": 11.917708396911621, "learning_rate": 9.818057361900199e-05, "loss": 1.4293, "step": 2080 }, { "epoch": 0.14099871264990854, "grad_norm": 10.189987182617188, "learning_rate": 9.817920459990417e-05, "loss": 1.139, "step": 2081 }, { "epoch": 0.1410664679178806, "grad_norm": 11.09156608581543, "learning_rate": 9.817783558080635e-05, "loss": 0.9866, "step": 2082 }, { "epoch": 0.1411342231858527, "grad_norm": 9.394566535949707, "learning_rate": 9.817646656170854e-05, "loss": 0.9401, "step": 2083 }, { "epoch": 0.1412019784538248, "grad_norm": 8.232394218444824, "learning_rate": 9.817509754261072e-05, "loss": 0.9193, "step": 2084 }, { "epoch": 0.14126973372179688, "grad_norm": 9.4952392578125, "learning_rate": 9.81737285235129e-05, "loss": 1.3217, "step": 2085 }, { "epoch": 0.14133748898976894, "grad_norm": 10.110014915466309, "learning_rate": 9.81723595044151e-05, "loss": 1.0413, "step": 2086 }, { "epoch": 0.14140524425774104, "grad_norm": 10.850225448608398, "learning_rate": 9.817099048531728e-05, "loss": 1.3722, "step": 2087 }, { "epoch": 0.14147299952571313, "grad_norm": 10.924457550048828, "learning_rate": 9.816962146621946e-05, "loss": 1.4004, "step": 2088 }, { "epoch": 0.14154075479368522, "grad_norm": 10.022381782531738, "learning_rate": 9.816825244712165e-05, "loss": 1.1989, "step": 2089 }, { "epoch": 0.14160851006165728, "grad_norm": 11.537693977355957, "learning_rate": 9.816688342802383e-05, "loss": 1.1922, "step": 2090 }, { "epoch": 0.14167626532962938, "grad_norm": 8.525372505187988, "learning_rate": 9.816551440892601e-05, "loss": 0.9291, "step": 2091 }, { "epoch": 0.14174402059760147, "grad_norm": 8.972722053527832, "learning_rate": 9.816414538982819e-05, "loss": 1.0917, "step": 2092 }, { "epoch": 0.14181177586557356, "grad_norm": 8.386235237121582, "learning_rate": 9.816277637073039e-05, "loss": 1.0482, "step": 2093 }, { "epoch": 0.14187953113354562, "grad_norm": 9.620473861694336, "learning_rate": 9.816140735163257e-05, "loss": 1.2523, "step": 2094 }, { "epoch": 0.1419472864015177, "grad_norm": 9.236804962158203, "learning_rate": 9.816003833253475e-05, "loss": 0.9474, "step": 2095 }, { "epoch": 0.1420150416694898, "grad_norm": 9.06925106048584, "learning_rate": 9.815866931343693e-05, "loss": 0.8921, "step": 2096 }, { "epoch": 0.1420827969374619, "grad_norm": 10.127729415893555, "learning_rate": 9.815730029433912e-05, "loss": 1.1693, "step": 2097 }, { "epoch": 0.14215055220543396, "grad_norm": 10.703007698059082, "learning_rate": 9.81559312752413e-05, "loss": 1.0652, "step": 2098 }, { "epoch": 0.14221830747340605, "grad_norm": 10.004093170166016, "learning_rate": 9.815456225614348e-05, "loss": 1.0158, "step": 2099 }, { "epoch": 0.14228606274137814, "grad_norm": 8.900236129760742, "learning_rate": 9.815319323704566e-05, "loss": 0.7327, "step": 2100 }, { "epoch": 0.14235381800935024, "grad_norm": 13.102290153503418, "learning_rate": 9.815182421794784e-05, "loss": 1.2837, "step": 2101 }, { "epoch": 0.1424215732773223, "grad_norm": 9.32657527923584, "learning_rate": 9.815045519885004e-05, "loss": 1.0812, "step": 2102 }, { "epoch": 0.1424893285452944, "grad_norm": 11.42785930633545, "learning_rate": 9.814908617975222e-05, "loss": 1.4467, "step": 2103 }, { "epoch": 0.14255708381326648, "grad_norm": 9.878450393676758, "learning_rate": 9.81477171606544e-05, "loss": 1.0774, "step": 2104 }, { "epoch": 0.14262483908123857, "grad_norm": 9.329227447509766, "learning_rate": 9.814634814155658e-05, "loss": 1.2536, "step": 2105 }, { "epoch": 0.14269259434921064, "grad_norm": 9.071858406066895, "learning_rate": 9.814497912245876e-05, "loss": 1.148, "step": 2106 }, { "epoch": 0.14276034961718273, "grad_norm": 11.595810890197754, "learning_rate": 9.814361010336095e-05, "loss": 1.1992, "step": 2107 }, { "epoch": 0.14282810488515482, "grad_norm": 10.041107177734375, "learning_rate": 9.814224108426313e-05, "loss": 1.0447, "step": 2108 }, { "epoch": 0.1428958601531269, "grad_norm": 9.913261413574219, "learning_rate": 9.814087206516531e-05, "loss": 1.1629, "step": 2109 }, { "epoch": 0.14296361542109898, "grad_norm": 10.983177185058594, "learning_rate": 9.813950304606749e-05, "loss": 1.0288, "step": 2110 }, { "epoch": 0.14303137068907107, "grad_norm": 10.479610443115234, "learning_rate": 9.813813402696969e-05, "loss": 1.5308, "step": 2111 }, { "epoch": 0.14309912595704316, "grad_norm": 8.70734691619873, "learning_rate": 9.813676500787187e-05, "loss": 0.9987, "step": 2112 }, { "epoch": 0.14316688122501525, "grad_norm": 8.242984771728516, "learning_rate": 9.813539598877405e-05, "loss": 1.0051, "step": 2113 }, { "epoch": 0.14323463649298732, "grad_norm": 8.627467155456543, "learning_rate": 9.813402696967623e-05, "loss": 1.1134, "step": 2114 }, { "epoch": 0.1433023917609594, "grad_norm": 9.970986366271973, "learning_rate": 9.813265795057841e-05, "loss": 1.148, "step": 2115 }, { "epoch": 0.1433701470289315, "grad_norm": 8.719794273376465, "learning_rate": 9.81312889314806e-05, "loss": 1.1013, "step": 2116 }, { "epoch": 0.1434379022969036, "grad_norm": 9.455860137939453, "learning_rate": 9.812991991238278e-05, "loss": 1.2333, "step": 2117 }, { "epoch": 0.14350565756487568, "grad_norm": 8.851629257202148, "learning_rate": 9.812855089328496e-05, "loss": 1.0985, "step": 2118 }, { "epoch": 0.14357341283284775, "grad_norm": 11.843599319458008, "learning_rate": 9.812718187418714e-05, "loss": 1.2292, "step": 2119 }, { "epoch": 0.14364116810081984, "grad_norm": 9.550506591796875, "learning_rate": 9.812581285508934e-05, "loss": 0.9466, "step": 2120 }, { "epoch": 0.14370892336879193, "grad_norm": 9.234643936157227, "learning_rate": 9.812444383599152e-05, "loss": 1.1968, "step": 2121 }, { "epoch": 0.14377667863676402, "grad_norm": 9.365439414978027, "learning_rate": 9.81230748168937e-05, "loss": 1.1643, "step": 2122 }, { "epoch": 0.14384443390473609, "grad_norm": 9.476024627685547, "learning_rate": 9.812170579779588e-05, "loss": 1.0541, "step": 2123 }, { "epoch": 0.14391218917270818, "grad_norm": 9.53847885131836, "learning_rate": 9.812033677869806e-05, "loss": 1.0808, "step": 2124 }, { "epoch": 0.14397994444068027, "grad_norm": 8.918699264526367, "learning_rate": 9.811896775960025e-05, "loss": 1.1055, "step": 2125 }, { "epoch": 0.14404769970865236, "grad_norm": 12.126187324523926, "learning_rate": 9.811759874050243e-05, "loss": 1.6079, "step": 2126 }, { "epoch": 0.14411545497662442, "grad_norm": 10.922599792480469, "learning_rate": 9.811622972140461e-05, "loss": 1.2569, "step": 2127 }, { "epoch": 0.14418321024459652, "grad_norm": 8.582880020141602, "learning_rate": 9.811486070230679e-05, "loss": 0.9562, "step": 2128 }, { "epoch": 0.1442509655125686, "grad_norm": 8.926568031311035, "learning_rate": 9.811349168320899e-05, "loss": 1.0108, "step": 2129 }, { "epoch": 0.1443187207805407, "grad_norm": 13.906332015991211, "learning_rate": 9.811212266411117e-05, "loss": 1.1851, "step": 2130 }, { "epoch": 0.14438647604851276, "grad_norm": 10.374212265014648, "learning_rate": 9.811075364501335e-05, "loss": 1.2661, "step": 2131 }, { "epoch": 0.14445423131648485, "grad_norm": 10.580810546875, "learning_rate": 9.810938462591554e-05, "loss": 0.8967, "step": 2132 }, { "epoch": 0.14452198658445695, "grad_norm": 8.91865062713623, "learning_rate": 9.810801560681772e-05, "loss": 1.032, "step": 2133 }, { "epoch": 0.14458974185242904, "grad_norm": 8.16041374206543, "learning_rate": 9.81066465877199e-05, "loss": 0.8622, "step": 2134 }, { "epoch": 0.1446574971204011, "grad_norm": 8.575905799865723, "learning_rate": 9.81052775686221e-05, "loss": 0.9956, "step": 2135 }, { "epoch": 0.1447252523883732, "grad_norm": 9.620558738708496, "learning_rate": 9.810390854952428e-05, "loss": 1.1442, "step": 2136 }, { "epoch": 0.14479300765634529, "grad_norm": 10.390005111694336, "learning_rate": 9.810253953042646e-05, "loss": 1.0518, "step": 2137 }, { "epoch": 0.14486076292431738, "grad_norm": 10.714217185974121, "learning_rate": 9.810117051132864e-05, "loss": 1.3215, "step": 2138 }, { "epoch": 0.14492851819228944, "grad_norm": 9.390836715698242, "learning_rate": 9.809980149223083e-05, "loss": 1.0936, "step": 2139 }, { "epoch": 0.14499627346026153, "grad_norm": 10.848445892333984, "learning_rate": 9.809843247313301e-05, "loss": 1.1946, "step": 2140 }, { "epoch": 0.14506402872823362, "grad_norm": 11.041672706604004, "learning_rate": 9.809706345403519e-05, "loss": 1.0422, "step": 2141 }, { "epoch": 0.14513178399620572, "grad_norm": 10.1151762008667, "learning_rate": 9.809569443493737e-05, "loss": 1.2199, "step": 2142 }, { "epoch": 0.14519953926417778, "grad_norm": 8.686629295349121, "learning_rate": 9.809432541583957e-05, "loss": 0.9861, "step": 2143 }, { "epoch": 0.14526729453214987, "grad_norm": 10.579313278198242, "learning_rate": 9.809295639674175e-05, "loss": 0.9372, "step": 2144 }, { "epoch": 0.14533504980012196, "grad_norm": 8.916631698608398, "learning_rate": 9.809158737764393e-05, "loss": 0.8024, "step": 2145 }, { "epoch": 0.14540280506809405, "grad_norm": 9.29333209991455, "learning_rate": 9.80902183585461e-05, "loss": 1.0785, "step": 2146 }, { "epoch": 0.14547056033606612, "grad_norm": 10.584277153015137, "learning_rate": 9.808884933944829e-05, "loss": 0.9167, "step": 2147 }, { "epoch": 0.1455383156040382, "grad_norm": 10.68551254272461, "learning_rate": 9.808748032035048e-05, "loss": 1.0019, "step": 2148 }, { "epoch": 0.1456060708720103, "grad_norm": 9.057500839233398, "learning_rate": 9.808611130125266e-05, "loss": 1.194, "step": 2149 }, { "epoch": 0.1456738261399824, "grad_norm": 8.642207145690918, "learning_rate": 9.808474228215484e-05, "loss": 1.0849, "step": 2150 }, { "epoch": 0.14574158140795446, "grad_norm": 9.460419654846191, "learning_rate": 9.808337326305702e-05, "loss": 1.1747, "step": 2151 }, { "epoch": 0.14580933667592655, "grad_norm": 9.003097534179688, "learning_rate": 9.80820042439592e-05, "loss": 0.8967, "step": 2152 }, { "epoch": 0.14587709194389864, "grad_norm": 9.85009765625, "learning_rate": 9.80806352248614e-05, "loss": 1.1698, "step": 2153 }, { "epoch": 0.14594484721187073, "grad_norm": 10.233894348144531, "learning_rate": 9.807926620576358e-05, "loss": 1.0496, "step": 2154 }, { "epoch": 0.1460126024798428, "grad_norm": 9.660355567932129, "learning_rate": 9.807789718666576e-05, "loss": 0.9616, "step": 2155 }, { "epoch": 0.1460803577478149, "grad_norm": 9.46469497680664, "learning_rate": 9.807652816756794e-05, "loss": 1.0005, "step": 2156 }, { "epoch": 0.14614811301578698, "grad_norm": 8.004712104797363, "learning_rate": 9.807515914847013e-05, "loss": 0.8455, "step": 2157 }, { "epoch": 0.14621586828375907, "grad_norm": 10.039002418518066, "learning_rate": 9.807379012937231e-05, "loss": 1.097, "step": 2158 }, { "epoch": 0.14628362355173113, "grad_norm": 11.16292667388916, "learning_rate": 9.807242111027449e-05, "loss": 1.4332, "step": 2159 }, { "epoch": 0.14635137881970323, "grad_norm": 9.34833812713623, "learning_rate": 9.807105209117667e-05, "loss": 1.0438, "step": 2160 }, { "epoch": 0.14641913408767532, "grad_norm": 10.256768226623535, "learning_rate": 9.806968307207885e-05, "loss": 1.2923, "step": 2161 }, { "epoch": 0.1464868893556474, "grad_norm": 8.890941619873047, "learning_rate": 9.806831405298105e-05, "loss": 0.8419, "step": 2162 }, { "epoch": 0.14655464462361947, "grad_norm": 9.340752601623535, "learning_rate": 9.806694503388323e-05, "loss": 0.9504, "step": 2163 }, { "epoch": 0.14662239989159156, "grad_norm": 10.89192008972168, "learning_rate": 9.80655760147854e-05, "loss": 1.012, "step": 2164 }, { "epoch": 0.14669015515956366, "grad_norm": 8.626432418823242, "learning_rate": 9.806420699568759e-05, "loss": 0.8943, "step": 2165 }, { "epoch": 0.14675791042753575, "grad_norm": 9.465259552001953, "learning_rate": 9.806283797658978e-05, "loss": 1.1617, "step": 2166 }, { "epoch": 0.1468256656955078, "grad_norm": 11.695369720458984, "learning_rate": 9.806146895749196e-05, "loss": 1.2719, "step": 2167 }, { "epoch": 0.1468934209634799, "grad_norm": 9.416814804077148, "learning_rate": 9.806009993839414e-05, "loss": 1.3615, "step": 2168 }, { "epoch": 0.146961176231452, "grad_norm": 9.6004638671875, "learning_rate": 9.805873091929632e-05, "loss": 1.2571, "step": 2169 }, { "epoch": 0.1470289314994241, "grad_norm": 9.187546730041504, "learning_rate": 9.80573619001985e-05, "loss": 0.962, "step": 2170 }, { "epoch": 0.14709668676739618, "grad_norm": 10.691286087036133, "learning_rate": 9.80559928811007e-05, "loss": 1.0619, "step": 2171 }, { "epoch": 0.14716444203536824, "grad_norm": 9.17457103729248, "learning_rate": 9.805462386200288e-05, "loss": 1.1507, "step": 2172 }, { "epoch": 0.14723219730334033, "grad_norm": 10.739082336425781, "learning_rate": 9.805325484290506e-05, "loss": 1.2929, "step": 2173 }, { "epoch": 0.14729995257131243, "grad_norm": 9.091232299804688, "learning_rate": 9.805188582380724e-05, "loss": 0.9422, "step": 2174 }, { "epoch": 0.14736770783928452, "grad_norm": 8.231295585632324, "learning_rate": 9.805051680470943e-05, "loss": 0.9695, "step": 2175 }, { "epoch": 0.14743546310725658, "grad_norm": 9.622401237487793, "learning_rate": 9.804914778561161e-05, "loss": 0.982, "step": 2176 }, { "epoch": 0.14750321837522867, "grad_norm": 9.49541187286377, "learning_rate": 9.804777876651379e-05, "loss": 0.9005, "step": 2177 }, { "epoch": 0.14757097364320076, "grad_norm": 10.584654808044434, "learning_rate": 9.804640974741599e-05, "loss": 0.9427, "step": 2178 }, { "epoch": 0.14763872891117286, "grad_norm": 9.132317543029785, "learning_rate": 9.804504072831817e-05, "loss": 1.0192, "step": 2179 }, { "epoch": 0.14770648417914492, "grad_norm": 8.598082542419434, "learning_rate": 9.804367170922035e-05, "loss": 0.9842, "step": 2180 }, { "epoch": 0.147774239447117, "grad_norm": 8.941360473632812, "learning_rate": 9.804230269012254e-05, "loss": 0.9907, "step": 2181 }, { "epoch": 0.1478419947150891, "grad_norm": 8.119913101196289, "learning_rate": 9.804093367102472e-05, "loss": 1.1259, "step": 2182 }, { "epoch": 0.1479097499830612, "grad_norm": 9.505135536193848, "learning_rate": 9.80395646519269e-05, "loss": 1.0509, "step": 2183 }, { "epoch": 0.14797750525103326, "grad_norm": 8.420906066894531, "learning_rate": 9.803819563282908e-05, "loss": 1.1827, "step": 2184 }, { "epoch": 0.14804526051900535, "grad_norm": 9.860353469848633, "learning_rate": 9.803682661373128e-05, "loss": 1.0286, "step": 2185 }, { "epoch": 0.14811301578697744, "grad_norm": 8.259954452514648, "learning_rate": 9.803545759463346e-05, "loss": 1.0094, "step": 2186 }, { "epoch": 0.14818077105494953, "grad_norm": 10.46882438659668, "learning_rate": 9.803408857553564e-05, "loss": 1.1817, "step": 2187 }, { "epoch": 0.1482485263229216, "grad_norm": 9.315580368041992, "learning_rate": 9.803271955643782e-05, "loss": 1.2816, "step": 2188 }, { "epoch": 0.1483162815908937, "grad_norm": 10.408548355102539, "learning_rate": 9.803135053734001e-05, "loss": 1.0355, "step": 2189 }, { "epoch": 0.14838403685886578, "grad_norm": 9.682003021240234, "learning_rate": 9.802998151824219e-05, "loss": 1.0413, "step": 2190 }, { "epoch": 0.14845179212683787, "grad_norm": 10.041797637939453, "learning_rate": 9.802861249914437e-05, "loss": 0.8388, "step": 2191 }, { "epoch": 0.14851954739480994, "grad_norm": 8.367657661437988, "learning_rate": 9.802724348004655e-05, "loss": 1.0724, "step": 2192 }, { "epoch": 0.14858730266278203, "grad_norm": 9.9558744430542, "learning_rate": 9.802587446094873e-05, "loss": 1.3534, "step": 2193 }, { "epoch": 0.14865505793075412, "grad_norm": 9.244332313537598, "learning_rate": 9.802450544185093e-05, "loss": 1.366, "step": 2194 }, { "epoch": 0.1487228131987262, "grad_norm": 9.560718536376953, "learning_rate": 9.80231364227531e-05, "loss": 1.0556, "step": 2195 }, { "epoch": 0.14879056846669828, "grad_norm": 8.724915504455566, "learning_rate": 9.802176740365529e-05, "loss": 1.316, "step": 2196 }, { "epoch": 0.14885832373467037, "grad_norm": 9.468677520751953, "learning_rate": 9.802039838455747e-05, "loss": 1.1289, "step": 2197 }, { "epoch": 0.14892607900264246, "grad_norm": 8.56347942352295, "learning_rate": 9.801902936545966e-05, "loss": 1.0402, "step": 2198 }, { "epoch": 0.14899383427061455, "grad_norm": 9.328559875488281, "learning_rate": 9.801766034636184e-05, "loss": 1.186, "step": 2199 }, { "epoch": 0.14906158953858661, "grad_norm": 10.20579719543457, "learning_rate": 9.801629132726402e-05, "loss": 1.0896, "step": 2200 }, { "epoch": 0.1491293448065587, "grad_norm": 11.614208221435547, "learning_rate": 9.80149223081662e-05, "loss": 1.2742, "step": 2201 }, { "epoch": 0.1491971000745308, "grad_norm": 8.291358947753906, "learning_rate": 9.801355328906838e-05, "loss": 1.0299, "step": 2202 }, { "epoch": 0.1492648553425029, "grad_norm": 8.734344482421875, "learning_rate": 9.801218426997058e-05, "loss": 1.0922, "step": 2203 }, { "epoch": 0.14933261061047495, "grad_norm": 8.498616218566895, "learning_rate": 9.801081525087276e-05, "loss": 0.8436, "step": 2204 }, { "epoch": 0.14940036587844704, "grad_norm": 7.4501447677612305, "learning_rate": 9.800944623177494e-05, "loss": 0.8629, "step": 2205 }, { "epoch": 0.14946812114641914, "grad_norm": 11.652278900146484, "learning_rate": 9.800807721267712e-05, "loss": 1.1089, "step": 2206 }, { "epoch": 0.14953587641439123, "grad_norm": 11.043471336364746, "learning_rate": 9.80067081935793e-05, "loss": 1.2758, "step": 2207 }, { "epoch": 0.1496036316823633, "grad_norm": 9.376245498657227, "learning_rate": 9.800533917448149e-05, "loss": 1.0178, "step": 2208 }, { "epoch": 0.14967138695033538, "grad_norm": 8.678750038146973, "learning_rate": 9.800397015538367e-05, "loss": 1.0276, "step": 2209 }, { "epoch": 0.14973914221830747, "grad_norm": 10.233052253723145, "learning_rate": 9.800260113628585e-05, "loss": 1.0468, "step": 2210 }, { "epoch": 0.14980689748627957, "grad_norm": 10.711477279663086, "learning_rate": 9.800123211718803e-05, "loss": 1.3919, "step": 2211 }, { "epoch": 0.14987465275425163, "grad_norm": 10.130411148071289, "learning_rate": 9.799986309809023e-05, "loss": 1.1085, "step": 2212 }, { "epoch": 0.14994240802222372, "grad_norm": 9.64091682434082, "learning_rate": 9.79984940789924e-05, "loss": 1.0661, "step": 2213 }, { "epoch": 0.1500101632901958, "grad_norm": 9.80176830291748, "learning_rate": 9.799712505989459e-05, "loss": 1.3426, "step": 2214 }, { "epoch": 0.1500779185581679, "grad_norm": 9.34835147857666, "learning_rate": 9.799575604079677e-05, "loss": 1.0281, "step": 2215 }, { "epoch": 0.15014567382613997, "grad_norm": 8.638712882995605, "learning_rate": 9.799438702169895e-05, "loss": 0.9037, "step": 2216 }, { "epoch": 0.15021342909411206, "grad_norm": 10.499733924865723, "learning_rate": 9.799301800260114e-05, "loss": 1.1152, "step": 2217 }, { "epoch": 0.15028118436208415, "grad_norm": 9.093478202819824, "learning_rate": 9.799164898350332e-05, "loss": 1.2026, "step": 2218 }, { "epoch": 0.15034893963005624, "grad_norm": 13.181863784790039, "learning_rate": 9.79902799644055e-05, "loss": 1.3256, "step": 2219 }, { "epoch": 0.1504166948980283, "grad_norm": 10.221563339233398, "learning_rate": 9.798891094530768e-05, "loss": 1.1867, "step": 2220 }, { "epoch": 0.1504844501660004, "grad_norm": 9.512944221496582, "learning_rate": 9.798754192620988e-05, "loss": 1.1145, "step": 2221 }, { "epoch": 0.1505522054339725, "grad_norm": 8.991315841674805, "learning_rate": 9.798617290711206e-05, "loss": 1.0289, "step": 2222 }, { "epoch": 0.15061996070194458, "grad_norm": 9.003118515014648, "learning_rate": 9.798480388801424e-05, "loss": 0.9618, "step": 2223 }, { "epoch": 0.15068771596991667, "grad_norm": 11.337166786193848, "learning_rate": 9.798343486891643e-05, "loss": 1.3864, "step": 2224 }, { "epoch": 0.15075547123788874, "grad_norm": 9.193288803100586, "learning_rate": 9.798206584981861e-05, "loss": 1.1314, "step": 2225 }, { "epoch": 0.15082322650586083, "grad_norm": 10.137048721313477, "learning_rate": 9.798069683072079e-05, "loss": 1.3325, "step": 2226 }, { "epoch": 0.15089098177383292, "grad_norm": 8.248672485351562, "learning_rate": 9.797932781162299e-05, "loss": 1.0688, "step": 2227 }, { "epoch": 0.150958737041805, "grad_norm": 10.51007080078125, "learning_rate": 9.797795879252517e-05, "loss": 1.2191, "step": 2228 }, { "epoch": 0.15102649230977708, "grad_norm": 14.15853214263916, "learning_rate": 9.797658977342735e-05, "loss": 1.0961, "step": 2229 }, { "epoch": 0.15109424757774917, "grad_norm": 8.42485523223877, "learning_rate": 9.797522075432954e-05, "loss": 1.0314, "step": 2230 }, { "epoch": 0.15116200284572126, "grad_norm": 9.3310546875, "learning_rate": 9.797385173523172e-05, "loss": 1.2372, "step": 2231 }, { "epoch": 0.15122975811369335, "grad_norm": 9.323569297790527, "learning_rate": 9.79724827161339e-05, "loss": 1.1823, "step": 2232 }, { "epoch": 0.15129751338166542, "grad_norm": 9.719592094421387, "learning_rate": 9.797111369703608e-05, "loss": 1.0689, "step": 2233 }, { "epoch": 0.1513652686496375, "grad_norm": 9.37340259552002, "learning_rate": 9.796974467793826e-05, "loss": 1.0703, "step": 2234 }, { "epoch": 0.1514330239176096, "grad_norm": 9.803778648376465, "learning_rate": 9.796837565884045e-05, "loss": 1.0422, "step": 2235 }, { "epoch": 0.1515007791855817, "grad_norm": 8.317301750183105, "learning_rate": 9.796700663974264e-05, "loss": 0.9829, "step": 2236 }, { "epoch": 0.15156853445355375, "grad_norm": 8.473258972167969, "learning_rate": 9.796563762064482e-05, "loss": 1.064, "step": 2237 }, { "epoch": 0.15163628972152585, "grad_norm": 8.495006561279297, "learning_rate": 9.7964268601547e-05, "loss": 0.8646, "step": 2238 }, { "epoch": 0.15170404498949794, "grad_norm": 9.140898704528809, "learning_rate": 9.796289958244918e-05, "loss": 1.068, "step": 2239 }, { "epoch": 0.15177180025747003, "grad_norm": 10.264453887939453, "learning_rate": 9.796153056335137e-05, "loss": 0.9681, "step": 2240 }, { "epoch": 0.1518395555254421, "grad_norm": 10.407776832580566, "learning_rate": 9.796016154425355e-05, "loss": 1.2538, "step": 2241 }, { "epoch": 0.15190731079341419, "grad_norm": 10.143677711486816, "learning_rate": 9.795879252515573e-05, "loss": 1.138, "step": 2242 }, { "epoch": 0.15197506606138628, "grad_norm": 12.528799057006836, "learning_rate": 9.795742350605791e-05, "loss": 1.2654, "step": 2243 }, { "epoch": 0.15204282132935837, "grad_norm": 10.635498046875, "learning_rate": 9.79560544869601e-05, "loss": 1.2116, "step": 2244 }, { "epoch": 0.15211057659733043, "grad_norm": 10.71164608001709, "learning_rate": 9.795468546786229e-05, "loss": 1.0262, "step": 2245 }, { "epoch": 0.15217833186530252, "grad_norm": 9.121109962463379, "learning_rate": 9.795331644876447e-05, "loss": 1.1657, "step": 2246 }, { "epoch": 0.15224608713327462, "grad_norm": 8.174636840820312, "learning_rate": 9.795194742966665e-05, "loss": 1.0027, "step": 2247 }, { "epoch": 0.1523138424012467, "grad_norm": 8.763494491577148, "learning_rate": 9.795057841056883e-05, "loss": 1.1103, "step": 2248 }, { "epoch": 0.15238159766921877, "grad_norm": 8.028278350830078, "learning_rate": 9.794920939147102e-05, "loss": 1.0205, "step": 2249 }, { "epoch": 0.15244935293719086, "grad_norm": 8.96112060546875, "learning_rate": 9.79478403723732e-05, "loss": 1.2949, "step": 2250 }, { "epoch": 0.15251710820516295, "grad_norm": 8.67423152923584, "learning_rate": 9.794647135327538e-05, "loss": 1.0602, "step": 2251 }, { "epoch": 0.15258486347313505, "grad_norm": 9.45671272277832, "learning_rate": 9.794510233417756e-05, "loss": 1.0362, "step": 2252 }, { "epoch": 0.1526526187411071, "grad_norm": 10.24669075012207, "learning_rate": 9.794373331507976e-05, "loss": 0.9918, "step": 2253 }, { "epoch": 0.1527203740090792, "grad_norm": 9.014079093933105, "learning_rate": 9.794236429598194e-05, "loss": 1.1636, "step": 2254 }, { "epoch": 0.1527881292770513, "grad_norm": 6.800943851470947, "learning_rate": 9.794099527688412e-05, "loss": 0.9089, "step": 2255 }, { "epoch": 0.15285588454502338, "grad_norm": 9.90794849395752, "learning_rate": 9.79396262577863e-05, "loss": 0.7669, "step": 2256 }, { "epoch": 0.15292363981299545, "grad_norm": 9.870927810668945, "learning_rate": 9.793825723868848e-05, "loss": 1.1243, "step": 2257 }, { "epoch": 0.15299139508096754, "grad_norm": 9.707404136657715, "learning_rate": 9.793688821959067e-05, "loss": 1.1558, "step": 2258 }, { "epoch": 0.15305915034893963, "grad_norm": 8.362896919250488, "learning_rate": 9.793551920049285e-05, "loss": 0.8601, "step": 2259 }, { "epoch": 0.15312690561691172, "grad_norm": 9.536920547485352, "learning_rate": 9.793415018139503e-05, "loss": 1.2741, "step": 2260 }, { "epoch": 0.1531946608848838, "grad_norm": 11.108535766601562, "learning_rate": 9.793278116229721e-05, "loss": 1.3118, "step": 2261 }, { "epoch": 0.15326241615285588, "grad_norm": 7.281479358673096, "learning_rate": 9.793141214319939e-05, "loss": 0.9165, "step": 2262 }, { "epoch": 0.15333017142082797, "grad_norm": 9.166728973388672, "learning_rate": 9.793004312410159e-05, "loss": 1.0845, "step": 2263 }, { "epoch": 0.15339792668880006, "grad_norm": 11.539854049682617, "learning_rate": 9.792867410500377e-05, "loss": 1.1129, "step": 2264 }, { "epoch": 0.15346568195677213, "grad_norm": 8.588869094848633, "learning_rate": 9.792730508590595e-05, "loss": 0.9698, "step": 2265 }, { "epoch": 0.15353343722474422, "grad_norm": 8.270078659057617, "learning_rate": 9.792593606680813e-05, "loss": 1.2306, "step": 2266 }, { "epoch": 0.1536011924927163, "grad_norm": 7.908688545227051, "learning_rate": 9.792456704771032e-05, "loss": 1.283, "step": 2267 }, { "epoch": 0.1536689477606884, "grad_norm": 10.376410484313965, "learning_rate": 9.79231980286125e-05, "loss": 1.0246, "step": 2268 }, { "epoch": 0.15373670302866047, "grad_norm": 9.517715454101562, "learning_rate": 9.792182900951468e-05, "loss": 0.9975, "step": 2269 }, { "epoch": 0.15380445829663256, "grad_norm": 8.789438247680664, "learning_rate": 9.792045999041688e-05, "loss": 1.0886, "step": 2270 }, { "epoch": 0.15387221356460465, "grad_norm": 9.649114608764648, "learning_rate": 9.791909097131906e-05, "loss": 1.1497, "step": 2271 }, { "epoch": 0.15393996883257674, "grad_norm": 8.533876419067383, "learning_rate": 9.791772195222124e-05, "loss": 0.8701, "step": 2272 }, { "epoch": 0.1540077241005488, "grad_norm": 10.64561653137207, "learning_rate": 9.791635293312343e-05, "loss": 1.2755, "step": 2273 }, { "epoch": 0.1540754793685209, "grad_norm": 9.510658264160156, "learning_rate": 9.791498391402561e-05, "loss": 0.8772, "step": 2274 }, { "epoch": 0.154143234636493, "grad_norm": 9.849981307983398, "learning_rate": 9.791361489492779e-05, "loss": 1.1689, "step": 2275 }, { "epoch": 0.15421098990446508, "grad_norm": 8.152152061462402, "learning_rate": 9.791224587582998e-05, "loss": 0.8148, "step": 2276 }, { "epoch": 0.15427874517243717, "grad_norm": 8.653456687927246, "learning_rate": 9.791087685673216e-05, "loss": 1.0061, "step": 2277 }, { "epoch": 0.15434650044040923, "grad_norm": 6.928426742553711, "learning_rate": 9.790950783763435e-05, "loss": 0.9192, "step": 2278 }, { "epoch": 0.15441425570838133, "grad_norm": 9.606708526611328, "learning_rate": 9.790813881853653e-05, "loss": 0.9956, "step": 2279 }, { "epoch": 0.15448201097635342, "grad_norm": 9.42773151397705, "learning_rate": 9.79067697994387e-05, "loss": 0.9302, "step": 2280 }, { "epoch": 0.1545497662443255, "grad_norm": 8.248319625854492, "learning_rate": 9.79054007803409e-05, "loss": 1.2318, "step": 2281 }, { "epoch": 0.15461752151229757, "grad_norm": 9.706393241882324, "learning_rate": 9.790403176124308e-05, "loss": 1.1489, "step": 2282 }, { "epoch": 0.15468527678026966, "grad_norm": 8.88716983795166, "learning_rate": 9.790266274214526e-05, "loss": 0.8686, "step": 2283 }, { "epoch": 0.15475303204824176, "grad_norm": 8.596991539001465, "learning_rate": 9.790129372304744e-05, "loss": 1.0623, "step": 2284 }, { "epoch": 0.15482078731621385, "grad_norm": 9.262425422668457, "learning_rate": 9.789992470394962e-05, "loss": 1.2067, "step": 2285 }, { "epoch": 0.1548885425841859, "grad_norm": 7.772172927856445, "learning_rate": 9.789855568485181e-05, "loss": 0.9976, "step": 2286 }, { "epoch": 0.154956297852158, "grad_norm": 9.320001602172852, "learning_rate": 9.7897186665754e-05, "loss": 1.1377, "step": 2287 }, { "epoch": 0.1550240531201301, "grad_norm": 11.02434253692627, "learning_rate": 9.789581764665618e-05, "loss": 1.1365, "step": 2288 }, { "epoch": 0.1550918083881022, "grad_norm": 9.90654182434082, "learning_rate": 9.789444862755836e-05, "loss": 1.208, "step": 2289 }, { "epoch": 0.15515956365607425, "grad_norm": 9.591778755187988, "learning_rate": 9.789307960846055e-05, "loss": 1.2426, "step": 2290 }, { "epoch": 0.15522731892404634, "grad_norm": 9.221457481384277, "learning_rate": 9.789171058936273e-05, "loss": 1.1449, "step": 2291 }, { "epoch": 0.15529507419201843, "grad_norm": 7.294323444366455, "learning_rate": 9.789034157026491e-05, "loss": 0.8846, "step": 2292 }, { "epoch": 0.15536282945999053, "grad_norm": 10.6463041305542, "learning_rate": 9.788897255116709e-05, "loss": 1.1397, "step": 2293 }, { "epoch": 0.1554305847279626, "grad_norm": 9.412870407104492, "learning_rate": 9.788760353206927e-05, "loss": 1.1857, "step": 2294 }, { "epoch": 0.15549833999593468, "grad_norm": 9.163009643554688, "learning_rate": 9.788623451297147e-05, "loss": 0.8907, "step": 2295 }, { "epoch": 0.15556609526390677, "grad_norm": 8.157798767089844, "learning_rate": 9.788486549387365e-05, "loss": 0.893, "step": 2296 }, { "epoch": 0.15563385053187886, "grad_norm": 9.155460357666016, "learning_rate": 9.788349647477583e-05, "loss": 1.0361, "step": 2297 }, { "epoch": 0.15570160579985093, "grad_norm": 10.280989646911621, "learning_rate": 9.7882127455678e-05, "loss": 1.2953, "step": 2298 }, { "epoch": 0.15576936106782302, "grad_norm": 9.654706954956055, "learning_rate": 9.78807584365802e-05, "loss": 0.9012, "step": 2299 }, { "epoch": 0.1558371163357951, "grad_norm": 12.425939559936523, "learning_rate": 9.787938941748238e-05, "loss": 1.4103, "step": 2300 }, { "epoch": 0.1559048716037672, "grad_norm": 9.888884544372559, "learning_rate": 9.787802039838456e-05, "loss": 1.1599, "step": 2301 }, { "epoch": 0.15597262687173927, "grad_norm": 10.30229377746582, "learning_rate": 9.787665137928674e-05, "loss": 1.0505, "step": 2302 }, { "epoch": 0.15604038213971136, "grad_norm": 11.208477973937988, "learning_rate": 9.787528236018892e-05, "loss": 1.009, "step": 2303 }, { "epoch": 0.15610813740768345, "grad_norm": 9.264431953430176, "learning_rate": 9.787391334109112e-05, "loss": 1.1453, "step": 2304 }, { "epoch": 0.15617589267565554, "grad_norm": 9.0980224609375, "learning_rate": 9.78725443219933e-05, "loss": 1.0824, "step": 2305 }, { "epoch": 0.1562436479436276, "grad_norm": 8.346585273742676, "learning_rate": 9.787117530289548e-05, "loss": 1.0632, "step": 2306 }, { "epoch": 0.1563114032115997, "grad_norm": 10.607507705688477, "learning_rate": 9.786980628379766e-05, "loss": 1.0102, "step": 2307 }, { "epoch": 0.1563791584795718, "grad_norm": 8.189208984375, "learning_rate": 9.786843726469985e-05, "loss": 1.0872, "step": 2308 }, { "epoch": 0.15644691374754388, "grad_norm": 9.84202766418457, "learning_rate": 9.786706824560203e-05, "loss": 0.9871, "step": 2309 }, { "epoch": 0.15651466901551594, "grad_norm": 11.937589645385742, "learning_rate": 9.786569922650421e-05, "loss": 1.2255, "step": 2310 }, { "epoch": 0.15658242428348804, "grad_norm": 9.855016708374023, "learning_rate": 9.786433020740639e-05, "loss": 1.0097, "step": 2311 }, { "epoch": 0.15665017955146013, "grad_norm": 8.661060333251953, "learning_rate": 9.786296118830857e-05, "loss": 0.8732, "step": 2312 }, { "epoch": 0.15671793481943222, "grad_norm": 10.57170295715332, "learning_rate": 9.786159216921077e-05, "loss": 0.862, "step": 2313 }, { "epoch": 0.15678569008740428, "grad_norm": 7.759045600891113, "learning_rate": 9.786022315011295e-05, "loss": 0.77, "step": 2314 }, { "epoch": 0.15685344535537638, "grad_norm": 10.758045196533203, "learning_rate": 9.785885413101513e-05, "loss": 0.8919, "step": 2315 }, { "epoch": 0.15692120062334847, "grad_norm": 8.521660804748535, "learning_rate": 9.78574851119173e-05, "loss": 1.131, "step": 2316 }, { "epoch": 0.15698895589132056, "grad_norm": 8.72917652130127, "learning_rate": 9.78561160928195e-05, "loss": 0.8359, "step": 2317 }, { "epoch": 0.15705671115929262, "grad_norm": 11.679365158081055, "learning_rate": 9.785474707372168e-05, "loss": 1.0078, "step": 2318 }, { "epoch": 0.15712446642726471, "grad_norm": 11.50632381439209, "learning_rate": 9.785337805462386e-05, "loss": 1.208, "step": 2319 }, { "epoch": 0.1571922216952368, "grad_norm": 9.6107759475708, "learning_rate": 9.785200903552605e-05, "loss": 1.0967, "step": 2320 }, { "epoch": 0.1572599769632089, "grad_norm": 8.629117012023926, "learning_rate": 9.785064001642824e-05, "loss": 1.0594, "step": 2321 }, { "epoch": 0.15732773223118096, "grad_norm": 11.136920928955078, "learning_rate": 9.784927099733042e-05, "loss": 1.2874, "step": 2322 }, { "epoch": 0.15739548749915305, "grad_norm": 11.097023963928223, "learning_rate": 9.784790197823261e-05, "loss": 1.1598, "step": 2323 }, { "epoch": 0.15746324276712514, "grad_norm": 11.117433547973633, "learning_rate": 9.784653295913479e-05, "loss": 1.0601, "step": 2324 }, { "epoch": 0.15753099803509724, "grad_norm": 10.152684211730957, "learning_rate": 9.784516394003697e-05, "loss": 1.2763, "step": 2325 }, { "epoch": 0.1575987533030693, "grad_norm": 8.37531852722168, "learning_rate": 9.784379492093915e-05, "loss": 1.1055, "step": 2326 }, { "epoch": 0.1576665085710414, "grad_norm": 10.463939666748047, "learning_rate": 9.784242590184134e-05, "loss": 1.3088, "step": 2327 }, { "epoch": 0.15773426383901348, "grad_norm": 8.74315357208252, "learning_rate": 9.784105688274352e-05, "loss": 1.1165, "step": 2328 }, { "epoch": 0.15780201910698557, "grad_norm": 8.691280364990234, "learning_rate": 9.78396878636457e-05, "loss": 1.1019, "step": 2329 }, { "epoch": 0.15786977437495767, "grad_norm": 10.424938201904297, "learning_rate": 9.783831884454789e-05, "loss": 1.1957, "step": 2330 }, { "epoch": 0.15793752964292973, "grad_norm": 6.867722034454346, "learning_rate": 9.783694982545008e-05, "loss": 1.0476, "step": 2331 }, { "epoch": 0.15800528491090182, "grad_norm": 9.434804916381836, "learning_rate": 9.783558080635226e-05, "loss": 1.0247, "step": 2332 }, { "epoch": 0.1580730401788739, "grad_norm": 7.771170616149902, "learning_rate": 9.783421178725444e-05, "loss": 1.2582, "step": 2333 }, { "epoch": 0.158140795446846, "grad_norm": 8.366608619689941, "learning_rate": 9.783284276815662e-05, "loss": 1.1078, "step": 2334 }, { "epoch": 0.15820855071481807, "grad_norm": 8.85851764678955, "learning_rate": 9.78314737490588e-05, "loss": 1.2405, "step": 2335 }, { "epoch": 0.15827630598279016, "grad_norm": 9.134325981140137, "learning_rate": 9.7830104729961e-05, "loss": 1.2277, "step": 2336 }, { "epoch": 0.15834406125076225, "grad_norm": 9.150130271911621, "learning_rate": 9.782873571086317e-05, "loss": 1.1355, "step": 2337 }, { "epoch": 0.15841181651873434, "grad_norm": 8.687469482421875, "learning_rate": 9.782736669176536e-05, "loss": 1.0454, "step": 2338 }, { "epoch": 0.1584795717867064, "grad_norm": 10.071285247802734, "learning_rate": 9.782599767266754e-05, "loss": 1.0041, "step": 2339 }, { "epoch": 0.1585473270546785, "grad_norm": 8.373369216918945, "learning_rate": 9.782462865356972e-05, "loss": 0.8378, "step": 2340 }, { "epoch": 0.1586150823226506, "grad_norm": 9.427014350891113, "learning_rate": 9.782325963447191e-05, "loss": 1.1413, "step": 2341 }, { "epoch": 0.15868283759062268, "grad_norm": 8.38814640045166, "learning_rate": 9.782189061537409e-05, "loss": 1.0696, "step": 2342 }, { "epoch": 0.15875059285859475, "grad_norm": 8.518644332885742, "learning_rate": 9.782052159627627e-05, "loss": 0.9814, "step": 2343 }, { "epoch": 0.15881834812656684, "grad_norm": 9.926252365112305, "learning_rate": 9.781915257717845e-05, "loss": 1.1971, "step": 2344 }, { "epoch": 0.15888610339453893, "grad_norm": 7.946019649505615, "learning_rate": 9.781778355808064e-05, "loss": 1.1902, "step": 2345 }, { "epoch": 0.15895385866251102, "grad_norm": 8.686945915222168, "learning_rate": 9.781641453898283e-05, "loss": 0.9939, "step": 2346 }, { "epoch": 0.15902161393048309, "grad_norm": 8.225680351257324, "learning_rate": 9.7815045519885e-05, "loss": 1.3665, "step": 2347 }, { "epoch": 0.15908936919845518, "grad_norm": 10.381987571716309, "learning_rate": 9.781367650078719e-05, "loss": 1.09, "step": 2348 }, { "epoch": 0.15915712446642727, "grad_norm": 8.57552719116211, "learning_rate": 9.781230748168937e-05, "loss": 0.8261, "step": 2349 }, { "epoch": 0.15922487973439936, "grad_norm": 9.105220794677734, "learning_rate": 9.781093846259156e-05, "loss": 1.102, "step": 2350 }, { "epoch": 0.15929263500237142, "grad_norm": 10.08092212677002, "learning_rate": 9.780956944349374e-05, "loss": 0.943, "step": 2351 }, { "epoch": 0.15936039027034352, "grad_norm": 10.259852409362793, "learning_rate": 9.780820042439592e-05, "loss": 0.8822, "step": 2352 }, { "epoch": 0.1594281455383156, "grad_norm": 8.31139087677002, "learning_rate": 9.78068314052981e-05, "loss": 1.2167, "step": 2353 }, { "epoch": 0.1594959008062877, "grad_norm": 7.529703617095947, "learning_rate": 9.78054623862003e-05, "loss": 0.8913, "step": 2354 }, { "epoch": 0.15956365607425976, "grad_norm": 8.792675971984863, "learning_rate": 9.780409336710248e-05, "loss": 0.966, "step": 2355 }, { "epoch": 0.15963141134223185, "grad_norm": 9.329866409301758, "learning_rate": 9.780272434800466e-05, "loss": 1.1659, "step": 2356 }, { "epoch": 0.15969916661020395, "grad_norm": 12.14089298248291, "learning_rate": 9.780135532890684e-05, "loss": 1.2019, "step": 2357 }, { "epoch": 0.15976692187817604, "grad_norm": 9.12912654876709, "learning_rate": 9.779998630980902e-05, "loss": 1.1615, "step": 2358 }, { "epoch": 0.1598346771461481, "grad_norm": 9.554464340209961, "learning_rate": 9.779861729071121e-05, "loss": 1.1695, "step": 2359 }, { "epoch": 0.1599024324141202, "grad_norm": 9.317673683166504, "learning_rate": 9.779724827161339e-05, "loss": 0.9606, "step": 2360 }, { "epoch": 0.15997018768209229, "grad_norm": 8.80395793914795, "learning_rate": 9.779587925251557e-05, "loss": 0.9693, "step": 2361 }, { "epoch": 0.16003794295006438, "grad_norm": 11.990642547607422, "learning_rate": 9.779451023341775e-05, "loss": 1.2901, "step": 2362 }, { "epoch": 0.16010569821803644, "grad_norm": 8.188547134399414, "learning_rate": 9.779314121431995e-05, "loss": 0.979, "step": 2363 }, { "epoch": 0.16017345348600853, "grad_norm": 8.316620826721191, "learning_rate": 9.779177219522213e-05, "loss": 0.8601, "step": 2364 }, { "epoch": 0.16024120875398062, "grad_norm": 7.58405876159668, "learning_rate": 9.77904031761243e-05, "loss": 1.1812, "step": 2365 }, { "epoch": 0.16030896402195272, "grad_norm": 7.725598335266113, "learning_rate": 9.77890341570265e-05, "loss": 0.9335, "step": 2366 }, { "epoch": 0.16037671928992478, "grad_norm": 8.6231107711792, "learning_rate": 9.778766513792868e-05, "loss": 1.0162, "step": 2367 }, { "epoch": 0.16044447455789687, "grad_norm": 9.762526512145996, "learning_rate": 9.778629611883086e-05, "loss": 1.3186, "step": 2368 }, { "epoch": 0.16051222982586896, "grad_norm": 11.384220123291016, "learning_rate": 9.778492709973305e-05, "loss": 1.2397, "step": 2369 }, { "epoch": 0.16057998509384105, "grad_norm": 8.841899871826172, "learning_rate": 9.778355808063523e-05, "loss": 0.8708, "step": 2370 }, { "epoch": 0.16064774036181312, "grad_norm": 7.778527736663818, "learning_rate": 9.778218906153741e-05, "loss": 0.9968, "step": 2371 }, { "epoch": 0.1607154956297852, "grad_norm": 8.559181213378906, "learning_rate": 9.77808200424396e-05, "loss": 0.9759, "step": 2372 }, { "epoch": 0.1607832508977573, "grad_norm": 10.273273468017578, "learning_rate": 9.777945102334179e-05, "loss": 0.9485, "step": 2373 }, { "epoch": 0.1608510061657294, "grad_norm": 7.946044445037842, "learning_rate": 9.777808200424397e-05, "loss": 1.105, "step": 2374 }, { "epoch": 0.16091876143370146, "grad_norm": 9.917662620544434, "learning_rate": 9.777671298514615e-05, "loss": 1.0272, "step": 2375 }, { "epoch": 0.16098651670167355, "grad_norm": 10.438239097595215, "learning_rate": 9.777534396604833e-05, "loss": 1.0197, "step": 2376 }, { "epoch": 0.16105427196964564, "grad_norm": 8.799901962280273, "learning_rate": 9.777397494695052e-05, "loss": 1.1401, "step": 2377 }, { "epoch": 0.16112202723761773, "grad_norm": 8.569243431091309, "learning_rate": 9.77726059278527e-05, "loss": 1.2019, "step": 2378 }, { "epoch": 0.1611897825055898, "grad_norm": 10.793002128601074, "learning_rate": 9.777123690875488e-05, "loss": 1.0932, "step": 2379 }, { "epoch": 0.1612575377735619, "grad_norm": 7.825850963592529, "learning_rate": 9.776986788965707e-05, "loss": 1.2226, "step": 2380 }, { "epoch": 0.16132529304153398, "grad_norm": 8.780813217163086, "learning_rate": 9.776849887055925e-05, "loss": 0.7939, "step": 2381 }, { "epoch": 0.16139304830950607, "grad_norm": 8.927638053894043, "learning_rate": 9.776712985146144e-05, "loss": 1.0551, "step": 2382 }, { "epoch": 0.16146080357747816, "grad_norm": 9.08043384552002, "learning_rate": 9.776576083236362e-05, "loss": 1.0037, "step": 2383 }, { "epoch": 0.16152855884545023, "grad_norm": 9.362268447875977, "learning_rate": 9.77643918132658e-05, "loss": 1.332, "step": 2384 }, { "epoch": 0.16159631411342232, "grad_norm": 10.533197402954102, "learning_rate": 9.776302279416798e-05, "loss": 1.0966, "step": 2385 }, { "epoch": 0.1616640693813944, "grad_norm": 9.579266548156738, "learning_rate": 9.776165377507017e-05, "loss": 0.9325, "step": 2386 }, { "epoch": 0.1617318246493665, "grad_norm": 10.071297645568848, "learning_rate": 9.776028475597235e-05, "loss": 0.9496, "step": 2387 }, { "epoch": 0.16179957991733857, "grad_norm": 9.243900299072266, "learning_rate": 9.775891573687453e-05, "loss": 1.13, "step": 2388 }, { "epoch": 0.16186733518531066, "grad_norm": 9.327018737792969, "learning_rate": 9.775754671777672e-05, "loss": 1.2587, "step": 2389 }, { "epoch": 0.16193509045328275, "grad_norm": 7.614950180053711, "learning_rate": 9.77561776986789e-05, "loss": 1.0848, "step": 2390 }, { "epoch": 0.16200284572125484, "grad_norm": 9.86501407623291, "learning_rate": 9.775480867958109e-05, "loss": 1.2504, "step": 2391 }, { "epoch": 0.1620706009892269, "grad_norm": 9.08303451538086, "learning_rate": 9.775343966048327e-05, "loss": 1.4359, "step": 2392 }, { "epoch": 0.162138356257199, "grad_norm": 8.417489051818848, "learning_rate": 9.775207064138545e-05, "loss": 1.244, "step": 2393 }, { "epoch": 0.1622061115251711, "grad_norm": 8.35366439819336, "learning_rate": 9.775070162228763e-05, "loss": 1.0706, "step": 2394 }, { "epoch": 0.16227386679314318, "grad_norm": 9.732915878295898, "learning_rate": 9.774933260318981e-05, "loss": 1.4237, "step": 2395 }, { "epoch": 0.16234162206111524, "grad_norm": 9.131365776062012, "learning_rate": 9.7747963584092e-05, "loss": 0.9207, "step": 2396 }, { "epoch": 0.16240937732908733, "grad_norm": 8.93538761138916, "learning_rate": 9.774659456499419e-05, "loss": 1.0054, "step": 2397 }, { "epoch": 0.16247713259705943, "grad_norm": 8.939055442810059, "learning_rate": 9.774522554589637e-05, "loss": 0.9782, "step": 2398 }, { "epoch": 0.16254488786503152, "grad_norm": 9.251758575439453, "learning_rate": 9.774385652679855e-05, "loss": 1.0968, "step": 2399 }, { "epoch": 0.16261264313300358, "grad_norm": 9.240782737731934, "learning_rate": 9.774248750770074e-05, "loss": 0.7982, "step": 2400 }, { "epoch": 0.16268039840097567, "grad_norm": 8.697726249694824, "learning_rate": 9.774111848860292e-05, "loss": 1.0545, "step": 2401 }, { "epoch": 0.16274815366894776, "grad_norm": 7.9362053871154785, "learning_rate": 9.77397494695051e-05, "loss": 0.8364, "step": 2402 }, { "epoch": 0.16281590893691986, "grad_norm": 11.944025993347168, "learning_rate": 9.773838045040728e-05, "loss": 1.2313, "step": 2403 }, { "epoch": 0.16288366420489192, "grad_norm": 9.496225357055664, "learning_rate": 9.773701143130946e-05, "loss": 1.1363, "step": 2404 }, { "epoch": 0.162951419472864, "grad_norm": 12.96069622039795, "learning_rate": 9.773564241221165e-05, "loss": 1.1205, "step": 2405 }, { "epoch": 0.1630191747408361, "grad_norm": 8.915671348571777, "learning_rate": 9.773427339311384e-05, "loss": 1.1149, "step": 2406 }, { "epoch": 0.1630869300088082, "grad_norm": 10.23763370513916, "learning_rate": 9.773290437401602e-05, "loss": 1.1867, "step": 2407 }, { "epoch": 0.16315468527678026, "grad_norm": 9.117730140686035, "learning_rate": 9.77315353549182e-05, "loss": 1.1792, "step": 2408 }, { "epoch": 0.16322244054475235, "grad_norm": 9.380385398864746, "learning_rate": 9.773016633582039e-05, "loss": 1.1621, "step": 2409 }, { "epoch": 0.16329019581272444, "grad_norm": 8.56508731842041, "learning_rate": 9.772879731672257e-05, "loss": 0.8372, "step": 2410 }, { "epoch": 0.16335795108069653, "grad_norm": 11.707832336425781, "learning_rate": 9.772742829762475e-05, "loss": 1.3191, "step": 2411 }, { "epoch": 0.1634257063486686, "grad_norm": 7.720577716827393, "learning_rate": 9.772605927852694e-05, "loss": 0.9617, "step": 2412 }, { "epoch": 0.1634934616166407, "grad_norm": 8.586542129516602, "learning_rate": 9.772469025942912e-05, "loss": 1.0978, "step": 2413 }, { "epoch": 0.16356121688461278, "grad_norm": 9.021394729614258, "learning_rate": 9.77233212403313e-05, "loss": 0.9358, "step": 2414 }, { "epoch": 0.16362897215258487, "grad_norm": 9.078686714172363, "learning_rate": 9.77219522212335e-05, "loss": 1.3338, "step": 2415 }, { "epoch": 0.16369672742055694, "grad_norm": 9.810312271118164, "learning_rate": 9.772058320213568e-05, "loss": 1.2472, "step": 2416 }, { "epoch": 0.16376448268852903, "grad_norm": 8.933609962463379, "learning_rate": 9.771921418303786e-05, "loss": 0.9115, "step": 2417 }, { "epoch": 0.16383223795650112, "grad_norm": 7.044286251068115, "learning_rate": 9.771784516394005e-05, "loss": 0.7983, "step": 2418 }, { "epoch": 0.1638999932244732, "grad_norm": 11.711495399475098, "learning_rate": 9.771647614484223e-05, "loss": 1.096, "step": 2419 }, { "epoch": 0.16396774849244528, "grad_norm": 9.31049633026123, "learning_rate": 9.771510712574441e-05, "loss": 1.2711, "step": 2420 }, { "epoch": 0.16403550376041737, "grad_norm": 8.10503101348877, "learning_rate": 9.77137381066466e-05, "loss": 0.9095, "step": 2421 }, { "epoch": 0.16410325902838946, "grad_norm": 7.915055751800537, "learning_rate": 9.771236908754877e-05, "loss": 1.0161, "step": 2422 }, { "epoch": 0.16417101429636155, "grad_norm": 8.185515403747559, "learning_rate": 9.771100006845097e-05, "loss": 1.1109, "step": 2423 }, { "epoch": 0.16423876956433361, "grad_norm": 9.960200309753418, "learning_rate": 9.770963104935315e-05, "loss": 0.9757, "step": 2424 }, { "epoch": 0.1643065248323057, "grad_norm": 9.646814346313477, "learning_rate": 9.770826203025533e-05, "loss": 1.0835, "step": 2425 }, { "epoch": 0.1643742801002778, "grad_norm": 9.701393127441406, "learning_rate": 9.770689301115751e-05, "loss": 1.0717, "step": 2426 }, { "epoch": 0.1644420353682499, "grad_norm": 7.887824058532715, "learning_rate": 9.770552399205969e-05, "loss": 1.0937, "step": 2427 }, { "epoch": 0.16450979063622195, "grad_norm": 10.94339370727539, "learning_rate": 9.770415497296188e-05, "loss": 1.2766, "step": 2428 }, { "epoch": 0.16457754590419404, "grad_norm": 10.051490783691406, "learning_rate": 9.770278595386406e-05, "loss": 1.248, "step": 2429 }, { "epoch": 0.16464530117216614, "grad_norm": 8.380006790161133, "learning_rate": 9.770141693476624e-05, "loss": 1.1657, "step": 2430 }, { "epoch": 0.16471305644013823, "grad_norm": 8.077753067016602, "learning_rate": 9.770004791566843e-05, "loss": 0.9511, "step": 2431 }, { "epoch": 0.1647808117081103, "grad_norm": 8.744999885559082, "learning_rate": 9.769867889657062e-05, "loss": 1.0736, "step": 2432 }, { "epoch": 0.16484856697608238, "grad_norm": 8.203909873962402, "learning_rate": 9.76973098774728e-05, "loss": 1.208, "step": 2433 }, { "epoch": 0.16491632224405448, "grad_norm": 9.462398529052734, "learning_rate": 9.769594085837498e-05, "loss": 1.0011, "step": 2434 }, { "epoch": 0.16498407751202657, "grad_norm": 11.190359115600586, "learning_rate": 9.769457183927716e-05, "loss": 1.139, "step": 2435 }, { "epoch": 0.16505183277999866, "grad_norm": 10.454118728637695, "learning_rate": 9.769320282017934e-05, "loss": 1.0799, "step": 2436 }, { "epoch": 0.16511958804797072, "grad_norm": 14.411054611206055, "learning_rate": 9.769183380108153e-05, "loss": 1.0369, "step": 2437 }, { "epoch": 0.16518734331594281, "grad_norm": 11.42679214477539, "learning_rate": 9.769046478198371e-05, "loss": 1.2486, "step": 2438 }, { "epoch": 0.1652550985839149, "grad_norm": 10.520325660705566, "learning_rate": 9.76890957628859e-05, "loss": 1.2355, "step": 2439 }, { "epoch": 0.165322853851887, "grad_norm": 7.958998680114746, "learning_rate": 9.768772674378808e-05, "loss": 1.0822, "step": 2440 }, { "epoch": 0.16539060911985906, "grad_norm": 8.649806022644043, "learning_rate": 9.768635772469027e-05, "loss": 0.9439, "step": 2441 }, { "epoch": 0.16545836438783115, "grad_norm": 8.539712905883789, "learning_rate": 9.768498870559245e-05, "loss": 0.9541, "step": 2442 }, { "epoch": 0.16552611965580324, "grad_norm": 11.40131950378418, "learning_rate": 9.768361968649463e-05, "loss": 1.0568, "step": 2443 }, { "epoch": 0.16559387492377534, "grad_norm": 8.333579063415527, "learning_rate": 9.768225066739681e-05, "loss": 0.924, "step": 2444 }, { "epoch": 0.1656616301917474, "grad_norm": 9.02564525604248, "learning_rate": 9.768088164829899e-05, "loss": 1.1901, "step": 2445 }, { "epoch": 0.1657293854597195, "grad_norm": 9.721306800842285, "learning_rate": 9.767951262920118e-05, "loss": 1.2313, "step": 2446 }, { "epoch": 0.16579714072769158, "grad_norm": 9.876781463623047, "learning_rate": 9.767814361010336e-05, "loss": 1.0165, "step": 2447 }, { "epoch": 0.16586489599566367, "grad_norm": 11.69865894317627, "learning_rate": 9.767677459100555e-05, "loss": 1.1301, "step": 2448 }, { "epoch": 0.16593265126363574, "grad_norm": 11.38391399383545, "learning_rate": 9.767540557190773e-05, "loss": 1.1522, "step": 2449 }, { "epoch": 0.16600040653160783, "grad_norm": 9.18020248413086, "learning_rate": 9.76740365528099e-05, "loss": 1.0515, "step": 2450 }, { "epoch": 0.16606816179957992, "grad_norm": 9.30802059173584, "learning_rate": 9.76726675337121e-05, "loss": 1.0789, "step": 2451 }, { "epoch": 0.166135917067552, "grad_norm": 9.58259391784668, "learning_rate": 9.767129851461428e-05, "loss": 1.1299, "step": 2452 }, { "epoch": 0.16620367233552408, "grad_norm": 9.137594223022461, "learning_rate": 9.766992949551646e-05, "loss": 1.0854, "step": 2453 }, { "epoch": 0.16627142760349617, "grad_norm": 12.410299301147461, "learning_rate": 9.766856047641864e-05, "loss": 1.1717, "step": 2454 }, { "epoch": 0.16633918287146826, "grad_norm": 9.016322135925293, "learning_rate": 9.766719145732083e-05, "loss": 1.1028, "step": 2455 }, { "epoch": 0.16640693813944035, "grad_norm": 10.166184425354004, "learning_rate": 9.766582243822301e-05, "loss": 1.0626, "step": 2456 }, { "epoch": 0.16647469340741242, "grad_norm": 9.030965805053711, "learning_rate": 9.76644534191252e-05, "loss": 1.1761, "step": 2457 }, { "epoch": 0.1665424486753845, "grad_norm": 9.294576644897461, "learning_rate": 9.766308440002739e-05, "loss": 0.943, "step": 2458 }, { "epoch": 0.1666102039433566, "grad_norm": 6.732856273651123, "learning_rate": 9.766171538092957e-05, "loss": 0.7342, "step": 2459 }, { "epoch": 0.1666779592113287, "grad_norm": 8.178942680358887, "learning_rate": 9.766034636183175e-05, "loss": 0.7889, "step": 2460 }, { "epoch": 0.16674571447930076, "grad_norm": 7.771929740905762, "learning_rate": 9.765897734273394e-05, "loss": 1.1129, "step": 2461 }, { "epoch": 0.16681346974727285, "grad_norm": 8.695874214172363, "learning_rate": 9.765760832363612e-05, "loss": 1.1968, "step": 2462 }, { "epoch": 0.16688122501524494, "grad_norm": 10.18800163269043, "learning_rate": 9.76562393045383e-05, "loss": 1.0835, "step": 2463 }, { "epoch": 0.16694898028321703, "grad_norm": 9.310625076293945, "learning_rate": 9.76548702854405e-05, "loss": 1.2042, "step": 2464 }, { "epoch": 0.1670167355511891, "grad_norm": 11.684195518493652, "learning_rate": 9.765350126634268e-05, "loss": 1.2518, "step": 2465 }, { "epoch": 0.16708449081916119, "grad_norm": 10.9056978225708, "learning_rate": 9.765213224724486e-05, "loss": 1.2465, "step": 2466 }, { "epoch": 0.16715224608713328, "grad_norm": 9.871830940246582, "learning_rate": 9.765076322814704e-05, "loss": 1.3192, "step": 2467 }, { "epoch": 0.16722000135510537, "grad_norm": 7.725397109985352, "learning_rate": 9.764939420904922e-05, "loss": 0.9082, "step": 2468 }, { "epoch": 0.16728775662307743, "grad_norm": 8.7415189743042, "learning_rate": 9.764802518995141e-05, "loss": 0.8569, "step": 2469 }, { "epoch": 0.16735551189104952, "grad_norm": 7.8990888595581055, "learning_rate": 9.76466561708536e-05, "loss": 1.0175, "step": 2470 }, { "epoch": 0.16742326715902162, "grad_norm": 8.688074111938477, "learning_rate": 9.764528715175577e-05, "loss": 1.0131, "step": 2471 }, { "epoch": 0.1674910224269937, "grad_norm": 7.902133941650391, "learning_rate": 9.764391813265795e-05, "loss": 0.8956, "step": 2472 }, { "epoch": 0.16755877769496577, "grad_norm": 9.179505348205566, "learning_rate": 9.764254911356013e-05, "loss": 1.175, "step": 2473 }, { "epoch": 0.16762653296293786, "grad_norm": 10.701058387756348, "learning_rate": 9.764118009446233e-05, "loss": 1.003, "step": 2474 }, { "epoch": 0.16769428823090995, "grad_norm": 8.76916217803955, "learning_rate": 9.763981107536451e-05, "loss": 1.102, "step": 2475 }, { "epoch": 0.16776204349888205, "grad_norm": 8.682199478149414, "learning_rate": 9.763844205626669e-05, "loss": 1.2042, "step": 2476 }, { "epoch": 0.1678297987668541, "grad_norm": 9.789544105529785, "learning_rate": 9.763707303716887e-05, "loss": 1.2469, "step": 2477 }, { "epoch": 0.1678975540348262, "grad_norm": 8.250391960144043, "learning_rate": 9.763570401807106e-05, "loss": 0.9742, "step": 2478 }, { "epoch": 0.1679653093027983, "grad_norm": 8.938610076904297, "learning_rate": 9.763433499897324e-05, "loss": 0.9968, "step": 2479 }, { "epoch": 0.16803306457077039, "grad_norm": 9.956622123718262, "learning_rate": 9.763296597987542e-05, "loss": 1.0464, "step": 2480 }, { "epoch": 0.16810081983874245, "grad_norm": 8.582858085632324, "learning_rate": 9.76315969607776e-05, "loss": 1.1915, "step": 2481 }, { "epoch": 0.16816857510671454, "grad_norm": 7.73312520980835, "learning_rate": 9.763022794167979e-05, "loss": 0.8506, "step": 2482 }, { "epoch": 0.16823633037468663, "grad_norm": 8.986891746520996, "learning_rate": 9.762885892258198e-05, "loss": 0.8959, "step": 2483 }, { "epoch": 0.16830408564265872, "grad_norm": 10.047099113464355, "learning_rate": 9.762748990348416e-05, "loss": 1.24, "step": 2484 }, { "epoch": 0.1683718409106308, "grad_norm": 8.517911911010742, "learning_rate": 9.762612088438634e-05, "loss": 0.8171, "step": 2485 }, { "epoch": 0.16843959617860288, "grad_norm": 9.586174964904785, "learning_rate": 9.762475186528852e-05, "loss": 1.1603, "step": 2486 }, { "epoch": 0.16850735144657497, "grad_norm": 9.85086441040039, "learning_rate": 9.762338284619071e-05, "loss": 0.9157, "step": 2487 }, { "epoch": 0.16857510671454706, "grad_norm": 12.023639678955078, "learning_rate": 9.76220138270929e-05, "loss": 1.2032, "step": 2488 }, { "epoch": 0.16864286198251915, "grad_norm": 8.932641983032227, "learning_rate": 9.762064480799507e-05, "loss": 1.0678, "step": 2489 }, { "epoch": 0.16871061725049122, "grad_norm": 10.568282127380371, "learning_rate": 9.761927578889725e-05, "loss": 1.3213, "step": 2490 }, { "epoch": 0.1687783725184633, "grad_norm": 9.116564750671387, "learning_rate": 9.761790676979944e-05, "loss": 1.0525, "step": 2491 }, { "epoch": 0.1688461277864354, "grad_norm": 8.192644119262695, "learning_rate": 9.761653775070163e-05, "loss": 0.9554, "step": 2492 }, { "epoch": 0.1689138830544075, "grad_norm": 9.146562576293945, "learning_rate": 9.761516873160381e-05, "loss": 0.8852, "step": 2493 }, { "epoch": 0.16898163832237956, "grad_norm": 8.82610034942627, "learning_rate": 9.761379971250599e-05, "loss": 1.0521, "step": 2494 }, { "epoch": 0.16904939359035165, "grad_norm": 9.051412582397461, "learning_rate": 9.761243069340817e-05, "loss": 0.913, "step": 2495 }, { "epoch": 0.16911714885832374, "grad_norm": 8.97696304321289, "learning_rate": 9.761106167431036e-05, "loss": 0.887, "step": 2496 }, { "epoch": 0.16918490412629583, "grad_norm": 10.083110809326172, "learning_rate": 9.760969265521254e-05, "loss": 1.3521, "step": 2497 }, { "epoch": 0.1692526593942679, "grad_norm": 7.585256099700928, "learning_rate": 9.760832363611472e-05, "loss": 0.9001, "step": 2498 }, { "epoch": 0.16932041466224, "grad_norm": 10.301995277404785, "learning_rate": 9.76069546170169e-05, "loss": 1.0915, "step": 2499 }, { "epoch": 0.16938816993021208, "grad_norm": 8.517580032348633, "learning_rate": 9.760558559791909e-05, "loss": 1.0097, "step": 2500 }, { "epoch": 0.16945592519818417, "grad_norm": 8.20002555847168, "learning_rate": 9.760421657882128e-05, "loss": 1.2988, "step": 2501 }, { "epoch": 0.16952368046615623, "grad_norm": 9.705124855041504, "learning_rate": 9.760284755972346e-05, "loss": 1.2661, "step": 2502 }, { "epoch": 0.16959143573412833, "grad_norm": 9.167060852050781, "learning_rate": 9.760147854062564e-05, "loss": 1.0556, "step": 2503 }, { "epoch": 0.16965919100210042, "grad_norm": 8.415916442871094, "learning_rate": 9.760010952152783e-05, "loss": 0.7964, "step": 2504 }, { "epoch": 0.1697269462700725, "grad_norm": 7.626298427581787, "learning_rate": 9.759874050243001e-05, "loss": 0.9153, "step": 2505 }, { "epoch": 0.16979470153804457, "grad_norm": 10.595001220703125, "learning_rate": 9.75973714833322e-05, "loss": 0.9911, "step": 2506 }, { "epoch": 0.16986245680601667, "grad_norm": 7.564423084259033, "learning_rate": 9.759600246423439e-05, "loss": 0.9843, "step": 2507 }, { "epoch": 0.16993021207398876, "grad_norm": 7.644829273223877, "learning_rate": 9.759463344513657e-05, "loss": 0.9116, "step": 2508 }, { "epoch": 0.16999796734196085, "grad_norm": 7.54351282119751, "learning_rate": 9.759326442603875e-05, "loss": 1.1528, "step": 2509 }, { "epoch": 0.1700657226099329, "grad_norm": 9.259818077087402, "learning_rate": 9.759189540694094e-05, "loss": 1.0867, "step": 2510 }, { "epoch": 0.170133477877905, "grad_norm": 8.022993087768555, "learning_rate": 9.759052638784312e-05, "loss": 0.7524, "step": 2511 }, { "epoch": 0.1702012331458771, "grad_norm": 8.264616966247559, "learning_rate": 9.75891573687453e-05, "loss": 0.9906, "step": 2512 }, { "epoch": 0.1702689884138492, "grad_norm": 9.606420516967773, "learning_rate": 9.758778834964748e-05, "loss": 0.9287, "step": 2513 }, { "epoch": 0.17033674368182125, "grad_norm": 8.019355773925781, "learning_rate": 9.758641933054966e-05, "loss": 0.879, "step": 2514 }, { "epoch": 0.17040449894979334, "grad_norm": 9.819777488708496, "learning_rate": 9.758505031145186e-05, "loss": 1.3677, "step": 2515 }, { "epoch": 0.17047225421776543, "grad_norm": 9.9795560836792, "learning_rate": 9.758368129235404e-05, "loss": 1.1614, "step": 2516 }, { "epoch": 0.17054000948573753, "grad_norm": 9.51271915435791, "learning_rate": 9.758231227325622e-05, "loss": 1.0423, "step": 2517 }, { "epoch": 0.1706077647537096, "grad_norm": 10.511359214782715, "learning_rate": 9.75809432541584e-05, "loss": 0.9492, "step": 2518 }, { "epoch": 0.17067552002168168, "grad_norm": 9.61755657196045, "learning_rate": 9.757957423506059e-05, "loss": 1.1481, "step": 2519 }, { "epoch": 0.17074327528965377, "grad_norm": 11.246142387390137, "learning_rate": 9.757820521596277e-05, "loss": 1.3981, "step": 2520 }, { "epoch": 0.17081103055762586, "grad_norm": 9.273181915283203, "learning_rate": 9.757683619686495e-05, "loss": 0.9773, "step": 2521 }, { "epoch": 0.17087878582559793, "grad_norm": 11.215822219848633, "learning_rate": 9.757546717776713e-05, "loss": 1.2303, "step": 2522 }, { "epoch": 0.17094654109357002, "grad_norm": 10.139853477478027, "learning_rate": 9.757409815866931e-05, "loss": 1.0807, "step": 2523 }, { "epoch": 0.1710142963615421, "grad_norm": 13.275606155395508, "learning_rate": 9.757272913957151e-05, "loss": 1.3326, "step": 2524 }, { "epoch": 0.1710820516295142, "grad_norm": 8.32109260559082, "learning_rate": 9.757136012047369e-05, "loss": 0.9428, "step": 2525 }, { "epoch": 0.17114980689748627, "grad_norm": 9.275816917419434, "learning_rate": 9.756999110137587e-05, "loss": 1.1531, "step": 2526 }, { "epoch": 0.17121756216545836, "grad_norm": 10.61928939819336, "learning_rate": 9.756862208227805e-05, "loss": 0.9422, "step": 2527 }, { "epoch": 0.17128531743343045, "grad_norm": 7.793631553649902, "learning_rate": 9.756725306318023e-05, "loss": 1.216, "step": 2528 }, { "epoch": 0.17135307270140254, "grad_norm": 7.453477382659912, "learning_rate": 9.756588404408242e-05, "loss": 0.7787, "step": 2529 }, { "epoch": 0.1714208279693746, "grad_norm": 10.39784049987793, "learning_rate": 9.75645150249846e-05, "loss": 1.3171, "step": 2530 }, { "epoch": 0.1714885832373467, "grad_norm": 8.57040786743164, "learning_rate": 9.756314600588678e-05, "loss": 0.8319, "step": 2531 }, { "epoch": 0.1715563385053188, "grad_norm": 8.696785926818848, "learning_rate": 9.756177698678896e-05, "loss": 1.0078, "step": 2532 }, { "epoch": 0.17162409377329088, "grad_norm": 8.212169647216797, "learning_rate": 9.756040796769116e-05, "loss": 0.7851, "step": 2533 }, { "epoch": 0.17169184904126294, "grad_norm": 10.94201946258545, "learning_rate": 9.755903894859334e-05, "loss": 1.0065, "step": 2534 }, { "epoch": 0.17175960430923504, "grad_norm": 12.041540145874023, "learning_rate": 9.755766992949552e-05, "loss": 1.2938, "step": 2535 }, { "epoch": 0.17182735957720713, "grad_norm": 9.289467811584473, "learning_rate": 9.75563009103977e-05, "loss": 1.1521, "step": 2536 }, { "epoch": 0.17189511484517922, "grad_norm": 9.231005668640137, "learning_rate": 9.755493189129988e-05, "loss": 0.9, "step": 2537 }, { "epoch": 0.17196287011315128, "grad_norm": 8.934699058532715, "learning_rate": 9.755356287220207e-05, "loss": 1.2817, "step": 2538 }, { "epoch": 0.17203062538112338, "grad_norm": 10.096917152404785, "learning_rate": 9.755219385310425e-05, "loss": 1.1587, "step": 2539 }, { "epoch": 0.17209838064909547, "grad_norm": 8.864645004272461, "learning_rate": 9.755082483400643e-05, "loss": 1.1781, "step": 2540 }, { "epoch": 0.17216613591706756, "grad_norm": 10.964715957641602, "learning_rate": 9.754945581490861e-05, "loss": 1.2729, "step": 2541 }, { "epoch": 0.17223389118503965, "grad_norm": 10.845703125, "learning_rate": 9.754808679581081e-05, "loss": 1.4475, "step": 2542 }, { "epoch": 0.17230164645301171, "grad_norm": 9.800530433654785, "learning_rate": 9.754671777671299e-05, "loss": 1.0296, "step": 2543 }, { "epoch": 0.1723694017209838, "grad_norm": 8.789834022521973, "learning_rate": 9.754534875761517e-05, "loss": 0.9573, "step": 2544 }, { "epoch": 0.1724371569889559, "grad_norm": 7.731616973876953, "learning_rate": 9.754397973851735e-05, "loss": 0.9875, "step": 2545 }, { "epoch": 0.172504912256928, "grad_norm": 8.942553520202637, "learning_rate": 9.754261071941953e-05, "loss": 1.1897, "step": 2546 }, { "epoch": 0.17257266752490005, "grad_norm": 7.468856334686279, "learning_rate": 9.754124170032172e-05, "loss": 0.8869, "step": 2547 }, { "epoch": 0.17264042279287214, "grad_norm": 8.798864364624023, "learning_rate": 9.75398726812239e-05, "loss": 1.0034, "step": 2548 }, { "epoch": 0.17270817806084424, "grad_norm": 8.579094886779785, "learning_rate": 9.753850366212608e-05, "loss": 1.0211, "step": 2549 }, { "epoch": 0.17277593332881633, "grad_norm": 10.90807819366455, "learning_rate": 9.753713464302827e-05, "loss": 1.3639, "step": 2550 }, { "epoch": 0.1728436885967884, "grad_norm": 8.337204933166504, "learning_rate": 9.753576562393046e-05, "loss": 1.323, "step": 2551 }, { "epoch": 0.17291144386476048, "grad_norm": 8.731851577758789, "learning_rate": 9.753439660483264e-05, "loss": 0.8805, "step": 2552 }, { "epoch": 0.17297919913273258, "grad_norm": 9.541427612304688, "learning_rate": 9.753302758573482e-05, "loss": 1.0368, "step": 2553 }, { "epoch": 0.17304695440070467, "grad_norm": 7.938154697418213, "learning_rate": 9.753165856663701e-05, "loss": 0.9128, "step": 2554 }, { "epoch": 0.17311470966867673, "grad_norm": 8.111212730407715, "learning_rate": 9.75302895475392e-05, "loss": 0.9857, "step": 2555 }, { "epoch": 0.17318246493664882, "grad_norm": 8.583324432373047, "learning_rate": 9.752892052844139e-05, "loss": 1.0975, "step": 2556 }, { "epoch": 0.1732502202046209, "grad_norm": 8.180643081665039, "learning_rate": 9.752755150934357e-05, "loss": 1.21, "step": 2557 }, { "epoch": 0.173317975472593, "grad_norm": 9.740217208862305, "learning_rate": 9.752618249024575e-05, "loss": 1.18, "step": 2558 }, { "epoch": 0.17338573074056507, "grad_norm": 8.725831031799316, "learning_rate": 9.752481347114793e-05, "loss": 0.9816, "step": 2559 }, { "epoch": 0.17345348600853716, "grad_norm": 10.786824226379395, "learning_rate": 9.752344445205011e-05, "loss": 1.0402, "step": 2560 }, { "epoch": 0.17352124127650925, "grad_norm": 8.91720962524414, "learning_rate": 9.75220754329523e-05, "loss": 1.2357, "step": 2561 }, { "epoch": 0.17358899654448134, "grad_norm": 9.02492618560791, "learning_rate": 9.752070641385448e-05, "loss": 1.1264, "step": 2562 }, { "epoch": 0.1736567518124534, "grad_norm": 7.917794227600098, "learning_rate": 9.751933739475666e-05, "loss": 1.0937, "step": 2563 }, { "epoch": 0.1737245070804255, "grad_norm": 11.543112754821777, "learning_rate": 9.751796837565884e-05, "loss": 1.0851, "step": 2564 }, { "epoch": 0.1737922623483976, "grad_norm": 9.114936828613281, "learning_rate": 9.751659935656104e-05, "loss": 0.9565, "step": 2565 }, { "epoch": 0.17386001761636968, "grad_norm": 12.322575569152832, "learning_rate": 9.751523033746322e-05, "loss": 1.0356, "step": 2566 }, { "epoch": 0.17392777288434175, "grad_norm": 11.39137077331543, "learning_rate": 9.75138613183654e-05, "loss": 1.2141, "step": 2567 }, { "epoch": 0.17399552815231384, "grad_norm": 9.505644798278809, "learning_rate": 9.751249229926758e-05, "loss": 0.8307, "step": 2568 }, { "epoch": 0.17406328342028593, "grad_norm": 11.05932331085205, "learning_rate": 9.751112328016976e-05, "loss": 1.1915, "step": 2569 }, { "epoch": 0.17413103868825802, "grad_norm": 11.104498863220215, "learning_rate": 9.750975426107195e-05, "loss": 1.0032, "step": 2570 }, { "epoch": 0.17419879395623009, "grad_norm": 11.366668701171875, "learning_rate": 9.750838524197413e-05, "loss": 1.0857, "step": 2571 }, { "epoch": 0.17426654922420218, "grad_norm": 8.775167465209961, "learning_rate": 9.750701622287631e-05, "loss": 1.1545, "step": 2572 }, { "epoch": 0.17433430449217427, "grad_norm": 8.820073127746582, "learning_rate": 9.75056472037785e-05, "loss": 1.0528, "step": 2573 }, { "epoch": 0.17440205976014636, "grad_norm": 9.53591537475586, "learning_rate": 9.750427818468069e-05, "loss": 1.0562, "step": 2574 }, { "epoch": 0.17446981502811842, "grad_norm": 10.08950138092041, "learning_rate": 9.750290916558287e-05, "loss": 1.0456, "step": 2575 }, { "epoch": 0.17453757029609052, "grad_norm": 8.222607612609863, "learning_rate": 9.750154014648505e-05, "loss": 0.9168, "step": 2576 }, { "epoch": 0.1746053255640626, "grad_norm": 10.067093849182129, "learning_rate": 9.750017112738723e-05, "loss": 0.9952, "step": 2577 }, { "epoch": 0.1746730808320347, "grad_norm": 9.726996421813965, "learning_rate": 9.749880210828941e-05, "loss": 0.9428, "step": 2578 }, { "epoch": 0.17474083610000676, "grad_norm": 9.970647811889648, "learning_rate": 9.74974330891916e-05, "loss": 1.1838, "step": 2579 }, { "epoch": 0.17480859136797886, "grad_norm": 7.8667893409729, "learning_rate": 9.749606407009378e-05, "loss": 1.0341, "step": 2580 }, { "epoch": 0.17487634663595095, "grad_norm": 9.227079391479492, "learning_rate": 9.749469505099596e-05, "loss": 1.0515, "step": 2581 }, { "epoch": 0.17494410190392304, "grad_norm": 9.198224067687988, "learning_rate": 9.749332603189814e-05, "loss": 1.0935, "step": 2582 }, { "epoch": 0.1750118571718951, "grad_norm": 11.105298042297363, "learning_rate": 9.749195701280032e-05, "loss": 1.2115, "step": 2583 }, { "epoch": 0.1750796124398672, "grad_norm": 8.815799713134766, "learning_rate": 9.749058799370252e-05, "loss": 1.1308, "step": 2584 }, { "epoch": 0.17514736770783929, "grad_norm": 10.571581840515137, "learning_rate": 9.74892189746047e-05, "loss": 1.1982, "step": 2585 }, { "epoch": 0.17521512297581138, "grad_norm": 7.368075370788574, "learning_rate": 9.748784995550688e-05, "loss": 0.9352, "step": 2586 }, { "epoch": 0.17528287824378344, "grad_norm": 7.103427410125732, "learning_rate": 9.748648093640906e-05, "loss": 1.0578, "step": 2587 }, { "epoch": 0.17535063351175553, "grad_norm": 7.678786754608154, "learning_rate": 9.748511191731125e-05, "loss": 0.8549, "step": 2588 }, { "epoch": 0.17541838877972762, "grad_norm": 9.514643669128418, "learning_rate": 9.748374289821343e-05, "loss": 1.1835, "step": 2589 }, { "epoch": 0.17548614404769972, "grad_norm": 7.552379131317139, "learning_rate": 9.748237387911561e-05, "loss": 0.9425, "step": 2590 }, { "epoch": 0.17555389931567178, "grad_norm": 7.663018226623535, "learning_rate": 9.74810048600178e-05, "loss": 1.0948, "step": 2591 }, { "epoch": 0.17562165458364387, "grad_norm": 10.293536186218262, "learning_rate": 9.747963584091997e-05, "loss": 1.1636, "step": 2592 }, { "epoch": 0.17568940985161596, "grad_norm": 9.024083137512207, "learning_rate": 9.747826682182217e-05, "loss": 1.1157, "step": 2593 }, { "epoch": 0.17575716511958805, "grad_norm": 8.870935440063477, "learning_rate": 9.747689780272435e-05, "loss": 0.9634, "step": 2594 }, { "epoch": 0.17582492038756015, "grad_norm": 8.553323745727539, "learning_rate": 9.747552878362653e-05, "loss": 1.1515, "step": 2595 }, { "epoch": 0.1758926756555322, "grad_norm": 9.13661003112793, "learning_rate": 9.747415976452871e-05, "loss": 0.9267, "step": 2596 }, { "epoch": 0.1759604309235043, "grad_norm": 8.66097640991211, "learning_rate": 9.74727907454309e-05, "loss": 0.6839, "step": 2597 }, { "epoch": 0.1760281861914764, "grad_norm": 10.07618236541748, "learning_rate": 9.747142172633308e-05, "loss": 1.2822, "step": 2598 }, { "epoch": 0.17609594145944849, "grad_norm": 9.136283874511719, "learning_rate": 9.747005270723526e-05, "loss": 1.0053, "step": 2599 }, { "epoch": 0.17616369672742055, "grad_norm": 9.42113208770752, "learning_rate": 9.746868368813746e-05, "loss": 1.15, "step": 2600 }, { "epoch": 0.17623145199539264, "grad_norm": 8.789713859558105, "learning_rate": 9.746731466903964e-05, "loss": 0.8504, "step": 2601 }, { "epoch": 0.17629920726336473, "grad_norm": 8.704665184020996, "learning_rate": 9.746594564994182e-05, "loss": 1.1516, "step": 2602 }, { "epoch": 0.17636696253133682, "grad_norm": 8.030630111694336, "learning_rate": 9.746457663084401e-05, "loss": 1.044, "step": 2603 }, { "epoch": 0.1764347177993089, "grad_norm": 8.640777587890625, "learning_rate": 9.746320761174619e-05, "loss": 1.0013, "step": 2604 }, { "epoch": 0.17650247306728098, "grad_norm": 7.806771278381348, "learning_rate": 9.746183859264837e-05, "loss": 1.0678, "step": 2605 }, { "epoch": 0.17657022833525307, "grad_norm": 8.883776664733887, "learning_rate": 9.746046957355055e-05, "loss": 0.9277, "step": 2606 }, { "epoch": 0.17663798360322516, "grad_norm": 7.539346694946289, "learning_rate": 9.745910055445275e-05, "loss": 1.0437, "step": 2607 }, { "epoch": 0.17670573887119723, "grad_norm": 9.873644828796387, "learning_rate": 9.745773153535493e-05, "loss": 1.206, "step": 2608 }, { "epoch": 0.17677349413916932, "grad_norm": 10.031026840209961, "learning_rate": 9.745636251625711e-05, "loss": 1.1934, "step": 2609 }, { "epoch": 0.1768412494071414, "grad_norm": 9.194849014282227, "learning_rate": 9.745499349715929e-05, "loss": 1.2365, "step": 2610 }, { "epoch": 0.1769090046751135, "grad_norm": 7.951476097106934, "learning_rate": 9.745362447806148e-05, "loss": 0.8829, "step": 2611 }, { "epoch": 0.17697675994308557, "grad_norm": 8.77597427368164, "learning_rate": 9.745225545896366e-05, "loss": 1.0097, "step": 2612 }, { "epoch": 0.17704451521105766, "grad_norm": 10.356508255004883, "learning_rate": 9.745088643986584e-05, "loss": 1.2603, "step": 2613 }, { "epoch": 0.17711227047902975, "grad_norm": 9.24199390411377, "learning_rate": 9.744951742076802e-05, "loss": 1.0181, "step": 2614 }, { "epoch": 0.17718002574700184, "grad_norm": 8.244451522827148, "learning_rate": 9.74481484016702e-05, "loss": 0.9602, "step": 2615 }, { "epoch": 0.1772477810149739, "grad_norm": 10.769842147827148, "learning_rate": 9.74467793825724e-05, "loss": 1.0646, "step": 2616 }, { "epoch": 0.177315536282946, "grad_norm": 9.381787300109863, "learning_rate": 9.744541036347458e-05, "loss": 0.8985, "step": 2617 }, { "epoch": 0.1773832915509181, "grad_norm": 8.379899978637695, "learning_rate": 9.744404134437676e-05, "loss": 1.0368, "step": 2618 }, { "epoch": 0.17745104681889018, "grad_norm": 9.59954833984375, "learning_rate": 9.744267232527894e-05, "loss": 1.1447, "step": 2619 }, { "epoch": 0.17751880208686224, "grad_norm": 8.83703327178955, "learning_rate": 9.744130330618113e-05, "loss": 1.3435, "step": 2620 }, { "epoch": 0.17758655735483433, "grad_norm": 9.448990821838379, "learning_rate": 9.743993428708331e-05, "loss": 1.1837, "step": 2621 }, { "epoch": 0.17765431262280643, "grad_norm": 9.466961860656738, "learning_rate": 9.74385652679855e-05, "loss": 1.1265, "step": 2622 }, { "epoch": 0.17772206789077852, "grad_norm": 8.68281364440918, "learning_rate": 9.743719624888767e-05, "loss": 0.8804, "step": 2623 }, { "epoch": 0.17778982315875058, "grad_norm": 7.007611274719238, "learning_rate": 9.743582722978985e-05, "loss": 1.03, "step": 2624 }, { "epoch": 0.17785757842672267, "grad_norm": 8.254279136657715, "learning_rate": 9.743445821069205e-05, "loss": 1.0328, "step": 2625 }, { "epoch": 0.17792533369469477, "grad_norm": 9.134196281433105, "learning_rate": 9.743308919159423e-05, "loss": 0.9671, "step": 2626 }, { "epoch": 0.17799308896266686, "grad_norm": 7.803997039794922, "learning_rate": 9.743172017249641e-05, "loss": 0.9692, "step": 2627 }, { "epoch": 0.17806084423063892, "grad_norm": 8.37303638458252, "learning_rate": 9.743035115339859e-05, "loss": 0.8949, "step": 2628 }, { "epoch": 0.178128599498611, "grad_norm": 9.928305625915527, "learning_rate": 9.742898213430078e-05, "loss": 1.2679, "step": 2629 }, { "epoch": 0.1781963547665831, "grad_norm": 8.58604621887207, "learning_rate": 9.742761311520296e-05, "loss": 1.23, "step": 2630 }, { "epoch": 0.1782641100345552, "grad_norm": 9.47903823852539, "learning_rate": 9.742624409610514e-05, "loss": 0.8701, "step": 2631 }, { "epoch": 0.17833186530252726, "grad_norm": 8.013197898864746, "learning_rate": 9.742487507700732e-05, "loss": 0.8325, "step": 2632 }, { "epoch": 0.17839962057049935, "grad_norm": 9.541396141052246, "learning_rate": 9.74235060579095e-05, "loss": 1.0996, "step": 2633 }, { "epoch": 0.17846737583847144, "grad_norm": 7.515398979187012, "learning_rate": 9.74221370388117e-05, "loss": 1.1174, "step": 2634 }, { "epoch": 0.17853513110644353, "grad_norm": 9.562670707702637, "learning_rate": 9.742076801971388e-05, "loss": 0.9727, "step": 2635 }, { "epoch": 0.1786028863744156, "grad_norm": 10.072449684143066, "learning_rate": 9.741939900061606e-05, "loss": 1.0021, "step": 2636 }, { "epoch": 0.1786706416423877, "grad_norm": 9.634955406188965, "learning_rate": 9.741802998151824e-05, "loss": 1.3998, "step": 2637 }, { "epoch": 0.17873839691035978, "grad_norm": 8.78954792022705, "learning_rate": 9.741666096242042e-05, "loss": 0.9602, "step": 2638 }, { "epoch": 0.17880615217833187, "grad_norm": 9.597916603088379, "learning_rate": 9.741529194332261e-05, "loss": 1.0174, "step": 2639 }, { "epoch": 0.17887390744630394, "grad_norm": 9.543583869934082, "learning_rate": 9.74139229242248e-05, "loss": 1.0686, "step": 2640 }, { "epoch": 0.17894166271427603, "grad_norm": 8.247551918029785, "learning_rate": 9.741255390512697e-05, "loss": 1.1026, "step": 2641 }, { "epoch": 0.17900941798224812, "grad_norm": 8.487943649291992, "learning_rate": 9.741118488602915e-05, "loss": 0.8843, "step": 2642 }, { "epoch": 0.1790771732502202, "grad_norm": 9.061832427978516, "learning_rate": 9.740981586693135e-05, "loss": 0.9708, "step": 2643 }, { "epoch": 0.17914492851819228, "grad_norm": 9.242405891418457, "learning_rate": 9.740844684783353e-05, "loss": 1.1344, "step": 2644 }, { "epoch": 0.17921268378616437, "grad_norm": 8.192344665527344, "learning_rate": 9.740707782873571e-05, "loss": 0.7587, "step": 2645 }, { "epoch": 0.17928043905413646, "grad_norm": 6.6164445877075195, "learning_rate": 9.74057088096379e-05, "loss": 0.842, "step": 2646 }, { "epoch": 0.17934819432210855, "grad_norm": 10.285326957702637, "learning_rate": 9.740433979054008e-05, "loss": 1.138, "step": 2647 }, { "epoch": 0.17941594959008064, "grad_norm": 8.55659294128418, "learning_rate": 9.740297077144226e-05, "loss": 0.7925, "step": 2648 }, { "epoch": 0.1794837048580527, "grad_norm": 8.384642601013184, "learning_rate": 9.740160175234446e-05, "loss": 0.9719, "step": 2649 }, { "epoch": 0.1795514601260248, "grad_norm": 9.303678512573242, "learning_rate": 9.740023273324664e-05, "loss": 1.0688, "step": 2650 }, { "epoch": 0.1796192153939969, "grad_norm": 9.642799377441406, "learning_rate": 9.739886371414882e-05, "loss": 0.9758, "step": 2651 }, { "epoch": 0.17968697066196898, "grad_norm": 9.616509437561035, "learning_rate": 9.739749469505101e-05, "loss": 0.9999, "step": 2652 }, { "epoch": 0.17975472592994104, "grad_norm": 10.511337280273438, "learning_rate": 9.739612567595319e-05, "loss": 1.0079, "step": 2653 }, { "epoch": 0.17982248119791314, "grad_norm": 7.480882167816162, "learning_rate": 9.739475665685537e-05, "loss": 0.8353, "step": 2654 }, { "epoch": 0.17989023646588523, "grad_norm": 10.277608871459961, "learning_rate": 9.739338763775755e-05, "loss": 0.9709, "step": 2655 }, { "epoch": 0.17995799173385732, "grad_norm": 9.135882377624512, "learning_rate": 9.739201861865973e-05, "loss": 1.3888, "step": 2656 }, { "epoch": 0.18002574700182938, "grad_norm": 8.147950172424316, "learning_rate": 9.739064959956193e-05, "loss": 1.2235, "step": 2657 }, { "epoch": 0.18009350226980148, "grad_norm": 8.583501815795898, "learning_rate": 9.738928058046411e-05, "loss": 0.9123, "step": 2658 }, { "epoch": 0.18016125753777357, "grad_norm": 8.202300071716309, "learning_rate": 9.738791156136629e-05, "loss": 1.0435, "step": 2659 }, { "epoch": 0.18022901280574566, "grad_norm": 8.121417045593262, "learning_rate": 9.738654254226847e-05, "loss": 0.9351, "step": 2660 }, { "epoch": 0.18029676807371772, "grad_norm": 10.645029067993164, "learning_rate": 9.738517352317065e-05, "loss": 1.5219, "step": 2661 }, { "epoch": 0.18036452334168981, "grad_norm": 9.766422271728516, "learning_rate": 9.738380450407284e-05, "loss": 1.0499, "step": 2662 }, { "epoch": 0.1804322786096619, "grad_norm": 8.137174606323242, "learning_rate": 9.738243548497502e-05, "loss": 1.1871, "step": 2663 }, { "epoch": 0.180500033877634, "grad_norm": 8.422198295593262, "learning_rate": 9.73810664658772e-05, "loss": 0.9524, "step": 2664 }, { "epoch": 0.18056778914560606, "grad_norm": 6.721381664276123, "learning_rate": 9.737969744677938e-05, "loss": 1.0449, "step": 2665 }, { "epoch": 0.18063554441357815, "grad_norm": 9.175302505493164, "learning_rate": 9.737832842768158e-05, "loss": 1.0508, "step": 2666 }, { "epoch": 0.18070329968155024, "grad_norm": 10.165428161621094, "learning_rate": 9.737695940858376e-05, "loss": 0.8744, "step": 2667 }, { "epoch": 0.18077105494952234, "grad_norm": 9.396173477172852, "learning_rate": 9.737559038948594e-05, "loss": 1.0304, "step": 2668 }, { "epoch": 0.1808388102174944, "grad_norm": 9.680516242980957, "learning_rate": 9.737422137038812e-05, "loss": 0.9773, "step": 2669 }, { "epoch": 0.1809065654854665, "grad_norm": 7.570343017578125, "learning_rate": 9.73728523512903e-05, "loss": 0.9159, "step": 2670 }, { "epoch": 0.18097432075343858, "grad_norm": 9.5789213180542, "learning_rate": 9.737148333219249e-05, "loss": 1.1269, "step": 2671 }, { "epoch": 0.18104207602141068, "grad_norm": 8.559327125549316, "learning_rate": 9.737011431309467e-05, "loss": 1.2318, "step": 2672 }, { "epoch": 0.18110983128938274, "grad_norm": 7.630974292755127, "learning_rate": 9.736874529399685e-05, "loss": 0.8996, "step": 2673 }, { "epoch": 0.18117758655735483, "grad_norm": 8.078895568847656, "learning_rate": 9.736737627489903e-05, "loss": 0.9942, "step": 2674 }, { "epoch": 0.18124534182532692, "grad_norm": 8.830656051635742, "learning_rate": 9.736600725580123e-05, "loss": 1.0614, "step": 2675 }, { "epoch": 0.181313097093299, "grad_norm": 7.66297721862793, "learning_rate": 9.736463823670341e-05, "loss": 1.025, "step": 2676 }, { "epoch": 0.18138085236127108, "grad_norm": 8.318365097045898, "learning_rate": 9.736326921760559e-05, "loss": 1.1286, "step": 2677 }, { "epoch": 0.18144860762924317, "grad_norm": 10.180238723754883, "learning_rate": 9.736190019850777e-05, "loss": 1.1304, "step": 2678 }, { "epoch": 0.18151636289721526, "grad_norm": 9.2420654296875, "learning_rate": 9.736053117940995e-05, "loss": 1.157, "step": 2679 }, { "epoch": 0.18158411816518735, "grad_norm": 7.984904766082764, "learning_rate": 9.735916216031214e-05, "loss": 1.1325, "step": 2680 }, { "epoch": 0.18165187343315942, "grad_norm": 11.136275291442871, "learning_rate": 9.735779314121432e-05, "loss": 1.0151, "step": 2681 }, { "epoch": 0.1817196287011315, "grad_norm": 9.954483032226562, "learning_rate": 9.73564241221165e-05, "loss": 1.0934, "step": 2682 }, { "epoch": 0.1817873839691036, "grad_norm": 8.491388320922852, "learning_rate": 9.735505510301868e-05, "loss": 1.2288, "step": 2683 }, { "epoch": 0.1818551392370757, "grad_norm": 9.355586051940918, "learning_rate": 9.735368608392088e-05, "loss": 0.7999, "step": 2684 }, { "epoch": 0.18192289450504776, "grad_norm": 8.829736709594727, "learning_rate": 9.735231706482306e-05, "loss": 0.9711, "step": 2685 }, { "epoch": 0.18199064977301985, "grad_norm": 9.667959213256836, "learning_rate": 9.735094804572524e-05, "loss": 1.1132, "step": 2686 }, { "epoch": 0.18205840504099194, "grad_norm": 6.971467971801758, "learning_rate": 9.734957902662742e-05, "loss": 1.0252, "step": 2687 }, { "epoch": 0.18212616030896403, "grad_norm": 9.69013500213623, "learning_rate": 9.73482100075296e-05, "loss": 0.9562, "step": 2688 }, { "epoch": 0.1821939155769361, "grad_norm": 9.47673511505127, "learning_rate": 9.734684098843179e-05, "loss": 0.9822, "step": 2689 }, { "epoch": 0.18226167084490819, "grad_norm": 9.708051681518555, "learning_rate": 9.734547196933397e-05, "loss": 1.0772, "step": 2690 }, { "epoch": 0.18232942611288028, "grad_norm": 10.617173194885254, "learning_rate": 9.734410295023615e-05, "loss": 1.2686, "step": 2691 }, { "epoch": 0.18239718138085237, "grad_norm": 9.52670955657959, "learning_rate": 9.734273393113835e-05, "loss": 0.9477, "step": 2692 }, { "epoch": 0.18246493664882443, "grad_norm": 10.090290069580078, "learning_rate": 9.734136491204053e-05, "loss": 1.1766, "step": 2693 }, { "epoch": 0.18253269191679652, "grad_norm": 8.709790229797363, "learning_rate": 9.733999589294271e-05, "loss": 1.1327, "step": 2694 }, { "epoch": 0.18260044718476862, "grad_norm": 12.07381534576416, "learning_rate": 9.73386268738449e-05, "loss": 1.3694, "step": 2695 }, { "epoch": 0.1826682024527407, "grad_norm": 8.329826354980469, "learning_rate": 9.733725785474708e-05, "loss": 1.0448, "step": 2696 }, { "epoch": 0.18273595772071277, "grad_norm": 7.551383972167969, "learning_rate": 9.733588883564926e-05, "loss": 1.029, "step": 2697 }, { "epoch": 0.18280371298868486, "grad_norm": 9.393105506896973, "learning_rate": 9.733451981655146e-05, "loss": 0.9729, "step": 2698 }, { "epoch": 0.18287146825665695, "grad_norm": 9.283944129943848, "learning_rate": 9.733315079745364e-05, "loss": 1.2459, "step": 2699 }, { "epoch": 0.18293922352462905, "grad_norm": 9.02878189086914, "learning_rate": 9.733178177835582e-05, "loss": 1.3235, "step": 2700 }, { "epoch": 0.18300697879260114, "grad_norm": 8.735793113708496, "learning_rate": 9.7330412759258e-05, "loss": 1.1447, "step": 2701 }, { "epoch": 0.1830747340605732, "grad_norm": 8.511741638183594, "learning_rate": 9.732904374016018e-05, "loss": 1.0148, "step": 2702 }, { "epoch": 0.1831424893285453, "grad_norm": 6.590076446533203, "learning_rate": 9.732767472106237e-05, "loss": 1.0904, "step": 2703 }, { "epoch": 0.18321024459651739, "grad_norm": 11.523942947387695, "learning_rate": 9.732630570196455e-05, "loss": 0.9349, "step": 2704 }, { "epoch": 0.18327799986448948, "grad_norm": 6.972995281219482, "learning_rate": 9.732493668286673e-05, "loss": 0.9837, "step": 2705 }, { "epoch": 0.18334575513246154, "grad_norm": 7.834453105926514, "learning_rate": 9.732356766376891e-05, "loss": 1.0374, "step": 2706 }, { "epoch": 0.18341351040043363, "grad_norm": 9.44674015045166, "learning_rate": 9.732219864467111e-05, "loss": 1.4479, "step": 2707 }, { "epoch": 0.18348126566840572, "grad_norm": 9.017583847045898, "learning_rate": 9.732082962557329e-05, "loss": 1.136, "step": 2708 }, { "epoch": 0.18354902093637782, "grad_norm": 8.40336799621582, "learning_rate": 9.731946060647547e-05, "loss": 0.9914, "step": 2709 }, { "epoch": 0.18361677620434988, "grad_norm": 10.586843490600586, "learning_rate": 9.731809158737765e-05, "loss": 1.1515, "step": 2710 }, { "epoch": 0.18368453147232197, "grad_norm": 7.611972808837891, "learning_rate": 9.731672256827983e-05, "loss": 1.0821, "step": 2711 }, { "epoch": 0.18375228674029406, "grad_norm": 9.093097686767578, "learning_rate": 9.731535354918202e-05, "loss": 1.1295, "step": 2712 }, { "epoch": 0.18382004200826615, "grad_norm": 8.013279914855957, "learning_rate": 9.73139845300842e-05, "loss": 0.839, "step": 2713 }, { "epoch": 0.18388779727623822, "grad_norm": 7.941558837890625, "learning_rate": 9.731261551098638e-05, "loss": 1.0819, "step": 2714 }, { "epoch": 0.1839555525442103, "grad_norm": 7.09537935256958, "learning_rate": 9.731124649188856e-05, "loss": 0.8606, "step": 2715 }, { "epoch": 0.1840233078121824, "grad_norm": 7.867136478424072, "learning_rate": 9.730987747279074e-05, "loss": 0.9942, "step": 2716 }, { "epoch": 0.1840910630801545, "grad_norm": 8.66762638092041, "learning_rate": 9.730850845369294e-05, "loss": 1.0792, "step": 2717 }, { "epoch": 0.18415881834812656, "grad_norm": 7.97219705581665, "learning_rate": 9.730713943459512e-05, "loss": 1.0366, "step": 2718 }, { "epoch": 0.18422657361609865, "grad_norm": 8.341264724731445, "learning_rate": 9.73057704154973e-05, "loss": 1.0833, "step": 2719 }, { "epoch": 0.18429432888407074, "grad_norm": 8.704034805297852, "learning_rate": 9.730440139639948e-05, "loss": 1.2909, "step": 2720 }, { "epoch": 0.18436208415204283, "grad_norm": 9.759525299072266, "learning_rate": 9.730303237730167e-05, "loss": 1.1749, "step": 2721 }, { "epoch": 0.1844298394200149, "grad_norm": 9.2392578125, "learning_rate": 9.730166335820385e-05, "loss": 1.4037, "step": 2722 }, { "epoch": 0.184497594687987, "grad_norm": 10.519861221313477, "learning_rate": 9.730029433910603e-05, "loss": 1.2692, "step": 2723 }, { "epoch": 0.18456534995595908, "grad_norm": 8.285715103149414, "learning_rate": 9.729892532000821e-05, "loss": 0.8014, "step": 2724 }, { "epoch": 0.18463310522393117, "grad_norm": 7.261229991912842, "learning_rate": 9.72975563009104e-05, "loss": 0.9302, "step": 2725 }, { "epoch": 0.18470086049190323, "grad_norm": 10.149426460266113, "learning_rate": 9.729618728181259e-05, "loss": 1.2855, "step": 2726 }, { "epoch": 0.18476861575987533, "grad_norm": 7.495121955871582, "learning_rate": 9.729481826271477e-05, "loss": 0.9771, "step": 2727 }, { "epoch": 0.18483637102784742, "grad_norm": 9.102607727050781, "learning_rate": 9.729344924361695e-05, "loss": 1.1974, "step": 2728 }, { "epoch": 0.1849041262958195, "grad_norm": 9.57135009765625, "learning_rate": 9.729208022451913e-05, "loss": 0.9758, "step": 2729 }, { "epoch": 0.18497188156379157, "grad_norm": 8.745903015136719, "learning_rate": 9.729071120542132e-05, "loss": 1.1719, "step": 2730 }, { "epoch": 0.18503963683176367, "grad_norm": 10.697607040405273, "learning_rate": 9.72893421863235e-05, "loss": 0.952, "step": 2731 }, { "epoch": 0.18510739209973576, "grad_norm": 9.907280921936035, "learning_rate": 9.728797316722568e-05, "loss": 0.9571, "step": 2732 }, { "epoch": 0.18517514736770785, "grad_norm": 7.285250186920166, "learning_rate": 9.728660414812786e-05, "loss": 0.9002, "step": 2733 }, { "epoch": 0.1852429026356799, "grad_norm": 8.134112358093262, "learning_rate": 9.728523512903004e-05, "loss": 0.8645, "step": 2734 }, { "epoch": 0.185310657903652, "grad_norm": 9.427742004394531, "learning_rate": 9.728386610993224e-05, "loss": 1.2333, "step": 2735 }, { "epoch": 0.1853784131716241, "grad_norm": 8.804586410522461, "learning_rate": 9.728249709083442e-05, "loss": 1.2601, "step": 2736 }, { "epoch": 0.1854461684395962, "grad_norm": 9.144674301147461, "learning_rate": 9.72811280717366e-05, "loss": 0.9503, "step": 2737 }, { "epoch": 0.18551392370756825, "grad_norm": 9.399337768554688, "learning_rate": 9.727975905263879e-05, "loss": 0.8586, "step": 2738 }, { "epoch": 0.18558167897554034, "grad_norm": 7.7377119064331055, "learning_rate": 9.727839003354097e-05, "loss": 0.7511, "step": 2739 }, { "epoch": 0.18564943424351243, "grad_norm": 9.146937370300293, "learning_rate": 9.727702101444315e-05, "loss": 1.0037, "step": 2740 }, { "epoch": 0.18571718951148453, "grad_norm": 7.722357273101807, "learning_rate": 9.727565199534535e-05, "loss": 1.077, "step": 2741 }, { "epoch": 0.1857849447794566, "grad_norm": 7.808566093444824, "learning_rate": 9.727428297624753e-05, "loss": 0.8953, "step": 2742 }, { "epoch": 0.18585270004742868, "grad_norm": 8.765763282775879, "learning_rate": 9.727291395714971e-05, "loss": 1.0691, "step": 2743 }, { "epoch": 0.18592045531540077, "grad_norm": 11.350706100463867, "learning_rate": 9.72715449380519e-05, "loss": 0.9308, "step": 2744 }, { "epoch": 0.18598821058337286, "grad_norm": 7.838237285614014, "learning_rate": 9.727017591895408e-05, "loss": 0.9452, "step": 2745 }, { "epoch": 0.18605596585134493, "grad_norm": 9.660740852355957, "learning_rate": 9.726880689985626e-05, "loss": 0.8705, "step": 2746 }, { "epoch": 0.18612372111931702, "grad_norm": 8.146308898925781, "learning_rate": 9.726743788075844e-05, "loss": 1.0362, "step": 2747 }, { "epoch": 0.1861914763872891, "grad_norm": 9.901165008544922, "learning_rate": 9.726606886166062e-05, "loss": 1.0842, "step": 2748 }, { "epoch": 0.1862592316552612, "grad_norm": 7.650402069091797, "learning_rate": 9.726469984256282e-05, "loss": 0.997, "step": 2749 }, { "epoch": 0.18632698692323327, "grad_norm": 7.760092735290527, "learning_rate": 9.7263330823465e-05, "loss": 0.9903, "step": 2750 }, { "epoch": 0.18639474219120536, "grad_norm": 9.523726463317871, "learning_rate": 9.726196180436718e-05, "loss": 0.9276, "step": 2751 }, { "epoch": 0.18646249745917745, "grad_norm": 7.759490489959717, "learning_rate": 9.726059278526936e-05, "loss": 0.9356, "step": 2752 }, { "epoch": 0.18653025272714954, "grad_norm": 10.949979782104492, "learning_rate": 9.725922376617155e-05, "loss": 1.1451, "step": 2753 }, { "epoch": 0.18659800799512163, "grad_norm": 8.312686920166016, "learning_rate": 9.725785474707373e-05, "loss": 0.973, "step": 2754 }, { "epoch": 0.1866657632630937, "grad_norm": 6.999983787536621, "learning_rate": 9.725648572797591e-05, "loss": 1.0699, "step": 2755 }, { "epoch": 0.1867335185310658, "grad_norm": 9.422745704650879, "learning_rate": 9.725511670887809e-05, "loss": 1.0697, "step": 2756 }, { "epoch": 0.18680127379903788, "grad_norm": 8.054603576660156, "learning_rate": 9.725374768978027e-05, "loss": 0.8764, "step": 2757 }, { "epoch": 0.18686902906700997, "grad_norm": 7.902176856994629, "learning_rate": 9.725237867068247e-05, "loss": 0.9893, "step": 2758 }, { "epoch": 0.18693678433498204, "grad_norm": 8.409537315368652, "learning_rate": 9.725100965158465e-05, "loss": 1.0526, "step": 2759 }, { "epoch": 0.18700453960295413, "grad_norm": 9.867463111877441, "learning_rate": 9.724964063248683e-05, "loss": 1.1218, "step": 2760 }, { "epoch": 0.18707229487092622, "grad_norm": 6.852199554443359, "learning_rate": 9.724827161338901e-05, "loss": 0.9838, "step": 2761 }, { "epoch": 0.1871400501388983, "grad_norm": 9.132448196411133, "learning_rate": 9.72469025942912e-05, "loss": 1.038, "step": 2762 }, { "epoch": 0.18720780540687038, "grad_norm": 8.699213981628418, "learning_rate": 9.724553357519338e-05, "loss": 1.0532, "step": 2763 }, { "epoch": 0.18727556067484247, "grad_norm": 8.520672798156738, "learning_rate": 9.724416455609556e-05, "loss": 1.2942, "step": 2764 }, { "epoch": 0.18734331594281456, "grad_norm": 9.690433502197266, "learning_rate": 9.724279553699774e-05, "loss": 0.9552, "step": 2765 }, { "epoch": 0.18741107121078665, "grad_norm": 7.540090560913086, "learning_rate": 9.724142651789992e-05, "loss": 1.0718, "step": 2766 }, { "epoch": 0.18747882647875871, "grad_norm": 9.072039604187012, "learning_rate": 9.724005749880212e-05, "loss": 1.0313, "step": 2767 }, { "epoch": 0.1875465817467308, "grad_norm": 10.155011177062988, "learning_rate": 9.72386884797043e-05, "loss": 1.3498, "step": 2768 }, { "epoch": 0.1876143370147029, "grad_norm": 7.816718101501465, "learning_rate": 9.723731946060648e-05, "loss": 0.9594, "step": 2769 }, { "epoch": 0.187682092282675, "grad_norm": 10.980911254882812, "learning_rate": 9.723595044150866e-05, "loss": 1.1675, "step": 2770 }, { "epoch": 0.18774984755064705, "grad_norm": 8.865739822387695, "learning_rate": 9.723458142241084e-05, "loss": 1.1796, "step": 2771 }, { "epoch": 0.18781760281861914, "grad_norm": 8.97850227355957, "learning_rate": 9.723321240331303e-05, "loss": 1.0907, "step": 2772 }, { "epoch": 0.18788535808659124, "grad_norm": 8.16921329498291, "learning_rate": 9.723184338421521e-05, "loss": 0.95, "step": 2773 }, { "epoch": 0.18795311335456333, "grad_norm": 8.766203880310059, "learning_rate": 9.723047436511739e-05, "loss": 0.9087, "step": 2774 }, { "epoch": 0.1880208686225354, "grad_norm": 7.410607814788818, "learning_rate": 9.722910534601957e-05, "loss": 0.8082, "step": 2775 }, { "epoch": 0.18808862389050748, "grad_norm": 9.640182495117188, "learning_rate": 9.722773632692177e-05, "loss": 0.99, "step": 2776 }, { "epoch": 0.18815637915847958, "grad_norm": 9.038297653198242, "learning_rate": 9.722636730782395e-05, "loss": 1.0017, "step": 2777 }, { "epoch": 0.18822413442645167, "grad_norm": 10.167421340942383, "learning_rate": 9.722499828872613e-05, "loss": 1.1588, "step": 2778 }, { "epoch": 0.18829188969442373, "grad_norm": 9.911538124084473, "learning_rate": 9.722362926962831e-05, "loss": 1.052, "step": 2779 }, { "epoch": 0.18835964496239582, "grad_norm": 8.78661060333252, "learning_rate": 9.722226025053049e-05, "loss": 0.9827, "step": 2780 }, { "epoch": 0.18842740023036791, "grad_norm": 8.58356761932373, "learning_rate": 9.722089123143268e-05, "loss": 0.9663, "step": 2781 }, { "epoch": 0.18849515549834, "grad_norm": 7.882653713226318, "learning_rate": 9.721952221233486e-05, "loss": 1.0282, "step": 2782 }, { "epoch": 0.18856291076631207, "grad_norm": 8.180728912353516, "learning_rate": 9.721815319323704e-05, "loss": 0.8983, "step": 2783 }, { "epoch": 0.18863066603428416, "grad_norm": 10.785475730895996, "learning_rate": 9.721678417413924e-05, "loss": 1.0949, "step": 2784 }, { "epoch": 0.18869842130225625, "grad_norm": 8.2493257522583, "learning_rate": 9.721541515504142e-05, "loss": 0.8891, "step": 2785 }, { "epoch": 0.18876617657022834, "grad_norm": 8.298515319824219, "learning_rate": 9.72140461359436e-05, "loss": 0.9384, "step": 2786 }, { "epoch": 0.1888339318382004, "grad_norm": 11.643486022949219, "learning_rate": 9.721267711684579e-05, "loss": 1.0915, "step": 2787 }, { "epoch": 0.1889016871061725, "grad_norm": 10.728472709655762, "learning_rate": 9.721130809774797e-05, "loss": 1.562, "step": 2788 }, { "epoch": 0.1889694423741446, "grad_norm": 7.9253435134887695, "learning_rate": 9.720993907865015e-05, "loss": 0.876, "step": 2789 }, { "epoch": 0.18903719764211668, "grad_norm": 9.075439453125, "learning_rate": 9.720857005955235e-05, "loss": 1.1712, "step": 2790 }, { "epoch": 0.18910495291008875, "grad_norm": 8.572853088378906, "learning_rate": 9.720720104045453e-05, "loss": 0.9697, "step": 2791 }, { "epoch": 0.18917270817806084, "grad_norm": 8.40988826751709, "learning_rate": 9.720583202135671e-05, "loss": 1.0904, "step": 2792 }, { "epoch": 0.18924046344603293, "grad_norm": 8.015021324157715, "learning_rate": 9.720446300225889e-05, "loss": 1.0264, "step": 2793 }, { "epoch": 0.18930821871400502, "grad_norm": 10.295256614685059, "learning_rate": 9.720309398316107e-05, "loss": 0.9608, "step": 2794 }, { "epoch": 0.18937597398197709, "grad_norm": 9.377728462219238, "learning_rate": 9.720172496406326e-05, "loss": 1.2169, "step": 2795 }, { "epoch": 0.18944372924994918, "grad_norm": 8.55238151550293, "learning_rate": 9.720035594496544e-05, "loss": 1.0951, "step": 2796 }, { "epoch": 0.18951148451792127, "grad_norm": 10.069438934326172, "learning_rate": 9.719898692586762e-05, "loss": 1.3206, "step": 2797 }, { "epoch": 0.18957923978589336, "grad_norm": 9.229057312011719, "learning_rate": 9.71976179067698e-05, "loss": 1.18, "step": 2798 }, { "epoch": 0.18964699505386542, "grad_norm": 8.911051750183105, "learning_rate": 9.7196248887672e-05, "loss": 1.1351, "step": 2799 }, { "epoch": 0.18971475032183752, "grad_norm": 9.285752296447754, "learning_rate": 9.719487986857418e-05, "loss": 1.0626, "step": 2800 }, { "epoch": 0.1897825055898096, "grad_norm": 8.793561935424805, "learning_rate": 9.719351084947636e-05, "loss": 1.033, "step": 2801 }, { "epoch": 0.1898502608577817, "grad_norm": 10.635384559631348, "learning_rate": 9.719214183037854e-05, "loss": 1.5004, "step": 2802 }, { "epoch": 0.18991801612575376, "grad_norm": 7.9627275466918945, "learning_rate": 9.719077281128072e-05, "loss": 1.0246, "step": 2803 }, { "epoch": 0.18998577139372586, "grad_norm": 8.960352897644043, "learning_rate": 9.718940379218291e-05, "loss": 1.0316, "step": 2804 }, { "epoch": 0.19005352666169795, "grad_norm": 9.522171020507812, "learning_rate": 9.718803477308509e-05, "loss": 1.0219, "step": 2805 }, { "epoch": 0.19012128192967004, "grad_norm": 8.412702560424805, "learning_rate": 9.718666575398727e-05, "loss": 1.0609, "step": 2806 }, { "epoch": 0.19018903719764213, "grad_norm": 8.777839660644531, "learning_rate": 9.718529673488945e-05, "loss": 1.0854, "step": 2807 }, { "epoch": 0.1902567924656142, "grad_norm": 8.932796478271484, "learning_rate": 9.718392771579165e-05, "loss": 1.1661, "step": 2808 }, { "epoch": 0.19032454773358629, "grad_norm": 8.514800071716309, "learning_rate": 9.718255869669383e-05, "loss": 1.0584, "step": 2809 }, { "epoch": 0.19039230300155838, "grad_norm": 7.001948356628418, "learning_rate": 9.718118967759601e-05, "loss": 0.8479, "step": 2810 }, { "epoch": 0.19046005826953047, "grad_norm": 8.977307319641113, "learning_rate": 9.717982065849819e-05, "loss": 1.2556, "step": 2811 }, { "epoch": 0.19052781353750253, "grad_norm": 8.513920783996582, "learning_rate": 9.717845163940037e-05, "loss": 1.2076, "step": 2812 }, { "epoch": 0.19059556880547462, "grad_norm": 8.556622505187988, "learning_rate": 9.717708262030256e-05, "loss": 1.0491, "step": 2813 }, { "epoch": 0.19066332407344672, "grad_norm": 9.8518648147583, "learning_rate": 9.717571360120474e-05, "loss": 1.1179, "step": 2814 }, { "epoch": 0.1907310793414188, "grad_norm": 8.887413024902344, "learning_rate": 9.717434458210692e-05, "loss": 1.0675, "step": 2815 }, { "epoch": 0.19079883460939087, "grad_norm": 7.888981342315674, "learning_rate": 9.71729755630091e-05, "loss": 1.0489, "step": 2816 }, { "epoch": 0.19086658987736296, "grad_norm": 7.692848205566406, "learning_rate": 9.71716065439113e-05, "loss": 1.0602, "step": 2817 }, { "epoch": 0.19093434514533505, "grad_norm": 8.591787338256836, "learning_rate": 9.717023752481348e-05, "loss": 0.8737, "step": 2818 }, { "epoch": 0.19100210041330715, "grad_norm": 9.622870445251465, "learning_rate": 9.716886850571566e-05, "loss": 0.9717, "step": 2819 }, { "epoch": 0.1910698556812792, "grad_norm": 7.949582576751709, "learning_rate": 9.716749948661784e-05, "loss": 0.8651, "step": 2820 }, { "epoch": 0.1911376109492513, "grad_norm": 9.46272087097168, "learning_rate": 9.716613046752002e-05, "loss": 0.9975, "step": 2821 }, { "epoch": 0.1912053662172234, "grad_norm": 8.86549186706543, "learning_rate": 9.716476144842221e-05, "loss": 1.0805, "step": 2822 }, { "epoch": 0.19127312148519549, "grad_norm": 8.039673805236816, "learning_rate": 9.716339242932439e-05, "loss": 1.121, "step": 2823 }, { "epoch": 0.19134087675316755, "grad_norm": 8.606979370117188, "learning_rate": 9.716202341022657e-05, "loss": 0.9217, "step": 2824 }, { "epoch": 0.19140863202113964, "grad_norm": 10.429420471191406, "learning_rate": 9.716065439112875e-05, "loss": 1.2266, "step": 2825 }, { "epoch": 0.19147638728911173, "grad_norm": 7.339411735534668, "learning_rate": 9.715928537203093e-05, "loss": 1.0235, "step": 2826 }, { "epoch": 0.19154414255708382, "grad_norm": 8.247300148010254, "learning_rate": 9.715791635293313e-05, "loss": 0.8993, "step": 2827 }, { "epoch": 0.1916118978250559, "grad_norm": 8.475278854370117, "learning_rate": 9.715654733383531e-05, "loss": 0.9611, "step": 2828 }, { "epoch": 0.19167965309302798, "grad_norm": 9.792519569396973, "learning_rate": 9.715517831473749e-05, "loss": 1.0831, "step": 2829 }, { "epoch": 0.19174740836100007, "grad_norm": 6.757070541381836, "learning_rate": 9.715380929563967e-05, "loss": 1.0282, "step": 2830 }, { "epoch": 0.19181516362897216, "grad_norm": 6.707785129547119, "learning_rate": 9.715244027654186e-05, "loss": 0.7813, "step": 2831 }, { "epoch": 0.19188291889694423, "grad_norm": 9.990489959716797, "learning_rate": 9.715107125744404e-05, "loss": 1.0504, "step": 2832 }, { "epoch": 0.19195067416491632, "grad_norm": 10.57358169555664, "learning_rate": 9.714970223834622e-05, "loss": 0.9355, "step": 2833 }, { "epoch": 0.1920184294328884, "grad_norm": 9.225300788879395, "learning_rate": 9.714833321924842e-05, "loss": 1.2404, "step": 2834 }, { "epoch": 0.1920861847008605, "grad_norm": 8.36042594909668, "learning_rate": 9.71469642001506e-05, "loss": 0.9012, "step": 2835 }, { "epoch": 0.19215393996883257, "grad_norm": 9.624984741210938, "learning_rate": 9.714559518105278e-05, "loss": 1.0509, "step": 2836 }, { "epoch": 0.19222169523680466, "grad_norm": 12.569930076599121, "learning_rate": 9.714422616195497e-05, "loss": 1.2663, "step": 2837 }, { "epoch": 0.19228945050477675, "grad_norm": 12.278885841369629, "learning_rate": 9.714285714285715e-05, "loss": 1.1547, "step": 2838 }, { "epoch": 0.19235720577274884, "grad_norm": 9.3023681640625, "learning_rate": 9.714148812375933e-05, "loss": 1.0873, "step": 2839 }, { "epoch": 0.1924249610407209, "grad_norm": 8.241714477539062, "learning_rate": 9.714011910466153e-05, "loss": 0.8222, "step": 2840 }, { "epoch": 0.192492716308693, "grad_norm": 10.45174503326416, "learning_rate": 9.71387500855637e-05, "loss": 1.1974, "step": 2841 }, { "epoch": 0.1925604715766651, "grad_norm": 8.096826553344727, "learning_rate": 9.713738106646589e-05, "loss": 1.1147, "step": 2842 }, { "epoch": 0.19262822684463718, "grad_norm": 9.053191184997559, "learning_rate": 9.713601204736807e-05, "loss": 0.9835, "step": 2843 }, { "epoch": 0.19269598211260924, "grad_norm": 8.050823211669922, "learning_rate": 9.713464302827025e-05, "loss": 1.0826, "step": 2844 }, { "epoch": 0.19276373738058133, "grad_norm": 10.000917434692383, "learning_rate": 9.713327400917244e-05, "loss": 1.1723, "step": 2845 }, { "epoch": 0.19283149264855343, "grad_norm": 8.992589950561523, "learning_rate": 9.713190499007462e-05, "loss": 1.0203, "step": 2846 }, { "epoch": 0.19289924791652552, "grad_norm": 10.340285301208496, "learning_rate": 9.71305359709768e-05, "loss": 1.2523, "step": 2847 }, { "epoch": 0.19296700318449758, "grad_norm": 8.250594139099121, "learning_rate": 9.712916695187898e-05, "loss": 1.0606, "step": 2848 }, { "epoch": 0.19303475845246967, "grad_norm": 7.16335916519165, "learning_rate": 9.712779793278116e-05, "loss": 0.9988, "step": 2849 }, { "epoch": 0.19310251372044177, "grad_norm": 9.089055061340332, "learning_rate": 9.712642891368336e-05, "loss": 0.9368, "step": 2850 }, { "epoch": 0.19317026898841386, "grad_norm": 7.568434238433838, "learning_rate": 9.712505989458554e-05, "loss": 0.9417, "step": 2851 }, { "epoch": 0.19323802425638592, "grad_norm": 8.470823287963867, "learning_rate": 9.712369087548772e-05, "loss": 1.0271, "step": 2852 }, { "epoch": 0.193305779524358, "grad_norm": 7.746623992919922, "learning_rate": 9.71223218563899e-05, "loss": 0.7641, "step": 2853 }, { "epoch": 0.1933735347923301, "grad_norm": 7.44852352142334, "learning_rate": 9.712095283729209e-05, "loss": 0.9984, "step": 2854 }, { "epoch": 0.1934412900603022, "grad_norm": 7.393777847290039, "learning_rate": 9.711958381819427e-05, "loss": 0.8515, "step": 2855 }, { "epoch": 0.19350904532827426, "grad_norm": 8.247236251831055, "learning_rate": 9.711821479909645e-05, "loss": 0.9867, "step": 2856 }, { "epoch": 0.19357680059624635, "grad_norm": 8.484920501708984, "learning_rate": 9.711684577999863e-05, "loss": 0.9099, "step": 2857 }, { "epoch": 0.19364455586421844, "grad_norm": 9.85857105255127, "learning_rate": 9.711547676090081e-05, "loss": 1.0134, "step": 2858 }, { "epoch": 0.19371231113219053, "grad_norm": 12.191691398620605, "learning_rate": 9.7114107741803e-05, "loss": 1.15, "step": 2859 }, { "epoch": 0.19378006640016263, "grad_norm": 9.691742897033691, "learning_rate": 9.711273872270519e-05, "loss": 1.2415, "step": 2860 }, { "epoch": 0.1938478216681347, "grad_norm": 11.395289421081543, "learning_rate": 9.711136970360737e-05, "loss": 1.3239, "step": 2861 }, { "epoch": 0.19391557693610678, "grad_norm": 9.222856521606445, "learning_rate": 9.711000068450955e-05, "loss": 0.9087, "step": 2862 }, { "epoch": 0.19398333220407887, "grad_norm": 8.062904357910156, "learning_rate": 9.710863166541174e-05, "loss": 1.093, "step": 2863 }, { "epoch": 0.19405108747205096, "grad_norm": 8.160481452941895, "learning_rate": 9.710726264631392e-05, "loss": 0.842, "step": 2864 }, { "epoch": 0.19411884274002303, "grad_norm": 8.165858268737793, "learning_rate": 9.71058936272161e-05, "loss": 0.8807, "step": 2865 }, { "epoch": 0.19418659800799512, "grad_norm": 8.120240211486816, "learning_rate": 9.710452460811828e-05, "loss": 0.9733, "step": 2866 }, { "epoch": 0.1942543532759672, "grad_norm": 11.363536834716797, "learning_rate": 9.710315558902046e-05, "loss": 1.1144, "step": 2867 }, { "epoch": 0.1943221085439393, "grad_norm": 8.776150703430176, "learning_rate": 9.710178656992266e-05, "loss": 0.7561, "step": 2868 }, { "epoch": 0.19438986381191137, "grad_norm": 9.393696784973145, "learning_rate": 9.710041755082484e-05, "loss": 0.8092, "step": 2869 }, { "epoch": 0.19445761907988346, "grad_norm": 10.820277214050293, "learning_rate": 9.709904853172702e-05, "loss": 0.9905, "step": 2870 }, { "epoch": 0.19452537434785555, "grad_norm": 8.041844367980957, "learning_rate": 9.70976795126292e-05, "loss": 0.8801, "step": 2871 }, { "epoch": 0.19459312961582764, "grad_norm": 8.05355167388916, "learning_rate": 9.709631049353138e-05, "loss": 0.9946, "step": 2872 }, { "epoch": 0.1946608848837997, "grad_norm": 9.46949577331543, "learning_rate": 9.709494147443357e-05, "loss": 1.0582, "step": 2873 }, { "epoch": 0.1947286401517718, "grad_norm": 8.796204566955566, "learning_rate": 9.709357245533575e-05, "loss": 1.1542, "step": 2874 }, { "epoch": 0.1947963954197439, "grad_norm": 10.45006275177002, "learning_rate": 9.709220343623793e-05, "loss": 1.2363, "step": 2875 }, { "epoch": 0.19486415068771598, "grad_norm": 10.490102767944336, "learning_rate": 9.709083441714011e-05, "loss": 1.1977, "step": 2876 }, { "epoch": 0.19493190595568805, "grad_norm": 9.048376083374023, "learning_rate": 9.708946539804231e-05, "loss": 0.8127, "step": 2877 }, { "epoch": 0.19499966122366014, "grad_norm": 6.523111343383789, "learning_rate": 9.708809637894449e-05, "loss": 0.8182, "step": 2878 }, { "epoch": 0.19506741649163223, "grad_norm": 7.170145511627197, "learning_rate": 9.708672735984667e-05, "loss": 0.9643, "step": 2879 }, { "epoch": 0.19513517175960432, "grad_norm": 7.9981818199157715, "learning_rate": 9.708535834074886e-05, "loss": 1.3179, "step": 2880 }, { "epoch": 0.19520292702757638, "grad_norm": 8.786405563354492, "learning_rate": 9.708398932165104e-05, "loss": 0.9217, "step": 2881 }, { "epoch": 0.19527068229554848, "grad_norm": 6.7907867431640625, "learning_rate": 9.708262030255322e-05, "loss": 1.0381, "step": 2882 }, { "epoch": 0.19533843756352057, "grad_norm": 7.1751556396484375, "learning_rate": 9.708125128345542e-05, "loss": 0.8334, "step": 2883 }, { "epoch": 0.19540619283149266, "grad_norm": 8.236610412597656, "learning_rate": 9.70798822643576e-05, "loss": 1.2058, "step": 2884 }, { "epoch": 0.19547394809946472, "grad_norm": 7.576273441314697, "learning_rate": 9.707851324525978e-05, "loss": 1.091, "step": 2885 }, { "epoch": 0.19554170336743681, "grad_norm": 8.698029518127441, "learning_rate": 9.707714422616197e-05, "loss": 1.0262, "step": 2886 }, { "epoch": 0.1956094586354089, "grad_norm": 8.583345413208008, "learning_rate": 9.707577520706415e-05, "loss": 1.0401, "step": 2887 }, { "epoch": 0.195677213903381, "grad_norm": 7.242405891418457, "learning_rate": 9.707440618796633e-05, "loss": 0.785, "step": 2888 }, { "epoch": 0.19574496917135306, "grad_norm": 8.4541654586792, "learning_rate": 9.707303716886851e-05, "loss": 1.0537, "step": 2889 }, { "epoch": 0.19581272443932515, "grad_norm": 7.838657855987549, "learning_rate": 9.707166814977069e-05, "loss": 1.0062, "step": 2890 }, { "epoch": 0.19588047970729724, "grad_norm": 6.812248229980469, "learning_rate": 9.707029913067289e-05, "loss": 0.885, "step": 2891 }, { "epoch": 0.19594823497526934, "grad_norm": 8.789966583251953, "learning_rate": 9.706893011157507e-05, "loss": 1.0366, "step": 2892 }, { "epoch": 0.1960159902432414, "grad_norm": 8.860052108764648, "learning_rate": 9.706756109247725e-05, "loss": 1.2247, "step": 2893 }, { "epoch": 0.1960837455112135, "grad_norm": 11.235320091247559, "learning_rate": 9.706619207337943e-05, "loss": 1.1216, "step": 2894 }, { "epoch": 0.19615150077918558, "grad_norm": 8.865259170532227, "learning_rate": 9.706482305428162e-05, "loss": 0.9467, "step": 2895 }, { "epoch": 0.19621925604715768, "grad_norm": 8.345112800598145, "learning_rate": 9.70634540351838e-05, "loss": 0.9849, "step": 2896 }, { "epoch": 0.19628701131512974, "grad_norm": 8.98128604888916, "learning_rate": 9.706208501608598e-05, "loss": 1.1421, "step": 2897 }, { "epoch": 0.19635476658310183, "grad_norm": 7.786384582519531, "learning_rate": 9.706071599698816e-05, "loss": 1.0926, "step": 2898 }, { "epoch": 0.19642252185107392, "grad_norm": 8.816730499267578, "learning_rate": 9.705934697789034e-05, "loss": 1.2626, "step": 2899 }, { "epoch": 0.19649027711904601, "grad_norm": 7.831095218658447, "learning_rate": 9.705797795879254e-05, "loss": 0.9355, "step": 2900 }, { "epoch": 0.19655803238701808, "grad_norm": 8.788371086120605, "learning_rate": 9.705660893969472e-05, "loss": 0.8883, "step": 2901 }, { "epoch": 0.19662578765499017, "grad_norm": 8.88425064086914, "learning_rate": 9.70552399205969e-05, "loss": 1.1169, "step": 2902 }, { "epoch": 0.19669354292296226, "grad_norm": 9.648268699645996, "learning_rate": 9.705387090149908e-05, "loss": 1.0628, "step": 2903 }, { "epoch": 0.19676129819093435, "grad_norm": 8.19676685333252, "learning_rate": 9.705250188240126e-05, "loss": 0.9395, "step": 2904 }, { "epoch": 0.19682905345890642, "grad_norm": 7.420725345611572, "learning_rate": 9.705113286330345e-05, "loss": 0.947, "step": 2905 }, { "epoch": 0.1968968087268785, "grad_norm": 7.221796989440918, "learning_rate": 9.704976384420563e-05, "loss": 0.8491, "step": 2906 }, { "epoch": 0.1969645639948506, "grad_norm": 9.932676315307617, "learning_rate": 9.704839482510781e-05, "loss": 1.117, "step": 2907 }, { "epoch": 0.1970323192628227, "grad_norm": 11.816266059875488, "learning_rate": 9.704702580600999e-05, "loss": 1.4347, "step": 2908 }, { "epoch": 0.19710007453079476, "grad_norm": 8.804407119750977, "learning_rate": 9.704565678691219e-05, "loss": 0.808, "step": 2909 }, { "epoch": 0.19716782979876685, "grad_norm": 7.740353584289551, "learning_rate": 9.704428776781437e-05, "loss": 1.0732, "step": 2910 }, { "epoch": 0.19723558506673894, "grad_norm": 12.040196418762207, "learning_rate": 9.704291874871655e-05, "loss": 0.947, "step": 2911 }, { "epoch": 0.19730334033471103, "grad_norm": 7.727171421051025, "learning_rate": 9.704154972961873e-05, "loss": 0.8211, "step": 2912 }, { "epoch": 0.19737109560268312, "grad_norm": 8.779428482055664, "learning_rate": 9.704018071052091e-05, "loss": 1.2204, "step": 2913 }, { "epoch": 0.19743885087065519, "grad_norm": 7.907576084136963, "learning_rate": 9.70388116914231e-05, "loss": 1.0594, "step": 2914 }, { "epoch": 0.19750660613862728, "grad_norm": 6.769292831420898, "learning_rate": 9.703744267232528e-05, "loss": 0.8743, "step": 2915 }, { "epoch": 0.19757436140659937, "grad_norm": 8.966355323791504, "learning_rate": 9.703607365322746e-05, "loss": 1.1607, "step": 2916 }, { "epoch": 0.19764211667457146, "grad_norm": 7.389810085296631, "learning_rate": 9.703470463412964e-05, "loss": 0.9728, "step": 2917 }, { "epoch": 0.19770987194254352, "grad_norm": 10.877386093139648, "learning_rate": 9.703333561503184e-05, "loss": 1.0623, "step": 2918 }, { "epoch": 0.19777762721051562, "grad_norm": 9.160116195678711, "learning_rate": 9.703196659593402e-05, "loss": 1.1374, "step": 2919 }, { "epoch": 0.1978453824784877, "grad_norm": 7.6289167404174805, "learning_rate": 9.70305975768362e-05, "loss": 0.9498, "step": 2920 }, { "epoch": 0.1979131377464598, "grad_norm": 8.663583755493164, "learning_rate": 9.702922855773838e-05, "loss": 1.1595, "step": 2921 }, { "epoch": 0.19798089301443186, "grad_norm": 7.595486640930176, "learning_rate": 9.702785953864056e-05, "loss": 1.0623, "step": 2922 }, { "epoch": 0.19804864828240396, "grad_norm": 8.680171966552734, "learning_rate": 9.702649051954275e-05, "loss": 1.0138, "step": 2923 }, { "epoch": 0.19811640355037605, "grad_norm": 9.998015403747559, "learning_rate": 9.702512150044493e-05, "loss": 1.2006, "step": 2924 }, { "epoch": 0.19818415881834814, "grad_norm": 7.801429271697998, "learning_rate": 9.702375248134711e-05, "loss": 0.8467, "step": 2925 }, { "epoch": 0.1982519140863202, "grad_norm": 8.570688247680664, "learning_rate": 9.70223834622493e-05, "loss": 1.1439, "step": 2926 }, { "epoch": 0.1983196693542923, "grad_norm": 7.835936069488525, "learning_rate": 9.702101444315149e-05, "loss": 0.9671, "step": 2927 }, { "epoch": 0.19838742462226439, "grad_norm": 8.415708541870117, "learning_rate": 9.701964542405367e-05, "loss": 1.0381, "step": 2928 }, { "epoch": 0.19845517989023648, "grad_norm": 10.370524406433105, "learning_rate": 9.701827640495586e-05, "loss": 1.151, "step": 2929 }, { "epoch": 0.19852293515820854, "grad_norm": 8.228797912597656, "learning_rate": 9.701690738585804e-05, "loss": 1.093, "step": 2930 }, { "epoch": 0.19859069042618063, "grad_norm": 8.070756912231445, "learning_rate": 9.701553836676022e-05, "loss": 1.0272, "step": 2931 }, { "epoch": 0.19865844569415272, "grad_norm": 9.19532585144043, "learning_rate": 9.701416934766242e-05, "loss": 1.1195, "step": 2932 }, { "epoch": 0.19872620096212482, "grad_norm": 10.692606925964355, "learning_rate": 9.70128003285646e-05, "loss": 1.3148, "step": 2933 }, { "epoch": 0.19879395623009688, "grad_norm": 10.058424949645996, "learning_rate": 9.701143130946678e-05, "loss": 1.4236, "step": 2934 }, { "epoch": 0.19886171149806897, "grad_norm": 6.197395324707031, "learning_rate": 9.701006229036896e-05, "loss": 0.8822, "step": 2935 }, { "epoch": 0.19892946676604106, "grad_norm": 7.603270053863525, "learning_rate": 9.700869327127114e-05, "loss": 0.9655, "step": 2936 }, { "epoch": 0.19899722203401315, "grad_norm": 8.192676544189453, "learning_rate": 9.700732425217333e-05, "loss": 0.9668, "step": 2937 }, { "epoch": 0.19906497730198522, "grad_norm": 7.121623992919922, "learning_rate": 9.700595523307551e-05, "loss": 1.0037, "step": 2938 }, { "epoch": 0.1991327325699573, "grad_norm": 6.8974127769470215, "learning_rate": 9.700458621397769e-05, "loss": 0.876, "step": 2939 }, { "epoch": 0.1992004878379294, "grad_norm": 7.590656757354736, "learning_rate": 9.700321719487987e-05, "loss": 1.0346, "step": 2940 }, { "epoch": 0.1992682431059015, "grad_norm": 8.530266761779785, "learning_rate": 9.700184817578207e-05, "loss": 1.0882, "step": 2941 }, { "epoch": 0.19933599837387356, "grad_norm": 8.064129829406738, "learning_rate": 9.700047915668425e-05, "loss": 0.9949, "step": 2942 }, { "epoch": 0.19940375364184565, "grad_norm": 7.23117208480835, "learning_rate": 9.699911013758643e-05, "loss": 0.7269, "step": 2943 }, { "epoch": 0.19947150890981774, "grad_norm": 7.326268196105957, "learning_rate": 9.69977411184886e-05, "loss": 0.8542, "step": 2944 }, { "epoch": 0.19953926417778983, "grad_norm": 7.708505153656006, "learning_rate": 9.699637209939079e-05, "loss": 0.8206, "step": 2945 }, { "epoch": 0.1996070194457619, "grad_norm": 10.134513854980469, "learning_rate": 9.699500308029298e-05, "loss": 1.0079, "step": 2946 }, { "epoch": 0.199674774713734, "grad_norm": 9.264663696289062, "learning_rate": 9.699363406119516e-05, "loss": 1.1702, "step": 2947 }, { "epoch": 0.19974252998170608, "grad_norm": 8.894827842712402, "learning_rate": 9.699226504209734e-05, "loss": 0.9184, "step": 2948 }, { "epoch": 0.19981028524967817, "grad_norm": 6.935434341430664, "learning_rate": 9.699089602299952e-05, "loss": 0.8554, "step": 2949 }, { "epoch": 0.19987804051765024, "grad_norm": 9.57607364654541, "learning_rate": 9.698952700390172e-05, "loss": 1.0937, "step": 2950 }, { "epoch": 0.19994579578562233, "grad_norm": 7.99752140045166, "learning_rate": 9.69881579848039e-05, "loss": 1.0797, "step": 2951 }, { "epoch": 0.20001355105359442, "grad_norm": 8.067659378051758, "learning_rate": 9.698678896570608e-05, "loss": 1.0515, "step": 2952 }, { "epoch": 0.2000813063215665, "grad_norm": 9.66697883605957, "learning_rate": 9.698541994660826e-05, "loss": 1.289, "step": 2953 }, { "epoch": 0.20014906158953857, "grad_norm": 7.3660664558410645, "learning_rate": 9.698405092751044e-05, "loss": 0.7409, "step": 2954 }, { "epoch": 0.20021681685751067, "grad_norm": 8.254073143005371, "learning_rate": 9.698268190841263e-05, "loss": 1.1095, "step": 2955 }, { "epoch": 0.20028457212548276, "grad_norm": 8.221102714538574, "learning_rate": 9.698131288931481e-05, "loss": 1.0083, "step": 2956 }, { "epoch": 0.20035232739345485, "grad_norm": 9.813411712646484, "learning_rate": 9.697994387021699e-05, "loss": 1.2888, "step": 2957 }, { "epoch": 0.2004200826614269, "grad_norm": 10.87628173828125, "learning_rate": 9.697857485111917e-05, "loss": 1.16, "step": 2958 }, { "epoch": 0.200487837929399, "grad_norm": 7.094732284545898, "learning_rate": 9.697720583202135e-05, "loss": 0.8959, "step": 2959 }, { "epoch": 0.2005555931973711, "grad_norm": 9.814677238464355, "learning_rate": 9.697583681292355e-05, "loss": 1.1011, "step": 2960 }, { "epoch": 0.2006233484653432, "grad_norm": 8.982966423034668, "learning_rate": 9.697446779382573e-05, "loss": 1.001, "step": 2961 }, { "epoch": 0.20069110373331525, "grad_norm": 8.49453067779541, "learning_rate": 9.697309877472791e-05, "loss": 0.9848, "step": 2962 }, { "epoch": 0.20075885900128734, "grad_norm": 7.239814758300781, "learning_rate": 9.697172975563009e-05, "loss": 0.8252, "step": 2963 }, { "epoch": 0.20082661426925943, "grad_norm": 10.874746322631836, "learning_rate": 9.697036073653228e-05, "loss": 1.0683, "step": 2964 }, { "epoch": 0.20089436953723153, "grad_norm": 7.679197311401367, "learning_rate": 9.696899171743446e-05, "loss": 1.2797, "step": 2965 }, { "epoch": 0.20096212480520362, "grad_norm": 10.089177131652832, "learning_rate": 9.696762269833664e-05, "loss": 1.1905, "step": 2966 }, { "epoch": 0.20102988007317568, "grad_norm": 8.182350158691406, "learning_rate": 9.696625367923882e-05, "loss": 0.9917, "step": 2967 }, { "epoch": 0.20109763534114777, "grad_norm": 7.8756256103515625, "learning_rate": 9.6964884660141e-05, "loss": 1.0368, "step": 2968 }, { "epoch": 0.20116539060911987, "grad_norm": 9.193910598754883, "learning_rate": 9.69635156410432e-05, "loss": 1.094, "step": 2969 }, { "epoch": 0.20123314587709196, "grad_norm": 8.854869842529297, "learning_rate": 9.696214662194538e-05, "loss": 1.1344, "step": 2970 }, { "epoch": 0.20130090114506402, "grad_norm": 10.419108390808105, "learning_rate": 9.696077760284756e-05, "loss": 0.9937, "step": 2971 }, { "epoch": 0.2013686564130361, "grad_norm": 9.329347610473633, "learning_rate": 9.695940858374975e-05, "loss": 0.9454, "step": 2972 }, { "epoch": 0.2014364116810082, "grad_norm": 8.23154067993164, "learning_rate": 9.695803956465193e-05, "loss": 0.8691, "step": 2973 }, { "epoch": 0.2015041669489803, "grad_norm": 8.016939163208008, "learning_rate": 9.695667054555411e-05, "loss": 0.9182, "step": 2974 }, { "epoch": 0.20157192221695236, "grad_norm": 9.717400550842285, "learning_rate": 9.69553015264563e-05, "loss": 1.1251, "step": 2975 }, { "epoch": 0.20163967748492445, "grad_norm": 10.470111846923828, "learning_rate": 9.695393250735849e-05, "loss": 1.1634, "step": 2976 }, { "epoch": 0.20170743275289654, "grad_norm": 8.540326118469238, "learning_rate": 9.695256348826067e-05, "loss": 0.9229, "step": 2977 }, { "epoch": 0.20177518802086863, "grad_norm": 6.997597694396973, "learning_rate": 9.695119446916286e-05, "loss": 0.9911, "step": 2978 }, { "epoch": 0.2018429432888407, "grad_norm": 7.23951530456543, "learning_rate": 9.694982545006504e-05, "loss": 0.8372, "step": 2979 }, { "epoch": 0.2019106985568128, "grad_norm": 9.217951774597168, "learning_rate": 9.694845643096722e-05, "loss": 0.8154, "step": 2980 }, { "epoch": 0.20197845382478488, "grad_norm": 8.128033638000488, "learning_rate": 9.69470874118694e-05, "loss": 0.9494, "step": 2981 }, { "epoch": 0.20204620909275697, "grad_norm": 8.328935623168945, "learning_rate": 9.694571839277158e-05, "loss": 1.0537, "step": 2982 }, { "epoch": 0.20211396436072904, "grad_norm": 8.877389907836914, "learning_rate": 9.694434937367378e-05, "loss": 0.8683, "step": 2983 }, { "epoch": 0.20218171962870113, "grad_norm": 7.285436630249023, "learning_rate": 9.694298035457596e-05, "loss": 0.7514, "step": 2984 }, { "epoch": 0.20224947489667322, "grad_norm": 9.209798812866211, "learning_rate": 9.694161133547814e-05, "loss": 1.0771, "step": 2985 }, { "epoch": 0.2023172301646453, "grad_norm": 8.625777244567871, "learning_rate": 9.694024231638032e-05, "loss": 1.0085, "step": 2986 }, { "epoch": 0.20238498543261738, "grad_norm": 8.50123405456543, "learning_rate": 9.693887329728251e-05, "loss": 0.969, "step": 2987 }, { "epoch": 0.20245274070058947, "grad_norm": 7.314642429351807, "learning_rate": 9.693750427818469e-05, "loss": 0.8075, "step": 2988 }, { "epoch": 0.20252049596856156, "grad_norm": 9.474241256713867, "learning_rate": 9.693613525908687e-05, "loss": 1.0989, "step": 2989 }, { "epoch": 0.20258825123653365, "grad_norm": 7.3510637283325195, "learning_rate": 9.693476623998905e-05, "loss": 0.8044, "step": 2990 }, { "epoch": 0.20265600650450571, "grad_norm": 7.203106880187988, "learning_rate": 9.693339722089123e-05, "loss": 1.0359, "step": 2991 }, { "epoch": 0.2027237617724778, "grad_norm": 6.71024227142334, "learning_rate": 9.693202820179343e-05, "loss": 0.8687, "step": 2992 }, { "epoch": 0.2027915170404499, "grad_norm": 8.327759742736816, "learning_rate": 9.69306591826956e-05, "loss": 1.0042, "step": 2993 }, { "epoch": 0.202859272308422, "grad_norm": 8.682476997375488, "learning_rate": 9.692929016359779e-05, "loss": 1.0273, "step": 2994 }, { "epoch": 0.20292702757639405, "grad_norm": 8.555792808532715, "learning_rate": 9.692792114449997e-05, "loss": 0.8712, "step": 2995 }, { "epoch": 0.20299478284436615, "grad_norm": 11.036639213562012, "learning_rate": 9.692655212540216e-05, "loss": 1.1452, "step": 2996 }, { "epoch": 0.20306253811233824, "grad_norm": 10.207952499389648, "learning_rate": 9.692518310630434e-05, "loss": 1.2267, "step": 2997 }, { "epoch": 0.20313029338031033, "grad_norm": 8.715107917785645, "learning_rate": 9.692381408720652e-05, "loss": 1.151, "step": 2998 }, { "epoch": 0.2031980486482824, "grad_norm": 8.72461986541748, "learning_rate": 9.69224450681087e-05, "loss": 0.9787, "step": 2999 }, { "epoch": 0.20326580391625448, "grad_norm": 8.717243194580078, "learning_rate": 9.692107604901088e-05, "loss": 1.0734, "step": 3000 }, { "epoch": 0.20333355918422658, "grad_norm": 7.039597511291504, "learning_rate": 9.691970702991308e-05, "loss": 0.9215, "step": 3001 }, { "epoch": 0.20340131445219867, "grad_norm": 10.568238258361816, "learning_rate": 9.691833801081526e-05, "loss": 1.2248, "step": 3002 }, { "epoch": 0.20346906972017073, "grad_norm": 9.515549659729004, "learning_rate": 9.691696899171744e-05, "loss": 1.1401, "step": 3003 }, { "epoch": 0.20353682498814282, "grad_norm": 9.650483131408691, "learning_rate": 9.691559997261962e-05, "loss": 0.9839, "step": 3004 }, { "epoch": 0.20360458025611491, "grad_norm": 6.817119598388672, "learning_rate": 9.691423095352181e-05, "loss": 0.8542, "step": 3005 }, { "epoch": 0.203672335524087, "grad_norm": 7.616591930389404, "learning_rate": 9.691286193442399e-05, "loss": 0.9647, "step": 3006 }, { "epoch": 0.20374009079205907, "grad_norm": 7.2600274085998535, "learning_rate": 9.691149291532617e-05, "loss": 0.962, "step": 3007 }, { "epoch": 0.20380784606003116, "grad_norm": 9.714008331298828, "learning_rate": 9.691012389622835e-05, "loss": 1.1567, "step": 3008 }, { "epoch": 0.20387560132800325, "grad_norm": 9.095394134521484, "learning_rate": 9.690875487713053e-05, "loss": 1.1315, "step": 3009 }, { "epoch": 0.20394335659597534, "grad_norm": 9.683954238891602, "learning_rate": 9.690738585803273e-05, "loss": 1.4039, "step": 3010 }, { "epoch": 0.2040111118639474, "grad_norm": 8.829015731811523, "learning_rate": 9.69060168389349e-05, "loss": 0.9176, "step": 3011 }, { "epoch": 0.2040788671319195, "grad_norm": 7.835269927978516, "learning_rate": 9.690464781983709e-05, "loss": 1.0077, "step": 3012 }, { "epoch": 0.2041466223998916, "grad_norm": 9.94642448425293, "learning_rate": 9.690327880073927e-05, "loss": 1.0815, "step": 3013 }, { "epoch": 0.20421437766786368, "grad_norm": 8.184757232666016, "learning_rate": 9.690190978164145e-05, "loss": 0.84, "step": 3014 }, { "epoch": 0.20428213293583575, "grad_norm": 9.060220718383789, "learning_rate": 9.690054076254364e-05, "loss": 0.907, "step": 3015 }, { "epoch": 0.20434988820380784, "grad_norm": 6.848534107208252, "learning_rate": 9.689917174344582e-05, "loss": 0.7549, "step": 3016 }, { "epoch": 0.20441764347177993, "grad_norm": 7.820966720581055, "learning_rate": 9.6897802724348e-05, "loss": 0.9899, "step": 3017 }, { "epoch": 0.20448539873975202, "grad_norm": 10.148963928222656, "learning_rate": 9.68964337052502e-05, "loss": 1.2403, "step": 3018 }, { "epoch": 0.20455315400772411, "grad_norm": 8.273184776306152, "learning_rate": 9.689506468615238e-05, "loss": 1.0002, "step": 3019 }, { "epoch": 0.20462090927569618, "grad_norm": 7.618801593780518, "learning_rate": 9.689369566705456e-05, "loss": 0.8864, "step": 3020 }, { "epoch": 0.20468866454366827, "grad_norm": 7.952611446380615, "learning_rate": 9.689232664795675e-05, "loss": 0.8002, "step": 3021 }, { "epoch": 0.20475641981164036, "grad_norm": 9.938977241516113, "learning_rate": 9.689095762885893e-05, "loss": 0.8956, "step": 3022 }, { "epoch": 0.20482417507961245, "grad_norm": 7.807236194610596, "learning_rate": 9.688958860976111e-05, "loss": 1.1777, "step": 3023 }, { "epoch": 0.20489193034758452, "grad_norm": 7.7249369621276855, "learning_rate": 9.68882195906633e-05, "loss": 0.8881, "step": 3024 }, { "epoch": 0.2049596856155566, "grad_norm": 7.747461795806885, "learning_rate": 9.688685057156549e-05, "loss": 0.9844, "step": 3025 }, { "epoch": 0.2050274408835287, "grad_norm": 10.017412185668945, "learning_rate": 9.688548155246767e-05, "loss": 1.0815, "step": 3026 }, { "epoch": 0.2050951961515008, "grad_norm": 6.54990816116333, "learning_rate": 9.688411253336985e-05, "loss": 0.9319, "step": 3027 }, { "epoch": 0.20516295141947286, "grad_norm": 7.358734130859375, "learning_rate": 9.688274351427204e-05, "loss": 0.9313, "step": 3028 }, { "epoch": 0.20523070668744495, "grad_norm": 7.608468055725098, "learning_rate": 9.688137449517422e-05, "loss": 1.0071, "step": 3029 }, { "epoch": 0.20529846195541704, "grad_norm": 7.013155937194824, "learning_rate": 9.68800054760764e-05, "loss": 0.9419, "step": 3030 }, { "epoch": 0.20536621722338913, "grad_norm": 9.907796859741211, "learning_rate": 9.687863645697858e-05, "loss": 1.2082, "step": 3031 }, { "epoch": 0.2054339724913612, "grad_norm": 9.059138298034668, "learning_rate": 9.687726743788076e-05, "loss": 1.0712, "step": 3032 }, { "epoch": 0.20550172775933329, "grad_norm": 10.519928932189941, "learning_rate": 9.687589841878296e-05, "loss": 1.1627, "step": 3033 }, { "epoch": 0.20556948302730538, "grad_norm": 8.381184577941895, "learning_rate": 9.687452939968514e-05, "loss": 1.087, "step": 3034 }, { "epoch": 0.20563723829527747, "grad_norm": 6.514460563659668, "learning_rate": 9.687316038058732e-05, "loss": 0.8657, "step": 3035 }, { "epoch": 0.20570499356324953, "grad_norm": 10.034708023071289, "learning_rate": 9.68717913614895e-05, "loss": 1.1529, "step": 3036 }, { "epoch": 0.20577274883122162, "grad_norm": 7.202263355255127, "learning_rate": 9.687042234239168e-05, "loss": 0.7923, "step": 3037 }, { "epoch": 0.20584050409919372, "grad_norm": 9.7435302734375, "learning_rate": 9.686905332329387e-05, "loss": 1.5071, "step": 3038 }, { "epoch": 0.2059082593671658, "grad_norm": 9.57016658782959, "learning_rate": 9.686768430419605e-05, "loss": 0.883, "step": 3039 }, { "epoch": 0.20597601463513787, "grad_norm": 7.3575029373168945, "learning_rate": 9.686631528509823e-05, "loss": 0.9999, "step": 3040 }, { "epoch": 0.20604376990310996, "grad_norm": 9.0224027633667, "learning_rate": 9.686494626600041e-05, "loss": 1.0121, "step": 3041 }, { "epoch": 0.20611152517108206, "grad_norm": 11.173224449157715, "learning_rate": 9.68635772469026e-05, "loss": 1.0556, "step": 3042 }, { "epoch": 0.20617928043905415, "grad_norm": 8.858287811279297, "learning_rate": 9.686220822780479e-05, "loss": 0.9377, "step": 3043 }, { "epoch": 0.2062470357070262, "grad_norm": 7.3096795082092285, "learning_rate": 9.686083920870697e-05, "loss": 0.8808, "step": 3044 }, { "epoch": 0.2063147909749983, "grad_norm": 8.700214385986328, "learning_rate": 9.685947018960915e-05, "loss": 1.2121, "step": 3045 }, { "epoch": 0.2063825462429704, "grad_norm": 10.57944107055664, "learning_rate": 9.685810117051133e-05, "loss": 1.0931, "step": 3046 }, { "epoch": 0.20645030151094249, "grad_norm": 8.765487670898438, "learning_rate": 9.685673215141352e-05, "loss": 1.0522, "step": 3047 }, { "epoch": 0.20651805677891455, "grad_norm": 7.717139720916748, "learning_rate": 9.68553631323157e-05, "loss": 0.9492, "step": 3048 }, { "epoch": 0.20658581204688664, "grad_norm": 9.301026344299316, "learning_rate": 9.685399411321788e-05, "loss": 1.0191, "step": 3049 }, { "epoch": 0.20665356731485873, "grad_norm": 10.251668930053711, "learning_rate": 9.685262509412006e-05, "loss": 0.991, "step": 3050 }, { "epoch": 0.20672132258283082, "grad_norm": 11.597551345825195, "learning_rate": 9.685125607502226e-05, "loss": 1.0169, "step": 3051 }, { "epoch": 0.2067890778508029, "grad_norm": 10.293901443481445, "learning_rate": 9.684988705592444e-05, "loss": 1.0623, "step": 3052 }, { "epoch": 0.20685683311877498, "grad_norm": 8.496854782104492, "learning_rate": 9.684851803682662e-05, "loss": 1.0466, "step": 3053 }, { "epoch": 0.20692458838674707, "grad_norm": 6.76383638381958, "learning_rate": 9.68471490177288e-05, "loss": 0.8155, "step": 3054 }, { "epoch": 0.20699234365471916, "grad_norm": 8.168519020080566, "learning_rate": 9.684577999863098e-05, "loss": 0.9663, "step": 3055 }, { "epoch": 0.20706009892269123, "grad_norm": 8.0086030960083, "learning_rate": 9.684441097953317e-05, "loss": 0.889, "step": 3056 }, { "epoch": 0.20712785419066332, "grad_norm": 8.347359657287598, "learning_rate": 9.684304196043535e-05, "loss": 0.938, "step": 3057 }, { "epoch": 0.2071956094586354, "grad_norm": 8.0283203125, "learning_rate": 9.684167294133753e-05, "loss": 0.7633, "step": 3058 }, { "epoch": 0.2072633647266075, "grad_norm": 8.509317398071289, "learning_rate": 9.684030392223971e-05, "loss": 1.1052, "step": 3059 }, { "epoch": 0.20733111999457957, "grad_norm": 9.441505432128906, "learning_rate": 9.683893490314189e-05, "loss": 1.1177, "step": 3060 }, { "epoch": 0.20739887526255166, "grad_norm": 8.131098747253418, "learning_rate": 9.683756588404409e-05, "loss": 0.9272, "step": 3061 }, { "epoch": 0.20746663053052375, "grad_norm": 8.423643112182617, "learning_rate": 9.683619686494627e-05, "loss": 0.875, "step": 3062 }, { "epoch": 0.20753438579849584, "grad_norm": 7.513223171234131, "learning_rate": 9.683482784584845e-05, "loss": 1.0659, "step": 3063 }, { "epoch": 0.2076021410664679, "grad_norm": 7.5881171226501465, "learning_rate": 9.683345882675063e-05, "loss": 0.9466, "step": 3064 }, { "epoch": 0.20766989633444, "grad_norm": 8.717775344848633, "learning_rate": 9.683208980765282e-05, "loss": 1.1019, "step": 3065 }, { "epoch": 0.2077376516024121, "grad_norm": 11.917694091796875, "learning_rate": 9.6830720788555e-05, "loss": 1.1085, "step": 3066 }, { "epoch": 0.20780540687038418, "grad_norm": 9.28741455078125, "learning_rate": 9.682935176945718e-05, "loss": 1.1458, "step": 3067 }, { "epoch": 0.20787316213835624, "grad_norm": 6.746860980987549, "learning_rate": 9.682798275035938e-05, "loss": 0.929, "step": 3068 }, { "epoch": 0.20794091740632833, "grad_norm": 8.48763370513916, "learning_rate": 9.682661373126156e-05, "loss": 1.0702, "step": 3069 }, { "epoch": 0.20800867267430043, "grad_norm": 8.871308326721191, "learning_rate": 9.682524471216374e-05, "loss": 0.9199, "step": 3070 }, { "epoch": 0.20807642794227252, "grad_norm": 8.275801658630371, "learning_rate": 9.682387569306593e-05, "loss": 1.0756, "step": 3071 }, { "epoch": 0.2081441832102446, "grad_norm": 8.985222816467285, "learning_rate": 9.682250667396811e-05, "loss": 1.2364, "step": 3072 }, { "epoch": 0.20821193847821667, "grad_norm": 7.749682426452637, "learning_rate": 9.682113765487029e-05, "loss": 0.9444, "step": 3073 }, { "epoch": 0.20827969374618877, "grad_norm": 9.111614227294922, "learning_rate": 9.681976863577248e-05, "loss": 1.338, "step": 3074 }, { "epoch": 0.20834744901416086, "grad_norm": 7.2874674797058105, "learning_rate": 9.681839961667466e-05, "loss": 0.838, "step": 3075 }, { "epoch": 0.20841520428213295, "grad_norm": 6.873099327087402, "learning_rate": 9.681703059757685e-05, "loss": 0.9194, "step": 3076 }, { "epoch": 0.208482959550105, "grad_norm": 7.564418792724609, "learning_rate": 9.681566157847903e-05, "loss": 1.1209, "step": 3077 }, { "epoch": 0.2085507148180771, "grad_norm": 9.088560104370117, "learning_rate": 9.68142925593812e-05, "loss": 0.8425, "step": 3078 }, { "epoch": 0.2086184700860492, "grad_norm": 7.182369709014893, "learning_rate": 9.68129235402834e-05, "loss": 1.0938, "step": 3079 }, { "epoch": 0.2086862253540213, "grad_norm": 8.853677749633789, "learning_rate": 9.681155452118558e-05, "loss": 1.2611, "step": 3080 }, { "epoch": 0.20875398062199335, "grad_norm": 8.56440258026123, "learning_rate": 9.681018550208776e-05, "loss": 1.1112, "step": 3081 }, { "epoch": 0.20882173588996544, "grad_norm": 8.356021881103516, "learning_rate": 9.680881648298994e-05, "loss": 1.0044, "step": 3082 }, { "epoch": 0.20888949115793753, "grad_norm": 9.083736419677734, "learning_rate": 9.680744746389213e-05, "loss": 1.2065, "step": 3083 }, { "epoch": 0.20895724642590963, "grad_norm": 7.990222454071045, "learning_rate": 9.680607844479432e-05, "loss": 1.0179, "step": 3084 }, { "epoch": 0.2090250016938817, "grad_norm": 8.381364822387695, "learning_rate": 9.68047094256965e-05, "loss": 0.8392, "step": 3085 }, { "epoch": 0.20909275696185378, "grad_norm": 9.017950057983398, "learning_rate": 9.680334040659868e-05, "loss": 0.9757, "step": 3086 }, { "epoch": 0.20916051222982587, "grad_norm": 8.525566101074219, "learning_rate": 9.680197138750086e-05, "loss": 1.1521, "step": 3087 }, { "epoch": 0.20922826749779797, "grad_norm": 8.432148933410645, "learning_rate": 9.680060236840305e-05, "loss": 0.8559, "step": 3088 }, { "epoch": 0.20929602276577003, "grad_norm": 9.985367774963379, "learning_rate": 9.679923334930523e-05, "loss": 1.2035, "step": 3089 }, { "epoch": 0.20936377803374212, "grad_norm": 9.771974563598633, "learning_rate": 9.679786433020741e-05, "loss": 1.1614, "step": 3090 }, { "epoch": 0.2094315333017142, "grad_norm": 10.646146774291992, "learning_rate": 9.679649531110959e-05, "loss": 1.2165, "step": 3091 }, { "epoch": 0.2094992885696863, "grad_norm": 8.93340015411377, "learning_rate": 9.679512629201177e-05, "loss": 0.9883, "step": 3092 }, { "epoch": 0.20956704383765837, "grad_norm": 8.498669624328613, "learning_rate": 9.679375727291397e-05, "loss": 1.0318, "step": 3093 }, { "epoch": 0.20963479910563046, "grad_norm": 7.536258220672607, "learning_rate": 9.679238825381615e-05, "loss": 0.9949, "step": 3094 }, { "epoch": 0.20970255437360255, "grad_norm": 8.314896583557129, "learning_rate": 9.679101923471833e-05, "loss": 1.0502, "step": 3095 }, { "epoch": 0.20977030964157464, "grad_norm": 7.470542907714844, "learning_rate": 9.67896502156205e-05, "loss": 0.8871, "step": 3096 }, { "epoch": 0.2098380649095467, "grad_norm": 8.951095581054688, "learning_rate": 9.67882811965227e-05, "loss": 0.9793, "step": 3097 }, { "epoch": 0.2099058201775188, "grad_norm": 7.879035472869873, "learning_rate": 9.678691217742488e-05, "loss": 1.0193, "step": 3098 }, { "epoch": 0.2099735754454909, "grad_norm": 8.890814781188965, "learning_rate": 9.678554315832706e-05, "loss": 0.9738, "step": 3099 }, { "epoch": 0.21004133071346298, "grad_norm": 8.863816261291504, "learning_rate": 9.678417413922924e-05, "loss": 1.1213, "step": 3100 }, { "epoch": 0.21010908598143505, "grad_norm": 9.59538745880127, "learning_rate": 9.678280512013142e-05, "loss": 1.0215, "step": 3101 }, { "epoch": 0.21017684124940714, "grad_norm": 8.811614990234375, "learning_rate": 9.678143610103362e-05, "loss": 0.8703, "step": 3102 }, { "epoch": 0.21024459651737923, "grad_norm": 7.274720191955566, "learning_rate": 9.67800670819358e-05, "loss": 0.898, "step": 3103 }, { "epoch": 0.21031235178535132, "grad_norm": 11.239364624023438, "learning_rate": 9.677869806283798e-05, "loss": 0.9823, "step": 3104 }, { "epoch": 0.21038010705332338, "grad_norm": 8.807086944580078, "learning_rate": 9.677732904374016e-05, "loss": 1.0074, "step": 3105 }, { "epoch": 0.21044786232129548, "grad_norm": 9.065536499023438, "learning_rate": 9.677596002464235e-05, "loss": 1.1861, "step": 3106 }, { "epoch": 0.21051561758926757, "grad_norm": 11.035104751586914, "learning_rate": 9.677459100554453e-05, "loss": 1.0067, "step": 3107 }, { "epoch": 0.21058337285723966, "grad_norm": 8.010696411132812, "learning_rate": 9.677322198644671e-05, "loss": 1.0855, "step": 3108 }, { "epoch": 0.21065112812521172, "grad_norm": 9.104195594787598, "learning_rate": 9.677185296734889e-05, "loss": 1.0497, "step": 3109 }, { "epoch": 0.21071888339318381, "grad_norm": 8.731512069702148, "learning_rate": 9.677048394825107e-05, "loss": 1.1108, "step": 3110 }, { "epoch": 0.2107866386611559, "grad_norm": 8.823514938354492, "learning_rate": 9.676911492915327e-05, "loss": 1.0271, "step": 3111 }, { "epoch": 0.210854393929128, "grad_norm": 7.446425914764404, "learning_rate": 9.676774591005545e-05, "loss": 0.9182, "step": 3112 }, { "epoch": 0.21092214919710006, "grad_norm": 10.75915241241455, "learning_rate": 9.676637689095763e-05, "loss": 0.93, "step": 3113 }, { "epoch": 0.21098990446507215, "grad_norm": 10.065240859985352, "learning_rate": 9.676500787185982e-05, "loss": 1.0508, "step": 3114 }, { "epoch": 0.21105765973304424, "grad_norm": 8.790117263793945, "learning_rate": 9.6763638852762e-05, "loss": 0.8971, "step": 3115 }, { "epoch": 0.21112541500101634, "grad_norm": 8.286596298217773, "learning_rate": 9.676226983366418e-05, "loss": 0.9403, "step": 3116 }, { "epoch": 0.2111931702689884, "grad_norm": 8.542399406433105, "learning_rate": 9.676090081456637e-05, "loss": 1.1638, "step": 3117 }, { "epoch": 0.2112609255369605, "grad_norm": 9.461727142333984, "learning_rate": 9.675953179546856e-05, "loss": 1.0859, "step": 3118 }, { "epoch": 0.21132868080493258, "grad_norm": 7.0917229652404785, "learning_rate": 9.675816277637074e-05, "loss": 0.9931, "step": 3119 }, { "epoch": 0.21139643607290468, "grad_norm": 9.192744255065918, "learning_rate": 9.675679375727293e-05, "loss": 1.2138, "step": 3120 }, { "epoch": 0.21146419134087674, "grad_norm": 7.744256973266602, "learning_rate": 9.675542473817511e-05, "loss": 1.3615, "step": 3121 }, { "epoch": 0.21153194660884883, "grad_norm": 6.915426254272461, "learning_rate": 9.675405571907729e-05, "loss": 0.89, "step": 3122 }, { "epoch": 0.21159970187682092, "grad_norm": 9.888227462768555, "learning_rate": 9.675268669997947e-05, "loss": 0.9569, "step": 3123 }, { "epoch": 0.21166745714479301, "grad_norm": 7.541590690612793, "learning_rate": 9.675131768088165e-05, "loss": 0.9638, "step": 3124 }, { "epoch": 0.2117352124127651, "grad_norm": 7.883132457733154, "learning_rate": 9.674994866178384e-05, "loss": 0.8901, "step": 3125 }, { "epoch": 0.21180296768073717, "grad_norm": 8.493675231933594, "learning_rate": 9.674857964268602e-05, "loss": 0.8894, "step": 3126 }, { "epoch": 0.21187072294870926, "grad_norm": 7.348284721374512, "learning_rate": 9.67472106235882e-05, "loss": 0.8721, "step": 3127 }, { "epoch": 0.21193847821668135, "grad_norm": 9.094710350036621, "learning_rate": 9.674584160449039e-05, "loss": 0.9255, "step": 3128 }, { "epoch": 0.21200623348465344, "grad_norm": 7.316446304321289, "learning_rate": 9.674447258539258e-05, "loss": 1.2577, "step": 3129 }, { "epoch": 0.2120739887526255, "grad_norm": 8.105271339416504, "learning_rate": 9.674310356629476e-05, "loss": 0.8157, "step": 3130 }, { "epoch": 0.2121417440205976, "grad_norm": 8.433457374572754, "learning_rate": 9.674173454719694e-05, "loss": 1.0398, "step": 3131 }, { "epoch": 0.2122094992885697, "grad_norm": 7.787237644195557, "learning_rate": 9.674036552809912e-05, "loss": 0.7095, "step": 3132 }, { "epoch": 0.21227725455654178, "grad_norm": 10.60180950164795, "learning_rate": 9.67389965090013e-05, "loss": 1.0482, "step": 3133 }, { "epoch": 0.21234500982451385, "grad_norm": 8.428773880004883, "learning_rate": 9.67376274899035e-05, "loss": 0.8677, "step": 3134 }, { "epoch": 0.21241276509248594, "grad_norm": 8.204195022583008, "learning_rate": 9.673625847080568e-05, "loss": 1.14, "step": 3135 }, { "epoch": 0.21248052036045803, "grad_norm": 6.449087619781494, "learning_rate": 9.673488945170786e-05, "loss": 1.0181, "step": 3136 }, { "epoch": 0.21254827562843012, "grad_norm": 9.588041305541992, "learning_rate": 9.673352043261004e-05, "loss": 0.9996, "step": 3137 }, { "epoch": 0.21261603089640219, "grad_norm": 9.626228332519531, "learning_rate": 9.673215141351223e-05, "loss": 1.0501, "step": 3138 }, { "epoch": 0.21268378616437428, "grad_norm": 9.036309242248535, "learning_rate": 9.673078239441441e-05, "loss": 1.093, "step": 3139 }, { "epoch": 0.21275154143234637, "grad_norm": 9.415257453918457, "learning_rate": 9.672941337531659e-05, "loss": 1.0983, "step": 3140 }, { "epoch": 0.21281929670031846, "grad_norm": 8.309000015258789, "learning_rate": 9.672804435621877e-05, "loss": 1.0803, "step": 3141 }, { "epoch": 0.21288705196829052, "grad_norm": 7.420774459838867, "learning_rate": 9.672667533712095e-05, "loss": 1.1027, "step": 3142 }, { "epoch": 0.21295480723626262, "grad_norm": 9.291664123535156, "learning_rate": 9.672530631802314e-05, "loss": 1.1617, "step": 3143 }, { "epoch": 0.2130225625042347, "grad_norm": 7.656317234039307, "learning_rate": 9.672393729892533e-05, "loss": 0.9423, "step": 3144 }, { "epoch": 0.2130903177722068, "grad_norm": 5.812994003295898, "learning_rate": 9.67225682798275e-05, "loss": 0.9187, "step": 3145 }, { "epoch": 0.21315807304017886, "grad_norm": 9.492706298828125, "learning_rate": 9.672119926072969e-05, "loss": 1.1804, "step": 3146 }, { "epoch": 0.21322582830815096, "grad_norm": 9.37720012664795, "learning_rate": 9.671983024163187e-05, "loss": 0.8885, "step": 3147 }, { "epoch": 0.21329358357612305, "grad_norm": 9.183650016784668, "learning_rate": 9.671846122253406e-05, "loss": 0.8967, "step": 3148 }, { "epoch": 0.21336133884409514, "grad_norm": 8.037003517150879, "learning_rate": 9.671709220343624e-05, "loss": 1.2605, "step": 3149 }, { "epoch": 0.2134290941120672, "grad_norm": 8.694345474243164, "learning_rate": 9.671572318433842e-05, "loss": 1.003, "step": 3150 }, { "epoch": 0.2134968493800393, "grad_norm": 6.429176330566406, "learning_rate": 9.67143541652406e-05, "loss": 1.1018, "step": 3151 }, { "epoch": 0.21356460464801139, "grad_norm": 7.964774131774902, "learning_rate": 9.67129851461428e-05, "loss": 0.9121, "step": 3152 }, { "epoch": 0.21363235991598348, "grad_norm": 7.5509033203125, "learning_rate": 9.671161612704498e-05, "loss": 0.9577, "step": 3153 }, { "epoch": 0.21370011518395554, "grad_norm": 7.968616962432861, "learning_rate": 9.671024710794716e-05, "loss": 1.1337, "step": 3154 }, { "epoch": 0.21376787045192763, "grad_norm": 7.340758800506592, "learning_rate": 9.670887808884934e-05, "loss": 0.8068, "step": 3155 }, { "epoch": 0.21383562571989972, "grad_norm": 7.890449523925781, "learning_rate": 9.670750906975152e-05, "loss": 1.0263, "step": 3156 }, { "epoch": 0.21390338098787182, "grad_norm": 10.908242225646973, "learning_rate": 9.670614005065371e-05, "loss": 1.0384, "step": 3157 }, { "epoch": 0.21397113625584388, "grad_norm": 7.62697696685791, "learning_rate": 9.670477103155589e-05, "loss": 0.918, "step": 3158 }, { "epoch": 0.21403889152381597, "grad_norm": 10.487192153930664, "learning_rate": 9.670340201245807e-05, "loss": 1.1135, "step": 3159 }, { "epoch": 0.21410664679178806, "grad_norm": 7.076718807220459, "learning_rate": 9.670203299336026e-05, "loss": 0.9286, "step": 3160 }, { "epoch": 0.21417440205976015, "grad_norm": 10.05949592590332, "learning_rate": 9.670066397426245e-05, "loss": 1.1317, "step": 3161 }, { "epoch": 0.21424215732773222, "grad_norm": 8.656492233276367, "learning_rate": 9.669929495516463e-05, "loss": 0.9864, "step": 3162 }, { "epoch": 0.2143099125957043, "grad_norm": 10.591787338256836, "learning_rate": 9.669792593606682e-05, "loss": 1.0801, "step": 3163 }, { "epoch": 0.2143776678636764, "grad_norm": 8.19133472442627, "learning_rate": 9.6696556916969e-05, "loss": 0.9377, "step": 3164 }, { "epoch": 0.2144454231316485, "grad_norm": 8.402669906616211, "learning_rate": 9.669518789787118e-05, "loss": 0.9283, "step": 3165 }, { "epoch": 0.21451317839962056, "grad_norm": 9.713315963745117, "learning_rate": 9.669381887877337e-05, "loss": 1.1865, "step": 3166 }, { "epoch": 0.21458093366759265, "grad_norm": 6.777700424194336, "learning_rate": 9.669244985967555e-05, "loss": 0.9094, "step": 3167 }, { "epoch": 0.21464868893556474, "grad_norm": 9.381082534790039, "learning_rate": 9.669108084057773e-05, "loss": 0.7461, "step": 3168 }, { "epoch": 0.21471644420353683, "grad_norm": 8.568790435791016, "learning_rate": 9.668971182147992e-05, "loss": 0.7339, "step": 3169 }, { "epoch": 0.2147841994715089, "grad_norm": 9.257226943969727, "learning_rate": 9.66883428023821e-05, "loss": 1.1892, "step": 3170 }, { "epoch": 0.214851954739481, "grad_norm": 8.544146537780762, "learning_rate": 9.668697378328429e-05, "loss": 1.0307, "step": 3171 }, { "epoch": 0.21491971000745308, "grad_norm": 9.540971755981445, "learning_rate": 9.668560476418647e-05, "loss": 0.8464, "step": 3172 }, { "epoch": 0.21498746527542517, "grad_norm": 9.180089950561523, "learning_rate": 9.668423574508865e-05, "loss": 0.951, "step": 3173 }, { "epoch": 0.21505522054339726, "grad_norm": 10.706409454345703, "learning_rate": 9.668286672599083e-05, "loss": 1.141, "step": 3174 }, { "epoch": 0.21512297581136933, "grad_norm": 9.421865463256836, "learning_rate": 9.668149770689302e-05, "loss": 1.0701, "step": 3175 }, { "epoch": 0.21519073107934142, "grad_norm": 9.120182037353516, "learning_rate": 9.66801286877952e-05, "loss": 1.1618, "step": 3176 }, { "epoch": 0.2152584863473135, "grad_norm": 7.879681587219238, "learning_rate": 9.667875966869738e-05, "loss": 0.8048, "step": 3177 }, { "epoch": 0.2153262416152856, "grad_norm": 7.233819007873535, "learning_rate": 9.667739064959957e-05, "loss": 0.9718, "step": 3178 }, { "epoch": 0.21539399688325767, "grad_norm": 7.79316520690918, "learning_rate": 9.667602163050175e-05, "loss": 1.0999, "step": 3179 }, { "epoch": 0.21546175215122976, "grad_norm": 8.873052597045898, "learning_rate": 9.667465261140394e-05, "loss": 0.9444, "step": 3180 }, { "epoch": 0.21552950741920185, "grad_norm": 7.386806488037109, "learning_rate": 9.667328359230612e-05, "loss": 0.8661, "step": 3181 }, { "epoch": 0.21559726268717394, "grad_norm": 7.682179927825928, "learning_rate": 9.66719145732083e-05, "loss": 0.7904, "step": 3182 }, { "epoch": 0.215665017955146, "grad_norm": 7.9192914962768555, "learning_rate": 9.667054555411048e-05, "loss": 1.0563, "step": 3183 }, { "epoch": 0.2157327732231181, "grad_norm": 6.972530364990234, "learning_rate": 9.666917653501267e-05, "loss": 0.7482, "step": 3184 }, { "epoch": 0.2158005284910902, "grad_norm": 9.844091415405273, "learning_rate": 9.666780751591485e-05, "loss": 1.0289, "step": 3185 }, { "epoch": 0.21586828375906228, "grad_norm": 8.724159240722656, "learning_rate": 9.666643849681704e-05, "loss": 1.1419, "step": 3186 }, { "epoch": 0.21593603902703434, "grad_norm": 8.859795570373535, "learning_rate": 9.666506947771922e-05, "loss": 0.9252, "step": 3187 }, { "epoch": 0.21600379429500643, "grad_norm": 10.741375923156738, "learning_rate": 9.66637004586214e-05, "loss": 1.2328, "step": 3188 }, { "epoch": 0.21607154956297853, "grad_norm": 8.468697547912598, "learning_rate": 9.666233143952359e-05, "loss": 1.1779, "step": 3189 }, { "epoch": 0.21613930483095062, "grad_norm": 7.220376014709473, "learning_rate": 9.666096242042577e-05, "loss": 0.9271, "step": 3190 }, { "epoch": 0.21620706009892268, "grad_norm": 8.3795804977417, "learning_rate": 9.665959340132795e-05, "loss": 1.0941, "step": 3191 }, { "epoch": 0.21627481536689477, "grad_norm": 9.743870735168457, "learning_rate": 9.665822438223013e-05, "loss": 0.9396, "step": 3192 }, { "epoch": 0.21634257063486687, "grad_norm": 8.927998542785645, "learning_rate": 9.665685536313231e-05, "loss": 1.0915, "step": 3193 }, { "epoch": 0.21641032590283896, "grad_norm": 8.214877128601074, "learning_rate": 9.66554863440345e-05, "loss": 0.8767, "step": 3194 }, { "epoch": 0.21647808117081102, "grad_norm": 9.605053901672363, "learning_rate": 9.665411732493669e-05, "loss": 1.1829, "step": 3195 }, { "epoch": 0.2165458364387831, "grad_norm": 8.430122375488281, "learning_rate": 9.665274830583887e-05, "loss": 0.8234, "step": 3196 }, { "epoch": 0.2166135917067552, "grad_norm": 8.868674278259277, "learning_rate": 9.665137928674105e-05, "loss": 1.0467, "step": 3197 }, { "epoch": 0.2166813469747273, "grad_norm": 8.451126098632812, "learning_rate": 9.665001026764324e-05, "loss": 1.134, "step": 3198 }, { "epoch": 0.21674910224269936, "grad_norm": 8.749373435974121, "learning_rate": 9.664864124854542e-05, "loss": 0.9825, "step": 3199 }, { "epoch": 0.21681685751067145, "grad_norm": 8.478997230529785, "learning_rate": 9.66472722294476e-05, "loss": 0.9823, "step": 3200 }, { "epoch": 0.21688461277864354, "grad_norm": 6.554266929626465, "learning_rate": 9.664590321034978e-05, "loss": 1.2293, "step": 3201 }, { "epoch": 0.21695236804661563, "grad_norm": 7.415618896484375, "learning_rate": 9.664453419125196e-05, "loss": 0.9767, "step": 3202 }, { "epoch": 0.2170201233145877, "grad_norm": 9.277578353881836, "learning_rate": 9.664316517215416e-05, "loss": 1.0042, "step": 3203 }, { "epoch": 0.2170878785825598, "grad_norm": 7.1379241943359375, "learning_rate": 9.664179615305634e-05, "loss": 0.924, "step": 3204 }, { "epoch": 0.21715563385053188, "grad_norm": 8.837613105773926, "learning_rate": 9.664042713395852e-05, "loss": 0.9889, "step": 3205 }, { "epoch": 0.21722338911850397, "grad_norm": 8.118345260620117, "learning_rate": 9.663905811486071e-05, "loss": 1.2376, "step": 3206 }, { "epoch": 0.21729114438647604, "grad_norm": 10.383713722229004, "learning_rate": 9.663768909576289e-05, "loss": 0.7988, "step": 3207 }, { "epoch": 0.21735889965444813, "grad_norm": 7.0938262939453125, "learning_rate": 9.663632007666507e-05, "loss": 0.8589, "step": 3208 }, { "epoch": 0.21742665492242022, "grad_norm": 7.732020854949951, "learning_rate": 9.663495105756726e-05, "loss": 1.0837, "step": 3209 }, { "epoch": 0.2174944101903923, "grad_norm": 7.718788146972656, "learning_rate": 9.663358203846944e-05, "loss": 0.8949, "step": 3210 }, { "epoch": 0.21756216545836438, "grad_norm": 9.187674522399902, "learning_rate": 9.663221301937162e-05, "loss": 0.9443, "step": 3211 }, { "epoch": 0.21762992072633647, "grad_norm": 8.596949577331543, "learning_rate": 9.663084400027382e-05, "loss": 1.0855, "step": 3212 }, { "epoch": 0.21769767599430856, "grad_norm": 7.854368209838867, "learning_rate": 9.6629474981176e-05, "loss": 0.9203, "step": 3213 }, { "epoch": 0.21776543126228065, "grad_norm": 9.96597957611084, "learning_rate": 9.662810596207818e-05, "loss": 0.836, "step": 3214 }, { "epoch": 0.21783318653025271, "grad_norm": 7.254636764526367, "learning_rate": 9.662673694298036e-05, "loss": 0.7926, "step": 3215 }, { "epoch": 0.2179009417982248, "grad_norm": 9.079703330993652, "learning_rate": 9.662536792388255e-05, "loss": 1.2072, "step": 3216 }, { "epoch": 0.2179686970661969, "grad_norm": 8.051166534423828, "learning_rate": 9.662399890478473e-05, "loss": 0.9719, "step": 3217 }, { "epoch": 0.218036452334169, "grad_norm": 9.189994812011719, "learning_rate": 9.662262988568691e-05, "loss": 0.9878, "step": 3218 }, { "epoch": 0.21810420760214105, "grad_norm": 9.806111335754395, "learning_rate": 9.66212608665891e-05, "loss": 1.1621, "step": 3219 }, { "epoch": 0.21817196287011315, "grad_norm": 7.833159923553467, "learning_rate": 9.661989184749128e-05, "loss": 0.9811, "step": 3220 }, { "epoch": 0.21823971813808524, "grad_norm": 7.537334442138672, "learning_rate": 9.661852282839347e-05, "loss": 0.9916, "step": 3221 }, { "epoch": 0.21830747340605733, "grad_norm": 9.828896522521973, "learning_rate": 9.661715380929565e-05, "loss": 1.09, "step": 3222 }, { "epoch": 0.2183752286740294, "grad_norm": 7.99267578125, "learning_rate": 9.661578479019783e-05, "loss": 1.0852, "step": 3223 }, { "epoch": 0.21844298394200148, "grad_norm": 8.085976600646973, "learning_rate": 9.661441577110001e-05, "loss": 1.1762, "step": 3224 }, { "epoch": 0.21851073920997358, "grad_norm": 6.523219585418701, "learning_rate": 9.661304675200219e-05, "loss": 1.0814, "step": 3225 }, { "epoch": 0.21857849447794567, "grad_norm": 9.796062469482422, "learning_rate": 9.661167773290438e-05, "loss": 1.1447, "step": 3226 }, { "epoch": 0.21864624974591776, "grad_norm": 8.570687294006348, "learning_rate": 9.661030871380656e-05, "loss": 1.0552, "step": 3227 }, { "epoch": 0.21871400501388982, "grad_norm": 9.70113468170166, "learning_rate": 9.660893969470874e-05, "loss": 1.0379, "step": 3228 }, { "epoch": 0.21878176028186191, "grad_norm": 8.195375442504883, "learning_rate": 9.660757067561093e-05, "loss": 0.9348, "step": 3229 }, { "epoch": 0.218849515549834, "grad_norm": 7.33494758605957, "learning_rate": 9.660620165651312e-05, "loss": 0.8977, "step": 3230 }, { "epoch": 0.2189172708178061, "grad_norm": 7.543430328369141, "learning_rate": 9.66048326374153e-05, "loss": 1.023, "step": 3231 }, { "epoch": 0.21898502608577816, "grad_norm": 8.45494556427002, "learning_rate": 9.660346361831748e-05, "loss": 1.2995, "step": 3232 }, { "epoch": 0.21905278135375025, "grad_norm": 8.197903633117676, "learning_rate": 9.660209459921966e-05, "loss": 0.9651, "step": 3233 }, { "epoch": 0.21912053662172234, "grad_norm": 8.392546653747559, "learning_rate": 9.660072558012184e-05, "loss": 0.9858, "step": 3234 }, { "epoch": 0.21918829188969444, "grad_norm": 6.499540328979492, "learning_rate": 9.659935656102403e-05, "loss": 0.9679, "step": 3235 }, { "epoch": 0.2192560471576665, "grad_norm": 7.784618377685547, "learning_rate": 9.659798754192621e-05, "loss": 0.9316, "step": 3236 }, { "epoch": 0.2193238024256386, "grad_norm": 9.014801979064941, "learning_rate": 9.65966185228284e-05, "loss": 0.9261, "step": 3237 }, { "epoch": 0.21939155769361068, "grad_norm": 8.238675117492676, "learning_rate": 9.659524950373058e-05, "loss": 1.0156, "step": 3238 }, { "epoch": 0.21945931296158278, "grad_norm": 9.447864532470703, "learning_rate": 9.659388048463277e-05, "loss": 1.2186, "step": 3239 }, { "epoch": 0.21952706822955484, "grad_norm": 6.894035816192627, "learning_rate": 9.659251146553495e-05, "loss": 0.9157, "step": 3240 }, { "epoch": 0.21959482349752693, "grad_norm": 7.24024772644043, "learning_rate": 9.659114244643713e-05, "loss": 1.1564, "step": 3241 }, { "epoch": 0.21966257876549902, "grad_norm": 9.135485649108887, "learning_rate": 9.658977342733931e-05, "loss": 1.2002, "step": 3242 }, { "epoch": 0.21973033403347111, "grad_norm": 6.119181156158447, "learning_rate": 9.658840440824149e-05, "loss": 0.788, "step": 3243 }, { "epoch": 0.21979808930144318, "grad_norm": 8.550108909606934, "learning_rate": 9.658703538914368e-05, "loss": 0.9251, "step": 3244 }, { "epoch": 0.21986584456941527, "grad_norm": 9.546792984008789, "learning_rate": 9.658566637004586e-05, "loss": 1.2231, "step": 3245 }, { "epoch": 0.21993359983738736, "grad_norm": 9.451306343078613, "learning_rate": 9.658429735094805e-05, "loss": 0.8991, "step": 3246 }, { "epoch": 0.22000135510535945, "grad_norm": 9.914873123168945, "learning_rate": 9.658292833185023e-05, "loss": 1.3102, "step": 3247 }, { "epoch": 0.22006911037333152, "grad_norm": 10.038002967834473, "learning_rate": 9.65815593127524e-05, "loss": 1.254, "step": 3248 }, { "epoch": 0.2201368656413036, "grad_norm": 7.590802192687988, "learning_rate": 9.65801902936546e-05, "loss": 1.0032, "step": 3249 }, { "epoch": 0.2202046209092757, "grad_norm": 8.632338523864746, "learning_rate": 9.657882127455678e-05, "loss": 0.8922, "step": 3250 }, { "epoch": 0.2202723761772478, "grad_norm": 6.483764171600342, "learning_rate": 9.657745225545896e-05, "loss": 0.9167, "step": 3251 }, { "epoch": 0.22034013144521986, "grad_norm": 8.28577709197998, "learning_rate": 9.657608323636115e-05, "loss": 1.0413, "step": 3252 }, { "epoch": 0.22040788671319195, "grad_norm": 9.218433380126953, "learning_rate": 9.657471421726333e-05, "loss": 1.1681, "step": 3253 }, { "epoch": 0.22047564198116404, "grad_norm": 6.653755187988281, "learning_rate": 9.657334519816552e-05, "loss": 0.9645, "step": 3254 }, { "epoch": 0.22054339724913613, "grad_norm": 8.784723281860352, "learning_rate": 9.657197617906771e-05, "loss": 0.7489, "step": 3255 }, { "epoch": 0.2206111525171082, "grad_norm": 9.041324615478516, "learning_rate": 9.657060715996989e-05, "loss": 1.0478, "step": 3256 }, { "epoch": 0.22067890778508029, "grad_norm": 7.811746597290039, "learning_rate": 9.656923814087207e-05, "loss": 0.9511, "step": 3257 }, { "epoch": 0.22074666305305238, "grad_norm": 9.811897277832031, "learning_rate": 9.656786912177426e-05, "loss": 1.4235, "step": 3258 }, { "epoch": 0.22081441832102447, "grad_norm": 7.313972473144531, "learning_rate": 9.656650010267644e-05, "loss": 1.0075, "step": 3259 }, { "epoch": 0.22088217358899653, "grad_norm": 9.360795021057129, "learning_rate": 9.656513108357862e-05, "loss": 0.9634, "step": 3260 }, { "epoch": 0.22094992885696862, "grad_norm": 9.322724342346191, "learning_rate": 9.65637620644808e-05, "loss": 1.0669, "step": 3261 }, { "epoch": 0.22101768412494072, "grad_norm": 10.269414901733398, "learning_rate": 9.6562393045383e-05, "loss": 1.1288, "step": 3262 }, { "epoch": 0.2210854393929128, "grad_norm": 7.672987937927246, "learning_rate": 9.656102402628518e-05, "loss": 1.0517, "step": 3263 }, { "epoch": 0.22115319466088487, "grad_norm": 8.725695610046387, "learning_rate": 9.655965500718736e-05, "loss": 1.0987, "step": 3264 }, { "epoch": 0.22122094992885696, "grad_norm": 8.577162742614746, "learning_rate": 9.655828598808954e-05, "loss": 0.821, "step": 3265 }, { "epoch": 0.22128870519682906, "grad_norm": 8.497530937194824, "learning_rate": 9.655691696899172e-05, "loss": 0.9171, "step": 3266 }, { "epoch": 0.22135646046480115, "grad_norm": 9.665203094482422, "learning_rate": 9.655554794989391e-05, "loss": 1.105, "step": 3267 }, { "epoch": 0.2214242157327732, "grad_norm": 9.42651081085205, "learning_rate": 9.65541789307961e-05, "loss": 0.9526, "step": 3268 }, { "epoch": 0.2214919710007453, "grad_norm": 7.461474418640137, "learning_rate": 9.655280991169827e-05, "loss": 0.9554, "step": 3269 }, { "epoch": 0.2215597262687174, "grad_norm": 10.56189250946045, "learning_rate": 9.655144089260045e-05, "loss": 1.1504, "step": 3270 }, { "epoch": 0.22162748153668949, "grad_norm": 8.245696067810059, "learning_rate": 9.655007187350265e-05, "loss": 1.0352, "step": 3271 }, { "epoch": 0.22169523680466155, "grad_norm": 9.182881355285645, "learning_rate": 9.654870285440483e-05, "loss": 1.1153, "step": 3272 }, { "epoch": 0.22176299207263364, "grad_norm": 8.101743698120117, "learning_rate": 9.654733383530701e-05, "loss": 1.1399, "step": 3273 }, { "epoch": 0.22183074734060573, "grad_norm": 9.072972297668457, "learning_rate": 9.654596481620919e-05, "loss": 1.1508, "step": 3274 }, { "epoch": 0.22189850260857782, "grad_norm": 8.480910301208496, "learning_rate": 9.654459579711137e-05, "loss": 0.9659, "step": 3275 }, { "epoch": 0.2219662578765499, "grad_norm": 7.095332622528076, "learning_rate": 9.654322677801356e-05, "loss": 0.9417, "step": 3276 }, { "epoch": 0.22203401314452198, "grad_norm": 8.183112144470215, "learning_rate": 9.654185775891574e-05, "loss": 0.9307, "step": 3277 }, { "epoch": 0.22210176841249407, "grad_norm": 8.60648250579834, "learning_rate": 9.654048873981792e-05, "loss": 1.141, "step": 3278 }, { "epoch": 0.22216952368046616, "grad_norm": 6.992738246917725, "learning_rate": 9.65391197207201e-05, "loss": 0.9429, "step": 3279 }, { "epoch": 0.22223727894843825, "grad_norm": 6.998913764953613, "learning_rate": 9.653775070162229e-05, "loss": 1.0159, "step": 3280 }, { "epoch": 0.22230503421641032, "grad_norm": 7.922050952911377, "learning_rate": 9.653638168252448e-05, "loss": 1.1349, "step": 3281 }, { "epoch": 0.2223727894843824, "grad_norm": 9.171984672546387, "learning_rate": 9.653501266342666e-05, "loss": 1.149, "step": 3282 }, { "epoch": 0.2224405447523545, "grad_norm": 9.88930606842041, "learning_rate": 9.653364364432884e-05, "loss": 1.1036, "step": 3283 }, { "epoch": 0.2225083000203266, "grad_norm": 10.978727340698242, "learning_rate": 9.653227462523102e-05, "loss": 1.0345, "step": 3284 }, { "epoch": 0.22257605528829866, "grad_norm": 8.712138175964355, "learning_rate": 9.653090560613321e-05, "loss": 1.1404, "step": 3285 }, { "epoch": 0.22264381055627075, "grad_norm": 8.79491901397705, "learning_rate": 9.65295365870354e-05, "loss": 1.4334, "step": 3286 }, { "epoch": 0.22271156582424284, "grad_norm": 9.580904960632324, "learning_rate": 9.652816756793757e-05, "loss": 1.0693, "step": 3287 }, { "epoch": 0.22277932109221493, "grad_norm": 7.335433006286621, "learning_rate": 9.652679854883976e-05, "loss": 1.0559, "step": 3288 }, { "epoch": 0.222847076360187, "grad_norm": 8.534089088439941, "learning_rate": 9.652542952974194e-05, "loss": 0.9835, "step": 3289 }, { "epoch": 0.2229148316281591, "grad_norm": 6.626898288726807, "learning_rate": 9.652406051064413e-05, "loss": 0.8056, "step": 3290 }, { "epoch": 0.22298258689613118, "grad_norm": 7.774187088012695, "learning_rate": 9.652269149154631e-05, "loss": 0.8032, "step": 3291 }, { "epoch": 0.22305034216410327, "grad_norm": 8.029982566833496, "learning_rate": 9.652132247244849e-05, "loss": 1.0536, "step": 3292 }, { "epoch": 0.22311809743207534, "grad_norm": 8.701712608337402, "learning_rate": 9.651995345335067e-05, "loss": 0.9422, "step": 3293 }, { "epoch": 0.22318585270004743, "grad_norm": 9.17507553100586, "learning_rate": 9.651858443425286e-05, "loss": 1.0094, "step": 3294 }, { "epoch": 0.22325360796801952, "grad_norm": 8.464656829833984, "learning_rate": 9.651721541515504e-05, "loss": 0.9546, "step": 3295 }, { "epoch": 0.2233213632359916, "grad_norm": 6.839638710021973, "learning_rate": 9.651584639605722e-05, "loss": 0.8296, "step": 3296 }, { "epoch": 0.22338911850396367, "grad_norm": 7.773430824279785, "learning_rate": 9.65144773769594e-05, "loss": 1.1344, "step": 3297 }, { "epoch": 0.22345687377193577, "grad_norm": 9.725523948669434, "learning_rate": 9.65131083578616e-05, "loss": 0.9814, "step": 3298 }, { "epoch": 0.22352462903990786, "grad_norm": 8.995366096496582, "learning_rate": 9.651173933876378e-05, "loss": 1.2263, "step": 3299 }, { "epoch": 0.22359238430787995, "grad_norm": 8.735838890075684, "learning_rate": 9.651037031966596e-05, "loss": 1.1707, "step": 3300 }, { "epoch": 0.223660139575852, "grad_norm": 7.177740097045898, "learning_rate": 9.650900130056815e-05, "loss": 0.9491, "step": 3301 }, { "epoch": 0.2237278948438241, "grad_norm": 8.393296241760254, "learning_rate": 9.650763228147033e-05, "loss": 0.9765, "step": 3302 }, { "epoch": 0.2237956501117962, "grad_norm": 9.773447036743164, "learning_rate": 9.650626326237251e-05, "loss": 1.0409, "step": 3303 }, { "epoch": 0.2238634053797683, "grad_norm": 6.951483249664307, "learning_rate": 9.650489424327471e-05, "loss": 0.9848, "step": 3304 }, { "epoch": 0.22393116064774035, "grad_norm": 8.579717636108398, "learning_rate": 9.650352522417689e-05, "loss": 0.9722, "step": 3305 }, { "epoch": 0.22399891591571244, "grad_norm": 7.798871040344238, "learning_rate": 9.650215620507907e-05, "loss": 0.8046, "step": 3306 }, { "epoch": 0.22406667118368453, "grad_norm": 9.504851341247559, "learning_rate": 9.650078718598125e-05, "loss": 1.1972, "step": 3307 }, { "epoch": 0.22413442645165663, "grad_norm": 8.384056091308594, "learning_rate": 9.649941816688344e-05, "loss": 1.0654, "step": 3308 }, { "epoch": 0.2242021817196287, "grad_norm": 10.469207763671875, "learning_rate": 9.649804914778562e-05, "loss": 1.0694, "step": 3309 }, { "epoch": 0.22426993698760078, "grad_norm": 8.5567045211792, "learning_rate": 9.64966801286878e-05, "loss": 1.0371, "step": 3310 }, { "epoch": 0.22433769225557287, "grad_norm": 9.427155494689941, "learning_rate": 9.649531110958998e-05, "loss": 1.0185, "step": 3311 }, { "epoch": 0.22440544752354497, "grad_norm": 8.238412857055664, "learning_rate": 9.649394209049216e-05, "loss": 0.9744, "step": 3312 }, { "epoch": 0.22447320279151703, "grad_norm": 9.36423110961914, "learning_rate": 9.649257307139436e-05, "loss": 0.9184, "step": 3313 }, { "epoch": 0.22454095805948912, "grad_norm": 7.485356330871582, "learning_rate": 9.649120405229654e-05, "loss": 0.9948, "step": 3314 }, { "epoch": 0.2246087133274612, "grad_norm": 7.247208118438721, "learning_rate": 9.648983503319872e-05, "loss": 0.8592, "step": 3315 }, { "epoch": 0.2246764685954333, "grad_norm": 9.266085624694824, "learning_rate": 9.64884660141009e-05, "loss": 1.0314, "step": 3316 }, { "epoch": 0.22474422386340537, "grad_norm": 11.147274017333984, "learning_rate": 9.64870969950031e-05, "loss": 1.0403, "step": 3317 }, { "epoch": 0.22481197913137746, "grad_norm": 10.961670875549316, "learning_rate": 9.648572797590527e-05, "loss": 1.3147, "step": 3318 }, { "epoch": 0.22487973439934955, "grad_norm": 7.002868175506592, "learning_rate": 9.648435895680745e-05, "loss": 0.9044, "step": 3319 }, { "epoch": 0.22494748966732164, "grad_norm": 7.886247634887695, "learning_rate": 9.648298993770963e-05, "loss": 0.8901, "step": 3320 }, { "epoch": 0.2250152449352937, "grad_norm": 8.933539390563965, "learning_rate": 9.648162091861181e-05, "loss": 0.8758, "step": 3321 }, { "epoch": 0.2250830002032658, "grad_norm": 7.223681449890137, "learning_rate": 9.648025189951401e-05, "loss": 1.1502, "step": 3322 }, { "epoch": 0.2251507554712379, "grad_norm": 8.808623313903809, "learning_rate": 9.647888288041619e-05, "loss": 1.0706, "step": 3323 }, { "epoch": 0.22521851073920998, "grad_norm": 9.557942390441895, "learning_rate": 9.647751386131837e-05, "loss": 0.8113, "step": 3324 }, { "epoch": 0.22528626600718205, "grad_norm": 9.855717658996582, "learning_rate": 9.647614484222055e-05, "loss": 1.251, "step": 3325 }, { "epoch": 0.22535402127515414, "grad_norm": 9.588946342468262, "learning_rate": 9.647477582312273e-05, "loss": 1.3029, "step": 3326 }, { "epoch": 0.22542177654312623, "grad_norm": 9.277341842651367, "learning_rate": 9.647340680402492e-05, "loss": 0.9708, "step": 3327 }, { "epoch": 0.22548953181109832, "grad_norm": 9.607316970825195, "learning_rate": 9.64720377849271e-05, "loss": 0.9614, "step": 3328 }, { "epoch": 0.22555728707907038, "grad_norm": 7.949220657348633, "learning_rate": 9.647066876582928e-05, "loss": 1.0066, "step": 3329 }, { "epoch": 0.22562504234704248, "grad_norm": 8.383744239807129, "learning_rate": 9.646929974673146e-05, "loss": 0.797, "step": 3330 }, { "epoch": 0.22569279761501457, "grad_norm": 7.203914642333984, "learning_rate": 9.646793072763366e-05, "loss": 0.9625, "step": 3331 }, { "epoch": 0.22576055288298666, "grad_norm": 8.95102596282959, "learning_rate": 9.646656170853584e-05, "loss": 1.0728, "step": 3332 }, { "epoch": 0.22582830815095875, "grad_norm": 8.044751167297363, "learning_rate": 9.646519268943802e-05, "loss": 1.2155, "step": 3333 }, { "epoch": 0.22589606341893081, "grad_norm": 9.164902687072754, "learning_rate": 9.64638236703402e-05, "loss": 1.2923, "step": 3334 }, { "epoch": 0.2259638186869029, "grad_norm": 6.792164325714111, "learning_rate": 9.646245465124238e-05, "loss": 0.9956, "step": 3335 }, { "epoch": 0.226031573954875, "grad_norm": 7.76467752456665, "learning_rate": 9.646108563214457e-05, "loss": 1.0057, "step": 3336 }, { "epoch": 0.2260993292228471, "grad_norm": 8.541545867919922, "learning_rate": 9.645971661304675e-05, "loss": 1.1013, "step": 3337 }, { "epoch": 0.22616708449081915, "grad_norm": 7.8545050621032715, "learning_rate": 9.645834759394893e-05, "loss": 0.8565, "step": 3338 }, { "epoch": 0.22623483975879125, "grad_norm": 9.322896003723145, "learning_rate": 9.645697857485112e-05, "loss": 1.0509, "step": 3339 }, { "epoch": 0.22630259502676334, "grad_norm": 9.540191650390625, "learning_rate": 9.645560955575331e-05, "loss": 1.1757, "step": 3340 }, { "epoch": 0.22637035029473543, "grad_norm": 8.97028923034668, "learning_rate": 9.645424053665549e-05, "loss": 1.0342, "step": 3341 }, { "epoch": 0.2264381055627075, "grad_norm": 6.444105625152588, "learning_rate": 9.645287151755767e-05, "loss": 0.8377, "step": 3342 }, { "epoch": 0.22650586083067958, "grad_norm": 9.237889289855957, "learning_rate": 9.645150249845985e-05, "loss": 1.0468, "step": 3343 }, { "epoch": 0.22657361609865168, "grad_norm": 9.567046165466309, "learning_rate": 9.645013347936203e-05, "loss": 1.3875, "step": 3344 }, { "epoch": 0.22664137136662377, "grad_norm": 8.302481651306152, "learning_rate": 9.644876446026422e-05, "loss": 1.0455, "step": 3345 }, { "epoch": 0.22670912663459583, "grad_norm": 9.124307632446289, "learning_rate": 9.64473954411664e-05, "loss": 0.9828, "step": 3346 }, { "epoch": 0.22677688190256792, "grad_norm": 7.700011730194092, "learning_rate": 9.644602642206858e-05, "loss": 0.8475, "step": 3347 }, { "epoch": 0.22684463717054001, "grad_norm": 6.1064276695251465, "learning_rate": 9.644465740297078e-05, "loss": 0.7779, "step": 3348 }, { "epoch": 0.2269123924385121, "grad_norm": 8.924312591552734, "learning_rate": 9.644328838387296e-05, "loss": 1.3825, "step": 3349 }, { "epoch": 0.22698014770648417, "grad_norm": 8.169050216674805, "learning_rate": 9.644191936477514e-05, "loss": 1.1198, "step": 3350 }, { "epoch": 0.22704790297445626, "grad_norm": 8.368382453918457, "learning_rate": 9.644055034567733e-05, "loss": 1.0729, "step": 3351 }, { "epoch": 0.22711565824242835, "grad_norm": 10.206897735595703, "learning_rate": 9.643918132657951e-05, "loss": 0.9022, "step": 3352 }, { "epoch": 0.22718341351040044, "grad_norm": 6.288288116455078, "learning_rate": 9.64378123074817e-05, "loss": 0.7868, "step": 3353 }, { "epoch": 0.2272511687783725, "grad_norm": 8.446578025817871, "learning_rate": 9.643644328838389e-05, "loss": 1.0426, "step": 3354 }, { "epoch": 0.2273189240463446, "grad_norm": 8.425249099731445, "learning_rate": 9.643507426928607e-05, "loss": 0.823, "step": 3355 }, { "epoch": 0.2273866793143167, "grad_norm": 6.610576629638672, "learning_rate": 9.643370525018825e-05, "loss": 0.9251, "step": 3356 }, { "epoch": 0.22745443458228878, "grad_norm": 8.365503311157227, "learning_rate": 9.643233623109043e-05, "loss": 1.0258, "step": 3357 }, { "epoch": 0.22752218985026085, "grad_norm": 8.654791831970215, "learning_rate": 9.643096721199261e-05, "loss": 0.8781, "step": 3358 }, { "epoch": 0.22758994511823294, "grad_norm": 6.612964153289795, "learning_rate": 9.64295981928948e-05, "loss": 0.7472, "step": 3359 }, { "epoch": 0.22765770038620503, "grad_norm": 9.4874267578125, "learning_rate": 9.642822917379698e-05, "loss": 0.994, "step": 3360 }, { "epoch": 0.22772545565417712, "grad_norm": 9.7284574508667, "learning_rate": 9.642686015469916e-05, "loss": 0.9293, "step": 3361 }, { "epoch": 0.2277932109221492, "grad_norm": 8.787792205810547, "learning_rate": 9.642549113560134e-05, "loss": 1.1368, "step": 3362 }, { "epoch": 0.22786096619012128, "grad_norm": 9.218424797058105, "learning_rate": 9.642412211650354e-05, "loss": 1.2437, "step": 3363 }, { "epoch": 0.22792872145809337, "grad_norm": 7.9437127113342285, "learning_rate": 9.642275309740572e-05, "loss": 0.925, "step": 3364 }, { "epoch": 0.22799647672606546, "grad_norm": 10.552806854248047, "learning_rate": 9.64213840783079e-05, "loss": 0.936, "step": 3365 }, { "epoch": 0.22806423199403753, "grad_norm": 6.572626113891602, "learning_rate": 9.642001505921008e-05, "loss": 0.8968, "step": 3366 }, { "epoch": 0.22813198726200962, "grad_norm": 8.689478874206543, "learning_rate": 9.641864604011226e-05, "loss": 0.8605, "step": 3367 }, { "epoch": 0.2281997425299817, "grad_norm": 10.450199127197266, "learning_rate": 9.641727702101445e-05, "loss": 1.0871, "step": 3368 }, { "epoch": 0.2282674977979538, "grad_norm": 9.260807991027832, "learning_rate": 9.641590800191663e-05, "loss": 0.8469, "step": 3369 }, { "epoch": 0.22833525306592586, "grad_norm": 13.626687049865723, "learning_rate": 9.641453898281881e-05, "loss": 1.0615, "step": 3370 }, { "epoch": 0.22840300833389796, "grad_norm": 7.419554710388184, "learning_rate": 9.6413169963721e-05, "loss": 0.9351, "step": 3371 }, { "epoch": 0.22847076360187005, "grad_norm": 9.33206558227539, "learning_rate": 9.641180094462319e-05, "loss": 1.1826, "step": 3372 }, { "epoch": 0.22853851886984214, "grad_norm": 7.148665904998779, "learning_rate": 9.641043192552537e-05, "loss": 1.1121, "step": 3373 }, { "epoch": 0.2286062741378142, "grad_norm": 6.784035682678223, "learning_rate": 9.640906290642755e-05, "loss": 0.9649, "step": 3374 }, { "epoch": 0.2286740294057863, "grad_norm": 7.357334613800049, "learning_rate": 9.640769388732973e-05, "loss": 0.9005, "step": 3375 }, { "epoch": 0.22874178467375839, "grad_norm": 9.157288551330566, "learning_rate": 9.640632486823191e-05, "loss": 0.9585, "step": 3376 }, { "epoch": 0.22880953994173048, "grad_norm": 9.068450927734375, "learning_rate": 9.64049558491341e-05, "loss": 0.9248, "step": 3377 }, { "epoch": 0.22887729520970254, "grad_norm": 9.446860313415527, "learning_rate": 9.640358683003628e-05, "loss": 1.0016, "step": 3378 }, { "epoch": 0.22894505047767463, "grad_norm": 7.63693904876709, "learning_rate": 9.640221781093846e-05, "loss": 1.0158, "step": 3379 }, { "epoch": 0.22901280574564672, "grad_norm": 7.569469451904297, "learning_rate": 9.640084879184064e-05, "loss": 0.9149, "step": 3380 }, { "epoch": 0.22908056101361882, "grad_norm": 8.837080955505371, "learning_rate": 9.639947977274282e-05, "loss": 0.9379, "step": 3381 }, { "epoch": 0.22914831628159088, "grad_norm": 9.141901969909668, "learning_rate": 9.639811075364502e-05, "loss": 0.9121, "step": 3382 }, { "epoch": 0.22921607154956297, "grad_norm": 7.68120813369751, "learning_rate": 9.63967417345472e-05, "loss": 1.0235, "step": 3383 }, { "epoch": 0.22928382681753506, "grad_norm": 8.82229995727539, "learning_rate": 9.639537271544938e-05, "loss": 1.1573, "step": 3384 }, { "epoch": 0.22935158208550716, "grad_norm": 9.490239143371582, "learning_rate": 9.639400369635156e-05, "loss": 0.8776, "step": 3385 }, { "epoch": 0.22941933735347925, "grad_norm": 10.257567405700684, "learning_rate": 9.639263467725375e-05, "loss": 1.0696, "step": 3386 }, { "epoch": 0.2294870926214513, "grad_norm": 7.793918609619141, "learning_rate": 9.639126565815593e-05, "loss": 1.0892, "step": 3387 }, { "epoch": 0.2295548478894234, "grad_norm": 8.983718872070312, "learning_rate": 9.638989663905811e-05, "loss": 0.908, "step": 3388 }, { "epoch": 0.2296226031573955, "grad_norm": 8.31851577758789, "learning_rate": 9.63885276199603e-05, "loss": 1.0168, "step": 3389 }, { "epoch": 0.22969035842536759, "grad_norm": 9.343503952026367, "learning_rate": 9.638715860086248e-05, "loss": 1.1174, "step": 3390 }, { "epoch": 0.22975811369333965, "grad_norm": 7.617075443267822, "learning_rate": 9.638578958176467e-05, "loss": 0.9076, "step": 3391 }, { "epoch": 0.22982586896131174, "grad_norm": 8.102355003356934, "learning_rate": 9.638442056266685e-05, "loss": 1.0476, "step": 3392 }, { "epoch": 0.22989362422928383, "grad_norm": 9.366684913635254, "learning_rate": 9.638305154356903e-05, "loss": 1.1892, "step": 3393 }, { "epoch": 0.22996137949725592, "grad_norm": 8.204293251037598, "learning_rate": 9.638168252447122e-05, "loss": 0.7731, "step": 3394 }, { "epoch": 0.230029134765228, "grad_norm": 9.68787670135498, "learning_rate": 9.63803135053734e-05, "loss": 1.1126, "step": 3395 }, { "epoch": 0.23009689003320008, "grad_norm": 6.6209330558776855, "learning_rate": 9.637894448627558e-05, "loss": 0.9006, "step": 3396 }, { "epoch": 0.23016464530117217, "grad_norm": 7.963149547576904, "learning_rate": 9.637757546717778e-05, "loss": 0.8275, "step": 3397 }, { "epoch": 0.23023240056914426, "grad_norm": 8.74716567993164, "learning_rate": 9.637620644807996e-05, "loss": 1.0592, "step": 3398 }, { "epoch": 0.23030015583711633, "grad_norm": 8.819621086120605, "learning_rate": 9.637483742898214e-05, "loss": 0.8799, "step": 3399 }, { "epoch": 0.23036791110508842, "grad_norm": 8.538536071777344, "learning_rate": 9.637346840988433e-05, "loss": 1.1498, "step": 3400 }, { "epoch": 0.2304356663730605, "grad_norm": 8.787203788757324, "learning_rate": 9.637209939078651e-05, "loss": 0.8149, "step": 3401 }, { "epoch": 0.2305034216410326, "grad_norm": 8.85105037689209, "learning_rate": 9.63707303716887e-05, "loss": 0.8525, "step": 3402 }, { "epoch": 0.23057117690900467, "grad_norm": 9.773297309875488, "learning_rate": 9.636936135259087e-05, "loss": 0.9448, "step": 3403 }, { "epoch": 0.23063893217697676, "grad_norm": 7.804196357727051, "learning_rate": 9.636799233349307e-05, "loss": 0.9549, "step": 3404 }, { "epoch": 0.23070668744494885, "grad_norm": 8.398775100708008, "learning_rate": 9.636662331439525e-05, "loss": 0.9111, "step": 3405 }, { "epoch": 0.23077444271292094, "grad_norm": 8.960867881774902, "learning_rate": 9.636525429529743e-05, "loss": 1.1533, "step": 3406 }, { "epoch": 0.230842197980893, "grad_norm": 7.513526439666748, "learning_rate": 9.636388527619961e-05, "loss": 1.113, "step": 3407 }, { "epoch": 0.2309099532488651, "grad_norm": 8.891728401184082, "learning_rate": 9.636251625710179e-05, "loss": 0.8727, "step": 3408 }, { "epoch": 0.2309777085168372, "grad_norm": 6.972326755523682, "learning_rate": 9.636114723800398e-05, "loss": 0.8085, "step": 3409 }, { "epoch": 0.23104546378480928, "grad_norm": 9.195303916931152, "learning_rate": 9.635977821890616e-05, "loss": 1.0276, "step": 3410 }, { "epoch": 0.23111321905278134, "grad_norm": 9.36483383178711, "learning_rate": 9.635840919980834e-05, "loss": 1.1369, "step": 3411 }, { "epoch": 0.23118097432075344, "grad_norm": 7.282619953155518, "learning_rate": 9.635704018071052e-05, "loss": 0.9214, "step": 3412 }, { "epoch": 0.23124872958872553, "grad_norm": 6.289726257324219, "learning_rate": 9.63556711616127e-05, "loss": 0.8556, "step": 3413 }, { "epoch": 0.23131648485669762, "grad_norm": 8.646320343017578, "learning_rate": 9.63543021425149e-05, "loss": 1.0833, "step": 3414 }, { "epoch": 0.23138424012466968, "grad_norm": 8.372559547424316, "learning_rate": 9.635293312341708e-05, "loss": 0.9944, "step": 3415 }, { "epoch": 0.23145199539264177, "grad_norm": 8.002387046813965, "learning_rate": 9.635156410431926e-05, "loss": 1.015, "step": 3416 }, { "epoch": 0.23151975066061387, "grad_norm": 8.4190092086792, "learning_rate": 9.635019508522144e-05, "loss": 0.9766, "step": 3417 }, { "epoch": 0.23158750592858596, "grad_norm": 8.759613037109375, "learning_rate": 9.634882606612363e-05, "loss": 1.2348, "step": 3418 }, { "epoch": 0.23165526119655802, "grad_norm": 8.439151763916016, "learning_rate": 9.634745704702581e-05, "loss": 0.8778, "step": 3419 }, { "epoch": 0.2317230164645301, "grad_norm": 8.05675220489502, "learning_rate": 9.6346088027928e-05, "loss": 0.9525, "step": 3420 }, { "epoch": 0.2317907717325022, "grad_norm": 9.196937561035156, "learning_rate": 9.634471900883017e-05, "loss": 1.0663, "step": 3421 }, { "epoch": 0.2318585270004743, "grad_norm": 7.622048854827881, "learning_rate": 9.634334998973235e-05, "loss": 0.9522, "step": 3422 }, { "epoch": 0.23192628226844636, "grad_norm": 10.235919952392578, "learning_rate": 9.634198097063455e-05, "loss": 1.1084, "step": 3423 }, { "epoch": 0.23199403753641845, "grad_norm": 7.920719623565674, "learning_rate": 9.634061195153673e-05, "loss": 0.9443, "step": 3424 }, { "epoch": 0.23206179280439054, "grad_norm": 7.440132141113281, "learning_rate": 9.633924293243891e-05, "loss": 1.0757, "step": 3425 }, { "epoch": 0.23212954807236263, "grad_norm": 7.0108771324157715, "learning_rate": 9.633787391334109e-05, "loss": 0.8043, "step": 3426 }, { "epoch": 0.2321973033403347, "grad_norm": 6.620635032653809, "learning_rate": 9.633650489424328e-05, "loss": 0.999, "step": 3427 }, { "epoch": 0.2322650586083068, "grad_norm": 7.27518367767334, "learning_rate": 9.633513587514546e-05, "loss": 0.888, "step": 3428 }, { "epoch": 0.23233281387627888, "grad_norm": 7.081945419311523, "learning_rate": 9.633376685604764e-05, "loss": 0.8837, "step": 3429 }, { "epoch": 0.23240056914425097, "grad_norm": 7.225597858428955, "learning_rate": 9.633239783694982e-05, "loss": 0.7774, "step": 3430 }, { "epoch": 0.23246832441222304, "grad_norm": 7.0490217208862305, "learning_rate": 9.6331028817852e-05, "loss": 1.0654, "step": 3431 }, { "epoch": 0.23253607968019513, "grad_norm": 8.641639709472656, "learning_rate": 9.63296597987542e-05, "loss": 1.1071, "step": 3432 }, { "epoch": 0.23260383494816722, "grad_norm": 7.442448139190674, "learning_rate": 9.632829077965638e-05, "loss": 0.9392, "step": 3433 }, { "epoch": 0.2326715902161393, "grad_norm": 6.6959452629089355, "learning_rate": 9.632692176055856e-05, "loss": 0.8334, "step": 3434 }, { "epoch": 0.23273934548411138, "grad_norm": 9.67419719696045, "learning_rate": 9.632555274146074e-05, "loss": 1.1948, "step": 3435 }, { "epoch": 0.23280710075208347, "grad_norm": 7.384359836578369, "learning_rate": 9.632418372236292e-05, "loss": 0.7992, "step": 3436 }, { "epoch": 0.23287485602005556, "grad_norm": 6.566294193267822, "learning_rate": 9.632281470326511e-05, "loss": 0.9393, "step": 3437 }, { "epoch": 0.23294261128802765, "grad_norm": 7.842831134796143, "learning_rate": 9.63214456841673e-05, "loss": 1.1372, "step": 3438 }, { "epoch": 0.23301036655599974, "grad_norm": 9.487961769104004, "learning_rate": 9.632007666506947e-05, "loss": 1.0381, "step": 3439 }, { "epoch": 0.2330781218239718, "grad_norm": 8.06712532043457, "learning_rate": 9.631870764597167e-05, "loss": 1.4106, "step": 3440 }, { "epoch": 0.2331458770919439, "grad_norm": 7.026076793670654, "learning_rate": 9.631733862687385e-05, "loss": 1.0662, "step": 3441 }, { "epoch": 0.233213632359916, "grad_norm": 10.926567077636719, "learning_rate": 9.631596960777603e-05, "loss": 1.2932, "step": 3442 }, { "epoch": 0.23328138762788808, "grad_norm": 8.380082130432129, "learning_rate": 9.631460058867822e-05, "loss": 1.0784, "step": 3443 }, { "epoch": 0.23334914289586015, "grad_norm": 8.768819808959961, "learning_rate": 9.63132315695804e-05, "loss": 1.1501, "step": 3444 }, { "epoch": 0.23341689816383224, "grad_norm": 8.467490196228027, "learning_rate": 9.631186255048258e-05, "loss": 0.7555, "step": 3445 }, { "epoch": 0.23348465343180433, "grad_norm": 6.0409770011901855, "learning_rate": 9.631049353138478e-05, "loss": 0.9099, "step": 3446 }, { "epoch": 0.23355240869977642, "grad_norm": 7.160163402557373, "learning_rate": 9.630912451228696e-05, "loss": 0.8386, "step": 3447 }, { "epoch": 0.23362016396774848, "grad_norm": 9.102558135986328, "learning_rate": 9.630775549318914e-05, "loss": 0.9259, "step": 3448 }, { "epoch": 0.23368791923572058, "grad_norm": 7.071728706359863, "learning_rate": 9.630638647409132e-05, "loss": 1.0426, "step": 3449 }, { "epoch": 0.23375567450369267, "grad_norm": 8.358567237854004, "learning_rate": 9.630501745499351e-05, "loss": 0.9806, "step": 3450 }, { "epoch": 0.23382342977166476, "grad_norm": 12.588993072509766, "learning_rate": 9.630364843589569e-05, "loss": 1.0796, "step": 3451 }, { "epoch": 0.23389118503963682, "grad_norm": 8.665871620178223, "learning_rate": 9.630227941679787e-05, "loss": 1.0224, "step": 3452 }, { "epoch": 0.23395894030760891, "grad_norm": 9.288796424865723, "learning_rate": 9.630091039770005e-05, "loss": 0.9329, "step": 3453 }, { "epoch": 0.234026695575581, "grad_norm": 7.319186210632324, "learning_rate": 9.629954137860223e-05, "loss": 1.057, "step": 3454 }, { "epoch": 0.2340944508435531, "grad_norm": 8.640669822692871, "learning_rate": 9.629817235950443e-05, "loss": 0.9756, "step": 3455 }, { "epoch": 0.23416220611152516, "grad_norm": 10.73513126373291, "learning_rate": 9.629680334040661e-05, "loss": 0.958, "step": 3456 }, { "epoch": 0.23422996137949725, "grad_norm": 7.40097713470459, "learning_rate": 9.629543432130879e-05, "loss": 0.9062, "step": 3457 }, { "epoch": 0.23429771664746935, "grad_norm": 9.506468772888184, "learning_rate": 9.629406530221097e-05, "loss": 1.0083, "step": 3458 }, { "epoch": 0.23436547191544144, "grad_norm": 7.291172504425049, "learning_rate": 9.629269628311315e-05, "loss": 0.9966, "step": 3459 }, { "epoch": 0.2344332271834135, "grad_norm": 8.962395668029785, "learning_rate": 9.629132726401534e-05, "loss": 0.8504, "step": 3460 }, { "epoch": 0.2345009824513856, "grad_norm": 10.29174518585205, "learning_rate": 9.628995824491752e-05, "loss": 1.0807, "step": 3461 }, { "epoch": 0.23456873771935768, "grad_norm": 9.461160659790039, "learning_rate": 9.62885892258197e-05, "loss": 0.996, "step": 3462 }, { "epoch": 0.23463649298732978, "grad_norm": 7.13394021987915, "learning_rate": 9.628722020672188e-05, "loss": 0.7163, "step": 3463 }, { "epoch": 0.23470424825530184, "grad_norm": 9.379724502563477, "learning_rate": 9.628585118762408e-05, "loss": 0.9571, "step": 3464 }, { "epoch": 0.23477200352327393, "grad_norm": 9.72339916229248, "learning_rate": 9.628448216852626e-05, "loss": 1.2559, "step": 3465 }, { "epoch": 0.23483975879124602, "grad_norm": 7.353830337524414, "learning_rate": 9.628311314942844e-05, "loss": 1.0563, "step": 3466 }, { "epoch": 0.23490751405921811, "grad_norm": 8.423667907714844, "learning_rate": 9.628174413033062e-05, "loss": 1.0799, "step": 3467 }, { "epoch": 0.23497526932719018, "grad_norm": 6.426609516143799, "learning_rate": 9.62803751112328e-05, "loss": 0.9417, "step": 3468 }, { "epoch": 0.23504302459516227, "grad_norm": 10.882522583007812, "learning_rate": 9.627900609213499e-05, "loss": 1.1498, "step": 3469 }, { "epoch": 0.23511077986313436, "grad_norm": 7.768298625946045, "learning_rate": 9.627763707303717e-05, "loss": 0.7389, "step": 3470 }, { "epoch": 0.23517853513110645, "grad_norm": 9.002137184143066, "learning_rate": 9.627626805393935e-05, "loss": 1.2338, "step": 3471 }, { "epoch": 0.23524629039907852, "grad_norm": 8.671159744262695, "learning_rate": 9.627489903484153e-05, "loss": 1.1114, "step": 3472 }, { "epoch": 0.2353140456670506, "grad_norm": 5.836034774780273, "learning_rate": 9.627353001574373e-05, "loss": 0.7075, "step": 3473 }, { "epoch": 0.2353818009350227, "grad_norm": 8.145447731018066, "learning_rate": 9.627216099664591e-05, "loss": 0.9504, "step": 3474 }, { "epoch": 0.2354495562029948, "grad_norm": 7.3333563804626465, "learning_rate": 9.627079197754809e-05, "loss": 0.8443, "step": 3475 }, { "epoch": 0.23551731147096686, "grad_norm": 8.191438674926758, "learning_rate": 9.626942295845027e-05, "loss": 0.8163, "step": 3476 }, { "epoch": 0.23558506673893895, "grad_norm": 10.042975425720215, "learning_rate": 9.626805393935245e-05, "loss": 1.0715, "step": 3477 }, { "epoch": 0.23565282200691104, "grad_norm": 7.854464054107666, "learning_rate": 9.626668492025464e-05, "loss": 0.9263, "step": 3478 }, { "epoch": 0.23572057727488313, "grad_norm": 9.286520004272461, "learning_rate": 9.626531590115682e-05, "loss": 1.0172, "step": 3479 }, { "epoch": 0.2357883325428552, "grad_norm": 7.321778297424316, "learning_rate": 9.6263946882059e-05, "loss": 1.0661, "step": 3480 }, { "epoch": 0.2358560878108273, "grad_norm": 7.432394981384277, "learning_rate": 9.626257786296118e-05, "loss": 1.1682, "step": 3481 }, { "epoch": 0.23592384307879938, "grad_norm": 10.230997085571289, "learning_rate": 9.626120884386338e-05, "loss": 1.0498, "step": 3482 }, { "epoch": 0.23599159834677147, "grad_norm": 8.530696868896484, "learning_rate": 9.625983982476556e-05, "loss": 1.0598, "step": 3483 }, { "epoch": 0.23605935361474353, "grad_norm": 7.35584020614624, "learning_rate": 9.625847080566774e-05, "loss": 1.0234, "step": 3484 }, { "epoch": 0.23612710888271563, "grad_norm": 8.030210494995117, "learning_rate": 9.625710178656992e-05, "loss": 1.0241, "step": 3485 }, { "epoch": 0.23619486415068772, "grad_norm": 7.508440971374512, "learning_rate": 9.625573276747211e-05, "loss": 0.9466, "step": 3486 }, { "epoch": 0.2362626194186598, "grad_norm": 7.4618401527404785, "learning_rate": 9.62543637483743e-05, "loss": 0.88, "step": 3487 }, { "epoch": 0.23633037468663187, "grad_norm": 7.738553524017334, "learning_rate": 9.625299472927647e-05, "loss": 1.0146, "step": 3488 }, { "epoch": 0.23639812995460396, "grad_norm": 6.782055854797363, "learning_rate": 9.625162571017867e-05, "loss": 0.8196, "step": 3489 }, { "epoch": 0.23646588522257606, "grad_norm": 7.495883464813232, "learning_rate": 9.625025669108085e-05, "loss": 1.0025, "step": 3490 }, { "epoch": 0.23653364049054815, "grad_norm": 7.224970817565918, "learning_rate": 9.624888767198303e-05, "loss": 1.0069, "step": 3491 }, { "epoch": 0.23660139575852024, "grad_norm": 8.686829566955566, "learning_rate": 9.624751865288522e-05, "loss": 0.8927, "step": 3492 }, { "epoch": 0.2366691510264923, "grad_norm": 10.551370620727539, "learning_rate": 9.62461496337874e-05, "loss": 1.0973, "step": 3493 }, { "epoch": 0.2367369062944644, "grad_norm": 8.38442611694336, "learning_rate": 9.624478061468958e-05, "loss": 0.8145, "step": 3494 }, { "epoch": 0.23680466156243649, "grad_norm": 7.099575996398926, "learning_rate": 9.624341159559176e-05, "loss": 0.8304, "step": 3495 }, { "epoch": 0.23687241683040858, "grad_norm": 7.905203342437744, "learning_rate": 9.624204257649396e-05, "loss": 1.1708, "step": 3496 }, { "epoch": 0.23694017209838064, "grad_norm": 8.08218765258789, "learning_rate": 9.624067355739614e-05, "loss": 0.8367, "step": 3497 }, { "epoch": 0.23700792736635273, "grad_norm": 7.766988277435303, "learning_rate": 9.623930453829832e-05, "loss": 1.1126, "step": 3498 }, { "epoch": 0.23707568263432482, "grad_norm": 8.3229398727417, "learning_rate": 9.62379355192005e-05, "loss": 0.9798, "step": 3499 }, { "epoch": 0.23714343790229692, "grad_norm": 7.113401889801025, "learning_rate": 9.623656650010268e-05, "loss": 0.7711, "step": 3500 }, { "epoch": 0.23721119317026898, "grad_norm": 7.286252975463867, "learning_rate": 9.623519748100487e-05, "loss": 0.8018, "step": 3501 }, { "epoch": 0.23727894843824107, "grad_norm": 8.724101066589355, "learning_rate": 9.623382846190705e-05, "loss": 0.9077, "step": 3502 }, { "epoch": 0.23734670370621316, "grad_norm": 9.201606750488281, "learning_rate": 9.623245944280923e-05, "loss": 1.2277, "step": 3503 }, { "epoch": 0.23741445897418526, "grad_norm": 6.827572822570801, "learning_rate": 9.623109042371141e-05, "loss": 0.9262, "step": 3504 }, { "epoch": 0.23748221424215732, "grad_norm": 7.41288423538208, "learning_rate": 9.622972140461361e-05, "loss": 0.762, "step": 3505 }, { "epoch": 0.2375499695101294, "grad_norm": 9.649503707885742, "learning_rate": 9.622835238551579e-05, "loss": 1.0371, "step": 3506 }, { "epoch": 0.2376177247781015, "grad_norm": 9.630754470825195, "learning_rate": 9.622698336641797e-05, "loss": 0.9994, "step": 3507 }, { "epoch": 0.2376854800460736, "grad_norm": 8.949817657470703, "learning_rate": 9.622561434732015e-05, "loss": 0.8555, "step": 3508 }, { "epoch": 0.23775323531404566, "grad_norm": 8.096979141235352, "learning_rate": 9.622424532822233e-05, "loss": 0.9939, "step": 3509 }, { "epoch": 0.23782099058201775, "grad_norm": 8.714512825012207, "learning_rate": 9.622287630912452e-05, "loss": 0.9533, "step": 3510 }, { "epoch": 0.23788874584998984, "grad_norm": 8.663968086242676, "learning_rate": 9.62215072900267e-05, "loss": 0.9974, "step": 3511 }, { "epoch": 0.23795650111796193, "grad_norm": 8.704265594482422, "learning_rate": 9.622013827092888e-05, "loss": 1.2524, "step": 3512 }, { "epoch": 0.238024256385934, "grad_norm": 6.585339069366455, "learning_rate": 9.621876925183106e-05, "loss": 0.9021, "step": 3513 }, { "epoch": 0.2380920116539061, "grad_norm": 7.992496013641357, "learning_rate": 9.621740023273324e-05, "loss": 0.9345, "step": 3514 }, { "epoch": 0.23815976692187818, "grad_norm": 7.076025009155273, "learning_rate": 9.621603121363544e-05, "loss": 1.0487, "step": 3515 }, { "epoch": 0.23822752218985027, "grad_norm": 8.833822250366211, "learning_rate": 9.621466219453762e-05, "loss": 0.9612, "step": 3516 }, { "epoch": 0.23829527745782234, "grad_norm": 7.8553385734558105, "learning_rate": 9.62132931754398e-05, "loss": 1.0452, "step": 3517 }, { "epoch": 0.23836303272579443, "grad_norm": 8.44243049621582, "learning_rate": 9.621192415634198e-05, "loss": 0.6874, "step": 3518 }, { "epoch": 0.23843078799376652, "grad_norm": 8.1638822555542, "learning_rate": 9.621055513724417e-05, "loss": 0.9239, "step": 3519 }, { "epoch": 0.2384985432617386, "grad_norm": 8.315443992614746, "learning_rate": 9.620918611814635e-05, "loss": 1.1256, "step": 3520 }, { "epoch": 0.23856629852971067, "grad_norm": 10.451863288879395, "learning_rate": 9.620781709904853e-05, "loss": 1.1453, "step": 3521 }, { "epoch": 0.23863405379768277, "grad_norm": 9.117147445678711, "learning_rate": 9.620644807995071e-05, "loss": 1.0751, "step": 3522 }, { "epoch": 0.23870180906565486, "grad_norm": 7.746578216552734, "learning_rate": 9.62050790608529e-05, "loss": 0.867, "step": 3523 }, { "epoch": 0.23876956433362695, "grad_norm": 8.366421699523926, "learning_rate": 9.620371004175509e-05, "loss": 1.0863, "step": 3524 }, { "epoch": 0.238837319601599, "grad_norm": 9.951937675476074, "learning_rate": 9.620234102265727e-05, "loss": 1.0431, "step": 3525 }, { "epoch": 0.2389050748695711, "grad_norm": 9.077424049377441, "learning_rate": 9.620097200355945e-05, "loss": 1.0433, "step": 3526 }, { "epoch": 0.2389728301375432, "grad_norm": 6.919139385223389, "learning_rate": 9.619960298446163e-05, "loss": 0.8655, "step": 3527 }, { "epoch": 0.2390405854055153, "grad_norm": 7.729475975036621, "learning_rate": 9.619823396536382e-05, "loss": 1.0943, "step": 3528 }, { "epoch": 0.23910834067348735, "grad_norm": 9.782391548156738, "learning_rate": 9.6196864946266e-05, "loss": 1.1362, "step": 3529 }, { "epoch": 0.23917609594145944, "grad_norm": 8.685064315795898, "learning_rate": 9.619549592716818e-05, "loss": 0.9885, "step": 3530 }, { "epoch": 0.23924385120943154, "grad_norm": 6.963294982910156, "learning_rate": 9.619412690807036e-05, "loss": 0.9061, "step": 3531 }, { "epoch": 0.23931160647740363, "grad_norm": 8.584861755371094, "learning_rate": 9.619275788897256e-05, "loss": 0.8527, "step": 3532 }, { "epoch": 0.2393793617453757, "grad_norm": 9.253739356994629, "learning_rate": 9.619138886987474e-05, "loss": 1.0643, "step": 3533 }, { "epoch": 0.23944711701334778, "grad_norm": 7.659543037414551, "learning_rate": 9.619001985077692e-05, "loss": 0.9553, "step": 3534 }, { "epoch": 0.23951487228131987, "grad_norm": 8.769670486450195, "learning_rate": 9.618865083167911e-05, "loss": 1.0576, "step": 3535 }, { "epoch": 0.23958262754929197, "grad_norm": 7.416141033172607, "learning_rate": 9.618728181258129e-05, "loss": 1.0064, "step": 3536 }, { "epoch": 0.23965038281726403, "grad_norm": 7.91802978515625, "learning_rate": 9.618591279348347e-05, "loss": 1.0095, "step": 3537 }, { "epoch": 0.23971813808523612, "grad_norm": 6.665622234344482, "learning_rate": 9.618454377438567e-05, "loss": 0.8099, "step": 3538 }, { "epoch": 0.2397858933532082, "grad_norm": 7.3240203857421875, "learning_rate": 9.618317475528785e-05, "loss": 0.9496, "step": 3539 }, { "epoch": 0.2398536486211803, "grad_norm": 7.268299102783203, "learning_rate": 9.618180573619003e-05, "loss": 1.0504, "step": 3540 }, { "epoch": 0.23992140388915237, "grad_norm": 8.710535049438477, "learning_rate": 9.618043671709221e-05, "loss": 0.9228, "step": 3541 }, { "epoch": 0.23998915915712446, "grad_norm": 8.5223970413208, "learning_rate": 9.61790676979944e-05, "loss": 1.0383, "step": 3542 }, { "epoch": 0.24005691442509655, "grad_norm": 7.635293960571289, "learning_rate": 9.617769867889658e-05, "loss": 0.99, "step": 3543 }, { "epoch": 0.24012466969306864, "grad_norm": 9.41180419921875, "learning_rate": 9.617632965979876e-05, "loss": 1.1214, "step": 3544 }, { "epoch": 0.24019242496104073, "grad_norm": 9.31615924835205, "learning_rate": 9.617496064070094e-05, "loss": 0.9369, "step": 3545 }, { "epoch": 0.2402601802290128, "grad_norm": 9.427112579345703, "learning_rate": 9.617359162160312e-05, "loss": 1.1829, "step": 3546 }, { "epoch": 0.2403279354969849, "grad_norm": 7.825446605682373, "learning_rate": 9.617222260250532e-05, "loss": 0.9459, "step": 3547 }, { "epoch": 0.24039569076495698, "grad_norm": 7.8191633224487305, "learning_rate": 9.61708535834075e-05, "loss": 1.1144, "step": 3548 }, { "epoch": 0.24046344603292907, "grad_norm": 8.224778175354004, "learning_rate": 9.616948456430968e-05, "loss": 0.8544, "step": 3549 }, { "epoch": 0.24053120130090114, "grad_norm": 9.618694305419922, "learning_rate": 9.616811554521186e-05, "loss": 1.2254, "step": 3550 }, { "epoch": 0.24059895656887323, "grad_norm": 7.786314964294434, "learning_rate": 9.616674652611405e-05, "loss": 0.8867, "step": 3551 }, { "epoch": 0.24066671183684532, "grad_norm": 9.397835731506348, "learning_rate": 9.616537750701623e-05, "loss": 1.0505, "step": 3552 }, { "epoch": 0.2407344671048174, "grad_norm": 7.701049327850342, "learning_rate": 9.616400848791841e-05, "loss": 0.897, "step": 3553 }, { "epoch": 0.24080222237278948, "grad_norm": 7.573019504547119, "learning_rate": 9.616263946882059e-05, "loss": 0.9972, "step": 3554 }, { "epoch": 0.24086997764076157, "grad_norm": 8.337100982666016, "learning_rate": 9.616127044972277e-05, "loss": 1.0835, "step": 3555 }, { "epoch": 0.24093773290873366, "grad_norm": 8.702056884765625, "learning_rate": 9.615990143062497e-05, "loss": 0.7688, "step": 3556 }, { "epoch": 0.24100548817670575, "grad_norm": 8.482346534729004, "learning_rate": 9.615853241152715e-05, "loss": 1.1972, "step": 3557 }, { "epoch": 0.24107324344467781, "grad_norm": 7.193674087524414, "learning_rate": 9.615716339242933e-05, "loss": 1.0389, "step": 3558 }, { "epoch": 0.2411409987126499, "grad_norm": 8.803317070007324, "learning_rate": 9.615579437333151e-05, "loss": 1.0429, "step": 3559 }, { "epoch": 0.241208753980622, "grad_norm": 9.055732727050781, "learning_rate": 9.61544253542337e-05, "loss": 1.1161, "step": 3560 }, { "epoch": 0.2412765092485941, "grad_norm": 10.80001449584961, "learning_rate": 9.615305633513588e-05, "loss": 0.9628, "step": 3561 }, { "epoch": 0.24134426451656615, "grad_norm": 7.707313060760498, "learning_rate": 9.615168731603806e-05, "loss": 0.8806, "step": 3562 }, { "epoch": 0.24141201978453825, "grad_norm": 8.823626518249512, "learning_rate": 9.615031829694024e-05, "loss": 0.8771, "step": 3563 }, { "epoch": 0.24147977505251034, "grad_norm": 7.984725475311279, "learning_rate": 9.614894927784242e-05, "loss": 0.9456, "step": 3564 }, { "epoch": 0.24154753032048243, "grad_norm": 7.335816860198975, "learning_rate": 9.614758025874462e-05, "loss": 1.0323, "step": 3565 }, { "epoch": 0.2416152855884545, "grad_norm": 5.959085941314697, "learning_rate": 9.61462112396468e-05, "loss": 0.8049, "step": 3566 }, { "epoch": 0.24168304085642658, "grad_norm": 8.775632858276367, "learning_rate": 9.614484222054898e-05, "loss": 0.8875, "step": 3567 }, { "epoch": 0.24175079612439868, "grad_norm": 6.589362621307373, "learning_rate": 9.614347320145116e-05, "loss": 1.0551, "step": 3568 }, { "epoch": 0.24181855139237077, "grad_norm": 7.49434232711792, "learning_rate": 9.614210418235334e-05, "loss": 0.7662, "step": 3569 }, { "epoch": 0.24188630666034283, "grad_norm": 7.759862899780273, "learning_rate": 9.614073516325553e-05, "loss": 0.7364, "step": 3570 }, { "epoch": 0.24195406192831492, "grad_norm": 7.9510273933410645, "learning_rate": 9.613936614415771e-05, "loss": 0.9277, "step": 3571 }, { "epoch": 0.24202181719628701, "grad_norm": 8.308568954467773, "learning_rate": 9.61379971250599e-05, "loss": 1.1689, "step": 3572 }, { "epoch": 0.2420895724642591, "grad_norm": 9.44938850402832, "learning_rate": 9.613662810596207e-05, "loss": 1.0937, "step": 3573 }, { "epoch": 0.24215732773223117, "grad_norm": 11.66707706451416, "learning_rate": 9.613525908686427e-05, "loss": 0.77, "step": 3574 }, { "epoch": 0.24222508300020326, "grad_norm": 9.25683879852295, "learning_rate": 9.613389006776645e-05, "loss": 0.9661, "step": 3575 }, { "epoch": 0.24229283826817535, "grad_norm": 7.289797782897949, "learning_rate": 9.613252104866863e-05, "loss": 0.8597, "step": 3576 }, { "epoch": 0.24236059353614745, "grad_norm": 6.669293403625488, "learning_rate": 9.613115202957081e-05, "loss": 0.9134, "step": 3577 }, { "epoch": 0.2424283488041195, "grad_norm": 8.785436630249023, "learning_rate": 9.612978301047299e-05, "loss": 0.7546, "step": 3578 }, { "epoch": 0.2424961040720916, "grad_norm": 7.386310577392578, "learning_rate": 9.612841399137518e-05, "loss": 1.0635, "step": 3579 }, { "epoch": 0.2425638593400637, "grad_norm": 8.23388957977295, "learning_rate": 9.612704497227736e-05, "loss": 0.8598, "step": 3580 }, { "epoch": 0.24263161460803578, "grad_norm": 8.498323440551758, "learning_rate": 9.612567595317954e-05, "loss": 1.083, "step": 3581 }, { "epoch": 0.24269936987600785, "grad_norm": 7.849715709686279, "learning_rate": 9.612430693408174e-05, "loss": 0.7673, "step": 3582 }, { "epoch": 0.24276712514397994, "grad_norm": 8.113242149353027, "learning_rate": 9.612293791498392e-05, "loss": 1.1003, "step": 3583 }, { "epoch": 0.24283488041195203, "grad_norm": 6.983048915863037, "learning_rate": 9.61215688958861e-05, "loss": 0.8668, "step": 3584 }, { "epoch": 0.24290263567992412, "grad_norm": 8.389126777648926, "learning_rate": 9.612019987678829e-05, "loss": 1.0008, "step": 3585 }, { "epoch": 0.2429703909478962, "grad_norm": 7.593414783477783, "learning_rate": 9.611883085769047e-05, "loss": 0.9829, "step": 3586 }, { "epoch": 0.24303814621586828, "grad_norm": 7.836172103881836, "learning_rate": 9.611746183859265e-05, "loss": 0.8128, "step": 3587 }, { "epoch": 0.24310590148384037, "grad_norm": 8.981040954589844, "learning_rate": 9.611609281949485e-05, "loss": 1.1249, "step": 3588 }, { "epoch": 0.24317365675181246, "grad_norm": 6.724935054779053, "learning_rate": 9.611472380039703e-05, "loss": 0.7699, "step": 3589 }, { "epoch": 0.24324141201978453, "grad_norm": 9.456436157226562, "learning_rate": 9.611335478129921e-05, "loss": 1.0085, "step": 3590 }, { "epoch": 0.24330916728775662, "grad_norm": 11.044548988342285, "learning_rate": 9.611198576220139e-05, "loss": 1.0671, "step": 3591 }, { "epoch": 0.2433769225557287, "grad_norm": 10.590521812438965, "learning_rate": 9.611061674310358e-05, "loss": 1.0051, "step": 3592 }, { "epoch": 0.2434446778237008, "grad_norm": 8.595380783081055, "learning_rate": 9.610924772400576e-05, "loss": 0.9655, "step": 3593 }, { "epoch": 0.24351243309167286, "grad_norm": 7.794788837432861, "learning_rate": 9.610787870490794e-05, "loss": 0.9551, "step": 3594 }, { "epoch": 0.24358018835964496, "grad_norm": 7.609074115753174, "learning_rate": 9.610650968581012e-05, "loss": 0.9856, "step": 3595 }, { "epoch": 0.24364794362761705, "grad_norm": 6.909607410430908, "learning_rate": 9.61051406667123e-05, "loss": 1.0195, "step": 3596 }, { "epoch": 0.24371569889558914, "grad_norm": 7.958381652832031, "learning_rate": 9.61037716476145e-05, "loss": 0.8263, "step": 3597 }, { "epoch": 0.24378345416356123, "grad_norm": 7.38173246383667, "learning_rate": 9.610240262851668e-05, "loss": 0.8551, "step": 3598 }, { "epoch": 0.2438512094315333, "grad_norm": 7.1448822021484375, "learning_rate": 9.610103360941886e-05, "loss": 0.8189, "step": 3599 }, { "epoch": 0.2439189646995054, "grad_norm": 7.44658088684082, "learning_rate": 9.609966459032104e-05, "loss": 0.8056, "step": 3600 }, { "epoch": 0.24398671996747748, "grad_norm": 7.480542182922363, "learning_rate": 9.609829557122322e-05, "loss": 0.8591, "step": 3601 }, { "epoch": 0.24405447523544957, "grad_norm": 9.895995140075684, "learning_rate": 9.609692655212541e-05, "loss": 1.1256, "step": 3602 }, { "epoch": 0.24412223050342163, "grad_norm": 7.009078025817871, "learning_rate": 9.609555753302759e-05, "loss": 0.9045, "step": 3603 }, { "epoch": 0.24418998577139372, "grad_norm": 8.714953422546387, "learning_rate": 9.609418851392977e-05, "loss": 1.1207, "step": 3604 }, { "epoch": 0.24425774103936582, "grad_norm": 7.239734172821045, "learning_rate": 9.609281949483195e-05, "loss": 0.7811, "step": 3605 }, { "epoch": 0.2443254963073379, "grad_norm": 10.486507415771484, "learning_rate": 9.609145047573415e-05, "loss": 1.2213, "step": 3606 }, { "epoch": 0.24439325157530997, "grad_norm": 12.031790733337402, "learning_rate": 9.609008145663633e-05, "loss": 1.0316, "step": 3607 }, { "epoch": 0.24446100684328206, "grad_norm": 7.607183456420898, "learning_rate": 9.608871243753851e-05, "loss": 0.7754, "step": 3608 }, { "epoch": 0.24452876211125416, "grad_norm": 9.313577651977539, "learning_rate": 9.608734341844069e-05, "loss": 0.9907, "step": 3609 }, { "epoch": 0.24459651737922625, "grad_norm": 7.672274589538574, "learning_rate": 9.608597439934287e-05, "loss": 0.8428, "step": 3610 }, { "epoch": 0.2446642726471983, "grad_norm": 8.259462356567383, "learning_rate": 9.608460538024506e-05, "loss": 0.8863, "step": 3611 }, { "epoch": 0.2447320279151704, "grad_norm": 8.50256061553955, "learning_rate": 9.608323636114724e-05, "loss": 0.7771, "step": 3612 }, { "epoch": 0.2447997831831425, "grad_norm": 6.8893818855285645, "learning_rate": 9.608186734204942e-05, "loss": 0.9791, "step": 3613 }, { "epoch": 0.24486753845111459, "grad_norm": 8.954825401306152, "learning_rate": 9.60804983229516e-05, "loss": 1.0712, "step": 3614 }, { "epoch": 0.24493529371908665, "grad_norm": 8.83995532989502, "learning_rate": 9.60791293038538e-05, "loss": 1.1982, "step": 3615 }, { "epoch": 0.24500304898705874, "grad_norm": 8.96689224243164, "learning_rate": 9.607776028475598e-05, "loss": 0.9816, "step": 3616 }, { "epoch": 0.24507080425503083, "grad_norm": 15.17086124420166, "learning_rate": 9.607639126565816e-05, "loss": 1.1416, "step": 3617 }, { "epoch": 0.24513855952300292, "grad_norm": 9.527314186096191, "learning_rate": 9.607502224656034e-05, "loss": 1.1699, "step": 3618 }, { "epoch": 0.245206314790975, "grad_norm": 7.378002166748047, "learning_rate": 9.607365322746252e-05, "loss": 1.1303, "step": 3619 }, { "epoch": 0.24527407005894708, "grad_norm": 7.204291343688965, "learning_rate": 9.607228420836471e-05, "loss": 0.8078, "step": 3620 }, { "epoch": 0.24534182532691917, "grad_norm": 8.181205749511719, "learning_rate": 9.607091518926689e-05, "loss": 0.9221, "step": 3621 }, { "epoch": 0.24540958059489126, "grad_norm": 8.479545593261719, "learning_rate": 9.606954617016907e-05, "loss": 0.8133, "step": 3622 }, { "epoch": 0.24547733586286333, "grad_norm": 7.591360092163086, "learning_rate": 9.606817715107125e-05, "loss": 1.0632, "step": 3623 }, { "epoch": 0.24554509113083542, "grad_norm": 8.558969497680664, "learning_rate": 9.606680813197343e-05, "loss": 1.0755, "step": 3624 }, { "epoch": 0.2456128463988075, "grad_norm": 8.02037525177002, "learning_rate": 9.606543911287563e-05, "loss": 1.0164, "step": 3625 }, { "epoch": 0.2456806016667796, "grad_norm": 7.49207878112793, "learning_rate": 9.606407009377781e-05, "loss": 1.1325, "step": 3626 }, { "epoch": 0.24574835693475167, "grad_norm": 7.376079559326172, "learning_rate": 9.606270107467999e-05, "loss": 0.7917, "step": 3627 }, { "epoch": 0.24581611220272376, "grad_norm": 7.331247329711914, "learning_rate": 9.606133205558218e-05, "loss": 1.2186, "step": 3628 }, { "epoch": 0.24588386747069585, "grad_norm": 7.393257141113281, "learning_rate": 9.605996303648436e-05, "loss": 0.7155, "step": 3629 }, { "epoch": 0.24595162273866794, "grad_norm": 9.15472412109375, "learning_rate": 9.605859401738654e-05, "loss": 0.9562, "step": 3630 }, { "epoch": 0.24601937800664, "grad_norm": 6.846646785736084, "learning_rate": 9.605722499828874e-05, "loss": 1.109, "step": 3631 }, { "epoch": 0.2460871332746121, "grad_norm": 8.57854175567627, "learning_rate": 9.605585597919092e-05, "loss": 0.8706, "step": 3632 }, { "epoch": 0.2461548885425842, "grad_norm": 7.089768886566162, "learning_rate": 9.60544869600931e-05, "loss": 0.8776, "step": 3633 }, { "epoch": 0.24622264381055628, "grad_norm": 7.549044132232666, "learning_rate": 9.605311794099529e-05, "loss": 0.9956, "step": 3634 }, { "epoch": 0.24629039907852834, "grad_norm": 6.839412689208984, "learning_rate": 9.605174892189747e-05, "loss": 0.8722, "step": 3635 }, { "epoch": 0.24635815434650044, "grad_norm": 7.378058910369873, "learning_rate": 9.605037990279965e-05, "loss": 0.8663, "step": 3636 }, { "epoch": 0.24642590961447253, "grad_norm": 7.893070220947266, "learning_rate": 9.604901088370183e-05, "loss": 0.9213, "step": 3637 }, { "epoch": 0.24649366488244462, "grad_norm": 7.3345232009887695, "learning_rate": 9.604764186460403e-05, "loss": 0.9827, "step": 3638 }, { "epoch": 0.24656142015041668, "grad_norm": 6.120781421661377, "learning_rate": 9.60462728455062e-05, "loss": 0.6371, "step": 3639 }, { "epoch": 0.24662917541838877, "grad_norm": 8.695615768432617, "learning_rate": 9.604490382640839e-05, "loss": 0.9769, "step": 3640 }, { "epoch": 0.24669693068636087, "grad_norm": 8.469325065612793, "learning_rate": 9.604353480731057e-05, "loss": 0.9267, "step": 3641 }, { "epoch": 0.24676468595433296, "grad_norm": 9.191173553466797, "learning_rate": 9.604216578821275e-05, "loss": 1.1818, "step": 3642 }, { "epoch": 0.24683244122230502, "grad_norm": 9.337483406066895, "learning_rate": 9.604079676911494e-05, "loss": 0.9904, "step": 3643 }, { "epoch": 0.2469001964902771, "grad_norm": 7.597773551940918, "learning_rate": 9.603942775001712e-05, "loss": 0.8928, "step": 3644 }, { "epoch": 0.2469679517582492, "grad_norm": 8.155903816223145, "learning_rate": 9.60380587309193e-05, "loss": 0.9511, "step": 3645 }, { "epoch": 0.2470357070262213, "grad_norm": 7.695154666900635, "learning_rate": 9.603668971182148e-05, "loss": 1.0724, "step": 3646 }, { "epoch": 0.24710346229419336, "grad_norm": 7.492908000946045, "learning_rate": 9.603532069272366e-05, "loss": 0.9133, "step": 3647 }, { "epoch": 0.24717121756216545, "grad_norm": 8.190613746643066, "learning_rate": 9.603395167362586e-05, "loss": 0.8972, "step": 3648 }, { "epoch": 0.24723897283013754, "grad_norm": 7.9199347496032715, "learning_rate": 9.603258265452804e-05, "loss": 1.0501, "step": 3649 }, { "epoch": 0.24730672809810963, "grad_norm": 8.386896133422852, "learning_rate": 9.603121363543022e-05, "loss": 0.998, "step": 3650 }, { "epoch": 0.24737448336608173, "grad_norm": 6.536781311035156, "learning_rate": 9.60298446163324e-05, "loss": 0.8379, "step": 3651 }, { "epoch": 0.2474422386340538, "grad_norm": 6.918766021728516, "learning_rate": 9.602847559723459e-05, "loss": 0.9397, "step": 3652 }, { "epoch": 0.24750999390202588, "grad_norm": 7.00775146484375, "learning_rate": 9.602710657813677e-05, "loss": 1.0079, "step": 3653 }, { "epoch": 0.24757774916999797, "grad_norm": 7.693192958831787, "learning_rate": 9.602573755903895e-05, "loss": 1.0657, "step": 3654 }, { "epoch": 0.24764550443797007, "grad_norm": 10.667771339416504, "learning_rate": 9.602436853994113e-05, "loss": 1.0657, "step": 3655 }, { "epoch": 0.24771325970594213, "grad_norm": 8.622758865356445, "learning_rate": 9.602299952084331e-05, "loss": 0.9792, "step": 3656 }, { "epoch": 0.24778101497391422, "grad_norm": 8.834444046020508, "learning_rate": 9.602163050174551e-05, "loss": 1.1014, "step": 3657 }, { "epoch": 0.2478487702418863, "grad_norm": 7.717538356781006, "learning_rate": 9.602026148264769e-05, "loss": 0.8781, "step": 3658 }, { "epoch": 0.2479165255098584, "grad_norm": 7.797954559326172, "learning_rate": 9.601889246354987e-05, "loss": 0.9772, "step": 3659 }, { "epoch": 0.24798428077783047, "grad_norm": 7.376112937927246, "learning_rate": 9.601752344445205e-05, "loss": 1.0468, "step": 3660 }, { "epoch": 0.24805203604580256, "grad_norm": 7.7266387939453125, "learning_rate": 9.601615442535424e-05, "loss": 1.007, "step": 3661 }, { "epoch": 0.24811979131377465, "grad_norm": 9.196928977966309, "learning_rate": 9.601478540625642e-05, "loss": 1.1327, "step": 3662 }, { "epoch": 0.24818754658174674, "grad_norm": 7.892288684844971, "learning_rate": 9.60134163871586e-05, "loss": 0.7511, "step": 3663 }, { "epoch": 0.2482553018497188, "grad_norm": 8.854056358337402, "learning_rate": 9.601204736806078e-05, "loss": 0.9546, "step": 3664 }, { "epoch": 0.2483230571176909, "grad_norm": 7.985452651977539, "learning_rate": 9.601067834896296e-05, "loss": 1.2186, "step": 3665 }, { "epoch": 0.248390812385663, "grad_norm": 8.336162567138672, "learning_rate": 9.600930932986516e-05, "loss": 0.864, "step": 3666 }, { "epoch": 0.24845856765363508, "grad_norm": 10.857757568359375, "learning_rate": 9.600794031076734e-05, "loss": 1.1948, "step": 3667 }, { "epoch": 0.24852632292160715, "grad_norm": 8.168721199035645, "learning_rate": 9.600657129166952e-05, "loss": 1.1151, "step": 3668 }, { "epoch": 0.24859407818957924, "grad_norm": 7.509332656860352, "learning_rate": 9.60052022725717e-05, "loss": 0.9215, "step": 3669 }, { "epoch": 0.24866183345755133, "grad_norm": 10.4354829788208, "learning_rate": 9.600383325347389e-05, "loss": 1.0696, "step": 3670 }, { "epoch": 0.24872958872552342, "grad_norm": 6.930381774902344, "learning_rate": 9.600246423437607e-05, "loss": 0.9229, "step": 3671 }, { "epoch": 0.24879734399349548, "grad_norm": 6.608088970184326, "learning_rate": 9.600109521527825e-05, "loss": 0.928, "step": 3672 }, { "epoch": 0.24886509926146758, "grad_norm": 7.4217095375061035, "learning_rate": 9.599972619618043e-05, "loss": 0.9608, "step": 3673 }, { "epoch": 0.24893285452943967, "grad_norm": 7.46991491317749, "learning_rate": 9.599835717708263e-05, "loss": 0.8928, "step": 3674 }, { "epoch": 0.24900060979741176, "grad_norm": 8.76001262664795, "learning_rate": 9.599698815798481e-05, "loss": 1.191, "step": 3675 }, { "epoch": 0.24906836506538382, "grad_norm": 8.395779609680176, "learning_rate": 9.599561913888699e-05, "loss": 1.1479, "step": 3676 }, { "epoch": 0.24913612033335591, "grad_norm": 8.00460147857666, "learning_rate": 9.599425011978918e-05, "loss": 1.1005, "step": 3677 }, { "epoch": 0.249203875601328, "grad_norm": 7.817287445068359, "learning_rate": 9.599288110069136e-05, "loss": 1.1568, "step": 3678 }, { "epoch": 0.2492716308693001, "grad_norm": 9.60706615447998, "learning_rate": 9.599151208159354e-05, "loss": 0.9734, "step": 3679 }, { "epoch": 0.24933938613727216, "grad_norm": 8.347691535949707, "learning_rate": 9.599014306249574e-05, "loss": 1.2208, "step": 3680 }, { "epoch": 0.24940714140524425, "grad_norm": 8.106613159179688, "learning_rate": 9.598877404339792e-05, "loss": 0.9253, "step": 3681 }, { "epoch": 0.24947489667321635, "grad_norm": 8.099063873291016, "learning_rate": 9.59874050243001e-05, "loss": 1.0516, "step": 3682 }, { "epoch": 0.24954265194118844, "grad_norm": 7.839589595794678, "learning_rate": 9.598603600520228e-05, "loss": 0.9764, "step": 3683 }, { "epoch": 0.2496104072091605, "grad_norm": 8.679840087890625, "learning_rate": 9.598466698610447e-05, "loss": 1.0407, "step": 3684 }, { "epoch": 0.2496781624771326, "grad_norm": 6.854926109313965, "learning_rate": 9.598329796700665e-05, "loss": 0.9457, "step": 3685 }, { "epoch": 0.24974591774510468, "grad_norm": 9.133661270141602, "learning_rate": 9.598192894790883e-05, "loss": 1.0391, "step": 3686 }, { "epoch": 0.24981367301307678, "grad_norm": 7.744726657867432, "learning_rate": 9.598055992881101e-05, "loss": 1.0256, "step": 3687 }, { "epoch": 0.24988142828104884, "grad_norm": 8.9452543258667, "learning_rate": 9.597919090971319e-05, "loss": 0.9649, "step": 3688 }, { "epoch": 0.24994918354902093, "grad_norm": 8.396431922912598, "learning_rate": 9.597782189061539e-05, "loss": 0.8189, "step": 3689 }, { "epoch": 0.24994918354902093, "eval_loss": 0.9518795609474182, "eval_noise_accuracy": 0.0, "eval_runtime": 1533.2421, "eval_samples_per_second": 3.352, "eval_steps_per_second": 0.21, "eval_wer": 88.77438705594233, "step": 3689 }, { "epoch": 0.250016938816993, "grad_norm": 7.252607822418213, "learning_rate": 9.597645287151757e-05, "loss": 1.0734, "step": 3690 }, { "epoch": 0.2500846940849651, "grad_norm": 8.768789291381836, "learning_rate": 9.597508385241975e-05, "loss": 0.9763, "step": 3691 }, { "epoch": 0.2501524493529372, "grad_norm": 6.9475321769714355, "learning_rate": 9.597371483332193e-05, "loss": 0.8121, "step": 3692 }, { "epoch": 0.25022020462090927, "grad_norm": 7.9007248878479, "learning_rate": 9.597234581422412e-05, "loss": 0.9668, "step": 3693 }, { "epoch": 0.25028795988888136, "grad_norm": 6.524989604949951, "learning_rate": 9.59709767951263e-05, "loss": 0.9151, "step": 3694 }, { "epoch": 0.25035571515685345, "grad_norm": 7.83770751953125, "learning_rate": 9.596960777602848e-05, "loss": 0.8338, "step": 3695 }, { "epoch": 0.25042347042482554, "grad_norm": 8.619128227233887, "learning_rate": 9.596823875693066e-05, "loss": 1.0394, "step": 3696 }, { "epoch": 0.25049122569279764, "grad_norm": 7.238592624664307, "learning_rate": 9.596686973783284e-05, "loss": 0.8773, "step": 3697 }, { "epoch": 0.2505589809607697, "grad_norm": 10.257181167602539, "learning_rate": 9.596550071873504e-05, "loss": 1.0137, "step": 3698 }, { "epoch": 0.25062673622874176, "grad_norm": 9.302980422973633, "learning_rate": 9.596413169963722e-05, "loss": 0.8317, "step": 3699 }, { "epoch": 0.25069449149671386, "grad_norm": 7.485314846038818, "learning_rate": 9.59627626805394e-05, "loss": 0.9272, "step": 3700 }, { "epoch": 0.25076224676468595, "grad_norm": 7.870807647705078, "learning_rate": 9.596139366144158e-05, "loss": 1.009, "step": 3701 }, { "epoch": 0.25083000203265804, "grad_norm": 8.245805740356445, "learning_rate": 9.596002464234376e-05, "loss": 0.8616, "step": 3702 }, { "epoch": 0.25089775730063013, "grad_norm": 10.324470520019531, "learning_rate": 9.595865562324595e-05, "loss": 1.1567, "step": 3703 }, { "epoch": 0.2509655125686022, "grad_norm": 7.588774681091309, "learning_rate": 9.595728660414813e-05, "loss": 1.0025, "step": 3704 }, { "epoch": 0.2510332678365743, "grad_norm": 8.32935905456543, "learning_rate": 9.595591758505031e-05, "loss": 1.1826, "step": 3705 }, { "epoch": 0.2511010231045464, "grad_norm": 7.146906852722168, "learning_rate": 9.595454856595249e-05, "loss": 0.8108, "step": 3706 }, { "epoch": 0.25116877837251844, "grad_norm": 8.91852855682373, "learning_rate": 9.595317954685469e-05, "loss": 1.1377, "step": 3707 }, { "epoch": 0.25123653364049053, "grad_norm": 7.726437568664551, "learning_rate": 9.595181052775687e-05, "loss": 0.9971, "step": 3708 }, { "epoch": 0.2513042889084626, "grad_norm": 11.102527618408203, "learning_rate": 9.595044150865905e-05, "loss": 1.1075, "step": 3709 }, { "epoch": 0.2513720441764347, "grad_norm": 6.124303340911865, "learning_rate": 9.594907248956123e-05, "loss": 0.8523, "step": 3710 }, { "epoch": 0.2514397994444068, "grad_norm": 8.56926441192627, "learning_rate": 9.594770347046341e-05, "loss": 1.0801, "step": 3711 }, { "epoch": 0.2515075547123789, "grad_norm": 6.994394779205322, "learning_rate": 9.59463344513656e-05, "loss": 0.8559, "step": 3712 }, { "epoch": 0.251575309980351, "grad_norm": 7.428825378417969, "learning_rate": 9.594496543226778e-05, "loss": 0.8861, "step": 3713 }, { "epoch": 0.2516430652483231, "grad_norm": 8.7849760055542, "learning_rate": 9.594359641316996e-05, "loss": 0.9083, "step": 3714 }, { "epoch": 0.2517108205162951, "grad_norm": 7.608119964599609, "learning_rate": 9.594222739407214e-05, "loss": 0.8529, "step": 3715 }, { "epoch": 0.2517785757842672, "grad_norm": 7.076242446899414, "learning_rate": 9.594085837497434e-05, "loss": 0.8817, "step": 3716 }, { "epoch": 0.2518463310522393, "grad_norm": 8.78627872467041, "learning_rate": 9.593948935587652e-05, "loss": 1.1752, "step": 3717 }, { "epoch": 0.2519140863202114, "grad_norm": 8.589457511901855, "learning_rate": 9.59381203367787e-05, "loss": 0.9099, "step": 3718 }, { "epoch": 0.2519818415881835, "grad_norm": 8.67271900177002, "learning_rate": 9.593675131768088e-05, "loss": 1.0304, "step": 3719 }, { "epoch": 0.2520495968561556, "grad_norm": 9.180156707763672, "learning_rate": 9.593538229858307e-05, "loss": 1.0815, "step": 3720 }, { "epoch": 0.25211735212412767, "grad_norm": 7.973734378814697, "learning_rate": 9.593401327948525e-05, "loss": 1.0816, "step": 3721 }, { "epoch": 0.25218510739209976, "grad_norm": 7.272556781768799, "learning_rate": 9.593264426038743e-05, "loss": 0.9027, "step": 3722 }, { "epoch": 0.2522528626600718, "grad_norm": 8.033550262451172, "learning_rate": 9.593127524128963e-05, "loss": 1.0772, "step": 3723 }, { "epoch": 0.2523206179280439, "grad_norm": 7.861289978027344, "learning_rate": 9.59299062221918e-05, "loss": 0.8774, "step": 3724 }, { "epoch": 0.252388373196016, "grad_norm": 7.043121337890625, "learning_rate": 9.592853720309399e-05, "loss": 1.0165, "step": 3725 }, { "epoch": 0.25245612846398807, "grad_norm": 7.830938816070557, "learning_rate": 9.592716818399618e-05, "loss": 1.0379, "step": 3726 }, { "epoch": 0.25252388373196016, "grad_norm": 8.138580322265625, "learning_rate": 9.592579916489836e-05, "loss": 0.954, "step": 3727 }, { "epoch": 0.25259163899993226, "grad_norm": 6.810534477233887, "learning_rate": 9.592443014580054e-05, "loss": 0.7475, "step": 3728 }, { "epoch": 0.25265939426790435, "grad_norm": 6.66425895690918, "learning_rate": 9.592306112670272e-05, "loss": 0.8203, "step": 3729 }, { "epoch": 0.25272714953587644, "grad_norm": 7.962128162384033, "learning_rate": 9.592169210760492e-05, "loss": 0.9923, "step": 3730 }, { "epoch": 0.2527949048038485, "grad_norm": 8.372082710266113, "learning_rate": 9.59203230885071e-05, "loss": 0.809, "step": 3731 }, { "epoch": 0.25286266007182057, "grad_norm": 8.343878746032715, "learning_rate": 9.591895406940928e-05, "loss": 0.9373, "step": 3732 }, { "epoch": 0.25293041533979266, "grad_norm": 9.935523986816406, "learning_rate": 9.591758505031146e-05, "loss": 1.0037, "step": 3733 }, { "epoch": 0.25299817060776475, "grad_norm": 8.378336906433105, "learning_rate": 9.591621603121364e-05, "loss": 0.9834, "step": 3734 }, { "epoch": 0.25306592587573684, "grad_norm": 8.581600189208984, "learning_rate": 9.591484701211583e-05, "loss": 1.1192, "step": 3735 }, { "epoch": 0.25313368114370893, "grad_norm": 8.007279396057129, "learning_rate": 9.591347799301801e-05, "loss": 1.0261, "step": 3736 }, { "epoch": 0.253201436411681, "grad_norm": 7.40525484085083, "learning_rate": 9.591210897392019e-05, "loss": 0.9273, "step": 3737 }, { "epoch": 0.2532691916796531, "grad_norm": 7.228291988372803, "learning_rate": 9.591073995482237e-05, "loss": 1.0219, "step": 3738 }, { "epoch": 0.25333694694762515, "grad_norm": 6.714911460876465, "learning_rate": 9.590937093572457e-05, "loss": 0.9134, "step": 3739 }, { "epoch": 0.25340470221559724, "grad_norm": 6.150938510894775, "learning_rate": 9.590800191662675e-05, "loss": 0.8577, "step": 3740 }, { "epoch": 0.25347245748356934, "grad_norm": 10.693168640136719, "learning_rate": 9.590663289752893e-05, "loss": 1.2867, "step": 3741 }, { "epoch": 0.2535402127515414, "grad_norm": 8.114953994750977, "learning_rate": 9.590526387843111e-05, "loss": 1.0602, "step": 3742 }, { "epoch": 0.2536079680195135, "grad_norm": 8.472567558288574, "learning_rate": 9.590389485933329e-05, "loss": 0.9992, "step": 3743 }, { "epoch": 0.2536757232874856, "grad_norm": 8.681047439575195, "learning_rate": 9.590252584023548e-05, "loss": 1.003, "step": 3744 }, { "epoch": 0.2537434785554577, "grad_norm": 8.753557205200195, "learning_rate": 9.590115682113766e-05, "loss": 0.9447, "step": 3745 }, { "epoch": 0.2538112338234298, "grad_norm": 6.80125093460083, "learning_rate": 9.589978780203984e-05, "loss": 0.8522, "step": 3746 }, { "epoch": 0.25387898909140183, "grad_norm": 8.829830169677734, "learning_rate": 9.589841878294202e-05, "loss": 0.9131, "step": 3747 }, { "epoch": 0.2539467443593739, "grad_norm": 7.068274021148682, "learning_rate": 9.589704976384422e-05, "loss": 0.8348, "step": 3748 }, { "epoch": 0.254014499627346, "grad_norm": 8.19235897064209, "learning_rate": 9.58956807447464e-05, "loss": 0.8975, "step": 3749 }, { "epoch": 0.2540822548953181, "grad_norm": 8.1896333694458, "learning_rate": 9.589431172564858e-05, "loss": 1.1285, "step": 3750 }, { "epoch": 0.2541500101632902, "grad_norm": 7.581019878387451, "learning_rate": 9.589294270655076e-05, "loss": 0.8838, "step": 3751 }, { "epoch": 0.2542177654312623, "grad_norm": 6.806415557861328, "learning_rate": 9.589157368745294e-05, "loss": 0.9144, "step": 3752 }, { "epoch": 0.2542855206992344, "grad_norm": 6.077991485595703, "learning_rate": 9.589020466835513e-05, "loss": 0.7032, "step": 3753 }, { "epoch": 0.25435327596720647, "grad_norm": 9.278702735900879, "learning_rate": 9.588883564925731e-05, "loss": 1.1806, "step": 3754 }, { "epoch": 0.2544210312351785, "grad_norm": 7.136252403259277, "learning_rate": 9.588746663015949e-05, "loss": 0.9589, "step": 3755 }, { "epoch": 0.2544887865031506, "grad_norm": 6.349377632141113, "learning_rate": 9.588609761106167e-05, "loss": 0.9972, "step": 3756 }, { "epoch": 0.2545565417711227, "grad_norm": 10.120612144470215, "learning_rate": 9.588472859196385e-05, "loss": 1.124, "step": 3757 }, { "epoch": 0.2546242970390948, "grad_norm": 6.840261459350586, "learning_rate": 9.588335957286605e-05, "loss": 0.8897, "step": 3758 }, { "epoch": 0.2546920523070669, "grad_norm": 6.069836616516113, "learning_rate": 9.588199055376823e-05, "loss": 0.7794, "step": 3759 }, { "epoch": 0.25475980757503897, "grad_norm": 6.336123943328857, "learning_rate": 9.588062153467041e-05, "loss": 0.8936, "step": 3760 }, { "epoch": 0.25482756284301106, "grad_norm": 7.337663173675537, "learning_rate": 9.587925251557259e-05, "loss": 0.9241, "step": 3761 }, { "epoch": 0.25489531811098315, "grad_norm": 9.220332145690918, "learning_rate": 9.587788349647478e-05, "loss": 1.2426, "step": 3762 }, { "epoch": 0.25496307337895524, "grad_norm": 6.4654951095581055, "learning_rate": 9.587651447737696e-05, "loss": 0.7795, "step": 3763 }, { "epoch": 0.2550308286469273, "grad_norm": 8.458954811096191, "learning_rate": 9.587514545827914e-05, "loss": 1.0493, "step": 3764 }, { "epoch": 0.25509858391489937, "grad_norm": 6.73598575592041, "learning_rate": 9.587377643918132e-05, "loss": 0.9777, "step": 3765 }, { "epoch": 0.25516633918287146, "grad_norm": 8.673493385314941, "learning_rate": 9.587240742008352e-05, "loss": 1.1108, "step": 3766 }, { "epoch": 0.25523409445084355, "grad_norm": 7.328574180603027, "learning_rate": 9.58710384009857e-05, "loss": 0.9502, "step": 3767 }, { "epoch": 0.25530184971881564, "grad_norm": 7.9878692626953125, "learning_rate": 9.586966938188788e-05, "loss": 1.0191, "step": 3768 }, { "epoch": 0.25536960498678773, "grad_norm": 7.247650623321533, "learning_rate": 9.586830036279007e-05, "loss": 0.9356, "step": 3769 }, { "epoch": 0.2554373602547598, "grad_norm": 7.29271125793457, "learning_rate": 9.586693134369225e-05, "loss": 0.9877, "step": 3770 }, { "epoch": 0.2555051155227319, "grad_norm": 7.802029132843018, "learning_rate": 9.586556232459443e-05, "loss": 0.8923, "step": 3771 }, { "epoch": 0.25557287079070395, "grad_norm": 8.129645347595215, "learning_rate": 9.586419330549663e-05, "loss": 0.9451, "step": 3772 }, { "epoch": 0.25564062605867605, "grad_norm": 7.033285140991211, "learning_rate": 9.58628242863988e-05, "loss": 0.8529, "step": 3773 }, { "epoch": 0.25570838132664814, "grad_norm": 7.490065574645996, "learning_rate": 9.586145526730099e-05, "loss": 0.8912, "step": 3774 }, { "epoch": 0.25577613659462023, "grad_norm": 8.730104446411133, "learning_rate": 9.586008624820317e-05, "loss": 1.2685, "step": 3775 }, { "epoch": 0.2558438918625923, "grad_norm": 9.958065032958984, "learning_rate": 9.585871722910536e-05, "loss": 1.1792, "step": 3776 }, { "epoch": 0.2559116471305644, "grad_norm": 6.82180118560791, "learning_rate": 9.585734821000754e-05, "loss": 0.8734, "step": 3777 }, { "epoch": 0.2559794023985365, "grad_norm": 9.444950103759766, "learning_rate": 9.585597919090972e-05, "loss": 1.0368, "step": 3778 }, { "epoch": 0.2560471576665086, "grad_norm": 9.437919616699219, "learning_rate": 9.58546101718119e-05, "loss": 1.2728, "step": 3779 }, { "epoch": 0.25611491293448063, "grad_norm": 8.925026893615723, "learning_rate": 9.585324115271408e-05, "loss": 0.944, "step": 3780 }, { "epoch": 0.2561826682024527, "grad_norm": 8.421260833740234, "learning_rate": 9.585187213361628e-05, "loss": 0.9913, "step": 3781 }, { "epoch": 0.2562504234704248, "grad_norm": 8.5851469039917, "learning_rate": 9.585050311451846e-05, "loss": 0.9885, "step": 3782 }, { "epoch": 0.2563181787383969, "grad_norm": 7.179548263549805, "learning_rate": 9.584913409542064e-05, "loss": 1.0647, "step": 3783 }, { "epoch": 0.256385934006369, "grad_norm": 8.60708999633789, "learning_rate": 9.584776507632282e-05, "loss": 1.0475, "step": 3784 }, { "epoch": 0.2564536892743411, "grad_norm": 8.462443351745605, "learning_rate": 9.584639605722501e-05, "loss": 1.1381, "step": 3785 }, { "epoch": 0.2565214445423132, "grad_norm": 7.044154644012451, "learning_rate": 9.584502703812719e-05, "loss": 0.8223, "step": 3786 }, { "epoch": 0.2565891998102853, "grad_norm": 8.842753410339355, "learning_rate": 9.584365801902937e-05, "loss": 1.1205, "step": 3787 }, { "epoch": 0.2566569550782573, "grad_norm": 7.294439792633057, "learning_rate": 9.584228899993155e-05, "loss": 0.9563, "step": 3788 }, { "epoch": 0.2567247103462294, "grad_norm": 7.842654228210449, "learning_rate": 9.584091998083373e-05, "loss": 1.2798, "step": 3789 }, { "epoch": 0.2567924656142015, "grad_norm": 5.479234218597412, "learning_rate": 9.583955096173593e-05, "loss": 0.7835, "step": 3790 }, { "epoch": 0.2568602208821736, "grad_norm": 7.670284271240234, "learning_rate": 9.58381819426381e-05, "loss": 0.8263, "step": 3791 }, { "epoch": 0.2569279761501457, "grad_norm": 8.564105987548828, "learning_rate": 9.583681292354029e-05, "loss": 1.032, "step": 3792 }, { "epoch": 0.25699573141811777, "grad_norm": 8.706098556518555, "learning_rate": 9.583544390444247e-05, "loss": 1.0166, "step": 3793 }, { "epoch": 0.25706348668608986, "grad_norm": 6.727125644683838, "learning_rate": 9.583407488534466e-05, "loss": 0.756, "step": 3794 }, { "epoch": 0.25713124195406195, "grad_norm": 8.941418647766113, "learning_rate": 9.583270586624684e-05, "loss": 1.0223, "step": 3795 }, { "epoch": 0.257198997222034, "grad_norm": 7.35167932510376, "learning_rate": 9.583133684714902e-05, "loss": 0.9117, "step": 3796 }, { "epoch": 0.2572667524900061, "grad_norm": 9.245199203491211, "learning_rate": 9.58299678280512e-05, "loss": 1.0131, "step": 3797 }, { "epoch": 0.25733450775797817, "grad_norm": 10.583107948303223, "learning_rate": 9.582859880895338e-05, "loss": 1.0452, "step": 3798 }, { "epoch": 0.25740226302595026, "grad_norm": 7.672145843505859, "learning_rate": 9.582722978985558e-05, "loss": 0.9961, "step": 3799 }, { "epoch": 0.25747001829392235, "grad_norm": 6.5851640701293945, "learning_rate": 9.582586077075776e-05, "loss": 0.9217, "step": 3800 }, { "epoch": 0.25753777356189445, "grad_norm": 9.444985389709473, "learning_rate": 9.582449175165994e-05, "loss": 1.1704, "step": 3801 }, { "epoch": 0.25760552882986654, "grad_norm": 7.136216163635254, "learning_rate": 9.582312273256212e-05, "loss": 0.9597, "step": 3802 }, { "epoch": 0.25767328409783863, "grad_norm": 9.74889087677002, "learning_rate": 9.582175371346431e-05, "loss": 0.9866, "step": 3803 }, { "epoch": 0.25774103936581066, "grad_norm": 7.692512512207031, "learning_rate": 9.582038469436649e-05, "loss": 1.0412, "step": 3804 }, { "epoch": 0.25780879463378276, "grad_norm": 7.882124423980713, "learning_rate": 9.581901567526867e-05, "loss": 1.1129, "step": 3805 }, { "epoch": 0.25787654990175485, "grad_norm": 8.117066383361816, "learning_rate": 9.581764665617085e-05, "loss": 0.968, "step": 3806 }, { "epoch": 0.25794430516972694, "grad_norm": 8.883692741394043, "learning_rate": 9.581627763707303e-05, "loss": 1.0859, "step": 3807 }, { "epoch": 0.25801206043769903, "grad_norm": 7.985278129577637, "learning_rate": 9.581490861797523e-05, "loss": 0.9511, "step": 3808 }, { "epoch": 0.2580798157056711, "grad_norm": 10.487812995910645, "learning_rate": 9.58135395988774e-05, "loss": 1.2395, "step": 3809 }, { "epoch": 0.2581475709736432, "grad_norm": 7.5507707595825195, "learning_rate": 9.581217057977959e-05, "loss": 0.9959, "step": 3810 }, { "epoch": 0.2582153262416153, "grad_norm": 9.950063705444336, "learning_rate": 9.581080156068177e-05, "loss": 0.9453, "step": 3811 }, { "epoch": 0.2582830815095874, "grad_norm": 7.301966190338135, "learning_rate": 9.580943254158396e-05, "loss": 0.8468, "step": 3812 }, { "epoch": 0.25835083677755943, "grad_norm": 6.434390544891357, "learning_rate": 9.580806352248614e-05, "loss": 1.0513, "step": 3813 }, { "epoch": 0.2584185920455315, "grad_norm": 8.878791809082031, "learning_rate": 9.580669450338832e-05, "loss": 1.1264, "step": 3814 }, { "epoch": 0.2584863473135036, "grad_norm": 8.107111930847168, "learning_rate": 9.580532548429052e-05, "loss": 1.3037, "step": 3815 }, { "epoch": 0.2585541025814757, "grad_norm": 7.54078483581543, "learning_rate": 9.58039564651927e-05, "loss": 0.917, "step": 3816 }, { "epoch": 0.2586218578494478, "grad_norm": 9.369047164916992, "learning_rate": 9.580258744609488e-05, "loss": 0.8761, "step": 3817 }, { "epoch": 0.2586896131174199, "grad_norm": 8.927732467651367, "learning_rate": 9.580121842699707e-05, "loss": 1.0591, "step": 3818 }, { "epoch": 0.258757368385392, "grad_norm": 9.261579513549805, "learning_rate": 9.579984940789925e-05, "loss": 1.0671, "step": 3819 }, { "epoch": 0.2588251236533641, "grad_norm": 7.396904468536377, "learning_rate": 9.579848038880143e-05, "loss": 0.8242, "step": 3820 }, { "epoch": 0.2588928789213361, "grad_norm": 6.665501594543457, "learning_rate": 9.579711136970361e-05, "loss": 0.7686, "step": 3821 }, { "epoch": 0.2589606341893082, "grad_norm": 7.0808281898498535, "learning_rate": 9.57957423506058e-05, "loss": 0.9757, "step": 3822 }, { "epoch": 0.2590283894572803, "grad_norm": 7.734886646270752, "learning_rate": 9.579437333150799e-05, "loss": 1.0365, "step": 3823 }, { "epoch": 0.2590961447252524, "grad_norm": 7.383622169494629, "learning_rate": 9.579300431241017e-05, "loss": 0.8458, "step": 3824 }, { "epoch": 0.2591638999932245, "grad_norm": 7.758030891418457, "learning_rate": 9.579163529331235e-05, "loss": 0.9454, "step": 3825 }, { "epoch": 0.25923165526119657, "grad_norm": 9.456972122192383, "learning_rate": 9.579026627421454e-05, "loss": 1.065, "step": 3826 }, { "epoch": 0.25929941052916866, "grad_norm": 8.046957969665527, "learning_rate": 9.578889725511672e-05, "loss": 0.7021, "step": 3827 }, { "epoch": 0.25936716579714075, "grad_norm": 9.415145874023438, "learning_rate": 9.57875282360189e-05, "loss": 1.2344, "step": 3828 }, { "epoch": 0.2594349210651128, "grad_norm": 6.525821685791016, "learning_rate": 9.578615921692108e-05, "loss": 0.8005, "step": 3829 }, { "epoch": 0.2595026763330849, "grad_norm": 6.948854923248291, "learning_rate": 9.578479019782326e-05, "loss": 0.8313, "step": 3830 }, { "epoch": 0.259570431601057, "grad_norm": 6.991540431976318, "learning_rate": 9.578342117872546e-05, "loss": 0.945, "step": 3831 }, { "epoch": 0.25963818686902906, "grad_norm": 7.0269551277160645, "learning_rate": 9.578205215962764e-05, "loss": 0.8927, "step": 3832 }, { "epoch": 0.25970594213700116, "grad_norm": 7.773914813995361, "learning_rate": 9.578068314052982e-05, "loss": 0.8469, "step": 3833 }, { "epoch": 0.25977369740497325, "grad_norm": 9.6503267288208, "learning_rate": 9.5779314121432e-05, "loss": 1.1192, "step": 3834 }, { "epoch": 0.25984145267294534, "grad_norm": 6.582554340362549, "learning_rate": 9.577794510233418e-05, "loss": 0.8996, "step": 3835 }, { "epoch": 0.25990920794091743, "grad_norm": 10.097637176513672, "learning_rate": 9.577657608323637e-05, "loss": 1.211, "step": 3836 }, { "epoch": 0.25997696320888947, "grad_norm": 8.63124942779541, "learning_rate": 9.577520706413855e-05, "loss": 1.0353, "step": 3837 }, { "epoch": 0.26004471847686156, "grad_norm": 8.282122611999512, "learning_rate": 9.577383804504073e-05, "loss": 0.9677, "step": 3838 }, { "epoch": 0.26011247374483365, "grad_norm": 8.298484802246094, "learning_rate": 9.577246902594291e-05, "loss": 0.909, "step": 3839 }, { "epoch": 0.26018022901280574, "grad_norm": 7.91752290725708, "learning_rate": 9.57711000068451e-05, "loss": 1.1106, "step": 3840 }, { "epoch": 0.26024798428077783, "grad_norm": 8.073543548583984, "learning_rate": 9.576973098774729e-05, "loss": 0.9777, "step": 3841 }, { "epoch": 0.2603157395487499, "grad_norm": 8.225390434265137, "learning_rate": 9.576836196864947e-05, "loss": 1.0302, "step": 3842 }, { "epoch": 0.260383494816722, "grad_norm": 7.086613655090332, "learning_rate": 9.576699294955165e-05, "loss": 1.0061, "step": 3843 }, { "epoch": 0.2604512500846941, "grad_norm": 6.9043965339660645, "learning_rate": 9.576562393045383e-05, "loss": 0.8707, "step": 3844 }, { "epoch": 0.26051900535266614, "grad_norm": 9.083130836486816, "learning_rate": 9.576425491135602e-05, "loss": 1.0567, "step": 3845 }, { "epoch": 0.26058676062063824, "grad_norm": 6.962080478668213, "learning_rate": 9.57628858922582e-05, "loss": 0.8592, "step": 3846 }, { "epoch": 0.2606545158886103, "grad_norm": 7.196011066436768, "learning_rate": 9.576151687316038e-05, "loss": 0.8318, "step": 3847 }, { "epoch": 0.2607222711565824, "grad_norm": 7.421074867248535, "learning_rate": 9.576014785406256e-05, "loss": 1.0162, "step": 3848 }, { "epoch": 0.2607900264245545, "grad_norm": 7.065299034118652, "learning_rate": 9.575877883496476e-05, "loss": 0.9999, "step": 3849 }, { "epoch": 0.2608577816925266, "grad_norm": 7.442328929901123, "learning_rate": 9.575740981586694e-05, "loss": 0.8118, "step": 3850 }, { "epoch": 0.2609255369604987, "grad_norm": 6.888897895812988, "learning_rate": 9.575604079676912e-05, "loss": 1.0343, "step": 3851 }, { "epoch": 0.2609932922284708, "grad_norm": 7.788427829742432, "learning_rate": 9.57546717776713e-05, "loss": 0.9931, "step": 3852 }, { "epoch": 0.2610610474964428, "grad_norm": 7.247363567352295, "learning_rate": 9.575330275857348e-05, "loss": 0.8403, "step": 3853 }, { "epoch": 0.2611288027644149, "grad_norm": 7.305066108703613, "learning_rate": 9.575193373947567e-05, "loss": 1.0222, "step": 3854 }, { "epoch": 0.261196558032387, "grad_norm": 5.8723249435424805, "learning_rate": 9.575056472037785e-05, "loss": 0.7461, "step": 3855 }, { "epoch": 0.2612643133003591, "grad_norm": 8.933609008789062, "learning_rate": 9.574919570128003e-05, "loss": 1.1537, "step": 3856 }, { "epoch": 0.2613320685683312, "grad_norm": 7.3203125, "learning_rate": 9.574782668218221e-05, "loss": 1.0277, "step": 3857 }, { "epoch": 0.2613998238363033, "grad_norm": 7.455322742462158, "learning_rate": 9.57464576630844e-05, "loss": 0.8136, "step": 3858 }, { "epoch": 0.26146757910427537, "grad_norm": 8.074299812316895, "learning_rate": 9.574508864398659e-05, "loss": 0.9597, "step": 3859 }, { "epoch": 0.26153533437224746, "grad_norm": 8.732856750488281, "learning_rate": 9.574371962488877e-05, "loss": 1.2009, "step": 3860 }, { "epoch": 0.2616030896402195, "grad_norm": 7.179652690887451, "learning_rate": 9.574235060579095e-05, "loss": 0.8365, "step": 3861 }, { "epoch": 0.2616708449081916, "grad_norm": 8.975394248962402, "learning_rate": 9.574098158669314e-05, "loss": 0.9099, "step": 3862 }, { "epoch": 0.2617386001761637, "grad_norm": 9.083860397338867, "learning_rate": 9.573961256759532e-05, "loss": 1.0307, "step": 3863 }, { "epoch": 0.2618063554441358, "grad_norm": 7.449617385864258, "learning_rate": 9.57382435484975e-05, "loss": 1.1442, "step": 3864 }, { "epoch": 0.26187411071210787, "grad_norm": 8.299210548400879, "learning_rate": 9.57368745293997e-05, "loss": 1.0119, "step": 3865 }, { "epoch": 0.26194186598007996, "grad_norm": 6.847742080688477, "learning_rate": 9.573550551030188e-05, "loss": 0.9058, "step": 3866 }, { "epoch": 0.26200962124805205, "grad_norm": 7.057496070861816, "learning_rate": 9.573413649120406e-05, "loss": 1.0022, "step": 3867 }, { "epoch": 0.26207737651602414, "grad_norm": 7.9798359870910645, "learning_rate": 9.573276747210625e-05, "loss": 0.9555, "step": 3868 }, { "epoch": 0.26214513178399623, "grad_norm": 8.119134902954102, "learning_rate": 9.573139845300843e-05, "loss": 1.1039, "step": 3869 }, { "epoch": 0.26221288705196827, "grad_norm": 6.998579502105713, "learning_rate": 9.573002943391061e-05, "loss": 1.0448, "step": 3870 }, { "epoch": 0.26228064231994036, "grad_norm": 6.59659481048584, "learning_rate": 9.572866041481279e-05, "loss": 0.8946, "step": 3871 }, { "epoch": 0.26234839758791245, "grad_norm": 8.110078811645508, "learning_rate": 9.572729139571498e-05, "loss": 0.8568, "step": 3872 }, { "epoch": 0.26241615285588454, "grad_norm": 9.192879676818848, "learning_rate": 9.572592237661717e-05, "loss": 0.9517, "step": 3873 }, { "epoch": 0.26248390812385664, "grad_norm": 7.378695964813232, "learning_rate": 9.572455335751935e-05, "loss": 1.0917, "step": 3874 }, { "epoch": 0.2625516633918287, "grad_norm": 10.016194343566895, "learning_rate": 9.572318433842153e-05, "loss": 0.9754, "step": 3875 }, { "epoch": 0.2626194186598008, "grad_norm": 7.392832279205322, "learning_rate": 9.57218153193237e-05, "loss": 0.8258, "step": 3876 }, { "epoch": 0.2626871739277729, "grad_norm": 6.59785795211792, "learning_rate": 9.57204463002259e-05, "loss": 0.9656, "step": 3877 }, { "epoch": 0.26275492919574495, "grad_norm": 7.799993991851807, "learning_rate": 9.571907728112808e-05, "loss": 0.9776, "step": 3878 }, { "epoch": 0.26282268446371704, "grad_norm": 9.275368690490723, "learning_rate": 9.571770826203026e-05, "loss": 0.8827, "step": 3879 }, { "epoch": 0.26289043973168913, "grad_norm": 9.413054466247559, "learning_rate": 9.571633924293244e-05, "loss": 1.0671, "step": 3880 }, { "epoch": 0.2629581949996612, "grad_norm": 8.446796417236328, "learning_rate": 9.571497022383464e-05, "loss": 0.8702, "step": 3881 }, { "epoch": 0.2630259502676333, "grad_norm": 12.962898254394531, "learning_rate": 9.571360120473682e-05, "loss": 0.9281, "step": 3882 }, { "epoch": 0.2630937055356054, "grad_norm": 7.7250213623046875, "learning_rate": 9.5712232185639e-05, "loss": 0.8908, "step": 3883 }, { "epoch": 0.2631614608035775, "grad_norm": 10.405988693237305, "learning_rate": 9.571086316654118e-05, "loss": 1.2047, "step": 3884 }, { "epoch": 0.2632292160715496, "grad_norm": 9.009016036987305, "learning_rate": 9.570949414744336e-05, "loss": 1.0619, "step": 3885 }, { "epoch": 0.2632969713395216, "grad_norm": 8.600632667541504, "learning_rate": 9.570812512834555e-05, "loss": 1.4186, "step": 3886 }, { "epoch": 0.2633647266074937, "grad_norm": 7.9944071769714355, "learning_rate": 9.570675610924773e-05, "loss": 1.1493, "step": 3887 }, { "epoch": 0.2634324818754658, "grad_norm": 7.7023539543151855, "learning_rate": 9.570538709014991e-05, "loss": 0.8716, "step": 3888 }, { "epoch": 0.2635002371434379, "grad_norm": 7.743750095367432, "learning_rate": 9.570401807105209e-05, "loss": 0.9091, "step": 3889 }, { "epoch": 0.26356799241141, "grad_norm": 7.080264091491699, "learning_rate": 9.570264905195427e-05, "loss": 0.8064, "step": 3890 }, { "epoch": 0.2636357476793821, "grad_norm": 10.498579025268555, "learning_rate": 9.570128003285647e-05, "loss": 1.2763, "step": 3891 }, { "epoch": 0.2637035029473542, "grad_norm": 8.932741165161133, "learning_rate": 9.569991101375865e-05, "loss": 1.0108, "step": 3892 }, { "epoch": 0.26377125821532627, "grad_norm": 7.670261859893799, "learning_rate": 9.569854199466083e-05, "loss": 0.9312, "step": 3893 }, { "epoch": 0.2638390134832983, "grad_norm": 6.713436126708984, "learning_rate": 9.5697172975563e-05, "loss": 0.9319, "step": 3894 }, { "epoch": 0.2639067687512704, "grad_norm": 6.828521728515625, "learning_rate": 9.56958039564652e-05, "loss": 0.908, "step": 3895 }, { "epoch": 0.2639745240192425, "grad_norm": 7.664526462554932, "learning_rate": 9.569443493736738e-05, "loss": 0.9204, "step": 3896 }, { "epoch": 0.2640422792872146, "grad_norm": 7.330194473266602, "learning_rate": 9.569306591826956e-05, "loss": 0.9971, "step": 3897 }, { "epoch": 0.26411003455518667, "grad_norm": 7.202576160430908, "learning_rate": 9.569169689917174e-05, "loss": 1.1183, "step": 3898 }, { "epoch": 0.26417778982315876, "grad_norm": 7.7107720375061035, "learning_rate": 9.569032788007392e-05, "loss": 1.0794, "step": 3899 }, { "epoch": 0.26424554509113085, "grad_norm": 8.634172439575195, "learning_rate": 9.568895886097612e-05, "loss": 0.9124, "step": 3900 }, { "epoch": 0.26431330035910294, "grad_norm": 7.264395236968994, "learning_rate": 9.56875898418783e-05, "loss": 1.0044, "step": 3901 }, { "epoch": 0.264381055627075, "grad_norm": 7.352424144744873, "learning_rate": 9.568622082278048e-05, "loss": 0.9213, "step": 3902 }, { "epoch": 0.26444881089504707, "grad_norm": 8.39152717590332, "learning_rate": 9.568485180368266e-05, "loss": 0.914, "step": 3903 }, { "epoch": 0.26451656616301916, "grad_norm": 6.5833611488342285, "learning_rate": 9.568348278458485e-05, "loss": 0.9906, "step": 3904 }, { "epoch": 0.26458432143099125, "grad_norm": 7.952385902404785, "learning_rate": 9.568211376548703e-05, "loss": 0.9624, "step": 3905 }, { "epoch": 0.26465207669896335, "grad_norm": 7.232090950012207, "learning_rate": 9.568074474638921e-05, "loss": 0.8852, "step": 3906 }, { "epoch": 0.26471983196693544, "grad_norm": 7.817921161651611, "learning_rate": 9.567937572729139e-05, "loss": 0.8474, "step": 3907 }, { "epoch": 0.26478758723490753, "grad_norm": 8.926132202148438, "learning_rate": 9.567800670819359e-05, "loss": 1.0302, "step": 3908 }, { "epoch": 0.2648553425028796, "grad_norm": 8.923449516296387, "learning_rate": 9.567663768909577e-05, "loss": 1.0837, "step": 3909 }, { "epoch": 0.26492309777085166, "grad_norm": 8.431096076965332, "learning_rate": 9.567526866999795e-05, "loss": 0.7992, "step": 3910 }, { "epoch": 0.26499085303882375, "grad_norm": 10.121541976928711, "learning_rate": 9.567389965090014e-05, "loss": 1.0082, "step": 3911 }, { "epoch": 0.26505860830679584, "grad_norm": 9.349747657775879, "learning_rate": 9.567253063180232e-05, "loss": 0.8257, "step": 3912 }, { "epoch": 0.26512636357476793, "grad_norm": 9.438392639160156, "learning_rate": 9.56711616127045e-05, "loss": 0.979, "step": 3913 }, { "epoch": 0.26519411884274, "grad_norm": 7.422990798950195, "learning_rate": 9.56697925936067e-05, "loss": 0.8456, "step": 3914 }, { "epoch": 0.2652618741107121, "grad_norm": 6.354233741760254, "learning_rate": 9.566842357450888e-05, "loss": 0.6639, "step": 3915 }, { "epoch": 0.2653296293786842, "grad_norm": 9.160786628723145, "learning_rate": 9.566705455541106e-05, "loss": 1.1126, "step": 3916 }, { "epoch": 0.2653973846466563, "grad_norm": 7.786096096038818, "learning_rate": 9.566568553631324e-05, "loss": 1.2703, "step": 3917 }, { "epoch": 0.2654651399146284, "grad_norm": 7.358225345611572, "learning_rate": 9.566431651721543e-05, "loss": 1.0625, "step": 3918 }, { "epoch": 0.2655328951826004, "grad_norm": 8.920319557189941, "learning_rate": 9.566294749811761e-05, "loss": 1.0037, "step": 3919 }, { "epoch": 0.2656006504505725, "grad_norm": 7.16439151763916, "learning_rate": 9.566157847901979e-05, "loss": 1.0486, "step": 3920 }, { "epoch": 0.2656684057185446, "grad_norm": 7.374850749969482, "learning_rate": 9.566020945992197e-05, "loss": 1.011, "step": 3921 }, { "epoch": 0.2657361609865167, "grad_norm": 5.965388298034668, "learning_rate": 9.565884044082415e-05, "loss": 0.9043, "step": 3922 }, { "epoch": 0.2658039162544888, "grad_norm": 7.1143879890441895, "learning_rate": 9.565747142172634e-05, "loss": 0.923, "step": 3923 }, { "epoch": 0.2658716715224609, "grad_norm": 9.05667495727539, "learning_rate": 9.565610240262853e-05, "loss": 1.1247, "step": 3924 }, { "epoch": 0.265939426790433, "grad_norm": 6.407328128814697, "learning_rate": 9.56547333835307e-05, "loss": 0.9028, "step": 3925 }, { "epoch": 0.26600718205840507, "grad_norm": 9.335012435913086, "learning_rate": 9.565336436443289e-05, "loss": 0.9761, "step": 3926 }, { "epoch": 0.2660749373263771, "grad_norm": 7.462203025817871, "learning_rate": 9.565199534533508e-05, "loss": 0.8812, "step": 3927 }, { "epoch": 0.2661426925943492, "grad_norm": 8.162378311157227, "learning_rate": 9.565062632623726e-05, "loss": 0.991, "step": 3928 }, { "epoch": 0.2662104478623213, "grad_norm": 8.835287094116211, "learning_rate": 9.564925730713944e-05, "loss": 1.3085, "step": 3929 }, { "epoch": 0.2662782031302934, "grad_norm": 9.219624519348145, "learning_rate": 9.564788828804162e-05, "loss": 0.989, "step": 3930 }, { "epoch": 0.26634595839826547, "grad_norm": 6.832587718963623, "learning_rate": 9.56465192689438e-05, "loss": 1.0186, "step": 3931 }, { "epoch": 0.26641371366623756, "grad_norm": 8.075157165527344, "learning_rate": 9.5645150249846e-05, "loss": 0.9734, "step": 3932 }, { "epoch": 0.26648146893420965, "grad_norm": 9.403346061706543, "learning_rate": 9.564378123074818e-05, "loss": 1.1875, "step": 3933 }, { "epoch": 0.26654922420218174, "grad_norm": 8.556446075439453, "learning_rate": 9.564241221165036e-05, "loss": 1.0932, "step": 3934 }, { "epoch": 0.2666169794701538, "grad_norm": 8.178442001342773, "learning_rate": 9.564104319255254e-05, "loss": 0.7654, "step": 3935 }, { "epoch": 0.2666847347381259, "grad_norm": 8.82776165008545, "learning_rate": 9.563967417345473e-05, "loss": 1.0518, "step": 3936 }, { "epoch": 0.26675249000609796, "grad_norm": 8.001256942749023, "learning_rate": 9.563830515435691e-05, "loss": 1.0109, "step": 3937 }, { "epoch": 0.26682024527407006, "grad_norm": 7.439608573913574, "learning_rate": 9.563693613525909e-05, "loss": 0.9365, "step": 3938 }, { "epoch": 0.26688800054204215, "grad_norm": 5.78077507019043, "learning_rate": 9.563556711616127e-05, "loss": 0.9356, "step": 3939 }, { "epoch": 0.26695575581001424, "grad_norm": 7.6134819984436035, "learning_rate": 9.563419809706345e-05, "loss": 1.0003, "step": 3940 }, { "epoch": 0.26702351107798633, "grad_norm": 8.467934608459473, "learning_rate": 9.563282907796565e-05, "loss": 0.8052, "step": 3941 }, { "epoch": 0.2670912663459584, "grad_norm": 8.88598346710205, "learning_rate": 9.563146005886783e-05, "loss": 1.0069, "step": 3942 }, { "epoch": 0.26715902161393046, "grad_norm": 7.627633094787598, "learning_rate": 9.563009103977e-05, "loss": 1.1463, "step": 3943 }, { "epoch": 0.26722677688190255, "grad_norm": 7.948824882507324, "learning_rate": 9.562872202067219e-05, "loss": 0.9451, "step": 3944 }, { "epoch": 0.26729453214987464, "grad_norm": 8.10439395904541, "learning_rate": 9.562735300157437e-05, "loss": 1.1375, "step": 3945 }, { "epoch": 0.26736228741784673, "grad_norm": 6.488743782043457, "learning_rate": 9.562598398247656e-05, "loss": 0.8608, "step": 3946 }, { "epoch": 0.2674300426858188, "grad_norm": 9.731819152832031, "learning_rate": 9.562461496337874e-05, "loss": 1.2055, "step": 3947 }, { "epoch": 0.2674977979537909, "grad_norm": 10.455330848693848, "learning_rate": 9.562324594428092e-05, "loss": 1.1095, "step": 3948 }, { "epoch": 0.267565553221763, "grad_norm": 6.7713212966918945, "learning_rate": 9.56218769251831e-05, "loss": 0.9047, "step": 3949 }, { "epoch": 0.2676333084897351, "grad_norm": 9.292582511901855, "learning_rate": 9.56205079060853e-05, "loss": 0.9404, "step": 3950 }, { "epoch": 0.26770106375770714, "grad_norm": 8.252067565917969, "learning_rate": 9.561913888698748e-05, "loss": 0.8826, "step": 3951 }, { "epoch": 0.26776881902567923, "grad_norm": 7.126963138580322, "learning_rate": 9.561776986788966e-05, "loss": 0.8445, "step": 3952 }, { "epoch": 0.2678365742936513, "grad_norm": 8.352923393249512, "learning_rate": 9.561640084879184e-05, "loss": 1.1081, "step": 3953 }, { "epoch": 0.2679043295616234, "grad_norm": 6.933292388916016, "learning_rate": 9.561503182969403e-05, "loss": 0.8228, "step": 3954 }, { "epoch": 0.2679720848295955, "grad_norm": 7.9115986824035645, "learning_rate": 9.561366281059621e-05, "loss": 1.0984, "step": 3955 }, { "epoch": 0.2680398400975676, "grad_norm": 6.988186359405518, "learning_rate": 9.561229379149839e-05, "loss": 0.9334, "step": 3956 }, { "epoch": 0.2681075953655397, "grad_norm": 6.6764631271362305, "learning_rate": 9.561092477240058e-05, "loss": 0.907, "step": 3957 }, { "epoch": 0.2681753506335118, "grad_norm": 9.355245590209961, "learning_rate": 9.560955575330277e-05, "loss": 0.9087, "step": 3958 }, { "epoch": 0.2682431059014838, "grad_norm": 8.177611351013184, "learning_rate": 9.560818673420495e-05, "loss": 1.0815, "step": 3959 }, { "epoch": 0.2683108611694559, "grad_norm": 9.085289001464844, "learning_rate": 9.560681771510714e-05, "loss": 1.1291, "step": 3960 }, { "epoch": 0.268378616437428, "grad_norm": 7.056759357452393, "learning_rate": 9.560544869600932e-05, "loss": 0.8623, "step": 3961 }, { "epoch": 0.2684463717054001, "grad_norm": 8.175825119018555, "learning_rate": 9.56040796769115e-05, "loss": 0.8755, "step": 3962 }, { "epoch": 0.2685141269733722, "grad_norm": 7.127376079559326, "learning_rate": 9.560271065781368e-05, "loss": 0.8877, "step": 3963 }, { "epoch": 0.26858188224134427, "grad_norm": 9.635464668273926, "learning_rate": 9.560134163871587e-05, "loss": 0.8675, "step": 3964 }, { "epoch": 0.26864963750931636, "grad_norm": 7.8138275146484375, "learning_rate": 9.559997261961805e-05, "loss": 0.9997, "step": 3965 }, { "epoch": 0.26871739277728846, "grad_norm": 7.242639064788818, "learning_rate": 9.559860360052024e-05, "loss": 0.7692, "step": 3966 }, { "epoch": 0.2687851480452605, "grad_norm": 8.106497764587402, "learning_rate": 9.559723458142242e-05, "loss": 0.9801, "step": 3967 }, { "epoch": 0.2688529033132326, "grad_norm": 7.468952178955078, "learning_rate": 9.55958655623246e-05, "loss": 0.939, "step": 3968 }, { "epoch": 0.2689206585812047, "grad_norm": 9.002805709838867, "learning_rate": 9.559449654322679e-05, "loss": 1.1655, "step": 3969 }, { "epoch": 0.26898841384917677, "grad_norm": 8.787810325622559, "learning_rate": 9.559312752412897e-05, "loss": 0.9663, "step": 3970 }, { "epoch": 0.26905616911714886, "grad_norm": 9.113668441772461, "learning_rate": 9.559175850503115e-05, "loss": 1.0041, "step": 3971 }, { "epoch": 0.26912392438512095, "grad_norm": 6.289670944213867, "learning_rate": 9.559038948593333e-05, "loss": 0.8714, "step": 3972 }, { "epoch": 0.26919167965309304, "grad_norm": 7.755144119262695, "learning_rate": 9.558902046683552e-05, "loss": 0.8606, "step": 3973 }, { "epoch": 0.26925943492106513, "grad_norm": 8.989197731018066, "learning_rate": 9.55876514477377e-05, "loss": 0.9956, "step": 3974 }, { "epoch": 0.2693271901890372, "grad_norm": 6.45689058303833, "learning_rate": 9.558628242863989e-05, "loss": 0.7416, "step": 3975 }, { "epoch": 0.26939494545700926, "grad_norm": 7.772951126098633, "learning_rate": 9.558491340954207e-05, "loss": 0.927, "step": 3976 }, { "epoch": 0.26946270072498135, "grad_norm": 7.347445487976074, "learning_rate": 9.558354439044425e-05, "loss": 0.9337, "step": 3977 }, { "epoch": 0.26953045599295344, "grad_norm": 8.20067310333252, "learning_rate": 9.558217537134644e-05, "loss": 0.8324, "step": 3978 }, { "epoch": 0.26959821126092554, "grad_norm": 10.375189781188965, "learning_rate": 9.558080635224862e-05, "loss": 1.1103, "step": 3979 }, { "epoch": 0.2696659665288976, "grad_norm": 8.187355041503906, "learning_rate": 9.55794373331508e-05, "loss": 0.8469, "step": 3980 }, { "epoch": 0.2697337217968697, "grad_norm": 6.336839199066162, "learning_rate": 9.557806831405298e-05, "loss": 1.037, "step": 3981 }, { "epoch": 0.2698014770648418, "grad_norm": 6.368093967437744, "learning_rate": 9.557669929495517e-05, "loss": 0.6851, "step": 3982 }, { "epoch": 0.2698692323328139, "grad_norm": 7.394474506378174, "learning_rate": 9.557533027585736e-05, "loss": 1.0921, "step": 3983 }, { "epoch": 0.26993698760078594, "grad_norm": 9.0152006149292, "learning_rate": 9.557396125675954e-05, "loss": 0.9765, "step": 3984 }, { "epoch": 0.27000474286875803, "grad_norm": 8.247949600219727, "learning_rate": 9.557259223766172e-05, "loss": 1.0941, "step": 3985 }, { "epoch": 0.2700724981367301, "grad_norm": 7.9166579246521, "learning_rate": 9.55712232185639e-05, "loss": 0.798, "step": 3986 }, { "epoch": 0.2701402534047022, "grad_norm": 8.501713752746582, "learning_rate": 9.556985419946609e-05, "loss": 0.923, "step": 3987 }, { "epoch": 0.2702080086726743, "grad_norm": 7.450741767883301, "learning_rate": 9.556848518036827e-05, "loss": 0.9485, "step": 3988 }, { "epoch": 0.2702757639406464, "grad_norm": 6.7950239181518555, "learning_rate": 9.556711616127045e-05, "loss": 0.9041, "step": 3989 }, { "epoch": 0.2703435192086185, "grad_norm": 8.021660804748535, "learning_rate": 9.556574714217263e-05, "loss": 0.9494, "step": 3990 }, { "epoch": 0.2704112744765906, "grad_norm": 8.049949645996094, "learning_rate": 9.556437812307482e-05, "loss": 0.9629, "step": 3991 }, { "epoch": 0.2704790297445626, "grad_norm": 9.56615161895752, "learning_rate": 9.5563009103977e-05, "loss": 0.8822, "step": 3992 }, { "epoch": 0.2705467850125347, "grad_norm": 7.12232780456543, "learning_rate": 9.556164008487919e-05, "loss": 1.1377, "step": 3993 }, { "epoch": 0.2706145402805068, "grad_norm": 7.224277496337891, "learning_rate": 9.556027106578137e-05, "loss": 1.1023, "step": 3994 }, { "epoch": 0.2706822955484789, "grad_norm": 8.0076322555542, "learning_rate": 9.555890204668355e-05, "loss": 1.0599, "step": 3995 }, { "epoch": 0.270750050816451, "grad_norm": 7.8958845138549805, "learning_rate": 9.555753302758574e-05, "loss": 1.0455, "step": 3996 }, { "epoch": 0.2708178060844231, "grad_norm": 7.802896976470947, "learning_rate": 9.555616400848792e-05, "loss": 1.1089, "step": 3997 }, { "epoch": 0.27088556135239517, "grad_norm": 8.122269630432129, "learning_rate": 9.55547949893901e-05, "loss": 1.0708, "step": 3998 }, { "epoch": 0.27095331662036726, "grad_norm": 6.3488383293151855, "learning_rate": 9.555342597029228e-05, "loss": 0.85, "step": 3999 }, { "epoch": 0.2710210718883393, "grad_norm": 10.236666679382324, "learning_rate": 9.555205695119448e-05, "loss": 0.962, "step": 4000 }, { "epoch": 0.2710888271563114, "grad_norm": 7.594062328338623, "learning_rate": 9.555068793209666e-05, "loss": 1.1641, "step": 4001 }, { "epoch": 0.2711565824242835, "grad_norm": 6.824306964874268, "learning_rate": 9.554931891299884e-05, "loss": 0.7914, "step": 4002 }, { "epoch": 0.27122433769225557, "grad_norm": 7.826432228088379, "learning_rate": 9.554794989390103e-05, "loss": 1.0062, "step": 4003 }, { "epoch": 0.27129209296022766, "grad_norm": 7.189459800720215, "learning_rate": 9.554658087480321e-05, "loss": 0.7324, "step": 4004 }, { "epoch": 0.27135984822819975, "grad_norm": 7.949024200439453, "learning_rate": 9.554521185570539e-05, "loss": 1.1033, "step": 4005 }, { "epoch": 0.27142760349617184, "grad_norm": 6.575378894805908, "learning_rate": 9.554384283660758e-05, "loss": 1.0048, "step": 4006 }, { "epoch": 0.27149535876414393, "grad_norm": 8.585273742675781, "learning_rate": 9.554247381750976e-05, "loss": 1.0338, "step": 4007 }, { "epoch": 0.27156311403211597, "grad_norm": 7.999851703643799, "learning_rate": 9.554110479841194e-05, "loss": 1.1788, "step": 4008 }, { "epoch": 0.27163086930008806, "grad_norm": 8.40134334564209, "learning_rate": 9.553973577931413e-05, "loss": 1.0986, "step": 4009 }, { "epoch": 0.27169862456806015, "grad_norm": 6.380734920501709, "learning_rate": 9.553836676021632e-05, "loss": 0.717, "step": 4010 }, { "epoch": 0.27176637983603225, "grad_norm": 7.673857688903809, "learning_rate": 9.55369977411185e-05, "loss": 1.1646, "step": 4011 }, { "epoch": 0.27183413510400434, "grad_norm": 10.897799491882324, "learning_rate": 9.553562872202068e-05, "loss": 1.2796, "step": 4012 }, { "epoch": 0.27190189037197643, "grad_norm": 9.101582527160645, "learning_rate": 9.553425970292286e-05, "loss": 0.8778, "step": 4013 }, { "epoch": 0.2719696456399485, "grad_norm": 6.685849666595459, "learning_rate": 9.553289068382505e-05, "loss": 1.2329, "step": 4014 }, { "epoch": 0.2720374009079206, "grad_norm": 6.580325603485107, "learning_rate": 9.553152166472723e-05, "loss": 0.81, "step": 4015 }, { "epoch": 0.27210515617589265, "grad_norm": 7.854914665222168, "learning_rate": 9.553015264562941e-05, "loss": 1.1152, "step": 4016 }, { "epoch": 0.27217291144386474, "grad_norm": 7.293428421020508, "learning_rate": 9.55287836265316e-05, "loss": 0.9238, "step": 4017 }, { "epoch": 0.27224066671183683, "grad_norm": 6.944539546966553, "learning_rate": 9.552741460743378e-05, "loss": 1.018, "step": 4018 }, { "epoch": 0.2723084219798089, "grad_norm": 7.550015926361084, "learning_rate": 9.552604558833597e-05, "loss": 1.0332, "step": 4019 }, { "epoch": 0.272376177247781, "grad_norm": 8.035116195678711, "learning_rate": 9.552467656923815e-05, "loss": 1.0752, "step": 4020 }, { "epoch": 0.2724439325157531, "grad_norm": 7.088611125946045, "learning_rate": 9.552330755014033e-05, "loss": 0.833, "step": 4021 }, { "epoch": 0.2725116877837252, "grad_norm": 7.615128040313721, "learning_rate": 9.552193853104251e-05, "loss": 1.0236, "step": 4022 }, { "epoch": 0.2725794430516973, "grad_norm": 7.366427421569824, "learning_rate": 9.552056951194469e-05, "loss": 0.892, "step": 4023 }, { "epoch": 0.2726471983196694, "grad_norm": 7.073375225067139, "learning_rate": 9.551920049284688e-05, "loss": 0.8082, "step": 4024 }, { "epoch": 0.2727149535876414, "grad_norm": 6.350280284881592, "learning_rate": 9.551783147374906e-05, "loss": 0.8154, "step": 4025 }, { "epoch": 0.2727827088556135, "grad_norm": 5.656667709350586, "learning_rate": 9.551646245465125e-05, "loss": 0.8558, "step": 4026 }, { "epoch": 0.2728504641235856, "grad_norm": 6.565401077270508, "learning_rate": 9.551509343555343e-05, "loss": 1.0397, "step": 4027 }, { "epoch": 0.2729182193915577, "grad_norm": 8.4253511428833, "learning_rate": 9.551372441645562e-05, "loss": 0.9872, "step": 4028 }, { "epoch": 0.2729859746595298, "grad_norm": 7.32992696762085, "learning_rate": 9.55123553973578e-05, "loss": 0.8262, "step": 4029 }, { "epoch": 0.2730537299275019, "grad_norm": 7.277110576629639, "learning_rate": 9.551098637825998e-05, "loss": 1.218, "step": 4030 }, { "epoch": 0.27312148519547397, "grad_norm": 9.594376564025879, "learning_rate": 9.550961735916216e-05, "loss": 0.9785, "step": 4031 }, { "epoch": 0.27318924046344606, "grad_norm": 9.339418411254883, "learning_rate": 9.550824834006434e-05, "loss": 0.911, "step": 4032 }, { "epoch": 0.2732569957314181, "grad_norm": 6.717375755310059, "learning_rate": 9.550687932096653e-05, "loss": 0.8084, "step": 4033 }, { "epoch": 0.2733247509993902, "grad_norm": 6.447595596313477, "learning_rate": 9.550551030186872e-05, "loss": 0.9882, "step": 4034 }, { "epoch": 0.2733925062673623, "grad_norm": 7.6800312995910645, "learning_rate": 9.55041412827709e-05, "loss": 0.7895, "step": 4035 }, { "epoch": 0.27346026153533437, "grad_norm": 7.270735263824463, "learning_rate": 9.550277226367308e-05, "loss": 0.83, "step": 4036 }, { "epoch": 0.27352801680330646, "grad_norm": 8.246411323547363, "learning_rate": 9.550140324457527e-05, "loss": 0.8406, "step": 4037 }, { "epoch": 0.27359577207127855, "grad_norm": 9.59301471710205, "learning_rate": 9.550003422547745e-05, "loss": 0.9433, "step": 4038 }, { "epoch": 0.27366352733925065, "grad_norm": 7.304765701293945, "learning_rate": 9.549866520637963e-05, "loss": 0.8551, "step": 4039 }, { "epoch": 0.27373128260722274, "grad_norm": 10.581608772277832, "learning_rate": 9.549729618728181e-05, "loss": 1.0734, "step": 4040 }, { "epoch": 0.2737990378751948, "grad_norm": 8.858924865722656, "learning_rate": 9.549592716818399e-05, "loss": 1.1424, "step": 4041 }, { "epoch": 0.27386679314316686, "grad_norm": 7.042451858520508, "learning_rate": 9.549455814908618e-05, "loss": 0.9556, "step": 4042 }, { "epoch": 0.27393454841113896, "grad_norm": 7.213229656219482, "learning_rate": 9.549318912998837e-05, "loss": 1.0575, "step": 4043 }, { "epoch": 0.27400230367911105, "grad_norm": 10.457990646362305, "learning_rate": 9.549182011089055e-05, "loss": 1.1868, "step": 4044 }, { "epoch": 0.27407005894708314, "grad_norm": 7.598734378814697, "learning_rate": 9.549045109179273e-05, "loss": 0.7216, "step": 4045 }, { "epoch": 0.27413781421505523, "grad_norm": 7.892279148101807, "learning_rate": 9.548908207269492e-05, "loss": 1.1437, "step": 4046 }, { "epoch": 0.2742055694830273, "grad_norm": 7.455031394958496, "learning_rate": 9.54877130535971e-05, "loss": 0.8402, "step": 4047 }, { "epoch": 0.2742733247509994, "grad_norm": 7.1315107345581055, "learning_rate": 9.548634403449928e-05, "loss": 0.653, "step": 4048 }, { "epoch": 0.27434108001897145, "grad_norm": 7.116184234619141, "learning_rate": 9.548497501540147e-05, "loss": 0.9, "step": 4049 }, { "epoch": 0.27440883528694354, "grad_norm": 7.833000659942627, "learning_rate": 9.548360599630365e-05, "loss": 0.9328, "step": 4050 }, { "epoch": 0.27447659055491563, "grad_norm": 7.393906116485596, "learning_rate": 9.548223697720584e-05, "loss": 1.0643, "step": 4051 }, { "epoch": 0.2745443458228877, "grad_norm": 8.286185264587402, "learning_rate": 9.548086795810803e-05, "loss": 1.0402, "step": 4052 }, { "epoch": 0.2746121010908598, "grad_norm": 7.88281774520874, "learning_rate": 9.547949893901021e-05, "loss": 1.0117, "step": 4053 }, { "epoch": 0.2746798563588319, "grad_norm": 9.544231414794922, "learning_rate": 9.547812991991239e-05, "loss": 1.1522, "step": 4054 }, { "epoch": 0.274747611626804, "grad_norm": 7.637237071990967, "learning_rate": 9.547676090081457e-05, "loss": 1.0805, "step": 4055 }, { "epoch": 0.2748153668947761, "grad_norm": 7.00446891784668, "learning_rate": 9.547539188171676e-05, "loss": 0.9941, "step": 4056 }, { "epoch": 0.27488312216274813, "grad_norm": 6.8821306228637695, "learning_rate": 9.547402286261894e-05, "loss": 0.997, "step": 4057 }, { "epoch": 0.2749508774307202, "grad_norm": 6.466810703277588, "learning_rate": 9.547265384352112e-05, "loss": 0.8872, "step": 4058 }, { "epoch": 0.2750186326986923, "grad_norm": 7.136430263519287, "learning_rate": 9.54712848244233e-05, "loss": 0.768, "step": 4059 }, { "epoch": 0.2750863879666644, "grad_norm": 7.117071151733398, "learning_rate": 9.54699158053255e-05, "loss": 0.965, "step": 4060 }, { "epoch": 0.2751541432346365, "grad_norm": 6.811083793640137, "learning_rate": 9.546854678622768e-05, "loss": 0.8448, "step": 4061 }, { "epoch": 0.2752218985026086, "grad_norm": 6.055437088012695, "learning_rate": 9.546717776712986e-05, "loss": 1.0217, "step": 4062 }, { "epoch": 0.2752896537705807, "grad_norm": 8.154548645019531, "learning_rate": 9.546580874803204e-05, "loss": 1.0642, "step": 4063 }, { "epoch": 0.27535740903855277, "grad_norm": 7.394543170928955, "learning_rate": 9.546443972893422e-05, "loss": 1.0716, "step": 4064 }, { "epoch": 0.2754251643065248, "grad_norm": 8.716939926147461, "learning_rate": 9.546307070983641e-05, "loss": 0.8457, "step": 4065 }, { "epoch": 0.2754929195744969, "grad_norm": 8.732163429260254, "learning_rate": 9.54617016907386e-05, "loss": 1.0349, "step": 4066 }, { "epoch": 0.275560674842469, "grad_norm": 8.67320442199707, "learning_rate": 9.546033267164077e-05, "loss": 1.1179, "step": 4067 }, { "epoch": 0.2756284301104411, "grad_norm": 8.010993003845215, "learning_rate": 9.545896365254296e-05, "loss": 1.0168, "step": 4068 }, { "epoch": 0.27569618537841317, "grad_norm": 6.747826099395752, "learning_rate": 9.545759463344515e-05, "loss": 0.7905, "step": 4069 }, { "epoch": 0.27576394064638526, "grad_norm": 8.352065086364746, "learning_rate": 9.545622561434733e-05, "loss": 1.1287, "step": 4070 }, { "epoch": 0.27583169591435736, "grad_norm": 8.072574615478516, "learning_rate": 9.545485659524951e-05, "loss": 1.1582, "step": 4071 }, { "epoch": 0.27589945118232945, "grad_norm": 8.851838111877441, "learning_rate": 9.545348757615169e-05, "loss": 1.0434, "step": 4072 }, { "epoch": 0.2759672064503015, "grad_norm": 9.833956718444824, "learning_rate": 9.545211855705387e-05, "loss": 1.1956, "step": 4073 }, { "epoch": 0.2760349617182736, "grad_norm": 8.043981552124023, "learning_rate": 9.545074953795606e-05, "loss": 1.0625, "step": 4074 }, { "epoch": 0.27610271698624567, "grad_norm": 7.420129776000977, "learning_rate": 9.544938051885824e-05, "loss": 0.8933, "step": 4075 }, { "epoch": 0.27617047225421776, "grad_norm": 7.062417030334473, "learning_rate": 9.544801149976042e-05, "loss": 0.9655, "step": 4076 }, { "epoch": 0.27623822752218985, "grad_norm": 8.733392715454102, "learning_rate": 9.54466424806626e-05, "loss": 0.9182, "step": 4077 }, { "epoch": 0.27630598279016194, "grad_norm": 8.931736946105957, "learning_rate": 9.544527346156479e-05, "loss": 1.0243, "step": 4078 }, { "epoch": 0.27637373805813403, "grad_norm": 7.487978935241699, "learning_rate": 9.544390444246698e-05, "loss": 0.9967, "step": 4079 }, { "epoch": 0.2764414933261061, "grad_norm": 8.259819030761719, "learning_rate": 9.544253542336916e-05, "loss": 0.942, "step": 4080 }, { "epoch": 0.2765092485940782, "grad_norm": 9.625347137451172, "learning_rate": 9.544116640427134e-05, "loss": 1.2148, "step": 4081 }, { "epoch": 0.27657700386205025, "grad_norm": 7.737034797668457, "learning_rate": 9.543979738517352e-05, "loss": 0.7905, "step": 4082 }, { "epoch": 0.27664475913002234, "grad_norm": 7.118561744689941, "learning_rate": 9.543842836607571e-05, "loss": 0.9883, "step": 4083 }, { "epoch": 0.27671251439799444, "grad_norm": 9.749618530273438, "learning_rate": 9.54370593469779e-05, "loss": 1.0249, "step": 4084 }, { "epoch": 0.2767802696659665, "grad_norm": 8.805608749389648, "learning_rate": 9.543569032788008e-05, "loss": 1.041, "step": 4085 }, { "epoch": 0.2768480249339386, "grad_norm": 9.240931510925293, "learning_rate": 9.543432130878226e-05, "loss": 1.0598, "step": 4086 }, { "epoch": 0.2769157802019107, "grad_norm": 6.621399879455566, "learning_rate": 9.543295228968444e-05, "loss": 0.8884, "step": 4087 }, { "epoch": 0.2769835354698828, "grad_norm": 6.869698524475098, "learning_rate": 9.543158327058663e-05, "loss": 0.8309, "step": 4088 }, { "epoch": 0.2770512907378549, "grad_norm": 7.135868549346924, "learning_rate": 9.543021425148881e-05, "loss": 0.9399, "step": 4089 }, { "epoch": 0.27711904600582693, "grad_norm": 7.172493934631348, "learning_rate": 9.542884523239099e-05, "loss": 0.9626, "step": 4090 }, { "epoch": 0.277186801273799, "grad_norm": 6.903214931488037, "learning_rate": 9.542747621329317e-05, "loss": 1.0047, "step": 4091 }, { "epoch": 0.2772545565417711, "grad_norm": 7.557178020477295, "learning_rate": 9.542610719419536e-05, "loss": 0.6978, "step": 4092 }, { "epoch": 0.2773223118097432, "grad_norm": 7.468019485473633, "learning_rate": 9.542473817509754e-05, "loss": 0.8109, "step": 4093 }, { "epoch": 0.2773900670777153, "grad_norm": 8.699142456054688, "learning_rate": 9.542336915599973e-05, "loss": 0.7031, "step": 4094 }, { "epoch": 0.2774578223456874, "grad_norm": 8.03862190246582, "learning_rate": 9.54220001369019e-05, "loss": 0.9824, "step": 4095 }, { "epoch": 0.2775255776136595, "grad_norm": 9.884957313537598, "learning_rate": 9.54206311178041e-05, "loss": 0.8122, "step": 4096 }, { "epoch": 0.27759333288163157, "grad_norm": 9.435370445251465, "learning_rate": 9.541926209870628e-05, "loss": 1.2642, "step": 4097 }, { "epoch": 0.2776610881496036, "grad_norm": 8.154888153076172, "learning_rate": 9.541789307960846e-05, "loss": 0.9814, "step": 4098 }, { "epoch": 0.2777288434175757, "grad_norm": 9.771589279174805, "learning_rate": 9.541652406051065e-05, "loss": 1.0959, "step": 4099 }, { "epoch": 0.2777965986855478, "grad_norm": 7.635507106781006, "learning_rate": 9.541515504141283e-05, "loss": 0.8268, "step": 4100 }, { "epoch": 0.2778643539535199, "grad_norm": 9.028327941894531, "learning_rate": 9.541378602231501e-05, "loss": 1.227, "step": 4101 }, { "epoch": 0.277932109221492, "grad_norm": 8.327515602111816, "learning_rate": 9.541241700321721e-05, "loss": 0.8644, "step": 4102 }, { "epoch": 0.27799986448946407, "grad_norm": 7.547940254211426, "learning_rate": 9.541104798411939e-05, "loss": 1.0169, "step": 4103 }, { "epoch": 0.27806761975743616, "grad_norm": 8.0435152053833, "learning_rate": 9.540967896502157e-05, "loss": 1.0883, "step": 4104 }, { "epoch": 0.27813537502540825, "grad_norm": 7.7741217613220215, "learning_rate": 9.540830994592375e-05, "loss": 0.9389, "step": 4105 }, { "epoch": 0.2782031302933803, "grad_norm": 8.059552192687988, "learning_rate": 9.540694092682594e-05, "loss": 1.0024, "step": 4106 }, { "epoch": 0.2782708855613524, "grad_norm": 9.13268756866455, "learning_rate": 9.540557190772812e-05, "loss": 1.0448, "step": 4107 }, { "epoch": 0.27833864082932447, "grad_norm": 7.901900768280029, "learning_rate": 9.54042028886303e-05, "loss": 0.8235, "step": 4108 }, { "epoch": 0.27840639609729656, "grad_norm": 8.727076530456543, "learning_rate": 9.540283386953248e-05, "loss": 1.1047, "step": 4109 }, { "epoch": 0.27847415136526865, "grad_norm": 7.1972880363464355, "learning_rate": 9.540146485043466e-05, "loss": 0.9456, "step": 4110 }, { "epoch": 0.27854190663324074, "grad_norm": 6.886523246765137, "learning_rate": 9.540009583133686e-05, "loss": 1.0041, "step": 4111 }, { "epoch": 0.27860966190121284, "grad_norm": 7.595452308654785, "learning_rate": 9.539872681223904e-05, "loss": 1.0038, "step": 4112 }, { "epoch": 0.2786774171691849, "grad_norm": 6.007086753845215, "learning_rate": 9.539735779314122e-05, "loss": 0.8042, "step": 4113 }, { "epoch": 0.27874517243715696, "grad_norm": 7.112758159637451, "learning_rate": 9.53959887740434e-05, "loss": 0.8252, "step": 4114 }, { "epoch": 0.27881292770512905, "grad_norm": 10.120092391967773, "learning_rate": 9.53946197549456e-05, "loss": 1.1589, "step": 4115 }, { "epoch": 0.27888068297310115, "grad_norm": 7.587961196899414, "learning_rate": 9.539325073584777e-05, "loss": 1.0699, "step": 4116 }, { "epoch": 0.27894843824107324, "grad_norm": 7.671876430511475, "learning_rate": 9.539188171674995e-05, "loss": 0.9699, "step": 4117 }, { "epoch": 0.27901619350904533, "grad_norm": 7.345922470092773, "learning_rate": 9.539051269765213e-05, "loss": 0.9957, "step": 4118 }, { "epoch": 0.2790839487770174, "grad_norm": 9.215903282165527, "learning_rate": 9.538914367855432e-05, "loss": 1.156, "step": 4119 }, { "epoch": 0.2791517040449895, "grad_norm": 10.056458473205566, "learning_rate": 9.538777465945651e-05, "loss": 1.1229, "step": 4120 }, { "epoch": 0.2792194593129616, "grad_norm": 7.9655938148498535, "learning_rate": 9.538640564035869e-05, "loss": 1.017, "step": 4121 }, { "epoch": 0.27928721458093364, "grad_norm": 8.49431324005127, "learning_rate": 9.538503662126087e-05, "loss": 1.0035, "step": 4122 }, { "epoch": 0.27935496984890573, "grad_norm": 8.746543884277344, "learning_rate": 9.538366760216305e-05, "loss": 1.0711, "step": 4123 }, { "epoch": 0.2794227251168778, "grad_norm": 7.75557279586792, "learning_rate": 9.538229858306524e-05, "loss": 0.6868, "step": 4124 }, { "epoch": 0.2794904803848499, "grad_norm": 7.1494622230529785, "learning_rate": 9.538092956396742e-05, "loss": 0.721, "step": 4125 }, { "epoch": 0.279558235652822, "grad_norm": 6.971895217895508, "learning_rate": 9.53795605448696e-05, "loss": 1.0669, "step": 4126 }, { "epoch": 0.2796259909207941, "grad_norm": 6.478157043457031, "learning_rate": 9.537819152577178e-05, "loss": 0.9348, "step": 4127 }, { "epoch": 0.2796937461887662, "grad_norm": 6.307050704956055, "learning_rate": 9.537682250667397e-05, "loss": 0.9622, "step": 4128 }, { "epoch": 0.2797615014567383, "grad_norm": 9.505130767822266, "learning_rate": 9.537545348757616e-05, "loss": 1.1675, "step": 4129 }, { "epoch": 0.2798292567247104, "grad_norm": 8.11099624633789, "learning_rate": 9.537408446847834e-05, "loss": 0.853, "step": 4130 }, { "epoch": 0.2798970119926824, "grad_norm": 6.637272834777832, "learning_rate": 9.537271544938052e-05, "loss": 1.0993, "step": 4131 }, { "epoch": 0.2799647672606545, "grad_norm": 7.888055801391602, "learning_rate": 9.53713464302827e-05, "loss": 0.9582, "step": 4132 }, { "epoch": 0.2800325225286266, "grad_norm": 6.289199352264404, "learning_rate": 9.536997741118488e-05, "loss": 0.9027, "step": 4133 }, { "epoch": 0.2801002777965987, "grad_norm": 7.488378047943115, "learning_rate": 9.536860839208707e-05, "loss": 1.0697, "step": 4134 }, { "epoch": 0.2801680330645708, "grad_norm": 8.341411590576172, "learning_rate": 9.536723937298925e-05, "loss": 1.0705, "step": 4135 }, { "epoch": 0.28023578833254287, "grad_norm": 7.55519437789917, "learning_rate": 9.536587035389144e-05, "loss": 0.7589, "step": 4136 }, { "epoch": 0.28030354360051496, "grad_norm": 6.104217052459717, "learning_rate": 9.536450133479362e-05, "loss": 0.9161, "step": 4137 }, { "epoch": 0.28037129886848705, "grad_norm": 6.97914457321167, "learning_rate": 9.536313231569581e-05, "loss": 1.0342, "step": 4138 }, { "epoch": 0.2804390541364591, "grad_norm": 8.791030883789062, "learning_rate": 9.536176329659799e-05, "loss": 0.863, "step": 4139 }, { "epoch": 0.2805068094044312, "grad_norm": 6.868939399719238, "learning_rate": 9.536039427750017e-05, "loss": 0.9005, "step": 4140 }, { "epoch": 0.28057456467240327, "grad_norm": 9.854182243347168, "learning_rate": 9.535902525840235e-05, "loss": 0.773, "step": 4141 }, { "epoch": 0.28064231994037536, "grad_norm": 7.64580774307251, "learning_rate": 9.535765623930454e-05, "loss": 0.9999, "step": 4142 }, { "epoch": 0.28071007520834745, "grad_norm": 6.032886028289795, "learning_rate": 9.535628722020672e-05, "loss": 0.8916, "step": 4143 }, { "epoch": 0.28077783047631955, "grad_norm": 7.0441060066223145, "learning_rate": 9.53549182011089e-05, "loss": 0.9787, "step": 4144 }, { "epoch": 0.28084558574429164, "grad_norm": 6.4428629875183105, "learning_rate": 9.53535491820111e-05, "loss": 0.6749, "step": 4145 }, { "epoch": 0.28091334101226373, "grad_norm": 8.476522445678711, "learning_rate": 9.535218016291328e-05, "loss": 0.9408, "step": 4146 }, { "epoch": 0.28098109628023576, "grad_norm": 8.720208168029785, "learning_rate": 9.535081114381546e-05, "loss": 0.8571, "step": 4147 }, { "epoch": 0.28104885154820786, "grad_norm": 7.846646785736084, "learning_rate": 9.534944212471765e-05, "loss": 0.9193, "step": 4148 }, { "epoch": 0.28111660681617995, "grad_norm": 7.848026275634766, "learning_rate": 9.534807310561983e-05, "loss": 1.1975, "step": 4149 }, { "epoch": 0.28118436208415204, "grad_norm": 9.543595314025879, "learning_rate": 9.534670408652201e-05, "loss": 0.8769, "step": 4150 }, { "epoch": 0.28125211735212413, "grad_norm": 7.252998352050781, "learning_rate": 9.53453350674242e-05, "loss": 1.029, "step": 4151 }, { "epoch": 0.2813198726200962, "grad_norm": 6.271702289581299, "learning_rate": 9.534396604832639e-05, "loss": 0.8992, "step": 4152 }, { "epoch": 0.2813876278880683, "grad_norm": 9.071548461914062, "learning_rate": 9.534259702922857e-05, "loss": 1.0518, "step": 4153 }, { "epoch": 0.2814553831560404, "grad_norm": 7.452267169952393, "learning_rate": 9.534122801013075e-05, "loss": 0.9573, "step": 4154 }, { "epoch": 0.28152313842401244, "grad_norm": 8.72459602355957, "learning_rate": 9.533985899103293e-05, "loss": 1.09, "step": 4155 }, { "epoch": 0.28159089369198453, "grad_norm": 8.032079696655273, "learning_rate": 9.533848997193511e-05, "loss": 0.9595, "step": 4156 }, { "epoch": 0.2816586489599566, "grad_norm": 7.622939109802246, "learning_rate": 9.53371209528373e-05, "loss": 0.8722, "step": 4157 }, { "epoch": 0.2817264042279287, "grad_norm": 8.330899238586426, "learning_rate": 9.533575193373948e-05, "loss": 0.985, "step": 4158 }, { "epoch": 0.2817941594959008, "grad_norm": 9.378608703613281, "learning_rate": 9.533438291464166e-05, "loss": 0.8458, "step": 4159 }, { "epoch": 0.2818619147638729, "grad_norm": 7.305957317352295, "learning_rate": 9.533301389554384e-05, "loss": 0.9653, "step": 4160 }, { "epoch": 0.281929670031845, "grad_norm": 8.557588577270508, "learning_rate": 9.533164487644604e-05, "loss": 0.8742, "step": 4161 }, { "epoch": 0.2819974252998171, "grad_norm": 8.77814769744873, "learning_rate": 9.533027585734822e-05, "loss": 1.1782, "step": 4162 }, { "epoch": 0.2820651805677891, "grad_norm": 7.212672233581543, "learning_rate": 9.53289068382504e-05, "loss": 0.6333, "step": 4163 }, { "epoch": 0.2821329358357612, "grad_norm": 10.004176139831543, "learning_rate": 9.532753781915258e-05, "loss": 0.9544, "step": 4164 }, { "epoch": 0.2822006911037333, "grad_norm": 7.577065467834473, "learning_rate": 9.532616880005476e-05, "loss": 1.0708, "step": 4165 }, { "epoch": 0.2822684463717054, "grad_norm": 7.181521415710449, "learning_rate": 9.532479978095695e-05, "loss": 0.829, "step": 4166 }, { "epoch": 0.2823362016396775, "grad_norm": 5.7575249671936035, "learning_rate": 9.532343076185913e-05, "loss": 0.9353, "step": 4167 }, { "epoch": 0.2824039569076496, "grad_norm": 7.974564075469971, "learning_rate": 9.532206174276131e-05, "loss": 0.9976, "step": 4168 }, { "epoch": 0.28247171217562167, "grad_norm": 8.870126724243164, "learning_rate": 9.53206927236635e-05, "loss": 0.9704, "step": 4169 }, { "epoch": 0.28253946744359376, "grad_norm": 6.596248149871826, "learning_rate": 9.531932370456569e-05, "loss": 0.8763, "step": 4170 }, { "epoch": 0.2826072227115658, "grad_norm": 7.725964546203613, "learning_rate": 9.531795468546787e-05, "loss": 1.4009, "step": 4171 }, { "epoch": 0.2826749779795379, "grad_norm": 6.741204261779785, "learning_rate": 9.531658566637005e-05, "loss": 0.9103, "step": 4172 }, { "epoch": 0.28274273324751, "grad_norm": 9.002605438232422, "learning_rate": 9.531521664727223e-05, "loss": 1.1446, "step": 4173 }, { "epoch": 0.2828104885154821, "grad_norm": 7.860680103302002, "learning_rate": 9.531384762817441e-05, "loss": 1.081, "step": 4174 }, { "epoch": 0.28287824378345416, "grad_norm": 7.939533233642578, "learning_rate": 9.53124786090766e-05, "loss": 0.8876, "step": 4175 }, { "epoch": 0.28294599905142626, "grad_norm": 7.104232311248779, "learning_rate": 9.531110958997878e-05, "loss": 0.8687, "step": 4176 }, { "epoch": 0.28301375431939835, "grad_norm": 9.095148086547852, "learning_rate": 9.530974057088096e-05, "loss": 0.873, "step": 4177 }, { "epoch": 0.28308150958737044, "grad_norm": 6.869518280029297, "learning_rate": 9.530837155178314e-05, "loss": 0.8372, "step": 4178 }, { "epoch": 0.2831492648553425, "grad_norm": 7.7245049476623535, "learning_rate": 9.530700253268534e-05, "loss": 1.0267, "step": 4179 }, { "epoch": 0.28321702012331457, "grad_norm": 8.298705101013184, "learning_rate": 9.530563351358752e-05, "loss": 0.8919, "step": 4180 }, { "epoch": 0.28328477539128666, "grad_norm": 6.176532745361328, "learning_rate": 9.53042644944897e-05, "loss": 0.8603, "step": 4181 }, { "epoch": 0.28335253065925875, "grad_norm": 6.592353820800781, "learning_rate": 9.530289547539188e-05, "loss": 0.9415, "step": 4182 }, { "epoch": 0.28342028592723084, "grad_norm": 7.962296962738037, "learning_rate": 9.530152645629406e-05, "loss": 0.8764, "step": 4183 }, { "epoch": 0.28348804119520293, "grad_norm": 6.484033584594727, "learning_rate": 9.530015743719625e-05, "loss": 0.9577, "step": 4184 }, { "epoch": 0.283555796463175, "grad_norm": 8.156810760498047, "learning_rate": 9.529878841809843e-05, "loss": 1.1628, "step": 4185 }, { "epoch": 0.2836235517311471, "grad_norm": 6.916367053985596, "learning_rate": 9.529741939900061e-05, "loss": 1.1518, "step": 4186 }, { "epoch": 0.2836913069991192, "grad_norm": 7.70388650894165, "learning_rate": 9.52960503799028e-05, "loss": 0.9309, "step": 4187 }, { "epoch": 0.28375906226709124, "grad_norm": 8.770346641540527, "learning_rate": 9.529468136080499e-05, "loss": 0.891, "step": 4188 }, { "epoch": 0.28382681753506334, "grad_norm": 7.100319862365723, "learning_rate": 9.529331234170717e-05, "loss": 1.0302, "step": 4189 }, { "epoch": 0.2838945728030354, "grad_norm": 7.376253128051758, "learning_rate": 9.529194332260935e-05, "loss": 0.7906, "step": 4190 }, { "epoch": 0.2839623280710075, "grad_norm": 10.125496864318848, "learning_rate": 9.529057430351154e-05, "loss": 0.9829, "step": 4191 }, { "epoch": 0.2840300833389796, "grad_norm": 7.877635955810547, "learning_rate": 9.528920528441372e-05, "loss": 0.8803, "step": 4192 }, { "epoch": 0.2840978386069517, "grad_norm": 8.096887588500977, "learning_rate": 9.52878362653159e-05, "loss": 0.9342, "step": 4193 }, { "epoch": 0.2841655938749238, "grad_norm": 7.934850215911865, "learning_rate": 9.52864672462181e-05, "loss": 1.1699, "step": 4194 }, { "epoch": 0.2842333491428959, "grad_norm": 8.237794876098633, "learning_rate": 9.528509822712028e-05, "loss": 1.1718, "step": 4195 }, { "epoch": 0.2843011044108679, "grad_norm": 7.528624057769775, "learning_rate": 9.528372920802246e-05, "loss": 0.8229, "step": 4196 }, { "epoch": 0.28436885967884, "grad_norm": 7.210242748260498, "learning_rate": 9.528236018892464e-05, "loss": 0.8875, "step": 4197 }, { "epoch": 0.2844366149468121, "grad_norm": 7.630309581756592, "learning_rate": 9.528099116982683e-05, "loss": 0.8811, "step": 4198 }, { "epoch": 0.2845043702147842, "grad_norm": 11.624275207519531, "learning_rate": 9.527962215072901e-05, "loss": 1.0702, "step": 4199 }, { "epoch": 0.2845721254827563, "grad_norm": 7.52834415435791, "learning_rate": 9.52782531316312e-05, "loss": 0.9066, "step": 4200 }, { "epoch": 0.2846398807507284, "grad_norm": 8.418506622314453, "learning_rate": 9.527688411253337e-05, "loss": 0.8147, "step": 4201 }, { "epoch": 0.28470763601870047, "grad_norm": 9.67719554901123, "learning_rate": 9.527551509343557e-05, "loss": 0.976, "step": 4202 }, { "epoch": 0.28477539128667256, "grad_norm": 7.107409954071045, "learning_rate": 9.527414607433775e-05, "loss": 0.695, "step": 4203 }, { "epoch": 0.2848431465546446, "grad_norm": 7.840113162994385, "learning_rate": 9.527277705523993e-05, "loss": 0.9629, "step": 4204 }, { "epoch": 0.2849109018226167, "grad_norm": 8.170151710510254, "learning_rate": 9.527140803614211e-05, "loss": 0.8715, "step": 4205 }, { "epoch": 0.2849786570905888, "grad_norm": 7.882331848144531, "learning_rate": 9.527003901704429e-05, "loss": 1.0364, "step": 4206 }, { "epoch": 0.2850464123585609, "grad_norm": 10.077646255493164, "learning_rate": 9.526866999794648e-05, "loss": 1.2749, "step": 4207 }, { "epoch": 0.28511416762653297, "grad_norm": 8.749690055847168, "learning_rate": 9.526730097884866e-05, "loss": 0.9815, "step": 4208 }, { "epoch": 0.28518192289450506, "grad_norm": 6.793465614318848, "learning_rate": 9.526593195975084e-05, "loss": 1.0165, "step": 4209 }, { "epoch": 0.28524967816247715, "grad_norm": 7.186471939086914, "learning_rate": 9.526456294065302e-05, "loss": 0.911, "step": 4210 }, { "epoch": 0.28531743343044924, "grad_norm": 8.116944313049316, "learning_rate": 9.52631939215552e-05, "loss": 1.051, "step": 4211 }, { "epoch": 0.2853851886984213, "grad_norm": 5.9162750244140625, "learning_rate": 9.52618249024574e-05, "loss": 0.8165, "step": 4212 }, { "epoch": 0.28545294396639337, "grad_norm": 7.20265531539917, "learning_rate": 9.526045588335958e-05, "loss": 1.0693, "step": 4213 }, { "epoch": 0.28552069923436546, "grad_norm": 7.380153179168701, "learning_rate": 9.525908686426176e-05, "loss": 0.9611, "step": 4214 }, { "epoch": 0.28558845450233755, "grad_norm": 7.211367607116699, "learning_rate": 9.525771784516394e-05, "loss": 0.7595, "step": 4215 }, { "epoch": 0.28565620977030964, "grad_norm": 6.8030104637146, "learning_rate": 9.525634882606613e-05, "loss": 0.8281, "step": 4216 }, { "epoch": 0.28572396503828174, "grad_norm": 7.774519443511963, "learning_rate": 9.525497980696831e-05, "loss": 0.963, "step": 4217 }, { "epoch": 0.2857917203062538, "grad_norm": 8.210673332214355, "learning_rate": 9.52536107878705e-05, "loss": 1.3076, "step": 4218 }, { "epoch": 0.2858594755742259, "grad_norm": 7.3878583908081055, "learning_rate": 9.525224176877267e-05, "loss": 0.9546, "step": 4219 }, { "epoch": 0.28592723084219795, "grad_norm": 10.107827186584473, "learning_rate": 9.525087274967485e-05, "loss": 1.3039, "step": 4220 }, { "epoch": 0.28599498611017005, "grad_norm": 7.974700450897217, "learning_rate": 9.524950373057705e-05, "loss": 0.9651, "step": 4221 }, { "epoch": 0.28606274137814214, "grad_norm": 8.606413841247559, "learning_rate": 9.524813471147923e-05, "loss": 0.9459, "step": 4222 }, { "epoch": 0.28613049664611423, "grad_norm": 6.7952799797058105, "learning_rate": 9.524676569238141e-05, "loss": 0.8818, "step": 4223 }, { "epoch": 0.2861982519140863, "grad_norm": 7.2026214599609375, "learning_rate": 9.524539667328359e-05, "loss": 0.8578, "step": 4224 }, { "epoch": 0.2862660071820584, "grad_norm": 9.135619163513184, "learning_rate": 9.524402765418578e-05, "loss": 1.1594, "step": 4225 }, { "epoch": 0.2863337624500305, "grad_norm": 7.704957962036133, "learning_rate": 9.524265863508796e-05, "loss": 0.9715, "step": 4226 }, { "epoch": 0.2864015177180026, "grad_norm": 6.570467948913574, "learning_rate": 9.524128961599014e-05, "loss": 0.8916, "step": 4227 }, { "epoch": 0.28646927298597463, "grad_norm": 8.359966278076172, "learning_rate": 9.523992059689232e-05, "loss": 1.0021, "step": 4228 }, { "epoch": 0.2865370282539467, "grad_norm": 7.011820316314697, "learning_rate": 9.52385515777945e-05, "loss": 0.8718, "step": 4229 }, { "epoch": 0.2866047835219188, "grad_norm": 6.853650093078613, "learning_rate": 9.52371825586967e-05, "loss": 0.8041, "step": 4230 }, { "epoch": 0.2866725387898909, "grad_norm": 6.95853328704834, "learning_rate": 9.523581353959888e-05, "loss": 0.8763, "step": 4231 }, { "epoch": 0.286740294057863, "grad_norm": 9.217144012451172, "learning_rate": 9.523444452050106e-05, "loss": 1.046, "step": 4232 }, { "epoch": 0.2868080493258351, "grad_norm": 8.338934898376465, "learning_rate": 9.523307550140324e-05, "loss": 0.9205, "step": 4233 }, { "epoch": 0.2868758045938072, "grad_norm": 9.220541954040527, "learning_rate": 9.523170648230543e-05, "loss": 1.1468, "step": 4234 }, { "epoch": 0.2869435598617793, "grad_norm": 7.641387462615967, "learning_rate": 9.523033746320761e-05, "loss": 1.118, "step": 4235 }, { "epoch": 0.28701131512975137, "grad_norm": 7.52994441986084, "learning_rate": 9.52289684441098e-05, "loss": 0.851, "step": 4236 }, { "epoch": 0.2870790703977234, "grad_norm": 8.712708473205566, "learning_rate": 9.522759942501199e-05, "loss": 1.1741, "step": 4237 }, { "epoch": 0.2871468256656955, "grad_norm": 7.8549723625183105, "learning_rate": 9.522623040591417e-05, "loss": 0.9744, "step": 4238 }, { "epoch": 0.2872145809336676, "grad_norm": 6.8177642822265625, "learning_rate": 9.522486138681635e-05, "loss": 1.0432, "step": 4239 }, { "epoch": 0.2872823362016397, "grad_norm": 10.273691177368164, "learning_rate": 9.522349236771854e-05, "loss": 1.0777, "step": 4240 }, { "epoch": 0.28735009146961177, "grad_norm": 8.261405944824219, "learning_rate": 9.522212334862072e-05, "loss": 0.9415, "step": 4241 }, { "epoch": 0.28741784673758386, "grad_norm": 10.067824363708496, "learning_rate": 9.52207543295229e-05, "loss": 1.1091, "step": 4242 }, { "epoch": 0.28748560200555595, "grad_norm": 9.639914512634277, "learning_rate": 9.521938531042508e-05, "loss": 0.9534, "step": 4243 }, { "epoch": 0.28755335727352804, "grad_norm": 6.993269920349121, "learning_rate": 9.521801629132728e-05, "loss": 1.1193, "step": 4244 }, { "epoch": 0.2876211125415001, "grad_norm": 7.779829502105713, "learning_rate": 9.521664727222946e-05, "loss": 0.8744, "step": 4245 }, { "epoch": 0.28768886780947217, "grad_norm": 7.070007801055908, "learning_rate": 9.521527825313164e-05, "loss": 0.8609, "step": 4246 }, { "epoch": 0.28775662307744426, "grad_norm": 6.091519355773926, "learning_rate": 9.521390923403382e-05, "loss": 0.8018, "step": 4247 }, { "epoch": 0.28782437834541635, "grad_norm": 6.76396369934082, "learning_rate": 9.521254021493601e-05, "loss": 0.8509, "step": 4248 }, { "epoch": 0.28789213361338845, "grad_norm": 8.478080749511719, "learning_rate": 9.521117119583819e-05, "loss": 1.0355, "step": 4249 }, { "epoch": 0.28795988888136054, "grad_norm": 7.830933094024658, "learning_rate": 9.520980217674037e-05, "loss": 0.8699, "step": 4250 }, { "epoch": 0.28802764414933263, "grad_norm": 8.354218482971191, "learning_rate": 9.520843315764255e-05, "loss": 0.9228, "step": 4251 }, { "epoch": 0.2880953994173047, "grad_norm": 8.529300689697266, "learning_rate": 9.520706413854473e-05, "loss": 1.1215, "step": 4252 }, { "epoch": 0.28816315468527676, "grad_norm": 6.831529140472412, "learning_rate": 9.520569511944693e-05, "loss": 1.0665, "step": 4253 }, { "epoch": 0.28823090995324885, "grad_norm": 7.824577808380127, "learning_rate": 9.520432610034911e-05, "loss": 0.8864, "step": 4254 }, { "epoch": 0.28829866522122094, "grad_norm": 7.795472621917725, "learning_rate": 9.520295708125129e-05, "loss": 1.0775, "step": 4255 }, { "epoch": 0.28836642048919303, "grad_norm": 7.210735321044922, "learning_rate": 9.520158806215347e-05, "loss": 1.0244, "step": 4256 }, { "epoch": 0.2884341757571651, "grad_norm": 7.224759101867676, "learning_rate": 9.520021904305566e-05, "loss": 1.0527, "step": 4257 }, { "epoch": 0.2885019310251372, "grad_norm": 7.608676910400391, "learning_rate": 9.519885002395784e-05, "loss": 0.7978, "step": 4258 }, { "epoch": 0.2885696862931093, "grad_norm": 7.426436424255371, "learning_rate": 9.519748100486002e-05, "loss": 0.859, "step": 4259 }, { "epoch": 0.2886374415610814, "grad_norm": 7.585330963134766, "learning_rate": 9.51961119857622e-05, "loss": 1.1091, "step": 4260 }, { "epoch": 0.28870519682905343, "grad_norm": 6.930294990539551, "learning_rate": 9.519474296666438e-05, "loss": 0.8862, "step": 4261 }, { "epoch": 0.2887729520970255, "grad_norm": 8.092456817626953, "learning_rate": 9.519337394756658e-05, "loss": 1.051, "step": 4262 }, { "epoch": 0.2888407073649976, "grad_norm": 7.897385597229004, "learning_rate": 9.519200492846876e-05, "loss": 1.0589, "step": 4263 }, { "epoch": 0.2889084626329697, "grad_norm": 7.344932556152344, "learning_rate": 9.519063590937094e-05, "loss": 1.0319, "step": 4264 }, { "epoch": 0.2889762179009418, "grad_norm": 8.676694869995117, "learning_rate": 9.518926689027312e-05, "loss": 0.9119, "step": 4265 }, { "epoch": 0.2890439731689139, "grad_norm": 7.291116714477539, "learning_rate": 9.51878978711753e-05, "loss": 0.8249, "step": 4266 }, { "epoch": 0.289111728436886, "grad_norm": 7.467698574066162, "learning_rate": 9.51865288520775e-05, "loss": 0.9807, "step": 4267 }, { "epoch": 0.2891794837048581, "grad_norm": 7.744437217712402, "learning_rate": 9.518515983297967e-05, "loss": 0.8792, "step": 4268 }, { "epoch": 0.2892472389728301, "grad_norm": 6.8492560386657715, "learning_rate": 9.518379081388185e-05, "loss": 0.9216, "step": 4269 }, { "epoch": 0.2893149942408022, "grad_norm": 7.247424602508545, "learning_rate": 9.518242179478403e-05, "loss": 1.0865, "step": 4270 }, { "epoch": 0.2893827495087743, "grad_norm": 8.125252723693848, "learning_rate": 9.518105277568623e-05, "loss": 1.0298, "step": 4271 }, { "epoch": 0.2894505047767464, "grad_norm": 6.578275680541992, "learning_rate": 9.517968375658841e-05, "loss": 0.9353, "step": 4272 }, { "epoch": 0.2895182600447185, "grad_norm": 8.099616050720215, "learning_rate": 9.517831473749059e-05, "loss": 1.038, "step": 4273 }, { "epoch": 0.28958601531269057, "grad_norm": 7.529900074005127, "learning_rate": 9.517694571839277e-05, "loss": 0.9753, "step": 4274 }, { "epoch": 0.28965377058066266, "grad_norm": 7.521812915802002, "learning_rate": 9.517557669929495e-05, "loss": 0.972, "step": 4275 }, { "epoch": 0.28972152584863475, "grad_norm": 7.780272483825684, "learning_rate": 9.517420768019714e-05, "loss": 0.9927, "step": 4276 }, { "epoch": 0.2897892811166068, "grad_norm": 7.982085704803467, "learning_rate": 9.517283866109932e-05, "loss": 0.9462, "step": 4277 }, { "epoch": 0.2898570363845789, "grad_norm": 7.325984477996826, "learning_rate": 9.51714696420015e-05, "loss": 1.0814, "step": 4278 }, { "epoch": 0.289924791652551, "grad_norm": 7.833248138427734, "learning_rate": 9.517010062290368e-05, "loss": 1.1736, "step": 4279 }, { "epoch": 0.28999254692052306, "grad_norm": 6.983424186706543, "learning_rate": 9.516873160380588e-05, "loss": 0.9662, "step": 4280 }, { "epoch": 0.29006030218849516, "grad_norm": 7.954293251037598, "learning_rate": 9.516736258470806e-05, "loss": 0.8449, "step": 4281 }, { "epoch": 0.29012805745646725, "grad_norm": 7.47749137878418, "learning_rate": 9.516599356561024e-05, "loss": 0.7869, "step": 4282 }, { "epoch": 0.29019581272443934, "grad_norm": 8.70733642578125, "learning_rate": 9.516462454651243e-05, "loss": 0.915, "step": 4283 }, { "epoch": 0.29026356799241143, "grad_norm": 7.003527641296387, "learning_rate": 9.516325552741461e-05, "loss": 0.8543, "step": 4284 }, { "epoch": 0.2903313232603835, "grad_norm": 8.230785369873047, "learning_rate": 9.51618865083168e-05, "loss": 1.0022, "step": 4285 }, { "epoch": 0.29039907852835556, "grad_norm": 6.20754337310791, "learning_rate": 9.516051748921899e-05, "loss": 0.8967, "step": 4286 }, { "epoch": 0.29046683379632765, "grad_norm": 7.0158538818359375, "learning_rate": 9.515914847012117e-05, "loss": 0.9162, "step": 4287 }, { "epoch": 0.29053458906429974, "grad_norm": 6.215134143829346, "learning_rate": 9.515777945102335e-05, "loss": 0.7362, "step": 4288 }, { "epoch": 0.29060234433227183, "grad_norm": 7.366815090179443, "learning_rate": 9.515641043192553e-05, "loss": 1.0174, "step": 4289 }, { "epoch": 0.2906700996002439, "grad_norm": 7.514114856719971, "learning_rate": 9.515504141282772e-05, "loss": 0.9979, "step": 4290 }, { "epoch": 0.290737854868216, "grad_norm": 6.899611949920654, "learning_rate": 9.51536723937299e-05, "loss": 0.6909, "step": 4291 }, { "epoch": 0.2908056101361881, "grad_norm": 7.254974842071533, "learning_rate": 9.515230337463208e-05, "loss": 1.1744, "step": 4292 }, { "epoch": 0.2908733654041602, "grad_norm": 7.3820719718933105, "learning_rate": 9.515093435553426e-05, "loss": 1.0514, "step": 4293 }, { "epoch": 0.29094112067213224, "grad_norm": 7.556882858276367, "learning_rate": 9.514956533643646e-05, "loss": 1.0515, "step": 4294 }, { "epoch": 0.29100887594010433, "grad_norm": 7.479793548583984, "learning_rate": 9.514819631733864e-05, "loss": 0.9634, "step": 4295 }, { "epoch": 0.2910766312080764, "grad_norm": 6.750072956085205, "learning_rate": 9.514682729824082e-05, "loss": 0.8536, "step": 4296 }, { "epoch": 0.2911443864760485, "grad_norm": 6.5008931159973145, "learning_rate": 9.5145458279143e-05, "loss": 0.8414, "step": 4297 }, { "epoch": 0.2912121417440206, "grad_norm": 5.633193016052246, "learning_rate": 9.514408926004518e-05, "loss": 0.9628, "step": 4298 }, { "epoch": 0.2912798970119927, "grad_norm": 7.052159309387207, "learning_rate": 9.514272024094737e-05, "loss": 0.9612, "step": 4299 }, { "epoch": 0.2913476522799648, "grad_norm": 6.944911003112793, "learning_rate": 9.514135122184955e-05, "loss": 0.8237, "step": 4300 }, { "epoch": 0.2914154075479369, "grad_norm": 6.553284645080566, "learning_rate": 9.513998220275173e-05, "loss": 1.103, "step": 4301 }, { "epoch": 0.2914831628159089, "grad_norm": 6.414531707763672, "learning_rate": 9.513861318365391e-05, "loss": 1.163, "step": 4302 }, { "epoch": 0.291550918083881, "grad_norm": 9.551664352416992, "learning_rate": 9.513724416455611e-05, "loss": 1.0118, "step": 4303 }, { "epoch": 0.2916186733518531, "grad_norm": 8.332045555114746, "learning_rate": 9.513587514545829e-05, "loss": 1.0938, "step": 4304 }, { "epoch": 0.2916864286198252, "grad_norm": 7.512511253356934, "learning_rate": 9.513450612636047e-05, "loss": 0.9499, "step": 4305 }, { "epoch": 0.2917541838877973, "grad_norm": 7.233335494995117, "learning_rate": 9.513313710726265e-05, "loss": 0.9298, "step": 4306 }, { "epoch": 0.29182193915576937, "grad_norm": 6.711233615875244, "learning_rate": 9.513176808816483e-05, "loss": 0.8409, "step": 4307 }, { "epoch": 0.29188969442374146, "grad_norm": 6.433267593383789, "learning_rate": 9.513039906906702e-05, "loss": 0.7654, "step": 4308 }, { "epoch": 0.29195744969171356, "grad_norm": 7.049487113952637, "learning_rate": 9.51290300499692e-05, "loss": 0.8363, "step": 4309 }, { "epoch": 0.2920252049596856, "grad_norm": 7.030927658081055, "learning_rate": 9.512766103087138e-05, "loss": 0.8536, "step": 4310 }, { "epoch": 0.2920929602276577, "grad_norm": 7.723023414611816, "learning_rate": 9.512629201177356e-05, "loss": 0.8738, "step": 4311 }, { "epoch": 0.2921607154956298, "grad_norm": 6.293034553527832, "learning_rate": 9.512492299267576e-05, "loss": 0.8247, "step": 4312 }, { "epoch": 0.29222847076360187, "grad_norm": 7.551876544952393, "learning_rate": 9.512355397357794e-05, "loss": 0.9144, "step": 4313 }, { "epoch": 0.29229622603157396, "grad_norm": 7.78217077255249, "learning_rate": 9.512218495448012e-05, "loss": 0.911, "step": 4314 }, { "epoch": 0.29236398129954605, "grad_norm": 6.316245079040527, "learning_rate": 9.51208159353823e-05, "loss": 0.7426, "step": 4315 }, { "epoch": 0.29243173656751814, "grad_norm": 6.730735778808594, "learning_rate": 9.511944691628448e-05, "loss": 0.8938, "step": 4316 }, { "epoch": 0.29249949183549023, "grad_norm": 8.128830909729004, "learning_rate": 9.511807789718667e-05, "loss": 1.129, "step": 4317 }, { "epoch": 0.29256724710346227, "grad_norm": 8.023150444030762, "learning_rate": 9.511670887808885e-05, "loss": 1.1377, "step": 4318 }, { "epoch": 0.29263500237143436, "grad_norm": 9.075041770935059, "learning_rate": 9.511533985899103e-05, "loss": 1.0358, "step": 4319 }, { "epoch": 0.29270275763940645, "grad_norm": 9.679409980773926, "learning_rate": 9.511397083989321e-05, "loss": 0.9415, "step": 4320 }, { "epoch": 0.29277051290737854, "grad_norm": 7.316888809204102, "learning_rate": 9.51126018207954e-05, "loss": 0.9422, "step": 4321 }, { "epoch": 0.29283826817535064, "grad_norm": 8.017754554748535, "learning_rate": 9.511123280169759e-05, "loss": 0.9685, "step": 4322 }, { "epoch": 0.2929060234433227, "grad_norm": 7.990202903747559, "learning_rate": 9.510986378259977e-05, "loss": 0.9028, "step": 4323 }, { "epoch": 0.2929737787112948, "grad_norm": 7.538297653198242, "learning_rate": 9.510849476350195e-05, "loss": 0.8146, "step": 4324 }, { "epoch": 0.2930415339792669, "grad_norm": 6.794483184814453, "learning_rate": 9.510712574440413e-05, "loss": 0.8859, "step": 4325 }, { "epoch": 0.29310928924723895, "grad_norm": 9.820608139038086, "learning_rate": 9.510575672530632e-05, "loss": 1.0486, "step": 4326 }, { "epoch": 0.29317704451521104, "grad_norm": 8.107736587524414, "learning_rate": 9.51043877062085e-05, "loss": 0.9978, "step": 4327 }, { "epoch": 0.29324479978318313, "grad_norm": 8.129117965698242, "learning_rate": 9.510301868711068e-05, "loss": 0.9966, "step": 4328 }, { "epoch": 0.2933125550511552, "grad_norm": 7.15314245223999, "learning_rate": 9.510164966801288e-05, "loss": 0.8078, "step": 4329 }, { "epoch": 0.2933803103191273, "grad_norm": 7.951611518859863, "learning_rate": 9.510028064891506e-05, "loss": 0.8566, "step": 4330 }, { "epoch": 0.2934480655870994, "grad_norm": 8.350584030151367, "learning_rate": 9.509891162981724e-05, "loss": 1.1757, "step": 4331 }, { "epoch": 0.2935158208550715, "grad_norm": 7.556168079376221, "learning_rate": 9.509754261071943e-05, "loss": 1.175, "step": 4332 }, { "epoch": 0.2935835761230436, "grad_norm": 8.102100372314453, "learning_rate": 9.509617359162161e-05, "loss": 0.8948, "step": 4333 }, { "epoch": 0.2936513313910156, "grad_norm": 6.762083530426025, "learning_rate": 9.509480457252379e-05, "loss": 0.9597, "step": 4334 }, { "epoch": 0.2937190866589877, "grad_norm": 7.2654948234558105, "learning_rate": 9.509343555342599e-05, "loss": 0.9352, "step": 4335 }, { "epoch": 0.2937868419269598, "grad_norm": 8.114389419555664, "learning_rate": 9.509206653432817e-05, "loss": 0.9758, "step": 4336 }, { "epoch": 0.2938545971949319, "grad_norm": 6.972601890563965, "learning_rate": 9.509069751523035e-05, "loss": 0.9322, "step": 4337 }, { "epoch": 0.293922352462904, "grad_norm": 7.261653423309326, "learning_rate": 9.508932849613253e-05, "loss": 0.8714, "step": 4338 }, { "epoch": 0.2939901077308761, "grad_norm": 8.368372917175293, "learning_rate": 9.508795947703471e-05, "loss": 1.1235, "step": 4339 }, { "epoch": 0.2940578629988482, "grad_norm": 8.295226097106934, "learning_rate": 9.50865904579369e-05, "loss": 1.0647, "step": 4340 }, { "epoch": 0.29412561826682027, "grad_norm": 8.720281600952148, "learning_rate": 9.508522143883908e-05, "loss": 1.0779, "step": 4341 }, { "epoch": 0.29419337353479236, "grad_norm": 6.721040725708008, "learning_rate": 9.508385241974126e-05, "loss": 1.0219, "step": 4342 }, { "epoch": 0.2942611288027644, "grad_norm": 6.298686504364014, "learning_rate": 9.508248340064344e-05, "loss": 0.8011, "step": 4343 }, { "epoch": 0.2943288840707365, "grad_norm": 6.915674686431885, "learning_rate": 9.508111438154562e-05, "loss": 0.9702, "step": 4344 }, { "epoch": 0.2943966393387086, "grad_norm": 6.907165050506592, "learning_rate": 9.507974536244782e-05, "loss": 0.861, "step": 4345 }, { "epoch": 0.29446439460668067, "grad_norm": 7.540262699127197, "learning_rate": 9.507837634335e-05, "loss": 0.9426, "step": 4346 }, { "epoch": 0.29453214987465276, "grad_norm": 7.145787715911865, "learning_rate": 9.507700732425218e-05, "loss": 0.911, "step": 4347 }, { "epoch": 0.29459990514262485, "grad_norm": 9.937151908874512, "learning_rate": 9.507563830515436e-05, "loss": 0.9825, "step": 4348 }, { "epoch": 0.29466766041059694, "grad_norm": 7.125603199005127, "learning_rate": 9.507426928605655e-05, "loss": 0.855, "step": 4349 }, { "epoch": 0.29473541567856903, "grad_norm": 7.014605522155762, "learning_rate": 9.507290026695873e-05, "loss": 0.9282, "step": 4350 }, { "epoch": 0.29480317094654107, "grad_norm": 8.753725051879883, "learning_rate": 9.507153124786091e-05, "loss": 1.1047, "step": 4351 }, { "epoch": 0.29487092621451316, "grad_norm": 7.921840190887451, "learning_rate": 9.50701622287631e-05, "loss": 0.7857, "step": 4352 }, { "epoch": 0.29493868148248525, "grad_norm": 6.762521266937256, "learning_rate": 9.506879320966527e-05, "loss": 0.8537, "step": 4353 }, { "epoch": 0.29500643675045735, "grad_norm": 8.441102981567383, "learning_rate": 9.506742419056747e-05, "loss": 1.1356, "step": 4354 }, { "epoch": 0.29507419201842944, "grad_norm": 6.748636245727539, "learning_rate": 9.506605517146965e-05, "loss": 1.0962, "step": 4355 }, { "epoch": 0.29514194728640153, "grad_norm": 8.168306350708008, "learning_rate": 9.506468615237183e-05, "loss": 0.9628, "step": 4356 }, { "epoch": 0.2952097025543736, "grad_norm": 6.545300483703613, "learning_rate": 9.506331713327401e-05, "loss": 1.0041, "step": 4357 }, { "epoch": 0.2952774578223457, "grad_norm": 6.958950996398926, "learning_rate": 9.50619481141762e-05, "loss": 0.9742, "step": 4358 }, { "epoch": 0.29534521309031775, "grad_norm": 6.817789554595947, "learning_rate": 9.506057909507838e-05, "loss": 0.8281, "step": 4359 }, { "epoch": 0.29541296835828984, "grad_norm": 8.255404472351074, "learning_rate": 9.505921007598056e-05, "loss": 0.9171, "step": 4360 }, { "epoch": 0.29548072362626193, "grad_norm": 7.552668571472168, "learning_rate": 9.505784105688274e-05, "loss": 0.8665, "step": 4361 }, { "epoch": 0.295548478894234, "grad_norm": 6.980686664581299, "learning_rate": 9.505647203778492e-05, "loss": 0.9461, "step": 4362 }, { "epoch": 0.2956162341622061, "grad_norm": 6.765429973602295, "learning_rate": 9.505510301868712e-05, "loss": 0.7946, "step": 4363 }, { "epoch": 0.2956839894301782, "grad_norm": 8.403508186340332, "learning_rate": 9.50537339995893e-05, "loss": 0.9015, "step": 4364 }, { "epoch": 0.2957517446981503, "grad_norm": 7.822012901306152, "learning_rate": 9.505236498049148e-05, "loss": 1.0313, "step": 4365 }, { "epoch": 0.2958194999661224, "grad_norm": 9.843314170837402, "learning_rate": 9.505099596139366e-05, "loss": 1.11, "step": 4366 }, { "epoch": 0.2958872552340944, "grad_norm": 8.155379295349121, "learning_rate": 9.504962694229584e-05, "loss": 1.1565, "step": 4367 }, { "epoch": 0.2959550105020665, "grad_norm": 8.446005821228027, "learning_rate": 9.504825792319803e-05, "loss": 1.0588, "step": 4368 }, { "epoch": 0.2960227657700386, "grad_norm": 7.244038105010986, "learning_rate": 9.504688890410021e-05, "loss": 0.8648, "step": 4369 }, { "epoch": 0.2960905210380107, "grad_norm": 8.324630737304688, "learning_rate": 9.50455198850024e-05, "loss": 0.9862, "step": 4370 }, { "epoch": 0.2961582763059828, "grad_norm": 7.308585166931152, "learning_rate": 9.504415086590457e-05, "loss": 1.082, "step": 4371 }, { "epoch": 0.2962260315739549, "grad_norm": 8.17287826538086, "learning_rate": 9.504278184680677e-05, "loss": 1.0359, "step": 4372 }, { "epoch": 0.296293786841927, "grad_norm": 6.8774943351745605, "learning_rate": 9.504141282770895e-05, "loss": 0.904, "step": 4373 }, { "epoch": 0.29636154210989907, "grad_norm": 8.641596794128418, "learning_rate": 9.504004380861113e-05, "loss": 0.9822, "step": 4374 }, { "epoch": 0.2964292973778711, "grad_norm": 7.289584159851074, "learning_rate": 9.503867478951331e-05, "loss": 1.0274, "step": 4375 }, { "epoch": 0.2964970526458432, "grad_norm": 9.820521354675293, "learning_rate": 9.50373057704155e-05, "loss": 1.0895, "step": 4376 }, { "epoch": 0.2965648079138153, "grad_norm": 8.6587553024292, "learning_rate": 9.503593675131768e-05, "loss": 0.7513, "step": 4377 }, { "epoch": 0.2966325631817874, "grad_norm": 7.444339752197266, "learning_rate": 9.503456773221986e-05, "loss": 1.0892, "step": 4378 }, { "epoch": 0.29670031844975947, "grad_norm": 6.4494948387146, "learning_rate": 9.503319871312206e-05, "loss": 0.9465, "step": 4379 }, { "epoch": 0.29676807371773156, "grad_norm": 7.744002819061279, "learning_rate": 9.503182969402424e-05, "loss": 1.0613, "step": 4380 }, { "epoch": 0.29683582898570365, "grad_norm": 8.152511596679688, "learning_rate": 9.503046067492642e-05, "loss": 1.0115, "step": 4381 }, { "epoch": 0.29690358425367575, "grad_norm": 6.076781749725342, "learning_rate": 9.502909165582861e-05, "loss": 0.8255, "step": 4382 }, { "epoch": 0.2969713395216478, "grad_norm": 6.787397861480713, "learning_rate": 9.502772263673079e-05, "loss": 1.1132, "step": 4383 }, { "epoch": 0.2970390947896199, "grad_norm": 8.592107772827148, "learning_rate": 9.502635361763297e-05, "loss": 0.8715, "step": 4384 }, { "epoch": 0.29710685005759196, "grad_norm": 7.620471954345703, "learning_rate": 9.502498459853515e-05, "loss": 0.9521, "step": 4385 }, { "epoch": 0.29717460532556406, "grad_norm": 8.026017189025879, "learning_rate": 9.502361557943735e-05, "loss": 0.8575, "step": 4386 }, { "epoch": 0.29724236059353615, "grad_norm": 8.82768726348877, "learning_rate": 9.502224656033953e-05, "loss": 1.0642, "step": 4387 }, { "epoch": 0.29731011586150824, "grad_norm": 7.179310321807861, "learning_rate": 9.502087754124171e-05, "loss": 0.9161, "step": 4388 }, { "epoch": 0.29737787112948033, "grad_norm": 7.4363861083984375, "learning_rate": 9.501950852214389e-05, "loss": 1.0825, "step": 4389 }, { "epoch": 0.2974456263974524, "grad_norm": 6.218450546264648, "learning_rate": 9.501813950304608e-05, "loss": 0.8461, "step": 4390 }, { "epoch": 0.2975133816654245, "grad_norm": 6.540037631988525, "learning_rate": 9.501677048394826e-05, "loss": 0.9099, "step": 4391 }, { "epoch": 0.29758113693339655, "grad_norm": 9.617361068725586, "learning_rate": 9.501540146485044e-05, "loss": 0.9205, "step": 4392 }, { "epoch": 0.29764889220136864, "grad_norm": 8.519254684448242, "learning_rate": 9.501403244575262e-05, "loss": 0.9774, "step": 4393 }, { "epoch": 0.29771664746934073, "grad_norm": 8.101237297058105, "learning_rate": 9.50126634266548e-05, "loss": 0.9019, "step": 4394 }, { "epoch": 0.2977844027373128, "grad_norm": 6.703762531280518, "learning_rate": 9.5011294407557e-05, "loss": 0.9486, "step": 4395 }, { "epoch": 0.2978521580052849, "grad_norm": 8.651348114013672, "learning_rate": 9.500992538845918e-05, "loss": 0.9189, "step": 4396 }, { "epoch": 0.297919913273257, "grad_norm": 7.783169269561768, "learning_rate": 9.500855636936136e-05, "loss": 0.933, "step": 4397 }, { "epoch": 0.2979876685412291, "grad_norm": 8.011998176574707, "learning_rate": 9.500718735026354e-05, "loss": 1.0642, "step": 4398 }, { "epoch": 0.2980554238092012, "grad_norm": 10.782837867736816, "learning_rate": 9.500581833116572e-05, "loss": 1.1406, "step": 4399 }, { "epoch": 0.29812317907717323, "grad_norm": 7.699460029602051, "learning_rate": 9.500444931206791e-05, "loss": 0.9627, "step": 4400 }, { "epoch": 0.2981909343451453, "grad_norm": 8.144030570983887, "learning_rate": 9.500308029297009e-05, "loss": 0.8278, "step": 4401 }, { "epoch": 0.2982586896131174, "grad_norm": 7.882628917694092, "learning_rate": 9.500171127387227e-05, "loss": 0.7924, "step": 4402 }, { "epoch": 0.2983264448810895, "grad_norm": 6.887333393096924, "learning_rate": 9.500034225477445e-05, "loss": 0.8385, "step": 4403 }, { "epoch": 0.2983942001490616, "grad_norm": 8.446837425231934, "learning_rate": 9.499897323567665e-05, "loss": 1.0352, "step": 4404 }, { "epoch": 0.2984619554170337, "grad_norm": 8.389452934265137, "learning_rate": 9.499760421657883e-05, "loss": 0.8053, "step": 4405 }, { "epoch": 0.2985297106850058, "grad_norm": 7.09201717376709, "learning_rate": 9.499623519748101e-05, "loss": 0.999, "step": 4406 }, { "epoch": 0.29859746595297787, "grad_norm": 9.670032501220703, "learning_rate": 9.499486617838319e-05, "loss": 0.8558, "step": 4407 }, { "epoch": 0.2986652212209499, "grad_norm": 9.050987243652344, "learning_rate": 9.499349715928537e-05, "loss": 1.0356, "step": 4408 }, { "epoch": 0.298732976488922, "grad_norm": 6.220524787902832, "learning_rate": 9.499212814018756e-05, "loss": 0.8278, "step": 4409 }, { "epoch": 0.2988007317568941, "grad_norm": 7.464169979095459, "learning_rate": 9.499075912108974e-05, "loss": 0.963, "step": 4410 }, { "epoch": 0.2988684870248662, "grad_norm": 8.479538917541504, "learning_rate": 9.498939010199192e-05, "loss": 1.1108, "step": 4411 }, { "epoch": 0.2989362422928383, "grad_norm": 8.23353385925293, "learning_rate": 9.49880210828941e-05, "loss": 1.0156, "step": 4412 }, { "epoch": 0.29900399756081036, "grad_norm": 7.2161335945129395, "learning_rate": 9.49866520637963e-05, "loss": 0.9964, "step": 4413 }, { "epoch": 0.29907175282878246, "grad_norm": 7.893895149230957, "learning_rate": 9.498528304469848e-05, "loss": 0.9353, "step": 4414 }, { "epoch": 0.29913950809675455, "grad_norm": 8.448126792907715, "learning_rate": 9.498391402560066e-05, "loss": 0.9488, "step": 4415 }, { "epoch": 0.2992072633647266, "grad_norm": 8.160601615905762, "learning_rate": 9.498254500650284e-05, "loss": 0.8934, "step": 4416 }, { "epoch": 0.2992750186326987, "grad_norm": 7.6815266609191895, "learning_rate": 9.498117598740502e-05, "loss": 0.8938, "step": 4417 }, { "epoch": 0.29934277390067077, "grad_norm": 6.795403480529785, "learning_rate": 9.497980696830721e-05, "loss": 0.7074, "step": 4418 }, { "epoch": 0.29941052916864286, "grad_norm": 7.904134750366211, "learning_rate": 9.497843794920939e-05, "loss": 0.8747, "step": 4419 }, { "epoch": 0.29947828443661495, "grad_norm": 7.776915073394775, "learning_rate": 9.497706893011157e-05, "loss": 1.1141, "step": 4420 }, { "epoch": 0.29954603970458704, "grad_norm": 7.425099849700928, "learning_rate": 9.497569991101375e-05, "loss": 0.8826, "step": 4421 }, { "epoch": 0.29961379497255913, "grad_norm": 10.530119895935059, "learning_rate": 9.497433089191595e-05, "loss": 1.0893, "step": 4422 }, { "epoch": 0.2996815502405312, "grad_norm": 7.118913650512695, "learning_rate": 9.497296187281813e-05, "loss": 0.9179, "step": 4423 }, { "epoch": 0.29974930550850326, "grad_norm": 6.6407060623168945, "learning_rate": 9.497159285372031e-05, "loss": 0.8763, "step": 4424 }, { "epoch": 0.29981706077647535, "grad_norm": 6.975287437438965, "learning_rate": 9.49702238346225e-05, "loss": 0.9192, "step": 4425 }, { "epoch": 0.29988481604444744, "grad_norm": 11.773819923400879, "learning_rate": 9.496885481552468e-05, "loss": 1.3173, "step": 4426 }, { "epoch": 0.29995257131241954, "grad_norm": 13.157849311828613, "learning_rate": 9.496748579642686e-05, "loss": 0.8773, "step": 4427 }, { "epoch": 0.3000203265803916, "grad_norm": 7.372555732727051, "learning_rate": 9.496611677732906e-05, "loss": 1.0028, "step": 4428 }, { "epoch": 0.3000880818483637, "grad_norm": 8.804738998413086, "learning_rate": 9.496474775823124e-05, "loss": 1.0684, "step": 4429 }, { "epoch": 0.3001558371163358, "grad_norm": 7.723897933959961, "learning_rate": 9.496337873913342e-05, "loss": 0.9113, "step": 4430 }, { "epoch": 0.3002235923843079, "grad_norm": 10.261975288391113, "learning_rate": 9.49620097200356e-05, "loss": 1.242, "step": 4431 }, { "epoch": 0.30029134765227994, "grad_norm": 7.396425247192383, "learning_rate": 9.496064070093779e-05, "loss": 1.1045, "step": 4432 }, { "epoch": 0.30035910292025203, "grad_norm": 9.235404014587402, "learning_rate": 9.495927168183997e-05, "loss": 1.1058, "step": 4433 }, { "epoch": 0.3004268581882241, "grad_norm": 6.465355396270752, "learning_rate": 9.495790266274215e-05, "loss": 0.9163, "step": 4434 }, { "epoch": 0.3004946134561962, "grad_norm": 6.401477813720703, "learning_rate": 9.495653364364433e-05, "loss": 0.8224, "step": 4435 }, { "epoch": 0.3005623687241683, "grad_norm": 6.599462985992432, "learning_rate": 9.495516462454653e-05, "loss": 0.8104, "step": 4436 }, { "epoch": 0.3006301239921404, "grad_norm": 8.553694725036621, "learning_rate": 9.495379560544871e-05, "loss": 1.1467, "step": 4437 }, { "epoch": 0.3006978792601125, "grad_norm": 6.262206554412842, "learning_rate": 9.495242658635089e-05, "loss": 0.7903, "step": 4438 }, { "epoch": 0.3007656345280846, "grad_norm": 7.284942150115967, "learning_rate": 9.495105756725307e-05, "loss": 1.0585, "step": 4439 }, { "epoch": 0.3008333897960566, "grad_norm": 8.880256652832031, "learning_rate": 9.494968854815525e-05, "loss": 1.0553, "step": 4440 }, { "epoch": 0.3009011450640287, "grad_norm": 9.358749389648438, "learning_rate": 9.494831952905744e-05, "loss": 1.1874, "step": 4441 }, { "epoch": 0.3009689003320008, "grad_norm": 6.7088470458984375, "learning_rate": 9.494695050995962e-05, "loss": 0.8641, "step": 4442 }, { "epoch": 0.3010366555999729, "grad_norm": 9.420478820800781, "learning_rate": 9.49455814908618e-05, "loss": 0.8829, "step": 4443 }, { "epoch": 0.301104410867945, "grad_norm": 8.877934455871582, "learning_rate": 9.494421247176398e-05, "loss": 1.1206, "step": 4444 }, { "epoch": 0.3011721661359171, "grad_norm": 9.361932754516602, "learning_rate": 9.494284345266618e-05, "loss": 0.7993, "step": 4445 }, { "epoch": 0.30123992140388917, "grad_norm": 7.4104790687561035, "learning_rate": 9.494147443356836e-05, "loss": 1.0655, "step": 4446 }, { "epoch": 0.30130767667186126, "grad_norm": 7.0485992431640625, "learning_rate": 9.494010541447054e-05, "loss": 0.8878, "step": 4447 }, { "epoch": 0.30137543193983335, "grad_norm": 7.162468433380127, "learning_rate": 9.493873639537272e-05, "loss": 0.9467, "step": 4448 }, { "epoch": 0.3014431872078054, "grad_norm": 9.214662551879883, "learning_rate": 9.49373673762749e-05, "loss": 0.9288, "step": 4449 }, { "epoch": 0.3015109424757775, "grad_norm": 11.189544677734375, "learning_rate": 9.493599835717709e-05, "loss": 0.9933, "step": 4450 }, { "epoch": 0.30157869774374957, "grad_norm": 9.398331642150879, "learning_rate": 9.493462933807927e-05, "loss": 0.9248, "step": 4451 }, { "epoch": 0.30164645301172166, "grad_norm": 8.26975154876709, "learning_rate": 9.493326031898145e-05, "loss": 1.0308, "step": 4452 }, { "epoch": 0.30171420827969375, "grad_norm": 9.262918472290039, "learning_rate": 9.493189129988363e-05, "loss": 0.9367, "step": 4453 }, { "epoch": 0.30178196354766584, "grad_norm": 7.079284191131592, "learning_rate": 9.493052228078581e-05, "loss": 1.0415, "step": 4454 }, { "epoch": 0.30184971881563794, "grad_norm": 9.081875801086426, "learning_rate": 9.492915326168801e-05, "loss": 1.0783, "step": 4455 }, { "epoch": 0.30191747408361, "grad_norm": 8.476323127746582, "learning_rate": 9.492778424259019e-05, "loss": 0.9743, "step": 4456 }, { "epoch": 0.30198522935158206, "grad_norm": 7.064181327819824, "learning_rate": 9.492641522349237e-05, "loss": 0.7966, "step": 4457 }, { "epoch": 0.30205298461955415, "grad_norm": 7.837399959564209, "learning_rate": 9.492504620439455e-05, "loss": 0.8757, "step": 4458 }, { "epoch": 0.30212073988752625, "grad_norm": 7.2140936851501465, "learning_rate": 9.492367718529674e-05, "loss": 1.1334, "step": 4459 }, { "epoch": 0.30218849515549834, "grad_norm": 7.836158752441406, "learning_rate": 9.492230816619892e-05, "loss": 1.1179, "step": 4460 }, { "epoch": 0.30225625042347043, "grad_norm": 8.686471939086914, "learning_rate": 9.49209391471011e-05, "loss": 0.9826, "step": 4461 }, { "epoch": 0.3023240056914425, "grad_norm": 8.442061424255371, "learning_rate": 9.491957012800328e-05, "loss": 1.0392, "step": 4462 }, { "epoch": 0.3023917609594146, "grad_norm": 7.024100303649902, "learning_rate": 9.491820110890546e-05, "loss": 0.7857, "step": 4463 }, { "epoch": 0.3024595162273867, "grad_norm": 7.306611061096191, "learning_rate": 9.491683208980766e-05, "loss": 1.0154, "step": 4464 }, { "epoch": 0.30252727149535874, "grad_norm": 7.3064470291137695, "learning_rate": 9.491546307070984e-05, "loss": 1.0311, "step": 4465 }, { "epoch": 0.30259502676333083, "grad_norm": 7.264878749847412, "learning_rate": 9.491409405161202e-05, "loss": 0.932, "step": 4466 }, { "epoch": 0.3026627820313029, "grad_norm": 7.43487024307251, "learning_rate": 9.49127250325142e-05, "loss": 0.8024, "step": 4467 }, { "epoch": 0.302730537299275, "grad_norm": 7.710512161254883, "learning_rate": 9.491135601341639e-05, "loss": 0.7989, "step": 4468 }, { "epoch": 0.3027982925672471, "grad_norm": 7.263044834136963, "learning_rate": 9.490998699431857e-05, "loss": 0.9481, "step": 4469 }, { "epoch": 0.3028660478352192, "grad_norm": 7.284390449523926, "learning_rate": 9.490861797522075e-05, "loss": 0.8008, "step": 4470 }, { "epoch": 0.3029338031031913, "grad_norm": 8.490337371826172, "learning_rate": 9.490724895612295e-05, "loss": 1.1867, "step": 4471 }, { "epoch": 0.3030015583711634, "grad_norm": 6.555050849914551, "learning_rate": 9.490587993702513e-05, "loss": 0.8615, "step": 4472 }, { "epoch": 0.3030693136391354, "grad_norm": 6.497532367706299, "learning_rate": 9.490451091792731e-05, "loss": 0.9643, "step": 4473 }, { "epoch": 0.3031370689071075, "grad_norm": 6.556066036224365, "learning_rate": 9.49031418988295e-05, "loss": 0.9757, "step": 4474 }, { "epoch": 0.3032048241750796, "grad_norm": 6.649857044219971, "learning_rate": 9.490177287973168e-05, "loss": 1.0196, "step": 4475 }, { "epoch": 0.3032725794430517, "grad_norm": 8.725894927978516, "learning_rate": 9.490040386063386e-05, "loss": 1.1662, "step": 4476 }, { "epoch": 0.3033403347110238, "grad_norm": 6.520106792449951, "learning_rate": 9.489903484153604e-05, "loss": 0.8353, "step": 4477 }, { "epoch": 0.3034080899789959, "grad_norm": 6.688689708709717, "learning_rate": 9.489766582243824e-05, "loss": 0.8101, "step": 4478 }, { "epoch": 0.30347584524696797, "grad_norm": 7.817204475402832, "learning_rate": 9.489629680334042e-05, "loss": 0.9354, "step": 4479 }, { "epoch": 0.30354360051494006, "grad_norm": 7.536436080932617, "learning_rate": 9.48949277842426e-05, "loss": 1.2358, "step": 4480 }, { "epoch": 0.3036113557829121, "grad_norm": 8.635372161865234, "learning_rate": 9.489355876514478e-05, "loss": 0.8344, "step": 4481 }, { "epoch": 0.3036791110508842, "grad_norm": 7.561103343963623, "learning_rate": 9.489218974604697e-05, "loss": 0.864, "step": 4482 }, { "epoch": 0.3037468663188563, "grad_norm": 8.284873962402344, "learning_rate": 9.489082072694915e-05, "loss": 0.8756, "step": 4483 }, { "epoch": 0.30381462158682837, "grad_norm": 8.976608276367188, "learning_rate": 9.488945170785133e-05, "loss": 1.0523, "step": 4484 }, { "epoch": 0.30388237685480046, "grad_norm": 11.891230583190918, "learning_rate": 9.488808268875351e-05, "loss": 1.0367, "step": 4485 }, { "epoch": 0.30395013212277255, "grad_norm": 8.247873306274414, "learning_rate": 9.488671366965569e-05, "loss": 0.8173, "step": 4486 }, { "epoch": 0.30401788739074465, "grad_norm": 6.74050760269165, "learning_rate": 9.488534465055789e-05, "loss": 0.8192, "step": 4487 }, { "epoch": 0.30408564265871674, "grad_norm": 7.035127639770508, "learning_rate": 9.488397563146007e-05, "loss": 0.8416, "step": 4488 }, { "epoch": 0.3041533979266888, "grad_norm": 7.281885623931885, "learning_rate": 9.488260661236225e-05, "loss": 1.072, "step": 4489 }, { "epoch": 0.30422115319466086, "grad_norm": 6.236363887786865, "learning_rate": 9.488123759326443e-05, "loss": 0.8738, "step": 4490 }, { "epoch": 0.30428890846263296, "grad_norm": 8.20182991027832, "learning_rate": 9.487986857416662e-05, "loss": 1.1965, "step": 4491 }, { "epoch": 0.30435666373060505, "grad_norm": 9.837231636047363, "learning_rate": 9.48784995550688e-05, "loss": 0.9125, "step": 4492 }, { "epoch": 0.30442441899857714, "grad_norm": 8.810354232788086, "learning_rate": 9.487713053597098e-05, "loss": 1.0623, "step": 4493 }, { "epoch": 0.30449217426654923, "grad_norm": 6.360396385192871, "learning_rate": 9.487576151687316e-05, "loss": 0.8683, "step": 4494 }, { "epoch": 0.3045599295345213, "grad_norm": 8.158722877502441, "learning_rate": 9.487439249777534e-05, "loss": 0.9562, "step": 4495 }, { "epoch": 0.3046276848024934, "grad_norm": 12.142204284667969, "learning_rate": 9.487302347867754e-05, "loss": 0.8937, "step": 4496 }, { "epoch": 0.3046954400704655, "grad_norm": 8.125468254089355, "learning_rate": 9.487165445957972e-05, "loss": 1.0587, "step": 4497 }, { "epoch": 0.30476319533843754, "grad_norm": 7.607559680938721, "learning_rate": 9.48702854404819e-05, "loss": 0.8296, "step": 4498 }, { "epoch": 0.30483095060640963, "grad_norm": 6.430202484130859, "learning_rate": 9.486891642138408e-05, "loss": 1.067, "step": 4499 }, { "epoch": 0.3048987058743817, "grad_norm": 7.2482805252075195, "learning_rate": 9.486754740228626e-05, "loss": 1.0611, "step": 4500 }, { "epoch": 0.3049664611423538, "grad_norm": 9.431131362915039, "learning_rate": 9.486617838318845e-05, "loss": 0.9665, "step": 4501 }, { "epoch": 0.3050342164103259, "grad_norm": 8.319073677062988, "learning_rate": 9.486480936409063e-05, "loss": 0.983, "step": 4502 }, { "epoch": 0.305101971678298, "grad_norm": 6.784060478210449, "learning_rate": 9.486344034499281e-05, "loss": 0.8534, "step": 4503 }, { "epoch": 0.3051697269462701, "grad_norm": 7.521092414855957, "learning_rate": 9.486207132589499e-05, "loss": 0.9167, "step": 4504 }, { "epoch": 0.3052374822142422, "grad_norm": 7.7156982421875, "learning_rate": 9.486070230679719e-05, "loss": 0.7691, "step": 4505 }, { "epoch": 0.3053052374822142, "grad_norm": 6.727222919464111, "learning_rate": 9.485933328769937e-05, "loss": 0.7156, "step": 4506 }, { "epoch": 0.3053729927501863, "grad_norm": 7.416358470916748, "learning_rate": 9.485796426860155e-05, "loss": 1.0458, "step": 4507 }, { "epoch": 0.3054407480181584, "grad_norm": 8.601702690124512, "learning_rate": 9.485659524950373e-05, "loss": 1.1664, "step": 4508 }, { "epoch": 0.3055085032861305, "grad_norm": 6.742228984832764, "learning_rate": 9.485522623040591e-05, "loss": 0.9715, "step": 4509 }, { "epoch": 0.3055762585541026, "grad_norm": 6.782869815826416, "learning_rate": 9.48538572113081e-05, "loss": 0.7644, "step": 4510 }, { "epoch": 0.3056440138220747, "grad_norm": 5.753969669342041, "learning_rate": 9.485248819221028e-05, "loss": 0.8468, "step": 4511 }, { "epoch": 0.30571176909004677, "grad_norm": 10.150800704956055, "learning_rate": 9.485111917311246e-05, "loss": 1.1521, "step": 4512 }, { "epoch": 0.30577952435801886, "grad_norm": 7.476456642150879, "learning_rate": 9.484975015401464e-05, "loss": 1.0091, "step": 4513 }, { "epoch": 0.3058472796259909, "grad_norm": 8.955781936645508, "learning_rate": 9.484838113491684e-05, "loss": 1.1837, "step": 4514 }, { "epoch": 0.305915034893963, "grad_norm": 8.779487609863281, "learning_rate": 9.484701211581902e-05, "loss": 0.9747, "step": 4515 }, { "epoch": 0.3059827901619351, "grad_norm": 9.432082176208496, "learning_rate": 9.48456430967212e-05, "loss": 0.9298, "step": 4516 }, { "epoch": 0.3060505454299072, "grad_norm": 8.186026573181152, "learning_rate": 9.484427407762339e-05, "loss": 0.9026, "step": 4517 }, { "epoch": 0.30611830069787926, "grad_norm": 7.902477264404297, "learning_rate": 9.484290505852557e-05, "loss": 1.0116, "step": 4518 }, { "epoch": 0.30618605596585136, "grad_norm": 9.117185592651367, "learning_rate": 9.484153603942775e-05, "loss": 1.0867, "step": 4519 }, { "epoch": 0.30625381123382345, "grad_norm": 8.473755836486816, "learning_rate": 9.484016702032995e-05, "loss": 0.8742, "step": 4520 }, { "epoch": 0.30632156650179554, "grad_norm": 12.328755378723145, "learning_rate": 9.483879800123213e-05, "loss": 1.2981, "step": 4521 }, { "epoch": 0.3063893217697676, "grad_norm": 7.099585056304932, "learning_rate": 9.483742898213431e-05, "loss": 0.9685, "step": 4522 }, { "epoch": 0.30645707703773967, "grad_norm": 8.027973175048828, "learning_rate": 9.48360599630365e-05, "loss": 1.0194, "step": 4523 }, { "epoch": 0.30652483230571176, "grad_norm": 8.275866508483887, "learning_rate": 9.483469094393868e-05, "loss": 1.2424, "step": 4524 }, { "epoch": 0.30659258757368385, "grad_norm": 8.546550750732422, "learning_rate": 9.483332192484086e-05, "loss": 0.9284, "step": 4525 }, { "epoch": 0.30666034284165594, "grad_norm": 5.6628522872924805, "learning_rate": 9.483195290574304e-05, "loss": 0.882, "step": 4526 }, { "epoch": 0.30672809810962803, "grad_norm": 7.189582824707031, "learning_rate": 9.483058388664522e-05, "loss": 0.8087, "step": 4527 }, { "epoch": 0.3067958533776001, "grad_norm": 6.341355323791504, "learning_rate": 9.482921486754742e-05, "loss": 0.8759, "step": 4528 }, { "epoch": 0.3068636086455722, "grad_norm": 6.838935852050781, "learning_rate": 9.48278458484496e-05, "loss": 0.6662, "step": 4529 }, { "epoch": 0.30693136391354425, "grad_norm": 5.441661834716797, "learning_rate": 9.482647682935178e-05, "loss": 0.7008, "step": 4530 }, { "epoch": 0.30699911918151634, "grad_norm": 9.242436408996582, "learning_rate": 9.482510781025396e-05, "loss": 0.8675, "step": 4531 }, { "epoch": 0.30706687444948844, "grad_norm": 7.393209934234619, "learning_rate": 9.482373879115614e-05, "loss": 0.7829, "step": 4532 }, { "epoch": 0.30713462971746053, "grad_norm": 8.613391876220703, "learning_rate": 9.482236977205833e-05, "loss": 1.0276, "step": 4533 }, { "epoch": 0.3072023849854326, "grad_norm": 8.517586708068848, "learning_rate": 9.482100075296051e-05, "loss": 0.8115, "step": 4534 }, { "epoch": 0.3072701402534047, "grad_norm": 7.575396537780762, "learning_rate": 9.481963173386269e-05, "loss": 0.8254, "step": 4535 }, { "epoch": 0.3073378955213768, "grad_norm": 8.90748405456543, "learning_rate": 9.481826271476487e-05, "loss": 0.9864, "step": 4536 }, { "epoch": 0.3074056507893489, "grad_norm": 10.321487426757812, "learning_rate": 9.481689369566707e-05, "loss": 1.0247, "step": 4537 }, { "epoch": 0.30747340605732093, "grad_norm": 8.15263843536377, "learning_rate": 9.481552467656925e-05, "loss": 0.8475, "step": 4538 }, { "epoch": 0.307541161325293, "grad_norm": 7.198974609375, "learning_rate": 9.481415565747143e-05, "loss": 1.1028, "step": 4539 }, { "epoch": 0.3076089165932651, "grad_norm": 8.286428451538086, "learning_rate": 9.481278663837361e-05, "loss": 0.9451, "step": 4540 }, { "epoch": 0.3076766718612372, "grad_norm": 7.620658874511719, "learning_rate": 9.481141761927579e-05, "loss": 0.8243, "step": 4541 }, { "epoch": 0.3077444271292093, "grad_norm": 7.8145341873168945, "learning_rate": 9.481004860017798e-05, "loss": 0.7741, "step": 4542 }, { "epoch": 0.3078121823971814, "grad_norm": 7.216073989868164, "learning_rate": 9.480867958108016e-05, "loss": 0.7973, "step": 4543 }, { "epoch": 0.3078799376651535, "grad_norm": 7.704121112823486, "learning_rate": 9.480731056198234e-05, "loss": 0.9388, "step": 4544 }, { "epoch": 0.30794769293312557, "grad_norm": 7.872618675231934, "learning_rate": 9.480594154288452e-05, "loss": 0.8628, "step": 4545 }, { "epoch": 0.3080154482010976, "grad_norm": 5.696948528289795, "learning_rate": 9.480457252378672e-05, "loss": 0.8319, "step": 4546 }, { "epoch": 0.3080832034690697, "grad_norm": 7.372182846069336, "learning_rate": 9.48032035046889e-05, "loss": 0.9198, "step": 4547 }, { "epoch": 0.3081509587370418, "grad_norm": 6.9523420333862305, "learning_rate": 9.480183448559108e-05, "loss": 1.0383, "step": 4548 }, { "epoch": 0.3082187140050139, "grad_norm": 7.862281799316406, "learning_rate": 9.480046546649326e-05, "loss": 0.9764, "step": 4549 }, { "epoch": 0.308286469272986, "grad_norm": 6.563671588897705, "learning_rate": 9.479909644739544e-05, "loss": 0.9587, "step": 4550 }, { "epoch": 0.30835422454095807, "grad_norm": 6.150197505950928, "learning_rate": 9.479772742829763e-05, "loss": 0.8265, "step": 4551 }, { "epoch": 0.30842197980893016, "grad_norm": 5.330137729644775, "learning_rate": 9.479635840919981e-05, "loss": 0.697, "step": 4552 }, { "epoch": 0.30848973507690225, "grad_norm": 7.886617183685303, "learning_rate": 9.479498939010199e-05, "loss": 0.8645, "step": 4553 }, { "epoch": 0.30855749034487434, "grad_norm": 8.9354248046875, "learning_rate": 9.479362037100417e-05, "loss": 0.9669, "step": 4554 }, { "epoch": 0.3086252456128464, "grad_norm": 6.7398457527160645, "learning_rate": 9.479225135190635e-05, "loss": 0.6935, "step": 4555 }, { "epoch": 0.30869300088081847, "grad_norm": 6.668737888336182, "learning_rate": 9.479088233280855e-05, "loss": 1.0038, "step": 4556 }, { "epoch": 0.30876075614879056, "grad_norm": 9.82985782623291, "learning_rate": 9.478951331371073e-05, "loss": 1.1031, "step": 4557 }, { "epoch": 0.30882851141676265, "grad_norm": 7.345351696014404, "learning_rate": 9.478814429461291e-05, "loss": 1.032, "step": 4558 }, { "epoch": 0.30889626668473474, "grad_norm": 8.863839149475098, "learning_rate": 9.478677527551509e-05, "loss": 1.0042, "step": 4559 }, { "epoch": 0.30896402195270684, "grad_norm": 7.280172824859619, "learning_rate": 9.478540625641728e-05, "loss": 0.977, "step": 4560 }, { "epoch": 0.3090317772206789, "grad_norm": 7.121008396148682, "learning_rate": 9.478403723731946e-05, "loss": 0.7969, "step": 4561 }, { "epoch": 0.309099532488651, "grad_norm": 8.222799301147461, "learning_rate": 9.478266821822164e-05, "loss": 1.0655, "step": 4562 }, { "epoch": 0.30916728775662305, "grad_norm": 7.74297571182251, "learning_rate": 9.478129919912384e-05, "loss": 0.7976, "step": 4563 }, { "epoch": 0.30923504302459515, "grad_norm": 8.211404800415039, "learning_rate": 9.477993018002602e-05, "loss": 0.9004, "step": 4564 }, { "epoch": 0.30930279829256724, "grad_norm": 6.897336006164551, "learning_rate": 9.47785611609282e-05, "loss": 0.9347, "step": 4565 }, { "epoch": 0.30937055356053933, "grad_norm": 6.508474826812744, "learning_rate": 9.477719214183039e-05, "loss": 0.862, "step": 4566 }, { "epoch": 0.3094383088285114, "grad_norm": 6.972619533538818, "learning_rate": 9.477582312273257e-05, "loss": 0.8142, "step": 4567 }, { "epoch": 0.3095060640964835, "grad_norm": 8.018939971923828, "learning_rate": 9.477445410363475e-05, "loss": 1.1858, "step": 4568 }, { "epoch": 0.3095738193644556, "grad_norm": 7.123754024505615, "learning_rate": 9.477308508453695e-05, "loss": 0.9347, "step": 4569 }, { "epoch": 0.3096415746324277, "grad_norm": 7.701295375823975, "learning_rate": 9.477171606543913e-05, "loss": 1.0511, "step": 4570 }, { "epoch": 0.30970932990039973, "grad_norm": 7.858259677886963, "learning_rate": 9.47703470463413e-05, "loss": 0.8332, "step": 4571 }, { "epoch": 0.3097770851683718, "grad_norm": 9.541449546813965, "learning_rate": 9.476897802724349e-05, "loss": 1.3054, "step": 4572 }, { "epoch": 0.3098448404363439, "grad_norm": 7.877323150634766, "learning_rate": 9.476760900814567e-05, "loss": 0.9087, "step": 4573 }, { "epoch": 0.309912595704316, "grad_norm": 6.667144775390625, "learning_rate": 9.476623998904786e-05, "loss": 0.9066, "step": 4574 }, { "epoch": 0.3099803509722881, "grad_norm": 6.78439998626709, "learning_rate": 9.476487096995004e-05, "loss": 0.9524, "step": 4575 }, { "epoch": 0.3100481062402602, "grad_norm": 6.174951076507568, "learning_rate": 9.476350195085222e-05, "loss": 0.9853, "step": 4576 }, { "epoch": 0.3101158615082323, "grad_norm": 7.872679233551025, "learning_rate": 9.47621329317544e-05, "loss": 1.2686, "step": 4577 }, { "epoch": 0.3101836167762044, "grad_norm": 6.736785411834717, "learning_rate": 9.47607639126566e-05, "loss": 0.8149, "step": 4578 }, { "epoch": 0.3102513720441764, "grad_norm": 7.353178977966309, "learning_rate": 9.475939489355878e-05, "loss": 0.7092, "step": 4579 }, { "epoch": 0.3103191273121485, "grad_norm": 8.10339069366455, "learning_rate": 9.475802587446096e-05, "loss": 1.1926, "step": 4580 }, { "epoch": 0.3103868825801206, "grad_norm": 7.225834369659424, "learning_rate": 9.475665685536314e-05, "loss": 1.1534, "step": 4581 }, { "epoch": 0.3104546378480927, "grad_norm": 7.356152057647705, "learning_rate": 9.475528783626532e-05, "loss": 1.1098, "step": 4582 }, { "epoch": 0.3105223931160648, "grad_norm": 10.347126960754395, "learning_rate": 9.475391881716751e-05, "loss": 1.136, "step": 4583 }, { "epoch": 0.31059014838403687, "grad_norm": 7.562332630157471, "learning_rate": 9.475254979806969e-05, "loss": 0.9211, "step": 4584 }, { "epoch": 0.31065790365200896, "grad_norm": 6.505655288696289, "learning_rate": 9.475118077897187e-05, "loss": 0.9538, "step": 4585 }, { "epoch": 0.31072565891998105, "grad_norm": 6.551809787750244, "learning_rate": 9.474981175987405e-05, "loss": 1.1335, "step": 4586 }, { "epoch": 0.3107934141879531, "grad_norm": 7.4724321365356445, "learning_rate": 9.474844274077623e-05, "loss": 1.0441, "step": 4587 }, { "epoch": 0.3108611694559252, "grad_norm": 8.463167190551758, "learning_rate": 9.474707372167843e-05, "loss": 1.0979, "step": 4588 }, { "epoch": 0.31092892472389727, "grad_norm": 6.868551731109619, "learning_rate": 9.47457047025806e-05, "loss": 0.6893, "step": 4589 }, { "epoch": 0.31099667999186936, "grad_norm": 5.831715106964111, "learning_rate": 9.474433568348279e-05, "loss": 0.9197, "step": 4590 }, { "epoch": 0.31106443525984145, "grad_norm": 7.562331676483154, "learning_rate": 9.474296666438497e-05, "loss": 0.8768, "step": 4591 }, { "epoch": 0.31113219052781355, "grad_norm": 7.750473499298096, "learning_rate": 9.474159764528716e-05, "loss": 0.8046, "step": 4592 }, { "epoch": 0.31119994579578564, "grad_norm": 7.836174964904785, "learning_rate": 9.474022862618934e-05, "loss": 0.9346, "step": 4593 }, { "epoch": 0.31126770106375773, "grad_norm": 6.159252166748047, "learning_rate": 9.473885960709152e-05, "loss": 0.8661, "step": 4594 }, { "epoch": 0.31133545633172977, "grad_norm": 7.959331512451172, "learning_rate": 9.47374905879937e-05, "loss": 0.919, "step": 4595 }, { "epoch": 0.31140321159970186, "grad_norm": 8.349802017211914, "learning_rate": 9.473612156889588e-05, "loss": 0.9762, "step": 4596 }, { "epoch": 0.31147096686767395, "grad_norm": 6.385254859924316, "learning_rate": 9.473475254979808e-05, "loss": 1.0168, "step": 4597 }, { "epoch": 0.31153872213564604, "grad_norm": 6.848907947540283, "learning_rate": 9.473338353070026e-05, "loss": 0.8235, "step": 4598 }, { "epoch": 0.31160647740361813, "grad_norm": 8.07978630065918, "learning_rate": 9.473201451160244e-05, "loss": 0.8394, "step": 4599 }, { "epoch": 0.3116742326715902, "grad_norm": 8.702666282653809, "learning_rate": 9.473064549250462e-05, "loss": 1.1859, "step": 4600 }, { "epoch": 0.3117419879395623, "grad_norm": 7.322251796722412, "learning_rate": 9.472927647340681e-05, "loss": 0.7089, "step": 4601 }, { "epoch": 0.3118097432075344, "grad_norm": 7.717091083526611, "learning_rate": 9.472790745430899e-05, "loss": 0.8609, "step": 4602 }, { "epoch": 0.3118774984755065, "grad_norm": 7.298448085784912, "learning_rate": 9.472653843521117e-05, "loss": 1.0044, "step": 4603 }, { "epoch": 0.31194525374347853, "grad_norm": 8.221037864685059, "learning_rate": 9.472516941611335e-05, "loss": 1.0403, "step": 4604 }, { "epoch": 0.3120130090114506, "grad_norm": 8.83371639251709, "learning_rate": 9.472380039701553e-05, "loss": 1.2884, "step": 4605 }, { "epoch": 0.3120807642794227, "grad_norm": 6.774711608886719, "learning_rate": 9.472243137791773e-05, "loss": 0.8077, "step": 4606 }, { "epoch": 0.3121485195473948, "grad_norm": 8.470376968383789, "learning_rate": 9.472106235881991e-05, "loss": 0.9549, "step": 4607 }, { "epoch": 0.3122162748153669, "grad_norm": 6.509616374969482, "learning_rate": 9.471969333972209e-05, "loss": 0.9595, "step": 4608 }, { "epoch": 0.312284030083339, "grad_norm": 6.885564804077148, "learning_rate": 9.471832432062427e-05, "loss": 0.9934, "step": 4609 }, { "epoch": 0.3123517853513111, "grad_norm": 12.500927925109863, "learning_rate": 9.471695530152646e-05, "loss": 0.9493, "step": 4610 }, { "epoch": 0.3124195406192832, "grad_norm": 8.701812744140625, "learning_rate": 9.471558628242864e-05, "loss": 0.9975, "step": 4611 }, { "epoch": 0.3124872958872552, "grad_norm": 8.313292503356934, "learning_rate": 9.471421726333082e-05, "loss": 1.0069, "step": 4612 }, { "epoch": 0.3125550511552273, "grad_norm": 6.841222763061523, "learning_rate": 9.471284824423302e-05, "loss": 0.8078, "step": 4613 }, { "epoch": 0.3126228064231994, "grad_norm": 9.038453102111816, "learning_rate": 9.47114792251352e-05, "loss": 1.0445, "step": 4614 }, { "epoch": 0.3126905616911715, "grad_norm": 7.772367000579834, "learning_rate": 9.471011020603738e-05, "loss": 1.0336, "step": 4615 }, { "epoch": 0.3127583169591436, "grad_norm": 6.843810558319092, "learning_rate": 9.470874118693957e-05, "loss": 0.8171, "step": 4616 }, { "epoch": 0.31282607222711567, "grad_norm": 7.012472629547119, "learning_rate": 9.470737216784175e-05, "loss": 0.8906, "step": 4617 }, { "epoch": 0.31289382749508776, "grad_norm": 7.138260364532471, "learning_rate": 9.470600314874393e-05, "loss": 0.9742, "step": 4618 }, { "epoch": 0.31296158276305985, "grad_norm": 7.92933988571167, "learning_rate": 9.470463412964611e-05, "loss": 0.8097, "step": 4619 }, { "epoch": 0.3130293380310319, "grad_norm": 8.222073554992676, "learning_rate": 9.47032651105483e-05, "loss": 0.9354, "step": 4620 }, { "epoch": 0.313097093299004, "grad_norm": 7.705626010894775, "learning_rate": 9.470189609145049e-05, "loss": 0.7776, "step": 4621 }, { "epoch": 0.3131648485669761, "grad_norm": 7.6136345863342285, "learning_rate": 9.470052707235267e-05, "loss": 1.0204, "step": 4622 }, { "epoch": 0.31323260383494816, "grad_norm": 6.365856647491455, "learning_rate": 9.469915805325485e-05, "loss": 0.8812, "step": 4623 }, { "epoch": 0.31330035910292026, "grad_norm": 6.876771926879883, "learning_rate": 9.469778903415704e-05, "loss": 0.7518, "step": 4624 }, { "epoch": 0.31336811437089235, "grad_norm": 6.329056262969971, "learning_rate": 9.469642001505922e-05, "loss": 0.8902, "step": 4625 }, { "epoch": 0.31343586963886444, "grad_norm": 6.1461310386657715, "learning_rate": 9.46950509959614e-05, "loss": 1.0617, "step": 4626 }, { "epoch": 0.31350362490683653, "grad_norm": 7.062450408935547, "learning_rate": 9.469368197686358e-05, "loss": 0.9081, "step": 4627 }, { "epoch": 0.31357138017480857, "grad_norm": 7.412983417510986, "learning_rate": 9.469231295776576e-05, "loss": 1.0845, "step": 4628 }, { "epoch": 0.31363913544278066, "grad_norm": 7.904543876647949, "learning_rate": 9.469094393866796e-05, "loss": 0.803, "step": 4629 }, { "epoch": 0.31370689071075275, "grad_norm": 6.614920616149902, "learning_rate": 9.468957491957014e-05, "loss": 0.8369, "step": 4630 }, { "epoch": 0.31377464597872484, "grad_norm": 8.245738983154297, "learning_rate": 9.468820590047232e-05, "loss": 0.9232, "step": 4631 }, { "epoch": 0.31384240124669693, "grad_norm": 7.923001766204834, "learning_rate": 9.46868368813745e-05, "loss": 0.822, "step": 4632 }, { "epoch": 0.313910156514669, "grad_norm": 8.80931282043457, "learning_rate": 9.468546786227668e-05, "loss": 0.9253, "step": 4633 }, { "epoch": 0.3139779117826411, "grad_norm": 6.997625350952148, "learning_rate": 9.468409884317887e-05, "loss": 0.8564, "step": 4634 }, { "epoch": 0.3140456670506132, "grad_norm": 7.699014186859131, "learning_rate": 9.468272982408105e-05, "loss": 1.0097, "step": 4635 }, { "epoch": 0.31411342231858524, "grad_norm": 7.379316329956055, "learning_rate": 9.468136080498323e-05, "loss": 0.8455, "step": 4636 }, { "epoch": 0.31418117758655734, "grad_norm": 7.040482521057129, "learning_rate": 9.467999178588541e-05, "loss": 0.9065, "step": 4637 }, { "epoch": 0.31424893285452943, "grad_norm": 8.588302612304688, "learning_rate": 9.46786227667876e-05, "loss": 0.8063, "step": 4638 }, { "epoch": 0.3143166881225015, "grad_norm": 8.310260772705078, "learning_rate": 9.467725374768979e-05, "loss": 0.9012, "step": 4639 }, { "epoch": 0.3143844433904736, "grad_norm": 5.794011116027832, "learning_rate": 9.467588472859197e-05, "loss": 0.7026, "step": 4640 }, { "epoch": 0.3144521986584457, "grad_norm": 8.277170181274414, "learning_rate": 9.467451570949415e-05, "loss": 0.8472, "step": 4641 }, { "epoch": 0.3145199539264178, "grad_norm": 8.856977462768555, "learning_rate": 9.467314669039633e-05, "loss": 1.1543, "step": 4642 }, { "epoch": 0.3145877091943899, "grad_norm": 7.343659400939941, "learning_rate": 9.467177767129852e-05, "loss": 0.8366, "step": 4643 }, { "epoch": 0.3146554644623619, "grad_norm": 6.619773864746094, "learning_rate": 9.46704086522007e-05, "loss": 0.9176, "step": 4644 }, { "epoch": 0.314723219730334, "grad_norm": 8.375277519226074, "learning_rate": 9.466903963310288e-05, "loss": 1.0437, "step": 4645 }, { "epoch": 0.3147909749983061, "grad_norm": 6.792671203613281, "learning_rate": 9.466767061400506e-05, "loss": 0.7271, "step": 4646 }, { "epoch": 0.3148587302662782, "grad_norm": 7.419206619262695, "learning_rate": 9.466630159490726e-05, "loss": 0.9376, "step": 4647 }, { "epoch": 0.3149264855342503, "grad_norm": 6.413398742675781, "learning_rate": 9.466493257580944e-05, "loss": 0.8988, "step": 4648 }, { "epoch": 0.3149942408022224, "grad_norm": 7.8477373123168945, "learning_rate": 9.466356355671162e-05, "loss": 1.0791, "step": 4649 }, { "epoch": 0.31506199607019447, "grad_norm": 7.070709705352783, "learning_rate": 9.46621945376138e-05, "loss": 0.7796, "step": 4650 }, { "epoch": 0.31512975133816656, "grad_norm": 7.023820400238037, "learning_rate": 9.466082551851598e-05, "loss": 0.928, "step": 4651 }, { "epoch": 0.3151975066061386, "grad_norm": 8.347658157348633, "learning_rate": 9.465945649941817e-05, "loss": 0.8768, "step": 4652 }, { "epoch": 0.3152652618741107, "grad_norm": 7.446930408477783, "learning_rate": 9.465808748032035e-05, "loss": 0.7215, "step": 4653 }, { "epoch": 0.3153330171420828, "grad_norm": 9.261749267578125, "learning_rate": 9.465671846122253e-05, "loss": 1.0612, "step": 4654 }, { "epoch": 0.3154007724100549, "grad_norm": 6.765881061553955, "learning_rate": 9.465534944212471e-05, "loss": 0.9636, "step": 4655 }, { "epoch": 0.31546852767802697, "grad_norm": 7.752283096313477, "learning_rate": 9.46539804230269e-05, "loss": 1.0348, "step": 4656 }, { "epoch": 0.31553628294599906, "grad_norm": 8.769716262817383, "learning_rate": 9.465261140392909e-05, "loss": 0.9147, "step": 4657 }, { "epoch": 0.31560403821397115, "grad_norm": 9.27787971496582, "learning_rate": 9.465124238483127e-05, "loss": 1.0287, "step": 4658 }, { "epoch": 0.31567179348194324, "grad_norm": 5.802475452423096, "learning_rate": 9.464987336573346e-05, "loss": 0.7968, "step": 4659 }, { "epoch": 0.31573954874991533, "grad_norm": 7.436465263366699, "learning_rate": 9.464850434663564e-05, "loss": 1.1087, "step": 4660 }, { "epoch": 0.31580730401788737, "grad_norm": 8.243688583374023, "learning_rate": 9.464713532753782e-05, "loss": 1.0257, "step": 4661 }, { "epoch": 0.31587505928585946, "grad_norm": 7.885406017303467, "learning_rate": 9.464576630844002e-05, "loss": 0.9944, "step": 4662 }, { "epoch": 0.31594281455383155, "grad_norm": 8.342294692993164, "learning_rate": 9.46443972893422e-05, "loss": 0.8351, "step": 4663 }, { "epoch": 0.31601056982180364, "grad_norm": 7.476551055908203, "learning_rate": 9.464302827024438e-05, "loss": 0.8484, "step": 4664 }, { "epoch": 0.31607832508977574, "grad_norm": 7.239434719085693, "learning_rate": 9.464165925114656e-05, "loss": 0.749, "step": 4665 }, { "epoch": 0.3161460803577478, "grad_norm": 8.02225399017334, "learning_rate": 9.464029023204875e-05, "loss": 1.0679, "step": 4666 }, { "epoch": 0.3162138356257199, "grad_norm": 5.708010673522949, "learning_rate": 9.463892121295093e-05, "loss": 0.6935, "step": 4667 }, { "epoch": 0.316281590893692, "grad_norm": 9.107060432434082, "learning_rate": 9.463755219385311e-05, "loss": 0.7951, "step": 4668 }, { "epoch": 0.31634934616166405, "grad_norm": 8.045939445495605, "learning_rate": 9.463618317475529e-05, "loss": 0.9943, "step": 4669 }, { "epoch": 0.31641710142963614, "grad_norm": 8.284626007080078, "learning_rate": 9.463481415565749e-05, "loss": 0.9638, "step": 4670 }, { "epoch": 0.31648485669760823, "grad_norm": 6.9800920486450195, "learning_rate": 9.463344513655967e-05, "loss": 0.9211, "step": 4671 }, { "epoch": 0.3165526119655803, "grad_norm": 7.476776599884033, "learning_rate": 9.463207611746185e-05, "loss": 0.8704, "step": 4672 }, { "epoch": 0.3166203672335524, "grad_norm": 6.303709506988525, "learning_rate": 9.463070709836403e-05, "loss": 0.9031, "step": 4673 }, { "epoch": 0.3166881225015245, "grad_norm": 8.267644882202148, "learning_rate": 9.46293380792662e-05, "loss": 1.0899, "step": 4674 }, { "epoch": 0.3167558777694966, "grad_norm": 8.25999641418457, "learning_rate": 9.46279690601684e-05, "loss": 0.9286, "step": 4675 }, { "epoch": 0.3168236330374687, "grad_norm": 8.577073097229004, "learning_rate": 9.462660004107058e-05, "loss": 0.8837, "step": 4676 }, { "epoch": 0.3168913883054407, "grad_norm": 7.183437824249268, "learning_rate": 9.462523102197276e-05, "loss": 0.9972, "step": 4677 }, { "epoch": 0.3169591435734128, "grad_norm": 7.93414831161499, "learning_rate": 9.462386200287494e-05, "loss": 0.9806, "step": 4678 }, { "epoch": 0.3170268988413849, "grad_norm": 8.037418365478516, "learning_rate": 9.462249298377714e-05, "loss": 0.8945, "step": 4679 }, { "epoch": 0.317094654109357, "grad_norm": 7.5245585441589355, "learning_rate": 9.462112396467932e-05, "loss": 0.9444, "step": 4680 }, { "epoch": 0.3171624093773291, "grad_norm": 7.428105354309082, "learning_rate": 9.46197549455815e-05, "loss": 0.9916, "step": 4681 }, { "epoch": 0.3172301646453012, "grad_norm": 7.977080345153809, "learning_rate": 9.461838592648368e-05, "loss": 1.2206, "step": 4682 }, { "epoch": 0.3172979199132733, "grad_norm": 6.285130023956299, "learning_rate": 9.461701690738586e-05, "loss": 0.8222, "step": 4683 }, { "epoch": 0.31736567518124537, "grad_norm": 6.9027018547058105, "learning_rate": 9.461564788828805e-05, "loss": 1.0265, "step": 4684 }, { "epoch": 0.3174334304492174, "grad_norm": 7.620064735412598, "learning_rate": 9.461427886919023e-05, "loss": 0.9672, "step": 4685 }, { "epoch": 0.3175011857171895, "grad_norm": 7.635505676269531, "learning_rate": 9.461290985009241e-05, "loss": 0.9329, "step": 4686 }, { "epoch": 0.3175689409851616, "grad_norm": 7.532449245452881, "learning_rate": 9.461154083099459e-05, "loss": 1.0509, "step": 4687 }, { "epoch": 0.3176366962531337, "grad_norm": 6.428747177124023, "learning_rate": 9.461017181189677e-05, "loss": 0.9151, "step": 4688 }, { "epoch": 0.31770445152110577, "grad_norm": 7.214437007904053, "learning_rate": 9.460880279279897e-05, "loss": 0.9758, "step": 4689 }, { "epoch": 0.31777220678907786, "grad_norm": 7.525144577026367, "learning_rate": 9.460743377370115e-05, "loss": 0.7723, "step": 4690 }, { "epoch": 0.31783996205704995, "grad_norm": 8.008615493774414, "learning_rate": 9.460606475460333e-05, "loss": 0.9945, "step": 4691 }, { "epoch": 0.31790771732502204, "grad_norm": 6.715225696563721, "learning_rate": 9.460469573550551e-05, "loss": 0.9805, "step": 4692 }, { "epoch": 0.3179754725929941, "grad_norm": 7.146914005279541, "learning_rate": 9.46033267164077e-05, "loss": 0.9583, "step": 4693 }, { "epoch": 0.31804322786096617, "grad_norm": 7.280580997467041, "learning_rate": 9.460195769730988e-05, "loss": 0.9772, "step": 4694 }, { "epoch": 0.31811098312893826, "grad_norm": 7.84500789642334, "learning_rate": 9.460058867821206e-05, "loss": 0.9755, "step": 4695 }, { "epoch": 0.31817873839691035, "grad_norm": 7.289769649505615, "learning_rate": 9.459921965911424e-05, "loss": 1.1441, "step": 4696 }, { "epoch": 0.31824649366488245, "grad_norm": 9.061880111694336, "learning_rate": 9.459785064001642e-05, "loss": 0.9699, "step": 4697 }, { "epoch": 0.31831424893285454, "grad_norm": 7.284442901611328, "learning_rate": 9.459648162091862e-05, "loss": 1.0023, "step": 4698 }, { "epoch": 0.31838200420082663, "grad_norm": 7.017299175262451, "learning_rate": 9.45951126018208e-05, "loss": 0.8598, "step": 4699 }, { "epoch": 0.3184497594687987, "grad_norm": 7.236936092376709, "learning_rate": 9.459374358272298e-05, "loss": 0.9888, "step": 4700 }, { "epoch": 0.31851751473677076, "grad_norm": 8.148765563964844, "learning_rate": 9.459237456362516e-05, "loss": 0.9978, "step": 4701 }, { "epoch": 0.31858527000474285, "grad_norm": 8.023640632629395, "learning_rate": 9.459100554452735e-05, "loss": 1.0941, "step": 4702 }, { "epoch": 0.31865302527271494, "grad_norm": 6.2704291343688965, "learning_rate": 9.458963652542953e-05, "loss": 0.8616, "step": 4703 }, { "epoch": 0.31872078054068703, "grad_norm": 7.873881816864014, "learning_rate": 9.458826750633171e-05, "loss": 0.9212, "step": 4704 }, { "epoch": 0.3187885358086591, "grad_norm": 7.088031768798828, "learning_rate": 9.45868984872339e-05, "loss": 0.6453, "step": 4705 }, { "epoch": 0.3188562910766312, "grad_norm": 7.319702625274658, "learning_rate": 9.458552946813609e-05, "loss": 0.8328, "step": 4706 }, { "epoch": 0.3189240463446033, "grad_norm": 6.979453086853027, "learning_rate": 9.458416044903827e-05, "loss": 0.7806, "step": 4707 }, { "epoch": 0.3189918016125754, "grad_norm": 7.868389129638672, "learning_rate": 9.458279142994046e-05, "loss": 0.8052, "step": 4708 }, { "epoch": 0.3190595568805475, "grad_norm": 7.2435431480407715, "learning_rate": 9.458142241084264e-05, "loss": 0.8922, "step": 4709 }, { "epoch": 0.3191273121485195, "grad_norm": 8.62594985961914, "learning_rate": 9.458005339174482e-05, "loss": 1.0572, "step": 4710 }, { "epoch": 0.3191950674164916, "grad_norm": 8.404034614562988, "learning_rate": 9.457868437264701e-05, "loss": 1.1592, "step": 4711 }, { "epoch": 0.3192628226844637, "grad_norm": 6.669247150421143, "learning_rate": 9.45773153535492e-05, "loss": 0.8727, "step": 4712 }, { "epoch": 0.3193305779524358, "grad_norm": 7.565460681915283, "learning_rate": 9.457594633445138e-05, "loss": 0.7803, "step": 4713 }, { "epoch": 0.3193983332204079, "grad_norm": 7.726403713226318, "learning_rate": 9.457457731535356e-05, "loss": 0.81, "step": 4714 }, { "epoch": 0.31946608848838, "grad_norm": 7.344959259033203, "learning_rate": 9.457320829625574e-05, "loss": 0.6959, "step": 4715 }, { "epoch": 0.3195338437563521, "grad_norm": 5.9748759269714355, "learning_rate": 9.457183927715793e-05, "loss": 0.8332, "step": 4716 }, { "epoch": 0.31960159902432417, "grad_norm": 7.241995334625244, "learning_rate": 9.457047025806011e-05, "loss": 0.8243, "step": 4717 }, { "epoch": 0.3196693542922962, "grad_norm": 7.812702655792236, "learning_rate": 9.456910123896229e-05, "loss": 0.8603, "step": 4718 }, { "epoch": 0.3197371095602683, "grad_norm": 9.492036819458008, "learning_rate": 9.456773221986447e-05, "loss": 1.0208, "step": 4719 }, { "epoch": 0.3198048648282404, "grad_norm": 8.922654151916504, "learning_rate": 9.456636320076665e-05, "loss": 1.1939, "step": 4720 }, { "epoch": 0.3198726200962125, "grad_norm": 6.459314346313477, "learning_rate": 9.456499418166885e-05, "loss": 1.0332, "step": 4721 }, { "epoch": 0.31994037536418457, "grad_norm": 7.104556560516357, "learning_rate": 9.456362516257103e-05, "loss": 1.0608, "step": 4722 }, { "epoch": 0.32000813063215666, "grad_norm": 7.620473861694336, "learning_rate": 9.45622561434732e-05, "loss": 0.888, "step": 4723 }, { "epoch": 0.32007588590012875, "grad_norm": 7.757092475891113, "learning_rate": 9.456088712437539e-05, "loss": 1.0355, "step": 4724 }, { "epoch": 0.32014364116810085, "grad_norm": 7.084576606750488, "learning_rate": 9.455951810527758e-05, "loss": 0.7148, "step": 4725 }, { "epoch": 0.3202113964360729, "grad_norm": 6.5413079261779785, "learning_rate": 9.455814908617976e-05, "loss": 0.8678, "step": 4726 }, { "epoch": 0.320279151704045, "grad_norm": 8.085969924926758, "learning_rate": 9.455678006708194e-05, "loss": 1.0137, "step": 4727 }, { "epoch": 0.32034690697201706, "grad_norm": 6.338340759277344, "learning_rate": 9.455541104798412e-05, "loss": 0.9573, "step": 4728 }, { "epoch": 0.32041466223998916, "grad_norm": 8.724467277526855, "learning_rate": 9.45540420288863e-05, "loss": 1.0585, "step": 4729 }, { "epoch": 0.32048241750796125, "grad_norm": 5.904287815093994, "learning_rate": 9.45526730097885e-05, "loss": 0.6882, "step": 4730 }, { "epoch": 0.32055017277593334, "grad_norm": 6.975876331329346, "learning_rate": 9.455130399069068e-05, "loss": 0.8641, "step": 4731 }, { "epoch": 0.32061792804390543, "grad_norm": 7.307252407073975, "learning_rate": 9.454993497159286e-05, "loss": 0.8961, "step": 4732 }, { "epoch": 0.3206856833118775, "grad_norm": 6.06977653503418, "learning_rate": 9.454856595249504e-05, "loss": 0.8388, "step": 4733 }, { "epoch": 0.32075343857984956, "grad_norm": 5.903672218322754, "learning_rate": 9.454719693339723e-05, "loss": 0.6962, "step": 4734 }, { "epoch": 0.32082119384782165, "grad_norm": 9.061569213867188, "learning_rate": 9.454582791429941e-05, "loss": 1.0246, "step": 4735 }, { "epoch": 0.32088894911579374, "grad_norm": 9.154926300048828, "learning_rate": 9.454445889520159e-05, "loss": 0.9336, "step": 4736 }, { "epoch": 0.32095670438376583, "grad_norm": 6.51992654800415, "learning_rate": 9.454308987610377e-05, "loss": 0.8753, "step": 4737 }, { "epoch": 0.3210244596517379, "grad_norm": 7.160184383392334, "learning_rate": 9.454172085700595e-05, "loss": 1.0036, "step": 4738 }, { "epoch": 0.32109221491971, "grad_norm": 6.895291805267334, "learning_rate": 9.454035183790815e-05, "loss": 0.8359, "step": 4739 }, { "epoch": 0.3211599701876821, "grad_norm": 7.363986015319824, "learning_rate": 9.453898281881033e-05, "loss": 1.0207, "step": 4740 }, { "epoch": 0.3212277254556542, "grad_norm": 9.040234565734863, "learning_rate": 9.45376137997125e-05, "loss": 1.0123, "step": 4741 }, { "epoch": 0.32129548072362624, "grad_norm": 7.679563045501709, "learning_rate": 9.453624478061469e-05, "loss": 1.1201, "step": 4742 }, { "epoch": 0.32136323599159833, "grad_norm": 7.640948295593262, "learning_rate": 9.453487576151687e-05, "loss": 1.1834, "step": 4743 }, { "epoch": 0.3214309912595704, "grad_norm": 6.348153591156006, "learning_rate": 9.453350674241906e-05, "loss": 0.735, "step": 4744 }, { "epoch": 0.3214987465275425, "grad_norm": 6.899050712585449, "learning_rate": 9.453213772332124e-05, "loss": 0.9054, "step": 4745 }, { "epoch": 0.3215665017955146, "grad_norm": 8.880489349365234, "learning_rate": 9.453076870422342e-05, "loss": 1.0253, "step": 4746 }, { "epoch": 0.3216342570634867, "grad_norm": 7.2754597663879395, "learning_rate": 9.45293996851256e-05, "loss": 0.75, "step": 4747 }, { "epoch": 0.3217020123314588, "grad_norm": 7.26841402053833, "learning_rate": 9.45280306660278e-05, "loss": 1.2198, "step": 4748 }, { "epoch": 0.3217697675994309, "grad_norm": 6.0724310874938965, "learning_rate": 9.452666164692998e-05, "loss": 0.7444, "step": 4749 }, { "epoch": 0.3218375228674029, "grad_norm": 7.82569694519043, "learning_rate": 9.452529262783216e-05, "loss": 0.9266, "step": 4750 }, { "epoch": 0.321905278135375, "grad_norm": 7.153421878814697, "learning_rate": 9.452392360873435e-05, "loss": 1.2223, "step": 4751 }, { "epoch": 0.3219730334033471, "grad_norm": 6.586106777191162, "learning_rate": 9.452255458963653e-05, "loss": 0.9157, "step": 4752 }, { "epoch": 0.3220407886713192, "grad_norm": 7.776567459106445, "learning_rate": 9.452118557053871e-05, "loss": 0.7396, "step": 4753 }, { "epoch": 0.3221085439392913, "grad_norm": 7.4870381355285645, "learning_rate": 9.45198165514409e-05, "loss": 1.0812, "step": 4754 }, { "epoch": 0.3221762992072634, "grad_norm": 8.112491607666016, "learning_rate": 9.451844753234309e-05, "loss": 1.2816, "step": 4755 }, { "epoch": 0.32224405447523546, "grad_norm": 8.562600135803223, "learning_rate": 9.451707851324527e-05, "loss": 0.9872, "step": 4756 }, { "epoch": 0.32231180974320756, "grad_norm": 9.139601707458496, "learning_rate": 9.451570949414746e-05, "loss": 1.019, "step": 4757 }, { "epoch": 0.3223795650111796, "grad_norm": 7.6649370193481445, "learning_rate": 9.451434047504964e-05, "loss": 1.1594, "step": 4758 }, { "epoch": 0.3224473202791517, "grad_norm": 8.193527221679688, "learning_rate": 9.451297145595182e-05, "loss": 0.8804, "step": 4759 }, { "epoch": 0.3225150755471238, "grad_norm": 5.558340072631836, "learning_rate": 9.4511602436854e-05, "loss": 0.8778, "step": 4760 }, { "epoch": 0.32258283081509587, "grad_norm": 7.296480178833008, "learning_rate": 9.451023341775618e-05, "loss": 1.0039, "step": 4761 }, { "epoch": 0.32265058608306796, "grad_norm": 8.916117668151855, "learning_rate": 9.450886439865837e-05, "loss": 0.9516, "step": 4762 }, { "epoch": 0.32271834135104005, "grad_norm": 7.691675662994385, "learning_rate": 9.450749537956056e-05, "loss": 1.1872, "step": 4763 }, { "epoch": 0.32278609661901214, "grad_norm": 6.425968647003174, "learning_rate": 9.450612636046274e-05, "loss": 0.7914, "step": 4764 }, { "epoch": 0.32285385188698423, "grad_norm": 6.476465225219727, "learning_rate": 9.450475734136492e-05, "loss": 0.9449, "step": 4765 }, { "epoch": 0.3229216071549563, "grad_norm": 7.470714569091797, "learning_rate": 9.450338832226711e-05, "loss": 0.8132, "step": 4766 }, { "epoch": 0.32298936242292836, "grad_norm": 6.947244644165039, "learning_rate": 9.450201930316929e-05, "loss": 1.0203, "step": 4767 }, { "epoch": 0.32305711769090045, "grad_norm": 6.939558982849121, "learning_rate": 9.450065028407147e-05, "loss": 0.7484, "step": 4768 }, { "epoch": 0.32312487295887254, "grad_norm": 6.562455177307129, "learning_rate": 9.449928126497365e-05, "loss": 0.9659, "step": 4769 }, { "epoch": 0.32319262822684464, "grad_norm": 7.884627819061279, "learning_rate": 9.449791224587583e-05, "loss": 0.9394, "step": 4770 }, { "epoch": 0.3232603834948167, "grad_norm": 8.175631523132324, "learning_rate": 9.449654322677802e-05, "loss": 1.3089, "step": 4771 }, { "epoch": 0.3233281387627888, "grad_norm": 7.907345294952393, "learning_rate": 9.44951742076802e-05, "loss": 0.7641, "step": 4772 }, { "epoch": 0.3233958940307609, "grad_norm": 8.59745979309082, "learning_rate": 9.449380518858239e-05, "loss": 1.1506, "step": 4773 }, { "epoch": 0.323463649298733, "grad_norm": 8.216800689697266, "learning_rate": 9.449243616948457e-05, "loss": 1.1007, "step": 4774 }, { "epoch": 0.32353140456670504, "grad_norm": 7.630847930908203, "learning_rate": 9.449106715038675e-05, "loss": 0.6435, "step": 4775 }, { "epoch": 0.32359915983467713, "grad_norm": 8.40709400177002, "learning_rate": 9.448969813128894e-05, "loss": 1.0299, "step": 4776 }, { "epoch": 0.3236669151026492, "grad_norm": 7.3637166023254395, "learning_rate": 9.448832911219112e-05, "loss": 1.2733, "step": 4777 }, { "epoch": 0.3237346703706213, "grad_norm": 7.707301139831543, "learning_rate": 9.44869600930933e-05, "loss": 0.91, "step": 4778 }, { "epoch": 0.3238024256385934, "grad_norm": 6.911712169647217, "learning_rate": 9.448559107399548e-05, "loss": 1.0425, "step": 4779 }, { "epoch": 0.3238701809065655, "grad_norm": 7.387215614318848, "learning_rate": 9.448422205489768e-05, "loss": 1.1508, "step": 4780 }, { "epoch": 0.3239379361745376, "grad_norm": 6.074267864227295, "learning_rate": 9.448285303579986e-05, "loss": 0.7861, "step": 4781 }, { "epoch": 0.3240056914425097, "grad_norm": 7.4029436111450195, "learning_rate": 9.448148401670204e-05, "loss": 1.1438, "step": 4782 }, { "epoch": 0.3240734467104817, "grad_norm": 8.514384269714355, "learning_rate": 9.448011499760422e-05, "loss": 1.1075, "step": 4783 }, { "epoch": 0.3241412019784538, "grad_norm": 8.503793716430664, "learning_rate": 9.44787459785064e-05, "loss": 0.9377, "step": 4784 }, { "epoch": 0.3242089572464259, "grad_norm": 6.770750522613525, "learning_rate": 9.447737695940859e-05, "loss": 0.8342, "step": 4785 }, { "epoch": 0.324276712514398, "grad_norm": 7.019729137420654, "learning_rate": 9.447600794031077e-05, "loss": 0.8236, "step": 4786 }, { "epoch": 0.3243444677823701, "grad_norm": 6.147645950317383, "learning_rate": 9.447463892121295e-05, "loss": 0.9493, "step": 4787 }, { "epoch": 0.3244122230503422, "grad_norm": 9.160635948181152, "learning_rate": 9.447326990211513e-05, "loss": 1.1632, "step": 4788 }, { "epoch": 0.32447997831831427, "grad_norm": 7.918509483337402, "learning_rate": 9.447190088301733e-05, "loss": 1.0154, "step": 4789 }, { "epoch": 0.32454773358628636, "grad_norm": 8.457235336303711, "learning_rate": 9.44705318639195e-05, "loss": 1.0373, "step": 4790 }, { "epoch": 0.3246154888542584, "grad_norm": 7.332309722900391, "learning_rate": 9.446916284482169e-05, "loss": 1.09, "step": 4791 }, { "epoch": 0.3246832441222305, "grad_norm": 7.7085700035095215, "learning_rate": 9.446779382572387e-05, "loss": 0.8492, "step": 4792 }, { "epoch": 0.3247509993902026, "grad_norm": 6.121610641479492, "learning_rate": 9.446642480662605e-05, "loss": 1.0399, "step": 4793 }, { "epoch": 0.32481875465817467, "grad_norm": 6.767165184020996, "learning_rate": 9.446505578752824e-05, "loss": 0.6908, "step": 4794 }, { "epoch": 0.32488650992614676, "grad_norm": 7.629088401794434, "learning_rate": 9.446368676843042e-05, "loss": 0.9356, "step": 4795 }, { "epoch": 0.32495426519411885, "grad_norm": 7.590803146362305, "learning_rate": 9.44623177493326e-05, "loss": 0.7671, "step": 4796 }, { "epoch": 0.32502202046209094, "grad_norm": 7.876105785369873, "learning_rate": 9.44609487302348e-05, "loss": 0.9654, "step": 4797 }, { "epoch": 0.32508977573006304, "grad_norm": 8.503900527954102, "learning_rate": 9.445957971113698e-05, "loss": 0.9726, "step": 4798 }, { "epoch": 0.32515753099803507, "grad_norm": 9.96375846862793, "learning_rate": 9.445821069203916e-05, "loss": 0.7587, "step": 4799 }, { "epoch": 0.32522528626600716, "grad_norm": 6.24782133102417, "learning_rate": 9.445684167294135e-05, "loss": 0.8028, "step": 4800 }, { "epoch": 0.32529304153397925, "grad_norm": 7.5481181144714355, "learning_rate": 9.445547265384353e-05, "loss": 0.8194, "step": 4801 }, { "epoch": 0.32536079680195135, "grad_norm": 8.148533821105957, "learning_rate": 9.445410363474571e-05, "loss": 1.0009, "step": 4802 }, { "epoch": 0.32542855206992344, "grad_norm": 7.8531341552734375, "learning_rate": 9.44527346156479e-05, "loss": 1.0629, "step": 4803 }, { "epoch": 0.32549630733789553, "grad_norm": 7.7668843269348145, "learning_rate": 9.445136559655008e-05, "loss": 1.1211, "step": 4804 }, { "epoch": 0.3255640626058676, "grad_norm": 9.245609283447266, "learning_rate": 9.444999657745226e-05, "loss": 1.0407, "step": 4805 }, { "epoch": 0.3256318178738397, "grad_norm": 7.387469291687012, "learning_rate": 9.444862755835445e-05, "loss": 1.128, "step": 4806 }, { "epoch": 0.32569957314181175, "grad_norm": 6.636757850646973, "learning_rate": 9.444725853925663e-05, "loss": 0.8386, "step": 4807 }, { "epoch": 0.32576732840978384, "grad_norm": 8.264979362487793, "learning_rate": 9.444588952015882e-05, "loss": 0.904, "step": 4808 }, { "epoch": 0.32583508367775593, "grad_norm": 7.77110481262207, "learning_rate": 9.4444520501061e-05, "loss": 1.0292, "step": 4809 }, { "epoch": 0.325902838945728, "grad_norm": 6.825851917266846, "learning_rate": 9.444315148196318e-05, "loss": 0.7484, "step": 4810 }, { "epoch": 0.3259705942137001, "grad_norm": 6.475069999694824, "learning_rate": 9.444178246286536e-05, "loss": 0.9085, "step": 4811 }, { "epoch": 0.3260383494816722, "grad_norm": 8.736098289489746, "learning_rate": 9.444041344376755e-05, "loss": 1.1817, "step": 4812 }, { "epoch": 0.3261061047496443, "grad_norm": 7.0992608070373535, "learning_rate": 9.443904442466973e-05, "loss": 0.8654, "step": 4813 }, { "epoch": 0.3261738600176164, "grad_norm": 6.594883441925049, "learning_rate": 9.443767540557192e-05, "loss": 1.0002, "step": 4814 }, { "epoch": 0.3262416152855885, "grad_norm": 7.857585906982422, "learning_rate": 9.44363063864741e-05, "loss": 1.1711, "step": 4815 }, { "epoch": 0.3263093705535605, "grad_norm": 6.884295463562012, "learning_rate": 9.443493736737628e-05, "loss": 0.8003, "step": 4816 }, { "epoch": 0.3263771258215326, "grad_norm": 7.344529628753662, "learning_rate": 9.443356834827847e-05, "loss": 1.0009, "step": 4817 }, { "epoch": 0.3264448810895047, "grad_norm": 6.892088413238525, "learning_rate": 9.443219932918065e-05, "loss": 0.9797, "step": 4818 }, { "epoch": 0.3265126363574768, "grad_norm": 12.833809852600098, "learning_rate": 9.443083031008283e-05, "loss": 1.1143, "step": 4819 }, { "epoch": 0.3265803916254489, "grad_norm": 7.343682765960693, "learning_rate": 9.442946129098501e-05, "loss": 0.6302, "step": 4820 }, { "epoch": 0.326648146893421, "grad_norm": 7.089320659637451, "learning_rate": 9.442809227188719e-05, "loss": 1.0129, "step": 4821 }, { "epoch": 0.32671590216139307, "grad_norm": 6.60029935836792, "learning_rate": 9.442672325278938e-05, "loss": 0.9259, "step": 4822 }, { "epoch": 0.32678365742936516, "grad_norm": 10.470613479614258, "learning_rate": 9.442535423369157e-05, "loss": 1.1124, "step": 4823 }, { "epoch": 0.3268514126973372, "grad_norm": 8.491450309753418, "learning_rate": 9.442398521459375e-05, "loss": 0.9943, "step": 4824 }, { "epoch": 0.3269191679653093, "grad_norm": 6.033353328704834, "learning_rate": 9.442261619549593e-05, "loss": 1.0003, "step": 4825 }, { "epoch": 0.3269869232332814, "grad_norm": 7.0092549324035645, "learning_rate": 9.442124717639812e-05, "loss": 0.7843, "step": 4826 }, { "epoch": 0.32705467850125347, "grad_norm": 8.52950382232666, "learning_rate": 9.44198781573003e-05, "loss": 1.1036, "step": 4827 }, { "epoch": 0.32712243376922556, "grad_norm": 7.509829044342041, "learning_rate": 9.441850913820248e-05, "loss": 0.9969, "step": 4828 }, { "epoch": 0.32719018903719765, "grad_norm": 7.235622406005859, "learning_rate": 9.441714011910466e-05, "loss": 0.9131, "step": 4829 }, { "epoch": 0.32725794430516975, "grad_norm": 8.332746505737305, "learning_rate": 9.441577110000684e-05, "loss": 1.1663, "step": 4830 }, { "epoch": 0.32732569957314184, "grad_norm": 8.321161270141602, "learning_rate": 9.441440208090904e-05, "loss": 1.0091, "step": 4831 }, { "epoch": 0.3273934548411139, "grad_norm": 7.0502753257751465, "learning_rate": 9.441303306181122e-05, "loss": 1.0273, "step": 4832 }, { "epoch": 0.32746121010908597, "grad_norm": 6.921250343322754, "learning_rate": 9.44116640427134e-05, "loss": 0.873, "step": 4833 }, { "epoch": 0.32752896537705806, "grad_norm": 7.386787414550781, "learning_rate": 9.441029502361558e-05, "loss": 0.7415, "step": 4834 }, { "epoch": 0.32759672064503015, "grad_norm": 6.814700603485107, "learning_rate": 9.440892600451777e-05, "loss": 0.8827, "step": 4835 }, { "epoch": 0.32766447591300224, "grad_norm": 7.551968574523926, "learning_rate": 9.440755698541995e-05, "loss": 0.9752, "step": 4836 }, { "epoch": 0.32773223118097433, "grad_norm": 8.299920082092285, "learning_rate": 9.440618796632213e-05, "loss": 1.0253, "step": 4837 }, { "epoch": 0.3277999864489464, "grad_norm": 7.607963562011719, "learning_rate": 9.440481894722431e-05, "loss": 1.0562, "step": 4838 }, { "epoch": 0.3278677417169185, "grad_norm": 8.098003387451172, "learning_rate": 9.440344992812649e-05, "loss": 0.9055, "step": 4839 }, { "epoch": 0.32793549698489055, "grad_norm": 6.6647257804870605, "learning_rate": 9.440208090902869e-05, "loss": 0.8188, "step": 4840 }, { "epoch": 0.32800325225286264, "grad_norm": 7.541131496429443, "learning_rate": 9.440071188993087e-05, "loss": 0.7095, "step": 4841 }, { "epoch": 0.32807100752083473, "grad_norm": 10.203042984008789, "learning_rate": 9.439934287083305e-05, "loss": 0.8256, "step": 4842 }, { "epoch": 0.3281387627888068, "grad_norm": 8.049646377563477, "learning_rate": 9.439797385173524e-05, "loss": 1.0244, "step": 4843 }, { "epoch": 0.3282065180567789, "grad_norm": 6.727071285247803, "learning_rate": 9.439660483263742e-05, "loss": 0.9681, "step": 4844 }, { "epoch": 0.328274273324751, "grad_norm": 8.106125831604004, "learning_rate": 9.43952358135396e-05, "loss": 1.0944, "step": 4845 }, { "epoch": 0.3283420285927231, "grad_norm": 8.768218994140625, "learning_rate": 9.43938667944418e-05, "loss": 0.7493, "step": 4846 }, { "epoch": 0.3284097838606952, "grad_norm": 6.333378791809082, "learning_rate": 9.439249777534397e-05, "loss": 0.9187, "step": 4847 }, { "epoch": 0.32847753912866723, "grad_norm": 7.732221603393555, "learning_rate": 9.439112875624616e-05, "loss": 1.2522, "step": 4848 }, { "epoch": 0.3285452943966393, "grad_norm": 7.67783784866333, "learning_rate": 9.438975973714835e-05, "loss": 0.8682, "step": 4849 }, { "epoch": 0.3286130496646114, "grad_norm": 7.557129859924316, "learning_rate": 9.438839071805053e-05, "loss": 0.7843, "step": 4850 }, { "epoch": 0.3286808049325835, "grad_norm": 6.559933185577393, "learning_rate": 9.438702169895271e-05, "loss": 0.902, "step": 4851 }, { "epoch": 0.3287485602005556, "grad_norm": 7.604280471801758, "learning_rate": 9.438565267985489e-05, "loss": 0.8905, "step": 4852 }, { "epoch": 0.3288163154685277, "grad_norm": 7.508764266967773, "learning_rate": 9.438428366075707e-05, "loss": 0.932, "step": 4853 }, { "epoch": 0.3288840707364998, "grad_norm": 6.650167942047119, "learning_rate": 9.438291464165926e-05, "loss": 1.0507, "step": 4854 }, { "epoch": 0.32895182600447187, "grad_norm": 8.334061622619629, "learning_rate": 9.438154562256144e-05, "loss": 0.7949, "step": 4855 }, { "epoch": 0.3290195812724439, "grad_norm": 9.498878479003906, "learning_rate": 9.438017660346362e-05, "loss": 0.8487, "step": 4856 }, { "epoch": 0.329087336540416, "grad_norm": 7.810210227966309, "learning_rate": 9.43788075843658e-05, "loss": 0.9917, "step": 4857 }, { "epoch": 0.3291550918083881, "grad_norm": 7.337824821472168, "learning_rate": 9.4377438565268e-05, "loss": 0.8683, "step": 4858 }, { "epoch": 0.3292228470763602, "grad_norm": 8.331491470336914, "learning_rate": 9.437606954617018e-05, "loss": 1.178, "step": 4859 }, { "epoch": 0.3292906023443323, "grad_norm": 6.5013227462768555, "learning_rate": 9.437470052707236e-05, "loss": 0.7093, "step": 4860 }, { "epoch": 0.32935835761230436, "grad_norm": 8.814985275268555, "learning_rate": 9.437333150797454e-05, "loss": 0.9212, "step": 4861 }, { "epoch": 0.32942611288027646, "grad_norm": 8.78661060333252, "learning_rate": 9.437196248887672e-05, "loss": 0.8827, "step": 4862 }, { "epoch": 0.32949386814824855, "grad_norm": 8.912193298339844, "learning_rate": 9.437059346977891e-05, "loss": 1.133, "step": 4863 }, { "epoch": 0.3295616234162206, "grad_norm": 7.901734352111816, "learning_rate": 9.43692244506811e-05, "loss": 0.8293, "step": 4864 }, { "epoch": 0.3296293786841927, "grad_norm": 8.270600318908691, "learning_rate": 9.436785543158328e-05, "loss": 1.1584, "step": 4865 }, { "epoch": 0.32969713395216477, "grad_norm": 6.765751838684082, "learning_rate": 9.436648641248546e-05, "loss": 0.7705, "step": 4866 }, { "epoch": 0.32976488922013686, "grad_norm": 7.272820949554443, "learning_rate": 9.436511739338765e-05, "loss": 1.2151, "step": 4867 }, { "epoch": 0.32983264448810895, "grad_norm": 6.639741897583008, "learning_rate": 9.436374837428983e-05, "loss": 0.8868, "step": 4868 }, { "epoch": 0.32990039975608104, "grad_norm": 7.6254143714904785, "learning_rate": 9.436237935519201e-05, "loss": 1.0342, "step": 4869 }, { "epoch": 0.32996815502405313, "grad_norm": 6.546008586883545, "learning_rate": 9.436101033609419e-05, "loss": 0.7223, "step": 4870 }, { "epoch": 0.3300359102920252, "grad_norm": 6.215434551239014, "learning_rate": 9.435964131699637e-05, "loss": 0.8506, "step": 4871 }, { "epoch": 0.3301036655599973, "grad_norm": 7.897797584533691, "learning_rate": 9.435827229789856e-05, "loss": 0.8699, "step": 4872 }, { "epoch": 0.33017142082796935, "grad_norm": 8.3803129196167, "learning_rate": 9.435690327880074e-05, "loss": 1.0922, "step": 4873 }, { "epoch": 0.33023917609594144, "grad_norm": 6.675380706787109, "learning_rate": 9.435553425970293e-05, "loss": 0.8401, "step": 4874 }, { "epoch": 0.33030693136391354, "grad_norm": 7.380627632141113, "learning_rate": 9.43541652406051e-05, "loss": 0.615, "step": 4875 }, { "epoch": 0.33037468663188563, "grad_norm": 8.120410919189453, "learning_rate": 9.435279622150729e-05, "loss": 1.2622, "step": 4876 }, { "epoch": 0.3304424418998577, "grad_norm": 7.170663356781006, "learning_rate": 9.435142720240948e-05, "loss": 0.893, "step": 4877 }, { "epoch": 0.3305101971678298, "grad_norm": 12.477727890014648, "learning_rate": 9.435005818331166e-05, "loss": 0.9654, "step": 4878 }, { "epoch": 0.3305779524358019, "grad_norm": 6.562829971313477, "learning_rate": 9.434868916421384e-05, "loss": 0.7042, "step": 4879 }, { "epoch": 0.330645707703774, "grad_norm": 8.406328201293945, "learning_rate": 9.434732014511602e-05, "loss": 1.0807, "step": 4880 }, { "epoch": 0.33071346297174603, "grad_norm": 9.06248950958252, "learning_rate": 9.434595112601821e-05, "loss": 0.8586, "step": 4881 }, { "epoch": 0.3307812182397181, "grad_norm": 8.743175506591797, "learning_rate": 9.43445821069204e-05, "loss": 0.9501, "step": 4882 }, { "epoch": 0.3308489735076902, "grad_norm": 7.666022777557373, "learning_rate": 9.434321308782258e-05, "loss": 0.9411, "step": 4883 }, { "epoch": 0.3309167287756623, "grad_norm": 6.989424705505371, "learning_rate": 9.434184406872476e-05, "loss": 0.9511, "step": 4884 }, { "epoch": 0.3309844840436344, "grad_norm": 6.9908905029296875, "learning_rate": 9.434047504962694e-05, "loss": 0.9769, "step": 4885 }, { "epoch": 0.3310522393116065, "grad_norm": 7.985763072967529, "learning_rate": 9.433910603052913e-05, "loss": 1.1041, "step": 4886 }, { "epoch": 0.3311199945795786, "grad_norm": 7.9841694831848145, "learning_rate": 9.433773701143131e-05, "loss": 0.8341, "step": 4887 }, { "epoch": 0.33118774984755067, "grad_norm": 6.9602952003479, "learning_rate": 9.433636799233349e-05, "loss": 0.9388, "step": 4888 }, { "epoch": 0.3312555051155227, "grad_norm": 7.2530012130737305, "learning_rate": 9.433499897323567e-05, "loss": 0.9396, "step": 4889 }, { "epoch": 0.3313232603834948, "grad_norm": 7.338861465454102, "learning_rate": 9.433362995413786e-05, "loss": 0.9168, "step": 4890 }, { "epoch": 0.3313910156514669, "grad_norm": 7.08353853225708, "learning_rate": 9.433226093504005e-05, "loss": 0.9332, "step": 4891 }, { "epoch": 0.331458770919439, "grad_norm": 7.379842758178711, "learning_rate": 9.433089191594223e-05, "loss": 0.7139, "step": 4892 }, { "epoch": 0.3315265261874111, "grad_norm": 8.599993705749512, "learning_rate": 9.432952289684442e-05, "loss": 1.0023, "step": 4893 }, { "epoch": 0.33159428145538317, "grad_norm": 7.096752643585205, "learning_rate": 9.43281538777466e-05, "loss": 0.9052, "step": 4894 }, { "epoch": 0.33166203672335526, "grad_norm": 6.28333044052124, "learning_rate": 9.432678485864878e-05, "loss": 0.8721, "step": 4895 }, { "epoch": 0.33172979199132735, "grad_norm": 8.48362922668457, "learning_rate": 9.432541583955097e-05, "loss": 0.9774, "step": 4896 }, { "epoch": 0.3317975472592994, "grad_norm": 8.214259147644043, "learning_rate": 9.432404682045315e-05, "loss": 0.999, "step": 4897 }, { "epoch": 0.3318653025272715, "grad_norm": 9.603363990783691, "learning_rate": 9.432267780135533e-05, "loss": 0.7848, "step": 4898 }, { "epoch": 0.33193305779524357, "grad_norm": 7.982606887817383, "learning_rate": 9.432130878225753e-05, "loss": 0.7167, "step": 4899 }, { "epoch": 0.33200081306321566, "grad_norm": 6.8360915184021, "learning_rate": 9.431993976315971e-05, "loss": 1.1884, "step": 4900 }, { "epoch": 0.33206856833118775, "grad_norm": 9.70743179321289, "learning_rate": 9.431857074406189e-05, "loss": 1.0198, "step": 4901 }, { "epoch": 0.33213632359915984, "grad_norm": 7.7961554527282715, "learning_rate": 9.431720172496407e-05, "loss": 0.8637, "step": 4902 }, { "epoch": 0.33220407886713194, "grad_norm": 7.809814453125, "learning_rate": 9.431583270586625e-05, "loss": 1.0737, "step": 4903 }, { "epoch": 0.332271834135104, "grad_norm": 6.5765862464904785, "learning_rate": 9.431446368676844e-05, "loss": 0.6574, "step": 4904 }, { "epoch": 0.33233958940307606, "grad_norm": 6.912911891937256, "learning_rate": 9.431309466767062e-05, "loss": 0.9647, "step": 4905 }, { "epoch": 0.33240734467104815, "grad_norm": 7.2532877922058105, "learning_rate": 9.43117256485728e-05, "loss": 1.098, "step": 4906 }, { "epoch": 0.33247509993902025, "grad_norm": 6.589138984680176, "learning_rate": 9.431035662947498e-05, "loss": 0.9948, "step": 4907 }, { "epoch": 0.33254285520699234, "grad_norm": 9.1469144821167, "learning_rate": 9.430898761037717e-05, "loss": 0.9656, "step": 4908 }, { "epoch": 0.33261061047496443, "grad_norm": 7.124277591705322, "learning_rate": 9.430761859127936e-05, "loss": 0.8704, "step": 4909 }, { "epoch": 0.3326783657429365, "grad_norm": 6.862776279449463, "learning_rate": 9.430624957218154e-05, "loss": 0.8008, "step": 4910 }, { "epoch": 0.3327461210109086, "grad_norm": 6.245189666748047, "learning_rate": 9.430488055308372e-05, "loss": 0.8529, "step": 4911 }, { "epoch": 0.3328138762788807, "grad_norm": 8.546821594238281, "learning_rate": 9.43035115339859e-05, "loss": 1.1747, "step": 4912 }, { "epoch": 0.33288163154685274, "grad_norm": 7.5279765129089355, "learning_rate": 9.43021425148881e-05, "loss": 0.8725, "step": 4913 }, { "epoch": 0.33294938681482483, "grad_norm": 7.987123966217041, "learning_rate": 9.430077349579027e-05, "loss": 0.8752, "step": 4914 }, { "epoch": 0.3330171420827969, "grad_norm": 10.1973876953125, "learning_rate": 9.429940447669245e-05, "loss": 1.0813, "step": 4915 }, { "epoch": 0.333084897350769, "grad_norm": 7.406893253326416, "learning_rate": 9.429803545759464e-05, "loss": 0.7064, "step": 4916 }, { "epoch": 0.3331526526187411, "grad_norm": 6.545569896697998, "learning_rate": 9.429666643849682e-05, "loss": 0.9005, "step": 4917 }, { "epoch": 0.3332204078867132, "grad_norm": 8.624588012695312, "learning_rate": 9.429529741939901e-05, "loss": 0.939, "step": 4918 }, { "epoch": 0.3332881631546853, "grad_norm": 6.811842441558838, "learning_rate": 9.429392840030119e-05, "loss": 0.8367, "step": 4919 }, { "epoch": 0.3333559184226574, "grad_norm": 7.256634712219238, "learning_rate": 9.429255938120337e-05, "loss": 1.0455, "step": 4920 }, { "epoch": 0.3334236736906295, "grad_norm": 6.900022983551025, "learning_rate": 9.429119036210555e-05, "loss": 0.954, "step": 4921 }, { "epoch": 0.3334914289586015, "grad_norm": 9.217741012573242, "learning_rate": 9.428982134300774e-05, "loss": 0.6392, "step": 4922 }, { "epoch": 0.3335591842265736, "grad_norm": 7.729586124420166, "learning_rate": 9.428845232390992e-05, "loss": 0.9372, "step": 4923 }, { "epoch": 0.3336269394945457, "grad_norm": 6.606268405914307, "learning_rate": 9.42870833048121e-05, "loss": 0.8127, "step": 4924 }, { "epoch": 0.3336946947625178, "grad_norm": 8.078627586364746, "learning_rate": 9.428571428571429e-05, "loss": 0.847, "step": 4925 }, { "epoch": 0.3337624500304899, "grad_norm": 8.212615966796875, "learning_rate": 9.428434526661647e-05, "loss": 1.0133, "step": 4926 }, { "epoch": 0.33383020529846197, "grad_norm": 6.68360710144043, "learning_rate": 9.428297624751866e-05, "loss": 0.9796, "step": 4927 }, { "epoch": 0.33389796056643406, "grad_norm": 7.115147590637207, "learning_rate": 9.428160722842084e-05, "loss": 0.9106, "step": 4928 }, { "epoch": 0.33396571583440615, "grad_norm": 7.178501129150391, "learning_rate": 9.428023820932302e-05, "loss": 0.7531, "step": 4929 }, { "epoch": 0.3340334711023782, "grad_norm": 7.303642749786377, "learning_rate": 9.42788691902252e-05, "loss": 0.9163, "step": 4930 }, { "epoch": 0.3341012263703503, "grad_norm": 7.532678604125977, "learning_rate": 9.427750017112738e-05, "loss": 1.001, "step": 4931 }, { "epoch": 0.33416898163832237, "grad_norm": 6.791550636291504, "learning_rate": 9.427613115202957e-05, "loss": 0.9005, "step": 4932 }, { "epoch": 0.33423673690629446, "grad_norm": 7.264156341552734, "learning_rate": 9.427476213293176e-05, "loss": 0.8076, "step": 4933 }, { "epoch": 0.33430449217426655, "grad_norm": 8.477554321289062, "learning_rate": 9.427339311383394e-05, "loss": 0.9363, "step": 4934 }, { "epoch": 0.33437224744223865, "grad_norm": 7.020787239074707, "learning_rate": 9.427202409473612e-05, "loss": 0.6933, "step": 4935 }, { "epoch": 0.33444000271021074, "grad_norm": 7.990420818328857, "learning_rate": 9.427065507563831e-05, "loss": 0.9759, "step": 4936 }, { "epoch": 0.33450775797818283, "grad_norm": 8.950580596923828, "learning_rate": 9.426928605654049e-05, "loss": 1.2876, "step": 4937 }, { "epoch": 0.33457551324615487, "grad_norm": 6.879635334014893, "learning_rate": 9.426791703744267e-05, "loss": 0.992, "step": 4938 }, { "epoch": 0.33464326851412696, "grad_norm": 7.640803813934326, "learning_rate": 9.426654801834486e-05, "loss": 1.1523, "step": 4939 }, { "epoch": 0.33471102378209905, "grad_norm": 7.156732559204102, "learning_rate": 9.426517899924704e-05, "loss": 0.6521, "step": 4940 }, { "epoch": 0.33477877905007114, "grad_norm": 7.543867588043213, "learning_rate": 9.426380998014922e-05, "loss": 0.8495, "step": 4941 }, { "epoch": 0.33484653431804323, "grad_norm": 8.554939270019531, "learning_rate": 9.426244096105142e-05, "loss": 1.0593, "step": 4942 }, { "epoch": 0.3349142895860153, "grad_norm": 7.902100563049316, "learning_rate": 9.42610719419536e-05, "loss": 1.0271, "step": 4943 }, { "epoch": 0.3349820448539874, "grad_norm": 7.945007801055908, "learning_rate": 9.425970292285578e-05, "loss": 0.7839, "step": 4944 }, { "epoch": 0.3350498001219595, "grad_norm": 7.539274215698242, "learning_rate": 9.425833390375797e-05, "loss": 0.968, "step": 4945 }, { "epoch": 0.33511755538993154, "grad_norm": 7.23228120803833, "learning_rate": 9.425696488466015e-05, "loss": 0.8319, "step": 4946 }, { "epoch": 0.33518531065790363, "grad_norm": 7.251431941986084, "learning_rate": 9.425559586556233e-05, "loss": 0.9513, "step": 4947 }, { "epoch": 0.3352530659258757, "grad_norm": 7.047804355621338, "learning_rate": 9.425422684646451e-05, "loss": 0.8677, "step": 4948 }, { "epoch": 0.3353208211938478, "grad_norm": 7.7244696617126465, "learning_rate": 9.42528578273667e-05, "loss": 0.8705, "step": 4949 }, { "epoch": 0.3353885764618199, "grad_norm": 6.078210830688477, "learning_rate": 9.425148880826889e-05, "loss": 0.9205, "step": 4950 }, { "epoch": 0.335456331729792, "grad_norm": 7.856949329376221, "learning_rate": 9.425011978917107e-05, "loss": 0.8637, "step": 4951 }, { "epoch": 0.3355240869977641, "grad_norm": 7.341653347015381, "learning_rate": 9.424875077007325e-05, "loss": 0.9582, "step": 4952 }, { "epoch": 0.3355918422657362, "grad_norm": 7.456873893737793, "learning_rate": 9.424738175097543e-05, "loss": 0.8581, "step": 4953 }, { "epoch": 0.3356595975337082, "grad_norm": 7.885173797607422, "learning_rate": 9.424601273187761e-05, "loss": 0.9551, "step": 4954 }, { "epoch": 0.3357273528016803, "grad_norm": 7.016728401184082, "learning_rate": 9.42446437127798e-05, "loss": 0.9493, "step": 4955 }, { "epoch": 0.3357951080696524, "grad_norm": 6.383670806884766, "learning_rate": 9.424327469368198e-05, "loss": 0.8178, "step": 4956 }, { "epoch": 0.3358628633376245, "grad_norm": 6.930068492889404, "learning_rate": 9.424190567458416e-05, "loss": 1.1073, "step": 4957 }, { "epoch": 0.3359306186055966, "grad_norm": 7.001153469085693, "learning_rate": 9.424053665548634e-05, "loss": 0.844, "step": 4958 }, { "epoch": 0.3359983738735687, "grad_norm": 8.316204071044922, "learning_rate": 9.423916763638854e-05, "loss": 0.9363, "step": 4959 }, { "epoch": 0.33606612914154077, "grad_norm": 7.931922912597656, "learning_rate": 9.423779861729072e-05, "loss": 1.1617, "step": 4960 }, { "epoch": 0.33613388440951286, "grad_norm": 7.447847366333008, "learning_rate": 9.42364295981929e-05, "loss": 0.9137, "step": 4961 }, { "epoch": 0.3362016396774849, "grad_norm": 6.582141876220703, "learning_rate": 9.423506057909508e-05, "loss": 1.0516, "step": 4962 }, { "epoch": 0.336269394945457, "grad_norm": 6.278825283050537, "learning_rate": 9.423369155999726e-05, "loss": 0.9489, "step": 4963 }, { "epoch": 0.3363371502134291, "grad_norm": 7.415517330169678, "learning_rate": 9.423232254089945e-05, "loss": 0.9275, "step": 4964 }, { "epoch": 0.3364049054814012, "grad_norm": 6.073827266693115, "learning_rate": 9.423095352180163e-05, "loss": 0.7514, "step": 4965 }, { "epoch": 0.33647266074937326, "grad_norm": 9.100361824035645, "learning_rate": 9.422958450270381e-05, "loss": 1.0882, "step": 4966 }, { "epoch": 0.33654041601734536, "grad_norm": 8.23529052734375, "learning_rate": 9.4228215483606e-05, "loss": 0.8323, "step": 4967 }, { "epoch": 0.33660817128531745, "grad_norm": 9.86119270324707, "learning_rate": 9.422684646450819e-05, "loss": 0.8997, "step": 4968 }, { "epoch": 0.33667592655328954, "grad_norm": 7.328428268432617, "learning_rate": 9.422547744541037e-05, "loss": 0.8558, "step": 4969 }, { "epoch": 0.3367436818212616, "grad_norm": 6.703019618988037, "learning_rate": 9.422410842631255e-05, "loss": 0.782, "step": 4970 }, { "epoch": 0.33681143708923367, "grad_norm": 7.780190944671631, "learning_rate": 9.422273940721473e-05, "loss": 1.1681, "step": 4971 }, { "epoch": 0.33687919235720576, "grad_norm": 6.972787380218506, "learning_rate": 9.422137038811691e-05, "loss": 1.0137, "step": 4972 }, { "epoch": 0.33694694762517785, "grad_norm": 6.521172523498535, "learning_rate": 9.42200013690191e-05, "loss": 0.8866, "step": 4973 }, { "epoch": 0.33701470289314994, "grad_norm": 6.414315223693848, "learning_rate": 9.421863234992128e-05, "loss": 0.7614, "step": 4974 }, { "epoch": 0.33708245816112203, "grad_norm": 8.930048942565918, "learning_rate": 9.421726333082346e-05, "loss": 1.0757, "step": 4975 }, { "epoch": 0.3371502134290941, "grad_norm": 6.710857391357422, "learning_rate": 9.421589431172565e-05, "loss": 0.8966, "step": 4976 }, { "epoch": 0.3372179686970662, "grad_norm": 10.788898468017578, "learning_rate": 9.421452529262784e-05, "loss": 0.906, "step": 4977 }, { "epoch": 0.3372857239650383, "grad_norm": 6.601465225219727, "learning_rate": 9.421315627353002e-05, "loss": 0.967, "step": 4978 }, { "epoch": 0.33735347923301034, "grad_norm": 7.0998406410217285, "learning_rate": 9.42117872544322e-05, "loss": 1.0584, "step": 4979 }, { "epoch": 0.33742123450098244, "grad_norm": 8.517425537109375, "learning_rate": 9.421041823533438e-05, "loss": 0.83, "step": 4980 }, { "epoch": 0.33748898976895453, "grad_norm": 7.631463050842285, "learning_rate": 9.420904921623656e-05, "loss": 0.7431, "step": 4981 }, { "epoch": 0.3375567450369266, "grad_norm": 7.006965637207031, "learning_rate": 9.420768019713875e-05, "loss": 1.0081, "step": 4982 }, { "epoch": 0.3376245003048987, "grad_norm": 7.085225582122803, "learning_rate": 9.420631117804093e-05, "loss": 1.0094, "step": 4983 }, { "epoch": 0.3376922555728708, "grad_norm": 8.096879959106445, "learning_rate": 9.420494215894312e-05, "loss": 1.0376, "step": 4984 }, { "epoch": 0.3377600108408429, "grad_norm": 7.823955535888672, "learning_rate": 9.420357313984531e-05, "loss": 0.975, "step": 4985 }, { "epoch": 0.337827766108815, "grad_norm": 6.340082168579102, "learning_rate": 9.420220412074749e-05, "loss": 0.6133, "step": 4986 }, { "epoch": 0.337895521376787, "grad_norm": 9.930416107177734, "learning_rate": 9.420083510164967e-05, "loss": 1.0132, "step": 4987 }, { "epoch": 0.3379632766447591, "grad_norm": 6.7059221267700195, "learning_rate": 9.419946608255186e-05, "loss": 0.7784, "step": 4988 }, { "epoch": 0.3380310319127312, "grad_norm": 7.869287967681885, "learning_rate": 9.419809706345404e-05, "loss": 0.9114, "step": 4989 }, { "epoch": 0.3380987871807033, "grad_norm": 8.061338424682617, "learning_rate": 9.419672804435622e-05, "loss": 1.0982, "step": 4990 }, { "epoch": 0.3381665424486754, "grad_norm": 6.226239204406738, "learning_rate": 9.419535902525842e-05, "loss": 0.8251, "step": 4991 }, { "epoch": 0.3382342977166475, "grad_norm": 6.236758232116699, "learning_rate": 9.41939900061606e-05, "loss": 1.0367, "step": 4992 }, { "epoch": 0.3383020529846196, "grad_norm": 8.880475044250488, "learning_rate": 9.419262098706278e-05, "loss": 1.0941, "step": 4993 }, { "epoch": 0.33836980825259166, "grad_norm": 7.7189621925354, "learning_rate": 9.419125196796496e-05, "loss": 1.2868, "step": 4994 }, { "epoch": 0.3384375635205637, "grad_norm": 8.368658065795898, "learning_rate": 9.418988294886714e-05, "loss": 1.1495, "step": 4995 }, { "epoch": 0.3385053187885358, "grad_norm": 6.596102237701416, "learning_rate": 9.418851392976933e-05, "loss": 0.9639, "step": 4996 }, { "epoch": 0.3385730740565079, "grad_norm": 6.297356605529785, "learning_rate": 9.418714491067151e-05, "loss": 0.8802, "step": 4997 }, { "epoch": 0.33864082932448, "grad_norm": 6.869907855987549, "learning_rate": 9.41857758915737e-05, "loss": 0.8247, "step": 4998 }, { "epoch": 0.33870858459245207, "grad_norm": 8.147067070007324, "learning_rate": 9.418440687247587e-05, "loss": 0.7532, "step": 4999 }, { "epoch": 0.33877633986042416, "grad_norm": 7.368498802185059, "learning_rate": 9.418303785337807e-05, "loss": 0.8697, "step": 5000 }, { "epoch": 0.33884409512839625, "grad_norm": 6.064372539520264, "learning_rate": 9.418166883428025e-05, "loss": 0.8134, "step": 5001 }, { "epoch": 0.33891185039636834, "grad_norm": 7.22601318359375, "learning_rate": 9.418029981518243e-05, "loss": 0.8224, "step": 5002 }, { "epoch": 0.3389796056643404, "grad_norm": 7.832242012023926, "learning_rate": 9.417893079608461e-05, "loss": 0.9114, "step": 5003 }, { "epoch": 0.33904736093231247, "grad_norm": 7.05675745010376, "learning_rate": 9.417756177698679e-05, "loss": 0.8475, "step": 5004 }, { "epoch": 0.33911511620028456, "grad_norm": 6.385340690612793, "learning_rate": 9.417619275788898e-05, "loss": 0.8604, "step": 5005 }, { "epoch": 0.33918287146825665, "grad_norm": 7.100057601928711, "learning_rate": 9.417482373879116e-05, "loss": 0.8418, "step": 5006 }, { "epoch": 0.33925062673622874, "grad_norm": 8.983677864074707, "learning_rate": 9.417345471969334e-05, "loss": 1.1282, "step": 5007 }, { "epoch": 0.33931838200420084, "grad_norm": 8.526215553283691, "learning_rate": 9.417208570059552e-05, "loss": 0.8759, "step": 5008 }, { "epoch": 0.3393861372721729, "grad_norm": 7.278728485107422, "learning_rate": 9.41707166814977e-05, "loss": 1.0722, "step": 5009 }, { "epoch": 0.339453892540145, "grad_norm": 9.66884994506836, "learning_rate": 9.41693476623999e-05, "loss": 0.8776, "step": 5010 }, { "epoch": 0.33952164780811706, "grad_norm": 7.001797199249268, "learning_rate": 9.416797864330208e-05, "loss": 0.7863, "step": 5011 }, { "epoch": 0.33958940307608915, "grad_norm": 7.707855224609375, "learning_rate": 9.416660962420426e-05, "loss": 0.9138, "step": 5012 }, { "epoch": 0.33965715834406124, "grad_norm": 7.721848964691162, "learning_rate": 9.416524060510644e-05, "loss": 1.1121, "step": 5013 }, { "epoch": 0.33972491361203333, "grad_norm": 8.91602611541748, "learning_rate": 9.416387158600863e-05, "loss": 0.9152, "step": 5014 }, { "epoch": 0.3397926688800054, "grad_norm": 7.332693099975586, "learning_rate": 9.416250256691081e-05, "loss": 0.9559, "step": 5015 }, { "epoch": 0.3398604241479775, "grad_norm": 7.198998928070068, "learning_rate": 9.4161133547813e-05, "loss": 0.9239, "step": 5016 }, { "epoch": 0.3399281794159496, "grad_norm": 7.566831588745117, "learning_rate": 9.415976452871517e-05, "loss": 0.9281, "step": 5017 }, { "epoch": 0.3399959346839217, "grad_norm": 6.832939624786377, "learning_rate": 9.415839550961736e-05, "loss": 0.9164, "step": 5018 }, { "epoch": 0.34006368995189373, "grad_norm": 7.087849140167236, "learning_rate": 9.415702649051955e-05, "loss": 0.9314, "step": 5019 }, { "epoch": 0.3401314452198658, "grad_norm": 7.417871952056885, "learning_rate": 9.415565747142173e-05, "loss": 0.9687, "step": 5020 }, { "epoch": 0.3401992004878379, "grad_norm": 6.9238362312316895, "learning_rate": 9.415428845232391e-05, "loss": 1.0757, "step": 5021 }, { "epoch": 0.34026695575581, "grad_norm": 7.4261698722839355, "learning_rate": 9.415291943322609e-05, "loss": 1.0636, "step": 5022 }, { "epoch": 0.3403347110237821, "grad_norm": 6.1964030265808105, "learning_rate": 9.415155041412828e-05, "loss": 0.739, "step": 5023 }, { "epoch": 0.3404024662917542, "grad_norm": 7.07076358795166, "learning_rate": 9.415018139503046e-05, "loss": 0.9435, "step": 5024 }, { "epoch": 0.3404702215597263, "grad_norm": 8.069670677185059, "learning_rate": 9.414881237593264e-05, "loss": 0.9745, "step": 5025 }, { "epoch": 0.3405379768276984, "grad_norm": 7.559169769287109, "learning_rate": 9.414744335683482e-05, "loss": 0.9329, "step": 5026 }, { "epoch": 0.34060573209567047, "grad_norm": 8.438983917236328, "learning_rate": 9.4146074337737e-05, "loss": 1.0608, "step": 5027 }, { "epoch": 0.3406734873636425, "grad_norm": 6.522724151611328, "learning_rate": 9.41447053186392e-05, "loss": 0.7912, "step": 5028 }, { "epoch": 0.3407412426316146, "grad_norm": 8.31757926940918, "learning_rate": 9.414333629954138e-05, "loss": 0.7535, "step": 5029 }, { "epoch": 0.3408089978995867, "grad_norm": 7.540639877319336, "learning_rate": 9.414196728044356e-05, "loss": 0.7239, "step": 5030 }, { "epoch": 0.3408767531675588, "grad_norm": 7.016546726226807, "learning_rate": 9.414059826134575e-05, "loss": 1.0658, "step": 5031 }, { "epoch": 0.34094450843553087, "grad_norm": 5.6253814697265625, "learning_rate": 9.413922924224793e-05, "loss": 0.8522, "step": 5032 }, { "epoch": 0.34101226370350296, "grad_norm": 7.693328380584717, "learning_rate": 9.413786022315011e-05, "loss": 1.1011, "step": 5033 }, { "epoch": 0.34108001897147505, "grad_norm": 8.77978229522705, "learning_rate": 9.413649120405231e-05, "loss": 1.0617, "step": 5034 }, { "epoch": 0.34114777423944714, "grad_norm": 6.854990005493164, "learning_rate": 9.413512218495449e-05, "loss": 1.0232, "step": 5035 }, { "epoch": 0.3412155295074192, "grad_norm": 8.380729675292969, "learning_rate": 9.413375316585667e-05, "loss": 0.7554, "step": 5036 }, { "epoch": 0.34128328477539127, "grad_norm": 6.667641639709473, "learning_rate": 9.413238414675886e-05, "loss": 0.8992, "step": 5037 }, { "epoch": 0.34135104004336336, "grad_norm": 7.5778489112854, "learning_rate": 9.413101512766104e-05, "loss": 0.9521, "step": 5038 }, { "epoch": 0.34141879531133545, "grad_norm": 10.053051948547363, "learning_rate": 9.412964610856322e-05, "loss": 0.9595, "step": 5039 }, { "epoch": 0.34148655057930755, "grad_norm": 7.460272789001465, "learning_rate": 9.41282770894654e-05, "loss": 0.683, "step": 5040 }, { "epoch": 0.34155430584727964, "grad_norm": 7.724341869354248, "learning_rate": 9.412690807036758e-05, "loss": 0.9902, "step": 5041 }, { "epoch": 0.34162206111525173, "grad_norm": 7.729767799377441, "learning_rate": 9.412553905126978e-05, "loss": 0.9249, "step": 5042 }, { "epoch": 0.3416898163832238, "grad_norm": 6.388404846191406, "learning_rate": 9.412417003217196e-05, "loss": 1.0154, "step": 5043 }, { "epoch": 0.34175757165119586, "grad_norm": 6.2206130027771, "learning_rate": 9.412280101307414e-05, "loss": 0.8169, "step": 5044 }, { "epoch": 0.34182532691916795, "grad_norm": 7.383151054382324, "learning_rate": 9.412143199397632e-05, "loss": 1.0476, "step": 5045 }, { "epoch": 0.34189308218714004, "grad_norm": 7.733860969543457, "learning_rate": 9.412006297487851e-05, "loss": 0.9861, "step": 5046 }, { "epoch": 0.34196083745511213, "grad_norm": 7.823349475860596, "learning_rate": 9.41186939557807e-05, "loss": 0.8351, "step": 5047 }, { "epoch": 0.3420285927230842, "grad_norm": 10.20052433013916, "learning_rate": 9.411732493668287e-05, "loss": 1.2573, "step": 5048 }, { "epoch": 0.3420963479910563, "grad_norm": 7.878631114959717, "learning_rate": 9.411595591758505e-05, "loss": 1.0979, "step": 5049 }, { "epoch": 0.3421641032590284, "grad_norm": 7.230095386505127, "learning_rate": 9.411458689848723e-05, "loss": 0.9929, "step": 5050 }, { "epoch": 0.3422318585270005, "grad_norm": 7.354804992675781, "learning_rate": 9.411321787938943e-05, "loss": 1.0552, "step": 5051 }, { "epoch": 0.34229961379497253, "grad_norm": 9.846439361572266, "learning_rate": 9.411184886029161e-05, "loss": 1.0616, "step": 5052 }, { "epoch": 0.3423673690629446, "grad_norm": 6.999122142791748, "learning_rate": 9.411047984119379e-05, "loss": 0.9958, "step": 5053 }, { "epoch": 0.3424351243309167, "grad_norm": 5.719442367553711, "learning_rate": 9.410911082209597e-05, "loss": 0.8239, "step": 5054 }, { "epoch": 0.3425028795988888, "grad_norm": 7.650005340576172, "learning_rate": 9.410774180299816e-05, "loss": 0.9736, "step": 5055 }, { "epoch": 0.3425706348668609, "grad_norm": 7.938320159912109, "learning_rate": 9.410637278390034e-05, "loss": 0.7809, "step": 5056 }, { "epoch": 0.342638390134833, "grad_norm": 8.319568634033203, "learning_rate": 9.410500376480252e-05, "loss": 0.9162, "step": 5057 }, { "epoch": 0.3427061454028051, "grad_norm": 8.151114463806152, "learning_rate": 9.41036347457047e-05, "loss": 1.1071, "step": 5058 }, { "epoch": 0.3427739006707772, "grad_norm": 7.333863735198975, "learning_rate": 9.410226572660688e-05, "loss": 0.8425, "step": 5059 }, { "epoch": 0.3428416559387492, "grad_norm": 6.848262310028076, "learning_rate": 9.410089670750908e-05, "loss": 0.9385, "step": 5060 }, { "epoch": 0.3429094112067213, "grad_norm": 7.205636024475098, "learning_rate": 9.409952768841126e-05, "loss": 0.8448, "step": 5061 }, { "epoch": 0.3429771664746934, "grad_norm": 7.133283615112305, "learning_rate": 9.409815866931344e-05, "loss": 0.8306, "step": 5062 }, { "epoch": 0.3430449217426655, "grad_norm": 6.693148136138916, "learning_rate": 9.409678965021562e-05, "loss": 1.0321, "step": 5063 }, { "epoch": 0.3431126770106376, "grad_norm": 7.183022975921631, "learning_rate": 9.40954206311178e-05, "loss": 1.0067, "step": 5064 }, { "epoch": 0.34318043227860967, "grad_norm": 7.537884712219238, "learning_rate": 9.409405161202e-05, "loss": 0.9719, "step": 5065 }, { "epoch": 0.34324818754658176, "grad_norm": 7.812978267669678, "learning_rate": 9.409268259292217e-05, "loss": 0.9936, "step": 5066 }, { "epoch": 0.34331594281455385, "grad_norm": 7.93212366104126, "learning_rate": 9.409131357382435e-05, "loss": 1.1685, "step": 5067 }, { "epoch": 0.3433836980825259, "grad_norm": 7.298057556152344, "learning_rate": 9.408994455472653e-05, "loss": 0.908, "step": 5068 }, { "epoch": 0.343451453350498, "grad_norm": 8.108441352844238, "learning_rate": 9.408857553562873e-05, "loss": 0.9916, "step": 5069 }, { "epoch": 0.3435192086184701, "grad_norm": 7.206883430480957, "learning_rate": 9.408720651653091e-05, "loss": 0.9431, "step": 5070 }, { "epoch": 0.34358696388644216, "grad_norm": 8.54073715209961, "learning_rate": 9.408583749743309e-05, "loss": 0.9156, "step": 5071 }, { "epoch": 0.34365471915441426, "grad_norm": 7.274294376373291, "learning_rate": 9.408446847833527e-05, "loss": 0.7436, "step": 5072 }, { "epoch": 0.34372247442238635, "grad_norm": 7.734363079071045, "learning_rate": 9.408309945923745e-05, "loss": 1.054, "step": 5073 }, { "epoch": 0.34379022969035844, "grad_norm": 7.952118873596191, "learning_rate": 9.408173044013964e-05, "loss": 0.9152, "step": 5074 }, { "epoch": 0.34385798495833053, "grad_norm": 7.481127738952637, "learning_rate": 9.408036142104182e-05, "loss": 1.0134, "step": 5075 }, { "epoch": 0.34392574022630257, "grad_norm": 8.52331256866455, "learning_rate": 9.4078992401944e-05, "loss": 0.9724, "step": 5076 }, { "epoch": 0.34399349549427466, "grad_norm": 7.454250335693359, "learning_rate": 9.40776233828462e-05, "loss": 0.9535, "step": 5077 }, { "epoch": 0.34406125076224675, "grad_norm": 7.192932605743408, "learning_rate": 9.407625436374838e-05, "loss": 0.8923, "step": 5078 }, { "epoch": 0.34412900603021884, "grad_norm": 8.29765796661377, "learning_rate": 9.407488534465056e-05, "loss": 1.0374, "step": 5079 }, { "epoch": 0.34419676129819093, "grad_norm": 7.1159820556640625, "learning_rate": 9.407351632555275e-05, "loss": 0.7876, "step": 5080 }, { "epoch": 0.344264516566163, "grad_norm": 7.157162189483643, "learning_rate": 9.407214730645493e-05, "loss": 0.8747, "step": 5081 }, { "epoch": 0.3443322718341351, "grad_norm": 6.686028003692627, "learning_rate": 9.407077828735711e-05, "loss": 0.8189, "step": 5082 }, { "epoch": 0.3444000271021072, "grad_norm": 8.957246780395508, "learning_rate": 9.406940926825931e-05, "loss": 1.0617, "step": 5083 }, { "epoch": 0.3444677823700793, "grad_norm": 7.599720001220703, "learning_rate": 9.406804024916149e-05, "loss": 1.0455, "step": 5084 }, { "epoch": 0.34453553763805134, "grad_norm": 5.757546424865723, "learning_rate": 9.406667123006367e-05, "loss": 0.7654, "step": 5085 }, { "epoch": 0.34460329290602343, "grad_norm": 6.592649936676025, "learning_rate": 9.406530221096585e-05, "loss": 0.9713, "step": 5086 }, { "epoch": 0.3446710481739955, "grad_norm": 7.119808673858643, "learning_rate": 9.406393319186803e-05, "loss": 0.7413, "step": 5087 }, { "epoch": 0.3447388034419676, "grad_norm": 6.288692951202393, "learning_rate": 9.406256417277022e-05, "loss": 0.9479, "step": 5088 }, { "epoch": 0.3448065587099397, "grad_norm": 6.267573356628418, "learning_rate": 9.40611951536724e-05, "loss": 0.9745, "step": 5089 }, { "epoch": 0.3448743139779118, "grad_norm": 7.639352798461914, "learning_rate": 9.405982613457458e-05, "loss": 0.9655, "step": 5090 }, { "epoch": 0.3449420692458839, "grad_norm": 6.32698917388916, "learning_rate": 9.405845711547676e-05, "loss": 0.789, "step": 5091 }, { "epoch": 0.345009824513856, "grad_norm": 8.769354820251465, "learning_rate": 9.405708809637896e-05, "loss": 1.1124, "step": 5092 }, { "epoch": 0.345077579781828, "grad_norm": 7.179650783538818, "learning_rate": 9.405571907728114e-05, "loss": 0.7939, "step": 5093 }, { "epoch": 0.3451453350498001, "grad_norm": 10.112159729003906, "learning_rate": 9.405435005818332e-05, "loss": 0.943, "step": 5094 }, { "epoch": 0.3452130903177722, "grad_norm": 6.925206661224365, "learning_rate": 9.40529810390855e-05, "loss": 1.0444, "step": 5095 }, { "epoch": 0.3452808455857443, "grad_norm": 7.8664398193359375, "learning_rate": 9.405161201998768e-05, "loss": 0.9643, "step": 5096 }, { "epoch": 0.3453486008537164, "grad_norm": 7.060378551483154, "learning_rate": 9.405024300088987e-05, "loss": 0.9043, "step": 5097 }, { "epoch": 0.3454163561216885, "grad_norm": 8.105093002319336, "learning_rate": 9.404887398179205e-05, "loss": 1.1106, "step": 5098 }, { "epoch": 0.34548411138966056, "grad_norm": 7.8055739402771, "learning_rate": 9.404750496269423e-05, "loss": 0.9638, "step": 5099 }, { "epoch": 0.34555186665763266, "grad_norm": 8.225363731384277, "learning_rate": 9.404613594359641e-05, "loss": 0.7878, "step": 5100 }, { "epoch": 0.3456196219256047, "grad_norm": 7.809800148010254, "learning_rate": 9.404476692449861e-05, "loss": 0.8241, "step": 5101 }, { "epoch": 0.3456873771935768, "grad_norm": 7.520929336547852, "learning_rate": 9.404339790540079e-05, "loss": 1.0689, "step": 5102 }, { "epoch": 0.3457551324615489, "grad_norm": 5.996449947357178, "learning_rate": 9.404202888630297e-05, "loss": 0.9163, "step": 5103 }, { "epoch": 0.34582288772952097, "grad_norm": 7.2040114402771, "learning_rate": 9.404065986720515e-05, "loss": 1.0615, "step": 5104 }, { "epoch": 0.34589064299749306, "grad_norm": 7.268972873687744, "learning_rate": 9.403929084810733e-05, "loss": 0.8785, "step": 5105 }, { "epoch": 0.34595839826546515, "grad_norm": 9.244166374206543, "learning_rate": 9.403792182900952e-05, "loss": 0.8714, "step": 5106 }, { "epoch": 0.34602615353343724, "grad_norm": 6.766915321350098, "learning_rate": 9.40365528099117e-05, "loss": 0.8075, "step": 5107 }, { "epoch": 0.34609390880140933, "grad_norm": 8.137444496154785, "learning_rate": 9.403518379081388e-05, "loss": 1.0687, "step": 5108 }, { "epoch": 0.34616166406938137, "grad_norm": 7.488312244415283, "learning_rate": 9.403381477171606e-05, "loss": 0.8056, "step": 5109 }, { "epoch": 0.34622941933735346, "grad_norm": 6.555777549743652, "learning_rate": 9.403244575261826e-05, "loss": 0.8761, "step": 5110 }, { "epoch": 0.34629717460532555, "grad_norm": 11.631979942321777, "learning_rate": 9.403107673352044e-05, "loss": 0.8139, "step": 5111 }, { "epoch": 0.34636492987329764, "grad_norm": 5.973453998565674, "learning_rate": 9.402970771442262e-05, "loss": 0.8816, "step": 5112 }, { "epoch": 0.34643268514126974, "grad_norm": 5.817539215087891, "learning_rate": 9.40283386953248e-05, "loss": 0.6034, "step": 5113 }, { "epoch": 0.3465004404092418, "grad_norm": 8.215511322021484, "learning_rate": 9.402696967622698e-05, "loss": 0.8876, "step": 5114 }, { "epoch": 0.3465681956772139, "grad_norm": 5.848570346832275, "learning_rate": 9.402560065712917e-05, "loss": 0.7093, "step": 5115 }, { "epoch": 0.346635950945186, "grad_norm": 7.996893405914307, "learning_rate": 9.402423163803135e-05, "loss": 0.8865, "step": 5116 }, { "epoch": 0.34670370621315805, "grad_norm": 7.348632335662842, "learning_rate": 9.402286261893353e-05, "loss": 0.8042, "step": 5117 }, { "epoch": 0.34677146148113014, "grad_norm": 6.820111274719238, "learning_rate": 9.402149359983571e-05, "loss": 0.8744, "step": 5118 }, { "epoch": 0.34683921674910223, "grad_norm": 7.354914665222168, "learning_rate": 9.40201245807379e-05, "loss": 0.892, "step": 5119 }, { "epoch": 0.3469069720170743, "grad_norm": 7.751887321472168, "learning_rate": 9.401875556164009e-05, "loss": 0.8752, "step": 5120 }, { "epoch": 0.3469747272850464, "grad_norm": 9.630827903747559, "learning_rate": 9.401738654254227e-05, "loss": 0.999, "step": 5121 }, { "epoch": 0.3470424825530185, "grad_norm": 9.546615600585938, "learning_rate": 9.401601752344445e-05, "loss": 1.341, "step": 5122 }, { "epoch": 0.3471102378209906, "grad_norm": 7.031918048858643, "learning_rate": 9.401464850434663e-05, "loss": 1.1371, "step": 5123 }, { "epoch": 0.3471779930889627, "grad_norm": 9.030802726745605, "learning_rate": 9.401327948524882e-05, "loss": 0.9883, "step": 5124 }, { "epoch": 0.3472457483569347, "grad_norm": 6.786712646484375, "learning_rate": 9.4011910466151e-05, "loss": 0.8692, "step": 5125 }, { "epoch": 0.3473135036249068, "grad_norm": 7.327836990356445, "learning_rate": 9.401054144705318e-05, "loss": 0.8157, "step": 5126 }, { "epoch": 0.3473812588928789, "grad_norm": 6.41863489151001, "learning_rate": 9.400917242795538e-05, "loss": 0.8565, "step": 5127 }, { "epoch": 0.347449014160851, "grad_norm": 5.737055778503418, "learning_rate": 9.400780340885756e-05, "loss": 0.7385, "step": 5128 }, { "epoch": 0.3475167694288231, "grad_norm": 6.321473121643066, "learning_rate": 9.400643438975974e-05, "loss": 0.6459, "step": 5129 }, { "epoch": 0.3475845246967952, "grad_norm": 9.07598876953125, "learning_rate": 9.400506537066193e-05, "loss": 1.3719, "step": 5130 }, { "epoch": 0.3476522799647673, "grad_norm": 6.3348388671875, "learning_rate": 9.400369635156411e-05, "loss": 0.903, "step": 5131 }, { "epoch": 0.34772003523273937, "grad_norm": 7.836405277252197, "learning_rate": 9.40023273324663e-05, "loss": 0.8854, "step": 5132 }, { "epoch": 0.34778779050071146, "grad_norm": 7.712037086486816, "learning_rate": 9.400095831336849e-05, "loss": 0.8756, "step": 5133 }, { "epoch": 0.3478555457686835, "grad_norm": 7.445090293884277, "learning_rate": 9.399958929427067e-05, "loss": 1.0776, "step": 5134 }, { "epoch": 0.3479233010366556, "grad_norm": 6.7496724128723145, "learning_rate": 9.399822027517285e-05, "loss": 0.9425, "step": 5135 }, { "epoch": 0.3479910563046277, "grad_norm": 7.703073501586914, "learning_rate": 9.399685125607503e-05, "loss": 0.9217, "step": 5136 }, { "epoch": 0.34805881157259977, "grad_norm": 5.972830772399902, "learning_rate": 9.399548223697721e-05, "loss": 0.5922, "step": 5137 }, { "epoch": 0.34812656684057186, "grad_norm": 7.165718078613281, "learning_rate": 9.39941132178794e-05, "loss": 0.9624, "step": 5138 }, { "epoch": 0.34819432210854395, "grad_norm": 8.589313507080078, "learning_rate": 9.399274419878158e-05, "loss": 1.0485, "step": 5139 }, { "epoch": 0.34826207737651604, "grad_norm": 6.503042697906494, "learning_rate": 9.399137517968376e-05, "loss": 0.7833, "step": 5140 }, { "epoch": 0.34832983264448814, "grad_norm": 9.673978805541992, "learning_rate": 9.399000616058594e-05, "loss": 1.2266, "step": 5141 }, { "epoch": 0.34839758791246017, "grad_norm": 6.86154842376709, "learning_rate": 9.398863714148812e-05, "loss": 0.8709, "step": 5142 }, { "epoch": 0.34846534318043226, "grad_norm": 8.111627578735352, "learning_rate": 9.398726812239032e-05, "loss": 0.9657, "step": 5143 }, { "epoch": 0.34853309844840435, "grad_norm": 7.725754737854004, "learning_rate": 9.39858991032925e-05, "loss": 0.9551, "step": 5144 }, { "epoch": 0.34860085371637645, "grad_norm": 8.093070030212402, "learning_rate": 9.398453008419468e-05, "loss": 0.9814, "step": 5145 }, { "epoch": 0.34866860898434854, "grad_norm": 8.375652313232422, "learning_rate": 9.398316106509686e-05, "loss": 0.9232, "step": 5146 }, { "epoch": 0.34873636425232063, "grad_norm": 7.012859344482422, "learning_rate": 9.398179204599905e-05, "loss": 0.8322, "step": 5147 }, { "epoch": 0.3488041195202927, "grad_norm": 8.437539100646973, "learning_rate": 9.398042302690123e-05, "loss": 0.852, "step": 5148 }, { "epoch": 0.3488718747882648, "grad_norm": 9.844721794128418, "learning_rate": 9.397905400780341e-05, "loss": 0.8601, "step": 5149 }, { "epoch": 0.34893963005623685, "grad_norm": 7.359288215637207, "learning_rate": 9.39776849887056e-05, "loss": 0.9313, "step": 5150 }, { "epoch": 0.34900738532420894, "grad_norm": 7.528818607330322, "learning_rate": 9.397631596960777e-05, "loss": 0.923, "step": 5151 }, { "epoch": 0.34907514059218103, "grad_norm": 7.8577399253845215, "learning_rate": 9.397494695050997e-05, "loss": 0.9569, "step": 5152 }, { "epoch": 0.3491428958601531, "grad_norm": 6.960932731628418, "learning_rate": 9.397357793141215e-05, "loss": 0.9025, "step": 5153 }, { "epoch": 0.3492106511281252, "grad_norm": 6.392679691314697, "learning_rate": 9.397220891231433e-05, "loss": 0.7948, "step": 5154 }, { "epoch": 0.3492784063960973, "grad_norm": 8.22850227355957, "learning_rate": 9.397083989321651e-05, "loss": 0.9932, "step": 5155 }, { "epoch": 0.3493461616640694, "grad_norm": 6.1851277351379395, "learning_rate": 9.39694708741187e-05, "loss": 0.776, "step": 5156 }, { "epoch": 0.3494139169320415, "grad_norm": 9.058121681213379, "learning_rate": 9.396810185502088e-05, "loss": 1.1036, "step": 5157 }, { "epoch": 0.3494816722000135, "grad_norm": 7.11410665512085, "learning_rate": 9.396673283592306e-05, "loss": 0.8862, "step": 5158 }, { "epoch": 0.3495494274679856, "grad_norm": 7.148082256317139, "learning_rate": 9.396536381682524e-05, "loss": 0.9796, "step": 5159 }, { "epoch": 0.3496171827359577, "grad_norm": 7.258500576019287, "learning_rate": 9.396399479772742e-05, "loss": 0.9617, "step": 5160 }, { "epoch": 0.3496849380039298, "grad_norm": 5.1490044593811035, "learning_rate": 9.396262577862962e-05, "loss": 0.6844, "step": 5161 }, { "epoch": 0.3497526932719019, "grad_norm": 6.73121452331543, "learning_rate": 9.39612567595318e-05, "loss": 1.0669, "step": 5162 }, { "epoch": 0.349820448539874, "grad_norm": 7.891530990600586, "learning_rate": 9.395988774043398e-05, "loss": 1.0439, "step": 5163 }, { "epoch": 0.3498882038078461, "grad_norm": 7.256270885467529, "learning_rate": 9.395851872133616e-05, "loss": 0.9682, "step": 5164 }, { "epoch": 0.34995595907581817, "grad_norm": 7.153442859649658, "learning_rate": 9.395714970223835e-05, "loss": 0.913, "step": 5165 }, { "epoch": 0.3500237143437902, "grad_norm": 8.722851753234863, "learning_rate": 9.395578068314053e-05, "loss": 0.8749, "step": 5166 }, { "epoch": 0.3500914696117623, "grad_norm": 6.8031005859375, "learning_rate": 9.395441166404271e-05, "loss": 0.8755, "step": 5167 }, { "epoch": 0.3501592248797344, "grad_norm": 5.449770450592041, "learning_rate": 9.39530426449449e-05, "loss": 0.8813, "step": 5168 }, { "epoch": 0.3502269801477065, "grad_norm": 7.74420690536499, "learning_rate": 9.395167362584707e-05, "loss": 0.8361, "step": 5169 }, { "epoch": 0.35029473541567857, "grad_norm": 7.8701677322387695, "learning_rate": 9.395030460674927e-05, "loss": 0.9294, "step": 5170 }, { "epoch": 0.35036249068365066, "grad_norm": 5.897029876708984, "learning_rate": 9.394893558765145e-05, "loss": 0.7739, "step": 5171 }, { "epoch": 0.35043024595162275, "grad_norm": 6.7868781089782715, "learning_rate": 9.394756656855363e-05, "loss": 0.7252, "step": 5172 }, { "epoch": 0.35049800121959485, "grad_norm": 10.289764404296875, "learning_rate": 9.394619754945582e-05, "loss": 1.0943, "step": 5173 }, { "epoch": 0.3505657564875669, "grad_norm": 7.676051616668701, "learning_rate": 9.3944828530358e-05, "loss": 0.881, "step": 5174 }, { "epoch": 0.350633511755539, "grad_norm": 7.702653408050537, "learning_rate": 9.394345951126018e-05, "loss": 1.0346, "step": 5175 }, { "epoch": 0.35070126702351107, "grad_norm": 7.525374889373779, "learning_rate": 9.394209049216238e-05, "loss": 0.9955, "step": 5176 }, { "epoch": 0.35076902229148316, "grad_norm": 7.942657470703125, "learning_rate": 9.394072147306456e-05, "loss": 0.9715, "step": 5177 }, { "epoch": 0.35083677755945525, "grad_norm": 9.328110694885254, "learning_rate": 9.393935245396674e-05, "loss": 1.0753, "step": 5178 }, { "epoch": 0.35090453282742734, "grad_norm": 8.149508476257324, "learning_rate": 9.393798343486893e-05, "loss": 0.9818, "step": 5179 }, { "epoch": 0.35097228809539943, "grad_norm": 6.882593154907227, "learning_rate": 9.393661441577111e-05, "loss": 1.105, "step": 5180 }, { "epoch": 0.3510400433633715, "grad_norm": 7.821188926696777, "learning_rate": 9.393524539667329e-05, "loss": 0.8106, "step": 5181 }, { "epoch": 0.35110779863134356, "grad_norm": 7.235447406768799, "learning_rate": 9.393387637757547e-05, "loss": 0.9304, "step": 5182 }, { "epoch": 0.35117555389931565, "grad_norm": 6.859196662902832, "learning_rate": 9.393250735847765e-05, "loss": 0.7797, "step": 5183 }, { "epoch": 0.35124330916728774, "grad_norm": 8.33879566192627, "learning_rate": 9.393113833937985e-05, "loss": 1.0075, "step": 5184 }, { "epoch": 0.35131106443525983, "grad_norm": 6.733922004699707, "learning_rate": 9.392976932028203e-05, "loss": 0.7304, "step": 5185 }, { "epoch": 0.3513788197032319, "grad_norm": 8.839714050292969, "learning_rate": 9.392840030118421e-05, "loss": 0.8852, "step": 5186 }, { "epoch": 0.351446574971204, "grad_norm": 9.954099655151367, "learning_rate": 9.392703128208639e-05, "loss": 1.1631, "step": 5187 }, { "epoch": 0.3515143302391761, "grad_norm": 8.042675971984863, "learning_rate": 9.392566226298858e-05, "loss": 0.8741, "step": 5188 }, { "epoch": 0.3515820855071482, "grad_norm": 6.661304950714111, "learning_rate": 9.392429324389076e-05, "loss": 0.8879, "step": 5189 }, { "epoch": 0.3516498407751203, "grad_norm": 6.568962097167969, "learning_rate": 9.392292422479294e-05, "loss": 0.6823, "step": 5190 }, { "epoch": 0.35171759604309233, "grad_norm": 6.836343288421631, "learning_rate": 9.392155520569512e-05, "loss": 0.8174, "step": 5191 }, { "epoch": 0.3517853513110644, "grad_norm": 7.555830478668213, "learning_rate": 9.39201861865973e-05, "loss": 0.803, "step": 5192 }, { "epoch": 0.3518531065790365, "grad_norm": 7.264036178588867, "learning_rate": 9.39188171674995e-05, "loss": 1.0786, "step": 5193 }, { "epoch": 0.3519208618470086, "grad_norm": 5.6876420974731445, "learning_rate": 9.391744814840168e-05, "loss": 0.7553, "step": 5194 }, { "epoch": 0.3519886171149807, "grad_norm": 8.890271186828613, "learning_rate": 9.391607912930386e-05, "loss": 1.0685, "step": 5195 }, { "epoch": 0.3520563723829528, "grad_norm": 6.010801315307617, "learning_rate": 9.391471011020604e-05, "loss": 0.7915, "step": 5196 }, { "epoch": 0.3521241276509249, "grad_norm": 8.496731758117676, "learning_rate": 9.391334109110822e-05, "loss": 1.2103, "step": 5197 }, { "epoch": 0.35219188291889697, "grad_norm": 8.196046829223633, "learning_rate": 9.391197207201041e-05, "loss": 1.0173, "step": 5198 }, { "epoch": 0.352259638186869, "grad_norm": 8.809300422668457, "learning_rate": 9.391060305291259e-05, "loss": 0.906, "step": 5199 }, { "epoch": 0.3523273934548411, "grad_norm": 8.437650680541992, "learning_rate": 9.390923403381477e-05, "loss": 0.8865, "step": 5200 }, { "epoch": 0.3523951487228132, "grad_norm": 8.659364700317383, "learning_rate": 9.390786501471695e-05, "loss": 0.8878, "step": 5201 }, { "epoch": 0.3524629039907853, "grad_norm": 6.658010482788086, "learning_rate": 9.390649599561915e-05, "loss": 0.9141, "step": 5202 }, { "epoch": 0.3525306592587574, "grad_norm": 6.702786445617676, "learning_rate": 9.390512697652133e-05, "loss": 0.8193, "step": 5203 }, { "epoch": 0.35259841452672946, "grad_norm": 8.0852632522583, "learning_rate": 9.390375795742351e-05, "loss": 0.9655, "step": 5204 }, { "epoch": 0.35266616979470156, "grad_norm": 6.598480224609375, "learning_rate": 9.390238893832569e-05, "loss": 0.8858, "step": 5205 }, { "epoch": 0.35273392506267365, "grad_norm": 8.510173797607422, "learning_rate": 9.390101991922787e-05, "loss": 1.0543, "step": 5206 }, { "epoch": 0.3528016803306457, "grad_norm": 7.498827934265137, "learning_rate": 9.389965090013006e-05, "loss": 1.138, "step": 5207 }, { "epoch": 0.3528694355986178, "grad_norm": 6.370652675628662, "learning_rate": 9.389828188103224e-05, "loss": 0.996, "step": 5208 }, { "epoch": 0.35293719086658987, "grad_norm": 6.830577373504639, "learning_rate": 9.389691286193442e-05, "loss": 0.7562, "step": 5209 }, { "epoch": 0.35300494613456196, "grad_norm": 7.01290225982666, "learning_rate": 9.38955438428366e-05, "loss": 0.8141, "step": 5210 }, { "epoch": 0.35307270140253405, "grad_norm": 6.957060813903809, "learning_rate": 9.38941748237388e-05, "loss": 1.1923, "step": 5211 }, { "epoch": 0.35314045667050614, "grad_norm": 5.929959297180176, "learning_rate": 9.389280580464098e-05, "loss": 0.7928, "step": 5212 }, { "epoch": 0.35320821193847823, "grad_norm": 8.654718399047852, "learning_rate": 9.389143678554316e-05, "loss": 1.0896, "step": 5213 }, { "epoch": 0.3532759672064503, "grad_norm": 5.981748580932617, "learning_rate": 9.389006776644534e-05, "loss": 0.9777, "step": 5214 }, { "epoch": 0.35334372247442236, "grad_norm": 6.251374244689941, "learning_rate": 9.388869874734752e-05, "loss": 0.8562, "step": 5215 }, { "epoch": 0.35341147774239445, "grad_norm": 7.049698352813721, "learning_rate": 9.388732972824971e-05, "loss": 0.8297, "step": 5216 }, { "epoch": 0.35347923301036654, "grad_norm": 8.04577350616455, "learning_rate": 9.38859607091519e-05, "loss": 1.2035, "step": 5217 }, { "epoch": 0.35354698827833864, "grad_norm": 6.800088405609131, "learning_rate": 9.388459169005407e-05, "loss": 0.9551, "step": 5218 }, { "epoch": 0.35361474354631073, "grad_norm": 7.368710517883301, "learning_rate": 9.388322267095627e-05, "loss": 0.8174, "step": 5219 }, { "epoch": 0.3536824988142828, "grad_norm": 7.573550701141357, "learning_rate": 9.388185365185845e-05, "loss": 1.0921, "step": 5220 }, { "epoch": 0.3537502540822549, "grad_norm": 8.282757759094238, "learning_rate": 9.388048463276063e-05, "loss": 0.9124, "step": 5221 }, { "epoch": 0.353818009350227, "grad_norm": 8.21078872680664, "learning_rate": 9.387911561366282e-05, "loss": 1.0818, "step": 5222 }, { "epoch": 0.35388576461819904, "grad_norm": 7.401734352111816, "learning_rate": 9.3877746594565e-05, "loss": 0.9906, "step": 5223 }, { "epoch": 0.35395351988617113, "grad_norm": 8.078129768371582, "learning_rate": 9.387637757546718e-05, "loss": 1.1233, "step": 5224 }, { "epoch": 0.3540212751541432, "grad_norm": 7.231998920440674, "learning_rate": 9.387500855636938e-05, "loss": 0.8412, "step": 5225 }, { "epoch": 0.3540890304221153, "grad_norm": 8.709794044494629, "learning_rate": 9.387363953727156e-05, "loss": 0.8346, "step": 5226 }, { "epoch": 0.3541567856900874, "grad_norm": 8.45758056640625, "learning_rate": 9.387227051817374e-05, "loss": 1.0201, "step": 5227 }, { "epoch": 0.3542245409580595, "grad_norm": 5.156687259674072, "learning_rate": 9.387090149907592e-05, "loss": 0.8029, "step": 5228 }, { "epoch": 0.3542922962260316, "grad_norm": 6.2013163566589355, "learning_rate": 9.38695324799781e-05, "loss": 0.8984, "step": 5229 }, { "epoch": 0.3543600514940037, "grad_norm": 7.414056301116943, "learning_rate": 9.386816346088029e-05, "loss": 0.8606, "step": 5230 }, { "epoch": 0.3544278067619757, "grad_norm": 8.167763710021973, "learning_rate": 9.386679444178247e-05, "loss": 0.973, "step": 5231 }, { "epoch": 0.3544955620299478, "grad_norm": 7.290304183959961, "learning_rate": 9.386542542268465e-05, "loss": 0.8988, "step": 5232 }, { "epoch": 0.3545633172979199, "grad_norm": 7.181061744689941, "learning_rate": 9.386405640358683e-05, "loss": 0.8689, "step": 5233 }, { "epoch": 0.354631072565892, "grad_norm": 8.87985610961914, "learning_rate": 9.386268738448903e-05, "loss": 1.0076, "step": 5234 }, { "epoch": 0.3546988278338641, "grad_norm": 6.205500602722168, "learning_rate": 9.386131836539121e-05, "loss": 0.8101, "step": 5235 }, { "epoch": 0.3547665831018362, "grad_norm": 7.425187587738037, "learning_rate": 9.385994934629339e-05, "loss": 0.9122, "step": 5236 }, { "epoch": 0.35483433836980827, "grad_norm": 9.541454315185547, "learning_rate": 9.385858032719557e-05, "loss": 0.9868, "step": 5237 }, { "epoch": 0.35490209363778036, "grad_norm": 6.853835582733154, "learning_rate": 9.385721130809775e-05, "loss": 0.9506, "step": 5238 }, { "epoch": 0.35496984890575245, "grad_norm": 9.07067584991455, "learning_rate": 9.385584228899994e-05, "loss": 0.9146, "step": 5239 }, { "epoch": 0.3550376041737245, "grad_norm": 7.5352373123168945, "learning_rate": 9.385447326990212e-05, "loss": 1.0089, "step": 5240 }, { "epoch": 0.3551053594416966, "grad_norm": 5.626684188842773, "learning_rate": 9.38531042508043e-05, "loss": 0.7057, "step": 5241 }, { "epoch": 0.35517311470966867, "grad_norm": 6.1156768798828125, "learning_rate": 9.385173523170648e-05, "loss": 0.808, "step": 5242 }, { "epoch": 0.35524086997764076, "grad_norm": 7.227247714996338, "learning_rate": 9.385036621260868e-05, "loss": 0.9477, "step": 5243 }, { "epoch": 0.35530862524561285, "grad_norm": 7.034331798553467, "learning_rate": 9.384899719351086e-05, "loss": 0.7929, "step": 5244 }, { "epoch": 0.35537638051358494, "grad_norm": 5.943993091583252, "learning_rate": 9.384762817441304e-05, "loss": 0.8886, "step": 5245 }, { "epoch": 0.35544413578155704, "grad_norm": 7.859194278717041, "learning_rate": 9.384625915531522e-05, "loss": 1.0028, "step": 5246 }, { "epoch": 0.3555118910495291, "grad_norm": 6.8557448387146, "learning_rate": 9.38448901362174e-05, "loss": 0.8946, "step": 5247 }, { "epoch": 0.35557964631750116, "grad_norm": 6.733648777008057, "learning_rate": 9.384352111711959e-05, "loss": 0.9186, "step": 5248 }, { "epoch": 0.35564740158547326, "grad_norm": 5.929673194885254, "learning_rate": 9.384215209802177e-05, "loss": 0.8132, "step": 5249 }, { "epoch": 0.35571515685344535, "grad_norm": 6.680620193481445, "learning_rate": 9.384078307892395e-05, "loss": 0.9197, "step": 5250 }, { "epoch": 0.35578291212141744, "grad_norm": 8.043455123901367, "learning_rate": 9.383941405982613e-05, "loss": 1.0841, "step": 5251 }, { "epoch": 0.35585066738938953, "grad_norm": 9.225475311279297, "learning_rate": 9.383804504072831e-05, "loss": 1.3691, "step": 5252 }, { "epoch": 0.3559184226573616, "grad_norm": 8.588724136352539, "learning_rate": 9.383667602163051e-05, "loss": 1.1282, "step": 5253 }, { "epoch": 0.3559861779253337, "grad_norm": 8.21908950805664, "learning_rate": 9.383530700253269e-05, "loss": 0.7606, "step": 5254 }, { "epoch": 0.3560539331933058, "grad_norm": 6.107995510101318, "learning_rate": 9.383393798343487e-05, "loss": 1.1818, "step": 5255 }, { "epoch": 0.35612168846127784, "grad_norm": 6.612033367156982, "learning_rate": 9.383256896433705e-05, "loss": 0.8892, "step": 5256 }, { "epoch": 0.35618944372924993, "grad_norm": 6.935641288757324, "learning_rate": 9.383119994523924e-05, "loss": 0.8565, "step": 5257 }, { "epoch": 0.356257198997222, "grad_norm": 7.328373908996582, "learning_rate": 9.382983092614142e-05, "loss": 0.9409, "step": 5258 }, { "epoch": 0.3563249542651941, "grad_norm": 7.016412734985352, "learning_rate": 9.38284619070436e-05, "loss": 0.8768, "step": 5259 }, { "epoch": 0.3563927095331662, "grad_norm": 7.122356414794922, "learning_rate": 9.382709288794578e-05, "loss": 0.9048, "step": 5260 }, { "epoch": 0.3564604648011383, "grad_norm": 7.590730667114258, "learning_rate": 9.382572386884796e-05, "loss": 0.8154, "step": 5261 }, { "epoch": 0.3565282200691104, "grad_norm": 5.5446858406066895, "learning_rate": 9.382435484975016e-05, "loss": 0.8783, "step": 5262 }, { "epoch": 0.3565959753370825, "grad_norm": 7.153842449188232, "learning_rate": 9.382298583065234e-05, "loss": 1.2412, "step": 5263 }, { "epoch": 0.3566637306050545, "grad_norm": 7.115203857421875, "learning_rate": 9.382161681155452e-05, "loss": 1.0302, "step": 5264 }, { "epoch": 0.3567314858730266, "grad_norm": 5.7724833488464355, "learning_rate": 9.382024779245671e-05, "loss": 0.5839, "step": 5265 }, { "epoch": 0.3567992411409987, "grad_norm": 7.793944358825684, "learning_rate": 9.381887877335889e-05, "loss": 1.0893, "step": 5266 }, { "epoch": 0.3568669964089708, "grad_norm": 6.561145782470703, "learning_rate": 9.381750975426107e-05, "loss": 0.911, "step": 5267 }, { "epoch": 0.3569347516769429, "grad_norm": 6.053153038024902, "learning_rate": 9.381614073516327e-05, "loss": 0.9239, "step": 5268 }, { "epoch": 0.357002506944915, "grad_norm": 7.162718772888184, "learning_rate": 9.381477171606545e-05, "loss": 0.7193, "step": 5269 }, { "epoch": 0.35707026221288707, "grad_norm": 7.634250640869141, "learning_rate": 9.381340269696763e-05, "loss": 0.8826, "step": 5270 }, { "epoch": 0.35713801748085916, "grad_norm": 6.502168655395508, "learning_rate": 9.381203367786982e-05, "loss": 0.8278, "step": 5271 }, { "epoch": 0.3572057727488312, "grad_norm": 7.339065074920654, "learning_rate": 9.3810664658772e-05, "loss": 0.8818, "step": 5272 }, { "epoch": 0.3572735280168033, "grad_norm": 8.079582214355469, "learning_rate": 9.380929563967418e-05, "loss": 0.8596, "step": 5273 }, { "epoch": 0.3573412832847754, "grad_norm": 6.368169784545898, "learning_rate": 9.380792662057636e-05, "loss": 0.7352, "step": 5274 }, { "epoch": 0.35740903855274747, "grad_norm": 7.813303470611572, "learning_rate": 9.380655760147854e-05, "loss": 0.876, "step": 5275 }, { "epoch": 0.35747679382071956, "grad_norm": 8.180051803588867, "learning_rate": 9.380518858238074e-05, "loss": 0.8893, "step": 5276 }, { "epoch": 0.35754454908869165, "grad_norm": 7.618046283721924, "learning_rate": 9.380381956328292e-05, "loss": 0.9647, "step": 5277 }, { "epoch": 0.35761230435666375, "grad_norm": 7.507559776306152, "learning_rate": 9.38024505441851e-05, "loss": 0.9524, "step": 5278 }, { "epoch": 0.35768005962463584, "grad_norm": 9.470221519470215, "learning_rate": 9.380108152508728e-05, "loss": 1.0057, "step": 5279 }, { "epoch": 0.3577478148926079, "grad_norm": 6.612621784210205, "learning_rate": 9.379971250598947e-05, "loss": 0.8489, "step": 5280 }, { "epoch": 0.35781557016057997, "grad_norm": 7.146090030670166, "learning_rate": 9.379834348689165e-05, "loss": 0.9585, "step": 5281 }, { "epoch": 0.35788332542855206, "grad_norm": 6.300119400024414, "learning_rate": 9.379697446779383e-05, "loss": 0.881, "step": 5282 }, { "epoch": 0.35795108069652415, "grad_norm": 7.285689353942871, "learning_rate": 9.379560544869601e-05, "loss": 1.0182, "step": 5283 }, { "epoch": 0.35801883596449624, "grad_norm": 8.943527221679688, "learning_rate": 9.379423642959819e-05, "loss": 0.9782, "step": 5284 }, { "epoch": 0.35808659123246833, "grad_norm": 8.551790237426758, "learning_rate": 9.379286741050039e-05, "loss": 0.8513, "step": 5285 }, { "epoch": 0.3581543465004404, "grad_norm": 8.670862197875977, "learning_rate": 9.379149839140257e-05, "loss": 0.9026, "step": 5286 }, { "epoch": 0.3582221017684125, "grad_norm": 8.31614875793457, "learning_rate": 9.379012937230475e-05, "loss": 0.7666, "step": 5287 }, { "epoch": 0.35828985703638455, "grad_norm": 6.347659111022949, "learning_rate": 9.378876035320693e-05, "loss": 0.953, "step": 5288 }, { "epoch": 0.35835761230435664, "grad_norm": 9.098658561706543, "learning_rate": 9.378739133410912e-05, "loss": 1.3235, "step": 5289 }, { "epoch": 0.35842536757232873, "grad_norm": 7.218830585479736, "learning_rate": 9.37860223150113e-05, "loss": 1.0008, "step": 5290 }, { "epoch": 0.3584931228403008, "grad_norm": 7.346166133880615, "learning_rate": 9.378465329591348e-05, "loss": 0.8441, "step": 5291 }, { "epoch": 0.3585608781082729, "grad_norm": 6.566136360168457, "learning_rate": 9.378328427681566e-05, "loss": 1.0381, "step": 5292 }, { "epoch": 0.358628633376245, "grad_norm": 7.599377632141113, "learning_rate": 9.378191525771784e-05, "loss": 0.8131, "step": 5293 }, { "epoch": 0.3586963886442171, "grad_norm": 6.4648284912109375, "learning_rate": 9.378054623862004e-05, "loss": 0.9599, "step": 5294 }, { "epoch": 0.3587641439121892, "grad_norm": 6.155094146728516, "learning_rate": 9.377917721952222e-05, "loss": 1.0911, "step": 5295 }, { "epoch": 0.3588318991801613, "grad_norm": 5.981008529663086, "learning_rate": 9.37778082004244e-05, "loss": 0.7692, "step": 5296 }, { "epoch": 0.3588996544481333, "grad_norm": 7.424664497375488, "learning_rate": 9.377643918132658e-05, "loss": 0.9248, "step": 5297 }, { "epoch": 0.3589674097161054, "grad_norm": 8.310460090637207, "learning_rate": 9.377507016222877e-05, "loss": 0.9659, "step": 5298 }, { "epoch": 0.3590351649840775, "grad_norm": 7.189448833465576, "learning_rate": 9.377370114313095e-05, "loss": 0.9844, "step": 5299 }, { "epoch": 0.3591029202520496, "grad_norm": 6.688578128814697, "learning_rate": 9.377233212403313e-05, "loss": 0.8488, "step": 5300 }, { "epoch": 0.3591706755200217, "grad_norm": 8.917186737060547, "learning_rate": 9.377096310493531e-05, "loss": 1.0579, "step": 5301 }, { "epoch": 0.3592384307879938, "grad_norm": 8.004783630371094, "learning_rate": 9.37695940858375e-05, "loss": 0.8347, "step": 5302 }, { "epoch": 0.35930618605596587, "grad_norm": 8.461618423461914, "learning_rate": 9.376822506673969e-05, "loss": 1.1304, "step": 5303 }, { "epoch": 0.35937394132393796, "grad_norm": 7.35521936416626, "learning_rate": 9.376685604764187e-05, "loss": 0.8222, "step": 5304 }, { "epoch": 0.35944169659191, "grad_norm": 7.697165012359619, "learning_rate": 9.376548702854405e-05, "loss": 0.8885, "step": 5305 }, { "epoch": 0.3595094518598821, "grad_norm": 8.730046272277832, "learning_rate": 9.376411800944623e-05, "loss": 0.9201, "step": 5306 }, { "epoch": 0.3595772071278542, "grad_norm": 7.738508701324463, "learning_rate": 9.376274899034841e-05, "loss": 0.828, "step": 5307 }, { "epoch": 0.3596449623958263, "grad_norm": 6.961246013641357, "learning_rate": 9.37613799712506e-05, "loss": 1.2298, "step": 5308 }, { "epoch": 0.35971271766379836, "grad_norm": 7.723383903503418, "learning_rate": 9.376001095215278e-05, "loss": 0.9714, "step": 5309 }, { "epoch": 0.35978047293177046, "grad_norm": 6.885270595550537, "learning_rate": 9.375864193305496e-05, "loss": 0.8352, "step": 5310 }, { "epoch": 0.35984822819974255, "grad_norm": 8.623848915100098, "learning_rate": 9.375727291395716e-05, "loss": 0.7216, "step": 5311 }, { "epoch": 0.35991598346771464, "grad_norm": 7.042871952056885, "learning_rate": 9.375590389485934e-05, "loss": 0.8098, "step": 5312 }, { "epoch": 0.3599837387356867, "grad_norm": 6.125864505767822, "learning_rate": 9.375453487576152e-05, "loss": 0.7628, "step": 5313 }, { "epoch": 0.36005149400365877, "grad_norm": 7.560012340545654, "learning_rate": 9.375316585666371e-05, "loss": 0.9114, "step": 5314 }, { "epoch": 0.36011924927163086, "grad_norm": 6.125675678253174, "learning_rate": 9.375179683756589e-05, "loss": 0.7894, "step": 5315 }, { "epoch": 0.36018700453960295, "grad_norm": 7.076975345611572, "learning_rate": 9.375042781846807e-05, "loss": 1.1084, "step": 5316 }, { "epoch": 0.36025475980757504, "grad_norm": 6.712325096130371, "learning_rate": 9.374905879937027e-05, "loss": 0.9326, "step": 5317 }, { "epoch": 0.36032251507554713, "grad_norm": 5.849967956542969, "learning_rate": 9.374768978027245e-05, "loss": 0.7884, "step": 5318 }, { "epoch": 0.3603902703435192, "grad_norm": 7.5400614738464355, "learning_rate": 9.374632076117463e-05, "loss": 0.9844, "step": 5319 }, { "epoch": 0.3604580256114913, "grad_norm": 5.720027446746826, "learning_rate": 9.374495174207681e-05, "loss": 0.8233, "step": 5320 }, { "epoch": 0.36052578087946335, "grad_norm": 6.872372627258301, "learning_rate": 9.3743582722979e-05, "loss": 0.6808, "step": 5321 }, { "epoch": 0.36059353614743545, "grad_norm": 6.98433780670166, "learning_rate": 9.374221370388118e-05, "loss": 0.642, "step": 5322 }, { "epoch": 0.36066129141540754, "grad_norm": 7.231049537658691, "learning_rate": 9.374084468478336e-05, "loss": 1.1379, "step": 5323 }, { "epoch": 0.36072904668337963, "grad_norm": 8.301416397094727, "learning_rate": 9.373947566568554e-05, "loss": 0.9184, "step": 5324 }, { "epoch": 0.3607968019513517, "grad_norm": 6.9752068519592285, "learning_rate": 9.373810664658772e-05, "loss": 0.9984, "step": 5325 }, { "epoch": 0.3608645572193238, "grad_norm": 6.826324939727783, "learning_rate": 9.373673762748992e-05, "loss": 0.8923, "step": 5326 }, { "epoch": 0.3609323124872959, "grad_norm": 6.605769634246826, "learning_rate": 9.37353686083921e-05, "loss": 0.8809, "step": 5327 }, { "epoch": 0.361000067755268, "grad_norm": 7.791666030883789, "learning_rate": 9.373399958929428e-05, "loss": 0.9199, "step": 5328 }, { "epoch": 0.36106782302324003, "grad_norm": 7.015176773071289, "learning_rate": 9.373263057019646e-05, "loss": 0.8271, "step": 5329 }, { "epoch": 0.3611355782912121, "grad_norm": 6.63329553604126, "learning_rate": 9.373126155109864e-05, "loss": 0.9052, "step": 5330 }, { "epoch": 0.3612033335591842, "grad_norm": 9.516122817993164, "learning_rate": 9.372989253200083e-05, "loss": 1.1517, "step": 5331 }, { "epoch": 0.3612710888271563, "grad_norm": 7.7722978591918945, "learning_rate": 9.372852351290301e-05, "loss": 1.0195, "step": 5332 }, { "epoch": 0.3613388440951284, "grad_norm": 7.2577338218688965, "learning_rate": 9.372715449380519e-05, "loss": 0.9093, "step": 5333 }, { "epoch": 0.3614065993631005, "grad_norm": 6.668231010437012, "learning_rate": 9.372578547470737e-05, "loss": 0.7677, "step": 5334 }, { "epoch": 0.3614743546310726, "grad_norm": 8.366500854492188, "learning_rate": 9.372441645560957e-05, "loss": 1.0869, "step": 5335 }, { "epoch": 0.3615421098990447, "grad_norm": 7.495104789733887, "learning_rate": 9.372304743651175e-05, "loss": 0.8074, "step": 5336 }, { "epoch": 0.3616098651670167, "grad_norm": 6.546140670776367, "learning_rate": 9.372167841741393e-05, "loss": 0.8792, "step": 5337 }, { "epoch": 0.3616776204349888, "grad_norm": 7.664621353149414, "learning_rate": 9.372030939831611e-05, "loss": 0.8177, "step": 5338 }, { "epoch": 0.3617453757029609, "grad_norm": 7.482497215270996, "learning_rate": 9.371894037921829e-05, "loss": 0.7624, "step": 5339 }, { "epoch": 0.361813130970933, "grad_norm": 11.148695945739746, "learning_rate": 9.371757136012048e-05, "loss": 1.0792, "step": 5340 }, { "epoch": 0.3618808862389051, "grad_norm": 9.08121109008789, "learning_rate": 9.371620234102266e-05, "loss": 1.0027, "step": 5341 }, { "epoch": 0.36194864150687717, "grad_norm": 8.424308776855469, "learning_rate": 9.371483332192484e-05, "loss": 1.2137, "step": 5342 }, { "epoch": 0.36201639677484926, "grad_norm": 8.55742359161377, "learning_rate": 9.371346430282702e-05, "loss": 0.8023, "step": 5343 }, { "epoch": 0.36208415204282135, "grad_norm": 7.197443008422852, "learning_rate": 9.371209528372922e-05, "loss": 1.0458, "step": 5344 }, { "epoch": 0.36215190731079344, "grad_norm": 8.317964553833008, "learning_rate": 9.37107262646314e-05, "loss": 0.9829, "step": 5345 }, { "epoch": 0.3622196625787655, "grad_norm": 8.819951057434082, "learning_rate": 9.370935724553358e-05, "loss": 1.0024, "step": 5346 }, { "epoch": 0.36228741784673757, "grad_norm": 6.795368671417236, "learning_rate": 9.370798822643576e-05, "loss": 1.0216, "step": 5347 }, { "epoch": 0.36235517311470966, "grad_norm": 5.370431900024414, "learning_rate": 9.370661920733794e-05, "loss": 0.8693, "step": 5348 }, { "epoch": 0.36242292838268175, "grad_norm": 7.330100059509277, "learning_rate": 9.370525018824013e-05, "loss": 1.0576, "step": 5349 }, { "epoch": 0.36249068365065384, "grad_norm": 6.56719446182251, "learning_rate": 9.370388116914231e-05, "loss": 0.8367, "step": 5350 }, { "epoch": 0.36255843891862594, "grad_norm": 7.462427616119385, "learning_rate": 9.370251215004449e-05, "loss": 0.8284, "step": 5351 }, { "epoch": 0.362626194186598, "grad_norm": 10.382340431213379, "learning_rate": 9.370114313094667e-05, "loss": 1.1184, "step": 5352 }, { "epoch": 0.3626939494545701, "grad_norm": 6.872713088989258, "learning_rate": 9.369977411184885e-05, "loss": 0.9656, "step": 5353 }, { "epoch": 0.36276170472254216, "grad_norm": 6.2565741539001465, "learning_rate": 9.369840509275105e-05, "loss": 0.9206, "step": 5354 }, { "epoch": 0.36282945999051425, "grad_norm": 8.036307334899902, "learning_rate": 9.369703607365323e-05, "loss": 1.2556, "step": 5355 }, { "epoch": 0.36289721525848634, "grad_norm": 7.401971340179443, "learning_rate": 9.369566705455541e-05, "loss": 0.9584, "step": 5356 }, { "epoch": 0.36296497052645843, "grad_norm": 7.890522003173828, "learning_rate": 9.369429803545759e-05, "loss": 1.0172, "step": 5357 }, { "epoch": 0.3630327257944305, "grad_norm": 7.4986348152160645, "learning_rate": 9.369292901635978e-05, "loss": 0.9985, "step": 5358 }, { "epoch": 0.3631004810624026, "grad_norm": 6.275259494781494, "learning_rate": 9.369155999726196e-05, "loss": 0.9051, "step": 5359 }, { "epoch": 0.3631682363303747, "grad_norm": 6.2667365074157715, "learning_rate": 9.369019097816416e-05, "loss": 0.9137, "step": 5360 }, { "epoch": 0.3632359915983468, "grad_norm": 7.5247483253479, "learning_rate": 9.368882195906634e-05, "loss": 0.9157, "step": 5361 }, { "epoch": 0.36330374686631883, "grad_norm": 8.757854461669922, "learning_rate": 9.368745293996852e-05, "loss": 1.2596, "step": 5362 }, { "epoch": 0.3633715021342909, "grad_norm": 7.257226943969727, "learning_rate": 9.368608392087071e-05, "loss": 0.8211, "step": 5363 }, { "epoch": 0.363439257402263, "grad_norm": 6.194060325622559, "learning_rate": 9.368471490177289e-05, "loss": 0.8816, "step": 5364 }, { "epoch": 0.3635070126702351, "grad_norm": 10.445760726928711, "learning_rate": 9.368334588267507e-05, "loss": 0.858, "step": 5365 }, { "epoch": 0.3635747679382072, "grad_norm": 6.156910419464111, "learning_rate": 9.368197686357725e-05, "loss": 1.0348, "step": 5366 }, { "epoch": 0.3636425232061793, "grad_norm": 6.490479946136475, "learning_rate": 9.368060784447945e-05, "loss": 0.8834, "step": 5367 }, { "epoch": 0.3637102784741514, "grad_norm": 6.535744667053223, "learning_rate": 9.367923882538163e-05, "loss": 0.6612, "step": 5368 }, { "epoch": 0.3637780337421235, "grad_norm": 8.068840980529785, "learning_rate": 9.36778698062838e-05, "loss": 1.0348, "step": 5369 }, { "epoch": 0.3638457890100955, "grad_norm": 6.666348934173584, "learning_rate": 9.367650078718599e-05, "loss": 0.8346, "step": 5370 }, { "epoch": 0.3639135442780676, "grad_norm": 7.482582092285156, "learning_rate": 9.367513176808817e-05, "loss": 0.7356, "step": 5371 }, { "epoch": 0.3639812995460397, "grad_norm": 6.928842067718506, "learning_rate": 9.367376274899036e-05, "loss": 0.7963, "step": 5372 }, { "epoch": 0.3640490548140118, "grad_norm": 6.574978351593018, "learning_rate": 9.367239372989254e-05, "loss": 0.7893, "step": 5373 }, { "epoch": 0.3641168100819839, "grad_norm": 6.267110824584961, "learning_rate": 9.367102471079472e-05, "loss": 0.6282, "step": 5374 }, { "epoch": 0.36418456534995597, "grad_norm": 6.368738651275635, "learning_rate": 9.36696556916969e-05, "loss": 0.8983, "step": 5375 }, { "epoch": 0.36425232061792806, "grad_norm": 7.211498737335205, "learning_rate": 9.36682866725991e-05, "loss": 0.9471, "step": 5376 }, { "epoch": 0.36432007588590015, "grad_norm": 6.124608516693115, "learning_rate": 9.366691765350128e-05, "loss": 0.8649, "step": 5377 }, { "epoch": 0.3643878311538722, "grad_norm": 7.325745582580566, "learning_rate": 9.366554863440346e-05, "loss": 0.9811, "step": 5378 }, { "epoch": 0.3644555864218443, "grad_norm": 5.563783645629883, "learning_rate": 9.366417961530564e-05, "loss": 0.833, "step": 5379 }, { "epoch": 0.36452334168981637, "grad_norm": 7.581454753875732, "learning_rate": 9.366281059620782e-05, "loss": 0.9078, "step": 5380 }, { "epoch": 0.36459109695778846, "grad_norm": 7.746644020080566, "learning_rate": 9.366144157711001e-05, "loss": 0.9143, "step": 5381 }, { "epoch": 0.36465885222576055, "grad_norm": 7.729243278503418, "learning_rate": 9.366007255801219e-05, "loss": 0.856, "step": 5382 }, { "epoch": 0.36472660749373265, "grad_norm": 7.337301731109619, "learning_rate": 9.365870353891437e-05, "loss": 0.9059, "step": 5383 }, { "epoch": 0.36479436276170474, "grad_norm": 7.622936248779297, "learning_rate": 9.365733451981655e-05, "loss": 1.0493, "step": 5384 }, { "epoch": 0.36486211802967683, "grad_norm": 7.159453392028809, "learning_rate": 9.365596550071873e-05, "loss": 0.824, "step": 5385 }, { "epoch": 0.36492987329764887, "grad_norm": 7.111423492431641, "learning_rate": 9.365459648162093e-05, "loss": 0.7201, "step": 5386 }, { "epoch": 0.36499762856562096, "grad_norm": 8.7478666305542, "learning_rate": 9.365322746252311e-05, "loss": 1.2497, "step": 5387 }, { "epoch": 0.36506538383359305, "grad_norm": 9.522677421569824, "learning_rate": 9.365185844342529e-05, "loss": 0.9175, "step": 5388 }, { "epoch": 0.36513313910156514, "grad_norm": 7.881449222564697, "learning_rate": 9.365048942432747e-05, "loss": 1.1123, "step": 5389 }, { "epoch": 0.36520089436953723, "grad_norm": 6.60174560546875, "learning_rate": 9.364912040522966e-05, "loss": 0.9148, "step": 5390 }, { "epoch": 0.3652686496375093, "grad_norm": 7.549520492553711, "learning_rate": 9.364775138613184e-05, "loss": 0.991, "step": 5391 }, { "epoch": 0.3653364049054814, "grad_norm": 7.287946701049805, "learning_rate": 9.364638236703402e-05, "loss": 0.984, "step": 5392 }, { "epoch": 0.3654041601734535, "grad_norm": 9.351056098937988, "learning_rate": 9.36450133479362e-05, "loss": 1.1052, "step": 5393 }, { "epoch": 0.36547191544142554, "grad_norm": 8.443605422973633, "learning_rate": 9.364364432883838e-05, "loss": 0.941, "step": 5394 }, { "epoch": 0.36553967070939763, "grad_norm": 6.416137218475342, "learning_rate": 9.364227530974058e-05, "loss": 0.9359, "step": 5395 }, { "epoch": 0.3656074259773697, "grad_norm": 7.125934600830078, "learning_rate": 9.364090629064276e-05, "loss": 1.0006, "step": 5396 }, { "epoch": 0.3656751812453418, "grad_norm": 8.161697387695312, "learning_rate": 9.363953727154494e-05, "loss": 0.8284, "step": 5397 }, { "epoch": 0.3657429365133139, "grad_norm": 6.955849647521973, "learning_rate": 9.363816825244712e-05, "loss": 0.7467, "step": 5398 }, { "epoch": 0.365810691781286, "grad_norm": 6.489741325378418, "learning_rate": 9.363679923334931e-05, "loss": 0.7611, "step": 5399 }, { "epoch": 0.3658784470492581, "grad_norm": 7.332844257354736, "learning_rate": 9.363543021425149e-05, "loss": 0.7666, "step": 5400 }, { "epoch": 0.3659462023172302, "grad_norm": 9.001418113708496, "learning_rate": 9.363406119515367e-05, "loss": 1.0219, "step": 5401 }, { "epoch": 0.3660139575852023, "grad_norm": 8.972160339355469, "learning_rate": 9.363269217605585e-05, "loss": 1.0122, "step": 5402 }, { "epoch": 0.3660817128531743, "grad_norm": 7.487551212310791, "learning_rate": 9.363132315695803e-05, "loss": 1.2551, "step": 5403 }, { "epoch": 0.3661494681211464, "grad_norm": 5.9293904304504395, "learning_rate": 9.362995413786023e-05, "loss": 0.7594, "step": 5404 }, { "epoch": 0.3662172233891185, "grad_norm": 7.69260311126709, "learning_rate": 9.362858511876241e-05, "loss": 0.9297, "step": 5405 }, { "epoch": 0.3662849786570906, "grad_norm": 6.723849296569824, "learning_rate": 9.362721609966459e-05, "loss": 1.0034, "step": 5406 }, { "epoch": 0.3663527339250627, "grad_norm": 8.467827796936035, "learning_rate": 9.362584708056678e-05, "loss": 0.9831, "step": 5407 }, { "epoch": 0.36642048919303477, "grad_norm": 6.563052654266357, "learning_rate": 9.362447806146896e-05, "loss": 0.8464, "step": 5408 }, { "epoch": 0.36648824446100686, "grad_norm": 7.124545574188232, "learning_rate": 9.362310904237114e-05, "loss": 1.1, "step": 5409 }, { "epoch": 0.36655599972897895, "grad_norm": 8.016179084777832, "learning_rate": 9.362174002327334e-05, "loss": 0.9962, "step": 5410 }, { "epoch": 0.366623754996951, "grad_norm": 5.937708854675293, "learning_rate": 9.362037100417552e-05, "loss": 0.5908, "step": 5411 }, { "epoch": 0.3666915102649231, "grad_norm": 6.945435523986816, "learning_rate": 9.36190019850777e-05, "loss": 0.7576, "step": 5412 }, { "epoch": 0.3667592655328952, "grad_norm": 6.756751537322998, "learning_rate": 9.361763296597989e-05, "loss": 0.7966, "step": 5413 }, { "epoch": 0.36682702080086727, "grad_norm": 6.434855937957764, "learning_rate": 9.361626394688207e-05, "loss": 0.8401, "step": 5414 }, { "epoch": 0.36689477606883936, "grad_norm": 6.420316696166992, "learning_rate": 9.361489492778425e-05, "loss": 0.8031, "step": 5415 }, { "epoch": 0.36696253133681145, "grad_norm": 5.286546230316162, "learning_rate": 9.361352590868643e-05, "loss": 0.6665, "step": 5416 }, { "epoch": 0.36703028660478354, "grad_norm": 8.99885368347168, "learning_rate": 9.361215688958861e-05, "loss": 0.7545, "step": 5417 }, { "epoch": 0.36709804187275563, "grad_norm": 7.546742916107178, "learning_rate": 9.36107878704908e-05, "loss": 1.0391, "step": 5418 }, { "epoch": 0.36716579714072767, "grad_norm": 6.452430248260498, "learning_rate": 9.360941885139299e-05, "loss": 0.8458, "step": 5419 }, { "epoch": 0.36723355240869976, "grad_norm": 7.125503063201904, "learning_rate": 9.360804983229517e-05, "loss": 0.8061, "step": 5420 }, { "epoch": 0.36730130767667185, "grad_norm": 7.046570301055908, "learning_rate": 9.360668081319735e-05, "loss": 1.0968, "step": 5421 }, { "epoch": 0.36736906294464394, "grad_norm": 7.032671928405762, "learning_rate": 9.360531179409954e-05, "loss": 0.9834, "step": 5422 }, { "epoch": 0.36743681821261603, "grad_norm": 8.430721282958984, "learning_rate": 9.360394277500172e-05, "loss": 0.8875, "step": 5423 }, { "epoch": 0.3675045734805881, "grad_norm": 8.67082405090332, "learning_rate": 9.36025737559039e-05, "loss": 0.9981, "step": 5424 }, { "epoch": 0.3675723287485602, "grad_norm": 6.5781707763671875, "learning_rate": 9.360120473680608e-05, "loss": 0.9702, "step": 5425 }, { "epoch": 0.3676400840165323, "grad_norm": 9.087675094604492, "learning_rate": 9.359983571770826e-05, "loss": 1.1141, "step": 5426 }, { "epoch": 0.36770783928450435, "grad_norm": 6.949502468109131, "learning_rate": 9.359846669861046e-05, "loss": 0.9167, "step": 5427 }, { "epoch": 0.36777559455247644, "grad_norm": 9.335396766662598, "learning_rate": 9.359709767951264e-05, "loss": 0.8119, "step": 5428 }, { "epoch": 0.36784334982044853, "grad_norm": 8.041236877441406, "learning_rate": 9.359572866041482e-05, "loss": 1.0112, "step": 5429 }, { "epoch": 0.3679111050884206, "grad_norm": 6.435655117034912, "learning_rate": 9.3594359641317e-05, "loss": 0.9589, "step": 5430 }, { "epoch": 0.3679788603563927, "grad_norm": 8.047952651977539, "learning_rate": 9.359299062221919e-05, "loss": 0.7143, "step": 5431 }, { "epoch": 0.3680466156243648, "grad_norm": 6.986575126647949, "learning_rate": 9.359162160312137e-05, "loss": 0.8857, "step": 5432 }, { "epoch": 0.3681143708923369, "grad_norm": 5.447303771972656, "learning_rate": 9.359025258402355e-05, "loss": 0.7229, "step": 5433 }, { "epoch": 0.368182126160309, "grad_norm": 6.337569713592529, "learning_rate": 9.358888356492573e-05, "loss": 0.8595, "step": 5434 }, { "epoch": 0.368249881428281, "grad_norm": 7.318792819976807, "learning_rate": 9.358751454582791e-05, "loss": 0.8131, "step": 5435 }, { "epoch": 0.3683176366962531, "grad_norm": 6.816128730773926, "learning_rate": 9.35861455267301e-05, "loss": 0.6972, "step": 5436 }, { "epoch": 0.3683853919642252, "grad_norm": 6.886023998260498, "learning_rate": 9.358477650763229e-05, "loss": 0.9075, "step": 5437 }, { "epoch": 0.3684531472321973, "grad_norm": 7.4018049240112305, "learning_rate": 9.358340748853447e-05, "loss": 0.7913, "step": 5438 }, { "epoch": 0.3685209025001694, "grad_norm": 6.976071834564209, "learning_rate": 9.358203846943665e-05, "loss": 0.8306, "step": 5439 }, { "epoch": 0.3685886577681415, "grad_norm": 8.005870819091797, "learning_rate": 9.358066945033883e-05, "loss": 1.0109, "step": 5440 }, { "epoch": 0.3686564130361136, "grad_norm": 8.600711822509766, "learning_rate": 9.357930043124102e-05, "loss": 1.022, "step": 5441 }, { "epoch": 0.36872416830408566, "grad_norm": 9.857340812683105, "learning_rate": 9.35779314121432e-05, "loss": 1.1266, "step": 5442 }, { "epoch": 0.3687919235720577, "grad_norm": 8.064130783081055, "learning_rate": 9.357656239304538e-05, "loss": 0.9224, "step": 5443 }, { "epoch": 0.3688596788400298, "grad_norm": 9.217019081115723, "learning_rate": 9.357519337394756e-05, "loss": 1.0041, "step": 5444 }, { "epoch": 0.3689274341080019, "grad_norm": 7.042741298675537, "learning_rate": 9.357382435484976e-05, "loss": 0.6798, "step": 5445 }, { "epoch": 0.368995189375974, "grad_norm": 6.679181098937988, "learning_rate": 9.357245533575194e-05, "loss": 0.9641, "step": 5446 }, { "epoch": 0.36906294464394607, "grad_norm": 8.608474731445312, "learning_rate": 9.357108631665412e-05, "loss": 0.8224, "step": 5447 }, { "epoch": 0.36913069991191816, "grad_norm": 8.975968360900879, "learning_rate": 9.35697172975563e-05, "loss": 0.9103, "step": 5448 }, { "epoch": 0.36919845517989025, "grad_norm": 7.249898910522461, "learning_rate": 9.356834827845848e-05, "loss": 1.0178, "step": 5449 }, { "epoch": 0.36926621044786234, "grad_norm": 6.6788787841796875, "learning_rate": 9.356697925936067e-05, "loss": 0.7786, "step": 5450 }, { "epoch": 0.36933396571583443, "grad_norm": 7.834086894989014, "learning_rate": 9.356561024026285e-05, "loss": 1.0282, "step": 5451 }, { "epoch": 0.36940172098380647, "grad_norm": 7.36384391784668, "learning_rate": 9.356424122116503e-05, "loss": 1.0448, "step": 5452 }, { "epoch": 0.36946947625177856, "grad_norm": 7.843540191650391, "learning_rate": 9.356287220206723e-05, "loss": 1.0646, "step": 5453 }, { "epoch": 0.36953723151975065, "grad_norm": 8.009191513061523, "learning_rate": 9.35615031829694e-05, "loss": 1.0279, "step": 5454 }, { "epoch": 0.36960498678772274, "grad_norm": 7.594022274017334, "learning_rate": 9.356013416387159e-05, "loss": 1.0235, "step": 5455 }, { "epoch": 0.36967274205569484, "grad_norm": 6.167095184326172, "learning_rate": 9.355876514477378e-05, "loss": 0.7107, "step": 5456 }, { "epoch": 0.36974049732366693, "grad_norm": 8.07007884979248, "learning_rate": 9.355739612567596e-05, "loss": 0.9667, "step": 5457 }, { "epoch": 0.369808252591639, "grad_norm": 7.6446099281311035, "learning_rate": 9.355602710657814e-05, "loss": 0.9758, "step": 5458 }, { "epoch": 0.3698760078596111, "grad_norm": 7.282294273376465, "learning_rate": 9.355465808748034e-05, "loss": 0.929, "step": 5459 }, { "epoch": 0.36994376312758315, "grad_norm": 9.878494262695312, "learning_rate": 9.355328906838252e-05, "loss": 0.9909, "step": 5460 }, { "epoch": 0.37001151839555524, "grad_norm": 7.331822395324707, "learning_rate": 9.35519200492847e-05, "loss": 0.9693, "step": 5461 }, { "epoch": 0.37007927366352733, "grad_norm": 7.3209404945373535, "learning_rate": 9.355055103018688e-05, "loss": 0.8587, "step": 5462 }, { "epoch": 0.3701470289314994, "grad_norm": 8.261675834655762, "learning_rate": 9.354918201108906e-05, "loss": 0.9391, "step": 5463 }, { "epoch": 0.3702147841994715, "grad_norm": 7.214691162109375, "learning_rate": 9.354781299199125e-05, "loss": 0.8805, "step": 5464 }, { "epoch": 0.3702825394674436, "grad_norm": 7.409287452697754, "learning_rate": 9.354644397289343e-05, "loss": 0.6792, "step": 5465 }, { "epoch": 0.3703502947354157, "grad_norm": 6.368542194366455, "learning_rate": 9.354507495379561e-05, "loss": 0.6464, "step": 5466 }, { "epoch": 0.3704180500033878, "grad_norm": 7.106698513031006, "learning_rate": 9.354370593469779e-05, "loss": 1.0956, "step": 5467 }, { "epoch": 0.3704858052713598, "grad_norm": 7.591500759124756, "learning_rate": 9.354233691559999e-05, "loss": 0.9643, "step": 5468 }, { "epoch": 0.3705535605393319, "grad_norm": 7.255499839782715, "learning_rate": 9.354096789650217e-05, "loss": 0.7782, "step": 5469 }, { "epoch": 0.370621315807304, "grad_norm": 8.414693832397461, "learning_rate": 9.353959887740435e-05, "loss": 0.9433, "step": 5470 }, { "epoch": 0.3706890710752761, "grad_norm": 7.294419765472412, "learning_rate": 9.353822985830653e-05, "loss": 0.9643, "step": 5471 }, { "epoch": 0.3707568263432482, "grad_norm": 7.249752998352051, "learning_rate": 9.353686083920871e-05, "loss": 0.8529, "step": 5472 }, { "epoch": 0.3708245816112203, "grad_norm": 9.345986366271973, "learning_rate": 9.35354918201109e-05, "loss": 0.9641, "step": 5473 }, { "epoch": 0.3708923368791924, "grad_norm": 7.828972816467285, "learning_rate": 9.353412280101308e-05, "loss": 1.1, "step": 5474 }, { "epoch": 0.37096009214716447, "grad_norm": 7.739612579345703, "learning_rate": 9.353275378191526e-05, "loss": 0.8228, "step": 5475 }, { "epoch": 0.3710278474151365, "grad_norm": 7.853143215179443, "learning_rate": 9.353138476281744e-05, "loss": 0.9088, "step": 5476 }, { "epoch": 0.3710956026831086, "grad_norm": 7.996474742889404, "learning_rate": 9.353001574371964e-05, "loss": 0.9291, "step": 5477 }, { "epoch": 0.3711633579510807, "grad_norm": 7.374247074127197, "learning_rate": 9.352864672462182e-05, "loss": 1.0619, "step": 5478 }, { "epoch": 0.3712311132190528, "grad_norm": 8.037210464477539, "learning_rate": 9.3527277705524e-05, "loss": 0.9426, "step": 5479 }, { "epoch": 0.37129886848702487, "grad_norm": 5.515965938568115, "learning_rate": 9.352590868642618e-05, "loss": 0.6838, "step": 5480 }, { "epoch": 0.37136662375499696, "grad_norm": 8.530059814453125, "learning_rate": 9.352453966732836e-05, "loss": 0.9221, "step": 5481 }, { "epoch": 0.37143437902296905, "grad_norm": 8.07000732421875, "learning_rate": 9.352317064823055e-05, "loss": 0.9321, "step": 5482 }, { "epoch": 0.37150213429094114, "grad_norm": 8.79816722869873, "learning_rate": 9.352180162913273e-05, "loss": 1.0875, "step": 5483 }, { "epoch": 0.3715698895589132, "grad_norm": 8.476999282836914, "learning_rate": 9.352043261003491e-05, "loss": 1.0144, "step": 5484 }, { "epoch": 0.37163764482688527, "grad_norm": 5.7845540046691895, "learning_rate": 9.351906359093709e-05, "loss": 0.6802, "step": 5485 }, { "epoch": 0.37170540009485736, "grad_norm": 6.311066627502441, "learning_rate": 9.351769457183929e-05, "loss": 0.7673, "step": 5486 }, { "epoch": 0.37177315536282945, "grad_norm": 6.929514408111572, "learning_rate": 9.351632555274147e-05, "loss": 1.1287, "step": 5487 }, { "epoch": 0.37184091063080155, "grad_norm": 7.7819132804870605, "learning_rate": 9.351495653364365e-05, "loss": 0.8828, "step": 5488 }, { "epoch": 0.37190866589877364, "grad_norm": 7.906317710876465, "learning_rate": 9.351358751454583e-05, "loss": 0.7961, "step": 5489 }, { "epoch": 0.37197642116674573, "grad_norm": 8.190624237060547, "learning_rate": 9.351221849544801e-05, "loss": 0.8106, "step": 5490 }, { "epoch": 0.3720441764347178, "grad_norm": 6.0358734130859375, "learning_rate": 9.35108494763502e-05, "loss": 1.0342, "step": 5491 }, { "epoch": 0.37211193170268986, "grad_norm": 7.096914291381836, "learning_rate": 9.350948045725238e-05, "loss": 0.6652, "step": 5492 }, { "epoch": 0.37217968697066195, "grad_norm": 7.488151550292969, "learning_rate": 9.350811143815456e-05, "loss": 0.9283, "step": 5493 }, { "epoch": 0.37224744223863404, "grad_norm": 7.084420680999756, "learning_rate": 9.350674241905674e-05, "loss": 1.0424, "step": 5494 }, { "epoch": 0.37231519750660613, "grad_norm": 7.483108997344971, "learning_rate": 9.350537339995892e-05, "loss": 1.0657, "step": 5495 }, { "epoch": 0.3723829527745782, "grad_norm": 7.312635898590088, "learning_rate": 9.350400438086112e-05, "loss": 0.7831, "step": 5496 }, { "epoch": 0.3724507080425503, "grad_norm": 6.901159763336182, "learning_rate": 9.35026353617633e-05, "loss": 0.9832, "step": 5497 }, { "epoch": 0.3725184633105224, "grad_norm": 7.787426471710205, "learning_rate": 9.350126634266548e-05, "loss": 0.9112, "step": 5498 }, { "epoch": 0.3725862185784945, "grad_norm": 7.2583746910095215, "learning_rate": 9.349989732356767e-05, "loss": 0.757, "step": 5499 }, { "epoch": 0.37265397384646654, "grad_norm": 7.068667411804199, "learning_rate": 9.349852830446985e-05, "loss": 0.8516, "step": 5500 }, { "epoch": 0.3727217291144386, "grad_norm": 8.109786987304688, "learning_rate": 9.349715928537203e-05, "loss": 0.7925, "step": 5501 }, { "epoch": 0.3727894843824107, "grad_norm": 7.539520740509033, "learning_rate": 9.349579026627423e-05, "loss": 1.283, "step": 5502 }, { "epoch": 0.3728572396503828, "grad_norm": 8.608511924743652, "learning_rate": 9.34944212471764e-05, "loss": 1.2367, "step": 5503 }, { "epoch": 0.3729249949183549, "grad_norm": 7.932346820831299, "learning_rate": 9.349305222807859e-05, "loss": 0.894, "step": 5504 }, { "epoch": 0.372992750186327, "grad_norm": 6.520984172821045, "learning_rate": 9.349168320898078e-05, "loss": 0.884, "step": 5505 }, { "epoch": 0.3730605054542991, "grad_norm": 6.876826286315918, "learning_rate": 9.349031418988296e-05, "loss": 0.9447, "step": 5506 }, { "epoch": 0.3731282607222712, "grad_norm": 8.540080070495605, "learning_rate": 9.348894517078514e-05, "loss": 1.0709, "step": 5507 }, { "epoch": 0.37319601599024327, "grad_norm": 7.760754585266113, "learning_rate": 9.348757615168732e-05, "loss": 1.0666, "step": 5508 }, { "epoch": 0.3732637712582153, "grad_norm": 6.1746134757995605, "learning_rate": 9.348620713258951e-05, "loss": 0.8367, "step": 5509 }, { "epoch": 0.3733315265261874, "grad_norm": 8.053776741027832, "learning_rate": 9.34848381134917e-05, "loss": 1.253, "step": 5510 }, { "epoch": 0.3733992817941595, "grad_norm": 10.046021461486816, "learning_rate": 9.348346909439388e-05, "loss": 0.9086, "step": 5511 }, { "epoch": 0.3734670370621316, "grad_norm": 7.247015953063965, "learning_rate": 9.348210007529606e-05, "loss": 1.3036, "step": 5512 }, { "epoch": 0.37353479233010367, "grad_norm": 6.103085517883301, "learning_rate": 9.348073105619824e-05, "loss": 0.9778, "step": 5513 }, { "epoch": 0.37360254759807576, "grad_norm": 7.330771446228027, "learning_rate": 9.347936203710043e-05, "loss": 0.8335, "step": 5514 }, { "epoch": 0.37367030286604785, "grad_norm": 7.571770668029785, "learning_rate": 9.347799301800261e-05, "loss": 0.9963, "step": 5515 }, { "epoch": 0.37373805813401995, "grad_norm": 8.018990516662598, "learning_rate": 9.347662399890479e-05, "loss": 0.7925, "step": 5516 }, { "epoch": 0.373805813401992, "grad_norm": 7.156185626983643, "learning_rate": 9.347525497980697e-05, "loss": 0.7821, "step": 5517 }, { "epoch": 0.3738735686699641, "grad_norm": 8.189929008483887, "learning_rate": 9.347388596070915e-05, "loss": 0.8997, "step": 5518 }, { "epoch": 0.37394132393793617, "grad_norm": 6.826793670654297, "learning_rate": 9.347251694161135e-05, "loss": 0.7392, "step": 5519 }, { "epoch": 0.37400907920590826, "grad_norm": 7.931643962860107, "learning_rate": 9.347114792251353e-05, "loss": 1.1607, "step": 5520 }, { "epoch": 0.37407683447388035, "grad_norm": 6.973018646240234, "learning_rate": 9.34697789034157e-05, "loss": 0.7419, "step": 5521 }, { "epoch": 0.37414458974185244, "grad_norm": 7.43549919128418, "learning_rate": 9.346840988431789e-05, "loss": 1.0035, "step": 5522 }, { "epoch": 0.37421234500982453, "grad_norm": 6.700629234313965, "learning_rate": 9.346704086522008e-05, "loss": 1.0953, "step": 5523 }, { "epoch": 0.3742801002777966, "grad_norm": 6.039200782775879, "learning_rate": 9.346567184612226e-05, "loss": 0.6512, "step": 5524 }, { "epoch": 0.37434785554576866, "grad_norm": 8.45660400390625, "learning_rate": 9.346430282702444e-05, "loss": 1.0725, "step": 5525 }, { "epoch": 0.37441561081374075, "grad_norm": 8.787965774536133, "learning_rate": 9.346293380792662e-05, "loss": 1.0101, "step": 5526 }, { "epoch": 0.37448336608171284, "grad_norm": 8.229619026184082, "learning_rate": 9.34615647888288e-05, "loss": 1.2651, "step": 5527 }, { "epoch": 0.37455112134968493, "grad_norm": 6.913321018218994, "learning_rate": 9.3460195769731e-05, "loss": 0.9196, "step": 5528 }, { "epoch": 0.374618876617657, "grad_norm": 5.944606781005859, "learning_rate": 9.345882675063318e-05, "loss": 0.857, "step": 5529 }, { "epoch": 0.3746866318856291, "grad_norm": 9.863933563232422, "learning_rate": 9.345745773153536e-05, "loss": 1.0566, "step": 5530 }, { "epoch": 0.3747543871536012, "grad_norm": 6.494686603546143, "learning_rate": 9.345608871243754e-05, "loss": 0.8839, "step": 5531 }, { "epoch": 0.3748221424215733, "grad_norm": 6.558131694793701, "learning_rate": 9.345471969333973e-05, "loss": 0.9802, "step": 5532 }, { "epoch": 0.37488989768954534, "grad_norm": 7.316765308380127, "learning_rate": 9.345335067424191e-05, "loss": 1.1635, "step": 5533 }, { "epoch": 0.37495765295751743, "grad_norm": 6.615121841430664, "learning_rate": 9.345198165514409e-05, "loss": 0.919, "step": 5534 }, { "epoch": 0.3750254082254895, "grad_norm": 8.071523666381836, "learning_rate": 9.345061263604627e-05, "loss": 1.0023, "step": 5535 }, { "epoch": 0.3750931634934616, "grad_norm": 7.563895225524902, "learning_rate": 9.344924361694845e-05, "loss": 0.963, "step": 5536 }, { "epoch": 0.3751609187614337, "grad_norm": 6.013591766357422, "learning_rate": 9.344787459785065e-05, "loss": 0.7862, "step": 5537 }, { "epoch": 0.3752286740294058, "grad_norm": 7.296882629394531, "learning_rate": 9.344650557875283e-05, "loss": 1.0106, "step": 5538 }, { "epoch": 0.3752964292973779, "grad_norm": 7.192817211151123, "learning_rate": 9.3445136559655e-05, "loss": 0.8491, "step": 5539 }, { "epoch": 0.37536418456535, "grad_norm": 7.962235927581787, "learning_rate": 9.344376754055719e-05, "loss": 1.0711, "step": 5540 }, { "epoch": 0.375431939833322, "grad_norm": 6.515812873840332, "learning_rate": 9.344239852145937e-05, "loss": 0.8779, "step": 5541 }, { "epoch": 0.3754996951012941, "grad_norm": 8.563304901123047, "learning_rate": 9.344102950236156e-05, "loss": 1.3236, "step": 5542 }, { "epoch": 0.3755674503692662, "grad_norm": 8.214926719665527, "learning_rate": 9.343966048326374e-05, "loss": 0.9231, "step": 5543 }, { "epoch": 0.3756352056372383, "grad_norm": 6.213039398193359, "learning_rate": 9.343829146416592e-05, "loss": 0.8446, "step": 5544 }, { "epoch": 0.3757029609052104, "grad_norm": 6.392467021942139, "learning_rate": 9.343692244506812e-05, "loss": 0.6742, "step": 5545 }, { "epoch": 0.3757707161731825, "grad_norm": 6.8930206298828125, "learning_rate": 9.34355534259703e-05, "loss": 0.9973, "step": 5546 }, { "epoch": 0.37583847144115456, "grad_norm": 8.074743270874023, "learning_rate": 9.343418440687248e-05, "loss": 0.8808, "step": 5547 }, { "epoch": 0.37590622670912666, "grad_norm": 8.443988800048828, "learning_rate": 9.343281538777467e-05, "loss": 1.0066, "step": 5548 }, { "epoch": 0.3759739819770987, "grad_norm": 6.530252933502197, "learning_rate": 9.343144636867685e-05, "loss": 1.0203, "step": 5549 }, { "epoch": 0.3760417372450708, "grad_norm": 6.693957328796387, "learning_rate": 9.343007734957903e-05, "loss": 0.9484, "step": 5550 }, { "epoch": 0.3761094925130429, "grad_norm": 7.103133201599121, "learning_rate": 9.342870833048122e-05, "loss": 1.0442, "step": 5551 }, { "epoch": 0.37617724778101497, "grad_norm": 6.089579105377197, "learning_rate": 9.34273393113834e-05, "loss": 0.8828, "step": 5552 }, { "epoch": 0.37624500304898706, "grad_norm": 5.47523307800293, "learning_rate": 9.342597029228559e-05, "loss": 0.8302, "step": 5553 }, { "epoch": 0.37631275831695915, "grad_norm": 7.930117130279541, "learning_rate": 9.342460127318777e-05, "loss": 0.9565, "step": 5554 }, { "epoch": 0.37638051358493124, "grad_norm": 6.38248348236084, "learning_rate": 9.342323225408996e-05, "loss": 0.8815, "step": 5555 }, { "epoch": 0.37644826885290333, "grad_norm": 6.382977485656738, "learning_rate": 9.342186323499214e-05, "loss": 0.8472, "step": 5556 }, { "epoch": 0.3765160241208754, "grad_norm": 6.029202461242676, "learning_rate": 9.342049421589432e-05, "loss": 0.8102, "step": 5557 }, { "epoch": 0.37658377938884746, "grad_norm": 9.764986991882324, "learning_rate": 9.34191251967965e-05, "loss": 0.7955, "step": 5558 }, { "epoch": 0.37665153465681955, "grad_norm": 8.568723678588867, "learning_rate": 9.341775617769868e-05, "loss": 1.0919, "step": 5559 }, { "epoch": 0.37671928992479164, "grad_norm": 13.348160743713379, "learning_rate": 9.341638715860087e-05, "loss": 1.202, "step": 5560 }, { "epoch": 0.37678704519276374, "grad_norm": 7.161466121673584, "learning_rate": 9.341501813950306e-05, "loss": 0.909, "step": 5561 }, { "epoch": 0.37685480046073583, "grad_norm": 8.446759223937988, "learning_rate": 9.341364912040524e-05, "loss": 0.8871, "step": 5562 }, { "epoch": 0.3769225557287079, "grad_norm": 7.228363037109375, "learning_rate": 9.341228010130742e-05, "loss": 0.961, "step": 5563 }, { "epoch": 0.37699031099668, "grad_norm": 7.427947998046875, "learning_rate": 9.341091108220961e-05, "loss": 1.1086, "step": 5564 }, { "epoch": 0.3770580662646521, "grad_norm": 8.64295482635498, "learning_rate": 9.340954206311179e-05, "loss": 0.9684, "step": 5565 }, { "epoch": 0.37712582153262414, "grad_norm": 10.287007331848145, "learning_rate": 9.340817304401397e-05, "loss": 1.0202, "step": 5566 }, { "epoch": 0.37719357680059623, "grad_norm": 7.803440570831299, "learning_rate": 9.340680402491615e-05, "loss": 0.8545, "step": 5567 }, { "epoch": 0.3772613320685683, "grad_norm": 7.74205207824707, "learning_rate": 9.340543500581833e-05, "loss": 1.0585, "step": 5568 }, { "epoch": 0.3773290873365404, "grad_norm": 6.5861053466796875, "learning_rate": 9.340406598672053e-05, "loss": 0.8838, "step": 5569 }, { "epoch": 0.3773968426045125, "grad_norm": 7.1810431480407715, "learning_rate": 9.34026969676227e-05, "loss": 0.8669, "step": 5570 }, { "epoch": 0.3774645978724846, "grad_norm": 7.8471808433532715, "learning_rate": 9.340132794852489e-05, "loss": 1.0412, "step": 5571 }, { "epoch": 0.3775323531404567, "grad_norm": 7.740540027618408, "learning_rate": 9.339995892942707e-05, "loss": 0.944, "step": 5572 }, { "epoch": 0.3776001084084288, "grad_norm": 7.580658912658691, "learning_rate": 9.339858991032925e-05, "loss": 0.8676, "step": 5573 }, { "epoch": 0.3776678636764008, "grad_norm": 9.143624305725098, "learning_rate": 9.339722089123144e-05, "loss": 0.9752, "step": 5574 }, { "epoch": 0.3777356189443729, "grad_norm": 8.142240524291992, "learning_rate": 9.339585187213362e-05, "loss": 0.9733, "step": 5575 }, { "epoch": 0.377803374212345, "grad_norm": 8.034200668334961, "learning_rate": 9.33944828530358e-05, "loss": 0.9867, "step": 5576 }, { "epoch": 0.3778711294803171, "grad_norm": 7.345114231109619, "learning_rate": 9.339311383393798e-05, "loss": 0.9401, "step": 5577 }, { "epoch": 0.3779388847482892, "grad_norm": 7.522977828979492, "learning_rate": 9.339174481484018e-05, "loss": 0.965, "step": 5578 }, { "epoch": 0.3780066400162613, "grad_norm": 6.02646541595459, "learning_rate": 9.339037579574236e-05, "loss": 0.9446, "step": 5579 }, { "epoch": 0.37807439528423337, "grad_norm": 6.161006450653076, "learning_rate": 9.338900677664454e-05, "loss": 0.8803, "step": 5580 }, { "epoch": 0.37814215055220546, "grad_norm": 6.318620681762695, "learning_rate": 9.338763775754672e-05, "loss": 0.646, "step": 5581 }, { "epoch": 0.3782099058201775, "grad_norm": 5.793397903442383, "learning_rate": 9.33862687384489e-05, "loss": 0.7483, "step": 5582 }, { "epoch": 0.3782776610881496, "grad_norm": 7.235496997833252, "learning_rate": 9.338489971935109e-05, "loss": 0.9768, "step": 5583 }, { "epoch": 0.3783454163561217, "grad_norm": 6.682423114776611, "learning_rate": 9.338353070025327e-05, "loss": 0.8466, "step": 5584 }, { "epoch": 0.37841317162409377, "grad_norm": 9.257969856262207, "learning_rate": 9.338216168115545e-05, "loss": 1.1546, "step": 5585 }, { "epoch": 0.37848092689206586, "grad_norm": 7.223050594329834, "learning_rate": 9.338079266205763e-05, "loss": 0.7594, "step": 5586 }, { "epoch": 0.37854868216003795, "grad_norm": 7.735831260681152, "learning_rate": 9.337942364295983e-05, "loss": 1.0392, "step": 5587 }, { "epoch": 0.37861643742801004, "grad_norm": 6.439917087554932, "learning_rate": 9.3378054623862e-05, "loss": 1.0901, "step": 5588 }, { "epoch": 0.37868419269598214, "grad_norm": 7.018877983093262, "learning_rate": 9.337668560476419e-05, "loss": 0.9918, "step": 5589 }, { "epoch": 0.37875194796395417, "grad_norm": 5.858334541320801, "learning_rate": 9.337531658566637e-05, "loss": 0.6788, "step": 5590 }, { "epoch": 0.37881970323192626, "grad_norm": 8.445877075195312, "learning_rate": 9.337394756656856e-05, "loss": 0.9777, "step": 5591 }, { "epoch": 0.37888745849989836, "grad_norm": 6.7996439933776855, "learning_rate": 9.337257854747074e-05, "loss": 1.0086, "step": 5592 }, { "epoch": 0.37895521376787045, "grad_norm": 6.983299255371094, "learning_rate": 9.337120952837292e-05, "loss": 0.9981, "step": 5593 }, { "epoch": 0.37902296903584254, "grad_norm": 7.999194622039795, "learning_rate": 9.336984050927511e-05, "loss": 0.9166, "step": 5594 }, { "epoch": 0.37909072430381463, "grad_norm": 7.698132038116455, "learning_rate": 9.33684714901773e-05, "loss": 0.8333, "step": 5595 }, { "epoch": 0.3791584795717867, "grad_norm": 5.9128217697143555, "learning_rate": 9.336710247107948e-05, "loss": 0.7822, "step": 5596 }, { "epoch": 0.3792262348397588, "grad_norm": 7.200125217437744, "learning_rate": 9.336573345198167e-05, "loss": 0.906, "step": 5597 }, { "epoch": 0.37929399010773085, "grad_norm": 5.82244348526001, "learning_rate": 9.336436443288385e-05, "loss": 0.7376, "step": 5598 }, { "epoch": 0.37936174537570294, "grad_norm": 6.775420188903809, "learning_rate": 9.336299541378603e-05, "loss": 0.6975, "step": 5599 }, { "epoch": 0.37942950064367503, "grad_norm": 6.542332649230957, "learning_rate": 9.336162639468821e-05, "loss": 0.9788, "step": 5600 }, { "epoch": 0.3794972559116471, "grad_norm": 6.677700519561768, "learning_rate": 9.33602573755904e-05, "loss": 0.79, "step": 5601 }, { "epoch": 0.3795650111796192, "grad_norm": 8.561968803405762, "learning_rate": 9.335888835649258e-05, "loss": 0.9499, "step": 5602 }, { "epoch": 0.3796327664475913, "grad_norm": 7.0320234298706055, "learning_rate": 9.335751933739477e-05, "loss": 0.865, "step": 5603 }, { "epoch": 0.3797005217155634, "grad_norm": 7.31481409072876, "learning_rate": 9.335615031829695e-05, "loss": 1.0356, "step": 5604 }, { "epoch": 0.3797682769835355, "grad_norm": 8.205849647521973, "learning_rate": 9.335478129919913e-05, "loss": 0.7836, "step": 5605 }, { "epoch": 0.3798360322515075, "grad_norm": 6.882634162902832, "learning_rate": 9.335341228010132e-05, "loss": 0.8879, "step": 5606 }, { "epoch": 0.3799037875194796, "grad_norm": 8.171550750732422, "learning_rate": 9.33520432610035e-05, "loss": 0.8849, "step": 5607 }, { "epoch": 0.3799715427874517, "grad_norm": 7.184536457061768, "learning_rate": 9.335067424190568e-05, "loss": 0.8206, "step": 5608 }, { "epoch": 0.3800392980554238, "grad_norm": 6.50775146484375, "learning_rate": 9.334930522280786e-05, "loss": 0.7802, "step": 5609 }, { "epoch": 0.3801070533233959, "grad_norm": 7.537467002868652, "learning_rate": 9.334793620371005e-05, "loss": 1.1643, "step": 5610 }, { "epoch": 0.380174808591368, "grad_norm": 6.740983486175537, "learning_rate": 9.334656718461223e-05, "loss": 0.686, "step": 5611 }, { "epoch": 0.3802425638593401, "grad_norm": 6.01100492477417, "learning_rate": 9.334519816551442e-05, "loss": 0.8931, "step": 5612 }, { "epoch": 0.38031031912731217, "grad_norm": 6.056909084320068, "learning_rate": 9.33438291464166e-05, "loss": 0.7861, "step": 5613 }, { "epoch": 0.38037807439528426, "grad_norm": 6.271320343017578, "learning_rate": 9.334246012731878e-05, "loss": 0.8277, "step": 5614 }, { "epoch": 0.3804458296632563, "grad_norm": 5.80530309677124, "learning_rate": 9.334109110822097e-05, "loss": 0.6765, "step": 5615 }, { "epoch": 0.3805135849312284, "grad_norm": 6.031884670257568, "learning_rate": 9.333972208912315e-05, "loss": 0.6482, "step": 5616 }, { "epoch": 0.3805813401992005, "grad_norm": 7.629202842712402, "learning_rate": 9.333835307002533e-05, "loss": 1.0782, "step": 5617 }, { "epoch": 0.38064909546717257, "grad_norm": 8.182783126831055, "learning_rate": 9.333698405092751e-05, "loss": 0.8355, "step": 5618 }, { "epoch": 0.38071685073514466, "grad_norm": 7.317597389221191, "learning_rate": 9.33356150318297e-05, "loss": 0.9035, "step": 5619 }, { "epoch": 0.38078460600311675, "grad_norm": 8.783252716064453, "learning_rate": 9.333424601273189e-05, "loss": 1.1105, "step": 5620 }, { "epoch": 0.38085236127108885, "grad_norm": 6.5179290771484375, "learning_rate": 9.333287699363407e-05, "loss": 0.7859, "step": 5621 }, { "epoch": 0.38092011653906094, "grad_norm": 7.544989585876465, "learning_rate": 9.333150797453625e-05, "loss": 0.8601, "step": 5622 }, { "epoch": 0.380987871807033, "grad_norm": 7.307798862457275, "learning_rate": 9.333013895543843e-05, "loss": 1.071, "step": 5623 }, { "epoch": 0.38105562707500507, "grad_norm": 6.946296215057373, "learning_rate": 9.332876993634062e-05, "loss": 0.8445, "step": 5624 }, { "epoch": 0.38112338234297716, "grad_norm": 8.099409103393555, "learning_rate": 9.33274009172428e-05, "loss": 0.9821, "step": 5625 }, { "epoch": 0.38119113761094925, "grad_norm": 7.492844104766846, "learning_rate": 9.332603189814498e-05, "loss": 0.9696, "step": 5626 }, { "epoch": 0.38125889287892134, "grad_norm": 6.632567882537842, "learning_rate": 9.332466287904716e-05, "loss": 0.8853, "step": 5627 }, { "epoch": 0.38132664814689343, "grad_norm": 5.6510748863220215, "learning_rate": 9.332329385994934e-05, "loss": 0.8686, "step": 5628 }, { "epoch": 0.3813944034148655, "grad_norm": 6.428233623504639, "learning_rate": 9.332192484085154e-05, "loss": 0.8912, "step": 5629 }, { "epoch": 0.3814621586828376, "grad_norm": 8.161954879760742, "learning_rate": 9.332055582175372e-05, "loss": 0.888, "step": 5630 }, { "epoch": 0.38152991395080965, "grad_norm": 7.743470191955566, "learning_rate": 9.33191868026559e-05, "loss": 0.8759, "step": 5631 }, { "epoch": 0.38159766921878174, "grad_norm": 7.648897647857666, "learning_rate": 9.331781778355808e-05, "loss": 1.2331, "step": 5632 }, { "epoch": 0.38166542448675383, "grad_norm": 7.748523235321045, "learning_rate": 9.331644876446027e-05, "loss": 1.1732, "step": 5633 }, { "epoch": 0.3817331797547259, "grad_norm": 7.337912082672119, "learning_rate": 9.331507974536245e-05, "loss": 0.8043, "step": 5634 }, { "epoch": 0.381800935022698, "grad_norm": 7.525491714477539, "learning_rate": 9.331371072626463e-05, "loss": 0.8762, "step": 5635 }, { "epoch": 0.3818686902906701, "grad_norm": 7.477416515350342, "learning_rate": 9.331234170716681e-05, "loss": 0.9053, "step": 5636 }, { "epoch": 0.3819364455586422, "grad_norm": 6.855381965637207, "learning_rate": 9.331097268806899e-05, "loss": 0.9124, "step": 5637 }, { "epoch": 0.3820042008266143, "grad_norm": 7.663267135620117, "learning_rate": 9.330960366897119e-05, "loss": 0.8139, "step": 5638 }, { "epoch": 0.38207195609458633, "grad_norm": 6.08014440536499, "learning_rate": 9.330823464987337e-05, "loss": 0.7084, "step": 5639 }, { "epoch": 0.3821397113625584, "grad_norm": 7.804579734802246, "learning_rate": 9.330686563077555e-05, "loss": 1.206, "step": 5640 }, { "epoch": 0.3822074666305305, "grad_norm": 7.1148552894592285, "learning_rate": 9.330549661167774e-05, "loss": 1.1221, "step": 5641 }, { "epoch": 0.3822752218985026, "grad_norm": 6.727654457092285, "learning_rate": 9.330412759257992e-05, "loss": 1.025, "step": 5642 }, { "epoch": 0.3823429771664747, "grad_norm": 7.059203147888184, "learning_rate": 9.33027585734821e-05, "loss": 1.0819, "step": 5643 }, { "epoch": 0.3824107324344468, "grad_norm": 8.003962516784668, "learning_rate": 9.33013895543843e-05, "loss": 0.95, "step": 5644 }, { "epoch": 0.3824784877024189, "grad_norm": 7.414921283721924, "learning_rate": 9.330002053528647e-05, "loss": 0.9279, "step": 5645 }, { "epoch": 0.38254624297039097, "grad_norm": 5.462070465087891, "learning_rate": 9.329865151618866e-05, "loss": 0.8003, "step": 5646 }, { "epoch": 0.382613998238363, "grad_norm": 7.23457145690918, "learning_rate": 9.329728249709085e-05, "loss": 0.8939, "step": 5647 }, { "epoch": 0.3826817535063351, "grad_norm": 6.371642112731934, "learning_rate": 9.329591347799303e-05, "loss": 0.7125, "step": 5648 }, { "epoch": 0.3827495087743072, "grad_norm": 6.855792999267578, "learning_rate": 9.329454445889521e-05, "loss": 0.745, "step": 5649 }, { "epoch": 0.3828172640422793, "grad_norm": 6.945821762084961, "learning_rate": 9.329317543979739e-05, "loss": 1.1931, "step": 5650 }, { "epoch": 0.3828850193102514, "grad_norm": 7.453825950622559, "learning_rate": 9.329180642069957e-05, "loss": 0.9513, "step": 5651 }, { "epoch": 0.38295277457822346, "grad_norm": 7.040378093719482, "learning_rate": 9.329043740160176e-05, "loss": 0.8896, "step": 5652 }, { "epoch": 0.38302052984619556, "grad_norm": 8.740388870239258, "learning_rate": 9.328906838250394e-05, "loss": 0.771, "step": 5653 }, { "epoch": 0.38308828511416765, "grad_norm": 6.791045665740967, "learning_rate": 9.328769936340613e-05, "loss": 0.8835, "step": 5654 }, { "epoch": 0.3831560403821397, "grad_norm": 6.560173511505127, "learning_rate": 9.32863303443083e-05, "loss": 0.8516, "step": 5655 }, { "epoch": 0.3832237956501118, "grad_norm": 6.763086795806885, "learning_rate": 9.32849613252105e-05, "loss": 0.7976, "step": 5656 }, { "epoch": 0.38329155091808387, "grad_norm": 5.9037370681762695, "learning_rate": 9.328359230611268e-05, "loss": 0.943, "step": 5657 }, { "epoch": 0.38335930618605596, "grad_norm": 4.944889545440674, "learning_rate": 9.328222328701486e-05, "loss": 0.7913, "step": 5658 }, { "epoch": 0.38342706145402805, "grad_norm": 8.701375007629395, "learning_rate": 9.328085426791704e-05, "loss": 0.8, "step": 5659 }, { "epoch": 0.38349481672200014, "grad_norm": 7.1927289962768555, "learning_rate": 9.327948524881922e-05, "loss": 0.8888, "step": 5660 }, { "epoch": 0.38356257198997223, "grad_norm": 8.781030654907227, "learning_rate": 9.327811622972141e-05, "loss": 0.951, "step": 5661 }, { "epoch": 0.3836303272579443, "grad_norm": 7.27484655380249, "learning_rate": 9.32767472106236e-05, "loss": 0.8034, "step": 5662 }, { "epoch": 0.3836980825259164, "grad_norm": 7.739286422729492, "learning_rate": 9.327537819152578e-05, "loss": 1.0769, "step": 5663 }, { "epoch": 0.38376583779388845, "grad_norm": 7.0264973640441895, "learning_rate": 9.327400917242796e-05, "loss": 1.0652, "step": 5664 }, { "epoch": 0.38383359306186055, "grad_norm": 6.887399673461914, "learning_rate": 9.327264015333015e-05, "loss": 0.7986, "step": 5665 }, { "epoch": 0.38390134832983264, "grad_norm": 9.081385612487793, "learning_rate": 9.327127113423233e-05, "loss": 1.2249, "step": 5666 }, { "epoch": 0.38396910359780473, "grad_norm": 7.966154098510742, "learning_rate": 9.326990211513451e-05, "loss": 0.8924, "step": 5667 }, { "epoch": 0.3840368588657768, "grad_norm": 7.321074962615967, "learning_rate": 9.326853309603669e-05, "loss": 1.0022, "step": 5668 }, { "epoch": 0.3841046141337489, "grad_norm": 7.060865879058838, "learning_rate": 9.326716407693887e-05, "loss": 0.8849, "step": 5669 }, { "epoch": 0.384172369401721, "grad_norm": 7.009364604949951, "learning_rate": 9.326579505784106e-05, "loss": 0.9032, "step": 5670 }, { "epoch": 0.3842401246696931, "grad_norm": 8.417223930358887, "learning_rate": 9.326442603874325e-05, "loss": 1.1162, "step": 5671 }, { "epoch": 0.38430787993766513, "grad_norm": 6.774781227111816, "learning_rate": 9.326305701964543e-05, "loss": 0.773, "step": 5672 }, { "epoch": 0.3843756352056372, "grad_norm": 6.5464911460876465, "learning_rate": 9.32616880005476e-05, "loss": 1.0374, "step": 5673 }, { "epoch": 0.3844433904736093, "grad_norm": 6.618286609649658, "learning_rate": 9.326031898144979e-05, "loss": 0.9538, "step": 5674 }, { "epoch": 0.3845111457415814, "grad_norm": 7.598459720611572, "learning_rate": 9.325894996235198e-05, "loss": 0.7811, "step": 5675 }, { "epoch": 0.3845789010095535, "grad_norm": 6.456278324127197, "learning_rate": 9.325758094325416e-05, "loss": 1.055, "step": 5676 }, { "epoch": 0.3846466562775256, "grad_norm": 7.940021991729736, "learning_rate": 9.325621192415634e-05, "loss": 1.1605, "step": 5677 }, { "epoch": 0.3847144115454977, "grad_norm": 8.113789558410645, "learning_rate": 9.325484290505852e-05, "loss": 1.0405, "step": 5678 }, { "epoch": 0.3847821668134698, "grad_norm": 7.918695449829102, "learning_rate": 9.325347388596071e-05, "loss": 0.8019, "step": 5679 }, { "epoch": 0.3848499220814418, "grad_norm": 8.129600524902344, "learning_rate": 9.32521048668629e-05, "loss": 0.9581, "step": 5680 }, { "epoch": 0.3849176773494139, "grad_norm": 6.6353230476379395, "learning_rate": 9.325073584776508e-05, "loss": 0.8932, "step": 5681 }, { "epoch": 0.384985432617386, "grad_norm": 6.153687477111816, "learning_rate": 9.324936682866726e-05, "loss": 0.7646, "step": 5682 }, { "epoch": 0.3850531878853581, "grad_norm": 6.837560653686523, "learning_rate": 9.324799780956944e-05, "loss": 0.8998, "step": 5683 }, { "epoch": 0.3851209431533302, "grad_norm": 8.48747444152832, "learning_rate": 9.324662879047163e-05, "loss": 0.7761, "step": 5684 }, { "epoch": 0.38518869842130227, "grad_norm": 10.725845336914062, "learning_rate": 9.324525977137381e-05, "loss": 1.1755, "step": 5685 }, { "epoch": 0.38525645368927436, "grad_norm": 5.731265544891357, "learning_rate": 9.324389075227599e-05, "loss": 0.8974, "step": 5686 }, { "epoch": 0.38532420895724645, "grad_norm": 8.479190826416016, "learning_rate": 9.324252173317818e-05, "loss": 0.9307, "step": 5687 }, { "epoch": 0.3853919642252185, "grad_norm": 6.539626598358154, "learning_rate": 9.324115271408037e-05, "loss": 0.9463, "step": 5688 }, { "epoch": 0.3854597194931906, "grad_norm": 7.225162029266357, "learning_rate": 9.323978369498255e-05, "loss": 0.8206, "step": 5689 }, { "epoch": 0.38552747476116267, "grad_norm": 8.779760360717773, "learning_rate": 9.323841467588474e-05, "loss": 1.1208, "step": 5690 }, { "epoch": 0.38559523002913476, "grad_norm": 7.580684185028076, "learning_rate": 9.323704565678692e-05, "loss": 0.7993, "step": 5691 }, { "epoch": 0.38566298529710685, "grad_norm": 8.699392318725586, "learning_rate": 9.32356766376891e-05, "loss": 1.073, "step": 5692 }, { "epoch": 0.38573074056507894, "grad_norm": 7.190006256103516, "learning_rate": 9.32343076185913e-05, "loss": 1.0204, "step": 5693 }, { "epoch": 0.38579849583305104, "grad_norm": 9.289970397949219, "learning_rate": 9.323293859949347e-05, "loss": 0.7091, "step": 5694 }, { "epoch": 0.3858662511010231, "grad_norm": 9.533041000366211, "learning_rate": 9.323156958039565e-05, "loss": 1.103, "step": 5695 }, { "epoch": 0.38593400636899516, "grad_norm": 7.375948905944824, "learning_rate": 9.323020056129783e-05, "loss": 0.9815, "step": 5696 }, { "epoch": 0.38600176163696726, "grad_norm": 8.580230712890625, "learning_rate": 9.322883154220003e-05, "loss": 1.1056, "step": 5697 }, { "epoch": 0.38606951690493935, "grad_norm": 8.332324981689453, "learning_rate": 9.322746252310221e-05, "loss": 0.8809, "step": 5698 }, { "epoch": 0.38613727217291144, "grad_norm": 6.298001289367676, "learning_rate": 9.322609350400439e-05, "loss": 0.9255, "step": 5699 }, { "epoch": 0.38620502744088353, "grad_norm": 7.031383514404297, "learning_rate": 9.322472448490657e-05, "loss": 0.9196, "step": 5700 }, { "epoch": 0.3862727827088556, "grad_norm": 6.791995048522949, "learning_rate": 9.322335546580875e-05, "loss": 0.8184, "step": 5701 }, { "epoch": 0.3863405379768277, "grad_norm": 7.81817102432251, "learning_rate": 9.322198644671094e-05, "loss": 0.9331, "step": 5702 }, { "epoch": 0.3864082932447998, "grad_norm": 7.933851718902588, "learning_rate": 9.322061742761312e-05, "loss": 0.872, "step": 5703 }, { "epoch": 0.38647604851277184, "grad_norm": 7.805744171142578, "learning_rate": 9.32192484085153e-05, "loss": 0.8378, "step": 5704 }, { "epoch": 0.38654380378074393, "grad_norm": 9.205484390258789, "learning_rate": 9.321787938941749e-05, "loss": 0.9853, "step": 5705 }, { "epoch": 0.386611559048716, "grad_norm": 7.313584804534912, "learning_rate": 9.321651037031967e-05, "loss": 0.7576, "step": 5706 }, { "epoch": 0.3866793143166881, "grad_norm": 10.125847816467285, "learning_rate": 9.321514135122186e-05, "loss": 0.8497, "step": 5707 }, { "epoch": 0.3867470695846602, "grad_norm": 7.392860412597656, "learning_rate": 9.321377233212404e-05, "loss": 0.9151, "step": 5708 }, { "epoch": 0.3868148248526323, "grad_norm": 8.305766105651855, "learning_rate": 9.321240331302622e-05, "loss": 1.1604, "step": 5709 }, { "epoch": 0.3868825801206044, "grad_norm": 5.897716522216797, "learning_rate": 9.32110342939284e-05, "loss": 0.6471, "step": 5710 }, { "epoch": 0.3869503353885765, "grad_norm": 6.7423200607299805, "learning_rate": 9.32096652748306e-05, "loss": 0.7968, "step": 5711 }, { "epoch": 0.3870180906565485, "grad_norm": 8.511382102966309, "learning_rate": 9.320829625573277e-05, "loss": 1.1875, "step": 5712 }, { "epoch": 0.3870858459245206, "grad_norm": 8.105764389038086, "learning_rate": 9.320692723663495e-05, "loss": 1.0717, "step": 5713 }, { "epoch": 0.3871536011924927, "grad_norm": 6.555315971374512, "learning_rate": 9.320555821753714e-05, "loss": 0.7491, "step": 5714 }, { "epoch": 0.3872213564604648, "grad_norm": 7.481291770935059, "learning_rate": 9.320418919843932e-05, "loss": 1.0057, "step": 5715 }, { "epoch": 0.3872891117284369, "grad_norm": 8.126254081726074, "learning_rate": 9.320282017934151e-05, "loss": 0.94, "step": 5716 }, { "epoch": 0.387356866996409, "grad_norm": 5.760578632354736, "learning_rate": 9.320145116024369e-05, "loss": 0.8861, "step": 5717 }, { "epoch": 0.38742462226438107, "grad_norm": 7.664496421813965, "learning_rate": 9.320008214114587e-05, "loss": 0.8005, "step": 5718 }, { "epoch": 0.38749237753235316, "grad_norm": 8.473989486694336, "learning_rate": 9.319871312204805e-05, "loss": 0.8196, "step": 5719 }, { "epoch": 0.38756013280032525, "grad_norm": 8.553350448608398, "learning_rate": 9.319734410295024e-05, "loss": 0.8356, "step": 5720 }, { "epoch": 0.3876278880682973, "grad_norm": 7.713801860809326, "learning_rate": 9.319597508385242e-05, "loss": 1.0326, "step": 5721 }, { "epoch": 0.3876956433362694, "grad_norm": 7.013209819793701, "learning_rate": 9.31946060647546e-05, "loss": 1.1149, "step": 5722 }, { "epoch": 0.38776339860424147, "grad_norm": 8.171217918395996, "learning_rate": 9.319323704565679e-05, "loss": 1.141, "step": 5723 }, { "epoch": 0.38783115387221356, "grad_norm": 8.459320068359375, "learning_rate": 9.319186802655897e-05, "loss": 0.8859, "step": 5724 }, { "epoch": 0.38789890914018565, "grad_norm": 6.681031227111816, "learning_rate": 9.319049900746116e-05, "loss": 0.8746, "step": 5725 }, { "epoch": 0.38796666440815775, "grad_norm": 6.796359539031982, "learning_rate": 9.318912998836334e-05, "loss": 1.0891, "step": 5726 }, { "epoch": 0.38803441967612984, "grad_norm": 6.819052696228027, "learning_rate": 9.318776096926552e-05, "loss": 0.8087, "step": 5727 }, { "epoch": 0.38810217494410193, "grad_norm": 6.746981620788574, "learning_rate": 9.31863919501677e-05, "loss": 1.1591, "step": 5728 }, { "epoch": 0.38816993021207397, "grad_norm": 7.767449855804443, "learning_rate": 9.318502293106988e-05, "loss": 1.1012, "step": 5729 }, { "epoch": 0.38823768548004606, "grad_norm": 7.967437744140625, "learning_rate": 9.318365391197207e-05, "loss": 1.0262, "step": 5730 }, { "epoch": 0.38830544074801815, "grad_norm": 6.053138256072998, "learning_rate": 9.318228489287426e-05, "loss": 0.6821, "step": 5731 }, { "epoch": 0.38837319601599024, "grad_norm": 7.298278331756592, "learning_rate": 9.318091587377644e-05, "loss": 1.0869, "step": 5732 }, { "epoch": 0.38844095128396233, "grad_norm": 6.621678352355957, "learning_rate": 9.317954685467863e-05, "loss": 0.9388, "step": 5733 }, { "epoch": 0.3885087065519344, "grad_norm": 7.588876247406006, "learning_rate": 9.317817783558081e-05, "loss": 0.8775, "step": 5734 }, { "epoch": 0.3885764618199065, "grad_norm": 6.0856242179870605, "learning_rate": 9.317680881648299e-05, "loss": 0.8057, "step": 5735 }, { "epoch": 0.3886442170878786, "grad_norm": 6.144415378570557, "learning_rate": 9.317543979738518e-05, "loss": 0.9205, "step": 5736 }, { "epoch": 0.38871197235585064, "grad_norm": 8.25857925415039, "learning_rate": 9.317407077828736e-05, "loss": 0.9608, "step": 5737 }, { "epoch": 0.38877972762382274, "grad_norm": 9.107572555541992, "learning_rate": 9.317270175918954e-05, "loss": 0.7823, "step": 5738 }, { "epoch": 0.3888474828917948, "grad_norm": 6.879073619842529, "learning_rate": 9.317133274009174e-05, "loss": 0.9137, "step": 5739 }, { "epoch": 0.3889152381597669, "grad_norm": 6.114928245544434, "learning_rate": 9.316996372099392e-05, "loss": 0.9713, "step": 5740 }, { "epoch": 0.388982993427739, "grad_norm": 6.454460620880127, "learning_rate": 9.31685947018961e-05, "loss": 0.9224, "step": 5741 }, { "epoch": 0.3890507486957111, "grad_norm": 7.231433868408203, "learning_rate": 9.316722568279828e-05, "loss": 0.9087, "step": 5742 }, { "epoch": 0.3891185039636832, "grad_norm": 7.442675590515137, "learning_rate": 9.316585666370047e-05, "loss": 0.9038, "step": 5743 }, { "epoch": 0.3891862592316553, "grad_norm": 6.472715854644775, "learning_rate": 9.316448764460265e-05, "loss": 0.7429, "step": 5744 }, { "epoch": 0.3892540144996273, "grad_norm": 7.20242977142334, "learning_rate": 9.316311862550483e-05, "loss": 0.9592, "step": 5745 }, { "epoch": 0.3893217697675994, "grad_norm": 5.8578715324401855, "learning_rate": 9.316174960640701e-05, "loss": 0.901, "step": 5746 }, { "epoch": 0.3893895250355715, "grad_norm": 5.1851983070373535, "learning_rate": 9.31603805873092e-05, "loss": 0.6872, "step": 5747 }, { "epoch": 0.3894572803035436, "grad_norm": 5.5100531578063965, "learning_rate": 9.315901156821139e-05, "loss": 0.8329, "step": 5748 }, { "epoch": 0.3895250355715157, "grad_norm": 7.71047830581665, "learning_rate": 9.315764254911357e-05, "loss": 0.8726, "step": 5749 }, { "epoch": 0.3895927908394878, "grad_norm": 5.928194522857666, "learning_rate": 9.315627353001575e-05, "loss": 0.6503, "step": 5750 }, { "epoch": 0.38966054610745987, "grad_norm": 6.636775016784668, "learning_rate": 9.315490451091793e-05, "loss": 0.8905, "step": 5751 }, { "epoch": 0.38972830137543196, "grad_norm": 7.5196027755737305, "learning_rate": 9.315353549182012e-05, "loss": 0.9333, "step": 5752 }, { "epoch": 0.389796056643404, "grad_norm": 7.1420087814331055, "learning_rate": 9.31521664727223e-05, "loss": 0.777, "step": 5753 }, { "epoch": 0.3898638119113761, "grad_norm": 6.713923931121826, "learning_rate": 9.315079745362448e-05, "loss": 0.7667, "step": 5754 }, { "epoch": 0.3899315671793482, "grad_norm": 6.81540584564209, "learning_rate": 9.314942843452666e-05, "loss": 0.8838, "step": 5755 }, { "epoch": 0.3899993224473203, "grad_norm": 7.069910049438477, "learning_rate": 9.314805941542885e-05, "loss": 0.9638, "step": 5756 }, { "epoch": 0.39006707771529237, "grad_norm": 6.350069999694824, "learning_rate": 9.314669039633104e-05, "loss": 0.8163, "step": 5757 }, { "epoch": 0.39013483298326446, "grad_norm": 5.552088737487793, "learning_rate": 9.314532137723322e-05, "loss": 0.7061, "step": 5758 }, { "epoch": 0.39020258825123655, "grad_norm": 7.8301167488098145, "learning_rate": 9.31439523581354e-05, "loss": 1.0415, "step": 5759 }, { "epoch": 0.39027034351920864, "grad_norm": 7.079575538635254, "learning_rate": 9.314258333903758e-05, "loss": 0.7247, "step": 5760 }, { "epoch": 0.3903380987871807, "grad_norm": 7.282689571380615, "learning_rate": 9.314121431993976e-05, "loss": 0.8361, "step": 5761 }, { "epoch": 0.39040585405515277, "grad_norm": 7.18900203704834, "learning_rate": 9.313984530084195e-05, "loss": 1.0206, "step": 5762 }, { "epoch": 0.39047360932312486, "grad_norm": 5.926472187042236, "learning_rate": 9.313847628174413e-05, "loss": 0.8551, "step": 5763 }, { "epoch": 0.39054136459109695, "grad_norm": 8.176214218139648, "learning_rate": 9.313710726264631e-05, "loss": 0.8106, "step": 5764 }, { "epoch": 0.39060911985906904, "grad_norm": 7.093206882476807, "learning_rate": 9.31357382435485e-05, "loss": 0.9986, "step": 5765 }, { "epoch": 0.39067687512704113, "grad_norm": 8.531807899475098, "learning_rate": 9.313436922445069e-05, "loss": 0.9368, "step": 5766 }, { "epoch": 0.3907446303950132, "grad_norm": 6.479072570800781, "learning_rate": 9.313300020535287e-05, "loss": 0.858, "step": 5767 }, { "epoch": 0.3908123856629853, "grad_norm": 6.749851226806641, "learning_rate": 9.313163118625505e-05, "loss": 0.638, "step": 5768 }, { "epoch": 0.3908801409309574, "grad_norm": 8.79270076751709, "learning_rate": 9.313026216715723e-05, "loss": 0.7734, "step": 5769 }, { "epoch": 0.39094789619892945, "grad_norm": 6.972496509552002, "learning_rate": 9.312889314805941e-05, "loss": 0.8557, "step": 5770 }, { "epoch": 0.39101565146690154, "grad_norm": 6.48140811920166, "learning_rate": 9.31275241289616e-05, "loss": 0.8183, "step": 5771 }, { "epoch": 0.39108340673487363, "grad_norm": 6.5230536460876465, "learning_rate": 9.312615510986378e-05, "loss": 0.7863, "step": 5772 }, { "epoch": 0.3911511620028457, "grad_norm": 6.982974052429199, "learning_rate": 9.312478609076597e-05, "loss": 0.9604, "step": 5773 }, { "epoch": 0.3912189172708178, "grad_norm": 7.524245262145996, "learning_rate": 9.312341707166815e-05, "loss": 0.8145, "step": 5774 }, { "epoch": 0.3912866725387899, "grad_norm": 7.770455837249756, "learning_rate": 9.312204805257034e-05, "loss": 0.858, "step": 5775 }, { "epoch": 0.391354427806762, "grad_norm": 6.145147800445557, "learning_rate": 9.312067903347252e-05, "loss": 0.6833, "step": 5776 }, { "epoch": 0.3914221830747341, "grad_norm": 7.118679523468018, "learning_rate": 9.31193100143747e-05, "loss": 0.9945, "step": 5777 }, { "epoch": 0.3914899383427061, "grad_norm": 7.793931007385254, "learning_rate": 9.311794099527688e-05, "loss": 1.0227, "step": 5778 }, { "epoch": 0.3915576936106782, "grad_norm": 6.189599514007568, "learning_rate": 9.311657197617907e-05, "loss": 1.0355, "step": 5779 }, { "epoch": 0.3916254488786503, "grad_norm": 8.16638469696045, "learning_rate": 9.311520295708125e-05, "loss": 0.998, "step": 5780 }, { "epoch": 0.3916932041466224, "grad_norm": 7.059429168701172, "learning_rate": 9.311383393798343e-05, "loss": 0.8641, "step": 5781 }, { "epoch": 0.3917609594145945, "grad_norm": 9.555390357971191, "learning_rate": 9.311246491888563e-05, "loss": 0.9243, "step": 5782 }, { "epoch": 0.3918287146825666, "grad_norm": 7.566288948059082, "learning_rate": 9.311109589978781e-05, "loss": 0.8025, "step": 5783 }, { "epoch": 0.3918964699505387, "grad_norm": 8.842116355895996, "learning_rate": 9.310972688068999e-05, "loss": 0.8875, "step": 5784 }, { "epoch": 0.39196422521851076, "grad_norm": 7.426934719085693, "learning_rate": 9.310835786159218e-05, "loss": 0.8116, "step": 5785 }, { "epoch": 0.3920319804864828, "grad_norm": 7.222875595092773, "learning_rate": 9.310698884249436e-05, "loss": 0.8286, "step": 5786 }, { "epoch": 0.3920997357544549, "grad_norm": 8.364874839782715, "learning_rate": 9.310561982339654e-05, "loss": 1.1628, "step": 5787 }, { "epoch": 0.392167491022427, "grad_norm": 6.4203104972839355, "learning_rate": 9.310425080429872e-05, "loss": 0.8501, "step": 5788 }, { "epoch": 0.3922352462903991, "grad_norm": 9.124777793884277, "learning_rate": 9.310288178520092e-05, "loss": 1.1962, "step": 5789 }, { "epoch": 0.39230300155837117, "grad_norm": 7.944338321685791, "learning_rate": 9.31015127661031e-05, "loss": 1.3275, "step": 5790 }, { "epoch": 0.39237075682634326, "grad_norm": 7.256115913391113, "learning_rate": 9.310014374700528e-05, "loss": 0.8594, "step": 5791 }, { "epoch": 0.39243851209431535, "grad_norm": 6.452229976654053, "learning_rate": 9.309877472790746e-05, "loss": 0.7744, "step": 5792 }, { "epoch": 0.39250626736228744, "grad_norm": 8.207562446594238, "learning_rate": 9.309740570880964e-05, "loss": 1.0615, "step": 5793 }, { "epoch": 0.3925740226302595, "grad_norm": 6.627633571624756, "learning_rate": 9.309603668971183e-05, "loss": 1.1951, "step": 5794 }, { "epoch": 0.39264177789823157, "grad_norm": 6.888060092926025, "learning_rate": 9.309466767061401e-05, "loss": 0.7771, "step": 5795 }, { "epoch": 0.39270953316620366, "grad_norm": 5.327685356140137, "learning_rate": 9.30932986515162e-05, "loss": 0.6568, "step": 5796 }, { "epoch": 0.39277728843417575, "grad_norm": 7.033134460449219, "learning_rate": 9.309192963241837e-05, "loss": 0.8601, "step": 5797 }, { "epoch": 0.39284504370214784, "grad_norm": 6.694526195526123, "learning_rate": 9.309056061332057e-05, "loss": 0.9084, "step": 5798 }, { "epoch": 0.39291279897011994, "grad_norm": 6.9023027420043945, "learning_rate": 9.308919159422275e-05, "loss": 0.8983, "step": 5799 }, { "epoch": 0.39298055423809203, "grad_norm": 9.527570724487305, "learning_rate": 9.308782257512493e-05, "loss": 0.9773, "step": 5800 }, { "epoch": 0.3930483095060641, "grad_norm": 7.497427940368652, "learning_rate": 9.308645355602711e-05, "loss": 0.9713, "step": 5801 }, { "epoch": 0.39311606477403616, "grad_norm": 8.860198020935059, "learning_rate": 9.308508453692929e-05, "loss": 0.8866, "step": 5802 }, { "epoch": 0.39318382004200825, "grad_norm": 7.34425687789917, "learning_rate": 9.308371551783148e-05, "loss": 0.9442, "step": 5803 }, { "epoch": 0.39325157530998034, "grad_norm": 10.30170726776123, "learning_rate": 9.308234649873366e-05, "loss": 0.8957, "step": 5804 }, { "epoch": 0.39331933057795243, "grad_norm": 8.141131401062012, "learning_rate": 9.308097747963584e-05, "loss": 1.1858, "step": 5805 }, { "epoch": 0.3933870858459245, "grad_norm": 7.154587268829346, "learning_rate": 9.307960846053802e-05, "loss": 0.9056, "step": 5806 }, { "epoch": 0.3934548411138966, "grad_norm": 6.086984634399414, "learning_rate": 9.30782394414402e-05, "loss": 0.8943, "step": 5807 }, { "epoch": 0.3935225963818687, "grad_norm": 6.48331880569458, "learning_rate": 9.30768704223424e-05, "loss": 0.8631, "step": 5808 }, { "epoch": 0.3935903516498408, "grad_norm": 9.434244155883789, "learning_rate": 9.307550140324458e-05, "loss": 1.0951, "step": 5809 }, { "epoch": 0.39365810691781283, "grad_norm": 7.943146228790283, "learning_rate": 9.307413238414676e-05, "loss": 0.9073, "step": 5810 }, { "epoch": 0.3937258621857849, "grad_norm": 6.200165271759033, "learning_rate": 9.307276336504894e-05, "loss": 0.8406, "step": 5811 }, { "epoch": 0.393793617453757, "grad_norm": 6.221752643585205, "learning_rate": 9.307139434595113e-05, "loss": 0.9181, "step": 5812 }, { "epoch": 0.3938613727217291, "grad_norm": 6.645714282989502, "learning_rate": 9.307002532685331e-05, "loss": 0.8508, "step": 5813 }, { "epoch": 0.3939291279897012, "grad_norm": 7.978506088256836, "learning_rate": 9.30686563077555e-05, "loss": 1.1544, "step": 5814 }, { "epoch": 0.3939968832576733, "grad_norm": 6.967216968536377, "learning_rate": 9.306728728865767e-05, "loss": 0.7956, "step": 5815 }, { "epoch": 0.3940646385256454, "grad_norm": 9.8863525390625, "learning_rate": 9.306591826955986e-05, "loss": 0.9111, "step": 5816 }, { "epoch": 0.3941323937936175, "grad_norm": 7.202954292297363, "learning_rate": 9.306454925046205e-05, "loss": 0.9267, "step": 5817 }, { "epoch": 0.3942001490615895, "grad_norm": 6.386566162109375, "learning_rate": 9.306318023136423e-05, "loss": 0.9494, "step": 5818 }, { "epoch": 0.3942679043295616, "grad_norm": 6.505050182342529, "learning_rate": 9.306181121226641e-05, "loss": 0.904, "step": 5819 }, { "epoch": 0.3943356595975337, "grad_norm": 7.091371536254883, "learning_rate": 9.306044219316859e-05, "loss": 0.7305, "step": 5820 }, { "epoch": 0.3944034148655058, "grad_norm": 7.322198390960693, "learning_rate": 9.305907317407078e-05, "loss": 0.8101, "step": 5821 }, { "epoch": 0.3944711701334779, "grad_norm": 6.345345497131348, "learning_rate": 9.305770415497296e-05, "loss": 0.7821, "step": 5822 }, { "epoch": 0.39453892540144997, "grad_norm": 6.418498992919922, "learning_rate": 9.305633513587514e-05, "loss": 0.8844, "step": 5823 }, { "epoch": 0.39460668066942206, "grad_norm": 7.375457763671875, "learning_rate": 9.305496611677733e-05, "loss": 1.0099, "step": 5824 }, { "epoch": 0.39467443593739415, "grad_norm": 7.758962154388428, "learning_rate": 9.305359709767952e-05, "loss": 1.1459, "step": 5825 }, { "epoch": 0.39474219120536624, "grad_norm": 9.552523612976074, "learning_rate": 9.30522280785817e-05, "loss": 1.0285, "step": 5826 }, { "epoch": 0.3948099464733383, "grad_norm": 7.053111553192139, "learning_rate": 9.305085905948388e-05, "loss": 0.8006, "step": 5827 }, { "epoch": 0.39487770174131037, "grad_norm": 7.239533424377441, "learning_rate": 9.304949004038607e-05, "loss": 0.9594, "step": 5828 }, { "epoch": 0.39494545700928246, "grad_norm": 5.681763172149658, "learning_rate": 9.304812102128825e-05, "loss": 0.7517, "step": 5829 }, { "epoch": 0.39501321227725456, "grad_norm": 7.394958019256592, "learning_rate": 9.304675200219043e-05, "loss": 0.9656, "step": 5830 }, { "epoch": 0.39508096754522665, "grad_norm": 5.894937515258789, "learning_rate": 9.304538298309263e-05, "loss": 0.7568, "step": 5831 }, { "epoch": 0.39514872281319874, "grad_norm": 7.866422653198242, "learning_rate": 9.304401396399481e-05, "loss": 1.1937, "step": 5832 }, { "epoch": 0.39521647808117083, "grad_norm": 6.807196140289307, "learning_rate": 9.304264494489699e-05, "loss": 0.7785, "step": 5833 }, { "epoch": 0.3952842333491429, "grad_norm": 7.117645740509033, "learning_rate": 9.304127592579917e-05, "loss": 0.8462, "step": 5834 }, { "epoch": 0.39535198861711496, "grad_norm": 5.873225212097168, "learning_rate": 9.303990690670136e-05, "loss": 0.8372, "step": 5835 }, { "epoch": 0.39541974388508705, "grad_norm": 6.9400410652160645, "learning_rate": 9.303853788760354e-05, "loss": 0.8154, "step": 5836 }, { "epoch": 0.39548749915305914, "grad_norm": 5.7771100997924805, "learning_rate": 9.303716886850572e-05, "loss": 0.7411, "step": 5837 }, { "epoch": 0.39555525442103123, "grad_norm": 6.784138202667236, "learning_rate": 9.30357998494079e-05, "loss": 1.062, "step": 5838 }, { "epoch": 0.3956230096890033, "grad_norm": 7.726989269256592, "learning_rate": 9.303443083031008e-05, "loss": 0.9726, "step": 5839 }, { "epoch": 0.3956907649569754, "grad_norm": 6.342170715332031, "learning_rate": 9.303306181121228e-05, "loss": 0.8388, "step": 5840 }, { "epoch": 0.3957585202249475, "grad_norm": 8.047952651977539, "learning_rate": 9.303169279211446e-05, "loss": 1.0734, "step": 5841 }, { "epoch": 0.3958262754929196, "grad_norm": 7.073848724365234, "learning_rate": 9.303032377301664e-05, "loss": 0.6968, "step": 5842 }, { "epoch": 0.39589403076089164, "grad_norm": 7.159115791320801, "learning_rate": 9.302895475391882e-05, "loss": 0.7991, "step": 5843 }, { "epoch": 0.3959617860288637, "grad_norm": 6.908319473266602, "learning_rate": 9.302758573482101e-05, "loss": 0.9283, "step": 5844 }, { "epoch": 0.3960295412968358, "grad_norm": 6.5849690437316895, "learning_rate": 9.30262167157232e-05, "loss": 0.8258, "step": 5845 }, { "epoch": 0.3960972965648079, "grad_norm": 8.763665199279785, "learning_rate": 9.302484769662537e-05, "loss": 0.7734, "step": 5846 }, { "epoch": 0.39616505183278, "grad_norm": 7.134938716888428, "learning_rate": 9.302347867752755e-05, "loss": 0.8084, "step": 5847 }, { "epoch": 0.3962328071007521, "grad_norm": 6.418683052062988, "learning_rate": 9.302210965842973e-05, "loss": 0.8225, "step": 5848 }, { "epoch": 0.3963005623687242, "grad_norm": 7.124704837799072, "learning_rate": 9.302074063933193e-05, "loss": 0.6787, "step": 5849 }, { "epoch": 0.3963683176366963, "grad_norm": 6.653177261352539, "learning_rate": 9.301937162023411e-05, "loss": 0.8026, "step": 5850 }, { "epoch": 0.3964360729046683, "grad_norm": 7.981827735900879, "learning_rate": 9.301800260113629e-05, "loss": 0.9281, "step": 5851 }, { "epoch": 0.3965038281726404, "grad_norm": 5.823386192321777, "learning_rate": 9.301663358203847e-05, "loss": 0.8158, "step": 5852 }, { "epoch": 0.3965715834406125, "grad_norm": 8.96346664428711, "learning_rate": 9.301526456294066e-05, "loss": 1.0875, "step": 5853 }, { "epoch": 0.3966393387085846, "grad_norm": 6.114203929901123, "learning_rate": 9.301389554384284e-05, "loss": 0.7896, "step": 5854 }, { "epoch": 0.3967070939765567, "grad_norm": 6.386680603027344, "learning_rate": 9.301252652474502e-05, "loss": 1.0199, "step": 5855 }, { "epoch": 0.39677484924452877, "grad_norm": 8.502519607543945, "learning_rate": 9.30111575056472e-05, "loss": 1.0127, "step": 5856 }, { "epoch": 0.39684260451250086, "grad_norm": 6.875016689300537, "learning_rate": 9.300978848654938e-05, "loss": 0.8263, "step": 5857 }, { "epoch": 0.39691035978047295, "grad_norm": 8.344440460205078, "learning_rate": 9.300841946745158e-05, "loss": 1.0351, "step": 5858 }, { "epoch": 0.396978115048445, "grad_norm": 6.42828893661499, "learning_rate": 9.300705044835376e-05, "loss": 0.8503, "step": 5859 }, { "epoch": 0.3970458703164171, "grad_norm": 6.0403547286987305, "learning_rate": 9.300568142925594e-05, "loss": 0.9078, "step": 5860 }, { "epoch": 0.3971136255843892, "grad_norm": 5.010101795196533, "learning_rate": 9.300431241015812e-05, "loss": 0.8215, "step": 5861 }, { "epoch": 0.39718138085236127, "grad_norm": 6.132750988006592, "learning_rate": 9.30029433910603e-05, "loss": 0.7887, "step": 5862 }, { "epoch": 0.39724913612033336, "grad_norm": 6.075906753540039, "learning_rate": 9.30015743719625e-05, "loss": 0.822, "step": 5863 }, { "epoch": 0.39731689138830545, "grad_norm": 8.225122451782227, "learning_rate": 9.300020535286467e-05, "loss": 0.7771, "step": 5864 }, { "epoch": 0.39738464665627754, "grad_norm": 6.863472938537598, "learning_rate": 9.299883633376685e-05, "loss": 0.9185, "step": 5865 }, { "epoch": 0.39745240192424963, "grad_norm": 7.280022144317627, "learning_rate": 9.299746731466903e-05, "loss": 0.8206, "step": 5866 }, { "epoch": 0.39752015719222167, "grad_norm": 5.646960258483887, "learning_rate": 9.299609829557123e-05, "loss": 0.7897, "step": 5867 }, { "epoch": 0.39758791246019376, "grad_norm": 8.248296737670898, "learning_rate": 9.299472927647341e-05, "loss": 0.802, "step": 5868 }, { "epoch": 0.39765566772816585, "grad_norm": 6.389570236206055, "learning_rate": 9.299336025737559e-05, "loss": 0.7744, "step": 5869 }, { "epoch": 0.39772342299613794, "grad_norm": 6.452336311340332, "learning_rate": 9.299199123827777e-05, "loss": 0.9003, "step": 5870 }, { "epoch": 0.39779117826411003, "grad_norm": 6.798274040222168, "learning_rate": 9.299062221917995e-05, "loss": 0.9585, "step": 5871 }, { "epoch": 0.3978589335320821, "grad_norm": 6.267078876495361, "learning_rate": 9.298925320008214e-05, "loss": 0.7971, "step": 5872 }, { "epoch": 0.3979266888000542, "grad_norm": 7.0529069900512695, "learning_rate": 9.298788418098432e-05, "loss": 0.9384, "step": 5873 }, { "epoch": 0.3979944440680263, "grad_norm": 6.686244964599609, "learning_rate": 9.298651516188652e-05, "loss": 0.8589, "step": 5874 }, { "epoch": 0.3980621993359984, "grad_norm": 7.230234622955322, "learning_rate": 9.29851461427887e-05, "loss": 0.8638, "step": 5875 }, { "epoch": 0.39812995460397044, "grad_norm": 8.14789867401123, "learning_rate": 9.298377712369088e-05, "loss": 0.7231, "step": 5876 }, { "epoch": 0.39819770987194253, "grad_norm": 6.5517497062683105, "learning_rate": 9.298240810459307e-05, "loss": 1.0435, "step": 5877 }, { "epoch": 0.3982654651399146, "grad_norm": 7.095836639404297, "learning_rate": 9.298103908549525e-05, "loss": 0.7551, "step": 5878 }, { "epoch": 0.3983332204078867, "grad_norm": 6.932202339172363, "learning_rate": 9.297967006639743e-05, "loss": 0.9075, "step": 5879 }, { "epoch": 0.3984009756758588, "grad_norm": 8.045002937316895, "learning_rate": 9.297830104729961e-05, "loss": 0.9787, "step": 5880 }, { "epoch": 0.3984687309438309, "grad_norm": 7.1430511474609375, "learning_rate": 9.297693202820181e-05, "loss": 0.8549, "step": 5881 }, { "epoch": 0.398536486211803, "grad_norm": 5.857006549835205, "learning_rate": 9.297556300910399e-05, "loss": 0.7573, "step": 5882 }, { "epoch": 0.3986042414797751, "grad_norm": 6.143594741821289, "learning_rate": 9.297419399000617e-05, "loss": 0.747, "step": 5883 }, { "epoch": 0.3986719967477471, "grad_norm": 7.4967169761657715, "learning_rate": 9.297282497090835e-05, "loss": 1.0366, "step": 5884 }, { "epoch": 0.3987397520157192, "grad_norm": 6.6401166915893555, "learning_rate": 9.297145595181054e-05, "loss": 0.9889, "step": 5885 }, { "epoch": 0.3988075072836913, "grad_norm": 5.577928066253662, "learning_rate": 9.297008693271272e-05, "loss": 0.6917, "step": 5886 }, { "epoch": 0.3988752625516634, "grad_norm": 5.9933061599731445, "learning_rate": 9.29687179136149e-05, "loss": 0.9545, "step": 5887 }, { "epoch": 0.3989430178196355, "grad_norm": 6.353165149688721, "learning_rate": 9.296734889451708e-05, "loss": 0.7512, "step": 5888 }, { "epoch": 0.3990107730876076, "grad_norm": 6.181026458740234, "learning_rate": 9.296597987541926e-05, "loss": 0.767, "step": 5889 }, { "epoch": 0.39907852835557966, "grad_norm": 6.579110622406006, "learning_rate": 9.296461085632146e-05, "loss": 1.0604, "step": 5890 }, { "epoch": 0.39914628362355176, "grad_norm": 6.501206398010254, "learning_rate": 9.296324183722364e-05, "loss": 0.9821, "step": 5891 }, { "epoch": 0.3992140388915238, "grad_norm": 8.253642082214355, "learning_rate": 9.296187281812582e-05, "loss": 0.8827, "step": 5892 }, { "epoch": 0.3992817941594959, "grad_norm": 7.590344429016113, "learning_rate": 9.2960503799028e-05, "loss": 0.9614, "step": 5893 }, { "epoch": 0.399349549427468, "grad_norm": 6.755953311920166, "learning_rate": 9.295913477993018e-05, "loss": 0.8298, "step": 5894 }, { "epoch": 0.39941730469544007, "grad_norm": 6.040559768676758, "learning_rate": 9.295776576083237e-05, "loss": 0.7182, "step": 5895 }, { "epoch": 0.39948505996341216, "grad_norm": 6.301966667175293, "learning_rate": 9.295639674173455e-05, "loss": 0.8545, "step": 5896 }, { "epoch": 0.39955281523138425, "grad_norm": 5.77929162979126, "learning_rate": 9.295502772263673e-05, "loss": 0.9398, "step": 5897 }, { "epoch": 0.39962057049935634, "grad_norm": 6.633763313293457, "learning_rate": 9.295365870353891e-05, "loss": 0.8914, "step": 5898 }, { "epoch": 0.39968832576732843, "grad_norm": 7.260994911193848, "learning_rate": 9.295228968444111e-05, "loss": 0.7725, "step": 5899 }, { "epoch": 0.39975608103530047, "grad_norm": 8.820511817932129, "learning_rate": 9.295092066534329e-05, "loss": 0.7847, "step": 5900 }, { "epoch": 0.39982383630327256, "grad_norm": 7.335788726806641, "learning_rate": 9.294955164624547e-05, "loss": 0.683, "step": 5901 }, { "epoch": 0.39989159157124465, "grad_norm": 7.959702491760254, "learning_rate": 9.294818262714765e-05, "loss": 0.7598, "step": 5902 }, { "epoch": 0.39995934683921675, "grad_norm": 9.680485725402832, "learning_rate": 9.294681360804983e-05, "loss": 1.0359, "step": 5903 }, { "epoch": 0.40002710210718884, "grad_norm": 6.917464733123779, "learning_rate": 9.294544458895202e-05, "loss": 1.0155, "step": 5904 }, { "epoch": 0.40009485737516093, "grad_norm": 6.221781253814697, "learning_rate": 9.29440755698542e-05, "loss": 0.9645, "step": 5905 }, { "epoch": 0.400162612643133, "grad_norm": 6.778574466705322, "learning_rate": 9.294270655075638e-05, "loss": 0.794, "step": 5906 }, { "epoch": 0.4002303679111051, "grad_norm": 7.8015522956848145, "learning_rate": 9.294133753165856e-05, "loss": 0.9472, "step": 5907 }, { "epoch": 0.40029812317907715, "grad_norm": 5.018773555755615, "learning_rate": 9.293996851256076e-05, "loss": 0.8961, "step": 5908 }, { "epoch": 0.40036587844704924, "grad_norm": 6.184563636779785, "learning_rate": 9.293859949346294e-05, "loss": 0.7988, "step": 5909 }, { "epoch": 0.40043363371502133, "grad_norm": 6.593270778656006, "learning_rate": 9.293723047436512e-05, "loss": 0.8248, "step": 5910 }, { "epoch": 0.4005013889829934, "grad_norm": 7.14009952545166, "learning_rate": 9.29358614552673e-05, "loss": 0.8589, "step": 5911 }, { "epoch": 0.4005691442509655, "grad_norm": 8.044157981872559, "learning_rate": 9.293449243616948e-05, "loss": 0.9397, "step": 5912 }, { "epoch": 0.4006368995189376, "grad_norm": 7.3374247550964355, "learning_rate": 9.293312341707167e-05, "loss": 0.8615, "step": 5913 }, { "epoch": 0.4007046547869097, "grad_norm": 6.510500907897949, "learning_rate": 9.293175439797385e-05, "loss": 0.7151, "step": 5914 }, { "epoch": 0.4007724100548818, "grad_norm": 15.020646095275879, "learning_rate": 9.293038537887603e-05, "loss": 0.9349, "step": 5915 }, { "epoch": 0.4008401653228538, "grad_norm": 6.779942989349365, "learning_rate": 9.292901635977821e-05, "loss": 0.8018, "step": 5916 }, { "epoch": 0.4009079205908259, "grad_norm": 6.3740339279174805, "learning_rate": 9.29276473406804e-05, "loss": 0.7691, "step": 5917 }, { "epoch": 0.400975675858798, "grad_norm": 6.902404308319092, "learning_rate": 9.292627832158259e-05, "loss": 0.7431, "step": 5918 }, { "epoch": 0.4010434311267701, "grad_norm": 7.324024200439453, "learning_rate": 9.292490930248477e-05, "loss": 0.9314, "step": 5919 }, { "epoch": 0.4011111863947422, "grad_norm": 8.297179222106934, "learning_rate": 9.292354028338695e-05, "loss": 1.0072, "step": 5920 }, { "epoch": 0.4011789416627143, "grad_norm": 6.1418914794921875, "learning_rate": 9.292217126428914e-05, "loss": 0.948, "step": 5921 }, { "epoch": 0.4012466969306864, "grad_norm": 8.698518753051758, "learning_rate": 9.292080224519132e-05, "loss": 0.8509, "step": 5922 }, { "epoch": 0.40131445219865847, "grad_norm": 6.628043174743652, "learning_rate": 9.29194332260935e-05, "loss": 0.9388, "step": 5923 }, { "epoch": 0.4013822074666305, "grad_norm": 8.38621711730957, "learning_rate": 9.29180642069957e-05, "loss": 1.0795, "step": 5924 }, { "epoch": 0.4014499627346026, "grad_norm": 7.546327114105225, "learning_rate": 9.291669518789788e-05, "loss": 0.865, "step": 5925 }, { "epoch": 0.4015177180025747, "grad_norm": 7.399687767028809, "learning_rate": 9.291532616880006e-05, "loss": 0.8276, "step": 5926 }, { "epoch": 0.4015854732705468, "grad_norm": 6.921968460083008, "learning_rate": 9.291395714970225e-05, "loss": 0.9275, "step": 5927 }, { "epoch": 0.40165322853851887, "grad_norm": 7.856932640075684, "learning_rate": 9.291258813060443e-05, "loss": 1.1376, "step": 5928 }, { "epoch": 0.40172098380649096, "grad_norm": 7.86414098739624, "learning_rate": 9.291121911150661e-05, "loss": 1.0225, "step": 5929 }, { "epoch": 0.40178873907446305, "grad_norm": 7.028629302978516, "learning_rate": 9.29098500924088e-05, "loss": 1.0157, "step": 5930 }, { "epoch": 0.40185649434243514, "grad_norm": 5.826450347900391, "learning_rate": 9.290848107331099e-05, "loss": 0.8605, "step": 5931 }, { "epoch": 0.40192424961040724, "grad_norm": 6.072175025939941, "learning_rate": 9.290711205421317e-05, "loss": 0.6261, "step": 5932 }, { "epoch": 0.40199200487837927, "grad_norm": 6.829746246337891, "learning_rate": 9.290574303511535e-05, "loss": 0.849, "step": 5933 }, { "epoch": 0.40205976014635136, "grad_norm": 6.809370994567871, "learning_rate": 9.290437401601753e-05, "loss": 0.8659, "step": 5934 }, { "epoch": 0.40212751541432346, "grad_norm": 7.195353984832764, "learning_rate": 9.290300499691971e-05, "loss": 0.9559, "step": 5935 }, { "epoch": 0.40219527068229555, "grad_norm": 8.443836212158203, "learning_rate": 9.29016359778219e-05, "loss": 0.7687, "step": 5936 }, { "epoch": 0.40226302595026764, "grad_norm": 5.932136058807373, "learning_rate": 9.290026695872408e-05, "loss": 0.7125, "step": 5937 }, { "epoch": 0.40233078121823973, "grad_norm": 6.654352188110352, "learning_rate": 9.289889793962626e-05, "loss": 0.9933, "step": 5938 }, { "epoch": 0.4023985364862118, "grad_norm": 7.100243091583252, "learning_rate": 9.289752892052844e-05, "loss": 0.9271, "step": 5939 }, { "epoch": 0.4024662917541839, "grad_norm": 7.78497838973999, "learning_rate": 9.289615990143062e-05, "loss": 0.9358, "step": 5940 }, { "epoch": 0.40253404702215595, "grad_norm": 6.294493198394775, "learning_rate": 9.289479088233282e-05, "loss": 0.8308, "step": 5941 }, { "epoch": 0.40260180229012804, "grad_norm": 6.8807244300842285, "learning_rate": 9.2893421863235e-05, "loss": 0.7728, "step": 5942 }, { "epoch": 0.40266955755810013, "grad_norm": 7.030758857727051, "learning_rate": 9.289205284413718e-05, "loss": 0.8725, "step": 5943 }, { "epoch": 0.4027373128260722, "grad_norm": 6.787613868713379, "learning_rate": 9.289068382503936e-05, "loss": 0.8725, "step": 5944 }, { "epoch": 0.4028050680940443, "grad_norm": 6.4748148918151855, "learning_rate": 9.288931480594155e-05, "loss": 0.9305, "step": 5945 }, { "epoch": 0.4028728233620164, "grad_norm": 5.930908203125, "learning_rate": 9.288794578684373e-05, "loss": 0.814, "step": 5946 }, { "epoch": 0.4029405786299885, "grad_norm": 6.687366008758545, "learning_rate": 9.288657676774591e-05, "loss": 0.7893, "step": 5947 }, { "epoch": 0.4030083338979606, "grad_norm": 9.194374084472656, "learning_rate": 9.28852077486481e-05, "loss": 1.1462, "step": 5948 }, { "epoch": 0.4030760891659326, "grad_norm": 8.393781661987305, "learning_rate": 9.288383872955027e-05, "loss": 0.9375, "step": 5949 }, { "epoch": 0.4031438444339047, "grad_norm": 7.802282810211182, "learning_rate": 9.288246971045247e-05, "loss": 1.1499, "step": 5950 }, { "epoch": 0.4032115997018768, "grad_norm": 7.085485458374023, "learning_rate": 9.288110069135465e-05, "loss": 0.9834, "step": 5951 }, { "epoch": 0.4032793549698489, "grad_norm": 7.905081272125244, "learning_rate": 9.287973167225683e-05, "loss": 1.11, "step": 5952 }, { "epoch": 0.403347110237821, "grad_norm": 5.531884670257568, "learning_rate": 9.287836265315901e-05, "loss": 0.8386, "step": 5953 }, { "epoch": 0.4034148655057931, "grad_norm": 5.959394931793213, "learning_rate": 9.28769936340612e-05, "loss": 0.858, "step": 5954 }, { "epoch": 0.4034826207737652, "grad_norm": 7.020748138427734, "learning_rate": 9.287562461496338e-05, "loss": 0.8708, "step": 5955 }, { "epoch": 0.40355037604173727, "grad_norm": 9.01052474975586, "learning_rate": 9.287425559586556e-05, "loss": 1.0213, "step": 5956 }, { "epoch": 0.4036181313097093, "grad_norm": 7.091532230377197, "learning_rate": 9.287288657676774e-05, "loss": 1.0133, "step": 5957 }, { "epoch": 0.4036858865776814, "grad_norm": 5.7289276123046875, "learning_rate": 9.287151755766992e-05, "loss": 0.6517, "step": 5958 }, { "epoch": 0.4037536418456535, "grad_norm": 5.692935466766357, "learning_rate": 9.287014853857212e-05, "loss": 0.865, "step": 5959 }, { "epoch": 0.4038213971136256, "grad_norm": 7.469212532043457, "learning_rate": 9.28687795194743e-05, "loss": 0.8707, "step": 5960 }, { "epoch": 0.40388915238159767, "grad_norm": 6.172707557678223, "learning_rate": 9.286741050037648e-05, "loss": 0.8217, "step": 5961 }, { "epoch": 0.40395690764956976, "grad_norm": 8.741066932678223, "learning_rate": 9.286604148127866e-05, "loss": 0.8087, "step": 5962 }, { "epoch": 0.40402466291754185, "grad_norm": 6.240641117095947, "learning_rate": 9.286467246218085e-05, "loss": 1.1012, "step": 5963 }, { "epoch": 0.40409241818551395, "grad_norm": 6.801406383514404, "learning_rate": 9.286330344308303e-05, "loss": 0.9335, "step": 5964 }, { "epoch": 0.404160173453486, "grad_norm": 6.114485263824463, "learning_rate": 9.286193442398521e-05, "loss": 0.8987, "step": 5965 }, { "epoch": 0.4042279287214581, "grad_norm": 9.715784072875977, "learning_rate": 9.28605654048874e-05, "loss": 1.0337, "step": 5966 }, { "epoch": 0.40429568398943017, "grad_norm": 10.362801551818848, "learning_rate": 9.285919638578959e-05, "loss": 0.7685, "step": 5967 }, { "epoch": 0.40436343925740226, "grad_norm": 7.567534446716309, "learning_rate": 9.285782736669177e-05, "loss": 0.8397, "step": 5968 }, { "epoch": 0.40443119452537435, "grad_norm": 7.282614231109619, "learning_rate": 9.285645834759395e-05, "loss": 1.0515, "step": 5969 }, { "epoch": 0.40449894979334644, "grad_norm": 6.683037757873535, "learning_rate": 9.285508932849614e-05, "loss": 1.1006, "step": 5970 }, { "epoch": 0.40456670506131853, "grad_norm": 8.437498092651367, "learning_rate": 9.285372030939832e-05, "loss": 1.0656, "step": 5971 }, { "epoch": 0.4046344603292906, "grad_norm": 6.153039932250977, "learning_rate": 9.28523512903005e-05, "loss": 0.8285, "step": 5972 }, { "epoch": 0.40470221559726266, "grad_norm": 6.3334221839904785, "learning_rate": 9.28509822712027e-05, "loss": 0.8968, "step": 5973 }, { "epoch": 0.40476997086523475, "grad_norm": 9.119121551513672, "learning_rate": 9.284961325210488e-05, "loss": 0.8814, "step": 5974 }, { "epoch": 0.40483772613320684, "grad_norm": 7.621852397918701, "learning_rate": 9.284824423300706e-05, "loss": 0.9297, "step": 5975 }, { "epoch": 0.40490548140117893, "grad_norm": 7.415964126586914, "learning_rate": 9.284687521390924e-05, "loss": 0.8597, "step": 5976 }, { "epoch": 0.404973236669151, "grad_norm": 6.424054145812988, "learning_rate": 9.284550619481143e-05, "loss": 0.6792, "step": 5977 }, { "epoch": 0.4050409919371231, "grad_norm": 7.875925540924072, "learning_rate": 9.284413717571361e-05, "loss": 0.7931, "step": 5978 }, { "epoch": 0.4051087472050952, "grad_norm": 5.702389240264893, "learning_rate": 9.284276815661579e-05, "loss": 0.7124, "step": 5979 }, { "epoch": 0.4051765024730673, "grad_norm": 6.58071231842041, "learning_rate": 9.284139913751797e-05, "loss": 0.8965, "step": 5980 }, { "epoch": 0.4052442577410394, "grad_norm": 8.180785179138184, "learning_rate": 9.284003011842015e-05, "loss": 1.0545, "step": 5981 }, { "epoch": 0.40531201300901143, "grad_norm": 7.393392562866211, "learning_rate": 9.283866109932235e-05, "loss": 0.8044, "step": 5982 }, { "epoch": 0.4053797682769835, "grad_norm": 6.009011745452881, "learning_rate": 9.283729208022453e-05, "loss": 0.7688, "step": 5983 }, { "epoch": 0.4054475235449556, "grad_norm": 6.8165388107299805, "learning_rate": 9.283592306112671e-05, "loss": 0.7743, "step": 5984 }, { "epoch": 0.4055152788129277, "grad_norm": 7.851406574249268, "learning_rate": 9.283455404202889e-05, "loss": 0.9081, "step": 5985 }, { "epoch": 0.4055830340808998, "grad_norm": 7.138257026672363, "learning_rate": 9.283318502293108e-05, "loss": 0.9843, "step": 5986 }, { "epoch": 0.4056507893488719, "grad_norm": 7.70706033706665, "learning_rate": 9.283181600383326e-05, "loss": 0.9158, "step": 5987 }, { "epoch": 0.405718544616844, "grad_norm": 7.948725700378418, "learning_rate": 9.283044698473544e-05, "loss": 0.8694, "step": 5988 }, { "epoch": 0.40578629988481607, "grad_norm": 6.4174628257751465, "learning_rate": 9.282907796563762e-05, "loss": 0.8547, "step": 5989 }, { "epoch": 0.4058540551527881, "grad_norm": 7.62558126449585, "learning_rate": 9.28277089465398e-05, "loss": 0.7978, "step": 5990 }, { "epoch": 0.4059218104207602, "grad_norm": 7.624577522277832, "learning_rate": 9.2826339927442e-05, "loss": 1.0346, "step": 5991 }, { "epoch": 0.4059895656887323, "grad_norm": 7.392852783203125, "learning_rate": 9.282497090834418e-05, "loss": 0.8833, "step": 5992 }, { "epoch": 0.4060573209567044, "grad_norm": 7.652538776397705, "learning_rate": 9.282360188924636e-05, "loss": 1.0438, "step": 5993 }, { "epoch": 0.4061250762246765, "grad_norm": 7.050436973571777, "learning_rate": 9.282223287014854e-05, "loss": 0.8291, "step": 5994 }, { "epoch": 0.40619283149264857, "grad_norm": 7.119441032409668, "learning_rate": 9.282086385105072e-05, "loss": 0.6427, "step": 5995 }, { "epoch": 0.40626058676062066, "grad_norm": 6.1112565994262695, "learning_rate": 9.281949483195291e-05, "loss": 0.7737, "step": 5996 }, { "epoch": 0.40632834202859275, "grad_norm": 6.173165321350098, "learning_rate": 9.28181258128551e-05, "loss": 0.8099, "step": 5997 }, { "epoch": 0.4063960972965648, "grad_norm": 6.372697830200195, "learning_rate": 9.281675679375727e-05, "loss": 0.7902, "step": 5998 }, { "epoch": 0.4064638525645369, "grad_norm": 7.213540077209473, "learning_rate": 9.281538777465945e-05, "loss": 0.7273, "step": 5999 }, { "epoch": 0.40653160783250897, "grad_norm": 8.106986999511719, "learning_rate": 9.281401875556165e-05, "loss": 0.7516, "step": 6000 }, { "epoch": 0.40659936310048106, "grad_norm": 8.372703552246094, "learning_rate": 9.281264973646383e-05, "loss": 1.1737, "step": 6001 }, { "epoch": 0.40666711836845315, "grad_norm": 9.540267944335938, "learning_rate": 9.281128071736601e-05, "loss": 1.068, "step": 6002 }, { "epoch": 0.40673487363642524, "grad_norm": 6.878968715667725, "learning_rate": 9.280991169826819e-05, "loss": 0.9329, "step": 6003 }, { "epoch": 0.40680262890439733, "grad_norm": 6.826279163360596, "learning_rate": 9.280854267917037e-05, "loss": 0.8267, "step": 6004 }, { "epoch": 0.4068703841723694, "grad_norm": 7.155866622924805, "learning_rate": 9.280717366007256e-05, "loss": 0.9133, "step": 6005 }, { "epoch": 0.40693813944034146, "grad_norm": 6.883568286895752, "learning_rate": 9.280580464097474e-05, "loss": 0.709, "step": 6006 }, { "epoch": 0.40700589470831355, "grad_norm": 6.944139003753662, "learning_rate": 9.280443562187692e-05, "loss": 0.8137, "step": 6007 }, { "epoch": 0.40707364997628565, "grad_norm": 5.899077892303467, "learning_rate": 9.28030666027791e-05, "loss": 0.733, "step": 6008 }, { "epoch": 0.40714140524425774, "grad_norm": 7.213099956512451, "learning_rate": 9.28016975836813e-05, "loss": 1.1728, "step": 6009 }, { "epoch": 0.40720916051222983, "grad_norm": 7.830915927886963, "learning_rate": 9.280032856458348e-05, "loss": 0.7465, "step": 6010 }, { "epoch": 0.4072769157802019, "grad_norm": 7.609717845916748, "learning_rate": 9.279895954548566e-05, "loss": 1.2036, "step": 6011 }, { "epoch": 0.407344671048174, "grad_norm": 8.978927612304688, "learning_rate": 9.279759052638784e-05, "loss": 0.8903, "step": 6012 }, { "epoch": 0.4074124263161461, "grad_norm": 8.331847190856934, "learning_rate": 9.279622150729003e-05, "loss": 1.0853, "step": 6013 }, { "epoch": 0.40748018158411814, "grad_norm": 6.639584541320801, "learning_rate": 9.279485248819221e-05, "loss": 0.9113, "step": 6014 }, { "epoch": 0.40754793685209023, "grad_norm": 6.87017822265625, "learning_rate": 9.27934834690944e-05, "loss": 0.748, "step": 6015 }, { "epoch": 0.4076156921200623, "grad_norm": 6.675489902496338, "learning_rate": 9.279211444999659e-05, "loss": 0.8002, "step": 6016 }, { "epoch": 0.4076834473880344, "grad_norm": 7.666563034057617, "learning_rate": 9.279074543089877e-05, "loss": 0.9973, "step": 6017 }, { "epoch": 0.4077512026560065, "grad_norm": 7.878670692443848, "learning_rate": 9.278937641180095e-05, "loss": 1.0647, "step": 6018 }, { "epoch": 0.4078189579239786, "grad_norm": 6.263443946838379, "learning_rate": 9.278800739270314e-05, "loss": 0.8827, "step": 6019 }, { "epoch": 0.4078867131919507, "grad_norm": 9.69717788696289, "learning_rate": 9.278663837360532e-05, "loss": 0.728, "step": 6020 }, { "epoch": 0.4079544684599228, "grad_norm": 8.236658096313477, "learning_rate": 9.27852693545075e-05, "loss": 0.78, "step": 6021 }, { "epoch": 0.4080222237278948, "grad_norm": 6.9468512535095215, "learning_rate": 9.278390033540968e-05, "loss": 0.7674, "step": 6022 }, { "epoch": 0.4080899789958669, "grad_norm": 8.783222198486328, "learning_rate": 9.278253131631188e-05, "loss": 0.9062, "step": 6023 }, { "epoch": 0.408157734263839, "grad_norm": 6.449438571929932, "learning_rate": 9.278116229721406e-05, "loss": 0.9683, "step": 6024 }, { "epoch": 0.4082254895318111, "grad_norm": 6.185464382171631, "learning_rate": 9.277979327811624e-05, "loss": 0.8899, "step": 6025 }, { "epoch": 0.4082932447997832, "grad_norm": 6.422550678253174, "learning_rate": 9.277842425901842e-05, "loss": 0.8765, "step": 6026 }, { "epoch": 0.4083610000677553, "grad_norm": 6.8761372566223145, "learning_rate": 9.27770552399206e-05, "loss": 0.7326, "step": 6027 }, { "epoch": 0.40842875533572737, "grad_norm": 5.4224162101745605, "learning_rate": 9.277568622082279e-05, "loss": 0.7882, "step": 6028 }, { "epoch": 0.40849651060369946, "grad_norm": 6.6288275718688965, "learning_rate": 9.277431720172497e-05, "loss": 0.936, "step": 6029 }, { "epoch": 0.4085642658716715, "grad_norm": 7.305326461791992, "learning_rate": 9.277294818262715e-05, "loss": 0.8254, "step": 6030 }, { "epoch": 0.4086320211396436, "grad_norm": 6.697827339172363, "learning_rate": 9.277157916352933e-05, "loss": 0.9365, "step": 6031 }, { "epoch": 0.4086997764076157, "grad_norm": 8.074564933776855, "learning_rate": 9.277021014443153e-05, "loss": 1.0454, "step": 6032 }, { "epoch": 0.40876753167558777, "grad_norm": 7.539030075073242, "learning_rate": 9.276884112533371e-05, "loss": 0.9275, "step": 6033 }, { "epoch": 0.40883528694355986, "grad_norm": 7.1338582038879395, "learning_rate": 9.276747210623589e-05, "loss": 0.8395, "step": 6034 }, { "epoch": 0.40890304221153195, "grad_norm": 8.182660102844238, "learning_rate": 9.276610308713807e-05, "loss": 1.1907, "step": 6035 }, { "epoch": 0.40897079747950404, "grad_norm": 6.1452813148498535, "learning_rate": 9.276473406804025e-05, "loss": 0.6271, "step": 6036 }, { "epoch": 0.40903855274747614, "grad_norm": 7.247335433959961, "learning_rate": 9.276336504894244e-05, "loss": 0.8665, "step": 6037 }, { "epoch": 0.40910630801544823, "grad_norm": 8.27696418762207, "learning_rate": 9.276199602984462e-05, "loss": 0.968, "step": 6038 }, { "epoch": 0.40917406328342026, "grad_norm": 6.888766288757324, "learning_rate": 9.27606270107468e-05, "loss": 0.8212, "step": 6039 }, { "epoch": 0.40924181855139236, "grad_norm": 6.885414123535156, "learning_rate": 9.275925799164898e-05, "loss": 0.8978, "step": 6040 }, { "epoch": 0.40930957381936445, "grad_norm": 7.307199478149414, "learning_rate": 9.275788897255118e-05, "loss": 0.9602, "step": 6041 }, { "epoch": 0.40937732908733654, "grad_norm": 6.921801567077637, "learning_rate": 9.275651995345336e-05, "loss": 0.863, "step": 6042 }, { "epoch": 0.40944508435530863, "grad_norm": 6.910282611846924, "learning_rate": 9.275515093435554e-05, "loss": 0.8481, "step": 6043 }, { "epoch": 0.4095128396232807, "grad_norm": 7.98552131652832, "learning_rate": 9.275378191525772e-05, "loss": 0.9096, "step": 6044 }, { "epoch": 0.4095805948912528, "grad_norm": 9.74826431274414, "learning_rate": 9.27524128961599e-05, "loss": 0.867, "step": 6045 }, { "epoch": 0.4096483501592249, "grad_norm": 10.083016395568848, "learning_rate": 9.275104387706209e-05, "loss": 0.9664, "step": 6046 }, { "epoch": 0.40971610542719694, "grad_norm": 8.351798057556152, "learning_rate": 9.274967485796427e-05, "loss": 0.8164, "step": 6047 }, { "epoch": 0.40978386069516903, "grad_norm": 7.7515106201171875, "learning_rate": 9.274830583886645e-05, "loss": 0.9784, "step": 6048 }, { "epoch": 0.4098516159631411, "grad_norm": 6.339774131774902, "learning_rate": 9.274693681976863e-05, "loss": 0.7614, "step": 6049 }, { "epoch": 0.4099193712311132, "grad_norm": 8.455992698669434, "learning_rate": 9.274556780067081e-05, "loss": 0.989, "step": 6050 }, { "epoch": 0.4099871264990853, "grad_norm": 9.367591857910156, "learning_rate": 9.274419878157301e-05, "loss": 1.0155, "step": 6051 }, { "epoch": 0.4100548817670574, "grad_norm": 8.611092567443848, "learning_rate": 9.274282976247519e-05, "loss": 0.9541, "step": 6052 }, { "epoch": 0.4101226370350295, "grad_norm": 8.239481925964355, "learning_rate": 9.274146074337737e-05, "loss": 0.9511, "step": 6053 }, { "epoch": 0.4101903923030016, "grad_norm": 7.31620979309082, "learning_rate": 9.274009172427955e-05, "loss": 1.0126, "step": 6054 }, { "epoch": 0.4102581475709736, "grad_norm": 6.814750671386719, "learning_rate": 9.273872270518174e-05, "loss": 0.9463, "step": 6055 }, { "epoch": 0.4103259028389457, "grad_norm": 7.000329494476318, "learning_rate": 9.273735368608392e-05, "loss": 1.0664, "step": 6056 }, { "epoch": 0.4103936581069178, "grad_norm": 6.309933662414551, "learning_rate": 9.27359846669861e-05, "loss": 0.8605, "step": 6057 }, { "epoch": 0.4104614133748899, "grad_norm": 8.13158893585205, "learning_rate": 9.273461564788828e-05, "loss": 0.9158, "step": 6058 }, { "epoch": 0.410529168642862, "grad_norm": 8.334741592407227, "learning_rate": 9.273324662879048e-05, "loss": 0.8699, "step": 6059 }, { "epoch": 0.4105969239108341, "grad_norm": 8.019463539123535, "learning_rate": 9.273187760969266e-05, "loss": 1.0366, "step": 6060 }, { "epoch": 0.41066467917880617, "grad_norm": 6.104535102844238, "learning_rate": 9.273050859059484e-05, "loss": 0.8273, "step": 6061 }, { "epoch": 0.41073243444677826, "grad_norm": 8.345130920410156, "learning_rate": 9.272913957149703e-05, "loss": 1.3653, "step": 6062 }, { "epoch": 0.4108001897147503, "grad_norm": 8.028311729431152, "learning_rate": 9.272777055239921e-05, "loss": 1.096, "step": 6063 }, { "epoch": 0.4108679449827224, "grad_norm": 7.079861164093018, "learning_rate": 9.272640153330139e-05, "loss": 1.0157, "step": 6064 }, { "epoch": 0.4109357002506945, "grad_norm": 8.497052192687988, "learning_rate": 9.272503251420359e-05, "loss": 0.9972, "step": 6065 }, { "epoch": 0.41100345551866657, "grad_norm": 8.042581558227539, "learning_rate": 9.272366349510577e-05, "loss": 0.908, "step": 6066 }, { "epoch": 0.41107121078663866, "grad_norm": 6.567187786102295, "learning_rate": 9.272229447600795e-05, "loss": 0.7992, "step": 6067 }, { "epoch": 0.41113896605461075, "grad_norm": 5.78397274017334, "learning_rate": 9.272092545691013e-05, "loss": 0.773, "step": 6068 }, { "epoch": 0.41120672132258285, "grad_norm": 8.116515159606934, "learning_rate": 9.271955643781232e-05, "loss": 0.8101, "step": 6069 }, { "epoch": 0.41127447659055494, "grad_norm": 7.628951549530029, "learning_rate": 9.27181874187145e-05, "loss": 0.9282, "step": 6070 }, { "epoch": 0.411342231858527, "grad_norm": 7.830180644989014, "learning_rate": 9.271681839961668e-05, "loss": 0.8681, "step": 6071 }, { "epoch": 0.41140998712649907, "grad_norm": 7.690285682678223, "learning_rate": 9.271544938051886e-05, "loss": 0.9064, "step": 6072 }, { "epoch": 0.41147774239447116, "grad_norm": 7.585133075714111, "learning_rate": 9.271408036142106e-05, "loss": 0.8305, "step": 6073 }, { "epoch": 0.41154549766244325, "grad_norm": 6.18320894241333, "learning_rate": 9.271271134232324e-05, "loss": 1.0395, "step": 6074 }, { "epoch": 0.41161325293041534, "grad_norm": 8.301353454589844, "learning_rate": 9.271134232322542e-05, "loss": 1.1528, "step": 6075 }, { "epoch": 0.41168100819838743, "grad_norm": 6.261874198913574, "learning_rate": 9.27099733041276e-05, "loss": 0.7802, "step": 6076 }, { "epoch": 0.4117487634663595, "grad_norm": 7.161525726318359, "learning_rate": 9.270860428502978e-05, "loss": 1.1292, "step": 6077 }, { "epoch": 0.4118165187343316, "grad_norm": 6.522838115692139, "learning_rate": 9.270723526593197e-05, "loss": 0.8013, "step": 6078 }, { "epoch": 0.41188427400230365, "grad_norm": 6.919328212738037, "learning_rate": 9.270586624683415e-05, "loss": 0.8172, "step": 6079 }, { "epoch": 0.41195202927027574, "grad_norm": 6.236283302307129, "learning_rate": 9.270449722773633e-05, "loss": 0.9677, "step": 6080 }, { "epoch": 0.41201978453824784, "grad_norm": 5.593216896057129, "learning_rate": 9.270312820863851e-05, "loss": 0.922, "step": 6081 }, { "epoch": 0.4120875398062199, "grad_norm": 8.651509284973145, "learning_rate": 9.27017591895407e-05, "loss": 0.9209, "step": 6082 }, { "epoch": 0.412155295074192, "grad_norm": 6.787774085998535, "learning_rate": 9.270039017044289e-05, "loss": 1.0991, "step": 6083 }, { "epoch": 0.4122230503421641, "grad_norm": 6.774304389953613, "learning_rate": 9.269902115134507e-05, "loss": 0.7265, "step": 6084 }, { "epoch": 0.4122908056101362, "grad_norm": 7.097721099853516, "learning_rate": 9.269765213224725e-05, "loss": 1.0353, "step": 6085 }, { "epoch": 0.4123585608781083, "grad_norm": 8.65166187286377, "learning_rate": 9.269628311314943e-05, "loss": 1.0157, "step": 6086 }, { "epoch": 0.4124263161460804, "grad_norm": 5.778721809387207, "learning_rate": 9.269491409405162e-05, "loss": 0.8381, "step": 6087 }, { "epoch": 0.4124940714140524, "grad_norm": 6.297547340393066, "learning_rate": 9.26935450749538e-05, "loss": 0.9421, "step": 6088 }, { "epoch": 0.4125618266820245, "grad_norm": 7.8641462326049805, "learning_rate": 9.269217605585598e-05, "loss": 0.9987, "step": 6089 }, { "epoch": 0.4126295819499966, "grad_norm": 7.222883701324463, "learning_rate": 9.269080703675816e-05, "loss": 0.7314, "step": 6090 }, { "epoch": 0.4126973372179687, "grad_norm": 7.291220188140869, "learning_rate": 9.268943801766034e-05, "loss": 0.9199, "step": 6091 }, { "epoch": 0.4127650924859408, "grad_norm": 7.00157356262207, "learning_rate": 9.268806899856254e-05, "loss": 0.8965, "step": 6092 }, { "epoch": 0.4128328477539129, "grad_norm": 8.229329109191895, "learning_rate": 9.268669997946472e-05, "loss": 1.0392, "step": 6093 }, { "epoch": 0.41290060302188497, "grad_norm": 6.9192633628845215, "learning_rate": 9.26853309603669e-05, "loss": 0.8987, "step": 6094 }, { "epoch": 0.41296835828985706, "grad_norm": 6.3700761795043945, "learning_rate": 9.268396194126908e-05, "loss": 0.7713, "step": 6095 }, { "epoch": 0.4130361135578291, "grad_norm": 5.654745101928711, "learning_rate": 9.268259292217127e-05, "loss": 0.6733, "step": 6096 }, { "epoch": 0.4131038688258012, "grad_norm": 6.898359298706055, "learning_rate": 9.268122390307345e-05, "loss": 0.9611, "step": 6097 }, { "epoch": 0.4131716240937733, "grad_norm": 6.53093147277832, "learning_rate": 9.267985488397563e-05, "loss": 0.795, "step": 6098 }, { "epoch": 0.4132393793617454, "grad_norm": 8.565315246582031, "learning_rate": 9.267848586487781e-05, "loss": 0.7333, "step": 6099 }, { "epoch": 0.41330713462971747, "grad_norm": 7.8006744384765625, "learning_rate": 9.267711684578e-05, "loss": 0.9579, "step": 6100 }, { "epoch": 0.41337488989768956, "grad_norm": 7.270709037780762, "learning_rate": 9.267574782668219e-05, "loss": 0.9105, "step": 6101 }, { "epoch": 0.41344264516566165, "grad_norm": 7.98935079574585, "learning_rate": 9.267437880758437e-05, "loss": 0.9697, "step": 6102 }, { "epoch": 0.41351040043363374, "grad_norm": 6.606309413909912, "learning_rate": 9.267300978848655e-05, "loss": 0.8649, "step": 6103 }, { "epoch": 0.4135781557016058, "grad_norm": 7.456545352935791, "learning_rate": 9.267164076938873e-05, "loss": 1.0884, "step": 6104 }, { "epoch": 0.41364591096957787, "grad_norm": 6.820968151092529, "learning_rate": 9.267027175029092e-05, "loss": 0.6651, "step": 6105 }, { "epoch": 0.41371366623754996, "grad_norm": 7.569576740264893, "learning_rate": 9.26689027311931e-05, "loss": 1.024, "step": 6106 }, { "epoch": 0.41378142150552205, "grad_norm": 7.982132911682129, "learning_rate": 9.266753371209528e-05, "loss": 0.9111, "step": 6107 }, { "epoch": 0.41384917677349414, "grad_norm": 7.610587120056152, "learning_rate": 9.266616469299748e-05, "loss": 0.9005, "step": 6108 }, { "epoch": 0.41391693204146623, "grad_norm": 6.773017883300781, "learning_rate": 9.266479567389966e-05, "loss": 0.6696, "step": 6109 }, { "epoch": 0.4139846873094383, "grad_norm": 7.702723979949951, "learning_rate": 9.266342665480184e-05, "loss": 0.7951, "step": 6110 }, { "epoch": 0.4140524425774104, "grad_norm": 7.299111843109131, "learning_rate": 9.266205763570403e-05, "loss": 1.0422, "step": 6111 }, { "epoch": 0.41412019784538245, "grad_norm": 5.148745536804199, "learning_rate": 9.266068861660621e-05, "loss": 0.6821, "step": 6112 }, { "epoch": 0.41418795311335455, "grad_norm": 6.768344402313232, "learning_rate": 9.265931959750839e-05, "loss": 0.8119, "step": 6113 }, { "epoch": 0.41425570838132664, "grad_norm": 6.882130146026611, "learning_rate": 9.265795057841057e-05, "loss": 0.9931, "step": 6114 }, { "epoch": 0.41432346364929873, "grad_norm": 7.292698383331299, "learning_rate": 9.265658155931277e-05, "loss": 1.0415, "step": 6115 }, { "epoch": 0.4143912189172708, "grad_norm": 7.241159915924072, "learning_rate": 9.265521254021495e-05, "loss": 0.9122, "step": 6116 }, { "epoch": 0.4144589741852429, "grad_norm": 6.7898736000061035, "learning_rate": 9.265384352111713e-05, "loss": 0.9399, "step": 6117 }, { "epoch": 0.414526729453215, "grad_norm": 6.505312919616699, "learning_rate": 9.265247450201931e-05, "loss": 0.9463, "step": 6118 }, { "epoch": 0.4145944847211871, "grad_norm": 5.761348724365234, "learning_rate": 9.26511054829215e-05, "loss": 0.8365, "step": 6119 }, { "epoch": 0.41466223998915913, "grad_norm": 6.377706527709961, "learning_rate": 9.264973646382368e-05, "loss": 0.7451, "step": 6120 }, { "epoch": 0.4147299952571312, "grad_norm": 8.465597152709961, "learning_rate": 9.264836744472586e-05, "loss": 0.8582, "step": 6121 }, { "epoch": 0.4147977505251033, "grad_norm": 6.3482770919799805, "learning_rate": 9.264699842562804e-05, "loss": 0.8291, "step": 6122 }, { "epoch": 0.4148655057930754, "grad_norm": 7.984723091125488, "learning_rate": 9.264562940653022e-05, "loss": 0.9641, "step": 6123 }, { "epoch": 0.4149332610610475, "grad_norm": 6.695097923278809, "learning_rate": 9.264426038743242e-05, "loss": 0.7661, "step": 6124 }, { "epoch": 0.4150010163290196, "grad_norm": 6.403726100921631, "learning_rate": 9.26428913683346e-05, "loss": 0.6342, "step": 6125 }, { "epoch": 0.4150687715969917, "grad_norm": 5.906423091888428, "learning_rate": 9.264152234923678e-05, "loss": 0.9588, "step": 6126 }, { "epoch": 0.4151365268649638, "grad_norm": 6.482539653778076, "learning_rate": 9.264015333013896e-05, "loss": 1.0244, "step": 6127 }, { "epoch": 0.4152042821329358, "grad_norm": 7.783926486968994, "learning_rate": 9.263878431104114e-05, "loss": 0.8611, "step": 6128 }, { "epoch": 0.4152720374009079, "grad_norm": 8.377721786499023, "learning_rate": 9.263741529194333e-05, "loss": 0.9757, "step": 6129 }, { "epoch": 0.41533979266888, "grad_norm": 8.719101905822754, "learning_rate": 9.263604627284551e-05, "loss": 1.0766, "step": 6130 }, { "epoch": 0.4154075479368521, "grad_norm": 6.0139641761779785, "learning_rate": 9.263467725374769e-05, "loss": 0.8036, "step": 6131 }, { "epoch": 0.4154753032048242, "grad_norm": 8.683677673339844, "learning_rate": 9.263330823464987e-05, "loss": 0.9476, "step": 6132 }, { "epoch": 0.41554305847279627, "grad_norm": 7.566380977630615, "learning_rate": 9.263193921555207e-05, "loss": 1.0983, "step": 6133 }, { "epoch": 0.41561081374076836, "grad_norm": 6.716690540313721, "learning_rate": 9.263057019645425e-05, "loss": 0.8803, "step": 6134 }, { "epoch": 0.41567856900874045, "grad_norm": 6.123441696166992, "learning_rate": 9.262920117735643e-05, "loss": 0.7158, "step": 6135 }, { "epoch": 0.4157463242767125, "grad_norm": 8.015522956848145, "learning_rate": 9.262783215825861e-05, "loss": 1.0269, "step": 6136 }, { "epoch": 0.4158140795446846, "grad_norm": 5.564431667327881, "learning_rate": 9.262646313916079e-05, "loss": 0.8309, "step": 6137 }, { "epoch": 0.41588183481265667, "grad_norm": 7.946048736572266, "learning_rate": 9.262509412006298e-05, "loss": 0.9334, "step": 6138 }, { "epoch": 0.41594959008062876, "grad_norm": 6.741854667663574, "learning_rate": 9.262372510096516e-05, "loss": 0.6461, "step": 6139 }, { "epoch": 0.41601734534860085, "grad_norm": 8.229652404785156, "learning_rate": 9.262235608186734e-05, "loss": 0.9046, "step": 6140 }, { "epoch": 0.41608510061657294, "grad_norm": 5.590304851531982, "learning_rate": 9.262098706276952e-05, "loss": 0.6979, "step": 6141 }, { "epoch": 0.41615285588454504, "grad_norm": 6.79884672164917, "learning_rate": 9.261961804367172e-05, "loss": 0.853, "step": 6142 }, { "epoch": 0.41622061115251713, "grad_norm": 7.175544738769531, "learning_rate": 9.26182490245739e-05, "loss": 0.9265, "step": 6143 }, { "epoch": 0.4162883664204892, "grad_norm": 8.839093208312988, "learning_rate": 9.261688000547608e-05, "loss": 0.9541, "step": 6144 }, { "epoch": 0.41635612168846126, "grad_norm": 6.818619728088379, "learning_rate": 9.261551098637826e-05, "loss": 0.6284, "step": 6145 }, { "epoch": 0.41642387695643335, "grad_norm": 7.31305456161499, "learning_rate": 9.261414196728044e-05, "loss": 0.797, "step": 6146 }, { "epoch": 0.41649163222440544, "grad_norm": 8.566871643066406, "learning_rate": 9.261277294818263e-05, "loss": 1.0893, "step": 6147 }, { "epoch": 0.41655938749237753, "grad_norm": 6.028139114379883, "learning_rate": 9.261140392908481e-05, "loss": 0.781, "step": 6148 }, { "epoch": 0.4166271427603496, "grad_norm": 7.383317947387695, "learning_rate": 9.261003490998699e-05, "loss": 0.5846, "step": 6149 }, { "epoch": 0.4166948980283217, "grad_norm": 6.392228126525879, "learning_rate": 9.260866589088917e-05, "loss": 0.9403, "step": 6150 }, { "epoch": 0.4167626532962938, "grad_norm": 6.1830644607543945, "learning_rate": 9.260729687179137e-05, "loss": 0.9502, "step": 6151 }, { "epoch": 0.4168304085642659, "grad_norm": 6.851447582244873, "learning_rate": 9.260592785269355e-05, "loss": 0.8042, "step": 6152 }, { "epoch": 0.41689816383223793, "grad_norm": 5.995123386383057, "learning_rate": 9.260455883359573e-05, "loss": 0.8875, "step": 6153 }, { "epoch": 0.41696591910021, "grad_norm": 6.834797382354736, "learning_rate": 9.260318981449791e-05, "loss": 0.8652, "step": 6154 }, { "epoch": 0.4170336743681821, "grad_norm": 6.0200371742248535, "learning_rate": 9.26018207954001e-05, "loss": 0.9198, "step": 6155 }, { "epoch": 0.4171014296361542, "grad_norm": 7.138192176818848, "learning_rate": 9.260045177630228e-05, "loss": 0.8284, "step": 6156 }, { "epoch": 0.4171691849041263, "grad_norm": 7.5531229972839355, "learning_rate": 9.259908275720446e-05, "loss": 0.7188, "step": 6157 }, { "epoch": 0.4172369401720984, "grad_norm": 8.347415924072266, "learning_rate": 9.259771373810666e-05, "loss": 1.1853, "step": 6158 }, { "epoch": 0.4173046954400705, "grad_norm": 8.188237190246582, "learning_rate": 9.259634471900884e-05, "loss": 0.7712, "step": 6159 }, { "epoch": 0.4173724507080426, "grad_norm": 7.238736629486084, "learning_rate": 9.259497569991102e-05, "loss": 0.7731, "step": 6160 }, { "epoch": 0.4174402059760146, "grad_norm": 8.175471305847168, "learning_rate": 9.259360668081321e-05, "loss": 1.0769, "step": 6161 }, { "epoch": 0.4175079612439867, "grad_norm": 6.97186803817749, "learning_rate": 9.259223766171539e-05, "loss": 0.7469, "step": 6162 }, { "epoch": 0.4175757165119588, "grad_norm": 5.761664390563965, "learning_rate": 9.259086864261757e-05, "loss": 0.8716, "step": 6163 }, { "epoch": 0.4176434717799309, "grad_norm": 8.567249298095703, "learning_rate": 9.258949962351975e-05, "loss": 0.8376, "step": 6164 }, { "epoch": 0.417711227047903, "grad_norm": 5.827561378479004, "learning_rate": 9.258813060442195e-05, "loss": 0.8468, "step": 6165 }, { "epoch": 0.41777898231587507, "grad_norm": 7.336645603179932, "learning_rate": 9.258676158532413e-05, "loss": 0.8529, "step": 6166 }, { "epoch": 0.41784673758384716, "grad_norm": 7.711108207702637, "learning_rate": 9.258539256622631e-05, "loss": 0.8802, "step": 6167 }, { "epoch": 0.41791449285181925, "grad_norm": 8.625036239624023, "learning_rate": 9.258402354712849e-05, "loss": 1.0263, "step": 6168 }, { "epoch": 0.4179822481197913, "grad_norm": 7.005527019500732, "learning_rate": 9.258265452803067e-05, "loss": 0.9728, "step": 6169 }, { "epoch": 0.4180500033877634, "grad_norm": 6.906123161315918, "learning_rate": 9.258128550893286e-05, "loss": 0.8958, "step": 6170 }, { "epoch": 0.41811775865573547, "grad_norm": 7.147536754608154, "learning_rate": 9.257991648983504e-05, "loss": 1.1466, "step": 6171 }, { "epoch": 0.41818551392370756, "grad_norm": 5.8256001472473145, "learning_rate": 9.257854747073722e-05, "loss": 0.7189, "step": 6172 }, { "epoch": 0.41825326919167966, "grad_norm": 5.144516944885254, "learning_rate": 9.25771784516394e-05, "loss": 0.6368, "step": 6173 }, { "epoch": 0.41832102445965175, "grad_norm": 7.161872863769531, "learning_rate": 9.25758094325416e-05, "loss": 1.0074, "step": 6174 }, { "epoch": 0.41838877972762384, "grad_norm": 9.098782539367676, "learning_rate": 9.257444041344378e-05, "loss": 0.9261, "step": 6175 }, { "epoch": 0.41845653499559593, "grad_norm": 7.566091060638428, "learning_rate": 9.257307139434596e-05, "loss": 0.8845, "step": 6176 }, { "epoch": 0.41852429026356797, "grad_norm": 8.618456840515137, "learning_rate": 9.257170237524814e-05, "loss": 0.8643, "step": 6177 }, { "epoch": 0.41859204553154006, "grad_norm": 8.870187759399414, "learning_rate": 9.257033335615032e-05, "loss": 1.1915, "step": 6178 }, { "epoch": 0.41865980079951215, "grad_norm": 6.494687557220459, "learning_rate": 9.256896433705251e-05, "loss": 0.6523, "step": 6179 }, { "epoch": 0.41872755606748424, "grad_norm": 7.417816638946533, "learning_rate": 9.256759531795469e-05, "loss": 0.9454, "step": 6180 }, { "epoch": 0.41879531133545633, "grad_norm": 11.310504913330078, "learning_rate": 9.256622629885687e-05, "loss": 1.2984, "step": 6181 }, { "epoch": 0.4188630666034284, "grad_norm": 6.3585615158081055, "learning_rate": 9.256485727975905e-05, "loss": 0.8379, "step": 6182 }, { "epoch": 0.4189308218714005, "grad_norm": 8.134123802185059, "learning_rate": 9.256348826066123e-05, "loss": 0.9962, "step": 6183 }, { "epoch": 0.4189985771393726, "grad_norm": 5.962007999420166, "learning_rate": 9.256211924156343e-05, "loss": 0.8923, "step": 6184 }, { "epoch": 0.41906633240734464, "grad_norm": 6.423482894897461, "learning_rate": 9.256075022246561e-05, "loss": 0.7991, "step": 6185 }, { "epoch": 0.41913408767531674, "grad_norm": 6.406213283538818, "learning_rate": 9.255938120336779e-05, "loss": 0.9454, "step": 6186 }, { "epoch": 0.4192018429432888, "grad_norm": 4.926299571990967, "learning_rate": 9.255801218426997e-05, "loss": 0.6442, "step": 6187 }, { "epoch": 0.4192695982112609, "grad_norm": 6.160318374633789, "learning_rate": 9.255664316517216e-05, "loss": 0.8575, "step": 6188 }, { "epoch": 0.419337353479233, "grad_norm": 9.17479133605957, "learning_rate": 9.255527414607434e-05, "loss": 1.2344, "step": 6189 }, { "epoch": 0.4194051087472051, "grad_norm": 8.95871639251709, "learning_rate": 9.255390512697652e-05, "loss": 0.9193, "step": 6190 }, { "epoch": 0.4194728640151772, "grad_norm": 5.636984825134277, "learning_rate": 9.25525361078787e-05, "loss": 0.725, "step": 6191 }, { "epoch": 0.4195406192831493, "grad_norm": 6.044233798980713, "learning_rate": 9.255116708878088e-05, "loss": 0.7526, "step": 6192 }, { "epoch": 0.4196083745511214, "grad_norm": 6.230378150939941, "learning_rate": 9.254979806968308e-05, "loss": 0.7923, "step": 6193 }, { "epoch": 0.4196761298190934, "grad_norm": 8.391528129577637, "learning_rate": 9.254842905058526e-05, "loss": 1.0368, "step": 6194 }, { "epoch": 0.4197438850870655, "grad_norm": 7.150004863739014, "learning_rate": 9.254706003148744e-05, "loss": 0.9087, "step": 6195 }, { "epoch": 0.4198116403550376, "grad_norm": 8.44096565246582, "learning_rate": 9.254569101238962e-05, "loss": 0.9511, "step": 6196 }, { "epoch": 0.4198793956230097, "grad_norm": 7.8519768714904785, "learning_rate": 9.254432199329181e-05, "loss": 1.0565, "step": 6197 }, { "epoch": 0.4199471508909818, "grad_norm": 6.873769760131836, "learning_rate": 9.254295297419399e-05, "loss": 0.9016, "step": 6198 }, { "epoch": 0.42001490615895387, "grad_norm": 6.1226396560668945, "learning_rate": 9.254158395509617e-05, "loss": 0.8439, "step": 6199 }, { "epoch": 0.42008266142692596, "grad_norm": 6.68569803237915, "learning_rate": 9.254021493599835e-05, "loss": 0.9836, "step": 6200 }, { "epoch": 0.42015041669489805, "grad_norm": 5.907567501068115, "learning_rate": 9.253884591690055e-05, "loss": 0.8047, "step": 6201 }, { "epoch": 0.4202181719628701, "grad_norm": 5.849989891052246, "learning_rate": 9.253747689780273e-05, "loss": 0.8791, "step": 6202 }, { "epoch": 0.4202859272308422, "grad_norm": 5.872654914855957, "learning_rate": 9.253610787870491e-05, "loss": 0.8368, "step": 6203 }, { "epoch": 0.4203536824988143, "grad_norm": 5.799046039581299, "learning_rate": 9.25347388596071e-05, "loss": 0.8215, "step": 6204 }, { "epoch": 0.42042143776678637, "grad_norm": 5.331169605255127, "learning_rate": 9.253336984050928e-05, "loss": 0.9533, "step": 6205 }, { "epoch": 0.42048919303475846, "grad_norm": 6.436175346374512, "learning_rate": 9.253200082141146e-05, "loss": 0.7356, "step": 6206 }, { "epoch": 0.42055694830273055, "grad_norm": 6.290867328643799, "learning_rate": 9.253063180231366e-05, "loss": 0.7771, "step": 6207 }, { "epoch": 0.42062470357070264, "grad_norm": 8.476110458374023, "learning_rate": 9.252926278321584e-05, "loss": 1.0357, "step": 6208 }, { "epoch": 0.42069245883867473, "grad_norm": 8.561487197875977, "learning_rate": 9.252789376411802e-05, "loss": 0.974, "step": 6209 }, { "epoch": 0.42076021410664677, "grad_norm": 7.24920654296875, "learning_rate": 9.25265247450202e-05, "loss": 1.2393, "step": 6210 }, { "epoch": 0.42082796937461886, "grad_norm": 7.135931015014648, "learning_rate": 9.252515572592239e-05, "loss": 1.0491, "step": 6211 }, { "epoch": 0.42089572464259095, "grad_norm": 8.867389678955078, "learning_rate": 9.252378670682457e-05, "loss": 0.836, "step": 6212 }, { "epoch": 0.42096347991056304, "grad_norm": 6.738379955291748, "learning_rate": 9.252241768772675e-05, "loss": 0.7829, "step": 6213 }, { "epoch": 0.42103123517853513, "grad_norm": 7.226996898651123, "learning_rate": 9.252104866862893e-05, "loss": 0.8716, "step": 6214 }, { "epoch": 0.4210989904465072, "grad_norm": 6.236578941345215, "learning_rate": 9.251967964953111e-05, "loss": 0.62, "step": 6215 }, { "epoch": 0.4211667457144793, "grad_norm": 7.801733016967773, "learning_rate": 9.25183106304333e-05, "loss": 1.1107, "step": 6216 }, { "epoch": 0.4212345009824514, "grad_norm": 6.062146186828613, "learning_rate": 9.251694161133549e-05, "loss": 0.8655, "step": 6217 }, { "epoch": 0.42130225625042345, "grad_norm": 5.408603668212891, "learning_rate": 9.251557259223767e-05, "loss": 0.7054, "step": 6218 }, { "epoch": 0.42137001151839554, "grad_norm": 9.531839370727539, "learning_rate": 9.251420357313985e-05, "loss": 0.9609, "step": 6219 }, { "epoch": 0.42143776678636763, "grad_norm": 6.054145336151123, "learning_rate": 9.251283455404204e-05, "loss": 0.9311, "step": 6220 }, { "epoch": 0.4215055220543397, "grad_norm": 6.372133731842041, "learning_rate": 9.251146553494422e-05, "loss": 0.8797, "step": 6221 }, { "epoch": 0.4215732773223118, "grad_norm": 6.394374847412109, "learning_rate": 9.25100965158464e-05, "loss": 0.7603, "step": 6222 }, { "epoch": 0.4216410325902839, "grad_norm": 7.188971519470215, "learning_rate": 9.250872749674858e-05, "loss": 0.5915, "step": 6223 }, { "epoch": 0.421708787858256, "grad_norm": 7.44260311126709, "learning_rate": 9.250735847765076e-05, "loss": 0.8215, "step": 6224 }, { "epoch": 0.4217765431262281, "grad_norm": 7.43394660949707, "learning_rate": 9.250598945855296e-05, "loss": 0.891, "step": 6225 }, { "epoch": 0.4218442983942001, "grad_norm": 6.9161200523376465, "learning_rate": 9.250462043945514e-05, "loss": 0.8795, "step": 6226 }, { "epoch": 0.4219120536621722, "grad_norm": 6.955477714538574, "learning_rate": 9.250325142035732e-05, "loss": 0.9781, "step": 6227 }, { "epoch": 0.4219798089301443, "grad_norm": 7.632521629333496, "learning_rate": 9.25018824012595e-05, "loss": 0.7374, "step": 6228 }, { "epoch": 0.4220475641981164, "grad_norm": 7.344869613647461, "learning_rate": 9.250051338216169e-05, "loss": 0.9506, "step": 6229 }, { "epoch": 0.4221153194660885, "grad_norm": 7.199647426605225, "learning_rate": 9.249914436306387e-05, "loss": 0.9225, "step": 6230 }, { "epoch": 0.4221830747340606, "grad_norm": 5.884180068969727, "learning_rate": 9.249777534396605e-05, "loss": 0.729, "step": 6231 }, { "epoch": 0.4222508300020327, "grad_norm": 6.82388973236084, "learning_rate": 9.249640632486823e-05, "loss": 1.0978, "step": 6232 }, { "epoch": 0.42231858527000476, "grad_norm": 8.883796691894531, "learning_rate": 9.249503730577041e-05, "loss": 1.1025, "step": 6233 }, { "epoch": 0.4223863405379768, "grad_norm": 6.1692938804626465, "learning_rate": 9.24936682866726e-05, "loss": 0.6734, "step": 6234 }, { "epoch": 0.4224540958059489, "grad_norm": 8.827327728271484, "learning_rate": 9.249229926757479e-05, "loss": 1.1097, "step": 6235 }, { "epoch": 0.422521851073921, "grad_norm": 6.649320602416992, "learning_rate": 9.249093024847697e-05, "loss": 0.9978, "step": 6236 }, { "epoch": 0.4225896063418931, "grad_norm": 8.04487419128418, "learning_rate": 9.248956122937915e-05, "loss": 0.9293, "step": 6237 }, { "epoch": 0.42265736160986517, "grad_norm": 7.031024932861328, "learning_rate": 9.248819221028133e-05, "loss": 0.7474, "step": 6238 }, { "epoch": 0.42272511687783726, "grad_norm": 4.858736038208008, "learning_rate": 9.248682319118352e-05, "loss": 0.6429, "step": 6239 }, { "epoch": 0.42279287214580935, "grad_norm": 6.075960159301758, "learning_rate": 9.24854541720857e-05, "loss": 0.7595, "step": 6240 }, { "epoch": 0.42286062741378144, "grad_norm": 11.653654098510742, "learning_rate": 9.248408515298788e-05, "loss": 0.7962, "step": 6241 }, { "epoch": 0.4229283826817535, "grad_norm": 6.255251884460449, "learning_rate": 9.248271613389006e-05, "loss": 0.9027, "step": 6242 }, { "epoch": 0.42299613794972557, "grad_norm": 6.908040523529053, "learning_rate": 9.248134711479226e-05, "loss": 0.936, "step": 6243 }, { "epoch": 0.42306389321769766, "grad_norm": 7.853124141693115, "learning_rate": 9.247997809569444e-05, "loss": 1.0966, "step": 6244 }, { "epoch": 0.42313164848566975, "grad_norm": 7.661264896392822, "learning_rate": 9.247860907659662e-05, "loss": 0.8407, "step": 6245 }, { "epoch": 0.42319940375364185, "grad_norm": 6.52161169052124, "learning_rate": 9.24772400574988e-05, "loss": 0.862, "step": 6246 }, { "epoch": 0.42326715902161394, "grad_norm": 6.456472873687744, "learning_rate": 9.247587103840099e-05, "loss": 0.713, "step": 6247 }, { "epoch": 0.42333491428958603, "grad_norm": 6.655475616455078, "learning_rate": 9.247450201930317e-05, "loss": 0.7422, "step": 6248 }, { "epoch": 0.4234026695575581, "grad_norm": 6.448397636413574, "learning_rate": 9.247313300020535e-05, "loss": 0.8645, "step": 6249 }, { "epoch": 0.4234704248255302, "grad_norm": 5.964814186096191, "learning_rate": 9.247176398110755e-05, "loss": 0.6173, "step": 6250 }, { "epoch": 0.42353818009350225, "grad_norm": 6.125659465789795, "learning_rate": 9.247039496200973e-05, "loss": 0.6951, "step": 6251 }, { "epoch": 0.42360593536147434, "grad_norm": 6.881291389465332, "learning_rate": 9.246902594291191e-05, "loss": 0.7525, "step": 6252 }, { "epoch": 0.42367369062944643, "grad_norm": 6.647464752197266, "learning_rate": 9.24676569238141e-05, "loss": 0.6757, "step": 6253 }, { "epoch": 0.4237414458974185, "grad_norm": 6.530306816101074, "learning_rate": 9.246628790471628e-05, "loss": 1.1478, "step": 6254 }, { "epoch": 0.4238092011653906, "grad_norm": 7.5514984130859375, "learning_rate": 9.246491888561846e-05, "loss": 0.868, "step": 6255 }, { "epoch": 0.4238769564333627, "grad_norm": 10.405878067016602, "learning_rate": 9.246354986652064e-05, "loss": 1.0607, "step": 6256 }, { "epoch": 0.4239447117013348, "grad_norm": 6.7608418464660645, "learning_rate": 9.246218084742284e-05, "loss": 0.9173, "step": 6257 }, { "epoch": 0.4240124669693069, "grad_norm": 7.583088397979736, "learning_rate": 9.246081182832502e-05, "loss": 0.9091, "step": 6258 }, { "epoch": 0.4240802222372789, "grad_norm": 7.009425163269043, "learning_rate": 9.24594428092272e-05, "loss": 0.9509, "step": 6259 }, { "epoch": 0.424147977505251, "grad_norm": 10.783225059509277, "learning_rate": 9.245807379012938e-05, "loss": 0.9033, "step": 6260 }, { "epoch": 0.4242157327732231, "grad_norm": 6.1244611740112305, "learning_rate": 9.245670477103156e-05, "loss": 0.7285, "step": 6261 }, { "epoch": 0.4242834880411952, "grad_norm": 6.983814716339111, "learning_rate": 9.245533575193375e-05, "loss": 0.9463, "step": 6262 }, { "epoch": 0.4243512433091673, "grad_norm": 6.019556522369385, "learning_rate": 9.245396673283593e-05, "loss": 0.8072, "step": 6263 }, { "epoch": 0.4244189985771394, "grad_norm": 7.2982001304626465, "learning_rate": 9.245259771373811e-05, "loss": 0.8905, "step": 6264 }, { "epoch": 0.4244867538451115, "grad_norm": 7.231256008148193, "learning_rate": 9.245122869464029e-05, "loss": 0.9114, "step": 6265 }, { "epoch": 0.42455450911308357, "grad_norm": 6.597518444061279, "learning_rate": 9.244985967554249e-05, "loss": 1.0431, "step": 6266 }, { "epoch": 0.4246222643810556, "grad_norm": 8.651244163513184, "learning_rate": 9.244849065644467e-05, "loss": 1.1346, "step": 6267 }, { "epoch": 0.4246900196490277, "grad_norm": 9.052881240844727, "learning_rate": 9.244712163734685e-05, "loss": 0.7779, "step": 6268 }, { "epoch": 0.4247577749169998, "grad_norm": 6.886295318603516, "learning_rate": 9.244575261824903e-05, "loss": 0.9643, "step": 6269 }, { "epoch": 0.4248255301849719, "grad_norm": 6.241147518157959, "learning_rate": 9.244438359915121e-05, "loss": 0.7539, "step": 6270 }, { "epoch": 0.42489328545294397, "grad_norm": 7.910638332366943, "learning_rate": 9.24430145800534e-05, "loss": 0.9967, "step": 6271 }, { "epoch": 0.42496104072091606, "grad_norm": 7.068695545196533, "learning_rate": 9.244164556095558e-05, "loss": 0.9531, "step": 6272 }, { "epoch": 0.42502879598888815, "grad_norm": 7.7406907081604, "learning_rate": 9.244027654185776e-05, "loss": 0.8593, "step": 6273 }, { "epoch": 0.42509655125686024, "grad_norm": 6.548360824584961, "learning_rate": 9.243890752275994e-05, "loss": 0.7806, "step": 6274 }, { "epoch": 0.4251643065248323, "grad_norm": 6.20359468460083, "learning_rate": 9.243753850366214e-05, "loss": 0.846, "step": 6275 }, { "epoch": 0.42523206179280437, "grad_norm": 6.6235270500183105, "learning_rate": 9.243616948456432e-05, "loss": 0.806, "step": 6276 }, { "epoch": 0.42529981706077646, "grad_norm": 7.216398239135742, "learning_rate": 9.24348004654665e-05, "loss": 1.0497, "step": 6277 }, { "epoch": 0.42536757232874856, "grad_norm": 6.946768283843994, "learning_rate": 9.243343144636868e-05, "loss": 0.7862, "step": 6278 }, { "epoch": 0.42543532759672065, "grad_norm": 6.441595554351807, "learning_rate": 9.243206242727086e-05, "loss": 0.8663, "step": 6279 }, { "epoch": 0.42550308286469274, "grad_norm": 8.521356582641602, "learning_rate": 9.243069340817305e-05, "loss": 1.3689, "step": 6280 }, { "epoch": 0.42557083813266483, "grad_norm": 7.311733722686768, "learning_rate": 9.242932438907523e-05, "loss": 0.8788, "step": 6281 }, { "epoch": 0.4256385934006369, "grad_norm": 7.157277584075928, "learning_rate": 9.242795536997741e-05, "loss": 0.8245, "step": 6282 }, { "epoch": 0.42570634866860896, "grad_norm": 5.332032680511475, "learning_rate": 9.242658635087959e-05, "loss": 0.7333, "step": 6283 }, { "epoch": 0.42577410393658105, "grad_norm": 6.090252876281738, "learning_rate": 9.242521733178179e-05, "loss": 0.7084, "step": 6284 }, { "epoch": 0.42584185920455314, "grad_norm": 6.971512317657471, "learning_rate": 9.242384831268397e-05, "loss": 0.7226, "step": 6285 }, { "epoch": 0.42590961447252523, "grad_norm": 5.37277364730835, "learning_rate": 9.242247929358615e-05, "loss": 0.701, "step": 6286 }, { "epoch": 0.4259773697404973, "grad_norm": 6.7667622566223145, "learning_rate": 9.242111027448833e-05, "loss": 0.9389, "step": 6287 }, { "epoch": 0.4260451250084694, "grad_norm": 6.8329596519470215, "learning_rate": 9.241974125539051e-05, "loss": 0.8574, "step": 6288 }, { "epoch": 0.4261128802764415, "grad_norm": 6.124345779418945, "learning_rate": 9.24183722362927e-05, "loss": 0.9497, "step": 6289 }, { "epoch": 0.4261806355444136, "grad_norm": 6.190674304962158, "learning_rate": 9.241700321719488e-05, "loss": 0.8933, "step": 6290 }, { "epoch": 0.42624839081238564, "grad_norm": 6.881906986236572, "learning_rate": 9.241563419809706e-05, "loss": 1.0343, "step": 6291 }, { "epoch": 0.4263161460803577, "grad_norm": 6.8810648918151855, "learning_rate": 9.241426517899924e-05, "loss": 1.1141, "step": 6292 }, { "epoch": 0.4263839013483298, "grad_norm": 7.524305820465088, "learning_rate": 9.241289615990144e-05, "loss": 0.9136, "step": 6293 }, { "epoch": 0.4264516566163019, "grad_norm": 5.732180595397949, "learning_rate": 9.241152714080362e-05, "loss": 0.8808, "step": 6294 }, { "epoch": 0.426519411884274, "grad_norm": 5.485534191131592, "learning_rate": 9.24101581217058e-05, "loss": 0.8755, "step": 6295 }, { "epoch": 0.4265871671522461, "grad_norm": 7.75483512878418, "learning_rate": 9.240878910260799e-05, "loss": 0.6602, "step": 6296 }, { "epoch": 0.4266549224202182, "grad_norm": 5.6334075927734375, "learning_rate": 9.240742008351017e-05, "loss": 0.6075, "step": 6297 }, { "epoch": 0.4267226776881903, "grad_norm": 6.316882133483887, "learning_rate": 9.240605106441235e-05, "loss": 0.8607, "step": 6298 }, { "epoch": 0.42679043295616237, "grad_norm": 7.0021257400512695, "learning_rate": 9.240468204531455e-05, "loss": 0.8648, "step": 6299 }, { "epoch": 0.4268581882241344, "grad_norm": 5.8421125411987305, "learning_rate": 9.240331302621673e-05, "loss": 0.7887, "step": 6300 }, { "epoch": 0.4269259434921065, "grad_norm": 8.877615928649902, "learning_rate": 9.24019440071189e-05, "loss": 1.3777, "step": 6301 }, { "epoch": 0.4269936987600786, "grad_norm": 7.440095901489258, "learning_rate": 9.240057498802109e-05, "loss": 1.0015, "step": 6302 }, { "epoch": 0.4270614540280507, "grad_norm": 6.460360527038574, "learning_rate": 9.239920596892328e-05, "loss": 0.6861, "step": 6303 }, { "epoch": 0.42712920929602277, "grad_norm": 9.731608390808105, "learning_rate": 9.239783694982546e-05, "loss": 0.8601, "step": 6304 }, { "epoch": 0.42719696456399486, "grad_norm": 7.891725540161133, "learning_rate": 9.239646793072764e-05, "loss": 0.7478, "step": 6305 }, { "epoch": 0.42726471983196695, "grad_norm": 6.742074966430664, "learning_rate": 9.239509891162982e-05, "loss": 0.7774, "step": 6306 }, { "epoch": 0.42733247509993905, "grad_norm": 7.515460014343262, "learning_rate": 9.239372989253202e-05, "loss": 0.9524, "step": 6307 }, { "epoch": 0.4274002303679111, "grad_norm": 6.278934478759766, "learning_rate": 9.23923608734342e-05, "loss": 0.9216, "step": 6308 }, { "epoch": 0.4274679856358832, "grad_norm": 6.528146266937256, "learning_rate": 9.239099185433638e-05, "loss": 0.9959, "step": 6309 }, { "epoch": 0.42753574090385527, "grad_norm": 5.953476905822754, "learning_rate": 9.238962283523856e-05, "loss": 1.0062, "step": 6310 }, { "epoch": 0.42760349617182736, "grad_norm": 7.488530158996582, "learning_rate": 9.238825381614074e-05, "loss": 0.8633, "step": 6311 }, { "epoch": 0.42767125143979945, "grad_norm": 6.974276542663574, "learning_rate": 9.238688479704293e-05, "loss": 1.0849, "step": 6312 }, { "epoch": 0.42773900670777154, "grad_norm": 7.560183048248291, "learning_rate": 9.238551577794511e-05, "loss": 0.9238, "step": 6313 }, { "epoch": 0.42780676197574363, "grad_norm": 6.859951019287109, "learning_rate": 9.238414675884729e-05, "loss": 0.7021, "step": 6314 }, { "epoch": 0.4278745172437157, "grad_norm": 8.095294952392578, "learning_rate": 9.238277773974947e-05, "loss": 0.9374, "step": 6315 }, { "epoch": 0.42794227251168776, "grad_norm": 6.5458221435546875, "learning_rate": 9.238140872065165e-05, "loss": 0.7089, "step": 6316 }, { "epoch": 0.42801002777965985, "grad_norm": 6.9012274742126465, "learning_rate": 9.238003970155385e-05, "loss": 1.0967, "step": 6317 }, { "epoch": 0.42807778304763194, "grad_norm": 7.991293430328369, "learning_rate": 9.237867068245603e-05, "loss": 0.8149, "step": 6318 }, { "epoch": 0.42814553831560404, "grad_norm": 6.636972904205322, "learning_rate": 9.23773016633582e-05, "loss": 0.9311, "step": 6319 }, { "epoch": 0.4282132935835761, "grad_norm": 7.938980579376221, "learning_rate": 9.237593264426039e-05, "loss": 1.021, "step": 6320 }, { "epoch": 0.4282810488515482, "grad_norm": 6.600100994110107, "learning_rate": 9.237456362516258e-05, "loss": 0.9375, "step": 6321 }, { "epoch": 0.4283488041195203, "grad_norm": 8.848981857299805, "learning_rate": 9.237319460606476e-05, "loss": 0.7755, "step": 6322 }, { "epoch": 0.4284165593874924, "grad_norm": 8.488492012023926, "learning_rate": 9.237182558696694e-05, "loss": 0.8267, "step": 6323 }, { "epoch": 0.42848431465546444, "grad_norm": 5.880466938018799, "learning_rate": 9.237045656786912e-05, "loss": 0.7597, "step": 6324 }, { "epoch": 0.42855206992343653, "grad_norm": 6.318797588348389, "learning_rate": 9.23690875487713e-05, "loss": 1.0001, "step": 6325 }, { "epoch": 0.4286198251914086, "grad_norm": 6.730221271514893, "learning_rate": 9.23677185296735e-05, "loss": 0.9811, "step": 6326 }, { "epoch": 0.4286875804593807, "grad_norm": 6.913969993591309, "learning_rate": 9.236634951057568e-05, "loss": 0.8782, "step": 6327 }, { "epoch": 0.4287553357273528, "grad_norm": 7.533535480499268, "learning_rate": 9.236498049147786e-05, "loss": 0.652, "step": 6328 }, { "epoch": 0.4288230909953249, "grad_norm": 7.687053680419922, "learning_rate": 9.236361147238004e-05, "loss": 0.9665, "step": 6329 }, { "epoch": 0.428890846263297, "grad_norm": 7.747707843780518, "learning_rate": 9.236224245328223e-05, "loss": 1.1481, "step": 6330 }, { "epoch": 0.4289586015312691, "grad_norm": 7.709572792053223, "learning_rate": 9.236087343418441e-05, "loss": 0.9191, "step": 6331 }, { "epoch": 0.4290263567992411, "grad_norm": 8.473289489746094, "learning_rate": 9.235950441508659e-05, "loss": 1.0646, "step": 6332 }, { "epoch": 0.4290941120672132, "grad_norm": 6.672016620635986, "learning_rate": 9.235813539598877e-05, "loss": 0.7865, "step": 6333 }, { "epoch": 0.4291618673351853, "grad_norm": 6.022176265716553, "learning_rate": 9.235676637689095e-05, "loss": 0.8593, "step": 6334 }, { "epoch": 0.4292296226031574, "grad_norm": 6.375059604644775, "learning_rate": 9.235539735779315e-05, "loss": 0.8754, "step": 6335 }, { "epoch": 0.4292973778711295, "grad_norm": 6.617783546447754, "learning_rate": 9.235402833869533e-05, "loss": 1.022, "step": 6336 }, { "epoch": 0.4293651331391016, "grad_norm": 5.790512561798096, "learning_rate": 9.235265931959751e-05, "loss": 0.8903, "step": 6337 }, { "epoch": 0.42943288840707367, "grad_norm": 6.528816223144531, "learning_rate": 9.235129030049969e-05, "loss": 0.8999, "step": 6338 }, { "epoch": 0.42950064367504576, "grad_norm": 7.407886505126953, "learning_rate": 9.234992128140188e-05, "loss": 0.6748, "step": 6339 }, { "epoch": 0.4295683989430178, "grad_norm": 6.933940887451172, "learning_rate": 9.234855226230406e-05, "loss": 1.0334, "step": 6340 }, { "epoch": 0.4296361542109899, "grad_norm": 6.4698686599731445, "learning_rate": 9.234718324320624e-05, "loss": 0.8778, "step": 6341 }, { "epoch": 0.429703909478962, "grad_norm": 7.047656059265137, "learning_rate": 9.234581422410844e-05, "loss": 0.9406, "step": 6342 }, { "epoch": 0.42977166474693407, "grad_norm": 6.107243061065674, "learning_rate": 9.234444520501062e-05, "loss": 0.716, "step": 6343 }, { "epoch": 0.42983942001490616, "grad_norm": 7.644023895263672, "learning_rate": 9.23430761859128e-05, "loss": 0.9632, "step": 6344 }, { "epoch": 0.42990717528287825, "grad_norm": 8.450486183166504, "learning_rate": 9.234170716681499e-05, "loss": 0.9608, "step": 6345 }, { "epoch": 0.42997493055085034, "grad_norm": 6.914335250854492, "learning_rate": 9.234033814771717e-05, "loss": 0.9473, "step": 6346 }, { "epoch": 0.43004268581882243, "grad_norm": 7.677610874176025, "learning_rate": 9.233896912861935e-05, "loss": 0.7403, "step": 6347 }, { "epoch": 0.4301104410867945, "grad_norm": 8.03879165649414, "learning_rate": 9.233760010952153e-05, "loss": 1.1334, "step": 6348 }, { "epoch": 0.43017819635476656, "grad_norm": 8.243664741516113, "learning_rate": 9.233623109042373e-05, "loss": 0.8745, "step": 6349 }, { "epoch": 0.43024595162273865, "grad_norm": 5.417998313903809, "learning_rate": 9.23348620713259e-05, "loss": 0.8243, "step": 6350 }, { "epoch": 0.43031370689071075, "grad_norm": 9.148211479187012, "learning_rate": 9.233349305222809e-05, "loss": 1.0485, "step": 6351 }, { "epoch": 0.43038146215868284, "grad_norm": 5.762056827545166, "learning_rate": 9.233212403313027e-05, "loss": 0.8528, "step": 6352 }, { "epoch": 0.43044921742665493, "grad_norm": 7.960264682769775, "learning_rate": 9.233075501403246e-05, "loss": 1.1308, "step": 6353 }, { "epoch": 0.430516972694627, "grad_norm": 5.9596710205078125, "learning_rate": 9.232938599493464e-05, "loss": 0.8385, "step": 6354 }, { "epoch": 0.4305847279625991, "grad_norm": 7.483585357666016, "learning_rate": 9.232801697583682e-05, "loss": 0.8117, "step": 6355 }, { "epoch": 0.4306524832305712, "grad_norm": 11.998135566711426, "learning_rate": 9.2326647956739e-05, "loss": 0.8077, "step": 6356 }, { "epoch": 0.43072023849854324, "grad_norm": 5.418194770812988, "learning_rate": 9.232527893764118e-05, "loss": 0.6575, "step": 6357 }, { "epoch": 0.43078799376651533, "grad_norm": 7.568809509277344, "learning_rate": 9.232390991854338e-05, "loss": 0.9036, "step": 6358 }, { "epoch": 0.4308557490344874, "grad_norm": 6.613016128540039, "learning_rate": 9.232254089944556e-05, "loss": 0.9918, "step": 6359 }, { "epoch": 0.4309235043024595, "grad_norm": 7.7656683921813965, "learning_rate": 9.232117188034774e-05, "loss": 1.1026, "step": 6360 }, { "epoch": 0.4309912595704316, "grad_norm": 6.714265823364258, "learning_rate": 9.231980286124992e-05, "loss": 0.8636, "step": 6361 }, { "epoch": 0.4310590148384037, "grad_norm": 6.468122482299805, "learning_rate": 9.231843384215211e-05, "loss": 0.7608, "step": 6362 }, { "epoch": 0.4311267701063758, "grad_norm": 7.882842540740967, "learning_rate": 9.231706482305429e-05, "loss": 0.8926, "step": 6363 }, { "epoch": 0.4311945253743479, "grad_norm": 6.002429485321045, "learning_rate": 9.231569580395647e-05, "loss": 0.8137, "step": 6364 }, { "epoch": 0.4312622806423199, "grad_norm": 6.818765163421631, "learning_rate": 9.231432678485865e-05, "loss": 0.8249, "step": 6365 }, { "epoch": 0.431330035910292, "grad_norm": 8.535319328308105, "learning_rate": 9.231295776576083e-05, "loss": 0.888, "step": 6366 }, { "epoch": 0.4313977911782641, "grad_norm": 7.823378086090088, "learning_rate": 9.231158874666303e-05, "loss": 0.8847, "step": 6367 }, { "epoch": 0.4314655464462362, "grad_norm": 7.392731189727783, "learning_rate": 9.23102197275652e-05, "loss": 0.7595, "step": 6368 }, { "epoch": 0.4315333017142083, "grad_norm": 5.637443542480469, "learning_rate": 9.230885070846739e-05, "loss": 0.6636, "step": 6369 }, { "epoch": 0.4316010569821804, "grad_norm": 8.26915454864502, "learning_rate": 9.230748168936957e-05, "loss": 0.7704, "step": 6370 }, { "epoch": 0.43166881225015247, "grad_norm": 8.045785903930664, "learning_rate": 9.230611267027175e-05, "loss": 0.8427, "step": 6371 }, { "epoch": 0.43173656751812456, "grad_norm": 8.981038093566895, "learning_rate": 9.230474365117394e-05, "loss": 0.9997, "step": 6372 }, { "epoch": 0.4318043227860966, "grad_norm": 7.137066841125488, "learning_rate": 9.230337463207612e-05, "loss": 0.7654, "step": 6373 }, { "epoch": 0.4318720780540687, "grad_norm": 6.7576165199279785, "learning_rate": 9.23020056129783e-05, "loss": 0.8766, "step": 6374 }, { "epoch": 0.4319398333220408, "grad_norm": 6.381602764129639, "learning_rate": 9.230063659388048e-05, "loss": 0.9203, "step": 6375 }, { "epoch": 0.43200758859001287, "grad_norm": 6.969717502593994, "learning_rate": 9.229926757478268e-05, "loss": 0.9507, "step": 6376 }, { "epoch": 0.43207534385798496, "grad_norm": 5.76108455657959, "learning_rate": 9.229789855568486e-05, "loss": 0.8533, "step": 6377 }, { "epoch": 0.43214309912595705, "grad_norm": 6.446774005889893, "learning_rate": 9.229652953658704e-05, "loss": 0.9094, "step": 6378 }, { "epoch": 0.43221085439392914, "grad_norm": 9.543696403503418, "learning_rate": 9.229516051748922e-05, "loss": 0.8955, "step": 6379 }, { "epoch": 0.43227860966190124, "grad_norm": 8.095965385437012, "learning_rate": 9.22937914983914e-05, "loss": 0.633, "step": 6380 }, { "epoch": 0.4323463649298733, "grad_norm": 9.561396598815918, "learning_rate": 9.229242247929359e-05, "loss": 0.9367, "step": 6381 }, { "epoch": 0.43241412019784536, "grad_norm": 7.348874092102051, "learning_rate": 9.229105346019577e-05, "loss": 0.7585, "step": 6382 }, { "epoch": 0.43248187546581746, "grad_norm": 5.866179466247559, "learning_rate": 9.228968444109795e-05, "loss": 0.8046, "step": 6383 }, { "epoch": 0.43254963073378955, "grad_norm": 7.945270538330078, "learning_rate": 9.228831542200013e-05, "loss": 0.8599, "step": 6384 }, { "epoch": 0.43261738600176164, "grad_norm": 6.365564346313477, "learning_rate": 9.228694640290233e-05, "loss": 0.8213, "step": 6385 }, { "epoch": 0.43268514126973373, "grad_norm": 7.802513599395752, "learning_rate": 9.22855773838045e-05, "loss": 0.7779, "step": 6386 }, { "epoch": 0.4327528965377058, "grad_norm": 7.740975379943848, "learning_rate": 9.228420836470669e-05, "loss": 0.9564, "step": 6387 }, { "epoch": 0.4328206518056779, "grad_norm": 8.962937355041504, "learning_rate": 9.228283934560887e-05, "loss": 0.7704, "step": 6388 }, { "epoch": 0.43288840707364995, "grad_norm": 9.266582489013672, "learning_rate": 9.228147032651106e-05, "loss": 1.055, "step": 6389 }, { "epoch": 0.43295616234162204, "grad_norm": 8.01395320892334, "learning_rate": 9.228010130741324e-05, "loss": 1.0197, "step": 6390 }, { "epoch": 0.43302391760959413, "grad_norm": 6.83071231842041, "learning_rate": 9.227873228831543e-05, "loss": 0.8489, "step": 6391 }, { "epoch": 0.4330916728775662, "grad_norm": 7.923128604888916, "learning_rate": 9.227736326921762e-05, "loss": 0.8601, "step": 6392 }, { "epoch": 0.4331594281455383, "grad_norm": 6.395363807678223, "learning_rate": 9.22759942501198e-05, "loss": 0.8167, "step": 6393 }, { "epoch": 0.4332271834135104, "grad_norm": 5.6835408210754395, "learning_rate": 9.227462523102198e-05, "loss": 0.895, "step": 6394 }, { "epoch": 0.4332949386814825, "grad_norm": 7.308006286621094, "learning_rate": 9.227325621192417e-05, "loss": 0.985, "step": 6395 }, { "epoch": 0.4333626939494546, "grad_norm": 6.754067897796631, "learning_rate": 9.227188719282635e-05, "loss": 0.9144, "step": 6396 }, { "epoch": 0.4334304492174266, "grad_norm": 7.917202949523926, "learning_rate": 9.227051817372853e-05, "loss": 0.8683, "step": 6397 }, { "epoch": 0.4334982044853987, "grad_norm": 7.502418041229248, "learning_rate": 9.226914915463071e-05, "loss": 1.0849, "step": 6398 }, { "epoch": 0.4335659597533708, "grad_norm": 6.86057186126709, "learning_rate": 9.22677801355329e-05, "loss": 0.9286, "step": 6399 }, { "epoch": 0.4336337150213429, "grad_norm": 8.595507621765137, "learning_rate": 9.226641111643509e-05, "loss": 0.9766, "step": 6400 }, { "epoch": 0.433701470289315, "grad_norm": 6.231963157653809, "learning_rate": 9.226504209733727e-05, "loss": 0.8714, "step": 6401 }, { "epoch": 0.4337692255572871, "grad_norm": 7.1258625984191895, "learning_rate": 9.226367307823945e-05, "loss": 1.1432, "step": 6402 }, { "epoch": 0.4338369808252592, "grad_norm": 6.715908050537109, "learning_rate": 9.226230405914163e-05, "loss": 0.8271, "step": 6403 }, { "epoch": 0.43390473609323127, "grad_norm": 7.551729679107666, "learning_rate": 9.226093504004382e-05, "loss": 0.8919, "step": 6404 }, { "epoch": 0.43397249136120336, "grad_norm": 7.136280059814453, "learning_rate": 9.2259566020946e-05, "loss": 1.0586, "step": 6405 }, { "epoch": 0.4340402466291754, "grad_norm": 5.612763404846191, "learning_rate": 9.225819700184818e-05, "loss": 0.7163, "step": 6406 }, { "epoch": 0.4341080018971475, "grad_norm": 7.445765972137451, "learning_rate": 9.225682798275036e-05, "loss": 1.0488, "step": 6407 }, { "epoch": 0.4341757571651196, "grad_norm": 7.239571571350098, "learning_rate": 9.225545896365255e-05, "loss": 0.9174, "step": 6408 }, { "epoch": 0.43424351243309167, "grad_norm": 7.86649751663208, "learning_rate": 9.225408994455474e-05, "loss": 0.9312, "step": 6409 }, { "epoch": 0.43431126770106376, "grad_norm": 7.049384117126465, "learning_rate": 9.225272092545692e-05, "loss": 0.8739, "step": 6410 }, { "epoch": 0.43437902296903586, "grad_norm": 6.824424743652344, "learning_rate": 9.22513519063591e-05, "loss": 0.9335, "step": 6411 }, { "epoch": 0.43444677823700795, "grad_norm": 6.616923809051514, "learning_rate": 9.224998288726128e-05, "loss": 0.7854, "step": 6412 }, { "epoch": 0.43451453350498004, "grad_norm": 6.989858150482178, "learning_rate": 9.224861386816347e-05, "loss": 0.915, "step": 6413 }, { "epoch": 0.4345822887729521, "grad_norm": 7.5784406661987305, "learning_rate": 9.224724484906565e-05, "loss": 0.9316, "step": 6414 }, { "epoch": 0.43465004404092417, "grad_norm": 5.958365440368652, "learning_rate": 9.224587582996783e-05, "loss": 0.9103, "step": 6415 }, { "epoch": 0.43471779930889626, "grad_norm": 6.811526775360107, "learning_rate": 9.224450681087001e-05, "loss": 0.8778, "step": 6416 }, { "epoch": 0.43478555457686835, "grad_norm": 6.9535722732543945, "learning_rate": 9.22431377917722e-05, "loss": 0.9407, "step": 6417 }, { "epoch": 0.43485330984484044, "grad_norm": 7.412068843841553, "learning_rate": 9.224176877267439e-05, "loss": 0.7526, "step": 6418 }, { "epoch": 0.43492106511281253, "grad_norm": 6.990227699279785, "learning_rate": 9.224039975357657e-05, "loss": 0.8432, "step": 6419 }, { "epoch": 0.4349888203807846, "grad_norm": 7.477065563201904, "learning_rate": 9.223903073447875e-05, "loss": 0.9579, "step": 6420 }, { "epoch": 0.4350565756487567, "grad_norm": 6.9343581199646, "learning_rate": 9.223766171538093e-05, "loss": 0.7967, "step": 6421 }, { "epoch": 0.43512433091672875, "grad_norm": 7.360040664672852, "learning_rate": 9.223629269628312e-05, "loss": 0.807, "step": 6422 }, { "epoch": 0.43519208618470084, "grad_norm": 6.708899974822998, "learning_rate": 9.22349236771853e-05, "loss": 0.723, "step": 6423 }, { "epoch": 0.43525984145267294, "grad_norm": 7.195553302764893, "learning_rate": 9.223355465808748e-05, "loss": 1.0543, "step": 6424 }, { "epoch": 0.435327596720645, "grad_norm": 6.833982467651367, "learning_rate": 9.223218563898966e-05, "loss": 0.7874, "step": 6425 }, { "epoch": 0.4353953519886171, "grad_norm": 7.278003692626953, "learning_rate": 9.223081661989184e-05, "loss": 1.0377, "step": 6426 }, { "epoch": 0.4354631072565892, "grad_norm": 6.5219407081604, "learning_rate": 9.222944760079404e-05, "loss": 0.8975, "step": 6427 }, { "epoch": 0.4355308625245613, "grad_norm": 15.948746681213379, "learning_rate": 9.222807858169622e-05, "loss": 1.1554, "step": 6428 }, { "epoch": 0.4355986177925334, "grad_norm": 8.12380599975586, "learning_rate": 9.22267095625984e-05, "loss": 0.8078, "step": 6429 }, { "epoch": 0.43566637306050543, "grad_norm": 6.6606340408325195, "learning_rate": 9.222534054350058e-05, "loss": 1.065, "step": 6430 }, { "epoch": 0.4357341283284775, "grad_norm": 8.079832077026367, "learning_rate": 9.222397152440277e-05, "loss": 0.9253, "step": 6431 }, { "epoch": 0.4358018835964496, "grad_norm": 6.005470275878906, "learning_rate": 9.222260250530495e-05, "loss": 0.8853, "step": 6432 }, { "epoch": 0.4358696388644217, "grad_norm": 8.552530288696289, "learning_rate": 9.222123348620713e-05, "loss": 0.9477, "step": 6433 }, { "epoch": 0.4359373941323938, "grad_norm": 6.014112949371338, "learning_rate": 9.221986446710931e-05, "loss": 0.9249, "step": 6434 }, { "epoch": 0.4360051494003659, "grad_norm": 8.349777221679688, "learning_rate": 9.22184954480115e-05, "loss": 0.9278, "step": 6435 }, { "epoch": 0.436072904668338, "grad_norm": 7.474494934082031, "learning_rate": 9.221712642891369e-05, "loss": 1.1279, "step": 6436 }, { "epoch": 0.43614065993631007, "grad_norm": 7.27971076965332, "learning_rate": 9.221575740981587e-05, "loss": 0.8476, "step": 6437 }, { "epoch": 0.4362084152042821, "grad_norm": 7.819595813751221, "learning_rate": 9.221438839071806e-05, "loss": 0.828, "step": 6438 }, { "epoch": 0.4362761704722542, "grad_norm": 8.474798202514648, "learning_rate": 9.221301937162024e-05, "loss": 1.0775, "step": 6439 }, { "epoch": 0.4363439257402263, "grad_norm": 7.175133228302002, "learning_rate": 9.221165035252242e-05, "loss": 0.8889, "step": 6440 }, { "epoch": 0.4364116810081984, "grad_norm": 5.881869316101074, "learning_rate": 9.221028133342461e-05, "loss": 0.7435, "step": 6441 }, { "epoch": 0.4364794362761705, "grad_norm": 8.366180419921875, "learning_rate": 9.22089123143268e-05, "loss": 1.1461, "step": 6442 }, { "epoch": 0.43654719154414257, "grad_norm": 7.311695098876953, "learning_rate": 9.220754329522898e-05, "loss": 0.7601, "step": 6443 }, { "epoch": 0.43661494681211466, "grad_norm": 6.886138439178467, "learning_rate": 9.220617427613116e-05, "loss": 0.9432, "step": 6444 }, { "epoch": 0.43668270208008675, "grad_norm": 10.014945983886719, "learning_rate": 9.220480525703335e-05, "loss": 0.8914, "step": 6445 }, { "epoch": 0.4367504573480588, "grad_norm": 6.025598526000977, "learning_rate": 9.220343623793553e-05, "loss": 0.8601, "step": 6446 }, { "epoch": 0.4368182126160309, "grad_norm": 6.072788715362549, "learning_rate": 9.220206721883771e-05, "loss": 0.7345, "step": 6447 }, { "epoch": 0.43688596788400297, "grad_norm": 8.272831916809082, "learning_rate": 9.220069819973989e-05, "loss": 0.7427, "step": 6448 }, { "epoch": 0.43695372315197506, "grad_norm": 6.721330642700195, "learning_rate": 9.219932918064207e-05, "loss": 0.8294, "step": 6449 }, { "epoch": 0.43702147841994715, "grad_norm": 7.108659267425537, "learning_rate": 9.219796016154426e-05, "loss": 0.7635, "step": 6450 }, { "epoch": 0.43708923368791924, "grad_norm": 11.399370193481445, "learning_rate": 9.219659114244645e-05, "loss": 1.0645, "step": 6451 }, { "epoch": 0.43715698895589133, "grad_norm": 7.976221561431885, "learning_rate": 9.219522212334863e-05, "loss": 0.9437, "step": 6452 }, { "epoch": 0.4372247442238634, "grad_norm": 8.320938110351562, "learning_rate": 9.21938531042508e-05, "loss": 0.8029, "step": 6453 }, { "epoch": 0.4372924994918355, "grad_norm": 6.146340370178223, "learning_rate": 9.2192484085153e-05, "loss": 0.7872, "step": 6454 }, { "epoch": 0.43736025475980755, "grad_norm": 6.408365726470947, "learning_rate": 9.219111506605518e-05, "loss": 0.5922, "step": 6455 }, { "epoch": 0.43742801002777965, "grad_norm": 7.011279106140137, "learning_rate": 9.218974604695736e-05, "loss": 0.9152, "step": 6456 }, { "epoch": 0.43749576529575174, "grad_norm": 6.819314002990723, "learning_rate": 9.218837702785954e-05, "loss": 1.002, "step": 6457 }, { "epoch": 0.43756352056372383, "grad_norm": 10.63784122467041, "learning_rate": 9.218700800876172e-05, "loss": 1.0873, "step": 6458 }, { "epoch": 0.4376312758316959, "grad_norm": 6.964840888977051, "learning_rate": 9.218563898966391e-05, "loss": 0.6972, "step": 6459 }, { "epoch": 0.437699031099668, "grad_norm": 8.421072959899902, "learning_rate": 9.21842699705661e-05, "loss": 1.0543, "step": 6460 }, { "epoch": 0.4377667863676401, "grad_norm": 5.883727073669434, "learning_rate": 9.218290095146828e-05, "loss": 0.8138, "step": 6461 }, { "epoch": 0.4378345416356122, "grad_norm": 8.054207801818848, "learning_rate": 9.218153193237046e-05, "loss": 0.8885, "step": 6462 }, { "epoch": 0.43790229690358423, "grad_norm": 7.546504974365234, "learning_rate": 9.218016291327265e-05, "loss": 1.2157, "step": 6463 }, { "epoch": 0.4379700521715563, "grad_norm": 10.255965232849121, "learning_rate": 9.217879389417483e-05, "loss": 1.0082, "step": 6464 }, { "epoch": 0.4380378074395284, "grad_norm": 6.057038307189941, "learning_rate": 9.217742487507701e-05, "loss": 0.8386, "step": 6465 }, { "epoch": 0.4381055627075005, "grad_norm": 8.343341827392578, "learning_rate": 9.217605585597919e-05, "loss": 0.7051, "step": 6466 }, { "epoch": 0.4381733179754726, "grad_norm": 8.163117408752441, "learning_rate": 9.217468683688137e-05, "loss": 1.1442, "step": 6467 }, { "epoch": 0.4382410732434447, "grad_norm": 6.132575035095215, "learning_rate": 9.217331781778357e-05, "loss": 0.7034, "step": 6468 }, { "epoch": 0.4383088285114168, "grad_norm": 6.2581658363342285, "learning_rate": 9.217194879868575e-05, "loss": 0.919, "step": 6469 }, { "epoch": 0.4383765837793889, "grad_norm": 6.228326797485352, "learning_rate": 9.217057977958793e-05, "loss": 0.7785, "step": 6470 }, { "epoch": 0.4384443390473609, "grad_norm": 5.782427787780762, "learning_rate": 9.21692107604901e-05, "loss": 0.9185, "step": 6471 }, { "epoch": 0.438512094315333, "grad_norm": 5.726394176483154, "learning_rate": 9.21678417413923e-05, "loss": 0.593, "step": 6472 }, { "epoch": 0.4385798495833051, "grad_norm": 7.62056303024292, "learning_rate": 9.216647272229448e-05, "loss": 0.8947, "step": 6473 }, { "epoch": 0.4386476048512772, "grad_norm": 7.467504024505615, "learning_rate": 9.216510370319666e-05, "loss": 0.8738, "step": 6474 }, { "epoch": 0.4387153601192493, "grad_norm": 6.0644989013671875, "learning_rate": 9.216373468409884e-05, "loss": 0.7715, "step": 6475 }, { "epoch": 0.43878311538722137, "grad_norm": 7.380848407745361, "learning_rate": 9.216236566500102e-05, "loss": 0.8055, "step": 6476 }, { "epoch": 0.43885087065519346, "grad_norm": 7.162757873535156, "learning_rate": 9.216099664590322e-05, "loss": 0.8826, "step": 6477 }, { "epoch": 0.43891862592316555, "grad_norm": 8.315613746643066, "learning_rate": 9.21596276268054e-05, "loss": 1.1239, "step": 6478 }, { "epoch": 0.4389863811911376, "grad_norm": 6.754839897155762, "learning_rate": 9.215825860770758e-05, "loss": 0.8331, "step": 6479 }, { "epoch": 0.4390541364591097, "grad_norm": 5.4843831062316895, "learning_rate": 9.215688958860976e-05, "loss": 0.8134, "step": 6480 }, { "epoch": 0.43912189172708177, "grad_norm": 7.7780656814575195, "learning_rate": 9.215552056951195e-05, "loss": 1.0216, "step": 6481 }, { "epoch": 0.43918964699505386, "grad_norm": 7.779257297515869, "learning_rate": 9.215415155041413e-05, "loss": 0.91, "step": 6482 }, { "epoch": 0.43925740226302595, "grad_norm": 7.926174640655518, "learning_rate": 9.215278253131631e-05, "loss": 0.9635, "step": 6483 }, { "epoch": 0.43932515753099805, "grad_norm": 5.595751762390137, "learning_rate": 9.21514135122185e-05, "loss": 0.9092, "step": 6484 }, { "epoch": 0.43939291279897014, "grad_norm": 8.04053020477295, "learning_rate": 9.215004449312069e-05, "loss": 0.6872, "step": 6485 }, { "epoch": 0.43946066806694223, "grad_norm": 12.462115287780762, "learning_rate": 9.214867547402287e-05, "loss": 1.2741, "step": 6486 }, { "epoch": 0.43952842333491426, "grad_norm": 6.239933490753174, "learning_rate": 9.214730645492506e-05, "loss": 0.7711, "step": 6487 }, { "epoch": 0.43959617860288636, "grad_norm": 7.584579944610596, "learning_rate": 9.214593743582724e-05, "loss": 0.8666, "step": 6488 }, { "epoch": 0.43966393387085845, "grad_norm": 7.129024505615234, "learning_rate": 9.214456841672942e-05, "loss": 0.8664, "step": 6489 }, { "epoch": 0.43973168913883054, "grad_norm": 7.35465145111084, "learning_rate": 9.21431993976316e-05, "loss": 1.2072, "step": 6490 }, { "epoch": 0.43979944440680263, "grad_norm": 7.911463737487793, "learning_rate": 9.21418303785338e-05, "loss": 0.8019, "step": 6491 }, { "epoch": 0.4398671996747747, "grad_norm": 7.9478349685668945, "learning_rate": 9.214046135943597e-05, "loss": 0.8945, "step": 6492 }, { "epoch": 0.4399349549427468, "grad_norm": 7.73642635345459, "learning_rate": 9.213909234033815e-05, "loss": 1.2203, "step": 6493 }, { "epoch": 0.4400027102107189, "grad_norm": 8.306556701660156, "learning_rate": 9.213772332124034e-05, "loss": 0.8548, "step": 6494 }, { "epoch": 0.44007046547869094, "grad_norm": 7.496469497680664, "learning_rate": 9.213635430214253e-05, "loss": 1.1485, "step": 6495 }, { "epoch": 0.44013822074666303, "grad_norm": 6.734534740447998, "learning_rate": 9.213498528304471e-05, "loss": 0.8758, "step": 6496 }, { "epoch": 0.4402059760146351, "grad_norm": 6.782371997833252, "learning_rate": 9.213361626394689e-05, "loss": 0.5879, "step": 6497 }, { "epoch": 0.4402737312826072, "grad_norm": 6.044846057891846, "learning_rate": 9.213224724484907e-05, "loss": 1.0055, "step": 6498 }, { "epoch": 0.4403414865505793, "grad_norm": 7.979533672332764, "learning_rate": 9.213087822575125e-05, "loss": 0.8265, "step": 6499 }, { "epoch": 0.4404092418185514, "grad_norm": 8.245573997497559, "learning_rate": 9.212950920665344e-05, "loss": 0.5549, "step": 6500 }, { "epoch": 0.4404769970865235, "grad_norm": 6.346557140350342, "learning_rate": 9.212814018755562e-05, "loss": 0.857, "step": 6501 }, { "epoch": 0.4405447523544956, "grad_norm": 6.079600811004639, "learning_rate": 9.21267711684578e-05, "loss": 0.9127, "step": 6502 }, { "epoch": 0.4406125076224676, "grad_norm": 9.402070045471191, "learning_rate": 9.212540214935999e-05, "loss": 0.8104, "step": 6503 }, { "epoch": 0.4406802628904397, "grad_norm": 8.062244415283203, "learning_rate": 9.212403313026217e-05, "loss": 1.195, "step": 6504 }, { "epoch": 0.4407480181584118, "grad_norm": 8.54050064086914, "learning_rate": 9.212266411116436e-05, "loss": 0.854, "step": 6505 }, { "epoch": 0.4408157734263839, "grad_norm": 6.040203094482422, "learning_rate": 9.212129509206654e-05, "loss": 0.7028, "step": 6506 }, { "epoch": 0.440883528694356, "grad_norm": 7.689701080322266, "learning_rate": 9.211992607296872e-05, "loss": 0.9999, "step": 6507 }, { "epoch": 0.4409512839623281, "grad_norm": 7.860703468322754, "learning_rate": 9.21185570538709e-05, "loss": 1.0413, "step": 6508 }, { "epoch": 0.44101903923030017, "grad_norm": 7.390681266784668, "learning_rate": 9.21171880347731e-05, "loss": 1.0337, "step": 6509 }, { "epoch": 0.44108679449827226, "grad_norm": 6.800081253051758, "learning_rate": 9.211581901567527e-05, "loss": 0.9613, "step": 6510 }, { "epoch": 0.44115454976624435, "grad_norm": 5.59462833404541, "learning_rate": 9.211444999657746e-05, "loss": 0.7001, "step": 6511 }, { "epoch": 0.4412223050342164, "grad_norm": 8.300586700439453, "learning_rate": 9.211308097747964e-05, "loss": 1.0087, "step": 6512 }, { "epoch": 0.4412900603021885, "grad_norm": 6.590997695922852, "learning_rate": 9.211171195838182e-05, "loss": 0.8233, "step": 6513 }, { "epoch": 0.44135781557016057, "grad_norm": 8.995779991149902, "learning_rate": 9.211034293928401e-05, "loss": 0.9385, "step": 6514 }, { "epoch": 0.44142557083813266, "grad_norm": 6.689282417297363, "learning_rate": 9.210897392018619e-05, "loss": 0.9455, "step": 6515 }, { "epoch": 0.44149332610610476, "grad_norm": 6.494836330413818, "learning_rate": 9.210760490108837e-05, "loss": 0.9009, "step": 6516 }, { "epoch": 0.44156108137407685, "grad_norm": 7.502823352813721, "learning_rate": 9.210623588199055e-05, "loss": 0.7972, "step": 6517 }, { "epoch": 0.44162883664204894, "grad_norm": 6.561639785766602, "learning_rate": 9.210486686289274e-05, "loss": 0.7988, "step": 6518 }, { "epoch": 0.44169659191002103, "grad_norm": 6.1523847579956055, "learning_rate": 9.210349784379493e-05, "loss": 0.9561, "step": 6519 }, { "epoch": 0.44176434717799307, "grad_norm": 6.792056560516357, "learning_rate": 9.21021288246971e-05, "loss": 0.7623, "step": 6520 }, { "epoch": 0.44183210244596516, "grad_norm": 6.477465629577637, "learning_rate": 9.210075980559929e-05, "loss": 0.6875, "step": 6521 }, { "epoch": 0.44189985771393725, "grad_norm": 7.468865394592285, "learning_rate": 9.209939078650147e-05, "loss": 0.8727, "step": 6522 }, { "epoch": 0.44196761298190934, "grad_norm": 5.351073265075684, "learning_rate": 9.209802176740366e-05, "loss": 0.8114, "step": 6523 }, { "epoch": 0.44203536824988143, "grad_norm": 7.701816558837891, "learning_rate": 9.209665274830584e-05, "loss": 1.1438, "step": 6524 }, { "epoch": 0.4421031235178535, "grad_norm": 6.072995662689209, "learning_rate": 9.209528372920802e-05, "loss": 0.9237, "step": 6525 }, { "epoch": 0.4421708787858256, "grad_norm": 7.1209716796875, "learning_rate": 9.20939147101102e-05, "loss": 0.8445, "step": 6526 }, { "epoch": 0.4422386340537977, "grad_norm": 7.518238067626953, "learning_rate": 9.20925456910124e-05, "loss": 1.0969, "step": 6527 }, { "epoch": 0.44230638932176974, "grad_norm": 5.822054862976074, "learning_rate": 9.209117667191458e-05, "loss": 0.843, "step": 6528 }, { "epoch": 0.44237414458974184, "grad_norm": 6.8769612312316895, "learning_rate": 9.208980765281676e-05, "loss": 0.9661, "step": 6529 }, { "epoch": 0.4424418998577139, "grad_norm": 6.266312122344971, "learning_rate": 9.208843863371895e-05, "loss": 0.7924, "step": 6530 }, { "epoch": 0.442509655125686, "grad_norm": 6.57719612121582, "learning_rate": 9.208706961462113e-05, "loss": 0.7929, "step": 6531 }, { "epoch": 0.4425774103936581, "grad_norm": 6.038028240203857, "learning_rate": 9.208570059552331e-05, "loss": 0.7683, "step": 6532 }, { "epoch": 0.4426451656616302, "grad_norm": 7.042256832122803, "learning_rate": 9.20843315764255e-05, "loss": 0.6646, "step": 6533 }, { "epoch": 0.4427129209296023, "grad_norm": 7.218042850494385, "learning_rate": 9.208296255732768e-05, "loss": 0.9532, "step": 6534 }, { "epoch": 0.4427806761975744, "grad_norm": 8.851286888122559, "learning_rate": 9.208159353822986e-05, "loss": 0.819, "step": 6535 }, { "epoch": 0.4428484314655464, "grad_norm": 5.37313985824585, "learning_rate": 9.208022451913205e-05, "loss": 0.6068, "step": 6536 }, { "epoch": 0.4429161867335185, "grad_norm": 6.643946647644043, "learning_rate": 9.207885550003424e-05, "loss": 1.1088, "step": 6537 }, { "epoch": 0.4429839420014906, "grad_norm": 7.155415058135986, "learning_rate": 9.207748648093642e-05, "loss": 0.9693, "step": 6538 }, { "epoch": 0.4430516972694627, "grad_norm": 7.293460369110107, "learning_rate": 9.20761174618386e-05, "loss": 1.0701, "step": 6539 }, { "epoch": 0.4431194525374348, "grad_norm": 6.664429664611816, "learning_rate": 9.207474844274078e-05, "loss": 0.6425, "step": 6540 }, { "epoch": 0.4431872078054069, "grad_norm": 5.979015350341797, "learning_rate": 9.207337942364297e-05, "loss": 0.6666, "step": 6541 }, { "epoch": 0.44325496307337897, "grad_norm": 5.959255695343018, "learning_rate": 9.207201040454515e-05, "loss": 0.6914, "step": 6542 }, { "epoch": 0.44332271834135106, "grad_norm": 7.694621562957764, "learning_rate": 9.207064138544733e-05, "loss": 0.8844, "step": 6543 }, { "epoch": 0.4433904736093231, "grad_norm": 7.128264904022217, "learning_rate": 9.206927236634951e-05, "loss": 0.6571, "step": 6544 }, { "epoch": 0.4434582288772952, "grad_norm": 7.676843166351318, "learning_rate": 9.20679033472517e-05, "loss": 0.8944, "step": 6545 }, { "epoch": 0.4435259841452673, "grad_norm": 8.858941078186035, "learning_rate": 9.206653432815389e-05, "loss": 0.8057, "step": 6546 }, { "epoch": 0.4435937394132394, "grad_norm": 7.0214033126831055, "learning_rate": 9.206516530905607e-05, "loss": 0.7815, "step": 6547 }, { "epoch": 0.44366149468121147, "grad_norm": 5.209787845611572, "learning_rate": 9.206379628995825e-05, "loss": 0.8764, "step": 6548 }, { "epoch": 0.44372924994918356, "grad_norm": 7.076624870300293, "learning_rate": 9.206242727086043e-05, "loss": 0.8881, "step": 6549 }, { "epoch": 0.44379700521715565, "grad_norm": 7.659173965454102, "learning_rate": 9.206105825176262e-05, "loss": 0.8945, "step": 6550 }, { "epoch": 0.44386476048512774, "grad_norm": 6.2118611335754395, "learning_rate": 9.20596892326648e-05, "loss": 0.8022, "step": 6551 }, { "epoch": 0.4439325157530998, "grad_norm": 6.649642467498779, "learning_rate": 9.205832021356698e-05, "loss": 0.9765, "step": 6552 }, { "epoch": 0.44400027102107187, "grad_norm": 6.0131683349609375, "learning_rate": 9.205695119446917e-05, "loss": 0.6928, "step": 6553 }, { "epoch": 0.44406802628904396, "grad_norm": 6.295266151428223, "learning_rate": 9.205558217537135e-05, "loss": 0.9785, "step": 6554 }, { "epoch": 0.44413578155701605, "grad_norm": 6.524127006530762, "learning_rate": 9.205421315627354e-05, "loss": 0.8065, "step": 6555 }, { "epoch": 0.44420353682498814, "grad_norm": 7.478303909301758, "learning_rate": 9.205284413717572e-05, "loss": 0.9533, "step": 6556 }, { "epoch": 0.44427129209296023, "grad_norm": 7.92426061630249, "learning_rate": 9.20514751180779e-05, "loss": 1.1578, "step": 6557 }, { "epoch": 0.4443390473609323, "grad_norm": 7.532670497894287, "learning_rate": 9.205010609898008e-05, "loss": 0.9318, "step": 6558 }, { "epoch": 0.4444068026289044, "grad_norm": 8.458464622497559, "learning_rate": 9.204873707988226e-05, "loss": 1.087, "step": 6559 }, { "epoch": 0.4444745578968765, "grad_norm": 5.044344425201416, "learning_rate": 9.204736806078445e-05, "loss": 0.7836, "step": 6560 }, { "epoch": 0.44454231316484855, "grad_norm": 6.682579517364502, "learning_rate": 9.204599904168663e-05, "loss": 0.8407, "step": 6561 }, { "epoch": 0.44461006843282064, "grad_norm": 6.948788166046143, "learning_rate": 9.204463002258882e-05, "loss": 0.72, "step": 6562 }, { "epoch": 0.44467782370079273, "grad_norm": 7.333683967590332, "learning_rate": 9.2043261003491e-05, "loss": 0.934, "step": 6563 }, { "epoch": 0.4447455789687648, "grad_norm": 7.648477077484131, "learning_rate": 9.204189198439319e-05, "loss": 0.8992, "step": 6564 }, { "epoch": 0.4448133342367369, "grad_norm": 6.203789710998535, "learning_rate": 9.204052296529537e-05, "loss": 1.011, "step": 6565 }, { "epoch": 0.444881089504709, "grad_norm": 7.739291667938232, "learning_rate": 9.203915394619755e-05, "loss": 0.8695, "step": 6566 }, { "epoch": 0.4449488447726811, "grad_norm": 6.642831325531006, "learning_rate": 9.203778492709973e-05, "loss": 0.666, "step": 6567 }, { "epoch": 0.4450166000406532, "grad_norm": 7.760629653930664, "learning_rate": 9.203641590800191e-05, "loss": 1.1022, "step": 6568 }, { "epoch": 0.4450843553086252, "grad_norm": 6.180607318878174, "learning_rate": 9.20350468889041e-05, "loss": 0.8599, "step": 6569 }, { "epoch": 0.4451521105765973, "grad_norm": 6.284255504608154, "learning_rate": 9.203367786980629e-05, "loss": 0.7226, "step": 6570 }, { "epoch": 0.4452198658445694, "grad_norm": 5.078493595123291, "learning_rate": 9.203230885070847e-05, "loss": 0.8089, "step": 6571 }, { "epoch": 0.4452876211125415, "grad_norm": 6.533141136169434, "learning_rate": 9.203093983161065e-05, "loss": 0.8426, "step": 6572 }, { "epoch": 0.4453553763805136, "grad_norm": 6.589559555053711, "learning_rate": 9.202957081251284e-05, "loss": 1.0924, "step": 6573 }, { "epoch": 0.4454231316484857, "grad_norm": 5.858087539672852, "learning_rate": 9.202820179341502e-05, "loss": 0.8007, "step": 6574 }, { "epoch": 0.4454908869164578, "grad_norm": 6.8898491859436035, "learning_rate": 9.20268327743172e-05, "loss": 0.7622, "step": 6575 }, { "epoch": 0.44555864218442987, "grad_norm": 5.419859409332275, "learning_rate": 9.20254637552194e-05, "loss": 0.711, "step": 6576 }, { "epoch": 0.4456263974524019, "grad_norm": 7.31076192855835, "learning_rate": 9.202409473612157e-05, "loss": 1.0181, "step": 6577 }, { "epoch": 0.445694152720374, "grad_norm": 8.525489807128906, "learning_rate": 9.202272571702375e-05, "loss": 1.0324, "step": 6578 }, { "epoch": 0.4457619079883461, "grad_norm": 6.309749603271484, "learning_rate": 9.202135669792595e-05, "loss": 1.1596, "step": 6579 }, { "epoch": 0.4458296632563182, "grad_norm": 5.667306423187256, "learning_rate": 9.201998767882813e-05, "loss": 0.8091, "step": 6580 }, { "epoch": 0.44589741852429027, "grad_norm": 6.930799961090088, "learning_rate": 9.201861865973031e-05, "loss": 0.915, "step": 6581 }, { "epoch": 0.44596517379226236, "grad_norm": 8.236886024475098, "learning_rate": 9.201724964063249e-05, "loss": 0.8973, "step": 6582 }, { "epoch": 0.44603292906023445, "grad_norm": 8.804180145263672, "learning_rate": 9.201588062153468e-05, "loss": 0.9985, "step": 6583 }, { "epoch": 0.44610068432820654, "grad_norm": 6.966750621795654, "learning_rate": 9.201451160243686e-05, "loss": 0.9065, "step": 6584 }, { "epoch": 0.4461684395961786, "grad_norm": 6.3808698654174805, "learning_rate": 9.201314258333904e-05, "loss": 0.8519, "step": 6585 }, { "epoch": 0.44623619486415067, "grad_norm": 6.380456447601318, "learning_rate": 9.201177356424122e-05, "loss": 0.6365, "step": 6586 }, { "epoch": 0.44630395013212276, "grad_norm": 6.893950939178467, "learning_rate": 9.201040454514342e-05, "loss": 0.8017, "step": 6587 }, { "epoch": 0.44637170540009485, "grad_norm": 6.246983051300049, "learning_rate": 9.20090355260456e-05, "loss": 0.8914, "step": 6588 }, { "epoch": 0.44643946066806695, "grad_norm": 5.012611389160156, "learning_rate": 9.200766650694778e-05, "loss": 0.7421, "step": 6589 }, { "epoch": 0.44650721593603904, "grad_norm": 6.981064319610596, "learning_rate": 9.200629748784996e-05, "loss": 0.9045, "step": 6590 }, { "epoch": 0.44657497120401113, "grad_norm": 6.434818744659424, "learning_rate": 9.200492846875214e-05, "loss": 1.0952, "step": 6591 }, { "epoch": 0.4466427264719832, "grad_norm": 7.2661027908325195, "learning_rate": 9.200355944965433e-05, "loss": 0.8943, "step": 6592 }, { "epoch": 0.44671048173995526, "grad_norm": 6.347513675689697, "learning_rate": 9.200219043055651e-05, "loss": 0.7958, "step": 6593 }, { "epoch": 0.44677823700792735, "grad_norm": 6.98016881942749, "learning_rate": 9.20008214114587e-05, "loss": 0.7197, "step": 6594 }, { "epoch": 0.44684599227589944, "grad_norm": 5.671807765960693, "learning_rate": 9.199945239236087e-05, "loss": 0.7577, "step": 6595 }, { "epoch": 0.44691374754387153, "grad_norm": 8.79588794708252, "learning_rate": 9.199808337326307e-05, "loss": 0.9273, "step": 6596 }, { "epoch": 0.4469815028118436, "grad_norm": 7.276648998260498, "learning_rate": 9.199671435416525e-05, "loss": 0.8705, "step": 6597 }, { "epoch": 0.4470492580798157, "grad_norm": 8.435094833374023, "learning_rate": 9.199534533506743e-05, "loss": 0.9946, "step": 6598 }, { "epoch": 0.4471170133477878, "grad_norm": 6.148091793060303, "learning_rate": 9.199397631596961e-05, "loss": 0.9541, "step": 6599 }, { "epoch": 0.4471847686157599, "grad_norm": 8.867514610290527, "learning_rate": 9.199260729687179e-05, "loss": 1.029, "step": 6600 }, { "epoch": 0.44725252388373193, "grad_norm": 7.480152606964111, "learning_rate": 9.199123827777398e-05, "loss": 0.8819, "step": 6601 }, { "epoch": 0.447320279151704, "grad_norm": 7.422079086303711, "learning_rate": 9.198986925867616e-05, "loss": 0.9998, "step": 6602 }, { "epoch": 0.4473880344196761, "grad_norm": 6.239773750305176, "learning_rate": 9.198850023957834e-05, "loss": 0.9129, "step": 6603 }, { "epoch": 0.4474557896876482, "grad_norm": 6.59074592590332, "learning_rate": 9.198713122048053e-05, "loss": 1.1373, "step": 6604 }, { "epoch": 0.4475235449556203, "grad_norm": 6.096022129058838, "learning_rate": 9.198576220138272e-05, "loss": 0.8916, "step": 6605 }, { "epoch": 0.4475913002235924, "grad_norm": 5.775374889373779, "learning_rate": 9.19843931822849e-05, "loss": 0.8679, "step": 6606 }, { "epoch": 0.4476590554915645, "grad_norm": 6.836724758148193, "learning_rate": 9.198302416318708e-05, "loss": 0.8205, "step": 6607 }, { "epoch": 0.4477268107595366, "grad_norm": 6.876745700836182, "learning_rate": 9.198165514408926e-05, "loss": 0.9564, "step": 6608 }, { "epoch": 0.4477945660275086, "grad_norm": 6.725889205932617, "learning_rate": 9.198028612499144e-05, "loss": 0.7783, "step": 6609 }, { "epoch": 0.4478623212954807, "grad_norm": 6.312922477722168, "learning_rate": 9.197891710589363e-05, "loss": 0.8358, "step": 6610 }, { "epoch": 0.4479300765634528, "grad_norm": 5.7454071044921875, "learning_rate": 9.197754808679581e-05, "loss": 0.7174, "step": 6611 }, { "epoch": 0.4479978318314249, "grad_norm": 9.245071411132812, "learning_rate": 9.1976179067698e-05, "loss": 0.8224, "step": 6612 }, { "epoch": 0.448065587099397, "grad_norm": 7.2646307945251465, "learning_rate": 9.197481004860018e-05, "loss": 0.7895, "step": 6613 }, { "epoch": 0.44813334236736907, "grad_norm": 6.799703598022461, "learning_rate": 9.197344102950236e-05, "loss": 0.9685, "step": 6614 }, { "epoch": 0.44820109763534116, "grad_norm": 6.408104419708252, "learning_rate": 9.197207201040455e-05, "loss": 0.9136, "step": 6615 }, { "epoch": 0.44826885290331325, "grad_norm": 7.4527363777160645, "learning_rate": 9.197070299130673e-05, "loss": 0.9364, "step": 6616 }, { "epoch": 0.44833660817128534, "grad_norm": 6.287598609924316, "learning_rate": 9.196933397220891e-05, "loss": 0.8987, "step": 6617 }, { "epoch": 0.4484043634392574, "grad_norm": 7.270476341247559, "learning_rate": 9.196796495311109e-05, "loss": 1.0113, "step": 6618 }, { "epoch": 0.4484721187072295, "grad_norm": 8.098075866699219, "learning_rate": 9.196659593401328e-05, "loss": 0.7957, "step": 6619 }, { "epoch": 0.44853987397520156, "grad_norm": 6.268966197967529, "learning_rate": 9.196522691491546e-05, "loss": 0.7363, "step": 6620 }, { "epoch": 0.44860762924317366, "grad_norm": 8.81617259979248, "learning_rate": 9.196385789581765e-05, "loss": 1.1112, "step": 6621 }, { "epoch": 0.44867538451114575, "grad_norm": 7.593179225921631, "learning_rate": 9.196248887671984e-05, "loss": 0.7988, "step": 6622 }, { "epoch": 0.44874313977911784, "grad_norm": 7.044666290283203, "learning_rate": 9.196111985762202e-05, "loss": 0.832, "step": 6623 }, { "epoch": 0.44881089504708993, "grad_norm": 5.546746730804443, "learning_rate": 9.19597508385242e-05, "loss": 0.7273, "step": 6624 }, { "epoch": 0.448878650315062, "grad_norm": 7.123478412628174, "learning_rate": 9.19583818194264e-05, "loss": 0.9006, "step": 6625 }, { "epoch": 0.44894640558303406, "grad_norm": 9.073535919189453, "learning_rate": 9.195701280032857e-05, "loss": 0.8702, "step": 6626 }, { "epoch": 0.44901416085100615, "grad_norm": 7.320106506347656, "learning_rate": 9.195564378123075e-05, "loss": 0.7704, "step": 6627 }, { "epoch": 0.44908191611897824, "grad_norm": 7.6163177490234375, "learning_rate": 9.195427476213295e-05, "loss": 1.1432, "step": 6628 }, { "epoch": 0.44914967138695033, "grad_norm": 6.271851062774658, "learning_rate": 9.195290574303513e-05, "loss": 0.9378, "step": 6629 }, { "epoch": 0.4492174266549224, "grad_norm": 6.142657279968262, "learning_rate": 9.195153672393731e-05, "loss": 0.9356, "step": 6630 }, { "epoch": 0.4492851819228945, "grad_norm": 6.669857025146484, "learning_rate": 9.195016770483949e-05, "loss": 0.5939, "step": 6631 }, { "epoch": 0.4493529371908666, "grad_norm": 7.524440288543701, "learning_rate": 9.194879868574167e-05, "loss": 0.7756, "step": 6632 }, { "epoch": 0.4494206924588387, "grad_norm": 7.47619104385376, "learning_rate": 9.194742966664386e-05, "loss": 0.7881, "step": 6633 }, { "epoch": 0.44948844772681074, "grad_norm": 5.466754913330078, "learning_rate": 9.194606064754604e-05, "loss": 0.7653, "step": 6634 }, { "epoch": 0.4495562029947828, "grad_norm": 6.149755954742432, "learning_rate": 9.194469162844822e-05, "loss": 0.8208, "step": 6635 }, { "epoch": 0.4496239582627549, "grad_norm": 6.091528415679932, "learning_rate": 9.19433226093504e-05, "loss": 1.0214, "step": 6636 }, { "epoch": 0.449691713530727, "grad_norm": 5.599562168121338, "learning_rate": 9.194195359025258e-05, "loss": 1.0719, "step": 6637 }, { "epoch": 0.4497594687986991, "grad_norm": 7.242834091186523, "learning_rate": 9.194058457115478e-05, "loss": 1.0185, "step": 6638 }, { "epoch": 0.4498272240666712, "grad_norm": 6.523613452911377, "learning_rate": 9.193921555205696e-05, "loss": 0.9699, "step": 6639 }, { "epoch": 0.4498949793346433, "grad_norm": 6.815830230712891, "learning_rate": 9.193784653295914e-05, "loss": 0.814, "step": 6640 }, { "epoch": 0.4499627346026154, "grad_norm": 5.4793524742126465, "learning_rate": 9.193647751386132e-05, "loss": 0.724, "step": 6641 }, { "epoch": 0.4500304898705874, "grad_norm": 7.303586959838867, "learning_rate": 9.193510849476351e-05, "loss": 0.9991, "step": 6642 }, { "epoch": 0.4500982451385595, "grad_norm": 7.781108856201172, "learning_rate": 9.19337394756657e-05, "loss": 0.925, "step": 6643 }, { "epoch": 0.4501660004065316, "grad_norm": 5.924014568328857, "learning_rate": 9.193237045656787e-05, "loss": 0.8656, "step": 6644 }, { "epoch": 0.4502337556745037, "grad_norm": 8.77978801727295, "learning_rate": 9.193100143747005e-05, "loss": 0.6839, "step": 6645 }, { "epoch": 0.4503015109424758, "grad_norm": 6.725009441375732, "learning_rate": 9.192963241837223e-05, "loss": 1.09, "step": 6646 }, { "epoch": 0.45036926621044787, "grad_norm": 5.697434902191162, "learning_rate": 9.192826339927443e-05, "loss": 0.6516, "step": 6647 }, { "epoch": 0.45043702147841996, "grad_norm": 7.783257007598877, "learning_rate": 9.192689438017661e-05, "loss": 1.1097, "step": 6648 }, { "epoch": 0.45050477674639205, "grad_norm": 7.0213541984558105, "learning_rate": 9.192552536107879e-05, "loss": 1.0617, "step": 6649 }, { "epoch": 0.4505725320143641, "grad_norm": 9.152633666992188, "learning_rate": 9.192415634198097e-05, "loss": 0.9626, "step": 6650 }, { "epoch": 0.4506402872823362, "grad_norm": 8.690450668334961, "learning_rate": 9.192278732288316e-05, "loss": 0.7741, "step": 6651 }, { "epoch": 0.4507080425503083, "grad_norm": 6.784976482391357, "learning_rate": 9.192141830378534e-05, "loss": 1.092, "step": 6652 }, { "epoch": 0.45077579781828037, "grad_norm": 6.294474124908447, "learning_rate": 9.192004928468752e-05, "loss": 0.9322, "step": 6653 }, { "epoch": 0.45084355308625246, "grad_norm": 5.054771423339844, "learning_rate": 9.19186802655897e-05, "loss": 0.7711, "step": 6654 }, { "epoch": 0.45091130835422455, "grad_norm": 7.640350341796875, "learning_rate": 9.191731124649189e-05, "loss": 0.8006, "step": 6655 }, { "epoch": 0.45097906362219664, "grad_norm": 6.9720001220703125, "learning_rate": 9.191594222739408e-05, "loss": 0.7912, "step": 6656 }, { "epoch": 0.45104681889016873, "grad_norm": 7.61221981048584, "learning_rate": 9.191457320829626e-05, "loss": 0.8612, "step": 6657 }, { "epoch": 0.45111457415814077, "grad_norm": 7.7177958488464355, "learning_rate": 9.191320418919844e-05, "loss": 1.0009, "step": 6658 }, { "epoch": 0.45118232942611286, "grad_norm": 6.217735290527344, "learning_rate": 9.191183517010062e-05, "loss": 0.8091, "step": 6659 }, { "epoch": 0.45125008469408495, "grad_norm": 8.2019681930542, "learning_rate": 9.191046615100281e-05, "loss": 1.055, "step": 6660 }, { "epoch": 0.45131783996205704, "grad_norm": 5.853175640106201, "learning_rate": 9.1909097131905e-05, "loss": 1.1045, "step": 6661 }, { "epoch": 0.45138559523002914, "grad_norm": 6.276338577270508, "learning_rate": 9.190772811280717e-05, "loss": 0.9436, "step": 6662 }, { "epoch": 0.4514533504980012, "grad_norm": 11.320722579956055, "learning_rate": 9.190635909370935e-05, "loss": 1.1028, "step": 6663 }, { "epoch": 0.4515211057659733, "grad_norm": 6.311827659606934, "learning_rate": 9.190499007461154e-05, "loss": 0.9232, "step": 6664 }, { "epoch": 0.4515888610339454, "grad_norm": 8.186714172363281, "learning_rate": 9.190362105551373e-05, "loss": 0.7087, "step": 6665 }, { "epoch": 0.4516566163019175, "grad_norm": 6.127712249755859, "learning_rate": 9.190225203641591e-05, "loss": 1.0268, "step": 6666 }, { "epoch": 0.45172437156988954, "grad_norm": 6.840565204620361, "learning_rate": 9.190088301731809e-05, "loss": 0.9955, "step": 6667 }, { "epoch": 0.45179212683786163, "grad_norm": 6.302945137023926, "learning_rate": 9.189951399822027e-05, "loss": 0.9892, "step": 6668 }, { "epoch": 0.4518598821058337, "grad_norm": 9.08460521697998, "learning_rate": 9.189814497912246e-05, "loss": 0.7988, "step": 6669 }, { "epoch": 0.4519276373738058, "grad_norm": 7.497137069702148, "learning_rate": 9.189677596002464e-05, "loss": 0.8358, "step": 6670 }, { "epoch": 0.4519953926417779, "grad_norm": 6.515604496002197, "learning_rate": 9.189540694092682e-05, "loss": 0.7724, "step": 6671 }, { "epoch": 0.45206314790975, "grad_norm": 6.092652320861816, "learning_rate": 9.189403792182902e-05, "loss": 0.7308, "step": 6672 }, { "epoch": 0.4521309031777221, "grad_norm": 8.810023307800293, "learning_rate": 9.18926689027312e-05, "loss": 0.7249, "step": 6673 }, { "epoch": 0.4521986584456942, "grad_norm": 8.328235626220703, "learning_rate": 9.189129988363338e-05, "loss": 0.8115, "step": 6674 }, { "epoch": 0.4522664137136662, "grad_norm": 7.251335620880127, "learning_rate": 9.188993086453557e-05, "loss": 0.9843, "step": 6675 }, { "epoch": 0.4523341689816383, "grad_norm": 8.03663444519043, "learning_rate": 9.188856184543775e-05, "loss": 0.9925, "step": 6676 }, { "epoch": 0.4524019242496104, "grad_norm": 9.7393798828125, "learning_rate": 9.188719282633993e-05, "loss": 0.7575, "step": 6677 }, { "epoch": 0.4524696795175825, "grad_norm": 6.343038558959961, "learning_rate": 9.188582380724211e-05, "loss": 1.0638, "step": 6678 }, { "epoch": 0.4525374347855546, "grad_norm": 7.530363082885742, "learning_rate": 9.188445478814431e-05, "loss": 1.0212, "step": 6679 }, { "epoch": 0.4526051900535267, "grad_norm": 8.741231918334961, "learning_rate": 9.188308576904649e-05, "loss": 1.1723, "step": 6680 }, { "epoch": 0.45267294532149877, "grad_norm": 9.057670593261719, "learning_rate": 9.188171674994867e-05, "loss": 1.0396, "step": 6681 }, { "epoch": 0.45274070058947086, "grad_norm": 6.638017177581787, "learning_rate": 9.188034773085085e-05, "loss": 0.9465, "step": 6682 }, { "epoch": 0.4528084558574429, "grad_norm": 7.643139839172363, "learning_rate": 9.187897871175304e-05, "loss": 0.9346, "step": 6683 }, { "epoch": 0.452876211125415, "grad_norm": 7.176743984222412, "learning_rate": 9.187760969265522e-05, "loss": 1.036, "step": 6684 }, { "epoch": 0.4529439663933871, "grad_norm": 7.506284713745117, "learning_rate": 9.18762406735574e-05, "loss": 0.9487, "step": 6685 }, { "epoch": 0.45301172166135917, "grad_norm": 6.865815162658691, "learning_rate": 9.187487165445958e-05, "loss": 0.8211, "step": 6686 }, { "epoch": 0.45307947692933126, "grad_norm": 6.889796733856201, "learning_rate": 9.187350263536176e-05, "loss": 0.7901, "step": 6687 }, { "epoch": 0.45314723219730335, "grad_norm": 7.016167163848877, "learning_rate": 9.187213361626396e-05, "loss": 0.7699, "step": 6688 }, { "epoch": 0.45321498746527544, "grad_norm": 8.25091552734375, "learning_rate": 9.187076459716614e-05, "loss": 0.9778, "step": 6689 }, { "epoch": 0.45328274273324753, "grad_norm": 7.092465877532959, "learning_rate": 9.186939557806832e-05, "loss": 1.2544, "step": 6690 }, { "epoch": 0.45335049800121957, "grad_norm": 6.402629375457764, "learning_rate": 9.18680265589705e-05, "loss": 0.9225, "step": 6691 }, { "epoch": 0.45341825326919166, "grad_norm": 7.404470920562744, "learning_rate": 9.186665753987268e-05, "loss": 0.7975, "step": 6692 }, { "epoch": 0.45348600853716375, "grad_norm": 5.976271629333496, "learning_rate": 9.186528852077487e-05, "loss": 0.8868, "step": 6693 }, { "epoch": 0.45355376380513585, "grad_norm": 7.843896389007568, "learning_rate": 9.186391950167705e-05, "loss": 0.8034, "step": 6694 }, { "epoch": 0.45362151907310794, "grad_norm": 7.217418193817139, "learning_rate": 9.186255048257923e-05, "loss": 0.6763, "step": 6695 }, { "epoch": 0.45368927434108003, "grad_norm": 6.091420650482178, "learning_rate": 9.186118146348141e-05, "loss": 0.6565, "step": 6696 }, { "epoch": 0.4537570296090521, "grad_norm": 8.277983665466309, "learning_rate": 9.185981244438361e-05, "loss": 0.7589, "step": 6697 }, { "epoch": 0.4538247848770242, "grad_norm": 9.001012802124023, "learning_rate": 9.185844342528579e-05, "loss": 0.8664, "step": 6698 }, { "epoch": 0.45389254014499625, "grad_norm": 6.039417266845703, "learning_rate": 9.185707440618797e-05, "loss": 0.809, "step": 6699 }, { "epoch": 0.45396029541296834, "grad_norm": 6.930225849151611, "learning_rate": 9.185570538709015e-05, "loss": 0.7567, "step": 6700 }, { "epoch": 0.45402805068094043, "grad_norm": 7.135040760040283, "learning_rate": 9.185433636799233e-05, "loss": 0.7669, "step": 6701 }, { "epoch": 0.4540958059489125, "grad_norm": 7.994284152984619, "learning_rate": 9.185296734889452e-05, "loss": 0.7671, "step": 6702 }, { "epoch": 0.4541635612168846, "grad_norm": 6.146820068359375, "learning_rate": 9.18515983297967e-05, "loss": 0.8975, "step": 6703 }, { "epoch": 0.4542313164848567, "grad_norm": 6.619115352630615, "learning_rate": 9.185022931069888e-05, "loss": 0.9972, "step": 6704 }, { "epoch": 0.4542990717528288, "grad_norm": 5.762700080871582, "learning_rate": 9.184886029160106e-05, "loss": 0.6798, "step": 6705 }, { "epoch": 0.4543668270208009, "grad_norm": 7.270383358001709, "learning_rate": 9.184749127250326e-05, "loss": 0.8676, "step": 6706 }, { "epoch": 0.4544345822887729, "grad_norm": 6.667981147766113, "learning_rate": 9.184612225340544e-05, "loss": 0.7988, "step": 6707 }, { "epoch": 0.454502337556745, "grad_norm": 5.513166904449463, "learning_rate": 9.184475323430762e-05, "loss": 0.868, "step": 6708 }, { "epoch": 0.4545700928247171, "grad_norm": 6.376199245452881, "learning_rate": 9.18433842152098e-05, "loss": 0.7859, "step": 6709 }, { "epoch": 0.4546378480926892, "grad_norm": 6.444526672363281, "learning_rate": 9.184201519611198e-05, "loss": 0.7793, "step": 6710 }, { "epoch": 0.4547056033606613, "grad_norm": 8.83858871459961, "learning_rate": 9.184064617701417e-05, "loss": 0.9903, "step": 6711 }, { "epoch": 0.4547733586286334, "grad_norm": 6.07218599319458, "learning_rate": 9.183927715791635e-05, "loss": 0.8349, "step": 6712 }, { "epoch": 0.4548411138966055, "grad_norm": 6.652568340301514, "learning_rate": 9.183790813881853e-05, "loss": 0.8982, "step": 6713 }, { "epoch": 0.45490886916457757, "grad_norm": 7.582399845123291, "learning_rate": 9.183653911972071e-05, "loss": 0.9833, "step": 6714 }, { "epoch": 0.4549766244325496, "grad_norm": 8.411558151245117, "learning_rate": 9.183517010062291e-05, "loss": 0.9433, "step": 6715 }, { "epoch": 0.4550443797005217, "grad_norm": 7.420217037200928, "learning_rate": 9.183380108152509e-05, "loss": 0.9508, "step": 6716 }, { "epoch": 0.4551121349684938, "grad_norm": 7.064800262451172, "learning_rate": 9.183243206242727e-05, "loss": 0.9414, "step": 6717 }, { "epoch": 0.4551798902364659, "grad_norm": 8.102936744689941, "learning_rate": 9.183106304332946e-05, "loss": 0.9061, "step": 6718 }, { "epoch": 0.45524764550443797, "grad_norm": 8.261469841003418, "learning_rate": 9.182969402423164e-05, "loss": 1.2411, "step": 6719 }, { "epoch": 0.45531540077241006, "grad_norm": 7.5003204345703125, "learning_rate": 9.182832500513382e-05, "loss": 0.9296, "step": 6720 }, { "epoch": 0.45538315604038215, "grad_norm": 7.358273029327393, "learning_rate": 9.182695598603602e-05, "loss": 0.8109, "step": 6721 }, { "epoch": 0.45545091130835424, "grad_norm": 7.0917768478393555, "learning_rate": 9.18255869669382e-05, "loss": 0.9517, "step": 6722 }, { "epoch": 0.45551866657632634, "grad_norm": 6.559359550476074, "learning_rate": 9.182421794784038e-05, "loss": 0.8381, "step": 6723 }, { "epoch": 0.4555864218442984, "grad_norm": 6.368768215179443, "learning_rate": 9.182284892874256e-05, "loss": 0.9104, "step": 6724 }, { "epoch": 0.45565417711227046, "grad_norm": 6.576778411865234, "learning_rate": 9.182147990964475e-05, "loss": 0.9116, "step": 6725 }, { "epoch": 0.45572193238024256, "grad_norm": 6.608447551727295, "learning_rate": 9.182011089054693e-05, "loss": 0.9371, "step": 6726 }, { "epoch": 0.45578968764821465, "grad_norm": 6.38028621673584, "learning_rate": 9.181874187144911e-05, "loss": 0.9607, "step": 6727 }, { "epoch": 0.45585744291618674, "grad_norm": 8.349663734436035, "learning_rate": 9.18173728523513e-05, "loss": 0.8739, "step": 6728 }, { "epoch": 0.45592519818415883, "grad_norm": 7.229827880859375, "learning_rate": 9.181600383325349e-05, "loss": 0.6525, "step": 6729 }, { "epoch": 0.4559929534521309, "grad_norm": 7.870299816131592, "learning_rate": 9.181463481415567e-05, "loss": 0.9938, "step": 6730 }, { "epoch": 0.456060708720103, "grad_norm": 7.6586503982543945, "learning_rate": 9.181326579505785e-05, "loss": 0.9489, "step": 6731 }, { "epoch": 0.45612846398807505, "grad_norm": 8.91909122467041, "learning_rate": 9.181189677596003e-05, "loss": 0.9642, "step": 6732 }, { "epoch": 0.45619621925604714, "grad_norm": 8.341778755187988, "learning_rate": 9.181052775686221e-05, "loss": 1.0161, "step": 6733 }, { "epoch": 0.45626397452401923, "grad_norm": 7.269313812255859, "learning_rate": 9.18091587377644e-05, "loss": 0.8535, "step": 6734 }, { "epoch": 0.4563317297919913, "grad_norm": 6.635743141174316, "learning_rate": 9.180778971866658e-05, "loss": 0.6519, "step": 6735 }, { "epoch": 0.4563994850599634, "grad_norm": 7.213603973388672, "learning_rate": 9.180642069956876e-05, "loss": 0.8314, "step": 6736 }, { "epoch": 0.4564672403279355, "grad_norm": 6.491673469543457, "learning_rate": 9.180505168047094e-05, "loss": 0.8691, "step": 6737 }, { "epoch": 0.4565349955959076, "grad_norm": 6.087094783782959, "learning_rate": 9.180368266137314e-05, "loss": 0.8732, "step": 6738 }, { "epoch": 0.4566027508638797, "grad_norm": 6.0057172775268555, "learning_rate": 9.180231364227532e-05, "loss": 0.74, "step": 6739 }, { "epoch": 0.45667050613185173, "grad_norm": 6.74835729598999, "learning_rate": 9.18009446231775e-05, "loss": 0.7628, "step": 6740 }, { "epoch": 0.4567382613998238, "grad_norm": 5.433539867401123, "learning_rate": 9.179957560407968e-05, "loss": 0.6438, "step": 6741 }, { "epoch": 0.4568060166677959, "grad_norm": 7.143089771270752, "learning_rate": 9.179820658498186e-05, "loss": 0.807, "step": 6742 }, { "epoch": 0.456873771935768, "grad_norm": 6.345632076263428, "learning_rate": 9.179683756588405e-05, "loss": 0.8652, "step": 6743 }, { "epoch": 0.4569415272037401, "grad_norm": 6.4180908203125, "learning_rate": 9.179546854678623e-05, "loss": 0.8225, "step": 6744 }, { "epoch": 0.4570092824717122, "grad_norm": 7.375923156738281, "learning_rate": 9.179409952768841e-05, "loss": 0.7922, "step": 6745 }, { "epoch": 0.4570770377396843, "grad_norm": 5.134999752044678, "learning_rate": 9.17927305085906e-05, "loss": 0.8943, "step": 6746 }, { "epoch": 0.45714479300765637, "grad_norm": 8.040843963623047, "learning_rate": 9.179136148949277e-05, "loss": 0.9971, "step": 6747 }, { "epoch": 0.4572125482756284, "grad_norm": 5.302629470825195, "learning_rate": 9.178999247039497e-05, "loss": 0.6811, "step": 6748 }, { "epoch": 0.4572803035436005, "grad_norm": 6.630457878112793, "learning_rate": 9.178862345129715e-05, "loss": 0.7929, "step": 6749 }, { "epoch": 0.4573480588115726, "grad_norm": 5.959926605224609, "learning_rate": 9.178725443219933e-05, "loss": 0.7941, "step": 6750 }, { "epoch": 0.4574158140795447, "grad_norm": 7.855923175811768, "learning_rate": 9.178588541310151e-05, "loss": 0.911, "step": 6751 }, { "epoch": 0.45748356934751677, "grad_norm": 7.775002479553223, "learning_rate": 9.17845163940037e-05, "loss": 0.8315, "step": 6752 }, { "epoch": 0.45755132461548886, "grad_norm": 7.259592056274414, "learning_rate": 9.178314737490588e-05, "loss": 0.8383, "step": 6753 }, { "epoch": 0.45761907988346096, "grad_norm": 8.843462944030762, "learning_rate": 9.178177835580806e-05, "loss": 1.1005, "step": 6754 }, { "epoch": 0.45768683515143305, "grad_norm": 6.322036266326904, "learning_rate": 9.178040933671024e-05, "loss": 0.8553, "step": 6755 }, { "epoch": 0.4577545904194051, "grad_norm": 6.644796848297119, "learning_rate": 9.177904031761242e-05, "loss": 1.0933, "step": 6756 }, { "epoch": 0.4578223456873772, "grad_norm": 7.463156700134277, "learning_rate": 9.177767129851462e-05, "loss": 0.9584, "step": 6757 }, { "epoch": 0.45789010095534927, "grad_norm": 5.70056676864624, "learning_rate": 9.17763022794168e-05, "loss": 0.6945, "step": 6758 }, { "epoch": 0.45795785622332136, "grad_norm": 6.347555637359619, "learning_rate": 9.177493326031898e-05, "loss": 0.9143, "step": 6759 }, { "epoch": 0.45802561149129345, "grad_norm": 6.00151252746582, "learning_rate": 9.177356424122116e-05, "loss": 0.8139, "step": 6760 }, { "epoch": 0.45809336675926554, "grad_norm": 8.02507495880127, "learning_rate": 9.177219522212335e-05, "loss": 0.8941, "step": 6761 }, { "epoch": 0.45816112202723763, "grad_norm": 7.0266876220703125, "learning_rate": 9.177082620302553e-05, "loss": 1.0009, "step": 6762 }, { "epoch": 0.4582288772952097, "grad_norm": 5.483405113220215, "learning_rate": 9.176945718392771e-05, "loss": 0.8113, "step": 6763 }, { "epoch": 0.45829663256318176, "grad_norm": 5.661553382873535, "learning_rate": 9.176808816482991e-05, "loss": 0.7524, "step": 6764 }, { "epoch": 0.45836438783115385, "grad_norm": 7.485569953918457, "learning_rate": 9.176671914573209e-05, "loss": 0.8712, "step": 6765 }, { "epoch": 0.45843214309912594, "grad_norm": 8.262595176696777, "learning_rate": 9.176535012663427e-05, "loss": 0.7777, "step": 6766 }, { "epoch": 0.45849989836709804, "grad_norm": 6.955083847045898, "learning_rate": 9.176398110753646e-05, "loss": 0.8515, "step": 6767 }, { "epoch": 0.4585676536350701, "grad_norm": 5.7011871337890625, "learning_rate": 9.176261208843864e-05, "loss": 0.7977, "step": 6768 }, { "epoch": 0.4586354089030422, "grad_norm": 7.344122886657715, "learning_rate": 9.176124306934082e-05, "loss": 0.877, "step": 6769 }, { "epoch": 0.4587031641710143, "grad_norm": 6.595543384552002, "learning_rate": 9.1759874050243e-05, "loss": 0.7739, "step": 6770 }, { "epoch": 0.4587709194389864, "grad_norm": 6.087022304534912, "learning_rate": 9.17585050311452e-05, "loss": 0.9127, "step": 6771 }, { "epoch": 0.4588386747069585, "grad_norm": 6.13311243057251, "learning_rate": 9.175713601204738e-05, "loss": 0.8803, "step": 6772 }, { "epoch": 0.45890642997493053, "grad_norm": 5.866177558898926, "learning_rate": 9.175576699294956e-05, "loss": 0.9859, "step": 6773 }, { "epoch": 0.4589741852429026, "grad_norm": 7.406826972961426, "learning_rate": 9.175439797385174e-05, "loss": 1.2433, "step": 6774 }, { "epoch": 0.4590419405108747, "grad_norm": 5.886981964111328, "learning_rate": 9.175302895475393e-05, "loss": 0.9994, "step": 6775 }, { "epoch": 0.4591096957788468, "grad_norm": 6.240331172943115, "learning_rate": 9.175165993565611e-05, "loss": 0.6988, "step": 6776 }, { "epoch": 0.4591774510468189, "grad_norm": 6.331019878387451, "learning_rate": 9.175029091655829e-05, "loss": 0.9625, "step": 6777 }, { "epoch": 0.459245206314791, "grad_norm": 6.082772731781006, "learning_rate": 9.174892189746047e-05, "loss": 0.6876, "step": 6778 }, { "epoch": 0.4593129615827631, "grad_norm": 9.173615455627441, "learning_rate": 9.174755287836265e-05, "loss": 0.9572, "step": 6779 }, { "epoch": 0.45938071685073517, "grad_norm": 7.658091068267822, "learning_rate": 9.174618385926485e-05, "loss": 1.0724, "step": 6780 }, { "epoch": 0.4594484721187072, "grad_norm": 5.414113521575928, "learning_rate": 9.174481484016703e-05, "loss": 0.963, "step": 6781 }, { "epoch": 0.4595162273866793, "grad_norm": 8.290900230407715, "learning_rate": 9.174344582106921e-05, "loss": 0.8012, "step": 6782 }, { "epoch": 0.4595839826546514, "grad_norm": 5.778069019317627, "learning_rate": 9.174207680197139e-05, "loss": 0.9199, "step": 6783 }, { "epoch": 0.4596517379226235, "grad_norm": 7.42002010345459, "learning_rate": 9.174070778287358e-05, "loss": 1.022, "step": 6784 }, { "epoch": 0.4597194931905956, "grad_norm": 5.714671611785889, "learning_rate": 9.173933876377576e-05, "loss": 0.8194, "step": 6785 }, { "epoch": 0.45978724845856767, "grad_norm": 6.420266628265381, "learning_rate": 9.173796974467794e-05, "loss": 0.8879, "step": 6786 }, { "epoch": 0.45985500372653976, "grad_norm": 7.046072006225586, "learning_rate": 9.173660072558012e-05, "loss": 0.7672, "step": 6787 }, { "epoch": 0.45992275899451185, "grad_norm": 8.797179222106934, "learning_rate": 9.17352317064823e-05, "loss": 0.7714, "step": 6788 }, { "epoch": 0.4599905142624839, "grad_norm": 6.534037113189697, "learning_rate": 9.17338626873845e-05, "loss": 0.8629, "step": 6789 }, { "epoch": 0.460058269530456, "grad_norm": 7.647995948791504, "learning_rate": 9.173249366828668e-05, "loss": 0.8143, "step": 6790 }, { "epoch": 0.46012602479842807, "grad_norm": 5.4971418380737305, "learning_rate": 9.173112464918886e-05, "loss": 0.7946, "step": 6791 }, { "epoch": 0.46019378006640016, "grad_norm": 6.304740905761719, "learning_rate": 9.172975563009104e-05, "loss": 0.9233, "step": 6792 }, { "epoch": 0.46026153533437225, "grad_norm": 7.571120262145996, "learning_rate": 9.172838661099323e-05, "loss": 0.9919, "step": 6793 }, { "epoch": 0.46032929060234434, "grad_norm": 6.336854457855225, "learning_rate": 9.172701759189541e-05, "loss": 0.7232, "step": 6794 }, { "epoch": 0.46039704587031643, "grad_norm": 6.926676273345947, "learning_rate": 9.17256485727976e-05, "loss": 0.758, "step": 6795 }, { "epoch": 0.4604648011382885, "grad_norm": 5.480643272399902, "learning_rate": 9.172427955369977e-05, "loss": 0.5871, "step": 6796 }, { "epoch": 0.46053255640626056, "grad_norm": 8.286799430847168, "learning_rate": 9.172291053460195e-05, "loss": 0.8574, "step": 6797 }, { "epoch": 0.46060031167423265, "grad_norm": 8.043119430541992, "learning_rate": 9.172154151550415e-05, "loss": 0.6363, "step": 6798 }, { "epoch": 0.46066806694220475, "grad_norm": 8.308424949645996, "learning_rate": 9.172017249640633e-05, "loss": 0.9203, "step": 6799 }, { "epoch": 0.46073582221017684, "grad_norm": 7.131639003753662, "learning_rate": 9.171880347730851e-05, "loss": 0.685, "step": 6800 }, { "epoch": 0.46080357747814893, "grad_norm": 8.397472381591797, "learning_rate": 9.171743445821069e-05, "loss": 0.9637, "step": 6801 }, { "epoch": 0.460871332746121, "grad_norm": 6.666365146636963, "learning_rate": 9.171606543911287e-05, "loss": 1.0735, "step": 6802 }, { "epoch": 0.4609390880140931, "grad_norm": 11.102825164794922, "learning_rate": 9.171469642001506e-05, "loss": 0.8958, "step": 6803 }, { "epoch": 0.4610068432820652, "grad_norm": 7.294391632080078, "learning_rate": 9.171332740091724e-05, "loss": 1.0354, "step": 6804 }, { "epoch": 0.46107459855003724, "grad_norm": 5.9984517097473145, "learning_rate": 9.171195838181942e-05, "loss": 0.605, "step": 6805 }, { "epoch": 0.46114235381800933, "grad_norm": 6.209224224090576, "learning_rate": 9.17105893627216e-05, "loss": 0.8715, "step": 6806 }, { "epoch": 0.4612101090859814, "grad_norm": 5.404207229614258, "learning_rate": 9.17092203436238e-05, "loss": 0.6694, "step": 6807 }, { "epoch": 0.4612778643539535, "grad_norm": 6.675217151641846, "learning_rate": 9.170785132452598e-05, "loss": 0.7288, "step": 6808 }, { "epoch": 0.4613456196219256, "grad_norm": 8.444995880126953, "learning_rate": 9.170648230542816e-05, "loss": 1.0756, "step": 6809 }, { "epoch": 0.4614133748898977, "grad_norm": 8.167703628540039, "learning_rate": 9.170511328633035e-05, "loss": 1.1605, "step": 6810 }, { "epoch": 0.4614811301578698, "grad_norm": 7.55181360244751, "learning_rate": 9.170374426723253e-05, "loss": 0.7805, "step": 6811 }, { "epoch": 0.4615488854258419, "grad_norm": 6.380015850067139, "learning_rate": 9.170237524813471e-05, "loss": 0.6584, "step": 6812 }, { "epoch": 0.4616166406938139, "grad_norm": 6.2454376220703125, "learning_rate": 9.170100622903691e-05, "loss": 0.8212, "step": 6813 }, { "epoch": 0.461684395961786, "grad_norm": 7.530882358551025, "learning_rate": 9.169963720993909e-05, "loss": 0.8926, "step": 6814 }, { "epoch": 0.4617521512297581, "grad_norm": 6.3245158195495605, "learning_rate": 9.169826819084127e-05, "loss": 0.8672, "step": 6815 }, { "epoch": 0.4618199064977302, "grad_norm": 5.822272300720215, "learning_rate": 9.169689917174346e-05, "loss": 0.7972, "step": 6816 }, { "epoch": 0.4618876617657023, "grad_norm": 8.160684585571289, "learning_rate": 9.169553015264564e-05, "loss": 1.0076, "step": 6817 }, { "epoch": 0.4619554170336744, "grad_norm": 6.745192050933838, "learning_rate": 9.169416113354782e-05, "loss": 0.8187, "step": 6818 }, { "epoch": 0.46202317230164647, "grad_norm": 8.530631065368652, "learning_rate": 9.169279211445e-05, "loss": 0.7655, "step": 6819 }, { "epoch": 0.46209092756961856, "grad_norm": 7.213817596435547, "learning_rate": 9.169142309535218e-05, "loss": 0.8737, "step": 6820 }, { "epoch": 0.4621586828375906, "grad_norm": 6.5060577392578125, "learning_rate": 9.169005407625438e-05, "loss": 0.815, "step": 6821 }, { "epoch": 0.4622264381055627, "grad_norm": 6.782070636749268, "learning_rate": 9.168868505715656e-05, "loss": 0.792, "step": 6822 }, { "epoch": 0.4622941933735348, "grad_norm": 6.130987644195557, "learning_rate": 9.168731603805874e-05, "loss": 0.6743, "step": 6823 }, { "epoch": 0.46236194864150687, "grad_norm": 8.178153991699219, "learning_rate": 9.168594701896092e-05, "loss": 1.0958, "step": 6824 }, { "epoch": 0.46242970390947896, "grad_norm": 8.360578536987305, "learning_rate": 9.16845779998631e-05, "loss": 0.9709, "step": 6825 }, { "epoch": 0.46249745917745105, "grad_norm": 7.507253170013428, "learning_rate": 9.168320898076529e-05, "loss": 0.9384, "step": 6826 }, { "epoch": 0.46256521444542315, "grad_norm": 8.285658836364746, "learning_rate": 9.168183996166747e-05, "loss": 0.8913, "step": 6827 }, { "epoch": 0.46263296971339524, "grad_norm": 7.765392780303955, "learning_rate": 9.168047094256965e-05, "loss": 0.8754, "step": 6828 }, { "epoch": 0.46270072498136733, "grad_norm": 7.6219892501831055, "learning_rate": 9.167910192347183e-05, "loss": 0.7401, "step": 6829 }, { "epoch": 0.46276848024933936, "grad_norm": 9.139801979064941, "learning_rate": 9.167773290437403e-05, "loss": 0.8068, "step": 6830 }, { "epoch": 0.46283623551731146, "grad_norm": 7.655275821685791, "learning_rate": 9.167636388527621e-05, "loss": 0.8314, "step": 6831 }, { "epoch": 0.46290399078528355, "grad_norm": 6.699294090270996, "learning_rate": 9.167499486617839e-05, "loss": 0.8665, "step": 6832 }, { "epoch": 0.46297174605325564, "grad_norm": 8.436169624328613, "learning_rate": 9.167362584708057e-05, "loss": 1.3026, "step": 6833 }, { "epoch": 0.46303950132122773, "grad_norm": 5.195062160491943, "learning_rate": 9.167225682798275e-05, "loss": 0.7468, "step": 6834 }, { "epoch": 0.4631072565891998, "grad_norm": 5.665080547332764, "learning_rate": 9.167088780888494e-05, "loss": 0.7357, "step": 6835 }, { "epoch": 0.4631750118571719, "grad_norm": 6.243655681610107, "learning_rate": 9.166951878978712e-05, "loss": 0.7705, "step": 6836 }, { "epoch": 0.463242767125144, "grad_norm": 7.49260950088501, "learning_rate": 9.16681497706893e-05, "loss": 0.8061, "step": 6837 }, { "epoch": 0.46331052239311604, "grad_norm": 7.076335430145264, "learning_rate": 9.166678075159148e-05, "loss": 0.8307, "step": 6838 }, { "epoch": 0.46337827766108813, "grad_norm": 6.332518100738525, "learning_rate": 9.166541173249368e-05, "loss": 0.7431, "step": 6839 }, { "epoch": 0.4634460329290602, "grad_norm": 6.261449813842773, "learning_rate": 9.166404271339586e-05, "loss": 0.8817, "step": 6840 }, { "epoch": 0.4635137881970323, "grad_norm": 7.467769622802734, "learning_rate": 9.166267369429804e-05, "loss": 0.9473, "step": 6841 }, { "epoch": 0.4635815434650044, "grad_norm": 5.55718469619751, "learning_rate": 9.166130467520022e-05, "loss": 0.7726, "step": 6842 }, { "epoch": 0.4636492987329765, "grad_norm": 7.012959003448486, "learning_rate": 9.16599356561024e-05, "loss": 0.9274, "step": 6843 }, { "epoch": 0.4637170540009486, "grad_norm": 6.344860076904297, "learning_rate": 9.165856663700459e-05, "loss": 1.0617, "step": 6844 }, { "epoch": 0.4637848092689207, "grad_norm": 6.024655342102051, "learning_rate": 9.165719761790677e-05, "loss": 0.8224, "step": 6845 }, { "epoch": 0.4638525645368927, "grad_norm": 6.97758674621582, "learning_rate": 9.165582859880895e-05, "loss": 0.8344, "step": 6846 }, { "epoch": 0.4639203198048648, "grad_norm": 6.874304294586182, "learning_rate": 9.165445957971113e-05, "loss": 0.8698, "step": 6847 }, { "epoch": 0.4639880750728369, "grad_norm": 8.15816593170166, "learning_rate": 9.165309056061331e-05, "loss": 1.0457, "step": 6848 }, { "epoch": 0.464055830340809, "grad_norm": 7.304631233215332, "learning_rate": 9.165172154151551e-05, "loss": 0.9027, "step": 6849 }, { "epoch": 0.4641235856087811, "grad_norm": 7.377762317657471, "learning_rate": 9.165035252241769e-05, "loss": 0.6635, "step": 6850 }, { "epoch": 0.4641913408767532, "grad_norm": 6.051864147186279, "learning_rate": 9.164898350331987e-05, "loss": 0.925, "step": 6851 }, { "epoch": 0.46425909614472527, "grad_norm": 6.560476779937744, "learning_rate": 9.164761448422205e-05, "loss": 0.7674, "step": 6852 }, { "epoch": 0.46432685141269736, "grad_norm": 6.828582763671875, "learning_rate": 9.164624546512424e-05, "loss": 0.8159, "step": 6853 }, { "epoch": 0.4643946066806694, "grad_norm": 6.463281631469727, "learning_rate": 9.164487644602642e-05, "loss": 0.6261, "step": 6854 }, { "epoch": 0.4644623619486415, "grad_norm": 6.3371734619140625, "learning_rate": 9.16435074269286e-05, "loss": 0.6489, "step": 6855 }, { "epoch": 0.4645301172166136, "grad_norm": 6.052369117736816, "learning_rate": 9.16421384078308e-05, "loss": 0.6899, "step": 6856 }, { "epoch": 0.46459787248458567, "grad_norm": 6.464377403259277, "learning_rate": 9.164076938873298e-05, "loss": 0.7603, "step": 6857 }, { "epoch": 0.46466562775255776, "grad_norm": 5.9912567138671875, "learning_rate": 9.163940036963516e-05, "loss": 0.7188, "step": 6858 }, { "epoch": 0.46473338302052986, "grad_norm": 8.469727516174316, "learning_rate": 9.163803135053735e-05, "loss": 0.9991, "step": 6859 }, { "epoch": 0.46480113828850195, "grad_norm": 6.261715888977051, "learning_rate": 9.163666233143953e-05, "loss": 0.7429, "step": 6860 }, { "epoch": 0.46486889355647404, "grad_norm": 7.389404296875, "learning_rate": 9.163529331234171e-05, "loss": 0.6294, "step": 6861 }, { "epoch": 0.4649366488244461, "grad_norm": 6.988171100616455, "learning_rate": 9.16339242932439e-05, "loss": 0.8705, "step": 6862 }, { "epoch": 0.46500440409241817, "grad_norm": 6.285641193389893, "learning_rate": 9.163255527414609e-05, "loss": 0.7026, "step": 6863 }, { "epoch": 0.46507215936039026, "grad_norm": 8.77840518951416, "learning_rate": 9.163118625504827e-05, "loss": 0.8766, "step": 6864 }, { "epoch": 0.46513991462836235, "grad_norm": 6.199909210205078, "learning_rate": 9.162981723595045e-05, "loss": 0.7304, "step": 6865 }, { "epoch": 0.46520766989633444, "grad_norm": 6.947317600250244, "learning_rate": 9.162844821685263e-05, "loss": 0.8649, "step": 6866 }, { "epoch": 0.46527542516430653, "grad_norm": 8.396434783935547, "learning_rate": 9.162707919775482e-05, "loss": 1.2161, "step": 6867 }, { "epoch": 0.4653431804322786, "grad_norm": 5.935467720031738, "learning_rate": 9.1625710178657e-05, "loss": 0.8499, "step": 6868 }, { "epoch": 0.4654109357002507, "grad_norm": 6.258296966552734, "learning_rate": 9.162434115955918e-05, "loss": 0.9541, "step": 6869 }, { "epoch": 0.46547869096822275, "grad_norm": 6.29650354385376, "learning_rate": 9.162297214046136e-05, "loss": 0.7774, "step": 6870 }, { "epoch": 0.46554644623619484, "grad_norm": 5.948836803436279, "learning_rate": 9.162160312136356e-05, "loss": 0.7579, "step": 6871 }, { "epoch": 0.46561420150416694, "grad_norm": 5.719659805297852, "learning_rate": 9.162023410226574e-05, "loss": 0.7482, "step": 6872 }, { "epoch": 0.465681956772139, "grad_norm": 6.145468235015869, "learning_rate": 9.161886508316792e-05, "loss": 0.8816, "step": 6873 }, { "epoch": 0.4657497120401111, "grad_norm": 6.33701753616333, "learning_rate": 9.16174960640701e-05, "loss": 0.7617, "step": 6874 }, { "epoch": 0.4658174673080832, "grad_norm": 6.143844127655029, "learning_rate": 9.161612704497228e-05, "loss": 0.9695, "step": 6875 }, { "epoch": 0.4658852225760553, "grad_norm": 6.316319942474365, "learning_rate": 9.161475802587447e-05, "loss": 0.8837, "step": 6876 }, { "epoch": 0.4659529778440274, "grad_norm": 6.68782377243042, "learning_rate": 9.161338900677665e-05, "loss": 0.7324, "step": 6877 }, { "epoch": 0.4660207331119995, "grad_norm": 6.216282844543457, "learning_rate": 9.161201998767883e-05, "loss": 0.7787, "step": 6878 }, { "epoch": 0.4660884883799715, "grad_norm": 6.8673176765441895, "learning_rate": 9.161065096858101e-05, "loss": 1.1118, "step": 6879 }, { "epoch": 0.4661562436479436, "grad_norm": 6.71323823928833, "learning_rate": 9.16092819494832e-05, "loss": 0.9176, "step": 6880 }, { "epoch": 0.4662239989159157, "grad_norm": 8.589679718017578, "learning_rate": 9.160791293038539e-05, "loss": 0.9758, "step": 6881 }, { "epoch": 0.4662917541838878, "grad_norm": 6.692760467529297, "learning_rate": 9.160654391128757e-05, "loss": 0.8867, "step": 6882 }, { "epoch": 0.4663595094518599, "grad_norm": 8.462479591369629, "learning_rate": 9.160517489218975e-05, "loss": 0.9738, "step": 6883 }, { "epoch": 0.466427264719832, "grad_norm": 6.068343162536621, "learning_rate": 9.160380587309193e-05, "loss": 0.6652, "step": 6884 }, { "epoch": 0.46649501998780407, "grad_norm": 6.04793643951416, "learning_rate": 9.160243685399412e-05, "loss": 0.8575, "step": 6885 }, { "epoch": 0.46656277525577616, "grad_norm": 7.8360514640808105, "learning_rate": 9.16010678348963e-05, "loss": 0.8488, "step": 6886 }, { "epoch": 0.4666305305237482, "grad_norm": 6.028532981872559, "learning_rate": 9.159969881579848e-05, "loss": 0.8841, "step": 6887 }, { "epoch": 0.4666982857917203, "grad_norm": 7.673785209655762, "learning_rate": 9.159832979670066e-05, "loss": 0.9035, "step": 6888 }, { "epoch": 0.4667660410596924, "grad_norm": 7.042590618133545, "learning_rate": 9.159696077760284e-05, "loss": 0.8812, "step": 6889 }, { "epoch": 0.4668337963276645, "grad_norm": 6.562109470367432, "learning_rate": 9.159559175850504e-05, "loss": 0.7179, "step": 6890 }, { "epoch": 0.46690155159563657, "grad_norm": 6.083657264709473, "learning_rate": 9.159422273940722e-05, "loss": 0.7322, "step": 6891 }, { "epoch": 0.46696930686360866, "grad_norm": 7.408811569213867, "learning_rate": 9.15928537203094e-05, "loss": 0.9291, "step": 6892 }, { "epoch": 0.46703706213158075, "grad_norm": 6.70425271987915, "learning_rate": 9.159148470121158e-05, "loss": 0.9796, "step": 6893 }, { "epoch": 0.46710481739955284, "grad_norm": 7.486738681793213, "learning_rate": 9.159011568211377e-05, "loss": 1.128, "step": 6894 }, { "epoch": 0.4671725726675249, "grad_norm": 6.747304916381836, "learning_rate": 9.158874666301595e-05, "loss": 0.8432, "step": 6895 }, { "epoch": 0.46724032793549697, "grad_norm": 7.494656085968018, "learning_rate": 9.158737764391813e-05, "loss": 0.9587, "step": 6896 }, { "epoch": 0.46730808320346906, "grad_norm": 5.556826114654541, "learning_rate": 9.158600862482031e-05, "loss": 0.6565, "step": 6897 }, { "epoch": 0.46737583847144115, "grad_norm": 6.550345420837402, "learning_rate": 9.15846396057225e-05, "loss": 0.7854, "step": 6898 }, { "epoch": 0.46744359373941324, "grad_norm": 7.0627899169921875, "learning_rate": 9.158327058662469e-05, "loss": 0.9204, "step": 6899 }, { "epoch": 0.46751134900738534, "grad_norm": 7.1070661544799805, "learning_rate": 9.158190156752687e-05, "loss": 0.9357, "step": 6900 }, { "epoch": 0.4675791042753574, "grad_norm": 7.313487529754639, "learning_rate": 9.158053254842905e-05, "loss": 0.7561, "step": 6901 }, { "epoch": 0.4676468595433295, "grad_norm": 6.839418888092041, "learning_rate": 9.157916352933123e-05, "loss": 0.9551, "step": 6902 }, { "epoch": 0.46771461481130155, "grad_norm": 6.881319999694824, "learning_rate": 9.157779451023342e-05, "loss": 0.8799, "step": 6903 }, { "epoch": 0.46778237007927365, "grad_norm": 7.470109462738037, "learning_rate": 9.15764254911356e-05, "loss": 0.806, "step": 6904 }, { "epoch": 0.46785012534724574, "grad_norm": 7.056912899017334, "learning_rate": 9.157505647203778e-05, "loss": 0.7905, "step": 6905 }, { "epoch": 0.46791788061521783, "grad_norm": 6.295861721038818, "learning_rate": 9.157368745293998e-05, "loss": 0.8256, "step": 6906 }, { "epoch": 0.4679856358831899, "grad_norm": 5.80570650100708, "learning_rate": 9.157231843384216e-05, "loss": 0.7747, "step": 6907 }, { "epoch": 0.468053391151162, "grad_norm": 7.0094218254089355, "learning_rate": 9.157094941474435e-05, "loss": 0.9504, "step": 6908 }, { "epoch": 0.4681211464191341, "grad_norm": 5.334716320037842, "learning_rate": 9.156958039564653e-05, "loss": 0.7608, "step": 6909 }, { "epoch": 0.4681889016871062, "grad_norm": 8.058252334594727, "learning_rate": 9.156821137654871e-05, "loss": 0.889, "step": 6910 }, { "epoch": 0.46825665695507823, "grad_norm": 7.769287586212158, "learning_rate": 9.156684235745089e-05, "loss": 1.0391, "step": 6911 }, { "epoch": 0.4683244122230503, "grad_norm": 5.96143102645874, "learning_rate": 9.156547333835307e-05, "loss": 0.8793, "step": 6912 }, { "epoch": 0.4683921674910224, "grad_norm": 5.6451005935668945, "learning_rate": 9.156410431925527e-05, "loss": 0.7804, "step": 6913 }, { "epoch": 0.4684599227589945, "grad_norm": 7.539672374725342, "learning_rate": 9.156273530015745e-05, "loss": 0.8137, "step": 6914 }, { "epoch": 0.4685276780269666, "grad_norm": 7.447227954864502, "learning_rate": 9.156136628105963e-05, "loss": 1.0692, "step": 6915 }, { "epoch": 0.4685954332949387, "grad_norm": 8.100126266479492, "learning_rate": 9.155999726196181e-05, "loss": 0.8124, "step": 6916 }, { "epoch": 0.4686631885629108, "grad_norm": 8.560744285583496, "learning_rate": 9.1558628242864e-05, "loss": 0.8659, "step": 6917 }, { "epoch": 0.4687309438308829, "grad_norm": 7.135112285614014, "learning_rate": 9.155725922376618e-05, "loss": 0.9009, "step": 6918 }, { "epoch": 0.4687986990988549, "grad_norm": 7.749111175537109, "learning_rate": 9.155589020466836e-05, "loss": 0.9209, "step": 6919 }, { "epoch": 0.468866454366827, "grad_norm": 6.616466999053955, "learning_rate": 9.155452118557054e-05, "loss": 0.9283, "step": 6920 }, { "epoch": 0.4689342096347991, "grad_norm": 6.961619853973389, "learning_rate": 9.155315216647272e-05, "loss": 0.8142, "step": 6921 }, { "epoch": 0.4690019649027712, "grad_norm": 7.225759506225586, "learning_rate": 9.155178314737492e-05, "loss": 0.7626, "step": 6922 }, { "epoch": 0.4690697201707433, "grad_norm": 6.0465922355651855, "learning_rate": 9.15504141282771e-05, "loss": 0.8281, "step": 6923 }, { "epoch": 0.46913747543871537, "grad_norm": 7.507081985473633, "learning_rate": 9.154904510917928e-05, "loss": 1.1642, "step": 6924 }, { "epoch": 0.46920523070668746, "grad_norm": 6.281520843505859, "learning_rate": 9.154767609008146e-05, "loss": 0.8694, "step": 6925 }, { "epoch": 0.46927298597465955, "grad_norm": 6.0980658531188965, "learning_rate": 9.154630707098365e-05, "loss": 0.6983, "step": 6926 }, { "epoch": 0.4693407412426316, "grad_norm": 5.842291355133057, "learning_rate": 9.154493805188583e-05, "loss": 0.8392, "step": 6927 }, { "epoch": 0.4694084965106037, "grad_norm": 7.486947536468506, "learning_rate": 9.154356903278801e-05, "loss": 0.8363, "step": 6928 }, { "epoch": 0.46947625177857577, "grad_norm": 7.279699802398682, "learning_rate": 9.154220001369019e-05, "loss": 0.9697, "step": 6929 }, { "epoch": 0.46954400704654786, "grad_norm": 6.720832347869873, "learning_rate": 9.154083099459237e-05, "loss": 1.0377, "step": 6930 }, { "epoch": 0.46961176231451995, "grad_norm": 9.792084693908691, "learning_rate": 9.153946197549457e-05, "loss": 1.1697, "step": 6931 }, { "epoch": 0.46967951758249205, "grad_norm": 6.329649925231934, "learning_rate": 9.153809295639675e-05, "loss": 0.8825, "step": 6932 }, { "epoch": 0.46974727285046414, "grad_norm": 5.9268269538879395, "learning_rate": 9.153672393729893e-05, "loss": 0.9157, "step": 6933 }, { "epoch": 0.46981502811843623, "grad_norm": 7.7385430335998535, "learning_rate": 9.153535491820111e-05, "loss": 0.7662, "step": 6934 }, { "epoch": 0.4698827833864083, "grad_norm": 6.896132946014404, "learning_rate": 9.153398589910329e-05, "loss": 1.004, "step": 6935 }, { "epoch": 0.46995053865438036, "grad_norm": 6.822011470794678, "learning_rate": 9.153261688000548e-05, "loss": 1.0163, "step": 6936 }, { "epoch": 0.47001829392235245, "grad_norm": 6.4482574462890625, "learning_rate": 9.153124786090766e-05, "loss": 0.8293, "step": 6937 }, { "epoch": 0.47008604919032454, "grad_norm": 6.855703353881836, "learning_rate": 9.152987884180984e-05, "loss": 0.9737, "step": 6938 }, { "epoch": 0.47015380445829663, "grad_norm": 6.508902549743652, "learning_rate": 9.152850982271202e-05, "loss": 1.0892, "step": 6939 }, { "epoch": 0.4702215597262687, "grad_norm": 5.452862739562988, "learning_rate": 9.152714080361422e-05, "loss": 0.803, "step": 6940 }, { "epoch": 0.4702893149942408, "grad_norm": 5.583015441894531, "learning_rate": 9.15257717845164e-05, "loss": 0.6624, "step": 6941 }, { "epoch": 0.4703570702622129, "grad_norm": 7.870943069458008, "learning_rate": 9.152440276541858e-05, "loss": 0.929, "step": 6942 }, { "epoch": 0.470424825530185, "grad_norm": 7.723261833190918, "learning_rate": 9.152303374632076e-05, "loss": 0.8467, "step": 6943 }, { "epoch": 0.47049258079815703, "grad_norm": 8.4433012008667, "learning_rate": 9.152166472722294e-05, "loss": 0.9935, "step": 6944 }, { "epoch": 0.4705603360661291, "grad_norm": 7.142673015594482, "learning_rate": 9.152029570812513e-05, "loss": 0.9821, "step": 6945 }, { "epoch": 0.4706280913341012, "grad_norm": 6.800427436828613, "learning_rate": 9.151892668902731e-05, "loss": 0.7329, "step": 6946 }, { "epoch": 0.4706958466020733, "grad_norm": 6.462594985961914, "learning_rate": 9.151755766992949e-05, "loss": 0.6495, "step": 6947 }, { "epoch": 0.4707636018700454, "grad_norm": 6.2622294425964355, "learning_rate": 9.151618865083167e-05, "loss": 1.0438, "step": 6948 }, { "epoch": 0.4708313571380175, "grad_norm": 7.76660680770874, "learning_rate": 9.151481963173387e-05, "loss": 0.8855, "step": 6949 }, { "epoch": 0.4708991124059896, "grad_norm": 4.535599231719971, "learning_rate": 9.151345061263605e-05, "loss": 0.6823, "step": 6950 }, { "epoch": 0.4709668676739617, "grad_norm": 5.862152099609375, "learning_rate": 9.151208159353823e-05, "loss": 0.7322, "step": 6951 }, { "epoch": 0.4710346229419337, "grad_norm": 6.145107746124268, "learning_rate": 9.151071257444042e-05, "loss": 0.8258, "step": 6952 }, { "epoch": 0.4711023782099058, "grad_norm": 5.889834880828857, "learning_rate": 9.15093435553426e-05, "loss": 0.8009, "step": 6953 }, { "epoch": 0.4711701334778779, "grad_norm": 8.594234466552734, "learning_rate": 9.150797453624478e-05, "loss": 0.8349, "step": 6954 }, { "epoch": 0.47123788874585, "grad_norm": 7.7569379806518555, "learning_rate": 9.150660551714698e-05, "loss": 0.6711, "step": 6955 }, { "epoch": 0.4713056440138221, "grad_norm": 7.4088544845581055, "learning_rate": 9.150523649804916e-05, "loss": 0.8936, "step": 6956 }, { "epoch": 0.47137339928179417, "grad_norm": 6.610263347625732, "learning_rate": 9.150386747895134e-05, "loss": 0.9786, "step": 6957 }, { "epoch": 0.47144115454976626, "grad_norm": 6.46956729888916, "learning_rate": 9.150249845985352e-05, "loss": 0.6781, "step": 6958 }, { "epoch": 0.47150890981773835, "grad_norm": 6.152948379516602, "learning_rate": 9.150112944075571e-05, "loss": 0.8375, "step": 6959 }, { "epoch": 0.4715766650857104, "grad_norm": 6.96013879776001, "learning_rate": 9.149976042165789e-05, "loss": 0.8629, "step": 6960 }, { "epoch": 0.4716444203536825, "grad_norm": 6.767139434814453, "learning_rate": 9.149839140256007e-05, "loss": 0.8043, "step": 6961 }, { "epoch": 0.4717121756216546, "grad_norm": 7.95654296875, "learning_rate": 9.149702238346225e-05, "loss": 1.0179, "step": 6962 }, { "epoch": 0.47177993088962666, "grad_norm": 8.155875205993652, "learning_rate": 9.149565336436445e-05, "loss": 1.0663, "step": 6963 }, { "epoch": 0.47184768615759876, "grad_norm": 7.903263092041016, "learning_rate": 9.149428434526663e-05, "loss": 1.0214, "step": 6964 }, { "epoch": 0.47191544142557085, "grad_norm": 7.510760307312012, "learning_rate": 9.149291532616881e-05, "loss": 0.9942, "step": 6965 }, { "epoch": 0.47198319669354294, "grad_norm": 5.814423084259033, "learning_rate": 9.149154630707099e-05, "loss": 0.7677, "step": 6966 }, { "epoch": 0.47205095196151503, "grad_norm": 8.29617977142334, "learning_rate": 9.149017728797317e-05, "loss": 0.8268, "step": 6967 }, { "epoch": 0.47211870722948707, "grad_norm": 7.393543720245361, "learning_rate": 9.148880826887536e-05, "loss": 0.9067, "step": 6968 }, { "epoch": 0.47218646249745916, "grad_norm": 7.611250400543213, "learning_rate": 9.148743924977754e-05, "loss": 0.8908, "step": 6969 }, { "epoch": 0.47225421776543125, "grad_norm": 9.330535888671875, "learning_rate": 9.148607023067972e-05, "loss": 0.6368, "step": 6970 }, { "epoch": 0.47232197303340334, "grad_norm": 7.130900859832764, "learning_rate": 9.14847012115819e-05, "loss": 0.8157, "step": 6971 }, { "epoch": 0.47238972830137543, "grad_norm": 6.899352550506592, "learning_rate": 9.14833321924841e-05, "loss": 0.7058, "step": 6972 }, { "epoch": 0.4724574835693475, "grad_norm": 7.850022792816162, "learning_rate": 9.148196317338628e-05, "loss": 0.9622, "step": 6973 }, { "epoch": 0.4725252388373196, "grad_norm": 8.331214904785156, "learning_rate": 9.148059415428846e-05, "loss": 0.9366, "step": 6974 }, { "epoch": 0.4725929941052917, "grad_norm": 6.377279758453369, "learning_rate": 9.147922513519064e-05, "loss": 0.8701, "step": 6975 }, { "epoch": 0.47266074937326374, "grad_norm": 6.650668621063232, "learning_rate": 9.147785611609282e-05, "loss": 0.8066, "step": 6976 }, { "epoch": 0.47272850464123584, "grad_norm": 7.406231880187988, "learning_rate": 9.147648709699501e-05, "loss": 0.8833, "step": 6977 }, { "epoch": 0.4727962599092079, "grad_norm": 6.31017541885376, "learning_rate": 9.147511807789719e-05, "loss": 0.8001, "step": 6978 }, { "epoch": 0.47286401517718, "grad_norm": 6.4131927490234375, "learning_rate": 9.147374905879937e-05, "loss": 0.77, "step": 6979 }, { "epoch": 0.4729317704451521, "grad_norm": 9.2667236328125, "learning_rate": 9.147238003970155e-05, "loss": 0.9891, "step": 6980 }, { "epoch": 0.4729995257131242, "grad_norm": 7.107274055480957, "learning_rate": 9.147101102060373e-05, "loss": 1.0087, "step": 6981 }, { "epoch": 0.4730672809810963, "grad_norm": 8.102210998535156, "learning_rate": 9.146964200150593e-05, "loss": 0.7827, "step": 6982 }, { "epoch": 0.4731350362490684, "grad_norm": 6.7619099617004395, "learning_rate": 9.146827298240811e-05, "loss": 0.7432, "step": 6983 }, { "epoch": 0.4732027915170405, "grad_norm": 7.551081657409668, "learning_rate": 9.146690396331029e-05, "loss": 0.6776, "step": 6984 }, { "epoch": 0.4732705467850125, "grad_norm": 6.923361301422119, "learning_rate": 9.146553494421247e-05, "loss": 0.962, "step": 6985 }, { "epoch": 0.4733383020529846, "grad_norm": 6.971046447753906, "learning_rate": 9.146416592511466e-05, "loss": 0.9481, "step": 6986 }, { "epoch": 0.4734060573209567, "grad_norm": 7.046440601348877, "learning_rate": 9.146279690601684e-05, "loss": 1.0244, "step": 6987 }, { "epoch": 0.4734738125889288, "grad_norm": 6.281298637390137, "learning_rate": 9.146142788691902e-05, "loss": 0.8104, "step": 6988 }, { "epoch": 0.4735415678569009, "grad_norm": 10.22514820098877, "learning_rate": 9.14600588678212e-05, "loss": 1.1109, "step": 6989 }, { "epoch": 0.47360932312487297, "grad_norm": 6.466033458709717, "learning_rate": 9.145868984872338e-05, "loss": 0.9219, "step": 6990 }, { "epoch": 0.47367707839284506, "grad_norm": 7.837368011474609, "learning_rate": 9.145732082962558e-05, "loss": 1.0033, "step": 6991 }, { "epoch": 0.47374483366081716, "grad_norm": 7.501054286956787, "learning_rate": 9.145595181052776e-05, "loss": 0.919, "step": 6992 }, { "epoch": 0.4738125889287892, "grad_norm": 6.243696212768555, "learning_rate": 9.145458279142994e-05, "loss": 0.7736, "step": 6993 }, { "epoch": 0.4738803441967613, "grad_norm": 7.519147872924805, "learning_rate": 9.145321377233212e-05, "loss": 0.8014, "step": 6994 }, { "epoch": 0.4739480994647334, "grad_norm": 5.793912887573242, "learning_rate": 9.145184475323431e-05, "loss": 0.7508, "step": 6995 }, { "epoch": 0.47401585473270547, "grad_norm": 7.050177097320557, "learning_rate": 9.145047573413649e-05, "loss": 0.8174, "step": 6996 }, { "epoch": 0.47408361000067756, "grad_norm": 5.979698657989502, "learning_rate": 9.144910671503867e-05, "loss": 0.9812, "step": 6997 }, { "epoch": 0.47415136526864965, "grad_norm": 7.254084587097168, "learning_rate": 9.144773769594087e-05, "loss": 0.8015, "step": 6998 }, { "epoch": 0.47421912053662174, "grad_norm": 8.168086051940918, "learning_rate": 9.144636867684305e-05, "loss": 0.8107, "step": 6999 }, { "epoch": 0.47428687580459383, "grad_norm": 6.693539619445801, "learning_rate": 9.144499965774523e-05, "loss": 0.7048, "step": 7000 }, { "epoch": 0.47435463107256587, "grad_norm": 7.216420650482178, "learning_rate": 9.144363063864742e-05, "loss": 0.7434, "step": 7001 }, { "epoch": 0.47442238634053796, "grad_norm": 8.014084815979004, "learning_rate": 9.14422616195496e-05, "loss": 1.1213, "step": 7002 }, { "epoch": 0.47449014160851005, "grad_norm": 6.1906938552856445, "learning_rate": 9.144089260045178e-05, "loss": 0.8188, "step": 7003 }, { "epoch": 0.47455789687648214, "grad_norm": 8.70464038848877, "learning_rate": 9.143952358135398e-05, "loss": 1.0273, "step": 7004 }, { "epoch": 0.47462565214445424, "grad_norm": 5.303441524505615, "learning_rate": 9.143815456225616e-05, "loss": 0.6782, "step": 7005 }, { "epoch": 0.4746934074124263, "grad_norm": 8.176512718200684, "learning_rate": 9.143678554315834e-05, "loss": 0.7853, "step": 7006 }, { "epoch": 0.4747611626803984, "grad_norm": 6.074409008026123, "learning_rate": 9.143541652406052e-05, "loss": 0.9088, "step": 7007 }, { "epoch": 0.4748289179483705, "grad_norm": 7.32485294342041, "learning_rate": 9.14340475049627e-05, "loss": 1.0558, "step": 7008 }, { "epoch": 0.47489667321634255, "grad_norm": 5.4144463539123535, "learning_rate": 9.143267848586489e-05, "loss": 0.9064, "step": 7009 }, { "epoch": 0.47496442848431464, "grad_norm": 7.717291355133057, "learning_rate": 9.143130946676707e-05, "loss": 0.9129, "step": 7010 }, { "epoch": 0.47503218375228673, "grad_norm": 7.902177333831787, "learning_rate": 9.142994044766925e-05, "loss": 1.1144, "step": 7011 }, { "epoch": 0.4750999390202588, "grad_norm": 6.94300651550293, "learning_rate": 9.142857142857143e-05, "loss": 0.7982, "step": 7012 }, { "epoch": 0.4751676942882309, "grad_norm": 7.08957052230835, "learning_rate": 9.142720240947361e-05, "loss": 1.009, "step": 7013 }, { "epoch": 0.475235449556203, "grad_norm": 5.148087024688721, "learning_rate": 9.14258333903758e-05, "loss": 0.7443, "step": 7014 }, { "epoch": 0.4753032048241751, "grad_norm": 6.155871868133545, "learning_rate": 9.142446437127799e-05, "loss": 0.8135, "step": 7015 }, { "epoch": 0.4753709600921472, "grad_norm": 6.569172382354736, "learning_rate": 9.142309535218017e-05, "loss": 0.7684, "step": 7016 }, { "epoch": 0.4754387153601192, "grad_norm": 9.110980033874512, "learning_rate": 9.142172633308235e-05, "loss": 1.0382, "step": 7017 }, { "epoch": 0.4755064706280913, "grad_norm": 6.317762851715088, "learning_rate": 9.142035731398454e-05, "loss": 0.7206, "step": 7018 }, { "epoch": 0.4755742258960634, "grad_norm": 7.054732799530029, "learning_rate": 9.141898829488672e-05, "loss": 0.7442, "step": 7019 }, { "epoch": 0.4756419811640355, "grad_norm": 8.904619216918945, "learning_rate": 9.14176192757889e-05, "loss": 1.1686, "step": 7020 }, { "epoch": 0.4757097364320076, "grad_norm": 7.206265926361084, "learning_rate": 9.141625025669108e-05, "loss": 0.8675, "step": 7021 }, { "epoch": 0.4757774916999797, "grad_norm": 7.7408318519592285, "learning_rate": 9.141488123759326e-05, "loss": 0.9132, "step": 7022 }, { "epoch": 0.4758452469679518, "grad_norm": 5.807632923126221, "learning_rate": 9.141351221849546e-05, "loss": 0.8356, "step": 7023 }, { "epoch": 0.47591300223592387, "grad_norm": 8.07009506225586, "learning_rate": 9.141214319939764e-05, "loss": 1.1522, "step": 7024 }, { "epoch": 0.4759807575038959, "grad_norm": 7.409401893615723, "learning_rate": 9.141077418029982e-05, "loss": 0.7972, "step": 7025 }, { "epoch": 0.476048512771868, "grad_norm": 7.14201545715332, "learning_rate": 9.1409405161202e-05, "loss": 0.9284, "step": 7026 }, { "epoch": 0.4761162680398401, "grad_norm": 6.279862880706787, "learning_rate": 9.140803614210419e-05, "loss": 0.7995, "step": 7027 }, { "epoch": 0.4761840233078122, "grad_norm": 6.493180274963379, "learning_rate": 9.140666712300637e-05, "loss": 0.8848, "step": 7028 }, { "epoch": 0.47625177857578427, "grad_norm": 8.528377532958984, "learning_rate": 9.140529810390855e-05, "loss": 0.9142, "step": 7029 }, { "epoch": 0.47631953384375636, "grad_norm": 6.636556625366211, "learning_rate": 9.140392908481073e-05, "loss": 0.6841, "step": 7030 }, { "epoch": 0.47638728911172845, "grad_norm": 7.340085983276367, "learning_rate": 9.140256006571291e-05, "loss": 1.0198, "step": 7031 }, { "epoch": 0.47645504437970054, "grad_norm": 7.510453224182129, "learning_rate": 9.14011910466151e-05, "loss": 0.8581, "step": 7032 }, { "epoch": 0.4765227996476726, "grad_norm": 5.910886287689209, "learning_rate": 9.139982202751729e-05, "loss": 0.7947, "step": 7033 }, { "epoch": 0.47659055491564467, "grad_norm": 6.257016181945801, "learning_rate": 9.139845300841947e-05, "loss": 0.8759, "step": 7034 }, { "epoch": 0.47665831018361676, "grad_norm": 7.840462684631348, "learning_rate": 9.139708398932165e-05, "loss": 1.1975, "step": 7035 }, { "epoch": 0.47672606545158885, "grad_norm": 9.107316970825195, "learning_rate": 9.139571497022383e-05, "loss": 0.8321, "step": 7036 }, { "epoch": 0.47679382071956095, "grad_norm": 5.630661964416504, "learning_rate": 9.139434595112602e-05, "loss": 0.7302, "step": 7037 }, { "epoch": 0.47686157598753304, "grad_norm": 6.390323162078857, "learning_rate": 9.13929769320282e-05, "loss": 0.862, "step": 7038 }, { "epoch": 0.47692933125550513, "grad_norm": 8.85464096069336, "learning_rate": 9.139160791293038e-05, "loss": 0.9283, "step": 7039 }, { "epoch": 0.4769970865234772, "grad_norm": 6.312126159667969, "learning_rate": 9.139023889383256e-05, "loss": 0.933, "step": 7040 }, { "epoch": 0.4770648417914493, "grad_norm": 6.027670383453369, "learning_rate": 9.138886987473476e-05, "loss": 0.8221, "step": 7041 }, { "epoch": 0.47713259705942135, "grad_norm": 8.296350479125977, "learning_rate": 9.138750085563694e-05, "loss": 0.8621, "step": 7042 }, { "epoch": 0.47720035232739344, "grad_norm": 8.337299346923828, "learning_rate": 9.138613183653912e-05, "loss": 0.9126, "step": 7043 }, { "epoch": 0.47726810759536553, "grad_norm": 7.545529842376709, "learning_rate": 9.138476281744131e-05, "loss": 1.1327, "step": 7044 }, { "epoch": 0.4773358628633376, "grad_norm": 5.35225248336792, "learning_rate": 9.138339379834349e-05, "loss": 0.7311, "step": 7045 }, { "epoch": 0.4774036181313097, "grad_norm": 7.592402458190918, "learning_rate": 9.138202477924567e-05, "loss": 0.9959, "step": 7046 }, { "epoch": 0.4774713733992818, "grad_norm": 8.010600090026855, "learning_rate": 9.138065576014787e-05, "loss": 0.7788, "step": 7047 }, { "epoch": 0.4775391286672539, "grad_norm": 7.170941352844238, "learning_rate": 9.137928674105005e-05, "loss": 0.9218, "step": 7048 }, { "epoch": 0.477606883935226, "grad_norm": 6.852916240692139, "learning_rate": 9.137791772195223e-05, "loss": 0.7614, "step": 7049 }, { "epoch": 0.477674639203198, "grad_norm": 8.727351188659668, "learning_rate": 9.137654870285442e-05, "loss": 0.8449, "step": 7050 }, { "epoch": 0.4777423944711701, "grad_norm": 6.188234329223633, "learning_rate": 9.13751796837566e-05, "loss": 0.7001, "step": 7051 }, { "epoch": 0.4778101497391422, "grad_norm": 6.4075398445129395, "learning_rate": 9.137381066465878e-05, "loss": 0.9363, "step": 7052 }, { "epoch": 0.4778779050071143, "grad_norm": 7.744530200958252, "learning_rate": 9.137244164556096e-05, "loss": 0.9253, "step": 7053 }, { "epoch": 0.4779456602750864, "grad_norm": 6.745645999908447, "learning_rate": 9.137107262646314e-05, "loss": 0.9649, "step": 7054 }, { "epoch": 0.4780134155430585, "grad_norm": 8.560202598571777, "learning_rate": 9.136970360736534e-05, "loss": 1.2453, "step": 7055 }, { "epoch": 0.4780811708110306, "grad_norm": 6.941448211669922, "learning_rate": 9.136833458826752e-05, "loss": 0.8577, "step": 7056 }, { "epoch": 0.47814892607900267, "grad_norm": 6.4724626541137695, "learning_rate": 9.13669655691697e-05, "loss": 0.9304, "step": 7057 }, { "epoch": 0.4782166813469747, "grad_norm": 6.535644054412842, "learning_rate": 9.136559655007188e-05, "loss": 0.8323, "step": 7058 }, { "epoch": 0.4782844366149468, "grad_norm": 6.4093217849731445, "learning_rate": 9.136422753097407e-05, "loss": 0.9816, "step": 7059 }, { "epoch": 0.4783521918829189, "grad_norm": 6.645406246185303, "learning_rate": 9.136285851187625e-05, "loss": 0.9507, "step": 7060 }, { "epoch": 0.478419947150891, "grad_norm": 8.383099555969238, "learning_rate": 9.136148949277843e-05, "loss": 1.0339, "step": 7061 }, { "epoch": 0.47848770241886307, "grad_norm": 6.430543422698975, "learning_rate": 9.136012047368061e-05, "loss": 0.755, "step": 7062 }, { "epoch": 0.47855545768683516, "grad_norm": 6.5899176597595215, "learning_rate": 9.135875145458279e-05, "loss": 1.0841, "step": 7063 }, { "epoch": 0.47862321295480725, "grad_norm": 9.340789794921875, "learning_rate": 9.135738243548499e-05, "loss": 0.7539, "step": 7064 }, { "epoch": 0.47869096822277934, "grad_norm": 7.300266742706299, "learning_rate": 9.135601341638717e-05, "loss": 0.9049, "step": 7065 }, { "epoch": 0.4787587234907514, "grad_norm": 6.495701313018799, "learning_rate": 9.135464439728935e-05, "loss": 0.9168, "step": 7066 }, { "epoch": 0.4788264787587235, "grad_norm": 8.14792537689209, "learning_rate": 9.135327537819153e-05, "loss": 0.7678, "step": 7067 }, { "epoch": 0.47889423402669556, "grad_norm": 5.638490200042725, "learning_rate": 9.135190635909371e-05, "loss": 0.8914, "step": 7068 }, { "epoch": 0.47896198929466766, "grad_norm": 7.043061256408691, "learning_rate": 9.13505373399959e-05, "loss": 0.9575, "step": 7069 }, { "epoch": 0.47902974456263975, "grad_norm": 7.867429256439209, "learning_rate": 9.134916832089808e-05, "loss": 0.9887, "step": 7070 }, { "epoch": 0.47909749983061184, "grad_norm": 5.737957000732422, "learning_rate": 9.134779930180026e-05, "loss": 0.8605, "step": 7071 }, { "epoch": 0.47916525509858393, "grad_norm": 9.436585426330566, "learning_rate": 9.134643028270244e-05, "loss": 1.0482, "step": 7072 }, { "epoch": 0.479233010366556, "grad_norm": 6.28998327255249, "learning_rate": 9.134506126360464e-05, "loss": 1.0235, "step": 7073 }, { "epoch": 0.47930076563452806, "grad_norm": 5.121227741241455, "learning_rate": 9.134369224450682e-05, "loss": 0.8735, "step": 7074 }, { "epoch": 0.47936852090250015, "grad_norm": 5.818413734436035, "learning_rate": 9.1342323225409e-05, "loss": 0.738, "step": 7075 }, { "epoch": 0.47943627617047224, "grad_norm": 6.716359615325928, "learning_rate": 9.134095420631118e-05, "loss": 0.8825, "step": 7076 }, { "epoch": 0.47950403143844433, "grad_norm": 5.944983959197998, "learning_rate": 9.133958518721336e-05, "loss": 0.9145, "step": 7077 }, { "epoch": 0.4795717867064164, "grad_norm": 6.65972375869751, "learning_rate": 9.133821616811555e-05, "loss": 0.7746, "step": 7078 }, { "epoch": 0.4796395419743885, "grad_norm": 7.595485210418701, "learning_rate": 9.133684714901773e-05, "loss": 0.817, "step": 7079 }, { "epoch": 0.4797072972423606, "grad_norm": 5.7008280754089355, "learning_rate": 9.133547812991991e-05, "loss": 0.7996, "step": 7080 }, { "epoch": 0.4797750525103327, "grad_norm": 6.7002668380737305, "learning_rate": 9.133410911082209e-05, "loss": 1.0676, "step": 7081 }, { "epoch": 0.47984280777830474, "grad_norm": 8.43227481842041, "learning_rate": 9.133274009172429e-05, "loss": 0.9844, "step": 7082 }, { "epoch": 0.47991056304627683, "grad_norm": 5.9677653312683105, "learning_rate": 9.133137107262647e-05, "loss": 0.9158, "step": 7083 }, { "epoch": 0.4799783183142489, "grad_norm": 7.774659156799316, "learning_rate": 9.133000205352865e-05, "loss": 0.6829, "step": 7084 }, { "epoch": 0.480046073582221, "grad_norm": 7.344854354858398, "learning_rate": 9.132863303443083e-05, "loss": 0.8579, "step": 7085 }, { "epoch": 0.4801138288501931, "grad_norm": 6.4308624267578125, "learning_rate": 9.132726401533301e-05, "loss": 0.936, "step": 7086 }, { "epoch": 0.4801815841181652, "grad_norm": 7.600352764129639, "learning_rate": 9.13258949962352e-05, "loss": 0.8561, "step": 7087 }, { "epoch": 0.4802493393861373, "grad_norm": 9.475528717041016, "learning_rate": 9.132452597713738e-05, "loss": 0.9684, "step": 7088 }, { "epoch": 0.4803170946541094, "grad_norm": 5.927639007568359, "learning_rate": 9.132315695803956e-05, "loss": 0.7472, "step": 7089 }, { "epoch": 0.48038484992208147, "grad_norm": 7.304515838623047, "learning_rate": 9.132178793894176e-05, "loss": 0.8983, "step": 7090 }, { "epoch": 0.4804526051900535, "grad_norm": 7.6303391456604, "learning_rate": 9.132041891984394e-05, "loss": 0.864, "step": 7091 }, { "epoch": 0.4805203604580256, "grad_norm": 6.243314266204834, "learning_rate": 9.131904990074612e-05, "loss": 0.8448, "step": 7092 }, { "epoch": 0.4805881157259977, "grad_norm": 6.947970867156982, "learning_rate": 9.131768088164831e-05, "loss": 0.7853, "step": 7093 }, { "epoch": 0.4806558709939698, "grad_norm": 5.687379837036133, "learning_rate": 9.131631186255049e-05, "loss": 0.838, "step": 7094 }, { "epoch": 0.48072362626194187, "grad_norm": 6.477471351623535, "learning_rate": 9.131494284345267e-05, "loss": 0.9849, "step": 7095 }, { "epoch": 0.48079138152991396, "grad_norm": 6.526174068450928, "learning_rate": 9.131357382435487e-05, "loss": 0.9269, "step": 7096 }, { "epoch": 0.48085913679788606, "grad_norm": 7.404792308807373, "learning_rate": 9.131220480525705e-05, "loss": 0.8257, "step": 7097 }, { "epoch": 0.48092689206585815, "grad_norm": 7.290988445281982, "learning_rate": 9.131083578615923e-05, "loss": 1.0275, "step": 7098 }, { "epoch": 0.4809946473338302, "grad_norm": 7.744154930114746, "learning_rate": 9.13094667670614e-05, "loss": 0.9692, "step": 7099 }, { "epoch": 0.4810624026018023, "grad_norm": 5.7665252685546875, "learning_rate": 9.130809774796359e-05, "loss": 0.6751, "step": 7100 }, { "epoch": 0.48113015786977437, "grad_norm": 6.921631813049316, "learning_rate": 9.130672872886578e-05, "loss": 0.8771, "step": 7101 }, { "epoch": 0.48119791313774646, "grad_norm": 6.097098350524902, "learning_rate": 9.130535970976796e-05, "loss": 0.737, "step": 7102 }, { "epoch": 0.48126566840571855, "grad_norm": 7.192615032196045, "learning_rate": 9.130399069067014e-05, "loss": 0.9283, "step": 7103 }, { "epoch": 0.48133342367369064, "grad_norm": 6.464249610900879, "learning_rate": 9.130262167157232e-05, "loss": 1.0467, "step": 7104 }, { "epoch": 0.48140117894166273, "grad_norm": 4.616031646728516, "learning_rate": 9.130125265247452e-05, "loss": 0.5657, "step": 7105 }, { "epoch": 0.4814689342096348, "grad_norm": 6.356307029724121, "learning_rate": 9.12998836333767e-05, "loss": 0.722, "step": 7106 }, { "epoch": 0.48153668947760686, "grad_norm": 6.398674011230469, "learning_rate": 9.129851461427888e-05, "loss": 1.066, "step": 7107 }, { "epoch": 0.48160444474557895, "grad_norm": 7.015667915344238, "learning_rate": 9.129714559518106e-05, "loss": 0.8681, "step": 7108 }, { "epoch": 0.48167220001355104, "grad_norm": 8.212510108947754, "learning_rate": 9.129577657608324e-05, "loss": 0.8158, "step": 7109 }, { "epoch": 0.48173995528152314, "grad_norm": 8.409659385681152, "learning_rate": 9.129440755698543e-05, "loss": 0.7391, "step": 7110 }, { "epoch": 0.4818077105494952, "grad_norm": 7.42771053314209, "learning_rate": 9.129303853788761e-05, "loss": 0.9876, "step": 7111 }, { "epoch": 0.4818754658174673, "grad_norm": 7.816039562225342, "learning_rate": 9.129166951878979e-05, "loss": 0.9601, "step": 7112 }, { "epoch": 0.4819432210854394, "grad_norm": 8.761859893798828, "learning_rate": 9.129030049969197e-05, "loss": 0.6283, "step": 7113 }, { "epoch": 0.4820109763534115, "grad_norm": 6.981362342834473, "learning_rate": 9.128893148059415e-05, "loss": 0.9901, "step": 7114 }, { "epoch": 0.48207873162138354, "grad_norm": 7.718024253845215, "learning_rate": 9.128756246149635e-05, "loss": 0.8318, "step": 7115 }, { "epoch": 0.48214648688935563, "grad_norm": 8.191770553588867, "learning_rate": 9.128619344239853e-05, "loss": 0.9108, "step": 7116 }, { "epoch": 0.4822142421573277, "grad_norm": 8.095576286315918, "learning_rate": 9.12848244233007e-05, "loss": 0.8404, "step": 7117 }, { "epoch": 0.4822819974252998, "grad_norm": 6.66763162612915, "learning_rate": 9.128345540420289e-05, "loss": 0.6538, "step": 7118 }, { "epoch": 0.4823497526932719, "grad_norm": 7.253853797912598, "learning_rate": 9.128208638510508e-05, "loss": 0.8893, "step": 7119 }, { "epoch": 0.482417507961244, "grad_norm": 8.162705421447754, "learning_rate": 9.128071736600726e-05, "loss": 0.9602, "step": 7120 }, { "epoch": 0.4824852632292161, "grad_norm": 7.548867225646973, "learning_rate": 9.127934834690944e-05, "loss": 0.8701, "step": 7121 }, { "epoch": 0.4825530184971882, "grad_norm": 5.984834671020508, "learning_rate": 9.127797932781162e-05, "loss": 1.0416, "step": 7122 }, { "epoch": 0.4826207737651602, "grad_norm": 8.716156005859375, "learning_rate": 9.12766103087138e-05, "loss": 1.016, "step": 7123 }, { "epoch": 0.4826885290331323, "grad_norm": 6.2188873291015625, "learning_rate": 9.1275241289616e-05, "loss": 0.7147, "step": 7124 }, { "epoch": 0.4827562843011044, "grad_norm": 10.026150703430176, "learning_rate": 9.127387227051818e-05, "loss": 0.9221, "step": 7125 }, { "epoch": 0.4828240395690765, "grad_norm": 7.320329666137695, "learning_rate": 9.127250325142036e-05, "loss": 0.8486, "step": 7126 }, { "epoch": 0.4828917948370486, "grad_norm": 6.134348392486572, "learning_rate": 9.127113423232254e-05, "loss": 0.7383, "step": 7127 }, { "epoch": 0.4829595501050207, "grad_norm": 7.2880778312683105, "learning_rate": 9.126976521322473e-05, "loss": 1.059, "step": 7128 }, { "epoch": 0.48302730537299277, "grad_norm": 6.100679874420166, "learning_rate": 9.126839619412691e-05, "loss": 0.8673, "step": 7129 }, { "epoch": 0.48309506064096486, "grad_norm": 6.899023056030273, "learning_rate": 9.126702717502909e-05, "loss": 1.1572, "step": 7130 }, { "epoch": 0.4831628159089369, "grad_norm": 6.187694549560547, "learning_rate": 9.126565815593127e-05, "loss": 0.9584, "step": 7131 }, { "epoch": 0.483230571176909, "grad_norm": 7.093903064727783, "learning_rate": 9.126428913683345e-05, "loss": 0.969, "step": 7132 }, { "epoch": 0.4832983264448811, "grad_norm": 6.003389835357666, "learning_rate": 9.126292011773565e-05, "loss": 0.7638, "step": 7133 }, { "epoch": 0.48336608171285317, "grad_norm": 5.557130336761475, "learning_rate": 9.126155109863783e-05, "loss": 0.8529, "step": 7134 }, { "epoch": 0.48343383698082526, "grad_norm": 6.029399871826172, "learning_rate": 9.126018207954001e-05, "loss": 0.6359, "step": 7135 }, { "epoch": 0.48350159224879735, "grad_norm": 6.123723030090332, "learning_rate": 9.12588130604422e-05, "loss": 0.9462, "step": 7136 }, { "epoch": 0.48356934751676944, "grad_norm": 6.213245868682861, "learning_rate": 9.125744404134438e-05, "loss": 0.8322, "step": 7137 }, { "epoch": 0.48363710278474153, "grad_norm": 7.509876251220703, "learning_rate": 9.125607502224656e-05, "loss": 1.0556, "step": 7138 }, { "epoch": 0.48370485805271357, "grad_norm": 6.355532646179199, "learning_rate": 9.125470600314876e-05, "loss": 0.9281, "step": 7139 }, { "epoch": 0.48377261332068566, "grad_norm": 7.302781105041504, "learning_rate": 9.125333698405094e-05, "loss": 0.846, "step": 7140 }, { "epoch": 0.48384036858865775, "grad_norm": 7.081716537475586, "learning_rate": 9.125196796495312e-05, "loss": 0.8571, "step": 7141 }, { "epoch": 0.48390812385662985, "grad_norm": 7.652805328369141, "learning_rate": 9.125059894585531e-05, "loss": 0.7986, "step": 7142 }, { "epoch": 0.48397587912460194, "grad_norm": 5.906263828277588, "learning_rate": 9.124922992675749e-05, "loss": 0.8058, "step": 7143 }, { "epoch": 0.48404363439257403, "grad_norm": 7.398087024688721, "learning_rate": 9.124786090765967e-05, "loss": 0.8961, "step": 7144 }, { "epoch": 0.4841113896605461, "grad_norm": 9.616337776184082, "learning_rate": 9.124649188856185e-05, "loss": 0.9403, "step": 7145 }, { "epoch": 0.4841791449285182, "grad_norm": 7.74692440032959, "learning_rate": 9.124512286946403e-05, "loss": 0.7659, "step": 7146 }, { "epoch": 0.4842469001964903, "grad_norm": 6.067378520965576, "learning_rate": 9.124375385036623e-05, "loss": 0.8094, "step": 7147 }, { "epoch": 0.48431465546446234, "grad_norm": 7.943274974822998, "learning_rate": 9.12423848312684e-05, "loss": 0.8103, "step": 7148 }, { "epoch": 0.48438241073243443, "grad_norm": 7.710971355438232, "learning_rate": 9.124101581217059e-05, "loss": 1.0818, "step": 7149 }, { "epoch": 0.4844501660004065, "grad_norm": 6.904791831970215, "learning_rate": 9.123964679307277e-05, "loss": 0.9413, "step": 7150 }, { "epoch": 0.4845179212683786, "grad_norm": 8.46650505065918, "learning_rate": 9.123827777397496e-05, "loss": 1.1661, "step": 7151 }, { "epoch": 0.4845856765363507, "grad_norm": 7.021801471710205, "learning_rate": 9.123690875487714e-05, "loss": 0.7305, "step": 7152 }, { "epoch": 0.4846534318043228, "grad_norm": 7.553677082061768, "learning_rate": 9.123553973577932e-05, "loss": 0.9806, "step": 7153 }, { "epoch": 0.4847211870722949, "grad_norm": 6.204870700836182, "learning_rate": 9.12341707166815e-05, "loss": 0.8433, "step": 7154 }, { "epoch": 0.484788942340267, "grad_norm": 7.81880521774292, "learning_rate": 9.123280169758368e-05, "loss": 1.112, "step": 7155 }, { "epoch": 0.484856697608239, "grad_norm": 7.029433727264404, "learning_rate": 9.123143267848588e-05, "loss": 0.8068, "step": 7156 }, { "epoch": 0.4849244528762111, "grad_norm": 6.795009136199951, "learning_rate": 9.123006365938806e-05, "loss": 0.766, "step": 7157 }, { "epoch": 0.4849922081441832, "grad_norm": 5.924415588378906, "learning_rate": 9.122869464029024e-05, "loss": 0.7914, "step": 7158 }, { "epoch": 0.4850599634121553, "grad_norm": 5.561947345733643, "learning_rate": 9.122732562119242e-05, "loss": 1.1486, "step": 7159 }, { "epoch": 0.4851277186801274, "grad_norm": 7.202826976776123, "learning_rate": 9.122595660209461e-05, "loss": 0.8802, "step": 7160 }, { "epoch": 0.4851954739480995, "grad_norm": 6.435755729675293, "learning_rate": 9.122458758299679e-05, "loss": 0.888, "step": 7161 }, { "epoch": 0.48526322921607157, "grad_norm": 6.197578430175781, "learning_rate": 9.122321856389897e-05, "loss": 0.9139, "step": 7162 }, { "epoch": 0.48533098448404366, "grad_norm": 6.596435070037842, "learning_rate": 9.122184954480115e-05, "loss": 0.7802, "step": 7163 }, { "epoch": 0.4853987397520157, "grad_norm": 7.858447551727295, "learning_rate": 9.122048052570333e-05, "loss": 0.8759, "step": 7164 }, { "epoch": 0.4854664950199878, "grad_norm": 6.002086162567139, "learning_rate": 9.121911150660553e-05, "loss": 0.9621, "step": 7165 }, { "epoch": 0.4855342502879599, "grad_norm": 5.917041301727295, "learning_rate": 9.12177424875077e-05, "loss": 0.8144, "step": 7166 }, { "epoch": 0.48560200555593197, "grad_norm": 6.202271461486816, "learning_rate": 9.121637346840989e-05, "loss": 0.9937, "step": 7167 }, { "epoch": 0.48566976082390406, "grad_norm": 6.635425567626953, "learning_rate": 9.121500444931207e-05, "loss": 0.8103, "step": 7168 }, { "epoch": 0.48573751609187615, "grad_norm": 7.288759708404541, "learning_rate": 9.121363543021425e-05, "loss": 0.8354, "step": 7169 }, { "epoch": 0.48580527135984825, "grad_norm": 6.015995979309082, "learning_rate": 9.121226641111644e-05, "loss": 1.0732, "step": 7170 }, { "epoch": 0.48587302662782034, "grad_norm": 6.693684101104736, "learning_rate": 9.121089739201862e-05, "loss": 0.9792, "step": 7171 }, { "epoch": 0.4859407818957924, "grad_norm": 7.2573561668396, "learning_rate": 9.12095283729208e-05, "loss": 0.8823, "step": 7172 }, { "epoch": 0.48600853716376446, "grad_norm": 6.1881585121154785, "learning_rate": 9.120815935382298e-05, "loss": 0.8397, "step": 7173 }, { "epoch": 0.48607629243173656, "grad_norm": 7.354151725769043, "learning_rate": 9.120679033472518e-05, "loss": 1.0145, "step": 7174 }, { "epoch": 0.48614404769970865, "grad_norm": 6.806859016418457, "learning_rate": 9.120542131562736e-05, "loss": 0.9289, "step": 7175 }, { "epoch": 0.48621180296768074, "grad_norm": 6.098382949829102, "learning_rate": 9.120405229652954e-05, "loss": 0.9819, "step": 7176 }, { "epoch": 0.48627955823565283, "grad_norm": 6.944835186004639, "learning_rate": 9.120268327743172e-05, "loss": 0.8004, "step": 7177 }, { "epoch": 0.4863473135036249, "grad_norm": 7.9429497718811035, "learning_rate": 9.12013142583339e-05, "loss": 0.9473, "step": 7178 }, { "epoch": 0.486415068771597, "grad_norm": 6.831770420074463, "learning_rate": 9.119994523923609e-05, "loss": 0.8866, "step": 7179 }, { "epoch": 0.48648282403956905, "grad_norm": 7.992087364196777, "learning_rate": 9.119857622013827e-05, "loss": 0.6405, "step": 7180 }, { "epoch": 0.48655057930754114, "grad_norm": 7.081966400146484, "learning_rate": 9.119720720104045e-05, "loss": 0.9166, "step": 7181 }, { "epoch": 0.48661833457551323, "grad_norm": 8.257608413696289, "learning_rate": 9.119583818194263e-05, "loss": 0.7556, "step": 7182 }, { "epoch": 0.4866860898434853, "grad_norm": 6.714028835296631, "learning_rate": 9.119446916284483e-05, "loss": 0.784, "step": 7183 }, { "epoch": 0.4867538451114574, "grad_norm": 5.71464204788208, "learning_rate": 9.1193100143747e-05, "loss": 0.815, "step": 7184 }, { "epoch": 0.4868216003794295, "grad_norm": 8.290979385375977, "learning_rate": 9.119173112464919e-05, "loss": 0.822, "step": 7185 }, { "epoch": 0.4868893556474016, "grad_norm": 6.116361141204834, "learning_rate": 9.119036210555138e-05, "loss": 0.7727, "step": 7186 }, { "epoch": 0.4869571109153737, "grad_norm": 5.137567043304443, "learning_rate": 9.118899308645356e-05, "loss": 0.4916, "step": 7187 }, { "epoch": 0.48702486618334573, "grad_norm": 6.987879753112793, "learning_rate": 9.118762406735574e-05, "loss": 1.0662, "step": 7188 }, { "epoch": 0.4870926214513178, "grad_norm": 7.223506927490234, "learning_rate": 9.118625504825794e-05, "loss": 0.7408, "step": 7189 }, { "epoch": 0.4871603767192899, "grad_norm": 7.493766784667969, "learning_rate": 9.118488602916012e-05, "loss": 0.9625, "step": 7190 }, { "epoch": 0.487228131987262, "grad_norm": 7.265352725982666, "learning_rate": 9.11835170100623e-05, "loss": 0.9228, "step": 7191 }, { "epoch": 0.4872958872552341, "grad_norm": 6.04194450378418, "learning_rate": 9.118214799096449e-05, "loss": 0.8081, "step": 7192 }, { "epoch": 0.4873636425232062, "grad_norm": 7.033185958862305, "learning_rate": 9.118077897186667e-05, "loss": 1.0709, "step": 7193 }, { "epoch": 0.4874313977911783, "grad_norm": 5.993305683135986, "learning_rate": 9.117940995276885e-05, "loss": 0.8222, "step": 7194 }, { "epoch": 0.48749915305915037, "grad_norm": 6.695589542388916, "learning_rate": 9.117804093367103e-05, "loss": 0.7981, "step": 7195 }, { "epoch": 0.48756690832712246, "grad_norm": 6.6938157081604, "learning_rate": 9.117667191457321e-05, "loss": 0.8485, "step": 7196 }, { "epoch": 0.4876346635950945, "grad_norm": 6.760074138641357, "learning_rate": 9.11753028954754e-05, "loss": 0.6706, "step": 7197 }, { "epoch": 0.4877024188630666, "grad_norm": 6.397393703460693, "learning_rate": 9.117393387637759e-05, "loss": 0.8247, "step": 7198 }, { "epoch": 0.4877701741310387, "grad_norm": 7.138194561004639, "learning_rate": 9.117256485727977e-05, "loss": 1.0124, "step": 7199 }, { "epoch": 0.4878379293990108, "grad_norm": 6.4646172523498535, "learning_rate": 9.117119583818195e-05, "loss": 1.0531, "step": 7200 }, { "epoch": 0.48790568466698286, "grad_norm": 6.58534574508667, "learning_rate": 9.116982681908413e-05, "loss": 0.962, "step": 7201 }, { "epoch": 0.48797343993495496, "grad_norm": 8.237184524536133, "learning_rate": 9.116845779998632e-05, "loss": 1.0363, "step": 7202 }, { "epoch": 0.48804119520292705, "grad_norm": 9.047645568847656, "learning_rate": 9.11670887808885e-05, "loss": 0.8925, "step": 7203 }, { "epoch": 0.48810895047089914, "grad_norm": 5.87882137298584, "learning_rate": 9.116571976179068e-05, "loss": 0.7843, "step": 7204 }, { "epoch": 0.4881767057388712, "grad_norm": 7.792137622833252, "learning_rate": 9.116435074269286e-05, "loss": 0.6999, "step": 7205 }, { "epoch": 0.48824446100684327, "grad_norm": 6.028510570526123, "learning_rate": 9.116298172359506e-05, "loss": 0.9233, "step": 7206 }, { "epoch": 0.48831221627481536, "grad_norm": 6.219117164611816, "learning_rate": 9.116161270449724e-05, "loss": 0.5807, "step": 7207 }, { "epoch": 0.48837997154278745, "grad_norm": 5.99801778793335, "learning_rate": 9.116024368539942e-05, "loss": 0.9267, "step": 7208 }, { "epoch": 0.48844772681075954, "grad_norm": 7.163285255432129, "learning_rate": 9.11588746663016e-05, "loss": 0.8695, "step": 7209 }, { "epoch": 0.48851548207873163, "grad_norm": 5.209384441375732, "learning_rate": 9.115750564720378e-05, "loss": 0.8395, "step": 7210 }, { "epoch": 0.4885832373467037, "grad_norm": 5.942741394042969, "learning_rate": 9.115613662810597e-05, "loss": 0.8593, "step": 7211 }, { "epoch": 0.4886509926146758, "grad_norm": 6.742303848266602, "learning_rate": 9.115476760900815e-05, "loss": 0.7964, "step": 7212 }, { "epoch": 0.48871874788264785, "grad_norm": 6.951894283294678, "learning_rate": 9.115339858991033e-05, "loss": 0.9285, "step": 7213 }, { "epoch": 0.48878650315061994, "grad_norm": 5.752067565917969, "learning_rate": 9.115202957081251e-05, "loss": 0.6053, "step": 7214 }, { "epoch": 0.48885425841859204, "grad_norm": 6.7097883224487305, "learning_rate": 9.11506605517147e-05, "loss": 0.82, "step": 7215 }, { "epoch": 0.4889220136865641, "grad_norm": 6.396644592285156, "learning_rate": 9.114929153261689e-05, "loss": 0.9438, "step": 7216 }, { "epoch": 0.4889897689545362, "grad_norm": 6.378931999206543, "learning_rate": 9.114792251351907e-05, "loss": 0.9635, "step": 7217 }, { "epoch": 0.4890575242225083, "grad_norm": 5.802820682525635, "learning_rate": 9.114655349442125e-05, "loss": 0.7166, "step": 7218 }, { "epoch": 0.4891252794904804, "grad_norm": 6.493535041809082, "learning_rate": 9.114518447532343e-05, "loss": 0.985, "step": 7219 }, { "epoch": 0.4891930347584525, "grad_norm": 7.591537952423096, "learning_rate": 9.114381545622562e-05, "loss": 0.8348, "step": 7220 }, { "epoch": 0.48926079002642453, "grad_norm": 6.379971027374268, "learning_rate": 9.11424464371278e-05, "loss": 0.9761, "step": 7221 }, { "epoch": 0.4893285452943966, "grad_norm": 9.187169075012207, "learning_rate": 9.114107741802998e-05, "loss": 1.1654, "step": 7222 }, { "epoch": 0.4893963005623687, "grad_norm": 6.582739353179932, "learning_rate": 9.113970839893216e-05, "loss": 0.8921, "step": 7223 }, { "epoch": 0.4894640558303408, "grad_norm": 5.6152544021606445, "learning_rate": 9.113833937983434e-05, "loss": 0.7498, "step": 7224 }, { "epoch": 0.4895318110983129, "grad_norm": 6.960738182067871, "learning_rate": 9.113697036073654e-05, "loss": 0.7443, "step": 7225 }, { "epoch": 0.489599566366285, "grad_norm": 6.515749454498291, "learning_rate": 9.113560134163872e-05, "loss": 0.903, "step": 7226 }, { "epoch": 0.4896673216342571, "grad_norm": 7.327613830566406, "learning_rate": 9.11342323225409e-05, "loss": 0.8484, "step": 7227 }, { "epoch": 0.48973507690222917, "grad_norm": 6.575616359710693, "learning_rate": 9.113286330344308e-05, "loss": 1.001, "step": 7228 }, { "epoch": 0.4898028321702012, "grad_norm": 6.429412364959717, "learning_rate": 9.113149428434527e-05, "loss": 0.8028, "step": 7229 }, { "epoch": 0.4898705874381733, "grad_norm": 6.13348388671875, "learning_rate": 9.113012526524745e-05, "loss": 0.8966, "step": 7230 }, { "epoch": 0.4899383427061454, "grad_norm": 5.246626377105713, "learning_rate": 9.112875624614963e-05, "loss": 0.7548, "step": 7231 }, { "epoch": 0.4900060979741175, "grad_norm": 8.197700500488281, "learning_rate": 9.112738722705183e-05, "loss": 0.9122, "step": 7232 }, { "epoch": 0.4900738532420896, "grad_norm": 7.987671375274658, "learning_rate": 9.1126018207954e-05, "loss": 1.0089, "step": 7233 }, { "epoch": 0.49014160851006167, "grad_norm": 6.8674211502075195, "learning_rate": 9.112464918885619e-05, "loss": 0.7244, "step": 7234 }, { "epoch": 0.49020936377803376, "grad_norm": 9.719727516174316, "learning_rate": 9.112328016975838e-05, "loss": 0.9079, "step": 7235 }, { "epoch": 0.49027711904600585, "grad_norm": 6.982578277587891, "learning_rate": 9.112191115066056e-05, "loss": 0.7876, "step": 7236 }, { "epoch": 0.4903448743139779, "grad_norm": 5.029915809631348, "learning_rate": 9.112054213156274e-05, "loss": 0.7574, "step": 7237 }, { "epoch": 0.49041262958195, "grad_norm": 8.605433464050293, "learning_rate": 9.111917311246493e-05, "loss": 0.9255, "step": 7238 }, { "epoch": 0.49048038484992207, "grad_norm": 7.910008907318115, "learning_rate": 9.111780409336711e-05, "loss": 0.9514, "step": 7239 }, { "epoch": 0.49054814011789416, "grad_norm": 6.401332855224609, "learning_rate": 9.11164350742693e-05, "loss": 0.9243, "step": 7240 }, { "epoch": 0.49061589538586625, "grad_norm": 6.722992420196533, "learning_rate": 9.111506605517148e-05, "loss": 0.7595, "step": 7241 }, { "epoch": 0.49068365065383834, "grad_norm": 6.60951566696167, "learning_rate": 9.111369703607366e-05, "loss": 0.9381, "step": 7242 }, { "epoch": 0.49075140592181044, "grad_norm": 7.443787574768066, "learning_rate": 9.111232801697585e-05, "loss": 1.0037, "step": 7243 }, { "epoch": 0.4908191611897825, "grad_norm": 7.43011999130249, "learning_rate": 9.111095899787803e-05, "loss": 0.8213, "step": 7244 }, { "epoch": 0.49088691645775456, "grad_norm": 8.865147590637207, "learning_rate": 9.110958997878021e-05, "loss": 0.9547, "step": 7245 }, { "epoch": 0.49095467172572665, "grad_norm": 5.83010721206665, "learning_rate": 9.110822095968239e-05, "loss": 0.7253, "step": 7246 }, { "epoch": 0.49102242699369875, "grad_norm": 6.1638946533203125, "learning_rate": 9.110685194058458e-05, "loss": 0.7643, "step": 7247 }, { "epoch": 0.49109018226167084, "grad_norm": 6.534294128417969, "learning_rate": 9.110548292148677e-05, "loss": 0.9102, "step": 7248 }, { "epoch": 0.49115793752964293, "grad_norm": 8.244972229003906, "learning_rate": 9.110411390238895e-05, "loss": 0.9615, "step": 7249 }, { "epoch": 0.491225692797615, "grad_norm": 7.7991461753845215, "learning_rate": 9.110274488329113e-05, "loss": 0.9697, "step": 7250 }, { "epoch": 0.4912934480655871, "grad_norm": 7.489588737487793, "learning_rate": 9.11013758641933e-05, "loss": 0.7339, "step": 7251 }, { "epoch": 0.4913612033335592, "grad_norm": 6.112767696380615, "learning_rate": 9.11000068450955e-05, "loss": 1.1147, "step": 7252 }, { "epoch": 0.4914289586015313, "grad_norm": 6.319901943206787, "learning_rate": 9.109863782599768e-05, "loss": 0.9777, "step": 7253 }, { "epoch": 0.49149671386950333, "grad_norm": 7.175682544708252, "learning_rate": 9.109726880689986e-05, "loss": 0.7403, "step": 7254 }, { "epoch": 0.4915644691374754, "grad_norm": 7.254464149475098, "learning_rate": 9.109589978780204e-05, "loss": 0.8533, "step": 7255 }, { "epoch": 0.4916322244054475, "grad_norm": 6.266726493835449, "learning_rate": 9.109453076870422e-05, "loss": 0.8648, "step": 7256 }, { "epoch": 0.4916999796734196, "grad_norm": 7.290742874145508, "learning_rate": 9.109316174960642e-05, "loss": 0.7274, "step": 7257 }, { "epoch": 0.4917677349413917, "grad_norm": 6.882185459136963, "learning_rate": 9.10917927305086e-05, "loss": 0.9184, "step": 7258 }, { "epoch": 0.4918354902093638, "grad_norm": 6.796192169189453, "learning_rate": 9.109042371141078e-05, "loss": 0.9734, "step": 7259 }, { "epoch": 0.4919032454773359, "grad_norm": 7.461274147033691, "learning_rate": 9.108905469231296e-05, "loss": 0.8931, "step": 7260 }, { "epoch": 0.491971000745308, "grad_norm": 6.522415637969971, "learning_rate": 9.108768567321515e-05, "loss": 0.7529, "step": 7261 }, { "epoch": 0.49203875601328, "grad_norm": 7.108310699462891, "learning_rate": 9.108631665411733e-05, "loss": 0.7551, "step": 7262 }, { "epoch": 0.4921065112812521, "grad_norm": 6.65360164642334, "learning_rate": 9.108494763501951e-05, "loss": 1.1178, "step": 7263 }, { "epoch": 0.4921742665492242, "grad_norm": 8.197813034057617, "learning_rate": 9.108357861592169e-05, "loss": 1.2079, "step": 7264 }, { "epoch": 0.4922420218171963, "grad_norm": 6.898741245269775, "learning_rate": 9.108220959682387e-05, "loss": 0.7838, "step": 7265 }, { "epoch": 0.4923097770851684, "grad_norm": 7.363327980041504, "learning_rate": 9.108084057772607e-05, "loss": 0.763, "step": 7266 }, { "epoch": 0.49237753235314047, "grad_norm": 7.18175745010376, "learning_rate": 9.107947155862825e-05, "loss": 0.7809, "step": 7267 }, { "epoch": 0.49244528762111256, "grad_norm": 5.595573902130127, "learning_rate": 9.107810253953043e-05, "loss": 0.8471, "step": 7268 }, { "epoch": 0.49251304288908465, "grad_norm": 6.333422660827637, "learning_rate": 9.10767335204326e-05, "loss": 0.7616, "step": 7269 }, { "epoch": 0.4925807981570567, "grad_norm": 8.157796859741211, "learning_rate": 9.10753645013348e-05, "loss": 0.8233, "step": 7270 }, { "epoch": 0.4926485534250288, "grad_norm": 6.123366832733154, "learning_rate": 9.107399548223698e-05, "loss": 0.7266, "step": 7271 }, { "epoch": 0.49271630869300087, "grad_norm": 10.229715347290039, "learning_rate": 9.107262646313916e-05, "loss": 0.9589, "step": 7272 }, { "epoch": 0.49278406396097296, "grad_norm": 5.360836505889893, "learning_rate": 9.107125744404134e-05, "loss": 0.7874, "step": 7273 }, { "epoch": 0.49285181922894505, "grad_norm": 6.273800373077393, "learning_rate": 9.106988842494352e-05, "loss": 0.9512, "step": 7274 }, { "epoch": 0.49291957449691715, "grad_norm": 7.902069568634033, "learning_rate": 9.106851940584572e-05, "loss": 0.799, "step": 7275 }, { "epoch": 0.49298732976488924, "grad_norm": 9.35932731628418, "learning_rate": 9.10671503867479e-05, "loss": 0.8973, "step": 7276 }, { "epoch": 0.49305508503286133, "grad_norm": 7.119052410125732, "learning_rate": 9.106578136765008e-05, "loss": 1.2001, "step": 7277 }, { "epoch": 0.49312284030083336, "grad_norm": 6.111217498779297, "learning_rate": 9.106441234855227e-05, "loss": 0.7685, "step": 7278 }, { "epoch": 0.49319059556880546, "grad_norm": 6.093493461608887, "learning_rate": 9.106304332945445e-05, "loss": 0.7097, "step": 7279 }, { "epoch": 0.49325835083677755, "grad_norm": 6.722117900848389, "learning_rate": 9.106167431035663e-05, "loss": 0.805, "step": 7280 }, { "epoch": 0.49332610610474964, "grad_norm": 6.489585876464844, "learning_rate": 9.106030529125882e-05, "loss": 0.9748, "step": 7281 }, { "epoch": 0.49339386137272173, "grad_norm": 6.1473236083984375, "learning_rate": 9.1058936272161e-05, "loss": 0.8299, "step": 7282 }, { "epoch": 0.4934616166406938, "grad_norm": 7.472615718841553, "learning_rate": 9.105756725306319e-05, "loss": 1.0012, "step": 7283 }, { "epoch": 0.4935293719086659, "grad_norm": 7.1405463218688965, "learning_rate": 9.105619823396538e-05, "loss": 0.9959, "step": 7284 }, { "epoch": 0.493597127176638, "grad_norm": 6.21019172668457, "learning_rate": 9.105482921486756e-05, "loss": 0.867, "step": 7285 }, { "epoch": 0.49366488244461004, "grad_norm": 6.836954593658447, "learning_rate": 9.105346019576974e-05, "loss": 0.8058, "step": 7286 }, { "epoch": 0.49373263771258213, "grad_norm": 6.4547319412231445, "learning_rate": 9.105209117667192e-05, "loss": 0.8028, "step": 7287 }, { "epoch": 0.4938003929805542, "grad_norm": 6.335334300994873, "learning_rate": 9.10507221575741e-05, "loss": 0.763, "step": 7288 }, { "epoch": 0.4938681482485263, "grad_norm": 7.21290397644043, "learning_rate": 9.10493531384763e-05, "loss": 0.9477, "step": 7289 }, { "epoch": 0.4939359035164984, "grad_norm": 7.856054782867432, "learning_rate": 9.104798411937847e-05, "loss": 1.1117, "step": 7290 }, { "epoch": 0.4940036587844705, "grad_norm": 6.308975696563721, "learning_rate": 9.104661510028066e-05, "loss": 1.0397, "step": 7291 }, { "epoch": 0.4940714140524426, "grad_norm": 6.111830711364746, "learning_rate": 9.104524608118284e-05, "loss": 0.8082, "step": 7292 }, { "epoch": 0.4941391693204147, "grad_norm": 7.741870403289795, "learning_rate": 9.104387706208503e-05, "loss": 0.804, "step": 7293 }, { "epoch": 0.4942069245883867, "grad_norm": 7.447502613067627, "learning_rate": 9.104250804298721e-05, "loss": 0.9074, "step": 7294 }, { "epoch": 0.4942746798563588, "grad_norm": 4.931535243988037, "learning_rate": 9.104113902388939e-05, "loss": 0.7519, "step": 7295 }, { "epoch": 0.4943424351243309, "grad_norm": 9.830883026123047, "learning_rate": 9.103977000479157e-05, "loss": 0.8659, "step": 7296 }, { "epoch": 0.494410190392303, "grad_norm": 6.182522296905518, "learning_rate": 9.103840098569375e-05, "loss": 0.7021, "step": 7297 }, { "epoch": 0.4944779456602751, "grad_norm": 8.73188591003418, "learning_rate": 9.103703196659594e-05, "loss": 0.8496, "step": 7298 }, { "epoch": 0.4945457009282472, "grad_norm": 7.2238640785217285, "learning_rate": 9.103566294749813e-05, "loss": 1.0778, "step": 7299 }, { "epoch": 0.49461345619621927, "grad_norm": 6.101573944091797, "learning_rate": 9.10342939284003e-05, "loss": 0.9264, "step": 7300 }, { "epoch": 0.49468121146419136, "grad_norm": 7.327548503875732, "learning_rate": 9.103292490930249e-05, "loss": 0.7973, "step": 7301 }, { "epoch": 0.49474896673216345, "grad_norm": 7.1809186935424805, "learning_rate": 9.103155589020467e-05, "loss": 0.9586, "step": 7302 }, { "epoch": 0.4948167220001355, "grad_norm": 5.164478778839111, "learning_rate": 9.103018687110686e-05, "loss": 0.6197, "step": 7303 }, { "epoch": 0.4948844772681076, "grad_norm": 6.387687683105469, "learning_rate": 9.102881785200904e-05, "loss": 0.9509, "step": 7304 }, { "epoch": 0.4949522325360797, "grad_norm": 8.202160835266113, "learning_rate": 9.102744883291122e-05, "loss": 0.8557, "step": 7305 }, { "epoch": 0.49501998780405176, "grad_norm": 6.164126873016357, "learning_rate": 9.10260798138134e-05, "loss": 0.7898, "step": 7306 }, { "epoch": 0.49508774307202386, "grad_norm": 6.448176383972168, "learning_rate": 9.10247107947156e-05, "loss": 0.9094, "step": 7307 }, { "epoch": 0.49515549833999595, "grad_norm": 6.582845687866211, "learning_rate": 9.102334177561778e-05, "loss": 0.8778, "step": 7308 }, { "epoch": 0.49522325360796804, "grad_norm": 7.6885552406311035, "learning_rate": 9.102197275651996e-05, "loss": 1.0229, "step": 7309 }, { "epoch": 0.49529100887594013, "grad_norm": 6.840595722198486, "learning_rate": 9.102060373742214e-05, "loss": 0.972, "step": 7310 }, { "epoch": 0.49535876414391217, "grad_norm": 6.486509323120117, "learning_rate": 9.101923471832432e-05, "loss": 0.8788, "step": 7311 }, { "epoch": 0.49542651941188426, "grad_norm": 9.052862167358398, "learning_rate": 9.101786569922651e-05, "loss": 0.8917, "step": 7312 }, { "epoch": 0.49549427467985635, "grad_norm": 5.528444766998291, "learning_rate": 9.101649668012869e-05, "loss": 0.6167, "step": 7313 }, { "epoch": 0.49556202994782844, "grad_norm": 6.7654032707214355, "learning_rate": 9.101512766103087e-05, "loss": 0.9848, "step": 7314 }, { "epoch": 0.49562978521580053, "grad_norm": 6.247506618499756, "learning_rate": 9.101375864193305e-05, "loss": 0.5868, "step": 7315 }, { "epoch": 0.4956975404837726, "grad_norm": 6.155362129211426, "learning_rate": 9.101238962283525e-05, "loss": 0.8344, "step": 7316 }, { "epoch": 0.4957652957517447, "grad_norm": 8.219099998474121, "learning_rate": 9.101102060373743e-05, "loss": 0.8839, "step": 7317 }, { "epoch": 0.4958330510197168, "grad_norm": 5.146651744842529, "learning_rate": 9.10096515846396e-05, "loss": 0.7872, "step": 7318 }, { "epoch": 0.49590080628768884, "grad_norm": 7.989886283874512, "learning_rate": 9.100828256554179e-05, "loss": 1.0485, "step": 7319 }, { "epoch": 0.49596856155566094, "grad_norm": 7.214312553405762, "learning_rate": 9.100691354644397e-05, "loss": 0.9304, "step": 7320 }, { "epoch": 0.49603631682363303, "grad_norm": 6.856055736541748, "learning_rate": 9.100554452734616e-05, "loss": 0.8934, "step": 7321 }, { "epoch": 0.4961040720916051, "grad_norm": 6.771759986877441, "learning_rate": 9.100417550824834e-05, "loss": 0.9349, "step": 7322 }, { "epoch": 0.4961718273595772, "grad_norm": 8.19810962677002, "learning_rate": 9.100280648915052e-05, "loss": 1.0005, "step": 7323 }, { "epoch": 0.4962395826275493, "grad_norm": 6.946470737457275, "learning_rate": 9.100143747005271e-05, "loss": 0.7505, "step": 7324 }, { "epoch": 0.4963073378955214, "grad_norm": 6.6737518310546875, "learning_rate": 9.10000684509549e-05, "loss": 0.8722, "step": 7325 }, { "epoch": 0.4963750931634935, "grad_norm": 5.637021064758301, "learning_rate": 9.099869943185708e-05, "loss": 0.9587, "step": 7326 }, { "epoch": 0.4964428484314655, "grad_norm": 5.846226692199707, "learning_rate": 9.099733041275927e-05, "loss": 0.7726, "step": 7327 }, { "epoch": 0.4965106036994376, "grad_norm": 7.612580299377441, "learning_rate": 9.099596139366145e-05, "loss": 0.7203, "step": 7328 }, { "epoch": 0.4965783589674097, "grad_norm": 5.490561485290527, "learning_rate": 9.099459237456363e-05, "loss": 0.9457, "step": 7329 }, { "epoch": 0.4966461142353818, "grad_norm": 5.573283672332764, "learning_rate": 9.099322335546582e-05, "loss": 0.8116, "step": 7330 }, { "epoch": 0.4967138695033539, "grad_norm": 7.063168048858643, "learning_rate": 9.0991854336368e-05, "loss": 0.906, "step": 7331 }, { "epoch": 0.496781624771326, "grad_norm": 7.066551685333252, "learning_rate": 9.099048531727018e-05, "loss": 0.8515, "step": 7332 }, { "epoch": 0.49684938003929807, "grad_norm": 6.462795257568359, "learning_rate": 9.098911629817237e-05, "loss": 0.6325, "step": 7333 }, { "epoch": 0.49691713530727016, "grad_norm": 6.59752893447876, "learning_rate": 9.098774727907455e-05, "loss": 0.7356, "step": 7334 }, { "epoch": 0.4969848905752422, "grad_norm": 5.600460052490234, "learning_rate": 9.098637825997674e-05, "loss": 0.6307, "step": 7335 }, { "epoch": 0.4970526458432143, "grad_norm": 6.0045270919799805, "learning_rate": 9.098500924087892e-05, "loss": 1.0137, "step": 7336 }, { "epoch": 0.4971204011111864, "grad_norm": 7.3824143409729, "learning_rate": 9.09836402217811e-05, "loss": 0.8982, "step": 7337 }, { "epoch": 0.4971881563791585, "grad_norm": 6.573738098144531, "learning_rate": 9.098227120268328e-05, "loss": 0.7594, "step": 7338 }, { "epoch": 0.49725591164713057, "grad_norm": 8.101619720458984, "learning_rate": 9.098090218358547e-05, "loss": 1.1994, "step": 7339 }, { "epoch": 0.49732366691510266, "grad_norm": 5.76462459564209, "learning_rate": 9.097953316448765e-05, "loss": 0.7082, "step": 7340 }, { "epoch": 0.49739142218307475, "grad_norm": 6.358243465423584, "learning_rate": 9.097816414538983e-05, "loss": 0.8459, "step": 7341 }, { "epoch": 0.49745917745104684, "grad_norm": 7.145965576171875, "learning_rate": 9.097679512629202e-05, "loss": 0.9953, "step": 7342 }, { "epoch": 0.4975269327190189, "grad_norm": 8.23405933380127, "learning_rate": 9.09754261071942e-05, "loss": 1.1466, "step": 7343 }, { "epoch": 0.49759468798699097, "grad_norm": 6.5327982902526855, "learning_rate": 9.097405708809639e-05, "loss": 1.0226, "step": 7344 }, { "epoch": 0.49766244325496306, "grad_norm": 7.101400375366211, "learning_rate": 9.097268806899857e-05, "loss": 1.0476, "step": 7345 }, { "epoch": 0.49773019852293515, "grad_norm": 7.278493404388428, "learning_rate": 9.097131904990075e-05, "loss": 0.9499, "step": 7346 }, { "epoch": 0.49779795379090724, "grad_norm": 6.826780796051025, "learning_rate": 9.096995003080293e-05, "loss": 0.8371, "step": 7347 }, { "epoch": 0.49786570905887934, "grad_norm": 6.522684097290039, "learning_rate": 9.096858101170512e-05, "loss": 0.9318, "step": 7348 }, { "epoch": 0.4979334643268514, "grad_norm": 6.954566478729248, "learning_rate": 9.09672119926073e-05, "loss": 0.9174, "step": 7349 }, { "epoch": 0.4980012195948235, "grad_norm": 6.442493438720703, "learning_rate": 9.096584297350949e-05, "loss": 0.8619, "step": 7350 }, { "epoch": 0.49806897486279555, "grad_norm": 7.186161041259766, "learning_rate": 9.096447395441167e-05, "loss": 0.661, "step": 7351 }, { "epoch": 0.49813673013076765, "grad_norm": 6.716146945953369, "learning_rate": 9.096310493531385e-05, "loss": 0.9148, "step": 7352 }, { "epoch": 0.49820448539873974, "grad_norm": 5.643620014190674, "learning_rate": 9.096173591621604e-05, "loss": 0.8393, "step": 7353 }, { "epoch": 0.49827224066671183, "grad_norm": 7.107893466949463, "learning_rate": 9.096036689711822e-05, "loss": 0.972, "step": 7354 }, { "epoch": 0.4983399959346839, "grad_norm": 6.967519283294678, "learning_rate": 9.09589978780204e-05, "loss": 0.7033, "step": 7355 }, { "epoch": 0.498407751202656, "grad_norm": 7.291131496429443, "learning_rate": 9.095762885892258e-05, "loss": 0.8853, "step": 7356 }, { "epoch": 0.4984755064706281, "grad_norm": 7.630476474761963, "learning_rate": 9.095625983982476e-05, "loss": 0.8116, "step": 7357 }, { "epoch": 0.4985432617386002, "grad_norm": 6.2367167472839355, "learning_rate": 9.095489082072695e-05, "loss": 0.8344, "step": 7358 }, { "epoch": 0.4986110170065723, "grad_norm": 9.436936378479004, "learning_rate": 9.095352180162914e-05, "loss": 1.043, "step": 7359 }, { "epoch": 0.4986787722745443, "grad_norm": 5.330153942108154, "learning_rate": 9.095215278253132e-05, "loss": 0.7025, "step": 7360 }, { "epoch": 0.4987465275425164, "grad_norm": 5.117184162139893, "learning_rate": 9.09507837634335e-05, "loss": 0.8207, "step": 7361 }, { "epoch": 0.4988142828104885, "grad_norm": 7.965060234069824, "learning_rate": 9.094941474433569e-05, "loss": 0.9344, "step": 7362 }, { "epoch": 0.4988820380784606, "grad_norm": 6.985347747802734, "learning_rate": 9.094804572523787e-05, "loss": 0.8708, "step": 7363 }, { "epoch": 0.4989497933464327, "grad_norm": 8.380836486816406, "learning_rate": 9.094667670614005e-05, "loss": 0.8901, "step": 7364 }, { "epoch": 0.4990175486144048, "grad_norm": 6.336101055145264, "learning_rate": 9.094530768704223e-05, "loss": 0.9709, "step": 7365 }, { "epoch": 0.4990853038823769, "grad_norm": 6.435248851776123, "learning_rate": 9.094393866794441e-05, "loss": 0.8913, "step": 7366 }, { "epoch": 0.49915305915034897, "grad_norm": 7.674434661865234, "learning_rate": 9.09425696488466e-05, "loss": 1.0581, "step": 7367 }, { "epoch": 0.499220814418321, "grad_norm": 7.2185211181640625, "learning_rate": 9.094120062974879e-05, "loss": 0.9202, "step": 7368 }, { "epoch": 0.4992885696862931, "grad_norm": 6.717129230499268, "learning_rate": 9.093983161065097e-05, "loss": 0.8664, "step": 7369 }, { "epoch": 0.4993563249542652, "grad_norm": 6.229526996612549, "learning_rate": 9.093846259155316e-05, "loss": 0.7374, "step": 7370 }, { "epoch": 0.4994240802222373, "grad_norm": 7.177096843719482, "learning_rate": 9.093709357245534e-05, "loss": 1.0426, "step": 7371 }, { "epoch": 0.49949183549020937, "grad_norm": 7.289033889770508, "learning_rate": 9.093572455335752e-05, "loss": 0.8705, "step": 7372 }, { "epoch": 0.49955959075818146, "grad_norm": 8.260116577148438, "learning_rate": 9.093435553425971e-05, "loss": 0.6928, "step": 7373 }, { "epoch": 0.49962734602615355, "grad_norm": 5.730698108673096, "learning_rate": 9.09329865151619e-05, "loss": 0.7852, "step": 7374 }, { "epoch": 0.49969510129412564, "grad_norm": 6.90589714050293, "learning_rate": 9.093161749606407e-05, "loss": 0.9258, "step": 7375 }, { "epoch": 0.4997628565620977, "grad_norm": 5.358353137969971, "learning_rate": 9.093024847696627e-05, "loss": 0.7327, "step": 7376 }, { "epoch": 0.49983061183006977, "grad_norm": 5.4898505210876465, "learning_rate": 9.092887945786845e-05, "loss": 0.7371, "step": 7377 }, { "epoch": 0.49989836709804186, "grad_norm": 7.045073986053467, "learning_rate": 9.092751043877063e-05, "loss": 0.8785, "step": 7378 }, { "epoch": 0.49989836709804186, "eval_loss": 0.8431733846664429, "eval_noise_accuracy": 0.0, "eval_runtime": 1466.1466, "eval_samples_per_second": 3.505, "eval_steps_per_second": 0.22, "eval_wer": 75.89224292121845, "step": 7378 }, { "epoch": 0.49996612236601395, "grad_norm": 6.144540309906006, "learning_rate": 9.092614141967281e-05, "loss": 0.8467, "step": 7379 }, { "epoch": 0.500033877633986, "grad_norm": 5.819301605224609, "learning_rate": 9.0924772400575e-05, "loss": 0.7813, "step": 7380 }, { "epoch": 0.5001016329019581, "grad_norm": 5.759615421295166, "learning_rate": 9.092340338147718e-05, "loss": 0.7834, "step": 7381 }, { "epoch": 0.5001693881699302, "grad_norm": 5.7733917236328125, "learning_rate": 9.092203436237936e-05, "loss": 0.6671, "step": 7382 }, { "epoch": 0.5002371434379023, "grad_norm": 8.171788215637207, "learning_rate": 9.092066534328154e-05, "loss": 0.9039, "step": 7383 }, { "epoch": 0.5003048987058744, "grad_norm": 6.261331081390381, "learning_rate": 9.091929632418373e-05, "loss": 0.9185, "step": 7384 }, { "epoch": 0.5003726539738464, "grad_norm": 4.867089748382568, "learning_rate": 9.091792730508592e-05, "loss": 0.8967, "step": 7385 }, { "epoch": 0.5004404092418185, "grad_norm": 8.540884971618652, "learning_rate": 9.09165582859881e-05, "loss": 1.0576, "step": 7386 }, { "epoch": 0.5005081645097906, "grad_norm": 6.116450786590576, "learning_rate": 9.091518926689028e-05, "loss": 0.8109, "step": 7387 }, { "epoch": 0.5005759197777627, "grad_norm": 5.271210670471191, "learning_rate": 9.091382024779246e-05, "loss": 1.0116, "step": 7388 }, { "epoch": 0.5006436750457348, "grad_norm": 5.851868152618408, "learning_rate": 9.091245122869464e-05, "loss": 0.8285, "step": 7389 }, { "epoch": 0.5007114303137069, "grad_norm": 6.349635124206543, "learning_rate": 9.091108220959683e-05, "loss": 1.0065, "step": 7390 }, { "epoch": 0.500779185581679, "grad_norm": 7.653061389923096, "learning_rate": 9.090971319049901e-05, "loss": 0.8314, "step": 7391 }, { "epoch": 0.5008469408496511, "grad_norm": 7.560361385345459, "learning_rate": 9.09083441714012e-05, "loss": 0.9674, "step": 7392 }, { "epoch": 0.5009146961176232, "grad_norm": 5.517054080963135, "learning_rate": 9.090697515230338e-05, "loss": 0.6423, "step": 7393 }, { "epoch": 0.5009824513855953, "grad_norm": 6.644406795501709, "learning_rate": 9.090560613320557e-05, "loss": 0.7256, "step": 7394 }, { "epoch": 0.5010502066535674, "grad_norm": 6.9502854347229, "learning_rate": 9.090423711410775e-05, "loss": 0.8194, "step": 7395 }, { "epoch": 0.5011179619215393, "grad_norm": 5.900984287261963, "learning_rate": 9.090286809500993e-05, "loss": 1.1089, "step": 7396 }, { "epoch": 0.5011857171895114, "grad_norm": 5.964234352111816, "learning_rate": 9.090149907591211e-05, "loss": 0.9525, "step": 7397 }, { "epoch": 0.5012534724574835, "grad_norm": 7.2671895027160645, "learning_rate": 9.090013005681429e-05, "loss": 0.7658, "step": 7398 }, { "epoch": 0.5013212277254556, "grad_norm": 6.128476619720459, "learning_rate": 9.089876103771648e-05, "loss": 0.8413, "step": 7399 }, { "epoch": 0.5013889829934277, "grad_norm": 5.146761894226074, "learning_rate": 9.089739201861866e-05, "loss": 0.7552, "step": 7400 }, { "epoch": 0.5014567382613998, "grad_norm": 7.736568927764893, "learning_rate": 9.089602299952085e-05, "loss": 0.5941, "step": 7401 }, { "epoch": 0.5015244935293719, "grad_norm": 7.113034725189209, "learning_rate": 9.089465398042303e-05, "loss": 0.8414, "step": 7402 }, { "epoch": 0.501592248797344, "grad_norm": 6.421565055847168, "learning_rate": 9.089328496132522e-05, "loss": 0.7477, "step": 7403 }, { "epoch": 0.5016600040653161, "grad_norm": 6.609166145324707, "learning_rate": 9.08919159422274e-05, "loss": 0.8404, "step": 7404 }, { "epoch": 0.5017277593332882, "grad_norm": 5.771233558654785, "learning_rate": 9.089054692312958e-05, "loss": 0.8446, "step": 7405 }, { "epoch": 0.5017955146012603, "grad_norm": 5.4080119132995605, "learning_rate": 9.088917790403176e-05, "loss": 0.7046, "step": 7406 }, { "epoch": 0.5018632698692324, "grad_norm": 7.0025248527526855, "learning_rate": 9.088780888493394e-05, "loss": 1.0269, "step": 7407 }, { "epoch": 0.5019310251372044, "grad_norm": 6.687203884124756, "learning_rate": 9.088643986583613e-05, "loss": 0.9794, "step": 7408 }, { "epoch": 0.5019987804051765, "grad_norm": 7.627871036529541, "learning_rate": 9.088507084673831e-05, "loss": 1.0068, "step": 7409 }, { "epoch": 0.5020665356731486, "grad_norm": 6.824975490570068, "learning_rate": 9.08837018276405e-05, "loss": 0.9582, "step": 7410 }, { "epoch": 0.5021342909411207, "grad_norm": 5.561855792999268, "learning_rate": 9.088233280854268e-05, "loss": 0.7373, "step": 7411 }, { "epoch": 0.5022020462090928, "grad_norm": 6.751492023468018, "learning_rate": 9.088096378944486e-05, "loss": 1.1699, "step": 7412 }, { "epoch": 0.5022698014770648, "grad_norm": 6.15092658996582, "learning_rate": 9.087959477034705e-05, "loss": 0.656, "step": 7413 }, { "epoch": 0.5023375567450369, "grad_norm": 7.125277042388916, "learning_rate": 9.087822575124923e-05, "loss": 0.8737, "step": 7414 }, { "epoch": 0.502405312013009, "grad_norm": 8.87856674194336, "learning_rate": 9.087685673215141e-05, "loss": 0.9961, "step": 7415 }, { "epoch": 0.5024730672809811, "grad_norm": 5.436285495758057, "learning_rate": 9.087548771305359e-05, "loss": 0.8834, "step": 7416 }, { "epoch": 0.5025408225489532, "grad_norm": 6.28549861907959, "learning_rate": 9.087411869395578e-05, "loss": 0.6354, "step": 7417 }, { "epoch": 0.5026085778169253, "grad_norm": 5.951661109924316, "learning_rate": 9.087274967485797e-05, "loss": 0.8347, "step": 7418 }, { "epoch": 0.5026763330848973, "grad_norm": 5.268624305725098, "learning_rate": 9.087138065576015e-05, "loss": 0.9966, "step": 7419 }, { "epoch": 0.5027440883528694, "grad_norm": 6.974735260009766, "learning_rate": 9.087001163666234e-05, "loss": 0.9685, "step": 7420 }, { "epoch": 0.5028118436208415, "grad_norm": 6.689586639404297, "learning_rate": 9.086864261756452e-05, "loss": 0.8843, "step": 7421 }, { "epoch": 0.5028795988888136, "grad_norm": 6.510265827178955, "learning_rate": 9.086727359846671e-05, "loss": 1.0125, "step": 7422 }, { "epoch": 0.5029473541567857, "grad_norm": 7.039668560028076, "learning_rate": 9.08659045793689e-05, "loss": 0.7289, "step": 7423 }, { "epoch": 0.5030151094247578, "grad_norm": 6.1734442710876465, "learning_rate": 9.086453556027107e-05, "loss": 0.8023, "step": 7424 }, { "epoch": 0.5030828646927299, "grad_norm": 7.895476818084717, "learning_rate": 9.086316654117325e-05, "loss": 1.0043, "step": 7425 }, { "epoch": 0.503150619960702, "grad_norm": 6.599829196929932, "learning_rate": 9.086179752207545e-05, "loss": 0.8537, "step": 7426 }, { "epoch": 0.5032183752286741, "grad_norm": 8.60390567779541, "learning_rate": 9.086042850297763e-05, "loss": 1.1038, "step": 7427 }, { "epoch": 0.5032861304966462, "grad_norm": 6.656140327453613, "learning_rate": 9.085905948387981e-05, "loss": 0.9533, "step": 7428 }, { "epoch": 0.5033538857646181, "grad_norm": 5.768946170806885, "learning_rate": 9.085769046478199e-05, "loss": 0.6271, "step": 7429 }, { "epoch": 0.5034216410325902, "grad_norm": 5.938215732574463, "learning_rate": 9.085632144568417e-05, "loss": 1.0308, "step": 7430 }, { "epoch": 0.5034893963005623, "grad_norm": 7.145301342010498, "learning_rate": 9.085495242658636e-05, "loss": 0.8695, "step": 7431 }, { "epoch": 0.5035571515685344, "grad_norm": 5.975915908813477, "learning_rate": 9.085358340748854e-05, "loss": 0.9071, "step": 7432 }, { "epoch": 0.5036249068365065, "grad_norm": 5.689105033874512, "learning_rate": 9.085221438839072e-05, "loss": 0.9037, "step": 7433 }, { "epoch": 0.5036926621044786, "grad_norm": 6.513401985168457, "learning_rate": 9.08508453692929e-05, "loss": 0.7534, "step": 7434 }, { "epoch": 0.5037604173724507, "grad_norm": 7.220860958099365, "learning_rate": 9.084947635019509e-05, "loss": 1.0833, "step": 7435 }, { "epoch": 0.5038281726404228, "grad_norm": 10.640632629394531, "learning_rate": 9.084810733109728e-05, "loss": 0.9143, "step": 7436 }, { "epoch": 0.5038959279083949, "grad_norm": 6.900107383728027, "learning_rate": 9.084673831199946e-05, "loss": 0.8239, "step": 7437 }, { "epoch": 0.503963683176367, "grad_norm": 6.290066719055176, "learning_rate": 9.084536929290164e-05, "loss": 0.8982, "step": 7438 }, { "epoch": 0.5040314384443391, "grad_norm": 7.073644638061523, "learning_rate": 9.084400027380382e-05, "loss": 1.0932, "step": 7439 }, { "epoch": 0.5040991937123112, "grad_norm": 7.144145965576172, "learning_rate": 9.084263125470601e-05, "loss": 0.7772, "step": 7440 }, { "epoch": 0.5041669489802832, "grad_norm": 5.8309326171875, "learning_rate": 9.08412622356082e-05, "loss": 0.8945, "step": 7441 }, { "epoch": 0.5042347042482553, "grad_norm": 7.0719218254089355, "learning_rate": 9.083989321651037e-05, "loss": 0.7513, "step": 7442 }, { "epoch": 0.5043024595162274, "grad_norm": 7.7847795486450195, "learning_rate": 9.083852419741255e-05, "loss": 0.9553, "step": 7443 }, { "epoch": 0.5043702147841995, "grad_norm": 6.7899169921875, "learning_rate": 9.083715517831474e-05, "loss": 0.7908, "step": 7444 }, { "epoch": 0.5044379700521715, "grad_norm": 7.399930000305176, "learning_rate": 9.083578615921693e-05, "loss": 0.8104, "step": 7445 }, { "epoch": 0.5045057253201436, "grad_norm": 6.694761753082275, "learning_rate": 9.083441714011911e-05, "loss": 0.8481, "step": 7446 }, { "epoch": 0.5045734805881157, "grad_norm": 5.661715030670166, "learning_rate": 9.083304812102129e-05, "loss": 0.7598, "step": 7447 }, { "epoch": 0.5046412358560878, "grad_norm": 6.502758979797363, "learning_rate": 9.083167910192347e-05, "loss": 0.8789, "step": 7448 }, { "epoch": 0.5047089911240599, "grad_norm": 7.428299427032471, "learning_rate": 9.083031008282566e-05, "loss": 1.0323, "step": 7449 }, { "epoch": 0.504776746392032, "grad_norm": 7.600015163421631, "learning_rate": 9.082894106372784e-05, "loss": 0.8985, "step": 7450 }, { "epoch": 0.504844501660004, "grad_norm": 5.523435592651367, "learning_rate": 9.082757204463002e-05, "loss": 0.7891, "step": 7451 }, { "epoch": 0.5049122569279761, "grad_norm": 7.293107509613037, "learning_rate": 9.08262030255322e-05, "loss": 1.0804, "step": 7452 }, { "epoch": 0.5049800121959482, "grad_norm": 5.669400215148926, "learning_rate": 9.082483400643439e-05, "loss": 0.9136, "step": 7453 }, { "epoch": 0.5050477674639203, "grad_norm": 6.409341812133789, "learning_rate": 9.082346498733658e-05, "loss": 1.1784, "step": 7454 }, { "epoch": 0.5051155227318924, "grad_norm": 5.636824607849121, "learning_rate": 9.082209596823876e-05, "loss": 0.7079, "step": 7455 }, { "epoch": 0.5051832779998645, "grad_norm": 6.060736179351807, "learning_rate": 9.082072694914094e-05, "loss": 0.9279, "step": 7456 }, { "epoch": 0.5052510332678366, "grad_norm": 8.116156578063965, "learning_rate": 9.081935793004312e-05, "loss": 0.919, "step": 7457 }, { "epoch": 0.5053187885358087, "grad_norm": 7.159115791320801, "learning_rate": 9.081798891094531e-05, "loss": 0.8237, "step": 7458 }, { "epoch": 0.5053865438037808, "grad_norm": 7.625302314758301, "learning_rate": 9.08166198918475e-05, "loss": 0.8204, "step": 7459 }, { "epoch": 0.5054542990717529, "grad_norm": 7.276190280914307, "learning_rate": 9.081525087274967e-05, "loss": 0.6219, "step": 7460 }, { "epoch": 0.505522054339725, "grad_norm": 8.660572052001953, "learning_rate": 9.081388185365186e-05, "loss": 0.9772, "step": 7461 }, { "epoch": 0.505589809607697, "grad_norm": 8.218971252441406, "learning_rate": 9.081251283455404e-05, "loss": 0.7771, "step": 7462 }, { "epoch": 0.505657564875669, "grad_norm": 5.246251106262207, "learning_rate": 9.081114381545623e-05, "loss": 0.7706, "step": 7463 }, { "epoch": 0.5057253201436411, "grad_norm": 8.52219009399414, "learning_rate": 9.080977479635841e-05, "loss": 0.986, "step": 7464 }, { "epoch": 0.5057930754116132, "grad_norm": 5.994356632232666, "learning_rate": 9.080840577726059e-05, "loss": 0.783, "step": 7465 }, { "epoch": 0.5058608306795853, "grad_norm": 7.397661209106445, "learning_rate": 9.080703675816278e-05, "loss": 0.8338, "step": 7466 }, { "epoch": 0.5059285859475574, "grad_norm": 7.875096797943115, "learning_rate": 9.080566773906496e-05, "loss": 1.0615, "step": 7467 }, { "epoch": 0.5059963412155295, "grad_norm": 5.616501331329346, "learning_rate": 9.080429871996714e-05, "loss": 0.8735, "step": 7468 }, { "epoch": 0.5060640964835016, "grad_norm": 7.229982852935791, "learning_rate": 9.080292970086934e-05, "loss": 0.6812, "step": 7469 }, { "epoch": 0.5061318517514737, "grad_norm": 6.370266437530518, "learning_rate": 9.080156068177152e-05, "loss": 0.999, "step": 7470 }, { "epoch": 0.5061996070194458, "grad_norm": 7.752940654754639, "learning_rate": 9.08001916626737e-05, "loss": 1.0239, "step": 7471 }, { "epoch": 0.5062673622874179, "grad_norm": 6.016890048980713, "learning_rate": 9.079882264357589e-05, "loss": 0.8494, "step": 7472 }, { "epoch": 0.50633511755539, "grad_norm": 5.739022731781006, "learning_rate": 9.079745362447807e-05, "loss": 0.8505, "step": 7473 }, { "epoch": 0.506402872823362, "grad_norm": 5.543849468231201, "learning_rate": 9.079608460538025e-05, "loss": 0.884, "step": 7474 }, { "epoch": 0.5064706280913341, "grad_norm": 6.608273506164551, "learning_rate": 9.079471558628243e-05, "loss": 0.832, "step": 7475 }, { "epoch": 0.5065383833593062, "grad_norm": 5.356375217437744, "learning_rate": 9.079334656718461e-05, "loss": 0.7631, "step": 7476 }, { "epoch": 0.5066061386272783, "grad_norm": 6.19942045211792, "learning_rate": 9.079197754808681e-05, "loss": 0.8971, "step": 7477 }, { "epoch": 0.5066738938952503, "grad_norm": 6.8287506103515625, "learning_rate": 9.079060852898899e-05, "loss": 1.1027, "step": 7478 }, { "epoch": 0.5067416491632224, "grad_norm": 5.98441219329834, "learning_rate": 9.078923950989117e-05, "loss": 0.8476, "step": 7479 }, { "epoch": 0.5068094044311945, "grad_norm": 5.990217208862305, "learning_rate": 9.078787049079335e-05, "loss": 0.7411, "step": 7480 }, { "epoch": 0.5068771596991666, "grad_norm": 7.649387836456299, "learning_rate": 9.078650147169554e-05, "loss": 0.8498, "step": 7481 }, { "epoch": 0.5069449149671387, "grad_norm": 5.848696708679199, "learning_rate": 9.078513245259772e-05, "loss": 0.9977, "step": 7482 }, { "epoch": 0.5070126702351108, "grad_norm": 7.848155498504639, "learning_rate": 9.07837634334999e-05, "loss": 1.0863, "step": 7483 }, { "epoch": 0.5070804255030829, "grad_norm": 6.667200088500977, "learning_rate": 9.078239441440208e-05, "loss": 0.8052, "step": 7484 }, { "epoch": 0.507148180771055, "grad_norm": 7.332635879516602, "learning_rate": 9.078102539530426e-05, "loss": 0.8855, "step": 7485 }, { "epoch": 0.507215936039027, "grad_norm": 8.586542129516602, "learning_rate": 9.077965637620646e-05, "loss": 1.0024, "step": 7486 }, { "epoch": 0.5072836913069991, "grad_norm": 7.780113697052002, "learning_rate": 9.077828735710864e-05, "loss": 0.9129, "step": 7487 }, { "epoch": 0.5073514465749712, "grad_norm": 6.412726879119873, "learning_rate": 9.077691833801082e-05, "loss": 0.6586, "step": 7488 }, { "epoch": 0.5074192018429433, "grad_norm": 7.84084415435791, "learning_rate": 9.0775549318913e-05, "loss": 0.8709, "step": 7489 }, { "epoch": 0.5074869571109154, "grad_norm": 6.463359355926514, "learning_rate": 9.077418029981518e-05, "loss": 0.7824, "step": 7490 }, { "epoch": 0.5075547123788875, "grad_norm": 7.006936550140381, "learning_rate": 9.077281128071737e-05, "loss": 0.8868, "step": 7491 }, { "epoch": 0.5076224676468596, "grad_norm": 6.153975486755371, "learning_rate": 9.077144226161955e-05, "loss": 0.8607, "step": 7492 }, { "epoch": 0.5076902229148317, "grad_norm": 6.791597366333008, "learning_rate": 9.077007324252173e-05, "loss": 0.8782, "step": 7493 }, { "epoch": 0.5077579781828037, "grad_norm": 8.730172157287598, "learning_rate": 9.076870422342391e-05, "loss": 0.9439, "step": 7494 }, { "epoch": 0.5078257334507758, "grad_norm": 8.325760841369629, "learning_rate": 9.076733520432611e-05, "loss": 0.9425, "step": 7495 }, { "epoch": 0.5078934887187478, "grad_norm": 5.549458026885986, "learning_rate": 9.076596618522829e-05, "loss": 0.741, "step": 7496 }, { "epoch": 0.5079612439867199, "grad_norm": 6.16536283493042, "learning_rate": 9.076459716613047e-05, "loss": 0.9809, "step": 7497 }, { "epoch": 0.508028999254692, "grad_norm": 7.393336772918701, "learning_rate": 9.076322814703265e-05, "loss": 0.7392, "step": 7498 }, { "epoch": 0.5080967545226641, "grad_norm": 6.697513580322266, "learning_rate": 9.076185912793483e-05, "loss": 0.9413, "step": 7499 }, { "epoch": 0.5081645097906362, "grad_norm": 5.854069709777832, "learning_rate": 9.076049010883702e-05, "loss": 0.8432, "step": 7500 }, { "epoch": 0.5082322650586083, "grad_norm": 5.70686149597168, "learning_rate": 9.07591210897392e-05, "loss": 0.762, "step": 7501 }, { "epoch": 0.5083000203265804, "grad_norm": 7.012457370758057, "learning_rate": 9.075775207064138e-05, "loss": 0.8047, "step": 7502 }, { "epoch": 0.5083677755945525, "grad_norm": 6.485003471374512, "learning_rate": 9.075638305154357e-05, "loss": 0.6968, "step": 7503 }, { "epoch": 0.5084355308625246, "grad_norm": 6.104341506958008, "learning_rate": 9.075501403244576e-05, "loss": 0.82, "step": 7504 }, { "epoch": 0.5085032861304967, "grad_norm": 7.397383689880371, "learning_rate": 9.075364501334794e-05, "loss": 0.7462, "step": 7505 }, { "epoch": 0.5085710413984688, "grad_norm": 6.942671775817871, "learning_rate": 9.075227599425012e-05, "loss": 0.8829, "step": 7506 }, { "epoch": 0.5086387966664409, "grad_norm": 7.897338390350342, "learning_rate": 9.07509069751523e-05, "loss": 0.8725, "step": 7507 }, { "epoch": 0.5087065519344129, "grad_norm": 6.410269260406494, "learning_rate": 9.074953795605448e-05, "loss": 0.6648, "step": 7508 }, { "epoch": 0.508774307202385, "grad_norm": 7.2909955978393555, "learning_rate": 9.074816893695667e-05, "loss": 0.8097, "step": 7509 }, { "epoch": 0.508842062470357, "grad_norm": 7.618723392486572, "learning_rate": 9.074679991785885e-05, "loss": 1.0666, "step": 7510 }, { "epoch": 0.5089098177383291, "grad_norm": 6.482639789581299, "learning_rate": 9.074543089876103e-05, "loss": 0.7946, "step": 7511 }, { "epoch": 0.5089775730063012, "grad_norm": 8.747861862182617, "learning_rate": 9.074406187966323e-05, "loss": 0.9464, "step": 7512 }, { "epoch": 0.5090453282742733, "grad_norm": 8.655475616455078, "learning_rate": 9.074269286056541e-05, "loss": 1.0564, "step": 7513 }, { "epoch": 0.5091130835422454, "grad_norm": 5.97476053237915, "learning_rate": 9.074132384146759e-05, "loss": 0.8152, "step": 7514 }, { "epoch": 0.5091808388102175, "grad_norm": 7.176423072814941, "learning_rate": 9.073995482236978e-05, "loss": 0.9338, "step": 7515 }, { "epoch": 0.5092485940781896, "grad_norm": 6.205722808837891, "learning_rate": 9.073858580327196e-05, "loss": 0.9404, "step": 7516 }, { "epoch": 0.5093163493461617, "grad_norm": 6.857678413391113, "learning_rate": 9.073721678417414e-05, "loss": 0.7308, "step": 7517 }, { "epoch": 0.5093841046141337, "grad_norm": 5.610182285308838, "learning_rate": 9.073584776507634e-05, "loss": 0.7608, "step": 7518 }, { "epoch": 0.5094518598821058, "grad_norm": 5.618816375732422, "learning_rate": 9.073447874597852e-05, "loss": 0.6947, "step": 7519 }, { "epoch": 0.5095196151500779, "grad_norm": 5.652774810791016, "learning_rate": 9.07331097268807e-05, "loss": 0.6527, "step": 7520 }, { "epoch": 0.50958737041805, "grad_norm": 6.597334861755371, "learning_rate": 9.073174070778288e-05, "loss": 0.8084, "step": 7521 }, { "epoch": 0.5096551256860221, "grad_norm": 6.7087202072143555, "learning_rate": 9.073037168868506e-05, "loss": 0.9075, "step": 7522 }, { "epoch": 0.5097228809539942, "grad_norm": 6.56160306930542, "learning_rate": 9.072900266958725e-05, "loss": 0.9683, "step": 7523 }, { "epoch": 0.5097906362219663, "grad_norm": 7.624117851257324, "learning_rate": 9.072763365048943e-05, "loss": 1.0495, "step": 7524 }, { "epoch": 0.5098583914899384, "grad_norm": 5.690593719482422, "learning_rate": 9.072626463139161e-05, "loss": 1.0348, "step": 7525 }, { "epoch": 0.5099261467579105, "grad_norm": 5.954509258270264, "learning_rate": 9.07248956122938e-05, "loss": 0.8622, "step": 7526 }, { "epoch": 0.5099939020258825, "grad_norm": 6.603368759155273, "learning_rate": 9.072352659319599e-05, "loss": 0.6596, "step": 7527 }, { "epoch": 0.5100616572938546, "grad_norm": 6.074961185455322, "learning_rate": 9.072215757409817e-05, "loss": 1.1204, "step": 7528 }, { "epoch": 0.5101294125618266, "grad_norm": 7.444427967071533, "learning_rate": 9.072078855500035e-05, "loss": 0.8495, "step": 7529 }, { "epoch": 0.5101971678297987, "grad_norm": 6.496705532073975, "learning_rate": 9.071941953590253e-05, "loss": 0.8796, "step": 7530 }, { "epoch": 0.5102649230977708, "grad_norm": 6.413107872009277, "learning_rate": 9.071805051680471e-05, "loss": 0.8713, "step": 7531 }, { "epoch": 0.5103326783657429, "grad_norm": 5.7435221672058105, "learning_rate": 9.07166814977069e-05, "loss": 0.7322, "step": 7532 }, { "epoch": 0.510400433633715, "grad_norm": 5.807244777679443, "learning_rate": 9.071531247860908e-05, "loss": 0.8808, "step": 7533 }, { "epoch": 0.5104681889016871, "grad_norm": 7.4514617919921875, "learning_rate": 9.071394345951126e-05, "loss": 0.9344, "step": 7534 }, { "epoch": 0.5105359441696592, "grad_norm": 8.864927291870117, "learning_rate": 9.071257444041344e-05, "loss": 1.1036, "step": 7535 }, { "epoch": 0.5106036994376313, "grad_norm": 6.26414155960083, "learning_rate": 9.071120542131564e-05, "loss": 0.7743, "step": 7536 }, { "epoch": 0.5106714547056034, "grad_norm": 6.583436012268066, "learning_rate": 9.070983640221782e-05, "loss": 0.8079, "step": 7537 }, { "epoch": 0.5107392099735755, "grad_norm": 6.833841323852539, "learning_rate": 9.070846738312e-05, "loss": 0.9583, "step": 7538 }, { "epoch": 0.5108069652415476, "grad_norm": 7.538537502288818, "learning_rate": 9.070709836402218e-05, "loss": 0.9455, "step": 7539 }, { "epoch": 0.5108747205095197, "grad_norm": 6.710206031799316, "learning_rate": 9.070572934492436e-05, "loss": 0.8598, "step": 7540 }, { "epoch": 0.5109424757774917, "grad_norm": 5.586297035217285, "learning_rate": 9.070436032582655e-05, "loss": 0.7335, "step": 7541 }, { "epoch": 0.5110102310454638, "grad_norm": 6.353386878967285, "learning_rate": 9.070299130672873e-05, "loss": 0.7107, "step": 7542 }, { "epoch": 0.5110779863134358, "grad_norm": 7.016844272613525, "learning_rate": 9.070162228763091e-05, "loss": 0.8202, "step": 7543 }, { "epoch": 0.5111457415814079, "grad_norm": 7.088860988616943, "learning_rate": 9.07002532685331e-05, "loss": 0.7402, "step": 7544 }, { "epoch": 0.51121349684938, "grad_norm": 7.208921432495117, "learning_rate": 9.069888424943527e-05, "loss": 0.8991, "step": 7545 }, { "epoch": 0.5112812521173521, "grad_norm": 7.308844566345215, "learning_rate": 9.069751523033747e-05, "loss": 0.9908, "step": 7546 }, { "epoch": 0.5113490073853242, "grad_norm": 6.0522308349609375, "learning_rate": 9.069614621123965e-05, "loss": 0.788, "step": 7547 }, { "epoch": 0.5114167626532963, "grad_norm": 6.078645706176758, "learning_rate": 9.069477719214183e-05, "loss": 0.7327, "step": 7548 }, { "epoch": 0.5114845179212684, "grad_norm": 7.585491180419922, "learning_rate": 9.069340817304401e-05, "loss": 1.0976, "step": 7549 }, { "epoch": 0.5115522731892405, "grad_norm": 5.420160293579102, "learning_rate": 9.06920391539462e-05, "loss": 0.5896, "step": 7550 }, { "epoch": 0.5116200284572126, "grad_norm": 7.09340763092041, "learning_rate": 9.069067013484838e-05, "loss": 0.8464, "step": 7551 }, { "epoch": 0.5116877837251846, "grad_norm": 5.813265323638916, "learning_rate": 9.068930111575056e-05, "loss": 0.685, "step": 7552 }, { "epoch": 0.5117555389931567, "grad_norm": 8.996179580688477, "learning_rate": 9.068793209665274e-05, "loss": 1.0089, "step": 7553 }, { "epoch": 0.5118232942611288, "grad_norm": 5.605385780334473, "learning_rate": 9.068656307755493e-05, "loss": 0.8688, "step": 7554 }, { "epoch": 0.5118910495291009, "grad_norm": 7.07156229019165, "learning_rate": 9.068519405845712e-05, "loss": 0.7926, "step": 7555 }, { "epoch": 0.511958804797073, "grad_norm": 6.07735538482666, "learning_rate": 9.06838250393593e-05, "loss": 0.7721, "step": 7556 }, { "epoch": 0.5120265600650451, "grad_norm": 8.173517227172852, "learning_rate": 9.068245602026148e-05, "loss": 0.8789, "step": 7557 }, { "epoch": 0.5120943153330172, "grad_norm": 6.656474590301514, "learning_rate": 9.068108700116367e-05, "loss": 0.9221, "step": 7558 }, { "epoch": 0.5121620706009892, "grad_norm": 7.5140061378479, "learning_rate": 9.067971798206585e-05, "loss": 0.7789, "step": 7559 }, { "epoch": 0.5122298258689613, "grad_norm": 6.517942428588867, "learning_rate": 9.067834896296803e-05, "loss": 0.7376, "step": 7560 }, { "epoch": 0.5122975811369334, "grad_norm": 6.736027717590332, "learning_rate": 9.067697994387023e-05, "loss": 0.8051, "step": 7561 }, { "epoch": 0.5123653364049054, "grad_norm": 11.268937110900879, "learning_rate": 9.067561092477241e-05, "loss": 0.7261, "step": 7562 }, { "epoch": 0.5124330916728775, "grad_norm": 6.4452667236328125, "learning_rate": 9.067424190567459e-05, "loss": 0.9289, "step": 7563 }, { "epoch": 0.5125008469408496, "grad_norm": 6.565738201141357, "learning_rate": 9.067287288657678e-05, "loss": 0.8039, "step": 7564 }, { "epoch": 0.5125686022088217, "grad_norm": 5.919821739196777, "learning_rate": 9.067150386747896e-05, "loss": 0.7832, "step": 7565 }, { "epoch": 0.5126363574767938, "grad_norm": 7.885809898376465, "learning_rate": 9.067013484838114e-05, "loss": 0.7081, "step": 7566 }, { "epoch": 0.5127041127447659, "grad_norm": 6.188759803771973, "learning_rate": 9.066876582928332e-05, "loss": 0.9531, "step": 7567 }, { "epoch": 0.512771868012738, "grad_norm": 7.5221452713012695, "learning_rate": 9.06673968101855e-05, "loss": 0.9727, "step": 7568 }, { "epoch": 0.5128396232807101, "grad_norm": 6.461081027984619, "learning_rate": 9.06660277910877e-05, "loss": 0.9477, "step": 7569 }, { "epoch": 0.5129073785486822, "grad_norm": 7.04016637802124, "learning_rate": 9.066465877198988e-05, "loss": 0.7655, "step": 7570 }, { "epoch": 0.5129751338166543, "grad_norm": 6.721169471740723, "learning_rate": 9.066328975289206e-05, "loss": 0.7675, "step": 7571 }, { "epoch": 0.5130428890846264, "grad_norm": 6.552652835845947, "learning_rate": 9.066192073379424e-05, "loss": 0.6938, "step": 7572 }, { "epoch": 0.5131106443525985, "grad_norm": 5.2619452476501465, "learning_rate": 9.066055171469643e-05, "loss": 0.8072, "step": 7573 }, { "epoch": 0.5131783996205705, "grad_norm": 6.426028728485107, "learning_rate": 9.065918269559861e-05, "loss": 0.8989, "step": 7574 }, { "epoch": 0.5132461548885426, "grad_norm": 7.762108325958252, "learning_rate": 9.06578136765008e-05, "loss": 1.0512, "step": 7575 }, { "epoch": 0.5133139101565146, "grad_norm": 5.862361431121826, "learning_rate": 9.065644465740297e-05, "loss": 0.8233, "step": 7576 }, { "epoch": 0.5133816654244867, "grad_norm": 6.885676860809326, "learning_rate": 9.065507563830515e-05, "loss": 0.9328, "step": 7577 }, { "epoch": 0.5134494206924588, "grad_norm": 6.061431884765625, "learning_rate": 9.065370661920735e-05, "loss": 0.6901, "step": 7578 }, { "epoch": 0.5135171759604309, "grad_norm": 5.702830791473389, "learning_rate": 9.065233760010953e-05, "loss": 0.8137, "step": 7579 }, { "epoch": 0.513584931228403, "grad_norm": 7.937398910522461, "learning_rate": 9.065096858101171e-05, "loss": 1.1357, "step": 7580 }, { "epoch": 0.5136526864963751, "grad_norm": 6.353341102600098, "learning_rate": 9.064959956191389e-05, "loss": 0.8848, "step": 7581 }, { "epoch": 0.5137204417643472, "grad_norm": 5.204100608825684, "learning_rate": 9.064823054281608e-05, "loss": 0.6812, "step": 7582 }, { "epoch": 0.5137881970323193, "grad_norm": 6.726477146148682, "learning_rate": 9.064686152371826e-05, "loss": 0.8012, "step": 7583 }, { "epoch": 0.5138559523002914, "grad_norm": 8.397887229919434, "learning_rate": 9.064549250462044e-05, "loss": 0.6674, "step": 7584 }, { "epoch": 0.5139237075682634, "grad_norm": 6.773873329162598, "learning_rate": 9.064412348552262e-05, "loss": 0.7718, "step": 7585 }, { "epoch": 0.5139914628362355, "grad_norm": 5.611907005310059, "learning_rate": 9.06427544664248e-05, "loss": 0.9307, "step": 7586 }, { "epoch": 0.5140592181042076, "grad_norm": 7.3962225914001465, "learning_rate": 9.0641385447327e-05, "loss": 0.8438, "step": 7587 }, { "epoch": 0.5141269733721797, "grad_norm": 7.1288580894470215, "learning_rate": 9.064001642822918e-05, "loss": 0.8185, "step": 7588 }, { "epoch": 0.5141947286401518, "grad_norm": 6.204834461212158, "learning_rate": 9.063864740913136e-05, "loss": 0.8201, "step": 7589 }, { "epoch": 0.5142624839081239, "grad_norm": 6.229215145111084, "learning_rate": 9.063727839003354e-05, "loss": 0.811, "step": 7590 }, { "epoch": 0.514330239176096, "grad_norm": 7.3174285888671875, "learning_rate": 9.063590937093573e-05, "loss": 0.9745, "step": 7591 }, { "epoch": 0.514397994444068, "grad_norm": 7.100773334503174, "learning_rate": 9.063454035183791e-05, "loss": 0.8132, "step": 7592 }, { "epoch": 0.5144657497120401, "grad_norm": 7.439940929412842, "learning_rate": 9.06331713327401e-05, "loss": 1.042, "step": 7593 }, { "epoch": 0.5145335049800122, "grad_norm": 6.337569713592529, "learning_rate": 9.063180231364227e-05, "loss": 0.6381, "step": 7594 }, { "epoch": 0.5146012602479842, "grad_norm": 8.016763687133789, "learning_rate": 9.063043329454445e-05, "loss": 0.944, "step": 7595 }, { "epoch": 0.5146690155159563, "grad_norm": 7.090301036834717, "learning_rate": 9.062906427544665e-05, "loss": 1.0519, "step": 7596 }, { "epoch": 0.5147367707839284, "grad_norm": 6.24937629699707, "learning_rate": 9.062769525634883e-05, "loss": 0.916, "step": 7597 }, { "epoch": 0.5148045260519005, "grad_norm": 6.316051959991455, "learning_rate": 9.062632623725101e-05, "loss": 0.8483, "step": 7598 }, { "epoch": 0.5148722813198726, "grad_norm": 7.601284027099609, "learning_rate": 9.062495721815319e-05, "loss": 0.9532, "step": 7599 }, { "epoch": 0.5149400365878447, "grad_norm": 6.206179618835449, "learning_rate": 9.062358819905537e-05, "loss": 0.8767, "step": 7600 }, { "epoch": 0.5150077918558168, "grad_norm": 7.3104119300842285, "learning_rate": 9.062221917995756e-05, "loss": 0.9337, "step": 7601 }, { "epoch": 0.5150755471237889, "grad_norm": 7.43108606338501, "learning_rate": 9.062085016085974e-05, "loss": 0.9725, "step": 7602 }, { "epoch": 0.515143302391761, "grad_norm": 8.126846313476562, "learning_rate": 9.061948114176192e-05, "loss": 1.0367, "step": 7603 }, { "epoch": 0.5152110576597331, "grad_norm": 6.7592926025390625, "learning_rate": 9.061811212266412e-05, "loss": 0.9557, "step": 7604 }, { "epoch": 0.5152788129277052, "grad_norm": 5.046600341796875, "learning_rate": 9.06167431035663e-05, "loss": 0.601, "step": 7605 }, { "epoch": 0.5153465681956773, "grad_norm": 7.141878604888916, "learning_rate": 9.061537408446848e-05, "loss": 0.8938, "step": 7606 }, { "epoch": 0.5154143234636493, "grad_norm": 8.64307689666748, "learning_rate": 9.061400506537067e-05, "loss": 1.1668, "step": 7607 }, { "epoch": 0.5154820787316213, "grad_norm": 6.312736988067627, "learning_rate": 9.061263604627285e-05, "loss": 0.7902, "step": 7608 }, { "epoch": 0.5155498339995934, "grad_norm": 8.432990074157715, "learning_rate": 9.061126702717503e-05, "loss": 0.9889, "step": 7609 }, { "epoch": 0.5156175892675655, "grad_norm": 9.558629989624023, "learning_rate": 9.060989800807723e-05, "loss": 0.8032, "step": 7610 }, { "epoch": 0.5156853445355376, "grad_norm": 5.524839878082275, "learning_rate": 9.060852898897941e-05, "loss": 0.8536, "step": 7611 }, { "epoch": 0.5157530998035097, "grad_norm": 6.718236923217773, "learning_rate": 9.060715996988159e-05, "loss": 1.1837, "step": 7612 }, { "epoch": 0.5158208550714818, "grad_norm": 4.946925163269043, "learning_rate": 9.060579095078377e-05, "loss": 0.8149, "step": 7613 }, { "epoch": 0.5158886103394539, "grad_norm": 8.684269905090332, "learning_rate": 9.060442193168596e-05, "loss": 1.1112, "step": 7614 }, { "epoch": 0.515956365607426, "grad_norm": 5.708873271942139, "learning_rate": 9.060305291258814e-05, "loss": 0.9813, "step": 7615 }, { "epoch": 0.5160241208753981, "grad_norm": 7.8680419921875, "learning_rate": 9.060168389349032e-05, "loss": 0.8572, "step": 7616 }, { "epoch": 0.5160918761433702, "grad_norm": 7.092006206512451, "learning_rate": 9.06003148743925e-05, "loss": 1.0532, "step": 7617 }, { "epoch": 0.5161596314113422, "grad_norm": 6.504335880279541, "learning_rate": 9.059894585529468e-05, "loss": 0.969, "step": 7618 }, { "epoch": 0.5162273866793143, "grad_norm": 7.514725208282471, "learning_rate": 9.059757683619688e-05, "loss": 0.7966, "step": 7619 }, { "epoch": 0.5162951419472864, "grad_norm": 5.896969318389893, "learning_rate": 9.059620781709906e-05, "loss": 0.6818, "step": 7620 }, { "epoch": 0.5163628972152585, "grad_norm": 11.024290084838867, "learning_rate": 9.059483879800124e-05, "loss": 0.8365, "step": 7621 }, { "epoch": 0.5164306524832306, "grad_norm": 6.390562534332275, "learning_rate": 9.059346977890342e-05, "loss": 0.9039, "step": 7622 }, { "epoch": 0.5164984077512027, "grad_norm": 6.528719425201416, "learning_rate": 9.05921007598056e-05, "loss": 0.8762, "step": 7623 }, { "epoch": 0.5165661630191748, "grad_norm": 6.40498685836792, "learning_rate": 9.059073174070779e-05, "loss": 1.1455, "step": 7624 }, { "epoch": 0.5166339182871468, "grad_norm": 6.250789165496826, "learning_rate": 9.058936272160997e-05, "loss": 0.8012, "step": 7625 }, { "epoch": 0.5167016735551189, "grad_norm": 5.280569076538086, "learning_rate": 9.058799370251215e-05, "loss": 0.5803, "step": 7626 }, { "epoch": 0.516769428823091, "grad_norm": 6.143814563751221, "learning_rate": 9.058662468341433e-05, "loss": 0.8817, "step": 7627 }, { "epoch": 0.516837184091063, "grad_norm": 5.753292560577393, "learning_rate": 9.058525566431653e-05, "loss": 0.6251, "step": 7628 }, { "epoch": 0.5169049393590351, "grad_norm": 7.431488037109375, "learning_rate": 9.058388664521871e-05, "loss": 0.807, "step": 7629 }, { "epoch": 0.5169726946270072, "grad_norm": 7.9407548904418945, "learning_rate": 9.058251762612089e-05, "loss": 0.9708, "step": 7630 }, { "epoch": 0.5170404498949793, "grad_norm": 9.618108749389648, "learning_rate": 9.058114860702307e-05, "loss": 0.769, "step": 7631 }, { "epoch": 0.5171082051629514, "grad_norm": 7.05092716217041, "learning_rate": 9.057977958792525e-05, "loss": 0.8202, "step": 7632 }, { "epoch": 0.5171759604309235, "grad_norm": 8.478489875793457, "learning_rate": 9.057841056882744e-05, "loss": 0.836, "step": 7633 }, { "epoch": 0.5172437156988956, "grad_norm": 9.781081199645996, "learning_rate": 9.057704154972962e-05, "loss": 1.1411, "step": 7634 }, { "epoch": 0.5173114709668677, "grad_norm": 6.536010265350342, "learning_rate": 9.05756725306318e-05, "loss": 1.0199, "step": 7635 }, { "epoch": 0.5173792262348398, "grad_norm": 6.633424758911133, "learning_rate": 9.057430351153398e-05, "loss": 0.8167, "step": 7636 }, { "epoch": 0.5174469815028119, "grad_norm": 8.581771850585938, "learning_rate": 9.057293449243618e-05, "loss": 0.9926, "step": 7637 }, { "epoch": 0.517514736770784, "grad_norm": 5.538379669189453, "learning_rate": 9.057156547333836e-05, "loss": 0.7093, "step": 7638 }, { "epoch": 0.5175824920387561, "grad_norm": 6.037271022796631, "learning_rate": 9.057019645424054e-05, "loss": 0.9901, "step": 7639 }, { "epoch": 0.5176502473067282, "grad_norm": 5.663453578948975, "learning_rate": 9.056882743514272e-05, "loss": 0.929, "step": 7640 }, { "epoch": 0.5177180025747001, "grad_norm": 9.440011978149414, "learning_rate": 9.05674584160449e-05, "loss": 0.6656, "step": 7641 }, { "epoch": 0.5177857578426722, "grad_norm": 7.314742088317871, "learning_rate": 9.056608939694709e-05, "loss": 1.0248, "step": 7642 }, { "epoch": 0.5178535131106443, "grad_norm": 6.524215221405029, "learning_rate": 9.056472037784927e-05, "loss": 0.7533, "step": 7643 }, { "epoch": 0.5179212683786164, "grad_norm": 7.727807998657227, "learning_rate": 9.056335135875145e-05, "loss": 0.8106, "step": 7644 }, { "epoch": 0.5179890236465885, "grad_norm": 6.237912654876709, "learning_rate": 9.056198233965363e-05, "loss": 0.8731, "step": 7645 }, { "epoch": 0.5180567789145606, "grad_norm": 5.7623796463012695, "learning_rate": 9.056061332055583e-05, "loss": 0.7102, "step": 7646 }, { "epoch": 0.5181245341825327, "grad_norm": 7.946072578430176, "learning_rate": 9.055924430145801e-05, "loss": 0.9145, "step": 7647 }, { "epoch": 0.5181922894505048, "grad_norm": 6.882560729980469, "learning_rate": 9.055787528236019e-05, "loss": 0.9543, "step": 7648 }, { "epoch": 0.5182600447184769, "grad_norm": 5.425245761871338, "learning_rate": 9.055650626326237e-05, "loss": 0.5889, "step": 7649 }, { "epoch": 0.518327799986449, "grad_norm": 5.8002142906188965, "learning_rate": 9.055513724416456e-05, "loss": 0.8603, "step": 7650 }, { "epoch": 0.518395555254421, "grad_norm": 6.522377014160156, "learning_rate": 9.055376822506674e-05, "loss": 0.8288, "step": 7651 }, { "epoch": 0.5184633105223931, "grad_norm": 6.6606221199035645, "learning_rate": 9.055239920596892e-05, "loss": 0.8647, "step": 7652 }, { "epoch": 0.5185310657903652, "grad_norm": 7.66829776763916, "learning_rate": 9.055103018687112e-05, "loss": 1.0252, "step": 7653 }, { "epoch": 0.5185988210583373, "grad_norm": 5.306484222412109, "learning_rate": 9.05496611677733e-05, "loss": 0.7053, "step": 7654 }, { "epoch": 0.5186665763263094, "grad_norm": 5.434155464172363, "learning_rate": 9.054829214867548e-05, "loss": 0.7684, "step": 7655 }, { "epoch": 0.5187343315942815, "grad_norm": 6.3002495765686035, "learning_rate": 9.054692312957767e-05, "loss": 0.8797, "step": 7656 }, { "epoch": 0.5188020868622535, "grad_norm": 7.075455665588379, "learning_rate": 9.054555411047985e-05, "loss": 1.0045, "step": 7657 }, { "epoch": 0.5188698421302256, "grad_norm": 8.508553504943848, "learning_rate": 9.054418509138203e-05, "loss": 0.8166, "step": 7658 }, { "epoch": 0.5189375973981977, "grad_norm": 6.156308174133301, "learning_rate": 9.054281607228421e-05, "loss": 0.5456, "step": 7659 }, { "epoch": 0.5190053526661698, "grad_norm": 5.993220806121826, "learning_rate": 9.054144705318641e-05, "loss": 0.8783, "step": 7660 }, { "epoch": 0.5190731079341419, "grad_norm": 7.312931537628174, "learning_rate": 9.054007803408859e-05, "loss": 0.8758, "step": 7661 }, { "epoch": 0.519140863202114, "grad_norm": 6.825846195220947, "learning_rate": 9.053870901499077e-05, "loss": 0.9189, "step": 7662 }, { "epoch": 0.519208618470086, "grad_norm": 6.013767242431641, "learning_rate": 9.053733999589295e-05, "loss": 0.9579, "step": 7663 }, { "epoch": 0.5192763737380581, "grad_norm": 7.817983627319336, "learning_rate": 9.053597097679513e-05, "loss": 0.7481, "step": 7664 }, { "epoch": 0.5193441290060302, "grad_norm": 6.868429183959961, "learning_rate": 9.053460195769732e-05, "loss": 0.9323, "step": 7665 }, { "epoch": 0.5194118842740023, "grad_norm": 8.720475196838379, "learning_rate": 9.05332329385995e-05, "loss": 0.6936, "step": 7666 }, { "epoch": 0.5194796395419744, "grad_norm": 4.687426567077637, "learning_rate": 9.053186391950168e-05, "loss": 0.6088, "step": 7667 }, { "epoch": 0.5195473948099465, "grad_norm": 7.063420295715332, "learning_rate": 9.053049490040386e-05, "loss": 1.0478, "step": 7668 }, { "epoch": 0.5196151500779186, "grad_norm": 6.082928657531738, "learning_rate": 9.052912588130606e-05, "loss": 0.7404, "step": 7669 }, { "epoch": 0.5196829053458907, "grad_norm": 6.03659200668335, "learning_rate": 9.052775686220824e-05, "loss": 0.9515, "step": 7670 }, { "epoch": 0.5197506606138628, "grad_norm": 5.9596333503723145, "learning_rate": 9.052638784311042e-05, "loss": 0.789, "step": 7671 }, { "epoch": 0.5198184158818349, "grad_norm": 6.595233917236328, "learning_rate": 9.05250188240126e-05, "loss": 0.8488, "step": 7672 }, { "epoch": 0.519886171149807, "grad_norm": 5.341801643371582, "learning_rate": 9.052364980491478e-05, "loss": 0.7815, "step": 7673 }, { "epoch": 0.5199539264177789, "grad_norm": 9.91911506652832, "learning_rate": 9.052228078581697e-05, "loss": 0.9121, "step": 7674 }, { "epoch": 0.520021681685751, "grad_norm": 6.1603498458862305, "learning_rate": 9.052091176671915e-05, "loss": 0.9465, "step": 7675 }, { "epoch": 0.5200894369537231, "grad_norm": 7.671917915344238, "learning_rate": 9.051954274762133e-05, "loss": 0.8225, "step": 7676 }, { "epoch": 0.5201571922216952, "grad_norm": 7.148565769195557, "learning_rate": 9.051817372852351e-05, "loss": 0.6911, "step": 7677 }, { "epoch": 0.5202249474896673, "grad_norm": 5.975964546203613, "learning_rate": 9.05168047094257e-05, "loss": 0.6338, "step": 7678 }, { "epoch": 0.5202927027576394, "grad_norm": 6.8202924728393555, "learning_rate": 9.051543569032789e-05, "loss": 0.8961, "step": 7679 }, { "epoch": 0.5203604580256115, "grad_norm": 6.167017459869385, "learning_rate": 9.051406667123007e-05, "loss": 0.7479, "step": 7680 }, { "epoch": 0.5204282132935836, "grad_norm": 6.801616668701172, "learning_rate": 9.051269765213225e-05, "loss": 0.7939, "step": 7681 }, { "epoch": 0.5204959685615557, "grad_norm": 5.709447860717773, "learning_rate": 9.051132863303443e-05, "loss": 0.74, "step": 7682 }, { "epoch": 0.5205637238295278, "grad_norm": 7.522529602050781, "learning_rate": 9.050995961393662e-05, "loss": 0.8341, "step": 7683 }, { "epoch": 0.5206314790974998, "grad_norm": 7.840425491333008, "learning_rate": 9.05085905948388e-05, "loss": 0.6591, "step": 7684 }, { "epoch": 0.5206992343654719, "grad_norm": 5.461009979248047, "learning_rate": 9.050722157574098e-05, "loss": 0.6172, "step": 7685 }, { "epoch": 0.520766989633444, "grad_norm": 6.047004222869873, "learning_rate": 9.050585255664316e-05, "loss": 0.8012, "step": 7686 }, { "epoch": 0.5208347449014161, "grad_norm": 7.2090630531311035, "learning_rate": 9.050448353754534e-05, "loss": 0.9606, "step": 7687 }, { "epoch": 0.5209025001693882, "grad_norm": 6.153731822967529, "learning_rate": 9.050311451844754e-05, "loss": 0.8408, "step": 7688 }, { "epoch": 0.5209702554373603, "grad_norm": 6.858744144439697, "learning_rate": 9.050174549934972e-05, "loss": 0.9427, "step": 7689 }, { "epoch": 0.5210380107053323, "grad_norm": 6.979424476623535, "learning_rate": 9.05003764802519e-05, "loss": 0.7301, "step": 7690 }, { "epoch": 0.5211057659733044, "grad_norm": 6.714144706726074, "learning_rate": 9.049900746115408e-05, "loss": 1.0239, "step": 7691 }, { "epoch": 0.5211735212412765, "grad_norm": 6.382346153259277, "learning_rate": 9.049763844205627e-05, "loss": 0.7648, "step": 7692 }, { "epoch": 0.5212412765092486, "grad_norm": 6.722228050231934, "learning_rate": 9.049626942295845e-05, "loss": 0.5747, "step": 7693 }, { "epoch": 0.5213090317772207, "grad_norm": 8.618867874145508, "learning_rate": 9.049490040386063e-05, "loss": 0.9178, "step": 7694 }, { "epoch": 0.5213767870451927, "grad_norm": 7.99964714050293, "learning_rate": 9.049353138476281e-05, "loss": 0.989, "step": 7695 }, { "epoch": 0.5214445423131648, "grad_norm": 8.504440307617188, "learning_rate": 9.0492162365665e-05, "loss": 0.7935, "step": 7696 }, { "epoch": 0.5215122975811369, "grad_norm": 6.088517665863037, "learning_rate": 9.049079334656719e-05, "loss": 0.894, "step": 7697 }, { "epoch": 0.521580052849109, "grad_norm": 7.038356304168701, "learning_rate": 9.048942432746937e-05, "loss": 0.8796, "step": 7698 }, { "epoch": 0.5216478081170811, "grad_norm": 6.817752361297607, "learning_rate": 9.048805530837155e-05, "loss": 0.8724, "step": 7699 }, { "epoch": 0.5217155633850532, "grad_norm": 6.776190280914307, "learning_rate": 9.048668628927374e-05, "loss": 0.8975, "step": 7700 }, { "epoch": 0.5217833186530253, "grad_norm": 6.522316932678223, "learning_rate": 9.048531727017592e-05, "loss": 0.7861, "step": 7701 }, { "epoch": 0.5218510739209974, "grad_norm": 8.256267547607422, "learning_rate": 9.04839482510781e-05, "loss": 1.1714, "step": 7702 }, { "epoch": 0.5219188291889695, "grad_norm": 6.675931930541992, "learning_rate": 9.04825792319803e-05, "loss": 0.8718, "step": 7703 }, { "epoch": 0.5219865844569416, "grad_norm": 7.407254695892334, "learning_rate": 9.048121021288248e-05, "loss": 1.1478, "step": 7704 }, { "epoch": 0.5220543397249137, "grad_norm": 7.207369327545166, "learning_rate": 9.047984119378466e-05, "loss": 0.9569, "step": 7705 }, { "epoch": 0.5221220949928856, "grad_norm": 5.364320278167725, "learning_rate": 9.047847217468685e-05, "loss": 0.7651, "step": 7706 }, { "epoch": 0.5221898502608577, "grad_norm": 6.586124897003174, "learning_rate": 9.047710315558903e-05, "loss": 0.8322, "step": 7707 }, { "epoch": 0.5222576055288298, "grad_norm": 6.659854412078857, "learning_rate": 9.047573413649121e-05, "loss": 0.8912, "step": 7708 }, { "epoch": 0.5223253607968019, "grad_norm": 7.059124946594238, "learning_rate": 9.047436511739339e-05, "loss": 1.012, "step": 7709 }, { "epoch": 0.522393116064774, "grad_norm": 6.54586124420166, "learning_rate": 9.047299609829557e-05, "loss": 0.9704, "step": 7710 }, { "epoch": 0.5224608713327461, "grad_norm": 5.587647438049316, "learning_rate": 9.047162707919777e-05, "loss": 0.6115, "step": 7711 }, { "epoch": 0.5225286266007182, "grad_norm": 6.951663970947266, "learning_rate": 9.047025806009995e-05, "loss": 0.7751, "step": 7712 }, { "epoch": 0.5225963818686903, "grad_norm": 7.021738052368164, "learning_rate": 9.046888904100213e-05, "loss": 1.1632, "step": 7713 }, { "epoch": 0.5226641371366624, "grad_norm": 6.46588134765625, "learning_rate": 9.046752002190431e-05, "loss": 0.7517, "step": 7714 }, { "epoch": 0.5227318924046345, "grad_norm": 6.638917922973633, "learning_rate": 9.04661510028065e-05, "loss": 1.0171, "step": 7715 }, { "epoch": 0.5227996476726066, "grad_norm": 5.955547332763672, "learning_rate": 9.046478198370868e-05, "loss": 0.9812, "step": 7716 }, { "epoch": 0.5228674029405787, "grad_norm": 6.903220176696777, "learning_rate": 9.046341296461086e-05, "loss": 0.7894, "step": 7717 }, { "epoch": 0.5229351582085507, "grad_norm": 7.656357288360596, "learning_rate": 9.046204394551304e-05, "loss": 1.0162, "step": 7718 }, { "epoch": 0.5230029134765228, "grad_norm": 8.025165557861328, "learning_rate": 9.046067492641522e-05, "loss": 0.9158, "step": 7719 }, { "epoch": 0.5230706687444949, "grad_norm": 7.652121067047119, "learning_rate": 9.045930590731742e-05, "loss": 0.9347, "step": 7720 }, { "epoch": 0.523138424012467, "grad_norm": 7.028977870941162, "learning_rate": 9.04579368882196e-05, "loss": 0.9914, "step": 7721 }, { "epoch": 0.523206179280439, "grad_norm": 6.23792028427124, "learning_rate": 9.045656786912178e-05, "loss": 0.7154, "step": 7722 }, { "epoch": 0.5232739345484111, "grad_norm": 8.56432056427002, "learning_rate": 9.045519885002396e-05, "loss": 0.9627, "step": 7723 }, { "epoch": 0.5233416898163832, "grad_norm": 6.733086109161377, "learning_rate": 9.045382983092615e-05, "loss": 0.8932, "step": 7724 }, { "epoch": 0.5234094450843553, "grad_norm": 6.419559955596924, "learning_rate": 9.045246081182833e-05, "loss": 0.6503, "step": 7725 }, { "epoch": 0.5234772003523274, "grad_norm": 6.498406887054443, "learning_rate": 9.045109179273051e-05, "loss": 1.0333, "step": 7726 }, { "epoch": 0.5235449556202995, "grad_norm": 6.396055698394775, "learning_rate": 9.044972277363269e-05, "loss": 0.7628, "step": 7727 }, { "epoch": 0.5236127108882715, "grad_norm": 5.671381950378418, "learning_rate": 9.044835375453487e-05, "loss": 0.7091, "step": 7728 }, { "epoch": 0.5236804661562436, "grad_norm": 7.36892557144165, "learning_rate": 9.044698473543707e-05, "loss": 0.8571, "step": 7729 }, { "epoch": 0.5237482214242157, "grad_norm": 7.574361324310303, "learning_rate": 9.044561571633925e-05, "loss": 0.9784, "step": 7730 }, { "epoch": 0.5238159766921878, "grad_norm": 7.9222540855407715, "learning_rate": 9.044424669724143e-05, "loss": 0.8634, "step": 7731 }, { "epoch": 0.5238837319601599, "grad_norm": 6.013153076171875, "learning_rate": 9.044287767814361e-05, "loss": 0.9073, "step": 7732 }, { "epoch": 0.523951487228132, "grad_norm": 7.480499744415283, "learning_rate": 9.044150865904579e-05, "loss": 0.9001, "step": 7733 }, { "epoch": 0.5240192424961041, "grad_norm": 5.559460163116455, "learning_rate": 9.044013963994798e-05, "loss": 0.7644, "step": 7734 }, { "epoch": 0.5240869977640762, "grad_norm": 5.924765110015869, "learning_rate": 9.043877062085016e-05, "loss": 0.6714, "step": 7735 }, { "epoch": 0.5241547530320483, "grad_norm": 5.304020404815674, "learning_rate": 9.043740160175234e-05, "loss": 0.6169, "step": 7736 }, { "epoch": 0.5242225083000204, "grad_norm": 6.051050662994385, "learning_rate": 9.043603258265452e-05, "loss": 0.9661, "step": 7737 }, { "epoch": 0.5242902635679925, "grad_norm": 7.776313304901123, "learning_rate": 9.043466356355672e-05, "loss": 0.9922, "step": 7738 }, { "epoch": 0.5243580188359644, "grad_norm": 6.945296287536621, "learning_rate": 9.04332945444589e-05, "loss": 0.8607, "step": 7739 }, { "epoch": 0.5244257741039365, "grad_norm": 7.384977340698242, "learning_rate": 9.043192552536108e-05, "loss": 1.1737, "step": 7740 }, { "epoch": 0.5244935293719086, "grad_norm": 7.252483367919922, "learning_rate": 9.043055650626326e-05, "loss": 0.8508, "step": 7741 }, { "epoch": 0.5245612846398807, "grad_norm": 6.793081283569336, "learning_rate": 9.042918748716544e-05, "loss": 0.8632, "step": 7742 }, { "epoch": 0.5246290399078528, "grad_norm": 6.1907057762146, "learning_rate": 9.042781846806763e-05, "loss": 0.8447, "step": 7743 }, { "epoch": 0.5246967951758249, "grad_norm": 6.956373691558838, "learning_rate": 9.042644944896981e-05, "loss": 0.7821, "step": 7744 }, { "epoch": 0.524764550443797, "grad_norm": 6.5989484786987305, "learning_rate": 9.0425080429872e-05, "loss": 0.9129, "step": 7745 }, { "epoch": 0.5248323057117691, "grad_norm": 6.416126728057861, "learning_rate": 9.042371141077419e-05, "loss": 0.7622, "step": 7746 }, { "epoch": 0.5249000609797412, "grad_norm": 7.08583402633667, "learning_rate": 9.042234239167637e-05, "loss": 0.868, "step": 7747 }, { "epoch": 0.5249678162477133, "grad_norm": 6.688178539276123, "learning_rate": 9.042097337257855e-05, "loss": 0.8158, "step": 7748 }, { "epoch": 0.5250355715156854, "grad_norm": 6.829257488250732, "learning_rate": 9.041960435348074e-05, "loss": 1.0495, "step": 7749 }, { "epoch": 0.5251033267836575, "grad_norm": 9.319602966308594, "learning_rate": 9.041823533438292e-05, "loss": 1.1006, "step": 7750 }, { "epoch": 0.5251710820516295, "grad_norm": 5.709659576416016, "learning_rate": 9.04168663152851e-05, "loss": 0.8396, "step": 7751 }, { "epoch": 0.5252388373196016, "grad_norm": 7.805490493774414, "learning_rate": 9.04154972961873e-05, "loss": 0.8824, "step": 7752 }, { "epoch": 0.5253065925875737, "grad_norm": 6.146732330322266, "learning_rate": 9.041412827708948e-05, "loss": 0.6136, "step": 7753 }, { "epoch": 0.5253743478555458, "grad_norm": 7.149491310119629, "learning_rate": 9.041275925799166e-05, "loss": 0.8545, "step": 7754 }, { "epoch": 0.5254421031235178, "grad_norm": 6.13656759262085, "learning_rate": 9.041139023889384e-05, "loss": 0.6775, "step": 7755 }, { "epoch": 0.5255098583914899, "grad_norm": 7.198244571685791, "learning_rate": 9.041002121979602e-05, "loss": 0.8732, "step": 7756 }, { "epoch": 0.525577613659462, "grad_norm": 7.588368892669678, "learning_rate": 9.040865220069821e-05, "loss": 0.8651, "step": 7757 }, { "epoch": 0.5256453689274341, "grad_norm": 6.695016860961914, "learning_rate": 9.040728318160039e-05, "loss": 0.8592, "step": 7758 }, { "epoch": 0.5257131241954062, "grad_norm": 6.87295389175415, "learning_rate": 9.040591416250257e-05, "loss": 0.8187, "step": 7759 }, { "epoch": 0.5257808794633783, "grad_norm": 6.2048163414001465, "learning_rate": 9.040454514340475e-05, "loss": 0.9454, "step": 7760 }, { "epoch": 0.5258486347313504, "grad_norm": 6.1639933586120605, "learning_rate": 9.040317612430695e-05, "loss": 0.7686, "step": 7761 }, { "epoch": 0.5259163899993224, "grad_norm": 6.537137508392334, "learning_rate": 9.040180710520913e-05, "loss": 0.8203, "step": 7762 }, { "epoch": 0.5259841452672945, "grad_norm": 6.688928127288818, "learning_rate": 9.040043808611131e-05, "loss": 0.7651, "step": 7763 }, { "epoch": 0.5260519005352666, "grad_norm": 7.225207805633545, "learning_rate": 9.039906906701349e-05, "loss": 0.7461, "step": 7764 }, { "epoch": 0.5261196558032387, "grad_norm": 6.173831939697266, "learning_rate": 9.039770004791567e-05, "loss": 0.6654, "step": 7765 }, { "epoch": 0.5261874110712108, "grad_norm": 6.1730146408081055, "learning_rate": 9.039633102881786e-05, "loss": 0.7595, "step": 7766 }, { "epoch": 0.5262551663391829, "grad_norm": 6.485074520111084, "learning_rate": 9.039496200972004e-05, "loss": 0.8252, "step": 7767 }, { "epoch": 0.526322921607155, "grad_norm": 5.83876895904541, "learning_rate": 9.039359299062222e-05, "loss": 0.7358, "step": 7768 }, { "epoch": 0.5263906768751271, "grad_norm": 6.3718953132629395, "learning_rate": 9.03922239715244e-05, "loss": 0.8205, "step": 7769 }, { "epoch": 0.5264584321430992, "grad_norm": 7.796750068664551, "learning_rate": 9.03908549524266e-05, "loss": 1.285, "step": 7770 }, { "epoch": 0.5265261874110712, "grad_norm": 6.802064418792725, "learning_rate": 9.038948593332878e-05, "loss": 0.8769, "step": 7771 }, { "epoch": 0.5265939426790432, "grad_norm": 6.941751956939697, "learning_rate": 9.038811691423096e-05, "loss": 0.8375, "step": 7772 }, { "epoch": 0.5266616979470153, "grad_norm": 4.891567707061768, "learning_rate": 9.038674789513314e-05, "loss": 0.7047, "step": 7773 }, { "epoch": 0.5267294532149874, "grad_norm": 6.294151306152344, "learning_rate": 9.038537887603532e-05, "loss": 0.9026, "step": 7774 }, { "epoch": 0.5267972084829595, "grad_norm": 8.100536346435547, "learning_rate": 9.038400985693751e-05, "loss": 0.912, "step": 7775 }, { "epoch": 0.5268649637509316, "grad_norm": 6.022440433502197, "learning_rate": 9.038264083783969e-05, "loss": 0.9056, "step": 7776 }, { "epoch": 0.5269327190189037, "grad_norm": 5.4183454513549805, "learning_rate": 9.038127181874187e-05, "loss": 0.883, "step": 7777 }, { "epoch": 0.5270004742868758, "grad_norm": 7.119368076324463, "learning_rate": 9.037990279964405e-05, "loss": 0.8019, "step": 7778 }, { "epoch": 0.5270682295548479, "grad_norm": 7.654230117797852, "learning_rate": 9.037853378054625e-05, "loss": 0.9149, "step": 7779 }, { "epoch": 0.52713598482282, "grad_norm": 8.382317543029785, "learning_rate": 9.037716476144843e-05, "loss": 0.8678, "step": 7780 }, { "epoch": 0.5272037400907921, "grad_norm": 6.171057224273682, "learning_rate": 9.037579574235061e-05, "loss": 0.7199, "step": 7781 }, { "epoch": 0.5272714953587642, "grad_norm": 7.584953784942627, "learning_rate": 9.037442672325279e-05, "loss": 1.1612, "step": 7782 }, { "epoch": 0.5273392506267363, "grad_norm": 8.255087852478027, "learning_rate": 9.037305770415497e-05, "loss": 1.0615, "step": 7783 }, { "epoch": 0.5274070058947083, "grad_norm": 5.925463676452637, "learning_rate": 9.037168868505716e-05, "loss": 0.5921, "step": 7784 }, { "epoch": 0.5274747611626804, "grad_norm": 6.816803932189941, "learning_rate": 9.037031966595934e-05, "loss": 0.7954, "step": 7785 }, { "epoch": 0.5275425164306525, "grad_norm": 5.511174201965332, "learning_rate": 9.036895064686152e-05, "loss": 0.6013, "step": 7786 }, { "epoch": 0.5276102716986246, "grad_norm": 8.431069374084473, "learning_rate": 9.03675816277637e-05, "loss": 1.1493, "step": 7787 }, { "epoch": 0.5276780269665966, "grad_norm": 7.5087666511535645, "learning_rate": 9.036621260866588e-05, "loss": 0.9416, "step": 7788 }, { "epoch": 0.5277457822345687, "grad_norm": 6.478548049926758, "learning_rate": 9.036484358956808e-05, "loss": 0.7571, "step": 7789 }, { "epoch": 0.5278135375025408, "grad_norm": 6.319628715515137, "learning_rate": 9.036347457047026e-05, "loss": 1.076, "step": 7790 }, { "epoch": 0.5278812927705129, "grad_norm": 7.512424945831299, "learning_rate": 9.036210555137244e-05, "loss": 1.2705, "step": 7791 }, { "epoch": 0.527949048038485, "grad_norm": 9.076953887939453, "learning_rate": 9.036073653227463e-05, "loss": 0.723, "step": 7792 }, { "epoch": 0.5280168033064571, "grad_norm": 7.088381767272949, "learning_rate": 9.035936751317681e-05, "loss": 0.8198, "step": 7793 }, { "epoch": 0.5280845585744292, "grad_norm": 5.79082727432251, "learning_rate": 9.035799849407899e-05, "loss": 0.6164, "step": 7794 }, { "epoch": 0.5281523138424012, "grad_norm": 5.18831729888916, "learning_rate": 9.035662947498119e-05, "loss": 0.7178, "step": 7795 }, { "epoch": 0.5282200691103733, "grad_norm": 5.686639308929443, "learning_rate": 9.035526045588337e-05, "loss": 0.8515, "step": 7796 }, { "epoch": 0.5282878243783454, "grad_norm": 5.51866340637207, "learning_rate": 9.035389143678555e-05, "loss": 0.7451, "step": 7797 }, { "epoch": 0.5283555796463175, "grad_norm": 5.142022609710693, "learning_rate": 9.035252241768774e-05, "loss": 0.8063, "step": 7798 }, { "epoch": 0.5284233349142896, "grad_norm": 8.204577445983887, "learning_rate": 9.035115339858992e-05, "loss": 1.0189, "step": 7799 }, { "epoch": 0.5284910901822617, "grad_norm": 7.62236213684082, "learning_rate": 9.03497843794921e-05, "loss": 1.0905, "step": 7800 }, { "epoch": 0.5285588454502338, "grad_norm": 7.067042827606201, "learning_rate": 9.034841536039428e-05, "loss": 0.8651, "step": 7801 }, { "epoch": 0.5286266007182059, "grad_norm": 5.675161361694336, "learning_rate": 9.034704634129648e-05, "loss": 0.8717, "step": 7802 }, { "epoch": 0.528694355986178, "grad_norm": 6.024029731750488, "learning_rate": 9.034567732219866e-05, "loss": 0.8326, "step": 7803 }, { "epoch": 0.52876211125415, "grad_norm": 6.060807228088379, "learning_rate": 9.034430830310084e-05, "loss": 0.7887, "step": 7804 }, { "epoch": 0.528829866522122, "grad_norm": 7.034177780151367, "learning_rate": 9.034293928400302e-05, "loss": 0.8416, "step": 7805 }, { "epoch": 0.5288976217900941, "grad_norm": 6.663289546966553, "learning_rate": 9.03415702649052e-05, "loss": 0.8078, "step": 7806 }, { "epoch": 0.5289653770580662, "grad_norm": 5.6733880043029785, "learning_rate": 9.034020124580739e-05, "loss": 0.8106, "step": 7807 }, { "epoch": 0.5290331323260383, "grad_norm": 7.0584940910339355, "learning_rate": 9.033883222670957e-05, "loss": 0.8558, "step": 7808 }, { "epoch": 0.5291008875940104, "grad_norm": 7.715261459350586, "learning_rate": 9.033746320761175e-05, "loss": 0.9663, "step": 7809 }, { "epoch": 0.5291686428619825, "grad_norm": 6.987175464630127, "learning_rate": 9.033609418851393e-05, "loss": 0.8837, "step": 7810 }, { "epoch": 0.5292363981299546, "grad_norm": 6.454022407531738, "learning_rate": 9.033472516941611e-05, "loss": 0.7697, "step": 7811 }, { "epoch": 0.5293041533979267, "grad_norm": 6.083451271057129, "learning_rate": 9.03333561503183e-05, "loss": 0.7759, "step": 7812 }, { "epoch": 0.5293719086658988, "grad_norm": 6.736910343170166, "learning_rate": 9.033198713122049e-05, "loss": 0.7654, "step": 7813 }, { "epoch": 0.5294396639338709, "grad_norm": 7.494657516479492, "learning_rate": 9.033061811212267e-05, "loss": 0.9894, "step": 7814 }, { "epoch": 0.529507419201843, "grad_norm": 5.999954700469971, "learning_rate": 9.032924909302485e-05, "loss": 0.7583, "step": 7815 }, { "epoch": 0.5295751744698151, "grad_norm": 6.741001605987549, "learning_rate": 9.032788007392704e-05, "loss": 0.6704, "step": 7816 }, { "epoch": 0.5296429297377871, "grad_norm": 7.073209762573242, "learning_rate": 9.032651105482922e-05, "loss": 0.8104, "step": 7817 }, { "epoch": 0.5297106850057592, "grad_norm": 6.424438953399658, "learning_rate": 9.03251420357314e-05, "loss": 0.8042, "step": 7818 }, { "epoch": 0.5297784402737313, "grad_norm": 5.467334270477295, "learning_rate": 9.032377301663358e-05, "loss": 0.7108, "step": 7819 }, { "epoch": 0.5298461955417033, "grad_norm": 7.322316646575928, "learning_rate": 9.032240399753576e-05, "loss": 0.9461, "step": 7820 }, { "epoch": 0.5299139508096754, "grad_norm": 6.560715198516846, "learning_rate": 9.032103497843796e-05, "loss": 1.135, "step": 7821 }, { "epoch": 0.5299817060776475, "grad_norm": 6.125954627990723, "learning_rate": 9.031966595934014e-05, "loss": 1.0004, "step": 7822 }, { "epoch": 0.5300494613456196, "grad_norm": 6.26869535446167, "learning_rate": 9.031829694024232e-05, "loss": 0.7594, "step": 7823 }, { "epoch": 0.5301172166135917, "grad_norm": 7.5978193283081055, "learning_rate": 9.03169279211445e-05, "loss": 0.7051, "step": 7824 }, { "epoch": 0.5301849718815638, "grad_norm": 9.408920288085938, "learning_rate": 9.031555890204669e-05, "loss": 0.7884, "step": 7825 }, { "epoch": 0.5302527271495359, "grad_norm": 7.134509086608887, "learning_rate": 9.031418988294887e-05, "loss": 0.844, "step": 7826 }, { "epoch": 0.530320482417508, "grad_norm": 9.650331497192383, "learning_rate": 9.031282086385105e-05, "loss": 0.9636, "step": 7827 }, { "epoch": 0.53038823768548, "grad_norm": 8.131028175354004, "learning_rate": 9.031145184475323e-05, "loss": 1.1321, "step": 7828 }, { "epoch": 0.5304559929534521, "grad_norm": 6.363440036773682, "learning_rate": 9.031008282565541e-05, "loss": 0.7437, "step": 7829 }, { "epoch": 0.5305237482214242, "grad_norm": 6.6181440353393555, "learning_rate": 9.030871380655761e-05, "loss": 1.1998, "step": 7830 }, { "epoch": 0.5305915034893963, "grad_norm": 6.380220413208008, "learning_rate": 9.030734478745979e-05, "loss": 0.8995, "step": 7831 }, { "epoch": 0.5306592587573684, "grad_norm": 7.590890407562256, "learning_rate": 9.030597576836197e-05, "loss": 0.8736, "step": 7832 }, { "epoch": 0.5307270140253405, "grad_norm": 7.453338146209717, "learning_rate": 9.030460674926415e-05, "loss": 0.7418, "step": 7833 }, { "epoch": 0.5307947692933126, "grad_norm": 5.30055570602417, "learning_rate": 9.030323773016634e-05, "loss": 0.7002, "step": 7834 }, { "epoch": 0.5308625245612847, "grad_norm": 7.599035739898682, "learning_rate": 9.030186871106852e-05, "loss": 1.0054, "step": 7835 }, { "epoch": 0.5309302798292568, "grad_norm": 7.773530006408691, "learning_rate": 9.03004996919707e-05, "loss": 0.8603, "step": 7836 }, { "epoch": 0.5309980350972288, "grad_norm": 7.378914833068848, "learning_rate": 9.029913067287288e-05, "loss": 0.8082, "step": 7837 }, { "epoch": 0.5310657903652009, "grad_norm": 5.327150344848633, "learning_rate": 9.029776165377508e-05, "loss": 0.7642, "step": 7838 }, { "epoch": 0.5311335456331729, "grad_norm": 6.01016092300415, "learning_rate": 9.029639263467726e-05, "loss": 1.0466, "step": 7839 }, { "epoch": 0.531201300901145, "grad_norm": 4.763113498687744, "learning_rate": 9.029502361557944e-05, "loss": 0.7451, "step": 7840 }, { "epoch": 0.5312690561691171, "grad_norm": 5.175201416015625, "learning_rate": 9.029365459648163e-05, "loss": 0.6928, "step": 7841 }, { "epoch": 0.5313368114370892, "grad_norm": 9.04288387298584, "learning_rate": 9.029228557738381e-05, "loss": 0.7087, "step": 7842 }, { "epoch": 0.5314045667050613, "grad_norm": 6.132844924926758, "learning_rate": 9.029091655828599e-05, "loss": 0.8231, "step": 7843 }, { "epoch": 0.5314723219730334, "grad_norm": 7.074441909790039, "learning_rate": 9.028954753918819e-05, "loss": 0.7966, "step": 7844 }, { "epoch": 0.5315400772410055, "grad_norm": 7.046668529510498, "learning_rate": 9.028817852009037e-05, "loss": 0.9588, "step": 7845 }, { "epoch": 0.5316078325089776, "grad_norm": 6.8423662185668945, "learning_rate": 9.028680950099255e-05, "loss": 0.9413, "step": 7846 }, { "epoch": 0.5316755877769497, "grad_norm": 5.093846797943115, "learning_rate": 9.028544048189473e-05, "loss": 0.6967, "step": 7847 }, { "epoch": 0.5317433430449218, "grad_norm": 7.116734027862549, "learning_rate": 9.028407146279692e-05, "loss": 0.8084, "step": 7848 }, { "epoch": 0.5318110983128939, "grad_norm": 6.06544828414917, "learning_rate": 9.02827024436991e-05, "loss": 0.8041, "step": 7849 }, { "epoch": 0.531878853580866, "grad_norm": 6.238218307495117, "learning_rate": 9.028133342460128e-05, "loss": 0.9562, "step": 7850 }, { "epoch": 0.531946608848838, "grad_norm": 5.939441680908203, "learning_rate": 9.027996440550346e-05, "loss": 0.8226, "step": 7851 }, { "epoch": 0.5320143641168101, "grad_norm": 5.409734725952148, "learning_rate": 9.027859538640564e-05, "loss": 0.7485, "step": 7852 }, { "epoch": 0.5320821193847821, "grad_norm": 5.72265625, "learning_rate": 9.027722636730784e-05, "loss": 0.691, "step": 7853 }, { "epoch": 0.5321498746527542, "grad_norm": 5.743767261505127, "learning_rate": 9.027585734821002e-05, "loss": 0.8193, "step": 7854 }, { "epoch": 0.5322176299207263, "grad_norm": 7.155900955200195, "learning_rate": 9.02744883291122e-05, "loss": 1.1046, "step": 7855 }, { "epoch": 0.5322853851886984, "grad_norm": 5.2816996574401855, "learning_rate": 9.027311931001438e-05, "loss": 0.5548, "step": 7856 }, { "epoch": 0.5323531404566705, "grad_norm": 8.357234001159668, "learning_rate": 9.027175029091657e-05, "loss": 0.9597, "step": 7857 }, { "epoch": 0.5324208957246426, "grad_norm": 6.937591075897217, "learning_rate": 9.027038127181875e-05, "loss": 1.0591, "step": 7858 }, { "epoch": 0.5324886509926147, "grad_norm": 9.02571964263916, "learning_rate": 9.026901225272093e-05, "loss": 0.9186, "step": 7859 }, { "epoch": 0.5325564062605868, "grad_norm": 6.292126178741455, "learning_rate": 9.026764323362311e-05, "loss": 0.9691, "step": 7860 }, { "epoch": 0.5326241615285588, "grad_norm": 6.714123725891113, "learning_rate": 9.026627421452529e-05, "loss": 0.6972, "step": 7861 }, { "epoch": 0.5326919167965309, "grad_norm": 8.798256874084473, "learning_rate": 9.026490519542749e-05, "loss": 1.237, "step": 7862 }, { "epoch": 0.532759672064503, "grad_norm": 7.367827892303467, "learning_rate": 9.026353617632967e-05, "loss": 0.8725, "step": 7863 }, { "epoch": 0.5328274273324751, "grad_norm": 7.99265193939209, "learning_rate": 9.026216715723185e-05, "loss": 0.7761, "step": 7864 }, { "epoch": 0.5328951826004472, "grad_norm": 6.861504554748535, "learning_rate": 9.026079813813403e-05, "loss": 1.0259, "step": 7865 }, { "epoch": 0.5329629378684193, "grad_norm": 6.661327838897705, "learning_rate": 9.025942911903621e-05, "loss": 0.7245, "step": 7866 }, { "epoch": 0.5330306931363914, "grad_norm": 6.973964691162109, "learning_rate": 9.02580600999384e-05, "loss": 1.0628, "step": 7867 }, { "epoch": 0.5330984484043635, "grad_norm": 6.732471466064453, "learning_rate": 9.025669108084058e-05, "loss": 0.7972, "step": 7868 }, { "epoch": 0.5331662036723355, "grad_norm": 5.995905876159668, "learning_rate": 9.025532206174276e-05, "loss": 0.831, "step": 7869 }, { "epoch": 0.5332339589403076, "grad_norm": 7.870401382446289, "learning_rate": 9.025395304264494e-05, "loss": 0.9793, "step": 7870 }, { "epoch": 0.5333017142082797, "grad_norm": 5.577757835388184, "learning_rate": 9.025258402354714e-05, "loss": 0.7032, "step": 7871 }, { "epoch": 0.5333694694762517, "grad_norm": 7.106391429901123, "learning_rate": 9.025121500444932e-05, "loss": 0.8424, "step": 7872 }, { "epoch": 0.5334372247442238, "grad_norm": 5.373198986053467, "learning_rate": 9.02498459853515e-05, "loss": 0.8357, "step": 7873 }, { "epoch": 0.5335049800121959, "grad_norm": 6.655058860778809, "learning_rate": 9.024847696625368e-05, "loss": 1.0405, "step": 7874 }, { "epoch": 0.533572735280168, "grad_norm": 5.896321773529053, "learning_rate": 9.024710794715586e-05, "loss": 1.0676, "step": 7875 }, { "epoch": 0.5336404905481401, "grad_norm": 7.391895771026611, "learning_rate": 9.024573892805805e-05, "loss": 0.9807, "step": 7876 }, { "epoch": 0.5337082458161122, "grad_norm": 8.472131729125977, "learning_rate": 9.024436990896023e-05, "loss": 1.0328, "step": 7877 }, { "epoch": 0.5337760010840843, "grad_norm": 5.47896671295166, "learning_rate": 9.024300088986241e-05, "loss": 0.6698, "step": 7878 }, { "epoch": 0.5338437563520564, "grad_norm": 7.560204982757568, "learning_rate": 9.024163187076459e-05, "loss": 0.7431, "step": 7879 }, { "epoch": 0.5339115116200285, "grad_norm": 7.789547920227051, "learning_rate": 9.024026285166679e-05, "loss": 0.7852, "step": 7880 }, { "epoch": 0.5339792668880006, "grad_norm": 6.868436336517334, "learning_rate": 9.023889383256897e-05, "loss": 0.8807, "step": 7881 }, { "epoch": 0.5340470221559727, "grad_norm": 5.439123153686523, "learning_rate": 9.023752481347115e-05, "loss": 0.8914, "step": 7882 }, { "epoch": 0.5341147774239448, "grad_norm": 6.825755596160889, "learning_rate": 9.023615579437333e-05, "loss": 0.9643, "step": 7883 }, { "epoch": 0.5341825326919168, "grad_norm": 8.002971649169922, "learning_rate": 9.023478677527552e-05, "loss": 0.8881, "step": 7884 }, { "epoch": 0.5342502879598889, "grad_norm": 6.1144022941589355, "learning_rate": 9.02334177561777e-05, "loss": 0.9887, "step": 7885 }, { "epoch": 0.5343180432278609, "grad_norm": 6.414967060089111, "learning_rate": 9.023204873707988e-05, "loss": 0.8882, "step": 7886 }, { "epoch": 0.534385798495833, "grad_norm": 6.222415447235107, "learning_rate": 9.023067971798208e-05, "loss": 0.7824, "step": 7887 }, { "epoch": 0.5344535537638051, "grad_norm": 6.128655910491943, "learning_rate": 9.022931069888426e-05, "loss": 1.0061, "step": 7888 }, { "epoch": 0.5345213090317772, "grad_norm": 8.61054515838623, "learning_rate": 9.022794167978644e-05, "loss": 0.8921, "step": 7889 }, { "epoch": 0.5345890642997493, "grad_norm": 7.387160301208496, "learning_rate": 9.022657266068863e-05, "loss": 1.0001, "step": 7890 }, { "epoch": 0.5346568195677214, "grad_norm": 5.878726005554199, "learning_rate": 9.022520364159081e-05, "loss": 0.8865, "step": 7891 }, { "epoch": 0.5347245748356935, "grad_norm": 6.011378288269043, "learning_rate": 9.022383462249299e-05, "loss": 0.8961, "step": 7892 }, { "epoch": 0.5347923301036656, "grad_norm": 6.258980751037598, "learning_rate": 9.022246560339517e-05, "loss": 0.8744, "step": 7893 }, { "epoch": 0.5348600853716377, "grad_norm": 8.403583526611328, "learning_rate": 9.022109658429737e-05, "loss": 0.7601, "step": 7894 }, { "epoch": 0.5349278406396097, "grad_norm": 9.266546249389648, "learning_rate": 9.021972756519955e-05, "loss": 1.0684, "step": 7895 }, { "epoch": 0.5349955959075818, "grad_norm": 6.160810470581055, "learning_rate": 9.021835854610173e-05, "loss": 0.6533, "step": 7896 }, { "epoch": 0.5350633511755539, "grad_norm": 6.975162982940674, "learning_rate": 9.02169895270039e-05, "loss": 0.8974, "step": 7897 }, { "epoch": 0.535131106443526, "grad_norm": 6.557673931121826, "learning_rate": 9.021562050790609e-05, "loss": 0.8291, "step": 7898 }, { "epoch": 0.5351988617114981, "grad_norm": 6.393524169921875, "learning_rate": 9.021425148880828e-05, "loss": 0.8898, "step": 7899 }, { "epoch": 0.5352666169794702, "grad_norm": 6.089913368225098, "learning_rate": 9.021288246971046e-05, "loss": 0.9246, "step": 7900 }, { "epoch": 0.5353343722474423, "grad_norm": 5.542110919952393, "learning_rate": 9.021151345061264e-05, "loss": 0.7785, "step": 7901 }, { "epoch": 0.5354021275154143, "grad_norm": 6.648521900177002, "learning_rate": 9.021014443151482e-05, "loss": 0.7992, "step": 7902 }, { "epoch": 0.5354698827833864, "grad_norm": 5.972752571105957, "learning_rate": 9.020877541241702e-05, "loss": 0.8436, "step": 7903 }, { "epoch": 0.5355376380513585, "grad_norm": 8.538671493530273, "learning_rate": 9.02074063933192e-05, "loss": 0.7983, "step": 7904 }, { "epoch": 0.5356053933193305, "grad_norm": 6.8005690574646, "learning_rate": 9.020603737422138e-05, "loss": 0.6566, "step": 7905 }, { "epoch": 0.5356731485873026, "grad_norm": 9.595316886901855, "learning_rate": 9.020466835512356e-05, "loss": 0.9392, "step": 7906 }, { "epoch": 0.5357409038552747, "grad_norm": 6.57702112197876, "learning_rate": 9.020329933602574e-05, "loss": 0.8176, "step": 7907 }, { "epoch": 0.5358086591232468, "grad_norm": 6.209599018096924, "learning_rate": 9.020193031692793e-05, "loss": 0.8261, "step": 7908 }, { "epoch": 0.5358764143912189, "grad_norm": 8.036966323852539, "learning_rate": 9.020056129783011e-05, "loss": 0.8524, "step": 7909 }, { "epoch": 0.535944169659191, "grad_norm": 7.813173770904541, "learning_rate": 9.019919227873229e-05, "loss": 1.0166, "step": 7910 }, { "epoch": 0.5360119249271631, "grad_norm": 6.065460205078125, "learning_rate": 9.019782325963447e-05, "loss": 0.9869, "step": 7911 }, { "epoch": 0.5360796801951352, "grad_norm": 5.9636993408203125, "learning_rate": 9.019645424053667e-05, "loss": 0.8227, "step": 7912 }, { "epoch": 0.5361474354631073, "grad_norm": 7.270611763000488, "learning_rate": 9.019508522143885e-05, "loss": 0.7653, "step": 7913 }, { "epoch": 0.5362151907310794, "grad_norm": 6.054271221160889, "learning_rate": 9.019371620234103e-05, "loss": 0.6753, "step": 7914 }, { "epoch": 0.5362829459990515, "grad_norm": 6.736015796661377, "learning_rate": 9.019234718324321e-05, "loss": 0.868, "step": 7915 }, { "epoch": 0.5363507012670236, "grad_norm": 6.971399307250977, "learning_rate": 9.019097816414539e-05, "loss": 0.9097, "step": 7916 }, { "epoch": 0.5364184565349956, "grad_norm": 7.001399040222168, "learning_rate": 9.018960914504758e-05, "loss": 0.9245, "step": 7917 }, { "epoch": 0.5364862118029676, "grad_norm": 6.465878009796143, "learning_rate": 9.018824012594976e-05, "loss": 0.8308, "step": 7918 }, { "epoch": 0.5365539670709397, "grad_norm": 6.622878551483154, "learning_rate": 9.018687110685194e-05, "loss": 0.6466, "step": 7919 }, { "epoch": 0.5366217223389118, "grad_norm": 7.167222023010254, "learning_rate": 9.018550208775412e-05, "loss": 0.9521, "step": 7920 }, { "epoch": 0.5366894776068839, "grad_norm": 7.362137794494629, "learning_rate": 9.01841330686563e-05, "loss": 0.7561, "step": 7921 }, { "epoch": 0.536757232874856, "grad_norm": 6.314590930938721, "learning_rate": 9.01827640495585e-05, "loss": 0.8892, "step": 7922 }, { "epoch": 0.5368249881428281, "grad_norm": 5.427424430847168, "learning_rate": 9.018139503046068e-05, "loss": 0.7648, "step": 7923 }, { "epoch": 0.5368927434108002, "grad_norm": 8.388633728027344, "learning_rate": 9.018002601136286e-05, "loss": 1.0141, "step": 7924 }, { "epoch": 0.5369604986787723, "grad_norm": 5.852472305297852, "learning_rate": 9.017865699226504e-05, "loss": 0.7744, "step": 7925 }, { "epoch": 0.5370282539467444, "grad_norm": 6.68207311630249, "learning_rate": 9.017728797316723e-05, "loss": 0.9003, "step": 7926 }, { "epoch": 0.5370960092147165, "grad_norm": 6.475498199462891, "learning_rate": 9.017591895406941e-05, "loss": 1.0294, "step": 7927 }, { "epoch": 0.5371637644826885, "grad_norm": 8.690805435180664, "learning_rate": 9.017454993497159e-05, "loss": 1.0175, "step": 7928 }, { "epoch": 0.5372315197506606, "grad_norm": 5.874716758728027, "learning_rate": 9.017318091587377e-05, "loss": 0.7761, "step": 7929 }, { "epoch": 0.5372992750186327, "grad_norm": 5.937534332275391, "learning_rate": 9.017181189677595e-05, "loss": 0.6782, "step": 7930 }, { "epoch": 0.5373670302866048, "grad_norm": 6.882140159606934, "learning_rate": 9.017044287767815e-05, "loss": 0.8815, "step": 7931 }, { "epoch": 0.5374347855545769, "grad_norm": 5.27649450302124, "learning_rate": 9.016907385858033e-05, "loss": 0.6816, "step": 7932 }, { "epoch": 0.537502540822549, "grad_norm": 5.861900329589844, "learning_rate": 9.016770483948251e-05, "loss": 0.7224, "step": 7933 }, { "epoch": 0.537570296090521, "grad_norm": 5.196268558502197, "learning_rate": 9.01663358203847e-05, "loss": 0.7526, "step": 7934 }, { "epoch": 0.5376380513584931, "grad_norm": 7.050784111022949, "learning_rate": 9.016496680128688e-05, "loss": 0.7451, "step": 7935 }, { "epoch": 0.5377058066264652, "grad_norm": 6.952356338500977, "learning_rate": 9.016359778218906e-05, "loss": 0.8074, "step": 7936 }, { "epoch": 0.5377735618944373, "grad_norm": 6.38202428817749, "learning_rate": 9.016222876309126e-05, "loss": 0.8738, "step": 7937 }, { "epoch": 0.5378413171624093, "grad_norm": 7.252553462982178, "learning_rate": 9.016085974399344e-05, "loss": 1.001, "step": 7938 }, { "epoch": 0.5379090724303814, "grad_norm": 7.240688323974609, "learning_rate": 9.015949072489562e-05, "loss": 0.818, "step": 7939 }, { "epoch": 0.5379768276983535, "grad_norm": 8.255992889404297, "learning_rate": 9.015812170579781e-05, "loss": 0.9443, "step": 7940 }, { "epoch": 0.5380445829663256, "grad_norm": 8.154399871826172, "learning_rate": 9.015675268669999e-05, "loss": 1.2508, "step": 7941 }, { "epoch": 0.5381123382342977, "grad_norm": 6.9914069175720215, "learning_rate": 9.015538366760217e-05, "loss": 0.8552, "step": 7942 }, { "epoch": 0.5381800935022698, "grad_norm": 6.447218894958496, "learning_rate": 9.015401464850435e-05, "loss": 0.9249, "step": 7943 }, { "epoch": 0.5382478487702419, "grad_norm": 6.867689609527588, "learning_rate": 9.015264562940653e-05, "loss": 1.0145, "step": 7944 }, { "epoch": 0.538315604038214, "grad_norm": 5.5277419090271, "learning_rate": 9.015127661030873e-05, "loss": 0.5239, "step": 7945 }, { "epoch": 0.5383833593061861, "grad_norm": 6.092846393585205, "learning_rate": 9.01499075912109e-05, "loss": 0.7933, "step": 7946 }, { "epoch": 0.5384511145741582, "grad_norm": 5.636915683746338, "learning_rate": 9.014853857211309e-05, "loss": 1.0177, "step": 7947 }, { "epoch": 0.5385188698421303, "grad_norm": 6.031810760498047, "learning_rate": 9.014716955301527e-05, "loss": 0.7189, "step": 7948 }, { "epoch": 0.5385866251101024, "grad_norm": 6.822633266448975, "learning_rate": 9.014580053391746e-05, "loss": 0.8511, "step": 7949 }, { "epoch": 0.5386543803780744, "grad_norm": 6.105456352233887, "learning_rate": 9.014443151481964e-05, "loss": 0.8941, "step": 7950 }, { "epoch": 0.5387221356460464, "grad_norm": 6.250092029571533, "learning_rate": 9.014306249572182e-05, "loss": 0.7968, "step": 7951 }, { "epoch": 0.5387898909140185, "grad_norm": 7.53422212600708, "learning_rate": 9.0141693476624e-05, "loss": 0.7581, "step": 7952 }, { "epoch": 0.5388576461819906, "grad_norm": 5.800909042358398, "learning_rate": 9.014032445752618e-05, "loss": 0.5946, "step": 7953 }, { "epoch": 0.5389254014499627, "grad_norm": 6.202663898468018, "learning_rate": 9.013895543842838e-05, "loss": 0.9158, "step": 7954 }, { "epoch": 0.5389931567179348, "grad_norm": 6.153894424438477, "learning_rate": 9.013758641933056e-05, "loss": 0.921, "step": 7955 }, { "epoch": 0.5390609119859069, "grad_norm": 7.0276384353637695, "learning_rate": 9.013621740023274e-05, "loss": 0.8353, "step": 7956 }, { "epoch": 0.539128667253879, "grad_norm": 7.378089427947998, "learning_rate": 9.013484838113492e-05, "loss": 0.7336, "step": 7957 }, { "epoch": 0.5391964225218511, "grad_norm": 7.097971439361572, "learning_rate": 9.013347936203711e-05, "loss": 0.7911, "step": 7958 }, { "epoch": 0.5392641777898232, "grad_norm": 6.771259307861328, "learning_rate": 9.013211034293929e-05, "loss": 0.8515, "step": 7959 }, { "epoch": 0.5393319330577953, "grad_norm": 6.949087142944336, "learning_rate": 9.013074132384147e-05, "loss": 1.0146, "step": 7960 }, { "epoch": 0.5393996883257673, "grad_norm": 7.584465026855469, "learning_rate": 9.012937230474365e-05, "loss": 0.681, "step": 7961 }, { "epoch": 0.5394674435937394, "grad_norm": 7.227475643157959, "learning_rate": 9.012800328564583e-05, "loss": 1.085, "step": 7962 }, { "epoch": 0.5395351988617115, "grad_norm": 5.863607406616211, "learning_rate": 9.012663426654803e-05, "loss": 0.7184, "step": 7963 }, { "epoch": 0.5396029541296836, "grad_norm": 6.440633296966553, "learning_rate": 9.01252652474502e-05, "loss": 0.7896, "step": 7964 }, { "epoch": 0.5396707093976557, "grad_norm": 6.37113094329834, "learning_rate": 9.012389622835239e-05, "loss": 0.8235, "step": 7965 }, { "epoch": 0.5397384646656278, "grad_norm": 7.00465202331543, "learning_rate": 9.012252720925457e-05, "loss": 0.9816, "step": 7966 }, { "epoch": 0.5398062199335998, "grad_norm": 6.948329925537109, "learning_rate": 9.012115819015676e-05, "loss": 0.721, "step": 7967 }, { "epoch": 0.5398739752015719, "grad_norm": 7.836682319641113, "learning_rate": 9.011978917105894e-05, "loss": 0.8239, "step": 7968 }, { "epoch": 0.539941730469544, "grad_norm": 7.849715232849121, "learning_rate": 9.011842015196112e-05, "loss": 0.9537, "step": 7969 }, { "epoch": 0.5400094857375161, "grad_norm": 6.9721221923828125, "learning_rate": 9.01170511328633e-05, "loss": 0.8685, "step": 7970 }, { "epoch": 0.5400772410054882, "grad_norm": 6.895398139953613, "learning_rate": 9.011568211376548e-05, "loss": 0.7817, "step": 7971 }, { "epoch": 0.5401449962734602, "grad_norm": 6.040884017944336, "learning_rate": 9.011431309466768e-05, "loss": 0.7531, "step": 7972 }, { "epoch": 0.5402127515414323, "grad_norm": 8.733409881591797, "learning_rate": 9.011294407556986e-05, "loss": 1.1737, "step": 7973 }, { "epoch": 0.5402805068094044, "grad_norm": 7.4223761558532715, "learning_rate": 9.011157505647204e-05, "loss": 0.7954, "step": 7974 }, { "epoch": 0.5403482620773765, "grad_norm": 6.471921443939209, "learning_rate": 9.011020603737422e-05, "loss": 0.8795, "step": 7975 }, { "epoch": 0.5404160173453486, "grad_norm": 7.537938594818115, "learning_rate": 9.01088370182764e-05, "loss": 0.888, "step": 7976 }, { "epoch": 0.5404837726133207, "grad_norm": 7.756222724914551, "learning_rate": 9.010746799917859e-05, "loss": 1.0905, "step": 7977 }, { "epoch": 0.5405515278812928, "grad_norm": 6.659689426422119, "learning_rate": 9.010609898008077e-05, "loss": 0.7667, "step": 7978 }, { "epoch": 0.5406192831492649, "grad_norm": 9.589966773986816, "learning_rate": 9.010472996098295e-05, "loss": 0.9853, "step": 7979 }, { "epoch": 0.540687038417237, "grad_norm": 7.351951599121094, "learning_rate": 9.010336094188515e-05, "loss": 0.7348, "step": 7980 }, { "epoch": 0.5407547936852091, "grad_norm": 7.74218225479126, "learning_rate": 9.010199192278733e-05, "loss": 1.1712, "step": 7981 }, { "epoch": 0.5408225489531812, "grad_norm": 6.658298015594482, "learning_rate": 9.01006229036895e-05, "loss": 0.8334, "step": 7982 }, { "epoch": 0.5408903042211531, "grad_norm": 6.736339092254639, "learning_rate": 9.00992538845917e-05, "loss": 0.9479, "step": 7983 }, { "epoch": 0.5409580594891252, "grad_norm": 6.63740873336792, "learning_rate": 9.009788486549388e-05, "loss": 0.9086, "step": 7984 }, { "epoch": 0.5410258147570973, "grad_norm": 6.095336437225342, "learning_rate": 9.009651584639606e-05, "loss": 0.8407, "step": 7985 }, { "epoch": 0.5410935700250694, "grad_norm": 5.067421913146973, "learning_rate": 9.009514682729826e-05, "loss": 0.8147, "step": 7986 }, { "epoch": 0.5411613252930415, "grad_norm": 5.876379013061523, "learning_rate": 9.009377780820044e-05, "loss": 0.8639, "step": 7987 }, { "epoch": 0.5412290805610136, "grad_norm": 6.525248050689697, "learning_rate": 9.009240878910262e-05, "loss": 0.7483, "step": 7988 }, { "epoch": 0.5412968358289857, "grad_norm": 7.615539073944092, "learning_rate": 9.00910397700048e-05, "loss": 0.8863, "step": 7989 }, { "epoch": 0.5413645910969578, "grad_norm": 7.767553329467773, "learning_rate": 9.008967075090699e-05, "loss": 0.8501, "step": 7990 }, { "epoch": 0.5414323463649299, "grad_norm": 7.159237384796143, "learning_rate": 9.008830173180917e-05, "loss": 0.8759, "step": 7991 }, { "epoch": 0.541500101632902, "grad_norm": 6.989110469818115, "learning_rate": 9.008693271271135e-05, "loss": 0.9121, "step": 7992 }, { "epoch": 0.5415678569008741, "grad_norm": 6.767205238342285, "learning_rate": 9.008556369361353e-05, "loss": 0.6788, "step": 7993 }, { "epoch": 0.5416356121688461, "grad_norm": 5.92645788192749, "learning_rate": 9.008419467451571e-05, "loss": 0.6165, "step": 7994 }, { "epoch": 0.5417033674368182, "grad_norm": 5.536478042602539, "learning_rate": 9.00828256554179e-05, "loss": 0.6475, "step": 7995 }, { "epoch": 0.5417711227047903, "grad_norm": 6.186464786529541, "learning_rate": 9.008145663632009e-05, "loss": 0.6399, "step": 7996 }, { "epoch": 0.5418388779727624, "grad_norm": 6.986947536468506, "learning_rate": 9.008008761722227e-05, "loss": 0.8797, "step": 7997 }, { "epoch": 0.5419066332407345, "grad_norm": 7.718267917633057, "learning_rate": 9.007871859812445e-05, "loss": 0.8077, "step": 7998 }, { "epoch": 0.5419743885087066, "grad_norm": 6.4064483642578125, "learning_rate": 9.007734957902663e-05, "loss": 0.7077, "step": 7999 }, { "epoch": 0.5420421437766786, "grad_norm": 7.1472320556640625, "learning_rate": 9.007598055992882e-05, "loss": 0.8007, "step": 8000 }, { "epoch": 0.5421098990446507, "grad_norm": 7.518289566040039, "learning_rate": 9.0074611540831e-05, "loss": 0.8132, "step": 8001 }, { "epoch": 0.5421776543126228, "grad_norm": 6.509357929229736, "learning_rate": 9.007324252173318e-05, "loss": 0.8157, "step": 8002 }, { "epoch": 0.5422454095805949, "grad_norm": 6.129663467407227, "learning_rate": 9.007187350263536e-05, "loss": 1.0118, "step": 8003 }, { "epoch": 0.542313164848567, "grad_norm": 7.274816513061523, "learning_rate": 9.007050448353756e-05, "loss": 1.0167, "step": 8004 }, { "epoch": 0.542380920116539, "grad_norm": 6.708817005157471, "learning_rate": 9.006913546443974e-05, "loss": 0.8317, "step": 8005 }, { "epoch": 0.5424486753845111, "grad_norm": 6.974154949188232, "learning_rate": 9.006776644534192e-05, "loss": 0.6758, "step": 8006 }, { "epoch": 0.5425164306524832, "grad_norm": 6.915993690490723, "learning_rate": 9.00663974262441e-05, "loss": 0.8113, "step": 8007 }, { "epoch": 0.5425841859204553, "grad_norm": 7.1193623542785645, "learning_rate": 9.006502840714628e-05, "loss": 0.9428, "step": 8008 }, { "epoch": 0.5426519411884274, "grad_norm": 6.0862298011779785, "learning_rate": 9.006365938804847e-05, "loss": 0.8424, "step": 8009 }, { "epoch": 0.5427196964563995, "grad_norm": 7.726379871368408, "learning_rate": 9.006229036895065e-05, "loss": 1.0722, "step": 8010 }, { "epoch": 0.5427874517243716, "grad_norm": 7.901097774505615, "learning_rate": 9.006092134985283e-05, "loss": 0.9023, "step": 8011 }, { "epoch": 0.5428552069923437, "grad_norm": 6.264953136444092, "learning_rate": 9.005955233075501e-05, "loss": 0.967, "step": 8012 }, { "epoch": 0.5429229622603158, "grad_norm": 7.827919960021973, "learning_rate": 9.00581833116572e-05, "loss": 0.8026, "step": 8013 }, { "epoch": 0.5429907175282879, "grad_norm": 5.923975944519043, "learning_rate": 9.005681429255939e-05, "loss": 0.8619, "step": 8014 }, { "epoch": 0.54305847279626, "grad_norm": 8.078742027282715, "learning_rate": 9.005544527346157e-05, "loss": 1.0838, "step": 8015 }, { "epoch": 0.5431262280642319, "grad_norm": 8.241996765136719, "learning_rate": 9.005407625436375e-05, "loss": 0.8315, "step": 8016 }, { "epoch": 0.543193983332204, "grad_norm": 8.346336364746094, "learning_rate": 9.005270723526593e-05, "loss": 0.809, "step": 8017 }, { "epoch": 0.5432617386001761, "grad_norm": 8.163125991821289, "learning_rate": 9.005133821616812e-05, "loss": 0.9445, "step": 8018 }, { "epoch": 0.5433294938681482, "grad_norm": 6.224919319152832, "learning_rate": 9.00499691970703e-05, "loss": 0.8583, "step": 8019 }, { "epoch": 0.5433972491361203, "grad_norm": 6.901029586791992, "learning_rate": 9.004860017797248e-05, "loss": 0.8486, "step": 8020 }, { "epoch": 0.5434650044040924, "grad_norm": 5.692861080169678, "learning_rate": 9.004723115887466e-05, "loss": 0.7233, "step": 8021 }, { "epoch": 0.5435327596720645, "grad_norm": 7.3206939697265625, "learning_rate": 9.004586213977684e-05, "loss": 0.9268, "step": 8022 }, { "epoch": 0.5436005149400366, "grad_norm": 5.781581401824951, "learning_rate": 9.004449312067904e-05, "loss": 0.8536, "step": 8023 }, { "epoch": 0.5436682702080087, "grad_norm": 6.006667137145996, "learning_rate": 9.004312410158122e-05, "loss": 0.6872, "step": 8024 }, { "epoch": 0.5437360254759808, "grad_norm": 6.082067012786865, "learning_rate": 9.00417550824834e-05, "loss": 1.0156, "step": 8025 }, { "epoch": 0.5438037807439529, "grad_norm": 9.1718111038208, "learning_rate": 9.004038606338559e-05, "loss": 1.1392, "step": 8026 }, { "epoch": 0.543871536011925, "grad_norm": 8.803008079528809, "learning_rate": 9.003901704428777e-05, "loss": 1.1443, "step": 8027 }, { "epoch": 0.543939291279897, "grad_norm": 6.125607013702393, "learning_rate": 9.003764802518995e-05, "loss": 0.8334, "step": 8028 }, { "epoch": 0.5440070465478691, "grad_norm": 5.974092960357666, "learning_rate": 9.003627900609215e-05, "loss": 0.7561, "step": 8029 }, { "epoch": 0.5440748018158412, "grad_norm": 6.347378730773926, "learning_rate": 9.003490998699433e-05, "loss": 0.7946, "step": 8030 }, { "epoch": 0.5441425570838133, "grad_norm": 6.224796295166016, "learning_rate": 9.00335409678965e-05, "loss": 0.7501, "step": 8031 }, { "epoch": 0.5442103123517853, "grad_norm": 9.026642799377441, "learning_rate": 9.00321719487987e-05, "loss": 0.8056, "step": 8032 }, { "epoch": 0.5442780676197574, "grad_norm": 6.823796272277832, "learning_rate": 9.003080292970088e-05, "loss": 0.7378, "step": 8033 }, { "epoch": 0.5443458228877295, "grad_norm": 6.37065315246582, "learning_rate": 9.002943391060306e-05, "loss": 1.0857, "step": 8034 }, { "epoch": 0.5444135781557016, "grad_norm": 6.7236151695251465, "learning_rate": 9.002806489150524e-05, "loss": 0.7145, "step": 8035 }, { "epoch": 0.5444813334236737, "grad_norm": 8.093012809753418, "learning_rate": 9.002669587240743e-05, "loss": 1.1783, "step": 8036 }, { "epoch": 0.5445490886916458, "grad_norm": 6.60814094543457, "learning_rate": 9.002532685330962e-05, "loss": 0.7929, "step": 8037 }, { "epoch": 0.5446168439596178, "grad_norm": 7.279522895812988, "learning_rate": 9.00239578342118e-05, "loss": 0.6758, "step": 8038 }, { "epoch": 0.5446845992275899, "grad_norm": 5.578268051147461, "learning_rate": 9.002258881511398e-05, "loss": 0.7601, "step": 8039 }, { "epoch": 0.544752354495562, "grad_norm": 4.871660232543945, "learning_rate": 9.002121979601616e-05, "loss": 0.635, "step": 8040 }, { "epoch": 0.5448201097635341, "grad_norm": 6.011699199676514, "learning_rate": 9.001985077691835e-05, "loss": 0.8716, "step": 8041 }, { "epoch": 0.5448878650315062, "grad_norm": 5.491373062133789, "learning_rate": 9.001848175782053e-05, "loss": 0.9069, "step": 8042 }, { "epoch": 0.5449556202994783, "grad_norm": 7.352384567260742, "learning_rate": 9.001711273872271e-05, "loss": 0.9996, "step": 8043 }, { "epoch": 0.5450233755674504, "grad_norm": 5.829630374908447, "learning_rate": 9.001574371962489e-05, "loss": 0.9563, "step": 8044 }, { "epoch": 0.5450911308354225, "grad_norm": 6.887197017669678, "learning_rate": 9.001437470052708e-05, "loss": 1.0043, "step": 8045 }, { "epoch": 0.5451588861033946, "grad_norm": 7.507176399230957, "learning_rate": 9.001300568142927e-05, "loss": 1.1256, "step": 8046 }, { "epoch": 0.5452266413713667, "grad_norm": 5.741685390472412, "learning_rate": 9.001163666233145e-05, "loss": 0.7904, "step": 8047 }, { "epoch": 0.5452943966393388, "grad_norm": 6.044425964355469, "learning_rate": 9.001026764323363e-05, "loss": 0.8707, "step": 8048 }, { "epoch": 0.5453621519073107, "grad_norm": 7.574776649475098, "learning_rate": 9.00088986241358e-05, "loss": 1.1286, "step": 8049 }, { "epoch": 0.5454299071752828, "grad_norm": 6.032942771911621, "learning_rate": 9.0007529605038e-05, "loss": 0.7711, "step": 8050 }, { "epoch": 0.5454976624432549, "grad_norm": 9.648497581481934, "learning_rate": 9.000616058594018e-05, "loss": 0.8222, "step": 8051 }, { "epoch": 0.545565417711227, "grad_norm": 6.6689839363098145, "learning_rate": 9.000479156684236e-05, "loss": 0.7225, "step": 8052 }, { "epoch": 0.5456331729791991, "grad_norm": 4.215627193450928, "learning_rate": 9.000342254774454e-05, "loss": 0.6903, "step": 8053 }, { "epoch": 0.5457009282471712, "grad_norm": 6.513290882110596, "learning_rate": 9.000205352864672e-05, "loss": 0.9443, "step": 8054 }, { "epoch": 0.5457686835151433, "grad_norm": 5.759246826171875, "learning_rate": 9.000068450954892e-05, "loss": 0.7979, "step": 8055 }, { "epoch": 0.5458364387831154, "grad_norm": 5.937248229980469, "learning_rate": 8.99993154904511e-05, "loss": 0.8381, "step": 8056 }, { "epoch": 0.5459041940510875, "grad_norm": 8.01611328125, "learning_rate": 8.999794647135328e-05, "loss": 0.9463, "step": 8057 }, { "epoch": 0.5459719493190596, "grad_norm": 7.086623191833496, "learning_rate": 8.999657745225546e-05, "loss": 0.711, "step": 8058 }, { "epoch": 0.5460397045870317, "grad_norm": 8.19179916381836, "learning_rate": 8.999520843315765e-05, "loss": 0.7565, "step": 8059 }, { "epoch": 0.5461074598550038, "grad_norm": 7.0273518562316895, "learning_rate": 8.999383941405983e-05, "loss": 0.9285, "step": 8060 }, { "epoch": 0.5461752151229758, "grad_norm": 7.5481743812561035, "learning_rate": 8.999247039496201e-05, "loss": 0.9347, "step": 8061 }, { "epoch": 0.5462429703909479, "grad_norm": 7.882439613342285, "learning_rate": 8.999110137586419e-05, "loss": 0.8584, "step": 8062 }, { "epoch": 0.54631072565892, "grad_norm": 7.260676860809326, "learning_rate": 8.998973235676637e-05, "loss": 0.7111, "step": 8063 }, { "epoch": 0.5463784809268921, "grad_norm": 6.826397895812988, "learning_rate": 8.998836333766857e-05, "loss": 0.6173, "step": 8064 }, { "epoch": 0.5464462361948641, "grad_norm": 7.557692527770996, "learning_rate": 8.998699431857075e-05, "loss": 0.9096, "step": 8065 }, { "epoch": 0.5465139914628362, "grad_norm": 5.558192253112793, "learning_rate": 8.998562529947293e-05, "loss": 0.8556, "step": 8066 }, { "epoch": 0.5465817467308083, "grad_norm": 7.186037540435791, "learning_rate": 8.99842562803751e-05, "loss": 1.0854, "step": 8067 }, { "epoch": 0.5466495019987804, "grad_norm": 6.516674518585205, "learning_rate": 8.99828872612773e-05, "loss": 0.7311, "step": 8068 }, { "epoch": 0.5467172572667525, "grad_norm": 7.974714279174805, "learning_rate": 8.998151824217948e-05, "loss": 0.9699, "step": 8069 }, { "epoch": 0.5467850125347246, "grad_norm": 6.709014892578125, "learning_rate": 8.998014922308166e-05, "loss": 0.8903, "step": 8070 }, { "epoch": 0.5468527678026966, "grad_norm": 5.7826361656188965, "learning_rate": 8.997878020398384e-05, "loss": 0.7266, "step": 8071 }, { "epoch": 0.5469205230706687, "grad_norm": 6.743555068969727, "learning_rate": 8.997741118488604e-05, "loss": 0.9665, "step": 8072 }, { "epoch": 0.5469882783386408, "grad_norm": 6.872563362121582, "learning_rate": 8.997604216578822e-05, "loss": 0.8867, "step": 8073 }, { "epoch": 0.5470560336066129, "grad_norm": 7.1092658042907715, "learning_rate": 8.99746731466904e-05, "loss": 1.1457, "step": 8074 }, { "epoch": 0.547123788874585, "grad_norm": 5.996377944946289, "learning_rate": 8.997330412759259e-05, "loss": 0.784, "step": 8075 }, { "epoch": 0.5471915441425571, "grad_norm": 6.767289638519287, "learning_rate": 8.997193510849477e-05, "loss": 0.9997, "step": 8076 }, { "epoch": 0.5472592994105292, "grad_norm": 8.10904598236084, "learning_rate": 8.997056608939695e-05, "loss": 0.9479, "step": 8077 }, { "epoch": 0.5473270546785013, "grad_norm": 7.407421588897705, "learning_rate": 8.996919707029914e-05, "loss": 0.6885, "step": 8078 }, { "epoch": 0.5473948099464734, "grad_norm": 9.024150848388672, "learning_rate": 8.996782805120132e-05, "loss": 0.8968, "step": 8079 }, { "epoch": 0.5474625652144455, "grad_norm": 5.622673511505127, "learning_rate": 8.99664590321035e-05, "loss": 0.6612, "step": 8080 }, { "epoch": 0.5475303204824175, "grad_norm": 6.339783191680908, "learning_rate": 8.996509001300569e-05, "loss": 1.0033, "step": 8081 }, { "epoch": 0.5475980757503895, "grad_norm": 6.384222984313965, "learning_rate": 8.996372099390788e-05, "loss": 0.9403, "step": 8082 }, { "epoch": 0.5476658310183616, "grad_norm": 4.613900661468506, "learning_rate": 8.996235197481006e-05, "loss": 0.6112, "step": 8083 }, { "epoch": 0.5477335862863337, "grad_norm": 8.393567085266113, "learning_rate": 8.996098295571224e-05, "loss": 1.0299, "step": 8084 }, { "epoch": 0.5478013415543058, "grad_norm": 6.199436664581299, "learning_rate": 8.995961393661442e-05, "loss": 0.7737, "step": 8085 }, { "epoch": 0.5478690968222779, "grad_norm": 8.227348327636719, "learning_rate": 8.99582449175166e-05, "loss": 0.9148, "step": 8086 }, { "epoch": 0.54793685209025, "grad_norm": 4.897773265838623, "learning_rate": 8.99568758984188e-05, "loss": 0.7832, "step": 8087 }, { "epoch": 0.5480046073582221, "grad_norm": 5.995389938354492, "learning_rate": 8.995550687932098e-05, "loss": 0.7117, "step": 8088 }, { "epoch": 0.5480723626261942, "grad_norm": 6.70106840133667, "learning_rate": 8.995413786022316e-05, "loss": 0.6605, "step": 8089 }, { "epoch": 0.5481401178941663, "grad_norm": 6.50885534286499, "learning_rate": 8.995276884112534e-05, "loss": 0.8888, "step": 8090 }, { "epoch": 0.5482078731621384, "grad_norm": 6.068159580230713, "learning_rate": 8.995139982202753e-05, "loss": 0.6167, "step": 8091 }, { "epoch": 0.5482756284301105, "grad_norm": 7.8484578132629395, "learning_rate": 8.995003080292971e-05, "loss": 1.0058, "step": 8092 }, { "epoch": 0.5483433836980826, "grad_norm": 8.712899208068848, "learning_rate": 8.994866178383189e-05, "loss": 1.0509, "step": 8093 }, { "epoch": 0.5484111389660546, "grad_norm": 5.627629280090332, "learning_rate": 8.994729276473407e-05, "loss": 0.6542, "step": 8094 }, { "epoch": 0.5484788942340267, "grad_norm": 9.192571640014648, "learning_rate": 8.994592374563625e-05, "loss": 1.072, "step": 8095 }, { "epoch": 0.5485466495019988, "grad_norm": 7.169675350189209, "learning_rate": 8.994455472653844e-05, "loss": 0.8983, "step": 8096 }, { "epoch": 0.5486144047699709, "grad_norm": 5.486343860626221, "learning_rate": 8.994318570744063e-05, "loss": 0.8792, "step": 8097 }, { "epoch": 0.5486821600379429, "grad_norm": 7.902504920959473, "learning_rate": 8.99418166883428e-05, "loss": 0.7665, "step": 8098 }, { "epoch": 0.548749915305915, "grad_norm": 6.8819122314453125, "learning_rate": 8.994044766924499e-05, "loss": 0.7169, "step": 8099 }, { "epoch": 0.5488176705738871, "grad_norm": 8.217923164367676, "learning_rate": 8.993907865014718e-05, "loss": 0.7619, "step": 8100 }, { "epoch": 0.5488854258418592, "grad_norm": 6.089666366577148, "learning_rate": 8.993770963104936e-05, "loss": 0.6823, "step": 8101 }, { "epoch": 0.5489531811098313, "grad_norm": 5.951727390289307, "learning_rate": 8.993634061195154e-05, "loss": 0.9311, "step": 8102 }, { "epoch": 0.5490209363778034, "grad_norm": 5.070009231567383, "learning_rate": 8.993497159285372e-05, "loss": 0.7265, "step": 8103 }, { "epoch": 0.5490886916457755, "grad_norm": 6.631993293762207, "learning_rate": 8.99336025737559e-05, "loss": 0.8403, "step": 8104 }, { "epoch": 0.5491564469137475, "grad_norm": 6.938666820526123, "learning_rate": 8.99322335546581e-05, "loss": 1.0324, "step": 8105 }, { "epoch": 0.5492242021817196, "grad_norm": 6.868338584899902, "learning_rate": 8.993086453556028e-05, "loss": 0.8591, "step": 8106 }, { "epoch": 0.5492919574496917, "grad_norm": 6.081043720245361, "learning_rate": 8.992949551646246e-05, "loss": 0.9657, "step": 8107 }, { "epoch": 0.5493597127176638, "grad_norm": 6.885746955871582, "learning_rate": 8.992812649736464e-05, "loss": 0.7396, "step": 8108 }, { "epoch": 0.5494274679856359, "grad_norm": 7.478306770324707, "learning_rate": 8.992675747826682e-05, "loss": 0.8213, "step": 8109 }, { "epoch": 0.549495223253608, "grad_norm": 6.534050941467285, "learning_rate": 8.992538845916901e-05, "loss": 0.928, "step": 8110 }, { "epoch": 0.5495629785215801, "grad_norm": 6.312312602996826, "learning_rate": 8.992401944007119e-05, "loss": 0.8283, "step": 8111 }, { "epoch": 0.5496307337895522, "grad_norm": 6.73023796081543, "learning_rate": 8.992265042097337e-05, "loss": 0.9155, "step": 8112 }, { "epoch": 0.5496984890575243, "grad_norm": 6.13808012008667, "learning_rate": 8.992128140187555e-05, "loss": 0.9905, "step": 8113 }, { "epoch": 0.5497662443254963, "grad_norm": 5.021573066711426, "learning_rate": 8.991991238277775e-05, "loss": 0.6841, "step": 8114 }, { "epoch": 0.5498339995934683, "grad_norm": 6.291537284851074, "learning_rate": 8.991854336367993e-05, "loss": 1.1789, "step": 8115 }, { "epoch": 0.5499017548614404, "grad_norm": 5.784443378448486, "learning_rate": 8.99171743445821e-05, "loss": 0.7338, "step": 8116 }, { "epoch": 0.5499695101294125, "grad_norm": 5.844326496124268, "learning_rate": 8.991580532548429e-05, "loss": 0.7649, "step": 8117 }, { "epoch": 0.5500372653973846, "grad_norm": 6.961515426635742, "learning_rate": 8.991443630638648e-05, "loss": 0.8765, "step": 8118 }, { "epoch": 0.5501050206653567, "grad_norm": 6.371798515319824, "learning_rate": 8.991306728728866e-05, "loss": 0.7896, "step": 8119 }, { "epoch": 0.5501727759333288, "grad_norm": 8.220330238342285, "learning_rate": 8.991169826819084e-05, "loss": 0.9147, "step": 8120 }, { "epoch": 0.5502405312013009, "grad_norm": 15.172338485717773, "learning_rate": 8.991032924909303e-05, "loss": 0.798, "step": 8121 }, { "epoch": 0.550308286469273, "grad_norm": 7.026042938232422, "learning_rate": 8.990896022999522e-05, "loss": 0.9536, "step": 8122 }, { "epoch": 0.5503760417372451, "grad_norm": 7.6364336013793945, "learning_rate": 8.99075912108974e-05, "loss": 0.8633, "step": 8123 }, { "epoch": 0.5504437970052172, "grad_norm": 7.09596586227417, "learning_rate": 8.990622219179959e-05, "loss": 0.7266, "step": 8124 }, { "epoch": 0.5505115522731893, "grad_norm": 6.502638816833496, "learning_rate": 8.990485317270177e-05, "loss": 1.0156, "step": 8125 }, { "epoch": 0.5505793075411614, "grad_norm": 6.83535623550415, "learning_rate": 8.990348415360395e-05, "loss": 0.7604, "step": 8126 }, { "epoch": 0.5506470628091334, "grad_norm": 8.162100791931152, "learning_rate": 8.990211513450613e-05, "loss": 0.9476, "step": 8127 }, { "epoch": 0.5507148180771055, "grad_norm": 7.7978410720825195, "learning_rate": 8.990074611540832e-05, "loss": 0.9341, "step": 8128 }, { "epoch": 0.5507825733450776, "grad_norm": 7.659994602203369, "learning_rate": 8.98993770963105e-05, "loss": 0.9403, "step": 8129 }, { "epoch": 0.5508503286130496, "grad_norm": 7.724880218505859, "learning_rate": 8.989800807721268e-05, "loss": 0.8563, "step": 8130 }, { "epoch": 0.5509180838810217, "grad_norm": 8.337576866149902, "learning_rate": 8.989663905811487e-05, "loss": 0.874, "step": 8131 }, { "epoch": 0.5509858391489938, "grad_norm": 6.336655616760254, "learning_rate": 8.989527003901705e-05, "loss": 0.6825, "step": 8132 }, { "epoch": 0.5510535944169659, "grad_norm": 8.086697578430176, "learning_rate": 8.989390101991924e-05, "loss": 1.1419, "step": 8133 }, { "epoch": 0.551121349684938, "grad_norm": 6.572722434997559, "learning_rate": 8.989253200082142e-05, "loss": 0.9213, "step": 8134 }, { "epoch": 0.5511891049529101, "grad_norm": 6.7894487380981445, "learning_rate": 8.98911629817236e-05, "loss": 1.0189, "step": 8135 }, { "epoch": 0.5512568602208822, "grad_norm": 8.123970985412598, "learning_rate": 8.988979396262578e-05, "loss": 1.1131, "step": 8136 }, { "epoch": 0.5513246154888543, "grad_norm": 7.39744758605957, "learning_rate": 8.988842494352797e-05, "loss": 0.7286, "step": 8137 }, { "epoch": 0.5513923707568263, "grad_norm": 6.353941440582275, "learning_rate": 8.988705592443015e-05, "loss": 0.7356, "step": 8138 }, { "epoch": 0.5514601260247984, "grad_norm": 10.126983642578125, "learning_rate": 8.988568690533234e-05, "loss": 0.8082, "step": 8139 }, { "epoch": 0.5515278812927705, "grad_norm": 7.518803596496582, "learning_rate": 8.988431788623452e-05, "loss": 0.801, "step": 8140 }, { "epoch": 0.5515956365607426, "grad_norm": 6.769824981689453, "learning_rate": 8.98829488671367e-05, "loss": 0.7756, "step": 8141 }, { "epoch": 0.5516633918287147, "grad_norm": 6.841986656188965, "learning_rate": 8.988157984803889e-05, "loss": 0.6801, "step": 8142 }, { "epoch": 0.5517311470966868, "grad_norm": 6.92106294631958, "learning_rate": 8.988021082894107e-05, "loss": 0.9085, "step": 8143 }, { "epoch": 0.5517989023646589, "grad_norm": 5.728677272796631, "learning_rate": 8.987884180984325e-05, "loss": 0.8921, "step": 8144 }, { "epoch": 0.551866657632631, "grad_norm": 6.752305507659912, "learning_rate": 8.987747279074543e-05, "loss": 0.7462, "step": 8145 }, { "epoch": 0.551934412900603, "grad_norm": 7.200686454772949, "learning_rate": 8.987610377164762e-05, "loss": 1.3344, "step": 8146 }, { "epoch": 0.5520021681685751, "grad_norm": 5.663136959075928, "learning_rate": 8.98747347525498e-05, "loss": 0.8558, "step": 8147 }, { "epoch": 0.5520699234365471, "grad_norm": 5.89145565032959, "learning_rate": 8.987336573345199e-05, "loss": 0.8393, "step": 8148 }, { "epoch": 0.5521376787045192, "grad_norm": 6.481657028198242, "learning_rate": 8.987199671435417e-05, "loss": 1.0019, "step": 8149 }, { "epoch": 0.5522054339724913, "grad_norm": 6.341575622558594, "learning_rate": 8.987062769525635e-05, "loss": 0.84, "step": 8150 }, { "epoch": 0.5522731892404634, "grad_norm": 8.580132484436035, "learning_rate": 8.986925867615854e-05, "loss": 0.9604, "step": 8151 }, { "epoch": 0.5523409445084355, "grad_norm": 6.118538856506348, "learning_rate": 8.986788965706072e-05, "loss": 0.8404, "step": 8152 }, { "epoch": 0.5524086997764076, "grad_norm": 6.263945579528809, "learning_rate": 8.98665206379629e-05, "loss": 0.93, "step": 8153 }, { "epoch": 0.5524764550443797, "grad_norm": 6.55873441696167, "learning_rate": 8.986515161886508e-05, "loss": 0.8502, "step": 8154 }, { "epoch": 0.5525442103123518, "grad_norm": 6.372352600097656, "learning_rate": 8.986378259976726e-05, "loss": 1.0641, "step": 8155 }, { "epoch": 0.5526119655803239, "grad_norm": 7.0687665939331055, "learning_rate": 8.986241358066946e-05, "loss": 0.8543, "step": 8156 }, { "epoch": 0.552679720848296, "grad_norm": 8.265472412109375, "learning_rate": 8.986104456157164e-05, "loss": 0.9062, "step": 8157 }, { "epoch": 0.5527474761162681, "grad_norm": 6.169217586517334, "learning_rate": 8.985967554247382e-05, "loss": 0.7173, "step": 8158 }, { "epoch": 0.5528152313842402, "grad_norm": 5.441207408905029, "learning_rate": 8.9858306523376e-05, "loss": 0.6463, "step": 8159 }, { "epoch": 0.5528829866522122, "grad_norm": 7.118704795837402, "learning_rate": 8.985693750427819e-05, "loss": 0.8944, "step": 8160 }, { "epoch": 0.5529507419201843, "grad_norm": 6.042327404022217, "learning_rate": 8.985556848518037e-05, "loss": 1.081, "step": 8161 }, { "epoch": 0.5530184971881564, "grad_norm": 7.080348014831543, "learning_rate": 8.985419946608255e-05, "loss": 0.6969, "step": 8162 }, { "epoch": 0.5530862524561284, "grad_norm": 6.585824489593506, "learning_rate": 8.985283044698473e-05, "loss": 0.817, "step": 8163 }, { "epoch": 0.5531540077241005, "grad_norm": 7.307035446166992, "learning_rate": 8.985146142788692e-05, "loss": 0.9677, "step": 8164 }, { "epoch": 0.5532217629920726, "grad_norm": 7.577461242675781, "learning_rate": 8.98500924087891e-05, "loss": 0.9558, "step": 8165 }, { "epoch": 0.5532895182600447, "grad_norm": 8.000713348388672, "learning_rate": 8.984872338969129e-05, "loss": 0.8819, "step": 8166 }, { "epoch": 0.5533572735280168, "grad_norm": 8.770408630371094, "learning_rate": 8.984735437059348e-05, "loss": 1.0097, "step": 8167 }, { "epoch": 0.5534250287959889, "grad_norm": 6.20897102355957, "learning_rate": 8.984598535149566e-05, "loss": 0.936, "step": 8168 }, { "epoch": 0.553492784063961, "grad_norm": 6.364378929138184, "learning_rate": 8.984461633239784e-05, "loss": 0.8158, "step": 8169 }, { "epoch": 0.553560539331933, "grad_norm": 6.148647308349609, "learning_rate": 8.984324731330003e-05, "loss": 0.8356, "step": 8170 }, { "epoch": 0.5536282945999051, "grad_norm": 6.537686347961426, "learning_rate": 8.984187829420221e-05, "loss": 0.9588, "step": 8171 }, { "epoch": 0.5536960498678772, "grad_norm": 5.661696910858154, "learning_rate": 8.98405092751044e-05, "loss": 0.8554, "step": 8172 }, { "epoch": 0.5537638051358493, "grad_norm": 5.869355201721191, "learning_rate": 8.983914025600658e-05, "loss": 0.7676, "step": 8173 }, { "epoch": 0.5538315604038214, "grad_norm": 5.434372425079346, "learning_rate": 8.983777123690877e-05, "loss": 0.7861, "step": 8174 }, { "epoch": 0.5538993156717935, "grad_norm": 7.260756015777588, "learning_rate": 8.983640221781095e-05, "loss": 0.793, "step": 8175 }, { "epoch": 0.5539670709397656, "grad_norm": 7.556422710418701, "learning_rate": 8.983503319871313e-05, "loss": 0.9004, "step": 8176 }, { "epoch": 0.5540348262077377, "grad_norm": 5.610649585723877, "learning_rate": 8.983366417961531e-05, "loss": 0.6254, "step": 8177 }, { "epoch": 0.5541025814757098, "grad_norm": 8.736811637878418, "learning_rate": 8.98322951605175e-05, "loss": 1.028, "step": 8178 }, { "epoch": 0.5541703367436818, "grad_norm": 6.036482334136963, "learning_rate": 8.983092614141968e-05, "loss": 0.805, "step": 8179 }, { "epoch": 0.5542380920116539, "grad_norm": 7.7951250076293945, "learning_rate": 8.982955712232186e-05, "loss": 0.8463, "step": 8180 }, { "epoch": 0.554305847279626, "grad_norm": 5.3911871910095215, "learning_rate": 8.982818810322404e-05, "loss": 0.6961, "step": 8181 }, { "epoch": 0.554373602547598, "grad_norm": 7.813619613647461, "learning_rate": 8.982681908412623e-05, "loss": 0.7671, "step": 8182 }, { "epoch": 0.5544413578155701, "grad_norm": 6.328850269317627, "learning_rate": 8.982545006502842e-05, "loss": 0.7286, "step": 8183 }, { "epoch": 0.5545091130835422, "grad_norm": 7.214839935302734, "learning_rate": 8.98240810459306e-05, "loss": 0.8523, "step": 8184 }, { "epoch": 0.5545768683515143, "grad_norm": 5.996763229370117, "learning_rate": 8.982271202683278e-05, "loss": 0.8575, "step": 8185 }, { "epoch": 0.5546446236194864, "grad_norm": 8.032330513000488, "learning_rate": 8.982134300773496e-05, "loss": 0.9224, "step": 8186 }, { "epoch": 0.5547123788874585, "grad_norm": 7.919235706329346, "learning_rate": 8.981997398863714e-05, "loss": 0.9237, "step": 8187 }, { "epoch": 0.5547801341554306, "grad_norm": 7.324405670166016, "learning_rate": 8.981860496953933e-05, "loss": 0.904, "step": 8188 }, { "epoch": 0.5548478894234027, "grad_norm": 7.116741180419922, "learning_rate": 8.981723595044151e-05, "loss": 1.078, "step": 8189 }, { "epoch": 0.5549156446913748, "grad_norm": 6.27305269241333, "learning_rate": 8.98158669313437e-05, "loss": 0.7974, "step": 8190 }, { "epoch": 0.5549833999593469, "grad_norm": 6.298983097076416, "learning_rate": 8.981449791224588e-05, "loss": 0.8742, "step": 8191 }, { "epoch": 0.555051155227319, "grad_norm": 5.965509414672852, "learning_rate": 8.981312889314807e-05, "loss": 0.7601, "step": 8192 }, { "epoch": 0.555118910495291, "grad_norm": 6.754343509674072, "learning_rate": 8.981175987405025e-05, "loss": 1.0106, "step": 8193 }, { "epoch": 0.5551866657632631, "grad_norm": 4.9216790199279785, "learning_rate": 8.981039085495243e-05, "loss": 0.7036, "step": 8194 }, { "epoch": 0.5552544210312351, "grad_norm": 5.694727897644043, "learning_rate": 8.980902183585461e-05, "loss": 0.6663, "step": 8195 }, { "epoch": 0.5553221762992072, "grad_norm": 7.885236740112305, "learning_rate": 8.980765281675679e-05, "loss": 0.9234, "step": 8196 }, { "epoch": 0.5553899315671793, "grad_norm": 9.071615219116211, "learning_rate": 8.980628379765898e-05, "loss": 0.8357, "step": 8197 }, { "epoch": 0.5554576868351514, "grad_norm": 5.474040508270264, "learning_rate": 8.980491477856116e-05, "loss": 0.6571, "step": 8198 }, { "epoch": 0.5555254421031235, "grad_norm": 6.519590854644775, "learning_rate": 8.980354575946335e-05, "loss": 0.7547, "step": 8199 }, { "epoch": 0.5555931973710956, "grad_norm": 6.605935096740723, "learning_rate": 8.980217674036553e-05, "loss": 0.8487, "step": 8200 }, { "epoch": 0.5556609526390677, "grad_norm": 7.937171459197998, "learning_rate": 8.980080772126772e-05, "loss": 1.0935, "step": 8201 }, { "epoch": 0.5557287079070398, "grad_norm": 8.487578392028809, "learning_rate": 8.97994387021699e-05, "loss": 1.2286, "step": 8202 }, { "epoch": 0.5557964631750119, "grad_norm": 5.452174186706543, "learning_rate": 8.979806968307208e-05, "loss": 0.8182, "step": 8203 }, { "epoch": 0.555864218442984, "grad_norm": 7.257054805755615, "learning_rate": 8.979670066397426e-05, "loss": 1.0002, "step": 8204 }, { "epoch": 0.555931973710956, "grad_norm": 6.426529407501221, "learning_rate": 8.979533164487644e-05, "loss": 0.8226, "step": 8205 }, { "epoch": 0.5559997289789281, "grad_norm": 7.804161548614502, "learning_rate": 8.979396262577863e-05, "loss": 1.0396, "step": 8206 }, { "epoch": 0.5560674842469002, "grad_norm": 5.7507004737854, "learning_rate": 8.979259360668082e-05, "loss": 0.856, "step": 8207 }, { "epoch": 0.5561352395148723, "grad_norm": 5.974365234375, "learning_rate": 8.9791224587583e-05, "loss": 0.7983, "step": 8208 }, { "epoch": 0.5562029947828444, "grad_norm": 8.598740577697754, "learning_rate": 8.978985556848518e-05, "loss": 0.9326, "step": 8209 }, { "epoch": 0.5562707500508165, "grad_norm": 6.554419994354248, "learning_rate": 8.978848654938736e-05, "loss": 0.8367, "step": 8210 }, { "epoch": 0.5563385053187886, "grad_norm": 5.485220432281494, "learning_rate": 8.978711753028955e-05, "loss": 0.8231, "step": 8211 }, { "epoch": 0.5564062605867606, "grad_norm": 7.286705493927002, "learning_rate": 8.978574851119173e-05, "loss": 1.0673, "step": 8212 }, { "epoch": 0.5564740158547327, "grad_norm": 7.718635559082031, "learning_rate": 8.978437949209391e-05, "loss": 1.1467, "step": 8213 }, { "epoch": 0.5565417711227048, "grad_norm": 7.21420431137085, "learning_rate": 8.97830104729961e-05, "loss": 0.7332, "step": 8214 }, { "epoch": 0.5566095263906768, "grad_norm": 7.05917501449585, "learning_rate": 8.978164145389828e-05, "loss": 0.8301, "step": 8215 }, { "epoch": 0.5566772816586489, "grad_norm": 7.492615699768066, "learning_rate": 8.978027243480047e-05, "loss": 0.7282, "step": 8216 }, { "epoch": 0.556745036926621, "grad_norm": 5.647882461547852, "learning_rate": 8.977890341570266e-05, "loss": 0.9054, "step": 8217 }, { "epoch": 0.5568127921945931, "grad_norm": 7.022364616394043, "learning_rate": 8.977753439660484e-05, "loss": 0.7759, "step": 8218 }, { "epoch": 0.5568805474625652, "grad_norm": 7.175726890563965, "learning_rate": 8.977616537750702e-05, "loss": 0.8165, "step": 8219 }, { "epoch": 0.5569483027305373, "grad_norm": 5.680481433868408, "learning_rate": 8.977479635840921e-05, "loss": 0.8801, "step": 8220 }, { "epoch": 0.5570160579985094, "grad_norm": 6.144750118255615, "learning_rate": 8.97734273393114e-05, "loss": 0.8273, "step": 8221 }, { "epoch": 0.5570838132664815, "grad_norm": 6.248444080352783, "learning_rate": 8.977205832021357e-05, "loss": 1.1512, "step": 8222 }, { "epoch": 0.5571515685344536, "grad_norm": 5.904661655426025, "learning_rate": 8.977068930111575e-05, "loss": 0.8319, "step": 8223 }, { "epoch": 0.5572193238024257, "grad_norm": 5.726762771606445, "learning_rate": 8.976932028201795e-05, "loss": 0.7412, "step": 8224 }, { "epoch": 0.5572870790703978, "grad_norm": 6.143310070037842, "learning_rate": 8.976795126292013e-05, "loss": 1.0178, "step": 8225 }, { "epoch": 0.5573548343383699, "grad_norm": 5.4348907470703125, "learning_rate": 8.976658224382231e-05, "loss": 0.7297, "step": 8226 }, { "epoch": 0.557422589606342, "grad_norm": 6.981692314147949, "learning_rate": 8.976521322472449e-05, "loss": 0.8807, "step": 8227 }, { "epoch": 0.5574903448743139, "grad_norm": 7.862269878387451, "learning_rate": 8.976384420562667e-05, "loss": 1.0944, "step": 8228 }, { "epoch": 0.557558100142286, "grad_norm": 6.2533345222473145, "learning_rate": 8.976247518652886e-05, "loss": 0.8815, "step": 8229 }, { "epoch": 0.5576258554102581, "grad_norm": 5.445539951324463, "learning_rate": 8.976110616743104e-05, "loss": 0.7853, "step": 8230 }, { "epoch": 0.5576936106782302, "grad_norm": 5.220200538635254, "learning_rate": 8.975973714833322e-05, "loss": 0.6072, "step": 8231 }, { "epoch": 0.5577613659462023, "grad_norm": 7.320289611816406, "learning_rate": 8.97583681292354e-05, "loss": 0.7023, "step": 8232 }, { "epoch": 0.5578291212141744, "grad_norm": 6.331526756286621, "learning_rate": 8.97569991101376e-05, "loss": 0.8804, "step": 8233 }, { "epoch": 0.5578968764821465, "grad_norm": 7.493820667266846, "learning_rate": 8.975563009103978e-05, "loss": 0.8167, "step": 8234 }, { "epoch": 0.5579646317501186, "grad_norm": 5.947346210479736, "learning_rate": 8.975426107194196e-05, "loss": 0.6117, "step": 8235 }, { "epoch": 0.5580323870180907, "grad_norm": 6.589901447296143, "learning_rate": 8.975289205284414e-05, "loss": 0.8721, "step": 8236 }, { "epoch": 0.5581001422860628, "grad_norm": 5.9715423583984375, "learning_rate": 8.975152303374632e-05, "loss": 0.8644, "step": 8237 }, { "epoch": 0.5581678975540348, "grad_norm": 8.454349517822266, "learning_rate": 8.975015401464851e-05, "loss": 0.89, "step": 8238 }, { "epoch": 0.5582356528220069, "grad_norm": 6.455380916595459, "learning_rate": 8.97487849955507e-05, "loss": 0.8405, "step": 8239 }, { "epoch": 0.558303408089979, "grad_norm": 6.558000564575195, "learning_rate": 8.974741597645287e-05, "loss": 0.8561, "step": 8240 }, { "epoch": 0.5583711633579511, "grad_norm": 6.80634880065918, "learning_rate": 8.974604695735506e-05, "loss": 0.8508, "step": 8241 }, { "epoch": 0.5584389186259232, "grad_norm": 5.507918357849121, "learning_rate": 8.974467793825724e-05, "loss": 0.9459, "step": 8242 }, { "epoch": 0.5585066738938953, "grad_norm": 6.2580037117004395, "learning_rate": 8.974330891915943e-05, "loss": 0.919, "step": 8243 }, { "epoch": 0.5585744291618673, "grad_norm": 7.050917148590088, "learning_rate": 8.974193990006161e-05, "loss": 0.6281, "step": 8244 }, { "epoch": 0.5586421844298394, "grad_norm": 6.926290512084961, "learning_rate": 8.974057088096379e-05, "loss": 0.9378, "step": 8245 }, { "epoch": 0.5587099396978115, "grad_norm": 7.242706298828125, "learning_rate": 8.973920186186597e-05, "loss": 0.9724, "step": 8246 }, { "epoch": 0.5587776949657836, "grad_norm": 6.239006519317627, "learning_rate": 8.973783284276816e-05, "loss": 1.1008, "step": 8247 }, { "epoch": 0.5588454502337556, "grad_norm": 5.4303998947143555, "learning_rate": 8.973646382367034e-05, "loss": 0.8486, "step": 8248 }, { "epoch": 0.5589132055017277, "grad_norm": 7.883191108703613, "learning_rate": 8.973509480457252e-05, "loss": 1.1543, "step": 8249 }, { "epoch": 0.5589809607696998, "grad_norm": 6.10230016708374, "learning_rate": 8.97337257854747e-05, "loss": 0.8662, "step": 8250 }, { "epoch": 0.5590487160376719, "grad_norm": 7.152645587921143, "learning_rate": 8.973235676637689e-05, "loss": 0.7332, "step": 8251 }, { "epoch": 0.559116471305644, "grad_norm": 5.797628879547119, "learning_rate": 8.973098774727908e-05, "loss": 0.7454, "step": 8252 }, { "epoch": 0.5591842265736161, "grad_norm": 6.342854976654053, "learning_rate": 8.972961872818126e-05, "loss": 0.6993, "step": 8253 }, { "epoch": 0.5592519818415882, "grad_norm": 8.492813110351562, "learning_rate": 8.972824970908344e-05, "loss": 0.8986, "step": 8254 }, { "epoch": 0.5593197371095603, "grad_norm": 5.740788459777832, "learning_rate": 8.972688068998562e-05, "loss": 0.8702, "step": 8255 }, { "epoch": 0.5593874923775324, "grad_norm": 5.833809852600098, "learning_rate": 8.972551167088781e-05, "loss": 0.9314, "step": 8256 }, { "epoch": 0.5594552476455045, "grad_norm": 7.672467231750488, "learning_rate": 8.972414265179e-05, "loss": 0.7654, "step": 8257 }, { "epoch": 0.5595230029134766, "grad_norm": 6.214809417724609, "learning_rate": 8.972277363269218e-05, "loss": 0.8391, "step": 8258 }, { "epoch": 0.5595907581814487, "grad_norm": 6.621498107910156, "learning_rate": 8.972140461359436e-05, "loss": 0.8521, "step": 8259 }, { "epoch": 0.5596585134494207, "grad_norm": 6.759031295776367, "learning_rate": 8.972003559449655e-05, "loss": 1.0709, "step": 8260 }, { "epoch": 0.5597262687173927, "grad_norm": 6.506051540374756, "learning_rate": 8.971866657539873e-05, "loss": 1.1408, "step": 8261 }, { "epoch": 0.5597940239853648, "grad_norm": 5.346510410308838, "learning_rate": 8.971729755630091e-05, "loss": 0.7054, "step": 8262 }, { "epoch": 0.5598617792533369, "grad_norm": 7.418613910675049, "learning_rate": 8.97159285372031e-05, "loss": 0.976, "step": 8263 }, { "epoch": 0.559929534521309, "grad_norm": 5.501760482788086, "learning_rate": 8.971455951810528e-05, "loss": 0.7454, "step": 8264 }, { "epoch": 0.5599972897892811, "grad_norm": 6.592947959899902, "learning_rate": 8.971319049900746e-05, "loss": 0.6618, "step": 8265 }, { "epoch": 0.5600650450572532, "grad_norm": 7.502736568450928, "learning_rate": 8.971182147990966e-05, "loss": 0.8033, "step": 8266 }, { "epoch": 0.5601328003252253, "grad_norm": 7.374648094177246, "learning_rate": 8.971045246081184e-05, "loss": 0.8319, "step": 8267 }, { "epoch": 0.5602005555931974, "grad_norm": 7.346577167510986, "learning_rate": 8.970908344171402e-05, "loss": 0.9729, "step": 8268 }, { "epoch": 0.5602683108611695, "grad_norm": 5.724850654602051, "learning_rate": 8.97077144226162e-05, "loss": 0.8124, "step": 8269 }, { "epoch": 0.5603360661291416, "grad_norm": 7.397567272186279, "learning_rate": 8.97063454035184e-05, "loss": 1.2057, "step": 8270 }, { "epoch": 0.5604038213971136, "grad_norm": 8.235245704650879, "learning_rate": 8.970497638442057e-05, "loss": 0.7024, "step": 8271 }, { "epoch": 0.5604715766650857, "grad_norm": 6.514690399169922, "learning_rate": 8.970360736532275e-05, "loss": 0.7149, "step": 8272 }, { "epoch": 0.5605393319330578, "grad_norm": 7.889196872711182, "learning_rate": 8.970223834622493e-05, "loss": 0.664, "step": 8273 }, { "epoch": 0.5606070872010299, "grad_norm": 6.1832475662231445, "learning_rate": 8.970086932712711e-05, "loss": 0.8841, "step": 8274 }, { "epoch": 0.560674842469002, "grad_norm": 5.73254919052124, "learning_rate": 8.969950030802931e-05, "loss": 0.8285, "step": 8275 }, { "epoch": 0.5607425977369741, "grad_norm": 7.358850955963135, "learning_rate": 8.969813128893149e-05, "loss": 0.8508, "step": 8276 }, { "epoch": 0.5608103530049461, "grad_norm": 7.491328716278076, "learning_rate": 8.969676226983367e-05, "loss": 1.0391, "step": 8277 }, { "epoch": 0.5608781082729182, "grad_norm": 6.1154375076293945, "learning_rate": 8.969539325073585e-05, "loss": 0.8254, "step": 8278 }, { "epoch": 0.5609458635408903, "grad_norm": 7.245144367218018, "learning_rate": 8.969402423163804e-05, "loss": 0.9224, "step": 8279 }, { "epoch": 0.5610136188088624, "grad_norm": 7.001935958862305, "learning_rate": 8.969265521254022e-05, "loss": 0.6674, "step": 8280 }, { "epoch": 0.5610813740768344, "grad_norm": 7.420647144317627, "learning_rate": 8.96912861934424e-05, "loss": 0.9413, "step": 8281 }, { "epoch": 0.5611491293448065, "grad_norm": 8.350207328796387, "learning_rate": 8.968991717434458e-05, "loss": 1.0078, "step": 8282 }, { "epoch": 0.5612168846127786, "grad_norm": 7.219613075256348, "learning_rate": 8.968854815524676e-05, "loss": 0.9071, "step": 8283 }, { "epoch": 0.5612846398807507, "grad_norm": 6.428452014923096, "learning_rate": 8.968717913614896e-05, "loss": 1.0902, "step": 8284 }, { "epoch": 0.5613523951487228, "grad_norm": 6.86848258972168, "learning_rate": 8.968581011705114e-05, "loss": 0.967, "step": 8285 }, { "epoch": 0.5614201504166949, "grad_norm": 6.413376808166504, "learning_rate": 8.968444109795332e-05, "loss": 0.7595, "step": 8286 }, { "epoch": 0.561487905684667, "grad_norm": 7.2994866371154785, "learning_rate": 8.96830720788555e-05, "loss": 1.176, "step": 8287 }, { "epoch": 0.5615556609526391, "grad_norm": 6.439824104309082, "learning_rate": 8.968170305975768e-05, "loss": 0.8067, "step": 8288 }, { "epoch": 0.5616234162206112, "grad_norm": 7.186101913452148, "learning_rate": 8.968033404065987e-05, "loss": 0.6857, "step": 8289 }, { "epoch": 0.5616911714885833, "grad_norm": 7.133284091949463, "learning_rate": 8.967896502156205e-05, "loss": 0.9404, "step": 8290 }, { "epoch": 0.5617589267565554, "grad_norm": 5.822772979736328, "learning_rate": 8.967759600246423e-05, "loss": 0.8007, "step": 8291 }, { "epoch": 0.5618266820245275, "grad_norm": 6.281779766082764, "learning_rate": 8.967622698336642e-05, "loss": 0.8698, "step": 8292 }, { "epoch": 0.5618944372924994, "grad_norm": 6.691267490386963, "learning_rate": 8.967485796426861e-05, "loss": 0.7307, "step": 8293 }, { "epoch": 0.5619621925604715, "grad_norm": 8.061805725097656, "learning_rate": 8.967348894517079e-05, "loss": 0.9223, "step": 8294 }, { "epoch": 0.5620299478284436, "grad_norm": 6.480556488037109, "learning_rate": 8.967211992607297e-05, "loss": 0.6982, "step": 8295 }, { "epoch": 0.5620977030964157, "grad_norm": 6.673006534576416, "learning_rate": 8.967075090697515e-05, "loss": 0.9059, "step": 8296 }, { "epoch": 0.5621654583643878, "grad_norm": 5.882975101470947, "learning_rate": 8.966938188787733e-05, "loss": 0.7252, "step": 8297 }, { "epoch": 0.5622332136323599, "grad_norm": 7.131857872009277, "learning_rate": 8.966801286877952e-05, "loss": 0.8641, "step": 8298 }, { "epoch": 0.562300968900332, "grad_norm": 6.564770698547363, "learning_rate": 8.96666438496817e-05, "loss": 0.8122, "step": 8299 }, { "epoch": 0.5623687241683041, "grad_norm": 5.747824192047119, "learning_rate": 8.966527483058388e-05, "loss": 0.8257, "step": 8300 }, { "epoch": 0.5624364794362762, "grad_norm": 5.892056465148926, "learning_rate": 8.966390581148607e-05, "loss": 0.5754, "step": 8301 }, { "epoch": 0.5625042347042483, "grad_norm": 7.563606262207031, "learning_rate": 8.966253679238826e-05, "loss": 0.7932, "step": 8302 }, { "epoch": 0.5625719899722204, "grad_norm": 6.839925289154053, "learning_rate": 8.966116777329044e-05, "loss": 0.8489, "step": 8303 }, { "epoch": 0.5626397452401924, "grad_norm": 7.593649387359619, "learning_rate": 8.965979875419262e-05, "loss": 0.7083, "step": 8304 }, { "epoch": 0.5627075005081645, "grad_norm": 6.513439178466797, "learning_rate": 8.96584297350948e-05, "loss": 0.8906, "step": 8305 }, { "epoch": 0.5627752557761366, "grad_norm": 7.1831889152526855, "learning_rate": 8.9657060715997e-05, "loss": 1.1598, "step": 8306 }, { "epoch": 0.5628430110441087, "grad_norm": 5.392392158508301, "learning_rate": 8.965569169689917e-05, "loss": 0.7419, "step": 8307 }, { "epoch": 0.5629107663120808, "grad_norm": 5.79964017868042, "learning_rate": 8.965432267780135e-05, "loss": 0.5992, "step": 8308 }, { "epoch": 0.5629785215800529, "grad_norm": 6.701503276824951, "learning_rate": 8.965295365870355e-05, "loss": 0.9259, "step": 8309 }, { "epoch": 0.5630462768480249, "grad_norm": 7.373070240020752, "learning_rate": 8.965158463960573e-05, "loss": 0.7898, "step": 8310 }, { "epoch": 0.563114032115997, "grad_norm": 5.849572658538818, "learning_rate": 8.965021562050791e-05, "loss": 0.6339, "step": 8311 }, { "epoch": 0.5631817873839691, "grad_norm": 7.531026840209961, "learning_rate": 8.96488466014101e-05, "loss": 0.9883, "step": 8312 }, { "epoch": 0.5632495426519412, "grad_norm": 6.910517692565918, "learning_rate": 8.964747758231228e-05, "loss": 0.8582, "step": 8313 }, { "epoch": 0.5633172979199133, "grad_norm": 7.760013103485107, "learning_rate": 8.964610856321446e-05, "loss": 0.8696, "step": 8314 }, { "epoch": 0.5633850531878853, "grad_norm": 5.592878341674805, "learning_rate": 8.964473954411664e-05, "loss": 0.7325, "step": 8315 }, { "epoch": 0.5634528084558574, "grad_norm": 6.491446018218994, "learning_rate": 8.964337052501884e-05, "loss": 0.7607, "step": 8316 }, { "epoch": 0.5635205637238295, "grad_norm": 5.725716590881348, "learning_rate": 8.964200150592102e-05, "loss": 0.928, "step": 8317 }, { "epoch": 0.5635883189918016, "grad_norm": 6.139345169067383, "learning_rate": 8.96406324868232e-05, "loss": 0.9763, "step": 8318 }, { "epoch": 0.5636560742597737, "grad_norm": 7.040091514587402, "learning_rate": 8.963926346772538e-05, "loss": 0.7395, "step": 8319 }, { "epoch": 0.5637238295277458, "grad_norm": 5.59568977355957, "learning_rate": 8.963789444862756e-05, "loss": 0.7304, "step": 8320 }, { "epoch": 0.5637915847957179, "grad_norm": 5.827798366546631, "learning_rate": 8.963652542952975e-05, "loss": 0.7063, "step": 8321 }, { "epoch": 0.56385934006369, "grad_norm": 7.218785285949707, "learning_rate": 8.963515641043193e-05, "loss": 0.8324, "step": 8322 }, { "epoch": 0.5639270953316621, "grad_norm": 6.695993423461914, "learning_rate": 8.963378739133411e-05, "loss": 0.7374, "step": 8323 }, { "epoch": 0.5639948505996342, "grad_norm": 7.842550754547119, "learning_rate": 8.96324183722363e-05, "loss": 1.1097, "step": 8324 }, { "epoch": 0.5640626058676063, "grad_norm": 10.275467872619629, "learning_rate": 8.963104935313849e-05, "loss": 0.974, "step": 8325 }, { "epoch": 0.5641303611355782, "grad_norm": 5.597927093505859, "learning_rate": 8.962968033404067e-05, "loss": 0.8399, "step": 8326 }, { "epoch": 0.5641981164035503, "grad_norm": 8.49342155456543, "learning_rate": 8.962831131494285e-05, "loss": 0.8277, "step": 8327 }, { "epoch": 0.5642658716715224, "grad_norm": 5.775453567504883, "learning_rate": 8.962694229584503e-05, "loss": 0.7712, "step": 8328 }, { "epoch": 0.5643336269394945, "grad_norm": 6.063767910003662, "learning_rate": 8.962557327674721e-05, "loss": 0.6816, "step": 8329 }, { "epoch": 0.5644013822074666, "grad_norm": 6.82351541519165, "learning_rate": 8.96242042576494e-05, "loss": 0.7759, "step": 8330 }, { "epoch": 0.5644691374754387, "grad_norm": 7.734725475311279, "learning_rate": 8.962283523855158e-05, "loss": 0.856, "step": 8331 }, { "epoch": 0.5645368927434108, "grad_norm": 9.442476272583008, "learning_rate": 8.962146621945376e-05, "loss": 0.9022, "step": 8332 }, { "epoch": 0.5646046480113829, "grad_norm": 4.6692352294921875, "learning_rate": 8.962009720035594e-05, "loss": 0.689, "step": 8333 }, { "epoch": 0.564672403279355, "grad_norm": 7.143623352050781, "learning_rate": 8.961872818125814e-05, "loss": 0.7562, "step": 8334 }, { "epoch": 0.5647401585473271, "grad_norm": 6.053460597991943, "learning_rate": 8.961735916216032e-05, "loss": 0.6182, "step": 8335 }, { "epoch": 0.5648079138152992, "grad_norm": 6.414344310760498, "learning_rate": 8.96159901430625e-05, "loss": 1.0768, "step": 8336 }, { "epoch": 0.5648756690832712, "grad_norm": 5.544490814208984, "learning_rate": 8.961462112396468e-05, "loss": 0.722, "step": 8337 }, { "epoch": 0.5649434243512433, "grad_norm": 7.075659275054932, "learning_rate": 8.961325210486686e-05, "loss": 1.0114, "step": 8338 }, { "epoch": 0.5650111796192154, "grad_norm": 6.1844024658203125, "learning_rate": 8.961188308576905e-05, "loss": 0.7534, "step": 8339 }, { "epoch": 0.5650789348871875, "grad_norm": 7.791917324066162, "learning_rate": 8.961051406667123e-05, "loss": 0.9825, "step": 8340 }, { "epoch": 0.5651466901551596, "grad_norm": 6.58551025390625, "learning_rate": 8.960914504757341e-05, "loss": 0.7715, "step": 8341 }, { "epoch": 0.5652144454231316, "grad_norm": 6.225470066070557, "learning_rate": 8.96077760284756e-05, "loss": 0.7685, "step": 8342 }, { "epoch": 0.5652822006911037, "grad_norm": 10.40294361114502, "learning_rate": 8.960640700937778e-05, "loss": 0.9553, "step": 8343 }, { "epoch": 0.5653499559590758, "grad_norm": 5.97885274887085, "learning_rate": 8.960503799027997e-05, "loss": 0.7282, "step": 8344 }, { "epoch": 0.5654177112270479, "grad_norm": 7.777407646179199, "learning_rate": 8.960366897118215e-05, "loss": 0.859, "step": 8345 }, { "epoch": 0.56548546649502, "grad_norm": 6.5945892333984375, "learning_rate": 8.960229995208433e-05, "loss": 1.0263, "step": 8346 }, { "epoch": 0.565553221762992, "grad_norm": 5.678567409515381, "learning_rate": 8.960093093298651e-05, "loss": 0.6956, "step": 8347 }, { "epoch": 0.5656209770309641, "grad_norm": 6.3759846687316895, "learning_rate": 8.95995619138887e-05, "loss": 0.8164, "step": 8348 }, { "epoch": 0.5656887322989362, "grad_norm": 6.627008438110352, "learning_rate": 8.959819289479088e-05, "loss": 0.9505, "step": 8349 }, { "epoch": 0.5657564875669083, "grad_norm": 8.078243255615234, "learning_rate": 8.959682387569306e-05, "loss": 0.9061, "step": 8350 }, { "epoch": 0.5658242428348804, "grad_norm": 5.019404888153076, "learning_rate": 8.959545485659524e-05, "loss": 0.7382, "step": 8351 }, { "epoch": 0.5658919981028525, "grad_norm": 8.406132698059082, "learning_rate": 8.959408583749744e-05, "loss": 1.0107, "step": 8352 }, { "epoch": 0.5659597533708246, "grad_norm": 6.881223678588867, "learning_rate": 8.959271681839962e-05, "loss": 0.7417, "step": 8353 }, { "epoch": 0.5660275086387967, "grad_norm": 5.801870822906494, "learning_rate": 8.95913477993018e-05, "loss": 0.7736, "step": 8354 }, { "epoch": 0.5660952639067688, "grad_norm": 5.533797740936279, "learning_rate": 8.9589978780204e-05, "loss": 0.9576, "step": 8355 }, { "epoch": 0.5661630191747409, "grad_norm": 5.76112174987793, "learning_rate": 8.958860976110617e-05, "loss": 0.8774, "step": 8356 }, { "epoch": 0.566230774442713, "grad_norm": 6.424200534820557, "learning_rate": 8.958724074200835e-05, "loss": 0.9104, "step": 8357 }, { "epoch": 0.566298529710685, "grad_norm": 7.279561519622803, "learning_rate": 8.958587172291055e-05, "loss": 0.8566, "step": 8358 }, { "epoch": 0.566366284978657, "grad_norm": 7.023104667663574, "learning_rate": 8.958450270381273e-05, "loss": 0.785, "step": 8359 }, { "epoch": 0.5664340402466291, "grad_norm": 5.087419033050537, "learning_rate": 8.958313368471491e-05, "loss": 0.7379, "step": 8360 }, { "epoch": 0.5665017955146012, "grad_norm": 6.156972885131836, "learning_rate": 8.958176466561709e-05, "loss": 0.9361, "step": 8361 }, { "epoch": 0.5665695507825733, "grad_norm": 6.432037830352783, "learning_rate": 8.958039564651928e-05, "loss": 0.804, "step": 8362 }, { "epoch": 0.5666373060505454, "grad_norm": 6.592261791229248, "learning_rate": 8.957902662742146e-05, "loss": 0.8668, "step": 8363 }, { "epoch": 0.5667050613185175, "grad_norm": 6.8097944259643555, "learning_rate": 8.957765760832364e-05, "loss": 0.8852, "step": 8364 }, { "epoch": 0.5667728165864896, "grad_norm": 6.4310078620910645, "learning_rate": 8.957628858922582e-05, "loss": 0.9113, "step": 8365 }, { "epoch": 0.5668405718544617, "grad_norm": 4.992232322692871, "learning_rate": 8.957491957012802e-05, "loss": 0.7373, "step": 8366 }, { "epoch": 0.5669083271224338, "grad_norm": 6.666651725769043, "learning_rate": 8.95735505510302e-05, "loss": 0.9169, "step": 8367 }, { "epoch": 0.5669760823904059, "grad_norm": 6.658245086669922, "learning_rate": 8.957218153193238e-05, "loss": 0.7536, "step": 8368 }, { "epoch": 0.567043837658378, "grad_norm": 7.354119300842285, "learning_rate": 8.957081251283456e-05, "loss": 0.9655, "step": 8369 }, { "epoch": 0.56711159292635, "grad_norm": 7.857820510864258, "learning_rate": 8.956944349373674e-05, "loss": 1.0672, "step": 8370 }, { "epoch": 0.5671793481943221, "grad_norm": 9.102689743041992, "learning_rate": 8.956807447463893e-05, "loss": 1.1671, "step": 8371 }, { "epoch": 0.5672471034622942, "grad_norm": 7.795739650726318, "learning_rate": 8.956670545554111e-05, "loss": 0.8685, "step": 8372 }, { "epoch": 0.5673148587302663, "grad_norm": 9.154378890991211, "learning_rate": 8.95653364364433e-05, "loss": 0.9891, "step": 8373 }, { "epoch": 0.5673826139982384, "grad_norm": 5.6434645652771, "learning_rate": 8.956396741734547e-05, "loss": 0.7481, "step": 8374 }, { "epoch": 0.5674503692662104, "grad_norm": 4.90270471572876, "learning_rate": 8.956259839824765e-05, "loss": 0.5697, "step": 8375 }, { "epoch": 0.5675181245341825, "grad_norm": 6.498236656188965, "learning_rate": 8.956122937914985e-05, "loss": 0.8037, "step": 8376 }, { "epoch": 0.5675858798021546, "grad_norm": 6.723139762878418, "learning_rate": 8.955986036005203e-05, "loss": 0.7044, "step": 8377 }, { "epoch": 0.5676536350701267, "grad_norm": 6.958408832550049, "learning_rate": 8.955849134095421e-05, "loss": 1.2551, "step": 8378 }, { "epoch": 0.5677213903380988, "grad_norm": 5.749763488769531, "learning_rate": 8.955712232185639e-05, "loss": 0.6988, "step": 8379 }, { "epoch": 0.5677891456060709, "grad_norm": 8.176393508911133, "learning_rate": 8.955575330275858e-05, "loss": 0.9854, "step": 8380 }, { "epoch": 0.567856900874043, "grad_norm": 6.546833038330078, "learning_rate": 8.955438428366076e-05, "loss": 0.6731, "step": 8381 }, { "epoch": 0.567924656142015, "grad_norm": 5.418013095855713, "learning_rate": 8.955301526456294e-05, "loss": 0.6531, "step": 8382 }, { "epoch": 0.5679924114099871, "grad_norm": 5.538142681121826, "learning_rate": 8.955164624546512e-05, "loss": 0.6456, "step": 8383 }, { "epoch": 0.5680601666779592, "grad_norm": 7.09976053237915, "learning_rate": 8.95502772263673e-05, "loss": 0.9614, "step": 8384 }, { "epoch": 0.5681279219459313, "grad_norm": 6.105874061584473, "learning_rate": 8.95489082072695e-05, "loss": 0.6241, "step": 8385 }, { "epoch": 0.5681956772139034, "grad_norm": 6.9980998039245605, "learning_rate": 8.954753918817168e-05, "loss": 0.9669, "step": 8386 }, { "epoch": 0.5682634324818755, "grad_norm": 6.8302764892578125, "learning_rate": 8.954617016907386e-05, "loss": 0.6397, "step": 8387 }, { "epoch": 0.5683311877498476, "grad_norm": 7.777739524841309, "learning_rate": 8.954480114997604e-05, "loss": 1.0691, "step": 8388 }, { "epoch": 0.5683989430178197, "grad_norm": 7.322915554046631, "learning_rate": 8.954343213087823e-05, "loss": 0.9179, "step": 8389 }, { "epoch": 0.5684666982857918, "grad_norm": 8.225046157836914, "learning_rate": 8.954206311178041e-05, "loss": 1.1727, "step": 8390 }, { "epoch": 0.5685344535537638, "grad_norm": 5.573197364807129, "learning_rate": 8.95406940926826e-05, "loss": 0.9111, "step": 8391 }, { "epoch": 0.5686022088217358, "grad_norm": 5.810042381286621, "learning_rate": 8.953932507358477e-05, "loss": 0.7115, "step": 8392 }, { "epoch": 0.5686699640897079, "grad_norm": 7.172820091247559, "learning_rate": 8.953795605448695e-05, "loss": 1.0404, "step": 8393 }, { "epoch": 0.56873771935768, "grad_norm": 8.901079177856445, "learning_rate": 8.953658703538915e-05, "loss": 1.164, "step": 8394 }, { "epoch": 0.5688054746256521, "grad_norm": 5.692160129547119, "learning_rate": 8.953521801629133e-05, "loss": 0.7094, "step": 8395 }, { "epoch": 0.5688732298936242, "grad_norm": 5.794686317443848, "learning_rate": 8.953384899719351e-05, "loss": 0.7625, "step": 8396 }, { "epoch": 0.5689409851615963, "grad_norm": 6.330542087554932, "learning_rate": 8.953247997809569e-05, "loss": 0.8826, "step": 8397 }, { "epoch": 0.5690087404295684, "grad_norm": 6.6873698234558105, "learning_rate": 8.953111095899788e-05, "loss": 0.8109, "step": 8398 }, { "epoch": 0.5690764956975405, "grad_norm": 5.331307411193848, "learning_rate": 8.952974193990006e-05, "loss": 0.6919, "step": 8399 }, { "epoch": 0.5691442509655126, "grad_norm": 6.942624568939209, "learning_rate": 8.952837292080224e-05, "loss": 0.9336, "step": 8400 }, { "epoch": 0.5692120062334847, "grad_norm": 5.526673793792725, "learning_rate": 8.952700390170444e-05, "loss": 0.6552, "step": 8401 }, { "epoch": 0.5692797615014568, "grad_norm": 6.674069404602051, "learning_rate": 8.952563488260662e-05, "loss": 0.7824, "step": 8402 }, { "epoch": 0.5693475167694289, "grad_norm": 7.103878498077393, "learning_rate": 8.95242658635088e-05, "loss": 0.7983, "step": 8403 }, { "epoch": 0.5694152720374009, "grad_norm": 5.963317394256592, "learning_rate": 8.952289684441099e-05, "loss": 0.7804, "step": 8404 }, { "epoch": 0.569483027305373, "grad_norm": 5.7701215744018555, "learning_rate": 8.952152782531317e-05, "loss": 0.6684, "step": 8405 }, { "epoch": 0.5695507825733451, "grad_norm": 6.809589862823486, "learning_rate": 8.952015880621535e-05, "loss": 0.8358, "step": 8406 }, { "epoch": 0.5696185378413171, "grad_norm": 5.772964954376221, "learning_rate": 8.951878978711753e-05, "loss": 0.8283, "step": 8407 }, { "epoch": 0.5696862931092892, "grad_norm": 5.902827262878418, "learning_rate": 8.951742076801973e-05, "loss": 0.9207, "step": 8408 }, { "epoch": 0.5697540483772613, "grad_norm": 6.312051296234131, "learning_rate": 8.951605174892191e-05, "loss": 0.8424, "step": 8409 }, { "epoch": 0.5698218036452334, "grad_norm": 6.687371730804443, "learning_rate": 8.951468272982409e-05, "loss": 0.9563, "step": 8410 }, { "epoch": 0.5698895589132055, "grad_norm": 5.801156520843506, "learning_rate": 8.951331371072627e-05, "loss": 0.898, "step": 8411 }, { "epoch": 0.5699573141811776, "grad_norm": 5.955209255218506, "learning_rate": 8.951194469162846e-05, "loss": 0.9062, "step": 8412 }, { "epoch": 0.5700250694491497, "grad_norm": 6.845731258392334, "learning_rate": 8.951057567253064e-05, "loss": 0.7345, "step": 8413 }, { "epoch": 0.5700928247171217, "grad_norm": 7.945831298828125, "learning_rate": 8.950920665343282e-05, "loss": 1.1156, "step": 8414 }, { "epoch": 0.5701605799850938, "grad_norm": 6.423880100250244, "learning_rate": 8.9507837634335e-05, "loss": 0.886, "step": 8415 }, { "epoch": 0.5702283352530659, "grad_norm": 6.089376449584961, "learning_rate": 8.950646861523718e-05, "loss": 0.7217, "step": 8416 }, { "epoch": 0.570296090521038, "grad_norm": 7.061302661895752, "learning_rate": 8.950509959613938e-05, "loss": 1.0641, "step": 8417 }, { "epoch": 0.5703638457890101, "grad_norm": 7.304849147796631, "learning_rate": 8.950373057704156e-05, "loss": 0.8099, "step": 8418 }, { "epoch": 0.5704316010569822, "grad_norm": 7.116214752197266, "learning_rate": 8.950236155794374e-05, "loss": 0.7974, "step": 8419 }, { "epoch": 0.5704993563249543, "grad_norm": 6.840792655944824, "learning_rate": 8.950099253884592e-05, "loss": 0.7558, "step": 8420 }, { "epoch": 0.5705671115929264, "grad_norm": 5.458795547485352, "learning_rate": 8.94996235197481e-05, "loss": 0.7173, "step": 8421 }, { "epoch": 0.5706348668608985, "grad_norm": 7.809031963348389, "learning_rate": 8.949825450065029e-05, "loss": 0.7751, "step": 8422 }, { "epoch": 0.5707026221288706, "grad_norm": 7.147863388061523, "learning_rate": 8.949688548155247e-05, "loss": 0.7824, "step": 8423 }, { "epoch": 0.5707703773968426, "grad_norm": 7.230483055114746, "learning_rate": 8.949551646245465e-05, "loss": 1.0771, "step": 8424 }, { "epoch": 0.5708381326648146, "grad_norm": 7.998863697052002, "learning_rate": 8.949414744335683e-05, "loss": 0.8166, "step": 8425 }, { "epoch": 0.5709058879327867, "grad_norm": 6.230087757110596, "learning_rate": 8.949277842425903e-05, "loss": 0.8565, "step": 8426 }, { "epoch": 0.5709736432007588, "grad_norm": 5.557027816772461, "learning_rate": 8.949140940516121e-05, "loss": 0.9794, "step": 8427 }, { "epoch": 0.5710413984687309, "grad_norm": 6.321481704711914, "learning_rate": 8.949004038606339e-05, "loss": 0.8254, "step": 8428 }, { "epoch": 0.571109153736703, "grad_norm": 4.4708428382873535, "learning_rate": 8.948867136696557e-05, "loss": 0.8051, "step": 8429 }, { "epoch": 0.5711769090046751, "grad_norm": 6.521273612976074, "learning_rate": 8.948730234786775e-05, "loss": 0.6903, "step": 8430 }, { "epoch": 0.5712446642726472, "grad_norm": 7.250330448150635, "learning_rate": 8.948593332876994e-05, "loss": 1.0477, "step": 8431 }, { "epoch": 0.5713124195406193, "grad_norm": 6.6420207023620605, "learning_rate": 8.948456430967212e-05, "loss": 0.7598, "step": 8432 }, { "epoch": 0.5713801748085914, "grad_norm": 5.907214164733887, "learning_rate": 8.94831952905743e-05, "loss": 0.8957, "step": 8433 }, { "epoch": 0.5714479300765635, "grad_norm": 7.927393913269043, "learning_rate": 8.948182627147648e-05, "loss": 0.814, "step": 8434 }, { "epoch": 0.5715156853445356, "grad_norm": 7.30061674118042, "learning_rate": 8.948045725237868e-05, "loss": 1.0433, "step": 8435 }, { "epoch": 0.5715834406125077, "grad_norm": 6.409942150115967, "learning_rate": 8.947908823328086e-05, "loss": 1.1921, "step": 8436 }, { "epoch": 0.5716511958804797, "grad_norm": 6.589274883270264, "learning_rate": 8.947771921418304e-05, "loss": 0.9055, "step": 8437 }, { "epoch": 0.5717189511484518, "grad_norm": 6.010043144226074, "learning_rate": 8.947635019508522e-05, "loss": 0.8581, "step": 8438 }, { "epoch": 0.5717867064164239, "grad_norm": 5.75832462310791, "learning_rate": 8.94749811759874e-05, "loss": 0.7936, "step": 8439 }, { "epoch": 0.5718544616843959, "grad_norm": 5.600092887878418, "learning_rate": 8.94736121568896e-05, "loss": 0.6994, "step": 8440 }, { "epoch": 0.571922216952368, "grad_norm": 5.774161338806152, "learning_rate": 8.947224313779177e-05, "loss": 0.8647, "step": 8441 }, { "epoch": 0.5719899722203401, "grad_norm": 6.7266950607299805, "learning_rate": 8.947087411869395e-05, "loss": 1.0754, "step": 8442 }, { "epoch": 0.5720577274883122, "grad_norm": 7.049251556396484, "learning_rate": 8.946950509959613e-05, "loss": 0.7269, "step": 8443 }, { "epoch": 0.5721254827562843, "grad_norm": 7.4380879402160645, "learning_rate": 8.946813608049833e-05, "loss": 0.8803, "step": 8444 }, { "epoch": 0.5721932380242564, "grad_norm": 6.558119297027588, "learning_rate": 8.946676706140051e-05, "loss": 0.774, "step": 8445 }, { "epoch": 0.5722609932922285, "grad_norm": 6.586884021759033, "learning_rate": 8.946539804230269e-05, "loss": 0.8607, "step": 8446 }, { "epoch": 0.5723287485602006, "grad_norm": 7.708719253540039, "learning_rate": 8.946402902320487e-05, "loss": 0.9618, "step": 8447 }, { "epoch": 0.5723965038281726, "grad_norm": 7.740994930267334, "learning_rate": 8.946266000410706e-05, "loss": 0.6727, "step": 8448 }, { "epoch": 0.5724642590961447, "grad_norm": 6.2427287101745605, "learning_rate": 8.946129098500924e-05, "loss": 0.8622, "step": 8449 }, { "epoch": 0.5725320143641168, "grad_norm": 8.826958656311035, "learning_rate": 8.945992196591142e-05, "loss": 0.8872, "step": 8450 }, { "epoch": 0.5725997696320889, "grad_norm": 7.137535572052002, "learning_rate": 8.945855294681362e-05, "loss": 0.7906, "step": 8451 }, { "epoch": 0.572667524900061, "grad_norm": 6.379401206970215, "learning_rate": 8.94571839277158e-05, "loss": 0.9356, "step": 8452 }, { "epoch": 0.5727352801680331, "grad_norm": 6.800835609436035, "learning_rate": 8.945581490861798e-05, "loss": 0.7702, "step": 8453 }, { "epoch": 0.5728030354360052, "grad_norm": 6.04707670211792, "learning_rate": 8.945444588952017e-05, "loss": 0.7877, "step": 8454 }, { "epoch": 0.5728707907039773, "grad_norm": 6.483813285827637, "learning_rate": 8.945307687042235e-05, "loss": 1.0264, "step": 8455 }, { "epoch": 0.5729385459719493, "grad_norm": 7.314120292663574, "learning_rate": 8.945170785132453e-05, "loss": 1.2052, "step": 8456 }, { "epoch": 0.5730063012399214, "grad_norm": 8.42261791229248, "learning_rate": 8.945033883222671e-05, "loss": 0.963, "step": 8457 }, { "epoch": 0.5730740565078934, "grad_norm": 5.949848175048828, "learning_rate": 8.944896981312891e-05, "loss": 0.7765, "step": 8458 }, { "epoch": 0.5731418117758655, "grad_norm": 8.215453147888184, "learning_rate": 8.944760079403109e-05, "loss": 0.9526, "step": 8459 }, { "epoch": 0.5732095670438376, "grad_norm": 7.214929580688477, "learning_rate": 8.944623177493327e-05, "loss": 0.9869, "step": 8460 }, { "epoch": 0.5732773223118097, "grad_norm": 7.484433174133301, "learning_rate": 8.944486275583545e-05, "loss": 0.9432, "step": 8461 }, { "epoch": 0.5733450775797818, "grad_norm": 6.356048107147217, "learning_rate": 8.944349373673763e-05, "loss": 0.8457, "step": 8462 }, { "epoch": 0.5734128328477539, "grad_norm": 6.3201117515563965, "learning_rate": 8.944212471763982e-05, "loss": 0.8232, "step": 8463 }, { "epoch": 0.573480588115726, "grad_norm": 7.538288593292236, "learning_rate": 8.9440755698542e-05, "loss": 0.655, "step": 8464 }, { "epoch": 0.5735483433836981, "grad_norm": 6.674449920654297, "learning_rate": 8.943938667944418e-05, "loss": 0.7804, "step": 8465 }, { "epoch": 0.5736160986516702, "grad_norm": 6.578172206878662, "learning_rate": 8.943801766034636e-05, "loss": 0.7174, "step": 8466 }, { "epoch": 0.5736838539196423, "grad_norm": 5.858245372772217, "learning_rate": 8.943664864124856e-05, "loss": 0.7236, "step": 8467 }, { "epoch": 0.5737516091876144, "grad_norm": 5.510379791259766, "learning_rate": 8.943527962215074e-05, "loss": 0.7767, "step": 8468 }, { "epoch": 0.5738193644555865, "grad_norm": 6.925917148590088, "learning_rate": 8.943391060305292e-05, "loss": 1.0637, "step": 8469 }, { "epoch": 0.5738871197235585, "grad_norm": 5.841418266296387, "learning_rate": 8.94325415839551e-05, "loss": 0.7703, "step": 8470 }, { "epoch": 0.5739548749915306, "grad_norm": 5.818962097167969, "learning_rate": 8.943117256485728e-05, "loss": 0.8358, "step": 8471 }, { "epoch": 0.5740226302595027, "grad_norm": 4.567956924438477, "learning_rate": 8.942980354575947e-05, "loss": 0.7853, "step": 8472 }, { "epoch": 0.5740903855274747, "grad_norm": 7.374995231628418, "learning_rate": 8.942843452666165e-05, "loss": 0.7623, "step": 8473 }, { "epoch": 0.5741581407954468, "grad_norm": 6.090158939361572, "learning_rate": 8.942706550756383e-05, "loss": 0.617, "step": 8474 }, { "epoch": 0.5742258960634189, "grad_norm": 7.125964641571045, "learning_rate": 8.942569648846601e-05, "loss": 0.9063, "step": 8475 }, { "epoch": 0.574293651331391, "grad_norm": 6.784455299377441, "learning_rate": 8.94243274693682e-05, "loss": 0.8758, "step": 8476 }, { "epoch": 0.5743614065993631, "grad_norm": 6.138723373413086, "learning_rate": 8.942295845027039e-05, "loss": 0.9269, "step": 8477 }, { "epoch": 0.5744291618673352, "grad_norm": 7.604799747467041, "learning_rate": 8.942158943117257e-05, "loss": 0.9721, "step": 8478 }, { "epoch": 0.5744969171353073, "grad_norm": 7.989227771759033, "learning_rate": 8.942022041207475e-05, "loss": 0.9722, "step": 8479 }, { "epoch": 0.5745646724032794, "grad_norm": 5.422422885894775, "learning_rate": 8.941885139297693e-05, "loss": 0.6328, "step": 8480 }, { "epoch": 0.5746324276712514, "grad_norm": 5.384946346282959, "learning_rate": 8.941748237387912e-05, "loss": 0.677, "step": 8481 }, { "epoch": 0.5747001829392235, "grad_norm": 6.695030212402344, "learning_rate": 8.94161133547813e-05, "loss": 0.8051, "step": 8482 }, { "epoch": 0.5747679382071956, "grad_norm": 6.037063121795654, "learning_rate": 8.941474433568348e-05, "loss": 0.7381, "step": 8483 }, { "epoch": 0.5748356934751677, "grad_norm": 5.64218807220459, "learning_rate": 8.941337531658566e-05, "loss": 0.943, "step": 8484 }, { "epoch": 0.5749034487431398, "grad_norm": 7.324346542358398, "learning_rate": 8.941200629748784e-05, "loss": 0.8886, "step": 8485 }, { "epoch": 0.5749712040111119, "grad_norm": 7.247791767120361, "learning_rate": 8.941063727839004e-05, "loss": 0.7939, "step": 8486 }, { "epoch": 0.575038959279084, "grad_norm": 6.780027866363525, "learning_rate": 8.940926825929222e-05, "loss": 0.9357, "step": 8487 }, { "epoch": 0.5751067145470561, "grad_norm": 6.983544826507568, "learning_rate": 8.94078992401944e-05, "loss": 0.73, "step": 8488 }, { "epoch": 0.5751744698150281, "grad_norm": 5.811152458190918, "learning_rate": 8.940653022109658e-05, "loss": 0.875, "step": 8489 }, { "epoch": 0.5752422250830002, "grad_norm": 5.372483253479004, "learning_rate": 8.940516120199877e-05, "loss": 0.8868, "step": 8490 }, { "epoch": 0.5753099803509722, "grad_norm": 7.48350191116333, "learning_rate": 8.940379218290095e-05, "loss": 1.0441, "step": 8491 }, { "epoch": 0.5753777356189443, "grad_norm": 4.902670383453369, "learning_rate": 8.940242316380313e-05, "loss": 0.6726, "step": 8492 }, { "epoch": 0.5754454908869164, "grad_norm": 5.900030136108398, "learning_rate": 8.940105414470531e-05, "loss": 0.799, "step": 8493 }, { "epoch": 0.5755132461548885, "grad_norm": 7.37599515914917, "learning_rate": 8.939968512560751e-05, "loss": 1.0577, "step": 8494 }, { "epoch": 0.5755810014228606, "grad_norm": 5.871433734893799, "learning_rate": 8.939831610650969e-05, "loss": 0.9335, "step": 8495 }, { "epoch": 0.5756487566908327, "grad_norm": 7.249385833740234, "learning_rate": 8.939694708741187e-05, "loss": 0.8244, "step": 8496 }, { "epoch": 0.5757165119588048, "grad_norm": 7.102729320526123, "learning_rate": 8.939557806831406e-05, "loss": 0.7431, "step": 8497 }, { "epoch": 0.5757842672267769, "grad_norm": 6.558697700500488, "learning_rate": 8.939420904921624e-05, "loss": 0.7765, "step": 8498 }, { "epoch": 0.575852022494749, "grad_norm": 6.82787561416626, "learning_rate": 8.939284003011842e-05, "loss": 0.9449, "step": 8499 }, { "epoch": 0.5759197777627211, "grad_norm": 7.055886268615723, "learning_rate": 8.939147101102062e-05, "loss": 0.8397, "step": 8500 }, { "epoch": 0.5759875330306932, "grad_norm": 7.750473976135254, "learning_rate": 8.93901019919228e-05, "loss": 0.8001, "step": 8501 }, { "epoch": 0.5760552882986653, "grad_norm": 9.581280708312988, "learning_rate": 8.938873297282498e-05, "loss": 0.7048, "step": 8502 }, { "epoch": 0.5761230435666373, "grad_norm": 6.592195510864258, "learning_rate": 8.938736395372716e-05, "loss": 0.9294, "step": 8503 }, { "epoch": 0.5761907988346094, "grad_norm": 5.0412797927856445, "learning_rate": 8.938599493462935e-05, "loss": 0.6723, "step": 8504 }, { "epoch": 0.5762585541025814, "grad_norm": 6.099496364593506, "learning_rate": 8.938462591553153e-05, "loss": 0.8038, "step": 8505 }, { "epoch": 0.5763263093705535, "grad_norm": 6.402875900268555, "learning_rate": 8.938325689643371e-05, "loss": 0.7956, "step": 8506 }, { "epoch": 0.5763940646385256, "grad_norm": 6.937565326690674, "learning_rate": 8.938188787733589e-05, "loss": 0.9354, "step": 8507 }, { "epoch": 0.5764618199064977, "grad_norm": 7.218740940093994, "learning_rate": 8.938051885823807e-05, "loss": 0.8022, "step": 8508 }, { "epoch": 0.5765295751744698, "grad_norm": 7.368991851806641, "learning_rate": 8.937914983914027e-05, "loss": 1.0043, "step": 8509 }, { "epoch": 0.5765973304424419, "grad_norm": 5.916724681854248, "learning_rate": 8.937778082004245e-05, "loss": 0.7391, "step": 8510 }, { "epoch": 0.576665085710414, "grad_norm": 6.9001007080078125, "learning_rate": 8.937641180094463e-05, "loss": 0.9798, "step": 8511 }, { "epoch": 0.5767328409783861, "grad_norm": 6.652282238006592, "learning_rate": 8.937504278184681e-05, "loss": 0.9313, "step": 8512 }, { "epoch": 0.5768005962463582, "grad_norm": 7.920009613037109, "learning_rate": 8.9373673762749e-05, "loss": 0.8018, "step": 8513 }, { "epoch": 0.5768683515143302, "grad_norm": 7.468683242797852, "learning_rate": 8.937230474365118e-05, "loss": 0.8666, "step": 8514 }, { "epoch": 0.5769361067823023, "grad_norm": 5.6963653564453125, "learning_rate": 8.937093572455336e-05, "loss": 0.8645, "step": 8515 }, { "epoch": 0.5770038620502744, "grad_norm": 9.493772506713867, "learning_rate": 8.936956670545554e-05, "loss": 0.7124, "step": 8516 }, { "epoch": 0.5770716173182465, "grad_norm": 6.918614864349365, "learning_rate": 8.936819768635772e-05, "loss": 0.7712, "step": 8517 }, { "epoch": 0.5771393725862186, "grad_norm": 5.827054977416992, "learning_rate": 8.936682866725992e-05, "loss": 0.7484, "step": 8518 }, { "epoch": 0.5772071278541907, "grad_norm": 4.4998626708984375, "learning_rate": 8.93654596481621e-05, "loss": 0.7199, "step": 8519 }, { "epoch": 0.5772748831221628, "grad_norm": 5.738202095031738, "learning_rate": 8.936409062906428e-05, "loss": 0.9027, "step": 8520 }, { "epoch": 0.5773426383901349, "grad_norm": 6.771662712097168, "learning_rate": 8.936272160996646e-05, "loss": 0.7006, "step": 8521 }, { "epoch": 0.5774103936581069, "grad_norm": 6.145763397216797, "learning_rate": 8.936135259086865e-05, "loss": 0.927, "step": 8522 }, { "epoch": 0.577478148926079, "grad_norm": 6.981403827667236, "learning_rate": 8.935998357177083e-05, "loss": 0.8556, "step": 8523 }, { "epoch": 0.577545904194051, "grad_norm": 6.167423725128174, "learning_rate": 8.935861455267301e-05, "loss": 0.643, "step": 8524 }, { "epoch": 0.5776136594620231, "grad_norm": 6.135037899017334, "learning_rate": 8.93572455335752e-05, "loss": 0.8866, "step": 8525 }, { "epoch": 0.5776814147299952, "grad_norm": 5.348565101623535, "learning_rate": 8.935587651447737e-05, "loss": 0.7559, "step": 8526 }, { "epoch": 0.5777491699979673, "grad_norm": 5.95448637008667, "learning_rate": 8.935450749537957e-05, "loss": 0.6934, "step": 8527 }, { "epoch": 0.5778169252659394, "grad_norm": 6.3629961013793945, "learning_rate": 8.935313847628175e-05, "loss": 0.7634, "step": 8528 }, { "epoch": 0.5778846805339115, "grad_norm": 6.271450996398926, "learning_rate": 8.935176945718393e-05, "loss": 0.8269, "step": 8529 }, { "epoch": 0.5779524358018836, "grad_norm": 5.41464900970459, "learning_rate": 8.935040043808611e-05, "loss": 0.5833, "step": 8530 }, { "epoch": 0.5780201910698557, "grad_norm": 10.096611022949219, "learning_rate": 8.934903141898829e-05, "loss": 0.7474, "step": 8531 }, { "epoch": 0.5780879463378278, "grad_norm": 7.8773393630981445, "learning_rate": 8.934766239989048e-05, "loss": 0.894, "step": 8532 }, { "epoch": 0.5781557016057999, "grad_norm": 6.695224761962891, "learning_rate": 8.934629338079266e-05, "loss": 0.8072, "step": 8533 }, { "epoch": 0.578223456873772, "grad_norm": 6.079283237457275, "learning_rate": 8.934492436169484e-05, "loss": 0.6978, "step": 8534 }, { "epoch": 0.5782912121417441, "grad_norm": 6.053205490112305, "learning_rate": 8.934355534259702e-05, "loss": 0.8264, "step": 8535 }, { "epoch": 0.5783589674097162, "grad_norm": 6.483332633972168, "learning_rate": 8.934218632349922e-05, "loss": 0.9318, "step": 8536 }, { "epoch": 0.5784267226776882, "grad_norm": 7.496954441070557, "learning_rate": 8.93408173044014e-05, "loss": 0.69, "step": 8537 }, { "epoch": 0.5784944779456602, "grad_norm": 6.839014530181885, "learning_rate": 8.933944828530358e-05, "loss": 0.8462, "step": 8538 }, { "epoch": 0.5785622332136323, "grad_norm": 7.050039291381836, "learning_rate": 8.933807926620576e-05, "loss": 1.0005, "step": 8539 }, { "epoch": 0.5786299884816044, "grad_norm": 6.284921169281006, "learning_rate": 8.933671024710795e-05, "loss": 0.6977, "step": 8540 }, { "epoch": 0.5786977437495765, "grad_norm": 6.051140308380127, "learning_rate": 8.933534122801013e-05, "loss": 0.9931, "step": 8541 }, { "epoch": 0.5787654990175486, "grad_norm": 6.233377933502197, "learning_rate": 8.933397220891231e-05, "loss": 0.6643, "step": 8542 }, { "epoch": 0.5788332542855207, "grad_norm": 5.1774678230285645, "learning_rate": 8.933260318981451e-05, "loss": 0.8994, "step": 8543 }, { "epoch": 0.5789010095534928, "grad_norm": 5.453426837921143, "learning_rate": 8.933123417071669e-05, "loss": 0.9404, "step": 8544 }, { "epoch": 0.5789687648214649, "grad_norm": 6.1316609382629395, "learning_rate": 8.932986515161887e-05, "loss": 0.8306, "step": 8545 }, { "epoch": 0.579036520089437, "grad_norm": 7.448431968688965, "learning_rate": 8.932849613252106e-05, "loss": 0.8259, "step": 8546 }, { "epoch": 0.579104275357409, "grad_norm": 8.094486236572266, "learning_rate": 8.932712711342324e-05, "loss": 1.011, "step": 8547 }, { "epoch": 0.5791720306253811, "grad_norm": 4.893822193145752, "learning_rate": 8.932575809432542e-05, "loss": 0.8489, "step": 8548 }, { "epoch": 0.5792397858933532, "grad_norm": 5.577503204345703, "learning_rate": 8.93243890752276e-05, "loss": 0.8933, "step": 8549 }, { "epoch": 0.5793075411613253, "grad_norm": 5.874213218688965, "learning_rate": 8.93230200561298e-05, "loss": 1.0447, "step": 8550 }, { "epoch": 0.5793752964292974, "grad_norm": 7.764857769012451, "learning_rate": 8.932165103703198e-05, "loss": 1.0503, "step": 8551 }, { "epoch": 0.5794430516972695, "grad_norm": 6.291406631469727, "learning_rate": 8.932028201793416e-05, "loss": 0.7368, "step": 8552 }, { "epoch": 0.5795108069652416, "grad_norm": 6.035033226013184, "learning_rate": 8.931891299883634e-05, "loss": 0.7683, "step": 8553 }, { "epoch": 0.5795785622332136, "grad_norm": 4.60941743850708, "learning_rate": 8.931754397973853e-05, "loss": 0.8216, "step": 8554 }, { "epoch": 0.5796463175011857, "grad_norm": 5.524953365325928, "learning_rate": 8.931617496064071e-05, "loss": 0.8958, "step": 8555 }, { "epoch": 0.5797140727691578, "grad_norm": 5.544130802154541, "learning_rate": 8.931480594154289e-05, "loss": 0.7621, "step": 8556 }, { "epoch": 0.5797818280371299, "grad_norm": 5.973321437835693, "learning_rate": 8.931343692244507e-05, "loss": 0.8805, "step": 8557 }, { "epoch": 0.579849583305102, "grad_norm": 5.566673755645752, "learning_rate": 8.931206790334725e-05, "loss": 0.7309, "step": 8558 }, { "epoch": 0.579917338573074, "grad_norm": 5.564174652099609, "learning_rate": 8.931069888424945e-05, "loss": 0.7333, "step": 8559 }, { "epoch": 0.5799850938410461, "grad_norm": 5.655849933624268, "learning_rate": 8.930932986515163e-05, "loss": 0.7378, "step": 8560 }, { "epoch": 0.5800528491090182, "grad_norm": 6.19892692565918, "learning_rate": 8.930796084605381e-05, "loss": 0.5912, "step": 8561 }, { "epoch": 0.5801206043769903, "grad_norm": 6.6580095291137695, "learning_rate": 8.930659182695599e-05, "loss": 0.7054, "step": 8562 }, { "epoch": 0.5801883596449624, "grad_norm": 5.171754837036133, "learning_rate": 8.930522280785817e-05, "loss": 0.7478, "step": 8563 }, { "epoch": 0.5802561149129345, "grad_norm": 6.493400573730469, "learning_rate": 8.930385378876036e-05, "loss": 0.7751, "step": 8564 }, { "epoch": 0.5803238701809066, "grad_norm": 7.6895833015441895, "learning_rate": 8.930248476966254e-05, "loss": 0.776, "step": 8565 }, { "epoch": 0.5803916254488787, "grad_norm": 6.17990255355835, "learning_rate": 8.930111575056472e-05, "loss": 0.6127, "step": 8566 }, { "epoch": 0.5804593807168508, "grad_norm": 7.613852024078369, "learning_rate": 8.92997467314669e-05, "loss": 0.901, "step": 8567 }, { "epoch": 0.5805271359848229, "grad_norm": 6.891161918640137, "learning_rate": 8.92983777123691e-05, "loss": 0.8535, "step": 8568 }, { "epoch": 0.580594891252795, "grad_norm": 8.584257125854492, "learning_rate": 8.929700869327128e-05, "loss": 0.8043, "step": 8569 }, { "epoch": 0.580662646520767, "grad_norm": 6.883901119232178, "learning_rate": 8.929563967417346e-05, "loss": 1.0738, "step": 8570 }, { "epoch": 0.580730401788739, "grad_norm": 6.844675064086914, "learning_rate": 8.929427065507564e-05, "loss": 0.868, "step": 8571 }, { "epoch": 0.5807981570567111, "grad_norm": 8.33270263671875, "learning_rate": 8.929290163597782e-05, "loss": 1.2373, "step": 8572 }, { "epoch": 0.5808659123246832, "grad_norm": 7.0670037269592285, "learning_rate": 8.929153261688001e-05, "loss": 0.875, "step": 8573 }, { "epoch": 0.5809336675926553, "grad_norm": 6.652496337890625, "learning_rate": 8.929016359778219e-05, "loss": 0.9693, "step": 8574 }, { "epoch": 0.5810014228606274, "grad_norm": 6.9150390625, "learning_rate": 8.928879457868437e-05, "loss": 1.0438, "step": 8575 }, { "epoch": 0.5810691781285995, "grad_norm": 7.5465192794799805, "learning_rate": 8.928742555958655e-05, "loss": 0.7079, "step": 8576 }, { "epoch": 0.5811369333965716, "grad_norm": 5.88304328918457, "learning_rate": 8.928605654048875e-05, "loss": 0.8251, "step": 8577 }, { "epoch": 0.5812046886645437, "grad_norm": 6.625080108642578, "learning_rate": 8.928468752139093e-05, "loss": 0.833, "step": 8578 }, { "epoch": 0.5812724439325158, "grad_norm": 5.93492317199707, "learning_rate": 8.928331850229311e-05, "loss": 0.8211, "step": 8579 }, { "epoch": 0.5813401992004879, "grad_norm": 5.411419868469238, "learning_rate": 8.928194948319529e-05, "loss": 0.8365, "step": 8580 }, { "epoch": 0.5814079544684599, "grad_norm": 6.126975059509277, "learning_rate": 8.928058046409747e-05, "loss": 0.8646, "step": 8581 }, { "epoch": 0.581475709736432, "grad_norm": 6.076915264129639, "learning_rate": 8.927921144499966e-05, "loss": 0.8304, "step": 8582 }, { "epoch": 0.5815434650044041, "grad_norm": 7.348392009735107, "learning_rate": 8.927784242590184e-05, "loss": 0.8467, "step": 8583 }, { "epoch": 0.5816112202723762, "grad_norm": 5.092489242553711, "learning_rate": 8.927647340680402e-05, "loss": 0.7371, "step": 8584 }, { "epoch": 0.5816789755403483, "grad_norm": 5.680901527404785, "learning_rate": 8.92751043877062e-05, "loss": 0.7388, "step": 8585 }, { "epoch": 0.5817467308083204, "grad_norm": 7.501118183135986, "learning_rate": 8.92737353686084e-05, "loss": 0.6264, "step": 8586 }, { "epoch": 0.5818144860762924, "grad_norm": 6.8912529945373535, "learning_rate": 8.927236634951058e-05, "loss": 0.8985, "step": 8587 }, { "epoch": 0.5818822413442645, "grad_norm": 6.703160285949707, "learning_rate": 8.927099733041276e-05, "loss": 1.1611, "step": 8588 }, { "epoch": 0.5819499966122366, "grad_norm": 5.655977725982666, "learning_rate": 8.926962831131495e-05, "loss": 0.7641, "step": 8589 }, { "epoch": 0.5820177518802087, "grad_norm": 7.246835708618164, "learning_rate": 8.926825929221713e-05, "loss": 0.97, "step": 8590 }, { "epoch": 0.5820855071481807, "grad_norm": 7.098568916320801, "learning_rate": 8.926689027311931e-05, "loss": 0.9531, "step": 8591 }, { "epoch": 0.5821532624161528, "grad_norm": 5.854083061218262, "learning_rate": 8.92655212540215e-05, "loss": 0.5604, "step": 8592 }, { "epoch": 0.5822210176841249, "grad_norm": 9.047096252441406, "learning_rate": 8.926415223492369e-05, "loss": 0.9453, "step": 8593 }, { "epoch": 0.582288772952097, "grad_norm": 6.015660762786865, "learning_rate": 8.926278321582587e-05, "loss": 0.8029, "step": 8594 }, { "epoch": 0.5823565282200691, "grad_norm": 5.913661479949951, "learning_rate": 8.926141419672805e-05, "loss": 0.6898, "step": 8595 }, { "epoch": 0.5824242834880412, "grad_norm": 5.473297595977783, "learning_rate": 8.926004517763024e-05, "loss": 0.9829, "step": 8596 }, { "epoch": 0.5824920387560133, "grad_norm": 5.53269624710083, "learning_rate": 8.925867615853242e-05, "loss": 0.7763, "step": 8597 }, { "epoch": 0.5825597940239854, "grad_norm": 6.618366241455078, "learning_rate": 8.92573071394346e-05, "loss": 1.1884, "step": 8598 }, { "epoch": 0.5826275492919575, "grad_norm": 6.628344535827637, "learning_rate": 8.925593812033678e-05, "loss": 0.7971, "step": 8599 }, { "epoch": 0.5826953045599296, "grad_norm": 6.690967082977295, "learning_rate": 8.925456910123898e-05, "loss": 0.837, "step": 8600 }, { "epoch": 0.5827630598279017, "grad_norm": 6.788214683532715, "learning_rate": 8.925320008214116e-05, "loss": 0.6239, "step": 8601 }, { "epoch": 0.5828308150958738, "grad_norm": 6.47343111038208, "learning_rate": 8.925183106304334e-05, "loss": 0.8656, "step": 8602 }, { "epoch": 0.5828985703638457, "grad_norm": 7.423914909362793, "learning_rate": 8.925046204394552e-05, "loss": 0.9741, "step": 8603 }, { "epoch": 0.5829663256318178, "grad_norm": 6.18986701965332, "learning_rate": 8.92490930248477e-05, "loss": 0.8261, "step": 8604 }, { "epoch": 0.5830340808997899, "grad_norm": 6.904397010803223, "learning_rate": 8.924772400574989e-05, "loss": 0.6875, "step": 8605 }, { "epoch": 0.583101836167762, "grad_norm": 6.054782867431641, "learning_rate": 8.924635498665207e-05, "loss": 0.7726, "step": 8606 }, { "epoch": 0.5831695914357341, "grad_norm": 7.563791751861572, "learning_rate": 8.924498596755425e-05, "loss": 1.038, "step": 8607 }, { "epoch": 0.5832373467037062, "grad_norm": 7.547990322113037, "learning_rate": 8.924361694845643e-05, "loss": 0.8108, "step": 8608 }, { "epoch": 0.5833051019716783, "grad_norm": 6.109129905700684, "learning_rate": 8.924224792935861e-05, "loss": 0.9512, "step": 8609 }, { "epoch": 0.5833728572396504, "grad_norm": 6.7443528175354, "learning_rate": 8.924087891026081e-05, "loss": 1.0178, "step": 8610 }, { "epoch": 0.5834406125076225, "grad_norm": 4.522619247436523, "learning_rate": 8.923950989116299e-05, "loss": 0.7338, "step": 8611 }, { "epoch": 0.5835083677755946, "grad_norm": 6.25988245010376, "learning_rate": 8.923814087206517e-05, "loss": 0.9412, "step": 8612 }, { "epoch": 0.5835761230435667, "grad_norm": 8.428157806396484, "learning_rate": 8.923677185296735e-05, "loss": 0.9129, "step": 8613 }, { "epoch": 0.5836438783115387, "grad_norm": 6.854711055755615, "learning_rate": 8.923540283386954e-05, "loss": 0.9614, "step": 8614 }, { "epoch": 0.5837116335795108, "grad_norm": 5.909132957458496, "learning_rate": 8.923403381477172e-05, "loss": 1.0066, "step": 8615 }, { "epoch": 0.5837793888474829, "grad_norm": 6.080239295959473, "learning_rate": 8.92326647956739e-05, "loss": 0.8778, "step": 8616 }, { "epoch": 0.583847144115455, "grad_norm": 5.880582809448242, "learning_rate": 8.923129577657608e-05, "loss": 0.674, "step": 8617 }, { "epoch": 0.5839148993834271, "grad_norm": 5.32850980758667, "learning_rate": 8.922992675747826e-05, "loss": 0.7366, "step": 8618 }, { "epoch": 0.5839826546513991, "grad_norm": 5.818620204925537, "learning_rate": 8.922855773838046e-05, "loss": 0.8723, "step": 8619 }, { "epoch": 0.5840504099193712, "grad_norm": 7.105060577392578, "learning_rate": 8.922718871928264e-05, "loss": 0.9449, "step": 8620 }, { "epoch": 0.5841181651873433, "grad_norm": 7.047363758087158, "learning_rate": 8.922581970018482e-05, "loss": 1.0505, "step": 8621 }, { "epoch": 0.5841859204553154, "grad_norm": 7.657389163970947, "learning_rate": 8.9224450681087e-05, "loss": 1.0046, "step": 8622 }, { "epoch": 0.5842536757232875, "grad_norm": 6.699533462524414, "learning_rate": 8.922308166198919e-05, "loss": 0.9382, "step": 8623 }, { "epoch": 0.5843214309912595, "grad_norm": 7.720858573913574, "learning_rate": 8.922171264289137e-05, "loss": 0.732, "step": 8624 }, { "epoch": 0.5843891862592316, "grad_norm": 5.816531658172607, "learning_rate": 8.922034362379355e-05, "loss": 0.7825, "step": 8625 }, { "epoch": 0.5844569415272037, "grad_norm": 5.647473335266113, "learning_rate": 8.921897460469573e-05, "loss": 0.8234, "step": 8626 }, { "epoch": 0.5845246967951758, "grad_norm": 6.133992671966553, "learning_rate": 8.921760558559791e-05, "loss": 0.6883, "step": 8627 }, { "epoch": 0.5845924520631479, "grad_norm": 6.076829433441162, "learning_rate": 8.921623656650011e-05, "loss": 0.6453, "step": 8628 }, { "epoch": 0.58466020733112, "grad_norm": 6.3974385261535645, "learning_rate": 8.921486754740229e-05, "loss": 0.7473, "step": 8629 }, { "epoch": 0.5847279625990921, "grad_norm": 8.298005104064941, "learning_rate": 8.921349852830447e-05, "loss": 0.563, "step": 8630 }, { "epoch": 0.5847957178670642, "grad_norm": 6.789668083190918, "learning_rate": 8.921212950920665e-05, "loss": 0.8418, "step": 8631 }, { "epoch": 0.5848634731350363, "grad_norm": 5.68512487411499, "learning_rate": 8.921076049010884e-05, "loss": 0.7862, "step": 8632 }, { "epoch": 0.5849312284030084, "grad_norm": 7.587048053741455, "learning_rate": 8.920939147101102e-05, "loss": 0.6798, "step": 8633 }, { "epoch": 0.5849989836709805, "grad_norm": 6.540048122406006, "learning_rate": 8.92080224519132e-05, "loss": 0.9243, "step": 8634 }, { "epoch": 0.5850667389389526, "grad_norm": 6.688183784484863, "learning_rate": 8.92066534328154e-05, "loss": 0.7537, "step": 8635 }, { "epoch": 0.5851344942069245, "grad_norm": 6.010653972625732, "learning_rate": 8.920528441371758e-05, "loss": 0.962, "step": 8636 }, { "epoch": 0.5852022494748966, "grad_norm": 6.197324752807617, "learning_rate": 8.920391539461976e-05, "loss": 0.8345, "step": 8637 }, { "epoch": 0.5852700047428687, "grad_norm": 6.193318843841553, "learning_rate": 8.920254637552195e-05, "loss": 0.77, "step": 8638 }, { "epoch": 0.5853377600108408, "grad_norm": 7.584078788757324, "learning_rate": 8.920117735642413e-05, "loss": 0.9704, "step": 8639 }, { "epoch": 0.5854055152788129, "grad_norm": 9.150726318359375, "learning_rate": 8.919980833732631e-05, "loss": 0.9343, "step": 8640 }, { "epoch": 0.585473270546785, "grad_norm": 5.945910930633545, "learning_rate": 8.919843931822849e-05, "loss": 0.8009, "step": 8641 }, { "epoch": 0.5855410258147571, "grad_norm": 6.68170690536499, "learning_rate": 8.919707029913069e-05, "loss": 0.7701, "step": 8642 }, { "epoch": 0.5856087810827292, "grad_norm": 5.520997047424316, "learning_rate": 8.919570128003287e-05, "loss": 0.8759, "step": 8643 }, { "epoch": 0.5856765363507013, "grad_norm": 6.285208702087402, "learning_rate": 8.919433226093505e-05, "loss": 0.9845, "step": 8644 }, { "epoch": 0.5857442916186734, "grad_norm": 5.63783597946167, "learning_rate": 8.919296324183723e-05, "loss": 0.7603, "step": 8645 }, { "epoch": 0.5858120468866455, "grad_norm": 6.416496753692627, "learning_rate": 8.919159422273942e-05, "loss": 0.6493, "step": 8646 }, { "epoch": 0.5858798021546175, "grad_norm": 6.023496627807617, "learning_rate": 8.91902252036416e-05, "loss": 0.7045, "step": 8647 }, { "epoch": 0.5859475574225896, "grad_norm": 6.045313835144043, "learning_rate": 8.918885618454378e-05, "loss": 0.8136, "step": 8648 }, { "epoch": 0.5860153126905617, "grad_norm": 5.985274314880371, "learning_rate": 8.918748716544596e-05, "loss": 0.6254, "step": 8649 }, { "epoch": 0.5860830679585338, "grad_norm": 6.4496965408325195, "learning_rate": 8.918611814634814e-05, "loss": 0.8043, "step": 8650 }, { "epoch": 0.5861508232265059, "grad_norm": 6.74000883102417, "learning_rate": 8.918474912725034e-05, "loss": 0.7568, "step": 8651 }, { "epoch": 0.5862185784944779, "grad_norm": 6.625565052032471, "learning_rate": 8.918338010815252e-05, "loss": 0.8329, "step": 8652 }, { "epoch": 0.58628633376245, "grad_norm": 8.42115592956543, "learning_rate": 8.91820110890547e-05, "loss": 0.8818, "step": 8653 }, { "epoch": 0.5863540890304221, "grad_norm": 5.852290153503418, "learning_rate": 8.918064206995688e-05, "loss": 0.7521, "step": 8654 }, { "epoch": 0.5864218442983942, "grad_norm": 5.760341644287109, "learning_rate": 8.917927305085907e-05, "loss": 0.7142, "step": 8655 }, { "epoch": 0.5864895995663663, "grad_norm": 6.094460964202881, "learning_rate": 8.917790403176125e-05, "loss": 0.7863, "step": 8656 }, { "epoch": 0.5865573548343384, "grad_norm": 7.665438652038574, "learning_rate": 8.917653501266343e-05, "loss": 0.7998, "step": 8657 }, { "epoch": 0.5866251101023104, "grad_norm": 8.002902030944824, "learning_rate": 8.917516599356561e-05, "loss": 0.6918, "step": 8658 }, { "epoch": 0.5866928653702825, "grad_norm": 8.93215274810791, "learning_rate": 8.917379697446779e-05, "loss": 1.044, "step": 8659 }, { "epoch": 0.5867606206382546, "grad_norm": 6.267491340637207, "learning_rate": 8.917242795536999e-05, "loss": 0.9676, "step": 8660 }, { "epoch": 0.5868283759062267, "grad_norm": 8.059505462646484, "learning_rate": 8.917105893627217e-05, "loss": 0.6688, "step": 8661 }, { "epoch": 0.5868961311741988, "grad_norm": 9.9888277053833, "learning_rate": 8.916968991717435e-05, "loss": 1.0911, "step": 8662 }, { "epoch": 0.5869638864421709, "grad_norm": 5.3233232498168945, "learning_rate": 8.916832089807653e-05, "loss": 0.6354, "step": 8663 }, { "epoch": 0.587031641710143, "grad_norm": 6.694457054138184, "learning_rate": 8.916695187897871e-05, "loss": 0.8184, "step": 8664 }, { "epoch": 0.5870993969781151, "grad_norm": 6.7131547927856445, "learning_rate": 8.91655828598809e-05, "loss": 0.9447, "step": 8665 }, { "epoch": 0.5871671522460872, "grad_norm": 6.680534362792969, "learning_rate": 8.916421384078308e-05, "loss": 0.7022, "step": 8666 }, { "epoch": 0.5872349075140593, "grad_norm": 5.335456371307373, "learning_rate": 8.916284482168526e-05, "loss": 0.806, "step": 8667 }, { "epoch": 0.5873026627820312, "grad_norm": 7.5574235916137695, "learning_rate": 8.916147580258744e-05, "loss": 0.7042, "step": 8668 }, { "epoch": 0.5873704180500033, "grad_norm": 6.056331634521484, "learning_rate": 8.916010678348964e-05, "loss": 0.7419, "step": 8669 }, { "epoch": 0.5874381733179754, "grad_norm": 6.732639312744141, "learning_rate": 8.915873776439182e-05, "loss": 0.7788, "step": 8670 }, { "epoch": 0.5875059285859475, "grad_norm": 5.853601932525635, "learning_rate": 8.9157368745294e-05, "loss": 0.7423, "step": 8671 }, { "epoch": 0.5875736838539196, "grad_norm": 5.647787570953369, "learning_rate": 8.915599972619618e-05, "loss": 0.6611, "step": 8672 }, { "epoch": 0.5876414391218917, "grad_norm": 5.323878288269043, "learning_rate": 8.915463070709836e-05, "loss": 0.7538, "step": 8673 }, { "epoch": 0.5877091943898638, "grad_norm": 7.562190055847168, "learning_rate": 8.915326168800055e-05, "loss": 0.865, "step": 8674 }, { "epoch": 0.5877769496578359, "grad_norm": 5.60343599319458, "learning_rate": 8.915189266890273e-05, "loss": 0.8023, "step": 8675 }, { "epoch": 0.587844704925808, "grad_norm": 6.189206123352051, "learning_rate": 8.915052364980491e-05, "loss": 0.8698, "step": 8676 }, { "epoch": 0.5879124601937801, "grad_norm": 6.227395534515381, "learning_rate": 8.914915463070709e-05, "loss": 0.9052, "step": 8677 }, { "epoch": 0.5879802154617522, "grad_norm": 5.004229545593262, "learning_rate": 8.914778561160929e-05, "loss": 0.6775, "step": 8678 }, { "epoch": 0.5880479707297243, "grad_norm": 8.418085098266602, "learning_rate": 8.914641659251147e-05, "loss": 1.1209, "step": 8679 }, { "epoch": 0.5881157259976963, "grad_norm": 7.0277204513549805, "learning_rate": 8.914504757341365e-05, "loss": 0.9533, "step": 8680 }, { "epoch": 0.5881834812656684, "grad_norm": 6.450404644012451, "learning_rate": 8.914367855431584e-05, "loss": 0.9277, "step": 8681 }, { "epoch": 0.5882512365336405, "grad_norm": 6.549577236175537, "learning_rate": 8.914230953521802e-05, "loss": 0.7418, "step": 8682 }, { "epoch": 0.5883189918016126, "grad_norm": 7.030699729919434, "learning_rate": 8.91409405161202e-05, "loss": 0.8336, "step": 8683 }, { "epoch": 0.5883867470695847, "grad_norm": 6.665583610534668, "learning_rate": 8.91395714970224e-05, "loss": 0.8458, "step": 8684 }, { "epoch": 0.5884545023375567, "grad_norm": 7.172003269195557, "learning_rate": 8.913820247792458e-05, "loss": 0.9778, "step": 8685 }, { "epoch": 0.5885222576055288, "grad_norm": 5.771976947784424, "learning_rate": 8.913683345882676e-05, "loss": 0.6404, "step": 8686 }, { "epoch": 0.5885900128735009, "grad_norm": 6.679132461547852, "learning_rate": 8.913546443972895e-05, "loss": 0.8758, "step": 8687 }, { "epoch": 0.588657768141473, "grad_norm": 6.341593265533447, "learning_rate": 8.913409542063113e-05, "loss": 0.9231, "step": 8688 }, { "epoch": 0.5887255234094451, "grad_norm": 4.834532260894775, "learning_rate": 8.913272640153331e-05, "loss": 0.6709, "step": 8689 }, { "epoch": 0.5887932786774172, "grad_norm": 6.240522861480713, "learning_rate": 8.913135738243549e-05, "loss": 0.923, "step": 8690 }, { "epoch": 0.5888610339453892, "grad_norm": 6.935693264007568, "learning_rate": 8.912998836333767e-05, "loss": 0.7663, "step": 8691 }, { "epoch": 0.5889287892133613, "grad_norm": 6.702020168304443, "learning_rate": 8.912861934423987e-05, "loss": 0.937, "step": 8692 }, { "epoch": 0.5889965444813334, "grad_norm": 6.361667156219482, "learning_rate": 8.912725032514205e-05, "loss": 0.7574, "step": 8693 }, { "epoch": 0.5890642997493055, "grad_norm": 7.599695682525635, "learning_rate": 8.912588130604423e-05, "loss": 0.8366, "step": 8694 }, { "epoch": 0.5891320550172776, "grad_norm": 7.101802349090576, "learning_rate": 8.912451228694641e-05, "loss": 0.9409, "step": 8695 }, { "epoch": 0.5891998102852497, "grad_norm": 7.674192905426025, "learning_rate": 8.912314326784859e-05, "loss": 1.0846, "step": 8696 }, { "epoch": 0.5892675655532218, "grad_norm": 6.223476886749268, "learning_rate": 8.912177424875078e-05, "loss": 0.9597, "step": 8697 }, { "epoch": 0.5893353208211939, "grad_norm": 5.847829341888428, "learning_rate": 8.912040522965296e-05, "loss": 0.8017, "step": 8698 }, { "epoch": 0.589403076089166, "grad_norm": 6.459780216217041, "learning_rate": 8.911903621055514e-05, "loss": 0.877, "step": 8699 }, { "epoch": 0.5894708313571381, "grad_norm": 5.9923481941223145, "learning_rate": 8.911766719145732e-05, "loss": 0.9193, "step": 8700 }, { "epoch": 0.58953858662511, "grad_norm": 5.799968242645264, "learning_rate": 8.911629817235952e-05, "loss": 0.7449, "step": 8701 }, { "epoch": 0.5896063418930821, "grad_norm": 6.3980393409729, "learning_rate": 8.91149291532617e-05, "loss": 0.9118, "step": 8702 }, { "epoch": 0.5896740971610542, "grad_norm": 6.184665203094482, "learning_rate": 8.911356013416388e-05, "loss": 0.6857, "step": 8703 }, { "epoch": 0.5897418524290263, "grad_norm": 7.494194030761719, "learning_rate": 8.911219111506606e-05, "loss": 1.0483, "step": 8704 }, { "epoch": 0.5898096076969984, "grad_norm": 5.360753059387207, "learning_rate": 8.911082209596824e-05, "loss": 0.7064, "step": 8705 }, { "epoch": 0.5898773629649705, "grad_norm": 6.393972873687744, "learning_rate": 8.910945307687043e-05, "loss": 0.9095, "step": 8706 }, { "epoch": 0.5899451182329426, "grad_norm": 8.099264144897461, "learning_rate": 8.910808405777261e-05, "loss": 1.0004, "step": 8707 }, { "epoch": 0.5900128735009147, "grad_norm": 5.654821872711182, "learning_rate": 8.910671503867479e-05, "loss": 0.753, "step": 8708 }, { "epoch": 0.5900806287688868, "grad_norm": 7.046943187713623, "learning_rate": 8.910534601957697e-05, "loss": 1.012, "step": 8709 }, { "epoch": 0.5901483840368589, "grad_norm": 6.534369945526123, "learning_rate": 8.910397700047917e-05, "loss": 0.8296, "step": 8710 }, { "epoch": 0.590216139304831, "grad_norm": 5.612612724304199, "learning_rate": 8.910260798138135e-05, "loss": 0.7516, "step": 8711 }, { "epoch": 0.5902838945728031, "grad_norm": 5.311154365539551, "learning_rate": 8.910123896228353e-05, "loss": 0.7594, "step": 8712 }, { "epoch": 0.5903516498407752, "grad_norm": 6.404855251312256, "learning_rate": 8.909986994318571e-05, "loss": 0.7252, "step": 8713 }, { "epoch": 0.5904194051087472, "grad_norm": 7.446944236755371, "learning_rate": 8.909850092408789e-05, "loss": 0.9048, "step": 8714 }, { "epoch": 0.5904871603767193, "grad_norm": 5.755975246429443, "learning_rate": 8.909713190499008e-05, "loss": 0.6617, "step": 8715 }, { "epoch": 0.5905549156446914, "grad_norm": 6.966080188751221, "learning_rate": 8.909576288589226e-05, "loss": 0.5794, "step": 8716 }, { "epoch": 0.5906226709126634, "grad_norm": 6.13206672668457, "learning_rate": 8.909439386679444e-05, "loss": 0.8356, "step": 8717 }, { "epoch": 0.5906904261806355, "grad_norm": 7.370462417602539, "learning_rate": 8.909302484769662e-05, "loss": 0.7215, "step": 8718 }, { "epoch": 0.5907581814486076, "grad_norm": 7.727412223815918, "learning_rate": 8.90916558285988e-05, "loss": 0.7943, "step": 8719 }, { "epoch": 0.5908259367165797, "grad_norm": 7.801811218261719, "learning_rate": 8.9090286809501e-05, "loss": 0.7986, "step": 8720 }, { "epoch": 0.5908936919845518, "grad_norm": 8.468879699707031, "learning_rate": 8.908891779040318e-05, "loss": 0.9178, "step": 8721 }, { "epoch": 0.5909614472525239, "grad_norm": 5.8401665687561035, "learning_rate": 8.908754877130536e-05, "loss": 0.9042, "step": 8722 }, { "epoch": 0.591029202520496, "grad_norm": 6.750467777252197, "learning_rate": 8.908617975220754e-05, "loss": 0.7983, "step": 8723 }, { "epoch": 0.591096957788468, "grad_norm": 5.489363193511963, "learning_rate": 8.908481073310973e-05, "loss": 0.6839, "step": 8724 }, { "epoch": 0.5911647130564401, "grad_norm": 8.433201789855957, "learning_rate": 8.908344171401191e-05, "loss": 0.9413, "step": 8725 }, { "epoch": 0.5912324683244122, "grad_norm": 6.133078575134277, "learning_rate": 8.908207269491409e-05, "loss": 1.0624, "step": 8726 }, { "epoch": 0.5913002235923843, "grad_norm": 7.1547698974609375, "learning_rate": 8.908070367581627e-05, "loss": 0.7873, "step": 8727 }, { "epoch": 0.5913679788603564, "grad_norm": 6.1702494621276855, "learning_rate": 8.907933465671847e-05, "loss": 0.737, "step": 8728 }, { "epoch": 0.5914357341283285, "grad_norm": 8.64965534210205, "learning_rate": 8.907796563762065e-05, "loss": 0.7949, "step": 8729 }, { "epoch": 0.5915034893963006, "grad_norm": 6.769810676574707, "learning_rate": 8.907659661852283e-05, "loss": 0.9665, "step": 8730 }, { "epoch": 0.5915712446642727, "grad_norm": 6.564850807189941, "learning_rate": 8.907522759942502e-05, "loss": 0.658, "step": 8731 }, { "epoch": 0.5916389999322448, "grad_norm": 5.231021404266357, "learning_rate": 8.90738585803272e-05, "loss": 0.9782, "step": 8732 }, { "epoch": 0.5917067552002169, "grad_norm": 6.880924701690674, "learning_rate": 8.907248956122938e-05, "loss": 0.9236, "step": 8733 }, { "epoch": 0.5917745104681889, "grad_norm": 7.0143585205078125, "learning_rate": 8.907112054213158e-05, "loss": 0.8532, "step": 8734 }, { "epoch": 0.5918422657361609, "grad_norm": 6.451882839202881, "learning_rate": 8.906975152303376e-05, "loss": 0.7571, "step": 8735 }, { "epoch": 0.591910021004133, "grad_norm": 7.905577182769775, "learning_rate": 8.906838250393594e-05, "loss": 0.7599, "step": 8736 }, { "epoch": 0.5919777762721051, "grad_norm": 6.122454643249512, "learning_rate": 8.906701348483812e-05, "loss": 0.9153, "step": 8737 }, { "epoch": 0.5920455315400772, "grad_norm": 7.811397552490234, "learning_rate": 8.906564446574031e-05, "loss": 0.906, "step": 8738 }, { "epoch": 0.5921132868080493, "grad_norm": 5.695565223693848, "learning_rate": 8.906427544664249e-05, "loss": 0.7452, "step": 8739 }, { "epoch": 0.5921810420760214, "grad_norm": 6.3696393966674805, "learning_rate": 8.906290642754467e-05, "loss": 0.8223, "step": 8740 }, { "epoch": 0.5922487973439935, "grad_norm": 6.49605655670166, "learning_rate": 8.906153740844685e-05, "loss": 0.9092, "step": 8741 }, { "epoch": 0.5923165526119656, "grad_norm": 5.290238380432129, "learning_rate": 8.906016838934903e-05, "loss": 0.7335, "step": 8742 }, { "epoch": 0.5923843078799377, "grad_norm": 6.913309097290039, "learning_rate": 8.905879937025123e-05, "loss": 0.7756, "step": 8743 }, { "epoch": 0.5924520631479098, "grad_norm": 5.938857555389404, "learning_rate": 8.90574303511534e-05, "loss": 0.758, "step": 8744 }, { "epoch": 0.5925198184158819, "grad_norm": 7.276566982269287, "learning_rate": 8.905606133205559e-05, "loss": 0.8429, "step": 8745 }, { "epoch": 0.592587573683854, "grad_norm": 7.6287522315979, "learning_rate": 8.905469231295777e-05, "loss": 1.0939, "step": 8746 }, { "epoch": 0.592655328951826, "grad_norm": 7.293666362762451, "learning_rate": 8.905332329385996e-05, "loss": 0.8264, "step": 8747 }, { "epoch": 0.5927230842197981, "grad_norm": 5.522965908050537, "learning_rate": 8.905195427476214e-05, "loss": 0.6578, "step": 8748 }, { "epoch": 0.5927908394877702, "grad_norm": 6.8883466720581055, "learning_rate": 8.905058525566432e-05, "loss": 0.9412, "step": 8749 }, { "epoch": 0.5928585947557422, "grad_norm": 6.682039737701416, "learning_rate": 8.90492162365665e-05, "loss": 0.8375, "step": 8750 }, { "epoch": 0.5929263500237143, "grad_norm": 9.619691848754883, "learning_rate": 8.904784721746868e-05, "loss": 0.9461, "step": 8751 }, { "epoch": 0.5929941052916864, "grad_norm": 6.880954265594482, "learning_rate": 8.904647819837088e-05, "loss": 0.895, "step": 8752 }, { "epoch": 0.5930618605596585, "grad_norm": 5.3324761390686035, "learning_rate": 8.904510917927306e-05, "loss": 0.7613, "step": 8753 }, { "epoch": 0.5931296158276306, "grad_norm": 7.377603054046631, "learning_rate": 8.904374016017524e-05, "loss": 0.8565, "step": 8754 }, { "epoch": 0.5931973710956027, "grad_norm": 6.537837982177734, "learning_rate": 8.904237114107742e-05, "loss": 0.9888, "step": 8755 }, { "epoch": 0.5932651263635748, "grad_norm": 6.159317493438721, "learning_rate": 8.904100212197961e-05, "loss": 1.0156, "step": 8756 }, { "epoch": 0.5933328816315468, "grad_norm": 10.275593757629395, "learning_rate": 8.903963310288179e-05, "loss": 0.8343, "step": 8757 }, { "epoch": 0.5934006368995189, "grad_norm": 6.211344242095947, "learning_rate": 8.903826408378397e-05, "loss": 0.8728, "step": 8758 }, { "epoch": 0.593468392167491, "grad_norm": 8.628105163574219, "learning_rate": 8.903689506468615e-05, "loss": 0.8468, "step": 8759 }, { "epoch": 0.5935361474354631, "grad_norm": 5.519963264465332, "learning_rate": 8.903552604558833e-05, "loss": 0.7188, "step": 8760 }, { "epoch": 0.5936039027034352, "grad_norm": 6.6959991455078125, "learning_rate": 8.903415702649053e-05, "loss": 1.1405, "step": 8761 }, { "epoch": 0.5936716579714073, "grad_norm": 5.843451499938965, "learning_rate": 8.90327880073927e-05, "loss": 0.7847, "step": 8762 }, { "epoch": 0.5937394132393794, "grad_norm": 5.644291400909424, "learning_rate": 8.903141898829489e-05, "loss": 0.6311, "step": 8763 }, { "epoch": 0.5938071685073515, "grad_norm": 6.7555952072143555, "learning_rate": 8.903004996919707e-05, "loss": 1.0387, "step": 8764 }, { "epoch": 0.5938749237753236, "grad_norm": 6.006453990936279, "learning_rate": 8.902868095009926e-05, "loss": 0.6646, "step": 8765 }, { "epoch": 0.5939426790432956, "grad_norm": 7.705087184906006, "learning_rate": 8.902731193100144e-05, "loss": 1.1749, "step": 8766 }, { "epoch": 0.5940104343112677, "grad_norm": 6.157181739807129, "learning_rate": 8.902594291190362e-05, "loss": 0.979, "step": 8767 }, { "epoch": 0.5940781895792397, "grad_norm": 6.080206394195557, "learning_rate": 8.90245738928058e-05, "loss": 0.9294, "step": 8768 }, { "epoch": 0.5941459448472118, "grad_norm": 5.215951919555664, "learning_rate": 8.902320487370798e-05, "loss": 0.7154, "step": 8769 }, { "epoch": 0.5942137001151839, "grad_norm": 6.383238792419434, "learning_rate": 8.902183585461018e-05, "loss": 0.9136, "step": 8770 }, { "epoch": 0.594281455383156, "grad_norm": 5.297086238861084, "learning_rate": 8.902046683551236e-05, "loss": 0.7147, "step": 8771 }, { "epoch": 0.5943492106511281, "grad_norm": 6.686932563781738, "learning_rate": 8.901909781641454e-05, "loss": 1.0018, "step": 8772 }, { "epoch": 0.5944169659191002, "grad_norm": 5.988333702087402, "learning_rate": 8.901772879731672e-05, "loss": 0.7809, "step": 8773 }, { "epoch": 0.5944847211870723, "grad_norm": 6.060636043548584, "learning_rate": 8.901635977821891e-05, "loss": 0.6055, "step": 8774 }, { "epoch": 0.5945524764550444, "grad_norm": 6.24500036239624, "learning_rate": 8.901499075912109e-05, "loss": 0.8885, "step": 8775 }, { "epoch": 0.5946202317230165, "grad_norm": 6.189664363861084, "learning_rate": 8.901362174002327e-05, "loss": 0.8793, "step": 8776 }, { "epoch": 0.5946879869909886, "grad_norm": 5.860182762145996, "learning_rate": 8.901225272092547e-05, "loss": 0.9083, "step": 8777 }, { "epoch": 0.5947557422589607, "grad_norm": 10.454379081726074, "learning_rate": 8.901088370182765e-05, "loss": 0.6481, "step": 8778 }, { "epoch": 0.5948234975269328, "grad_norm": 6.585578918457031, "learning_rate": 8.900951468272983e-05, "loss": 1.0381, "step": 8779 }, { "epoch": 0.5948912527949048, "grad_norm": 7.335190296173096, "learning_rate": 8.900814566363202e-05, "loss": 0.947, "step": 8780 }, { "epoch": 0.5949590080628769, "grad_norm": 6.560307502746582, "learning_rate": 8.90067766445342e-05, "loss": 0.951, "step": 8781 }, { "epoch": 0.595026763330849, "grad_norm": 5.926759243011475, "learning_rate": 8.900540762543638e-05, "loss": 0.9487, "step": 8782 }, { "epoch": 0.595094518598821, "grad_norm": 5.300271034240723, "learning_rate": 8.900403860633856e-05, "loss": 0.7232, "step": 8783 }, { "epoch": 0.5951622738667931, "grad_norm": 6.828729629516602, "learning_rate": 8.900266958724076e-05, "loss": 0.9124, "step": 8784 }, { "epoch": 0.5952300291347652, "grad_norm": 5.51554012298584, "learning_rate": 8.900130056814294e-05, "loss": 0.7698, "step": 8785 }, { "epoch": 0.5952977844027373, "grad_norm": 6.492809295654297, "learning_rate": 8.899993154904512e-05, "loss": 0.7388, "step": 8786 }, { "epoch": 0.5953655396707094, "grad_norm": 6.4316792488098145, "learning_rate": 8.89985625299473e-05, "loss": 0.7843, "step": 8787 }, { "epoch": 0.5954332949386815, "grad_norm": 5.711614608764648, "learning_rate": 8.899719351084949e-05, "loss": 0.8458, "step": 8788 }, { "epoch": 0.5955010502066536, "grad_norm": 7.605274677276611, "learning_rate": 8.899582449175167e-05, "loss": 0.8907, "step": 8789 }, { "epoch": 0.5955688054746257, "grad_norm": 6.734317302703857, "learning_rate": 8.899445547265385e-05, "loss": 0.8063, "step": 8790 }, { "epoch": 0.5956365607425977, "grad_norm": 10.887665748596191, "learning_rate": 8.899308645355603e-05, "loss": 0.861, "step": 8791 }, { "epoch": 0.5957043160105698, "grad_norm": 7.033245086669922, "learning_rate": 8.899171743445821e-05, "loss": 1.0345, "step": 8792 }, { "epoch": 0.5957720712785419, "grad_norm": 5.743894577026367, "learning_rate": 8.89903484153604e-05, "loss": 0.7492, "step": 8793 }, { "epoch": 0.595839826546514, "grad_norm": 5.664433479309082, "learning_rate": 8.898897939626259e-05, "loss": 0.9206, "step": 8794 }, { "epoch": 0.5959075818144861, "grad_norm": 5.304537773132324, "learning_rate": 8.898761037716477e-05, "loss": 0.7669, "step": 8795 }, { "epoch": 0.5959753370824582, "grad_norm": 6.449788570404053, "learning_rate": 8.898624135806695e-05, "loss": 0.9168, "step": 8796 }, { "epoch": 0.5960430923504303, "grad_norm": 7.1110429763793945, "learning_rate": 8.898487233896913e-05, "loss": 0.8633, "step": 8797 }, { "epoch": 0.5961108476184024, "grad_norm": 8.217866897583008, "learning_rate": 8.898350331987132e-05, "loss": 0.9472, "step": 8798 }, { "epoch": 0.5961786028863744, "grad_norm": 6.968807697296143, "learning_rate": 8.89821343007735e-05, "loss": 0.7977, "step": 8799 }, { "epoch": 0.5962463581543465, "grad_norm": 6.342806339263916, "learning_rate": 8.898076528167568e-05, "loss": 0.8461, "step": 8800 }, { "epoch": 0.5963141134223185, "grad_norm": 6.918409824371338, "learning_rate": 8.897939626257786e-05, "loss": 0.9768, "step": 8801 }, { "epoch": 0.5963818686902906, "grad_norm": 6.3519978523254395, "learning_rate": 8.897802724348006e-05, "loss": 0.794, "step": 8802 }, { "epoch": 0.5964496239582627, "grad_norm": 9.268524169921875, "learning_rate": 8.897665822438224e-05, "loss": 1.0708, "step": 8803 }, { "epoch": 0.5965173792262348, "grad_norm": 6.6414008140563965, "learning_rate": 8.897528920528442e-05, "loss": 0.9317, "step": 8804 }, { "epoch": 0.5965851344942069, "grad_norm": 5.683966159820557, "learning_rate": 8.89739201861866e-05, "loss": 0.8018, "step": 8805 }, { "epoch": 0.596652889762179, "grad_norm": 8.505208015441895, "learning_rate": 8.897255116708878e-05, "loss": 0.8207, "step": 8806 }, { "epoch": 0.5967206450301511, "grad_norm": 6.509139060974121, "learning_rate": 8.897118214799097e-05, "loss": 0.8822, "step": 8807 }, { "epoch": 0.5967884002981232, "grad_norm": 4.886582851409912, "learning_rate": 8.896981312889315e-05, "loss": 1.1176, "step": 8808 }, { "epoch": 0.5968561555660953, "grad_norm": 6.053840637207031, "learning_rate": 8.896844410979533e-05, "loss": 0.6305, "step": 8809 }, { "epoch": 0.5969239108340674, "grad_norm": 6.561148166656494, "learning_rate": 8.896707509069751e-05, "loss": 1.0339, "step": 8810 }, { "epoch": 0.5969916661020395, "grad_norm": 7.02574348449707, "learning_rate": 8.89657060715997e-05, "loss": 0.8793, "step": 8811 }, { "epoch": 0.5970594213700116, "grad_norm": 6.380439758300781, "learning_rate": 8.896433705250189e-05, "loss": 0.9474, "step": 8812 }, { "epoch": 0.5971271766379836, "grad_norm": 6.902020454406738, "learning_rate": 8.896296803340407e-05, "loss": 0.8541, "step": 8813 }, { "epoch": 0.5971949319059557, "grad_norm": 5.170351505279541, "learning_rate": 8.896159901430625e-05, "loss": 0.6224, "step": 8814 }, { "epoch": 0.5972626871739277, "grad_norm": 6.399029731750488, "learning_rate": 8.896022999520843e-05, "loss": 0.7733, "step": 8815 }, { "epoch": 0.5973304424418998, "grad_norm": 7.200798988342285, "learning_rate": 8.895886097611062e-05, "loss": 0.9942, "step": 8816 }, { "epoch": 0.5973981977098719, "grad_norm": 5.057744979858398, "learning_rate": 8.89574919570128e-05, "loss": 0.8113, "step": 8817 }, { "epoch": 0.597465952977844, "grad_norm": 6.267950534820557, "learning_rate": 8.895612293791498e-05, "loss": 1.1343, "step": 8818 }, { "epoch": 0.5975337082458161, "grad_norm": 5.712194919586182, "learning_rate": 8.895475391881716e-05, "loss": 0.6812, "step": 8819 }, { "epoch": 0.5976014635137882, "grad_norm": 6.567288875579834, "learning_rate": 8.895338489971936e-05, "loss": 0.8451, "step": 8820 }, { "epoch": 0.5976692187817603, "grad_norm": 6.08546781539917, "learning_rate": 8.895201588062154e-05, "loss": 0.7353, "step": 8821 }, { "epoch": 0.5977369740497324, "grad_norm": 6.859540939331055, "learning_rate": 8.895064686152372e-05, "loss": 0.6795, "step": 8822 }, { "epoch": 0.5978047293177045, "grad_norm": 5.679804801940918, "learning_rate": 8.894927784242591e-05, "loss": 0.7286, "step": 8823 }, { "epoch": 0.5978724845856765, "grad_norm": 7.603654384613037, "learning_rate": 8.894790882332809e-05, "loss": 0.87, "step": 8824 }, { "epoch": 0.5979402398536486, "grad_norm": 6.440685272216797, "learning_rate": 8.894653980423027e-05, "loss": 1.043, "step": 8825 }, { "epoch": 0.5980079951216207, "grad_norm": 6.420576572418213, "learning_rate": 8.894517078513247e-05, "loss": 0.6191, "step": 8826 }, { "epoch": 0.5980757503895928, "grad_norm": 6.01546573638916, "learning_rate": 8.894380176603465e-05, "loss": 0.9581, "step": 8827 }, { "epoch": 0.5981435056575649, "grad_norm": 6.9343767166137695, "learning_rate": 8.894243274693683e-05, "loss": 0.9531, "step": 8828 }, { "epoch": 0.598211260925537, "grad_norm": 6.511411666870117, "learning_rate": 8.8941063727839e-05, "loss": 0.6982, "step": 8829 }, { "epoch": 0.5982790161935091, "grad_norm": 5.96348762512207, "learning_rate": 8.89396947087412e-05, "loss": 0.6761, "step": 8830 }, { "epoch": 0.5983467714614811, "grad_norm": 5.571112155914307, "learning_rate": 8.893832568964338e-05, "loss": 0.7856, "step": 8831 }, { "epoch": 0.5984145267294532, "grad_norm": 7.437000751495361, "learning_rate": 8.893695667054556e-05, "loss": 0.8045, "step": 8832 }, { "epoch": 0.5984822819974253, "grad_norm": 6.710272789001465, "learning_rate": 8.893558765144774e-05, "loss": 0.725, "step": 8833 }, { "epoch": 0.5985500372653973, "grad_norm": 8.211858749389648, "learning_rate": 8.893421863234994e-05, "loss": 1.0648, "step": 8834 }, { "epoch": 0.5986177925333694, "grad_norm": 6.0621018409729, "learning_rate": 8.893284961325212e-05, "loss": 0.8461, "step": 8835 }, { "epoch": 0.5986855478013415, "grad_norm": 6.834799766540527, "learning_rate": 8.89314805941543e-05, "loss": 0.9162, "step": 8836 }, { "epoch": 0.5987533030693136, "grad_norm": 7.666240215301514, "learning_rate": 8.893011157505648e-05, "loss": 0.8657, "step": 8837 }, { "epoch": 0.5988210583372857, "grad_norm": 5.868284225463867, "learning_rate": 8.892874255595866e-05, "loss": 0.78, "step": 8838 }, { "epoch": 0.5988888136052578, "grad_norm": 8.84867000579834, "learning_rate": 8.892737353686085e-05, "loss": 1.0317, "step": 8839 }, { "epoch": 0.5989565688732299, "grad_norm": 7.292559623718262, "learning_rate": 8.892600451776303e-05, "loss": 0.9336, "step": 8840 }, { "epoch": 0.599024324141202, "grad_norm": 8.978638648986816, "learning_rate": 8.892463549866521e-05, "loss": 0.7901, "step": 8841 }, { "epoch": 0.5990920794091741, "grad_norm": 6.713160037994385, "learning_rate": 8.892326647956739e-05, "loss": 0.9241, "step": 8842 }, { "epoch": 0.5991598346771462, "grad_norm": 7.525753021240234, "learning_rate": 8.892189746046959e-05, "loss": 1.0159, "step": 8843 }, { "epoch": 0.5992275899451183, "grad_norm": 5.174670696258545, "learning_rate": 8.892052844137177e-05, "loss": 0.6338, "step": 8844 }, { "epoch": 0.5992953452130904, "grad_norm": 6.392926216125488, "learning_rate": 8.891915942227395e-05, "loss": 0.9449, "step": 8845 }, { "epoch": 0.5993631004810624, "grad_norm": 6.458075523376465, "learning_rate": 8.891779040317613e-05, "loss": 0.8109, "step": 8846 }, { "epoch": 0.5994308557490345, "grad_norm": 6.092816352844238, "learning_rate": 8.89164213840783e-05, "loss": 0.6486, "step": 8847 }, { "epoch": 0.5994986110170065, "grad_norm": 5.358661651611328, "learning_rate": 8.89150523649805e-05, "loss": 0.6597, "step": 8848 }, { "epoch": 0.5995663662849786, "grad_norm": 7.168787956237793, "learning_rate": 8.891368334588268e-05, "loss": 0.9348, "step": 8849 }, { "epoch": 0.5996341215529507, "grad_norm": 8.961309432983398, "learning_rate": 8.891231432678486e-05, "loss": 1.078, "step": 8850 }, { "epoch": 0.5997018768209228, "grad_norm": 6.025523662567139, "learning_rate": 8.891094530768704e-05, "loss": 0.8771, "step": 8851 }, { "epoch": 0.5997696320888949, "grad_norm": 6.479866981506348, "learning_rate": 8.890957628858922e-05, "loss": 0.6807, "step": 8852 }, { "epoch": 0.599837387356867, "grad_norm": 7.786321640014648, "learning_rate": 8.890820726949142e-05, "loss": 1.0007, "step": 8853 }, { "epoch": 0.5999051426248391, "grad_norm": 6.334638595581055, "learning_rate": 8.89068382503936e-05, "loss": 0.9056, "step": 8854 }, { "epoch": 0.5999728978928112, "grad_norm": 6.959164619445801, "learning_rate": 8.890546923129578e-05, "loss": 0.9682, "step": 8855 }, { "epoch": 0.6000406531607833, "grad_norm": 7.86105489730835, "learning_rate": 8.890410021219796e-05, "loss": 0.7696, "step": 8856 }, { "epoch": 0.6001084084287553, "grad_norm": 9.848732948303223, "learning_rate": 8.890273119310015e-05, "loss": 0.8796, "step": 8857 }, { "epoch": 0.6001761636967274, "grad_norm": 6.406124591827393, "learning_rate": 8.890136217400233e-05, "loss": 0.9671, "step": 8858 }, { "epoch": 0.6002439189646995, "grad_norm": 6.441462993621826, "learning_rate": 8.889999315490451e-05, "loss": 0.851, "step": 8859 }, { "epoch": 0.6003116742326716, "grad_norm": 7.711560249328613, "learning_rate": 8.889862413580669e-05, "loss": 1.0492, "step": 8860 }, { "epoch": 0.6003794295006437, "grad_norm": 7.2431960105896, "learning_rate": 8.889725511670887e-05, "loss": 0.9887, "step": 8861 }, { "epoch": 0.6004471847686158, "grad_norm": 7.861832618713379, "learning_rate": 8.889588609761107e-05, "loss": 1.0759, "step": 8862 }, { "epoch": 0.6005149400365879, "grad_norm": 6.643199920654297, "learning_rate": 8.889451707851325e-05, "loss": 0.7989, "step": 8863 }, { "epoch": 0.6005826953045599, "grad_norm": 5.903257369995117, "learning_rate": 8.889314805941543e-05, "loss": 0.6606, "step": 8864 }, { "epoch": 0.600650450572532, "grad_norm": 6.016655445098877, "learning_rate": 8.889177904031761e-05, "loss": 0.6489, "step": 8865 }, { "epoch": 0.6007182058405041, "grad_norm": 7.733530521392822, "learning_rate": 8.88904100212198e-05, "loss": 1.0653, "step": 8866 }, { "epoch": 0.6007859611084762, "grad_norm": 6.951436996459961, "learning_rate": 8.888904100212198e-05, "loss": 0.821, "step": 8867 }, { "epoch": 0.6008537163764482, "grad_norm": 5.853886604309082, "learning_rate": 8.888767198302416e-05, "loss": 0.7154, "step": 8868 }, { "epoch": 0.6009214716444203, "grad_norm": 7.300787925720215, "learning_rate": 8.888630296392636e-05, "loss": 1.0176, "step": 8869 }, { "epoch": 0.6009892269123924, "grad_norm": 6.771045684814453, "learning_rate": 8.888493394482854e-05, "loss": 0.7652, "step": 8870 }, { "epoch": 0.6010569821803645, "grad_norm": 6.345951557159424, "learning_rate": 8.888356492573072e-05, "loss": 1.0231, "step": 8871 }, { "epoch": 0.6011247374483366, "grad_norm": 6.479809284210205, "learning_rate": 8.888219590663291e-05, "loss": 0.7587, "step": 8872 }, { "epoch": 0.6011924927163087, "grad_norm": 7.4716796875, "learning_rate": 8.888082688753509e-05, "loss": 0.8283, "step": 8873 }, { "epoch": 0.6012602479842808, "grad_norm": 7.2003068923950195, "learning_rate": 8.887945786843727e-05, "loss": 0.8622, "step": 8874 }, { "epoch": 0.6013280032522529, "grad_norm": 5.774078845977783, "learning_rate": 8.887808884933945e-05, "loss": 0.7783, "step": 8875 }, { "epoch": 0.601395758520225, "grad_norm": 5.885690689086914, "learning_rate": 8.887671983024164e-05, "loss": 0.984, "step": 8876 }, { "epoch": 0.6014635137881971, "grad_norm": 6.9094038009643555, "learning_rate": 8.887535081114383e-05, "loss": 0.8093, "step": 8877 }, { "epoch": 0.6015312690561692, "grad_norm": 7.623313903808594, "learning_rate": 8.8873981792046e-05, "loss": 0.8604, "step": 8878 }, { "epoch": 0.6015990243241413, "grad_norm": 4.915633201599121, "learning_rate": 8.887261277294819e-05, "loss": 0.6538, "step": 8879 }, { "epoch": 0.6016667795921132, "grad_norm": 6.35188627243042, "learning_rate": 8.887124375385038e-05, "loss": 0.7546, "step": 8880 }, { "epoch": 0.6017345348600853, "grad_norm": 6.262838840484619, "learning_rate": 8.886987473475256e-05, "loss": 0.7816, "step": 8881 }, { "epoch": 0.6018022901280574, "grad_norm": 6.2654032707214355, "learning_rate": 8.886850571565474e-05, "loss": 1.0446, "step": 8882 }, { "epoch": 0.6018700453960295, "grad_norm": 6.389410972595215, "learning_rate": 8.886713669655692e-05, "loss": 0.9104, "step": 8883 }, { "epoch": 0.6019378006640016, "grad_norm": 6.192864894866943, "learning_rate": 8.88657676774591e-05, "loss": 0.8855, "step": 8884 }, { "epoch": 0.6020055559319737, "grad_norm": 6.384714126586914, "learning_rate": 8.88643986583613e-05, "loss": 0.7535, "step": 8885 }, { "epoch": 0.6020733111999458, "grad_norm": 6.433627605438232, "learning_rate": 8.886302963926348e-05, "loss": 0.6994, "step": 8886 }, { "epoch": 0.6021410664679179, "grad_norm": 6.539730072021484, "learning_rate": 8.886166062016566e-05, "loss": 1.0176, "step": 8887 }, { "epoch": 0.60220882173589, "grad_norm": 7.459704875946045, "learning_rate": 8.886029160106784e-05, "loss": 0.8599, "step": 8888 }, { "epoch": 0.6022765770038621, "grad_norm": 7.512004375457764, "learning_rate": 8.885892258197003e-05, "loss": 0.7238, "step": 8889 }, { "epoch": 0.6023443322718341, "grad_norm": 7.773438453674316, "learning_rate": 8.885755356287221e-05, "loss": 0.72, "step": 8890 }, { "epoch": 0.6024120875398062, "grad_norm": 5.3314948081970215, "learning_rate": 8.885618454377439e-05, "loss": 0.9285, "step": 8891 }, { "epoch": 0.6024798428077783, "grad_norm": 5.834819793701172, "learning_rate": 8.885481552467657e-05, "loss": 0.9032, "step": 8892 }, { "epoch": 0.6025475980757504, "grad_norm": 6.342952251434326, "learning_rate": 8.885344650557875e-05, "loss": 0.8987, "step": 8893 }, { "epoch": 0.6026153533437225, "grad_norm": 5.579010963439941, "learning_rate": 8.885207748648095e-05, "loss": 0.9275, "step": 8894 }, { "epoch": 0.6026831086116946, "grad_norm": 5.604851722717285, "learning_rate": 8.885070846738313e-05, "loss": 0.8163, "step": 8895 }, { "epoch": 0.6027508638796667, "grad_norm": 6.057446479797363, "learning_rate": 8.88493394482853e-05, "loss": 0.8466, "step": 8896 }, { "epoch": 0.6028186191476387, "grad_norm": 8.311997413635254, "learning_rate": 8.884797042918749e-05, "loss": 0.8493, "step": 8897 }, { "epoch": 0.6028863744156108, "grad_norm": 6.672399044036865, "learning_rate": 8.884660141008968e-05, "loss": 0.7638, "step": 8898 }, { "epoch": 0.6029541296835829, "grad_norm": 7.031404495239258, "learning_rate": 8.884523239099186e-05, "loss": 0.7692, "step": 8899 }, { "epoch": 0.603021884951555, "grad_norm": 6.296429634094238, "learning_rate": 8.884386337189404e-05, "loss": 0.9525, "step": 8900 }, { "epoch": 0.603089640219527, "grad_norm": 6.698690891265869, "learning_rate": 8.884249435279622e-05, "loss": 1.0234, "step": 8901 }, { "epoch": 0.6031573954874991, "grad_norm": 5.885977268218994, "learning_rate": 8.88411253336984e-05, "loss": 0.6533, "step": 8902 }, { "epoch": 0.6032251507554712, "grad_norm": 7.2855072021484375, "learning_rate": 8.88397563146006e-05, "loss": 0.7969, "step": 8903 }, { "epoch": 0.6032929060234433, "grad_norm": 5.964268207550049, "learning_rate": 8.883838729550278e-05, "loss": 0.9055, "step": 8904 }, { "epoch": 0.6033606612914154, "grad_norm": 8.085535049438477, "learning_rate": 8.883701827640496e-05, "loss": 0.9988, "step": 8905 }, { "epoch": 0.6034284165593875, "grad_norm": 6.891632080078125, "learning_rate": 8.883564925730714e-05, "loss": 1.0141, "step": 8906 }, { "epoch": 0.6034961718273596, "grad_norm": 5.324792861938477, "learning_rate": 8.883428023820932e-05, "loss": 0.7212, "step": 8907 }, { "epoch": 0.6035639270953317, "grad_norm": 5.735985279083252, "learning_rate": 8.883291121911151e-05, "loss": 0.9523, "step": 8908 }, { "epoch": 0.6036316823633038, "grad_norm": 7.501296043395996, "learning_rate": 8.883154220001369e-05, "loss": 0.938, "step": 8909 }, { "epoch": 0.6036994376312759, "grad_norm": 6.442415714263916, "learning_rate": 8.883017318091587e-05, "loss": 0.5799, "step": 8910 }, { "epoch": 0.603767192899248, "grad_norm": 6.753159999847412, "learning_rate": 8.882880416181805e-05, "loss": 0.6011, "step": 8911 }, { "epoch": 0.60383494816722, "grad_norm": 7.805209636688232, "learning_rate": 8.882743514272025e-05, "loss": 1.1798, "step": 8912 }, { "epoch": 0.603902703435192, "grad_norm": 5.123414993286133, "learning_rate": 8.882606612362243e-05, "loss": 0.7891, "step": 8913 }, { "epoch": 0.6039704587031641, "grad_norm": 6.106377601623535, "learning_rate": 8.88246971045246e-05, "loss": 0.8042, "step": 8914 }, { "epoch": 0.6040382139711362, "grad_norm": 6.874667167663574, "learning_rate": 8.88233280854268e-05, "loss": 0.8167, "step": 8915 }, { "epoch": 0.6041059692391083, "grad_norm": 5.306685447692871, "learning_rate": 8.882195906632898e-05, "loss": 0.6487, "step": 8916 }, { "epoch": 0.6041737245070804, "grad_norm": 6.309480667114258, "learning_rate": 8.882059004723116e-05, "loss": 0.6866, "step": 8917 }, { "epoch": 0.6042414797750525, "grad_norm": 7.042599201202393, "learning_rate": 8.881922102813335e-05, "loss": 0.8205, "step": 8918 }, { "epoch": 0.6043092350430246, "grad_norm": 5.572051048278809, "learning_rate": 8.881785200903554e-05, "loss": 0.8188, "step": 8919 }, { "epoch": 0.6043769903109967, "grad_norm": 5.881922245025635, "learning_rate": 8.881648298993772e-05, "loss": 0.8344, "step": 8920 }, { "epoch": 0.6044447455789688, "grad_norm": 6.141275882720947, "learning_rate": 8.881511397083991e-05, "loss": 0.7084, "step": 8921 }, { "epoch": 0.6045125008469409, "grad_norm": 6.847328186035156, "learning_rate": 8.881374495174209e-05, "loss": 0.9248, "step": 8922 }, { "epoch": 0.604580256114913, "grad_norm": 8.392770767211914, "learning_rate": 8.881237593264427e-05, "loss": 1.0319, "step": 8923 }, { "epoch": 0.604648011382885, "grad_norm": 6.491360664367676, "learning_rate": 8.881100691354645e-05, "loss": 0.8098, "step": 8924 }, { "epoch": 0.6047157666508571, "grad_norm": 7.355408191680908, "learning_rate": 8.880963789444863e-05, "loss": 0.9513, "step": 8925 }, { "epoch": 0.6047835219188292, "grad_norm": 5.667849063873291, "learning_rate": 8.880826887535082e-05, "loss": 0.6251, "step": 8926 }, { "epoch": 0.6048512771868013, "grad_norm": 7.0746636390686035, "learning_rate": 8.8806899856253e-05, "loss": 0.9789, "step": 8927 }, { "epoch": 0.6049190324547734, "grad_norm": 8.575697898864746, "learning_rate": 8.880553083715519e-05, "loss": 0.6239, "step": 8928 }, { "epoch": 0.6049867877227454, "grad_norm": 5.967097759246826, "learning_rate": 8.880416181805737e-05, "loss": 0.7224, "step": 8929 }, { "epoch": 0.6050545429907175, "grad_norm": 7.468807697296143, "learning_rate": 8.880279279895955e-05, "loss": 0.663, "step": 8930 }, { "epoch": 0.6051222982586896, "grad_norm": 7.503567218780518, "learning_rate": 8.880142377986174e-05, "loss": 1.0067, "step": 8931 }, { "epoch": 0.6051900535266617, "grad_norm": 5.722857475280762, "learning_rate": 8.880005476076392e-05, "loss": 0.6875, "step": 8932 }, { "epoch": 0.6052578087946338, "grad_norm": 5.868055820465088, "learning_rate": 8.87986857416661e-05, "loss": 0.9231, "step": 8933 }, { "epoch": 0.6053255640626058, "grad_norm": 6.367112636566162, "learning_rate": 8.879731672256828e-05, "loss": 0.7726, "step": 8934 }, { "epoch": 0.6053933193305779, "grad_norm": 5.738692760467529, "learning_rate": 8.879594770347047e-05, "loss": 0.6841, "step": 8935 }, { "epoch": 0.60546107459855, "grad_norm": 6.935656547546387, "learning_rate": 8.879457868437266e-05, "loss": 0.9394, "step": 8936 }, { "epoch": 0.6055288298665221, "grad_norm": 5.599362373352051, "learning_rate": 8.879320966527484e-05, "loss": 0.741, "step": 8937 }, { "epoch": 0.6055965851344942, "grad_norm": 6.967918395996094, "learning_rate": 8.879184064617702e-05, "loss": 0.7936, "step": 8938 }, { "epoch": 0.6056643404024663, "grad_norm": 7.082143306732178, "learning_rate": 8.87904716270792e-05, "loss": 0.9448, "step": 8939 }, { "epoch": 0.6057320956704384, "grad_norm": 7.133969783782959, "learning_rate": 8.878910260798139e-05, "loss": 1.0402, "step": 8940 }, { "epoch": 0.6057998509384105, "grad_norm": 6.228809356689453, "learning_rate": 8.878773358888357e-05, "loss": 0.9436, "step": 8941 }, { "epoch": 0.6058676062063826, "grad_norm": 7.93583345413208, "learning_rate": 8.878636456978575e-05, "loss": 0.8431, "step": 8942 }, { "epoch": 0.6059353614743547, "grad_norm": 6.536636829376221, "learning_rate": 8.878499555068793e-05, "loss": 0.6499, "step": 8943 }, { "epoch": 0.6060031167423268, "grad_norm": 6.2305145263671875, "learning_rate": 8.878362653159012e-05, "loss": 0.8509, "step": 8944 }, { "epoch": 0.6060708720102989, "grad_norm": 6.194677829742432, "learning_rate": 8.87822575124923e-05, "loss": 0.8863, "step": 8945 }, { "epoch": 0.6061386272782708, "grad_norm": 6.955784797668457, "learning_rate": 8.878088849339449e-05, "loss": 0.669, "step": 8946 }, { "epoch": 0.6062063825462429, "grad_norm": 6.535221099853516, "learning_rate": 8.877951947429667e-05, "loss": 0.9381, "step": 8947 }, { "epoch": 0.606274137814215, "grad_norm": 6.1568803787231445, "learning_rate": 8.877815045519885e-05, "loss": 0.8616, "step": 8948 }, { "epoch": 0.6063418930821871, "grad_norm": 6.541617393493652, "learning_rate": 8.877678143610104e-05, "loss": 0.8262, "step": 8949 }, { "epoch": 0.6064096483501592, "grad_norm": 6.519277572631836, "learning_rate": 8.877541241700322e-05, "loss": 0.6887, "step": 8950 }, { "epoch": 0.6064774036181313, "grad_norm": 5.049574375152588, "learning_rate": 8.87740433979054e-05, "loss": 0.5653, "step": 8951 }, { "epoch": 0.6065451588861034, "grad_norm": 5.1171698570251465, "learning_rate": 8.877267437880758e-05, "loss": 0.9257, "step": 8952 }, { "epoch": 0.6066129141540755, "grad_norm": 5.691270351409912, "learning_rate": 8.877130535970978e-05, "loss": 0.7008, "step": 8953 }, { "epoch": 0.6066806694220476, "grad_norm": 6.948482990264893, "learning_rate": 8.876993634061196e-05, "loss": 1.0571, "step": 8954 }, { "epoch": 0.6067484246900197, "grad_norm": 7.013044357299805, "learning_rate": 8.876856732151414e-05, "loss": 0.901, "step": 8955 }, { "epoch": 0.6068161799579918, "grad_norm": 6.465219974517822, "learning_rate": 8.876719830241632e-05, "loss": 0.8805, "step": 8956 }, { "epoch": 0.6068839352259638, "grad_norm": 6.187651634216309, "learning_rate": 8.87658292833185e-05, "loss": 0.9569, "step": 8957 }, { "epoch": 0.6069516904939359, "grad_norm": 7.414092063903809, "learning_rate": 8.876446026422069e-05, "loss": 0.8365, "step": 8958 }, { "epoch": 0.607019445761908, "grad_norm": 6.181675910949707, "learning_rate": 8.876309124512287e-05, "loss": 0.8135, "step": 8959 }, { "epoch": 0.6070872010298801, "grad_norm": 5.536195278167725, "learning_rate": 8.876172222602505e-05, "loss": 0.8226, "step": 8960 }, { "epoch": 0.6071549562978522, "grad_norm": 6.445333480834961, "learning_rate": 8.876035320692723e-05, "loss": 0.851, "step": 8961 }, { "epoch": 0.6072227115658242, "grad_norm": 7.526693820953369, "learning_rate": 8.875898418782943e-05, "loss": 0.806, "step": 8962 }, { "epoch": 0.6072904668337963, "grad_norm": 6.761377334594727, "learning_rate": 8.87576151687316e-05, "loss": 0.7834, "step": 8963 }, { "epoch": 0.6073582221017684, "grad_norm": 6.632135391235352, "learning_rate": 8.875624614963379e-05, "loss": 0.8597, "step": 8964 }, { "epoch": 0.6074259773697405, "grad_norm": 6.839773178100586, "learning_rate": 8.875487713053598e-05, "loss": 0.7056, "step": 8965 }, { "epoch": 0.6074937326377126, "grad_norm": 6.412302017211914, "learning_rate": 8.875350811143816e-05, "loss": 0.8321, "step": 8966 }, { "epoch": 0.6075614879056846, "grad_norm": 10.579608917236328, "learning_rate": 8.875213909234034e-05, "loss": 1.0393, "step": 8967 }, { "epoch": 0.6076292431736567, "grad_norm": 7.9641265869140625, "learning_rate": 8.875077007324253e-05, "loss": 0.8742, "step": 8968 }, { "epoch": 0.6076969984416288, "grad_norm": 5.490036487579346, "learning_rate": 8.874940105414471e-05, "loss": 0.9735, "step": 8969 }, { "epoch": 0.6077647537096009, "grad_norm": 6.595509052276611, "learning_rate": 8.87480320350469e-05, "loss": 0.9047, "step": 8970 }, { "epoch": 0.607832508977573, "grad_norm": 5.147292137145996, "learning_rate": 8.874666301594908e-05, "loss": 0.6643, "step": 8971 }, { "epoch": 0.6079002642455451, "grad_norm": 6.266574859619141, "learning_rate": 8.874529399685127e-05, "loss": 0.832, "step": 8972 }, { "epoch": 0.6079680195135172, "grad_norm": 5.882715225219727, "learning_rate": 8.874392497775345e-05, "loss": 0.9908, "step": 8973 }, { "epoch": 0.6080357747814893, "grad_norm": 7.928326606750488, "learning_rate": 8.874255595865563e-05, "loss": 1.0295, "step": 8974 }, { "epoch": 0.6081035300494614, "grad_norm": 8.489121437072754, "learning_rate": 8.874118693955781e-05, "loss": 0.9872, "step": 8975 }, { "epoch": 0.6081712853174335, "grad_norm": 5.946715354919434, "learning_rate": 8.873981792046e-05, "loss": 1.0244, "step": 8976 }, { "epoch": 0.6082390405854056, "grad_norm": 6.594414234161377, "learning_rate": 8.873844890136218e-05, "loss": 0.9772, "step": 8977 }, { "epoch": 0.6083067958533775, "grad_norm": 5.421601295471191, "learning_rate": 8.873707988226436e-05, "loss": 0.8735, "step": 8978 }, { "epoch": 0.6083745511213496, "grad_norm": 6.623301029205322, "learning_rate": 8.873571086316655e-05, "loss": 0.9308, "step": 8979 }, { "epoch": 0.6084423063893217, "grad_norm": 7.260282516479492, "learning_rate": 8.873434184406873e-05, "loss": 0.8352, "step": 8980 }, { "epoch": 0.6085100616572938, "grad_norm": 5.427891254425049, "learning_rate": 8.873297282497092e-05, "loss": 0.5448, "step": 8981 }, { "epoch": 0.6085778169252659, "grad_norm": 6.445272922515869, "learning_rate": 8.87316038058731e-05, "loss": 0.7464, "step": 8982 }, { "epoch": 0.608645572193238, "grad_norm": 5.404473304748535, "learning_rate": 8.873023478677528e-05, "loss": 0.8883, "step": 8983 }, { "epoch": 0.6087133274612101, "grad_norm": 5.924696445465088, "learning_rate": 8.872886576767746e-05, "loss": 0.9852, "step": 8984 }, { "epoch": 0.6087810827291822, "grad_norm": 7.119851112365723, "learning_rate": 8.872749674857964e-05, "loss": 0.8267, "step": 8985 }, { "epoch": 0.6088488379971543, "grad_norm": 6.306662559509277, "learning_rate": 8.872612772948183e-05, "loss": 0.8916, "step": 8986 }, { "epoch": 0.6089165932651264, "grad_norm": 6.999206066131592, "learning_rate": 8.872475871038402e-05, "loss": 0.9852, "step": 8987 }, { "epoch": 0.6089843485330985, "grad_norm": 7.93400239944458, "learning_rate": 8.87233896912862e-05, "loss": 0.7336, "step": 8988 }, { "epoch": 0.6090521038010706, "grad_norm": 6.08065938949585, "learning_rate": 8.872202067218838e-05, "loss": 0.8714, "step": 8989 }, { "epoch": 0.6091198590690426, "grad_norm": 8.639623641967773, "learning_rate": 8.872065165309057e-05, "loss": 0.7973, "step": 8990 }, { "epoch": 0.6091876143370147, "grad_norm": 6.663565158843994, "learning_rate": 8.871928263399275e-05, "loss": 0.7789, "step": 8991 }, { "epoch": 0.6092553696049868, "grad_norm": 5.1584153175354, "learning_rate": 8.871791361489493e-05, "loss": 0.7933, "step": 8992 }, { "epoch": 0.6093231248729589, "grad_norm": 5.59836483001709, "learning_rate": 8.871654459579711e-05, "loss": 0.731, "step": 8993 }, { "epoch": 0.609390880140931, "grad_norm": 6.6651482582092285, "learning_rate": 8.871517557669929e-05, "loss": 0.624, "step": 8994 }, { "epoch": 0.609458635408903, "grad_norm": 5.002302169799805, "learning_rate": 8.871380655760148e-05, "loss": 0.6816, "step": 8995 }, { "epoch": 0.6095263906768751, "grad_norm": 7.68015718460083, "learning_rate": 8.871243753850367e-05, "loss": 0.7353, "step": 8996 }, { "epoch": 0.6095941459448472, "grad_norm": 6.140766143798828, "learning_rate": 8.871106851940585e-05, "loss": 0.7172, "step": 8997 }, { "epoch": 0.6096619012128193, "grad_norm": 6.370824813842773, "learning_rate": 8.870969950030803e-05, "loss": 0.7664, "step": 8998 }, { "epoch": 0.6097296564807914, "grad_norm": 6.050932884216309, "learning_rate": 8.870833048121022e-05, "loss": 0.8182, "step": 8999 }, { "epoch": 0.6097974117487635, "grad_norm": 6.733234882354736, "learning_rate": 8.87069614621124e-05, "loss": 0.8736, "step": 9000 }, { "epoch": 0.6098651670167355, "grad_norm": 5.3225417137146, "learning_rate": 8.870559244301458e-05, "loss": 0.737, "step": 9001 }, { "epoch": 0.6099329222847076, "grad_norm": 5.601250171661377, "learning_rate": 8.870422342391676e-05, "loss": 0.7802, "step": 9002 }, { "epoch": 0.6100006775526797, "grad_norm": 4.972486972808838, "learning_rate": 8.870285440481894e-05, "loss": 0.6247, "step": 9003 }, { "epoch": 0.6100684328206518, "grad_norm": 6.177496433258057, "learning_rate": 8.870148538572114e-05, "loss": 0.8745, "step": 9004 }, { "epoch": 0.6101361880886239, "grad_norm": 5.5521321296691895, "learning_rate": 8.870011636662332e-05, "loss": 0.7281, "step": 9005 }, { "epoch": 0.610203943356596, "grad_norm": 5.7942705154418945, "learning_rate": 8.86987473475255e-05, "loss": 0.8116, "step": 9006 }, { "epoch": 0.6102716986245681, "grad_norm": 6.355384349822998, "learning_rate": 8.869737832842768e-05, "loss": 0.9577, "step": 9007 }, { "epoch": 0.6103394538925402, "grad_norm": 9.047319412231445, "learning_rate": 8.869600930932987e-05, "loss": 0.7913, "step": 9008 }, { "epoch": 0.6104072091605123, "grad_norm": 6.6220855712890625, "learning_rate": 8.869464029023205e-05, "loss": 0.8176, "step": 9009 }, { "epoch": 0.6104749644284844, "grad_norm": 7.567444324493408, "learning_rate": 8.869327127113423e-05, "loss": 0.9745, "step": 9010 }, { "epoch": 0.6105427196964563, "grad_norm": 7.12336540222168, "learning_rate": 8.869190225203642e-05, "loss": 0.7816, "step": 9011 }, { "epoch": 0.6106104749644284, "grad_norm": 5.867663860321045, "learning_rate": 8.86905332329386e-05, "loss": 0.8209, "step": 9012 }, { "epoch": 0.6106782302324005, "grad_norm": 6.362299919128418, "learning_rate": 8.868916421384079e-05, "loss": 0.8131, "step": 9013 }, { "epoch": 0.6107459855003726, "grad_norm": 6.171093463897705, "learning_rate": 8.868779519474298e-05, "loss": 0.783, "step": 9014 }, { "epoch": 0.6108137407683447, "grad_norm": 5.57711124420166, "learning_rate": 8.868642617564516e-05, "loss": 0.8022, "step": 9015 }, { "epoch": 0.6108814960363168, "grad_norm": 6.855584144592285, "learning_rate": 8.868505715654734e-05, "loss": 0.8162, "step": 9016 }, { "epoch": 0.6109492513042889, "grad_norm": 5.897914886474609, "learning_rate": 8.868368813744952e-05, "loss": 0.7343, "step": 9017 }, { "epoch": 0.611017006572261, "grad_norm": 5.253844261169434, "learning_rate": 8.868231911835171e-05, "loss": 0.7384, "step": 9018 }, { "epoch": 0.6110847618402331, "grad_norm": 5.258492469787598, "learning_rate": 8.86809500992539e-05, "loss": 0.7803, "step": 9019 }, { "epoch": 0.6111525171082052, "grad_norm": 5.407173156738281, "learning_rate": 8.867958108015607e-05, "loss": 0.7416, "step": 9020 }, { "epoch": 0.6112202723761773, "grad_norm": 7.5746283531188965, "learning_rate": 8.867821206105826e-05, "loss": 0.8327, "step": 9021 }, { "epoch": 0.6112880276441494, "grad_norm": 6.094844818115234, "learning_rate": 8.867684304196045e-05, "loss": 0.76, "step": 9022 }, { "epoch": 0.6113557829121214, "grad_norm": 6.436967849731445, "learning_rate": 8.867547402286263e-05, "loss": 0.8966, "step": 9023 }, { "epoch": 0.6114235381800935, "grad_norm": 4.939077377319336, "learning_rate": 8.867410500376481e-05, "loss": 0.8912, "step": 9024 }, { "epoch": 0.6114912934480656, "grad_norm": 6.9223127365112305, "learning_rate": 8.867273598466699e-05, "loss": 0.9135, "step": 9025 }, { "epoch": 0.6115590487160377, "grad_norm": 5.3313517570495605, "learning_rate": 8.867136696556917e-05, "loss": 0.7065, "step": 9026 }, { "epoch": 0.6116268039840097, "grad_norm": 5.880809783935547, "learning_rate": 8.866999794647136e-05, "loss": 0.8402, "step": 9027 }, { "epoch": 0.6116945592519818, "grad_norm": 5.272537708282471, "learning_rate": 8.866862892737354e-05, "loss": 0.6471, "step": 9028 }, { "epoch": 0.6117623145199539, "grad_norm": 4.6608147621154785, "learning_rate": 8.866725990827572e-05, "loss": 0.663, "step": 9029 }, { "epoch": 0.611830069787926, "grad_norm": 7.082324981689453, "learning_rate": 8.86658908891779e-05, "loss": 0.7523, "step": 9030 }, { "epoch": 0.6118978250558981, "grad_norm": 5.356766223907471, "learning_rate": 8.86645218700801e-05, "loss": 0.8098, "step": 9031 }, { "epoch": 0.6119655803238702, "grad_norm": 4.575824737548828, "learning_rate": 8.866315285098228e-05, "loss": 0.7311, "step": 9032 }, { "epoch": 0.6120333355918423, "grad_norm": 6.3496012687683105, "learning_rate": 8.866178383188446e-05, "loss": 0.9489, "step": 9033 }, { "epoch": 0.6121010908598143, "grad_norm": 5.932507038116455, "learning_rate": 8.866041481278664e-05, "loss": 0.7362, "step": 9034 }, { "epoch": 0.6121688461277864, "grad_norm": 6.566854000091553, "learning_rate": 8.865904579368882e-05, "loss": 0.9414, "step": 9035 }, { "epoch": 0.6122366013957585, "grad_norm": 6.274519920349121, "learning_rate": 8.865767677459101e-05, "loss": 0.9277, "step": 9036 }, { "epoch": 0.6123043566637306, "grad_norm": 6.512722492218018, "learning_rate": 8.86563077554932e-05, "loss": 0.8095, "step": 9037 }, { "epoch": 0.6123721119317027, "grad_norm": 5.9804558753967285, "learning_rate": 8.865493873639538e-05, "loss": 0.7803, "step": 9038 }, { "epoch": 0.6124398671996748, "grad_norm": 7.561446189880371, "learning_rate": 8.865356971729756e-05, "loss": 0.8143, "step": 9039 }, { "epoch": 0.6125076224676469, "grad_norm": 6.611248016357422, "learning_rate": 8.865220069819974e-05, "loss": 0.813, "step": 9040 }, { "epoch": 0.612575377735619, "grad_norm": 6.7652764320373535, "learning_rate": 8.865083167910193e-05, "loss": 0.8242, "step": 9041 }, { "epoch": 0.6126431330035911, "grad_norm": 4.964203834533691, "learning_rate": 8.864946266000411e-05, "loss": 0.6155, "step": 9042 }, { "epoch": 0.6127108882715631, "grad_norm": 5.260312080383301, "learning_rate": 8.864809364090629e-05, "loss": 0.694, "step": 9043 }, { "epoch": 0.6127786435395352, "grad_norm": 6.220287799835205, "learning_rate": 8.864672462180847e-05, "loss": 0.7783, "step": 9044 }, { "epoch": 0.6128463988075072, "grad_norm": 6.5388078689575195, "learning_rate": 8.864535560271066e-05, "loss": 0.6654, "step": 9045 }, { "epoch": 0.6129141540754793, "grad_norm": 6.570968151092529, "learning_rate": 8.864398658361284e-05, "loss": 1.15, "step": 9046 }, { "epoch": 0.6129819093434514, "grad_norm": 5.285862445831299, "learning_rate": 8.864261756451503e-05, "loss": 0.5951, "step": 9047 }, { "epoch": 0.6130496646114235, "grad_norm": 6.997344017028809, "learning_rate": 8.86412485454172e-05, "loss": 0.8675, "step": 9048 }, { "epoch": 0.6131174198793956, "grad_norm": 7.796441555023193, "learning_rate": 8.863987952631939e-05, "loss": 0.7794, "step": 9049 }, { "epoch": 0.6131851751473677, "grad_norm": 5.723931789398193, "learning_rate": 8.863851050722158e-05, "loss": 0.7727, "step": 9050 }, { "epoch": 0.6132529304153398, "grad_norm": 6.294017791748047, "learning_rate": 8.863714148812376e-05, "loss": 0.9198, "step": 9051 }, { "epoch": 0.6133206856833119, "grad_norm": 7.23032808303833, "learning_rate": 8.863577246902594e-05, "loss": 0.9892, "step": 9052 }, { "epoch": 0.613388440951284, "grad_norm": 5.533211708068848, "learning_rate": 8.863440344992812e-05, "loss": 0.7416, "step": 9053 }, { "epoch": 0.6134561962192561, "grad_norm": 5.181722164154053, "learning_rate": 8.863303443083031e-05, "loss": 0.7471, "step": 9054 }, { "epoch": 0.6135239514872282, "grad_norm": 5.428781986236572, "learning_rate": 8.86316654117325e-05, "loss": 0.8347, "step": 9055 }, { "epoch": 0.6135917067552003, "grad_norm": 7.403633117675781, "learning_rate": 8.863029639263468e-05, "loss": 0.9809, "step": 9056 }, { "epoch": 0.6136594620231723, "grad_norm": 6.061591625213623, "learning_rate": 8.862892737353687e-05, "loss": 0.7752, "step": 9057 }, { "epoch": 0.6137272172911444, "grad_norm": 6.5160393714904785, "learning_rate": 8.862755835443905e-05, "loss": 0.8982, "step": 9058 }, { "epoch": 0.6137949725591165, "grad_norm": 6.234467506408691, "learning_rate": 8.862618933534123e-05, "loss": 0.7428, "step": 9059 }, { "epoch": 0.6138627278270885, "grad_norm": 7.634365081787109, "learning_rate": 8.862482031624342e-05, "loss": 0.8113, "step": 9060 }, { "epoch": 0.6139304830950606, "grad_norm": 6.873602867126465, "learning_rate": 8.86234512971456e-05, "loss": 0.8789, "step": 9061 }, { "epoch": 0.6139982383630327, "grad_norm": 8.263740539550781, "learning_rate": 8.862208227804778e-05, "loss": 1.0911, "step": 9062 }, { "epoch": 0.6140659936310048, "grad_norm": 7.048012733459473, "learning_rate": 8.862071325894996e-05, "loss": 0.7947, "step": 9063 }, { "epoch": 0.6141337488989769, "grad_norm": 6.902647018432617, "learning_rate": 8.861934423985216e-05, "loss": 0.9391, "step": 9064 }, { "epoch": 0.614201504166949, "grad_norm": 7.542623043060303, "learning_rate": 8.861797522075434e-05, "loss": 0.8601, "step": 9065 }, { "epoch": 0.6142692594349211, "grad_norm": 6.404265403747559, "learning_rate": 8.861660620165652e-05, "loss": 0.7602, "step": 9066 }, { "epoch": 0.6143370147028931, "grad_norm": 5.523179054260254, "learning_rate": 8.86152371825587e-05, "loss": 0.7821, "step": 9067 }, { "epoch": 0.6144047699708652, "grad_norm": 5.909379005432129, "learning_rate": 8.86138681634609e-05, "loss": 0.788, "step": 9068 }, { "epoch": 0.6144725252388373, "grad_norm": 6.69690465927124, "learning_rate": 8.861249914436307e-05, "loss": 0.9958, "step": 9069 }, { "epoch": 0.6145402805068094, "grad_norm": 4.888934135437012, "learning_rate": 8.861113012526525e-05, "loss": 0.6594, "step": 9070 }, { "epoch": 0.6146080357747815, "grad_norm": 6.2297892570495605, "learning_rate": 8.860976110616743e-05, "loss": 0.7989, "step": 9071 }, { "epoch": 0.6146757910427536, "grad_norm": 6.807466506958008, "learning_rate": 8.860839208706962e-05, "loss": 0.9594, "step": 9072 }, { "epoch": 0.6147435463107257, "grad_norm": 5.524564266204834, "learning_rate": 8.860702306797181e-05, "loss": 0.9957, "step": 9073 }, { "epoch": 0.6148113015786978, "grad_norm": 6.862370014190674, "learning_rate": 8.860565404887399e-05, "loss": 0.8455, "step": 9074 }, { "epoch": 0.6148790568466699, "grad_norm": 6.885018825531006, "learning_rate": 8.860428502977617e-05, "loss": 0.8901, "step": 9075 }, { "epoch": 0.6149468121146419, "grad_norm": 5.2585344314575195, "learning_rate": 8.860291601067835e-05, "loss": 0.6791, "step": 9076 }, { "epoch": 0.615014567382614, "grad_norm": 6.821567058563232, "learning_rate": 8.860154699158054e-05, "loss": 1.1323, "step": 9077 }, { "epoch": 0.615082322650586, "grad_norm": 5.113526344299316, "learning_rate": 8.860017797248272e-05, "loss": 0.8949, "step": 9078 }, { "epoch": 0.6151500779185581, "grad_norm": 6.890782356262207, "learning_rate": 8.85988089533849e-05, "loss": 0.9682, "step": 9079 }, { "epoch": 0.6152178331865302, "grad_norm": 6.634467124938965, "learning_rate": 8.859743993428708e-05, "loss": 0.8697, "step": 9080 }, { "epoch": 0.6152855884545023, "grad_norm": 8.704490661621094, "learning_rate": 8.859607091518927e-05, "loss": 1.0182, "step": 9081 }, { "epoch": 0.6153533437224744, "grad_norm": 6.126344680786133, "learning_rate": 8.859470189609146e-05, "loss": 0.8234, "step": 9082 }, { "epoch": 0.6154210989904465, "grad_norm": 4.622169017791748, "learning_rate": 8.859333287699364e-05, "loss": 0.5793, "step": 9083 }, { "epoch": 0.6154888542584186, "grad_norm": 6.331384181976318, "learning_rate": 8.859196385789582e-05, "loss": 0.7669, "step": 9084 }, { "epoch": 0.6155566095263907, "grad_norm": 5.072127819061279, "learning_rate": 8.8590594838798e-05, "loss": 0.8827, "step": 9085 }, { "epoch": 0.6156243647943628, "grad_norm": 5.524892330169678, "learning_rate": 8.85892258197002e-05, "loss": 0.5116, "step": 9086 }, { "epoch": 0.6156921200623349, "grad_norm": 7.506863594055176, "learning_rate": 8.858785680060237e-05, "loss": 1.1547, "step": 9087 }, { "epoch": 0.615759875330307, "grad_norm": 5.211267471313477, "learning_rate": 8.858648778150455e-05, "loss": 0.7001, "step": 9088 }, { "epoch": 0.615827630598279, "grad_norm": 5.154802322387695, "learning_rate": 8.858511876240674e-05, "loss": 0.6498, "step": 9089 }, { "epoch": 0.6158953858662511, "grad_norm": 6.631251335144043, "learning_rate": 8.858374974330892e-05, "loss": 0.7026, "step": 9090 }, { "epoch": 0.6159631411342232, "grad_norm": 6.677391529083252, "learning_rate": 8.858238072421111e-05, "loss": 0.8582, "step": 9091 }, { "epoch": 0.6160308964021952, "grad_norm": 5.569202423095703, "learning_rate": 8.858101170511329e-05, "loss": 0.7886, "step": 9092 }, { "epoch": 0.6160986516701673, "grad_norm": 6.454385280609131, "learning_rate": 8.857964268601547e-05, "loss": 0.7088, "step": 9093 }, { "epoch": 0.6161664069381394, "grad_norm": 6.360875606536865, "learning_rate": 8.857827366691765e-05, "loss": 0.7079, "step": 9094 }, { "epoch": 0.6162341622061115, "grad_norm": 6.538075923919678, "learning_rate": 8.857690464781983e-05, "loss": 0.851, "step": 9095 }, { "epoch": 0.6163019174740836, "grad_norm": 6.111706256866455, "learning_rate": 8.857553562872202e-05, "loss": 0.6732, "step": 9096 }, { "epoch": 0.6163696727420557, "grad_norm": 6.8173956871032715, "learning_rate": 8.85741666096242e-05, "loss": 0.8164, "step": 9097 }, { "epoch": 0.6164374280100278, "grad_norm": 6.984659194946289, "learning_rate": 8.857279759052639e-05, "loss": 0.7083, "step": 9098 }, { "epoch": 0.6165051832779999, "grad_norm": 5.256351947784424, "learning_rate": 8.857142857142857e-05, "loss": 0.7592, "step": 9099 }, { "epoch": 0.616572938545972, "grad_norm": 8.32116985321045, "learning_rate": 8.857005955233076e-05, "loss": 1.041, "step": 9100 }, { "epoch": 0.616640693813944, "grad_norm": 5.583303928375244, "learning_rate": 8.856869053323294e-05, "loss": 0.9433, "step": 9101 }, { "epoch": 0.6167084490819161, "grad_norm": 8.839009284973145, "learning_rate": 8.856732151413512e-05, "loss": 1.1276, "step": 9102 }, { "epoch": 0.6167762043498882, "grad_norm": 6.0189528465271, "learning_rate": 8.856595249503731e-05, "loss": 0.8986, "step": 9103 }, { "epoch": 0.6168439596178603, "grad_norm": 6.6191205978393555, "learning_rate": 8.85645834759395e-05, "loss": 0.8594, "step": 9104 }, { "epoch": 0.6169117148858324, "grad_norm": 7.318183898925781, "learning_rate": 8.856321445684167e-05, "loss": 0.936, "step": 9105 }, { "epoch": 0.6169794701538045, "grad_norm": 5.551211357116699, "learning_rate": 8.856184543774387e-05, "loss": 0.8986, "step": 9106 }, { "epoch": 0.6170472254217766, "grad_norm": 6.483643054962158, "learning_rate": 8.856047641864605e-05, "loss": 0.8394, "step": 9107 }, { "epoch": 0.6171149806897487, "grad_norm": 6.838339805603027, "learning_rate": 8.855910739954823e-05, "loss": 0.8305, "step": 9108 }, { "epoch": 0.6171827359577207, "grad_norm": 6.734610557556152, "learning_rate": 8.855773838045042e-05, "loss": 0.8967, "step": 9109 }, { "epoch": 0.6172504912256928, "grad_norm": 5.749598503112793, "learning_rate": 8.85563693613526e-05, "loss": 0.8226, "step": 9110 }, { "epoch": 0.6173182464936648, "grad_norm": 5.9449872970581055, "learning_rate": 8.855500034225478e-05, "loss": 0.7528, "step": 9111 }, { "epoch": 0.6173860017616369, "grad_norm": 7.123237133026123, "learning_rate": 8.855363132315696e-05, "loss": 0.8002, "step": 9112 }, { "epoch": 0.617453757029609, "grad_norm": 6.046530723571777, "learning_rate": 8.855226230405914e-05, "loss": 0.7788, "step": 9113 }, { "epoch": 0.6175215122975811, "grad_norm": 5.466145992279053, "learning_rate": 8.855089328496134e-05, "loss": 0.6303, "step": 9114 }, { "epoch": 0.6175892675655532, "grad_norm": 6.014889717102051, "learning_rate": 8.854952426586352e-05, "loss": 0.9445, "step": 9115 }, { "epoch": 0.6176570228335253, "grad_norm": 6.305761814117432, "learning_rate": 8.85481552467657e-05, "loss": 0.7314, "step": 9116 }, { "epoch": 0.6177247781014974, "grad_norm": 7.205751895904541, "learning_rate": 8.854678622766788e-05, "loss": 0.7635, "step": 9117 }, { "epoch": 0.6177925333694695, "grad_norm": 7.25909948348999, "learning_rate": 8.854541720857006e-05, "loss": 0.8655, "step": 9118 }, { "epoch": 0.6178602886374416, "grad_norm": 6.141407489776611, "learning_rate": 8.854404818947225e-05, "loss": 0.7744, "step": 9119 }, { "epoch": 0.6179280439054137, "grad_norm": 7.026790142059326, "learning_rate": 8.854267917037443e-05, "loss": 0.6679, "step": 9120 }, { "epoch": 0.6179957991733858, "grad_norm": 6.5543437004089355, "learning_rate": 8.854131015127661e-05, "loss": 0.7657, "step": 9121 }, { "epoch": 0.6180635544413579, "grad_norm": 6.620877742767334, "learning_rate": 8.85399411321788e-05, "loss": 0.7058, "step": 9122 }, { "epoch": 0.61813130970933, "grad_norm": 6.877584457397461, "learning_rate": 8.853857211308099e-05, "loss": 0.8799, "step": 9123 }, { "epoch": 0.618199064977302, "grad_norm": 6.836280822753906, "learning_rate": 8.853720309398317e-05, "loss": 0.7901, "step": 9124 }, { "epoch": 0.618266820245274, "grad_norm": 5.966172695159912, "learning_rate": 8.853583407488535e-05, "loss": 0.6202, "step": 9125 }, { "epoch": 0.6183345755132461, "grad_norm": 6.592352867126465, "learning_rate": 8.853446505578753e-05, "loss": 0.8176, "step": 9126 }, { "epoch": 0.6184023307812182, "grad_norm": 7.010197639465332, "learning_rate": 8.853309603668971e-05, "loss": 0.8645, "step": 9127 }, { "epoch": 0.6184700860491903, "grad_norm": 6.962997913360596, "learning_rate": 8.85317270175919e-05, "loss": 0.7481, "step": 9128 }, { "epoch": 0.6185378413171624, "grad_norm": 6.8080244064331055, "learning_rate": 8.853035799849408e-05, "loss": 0.8811, "step": 9129 }, { "epoch": 0.6186055965851345, "grad_norm": 5.936764240264893, "learning_rate": 8.852898897939626e-05, "loss": 0.6795, "step": 9130 }, { "epoch": 0.6186733518531066, "grad_norm": 6.132038116455078, "learning_rate": 8.852761996029844e-05, "loss": 0.9207, "step": 9131 }, { "epoch": 0.6187411071210787, "grad_norm": 6.451957702636719, "learning_rate": 8.852625094120064e-05, "loss": 0.7098, "step": 9132 }, { "epoch": 0.6188088623890508, "grad_norm": 6.429556369781494, "learning_rate": 8.852488192210282e-05, "loss": 0.8215, "step": 9133 }, { "epoch": 0.6188766176570228, "grad_norm": 5.414734363555908, "learning_rate": 8.8523512903005e-05, "loss": 0.7205, "step": 9134 }, { "epoch": 0.6189443729249949, "grad_norm": 7.48563289642334, "learning_rate": 8.852214388390718e-05, "loss": 0.9738, "step": 9135 }, { "epoch": 0.619012128192967, "grad_norm": 7.014744758605957, "learning_rate": 8.852077486480936e-05, "loss": 0.7494, "step": 9136 }, { "epoch": 0.6190798834609391, "grad_norm": 9.400522232055664, "learning_rate": 8.851940584571155e-05, "loss": 0.8805, "step": 9137 }, { "epoch": 0.6191476387289112, "grad_norm": 5.838070869445801, "learning_rate": 8.851803682661373e-05, "loss": 0.7817, "step": 9138 }, { "epoch": 0.6192153939968833, "grad_norm": 6.750514984130859, "learning_rate": 8.851666780751591e-05, "loss": 0.8173, "step": 9139 }, { "epoch": 0.6192831492648554, "grad_norm": 8.359286308288574, "learning_rate": 8.85152987884181e-05, "loss": 0.8902, "step": 9140 }, { "epoch": 0.6193509045328274, "grad_norm": 4.75096321105957, "learning_rate": 8.851392976932029e-05, "loss": 0.6834, "step": 9141 }, { "epoch": 0.6194186598007995, "grad_norm": 6.427238464355469, "learning_rate": 8.851256075022247e-05, "loss": 0.7946, "step": 9142 }, { "epoch": 0.6194864150687716, "grad_norm": 6.961672782897949, "learning_rate": 8.851119173112465e-05, "loss": 0.6882, "step": 9143 }, { "epoch": 0.6195541703367436, "grad_norm": 7.266171455383301, "learning_rate": 8.850982271202683e-05, "loss": 0.8905, "step": 9144 }, { "epoch": 0.6196219256047157, "grad_norm": 6.961668491363525, "learning_rate": 8.850845369292901e-05, "loss": 0.7874, "step": 9145 }, { "epoch": 0.6196896808726878, "grad_norm": 6.123990535736084, "learning_rate": 8.85070846738312e-05, "loss": 0.7126, "step": 9146 }, { "epoch": 0.6197574361406599, "grad_norm": 7.221274375915527, "learning_rate": 8.850571565473338e-05, "loss": 0.8443, "step": 9147 }, { "epoch": 0.619825191408632, "grad_norm": 6.273942947387695, "learning_rate": 8.850434663563556e-05, "loss": 0.8795, "step": 9148 }, { "epoch": 0.6198929466766041, "grad_norm": 6.505329608917236, "learning_rate": 8.850297761653776e-05, "loss": 0.845, "step": 9149 }, { "epoch": 0.6199607019445762, "grad_norm": 6.682892322540283, "learning_rate": 8.850160859743994e-05, "loss": 0.8908, "step": 9150 }, { "epoch": 0.6200284572125483, "grad_norm": 6.669549942016602, "learning_rate": 8.850023957834212e-05, "loss": 0.8027, "step": 9151 }, { "epoch": 0.6200962124805204, "grad_norm": 4.51635217666626, "learning_rate": 8.849887055924431e-05, "loss": 0.6746, "step": 9152 }, { "epoch": 0.6201639677484925, "grad_norm": 7.118916988372803, "learning_rate": 8.84975015401465e-05, "loss": 1.1932, "step": 9153 }, { "epoch": 0.6202317230164646, "grad_norm": 5.470358371734619, "learning_rate": 8.849613252104867e-05, "loss": 0.9683, "step": 9154 }, { "epoch": 0.6202994782844367, "grad_norm": 5.723430633544922, "learning_rate": 8.849476350195087e-05, "loss": 0.9483, "step": 9155 }, { "epoch": 0.6203672335524087, "grad_norm": 6.195518493652344, "learning_rate": 8.849339448285305e-05, "loss": 0.6756, "step": 9156 }, { "epoch": 0.6204349888203808, "grad_norm": 5.686561584472656, "learning_rate": 8.849202546375523e-05, "loss": 0.6946, "step": 9157 }, { "epoch": 0.6205027440883528, "grad_norm": 6.569991588592529, "learning_rate": 8.849065644465741e-05, "loss": 0.8295, "step": 9158 }, { "epoch": 0.6205704993563249, "grad_norm": 7.306698799133301, "learning_rate": 8.848928742555959e-05, "loss": 1.0515, "step": 9159 }, { "epoch": 0.620638254624297, "grad_norm": 7.042558670043945, "learning_rate": 8.848791840646178e-05, "loss": 1.0191, "step": 9160 }, { "epoch": 0.6207060098922691, "grad_norm": 6.552324295043945, "learning_rate": 8.848654938736396e-05, "loss": 0.7424, "step": 9161 }, { "epoch": 0.6207737651602412, "grad_norm": 7.646967887878418, "learning_rate": 8.848518036826614e-05, "loss": 0.8457, "step": 9162 }, { "epoch": 0.6208415204282133, "grad_norm": 6.561422348022461, "learning_rate": 8.848381134916832e-05, "loss": 0.7605, "step": 9163 }, { "epoch": 0.6209092756961854, "grad_norm": 7.425536155700684, "learning_rate": 8.848244233007052e-05, "loss": 0.8042, "step": 9164 }, { "epoch": 0.6209770309641575, "grad_norm": 5.6792521476745605, "learning_rate": 8.84810733109727e-05, "loss": 0.8775, "step": 9165 }, { "epoch": 0.6210447862321296, "grad_norm": 6.988245010375977, "learning_rate": 8.847970429187488e-05, "loss": 0.7295, "step": 9166 }, { "epoch": 0.6211125415001016, "grad_norm": 4.709329605102539, "learning_rate": 8.847833527277706e-05, "loss": 0.6508, "step": 9167 }, { "epoch": 0.6211802967680737, "grad_norm": 7.601302623748779, "learning_rate": 8.847696625367924e-05, "loss": 0.6558, "step": 9168 }, { "epoch": 0.6212480520360458, "grad_norm": 6.5276007652282715, "learning_rate": 8.847559723458143e-05, "loss": 0.6851, "step": 9169 }, { "epoch": 0.6213158073040179, "grad_norm": 6.256960391998291, "learning_rate": 8.847422821548361e-05, "loss": 0.9257, "step": 9170 }, { "epoch": 0.62138356257199, "grad_norm": 7.133554935455322, "learning_rate": 8.84728591963858e-05, "loss": 1.0145, "step": 9171 }, { "epoch": 0.6214513178399621, "grad_norm": 7.465946674346924, "learning_rate": 8.847149017728797e-05, "loss": 1.1211, "step": 9172 }, { "epoch": 0.6215190731079342, "grad_norm": 7.24479341506958, "learning_rate": 8.847012115819015e-05, "loss": 0.787, "step": 9173 }, { "epoch": 0.6215868283759062, "grad_norm": 6.052404403686523, "learning_rate": 8.846875213909235e-05, "loss": 0.7718, "step": 9174 }, { "epoch": 0.6216545836438783, "grad_norm": 7.27697229385376, "learning_rate": 8.846738311999453e-05, "loss": 0.8102, "step": 9175 }, { "epoch": 0.6217223389118504, "grad_norm": 6.269348621368408, "learning_rate": 8.846601410089671e-05, "loss": 0.9089, "step": 9176 }, { "epoch": 0.6217900941798225, "grad_norm": 5.706981658935547, "learning_rate": 8.846464508179889e-05, "loss": 0.8213, "step": 9177 }, { "epoch": 0.6218578494477945, "grad_norm": 7.0556817054748535, "learning_rate": 8.846327606270108e-05, "loss": 0.696, "step": 9178 }, { "epoch": 0.6219256047157666, "grad_norm": 5.418951034545898, "learning_rate": 8.846190704360326e-05, "loss": 0.6344, "step": 9179 }, { "epoch": 0.6219933599837387, "grad_norm": 6.63913106918335, "learning_rate": 8.846053802450544e-05, "loss": 0.8222, "step": 9180 }, { "epoch": 0.6220611152517108, "grad_norm": 6.560824394226074, "learning_rate": 8.845916900540762e-05, "loss": 0.7443, "step": 9181 }, { "epoch": 0.6221288705196829, "grad_norm": 6.946655750274658, "learning_rate": 8.84577999863098e-05, "loss": 1.0413, "step": 9182 }, { "epoch": 0.622196625787655, "grad_norm": 5.037294864654541, "learning_rate": 8.8456430967212e-05, "loss": 0.7489, "step": 9183 }, { "epoch": 0.6222643810556271, "grad_norm": 6.396673202514648, "learning_rate": 8.845506194811418e-05, "loss": 0.768, "step": 9184 }, { "epoch": 0.6223321363235992, "grad_norm": 6.354964733123779, "learning_rate": 8.845369292901636e-05, "loss": 1.0378, "step": 9185 }, { "epoch": 0.6223998915915713, "grad_norm": 7.494623184204102, "learning_rate": 8.845232390991854e-05, "loss": 0.763, "step": 9186 }, { "epoch": 0.6224676468595434, "grad_norm": 6.109148025512695, "learning_rate": 8.845095489082073e-05, "loss": 0.7609, "step": 9187 }, { "epoch": 0.6225354021275155, "grad_norm": 7.039491653442383, "learning_rate": 8.844958587172291e-05, "loss": 0.7999, "step": 9188 }, { "epoch": 0.6226031573954875, "grad_norm": 5.7005486488342285, "learning_rate": 8.84482168526251e-05, "loss": 0.7632, "step": 9189 }, { "epoch": 0.6226709126634595, "grad_norm": 6.816334247589111, "learning_rate": 8.844684783352727e-05, "loss": 0.6813, "step": 9190 }, { "epoch": 0.6227386679314316, "grad_norm": 7.970419883728027, "learning_rate": 8.844547881442946e-05, "loss": 0.5728, "step": 9191 }, { "epoch": 0.6228064231994037, "grad_norm": 6.688904285430908, "learning_rate": 8.844410979533165e-05, "loss": 0.7974, "step": 9192 }, { "epoch": 0.6228741784673758, "grad_norm": 6.076619625091553, "learning_rate": 8.844274077623383e-05, "loss": 0.7217, "step": 9193 }, { "epoch": 0.6229419337353479, "grad_norm": 7.09970760345459, "learning_rate": 8.844137175713601e-05, "loss": 0.8479, "step": 9194 }, { "epoch": 0.62300968900332, "grad_norm": 7.291125297546387, "learning_rate": 8.84400027380382e-05, "loss": 0.9391, "step": 9195 }, { "epoch": 0.6230774442712921, "grad_norm": 6.2532148361206055, "learning_rate": 8.843863371894038e-05, "loss": 0.9657, "step": 9196 }, { "epoch": 0.6231451995392642, "grad_norm": 6.567989349365234, "learning_rate": 8.843726469984256e-05, "loss": 0.767, "step": 9197 }, { "epoch": 0.6232129548072363, "grad_norm": 5.543100357055664, "learning_rate": 8.843589568074476e-05, "loss": 0.7537, "step": 9198 }, { "epoch": 0.6232807100752084, "grad_norm": 7.001931190490723, "learning_rate": 8.843452666164694e-05, "loss": 0.8386, "step": 9199 }, { "epoch": 0.6233484653431804, "grad_norm": 6.852741718292236, "learning_rate": 8.843315764254912e-05, "loss": 0.9314, "step": 9200 }, { "epoch": 0.6234162206111525, "grad_norm": 6.225865364074707, "learning_rate": 8.843178862345131e-05, "loss": 0.8302, "step": 9201 }, { "epoch": 0.6234839758791246, "grad_norm": 7.8117594718933105, "learning_rate": 8.843041960435349e-05, "loss": 0.9156, "step": 9202 }, { "epoch": 0.6235517311470967, "grad_norm": 7.585949420928955, "learning_rate": 8.842905058525567e-05, "loss": 0.9901, "step": 9203 }, { "epoch": 0.6236194864150688, "grad_norm": 5.7599945068359375, "learning_rate": 8.842768156615785e-05, "loss": 0.8697, "step": 9204 }, { "epoch": 0.6236872416830409, "grad_norm": 6.114898204803467, "learning_rate": 8.842631254706003e-05, "loss": 0.917, "step": 9205 }, { "epoch": 0.623754996951013, "grad_norm": 6.57565450668335, "learning_rate": 8.842494352796223e-05, "loss": 0.9872, "step": 9206 }, { "epoch": 0.623822752218985, "grad_norm": 6.324807643890381, "learning_rate": 8.842357450886441e-05, "loss": 0.804, "step": 9207 }, { "epoch": 0.6238905074869571, "grad_norm": 7.262860298156738, "learning_rate": 8.842220548976659e-05, "loss": 0.8051, "step": 9208 }, { "epoch": 0.6239582627549292, "grad_norm": 6.4210968017578125, "learning_rate": 8.842083647066877e-05, "loss": 0.9117, "step": 9209 }, { "epoch": 0.6240260180229013, "grad_norm": 5.8516154289245605, "learning_rate": 8.841946745157096e-05, "loss": 0.7292, "step": 9210 }, { "epoch": 0.6240937732908733, "grad_norm": 7.249476909637451, "learning_rate": 8.841809843247314e-05, "loss": 0.7149, "step": 9211 }, { "epoch": 0.6241615285588454, "grad_norm": 6.276209831237793, "learning_rate": 8.841672941337532e-05, "loss": 0.8576, "step": 9212 }, { "epoch": 0.6242292838268175, "grad_norm": 7.875953674316406, "learning_rate": 8.84153603942775e-05, "loss": 0.8345, "step": 9213 }, { "epoch": 0.6242970390947896, "grad_norm": 6.010235786437988, "learning_rate": 8.841399137517968e-05, "loss": 0.8376, "step": 9214 }, { "epoch": 0.6243647943627617, "grad_norm": 6.451040744781494, "learning_rate": 8.841262235608188e-05, "loss": 0.8943, "step": 9215 }, { "epoch": 0.6244325496307338, "grad_norm": 7.024655342102051, "learning_rate": 8.841125333698406e-05, "loss": 0.634, "step": 9216 }, { "epoch": 0.6245003048987059, "grad_norm": 7.169036865234375, "learning_rate": 8.840988431788624e-05, "loss": 0.8139, "step": 9217 }, { "epoch": 0.624568060166678, "grad_norm": 5.163486003875732, "learning_rate": 8.840851529878842e-05, "loss": 0.847, "step": 9218 }, { "epoch": 0.6246358154346501, "grad_norm": 6.5144429206848145, "learning_rate": 8.840714627969061e-05, "loss": 0.8632, "step": 9219 }, { "epoch": 0.6247035707026222, "grad_norm": 8.250146865844727, "learning_rate": 8.84057772605928e-05, "loss": 1.1546, "step": 9220 }, { "epoch": 0.6247713259705943, "grad_norm": 5.385178565979004, "learning_rate": 8.840440824149497e-05, "loss": 1.2391, "step": 9221 }, { "epoch": 0.6248390812385664, "grad_norm": 6.536712169647217, "learning_rate": 8.840303922239715e-05, "loss": 0.8697, "step": 9222 }, { "epoch": 0.6249068365065383, "grad_norm": 5.161468982696533, "learning_rate": 8.840167020329933e-05, "loss": 0.6399, "step": 9223 }, { "epoch": 0.6249745917745104, "grad_norm": 6.684597969055176, "learning_rate": 8.840030118420153e-05, "loss": 0.8301, "step": 9224 }, { "epoch": 0.6250423470424825, "grad_norm": 7.047337055206299, "learning_rate": 8.839893216510371e-05, "loss": 1.0555, "step": 9225 }, { "epoch": 0.6251101023104546, "grad_norm": 6.454337120056152, "learning_rate": 8.839756314600589e-05, "loss": 0.7768, "step": 9226 }, { "epoch": 0.6251778575784267, "grad_norm": 5.521293640136719, "learning_rate": 8.839619412690807e-05, "loss": 0.8204, "step": 9227 }, { "epoch": 0.6252456128463988, "grad_norm": 7.6041364669799805, "learning_rate": 8.839482510781025e-05, "loss": 1.0063, "step": 9228 }, { "epoch": 0.6253133681143709, "grad_norm": 6.339493751525879, "learning_rate": 8.839345608871244e-05, "loss": 0.6371, "step": 9229 }, { "epoch": 0.625381123382343, "grad_norm": 6.844937324523926, "learning_rate": 8.839208706961462e-05, "loss": 0.7956, "step": 9230 }, { "epoch": 0.6254488786503151, "grad_norm": 5.3728461265563965, "learning_rate": 8.83907180505168e-05, "loss": 0.765, "step": 9231 }, { "epoch": 0.6255166339182872, "grad_norm": 9.041521072387695, "learning_rate": 8.838934903141898e-05, "loss": 0.6906, "step": 9232 }, { "epoch": 0.6255843891862592, "grad_norm": 7.101466178894043, "learning_rate": 8.838798001232118e-05, "loss": 1.174, "step": 9233 }, { "epoch": 0.6256521444542313, "grad_norm": 7.049058437347412, "learning_rate": 8.838661099322336e-05, "loss": 0.7884, "step": 9234 }, { "epoch": 0.6257198997222034, "grad_norm": 6.012722969055176, "learning_rate": 8.838524197412554e-05, "loss": 0.7543, "step": 9235 }, { "epoch": 0.6257876549901755, "grad_norm": 8.046865463256836, "learning_rate": 8.838387295502772e-05, "loss": 0.7745, "step": 9236 }, { "epoch": 0.6258554102581476, "grad_norm": 6.681391716003418, "learning_rate": 8.83825039359299e-05, "loss": 0.8218, "step": 9237 }, { "epoch": 0.6259231655261197, "grad_norm": 8.432860374450684, "learning_rate": 8.83811349168321e-05, "loss": 0.84, "step": 9238 }, { "epoch": 0.6259909207940917, "grad_norm": 7.283944606781006, "learning_rate": 8.837976589773427e-05, "loss": 0.7066, "step": 9239 }, { "epoch": 0.6260586760620638, "grad_norm": 6.028378009796143, "learning_rate": 8.837839687863645e-05, "loss": 0.7066, "step": 9240 }, { "epoch": 0.6261264313300359, "grad_norm": 5.715835094451904, "learning_rate": 8.837702785953863e-05, "loss": 0.9115, "step": 9241 }, { "epoch": 0.626194186598008, "grad_norm": 5.851448059082031, "learning_rate": 8.837565884044083e-05, "loss": 0.7464, "step": 9242 }, { "epoch": 0.62626194186598, "grad_norm": 7.112005710601807, "learning_rate": 8.837428982134301e-05, "loss": 0.8635, "step": 9243 }, { "epoch": 0.6263296971339521, "grad_norm": 6.428948879241943, "learning_rate": 8.837292080224519e-05, "loss": 0.9229, "step": 9244 }, { "epoch": 0.6263974524019242, "grad_norm": 5.358401298522949, "learning_rate": 8.837155178314738e-05, "loss": 0.5362, "step": 9245 }, { "epoch": 0.6264652076698963, "grad_norm": 5.889663219451904, "learning_rate": 8.837018276404956e-05, "loss": 0.995, "step": 9246 }, { "epoch": 0.6265329629378684, "grad_norm": 5.585958480834961, "learning_rate": 8.836881374495174e-05, "loss": 0.8504, "step": 9247 }, { "epoch": 0.6266007182058405, "grad_norm": 6.147828102111816, "learning_rate": 8.836744472585394e-05, "loss": 0.8975, "step": 9248 }, { "epoch": 0.6266684734738126, "grad_norm": 6.951436996459961, "learning_rate": 8.836607570675612e-05, "loss": 1.063, "step": 9249 }, { "epoch": 0.6267362287417847, "grad_norm": 6.059296131134033, "learning_rate": 8.83647066876583e-05, "loss": 0.815, "step": 9250 }, { "epoch": 0.6268039840097568, "grad_norm": 8.177648544311523, "learning_rate": 8.836333766856048e-05, "loss": 0.9035, "step": 9251 }, { "epoch": 0.6268717392777289, "grad_norm": 5.385120868682861, "learning_rate": 8.836196864946267e-05, "loss": 0.5322, "step": 9252 }, { "epoch": 0.626939494545701, "grad_norm": 9.279441833496094, "learning_rate": 8.836059963036485e-05, "loss": 0.8954, "step": 9253 }, { "epoch": 0.6270072498136731, "grad_norm": 6.143721103668213, "learning_rate": 8.835923061126703e-05, "loss": 0.7225, "step": 9254 }, { "epoch": 0.627075005081645, "grad_norm": 7.277685642242432, "learning_rate": 8.835786159216921e-05, "loss": 0.769, "step": 9255 }, { "epoch": 0.6271427603496171, "grad_norm": 6.348362445831299, "learning_rate": 8.835649257307141e-05, "loss": 0.8076, "step": 9256 }, { "epoch": 0.6272105156175892, "grad_norm": 5.717894077301025, "learning_rate": 8.835512355397359e-05, "loss": 0.5646, "step": 9257 }, { "epoch": 0.6272782708855613, "grad_norm": 5.959596157073975, "learning_rate": 8.835375453487577e-05, "loss": 0.8393, "step": 9258 }, { "epoch": 0.6273460261535334, "grad_norm": 6.298025131225586, "learning_rate": 8.835238551577795e-05, "loss": 0.6772, "step": 9259 }, { "epoch": 0.6274137814215055, "grad_norm": 8.076531410217285, "learning_rate": 8.835101649668013e-05, "loss": 1.2792, "step": 9260 }, { "epoch": 0.6274815366894776, "grad_norm": 7.445847034454346, "learning_rate": 8.834964747758232e-05, "loss": 0.8439, "step": 9261 }, { "epoch": 0.6275492919574497, "grad_norm": 8.871906280517578, "learning_rate": 8.83482784584845e-05, "loss": 0.7631, "step": 9262 }, { "epoch": 0.6276170472254218, "grad_norm": 5.552763938903809, "learning_rate": 8.834690943938668e-05, "loss": 0.7293, "step": 9263 }, { "epoch": 0.6276848024933939, "grad_norm": 6.359529495239258, "learning_rate": 8.834554042028886e-05, "loss": 0.8246, "step": 9264 }, { "epoch": 0.627752557761366, "grad_norm": 6.481078147888184, "learning_rate": 8.834417140119106e-05, "loss": 0.8931, "step": 9265 }, { "epoch": 0.627820313029338, "grad_norm": 6.024886608123779, "learning_rate": 8.834280238209324e-05, "loss": 0.6816, "step": 9266 }, { "epoch": 0.6278880682973101, "grad_norm": 6.775660514831543, "learning_rate": 8.834143336299542e-05, "loss": 0.9066, "step": 9267 }, { "epoch": 0.6279558235652822, "grad_norm": 6.696643829345703, "learning_rate": 8.83400643438976e-05, "loss": 0.7173, "step": 9268 }, { "epoch": 0.6280235788332543, "grad_norm": 7.639727592468262, "learning_rate": 8.833869532479978e-05, "loss": 0.772, "step": 9269 }, { "epoch": 0.6280913341012264, "grad_norm": 7.364283084869385, "learning_rate": 8.833732630570197e-05, "loss": 0.8966, "step": 9270 }, { "epoch": 0.6281590893691985, "grad_norm": 6.8663482666015625, "learning_rate": 8.833595728660415e-05, "loss": 0.9459, "step": 9271 }, { "epoch": 0.6282268446371705, "grad_norm": 7.687761306762695, "learning_rate": 8.833458826750633e-05, "loss": 1.0611, "step": 9272 }, { "epoch": 0.6282945999051426, "grad_norm": 6.051063060760498, "learning_rate": 8.833321924840851e-05, "loss": 0.6693, "step": 9273 }, { "epoch": 0.6283623551731147, "grad_norm": 6.312000274658203, "learning_rate": 8.833185022931071e-05, "loss": 0.6403, "step": 9274 }, { "epoch": 0.6284301104410868, "grad_norm": 6.270723819732666, "learning_rate": 8.833048121021289e-05, "loss": 0.9183, "step": 9275 }, { "epoch": 0.6284978657090589, "grad_norm": 6.80443811416626, "learning_rate": 8.832911219111507e-05, "loss": 0.6951, "step": 9276 }, { "epoch": 0.628565620977031, "grad_norm": 6.09138822555542, "learning_rate": 8.832774317201725e-05, "loss": 0.8251, "step": 9277 }, { "epoch": 0.628633376245003, "grad_norm": 5.778343200683594, "learning_rate": 8.832637415291943e-05, "loss": 0.5598, "step": 9278 }, { "epoch": 0.6287011315129751, "grad_norm": 6.845189571380615, "learning_rate": 8.832500513382162e-05, "loss": 0.9961, "step": 9279 }, { "epoch": 0.6287688867809472, "grad_norm": 5.6591081619262695, "learning_rate": 8.83236361147238e-05, "loss": 0.9207, "step": 9280 }, { "epoch": 0.6288366420489193, "grad_norm": 6.356192588806152, "learning_rate": 8.832226709562598e-05, "loss": 0.9117, "step": 9281 }, { "epoch": 0.6289043973168914, "grad_norm": 6.646953582763672, "learning_rate": 8.832089807652816e-05, "loss": 0.8016, "step": 9282 }, { "epoch": 0.6289721525848635, "grad_norm": 7.553640365600586, "learning_rate": 8.831952905743034e-05, "loss": 0.872, "step": 9283 }, { "epoch": 0.6290399078528356, "grad_norm": 6.47942590713501, "learning_rate": 8.831816003833254e-05, "loss": 0.7643, "step": 9284 }, { "epoch": 0.6291076631208077, "grad_norm": 4.924723148345947, "learning_rate": 8.831679101923472e-05, "loss": 0.673, "step": 9285 }, { "epoch": 0.6291754183887798, "grad_norm": 6.718869686126709, "learning_rate": 8.83154220001369e-05, "loss": 0.9365, "step": 9286 }, { "epoch": 0.6292431736567519, "grad_norm": 6.06726598739624, "learning_rate": 8.831405298103908e-05, "loss": 0.7183, "step": 9287 }, { "epoch": 0.6293109289247238, "grad_norm": 6.576848030090332, "learning_rate": 8.831268396194127e-05, "loss": 0.8866, "step": 9288 }, { "epoch": 0.6293786841926959, "grad_norm": 7.97581148147583, "learning_rate": 8.831131494284345e-05, "loss": 0.8479, "step": 9289 }, { "epoch": 0.629446439460668, "grad_norm": 5.632781028747559, "learning_rate": 8.830994592374563e-05, "loss": 0.7944, "step": 9290 }, { "epoch": 0.6295141947286401, "grad_norm": 7.232397556304932, "learning_rate": 8.830857690464783e-05, "loss": 0.9172, "step": 9291 }, { "epoch": 0.6295819499966122, "grad_norm": 7.6181464195251465, "learning_rate": 8.830720788555001e-05, "loss": 0.6232, "step": 9292 }, { "epoch": 0.6296497052645843, "grad_norm": 7.363900661468506, "learning_rate": 8.830583886645219e-05, "loss": 0.9468, "step": 9293 }, { "epoch": 0.6297174605325564, "grad_norm": 6.03270149230957, "learning_rate": 8.830446984735438e-05, "loss": 0.9073, "step": 9294 }, { "epoch": 0.6297852158005285, "grad_norm": 6.522334098815918, "learning_rate": 8.830310082825656e-05, "loss": 0.7139, "step": 9295 }, { "epoch": 0.6298529710685006, "grad_norm": 7.849470615386963, "learning_rate": 8.830173180915874e-05, "loss": 1.0359, "step": 9296 }, { "epoch": 0.6299207263364727, "grad_norm": 7.876535892486572, "learning_rate": 8.830036279006094e-05, "loss": 0.8647, "step": 9297 }, { "epoch": 0.6299884816044448, "grad_norm": 5.959194183349609, "learning_rate": 8.829899377096312e-05, "loss": 0.9567, "step": 9298 }, { "epoch": 0.6300562368724169, "grad_norm": 5.821471691131592, "learning_rate": 8.82976247518653e-05, "loss": 0.8456, "step": 9299 }, { "epoch": 0.6301239921403889, "grad_norm": 5.807693004608154, "learning_rate": 8.829625573276748e-05, "loss": 0.8929, "step": 9300 }, { "epoch": 0.630191747408361, "grad_norm": 6.666048049926758, "learning_rate": 8.829488671366966e-05, "loss": 0.891, "step": 9301 }, { "epoch": 0.6302595026763331, "grad_norm": 6.061259746551514, "learning_rate": 8.829351769457185e-05, "loss": 0.7318, "step": 9302 }, { "epoch": 0.6303272579443052, "grad_norm": 6.72253942489624, "learning_rate": 8.829214867547403e-05, "loss": 0.9176, "step": 9303 }, { "epoch": 0.6303950132122772, "grad_norm": 6.540244102478027, "learning_rate": 8.829077965637621e-05, "loss": 0.7941, "step": 9304 }, { "epoch": 0.6304627684802493, "grad_norm": 6.3027262687683105, "learning_rate": 8.82894106372784e-05, "loss": 0.7615, "step": 9305 }, { "epoch": 0.6305305237482214, "grad_norm": 5.5668745040893555, "learning_rate": 8.828804161818057e-05, "loss": 0.7446, "step": 9306 }, { "epoch": 0.6305982790161935, "grad_norm": 6.234310150146484, "learning_rate": 8.828667259908277e-05, "loss": 0.8601, "step": 9307 }, { "epoch": 0.6306660342841656, "grad_norm": 6.879257678985596, "learning_rate": 8.828530357998495e-05, "loss": 0.8312, "step": 9308 }, { "epoch": 0.6307337895521377, "grad_norm": 5.684000015258789, "learning_rate": 8.828393456088713e-05, "loss": 0.8129, "step": 9309 }, { "epoch": 0.6308015448201097, "grad_norm": 6.523510932922363, "learning_rate": 8.828256554178931e-05, "loss": 1.0274, "step": 9310 }, { "epoch": 0.6308693000880818, "grad_norm": 6.060532569885254, "learning_rate": 8.82811965226915e-05, "loss": 0.6513, "step": 9311 }, { "epoch": 0.6309370553560539, "grad_norm": 6.2253828048706055, "learning_rate": 8.827982750359368e-05, "loss": 0.7539, "step": 9312 }, { "epoch": 0.631004810624026, "grad_norm": 5.188565254211426, "learning_rate": 8.827845848449586e-05, "loss": 0.7904, "step": 9313 }, { "epoch": 0.6310725658919981, "grad_norm": 6.081325054168701, "learning_rate": 8.827708946539804e-05, "loss": 0.8399, "step": 9314 }, { "epoch": 0.6311403211599702, "grad_norm": 5.515079975128174, "learning_rate": 8.827572044630022e-05, "loss": 0.7415, "step": 9315 }, { "epoch": 0.6312080764279423, "grad_norm": 6.149160385131836, "learning_rate": 8.827435142720242e-05, "loss": 0.6537, "step": 9316 }, { "epoch": 0.6312758316959144, "grad_norm": 8.075578689575195, "learning_rate": 8.82729824081046e-05, "loss": 0.9744, "step": 9317 }, { "epoch": 0.6313435869638865, "grad_norm": 5.570530414581299, "learning_rate": 8.827161338900678e-05, "loss": 0.6468, "step": 9318 }, { "epoch": 0.6314113422318586, "grad_norm": 7.706857204437256, "learning_rate": 8.827024436990896e-05, "loss": 1.0308, "step": 9319 }, { "epoch": 0.6314790974998307, "grad_norm": 8.566661834716797, "learning_rate": 8.826887535081115e-05, "loss": 0.6278, "step": 9320 }, { "epoch": 0.6315468527678026, "grad_norm": 6.264912128448486, "learning_rate": 8.826750633171333e-05, "loss": 0.9, "step": 9321 }, { "epoch": 0.6316146080357747, "grad_norm": 6.289927005767822, "learning_rate": 8.826613731261551e-05, "loss": 0.788, "step": 9322 }, { "epoch": 0.6316823633037468, "grad_norm": 6.285764217376709, "learning_rate": 8.82647682935177e-05, "loss": 0.7928, "step": 9323 }, { "epoch": 0.6317501185717189, "grad_norm": 6.555515766143799, "learning_rate": 8.826339927441987e-05, "loss": 0.7345, "step": 9324 }, { "epoch": 0.631817873839691, "grad_norm": 7.84982967376709, "learning_rate": 8.826203025532207e-05, "loss": 0.917, "step": 9325 }, { "epoch": 0.6318856291076631, "grad_norm": 5.471888065338135, "learning_rate": 8.826066123622425e-05, "loss": 0.7449, "step": 9326 }, { "epoch": 0.6319533843756352, "grad_norm": 7.806143283843994, "learning_rate": 8.825929221712643e-05, "loss": 0.8559, "step": 9327 }, { "epoch": 0.6320211396436073, "grad_norm": 6.160640716552734, "learning_rate": 8.825792319802861e-05, "loss": 0.7819, "step": 9328 }, { "epoch": 0.6320888949115794, "grad_norm": 6.968262195587158, "learning_rate": 8.825655417893079e-05, "loss": 1.1055, "step": 9329 }, { "epoch": 0.6321566501795515, "grad_norm": 5.455170154571533, "learning_rate": 8.825518515983298e-05, "loss": 0.7852, "step": 9330 }, { "epoch": 0.6322244054475236, "grad_norm": 7.499693870544434, "learning_rate": 8.825381614073516e-05, "loss": 0.8827, "step": 9331 }, { "epoch": 0.6322921607154957, "grad_norm": 6.363000869750977, "learning_rate": 8.825244712163734e-05, "loss": 0.9469, "step": 9332 }, { "epoch": 0.6323599159834677, "grad_norm": 6.631052017211914, "learning_rate": 8.825107810253952e-05, "loss": 0.8891, "step": 9333 }, { "epoch": 0.6324276712514398, "grad_norm": 5.2757697105407715, "learning_rate": 8.824970908344172e-05, "loss": 0.8287, "step": 9334 }, { "epoch": 0.6324954265194119, "grad_norm": 8.268365859985352, "learning_rate": 8.82483400643439e-05, "loss": 0.9812, "step": 9335 }, { "epoch": 0.632563181787384, "grad_norm": 6.276435852050781, "learning_rate": 8.824697104524608e-05, "loss": 0.9827, "step": 9336 }, { "epoch": 0.632630937055356, "grad_norm": 5.93610954284668, "learning_rate": 8.824560202614827e-05, "loss": 0.8656, "step": 9337 }, { "epoch": 0.6326986923233281, "grad_norm": 6.036667346954346, "learning_rate": 8.824423300705045e-05, "loss": 0.9812, "step": 9338 }, { "epoch": 0.6327664475913002, "grad_norm": 5.460302829742432, "learning_rate": 8.824286398795263e-05, "loss": 0.7286, "step": 9339 }, { "epoch": 0.6328342028592723, "grad_norm": 6.362276554107666, "learning_rate": 8.824149496885483e-05, "loss": 0.8244, "step": 9340 }, { "epoch": 0.6329019581272444, "grad_norm": 7.416135311126709, "learning_rate": 8.824012594975701e-05, "loss": 0.7997, "step": 9341 }, { "epoch": 0.6329697133952165, "grad_norm": 7.711816787719727, "learning_rate": 8.823875693065919e-05, "loss": 1.0054, "step": 9342 }, { "epoch": 0.6330374686631886, "grad_norm": 5.800533771514893, "learning_rate": 8.823738791156138e-05, "loss": 1.1502, "step": 9343 }, { "epoch": 0.6331052239311606, "grad_norm": 5.00458288192749, "learning_rate": 8.823601889246356e-05, "loss": 0.6607, "step": 9344 }, { "epoch": 0.6331729791991327, "grad_norm": 6.7192277908325195, "learning_rate": 8.823464987336574e-05, "loss": 1.0327, "step": 9345 }, { "epoch": 0.6332407344671048, "grad_norm": 6.815017223358154, "learning_rate": 8.823328085426792e-05, "loss": 0.7579, "step": 9346 }, { "epoch": 0.6333084897350769, "grad_norm": 7.114835262298584, "learning_rate": 8.82319118351701e-05, "loss": 1.0215, "step": 9347 }, { "epoch": 0.633376245003049, "grad_norm": 4.819394111633301, "learning_rate": 8.82305428160723e-05, "loss": 0.6611, "step": 9348 }, { "epoch": 0.6334440002710211, "grad_norm": 7.824159145355225, "learning_rate": 8.822917379697448e-05, "loss": 1.0557, "step": 9349 }, { "epoch": 0.6335117555389932, "grad_norm": 6.462610721588135, "learning_rate": 8.822780477787666e-05, "loss": 1.1399, "step": 9350 }, { "epoch": 0.6335795108069653, "grad_norm": 5.1685709953308105, "learning_rate": 8.822643575877884e-05, "loss": 0.7156, "step": 9351 }, { "epoch": 0.6336472660749374, "grad_norm": 6.346011638641357, "learning_rate": 8.822506673968103e-05, "loss": 0.8654, "step": 9352 }, { "epoch": 0.6337150213429094, "grad_norm": 5.18143367767334, "learning_rate": 8.822369772058321e-05, "loss": 0.6909, "step": 9353 }, { "epoch": 0.6337827766108814, "grad_norm": 5.92888069152832, "learning_rate": 8.822232870148539e-05, "loss": 0.7467, "step": 9354 }, { "epoch": 0.6338505318788535, "grad_norm": 7.556412696838379, "learning_rate": 8.822095968238757e-05, "loss": 0.9099, "step": 9355 }, { "epoch": 0.6339182871468256, "grad_norm": 5.749037742614746, "learning_rate": 8.821959066328975e-05, "loss": 0.7791, "step": 9356 }, { "epoch": 0.6339860424147977, "grad_norm": 6.511964321136475, "learning_rate": 8.821822164419195e-05, "loss": 0.879, "step": 9357 }, { "epoch": 0.6340537976827698, "grad_norm": 7.070270538330078, "learning_rate": 8.821685262509413e-05, "loss": 0.8151, "step": 9358 }, { "epoch": 0.6341215529507419, "grad_norm": 7.1327409744262695, "learning_rate": 8.821548360599631e-05, "loss": 0.9491, "step": 9359 }, { "epoch": 0.634189308218714, "grad_norm": 5.8238911628723145, "learning_rate": 8.821411458689849e-05, "loss": 0.6894, "step": 9360 }, { "epoch": 0.6342570634866861, "grad_norm": 5.967693328857422, "learning_rate": 8.821274556780067e-05, "loss": 0.7576, "step": 9361 }, { "epoch": 0.6343248187546582, "grad_norm": 6.3497395515441895, "learning_rate": 8.821137654870286e-05, "loss": 0.9133, "step": 9362 }, { "epoch": 0.6343925740226303, "grad_norm": 5.200571060180664, "learning_rate": 8.821000752960504e-05, "loss": 0.6492, "step": 9363 }, { "epoch": 0.6344603292906024, "grad_norm": 6.661485195159912, "learning_rate": 8.820863851050722e-05, "loss": 0.7363, "step": 9364 }, { "epoch": 0.6345280845585745, "grad_norm": 5.2447733879089355, "learning_rate": 8.82072694914094e-05, "loss": 0.7672, "step": 9365 }, { "epoch": 0.6345958398265465, "grad_norm": 6.811657905578613, "learning_rate": 8.82059004723116e-05, "loss": 0.9903, "step": 9366 }, { "epoch": 0.6346635950945186, "grad_norm": 5.834871768951416, "learning_rate": 8.820453145321378e-05, "loss": 0.697, "step": 9367 }, { "epoch": 0.6347313503624907, "grad_norm": 6.931889057159424, "learning_rate": 8.820316243411596e-05, "loss": 0.8039, "step": 9368 }, { "epoch": 0.6347991056304628, "grad_norm": 5.947389602661133, "learning_rate": 8.820179341501814e-05, "loss": 0.7339, "step": 9369 }, { "epoch": 0.6348668608984348, "grad_norm": 6.914769649505615, "learning_rate": 8.820042439592032e-05, "loss": 0.7366, "step": 9370 }, { "epoch": 0.6349346161664069, "grad_norm": 7.74104118347168, "learning_rate": 8.819905537682251e-05, "loss": 1.1219, "step": 9371 }, { "epoch": 0.635002371434379, "grad_norm": 6.544697284698486, "learning_rate": 8.819768635772469e-05, "loss": 0.7795, "step": 9372 }, { "epoch": 0.6350701267023511, "grad_norm": 6.4533772468566895, "learning_rate": 8.819631733862687e-05, "loss": 0.8343, "step": 9373 }, { "epoch": 0.6351378819703232, "grad_norm": 6.726015090942383, "learning_rate": 8.819494831952905e-05, "loss": 0.6306, "step": 9374 }, { "epoch": 0.6352056372382953, "grad_norm": 6.396018981933594, "learning_rate": 8.819357930043125e-05, "loss": 0.7632, "step": 9375 }, { "epoch": 0.6352733925062674, "grad_norm": 7.739027500152588, "learning_rate": 8.819221028133343e-05, "loss": 1.2215, "step": 9376 }, { "epoch": 0.6353411477742394, "grad_norm": 6.827197551727295, "learning_rate": 8.819084126223561e-05, "loss": 1.0954, "step": 9377 }, { "epoch": 0.6354089030422115, "grad_norm": 6.967258930206299, "learning_rate": 8.818947224313779e-05, "loss": 1.1225, "step": 9378 }, { "epoch": 0.6354766583101836, "grad_norm": 7.872466564178467, "learning_rate": 8.818810322403997e-05, "loss": 0.8114, "step": 9379 }, { "epoch": 0.6355444135781557, "grad_norm": 6.985890865325928, "learning_rate": 8.818673420494216e-05, "loss": 0.8177, "step": 9380 }, { "epoch": 0.6356121688461278, "grad_norm": 7.022130966186523, "learning_rate": 8.818536518584434e-05, "loss": 0.7718, "step": 9381 }, { "epoch": 0.6356799241140999, "grad_norm": 8.023907661437988, "learning_rate": 8.818399616674652e-05, "loss": 0.9181, "step": 9382 }, { "epoch": 0.635747679382072, "grad_norm": 7.373246669769287, "learning_rate": 8.818262714764872e-05, "loss": 0.8733, "step": 9383 }, { "epoch": 0.6358154346500441, "grad_norm": 5.75626802444458, "learning_rate": 8.81812581285509e-05, "loss": 0.6196, "step": 9384 }, { "epoch": 0.6358831899180162, "grad_norm": 7.449457168579102, "learning_rate": 8.817988910945308e-05, "loss": 1.1738, "step": 9385 }, { "epoch": 0.6359509451859882, "grad_norm": 6.01223087310791, "learning_rate": 8.817852009035527e-05, "loss": 0.6577, "step": 9386 }, { "epoch": 0.6360187004539603, "grad_norm": 5.752939701080322, "learning_rate": 8.817715107125745e-05, "loss": 0.8448, "step": 9387 }, { "epoch": 0.6360864557219323, "grad_norm": 6.657622337341309, "learning_rate": 8.817578205215963e-05, "loss": 0.8274, "step": 9388 }, { "epoch": 0.6361542109899044, "grad_norm": 8.236910820007324, "learning_rate": 8.817441303306183e-05, "loss": 1.0927, "step": 9389 }, { "epoch": 0.6362219662578765, "grad_norm": 6.963682651519775, "learning_rate": 8.817304401396401e-05, "loss": 1.0313, "step": 9390 }, { "epoch": 0.6362897215258486, "grad_norm": 7.404387950897217, "learning_rate": 8.817167499486619e-05, "loss": 1.0147, "step": 9391 }, { "epoch": 0.6363574767938207, "grad_norm": 6.6976470947265625, "learning_rate": 8.817030597576837e-05, "loss": 0.8769, "step": 9392 }, { "epoch": 0.6364252320617928, "grad_norm": 5.77000617980957, "learning_rate": 8.816893695667055e-05, "loss": 0.8143, "step": 9393 }, { "epoch": 0.6364929873297649, "grad_norm": 5.952037334442139, "learning_rate": 8.816756793757274e-05, "loss": 0.8243, "step": 9394 }, { "epoch": 0.636560742597737, "grad_norm": 7.398003578186035, "learning_rate": 8.816619891847492e-05, "loss": 1.0528, "step": 9395 }, { "epoch": 0.6366284978657091, "grad_norm": 7.800195693969727, "learning_rate": 8.81648298993771e-05, "loss": 0.8146, "step": 9396 }, { "epoch": 0.6366962531336812, "grad_norm": 5.836297035217285, "learning_rate": 8.816346088027928e-05, "loss": 0.6748, "step": 9397 }, { "epoch": 0.6367640084016533, "grad_norm": 8.607617378234863, "learning_rate": 8.816209186118148e-05, "loss": 1.1096, "step": 9398 }, { "epoch": 0.6368317636696254, "grad_norm": 5.9134392738342285, "learning_rate": 8.816072284208366e-05, "loss": 0.8241, "step": 9399 }, { "epoch": 0.6368995189375974, "grad_norm": 6.997758865356445, "learning_rate": 8.815935382298584e-05, "loss": 0.842, "step": 9400 }, { "epoch": 0.6369672742055695, "grad_norm": 6.37071418762207, "learning_rate": 8.815798480388802e-05, "loss": 0.8297, "step": 9401 }, { "epoch": 0.6370350294735415, "grad_norm": 5.971574306488037, "learning_rate": 8.81566157847902e-05, "loss": 0.7567, "step": 9402 }, { "epoch": 0.6371027847415136, "grad_norm": 7.232084274291992, "learning_rate": 8.815524676569239e-05, "loss": 0.9095, "step": 9403 }, { "epoch": 0.6371705400094857, "grad_norm": 4.349423885345459, "learning_rate": 8.815387774659457e-05, "loss": 0.568, "step": 9404 }, { "epoch": 0.6372382952774578, "grad_norm": 5.8268208503723145, "learning_rate": 8.815250872749675e-05, "loss": 0.9203, "step": 9405 }, { "epoch": 0.6373060505454299, "grad_norm": 7.490527153015137, "learning_rate": 8.815113970839893e-05, "loss": 1.1386, "step": 9406 }, { "epoch": 0.637373805813402, "grad_norm": 7.359919548034668, "learning_rate": 8.814977068930113e-05, "loss": 1.1407, "step": 9407 }, { "epoch": 0.6374415610813741, "grad_norm": 8.83666706085205, "learning_rate": 8.814840167020331e-05, "loss": 0.7869, "step": 9408 }, { "epoch": 0.6375093163493462, "grad_norm": 8.88762378692627, "learning_rate": 8.814703265110549e-05, "loss": 1.1761, "step": 9409 }, { "epoch": 0.6375770716173182, "grad_norm": 5.899496078491211, "learning_rate": 8.814566363200767e-05, "loss": 0.8142, "step": 9410 }, { "epoch": 0.6376448268852903, "grad_norm": 5.661779880523682, "learning_rate": 8.814429461290985e-05, "loss": 0.7744, "step": 9411 }, { "epoch": 0.6377125821532624, "grad_norm": 6.715488910675049, "learning_rate": 8.814292559381204e-05, "loss": 0.7039, "step": 9412 }, { "epoch": 0.6377803374212345, "grad_norm": 6.482609272003174, "learning_rate": 8.814155657471422e-05, "loss": 0.6712, "step": 9413 }, { "epoch": 0.6378480926892066, "grad_norm": 6.715174198150635, "learning_rate": 8.81401875556164e-05, "loss": 0.899, "step": 9414 }, { "epoch": 0.6379158479571787, "grad_norm": 5.657138824462891, "learning_rate": 8.813881853651858e-05, "loss": 0.7513, "step": 9415 }, { "epoch": 0.6379836032251508, "grad_norm": 5.905570983886719, "learning_rate": 8.813744951742076e-05, "loss": 0.8074, "step": 9416 }, { "epoch": 0.6380513584931229, "grad_norm": 7.106529235839844, "learning_rate": 8.813608049832296e-05, "loss": 0.8908, "step": 9417 }, { "epoch": 0.638119113761095, "grad_norm": 6.900413990020752, "learning_rate": 8.813471147922514e-05, "loss": 0.8776, "step": 9418 }, { "epoch": 0.638186869029067, "grad_norm": 6.651158809661865, "learning_rate": 8.813334246012732e-05, "loss": 0.8336, "step": 9419 }, { "epoch": 0.638254624297039, "grad_norm": 8.57502269744873, "learning_rate": 8.81319734410295e-05, "loss": 1.0396, "step": 9420 }, { "epoch": 0.6383223795650111, "grad_norm": 4.871134281158447, "learning_rate": 8.813060442193169e-05, "loss": 0.7517, "step": 9421 }, { "epoch": 0.6383901348329832, "grad_norm": 10.170848846435547, "learning_rate": 8.812923540283387e-05, "loss": 0.9677, "step": 9422 }, { "epoch": 0.6384578901009553, "grad_norm": 6.10742712020874, "learning_rate": 8.812786638373605e-05, "loss": 0.9884, "step": 9423 }, { "epoch": 0.6385256453689274, "grad_norm": 5.653887748718262, "learning_rate": 8.812649736463823e-05, "loss": 0.7325, "step": 9424 }, { "epoch": 0.6385934006368995, "grad_norm": 6.501628398895264, "learning_rate": 8.812512834554041e-05, "loss": 0.8378, "step": 9425 }, { "epoch": 0.6386611559048716, "grad_norm": 5.696453094482422, "learning_rate": 8.812375932644261e-05, "loss": 0.787, "step": 9426 }, { "epoch": 0.6387289111728437, "grad_norm": 6.385643482208252, "learning_rate": 8.812239030734479e-05, "loss": 0.7208, "step": 9427 }, { "epoch": 0.6387966664408158, "grad_norm": 8.655128479003906, "learning_rate": 8.812102128824697e-05, "loss": 0.5881, "step": 9428 }, { "epoch": 0.6388644217087879, "grad_norm": 5.63440465927124, "learning_rate": 8.811965226914916e-05, "loss": 0.8304, "step": 9429 }, { "epoch": 0.63893217697676, "grad_norm": 7.352899074554443, "learning_rate": 8.811828325005134e-05, "loss": 0.9048, "step": 9430 }, { "epoch": 0.6389999322447321, "grad_norm": 6.372758865356445, "learning_rate": 8.811691423095352e-05, "loss": 0.8811, "step": 9431 }, { "epoch": 0.6390676875127042, "grad_norm": 8.284529685974121, "learning_rate": 8.811554521185572e-05, "loss": 1.0255, "step": 9432 }, { "epoch": 0.6391354427806762, "grad_norm": 4.690229892730713, "learning_rate": 8.81141761927579e-05, "loss": 0.6671, "step": 9433 }, { "epoch": 0.6392031980486483, "grad_norm": 5.906257629394531, "learning_rate": 8.811280717366008e-05, "loss": 1.0164, "step": 9434 }, { "epoch": 0.6392709533166203, "grad_norm": 5.012555122375488, "learning_rate": 8.811143815456227e-05, "loss": 0.5985, "step": 9435 }, { "epoch": 0.6393387085845924, "grad_norm": 9.228821754455566, "learning_rate": 8.811006913546445e-05, "loss": 0.9541, "step": 9436 }, { "epoch": 0.6394064638525645, "grad_norm": 6.516229152679443, "learning_rate": 8.810870011636663e-05, "loss": 1.0687, "step": 9437 }, { "epoch": 0.6394742191205366, "grad_norm": 6.953182220458984, "learning_rate": 8.810733109726881e-05, "loss": 0.6515, "step": 9438 }, { "epoch": 0.6395419743885087, "grad_norm": 5.848710536956787, "learning_rate": 8.810596207817099e-05, "loss": 0.9173, "step": 9439 }, { "epoch": 0.6396097296564808, "grad_norm": 6.093873023986816, "learning_rate": 8.810459305907319e-05, "loss": 0.7612, "step": 9440 }, { "epoch": 0.6396774849244529, "grad_norm": 6.799055099487305, "learning_rate": 8.810322403997537e-05, "loss": 0.9219, "step": 9441 }, { "epoch": 0.639745240192425, "grad_norm": 6.2249436378479, "learning_rate": 8.810185502087755e-05, "loss": 0.8459, "step": 9442 }, { "epoch": 0.639812995460397, "grad_norm": 9.087257385253906, "learning_rate": 8.810048600177973e-05, "loss": 0.7746, "step": 9443 }, { "epoch": 0.6398807507283691, "grad_norm": 5.2963433265686035, "learning_rate": 8.809911698268192e-05, "loss": 1.0891, "step": 9444 }, { "epoch": 0.6399485059963412, "grad_norm": 6.330840110778809, "learning_rate": 8.80977479635841e-05, "loss": 0.7877, "step": 9445 }, { "epoch": 0.6400162612643133, "grad_norm": 6.288755416870117, "learning_rate": 8.809637894448628e-05, "loss": 0.8538, "step": 9446 }, { "epoch": 0.6400840165322854, "grad_norm": 6.01615571975708, "learning_rate": 8.809500992538846e-05, "loss": 0.8595, "step": 9447 }, { "epoch": 0.6401517718002575, "grad_norm": 5.402866840362549, "learning_rate": 8.809364090629064e-05, "loss": 0.7067, "step": 9448 }, { "epoch": 0.6402195270682296, "grad_norm": 5.39243221282959, "learning_rate": 8.809227188719284e-05, "loss": 0.7097, "step": 9449 }, { "epoch": 0.6402872823362017, "grad_norm": 5.97304630279541, "learning_rate": 8.809090286809502e-05, "loss": 0.8014, "step": 9450 }, { "epoch": 0.6403550376041737, "grad_norm": 9.370304107666016, "learning_rate": 8.80895338489972e-05, "loss": 0.8021, "step": 9451 }, { "epoch": 0.6404227928721458, "grad_norm": 5.073042869567871, "learning_rate": 8.808816482989938e-05, "loss": 0.7241, "step": 9452 }, { "epoch": 0.6404905481401179, "grad_norm": 6.402749061584473, "learning_rate": 8.808679581080157e-05, "loss": 0.733, "step": 9453 }, { "epoch": 0.64055830340809, "grad_norm": 5.919348239898682, "learning_rate": 8.808542679170375e-05, "loss": 1.0033, "step": 9454 }, { "epoch": 0.640626058676062, "grad_norm": 5.996345520019531, "learning_rate": 8.808405777260593e-05, "loss": 0.5867, "step": 9455 }, { "epoch": 0.6406938139440341, "grad_norm": 6.826650619506836, "learning_rate": 8.808268875350811e-05, "loss": 1.1324, "step": 9456 }, { "epoch": 0.6407615692120062, "grad_norm": 5.9293999671936035, "learning_rate": 8.808131973441029e-05, "loss": 0.7688, "step": 9457 }, { "epoch": 0.6408293244799783, "grad_norm": 7.4283833503723145, "learning_rate": 8.807995071531249e-05, "loss": 1.0136, "step": 9458 }, { "epoch": 0.6408970797479504, "grad_norm": 6.848118782043457, "learning_rate": 8.807858169621467e-05, "loss": 0.8612, "step": 9459 }, { "epoch": 0.6409648350159225, "grad_norm": 5.884200096130371, "learning_rate": 8.807721267711685e-05, "loss": 0.7734, "step": 9460 }, { "epoch": 0.6410325902838946, "grad_norm": 6.456984996795654, "learning_rate": 8.807584365801903e-05, "loss": 0.899, "step": 9461 }, { "epoch": 0.6411003455518667, "grad_norm": 6.896454334259033, "learning_rate": 8.807447463892121e-05, "loss": 0.9756, "step": 9462 }, { "epoch": 0.6411681008198388, "grad_norm": 5.393690586090088, "learning_rate": 8.80731056198234e-05, "loss": 0.5639, "step": 9463 }, { "epoch": 0.6412358560878109, "grad_norm": 5.285966873168945, "learning_rate": 8.807173660072558e-05, "loss": 0.6522, "step": 9464 }, { "epoch": 0.641303611355783, "grad_norm": 5.970489501953125, "learning_rate": 8.807036758162776e-05, "loss": 0.6489, "step": 9465 }, { "epoch": 0.641371366623755, "grad_norm": 6.003796100616455, "learning_rate": 8.806899856252994e-05, "loss": 0.8582, "step": 9466 }, { "epoch": 0.641439121891727, "grad_norm": 5.609601974487305, "learning_rate": 8.806762954343214e-05, "loss": 0.6631, "step": 9467 }, { "epoch": 0.6415068771596991, "grad_norm": 6.617072582244873, "learning_rate": 8.806626052433432e-05, "loss": 0.8057, "step": 9468 }, { "epoch": 0.6415746324276712, "grad_norm": 5.329599857330322, "learning_rate": 8.80648915052365e-05, "loss": 0.7941, "step": 9469 }, { "epoch": 0.6416423876956433, "grad_norm": 6.617512226104736, "learning_rate": 8.806352248613868e-05, "loss": 0.7594, "step": 9470 }, { "epoch": 0.6417101429636154, "grad_norm": 5.257902145385742, "learning_rate": 8.806215346704086e-05, "loss": 0.8737, "step": 9471 }, { "epoch": 0.6417778982315875, "grad_norm": 8.781728744506836, "learning_rate": 8.806078444794305e-05, "loss": 0.9662, "step": 9472 }, { "epoch": 0.6418456534995596, "grad_norm": 5.8004889488220215, "learning_rate": 8.805941542884523e-05, "loss": 0.8442, "step": 9473 }, { "epoch": 0.6419134087675317, "grad_norm": 7.527797698974609, "learning_rate": 8.805804640974741e-05, "loss": 0.8849, "step": 9474 }, { "epoch": 0.6419811640355038, "grad_norm": 6.8633952140808105, "learning_rate": 8.80566773906496e-05, "loss": 0.9317, "step": 9475 }, { "epoch": 0.6420489193034759, "grad_norm": 5.778538703918457, "learning_rate": 8.805530837155179e-05, "loss": 0.7787, "step": 9476 }, { "epoch": 0.6421166745714479, "grad_norm": 5.698131084442139, "learning_rate": 8.805393935245397e-05, "loss": 0.7335, "step": 9477 }, { "epoch": 0.64218442983942, "grad_norm": 7.117033958435059, "learning_rate": 8.805257033335615e-05, "loss": 0.9524, "step": 9478 }, { "epoch": 0.6422521851073921, "grad_norm": 7.394477367401123, "learning_rate": 8.805120131425834e-05, "loss": 0.8617, "step": 9479 }, { "epoch": 0.6423199403753642, "grad_norm": 6.299468517303467, "learning_rate": 8.804983229516052e-05, "loss": 0.7959, "step": 9480 }, { "epoch": 0.6423876956433363, "grad_norm": 5.769999027252197, "learning_rate": 8.80484632760627e-05, "loss": 0.8217, "step": 9481 }, { "epoch": 0.6424554509113084, "grad_norm": 6.29069709777832, "learning_rate": 8.80470942569649e-05, "loss": 0.8485, "step": 9482 }, { "epoch": 0.6425232061792805, "grad_norm": 6.773715496063232, "learning_rate": 8.804572523786708e-05, "loss": 0.9625, "step": 9483 }, { "epoch": 0.6425909614472525, "grad_norm": 5.882883071899414, "learning_rate": 8.804435621876926e-05, "loss": 0.977, "step": 9484 }, { "epoch": 0.6426587167152246, "grad_norm": 5.250417709350586, "learning_rate": 8.804298719967145e-05, "loss": 0.844, "step": 9485 }, { "epoch": 0.6427264719831967, "grad_norm": 6.4407267570495605, "learning_rate": 8.804161818057363e-05, "loss": 0.749, "step": 9486 }, { "epoch": 0.6427942272511687, "grad_norm": 5.549936294555664, "learning_rate": 8.804024916147581e-05, "loss": 0.6762, "step": 9487 }, { "epoch": 0.6428619825191408, "grad_norm": 4.843740463256836, "learning_rate": 8.803888014237799e-05, "loss": 0.6318, "step": 9488 }, { "epoch": 0.6429297377871129, "grad_norm": 8.922931671142578, "learning_rate": 8.803751112328017e-05, "loss": 0.943, "step": 9489 }, { "epoch": 0.642997493055085, "grad_norm": 5.983587265014648, "learning_rate": 8.803614210418237e-05, "loss": 0.6995, "step": 9490 }, { "epoch": 0.6430652483230571, "grad_norm": 10.325361251831055, "learning_rate": 8.803477308508455e-05, "loss": 0.6907, "step": 9491 }, { "epoch": 0.6431330035910292, "grad_norm": 7.284884452819824, "learning_rate": 8.803340406598673e-05, "loss": 0.8517, "step": 9492 }, { "epoch": 0.6432007588590013, "grad_norm": 5.540932655334473, "learning_rate": 8.803203504688891e-05, "loss": 0.6415, "step": 9493 }, { "epoch": 0.6432685141269734, "grad_norm": 8.60664176940918, "learning_rate": 8.803066602779109e-05, "loss": 0.8818, "step": 9494 }, { "epoch": 0.6433362693949455, "grad_norm": 5.478827476501465, "learning_rate": 8.802929700869328e-05, "loss": 0.5589, "step": 9495 }, { "epoch": 0.6434040246629176, "grad_norm": 6.415865421295166, "learning_rate": 8.802792798959546e-05, "loss": 0.748, "step": 9496 }, { "epoch": 0.6434717799308897, "grad_norm": 7.106724262237549, "learning_rate": 8.802655897049764e-05, "loss": 1.0591, "step": 9497 }, { "epoch": 0.6435395351988618, "grad_norm": 7.584676742553711, "learning_rate": 8.802518995139982e-05, "loss": 0.924, "step": 9498 }, { "epoch": 0.6436072904668338, "grad_norm": 7.438377380371094, "learning_rate": 8.802382093230202e-05, "loss": 1.0451, "step": 9499 }, { "epoch": 0.6436750457348058, "grad_norm": 6.775513648986816, "learning_rate": 8.80224519132042e-05, "loss": 0.8273, "step": 9500 }, { "epoch": 0.6437428010027779, "grad_norm": 8.242133140563965, "learning_rate": 8.802108289410638e-05, "loss": 1.0045, "step": 9501 }, { "epoch": 0.64381055627075, "grad_norm": 6.122509956359863, "learning_rate": 8.801971387500856e-05, "loss": 0.8026, "step": 9502 }, { "epoch": 0.6438783115387221, "grad_norm": 5.884267330169678, "learning_rate": 8.801834485591074e-05, "loss": 0.7935, "step": 9503 }, { "epoch": 0.6439460668066942, "grad_norm": 5.653980731964111, "learning_rate": 8.801697583681293e-05, "loss": 0.7223, "step": 9504 }, { "epoch": 0.6440138220746663, "grad_norm": 9.43459701538086, "learning_rate": 8.801560681771511e-05, "loss": 0.7211, "step": 9505 }, { "epoch": 0.6440815773426384, "grad_norm": 6.32805061340332, "learning_rate": 8.801423779861729e-05, "loss": 0.5007, "step": 9506 }, { "epoch": 0.6441493326106105, "grad_norm": 5.424881458282471, "learning_rate": 8.801286877951947e-05, "loss": 0.8556, "step": 9507 }, { "epoch": 0.6442170878785826, "grad_norm": 6.115161418914795, "learning_rate": 8.801149976042167e-05, "loss": 1.068, "step": 9508 }, { "epoch": 0.6442848431465547, "grad_norm": 6.05783748626709, "learning_rate": 8.801013074132385e-05, "loss": 0.7372, "step": 9509 }, { "epoch": 0.6443525984145267, "grad_norm": 5.965503692626953, "learning_rate": 8.800876172222603e-05, "loss": 0.7439, "step": 9510 }, { "epoch": 0.6444203536824988, "grad_norm": 6.0100483894348145, "learning_rate": 8.800739270312821e-05, "loss": 0.8335, "step": 9511 }, { "epoch": 0.6444881089504709, "grad_norm": 4.494331359863281, "learning_rate": 8.800602368403039e-05, "loss": 0.7585, "step": 9512 }, { "epoch": 0.644555864218443, "grad_norm": 6.9478631019592285, "learning_rate": 8.800465466493258e-05, "loss": 0.9518, "step": 9513 }, { "epoch": 0.6446236194864151, "grad_norm": 6.849409103393555, "learning_rate": 8.800328564583476e-05, "loss": 0.9584, "step": 9514 }, { "epoch": 0.6446913747543872, "grad_norm": 5.837069034576416, "learning_rate": 8.800191662673694e-05, "loss": 0.7976, "step": 9515 }, { "epoch": 0.6447591300223592, "grad_norm": 8.214330673217773, "learning_rate": 8.800054760763912e-05, "loss": 0.6255, "step": 9516 }, { "epoch": 0.6448268852903313, "grad_norm": 5.174075126647949, "learning_rate": 8.79991785885413e-05, "loss": 0.5772, "step": 9517 }, { "epoch": 0.6448946405583034, "grad_norm": 5.130014896392822, "learning_rate": 8.79978095694435e-05, "loss": 0.8923, "step": 9518 }, { "epoch": 0.6449623958262755, "grad_norm": 6.434813499450684, "learning_rate": 8.799644055034568e-05, "loss": 0.9681, "step": 9519 }, { "epoch": 0.6450301510942476, "grad_norm": 6.135867118835449, "learning_rate": 8.799507153124786e-05, "loss": 0.7337, "step": 9520 }, { "epoch": 0.6450979063622196, "grad_norm": 5.493929386138916, "learning_rate": 8.799370251215004e-05, "loss": 0.6834, "step": 9521 }, { "epoch": 0.6451656616301917, "grad_norm": 7.558812618255615, "learning_rate": 8.799233349305223e-05, "loss": 1.0415, "step": 9522 }, { "epoch": 0.6452334168981638, "grad_norm": 6.610668659210205, "learning_rate": 8.799096447395441e-05, "loss": 0.6631, "step": 9523 }, { "epoch": 0.6453011721661359, "grad_norm": 6.513849258422852, "learning_rate": 8.798959545485659e-05, "loss": 0.7852, "step": 9524 }, { "epoch": 0.645368927434108, "grad_norm": 5.489187717437744, "learning_rate": 8.798822643575879e-05, "loss": 0.8824, "step": 9525 }, { "epoch": 0.6454366827020801, "grad_norm": 6.717770576477051, "learning_rate": 8.798685741666097e-05, "loss": 0.8239, "step": 9526 }, { "epoch": 0.6455044379700522, "grad_norm": 5.649533271789551, "learning_rate": 8.798548839756315e-05, "loss": 0.7458, "step": 9527 }, { "epoch": 0.6455721932380243, "grad_norm": 4.854784965515137, "learning_rate": 8.798411937846534e-05, "loss": 0.6289, "step": 9528 }, { "epoch": 0.6456399485059964, "grad_norm": 7.124615669250488, "learning_rate": 8.798275035936752e-05, "loss": 0.8035, "step": 9529 }, { "epoch": 0.6457077037739685, "grad_norm": 7.39643669128418, "learning_rate": 8.79813813402697e-05, "loss": 0.776, "step": 9530 }, { "epoch": 0.6457754590419406, "grad_norm": 5.295919418334961, "learning_rate": 8.79800123211719e-05, "loss": 0.8542, "step": 9531 }, { "epoch": 0.6458432143099126, "grad_norm": 6.795876502990723, "learning_rate": 8.797864330207408e-05, "loss": 0.8777, "step": 9532 }, { "epoch": 0.6459109695778846, "grad_norm": 6.2083001136779785, "learning_rate": 8.797727428297626e-05, "loss": 1.0176, "step": 9533 }, { "epoch": 0.6459787248458567, "grad_norm": 7.269214153289795, "learning_rate": 8.797590526387844e-05, "loss": 1.0724, "step": 9534 }, { "epoch": 0.6460464801138288, "grad_norm": 5.762153625488281, "learning_rate": 8.797453624478062e-05, "loss": 0.7247, "step": 9535 }, { "epoch": 0.6461142353818009, "grad_norm": 5.321116924285889, "learning_rate": 8.797316722568281e-05, "loss": 0.6601, "step": 9536 }, { "epoch": 0.646181990649773, "grad_norm": 5.3908586502075195, "learning_rate": 8.797179820658499e-05, "loss": 0.5765, "step": 9537 }, { "epoch": 0.6462497459177451, "grad_norm": 6.827112197875977, "learning_rate": 8.797042918748717e-05, "loss": 0.8915, "step": 9538 }, { "epoch": 0.6463175011857172, "grad_norm": 6.380746841430664, "learning_rate": 8.796906016838935e-05, "loss": 0.8016, "step": 9539 }, { "epoch": 0.6463852564536893, "grad_norm": 6.202441215515137, "learning_rate": 8.796769114929155e-05, "loss": 0.8524, "step": 9540 }, { "epoch": 0.6464530117216614, "grad_norm": 7.0272955894470215, "learning_rate": 8.796632213019373e-05, "loss": 0.7491, "step": 9541 }, { "epoch": 0.6465207669896335, "grad_norm": 5.641077995300293, "learning_rate": 8.79649531110959e-05, "loss": 0.6814, "step": 9542 }, { "epoch": 0.6465885222576055, "grad_norm": 5.427177429199219, "learning_rate": 8.796358409199809e-05, "loss": 0.6764, "step": 9543 }, { "epoch": 0.6466562775255776, "grad_norm": 5.034342288970947, "learning_rate": 8.796221507290027e-05, "loss": 0.7099, "step": 9544 }, { "epoch": 0.6467240327935497, "grad_norm": 7.89331579208374, "learning_rate": 8.796084605380246e-05, "loss": 0.7996, "step": 9545 }, { "epoch": 0.6467917880615218, "grad_norm": 7.4129767417907715, "learning_rate": 8.795947703470464e-05, "loss": 0.6799, "step": 9546 }, { "epoch": 0.6468595433294939, "grad_norm": 5.55999755859375, "learning_rate": 8.795810801560682e-05, "loss": 0.858, "step": 9547 }, { "epoch": 0.646927298597466, "grad_norm": 5.925971031188965, "learning_rate": 8.7956738996509e-05, "loss": 0.7046, "step": 9548 }, { "epoch": 0.646995053865438, "grad_norm": 5.659457206726074, "learning_rate": 8.795536997741118e-05, "loss": 0.8139, "step": 9549 }, { "epoch": 0.6470628091334101, "grad_norm": 6.554767608642578, "learning_rate": 8.795400095831338e-05, "loss": 1.0621, "step": 9550 }, { "epoch": 0.6471305644013822, "grad_norm": 5.473086357116699, "learning_rate": 8.795263193921556e-05, "loss": 0.8589, "step": 9551 }, { "epoch": 0.6471983196693543, "grad_norm": 5.166335582733154, "learning_rate": 8.795126292011774e-05, "loss": 0.7418, "step": 9552 }, { "epoch": 0.6472660749373264, "grad_norm": 10.33619213104248, "learning_rate": 8.794989390101992e-05, "loss": 0.7379, "step": 9553 }, { "epoch": 0.6473338302052984, "grad_norm": 8.167377471923828, "learning_rate": 8.794852488192211e-05, "loss": 1.0573, "step": 9554 }, { "epoch": 0.6474015854732705, "grad_norm": 9.861980438232422, "learning_rate": 8.794715586282429e-05, "loss": 0.9429, "step": 9555 }, { "epoch": 0.6474693407412426, "grad_norm": 6.154029369354248, "learning_rate": 8.794578684372647e-05, "loss": 0.9017, "step": 9556 }, { "epoch": 0.6475370960092147, "grad_norm": 5.818716526031494, "learning_rate": 8.794441782462865e-05, "loss": 0.8584, "step": 9557 }, { "epoch": 0.6476048512771868, "grad_norm": 7.98982048034668, "learning_rate": 8.794304880553083e-05, "loss": 0.8423, "step": 9558 }, { "epoch": 0.6476726065451589, "grad_norm": 6.663909435272217, "learning_rate": 8.794167978643303e-05, "loss": 1.0512, "step": 9559 }, { "epoch": 0.647740361813131, "grad_norm": 4.941400051116943, "learning_rate": 8.794031076733521e-05, "loss": 0.6979, "step": 9560 }, { "epoch": 0.6478081170811031, "grad_norm": 6.668032169342041, "learning_rate": 8.793894174823739e-05, "loss": 0.9286, "step": 9561 }, { "epoch": 0.6478758723490752, "grad_norm": 6.0007100105285645, "learning_rate": 8.793757272913957e-05, "loss": 1.0924, "step": 9562 }, { "epoch": 0.6479436276170473, "grad_norm": 5.80806303024292, "learning_rate": 8.793620371004176e-05, "loss": 0.8201, "step": 9563 }, { "epoch": 0.6480113828850194, "grad_norm": 5.381270408630371, "learning_rate": 8.793483469094394e-05, "loss": 0.813, "step": 9564 }, { "epoch": 0.6480791381529913, "grad_norm": 6.4966721534729, "learning_rate": 8.793346567184612e-05, "loss": 1.0333, "step": 9565 }, { "epoch": 0.6481468934209634, "grad_norm": 6.251739978790283, "learning_rate": 8.79320966527483e-05, "loss": 0.8608, "step": 9566 }, { "epoch": 0.6482146486889355, "grad_norm": 6.237257957458496, "learning_rate": 8.793072763365048e-05, "loss": 0.713, "step": 9567 }, { "epoch": 0.6482824039569076, "grad_norm": 5.686729907989502, "learning_rate": 8.792935861455268e-05, "loss": 0.7128, "step": 9568 }, { "epoch": 0.6483501592248797, "grad_norm": 7.520707130432129, "learning_rate": 8.792798959545486e-05, "loss": 0.8688, "step": 9569 }, { "epoch": 0.6484179144928518, "grad_norm": 4.83900785446167, "learning_rate": 8.792662057635704e-05, "loss": 0.5782, "step": 9570 }, { "epoch": 0.6484856697608239, "grad_norm": 8.14783763885498, "learning_rate": 8.792525155725923e-05, "loss": 0.5705, "step": 9571 }, { "epoch": 0.648553425028796, "grad_norm": 5.105820655822754, "learning_rate": 8.792388253816141e-05, "loss": 0.7871, "step": 9572 }, { "epoch": 0.6486211802967681, "grad_norm": 5.665055274963379, "learning_rate": 8.792251351906359e-05, "loss": 0.8448, "step": 9573 }, { "epoch": 0.6486889355647402, "grad_norm": 7.109708309173584, "learning_rate": 8.792114449996579e-05, "loss": 0.6738, "step": 9574 }, { "epoch": 0.6487566908327123, "grad_norm": 6.253261089324951, "learning_rate": 8.791977548086797e-05, "loss": 0.6266, "step": 9575 }, { "epoch": 0.6488244461006843, "grad_norm": 7.64295768737793, "learning_rate": 8.791840646177015e-05, "loss": 1.0729, "step": 9576 }, { "epoch": 0.6488922013686564, "grad_norm": 4.579455375671387, "learning_rate": 8.791703744267234e-05, "loss": 0.7472, "step": 9577 }, { "epoch": 0.6489599566366285, "grad_norm": 6.2152791023254395, "learning_rate": 8.791566842357452e-05, "loss": 0.8086, "step": 9578 }, { "epoch": 0.6490277119046006, "grad_norm": 6.039999008178711, "learning_rate": 8.79142994044767e-05, "loss": 0.6148, "step": 9579 }, { "epoch": 0.6490954671725727, "grad_norm": 6.458342552185059, "learning_rate": 8.791293038537888e-05, "loss": 0.9861, "step": 9580 }, { "epoch": 0.6491632224405448, "grad_norm": 7.025355815887451, "learning_rate": 8.791156136628106e-05, "loss": 0.9073, "step": 9581 }, { "epoch": 0.6492309777085168, "grad_norm": 5.3977484703063965, "learning_rate": 8.791019234718326e-05, "loss": 0.7138, "step": 9582 }, { "epoch": 0.6492987329764889, "grad_norm": 5.696099281311035, "learning_rate": 8.790882332808544e-05, "loss": 0.952, "step": 9583 }, { "epoch": 0.649366488244461, "grad_norm": 5.6307501792907715, "learning_rate": 8.790745430898762e-05, "loss": 0.8517, "step": 9584 }, { "epoch": 0.6494342435124331, "grad_norm": 8.366070747375488, "learning_rate": 8.79060852898898e-05, "loss": 0.9537, "step": 9585 }, { "epoch": 0.6495019987804052, "grad_norm": 5.901167392730713, "learning_rate": 8.790471627079199e-05, "loss": 0.8325, "step": 9586 }, { "epoch": 0.6495697540483772, "grad_norm": 11.905413627624512, "learning_rate": 8.790334725169417e-05, "loss": 0.8988, "step": 9587 }, { "epoch": 0.6496375093163493, "grad_norm": 7.553168773651123, "learning_rate": 8.790197823259635e-05, "loss": 0.8318, "step": 9588 }, { "epoch": 0.6497052645843214, "grad_norm": 6.661622047424316, "learning_rate": 8.790060921349853e-05, "loss": 0.7276, "step": 9589 }, { "epoch": 0.6497730198522935, "grad_norm": 5.579341411590576, "learning_rate": 8.789924019440071e-05, "loss": 0.8388, "step": 9590 }, { "epoch": 0.6498407751202656, "grad_norm": 6.08758020401001, "learning_rate": 8.78978711753029e-05, "loss": 0.9075, "step": 9591 }, { "epoch": 0.6499085303882377, "grad_norm": 7.589080810546875, "learning_rate": 8.789650215620509e-05, "loss": 0.7852, "step": 9592 }, { "epoch": 0.6499762856562098, "grad_norm": 6.479293346405029, "learning_rate": 8.789513313710727e-05, "loss": 0.768, "step": 9593 }, { "epoch": 0.6500440409241819, "grad_norm": 5.796462535858154, "learning_rate": 8.789376411800945e-05, "loss": 0.7431, "step": 9594 }, { "epoch": 0.650111796192154, "grad_norm": 5.195106506347656, "learning_rate": 8.789239509891163e-05, "loss": 0.9473, "step": 9595 }, { "epoch": 0.6501795514601261, "grad_norm": 5.687699317932129, "learning_rate": 8.789102607981382e-05, "loss": 0.8332, "step": 9596 }, { "epoch": 0.6502473067280982, "grad_norm": 5.329159259796143, "learning_rate": 8.7889657060716e-05, "loss": 0.8068, "step": 9597 }, { "epoch": 0.6503150619960701, "grad_norm": 7.383645057678223, "learning_rate": 8.788828804161818e-05, "loss": 0.9984, "step": 9598 }, { "epoch": 0.6503828172640422, "grad_norm": 7.364968776702881, "learning_rate": 8.788691902252036e-05, "loss": 0.7574, "step": 9599 }, { "epoch": 0.6504505725320143, "grad_norm": 6.2040886878967285, "learning_rate": 8.788555000342256e-05, "loss": 0.6552, "step": 9600 }, { "epoch": 0.6505183277999864, "grad_norm": 10.075439453125, "learning_rate": 8.788418098432474e-05, "loss": 0.659, "step": 9601 }, { "epoch": 0.6505860830679585, "grad_norm": 6.742908954620361, "learning_rate": 8.788281196522692e-05, "loss": 0.6485, "step": 9602 }, { "epoch": 0.6506538383359306, "grad_norm": 6.678378582000732, "learning_rate": 8.78814429461291e-05, "loss": 0.7113, "step": 9603 }, { "epoch": 0.6507215936039027, "grad_norm": 5.988241195678711, "learning_rate": 8.788007392703128e-05, "loss": 0.8417, "step": 9604 }, { "epoch": 0.6507893488718748, "grad_norm": 6.3953704833984375, "learning_rate": 8.787870490793347e-05, "loss": 0.8448, "step": 9605 }, { "epoch": 0.6508571041398469, "grad_norm": 11.006570816040039, "learning_rate": 8.787733588883565e-05, "loss": 0.9258, "step": 9606 }, { "epoch": 0.650924859407819, "grad_norm": 7.086367607116699, "learning_rate": 8.787596686973783e-05, "loss": 0.86, "step": 9607 }, { "epoch": 0.6509926146757911, "grad_norm": 5.534137725830078, "learning_rate": 8.787459785064001e-05, "loss": 0.7984, "step": 9608 }, { "epoch": 0.6510603699437632, "grad_norm": 6.710955619812012, "learning_rate": 8.78732288315422e-05, "loss": 0.9852, "step": 9609 }, { "epoch": 0.6511281252117352, "grad_norm": 7.151968479156494, "learning_rate": 8.787185981244439e-05, "loss": 0.7383, "step": 9610 }, { "epoch": 0.6511958804797073, "grad_norm": 6.063338279724121, "learning_rate": 8.787049079334657e-05, "loss": 0.8971, "step": 9611 }, { "epoch": 0.6512636357476794, "grad_norm": 7.058009147644043, "learning_rate": 8.786912177424875e-05, "loss": 0.9314, "step": 9612 }, { "epoch": 0.6513313910156515, "grad_norm": 8.007342338562012, "learning_rate": 8.786775275515093e-05, "loss": 0.6878, "step": 9613 }, { "epoch": 0.6513991462836235, "grad_norm": 6.176419734954834, "learning_rate": 8.786638373605312e-05, "loss": 0.8863, "step": 9614 }, { "epoch": 0.6514669015515956, "grad_norm": 5.785898685455322, "learning_rate": 8.78650147169553e-05, "loss": 1.0512, "step": 9615 }, { "epoch": 0.6515346568195677, "grad_norm": 5.100857734680176, "learning_rate": 8.786364569785748e-05, "loss": 0.6969, "step": 9616 }, { "epoch": 0.6516024120875398, "grad_norm": 6.292816638946533, "learning_rate": 8.786227667875968e-05, "loss": 0.8185, "step": 9617 }, { "epoch": 0.6516701673555119, "grad_norm": 6.242623805999756, "learning_rate": 8.786090765966186e-05, "loss": 0.6833, "step": 9618 }, { "epoch": 0.651737922623484, "grad_norm": 7.2665791511535645, "learning_rate": 8.785953864056404e-05, "loss": 1.0487, "step": 9619 }, { "epoch": 0.651805677891456, "grad_norm": 5.973618507385254, "learning_rate": 8.785816962146623e-05, "loss": 0.8992, "step": 9620 }, { "epoch": 0.6518734331594281, "grad_norm": 8.300751686096191, "learning_rate": 8.785680060236841e-05, "loss": 0.9334, "step": 9621 }, { "epoch": 0.6519411884274002, "grad_norm": 5.687087535858154, "learning_rate": 8.785543158327059e-05, "loss": 0.8131, "step": 9622 }, { "epoch": 0.6520089436953723, "grad_norm": 5.419602870941162, "learning_rate": 8.785406256417279e-05, "loss": 0.6697, "step": 9623 }, { "epoch": 0.6520766989633444, "grad_norm": 7.112249851226807, "learning_rate": 8.785269354507497e-05, "loss": 0.8477, "step": 9624 }, { "epoch": 0.6521444542313165, "grad_norm": 5.755582809448242, "learning_rate": 8.785132452597715e-05, "loss": 0.6766, "step": 9625 }, { "epoch": 0.6522122094992886, "grad_norm": 5.415201187133789, "learning_rate": 8.784995550687933e-05, "loss": 0.6871, "step": 9626 }, { "epoch": 0.6522799647672607, "grad_norm": 5.576985836029053, "learning_rate": 8.78485864877815e-05, "loss": 0.9001, "step": 9627 }, { "epoch": 0.6523477200352328, "grad_norm": 5.954935073852539, "learning_rate": 8.78472174686837e-05, "loss": 0.6694, "step": 9628 }, { "epoch": 0.6524154753032049, "grad_norm": 5.59356164932251, "learning_rate": 8.784584844958588e-05, "loss": 0.74, "step": 9629 }, { "epoch": 0.652483230571177, "grad_norm": 5.1607346534729, "learning_rate": 8.784447943048806e-05, "loss": 0.7869, "step": 9630 }, { "epoch": 0.652550985839149, "grad_norm": 6.959099769592285, "learning_rate": 8.784311041139024e-05, "loss": 0.8096, "step": 9631 }, { "epoch": 0.652618741107121, "grad_norm": 6.869060516357422, "learning_rate": 8.784174139229244e-05, "loss": 0.9917, "step": 9632 }, { "epoch": 0.6526864963750931, "grad_norm": 6.426820278167725, "learning_rate": 8.784037237319462e-05, "loss": 0.718, "step": 9633 }, { "epoch": 0.6527542516430652, "grad_norm": 5.208813667297363, "learning_rate": 8.78390033540968e-05, "loss": 0.651, "step": 9634 }, { "epoch": 0.6528220069110373, "grad_norm": 6.748283863067627, "learning_rate": 8.783763433499898e-05, "loss": 1.0808, "step": 9635 }, { "epoch": 0.6528897621790094, "grad_norm": 6.567190647125244, "learning_rate": 8.783626531590116e-05, "loss": 0.9373, "step": 9636 }, { "epoch": 0.6529575174469815, "grad_norm": 5.621391773223877, "learning_rate": 8.783489629680335e-05, "loss": 0.9382, "step": 9637 }, { "epoch": 0.6530252727149536, "grad_norm": 7.807063102722168, "learning_rate": 8.783352727770553e-05, "loss": 0.8237, "step": 9638 }, { "epoch": 0.6530930279829257, "grad_norm": 6.320357322692871, "learning_rate": 8.783215825860771e-05, "loss": 0.8245, "step": 9639 }, { "epoch": 0.6531607832508978, "grad_norm": 6.225609302520752, "learning_rate": 8.783078923950989e-05, "loss": 0.932, "step": 9640 }, { "epoch": 0.6532285385188699, "grad_norm": 5.9908366203308105, "learning_rate": 8.782942022041209e-05, "loss": 0.5793, "step": 9641 }, { "epoch": 0.653296293786842, "grad_norm": 8.215045928955078, "learning_rate": 8.782805120131427e-05, "loss": 0.7771, "step": 9642 }, { "epoch": 0.653364049054814, "grad_norm": 5.830068588256836, "learning_rate": 8.782668218221645e-05, "loss": 0.8499, "step": 9643 }, { "epoch": 0.6534318043227861, "grad_norm": 5.979434013366699, "learning_rate": 8.782531316311863e-05, "loss": 0.7335, "step": 9644 }, { "epoch": 0.6534995595907582, "grad_norm": 6.369264125823975, "learning_rate": 8.782394414402081e-05, "loss": 0.994, "step": 9645 }, { "epoch": 0.6535673148587303, "grad_norm": 6.8189287185668945, "learning_rate": 8.7822575124923e-05, "loss": 0.7789, "step": 9646 }, { "epoch": 0.6536350701267023, "grad_norm": 5.990640163421631, "learning_rate": 8.782120610582518e-05, "loss": 0.8369, "step": 9647 }, { "epoch": 0.6537028253946744, "grad_norm": 6.319588661193848, "learning_rate": 8.781983708672736e-05, "loss": 0.9394, "step": 9648 }, { "epoch": 0.6537705806626465, "grad_norm": 5.965325355529785, "learning_rate": 8.781846806762954e-05, "loss": 1.0256, "step": 9649 }, { "epoch": 0.6538383359306186, "grad_norm": 6.193709373474121, "learning_rate": 8.781709904853172e-05, "loss": 0.7261, "step": 9650 }, { "epoch": 0.6539060911985907, "grad_norm": 6.463015079498291, "learning_rate": 8.781573002943392e-05, "loss": 0.6503, "step": 9651 }, { "epoch": 0.6539738464665628, "grad_norm": 5.9288554191589355, "learning_rate": 8.78143610103361e-05, "loss": 0.7345, "step": 9652 }, { "epoch": 0.6540416017345348, "grad_norm": 6.894443035125732, "learning_rate": 8.781299199123828e-05, "loss": 0.7115, "step": 9653 }, { "epoch": 0.6541093570025069, "grad_norm": 5.954974174499512, "learning_rate": 8.781162297214046e-05, "loss": 0.8184, "step": 9654 }, { "epoch": 0.654177112270479, "grad_norm": 7.200540065765381, "learning_rate": 8.781025395304265e-05, "loss": 0.7661, "step": 9655 }, { "epoch": 0.6542448675384511, "grad_norm": 5.560801982879639, "learning_rate": 8.780888493394483e-05, "loss": 0.6097, "step": 9656 }, { "epoch": 0.6543126228064232, "grad_norm": 9.168293952941895, "learning_rate": 8.780751591484701e-05, "loss": 1.0032, "step": 9657 }, { "epoch": 0.6543803780743953, "grad_norm": 6.599915027618408, "learning_rate": 8.780614689574919e-05, "loss": 0.9359, "step": 9658 }, { "epoch": 0.6544481333423674, "grad_norm": 6.528720855712891, "learning_rate": 8.780477787665137e-05, "loss": 0.8039, "step": 9659 }, { "epoch": 0.6545158886103395, "grad_norm": 7.300596714019775, "learning_rate": 8.780340885755357e-05, "loss": 0.823, "step": 9660 }, { "epoch": 0.6545836438783116, "grad_norm": 6.100801467895508, "learning_rate": 8.780203983845575e-05, "loss": 0.7662, "step": 9661 }, { "epoch": 0.6546513991462837, "grad_norm": 6.72039270401001, "learning_rate": 8.780067081935793e-05, "loss": 0.8888, "step": 9662 }, { "epoch": 0.6547191544142557, "grad_norm": 5.6597065925598145, "learning_rate": 8.779930180026012e-05, "loss": 0.7939, "step": 9663 }, { "epoch": 0.6547869096822277, "grad_norm": 5.955264568328857, "learning_rate": 8.77979327811623e-05, "loss": 0.8051, "step": 9664 }, { "epoch": 0.6548546649501998, "grad_norm": 6.064132213592529, "learning_rate": 8.779656376206448e-05, "loss": 0.6377, "step": 9665 }, { "epoch": 0.6549224202181719, "grad_norm": 6.713923931121826, "learning_rate": 8.779519474296668e-05, "loss": 0.9388, "step": 9666 }, { "epoch": 0.654990175486144, "grad_norm": 8.160643577575684, "learning_rate": 8.779382572386886e-05, "loss": 1.0358, "step": 9667 }, { "epoch": 0.6550579307541161, "grad_norm": 7.116758346557617, "learning_rate": 8.779245670477104e-05, "loss": 0.9486, "step": 9668 }, { "epoch": 0.6551256860220882, "grad_norm": 9.230103492736816, "learning_rate": 8.779108768567323e-05, "loss": 0.5998, "step": 9669 }, { "epoch": 0.6551934412900603, "grad_norm": 7.513571739196777, "learning_rate": 8.778971866657541e-05, "loss": 1.0098, "step": 9670 }, { "epoch": 0.6552611965580324, "grad_norm": 6.080422878265381, "learning_rate": 8.778834964747759e-05, "loss": 0.7787, "step": 9671 }, { "epoch": 0.6553289518260045, "grad_norm": 6.582731246948242, "learning_rate": 8.778698062837977e-05, "loss": 0.9038, "step": 9672 }, { "epoch": 0.6553967070939766, "grad_norm": 6.8090620040893555, "learning_rate": 8.778561160928196e-05, "loss": 0.7586, "step": 9673 }, { "epoch": 0.6554644623619487, "grad_norm": 8.279650688171387, "learning_rate": 8.778424259018415e-05, "loss": 0.8105, "step": 9674 }, { "epoch": 0.6555322176299208, "grad_norm": 5.152581691741943, "learning_rate": 8.778287357108633e-05, "loss": 0.7084, "step": 9675 }, { "epoch": 0.6555999728978928, "grad_norm": 6.42199182510376, "learning_rate": 8.77815045519885e-05, "loss": 0.7351, "step": 9676 }, { "epoch": 0.6556677281658649, "grad_norm": 6.543702125549316, "learning_rate": 8.778013553289069e-05, "loss": 0.8382, "step": 9677 }, { "epoch": 0.655735483433837, "grad_norm": 4.9357171058654785, "learning_rate": 8.777876651379288e-05, "loss": 0.8202, "step": 9678 }, { "epoch": 0.655803238701809, "grad_norm": 5.525670051574707, "learning_rate": 8.777739749469506e-05, "loss": 0.7993, "step": 9679 }, { "epoch": 0.6558709939697811, "grad_norm": 6.35006046295166, "learning_rate": 8.777602847559724e-05, "loss": 0.925, "step": 9680 }, { "epoch": 0.6559387492377532, "grad_norm": 6.477513790130615, "learning_rate": 8.777465945649942e-05, "loss": 0.8741, "step": 9681 }, { "epoch": 0.6560065045057253, "grad_norm": 6.017436981201172, "learning_rate": 8.77732904374016e-05, "loss": 0.8689, "step": 9682 }, { "epoch": 0.6560742597736974, "grad_norm": 5.3170599937438965, "learning_rate": 8.77719214183038e-05, "loss": 0.6794, "step": 9683 }, { "epoch": 0.6561420150416695, "grad_norm": 7.529482841491699, "learning_rate": 8.777055239920598e-05, "loss": 1.0313, "step": 9684 }, { "epoch": 0.6562097703096416, "grad_norm": 5.189818859100342, "learning_rate": 8.776918338010816e-05, "loss": 0.7463, "step": 9685 }, { "epoch": 0.6562775255776137, "grad_norm": 6.473649024963379, "learning_rate": 8.776781436101034e-05, "loss": 0.9591, "step": 9686 }, { "epoch": 0.6563452808455857, "grad_norm": 7.400620937347412, "learning_rate": 8.776644534191253e-05, "loss": 0.8153, "step": 9687 }, { "epoch": 0.6564130361135578, "grad_norm": 5.564664840698242, "learning_rate": 8.776507632281471e-05, "loss": 0.8807, "step": 9688 }, { "epoch": 0.6564807913815299, "grad_norm": 6.968201160430908, "learning_rate": 8.776370730371689e-05, "loss": 0.8327, "step": 9689 }, { "epoch": 0.656548546649502, "grad_norm": 8.013174057006836, "learning_rate": 8.776233828461907e-05, "loss": 0.8123, "step": 9690 }, { "epoch": 0.6566163019174741, "grad_norm": 6.545875072479248, "learning_rate": 8.776096926552125e-05, "loss": 0.7223, "step": 9691 }, { "epoch": 0.6566840571854462, "grad_norm": 6.459381580352783, "learning_rate": 8.775960024642345e-05, "loss": 1.0484, "step": 9692 }, { "epoch": 0.6567518124534183, "grad_norm": 6.893752098083496, "learning_rate": 8.775823122732563e-05, "loss": 0.841, "step": 9693 }, { "epoch": 0.6568195677213904, "grad_norm": 6.36741828918457, "learning_rate": 8.77568622082278e-05, "loss": 0.7719, "step": 9694 }, { "epoch": 0.6568873229893625, "grad_norm": 6.700324058532715, "learning_rate": 8.775549318912999e-05, "loss": 0.8612, "step": 9695 }, { "epoch": 0.6569550782573345, "grad_norm": 5.423570156097412, "learning_rate": 8.775412417003218e-05, "loss": 0.7947, "step": 9696 }, { "epoch": 0.6570228335253065, "grad_norm": 6.15387487411499, "learning_rate": 8.775275515093436e-05, "loss": 0.7978, "step": 9697 }, { "epoch": 0.6570905887932786, "grad_norm": 6.407439708709717, "learning_rate": 8.775138613183654e-05, "loss": 0.8016, "step": 9698 }, { "epoch": 0.6571583440612507, "grad_norm": 5.311725616455078, "learning_rate": 8.775001711273872e-05, "loss": 0.564, "step": 9699 }, { "epoch": 0.6572260993292228, "grad_norm": 8.463066101074219, "learning_rate": 8.77486480936409e-05, "loss": 0.9351, "step": 9700 }, { "epoch": 0.6572938545971949, "grad_norm": 6.503357410430908, "learning_rate": 8.77472790745431e-05, "loss": 0.8545, "step": 9701 }, { "epoch": 0.657361609865167, "grad_norm": 4.668941020965576, "learning_rate": 8.774591005544528e-05, "loss": 0.5757, "step": 9702 }, { "epoch": 0.6574293651331391, "grad_norm": 7.31153678894043, "learning_rate": 8.774454103634746e-05, "loss": 0.8225, "step": 9703 }, { "epoch": 0.6574971204011112, "grad_norm": 7.220789432525635, "learning_rate": 8.774317201724964e-05, "loss": 1.2222, "step": 9704 }, { "epoch": 0.6575648756690833, "grad_norm": 8.469976425170898, "learning_rate": 8.774180299815182e-05, "loss": 0.8927, "step": 9705 }, { "epoch": 0.6576326309370554, "grad_norm": 5.122034072875977, "learning_rate": 8.774043397905401e-05, "loss": 0.6429, "step": 9706 }, { "epoch": 0.6577003862050275, "grad_norm": 7.948295593261719, "learning_rate": 8.773906495995619e-05, "loss": 0.8037, "step": 9707 }, { "epoch": 0.6577681414729996, "grad_norm": 8.193120956420898, "learning_rate": 8.773769594085837e-05, "loss": 0.8298, "step": 9708 }, { "epoch": 0.6578358967409716, "grad_norm": 5.10132360458374, "learning_rate": 8.773632692176057e-05, "loss": 0.6058, "step": 9709 }, { "epoch": 0.6579036520089437, "grad_norm": 7.845650672912598, "learning_rate": 8.773495790266275e-05, "loss": 0.9913, "step": 9710 }, { "epoch": 0.6579714072769158, "grad_norm": 6.442379951477051, "learning_rate": 8.773358888356493e-05, "loss": 0.7329, "step": 9711 }, { "epoch": 0.6580391625448878, "grad_norm": 6.904378414154053, "learning_rate": 8.773221986446712e-05, "loss": 0.9929, "step": 9712 }, { "epoch": 0.6581069178128599, "grad_norm": 7.309523105621338, "learning_rate": 8.77308508453693e-05, "loss": 0.8333, "step": 9713 }, { "epoch": 0.658174673080832, "grad_norm": 5.351863861083984, "learning_rate": 8.772948182627148e-05, "loss": 0.718, "step": 9714 }, { "epoch": 0.6582424283488041, "grad_norm": 6.481664180755615, "learning_rate": 8.772811280717367e-05, "loss": 0.7649, "step": 9715 }, { "epoch": 0.6583101836167762, "grad_norm": 6.753706932067871, "learning_rate": 8.772674378807586e-05, "loss": 0.6513, "step": 9716 }, { "epoch": 0.6583779388847483, "grad_norm": 6.8181047439575195, "learning_rate": 8.772537476897804e-05, "loss": 0.8805, "step": 9717 }, { "epoch": 0.6584456941527204, "grad_norm": 6.623274803161621, "learning_rate": 8.772400574988022e-05, "loss": 1.0795, "step": 9718 }, { "epoch": 0.6585134494206925, "grad_norm": 5.954155445098877, "learning_rate": 8.772263673078241e-05, "loss": 0.7594, "step": 9719 }, { "epoch": 0.6585812046886645, "grad_norm": 5.537624835968018, "learning_rate": 8.772126771168459e-05, "loss": 0.7049, "step": 9720 }, { "epoch": 0.6586489599566366, "grad_norm": 6.186052322387695, "learning_rate": 8.771989869258677e-05, "loss": 0.8639, "step": 9721 }, { "epoch": 0.6587167152246087, "grad_norm": 6.996501445770264, "learning_rate": 8.771852967348895e-05, "loss": 0.9438, "step": 9722 }, { "epoch": 0.6587844704925808, "grad_norm": 6.926022052764893, "learning_rate": 8.771716065439113e-05, "loss": 0.7466, "step": 9723 }, { "epoch": 0.6588522257605529, "grad_norm": 5.302411079406738, "learning_rate": 8.771579163529332e-05, "loss": 0.7562, "step": 9724 }, { "epoch": 0.658919981028525, "grad_norm": 6.900167465209961, "learning_rate": 8.77144226161955e-05, "loss": 0.8841, "step": 9725 }, { "epoch": 0.6589877362964971, "grad_norm": 5.357388019561768, "learning_rate": 8.771305359709769e-05, "loss": 0.7033, "step": 9726 }, { "epoch": 0.6590554915644692, "grad_norm": 5.690728187561035, "learning_rate": 8.771168457799987e-05, "loss": 0.7668, "step": 9727 }, { "epoch": 0.6591232468324412, "grad_norm": 6.126123428344727, "learning_rate": 8.771031555890206e-05, "loss": 0.7644, "step": 9728 }, { "epoch": 0.6591910021004133, "grad_norm": 6.951844692230225, "learning_rate": 8.770894653980424e-05, "loss": 0.6453, "step": 9729 }, { "epoch": 0.6592587573683854, "grad_norm": 7.219118118286133, "learning_rate": 8.770757752070642e-05, "loss": 0.7606, "step": 9730 }, { "epoch": 0.6593265126363574, "grad_norm": 7.441622734069824, "learning_rate": 8.77062085016086e-05, "loss": 1.0002, "step": 9731 }, { "epoch": 0.6593942679043295, "grad_norm": 7.127715110778809, "learning_rate": 8.770483948251078e-05, "loss": 0.9236, "step": 9732 }, { "epoch": 0.6594620231723016, "grad_norm": 5.041599750518799, "learning_rate": 8.770347046341298e-05, "loss": 0.5938, "step": 9733 }, { "epoch": 0.6595297784402737, "grad_norm": 5.044239044189453, "learning_rate": 8.770210144431516e-05, "loss": 0.7437, "step": 9734 }, { "epoch": 0.6595975337082458, "grad_norm": 6.069377422332764, "learning_rate": 8.770073242521734e-05, "loss": 0.6742, "step": 9735 }, { "epoch": 0.6596652889762179, "grad_norm": 7.166933059692383, "learning_rate": 8.769936340611952e-05, "loss": 0.9085, "step": 9736 }, { "epoch": 0.65973304424419, "grad_norm": 6.837136268615723, "learning_rate": 8.76979943870217e-05, "loss": 0.7626, "step": 9737 }, { "epoch": 0.6598007995121621, "grad_norm": 5.012059211730957, "learning_rate": 8.769662536792389e-05, "loss": 0.6234, "step": 9738 }, { "epoch": 0.6598685547801342, "grad_norm": 5.913621425628662, "learning_rate": 8.769525634882607e-05, "loss": 0.9147, "step": 9739 }, { "epoch": 0.6599363100481063, "grad_norm": 6.618444919586182, "learning_rate": 8.769388732972825e-05, "loss": 1.0521, "step": 9740 }, { "epoch": 0.6600040653160784, "grad_norm": 6.666975975036621, "learning_rate": 8.769251831063043e-05, "loss": 0.8245, "step": 9741 }, { "epoch": 0.6600718205840505, "grad_norm": 6.840112686157227, "learning_rate": 8.769114929153263e-05, "loss": 0.8726, "step": 9742 }, { "epoch": 0.6601395758520225, "grad_norm": 7.093020915985107, "learning_rate": 8.76897802724348e-05, "loss": 0.9649, "step": 9743 }, { "epoch": 0.6602073311199946, "grad_norm": 6.4766845703125, "learning_rate": 8.768841125333699e-05, "loss": 0.7608, "step": 9744 }, { "epoch": 0.6602750863879666, "grad_norm": 6.574507236480713, "learning_rate": 8.768704223423917e-05, "loss": 0.9587, "step": 9745 }, { "epoch": 0.6603428416559387, "grad_norm": 6.037952423095703, "learning_rate": 8.768567321514135e-05, "loss": 0.8102, "step": 9746 }, { "epoch": 0.6604105969239108, "grad_norm": 5.707187175750732, "learning_rate": 8.768430419604354e-05, "loss": 0.7906, "step": 9747 }, { "epoch": 0.6604783521918829, "grad_norm": 5.039308547973633, "learning_rate": 8.768293517694572e-05, "loss": 0.7443, "step": 9748 }, { "epoch": 0.660546107459855, "grad_norm": 6.081298351287842, "learning_rate": 8.76815661578479e-05, "loss": 0.8813, "step": 9749 }, { "epoch": 0.6606138627278271, "grad_norm": 6.228826522827148, "learning_rate": 8.768019713875008e-05, "loss": 0.755, "step": 9750 }, { "epoch": 0.6606816179957992, "grad_norm": 6.391602993011475, "learning_rate": 8.767882811965228e-05, "loss": 1.0464, "step": 9751 }, { "epoch": 0.6607493732637713, "grad_norm": 6.1509318351745605, "learning_rate": 8.767745910055446e-05, "loss": 0.8464, "step": 9752 }, { "epoch": 0.6608171285317433, "grad_norm": 7.630395412445068, "learning_rate": 8.767609008145664e-05, "loss": 0.7216, "step": 9753 }, { "epoch": 0.6608848837997154, "grad_norm": 6.150453090667725, "learning_rate": 8.767472106235882e-05, "loss": 0.747, "step": 9754 }, { "epoch": 0.6609526390676875, "grad_norm": 6.866214752197266, "learning_rate": 8.7673352043261e-05, "loss": 0.7093, "step": 9755 }, { "epoch": 0.6610203943356596, "grad_norm": 5.30288553237915, "learning_rate": 8.767198302416319e-05, "loss": 0.8519, "step": 9756 }, { "epoch": 0.6610881496036317, "grad_norm": 7.960119724273682, "learning_rate": 8.767061400506537e-05, "loss": 1.0947, "step": 9757 }, { "epoch": 0.6611559048716038, "grad_norm": 6.809545993804932, "learning_rate": 8.766924498596755e-05, "loss": 0.7943, "step": 9758 }, { "epoch": 0.6612236601395759, "grad_norm": 7.028867721557617, "learning_rate": 8.766787596686975e-05, "loss": 0.8684, "step": 9759 }, { "epoch": 0.661291415407548, "grad_norm": 6.686776161193848, "learning_rate": 8.766650694777193e-05, "loss": 0.7796, "step": 9760 }, { "epoch": 0.66135917067552, "grad_norm": 5.486933708190918, "learning_rate": 8.76651379286741e-05, "loss": 0.5741, "step": 9761 }, { "epoch": 0.6614269259434921, "grad_norm": 6.500797748565674, "learning_rate": 8.76637689095763e-05, "loss": 1.0022, "step": 9762 }, { "epoch": 0.6614946812114642, "grad_norm": 8.424103736877441, "learning_rate": 8.766239989047848e-05, "loss": 0.9727, "step": 9763 }, { "epoch": 0.6615624364794362, "grad_norm": 5.002140998840332, "learning_rate": 8.766103087138066e-05, "loss": 0.5935, "step": 9764 }, { "epoch": 0.6616301917474083, "grad_norm": 6.721834659576416, "learning_rate": 8.765966185228285e-05, "loss": 0.7682, "step": 9765 }, { "epoch": 0.6616979470153804, "grad_norm": 6.448259353637695, "learning_rate": 8.765829283318503e-05, "loss": 0.6641, "step": 9766 }, { "epoch": 0.6617657022833525, "grad_norm": 5.544651985168457, "learning_rate": 8.765692381408722e-05, "loss": 0.6952, "step": 9767 }, { "epoch": 0.6618334575513246, "grad_norm": 7.139290809631348, "learning_rate": 8.76555547949894e-05, "loss": 0.8574, "step": 9768 }, { "epoch": 0.6619012128192967, "grad_norm": 7.421456813812256, "learning_rate": 8.765418577589158e-05, "loss": 0.9057, "step": 9769 }, { "epoch": 0.6619689680872688, "grad_norm": 6.075616359710693, "learning_rate": 8.765281675679377e-05, "loss": 0.7174, "step": 9770 }, { "epoch": 0.6620367233552409, "grad_norm": 5.6527323722839355, "learning_rate": 8.765144773769595e-05, "loss": 0.6707, "step": 9771 }, { "epoch": 0.662104478623213, "grad_norm": 5.773962020874023, "learning_rate": 8.765007871859813e-05, "loss": 0.714, "step": 9772 }, { "epoch": 0.6621722338911851, "grad_norm": 5.672632694244385, "learning_rate": 8.764870969950031e-05, "loss": 0.6529, "step": 9773 }, { "epoch": 0.6622399891591572, "grad_norm": 7.087960243225098, "learning_rate": 8.76473406804025e-05, "loss": 0.8998, "step": 9774 }, { "epoch": 0.6623077444271293, "grad_norm": 6.828940391540527, "learning_rate": 8.764597166130468e-05, "loss": 0.9055, "step": 9775 }, { "epoch": 0.6623754996951013, "grad_norm": 5.858781814575195, "learning_rate": 8.764460264220687e-05, "loss": 0.7853, "step": 9776 }, { "epoch": 0.6624432549630733, "grad_norm": 6.570882797241211, "learning_rate": 8.764323362310905e-05, "loss": 1.1202, "step": 9777 }, { "epoch": 0.6625110102310454, "grad_norm": 6.794251918792725, "learning_rate": 8.764186460401123e-05, "loss": 0.7795, "step": 9778 }, { "epoch": 0.6625787654990175, "grad_norm": 5.775513648986816, "learning_rate": 8.764049558491342e-05, "loss": 0.8422, "step": 9779 }, { "epoch": 0.6626465207669896, "grad_norm": 5.90095329284668, "learning_rate": 8.76391265658156e-05, "loss": 0.7624, "step": 9780 }, { "epoch": 0.6627142760349617, "grad_norm": 6.769818305969238, "learning_rate": 8.763775754671778e-05, "loss": 0.7761, "step": 9781 }, { "epoch": 0.6627820313029338, "grad_norm": 5.702734470367432, "learning_rate": 8.763638852761996e-05, "loss": 0.6399, "step": 9782 }, { "epoch": 0.6628497865709059, "grad_norm": 8.020594596862793, "learning_rate": 8.763501950852214e-05, "loss": 0.8082, "step": 9783 }, { "epoch": 0.662917541838878, "grad_norm": 6.043667316436768, "learning_rate": 8.763365048942434e-05, "loss": 0.69, "step": 9784 }, { "epoch": 0.6629852971068501, "grad_norm": 6.844956874847412, "learning_rate": 8.763228147032652e-05, "loss": 0.7028, "step": 9785 }, { "epoch": 0.6630530523748221, "grad_norm": 6.554644584655762, "learning_rate": 8.76309124512287e-05, "loss": 1.0118, "step": 9786 }, { "epoch": 0.6631208076427942, "grad_norm": 6.549968242645264, "learning_rate": 8.762954343213088e-05, "loss": 0.8027, "step": 9787 }, { "epoch": 0.6631885629107663, "grad_norm": 5.934162616729736, "learning_rate": 8.762817441303307e-05, "loss": 0.5964, "step": 9788 }, { "epoch": 0.6632563181787384, "grad_norm": 6.02857780456543, "learning_rate": 8.762680539393525e-05, "loss": 0.997, "step": 9789 }, { "epoch": 0.6633240734467105, "grad_norm": 7.619875907897949, "learning_rate": 8.762543637483743e-05, "loss": 1.0416, "step": 9790 }, { "epoch": 0.6633918287146826, "grad_norm": 7.058054447174072, "learning_rate": 8.762406735573961e-05, "loss": 0.7885, "step": 9791 }, { "epoch": 0.6634595839826547, "grad_norm": 7.8069353103637695, "learning_rate": 8.762269833664179e-05, "loss": 0.8077, "step": 9792 }, { "epoch": 0.6635273392506268, "grad_norm": 6.246554851531982, "learning_rate": 8.762132931754399e-05, "loss": 0.8138, "step": 9793 }, { "epoch": 0.6635950945185988, "grad_norm": 7.489177227020264, "learning_rate": 8.761996029844617e-05, "loss": 0.8638, "step": 9794 }, { "epoch": 0.6636628497865709, "grad_norm": 7.148414134979248, "learning_rate": 8.761859127934835e-05, "loss": 0.7998, "step": 9795 }, { "epoch": 0.663730605054543, "grad_norm": 6.611279010772705, "learning_rate": 8.761722226025053e-05, "loss": 0.6272, "step": 9796 }, { "epoch": 0.663798360322515, "grad_norm": 7.739448070526123, "learning_rate": 8.761585324115272e-05, "loss": 0.7987, "step": 9797 }, { "epoch": 0.6638661155904871, "grad_norm": 4.832591533660889, "learning_rate": 8.76144842220549e-05, "loss": 0.5663, "step": 9798 }, { "epoch": 0.6639338708584592, "grad_norm": 4.939646244049072, "learning_rate": 8.761311520295708e-05, "loss": 0.5951, "step": 9799 }, { "epoch": 0.6640016261264313, "grad_norm": 6.326164245605469, "learning_rate": 8.761174618385926e-05, "loss": 0.9925, "step": 9800 }, { "epoch": 0.6640693813944034, "grad_norm": 5.790089130401611, "learning_rate": 8.761037716476144e-05, "loss": 1.0175, "step": 9801 }, { "epoch": 0.6641371366623755, "grad_norm": 6.139710903167725, "learning_rate": 8.760900814566364e-05, "loss": 0.7322, "step": 9802 }, { "epoch": 0.6642048919303476, "grad_norm": 6.73042106628418, "learning_rate": 8.760763912656582e-05, "loss": 0.7969, "step": 9803 }, { "epoch": 0.6642726471983197, "grad_norm": 6.9577765464782715, "learning_rate": 8.7606270107468e-05, "loss": 0.7294, "step": 9804 }, { "epoch": 0.6643404024662918, "grad_norm": 7.609181880950928, "learning_rate": 8.760490108837019e-05, "loss": 0.9833, "step": 9805 }, { "epoch": 0.6644081577342639, "grad_norm": 5.679005146026611, "learning_rate": 8.760353206927237e-05, "loss": 0.8292, "step": 9806 }, { "epoch": 0.664475913002236, "grad_norm": 6.903763771057129, "learning_rate": 8.760216305017455e-05, "loss": 0.8298, "step": 9807 }, { "epoch": 0.664543668270208, "grad_norm": 5.705727577209473, "learning_rate": 8.760079403107674e-05, "loss": 0.8319, "step": 9808 }, { "epoch": 0.6646114235381801, "grad_norm": 5.971374988555908, "learning_rate": 8.759942501197892e-05, "loss": 0.8034, "step": 9809 }, { "epoch": 0.6646791788061521, "grad_norm": 4.7771806716918945, "learning_rate": 8.75980559928811e-05, "loss": 0.8539, "step": 9810 }, { "epoch": 0.6647469340741242, "grad_norm": 6.891615867614746, "learning_rate": 8.75966869737833e-05, "loss": 0.667, "step": 9811 }, { "epoch": 0.6648146893420963, "grad_norm": 6.467392444610596, "learning_rate": 8.759531795468548e-05, "loss": 0.8197, "step": 9812 }, { "epoch": 0.6648824446100684, "grad_norm": 5.87471342086792, "learning_rate": 8.759394893558766e-05, "loss": 0.9105, "step": 9813 }, { "epoch": 0.6649501998780405, "grad_norm": 5.994086265563965, "learning_rate": 8.759257991648984e-05, "loss": 0.8333, "step": 9814 }, { "epoch": 0.6650179551460126, "grad_norm": 6.536230564117432, "learning_rate": 8.759121089739202e-05, "loss": 1.0726, "step": 9815 }, { "epoch": 0.6650857104139847, "grad_norm": 7.012213230133057, "learning_rate": 8.758984187829421e-05, "loss": 1.0442, "step": 9816 }, { "epoch": 0.6651534656819568, "grad_norm": 6.069978713989258, "learning_rate": 8.75884728591964e-05, "loss": 0.6364, "step": 9817 }, { "epoch": 0.6652212209499289, "grad_norm": 5.200802803039551, "learning_rate": 8.758710384009858e-05, "loss": 0.6563, "step": 9818 }, { "epoch": 0.665288976217901, "grad_norm": 6.931329727172852, "learning_rate": 8.758573482100076e-05, "loss": 0.8925, "step": 9819 }, { "epoch": 0.665356731485873, "grad_norm": 6.1655731201171875, "learning_rate": 8.758436580190295e-05, "loss": 0.7658, "step": 9820 }, { "epoch": 0.6654244867538451, "grad_norm": 6.437480926513672, "learning_rate": 8.758299678280513e-05, "loss": 0.8279, "step": 9821 }, { "epoch": 0.6654922420218172, "grad_norm": 5.941578388214111, "learning_rate": 8.758162776370731e-05, "loss": 0.7095, "step": 9822 }, { "epoch": 0.6655599972897893, "grad_norm": 6.083083152770996, "learning_rate": 8.758025874460949e-05, "loss": 0.9792, "step": 9823 }, { "epoch": 0.6656277525577614, "grad_norm": 5.512528419494629, "learning_rate": 8.757888972551167e-05, "loss": 0.8771, "step": 9824 }, { "epoch": 0.6656955078257335, "grad_norm": 5.142529487609863, "learning_rate": 8.757752070641386e-05, "loss": 0.7935, "step": 9825 }, { "epoch": 0.6657632630937055, "grad_norm": 5.460729122161865, "learning_rate": 8.757615168731604e-05, "loss": 0.9105, "step": 9826 }, { "epoch": 0.6658310183616776, "grad_norm": 8.098207473754883, "learning_rate": 8.757478266821823e-05, "loss": 0.8831, "step": 9827 }, { "epoch": 0.6658987736296497, "grad_norm": 5.392515659332275, "learning_rate": 8.75734136491204e-05, "loss": 0.8327, "step": 9828 }, { "epoch": 0.6659665288976218, "grad_norm": 4.9883294105529785, "learning_rate": 8.75720446300226e-05, "loss": 0.7598, "step": 9829 }, { "epoch": 0.6660342841655938, "grad_norm": 5.96735954284668, "learning_rate": 8.757067561092478e-05, "loss": 0.9694, "step": 9830 }, { "epoch": 0.6661020394335659, "grad_norm": 6.818420886993408, "learning_rate": 8.756930659182696e-05, "loss": 0.9571, "step": 9831 }, { "epoch": 0.666169794701538, "grad_norm": 6.297762870788574, "learning_rate": 8.756793757272914e-05, "loss": 0.6912, "step": 9832 }, { "epoch": 0.6662375499695101, "grad_norm": 6.834626197814941, "learning_rate": 8.756656855363132e-05, "loss": 0.8697, "step": 9833 }, { "epoch": 0.6663053052374822, "grad_norm": 6.614404678344727, "learning_rate": 8.756519953453351e-05, "loss": 0.9481, "step": 9834 }, { "epoch": 0.6663730605054543, "grad_norm": 6.524447441101074, "learning_rate": 8.75638305154357e-05, "loss": 0.7346, "step": 9835 }, { "epoch": 0.6664408157734264, "grad_norm": 7.389026641845703, "learning_rate": 8.756246149633788e-05, "loss": 1.0821, "step": 9836 }, { "epoch": 0.6665085710413985, "grad_norm": 5.654766082763672, "learning_rate": 8.756109247724006e-05, "loss": 0.8859, "step": 9837 }, { "epoch": 0.6665763263093706, "grad_norm": 6.345546722412109, "learning_rate": 8.755972345814224e-05, "loss": 0.8386, "step": 9838 }, { "epoch": 0.6666440815773427, "grad_norm": 5.268699645996094, "learning_rate": 8.755835443904443e-05, "loss": 0.6937, "step": 9839 }, { "epoch": 0.6667118368453148, "grad_norm": 5.590981960296631, "learning_rate": 8.755698541994661e-05, "loss": 0.7624, "step": 9840 }, { "epoch": 0.6667795921132869, "grad_norm": 7.142122268676758, "learning_rate": 8.755561640084879e-05, "loss": 0.807, "step": 9841 }, { "epoch": 0.666847347381259, "grad_norm": 7.878788948059082, "learning_rate": 8.755424738175097e-05, "loss": 0.6766, "step": 9842 }, { "epoch": 0.6669151026492309, "grad_norm": 7.469075679779053, "learning_rate": 8.755287836265316e-05, "loss": 0.7736, "step": 9843 }, { "epoch": 0.666982857917203, "grad_norm": 6.271559715270996, "learning_rate": 8.755150934355535e-05, "loss": 1.1519, "step": 9844 }, { "epoch": 0.6670506131851751, "grad_norm": 5.424012660980225, "learning_rate": 8.755014032445753e-05, "loss": 0.6141, "step": 9845 }, { "epoch": 0.6671183684531472, "grad_norm": 6.098863124847412, "learning_rate": 8.75487713053597e-05, "loss": 0.8144, "step": 9846 }, { "epoch": 0.6671861237211193, "grad_norm": 7.099893569946289, "learning_rate": 8.754740228626189e-05, "loss": 0.897, "step": 9847 }, { "epoch": 0.6672538789890914, "grad_norm": 4.874231338500977, "learning_rate": 8.754603326716408e-05, "loss": 0.7885, "step": 9848 }, { "epoch": 0.6673216342570635, "grad_norm": 7.58192777633667, "learning_rate": 8.754466424806626e-05, "loss": 0.7326, "step": 9849 }, { "epoch": 0.6673893895250356, "grad_norm": 5.8943047523498535, "learning_rate": 8.754329522896844e-05, "loss": 0.8714, "step": 9850 }, { "epoch": 0.6674571447930077, "grad_norm": 7.8243865966796875, "learning_rate": 8.754192620987063e-05, "loss": 0.749, "step": 9851 }, { "epoch": 0.6675249000609798, "grad_norm": 6.121888160705566, "learning_rate": 8.754055719077282e-05, "loss": 0.7823, "step": 9852 }, { "epoch": 0.6675926553289518, "grad_norm": 8.671948432922363, "learning_rate": 8.7539188171675e-05, "loss": 0.7592, "step": 9853 }, { "epoch": 0.6676604105969239, "grad_norm": 7.32505989074707, "learning_rate": 8.753781915257719e-05, "loss": 0.816, "step": 9854 }, { "epoch": 0.667728165864896, "grad_norm": 5.141097545623779, "learning_rate": 8.753645013347937e-05, "loss": 0.913, "step": 9855 }, { "epoch": 0.6677959211328681, "grad_norm": 6.519028186798096, "learning_rate": 8.753508111438155e-05, "loss": 0.8542, "step": 9856 }, { "epoch": 0.6678636764008402, "grad_norm": 5.298619747161865, "learning_rate": 8.753371209528374e-05, "loss": 0.8773, "step": 9857 }, { "epoch": 0.6679314316688123, "grad_norm": 4.540358066558838, "learning_rate": 8.753234307618592e-05, "loss": 0.5887, "step": 9858 }, { "epoch": 0.6679991869367843, "grad_norm": 7.409801483154297, "learning_rate": 8.75309740570881e-05, "loss": 0.7, "step": 9859 }, { "epoch": 0.6680669422047564, "grad_norm": 7.78483772277832, "learning_rate": 8.752960503799028e-05, "loss": 0.9825, "step": 9860 }, { "epoch": 0.6681346974727285, "grad_norm": 4.964968204498291, "learning_rate": 8.752823601889248e-05, "loss": 0.8111, "step": 9861 }, { "epoch": 0.6682024527407006, "grad_norm": 6.426743984222412, "learning_rate": 8.752686699979466e-05, "loss": 1.0148, "step": 9862 }, { "epoch": 0.6682702080086727, "grad_norm": 5.606266975402832, "learning_rate": 8.752549798069684e-05, "loss": 0.7848, "step": 9863 }, { "epoch": 0.6683379632766447, "grad_norm": 6.663419723510742, "learning_rate": 8.752412896159902e-05, "loss": 0.5602, "step": 9864 }, { "epoch": 0.6684057185446168, "grad_norm": 5.719392776489258, "learning_rate": 8.75227599425012e-05, "loss": 0.6434, "step": 9865 }, { "epoch": 0.6684734738125889, "grad_norm": 6.0621747970581055, "learning_rate": 8.75213909234034e-05, "loss": 0.7172, "step": 9866 }, { "epoch": 0.668541229080561, "grad_norm": 7.197140216827393, "learning_rate": 8.752002190430557e-05, "loss": 0.9543, "step": 9867 }, { "epoch": 0.6686089843485331, "grad_norm": 7.329906940460205, "learning_rate": 8.751865288520775e-05, "loss": 0.8873, "step": 9868 }, { "epoch": 0.6686767396165052, "grad_norm": 8.936700820922852, "learning_rate": 8.751728386610994e-05, "loss": 0.8918, "step": 9869 }, { "epoch": 0.6687444948844773, "grad_norm": 5.39838171005249, "learning_rate": 8.751591484701212e-05, "loss": 1.0986, "step": 9870 }, { "epoch": 0.6688122501524494, "grad_norm": 7.213238716125488, "learning_rate": 8.751454582791431e-05, "loss": 0.9257, "step": 9871 }, { "epoch": 0.6688800054204215, "grad_norm": 7.646907806396484, "learning_rate": 8.751317680881649e-05, "loss": 0.8563, "step": 9872 }, { "epoch": 0.6689477606883936, "grad_norm": 6.506889343261719, "learning_rate": 8.751180778971867e-05, "loss": 0.8348, "step": 9873 }, { "epoch": 0.6690155159563657, "grad_norm": 8.101831436157227, "learning_rate": 8.751043877062085e-05, "loss": 0.8127, "step": 9874 }, { "epoch": 0.6690832712243376, "grad_norm": 7.347453594207764, "learning_rate": 8.750906975152304e-05, "loss": 0.768, "step": 9875 }, { "epoch": 0.6691510264923097, "grad_norm": 6.852962970733643, "learning_rate": 8.750770073242522e-05, "loss": 0.8699, "step": 9876 }, { "epoch": 0.6692187817602818, "grad_norm": 5.527287006378174, "learning_rate": 8.75063317133274e-05, "loss": 0.6164, "step": 9877 }, { "epoch": 0.6692865370282539, "grad_norm": 6.611326217651367, "learning_rate": 8.750496269422959e-05, "loss": 0.715, "step": 9878 }, { "epoch": 0.669354292296226, "grad_norm": 6.488336086273193, "learning_rate": 8.750359367513177e-05, "loss": 0.9815, "step": 9879 }, { "epoch": 0.6694220475641981, "grad_norm": 8.057877540588379, "learning_rate": 8.750222465603396e-05, "loss": 0.9369, "step": 9880 }, { "epoch": 0.6694898028321702, "grad_norm": 7.809643745422363, "learning_rate": 8.750085563693614e-05, "loss": 0.7953, "step": 9881 }, { "epoch": 0.6695575581001423, "grad_norm": 5.484494209289551, "learning_rate": 8.749948661783832e-05, "loss": 0.8527, "step": 9882 }, { "epoch": 0.6696253133681144, "grad_norm": 8.870565414428711, "learning_rate": 8.74981175987405e-05, "loss": 0.7632, "step": 9883 }, { "epoch": 0.6696930686360865, "grad_norm": 5.734538555145264, "learning_rate": 8.74967485796427e-05, "loss": 0.9434, "step": 9884 }, { "epoch": 0.6697608239040586, "grad_norm": 6.731936454772949, "learning_rate": 8.749537956054487e-05, "loss": 0.9988, "step": 9885 }, { "epoch": 0.6698285791720306, "grad_norm": 6.063337326049805, "learning_rate": 8.749401054144706e-05, "loss": 0.6882, "step": 9886 }, { "epoch": 0.6698963344400027, "grad_norm": 6.489291667938232, "learning_rate": 8.749264152234924e-05, "loss": 0.8978, "step": 9887 }, { "epoch": 0.6699640897079748, "grad_norm": 6.419568061828613, "learning_rate": 8.749127250325142e-05, "loss": 0.84, "step": 9888 }, { "epoch": 0.6700318449759469, "grad_norm": 7.085015773773193, "learning_rate": 8.748990348415361e-05, "loss": 1.0001, "step": 9889 }, { "epoch": 0.670099600243919, "grad_norm": 5.279534339904785, "learning_rate": 8.748853446505579e-05, "loss": 0.613, "step": 9890 }, { "epoch": 0.670167355511891, "grad_norm": 7.26226282119751, "learning_rate": 8.748716544595797e-05, "loss": 0.7144, "step": 9891 }, { "epoch": 0.6702351107798631, "grad_norm": 5.066533088684082, "learning_rate": 8.748579642686015e-05, "loss": 0.8349, "step": 9892 }, { "epoch": 0.6703028660478352, "grad_norm": 5.847897052764893, "learning_rate": 8.748442740776233e-05, "loss": 0.778, "step": 9893 }, { "epoch": 0.6703706213158073, "grad_norm": 7.026902198791504, "learning_rate": 8.748305838866452e-05, "loss": 0.7131, "step": 9894 }, { "epoch": 0.6704383765837794, "grad_norm": 9.44549560546875, "learning_rate": 8.74816893695667e-05, "loss": 0.7093, "step": 9895 }, { "epoch": 0.6705061318517515, "grad_norm": 5.592066764831543, "learning_rate": 8.748032035046889e-05, "loss": 0.7979, "step": 9896 }, { "epoch": 0.6705738871197235, "grad_norm": 7.530812740325928, "learning_rate": 8.747895133137108e-05, "loss": 0.7899, "step": 9897 }, { "epoch": 0.6706416423876956, "grad_norm": 7.456013202667236, "learning_rate": 8.747758231227326e-05, "loss": 0.9772, "step": 9898 }, { "epoch": 0.6707093976556677, "grad_norm": 7.631428241729736, "learning_rate": 8.747621329317544e-05, "loss": 0.9485, "step": 9899 }, { "epoch": 0.6707771529236398, "grad_norm": 5.919754981994629, "learning_rate": 8.747484427407763e-05, "loss": 0.8696, "step": 9900 }, { "epoch": 0.6708449081916119, "grad_norm": 6.412426948547363, "learning_rate": 8.747347525497981e-05, "loss": 0.8179, "step": 9901 }, { "epoch": 0.670912663459584, "grad_norm": 5.753444671630859, "learning_rate": 8.7472106235882e-05, "loss": 0.7817, "step": 9902 }, { "epoch": 0.6709804187275561, "grad_norm": 6.622903347015381, "learning_rate": 8.747073721678419e-05, "loss": 0.8184, "step": 9903 }, { "epoch": 0.6710481739955282, "grad_norm": 8.03353500366211, "learning_rate": 8.746936819768637e-05, "loss": 0.9982, "step": 9904 }, { "epoch": 0.6711159292635003, "grad_norm": 7.467156410217285, "learning_rate": 8.746799917858855e-05, "loss": 0.6797, "step": 9905 }, { "epoch": 0.6711836845314724, "grad_norm": 5.091195106506348, "learning_rate": 8.746663015949073e-05, "loss": 0.6776, "step": 9906 }, { "epoch": 0.6712514397994445, "grad_norm": 6.8875627517700195, "learning_rate": 8.746526114039292e-05, "loss": 0.926, "step": 9907 }, { "epoch": 0.6713191950674164, "grad_norm": 6.183489799499512, "learning_rate": 8.74638921212951e-05, "loss": 0.9849, "step": 9908 }, { "epoch": 0.6713869503353885, "grad_norm": 6.648115634918213, "learning_rate": 8.746252310219728e-05, "loss": 0.789, "step": 9909 }, { "epoch": 0.6714547056033606, "grad_norm": 5.949080944061279, "learning_rate": 8.746115408309946e-05, "loss": 0.6008, "step": 9910 }, { "epoch": 0.6715224608713327, "grad_norm": 6.88314151763916, "learning_rate": 8.745978506400164e-05, "loss": 0.758, "step": 9911 }, { "epoch": 0.6715902161393048, "grad_norm": 7.288704872131348, "learning_rate": 8.745841604490384e-05, "loss": 0.8002, "step": 9912 }, { "epoch": 0.6716579714072769, "grad_norm": 6.439271926879883, "learning_rate": 8.745704702580602e-05, "loss": 0.9295, "step": 9913 }, { "epoch": 0.671725726675249, "grad_norm": 6.891064643859863, "learning_rate": 8.74556780067082e-05, "loss": 0.7798, "step": 9914 }, { "epoch": 0.6717934819432211, "grad_norm": 7.882637977600098, "learning_rate": 8.745430898761038e-05, "loss": 1.0254, "step": 9915 }, { "epoch": 0.6718612372111932, "grad_norm": 6.752426624298096, "learning_rate": 8.745293996851256e-05, "loss": 0.7995, "step": 9916 }, { "epoch": 0.6719289924791653, "grad_norm": 5.643410682678223, "learning_rate": 8.745157094941475e-05, "loss": 0.7354, "step": 9917 }, { "epoch": 0.6719967477471374, "grad_norm": 7.521575927734375, "learning_rate": 8.745020193031693e-05, "loss": 0.7256, "step": 9918 }, { "epoch": 0.6720645030151094, "grad_norm": 7.460186004638672, "learning_rate": 8.744883291121911e-05, "loss": 1.063, "step": 9919 }, { "epoch": 0.6721322582830815, "grad_norm": 4.6668477058410645, "learning_rate": 8.74474638921213e-05, "loss": 0.6914, "step": 9920 }, { "epoch": 0.6722000135510536, "grad_norm": 7.0220160484313965, "learning_rate": 8.744609487302349e-05, "loss": 0.7925, "step": 9921 }, { "epoch": 0.6722677688190257, "grad_norm": 7.594117641448975, "learning_rate": 8.744472585392567e-05, "loss": 0.9337, "step": 9922 }, { "epoch": 0.6723355240869978, "grad_norm": 7.195069313049316, "learning_rate": 8.744335683482785e-05, "loss": 0.9651, "step": 9923 }, { "epoch": 0.6724032793549698, "grad_norm": 5.2893853187561035, "learning_rate": 8.744198781573003e-05, "loss": 0.7974, "step": 9924 }, { "epoch": 0.6724710346229419, "grad_norm": 7.59480619430542, "learning_rate": 8.744061879663221e-05, "loss": 0.9182, "step": 9925 }, { "epoch": 0.672538789890914, "grad_norm": 5.582269668579102, "learning_rate": 8.74392497775344e-05, "loss": 0.8509, "step": 9926 }, { "epoch": 0.6726065451588861, "grad_norm": 6.992897033691406, "learning_rate": 8.743788075843658e-05, "loss": 0.8779, "step": 9927 }, { "epoch": 0.6726743004268582, "grad_norm": 7.686391353607178, "learning_rate": 8.743651173933876e-05, "loss": 1.0611, "step": 9928 }, { "epoch": 0.6727420556948303, "grad_norm": 6.090497016906738, "learning_rate": 8.743514272024095e-05, "loss": 0.9857, "step": 9929 }, { "epoch": 0.6728098109628023, "grad_norm": 7.6027936935424805, "learning_rate": 8.743377370114314e-05, "loss": 1.0939, "step": 9930 }, { "epoch": 0.6728775662307744, "grad_norm": 6.2990593910217285, "learning_rate": 8.743240468204532e-05, "loss": 0.8225, "step": 9931 }, { "epoch": 0.6729453214987465, "grad_norm": 6.154484272003174, "learning_rate": 8.74310356629475e-05, "loss": 1.2153, "step": 9932 }, { "epoch": 0.6730130767667186, "grad_norm": 5.471065044403076, "learning_rate": 8.742966664384968e-05, "loss": 0.9146, "step": 9933 }, { "epoch": 0.6730808320346907, "grad_norm": 7.006470680236816, "learning_rate": 8.742829762475186e-05, "loss": 0.8704, "step": 9934 }, { "epoch": 0.6731485873026628, "grad_norm": 6.060577869415283, "learning_rate": 8.742692860565405e-05, "loss": 1.0097, "step": 9935 }, { "epoch": 0.6732163425706349, "grad_norm": 6.05491304397583, "learning_rate": 8.742555958655623e-05, "loss": 0.7488, "step": 9936 }, { "epoch": 0.673284097838607, "grad_norm": 4.715060710906982, "learning_rate": 8.742419056745842e-05, "loss": 0.6577, "step": 9937 }, { "epoch": 0.6733518531065791, "grad_norm": 5.644432544708252, "learning_rate": 8.74228215483606e-05, "loss": 0.6954, "step": 9938 }, { "epoch": 0.6734196083745512, "grad_norm": 6.377723217010498, "learning_rate": 8.742145252926279e-05, "loss": 0.7948, "step": 9939 }, { "epoch": 0.6734873636425232, "grad_norm": 4.606221675872803, "learning_rate": 8.742008351016497e-05, "loss": 0.8975, "step": 9940 }, { "epoch": 0.6735551189104952, "grad_norm": 7.170515537261963, "learning_rate": 8.741871449106715e-05, "loss": 0.8839, "step": 9941 }, { "epoch": 0.6736228741784673, "grad_norm": 5.995856761932373, "learning_rate": 8.741734547196933e-05, "loss": 0.8133, "step": 9942 }, { "epoch": 0.6736906294464394, "grad_norm": 5.5055036544799805, "learning_rate": 8.741597645287152e-05, "loss": 0.5468, "step": 9943 }, { "epoch": 0.6737583847144115, "grad_norm": 8.358444213867188, "learning_rate": 8.74146074337737e-05, "loss": 0.9229, "step": 9944 }, { "epoch": 0.6738261399823836, "grad_norm": 6.350693225860596, "learning_rate": 8.741323841467588e-05, "loss": 0.9045, "step": 9945 }, { "epoch": 0.6738938952503557, "grad_norm": 8.909754753112793, "learning_rate": 8.741186939557808e-05, "loss": 0.8389, "step": 9946 }, { "epoch": 0.6739616505183278, "grad_norm": 6.272004127502441, "learning_rate": 8.741050037648026e-05, "loss": 0.8733, "step": 9947 }, { "epoch": 0.6740294057862999, "grad_norm": 7.538585662841797, "learning_rate": 8.740913135738244e-05, "loss": 0.7856, "step": 9948 }, { "epoch": 0.674097161054272, "grad_norm": 8.257597923278809, "learning_rate": 8.740776233828463e-05, "loss": 0.9473, "step": 9949 }, { "epoch": 0.6741649163222441, "grad_norm": 8.649487495422363, "learning_rate": 8.740639331918681e-05, "loss": 0.874, "step": 9950 }, { "epoch": 0.6742326715902162, "grad_norm": 7.1884765625, "learning_rate": 8.7405024300089e-05, "loss": 0.8108, "step": 9951 }, { "epoch": 0.6743004268581883, "grad_norm": 6.152512550354004, "learning_rate": 8.740365528099117e-05, "loss": 1.0727, "step": 9952 }, { "epoch": 0.6743681821261603, "grad_norm": 6.419736385345459, "learning_rate": 8.740228626189337e-05, "loss": 0.6842, "step": 9953 }, { "epoch": 0.6744359373941324, "grad_norm": 7.59826135635376, "learning_rate": 8.740091724279555e-05, "loss": 0.9312, "step": 9954 }, { "epoch": 0.6745036926621045, "grad_norm": 6.706602573394775, "learning_rate": 8.739954822369773e-05, "loss": 0.9033, "step": 9955 }, { "epoch": 0.6745714479300766, "grad_norm": 6.8712992668151855, "learning_rate": 8.739817920459991e-05, "loss": 0.8354, "step": 9956 }, { "epoch": 0.6746392031980486, "grad_norm": 4.86088228225708, "learning_rate": 8.739681018550209e-05, "loss": 0.7645, "step": 9957 }, { "epoch": 0.6747069584660207, "grad_norm": 4.672707557678223, "learning_rate": 8.739544116640428e-05, "loss": 0.5008, "step": 9958 }, { "epoch": 0.6747747137339928, "grad_norm": 7.2205634117126465, "learning_rate": 8.739407214730646e-05, "loss": 0.6439, "step": 9959 }, { "epoch": 0.6748424690019649, "grad_norm": 7.193275451660156, "learning_rate": 8.739270312820864e-05, "loss": 0.6493, "step": 9960 }, { "epoch": 0.674910224269937, "grad_norm": 7.060335159301758, "learning_rate": 8.739133410911082e-05, "loss": 1.0086, "step": 9961 }, { "epoch": 0.6749779795379091, "grad_norm": 8.672296524047852, "learning_rate": 8.738996509001302e-05, "loss": 0.8854, "step": 9962 }, { "epoch": 0.6750457348058811, "grad_norm": 5.923908710479736, "learning_rate": 8.73885960709152e-05, "loss": 0.7789, "step": 9963 }, { "epoch": 0.6751134900738532, "grad_norm": 5.861504554748535, "learning_rate": 8.738722705181738e-05, "loss": 0.7219, "step": 9964 }, { "epoch": 0.6751812453418253, "grad_norm": 8.418654441833496, "learning_rate": 8.738585803271956e-05, "loss": 0.7766, "step": 9965 }, { "epoch": 0.6752490006097974, "grad_norm": 6.130760669708252, "learning_rate": 8.738448901362174e-05, "loss": 0.6711, "step": 9966 }, { "epoch": 0.6753167558777695, "grad_norm": 6.2231526374816895, "learning_rate": 8.738311999452393e-05, "loss": 0.764, "step": 9967 }, { "epoch": 0.6753845111457416, "grad_norm": 6.3183417320251465, "learning_rate": 8.738175097542611e-05, "loss": 0.6871, "step": 9968 }, { "epoch": 0.6754522664137137, "grad_norm": 7.590784549713135, "learning_rate": 8.73803819563283e-05, "loss": 0.8666, "step": 9969 }, { "epoch": 0.6755200216816858, "grad_norm": 5.8160600662231445, "learning_rate": 8.737901293723047e-05, "loss": 1.0468, "step": 9970 }, { "epoch": 0.6755877769496579, "grad_norm": 7.236125469207764, "learning_rate": 8.737764391813266e-05, "loss": 0.6594, "step": 9971 }, { "epoch": 0.67565553221763, "grad_norm": 7.984396934509277, "learning_rate": 8.737627489903485e-05, "loss": 0.8579, "step": 9972 }, { "epoch": 0.675723287485602, "grad_norm": 6.760086536407471, "learning_rate": 8.737490587993703e-05, "loss": 0.6263, "step": 9973 }, { "epoch": 0.675791042753574, "grad_norm": 6.506689071655273, "learning_rate": 8.737353686083921e-05, "loss": 0.8551, "step": 9974 }, { "epoch": 0.6758587980215461, "grad_norm": 6.718931674957275, "learning_rate": 8.737216784174139e-05, "loss": 0.8464, "step": 9975 }, { "epoch": 0.6759265532895182, "grad_norm": 7.459061145782471, "learning_rate": 8.737079882264358e-05, "loss": 0.8845, "step": 9976 }, { "epoch": 0.6759943085574903, "grad_norm": 6.885556221008301, "learning_rate": 8.736942980354576e-05, "loss": 0.963, "step": 9977 }, { "epoch": 0.6760620638254624, "grad_norm": 6.1340861320495605, "learning_rate": 8.736806078444794e-05, "loss": 0.8624, "step": 9978 }, { "epoch": 0.6761298190934345, "grad_norm": 6.667585372924805, "learning_rate": 8.736669176535012e-05, "loss": 0.9265, "step": 9979 }, { "epoch": 0.6761975743614066, "grad_norm": 5.923494338989258, "learning_rate": 8.73653227462523e-05, "loss": 0.9644, "step": 9980 }, { "epoch": 0.6762653296293787, "grad_norm": 6.134291648864746, "learning_rate": 8.73639537271545e-05, "loss": 0.821, "step": 9981 }, { "epoch": 0.6763330848973508, "grad_norm": 8.043211936950684, "learning_rate": 8.736258470805668e-05, "loss": 1.1772, "step": 9982 }, { "epoch": 0.6764008401653229, "grad_norm": 5.358396053314209, "learning_rate": 8.736121568895886e-05, "loss": 0.8436, "step": 9983 }, { "epoch": 0.676468595433295, "grad_norm": 6.659102916717529, "learning_rate": 8.735984666986104e-05, "loss": 0.9908, "step": 9984 }, { "epoch": 0.676536350701267, "grad_norm": 5.888321876525879, "learning_rate": 8.735847765076323e-05, "loss": 0.8068, "step": 9985 }, { "epoch": 0.6766041059692391, "grad_norm": 6.159606456756592, "learning_rate": 8.735710863166541e-05, "loss": 0.7639, "step": 9986 }, { "epoch": 0.6766718612372112, "grad_norm": 6.044656276702881, "learning_rate": 8.73557396125676e-05, "loss": 1.0384, "step": 9987 }, { "epoch": 0.6767396165051833, "grad_norm": 6.538012504577637, "learning_rate": 8.735437059346978e-05, "loss": 0.9542, "step": 9988 }, { "epoch": 0.6768073717731553, "grad_norm": 6.6876301765441895, "learning_rate": 8.735300157437196e-05, "loss": 0.8471, "step": 9989 }, { "epoch": 0.6768751270411274, "grad_norm": 7.488297462463379, "learning_rate": 8.735163255527415e-05, "loss": 0.797, "step": 9990 }, { "epoch": 0.6769428823090995, "grad_norm": 5.536472320556641, "learning_rate": 8.735026353617633e-05, "loss": 0.7013, "step": 9991 }, { "epoch": 0.6770106375770716, "grad_norm": 6.263519763946533, "learning_rate": 8.734889451707851e-05, "loss": 0.991, "step": 9992 }, { "epoch": 0.6770783928450437, "grad_norm": 5.862089157104492, "learning_rate": 8.73475254979807e-05, "loss": 0.7879, "step": 9993 }, { "epoch": 0.6771461481130158, "grad_norm": 4.833024501800537, "learning_rate": 8.734615647888288e-05, "loss": 0.7024, "step": 9994 }, { "epoch": 0.6772139033809879, "grad_norm": 6.159411430358887, "learning_rate": 8.734478745978506e-05, "loss": 0.7301, "step": 9995 }, { "epoch": 0.67728165864896, "grad_norm": 5.821317672729492, "learning_rate": 8.734341844068726e-05, "loss": 0.7049, "step": 9996 }, { "epoch": 0.677349413916932, "grad_norm": 6.7138872146606445, "learning_rate": 8.734204942158944e-05, "loss": 0.9978, "step": 9997 }, { "epoch": 0.6774171691849041, "grad_norm": 6.190992832183838, "learning_rate": 8.734068040249162e-05, "loss": 0.8829, "step": 9998 }, { "epoch": 0.6774849244528762, "grad_norm": 6.912006378173828, "learning_rate": 8.733931138339381e-05, "loss": 0.7127, "step": 9999 }, { "epoch": 0.6775526797208483, "grad_norm": 7.386782169342041, "learning_rate": 8.7337942364296e-05, "loss": 0.7233, "step": 10000 }, { "epoch": 0.6776204349888204, "grad_norm": 6.710719585418701, "learning_rate": 8.733657334519817e-05, "loss": 0.8965, "step": 10001 }, { "epoch": 0.6776881902567925, "grad_norm": 5.841215133666992, "learning_rate": 8.733520432610035e-05, "loss": 1.0521, "step": 10002 }, { "epoch": 0.6777559455247646, "grad_norm": 7.529298782348633, "learning_rate": 8.733383530700253e-05, "loss": 0.9908, "step": 10003 }, { "epoch": 0.6778237007927367, "grad_norm": 6.102165699005127, "learning_rate": 8.733246628790473e-05, "loss": 0.7755, "step": 10004 }, { "epoch": 0.6778914560607088, "grad_norm": 5.879965782165527, "learning_rate": 8.733109726880691e-05, "loss": 0.6404, "step": 10005 }, { "epoch": 0.6779592113286808, "grad_norm": 5.979146480560303, "learning_rate": 8.732972824970909e-05, "loss": 0.6719, "step": 10006 }, { "epoch": 0.6780269665966528, "grad_norm": 6.233981609344482, "learning_rate": 8.732835923061127e-05, "loss": 0.7691, "step": 10007 }, { "epoch": 0.6780947218646249, "grad_norm": 5.87860107421875, "learning_rate": 8.732699021151346e-05, "loss": 0.611, "step": 10008 }, { "epoch": 0.678162477132597, "grad_norm": 5.905231952667236, "learning_rate": 8.732562119241564e-05, "loss": 0.8869, "step": 10009 }, { "epoch": 0.6782302324005691, "grad_norm": 5.827250003814697, "learning_rate": 8.732425217331782e-05, "loss": 0.8758, "step": 10010 }, { "epoch": 0.6782979876685412, "grad_norm": 6.833671569824219, "learning_rate": 8.732288315422e-05, "loss": 0.653, "step": 10011 }, { "epoch": 0.6783657429365133, "grad_norm": 6.360918045043945, "learning_rate": 8.732151413512218e-05, "loss": 0.8103, "step": 10012 }, { "epoch": 0.6784334982044854, "grad_norm": 5.429161548614502, "learning_rate": 8.732014511602438e-05, "loss": 0.742, "step": 10013 }, { "epoch": 0.6785012534724575, "grad_norm": 7.001778602600098, "learning_rate": 8.731877609692656e-05, "loss": 0.7378, "step": 10014 }, { "epoch": 0.6785690087404296, "grad_norm": 6.182924270629883, "learning_rate": 8.731740707782874e-05, "loss": 0.9072, "step": 10015 }, { "epoch": 0.6786367640084017, "grad_norm": 6.020389080047607, "learning_rate": 8.731603805873092e-05, "loss": 0.7117, "step": 10016 }, { "epoch": 0.6787045192763738, "grad_norm": 6.754500865936279, "learning_rate": 8.731466903963311e-05, "loss": 1.0786, "step": 10017 }, { "epoch": 0.6787722745443459, "grad_norm": 6.181823253631592, "learning_rate": 8.73133000205353e-05, "loss": 0.9011, "step": 10018 }, { "epoch": 0.678840029812318, "grad_norm": 5.791422367095947, "learning_rate": 8.731193100143747e-05, "loss": 0.6102, "step": 10019 }, { "epoch": 0.67890778508029, "grad_norm": 6.784679412841797, "learning_rate": 8.731056198233965e-05, "loss": 0.7262, "step": 10020 }, { "epoch": 0.6789755403482621, "grad_norm": 6.07905387878418, "learning_rate": 8.730919296324183e-05, "loss": 1.0617, "step": 10021 }, { "epoch": 0.6790432956162341, "grad_norm": 6.993971347808838, "learning_rate": 8.730782394414403e-05, "loss": 0.9212, "step": 10022 }, { "epoch": 0.6791110508842062, "grad_norm": 6.363950729370117, "learning_rate": 8.730645492504621e-05, "loss": 1.0471, "step": 10023 }, { "epoch": 0.6791788061521783, "grad_norm": 7.276193618774414, "learning_rate": 8.730508590594839e-05, "loss": 0.8397, "step": 10024 }, { "epoch": 0.6792465614201504, "grad_norm": 6.6545491218566895, "learning_rate": 8.730371688685057e-05, "loss": 0.6311, "step": 10025 }, { "epoch": 0.6793143166881225, "grad_norm": 4.888131141662598, "learning_rate": 8.730234786775275e-05, "loss": 0.5807, "step": 10026 }, { "epoch": 0.6793820719560946, "grad_norm": 5.920746326446533, "learning_rate": 8.730097884865494e-05, "loss": 0.624, "step": 10027 }, { "epoch": 0.6794498272240667, "grad_norm": 5.996628761291504, "learning_rate": 8.729960982955712e-05, "loss": 0.831, "step": 10028 }, { "epoch": 0.6795175824920388, "grad_norm": 7.457335472106934, "learning_rate": 8.72982408104593e-05, "loss": 0.7734, "step": 10029 }, { "epoch": 0.6795853377600108, "grad_norm": 5.239184856414795, "learning_rate": 8.729687179136148e-05, "loss": 0.6022, "step": 10030 }, { "epoch": 0.6796530930279829, "grad_norm": 8.256555557250977, "learning_rate": 8.729550277226368e-05, "loss": 1.265, "step": 10031 }, { "epoch": 0.679720848295955, "grad_norm": 5.370911121368408, "learning_rate": 8.729413375316586e-05, "loss": 0.847, "step": 10032 }, { "epoch": 0.6797886035639271, "grad_norm": 6.657277584075928, "learning_rate": 8.729276473406804e-05, "loss": 1.0096, "step": 10033 }, { "epoch": 0.6798563588318992, "grad_norm": 7.417891502380371, "learning_rate": 8.729139571497022e-05, "loss": 0.8401, "step": 10034 }, { "epoch": 0.6799241140998713, "grad_norm": 6.959234237670898, "learning_rate": 8.72900266958724e-05, "loss": 0.8181, "step": 10035 }, { "epoch": 0.6799918693678434, "grad_norm": 6.10018253326416, "learning_rate": 8.72886576767746e-05, "loss": 0.7132, "step": 10036 }, { "epoch": 0.6800596246358155, "grad_norm": 6.869701385498047, "learning_rate": 8.728728865767677e-05, "loss": 0.6435, "step": 10037 }, { "epoch": 0.6801273799037875, "grad_norm": 6.735001087188721, "learning_rate": 8.728591963857895e-05, "loss": 0.628, "step": 10038 }, { "epoch": 0.6801951351717596, "grad_norm": 5.84961462020874, "learning_rate": 8.728455061948115e-05, "loss": 0.7503, "step": 10039 }, { "epoch": 0.6802628904397316, "grad_norm": 6.914517879486084, "learning_rate": 8.728318160038333e-05, "loss": 0.8171, "step": 10040 }, { "epoch": 0.6803306457077037, "grad_norm": 8.97480297088623, "learning_rate": 8.728181258128551e-05, "loss": 0.9635, "step": 10041 }, { "epoch": 0.6803984009756758, "grad_norm": 5.28425407409668, "learning_rate": 8.72804435621877e-05, "loss": 0.656, "step": 10042 }, { "epoch": 0.6804661562436479, "grad_norm": 6.638722896575928, "learning_rate": 8.727907454308988e-05, "loss": 0.797, "step": 10043 }, { "epoch": 0.68053391151162, "grad_norm": 9.306808471679688, "learning_rate": 8.727770552399206e-05, "loss": 1.0276, "step": 10044 }, { "epoch": 0.6806016667795921, "grad_norm": 5.549346446990967, "learning_rate": 8.727633650489426e-05, "loss": 0.7556, "step": 10045 }, { "epoch": 0.6806694220475642, "grad_norm": 9.099546432495117, "learning_rate": 8.727496748579644e-05, "loss": 0.9813, "step": 10046 }, { "epoch": 0.6807371773155363, "grad_norm": 6.115594863891602, "learning_rate": 8.727359846669862e-05, "loss": 0.7509, "step": 10047 }, { "epoch": 0.6808049325835084, "grad_norm": 6.244608402252197, "learning_rate": 8.72722294476008e-05, "loss": 0.8274, "step": 10048 }, { "epoch": 0.6808726878514805, "grad_norm": 5.933996677398682, "learning_rate": 8.727086042850298e-05, "loss": 0.9218, "step": 10049 }, { "epoch": 0.6809404431194526, "grad_norm": 6.216856002807617, "learning_rate": 8.726949140940517e-05, "loss": 0.8434, "step": 10050 }, { "epoch": 0.6810081983874247, "grad_norm": 8.29095458984375, "learning_rate": 8.726812239030735e-05, "loss": 0.9376, "step": 10051 }, { "epoch": 0.6810759536553967, "grad_norm": 6.203293323516846, "learning_rate": 8.726675337120953e-05, "loss": 0.865, "step": 10052 }, { "epoch": 0.6811437089233688, "grad_norm": 7.393670082092285, "learning_rate": 8.726538435211171e-05, "loss": 0.9512, "step": 10053 }, { "epoch": 0.6812114641913409, "grad_norm": 5.229176044464111, "learning_rate": 8.726401533301391e-05, "loss": 0.6983, "step": 10054 }, { "epoch": 0.6812792194593129, "grad_norm": 6.016887187957764, "learning_rate": 8.726264631391609e-05, "loss": 0.8935, "step": 10055 }, { "epoch": 0.681346974727285, "grad_norm": 8.189292907714844, "learning_rate": 8.726127729481827e-05, "loss": 0.9643, "step": 10056 }, { "epoch": 0.6814147299952571, "grad_norm": 7.380198001861572, "learning_rate": 8.725990827572045e-05, "loss": 0.9616, "step": 10057 }, { "epoch": 0.6814824852632292, "grad_norm": 5.0216546058654785, "learning_rate": 8.725853925662263e-05, "loss": 0.9492, "step": 10058 }, { "epoch": 0.6815502405312013, "grad_norm": 4.686762809753418, "learning_rate": 8.725717023752482e-05, "loss": 0.7308, "step": 10059 }, { "epoch": 0.6816179957991734, "grad_norm": 7.183378219604492, "learning_rate": 8.7255801218427e-05, "loss": 0.7267, "step": 10060 }, { "epoch": 0.6816857510671455, "grad_norm": 6.899569988250732, "learning_rate": 8.725443219932918e-05, "loss": 1.2609, "step": 10061 }, { "epoch": 0.6817535063351176, "grad_norm": 6.386919021606445, "learning_rate": 8.725306318023136e-05, "loss": 0.8907, "step": 10062 }, { "epoch": 0.6818212616030896, "grad_norm": 6.532101631164551, "learning_rate": 8.725169416113356e-05, "loss": 0.8595, "step": 10063 }, { "epoch": 0.6818890168710617, "grad_norm": 6.360471725463867, "learning_rate": 8.725032514203574e-05, "loss": 0.8667, "step": 10064 }, { "epoch": 0.6819567721390338, "grad_norm": 6.6123480796813965, "learning_rate": 8.724895612293792e-05, "loss": 0.9812, "step": 10065 }, { "epoch": 0.6820245274070059, "grad_norm": 7.219352722167969, "learning_rate": 8.72475871038401e-05, "loss": 0.8871, "step": 10066 }, { "epoch": 0.682092282674978, "grad_norm": 5.686796188354492, "learning_rate": 8.724621808474228e-05, "loss": 0.6971, "step": 10067 }, { "epoch": 0.6821600379429501, "grad_norm": 5.568495273590088, "learning_rate": 8.724484906564447e-05, "loss": 0.6986, "step": 10068 }, { "epoch": 0.6822277932109222, "grad_norm": 6.997779369354248, "learning_rate": 8.724348004654665e-05, "loss": 1.0181, "step": 10069 }, { "epoch": 0.6822955484788943, "grad_norm": 6.177464962005615, "learning_rate": 8.724211102744883e-05, "loss": 0.8675, "step": 10070 }, { "epoch": 0.6823633037468663, "grad_norm": 5.105398654937744, "learning_rate": 8.724074200835101e-05, "loss": 0.7752, "step": 10071 }, { "epoch": 0.6824310590148384, "grad_norm": 5.883255481719971, "learning_rate": 8.723937298925321e-05, "loss": 0.8301, "step": 10072 }, { "epoch": 0.6824988142828105, "grad_norm": 5.932136058807373, "learning_rate": 8.723800397015539e-05, "loss": 0.7768, "step": 10073 }, { "epoch": 0.6825665695507825, "grad_norm": 6.31164026260376, "learning_rate": 8.723663495105757e-05, "loss": 1.0803, "step": 10074 }, { "epoch": 0.6826343248187546, "grad_norm": 4.6186089515686035, "learning_rate": 8.723526593195975e-05, "loss": 0.7403, "step": 10075 }, { "epoch": 0.6827020800867267, "grad_norm": 6.847713470458984, "learning_rate": 8.723389691286193e-05, "loss": 0.8813, "step": 10076 }, { "epoch": 0.6827698353546988, "grad_norm": 6.186002254486084, "learning_rate": 8.723252789376412e-05, "loss": 0.7373, "step": 10077 }, { "epoch": 0.6828375906226709, "grad_norm": 5.436232089996338, "learning_rate": 8.72311588746663e-05, "loss": 0.8996, "step": 10078 }, { "epoch": 0.682905345890643, "grad_norm": 7.013981819152832, "learning_rate": 8.722978985556848e-05, "loss": 0.6978, "step": 10079 }, { "epoch": 0.6829731011586151, "grad_norm": 6.762590408325195, "learning_rate": 8.722842083647066e-05, "loss": 0.8004, "step": 10080 }, { "epoch": 0.6830408564265872, "grad_norm": 6.398361682891846, "learning_rate": 8.722705181737284e-05, "loss": 0.8928, "step": 10081 }, { "epoch": 0.6831086116945593, "grad_norm": 5.5384111404418945, "learning_rate": 8.722568279827504e-05, "loss": 0.6761, "step": 10082 }, { "epoch": 0.6831763669625314, "grad_norm": 6.1554179191589355, "learning_rate": 8.722431377917722e-05, "loss": 0.8894, "step": 10083 }, { "epoch": 0.6832441222305035, "grad_norm": 7.378477096557617, "learning_rate": 8.72229447600794e-05, "loss": 0.776, "step": 10084 }, { "epoch": 0.6833118774984756, "grad_norm": 5.925946235656738, "learning_rate": 8.72215757409816e-05, "loss": 0.978, "step": 10085 }, { "epoch": 0.6833796327664476, "grad_norm": 9.17393684387207, "learning_rate": 8.722020672188377e-05, "loss": 0.9067, "step": 10086 }, { "epoch": 0.6834473880344196, "grad_norm": 5.253411293029785, "learning_rate": 8.721883770278595e-05, "loss": 0.822, "step": 10087 }, { "epoch": 0.6835151433023917, "grad_norm": 5.1418046951293945, "learning_rate": 8.721746868368815e-05, "loss": 0.7038, "step": 10088 }, { "epoch": 0.6835828985703638, "grad_norm": 6.0158281326293945, "learning_rate": 8.721609966459033e-05, "loss": 1.0841, "step": 10089 }, { "epoch": 0.6836506538383359, "grad_norm": 5.677688121795654, "learning_rate": 8.721473064549251e-05, "loss": 0.7498, "step": 10090 }, { "epoch": 0.683718409106308, "grad_norm": 7.7817254066467285, "learning_rate": 8.72133616263947e-05, "loss": 0.5788, "step": 10091 }, { "epoch": 0.6837861643742801, "grad_norm": 5.363152503967285, "learning_rate": 8.721199260729688e-05, "loss": 0.7424, "step": 10092 }, { "epoch": 0.6838539196422522, "grad_norm": 6.539010524749756, "learning_rate": 8.721062358819906e-05, "loss": 0.8402, "step": 10093 }, { "epoch": 0.6839216749102243, "grad_norm": 5.907912254333496, "learning_rate": 8.720925456910124e-05, "loss": 0.9475, "step": 10094 }, { "epoch": 0.6839894301781964, "grad_norm": 6.317841529846191, "learning_rate": 8.720788555000344e-05, "loss": 0.7352, "step": 10095 }, { "epoch": 0.6840571854461684, "grad_norm": 6.071649074554443, "learning_rate": 8.720651653090562e-05, "loss": 0.7539, "step": 10096 }, { "epoch": 0.6841249407141405, "grad_norm": 7.052052974700928, "learning_rate": 8.72051475118078e-05, "loss": 0.788, "step": 10097 }, { "epoch": 0.6841926959821126, "grad_norm": 5.975690841674805, "learning_rate": 8.720377849270998e-05, "loss": 0.8239, "step": 10098 }, { "epoch": 0.6842604512500847, "grad_norm": 5.96523904800415, "learning_rate": 8.720240947361216e-05, "loss": 0.9525, "step": 10099 }, { "epoch": 0.6843282065180568, "grad_norm": 6.296563148498535, "learning_rate": 8.720104045451435e-05, "loss": 0.691, "step": 10100 }, { "epoch": 0.6843959617860289, "grad_norm": 5.338788986206055, "learning_rate": 8.719967143541653e-05, "loss": 0.7297, "step": 10101 }, { "epoch": 0.684463717054001, "grad_norm": 6.04310417175293, "learning_rate": 8.719830241631871e-05, "loss": 0.8976, "step": 10102 }, { "epoch": 0.684531472321973, "grad_norm": 7.260922431945801, "learning_rate": 8.71969333972209e-05, "loss": 0.804, "step": 10103 }, { "epoch": 0.6845992275899451, "grad_norm": 7.797060489654541, "learning_rate": 8.719556437812307e-05, "loss": 1.1672, "step": 10104 }, { "epoch": 0.6846669828579172, "grad_norm": 4.863615989685059, "learning_rate": 8.719419535902527e-05, "loss": 0.7793, "step": 10105 }, { "epoch": 0.6847347381258893, "grad_norm": 6.105317115783691, "learning_rate": 8.719282633992745e-05, "loss": 0.7928, "step": 10106 }, { "epoch": 0.6848024933938613, "grad_norm": 5.737043380737305, "learning_rate": 8.719145732082963e-05, "loss": 0.627, "step": 10107 }, { "epoch": 0.6848702486618334, "grad_norm": 5.305082321166992, "learning_rate": 8.719008830173181e-05, "loss": 0.6404, "step": 10108 }, { "epoch": 0.6849380039298055, "grad_norm": 6.310640335083008, "learning_rate": 8.7188719282634e-05, "loss": 1.0716, "step": 10109 }, { "epoch": 0.6850057591977776, "grad_norm": 6.286160469055176, "learning_rate": 8.718735026353618e-05, "loss": 0.8632, "step": 10110 }, { "epoch": 0.6850735144657497, "grad_norm": 6.600961208343506, "learning_rate": 8.718598124443836e-05, "loss": 0.7053, "step": 10111 }, { "epoch": 0.6851412697337218, "grad_norm": 7.745927333831787, "learning_rate": 8.718461222534054e-05, "loss": 1.3105, "step": 10112 }, { "epoch": 0.6852090250016939, "grad_norm": 7.073805332183838, "learning_rate": 8.718324320624272e-05, "loss": 1.0302, "step": 10113 }, { "epoch": 0.685276780269666, "grad_norm": 6.142061233520508, "learning_rate": 8.718187418714492e-05, "loss": 0.7433, "step": 10114 }, { "epoch": 0.6853445355376381, "grad_norm": 6.279247760772705, "learning_rate": 8.71805051680471e-05, "loss": 0.9076, "step": 10115 }, { "epoch": 0.6854122908056102, "grad_norm": 6.235780239105225, "learning_rate": 8.717913614894928e-05, "loss": 1.0124, "step": 10116 }, { "epoch": 0.6854800460735823, "grad_norm": 7.218427658081055, "learning_rate": 8.717776712985146e-05, "loss": 0.8519, "step": 10117 }, { "epoch": 0.6855478013415544, "grad_norm": 5.481386661529541, "learning_rate": 8.717639811075365e-05, "loss": 0.6997, "step": 10118 }, { "epoch": 0.6856155566095264, "grad_norm": 6.176963806152344, "learning_rate": 8.717502909165583e-05, "loss": 0.7735, "step": 10119 }, { "epoch": 0.6856833118774984, "grad_norm": 5.452063083648682, "learning_rate": 8.717366007255801e-05, "loss": 0.6307, "step": 10120 }, { "epoch": 0.6857510671454705, "grad_norm": 8.103320121765137, "learning_rate": 8.71722910534602e-05, "loss": 0.794, "step": 10121 }, { "epoch": 0.6858188224134426, "grad_norm": 7.873292922973633, "learning_rate": 8.717092203436237e-05, "loss": 0.8941, "step": 10122 }, { "epoch": 0.6858865776814147, "grad_norm": 6.97474479675293, "learning_rate": 8.716955301526457e-05, "loss": 0.627, "step": 10123 }, { "epoch": 0.6859543329493868, "grad_norm": 7.643387794494629, "learning_rate": 8.716818399616675e-05, "loss": 0.8326, "step": 10124 }, { "epoch": 0.6860220882173589, "grad_norm": 8.497008323669434, "learning_rate": 8.716681497706893e-05, "loss": 0.959, "step": 10125 }, { "epoch": 0.686089843485331, "grad_norm": 6.50831413269043, "learning_rate": 8.716544595797111e-05, "loss": 0.6411, "step": 10126 }, { "epoch": 0.6861575987533031, "grad_norm": 7.354664325714111, "learning_rate": 8.71640769388733e-05, "loss": 0.9987, "step": 10127 }, { "epoch": 0.6862253540212752, "grad_norm": 7.188365936279297, "learning_rate": 8.716270791977548e-05, "loss": 0.9559, "step": 10128 }, { "epoch": 0.6862931092892472, "grad_norm": 6.465545177459717, "learning_rate": 8.716133890067766e-05, "loss": 0.9607, "step": 10129 }, { "epoch": 0.6863608645572193, "grad_norm": 5.860640048980713, "learning_rate": 8.715996988157984e-05, "loss": 1.0831, "step": 10130 }, { "epoch": 0.6864286198251914, "grad_norm": 5.8733296394348145, "learning_rate": 8.715860086248204e-05, "loss": 0.94, "step": 10131 }, { "epoch": 0.6864963750931635, "grad_norm": 6.498860836029053, "learning_rate": 8.715723184338422e-05, "loss": 0.9037, "step": 10132 }, { "epoch": 0.6865641303611356, "grad_norm": 6.163070201873779, "learning_rate": 8.71558628242864e-05, "loss": 0.7879, "step": 10133 }, { "epoch": 0.6866318856291077, "grad_norm": 5.94657564163208, "learning_rate": 8.715449380518859e-05, "loss": 0.8599, "step": 10134 }, { "epoch": 0.6866996408970798, "grad_norm": 6.023927211761475, "learning_rate": 8.715312478609077e-05, "loss": 0.7475, "step": 10135 }, { "epoch": 0.6867673961650518, "grad_norm": 6.270586967468262, "learning_rate": 8.715175576699295e-05, "loss": 0.754, "step": 10136 }, { "epoch": 0.6868351514330239, "grad_norm": 6.190057277679443, "learning_rate": 8.715038674789515e-05, "loss": 0.6548, "step": 10137 }, { "epoch": 0.686902906700996, "grad_norm": 5.793675899505615, "learning_rate": 8.714901772879733e-05, "loss": 0.5634, "step": 10138 }, { "epoch": 0.686970661968968, "grad_norm": 6.0643310546875, "learning_rate": 8.714764870969951e-05, "loss": 1.0238, "step": 10139 }, { "epoch": 0.6870384172369401, "grad_norm": 5.128119945526123, "learning_rate": 8.714627969060169e-05, "loss": 0.6071, "step": 10140 }, { "epoch": 0.6871061725049122, "grad_norm": 6.435021877288818, "learning_rate": 8.714491067150388e-05, "loss": 0.7108, "step": 10141 }, { "epoch": 0.6871739277728843, "grad_norm": 7.8082122802734375, "learning_rate": 8.714354165240606e-05, "loss": 1.0643, "step": 10142 }, { "epoch": 0.6872416830408564, "grad_norm": 8.007335662841797, "learning_rate": 8.714217263330824e-05, "loss": 0.962, "step": 10143 }, { "epoch": 0.6873094383088285, "grad_norm": 6.111318588256836, "learning_rate": 8.714080361421042e-05, "loss": 0.8697, "step": 10144 }, { "epoch": 0.6873771935768006, "grad_norm": 5.64456844329834, "learning_rate": 8.71394345951126e-05, "loss": 1.128, "step": 10145 }, { "epoch": 0.6874449488447727, "grad_norm": 7.482577323913574, "learning_rate": 8.71380655760148e-05, "loss": 0.7496, "step": 10146 }, { "epoch": 0.6875127041127448, "grad_norm": 5.837367534637451, "learning_rate": 8.713669655691698e-05, "loss": 0.7851, "step": 10147 }, { "epoch": 0.6875804593807169, "grad_norm": 5.765466213226318, "learning_rate": 8.713532753781916e-05, "loss": 0.718, "step": 10148 }, { "epoch": 0.687648214648689, "grad_norm": 7.491219520568848, "learning_rate": 8.713395851872134e-05, "loss": 1.0697, "step": 10149 }, { "epoch": 0.6877159699166611, "grad_norm": 6.724207401275635, "learning_rate": 8.713258949962353e-05, "loss": 0.7963, "step": 10150 }, { "epoch": 0.6877837251846332, "grad_norm": 6.340019226074219, "learning_rate": 8.713122048052571e-05, "loss": 1.0246, "step": 10151 }, { "epoch": 0.6878514804526051, "grad_norm": 4.966742038726807, "learning_rate": 8.712985146142789e-05, "loss": 0.9185, "step": 10152 }, { "epoch": 0.6879192357205772, "grad_norm": 6.308014869689941, "learning_rate": 8.712848244233007e-05, "loss": 0.9262, "step": 10153 }, { "epoch": 0.6879869909885493, "grad_norm": 7.576474189758301, "learning_rate": 8.712711342323225e-05, "loss": 0.7432, "step": 10154 }, { "epoch": 0.6880547462565214, "grad_norm": 5.722362041473389, "learning_rate": 8.712574440413445e-05, "loss": 0.76, "step": 10155 }, { "epoch": 0.6881225015244935, "grad_norm": 4.6947197914123535, "learning_rate": 8.712437538503663e-05, "loss": 0.7429, "step": 10156 }, { "epoch": 0.6881902567924656, "grad_norm": 6.494797229766846, "learning_rate": 8.712300636593881e-05, "loss": 0.9554, "step": 10157 }, { "epoch": 0.6882580120604377, "grad_norm": 7.0943403244018555, "learning_rate": 8.712163734684099e-05, "loss": 0.9698, "step": 10158 }, { "epoch": 0.6883257673284098, "grad_norm": 5.764694690704346, "learning_rate": 8.712026832774317e-05, "loss": 0.5992, "step": 10159 }, { "epoch": 0.6883935225963819, "grad_norm": 8.111281394958496, "learning_rate": 8.711889930864536e-05, "loss": 0.9348, "step": 10160 }, { "epoch": 0.688461277864354, "grad_norm": 6.08704948425293, "learning_rate": 8.711753028954754e-05, "loss": 0.7621, "step": 10161 }, { "epoch": 0.688529033132326, "grad_norm": 7.329418659210205, "learning_rate": 8.711616127044972e-05, "loss": 0.8404, "step": 10162 }, { "epoch": 0.6885967884002981, "grad_norm": 5.368319988250732, "learning_rate": 8.71147922513519e-05, "loss": 0.6699, "step": 10163 }, { "epoch": 0.6886645436682702, "grad_norm": 4.94074821472168, "learning_rate": 8.71134232322541e-05, "loss": 0.7722, "step": 10164 }, { "epoch": 0.6887322989362423, "grad_norm": 7.2699408531188965, "learning_rate": 8.711205421315628e-05, "loss": 0.9826, "step": 10165 }, { "epoch": 0.6888000542042144, "grad_norm": 7.965369701385498, "learning_rate": 8.711068519405846e-05, "loss": 1.0658, "step": 10166 }, { "epoch": 0.6888678094721865, "grad_norm": 6.148140907287598, "learning_rate": 8.710931617496064e-05, "loss": 0.7169, "step": 10167 }, { "epoch": 0.6889355647401586, "grad_norm": 7.394513130187988, "learning_rate": 8.710794715586282e-05, "loss": 1.0415, "step": 10168 }, { "epoch": 0.6890033200081306, "grad_norm": 6.450402736663818, "learning_rate": 8.710657813676501e-05, "loss": 0.8588, "step": 10169 }, { "epoch": 0.6890710752761027, "grad_norm": 7.909549236297607, "learning_rate": 8.71052091176672e-05, "loss": 0.8389, "step": 10170 }, { "epoch": 0.6891388305440748, "grad_norm": 6.7112603187561035, "learning_rate": 8.710384009856937e-05, "loss": 0.8337, "step": 10171 }, { "epoch": 0.6892065858120469, "grad_norm": 5.394613265991211, "learning_rate": 8.710247107947155e-05, "loss": 0.6975, "step": 10172 }, { "epoch": 0.689274341080019, "grad_norm": 7.6131911277771, "learning_rate": 8.710110206037375e-05, "loss": 0.9802, "step": 10173 }, { "epoch": 0.689342096347991, "grad_norm": 5.598437786102295, "learning_rate": 8.709973304127593e-05, "loss": 0.6925, "step": 10174 }, { "epoch": 0.6894098516159631, "grad_norm": 7.252137660980225, "learning_rate": 8.709836402217811e-05, "loss": 0.9988, "step": 10175 }, { "epoch": 0.6894776068839352, "grad_norm": 5.04716682434082, "learning_rate": 8.709699500308029e-05, "loss": 0.6327, "step": 10176 }, { "epoch": 0.6895453621519073, "grad_norm": 5.729875564575195, "learning_rate": 8.709562598398248e-05, "loss": 0.7374, "step": 10177 }, { "epoch": 0.6896131174198794, "grad_norm": 6.3525166511535645, "learning_rate": 8.709425696488466e-05, "loss": 0.9111, "step": 10178 }, { "epoch": 0.6896808726878515, "grad_norm": 8.86587905883789, "learning_rate": 8.709288794578684e-05, "loss": 0.6059, "step": 10179 }, { "epoch": 0.6897486279558236, "grad_norm": 6.739536762237549, "learning_rate": 8.709151892668904e-05, "loss": 0.6525, "step": 10180 }, { "epoch": 0.6898163832237957, "grad_norm": 6.119062900543213, "learning_rate": 8.709014990759122e-05, "loss": 0.8521, "step": 10181 }, { "epoch": 0.6898841384917678, "grad_norm": 6.746237754821777, "learning_rate": 8.70887808884934e-05, "loss": 0.7897, "step": 10182 }, { "epoch": 0.6899518937597399, "grad_norm": 6.002828121185303, "learning_rate": 8.708741186939559e-05, "loss": 0.7147, "step": 10183 }, { "epoch": 0.690019649027712, "grad_norm": 7.710058212280273, "learning_rate": 8.708604285029777e-05, "loss": 0.6674, "step": 10184 }, { "epoch": 0.6900874042956839, "grad_norm": 7.451826572418213, "learning_rate": 8.708467383119995e-05, "loss": 0.8059, "step": 10185 }, { "epoch": 0.690155159563656, "grad_norm": 7.2232465744018555, "learning_rate": 8.708330481210213e-05, "loss": 0.8831, "step": 10186 }, { "epoch": 0.6902229148316281, "grad_norm": 6.742265701293945, "learning_rate": 8.708193579300433e-05, "loss": 0.851, "step": 10187 }, { "epoch": 0.6902906700996002, "grad_norm": 6.47482967376709, "learning_rate": 8.708056677390651e-05, "loss": 0.7428, "step": 10188 }, { "epoch": 0.6903584253675723, "grad_norm": 6.393016815185547, "learning_rate": 8.707919775480869e-05, "loss": 0.9357, "step": 10189 }, { "epoch": 0.6904261806355444, "grad_norm": 5.898126602172852, "learning_rate": 8.707782873571087e-05, "loss": 0.7807, "step": 10190 }, { "epoch": 0.6904939359035165, "grad_norm": 7.400199890136719, "learning_rate": 8.707645971661305e-05, "loss": 0.9482, "step": 10191 }, { "epoch": 0.6905616911714886, "grad_norm": 8.556063652038574, "learning_rate": 8.707509069751524e-05, "loss": 0.7055, "step": 10192 }, { "epoch": 0.6906294464394607, "grad_norm": 7.6027936935424805, "learning_rate": 8.707372167841742e-05, "loss": 0.9158, "step": 10193 }, { "epoch": 0.6906972017074328, "grad_norm": 6.305631637573242, "learning_rate": 8.70723526593196e-05, "loss": 0.8744, "step": 10194 }, { "epoch": 0.6907649569754049, "grad_norm": 5.370072841644287, "learning_rate": 8.707098364022178e-05, "loss": 0.7291, "step": 10195 }, { "epoch": 0.690832712243377, "grad_norm": 6.723821640014648, "learning_rate": 8.706961462112398e-05, "loss": 0.9333, "step": 10196 }, { "epoch": 0.690900467511349, "grad_norm": 6.01531982421875, "learning_rate": 8.706824560202616e-05, "loss": 0.6833, "step": 10197 }, { "epoch": 0.6909682227793211, "grad_norm": 7.747717380523682, "learning_rate": 8.706687658292834e-05, "loss": 1.0669, "step": 10198 }, { "epoch": 0.6910359780472932, "grad_norm": 6.549305438995361, "learning_rate": 8.706550756383052e-05, "loss": 0.6726, "step": 10199 }, { "epoch": 0.6911037333152653, "grad_norm": 5.983778476715088, "learning_rate": 8.70641385447327e-05, "loss": 0.7065, "step": 10200 }, { "epoch": 0.6911714885832373, "grad_norm": 6.709543704986572, "learning_rate": 8.706276952563489e-05, "loss": 0.7668, "step": 10201 }, { "epoch": 0.6912392438512094, "grad_norm": 6.432425498962402, "learning_rate": 8.706140050653707e-05, "loss": 0.8235, "step": 10202 }, { "epoch": 0.6913069991191815, "grad_norm": 6.770932197570801, "learning_rate": 8.706003148743925e-05, "loss": 0.9328, "step": 10203 }, { "epoch": 0.6913747543871536, "grad_norm": 6.075129508972168, "learning_rate": 8.705866246834143e-05, "loss": 0.6302, "step": 10204 }, { "epoch": 0.6914425096551257, "grad_norm": 5.651463985443115, "learning_rate": 8.705729344924363e-05, "loss": 0.8457, "step": 10205 }, { "epoch": 0.6915102649230978, "grad_norm": 4.9870524406433105, "learning_rate": 8.705592443014581e-05, "loss": 0.685, "step": 10206 }, { "epoch": 0.6915780201910698, "grad_norm": 5.86956787109375, "learning_rate": 8.705455541104799e-05, "loss": 0.9293, "step": 10207 }, { "epoch": 0.6916457754590419, "grad_norm": 6.015864849090576, "learning_rate": 8.705318639195017e-05, "loss": 0.7802, "step": 10208 }, { "epoch": 0.691713530727014, "grad_norm": 6.092733383178711, "learning_rate": 8.705181737285235e-05, "loss": 0.9689, "step": 10209 }, { "epoch": 0.6917812859949861, "grad_norm": 5.461453437805176, "learning_rate": 8.705044835375454e-05, "loss": 0.8396, "step": 10210 }, { "epoch": 0.6918490412629582, "grad_norm": 5.914142608642578, "learning_rate": 8.704907933465672e-05, "loss": 0.7936, "step": 10211 }, { "epoch": 0.6919167965309303, "grad_norm": 7.034205436706543, "learning_rate": 8.70477103155589e-05, "loss": 0.9324, "step": 10212 }, { "epoch": 0.6919845517989024, "grad_norm": 5.621762275695801, "learning_rate": 8.704634129646108e-05, "loss": 0.7763, "step": 10213 }, { "epoch": 0.6920523070668745, "grad_norm": 6.53078031539917, "learning_rate": 8.704497227736326e-05, "loss": 0.9933, "step": 10214 }, { "epoch": 0.6921200623348466, "grad_norm": 6.093494415283203, "learning_rate": 8.704360325826546e-05, "loss": 0.8164, "step": 10215 }, { "epoch": 0.6921878176028187, "grad_norm": 5.476284503936768, "learning_rate": 8.704223423916764e-05, "loss": 0.8249, "step": 10216 }, { "epoch": 0.6922555728707908, "grad_norm": 8.038809776306152, "learning_rate": 8.704086522006982e-05, "loss": 1.1032, "step": 10217 }, { "epoch": 0.6923233281387627, "grad_norm": 6.303304672241211, "learning_rate": 8.7039496200972e-05, "loss": 0.8769, "step": 10218 }, { "epoch": 0.6923910834067348, "grad_norm": 5.814499855041504, "learning_rate": 8.703812718187419e-05, "loss": 0.9121, "step": 10219 }, { "epoch": 0.6924588386747069, "grad_norm": 6.704540729522705, "learning_rate": 8.703675816277637e-05, "loss": 1.005, "step": 10220 }, { "epoch": 0.692526593942679, "grad_norm": 5.562582969665527, "learning_rate": 8.703538914367855e-05, "loss": 0.7873, "step": 10221 }, { "epoch": 0.6925943492106511, "grad_norm": 6.583497524261475, "learning_rate": 8.703402012458073e-05, "loss": 0.9385, "step": 10222 }, { "epoch": 0.6926621044786232, "grad_norm": 6.376049518585205, "learning_rate": 8.703265110548291e-05, "loss": 0.9021, "step": 10223 }, { "epoch": 0.6927298597465953, "grad_norm": 5.302101135253906, "learning_rate": 8.703128208638511e-05, "loss": 0.917, "step": 10224 }, { "epoch": 0.6927976150145674, "grad_norm": 7.336282730102539, "learning_rate": 8.702991306728729e-05, "loss": 0.6929, "step": 10225 }, { "epoch": 0.6928653702825395, "grad_norm": 6.04905366897583, "learning_rate": 8.702854404818948e-05, "loss": 0.7451, "step": 10226 }, { "epoch": 0.6929331255505116, "grad_norm": 5.474672317504883, "learning_rate": 8.702717502909166e-05, "loss": 0.7867, "step": 10227 }, { "epoch": 0.6930008808184837, "grad_norm": 5.713932037353516, "learning_rate": 8.702580600999384e-05, "loss": 0.7569, "step": 10228 }, { "epoch": 0.6930686360864557, "grad_norm": 7.172578811645508, "learning_rate": 8.702443699089604e-05, "loss": 0.7756, "step": 10229 }, { "epoch": 0.6931363913544278, "grad_norm": 5.1826019287109375, "learning_rate": 8.702306797179822e-05, "loss": 0.8408, "step": 10230 }, { "epoch": 0.6932041466223999, "grad_norm": 8.113396644592285, "learning_rate": 8.70216989527004e-05, "loss": 0.8555, "step": 10231 }, { "epoch": 0.693271901890372, "grad_norm": 5.396690845489502, "learning_rate": 8.702032993360258e-05, "loss": 0.8125, "step": 10232 }, { "epoch": 0.6933396571583441, "grad_norm": 5.951786518096924, "learning_rate": 8.701896091450477e-05, "loss": 0.8391, "step": 10233 }, { "epoch": 0.6934074124263161, "grad_norm": 7.0163774490356445, "learning_rate": 8.701759189540695e-05, "loss": 0.853, "step": 10234 }, { "epoch": 0.6934751676942882, "grad_norm": 7.563843727111816, "learning_rate": 8.701622287630913e-05, "loss": 0.9711, "step": 10235 }, { "epoch": 0.6935429229622603, "grad_norm": 5.8124284744262695, "learning_rate": 8.701485385721131e-05, "loss": 0.7018, "step": 10236 }, { "epoch": 0.6936106782302324, "grad_norm": 6.22074556350708, "learning_rate": 8.701348483811349e-05, "loss": 0.7756, "step": 10237 }, { "epoch": 0.6936784334982045, "grad_norm": 5.601717472076416, "learning_rate": 8.701211581901569e-05, "loss": 0.7832, "step": 10238 }, { "epoch": 0.6937461887661766, "grad_norm": 7.209207534790039, "learning_rate": 8.701074679991787e-05, "loss": 0.7866, "step": 10239 }, { "epoch": 0.6938139440341486, "grad_norm": 5.176044940948486, "learning_rate": 8.700937778082005e-05, "loss": 0.6875, "step": 10240 }, { "epoch": 0.6938816993021207, "grad_norm": 6.441755771636963, "learning_rate": 8.700800876172223e-05, "loss": 0.6563, "step": 10241 }, { "epoch": 0.6939494545700928, "grad_norm": 5.935150146484375, "learning_rate": 8.700663974262442e-05, "loss": 0.881, "step": 10242 }, { "epoch": 0.6940172098380649, "grad_norm": 6.082694053649902, "learning_rate": 8.70052707235266e-05, "loss": 0.7059, "step": 10243 }, { "epoch": 0.694084965106037, "grad_norm": 7.9285383224487305, "learning_rate": 8.700390170442878e-05, "loss": 0.8529, "step": 10244 }, { "epoch": 0.6941527203740091, "grad_norm": 6.027041435241699, "learning_rate": 8.700253268533096e-05, "loss": 0.7919, "step": 10245 }, { "epoch": 0.6942204756419812, "grad_norm": 6.956554889678955, "learning_rate": 8.700116366623314e-05, "loss": 0.8655, "step": 10246 }, { "epoch": 0.6942882309099533, "grad_norm": 6.508672714233398, "learning_rate": 8.699979464713534e-05, "loss": 0.7306, "step": 10247 }, { "epoch": 0.6943559861779254, "grad_norm": 6.117376804351807, "learning_rate": 8.699842562803752e-05, "loss": 0.7016, "step": 10248 }, { "epoch": 0.6944237414458975, "grad_norm": 5.537294864654541, "learning_rate": 8.69970566089397e-05, "loss": 0.8452, "step": 10249 }, { "epoch": 0.6944914967138694, "grad_norm": 6.4718241691589355, "learning_rate": 8.699568758984188e-05, "loss": 0.9693, "step": 10250 }, { "epoch": 0.6945592519818415, "grad_norm": 6.249986171722412, "learning_rate": 8.699431857074407e-05, "loss": 0.8, "step": 10251 }, { "epoch": 0.6946270072498136, "grad_norm": 5.435842037200928, "learning_rate": 8.699294955164625e-05, "loss": 0.684, "step": 10252 }, { "epoch": 0.6946947625177857, "grad_norm": 7.15748405456543, "learning_rate": 8.699158053254843e-05, "loss": 0.9773, "step": 10253 }, { "epoch": 0.6947625177857578, "grad_norm": 6.881677150726318, "learning_rate": 8.699021151345061e-05, "loss": 0.8775, "step": 10254 }, { "epoch": 0.6948302730537299, "grad_norm": 4.51616096496582, "learning_rate": 8.69888424943528e-05, "loss": 0.615, "step": 10255 }, { "epoch": 0.694898028321702, "grad_norm": 6.824566841125488, "learning_rate": 8.698747347525499e-05, "loss": 0.7369, "step": 10256 }, { "epoch": 0.6949657835896741, "grad_norm": 5.30488395690918, "learning_rate": 8.698610445615717e-05, "loss": 0.7246, "step": 10257 }, { "epoch": 0.6950335388576462, "grad_norm": 7.59017276763916, "learning_rate": 8.698473543705935e-05, "loss": 0.6782, "step": 10258 }, { "epoch": 0.6951012941256183, "grad_norm": 7.0920867919921875, "learning_rate": 8.698336641796153e-05, "loss": 0.8515, "step": 10259 }, { "epoch": 0.6951690493935904, "grad_norm": 6.791457653045654, "learning_rate": 8.698199739886372e-05, "loss": 0.761, "step": 10260 }, { "epoch": 0.6952368046615625, "grad_norm": 5.619175910949707, "learning_rate": 8.69806283797659e-05, "loss": 1.08, "step": 10261 }, { "epoch": 0.6953045599295345, "grad_norm": 6.623924732208252, "learning_rate": 8.697925936066808e-05, "loss": 0.8813, "step": 10262 }, { "epoch": 0.6953723151975066, "grad_norm": 5.582348823547363, "learning_rate": 8.697789034157026e-05, "loss": 0.8114, "step": 10263 }, { "epoch": 0.6954400704654787, "grad_norm": 6.643357753753662, "learning_rate": 8.697652132247244e-05, "loss": 0.8595, "step": 10264 }, { "epoch": 0.6955078257334508, "grad_norm": 5.2722296714782715, "learning_rate": 8.697515230337464e-05, "loss": 0.757, "step": 10265 }, { "epoch": 0.6955755810014229, "grad_norm": 6.885534286499023, "learning_rate": 8.697378328427682e-05, "loss": 0.9969, "step": 10266 }, { "epoch": 0.6956433362693949, "grad_norm": 7.001368522644043, "learning_rate": 8.6972414265179e-05, "loss": 0.9588, "step": 10267 }, { "epoch": 0.695711091537367, "grad_norm": 7.451569557189941, "learning_rate": 8.697104524608118e-05, "loss": 0.7535, "step": 10268 }, { "epoch": 0.6957788468053391, "grad_norm": 7.196292877197266, "learning_rate": 8.696967622698336e-05, "loss": 1.1084, "step": 10269 }, { "epoch": 0.6958466020733112, "grad_norm": 7.165349006652832, "learning_rate": 8.696830720788555e-05, "loss": 1.0226, "step": 10270 }, { "epoch": 0.6959143573412833, "grad_norm": 6.453275680541992, "learning_rate": 8.696693818878773e-05, "loss": 0.6691, "step": 10271 }, { "epoch": 0.6959821126092554, "grad_norm": 7.4177069664001465, "learning_rate": 8.696556916968991e-05, "loss": 0.6886, "step": 10272 }, { "epoch": 0.6960498678772274, "grad_norm": 5.137477874755859, "learning_rate": 8.696420015059211e-05, "loss": 0.9635, "step": 10273 }, { "epoch": 0.6961176231451995, "grad_norm": 7.988467693328857, "learning_rate": 8.696283113149429e-05, "loss": 0.7967, "step": 10274 }, { "epoch": 0.6961853784131716, "grad_norm": 7.050410747528076, "learning_rate": 8.696146211239647e-05, "loss": 1.0407, "step": 10275 }, { "epoch": 0.6962531336811437, "grad_norm": 6.711641311645508, "learning_rate": 8.696009309329866e-05, "loss": 0.8607, "step": 10276 }, { "epoch": 0.6963208889491158, "grad_norm": 4.641478061676025, "learning_rate": 8.695872407420084e-05, "loss": 0.6665, "step": 10277 }, { "epoch": 0.6963886442170879, "grad_norm": 5.737218379974365, "learning_rate": 8.695735505510302e-05, "loss": 0.6503, "step": 10278 }, { "epoch": 0.69645639948506, "grad_norm": 5.518801212310791, "learning_rate": 8.695598603600522e-05, "loss": 0.9215, "step": 10279 }, { "epoch": 0.6965241547530321, "grad_norm": 6.198950290679932, "learning_rate": 8.69546170169074e-05, "loss": 0.6259, "step": 10280 }, { "epoch": 0.6965919100210042, "grad_norm": 7.505566596984863, "learning_rate": 8.695324799780958e-05, "loss": 0.7902, "step": 10281 }, { "epoch": 0.6966596652889763, "grad_norm": 6.407822132110596, "learning_rate": 8.695187897871176e-05, "loss": 0.7535, "step": 10282 }, { "epoch": 0.6967274205569483, "grad_norm": 7.691595554351807, "learning_rate": 8.695050995961395e-05, "loss": 0.7941, "step": 10283 }, { "epoch": 0.6967951758249203, "grad_norm": 5.803621292114258, "learning_rate": 8.694914094051613e-05, "loss": 0.6529, "step": 10284 }, { "epoch": 0.6968629310928924, "grad_norm": 6.0364580154418945, "learning_rate": 8.694777192141831e-05, "loss": 0.6357, "step": 10285 }, { "epoch": 0.6969306863608645, "grad_norm": 6.369047164916992, "learning_rate": 8.694640290232049e-05, "loss": 0.7526, "step": 10286 }, { "epoch": 0.6969984416288366, "grad_norm": 5.736650466918945, "learning_rate": 8.694503388322267e-05, "loss": 0.945, "step": 10287 }, { "epoch": 0.6970661968968087, "grad_norm": 5.924343109130859, "learning_rate": 8.694366486412487e-05, "loss": 0.7378, "step": 10288 }, { "epoch": 0.6971339521647808, "grad_norm": 8.118910789489746, "learning_rate": 8.694229584502705e-05, "loss": 1.2927, "step": 10289 }, { "epoch": 0.6972017074327529, "grad_norm": 6.7456464767456055, "learning_rate": 8.694092682592923e-05, "loss": 0.8051, "step": 10290 }, { "epoch": 0.697269462700725, "grad_norm": 8.029818534851074, "learning_rate": 8.693955780683141e-05, "loss": 0.7865, "step": 10291 }, { "epoch": 0.6973372179686971, "grad_norm": 8.77468204498291, "learning_rate": 8.693818878773359e-05, "loss": 0.8901, "step": 10292 }, { "epoch": 0.6974049732366692, "grad_norm": 8.635099411010742, "learning_rate": 8.693681976863578e-05, "loss": 0.7982, "step": 10293 }, { "epoch": 0.6974727285046413, "grad_norm": 6.938762187957764, "learning_rate": 8.693545074953796e-05, "loss": 0.7571, "step": 10294 }, { "epoch": 0.6975404837726134, "grad_norm": 6.177728652954102, "learning_rate": 8.693408173044014e-05, "loss": 0.8976, "step": 10295 }, { "epoch": 0.6976082390405854, "grad_norm": 7.001784324645996, "learning_rate": 8.693271271134232e-05, "loss": 0.8799, "step": 10296 }, { "epoch": 0.6976759943085575, "grad_norm": 5.89376163482666, "learning_rate": 8.693134369224452e-05, "loss": 0.6843, "step": 10297 }, { "epoch": 0.6977437495765296, "grad_norm": 6.412653923034668, "learning_rate": 8.69299746731467e-05, "loss": 0.8198, "step": 10298 }, { "epoch": 0.6978115048445016, "grad_norm": 6.647368907928467, "learning_rate": 8.692860565404888e-05, "loss": 1.0389, "step": 10299 }, { "epoch": 0.6978792601124737, "grad_norm": 7.058552265167236, "learning_rate": 8.692723663495106e-05, "loss": 0.9025, "step": 10300 }, { "epoch": 0.6979470153804458, "grad_norm": 6.235774993896484, "learning_rate": 8.692586761585324e-05, "loss": 0.7788, "step": 10301 }, { "epoch": 0.6980147706484179, "grad_norm": 6.167398929595947, "learning_rate": 8.692449859675543e-05, "loss": 0.6807, "step": 10302 }, { "epoch": 0.69808252591639, "grad_norm": 5.845956802368164, "learning_rate": 8.692312957765761e-05, "loss": 0.8136, "step": 10303 }, { "epoch": 0.6981502811843621, "grad_norm": 6.548614025115967, "learning_rate": 8.692176055855979e-05, "loss": 0.7352, "step": 10304 }, { "epoch": 0.6982180364523342, "grad_norm": 6.432018756866455, "learning_rate": 8.692039153946197e-05, "loss": 0.9196, "step": 10305 }, { "epoch": 0.6982857917203062, "grad_norm": 7.851593971252441, "learning_rate": 8.691902252036417e-05, "loss": 1.1077, "step": 10306 }, { "epoch": 0.6983535469882783, "grad_norm": 6.037036895751953, "learning_rate": 8.691765350126635e-05, "loss": 0.6674, "step": 10307 }, { "epoch": 0.6984213022562504, "grad_norm": 6.833919048309326, "learning_rate": 8.691628448216853e-05, "loss": 1.1161, "step": 10308 }, { "epoch": 0.6984890575242225, "grad_norm": 7.268690586090088, "learning_rate": 8.691491546307071e-05, "loss": 0.8906, "step": 10309 }, { "epoch": 0.6985568127921946, "grad_norm": 5.807277679443359, "learning_rate": 8.691354644397289e-05, "loss": 0.8622, "step": 10310 }, { "epoch": 0.6986245680601667, "grad_norm": 6.888156414031982, "learning_rate": 8.691217742487508e-05, "loss": 0.9483, "step": 10311 }, { "epoch": 0.6986923233281388, "grad_norm": 5.432703971862793, "learning_rate": 8.691080840577726e-05, "loss": 0.6413, "step": 10312 }, { "epoch": 0.6987600785961109, "grad_norm": 6.416975021362305, "learning_rate": 8.690943938667944e-05, "loss": 0.862, "step": 10313 }, { "epoch": 0.698827833864083, "grad_norm": 5.268738746643066, "learning_rate": 8.690807036758162e-05, "loss": 0.6074, "step": 10314 }, { "epoch": 0.698895589132055, "grad_norm": 7.375731945037842, "learning_rate": 8.690670134848382e-05, "loss": 0.8446, "step": 10315 }, { "epoch": 0.698963344400027, "grad_norm": 5.654892921447754, "learning_rate": 8.6905332329386e-05, "loss": 0.7604, "step": 10316 }, { "epoch": 0.6990310996679991, "grad_norm": 6.280389308929443, "learning_rate": 8.690396331028818e-05, "loss": 0.7515, "step": 10317 }, { "epoch": 0.6990988549359712, "grad_norm": 4.915734767913818, "learning_rate": 8.690259429119036e-05, "loss": 0.8976, "step": 10318 }, { "epoch": 0.6991666102039433, "grad_norm": 6.705817222595215, "learning_rate": 8.690122527209255e-05, "loss": 0.7172, "step": 10319 }, { "epoch": 0.6992343654719154, "grad_norm": 5.9727253913879395, "learning_rate": 8.689985625299473e-05, "loss": 0.9687, "step": 10320 }, { "epoch": 0.6993021207398875, "grad_norm": 8.154400825500488, "learning_rate": 8.689848723389691e-05, "loss": 0.8834, "step": 10321 }, { "epoch": 0.6993698760078596, "grad_norm": 5.356873512268066, "learning_rate": 8.68971182147991e-05, "loss": 0.608, "step": 10322 }, { "epoch": 0.6994376312758317, "grad_norm": 7.385077476501465, "learning_rate": 8.689574919570129e-05, "loss": 0.7914, "step": 10323 }, { "epoch": 0.6995053865438038, "grad_norm": 5.762533664703369, "learning_rate": 8.689438017660347e-05, "loss": 0.7171, "step": 10324 }, { "epoch": 0.6995731418117759, "grad_norm": 6.16245698928833, "learning_rate": 8.689301115750566e-05, "loss": 0.7742, "step": 10325 }, { "epoch": 0.699640897079748, "grad_norm": 5.776895523071289, "learning_rate": 8.689164213840784e-05, "loss": 0.8008, "step": 10326 }, { "epoch": 0.6997086523477201, "grad_norm": 7.285096645355225, "learning_rate": 8.689027311931002e-05, "loss": 1.0003, "step": 10327 }, { "epoch": 0.6997764076156922, "grad_norm": 6.187610149383545, "learning_rate": 8.68889041002122e-05, "loss": 0.9697, "step": 10328 }, { "epoch": 0.6998441628836642, "grad_norm": 7.224822521209717, "learning_rate": 8.68875350811144e-05, "loss": 0.8965, "step": 10329 }, { "epoch": 0.6999119181516363, "grad_norm": 7.907904624938965, "learning_rate": 8.688616606201658e-05, "loss": 0.9466, "step": 10330 }, { "epoch": 0.6999796734196084, "grad_norm": 5.577702522277832, "learning_rate": 8.688479704291876e-05, "loss": 0.7779, "step": 10331 }, { "epoch": 0.7000474286875804, "grad_norm": 6.485890865325928, "learning_rate": 8.688342802382094e-05, "loss": 0.9368, "step": 10332 }, { "epoch": 0.7001151839555525, "grad_norm": 6.532778739929199, "learning_rate": 8.688205900472312e-05, "loss": 0.9044, "step": 10333 }, { "epoch": 0.7001829392235246, "grad_norm": 9.568724632263184, "learning_rate": 8.688068998562531e-05, "loss": 1.1657, "step": 10334 }, { "epoch": 0.7002506944914967, "grad_norm": 7.1607255935668945, "learning_rate": 8.687932096652749e-05, "loss": 0.8107, "step": 10335 }, { "epoch": 0.7003184497594688, "grad_norm": 7.112110614776611, "learning_rate": 8.687795194742967e-05, "loss": 0.8092, "step": 10336 }, { "epoch": 0.7003862050274409, "grad_norm": 6.201446056365967, "learning_rate": 8.687658292833185e-05, "loss": 0.8682, "step": 10337 }, { "epoch": 0.700453960295413, "grad_norm": 5.587967395782471, "learning_rate": 8.687521390923405e-05, "loss": 0.7769, "step": 10338 }, { "epoch": 0.700521715563385, "grad_norm": 4.441295623779297, "learning_rate": 8.687384489013623e-05, "loss": 0.5984, "step": 10339 }, { "epoch": 0.7005894708313571, "grad_norm": 7.061400413513184, "learning_rate": 8.687247587103841e-05, "loss": 0.7871, "step": 10340 }, { "epoch": 0.7006572260993292, "grad_norm": 6.004641532897949, "learning_rate": 8.687110685194059e-05, "loss": 0.8535, "step": 10341 }, { "epoch": 0.7007249813673013, "grad_norm": 6.329019546508789, "learning_rate": 8.686973783284277e-05, "loss": 0.7604, "step": 10342 }, { "epoch": 0.7007927366352734, "grad_norm": 5.995157718658447, "learning_rate": 8.686836881374496e-05, "loss": 0.8674, "step": 10343 }, { "epoch": 0.7008604919032455, "grad_norm": 6.5860915184021, "learning_rate": 8.686699979464714e-05, "loss": 0.6908, "step": 10344 }, { "epoch": 0.7009282471712176, "grad_norm": 7.01938009262085, "learning_rate": 8.686563077554932e-05, "loss": 0.7821, "step": 10345 }, { "epoch": 0.7009960024391897, "grad_norm": 4.958036422729492, "learning_rate": 8.68642617564515e-05, "loss": 0.7112, "step": 10346 }, { "epoch": 0.7010637577071618, "grad_norm": 5.658689022064209, "learning_rate": 8.686289273735368e-05, "loss": 0.768, "step": 10347 }, { "epoch": 0.7011315129751338, "grad_norm": 7.060564994812012, "learning_rate": 8.686152371825588e-05, "loss": 1.1327, "step": 10348 }, { "epoch": 0.7011992682431059, "grad_norm": 6.2527265548706055, "learning_rate": 8.686015469915806e-05, "loss": 0.8952, "step": 10349 }, { "epoch": 0.701267023511078, "grad_norm": 7.9083452224731445, "learning_rate": 8.685878568006024e-05, "loss": 0.9505, "step": 10350 }, { "epoch": 0.70133477877905, "grad_norm": 7.475040435791016, "learning_rate": 8.685741666096242e-05, "loss": 0.8404, "step": 10351 }, { "epoch": 0.7014025340470221, "grad_norm": 7.27475643157959, "learning_rate": 8.685604764186461e-05, "loss": 0.9485, "step": 10352 }, { "epoch": 0.7014702893149942, "grad_norm": 5.844339847564697, "learning_rate": 8.685467862276679e-05, "loss": 0.9179, "step": 10353 }, { "epoch": 0.7015380445829663, "grad_norm": 6.823174953460693, "learning_rate": 8.685330960366897e-05, "loss": 0.7872, "step": 10354 }, { "epoch": 0.7016057998509384, "grad_norm": 7.774914264678955, "learning_rate": 8.685194058457115e-05, "loss": 0.9215, "step": 10355 }, { "epoch": 0.7016735551189105, "grad_norm": 6.16814661026001, "learning_rate": 8.685057156547333e-05, "loss": 0.9785, "step": 10356 }, { "epoch": 0.7017413103868826, "grad_norm": 5.761654853820801, "learning_rate": 8.684920254637553e-05, "loss": 0.8459, "step": 10357 }, { "epoch": 0.7018090656548547, "grad_norm": 5.926375865936279, "learning_rate": 8.684783352727771e-05, "loss": 0.7908, "step": 10358 }, { "epoch": 0.7018768209228268, "grad_norm": 7.2848639488220215, "learning_rate": 8.684646450817989e-05, "loss": 0.8424, "step": 10359 }, { "epoch": 0.7019445761907989, "grad_norm": 6.377554416656494, "learning_rate": 8.684509548908207e-05, "loss": 0.8161, "step": 10360 }, { "epoch": 0.702012331458771, "grad_norm": 6.2031426429748535, "learning_rate": 8.684372646998426e-05, "loss": 0.9576, "step": 10361 }, { "epoch": 0.702080086726743, "grad_norm": 7.374354362487793, "learning_rate": 8.684235745088644e-05, "loss": 0.8022, "step": 10362 }, { "epoch": 0.7021478419947151, "grad_norm": 5.276646614074707, "learning_rate": 8.684098843178862e-05, "loss": 0.5603, "step": 10363 }, { "epoch": 0.7022155972626871, "grad_norm": 5.207109451293945, "learning_rate": 8.68396194126908e-05, "loss": 0.5605, "step": 10364 }, { "epoch": 0.7022833525306592, "grad_norm": 6.302850723266602, "learning_rate": 8.6838250393593e-05, "loss": 0.861, "step": 10365 }, { "epoch": 0.7023511077986313, "grad_norm": 5.8094072341918945, "learning_rate": 8.683688137449518e-05, "loss": 0.8374, "step": 10366 }, { "epoch": 0.7024188630666034, "grad_norm": 6.657436370849609, "learning_rate": 8.683551235539736e-05, "loss": 0.9263, "step": 10367 }, { "epoch": 0.7024866183345755, "grad_norm": 5.042036533355713, "learning_rate": 8.683414333629955e-05, "loss": 0.6188, "step": 10368 }, { "epoch": 0.7025543736025476, "grad_norm": 5.913759231567383, "learning_rate": 8.683277431720173e-05, "loss": 0.6699, "step": 10369 }, { "epoch": 0.7026221288705197, "grad_norm": 6.477380752563477, "learning_rate": 8.683140529810391e-05, "loss": 0.8353, "step": 10370 }, { "epoch": 0.7026898841384918, "grad_norm": 5.284722805023193, "learning_rate": 8.68300362790061e-05, "loss": 0.8633, "step": 10371 }, { "epoch": 0.7027576394064639, "grad_norm": 5.480528354644775, "learning_rate": 8.682866725990829e-05, "loss": 0.7068, "step": 10372 }, { "epoch": 0.7028253946744359, "grad_norm": 5.857044696807861, "learning_rate": 8.682729824081047e-05, "loss": 0.8696, "step": 10373 }, { "epoch": 0.702893149942408, "grad_norm": 6.4731764793396, "learning_rate": 8.682592922171265e-05, "loss": 0.7464, "step": 10374 }, { "epoch": 0.7029609052103801, "grad_norm": 7.0602827072143555, "learning_rate": 8.682456020261484e-05, "loss": 0.6128, "step": 10375 }, { "epoch": 0.7030286604783522, "grad_norm": 5.9556474685668945, "learning_rate": 8.682319118351702e-05, "loss": 0.7004, "step": 10376 }, { "epoch": 0.7030964157463243, "grad_norm": 7.673183917999268, "learning_rate": 8.68218221644192e-05, "loss": 0.7612, "step": 10377 }, { "epoch": 0.7031641710142964, "grad_norm": 7.043504238128662, "learning_rate": 8.682045314532138e-05, "loss": 1.1159, "step": 10378 }, { "epoch": 0.7032319262822685, "grad_norm": 6.224184513092041, "learning_rate": 8.681908412622356e-05, "loss": 0.7247, "step": 10379 }, { "epoch": 0.7032996815502406, "grad_norm": 7.104019641876221, "learning_rate": 8.681771510712576e-05, "loss": 0.8121, "step": 10380 }, { "epoch": 0.7033674368182126, "grad_norm": 6.4362263679504395, "learning_rate": 8.681634608802794e-05, "loss": 0.8133, "step": 10381 }, { "epoch": 0.7034351920861847, "grad_norm": 5.4112067222595215, "learning_rate": 8.681497706893012e-05, "loss": 0.8037, "step": 10382 }, { "epoch": 0.7035029473541567, "grad_norm": 8.056005477905273, "learning_rate": 8.68136080498323e-05, "loss": 1.0531, "step": 10383 }, { "epoch": 0.7035707026221288, "grad_norm": 6.620260715484619, "learning_rate": 8.681223903073449e-05, "loss": 0.8135, "step": 10384 }, { "epoch": 0.7036384578901009, "grad_norm": 5.953632354736328, "learning_rate": 8.681087001163667e-05, "loss": 0.9362, "step": 10385 }, { "epoch": 0.703706213158073, "grad_norm": 4.4729719161987305, "learning_rate": 8.680950099253885e-05, "loss": 0.8507, "step": 10386 }, { "epoch": 0.7037739684260451, "grad_norm": 6.998383522033691, "learning_rate": 8.680813197344103e-05, "loss": 0.8018, "step": 10387 }, { "epoch": 0.7038417236940172, "grad_norm": 5.445269584655762, "learning_rate": 8.680676295434321e-05, "loss": 0.7241, "step": 10388 }, { "epoch": 0.7039094789619893, "grad_norm": 7.320235729217529, "learning_rate": 8.68053939352454e-05, "loss": 1.0455, "step": 10389 }, { "epoch": 0.7039772342299614, "grad_norm": 7.40581750869751, "learning_rate": 8.680402491614759e-05, "loss": 0.8128, "step": 10390 }, { "epoch": 0.7040449894979335, "grad_norm": 6.813145637512207, "learning_rate": 8.680265589704977e-05, "loss": 0.9455, "step": 10391 }, { "epoch": 0.7041127447659056, "grad_norm": 5.903909683227539, "learning_rate": 8.680128687795195e-05, "loss": 0.6496, "step": 10392 }, { "epoch": 0.7041805000338777, "grad_norm": 4.9222846031188965, "learning_rate": 8.679991785885414e-05, "loss": 0.6605, "step": 10393 }, { "epoch": 0.7042482553018498, "grad_norm": 6.948107719421387, "learning_rate": 8.679854883975632e-05, "loss": 0.9015, "step": 10394 }, { "epoch": 0.7043160105698218, "grad_norm": 6.005917072296143, "learning_rate": 8.67971798206585e-05, "loss": 0.9129, "step": 10395 }, { "epoch": 0.7043837658377939, "grad_norm": 5.235043048858643, "learning_rate": 8.679581080156068e-05, "loss": 0.9046, "step": 10396 }, { "epoch": 0.7044515211057659, "grad_norm": 6.271544456481934, "learning_rate": 8.679444178246286e-05, "loss": 1.0159, "step": 10397 }, { "epoch": 0.704519276373738, "grad_norm": 5.432122707366943, "learning_rate": 8.679307276336506e-05, "loss": 0.8806, "step": 10398 }, { "epoch": 0.7045870316417101, "grad_norm": 5.534310817718506, "learning_rate": 8.679170374426724e-05, "loss": 0.8565, "step": 10399 }, { "epoch": 0.7046547869096822, "grad_norm": 6.202160835266113, "learning_rate": 8.679033472516942e-05, "loss": 1.1205, "step": 10400 }, { "epoch": 0.7047225421776543, "grad_norm": 7.187075614929199, "learning_rate": 8.67889657060716e-05, "loss": 0.8384, "step": 10401 }, { "epoch": 0.7047902974456264, "grad_norm": 4.62196159362793, "learning_rate": 8.678759668697378e-05, "loss": 0.6643, "step": 10402 }, { "epoch": 0.7048580527135985, "grad_norm": 10.150405883789062, "learning_rate": 8.678622766787597e-05, "loss": 0.6745, "step": 10403 }, { "epoch": 0.7049258079815706, "grad_norm": 6.843104362487793, "learning_rate": 8.678485864877815e-05, "loss": 0.7891, "step": 10404 }, { "epoch": 0.7049935632495427, "grad_norm": 6.199191570281982, "learning_rate": 8.678348962968033e-05, "loss": 0.7102, "step": 10405 }, { "epoch": 0.7050613185175147, "grad_norm": 5.368592739105225, "learning_rate": 8.678212061058251e-05, "loss": 0.8577, "step": 10406 }, { "epoch": 0.7051290737854868, "grad_norm": 4.696331977844238, "learning_rate": 8.67807515914847e-05, "loss": 0.6958, "step": 10407 }, { "epoch": 0.7051968290534589, "grad_norm": 6.961827754974365, "learning_rate": 8.677938257238689e-05, "loss": 1.0023, "step": 10408 }, { "epoch": 0.705264584321431, "grad_norm": 6.114429473876953, "learning_rate": 8.677801355328907e-05, "loss": 0.6351, "step": 10409 }, { "epoch": 0.7053323395894031, "grad_norm": 7.005643844604492, "learning_rate": 8.677664453419125e-05, "loss": 1.039, "step": 10410 }, { "epoch": 0.7054000948573752, "grad_norm": 5.262114524841309, "learning_rate": 8.677527551509344e-05, "loss": 0.7621, "step": 10411 }, { "epoch": 0.7054678501253473, "grad_norm": 6.364197731018066, "learning_rate": 8.677390649599562e-05, "loss": 0.8989, "step": 10412 }, { "epoch": 0.7055356053933193, "grad_norm": 5.497344970703125, "learning_rate": 8.67725374768978e-05, "loss": 0.6066, "step": 10413 }, { "epoch": 0.7056033606612914, "grad_norm": 6.382382869720459, "learning_rate": 8.67711684578e-05, "loss": 0.7266, "step": 10414 }, { "epoch": 0.7056711159292635, "grad_norm": 7.423126220703125, "learning_rate": 8.676979943870218e-05, "loss": 0.7454, "step": 10415 }, { "epoch": 0.7057388711972356, "grad_norm": 7.46668004989624, "learning_rate": 8.676843041960436e-05, "loss": 0.845, "step": 10416 }, { "epoch": 0.7058066264652076, "grad_norm": 5.152261734008789, "learning_rate": 8.676706140050655e-05, "loss": 0.8531, "step": 10417 }, { "epoch": 0.7058743817331797, "grad_norm": 8.402978897094727, "learning_rate": 8.676569238140873e-05, "loss": 1.0256, "step": 10418 }, { "epoch": 0.7059421370011518, "grad_norm": 5.3230299949646, "learning_rate": 8.676432336231091e-05, "loss": 0.7647, "step": 10419 }, { "epoch": 0.7060098922691239, "grad_norm": 7.257562160491943, "learning_rate": 8.676295434321309e-05, "loss": 0.8413, "step": 10420 }, { "epoch": 0.706077647537096, "grad_norm": 5.904243469238281, "learning_rate": 8.676158532411529e-05, "loss": 0.8503, "step": 10421 }, { "epoch": 0.7061454028050681, "grad_norm": 6.7053141593933105, "learning_rate": 8.676021630501747e-05, "loss": 0.8836, "step": 10422 }, { "epoch": 0.7062131580730402, "grad_norm": 7.1715407371521, "learning_rate": 8.675884728591965e-05, "loss": 0.8266, "step": 10423 }, { "epoch": 0.7062809133410123, "grad_norm": 6.313091278076172, "learning_rate": 8.675747826682183e-05, "loss": 0.6628, "step": 10424 }, { "epoch": 0.7063486686089844, "grad_norm": 5.576920986175537, "learning_rate": 8.675610924772401e-05, "loss": 0.7227, "step": 10425 }, { "epoch": 0.7064164238769565, "grad_norm": 6.882504463195801, "learning_rate": 8.67547402286262e-05, "loss": 0.8294, "step": 10426 }, { "epoch": 0.7064841791449286, "grad_norm": 8.857022285461426, "learning_rate": 8.675337120952838e-05, "loss": 0.5265, "step": 10427 }, { "epoch": 0.7065519344129007, "grad_norm": 6.785702228546143, "learning_rate": 8.675200219043056e-05, "loss": 0.8107, "step": 10428 }, { "epoch": 0.7066196896808727, "grad_norm": 7.2406415939331055, "learning_rate": 8.675063317133274e-05, "loss": 0.8089, "step": 10429 }, { "epoch": 0.7066874449488447, "grad_norm": 5.409148216247559, "learning_rate": 8.674926415223494e-05, "loss": 0.7191, "step": 10430 }, { "epoch": 0.7067552002168168, "grad_norm": 6.049896717071533, "learning_rate": 8.674789513313712e-05, "loss": 0.8811, "step": 10431 }, { "epoch": 0.7068229554847889, "grad_norm": 8.478447914123535, "learning_rate": 8.67465261140393e-05, "loss": 0.8117, "step": 10432 }, { "epoch": 0.706890710752761, "grad_norm": 6.473963260650635, "learning_rate": 8.674515709494148e-05, "loss": 1.0078, "step": 10433 }, { "epoch": 0.7069584660207331, "grad_norm": 5.57169771194458, "learning_rate": 8.674378807584366e-05, "loss": 0.6526, "step": 10434 }, { "epoch": 0.7070262212887052, "grad_norm": 5.3196492195129395, "learning_rate": 8.674241905674585e-05, "loss": 0.6312, "step": 10435 }, { "epoch": 0.7070939765566773, "grad_norm": 5.9507293701171875, "learning_rate": 8.674105003764803e-05, "loss": 0.7502, "step": 10436 }, { "epoch": 0.7071617318246494, "grad_norm": 5.272159099578857, "learning_rate": 8.673968101855021e-05, "loss": 0.7445, "step": 10437 }, { "epoch": 0.7072294870926215, "grad_norm": 8.225152969360352, "learning_rate": 8.673831199945239e-05, "loss": 0.9625, "step": 10438 }, { "epoch": 0.7072972423605935, "grad_norm": 5.791821479797363, "learning_rate": 8.673694298035459e-05, "loss": 0.7997, "step": 10439 }, { "epoch": 0.7073649976285656, "grad_norm": 6.391631126403809, "learning_rate": 8.673557396125677e-05, "loss": 0.8449, "step": 10440 }, { "epoch": 0.7074327528965377, "grad_norm": 6.157900333404541, "learning_rate": 8.673420494215895e-05, "loss": 0.8001, "step": 10441 }, { "epoch": 0.7075005081645098, "grad_norm": 5.64890193939209, "learning_rate": 8.673283592306113e-05, "loss": 0.6663, "step": 10442 }, { "epoch": 0.7075682634324819, "grad_norm": 7.436509132385254, "learning_rate": 8.673146690396331e-05, "loss": 0.8742, "step": 10443 }, { "epoch": 0.707636018700454, "grad_norm": 4.78845739364624, "learning_rate": 8.67300978848655e-05, "loss": 0.5905, "step": 10444 }, { "epoch": 0.7077037739684261, "grad_norm": 7.130674362182617, "learning_rate": 8.672872886576768e-05, "loss": 1.2396, "step": 10445 }, { "epoch": 0.7077715292363981, "grad_norm": 7.3212761878967285, "learning_rate": 8.672735984666986e-05, "loss": 0.8896, "step": 10446 }, { "epoch": 0.7078392845043702, "grad_norm": 7.6907548904418945, "learning_rate": 8.672599082757204e-05, "loss": 1.1194, "step": 10447 }, { "epoch": 0.7079070397723423, "grad_norm": 6.078713417053223, "learning_rate": 8.672462180847424e-05, "loss": 0.8578, "step": 10448 }, { "epoch": 0.7079747950403144, "grad_norm": 6.047597408294678, "learning_rate": 8.672325278937642e-05, "loss": 0.6988, "step": 10449 }, { "epoch": 0.7080425503082864, "grad_norm": 6.882000923156738, "learning_rate": 8.67218837702786e-05, "loss": 1.0003, "step": 10450 }, { "epoch": 0.7081103055762585, "grad_norm": 7.0581560134887695, "learning_rate": 8.672051475118078e-05, "loss": 0.8585, "step": 10451 }, { "epoch": 0.7081780608442306, "grad_norm": 5.636070728302002, "learning_rate": 8.671914573208296e-05, "loss": 0.9355, "step": 10452 }, { "epoch": 0.7082458161122027, "grad_norm": 7.167375564575195, "learning_rate": 8.671777671298515e-05, "loss": 0.8098, "step": 10453 }, { "epoch": 0.7083135713801748, "grad_norm": 6.989759922027588, "learning_rate": 8.671640769388733e-05, "loss": 1.1052, "step": 10454 }, { "epoch": 0.7083813266481469, "grad_norm": 5.774247646331787, "learning_rate": 8.671503867478951e-05, "loss": 0.8362, "step": 10455 }, { "epoch": 0.708449081916119, "grad_norm": 5.6326518058776855, "learning_rate": 8.671366965569169e-05, "loss": 0.644, "step": 10456 }, { "epoch": 0.7085168371840911, "grad_norm": 6.38750696182251, "learning_rate": 8.671230063659389e-05, "loss": 0.7756, "step": 10457 }, { "epoch": 0.7085845924520632, "grad_norm": 6.129147529602051, "learning_rate": 8.671093161749607e-05, "loss": 0.8983, "step": 10458 }, { "epoch": 0.7086523477200353, "grad_norm": 7.424493789672852, "learning_rate": 8.670956259839825e-05, "loss": 0.9429, "step": 10459 }, { "epoch": 0.7087201029880074, "grad_norm": 6.838191509246826, "learning_rate": 8.670819357930044e-05, "loss": 0.7546, "step": 10460 }, { "epoch": 0.7087878582559795, "grad_norm": 5.380428791046143, "learning_rate": 8.670682456020262e-05, "loss": 0.6884, "step": 10461 }, { "epoch": 0.7088556135239514, "grad_norm": 5.4953203201293945, "learning_rate": 8.67054555411048e-05, "loss": 1.0417, "step": 10462 }, { "epoch": 0.7089233687919235, "grad_norm": 6.5481133460998535, "learning_rate": 8.6704086522007e-05, "loss": 0.7382, "step": 10463 }, { "epoch": 0.7089911240598956, "grad_norm": 7.717205047607422, "learning_rate": 8.670271750290918e-05, "loss": 0.9389, "step": 10464 }, { "epoch": 0.7090588793278677, "grad_norm": 6.287739276885986, "learning_rate": 8.670134848381136e-05, "loss": 0.9393, "step": 10465 }, { "epoch": 0.7091266345958398, "grad_norm": 5.565641403198242, "learning_rate": 8.669997946471354e-05, "loss": 0.758, "step": 10466 }, { "epoch": 0.7091943898638119, "grad_norm": 5.262805938720703, "learning_rate": 8.669861044561573e-05, "loss": 0.792, "step": 10467 }, { "epoch": 0.709262145131784, "grad_norm": 5.002110004425049, "learning_rate": 8.669724142651791e-05, "loss": 0.7365, "step": 10468 }, { "epoch": 0.7093299003997561, "grad_norm": 6.84413480758667, "learning_rate": 8.669587240742009e-05, "loss": 1.0521, "step": 10469 }, { "epoch": 0.7093976556677282, "grad_norm": 6.899505138397217, "learning_rate": 8.669450338832227e-05, "loss": 0.795, "step": 10470 }, { "epoch": 0.7094654109357003, "grad_norm": 5.376099109649658, "learning_rate": 8.669313436922447e-05, "loss": 0.51, "step": 10471 }, { "epoch": 0.7095331662036723, "grad_norm": 6.934320449829102, "learning_rate": 8.669176535012665e-05, "loss": 0.9405, "step": 10472 }, { "epoch": 0.7096009214716444, "grad_norm": 5.896731376647949, "learning_rate": 8.669039633102883e-05, "loss": 0.6869, "step": 10473 }, { "epoch": 0.7096686767396165, "grad_norm": 5.4463887214660645, "learning_rate": 8.6689027311931e-05, "loss": 0.8513, "step": 10474 }, { "epoch": 0.7097364320075886, "grad_norm": 6.024421215057373, "learning_rate": 8.668765829283319e-05, "loss": 0.7993, "step": 10475 }, { "epoch": 0.7098041872755607, "grad_norm": 7.861370086669922, "learning_rate": 8.668628927373538e-05, "loss": 0.7246, "step": 10476 }, { "epoch": 0.7098719425435328, "grad_norm": 5.0704779624938965, "learning_rate": 8.668492025463756e-05, "loss": 0.6759, "step": 10477 }, { "epoch": 0.7099396978115049, "grad_norm": 6.787322998046875, "learning_rate": 8.668355123553974e-05, "loss": 0.8075, "step": 10478 }, { "epoch": 0.7100074530794769, "grad_norm": 5.564799785614014, "learning_rate": 8.668218221644192e-05, "loss": 0.7596, "step": 10479 }, { "epoch": 0.710075208347449, "grad_norm": 6.072136402130127, "learning_rate": 8.66808131973441e-05, "loss": 0.6902, "step": 10480 }, { "epoch": 0.7101429636154211, "grad_norm": 6.825998783111572, "learning_rate": 8.66794441782463e-05, "loss": 0.7987, "step": 10481 }, { "epoch": 0.7102107188833932, "grad_norm": 6.803398609161377, "learning_rate": 8.667807515914848e-05, "loss": 0.8946, "step": 10482 }, { "epoch": 0.7102784741513652, "grad_norm": 5.5623250007629395, "learning_rate": 8.667670614005066e-05, "loss": 0.9588, "step": 10483 }, { "epoch": 0.7103462294193373, "grad_norm": 6.420827865600586, "learning_rate": 8.667533712095284e-05, "loss": 0.9871, "step": 10484 }, { "epoch": 0.7104139846873094, "grad_norm": 5.774916172027588, "learning_rate": 8.667396810185503e-05, "loss": 0.7826, "step": 10485 }, { "epoch": 0.7104817399552815, "grad_norm": 6.701958656311035, "learning_rate": 8.667259908275721e-05, "loss": 0.9528, "step": 10486 }, { "epoch": 0.7105494952232536, "grad_norm": 6.663124084472656, "learning_rate": 8.667123006365939e-05, "loss": 0.7279, "step": 10487 }, { "epoch": 0.7106172504912257, "grad_norm": 6.165869235992432, "learning_rate": 8.666986104456157e-05, "loss": 0.7835, "step": 10488 }, { "epoch": 0.7106850057591978, "grad_norm": 5.795663356781006, "learning_rate": 8.666849202546375e-05, "loss": 0.6844, "step": 10489 }, { "epoch": 0.7107527610271699, "grad_norm": 5.601436138153076, "learning_rate": 8.666712300636595e-05, "loss": 0.7844, "step": 10490 }, { "epoch": 0.710820516295142, "grad_norm": 6.733765125274658, "learning_rate": 8.666575398726813e-05, "loss": 0.7687, "step": 10491 }, { "epoch": 0.7108882715631141, "grad_norm": 6.032510757446289, "learning_rate": 8.66643849681703e-05, "loss": 0.7252, "step": 10492 }, { "epoch": 0.7109560268310862, "grad_norm": 4.691253662109375, "learning_rate": 8.666301594907249e-05, "loss": 0.7354, "step": 10493 }, { "epoch": 0.7110237820990583, "grad_norm": 6.740907669067383, "learning_rate": 8.666164692997468e-05, "loss": 0.854, "step": 10494 }, { "epoch": 0.7110915373670302, "grad_norm": 6.258440971374512, "learning_rate": 8.666027791087686e-05, "loss": 0.7787, "step": 10495 }, { "epoch": 0.7111592926350023, "grad_norm": 5.37103271484375, "learning_rate": 8.665890889177904e-05, "loss": 0.8159, "step": 10496 }, { "epoch": 0.7112270479029744, "grad_norm": 5.726749420166016, "learning_rate": 8.665753987268122e-05, "loss": 1.0529, "step": 10497 }, { "epoch": 0.7112948031709465, "grad_norm": 6.467258930206299, "learning_rate": 8.66561708535834e-05, "loss": 0.7445, "step": 10498 }, { "epoch": 0.7113625584389186, "grad_norm": 5.931604385375977, "learning_rate": 8.66548018344856e-05, "loss": 0.6146, "step": 10499 }, { "epoch": 0.7114303137068907, "grad_norm": 7.676519393920898, "learning_rate": 8.665343281538778e-05, "loss": 0.9234, "step": 10500 }, { "epoch": 0.7114980689748628, "grad_norm": 6.444290637969971, "learning_rate": 8.665206379628996e-05, "loss": 0.8529, "step": 10501 }, { "epoch": 0.7115658242428349, "grad_norm": 6.420405864715576, "learning_rate": 8.665069477719214e-05, "loss": 0.8439, "step": 10502 }, { "epoch": 0.711633579510807, "grad_norm": 5.584212779998779, "learning_rate": 8.664932575809432e-05, "loss": 0.6583, "step": 10503 }, { "epoch": 0.7117013347787791, "grad_norm": 6.1522746086120605, "learning_rate": 8.664795673899651e-05, "loss": 0.7773, "step": 10504 }, { "epoch": 0.7117690900467512, "grad_norm": 6.573955535888672, "learning_rate": 8.664658771989869e-05, "loss": 0.7553, "step": 10505 }, { "epoch": 0.7118368453147232, "grad_norm": 7.660068988800049, "learning_rate": 8.664521870080087e-05, "loss": 0.7807, "step": 10506 }, { "epoch": 0.7119046005826953, "grad_norm": 6.398780822753906, "learning_rate": 8.664384968170307e-05, "loss": 0.7814, "step": 10507 }, { "epoch": 0.7119723558506674, "grad_norm": 6.873563766479492, "learning_rate": 8.664248066260525e-05, "loss": 0.8067, "step": 10508 }, { "epoch": 0.7120401111186395, "grad_norm": 6.932216644287109, "learning_rate": 8.664111164350743e-05, "loss": 0.9248, "step": 10509 }, { "epoch": 0.7121078663866116, "grad_norm": 6.539022445678711, "learning_rate": 8.663974262440962e-05, "loss": 0.8059, "step": 10510 }, { "epoch": 0.7121756216545836, "grad_norm": 6.882415771484375, "learning_rate": 8.66383736053118e-05, "loss": 0.853, "step": 10511 }, { "epoch": 0.7122433769225557, "grad_norm": 7.576079368591309, "learning_rate": 8.663700458621398e-05, "loss": 1.1152, "step": 10512 }, { "epoch": 0.7123111321905278, "grad_norm": 5.934848785400391, "learning_rate": 8.663563556711617e-05, "loss": 0.7804, "step": 10513 }, { "epoch": 0.7123788874584999, "grad_norm": 5.297085762023926, "learning_rate": 8.663426654801836e-05, "loss": 0.6543, "step": 10514 }, { "epoch": 0.712446642726472, "grad_norm": 5.618426322937012, "learning_rate": 8.663289752892054e-05, "loss": 0.5286, "step": 10515 }, { "epoch": 0.712514397994444, "grad_norm": 5.978342533111572, "learning_rate": 8.663152850982272e-05, "loss": 0.6195, "step": 10516 }, { "epoch": 0.7125821532624161, "grad_norm": 7.146844863891602, "learning_rate": 8.663015949072491e-05, "loss": 0.9239, "step": 10517 }, { "epoch": 0.7126499085303882, "grad_norm": 7.71320915222168, "learning_rate": 8.662879047162709e-05, "loss": 0.8047, "step": 10518 }, { "epoch": 0.7127176637983603, "grad_norm": 5.022526741027832, "learning_rate": 8.662742145252927e-05, "loss": 0.5567, "step": 10519 }, { "epoch": 0.7127854190663324, "grad_norm": 6.884553909301758, "learning_rate": 8.662605243343145e-05, "loss": 0.9116, "step": 10520 }, { "epoch": 0.7128531743343045, "grad_norm": 9.86026668548584, "learning_rate": 8.662468341433363e-05, "loss": 1.1616, "step": 10521 }, { "epoch": 0.7129209296022766, "grad_norm": 6.307768821716309, "learning_rate": 8.662331439523583e-05, "loss": 0.8304, "step": 10522 }, { "epoch": 0.7129886848702487, "grad_norm": 8.609663009643555, "learning_rate": 8.6621945376138e-05, "loss": 0.6635, "step": 10523 }, { "epoch": 0.7130564401382208, "grad_norm": 5.718436241149902, "learning_rate": 8.662057635704019e-05, "loss": 0.889, "step": 10524 }, { "epoch": 0.7131241954061929, "grad_norm": 6.126955986022949, "learning_rate": 8.661920733794237e-05, "loss": 0.6973, "step": 10525 }, { "epoch": 0.713191950674165, "grad_norm": 6.689998149871826, "learning_rate": 8.661783831884456e-05, "loss": 0.6399, "step": 10526 }, { "epoch": 0.713259705942137, "grad_norm": 6.500828742980957, "learning_rate": 8.661646929974674e-05, "loss": 1.0992, "step": 10527 }, { "epoch": 0.713327461210109, "grad_norm": 7.04468297958374, "learning_rate": 8.661510028064892e-05, "loss": 1.0635, "step": 10528 }, { "epoch": 0.7133952164780811, "grad_norm": 6.968896865844727, "learning_rate": 8.66137312615511e-05, "loss": 0.8095, "step": 10529 }, { "epoch": 0.7134629717460532, "grad_norm": 7.557732105255127, "learning_rate": 8.661236224245328e-05, "loss": 0.6779, "step": 10530 }, { "epoch": 0.7135307270140253, "grad_norm": 4.746248245239258, "learning_rate": 8.661099322335548e-05, "loss": 0.7275, "step": 10531 }, { "epoch": 0.7135984822819974, "grad_norm": 7.140705108642578, "learning_rate": 8.660962420425766e-05, "loss": 0.9274, "step": 10532 }, { "epoch": 0.7136662375499695, "grad_norm": 6.661166191101074, "learning_rate": 8.660825518515984e-05, "loss": 0.8924, "step": 10533 }, { "epoch": 0.7137339928179416, "grad_norm": 6.4814653396606445, "learning_rate": 8.660688616606202e-05, "loss": 0.7357, "step": 10534 }, { "epoch": 0.7138017480859137, "grad_norm": 9.411799430847168, "learning_rate": 8.66055171469642e-05, "loss": 0.5209, "step": 10535 }, { "epoch": 0.7138695033538858, "grad_norm": 5.223617076873779, "learning_rate": 8.660414812786639e-05, "loss": 0.7623, "step": 10536 }, { "epoch": 0.7139372586218579, "grad_norm": 8.094182014465332, "learning_rate": 8.660277910876857e-05, "loss": 1.0194, "step": 10537 }, { "epoch": 0.71400501388983, "grad_norm": 5.444286823272705, "learning_rate": 8.660141008967075e-05, "loss": 0.8126, "step": 10538 }, { "epoch": 0.714072769157802, "grad_norm": 4.902561664581299, "learning_rate": 8.660004107057293e-05, "loss": 0.6524, "step": 10539 }, { "epoch": 0.7141405244257741, "grad_norm": 7.155951023101807, "learning_rate": 8.659867205147513e-05, "loss": 0.9928, "step": 10540 }, { "epoch": 0.7142082796937462, "grad_norm": 6.7633538246154785, "learning_rate": 8.65973030323773e-05, "loss": 1.0564, "step": 10541 }, { "epoch": 0.7142760349617183, "grad_norm": 6.050258636474609, "learning_rate": 8.659593401327949e-05, "loss": 0.8051, "step": 10542 }, { "epoch": 0.7143437902296904, "grad_norm": 4.88824987411499, "learning_rate": 8.659456499418167e-05, "loss": 0.6135, "step": 10543 }, { "epoch": 0.7144115454976624, "grad_norm": 5.773684501647949, "learning_rate": 8.659319597508385e-05, "loss": 0.5467, "step": 10544 }, { "epoch": 0.7144793007656345, "grad_norm": 7.082754611968994, "learning_rate": 8.659182695598604e-05, "loss": 0.7677, "step": 10545 }, { "epoch": 0.7145470560336066, "grad_norm": 5.4242424964904785, "learning_rate": 8.659045793688822e-05, "loss": 0.7915, "step": 10546 }, { "epoch": 0.7146148113015787, "grad_norm": 5.280063152313232, "learning_rate": 8.65890889177904e-05, "loss": 0.5614, "step": 10547 }, { "epoch": 0.7146825665695508, "grad_norm": 6.720800876617432, "learning_rate": 8.658771989869258e-05, "loss": 1.064, "step": 10548 }, { "epoch": 0.7147503218375229, "grad_norm": 7.908580303192139, "learning_rate": 8.658635087959478e-05, "loss": 0.7564, "step": 10549 }, { "epoch": 0.7148180771054949, "grad_norm": 6.164776802062988, "learning_rate": 8.658498186049696e-05, "loss": 0.8359, "step": 10550 }, { "epoch": 0.714885832373467, "grad_norm": 5.345958709716797, "learning_rate": 8.658361284139914e-05, "loss": 0.9018, "step": 10551 }, { "epoch": 0.7149535876414391, "grad_norm": 6.751514911651611, "learning_rate": 8.658224382230132e-05, "loss": 0.9795, "step": 10552 }, { "epoch": 0.7150213429094112, "grad_norm": 5.850390911102295, "learning_rate": 8.658087480320351e-05, "loss": 0.8055, "step": 10553 }, { "epoch": 0.7150890981773833, "grad_norm": 6.081165790557861, "learning_rate": 8.657950578410569e-05, "loss": 0.7952, "step": 10554 }, { "epoch": 0.7151568534453554, "grad_norm": 5.211760997772217, "learning_rate": 8.657813676500787e-05, "loss": 0.6467, "step": 10555 }, { "epoch": 0.7152246087133275, "grad_norm": 7.24871826171875, "learning_rate": 8.657676774591007e-05, "loss": 0.8434, "step": 10556 }, { "epoch": 0.7152923639812996, "grad_norm": 6.2204413414001465, "learning_rate": 8.657539872681225e-05, "loss": 0.7948, "step": 10557 }, { "epoch": 0.7153601192492717, "grad_norm": 6.038403511047363, "learning_rate": 8.657402970771443e-05, "loss": 0.69, "step": 10558 }, { "epoch": 0.7154278745172438, "grad_norm": 6.616792678833008, "learning_rate": 8.657266068861662e-05, "loss": 0.9007, "step": 10559 }, { "epoch": 0.7154956297852157, "grad_norm": 6.901274681091309, "learning_rate": 8.65712916695188e-05, "loss": 0.8176, "step": 10560 }, { "epoch": 0.7155633850531878, "grad_norm": 6.145236015319824, "learning_rate": 8.656992265042098e-05, "loss": 0.6697, "step": 10561 }, { "epoch": 0.7156311403211599, "grad_norm": 6.30226993560791, "learning_rate": 8.656855363132316e-05, "loss": 0.7585, "step": 10562 }, { "epoch": 0.715698895589132, "grad_norm": 5.349961757659912, "learning_rate": 8.656718461222535e-05, "loss": 0.6811, "step": 10563 }, { "epoch": 0.7157666508571041, "grad_norm": 6.9230170249938965, "learning_rate": 8.656581559312753e-05, "loss": 0.7924, "step": 10564 }, { "epoch": 0.7158344061250762, "grad_norm": 6.30393123626709, "learning_rate": 8.656444657402972e-05, "loss": 0.8558, "step": 10565 }, { "epoch": 0.7159021613930483, "grad_norm": 7.642063617706299, "learning_rate": 8.65630775549319e-05, "loss": 0.8416, "step": 10566 }, { "epoch": 0.7159699166610204, "grad_norm": 6.944372653961182, "learning_rate": 8.656170853583408e-05, "loss": 1.0213, "step": 10567 }, { "epoch": 0.7160376719289925, "grad_norm": 6.925499439239502, "learning_rate": 8.656033951673627e-05, "loss": 0.8486, "step": 10568 }, { "epoch": 0.7161054271969646, "grad_norm": 5.875875949859619, "learning_rate": 8.655897049763845e-05, "loss": 0.8515, "step": 10569 }, { "epoch": 0.7161731824649367, "grad_norm": 9.030150413513184, "learning_rate": 8.655760147854063e-05, "loss": 0.9014, "step": 10570 }, { "epoch": 0.7162409377329088, "grad_norm": 5.825559139251709, "learning_rate": 8.655623245944281e-05, "loss": 0.8333, "step": 10571 }, { "epoch": 0.7163086930008808, "grad_norm": 5.936555862426758, "learning_rate": 8.6554863440345e-05, "loss": 0.6806, "step": 10572 }, { "epoch": 0.7163764482688529, "grad_norm": 5.446226596832275, "learning_rate": 8.655349442124719e-05, "loss": 0.687, "step": 10573 }, { "epoch": 0.716444203536825, "grad_norm": 7.467900276184082, "learning_rate": 8.655212540214937e-05, "loss": 1.1386, "step": 10574 }, { "epoch": 0.7165119588047971, "grad_norm": 6.0190534591674805, "learning_rate": 8.655075638305155e-05, "loss": 0.9846, "step": 10575 }, { "epoch": 0.7165797140727691, "grad_norm": 6.0063252449035645, "learning_rate": 8.654938736395373e-05, "loss": 0.7755, "step": 10576 }, { "epoch": 0.7166474693407412, "grad_norm": 7.271022796630859, "learning_rate": 8.654801834485592e-05, "loss": 0.8129, "step": 10577 }, { "epoch": 0.7167152246087133, "grad_norm": 5.204225063323975, "learning_rate": 8.65466493257581e-05, "loss": 0.707, "step": 10578 }, { "epoch": 0.7167829798766854, "grad_norm": 6.814970016479492, "learning_rate": 8.654528030666028e-05, "loss": 0.9433, "step": 10579 }, { "epoch": 0.7168507351446575, "grad_norm": 9.155210494995117, "learning_rate": 8.654391128756246e-05, "loss": 0.8058, "step": 10580 }, { "epoch": 0.7169184904126296, "grad_norm": 6.666374683380127, "learning_rate": 8.654254226846465e-05, "loss": 0.9789, "step": 10581 }, { "epoch": 0.7169862456806017, "grad_norm": 5.498271465301514, "learning_rate": 8.654117324936684e-05, "loss": 0.7266, "step": 10582 }, { "epoch": 0.7170540009485737, "grad_norm": 6.632149696350098, "learning_rate": 8.653980423026902e-05, "loss": 0.8929, "step": 10583 }, { "epoch": 0.7171217562165458, "grad_norm": 6.820444107055664, "learning_rate": 8.65384352111712e-05, "loss": 0.7493, "step": 10584 }, { "epoch": 0.7171895114845179, "grad_norm": 9.759723663330078, "learning_rate": 8.653706619207338e-05, "loss": 0.8465, "step": 10585 }, { "epoch": 0.71725726675249, "grad_norm": 6.131860256195068, "learning_rate": 8.653569717297557e-05, "loss": 0.8678, "step": 10586 }, { "epoch": 0.7173250220204621, "grad_norm": 5.567459583282471, "learning_rate": 8.653432815387775e-05, "loss": 0.7342, "step": 10587 }, { "epoch": 0.7173927772884342, "grad_norm": 4.433963775634766, "learning_rate": 8.653295913477993e-05, "loss": 0.7901, "step": 10588 }, { "epoch": 0.7174605325564063, "grad_norm": 5.557954788208008, "learning_rate": 8.653159011568211e-05, "loss": 0.6189, "step": 10589 }, { "epoch": 0.7175282878243784, "grad_norm": 6.555310249328613, "learning_rate": 8.653022109658429e-05, "loss": 0.8554, "step": 10590 }, { "epoch": 0.7175960430923505, "grad_norm": 7.396895408630371, "learning_rate": 8.652885207748649e-05, "loss": 0.9261, "step": 10591 }, { "epoch": 0.7176637983603226, "grad_norm": 5.6380181312561035, "learning_rate": 8.652748305838867e-05, "loss": 0.7968, "step": 10592 }, { "epoch": 0.7177315536282945, "grad_norm": 5.689277648925781, "learning_rate": 8.652611403929085e-05, "loss": 0.7951, "step": 10593 }, { "epoch": 0.7177993088962666, "grad_norm": 5.647032737731934, "learning_rate": 8.652474502019303e-05, "loss": 0.8273, "step": 10594 }, { "epoch": 0.7178670641642387, "grad_norm": 6.165719985961914, "learning_rate": 8.652337600109522e-05, "loss": 0.7203, "step": 10595 }, { "epoch": 0.7179348194322108, "grad_norm": 5.114332675933838, "learning_rate": 8.65220069819974e-05, "loss": 0.6582, "step": 10596 }, { "epoch": 0.7180025747001829, "grad_norm": 6.3832879066467285, "learning_rate": 8.652063796289958e-05, "loss": 0.8028, "step": 10597 }, { "epoch": 0.718070329968155, "grad_norm": 5.82213020324707, "learning_rate": 8.651926894380176e-05, "loss": 0.5738, "step": 10598 }, { "epoch": 0.7181380852361271, "grad_norm": 6.337172031402588, "learning_rate": 8.651789992470396e-05, "loss": 0.8797, "step": 10599 }, { "epoch": 0.7182058405040992, "grad_norm": 5.312211513519287, "learning_rate": 8.651653090560614e-05, "loss": 0.732, "step": 10600 }, { "epoch": 0.7182735957720713, "grad_norm": 8.132328033447266, "learning_rate": 8.651516188650832e-05, "loss": 0.9214, "step": 10601 }, { "epoch": 0.7183413510400434, "grad_norm": 6.073488235473633, "learning_rate": 8.651379286741051e-05, "loss": 0.7914, "step": 10602 }, { "epoch": 0.7184091063080155, "grad_norm": 4.74514102935791, "learning_rate": 8.651242384831269e-05, "loss": 0.7396, "step": 10603 }, { "epoch": 0.7184768615759876, "grad_norm": 6.970630645751953, "learning_rate": 8.651105482921487e-05, "loss": 0.6759, "step": 10604 }, { "epoch": 0.7185446168439596, "grad_norm": 5.5301408767700195, "learning_rate": 8.650968581011706e-05, "loss": 0.7496, "step": 10605 }, { "epoch": 0.7186123721119317, "grad_norm": 9.243334770202637, "learning_rate": 8.650831679101924e-05, "loss": 0.9962, "step": 10606 }, { "epoch": 0.7186801273799038, "grad_norm": 4.740606784820557, "learning_rate": 8.650694777192143e-05, "loss": 0.5528, "step": 10607 }, { "epoch": 0.7187478826478759, "grad_norm": 6.146499156951904, "learning_rate": 8.65055787528236e-05, "loss": 0.8339, "step": 10608 }, { "epoch": 0.7188156379158479, "grad_norm": 6.534127235412598, "learning_rate": 8.65042097337258e-05, "loss": 1.1163, "step": 10609 }, { "epoch": 0.71888339318382, "grad_norm": 8.349403381347656, "learning_rate": 8.650284071462798e-05, "loss": 0.8872, "step": 10610 }, { "epoch": 0.7189511484517921, "grad_norm": 4.883057117462158, "learning_rate": 8.650147169553016e-05, "loss": 0.7221, "step": 10611 }, { "epoch": 0.7190189037197642, "grad_norm": 9.08403491973877, "learning_rate": 8.650010267643234e-05, "loss": 0.9264, "step": 10612 }, { "epoch": 0.7190866589877363, "grad_norm": 8.753477096557617, "learning_rate": 8.649873365733452e-05, "loss": 0.715, "step": 10613 }, { "epoch": 0.7191544142557084, "grad_norm": 6.945448875427246, "learning_rate": 8.649736463823671e-05, "loss": 0.7706, "step": 10614 }, { "epoch": 0.7192221695236805, "grad_norm": 6.655423164367676, "learning_rate": 8.64959956191389e-05, "loss": 0.7832, "step": 10615 }, { "epoch": 0.7192899247916525, "grad_norm": 6.322832107543945, "learning_rate": 8.649462660004108e-05, "loss": 0.8017, "step": 10616 }, { "epoch": 0.7193576800596246, "grad_norm": 6.454827785491943, "learning_rate": 8.649325758094326e-05, "loss": 0.7492, "step": 10617 }, { "epoch": 0.7194254353275967, "grad_norm": 7.011631011962891, "learning_rate": 8.649188856184545e-05, "loss": 0.7562, "step": 10618 }, { "epoch": 0.7194931905955688, "grad_norm": 6.621539115905762, "learning_rate": 8.649051954274763e-05, "loss": 0.76, "step": 10619 }, { "epoch": 0.7195609458635409, "grad_norm": 8.84100341796875, "learning_rate": 8.648915052364981e-05, "loss": 0.9858, "step": 10620 }, { "epoch": 0.719628701131513, "grad_norm": 6.356812000274658, "learning_rate": 8.648778150455199e-05, "loss": 0.7553, "step": 10621 }, { "epoch": 0.7196964563994851, "grad_norm": 6.760133743286133, "learning_rate": 8.648641248545417e-05, "loss": 0.8642, "step": 10622 }, { "epoch": 0.7197642116674572, "grad_norm": 6.104750633239746, "learning_rate": 8.648504346635636e-05, "loss": 0.6905, "step": 10623 }, { "epoch": 0.7198319669354293, "grad_norm": 6.207709312438965, "learning_rate": 8.648367444725855e-05, "loss": 0.8549, "step": 10624 }, { "epoch": 0.7198997222034013, "grad_norm": 6.436330795288086, "learning_rate": 8.648230542816073e-05, "loss": 0.7064, "step": 10625 }, { "epoch": 0.7199674774713734, "grad_norm": 5.475677967071533, "learning_rate": 8.64809364090629e-05, "loss": 0.6146, "step": 10626 }, { "epoch": 0.7200352327393454, "grad_norm": 9.833735466003418, "learning_rate": 8.64795673899651e-05, "loss": 0.7222, "step": 10627 }, { "epoch": 0.7201029880073175, "grad_norm": 8.227372169494629, "learning_rate": 8.647819837086728e-05, "loss": 0.9564, "step": 10628 }, { "epoch": 0.7201707432752896, "grad_norm": 7.26641321182251, "learning_rate": 8.647682935176946e-05, "loss": 0.796, "step": 10629 }, { "epoch": 0.7202384985432617, "grad_norm": 6.712799549102783, "learning_rate": 8.647546033267164e-05, "loss": 0.9736, "step": 10630 }, { "epoch": 0.7203062538112338, "grad_norm": 6.906972885131836, "learning_rate": 8.647409131357382e-05, "loss": 0.8028, "step": 10631 }, { "epoch": 0.7203740090792059, "grad_norm": 6.211350440979004, "learning_rate": 8.647272229447601e-05, "loss": 0.7953, "step": 10632 }, { "epoch": 0.720441764347178, "grad_norm": 7.281525611877441, "learning_rate": 8.64713532753782e-05, "loss": 0.7667, "step": 10633 }, { "epoch": 0.7205095196151501, "grad_norm": 6.922200679779053, "learning_rate": 8.646998425628038e-05, "loss": 0.9156, "step": 10634 }, { "epoch": 0.7205772748831222, "grad_norm": 4.672682762145996, "learning_rate": 8.646861523718256e-05, "loss": 0.6602, "step": 10635 }, { "epoch": 0.7206450301510943, "grad_norm": 6.199947834014893, "learning_rate": 8.646724621808474e-05, "loss": 1.058, "step": 10636 }, { "epoch": 0.7207127854190664, "grad_norm": 6.395276069641113, "learning_rate": 8.646587719898693e-05, "loss": 0.834, "step": 10637 }, { "epoch": 0.7207805406870385, "grad_norm": 5.92854118347168, "learning_rate": 8.646450817988911e-05, "loss": 0.9097, "step": 10638 }, { "epoch": 0.7208482959550105, "grad_norm": 9.13015079498291, "learning_rate": 8.646313916079129e-05, "loss": 0.7069, "step": 10639 }, { "epoch": 0.7209160512229826, "grad_norm": 5.72170877456665, "learning_rate": 8.646177014169347e-05, "loss": 0.9316, "step": 10640 }, { "epoch": 0.7209838064909547, "grad_norm": 7.367129325866699, "learning_rate": 8.646040112259567e-05, "loss": 0.7785, "step": 10641 }, { "epoch": 0.7210515617589267, "grad_norm": 5.120598316192627, "learning_rate": 8.645903210349785e-05, "loss": 0.6656, "step": 10642 }, { "epoch": 0.7211193170268988, "grad_norm": 6.610129356384277, "learning_rate": 8.645766308440003e-05, "loss": 0.9131, "step": 10643 }, { "epoch": 0.7211870722948709, "grad_norm": 6.49082612991333, "learning_rate": 8.64562940653022e-05, "loss": 0.8923, "step": 10644 }, { "epoch": 0.721254827562843, "grad_norm": 7.010980129241943, "learning_rate": 8.64549250462044e-05, "loss": 0.794, "step": 10645 }, { "epoch": 0.7213225828308151, "grad_norm": 7.219010829925537, "learning_rate": 8.645355602710658e-05, "loss": 0.9228, "step": 10646 }, { "epoch": 0.7213903380987872, "grad_norm": 5.3610053062438965, "learning_rate": 8.645218700800876e-05, "loss": 0.7479, "step": 10647 }, { "epoch": 0.7214580933667593, "grad_norm": 6.72417688369751, "learning_rate": 8.645081798891095e-05, "loss": 0.7015, "step": 10648 }, { "epoch": 0.7215258486347313, "grad_norm": 6.321094989776611, "learning_rate": 8.644944896981313e-05, "loss": 0.7247, "step": 10649 }, { "epoch": 0.7215936039027034, "grad_norm": 6.939053058624268, "learning_rate": 8.644807995071532e-05, "loss": 0.9851, "step": 10650 }, { "epoch": 0.7216613591706755, "grad_norm": 7.304567337036133, "learning_rate": 8.644671093161751e-05, "loss": 0.9053, "step": 10651 }, { "epoch": 0.7217291144386476, "grad_norm": 7.707671165466309, "learning_rate": 8.644534191251969e-05, "loss": 0.903, "step": 10652 }, { "epoch": 0.7217968697066197, "grad_norm": 8.089873313903809, "learning_rate": 8.644397289342187e-05, "loss": 0.8698, "step": 10653 }, { "epoch": 0.7218646249745918, "grad_norm": 7.8891119956970215, "learning_rate": 8.644260387432405e-05, "loss": 0.9856, "step": 10654 }, { "epoch": 0.7219323802425639, "grad_norm": 5.457139015197754, "learning_rate": 8.644123485522624e-05, "loss": 0.6027, "step": 10655 }, { "epoch": 0.722000135510536, "grad_norm": 5.993939399719238, "learning_rate": 8.643986583612842e-05, "loss": 0.8584, "step": 10656 }, { "epoch": 0.7220678907785081, "grad_norm": 5.376394271850586, "learning_rate": 8.64384968170306e-05, "loss": 0.6551, "step": 10657 }, { "epoch": 0.7221356460464801, "grad_norm": 7.0075249671936035, "learning_rate": 8.643712779793279e-05, "loss": 0.6486, "step": 10658 }, { "epoch": 0.7222034013144522, "grad_norm": 6.753172397613525, "learning_rate": 8.643575877883498e-05, "loss": 1.0099, "step": 10659 }, { "epoch": 0.7222711565824242, "grad_norm": 8.42198371887207, "learning_rate": 8.643438975973716e-05, "loss": 0.8545, "step": 10660 }, { "epoch": 0.7223389118503963, "grad_norm": 9.268589973449707, "learning_rate": 8.643302074063934e-05, "loss": 1.0352, "step": 10661 }, { "epoch": 0.7224066671183684, "grad_norm": 6.209371566772461, "learning_rate": 8.643165172154152e-05, "loss": 0.8474, "step": 10662 }, { "epoch": 0.7224744223863405, "grad_norm": 10.753402709960938, "learning_rate": 8.64302827024437e-05, "loss": 0.8922, "step": 10663 }, { "epoch": 0.7225421776543126, "grad_norm": 7.065412998199463, "learning_rate": 8.64289136833459e-05, "loss": 0.7056, "step": 10664 }, { "epoch": 0.7226099329222847, "grad_norm": 5.867188930511475, "learning_rate": 8.642754466424807e-05, "loss": 0.6052, "step": 10665 }, { "epoch": 0.7226776881902568, "grad_norm": 7.415475368499756, "learning_rate": 8.642617564515025e-05, "loss": 0.8593, "step": 10666 }, { "epoch": 0.7227454434582289, "grad_norm": 6.486458778381348, "learning_rate": 8.642480662605244e-05, "loss": 0.8857, "step": 10667 }, { "epoch": 0.722813198726201, "grad_norm": 8.942933082580566, "learning_rate": 8.642343760695462e-05, "loss": 0.8, "step": 10668 }, { "epoch": 0.7228809539941731, "grad_norm": 4.676167011260986, "learning_rate": 8.642206858785681e-05, "loss": 0.8456, "step": 10669 }, { "epoch": 0.7229487092621452, "grad_norm": 6.750422477722168, "learning_rate": 8.642069956875899e-05, "loss": 0.9438, "step": 10670 }, { "epoch": 0.7230164645301173, "grad_norm": 8.17405891418457, "learning_rate": 8.641933054966117e-05, "loss": 0.9946, "step": 10671 }, { "epoch": 0.7230842197980893, "grad_norm": 7.05765438079834, "learning_rate": 8.641796153056335e-05, "loss": 1.0896, "step": 10672 }, { "epoch": 0.7231519750660614, "grad_norm": 5.9634857177734375, "learning_rate": 8.641659251146554e-05, "loss": 0.7975, "step": 10673 }, { "epoch": 0.7232197303340334, "grad_norm": 5.722130298614502, "learning_rate": 8.641522349236772e-05, "loss": 0.7891, "step": 10674 }, { "epoch": 0.7232874856020055, "grad_norm": 6.058647155761719, "learning_rate": 8.64138544732699e-05, "loss": 0.9987, "step": 10675 }, { "epoch": 0.7233552408699776, "grad_norm": 8.651153564453125, "learning_rate": 8.641248545417209e-05, "loss": 0.8333, "step": 10676 }, { "epoch": 0.7234229961379497, "grad_norm": 5.720202445983887, "learning_rate": 8.641111643507427e-05, "loss": 1.0346, "step": 10677 }, { "epoch": 0.7234907514059218, "grad_norm": 5.499077796936035, "learning_rate": 8.640974741597646e-05, "loss": 0.6993, "step": 10678 }, { "epoch": 0.7235585066738939, "grad_norm": 4.752992153167725, "learning_rate": 8.640837839687864e-05, "loss": 0.6134, "step": 10679 }, { "epoch": 0.723626261941866, "grad_norm": 5.855991363525391, "learning_rate": 8.640700937778082e-05, "loss": 0.7177, "step": 10680 }, { "epoch": 0.7236940172098381, "grad_norm": 6.163865566253662, "learning_rate": 8.6405640358683e-05, "loss": 0.8502, "step": 10681 }, { "epoch": 0.7237617724778102, "grad_norm": 9.418116569519043, "learning_rate": 8.64042713395852e-05, "loss": 1.1688, "step": 10682 }, { "epoch": 0.7238295277457822, "grad_norm": 6.628981113433838, "learning_rate": 8.640290232048737e-05, "loss": 0.6387, "step": 10683 }, { "epoch": 0.7238972830137543, "grad_norm": 5.6346659660339355, "learning_rate": 8.640153330138956e-05, "loss": 1.1043, "step": 10684 }, { "epoch": 0.7239650382817264, "grad_norm": 6.523744583129883, "learning_rate": 8.640016428229174e-05, "loss": 0.8915, "step": 10685 }, { "epoch": 0.7240327935496985, "grad_norm": 5.4516167640686035, "learning_rate": 8.639879526319392e-05, "loss": 0.6427, "step": 10686 }, { "epoch": 0.7241005488176706, "grad_norm": 6.612290382385254, "learning_rate": 8.639742624409611e-05, "loss": 0.8633, "step": 10687 }, { "epoch": 0.7241683040856427, "grad_norm": 5.145784854888916, "learning_rate": 8.639605722499829e-05, "loss": 0.7764, "step": 10688 }, { "epoch": 0.7242360593536148, "grad_norm": 5.991262912750244, "learning_rate": 8.639468820590047e-05, "loss": 0.723, "step": 10689 }, { "epoch": 0.7243038146215869, "grad_norm": 5.2909955978393555, "learning_rate": 8.639331918680265e-05, "loss": 0.6494, "step": 10690 }, { "epoch": 0.7243715698895589, "grad_norm": 5.02228307723999, "learning_rate": 8.639195016770484e-05, "loss": 0.5259, "step": 10691 }, { "epoch": 0.724439325157531, "grad_norm": 7.383895397186279, "learning_rate": 8.639058114860703e-05, "loss": 0.8099, "step": 10692 }, { "epoch": 0.724507080425503, "grad_norm": 7.651692867279053, "learning_rate": 8.63892121295092e-05, "loss": 0.9502, "step": 10693 }, { "epoch": 0.7245748356934751, "grad_norm": 7.732839107513428, "learning_rate": 8.63878431104114e-05, "loss": 0.6534, "step": 10694 }, { "epoch": 0.7246425909614472, "grad_norm": 6.229733467102051, "learning_rate": 8.638647409131358e-05, "loss": 0.9293, "step": 10695 }, { "epoch": 0.7247103462294193, "grad_norm": 6.323513507843018, "learning_rate": 8.638510507221576e-05, "loss": 0.7275, "step": 10696 }, { "epoch": 0.7247781014973914, "grad_norm": 4.998154163360596, "learning_rate": 8.638373605311795e-05, "loss": 0.7756, "step": 10697 }, { "epoch": 0.7248458567653635, "grad_norm": 5.609971046447754, "learning_rate": 8.638236703402013e-05, "loss": 0.9864, "step": 10698 }, { "epoch": 0.7249136120333356, "grad_norm": 8.138459205627441, "learning_rate": 8.638099801492231e-05, "loss": 0.8453, "step": 10699 }, { "epoch": 0.7249813673013077, "grad_norm": 6.91035795211792, "learning_rate": 8.63796289958245e-05, "loss": 0.7852, "step": 10700 }, { "epoch": 0.7250491225692798, "grad_norm": 5.772835731506348, "learning_rate": 8.637825997672669e-05, "loss": 0.7951, "step": 10701 }, { "epoch": 0.7251168778372519, "grad_norm": 7.034023761749268, "learning_rate": 8.637689095762887e-05, "loss": 0.9835, "step": 10702 }, { "epoch": 0.725184633105224, "grad_norm": 6.605203628540039, "learning_rate": 8.637552193853105e-05, "loss": 0.6946, "step": 10703 }, { "epoch": 0.725252388373196, "grad_norm": 6.754047870635986, "learning_rate": 8.637415291943323e-05, "loss": 0.605, "step": 10704 }, { "epoch": 0.7253201436411681, "grad_norm": 5.961748123168945, "learning_rate": 8.637278390033542e-05, "loss": 0.8438, "step": 10705 }, { "epoch": 0.7253878989091402, "grad_norm": 5.660187721252441, "learning_rate": 8.63714148812376e-05, "loss": 0.7882, "step": 10706 }, { "epoch": 0.7254556541771122, "grad_norm": 6.974256992340088, "learning_rate": 8.637004586213978e-05, "loss": 0.9528, "step": 10707 }, { "epoch": 0.7255234094450843, "grad_norm": 6.0205183029174805, "learning_rate": 8.636867684304196e-05, "loss": 0.8064, "step": 10708 }, { "epoch": 0.7255911647130564, "grad_norm": 5.7911057472229, "learning_rate": 8.636730782394415e-05, "loss": 0.7973, "step": 10709 }, { "epoch": 0.7256589199810285, "grad_norm": 6.384799480438232, "learning_rate": 8.636593880484634e-05, "loss": 0.8016, "step": 10710 }, { "epoch": 0.7257266752490006, "grad_norm": 5.134740352630615, "learning_rate": 8.636456978574852e-05, "loss": 0.7034, "step": 10711 }, { "epoch": 0.7257944305169727, "grad_norm": 6.371201992034912, "learning_rate": 8.63632007666507e-05, "loss": 0.8934, "step": 10712 }, { "epoch": 0.7258621857849448, "grad_norm": 9.626450538635254, "learning_rate": 8.636183174755288e-05, "loss": 0.9802, "step": 10713 }, { "epoch": 0.7259299410529169, "grad_norm": 7.453325271606445, "learning_rate": 8.636046272845507e-05, "loss": 0.7881, "step": 10714 }, { "epoch": 0.725997696320889, "grad_norm": 6.08189582824707, "learning_rate": 8.635909370935725e-05, "loss": 0.8223, "step": 10715 }, { "epoch": 0.726065451588861, "grad_norm": 6.120866298675537, "learning_rate": 8.635772469025943e-05, "loss": 0.7251, "step": 10716 }, { "epoch": 0.7261332068568331, "grad_norm": 5.628901481628418, "learning_rate": 8.635635567116161e-05, "loss": 0.7254, "step": 10717 }, { "epoch": 0.7262009621248052, "grad_norm": 5.944281101226807, "learning_rate": 8.63549866520638e-05, "loss": 0.9566, "step": 10718 }, { "epoch": 0.7262687173927773, "grad_norm": 5.806936740875244, "learning_rate": 8.635361763296599e-05, "loss": 0.7517, "step": 10719 }, { "epoch": 0.7263364726607494, "grad_norm": 6.54838228225708, "learning_rate": 8.635224861386817e-05, "loss": 0.6679, "step": 10720 }, { "epoch": 0.7264042279287215, "grad_norm": 6.255834102630615, "learning_rate": 8.635087959477035e-05, "loss": 0.5863, "step": 10721 }, { "epoch": 0.7264719831966936, "grad_norm": 6.097255706787109, "learning_rate": 8.634951057567253e-05, "loss": 0.7183, "step": 10722 }, { "epoch": 0.7265397384646656, "grad_norm": 8.153336524963379, "learning_rate": 8.634814155657471e-05, "loss": 0.89, "step": 10723 }, { "epoch": 0.7266074937326377, "grad_norm": 5.64036226272583, "learning_rate": 8.63467725374769e-05, "loss": 0.7711, "step": 10724 }, { "epoch": 0.7266752490006098, "grad_norm": 5.449916362762451, "learning_rate": 8.634540351837908e-05, "loss": 0.7613, "step": 10725 }, { "epoch": 0.7267430042685818, "grad_norm": 5.611260890960693, "learning_rate": 8.634403449928127e-05, "loss": 0.7947, "step": 10726 }, { "epoch": 0.7268107595365539, "grad_norm": 6.064743518829346, "learning_rate": 8.634266548018345e-05, "loss": 0.7733, "step": 10727 }, { "epoch": 0.726878514804526, "grad_norm": 6.760382175445557, "learning_rate": 8.634129646108564e-05, "loss": 0.7532, "step": 10728 }, { "epoch": 0.7269462700724981, "grad_norm": 6.390462398529053, "learning_rate": 8.633992744198782e-05, "loss": 0.827, "step": 10729 }, { "epoch": 0.7270140253404702, "grad_norm": 5.5772881507873535, "learning_rate": 8.633855842289e-05, "loss": 0.6548, "step": 10730 }, { "epoch": 0.7270817806084423, "grad_norm": 6.615449905395508, "learning_rate": 8.633718940379218e-05, "loss": 0.7181, "step": 10731 }, { "epoch": 0.7271495358764144, "grad_norm": 5.905831336975098, "learning_rate": 8.633582038469436e-05, "loss": 0.8118, "step": 10732 }, { "epoch": 0.7272172911443865, "grad_norm": 8.367280006408691, "learning_rate": 8.633445136559655e-05, "loss": 0.895, "step": 10733 }, { "epoch": 0.7272850464123586, "grad_norm": 5.006227493286133, "learning_rate": 8.633308234649873e-05, "loss": 0.7743, "step": 10734 }, { "epoch": 0.7273528016803307, "grad_norm": 5.736496448516846, "learning_rate": 8.633171332740092e-05, "loss": 0.7135, "step": 10735 }, { "epoch": 0.7274205569483028, "grad_norm": 6.69479513168335, "learning_rate": 8.63303443083031e-05, "loss": 0.9781, "step": 10736 }, { "epoch": 0.7274883122162749, "grad_norm": 5.120262622833252, "learning_rate": 8.632897528920529e-05, "loss": 0.703, "step": 10737 }, { "epoch": 0.727556067484247, "grad_norm": 5.343075275421143, "learning_rate": 8.632760627010747e-05, "loss": 0.7495, "step": 10738 }, { "epoch": 0.727623822752219, "grad_norm": 5.489655494689941, "learning_rate": 8.632623725100965e-05, "loss": 0.7753, "step": 10739 }, { "epoch": 0.727691578020191, "grad_norm": 5.322958469390869, "learning_rate": 8.632486823191184e-05, "loss": 0.7566, "step": 10740 }, { "epoch": 0.7277593332881631, "grad_norm": 8.018611907958984, "learning_rate": 8.632349921281402e-05, "loss": 0.8898, "step": 10741 }, { "epoch": 0.7278270885561352, "grad_norm": 5.888221740722656, "learning_rate": 8.63221301937162e-05, "loss": 0.7483, "step": 10742 }, { "epoch": 0.7278948438241073, "grad_norm": 7.260030746459961, "learning_rate": 8.63207611746184e-05, "loss": 0.826, "step": 10743 }, { "epoch": 0.7279625990920794, "grad_norm": 4.39701509475708, "learning_rate": 8.631939215552058e-05, "loss": 0.7154, "step": 10744 }, { "epoch": 0.7280303543600515, "grad_norm": 7.766528606414795, "learning_rate": 8.631802313642276e-05, "loss": 0.9013, "step": 10745 }, { "epoch": 0.7280981096280236, "grad_norm": 6.7016167640686035, "learning_rate": 8.631665411732494e-05, "loss": 0.6431, "step": 10746 }, { "epoch": 0.7281658648959957, "grad_norm": 7.331559181213379, "learning_rate": 8.631528509822713e-05, "loss": 0.8061, "step": 10747 }, { "epoch": 0.7282336201639678, "grad_norm": 7.0098114013671875, "learning_rate": 8.631391607912931e-05, "loss": 0.9915, "step": 10748 }, { "epoch": 0.7283013754319398, "grad_norm": 5.108738899230957, "learning_rate": 8.63125470600315e-05, "loss": 0.66, "step": 10749 }, { "epoch": 0.7283691306999119, "grad_norm": 5.508449554443359, "learning_rate": 8.631117804093367e-05, "loss": 0.6366, "step": 10750 }, { "epoch": 0.728436885967884, "grad_norm": 5.624075889587402, "learning_rate": 8.630980902183587e-05, "loss": 0.8407, "step": 10751 }, { "epoch": 0.7285046412358561, "grad_norm": 6.403767108917236, "learning_rate": 8.630844000273805e-05, "loss": 0.7612, "step": 10752 }, { "epoch": 0.7285723965038282, "grad_norm": 5.629929542541504, "learning_rate": 8.630707098364023e-05, "loss": 1.0102, "step": 10753 }, { "epoch": 0.7286401517718003, "grad_norm": 8.822092056274414, "learning_rate": 8.630570196454241e-05, "loss": 0.8066, "step": 10754 }, { "epoch": 0.7287079070397724, "grad_norm": 8.513496398925781, "learning_rate": 8.630433294544459e-05, "loss": 1.061, "step": 10755 }, { "epoch": 0.7287756623077444, "grad_norm": 5.223329067230225, "learning_rate": 8.630296392634678e-05, "loss": 0.7784, "step": 10756 }, { "epoch": 0.7288434175757165, "grad_norm": 7.098320960998535, "learning_rate": 8.630159490724896e-05, "loss": 0.9385, "step": 10757 }, { "epoch": 0.7289111728436886, "grad_norm": 5.522270679473877, "learning_rate": 8.630022588815114e-05, "loss": 0.6976, "step": 10758 }, { "epoch": 0.7289789281116607, "grad_norm": 6.411101341247559, "learning_rate": 8.629885686905332e-05, "loss": 0.7332, "step": 10759 }, { "epoch": 0.7290466833796327, "grad_norm": 5.263405799865723, "learning_rate": 8.629748784995552e-05, "loss": 0.699, "step": 10760 }, { "epoch": 0.7291144386476048, "grad_norm": 8.649581909179688, "learning_rate": 8.62961188308577e-05, "loss": 0.818, "step": 10761 }, { "epoch": 0.7291821939155769, "grad_norm": 5.901177883148193, "learning_rate": 8.629474981175988e-05, "loss": 0.8351, "step": 10762 }, { "epoch": 0.729249949183549, "grad_norm": 7.491204738616943, "learning_rate": 8.629338079266206e-05, "loss": 0.8642, "step": 10763 }, { "epoch": 0.7293177044515211, "grad_norm": 7.189452171325684, "learning_rate": 8.629201177356424e-05, "loss": 0.8524, "step": 10764 }, { "epoch": 0.7293854597194932, "grad_norm": 5.667553901672363, "learning_rate": 8.629064275446643e-05, "loss": 0.8197, "step": 10765 }, { "epoch": 0.7294532149874653, "grad_norm": 6.584259510040283, "learning_rate": 8.628927373536861e-05, "loss": 0.629, "step": 10766 }, { "epoch": 0.7295209702554374, "grad_norm": 7.937713146209717, "learning_rate": 8.62879047162708e-05, "loss": 0.8158, "step": 10767 }, { "epoch": 0.7295887255234095, "grad_norm": 9.484813690185547, "learning_rate": 8.628653569717297e-05, "loss": 1.0614, "step": 10768 }, { "epoch": 0.7296564807913816, "grad_norm": 5.351037502288818, "learning_rate": 8.628516667807516e-05, "loss": 0.6763, "step": 10769 }, { "epoch": 0.7297242360593537, "grad_norm": 7.94450569152832, "learning_rate": 8.628379765897735e-05, "loss": 1.1605, "step": 10770 }, { "epoch": 0.7297919913273258, "grad_norm": 8.101015090942383, "learning_rate": 8.628242863987953e-05, "loss": 0.8566, "step": 10771 }, { "epoch": 0.7298597465952977, "grad_norm": 5.217764854431152, "learning_rate": 8.628105962078171e-05, "loss": 0.7402, "step": 10772 }, { "epoch": 0.7299275018632698, "grad_norm": 6.194571495056152, "learning_rate": 8.627969060168389e-05, "loss": 0.7202, "step": 10773 }, { "epoch": 0.7299952571312419, "grad_norm": 5.476653575897217, "learning_rate": 8.627832158258608e-05, "loss": 0.7069, "step": 10774 }, { "epoch": 0.730063012399214, "grad_norm": 6.67211389541626, "learning_rate": 8.627695256348826e-05, "loss": 0.7778, "step": 10775 }, { "epoch": 0.7301307676671861, "grad_norm": 5.744596004486084, "learning_rate": 8.627558354439044e-05, "loss": 0.877, "step": 10776 }, { "epoch": 0.7301985229351582, "grad_norm": 7.225982666015625, "learning_rate": 8.627421452529263e-05, "loss": 0.8411, "step": 10777 }, { "epoch": 0.7302662782031303, "grad_norm": 5.470047473907471, "learning_rate": 8.62728455061948e-05, "loss": 0.6319, "step": 10778 }, { "epoch": 0.7303340334711024, "grad_norm": 6.065576553344727, "learning_rate": 8.6271476487097e-05, "loss": 0.744, "step": 10779 }, { "epoch": 0.7304017887390745, "grad_norm": 6.549447536468506, "learning_rate": 8.627010746799918e-05, "loss": 0.7161, "step": 10780 }, { "epoch": 0.7304695440070466, "grad_norm": 4.89664363861084, "learning_rate": 8.626873844890136e-05, "loss": 0.7585, "step": 10781 }, { "epoch": 0.7305372992750186, "grad_norm": 5.463417053222656, "learning_rate": 8.626736942980354e-05, "loss": 0.866, "step": 10782 }, { "epoch": 0.7306050545429907, "grad_norm": 7.537135601043701, "learning_rate": 8.626600041070573e-05, "loss": 0.8118, "step": 10783 }, { "epoch": 0.7306728098109628, "grad_norm": 6.079577445983887, "learning_rate": 8.626463139160791e-05, "loss": 0.846, "step": 10784 }, { "epoch": 0.7307405650789349, "grad_norm": 8.43422794342041, "learning_rate": 8.62632623725101e-05, "loss": 0.4532, "step": 10785 }, { "epoch": 0.730808320346907, "grad_norm": 9.411410331726074, "learning_rate": 8.626189335341228e-05, "loss": 0.808, "step": 10786 }, { "epoch": 0.7308760756148791, "grad_norm": 6.041145324707031, "learning_rate": 8.626052433431447e-05, "loss": 0.7452, "step": 10787 }, { "epoch": 0.7309438308828511, "grad_norm": 5.750189304351807, "learning_rate": 8.625915531521665e-05, "loss": 0.5868, "step": 10788 }, { "epoch": 0.7310115861508232, "grad_norm": 6.068814754486084, "learning_rate": 8.625778629611883e-05, "loss": 0.8551, "step": 10789 }, { "epoch": 0.7310793414187953, "grad_norm": 4.875567436218262, "learning_rate": 8.625641727702102e-05, "loss": 0.7231, "step": 10790 }, { "epoch": 0.7311470966867674, "grad_norm": 6.8060712814331055, "learning_rate": 8.62550482579232e-05, "loss": 0.7692, "step": 10791 }, { "epoch": 0.7312148519547395, "grad_norm": 5.561190605163574, "learning_rate": 8.625367923882538e-05, "loss": 0.7405, "step": 10792 }, { "epoch": 0.7312826072227115, "grad_norm": 5.9766130447387695, "learning_rate": 8.625231021972758e-05, "loss": 0.8625, "step": 10793 }, { "epoch": 0.7313503624906836, "grad_norm": 7.188475131988525, "learning_rate": 8.625094120062976e-05, "loss": 1.0192, "step": 10794 }, { "epoch": 0.7314181177586557, "grad_norm": 7.752885341644287, "learning_rate": 8.624957218153194e-05, "loss": 0.7109, "step": 10795 }, { "epoch": 0.7314858730266278, "grad_norm": 6.262071132659912, "learning_rate": 8.624820316243412e-05, "loss": 0.7859, "step": 10796 }, { "epoch": 0.7315536282945999, "grad_norm": 8.485372543334961, "learning_rate": 8.624683414333631e-05, "loss": 0.585, "step": 10797 }, { "epoch": 0.731621383562572, "grad_norm": 8.141338348388672, "learning_rate": 8.62454651242385e-05, "loss": 1.1567, "step": 10798 }, { "epoch": 0.7316891388305441, "grad_norm": 7.912255764007568, "learning_rate": 8.624409610514067e-05, "loss": 0.7, "step": 10799 }, { "epoch": 0.7317568940985162, "grad_norm": 5.951817035675049, "learning_rate": 8.624272708604285e-05, "loss": 0.9155, "step": 10800 }, { "epoch": 0.7318246493664883, "grad_norm": 6.594394683837891, "learning_rate": 8.624135806694503e-05, "loss": 0.9209, "step": 10801 }, { "epoch": 0.7318924046344604, "grad_norm": 6.647227764129639, "learning_rate": 8.623998904784723e-05, "loss": 0.7095, "step": 10802 }, { "epoch": 0.7319601599024325, "grad_norm": 5.953242778778076, "learning_rate": 8.623862002874941e-05, "loss": 0.6739, "step": 10803 }, { "epoch": 0.7320279151704046, "grad_norm": 6.691287517547607, "learning_rate": 8.623725100965159e-05, "loss": 0.8772, "step": 10804 }, { "epoch": 0.7320956704383765, "grad_norm": 8.432060241699219, "learning_rate": 8.623588199055377e-05, "loss": 0.676, "step": 10805 }, { "epoch": 0.7321634257063486, "grad_norm": 7.356803894042969, "learning_rate": 8.623451297145596e-05, "loss": 0.866, "step": 10806 }, { "epoch": 0.7322311809743207, "grad_norm": 6.421758651733398, "learning_rate": 8.623314395235814e-05, "loss": 0.9497, "step": 10807 }, { "epoch": 0.7322989362422928, "grad_norm": 4.597375869750977, "learning_rate": 8.623177493326032e-05, "loss": 0.788, "step": 10808 }, { "epoch": 0.7323666915102649, "grad_norm": 5.468592166900635, "learning_rate": 8.62304059141625e-05, "loss": 0.7864, "step": 10809 }, { "epoch": 0.732434446778237, "grad_norm": 6.1158833503723145, "learning_rate": 8.622903689506468e-05, "loss": 0.7756, "step": 10810 }, { "epoch": 0.7325022020462091, "grad_norm": 6.584497451782227, "learning_rate": 8.622766787596688e-05, "loss": 0.806, "step": 10811 }, { "epoch": 0.7325699573141812, "grad_norm": 4.517557621002197, "learning_rate": 8.622629885686906e-05, "loss": 0.5618, "step": 10812 }, { "epoch": 0.7326377125821533, "grad_norm": 9.059310913085938, "learning_rate": 8.622492983777124e-05, "loss": 0.8049, "step": 10813 }, { "epoch": 0.7327054678501254, "grad_norm": 5.33046293258667, "learning_rate": 8.622356081867342e-05, "loss": 0.7975, "step": 10814 }, { "epoch": 0.7327732231180974, "grad_norm": 6.890832424163818, "learning_rate": 8.622219179957561e-05, "loss": 0.781, "step": 10815 }, { "epoch": 0.7328409783860695, "grad_norm": 5.804647922515869, "learning_rate": 8.62208227804778e-05, "loss": 0.6286, "step": 10816 }, { "epoch": 0.7329087336540416, "grad_norm": 6.476672649383545, "learning_rate": 8.621945376137997e-05, "loss": 0.7815, "step": 10817 }, { "epoch": 0.7329764889220137, "grad_norm": 6.72651481628418, "learning_rate": 8.621808474228215e-05, "loss": 0.9064, "step": 10818 }, { "epoch": 0.7330442441899858, "grad_norm": 7.354333877563477, "learning_rate": 8.621671572318433e-05, "loss": 0.8214, "step": 10819 }, { "epoch": 0.7331119994579579, "grad_norm": 7.4680609703063965, "learning_rate": 8.621534670408653e-05, "loss": 0.9942, "step": 10820 }, { "epoch": 0.7331797547259299, "grad_norm": 6.3199968338012695, "learning_rate": 8.621397768498871e-05, "loss": 0.9796, "step": 10821 }, { "epoch": 0.733247509993902, "grad_norm": 5.8508453369140625, "learning_rate": 8.621260866589089e-05, "loss": 0.8393, "step": 10822 }, { "epoch": 0.7333152652618741, "grad_norm": 7.697128772735596, "learning_rate": 8.621123964679307e-05, "loss": 1.1706, "step": 10823 }, { "epoch": 0.7333830205298462, "grad_norm": 6.382595539093018, "learning_rate": 8.620987062769525e-05, "loss": 0.9019, "step": 10824 }, { "epoch": 0.7334507757978183, "grad_norm": 6.121464729309082, "learning_rate": 8.620850160859744e-05, "loss": 0.7314, "step": 10825 }, { "epoch": 0.7335185310657903, "grad_norm": 5.412440299987793, "learning_rate": 8.620713258949962e-05, "loss": 0.904, "step": 10826 }, { "epoch": 0.7335862863337624, "grad_norm": 5.761203765869141, "learning_rate": 8.62057635704018e-05, "loss": 0.7746, "step": 10827 }, { "epoch": 0.7336540416017345, "grad_norm": 5.339764595031738, "learning_rate": 8.620439455130399e-05, "loss": 0.8274, "step": 10828 }, { "epoch": 0.7337217968697066, "grad_norm": 4.942664623260498, "learning_rate": 8.620302553220618e-05, "loss": 0.7429, "step": 10829 }, { "epoch": 0.7337895521376787, "grad_norm": 6.011295318603516, "learning_rate": 8.620165651310836e-05, "loss": 0.6289, "step": 10830 }, { "epoch": 0.7338573074056508, "grad_norm": 5.268429279327393, "learning_rate": 8.620028749401054e-05, "loss": 0.7213, "step": 10831 }, { "epoch": 0.7339250626736229, "grad_norm": 5.994687080383301, "learning_rate": 8.619891847491272e-05, "loss": 0.7625, "step": 10832 }, { "epoch": 0.733992817941595, "grad_norm": 5.908527374267578, "learning_rate": 8.619754945581491e-05, "loss": 0.6557, "step": 10833 }, { "epoch": 0.7340605732095671, "grad_norm": 6.297107696533203, "learning_rate": 8.61961804367171e-05, "loss": 0.7886, "step": 10834 }, { "epoch": 0.7341283284775392, "grad_norm": 7.262679576873779, "learning_rate": 8.619481141761927e-05, "loss": 0.703, "step": 10835 }, { "epoch": 0.7341960837455113, "grad_norm": 6.179491996765137, "learning_rate": 8.619344239852147e-05, "loss": 0.646, "step": 10836 }, { "epoch": 0.7342638390134832, "grad_norm": 5.197315216064453, "learning_rate": 8.619207337942365e-05, "loss": 0.8495, "step": 10837 }, { "epoch": 0.7343315942814553, "grad_norm": 5.623149394989014, "learning_rate": 8.619070436032583e-05, "loss": 0.7513, "step": 10838 }, { "epoch": 0.7343993495494274, "grad_norm": 6.872591018676758, "learning_rate": 8.618933534122802e-05, "loss": 0.756, "step": 10839 }, { "epoch": 0.7344671048173995, "grad_norm": 6.851233005523682, "learning_rate": 8.61879663221302e-05, "loss": 0.9182, "step": 10840 }, { "epoch": 0.7345348600853716, "grad_norm": 6.495956897735596, "learning_rate": 8.618659730303238e-05, "loss": 0.9298, "step": 10841 }, { "epoch": 0.7346026153533437, "grad_norm": 5.927680492401123, "learning_rate": 8.618522828393456e-05, "loss": 0.7563, "step": 10842 }, { "epoch": 0.7346703706213158, "grad_norm": 6.39896821975708, "learning_rate": 8.618385926483676e-05, "loss": 0.993, "step": 10843 }, { "epoch": 0.7347381258892879, "grad_norm": 5.756770133972168, "learning_rate": 8.618249024573894e-05, "loss": 0.6887, "step": 10844 }, { "epoch": 0.73480588115726, "grad_norm": 4.987828731536865, "learning_rate": 8.618112122664112e-05, "loss": 0.5908, "step": 10845 }, { "epoch": 0.7348736364252321, "grad_norm": 7.487382888793945, "learning_rate": 8.61797522075433e-05, "loss": 0.6574, "step": 10846 }, { "epoch": 0.7349413916932042, "grad_norm": 5.079615592956543, "learning_rate": 8.617838318844549e-05, "loss": 0.9038, "step": 10847 }, { "epoch": 0.7350091469611763, "grad_norm": 7.286651134490967, "learning_rate": 8.617701416934767e-05, "loss": 0.9635, "step": 10848 }, { "epoch": 0.7350769022291483, "grad_norm": 6.60608434677124, "learning_rate": 8.617564515024985e-05, "loss": 0.8811, "step": 10849 }, { "epoch": 0.7351446574971204, "grad_norm": 5.821297645568848, "learning_rate": 8.617427613115203e-05, "loss": 0.6802, "step": 10850 }, { "epoch": 0.7352124127650925, "grad_norm": 5.659653663635254, "learning_rate": 8.617290711205421e-05, "loss": 0.9145, "step": 10851 }, { "epoch": 0.7352801680330646, "grad_norm": 6.8696980476379395, "learning_rate": 8.617153809295641e-05, "loss": 1.0301, "step": 10852 }, { "epoch": 0.7353479233010367, "grad_norm": 5.688724040985107, "learning_rate": 8.617016907385859e-05, "loss": 0.7185, "step": 10853 }, { "epoch": 0.7354156785690087, "grad_norm": 6.016847610473633, "learning_rate": 8.616880005476077e-05, "loss": 0.7597, "step": 10854 }, { "epoch": 0.7354834338369808, "grad_norm": 6.0264692306518555, "learning_rate": 8.616743103566295e-05, "loss": 0.6665, "step": 10855 }, { "epoch": 0.7355511891049529, "grad_norm": 5.6370415687561035, "learning_rate": 8.616606201656513e-05, "loss": 0.7088, "step": 10856 }, { "epoch": 0.735618944372925, "grad_norm": 6.686164379119873, "learning_rate": 8.616469299746732e-05, "loss": 0.7139, "step": 10857 }, { "epoch": 0.7356866996408971, "grad_norm": 6.4555134773254395, "learning_rate": 8.61633239783695e-05, "loss": 0.7969, "step": 10858 }, { "epoch": 0.7357544549088691, "grad_norm": 5.4364752769470215, "learning_rate": 8.616195495927168e-05, "loss": 0.6922, "step": 10859 }, { "epoch": 0.7358222101768412, "grad_norm": 7.089079856872559, "learning_rate": 8.616058594017386e-05, "loss": 1.0301, "step": 10860 }, { "epoch": 0.7358899654448133, "grad_norm": 5.888257026672363, "learning_rate": 8.615921692107606e-05, "loss": 0.5942, "step": 10861 }, { "epoch": 0.7359577207127854, "grad_norm": 5.762383460998535, "learning_rate": 8.615784790197824e-05, "loss": 0.6695, "step": 10862 }, { "epoch": 0.7360254759807575, "grad_norm": 5.797264575958252, "learning_rate": 8.615647888288042e-05, "loss": 0.705, "step": 10863 }, { "epoch": 0.7360932312487296, "grad_norm": 4.282798767089844, "learning_rate": 8.61551098637826e-05, "loss": 0.6221, "step": 10864 }, { "epoch": 0.7361609865167017, "grad_norm": 5.539671897888184, "learning_rate": 8.615374084468478e-05, "loss": 0.739, "step": 10865 }, { "epoch": 0.7362287417846738, "grad_norm": 5.675551891326904, "learning_rate": 8.615237182558697e-05, "loss": 0.9089, "step": 10866 }, { "epoch": 0.7362964970526459, "grad_norm": 5.868411064147949, "learning_rate": 8.615100280648915e-05, "loss": 0.8312, "step": 10867 }, { "epoch": 0.736364252320618, "grad_norm": 5.487252712249756, "learning_rate": 8.614963378739133e-05, "loss": 1.0099, "step": 10868 }, { "epoch": 0.7364320075885901, "grad_norm": 6.392849445343018, "learning_rate": 8.614826476829351e-05, "loss": 0.6691, "step": 10869 }, { "epoch": 0.736499762856562, "grad_norm": 6.58278226852417, "learning_rate": 8.614689574919571e-05, "loss": 0.8738, "step": 10870 }, { "epoch": 0.7365675181245341, "grad_norm": 8.645120620727539, "learning_rate": 8.614552673009789e-05, "loss": 0.7185, "step": 10871 }, { "epoch": 0.7366352733925062, "grad_norm": 4.876821041107178, "learning_rate": 8.614415771100007e-05, "loss": 0.6354, "step": 10872 }, { "epoch": 0.7367030286604783, "grad_norm": 5.1632232666015625, "learning_rate": 8.614278869190225e-05, "loss": 0.6593, "step": 10873 }, { "epoch": 0.7367707839284504, "grad_norm": 7.005191326141357, "learning_rate": 8.614141967280443e-05, "loss": 0.9814, "step": 10874 }, { "epoch": 0.7368385391964225, "grad_norm": 5.898367881774902, "learning_rate": 8.614005065370662e-05, "loss": 0.7328, "step": 10875 }, { "epoch": 0.7369062944643946, "grad_norm": 6.076502799987793, "learning_rate": 8.61386816346088e-05, "loss": 0.8477, "step": 10876 }, { "epoch": 0.7369740497323667, "grad_norm": 7.934567928314209, "learning_rate": 8.613731261551098e-05, "loss": 0.7914, "step": 10877 }, { "epoch": 0.7370418050003388, "grad_norm": 6.465484619140625, "learning_rate": 8.613594359641316e-05, "loss": 0.768, "step": 10878 }, { "epoch": 0.7371095602683109, "grad_norm": 5.772295951843262, "learning_rate": 8.613457457731536e-05, "loss": 0.7722, "step": 10879 }, { "epoch": 0.737177315536283, "grad_norm": 5.704565525054932, "learning_rate": 8.613320555821754e-05, "loss": 0.6609, "step": 10880 }, { "epoch": 0.737245070804255, "grad_norm": 6.054666042327881, "learning_rate": 8.613183653911972e-05, "loss": 0.8513, "step": 10881 }, { "epoch": 0.7373128260722271, "grad_norm": 6.808941841125488, "learning_rate": 8.613046752002191e-05, "loss": 0.8188, "step": 10882 }, { "epoch": 0.7373805813401992, "grad_norm": 5.95088005065918, "learning_rate": 8.61290985009241e-05, "loss": 0.8952, "step": 10883 }, { "epoch": 0.7374483366081713, "grad_norm": 6.34380578994751, "learning_rate": 8.612772948182627e-05, "loss": 0.9835, "step": 10884 }, { "epoch": 0.7375160918761434, "grad_norm": 6.720582008361816, "learning_rate": 8.612636046272847e-05, "loss": 1.0013, "step": 10885 }, { "epoch": 0.7375838471441154, "grad_norm": 7.400576591491699, "learning_rate": 8.612499144363065e-05, "loss": 0.906, "step": 10886 }, { "epoch": 0.7376516024120875, "grad_norm": 7.385793209075928, "learning_rate": 8.612362242453283e-05, "loss": 0.9448, "step": 10887 }, { "epoch": 0.7377193576800596, "grad_norm": 4.82860803604126, "learning_rate": 8.612225340543501e-05, "loss": 0.724, "step": 10888 }, { "epoch": 0.7377871129480317, "grad_norm": 7.288435935974121, "learning_rate": 8.61208843863372e-05, "loss": 1.0472, "step": 10889 }, { "epoch": 0.7378548682160038, "grad_norm": 5.416788578033447, "learning_rate": 8.611951536723938e-05, "loss": 0.6952, "step": 10890 }, { "epoch": 0.7379226234839759, "grad_norm": 6.170418739318848, "learning_rate": 8.611814634814156e-05, "loss": 0.8321, "step": 10891 }, { "epoch": 0.737990378751948, "grad_norm": 6.4705119132995605, "learning_rate": 8.611677732904374e-05, "loss": 0.9207, "step": 10892 }, { "epoch": 0.73805813401992, "grad_norm": 5.7486138343811035, "learning_rate": 8.611540830994594e-05, "loss": 0.8527, "step": 10893 }, { "epoch": 0.7381258892878921, "grad_norm": 6.1065568923950195, "learning_rate": 8.611403929084812e-05, "loss": 0.7026, "step": 10894 }, { "epoch": 0.7381936445558642, "grad_norm": 5.706049919128418, "learning_rate": 8.61126702717503e-05, "loss": 0.8366, "step": 10895 }, { "epoch": 0.7382613998238363, "grad_norm": 4.653761863708496, "learning_rate": 8.611130125265248e-05, "loss": 0.538, "step": 10896 }, { "epoch": 0.7383291550918084, "grad_norm": 7.086725234985352, "learning_rate": 8.610993223355466e-05, "loss": 0.906, "step": 10897 }, { "epoch": 0.7383969103597805, "grad_norm": 8.890185356140137, "learning_rate": 8.610856321445685e-05, "loss": 1.0786, "step": 10898 }, { "epoch": 0.7384646656277526, "grad_norm": 6.072719097137451, "learning_rate": 8.610719419535903e-05, "loss": 0.8558, "step": 10899 }, { "epoch": 0.7385324208957247, "grad_norm": 6.266420364379883, "learning_rate": 8.610582517626121e-05, "loss": 0.9337, "step": 10900 }, { "epoch": 0.7386001761636968, "grad_norm": 5.14294958114624, "learning_rate": 8.61044561571634e-05, "loss": 0.5262, "step": 10901 }, { "epoch": 0.7386679314316689, "grad_norm": 8.386168479919434, "learning_rate": 8.610308713806559e-05, "loss": 1.3214, "step": 10902 }, { "epoch": 0.7387356866996408, "grad_norm": 4.496156692504883, "learning_rate": 8.610171811896777e-05, "loss": 0.81, "step": 10903 }, { "epoch": 0.7388034419676129, "grad_norm": 8.191973686218262, "learning_rate": 8.610034909986995e-05, "loss": 0.7648, "step": 10904 }, { "epoch": 0.738871197235585, "grad_norm": 6.122156620025635, "learning_rate": 8.609898008077213e-05, "loss": 0.8471, "step": 10905 }, { "epoch": 0.7389389525035571, "grad_norm": 6.742517471313477, "learning_rate": 8.609761106167431e-05, "loss": 0.8456, "step": 10906 }, { "epoch": 0.7390067077715292, "grad_norm": 4.137988090515137, "learning_rate": 8.60962420425765e-05, "loss": 0.7035, "step": 10907 }, { "epoch": 0.7390744630395013, "grad_norm": 6.304561138153076, "learning_rate": 8.609487302347868e-05, "loss": 0.8491, "step": 10908 }, { "epoch": 0.7391422183074734, "grad_norm": 7.52929162979126, "learning_rate": 8.609350400438086e-05, "loss": 0.6814, "step": 10909 }, { "epoch": 0.7392099735754455, "grad_norm": 5.229283809661865, "learning_rate": 8.609213498528304e-05, "loss": 0.8091, "step": 10910 }, { "epoch": 0.7392777288434176, "grad_norm": 7.810683250427246, "learning_rate": 8.609076596618522e-05, "loss": 1.0311, "step": 10911 }, { "epoch": 0.7393454841113897, "grad_norm": 4.896294116973877, "learning_rate": 8.608939694708742e-05, "loss": 0.7789, "step": 10912 }, { "epoch": 0.7394132393793618, "grad_norm": 5.661660194396973, "learning_rate": 8.60880279279896e-05, "loss": 0.8267, "step": 10913 }, { "epoch": 0.7394809946473339, "grad_norm": 6.691354274749756, "learning_rate": 8.608665890889178e-05, "loss": 0.9455, "step": 10914 }, { "epoch": 0.739548749915306, "grad_norm": 4.900321960449219, "learning_rate": 8.608528988979396e-05, "loss": 0.7102, "step": 10915 }, { "epoch": 0.739616505183278, "grad_norm": 6.105568885803223, "learning_rate": 8.608392087069615e-05, "loss": 0.825, "step": 10916 }, { "epoch": 0.7396842604512501, "grad_norm": 6.132299900054932, "learning_rate": 8.608255185159833e-05, "loss": 0.754, "step": 10917 }, { "epoch": 0.7397520157192222, "grad_norm": 6.12472677230835, "learning_rate": 8.608118283250051e-05, "loss": 0.9008, "step": 10918 }, { "epoch": 0.7398197709871942, "grad_norm": 6.3657002449035645, "learning_rate": 8.60798138134027e-05, "loss": 1.0276, "step": 10919 }, { "epoch": 0.7398875262551663, "grad_norm": 5.224886894226074, "learning_rate": 8.607844479430487e-05, "loss": 0.8636, "step": 10920 }, { "epoch": 0.7399552815231384, "grad_norm": 12.790693283081055, "learning_rate": 8.607707577520707e-05, "loss": 0.9617, "step": 10921 }, { "epoch": 0.7400230367911105, "grad_norm": 5.787003040313721, "learning_rate": 8.607570675610925e-05, "loss": 0.6847, "step": 10922 }, { "epoch": 0.7400907920590826, "grad_norm": 5.556890964508057, "learning_rate": 8.607433773701143e-05, "loss": 0.7855, "step": 10923 }, { "epoch": 0.7401585473270547, "grad_norm": 4.655983924865723, "learning_rate": 8.607296871791361e-05, "loss": 0.7021, "step": 10924 }, { "epoch": 0.7402263025950268, "grad_norm": 6.317244052886963, "learning_rate": 8.60715996988158e-05, "loss": 0.7709, "step": 10925 }, { "epoch": 0.7402940578629988, "grad_norm": 5.975430965423584, "learning_rate": 8.607023067971798e-05, "loss": 0.8145, "step": 10926 }, { "epoch": 0.7403618131309709, "grad_norm": 5.794954776763916, "learning_rate": 8.606886166062016e-05, "loss": 0.8564, "step": 10927 }, { "epoch": 0.740429568398943, "grad_norm": 5.966963291168213, "learning_rate": 8.606749264152236e-05, "loss": 0.7833, "step": 10928 }, { "epoch": 0.7404973236669151, "grad_norm": 8.969901084899902, "learning_rate": 8.606612362242454e-05, "loss": 0.9768, "step": 10929 }, { "epoch": 0.7405650789348872, "grad_norm": 7.2957353591918945, "learning_rate": 8.606475460332672e-05, "loss": 1.0157, "step": 10930 }, { "epoch": 0.7406328342028593, "grad_norm": 7.000049591064453, "learning_rate": 8.606338558422891e-05, "loss": 0.7907, "step": 10931 }, { "epoch": 0.7407005894708314, "grad_norm": 7.120476722717285, "learning_rate": 8.606201656513109e-05, "loss": 0.7708, "step": 10932 }, { "epoch": 0.7407683447388035, "grad_norm": 6.740073204040527, "learning_rate": 8.606064754603327e-05, "loss": 0.8841, "step": 10933 }, { "epoch": 0.7408361000067756, "grad_norm": 4.036641597747803, "learning_rate": 8.605927852693545e-05, "loss": 0.6445, "step": 10934 }, { "epoch": 0.7409038552747476, "grad_norm": 6.200973033905029, "learning_rate": 8.605790950783765e-05, "loss": 0.6976, "step": 10935 }, { "epoch": 0.7409716105427196, "grad_norm": 6.047748565673828, "learning_rate": 8.605654048873983e-05, "loss": 0.8174, "step": 10936 }, { "epoch": 0.7410393658106917, "grad_norm": 6.8715081214904785, "learning_rate": 8.605517146964201e-05, "loss": 0.9817, "step": 10937 }, { "epoch": 0.7411071210786638, "grad_norm": 5.555269241333008, "learning_rate": 8.605380245054419e-05, "loss": 0.6318, "step": 10938 }, { "epoch": 0.7411748763466359, "grad_norm": 8.042158126831055, "learning_rate": 8.605243343144638e-05, "loss": 1.2133, "step": 10939 }, { "epoch": 0.741242631614608, "grad_norm": 6.014042854309082, "learning_rate": 8.605106441234856e-05, "loss": 0.7934, "step": 10940 }, { "epoch": 0.7413103868825801, "grad_norm": 5.317357063293457, "learning_rate": 8.604969539325074e-05, "loss": 0.7135, "step": 10941 }, { "epoch": 0.7413781421505522, "grad_norm": 5.755384922027588, "learning_rate": 8.604832637415292e-05, "loss": 0.4912, "step": 10942 }, { "epoch": 0.7414458974185243, "grad_norm": 5.812741279602051, "learning_rate": 8.60469573550551e-05, "loss": 1.029, "step": 10943 }, { "epoch": 0.7415136526864964, "grad_norm": 6.497066020965576, "learning_rate": 8.60455883359573e-05, "loss": 0.8707, "step": 10944 }, { "epoch": 0.7415814079544685, "grad_norm": 5.14995002746582, "learning_rate": 8.604421931685948e-05, "loss": 0.6412, "step": 10945 }, { "epoch": 0.7416491632224406, "grad_norm": 6.122746467590332, "learning_rate": 8.604285029776166e-05, "loss": 0.7705, "step": 10946 }, { "epoch": 0.7417169184904127, "grad_norm": 5.844207763671875, "learning_rate": 8.604148127866384e-05, "loss": 0.9171, "step": 10947 }, { "epoch": 0.7417846737583847, "grad_norm": 4.956113815307617, "learning_rate": 8.604011225956603e-05, "loss": 0.6854, "step": 10948 }, { "epoch": 0.7418524290263568, "grad_norm": 6.357004165649414, "learning_rate": 8.603874324046821e-05, "loss": 0.9097, "step": 10949 }, { "epoch": 0.7419201842943289, "grad_norm": 8.953754425048828, "learning_rate": 8.60373742213704e-05, "loss": 0.7142, "step": 10950 }, { "epoch": 0.741987939562301, "grad_norm": 6.2500691413879395, "learning_rate": 8.603600520227257e-05, "loss": 0.5626, "step": 10951 }, { "epoch": 0.742055694830273, "grad_norm": 6.902349472045898, "learning_rate": 8.603463618317475e-05, "loss": 1.0085, "step": 10952 }, { "epoch": 0.7421234500982451, "grad_norm": 5.850625991821289, "learning_rate": 8.603326716407695e-05, "loss": 0.691, "step": 10953 }, { "epoch": 0.7421912053662172, "grad_norm": 6.940263748168945, "learning_rate": 8.603189814497913e-05, "loss": 0.9006, "step": 10954 }, { "epoch": 0.7422589606341893, "grad_norm": 6.827815532684326, "learning_rate": 8.603052912588131e-05, "loss": 0.7597, "step": 10955 }, { "epoch": 0.7423267159021614, "grad_norm": 7.243155479431152, "learning_rate": 8.602916010678349e-05, "loss": 0.7973, "step": 10956 }, { "epoch": 0.7423944711701335, "grad_norm": 5.524760723114014, "learning_rate": 8.602779108768567e-05, "loss": 0.8164, "step": 10957 }, { "epoch": 0.7424622264381056, "grad_norm": 6.662420749664307, "learning_rate": 8.602642206858786e-05, "loss": 0.6328, "step": 10958 }, { "epoch": 0.7425299817060776, "grad_norm": 8.47179889678955, "learning_rate": 8.602505304949004e-05, "loss": 1.1036, "step": 10959 }, { "epoch": 0.7425977369740497, "grad_norm": 6.377445220947266, "learning_rate": 8.602368403039222e-05, "loss": 0.7944, "step": 10960 }, { "epoch": 0.7426654922420218, "grad_norm": 6.111810207366943, "learning_rate": 8.60223150112944e-05, "loss": 1.008, "step": 10961 }, { "epoch": 0.7427332475099939, "grad_norm": 3.763913154602051, "learning_rate": 8.60209459921966e-05, "loss": 0.6067, "step": 10962 }, { "epoch": 0.742801002777966, "grad_norm": 7.272477626800537, "learning_rate": 8.601957697309878e-05, "loss": 0.813, "step": 10963 }, { "epoch": 0.7428687580459381, "grad_norm": 6.8635406494140625, "learning_rate": 8.601820795400096e-05, "loss": 0.6991, "step": 10964 }, { "epoch": 0.7429365133139102, "grad_norm": 5.985293388366699, "learning_rate": 8.601683893490314e-05, "loss": 0.8404, "step": 10965 }, { "epoch": 0.7430042685818823, "grad_norm": 7.298139572143555, "learning_rate": 8.601546991580532e-05, "loss": 0.7766, "step": 10966 }, { "epoch": 0.7430720238498544, "grad_norm": 6.489261150360107, "learning_rate": 8.601410089670751e-05, "loss": 0.8351, "step": 10967 }, { "epoch": 0.7431397791178264, "grad_norm": 6.494144439697266, "learning_rate": 8.60127318776097e-05, "loss": 0.7456, "step": 10968 }, { "epoch": 0.7432075343857985, "grad_norm": 7.2324090003967285, "learning_rate": 8.601136285851187e-05, "loss": 0.7192, "step": 10969 }, { "epoch": 0.7432752896537705, "grad_norm": 5.349514007568359, "learning_rate": 8.600999383941405e-05, "loss": 0.8994, "step": 10970 }, { "epoch": 0.7433430449217426, "grad_norm": 5.59208869934082, "learning_rate": 8.600862482031625e-05, "loss": 0.6534, "step": 10971 }, { "epoch": 0.7434108001897147, "grad_norm": 6.849013328552246, "learning_rate": 8.600725580121843e-05, "loss": 0.7908, "step": 10972 }, { "epoch": 0.7434785554576868, "grad_norm": 7.590834617614746, "learning_rate": 8.600588678212061e-05, "loss": 0.7773, "step": 10973 }, { "epoch": 0.7435463107256589, "grad_norm": 5.68290901184082, "learning_rate": 8.60045177630228e-05, "loss": 0.9227, "step": 10974 }, { "epoch": 0.743614065993631, "grad_norm": 7.747159004211426, "learning_rate": 8.600314874392498e-05, "loss": 0.8515, "step": 10975 }, { "epoch": 0.7436818212616031, "grad_norm": 6.546794891357422, "learning_rate": 8.600177972482716e-05, "loss": 0.7743, "step": 10976 }, { "epoch": 0.7437495765295752, "grad_norm": 6.478428363800049, "learning_rate": 8.600041070572936e-05, "loss": 0.844, "step": 10977 }, { "epoch": 0.7438173317975473, "grad_norm": 6.902507781982422, "learning_rate": 8.599904168663154e-05, "loss": 0.9503, "step": 10978 }, { "epoch": 0.7438850870655194, "grad_norm": 5.328883171081543, "learning_rate": 8.599767266753372e-05, "loss": 0.9925, "step": 10979 }, { "epoch": 0.7439528423334915, "grad_norm": 7.376800537109375, "learning_rate": 8.599630364843591e-05, "loss": 1.0425, "step": 10980 }, { "epoch": 0.7440205976014636, "grad_norm": 5.4552693367004395, "learning_rate": 8.599493462933809e-05, "loss": 0.7362, "step": 10981 }, { "epoch": 0.7440883528694356, "grad_norm": 6.60410213470459, "learning_rate": 8.599356561024027e-05, "loss": 0.5882, "step": 10982 }, { "epoch": 0.7441561081374077, "grad_norm": 5.760132789611816, "learning_rate": 8.599219659114245e-05, "loss": 0.8001, "step": 10983 }, { "epoch": 0.7442238634053797, "grad_norm": 6.064749717712402, "learning_rate": 8.599082757204463e-05, "loss": 0.6922, "step": 10984 }, { "epoch": 0.7442916186733518, "grad_norm": 5.042727470397949, "learning_rate": 8.598945855294683e-05, "loss": 0.7558, "step": 10985 }, { "epoch": 0.7443593739413239, "grad_norm": 5.222632884979248, "learning_rate": 8.598808953384901e-05, "loss": 0.6729, "step": 10986 }, { "epoch": 0.744427129209296, "grad_norm": 6.379947185516357, "learning_rate": 8.598672051475119e-05, "loss": 0.995, "step": 10987 }, { "epoch": 0.7444948844772681, "grad_norm": 8.097314834594727, "learning_rate": 8.598535149565337e-05, "loss": 0.9257, "step": 10988 }, { "epoch": 0.7445626397452402, "grad_norm": 7.161442279815674, "learning_rate": 8.598398247655555e-05, "loss": 0.9308, "step": 10989 }, { "epoch": 0.7446303950132123, "grad_norm": 8.118345260620117, "learning_rate": 8.598261345745774e-05, "loss": 0.7901, "step": 10990 }, { "epoch": 0.7446981502811844, "grad_norm": 5.9762797355651855, "learning_rate": 8.598124443835992e-05, "loss": 0.7084, "step": 10991 }, { "epoch": 0.7447659055491564, "grad_norm": 5.742055416107178, "learning_rate": 8.59798754192621e-05, "loss": 0.6301, "step": 10992 }, { "epoch": 0.7448336608171285, "grad_norm": 5.871792793273926, "learning_rate": 8.597850640016428e-05, "loss": 0.6938, "step": 10993 }, { "epoch": 0.7449014160851006, "grad_norm": 6.759030342102051, "learning_rate": 8.597713738106648e-05, "loss": 0.8002, "step": 10994 }, { "epoch": 0.7449691713530727, "grad_norm": 7.746789455413818, "learning_rate": 8.597576836196866e-05, "loss": 1.0539, "step": 10995 }, { "epoch": 0.7450369266210448, "grad_norm": 8.104504585266113, "learning_rate": 8.597439934287084e-05, "loss": 0.8205, "step": 10996 }, { "epoch": 0.7451046818890169, "grad_norm": 5.84213399887085, "learning_rate": 8.597303032377302e-05, "loss": 1.0039, "step": 10997 }, { "epoch": 0.745172437156989, "grad_norm": 6.890494346618652, "learning_rate": 8.59716613046752e-05, "loss": 1.1614, "step": 10998 }, { "epoch": 0.7452401924249611, "grad_norm": 5.201790809631348, "learning_rate": 8.597029228557739e-05, "loss": 0.6836, "step": 10999 }, { "epoch": 0.7453079476929331, "grad_norm": 5.523726463317871, "learning_rate": 8.596892326647957e-05, "loss": 0.9281, "step": 11000 }, { "epoch": 0.7453757029609052, "grad_norm": 6.218978404998779, "learning_rate": 8.596755424738175e-05, "loss": 0.8996, "step": 11001 }, { "epoch": 0.7454434582288773, "grad_norm": 4.860278606414795, "learning_rate": 8.596618522828393e-05, "loss": 0.8022, "step": 11002 }, { "epoch": 0.7455112134968493, "grad_norm": 6.433527946472168, "learning_rate": 8.596481620918613e-05, "loss": 0.8649, "step": 11003 }, { "epoch": 0.7455789687648214, "grad_norm": 6.700179100036621, "learning_rate": 8.596344719008831e-05, "loss": 0.8124, "step": 11004 }, { "epoch": 0.7456467240327935, "grad_norm": 6.234446048736572, "learning_rate": 8.596207817099049e-05, "loss": 0.8406, "step": 11005 }, { "epoch": 0.7457144793007656, "grad_norm": 6.376819610595703, "learning_rate": 8.596070915189267e-05, "loss": 0.7509, "step": 11006 }, { "epoch": 0.7457822345687377, "grad_norm": 5.808053970336914, "learning_rate": 8.595934013279485e-05, "loss": 0.864, "step": 11007 }, { "epoch": 0.7458499898367098, "grad_norm": 6.195199012756348, "learning_rate": 8.595797111369704e-05, "loss": 0.9481, "step": 11008 }, { "epoch": 0.7459177451046819, "grad_norm": 5.600381374359131, "learning_rate": 8.595660209459922e-05, "loss": 0.8916, "step": 11009 }, { "epoch": 0.745985500372654, "grad_norm": 5.866032600402832, "learning_rate": 8.59552330755014e-05, "loss": 0.813, "step": 11010 }, { "epoch": 0.7460532556406261, "grad_norm": 7.881616592407227, "learning_rate": 8.595386405640358e-05, "loss": 0.7804, "step": 11011 }, { "epoch": 0.7461210109085982, "grad_norm": 6.006317138671875, "learning_rate": 8.595249503730576e-05, "loss": 0.9739, "step": 11012 }, { "epoch": 0.7461887661765703, "grad_norm": 5.69516658782959, "learning_rate": 8.595112601820796e-05, "loss": 0.876, "step": 11013 }, { "epoch": 0.7462565214445424, "grad_norm": 6.4250006675720215, "learning_rate": 8.594975699911014e-05, "loss": 1.126, "step": 11014 }, { "epoch": 0.7463242767125144, "grad_norm": 4.753213405609131, "learning_rate": 8.594838798001232e-05, "loss": 0.9536, "step": 11015 }, { "epoch": 0.7463920319804865, "grad_norm": 6.818500995635986, "learning_rate": 8.59470189609145e-05, "loss": 0.8902, "step": 11016 }, { "epoch": 0.7464597872484585, "grad_norm": 5.300811290740967, "learning_rate": 8.594564994181669e-05, "loss": 0.7823, "step": 11017 }, { "epoch": 0.7465275425164306, "grad_norm": 7.0292582511901855, "learning_rate": 8.594428092271887e-05, "loss": 0.733, "step": 11018 }, { "epoch": 0.7465952977844027, "grad_norm": 6.624716281890869, "learning_rate": 8.594291190362105e-05, "loss": 0.8518, "step": 11019 }, { "epoch": 0.7466630530523748, "grad_norm": 6.195197105407715, "learning_rate": 8.594154288452323e-05, "loss": 0.9108, "step": 11020 }, { "epoch": 0.7467308083203469, "grad_norm": 5.729263782501221, "learning_rate": 8.594017386542543e-05, "loss": 0.8028, "step": 11021 }, { "epoch": 0.746798563588319, "grad_norm": 5.285633563995361, "learning_rate": 8.593880484632761e-05, "loss": 0.765, "step": 11022 }, { "epoch": 0.7468663188562911, "grad_norm": 7.621737480163574, "learning_rate": 8.593743582722979e-05, "loss": 0.811, "step": 11023 }, { "epoch": 0.7469340741242632, "grad_norm": 6.321857929229736, "learning_rate": 8.593606680813198e-05, "loss": 0.8962, "step": 11024 }, { "epoch": 0.7470018293922353, "grad_norm": 8.122148513793945, "learning_rate": 8.593469778903416e-05, "loss": 0.9326, "step": 11025 }, { "epoch": 0.7470695846602073, "grad_norm": 8.037406921386719, "learning_rate": 8.593332876993634e-05, "loss": 0.7504, "step": 11026 }, { "epoch": 0.7471373399281794, "grad_norm": 6.311530590057373, "learning_rate": 8.593195975083854e-05, "loss": 0.8079, "step": 11027 }, { "epoch": 0.7472050951961515, "grad_norm": 6.504730224609375, "learning_rate": 8.593059073174072e-05, "loss": 0.8561, "step": 11028 }, { "epoch": 0.7472728504641236, "grad_norm": 6.117341995239258, "learning_rate": 8.59292217126429e-05, "loss": 1.0076, "step": 11029 }, { "epoch": 0.7473406057320957, "grad_norm": 5.374571800231934, "learning_rate": 8.592785269354508e-05, "loss": 0.763, "step": 11030 }, { "epoch": 0.7474083610000678, "grad_norm": 5.502954006195068, "learning_rate": 8.592648367444727e-05, "loss": 0.7479, "step": 11031 }, { "epoch": 0.7474761162680399, "grad_norm": 8.302000045776367, "learning_rate": 8.592511465534945e-05, "loss": 0.9581, "step": 11032 }, { "epoch": 0.7475438715360119, "grad_norm": 5.1271748542785645, "learning_rate": 8.592374563625163e-05, "loss": 0.8742, "step": 11033 }, { "epoch": 0.747611626803984, "grad_norm": 7.070951461791992, "learning_rate": 8.592237661715381e-05, "loss": 0.8405, "step": 11034 }, { "epoch": 0.7476793820719561, "grad_norm": 6.252740383148193, "learning_rate": 8.592100759805601e-05, "loss": 0.9714, "step": 11035 }, { "epoch": 0.7477471373399281, "grad_norm": 5.273550033569336, "learning_rate": 8.591963857895819e-05, "loss": 0.847, "step": 11036 }, { "epoch": 0.7478148926079002, "grad_norm": 5.054128646850586, "learning_rate": 8.591826955986037e-05, "loss": 0.7635, "step": 11037 }, { "epoch": 0.7478826478758723, "grad_norm": 6.404119968414307, "learning_rate": 8.591690054076255e-05, "loss": 0.8501, "step": 11038 }, { "epoch": 0.7479504031438444, "grad_norm": 4.8361430168151855, "learning_rate": 8.591553152166473e-05, "loss": 0.8474, "step": 11039 }, { "epoch": 0.7480181584118165, "grad_norm": 8.294208526611328, "learning_rate": 8.591416250256692e-05, "loss": 0.8207, "step": 11040 }, { "epoch": 0.7480859136797886, "grad_norm": 4.709441661834717, "learning_rate": 8.59127934834691e-05, "loss": 0.7697, "step": 11041 }, { "epoch": 0.7481536689477607, "grad_norm": 7.428562641143799, "learning_rate": 8.591142446437128e-05, "loss": 0.9717, "step": 11042 }, { "epoch": 0.7482214242157328, "grad_norm": 5.442418098449707, "learning_rate": 8.591005544527346e-05, "loss": 0.7347, "step": 11043 }, { "epoch": 0.7482891794837049, "grad_norm": 6.216116428375244, "learning_rate": 8.590868642617564e-05, "loss": 0.6919, "step": 11044 }, { "epoch": 0.748356934751677, "grad_norm": 6.814431190490723, "learning_rate": 8.590731740707784e-05, "loss": 0.9049, "step": 11045 }, { "epoch": 0.7484246900196491, "grad_norm": 6.422941207885742, "learning_rate": 8.590594838798002e-05, "loss": 0.8655, "step": 11046 }, { "epoch": 0.7484924452876212, "grad_norm": 6.052054405212402, "learning_rate": 8.59045793688822e-05, "loss": 0.8558, "step": 11047 }, { "epoch": 0.7485602005555932, "grad_norm": 7.135115146636963, "learning_rate": 8.590321034978438e-05, "loss": 0.7672, "step": 11048 }, { "epoch": 0.7486279558235652, "grad_norm": 5.608342170715332, "learning_rate": 8.590184133068657e-05, "loss": 0.6696, "step": 11049 }, { "epoch": 0.7486957110915373, "grad_norm": 6.936807155609131, "learning_rate": 8.590047231158875e-05, "loss": 0.9131, "step": 11050 }, { "epoch": 0.7487634663595094, "grad_norm": 6.121455669403076, "learning_rate": 8.589910329249093e-05, "loss": 0.8561, "step": 11051 }, { "epoch": 0.7488312216274815, "grad_norm": 5.633101463317871, "learning_rate": 8.589773427339311e-05, "loss": 0.7347, "step": 11052 }, { "epoch": 0.7488989768954536, "grad_norm": 5.5565080642700195, "learning_rate": 8.58963652542953e-05, "loss": 0.9127, "step": 11053 }, { "epoch": 0.7489667321634257, "grad_norm": 6.5937418937683105, "learning_rate": 8.589499623519749e-05, "loss": 0.9561, "step": 11054 }, { "epoch": 0.7490344874313978, "grad_norm": 5.2491774559021, "learning_rate": 8.589362721609967e-05, "loss": 0.5976, "step": 11055 }, { "epoch": 0.7491022426993699, "grad_norm": 7.343997478485107, "learning_rate": 8.589225819700185e-05, "loss": 0.7404, "step": 11056 }, { "epoch": 0.749169997967342, "grad_norm": 5.535366058349609, "learning_rate": 8.589088917790403e-05, "loss": 0.9568, "step": 11057 }, { "epoch": 0.749237753235314, "grad_norm": 5.751670837402344, "learning_rate": 8.588952015880622e-05, "loss": 0.6918, "step": 11058 }, { "epoch": 0.7493055085032861, "grad_norm": 6.7734150886535645, "learning_rate": 8.58881511397084e-05, "loss": 0.7541, "step": 11059 }, { "epoch": 0.7493732637712582, "grad_norm": 6.3308424949646, "learning_rate": 8.588678212061058e-05, "loss": 0.9318, "step": 11060 }, { "epoch": 0.7494410190392303, "grad_norm": 6.561203479766846, "learning_rate": 8.588541310151276e-05, "loss": 0.8515, "step": 11061 }, { "epoch": 0.7495087743072024, "grad_norm": 6.7505974769592285, "learning_rate": 8.588404408241494e-05, "loss": 0.5823, "step": 11062 }, { "epoch": 0.7495765295751745, "grad_norm": 6.116795063018799, "learning_rate": 8.588267506331714e-05, "loss": 0.8536, "step": 11063 }, { "epoch": 0.7496442848431466, "grad_norm": 5.443800926208496, "learning_rate": 8.588130604421932e-05, "loss": 0.8771, "step": 11064 }, { "epoch": 0.7497120401111187, "grad_norm": 6.711660385131836, "learning_rate": 8.58799370251215e-05, "loss": 0.9655, "step": 11065 }, { "epoch": 0.7497797953790907, "grad_norm": 7.173767566680908, "learning_rate": 8.587856800602368e-05, "loss": 0.8445, "step": 11066 }, { "epoch": 0.7498475506470628, "grad_norm": 5.898787498474121, "learning_rate": 8.587719898692587e-05, "loss": 0.9952, "step": 11067 }, { "epoch": 0.7498475506470628, "eval_loss": 0.784003496170044, "eval_noise_accuracy": 0.0, "eval_runtime": 1565.4788, "eval_samples_per_second": 3.283, "eval_steps_per_second": 0.206, "eval_wer": 88.605156710052, "step": 11067 }, { "epoch": 0.7499153059150349, "grad_norm": 5.584912300109863, "learning_rate": 8.587582996782805e-05, "loss": 0.6039, "step": 11068 }, { "epoch": 0.749983061183007, "grad_norm": 7.05438232421875, "learning_rate": 8.587446094873023e-05, "loss": 0.8871, "step": 11069 }, { "epoch": 0.750050816450979, "grad_norm": 5.896789073944092, "learning_rate": 8.587309192963243e-05, "loss": 0.6864, "step": 11070 }, { "epoch": 0.7501185717189511, "grad_norm": 5.357899188995361, "learning_rate": 8.587172291053461e-05, "loss": 0.7744, "step": 11071 }, { "epoch": 0.7501863269869232, "grad_norm": 5.943928241729736, "learning_rate": 8.587035389143679e-05, "loss": 0.8338, "step": 11072 }, { "epoch": 0.7502540822548953, "grad_norm": 5.910472393035889, "learning_rate": 8.586898487233898e-05, "loss": 0.6772, "step": 11073 }, { "epoch": 0.7503218375228674, "grad_norm": 7.976921558380127, "learning_rate": 8.586761585324116e-05, "loss": 0.6949, "step": 11074 }, { "epoch": 0.7503895927908395, "grad_norm": 10.462014198303223, "learning_rate": 8.586624683414334e-05, "loss": 0.903, "step": 11075 }, { "epoch": 0.7504573480588116, "grad_norm": 7.0388946533203125, "learning_rate": 8.586487781504552e-05, "loss": 0.9651, "step": 11076 }, { "epoch": 0.7505251033267837, "grad_norm": 6.222432613372803, "learning_rate": 8.586350879594772e-05, "loss": 0.8637, "step": 11077 }, { "epoch": 0.7505928585947558, "grad_norm": 5.401667594909668, "learning_rate": 8.58621397768499e-05, "loss": 1.0538, "step": 11078 }, { "epoch": 0.7506606138627279, "grad_norm": 5.605716705322266, "learning_rate": 8.586077075775208e-05, "loss": 0.589, "step": 11079 }, { "epoch": 0.7507283691307, "grad_norm": 5.001210689544678, "learning_rate": 8.585940173865426e-05, "loss": 0.8688, "step": 11080 }, { "epoch": 0.750796124398672, "grad_norm": 6.432781219482422, "learning_rate": 8.585803271955645e-05, "loss": 0.8322, "step": 11081 }, { "epoch": 0.750863879666644, "grad_norm": 5.7087483406066895, "learning_rate": 8.585666370045863e-05, "loss": 0.913, "step": 11082 }, { "epoch": 0.7509316349346161, "grad_norm": 6.006197452545166, "learning_rate": 8.585529468136081e-05, "loss": 0.7931, "step": 11083 }, { "epoch": 0.7509993902025882, "grad_norm": 6.532283782958984, "learning_rate": 8.585392566226299e-05, "loss": 0.7063, "step": 11084 }, { "epoch": 0.7510671454705603, "grad_norm": 5.088547229766846, "learning_rate": 8.585255664316517e-05, "loss": 0.7701, "step": 11085 }, { "epoch": 0.7511349007385324, "grad_norm": 4.305655479431152, "learning_rate": 8.585118762406737e-05, "loss": 0.5815, "step": 11086 }, { "epoch": 0.7512026560065045, "grad_norm": 6.688506603240967, "learning_rate": 8.584981860496955e-05, "loss": 0.7622, "step": 11087 }, { "epoch": 0.7512704112744766, "grad_norm": 4.817746639251709, "learning_rate": 8.584844958587173e-05, "loss": 0.6914, "step": 11088 }, { "epoch": 0.7513381665424487, "grad_norm": 6.330674648284912, "learning_rate": 8.584708056677391e-05, "loss": 0.7376, "step": 11089 }, { "epoch": 0.7514059218104208, "grad_norm": 6.616771697998047, "learning_rate": 8.584571154767609e-05, "loss": 0.741, "step": 11090 }, { "epoch": 0.7514736770783929, "grad_norm": 5.213818550109863, "learning_rate": 8.584434252857828e-05, "loss": 0.7452, "step": 11091 }, { "epoch": 0.751541432346365, "grad_norm": 6.440800189971924, "learning_rate": 8.584297350948046e-05, "loss": 0.7903, "step": 11092 }, { "epoch": 0.751609187614337, "grad_norm": 7.082359790802002, "learning_rate": 8.584160449038264e-05, "loss": 0.9452, "step": 11093 }, { "epoch": 0.7516769428823091, "grad_norm": 5.019514083862305, "learning_rate": 8.584023547128482e-05, "loss": 0.6764, "step": 11094 }, { "epoch": 0.7517446981502812, "grad_norm": 6.08909797668457, "learning_rate": 8.583886645218702e-05, "loss": 0.6528, "step": 11095 }, { "epoch": 0.7518124534182533, "grad_norm": 6.15897274017334, "learning_rate": 8.58374974330892e-05, "loss": 0.7528, "step": 11096 }, { "epoch": 0.7518802086862254, "grad_norm": 6.012785911560059, "learning_rate": 8.583612841399138e-05, "loss": 0.8348, "step": 11097 }, { "epoch": 0.7519479639541974, "grad_norm": 6.340338230133057, "learning_rate": 8.583475939489356e-05, "loss": 0.8322, "step": 11098 }, { "epoch": 0.7520157192221695, "grad_norm": 6.704728603363037, "learning_rate": 8.583339037579574e-05, "loss": 0.9286, "step": 11099 }, { "epoch": 0.7520834744901416, "grad_norm": 7.134490013122559, "learning_rate": 8.583202135669793e-05, "loss": 0.9965, "step": 11100 }, { "epoch": 0.7521512297581137, "grad_norm": 6.226062774658203, "learning_rate": 8.583065233760011e-05, "loss": 0.6716, "step": 11101 }, { "epoch": 0.7522189850260858, "grad_norm": 5.990030765533447, "learning_rate": 8.582928331850229e-05, "loss": 0.7636, "step": 11102 }, { "epoch": 0.7522867402940578, "grad_norm": 5.9684834480285645, "learning_rate": 8.582791429940447e-05, "loss": 0.7847, "step": 11103 }, { "epoch": 0.7523544955620299, "grad_norm": 6.136056900024414, "learning_rate": 8.582654528030667e-05, "loss": 0.8641, "step": 11104 }, { "epoch": 0.752422250830002, "grad_norm": 6.887735366821289, "learning_rate": 8.582517626120885e-05, "loss": 0.8848, "step": 11105 }, { "epoch": 0.7524900060979741, "grad_norm": 5.988890647888184, "learning_rate": 8.582380724211103e-05, "loss": 0.8409, "step": 11106 }, { "epoch": 0.7525577613659462, "grad_norm": 5.09688138961792, "learning_rate": 8.582243822301321e-05, "loss": 0.813, "step": 11107 }, { "epoch": 0.7526255166339183, "grad_norm": 5.739447593688965, "learning_rate": 8.582106920391539e-05, "loss": 0.8414, "step": 11108 }, { "epoch": 0.7526932719018904, "grad_norm": 7.205900192260742, "learning_rate": 8.581970018481758e-05, "loss": 0.7134, "step": 11109 }, { "epoch": 0.7527610271698625, "grad_norm": 5.002781867980957, "learning_rate": 8.581833116571976e-05, "loss": 0.7937, "step": 11110 }, { "epoch": 0.7528287824378346, "grad_norm": 7.405692100524902, "learning_rate": 8.581696214662194e-05, "loss": 0.7896, "step": 11111 }, { "epoch": 0.7528965377058067, "grad_norm": 6.863432884216309, "learning_rate": 8.581559312752412e-05, "loss": 1.0113, "step": 11112 }, { "epoch": 0.7529642929737788, "grad_norm": 6.594325542449951, "learning_rate": 8.581422410842632e-05, "loss": 1.1729, "step": 11113 }, { "epoch": 0.7530320482417509, "grad_norm": 7.210272312164307, "learning_rate": 8.58128550893285e-05, "loss": 1.0648, "step": 11114 }, { "epoch": 0.7530998035097228, "grad_norm": 6.694648742675781, "learning_rate": 8.581148607023068e-05, "loss": 0.7308, "step": 11115 }, { "epoch": 0.7531675587776949, "grad_norm": 4.825174808502197, "learning_rate": 8.581011705113287e-05, "loss": 0.7378, "step": 11116 }, { "epoch": 0.753235314045667, "grad_norm": 7.157209873199463, "learning_rate": 8.580874803203505e-05, "loss": 0.8906, "step": 11117 }, { "epoch": 0.7533030693136391, "grad_norm": 7.719106674194336, "learning_rate": 8.580737901293723e-05, "loss": 0.5838, "step": 11118 }, { "epoch": 0.7533708245816112, "grad_norm": 5.7004828453063965, "learning_rate": 8.580600999383943e-05, "loss": 0.8375, "step": 11119 }, { "epoch": 0.7534385798495833, "grad_norm": 6.208179950714111, "learning_rate": 8.580464097474161e-05, "loss": 0.7722, "step": 11120 }, { "epoch": 0.7535063351175554, "grad_norm": 7.243391990661621, "learning_rate": 8.580327195564379e-05, "loss": 1.0486, "step": 11121 }, { "epoch": 0.7535740903855275, "grad_norm": 5.903680324554443, "learning_rate": 8.580190293654597e-05, "loss": 0.7814, "step": 11122 }, { "epoch": 0.7536418456534996, "grad_norm": 6.017585277557373, "learning_rate": 8.580053391744816e-05, "loss": 0.9528, "step": 11123 }, { "epoch": 0.7537096009214717, "grad_norm": 6.117088317871094, "learning_rate": 8.579916489835034e-05, "loss": 0.8894, "step": 11124 }, { "epoch": 0.7537773561894437, "grad_norm": 4.541194915771484, "learning_rate": 8.579779587925252e-05, "loss": 0.6975, "step": 11125 }, { "epoch": 0.7538451114574158, "grad_norm": 5.8757452964782715, "learning_rate": 8.57964268601547e-05, "loss": 1.0208, "step": 11126 }, { "epoch": 0.7539128667253879, "grad_norm": 6.408355712890625, "learning_rate": 8.57950578410569e-05, "loss": 0.7566, "step": 11127 }, { "epoch": 0.75398062199336, "grad_norm": 6.257778167724609, "learning_rate": 8.579368882195908e-05, "loss": 0.8246, "step": 11128 }, { "epoch": 0.7540483772613321, "grad_norm": 5.630858898162842, "learning_rate": 8.579231980286126e-05, "loss": 0.9085, "step": 11129 }, { "epoch": 0.7541161325293042, "grad_norm": 5.071380138397217, "learning_rate": 8.579095078376344e-05, "loss": 0.6106, "step": 11130 }, { "epoch": 0.7541838877972762, "grad_norm": 5.429622173309326, "learning_rate": 8.578958176466562e-05, "loss": 0.5699, "step": 11131 }, { "epoch": 0.7542516430652483, "grad_norm": 7.031247615814209, "learning_rate": 8.578821274556781e-05, "loss": 0.9928, "step": 11132 }, { "epoch": 0.7543193983332204, "grad_norm": 6.868895053863525, "learning_rate": 8.578684372646999e-05, "loss": 0.5757, "step": 11133 }, { "epoch": 0.7543871536011925, "grad_norm": 6.9466118812561035, "learning_rate": 8.578547470737217e-05, "loss": 0.7591, "step": 11134 }, { "epoch": 0.7544549088691646, "grad_norm": 5.605920314788818, "learning_rate": 8.578410568827435e-05, "loss": 0.7244, "step": 11135 }, { "epoch": 0.7545226641371366, "grad_norm": 5.563654899597168, "learning_rate": 8.578273666917655e-05, "loss": 0.6451, "step": 11136 }, { "epoch": 0.7545904194051087, "grad_norm": 5.8858771324157715, "learning_rate": 8.578136765007873e-05, "loss": 0.9521, "step": 11137 }, { "epoch": 0.7546581746730808, "grad_norm": 6.400531768798828, "learning_rate": 8.577999863098091e-05, "loss": 0.7077, "step": 11138 }, { "epoch": 0.7547259299410529, "grad_norm": 6.579067230224609, "learning_rate": 8.577862961188309e-05, "loss": 0.8734, "step": 11139 }, { "epoch": 0.754793685209025, "grad_norm": 5.629753112792969, "learning_rate": 8.577726059278527e-05, "loss": 0.7637, "step": 11140 }, { "epoch": 0.7548614404769971, "grad_norm": 5.546406269073486, "learning_rate": 8.577589157368746e-05, "loss": 0.8094, "step": 11141 }, { "epoch": 0.7549291957449692, "grad_norm": 5.510883331298828, "learning_rate": 8.577452255458964e-05, "loss": 0.8629, "step": 11142 }, { "epoch": 0.7549969510129413, "grad_norm": 8.156396865844727, "learning_rate": 8.577315353549182e-05, "loss": 0.7051, "step": 11143 }, { "epoch": 0.7550647062809134, "grad_norm": 5.59662389755249, "learning_rate": 8.5771784516394e-05, "loss": 0.8627, "step": 11144 }, { "epoch": 0.7551324615488855, "grad_norm": 7.06483793258667, "learning_rate": 8.577041549729618e-05, "loss": 0.8544, "step": 11145 }, { "epoch": 0.7552002168168576, "grad_norm": 5.5211663246154785, "learning_rate": 8.576904647819838e-05, "loss": 0.9621, "step": 11146 }, { "epoch": 0.7552679720848295, "grad_norm": 5.117467880249023, "learning_rate": 8.576767745910056e-05, "loss": 0.7759, "step": 11147 }, { "epoch": 0.7553357273528016, "grad_norm": 6.539487361907959, "learning_rate": 8.576630844000274e-05, "loss": 0.8292, "step": 11148 }, { "epoch": 0.7554034826207737, "grad_norm": 6.704295635223389, "learning_rate": 8.576493942090492e-05, "loss": 0.6924, "step": 11149 }, { "epoch": 0.7554712378887458, "grad_norm": 6.1922926902771, "learning_rate": 8.576357040180711e-05, "loss": 0.9602, "step": 11150 }, { "epoch": 0.7555389931567179, "grad_norm": 6.421977519989014, "learning_rate": 8.576220138270929e-05, "loss": 0.6867, "step": 11151 }, { "epoch": 0.75560674842469, "grad_norm": 9.157228469848633, "learning_rate": 8.576083236361147e-05, "loss": 0.7311, "step": 11152 }, { "epoch": 0.7556745036926621, "grad_norm": 6.960337162017822, "learning_rate": 8.575946334451365e-05, "loss": 0.986, "step": 11153 }, { "epoch": 0.7557422589606342, "grad_norm": 7.604123115539551, "learning_rate": 8.575809432541583e-05, "loss": 0.8723, "step": 11154 }, { "epoch": 0.7558100142286063, "grad_norm": 4.943920135498047, "learning_rate": 8.575672530631803e-05, "loss": 0.7067, "step": 11155 }, { "epoch": 0.7558777694965784, "grad_norm": 6.594372272491455, "learning_rate": 8.575535628722021e-05, "loss": 0.8089, "step": 11156 }, { "epoch": 0.7559455247645505, "grad_norm": 6.759579181671143, "learning_rate": 8.575398726812239e-05, "loss": 0.8882, "step": 11157 }, { "epoch": 0.7560132800325225, "grad_norm": 7.21705961227417, "learning_rate": 8.575261824902457e-05, "loss": 0.7922, "step": 11158 }, { "epoch": 0.7560810353004946, "grad_norm": 5.465212821960449, "learning_rate": 8.575124922992676e-05, "loss": 0.5913, "step": 11159 }, { "epoch": 0.7561487905684667, "grad_norm": 5.213903427124023, "learning_rate": 8.574988021082894e-05, "loss": 0.8809, "step": 11160 }, { "epoch": 0.7562165458364388, "grad_norm": 5.521292686462402, "learning_rate": 8.574851119173112e-05, "loss": 0.8858, "step": 11161 }, { "epoch": 0.7562843011044109, "grad_norm": 4.67765474319458, "learning_rate": 8.574714217263332e-05, "loss": 0.624, "step": 11162 }, { "epoch": 0.756352056372383, "grad_norm": 6.993425369262695, "learning_rate": 8.57457731535355e-05, "loss": 0.8126, "step": 11163 }, { "epoch": 0.756419811640355, "grad_norm": 6.692846775054932, "learning_rate": 8.574440413443768e-05, "loss": 1.0727, "step": 11164 }, { "epoch": 0.7564875669083271, "grad_norm": 6.0148773193359375, "learning_rate": 8.574303511533987e-05, "loss": 0.9334, "step": 11165 }, { "epoch": 0.7565553221762992, "grad_norm": 8.869120597839355, "learning_rate": 8.574166609624205e-05, "loss": 0.9927, "step": 11166 }, { "epoch": 0.7566230774442713, "grad_norm": 6.259548187255859, "learning_rate": 8.574029707714423e-05, "loss": 0.9905, "step": 11167 }, { "epoch": 0.7566908327122434, "grad_norm": 6.2330322265625, "learning_rate": 8.573892805804643e-05, "loss": 0.5814, "step": 11168 }, { "epoch": 0.7567585879802154, "grad_norm": 5.996000289916992, "learning_rate": 8.57375590389486e-05, "loss": 0.7676, "step": 11169 }, { "epoch": 0.7568263432481875, "grad_norm": 5.289711952209473, "learning_rate": 8.573619001985079e-05, "loss": 0.5761, "step": 11170 }, { "epoch": 0.7568940985161596, "grad_norm": 7.447518825531006, "learning_rate": 8.573482100075297e-05, "loss": 0.8822, "step": 11171 }, { "epoch": 0.7569618537841317, "grad_norm": 5.87143087387085, "learning_rate": 8.573345198165515e-05, "loss": 0.7935, "step": 11172 }, { "epoch": 0.7570296090521038, "grad_norm": 5.967605113983154, "learning_rate": 8.573208296255734e-05, "loss": 0.7755, "step": 11173 }, { "epoch": 0.7570973643200759, "grad_norm": 5.8669562339782715, "learning_rate": 8.573071394345952e-05, "loss": 0.7079, "step": 11174 }, { "epoch": 0.757165119588048, "grad_norm": 5.084571361541748, "learning_rate": 8.57293449243617e-05, "loss": 0.5683, "step": 11175 }, { "epoch": 0.7572328748560201, "grad_norm": 5.499231338500977, "learning_rate": 8.572797590526388e-05, "loss": 0.7443, "step": 11176 }, { "epoch": 0.7573006301239922, "grad_norm": 6.275290489196777, "learning_rate": 8.572660688616606e-05, "loss": 0.8232, "step": 11177 }, { "epoch": 0.7573683853919643, "grad_norm": 9.729387283325195, "learning_rate": 8.572523786706826e-05, "loss": 1.0499, "step": 11178 }, { "epoch": 0.7574361406599364, "grad_norm": 5.243049144744873, "learning_rate": 8.572386884797044e-05, "loss": 0.8695, "step": 11179 }, { "epoch": 0.7575038959279083, "grad_norm": 6.83506965637207, "learning_rate": 8.572249982887262e-05, "loss": 1.0048, "step": 11180 }, { "epoch": 0.7575716511958804, "grad_norm": 5.801395893096924, "learning_rate": 8.57211308097748e-05, "loss": 0.7159, "step": 11181 }, { "epoch": 0.7576394064638525, "grad_norm": 5.878986835479736, "learning_rate": 8.571976179067699e-05, "loss": 0.8116, "step": 11182 }, { "epoch": 0.7577071617318246, "grad_norm": 5.621567726135254, "learning_rate": 8.571839277157917e-05, "loss": 0.771, "step": 11183 }, { "epoch": 0.7577749169997967, "grad_norm": 6.197023391723633, "learning_rate": 8.571702375248135e-05, "loss": 0.7588, "step": 11184 }, { "epoch": 0.7578426722677688, "grad_norm": 6.170006275177002, "learning_rate": 8.571565473338353e-05, "loss": 0.7644, "step": 11185 }, { "epoch": 0.7579104275357409, "grad_norm": 7.654489517211914, "learning_rate": 8.571428571428571e-05, "loss": 0.8256, "step": 11186 }, { "epoch": 0.757978182803713, "grad_norm": 6.45438289642334, "learning_rate": 8.57129166951879e-05, "loss": 0.8437, "step": 11187 }, { "epoch": 0.7580459380716851, "grad_norm": 6.441734790802002, "learning_rate": 8.571154767609009e-05, "loss": 0.86, "step": 11188 }, { "epoch": 0.7581136933396572, "grad_norm": 6.674750804901123, "learning_rate": 8.571017865699227e-05, "loss": 0.761, "step": 11189 }, { "epoch": 0.7581814486076293, "grad_norm": 5.539404392242432, "learning_rate": 8.570880963789445e-05, "loss": 0.7542, "step": 11190 }, { "epoch": 0.7582492038756014, "grad_norm": 7.210087776184082, "learning_rate": 8.570744061879664e-05, "loss": 0.9209, "step": 11191 }, { "epoch": 0.7583169591435734, "grad_norm": 5.365791320800781, "learning_rate": 8.570607159969882e-05, "loss": 0.7885, "step": 11192 }, { "epoch": 0.7583847144115455, "grad_norm": 5.170724391937256, "learning_rate": 8.5704702580601e-05, "loss": 0.7241, "step": 11193 }, { "epoch": 0.7584524696795176, "grad_norm": 6.509468078613281, "learning_rate": 8.570333356150318e-05, "loss": 0.6644, "step": 11194 }, { "epoch": 0.7585202249474897, "grad_norm": 7.682931900024414, "learning_rate": 8.570196454240536e-05, "loss": 0.8989, "step": 11195 }, { "epoch": 0.7585879802154617, "grad_norm": 5.240300178527832, "learning_rate": 8.570059552330756e-05, "loss": 0.7577, "step": 11196 }, { "epoch": 0.7586557354834338, "grad_norm": 6.523096084594727, "learning_rate": 8.569922650420974e-05, "loss": 0.9607, "step": 11197 }, { "epoch": 0.7587234907514059, "grad_norm": 7.254205703735352, "learning_rate": 8.569785748511192e-05, "loss": 0.744, "step": 11198 }, { "epoch": 0.758791246019378, "grad_norm": 6.33684778213501, "learning_rate": 8.56964884660141e-05, "loss": 0.7514, "step": 11199 }, { "epoch": 0.7588590012873501, "grad_norm": 7.9795403480529785, "learning_rate": 8.569511944691628e-05, "loss": 0.8608, "step": 11200 }, { "epoch": 0.7589267565553222, "grad_norm": 9.582289695739746, "learning_rate": 8.569375042781847e-05, "loss": 0.63, "step": 11201 }, { "epoch": 0.7589945118232942, "grad_norm": 7.716179847717285, "learning_rate": 8.569238140872065e-05, "loss": 0.8992, "step": 11202 }, { "epoch": 0.7590622670912663, "grad_norm": 7.2479095458984375, "learning_rate": 8.569101238962283e-05, "loss": 0.7297, "step": 11203 }, { "epoch": 0.7591300223592384, "grad_norm": 5.631564140319824, "learning_rate": 8.568964337052501e-05, "loss": 0.5615, "step": 11204 }, { "epoch": 0.7591977776272105, "grad_norm": 6.547656536102295, "learning_rate": 8.568827435142721e-05, "loss": 0.7367, "step": 11205 }, { "epoch": 0.7592655328951826, "grad_norm": 5.980971813201904, "learning_rate": 8.568690533232939e-05, "loss": 0.857, "step": 11206 }, { "epoch": 0.7593332881631547, "grad_norm": 5.813320159912109, "learning_rate": 8.568553631323157e-05, "loss": 0.9032, "step": 11207 }, { "epoch": 0.7594010434311268, "grad_norm": 8.85381031036377, "learning_rate": 8.568416729413376e-05, "loss": 1.2161, "step": 11208 }, { "epoch": 0.7594687986990989, "grad_norm": 5.185744285583496, "learning_rate": 8.568279827503594e-05, "loss": 0.7288, "step": 11209 }, { "epoch": 0.759536553967071, "grad_norm": 6.757566452026367, "learning_rate": 8.568142925593812e-05, "loss": 0.8418, "step": 11210 }, { "epoch": 0.7596043092350431, "grad_norm": 6.195647239685059, "learning_rate": 8.568006023684032e-05, "loss": 0.8931, "step": 11211 }, { "epoch": 0.759672064503015, "grad_norm": 5.0666117668151855, "learning_rate": 8.56786912177425e-05, "loss": 0.677, "step": 11212 }, { "epoch": 0.7597398197709871, "grad_norm": 6.256303787231445, "learning_rate": 8.567732219864468e-05, "loss": 0.8905, "step": 11213 }, { "epoch": 0.7598075750389592, "grad_norm": 5.787086009979248, "learning_rate": 8.567595317954687e-05, "loss": 0.7823, "step": 11214 }, { "epoch": 0.7598753303069313, "grad_norm": 6.895840644836426, "learning_rate": 8.567458416044905e-05, "loss": 0.7453, "step": 11215 }, { "epoch": 0.7599430855749034, "grad_norm": 5.626133441925049, "learning_rate": 8.567321514135123e-05, "loss": 0.681, "step": 11216 }, { "epoch": 0.7600108408428755, "grad_norm": 5.967296123504639, "learning_rate": 8.567184612225341e-05, "loss": 0.7011, "step": 11217 }, { "epoch": 0.7600785961108476, "grad_norm": 6.5361175537109375, "learning_rate": 8.567047710315559e-05, "loss": 0.9365, "step": 11218 }, { "epoch": 0.7601463513788197, "grad_norm": 5.349835395812988, "learning_rate": 8.566910808405779e-05, "loss": 0.8221, "step": 11219 }, { "epoch": 0.7602141066467918, "grad_norm": 5.8798089027404785, "learning_rate": 8.566773906495997e-05, "loss": 0.8863, "step": 11220 }, { "epoch": 0.7602818619147639, "grad_norm": 5.611867427825928, "learning_rate": 8.566637004586215e-05, "loss": 0.7967, "step": 11221 }, { "epoch": 0.760349617182736, "grad_norm": 7.200530529022217, "learning_rate": 8.566500102676433e-05, "loss": 0.7193, "step": 11222 }, { "epoch": 0.7604173724507081, "grad_norm": 8.199413299560547, "learning_rate": 8.566363200766651e-05, "loss": 1.0548, "step": 11223 }, { "epoch": 0.7604851277186802, "grad_norm": 6.439299583435059, "learning_rate": 8.56622629885687e-05, "loss": 0.9681, "step": 11224 }, { "epoch": 0.7605528829866522, "grad_norm": 5.07982873916626, "learning_rate": 8.566089396947088e-05, "loss": 0.5863, "step": 11225 }, { "epoch": 0.7606206382546243, "grad_norm": 5.301721572875977, "learning_rate": 8.565952495037306e-05, "loss": 0.6788, "step": 11226 }, { "epoch": 0.7606883935225964, "grad_norm": 6.359459400177002, "learning_rate": 8.565815593127524e-05, "loss": 0.8885, "step": 11227 }, { "epoch": 0.7607561487905685, "grad_norm": 7.1541619300842285, "learning_rate": 8.565678691217744e-05, "loss": 0.7039, "step": 11228 }, { "epoch": 0.7608239040585405, "grad_norm": 5.493987560272217, "learning_rate": 8.565541789307962e-05, "loss": 0.698, "step": 11229 }, { "epoch": 0.7608916593265126, "grad_norm": 5.63759708404541, "learning_rate": 8.56540488739818e-05, "loss": 0.73, "step": 11230 }, { "epoch": 0.7609594145944847, "grad_norm": 4.844949245452881, "learning_rate": 8.565267985488398e-05, "loss": 0.9189, "step": 11231 }, { "epoch": 0.7610271698624568, "grad_norm": 5.61946964263916, "learning_rate": 8.565131083578616e-05, "loss": 0.9857, "step": 11232 }, { "epoch": 0.7610949251304289, "grad_norm": 6.210287570953369, "learning_rate": 8.564994181668835e-05, "loss": 0.9976, "step": 11233 }, { "epoch": 0.761162680398401, "grad_norm": 8.433357238769531, "learning_rate": 8.564857279759053e-05, "loss": 0.7793, "step": 11234 }, { "epoch": 0.761230435666373, "grad_norm": 6.107748031616211, "learning_rate": 8.564720377849271e-05, "loss": 0.8645, "step": 11235 }, { "epoch": 0.7612981909343451, "grad_norm": 5.744904041290283, "learning_rate": 8.564583475939489e-05, "loss": 0.9381, "step": 11236 }, { "epoch": 0.7613659462023172, "grad_norm": 7.262130260467529, "learning_rate": 8.564446574029709e-05, "loss": 0.6813, "step": 11237 }, { "epoch": 0.7614337014702893, "grad_norm": 5.340671062469482, "learning_rate": 8.564309672119927e-05, "loss": 0.8911, "step": 11238 }, { "epoch": 0.7615014567382614, "grad_norm": 5.608626842498779, "learning_rate": 8.564172770210145e-05, "loss": 0.8672, "step": 11239 }, { "epoch": 0.7615692120062335, "grad_norm": 5.490970611572266, "learning_rate": 8.564035868300363e-05, "loss": 0.7373, "step": 11240 }, { "epoch": 0.7616369672742056, "grad_norm": 5.939301013946533, "learning_rate": 8.563898966390581e-05, "loss": 0.8574, "step": 11241 }, { "epoch": 0.7617047225421777, "grad_norm": 5.254984378814697, "learning_rate": 8.5637620644808e-05, "loss": 0.888, "step": 11242 }, { "epoch": 0.7617724778101498, "grad_norm": 5.318180561065674, "learning_rate": 8.563625162571018e-05, "loss": 0.7723, "step": 11243 }, { "epoch": 0.7618402330781219, "grad_norm": 6.106221675872803, "learning_rate": 8.563488260661236e-05, "loss": 1.0503, "step": 11244 }, { "epoch": 0.7619079883460939, "grad_norm": 6.882452011108398, "learning_rate": 8.563351358751454e-05, "loss": 0.875, "step": 11245 }, { "epoch": 0.761975743614066, "grad_norm": 5.541532516479492, "learning_rate": 8.563214456841674e-05, "loss": 0.7503, "step": 11246 }, { "epoch": 0.762043498882038, "grad_norm": 6.156836986541748, "learning_rate": 8.563077554931892e-05, "loss": 0.6966, "step": 11247 }, { "epoch": 0.7621112541500101, "grad_norm": 5.691517353057861, "learning_rate": 8.56294065302211e-05, "loss": 0.8133, "step": 11248 }, { "epoch": 0.7621790094179822, "grad_norm": 5.82022762298584, "learning_rate": 8.562803751112328e-05, "loss": 0.8547, "step": 11249 }, { "epoch": 0.7622467646859543, "grad_norm": 7.37861442565918, "learning_rate": 8.562666849202546e-05, "loss": 0.9013, "step": 11250 }, { "epoch": 0.7623145199539264, "grad_norm": 6.810284614562988, "learning_rate": 8.562529947292765e-05, "loss": 0.8586, "step": 11251 }, { "epoch": 0.7623822752218985, "grad_norm": 5.333098411560059, "learning_rate": 8.562393045382983e-05, "loss": 0.719, "step": 11252 }, { "epoch": 0.7624500304898706, "grad_norm": 5.309335231781006, "learning_rate": 8.562256143473201e-05, "loss": 0.6253, "step": 11253 }, { "epoch": 0.7625177857578427, "grad_norm": 6.738717079162598, "learning_rate": 8.562119241563419e-05, "loss": 0.8135, "step": 11254 }, { "epoch": 0.7625855410258148, "grad_norm": 7.389588832855225, "learning_rate": 8.561982339653639e-05, "loss": 0.9612, "step": 11255 }, { "epoch": 0.7626532962937869, "grad_norm": 5.986783504486084, "learning_rate": 8.561845437743857e-05, "loss": 0.8069, "step": 11256 }, { "epoch": 0.762721051561759, "grad_norm": 4.995396614074707, "learning_rate": 8.561708535834076e-05, "loss": 0.8176, "step": 11257 }, { "epoch": 0.762788806829731, "grad_norm": 5.639811992645264, "learning_rate": 8.561571633924294e-05, "loss": 0.8188, "step": 11258 }, { "epoch": 0.7628565620977031, "grad_norm": 5.036534309387207, "learning_rate": 8.561434732014512e-05, "loss": 0.6055, "step": 11259 }, { "epoch": 0.7629243173656752, "grad_norm": 7.262217998504639, "learning_rate": 8.561297830104732e-05, "loss": 0.9039, "step": 11260 }, { "epoch": 0.7629920726336472, "grad_norm": 6.3017497062683105, "learning_rate": 8.56116092819495e-05, "loss": 0.9808, "step": 11261 }, { "epoch": 0.7630598279016193, "grad_norm": 5.999700546264648, "learning_rate": 8.561024026285168e-05, "loss": 0.771, "step": 11262 }, { "epoch": 0.7631275831695914, "grad_norm": 5.911481857299805, "learning_rate": 8.560887124375386e-05, "loss": 0.8681, "step": 11263 }, { "epoch": 0.7631953384375635, "grad_norm": 6.147214412689209, "learning_rate": 8.560750222465604e-05, "loss": 0.7072, "step": 11264 }, { "epoch": 0.7632630937055356, "grad_norm": 7.115184783935547, "learning_rate": 8.560613320555823e-05, "loss": 0.6084, "step": 11265 }, { "epoch": 0.7633308489735077, "grad_norm": 5.635624885559082, "learning_rate": 8.560476418646041e-05, "loss": 0.7086, "step": 11266 }, { "epoch": 0.7633986042414798, "grad_norm": 5.630052089691162, "learning_rate": 8.560339516736259e-05, "loss": 0.7175, "step": 11267 }, { "epoch": 0.7634663595094519, "grad_norm": 6.82834529876709, "learning_rate": 8.560202614826477e-05, "loss": 0.8163, "step": 11268 }, { "epoch": 0.763534114777424, "grad_norm": 5.716002464294434, "learning_rate": 8.560065712916697e-05, "loss": 0.7745, "step": 11269 }, { "epoch": 0.763601870045396, "grad_norm": 8.498682022094727, "learning_rate": 8.559928811006915e-05, "loss": 1.2038, "step": 11270 }, { "epoch": 0.7636696253133681, "grad_norm": 6.311810493469238, "learning_rate": 8.559791909097133e-05, "loss": 0.723, "step": 11271 }, { "epoch": 0.7637373805813402, "grad_norm": 8.272360801696777, "learning_rate": 8.55965500718735e-05, "loss": 0.8831, "step": 11272 }, { "epoch": 0.7638051358493123, "grad_norm": 8.232126235961914, "learning_rate": 8.559518105277569e-05, "loss": 0.9191, "step": 11273 }, { "epoch": 0.7638728911172844, "grad_norm": 5.009904384613037, "learning_rate": 8.559381203367788e-05, "loss": 0.7523, "step": 11274 }, { "epoch": 0.7639406463852565, "grad_norm": 5.405948638916016, "learning_rate": 8.559244301458006e-05, "loss": 0.8288, "step": 11275 }, { "epoch": 0.7640084016532286, "grad_norm": 5.3381428718566895, "learning_rate": 8.559107399548224e-05, "loss": 0.8073, "step": 11276 }, { "epoch": 0.7640761569212007, "grad_norm": 5.603418827056885, "learning_rate": 8.558970497638442e-05, "loss": 0.5939, "step": 11277 }, { "epoch": 0.7641439121891727, "grad_norm": 6.921864986419678, "learning_rate": 8.55883359572866e-05, "loss": 0.7877, "step": 11278 }, { "epoch": 0.7642116674571447, "grad_norm": 7.049600124359131, "learning_rate": 8.55869669381888e-05, "loss": 1.0853, "step": 11279 }, { "epoch": 0.7642794227251168, "grad_norm": 6.981689929962158, "learning_rate": 8.558559791909098e-05, "loss": 0.9528, "step": 11280 }, { "epoch": 0.7643471779930889, "grad_norm": 5.135133266448975, "learning_rate": 8.558422889999316e-05, "loss": 0.6623, "step": 11281 }, { "epoch": 0.764414933261061, "grad_norm": 4.53966760635376, "learning_rate": 8.558285988089534e-05, "loss": 0.5281, "step": 11282 }, { "epoch": 0.7644826885290331, "grad_norm": 7.814493656158447, "learning_rate": 8.558149086179753e-05, "loss": 1.132, "step": 11283 }, { "epoch": 0.7645504437970052, "grad_norm": 7.641327381134033, "learning_rate": 8.558012184269971e-05, "loss": 1.0694, "step": 11284 }, { "epoch": 0.7646181990649773, "grad_norm": 5.932433128356934, "learning_rate": 8.557875282360189e-05, "loss": 0.9439, "step": 11285 }, { "epoch": 0.7646859543329494, "grad_norm": 5.546622276306152, "learning_rate": 8.557738380450407e-05, "loss": 0.8643, "step": 11286 }, { "epoch": 0.7647537096009215, "grad_norm": 5.611812114715576, "learning_rate": 8.557601478540625e-05, "loss": 0.8404, "step": 11287 }, { "epoch": 0.7648214648688936, "grad_norm": 6.550889015197754, "learning_rate": 8.557464576630845e-05, "loss": 0.6958, "step": 11288 }, { "epoch": 0.7648892201368657, "grad_norm": 5.113431930541992, "learning_rate": 8.557327674721063e-05, "loss": 0.5798, "step": 11289 }, { "epoch": 0.7649569754048378, "grad_norm": 5.645472526550293, "learning_rate": 8.557190772811281e-05, "loss": 0.9511, "step": 11290 }, { "epoch": 0.7650247306728098, "grad_norm": 6.488102912902832, "learning_rate": 8.557053870901499e-05, "loss": 1.1031, "step": 11291 }, { "epoch": 0.7650924859407819, "grad_norm": 5.89171028137207, "learning_rate": 8.556916968991718e-05, "loss": 0.6539, "step": 11292 }, { "epoch": 0.765160241208754, "grad_norm": 6.806115627288818, "learning_rate": 8.556780067081936e-05, "loss": 0.7376, "step": 11293 }, { "epoch": 0.765227996476726, "grad_norm": 4.954026222229004, "learning_rate": 8.556643165172154e-05, "loss": 0.8474, "step": 11294 }, { "epoch": 0.7652957517446981, "grad_norm": 5.056695461273193, "learning_rate": 8.556506263262372e-05, "loss": 0.6977, "step": 11295 }, { "epoch": 0.7653635070126702, "grad_norm": 6.3873162269592285, "learning_rate": 8.55636936135259e-05, "loss": 0.8378, "step": 11296 }, { "epoch": 0.7654312622806423, "grad_norm": 6.397019386291504, "learning_rate": 8.55623245944281e-05, "loss": 0.7574, "step": 11297 }, { "epoch": 0.7654990175486144, "grad_norm": 7.052022457122803, "learning_rate": 8.556095557533028e-05, "loss": 0.6341, "step": 11298 }, { "epoch": 0.7655667728165865, "grad_norm": 6.845788478851318, "learning_rate": 8.555958655623246e-05, "loss": 0.9614, "step": 11299 }, { "epoch": 0.7656345280845586, "grad_norm": 6.307339668273926, "learning_rate": 8.555821753713464e-05, "loss": 0.8428, "step": 11300 }, { "epoch": 0.7657022833525307, "grad_norm": 9.020713806152344, "learning_rate": 8.555684851803683e-05, "loss": 0.9878, "step": 11301 }, { "epoch": 0.7657700386205027, "grad_norm": 5.308076858520508, "learning_rate": 8.555547949893901e-05, "loss": 0.614, "step": 11302 }, { "epoch": 0.7658377938884748, "grad_norm": 5.961923122406006, "learning_rate": 8.555411047984119e-05, "loss": 0.7601, "step": 11303 }, { "epoch": 0.7659055491564469, "grad_norm": 5.368841648101807, "learning_rate": 8.555274146074339e-05, "loss": 0.802, "step": 11304 }, { "epoch": 0.765973304424419, "grad_norm": 6.24576473236084, "learning_rate": 8.555137244164557e-05, "loss": 0.8612, "step": 11305 }, { "epoch": 0.7660410596923911, "grad_norm": 6.770591735839844, "learning_rate": 8.555000342254775e-05, "loss": 0.9074, "step": 11306 }, { "epoch": 0.7661088149603632, "grad_norm": 7.066356658935547, "learning_rate": 8.554863440344994e-05, "loss": 0.9979, "step": 11307 }, { "epoch": 0.7661765702283353, "grad_norm": 6.5950727462768555, "learning_rate": 8.554726538435212e-05, "loss": 0.6511, "step": 11308 }, { "epoch": 0.7662443254963074, "grad_norm": 7.035087585449219, "learning_rate": 8.55458963652543e-05, "loss": 0.6259, "step": 11309 }, { "epoch": 0.7663120807642794, "grad_norm": 5.893954753875732, "learning_rate": 8.554452734615648e-05, "loss": 0.8179, "step": 11310 }, { "epoch": 0.7663798360322515, "grad_norm": 9.012917518615723, "learning_rate": 8.554315832705868e-05, "loss": 0.8251, "step": 11311 }, { "epoch": 0.7664475913002236, "grad_norm": 6.316323280334473, "learning_rate": 8.554178930796086e-05, "loss": 0.9139, "step": 11312 }, { "epoch": 0.7665153465681956, "grad_norm": 7.094173908233643, "learning_rate": 8.554042028886304e-05, "loss": 0.8208, "step": 11313 }, { "epoch": 0.7665831018361677, "grad_norm": 6.499126434326172, "learning_rate": 8.553905126976522e-05, "loss": 0.7567, "step": 11314 }, { "epoch": 0.7666508571041398, "grad_norm": 5.729644775390625, "learning_rate": 8.553768225066741e-05, "loss": 0.7671, "step": 11315 }, { "epoch": 0.7667186123721119, "grad_norm": 8.627473831176758, "learning_rate": 8.553631323156959e-05, "loss": 1.0222, "step": 11316 }, { "epoch": 0.766786367640084, "grad_norm": 6.386945724487305, "learning_rate": 8.553494421247177e-05, "loss": 0.7227, "step": 11317 }, { "epoch": 0.7668541229080561, "grad_norm": 5.726612567901611, "learning_rate": 8.553357519337395e-05, "loss": 0.931, "step": 11318 }, { "epoch": 0.7669218781760282, "grad_norm": 6.600430488586426, "learning_rate": 8.553220617427613e-05, "loss": 0.9788, "step": 11319 }, { "epoch": 0.7669896334440003, "grad_norm": 4.86123514175415, "learning_rate": 8.553083715517833e-05, "loss": 0.7648, "step": 11320 }, { "epoch": 0.7670573887119724, "grad_norm": 6.815804481506348, "learning_rate": 8.55294681360805e-05, "loss": 0.5505, "step": 11321 }, { "epoch": 0.7671251439799445, "grad_norm": 6.213870048522949, "learning_rate": 8.552809911698269e-05, "loss": 0.8116, "step": 11322 }, { "epoch": 0.7671928992479166, "grad_norm": 6.11044454574585, "learning_rate": 8.552673009788487e-05, "loss": 0.8966, "step": 11323 }, { "epoch": 0.7672606545158887, "grad_norm": 6.12937068939209, "learning_rate": 8.552536107878706e-05, "loss": 0.7981, "step": 11324 }, { "epoch": 0.7673284097838607, "grad_norm": 6.787543773651123, "learning_rate": 8.552399205968924e-05, "loss": 0.8515, "step": 11325 }, { "epoch": 0.7673961650518328, "grad_norm": 6.377131462097168, "learning_rate": 8.552262304059142e-05, "loss": 0.7469, "step": 11326 }, { "epoch": 0.7674639203198048, "grad_norm": 6.438225746154785, "learning_rate": 8.55212540214936e-05, "loss": 0.8255, "step": 11327 }, { "epoch": 0.7675316755877769, "grad_norm": 8.09716510772705, "learning_rate": 8.551988500239578e-05, "loss": 1.0083, "step": 11328 }, { "epoch": 0.767599430855749, "grad_norm": 6.187016487121582, "learning_rate": 8.551851598329798e-05, "loss": 0.8532, "step": 11329 }, { "epoch": 0.7676671861237211, "grad_norm": 6.157771587371826, "learning_rate": 8.551714696420016e-05, "loss": 0.7899, "step": 11330 }, { "epoch": 0.7677349413916932, "grad_norm": 6.330532073974609, "learning_rate": 8.551577794510234e-05, "loss": 0.8263, "step": 11331 }, { "epoch": 0.7678026966596653, "grad_norm": 6.730197906494141, "learning_rate": 8.551440892600452e-05, "loss": 0.7482, "step": 11332 }, { "epoch": 0.7678704519276374, "grad_norm": 9.031699180603027, "learning_rate": 8.55130399069067e-05, "loss": 0.7286, "step": 11333 }, { "epoch": 0.7679382071956095, "grad_norm": 7.25761604309082, "learning_rate": 8.551167088780889e-05, "loss": 1.0674, "step": 11334 }, { "epoch": 0.7680059624635815, "grad_norm": 6.347570896148682, "learning_rate": 8.551030186871107e-05, "loss": 0.7851, "step": 11335 }, { "epoch": 0.7680737177315536, "grad_norm": 5.421909809112549, "learning_rate": 8.550893284961325e-05, "loss": 0.869, "step": 11336 }, { "epoch": 0.7681414729995257, "grad_norm": 6.994184970855713, "learning_rate": 8.550756383051543e-05, "loss": 0.9512, "step": 11337 }, { "epoch": 0.7682092282674978, "grad_norm": 5.763876914978027, "learning_rate": 8.550619481141763e-05, "loss": 0.7909, "step": 11338 }, { "epoch": 0.7682769835354699, "grad_norm": 5.998788833618164, "learning_rate": 8.55048257923198e-05, "loss": 0.8029, "step": 11339 }, { "epoch": 0.768344738803442, "grad_norm": 5.988320827484131, "learning_rate": 8.550345677322199e-05, "loss": 0.7934, "step": 11340 }, { "epoch": 0.7684124940714141, "grad_norm": 4.84094762802124, "learning_rate": 8.550208775412417e-05, "loss": 0.8953, "step": 11341 }, { "epoch": 0.7684802493393862, "grad_norm": 6.44489049911499, "learning_rate": 8.550071873502635e-05, "loss": 1.0824, "step": 11342 }, { "epoch": 0.7685480046073582, "grad_norm": 5.414109230041504, "learning_rate": 8.549934971592854e-05, "loss": 0.8749, "step": 11343 }, { "epoch": 0.7686157598753303, "grad_norm": 5.344674110412598, "learning_rate": 8.549798069683072e-05, "loss": 0.5946, "step": 11344 }, { "epoch": 0.7686835151433024, "grad_norm": 5.795503616333008, "learning_rate": 8.54966116777329e-05, "loss": 0.8889, "step": 11345 }, { "epoch": 0.7687512704112744, "grad_norm": 6.298269271850586, "learning_rate": 8.549524265863508e-05, "loss": 0.5105, "step": 11346 }, { "epoch": 0.7688190256792465, "grad_norm": 5.435570240020752, "learning_rate": 8.549387363953728e-05, "loss": 0.738, "step": 11347 }, { "epoch": 0.7688867809472186, "grad_norm": 5.928055763244629, "learning_rate": 8.549250462043946e-05, "loss": 0.8317, "step": 11348 }, { "epoch": 0.7689545362151907, "grad_norm": 6.4229350090026855, "learning_rate": 8.549113560134164e-05, "loss": 0.6707, "step": 11349 }, { "epoch": 0.7690222914831628, "grad_norm": 6.353605270385742, "learning_rate": 8.548976658224383e-05, "loss": 0.9008, "step": 11350 }, { "epoch": 0.7690900467511349, "grad_norm": 5.449488639831543, "learning_rate": 8.548839756314601e-05, "loss": 0.966, "step": 11351 }, { "epoch": 0.769157802019107, "grad_norm": 4.989541053771973, "learning_rate": 8.548702854404819e-05, "loss": 0.604, "step": 11352 }, { "epoch": 0.7692255572870791, "grad_norm": 5.058244228363037, "learning_rate": 8.548565952495039e-05, "loss": 0.6988, "step": 11353 }, { "epoch": 0.7692933125550512, "grad_norm": 5.852552890777588, "learning_rate": 8.548429050585257e-05, "loss": 0.6343, "step": 11354 }, { "epoch": 0.7693610678230233, "grad_norm": 7.096445560455322, "learning_rate": 8.548292148675475e-05, "loss": 0.7653, "step": 11355 }, { "epoch": 0.7694288230909954, "grad_norm": 6.468287467956543, "learning_rate": 8.548155246765693e-05, "loss": 0.7799, "step": 11356 }, { "epoch": 0.7694965783589675, "grad_norm": 7.869673252105713, "learning_rate": 8.548018344855912e-05, "loss": 0.9937, "step": 11357 }, { "epoch": 0.7695643336269395, "grad_norm": 6.300357341766357, "learning_rate": 8.54788144294613e-05, "loss": 0.644, "step": 11358 }, { "epoch": 0.7696320888949115, "grad_norm": 6.4069318771362305, "learning_rate": 8.547744541036348e-05, "loss": 0.8422, "step": 11359 }, { "epoch": 0.7696998441628836, "grad_norm": 5.846930980682373, "learning_rate": 8.547607639126566e-05, "loss": 0.7335, "step": 11360 }, { "epoch": 0.7697675994308557, "grad_norm": 6.80328893661499, "learning_rate": 8.547470737216785e-05, "loss": 0.6947, "step": 11361 }, { "epoch": 0.7698353546988278, "grad_norm": 5.2702412605285645, "learning_rate": 8.547333835307004e-05, "loss": 0.6963, "step": 11362 }, { "epoch": 0.7699031099667999, "grad_norm": 5.755353927612305, "learning_rate": 8.547196933397222e-05, "loss": 0.7093, "step": 11363 }, { "epoch": 0.769970865234772, "grad_norm": 5.961780071258545, "learning_rate": 8.54706003148744e-05, "loss": 0.8871, "step": 11364 }, { "epoch": 0.7700386205027441, "grad_norm": 5.585000514984131, "learning_rate": 8.546923129577658e-05, "loss": 0.677, "step": 11365 }, { "epoch": 0.7701063757707162, "grad_norm": 5.904151916503906, "learning_rate": 8.546786227667877e-05, "loss": 0.8357, "step": 11366 }, { "epoch": 0.7701741310386883, "grad_norm": 5.614160537719727, "learning_rate": 8.546649325758095e-05, "loss": 0.8266, "step": 11367 }, { "epoch": 0.7702418863066604, "grad_norm": 5.502987861633301, "learning_rate": 8.546512423848313e-05, "loss": 0.715, "step": 11368 }, { "epoch": 0.7703096415746324, "grad_norm": 5.5060625076293945, "learning_rate": 8.546375521938531e-05, "loss": 0.6332, "step": 11369 }, { "epoch": 0.7703773968426045, "grad_norm": 6.361515522003174, "learning_rate": 8.54623862002875e-05, "loss": 0.8958, "step": 11370 }, { "epoch": 0.7704451521105766, "grad_norm": 5.42423152923584, "learning_rate": 8.546101718118969e-05, "loss": 0.5965, "step": 11371 }, { "epoch": 0.7705129073785487, "grad_norm": 7.705050945281982, "learning_rate": 8.545964816209187e-05, "loss": 0.8397, "step": 11372 }, { "epoch": 0.7705806626465208, "grad_norm": 8.009198188781738, "learning_rate": 8.545827914299405e-05, "loss": 0.861, "step": 11373 }, { "epoch": 0.7706484179144929, "grad_norm": 7.349557876586914, "learning_rate": 8.545691012389623e-05, "loss": 1.1441, "step": 11374 }, { "epoch": 0.770716173182465, "grad_norm": 6.47999382019043, "learning_rate": 8.545554110479842e-05, "loss": 0.8032, "step": 11375 }, { "epoch": 0.770783928450437, "grad_norm": 8.485747337341309, "learning_rate": 8.54541720857006e-05, "loss": 0.8073, "step": 11376 }, { "epoch": 0.7708516837184091, "grad_norm": 7.178839683532715, "learning_rate": 8.545280306660278e-05, "loss": 0.8595, "step": 11377 }, { "epoch": 0.7709194389863812, "grad_norm": 7.898037910461426, "learning_rate": 8.545143404750496e-05, "loss": 1.0232, "step": 11378 }, { "epoch": 0.7709871942543532, "grad_norm": 6.554230690002441, "learning_rate": 8.545006502840716e-05, "loss": 0.8139, "step": 11379 }, { "epoch": 0.7710549495223253, "grad_norm": 5.184169769287109, "learning_rate": 8.544869600930934e-05, "loss": 0.8208, "step": 11380 }, { "epoch": 0.7711227047902974, "grad_norm": 6.4749579429626465, "learning_rate": 8.544732699021152e-05, "loss": 0.9345, "step": 11381 }, { "epoch": 0.7711904600582695, "grad_norm": 6.739956378936768, "learning_rate": 8.54459579711137e-05, "loss": 0.8543, "step": 11382 }, { "epoch": 0.7712582153262416, "grad_norm": 6.217097759246826, "learning_rate": 8.544458895201588e-05, "loss": 1.0393, "step": 11383 }, { "epoch": 0.7713259705942137, "grad_norm": 5.0891828536987305, "learning_rate": 8.544321993291807e-05, "loss": 0.7836, "step": 11384 }, { "epoch": 0.7713937258621858, "grad_norm": 4.892578125, "learning_rate": 8.544185091382025e-05, "loss": 0.8143, "step": 11385 }, { "epoch": 0.7714614811301579, "grad_norm": 5.4404168128967285, "learning_rate": 8.544048189472243e-05, "loss": 0.8841, "step": 11386 }, { "epoch": 0.77152923639813, "grad_norm": 6.725840091705322, "learning_rate": 8.543911287562461e-05, "loss": 0.9308, "step": 11387 }, { "epoch": 0.7715969916661021, "grad_norm": 4.622491359710693, "learning_rate": 8.543774385652679e-05, "loss": 0.5187, "step": 11388 }, { "epoch": 0.7716647469340742, "grad_norm": 5.4316182136535645, "learning_rate": 8.543637483742899e-05, "loss": 0.7526, "step": 11389 }, { "epoch": 0.7717325022020463, "grad_norm": 6.089479923248291, "learning_rate": 8.543500581833117e-05, "loss": 0.8785, "step": 11390 }, { "epoch": 0.7718002574700183, "grad_norm": 7.127860069274902, "learning_rate": 8.543363679923335e-05, "loss": 1.0935, "step": 11391 }, { "epoch": 0.7718680127379903, "grad_norm": 6.1316399574279785, "learning_rate": 8.543226778013553e-05, "loss": 0.7755, "step": 11392 }, { "epoch": 0.7719357680059624, "grad_norm": 5.672332286834717, "learning_rate": 8.543089876103772e-05, "loss": 0.8099, "step": 11393 }, { "epoch": 0.7720035232739345, "grad_norm": 7.249478816986084, "learning_rate": 8.54295297419399e-05, "loss": 0.7333, "step": 11394 }, { "epoch": 0.7720712785419066, "grad_norm": 6.270430088043213, "learning_rate": 8.542816072284208e-05, "loss": 0.7662, "step": 11395 }, { "epoch": 0.7721390338098787, "grad_norm": 5.953348636627197, "learning_rate": 8.542679170374428e-05, "loss": 0.7991, "step": 11396 }, { "epoch": 0.7722067890778508, "grad_norm": 9.44437313079834, "learning_rate": 8.542542268464646e-05, "loss": 0.8061, "step": 11397 }, { "epoch": 0.7722745443458229, "grad_norm": 6.52545690536499, "learning_rate": 8.542405366554864e-05, "loss": 1.0734, "step": 11398 }, { "epoch": 0.772342299613795, "grad_norm": 6.201472282409668, "learning_rate": 8.542268464645083e-05, "loss": 1.0589, "step": 11399 }, { "epoch": 0.7724100548817671, "grad_norm": 6.648289680480957, "learning_rate": 8.542131562735301e-05, "loss": 0.6268, "step": 11400 }, { "epoch": 0.7724778101497392, "grad_norm": 6.1713433265686035, "learning_rate": 8.541994660825519e-05, "loss": 0.8162, "step": 11401 }, { "epoch": 0.7725455654177112, "grad_norm": 6.964521884918213, "learning_rate": 8.541857758915738e-05, "loss": 0.8242, "step": 11402 }, { "epoch": 0.7726133206856833, "grad_norm": 6.025942802429199, "learning_rate": 8.541720857005956e-05, "loss": 0.748, "step": 11403 }, { "epoch": 0.7726810759536554, "grad_norm": 5.319873809814453, "learning_rate": 8.541583955096175e-05, "loss": 0.7437, "step": 11404 }, { "epoch": 0.7727488312216275, "grad_norm": 7.246333599090576, "learning_rate": 8.541447053186393e-05, "loss": 0.7307, "step": 11405 }, { "epoch": 0.7728165864895996, "grad_norm": 8.031798362731934, "learning_rate": 8.54131015127661e-05, "loss": 1.2193, "step": 11406 }, { "epoch": 0.7728843417575717, "grad_norm": 5.135364532470703, "learning_rate": 8.54117324936683e-05, "loss": 0.7107, "step": 11407 }, { "epoch": 0.7729520970255437, "grad_norm": 8.0211820602417, "learning_rate": 8.541036347457048e-05, "loss": 0.7786, "step": 11408 }, { "epoch": 0.7730198522935158, "grad_norm": 7.091327667236328, "learning_rate": 8.540899445547266e-05, "loss": 0.7232, "step": 11409 }, { "epoch": 0.7730876075614879, "grad_norm": 7.18704891204834, "learning_rate": 8.540762543637484e-05, "loss": 1.0204, "step": 11410 }, { "epoch": 0.77315536282946, "grad_norm": 5.484143257141113, "learning_rate": 8.540625641727702e-05, "loss": 0.9076, "step": 11411 }, { "epoch": 0.773223118097432, "grad_norm": 8.248093605041504, "learning_rate": 8.540488739817921e-05, "loss": 0.8882, "step": 11412 }, { "epoch": 0.7732908733654041, "grad_norm": 5.386202335357666, "learning_rate": 8.54035183790814e-05, "loss": 0.8552, "step": 11413 }, { "epoch": 0.7733586286333762, "grad_norm": 5.230656623840332, "learning_rate": 8.540214935998358e-05, "loss": 0.6101, "step": 11414 }, { "epoch": 0.7734263839013483, "grad_norm": 6.001654624938965, "learning_rate": 8.540078034088576e-05, "loss": 0.905, "step": 11415 }, { "epoch": 0.7734941391693204, "grad_norm": 5.581679344177246, "learning_rate": 8.539941132178795e-05, "loss": 0.8538, "step": 11416 }, { "epoch": 0.7735618944372925, "grad_norm": 5.559355735778809, "learning_rate": 8.539804230269013e-05, "loss": 0.8776, "step": 11417 }, { "epoch": 0.7736296497052646, "grad_norm": 4.970002174377441, "learning_rate": 8.539667328359231e-05, "loss": 0.6365, "step": 11418 }, { "epoch": 0.7736974049732367, "grad_norm": 6.487758159637451, "learning_rate": 8.539530426449449e-05, "loss": 1.0137, "step": 11419 }, { "epoch": 0.7737651602412088, "grad_norm": 6.612691402435303, "learning_rate": 8.539393524539667e-05, "loss": 0.9539, "step": 11420 }, { "epoch": 0.7738329155091809, "grad_norm": 9.316730499267578, "learning_rate": 8.539256622629887e-05, "loss": 0.761, "step": 11421 }, { "epoch": 0.773900670777153, "grad_norm": 6.791004180908203, "learning_rate": 8.539119720720105e-05, "loss": 0.9281, "step": 11422 }, { "epoch": 0.7739684260451251, "grad_norm": 6.122042179107666, "learning_rate": 8.538982818810323e-05, "loss": 0.8082, "step": 11423 }, { "epoch": 0.774036181313097, "grad_norm": 5.878390312194824, "learning_rate": 8.53884591690054e-05, "loss": 0.7934, "step": 11424 }, { "epoch": 0.7741039365810691, "grad_norm": 5.959982395172119, "learning_rate": 8.53870901499076e-05, "loss": 0.7919, "step": 11425 }, { "epoch": 0.7741716918490412, "grad_norm": 7.212625980377197, "learning_rate": 8.538572113080978e-05, "loss": 0.6346, "step": 11426 }, { "epoch": 0.7742394471170133, "grad_norm": 5.554856300354004, "learning_rate": 8.538435211171196e-05, "loss": 0.757, "step": 11427 }, { "epoch": 0.7743072023849854, "grad_norm": 7.405214309692383, "learning_rate": 8.538298309261414e-05, "loss": 0.8239, "step": 11428 }, { "epoch": 0.7743749576529575, "grad_norm": 5.671273231506348, "learning_rate": 8.538161407351632e-05, "loss": 0.6547, "step": 11429 }, { "epoch": 0.7744427129209296, "grad_norm": 5.7590436935424805, "learning_rate": 8.538024505441852e-05, "loss": 0.6526, "step": 11430 }, { "epoch": 0.7745104681889017, "grad_norm": 5.3632283210754395, "learning_rate": 8.53788760353207e-05, "loss": 0.6368, "step": 11431 }, { "epoch": 0.7745782234568738, "grad_norm": 5.514582633972168, "learning_rate": 8.537750701622288e-05, "loss": 0.8242, "step": 11432 }, { "epoch": 0.7746459787248459, "grad_norm": 6.291776657104492, "learning_rate": 8.537613799712506e-05, "loss": 0.7272, "step": 11433 }, { "epoch": 0.774713733992818, "grad_norm": 6.716917991638184, "learning_rate": 8.537476897802725e-05, "loss": 0.658, "step": 11434 }, { "epoch": 0.77478148926079, "grad_norm": 6.479708194732666, "learning_rate": 8.537339995892943e-05, "loss": 1.0441, "step": 11435 }, { "epoch": 0.7748492445287621, "grad_norm": 7.607060432434082, "learning_rate": 8.537203093983161e-05, "loss": 0.849, "step": 11436 }, { "epoch": 0.7749169997967342, "grad_norm": 5.455706596374512, "learning_rate": 8.537066192073379e-05, "loss": 0.8267, "step": 11437 }, { "epoch": 0.7749847550647063, "grad_norm": 6.571371078491211, "learning_rate": 8.536929290163597e-05, "loss": 0.9552, "step": 11438 }, { "epoch": 0.7750525103326784, "grad_norm": 7.029003143310547, "learning_rate": 8.536792388253817e-05, "loss": 0.7186, "step": 11439 }, { "epoch": 0.7751202656006505, "grad_norm": 6.117072582244873, "learning_rate": 8.536655486344035e-05, "loss": 0.6946, "step": 11440 }, { "epoch": 0.7751880208686225, "grad_norm": 6.154125690460205, "learning_rate": 8.536518584434253e-05, "loss": 0.6694, "step": 11441 }, { "epoch": 0.7752557761365946, "grad_norm": 5.271462917327881, "learning_rate": 8.536381682524472e-05, "loss": 0.6984, "step": 11442 }, { "epoch": 0.7753235314045667, "grad_norm": 7.075232982635498, "learning_rate": 8.53624478061469e-05, "loss": 0.8232, "step": 11443 }, { "epoch": 0.7753912866725388, "grad_norm": 8.889443397521973, "learning_rate": 8.536107878704908e-05, "loss": 0.9988, "step": 11444 }, { "epoch": 0.7754590419405109, "grad_norm": 6.270964622497559, "learning_rate": 8.535970976795127e-05, "loss": 0.8725, "step": 11445 }, { "epoch": 0.7755267972084829, "grad_norm": 6.906674861907959, "learning_rate": 8.535834074885345e-05, "loss": 0.8918, "step": 11446 }, { "epoch": 0.775594552476455, "grad_norm": 7.697040557861328, "learning_rate": 8.535697172975564e-05, "loss": 0.8286, "step": 11447 }, { "epoch": 0.7756623077444271, "grad_norm": 6.875545024871826, "learning_rate": 8.535560271065783e-05, "loss": 0.9181, "step": 11448 }, { "epoch": 0.7757300630123992, "grad_norm": 6.059504985809326, "learning_rate": 8.535423369156001e-05, "loss": 0.7066, "step": 11449 }, { "epoch": 0.7757978182803713, "grad_norm": 6.182839870452881, "learning_rate": 8.535286467246219e-05, "loss": 0.7472, "step": 11450 }, { "epoch": 0.7758655735483434, "grad_norm": 6.222153663635254, "learning_rate": 8.535149565336437e-05, "loss": 0.7874, "step": 11451 }, { "epoch": 0.7759333288163155, "grad_norm": 5.410420894622803, "learning_rate": 8.535012663426655e-05, "loss": 0.6223, "step": 11452 }, { "epoch": 0.7760010840842876, "grad_norm": 6.875960350036621, "learning_rate": 8.534875761516874e-05, "loss": 0.668, "step": 11453 }, { "epoch": 0.7760688393522597, "grad_norm": 6.519979953765869, "learning_rate": 8.534738859607092e-05, "loss": 0.6604, "step": 11454 }, { "epoch": 0.7761365946202318, "grad_norm": 5.24510383605957, "learning_rate": 8.53460195769731e-05, "loss": 0.685, "step": 11455 }, { "epoch": 0.7762043498882039, "grad_norm": 5.399496555328369, "learning_rate": 8.534465055787529e-05, "loss": 0.6585, "step": 11456 }, { "epoch": 0.7762721051561758, "grad_norm": 6.070941925048828, "learning_rate": 8.534328153877748e-05, "loss": 0.6592, "step": 11457 }, { "epoch": 0.7763398604241479, "grad_norm": 6.634647846221924, "learning_rate": 8.534191251967966e-05, "loss": 0.6772, "step": 11458 }, { "epoch": 0.77640761569212, "grad_norm": 4.490801811218262, "learning_rate": 8.534054350058184e-05, "loss": 0.6787, "step": 11459 }, { "epoch": 0.7764753709600921, "grad_norm": 9.601375579833984, "learning_rate": 8.533917448148402e-05, "loss": 0.7703, "step": 11460 }, { "epoch": 0.7765431262280642, "grad_norm": 8.57113265991211, "learning_rate": 8.53378054623862e-05, "loss": 0.6969, "step": 11461 }, { "epoch": 0.7766108814960363, "grad_norm": 5.441416263580322, "learning_rate": 8.53364364432884e-05, "loss": 0.7218, "step": 11462 }, { "epoch": 0.7766786367640084, "grad_norm": 5.4659857749938965, "learning_rate": 8.533506742419057e-05, "loss": 0.7042, "step": 11463 }, { "epoch": 0.7767463920319805, "grad_norm": 5.655581951141357, "learning_rate": 8.533369840509276e-05, "loss": 0.8021, "step": 11464 }, { "epoch": 0.7768141472999526, "grad_norm": 5.530257225036621, "learning_rate": 8.533232938599494e-05, "loss": 0.8872, "step": 11465 }, { "epoch": 0.7768819025679247, "grad_norm": 6.074810981750488, "learning_rate": 8.533096036689712e-05, "loss": 0.7353, "step": 11466 }, { "epoch": 0.7769496578358968, "grad_norm": 5.74225378036499, "learning_rate": 8.532959134779931e-05, "loss": 0.9214, "step": 11467 }, { "epoch": 0.7770174131038688, "grad_norm": 8.271156311035156, "learning_rate": 8.532822232870149e-05, "loss": 0.9913, "step": 11468 }, { "epoch": 0.7770851683718409, "grad_norm": 5.124596118927002, "learning_rate": 8.532685330960367e-05, "loss": 0.6335, "step": 11469 }, { "epoch": 0.777152923639813, "grad_norm": 6.918234348297119, "learning_rate": 8.532548429050585e-05, "loss": 0.9158, "step": 11470 }, { "epoch": 0.7772206789077851, "grad_norm": 5.966104984283447, "learning_rate": 8.532411527140804e-05, "loss": 0.9534, "step": 11471 }, { "epoch": 0.7772884341757572, "grad_norm": 6.7147536277771, "learning_rate": 8.532274625231023e-05, "loss": 0.7466, "step": 11472 }, { "epoch": 0.7773561894437292, "grad_norm": 6.448988914489746, "learning_rate": 8.53213772332124e-05, "loss": 1.0013, "step": 11473 }, { "epoch": 0.7774239447117013, "grad_norm": 4.238838195800781, "learning_rate": 8.532000821411459e-05, "loss": 0.6666, "step": 11474 }, { "epoch": 0.7774916999796734, "grad_norm": 6.865900993347168, "learning_rate": 8.531863919501677e-05, "loss": 0.7966, "step": 11475 }, { "epoch": 0.7775594552476455, "grad_norm": 5.297105312347412, "learning_rate": 8.531727017591896e-05, "loss": 0.8075, "step": 11476 }, { "epoch": 0.7776272105156176, "grad_norm": 6.865470886230469, "learning_rate": 8.531590115682114e-05, "loss": 0.7531, "step": 11477 }, { "epoch": 0.7776949657835897, "grad_norm": 12.770363807678223, "learning_rate": 8.531453213772332e-05, "loss": 0.765, "step": 11478 }, { "epoch": 0.7777627210515617, "grad_norm": 5.952977180480957, "learning_rate": 8.53131631186255e-05, "loss": 0.6781, "step": 11479 }, { "epoch": 0.7778304763195338, "grad_norm": 6.006490230560303, "learning_rate": 8.53117940995277e-05, "loss": 0.7149, "step": 11480 }, { "epoch": 0.7778982315875059, "grad_norm": 5.353832244873047, "learning_rate": 8.531042508042988e-05, "loss": 0.5234, "step": 11481 }, { "epoch": 0.777965986855478, "grad_norm": 7.221078395843506, "learning_rate": 8.530905606133206e-05, "loss": 0.8062, "step": 11482 }, { "epoch": 0.7780337421234501, "grad_norm": 5.661943435668945, "learning_rate": 8.530768704223424e-05, "loss": 0.7662, "step": 11483 }, { "epoch": 0.7781014973914222, "grad_norm": 7.68919038772583, "learning_rate": 8.530631802313642e-05, "loss": 0.9475, "step": 11484 }, { "epoch": 0.7781692526593943, "grad_norm": 6.683036804199219, "learning_rate": 8.530494900403861e-05, "loss": 0.8855, "step": 11485 }, { "epoch": 0.7782370079273664, "grad_norm": 6.948663234710693, "learning_rate": 8.530357998494079e-05, "loss": 0.8388, "step": 11486 }, { "epoch": 0.7783047631953385, "grad_norm": 6.295228004455566, "learning_rate": 8.530221096584297e-05, "loss": 0.8796, "step": 11487 }, { "epoch": 0.7783725184633106, "grad_norm": 5.598083019256592, "learning_rate": 8.530084194674516e-05, "loss": 1.1443, "step": 11488 }, { "epoch": 0.7784402737312827, "grad_norm": 6.99757194519043, "learning_rate": 8.529947292764735e-05, "loss": 0.9209, "step": 11489 }, { "epoch": 0.7785080289992546, "grad_norm": 4.9374189376831055, "learning_rate": 8.529810390854953e-05, "loss": 0.7056, "step": 11490 }, { "epoch": 0.7785757842672267, "grad_norm": 5.831791877746582, "learning_rate": 8.529673488945172e-05, "loss": 0.9647, "step": 11491 }, { "epoch": 0.7786435395351988, "grad_norm": 5.7701945304870605, "learning_rate": 8.52953658703539e-05, "loss": 0.5656, "step": 11492 }, { "epoch": 0.7787112948031709, "grad_norm": 6.0373945236206055, "learning_rate": 8.529399685125608e-05, "loss": 0.7656, "step": 11493 }, { "epoch": 0.778779050071143, "grad_norm": 5.0245184898376465, "learning_rate": 8.529262783215827e-05, "loss": 0.6954, "step": 11494 }, { "epoch": 0.7788468053391151, "grad_norm": 5.802394390106201, "learning_rate": 8.529125881306045e-05, "loss": 0.7926, "step": 11495 }, { "epoch": 0.7789145606070872, "grad_norm": 5.6064252853393555, "learning_rate": 8.528988979396263e-05, "loss": 0.6536, "step": 11496 }, { "epoch": 0.7789823158750593, "grad_norm": 6.606382846832275, "learning_rate": 8.528852077486481e-05, "loss": 0.8506, "step": 11497 }, { "epoch": 0.7790500711430314, "grad_norm": 5.858041286468506, "learning_rate": 8.5287151755767e-05, "loss": 0.9821, "step": 11498 }, { "epoch": 0.7791178264110035, "grad_norm": 7.08564567565918, "learning_rate": 8.528578273666919e-05, "loss": 0.8554, "step": 11499 }, { "epoch": 0.7791855816789756, "grad_norm": 8.084799766540527, "learning_rate": 8.528441371757137e-05, "loss": 0.7015, "step": 11500 }, { "epoch": 0.7792533369469477, "grad_norm": 5.143333911895752, "learning_rate": 8.528304469847355e-05, "loss": 0.815, "step": 11501 }, { "epoch": 0.7793210922149197, "grad_norm": 6.680109977722168, "learning_rate": 8.528167567937573e-05, "loss": 0.8661, "step": 11502 }, { "epoch": 0.7793888474828918, "grad_norm": 6.3008341789245605, "learning_rate": 8.528030666027792e-05, "loss": 0.9275, "step": 11503 }, { "epoch": 0.7794566027508639, "grad_norm": 6.935196876525879, "learning_rate": 8.52789376411801e-05, "loss": 0.6915, "step": 11504 }, { "epoch": 0.779524358018836, "grad_norm": 5.888092517852783, "learning_rate": 8.527756862208228e-05, "loss": 0.6035, "step": 11505 }, { "epoch": 0.779592113286808, "grad_norm": 6.207918167114258, "learning_rate": 8.527619960298447e-05, "loss": 0.6619, "step": 11506 }, { "epoch": 0.7796598685547801, "grad_norm": 6.820822238922119, "learning_rate": 8.527483058388665e-05, "loss": 0.9905, "step": 11507 }, { "epoch": 0.7797276238227522, "grad_norm": 6.440364837646484, "learning_rate": 8.527346156478884e-05, "loss": 1.0333, "step": 11508 }, { "epoch": 0.7797953790907243, "grad_norm": 6.21598482131958, "learning_rate": 8.527209254569102e-05, "loss": 0.6562, "step": 11509 }, { "epoch": 0.7798631343586964, "grad_norm": 8.483455657958984, "learning_rate": 8.52707235265932e-05, "loss": 0.6767, "step": 11510 }, { "epoch": 0.7799308896266685, "grad_norm": 6.934547424316406, "learning_rate": 8.526935450749538e-05, "loss": 0.8354, "step": 11511 }, { "epoch": 0.7799986448946405, "grad_norm": 8.640101432800293, "learning_rate": 8.526798548839757e-05, "loss": 0.9985, "step": 11512 }, { "epoch": 0.7800664001626126, "grad_norm": 5.356478214263916, "learning_rate": 8.526661646929975e-05, "loss": 0.8524, "step": 11513 }, { "epoch": 0.7801341554305847, "grad_norm": 6.551882743835449, "learning_rate": 8.526524745020193e-05, "loss": 0.9502, "step": 11514 }, { "epoch": 0.7802019106985568, "grad_norm": 6.971212387084961, "learning_rate": 8.526387843110412e-05, "loss": 0.909, "step": 11515 }, { "epoch": 0.7802696659665289, "grad_norm": 7.040616035461426, "learning_rate": 8.52625094120063e-05, "loss": 1.1058, "step": 11516 }, { "epoch": 0.780337421234501, "grad_norm": 5.899086952209473, "learning_rate": 8.526114039290849e-05, "loss": 0.7611, "step": 11517 }, { "epoch": 0.7804051765024731, "grad_norm": 6.242020606994629, "learning_rate": 8.525977137381067e-05, "loss": 0.7059, "step": 11518 }, { "epoch": 0.7804729317704452, "grad_norm": 6.418991565704346, "learning_rate": 8.525840235471285e-05, "loss": 0.8625, "step": 11519 }, { "epoch": 0.7805406870384173, "grad_norm": 4.487674236297607, "learning_rate": 8.525703333561503e-05, "loss": 0.7343, "step": 11520 }, { "epoch": 0.7806084423063894, "grad_norm": 5.526739597320557, "learning_rate": 8.525566431651721e-05, "loss": 0.8331, "step": 11521 }, { "epoch": 0.7806761975743614, "grad_norm": 7.298591136932373, "learning_rate": 8.52542952974194e-05, "loss": 1.0112, "step": 11522 }, { "epoch": 0.7807439528423334, "grad_norm": 7.665398120880127, "learning_rate": 8.525292627832159e-05, "loss": 0.8248, "step": 11523 }, { "epoch": 0.7808117081103055, "grad_norm": 5.981564044952393, "learning_rate": 8.525155725922377e-05, "loss": 0.8557, "step": 11524 }, { "epoch": 0.7808794633782776, "grad_norm": 6.268359184265137, "learning_rate": 8.525018824012595e-05, "loss": 0.5943, "step": 11525 }, { "epoch": 0.7809472186462497, "grad_norm": 5.51030969619751, "learning_rate": 8.524881922102814e-05, "loss": 0.7657, "step": 11526 }, { "epoch": 0.7810149739142218, "grad_norm": 6.035849094390869, "learning_rate": 8.524745020193032e-05, "loss": 0.7501, "step": 11527 }, { "epoch": 0.7810827291821939, "grad_norm": 5.455019474029541, "learning_rate": 8.52460811828325e-05, "loss": 0.6627, "step": 11528 }, { "epoch": 0.781150484450166, "grad_norm": 5.731950283050537, "learning_rate": 8.524471216373468e-05, "loss": 0.7456, "step": 11529 }, { "epoch": 0.7812182397181381, "grad_norm": 6.446641445159912, "learning_rate": 8.524334314463686e-05, "loss": 0.668, "step": 11530 }, { "epoch": 0.7812859949861102, "grad_norm": 6.093938827514648, "learning_rate": 8.524197412553905e-05, "loss": 0.7248, "step": 11531 }, { "epoch": 0.7813537502540823, "grad_norm": 5.524089813232422, "learning_rate": 8.524060510644124e-05, "loss": 0.8411, "step": 11532 }, { "epoch": 0.7814215055220544, "grad_norm": 6.203104496002197, "learning_rate": 8.523923608734342e-05, "loss": 0.8882, "step": 11533 }, { "epoch": 0.7814892607900265, "grad_norm": 5.39666748046875, "learning_rate": 8.52378670682456e-05, "loss": 0.8111, "step": 11534 }, { "epoch": 0.7815570160579985, "grad_norm": 5.305294990539551, "learning_rate": 8.523649804914779e-05, "loss": 0.6809, "step": 11535 }, { "epoch": 0.7816247713259706, "grad_norm": 5.251042366027832, "learning_rate": 8.523512903004997e-05, "loss": 0.8533, "step": 11536 }, { "epoch": 0.7816925265939427, "grad_norm": 7.346649169921875, "learning_rate": 8.523376001095215e-05, "loss": 0.857, "step": 11537 }, { "epoch": 0.7817602818619148, "grad_norm": 6.78896427154541, "learning_rate": 8.523239099185434e-05, "loss": 0.77, "step": 11538 }, { "epoch": 0.7818280371298868, "grad_norm": 6.815920352935791, "learning_rate": 8.523102197275652e-05, "loss": 0.6838, "step": 11539 }, { "epoch": 0.7818957923978589, "grad_norm": 5.649730682373047, "learning_rate": 8.52296529536587e-05, "loss": 0.833, "step": 11540 }, { "epoch": 0.781963547665831, "grad_norm": 7.253706932067871, "learning_rate": 8.52282839345609e-05, "loss": 0.8077, "step": 11541 }, { "epoch": 0.7820313029338031, "grad_norm": 4.436539649963379, "learning_rate": 8.522691491546308e-05, "loss": 0.6202, "step": 11542 }, { "epoch": 0.7820990582017752, "grad_norm": 5.917550563812256, "learning_rate": 8.522554589636526e-05, "loss": 0.82, "step": 11543 }, { "epoch": 0.7821668134697473, "grad_norm": 5.576394081115723, "learning_rate": 8.522417687726744e-05, "loss": 0.6957, "step": 11544 }, { "epoch": 0.7822345687377193, "grad_norm": 7.591821670532227, "learning_rate": 8.522280785816963e-05, "loss": 0.9026, "step": 11545 }, { "epoch": 0.7823023240056914, "grad_norm": 8.16065502166748, "learning_rate": 8.522143883907181e-05, "loss": 1.0274, "step": 11546 }, { "epoch": 0.7823700792736635, "grad_norm": 9.164224624633789, "learning_rate": 8.5220069819974e-05, "loss": 0.7935, "step": 11547 }, { "epoch": 0.7824378345416356, "grad_norm": 5.825256824493408, "learning_rate": 8.521870080087617e-05, "loss": 0.8263, "step": 11548 }, { "epoch": 0.7825055898096077, "grad_norm": 7.315790176391602, "learning_rate": 8.521733178177837e-05, "loss": 0.9282, "step": 11549 }, { "epoch": 0.7825733450775798, "grad_norm": 6.677700519561768, "learning_rate": 8.521596276268055e-05, "loss": 1.011, "step": 11550 }, { "epoch": 0.7826411003455519, "grad_norm": 7.136357307434082, "learning_rate": 8.521459374358273e-05, "loss": 0.7062, "step": 11551 }, { "epoch": 0.782708855613524, "grad_norm": 6.2828168869018555, "learning_rate": 8.521322472448491e-05, "loss": 0.6732, "step": 11552 }, { "epoch": 0.7827766108814961, "grad_norm": 6.77725887298584, "learning_rate": 8.521185570538709e-05, "loss": 0.8692, "step": 11553 }, { "epoch": 0.7828443661494682, "grad_norm": 6.065176963806152, "learning_rate": 8.521048668628928e-05, "loss": 0.7296, "step": 11554 }, { "epoch": 0.7829121214174402, "grad_norm": 6.393941402435303, "learning_rate": 8.520911766719146e-05, "loss": 0.7382, "step": 11555 }, { "epoch": 0.7829798766854122, "grad_norm": 5.480118751525879, "learning_rate": 8.520774864809364e-05, "loss": 1.0189, "step": 11556 }, { "epoch": 0.7830476319533843, "grad_norm": 5.192300796508789, "learning_rate": 8.520637962899583e-05, "loss": 0.7421, "step": 11557 }, { "epoch": 0.7831153872213564, "grad_norm": 4.797957897186279, "learning_rate": 8.520501060989802e-05, "loss": 0.5936, "step": 11558 }, { "epoch": 0.7831831424893285, "grad_norm": 5.605537414550781, "learning_rate": 8.52036415908002e-05, "loss": 0.6871, "step": 11559 }, { "epoch": 0.7832508977573006, "grad_norm": 4.938405513763428, "learning_rate": 8.520227257170238e-05, "loss": 0.7197, "step": 11560 }, { "epoch": 0.7833186530252727, "grad_norm": 5.017856597900391, "learning_rate": 8.520090355260456e-05, "loss": 0.9076, "step": 11561 }, { "epoch": 0.7833864082932448, "grad_norm": 4.1797285079956055, "learning_rate": 8.519953453350674e-05, "loss": 0.7057, "step": 11562 }, { "epoch": 0.7834541635612169, "grad_norm": 6.84492301940918, "learning_rate": 8.519816551440893e-05, "loss": 0.7758, "step": 11563 }, { "epoch": 0.783521918829189, "grad_norm": 5.999660968780518, "learning_rate": 8.519679649531111e-05, "loss": 0.7158, "step": 11564 }, { "epoch": 0.7835896740971611, "grad_norm": 6.352871894836426, "learning_rate": 8.51954274762133e-05, "loss": 0.8369, "step": 11565 }, { "epoch": 0.7836574293651332, "grad_norm": 7.0241923332214355, "learning_rate": 8.519405845711548e-05, "loss": 0.8714, "step": 11566 }, { "epoch": 0.7837251846331053, "grad_norm": 6.709263324737549, "learning_rate": 8.519268943801767e-05, "loss": 0.7421, "step": 11567 }, { "epoch": 0.7837929399010773, "grad_norm": 6.393216133117676, "learning_rate": 8.519132041891985e-05, "loss": 0.7692, "step": 11568 }, { "epoch": 0.7838606951690494, "grad_norm": 5.52962589263916, "learning_rate": 8.518995139982203e-05, "loss": 0.8172, "step": 11569 }, { "epoch": 0.7839284504370215, "grad_norm": 5.78670072555542, "learning_rate": 8.518858238072421e-05, "loss": 0.8083, "step": 11570 }, { "epoch": 0.7839962057049935, "grad_norm": 6.439652919769287, "learning_rate": 8.518721336162639e-05, "loss": 0.8051, "step": 11571 }, { "epoch": 0.7840639609729656, "grad_norm": 7.955451011657715, "learning_rate": 8.518584434252858e-05, "loss": 0.8791, "step": 11572 }, { "epoch": 0.7841317162409377, "grad_norm": 9.079748153686523, "learning_rate": 8.518447532343076e-05, "loss": 0.8787, "step": 11573 }, { "epoch": 0.7841994715089098, "grad_norm": 7.1748366355896, "learning_rate": 8.518310630433295e-05, "loss": 0.9591, "step": 11574 }, { "epoch": 0.7842672267768819, "grad_norm": 4.792274475097656, "learning_rate": 8.518173728523513e-05, "loss": 0.6126, "step": 11575 }, { "epoch": 0.784334982044854, "grad_norm": 8.38217544555664, "learning_rate": 8.51803682661373e-05, "loss": 1.1548, "step": 11576 }, { "epoch": 0.7844027373128261, "grad_norm": 8.210965156555176, "learning_rate": 8.51789992470395e-05, "loss": 0.879, "step": 11577 }, { "epoch": 0.7844704925807982, "grad_norm": 6.52365255355835, "learning_rate": 8.517763022794168e-05, "loss": 1.1208, "step": 11578 }, { "epoch": 0.7845382478487702, "grad_norm": 8.54340934753418, "learning_rate": 8.517626120884386e-05, "loss": 0.8938, "step": 11579 }, { "epoch": 0.7846060031167423, "grad_norm": 4.85086727142334, "learning_rate": 8.517489218974604e-05, "loss": 0.5674, "step": 11580 }, { "epoch": 0.7846737583847144, "grad_norm": 5.40913200378418, "learning_rate": 8.517352317064823e-05, "loss": 0.6512, "step": 11581 }, { "epoch": 0.7847415136526865, "grad_norm": 5.471793174743652, "learning_rate": 8.517215415155041e-05, "loss": 0.6023, "step": 11582 }, { "epoch": 0.7848092689206586, "grad_norm": 6.162317752838135, "learning_rate": 8.51707851324526e-05, "loss": 0.6595, "step": 11583 }, { "epoch": 0.7848770241886307, "grad_norm": 4.846681594848633, "learning_rate": 8.516941611335479e-05, "loss": 0.7467, "step": 11584 }, { "epoch": 0.7849447794566028, "grad_norm": 6.785501956939697, "learning_rate": 8.516804709425697e-05, "loss": 0.9231, "step": 11585 }, { "epoch": 0.7850125347245749, "grad_norm": 5.503603935241699, "learning_rate": 8.516667807515915e-05, "loss": 0.6025, "step": 11586 }, { "epoch": 0.785080289992547, "grad_norm": 6.446911334991455, "learning_rate": 8.516530905606134e-05, "loss": 0.8209, "step": 11587 }, { "epoch": 0.785148045260519, "grad_norm": 6.04754114151001, "learning_rate": 8.516394003696352e-05, "loss": 0.7663, "step": 11588 }, { "epoch": 0.785215800528491, "grad_norm": 8.239023208618164, "learning_rate": 8.51625710178657e-05, "loss": 1.06, "step": 11589 }, { "epoch": 0.7852835557964631, "grad_norm": 5.434386253356934, "learning_rate": 8.51612019987679e-05, "loss": 0.6585, "step": 11590 }, { "epoch": 0.7853513110644352, "grad_norm": 7.99647855758667, "learning_rate": 8.515983297967008e-05, "loss": 1.3215, "step": 11591 }, { "epoch": 0.7854190663324073, "grad_norm": 6.199028491973877, "learning_rate": 8.515846396057226e-05, "loss": 0.7499, "step": 11592 }, { "epoch": 0.7854868216003794, "grad_norm": 5.438849449157715, "learning_rate": 8.515709494147444e-05, "loss": 1.0288, "step": 11593 }, { "epoch": 0.7855545768683515, "grad_norm": 6.933850288391113, "learning_rate": 8.515572592237662e-05, "loss": 0.8108, "step": 11594 }, { "epoch": 0.7856223321363236, "grad_norm": 7.720444679260254, "learning_rate": 8.515435690327881e-05, "loss": 0.7747, "step": 11595 }, { "epoch": 0.7856900874042957, "grad_norm": 5.852978706359863, "learning_rate": 8.5152987884181e-05, "loss": 0.9843, "step": 11596 }, { "epoch": 0.7857578426722678, "grad_norm": 5.039735794067383, "learning_rate": 8.515161886508317e-05, "loss": 0.8137, "step": 11597 }, { "epoch": 0.7858255979402399, "grad_norm": 5.9263529777526855, "learning_rate": 8.515024984598535e-05, "loss": 0.7445, "step": 11598 }, { "epoch": 0.785893353208212, "grad_norm": 7.095968246459961, "learning_rate": 8.514888082688753e-05, "loss": 0.9883, "step": 11599 }, { "epoch": 0.7859611084761841, "grad_norm": 7.839550018310547, "learning_rate": 8.514751180778973e-05, "loss": 0.8652, "step": 11600 }, { "epoch": 0.7860288637441561, "grad_norm": 6.799243927001953, "learning_rate": 8.514614278869191e-05, "loss": 0.9413, "step": 11601 }, { "epoch": 0.7860966190121282, "grad_norm": 5.903491020202637, "learning_rate": 8.514477376959409e-05, "loss": 0.7944, "step": 11602 }, { "epoch": 0.7861643742801003, "grad_norm": 5.674754619598389, "learning_rate": 8.514340475049627e-05, "loss": 0.639, "step": 11603 }, { "epoch": 0.7862321295480723, "grad_norm": 6.983302593231201, "learning_rate": 8.514203573139846e-05, "loss": 1.0028, "step": 11604 }, { "epoch": 0.7862998848160444, "grad_norm": 6.731539726257324, "learning_rate": 8.514066671230064e-05, "loss": 0.8127, "step": 11605 }, { "epoch": 0.7863676400840165, "grad_norm": 5.513458251953125, "learning_rate": 8.513929769320282e-05, "loss": 0.8266, "step": 11606 }, { "epoch": 0.7864353953519886, "grad_norm": 5.592813968658447, "learning_rate": 8.5137928674105e-05, "loss": 0.8913, "step": 11607 }, { "epoch": 0.7865031506199607, "grad_norm": 6.7772603034973145, "learning_rate": 8.513655965500719e-05, "loss": 0.9662, "step": 11608 }, { "epoch": 0.7865709058879328, "grad_norm": 6.447290420532227, "learning_rate": 8.513519063590938e-05, "loss": 0.8599, "step": 11609 }, { "epoch": 0.7866386611559049, "grad_norm": 5.317022800445557, "learning_rate": 8.513382161681156e-05, "loss": 0.7079, "step": 11610 }, { "epoch": 0.786706416423877, "grad_norm": 6.355508327484131, "learning_rate": 8.513245259771374e-05, "loss": 0.783, "step": 11611 }, { "epoch": 0.786774171691849, "grad_norm": 6.773859977722168, "learning_rate": 8.513108357861592e-05, "loss": 0.9146, "step": 11612 }, { "epoch": 0.7868419269598211, "grad_norm": 6.100228786468506, "learning_rate": 8.512971455951811e-05, "loss": 0.8626, "step": 11613 }, { "epoch": 0.7869096822277932, "grad_norm": 4.6573004722595215, "learning_rate": 8.51283455404203e-05, "loss": 0.4562, "step": 11614 }, { "epoch": 0.7869774374957653, "grad_norm": 5.893970966339111, "learning_rate": 8.512697652132247e-05, "loss": 0.9437, "step": 11615 }, { "epoch": 0.7870451927637374, "grad_norm": 6.907064914703369, "learning_rate": 8.512560750222465e-05, "loss": 0.7592, "step": 11616 }, { "epoch": 0.7871129480317095, "grad_norm": 5.383542060852051, "learning_rate": 8.512423848312684e-05, "loss": 0.8429, "step": 11617 }, { "epoch": 0.7871807032996816, "grad_norm": 4.651388168334961, "learning_rate": 8.512286946402903e-05, "loss": 0.7447, "step": 11618 }, { "epoch": 0.7872484585676537, "grad_norm": 7.3148417472839355, "learning_rate": 8.512150044493121e-05, "loss": 0.905, "step": 11619 }, { "epoch": 0.7873162138356257, "grad_norm": 6.7118353843688965, "learning_rate": 8.512013142583339e-05, "loss": 0.8032, "step": 11620 }, { "epoch": 0.7873839691035978, "grad_norm": 7.792928695678711, "learning_rate": 8.511876240673557e-05, "loss": 0.7426, "step": 11621 }, { "epoch": 0.7874517243715699, "grad_norm": 7.6762614250183105, "learning_rate": 8.511739338763776e-05, "loss": 0.7188, "step": 11622 }, { "epoch": 0.7875194796395419, "grad_norm": 4.8527607917785645, "learning_rate": 8.511602436853994e-05, "loss": 0.4442, "step": 11623 }, { "epoch": 0.787587234907514, "grad_norm": 4.9824957847595215, "learning_rate": 8.511465534944212e-05, "loss": 0.7234, "step": 11624 }, { "epoch": 0.7876549901754861, "grad_norm": 6.530035495758057, "learning_rate": 8.51132863303443e-05, "loss": 0.8197, "step": 11625 }, { "epoch": 0.7877227454434582, "grad_norm": 6.13261079788208, "learning_rate": 8.511191731124649e-05, "loss": 0.922, "step": 11626 }, { "epoch": 0.7877905007114303, "grad_norm": 7.777993202209473, "learning_rate": 8.511054829214868e-05, "loss": 0.7275, "step": 11627 }, { "epoch": 0.7878582559794024, "grad_norm": 5.4319281578063965, "learning_rate": 8.510917927305086e-05, "loss": 0.6158, "step": 11628 }, { "epoch": 0.7879260112473745, "grad_norm": 7.162403583526611, "learning_rate": 8.510781025395304e-05, "loss": 0.6238, "step": 11629 }, { "epoch": 0.7879937665153466, "grad_norm": 7.020395755767822, "learning_rate": 8.510644123485523e-05, "loss": 1.0743, "step": 11630 }, { "epoch": 0.7880615217833187, "grad_norm": 7.972400665283203, "learning_rate": 8.510507221575741e-05, "loss": 0.8562, "step": 11631 }, { "epoch": 0.7881292770512908, "grad_norm": 7.600156307220459, "learning_rate": 8.51037031966596e-05, "loss": 0.9075, "step": 11632 }, { "epoch": 0.7881970323192629, "grad_norm": 6.119834899902344, "learning_rate": 8.510233417756179e-05, "loss": 1.0396, "step": 11633 }, { "epoch": 0.788264787587235, "grad_norm": 6.254417896270752, "learning_rate": 8.510096515846397e-05, "loss": 0.8023, "step": 11634 }, { "epoch": 0.788332542855207, "grad_norm": 7.466210842132568, "learning_rate": 8.509959613936615e-05, "loss": 0.9744, "step": 11635 }, { "epoch": 0.788400298123179, "grad_norm": 6.635150909423828, "learning_rate": 8.509822712026834e-05, "loss": 0.8722, "step": 11636 }, { "epoch": 0.7884680533911511, "grad_norm": 6.4519362449646, "learning_rate": 8.509685810117052e-05, "loss": 0.7355, "step": 11637 }, { "epoch": 0.7885358086591232, "grad_norm": 6.042237758636475, "learning_rate": 8.50954890820727e-05, "loss": 0.7921, "step": 11638 }, { "epoch": 0.7886035639270953, "grad_norm": 6.3814616203308105, "learning_rate": 8.509412006297488e-05, "loss": 0.8843, "step": 11639 }, { "epoch": 0.7886713191950674, "grad_norm": 7.140514373779297, "learning_rate": 8.509275104387706e-05, "loss": 0.8913, "step": 11640 }, { "epoch": 0.7887390744630395, "grad_norm": 5.622684478759766, "learning_rate": 8.509138202477926e-05, "loss": 0.8172, "step": 11641 }, { "epoch": 0.7888068297310116, "grad_norm": 5.332169532775879, "learning_rate": 8.509001300568144e-05, "loss": 0.7585, "step": 11642 }, { "epoch": 0.7888745849989837, "grad_norm": 5.908132553100586, "learning_rate": 8.508864398658362e-05, "loss": 0.9236, "step": 11643 }, { "epoch": 0.7889423402669558, "grad_norm": 5.472514629364014, "learning_rate": 8.50872749674858e-05, "loss": 0.8923, "step": 11644 }, { "epoch": 0.7890100955349278, "grad_norm": 6.155751705169678, "learning_rate": 8.5085905948388e-05, "loss": 0.9054, "step": 11645 }, { "epoch": 0.7890778508028999, "grad_norm": 5.493722438812256, "learning_rate": 8.508453692929017e-05, "loss": 0.7483, "step": 11646 }, { "epoch": 0.789145606070872, "grad_norm": 7.661139965057373, "learning_rate": 8.508316791019235e-05, "loss": 0.7433, "step": 11647 }, { "epoch": 0.7892133613388441, "grad_norm": 6.037461757659912, "learning_rate": 8.508179889109453e-05, "loss": 0.933, "step": 11648 }, { "epoch": 0.7892811166068162, "grad_norm": 5.072673797607422, "learning_rate": 8.508042987199671e-05, "loss": 0.8115, "step": 11649 }, { "epoch": 0.7893488718747883, "grad_norm": 5.48813009262085, "learning_rate": 8.507906085289891e-05, "loss": 0.7581, "step": 11650 }, { "epoch": 0.7894166271427604, "grad_norm": 7.160548686981201, "learning_rate": 8.507769183380109e-05, "loss": 0.8798, "step": 11651 }, { "epoch": 0.7894843824107325, "grad_norm": 5.741982936859131, "learning_rate": 8.507632281470327e-05, "loss": 0.7952, "step": 11652 }, { "epoch": 0.7895521376787045, "grad_norm": 6.27877950668335, "learning_rate": 8.507495379560545e-05, "loss": 0.9341, "step": 11653 }, { "epoch": 0.7896198929466766, "grad_norm": 6.65634822845459, "learning_rate": 8.507358477650763e-05, "loss": 1.0704, "step": 11654 }, { "epoch": 0.7896876482146487, "grad_norm": 4.268200397491455, "learning_rate": 8.507221575740982e-05, "loss": 0.6692, "step": 11655 }, { "epoch": 0.7897554034826207, "grad_norm": 6.084795951843262, "learning_rate": 8.5070846738312e-05, "loss": 0.9024, "step": 11656 }, { "epoch": 0.7898231587505928, "grad_norm": 6.263867378234863, "learning_rate": 8.506947771921418e-05, "loss": 0.7795, "step": 11657 }, { "epoch": 0.7898909140185649, "grad_norm": 5.393991947174072, "learning_rate": 8.506810870011636e-05, "loss": 0.7585, "step": 11658 }, { "epoch": 0.789958669286537, "grad_norm": 5.708785057067871, "learning_rate": 8.506673968101856e-05, "loss": 0.9771, "step": 11659 }, { "epoch": 0.7900264245545091, "grad_norm": 5.779507637023926, "learning_rate": 8.506537066192074e-05, "loss": 0.7698, "step": 11660 }, { "epoch": 0.7900941798224812, "grad_norm": 5.451954364776611, "learning_rate": 8.506400164282292e-05, "loss": 0.8186, "step": 11661 }, { "epoch": 0.7901619350904533, "grad_norm": 6.849606513977051, "learning_rate": 8.50626326237251e-05, "loss": 0.5914, "step": 11662 }, { "epoch": 0.7902296903584254, "grad_norm": 6.583099365234375, "learning_rate": 8.506126360462728e-05, "loss": 0.7948, "step": 11663 }, { "epoch": 0.7902974456263975, "grad_norm": 5.769179821014404, "learning_rate": 8.505989458552947e-05, "loss": 0.7482, "step": 11664 }, { "epoch": 0.7903652008943696, "grad_norm": 6.720520496368408, "learning_rate": 8.505852556643165e-05, "loss": 0.8047, "step": 11665 }, { "epoch": 0.7904329561623417, "grad_norm": 5.712310791015625, "learning_rate": 8.505715654733383e-05, "loss": 0.744, "step": 11666 }, { "epoch": 0.7905007114303138, "grad_norm": 5.840827941894531, "learning_rate": 8.505578752823601e-05, "loss": 0.6534, "step": 11667 }, { "epoch": 0.7905684666982858, "grad_norm": 5.9518351554870605, "learning_rate": 8.505441850913821e-05, "loss": 0.9304, "step": 11668 }, { "epoch": 0.7906362219662578, "grad_norm": 6.9761528968811035, "learning_rate": 8.505304949004039e-05, "loss": 0.8118, "step": 11669 }, { "epoch": 0.7907039772342299, "grad_norm": 5.4346842765808105, "learning_rate": 8.505168047094257e-05, "loss": 0.7391, "step": 11670 }, { "epoch": 0.790771732502202, "grad_norm": 6.628547191619873, "learning_rate": 8.505031145184475e-05, "loss": 0.5708, "step": 11671 }, { "epoch": 0.7908394877701741, "grad_norm": 7.494357109069824, "learning_rate": 8.504894243274693e-05, "loss": 0.8687, "step": 11672 }, { "epoch": 0.7909072430381462, "grad_norm": 4.743175029754639, "learning_rate": 8.504757341364912e-05, "loss": 0.7287, "step": 11673 }, { "epoch": 0.7909749983061183, "grad_norm": 6.512623310089111, "learning_rate": 8.50462043945513e-05, "loss": 0.9047, "step": 11674 }, { "epoch": 0.7910427535740904, "grad_norm": 7.142396926879883, "learning_rate": 8.504483537545348e-05, "loss": 0.7012, "step": 11675 }, { "epoch": 0.7911105088420625, "grad_norm": 5.33573055267334, "learning_rate": 8.504346635635568e-05, "loss": 0.6606, "step": 11676 }, { "epoch": 0.7911782641100346, "grad_norm": 6.757303714752197, "learning_rate": 8.504209733725786e-05, "loss": 0.8881, "step": 11677 }, { "epoch": 0.7912460193780066, "grad_norm": 6.163415908813477, "learning_rate": 8.504072831816004e-05, "loss": 0.8034, "step": 11678 }, { "epoch": 0.7913137746459787, "grad_norm": 5.699300765991211, "learning_rate": 8.503935929906223e-05, "loss": 0.9178, "step": 11679 }, { "epoch": 0.7913815299139508, "grad_norm": 5.064122676849365, "learning_rate": 8.503799027996441e-05, "loss": 0.8219, "step": 11680 }, { "epoch": 0.7914492851819229, "grad_norm": 5.224904537200928, "learning_rate": 8.50366212608666e-05, "loss": 0.7344, "step": 11681 }, { "epoch": 0.791517040449895, "grad_norm": 4.929017543792725, "learning_rate": 8.503525224176879e-05, "loss": 0.7713, "step": 11682 }, { "epoch": 0.7915847957178671, "grad_norm": 6.1249918937683105, "learning_rate": 8.503388322267097e-05, "loss": 0.8553, "step": 11683 }, { "epoch": 0.7916525509858392, "grad_norm": 5.393836975097656, "learning_rate": 8.503251420357315e-05, "loss": 0.6989, "step": 11684 }, { "epoch": 0.7917203062538112, "grad_norm": 6.436197757720947, "learning_rate": 8.503114518447533e-05, "loss": 1.0481, "step": 11685 }, { "epoch": 0.7917880615217833, "grad_norm": 5.787166118621826, "learning_rate": 8.502977616537751e-05, "loss": 0.9203, "step": 11686 }, { "epoch": 0.7918558167897554, "grad_norm": 5.892452716827393, "learning_rate": 8.50284071462797e-05, "loss": 0.9302, "step": 11687 }, { "epoch": 0.7919235720577275, "grad_norm": 7.195859909057617, "learning_rate": 8.502703812718188e-05, "loss": 0.9175, "step": 11688 }, { "epoch": 0.7919913273256995, "grad_norm": 6.343230247497559, "learning_rate": 8.502566910808406e-05, "loss": 0.7292, "step": 11689 }, { "epoch": 0.7920590825936716, "grad_norm": 6.082936763763428, "learning_rate": 8.502430008898624e-05, "loss": 0.8826, "step": 11690 }, { "epoch": 0.7921268378616437, "grad_norm": 5.115715980529785, "learning_rate": 8.502293106988844e-05, "loss": 0.6697, "step": 11691 }, { "epoch": 0.7921945931296158, "grad_norm": 6.96610164642334, "learning_rate": 8.502156205079062e-05, "loss": 0.9914, "step": 11692 }, { "epoch": 0.7922623483975879, "grad_norm": 7.457095146179199, "learning_rate": 8.50201930316928e-05, "loss": 0.7839, "step": 11693 }, { "epoch": 0.79233010366556, "grad_norm": 7.025375843048096, "learning_rate": 8.501882401259498e-05, "loss": 1.0105, "step": 11694 }, { "epoch": 0.7923978589335321, "grad_norm": 5.951284408569336, "learning_rate": 8.501745499349716e-05, "loss": 0.6889, "step": 11695 }, { "epoch": 0.7924656142015042, "grad_norm": 5.650534629821777, "learning_rate": 8.501608597439935e-05, "loss": 0.7848, "step": 11696 }, { "epoch": 0.7925333694694763, "grad_norm": 5.552826881408691, "learning_rate": 8.501471695530153e-05, "loss": 0.6213, "step": 11697 }, { "epoch": 0.7926011247374484, "grad_norm": 5.661040306091309, "learning_rate": 8.501334793620371e-05, "loss": 0.5737, "step": 11698 }, { "epoch": 0.7926688800054205, "grad_norm": 5.965978622436523, "learning_rate": 8.50119789171059e-05, "loss": 0.8278, "step": 11699 }, { "epoch": 0.7927366352733926, "grad_norm": 5.854281902313232, "learning_rate": 8.501060989800809e-05, "loss": 0.831, "step": 11700 }, { "epoch": 0.7928043905413646, "grad_norm": 6.782879829406738, "learning_rate": 8.500924087891027e-05, "loss": 0.8058, "step": 11701 }, { "epoch": 0.7928721458093366, "grad_norm": 6.70954704284668, "learning_rate": 8.500787185981245e-05, "loss": 0.6725, "step": 11702 }, { "epoch": 0.7929399010773087, "grad_norm": 5.684144020080566, "learning_rate": 8.500650284071463e-05, "loss": 0.8527, "step": 11703 }, { "epoch": 0.7930076563452808, "grad_norm": 5.054625511169434, "learning_rate": 8.500513382161681e-05, "loss": 0.8579, "step": 11704 }, { "epoch": 0.7930754116132529, "grad_norm": 6.6226887702941895, "learning_rate": 8.5003764802519e-05, "loss": 0.9449, "step": 11705 }, { "epoch": 0.793143166881225, "grad_norm": 6.413197040557861, "learning_rate": 8.500239578342118e-05, "loss": 0.646, "step": 11706 }, { "epoch": 0.7932109221491971, "grad_norm": 4.308269500732422, "learning_rate": 8.500102676432336e-05, "loss": 0.6515, "step": 11707 }, { "epoch": 0.7932786774171692, "grad_norm": 5.270321846008301, "learning_rate": 8.499965774522554e-05, "loss": 0.6476, "step": 11708 }, { "epoch": 0.7933464326851413, "grad_norm": 6.440086364746094, "learning_rate": 8.499828872612772e-05, "loss": 0.8295, "step": 11709 }, { "epoch": 0.7934141879531134, "grad_norm": 6.078103065490723, "learning_rate": 8.499691970702992e-05, "loss": 0.8173, "step": 11710 }, { "epoch": 0.7934819432210855, "grad_norm": 4.972411155700684, "learning_rate": 8.49955506879321e-05, "loss": 0.6696, "step": 11711 }, { "epoch": 0.7935496984890575, "grad_norm": 5.865548610687256, "learning_rate": 8.499418166883428e-05, "loss": 0.7337, "step": 11712 }, { "epoch": 0.7936174537570296, "grad_norm": 6.041398048400879, "learning_rate": 8.499281264973646e-05, "loss": 0.8586, "step": 11713 }, { "epoch": 0.7936852090250017, "grad_norm": 8.569158554077148, "learning_rate": 8.499144363063865e-05, "loss": 0.9027, "step": 11714 }, { "epoch": 0.7937529642929738, "grad_norm": 6.1328020095825195, "learning_rate": 8.499007461154083e-05, "loss": 0.7019, "step": 11715 }, { "epoch": 0.7938207195609459, "grad_norm": 5.501848220825195, "learning_rate": 8.498870559244301e-05, "loss": 0.8227, "step": 11716 }, { "epoch": 0.793888474828918, "grad_norm": 8.566222190856934, "learning_rate": 8.49873365733452e-05, "loss": 0.8148, "step": 11717 }, { "epoch": 0.79395623009689, "grad_norm": 10.178439140319824, "learning_rate": 8.498596755424737e-05, "loss": 1.0213, "step": 11718 }, { "epoch": 0.7940239853648621, "grad_norm": 7.6949310302734375, "learning_rate": 8.498459853514957e-05, "loss": 0.7581, "step": 11719 }, { "epoch": 0.7940917406328342, "grad_norm": 5.988775730133057, "learning_rate": 8.498322951605175e-05, "loss": 0.7738, "step": 11720 }, { "epoch": 0.7941594959008063, "grad_norm": 5.81986665725708, "learning_rate": 8.498186049695393e-05, "loss": 0.7798, "step": 11721 }, { "epoch": 0.7942272511687783, "grad_norm": 5.84140157699585, "learning_rate": 8.498049147785612e-05, "loss": 0.8285, "step": 11722 }, { "epoch": 0.7942950064367504, "grad_norm": 4.675839900970459, "learning_rate": 8.49791224587583e-05, "loss": 0.6703, "step": 11723 }, { "epoch": 0.7943627617047225, "grad_norm": 5.857149124145508, "learning_rate": 8.497775343966048e-05, "loss": 0.8915, "step": 11724 }, { "epoch": 0.7944305169726946, "grad_norm": 10.169214248657227, "learning_rate": 8.497638442056268e-05, "loss": 0.8606, "step": 11725 }, { "epoch": 0.7944982722406667, "grad_norm": 5.2627058029174805, "learning_rate": 8.497501540146486e-05, "loss": 0.7705, "step": 11726 }, { "epoch": 0.7945660275086388, "grad_norm": 7.3032355308532715, "learning_rate": 8.497364638236704e-05, "loss": 1.0857, "step": 11727 }, { "epoch": 0.7946337827766109, "grad_norm": 7.499369144439697, "learning_rate": 8.497227736326923e-05, "loss": 0.8655, "step": 11728 }, { "epoch": 0.794701538044583, "grad_norm": 6.25002908706665, "learning_rate": 8.497090834417141e-05, "loss": 0.7271, "step": 11729 }, { "epoch": 0.7947692933125551, "grad_norm": 6.5583882331848145, "learning_rate": 8.496953932507359e-05, "loss": 0.771, "step": 11730 }, { "epoch": 0.7948370485805272, "grad_norm": 6.737629413604736, "learning_rate": 8.496817030597577e-05, "loss": 0.8865, "step": 11731 }, { "epoch": 0.7949048038484993, "grad_norm": 7.376718044281006, "learning_rate": 8.496680128687795e-05, "loss": 0.8407, "step": 11732 }, { "epoch": 0.7949725591164714, "grad_norm": 7.077400207519531, "learning_rate": 8.496543226778015e-05, "loss": 0.8339, "step": 11733 }, { "epoch": 0.7950403143844433, "grad_norm": 6.768246173858643, "learning_rate": 8.496406324868233e-05, "loss": 0.8654, "step": 11734 }, { "epoch": 0.7951080696524154, "grad_norm": 5.732030391693115, "learning_rate": 8.496269422958451e-05, "loss": 0.9901, "step": 11735 }, { "epoch": 0.7951758249203875, "grad_norm": 7.728169918060303, "learning_rate": 8.496132521048669e-05, "loss": 1.0275, "step": 11736 }, { "epoch": 0.7952435801883596, "grad_norm": 4.867015838623047, "learning_rate": 8.495995619138888e-05, "loss": 0.7226, "step": 11737 }, { "epoch": 0.7953113354563317, "grad_norm": 6.376992225646973, "learning_rate": 8.495858717229106e-05, "loss": 0.8501, "step": 11738 }, { "epoch": 0.7953790907243038, "grad_norm": 6.90419340133667, "learning_rate": 8.495721815319324e-05, "loss": 0.5617, "step": 11739 }, { "epoch": 0.7954468459922759, "grad_norm": 7.2407450675964355, "learning_rate": 8.495584913409542e-05, "loss": 0.8224, "step": 11740 }, { "epoch": 0.795514601260248, "grad_norm": 5.104798316955566, "learning_rate": 8.49544801149976e-05, "loss": 0.761, "step": 11741 }, { "epoch": 0.7955823565282201, "grad_norm": 5.49780797958374, "learning_rate": 8.49531110958998e-05, "loss": 0.6668, "step": 11742 }, { "epoch": 0.7956501117961922, "grad_norm": 4.905865669250488, "learning_rate": 8.495174207680198e-05, "loss": 0.7844, "step": 11743 }, { "epoch": 0.7957178670641643, "grad_norm": 6.6370391845703125, "learning_rate": 8.495037305770416e-05, "loss": 1.0102, "step": 11744 }, { "epoch": 0.7957856223321363, "grad_norm": 6.089507579803467, "learning_rate": 8.494900403860634e-05, "loss": 0.8679, "step": 11745 }, { "epoch": 0.7958533776001084, "grad_norm": 5.490042209625244, "learning_rate": 8.494763501950853e-05, "loss": 0.7481, "step": 11746 }, { "epoch": 0.7959211328680805, "grad_norm": 5.85631799697876, "learning_rate": 8.494626600041071e-05, "loss": 0.7926, "step": 11747 }, { "epoch": 0.7959888881360526, "grad_norm": 6.780750274658203, "learning_rate": 8.49448969813129e-05, "loss": 0.5902, "step": 11748 }, { "epoch": 0.7960566434040247, "grad_norm": 6.697319984436035, "learning_rate": 8.494352796221507e-05, "loss": 0.835, "step": 11749 }, { "epoch": 0.7961243986719968, "grad_norm": 6.056969165802002, "learning_rate": 8.494215894311725e-05, "loss": 0.8725, "step": 11750 }, { "epoch": 0.7961921539399688, "grad_norm": 9.009320259094238, "learning_rate": 8.494078992401945e-05, "loss": 0.8103, "step": 11751 }, { "epoch": 0.7962599092079409, "grad_norm": 7.897705554962158, "learning_rate": 8.493942090492163e-05, "loss": 0.7913, "step": 11752 }, { "epoch": 0.796327664475913, "grad_norm": 5.677363872528076, "learning_rate": 8.493805188582381e-05, "loss": 0.6845, "step": 11753 }, { "epoch": 0.7963954197438851, "grad_norm": 6.340780735015869, "learning_rate": 8.493668286672599e-05, "loss": 0.6519, "step": 11754 }, { "epoch": 0.7964631750118571, "grad_norm": 5.341319561004639, "learning_rate": 8.493531384762818e-05, "loss": 0.7437, "step": 11755 }, { "epoch": 0.7965309302798292, "grad_norm": 5.388099193572998, "learning_rate": 8.493394482853036e-05, "loss": 0.6879, "step": 11756 }, { "epoch": 0.7965986855478013, "grad_norm": 6.492825031280518, "learning_rate": 8.493257580943254e-05, "loss": 0.8562, "step": 11757 }, { "epoch": 0.7966664408157734, "grad_norm": 6.953293323516846, "learning_rate": 8.493120679033472e-05, "loss": 0.826, "step": 11758 }, { "epoch": 0.7967341960837455, "grad_norm": 5.61476993560791, "learning_rate": 8.49298377712369e-05, "loss": 0.7604, "step": 11759 }, { "epoch": 0.7968019513517176, "grad_norm": 7.391281604766846, "learning_rate": 8.49284687521391e-05, "loss": 0.9048, "step": 11760 }, { "epoch": 0.7968697066196897, "grad_norm": 5.455954074859619, "learning_rate": 8.492709973304128e-05, "loss": 0.8972, "step": 11761 }, { "epoch": 0.7969374618876618, "grad_norm": 10.402437210083008, "learning_rate": 8.492573071394346e-05, "loss": 0.8567, "step": 11762 }, { "epoch": 0.7970052171556339, "grad_norm": 7.704123497009277, "learning_rate": 8.492436169484564e-05, "loss": 0.8926, "step": 11763 }, { "epoch": 0.797072972423606, "grad_norm": 10.505579948425293, "learning_rate": 8.492299267574782e-05, "loss": 0.7695, "step": 11764 }, { "epoch": 0.7971407276915781, "grad_norm": 8.137372016906738, "learning_rate": 8.492162365665001e-05, "loss": 0.814, "step": 11765 }, { "epoch": 0.7972084829595502, "grad_norm": 7.7297587394714355, "learning_rate": 8.49202546375522e-05, "loss": 0.9038, "step": 11766 }, { "epoch": 0.7972762382275221, "grad_norm": 5.5629048347473145, "learning_rate": 8.491888561845437e-05, "loss": 0.5899, "step": 11767 }, { "epoch": 0.7973439934954942, "grad_norm": 9.380592346191406, "learning_rate": 8.491751659935655e-05, "loss": 0.8522, "step": 11768 }, { "epoch": 0.7974117487634663, "grad_norm": 5.808100700378418, "learning_rate": 8.491614758025875e-05, "loss": 0.923, "step": 11769 }, { "epoch": 0.7974795040314384, "grad_norm": 6.091804504394531, "learning_rate": 8.491477856116093e-05, "loss": 0.8042, "step": 11770 }, { "epoch": 0.7975472592994105, "grad_norm": 6.675506114959717, "learning_rate": 8.491340954206312e-05, "loss": 1.0141, "step": 11771 }, { "epoch": 0.7976150145673826, "grad_norm": 7.39612340927124, "learning_rate": 8.49120405229653e-05, "loss": 0.9027, "step": 11772 }, { "epoch": 0.7976827698353547, "grad_norm": 7.455977439880371, "learning_rate": 8.491067150386748e-05, "loss": 0.7705, "step": 11773 }, { "epoch": 0.7977505251033268, "grad_norm": 6.535350322723389, "learning_rate": 8.490930248476968e-05, "loss": 0.7648, "step": 11774 }, { "epoch": 0.7978182803712989, "grad_norm": 8.165600776672363, "learning_rate": 8.490793346567186e-05, "loss": 0.9073, "step": 11775 }, { "epoch": 0.797886035639271, "grad_norm": 5.424903869628906, "learning_rate": 8.490656444657404e-05, "loss": 0.7656, "step": 11776 }, { "epoch": 0.797953790907243, "grad_norm": 6.095433235168457, "learning_rate": 8.490519542747622e-05, "loss": 0.5978, "step": 11777 }, { "epoch": 0.7980215461752151, "grad_norm": 4.698237419128418, "learning_rate": 8.490382640837841e-05, "loss": 0.689, "step": 11778 }, { "epoch": 0.7980893014431872, "grad_norm": 5.7089691162109375, "learning_rate": 8.490245738928059e-05, "loss": 0.719, "step": 11779 }, { "epoch": 0.7981570567111593, "grad_norm": 4.3311848640441895, "learning_rate": 8.490108837018277e-05, "loss": 0.8484, "step": 11780 }, { "epoch": 0.7982248119791314, "grad_norm": 5.905073165893555, "learning_rate": 8.489971935108495e-05, "loss": 0.7679, "step": 11781 }, { "epoch": 0.7982925672471035, "grad_norm": 7.171839714050293, "learning_rate": 8.489835033198713e-05, "loss": 0.6716, "step": 11782 }, { "epoch": 0.7983603225150755, "grad_norm": 8.157262802124023, "learning_rate": 8.489698131288933e-05, "loss": 0.7821, "step": 11783 }, { "epoch": 0.7984280777830476, "grad_norm": 5.551645278930664, "learning_rate": 8.489561229379151e-05, "loss": 0.5619, "step": 11784 }, { "epoch": 0.7984958330510197, "grad_norm": 6.759763240814209, "learning_rate": 8.489424327469369e-05, "loss": 0.9527, "step": 11785 }, { "epoch": 0.7985635883189918, "grad_norm": 5.427465915679932, "learning_rate": 8.489287425559587e-05, "loss": 0.5603, "step": 11786 }, { "epoch": 0.7986313435869639, "grad_norm": 5.436514377593994, "learning_rate": 8.489150523649805e-05, "loss": 0.6464, "step": 11787 }, { "epoch": 0.798699098854936, "grad_norm": 5.558623313903809, "learning_rate": 8.489013621740024e-05, "loss": 0.7742, "step": 11788 }, { "epoch": 0.798766854122908, "grad_norm": 6.840981960296631, "learning_rate": 8.488876719830242e-05, "loss": 0.9908, "step": 11789 }, { "epoch": 0.7988346093908801, "grad_norm": 6.066009044647217, "learning_rate": 8.48873981792046e-05, "loss": 0.7004, "step": 11790 }, { "epoch": 0.7989023646588522, "grad_norm": 5.6321024894714355, "learning_rate": 8.488602916010678e-05, "loss": 0.7741, "step": 11791 }, { "epoch": 0.7989701199268243, "grad_norm": 6.667470932006836, "learning_rate": 8.488466014100898e-05, "loss": 0.9582, "step": 11792 }, { "epoch": 0.7990378751947964, "grad_norm": 5.693469524383545, "learning_rate": 8.488329112191116e-05, "loss": 0.8868, "step": 11793 }, { "epoch": 0.7991056304627685, "grad_norm": 4.270181179046631, "learning_rate": 8.488192210281334e-05, "loss": 0.5002, "step": 11794 }, { "epoch": 0.7991733857307406, "grad_norm": 6.748290061950684, "learning_rate": 8.488055308371552e-05, "loss": 0.9551, "step": 11795 }, { "epoch": 0.7992411409987127, "grad_norm": 6.088380336761475, "learning_rate": 8.48791840646177e-05, "loss": 0.735, "step": 11796 }, { "epoch": 0.7993088962666848, "grad_norm": 7.105894565582275, "learning_rate": 8.487781504551989e-05, "loss": 0.8939, "step": 11797 }, { "epoch": 0.7993766515346569, "grad_norm": 5.425162315368652, "learning_rate": 8.487644602642207e-05, "loss": 0.612, "step": 11798 }, { "epoch": 0.799444406802629, "grad_norm": 6.511662006378174, "learning_rate": 8.487507700732425e-05, "loss": 0.6636, "step": 11799 }, { "epoch": 0.7995121620706009, "grad_norm": 6.1298828125, "learning_rate": 8.487370798822643e-05, "loss": 0.7554, "step": 11800 }, { "epoch": 0.799579917338573, "grad_norm": 8.377950668334961, "learning_rate": 8.487233896912863e-05, "loss": 0.9965, "step": 11801 }, { "epoch": 0.7996476726065451, "grad_norm": 5.563699245452881, "learning_rate": 8.487096995003081e-05, "loss": 0.9248, "step": 11802 }, { "epoch": 0.7997154278745172, "grad_norm": 5.939857006072998, "learning_rate": 8.486960093093299e-05, "loss": 0.822, "step": 11803 }, { "epoch": 0.7997831831424893, "grad_norm": 7.076834678649902, "learning_rate": 8.486823191183517e-05, "loss": 0.9286, "step": 11804 }, { "epoch": 0.7998509384104614, "grad_norm": 5.123041152954102, "learning_rate": 8.486686289273735e-05, "loss": 0.5529, "step": 11805 }, { "epoch": 0.7999186936784335, "grad_norm": 7.112299919128418, "learning_rate": 8.486549387363954e-05, "loss": 1.0114, "step": 11806 }, { "epoch": 0.7999864489464056, "grad_norm": 5.081669807434082, "learning_rate": 8.486412485454172e-05, "loss": 0.6341, "step": 11807 }, { "epoch": 0.8000542042143777, "grad_norm": 4.5973310470581055, "learning_rate": 8.48627558354439e-05, "loss": 0.6517, "step": 11808 }, { "epoch": 0.8001219594823498, "grad_norm": 11.162668228149414, "learning_rate": 8.486138681634608e-05, "loss": 0.8336, "step": 11809 }, { "epoch": 0.8001897147503219, "grad_norm": 6.119869709014893, "learning_rate": 8.486001779724826e-05, "loss": 0.8596, "step": 11810 }, { "epoch": 0.800257470018294, "grad_norm": 6.878213405609131, "learning_rate": 8.485864877815046e-05, "loss": 1.0812, "step": 11811 }, { "epoch": 0.800325225286266, "grad_norm": 5.8862786293029785, "learning_rate": 8.485727975905264e-05, "loss": 0.5654, "step": 11812 }, { "epoch": 0.8003929805542381, "grad_norm": 5.611292362213135, "learning_rate": 8.485591073995482e-05, "loss": 0.7241, "step": 11813 }, { "epoch": 0.8004607358222102, "grad_norm": 5.294071197509766, "learning_rate": 8.4854541720857e-05, "loss": 0.8368, "step": 11814 }, { "epoch": 0.8005284910901823, "grad_norm": 7.11854362487793, "learning_rate": 8.485317270175919e-05, "loss": 0.7055, "step": 11815 }, { "epoch": 0.8005962463581543, "grad_norm": 6.1037373542785645, "learning_rate": 8.485180368266137e-05, "loss": 0.9473, "step": 11816 }, { "epoch": 0.8006640016261264, "grad_norm": 7.161137580871582, "learning_rate": 8.485043466356355e-05, "loss": 0.909, "step": 11817 }, { "epoch": 0.8007317568940985, "grad_norm": 5.295969009399414, "learning_rate": 8.484906564446575e-05, "loss": 0.7958, "step": 11818 }, { "epoch": 0.8007995121620706, "grad_norm": 7.93539571762085, "learning_rate": 8.484769662536793e-05, "loss": 0.823, "step": 11819 }, { "epoch": 0.8008672674300427, "grad_norm": 10.116933822631836, "learning_rate": 8.484632760627011e-05, "loss": 0.7204, "step": 11820 }, { "epoch": 0.8009350226980148, "grad_norm": 5.872991561889648, "learning_rate": 8.48449585871723e-05, "loss": 0.7758, "step": 11821 }, { "epoch": 0.8010027779659868, "grad_norm": 5.063296318054199, "learning_rate": 8.484358956807448e-05, "loss": 0.7124, "step": 11822 }, { "epoch": 0.8010705332339589, "grad_norm": 7.007580757141113, "learning_rate": 8.484222054897666e-05, "loss": 1.0091, "step": 11823 }, { "epoch": 0.801138288501931, "grad_norm": 7.909097671508789, "learning_rate": 8.484085152987886e-05, "loss": 0.7811, "step": 11824 }, { "epoch": 0.8012060437699031, "grad_norm": 6.1278486251831055, "learning_rate": 8.483948251078104e-05, "loss": 1.0353, "step": 11825 }, { "epoch": 0.8012737990378752, "grad_norm": 5.439823627471924, "learning_rate": 8.483811349168322e-05, "loss": 0.6914, "step": 11826 }, { "epoch": 0.8013415543058473, "grad_norm": 5.741817474365234, "learning_rate": 8.48367444725854e-05, "loss": 0.6987, "step": 11827 }, { "epoch": 0.8014093095738194, "grad_norm": 5.603649139404297, "learning_rate": 8.483537545348758e-05, "loss": 0.5928, "step": 11828 }, { "epoch": 0.8014770648417915, "grad_norm": 5.263033390045166, "learning_rate": 8.483400643438977e-05, "loss": 0.6639, "step": 11829 }, { "epoch": 0.8015448201097636, "grad_norm": 5.7066874504089355, "learning_rate": 8.483263741529195e-05, "loss": 0.8714, "step": 11830 }, { "epoch": 0.8016125753777357, "grad_norm": 7.526313781738281, "learning_rate": 8.483126839619413e-05, "loss": 0.8741, "step": 11831 }, { "epoch": 0.8016803306457077, "grad_norm": 7.491227626800537, "learning_rate": 8.482989937709631e-05, "loss": 0.7122, "step": 11832 }, { "epoch": 0.8017480859136797, "grad_norm": 5.182889938354492, "learning_rate": 8.482853035799851e-05, "loss": 0.7191, "step": 11833 }, { "epoch": 0.8018158411816518, "grad_norm": 7.201566219329834, "learning_rate": 8.482716133890069e-05, "loss": 1.0528, "step": 11834 }, { "epoch": 0.8018835964496239, "grad_norm": 6.984012126922607, "learning_rate": 8.482579231980287e-05, "loss": 0.943, "step": 11835 }, { "epoch": 0.801951351717596, "grad_norm": 6.960568904876709, "learning_rate": 8.482442330070505e-05, "loss": 0.8563, "step": 11836 }, { "epoch": 0.8020191069855681, "grad_norm": 5.898118495941162, "learning_rate": 8.482305428160723e-05, "loss": 0.5819, "step": 11837 }, { "epoch": 0.8020868622535402, "grad_norm": 7.0784592628479, "learning_rate": 8.482168526250942e-05, "loss": 1.0053, "step": 11838 }, { "epoch": 0.8021546175215123, "grad_norm": 5.741284370422363, "learning_rate": 8.48203162434116e-05, "loss": 0.8304, "step": 11839 }, { "epoch": 0.8022223727894844, "grad_norm": 6.108719348907471, "learning_rate": 8.481894722431378e-05, "loss": 0.7938, "step": 11840 }, { "epoch": 0.8022901280574565, "grad_norm": 8.281269073486328, "learning_rate": 8.481757820521596e-05, "loss": 0.8584, "step": 11841 }, { "epoch": 0.8023578833254286, "grad_norm": 9.610613822937012, "learning_rate": 8.481620918611814e-05, "loss": 0.9888, "step": 11842 }, { "epoch": 0.8024256385934007, "grad_norm": 7.831075191497803, "learning_rate": 8.481484016702034e-05, "loss": 0.7218, "step": 11843 }, { "epoch": 0.8024933938613728, "grad_norm": 5.8057756423950195, "learning_rate": 8.481347114792252e-05, "loss": 0.7296, "step": 11844 }, { "epoch": 0.8025611491293448, "grad_norm": 7.156800270080566, "learning_rate": 8.48121021288247e-05, "loss": 0.8085, "step": 11845 }, { "epoch": 0.8026289043973169, "grad_norm": 8.218076705932617, "learning_rate": 8.481073310972688e-05, "loss": 0.8111, "step": 11846 }, { "epoch": 0.802696659665289, "grad_norm": 5.695756435394287, "learning_rate": 8.480936409062907e-05, "loss": 0.6723, "step": 11847 }, { "epoch": 0.802764414933261, "grad_norm": 6.302215576171875, "learning_rate": 8.480799507153125e-05, "loss": 0.9768, "step": 11848 }, { "epoch": 0.8028321702012331, "grad_norm": 6.877220630645752, "learning_rate": 8.480662605243343e-05, "loss": 0.756, "step": 11849 }, { "epoch": 0.8028999254692052, "grad_norm": 5.5505805015563965, "learning_rate": 8.480525703333561e-05, "loss": 0.8171, "step": 11850 }, { "epoch": 0.8029676807371773, "grad_norm": 6.355271339416504, "learning_rate": 8.48038880142378e-05, "loss": 0.791, "step": 11851 }, { "epoch": 0.8030354360051494, "grad_norm": 5.083590984344482, "learning_rate": 8.480251899513999e-05, "loss": 0.6983, "step": 11852 }, { "epoch": 0.8031031912731215, "grad_norm": 5.824821472167969, "learning_rate": 8.480114997604217e-05, "loss": 0.8647, "step": 11853 }, { "epoch": 0.8031709465410936, "grad_norm": 5.969753742218018, "learning_rate": 8.479978095694435e-05, "loss": 0.6172, "step": 11854 }, { "epoch": 0.8032387018090656, "grad_norm": 6.784320831298828, "learning_rate": 8.479841193784653e-05, "loss": 0.9374, "step": 11855 }, { "epoch": 0.8033064570770377, "grad_norm": 8.921832084655762, "learning_rate": 8.479704291874872e-05, "loss": 0.6769, "step": 11856 }, { "epoch": 0.8033742123450098, "grad_norm": 5.738458633422852, "learning_rate": 8.47956738996509e-05, "loss": 0.784, "step": 11857 }, { "epoch": 0.8034419676129819, "grad_norm": 5.5067949295043945, "learning_rate": 8.479430488055308e-05, "loss": 0.8279, "step": 11858 }, { "epoch": 0.803509722880954, "grad_norm": 7.618658065795898, "learning_rate": 8.479293586145526e-05, "loss": 0.7528, "step": 11859 }, { "epoch": 0.8035774781489261, "grad_norm": 7.020671844482422, "learning_rate": 8.479156684235744e-05, "loss": 0.8522, "step": 11860 }, { "epoch": 0.8036452334168982, "grad_norm": 6.179223537445068, "learning_rate": 8.479019782325964e-05, "loss": 0.7367, "step": 11861 }, { "epoch": 0.8037129886848703, "grad_norm": 7.495842933654785, "learning_rate": 8.478882880416182e-05, "loss": 0.9065, "step": 11862 }, { "epoch": 0.8037807439528424, "grad_norm": 5.692570686340332, "learning_rate": 8.4787459785064e-05, "loss": 0.964, "step": 11863 }, { "epoch": 0.8038484992208145, "grad_norm": 6.177666664123535, "learning_rate": 8.478609076596619e-05, "loss": 1.0165, "step": 11864 }, { "epoch": 0.8039162544887865, "grad_norm": 5.492598533630371, "learning_rate": 8.478472174686837e-05, "loss": 0.7103, "step": 11865 }, { "epoch": 0.8039840097567585, "grad_norm": 6.331650257110596, "learning_rate": 8.478335272777055e-05, "loss": 0.7544, "step": 11866 }, { "epoch": 0.8040517650247306, "grad_norm": 6.0005412101745605, "learning_rate": 8.478198370867275e-05, "loss": 1.0188, "step": 11867 }, { "epoch": 0.8041195202927027, "grad_norm": 5.383848190307617, "learning_rate": 8.478061468957493e-05, "loss": 0.6002, "step": 11868 }, { "epoch": 0.8041872755606748, "grad_norm": 5.411609172821045, "learning_rate": 8.477924567047711e-05, "loss": 0.7154, "step": 11869 }, { "epoch": 0.8042550308286469, "grad_norm": 6.1650614738464355, "learning_rate": 8.47778766513793e-05, "loss": 0.6797, "step": 11870 }, { "epoch": 0.804322786096619, "grad_norm": 5.903098106384277, "learning_rate": 8.477650763228148e-05, "loss": 0.8813, "step": 11871 }, { "epoch": 0.8043905413645911, "grad_norm": 5.579502105712891, "learning_rate": 8.477513861318366e-05, "loss": 0.8935, "step": 11872 }, { "epoch": 0.8044582966325632, "grad_norm": 5.614695072174072, "learning_rate": 8.477376959408584e-05, "loss": 0.7914, "step": 11873 }, { "epoch": 0.8045260519005353, "grad_norm": 8.45317268371582, "learning_rate": 8.477240057498802e-05, "loss": 0.8245, "step": 11874 }, { "epoch": 0.8045938071685074, "grad_norm": 4.028397560119629, "learning_rate": 8.477103155589022e-05, "loss": 0.7105, "step": 11875 }, { "epoch": 0.8046615624364795, "grad_norm": 6.691359519958496, "learning_rate": 8.47696625367924e-05, "loss": 0.8871, "step": 11876 }, { "epoch": 0.8047293177044516, "grad_norm": 5.918386936187744, "learning_rate": 8.476829351769458e-05, "loss": 0.9558, "step": 11877 }, { "epoch": 0.8047970729724236, "grad_norm": 5.369225978851318, "learning_rate": 8.476692449859676e-05, "loss": 0.9613, "step": 11878 }, { "epoch": 0.8048648282403957, "grad_norm": 6.17078161239624, "learning_rate": 8.476555547949895e-05, "loss": 0.969, "step": 11879 }, { "epoch": 0.8049325835083678, "grad_norm": 7.199222087860107, "learning_rate": 8.476418646040113e-05, "loss": 1.0042, "step": 11880 }, { "epoch": 0.8050003387763398, "grad_norm": 4.739121913909912, "learning_rate": 8.476281744130331e-05, "loss": 0.695, "step": 11881 }, { "epoch": 0.8050680940443119, "grad_norm": 5.670197486877441, "learning_rate": 8.476144842220549e-05, "loss": 0.7452, "step": 11882 }, { "epoch": 0.805135849312284, "grad_norm": 5.977322578430176, "learning_rate": 8.476007940310767e-05, "loss": 0.9352, "step": 11883 }, { "epoch": 0.8052036045802561, "grad_norm": 4.873468399047852, "learning_rate": 8.475871038400987e-05, "loss": 0.857, "step": 11884 }, { "epoch": 0.8052713598482282, "grad_norm": 6.405252933502197, "learning_rate": 8.475734136491205e-05, "loss": 0.8847, "step": 11885 }, { "epoch": 0.8053391151162003, "grad_norm": 7.104851722717285, "learning_rate": 8.475597234581423e-05, "loss": 0.9097, "step": 11886 }, { "epoch": 0.8054068703841724, "grad_norm": 7.579138278961182, "learning_rate": 8.475460332671641e-05, "loss": 1.1768, "step": 11887 }, { "epoch": 0.8054746256521444, "grad_norm": 7.450385093688965, "learning_rate": 8.47532343076186e-05, "loss": 1.1769, "step": 11888 }, { "epoch": 0.8055423809201165, "grad_norm": 5.401349067687988, "learning_rate": 8.475186528852078e-05, "loss": 0.7389, "step": 11889 }, { "epoch": 0.8056101361880886, "grad_norm": 7.890493869781494, "learning_rate": 8.475049626942296e-05, "loss": 0.8679, "step": 11890 }, { "epoch": 0.8056778914560607, "grad_norm": 6.528770446777344, "learning_rate": 8.474912725032514e-05, "loss": 0.637, "step": 11891 }, { "epoch": 0.8057456467240328, "grad_norm": 5.3305277824401855, "learning_rate": 8.474775823122732e-05, "loss": 0.7671, "step": 11892 }, { "epoch": 0.8058134019920049, "grad_norm": 6.40350866317749, "learning_rate": 8.474638921212952e-05, "loss": 1.055, "step": 11893 }, { "epoch": 0.805881157259977, "grad_norm": 7.686482906341553, "learning_rate": 8.47450201930317e-05, "loss": 0.7023, "step": 11894 }, { "epoch": 0.8059489125279491, "grad_norm": 5.42139196395874, "learning_rate": 8.474365117393388e-05, "loss": 0.7528, "step": 11895 }, { "epoch": 0.8060166677959212, "grad_norm": 5.809099197387695, "learning_rate": 8.474228215483606e-05, "loss": 0.8843, "step": 11896 }, { "epoch": 0.8060844230638932, "grad_norm": 7.206499099731445, "learning_rate": 8.474091313573824e-05, "loss": 0.9504, "step": 11897 }, { "epoch": 0.8061521783318653, "grad_norm": 9.313186645507812, "learning_rate": 8.473954411664043e-05, "loss": 0.665, "step": 11898 }, { "epoch": 0.8062199335998373, "grad_norm": 6.453405857086182, "learning_rate": 8.473817509754261e-05, "loss": 0.6757, "step": 11899 }, { "epoch": 0.8062876888678094, "grad_norm": 6.309181213378906, "learning_rate": 8.473680607844479e-05, "loss": 0.8481, "step": 11900 }, { "epoch": 0.8063554441357815, "grad_norm": 7.917797088623047, "learning_rate": 8.473543705934697e-05, "loss": 0.7202, "step": 11901 }, { "epoch": 0.8064231994037536, "grad_norm": 5.929610252380371, "learning_rate": 8.473406804024917e-05, "loss": 0.8476, "step": 11902 }, { "epoch": 0.8064909546717257, "grad_norm": 7.981934070587158, "learning_rate": 8.473269902115135e-05, "loss": 0.6972, "step": 11903 }, { "epoch": 0.8065587099396978, "grad_norm": 6.3853440284729, "learning_rate": 8.473133000205353e-05, "loss": 0.881, "step": 11904 }, { "epoch": 0.8066264652076699, "grad_norm": 6.253373622894287, "learning_rate": 8.472996098295571e-05, "loss": 0.8463, "step": 11905 }, { "epoch": 0.806694220475642, "grad_norm": 5.279558181762695, "learning_rate": 8.472859196385789e-05, "loss": 0.8723, "step": 11906 }, { "epoch": 0.8067619757436141, "grad_norm": 7.0585126876831055, "learning_rate": 8.472722294476008e-05, "loss": 0.8384, "step": 11907 }, { "epoch": 0.8068297310115862, "grad_norm": 9.070088386535645, "learning_rate": 8.472585392566226e-05, "loss": 0.6578, "step": 11908 }, { "epoch": 0.8068974862795583, "grad_norm": 7.324275970458984, "learning_rate": 8.472448490656444e-05, "loss": 0.7454, "step": 11909 }, { "epoch": 0.8069652415475304, "grad_norm": 5.854486465454102, "learning_rate": 8.472311588746664e-05, "loss": 0.9014, "step": 11910 }, { "epoch": 0.8070329968155024, "grad_norm": 7.762482166290283, "learning_rate": 8.472174686836882e-05, "loss": 0.8265, "step": 11911 }, { "epoch": 0.8071007520834745, "grad_norm": 7.545839786529541, "learning_rate": 8.4720377849271e-05, "loss": 0.7831, "step": 11912 }, { "epoch": 0.8071685073514466, "grad_norm": 4.952934741973877, "learning_rate": 8.471900883017319e-05, "loss": 0.6514, "step": 11913 }, { "epoch": 0.8072362626194186, "grad_norm": 5.232631206512451, "learning_rate": 8.471763981107537e-05, "loss": 0.6642, "step": 11914 }, { "epoch": 0.8073040178873907, "grad_norm": 6.2648844718933105, "learning_rate": 8.471627079197755e-05, "loss": 0.9277, "step": 11915 }, { "epoch": 0.8073717731553628, "grad_norm": 6.143033027648926, "learning_rate": 8.471490177287975e-05, "loss": 0.6482, "step": 11916 }, { "epoch": 0.8074395284233349, "grad_norm": 7.901699066162109, "learning_rate": 8.471353275378193e-05, "loss": 0.739, "step": 11917 }, { "epoch": 0.807507283691307, "grad_norm": 7.756877899169922, "learning_rate": 8.471216373468411e-05, "loss": 0.8307, "step": 11918 }, { "epoch": 0.8075750389592791, "grad_norm": 8.90708065032959, "learning_rate": 8.471079471558629e-05, "loss": 1.0344, "step": 11919 }, { "epoch": 0.8076427942272512, "grad_norm": 6.328546524047852, "learning_rate": 8.470942569648847e-05, "loss": 0.8514, "step": 11920 }, { "epoch": 0.8077105494952233, "grad_norm": 7.169960975646973, "learning_rate": 8.470805667739066e-05, "loss": 0.8809, "step": 11921 }, { "epoch": 0.8077783047631953, "grad_norm": 6.628053188323975, "learning_rate": 8.470668765829284e-05, "loss": 0.9416, "step": 11922 }, { "epoch": 0.8078460600311674, "grad_norm": 7.435145378112793, "learning_rate": 8.470531863919502e-05, "loss": 0.6911, "step": 11923 }, { "epoch": 0.8079138152991395, "grad_norm": 5.010408878326416, "learning_rate": 8.47039496200972e-05, "loss": 0.7926, "step": 11924 }, { "epoch": 0.8079815705671116, "grad_norm": 5.625992298126221, "learning_rate": 8.47025806009994e-05, "loss": 0.8863, "step": 11925 }, { "epoch": 0.8080493258350837, "grad_norm": 8.575138092041016, "learning_rate": 8.470121158190158e-05, "loss": 0.7859, "step": 11926 }, { "epoch": 0.8081170811030558, "grad_norm": 5.657655239105225, "learning_rate": 8.469984256280376e-05, "loss": 0.7417, "step": 11927 }, { "epoch": 0.8081848363710279, "grad_norm": 6.131776809692383, "learning_rate": 8.469847354370594e-05, "loss": 0.8232, "step": 11928 }, { "epoch": 0.808252591639, "grad_norm": 7.183122158050537, "learning_rate": 8.469710452460812e-05, "loss": 0.7562, "step": 11929 }, { "epoch": 0.808320346906972, "grad_norm": 6.112959384918213, "learning_rate": 8.469573550551031e-05, "loss": 0.8744, "step": 11930 }, { "epoch": 0.8083881021749441, "grad_norm": 6.5298590660095215, "learning_rate": 8.469436648641249e-05, "loss": 0.8624, "step": 11931 }, { "epoch": 0.8084558574429161, "grad_norm": 7.30040979385376, "learning_rate": 8.469299746731467e-05, "loss": 0.8141, "step": 11932 }, { "epoch": 0.8085236127108882, "grad_norm": 5.7774977684021, "learning_rate": 8.469162844821685e-05, "loss": 0.7541, "step": 11933 }, { "epoch": 0.8085913679788603, "grad_norm": 6.179437637329102, "learning_rate": 8.469025942911905e-05, "loss": 0.7737, "step": 11934 }, { "epoch": 0.8086591232468324, "grad_norm": 5.715566635131836, "learning_rate": 8.468889041002123e-05, "loss": 0.7055, "step": 11935 }, { "epoch": 0.8087268785148045, "grad_norm": 5.082167148590088, "learning_rate": 8.468752139092341e-05, "loss": 0.7135, "step": 11936 }, { "epoch": 0.8087946337827766, "grad_norm": 5.4100117683410645, "learning_rate": 8.468615237182559e-05, "loss": 0.6587, "step": 11937 }, { "epoch": 0.8088623890507487, "grad_norm": 5.654635429382324, "learning_rate": 8.468478335272777e-05, "loss": 0.8371, "step": 11938 }, { "epoch": 0.8089301443187208, "grad_norm": 13.093804359436035, "learning_rate": 8.468341433362996e-05, "loss": 0.7622, "step": 11939 }, { "epoch": 0.8089978995866929, "grad_norm": 6.519128322601318, "learning_rate": 8.468204531453214e-05, "loss": 0.5443, "step": 11940 }, { "epoch": 0.809065654854665, "grad_norm": 6.814082622528076, "learning_rate": 8.468067629543432e-05, "loss": 0.7527, "step": 11941 }, { "epoch": 0.8091334101226371, "grad_norm": 5.064624786376953, "learning_rate": 8.46793072763365e-05, "loss": 0.7392, "step": 11942 }, { "epoch": 0.8092011653906092, "grad_norm": 6.819398403167725, "learning_rate": 8.467793825723868e-05, "loss": 0.8395, "step": 11943 }, { "epoch": 0.8092689206585812, "grad_norm": 5.4765520095825195, "learning_rate": 8.467656923814088e-05, "loss": 0.6145, "step": 11944 }, { "epoch": 0.8093366759265533, "grad_norm": 5.149988651275635, "learning_rate": 8.467520021904306e-05, "loss": 0.6676, "step": 11945 }, { "epoch": 0.8094044311945253, "grad_norm": 5.497957229614258, "learning_rate": 8.467383119994524e-05, "loss": 0.8605, "step": 11946 }, { "epoch": 0.8094721864624974, "grad_norm": 6.20892333984375, "learning_rate": 8.467246218084742e-05, "loss": 0.7578, "step": 11947 }, { "epoch": 0.8095399417304695, "grad_norm": 5.368823528289795, "learning_rate": 8.467109316174961e-05, "loss": 0.6105, "step": 11948 }, { "epoch": 0.8096076969984416, "grad_norm": 5.05849552154541, "learning_rate": 8.466972414265179e-05, "loss": 0.6165, "step": 11949 }, { "epoch": 0.8096754522664137, "grad_norm": 6.1569366455078125, "learning_rate": 8.466835512355397e-05, "loss": 0.6865, "step": 11950 }, { "epoch": 0.8097432075343858, "grad_norm": 4.664434432983398, "learning_rate": 8.466698610445615e-05, "loss": 0.7348, "step": 11951 }, { "epoch": 0.8098109628023579, "grad_norm": 5.691340446472168, "learning_rate": 8.466561708535833e-05, "loss": 0.625, "step": 11952 }, { "epoch": 0.80987871807033, "grad_norm": 5.343607425689697, "learning_rate": 8.466424806626053e-05, "loss": 0.5954, "step": 11953 }, { "epoch": 0.809946473338302, "grad_norm": 7.289133548736572, "learning_rate": 8.466287904716271e-05, "loss": 0.8564, "step": 11954 }, { "epoch": 0.8100142286062741, "grad_norm": 5.425955772399902, "learning_rate": 8.466151002806489e-05, "loss": 0.6588, "step": 11955 }, { "epoch": 0.8100819838742462, "grad_norm": 6.205384731292725, "learning_rate": 8.466014100896708e-05, "loss": 0.8809, "step": 11956 }, { "epoch": 0.8101497391422183, "grad_norm": 7.989570140838623, "learning_rate": 8.465877198986926e-05, "loss": 0.6923, "step": 11957 }, { "epoch": 0.8102174944101904, "grad_norm": 5.3845343589782715, "learning_rate": 8.465740297077144e-05, "loss": 0.645, "step": 11958 }, { "epoch": 0.8102852496781625, "grad_norm": 5.856838703155518, "learning_rate": 8.465603395167364e-05, "loss": 0.9173, "step": 11959 }, { "epoch": 0.8103530049461346, "grad_norm": 7.78700590133667, "learning_rate": 8.465466493257582e-05, "loss": 0.7514, "step": 11960 }, { "epoch": 0.8104207602141067, "grad_norm": 6.29768705368042, "learning_rate": 8.4653295913478e-05, "loss": 0.862, "step": 11961 }, { "epoch": 0.8104885154820788, "grad_norm": 5.1076579093933105, "learning_rate": 8.465192689438019e-05, "loss": 0.6395, "step": 11962 }, { "epoch": 0.8105562707500508, "grad_norm": 7.518921375274658, "learning_rate": 8.465055787528237e-05, "loss": 0.9251, "step": 11963 }, { "epoch": 0.8106240260180229, "grad_norm": 5.740368843078613, "learning_rate": 8.464918885618455e-05, "loss": 0.8354, "step": 11964 }, { "epoch": 0.810691781285995, "grad_norm": 7.306612491607666, "learning_rate": 8.464781983708673e-05, "loss": 0.7414, "step": 11965 }, { "epoch": 0.810759536553967, "grad_norm": 5.480811595916748, "learning_rate": 8.464645081798893e-05, "loss": 0.7021, "step": 11966 }, { "epoch": 0.8108272918219391, "grad_norm": 6.626734733581543, "learning_rate": 8.46450817988911e-05, "loss": 0.7866, "step": 11967 }, { "epoch": 0.8108950470899112, "grad_norm": 5.154781818389893, "learning_rate": 8.464371277979329e-05, "loss": 0.653, "step": 11968 }, { "epoch": 0.8109628023578833, "grad_norm": 6.271585464477539, "learning_rate": 8.464234376069547e-05, "loss": 0.9318, "step": 11969 }, { "epoch": 0.8110305576258554, "grad_norm": 6.008039951324463, "learning_rate": 8.464097474159765e-05, "loss": 0.6224, "step": 11970 }, { "epoch": 0.8110983128938275, "grad_norm": 6.575869560241699, "learning_rate": 8.463960572249984e-05, "loss": 0.685, "step": 11971 }, { "epoch": 0.8111660681617996, "grad_norm": 6.166112899780273, "learning_rate": 8.463823670340202e-05, "loss": 0.6738, "step": 11972 }, { "epoch": 0.8112338234297717, "grad_norm": 6.191090106964111, "learning_rate": 8.46368676843042e-05, "loss": 0.9672, "step": 11973 }, { "epoch": 0.8113015786977438, "grad_norm": 5.888790130615234, "learning_rate": 8.463549866520638e-05, "loss": 0.5671, "step": 11974 }, { "epoch": 0.8113693339657159, "grad_norm": 6.156980514526367, "learning_rate": 8.463412964610856e-05, "loss": 0.6792, "step": 11975 }, { "epoch": 0.811437089233688, "grad_norm": 6.996181011199951, "learning_rate": 8.463276062701076e-05, "loss": 0.9247, "step": 11976 }, { "epoch": 0.81150484450166, "grad_norm": 7.532526016235352, "learning_rate": 8.463139160791294e-05, "loss": 0.7807, "step": 11977 }, { "epoch": 0.8115725997696321, "grad_norm": 5.303388595581055, "learning_rate": 8.463002258881512e-05, "loss": 0.6836, "step": 11978 }, { "epoch": 0.8116403550376041, "grad_norm": 5.388332843780518, "learning_rate": 8.46286535697173e-05, "loss": 0.8513, "step": 11979 }, { "epoch": 0.8117081103055762, "grad_norm": 5.5608601570129395, "learning_rate": 8.462728455061949e-05, "loss": 0.6047, "step": 11980 }, { "epoch": 0.8117758655735483, "grad_norm": 6.766813278198242, "learning_rate": 8.462591553152167e-05, "loss": 0.8592, "step": 11981 }, { "epoch": 0.8118436208415204, "grad_norm": 6.640246868133545, "learning_rate": 8.462454651242385e-05, "loss": 0.6319, "step": 11982 }, { "epoch": 0.8119113761094925, "grad_norm": 6.7665815353393555, "learning_rate": 8.462317749332603e-05, "loss": 0.8778, "step": 11983 }, { "epoch": 0.8119791313774646, "grad_norm": 5.935091018676758, "learning_rate": 8.462180847422821e-05, "loss": 0.6987, "step": 11984 }, { "epoch": 0.8120468866454367, "grad_norm": 4.797418594360352, "learning_rate": 8.46204394551304e-05, "loss": 0.6862, "step": 11985 }, { "epoch": 0.8121146419134088, "grad_norm": 5.728845119476318, "learning_rate": 8.461907043603259e-05, "loss": 0.8859, "step": 11986 }, { "epoch": 0.8121823971813809, "grad_norm": 6.456442356109619, "learning_rate": 8.461770141693477e-05, "loss": 0.8329, "step": 11987 }, { "epoch": 0.812250152449353, "grad_norm": 6.974035739898682, "learning_rate": 8.461633239783695e-05, "loss": 0.8881, "step": 11988 }, { "epoch": 0.812317907717325, "grad_norm": 6.6539788246154785, "learning_rate": 8.461496337873914e-05, "loss": 1.1583, "step": 11989 }, { "epoch": 0.8123856629852971, "grad_norm": 4.992013931274414, "learning_rate": 8.461359435964132e-05, "loss": 0.6866, "step": 11990 }, { "epoch": 0.8124534182532692, "grad_norm": 6.416220664978027, "learning_rate": 8.46122253405435e-05, "loss": 1.1336, "step": 11991 }, { "epoch": 0.8125211735212413, "grad_norm": 6.000530242919922, "learning_rate": 8.461085632144568e-05, "loss": 0.8182, "step": 11992 }, { "epoch": 0.8125889287892134, "grad_norm": 6.560791492462158, "learning_rate": 8.460948730234786e-05, "loss": 0.5594, "step": 11993 }, { "epoch": 0.8126566840571855, "grad_norm": 5.342809200286865, "learning_rate": 8.460811828325006e-05, "loss": 0.9214, "step": 11994 }, { "epoch": 0.8127244393251575, "grad_norm": 6.472506046295166, "learning_rate": 8.460674926415224e-05, "loss": 0.9376, "step": 11995 }, { "epoch": 0.8127921945931296, "grad_norm": 10.083342552185059, "learning_rate": 8.460538024505442e-05, "loss": 0.8582, "step": 11996 }, { "epoch": 0.8128599498611017, "grad_norm": 6.755568504333496, "learning_rate": 8.46040112259566e-05, "loss": 1.2787, "step": 11997 }, { "epoch": 0.8129277051290738, "grad_norm": 5.924015998840332, "learning_rate": 8.460264220685878e-05, "loss": 0.7204, "step": 11998 }, { "epoch": 0.8129954603970458, "grad_norm": 7.797226428985596, "learning_rate": 8.460127318776097e-05, "loss": 1.0529, "step": 11999 }, { "epoch": 0.8130632156650179, "grad_norm": 6.317507743835449, "learning_rate": 8.459990416866315e-05, "loss": 0.4736, "step": 12000 }, { "epoch": 0.81313097093299, "grad_norm": 6.195952415466309, "learning_rate": 8.459853514956533e-05, "loss": 0.8751, "step": 12001 }, { "epoch": 0.8131987262009621, "grad_norm": 8.634666442871094, "learning_rate": 8.459716613046753e-05, "loss": 0.8552, "step": 12002 }, { "epoch": 0.8132664814689342, "grad_norm": 6.352993488311768, "learning_rate": 8.459579711136971e-05, "loss": 0.9277, "step": 12003 }, { "epoch": 0.8133342367369063, "grad_norm": 6.608835697174072, "learning_rate": 8.459442809227189e-05, "loss": 0.9077, "step": 12004 }, { "epoch": 0.8134019920048784, "grad_norm": 5.515098571777344, "learning_rate": 8.459305907317408e-05, "loss": 0.7215, "step": 12005 }, { "epoch": 0.8134697472728505, "grad_norm": 5.592660427093506, "learning_rate": 8.459169005407626e-05, "loss": 0.6318, "step": 12006 }, { "epoch": 0.8135375025408226, "grad_norm": 6.810677528381348, "learning_rate": 8.459032103497844e-05, "loss": 0.8533, "step": 12007 }, { "epoch": 0.8136052578087947, "grad_norm": 6.685205936431885, "learning_rate": 8.458895201588064e-05, "loss": 0.8084, "step": 12008 }, { "epoch": 0.8136730130767668, "grad_norm": 5.3733062744140625, "learning_rate": 8.458758299678282e-05, "loss": 0.8156, "step": 12009 }, { "epoch": 0.8137407683447389, "grad_norm": 4.988927841186523, "learning_rate": 8.4586213977685e-05, "loss": 0.777, "step": 12010 }, { "epoch": 0.8138085236127109, "grad_norm": 7.371654510498047, "learning_rate": 8.458484495858718e-05, "loss": 0.7513, "step": 12011 }, { "epoch": 0.8138762788806829, "grad_norm": 6.63214111328125, "learning_rate": 8.458347593948937e-05, "loss": 0.7991, "step": 12012 }, { "epoch": 0.813944034148655, "grad_norm": 6.847991466522217, "learning_rate": 8.458210692039155e-05, "loss": 0.8741, "step": 12013 }, { "epoch": 0.8140117894166271, "grad_norm": 6.964975357055664, "learning_rate": 8.458073790129373e-05, "loss": 0.8665, "step": 12014 }, { "epoch": 0.8140795446845992, "grad_norm": 6.188068866729736, "learning_rate": 8.457936888219591e-05, "loss": 0.8174, "step": 12015 }, { "epoch": 0.8141472999525713, "grad_norm": 8.014631271362305, "learning_rate": 8.457799986309809e-05, "loss": 0.8603, "step": 12016 }, { "epoch": 0.8142150552205434, "grad_norm": 4.80557107925415, "learning_rate": 8.457663084400029e-05, "loss": 0.7971, "step": 12017 }, { "epoch": 0.8142828104885155, "grad_norm": 4.926700115203857, "learning_rate": 8.457526182490247e-05, "loss": 0.6986, "step": 12018 }, { "epoch": 0.8143505657564876, "grad_norm": 7.593190670013428, "learning_rate": 8.457389280580465e-05, "loss": 0.8656, "step": 12019 }, { "epoch": 0.8144183210244597, "grad_norm": 5.325191497802734, "learning_rate": 8.457252378670683e-05, "loss": 0.7471, "step": 12020 }, { "epoch": 0.8144860762924317, "grad_norm": 6.783299446105957, "learning_rate": 8.457115476760902e-05, "loss": 0.9003, "step": 12021 }, { "epoch": 0.8145538315604038, "grad_norm": 5.972321033477783, "learning_rate": 8.45697857485112e-05, "loss": 0.9259, "step": 12022 }, { "epoch": 0.8146215868283759, "grad_norm": 4.9444708824157715, "learning_rate": 8.456841672941338e-05, "loss": 0.6313, "step": 12023 }, { "epoch": 0.814689342096348, "grad_norm": 6.034842014312744, "learning_rate": 8.456704771031556e-05, "loss": 0.736, "step": 12024 }, { "epoch": 0.8147570973643201, "grad_norm": 7.463682651519775, "learning_rate": 8.456567869121774e-05, "loss": 0.8651, "step": 12025 }, { "epoch": 0.8148248526322922, "grad_norm": 6.878032207489014, "learning_rate": 8.456430967211994e-05, "loss": 0.9458, "step": 12026 }, { "epoch": 0.8148926079002643, "grad_norm": 6.859936237335205, "learning_rate": 8.456294065302212e-05, "loss": 0.9125, "step": 12027 }, { "epoch": 0.8149603631682363, "grad_norm": 6.0320329666137695, "learning_rate": 8.45615716339243e-05, "loss": 0.6817, "step": 12028 }, { "epoch": 0.8150281184362084, "grad_norm": 6.321547031402588, "learning_rate": 8.456020261482648e-05, "loss": 0.7538, "step": 12029 }, { "epoch": 0.8150958737041805, "grad_norm": 6.318676471710205, "learning_rate": 8.455883359572866e-05, "loss": 0.9262, "step": 12030 }, { "epoch": 0.8151636289721526, "grad_norm": 5.807433605194092, "learning_rate": 8.455746457663085e-05, "loss": 0.7597, "step": 12031 }, { "epoch": 0.8152313842401246, "grad_norm": 6.104518413543701, "learning_rate": 8.455609555753303e-05, "loss": 0.7186, "step": 12032 }, { "epoch": 0.8152991395080967, "grad_norm": 6.957059860229492, "learning_rate": 8.455472653843521e-05, "loss": 0.7533, "step": 12033 }, { "epoch": 0.8153668947760688, "grad_norm": 6.928465366363525, "learning_rate": 8.455335751933739e-05, "loss": 0.578, "step": 12034 }, { "epoch": 0.8154346500440409, "grad_norm": 6.699448108673096, "learning_rate": 8.455198850023959e-05, "loss": 0.7888, "step": 12035 }, { "epoch": 0.815502405312013, "grad_norm": 7.328460693359375, "learning_rate": 8.455061948114177e-05, "loss": 0.72, "step": 12036 }, { "epoch": 0.8155701605799851, "grad_norm": 7.082894802093506, "learning_rate": 8.454925046204395e-05, "loss": 0.9115, "step": 12037 }, { "epoch": 0.8156379158479572, "grad_norm": 5.156605243682861, "learning_rate": 8.454788144294613e-05, "loss": 0.748, "step": 12038 }, { "epoch": 0.8157056711159293, "grad_norm": 6.401536464691162, "learning_rate": 8.454651242384831e-05, "loss": 0.8282, "step": 12039 }, { "epoch": 0.8157734263839014, "grad_norm": 7.056277275085449, "learning_rate": 8.45451434047505e-05, "loss": 0.9653, "step": 12040 }, { "epoch": 0.8158411816518735, "grad_norm": 5.628291130065918, "learning_rate": 8.454377438565268e-05, "loss": 0.8733, "step": 12041 }, { "epoch": 0.8159089369198456, "grad_norm": 5.863224506378174, "learning_rate": 8.454240536655486e-05, "loss": 0.8301, "step": 12042 }, { "epoch": 0.8159766921878177, "grad_norm": 7.33843994140625, "learning_rate": 8.454103634745704e-05, "loss": 0.6597, "step": 12043 }, { "epoch": 0.8160444474557896, "grad_norm": 5.626278400421143, "learning_rate": 8.453966732835924e-05, "loss": 0.7648, "step": 12044 }, { "epoch": 0.8161122027237617, "grad_norm": 5.470703125, "learning_rate": 8.453829830926142e-05, "loss": 0.6636, "step": 12045 }, { "epoch": 0.8161799579917338, "grad_norm": 4.597399711608887, "learning_rate": 8.45369292901636e-05, "loss": 0.7107, "step": 12046 }, { "epoch": 0.8162477132597059, "grad_norm": 6.172791957855225, "learning_rate": 8.453556027106578e-05, "loss": 0.7516, "step": 12047 }, { "epoch": 0.816315468527678, "grad_norm": 6.869264125823975, "learning_rate": 8.453419125196796e-05, "loss": 0.7626, "step": 12048 }, { "epoch": 0.8163832237956501, "grad_norm": 6.974149703979492, "learning_rate": 8.453282223287015e-05, "loss": 0.8517, "step": 12049 }, { "epoch": 0.8164509790636222, "grad_norm": 6.083059787750244, "learning_rate": 8.453145321377233e-05, "loss": 0.7275, "step": 12050 }, { "epoch": 0.8165187343315943, "grad_norm": 4.947962284088135, "learning_rate": 8.453008419467451e-05, "loss": 0.663, "step": 12051 }, { "epoch": 0.8165864895995664, "grad_norm": 5.22273588180542, "learning_rate": 8.45287151755767e-05, "loss": 0.6265, "step": 12052 }, { "epoch": 0.8166542448675385, "grad_norm": 7.42067289352417, "learning_rate": 8.452734615647889e-05, "loss": 1.1131, "step": 12053 }, { "epoch": 0.8167220001355106, "grad_norm": 6.975278854370117, "learning_rate": 8.452597713738107e-05, "loss": 0.8143, "step": 12054 }, { "epoch": 0.8167897554034826, "grad_norm": 5.899443626403809, "learning_rate": 8.452460811828326e-05, "loss": 0.7033, "step": 12055 }, { "epoch": 0.8168575106714547, "grad_norm": 5.515699863433838, "learning_rate": 8.452323909918544e-05, "loss": 0.676, "step": 12056 }, { "epoch": 0.8169252659394268, "grad_norm": 5.615140438079834, "learning_rate": 8.452187008008762e-05, "loss": 0.6282, "step": 12057 }, { "epoch": 0.8169930212073989, "grad_norm": 5.279138088226318, "learning_rate": 8.452050106098982e-05, "loss": 0.7139, "step": 12058 }, { "epoch": 0.817060776475371, "grad_norm": 4.64931583404541, "learning_rate": 8.4519132041892e-05, "loss": 0.7691, "step": 12059 }, { "epoch": 0.817128531743343, "grad_norm": 5.363344192504883, "learning_rate": 8.451776302279418e-05, "loss": 0.8622, "step": 12060 }, { "epoch": 0.8171962870113151, "grad_norm": 5.616733551025391, "learning_rate": 8.451639400369636e-05, "loss": 0.729, "step": 12061 }, { "epoch": 0.8172640422792872, "grad_norm": 6.5094451904296875, "learning_rate": 8.451502498459854e-05, "loss": 0.7148, "step": 12062 }, { "epoch": 0.8173317975472593, "grad_norm": 6.721555233001709, "learning_rate": 8.451365596550073e-05, "loss": 0.8657, "step": 12063 }, { "epoch": 0.8173995528152314, "grad_norm": 6.426924705505371, "learning_rate": 8.451228694640291e-05, "loss": 0.7729, "step": 12064 }, { "epoch": 0.8174673080832034, "grad_norm": 5.894415378570557, "learning_rate": 8.451091792730509e-05, "loss": 0.8842, "step": 12065 }, { "epoch": 0.8175350633511755, "grad_norm": 9.44097900390625, "learning_rate": 8.450954890820727e-05, "loss": 0.7221, "step": 12066 }, { "epoch": 0.8176028186191476, "grad_norm": 9.030364990234375, "learning_rate": 8.450817988910947e-05, "loss": 0.9898, "step": 12067 }, { "epoch": 0.8176705738871197, "grad_norm": 5.958207607269287, "learning_rate": 8.450681087001165e-05, "loss": 0.9868, "step": 12068 }, { "epoch": 0.8177383291550918, "grad_norm": 5.822267532348633, "learning_rate": 8.450544185091383e-05, "loss": 0.7575, "step": 12069 }, { "epoch": 0.8178060844230639, "grad_norm": 6.314889430999756, "learning_rate": 8.4504072831816e-05, "loss": 0.8706, "step": 12070 }, { "epoch": 0.817873839691036, "grad_norm": 9.256656646728516, "learning_rate": 8.450270381271819e-05, "loss": 0.7976, "step": 12071 }, { "epoch": 0.8179415949590081, "grad_norm": 5.871959209442139, "learning_rate": 8.450133479362038e-05, "loss": 0.6814, "step": 12072 }, { "epoch": 0.8180093502269802, "grad_norm": 9.304550170898438, "learning_rate": 8.449996577452256e-05, "loss": 0.6588, "step": 12073 }, { "epoch": 0.8180771054949523, "grad_norm": 5.510218620300293, "learning_rate": 8.449859675542474e-05, "loss": 0.5854, "step": 12074 }, { "epoch": 0.8181448607629244, "grad_norm": 4.799395561218262, "learning_rate": 8.449722773632692e-05, "loss": 0.6387, "step": 12075 }, { "epoch": 0.8182126160308965, "grad_norm": 7.109929084777832, "learning_rate": 8.44958587172291e-05, "loss": 1.0768, "step": 12076 }, { "epoch": 0.8182803712988684, "grad_norm": 5.443954944610596, "learning_rate": 8.44944896981313e-05, "loss": 0.7782, "step": 12077 }, { "epoch": 0.8183481265668405, "grad_norm": 5.463802814483643, "learning_rate": 8.449312067903348e-05, "loss": 0.8792, "step": 12078 }, { "epoch": 0.8184158818348126, "grad_norm": 6.221611022949219, "learning_rate": 8.449175165993566e-05, "loss": 0.786, "step": 12079 }, { "epoch": 0.8184836371027847, "grad_norm": 5.399687767028809, "learning_rate": 8.449038264083784e-05, "loss": 0.7112, "step": 12080 }, { "epoch": 0.8185513923707568, "grad_norm": 6.230489730834961, "learning_rate": 8.448901362174003e-05, "loss": 0.6785, "step": 12081 }, { "epoch": 0.8186191476387289, "grad_norm": 6.096298694610596, "learning_rate": 8.448764460264221e-05, "loss": 0.641, "step": 12082 }, { "epoch": 0.818686902906701, "grad_norm": 5.790489673614502, "learning_rate": 8.448627558354439e-05, "loss": 0.8582, "step": 12083 }, { "epoch": 0.8187546581746731, "grad_norm": 5.866037845611572, "learning_rate": 8.448490656444657e-05, "loss": 0.8527, "step": 12084 }, { "epoch": 0.8188224134426452, "grad_norm": 6.1855854988098145, "learning_rate": 8.448353754534875e-05, "loss": 0.8359, "step": 12085 }, { "epoch": 0.8188901687106173, "grad_norm": 6.506312370300293, "learning_rate": 8.448216852625095e-05, "loss": 0.6803, "step": 12086 }, { "epoch": 0.8189579239785894, "grad_norm": 8.091931343078613, "learning_rate": 8.448079950715313e-05, "loss": 1.0264, "step": 12087 }, { "epoch": 0.8190256792465614, "grad_norm": 6.240049362182617, "learning_rate": 8.447943048805531e-05, "loss": 0.7127, "step": 12088 }, { "epoch": 0.8190934345145335, "grad_norm": 6.7533650398254395, "learning_rate": 8.447806146895749e-05, "loss": 1.0634, "step": 12089 }, { "epoch": 0.8191611897825056, "grad_norm": 5.064426422119141, "learning_rate": 8.447669244985968e-05, "loss": 0.6213, "step": 12090 }, { "epoch": 0.8192289450504777, "grad_norm": 5.491628646850586, "learning_rate": 8.447532343076186e-05, "loss": 0.5707, "step": 12091 }, { "epoch": 0.8192967003184498, "grad_norm": 7.105623722076416, "learning_rate": 8.447395441166404e-05, "loss": 0.9926, "step": 12092 }, { "epoch": 0.8193644555864218, "grad_norm": 7.82690954208374, "learning_rate": 8.447258539256622e-05, "loss": 0.7902, "step": 12093 }, { "epoch": 0.8194322108543939, "grad_norm": 5.6463494300842285, "learning_rate": 8.44712163734684e-05, "loss": 0.8812, "step": 12094 }, { "epoch": 0.819499966122366, "grad_norm": 7.104325771331787, "learning_rate": 8.44698473543706e-05, "loss": 0.6544, "step": 12095 }, { "epoch": 0.8195677213903381, "grad_norm": 5.303103923797607, "learning_rate": 8.446847833527278e-05, "loss": 0.8304, "step": 12096 }, { "epoch": 0.8196354766583102, "grad_norm": 6.566699504852295, "learning_rate": 8.446710931617496e-05, "loss": 0.8582, "step": 12097 }, { "epoch": 0.8197032319262822, "grad_norm": 7.369137287139893, "learning_rate": 8.446574029707715e-05, "loss": 0.7852, "step": 12098 }, { "epoch": 0.8197709871942543, "grad_norm": 6.183825492858887, "learning_rate": 8.446437127797933e-05, "loss": 0.8196, "step": 12099 }, { "epoch": 0.8198387424622264, "grad_norm": 4.837382793426514, "learning_rate": 8.446300225888151e-05, "loss": 0.6597, "step": 12100 }, { "epoch": 0.8199064977301985, "grad_norm": 6.405309200286865, "learning_rate": 8.44616332397837e-05, "loss": 0.7934, "step": 12101 }, { "epoch": 0.8199742529981706, "grad_norm": 6.547097682952881, "learning_rate": 8.446026422068589e-05, "loss": 0.8375, "step": 12102 }, { "epoch": 0.8200420082661427, "grad_norm": 6.001138687133789, "learning_rate": 8.445889520158807e-05, "loss": 0.7713, "step": 12103 }, { "epoch": 0.8201097635341148, "grad_norm": 5.217280864715576, "learning_rate": 8.445752618249026e-05, "loss": 0.605, "step": 12104 }, { "epoch": 0.8201775188020869, "grad_norm": 5.498340129852295, "learning_rate": 8.445615716339244e-05, "loss": 0.7789, "step": 12105 }, { "epoch": 0.820245274070059, "grad_norm": 9.02701187133789, "learning_rate": 8.445478814429462e-05, "loss": 0.8801, "step": 12106 }, { "epoch": 0.8203130293380311, "grad_norm": 7.654047966003418, "learning_rate": 8.44534191251968e-05, "loss": 0.7072, "step": 12107 }, { "epoch": 0.8203807846060032, "grad_norm": 5.503271102905273, "learning_rate": 8.445205010609898e-05, "loss": 0.7304, "step": 12108 }, { "epoch": 0.8204485398739751, "grad_norm": 5.020559310913086, "learning_rate": 8.445068108700118e-05, "loss": 0.5477, "step": 12109 }, { "epoch": 0.8205162951419472, "grad_norm": 6.803164958953857, "learning_rate": 8.444931206790336e-05, "loss": 0.922, "step": 12110 }, { "epoch": 0.8205840504099193, "grad_norm": 5.567500114440918, "learning_rate": 8.444794304880554e-05, "loss": 0.5649, "step": 12111 }, { "epoch": 0.8206518056778914, "grad_norm": 6.515629291534424, "learning_rate": 8.444657402970772e-05, "loss": 0.5943, "step": 12112 }, { "epoch": 0.8207195609458635, "grad_norm": 6.220151424407959, "learning_rate": 8.444520501060991e-05, "loss": 0.7958, "step": 12113 }, { "epoch": 0.8207873162138356, "grad_norm": 6.00366735458374, "learning_rate": 8.444383599151209e-05, "loss": 0.6698, "step": 12114 }, { "epoch": 0.8208550714818077, "grad_norm": 7.6385955810546875, "learning_rate": 8.444246697241427e-05, "loss": 0.7866, "step": 12115 }, { "epoch": 0.8209228267497798, "grad_norm": 4.94298791885376, "learning_rate": 8.444109795331645e-05, "loss": 0.6862, "step": 12116 }, { "epoch": 0.8209905820177519, "grad_norm": 4.838351726531982, "learning_rate": 8.443972893421863e-05, "loss": 0.6723, "step": 12117 }, { "epoch": 0.821058337285724, "grad_norm": 5.836402893066406, "learning_rate": 8.443835991512083e-05, "loss": 0.6631, "step": 12118 }, { "epoch": 0.8211260925536961, "grad_norm": 6.094921588897705, "learning_rate": 8.4436990896023e-05, "loss": 0.6569, "step": 12119 }, { "epoch": 0.8211938478216682, "grad_norm": 6.998372554779053, "learning_rate": 8.443562187692519e-05, "loss": 0.8489, "step": 12120 }, { "epoch": 0.8212616030896402, "grad_norm": 8.239564895629883, "learning_rate": 8.443425285782737e-05, "loss": 0.9627, "step": 12121 }, { "epoch": 0.8213293583576123, "grad_norm": 5.836661338806152, "learning_rate": 8.443288383872956e-05, "loss": 0.6482, "step": 12122 }, { "epoch": 0.8213971136255844, "grad_norm": 5.142320156097412, "learning_rate": 8.443151481963174e-05, "loss": 0.5763, "step": 12123 }, { "epoch": 0.8214648688935565, "grad_norm": 5.773630619049072, "learning_rate": 8.443014580053392e-05, "loss": 0.7495, "step": 12124 }, { "epoch": 0.8215326241615286, "grad_norm": 6.265152931213379, "learning_rate": 8.44287767814361e-05, "loss": 0.732, "step": 12125 }, { "epoch": 0.8216003794295006, "grad_norm": 5.69442081451416, "learning_rate": 8.442740776233828e-05, "loss": 0.7479, "step": 12126 }, { "epoch": 0.8216681346974727, "grad_norm": 5.151772499084473, "learning_rate": 8.442603874324048e-05, "loss": 0.9404, "step": 12127 }, { "epoch": 0.8217358899654448, "grad_norm": 5.117092609405518, "learning_rate": 8.442466972414266e-05, "loss": 0.8087, "step": 12128 }, { "epoch": 0.8218036452334169, "grad_norm": 6.638974666595459, "learning_rate": 8.442330070504484e-05, "loss": 1.018, "step": 12129 }, { "epoch": 0.821871400501389, "grad_norm": 5.713891983032227, "learning_rate": 8.442193168594702e-05, "loss": 0.7464, "step": 12130 }, { "epoch": 0.821939155769361, "grad_norm": 5.336922645568848, "learning_rate": 8.44205626668492e-05, "loss": 0.7876, "step": 12131 }, { "epoch": 0.8220069110373331, "grad_norm": 6.789290904998779, "learning_rate": 8.441919364775139e-05, "loss": 0.8613, "step": 12132 }, { "epoch": 0.8220746663053052, "grad_norm": 5.347286701202393, "learning_rate": 8.441782462865357e-05, "loss": 0.899, "step": 12133 }, { "epoch": 0.8221424215732773, "grad_norm": 5.311189651489258, "learning_rate": 8.441645560955575e-05, "loss": 0.6064, "step": 12134 }, { "epoch": 0.8222101768412494, "grad_norm": 5.930995464324951, "learning_rate": 8.441508659045793e-05, "loss": 0.7213, "step": 12135 }, { "epoch": 0.8222779321092215, "grad_norm": 7.024041652679443, "learning_rate": 8.441371757136013e-05, "loss": 0.7926, "step": 12136 }, { "epoch": 0.8223456873771936, "grad_norm": 5.4607768058776855, "learning_rate": 8.44123485522623e-05, "loss": 0.752, "step": 12137 }, { "epoch": 0.8224134426451657, "grad_norm": 6.063724994659424, "learning_rate": 8.441097953316449e-05, "loss": 0.6209, "step": 12138 }, { "epoch": 0.8224811979131378, "grad_norm": 7.265159606933594, "learning_rate": 8.440961051406667e-05, "loss": 0.65, "step": 12139 }, { "epoch": 0.8225489531811099, "grad_norm": 7.551514148712158, "learning_rate": 8.440824149496885e-05, "loss": 1.0007, "step": 12140 }, { "epoch": 0.822616708449082, "grad_norm": 4.997889995574951, "learning_rate": 8.440687247587104e-05, "loss": 0.6301, "step": 12141 }, { "epoch": 0.822684463717054, "grad_norm": 5.728504657745361, "learning_rate": 8.440550345677322e-05, "loss": 0.8397, "step": 12142 }, { "epoch": 0.822752218985026, "grad_norm": 6.213530540466309, "learning_rate": 8.44041344376754e-05, "loss": 0.7488, "step": 12143 }, { "epoch": 0.8228199742529981, "grad_norm": 8.768404006958008, "learning_rate": 8.44027654185776e-05, "loss": 0.9469, "step": 12144 }, { "epoch": 0.8228877295209702, "grad_norm": 7.084804534912109, "learning_rate": 8.440139639947978e-05, "loss": 0.697, "step": 12145 }, { "epoch": 0.8229554847889423, "grad_norm": 4.6725239753723145, "learning_rate": 8.440002738038196e-05, "loss": 0.6902, "step": 12146 }, { "epoch": 0.8230232400569144, "grad_norm": 5.927494049072266, "learning_rate": 8.439865836128415e-05, "loss": 0.7912, "step": 12147 }, { "epoch": 0.8230909953248865, "grad_norm": 5.5850419998168945, "learning_rate": 8.439728934218633e-05, "loss": 0.6394, "step": 12148 }, { "epoch": 0.8231587505928586, "grad_norm": 6.570766448974609, "learning_rate": 8.439592032308851e-05, "loss": 1.004, "step": 12149 }, { "epoch": 0.8232265058608307, "grad_norm": 6.343209266662598, "learning_rate": 8.43945513039907e-05, "loss": 0.5969, "step": 12150 }, { "epoch": 0.8232942611288028, "grad_norm": 7.24255895614624, "learning_rate": 8.439318228489289e-05, "loss": 0.9409, "step": 12151 }, { "epoch": 0.8233620163967749, "grad_norm": 5.600708961486816, "learning_rate": 8.439181326579507e-05, "loss": 1.0606, "step": 12152 }, { "epoch": 0.823429771664747, "grad_norm": 7.920993804931641, "learning_rate": 8.439044424669725e-05, "loss": 0.8452, "step": 12153 }, { "epoch": 0.823497526932719, "grad_norm": 6.625662326812744, "learning_rate": 8.438907522759944e-05, "loss": 0.8304, "step": 12154 }, { "epoch": 0.8235652822006911, "grad_norm": 7.728579998016357, "learning_rate": 8.438770620850162e-05, "loss": 0.7777, "step": 12155 }, { "epoch": 0.8236330374686632, "grad_norm": 7.7875752449035645, "learning_rate": 8.43863371894038e-05, "loss": 0.8318, "step": 12156 }, { "epoch": 0.8237007927366353, "grad_norm": 5.524309158325195, "learning_rate": 8.438496817030598e-05, "loss": 0.7924, "step": 12157 }, { "epoch": 0.8237685480046073, "grad_norm": 4.976869106292725, "learning_rate": 8.438359915120816e-05, "loss": 0.7819, "step": 12158 }, { "epoch": 0.8238363032725794, "grad_norm": 7.557702541351318, "learning_rate": 8.438223013211036e-05, "loss": 0.6508, "step": 12159 }, { "epoch": 0.8239040585405515, "grad_norm": 6.236000061035156, "learning_rate": 8.438086111301254e-05, "loss": 0.9233, "step": 12160 }, { "epoch": 0.8239718138085236, "grad_norm": 4.986820220947266, "learning_rate": 8.437949209391472e-05, "loss": 0.7341, "step": 12161 }, { "epoch": 0.8240395690764957, "grad_norm": 6.939243793487549, "learning_rate": 8.43781230748169e-05, "loss": 1.0755, "step": 12162 }, { "epoch": 0.8241073243444678, "grad_norm": 5.3395843505859375, "learning_rate": 8.437675405571908e-05, "loss": 0.6432, "step": 12163 }, { "epoch": 0.8241750796124399, "grad_norm": 5.462789058685303, "learning_rate": 8.437538503662127e-05, "loss": 0.7004, "step": 12164 }, { "epoch": 0.824242834880412, "grad_norm": 7.232882022857666, "learning_rate": 8.437401601752345e-05, "loss": 0.7802, "step": 12165 }, { "epoch": 0.824310590148384, "grad_norm": 4.660044193267822, "learning_rate": 8.437264699842563e-05, "loss": 0.6253, "step": 12166 }, { "epoch": 0.8243783454163561, "grad_norm": 6.779306888580322, "learning_rate": 8.437127797932781e-05, "loss": 0.9231, "step": 12167 }, { "epoch": 0.8244461006843282, "grad_norm": 5.5832905769348145, "learning_rate": 8.436990896023e-05, "loss": 0.8409, "step": 12168 }, { "epoch": 0.8245138559523003, "grad_norm": 6.603589057922363, "learning_rate": 8.436853994113219e-05, "loss": 0.897, "step": 12169 }, { "epoch": 0.8245816112202724, "grad_norm": 7.075769424438477, "learning_rate": 8.436717092203437e-05, "loss": 0.6168, "step": 12170 }, { "epoch": 0.8246493664882445, "grad_norm": 5.7542948722839355, "learning_rate": 8.436580190293655e-05, "loss": 0.7924, "step": 12171 }, { "epoch": 0.8247171217562166, "grad_norm": 6.221360683441162, "learning_rate": 8.436443288383873e-05, "loss": 0.8444, "step": 12172 }, { "epoch": 0.8247848770241887, "grad_norm": 5.970016002655029, "learning_rate": 8.436306386474092e-05, "loss": 0.8126, "step": 12173 }, { "epoch": 0.8248526322921608, "grad_norm": 5.611728668212891, "learning_rate": 8.43616948456431e-05, "loss": 0.8069, "step": 12174 }, { "epoch": 0.8249203875601328, "grad_norm": 5.260378360748291, "learning_rate": 8.436032582654528e-05, "loss": 0.7535, "step": 12175 }, { "epoch": 0.8249881428281048, "grad_norm": 11.273239135742188, "learning_rate": 8.435895680744746e-05, "loss": 0.6942, "step": 12176 }, { "epoch": 0.8250558980960769, "grad_norm": 5.525880813598633, "learning_rate": 8.435758778834966e-05, "loss": 0.8583, "step": 12177 }, { "epoch": 0.825123653364049, "grad_norm": 8.410612106323242, "learning_rate": 8.435621876925184e-05, "loss": 0.852, "step": 12178 }, { "epoch": 0.8251914086320211, "grad_norm": 6.29329252243042, "learning_rate": 8.435484975015402e-05, "loss": 0.7961, "step": 12179 }, { "epoch": 0.8252591638999932, "grad_norm": 5.848037242889404, "learning_rate": 8.43534807310562e-05, "loss": 0.7964, "step": 12180 }, { "epoch": 0.8253269191679653, "grad_norm": 5.5981669425964355, "learning_rate": 8.435211171195838e-05, "loss": 0.7474, "step": 12181 }, { "epoch": 0.8253946744359374, "grad_norm": 7.036654949188232, "learning_rate": 8.435074269286057e-05, "loss": 0.9101, "step": 12182 }, { "epoch": 0.8254624297039095, "grad_norm": 6.488468170166016, "learning_rate": 8.434937367376275e-05, "loss": 1.1791, "step": 12183 }, { "epoch": 0.8255301849718816, "grad_norm": 6.368350505828857, "learning_rate": 8.434800465466493e-05, "loss": 0.7757, "step": 12184 }, { "epoch": 0.8255979402398537, "grad_norm": 5.316969394683838, "learning_rate": 8.434663563556711e-05, "loss": 0.7681, "step": 12185 }, { "epoch": 0.8256656955078258, "grad_norm": 6.011645793914795, "learning_rate": 8.434526661646929e-05, "loss": 0.7887, "step": 12186 }, { "epoch": 0.8257334507757979, "grad_norm": 6.3625664710998535, "learning_rate": 8.434389759737149e-05, "loss": 0.7103, "step": 12187 }, { "epoch": 0.8258012060437699, "grad_norm": 5.478143215179443, "learning_rate": 8.434252857827367e-05, "loss": 0.6532, "step": 12188 }, { "epoch": 0.825868961311742, "grad_norm": 6.654770851135254, "learning_rate": 8.434115955917585e-05, "loss": 0.7328, "step": 12189 }, { "epoch": 0.8259367165797141, "grad_norm": 5.170722007751465, "learning_rate": 8.433979054007804e-05, "loss": 0.7592, "step": 12190 }, { "epoch": 0.8260044718476861, "grad_norm": 5.70284366607666, "learning_rate": 8.433842152098022e-05, "loss": 0.7273, "step": 12191 }, { "epoch": 0.8260722271156582, "grad_norm": 5.822709560394287, "learning_rate": 8.43370525018824e-05, "loss": 0.9407, "step": 12192 }, { "epoch": 0.8261399823836303, "grad_norm": 7.529512882232666, "learning_rate": 8.43356834827846e-05, "loss": 0.9318, "step": 12193 }, { "epoch": 0.8262077376516024, "grad_norm": 5.989007949829102, "learning_rate": 8.433431446368678e-05, "loss": 0.6854, "step": 12194 }, { "epoch": 0.8262754929195745, "grad_norm": 5.390767574310303, "learning_rate": 8.433294544458896e-05, "loss": 0.9111, "step": 12195 }, { "epoch": 0.8263432481875466, "grad_norm": 6.274799346923828, "learning_rate": 8.433157642549115e-05, "loss": 0.5621, "step": 12196 }, { "epoch": 0.8264110034555187, "grad_norm": 6.217073917388916, "learning_rate": 8.433020740639333e-05, "loss": 0.8127, "step": 12197 }, { "epoch": 0.8264787587234907, "grad_norm": 5.437521457672119, "learning_rate": 8.432883838729551e-05, "loss": 0.6947, "step": 12198 }, { "epoch": 0.8265465139914628, "grad_norm": 6.302811145782471, "learning_rate": 8.432746936819769e-05, "loss": 0.855, "step": 12199 }, { "epoch": 0.8266142692594349, "grad_norm": 6.268338203430176, "learning_rate": 8.432610034909988e-05, "loss": 0.8669, "step": 12200 }, { "epoch": 0.826682024527407, "grad_norm": 6.356218338012695, "learning_rate": 8.432473133000207e-05, "loss": 0.7883, "step": 12201 }, { "epoch": 0.8267497797953791, "grad_norm": 7.442529201507568, "learning_rate": 8.432336231090425e-05, "loss": 1.159, "step": 12202 }, { "epoch": 0.8268175350633512, "grad_norm": 6.274961471557617, "learning_rate": 8.432199329180643e-05, "loss": 1.0073, "step": 12203 }, { "epoch": 0.8268852903313233, "grad_norm": 5.910490036010742, "learning_rate": 8.43206242727086e-05, "loss": 0.781, "step": 12204 }, { "epoch": 0.8269530455992954, "grad_norm": 8.246582984924316, "learning_rate": 8.43192552536108e-05, "loss": 0.9337, "step": 12205 }, { "epoch": 0.8270208008672675, "grad_norm": 6.15574312210083, "learning_rate": 8.431788623451298e-05, "loss": 0.7707, "step": 12206 }, { "epoch": 0.8270885561352395, "grad_norm": 5.993718147277832, "learning_rate": 8.431651721541516e-05, "loss": 0.73, "step": 12207 }, { "epoch": 0.8271563114032116, "grad_norm": 6.5491533279418945, "learning_rate": 8.431514819631734e-05, "loss": 0.6576, "step": 12208 }, { "epoch": 0.8272240666711836, "grad_norm": 5.608559608459473, "learning_rate": 8.431377917721953e-05, "loss": 0.7199, "step": 12209 }, { "epoch": 0.8272918219391557, "grad_norm": 6.178080081939697, "learning_rate": 8.431241015812172e-05, "loss": 0.5885, "step": 12210 }, { "epoch": 0.8273595772071278, "grad_norm": 6.405505657196045, "learning_rate": 8.43110411390239e-05, "loss": 0.7247, "step": 12211 }, { "epoch": 0.8274273324750999, "grad_norm": 4.907893180847168, "learning_rate": 8.430967211992608e-05, "loss": 0.6837, "step": 12212 }, { "epoch": 0.827495087743072, "grad_norm": 4.041346549987793, "learning_rate": 8.430830310082826e-05, "loss": 0.5236, "step": 12213 }, { "epoch": 0.8275628430110441, "grad_norm": 5.496605396270752, "learning_rate": 8.430693408173045e-05, "loss": 0.6765, "step": 12214 }, { "epoch": 0.8276305982790162, "grad_norm": 5.595060348510742, "learning_rate": 8.430556506263263e-05, "loss": 1.0309, "step": 12215 }, { "epoch": 0.8276983535469883, "grad_norm": 5.200067043304443, "learning_rate": 8.430419604353481e-05, "loss": 0.6944, "step": 12216 }, { "epoch": 0.8277661088149604, "grad_norm": 6.38013219833374, "learning_rate": 8.430282702443699e-05, "loss": 1.1399, "step": 12217 }, { "epoch": 0.8278338640829325, "grad_norm": 5.848254203796387, "learning_rate": 8.430145800533917e-05, "loss": 0.7259, "step": 12218 }, { "epoch": 0.8279016193509046, "grad_norm": 7.698366641998291, "learning_rate": 8.430008898624137e-05, "loss": 0.8736, "step": 12219 }, { "epoch": 0.8279693746188767, "grad_norm": 6.256243705749512, "learning_rate": 8.429871996714355e-05, "loss": 0.8018, "step": 12220 }, { "epoch": 0.8280371298868487, "grad_norm": 7.630728721618652, "learning_rate": 8.429735094804573e-05, "loss": 0.6287, "step": 12221 }, { "epoch": 0.8281048851548208, "grad_norm": 5.663332939147949, "learning_rate": 8.42959819289479e-05, "loss": 0.7079, "step": 12222 }, { "epoch": 0.8281726404227929, "grad_norm": 9.478702545166016, "learning_rate": 8.42946129098501e-05, "loss": 0.9036, "step": 12223 }, { "epoch": 0.8282403956907649, "grad_norm": 5.983333110809326, "learning_rate": 8.429324389075228e-05, "loss": 0.777, "step": 12224 }, { "epoch": 0.828308150958737, "grad_norm": 5.364030361175537, "learning_rate": 8.429187487165446e-05, "loss": 0.8591, "step": 12225 }, { "epoch": 0.8283759062267091, "grad_norm": 6.575251579284668, "learning_rate": 8.429050585255664e-05, "loss": 0.8834, "step": 12226 }, { "epoch": 0.8284436614946812, "grad_norm": 6.744650840759277, "learning_rate": 8.428913683345882e-05, "loss": 0.8544, "step": 12227 }, { "epoch": 0.8285114167626533, "grad_norm": 7.059446334838867, "learning_rate": 8.428776781436102e-05, "loss": 0.7385, "step": 12228 }, { "epoch": 0.8285791720306254, "grad_norm": 5.541356086730957, "learning_rate": 8.42863987952632e-05, "loss": 0.7784, "step": 12229 }, { "epoch": 0.8286469272985975, "grad_norm": 7.66465425491333, "learning_rate": 8.428502977616538e-05, "loss": 0.7394, "step": 12230 }, { "epoch": 0.8287146825665695, "grad_norm": 5.495204448699951, "learning_rate": 8.428366075706756e-05, "loss": 0.6893, "step": 12231 }, { "epoch": 0.8287824378345416, "grad_norm": 5.460160255432129, "learning_rate": 8.428229173796975e-05, "loss": 0.8543, "step": 12232 }, { "epoch": 0.8288501931025137, "grad_norm": 5.867201805114746, "learning_rate": 8.428092271887193e-05, "loss": 0.7469, "step": 12233 }, { "epoch": 0.8289179483704858, "grad_norm": 5.914271354675293, "learning_rate": 8.427955369977411e-05, "loss": 0.9328, "step": 12234 }, { "epoch": 0.8289857036384579, "grad_norm": 7.290322303771973, "learning_rate": 8.427818468067629e-05, "loss": 0.936, "step": 12235 }, { "epoch": 0.82905345890643, "grad_norm": 7.530186653137207, "learning_rate": 8.427681566157849e-05, "loss": 0.7333, "step": 12236 }, { "epoch": 0.8291212141744021, "grad_norm": 7.260166168212891, "learning_rate": 8.427544664248067e-05, "loss": 0.7697, "step": 12237 }, { "epoch": 0.8291889694423742, "grad_norm": 5.8253703117370605, "learning_rate": 8.427407762338285e-05, "loss": 0.8838, "step": 12238 }, { "epoch": 0.8292567247103463, "grad_norm": 6.672026634216309, "learning_rate": 8.427270860428504e-05, "loss": 0.5896, "step": 12239 }, { "epoch": 0.8293244799783183, "grad_norm": 5.347162246704102, "learning_rate": 8.427133958518722e-05, "loss": 0.7497, "step": 12240 }, { "epoch": 0.8293922352462904, "grad_norm": 8.071971893310547, "learning_rate": 8.42699705660894e-05, "loss": 0.9985, "step": 12241 }, { "epoch": 0.8294599905142624, "grad_norm": 5.0009636878967285, "learning_rate": 8.42686015469916e-05, "loss": 0.8166, "step": 12242 }, { "epoch": 0.8295277457822345, "grad_norm": 5.593808174133301, "learning_rate": 8.426723252789377e-05, "loss": 0.6928, "step": 12243 }, { "epoch": 0.8295955010502066, "grad_norm": 5.249474048614502, "learning_rate": 8.426586350879596e-05, "loss": 0.7625, "step": 12244 }, { "epoch": 0.8296632563181787, "grad_norm": 5.920688152313232, "learning_rate": 8.426449448969814e-05, "loss": 0.7011, "step": 12245 }, { "epoch": 0.8297310115861508, "grad_norm": 9.127151489257812, "learning_rate": 8.426312547060033e-05, "loss": 0.917, "step": 12246 }, { "epoch": 0.8297987668541229, "grad_norm": 6.6722822189331055, "learning_rate": 8.426175645150251e-05, "loss": 0.7894, "step": 12247 }, { "epoch": 0.829866522122095, "grad_norm": 7.910020351409912, "learning_rate": 8.426038743240469e-05, "loss": 0.6994, "step": 12248 }, { "epoch": 0.8299342773900671, "grad_norm": 7.736839294433594, "learning_rate": 8.425901841330687e-05, "loss": 0.6598, "step": 12249 }, { "epoch": 0.8300020326580392, "grad_norm": 5.101329803466797, "learning_rate": 8.425764939420905e-05, "loss": 0.8365, "step": 12250 }, { "epoch": 0.8300697879260113, "grad_norm": 6.494842052459717, "learning_rate": 8.425628037511124e-05, "loss": 0.8936, "step": 12251 }, { "epoch": 0.8301375431939834, "grad_norm": 5.946401596069336, "learning_rate": 8.425491135601343e-05, "loss": 0.8427, "step": 12252 }, { "epoch": 0.8302052984619555, "grad_norm": 5.827462673187256, "learning_rate": 8.42535423369156e-05, "loss": 0.7694, "step": 12253 }, { "epoch": 0.8302730537299275, "grad_norm": 5.955854415893555, "learning_rate": 8.425217331781779e-05, "loss": 0.8644, "step": 12254 }, { "epoch": 0.8303408089978996, "grad_norm": 5.749096870422363, "learning_rate": 8.425080429871998e-05, "loss": 0.7225, "step": 12255 }, { "epoch": 0.8304085642658716, "grad_norm": 5.03810453414917, "learning_rate": 8.424943527962216e-05, "loss": 0.5006, "step": 12256 }, { "epoch": 0.8304763195338437, "grad_norm": 7.007089614868164, "learning_rate": 8.424806626052434e-05, "loss": 0.9485, "step": 12257 }, { "epoch": 0.8305440748018158, "grad_norm": 5.671133041381836, "learning_rate": 8.424669724142652e-05, "loss": 0.6842, "step": 12258 }, { "epoch": 0.8306118300697879, "grad_norm": 4.326511383056641, "learning_rate": 8.42453282223287e-05, "loss": 0.7619, "step": 12259 }, { "epoch": 0.83067958533776, "grad_norm": 4.897543907165527, "learning_rate": 8.42439592032309e-05, "loss": 0.6889, "step": 12260 }, { "epoch": 0.8307473406057321, "grad_norm": 5.780319690704346, "learning_rate": 8.424259018413308e-05, "loss": 1.0366, "step": 12261 }, { "epoch": 0.8308150958737042, "grad_norm": 6.754616737365723, "learning_rate": 8.424122116503526e-05, "loss": 0.9706, "step": 12262 }, { "epoch": 0.8308828511416763, "grad_norm": 5.684625625610352, "learning_rate": 8.423985214593744e-05, "loss": 0.7783, "step": 12263 }, { "epoch": 0.8309506064096484, "grad_norm": 5.692160129547119, "learning_rate": 8.423848312683962e-05, "loss": 0.6993, "step": 12264 }, { "epoch": 0.8310183616776204, "grad_norm": 5.838659286499023, "learning_rate": 8.423711410774181e-05, "loss": 0.8698, "step": 12265 }, { "epoch": 0.8310861169455925, "grad_norm": 5.119133472442627, "learning_rate": 8.423574508864399e-05, "loss": 0.8026, "step": 12266 }, { "epoch": 0.8311538722135646, "grad_norm": 6.305530071258545, "learning_rate": 8.423437606954617e-05, "loss": 0.8705, "step": 12267 }, { "epoch": 0.8312216274815367, "grad_norm": 5.659543037414551, "learning_rate": 8.423300705044835e-05, "loss": 0.7701, "step": 12268 }, { "epoch": 0.8312893827495088, "grad_norm": 6.706612586975098, "learning_rate": 8.423163803135055e-05, "loss": 0.6325, "step": 12269 }, { "epoch": 0.8313571380174809, "grad_norm": 6.535792827606201, "learning_rate": 8.423026901225273e-05, "loss": 0.7957, "step": 12270 }, { "epoch": 0.831424893285453, "grad_norm": 5.140613079071045, "learning_rate": 8.42288999931549e-05, "loss": 0.7975, "step": 12271 }, { "epoch": 0.831492648553425, "grad_norm": 5.579657554626465, "learning_rate": 8.422753097405709e-05, "loss": 0.6756, "step": 12272 }, { "epoch": 0.8315604038213971, "grad_norm": 8.427629470825195, "learning_rate": 8.422616195495927e-05, "loss": 0.7253, "step": 12273 }, { "epoch": 0.8316281590893692, "grad_norm": 5.536694526672363, "learning_rate": 8.422479293586146e-05, "loss": 0.8035, "step": 12274 }, { "epoch": 0.8316959143573412, "grad_norm": 7.416363716125488, "learning_rate": 8.422342391676364e-05, "loss": 1.0227, "step": 12275 }, { "epoch": 0.8317636696253133, "grad_norm": 8.551451683044434, "learning_rate": 8.422205489766582e-05, "loss": 0.9759, "step": 12276 }, { "epoch": 0.8318314248932854, "grad_norm": 6.636923789978027, "learning_rate": 8.4220685878568e-05, "loss": 0.8017, "step": 12277 }, { "epoch": 0.8318991801612575, "grad_norm": 5.85496711730957, "learning_rate": 8.42193168594702e-05, "loss": 0.7672, "step": 12278 }, { "epoch": 0.8319669354292296, "grad_norm": 5.499538898468018, "learning_rate": 8.421794784037238e-05, "loss": 0.6539, "step": 12279 }, { "epoch": 0.8320346906972017, "grad_norm": 5.9310150146484375, "learning_rate": 8.421657882127456e-05, "loss": 0.6745, "step": 12280 }, { "epoch": 0.8321024459651738, "grad_norm": 5.379483222961426, "learning_rate": 8.421520980217674e-05, "loss": 0.7548, "step": 12281 }, { "epoch": 0.8321702012331459, "grad_norm": 5.287676811218262, "learning_rate": 8.421384078307892e-05, "loss": 0.7051, "step": 12282 }, { "epoch": 0.832237956501118, "grad_norm": 8.057753562927246, "learning_rate": 8.421247176398111e-05, "loss": 0.7589, "step": 12283 }, { "epoch": 0.8323057117690901, "grad_norm": 6.534327983856201, "learning_rate": 8.421110274488329e-05, "loss": 0.7855, "step": 12284 }, { "epoch": 0.8323734670370622, "grad_norm": 7.2967753410339355, "learning_rate": 8.420973372578547e-05, "loss": 0.7163, "step": 12285 }, { "epoch": 0.8324412223050343, "grad_norm": 7.196539402008057, "learning_rate": 8.420836470668767e-05, "loss": 0.8134, "step": 12286 }, { "epoch": 0.8325089775730063, "grad_norm": 5.886240005493164, "learning_rate": 8.420699568758985e-05, "loss": 0.7407, "step": 12287 }, { "epoch": 0.8325767328409784, "grad_norm": 6.262639045715332, "learning_rate": 8.420562666849204e-05, "loss": 0.7359, "step": 12288 }, { "epoch": 0.8326444881089504, "grad_norm": 6.491570949554443, "learning_rate": 8.420425764939422e-05, "loss": 0.7787, "step": 12289 }, { "epoch": 0.8327122433769225, "grad_norm": 5.070222854614258, "learning_rate": 8.42028886302964e-05, "loss": 0.7032, "step": 12290 }, { "epoch": 0.8327799986448946, "grad_norm": 5.188915252685547, "learning_rate": 8.420151961119858e-05, "loss": 0.7538, "step": 12291 }, { "epoch": 0.8328477539128667, "grad_norm": 5.764748573303223, "learning_rate": 8.420015059210077e-05, "loss": 0.8324, "step": 12292 }, { "epoch": 0.8329155091808388, "grad_norm": 5.611788272857666, "learning_rate": 8.419878157300295e-05, "loss": 0.6855, "step": 12293 }, { "epoch": 0.8329832644488109, "grad_norm": 5.202719688415527, "learning_rate": 8.419741255390513e-05, "loss": 0.7333, "step": 12294 }, { "epoch": 0.833051019716783, "grad_norm": 5.643661975860596, "learning_rate": 8.419604353480732e-05, "loss": 0.8347, "step": 12295 }, { "epoch": 0.8331187749847551, "grad_norm": 5.144847869873047, "learning_rate": 8.41946745157095e-05, "loss": 0.6602, "step": 12296 }, { "epoch": 0.8331865302527272, "grad_norm": 6.511287212371826, "learning_rate": 8.419330549661169e-05, "loss": 0.6691, "step": 12297 }, { "epoch": 0.8332542855206992, "grad_norm": 6.4027276039123535, "learning_rate": 8.419193647751387e-05, "loss": 0.7781, "step": 12298 }, { "epoch": 0.8333220407886713, "grad_norm": 6.630940914154053, "learning_rate": 8.419056745841605e-05, "loss": 0.8879, "step": 12299 }, { "epoch": 0.8333897960566434, "grad_norm": 7.193549633026123, "learning_rate": 8.418919843931823e-05, "loss": 1.0063, "step": 12300 }, { "epoch": 0.8334575513246155, "grad_norm": 5.909510612487793, "learning_rate": 8.418782942022042e-05, "loss": 0.8406, "step": 12301 }, { "epoch": 0.8335253065925876, "grad_norm": 5.470668315887451, "learning_rate": 8.41864604011226e-05, "loss": 0.7256, "step": 12302 }, { "epoch": 0.8335930618605597, "grad_norm": 5.57338809967041, "learning_rate": 8.418509138202479e-05, "loss": 0.6156, "step": 12303 }, { "epoch": 0.8336608171285318, "grad_norm": 5.950277805328369, "learning_rate": 8.418372236292697e-05, "loss": 0.7232, "step": 12304 }, { "epoch": 0.8337285723965038, "grad_norm": 4.642901420593262, "learning_rate": 8.418235334382915e-05, "loss": 0.634, "step": 12305 }, { "epoch": 0.8337963276644759, "grad_norm": 7.471027374267578, "learning_rate": 8.418098432473134e-05, "loss": 1.0318, "step": 12306 }, { "epoch": 0.833864082932448, "grad_norm": 5.720177173614502, "learning_rate": 8.417961530563352e-05, "loss": 0.806, "step": 12307 }, { "epoch": 0.83393183820042, "grad_norm": 7.918213844299316, "learning_rate": 8.41782462865357e-05, "loss": 0.8564, "step": 12308 }, { "epoch": 0.8339995934683921, "grad_norm": 6.492531776428223, "learning_rate": 8.417687726743788e-05, "loss": 1.0221, "step": 12309 }, { "epoch": 0.8340673487363642, "grad_norm": 5.253319263458252, "learning_rate": 8.417550824834007e-05, "loss": 0.7599, "step": 12310 }, { "epoch": 0.8341351040043363, "grad_norm": 6.8917975425720215, "learning_rate": 8.417413922924225e-05, "loss": 0.7486, "step": 12311 }, { "epoch": 0.8342028592723084, "grad_norm": 7.639297962188721, "learning_rate": 8.417277021014444e-05, "loss": 0.9052, "step": 12312 }, { "epoch": 0.8342706145402805, "grad_norm": 6.974343776702881, "learning_rate": 8.417140119104662e-05, "loss": 0.6297, "step": 12313 }, { "epoch": 0.8343383698082526, "grad_norm": 5.5928802490234375, "learning_rate": 8.41700321719488e-05, "loss": 0.8208, "step": 12314 }, { "epoch": 0.8344061250762247, "grad_norm": 5.503357410430908, "learning_rate": 8.416866315285099e-05, "loss": 0.7472, "step": 12315 }, { "epoch": 0.8344738803441968, "grad_norm": 6.915671348571777, "learning_rate": 8.416729413375317e-05, "loss": 0.7894, "step": 12316 }, { "epoch": 0.8345416356121689, "grad_norm": 8.041422843933105, "learning_rate": 8.416592511465535e-05, "loss": 0.9854, "step": 12317 }, { "epoch": 0.834609390880141, "grad_norm": 6.027585506439209, "learning_rate": 8.416455609555753e-05, "loss": 1.0832, "step": 12318 }, { "epoch": 0.8346771461481131, "grad_norm": 7.771341800689697, "learning_rate": 8.416318707645971e-05, "loss": 0.9197, "step": 12319 }, { "epoch": 0.8347449014160851, "grad_norm": 5.460988521575928, "learning_rate": 8.41618180573619e-05, "loss": 0.7626, "step": 12320 }, { "epoch": 0.8348126566840571, "grad_norm": 5.957553863525391, "learning_rate": 8.416044903826409e-05, "loss": 0.5958, "step": 12321 }, { "epoch": 0.8348804119520292, "grad_norm": 6.164322853088379, "learning_rate": 8.415908001916627e-05, "loss": 0.7692, "step": 12322 }, { "epoch": 0.8349481672200013, "grad_norm": 6.864661693572998, "learning_rate": 8.415771100006845e-05, "loss": 0.8181, "step": 12323 }, { "epoch": 0.8350159224879734, "grad_norm": 4.985629558563232, "learning_rate": 8.415634198097064e-05, "loss": 0.8815, "step": 12324 }, { "epoch": 0.8350836777559455, "grad_norm": 6.169389724731445, "learning_rate": 8.415497296187282e-05, "loss": 0.7982, "step": 12325 }, { "epoch": 0.8351514330239176, "grad_norm": 7.641390800476074, "learning_rate": 8.4153603942775e-05, "loss": 0.7122, "step": 12326 }, { "epoch": 0.8352191882918897, "grad_norm": 5.388051509857178, "learning_rate": 8.415223492367718e-05, "loss": 0.9924, "step": 12327 }, { "epoch": 0.8352869435598618, "grad_norm": 6.870946407318115, "learning_rate": 8.415086590457936e-05, "loss": 0.5997, "step": 12328 }, { "epoch": 0.8353546988278339, "grad_norm": 7.626512050628662, "learning_rate": 8.414949688548156e-05, "loss": 0.7789, "step": 12329 }, { "epoch": 0.835422454095806, "grad_norm": 5.378123760223389, "learning_rate": 8.414812786638374e-05, "loss": 0.8172, "step": 12330 }, { "epoch": 0.835490209363778, "grad_norm": 6.5015716552734375, "learning_rate": 8.414675884728592e-05, "loss": 0.805, "step": 12331 }, { "epoch": 0.8355579646317501, "grad_norm": 5.173733711242676, "learning_rate": 8.414538982818811e-05, "loss": 0.6039, "step": 12332 }, { "epoch": 0.8356257198997222, "grad_norm": 5.68528413772583, "learning_rate": 8.414402080909029e-05, "loss": 0.8144, "step": 12333 }, { "epoch": 0.8356934751676943, "grad_norm": 6.874687671661377, "learning_rate": 8.414265178999247e-05, "loss": 0.889, "step": 12334 }, { "epoch": 0.8357612304356664, "grad_norm": 8.360238075256348, "learning_rate": 8.414128277089466e-05, "loss": 0.9033, "step": 12335 }, { "epoch": 0.8358289857036385, "grad_norm": 5.4390106201171875, "learning_rate": 8.413991375179684e-05, "loss": 0.6946, "step": 12336 }, { "epoch": 0.8358967409716106, "grad_norm": 9.848974227905273, "learning_rate": 8.413854473269903e-05, "loss": 0.7939, "step": 12337 }, { "epoch": 0.8359644962395826, "grad_norm": 7.111310958862305, "learning_rate": 8.413717571360122e-05, "loss": 0.739, "step": 12338 }, { "epoch": 0.8360322515075547, "grad_norm": 6.9361443519592285, "learning_rate": 8.41358066945034e-05, "loss": 0.8542, "step": 12339 }, { "epoch": 0.8361000067755268, "grad_norm": 8.055535316467285, "learning_rate": 8.413443767540558e-05, "loss": 0.7256, "step": 12340 }, { "epoch": 0.8361677620434989, "grad_norm": 5.564542770385742, "learning_rate": 8.413306865630776e-05, "loss": 0.7472, "step": 12341 }, { "epoch": 0.8362355173114709, "grad_norm": 5.300485134124756, "learning_rate": 8.413169963720995e-05, "loss": 0.5295, "step": 12342 }, { "epoch": 0.836303272579443, "grad_norm": 5.996912479400635, "learning_rate": 8.413033061811213e-05, "loss": 0.9896, "step": 12343 }, { "epoch": 0.8363710278474151, "grad_norm": 5.013984680175781, "learning_rate": 8.412896159901431e-05, "loss": 0.8257, "step": 12344 }, { "epoch": 0.8364387831153872, "grad_norm": 6.624052047729492, "learning_rate": 8.41275925799165e-05, "loss": 0.9886, "step": 12345 }, { "epoch": 0.8365065383833593, "grad_norm": 6.04857063293457, "learning_rate": 8.412622356081868e-05, "loss": 0.8513, "step": 12346 }, { "epoch": 0.8365742936513314, "grad_norm": 4.840993404388428, "learning_rate": 8.412485454172087e-05, "loss": 0.8743, "step": 12347 }, { "epoch": 0.8366420489193035, "grad_norm": 7.008707046508789, "learning_rate": 8.412348552262305e-05, "loss": 0.85, "step": 12348 }, { "epoch": 0.8367098041872756, "grad_norm": 7.408865451812744, "learning_rate": 8.412211650352523e-05, "loss": 0.9099, "step": 12349 }, { "epoch": 0.8367775594552477, "grad_norm": 6.566858291625977, "learning_rate": 8.412074748442741e-05, "loss": 0.9804, "step": 12350 }, { "epoch": 0.8368453147232198, "grad_norm": 5.375271797180176, "learning_rate": 8.411937846532959e-05, "loss": 0.6209, "step": 12351 }, { "epoch": 0.8369130699911919, "grad_norm": 6.087400436401367, "learning_rate": 8.411800944623178e-05, "loss": 0.6413, "step": 12352 }, { "epoch": 0.836980825259164, "grad_norm": 6.311927318572998, "learning_rate": 8.411664042713396e-05, "loss": 0.7192, "step": 12353 }, { "epoch": 0.8370485805271359, "grad_norm": 7.907698154449463, "learning_rate": 8.411527140803615e-05, "loss": 0.9493, "step": 12354 }, { "epoch": 0.837116335795108, "grad_norm": 5.864373683929443, "learning_rate": 8.411390238893833e-05, "loss": 0.921, "step": 12355 }, { "epoch": 0.8371840910630801, "grad_norm": 5.77023458480835, "learning_rate": 8.411253336984052e-05, "loss": 0.8094, "step": 12356 }, { "epoch": 0.8372518463310522, "grad_norm": 7.120599746704102, "learning_rate": 8.41111643507427e-05, "loss": 0.7822, "step": 12357 }, { "epoch": 0.8373196015990243, "grad_norm": 6.42577600479126, "learning_rate": 8.410979533164488e-05, "loss": 0.9123, "step": 12358 }, { "epoch": 0.8373873568669964, "grad_norm": 6.504154205322266, "learning_rate": 8.410842631254706e-05, "loss": 0.7072, "step": 12359 }, { "epoch": 0.8374551121349685, "grad_norm": 6.982132434844971, "learning_rate": 8.410705729344924e-05, "loss": 0.9333, "step": 12360 }, { "epoch": 0.8375228674029406, "grad_norm": 5.410412311553955, "learning_rate": 8.410568827435143e-05, "loss": 0.7905, "step": 12361 }, { "epoch": 0.8375906226709127, "grad_norm": 5.881120681762695, "learning_rate": 8.410431925525361e-05, "loss": 0.7371, "step": 12362 }, { "epoch": 0.8376583779388848, "grad_norm": 6.792932033538818, "learning_rate": 8.41029502361558e-05, "loss": 0.6885, "step": 12363 }, { "epoch": 0.8377261332068568, "grad_norm": 6.715199947357178, "learning_rate": 8.410158121705798e-05, "loss": 0.8673, "step": 12364 }, { "epoch": 0.8377938884748289, "grad_norm": 6.608368873596191, "learning_rate": 8.410021219796017e-05, "loss": 1.0625, "step": 12365 }, { "epoch": 0.837861643742801, "grad_norm": 8.170140266418457, "learning_rate": 8.409884317886235e-05, "loss": 0.8656, "step": 12366 }, { "epoch": 0.8379293990107731, "grad_norm": 7.180694103240967, "learning_rate": 8.409747415976453e-05, "loss": 0.7619, "step": 12367 }, { "epoch": 0.8379971542787452, "grad_norm": 5.306380748748779, "learning_rate": 8.409610514066671e-05, "loss": 0.6672, "step": 12368 }, { "epoch": 0.8380649095467173, "grad_norm": 9.249306678771973, "learning_rate": 8.409473612156889e-05, "loss": 0.759, "step": 12369 }, { "epoch": 0.8381326648146893, "grad_norm": 6.1703572273254395, "learning_rate": 8.409336710247108e-05, "loss": 0.661, "step": 12370 }, { "epoch": 0.8382004200826614, "grad_norm": 4.662862300872803, "learning_rate": 8.409199808337327e-05, "loss": 0.6401, "step": 12371 }, { "epoch": 0.8382681753506335, "grad_norm": 4.667969226837158, "learning_rate": 8.409062906427545e-05, "loss": 0.6579, "step": 12372 }, { "epoch": 0.8383359306186056, "grad_norm": 6.247222900390625, "learning_rate": 8.408926004517763e-05, "loss": 0.7907, "step": 12373 }, { "epoch": 0.8384036858865777, "grad_norm": 6.933483123779297, "learning_rate": 8.40878910260798e-05, "loss": 0.8402, "step": 12374 }, { "epoch": 0.8384714411545497, "grad_norm": 5.13164758682251, "learning_rate": 8.4086522006982e-05, "loss": 0.7144, "step": 12375 }, { "epoch": 0.8385391964225218, "grad_norm": 5.230576515197754, "learning_rate": 8.408515298788418e-05, "loss": 0.6836, "step": 12376 }, { "epoch": 0.8386069516904939, "grad_norm": 6.535162448883057, "learning_rate": 8.408378396878636e-05, "loss": 0.9004, "step": 12377 }, { "epoch": 0.838674706958466, "grad_norm": 7.608928203582764, "learning_rate": 8.408241494968855e-05, "loss": 0.9791, "step": 12378 }, { "epoch": 0.8387424622264381, "grad_norm": 6.712092399597168, "learning_rate": 8.408104593059073e-05, "loss": 0.6513, "step": 12379 }, { "epoch": 0.8388102174944102, "grad_norm": 6.102575778961182, "learning_rate": 8.407967691149292e-05, "loss": 0.8751, "step": 12380 }, { "epoch": 0.8388779727623823, "grad_norm": 5.246453285217285, "learning_rate": 8.407830789239511e-05, "loss": 0.6477, "step": 12381 }, { "epoch": 0.8389457280303544, "grad_norm": 6.3806633949279785, "learning_rate": 8.407693887329729e-05, "loss": 0.7549, "step": 12382 }, { "epoch": 0.8390134832983265, "grad_norm": 6.284943103790283, "learning_rate": 8.407556985419947e-05, "loss": 0.7873, "step": 12383 }, { "epoch": 0.8390812385662986, "grad_norm": 6.552889823913574, "learning_rate": 8.407420083510166e-05, "loss": 0.8152, "step": 12384 }, { "epoch": 0.8391489938342707, "grad_norm": 7.694222927093506, "learning_rate": 8.407283181600384e-05, "loss": 0.6777, "step": 12385 }, { "epoch": 0.8392167491022428, "grad_norm": 6.1887125968933105, "learning_rate": 8.407146279690602e-05, "loss": 0.9402, "step": 12386 }, { "epoch": 0.8392845043702147, "grad_norm": 6.24276065826416, "learning_rate": 8.40700937778082e-05, "loss": 1.0976, "step": 12387 }, { "epoch": 0.8393522596381868, "grad_norm": 5.816521167755127, "learning_rate": 8.40687247587104e-05, "loss": 0.8681, "step": 12388 }, { "epoch": 0.8394200149061589, "grad_norm": 5.3127923011779785, "learning_rate": 8.406735573961258e-05, "loss": 0.7598, "step": 12389 }, { "epoch": 0.839487770174131, "grad_norm": 7.444540500640869, "learning_rate": 8.406598672051476e-05, "loss": 0.8451, "step": 12390 }, { "epoch": 0.8395555254421031, "grad_norm": 8.680950164794922, "learning_rate": 8.406461770141694e-05, "loss": 1.0441, "step": 12391 }, { "epoch": 0.8396232807100752, "grad_norm": 6.707736968994141, "learning_rate": 8.406324868231912e-05, "loss": 0.7834, "step": 12392 }, { "epoch": 0.8396910359780473, "grad_norm": 5.440469741821289, "learning_rate": 8.406187966322131e-05, "loss": 0.7424, "step": 12393 }, { "epoch": 0.8397587912460194, "grad_norm": 9.19237995147705, "learning_rate": 8.40605106441235e-05, "loss": 0.7367, "step": 12394 }, { "epoch": 0.8398265465139915, "grad_norm": 6.017999172210693, "learning_rate": 8.405914162502567e-05, "loss": 0.822, "step": 12395 }, { "epoch": 0.8398943017819636, "grad_norm": 6.79027795791626, "learning_rate": 8.405777260592785e-05, "loss": 0.9386, "step": 12396 }, { "epoch": 0.8399620570499357, "grad_norm": 5.836680889129639, "learning_rate": 8.405640358683004e-05, "loss": 0.7095, "step": 12397 }, { "epoch": 0.8400298123179077, "grad_norm": 5.866933345794678, "learning_rate": 8.405503456773223e-05, "loss": 0.7168, "step": 12398 }, { "epoch": 0.8400975675858798, "grad_norm": 6.663938999176025, "learning_rate": 8.405366554863441e-05, "loss": 0.867, "step": 12399 }, { "epoch": 0.8401653228538519, "grad_norm": 5.113254547119141, "learning_rate": 8.405229652953659e-05, "loss": 0.6785, "step": 12400 }, { "epoch": 0.840233078121824, "grad_norm": 6.106493949890137, "learning_rate": 8.405092751043877e-05, "loss": 0.8815, "step": 12401 }, { "epoch": 0.8403008333897961, "grad_norm": 5.644641399383545, "learning_rate": 8.404955849134096e-05, "loss": 0.6902, "step": 12402 }, { "epoch": 0.8403685886577681, "grad_norm": 5.706465244293213, "learning_rate": 8.404818947224314e-05, "loss": 0.7725, "step": 12403 }, { "epoch": 0.8404363439257402, "grad_norm": 8.362403869628906, "learning_rate": 8.404682045314532e-05, "loss": 0.9757, "step": 12404 }, { "epoch": 0.8405040991937123, "grad_norm": 5.341566562652588, "learning_rate": 8.40454514340475e-05, "loss": 0.8645, "step": 12405 }, { "epoch": 0.8405718544616844, "grad_norm": 6.9801859855651855, "learning_rate": 8.404408241494969e-05, "loss": 0.7933, "step": 12406 }, { "epoch": 0.8406396097296565, "grad_norm": 7.076079845428467, "learning_rate": 8.404271339585188e-05, "loss": 0.9387, "step": 12407 }, { "epoch": 0.8407073649976285, "grad_norm": 5.684225082397461, "learning_rate": 8.404134437675406e-05, "loss": 0.6575, "step": 12408 }, { "epoch": 0.8407751202656006, "grad_norm": 6.203160285949707, "learning_rate": 8.403997535765624e-05, "loss": 0.6188, "step": 12409 }, { "epoch": 0.8408428755335727, "grad_norm": 7.039827346801758, "learning_rate": 8.403860633855842e-05, "loss": 0.8741, "step": 12410 }, { "epoch": 0.8409106308015448, "grad_norm": 5.340671062469482, "learning_rate": 8.403723731946061e-05, "loss": 0.6957, "step": 12411 }, { "epoch": 0.8409783860695169, "grad_norm": 6.316356182098389, "learning_rate": 8.40358683003628e-05, "loss": 0.8684, "step": 12412 }, { "epoch": 0.841046141337489, "grad_norm": 5.906060218811035, "learning_rate": 8.403449928126497e-05, "loss": 0.9206, "step": 12413 }, { "epoch": 0.8411138966054611, "grad_norm": 7.139042854309082, "learning_rate": 8.403313026216716e-05, "loss": 0.862, "step": 12414 }, { "epoch": 0.8411816518734332, "grad_norm": 6.347969055175781, "learning_rate": 8.403176124306934e-05, "loss": 0.7029, "step": 12415 }, { "epoch": 0.8412494071414053, "grad_norm": 7.3854780197143555, "learning_rate": 8.403039222397153e-05, "loss": 0.9811, "step": 12416 }, { "epoch": 0.8413171624093774, "grad_norm": 5.81329870223999, "learning_rate": 8.402902320487371e-05, "loss": 0.9048, "step": 12417 }, { "epoch": 0.8413849176773495, "grad_norm": 5.879884719848633, "learning_rate": 8.402765418577589e-05, "loss": 0.8854, "step": 12418 }, { "epoch": 0.8414526729453214, "grad_norm": 5.32490348815918, "learning_rate": 8.402628516667807e-05, "loss": 1.0044, "step": 12419 }, { "epoch": 0.8415204282132935, "grad_norm": 6.529613018035889, "learning_rate": 8.402491614758026e-05, "loss": 0.7893, "step": 12420 }, { "epoch": 0.8415881834812656, "grad_norm": 6.274640083312988, "learning_rate": 8.402354712848244e-05, "loss": 1.0186, "step": 12421 }, { "epoch": 0.8416559387492377, "grad_norm": 6.493834972381592, "learning_rate": 8.402217810938463e-05, "loss": 0.9347, "step": 12422 }, { "epoch": 0.8417236940172098, "grad_norm": 5.429368495941162, "learning_rate": 8.40208090902868e-05, "loss": 0.7862, "step": 12423 }, { "epoch": 0.8417914492851819, "grad_norm": 5.897449970245361, "learning_rate": 8.4019440071189e-05, "loss": 0.7278, "step": 12424 }, { "epoch": 0.841859204553154, "grad_norm": 5.485132694244385, "learning_rate": 8.401807105209118e-05, "loss": 0.5984, "step": 12425 }, { "epoch": 0.8419269598211261, "grad_norm": 6.194313049316406, "learning_rate": 8.401670203299336e-05, "loss": 0.8396, "step": 12426 }, { "epoch": 0.8419947150890982, "grad_norm": 6.459996700286865, "learning_rate": 8.401533301389555e-05, "loss": 0.8757, "step": 12427 }, { "epoch": 0.8420624703570703, "grad_norm": 5.770567417144775, "learning_rate": 8.401396399479773e-05, "loss": 0.6524, "step": 12428 }, { "epoch": 0.8421302256250424, "grad_norm": 5.737586975097656, "learning_rate": 8.401259497569991e-05, "loss": 0.7936, "step": 12429 }, { "epoch": 0.8421979808930145, "grad_norm": 5.61273717880249, "learning_rate": 8.401122595660211e-05, "loss": 0.8404, "step": 12430 }, { "epoch": 0.8422657361609865, "grad_norm": 5.813633918762207, "learning_rate": 8.400985693750429e-05, "loss": 0.8042, "step": 12431 }, { "epoch": 0.8423334914289586, "grad_norm": 10.310413360595703, "learning_rate": 8.400848791840647e-05, "loss": 0.7039, "step": 12432 }, { "epoch": 0.8424012466969307, "grad_norm": 5.826198577880859, "learning_rate": 8.400711889930865e-05, "loss": 0.7276, "step": 12433 }, { "epoch": 0.8424690019649028, "grad_norm": 6.436339855194092, "learning_rate": 8.400574988021084e-05, "loss": 0.7409, "step": 12434 }, { "epoch": 0.8425367572328749, "grad_norm": 5.5825700759887695, "learning_rate": 8.400438086111302e-05, "loss": 0.6429, "step": 12435 }, { "epoch": 0.8426045125008469, "grad_norm": 5.8885297775268555, "learning_rate": 8.40030118420152e-05, "loss": 0.8675, "step": 12436 }, { "epoch": 0.842672267768819, "grad_norm": 6.454819679260254, "learning_rate": 8.400164282291738e-05, "loss": 0.6461, "step": 12437 }, { "epoch": 0.8427400230367911, "grad_norm": 6.442966938018799, "learning_rate": 8.400027380381956e-05, "loss": 0.9699, "step": 12438 }, { "epoch": 0.8428077783047632, "grad_norm": 5.841514587402344, "learning_rate": 8.399890478472176e-05, "loss": 0.734, "step": 12439 }, { "epoch": 0.8428755335727353, "grad_norm": 5.0523858070373535, "learning_rate": 8.399753576562394e-05, "loss": 0.7477, "step": 12440 }, { "epoch": 0.8429432888407073, "grad_norm": 6.7047810554504395, "learning_rate": 8.399616674652612e-05, "loss": 0.8632, "step": 12441 }, { "epoch": 0.8430110441086794, "grad_norm": 5.654105186462402, "learning_rate": 8.39947977274283e-05, "loss": 0.6073, "step": 12442 }, { "epoch": 0.8430787993766515, "grad_norm": 6.039260387420654, "learning_rate": 8.39934287083305e-05, "loss": 0.7417, "step": 12443 }, { "epoch": 0.8431465546446236, "grad_norm": 6.465257167816162, "learning_rate": 8.399205968923267e-05, "loss": 0.8625, "step": 12444 }, { "epoch": 0.8432143099125957, "grad_norm": 7.922903060913086, "learning_rate": 8.399069067013485e-05, "loss": 0.8621, "step": 12445 }, { "epoch": 0.8432820651805678, "grad_norm": 6.696178436279297, "learning_rate": 8.398932165103703e-05, "loss": 0.7575, "step": 12446 }, { "epoch": 0.8433498204485399, "grad_norm": 7.259800910949707, "learning_rate": 8.398795263193921e-05, "loss": 0.9376, "step": 12447 }, { "epoch": 0.843417575716512, "grad_norm": 5.065820217132568, "learning_rate": 8.398658361284141e-05, "loss": 0.8156, "step": 12448 }, { "epoch": 0.8434853309844841, "grad_norm": 6.689807415008545, "learning_rate": 8.398521459374359e-05, "loss": 0.6496, "step": 12449 }, { "epoch": 0.8435530862524562, "grad_norm": 7.013186454772949, "learning_rate": 8.398384557464577e-05, "loss": 1.0973, "step": 12450 }, { "epoch": 0.8436208415204283, "grad_norm": 6.942663669586182, "learning_rate": 8.398247655554795e-05, "loss": 0.9876, "step": 12451 }, { "epoch": 0.8436885967884002, "grad_norm": 6.426051139831543, "learning_rate": 8.398110753645013e-05, "loss": 0.6516, "step": 12452 }, { "epoch": 0.8437563520563723, "grad_norm": 5.993021488189697, "learning_rate": 8.397973851735232e-05, "loss": 0.5495, "step": 12453 }, { "epoch": 0.8438241073243444, "grad_norm": 7.648578643798828, "learning_rate": 8.39783694982545e-05, "loss": 1.3047, "step": 12454 }, { "epoch": 0.8438918625923165, "grad_norm": 6.795345306396484, "learning_rate": 8.397700047915668e-05, "loss": 0.8851, "step": 12455 }, { "epoch": 0.8439596178602886, "grad_norm": 9.486870765686035, "learning_rate": 8.397563146005887e-05, "loss": 0.7838, "step": 12456 }, { "epoch": 0.8440273731282607, "grad_norm": 6.156589031219482, "learning_rate": 8.397426244096106e-05, "loss": 0.7293, "step": 12457 }, { "epoch": 0.8440951283962328, "grad_norm": 6.535582542419434, "learning_rate": 8.397289342186324e-05, "loss": 0.853, "step": 12458 }, { "epoch": 0.8441628836642049, "grad_norm": 6.530213832855225, "learning_rate": 8.397152440276542e-05, "loss": 1.0661, "step": 12459 }, { "epoch": 0.844230638932177, "grad_norm": 5.963207721710205, "learning_rate": 8.39701553836676e-05, "loss": 0.6558, "step": 12460 }, { "epoch": 0.8442983942001491, "grad_norm": 6.132920742034912, "learning_rate": 8.396878636456978e-05, "loss": 0.7386, "step": 12461 }, { "epoch": 0.8443661494681212, "grad_norm": 4.533850193023682, "learning_rate": 8.396741734547197e-05, "loss": 0.6762, "step": 12462 }, { "epoch": 0.8444339047360933, "grad_norm": 5.42271614074707, "learning_rate": 8.396604832637415e-05, "loss": 0.9324, "step": 12463 }, { "epoch": 0.8445016600040653, "grad_norm": 7.894658088684082, "learning_rate": 8.396467930727633e-05, "loss": 1.0492, "step": 12464 }, { "epoch": 0.8445694152720374, "grad_norm": 6.1358137130737305, "learning_rate": 8.396331028817852e-05, "loss": 0.6701, "step": 12465 }, { "epoch": 0.8446371705400095, "grad_norm": 6.7011871337890625, "learning_rate": 8.396194126908071e-05, "loss": 0.9623, "step": 12466 }, { "epoch": 0.8447049258079816, "grad_norm": 6.27651834487915, "learning_rate": 8.396057224998289e-05, "loss": 0.8553, "step": 12467 }, { "epoch": 0.8447726810759536, "grad_norm": 6.427924156188965, "learning_rate": 8.395920323088507e-05, "loss": 0.9759, "step": 12468 }, { "epoch": 0.8448404363439257, "grad_norm": 6.353015422821045, "learning_rate": 8.395783421178725e-05, "loss": 0.7357, "step": 12469 }, { "epoch": 0.8449081916118978, "grad_norm": 6.444865703582764, "learning_rate": 8.395646519268944e-05, "loss": 0.6417, "step": 12470 }, { "epoch": 0.8449759468798699, "grad_norm": 6.375645160675049, "learning_rate": 8.395509617359162e-05, "loss": 1.0158, "step": 12471 }, { "epoch": 0.845043702147842, "grad_norm": 4.871046543121338, "learning_rate": 8.39537271544938e-05, "loss": 0.7491, "step": 12472 }, { "epoch": 0.8451114574158141, "grad_norm": 5.615902423858643, "learning_rate": 8.3952358135396e-05, "loss": 0.6925, "step": 12473 }, { "epoch": 0.8451792126837862, "grad_norm": 6.724735260009766, "learning_rate": 8.395098911629818e-05, "loss": 0.6802, "step": 12474 }, { "epoch": 0.8452469679517582, "grad_norm": 7.350205421447754, "learning_rate": 8.394962009720036e-05, "loss": 1.1414, "step": 12475 }, { "epoch": 0.8453147232197303, "grad_norm": 5.377062797546387, "learning_rate": 8.394825107810255e-05, "loss": 0.8551, "step": 12476 }, { "epoch": 0.8453824784877024, "grad_norm": 5.765392303466797, "learning_rate": 8.394688205900473e-05, "loss": 0.6638, "step": 12477 }, { "epoch": 0.8454502337556745, "grad_norm": 5.007123947143555, "learning_rate": 8.394551303990691e-05, "loss": 0.7236, "step": 12478 }, { "epoch": 0.8455179890236466, "grad_norm": 6.559772491455078, "learning_rate": 8.39441440208091e-05, "loss": 0.8623, "step": 12479 }, { "epoch": 0.8455857442916187, "grad_norm": 6.448508262634277, "learning_rate": 8.394277500171129e-05, "loss": 0.7342, "step": 12480 }, { "epoch": 0.8456534995595908, "grad_norm": 6.105748653411865, "learning_rate": 8.394140598261347e-05, "loss": 0.7703, "step": 12481 }, { "epoch": 0.8457212548275629, "grad_norm": 6.34128999710083, "learning_rate": 8.394003696351565e-05, "loss": 0.8306, "step": 12482 }, { "epoch": 0.845789010095535, "grad_norm": 6.052567005157471, "learning_rate": 8.393866794441783e-05, "loss": 0.7125, "step": 12483 }, { "epoch": 0.845856765363507, "grad_norm": 5.930888652801514, "learning_rate": 8.393729892532001e-05, "loss": 0.9559, "step": 12484 }, { "epoch": 0.845924520631479, "grad_norm": 5.138692378997803, "learning_rate": 8.39359299062222e-05, "loss": 0.6747, "step": 12485 }, { "epoch": 0.8459922758994511, "grad_norm": 6.211801528930664, "learning_rate": 8.393456088712438e-05, "loss": 0.7852, "step": 12486 }, { "epoch": 0.8460600311674232, "grad_norm": 6.416335105895996, "learning_rate": 8.393319186802656e-05, "loss": 0.9246, "step": 12487 }, { "epoch": 0.8461277864353953, "grad_norm": 5.819685459136963, "learning_rate": 8.393182284892874e-05, "loss": 0.9555, "step": 12488 }, { "epoch": 0.8461955417033674, "grad_norm": 6.297514915466309, "learning_rate": 8.393045382983094e-05, "loss": 0.8673, "step": 12489 }, { "epoch": 0.8462632969713395, "grad_norm": 11.973522186279297, "learning_rate": 8.392908481073312e-05, "loss": 0.9805, "step": 12490 }, { "epoch": 0.8463310522393116, "grad_norm": 7.551254749298096, "learning_rate": 8.39277157916353e-05, "loss": 0.855, "step": 12491 }, { "epoch": 0.8463988075072837, "grad_norm": 5.7769389152526855, "learning_rate": 8.392634677253748e-05, "loss": 0.6064, "step": 12492 }, { "epoch": 0.8464665627752558, "grad_norm": 4.919633388519287, "learning_rate": 8.392497775343966e-05, "loss": 0.7027, "step": 12493 }, { "epoch": 0.8465343180432279, "grad_norm": 5.183913230895996, "learning_rate": 8.392360873434185e-05, "loss": 0.8747, "step": 12494 }, { "epoch": 0.8466020733112, "grad_norm": 5.794585704803467, "learning_rate": 8.392223971524403e-05, "loss": 0.7961, "step": 12495 }, { "epoch": 0.8466698285791721, "grad_norm": 6.117403030395508, "learning_rate": 8.392087069614621e-05, "loss": 0.5892, "step": 12496 }, { "epoch": 0.8467375838471441, "grad_norm": 7.63482666015625, "learning_rate": 8.39195016770484e-05, "loss": 0.8159, "step": 12497 }, { "epoch": 0.8468053391151162, "grad_norm": 5.1039347648620605, "learning_rate": 8.391813265795059e-05, "loss": 0.5934, "step": 12498 }, { "epoch": 0.8468730943830883, "grad_norm": 6.369871616363525, "learning_rate": 8.391676363885277e-05, "loss": 0.9178, "step": 12499 }, { "epoch": 0.8469408496510604, "grad_norm": 6.773191928863525, "learning_rate": 8.391539461975495e-05, "loss": 1.0055, "step": 12500 }, { "epoch": 0.8470086049190324, "grad_norm": 5.994389057159424, "learning_rate": 8.391402560065713e-05, "loss": 0.8812, "step": 12501 }, { "epoch": 0.8470763601870045, "grad_norm": 5.712553977966309, "learning_rate": 8.391265658155931e-05, "loss": 0.7008, "step": 12502 }, { "epoch": 0.8471441154549766, "grad_norm": 5.810184955596924, "learning_rate": 8.39112875624615e-05, "loss": 0.7936, "step": 12503 }, { "epoch": 0.8472118707229487, "grad_norm": 6.312936782836914, "learning_rate": 8.390991854336368e-05, "loss": 0.8583, "step": 12504 }, { "epoch": 0.8472796259909208, "grad_norm": 6.539886474609375, "learning_rate": 8.390854952426586e-05, "loss": 0.7788, "step": 12505 }, { "epoch": 0.8473473812588929, "grad_norm": 7.018226146697998, "learning_rate": 8.390718050516804e-05, "loss": 0.7205, "step": 12506 }, { "epoch": 0.847415136526865, "grad_norm": 6.536552429199219, "learning_rate": 8.390581148607023e-05, "loss": 0.9422, "step": 12507 }, { "epoch": 0.847482891794837, "grad_norm": 6.521510601043701, "learning_rate": 8.390444246697242e-05, "loss": 0.7813, "step": 12508 }, { "epoch": 0.8475506470628091, "grad_norm": 5.533815383911133, "learning_rate": 8.39030734478746e-05, "loss": 0.8097, "step": 12509 }, { "epoch": 0.8476184023307812, "grad_norm": 5.778811931610107, "learning_rate": 8.390170442877678e-05, "loss": 0.7295, "step": 12510 }, { "epoch": 0.8476861575987533, "grad_norm": 5.98907995223999, "learning_rate": 8.390033540967896e-05, "loss": 0.7462, "step": 12511 }, { "epoch": 0.8477539128667254, "grad_norm": 5.19685697555542, "learning_rate": 8.389896639058115e-05, "loss": 0.735, "step": 12512 }, { "epoch": 0.8478216681346975, "grad_norm": 5.980901718139648, "learning_rate": 8.389759737148333e-05, "loss": 0.5655, "step": 12513 }, { "epoch": 0.8478894234026696, "grad_norm": 4.931701183319092, "learning_rate": 8.389622835238551e-05, "loss": 0.6426, "step": 12514 }, { "epoch": 0.8479571786706417, "grad_norm": 5.470427513122559, "learning_rate": 8.38948593332877e-05, "loss": 0.8698, "step": 12515 }, { "epoch": 0.8480249339386138, "grad_norm": 8.906782150268555, "learning_rate": 8.389349031418989e-05, "loss": 0.9118, "step": 12516 }, { "epoch": 0.8480926892065858, "grad_norm": 7.61644172668457, "learning_rate": 8.389212129509207e-05, "loss": 1.0529, "step": 12517 }, { "epoch": 0.8481604444745579, "grad_norm": 6.867774963378906, "learning_rate": 8.389075227599425e-05, "loss": 0.6104, "step": 12518 }, { "epoch": 0.8482281997425299, "grad_norm": 7.476839065551758, "learning_rate": 8.388938325689644e-05, "loss": 1.036, "step": 12519 }, { "epoch": 0.848295955010502, "grad_norm": 6.415992259979248, "learning_rate": 8.388801423779862e-05, "loss": 0.9505, "step": 12520 }, { "epoch": 0.8483637102784741, "grad_norm": 6.049834251403809, "learning_rate": 8.38866452187008e-05, "loss": 0.7437, "step": 12521 }, { "epoch": 0.8484314655464462, "grad_norm": 5.392312526702881, "learning_rate": 8.3885276199603e-05, "loss": 0.7305, "step": 12522 }, { "epoch": 0.8484992208144183, "grad_norm": 6.002782821655273, "learning_rate": 8.388390718050518e-05, "loss": 0.6241, "step": 12523 }, { "epoch": 0.8485669760823904, "grad_norm": 7.464806079864502, "learning_rate": 8.388253816140736e-05, "loss": 1.0126, "step": 12524 }, { "epoch": 0.8486347313503625, "grad_norm": 6.660724639892578, "learning_rate": 8.388116914230954e-05, "loss": 0.8958, "step": 12525 }, { "epoch": 0.8487024866183346, "grad_norm": 6.646303176879883, "learning_rate": 8.387980012321173e-05, "loss": 0.951, "step": 12526 }, { "epoch": 0.8487702418863067, "grad_norm": 6.518253803253174, "learning_rate": 8.387843110411391e-05, "loss": 0.8785, "step": 12527 }, { "epoch": 0.8488379971542788, "grad_norm": 4.314852237701416, "learning_rate": 8.38770620850161e-05, "loss": 0.8498, "step": 12528 }, { "epoch": 0.8489057524222509, "grad_norm": 5.918713092803955, "learning_rate": 8.387569306591827e-05, "loss": 0.9178, "step": 12529 }, { "epoch": 0.848973507690223, "grad_norm": 7.2677788734436035, "learning_rate": 8.387432404682045e-05, "loss": 0.8692, "step": 12530 }, { "epoch": 0.849041262958195, "grad_norm": 5.739150524139404, "learning_rate": 8.387295502772265e-05, "loss": 0.7924, "step": 12531 }, { "epoch": 0.8491090182261671, "grad_norm": 5.6296000480651855, "learning_rate": 8.387158600862483e-05, "loss": 0.7735, "step": 12532 }, { "epoch": 0.8491767734941391, "grad_norm": 5.1605000495910645, "learning_rate": 8.387021698952701e-05, "loss": 0.7029, "step": 12533 }, { "epoch": 0.8492445287621112, "grad_norm": 6.645575046539307, "learning_rate": 8.386884797042919e-05, "loss": 1.0384, "step": 12534 }, { "epoch": 0.8493122840300833, "grad_norm": 6.592693328857422, "learning_rate": 8.386747895133138e-05, "loss": 0.7987, "step": 12535 }, { "epoch": 0.8493800392980554, "grad_norm": 7.588740825653076, "learning_rate": 8.386610993223356e-05, "loss": 0.6359, "step": 12536 }, { "epoch": 0.8494477945660275, "grad_norm": 4.178662300109863, "learning_rate": 8.386474091313574e-05, "loss": 0.7081, "step": 12537 }, { "epoch": 0.8495155498339996, "grad_norm": 5.266997814178467, "learning_rate": 8.386337189403792e-05, "loss": 0.688, "step": 12538 }, { "epoch": 0.8495833051019717, "grad_norm": 5.189149379730225, "learning_rate": 8.38620028749401e-05, "loss": 0.8713, "step": 12539 }, { "epoch": 0.8496510603699438, "grad_norm": 5.311150074005127, "learning_rate": 8.38606338558423e-05, "loss": 0.7709, "step": 12540 }, { "epoch": 0.8497188156379158, "grad_norm": 4.873614311218262, "learning_rate": 8.385926483674448e-05, "loss": 0.8112, "step": 12541 }, { "epoch": 0.8497865709058879, "grad_norm": 6.431467056274414, "learning_rate": 8.385789581764666e-05, "loss": 0.9818, "step": 12542 }, { "epoch": 0.84985432617386, "grad_norm": 7.026850700378418, "learning_rate": 8.385652679854884e-05, "loss": 0.7773, "step": 12543 }, { "epoch": 0.8499220814418321, "grad_norm": 5.634081840515137, "learning_rate": 8.385515777945103e-05, "loss": 0.8703, "step": 12544 }, { "epoch": 0.8499898367098042, "grad_norm": 5.535772800445557, "learning_rate": 8.385378876035321e-05, "loss": 0.6963, "step": 12545 }, { "epoch": 0.8500575919777763, "grad_norm": 6.581893444061279, "learning_rate": 8.38524197412554e-05, "loss": 0.7044, "step": 12546 }, { "epoch": 0.8501253472457484, "grad_norm": 5.496252536773682, "learning_rate": 8.385105072215757e-05, "loss": 0.7298, "step": 12547 }, { "epoch": 0.8501931025137205, "grad_norm": 6.205685615539551, "learning_rate": 8.384968170305975e-05, "loss": 0.7728, "step": 12548 }, { "epoch": 0.8502608577816926, "grad_norm": 7.986889362335205, "learning_rate": 8.384831268396195e-05, "loss": 0.767, "step": 12549 }, { "epoch": 0.8503286130496646, "grad_norm": 6.026723384857178, "learning_rate": 8.384694366486413e-05, "loss": 0.9097, "step": 12550 }, { "epoch": 0.8503963683176367, "grad_norm": 5.750411510467529, "learning_rate": 8.384557464576631e-05, "loss": 0.8567, "step": 12551 }, { "epoch": 0.8504641235856087, "grad_norm": 6.5039896965026855, "learning_rate": 8.384420562666849e-05, "loss": 0.882, "step": 12552 }, { "epoch": 0.8505318788535808, "grad_norm": 6.847061634063721, "learning_rate": 8.384283660757068e-05, "loss": 0.5942, "step": 12553 }, { "epoch": 0.8505996341215529, "grad_norm": 4.668815612792969, "learning_rate": 8.384146758847286e-05, "loss": 0.5723, "step": 12554 }, { "epoch": 0.850667389389525, "grad_norm": 7.477560043334961, "learning_rate": 8.384009856937504e-05, "loss": 0.9085, "step": 12555 }, { "epoch": 0.8507351446574971, "grad_norm": 5.242186069488525, "learning_rate": 8.383872955027722e-05, "loss": 0.5811, "step": 12556 }, { "epoch": 0.8508028999254692, "grad_norm": 6.460277080535889, "learning_rate": 8.38373605311794e-05, "loss": 0.6622, "step": 12557 }, { "epoch": 0.8508706551934413, "grad_norm": 8.43552017211914, "learning_rate": 8.38359915120816e-05, "loss": 0.6877, "step": 12558 }, { "epoch": 0.8509384104614134, "grad_norm": 5.636725902557373, "learning_rate": 8.383462249298378e-05, "loss": 0.7697, "step": 12559 }, { "epoch": 0.8510061657293855, "grad_norm": 7.544033050537109, "learning_rate": 8.383325347388596e-05, "loss": 1.031, "step": 12560 }, { "epoch": 0.8510739209973576, "grad_norm": 10.117722511291504, "learning_rate": 8.383188445478814e-05, "loss": 1.1007, "step": 12561 }, { "epoch": 0.8511416762653297, "grad_norm": 5.651546955108643, "learning_rate": 8.383051543569032e-05, "loss": 0.6959, "step": 12562 }, { "epoch": 0.8512094315333018, "grad_norm": 7.87699556350708, "learning_rate": 8.382914641659251e-05, "loss": 0.873, "step": 12563 }, { "epoch": 0.8512771868012738, "grad_norm": 5.291513442993164, "learning_rate": 8.38277773974947e-05, "loss": 0.6965, "step": 12564 }, { "epoch": 0.8513449420692459, "grad_norm": 5.983782768249512, "learning_rate": 8.382640837839687e-05, "loss": 0.908, "step": 12565 }, { "epoch": 0.8514126973372179, "grad_norm": 4.902809143066406, "learning_rate": 8.382503935929907e-05, "loss": 0.6924, "step": 12566 }, { "epoch": 0.85148045260519, "grad_norm": 4.983574390411377, "learning_rate": 8.382367034020125e-05, "loss": 0.7984, "step": 12567 }, { "epoch": 0.8515482078731621, "grad_norm": 6.039658069610596, "learning_rate": 8.382230132110343e-05, "loss": 0.683, "step": 12568 }, { "epoch": 0.8516159631411342, "grad_norm": 5.577428340911865, "learning_rate": 8.382093230200562e-05, "loss": 0.8412, "step": 12569 }, { "epoch": 0.8516837184091063, "grad_norm": 6.592475891113281, "learning_rate": 8.38195632829078e-05, "loss": 0.6641, "step": 12570 }, { "epoch": 0.8517514736770784, "grad_norm": 5.534562587738037, "learning_rate": 8.381819426380998e-05, "loss": 0.9195, "step": 12571 }, { "epoch": 0.8518192289450505, "grad_norm": 5.535101413726807, "learning_rate": 8.381682524471218e-05, "loss": 0.723, "step": 12572 }, { "epoch": 0.8518869842130226, "grad_norm": 6.00454044342041, "learning_rate": 8.381545622561436e-05, "loss": 0.6079, "step": 12573 }, { "epoch": 0.8519547394809946, "grad_norm": 6.607147216796875, "learning_rate": 8.381408720651654e-05, "loss": 0.9253, "step": 12574 }, { "epoch": 0.8520224947489667, "grad_norm": 5.750394821166992, "learning_rate": 8.381271818741872e-05, "loss": 0.689, "step": 12575 }, { "epoch": 0.8520902500169388, "grad_norm": 6.017903804779053, "learning_rate": 8.381134916832091e-05, "loss": 0.8317, "step": 12576 }, { "epoch": 0.8521580052849109, "grad_norm": 6.839371681213379, "learning_rate": 8.380998014922309e-05, "loss": 0.9688, "step": 12577 }, { "epoch": 0.852225760552883, "grad_norm": 5.614028453826904, "learning_rate": 8.380861113012527e-05, "loss": 0.5495, "step": 12578 }, { "epoch": 0.8522935158208551, "grad_norm": 6.759237766265869, "learning_rate": 8.380724211102745e-05, "loss": 0.7634, "step": 12579 }, { "epoch": 0.8523612710888272, "grad_norm": 5.667263031005859, "learning_rate": 8.380587309192963e-05, "loss": 0.8539, "step": 12580 }, { "epoch": 0.8524290263567993, "grad_norm": 5.995064735412598, "learning_rate": 8.380450407283183e-05, "loss": 0.7092, "step": 12581 }, { "epoch": 0.8524967816247713, "grad_norm": 6.612915515899658, "learning_rate": 8.380313505373401e-05, "loss": 0.822, "step": 12582 }, { "epoch": 0.8525645368927434, "grad_norm": 5.579259395599365, "learning_rate": 8.380176603463619e-05, "loss": 0.682, "step": 12583 }, { "epoch": 0.8526322921607155, "grad_norm": 6.590492248535156, "learning_rate": 8.380039701553837e-05, "loss": 0.7243, "step": 12584 }, { "epoch": 0.8527000474286875, "grad_norm": 4.9304680824279785, "learning_rate": 8.379902799644055e-05, "loss": 0.8079, "step": 12585 }, { "epoch": 0.8527678026966596, "grad_norm": 8.054821014404297, "learning_rate": 8.379765897734274e-05, "loss": 0.9659, "step": 12586 }, { "epoch": 0.8528355579646317, "grad_norm": 5.432389736175537, "learning_rate": 8.379628995824492e-05, "loss": 0.7316, "step": 12587 }, { "epoch": 0.8529033132326038, "grad_norm": 5.523181915283203, "learning_rate": 8.37949209391471e-05, "loss": 0.9273, "step": 12588 }, { "epoch": 0.8529710685005759, "grad_norm": 6.227122783660889, "learning_rate": 8.379355192004928e-05, "loss": 0.7196, "step": 12589 }, { "epoch": 0.853038823768548, "grad_norm": 6.5586371421813965, "learning_rate": 8.379218290095148e-05, "loss": 0.793, "step": 12590 }, { "epoch": 0.8531065790365201, "grad_norm": 6.881863594055176, "learning_rate": 8.379081388185366e-05, "loss": 0.6888, "step": 12591 }, { "epoch": 0.8531743343044922, "grad_norm": 6.569089889526367, "learning_rate": 8.378944486275584e-05, "loss": 1.0791, "step": 12592 }, { "epoch": 0.8532420895724643, "grad_norm": 4.835020542144775, "learning_rate": 8.378807584365802e-05, "loss": 0.866, "step": 12593 }, { "epoch": 0.8533098448404364, "grad_norm": 5.276635646820068, "learning_rate": 8.37867068245602e-05, "loss": 0.5689, "step": 12594 }, { "epoch": 0.8533776001084085, "grad_norm": 5.557784080505371, "learning_rate": 8.378533780546239e-05, "loss": 0.8455, "step": 12595 }, { "epoch": 0.8534453553763806, "grad_norm": 6.33231258392334, "learning_rate": 8.378396878636457e-05, "loss": 0.6639, "step": 12596 }, { "epoch": 0.8535131106443526, "grad_norm": 6.476678371429443, "learning_rate": 8.378259976726675e-05, "loss": 0.6786, "step": 12597 }, { "epoch": 0.8535808659123247, "grad_norm": 5.376199245452881, "learning_rate": 8.378123074816893e-05, "loss": 0.7915, "step": 12598 }, { "epoch": 0.8536486211802967, "grad_norm": 6.826034069061279, "learning_rate": 8.377986172907113e-05, "loss": 0.7511, "step": 12599 }, { "epoch": 0.8537163764482688, "grad_norm": 5.847662925720215, "learning_rate": 8.377849270997331e-05, "loss": 0.7095, "step": 12600 }, { "epoch": 0.8537841317162409, "grad_norm": 5.112993240356445, "learning_rate": 8.377712369087549e-05, "loss": 0.5066, "step": 12601 }, { "epoch": 0.853851886984213, "grad_norm": 8.529839515686035, "learning_rate": 8.377575467177767e-05, "loss": 0.8007, "step": 12602 }, { "epoch": 0.8539196422521851, "grad_norm": 5.600361347198486, "learning_rate": 8.377438565267985e-05, "loss": 0.847, "step": 12603 }, { "epoch": 0.8539873975201572, "grad_norm": 7.553412914276123, "learning_rate": 8.377301663358204e-05, "loss": 0.8847, "step": 12604 }, { "epoch": 0.8540551527881293, "grad_norm": 4.93065881729126, "learning_rate": 8.377164761448422e-05, "loss": 0.5527, "step": 12605 }, { "epoch": 0.8541229080561014, "grad_norm": 6.301657676696777, "learning_rate": 8.37702785953864e-05, "loss": 0.9723, "step": 12606 }, { "epoch": 0.8541906633240735, "grad_norm": 6.898577690124512, "learning_rate": 8.376890957628858e-05, "loss": 0.6861, "step": 12607 }, { "epoch": 0.8542584185920455, "grad_norm": 6.174037933349609, "learning_rate": 8.376754055719078e-05, "loss": 0.8906, "step": 12608 }, { "epoch": 0.8543261738600176, "grad_norm": 5.768240928649902, "learning_rate": 8.376617153809296e-05, "loss": 0.8712, "step": 12609 }, { "epoch": 0.8543939291279897, "grad_norm": 5.855865001678467, "learning_rate": 8.376480251899514e-05, "loss": 0.8333, "step": 12610 }, { "epoch": 0.8544616843959618, "grad_norm": 5.274192810058594, "learning_rate": 8.376343349989732e-05, "loss": 0.6786, "step": 12611 }, { "epoch": 0.8545294396639339, "grad_norm": 5.603989601135254, "learning_rate": 8.376206448079951e-05, "loss": 0.7428, "step": 12612 }, { "epoch": 0.854597194931906, "grad_norm": 4.8484883308410645, "learning_rate": 8.37606954617017e-05, "loss": 0.7456, "step": 12613 }, { "epoch": 0.8546649501998781, "grad_norm": 5.10874605178833, "learning_rate": 8.375932644260387e-05, "loss": 0.6326, "step": 12614 }, { "epoch": 0.8547327054678501, "grad_norm": 4.878294944763184, "learning_rate": 8.375795742350607e-05, "loss": 0.6428, "step": 12615 }, { "epoch": 0.8548004607358222, "grad_norm": 7.003088474273682, "learning_rate": 8.375658840440825e-05, "loss": 0.8246, "step": 12616 }, { "epoch": 0.8548682160037943, "grad_norm": 7.047825336456299, "learning_rate": 8.375521938531043e-05, "loss": 0.833, "step": 12617 }, { "epoch": 0.8549359712717663, "grad_norm": 9.071378707885742, "learning_rate": 8.375385036621262e-05, "loss": 0.8094, "step": 12618 }, { "epoch": 0.8550037265397384, "grad_norm": 5.345335960388184, "learning_rate": 8.37524813471148e-05, "loss": 0.79, "step": 12619 }, { "epoch": 0.8550714818077105, "grad_norm": 5.579035758972168, "learning_rate": 8.375111232801698e-05, "loss": 0.7526, "step": 12620 }, { "epoch": 0.8551392370756826, "grad_norm": 6.771716117858887, "learning_rate": 8.374974330891916e-05, "loss": 0.7558, "step": 12621 }, { "epoch": 0.8552069923436547, "grad_norm": 7.533701419830322, "learning_rate": 8.374837428982136e-05, "loss": 1.1432, "step": 12622 }, { "epoch": 0.8552747476116268, "grad_norm": 7.239196300506592, "learning_rate": 8.374700527072354e-05, "loss": 1.1609, "step": 12623 }, { "epoch": 0.8553425028795989, "grad_norm": 5.9439544677734375, "learning_rate": 8.374563625162572e-05, "loss": 0.8908, "step": 12624 }, { "epoch": 0.855410258147571, "grad_norm": 7.268623352050781, "learning_rate": 8.37442672325279e-05, "loss": 1.2071, "step": 12625 }, { "epoch": 0.8554780134155431, "grad_norm": 8.557291984558105, "learning_rate": 8.374289821343008e-05, "loss": 1.0076, "step": 12626 }, { "epoch": 0.8555457686835152, "grad_norm": 5.16387939453125, "learning_rate": 8.374152919433227e-05, "loss": 0.7862, "step": 12627 }, { "epoch": 0.8556135239514873, "grad_norm": 5.909477710723877, "learning_rate": 8.374016017523445e-05, "loss": 0.8143, "step": 12628 }, { "epoch": 0.8556812792194594, "grad_norm": 6.213109016418457, "learning_rate": 8.373879115613663e-05, "loss": 0.8753, "step": 12629 }, { "epoch": 0.8557490344874314, "grad_norm": 6.664327621459961, "learning_rate": 8.373742213703881e-05, "loss": 0.7626, "step": 12630 }, { "epoch": 0.8558167897554034, "grad_norm": 5.750275135040283, "learning_rate": 8.373605311794101e-05, "loss": 0.8285, "step": 12631 }, { "epoch": 0.8558845450233755, "grad_norm": 6.6249237060546875, "learning_rate": 8.373468409884319e-05, "loss": 0.9556, "step": 12632 }, { "epoch": 0.8559523002913476, "grad_norm": 5.466978549957275, "learning_rate": 8.373331507974537e-05, "loss": 0.9843, "step": 12633 }, { "epoch": 0.8560200555593197, "grad_norm": 7.178730487823486, "learning_rate": 8.373194606064755e-05, "loss": 0.9921, "step": 12634 }, { "epoch": 0.8560878108272918, "grad_norm": 5.387322902679443, "learning_rate": 8.373057704154973e-05, "loss": 0.8924, "step": 12635 }, { "epoch": 0.8561555660952639, "grad_norm": 6.278534889221191, "learning_rate": 8.372920802245192e-05, "loss": 1.05, "step": 12636 }, { "epoch": 0.856223321363236, "grad_norm": 6.567953586578369, "learning_rate": 8.37278390033541e-05, "loss": 0.9592, "step": 12637 }, { "epoch": 0.8562910766312081, "grad_norm": 4.848402976989746, "learning_rate": 8.372646998425628e-05, "loss": 0.7223, "step": 12638 }, { "epoch": 0.8563588318991802, "grad_norm": 5.792343616485596, "learning_rate": 8.372510096515846e-05, "loss": 0.8339, "step": 12639 }, { "epoch": 0.8564265871671523, "grad_norm": 7.278899669647217, "learning_rate": 8.372373194606064e-05, "loss": 0.9728, "step": 12640 }, { "epoch": 0.8564943424351243, "grad_norm": 6.241243839263916, "learning_rate": 8.372236292696284e-05, "loss": 0.6706, "step": 12641 }, { "epoch": 0.8565620977030964, "grad_norm": 4.546154022216797, "learning_rate": 8.372099390786502e-05, "loss": 0.6978, "step": 12642 }, { "epoch": 0.8566298529710685, "grad_norm": 5.440952777862549, "learning_rate": 8.37196248887672e-05, "loss": 0.5912, "step": 12643 }, { "epoch": 0.8566976082390406, "grad_norm": 5.9767656326293945, "learning_rate": 8.371825586966938e-05, "loss": 0.8739, "step": 12644 }, { "epoch": 0.8567653635070127, "grad_norm": 5.622544288635254, "learning_rate": 8.371688685057157e-05, "loss": 0.9851, "step": 12645 }, { "epoch": 0.8568331187749848, "grad_norm": 4.423412322998047, "learning_rate": 8.371551783147375e-05, "loss": 0.578, "step": 12646 }, { "epoch": 0.8569008740429569, "grad_norm": 7.680568695068359, "learning_rate": 8.371414881237593e-05, "loss": 0.8078, "step": 12647 }, { "epoch": 0.8569686293109289, "grad_norm": 5.702794075012207, "learning_rate": 8.371277979327811e-05, "loss": 0.734, "step": 12648 }, { "epoch": 0.857036384578901, "grad_norm": 6.302651882171631, "learning_rate": 8.37114107741803e-05, "loss": 0.7974, "step": 12649 }, { "epoch": 0.8571041398468731, "grad_norm": 6.76057243347168, "learning_rate": 8.371004175508249e-05, "loss": 0.8451, "step": 12650 }, { "epoch": 0.8571718951148452, "grad_norm": 5.101822376251221, "learning_rate": 8.370867273598467e-05, "loss": 0.7528, "step": 12651 }, { "epoch": 0.8572396503828172, "grad_norm": 5.3127641677856445, "learning_rate": 8.370730371688685e-05, "loss": 0.7881, "step": 12652 }, { "epoch": 0.8573074056507893, "grad_norm": 6.015966892242432, "learning_rate": 8.370593469778903e-05, "loss": 0.6502, "step": 12653 }, { "epoch": 0.8573751609187614, "grad_norm": 4.573479175567627, "learning_rate": 8.370456567869122e-05, "loss": 0.5229, "step": 12654 }, { "epoch": 0.8574429161867335, "grad_norm": 7.478061676025391, "learning_rate": 8.37031966595934e-05, "loss": 1.1038, "step": 12655 }, { "epoch": 0.8575106714547056, "grad_norm": 6.543705940246582, "learning_rate": 8.370182764049558e-05, "loss": 0.733, "step": 12656 }, { "epoch": 0.8575784267226777, "grad_norm": 5.073485851287842, "learning_rate": 8.370045862139776e-05, "loss": 0.8401, "step": 12657 }, { "epoch": 0.8576461819906498, "grad_norm": 6.257830619812012, "learning_rate": 8.369908960229996e-05, "loss": 0.709, "step": 12658 }, { "epoch": 0.8577139372586219, "grad_norm": 7.744876384735107, "learning_rate": 8.369772058320214e-05, "loss": 0.7146, "step": 12659 }, { "epoch": 0.857781692526594, "grad_norm": 6.404613971710205, "learning_rate": 8.369635156410432e-05, "loss": 0.7723, "step": 12660 }, { "epoch": 0.8578494477945661, "grad_norm": 7.350032806396484, "learning_rate": 8.369498254500651e-05, "loss": 0.9622, "step": 12661 }, { "epoch": 0.8579172030625382, "grad_norm": 7.5004191398620605, "learning_rate": 8.369361352590869e-05, "loss": 0.8981, "step": 12662 }, { "epoch": 0.8579849583305103, "grad_norm": 6.804741382598877, "learning_rate": 8.369224450681087e-05, "loss": 0.7108, "step": 12663 }, { "epoch": 0.8580527135984822, "grad_norm": 6.440323829650879, "learning_rate": 8.369087548771307e-05, "loss": 0.7311, "step": 12664 }, { "epoch": 0.8581204688664543, "grad_norm": 8.581847190856934, "learning_rate": 8.368950646861525e-05, "loss": 0.9141, "step": 12665 }, { "epoch": 0.8581882241344264, "grad_norm": 9.157425880432129, "learning_rate": 8.368813744951743e-05, "loss": 0.8747, "step": 12666 }, { "epoch": 0.8582559794023985, "grad_norm": 5.952433109283447, "learning_rate": 8.368676843041961e-05, "loss": 0.6971, "step": 12667 }, { "epoch": 0.8583237346703706, "grad_norm": 7.463055610656738, "learning_rate": 8.36853994113218e-05, "loss": 0.8685, "step": 12668 }, { "epoch": 0.8583914899383427, "grad_norm": 6.422219753265381, "learning_rate": 8.368403039222398e-05, "loss": 0.7494, "step": 12669 }, { "epoch": 0.8584592452063148, "grad_norm": 7.621025085449219, "learning_rate": 8.368266137312616e-05, "loss": 0.7981, "step": 12670 }, { "epoch": 0.8585270004742869, "grad_norm": 5.8451690673828125, "learning_rate": 8.368129235402834e-05, "loss": 0.6134, "step": 12671 }, { "epoch": 0.858594755742259, "grad_norm": 6.098859786987305, "learning_rate": 8.367992333493052e-05, "loss": 0.7698, "step": 12672 }, { "epoch": 0.858662511010231, "grad_norm": 7.19790506362915, "learning_rate": 8.367855431583272e-05, "loss": 0.8783, "step": 12673 }, { "epoch": 0.8587302662782031, "grad_norm": 7.8366899490356445, "learning_rate": 8.36771852967349e-05, "loss": 0.8574, "step": 12674 }, { "epoch": 0.8587980215461752, "grad_norm": 6.070743083953857, "learning_rate": 8.367581627763708e-05, "loss": 0.9785, "step": 12675 }, { "epoch": 0.8588657768141473, "grad_norm": 7.112607002258301, "learning_rate": 8.367444725853926e-05, "loss": 0.8502, "step": 12676 }, { "epoch": 0.8589335320821194, "grad_norm": 5.572093963623047, "learning_rate": 8.367307823944145e-05, "loss": 0.9328, "step": 12677 }, { "epoch": 0.8590012873500915, "grad_norm": 5.674850940704346, "learning_rate": 8.367170922034363e-05, "loss": 0.8259, "step": 12678 }, { "epoch": 0.8590690426180636, "grad_norm": 5.8405890464782715, "learning_rate": 8.367034020124581e-05, "loss": 0.6563, "step": 12679 }, { "epoch": 0.8591367978860356, "grad_norm": 5.2222981452941895, "learning_rate": 8.366897118214799e-05, "loss": 0.7247, "step": 12680 }, { "epoch": 0.8592045531540077, "grad_norm": 5.854241847991943, "learning_rate": 8.366760216305017e-05, "loss": 0.5706, "step": 12681 }, { "epoch": 0.8592723084219798, "grad_norm": 6.589792251586914, "learning_rate": 8.366623314395237e-05, "loss": 0.6776, "step": 12682 }, { "epoch": 0.8593400636899519, "grad_norm": 5.009568691253662, "learning_rate": 8.366486412485455e-05, "loss": 0.7079, "step": 12683 }, { "epoch": 0.859407818957924, "grad_norm": 5.921725749969482, "learning_rate": 8.366349510575673e-05, "loss": 0.9658, "step": 12684 }, { "epoch": 0.859475574225896, "grad_norm": 7.070530414581299, "learning_rate": 8.366212608665891e-05, "loss": 0.7979, "step": 12685 }, { "epoch": 0.8595433294938681, "grad_norm": 8.296648979187012, "learning_rate": 8.36607570675611e-05, "loss": 0.8313, "step": 12686 }, { "epoch": 0.8596110847618402, "grad_norm": 5.253691673278809, "learning_rate": 8.365938804846328e-05, "loss": 0.7979, "step": 12687 }, { "epoch": 0.8596788400298123, "grad_norm": 6.794055461883545, "learning_rate": 8.365801902936546e-05, "loss": 0.8337, "step": 12688 }, { "epoch": 0.8597465952977844, "grad_norm": 5.086427211761475, "learning_rate": 8.365665001026764e-05, "loss": 0.4667, "step": 12689 }, { "epoch": 0.8598143505657565, "grad_norm": 5.407829761505127, "learning_rate": 8.365528099116982e-05, "loss": 0.7093, "step": 12690 }, { "epoch": 0.8598821058337286, "grad_norm": 5.971084117889404, "learning_rate": 8.365391197207202e-05, "loss": 1.0033, "step": 12691 }, { "epoch": 0.8599498611017007, "grad_norm": 5.477384090423584, "learning_rate": 8.36525429529742e-05, "loss": 0.5806, "step": 12692 }, { "epoch": 0.8600176163696728, "grad_norm": 7.340150833129883, "learning_rate": 8.365117393387638e-05, "loss": 0.7215, "step": 12693 }, { "epoch": 0.8600853716376449, "grad_norm": 5.252416133880615, "learning_rate": 8.364980491477856e-05, "loss": 0.7346, "step": 12694 }, { "epoch": 0.860153126905617, "grad_norm": 7.507197380065918, "learning_rate": 8.364843589568074e-05, "loss": 1.0633, "step": 12695 }, { "epoch": 0.860220882173589, "grad_norm": 4.731564998626709, "learning_rate": 8.364706687658293e-05, "loss": 0.5687, "step": 12696 }, { "epoch": 0.860288637441561, "grad_norm": 6.081350803375244, "learning_rate": 8.364569785748511e-05, "loss": 0.7199, "step": 12697 }, { "epoch": 0.8603563927095331, "grad_norm": 6.91575288772583, "learning_rate": 8.36443288383873e-05, "loss": 0.9717, "step": 12698 }, { "epoch": 0.8604241479775052, "grad_norm": 5.101013660430908, "learning_rate": 8.364295981928947e-05, "loss": 0.772, "step": 12699 }, { "epoch": 0.8604919032454773, "grad_norm": 6.185006141662598, "learning_rate": 8.364159080019167e-05, "loss": 0.601, "step": 12700 }, { "epoch": 0.8605596585134494, "grad_norm": 6.696321487426758, "learning_rate": 8.364022178109385e-05, "loss": 0.8923, "step": 12701 }, { "epoch": 0.8606274137814215, "grad_norm": 4.8003129959106445, "learning_rate": 8.363885276199603e-05, "loss": 0.656, "step": 12702 }, { "epoch": 0.8606951690493936, "grad_norm": 8.93622875213623, "learning_rate": 8.363748374289821e-05, "loss": 0.8062, "step": 12703 }, { "epoch": 0.8607629243173657, "grad_norm": 5.659854412078857, "learning_rate": 8.36361147238004e-05, "loss": 0.8201, "step": 12704 }, { "epoch": 0.8608306795853378, "grad_norm": 6.121464729309082, "learning_rate": 8.363474570470258e-05, "loss": 0.8329, "step": 12705 }, { "epoch": 0.8608984348533099, "grad_norm": 5.242605686187744, "learning_rate": 8.363337668560476e-05, "loss": 0.7151, "step": 12706 }, { "epoch": 0.860966190121282, "grad_norm": 5.1735382080078125, "learning_rate": 8.363200766650696e-05, "loss": 0.7501, "step": 12707 }, { "epoch": 0.861033945389254, "grad_norm": 7.21480655670166, "learning_rate": 8.363063864740914e-05, "loss": 0.9973, "step": 12708 }, { "epoch": 0.8611017006572261, "grad_norm": 6.435115814208984, "learning_rate": 8.362926962831132e-05, "loss": 0.7217, "step": 12709 }, { "epoch": 0.8611694559251982, "grad_norm": 6.879031658172607, "learning_rate": 8.362790060921351e-05, "loss": 0.9322, "step": 12710 }, { "epoch": 0.8612372111931703, "grad_norm": 5.913161277770996, "learning_rate": 8.362653159011569e-05, "loss": 0.6451, "step": 12711 }, { "epoch": 0.8613049664611424, "grad_norm": 4.3174920082092285, "learning_rate": 8.362516257101787e-05, "loss": 0.682, "step": 12712 }, { "epoch": 0.8613727217291144, "grad_norm": 5.966519832611084, "learning_rate": 8.362379355192005e-05, "loss": 0.6362, "step": 12713 }, { "epoch": 0.8614404769970865, "grad_norm": 6.3389410972595215, "learning_rate": 8.362242453282225e-05, "loss": 0.7318, "step": 12714 }, { "epoch": 0.8615082322650586, "grad_norm": 8.78852653503418, "learning_rate": 8.362105551372443e-05, "loss": 0.7471, "step": 12715 }, { "epoch": 0.8615759875330307, "grad_norm": 5.232541084289551, "learning_rate": 8.361968649462661e-05, "loss": 0.7463, "step": 12716 }, { "epoch": 0.8616437428010028, "grad_norm": 6.598004341125488, "learning_rate": 8.361831747552879e-05, "loss": 0.7651, "step": 12717 }, { "epoch": 0.8617114980689748, "grad_norm": 6.944410800933838, "learning_rate": 8.361694845643097e-05, "loss": 0.8616, "step": 12718 }, { "epoch": 0.8617792533369469, "grad_norm": 5.001755714416504, "learning_rate": 8.361557943733316e-05, "loss": 0.6435, "step": 12719 }, { "epoch": 0.861847008604919, "grad_norm": 5.759768486022949, "learning_rate": 8.361421041823534e-05, "loss": 0.8236, "step": 12720 }, { "epoch": 0.8619147638728911, "grad_norm": 7.86954927444458, "learning_rate": 8.361284139913752e-05, "loss": 0.9191, "step": 12721 }, { "epoch": 0.8619825191408632, "grad_norm": 7.7088541984558105, "learning_rate": 8.36114723800397e-05, "loss": 0.874, "step": 12722 }, { "epoch": 0.8620502744088353, "grad_norm": 5.526132583618164, "learning_rate": 8.36101033609419e-05, "loss": 0.8083, "step": 12723 }, { "epoch": 0.8621180296768074, "grad_norm": 6.153379440307617, "learning_rate": 8.360873434184408e-05, "loss": 0.8097, "step": 12724 }, { "epoch": 0.8621857849447795, "grad_norm": 6.498526573181152, "learning_rate": 8.360736532274626e-05, "loss": 0.8441, "step": 12725 }, { "epoch": 0.8622535402127516, "grad_norm": 6.869676113128662, "learning_rate": 8.360599630364844e-05, "loss": 0.8353, "step": 12726 }, { "epoch": 0.8623212954807237, "grad_norm": 5.733700275421143, "learning_rate": 8.360462728455062e-05, "loss": 0.6627, "step": 12727 }, { "epoch": 0.8623890507486958, "grad_norm": 5.4007415771484375, "learning_rate": 8.360325826545281e-05, "loss": 0.5945, "step": 12728 }, { "epoch": 0.8624568060166677, "grad_norm": 5.028225898742676, "learning_rate": 8.360188924635499e-05, "loss": 0.7625, "step": 12729 }, { "epoch": 0.8625245612846398, "grad_norm": 6.950478553771973, "learning_rate": 8.360052022725717e-05, "loss": 0.9458, "step": 12730 }, { "epoch": 0.8625923165526119, "grad_norm": 5.532278060913086, "learning_rate": 8.359915120815935e-05, "loss": 0.7427, "step": 12731 }, { "epoch": 0.862660071820584, "grad_norm": 6.450150966644287, "learning_rate": 8.359778218906155e-05, "loss": 0.7621, "step": 12732 }, { "epoch": 0.8627278270885561, "grad_norm": 8.56269645690918, "learning_rate": 8.359641316996373e-05, "loss": 1.0904, "step": 12733 }, { "epoch": 0.8627955823565282, "grad_norm": 5.204941272735596, "learning_rate": 8.359504415086591e-05, "loss": 0.6651, "step": 12734 }, { "epoch": 0.8628633376245003, "grad_norm": 6.284374237060547, "learning_rate": 8.359367513176809e-05, "loss": 0.7062, "step": 12735 }, { "epoch": 0.8629310928924724, "grad_norm": 5.432925224304199, "learning_rate": 8.359230611267027e-05, "loss": 0.7484, "step": 12736 }, { "epoch": 0.8629988481604445, "grad_norm": 8.31678581237793, "learning_rate": 8.359093709357246e-05, "loss": 0.7803, "step": 12737 }, { "epoch": 0.8630666034284166, "grad_norm": 5.666910171508789, "learning_rate": 8.358956807447464e-05, "loss": 0.6977, "step": 12738 }, { "epoch": 0.8631343586963887, "grad_norm": 5.904063701629639, "learning_rate": 8.358819905537682e-05, "loss": 0.9111, "step": 12739 }, { "epoch": 0.8632021139643608, "grad_norm": 5.75467586517334, "learning_rate": 8.3586830036279e-05, "loss": 0.8442, "step": 12740 }, { "epoch": 0.8632698692323328, "grad_norm": 6.118785381317139, "learning_rate": 8.35854610171812e-05, "loss": 0.8948, "step": 12741 }, { "epoch": 0.8633376245003049, "grad_norm": 6.068502426147461, "learning_rate": 8.358409199808338e-05, "loss": 0.6608, "step": 12742 }, { "epoch": 0.863405379768277, "grad_norm": 6.425013542175293, "learning_rate": 8.358272297898556e-05, "loss": 0.6886, "step": 12743 }, { "epoch": 0.8634731350362491, "grad_norm": 6.034631729125977, "learning_rate": 8.358135395988774e-05, "loss": 0.7105, "step": 12744 }, { "epoch": 0.8635408903042211, "grad_norm": 5.688156604766846, "learning_rate": 8.357998494078992e-05, "loss": 0.7705, "step": 12745 }, { "epoch": 0.8636086455721932, "grad_norm": 6.057868480682373, "learning_rate": 8.357861592169211e-05, "loss": 0.7228, "step": 12746 }, { "epoch": 0.8636764008401653, "grad_norm": 9.307186126708984, "learning_rate": 8.357724690259429e-05, "loss": 0.8587, "step": 12747 }, { "epoch": 0.8637441561081374, "grad_norm": 4.899465560913086, "learning_rate": 8.357587788349647e-05, "loss": 0.78, "step": 12748 }, { "epoch": 0.8638119113761095, "grad_norm": 5.833337783813477, "learning_rate": 8.357450886439865e-05, "loss": 0.8024, "step": 12749 }, { "epoch": 0.8638796666440816, "grad_norm": 5.353335380554199, "learning_rate": 8.357313984530085e-05, "loss": 0.9984, "step": 12750 }, { "epoch": 0.8639474219120536, "grad_norm": 5.963881969451904, "learning_rate": 8.357177082620303e-05, "loss": 0.6835, "step": 12751 }, { "epoch": 0.8640151771800257, "grad_norm": 5.2977166175842285, "learning_rate": 8.357040180710521e-05, "loss": 0.8039, "step": 12752 }, { "epoch": 0.8640829324479978, "grad_norm": 5.586292743682861, "learning_rate": 8.35690327880074e-05, "loss": 0.8733, "step": 12753 }, { "epoch": 0.8641506877159699, "grad_norm": 8.01198959350586, "learning_rate": 8.356766376890958e-05, "loss": 1.0226, "step": 12754 }, { "epoch": 0.864218442983942, "grad_norm": 5.4859089851379395, "learning_rate": 8.356629474981176e-05, "loss": 0.9956, "step": 12755 }, { "epoch": 0.8642861982519141, "grad_norm": 6.660008907318115, "learning_rate": 8.356492573071396e-05, "loss": 0.8324, "step": 12756 }, { "epoch": 0.8643539535198862, "grad_norm": 4.699826717376709, "learning_rate": 8.356355671161614e-05, "loss": 0.6846, "step": 12757 }, { "epoch": 0.8644217087878583, "grad_norm": 5.154027938842773, "learning_rate": 8.356218769251832e-05, "loss": 0.7131, "step": 12758 }, { "epoch": 0.8644894640558304, "grad_norm": 6.356171131134033, "learning_rate": 8.35608186734205e-05, "loss": 0.9622, "step": 12759 }, { "epoch": 0.8645572193238025, "grad_norm": 4.833785057067871, "learning_rate": 8.355944965432269e-05, "loss": 0.7694, "step": 12760 }, { "epoch": 0.8646249745917746, "grad_norm": 5.7593889236450195, "learning_rate": 8.355808063522487e-05, "loss": 0.5833, "step": 12761 }, { "epoch": 0.8646927298597465, "grad_norm": 5.679107189178467, "learning_rate": 8.355671161612705e-05, "loss": 0.7964, "step": 12762 }, { "epoch": 0.8647604851277186, "grad_norm": 7.541195869445801, "learning_rate": 8.355534259702923e-05, "loss": 0.7552, "step": 12763 }, { "epoch": 0.8648282403956907, "grad_norm": 7.550341606140137, "learning_rate": 8.355397357793143e-05, "loss": 0.9926, "step": 12764 }, { "epoch": 0.8648959956636628, "grad_norm": 5.840928554534912, "learning_rate": 8.35526045588336e-05, "loss": 0.7363, "step": 12765 }, { "epoch": 0.8649637509316349, "grad_norm": 5.607713222503662, "learning_rate": 8.355123553973579e-05, "loss": 0.7449, "step": 12766 }, { "epoch": 0.865031506199607, "grad_norm": 4.359575271606445, "learning_rate": 8.354986652063797e-05, "loss": 0.7983, "step": 12767 }, { "epoch": 0.8650992614675791, "grad_norm": 5.7104363441467285, "learning_rate": 8.354849750154015e-05, "loss": 0.7692, "step": 12768 }, { "epoch": 0.8651670167355512, "grad_norm": 6.246327877044678, "learning_rate": 8.354712848244234e-05, "loss": 0.8406, "step": 12769 }, { "epoch": 0.8652347720035233, "grad_norm": 5.536504745483398, "learning_rate": 8.354575946334452e-05, "loss": 0.8198, "step": 12770 }, { "epoch": 0.8653025272714954, "grad_norm": 6.570160388946533, "learning_rate": 8.35443904442467e-05, "loss": 0.4944, "step": 12771 }, { "epoch": 0.8653702825394675, "grad_norm": 4.442549705505371, "learning_rate": 8.354302142514888e-05, "loss": 0.6964, "step": 12772 }, { "epoch": 0.8654380378074396, "grad_norm": 5.418501853942871, "learning_rate": 8.354165240605106e-05, "loss": 0.7211, "step": 12773 }, { "epoch": 0.8655057930754116, "grad_norm": 8.48850154876709, "learning_rate": 8.354028338695326e-05, "loss": 0.7467, "step": 12774 }, { "epoch": 0.8655735483433837, "grad_norm": 7.069798946380615, "learning_rate": 8.353891436785544e-05, "loss": 0.6164, "step": 12775 }, { "epoch": 0.8656413036113558, "grad_norm": 9.349570274353027, "learning_rate": 8.353754534875762e-05, "loss": 0.7237, "step": 12776 }, { "epoch": 0.8657090588793279, "grad_norm": 6.17117977142334, "learning_rate": 8.35361763296598e-05, "loss": 0.9118, "step": 12777 }, { "epoch": 0.8657768141472999, "grad_norm": 6.606432914733887, "learning_rate": 8.353480731056199e-05, "loss": 0.7991, "step": 12778 }, { "epoch": 0.865844569415272, "grad_norm": 8.163229942321777, "learning_rate": 8.353343829146417e-05, "loss": 1.0244, "step": 12779 }, { "epoch": 0.8659123246832441, "grad_norm": 8.662758827209473, "learning_rate": 8.353206927236635e-05, "loss": 0.9709, "step": 12780 }, { "epoch": 0.8659800799512162, "grad_norm": 5.596740245819092, "learning_rate": 8.353070025326853e-05, "loss": 0.6034, "step": 12781 }, { "epoch": 0.8660478352191883, "grad_norm": 6.353254795074463, "learning_rate": 8.352933123417071e-05, "loss": 0.8139, "step": 12782 }, { "epoch": 0.8661155904871604, "grad_norm": 6.522273540496826, "learning_rate": 8.352796221507291e-05, "loss": 0.7098, "step": 12783 }, { "epoch": 0.8661833457551324, "grad_norm": 6.948729038238525, "learning_rate": 8.352659319597509e-05, "loss": 0.8503, "step": 12784 }, { "epoch": 0.8662511010231045, "grad_norm": 4.999991416931152, "learning_rate": 8.352522417687727e-05, "loss": 0.8581, "step": 12785 }, { "epoch": 0.8663188562910766, "grad_norm": 5.613487243652344, "learning_rate": 8.352385515777945e-05, "loss": 0.7931, "step": 12786 }, { "epoch": 0.8663866115590487, "grad_norm": 4.850801944732666, "learning_rate": 8.352248613868164e-05, "loss": 0.8297, "step": 12787 }, { "epoch": 0.8664543668270208, "grad_norm": 6.223756313323975, "learning_rate": 8.352111711958382e-05, "loss": 0.7722, "step": 12788 }, { "epoch": 0.8665221220949929, "grad_norm": 6.001883029937744, "learning_rate": 8.3519748100486e-05, "loss": 0.7325, "step": 12789 }, { "epoch": 0.866589877362965, "grad_norm": 6.25042724609375, "learning_rate": 8.351837908138818e-05, "loss": 0.5647, "step": 12790 }, { "epoch": 0.8666576326309371, "grad_norm": 5.111363410949707, "learning_rate": 8.351701006229036e-05, "loss": 0.6094, "step": 12791 }, { "epoch": 0.8667253878989092, "grad_norm": 6.161365985870361, "learning_rate": 8.351564104319256e-05, "loss": 0.9958, "step": 12792 }, { "epoch": 0.8667931431668813, "grad_norm": 7.109573841094971, "learning_rate": 8.351427202409474e-05, "loss": 0.8822, "step": 12793 }, { "epoch": 0.8668608984348533, "grad_norm": 5.816695690155029, "learning_rate": 8.351290300499692e-05, "loss": 0.7643, "step": 12794 }, { "epoch": 0.8669286537028253, "grad_norm": 6.266007423400879, "learning_rate": 8.35115339858991e-05, "loss": 0.6835, "step": 12795 }, { "epoch": 0.8669964089707974, "grad_norm": 5.289031505584717, "learning_rate": 8.351016496680129e-05, "loss": 0.6873, "step": 12796 }, { "epoch": 0.8670641642387695, "grad_norm": 6.6583251953125, "learning_rate": 8.350879594770347e-05, "loss": 0.9201, "step": 12797 }, { "epoch": 0.8671319195067416, "grad_norm": 8.268150329589844, "learning_rate": 8.350742692860565e-05, "loss": 1.0113, "step": 12798 }, { "epoch": 0.8671996747747137, "grad_norm": 5.9207377433776855, "learning_rate": 8.350605790950783e-05, "loss": 0.8132, "step": 12799 }, { "epoch": 0.8672674300426858, "grad_norm": 7.960853099822998, "learning_rate": 8.350468889041003e-05, "loss": 0.8865, "step": 12800 }, { "epoch": 0.8673351853106579, "grad_norm": 7.3755364418029785, "learning_rate": 8.350331987131221e-05, "loss": 1.0102, "step": 12801 }, { "epoch": 0.86740294057863, "grad_norm": 5.826414108276367, "learning_rate": 8.35019508522144e-05, "loss": 0.6119, "step": 12802 }, { "epoch": 0.8674706958466021, "grad_norm": 4.6021857261657715, "learning_rate": 8.350058183311658e-05, "loss": 0.7426, "step": 12803 }, { "epoch": 0.8675384511145742, "grad_norm": 6.744377136230469, "learning_rate": 8.349921281401876e-05, "loss": 0.6373, "step": 12804 }, { "epoch": 0.8676062063825463, "grad_norm": 5.894944190979004, "learning_rate": 8.349784379492094e-05, "loss": 0.8591, "step": 12805 }, { "epoch": 0.8676739616505184, "grad_norm": 6.757943630218506, "learning_rate": 8.349647477582314e-05, "loss": 0.8015, "step": 12806 }, { "epoch": 0.8677417169184904, "grad_norm": 4.994389057159424, "learning_rate": 8.349510575672532e-05, "loss": 0.8663, "step": 12807 }, { "epoch": 0.8678094721864625, "grad_norm": 6.01758337020874, "learning_rate": 8.34937367376275e-05, "loss": 0.8132, "step": 12808 }, { "epoch": 0.8678772274544346, "grad_norm": 5.205808639526367, "learning_rate": 8.349236771852968e-05, "loss": 0.7275, "step": 12809 }, { "epoch": 0.8679449827224067, "grad_norm": 6.259244441986084, "learning_rate": 8.349099869943187e-05, "loss": 0.9726, "step": 12810 }, { "epoch": 0.8680127379903787, "grad_norm": 9.059528350830078, "learning_rate": 8.348962968033405e-05, "loss": 0.9158, "step": 12811 }, { "epoch": 0.8680804932583508, "grad_norm": 4.859555244445801, "learning_rate": 8.348826066123623e-05, "loss": 1.0459, "step": 12812 }, { "epoch": 0.8681482485263229, "grad_norm": 5.262827396392822, "learning_rate": 8.348689164213841e-05, "loss": 0.6795, "step": 12813 }, { "epoch": 0.868216003794295, "grad_norm": 7.7913818359375, "learning_rate": 8.348552262304059e-05, "loss": 0.9334, "step": 12814 }, { "epoch": 0.8682837590622671, "grad_norm": 6.01145601272583, "learning_rate": 8.348415360394279e-05, "loss": 0.8221, "step": 12815 }, { "epoch": 0.8683515143302392, "grad_norm": 6.34686279296875, "learning_rate": 8.348278458484497e-05, "loss": 0.7442, "step": 12816 }, { "epoch": 0.8684192695982113, "grad_norm": 6.925653457641602, "learning_rate": 8.348141556574715e-05, "loss": 0.9746, "step": 12817 }, { "epoch": 0.8684870248661833, "grad_norm": 5.890630722045898, "learning_rate": 8.348004654664933e-05, "loss": 0.8322, "step": 12818 }, { "epoch": 0.8685547801341554, "grad_norm": 5.628925800323486, "learning_rate": 8.347867752755152e-05, "loss": 0.7154, "step": 12819 }, { "epoch": 0.8686225354021275, "grad_norm": 5.708745956420898, "learning_rate": 8.34773085084537e-05, "loss": 0.5718, "step": 12820 }, { "epoch": 0.8686902906700996, "grad_norm": 5.534167289733887, "learning_rate": 8.347593948935588e-05, "loss": 0.84, "step": 12821 }, { "epoch": 0.8687580459380717, "grad_norm": 6.858389854431152, "learning_rate": 8.347457047025806e-05, "loss": 0.7687, "step": 12822 }, { "epoch": 0.8688258012060438, "grad_norm": 5.650959491729736, "learning_rate": 8.347320145116024e-05, "loss": 0.8645, "step": 12823 }, { "epoch": 0.8688935564740159, "grad_norm": 5.0962324142456055, "learning_rate": 8.347183243206244e-05, "loss": 0.6123, "step": 12824 }, { "epoch": 0.868961311741988, "grad_norm": 7.235836029052734, "learning_rate": 8.347046341296462e-05, "loss": 0.831, "step": 12825 }, { "epoch": 0.8690290670099601, "grad_norm": 5.751955986022949, "learning_rate": 8.34690943938668e-05, "loss": 0.6362, "step": 12826 }, { "epoch": 0.8690968222779321, "grad_norm": 6.159754276275635, "learning_rate": 8.346772537476898e-05, "loss": 0.6077, "step": 12827 }, { "epoch": 0.8691645775459041, "grad_norm": 5.868882179260254, "learning_rate": 8.346635635567116e-05, "loss": 0.6687, "step": 12828 }, { "epoch": 0.8692323328138762, "grad_norm": 6.147858142852783, "learning_rate": 8.346498733657335e-05, "loss": 0.7494, "step": 12829 }, { "epoch": 0.8693000880818483, "grad_norm": 7.173940181732178, "learning_rate": 8.346361831747553e-05, "loss": 0.906, "step": 12830 }, { "epoch": 0.8693678433498204, "grad_norm": 6.0628814697265625, "learning_rate": 8.346224929837771e-05, "loss": 0.6627, "step": 12831 }, { "epoch": 0.8694355986177925, "grad_norm": 5.7082343101501465, "learning_rate": 8.346088027927989e-05, "loss": 0.7305, "step": 12832 }, { "epoch": 0.8695033538857646, "grad_norm": 7.864633083343506, "learning_rate": 8.345951126018209e-05, "loss": 0.7189, "step": 12833 }, { "epoch": 0.8695711091537367, "grad_norm": 6.37849235534668, "learning_rate": 8.345814224108427e-05, "loss": 0.6766, "step": 12834 }, { "epoch": 0.8696388644217088, "grad_norm": 5.001320838928223, "learning_rate": 8.345677322198645e-05, "loss": 0.6396, "step": 12835 }, { "epoch": 0.8697066196896809, "grad_norm": 5.3315606117248535, "learning_rate": 8.345540420288863e-05, "loss": 0.7312, "step": 12836 }, { "epoch": 0.869774374957653, "grad_norm": 8.665613174438477, "learning_rate": 8.345403518379081e-05, "loss": 0.7123, "step": 12837 }, { "epoch": 0.8698421302256251, "grad_norm": 6.103067874908447, "learning_rate": 8.3452666164693e-05, "loss": 0.6624, "step": 12838 }, { "epoch": 0.8699098854935972, "grad_norm": 5.679276943206787, "learning_rate": 8.345129714559518e-05, "loss": 0.8496, "step": 12839 }, { "epoch": 0.8699776407615692, "grad_norm": 5.381316184997559, "learning_rate": 8.344992812649736e-05, "loss": 0.5979, "step": 12840 }, { "epoch": 0.8700453960295413, "grad_norm": 7.85809326171875, "learning_rate": 8.344855910739954e-05, "loss": 0.934, "step": 12841 }, { "epoch": 0.8701131512975134, "grad_norm": 5.871927738189697, "learning_rate": 8.344719008830174e-05, "loss": 0.8423, "step": 12842 }, { "epoch": 0.8701809065654854, "grad_norm": 7.4773268699646, "learning_rate": 8.344582106920392e-05, "loss": 0.9766, "step": 12843 }, { "epoch": 0.8702486618334575, "grad_norm": 5.408422946929932, "learning_rate": 8.34444520501061e-05, "loss": 0.8514, "step": 12844 }, { "epoch": 0.8703164171014296, "grad_norm": 5.86200475692749, "learning_rate": 8.344308303100828e-05, "loss": 0.7288, "step": 12845 }, { "epoch": 0.8703841723694017, "grad_norm": 5.635140419006348, "learning_rate": 8.344171401191047e-05, "loss": 0.7645, "step": 12846 }, { "epoch": 0.8704519276373738, "grad_norm": 4.9930524826049805, "learning_rate": 8.344034499281265e-05, "loss": 0.6171, "step": 12847 }, { "epoch": 0.8705196829053459, "grad_norm": 7.424667835235596, "learning_rate": 8.343897597371483e-05, "loss": 0.6401, "step": 12848 }, { "epoch": 0.870587438173318, "grad_norm": 6.124965667724609, "learning_rate": 8.343760695461703e-05, "loss": 0.8959, "step": 12849 }, { "epoch": 0.87065519344129, "grad_norm": 7.060937881469727, "learning_rate": 8.34362379355192e-05, "loss": 0.9213, "step": 12850 }, { "epoch": 0.8707229487092621, "grad_norm": 5.401867389678955, "learning_rate": 8.343486891642139e-05, "loss": 0.6287, "step": 12851 }, { "epoch": 0.8707907039772342, "grad_norm": 6.958117485046387, "learning_rate": 8.343349989732358e-05, "loss": 0.9408, "step": 12852 }, { "epoch": 0.8708584592452063, "grad_norm": 6.046300888061523, "learning_rate": 8.343213087822576e-05, "loss": 0.875, "step": 12853 }, { "epoch": 0.8709262145131784, "grad_norm": 6.581618785858154, "learning_rate": 8.343076185912794e-05, "loss": 0.9302, "step": 12854 }, { "epoch": 0.8709939697811505, "grad_norm": 6.795581817626953, "learning_rate": 8.342939284003012e-05, "loss": 0.8144, "step": 12855 }, { "epoch": 0.8710617250491226, "grad_norm": 7.541666030883789, "learning_rate": 8.342802382093232e-05, "loss": 0.7349, "step": 12856 }, { "epoch": 0.8711294803170947, "grad_norm": 7.175131320953369, "learning_rate": 8.34266548018345e-05, "loss": 0.9659, "step": 12857 }, { "epoch": 0.8711972355850668, "grad_norm": 4.722870349884033, "learning_rate": 8.342528578273668e-05, "loss": 0.819, "step": 12858 }, { "epoch": 0.8712649908530389, "grad_norm": 6.122445106506348, "learning_rate": 8.342391676363886e-05, "loss": 0.839, "step": 12859 }, { "epoch": 0.8713327461210109, "grad_norm": 6.808223724365234, "learning_rate": 8.342254774454104e-05, "loss": 0.8034, "step": 12860 }, { "epoch": 0.871400501388983, "grad_norm": 5.828917026519775, "learning_rate": 8.342117872544323e-05, "loss": 0.6161, "step": 12861 }, { "epoch": 0.871468256656955, "grad_norm": 5.570794582366943, "learning_rate": 8.341980970634541e-05, "loss": 0.738, "step": 12862 }, { "epoch": 0.8715360119249271, "grad_norm": 8.307096481323242, "learning_rate": 8.341844068724759e-05, "loss": 0.6682, "step": 12863 }, { "epoch": 0.8716037671928992, "grad_norm": 5.596463203430176, "learning_rate": 8.341707166814977e-05, "loss": 0.7721, "step": 12864 }, { "epoch": 0.8716715224608713, "grad_norm": 6.285654067993164, "learning_rate": 8.341570264905197e-05, "loss": 0.8688, "step": 12865 }, { "epoch": 0.8717392777288434, "grad_norm": 5.791274070739746, "learning_rate": 8.341433362995415e-05, "loss": 0.6696, "step": 12866 }, { "epoch": 0.8718070329968155, "grad_norm": 6.1828413009643555, "learning_rate": 8.341296461085633e-05, "loss": 0.6573, "step": 12867 }, { "epoch": 0.8718747882647876, "grad_norm": 6.662983417510986, "learning_rate": 8.341159559175851e-05, "loss": 1.0418, "step": 12868 }, { "epoch": 0.8719425435327597, "grad_norm": 6.25025749206543, "learning_rate": 8.341022657266069e-05, "loss": 0.8527, "step": 12869 }, { "epoch": 0.8720102988007318, "grad_norm": 5.201798915863037, "learning_rate": 8.340885755356288e-05, "loss": 1.0302, "step": 12870 }, { "epoch": 0.8720780540687039, "grad_norm": 7.105745315551758, "learning_rate": 8.340748853446506e-05, "loss": 0.8022, "step": 12871 }, { "epoch": 0.872145809336676, "grad_norm": 5.172214031219482, "learning_rate": 8.340611951536724e-05, "loss": 0.7184, "step": 12872 }, { "epoch": 0.872213564604648, "grad_norm": 4.862717151641846, "learning_rate": 8.340475049626942e-05, "loss": 0.5587, "step": 12873 }, { "epoch": 0.8722813198726201, "grad_norm": 5.38953971862793, "learning_rate": 8.340338147717162e-05, "loss": 0.7139, "step": 12874 }, { "epoch": 0.8723490751405922, "grad_norm": 6.497892379760742, "learning_rate": 8.34020124580738e-05, "loss": 0.7788, "step": 12875 }, { "epoch": 0.8724168304085642, "grad_norm": 5.505675792694092, "learning_rate": 8.340064343897598e-05, "loss": 0.8322, "step": 12876 }, { "epoch": 0.8724845856765363, "grad_norm": 6.221973419189453, "learning_rate": 8.339927441987816e-05, "loss": 0.9316, "step": 12877 }, { "epoch": 0.8725523409445084, "grad_norm": 5.295744895935059, "learning_rate": 8.339790540078034e-05, "loss": 0.8424, "step": 12878 }, { "epoch": 0.8726200962124805, "grad_norm": 8.020045280456543, "learning_rate": 8.339653638168253e-05, "loss": 0.7412, "step": 12879 }, { "epoch": 0.8726878514804526, "grad_norm": 7.37377405166626, "learning_rate": 8.339516736258471e-05, "loss": 0.6915, "step": 12880 }, { "epoch": 0.8727556067484247, "grad_norm": 4.970482349395752, "learning_rate": 8.339379834348689e-05, "loss": 0.6638, "step": 12881 }, { "epoch": 0.8728233620163968, "grad_norm": 5.549528121948242, "learning_rate": 8.339242932438907e-05, "loss": 0.6753, "step": 12882 }, { "epoch": 0.8728911172843689, "grad_norm": 6.604176044464111, "learning_rate": 8.339106030529125e-05, "loss": 0.8787, "step": 12883 }, { "epoch": 0.872958872552341, "grad_norm": 5.274704933166504, "learning_rate": 8.338969128619345e-05, "loss": 0.7042, "step": 12884 }, { "epoch": 0.873026627820313, "grad_norm": 6.73942232131958, "learning_rate": 8.338832226709563e-05, "loss": 0.9337, "step": 12885 }, { "epoch": 0.8730943830882851, "grad_norm": 5.404533863067627, "learning_rate": 8.338695324799781e-05, "loss": 0.8108, "step": 12886 }, { "epoch": 0.8731621383562572, "grad_norm": 5.6985087394714355, "learning_rate": 8.338558422889999e-05, "loss": 0.8906, "step": 12887 }, { "epoch": 0.8732298936242293, "grad_norm": 6.844064235687256, "learning_rate": 8.338421520980218e-05, "loss": 0.8528, "step": 12888 }, { "epoch": 0.8732976488922014, "grad_norm": 7.021403789520264, "learning_rate": 8.338284619070436e-05, "loss": 0.8084, "step": 12889 }, { "epoch": 0.8733654041601735, "grad_norm": 7.886348247528076, "learning_rate": 8.338147717160654e-05, "loss": 0.6421, "step": 12890 }, { "epoch": 0.8734331594281456, "grad_norm": 6.461071968078613, "learning_rate": 8.338010815250872e-05, "loss": 0.7556, "step": 12891 }, { "epoch": 0.8735009146961176, "grad_norm": 5.232357501983643, "learning_rate": 8.337873913341092e-05, "loss": 0.7246, "step": 12892 }, { "epoch": 0.8735686699640897, "grad_norm": 4.585625648498535, "learning_rate": 8.33773701143131e-05, "loss": 0.6826, "step": 12893 }, { "epoch": 0.8736364252320618, "grad_norm": 5.381401538848877, "learning_rate": 8.337600109521528e-05, "loss": 0.8486, "step": 12894 }, { "epoch": 0.8737041805000338, "grad_norm": 6.673171043395996, "learning_rate": 8.337463207611747e-05, "loss": 0.6716, "step": 12895 }, { "epoch": 0.8737719357680059, "grad_norm": 9.136744499206543, "learning_rate": 8.337326305701965e-05, "loss": 1.0048, "step": 12896 }, { "epoch": 0.873839691035978, "grad_norm": 7.344038963317871, "learning_rate": 8.337189403792183e-05, "loss": 1.0041, "step": 12897 }, { "epoch": 0.8739074463039501, "grad_norm": 5.5072832107543945, "learning_rate": 8.337052501882403e-05, "loss": 0.6608, "step": 12898 }, { "epoch": 0.8739752015719222, "grad_norm": 6.74569845199585, "learning_rate": 8.33691559997262e-05, "loss": 0.9765, "step": 12899 }, { "epoch": 0.8740429568398943, "grad_norm": 6.700042247772217, "learning_rate": 8.336778698062839e-05, "loss": 0.8127, "step": 12900 }, { "epoch": 0.8741107121078664, "grad_norm": 5.775038242340088, "learning_rate": 8.336641796153057e-05, "loss": 0.8717, "step": 12901 }, { "epoch": 0.8741784673758385, "grad_norm": 6.800004005432129, "learning_rate": 8.336504894243276e-05, "loss": 0.9006, "step": 12902 }, { "epoch": 0.8742462226438106, "grad_norm": 6.40112829208374, "learning_rate": 8.336367992333494e-05, "loss": 1.0521, "step": 12903 }, { "epoch": 0.8743139779117827, "grad_norm": 8.33122444152832, "learning_rate": 8.336231090423712e-05, "loss": 0.8228, "step": 12904 }, { "epoch": 0.8743817331797548, "grad_norm": 6.429588317871094, "learning_rate": 8.33609418851393e-05, "loss": 0.8859, "step": 12905 }, { "epoch": 0.8744494884477269, "grad_norm": 6.242363452911377, "learning_rate": 8.335957286604148e-05, "loss": 0.7872, "step": 12906 }, { "epoch": 0.8745172437156989, "grad_norm": 4.990901470184326, "learning_rate": 8.335820384694368e-05, "loss": 0.6444, "step": 12907 }, { "epoch": 0.874584998983671, "grad_norm": 5.876582145690918, "learning_rate": 8.335683482784586e-05, "loss": 1.0041, "step": 12908 }, { "epoch": 0.874652754251643, "grad_norm": 5.543231964111328, "learning_rate": 8.335546580874804e-05, "loss": 0.658, "step": 12909 }, { "epoch": 0.8747205095196151, "grad_norm": 6.464064598083496, "learning_rate": 8.335409678965022e-05, "loss": 0.642, "step": 12910 }, { "epoch": 0.8747882647875872, "grad_norm": 6.268253803253174, "learning_rate": 8.335272777055241e-05, "loss": 0.9617, "step": 12911 }, { "epoch": 0.8748560200555593, "grad_norm": 6.467350482940674, "learning_rate": 8.335135875145459e-05, "loss": 0.7592, "step": 12912 }, { "epoch": 0.8749237753235314, "grad_norm": 5.470207214355469, "learning_rate": 8.334998973235677e-05, "loss": 0.7742, "step": 12913 }, { "epoch": 0.8749915305915035, "grad_norm": 6.371903419494629, "learning_rate": 8.334862071325895e-05, "loss": 0.6947, "step": 12914 }, { "epoch": 0.8750592858594756, "grad_norm": 4.678818702697754, "learning_rate": 8.334725169416113e-05, "loss": 0.6938, "step": 12915 }, { "epoch": 0.8751270411274477, "grad_norm": 9.119450569152832, "learning_rate": 8.334588267506333e-05, "loss": 1.0259, "step": 12916 }, { "epoch": 0.8751947963954197, "grad_norm": 5.258639335632324, "learning_rate": 8.33445136559655e-05, "loss": 0.9058, "step": 12917 }, { "epoch": 0.8752625516633918, "grad_norm": 6.689220905303955, "learning_rate": 8.334314463686769e-05, "loss": 0.9682, "step": 12918 }, { "epoch": 0.8753303069313639, "grad_norm": 5.658355712890625, "learning_rate": 8.334177561776987e-05, "loss": 0.7693, "step": 12919 }, { "epoch": 0.875398062199336, "grad_norm": 5.163204193115234, "learning_rate": 8.334040659867206e-05, "loss": 0.7801, "step": 12920 }, { "epoch": 0.8754658174673081, "grad_norm": 5.308339595794678, "learning_rate": 8.333903757957424e-05, "loss": 0.9244, "step": 12921 }, { "epoch": 0.8755335727352802, "grad_norm": 6.663207054138184, "learning_rate": 8.333766856047642e-05, "loss": 0.677, "step": 12922 }, { "epoch": 0.8756013280032523, "grad_norm": 5.615358829498291, "learning_rate": 8.33362995413786e-05, "loss": 0.7344, "step": 12923 }, { "epoch": 0.8756690832712244, "grad_norm": 6.694277763366699, "learning_rate": 8.333493052228078e-05, "loss": 0.8418, "step": 12924 }, { "epoch": 0.8757368385391964, "grad_norm": 7.067589282989502, "learning_rate": 8.333356150318298e-05, "loss": 0.8682, "step": 12925 }, { "epoch": 0.8758045938071685, "grad_norm": 6.071625709533691, "learning_rate": 8.333219248408516e-05, "loss": 1.0198, "step": 12926 }, { "epoch": 0.8758723490751406, "grad_norm": 5.014382362365723, "learning_rate": 8.333082346498734e-05, "loss": 0.8123, "step": 12927 }, { "epoch": 0.8759401043431126, "grad_norm": 7.893514156341553, "learning_rate": 8.332945444588952e-05, "loss": 0.7958, "step": 12928 }, { "epoch": 0.8760078596110847, "grad_norm": 9.142471313476562, "learning_rate": 8.332808542679171e-05, "loss": 0.9072, "step": 12929 }, { "epoch": 0.8760756148790568, "grad_norm": 4.87850284576416, "learning_rate": 8.332671640769389e-05, "loss": 0.6293, "step": 12930 }, { "epoch": 0.8761433701470289, "grad_norm": 6.533822536468506, "learning_rate": 8.332534738859607e-05, "loss": 0.6865, "step": 12931 }, { "epoch": 0.876211125415001, "grad_norm": 6.230561256408691, "learning_rate": 8.332397836949825e-05, "loss": 0.8655, "step": 12932 }, { "epoch": 0.8762788806829731, "grad_norm": 5.474959373474121, "learning_rate": 8.332260935040043e-05, "loss": 0.7077, "step": 12933 }, { "epoch": 0.8763466359509452, "grad_norm": 6.593677043914795, "learning_rate": 8.332124033130263e-05, "loss": 0.8323, "step": 12934 }, { "epoch": 0.8764143912189173, "grad_norm": 6.031239986419678, "learning_rate": 8.33198713122048e-05, "loss": 0.8379, "step": 12935 }, { "epoch": 0.8764821464868894, "grad_norm": 5.653693199157715, "learning_rate": 8.331850229310699e-05, "loss": 0.6913, "step": 12936 }, { "epoch": 0.8765499017548615, "grad_norm": 4.6549296379089355, "learning_rate": 8.331713327400917e-05, "loss": 0.7085, "step": 12937 }, { "epoch": 0.8766176570228336, "grad_norm": 7.875330448150635, "learning_rate": 8.331576425491136e-05, "loss": 0.7635, "step": 12938 }, { "epoch": 0.8766854122908057, "grad_norm": 6.341701030731201, "learning_rate": 8.331439523581354e-05, "loss": 1.2747, "step": 12939 }, { "epoch": 0.8767531675587777, "grad_norm": 5.573342323303223, "learning_rate": 8.331302621671572e-05, "loss": 0.8176, "step": 12940 }, { "epoch": 0.8768209228267497, "grad_norm": 6.964317321777344, "learning_rate": 8.331165719761792e-05, "loss": 0.8216, "step": 12941 }, { "epoch": 0.8768886780947218, "grad_norm": 5.422325611114502, "learning_rate": 8.33102881785201e-05, "loss": 0.6236, "step": 12942 }, { "epoch": 0.8769564333626939, "grad_norm": 6.422939300537109, "learning_rate": 8.330891915942228e-05, "loss": 1.0188, "step": 12943 }, { "epoch": 0.877024188630666, "grad_norm": 5.843238830566406, "learning_rate": 8.330755014032447e-05, "loss": 0.9579, "step": 12944 }, { "epoch": 0.8770919438986381, "grad_norm": 5.675114631652832, "learning_rate": 8.330618112122665e-05, "loss": 0.7776, "step": 12945 }, { "epoch": 0.8771596991666102, "grad_norm": 6.512584686279297, "learning_rate": 8.330481210212883e-05, "loss": 0.8351, "step": 12946 }, { "epoch": 0.8772274544345823, "grad_norm": 7.719078063964844, "learning_rate": 8.330344308303101e-05, "loss": 1.0703, "step": 12947 }, { "epoch": 0.8772952097025544, "grad_norm": 6.948828220367432, "learning_rate": 8.33020740639332e-05, "loss": 0.843, "step": 12948 }, { "epoch": 0.8773629649705265, "grad_norm": 5.105550289154053, "learning_rate": 8.330070504483539e-05, "loss": 0.7685, "step": 12949 }, { "epoch": 0.8774307202384986, "grad_norm": 4.994384288787842, "learning_rate": 8.329933602573757e-05, "loss": 0.6291, "step": 12950 }, { "epoch": 0.8774984755064706, "grad_norm": 5.846680164337158, "learning_rate": 8.329796700663975e-05, "loss": 0.7701, "step": 12951 }, { "epoch": 0.8775662307744427, "grad_norm": 8.621549606323242, "learning_rate": 8.329659798754194e-05, "loss": 0.7899, "step": 12952 }, { "epoch": 0.8776339860424148, "grad_norm": 5.434492588043213, "learning_rate": 8.329522896844412e-05, "loss": 0.7942, "step": 12953 }, { "epoch": 0.8777017413103869, "grad_norm": 8.437414169311523, "learning_rate": 8.32938599493463e-05, "loss": 0.8845, "step": 12954 }, { "epoch": 0.877769496578359, "grad_norm": 5.602294921875, "learning_rate": 8.329249093024848e-05, "loss": 0.7475, "step": 12955 }, { "epoch": 0.8778372518463311, "grad_norm": 6.249868392944336, "learning_rate": 8.329112191115066e-05, "loss": 0.6117, "step": 12956 }, { "epoch": 0.8779050071143031, "grad_norm": 6.123753547668457, "learning_rate": 8.328975289205286e-05, "loss": 0.996, "step": 12957 }, { "epoch": 0.8779727623822752, "grad_norm": 6.797160625457764, "learning_rate": 8.328838387295504e-05, "loss": 0.8562, "step": 12958 }, { "epoch": 0.8780405176502473, "grad_norm": 6.009333610534668, "learning_rate": 8.328701485385722e-05, "loss": 0.7396, "step": 12959 }, { "epoch": 0.8781082729182194, "grad_norm": 6.817856788635254, "learning_rate": 8.32856458347594e-05, "loss": 0.9193, "step": 12960 }, { "epoch": 0.8781760281861914, "grad_norm": 6.004026412963867, "learning_rate": 8.328427681566158e-05, "loss": 0.7822, "step": 12961 }, { "epoch": 0.8782437834541635, "grad_norm": 5.585984230041504, "learning_rate": 8.328290779656377e-05, "loss": 0.6767, "step": 12962 }, { "epoch": 0.8783115387221356, "grad_norm": 9.209588050842285, "learning_rate": 8.328153877746595e-05, "loss": 0.9714, "step": 12963 }, { "epoch": 0.8783792939901077, "grad_norm": 6.652541160583496, "learning_rate": 8.328016975836813e-05, "loss": 0.7528, "step": 12964 }, { "epoch": 0.8784470492580798, "grad_norm": 6.689975261688232, "learning_rate": 8.327880073927031e-05, "loss": 0.7516, "step": 12965 }, { "epoch": 0.8785148045260519, "grad_norm": 5.454050540924072, "learning_rate": 8.32774317201725e-05, "loss": 0.7394, "step": 12966 }, { "epoch": 0.878582559794024, "grad_norm": 4.421219348907471, "learning_rate": 8.327606270107469e-05, "loss": 0.7704, "step": 12967 }, { "epoch": 0.8786503150619961, "grad_norm": 4.957309246063232, "learning_rate": 8.327469368197687e-05, "loss": 0.7643, "step": 12968 }, { "epoch": 0.8787180703299682, "grad_norm": 5.908604621887207, "learning_rate": 8.327332466287905e-05, "loss": 0.9247, "step": 12969 }, { "epoch": 0.8787858255979403, "grad_norm": 7.434008598327637, "learning_rate": 8.327195564378123e-05, "loss": 0.9366, "step": 12970 }, { "epoch": 0.8788535808659124, "grad_norm": 7.337964057922363, "learning_rate": 8.327058662468342e-05, "loss": 0.94, "step": 12971 }, { "epoch": 0.8789213361338845, "grad_norm": 6.679739475250244, "learning_rate": 8.32692176055856e-05, "loss": 0.9922, "step": 12972 }, { "epoch": 0.8789890914018565, "grad_norm": 5.989971160888672, "learning_rate": 8.326784858648778e-05, "loss": 0.8581, "step": 12973 }, { "epoch": 0.8790568466698285, "grad_norm": 4.795865058898926, "learning_rate": 8.326647956738996e-05, "loss": 0.8417, "step": 12974 }, { "epoch": 0.8791246019378006, "grad_norm": 6.574687957763672, "learning_rate": 8.326511054829216e-05, "loss": 0.9009, "step": 12975 }, { "epoch": 0.8791923572057727, "grad_norm": 5.56545352935791, "learning_rate": 8.326374152919434e-05, "loss": 0.7627, "step": 12976 }, { "epoch": 0.8792601124737448, "grad_norm": 7.365011215209961, "learning_rate": 8.326237251009652e-05, "loss": 1.0212, "step": 12977 }, { "epoch": 0.8793278677417169, "grad_norm": 5.9187140464782715, "learning_rate": 8.32610034909987e-05, "loss": 0.804, "step": 12978 }, { "epoch": 0.879395623009689, "grad_norm": 5.450742721557617, "learning_rate": 8.325963447190088e-05, "loss": 0.7946, "step": 12979 }, { "epoch": 0.8794633782776611, "grad_norm": 5.892670154571533, "learning_rate": 8.325826545280307e-05, "loss": 0.7495, "step": 12980 }, { "epoch": 0.8795311335456332, "grad_norm": 5.977981090545654, "learning_rate": 8.325689643370525e-05, "loss": 0.789, "step": 12981 }, { "epoch": 0.8795988888136053, "grad_norm": 6.273918628692627, "learning_rate": 8.325552741460743e-05, "loss": 1.0261, "step": 12982 }, { "epoch": 0.8796666440815774, "grad_norm": 5.750874996185303, "learning_rate": 8.325415839550961e-05, "loss": 0.8008, "step": 12983 }, { "epoch": 0.8797343993495494, "grad_norm": 6.168051242828369, "learning_rate": 8.32527893764118e-05, "loss": 0.7802, "step": 12984 }, { "epoch": 0.8798021546175215, "grad_norm": 6.125532627105713, "learning_rate": 8.325142035731399e-05, "loss": 0.8281, "step": 12985 }, { "epoch": 0.8798699098854936, "grad_norm": 5.973419666290283, "learning_rate": 8.325005133821617e-05, "loss": 0.8108, "step": 12986 }, { "epoch": 0.8799376651534657, "grad_norm": 5.780817031860352, "learning_rate": 8.324868231911836e-05, "loss": 0.6535, "step": 12987 }, { "epoch": 0.8800054204214378, "grad_norm": 5.342846870422363, "learning_rate": 8.324731330002054e-05, "loss": 0.6626, "step": 12988 }, { "epoch": 0.8800731756894099, "grad_norm": 5.165645599365234, "learning_rate": 8.324594428092272e-05, "loss": 0.8454, "step": 12989 }, { "epoch": 0.8801409309573819, "grad_norm": 7.062921524047852, "learning_rate": 8.324457526182492e-05, "loss": 0.6814, "step": 12990 }, { "epoch": 0.880208686225354, "grad_norm": 8.127230644226074, "learning_rate": 8.32432062427271e-05, "loss": 0.9044, "step": 12991 }, { "epoch": 0.8802764414933261, "grad_norm": 7.321716785430908, "learning_rate": 8.324183722362928e-05, "loss": 0.8596, "step": 12992 }, { "epoch": 0.8803441967612982, "grad_norm": 6.0520806312561035, "learning_rate": 8.324046820453146e-05, "loss": 0.7636, "step": 12993 }, { "epoch": 0.8804119520292703, "grad_norm": 4.8115973472595215, "learning_rate": 8.323909918543365e-05, "loss": 0.5694, "step": 12994 }, { "epoch": 0.8804797072972423, "grad_norm": 7.053106784820557, "learning_rate": 8.323773016633583e-05, "loss": 0.5812, "step": 12995 }, { "epoch": 0.8805474625652144, "grad_norm": 5.414585590362549, "learning_rate": 8.323636114723801e-05, "loss": 0.6227, "step": 12996 }, { "epoch": 0.8806152178331865, "grad_norm": 6.686954498291016, "learning_rate": 8.323499212814019e-05, "loss": 0.8822, "step": 12997 }, { "epoch": 0.8806829731011586, "grad_norm": 5.530512809753418, "learning_rate": 8.323362310904239e-05, "loss": 0.745, "step": 12998 }, { "epoch": 0.8807507283691307, "grad_norm": 7.229578495025635, "learning_rate": 8.323225408994457e-05, "loss": 0.7521, "step": 12999 }, { "epoch": 0.8808184836371028, "grad_norm": 5.444945335388184, "learning_rate": 8.323088507084675e-05, "loss": 0.741, "step": 13000 }, { "epoch": 0.8808862389050749, "grad_norm": 5.646410942077637, "learning_rate": 8.322951605174893e-05, "loss": 0.6562, "step": 13001 }, { "epoch": 0.880953994173047, "grad_norm": 6.224180698394775, "learning_rate": 8.32281470326511e-05, "loss": 0.6087, "step": 13002 }, { "epoch": 0.8810217494410191, "grad_norm": 5.3466997146606445, "learning_rate": 8.32267780135533e-05, "loss": 0.7514, "step": 13003 }, { "epoch": 0.8810895047089912, "grad_norm": 8.124218940734863, "learning_rate": 8.322540899445548e-05, "loss": 0.8344, "step": 13004 }, { "epoch": 0.8811572599769633, "grad_norm": 5.5622172355651855, "learning_rate": 8.322403997535766e-05, "loss": 0.8112, "step": 13005 }, { "epoch": 0.8812250152449352, "grad_norm": 6.751789093017578, "learning_rate": 8.322267095625984e-05, "loss": 0.9156, "step": 13006 }, { "epoch": 0.8812927705129073, "grad_norm": 6.383172512054443, "learning_rate": 8.322130193716204e-05, "loss": 0.8049, "step": 13007 }, { "epoch": 0.8813605257808794, "grad_norm": 5.960168838500977, "learning_rate": 8.321993291806422e-05, "loss": 0.9027, "step": 13008 }, { "epoch": 0.8814282810488515, "grad_norm": 5.624762535095215, "learning_rate": 8.32185638989664e-05, "loss": 0.6223, "step": 13009 }, { "epoch": 0.8814960363168236, "grad_norm": 5.830256938934326, "learning_rate": 8.321719487986858e-05, "loss": 0.9301, "step": 13010 }, { "epoch": 0.8815637915847957, "grad_norm": 11.064977645874023, "learning_rate": 8.321582586077076e-05, "loss": 0.8125, "step": 13011 }, { "epoch": 0.8816315468527678, "grad_norm": 5.739172458648682, "learning_rate": 8.321445684167295e-05, "loss": 0.8507, "step": 13012 }, { "epoch": 0.8816993021207399, "grad_norm": 5.400548458099365, "learning_rate": 8.321308782257513e-05, "loss": 0.7381, "step": 13013 }, { "epoch": 0.881767057388712, "grad_norm": 5.973668098449707, "learning_rate": 8.321171880347731e-05, "loss": 0.7881, "step": 13014 }, { "epoch": 0.8818348126566841, "grad_norm": 7.544906139373779, "learning_rate": 8.321034978437949e-05, "loss": 0.7593, "step": 13015 }, { "epoch": 0.8819025679246562, "grad_norm": 5.305513381958008, "learning_rate": 8.320898076528167e-05, "loss": 0.7552, "step": 13016 }, { "epoch": 0.8819703231926282, "grad_norm": 5.9566545486450195, "learning_rate": 8.320761174618387e-05, "loss": 0.8524, "step": 13017 }, { "epoch": 0.8820380784606003, "grad_norm": 5.806572437286377, "learning_rate": 8.320624272708605e-05, "loss": 0.6832, "step": 13018 }, { "epoch": 0.8821058337285724, "grad_norm": 5.694754600524902, "learning_rate": 8.320487370798823e-05, "loss": 0.8726, "step": 13019 }, { "epoch": 0.8821735889965445, "grad_norm": 6.508894920349121, "learning_rate": 8.32035046888904e-05, "loss": 0.8468, "step": 13020 }, { "epoch": 0.8822413442645166, "grad_norm": 5.450093746185303, "learning_rate": 8.32021356697926e-05, "loss": 0.715, "step": 13021 }, { "epoch": 0.8823090995324887, "grad_norm": 7.250792026519775, "learning_rate": 8.320076665069478e-05, "loss": 0.8587, "step": 13022 }, { "epoch": 0.8823768548004607, "grad_norm": 7.200087070465088, "learning_rate": 8.319939763159696e-05, "loss": 0.7188, "step": 13023 }, { "epoch": 0.8824446100684328, "grad_norm": 7.003962993621826, "learning_rate": 8.319802861249914e-05, "loss": 0.7586, "step": 13024 }, { "epoch": 0.8825123653364049, "grad_norm": 5.545292854309082, "learning_rate": 8.319665959340132e-05, "loss": 0.7623, "step": 13025 }, { "epoch": 0.882580120604377, "grad_norm": 7.445784091949463, "learning_rate": 8.319529057430352e-05, "loss": 1.0729, "step": 13026 }, { "epoch": 0.882647875872349, "grad_norm": 7.989095211029053, "learning_rate": 8.31939215552057e-05, "loss": 0.8033, "step": 13027 }, { "epoch": 0.8827156311403211, "grad_norm": 5.488166809082031, "learning_rate": 8.319255253610788e-05, "loss": 0.7301, "step": 13028 }, { "epoch": 0.8827833864082932, "grad_norm": 7.751863956451416, "learning_rate": 8.319118351701006e-05, "loss": 0.5935, "step": 13029 }, { "epoch": 0.8828511416762653, "grad_norm": 7.284234046936035, "learning_rate": 8.318981449791225e-05, "loss": 0.8942, "step": 13030 }, { "epoch": 0.8829188969442374, "grad_norm": 6.073469638824463, "learning_rate": 8.318844547881443e-05, "loss": 0.7782, "step": 13031 }, { "epoch": 0.8829866522122095, "grad_norm": 5.341745853424072, "learning_rate": 8.318707645971661e-05, "loss": 0.8347, "step": 13032 }, { "epoch": 0.8830544074801816, "grad_norm": 5.9635820388793945, "learning_rate": 8.31857074406188e-05, "loss": 0.9042, "step": 13033 }, { "epoch": 0.8831221627481537, "grad_norm": 5.801054954528809, "learning_rate": 8.318433842152099e-05, "loss": 0.7764, "step": 13034 }, { "epoch": 0.8831899180161258, "grad_norm": 7.042034149169922, "learning_rate": 8.318296940242317e-05, "loss": 1.0679, "step": 13035 }, { "epoch": 0.8832576732840979, "grad_norm": 4.915299892425537, "learning_rate": 8.318160038332536e-05, "loss": 0.7412, "step": 13036 }, { "epoch": 0.88332542855207, "grad_norm": 9.298592567443848, "learning_rate": 8.318023136422754e-05, "loss": 0.8991, "step": 13037 }, { "epoch": 0.8833931838200421, "grad_norm": 4.614468097686768, "learning_rate": 8.317886234512972e-05, "loss": 0.6786, "step": 13038 }, { "epoch": 0.883460939088014, "grad_norm": 5.394043445587158, "learning_rate": 8.31774933260319e-05, "loss": 0.7442, "step": 13039 }, { "epoch": 0.8835286943559861, "grad_norm": 6.138361930847168, "learning_rate": 8.31761243069341e-05, "loss": 0.5959, "step": 13040 }, { "epoch": 0.8835964496239582, "grad_norm": 6.376340866088867, "learning_rate": 8.317475528783628e-05, "loss": 1.0073, "step": 13041 }, { "epoch": 0.8836642048919303, "grad_norm": 4.798174858093262, "learning_rate": 8.317338626873846e-05, "loss": 0.6251, "step": 13042 }, { "epoch": 0.8837319601599024, "grad_norm": 6.462924957275391, "learning_rate": 8.317201724964064e-05, "loss": 0.9081, "step": 13043 }, { "epoch": 0.8837997154278745, "grad_norm": 5.447483062744141, "learning_rate": 8.317064823054283e-05, "loss": 0.8058, "step": 13044 }, { "epoch": 0.8838674706958466, "grad_norm": 6.656740188598633, "learning_rate": 8.316927921144501e-05, "loss": 0.8681, "step": 13045 }, { "epoch": 0.8839352259638187, "grad_norm": 5.062714099884033, "learning_rate": 8.316791019234719e-05, "loss": 0.5941, "step": 13046 }, { "epoch": 0.8840029812317908, "grad_norm": 6.954104900360107, "learning_rate": 8.316654117324937e-05, "loss": 0.9577, "step": 13047 }, { "epoch": 0.8840707364997629, "grad_norm": 5.395656585693359, "learning_rate": 8.316517215415155e-05, "loss": 0.6636, "step": 13048 }, { "epoch": 0.884138491767735, "grad_norm": 7.5406270027160645, "learning_rate": 8.316380313505374e-05, "loss": 0.8675, "step": 13049 }, { "epoch": 0.884206247035707, "grad_norm": 4.966914653778076, "learning_rate": 8.316243411595593e-05, "loss": 0.7242, "step": 13050 }, { "epoch": 0.8842740023036791, "grad_norm": 6.133572101593018, "learning_rate": 8.31610650968581e-05, "loss": 0.8462, "step": 13051 }, { "epoch": 0.8843417575716512, "grad_norm": 4.549497127532959, "learning_rate": 8.315969607776029e-05, "loss": 0.7792, "step": 13052 }, { "epoch": 0.8844095128396233, "grad_norm": 5.715592861175537, "learning_rate": 8.315832705866248e-05, "loss": 0.6261, "step": 13053 }, { "epoch": 0.8844772681075954, "grad_norm": 7.551342010498047, "learning_rate": 8.315695803956466e-05, "loss": 0.8017, "step": 13054 }, { "epoch": 0.8845450233755674, "grad_norm": 5.671367645263672, "learning_rate": 8.315558902046684e-05, "loss": 0.8092, "step": 13055 }, { "epoch": 0.8846127786435395, "grad_norm": 5.61579704284668, "learning_rate": 8.315422000136902e-05, "loss": 0.7854, "step": 13056 }, { "epoch": 0.8846805339115116, "grad_norm": 8.364435195922852, "learning_rate": 8.31528509822712e-05, "loss": 0.7893, "step": 13057 }, { "epoch": 0.8847482891794837, "grad_norm": 7.481292724609375, "learning_rate": 8.31514819631734e-05, "loss": 0.7083, "step": 13058 }, { "epoch": 0.8848160444474558, "grad_norm": 6.476237773895264, "learning_rate": 8.315011294407558e-05, "loss": 0.7749, "step": 13059 }, { "epoch": 0.8848837997154279, "grad_norm": 6.060905456542969, "learning_rate": 8.314874392497776e-05, "loss": 0.8042, "step": 13060 }, { "epoch": 0.8849515549834, "grad_norm": 5.672494888305664, "learning_rate": 8.314737490587994e-05, "loss": 0.5887, "step": 13061 }, { "epoch": 0.885019310251372, "grad_norm": 8.22986888885498, "learning_rate": 8.314600588678213e-05, "loss": 1.1182, "step": 13062 }, { "epoch": 0.8850870655193441, "grad_norm": 8.389451026916504, "learning_rate": 8.314463686768431e-05, "loss": 1.022, "step": 13063 }, { "epoch": 0.8851548207873162, "grad_norm": 5.47923469543457, "learning_rate": 8.314326784858649e-05, "loss": 0.7902, "step": 13064 }, { "epoch": 0.8852225760552883, "grad_norm": 5.775954723358154, "learning_rate": 8.314189882948867e-05, "loss": 0.8831, "step": 13065 }, { "epoch": 0.8852903313232604, "grad_norm": 6.233015060424805, "learning_rate": 8.314052981039085e-05, "loss": 0.7723, "step": 13066 }, { "epoch": 0.8853580865912325, "grad_norm": 6.433823585510254, "learning_rate": 8.313916079129305e-05, "loss": 0.7764, "step": 13067 }, { "epoch": 0.8854258418592046, "grad_norm": 8.425026893615723, "learning_rate": 8.313779177219523e-05, "loss": 0.9493, "step": 13068 }, { "epoch": 0.8854935971271767, "grad_norm": 5.355708599090576, "learning_rate": 8.31364227530974e-05, "loss": 0.7761, "step": 13069 }, { "epoch": 0.8855613523951488, "grad_norm": 4.858754634857178, "learning_rate": 8.313505373399959e-05, "loss": 0.6928, "step": 13070 }, { "epoch": 0.8856291076631209, "grad_norm": 6.304715633392334, "learning_rate": 8.313368471490177e-05, "loss": 0.7542, "step": 13071 }, { "epoch": 0.8856968629310928, "grad_norm": 5.10394811630249, "learning_rate": 8.313231569580396e-05, "loss": 0.6926, "step": 13072 }, { "epoch": 0.8857646181990649, "grad_norm": 5.681312561035156, "learning_rate": 8.313094667670614e-05, "loss": 0.5444, "step": 13073 }, { "epoch": 0.885832373467037, "grad_norm": 5.030150890350342, "learning_rate": 8.312957765760832e-05, "loss": 0.7594, "step": 13074 }, { "epoch": 0.8859001287350091, "grad_norm": 5.284333229064941, "learning_rate": 8.31282086385105e-05, "loss": 0.6214, "step": 13075 }, { "epoch": 0.8859678840029812, "grad_norm": 6.305645942687988, "learning_rate": 8.31268396194127e-05, "loss": 0.5335, "step": 13076 }, { "epoch": 0.8860356392709533, "grad_norm": 4.828126907348633, "learning_rate": 8.312547060031488e-05, "loss": 0.5699, "step": 13077 }, { "epoch": 0.8861033945389254, "grad_norm": 6.154019355773926, "learning_rate": 8.312410158121706e-05, "loss": 0.8389, "step": 13078 }, { "epoch": 0.8861711498068975, "grad_norm": 7.554732322692871, "learning_rate": 8.312273256211924e-05, "loss": 1.1726, "step": 13079 }, { "epoch": 0.8862389050748696, "grad_norm": 7.06660270690918, "learning_rate": 8.312136354302143e-05, "loss": 0.7025, "step": 13080 }, { "epoch": 0.8863066603428417, "grad_norm": 6.983936786651611, "learning_rate": 8.311999452392361e-05, "loss": 0.7986, "step": 13081 }, { "epoch": 0.8863744156108138, "grad_norm": 6.566361427307129, "learning_rate": 8.311862550482579e-05, "loss": 0.6886, "step": 13082 }, { "epoch": 0.8864421708787859, "grad_norm": 7.804087162017822, "learning_rate": 8.311725648572798e-05, "loss": 0.8087, "step": 13083 }, { "epoch": 0.8865099261467579, "grad_norm": 7.015219211578369, "learning_rate": 8.311588746663017e-05, "loss": 0.7983, "step": 13084 }, { "epoch": 0.88657768141473, "grad_norm": 7.756356239318848, "learning_rate": 8.311451844753235e-05, "loss": 0.865, "step": 13085 }, { "epoch": 0.8866454366827021, "grad_norm": 4.957078456878662, "learning_rate": 8.311314942843454e-05, "loss": 0.7427, "step": 13086 }, { "epoch": 0.8867131919506742, "grad_norm": 7.29202127456665, "learning_rate": 8.311178040933672e-05, "loss": 0.9095, "step": 13087 }, { "epoch": 0.8867809472186462, "grad_norm": 6.850350856781006, "learning_rate": 8.31104113902389e-05, "loss": 0.6982, "step": 13088 }, { "epoch": 0.8868487024866183, "grad_norm": 7.409617900848389, "learning_rate": 8.310904237114108e-05, "loss": 0.7422, "step": 13089 }, { "epoch": 0.8869164577545904, "grad_norm": 6.317065715789795, "learning_rate": 8.310767335204327e-05, "loss": 0.7854, "step": 13090 }, { "epoch": 0.8869842130225625, "grad_norm": 6.616084098815918, "learning_rate": 8.310630433294545e-05, "loss": 0.5892, "step": 13091 }, { "epoch": 0.8870519682905346, "grad_norm": 4.691561222076416, "learning_rate": 8.310493531384764e-05, "loss": 0.7925, "step": 13092 }, { "epoch": 0.8871197235585067, "grad_norm": 6.057029724121094, "learning_rate": 8.310356629474982e-05, "loss": 0.9739, "step": 13093 }, { "epoch": 0.8871874788264787, "grad_norm": 6.407534122467041, "learning_rate": 8.3102197275652e-05, "loss": 0.8429, "step": 13094 }, { "epoch": 0.8872552340944508, "grad_norm": 5.778253078460693, "learning_rate": 8.310082825655419e-05, "loss": 0.8443, "step": 13095 }, { "epoch": 0.8873229893624229, "grad_norm": 6.153006553649902, "learning_rate": 8.309945923745637e-05, "loss": 0.8315, "step": 13096 }, { "epoch": 0.887390744630395, "grad_norm": 6.307031631469727, "learning_rate": 8.309809021835855e-05, "loss": 1.0075, "step": 13097 }, { "epoch": 0.8874584998983671, "grad_norm": 6.513178825378418, "learning_rate": 8.309672119926073e-05, "loss": 0.7945, "step": 13098 }, { "epoch": 0.8875262551663392, "grad_norm": 7.987000942230225, "learning_rate": 8.309535218016292e-05, "loss": 0.7709, "step": 13099 }, { "epoch": 0.8875940104343113, "grad_norm": 5.424191951751709, "learning_rate": 8.30939831610651e-05, "loss": 0.7174, "step": 13100 }, { "epoch": 0.8876617657022834, "grad_norm": 4.814406871795654, "learning_rate": 8.309261414196729e-05, "loss": 0.8897, "step": 13101 }, { "epoch": 0.8877295209702555, "grad_norm": 5.997096538543701, "learning_rate": 8.309124512286947e-05, "loss": 0.6153, "step": 13102 }, { "epoch": 0.8877972762382276, "grad_norm": 7.016286373138428, "learning_rate": 8.308987610377165e-05, "loss": 0.9323, "step": 13103 }, { "epoch": 0.8878650315061996, "grad_norm": 5.908369541168213, "learning_rate": 8.308850708467384e-05, "loss": 0.7758, "step": 13104 }, { "epoch": 0.8879327867741716, "grad_norm": 6.3806376457214355, "learning_rate": 8.308713806557602e-05, "loss": 0.803, "step": 13105 }, { "epoch": 0.8880005420421437, "grad_norm": 5.187054634094238, "learning_rate": 8.30857690464782e-05, "loss": 0.8509, "step": 13106 }, { "epoch": 0.8880682973101158, "grad_norm": 5.6671905517578125, "learning_rate": 8.308440002738038e-05, "loss": 0.9532, "step": 13107 }, { "epoch": 0.8881360525780879, "grad_norm": 5.64413595199585, "learning_rate": 8.308303100828257e-05, "loss": 0.6387, "step": 13108 }, { "epoch": 0.88820380784606, "grad_norm": 7.69677209854126, "learning_rate": 8.308166198918476e-05, "loss": 0.9477, "step": 13109 }, { "epoch": 0.8882715631140321, "grad_norm": 6.669020652770996, "learning_rate": 8.308029297008694e-05, "loss": 0.7831, "step": 13110 }, { "epoch": 0.8883393183820042, "grad_norm": 8.060406684875488, "learning_rate": 8.307892395098912e-05, "loss": 0.7206, "step": 13111 }, { "epoch": 0.8884070736499763, "grad_norm": 6.262596607208252, "learning_rate": 8.30775549318913e-05, "loss": 0.7359, "step": 13112 }, { "epoch": 0.8884748289179484, "grad_norm": 7.678366661071777, "learning_rate": 8.307618591279349e-05, "loss": 0.7463, "step": 13113 }, { "epoch": 0.8885425841859205, "grad_norm": 4.828142166137695, "learning_rate": 8.307481689369567e-05, "loss": 0.6186, "step": 13114 }, { "epoch": 0.8886103394538926, "grad_norm": 6.147395133972168, "learning_rate": 8.307344787459785e-05, "loss": 0.7742, "step": 13115 }, { "epoch": 0.8886780947218647, "grad_norm": 7.005827903747559, "learning_rate": 8.307207885550003e-05, "loss": 0.899, "step": 13116 }, { "epoch": 0.8887458499898367, "grad_norm": 6.948807239532471, "learning_rate": 8.307070983640221e-05, "loss": 0.9572, "step": 13117 }, { "epoch": 0.8888136052578088, "grad_norm": 5.939198970794678, "learning_rate": 8.30693408173044e-05, "loss": 0.8103, "step": 13118 }, { "epoch": 0.8888813605257809, "grad_norm": 5.84893274307251, "learning_rate": 8.306797179820659e-05, "loss": 0.7653, "step": 13119 }, { "epoch": 0.888949115793753, "grad_norm": 9.817476272583008, "learning_rate": 8.306660277910877e-05, "loss": 0.8496, "step": 13120 }, { "epoch": 0.889016871061725, "grad_norm": 6.369333267211914, "learning_rate": 8.306523376001095e-05, "loss": 0.8215, "step": 13121 }, { "epoch": 0.8890846263296971, "grad_norm": 6.099016189575195, "learning_rate": 8.306386474091314e-05, "loss": 0.913, "step": 13122 }, { "epoch": 0.8891523815976692, "grad_norm": 5.678265571594238, "learning_rate": 8.306249572181532e-05, "loss": 0.7645, "step": 13123 }, { "epoch": 0.8892201368656413, "grad_norm": 5.685331344604492, "learning_rate": 8.30611267027175e-05, "loss": 0.8976, "step": 13124 }, { "epoch": 0.8892878921336134, "grad_norm": 6.949743747711182, "learning_rate": 8.305975768361968e-05, "loss": 0.708, "step": 13125 }, { "epoch": 0.8893556474015855, "grad_norm": 6.98148250579834, "learning_rate": 8.305838866452188e-05, "loss": 1.0055, "step": 13126 }, { "epoch": 0.8894234026695576, "grad_norm": 6.679849147796631, "learning_rate": 8.305701964542406e-05, "loss": 0.7262, "step": 13127 }, { "epoch": 0.8894911579375296, "grad_norm": 7.759592533111572, "learning_rate": 8.305565062632624e-05, "loss": 0.5805, "step": 13128 }, { "epoch": 0.8895589132055017, "grad_norm": 5.812005519866943, "learning_rate": 8.305428160722843e-05, "loss": 0.8662, "step": 13129 }, { "epoch": 0.8896266684734738, "grad_norm": 4.861279487609863, "learning_rate": 8.305291258813061e-05, "loss": 0.5661, "step": 13130 }, { "epoch": 0.8896944237414459, "grad_norm": 6.805129051208496, "learning_rate": 8.305154356903279e-05, "loss": 0.9156, "step": 13131 }, { "epoch": 0.889762179009418, "grad_norm": 4.98897647857666, "learning_rate": 8.305017454993498e-05, "loss": 0.698, "step": 13132 }, { "epoch": 0.8898299342773901, "grad_norm": 6.2775726318359375, "learning_rate": 8.304880553083716e-05, "loss": 0.9092, "step": 13133 }, { "epoch": 0.8898976895453622, "grad_norm": 7.023934841156006, "learning_rate": 8.304743651173934e-05, "loss": 0.7898, "step": 13134 }, { "epoch": 0.8899654448133343, "grad_norm": 6.283311367034912, "learning_rate": 8.304606749264153e-05, "loss": 0.7596, "step": 13135 }, { "epoch": 0.8900332000813064, "grad_norm": 4.842438220977783, "learning_rate": 8.304469847354372e-05, "loss": 0.7008, "step": 13136 }, { "epoch": 0.8901009553492784, "grad_norm": 5.7367987632751465, "learning_rate": 8.30433294544459e-05, "loss": 0.987, "step": 13137 }, { "epoch": 0.8901687106172504, "grad_norm": 6.082010746002197, "learning_rate": 8.304196043534808e-05, "loss": 0.6194, "step": 13138 }, { "epoch": 0.8902364658852225, "grad_norm": 4.863292217254639, "learning_rate": 8.304059141625026e-05, "loss": 0.689, "step": 13139 }, { "epoch": 0.8903042211531946, "grad_norm": 7.170241832733154, "learning_rate": 8.303922239715245e-05, "loss": 0.8191, "step": 13140 }, { "epoch": 0.8903719764211667, "grad_norm": 5.541210174560547, "learning_rate": 8.303785337805463e-05, "loss": 0.7614, "step": 13141 }, { "epoch": 0.8904397316891388, "grad_norm": 4.93407678604126, "learning_rate": 8.303648435895681e-05, "loss": 0.6698, "step": 13142 }, { "epoch": 0.8905074869571109, "grad_norm": 5.758559226989746, "learning_rate": 8.3035115339859e-05, "loss": 0.8883, "step": 13143 }, { "epoch": 0.890575242225083, "grad_norm": 5.253477096557617, "learning_rate": 8.303374632076118e-05, "loss": 0.7487, "step": 13144 }, { "epoch": 0.8906429974930551, "grad_norm": 7.421339511871338, "learning_rate": 8.303237730166337e-05, "loss": 0.7125, "step": 13145 }, { "epoch": 0.8907107527610272, "grad_norm": 6.252211093902588, "learning_rate": 8.303100828256555e-05, "loss": 0.7768, "step": 13146 }, { "epoch": 0.8907785080289993, "grad_norm": 5.624354362487793, "learning_rate": 8.302963926346773e-05, "loss": 0.7325, "step": 13147 }, { "epoch": 0.8908462632969714, "grad_norm": 7.876077651977539, "learning_rate": 8.302827024436991e-05, "loss": 0.9607, "step": 13148 }, { "epoch": 0.8909140185649435, "grad_norm": 7.236328601837158, "learning_rate": 8.302690122527209e-05, "loss": 0.9411, "step": 13149 }, { "epoch": 0.8909817738329155, "grad_norm": 5.283116817474365, "learning_rate": 8.302553220617428e-05, "loss": 0.8631, "step": 13150 }, { "epoch": 0.8910495291008876, "grad_norm": 4.565162181854248, "learning_rate": 8.302416318707646e-05, "loss": 0.5822, "step": 13151 }, { "epoch": 0.8911172843688597, "grad_norm": 8.308517456054688, "learning_rate": 8.302279416797865e-05, "loss": 0.7722, "step": 13152 }, { "epoch": 0.8911850396368317, "grad_norm": 5.399427890777588, "learning_rate": 8.302142514888083e-05, "loss": 0.7398, "step": 13153 }, { "epoch": 0.8912527949048038, "grad_norm": 6.947094917297363, "learning_rate": 8.302005612978302e-05, "loss": 0.8397, "step": 13154 }, { "epoch": 0.8913205501727759, "grad_norm": 5.054037094116211, "learning_rate": 8.30186871106852e-05, "loss": 0.8779, "step": 13155 }, { "epoch": 0.891388305440748, "grad_norm": 5.343729496002197, "learning_rate": 8.301731809158738e-05, "loss": 0.6804, "step": 13156 }, { "epoch": 0.8914560607087201, "grad_norm": 5.4643425941467285, "learning_rate": 8.301594907248956e-05, "loss": 0.764, "step": 13157 }, { "epoch": 0.8915238159766922, "grad_norm": 5.799702167510986, "learning_rate": 8.301458005339174e-05, "loss": 0.8367, "step": 13158 }, { "epoch": 0.8915915712446643, "grad_norm": 5.780505657196045, "learning_rate": 8.301321103429393e-05, "loss": 0.6387, "step": 13159 }, { "epoch": 0.8916593265126364, "grad_norm": 7.076272964477539, "learning_rate": 8.301184201519612e-05, "loss": 0.8751, "step": 13160 }, { "epoch": 0.8917270817806084, "grad_norm": 5.820427417755127, "learning_rate": 8.30104729960983e-05, "loss": 0.7296, "step": 13161 }, { "epoch": 0.8917948370485805, "grad_norm": 6.612248420715332, "learning_rate": 8.300910397700048e-05, "loss": 0.9585, "step": 13162 }, { "epoch": 0.8918625923165526, "grad_norm": 5.051817893981934, "learning_rate": 8.300773495790267e-05, "loss": 0.7389, "step": 13163 }, { "epoch": 0.8919303475845247, "grad_norm": 6.8403639793396, "learning_rate": 8.300636593880485e-05, "loss": 0.9632, "step": 13164 }, { "epoch": 0.8919981028524968, "grad_norm": 5.8375983238220215, "learning_rate": 8.300499691970703e-05, "loss": 0.7462, "step": 13165 }, { "epoch": 0.8920658581204689, "grad_norm": 4.138559341430664, "learning_rate": 8.300362790060921e-05, "loss": 0.5961, "step": 13166 }, { "epoch": 0.892133613388441, "grad_norm": 5.837608814239502, "learning_rate": 8.300225888151139e-05, "loss": 0.8189, "step": 13167 }, { "epoch": 0.8922013686564131, "grad_norm": 5.9026875495910645, "learning_rate": 8.300088986241358e-05, "loss": 0.8394, "step": 13168 }, { "epoch": 0.8922691239243851, "grad_norm": 5.615271091461182, "learning_rate": 8.299952084331577e-05, "loss": 0.7897, "step": 13169 }, { "epoch": 0.8923368791923572, "grad_norm": 5.872547149658203, "learning_rate": 8.299815182421795e-05, "loss": 0.7276, "step": 13170 }, { "epoch": 0.8924046344603292, "grad_norm": 7.943142414093018, "learning_rate": 8.299678280512013e-05, "loss": 0.7512, "step": 13171 }, { "epoch": 0.8924723897283013, "grad_norm": 5.548654079437256, "learning_rate": 8.299541378602232e-05, "loss": 0.8011, "step": 13172 }, { "epoch": 0.8925401449962734, "grad_norm": 5.295721054077148, "learning_rate": 8.29940447669245e-05, "loss": 0.7481, "step": 13173 }, { "epoch": 0.8926079002642455, "grad_norm": 5.564395904541016, "learning_rate": 8.299267574782668e-05, "loss": 0.7028, "step": 13174 }, { "epoch": 0.8926756555322176, "grad_norm": 6.351992607116699, "learning_rate": 8.299130672872887e-05, "loss": 0.6943, "step": 13175 }, { "epoch": 0.8927434108001897, "grad_norm": 8.362895965576172, "learning_rate": 8.298993770963105e-05, "loss": 0.9287, "step": 13176 }, { "epoch": 0.8928111660681618, "grad_norm": 6.428536415100098, "learning_rate": 8.298856869053324e-05, "loss": 0.8992, "step": 13177 }, { "epoch": 0.8928789213361339, "grad_norm": 6.148324489593506, "learning_rate": 8.298719967143543e-05, "loss": 0.6605, "step": 13178 }, { "epoch": 0.892946676604106, "grad_norm": 6.530026912689209, "learning_rate": 8.298583065233761e-05, "loss": 0.6085, "step": 13179 }, { "epoch": 0.8930144318720781, "grad_norm": 5.863303184509277, "learning_rate": 8.298446163323979e-05, "loss": 0.7368, "step": 13180 }, { "epoch": 0.8930821871400502, "grad_norm": 11.581231117248535, "learning_rate": 8.298309261414197e-05, "loss": 0.732, "step": 13181 }, { "epoch": 0.8931499424080223, "grad_norm": 7.566738128662109, "learning_rate": 8.298172359504416e-05, "loss": 0.9834, "step": 13182 }, { "epoch": 0.8932176976759943, "grad_norm": 7.298031330108643, "learning_rate": 8.298035457594634e-05, "loss": 0.9663, "step": 13183 }, { "epoch": 0.8932854529439664, "grad_norm": 6.619592666625977, "learning_rate": 8.297898555684852e-05, "loss": 0.6992, "step": 13184 }, { "epoch": 0.8933532082119385, "grad_norm": 6.211091041564941, "learning_rate": 8.29776165377507e-05, "loss": 0.6848, "step": 13185 }, { "epoch": 0.8934209634799105, "grad_norm": 5.228729248046875, "learning_rate": 8.29762475186529e-05, "loss": 0.629, "step": 13186 }, { "epoch": 0.8934887187478826, "grad_norm": 5.966601848602295, "learning_rate": 8.297487849955508e-05, "loss": 0.7837, "step": 13187 }, { "epoch": 0.8935564740158547, "grad_norm": 5.975196838378906, "learning_rate": 8.297350948045726e-05, "loss": 0.8608, "step": 13188 }, { "epoch": 0.8936242292838268, "grad_norm": 7.837411880493164, "learning_rate": 8.297214046135944e-05, "loss": 1.006, "step": 13189 }, { "epoch": 0.8936919845517989, "grad_norm": 6.313420295715332, "learning_rate": 8.297077144226162e-05, "loss": 0.9157, "step": 13190 }, { "epoch": 0.893759739819771, "grad_norm": 4.761128902435303, "learning_rate": 8.296940242316381e-05, "loss": 0.7262, "step": 13191 }, { "epoch": 0.8938274950877431, "grad_norm": 6.320545673370361, "learning_rate": 8.2968033404066e-05, "loss": 0.8437, "step": 13192 }, { "epoch": 0.8938952503557152, "grad_norm": 6.234335899353027, "learning_rate": 8.296666438496817e-05, "loss": 0.8268, "step": 13193 }, { "epoch": 0.8939630056236872, "grad_norm": 6.060486316680908, "learning_rate": 8.296529536587036e-05, "loss": 0.6344, "step": 13194 }, { "epoch": 0.8940307608916593, "grad_norm": 4.9132771492004395, "learning_rate": 8.296392634677255e-05, "loss": 0.6422, "step": 13195 }, { "epoch": 0.8940985161596314, "grad_norm": 7.546984672546387, "learning_rate": 8.296255732767473e-05, "loss": 0.8527, "step": 13196 }, { "epoch": 0.8941662714276035, "grad_norm": 5.650018215179443, "learning_rate": 8.296118830857691e-05, "loss": 0.7624, "step": 13197 }, { "epoch": 0.8942340266955756, "grad_norm": 6.712080955505371, "learning_rate": 8.295981928947909e-05, "loss": 0.9443, "step": 13198 }, { "epoch": 0.8943017819635477, "grad_norm": 7.756785869598389, "learning_rate": 8.295845027038127e-05, "loss": 0.9292, "step": 13199 }, { "epoch": 0.8943695372315198, "grad_norm": 5.824887752532959, "learning_rate": 8.295708125128346e-05, "loss": 0.7814, "step": 13200 }, { "epoch": 0.8944372924994919, "grad_norm": 6.378854751586914, "learning_rate": 8.295571223218564e-05, "loss": 0.7699, "step": 13201 }, { "epoch": 0.8945050477674639, "grad_norm": 6.914346218109131, "learning_rate": 8.295434321308782e-05, "loss": 0.7734, "step": 13202 }, { "epoch": 0.894572803035436, "grad_norm": 5.830096244812012, "learning_rate": 8.295297419399e-05, "loss": 0.762, "step": 13203 }, { "epoch": 0.894640558303408, "grad_norm": 5.791236877441406, "learning_rate": 8.295160517489219e-05, "loss": 0.6882, "step": 13204 }, { "epoch": 0.8947083135713801, "grad_norm": 5.9879326820373535, "learning_rate": 8.295023615579438e-05, "loss": 0.8134, "step": 13205 }, { "epoch": 0.8947760688393522, "grad_norm": 6.294375419616699, "learning_rate": 8.294886713669656e-05, "loss": 0.6101, "step": 13206 }, { "epoch": 0.8948438241073243, "grad_norm": 5.5092267990112305, "learning_rate": 8.294749811759874e-05, "loss": 0.8178, "step": 13207 }, { "epoch": 0.8949115793752964, "grad_norm": 5.606123924255371, "learning_rate": 8.294612909850092e-05, "loss": 0.7347, "step": 13208 }, { "epoch": 0.8949793346432685, "grad_norm": 7.427051544189453, "learning_rate": 8.294476007940311e-05, "loss": 0.8645, "step": 13209 }, { "epoch": 0.8950470899112406, "grad_norm": 8.88985538482666, "learning_rate": 8.29433910603053e-05, "loss": 0.793, "step": 13210 }, { "epoch": 0.8951148451792127, "grad_norm": 5.20978307723999, "learning_rate": 8.294202204120748e-05, "loss": 0.954, "step": 13211 }, { "epoch": 0.8951826004471848, "grad_norm": 8.192554473876953, "learning_rate": 8.294065302210966e-05, "loss": 0.812, "step": 13212 }, { "epoch": 0.8952503557151569, "grad_norm": 5.569815158843994, "learning_rate": 8.293928400301184e-05, "loss": 0.702, "step": 13213 }, { "epoch": 0.895318110983129, "grad_norm": 5.993325233459473, "learning_rate": 8.293791498391403e-05, "loss": 0.9014, "step": 13214 }, { "epoch": 0.8953858662511011, "grad_norm": 5.743150234222412, "learning_rate": 8.293654596481621e-05, "loss": 0.5031, "step": 13215 }, { "epoch": 0.8954536215190732, "grad_norm": 6.546820163726807, "learning_rate": 8.293517694571839e-05, "loss": 0.8145, "step": 13216 }, { "epoch": 0.8955213767870452, "grad_norm": 6.425495147705078, "learning_rate": 8.293380792662057e-05, "loss": 0.6972, "step": 13217 }, { "epoch": 0.8955891320550172, "grad_norm": 5.768934726715088, "learning_rate": 8.293243890752276e-05, "loss": 0.7145, "step": 13218 }, { "epoch": 0.8956568873229893, "grad_norm": 5.6524224281311035, "learning_rate": 8.293106988842494e-05, "loss": 0.8436, "step": 13219 }, { "epoch": 0.8957246425909614, "grad_norm": 7.842732906341553, "learning_rate": 8.292970086932713e-05, "loss": 0.9397, "step": 13220 }, { "epoch": 0.8957923978589335, "grad_norm": 6.612356185913086, "learning_rate": 8.292833185022932e-05, "loss": 0.7468, "step": 13221 }, { "epoch": 0.8958601531269056, "grad_norm": 6.291922569274902, "learning_rate": 8.29269628311315e-05, "loss": 0.7622, "step": 13222 }, { "epoch": 0.8959279083948777, "grad_norm": 6.623983860015869, "learning_rate": 8.292559381203368e-05, "loss": 0.79, "step": 13223 }, { "epoch": 0.8959956636628498, "grad_norm": 5.58621883392334, "learning_rate": 8.292422479293587e-05, "loss": 0.7481, "step": 13224 }, { "epoch": 0.8960634189308219, "grad_norm": 5.839790344238281, "learning_rate": 8.292285577383805e-05, "loss": 0.7201, "step": 13225 }, { "epoch": 0.896131174198794, "grad_norm": 7.310943126678467, "learning_rate": 8.292148675474023e-05, "loss": 1.062, "step": 13226 }, { "epoch": 0.896198929466766, "grad_norm": 7.1324872970581055, "learning_rate": 8.292011773564241e-05, "loss": 0.86, "step": 13227 }, { "epoch": 0.8962666847347381, "grad_norm": 5.25512170791626, "learning_rate": 8.291874871654461e-05, "loss": 0.7487, "step": 13228 }, { "epoch": 0.8963344400027102, "grad_norm": 4.826694488525391, "learning_rate": 8.291737969744679e-05, "loss": 0.6497, "step": 13229 }, { "epoch": 0.8964021952706823, "grad_norm": 6.0009918212890625, "learning_rate": 8.291601067834897e-05, "loss": 0.7122, "step": 13230 }, { "epoch": 0.8964699505386544, "grad_norm": 6.770015716552734, "learning_rate": 8.291464165925115e-05, "loss": 0.9121, "step": 13231 }, { "epoch": 0.8965377058066265, "grad_norm": 7.527721405029297, "learning_rate": 8.291327264015334e-05, "loss": 0.7042, "step": 13232 }, { "epoch": 0.8966054610745986, "grad_norm": 5.726056098937988, "learning_rate": 8.291190362105552e-05, "loss": 0.7324, "step": 13233 }, { "epoch": 0.8966732163425707, "grad_norm": 5.054379463195801, "learning_rate": 8.29105346019577e-05, "loss": 0.8064, "step": 13234 }, { "epoch": 0.8967409716105427, "grad_norm": 5.584482669830322, "learning_rate": 8.290916558285988e-05, "loss": 0.7225, "step": 13235 }, { "epoch": 0.8968087268785148, "grad_norm": 6.002537727355957, "learning_rate": 8.290779656376206e-05, "loss": 1.0223, "step": 13236 }, { "epoch": 0.8968764821464869, "grad_norm": 6.58087158203125, "learning_rate": 8.290642754466426e-05, "loss": 0.847, "step": 13237 }, { "epoch": 0.896944237414459, "grad_norm": 7.690030097961426, "learning_rate": 8.290505852556644e-05, "loss": 0.6217, "step": 13238 }, { "epoch": 0.897011992682431, "grad_norm": 6.937661170959473, "learning_rate": 8.290368950646862e-05, "loss": 0.8138, "step": 13239 }, { "epoch": 0.8970797479504031, "grad_norm": 5.334490776062012, "learning_rate": 8.29023204873708e-05, "loss": 0.8908, "step": 13240 }, { "epoch": 0.8971475032183752, "grad_norm": 5.833104133605957, "learning_rate": 8.2900951468273e-05, "loss": 0.5743, "step": 13241 }, { "epoch": 0.8972152584863473, "grad_norm": 5.803739547729492, "learning_rate": 8.289958244917517e-05, "loss": 0.6845, "step": 13242 }, { "epoch": 0.8972830137543194, "grad_norm": 5.436889171600342, "learning_rate": 8.289821343007735e-05, "loss": 0.8216, "step": 13243 }, { "epoch": 0.8973507690222915, "grad_norm": 4.356090068817139, "learning_rate": 8.289684441097953e-05, "loss": 0.7757, "step": 13244 }, { "epoch": 0.8974185242902636, "grad_norm": 5.816674709320068, "learning_rate": 8.289547539188172e-05, "loss": 0.8839, "step": 13245 }, { "epoch": 0.8974862795582357, "grad_norm": 7.878244876861572, "learning_rate": 8.289410637278391e-05, "loss": 1.0252, "step": 13246 }, { "epoch": 0.8975540348262078, "grad_norm": 6.409861087799072, "learning_rate": 8.289273735368609e-05, "loss": 0.8281, "step": 13247 }, { "epoch": 0.8976217900941799, "grad_norm": 6.617053031921387, "learning_rate": 8.289136833458827e-05, "loss": 1.0406, "step": 13248 }, { "epoch": 0.897689545362152, "grad_norm": 5.893568992614746, "learning_rate": 8.288999931549045e-05, "loss": 0.8713, "step": 13249 }, { "epoch": 0.897757300630124, "grad_norm": 5.194582939147949, "learning_rate": 8.288863029639263e-05, "loss": 0.7163, "step": 13250 }, { "epoch": 0.897825055898096, "grad_norm": 5.706815719604492, "learning_rate": 8.288726127729482e-05, "loss": 0.7675, "step": 13251 }, { "epoch": 0.8978928111660681, "grad_norm": 5.490322113037109, "learning_rate": 8.2885892258197e-05, "loss": 0.7559, "step": 13252 }, { "epoch": 0.8979605664340402, "grad_norm": 4.902472019195557, "learning_rate": 8.288452323909918e-05, "loss": 0.6095, "step": 13253 }, { "epoch": 0.8980283217020123, "grad_norm": 4.228281497955322, "learning_rate": 8.288315422000137e-05, "loss": 0.5824, "step": 13254 }, { "epoch": 0.8980960769699844, "grad_norm": 5.2611494064331055, "learning_rate": 8.288178520090356e-05, "loss": 0.8144, "step": 13255 }, { "epoch": 0.8981638322379565, "grad_norm": 5.848245143890381, "learning_rate": 8.288041618180574e-05, "loss": 0.7095, "step": 13256 }, { "epoch": 0.8982315875059286, "grad_norm": 5.738656044006348, "learning_rate": 8.287904716270792e-05, "loss": 0.731, "step": 13257 }, { "epoch": 0.8982993427739007, "grad_norm": 6.212946891784668, "learning_rate": 8.28776781436101e-05, "loss": 0.6781, "step": 13258 }, { "epoch": 0.8983670980418728, "grad_norm": 4.827934741973877, "learning_rate": 8.287630912451228e-05, "loss": 0.7693, "step": 13259 }, { "epoch": 0.8984348533098448, "grad_norm": 5.135469436645508, "learning_rate": 8.287494010541447e-05, "loss": 0.7819, "step": 13260 }, { "epoch": 0.8985026085778169, "grad_norm": 8.885536193847656, "learning_rate": 8.287357108631665e-05, "loss": 0.7556, "step": 13261 }, { "epoch": 0.898570363845789, "grad_norm": 8.197842597961426, "learning_rate": 8.287220206721884e-05, "loss": 0.7379, "step": 13262 }, { "epoch": 0.8986381191137611, "grad_norm": 5.406810283660889, "learning_rate": 8.287083304812102e-05, "loss": 0.6618, "step": 13263 }, { "epoch": 0.8987058743817332, "grad_norm": 5.688655376434326, "learning_rate": 8.286946402902321e-05, "loss": 0.9949, "step": 13264 }, { "epoch": 0.8987736296497053, "grad_norm": 7.776078701019287, "learning_rate": 8.286809500992539e-05, "loss": 0.5491, "step": 13265 }, { "epoch": 0.8988413849176774, "grad_norm": 7.738804817199707, "learning_rate": 8.286672599082757e-05, "loss": 1.06, "step": 13266 }, { "epoch": 0.8989091401856494, "grad_norm": 5.860600471496582, "learning_rate": 8.286535697172976e-05, "loss": 0.7621, "step": 13267 }, { "epoch": 0.8989768954536215, "grad_norm": 5.878091335296631, "learning_rate": 8.286398795263194e-05, "loss": 0.9419, "step": 13268 }, { "epoch": 0.8990446507215936, "grad_norm": 6.572638511657715, "learning_rate": 8.286261893353412e-05, "loss": 1.1296, "step": 13269 }, { "epoch": 0.8991124059895657, "grad_norm": 6.110701084136963, "learning_rate": 8.286124991443632e-05, "loss": 0.6976, "step": 13270 }, { "epoch": 0.8991801612575377, "grad_norm": 5.455756187438965, "learning_rate": 8.28598808953385e-05, "loss": 0.7511, "step": 13271 }, { "epoch": 0.8992479165255098, "grad_norm": 5.473435878753662, "learning_rate": 8.285851187624068e-05, "loss": 0.7217, "step": 13272 }, { "epoch": 0.8993156717934819, "grad_norm": 6.81781005859375, "learning_rate": 8.285714285714287e-05, "loss": 0.6594, "step": 13273 }, { "epoch": 0.899383427061454, "grad_norm": 6.982245445251465, "learning_rate": 8.285577383804505e-05, "loss": 0.8544, "step": 13274 }, { "epoch": 0.8994511823294261, "grad_norm": 4.480033874511719, "learning_rate": 8.285440481894723e-05, "loss": 0.8935, "step": 13275 }, { "epoch": 0.8995189375973982, "grad_norm": 5.940613746643066, "learning_rate": 8.285303579984941e-05, "loss": 0.8299, "step": 13276 }, { "epoch": 0.8995866928653703, "grad_norm": 5.0714335441589355, "learning_rate": 8.28516667807516e-05, "loss": 0.6672, "step": 13277 }, { "epoch": 0.8996544481333424, "grad_norm": 5.617218971252441, "learning_rate": 8.285029776165379e-05, "loss": 0.8913, "step": 13278 }, { "epoch": 0.8997222034013145, "grad_norm": 8.896486282348633, "learning_rate": 8.284892874255597e-05, "loss": 0.9386, "step": 13279 }, { "epoch": 0.8997899586692866, "grad_norm": 6.059647083282471, "learning_rate": 8.284755972345815e-05, "loss": 0.8023, "step": 13280 }, { "epoch": 0.8998577139372587, "grad_norm": 5.9106950759887695, "learning_rate": 8.284619070436033e-05, "loss": 0.8094, "step": 13281 }, { "epoch": 0.8999254692052308, "grad_norm": 7.264159679412842, "learning_rate": 8.284482168526251e-05, "loss": 0.8013, "step": 13282 }, { "epoch": 0.8999932244732028, "grad_norm": 7.696917533874512, "learning_rate": 8.28434526661647e-05, "loss": 0.8387, "step": 13283 }, { "epoch": 0.9000609797411748, "grad_norm": 6.536654472351074, "learning_rate": 8.284208364706688e-05, "loss": 0.8861, "step": 13284 }, { "epoch": 0.9001287350091469, "grad_norm": 5.371990203857422, "learning_rate": 8.284071462796906e-05, "loss": 0.7841, "step": 13285 }, { "epoch": 0.900196490277119, "grad_norm": 7.402823448181152, "learning_rate": 8.283934560887124e-05, "loss": 0.9058, "step": 13286 }, { "epoch": 0.9002642455450911, "grad_norm": 5.5579328536987305, "learning_rate": 8.283797658977344e-05, "loss": 0.698, "step": 13287 }, { "epoch": 0.9003320008130632, "grad_norm": 5.994121074676514, "learning_rate": 8.283660757067562e-05, "loss": 0.7502, "step": 13288 }, { "epoch": 0.9003997560810353, "grad_norm": 6.9693427085876465, "learning_rate": 8.28352385515778e-05, "loss": 0.9358, "step": 13289 }, { "epoch": 0.9004675113490074, "grad_norm": 6.08130407333374, "learning_rate": 8.283386953247998e-05, "loss": 0.7868, "step": 13290 }, { "epoch": 0.9005352666169795, "grad_norm": 6.161886692047119, "learning_rate": 8.283250051338216e-05, "loss": 0.6941, "step": 13291 }, { "epoch": 0.9006030218849516, "grad_norm": 6.822792053222656, "learning_rate": 8.283113149428435e-05, "loss": 0.8085, "step": 13292 }, { "epoch": 0.9006707771529237, "grad_norm": 5.883656978607178, "learning_rate": 8.282976247518653e-05, "loss": 0.7429, "step": 13293 }, { "epoch": 0.9007385324208957, "grad_norm": 4.72324800491333, "learning_rate": 8.282839345608871e-05, "loss": 0.6083, "step": 13294 }, { "epoch": 0.9008062876888678, "grad_norm": 5.912334442138672, "learning_rate": 8.28270244369909e-05, "loss": 0.8443, "step": 13295 }, { "epoch": 0.9008740429568399, "grad_norm": 4.501663684844971, "learning_rate": 8.282565541789309e-05, "loss": 0.7635, "step": 13296 }, { "epoch": 0.900941798224812, "grad_norm": 6.771607875823975, "learning_rate": 8.282428639879527e-05, "loss": 0.9044, "step": 13297 }, { "epoch": 0.9010095534927841, "grad_norm": 6.7166428565979, "learning_rate": 8.282291737969745e-05, "loss": 0.8295, "step": 13298 }, { "epoch": 0.9010773087607562, "grad_norm": 6.394474029541016, "learning_rate": 8.282154836059963e-05, "loss": 0.7899, "step": 13299 }, { "epoch": 0.9011450640287282, "grad_norm": 5.574833869934082, "learning_rate": 8.282017934150181e-05, "loss": 0.752, "step": 13300 }, { "epoch": 0.9012128192967003, "grad_norm": 6.585177898406982, "learning_rate": 8.2818810322404e-05, "loss": 0.6169, "step": 13301 }, { "epoch": 0.9012805745646724, "grad_norm": 5.940279960632324, "learning_rate": 8.281744130330618e-05, "loss": 0.7563, "step": 13302 }, { "epoch": 0.9013483298326445, "grad_norm": 6.039457321166992, "learning_rate": 8.281607228420836e-05, "loss": 0.7505, "step": 13303 }, { "epoch": 0.9014160851006165, "grad_norm": 10.80169677734375, "learning_rate": 8.281470326511054e-05, "loss": 0.8651, "step": 13304 }, { "epoch": 0.9014838403685886, "grad_norm": 5.8975043296813965, "learning_rate": 8.281333424601273e-05, "loss": 0.8325, "step": 13305 }, { "epoch": 0.9015515956365607, "grad_norm": 5.902426719665527, "learning_rate": 8.281196522691492e-05, "loss": 0.697, "step": 13306 }, { "epoch": 0.9016193509045328, "grad_norm": 7.651483058929443, "learning_rate": 8.28105962078171e-05, "loss": 0.8322, "step": 13307 }, { "epoch": 0.9016871061725049, "grad_norm": 5.933381080627441, "learning_rate": 8.280922718871928e-05, "loss": 0.834, "step": 13308 }, { "epoch": 0.901754861440477, "grad_norm": 5.620283126831055, "learning_rate": 8.280785816962146e-05, "loss": 0.8047, "step": 13309 }, { "epoch": 0.9018226167084491, "grad_norm": 4.271581172943115, "learning_rate": 8.280648915052365e-05, "loss": 0.7422, "step": 13310 }, { "epoch": 0.9018903719764212, "grad_norm": 7.821619033813477, "learning_rate": 8.280512013142583e-05, "loss": 0.9909, "step": 13311 }, { "epoch": 0.9019581272443933, "grad_norm": 5.665806770324707, "learning_rate": 8.280375111232801e-05, "loss": 0.6748, "step": 13312 }, { "epoch": 0.9020258825123654, "grad_norm": 7.500571250915527, "learning_rate": 8.28023820932302e-05, "loss": 0.9495, "step": 13313 }, { "epoch": 0.9020936377803375, "grad_norm": 7.833176612854004, "learning_rate": 8.280101307413239e-05, "loss": 0.8774, "step": 13314 }, { "epoch": 0.9021613930483096, "grad_norm": 10.599181175231934, "learning_rate": 8.279964405503457e-05, "loss": 0.9064, "step": 13315 }, { "epoch": 0.9022291483162815, "grad_norm": 5.582043647766113, "learning_rate": 8.279827503593675e-05, "loss": 0.5968, "step": 13316 }, { "epoch": 0.9022969035842536, "grad_norm": 10.205772399902344, "learning_rate": 8.279690601683894e-05, "loss": 0.8455, "step": 13317 }, { "epoch": 0.9023646588522257, "grad_norm": 6.769802093505859, "learning_rate": 8.279553699774112e-05, "loss": 0.8242, "step": 13318 }, { "epoch": 0.9024324141201978, "grad_norm": 8.564204216003418, "learning_rate": 8.279416797864332e-05, "loss": 0.9918, "step": 13319 }, { "epoch": 0.9025001693881699, "grad_norm": 6.2231645584106445, "learning_rate": 8.27927989595455e-05, "loss": 0.8375, "step": 13320 }, { "epoch": 0.902567924656142, "grad_norm": 6.065762519836426, "learning_rate": 8.279142994044768e-05, "loss": 0.8746, "step": 13321 }, { "epoch": 0.9026356799241141, "grad_norm": 7.134408950805664, "learning_rate": 8.279006092134986e-05, "loss": 1.1725, "step": 13322 }, { "epoch": 0.9027034351920862, "grad_norm": 7.31404972076416, "learning_rate": 8.278869190225204e-05, "loss": 0.6658, "step": 13323 }, { "epoch": 0.9027711904600583, "grad_norm": 4.640890121459961, "learning_rate": 8.278732288315423e-05, "loss": 0.882, "step": 13324 }, { "epoch": 0.9028389457280304, "grad_norm": 6.120473861694336, "learning_rate": 8.278595386405641e-05, "loss": 0.5502, "step": 13325 }, { "epoch": 0.9029067009960025, "grad_norm": 5.745514392852783, "learning_rate": 8.27845848449586e-05, "loss": 0.6134, "step": 13326 }, { "epoch": 0.9029744562639745, "grad_norm": 8.348088264465332, "learning_rate": 8.278321582586077e-05, "loss": 0.8996, "step": 13327 }, { "epoch": 0.9030422115319466, "grad_norm": 6.585142612457275, "learning_rate": 8.278184680676297e-05, "loss": 0.8028, "step": 13328 }, { "epoch": 0.9031099667999187, "grad_norm": 4.813357830047607, "learning_rate": 8.278047778766515e-05, "loss": 0.6632, "step": 13329 }, { "epoch": 0.9031777220678908, "grad_norm": 6.714639663696289, "learning_rate": 8.277910876856733e-05, "loss": 0.7389, "step": 13330 }, { "epoch": 0.9032454773358629, "grad_norm": 6.200788974761963, "learning_rate": 8.277773974946951e-05, "loss": 0.8529, "step": 13331 }, { "epoch": 0.903313232603835, "grad_norm": 6.508268356323242, "learning_rate": 8.277637073037169e-05, "loss": 0.9533, "step": 13332 }, { "epoch": 0.903380987871807, "grad_norm": 6.651120662689209, "learning_rate": 8.277500171127388e-05, "loss": 0.5568, "step": 13333 }, { "epoch": 0.9034487431397791, "grad_norm": 6.641012668609619, "learning_rate": 8.277363269217606e-05, "loss": 0.7402, "step": 13334 }, { "epoch": 0.9035164984077512, "grad_norm": 5.423649787902832, "learning_rate": 8.277226367307824e-05, "loss": 0.6719, "step": 13335 }, { "epoch": 0.9035842536757233, "grad_norm": 4.8691558837890625, "learning_rate": 8.277089465398042e-05, "loss": 0.5897, "step": 13336 }, { "epoch": 0.9036520089436954, "grad_norm": 4.2445759773254395, "learning_rate": 8.27695256348826e-05, "loss": 0.8443, "step": 13337 }, { "epoch": 0.9037197642116674, "grad_norm": 5.379455089569092, "learning_rate": 8.27681566157848e-05, "loss": 0.6049, "step": 13338 }, { "epoch": 0.9037875194796395, "grad_norm": 5.717207908630371, "learning_rate": 8.276678759668698e-05, "loss": 0.8009, "step": 13339 }, { "epoch": 0.9038552747476116, "grad_norm": 5.412480354309082, "learning_rate": 8.276541857758916e-05, "loss": 0.7753, "step": 13340 }, { "epoch": 0.9039230300155837, "grad_norm": 8.551285743713379, "learning_rate": 8.276404955849134e-05, "loss": 0.9724, "step": 13341 }, { "epoch": 0.9039907852835558, "grad_norm": 6.7664899826049805, "learning_rate": 8.276268053939353e-05, "loss": 0.9088, "step": 13342 }, { "epoch": 0.9040585405515279, "grad_norm": 6.312598705291748, "learning_rate": 8.276131152029571e-05, "loss": 0.6874, "step": 13343 }, { "epoch": 0.9041262958195, "grad_norm": 6.274691104888916, "learning_rate": 8.27599425011979e-05, "loss": 0.9076, "step": 13344 }, { "epoch": 0.9041940510874721, "grad_norm": 5.1724958419799805, "learning_rate": 8.275857348210007e-05, "loss": 0.6779, "step": 13345 }, { "epoch": 0.9042618063554442, "grad_norm": 6.403627872467041, "learning_rate": 8.275720446300225e-05, "loss": 0.8524, "step": 13346 }, { "epoch": 0.9043295616234163, "grad_norm": 5.662676811218262, "learning_rate": 8.275583544390445e-05, "loss": 0.8278, "step": 13347 }, { "epoch": 0.9043973168913884, "grad_norm": 5.776680946350098, "learning_rate": 8.275446642480663e-05, "loss": 0.7354, "step": 13348 }, { "epoch": 0.9044650721593603, "grad_norm": 5.1496076583862305, "learning_rate": 8.275309740570881e-05, "loss": 0.7018, "step": 13349 }, { "epoch": 0.9045328274273324, "grad_norm": 6.148200035095215, "learning_rate": 8.275172838661099e-05, "loss": 0.7145, "step": 13350 }, { "epoch": 0.9046005826953045, "grad_norm": 6.302008152008057, "learning_rate": 8.275035936751318e-05, "loss": 0.7148, "step": 13351 }, { "epoch": 0.9046683379632766, "grad_norm": 5.558827877044678, "learning_rate": 8.274899034841536e-05, "loss": 0.7591, "step": 13352 }, { "epoch": 0.9047360932312487, "grad_norm": 4.537057876586914, "learning_rate": 8.274762132931754e-05, "loss": 0.5739, "step": 13353 }, { "epoch": 0.9048038484992208, "grad_norm": 5.1838555335998535, "learning_rate": 8.274625231021972e-05, "loss": 0.7811, "step": 13354 }, { "epoch": 0.9048716037671929, "grad_norm": 5.07068395614624, "learning_rate": 8.27448832911219e-05, "loss": 0.9203, "step": 13355 }, { "epoch": 0.904939359035165, "grad_norm": 6.439149856567383, "learning_rate": 8.27435142720241e-05, "loss": 0.7654, "step": 13356 }, { "epoch": 0.9050071143031371, "grad_norm": 8.04731559753418, "learning_rate": 8.274214525292628e-05, "loss": 1.1907, "step": 13357 }, { "epoch": 0.9050748695711092, "grad_norm": 5.4005961418151855, "learning_rate": 8.274077623382846e-05, "loss": 0.8081, "step": 13358 }, { "epoch": 0.9051426248390813, "grad_norm": 5.338225364685059, "learning_rate": 8.273940721473064e-05, "loss": 0.5817, "step": 13359 }, { "epoch": 0.9052103801070533, "grad_norm": 7.287635326385498, "learning_rate": 8.273803819563283e-05, "loss": 0.9362, "step": 13360 }, { "epoch": 0.9052781353750254, "grad_norm": 7.328275203704834, "learning_rate": 8.273666917653501e-05, "loss": 0.9302, "step": 13361 }, { "epoch": 0.9053458906429975, "grad_norm": 5.7136359214782715, "learning_rate": 8.27353001574372e-05, "loss": 0.8013, "step": 13362 }, { "epoch": 0.9054136459109696, "grad_norm": 5.051130294799805, "learning_rate": 8.273393113833939e-05, "loss": 0.6682, "step": 13363 }, { "epoch": 0.9054814011789417, "grad_norm": 6.250859260559082, "learning_rate": 8.273256211924157e-05, "loss": 0.8889, "step": 13364 }, { "epoch": 0.9055491564469137, "grad_norm": 6.39178991317749, "learning_rate": 8.273119310014375e-05, "loss": 0.6838, "step": 13365 }, { "epoch": 0.9056169117148858, "grad_norm": 5.243597507476807, "learning_rate": 8.272982408104594e-05, "loss": 0.8019, "step": 13366 }, { "epoch": 0.9056846669828579, "grad_norm": 6.917558670043945, "learning_rate": 8.272845506194812e-05, "loss": 0.7121, "step": 13367 }, { "epoch": 0.90575242225083, "grad_norm": 4.401981830596924, "learning_rate": 8.27270860428503e-05, "loss": 0.5813, "step": 13368 }, { "epoch": 0.9058201775188021, "grad_norm": 6.838183879852295, "learning_rate": 8.272571702375248e-05, "loss": 0.8273, "step": 13369 }, { "epoch": 0.9058879327867742, "grad_norm": 6.264207363128662, "learning_rate": 8.272434800465468e-05, "loss": 0.6854, "step": 13370 }, { "epoch": 0.9059556880547462, "grad_norm": 4.945788383483887, "learning_rate": 8.272297898555686e-05, "loss": 0.5941, "step": 13371 }, { "epoch": 0.9060234433227183, "grad_norm": 6.928656578063965, "learning_rate": 8.272160996645904e-05, "loss": 0.754, "step": 13372 }, { "epoch": 0.9060911985906904, "grad_norm": 4.5859551429748535, "learning_rate": 8.272024094736122e-05, "loss": 0.6271, "step": 13373 }, { "epoch": 0.9061589538586625, "grad_norm": 6.357034206390381, "learning_rate": 8.271887192826341e-05, "loss": 0.6632, "step": 13374 }, { "epoch": 0.9062267091266346, "grad_norm": 6.899624824523926, "learning_rate": 8.271750290916559e-05, "loss": 0.8332, "step": 13375 }, { "epoch": 0.9062944643946067, "grad_norm": 8.641779899597168, "learning_rate": 8.271613389006777e-05, "loss": 0.8412, "step": 13376 }, { "epoch": 0.9063622196625788, "grad_norm": 6.443648815155029, "learning_rate": 8.271476487096995e-05, "loss": 0.8374, "step": 13377 }, { "epoch": 0.9064299749305509, "grad_norm": 8.222790718078613, "learning_rate": 8.271339585187213e-05, "loss": 0.9038, "step": 13378 }, { "epoch": 0.906497730198523, "grad_norm": 5.121990203857422, "learning_rate": 8.271202683277433e-05, "loss": 0.6617, "step": 13379 }, { "epoch": 0.9065654854664951, "grad_norm": 6.855564117431641, "learning_rate": 8.271065781367651e-05, "loss": 0.7803, "step": 13380 }, { "epoch": 0.906633240734467, "grad_norm": 6.987429141998291, "learning_rate": 8.270928879457869e-05, "loss": 0.6917, "step": 13381 }, { "epoch": 0.9067009960024391, "grad_norm": 6.5699896812438965, "learning_rate": 8.270791977548087e-05, "loss": 0.9687, "step": 13382 }, { "epoch": 0.9067687512704112, "grad_norm": 6.9197797775268555, "learning_rate": 8.270655075638306e-05, "loss": 0.7054, "step": 13383 }, { "epoch": 0.9068365065383833, "grad_norm": 6.5933380126953125, "learning_rate": 8.270518173728524e-05, "loss": 0.7276, "step": 13384 }, { "epoch": 0.9069042618063554, "grad_norm": 5.359493732452393, "learning_rate": 8.270381271818742e-05, "loss": 0.6566, "step": 13385 }, { "epoch": 0.9069720170743275, "grad_norm": 9.177874565124512, "learning_rate": 8.27024436990896e-05, "loss": 0.7313, "step": 13386 }, { "epoch": 0.9070397723422996, "grad_norm": 7.813451766967773, "learning_rate": 8.270107467999178e-05, "loss": 0.6503, "step": 13387 }, { "epoch": 0.9071075276102717, "grad_norm": 5.215133190155029, "learning_rate": 8.269970566089398e-05, "loss": 0.8036, "step": 13388 }, { "epoch": 0.9071752828782438, "grad_norm": 5.473424434661865, "learning_rate": 8.269833664179616e-05, "loss": 0.7178, "step": 13389 }, { "epoch": 0.9072430381462159, "grad_norm": 7.770848751068115, "learning_rate": 8.269696762269834e-05, "loss": 0.8419, "step": 13390 }, { "epoch": 0.907310793414188, "grad_norm": 6.786417007446289, "learning_rate": 8.269559860360052e-05, "loss": 0.9242, "step": 13391 }, { "epoch": 0.9073785486821601, "grad_norm": 4.775079250335693, "learning_rate": 8.26942295845027e-05, "loss": 0.7514, "step": 13392 }, { "epoch": 0.9074463039501321, "grad_norm": 5.795116901397705, "learning_rate": 8.26928605654049e-05, "loss": 0.8425, "step": 13393 }, { "epoch": 0.9075140592181042, "grad_norm": 7.329837322235107, "learning_rate": 8.269149154630707e-05, "loss": 0.8125, "step": 13394 }, { "epoch": 0.9075818144860763, "grad_norm": 4.981736660003662, "learning_rate": 8.269012252720925e-05, "loss": 0.6766, "step": 13395 }, { "epoch": 0.9076495697540484, "grad_norm": 7.599316120147705, "learning_rate": 8.268875350811143e-05, "loss": 0.6769, "step": 13396 }, { "epoch": 0.9077173250220205, "grad_norm": 5.966447830200195, "learning_rate": 8.268738448901363e-05, "loss": 0.7356, "step": 13397 }, { "epoch": 0.9077850802899925, "grad_norm": 6.569231986999512, "learning_rate": 8.268601546991581e-05, "loss": 0.84, "step": 13398 }, { "epoch": 0.9078528355579646, "grad_norm": 5.759048938751221, "learning_rate": 8.268464645081799e-05, "loss": 0.9159, "step": 13399 }, { "epoch": 0.9079205908259367, "grad_norm": 5.395700931549072, "learning_rate": 8.268327743172017e-05, "loss": 0.662, "step": 13400 }, { "epoch": 0.9079883460939088, "grad_norm": 6.1797099113464355, "learning_rate": 8.268190841262235e-05, "loss": 0.6498, "step": 13401 }, { "epoch": 0.9080561013618809, "grad_norm": 7.686827659606934, "learning_rate": 8.268053939352454e-05, "loss": 0.9687, "step": 13402 }, { "epoch": 0.908123856629853, "grad_norm": 6.104824066162109, "learning_rate": 8.267917037442672e-05, "loss": 0.7184, "step": 13403 }, { "epoch": 0.908191611897825, "grad_norm": 6.225331783294678, "learning_rate": 8.26778013553289e-05, "loss": 0.8403, "step": 13404 }, { "epoch": 0.9082593671657971, "grad_norm": 5.349873065948486, "learning_rate": 8.267643233623108e-05, "loss": 0.8676, "step": 13405 }, { "epoch": 0.9083271224337692, "grad_norm": 5.427150249481201, "learning_rate": 8.267506331713328e-05, "loss": 0.7199, "step": 13406 }, { "epoch": 0.9083948777017413, "grad_norm": 6.000942707061768, "learning_rate": 8.267369429803546e-05, "loss": 0.6516, "step": 13407 }, { "epoch": 0.9084626329697134, "grad_norm": 7.7418532371521, "learning_rate": 8.267232527893764e-05, "loss": 0.8854, "step": 13408 }, { "epoch": 0.9085303882376855, "grad_norm": 6.350762844085693, "learning_rate": 8.267095625983983e-05, "loss": 1.0689, "step": 13409 }, { "epoch": 0.9085981435056576, "grad_norm": 6.675297260284424, "learning_rate": 8.266958724074201e-05, "loss": 1.1803, "step": 13410 }, { "epoch": 0.9086658987736297, "grad_norm": 5.116997718811035, "learning_rate": 8.26682182216442e-05, "loss": 0.6724, "step": 13411 }, { "epoch": 0.9087336540416018, "grad_norm": 6.529482364654541, "learning_rate": 8.266684920254639e-05, "loss": 0.7508, "step": 13412 }, { "epoch": 0.9088014093095739, "grad_norm": 6.559128284454346, "learning_rate": 8.266548018344857e-05, "loss": 0.7786, "step": 13413 }, { "epoch": 0.9088691645775459, "grad_norm": 6.002798080444336, "learning_rate": 8.266411116435075e-05, "loss": 0.613, "step": 13414 }, { "epoch": 0.9089369198455179, "grad_norm": 5.554568767547607, "learning_rate": 8.266274214525293e-05, "loss": 0.7622, "step": 13415 }, { "epoch": 0.90900467511349, "grad_norm": 4.8590521812438965, "learning_rate": 8.266137312615512e-05, "loss": 0.6765, "step": 13416 }, { "epoch": 0.9090724303814621, "grad_norm": 5.818526744842529, "learning_rate": 8.26600041070573e-05, "loss": 0.8034, "step": 13417 }, { "epoch": 0.9091401856494342, "grad_norm": 5.614035606384277, "learning_rate": 8.265863508795948e-05, "loss": 0.6917, "step": 13418 }, { "epoch": 0.9092079409174063, "grad_norm": 4.978684425354004, "learning_rate": 8.265726606886166e-05, "loss": 0.6436, "step": 13419 }, { "epoch": 0.9092756961853784, "grad_norm": 4.747771263122559, "learning_rate": 8.265589704976386e-05, "loss": 0.7033, "step": 13420 }, { "epoch": 0.9093434514533505, "grad_norm": 5.64393424987793, "learning_rate": 8.265452803066604e-05, "loss": 0.6504, "step": 13421 }, { "epoch": 0.9094112067213226, "grad_norm": 4.84307336807251, "learning_rate": 8.265315901156822e-05, "loss": 0.6661, "step": 13422 }, { "epoch": 0.9094789619892947, "grad_norm": 7.957591533660889, "learning_rate": 8.26517899924704e-05, "loss": 0.797, "step": 13423 }, { "epoch": 0.9095467172572668, "grad_norm": 7.481930255889893, "learning_rate": 8.265042097337258e-05, "loss": 0.9757, "step": 13424 }, { "epoch": 0.9096144725252389, "grad_norm": 9.150352478027344, "learning_rate": 8.264905195427477e-05, "loss": 0.6074, "step": 13425 }, { "epoch": 0.909682227793211, "grad_norm": 7.335263252258301, "learning_rate": 8.264768293517695e-05, "loss": 0.7082, "step": 13426 }, { "epoch": 0.909749983061183, "grad_norm": 6.145523548126221, "learning_rate": 8.264631391607913e-05, "loss": 0.7136, "step": 13427 }, { "epoch": 0.9098177383291551, "grad_norm": 8.037848472595215, "learning_rate": 8.264494489698131e-05, "loss": 0.7071, "step": 13428 }, { "epoch": 0.9098854935971272, "grad_norm": 7.532377243041992, "learning_rate": 8.264357587788351e-05, "loss": 0.7352, "step": 13429 }, { "epoch": 0.9099532488650992, "grad_norm": 5.716782093048096, "learning_rate": 8.264220685878569e-05, "loss": 0.6108, "step": 13430 }, { "epoch": 0.9100210041330713, "grad_norm": 5.553226947784424, "learning_rate": 8.264083783968787e-05, "loss": 0.8414, "step": 13431 }, { "epoch": 0.9100887594010434, "grad_norm": 5.714873313903809, "learning_rate": 8.263946882059005e-05, "loss": 0.7346, "step": 13432 }, { "epoch": 0.9101565146690155, "grad_norm": 8.493986129760742, "learning_rate": 8.263809980149223e-05, "loss": 0.7027, "step": 13433 }, { "epoch": 0.9102242699369876, "grad_norm": 4.617614269256592, "learning_rate": 8.263673078239442e-05, "loss": 0.7141, "step": 13434 }, { "epoch": 0.9102920252049597, "grad_norm": 5.8947649002075195, "learning_rate": 8.26353617632966e-05, "loss": 0.8719, "step": 13435 }, { "epoch": 0.9103597804729318, "grad_norm": 7.168681621551514, "learning_rate": 8.263399274419878e-05, "loss": 1.0277, "step": 13436 }, { "epoch": 0.9104275357409038, "grad_norm": 7.014023303985596, "learning_rate": 8.263262372510096e-05, "loss": 0.9736, "step": 13437 }, { "epoch": 0.9104952910088759, "grad_norm": 6.311854362487793, "learning_rate": 8.263125470600314e-05, "loss": 0.7287, "step": 13438 }, { "epoch": 0.910563046276848, "grad_norm": 6.3631486892700195, "learning_rate": 8.262988568690534e-05, "loss": 0.8432, "step": 13439 }, { "epoch": 0.9106308015448201, "grad_norm": 7.433747291564941, "learning_rate": 8.262851666780752e-05, "loss": 0.8998, "step": 13440 }, { "epoch": 0.9106985568127922, "grad_norm": 6.583968639373779, "learning_rate": 8.26271476487097e-05, "loss": 0.7926, "step": 13441 }, { "epoch": 0.9107663120807643, "grad_norm": 5.414675235748291, "learning_rate": 8.262577862961188e-05, "loss": 0.6653, "step": 13442 }, { "epoch": 0.9108340673487364, "grad_norm": 6.068636417388916, "learning_rate": 8.262440961051407e-05, "loss": 0.7299, "step": 13443 }, { "epoch": 0.9109018226167085, "grad_norm": 6.4102277755737305, "learning_rate": 8.262304059141625e-05, "loss": 0.6988, "step": 13444 }, { "epoch": 0.9109695778846806, "grad_norm": 7.261764049530029, "learning_rate": 8.262167157231843e-05, "loss": 0.7473, "step": 13445 }, { "epoch": 0.9110373331526527, "grad_norm": 6.129340171813965, "learning_rate": 8.262030255322061e-05, "loss": 0.7437, "step": 13446 }, { "epoch": 0.9111050884206247, "grad_norm": 7.05224609375, "learning_rate": 8.26189335341228e-05, "loss": 0.8106, "step": 13447 }, { "epoch": 0.9111728436885967, "grad_norm": 6.788548469543457, "learning_rate": 8.261756451502499e-05, "loss": 0.8289, "step": 13448 }, { "epoch": 0.9112405989565688, "grad_norm": 6.397700786590576, "learning_rate": 8.261619549592717e-05, "loss": 0.7633, "step": 13449 }, { "epoch": 0.9113083542245409, "grad_norm": 6.438706874847412, "learning_rate": 8.261482647682935e-05, "loss": 1.0421, "step": 13450 }, { "epoch": 0.911376109492513, "grad_norm": 5.280536651611328, "learning_rate": 8.261345745773153e-05, "loss": 0.7404, "step": 13451 }, { "epoch": 0.9114438647604851, "grad_norm": 5.514378070831299, "learning_rate": 8.261208843863372e-05, "loss": 0.6536, "step": 13452 }, { "epoch": 0.9115116200284572, "grad_norm": 6.182071685791016, "learning_rate": 8.26107194195359e-05, "loss": 1.0355, "step": 13453 }, { "epoch": 0.9115793752964293, "grad_norm": 5.280673503875732, "learning_rate": 8.260935040043808e-05, "loss": 0.6193, "step": 13454 }, { "epoch": 0.9116471305644014, "grad_norm": 6.420233726501465, "learning_rate": 8.260798138134028e-05, "loss": 0.7545, "step": 13455 }, { "epoch": 0.9117148858323735, "grad_norm": 4.983266353607178, "learning_rate": 8.260661236224246e-05, "loss": 0.7821, "step": 13456 }, { "epoch": 0.9117826411003456, "grad_norm": 7.941768646240234, "learning_rate": 8.260524334314464e-05, "loss": 0.851, "step": 13457 }, { "epoch": 0.9118503963683177, "grad_norm": 6.576548099517822, "learning_rate": 8.260387432404683e-05, "loss": 1.0445, "step": 13458 }, { "epoch": 0.9119181516362898, "grad_norm": 4.9454121589660645, "learning_rate": 8.260250530494901e-05, "loss": 0.7082, "step": 13459 }, { "epoch": 0.9119859069042618, "grad_norm": 5.048241138458252, "learning_rate": 8.260113628585119e-05, "loss": 0.6747, "step": 13460 }, { "epoch": 0.9120536621722339, "grad_norm": 6.229146957397461, "learning_rate": 8.259976726675339e-05, "loss": 0.9753, "step": 13461 }, { "epoch": 0.912121417440206, "grad_norm": 7.635250568389893, "learning_rate": 8.259839824765557e-05, "loss": 0.7616, "step": 13462 }, { "epoch": 0.912189172708178, "grad_norm": 5.580092430114746, "learning_rate": 8.259702922855775e-05, "loss": 0.5658, "step": 13463 }, { "epoch": 0.9122569279761501, "grad_norm": 5.2181715965271, "learning_rate": 8.259566020945993e-05, "loss": 0.7053, "step": 13464 }, { "epoch": 0.9123246832441222, "grad_norm": 6.143484592437744, "learning_rate": 8.259429119036211e-05, "loss": 0.6313, "step": 13465 }, { "epoch": 0.9123924385120943, "grad_norm": 5.8051323890686035, "learning_rate": 8.25929221712643e-05, "loss": 0.7655, "step": 13466 }, { "epoch": 0.9124601937800664, "grad_norm": 5.798500061035156, "learning_rate": 8.259155315216648e-05, "loss": 0.7226, "step": 13467 }, { "epoch": 0.9125279490480385, "grad_norm": 5.753519535064697, "learning_rate": 8.259018413306866e-05, "loss": 0.7487, "step": 13468 }, { "epoch": 0.9125957043160106, "grad_norm": 5.924856662750244, "learning_rate": 8.258881511397084e-05, "loss": 0.821, "step": 13469 }, { "epoch": 0.9126634595839827, "grad_norm": 5.954171180725098, "learning_rate": 8.258744609487302e-05, "loss": 0.8115, "step": 13470 }, { "epoch": 0.9127312148519547, "grad_norm": 6.430693626403809, "learning_rate": 8.258607707577522e-05, "loss": 0.7691, "step": 13471 }, { "epoch": 0.9127989701199268, "grad_norm": 4.200928688049316, "learning_rate": 8.25847080566774e-05, "loss": 0.6181, "step": 13472 }, { "epoch": 0.9128667253878989, "grad_norm": 6.997094631195068, "learning_rate": 8.258333903757958e-05, "loss": 1.0318, "step": 13473 }, { "epoch": 0.912934480655871, "grad_norm": 6.03936243057251, "learning_rate": 8.258197001848176e-05, "loss": 0.8103, "step": 13474 }, { "epoch": 0.9130022359238431, "grad_norm": 8.341856002807617, "learning_rate": 8.258060099938395e-05, "loss": 0.8473, "step": 13475 }, { "epoch": 0.9130699911918152, "grad_norm": 7.553666591644287, "learning_rate": 8.257923198028613e-05, "loss": 0.8751, "step": 13476 }, { "epoch": 0.9131377464597873, "grad_norm": 4.391775608062744, "learning_rate": 8.257786296118831e-05, "loss": 0.6136, "step": 13477 }, { "epoch": 0.9132055017277594, "grad_norm": 5.554635047912598, "learning_rate": 8.25764939420905e-05, "loss": 0.584, "step": 13478 }, { "epoch": 0.9132732569957314, "grad_norm": 4.761404037475586, "learning_rate": 8.257512492299267e-05, "loss": 0.7827, "step": 13479 }, { "epoch": 0.9133410122637035, "grad_norm": 5.858761310577393, "learning_rate": 8.257375590389487e-05, "loss": 0.7184, "step": 13480 }, { "epoch": 0.9134087675316755, "grad_norm": 5.618086338043213, "learning_rate": 8.257238688479705e-05, "loss": 0.6523, "step": 13481 }, { "epoch": 0.9134765227996476, "grad_norm": 5.3456854820251465, "learning_rate": 8.257101786569923e-05, "loss": 0.7383, "step": 13482 }, { "epoch": 0.9135442780676197, "grad_norm": 7.581971645355225, "learning_rate": 8.256964884660141e-05, "loss": 0.8992, "step": 13483 }, { "epoch": 0.9136120333355918, "grad_norm": 7.308854579925537, "learning_rate": 8.25682798275036e-05, "loss": 0.8322, "step": 13484 }, { "epoch": 0.9136797886035639, "grad_norm": 6.603922367095947, "learning_rate": 8.256691080840578e-05, "loss": 0.7841, "step": 13485 }, { "epoch": 0.913747543871536, "grad_norm": 5.805631637573242, "learning_rate": 8.256554178930796e-05, "loss": 0.6594, "step": 13486 }, { "epoch": 0.9138152991395081, "grad_norm": 5.206735134124756, "learning_rate": 8.256417277021014e-05, "loss": 0.6513, "step": 13487 }, { "epoch": 0.9138830544074802, "grad_norm": 7.223725318908691, "learning_rate": 8.256280375111232e-05, "loss": 0.768, "step": 13488 }, { "epoch": 0.9139508096754523, "grad_norm": 5.860501766204834, "learning_rate": 8.256143473201452e-05, "loss": 0.8824, "step": 13489 }, { "epoch": 0.9140185649434244, "grad_norm": 7.430714130401611, "learning_rate": 8.25600657129167e-05, "loss": 0.8971, "step": 13490 }, { "epoch": 0.9140863202113965, "grad_norm": 6.006752014160156, "learning_rate": 8.255869669381888e-05, "loss": 0.7208, "step": 13491 }, { "epoch": 0.9141540754793686, "grad_norm": 6.061511993408203, "learning_rate": 8.255732767472106e-05, "loss": 0.7222, "step": 13492 }, { "epoch": 0.9142218307473406, "grad_norm": 5.890130043029785, "learning_rate": 8.255595865562324e-05, "loss": 0.7764, "step": 13493 }, { "epoch": 0.9142895860153127, "grad_norm": 6.152371883392334, "learning_rate": 8.255458963652543e-05, "loss": 0.896, "step": 13494 }, { "epoch": 0.9143573412832848, "grad_norm": 5.317947864532471, "learning_rate": 8.255322061742761e-05, "loss": 0.7058, "step": 13495 }, { "epoch": 0.9144250965512568, "grad_norm": 5.516234397888184, "learning_rate": 8.25518515983298e-05, "loss": 0.7669, "step": 13496 }, { "epoch": 0.9144928518192289, "grad_norm": 5.327831268310547, "learning_rate": 8.255048257923197e-05, "loss": 0.6878, "step": 13497 }, { "epoch": 0.914560607087201, "grad_norm": 6.158792495727539, "learning_rate": 8.254911356013417e-05, "loss": 0.7393, "step": 13498 }, { "epoch": 0.9146283623551731, "grad_norm": 5.450939655303955, "learning_rate": 8.254774454103635e-05, "loss": 0.7131, "step": 13499 }, { "epoch": 0.9146961176231452, "grad_norm": 5.03593635559082, "learning_rate": 8.254637552193853e-05, "loss": 0.5819, "step": 13500 }, { "epoch": 0.9147638728911173, "grad_norm": 7.179828643798828, "learning_rate": 8.254500650284072e-05, "loss": 0.8142, "step": 13501 }, { "epoch": 0.9148316281590894, "grad_norm": 7.470744609832764, "learning_rate": 8.25436374837429e-05, "loss": 0.7455, "step": 13502 }, { "epoch": 0.9148993834270615, "grad_norm": 6.138060569763184, "learning_rate": 8.254226846464508e-05, "loss": 0.7474, "step": 13503 }, { "epoch": 0.9149671386950335, "grad_norm": 6.90313720703125, "learning_rate": 8.254089944554728e-05, "loss": 0.6724, "step": 13504 }, { "epoch": 0.9150348939630056, "grad_norm": 6.103466033935547, "learning_rate": 8.253953042644946e-05, "loss": 0.7163, "step": 13505 }, { "epoch": 0.9151026492309777, "grad_norm": 7.782615661621094, "learning_rate": 8.253816140735164e-05, "loss": 0.9758, "step": 13506 }, { "epoch": 0.9151704044989498, "grad_norm": 6.611595630645752, "learning_rate": 8.253679238825383e-05, "loss": 0.7287, "step": 13507 }, { "epoch": 0.9152381597669219, "grad_norm": 7.578495502471924, "learning_rate": 8.253542336915601e-05, "loss": 0.8566, "step": 13508 }, { "epoch": 0.915305915034894, "grad_norm": 5.957526206970215, "learning_rate": 8.253405435005819e-05, "loss": 0.712, "step": 13509 }, { "epoch": 0.9153736703028661, "grad_norm": 7.773584365844727, "learning_rate": 8.253268533096037e-05, "loss": 0.8836, "step": 13510 }, { "epoch": 0.9154414255708382, "grad_norm": 6.391456604003906, "learning_rate": 8.253131631186255e-05, "loss": 0.6992, "step": 13511 }, { "epoch": 0.9155091808388102, "grad_norm": 6.0647687911987305, "learning_rate": 8.252994729276475e-05, "loss": 0.6445, "step": 13512 }, { "epoch": 0.9155769361067823, "grad_norm": 5.610140800476074, "learning_rate": 8.252857827366693e-05, "loss": 0.7257, "step": 13513 }, { "epoch": 0.9156446913747543, "grad_norm": 4.631004810333252, "learning_rate": 8.252720925456911e-05, "loss": 0.7086, "step": 13514 }, { "epoch": 0.9157124466427264, "grad_norm": 5.651133060455322, "learning_rate": 8.252584023547129e-05, "loss": 1.0037, "step": 13515 }, { "epoch": 0.9157802019106985, "grad_norm": 6.308481693267822, "learning_rate": 8.252447121637348e-05, "loss": 0.8149, "step": 13516 }, { "epoch": 0.9158479571786706, "grad_norm": 7.645565509796143, "learning_rate": 8.252310219727566e-05, "loss": 0.698, "step": 13517 }, { "epoch": 0.9159157124466427, "grad_norm": 6.530562400817871, "learning_rate": 8.252173317817784e-05, "loss": 0.6629, "step": 13518 }, { "epoch": 0.9159834677146148, "grad_norm": 6.290469169616699, "learning_rate": 8.252036415908002e-05, "loss": 0.9803, "step": 13519 }, { "epoch": 0.9160512229825869, "grad_norm": 6.369983673095703, "learning_rate": 8.25189951399822e-05, "loss": 0.8061, "step": 13520 }, { "epoch": 0.916118978250559, "grad_norm": 5.648921489715576, "learning_rate": 8.25176261208844e-05, "loss": 0.8378, "step": 13521 }, { "epoch": 0.9161867335185311, "grad_norm": 5.623139381408691, "learning_rate": 8.251625710178658e-05, "loss": 0.9232, "step": 13522 }, { "epoch": 0.9162544887865032, "grad_norm": 7.031765460968018, "learning_rate": 8.251488808268876e-05, "loss": 0.9497, "step": 13523 }, { "epoch": 0.9163222440544753, "grad_norm": 5.612135410308838, "learning_rate": 8.251351906359094e-05, "loss": 0.931, "step": 13524 }, { "epoch": 0.9163899993224474, "grad_norm": 4.216965675354004, "learning_rate": 8.251215004449312e-05, "loss": 0.716, "step": 13525 }, { "epoch": 0.9164577545904194, "grad_norm": 5.5640645027160645, "learning_rate": 8.251078102539531e-05, "loss": 0.6835, "step": 13526 }, { "epoch": 0.9165255098583915, "grad_norm": 4.81538200378418, "learning_rate": 8.250941200629749e-05, "loss": 0.7627, "step": 13527 }, { "epoch": 0.9165932651263635, "grad_norm": 5.384982109069824, "learning_rate": 8.250804298719967e-05, "loss": 0.5343, "step": 13528 }, { "epoch": 0.9166610203943356, "grad_norm": 6.007334232330322, "learning_rate": 8.250667396810185e-05, "loss": 0.7204, "step": 13529 }, { "epoch": 0.9167287756623077, "grad_norm": 6.315242290496826, "learning_rate": 8.250530494900405e-05, "loss": 0.7633, "step": 13530 }, { "epoch": 0.9167965309302798, "grad_norm": 5.613879203796387, "learning_rate": 8.250393592990623e-05, "loss": 0.6349, "step": 13531 }, { "epoch": 0.9168642861982519, "grad_norm": 6.26859712600708, "learning_rate": 8.250256691080841e-05, "loss": 0.7504, "step": 13532 }, { "epoch": 0.916932041466224, "grad_norm": 7.103095054626465, "learning_rate": 8.250119789171059e-05, "loss": 1.1573, "step": 13533 }, { "epoch": 0.9169997967341961, "grad_norm": 5.007758617401123, "learning_rate": 8.249982887261277e-05, "loss": 0.7824, "step": 13534 }, { "epoch": 0.9170675520021682, "grad_norm": 7.608954429626465, "learning_rate": 8.249845985351496e-05, "loss": 0.9533, "step": 13535 }, { "epoch": 0.9171353072701403, "grad_norm": 6.751156806945801, "learning_rate": 8.249709083441714e-05, "loss": 0.6626, "step": 13536 }, { "epoch": 0.9172030625381123, "grad_norm": 6.883885860443115, "learning_rate": 8.249572181531932e-05, "loss": 0.7154, "step": 13537 }, { "epoch": 0.9172708178060844, "grad_norm": 5.1738481521606445, "learning_rate": 8.24943527962215e-05, "loss": 0.6065, "step": 13538 }, { "epoch": 0.9173385730740565, "grad_norm": 5.373385906219482, "learning_rate": 8.24929837771237e-05, "loss": 0.7061, "step": 13539 }, { "epoch": 0.9174063283420286, "grad_norm": 6.883655548095703, "learning_rate": 8.249161475802588e-05, "loss": 1.0318, "step": 13540 }, { "epoch": 0.9174740836100007, "grad_norm": 6.240233898162842, "learning_rate": 8.249024573892806e-05, "loss": 0.7813, "step": 13541 }, { "epoch": 0.9175418388779728, "grad_norm": 3.9278335571289062, "learning_rate": 8.248887671983024e-05, "loss": 0.5349, "step": 13542 }, { "epoch": 0.9176095941459449, "grad_norm": 4.724002838134766, "learning_rate": 8.248750770073242e-05, "loss": 0.6608, "step": 13543 }, { "epoch": 0.917677349413917, "grad_norm": 6.562617778778076, "learning_rate": 8.248613868163461e-05, "loss": 0.7865, "step": 13544 }, { "epoch": 0.917745104681889, "grad_norm": 5.862000942230225, "learning_rate": 8.248476966253679e-05, "loss": 0.7887, "step": 13545 }, { "epoch": 0.9178128599498611, "grad_norm": 5.171735763549805, "learning_rate": 8.248340064343897e-05, "loss": 0.5521, "step": 13546 }, { "epoch": 0.9178806152178332, "grad_norm": 6.028750419616699, "learning_rate": 8.248203162434117e-05, "loss": 0.7401, "step": 13547 }, { "epoch": 0.9179483704858052, "grad_norm": 6.340005397796631, "learning_rate": 8.248066260524335e-05, "loss": 0.8042, "step": 13548 }, { "epoch": 0.9180161257537773, "grad_norm": 6.127305030822754, "learning_rate": 8.247929358614553e-05, "loss": 0.5721, "step": 13549 }, { "epoch": 0.9180838810217494, "grad_norm": 8.631043434143066, "learning_rate": 8.247792456704772e-05, "loss": 0.9929, "step": 13550 }, { "epoch": 0.9181516362897215, "grad_norm": 7.284387111663818, "learning_rate": 8.24765555479499e-05, "loss": 0.9029, "step": 13551 }, { "epoch": 0.9182193915576936, "grad_norm": 4.782154560089111, "learning_rate": 8.247518652885208e-05, "loss": 0.6233, "step": 13552 }, { "epoch": 0.9182871468256657, "grad_norm": 7.067320346832275, "learning_rate": 8.247381750975428e-05, "loss": 0.8092, "step": 13553 }, { "epoch": 0.9183549020936378, "grad_norm": 6.034327983856201, "learning_rate": 8.247244849065646e-05, "loss": 0.8534, "step": 13554 }, { "epoch": 0.9184226573616099, "grad_norm": 7.13966178894043, "learning_rate": 8.247107947155864e-05, "loss": 0.8442, "step": 13555 }, { "epoch": 0.918490412629582, "grad_norm": 6.099913120269775, "learning_rate": 8.246971045246082e-05, "loss": 0.862, "step": 13556 }, { "epoch": 0.9185581678975541, "grad_norm": 5.149213790893555, "learning_rate": 8.2468341433363e-05, "loss": 0.6381, "step": 13557 }, { "epoch": 0.9186259231655262, "grad_norm": 5.378545761108398, "learning_rate": 8.246697241426519e-05, "loss": 0.8651, "step": 13558 }, { "epoch": 0.9186936784334983, "grad_norm": 5.342377185821533, "learning_rate": 8.246560339516737e-05, "loss": 0.7287, "step": 13559 }, { "epoch": 0.9187614337014703, "grad_norm": 6.145232200622559, "learning_rate": 8.246423437606955e-05, "loss": 0.9639, "step": 13560 }, { "epoch": 0.9188291889694423, "grad_norm": 5.195524215698242, "learning_rate": 8.246286535697173e-05, "loss": 0.5293, "step": 13561 }, { "epoch": 0.9188969442374144, "grad_norm": 5.601246356964111, "learning_rate": 8.246149633787393e-05, "loss": 0.808, "step": 13562 }, { "epoch": 0.9189646995053865, "grad_norm": 7.764710426330566, "learning_rate": 8.246012731877611e-05, "loss": 0.6616, "step": 13563 }, { "epoch": 0.9190324547733586, "grad_norm": 5.822594165802002, "learning_rate": 8.245875829967829e-05, "loss": 0.6981, "step": 13564 }, { "epoch": 0.9191002100413307, "grad_norm": 6.025305271148682, "learning_rate": 8.245738928058047e-05, "loss": 0.8169, "step": 13565 }, { "epoch": 0.9191679653093028, "grad_norm": 5.495863437652588, "learning_rate": 8.245602026148265e-05, "loss": 0.9271, "step": 13566 }, { "epoch": 0.9192357205772749, "grad_norm": 6.753162384033203, "learning_rate": 8.245465124238484e-05, "loss": 0.5892, "step": 13567 }, { "epoch": 0.919303475845247, "grad_norm": 6.381758213043213, "learning_rate": 8.245328222328702e-05, "loss": 0.9818, "step": 13568 }, { "epoch": 0.9193712311132191, "grad_norm": 6.070631980895996, "learning_rate": 8.24519132041892e-05, "loss": 0.7322, "step": 13569 }, { "epoch": 0.9194389863811911, "grad_norm": 4.506857872009277, "learning_rate": 8.245054418509138e-05, "loss": 0.6529, "step": 13570 }, { "epoch": 0.9195067416491632, "grad_norm": 6.378490924835205, "learning_rate": 8.244917516599356e-05, "loss": 0.8146, "step": 13571 }, { "epoch": 0.9195744969171353, "grad_norm": 4.585328102111816, "learning_rate": 8.244780614689576e-05, "loss": 0.6215, "step": 13572 }, { "epoch": 0.9196422521851074, "grad_norm": 6.161722660064697, "learning_rate": 8.244643712779794e-05, "loss": 0.7576, "step": 13573 }, { "epoch": 0.9197100074530795, "grad_norm": 5.033822536468506, "learning_rate": 8.244506810870012e-05, "loss": 0.625, "step": 13574 }, { "epoch": 0.9197777627210516, "grad_norm": 4.940533638000488, "learning_rate": 8.24436990896023e-05, "loss": 0.5779, "step": 13575 }, { "epoch": 0.9198455179890237, "grad_norm": 5.566405296325684, "learning_rate": 8.244233007050449e-05, "loss": 0.7108, "step": 13576 }, { "epoch": 0.9199132732569957, "grad_norm": 5.4210896492004395, "learning_rate": 8.244096105140667e-05, "loss": 0.6497, "step": 13577 }, { "epoch": 0.9199810285249678, "grad_norm": 8.807047843933105, "learning_rate": 8.243959203230885e-05, "loss": 0.8228, "step": 13578 }, { "epoch": 0.9200487837929399, "grad_norm": 6.093658447265625, "learning_rate": 8.243822301321103e-05, "loss": 0.99, "step": 13579 }, { "epoch": 0.920116539060912, "grad_norm": 7.17921781539917, "learning_rate": 8.243685399411321e-05, "loss": 0.9162, "step": 13580 }, { "epoch": 0.920184294328884, "grad_norm": 5.841926574707031, "learning_rate": 8.243548497501541e-05, "loss": 0.8073, "step": 13581 }, { "epoch": 0.9202520495968561, "grad_norm": 6.103463649749756, "learning_rate": 8.243411595591759e-05, "loss": 0.7466, "step": 13582 }, { "epoch": 0.9203198048648282, "grad_norm": 7.478387355804443, "learning_rate": 8.243274693681977e-05, "loss": 0.5682, "step": 13583 }, { "epoch": 0.9203875601328003, "grad_norm": 7.954921245574951, "learning_rate": 8.243137791772195e-05, "loss": 0.7956, "step": 13584 }, { "epoch": 0.9204553154007724, "grad_norm": 5.1390509605407715, "learning_rate": 8.243000889862414e-05, "loss": 0.7722, "step": 13585 }, { "epoch": 0.9205230706687445, "grad_norm": 5.33015251159668, "learning_rate": 8.242863987952632e-05, "loss": 0.8534, "step": 13586 }, { "epoch": 0.9205908259367166, "grad_norm": 6.494523525238037, "learning_rate": 8.24272708604285e-05, "loss": 0.7041, "step": 13587 }, { "epoch": 0.9206585812046887, "grad_norm": 5.0413055419921875, "learning_rate": 8.242590184133068e-05, "loss": 0.8746, "step": 13588 }, { "epoch": 0.9207263364726608, "grad_norm": 5.492376327514648, "learning_rate": 8.242453282223286e-05, "loss": 0.7865, "step": 13589 }, { "epoch": 0.9207940917406329, "grad_norm": 4.8724870681762695, "learning_rate": 8.242316380313506e-05, "loss": 0.7237, "step": 13590 }, { "epoch": 0.920861847008605, "grad_norm": 7.61802339553833, "learning_rate": 8.242179478403724e-05, "loss": 1.0021, "step": 13591 }, { "epoch": 0.920929602276577, "grad_norm": 7.968650817871094, "learning_rate": 8.242042576493942e-05, "loss": 0.8771, "step": 13592 }, { "epoch": 0.920997357544549, "grad_norm": 4.494787216186523, "learning_rate": 8.24190567458416e-05, "loss": 0.6646, "step": 13593 }, { "epoch": 0.9210651128125211, "grad_norm": 7.044222831726074, "learning_rate": 8.241768772674379e-05, "loss": 0.9055, "step": 13594 }, { "epoch": 0.9211328680804932, "grad_norm": 6.417403697967529, "learning_rate": 8.241631870764597e-05, "loss": 0.6975, "step": 13595 }, { "epoch": 0.9212006233484653, "grad_norm": 7.259861469268799, "learning_rate": 8.241494968854815e-05, "loss": 0.736, "step": 13596 }, { "epoch": 0.9212683786164374, "grad_norm": 7.667874813079834, "learning_rate": 8.241358066945035e-05, "loss": 0.741, "step": 13597 }, { "epoch": 0.9213361338844095, "grad_norm": 5.993992805480957, "learning_rate": 8.241221165035253e-05, "loss": 0.74, "step": 13598 }, { "epoch": 0.9214038891523816, "grad_norm": 6.172394275665283, "learning_rate": 8.241084263125471e-05, "loss": 0.6285, "step": 13599 }, { "epoch": 0.9214716444203537, "grad_norm": 5.315210342407227, "learning_rate": 8.24094736121569e-05, "loss": 0.8465, "step": 13600 }, { "epoch": 0.9215393996883258, "grad_norm": 7.3281683921813965, "learning_rate": 8.240810459305908e-05, "loss": 0.8761, "step": 13601 }, { "epoch": 0.9216071549562979, "grad_norm": 4.546838760375977, "learning_rate": 8.240673557396126e-05, "loss": 0.5582, "step": 13602 }, { "epoch": 0.92167491022427, "grad_norm": 10.452659606933594, "learning_rate": 8.240536655486344e-05, "loss": 0.7345, "step": 13603 }, { "epoch": 0.921742665492242, "grad_norm": 5.254201412200928, "learning_rate": 8.240399753576564e-05, "loss": 0.6984, "step": 13604 }, { "epoch": 0.9218104207602141, "grad_norm": 7.967467784881592, "learning_rate": 8.240262851666782e-05, "loss": 0.775, "step": 13605 }, { "epoch": 0.9218781760281862, "grad_norm": 8.285964012145996, "learning_rate": 8.240125949757e-05, "loss": 0.7648, "step": 13606 }, { "epoch": 0.9219459312961583, "grad_norm": 5.65543794631958, "learning_rate": 8.239989047847218e-05, "loss": 0.6109, "step": 13607 }, { "epoch": 0.9220136865641304, "grad_norm": 5.535354137420654, "learning_rate": 8.239852145937437e-05, "loss": 0.5679, "step": 13608 }, { "epoch": 0.9220814418321025, "grad_norm": 4.902824878692627, "learning_rate": 8.239715244027655e-05, "loss": 0.574, "step": 13609 }, { "epoch": 0.9221491971000745, "grad_norm": 5.251827716827393, "learning_rate": 8.239578342117873e-05, "loss": 0.5183, "step": 13610 }, { "epoch": 0.9222169523680466, "grad_norm": 4.334627151489258, "learning_rate": 8.239441440208091e-05, "loss": 0.6654, "step": 13611 }, { "epoch": 0.9222847076360187, "grad_norm": 5.845222473144531, "learning_rate": 8.239304538298309e-05, "loss": 0.7711, "step": 13612 }, { "epoch": 0.9223524629039908, "grad_norm": 4.581849575042725, "learning_rate": 8.239167636388529e-05, "loss": 0.6276, "step": 13613 }, { "epoch": 0.9224202181719628, "grad_norm": 6.175132751464844, "learning_rate": 8.239030734478747e-05, "loss": 0.6509, "step": 13614 }, { "epoch": 0.9224879734399349, "grad_norm": 5.571422100067139, "learning_rate": 8.238893832568965e-05, "loss": 0.874, "step": 13615 }, { "epoch": 0.922555728707907, "grad_norm": 6.991871356964111, "learning_rate": 8.238756930659183e-05, "loss": 0.7676, "step": 13616 }, { "epoch": 0.9226234839758791, "grad_norm": 7.442737102508545, "learning_rate": 8.238620028749402e-05, "loss": 0.9373, "step": 13617 }, { "epoch": 0.9226912392438512, "grad_norm": 6.320655822753906, "learning_rate": 8.23848312683962e-05, "loss": 0.6271, "step": 13618 }, { "epoch": 0.9227589945118233, "grad_norm": 4.894016265869141, "learning_rate": 8.238346224929838e-05, "loss": 0.7149, "step": 13619 }, { "epoch": 0.9228267497797954, "grad_norm": 6.294023036956787, "learning_rate": 8.238209323020056e-05, "loss": 0.6531, "step": 13620 }, { "epoch": 0.9228945050477675, "grad_norm": 8.631871223449707, "learning_rate": 8.238072421110274e-05, "loss": 0.6823, "step": 13621 }, { "epoch": 0.9229622603157396, "grad_norm": 9.09997844696045, "learning_rate": 8.237935519200494e-05, "loss": 0.8513, "step": 13622 }, { "epoch": 0.9230300155837117, "grad_norm": 5.959611892700195, "learning_rate": 8.237798617290712e-05, "loss": 0.8519, "step": 13623 }, { "epoch": 0.9230977708516838, "grad_norm": 7.821328163146973, "learning_rate": 8.23766171538093e-05, "loss": 0.7974, "step": 13624 }, { "epoch": 0.9231655261196559, "grad_norm": 5.780052185058594, "learning_rate": 8.237524813471148e-05, "loss": 0.8274, "step": 13625 }, { "epoch": 0.9232332813876278, "grad_norm": 5.173895359039307, "learning_rate": 8.237387911561366e-05, "loss": 0.6296, "step": 13626 }, { "epoch": 0.9233010366555999, "grad_norm": 6.263997554779053, "learning_rate": 8.237251009651585e-05, "loss": 0.8015, "step": 13627 }, { "epoch": 0.923368791923572, "grad_norm": 5.063817501068115, "learning_rate": 8.237114107741803e-05, "loss": 0.8485, "step": 13628 }, { "epoch": 0.9234365471915441, "grad_norm": 5.420182704925537, "learning_rate": 8.236977205832021e-05, "loss": 0.8133, "step": 13629 }, { "epoch": 0.9235043024595162, "grad_norm": 6.21091890335083, "learning_rate": 8.236840303922239e-05, "loss": 0.9033, "step": 13630 }, { "epoch": 0.9235720577274883, "grad_norm": 5.321242332458496, "learning_rate": 8.236703402012459e-05, "loss": 0.7809, "step": 13631 }, { "epoch": 0.9236398129954604, "grad_norm": 4.922853469848633, "learning_rate": 8.236566500102677e-05, "loss": 0.6684, "step": 13632 }, { "epoch": 0.9237075682634325, "grad_norm": 7.8503193855285645, "learning_rate": 8.236429598192895e-05, "loss": 0.9825, "step": 13633 }, { "epoch": 0.9237753235314046, "grad_norm": 6.110293388366699, "learning_rate": 8.236292696283113e-05, "loss": 0.7034, "step": 13634 }, { "epoch": 0.9238430787993767, "grad_norm": 7.469491481781006, "learning_rate": 8.236155794373331e-05, "loss": 0.9736, "step": 13635 }, { "epoch": 0.9239108340673488, "grad_norm": 6.1995086669921875, "learning_rate": 8.23601889246355e-05, "loss": 0.8417, "step": 13636 }, { "epoch": 0.9239785893353208, "grad_norm": 4.243618011474609, "learning_rate": 8.235881990553768e-05, "loss": 0.5993, "step": 13637 }, { "epoch": 0.9240463446032929, "grad_norm": 5.490956783294678, "learning_rate": 8.235745088643986e-05, "loss": 0.7956, "step": 13638 }, { "epoch": 0.924114099871265, "grad_norm": 6.069158554077148, "learning_rate": 8.235608186734204e-05, "loss": 0.6775, "step": 13639 }, { "epoch": 0.9241818551392371, "grad_norm": 6.96622896194458, "learning_rate": 8.235471284824424e-05, "loss": 0.8268, "step": 13640 }, { "epoch": 0.9242496104072092, "grad_norm": 8.088959693908691, "learning_rate": 8.235334382914642e-05, "loss": 0.9065, "step": 13641 }, { "epoch": 0.9243173656751812, "grad_norm": 5.17006778717041, "learning_rate": 8.23519748100486e-05, "loss": 0.5922, "step": 13642 }, { "epoch": 0.9243851209431533, "grad_norm": 5.012120246887207, "learning_rate": 8.235060579095079e-05, "loss": 0.6376, "step": 13643 }, { "epoch": 0.9244528762111254, "grad_norm": 5.864466667175293, "learning_rate": 8.234923677185297e-05, "loss": 0.6989, "step": 13644 }, { "epoch": 0.9245206314790975, "grad_norm": 5.791763782501221, "learning_rate": 8.234786775275515e-05, "loss": 0.818, "step": 13645 }, { "epoch": 0.9245883867470696, "grad_norm": 5.13279390335083, "learning_rate": 8.234649873365735e-05, "loss": 0.6238, "step": 13646 }, { "epoch": 0.9246561420150416, "grad_norm": 6.566293716430664, "learning_rate": 8.234512971455953e-05, "loss": 0.7528, "step": 13647 }, { "epoch": 0.9247238972830137, "grad_norm": 4.981358528137207, "learning_rate": 8.234376069546171e-05, "loss": 0.713, "step": 13648 }, { "epoch": 0.9247916525509858, "grad_norm": 5.388360500335693, "learning_rate": 8.23423916763639e-05, "loss": 0.8413, "step": 13649 }, { "epoch": 0.9248594078189579, "grad_norm": 5.867083549499512, "learning_rate": 8.234102265726608e-05, "loss": 0.7376, "step": 13650 }, { "epoch": 0.92492716308693, "grad_norm": 6.004108428955078, "learning_rate": 8.233965363816826e-05, "loss": 0.7179, "step": 13651 }, { "epoch": 0.9249949183549021, "grad_norm": 5.956449031829834, "learning_rate": 8.233828461907044e-05, "loss": 0.856, "step": 13652 }, { "epoch": 0.9250626736228742, "grad_norm": 4.9217209815979, "learning_rate": 8.233691559997262e-05, "loss": 0.7254, "step": 13653 }, { "epoch": 0.9251304288908463, "grad_norm": 5.971928596496582, "learning_rate": 8.233554658087482e-05, "loss": 0.8707, "step": 13654 }, { "epoch": 0.9251981841588184, "grad_norm": 5.525825500488281, "learning_rate": 8.2334177561777e-05, "loss": 0.6256, "step": 13655 }, { "epoch": 0.9252659394267905, "grad_norm": 6.858259201049805, "learning_rate": 8.233280854267918e-05, "loss": 0.771, "step": 13656 }, { "epoch": 0.9253336946947626, "grad_norm": 5.588975429534912, "learning_rate": 8.233143952358136e-05, "loss": 0.7937, "step": 13657 }, { "epoch": 0.9254014499627347, "grad_norm": 5.949060916900635, "learning_rate": 8.233007050448354e-05, "loss": 0.7975, "step": 13658 }, { "epoch": 0.9254692052307066, "grad_norm": 5.091200828552246, "learning_rate": 8.232870148538573e-05, "loss": 0.5729, "step": 13659 }, { "epoch": 0.9255369604986787, "grad_norm": 7.370169639587402, "learning_rate": 8.232733246628791e-05, "loss": 0.9528, "step": 13660 }, { "epoch": 0.9256047157666508, "grad_norm": 5.088458061218262, "learning_rate": 8.232596344719009e-05, "loss": 0.7491, "step": 13661 }, { "epoch": 0.9256724710346229, "grad_norm": 6.5189433097839355, "learning_rate": 8.232459442809227e-05, "loss": 0.7068, "step": 13662 }, { "epoch": 0.925740226302595, "grad_norm": 6.852166175842285, "learning_rate": 8.232322540899447e-05, "loss": 1.0309, "step": 13663 }, { "epoch": 0.9258079815705671, "grad_norm": 6.470550060272217, "learning_rate": 8.232185638989665e-05, "loss": 0.8087, "step": 13664 }, { "epoch": 0.9258757368385392, "grad_norm": 6.046988010406494, "learning_rate": 8.232048737079883e-05, "loss": 0.7183, "step": 13665 }, { "epoch": 0.9259434921065113, "grad_norm": 5.661501884460449, "learning_rate": 8.231911835170101e-05, "loss": 0.806, "step": 13666 }, { "epoch": 0.9260112473744834, "grad_norm": 7.144009113311768, "learning_rate": 8.231774933260319e-05, "loss": 0.8562, "step": 13667 }, { "epoch": 0.9260790026424555, "grad_norm": 6.237162113189697, "learning_rate": 8.231638031350538e-05, "loss": 0.6964, "step": 13668 }, { "epoch": 0.9261467579104276, "grad_norm": 10.49614429473877, "learning_rate": 8.231501129440756e-05, "loss": 0.8433, "step": 13669 }, { "epoch": 0.9262145131783996, "grad_norm": 8.371081352233887, "learning_rate": 8.231364227530974e-05, "loss": 0.8938, "step": 13670 }, { "epoch": 0.9262822684463717, "grad_norm": 5.304482936859131, "learning_rate": 8.231227325621192e-05, "loss": 0.8161, "step": 13671 }, { "epoch": 0.9263500237143438, "grad_norm": 7.333506107330322, "learning_rate": 8.231090423711412e-05, "loss": 1.0648, "step": 13672 }, { "epoch": 0.9264177789823159, "grad_norm": 6.7197065353393555, "learning_rate": 8.23095352180163e-05, "loss": 0.5901, "step": 13673 }, { "epoch": 0.926485534250288, "grad_norm": 6.151739120483398, "learning_rate": 8.230816619891848e-05, "loss": 0.832, "step": 13674 }, { "epoch": 0.92655328951826, "grad_norm": 5.662747859954834, "learning_rate": 8.230679717982066e-05, "loss": 0.8096, "step": 13675 }, { "epoch": 0.9266210447862321, "grad_norm": 6.029799938201904, "learning_rate": 8.230542816072284e-05, "loss": 0.7201, "step": 13676 }, { "epoch": 0.9266888000542042, "grad_norm": 4.9484686851501465, "learning_rate": 8.230405914162503e-05, "loss": 0.6618, "step": 13677 }, { "epoch": 0.9267565553221763, "grad_norm": 6.27154016494751, "learning_rate": 8.230269012252721e-05, "loss": 0.9026, "step": 13678 }, { "epoch": 0.9268243105901484, "grad_norm": 7.863352298736572, "learning_rate": 8.230132110342939e-05, "loss": 0.6036, "step": 13679 }, { "epoch": 0.9268920658581205, "grad_norm": 6.766676425933838, "learning_rate": 8.229995208433157e-05, "loss": 0.9587, "step": 13680 }, { "epoch": 0.9269598211260925, "grad_norm": 7.4474711418151855, "learning_rate": 8.229858306523375e-05, "loss": 0.8504, "step": 13681 }, { "epoch": 0.9270275763940646, "grad_norm": 5.815866470336914, "learning_rate": 8.229721404613595e-05, "loss": 0.6245, "step": 13682 }, { "epoch": 0.9270953316620367, "grad_norm": 7.574525833129883, "learning_rate": 8.229584502703813e-05, "loss": 0.6652, "step": 13683 }, { "epoch": 0.9271630869300088, "grad_norm": 8.183697700500488, "learning_rate": 8.229447600794031e-05, "loss": 0.7956, "step": 13684 }, { "epoch": 0.9272308421979809, "grad_norm": 5.091358184814453, "learning_rate": 8.229310698884249e-05, "loss": 0.6387, "step": 13685 }, { "epoch": 0.927298597465953, "grad_norm": 5.971275806427002, "learning_rate": 8.229173796974468e-05, "loss": 1.0076, "step": 13686 }, { "epoch": 0.9273663527339251, "grad_norm": 5.206945896148682, "learning_rate": 8.229036895064686e-05, "loss": 0.615, "step": 13687 }, { "epoch": 0.9274341080018972, "grad_norm": 6.2954864501953125, "learning_rate": 8.228899993154904e-05, "loss": 0.7028, "step": 13688 }, { "epoch": 0.9275018632698693, "grad_norm": 7.77675724029541, "learning_rate": 8.228763091245124e-05, "loss": 0.8924, "step": 13689 }, { "epoch": 0.9275696185378414, "grad_norm": 7.986929893493652, "learning_rate": 8.228626189335342e-05, "loss": 0.9092, "step": 13690 }, { "epoch": 0.9276373738058133, "grad_norm": 6.880285739898682, "learning_rate": 8.22848928742556e-05, "loss": 0.8328, "step": 13691 }, { "epoch": 0.9277051290737854, "grad_norm": 5.309557914733887, "learning_rate": 8.228352385515779e-05, "loss": 0.6386, "step": 13692 }, { "epoch": 0.9277728843417575, "grad_norm": 5.744555473327637, "learning_rate": 8.228215483605997e-05, "loss": 0.8106, "step": 13693 }, { "epoch": 0.9278406396097296, "grad_norm": 10.664202690124512, "learning_rate": 8.228078581696215e-05, "loss": 0.6584, "step": 13694 }, { "epoch": 0.9279083948777017, "grad_norm": 5.7767791748046875, "learning_rate": 8.227941679786435e-05, "loss": 0.7672, "step": 13695 }, { "epoch": 0.9279761501456738, "grad_norm": 4.621129035949707, "learning_rate": 8.227804777876653e-05, "loss": 0.8555, "step": 13696 }, { "epoch": 0.9280439054136459, "grad_norm": 5.1587018966674805, "learning_rate": 8.22766787596687e-05, "loss": 0.526, "step": 13697 }, { "epoch": 0.928111660681618, "grad_norm": 6.041534900665283, "learning_rate": 8.227530974057089e-05, "loss": 0.6588, "step": 13698 }, { "epoch": 0.9281794159495901, "grad_norm": 5.863411903381348, "learning_rate": 8.227394072147307e-05, "loss": 0.821, "step": 13699 }, { "epoch": 0.9282471712175622, "grad_norm": 5.905420780181885, "learning_rate": 8.227257170237526e-05, "loss": 0.9116, "step": 13700 }, { "epoch": 0.9283149264855343, "grad_norm": 8.96728229522705, "learning_rate": 8.227120268327744e-05, "loss": 0.7997, "step": 13701 }, { "epoch": 0.9283826817535064, "grad_norm": 7.430652141571045, "learning_rate": 8.226983366417962e-05, "loss": 0.8505, "step": 13702 }, { "epoch": 0.9284504370214784, "grad_norm": 6.5419921875, "learning_rate": 8.22684646450818e-05, "loss": 1.1061, "step": 13703 }, { "epoch": 0.9285181922894505, "grad_norm": 5.459079265594482, "learning_rate": 8.226709562598398e-05, "loss": 0.7481, "step": 13704 }, { "epoch": 0.9285859475574226, "grad_norm": 6.421117782592773, "learning_rate": 8.226572660688618e-05, "loss": 0.7031, "step": 13705 }, { "epoch": 0.9286537028253947, "grad_norm": 5.486372470855713, "learning_rate": 8.226435758778836e-05, "loss": 0.7731, "step": 13706 }, { "epoch": 0.9287214580933668, "grad_norm": 8.439654350280762, "learning_rate": 8.226298856869054e-05, "loss": 0.8965, "step": 13707 }, { "epoch": 0.9287892133613388, "grad_norm": 5.904208183288574, "learning_rate": 8.226161954959272e-05, "loss": 0.8596, "step": 13708 }, { "epoch": 0.9288569686293109, "grad_norm": 7.829963684082031, "learning_rate": 8.226025053049491e-05, "loss": 0.8663, "step": 13709 }, { "epoch": 0.928924723897283, "grad_norm": 6.749328136444092, "learning_rate": 8.225888151139709e-05, "loss": 0.6586, "step": 13710 }, { "epoch": 0.9289924791652551, "grad_norm": 6.042569637298584, "learning_rate": 8.225751249229927e-05, "loss": 0.6156, "step": 13711 }, { "epoch": 0.9290602344332272, "grad_norm": 4.903025150299072, "learning_rate": 8.225614347320145e-05, "loss": 0.7658, "step": 13712 }, { "epoch": 0.9291279897011993, "grad_norm": 5.892563343048096, "learning_rate": 8.225477445410363e-05, "loss": 0.8609, "step": 13713 }, { "epoch": 0.9291957449691713, "grad_norm": 5.452030658721924, "learning_rate": 8.225340543500583e-05, "loss": 1.0457, "step": 13714 }, { "epoch": 0.9292635002371434, "grad_norm": 6.059377193450928, "learning_rate": 8.2252036415908e-05, "loss": 0.8657, "step": 13715 }, { "epoch": 0.9293312555051155, "grad_norm": 5.9446210861206055, "learning_rate": 8.225066739681019e-05, "loss": 1.0325, "step": 13716 }, { "epoch": 0.9293990107730876, "grad_norm": 5.6474690437316895, "learning_rate": 8.224929837771237e-05, "loss": 0.6533, "step": 13717 }, { "epoch": 0.9294667660410597, "grad_norm": 6.072984218597412, "learning_rate": 8.224792935861456e-05, "loss": 0.8738, "step": 13718 }, { "epoch": 0.9295345213090318, "grad_norm": 7.003471851348877, "learning_rate": 8.224656033951674e-05, "loss": 0.7797, "step": 13719 }, { "epoch": 0.9296022765770039, "grad_norm": 6.7634148597717285, "learning_rate": 8.224519132041892e-05, "loss": 0.9885, "step": 13720 }, { "epoch": 0.929670031844976, "grad_norm": 4.705183506011963, "learning_rate": 8.22438223013211e-05, "loss": 0.8115, "step": 13721 }, { "epoch": 0.9297377871129481, "grad_norm": 5.442021369934082, "learning_rate": 8.224245328222328e-05, "loss": 0.9005, "step": 13722 }, { "epoch": 0.9298055423809202, "grad_norm": 5.542171001434326, "learning_rate": 8.224108426312548e-05, "loss": 0.6873, "step": 13723 }, { "epoch": 0.9298732976488921, "grad_norm": 4.973824977874756, "learning_rate": 8.223971524402766e-05, "loss": 0.8902, "step": 13724 }, { "epoch": 0.9299410529168642, "grad_norm": 6.488674640655518, "learning_rate": 8.223834622492984e-05, "loss": 0.6895, "step": 13725 }, { "epoch": 0.9300088081848363, "grad_norm": 6.3187150955200195, "learning_rate": 8.223697720583202e-05, "loss": 0.591, "step": 13726 }, { "epoch": 0.9300765634528084, "grad_norm": 4.76509952545166, "learning_rate": 8.223560818673421e-05, "loss": 0.5401, "step": 13727 }, { "epoch": 0.9301443187207805, "grad_norm": 6.474403381347656, "learning_rate": 8.223423916763639e-05, "loss": 0.788, "step": 13728 }, { "epoch": 0.9302120739887526, "grad_norm": 5.295207977294922, "learning_rate": 8.223287014853857e-05, "loss": 0.8156, "step": 13729 }, { "epoch": 0.9302798292567247, "grad_norm": 4.847303867340088, "learning_rate": 8.223150112944075e-05, "loss": 0.7142, "step": 13730 }, { "epoch": 0.9303475845246968, "grad_norm": 7.824878692626953, "learning_rate": 8.223013211034293e-05, "loss": 0.6943, "step": 13731 }, { "epoch": 0.9304153397926689, "grad_norm": 5.352818965911865, "learning_rate": 8.222876309124513e-05, "loss": 0.6813, "step": 13732 }, { "epoch": 0.930483095060641, "grad_norm": 5.922751426696777, "learning_rate": 8.222739407214731e-05, "loss": 0.6284, "step": 13733 }, { "epoch": 0.9305508503286131, "grad_norm": 6.715836048126221, "learning_rate": 8.222602505304949e-05, "loss": 0.7602, "step": 13734 }, { "epoch": 0.9306186055965852, "grad_norm": 5.062824726104736, "learning_rate": 8.222465603395168e-05, "loss": 0.6008, "step": 13735 }, { "epoch": 0.9306863608645572, "grad_norm": 5.607885837554932, "learning_rate": 8.222328701485386e-05, "loss": 0.7161, "step": 13736 }, { "epoch": 0.9307541161325293, "grad_norm": 6.048231601715088, "learning_rate": 8.222191799575604e-05, "loss": 0.6162, "step": 13737 }, { "epoch": 0.9308218714005014, "grad_norm": 6.022711277008057, "learning_rate": 8.222054897665824e-05, "loss": 0.7804, "step": 13738 }, { "epoch": 0.9308896266684735, "grad_norm": 8.368260383605957, "learning_rate": 8.221917995756042e-05, "loss": 0.8318, "step": 13739 }, { "epoch": 0.9309573819364455, "grad_norm": 4.99493932723999, "learning_rate": 8.22178109384626e-05, "loss": 0.6902, "step": 13740 }, { "epoch": 0.9310251372044176, "grad_norm": 5.73579740524292, "learning_rate": 8.221644191936479e-05, "loss": 0.6253, "step": 13741 }, { "epoch": 0.9310928924723897, "grad_norm": 6.078760623931885, "learning_rate": 8.221507290026697e-05, "loss": 0.9188, "step": 13742 }, { "epoch": 0.9311606477403618, "grad_norm": 5.923210620880127, "learning_rate": 8.221370388116915e-05, "loss": 0.8844, "step": 13743 }, { "epoch": 0.9312284030083339, "grad_norm": 4.4179229736328125, "learning_rate": 8.221233486207133e-05, "loss": 0.6355, "step": 13744 }, { "epoch": 0.931296158276306, "grad_norm": 5.3702311515808105, "learning_rate": 8.221096584297351e-05, "loss": 0.8017, "step": 13745 }, { "epoch": 0.931363913544278, "grad_norm": 7.519911766052246, "learning_rate": 8.22095968238757e-05, "loss": 0.8302, "step": 13746 }, { "epoch": 0.9314316688122501, "grad_norm": 9.204391479492188, "learning_rate": 8.220822780477789e-05, "loss": 0.8657, "step": 13747 }, { "epoch": 0.9314994240802222, "grad_norm": 6.563374042510986, "learning_rate": 8.220685878568007e-05, "loss": 0.7257, "step": 13748 }, { "epoch": 0.9315671793481943, "grad_norm": 6.106207370758057, "learning_rate": 8.220548976658225e-05, "loss": 0.836, "step": 13749 }, { "epoch": 0.9316349346161664, "grad_norm": 8.019493103027344, "learning_rate": 8.220412074748444e-05, "loss": 0.7405, "step": 13750 }, { "epoch": 0.9317026898841385, "grad_norm": 5.366190433502197, "learning_rate": 8.220275172838662e-05, "loss": 0.7408, "step": 13751 }, { "epoch": 0.9317704451521106, "grad_norm": 3.966377019882202, "learning_rate": 8.22013827092888e-05, "loss": 0.5997, "step": 13752 }, { "epoch": 0.9318382004200827, "grad_norm": 8.112096786499023, "learning_rate": 8.220001369019098e-05, "loss": 1.1498, "step": 13753 }, { "epoch": 0.9319059556880548, "grad_norm": 7.886656284332275, "learning_rate": 8.219864467109316e-05, "loss": 0.8555, "step": 13754 }, { "epoch": 0.9319737109560269, "grad_norm": 5.4407219886779785, "learning_rate": 8.219727565199536e-05, "loss": 0.8506, "step": 13755 }, { "epoch": 0.932041466223999, "grad_norm": 6.584644794464111, "learning_rate": 8.219590663289754e-05, "loss": 0.8408, "step": 13756 }, { "epoch": 0.932109221491971, "grad_norm": 5.497494697570801, "learning_rate": 8.219453761379972e-05, "loss": 0.6424, "step": 13757 }, { "epoch": 0.932176976759943, "grad_norm": 5.6908698081970215, "learning_rate": 8.21931685947019e-05, "loss": 0.6629, "step": 13758 }, { "epoch": 0.9322447320279151, "grad_norm": 5.446488380432129, "learning_rate": 8.219179957560408e-05, "loss": 0.6875, "step": 13759 }, { "epoch": 0.9323124872958872, "grad_norm": 5.45862340927124, "learning_rate": 8.219043055650627e-05, "loss": 0.6987, "step": 13760 }, { "epoch": 0.9323802425638593, "grad_norm": 5.105297565460205, "learning_rate": 8.218906153740845e-05, "loss": 0.5886, "step": 13761 }, { "epoch": 0.9324479978318314, "grad_norm": 5.2332892417907715, "learning_rate": 8.218769251831063e-05, "loss": 0.7667, "step": 13762 }, { "epoch": 0.9325157530998035, "grad_norm": 5.849374294281006, "learning_rate": 8.218632349921281e-05, "loss": 0.7964, "step": 13763 }, { "epoch": 0.9325835083677756, "grad_norm": 5.5074872970581055, "learning_rate": 8.2184954480115e-05, "loss": 0.8375, "step": 13764 }, { "epoch": 0.9326512636357477, "grad_norm": 6.26788854598999, "learning_rate": 8.218358546101719e-05, "loss": 0.6793, "step": 13765 }, { "epoch": 0.9327190189037198, "grad_norm": 7.429625988006592, "learning_rate": 8.218221644191937e-05, "loss": 0.8389, "step": 13766 }, { "epoch": 0.9327867741716919, "grad_norm": 9.86705207824707, "learning_rate": 8.218084742282155e-05, "loss": 0.9663, "step": 13767 }, { "epoch": 0.932854529439664, "grad_norm": 7.741724491119385, "learning_rate": 8.217947840372373e-05, "loss": 0.8483, "step": 13768 }, { "epoch": 0.932922284707636, "grad_norm": 5.413618087768555, "learning_rate": 8.217810938462592e-05, "loss": 0.6838, "step": 13769 }, { "epoch": 0.9329900399756081, "grad_norm": 5.695152282714844, "learning_rate": 8.21767403655281e-05, "loss": 0.8978, "step": 13770 }, { "epoch": 0.9330577952435802, "grad_norm": 6.387299537658691, "learning_rate": 8.217537134643028e-05, "loss": 0.5846, "step": 13771 }, { "epoch": 0.9331255505115523, "grad_norm": 4.797947883605957, "learning_rate": 8.217400232733246e-05, "loss": 0.617, "step": 13772 }, { "epoch": 0.9331933057795243, "grad_norm": 5.718557357788086, "learning_rate": 8.217263330823466e-05, "loss": 0.7501, "step": 13773 }, { "epoch": 0.9332610610474964, "grad_norm": 6.03369665145874, "learning_rate": 8.217126428913684e-05, "loss": 0.5945, "step": 13774 }, { "epoch": 0.9333288163154685, "grad_norm": 4.953873634338379, "learning_rate": 8.216989527003902e-05, "loss": 0.6359, "step": 13775 }, { "epoch": 0.9333965715834406, "grad_norm": 6.724967956542969, "learning_rate": 8.21685262509412e-05, "loss": 0.788, "step": 13776 }, { "epoch": 0.9334643268514127, "grad_norm": 5.307444095611572, "learning_rate": 8.216715723184338e-05, "loss": 0.8166, "step": 13777 }, { "epoch": 0.9335320821193848, "grad_norm": 7.63820219039917, "learning_rate": 8.216578821274557e-05, "loss": 1.0157, "step": 13778 }, { "epoch": 0.9335998373873569, "grad_norm": 5.278408527374268, "learning_rate": 8.216441919364775e-05, "loss": 0.6433, "step": 13779 }, { "epoch": 0.933667592655329, "grad_norm": 8.680825233459473, "learning_rate": 8.216305017454993e-05, "loss": 0.84, "step": 13780 }, { "epoch": 0.933735347923301, "grad_norm": 5.572779655456543, "learning_rate": 8.216168115545213e-05, "loss": 0.783, "step": 13781 }, { "epoch": 0.9338031031912731, "grad_norm": 9.58791732788086, "learning_rate": 8.21603121363543e-05, "loss": 1.0121, "step": 13782 }, { "epoch": 0.9338708584592452, "grad_norm": 6.080748081207275, "learning_rate": 8.215894311725649e-05, "loss": 0.6494, "step": 13783 }, { "epoch": 0.9339386137272173, "grad_norm": 6.609582901000977, "learning_rate": 8.215757409815868e-05, "loss": 0.7456, "step": 13784 }, { "epoch": 0.9340063689951894, "grad_norm": 7.191807746887207, "learning_rate": 8.215620507906086e-05, "loss": 0.7427, "step": 13785 }, { "epoch": 0.9340741242631615, "grad_norm": 6.619680404663086, "learning_rate": 8.215483605996304e-05, "loss": 0.8251, "step": 13786 }, { "epoch": 0.9341418795311336, "grad_norm": 6.401364803314209, "learning_rate": 8.215346704086524e-05, "loss": 0.6732, "step": 13787 }, { "epoch": 0.9342096347991057, "grad_norm": 4.641686916351318, "learning_rate": 8.215209802176742e-05, "loss": 0.5844, "step": 13788 }, { "epoch": 0.9342773900670777, "grad_norm": 5.998747825622559, "learning_rate": 8.21507290026696e-05, "loss": 0.9057, "step": 13789 }, { "epoch": 0.9343451453350498, "grad_norm": 6.920633316040039, "learning_rate": 8.214935998357178e-05, "loss": 1.0017, "step": 13790 }, { "epoch": 0.9344129006030218, "grad_norm": 5.0339460372924805, "learning_rate": 8.214799096447396e-05, "loss": 0.7182, "step": 13791 }, { "epoch": 0.9344806558709939, "grad_norm": 6.2433762550354, "learning_rate": 8.214662194537615e-05, "loss": 0.5608, "step": 13792 }, { "epoch": 0.934548411138966, "grad_norm": 4.9427056312561035, "learning_rate": 8.214525292627833e-05, "loss": 0.6635, "step": 13793 }, { "epoch": 0.9346161664069381, "grad_norm": 5.293502330780029, "learning_rate": 8.214388390718051e-05, "loss": 0.6524, "step": 13794 }, { "epoch": 0.9346839216749102, "grad_norm": 5.291229724884033, "learning_rate": 8.214251488808269e-05, "loss": 0.8088, "step": 13795 }, { "epoch": 0.9347516769428823, "grad_norm": 6.6330389976501465, "learning_rate": 8.214114586898489e-05, "loss": 0.83, "step": 13796 }, { "epoch": 0.9348194322108544, "grad_norm": 4.883660316467285, "learning_rate": 8.213977684988707e-05, "loss": 0.849, "step": 13797 }, { "epoch": 0.9348871874788265, "grad_norm": 9.394811630249023, "learning_rate": 8.213840783078925e-05, "loss": 1.1274, "step": 13798 }, { "epoch": 0.9349549427467986, "grad_norm": 7.191971778869629, "learning_rate": 8.213703881169143e-05, "loss": 1.022, "step": 13799 }, { "epoch": 0.9350226980147707, "grad_norm": 5.284100532531738, "learning_rate": 8.21356697925936e-05, "loss": 0.7243, "step": 13800 }, { "epoch": 0.9350904532827428, "grad_norm": 4.5223469734191895, "learning_rate": 8.21343007734958e-05, "loss": 0.7437, "step": 13801 }, { "epoch": 0.9351582085507149, "grad_norm": 9.137798309326172, "learning_rate": 8.213293175439798e-05, "loss": 0.8741, "step": 13802 }, { "epoch": 0.935225963818687, "grad_norm": 5.241482257843018, "learning_rate": 8.213156273530016e-05, "loss": 0.7517, "step": 13803 }, { "epoch": 0.935293719086659, "grad_norm": 6.769075870513916, "learning_rate": 8.213019371620234e-05, "loss": 0.9897, "step": 13804 }, { "epoch": 0.935361474354631, "grad_norm": 5.048720836639404, "learning_rate": 8.212882469710454e-05, "loss": 0.7311, "step": 13805 }, { "epoch": 0.9354292296226031, "grad_norm": 5.8939385414123535, "learning_rate": 8.212745567800672e-05, "loss": 0.7927, "step": 13806 }, { "epoch": 0.9354969848905752, "grad_norm": 6.683951377868652, "learning_rate": 8.21260866589089e-05, "loss": 0.7332, "step": 13807 }, { "epoch": 0.9355647401585473, "grad_norm": 6.631941318511963, "learning_rate": 8.212471763981108e-05, "loss": 0.8927, "step": 13808 }, { "epoch": 0.9356324954265194, "grad_norm": 6.161096096038818, "learning_rate": 8.212334862071326e-05, "loss": 0.9347, "step": 13809 }, { "epoch": 0.9357002506944915, "grad_norm": 9.04511547088623, "learning_rate": 8.212197960161545e-05, "loss": 0.7569, "step": 13810 }, { "epoch": 0.9357680059624636, "grad_norm": 6.299793720245361, "learning_rate": 8.212061058251763e-05, "loss": 0.8722, "step": 13811 }, { "epoch": 0.9358357612304357, "grad_norm": 6.218029975891113, "learning_rate": 8.211924156341981e-05, "loss": 0.4517, "step": 13812 }, { "epoch": 0.9359035164984078, "grad_norm": 6.690893650054932, "learning_rate": 8.211787254432199e-05, "loss": 0.7149, "step": 13813 }, { "epoch": 0.9359712717663798, "grad_norm": 5.8090009689331055, "learning_rate": 8.211650352522417e-05, "loss": 0.7553, "step": 13814 }, { "epoch": 0.9360390270343519, "grad_norm": 6.034711837768555, "learning_rate": 8.211513450612637e-05, "loss": 0.7217, "step": 13815 }, { "epoch": 0.936106782302324, "grad_norm": 5.093395233154297, "learning_rate": 8.211376548702855e-05, "loss": 0.9626, "step": 13816 }, { "epoch": 0.9361745375702961, "grad_norm": 7.061664581298828, "learning_rate": 8.211239646793073e-05, "loss": 1.0739, "step": 13817 }, { "epoch": 0.9362422928382682, "grad_norm": 5.97583532333374, "learning_rate": 8.211102744883291e-05, "loss": 1.0293, "step": 13818 }, { "epoch": 0.9363100481062403, "grad_norm": 6.2469282150268555, "learning_rate": 8.21096584297351e-05, "loss": 0.8048, "step": 13819 }, { "epoch": 0.9363778033742124, "grad_norm": 6.801420211791992, "learning_rate": 8.210828941063728e-05, "loss": 1.1723, "step": 13820 }, { "epoch": 0.9364455586421845, "grad_norm": 5.765828609466553, "learning_rate": 8.210692039153946e-05, "loss": 0.8612, "step": 13821 }, { "epoch": 0.9365133139101565, "grad_norm": 5.4772233963012695, "learning_rate": 8.210555137244164e-05, "loss": 0.7415, "step": 13822 }, { "epoch": 0.9365810691781286, "grad_norm": 5.3950042724609375, "learning_rate": 8.210418235334382e-05, "loss": 0.623, "step": 13823 }, { "epoch": 0.9366488244461006, "grad_norm": 6.950892925262451, "learning_rate": 8.210281333424602e-05, "loss": 0.9234, "step": 13824 }, { "epoch": 0.9367165797140727, "grad_norm": 6.8263044357299805, "learning_rate": 8.21014443151482e-05, "loss": 0.6791, "step": 13825 }, { "epoch": 0.9367843349820448, "grad_norm": 6.001094818115234, "learning_rate": 8.210007529605038e-05, "loss": 0.8851, "step": 13826 }, { "epoch": 0.9368520902500169, "grad_norm": 5.480862617492676, "learning_rate": 8.209870627695256e-05, "loss": 0.6752, "step": 13827 }, { "epoch": 0.936919845517989, "grad_norm": 8.880070686340332, "learning_rate": 8.209733725785475e-05, "loss": 0.6895, "step": 13828 }, { "epoch": 0.9369876007859611, "grad_norm": 5.244697570800781, "learning_rate": 8.209596823875693e-05, "loss": 0.7239, "step": 13829 }, { "epoch": 0.9370553560539332, "grad_norm": 5.342809677124023, "learning_rate": 8.209459921965911e-05, "loss": 0.9466, "step": 13830 }, { "epoch": 0.9371231113219053, "grad_norm": 5.547102928161621, "learning_rate": 8.20932302005613e-05, "loss": 0.668, "step": 13831 }, { "epoch": 0.9371908665898774, "grad_norm": 5.530054092407227, "learning_rate": 8.209186118146349e-05, "loss": 0.7622, "step": 13832 }, { "epoch": 0.9372586218578495, "grad_norm": 6.689416885375977, "learning_rate": 8.209049216236567e-05, "loss": 0.7291, "step": 13833 }, { "epoch": 0.9373263771258216, "grad_norm": 5.052443981170654, "learning_rate": 8.208912314326786e-05, "loss": 0.7491, "step": 13834 }, { "epoch": 0.9373941323937937, "grad_norm": 8.584953308105469, "learning_rate": 8.208775412417004e-05, "loss": 0.867, "step": 13835 }, { "epoch": 0.9374618876617657, "grad_norm": 6.425045967102051, "learning_rate": 8.208638510507222e-05, "loss": 0.6844, "step": 13836 }, { "epoch": 0.9375296429297378, "grad_norm": 7.346054553985596, "learning_rate": 8.20850160859744e-05, "loss": 0.7747, "step": 13837 }, { "epoch": 0.9375973981977098, "grad_norm": 5.191468715667725, "learning_rate": 8.20836470668766e-05, "loss": 0.8087, "step": 13838 }, { "epoch": 0.9376651534656819, "grad_norm": 5.554154396057129, "learning_rate": 8.208227804777878e-05, "loss": 0.6445, "step": 13839 }, { "epoch": 0.937732908733654, "grad_norm": 6.577019214630127, "learning_rate": 8.208090902868096e-05, "loss": 0.9777, "step": 13840 }, { "epoch": 0.9378006640016261, "grad_norm": 5.5466742515563965, "learning_rate": 8.207954000958314e-05, "loss": 0.7303, "step": 13841 }, { "epoch": 0.9378684192695982, "grad_norm": 6.088878154754639, "learning_rate": 8.207817099048533e-05, "loss": 0.8181, "step": 13842 }, { "epoch": 0.9379361745375703, "grad_norm": 6.197640895843506, "learning_rate": 8.207680197138751e-05, "loss": 0.7192, "step": 13843 }, { "epoch": 0.9380039298055424, "grad_norm": 4.92954158782959, "learning_rate": 8.207543295228969e-05, "loss": 0.6276, "step": 13844 }, { "epoch": 0.9380716850735145, "grad_norm": 5.180205345153809, "learning_rate": 8.207406393319187e-05, "loss": 0.783, "step": 13845 }, { "epoch": 0.9381394403414866, "grad_norm": 5.946224212646484, "learning_rate": 8.207269491409405e-05, "loss": 0.775, "step": 13846 }, { "epoch": 0.9382071956094586, "grad_norm": 5.4234490394592285, "learning_rate": 8.207132589499625e-05, "loss": 0.6428, "step": 13847 }, { "epoch": 0.9382749508774307, "grad_norm": 4.664503574371338, "learning_rate": 8.206995687589843e-05, "loss": 0.7846, "step": 13848 }, { "epoch": 0.9383427061454028, "grad_norm": 5.197528839111328, "learning_rate": 8.20685878568006e-05, "loss": 0.7588, "step": 13849 }, { "epoch": 0.9384104614133749, "grad_norm": 6.116962909698486, "learning_rate": 8.206721883770279e-05, "loss": 0.9564, "step": 13850 }, { "epoch": 0.938478216681347, "grad_norm": 5.113432884216309, "learning_rate": 8.206584981860498e-05, "loss": 0.7534, "step": 13851 }, { "epoch": 0.9385459719493191, "grad_norm": 6.907203674316406, "learning_rate": 8.206448079950716e-05, "loss": 0.9213, "step": 13852 }, { "epoch": 0.9386137272172912, "grad_norm": 7.037908554077148, "learning_rate": 8.206311178040934e-05, "loss": 0.7943, "step": 13853 }, { "epoch": 0.9386814824852632, "grad_norm": 6.385385036468506, "learning_rate": 8.206174276131152e-05, "loss": 0.7192, "step": 13854 }, { "epoch": 0.9387492377532353, "grad_norm": 6.877847194671631, "learning_rate": 8.20603737422137e-05, "loss": 1.0433, "step": 13855 }, { "epoch": 0.9388169930212074, "grad_norm": 6.244350910186768, "learning_rate": 8.20590047231159e-05, "loss": 0.7018, "step": 13856 }, { "epoch": 0.9388847482891794, "grad_norm": 5.529423236846924, "learning_rate": 8.205763570401808e-05, "loss": 0.825, "step": 13857 }, { "epoch": 0.9389525035571515, "grad_norm": 5.644784450531006, "learning_rate": 8.205626668492026e-05, "loss": 0.6996, "step": 13858 }, { "epoch": 0.9390202588251236, "grad_norm": 6.088039875030518, "learning_rate": 8.205489766582244e-05, "loss": 0.7494, "step": 13859 }, { "epoch": 0.9390880140930957, "grad_norm": 4.0728936195373535, "learning_rate": 8.205352864672463e-05, "loss": 0.5493, "step": 13860 }, { "epoch": 0.9391557693610678, "grad_norm": 6.4296698570251465, "learning_rate": 8.205215962762681e-05, "loss": 0.8059, "step": 13861 }, { "epoch": 0.9392235246290399, "grad_norm": 4.51001501083374, "learning_rate": 8.205079060852899e-05, "loss": 0.6298, "step": 13862 }, { "epoch": 0.939291279897012, "grad_norm": 5.673875331878662, "learning_rate": 8.204942158943117e-05, "loss": 0.8035, "step": 13863 }, { "epoch": 0.9393590351649841, "grad_norm": 6.233775615692139, "learning_rate": 8.204805257033335e-05, "loss": 0.7389, "step": 13864 }, { "epoch": 0.9394267904329562, "grad_norm": 6.151493549346924, "learning_rate": 8.204668355123555e-05, "loss": 0.6998, "step": 13865 }, { "epoch": 0.9394945457009283, "grad_norm": 4.745614051818848, "learning_rate": 8.204531453213773e-05, "loss": 0.5749, "step": 13866 }, { "epoch": 0.9395623009689004, "grad_norm": 6.141815185546875, "learning_rate": 8.20439455130399e-05, "loss": 0.5891, "step": 13867 }, { "epoch": 0.9396300562368725, "grad_norm": 6.6597490310668945, "learning_rate": 8.204257649394209e-05, "loss": 0.7727, "step": 13868 }, { "epoch": 0.9396978115048445, "grad_norm": 6.619930267333984, "learning_rate": 8.204120747484427e-05, "loss": 0.6702, "step": 13869 }, { "epoch": 0.9397655667728166, "grad_norm": 5.507278919219971, "learning_rate": 8.203983845574646e-05, "loss": 0.7741, "step": 13870 }, { "epoch": 0.9398333220407886, "grad_norm": 5.685328960418701, "learning_rate": 8.203846943664864e-05, "loss": 0.9224, "step": 13871 }, { "epoch": 0.9399010773087607, "grad_norm": 5.3461456298828125, "learning_rate": 8.203710041755082e-05, "loss": 0.6182, "step": 13872 }, { "epoch": 0.9399688325767328, "grad_norm": 8.528802871704102, "learning_rate": 8.2035731398453e-05, "loss": 0.7898, "step": 13873 }, { "epoch": 0.9400365878447049, "grad_norm": 6.036088943481445, "learning_rate": 8.20343623793552e-05, "loss": 1.0215, "step": 13874 }, { "epoch": 0.940104343112677, "grad_norm": 7.7004289627075195, "learning_rate": 8.203299336025738e-05, "loss": 0.7898, "step": 13875 }, { "epoch": 0.9401720983806491, "grad_norm": 6.902210712432861, "learning_rate": 8.203162434115956e-05, "loss": 0.7609, "step": 13876 }, { "epoch": 0.9402398536486212, "grad_norm": 9.464262962341309, "learning_rate": 8.203025532206175e-05, "loss": 1.1929, "step": 13877 }, { "epoch": 0.9403076089165933, "grad_norm": 6.184045791625977, "learning_rate": 8.202888630296393e-05, "loss": 0.7753, "step": 13878 }, { "epoch": 0.9403753641845654, "grad_norm": 7.176342964172363, "learning_rate": 8.202751728386611e-05, "loss": 0.6658, "step": 13879 }, { "epoch": 0.9404431194525374, "grad_norm": 5.406729221343994, "learning_rate": 8.20261482647683e-05, "loss": 0.6304, "step": 13880 }, { "epoch": 0.9405108747205095, "grad_norm": 6.102396488189697, "learning_rate": 8.202477924567049e-05, "loss": 0.8271, "step": 13881 }, { "epoch": 0.9405786299884816, "grad_norm": 5.444383144378662, "learning_rate": 8.202341022657267e-05, "loss": 0.6307, "step": 13882 }, { "epoch": 0.9406463852564537, "grad_norm": 4.637354850769043, "learning_rate": 8.202204120747486e-05, "loss": 0.7097, "step": 13883 }, { "epoch": 0.9407141405244258, "grad_norm": 9.361139297485352, "learning_rate": 8.202067218837704e-05, "loss": 0.7545, "step": 13884 }, { "epoch": 0.9407818957923979, "grad_norm": 5.362797260284424, "learning_rate": 8.201930316927922e-05, "loss": 0.8605, "step": 13885 }, { "epoch": 0.94084965106037, "grad_norm": 5.900387287139893, "learning_rate": 8.20179341501814e-05, "loss": 0.6518, "step": 13886 }, { "epoch": 0.940917406328342, "grad_norm": 5.875095844268799, "learning_rate": 8.201656513108358e-05, "loss": 0.7603, "step": 13887 }, { "epoch": 0.9409851615963141, "grad_norm": 7.324997425079346, "learning_rate": 8.201519611198577e-05, "loss": 0.7821, "step": 13888 }, { "epoch": 0.9410529168642862, "grad_norm": 6.422165870666504, "learning_rate": 8.201382709288796e-05, "loss": 0.7973, "step": 13889 }, { "epoch": 0.9411206721322583, "grad_norm": 5.074305534362793, "learning_rate": 8.201245807379014e-05, "loss": 0.6436, "step": 13890 }, { "epoch": 0.9411884274002303, "grad_norm": 6.061481952667236, "learning_rate": 8.201108905469232e-05, "loss": 0.7467, "step": 13891 }, { "epoch": 0.9412561826682024, "grad_norm": 5.551920413970947, "learning_rate": 8.20097200355945e-05, "loss": 0.6732, "step": 13892 }, { "epoch": 0.9413239379361745, "grad_norm": 6.896834373474121, "learning_rate": 8.200835101649669e-05, "loss": 0.7616, "step": 13893 }, { "epoch": 0.9413916932041466, "grad_norm": 6.073055744171143, "learning_rate": 8.200698199739887e-05, "loss": 0.6834, "step": 13894 }, { "epoch": 0.9414594484721187, "grad_norm": 7.429131984710693, "learning_rate": 8.200561297830105e-05, "loss": 0.6438, "step": 13895 }, { "epoch": 0.9415272037400908, "grad_norm": 8.393160820007324, "learning_rate": 8.200424395920323e-05, "loss": 0.8012, "step": 13896 }, { "epoch": 0.9415949590080629, "grad_norm": 6.223710060119629, "learning_rate": 8.200287494010542e-05, "loss": 0.8734, "step": 13897 }, { "epoch": 0.941662714276035, "grad_norm": 4.849613666534424, "learning_rate": 8.20015059210076e-05, "loss": 0.8162, "step": 13898 }, { "epoch": 0.9417304695440071, "grad_norm": 5.789686679840088, "learning_rate": 8.200013690190979e-05, "loss": 0.6433, "step": 13899 }, { "epoch": 0.9417982248119792, "grad_norm": 5.670434951782227, "learning_rate": 8.199876788281197e-05, "loss": 0.7761, "step": 13900 }, { "epoch": 0.9418659800799513, "grad_norm": 6.698735237121582, "learning_rate": 8.199739886371415e-05, "loss": 0.6752, "step": 13901 }, { "epoch": 0.9419337353479234, "grad_norm": 6.325132369995117, "learning_rate": 8.199602984461634e-05, "loss": 0.7789, "step": 13902 }, { "epoch": 0.9420014906158953, "grad_norm": 5.625211715698242, "learning_rate": 8.199466082551852e-05, "loss": 0.883, "step": 13903 }, { "epoch": 0.9420692458838674, "grad_norm": 6.5696516036987305, "learning_rate": 8.19932918064207e-05, "loss": 0.7999, "step": 13904 }, { "epoch": 0.9421370011518395, "grad_norm": 4.903794288635254, "learning_rate": 8.199192278732288e-05, "loss": 0.8234, "step": 13905 }, { "epoch": 0.9422047564198116, "grad_norm": 4.687190532684326, "learning_rate": 8.199055376822508e-05, "loss": 0.7286, "step": 13906 }, { "epoch": 0.9422725116877837, "grad_norm": 6.075998783111572, "learning_rate": 8.198918474912726e-05, "loss": 0.8778, "step": 13907 }, { "epoch": 0.9423402669557558, "grad_norm": 6.295614719390869, "learning_rate": 8.198781573002944e-05, "loss": 0.8878, "step": 13908 }, { "epoch": 0.9424080222237279, "grad_norm": 8.557680130004883, "learning_rate": 8.198644671093162e-05, "loss": 0.776, "step": 13909 }, { "epoch": 0.9424757774917, "grad_norm": 6.864640712738037, "learning_rate": 8.19850776918338e-05, "loss": 1.0199, "step": 13910 }, { "epoch": 0.9425435327596721, "grad_norm": 5.549873352050781, "learning_rate": 8.198370867273599e-05, "loss": 0.8881, "step": 13911 }, { "epoch": 0.9426112880276442, "grad_norm": 6.9793548583984375, "learning_rate": 8.198233965363817e-05, "loss": 0.79, "step": 13912 }, { "epoch": 0.9426790432956162, "grad_norm": 7.035325527191162, "learning_rate": 8.198097063454035e-05, "loss": 0.7801, "step": 13913 }, { "epoch": 0.9427467985635883, "grad_norm": 6.695631980895996, "learning_rate": 8.197960161544253e-05, "loss": 0.7903, "step": 13914 }, { "epoch": 0.9428145538315604, "grad_norm": 7.544031143188477, "learning_rate": 8.197823259634473e-05, "loss": 0.5813, "step": 13915 }, { "epoch": 0.9428823090995325, "grad_norm": 5.196893215179443, "learning_rate": 8.19768635772469e-05, "loss": 0.7587, "step": 13916 }, { "epoch": 0.9429500643675046, "grad_norm": 6.548089027404785, "learning_rate": 8.197549455814909e-05, "loss": 0.9122, "step": 13917 }, { "epoch": 0.9430178196354767, "grad_norm": 4.924033164978027, "learning_rate": 8.197412553905127e-05, "loss": 0.6318, "step": 13918 }, { "epoch": 0.9430855749034488, "grad_norm": 5.648553371429443, "learning_rate": 8.197275651995345e-05, "loss": 0.7063, "step": 13919 }, { "epoch": 0.9431533301714208, "grad_norm": 7.385311126708984, "learning_rate": 8.197138750085564e-05, "loss": 0.782, "step": 13920 }, { "epoch": 0.9432210854393929, "grad_norm": 5.233833312988281, "learning_rate": 8.197001848175782e-05, "loss": 0.6857, "step": 13921 }, { "epoch": 0.943288840707365, "grad_norm": 7.0770487785339355, "learning_rate": 8.196864946266e-05, "loss": 0.6966, "step": 13922 }, { "epoch": 0.943356595975337, "grad_norm": 6.8348307609558105, "learning_rate": 8.19672804435622e-05, "loss": 0.8207, "step": 13923 }, { "epoch": 0.9434243512433091, "grad_norm": 4.237405300140381, "learning_rate": 8.196591142446438e-05, "loss": 0.8074, "step": 13924 }, { "epoch": 0.9434921065112812, "grad_norm": 6.041856288909912, "learning_rate": 8.196454240536656e-05, "loss": 0.7794, "step": 13925 }, { "epoch": 0.9435598617792533, "grad_norm": 8.183391571044922, "learning_rate": 8.196317338626875e-05, "loss": 0.7523, "step": 13926 }, { "epoch": 0.9436276170472254, "grad_norm": 5.387772083282471, "learning_rate": 8.196180436717093e-05, "loss": 0.7759, "step": 13927 }, { "epoch": 0.9436953723151975, "grad_norm": 5.296437740325928, "learning_rate": 8.196043534807311e-05, "loss": 0.7972, "step": 13928 }, { "epoch": 0.9437631275831696, "grad_norm": 5.269390106201172, "learning_rate": 8.19590663289753e-05, "loss": 0.7055, "step": 13929 }, { "epoch": 0.9438308828511417, "grad_norm": 7.258601665496826, "learning_rate": 8.195769730987748e-05, "loss": 0.8937, "step": 13930 }, { "epoch": 0.9438986381191138, "grad_norm": 6.784587383270264, "learning_rate": 8.195632829077966e-05, "loss": 1.0161, "step": 13931 }, { "epoch": 0.9439663933870859, "grad_norm": 5.093071937561035, "learning_rate": 8.195495927168185e-05, "loss": 0.7299, "step": 13932 }, { "epoch": 0.944034148655058, "grad_norm": 5.3693013191223145, "learning_rate": 8.195359025258403e-05, "loss": 0.7403, "step": 13933 }, { "epoch": 0.9441019039230301, "grad_norm": 5.527505874633789, "learning_rate": 8.195222123348622e-05, "loss": 0.6796, "step": 13934 }, { "epoch": 0.9441696591910022, "grad_norm": 5.87165641784668, "learning_rate": 8.19508522143884e-05, "loss": 0.7385, "step": 13935 }, { "epoch": 0.9442374144589741, "grad_norm": 6.087599754333496, "learning_rate": 8.194948319529058e-05, "loss": 0.545, "step": 13936 }, { "epoch": 0.9443051697269462, "grad_norm": 5.642385005950928, "learning_rate": 8.194811417619276e-05, "loss": 0.7826, "step": 13937 }, { "epoch": 0.9443729249949183, "grad_norm": 4.871824741363525, "learning_rate": 8.194674515709495e-05, "loss": 0.6956, "step": 13938 }, { "epoch": 0.9444406802628904, "grad_norm": 4.260469913482666, "learning_rate": 8.194537613799713e-05, "loss": 0.561, "step": 13939 }, { "epoch": 0.9445084355308625, "grad_norm": 5.726165294647217, "learning_rate": 8.194400711889932e-05, "loss": 0.9119, "step": 13940 }, { "epoch": 0.9445761907988346, "grad_norm": 8.435538291931152, "learning_rate": 8.19426380998015e-05, "loss": 1.0353, "step": 13941 }, { "epoch": 0.9446439460668067, "grad_norm": 5.619915962219238, "learning_rate": 8.194126908070368e-05, "loss": 0.7213, "step": 13942 }, { "epoch": 0.9447117013347788, "grad_norm": 6.574455261230469, "learning_rate": 8.193990006160587e-05, "loss": 0.9633, "step": 13943 }, { "epoch": 0.9447794566027509, "grad_norm": 6.439619064331055, "learning_rate": 8.193853104250805e-05, "loss": 0.7964, "step": 13944 }, { "epoch": 0.944847211870723, "grad_norm": 6.305572509765625, "learning_rate": 8.193716202341023e-05, "loss": 0.72, "step": 13945 }, { "epoch": 0.944914967138695, "grad_norm": 5.285477638244629, "learning_rate": 8.193579300431241e-05, "loss": 0.9051, "step": 13946 }, { "epoch": 0.9449827224066671, "grad_norm": 5.163026332855225, "learning_rate": 8.193442398521459e-05, "loss": 0.57, "step": 13947 }, { "epoch": 0.9450504776746392, "grad_norm": 5.176440715789795, "learning_rate": 8.193305496611678e-05, "loss": 0.7803, "step": 13948 }, { "epoch": 0.9451182329426113, "grad_norm": 6.866820335388184, "learning_rate": 8.193168594701897e-05, "loss": 0.7486, "step": 13949 }, { "epoch": 0.9451859882105834, "grad_norm": 6.827968597412109, "learning_rate": 8.193031692792115e-05, "loss": 0.7738, "step": 13950 }, { "epoch": 0.9452537434785555, "grad_norm": 6.111838340759277, "learning_rate": 8.192894790882333e-05, "loss": 0.8302, "step": 13951 }, { "epoch": 0.9453214987465275, "grad_norm": 6.111614227294922, "learning_rate": 8.192757888972552e-05, "loss": 0.8413, "step": 13952 }, { "epoch": 0.9453892540144996, "grad_norm": 5.463453769683838, "learning_rate": 8.19262098706277e-05, "loss": 0.7054, "step": 13953 }, { "epoch": 0.9454570092824717, "grad_norm": 6.099150657653809, "learning_rate": 8.192484085152988e-05, "loss": 0.596, "step": 13954 }, { "epoch": 0.9455247645504438, "grad_norm": 6.040791988372803, "learning_rate": 8.192347183243206e-05, "loss": 0.6796, "step": 13955 }, { "epoch": 0.9455925198184159, "grad_norm": 6.111145973205566, "learning_rate": 8.192210281333424e-05, "loss": 0.8444, "step": 13956 }, { "epoch": 0.945660275086388, "grad_norm": 6.970156192779541, "learning_rate": 8.192073379423644e-05, "loss": 0.5742, "step": 13957 }, { "epoch": 0.94572803035436, "grad_norm": 6.0487165451049805, "learning_rate": 8.191936477513862e-05, "loss": 0.8061, "step": 13958 }, { "epoch": 0.9457957856223321, "grad_norm": 4.807767868041992, "learning_rate": 8.19179957560408e-05, "loss": 0.6691, "step": 13959 }, { "epoch": 0.9458635408903042, "grad_norm": 5.062884330749512, "learning_rate": 8.191662673694298e-05, "loss": 0.9048, "step": 13960 }, { "epoch": 0.9459312961582763, "grad_norm": 6.347843647003174, "learning_rate": 8.191525771784517e-05, "loss": 0.8603, "step": 13961 }, { "epoch": 0.9459990514262484, "grad_norm": 5.537858009338379, "learning_rate": 8.191388869874735e-05, "loss": 0.8675, "step": 13962 }, { "epoch": 0.9460668066942205, "grad_norm": 5.778237819671631, "learning_rate": 8.191251967964953e-05, "loss": 0.5481, "step": 13963 }, { "epoch": 0.9461345619621926, "grad_norm": 5.969203472137451, "learning_rate": 8.191115066055171e-05, "loss": 0.7099, "step": 13964 }, { "epoch": 0.9462023172301647, "grad_norm": 7.126950263977051, "learning_rate": 8.190978164145389e-05, "loss": 0.8117, "step": 13965 }, { "epoch": 0.9462700724981368, "grad_norm": 5.3434624671936035, "learning_rate": 8.190841262235609e-05, "loss": 0.8133, "step": 13966 }, { "epoch": 0.9463378277661089, "grad_norm": 5.5889105796813965, "learning_rate": 8.190704360325827e-05, "loss": 0.8104, "step": 13967 }, { "epoch": 0.946405583034081, "grad_norm": 6.536361217498779, "learning_rate": 8.190567458416045e-05, "loss": 0.7897, "step": 13968 }, { "epoch": 0.9464733383020529, "grad_norm": 6.82296085357666, "learning_rate": 8.190430556506264e-05, "loss": 1.0064, "step": 13969 }, { "epoch": 0.946541093570025, "grad_norm": 6.4476470947265625, "learning_rate": 8.190293654596482e-05, "loss": 0.9211, "step": 13970 }, { "epoch": 0.9466088488379971, "grad_norm": 6.636000156402588, "learning_rate": 8.1901567526867e-05, "loss": 0.6633, "step": 13971 }, { "epoch": 0.9466766041059692, "grad_norm": 6.36086368560791, "learning_rate": 8.19001985077692e-05, "loss": 0.6275, "step": 13972 }, { "epoch": 0.9467443593739413, "grad_norm": 7.286365032196045, "learning_rate": 8.189882948867137e-05, "loss": 0.7431, "step": 13973 }, { "epoch": 0.9468121146419134, "grad_norm": 7.394494533538818, "learning_rate": 8.189746046957356e-05, "loss": 0.8701, "step": 13974 }, { "epoch": 0.9468798699098855, "grad_norm": 5.378866195678711, "learning_rate": 8.189609145047575e-05, "loss": 0.6846, "step": 13975 }, { "epoch": 0.9469476251778576, "grad_norm": 9.122882843017578, "learning_rate": 8.189472243137793e-05, "loss": 0.8023, "step": 13976 }, { "epoch": 0.9470153804458297, "grad_norm": 6.454582214355469, "learning_rate": 8.189335341228011e-05, "loss": 0.7631, "step": 13977 }, { "epoch": 0.9470831357138018, "grad_norm": 5.218212604522705, "learning_rate": 8.189198439318229e-05, "loss": 0.7752, "step": 13978 }, { "epoch": 0.9471508909817739, "grad_norm": 6.383564472198486, "learning_rate": 8.189061537408447e-05, "loss": 0.7355, "step": 13979 }, { "epoch": 0.9472186462497459, "grad_norm": 4.193008899688721, "learning_rate": 8.188924635498666e-05, "loss": 0.5955, "step": 13980 }, { "epoch": 0.947286401517718, "grad_norm": 6.7840423583984375, "learning_rate": 8.188787733588884e-05, "loss": 0.8695, "step": 13981 }, { "epoch": 0.9473541567856901, "grad_norm": 5.606986999511719, "learning_rate": 8.188650831679102e-05, "loss": 0.8062, "step": 13982 }, { "epoch": 0.9474219120536622, "grad_norm": 4.632526397705078, "learning_rate": 8.18851392976932e-05, "loss": 0.9447, "step": 13983 }, { "epoch": 0.9474896673216343, "grad_norm": 7.079931259155273, "learning_rate": 8.18837702785954e-05, "loss": 0.7606, "step": 13984 }, { "epoch": 0.9475574225896063, "grad_norm": 6.6071858406066895, "learning_rate": 8.188240125949758e-05, "loss": 1.0442, "step": 13985 }, { "epoch": 0.9476251778575784, "grad_norm": 6.782355308532715, "learning_rate": 8.188103224039976e-05, "loss": 0.6173, "step": 13986 }, { "epoch": 0.9476929331255505, "grad_norm": 6.116751194000244, "learning_rate": 8.187966322130194e-05, "loss": 0.7438, "step": 13987 }, { "epoch": 0.9477606883935226, "grad_norm": 6.912460803985596, "learning_rate": 8.187829420220412e-05, "loss": 0.7509, "step": 13988 }, { "epoch": 0.9478284436614947, "grad_norm": 6.737586498260498, "learning_rate": 8.187692518310631e-05, "loss": 0.8741, "step": 13989 }, { "epoch": 0.9478961989294667, "grad_norm": 4.6144938468933105, "learning_rate": 8.18755561640085e-05, "loss": 0.8468, "step": 13990 }, { "epoch": 0.9479639541974388, "grad_norm": 6.440502166748047, "learning_rate": 8.187418714491068e-05, "loss": 0.7353, "step": 13991 }, { "epoch": 0.9480317094654109, "grad_norm": 7.09966516494751, "learning_rate": 8.187281812581286e-05, "loss": 0.8539, "step": 13992 }, { "epoch": 0.948099464733383, "grad_norm": 7.213123321533203, "learning_rate": 8.187144910671505e-05, "loss": 0.5578, "step": 13993 }, { "epoch": 0.9481672200013551, "grad_norm": 5.926023006439209, "learning_rate": 8.187008008761723e-05, "loss": 0.7481, "step": 13994 }, { "epoch": 0.9482349752693272, "grad_norm": 6.6714768409729, "learning_rate": 8.186871106851941e-05, "loss": 0.8587, "step": 13995 }, { "epoch": 0.9483027305372993, "grad_norm": 6.985418319702148, "learning_rate": 8.186734204942159e-05, "loss": 0.9756, "step": 13996 }, { "epoch": 0.9483704858052714, "grad_norm": 4.339714050292969, "learning_rate": 8.186597303032377e-05, "loss": 0.7206, "step": 13997 }, { "epoch": 0.9484382410732435, "grad_norm": 5.789028644561768, "learning_rate": 8.186460401122596e-05, "loss": 0.7851, "step": 13998 }, { "epoch": 0.9485059963412156, "grad_norm": 4.326128005981445, "learning_rate": 8.186323499212814e-05, "loss": 0.6611, "step": 13999 }, { "epoch": 0.9485737516091877, "grad_norm": 4.751250267028809, "learning_rate": 8.186186597303033e-05, "loss": 0.791, "step": 14000 }, { "epoch": 0.9486415068771596, "grad_norm": 6.147752285003662, "learning_rate": 8.18604969539325e-05, "loss": 0.9781, "step": 14001 }, { "epoch": 0.9487092621451317, "grad_norm": 5.021920680999756, "learning_rate": 8.185912793483469e-05, "loss": 0.634, "step": 14002 }, { "epoch": 0.9487770174131038, "grad_norm": 5.617037773132324, "learning_rate": 8.185775891573688e-05, "loss": 0.6997, "step": 14003 }, { "epoch": 0.9488447726810759, "grad_norm": 5.281715393066406, "learning_rate": 8.185638989663906e-05, "loss": 0.6422, "step": 14004 }, { "epoch": 0.948912527949048, "grad_norm": 5.466105937957764, "learning_rate": 8.185502087754124e-05, "loss": 0.7793, "step": 14005 }, { "epoch": 0.9489802832170201, "grad_norm": 8.395241737365723, "learning_rate": 8.185365185844342e-05, "loss": 1.0168, "step": 14006 }, { "epoch": 0.9490480384849922, "grad_norm": 5.434301376342773, "learning_rate": 8.185228283934561e-05, "loss": 0.6043, "step": 14007 }, { "epoch": 0.9491157937529643, "grad_norm": 6.209643363952637, "learning_rate": 8.18509138202478e-05, "loss": 0.9309, "step": 14008 }, { "epoch": 0.9491835490209364, "grad_norm": 5.438107013702393, "learning_rate": 8.184954480114998e-05, "loss": 0.8226, "step": 14009 }, { "epoch": 0.9492513042889085, "grad_norm": 5.4521050453186035, "learning_rate": 8.184817578205216e-05, "loss": 0.9139, "step": 14010 }, { "epoch": 0.9493190595568806, "grad_norm": 6.864973068237305, "learning_rate": 8.184680676295434e-05, "loss": 0.9011, "step": 14011 }, { "epoch": 0.9493868148248527, "grad_norm": 3.955416202545166, "learning_rate": 8.184543774385653e-05, "loss": 0.6179, "step": 14012 }, { "epoch": 0.9494545700928247, "grad_norm": 4.496962547302246, "learning_rate": 8.184406872475871e-05, "loss": 0.7532, "step": 14013 }, { "epoch": 0.9495223253607968, "grad_norm": 7.178885459899902, "learning_rate": 8.184269970566089e-05, "loss": 0.8709, "step": 14014 }, { "epoch": 0.9495900806287689, "grad_norm": 8.725399017333984, "learning_rate": 8.184133068656308e-05, "loss": 0.6709, "step": 14015 }, { "epoch": 0.949657835896741, "grad_norm": 4.831404685974121, "learning_rate": 8.183996166746526e-05, "loss": 0.6221, "step": 14016 }, { "epoch": 0.949725591164713, "grad_norm": 5.273083209991455, "learning_rate": 8.183859264836745e-05, "loss": 0.6876, "step": 14017 }, { "epoch": 0.9497933464326851, "grad_norm": 5.9395833015441895, "learning_rate": 8.183722362926964e-05, "loss": 0.7032, "step": 14018 }, { "epoch": 0.9498611017006572, "grad_norm": 5.861425876617432, "learning_rate": 8.183585461017182e-05, "loss": 0.7697, "step": 14019 }, { "epoch": 0.9499288569686293, "grad_norm": 6.970915794372559, "learning_rate": 8.1834485591074e-05, "loss": 0.693, "step": 14020 }, { "epoch": 0.9499966122366014, "grad_norm": 5.096658229827881, "learning_rate": 8.18331165719762e-05, "loss": 0.9212, "step": 14021 }, { "epoch": 0.9500643675045735, "grad_norm": 6.328000068664551, "learning_rate": 8.183174755287837e-05, "loss": 0.7588, "step": 14022 }, { "epoch": 0.9501321227725456, "grad_norm": 5.18841552734375, "learning_rate": 8.183037853378055e-05, "loss": 0.57, "step": 14023 }, { "epoch": 0.9501998780405176, "grad_norm": 7.505560398101807, "learning_rate": 8.182900951468273e-05, "loss": 0.7265, "step": 14024 }, { "epoch": 0.9502676333084897, "grad_norm": 6.134244918823242, "learning_rate": 8.182764049558492e-05, "loss": 0.6735, "step": 14025 }, { "epoch": 0.9503353885764618, "grad_norm": 6.682973384857178, "learning_rate": 8.182627147648711e-05, "loss": 0.7814, "step": 14026 }, { "epoch": 0.9504031438444339, "grad_norm": 6.492305755615234, "learning_rate": 8.182490245738929e-05, "loss": 0.7248, "step": 14027 }, { "epoch": 0.950470899112406, "grad_norm": 4.836461067199707, "learning_rate": 8.182353343829147e-05, "loss": 0.5105, "step": 14028 }, { "epoch": 0.9505386543803781, "grad_norm": 6.350803375244141, "learning_rate": 8.182216441919365e-05, "loss": 0.9946, "step": 14029 }, { "epoch": 0.9506064096483502, "grad_norm": 6.154294490814209, "learning_rate": 8.182079540009584e-05, "loss": 0.6859, "step": 14030 }, { "epoch": 0.9506741649163223, "grad_norm": 5.958618640899658, "learning_rate": 8.181942638099802e-05, "loss": 0.8397, "step": 14031 }, { "epoch": 0.9507419201842944, "grad_norm": 5.353884696960449, "learning_rate": 8.18180573619002e-05, "loss": 0.8651, "step": 14032 }, { "epoch": 0.9508096754522665, "grad_norm": 4.889853000640869, "learning_rate": 8.181668834280238e-05, "loss": 0.7572, "step": 14033 }, { "epoch": 0.9508774307202384, "grad_norm": 6.175332069396973, "learning_rate": 8.181531932370457e-05, "loss": 0.9212, "step": 14034 }, { "epoch": 0.9509451859882105, "grad_norm": 6.263129711151123, "learning_rate": 8.181395030460676e-05, "loss": 0.6871, "step": 14035 }, { "epoch": 0.9510129412561826, "grad_norm": 5.999005317687988, "learning_rate": 8.181258128550894e-05, "loss": 0.8786, "step": 14036 }, { "epoch": 0.9510806965241547, "grad_norm": 5.486205577850342, "learning_rate": 8.181121226641112e-05, "loss": 0.7679, "step": 14037 }, { "epoch": 0.9511484517921268, "grad_norm": 4.255964756011963, "learning_rate": 8.18098432473133e-05, "loss": 0.5138, "step": 14038 }, { "epoch": 0.9512162070600989, "grad_norm": 7.037053108215332, "learning_rate": 8.18084742282155e-05, "loss": 0.8799, "step": 14039 }, { "epoch": 0.951283962328071, "grad_norm": 5.0967302322387695, "learning_rate": 8.180710520911767e-05, "loss": 0.6126, "step": 14040 }, { "epoch": 0.9513517175960431, "grad_norm": 4.5067458152771, "learning_rate": 8.180573619001985e-05, "loss": 0.5718, "step": 14041 }, { "epoch": 0.9514194728640152, "grad_norm": 4.694755554199219, "learning_rate": 8.180436717092204e-05, "loss": 0.712, "step": 14042 }, { "epoch": 0.9514872281319873, "grad_norm": 6.1123738288879395, "learning_rate": 8.180299815182422e-05, "loss": 0.7131, "step": 14043 }, { "epoch": 0.9515549833999594, "grad_norm": 7.102774143218994, "learning_rate": 8.180162913272641e-05, "loss": 0.6774, "step": 14044 }, { "epoch": 0.9516227386679315, "grad_norm": 5.857600212097168, "learning_rate": 8.180026011362859e-05, "loss": 0.7464, "step": 14045 }, { "epoch": 0.9516904939359035, "grad_norm": 5.622432708740234, "learning_rate": 8.179889109453077e-05, "loss": 0.6566, "step": 14046 }, { "epoch": 0.9517582492038756, "grad_norm": 8.488066673278809, "learning_rate": 8.179752207543295e-05, "loss": 0.9199, "step": 14047 }, { "epoch": 0.9518260044718477, "grad_norm": 7.23390531539917, "learning_rate": 8.179615305633514e-05, "loss": 0.884, "step": 14048 }, { "epoch": 0.9518937597398198, "grad_norm": 6.6131391525268555, "learning_rate": 8.179478403723732e-05, "loss": 1.0312, "step": 14049 }, { "epoch": 0.9519615150077918, "grad_norm": 5.167440891265869, "learning_rate": 8.17934150181395e-05, "loss": 0.6041, "step": 14050 }, { "epoch": 0.9520292702757639, "grad_norm": 6.33022403717041, "learning_rate": 8.179204599904169e-05, "loss": 0.8595, "step": 14051 }, { "epoch": 0.952097025543736, "grad_norm": 6.872222900390625, "learning_rate": 8.179067697994387e-05, "loss": 0.7336, "step": 14052 }, { "epoch": 0.9521647808117081, "grad_norm": 4.217645168304443, "learning_rate": 8.178930796084606e-05, "loss": 0.4555, "step": 14053 }, { "epoch": 0.9522325360796802, "grad_norm": 4.8638529777526855, "learning_rate": 8.178793894174824e-05, "loss": 0.6661, "step": 14054 }, { "epoch": 0.9523002913476523, "grad_norm": 7.580918788909912, "learning_rate": 8.178656992265042e-05, "loss": 0.7857, "step": 14055 }, { "epoch": 0.9523680466156244, "grad_norm": 6.182562351226807, "learning_rate": 8.17852009035526e-05, "loss": 0.7932, "step": 14056 }, { "epoch": 0.9524358018835964, "grad_norm": 7.604414939880371, "learning_rate": 8.178383188445478e-05, "loss": 0.5992, "step": 14057 }, { "epoch": 0.9525035571515685, "grad_norm": 6.542990684509277, "learning_rate": 8.178246286535697e-05, "loss": 0.888, "step": 14058 }, { "epoch": 0.9525713124195406, "grad_norm": 5.210031986236572, "learning_rate": 8.178109384625916e-05, "loss": 0.7931, "step": 14059 }, { "epoch": 0.9526390676875127, "grad_norm": 8.13759994506836, "learning_rate": 8.177972482716134e-05, "loss": 0.6912, "step": 14060 }, { "epoch": 0.9527068229554848, "grad_norm": 5.919729709625244, "learning_rate": 8.177835580806353e-05, "loss": 0.7952, "step": 14061 }, { "epoch": 0.9527745782234569, "grad_norm": 5.694857597351074, "learning_rate": 8.177698678896571e-05, "loss": 0.7651, "step": 14062 }, { "epoch": 0.952842333491429, "grad_norm": 6.005049705505371, "learning_rate": 8.177561776986789e-05, "loss": 0.6854, "step": 14063 }, { "epoch": 0.9529100887594011, "grad_norm": 7.230431079864502, "learning_rate": 8.177424875077008e-05, "loss": 0.815, "step": 14064 }, { "epoch": 0.9529778440273732, "grad_norm": 6.85237455368042, "learning_rate": 8.177287973167226e-05, "loss": 0.6984, "step": 14065 }, { "epoch": 0.9530455992953452, "grad_norm": 5.569474697113037, "learning_rate": 8.177151071257444e-05, "loss": 0.8195, "step": 14066 }, { "epoch": 0.9531133545633172, "grad_norm": 8.014307022094727, "learning_rate": 8.177014169347664e-05, "loss": 0.8865, "step": 14067 }, { "epoch": 0.9531811098312893, "grad_norm": 5.345339775085449, "learning_rate": 8.176877267437882e-05, "loss": 0.8422, "step": 14068 }, { "epoch": 0.9532488650992614, "grad_norm": 7.717896461486816, "learning_rate": 8.1767403655281e-05, "loss": 0.8969, "step": 14069 }, { "epoch": 0.9533166203672335, "grad_norm": 5.41009521484375, "learning_rate": 8.176603463618318e-05, "loss": 0.9792, "step": 14070 }, { "epoch": 0.9533843756352056, "grad_norm": 4.825246334075928, "learning_rate": 8.176466561708537e-05, "loss": 0.5923, "step": 14071 }, { "epoch": 0.9534521309031777, "grad_norm": 4.686081409454346, "learning_rate": 8.176329659798755e-05, "loss": 0.8116, "step": 14072 }, { "epoch": 0.9535198861711498, "grad_norm": 4.988349914550781, "learning_rate": 8.176192757888973e-05, "loss": 0.8101, "step": 14073 }, { "epoch": 0.9535876414391219, "grad_norm": 6.294574737548828, "learning_rate": 8.176055855979191e-05, "loss": 0.7203, "step": 14074 }, { "epoch": 0.953655396707094, "grad_norm": 4.857511520385742, "learning_rate": 8.17591895406941e-05, "loss": 0.8133, "step": 14075 }, { "epoch": 0.9537231519750661, "grad_norm": 6.600233554840088, "learning_rate": 8.175782052159629e-05, "loss": 0.9022, "step": 14076 }, { "epoch": 0.9537909072430382, "grad_norm": 6.04002046585083, "learning_rate": 8.175645150249847e-05, "loss": 0.7713, "step": 14077 }, { "epoch": 0.9538586625110103, "grad_norm": 4.784701824188232, "learning_rate": 8.175508248340065e-05, "loss": 0.5853, "step": 14078 }, { "epoch": 0.9539264177789823, "grad_norm": 5.057199954986572, "learning_rate": 8.175371346430283e-05, "loss": 0.7118, "step": 14079 }, { "epoch": 0.9539941730469544, "grad_norm": 6.518017768859863, "learning_rate": 8.175234444520501e-05, "loss": 0.8091, "step": 14080 }, { "epoch": 0.9540619283149265, "grad_norm": 6.6895575523376465, "learning_rate": 8.17509754261072e-05, "loss": 0.8893, "step": 14081 }, { "epoch": 0.9541296835828986, "grad_norm": 5.571439743041992, "learning_rate": 8.174960640700938e-05, "loss": 0.6693, "step": 14082 }, { "epoch": 0.9541974388508706, "grad_norm": 5.339274883270264, "learning_rate": 8.174823738791156e-05, "loss": 0.5533, "step": 14083 }, { "epoch": 0.9542651941188427, "grad_norm": 6.4230523109436035, "learning_rate": 8.174686836881374e-05, "loss": 0.6819, "step": 14084 }, { "epoch": 0.9543329493868148, "grad_norm": 5.337852954864502, "learning_rate": 8.174549934971594e-05, "loss": 0.5664, "step": 14085 }, { "epoch": 0.9544007046547869, "grad_norm": 5.271894454956055, "learning_rate": 8.174413033061812e-05, "loss": 0.678, "step": 14086 }, { "epoch": 0.954468459922759, "grad_norm": 5.684970855712891, "learning_rate": 8.17427613115203e-05, "loss": 0.8186, "step": 14087 }, { "epoch": 0.9545362151907311, "grad_norm": 8.407366752624512, "learning_rate": 8.174139229242248e-05, "loss": 0.7385, "step": 14088 }, { "epoch": 0.9546039704587032, "grad_norm": 5.270580768585205, "learning_rate": 8.174002327332466e-05, "loss": 0.7859, "step": 14089 }, { "epoch": 0.9546717257266752, "grad_norm": 6.393465518951416, "learning_rate": 8.173865425422685e-05, "loss": 0.7205, "step": 14090 }, { "epoch": 0.9547394809946473, "grad_norm": 6.487541675567627, "learning_rate": 8.173728523512903e-05, "loss": 0.9124, "step": 14091 }, { "epoch": 0.9548072362626194, "grad_norm": 5.790227890014648, "learning_rate": 8.173591621603121e-05, "loss": 0.8357, "step": 14092 }, { "epoch": 0.9548749915305915, "grad_norm": 6.707381725311279, "learning_rate": 8.17345471969334e-05, "loss": 0.8281, "step": 14093 }, { "epoch": 0.9549427467985636, "grad_norm": 5.875377655029297, "learning_rate": 8.173317817783559e-05, "loss": 0.917, "step": 14094 }, { "epoch": 0.9550105020665357, "grad_norm": 9.384751319885254, "learning_rate": 8.173180915873777e-05, "loss": 0.8046, "step": 14095 }, { "epoch": 0.9550782573345078, "grad_norm": 4.465388298034668, "learning_rate": 8.173044013963995e-05, "loss": 0.4959, "step": 14096 }, { "epoch": 0.9551460126024799, "grad_norm": 5.929595947265625, "learning_rate": 8.172907112054213e-05, "loss": 0.655, "step": 14097 }, { "epoch": 0.955213767870452, "grad_norm": 6.126537322998047, "learning_rate": 8.172770210144431e-05, "loss": 0.8669, "step": 14098 }, { "epoch": 0.955281523138424, "grad_norm": 4.894435405731201, "learning_rate": 8.17263330823465e-05, "loss": 0.8827, "step": 14099 }, { "epoch": 0.955349278406396, "grad_norm": 5.8009490966796875, "learning_rate": 8.172496406324868e-05, "loss": 0.6743, "step": 14100 }, { "epoch": 0.9554170336743681, "grad_norm": 6.6259965896606445, "learning_rate": 8.172359504415086e-05, "loss": 0.7902, "step": 14101 }, { "epoch": 0.9554847889423402, "grad_norm": 8.393582344055176, "learning_rate": 8.172222602505305e-05, "loss": 0.7179, "step": 14102 }, { "epoch": 0.9555525442103123, "grad_norm": 5.586965560913086, "learning_rate": 8.172085700595524e-05, "loss": 0.8491, "step": 14103 }, { "epoch": 0.9556202994782844, "grad_norm": 6.042308807373047, "learning_rate": 8.171948798685742e-05, "loss": 0.5834, "step": 14104 }, { "epoch": 0.9556880547462565, "grad_norm": 10.39201831817627, "learning_rate": 8.17181189677596e-05, "loss": 0.8719, "step": 14105 }, { "epoch": 0.9557558100142286, "grad_norm": 6.800583839416504, "learning_rate": 8.171674994866178e-05, "loss": 0.6832, "step": 14106 }, { "epoch": 0.9558235652822007, "grad_norm": 4.868492603302002, "learning_rate": 8.171538092956396e-05, "loss": 0.8444, "step": 14107 }, { "epoch": 0.9558913205501728, "grad_norm": 6.521236419677734, "learning_rate": 8.171401191046615e-05, "loss": 0.6928, "step": 14108 }, { "epoch": 0.9559590758181449, "grad_norm": 6.01854944229126, "learning_rate": 8.171264289136833e-05, "loss": 0.9047, "step": 14109 }, { "epoch": 0.956026831086117, "grad_norm": 5.620432376861572, "learning_rate": 8.171127387227052e-05, "loss": 0.625, "step": 14110 }, { "epoch": 0.9560945863540891, "grad_norm": 6.074321269989014, "learning_rate": 8.170990485317271e-05, "loss": 0.8075, "step": 14111 }, { "epoch": 0.9561623416220612, "grad_norm": 5.468603134155273, "learning_rate": 8.170853583407489e-05, "loss": 0.9023, "step": 14112 }, { "epoch": 0.9562300968900332, "grad_norm": 7.003314971923828, "learning_rate": 8.170716681497707e-05, "loss": 1.0302, "step": 14113 }, { "epoch": 0.9562978521580053, "grad_norm": 5.144251346588135, "learning_rate": 8.170579779587926e-05, "loss": 0.7184, "step": 14114 }, { "epoch": 0.9563656074259773, "grad_norm": 5.8166823387146, "learning_rate": 8.170442877678144e-05, "loss": 0.8568, "step": 14115 }, { "epoch": 0.9564333626939494, "grad_norm": 5.200114727020264, "learning_rate": 8.170305975768362e-05, "loss": 0.8245, "step": 14116 }, { "epoch": 0.9565011179619215, "grad_norm": 6.467376708984375, "learning_rate": 8.170169073858582e-05, "loss": 0.8682, "step": 14117 }, { "epoch": 0.9565688732298936, "grad_norm": 5.500349998474121, "learning_rate": 8.1700321719488e-05, "loss": 0.6647, "step": 14118 }, { "epoch": 0.9566366284978657, "grad_norm": 5.1452765464782715, "learning_rate": 8.169895270039018e-05, "loss": 0.7039, "step": 14119 }, { "epoch": 0.9567043837658378, "grad_norm": 7.137358665466309, "learning_rate": 8.169758368129236e-05, "loss": 0.7246, "step": 14120 }, { "epoch": 0.9567721390338099, "grad_norm": 5.405989170074463, "learning_rate": 8.169621466219454e-05, "loss": 0.8214, "step": 14121 }, { "epoch": 0.956839894301782, "grad_norm": 6.709090232849121, "learning_rate": 8.169484564309673e-05, "loss": 0.8312, "step": 14122 }, { "epoch": 0.956907649569754, "grad_norm": 5.676616668701172, "learning_rate": 8.169347662399891e-05, "loss": 0.9024, "step": 14123 }, { "epoch": 0.9569754048377261, "grad_norm": 4.852606296539307, "learning_rate": 8.16921076049011e-05, "loss": 0.7261, "step": 14124 }, { "epoch": 0.9570431601056982, "grad_norm": 6.199010372161865, "learning_rate": 8.169073858580327e-05, "loss": 0.7481, "step": 14125 }, { "epoch": 0.9571109153736703, "grad_norm": 5.474722385406494, "learning_rate": 8.168936956670547e-05, "loss": 0.7711, "step": 14126 }, { "epoch": 0.9571786706416424, "grad_norm": 6.915562152862549, "learning_rate": 8.168800054760765e-05, "loss": 0.9033, "step": 14127 }, { "epoch": 0.9572464259096145, "grad_norm": 6.63683557510376, "learning_rate": 8.168663152850983e-05, "loss": 0.9166, "step": 14128 }, { "epoch": 0.9573141811775866, "grad_norm": 5.392688751220703, "learning_rate": 8.168526250941201e-05, "loss": 0.7428, "step": 14129 }, { "epoch": 0.9573819364455587, "grad_norm": 7.527129173278809, "learning_rate": 8.168389349031419e-05, "loss": 0.7533, "step": 14130 }, { "epoch": 0.9574496917135308, "grad_norm": 7.458190441131592, "learning_rate": 8.168252447121638e-05, "loss": 0.9663, "step": 14131 }, { "epoch": 0.9575174469815028, "grad_norm": 5.647728443145752, "learning_rate": 8.168115545211856e-05, "loss": 0.8357, "step": 14132 }, { "epoch": 0.9575852022494749, "grad_norm": 6.73082971572876, "learning_rate": 8.167978643302074e-05, "loss": 0.9228, "step": 14133 }, { "epoch": 0.957652957517447, "grad_norm": 5.708244800567627, "learning_rate": 8.167841741392292e-05, "loss": 0.742, "step": 14134 }, { "epoch": 0.957720712785419, "grad_norm": 5.192925453186035, "learning_rate": 8.16770483948251e-05, "loss": 0.5755, "step": 14135 }, { "epoch": 0.9577884680533911, "grad_norm": 5.057267665863037, "learning_rate": 8.16756793757273e-05, "loss": 0.7179, "step": 14136 }, { "epoch": 0.9578562233213632, "grad_norm": 5.001532554626465, "learning_rate": 8.167431035662948e-05, "loss": 0.7356, "step": 14137 }, { "epoch": 0.9579239785893353, "grad_norm": 5.9870781898498535, "learning_rate": 8.167294133753166e-05, "loss": 0.6898, "step": 14138 }, { "epoch": 0.9579917338573074, "grad_norm": 6.193863868713379, "learning_rate": 8.167157231843384e-05, "loss": 0.7337, "step": 14139 }, { "epoch": 0.9580594891252795, "grad_norm": 5.425492286682129, "learning_rate": 8.167020329933603e-05, "loss": 0.5645, "step": 14140 }, { "epoch": 0.9581272443932516, "grad_norm": 6.2710700035095215, "learning_rate": 8.166883428023821e-05, "loss": 0.6244, "step": 14141 }, { "epoch": 0.9581949996612237, "grad_norm": 6.745750904083252, "learning_rate": 8.16674652611404e-05, "loss": 0.7168, "step": 14142 }, { "epoch": 0.9582627549291958, "grad_norm": 4.833362579345703, "learning_rate": 8.166609624204257e-05, "loss": 0.6343, "step": 14143 }, { "epoch": 0.9583305101971679, "grad_norm": 7.704063415527344, "learning_rate": 8.166472722294476e-05, "loss": 0.8166, "step": 14144 }, { "epoch": 0.95839826546514, "grad_norm": 4.506795406341553, "learning_rate": 8.166335820384695e-05, "loss": 0.7254, "step": 14145 }, { "epoch": 0.958466020733112, "grad_norm": 4.970558166503906, "learning_rate": 8.166198918474913e-05, "loss": 0.6922, "step": 14146 }, { "epoch": 0.9585337760010841, "grad_norm": 6.441205024719238, "learning_rate": 8.166062016565131e-05, "loss": 0.8758, "step": 14147 }, { "epoch": 0.9586015312690561, "grad_norm": 5.769437789916992, "learning_rate": 8.165925114655349e-05, "loss": 0.6237, "step": 14148 }, { "epoch": 0.9586692865370282, "grad_norm": 5.401442527770996, "learning_rate": 8.165788212745568e-05, "loss": 1.0237, "step": 14149 }, { "epoch": 0.9587370418050003, "grad_norm": 6.560751438140869, "learning_rate": 8.165651310835786e-05, "loss": 1.0011, "step": 14150 }, { "epoch": 0.9588047970729724, "grad_norm": 5.372631072998047, "learning_rate": 8.165514408926004e-05, "loss": 0.787, "step": 14151 }, { "epoch": 0.9588725523409445, "grad_norm": 4.6542558670043945, "learning_rate": 8.165377507016222e-05, "loss": 0.6897, "step": 14152 }, { "epoch": 0.9589403076089166, "grad_norm": 6.8552141189575195, "learning_rate": 8.16524060510644e-05, "loss": 0.9957, "step": 14153 }, { "epoch": 0.9590080628768887, "grad_norm": 6.167290687561035, "learning_rate": 8.16510370319666e-05, "loss": 0.7579, "step": 14154 }, { "epoch": 0.9590758181448608, "grad_norm": 4.993210315704346, "learning_rate": 8.164966801286878e-05, "loss": 0.8123, "step": 14155 }, { "epoch": 0.9591435734128329, "grad_norm": 6.3289713859558105, "learning_rate": 8.164829899377096e-05, "loss": 0.7427, "step": 14156 }, { "epoch": 0.9592113286808049, "grad_norm": 4.5227837562561035, "learning_rate": 8.164692997467315e-05, "loss": 0.7607, "step": 14157 }, { "epoch": 0.959279083948777, "grad_norm": 5.881022930145264, "learning_rate": 8.164556095557533e-05, "loss": 0.8952, "step": 14158 }, { "epoch": 0.9593468392167491, "grad_norm": 5.290414810180664, "learning_rate": 8.164419193647751e-05, "loss": 0.7087, "step": 14159 }, { "epoch": 0.9594145944847212, "grad_norm": 6.688441276550293, "learning_rate": 8.164282291737971e-05, "loss": 0.635, "step": 14160 }, { "epoch": 0.9594823497526933, "grad_norm": 5.5742058753967285, "learning_rate": 8.164145389828189e-05, "loss": 0.5194, "step": 14161 }, { "epoch": 0.9595501050206654, "grad_norm": 7.248497486114502, "learning_rate": 8.164008487918407e-05, "loss": 0.603, "step": 14162 }, { "epoch": 0.9596178602886375, "grad_norm": 5.80116081237793, "learning_rate": 8.163871586008626e-05, "loss": 0.7965, "step": 14163 }, { "epoch": 0.9596856155566095, "grad_norm": 7.90059232711792, "learning_rate": 8.163734684098844e-05, "loss": 1.0141, "step": 14164 }, { "epoch": 0.9597533708245816, "grad_norm": 4.742366790771484, "learning_rate": 8.163597782189062e-05, "loss": 0.8223, "step": 14165 }, { "epoch": 0.9598211260925537, "grad_norm": 7.762453079223633, "learning_rate": 8.16346088027928e-05, "loss": 0.7047, "step": 14166 }, { "epoch": 0.9598888813605257, "grad_norm": 5.143554210662842, "learning_rate": 8.163323978369498e-05, "loss": 0.7639, "step": 14167 }, { "epoch": 0.9599566366284978, "grad_norm": 6.1197285652160645, "learning_rate": 8.163187076459718e-05, "loss": 0.6587, "step": 14168 }, { "epoch": 0.9600243918964699, "grad_norm": 5.0258049964904785, "learning_rate": 8.163050174549936e-05, "loss": 0.576, "step": 14169 }, { "epoch": 0.960092147164442, "grad_norm": 4.709690570831299, "learning_rate": 8.162913272640154e-05, "loss": 0.5911, "step": 14170 }, { "epoch": 0.9601599024324141, "grad_norm": 5.788050174713135, "learning_rate": 8.162776370730372e-05, "loss": 0.7834, "step": 14171 }, { "epoch": 0.9602276577003862, "grad_norm": 5.154922008514404, "learning_rate": 8.162639468820591e-05, "loss": 0.5965, "step": 14172 }, { "epoch": 0.9602954129683583, "grad_norm": 8.04469108581543, "learning_rate": 8.16250256691081e-05, "loss": 0.722, "step": 14173 }, { "epoch": 0.9603631682363304, "grad_norm": 6.361427307128906, "learning_rate": 8.162365665001027e-05, "loss": 0.478, "step": 14174 }, { "epoch": 0.9604309235043025, "grad_norm": 6.577165603637695, "learning_rate": 8.162228763091245e-05, "loss": 0.9066, "step": 14175 }, { "epoch": 0.9604986787722746, "grad_norm": 6.255192756652832, "learning_rate": 8.162091861181463e-05, "loss": 0.8372, "step": 14176 }, { "epoch": 0.9605664340402467, "grad_norm": 7.014744758605957, "learning_rate": 8.161954959271683e-05, "loss": 0.9053, "step": 14177 }, { "epoch": 0.9606341893082188, "grad_norm": 5.058319091796875, "learning_rate": 8.161818057361901e-05, "loss": 0.7486, "step": 14178 }, { "epoch": 0.9607019445761908, "grad_norm": 6.558164119720459, "learning_rate": 8.161681155452119e-05, "loss": 0.8439, "step": 14179 }, { "epoch": 0.9607696998441629, "grad_norm": 6.055545330047607, "learning_rate": 8.161544253542337e-05, "loss": 0.6836, "step": 14180 }, { "epoch": 0.9608374551121349, "grad_norm": 6.022161483764648, "learning_rate": 8.161407351632556e-05, "loss": 0.7918, "step": 14181 }, { "epoch": 0.960905210380107, "grad_norm": 5.69798469543457, "learning_rate": 8.161270449722774e-05, "loss": 0.9815, "step": 14182 }, { "epoch": 0.9609729656480791, "grad_norm": 4.769881248474121, "learning_rate": 8.161133547812992e-05, "loss": 0.7156, "step": 14183 }, { "epoch": 0.9610407209160512, "grad_norm": 5.956376552581787, "learning_rate": 8.16099664590321e-05, "loss": 0.8751, "step": 14184 }, { "epoch": 0.9611084761840233, "grad_norm": 5.9965128898620605, "learning_rate": 8.160859743993428e-05, "loss": 0.808, "step": 14185 }, { "epoch": 0.9611762314519954, "grad_norm": 5.199033737182617, "learning_rate": 8.160722842083648e-05, "loss": 0.6829, "step": 14186 }, { "epoch": 0.9612439867199675, "grad_norm": 6.520019054412842, "learning_rate": 8.160585940173866e-05, "loss": 0.8844, "step": 14187 }, { "epoch": 0.9613117419879396, "grad_norm": 5.944149017333984, "learning_rate": 8.160449038264084e-05, "loss": 0.7207, "step": 14188 }, { "epoch": 0.9613794972559117, "grad_norm": 5.289858818054199, "learning_rate": 8.160312136354302e-05, "loss": 0.7532, "step": 14189 }, { "epoch": 0.9614472525238837, "grad_norm": 5.0533223152160645, "learning_rate": 8.16017523444452e-05, "loss": 0.9016, "step": 14190 }, { "epoch": 0.9615150077918558, "grad_norm": 5.705595016479492, "learning_rate": 8.16003833253474e-05, "loss": 0.6944, "step": 14191 }, { "epoch": 0.9615827630598279, "grad_norm": 5.3832292556762695, "learning_rate": 8.159901430624957e-05, "loss": 0.6348, "step": 14192 }, { "epoch": 0.9616505183278, "grad_norm": 5.7924041748046875, "learning_rate": 8.159764528715175e-05, "loss": 0.7927, "step": 14193 }, { "epoch": 0.9617182735957721, "grad_norm": 5.289419174194336, "learning_rate": 8.159627626805393e-05, "loss": 0.7989, "step": 14194 }, { "epoch": 0.9617860288637442, "grad_norm": 7.650732040405273, "learning_rate": 8.159490724895613e-05, "loss": 0.82, "step": 14195 }, { "epoch": 0.9618537841317163, "grad_norm": 6.727295398712158, "learning_rate": 8.159353822985831e-05, "loss": 0.7943, "step": 14196 }, { "epoch": 0.9619215393996883, "grad_norm": 5.865251064300537, "learning_rate": 8.159216921076049e-05, "loss": 0.8241, "step": 14197 }, { "epoch": 0.9619892946676604, "grad_norm": 6.261574745178223, "learning_rate": 8.159080019166267e-05, "loss": 0.8547, "step": 14198 }, { "epoch": 0.9620570499356325, "grad_norm": 5.138889789581299, "learning_rate": 8.158943117256485e-05, "loss": 0.806, "step": 14199 }, { "epoch": 0.9621248052036045, "grad_norm": 6.540297508239746, "learning_rate": 8.158806215346704e-05, "loss": 0.783, "step": 14200 }, { "epoch": 0.9621925604715766, "grad_norm": 5.47922945022583, "learning_rate": 8.158669313436922e-05, "loss": 0.9455, "step": 14201 }, { "epoch": 0.9622603157395487, "grad_norm": 4.53643798828125, "learning_rate": 8.15853241152714e-05, "loss": 0.6482, "step": 14202 }, { "epoch": 0.9623280710075208, "grad_norm": 7.245009422302246, "learning_rate": 8.15839550961736e-05, "loss": 1.0062, "step": 14203 }, { "epoch": 0.9623958262754929, "grad_norm": 5.578246116638184, "learning_rate": 8.158258607707578e-05, "loss": 0.7564, "step": 14204 }, { "epoch": 0.962463581543465, "grad_norm": 5.920526027679443, "learning_rate": 8.158121705797796e-05, "loss": 0.6843, "step": 14205 }, { "epoch": 0.9625313368114371, "grad_norm": 6.317756652832031, "learning_rate": 8.157984803888015e-05, "loss": 0.8409, "step": 14206 }, { "epoch": 0.9625990920794092, "grad_norm": 7.818577766418457, "learning_rate": 8.157847901978233e-05, "loss": 0.8203, "step": 14207 }, { "epoch": 0.9626668473473813, "grad_norm": 6.75308084487915, "learning_rate": 8.157711000068451e-05, "loss": 0.6391, "step": 14208 }, { "epoch": 0.9627346026153534, "grad_norm": 8.25421142578125, "learning_rate": 8.157574098158671e-05, "loss": 0.7555, "step": 14209 }, { "epoch": 0.9628023578833255, "grad_norm": 7.660693168640137, "learning_rate": 8.157437196248889e-05, "loss": 0.9737, "step": 14210 }, { "epoch": 0.9628701131512976, "grad_norm": 5.611537456512451, "learning_rate": 8.157300294339107e-05, "loss": 0.835, "step": 14211 }, { "epoch": 0.9629378684192696, "grad_norm": 6.340275287628174, "learning_rate": 8.157163392429325e-05, "loss": 0.6979, "step": 14212 }, { "epoch": 0.9630056236872416, "grad_norm": 7.211668968200684, "learning_rate": 8.157026490519543e-05, "loss": 0.8135, "step": 14213 }, { "epoch": 0.9630733789552137, "grad_norm": 7.507893085479736, "learning_rate": 8.156889588609762e-05, "loss": 1.0498, "step": 14214 }, { "epoch": 0.9631411342231858, "grad_norm": 6.938470840454102, "learning_rate": 8.15675268669998e-05, "loss": 0.6888, "step": 14215 }, { "epoch": 0.9632088894911579, "grad_norm": 6.91562557220459, "learning_rate": 8.156615784790198e-05, "loss": 0.6908, "step": 14216 }, { "epoch": 0.96327664475913, "grad_norm": 5.775163650512695, "learning_rate": 8.156478882880416e-05, "loss": 0.7053, "step": 14217 }, { "epoch": 0.9633444000271021, "grad_norm": 6.454747676849365, "learning_rate": 8.156341980970636e-05, "loss": 0.7105, "step": 14218 }, { "epoch": 0.9634121552950742, "grad_norm": 6.036716461181641, "learning_rate": 8.156205079060854e-05, "loss": 1.0272, "step": 14219 }, { "epoch": 0.9634799105630463, "grad_norm": 5.749178886413574, "learning_rate": 8.156068177151072e-05, "loss": 0.8823, "step": 14220 }, { "epoch": 0.9635476658310184, "grad_norm": 6.171823978424072, "learning_rate": 8.15593127524129e-05, "loss": 0.6209, "step": 14221 }, { "epoch": 0.9636154210989905, "grad_norm": 6.761941432952881, "learning_rate": 8.155794373331508e-05, "loss": 0.7299, "step": 14222 }, { "epoch": 0.9636831763669625, "grad_norm": 4.620570659637451, "learning_rate": 8.155657471421727e-05, "loss": 0.9088, "step": 14223 }, { "epoch": 0.9637509316349346, "grad_norm": 5.813077449798584, "learning_rate": 8.155520569511945e-05, "loss": 0.7737, "step": 14224 }, { "epoch": 0.9638186869029067, "grad_norm": 6.989836692810059, "learning_rate": 8.155383667602163e-05, "loss": 1.0168, "step": 14225 }, { "epoch": 0.9638864421708788, "grad_norm": 8.76766586303711, "learning_rate": 8.155246765692381e-05, "loss": 0.7488, "step": 14226 }, { "epoch": 0.9639541974388509, "grad_norm": 6.669302940368652, "learning_rate": 8.155109863782601e-05, "loss": 0.8634, "step": 14227 }, { "epoch": 0.964021952706823, "grad_norm": 6.353033065795898, "learning_rate": 8.154972961872819e-05, "loss": 0.8018, "step": 14228 }, { "epoch": 0.964089707974795, "grad_norm": 7.134089946746826, "learning_rate": 8.154836059963037e-05, "loss": 0.7599, "step": 14229 }, { "epoch": 0.9641574632427671, "grad_norm": 5.467618465423584, "learning_rate": 8.154699158053255e-05, "loss": 0.7725, "step": 14230 }, { "epoch": 0.9642252185107392, "grad_norm": 7.044497489929199, "learning_rate": 8.154562256143473e-05, "loss": 0.9365, "step": 14231 }, { "epoch": 0.9642929737787113, "grad_norm": 5.421668529510498, "learning_rate": 8.154425354233692e-05, "loss": 0.7032, "step": 14232 }, { "epoch": 0.9643607290466834, "grad_norm": 7.38834285736084, "learning_rate": 8.15428845232391e-05, "loss": 0.8996, "step": 14233 }, { "epoch": 0.9644284843146554, "grad_norm": 6.052585124969482, "learning_rate": 8.154151550414128e-05, "loss": 0.7436, "step": 14234 }, { "epoch": 0.9644962395826275, "grad_norm": 5.307525634765625, "learning_rate": 8.154014648504346e-05, "loss": 0.6881, "step": 14235 }, { "epoch": 0.9645639948505996, "grad_norm": 5.4196062088012695, "learning_rate": 8.153877746594566e-05, "loss": 0.8157, "step": 14236 }, { "epoch": 0.9646317501185717, "grad_norm": 7.922184467315674, "learning_rate": 8.153740844684784e-05, "loss": 0.7146, "step": 14237 }, { "epoch": 0.9646995053865438, "grad_norm": 6.831099510192871, "learning_rate": 8.153603942775002e-05, "loss": 0.7294, "step": 14238 }, { "epoch": 0.9647672606545159, "grad_norm": 4.776399612426758, "learning_rate": 8.15346704086522e-05, "loss": 0.6228, "step": 14239 }, { "epoch": 0.964835015922488, "grad_norm": 6.230729103088379, "learning_rate": 8.153330138955438e-05, "loss": 0.8572, "step": 14240 }, { "epoch": 0.9649027711904601, "grad_norm": 5.276001453399658, "learning_rate": 8.153193237045657e-05, "loss": 0.7314, "step": 14241 }, { "epoch": 0.9649705264584322, "grad_norm": 7.109437465667725, "learning_rate": 8.153056335135875e-05, "loss": 1.0352, "step": 14242 }, { "epoch": 0.9650382817264043, "grad_norm": 7.070680141448975, "learning_rate": 8.152919433226093e-05, "loss": 0.8926, "step": 14243 }, { "epoch": 0.9651060369943764, "grad_norm": 6.073431015014648, "learning_rate": 8.152782531316311e-05, "loss": 0.7816, "step": 14244 }, { "epoch": 0.9651737922623485, "grad_norm": 8.69691276550293, "learning_rate": 8.15264562940653e-05, "loss": 0.9329, "step": 14245 }, { "epoch": 0.9652415475303204, "grad_norm": 5.673532962799072, "learning_rate": 8.152508727496749e-05, "loss": 0.7321, "step": 14246 }, { "epoch": 0.9653093027982925, "grad_norm": 5.062224864959717, "learning_rate": 8.152371825586967e-05, "loss": 0.7234, "step": 14247 }, { "epoch": 0.9653770580662646, "grad_norm": 7.115236282348633, "learning_rate": 8.152234923677185e-05, "loss": 0.6885, "step": 14248 }, { "epoch": 0.9654448133342367, "grad_norm": 5.9798173904418945, "learning_rate": 8.152098021767404e-05, "loss": 0.8179, "step": 14249 }, { "epoch": 0.9655125686022088, "grad_norm": 6.661346435546875, "learning_rate": 8.151961119857622e-05, "loss": 0.7467, "step": 14250 }, { "epoch": 0.9655803238701809, "grad_norm": 6.935898780822754, "learning_rate": 8.15182421794784e-05, "loss": 0.6645, "step": 14251 }, { "epoch": 0.965648079138153, "grad_norm": 4.401814937591553, "learning_rate": 8.15168731603806e-05, "loss": 0.5323, "step": 14252 }, { "epoch": 0.9657158344061251, "grad_norm": 5.884891033172607, "learning_rate": 8.151550414128278e-05, "loss": 0.7988, "step": 14253 }, { "epoch": 0.9657835896740972, "grad_norm": 5.859108924865723, "learning_rate": 8.151413512218496e-05, "loss": 0.7108, "step": 14254 }, { "epoch": 0.9658513449420693, "grad_norm": 6.580816745758057, "learning_rate": 8.151276610308715e-05, "loss": 0.8701, "step": 14255 }, { "epoch": 0.9659191002100413, "grad_norm": 5.882786750793457, "learning_rate": 8.151139708398933e-05, "loss": 0.735, "step": 14256 }, { "epoch": 0.9659868554780134, "grad_norm": 6.60660982131958, "learning_rate": 8.151002806489151e-05, "loss": 0.7883, "step": 14257 }, { "epoch": 0.9660546107459855, "grad_norm": 6.070260047912598, "learning_rate": 8.15086590457937e-05, "loss": 0.763, "step": 14258 }, { "epoch": 0.9661223660139576, "grad_norm": 5.992532730102539, "learning_rate": 8.150729002669589e-05, "loss": 1.0009, "step": 14259 }, { "epoch": 0.9661901212819297, "grad_norm": 5.648770332336426, "learning_rate": 8.150592100759807e-05, "loss": 0.6503, "step": 14260 }, { "epoch": 0.9662578765499018, "grad_norm": 7.13828706741333, "learning_rate": 8.150455198850025e-05, "loss": 0.6251, "step": 14261 }, { "epoch": 0.9663256318178738, "grad_norm": 5.505036354064941, "learning_rate": 8.150318296940243e-05, "loss": 0.5837, "step": 14262 }, { "epoch": 0.9663933870858459, "grad_norm": 7.71685791015625, "learning_rate": 8.150181395030461e-05, "loss": 0.6676, "step": 14263 }, { "epoch": 0.966461142353818, "grad_norm": 5.185730934143066, "learning_rate": 8.15004449312068e-05, "loss": 0.6651, "step": 14264 }, { "epoch": 0.9665288976217901, "grad_norm": 6.014042854309082, "learning_rate": 8.149907591210898e-05, "loss": 0.6928, "step": 14265 }, { "epoch": 0.9665966528897622, "grad_norm": 6.797776699066162, "learning_rate": 8.149770689301116e-05, "loss": 0.6754, "step": 14266 }, { "epoch": 0.9666644081577342, "grad_norm": 4.643877029418945, "learning_rate": 8.149633787391334e-05, "loss": 0.4717, "step": 14267 }, { "epoch": 0.9667321634257063, "grad_norm": 5.930227756500244, "learning_rate": 8.149496885481552e-05, "loss": 0.6709, "step": 14268 }, { "epoch": 0.9667999186936784, "grad_norm": 6.046914100646973, "learning_rate": 8.149359983571772e-05, "loss": 0.7147, "step": 14269 }, { "epoch": 0.9668676739616505, "grad_norm": 6.127531051635742, "learning_rate": 8.14922308166199e-05, "loss": 0.6944, "step": 14270 }, { "epoch": 0.9669354292296226, "grad_norm": 7.445454120635986, "learning_rate": 8.149086179752208e-05, "loss": 0.7084, "step": 14271 }, { "epoch": 0.9670031844975947, "grad_norm": 5.6586995124816895, "learning_rate": 8.148949277842426e-05, "loss": 0.7989, "step": 14272 }, { "epoch": 0.9670709397655668, "grad_norm": 7.164182186126709, "learning_rate": 8.148812375932645e-05, "loss": 0.9223, "step": 14273 }, { "epoch": 0.9671386950335389, "grad_norm": 5.000169277191162, "learning_rate": 8.148675474022863e-05, "loss": 0.723, "step": 14274 }, { "epoch": 0.967206450301511, "grad_norm": 6.657342433929443, "learning_rate": 8.148538572113081e-05, "loss": 0.6303, "step": 14275 }, { "epoch": 0.9672742055694831, "grad_norm": 6.077153205871582, "learning_rate": 8.1484016702033e-05, "loss": 0.8003, "step": 14276 }, { "epoch": 0.9673419608374552, "grad_norm": 6.259696960449219, "learning_rate": 8.148264768293517e-05, "loss": 0.8303, "step": 14277 }, { "epoch": 0.9674097161054271, "grad_norm": 5.3037190437316895, "learning_rate": 8.148127866383737e-05, "loss": 0.5536, "step": 14278 }, { "epoch": 0.9674774713733992, "grad_norm": 4.652920722961426, "learning_rate": 8.147990964473955e-05, "loss": 0.5946, "step": 14279 }, { "epoch": 0.9675452266413713, "grad_norm": 8.112478256225586, "learning_rate": 8.147854062564173e-05, "loss": 0.7329, "step": 14280 }, { "epoch": 0.9676129819093434, "grad_norm": 6.562613487243652, "learning_rate": 8.147717160654391e-05, "loss": 0.81, "step": 14281 }, { "epoch": 0.9676807371773155, "grad_norm": 4.556642532348633, "learning_rate": 8.14758025874461e-05, "loss": 0.925, "step": 14282 }, { "epoch": 0.9677484924452876, "grad_norm": 8.244071006774902, "learning_rate": 8.147443356834828e-05, "loss": 1.117, "step": 14283 }, { "epoch": 0.9678162477132597, "grad_norm": 7.206400394439697, "learning_rate": 8.147306454925046e-05, "loss": 0.5954, "step": 14284 }, { "epoch": 0.9678840029812318, "grad_norm": 6.174105644226074, "learning_rate": 8.147169553015264e-05, "loss": 0.8997, "step": 14285 }, { "epoch": 0.9679517582492039, "grad_norm": 6.076737880706787, "learning_rate": 8.147032651105482e-05, "loss": 0.7188, "step": 14286 }, { "epoch": 0.968019513517176, "grad_norm": 6.2391462326049805, "learning_rate": 8.146895749195702e-05, "loss": 0.8216, "step": 14287 }, { "epoch": 0.9680872687851481, "grad_norm": 6.028003215789795, "learning_rate": 8.14675884728592e-05, "loss": 0.8959, "step": 14288 }, { "epoch": 0.9681550240531202, "grad_norm": 7.618561744689941, "learning_rate": 8.146621945376138e-05, "loss": 0.7853, "step": 14289 }, { "epoch": 0.9682227793210922, "grad_norm": 5.370663642883301, "learning_rate": 8.146485043466356e-05, "loss": 0.7142, "step": 14290 }, { "epoch": 0.9682905345890643, "grad_norm": 5.558692455291748, "learning_rate": 8.146348141556574e-05, "loss": 0.7017, "step": 14291 }, { "epoch": 0.9683582898570364, "grad_norm": 4.1195902824401855, "learning_rate": 8.146211239646793e-05, "loss": 0.6091, "step": 14292 }, { "epoch": 0.9684260451250085, "grad_norm": 4.796550750732422, "learning_rate": 8.146074337737011e-05, "loss": 0.6297, "step": 14293 }, { "epoch": 0.9684938003929806, "grad_norm": 5.585738658905029, "learning_rate": 8.14593743582723e-05, "loss": 0.7508, "step": 14294 }, { "epoch": 0.9685615556609526, "grad_norm": 7.204619407653809, "learning_rate": 8.145800533917449e-05, "loss": 0.8782, "step": 14295 }, { "epoch": 0.9686293109289247, "grad_norm": 6.494380474090576, "learning_rate": 8.145663632007667e-05, "loss": 0.8831, "step": 14296 }, { "epoch": 0.9686970661968968, "grad_norm": 4.968986511230469, "learning_rate": 8.145526730097885e-05, "loss": 0.6463, "step": 14297 }, { "epoch": 0.9687648214648689, "grad_norm": 4.912354946136475, "learning_rate": 8.145389828188104e-05, "loss": 0.8504, "step": 14298 }, { "epoch": 0.968832576732841, "grad_norm": 8.093084335327148, "learning_rate": 8.145252926278322e-05, "loss": 0.7451, "step": 14299 }, { "epoch": 0.968900332000813, "grad_norm": 4.853938579559326, "learning_rate": 8.14511602436854e-05, "loss": 0.7233, "step": 14300 }, { "epoch": 0.9689680872687851, "grad_norm": 5.394782066345215, "learning_rate": 8.14497912245876e-05, "loss": 0.9122, "step": 14301 }, { "epoch": 0.9690358425367572, "grad_norm": 5.5748820304870605, "learning_rate": 8.144842220548978e-05, "loss": 0.9087, "step": 14302 }, { "epoch": 0.9691035978047293, "grad_norm": 5.095332145690918, "learning_rate": 8.144705318639196e-05, "loss": 0.8094, "step": 14303 }, { "epoch": 0.9691713530727014, "grad_norm": 6.252867221832275, "learning_rate": 8.144568416729414e-05, "loss": 0.8471, "step": 14304 }, { "epoch": 0.9692391083406735, "grad_norm": 5.3329949378967285, "learning_rate": 8.144431514819633e-05, "loss": 0.7303, "step": 14305 }, { "epoch": 0.9693068636086456, "grad_norm": 6.976050853729248, "learning_rate": 8.144294612909851e-05, "loss": 0.7868, "step": 14306 }, { "epoch": 0.9693746188766177, "grad_norm": 6.1792073249816895, "learning_rate": 8.144157711000069e-05, "loss": 0.9973, "step": 14307 }, { "epoch": 0.9694423741445898, "grad_norm": 4.349170684814453, "learning_rate": 8.144020809090287e-05, "loss": 0.7458, "step": 14308 }, { "epoch": 0.9695101294125619, "grad_norm": 7.618136405944824, "learning_rate": 8.143883907180505e-05, "loss": 0.6679, "step": 14309 }, { "epoch": 0.969577884680534, "grad_norm": 5.811389923095703, "learning_rate": 8.143747005270725e-05, "loss": 0.6621, "step": 14310 }, { "epoch": 0.9696456399485059, "grad_norm": 7.1004252433776855, "learning_rate": 8.143610103360943e-05, "loss": 0.9404, "step": 14311 }, { "epoch": 0.969713395216478, "grad_norm": 6.3730998039245605, "learning_rate": 8.143473201451161e-05, "loss": 0.9286, "step": 14312 }, { "epoch": 0.9697811504844501, "grad_norm": 5.840987205505371, "learning_rate": 8.143336299541379e-05, "loss": 0.6846, "step": 14313 }, { "epoch": 0.9698489057524222, "grad_norm": 6.054294109344482, "learning_rate": 8.143199397631598e-05, "loss": 0.836, "step": 14314 }, { "epoch": 0.9699166610203943, "grad_norm": 5.046802043914795, "learning_rate": 8.143062495721816e-05, "loss": 0.7202, "step": 14315 }, { "epoch": 0.9699844162883664, "grad_norm": 4.955052375793457, "learning_rate": 8.142925593812034e-05, "loss": 0.7018, "step": 14316 }, { "epoch": 0.9700521715563385, "grad_norm": 5.141872882843018, "learning_rate": 8.142788691902252e-05, "loss": 0.817, "step": 14317 }, { "epoch": 0.9701199268243106, "grad_norm": 9.178304672241211, "learning_rate": 8.14265178999247e-05, "loss": 0.6665, "step": 14318 }, { "epoch": 0.9701876820922827, "grad_norm": 9.323168754577637, "learning_rate": 8.14251488808269e-05, "loss": 0.997, "step": 14319 }, { "epoch": 0.9702554373602548, "grad_norm": 5.158806800842285, "learning_rate": 8.142377986172908e-05, "loss": 0.619, "step": 14320 }, { "epoch": 0.9703231926282269, "grad_norm": 4.141146183013916, "learning_rate": 8.142241084263126e-05, "loss": 0.6202, "step": 14321 }, { "epoch": 0.970390947896199, "grad_norm": 6.7444634437561035, "learning_rate": 8.142104182353344e-05, "loss": 0.9239, "step": 14322 }, { "epoch": 0.970458703164171, "grad_norm": 6.318787574768066, "learning_rate": 8.141967280443562e-05, "loss": 0.6277, "step": 14323 }, { "epoch": 0.9705264584321431, "grad_norm": 7.356907844543457, "learning_rate": 8.141830378533781e-05, "loss": 0.8639, "step": 14324 }, { "epoch": 0.9705942137001152, "grad_norm": 6.62352180480957, "learning_rate": 8.141693476623999e-05, "loss": 0.8497, "step": 14325 }, { "epoch": 0.9706619689680873, "grad_norm": 7.7815093994140625, "learning_rate": 8.141556574714217e-05, "loss": 0.788, "step": 14326 }, { "epoch": 0.9707297242360593, "grad_norm": 5.715222358703613, "learning_rate": 8.141419672804435e-05, "loss": 0.6403, "step": 14327 }, { "epoch": 0.9707974795040314, "grad_norm": 6.145988464355469, "learning_rate": 8.141282770894655e-05, "loss": 0.4776, "step": 14328 }, { "epoch": 0.9708652347720035, "grad_norm": 5.906881809234619, "learning_rate": 8.141145868984873e-05, "loss": 0.6636, "step": 14329 }, { "epoch": 0.9709329900399756, "grad_norm": 4.619365215301514, "learning_rate": 8.141008967075091e-05, "loss": 0.678, "step": 14330 }, { "epoch": 0.9710007453079477, "grad_norm": 6.293912410736084, "learning_rate": 8.140872065165309e-05, "loss": 0.6734, "step": 14331 }, { "epoch": 0.9710685005759198, "grad_norm": 8.143105506896973, "learning_rate": 8.140735163255527e-05, "loss": 0.708, "step": 14332 }, { "epoch": 0.9711362558438918, "grad_norm": 5.470721244812012, "learning_rate": 8.140598261345746e-05, "loss": 0.9017, "step": 14333 }, { "epoch": 0.9712040111118639, "grad_norm": 6.059875011444092, "learning_rate": 8.140461359435964e-05, "loss": 0.8164, "step": 14334 }, { "epoch": 0.971271766379836, "grad_norm": 6.589235782623291, "learning_rate": 8.140324457526182e-05, "loss": 0.9063, "step": 14335 }, { "epoch": 0.9713395216478081, "grad_norm": 5.400428771972656, "learning_rate": 8.1401875556164e-05, "loss": 0.5756, "step": 14336 }, { "epoch": 0.9714072769157802, "grad_norm": 4.511440277099609, "learning_rate": 8.14005065370662e-05, "loss": 0.6725, "step": 14337 }, { "epoch": 0.9714750321837523, "grad_norm": 8.179146766662598, "learning_rate": 8.139913751796838e-05, "loss": 0.8878, "step": 14338 }, { "epoch": 0.9715427874517244, "grad_norm": 9.839139938354492, "learning_rate": 8.139776849887056e-05, "loss": 0.9057, "step": 14339 }, { "epoch": 0.9716105427196965, "grad_norm": 4.975765705108643, "learning_rate": 8.139639947977274e-05, "loss": 0.694, "step": 14340 }, { "epoch": 0.9716782979876686, "grad_norm": 4.968737602233887, "learning_rate": 8.139503046067492e-05, "loss": 0.5297, "step": 14341 }, { "epoch": 0.9717460532556407, "grad_norm": 7.143984317779541, "learning_rate": 8.139366144157711e-05, "loss": 0.8471, "step": 14342 }, { "epoch": 0.9718138085236128, "grad_norm": 6.1333818435668945, "learning_rate": 8.13922924224793e-05, "loss": 0.8179, "step": 14343 }, { "epoch": 0.9718815637915847, "grad_norm": 5.501560688018799, "learning_rate": 8.139092340338147e-05, "loss": 0.6908, "step": 14344 }, { "epoch": 0.9719493190595568, "grad_norm": 5.844865322113037, "learning_rate": 8.138955438428367e-05, "loss": 0.7405, "step": 14345 }, { "epoch": 0.9720170743275289, "grad_norm": 8.380400657653809, "learning_rate": 8.138818536518585e-05, "loss": 0.9078, "step": 14346 }, { "epoch": 0.972084829595501, "grad_norm": 5.638479232788086, "learning_rate": 8.138681634608803e-05, "loss": 0.7455, "step": 14347 }, { "epoch": 0.9721525848634731, "grad_norm": 8.631559371948242, "learning_rate": 8.138544732699022e-05, "loss": 0.7743, "step": 14348 }, { "epoch": 0.9722203401314452, "grad_norm": 4.673583507537842, "learning_rate": 8.13840783078924e-05, "loss": 0.5662, "step": 14349 }, { "epoch": 0.9722880953994173, "grad_norm": 4.844860553741455, "learning_rate": 8.138270928879458e-05, "loss": 0.6779, "step": 14350 }, { "epoch": 0.9723558506673894, "grad_norm": 5.8541154861450195, "learning_rate": 8.138134026969678e-05, "loss": 0.7505, "step": 14351 }, { "epoch": 0.9724236059353615, "grad_norm": 4.8828654289245605, "learning_rate": 8.137997125059896e-05, "loss": 0.7733, "step": 14352 }, { "epoch": 0.9724913612033336, "grad_norm": 5.3108086585998535, "learning_rate": 8.137860223150114e-05, "loss": 0.7705, "step": 14353 }, { "epoch": 0.9725591164713057, "grad_norm": 6.572815418243408, "learning_rate": 8.137723321240332e-05, "loss": 1.0102, "step": 14354 }, { "epoch": 0.9726268717392778, "grad_norm": 6.047853946685791, "learning_rate": 8.13758641933055e-05, "loss": 0.9682, "step": 14355 }, { "epoch": 0.9726946270072498, "grad_norm": 5.171698093414307, "learning_rate": 8.137449517420769e-05, "loss": 0.8997, "step": 14356 }, { "epoch": 0.9727623822752219, "grad_norm": 5.896270275115967, "learning_rate": 8.137312615510987e-05, "loss": 0.9272, "step": 14357 }, { "epoch": 0.972830137543194, "grad_norm": 6.361771583557129, "learning_rate": 8.137175713601205e-05, "loss": 0.8364, "step": 14358 }, { "epoch": 0.9728978928111661, "grad_norm": 6.05178165435791, "learning_rate": 8.137038811691423e-05, "loss": 0.9812, "step": 14359 }, { "epoch": 0.9729656480791381, "grad_norm": 5.797706127166748, "learning_rate": 8.136901909781643e-05, "loss": 0.8025, "step": 14360 }, { "epoch": 0.9730334033471102, "grad_norm": 5.319764614105225, "learning_rate": 8.136765007871861e-05, "loss": 0.708, "step": 14361 }, { "epoch": 0.9731011586150823, "grad_norm": 6.920981407165527, "learning_rate": 8.136628105962079e-05, "loss": 0.6947, "step": 14362 }, { "epoch": 0.9731689138830544, "grad_norm": 8.11839771270752, "learning_rate": 8.136491204052297e-05, "loss": 0.8238, "step": 14363 }, { "epoch": 0.9732366691510265, "grad_norm": 5.530993938446045, "learning_rate": 8.136354302142515e-05, "loss": 0.5501, "step": 14364 }, { "epoch": 0.9733044244189986, "grad_norm": 5.251955986022949, "learning_rate": 8.136217400232734e-05, "loss": 0.706, "step": 14365 }, { "epoch": 0.9733721796869707, "grad_norm": 6.496428966522217, "learning_rate": 8.136080498322952e-05, "loss": 0.8281, "step": 14366 }, { "epoch": 0.9734399349549427, "grad_norm": 4.871181488037109, "learning_rate": 8.13594359641317e-05, "loss": 0.7147, "step": 14367 }, { "epoch": 0.9735076902229148, "grad_norm": 5.397392272949219, "learning_rate": 8.135806694503388e-05, "loss": 0.733, "step": 14368 }, { "epoch": 0.9735754454908869, "grad_norm": 5.09192419052124, "learning_rate": 8.135669792593608e-05, "loss": 0.8182, "step": 14369 }, { "epoch": 0.973643200758859, "grad_norm": 7.045880317687988, "learning_rate": 8.135532890683826e-05, "loss": 0.8304, "step": 14370 }, { "epoch": 0.9737109560268311, "grad_norm": 5.559905529022217, "learning_rate": 8.135395988774044e-05, "loss": 0.7637, "step": 14371 }, { "epoch": 0.9737787112948032, "grad_norm": 4.846694469451904, "learning_rate": 8.135259086864262e-05, "loss": 0.7953, "step": 14372 }, { "epoch": 0.9738464665627753, "grad_norm": 4.448090076446533, "learning_rate": 8.13512218495448e-05, "loss": 0.7446, "step": 14373 }, { "epoch": 0.9739142218307474, "grad_norm": 5.447312355041504, "learning_rate": 8.134985283044699e-05, "loss": 0.8034, "step": 14374 }, { "epoch": 0.9739819770987195, "grad_norm": 6.372121334075928, "learning_rate": 8.134848381134917e-05, "loss": 0.7034, "step": 14375 }, { "epoch": 0.9740497323666915, "grad_norm": 6.085412979125977, "learning_rate": 8.134711479225135e-05, "loss": 0.7508, "step": 14376 }, { "epoch": 0.9741174876346635, "grad_norm": 5.742150783538818, "learning_rate": 8.134574577315353e-05, "loss": 0.8239, "step": 14377 }, { "epoch": 0.9741852429026356, "grad_norm": 5.154967308044434, "learning_rate": 8.134437675405571e-05, "loss": 0.6667, "step": 14378 }, { "epoch": 0.9742529981706077, "grad_norm": 6.060741901397705, "learning_rate": 8.134300773495791e-05, "loss": 0.803, "step": 14379 }, { "epoch": 0.9743207534385798, "grad_norm": 6.090625286102295, "learning_rate": 8.134163871586009e-05, "loss": 0.8115, "step": 14380 }, { "epoch": 0.9743885087065519, "grad_norm": 6.097843170166016, "learning_rate": 8.134026969676227e-05, "loss": 0.7386, "step": 14381 }, { "epoch": 0.974456263974524, "grad_norm": 6.539214611053467, "learning_rate": 8.133890067766445e-05, "loss": 0.6162, "step": 14382 }, { "epoch": 0.9745240192424961, "grad_norm": 5.923181056976318, "learning_rate": 8.133753165856664e-05, "loss": 1.0001, "step": 14383 }, { "epoch": 0.9745917745104682, "grad_norm": 6.29371976852417, "learning_rate": 8.133616263946882e-05, "loss": 0.7975, "step": 14384 }, { "epoch": 0.9746595297784403, "grad_norm": 6.171021461486816, "learning_rate": 8.1334793620371e-05, "loss": 0.8991, "step": 14385 }, { "epoch": 0.9747272850464124, "grad_norm": 5.814321041107178, "learning_rate": 8.133342460127318e-05, "loss": 0.525, "step": 14386 }, { "epoch": 0.9747950403143845, "grad_norm": 5.774600028991699, "learning_rate": 8.133205558217536e-05, "loss": 0.7866, "step": 14387 }, { "epoch": 0.9748627955823566, "grad_norm": 5.909730434417725, "learning_rate": 8.133068656307756e-05, "loss": 0.6867, "step": 14388 }, { "epoch": 0.9749305508503286, "grad_norm": 6.84520149230957, "learning_rate": 8.132931754397974e-05, "loss": 0.7958, "step": 14389 }, { "epoch": 0.9749983061183007, "grad_norm": 6.0649518966674805, "learning_rate": 8.132794852488192e-05, "loss": 0.8553, "step": 14390 }, { "epoch": 0.9750660613862728, "grad_norm": 6.470561504364014, "learning_rate": 8.132657950578411e-05, "loss": 0.7563, "step": 14391 }, { "epoch": 0.9751338166542449, "grad_norm": 5.470592021942139, "learning_rate": 8.132521048668629e-05, "loss": 0.7057, "step": 14392 }, { "epoch": 0.9752015719222169, "grad_norm": 5.857933044433594, "learning_rate": 8.132384146758847e-05, "loss": 0.6859, "step": 14393 }, { "epoch": 0.975269327190189, "grad_norm": 6.267986297607422, "learning_rate": 8.132247244849067e-05, "loss": 0.6744, "step": 14394 }, { "epoch": 0.9753370824581611, "grad_norm": 6.363813400268555, "learning_rate": 8.132110342939285e-05, "loss": 0.7194, "step": 14395 }, { "epoch": 0.9754048377261332, "grad_norm": 5.119122505187988, "learning_rate": 8.131973441029503e-05, "loss": 0.7292, "step": 14396 }, { "epoch": 0.9754725929941053, "grad_norm": 5.589879035949707, "learning_rate": 8.131836539119722e-05, "loss": 0.786, "step": 14397 }, { "epoch": 0.9755403482620774, "grad_norm": 6.809702396392822, "learning_rate": 8.13169963720994e-05, "loss": 0.7465, "step": 14398 }, { "epoch": 0.9756081035300495, "grad_norm": 5.51494026184082, "learning_rate": 8.131562735300158e-05, "loss": 0.7014, "step": 14399 }, { "epoch": 0.9756758587980215, "grad_norm": 5.632194519042969, "learning_rate": 8.131425833390376e-05, "loss": 0.7217, "step": 14400 }, { "epoch": 0.9757436140659936, "grad_norm": 4.676552772521973, "learning_rate": 8.131288931480594e-05, "loss": 0.6789, "step": 14401 }, { "epoch": 0.9758113693339657, "grad_norm": 5.2599945068359375, "learning_rate": 8.131152029570814e-05, "loss": 0.6691, "step": 14402 }, { "epoch": 0.9758791246019378, "grad_norm": 5.05983829498291, "learning_rate": 8.131015127661032e-05, "loss": 0.6848, "step": 14403 }, { "epoch": 0.9759468798699099, "grad_norm": 5.274669170379639, "learning_rate": 8.13087822575125e-05, "loss": 0.7596, "step": 14404 }, { "epoch": 0.976014635137882, "grad_norm": 6.312376976013184, "learning_rate": 8.130741323841468e-05, "loss": 0.6008, "step": 14405 }, { "epoch": 0.9760823904058541, "grad_norm": 5.092833995819092, "learning_rate": 8.130604421931687e-05, "loss": 0.7497, "step": 14406 }, { "epoch": 0.9761501456738262, "grad_norm": 4.330989837646484, "learning_rate": 8.130467520021905e-05, "loss": 0.6639, "step": 14407 }, { "epoch": 0.9762179009417983, "grad_norm": 6.680692195892334, "learning_rate": 8.130330618112123e-05, "loss": 0.9758, "step": 14408 }, { "epoch": 0.9762856562097703, "grad_norm": 4.872468948364258, "learning_rate": 8.130193716202341e-05, "loss": 0.5678, "step": 14409 }, { "epoch": 0.9763534114777424, "grad_norm": 5.9608659744262695, "learning_rate": 8.130056814292559e-05, "loss": 0.766, "step": 14410 }, { "epoch": 0.9764211667457144, "grad_norm": 8.0074462890625, "learning_rate": 8.129919912382779e-05, "loss": 0.808, "step": 14411 }, { "epoch": 0.9764889220136865, "grad_norm": 6.391083240509033, "learning_rate": 8.129783010472997e-05, "loss": 0.9077, "step": 14412 }, { "epoch": 0.9765566772816586, "grad_norm": 9.006216049194336, "learning_rate": 8.129646108563215e-05, "loss": 0.8002, "step": 14413 }, { "epoch": 0.9766244325496307, "grad_norm": 5.454526901245117, "learning_rate": 8.129509206653433e-05, "loss": 0.662, "step": 14414 }, { "epoch": 0.9766921878176028, "grad_norm": 5.372074604034424, "learning_rate": 8.129372304743652e-05, "loss": 0.7083, "step": 14415 }, { "epoch": 0.9767599430855749, "grad_norm": 5.040616035461426, "learning_rate": 8.12923540283387e-05, "loss": 0.7178, "step": 14416 }, { "epoch": 0.976827698353547, "grad_norm": 6.438399314880371, "learning_rate": 8.129098500924088e-05, "loss": 0.9576, "step": 14417 }, { "epoch": 0.9768954536215191, "grad_norm": 5.687475204467773, "learning_rate": 8.128961599014306e-05, "loss": 0.8086, "step": 14418 }, { "epoch": 0.9769632088894912, "grad_norm": 5.61614990234375, "learning_rate": 8.128824697104524e-05, "loss": 0.7917, "step": 14419 }, { "epoch": 0.9770309641574633, "grad_norm": 6.389354228973389, "learning_rate": 8.128687795194744e-05, "loss": 0.7769, "step": 14420 }, { "epoch": 0.9770987194254354, "grad_norm": 7.1694231033325195, "learning_rate": 8.128550893284962e-05, "loss": 0.6114, "step": 14421 }, { "epoch": 0.9771664746934074, "grad_norm": 6.265122890472412, "learning_rate": 8.12841399137518e-05, "loss": 0.6074, "step": 14422 }, { "epoch": 0.9772342299613795, "grad_norm": 9.472161293029785, "learning_rate": 8.128277089465398e-05, "loss": 0.5208, "step": 14423 }, { "epoch": 0.9773019852293516, "grad_norm": 4.76262903213501, "learning_rate": 8.128140187555616e-05, "loss": 0.7851, "step": 14424 }, { "epoch": 0.9773697404973236, "grad_norm": 4.94804048538208, "learning_rate": 8.128003285645835e-05, "loss": 0.7913, "step": 14425 }, { "epoch": 0.9774374957652957, "grad_norm": 5.631475448608398, "learning_rate": 8.127866383736053e-05, "loss": 0.7001, "step": 14426 }, { "epoch": 0.9775052510332678, "grad_norm": 6.504068851470947, "learning_rate": 8.127729481826271e-05, "loss": 0.6949, "step": 14427 }, { "epoch": 0.9775730063012399, "grad_norm": 4.534459114074707, "learning_rate": 8.12759257991649e-05, "loss": 0.6107, "step": 14428 }, { "epoch": 0.977640761569212, "grad_norm": 5.583250045776367, "learning_rate": 8.127455678006709e-05, "loss": 0.6784, "step": 14429 }, { "epoch": 0.9777085168371841, "grad_norm": 5.706220626831055, "learning_rate": 8.127318776096927e-05, "loss": 0.7571, "step": 14430 }, { "epoch": 0.9777762721051562, "grad_norm": 5.701255798339844, "learning_rate": 8.127181874187145e-05, "loss": 0.7379, "step": 14431 }, { "epoch": 0.9778440273731283, "grad_norm": 5.034109592437744, "learning_rate": 8.127044972277363e-05, "loss": 0.6254, "step": 14432 }, { "epoch": 0.9779117826411003, "grad_norm": 7.113419532775879, "learning_rate": 8.126908070367581e-05, "loss": 0.7667, "step": 14433 }, { "epoch": 0.9779795379090724, "grad_norm": 5.958342552185059, "learning_rate": 8.1267711684578e-05, "loss": 0.8242, "step": 14434 }, { "epoch": 0.9780472931770445, "grad_norm": 5.358188629150391, "learning_rate": 8.126634266548018e-05, "loss": 0.8029, "step": 14435 }, { "epoch": 0.9781150484450166, "grad_norm": 5.703382968902588, "learning_rate": 8.126497364638236e-05, "loss": 0.64, "step": 14436 }, { "epoch": 0.9781828037129887, "grad_norm": 6.205333709716797, "learning_rate": 8.126360462728456e-05, "loss": 0.7054, "step": 14437 }, { "epoch": 0.9782505589809608, "grad_norm": 6.106006145477295, "learning_rate": 8.126223560818674e-05, "loss": 0.7458, "step": 14438 }, { "epoch": 0.9783183142489329, "grad_norm": 7.8420305252075195, "learning_rate": 8.126086658908892e-05, "loss": 0.9453, "step": 14439 }, { "epoch": 0.978386069516905, "grad_norm": 5.559987545013428, "learning_rate": 8.125949756999111e-05, "loss": 0.5765, "step": 14440 }, { "epoch": 0.978453824784877, "grad_norm": 6.2737040519714355, "learning_rate": 8.125812855089329e-05, "loss": 0.6614, "step": 14441 }, { "epoch": 0.9785215800528491, "grad_norm": 6.900593280792236, "learning_rate": 8.125675953179547e-05, "loss": 0.8438, "step": 14442 }, { "epoch": 0.9785893353208212, "grad_norm": 5.86058235168457, "learning_rate": 8.125539051269767e-05, "loss": 0.7445, "step": 14443 }, { "epoch": 0.9786570905887932, "grad_norm": 4.0302348136901855, "learning_rate": 8.125402149359985e-05, "loss": 0.767, "step": 14444 }, { "epoch": 0.9787248458567653, "grad_norm": 5.090617656707764, "learning_rate": 8.125265247450203e-05, "loss": 0.6529, "step": 14445 }, { "epoch": 0.9787926011247374, "grad_norm": 5.470541477203369, "learning_rate": 8.125128345540421e-05, "loss": 0.7689, "step": 14446 }, { "epoch": 0.9788603563927095, "grad_norm": 5.9749627113342285, "learning_rate": 8.12499144363064e-05, "loss": 1.0099, "step": 14447 }, { "epoch": 0.9789281116606816, "grad_norm": 5.537027359008789, "learning_rate": 8.124854541720858e-05, "loss": 0.6033, "step": 14448 }, { "epoch": 0.9789958669286537, "grad_norm": 4.773642063140869, "learning_rate": 8.124717639811076e-05, "loss": 0.6899, "step": 14449 }, { "epoch": 0.9790636221966258, "grad_norm": 6.153696537017822, "learning_rate": 8.124580737901294e-05, "loss": 0.9249, "step": 14450 }, { "epoch": 0.9791313774645979, "grad_norm": 5.920269966125488, "learning_rate": 8.124443835991512e-05, "loss": 0.7868, "step": 14451 }, { "epoch": 0.97919913273257, "grad_norm": 5.916412830352783, "learning_rate": 8.124306934081732e-05, "loss": 0.6885, "step": 14452 }, { "epoch": 0.9792668880005421, "grad_norm": 7.684385299682617, "learning_rate": 8.12417003217195e-05, "loss": 0.8591, "step": 14453 }, { "epoch": 0.9793346432685142, "grad_norm": 4.769680500030518, "learning_rate": 8.124033130262168e-05, "loss": 0.59, "step": 14454 }, { "epoch": 0.9794023985364863, "grad_norm": 6.372145175933838, "learning_rate": 8.123896228352386e-05, "loss": 0.9633, "step": 14455 }, { "epoch": 0.9794701538044583, "grad_norm": 4.667510509490967, "learning_rate": 8.123759326442604e-05, "loss": 0.6554, "step": 14456 }, { "epoch": 0.9795379090724304, "grad_norm": 6.719541549682617, "learning_rate": 8.123622424532823e-05, "loss": 0.6992, "step": 14457 }, { "epoch": 0.9796056643404024, "grad_norm": 5.012288570404053, "learning_rate": 8.123485522623041e-05, "loss": 0.6368, "step": 14458 }, { "epoch": 0.9796734196083745, "grad_norm": 5.666974067687988, "learning_rate": 8.123348620713259e-05, "loss": 0.737, "step": 14459 }, { "epoch": 0.9797411748763466, "grad_norm": 6.177326679229736, "learning_rate": 8.123211718803477e-05, "loss": 0.8604, "step": 14460 }, { "epoch": 0.9798089301443187, "grad_norm": 6.877957344055176, "learning_rate": 8.123074816893697e-05, "loss": 0.7917, "step": 14461 }, { "epoch": 0.9798766854122908, "grad_norm": 7.843241214752197, "learning_rate": 8.122937914983915e-05, "loss": 0.68, "step": 14462 }, { "epoch": 0.9799444406802629, "grad_norm": 5.025186061859131, "learning_rate": 8.122801013074133e-05, "loss": 0.7137, "step": 14463 }, { "epoch": 0.980012195948235, "grad_norm": 5.27938175201416, "learning_rate": 8.122664111164351e-05, "loss": 0.7029, "step": 14464 }, { "epoch": 0.9800799512162071, "grad_norm": 6.0738301277160645, "learning_rate": 8.122527209254569e-05, "loss": 0.7922, "step": 14465 }, { "epoch": 0.9801477064841791, "grad_norm": 7.217299461364746, "learning_rate": 8.122390307344788e-05, "loss": 0.6941, "step": 14466 }, { "epoch": 0.9802154617521512, "grad_norm": 4.96075439453125, "learning_rate": 8.122253405435006e-05, "loss": 0.6182, "step": 14467 }, { "epoch": 0.9802832170201233, "grad_norm": 5.550745964050293, "learning_rate": 8.122116503525224e-05, "loss": 0.7081, "step": 14468 }, { "epoch": 0.9803509722880954, "grad_norm": 6.0965752601623535, "learning_rate": 8.121979601615442e-05, "loss": 0.6694, "step": 14469 }, { "epoch": 0.9804187275560675, "grad_norm": 6.101776123046875, "learning_rate": 8.121842699705662e-05, "loss": 0.7803, "step": 14470 }, { "epoch": 0.9804864828240396, "grad_norm": 5.098122596740723, "learning_rate": 8.12170579779588e-05, "loss": 0.8434, "step": 14471 }, { "epoch": 0.9805542380920117, "grad_norm": 4.281574726104736, "learning_rate": 8.121568895886098e-05, "loss": 0.7203, "step": 14472 }, { "epoch": 0.9806219933599838, "grad_norm": 7.562485694885254, "learning_rate": 8.121431993976316e-05, "loss": 0.8041, "step": 14473 }, { "epoch": 0.9806897486279558, "grad_norm": 8.38601303100586, "learning_rate": 8.121295092066534e-05, "loss": 0.6553, "step": 14474 }, { "epoch": 0.9807575038959279, "grad_norm": 7.885288715362549, "learning_rate": 8.121158190156753e-05, "loss": 0.9021, "step": 14475 }, { "epoch": 0.9808252591639, "grad_norm": 5.9442877769470215, "learning_rate": 8.121021288246971e-05, "loss": 0.7343, "step": 14476 }, { "epoch": 0.980893014431872, "grad_norm": 7.727574348449707, "learning_rate": 8.120884386337189e-05, "loss": 0.9092, "step": 14477 }, { "epoch": 0.9809607696998441, "grad_norm": 7.154834270477295, "learning_rate": 8.120747484427407e-05, "loss": 0.824, "step": 14478 }, { "epoch": 0.9810285249678162, "grad_norm": 5.356253623962402, "learning_rate": 8.120610582517625e-05, "loss": 0.7594, "step": 14479 }, { "epoch": 0.9810962802357883, "grad_norm": 6.602542400360107, "learning_rate": 8.120473680607845e-05, "loss": 0.7641, "step": 14480 }, { "epoch": 0.9811640355037604, "grad_norm": 5.2316060066223145, "learning_rate": 8.120336778698063e-05, "loss": 0.7131, "step": 14481 }, { "epoch": 0.9812317907717325, "grad_norm": 5.679458141326904, "learning_rate": 8.120199876788281e-05, "loss": 0.7762, "step": 14482 }, { "epoch": 0.9812995460397046, "grad_norm": 6.47125244140625, "learning_rate": 8.1200629748785e-05, "loss": 0.7332, "step": 14483 }, { "epoch": 0.9813673013076767, "grad_norm": 4.7544169425964355, "learning_rate": 8.119926072968718e-05, "loss": 0.7308, "step": 14484 }, { "epoch": 0.9814350565756488, "grad_norm": 8.939432144165039, "learning_rate": 8.119789171058936e-05, "loss": 0.9312, "step": 14485 }, { "epoch": 0.9815028118436209, "grad_norm": 6.649098873138428, "learning_rate": 8.119652269149156e-05, "loss": 0.7315, "step": 14486 }, { "epoch": 0.981570567111593, "grad_norm": 6.609123706817627, "learning_rate": 8.119515367239374e-05, "loss": 0.7891, "step": 14487 }, { "epoch": 0.981638322379565, "grad_norm": 4.026298522949219, "learning_rate": 8.119378465329592e-05, "loss": 0.6256, "step": 14488 }, { "epoch": 0.9817060776475371, "grad_norm": 5.0410075187683105, "learning_rate": 8.119241563419811e-05, "loss": 0.5491, "step": 14489 }, { "epoch": 0.9817738329155091, "grad_norm": 6.390183448791504, "learning_rate": 8.119104661510029e-05, "loss": 0.6793, "step": 14490 }, { "epoch": 0.9818415881834812, "grad_norm": 5.242403984069824, "learning_rate": 8.118967759600247e-05, "loss": 0.6271, "step": 14491 }, { "epoch": 0.9819093434514533, "grad_norm": 6.249171257019043, "learning_rate": 8.118830857690465e-05, "loss": 0.8151, "step": 14492 }, { "epoch": 0.9819770987194254, "grad_norm": 6.398001194000244, "learning_rate": 8.118693955780685e-05, "loss": 0.6908, "step": 14493 }, { "epoch": 0.9820448539873975, "grad_norm": 8.341864585876465, "learning_rate": 8.118557053870903e-05, "loss": 0.9941, "step": 14494 }, { "epoch": 0.9821126092553696, "grad_norm": 7.000919818878174, "learning_rate": 8.11842015196112e-05, "loss": 0.751, "step": 14495 }, { "epoch": 0.9821803645233417, "grad_norm": 6.331650257110596, "learning_rate": 8.118283250051339e-05, "loss": 0.6585, "step": 14496 }, { "epoch": 0.9822481197913138, "grad_norm": 6.7721848487854, "learning_rate": 8.118146348141557e-05, "loss": 0.8229, "step": 14497 }, { "epoch": 0.9823158750592859, "grad_norm": 6.479053497314453, "learning_rate": 8.118009446231776e-05, "loss": 0.8373, "step": 14498 }, { "epoch": 0.982383630327258, "grad_norm": 6.043035984039307, "learning_rate": 8.117872544321994e-05, "loss": 0.7682, "step": 14499 }, { "epoch": 0.98245138559523, "grad_norm": 6.7178778648376465, "learning_rate": 8.117735642412212e-05, "loss": 0.6542, "step": 14500 }, { "epoch": 0.9825191408632021, "grad_norm": 9.148183822631836, "learning_rate": 8.11759874050243e-05, "loss": 0.7574, "step": 14501 }, { "epoch": 0.9825868961311742, "grad_norm": 5.086976051330566, "learning_rate": 8.11746183859265e-05, "loss": 0.6415, "step": 14502 }, { "epoch": 0.9826546513991463, "grad_norm": 6.981233596801758, "learning_rate": 8.117324936682868e-05, "loss": 1.0351, "step": 14503 }, { "epoch": 0.9827224066671184, "grad_norm": 6.20076847076416, "learning_rate": 8.117188034773086e-05, "loss": 0.734, "step": 14504 }, { "epoch": 0.9827901619350905, "grad_norm": 6.142386436462402, "learning_rate": 8.117051132863304e-05, "loss": 0.9636, "step": 14505 }, { "epoch": 0.9828579172030626, "grad_norm": 5.595977306365967, "learning_rate": 8.116914230953522e-05, "loss": 0.8142, "step": 14506 }, { "epoch": 0.9829256724710346, "grad_norm": 5.602009296417236, "learning_rate": 8.116777329043741e-05, "loss": 0.8624, "step": 14507 }, { "epoch": 0.9829934277390067, "grad_norm": 5.37421989440918, "learning_rate": 8.116640427133959e-05, "loss": 0.9434, "step": 14508 }, { "epoch": 0.9830611830069788, "grad_norm": 6.307192325592041, "learning_rate": 8.116503525224177e-05, "loss": 0.7215, "step": 14509 }, { "epoch": 0.9831289382749508, "grad_norm": 5.990005970001221, "learning_rate": 8.116366623314395e-05, "loss": 0.6622, "step": 14510 }, { "epoch": 0.9831966935429229, "grad_norm": 6.682214736938477, "learning_rate": 8.116229721404613e-05, "loss": 0.8302, "step": 14511 }, { "epoch": 0.983264448810895, "grad_norm": 8.388869285583496, "learning_rate": 8.116092819494833e-05, "loss": 0.948, "step": 14512 }, { "epoch": 0.9833322040788671, "grad_norm": 6.352821350097656, "learning_rate": 8.115955917585051e-05, "loss": 0.8845, "step": 14513 }, { "epoch": 0.9833999593468392, "grad_norm": 5.503759384155273, "learning_rate": 8.115819015675269e-05, "loss": 0.6619, "step": 14514 }, { "epoch": 0.9834677146148113, "grad_norm": 8.024614334106445, "learning_rate": 8.115682113765487e-05, "loss": 0.6846, "step": 14515 }, { "epoch": 0.9835354698827834, "grad_norm": 5.9330902099609375, "learning_rate": 8.115545211855706e-05, "loss": 0.7577, "step": 14516 }, { "epoch": 0.9836032251507555, "grad_norm": 5.600277423858643, "learning_rate": 8.115408309945924e-05, "loss": 0.8694, "step": 14517 }, { "epoch": 0.9836709804187276, "grad_norm": 5.870060443878174, "learning_rate": 8.115271408036142e-05, "loss": 0.7465, "step": 14518 }, { "epoch": 0.9837387356866997, "grad_norm": 5.257311820983887, "learning_rate": 8.11513450612636e-05, "loss": 0.58, "step": 14519 }, { "epoch": 0.9838064909546718, "grad_norm": 5.327232837677002, "learning_rate": 8.114997604216578e-05, "loss": 0.823, "step": 14520 }, { "epoch": 0.9838742462226439, "grad_norm": 7.09609842300415, "learning_rate": 8.114860702306798e-05, "loss": 0.7926, "step": 14521 }, { "epoch": 0.983942001490616, "grad_norm": 5.685835838317871, "learning_rate": 8.114723800397016e-05, "loss": 0.8741, "step": 14522 }, { "epoch": 0.9840097567585879, "grad_norm": 5.486968517303467, "learning_rate": 8.114586898487234e-05, "loss": 0.8518, "step": 14523 }, { "epoch": 0.98407751202656, "grad_norm": 4.868188858032227, "learning_rate": 8.114449996577452e-05, "loss": 0.6013, "step": 14524 }, { "epoch": 0.9841452672945321, "grad_norm": 6.388209819793701, "learning_rate": 8.114313094667671e-05, "loss": 0.827, "step": 14525 }, { "epoch": 0.9842130225625042, "grad_norm": 6.438693523406982, "learning_rate": 8.114176192757889e-05, "loss": 0.9702, "step": 14526 }, { "epoch": 0.9842807778304763, "grad_norm": 4.985934734344482, "learning_rate": 8.114039290848107e-05, "loss": 0.6018, "step": 14527 }, { "epoch": 0.9843485330984484, "grad_norm": 6.320969581604004, "learning_rate": 8.113902388938325e-05, "loss": 0.7803, "step": 14528 }, { "epoch": 0.9844162883664205, "grad_norm": 7.519181728363037, "learning_rate": 8.113765487028545e-05, "loss": 0.7211, "step": 14529 }, { "epoch": 0.9844840436343926, "grad_norm": 5.232812404632568, "learning_rate": 8.113628585118763e-05, "loss": 1.0151, "step": 14530 }, { "epoch": 0.9845517989023647, "grad_norm": 6.038102149963379, "learning_rate": 8.113491683208981e-05, "loss": 0.8097, "step": 14531 }, { "epoch": 0.9846195541703368, "grad_norm": 6.541941165924072, "learning_rate": 8.1133547812992e-05, "loss": 0.704, "step": 14532 }, { "epoch": 0.9846873094383088, "grad_norm": 6.207378387451172, "learning_rate": 8.113217879389418e-05, "loss": 0.6292, "step": 14533 }, { "epoch": 0.9847550647062809, "grad_norm": 7.586195945739746, "learning_rate": 8.113080977479636e-05, "loss": 0.8351, "step": 14534 }, { "epoch": 0.984822819974253, "grad_norm": 5.649599075317383, "learning_rate": 8.112944075569856e-05, "loss": 0.7277, "step": 14535 }, { "epoch": 0.9848905752422251, "grad_norm": 6.21948766708374, "learning_rate": 8.112807173660074e-05, "loss": 0.8101, "step": 14536 }, { "epoch": 0.9849583305101972, "grad_norm": 5.665065765380859, "learning_rate": 8.112670271750292e-05, "loss": 0.9378, "step": 14537 }, { "epoch": 0.9850260857781693, "grad_norm": 6.819377899169922, "learning_rate": 8.11253336984051e-05, "loss": 0.8962, "step": 14538 }, { "epoch": 0.9850938410461413, "grad_norm": 5.853285789489746, "learning_rate": 8.112396467930729e-05, "loss": 0.7442, "step": 14539 }, { "epoch": 0.9851615963141134, "grad_norm": 6.921646595001221, "learning_rate": 8.112259566020947e-05, "loss": 0.8819, "step": 14540 }, { "epoch": 0.9852293515820855, "grad_norm": 5.253473281860352, "learning_rate": 8.112122664111165e-05, "loss": 0.8615, "step": 14541 }, { "epoch": 0.9852971068500576, "grad_norm": 6.091032981872559, "learning_rate": 8.111985762201383e-05, "loss": 0.9232, "step": 14542 }, { "epoch": 0.9853648621180296, "grad_norm": 4.946970462799072, "learning_rate": 8.111848860291601e-05, "loss": 0.5256, "step": 14543 }, { "epoch": 0.9854326173860017, "grad_norm": 7.805113792419434, "learning_rate": 8.11171195838182e-05, "loss": 0.7799, "step": 14544 }, { "epoch": 0.9855003726539738, "grad_norm": 5.948545932769775, "learning_rate": 8.111575056472039e-05, "loss": 0.8107, "step": 14545 }, { "epoch": 0.9855681279219459, "grad_norm": 4.266178131103516, "learning_rate": 8.111438154562257e-05, "loss": 0.6324, "step": 14546 }, { "epoch": 0.985635883189918, "grad_norm": 5.126527309417725, "learning_rate": 8.111301252652475e-05, "loss": 0.6852, "step": 14547 }, { "epoch": 0.9857036384578901, "grad_norm": 7.069212436676025, "learning_rate": 8.111164350742694e-05, "loss": 0.7833, "step": 14548 }, { "epoch": 0.9857713937258622, "grad_norm": 4.909544944763184, "learning_rate": 8.111027448832912e-05, "loss": 0.7001, "step": 14549 }, { "epoch": 0.9858391489938343, "grad_norm": 8.045904159545898, "learning_rate": 8.11089054692313e-05, "loss": 0.7565, "step": 14550 }, { "epoch": 0.9859069042618064, "grad_norm": 6.734328269958496, "learning_rate": 8.110753645013348e-05, "loss": 0.6459, "step": 14551 }, { "epoch": 0.9859746595297785, "grad_norm": 6.844288349151611, "learning_rate": 8.110616743103566e-05, "loss": 0.6501, "step": 14552 }, { "epoch": 0.9860424147977506, "grad_norm": 6.474472522735596, "learning_rate": 8.110479841193786e-05, "loss": 0.8652, "step": 14553 }, { "epoch": 0.9861101700657227, "grad_norm": 5.087035655975342, "learning_rate": 8.110342939284004e-05, "loss": 0.6611, "step": 14554 }, { "epoch": 0.9861779253336947, "grad_norm": 5.499162673950195, "learning_rate": 8.110206037374222e-05, "loss": 0.7477, "step": 14555 }, { "epoch": 0.9862456806016667, "grad_norm": 6.489079475402832, "learning_rate": 8.11006913546444e-05, "loss": 0.6523, "step": 14556 }, { "epoch": 0.9863134358696388, "grad_norm": 5.147678375244141, "learning_rate": 8.109932233554658e-05, "loss": 0.7646, "step": 14557 }, { "epoch": 0.9863811911376109, "grad_norm": 6.976253986358643, "learning_rate": 8.109795331644877e-05, "loss": 0.7634, "step": 14558 }, { "epoch": 0.986448946405583, "grad_norm": 6.211210250854492, "learning_rate": 8.109658429735095e-05, "loss": 0.788, "step": 14559 }, { "epoch": 0.9865167016735551, "grad_norm": 5.917699813842773, "learning_rate": 8.109521527825313e-05, "loss": 0.8491, "step": 14560 }, { "epoch": 0.9865844569415272, "grad_norm": 9.201217651367188, "learning_rate": 8.109384625915531e-05, "loss": 0.8377, "step": 14561 }, { "epoch": 0.9866522122094993, "grad_norm": 6.403718948364258, "learning_rate": 8.10924772400575e-05, "loss": 0.991, "step": 14562 }, { "epoch": 0.9867199674774714, "grad_norm": 4.908394813537598, "learning_rate": 8.109110822095969e-05, "loss": 0.7742, "step": 14563 }, { "epoch": 0.9867877227454435, "grad_norm": 5.759329795837402, "learning_rate": 8.108973920186187e-05, "loss": 0.682, "step": 14564 }, { "epoch": 0.9868554780134156, "grad_norm": 5.077083587646484, "learning_rate": 8.108837018276405e-05, "loss": 0.6575, "step": 14565 }, { "epoch": 0.9869232332813876, "grad_norm": 6.402769088745117, "learning_rate": 8.108700116366623e-05, "loss": 0.6313, "step": 14566 }, { "epoch": 0.9869909885493597, "grad_norm": 6.894180774688721, "learning_rate": 8.108563214456842e-05, "loss": 0.779, "step": 14567 }, { "epoch": 0.9870587438173318, "grad_norm": 7.2585062980651855, "learning_rate": 8.10842631254706e-05, "loss": 0.8281, "step": 14568 }, { "epoch": 0.9871264990853039, "grad_norm": 4.876138210296631, "learning_rate": 8.108289410637278e-05, "loss": 0.6501, "step": 14569 }, { "epoch": 0.987194254353276, "grad_norm": 6.457757949829102, "learning_rate": 8.108152508727496e-05, "loss": 0.6609, "step": 14570 }, { "epoch": 0.9872620096212481, "grad_norm": 5.902544021606445, "learning_rate": 8.108015606817716e-05, "loss": 0.8231, "step": 14571 }, { "epoch": 0.9873297648892201, "grad_norm": 7.340898513793945, "learning_rate": 8.107878704907934e-05, "loss": 0.9017, "step": 14572 }, { "epoch": 0.9873975201571922, "grad_norm": 4.826013088226318, "learning_rate": 8.107741802998152e-05, "loss": 0.7838, "step": 14573 }, { "epoch": 0.9874652754251643, "grad_norm": 6.566142559051514, "learning_rate": 8.10760490108837e-05, "loss": 0.8644, "step": 14574 }, { "epoch": 0.9875330306931364, "grad_norm": 6.540748119354248, "learning_rate": 8.107467999178589e-05, "loss": 0.7461, "step": 14575 }, { "epoch": 0.9876007859611085, "grad_norm": 8.179930686950684, "learning_rate": 8.107331097268807e-05, "loss": 0.6283, "step": 14576 }, { "epoch": 0.9876685412290805, "grad_norm": 6.814093112945557, "learning_rate": 8.107194195359025e-05, "loss": 0.6905, "step": 14577 }, { "epoch": 0.9877362964970526, "grad_norm": 5.342179775238037, "learning_rate": 8.107057293449245e-05, "loss": 0.636, "step": 14578 }, { "epoch": 0.9878040517650247, "grad_norm": 4.811430931091309, "learning_rate": 8.106920391539463e-05, "loss": 0.7746, "step": 14579 }, { "epoch": 0.9878718070329968, "grad_norm": 10.804598808288574, "learning_rate": 8.10678348962968e-05, "loss": 0.7885, "step": 14580 }, { "epoch": 0.9879395623009689, "grad_norm": 5.6060709953308105, "learning_rate": 8.1066465877199e-05, "loss": 1.06, "step": 14581 }, { "epoch": 0.988007317568941, "grad_norm": 5.063103675842285, "learning_rate": 8.106509685810118e-05, "loss": 0.8395, "step": 14582 }, { "epoch": 0.9880750728369131, "grad_norm": 6.959988594055176, "learning_rate": 8.106372783900336e-05, "loss": 0.6454, "step": 14583 }, { "epoch": 0.9881428281048852, "grad_norm": 8.020045280456543, "learning_rate": 8.106235881990554e-05, "loss": 0.7632, "step": 14584 }, { "epoch": 0.9882105833728573, "grad_norm": 5.018332481384277, "learning_rate": 8.106098980080774e-05, "loss": 0.5049, "step": 14585 }, { "epoch": 0.9882783386408294, "grad_norm": 6.3518967628479, "learning_rate": 8.105962078170992e-05, "loss": 0.7407, "step": 14586 }, { "epoch": 0.9883460939088015, "grad_norm": 6.3573899269104, "learning_rate": 8.10582517626121e-05, "loss": 0.8245, "step": 14587 }, { "epoch": 0.9884138491767734, "grad_norm": 7.538825988769531, "learning_rate": 8.105688274351428e-05, "loss": 1.0365, "step": 14588 }, { "epoch": 0.9884816044447455, "grad_norm": 6.961862564086914, "learning_rate": 8.105551372441646e-05, "loss": 0.688, "step": 14589 }, { "epoch": 0.9885493597127176, "grad_norm": 4.587418556213379, "learning_rate": 8.105414470531865e-05, "loss": 0.565, "step": 14590 }, { "epoch": 0.9886171149806897, "grad_norm": 6.72902774810791, "learning_rate": 8.105277568622083e-05, "loss": 0.7931, "step": 14591 }, { "epoch": 0.9886848702486618, "grad_norm": 5.065114974975586, "learning_rate": 8.105140666712301e-05, "loss": 0.6943, "step": 14592 }, { "epoch": 0.9887526255166339, "grad_norm": 4.853360652923584, "learning_rate": 8.105003764802519e-05, "loss": 0.6457, "step": 14593 }, { "epoch": 0.988820380784606, "grad_norm": 8.033453941345215, "learning_rate": 8.104866862892739e-05, "loss": 0.7982, "step": 14594 }, { "epoch": 0.9888881360525781, "grad_norm": 5.311244487762451, "learning_rate": 8.104729960982957e-05, "loss": 0.8363, "step": 14595 }, { "epoch": 0.9889558913205502, "grad_norm": 6.700189590454102, "learning_rate": 8.104593059073175e-05, "loss": 0.9098, "step": 14596 }, { "epoch": 0.9890236465885223, "grad_norm": 6.449888706207275, "learning_rate": 8.104456157163393e-05, "loss": 0.7272, "step": 14597 }, { "epoch": 0.9890914018564944, "grad_norm": 6.6049909591674805, "learning_rate": 8.104319255253611e-05, "loss": 0.7782, "step": 14598 }, { "epoch": 0.9891591571244664, "grad_norm": 5.873762130737305, "learning_rate": 8.10418235334383e-05, "loss": 0.7901, "step": 14599 }, { "epoch": 0.9892269123924385, "grad_norm": 5.933529376983643, "learning_rate": 8.104045451434048e-05, "loss": 0.7743, "step": 14600 }, { "epoch": 0.9892946676604106, "grad_norm": 6.905478477478027, "learning_rate": 8.103908549524266e-05, "loss": 0.7662, "step": 14601 }, { "epoch": 0.9893624229283827, "grad_norm": 5.39354944229126, "learning_rate": 8.103771647614484e-05, "loss": 0.6157, "step": 14602 }, { "epoch": 0.9894301781963548, "grad_norm": 6.440433979034424, "learning_rate": 8.103634745704704e-05, "loss": 0.8263, "step": 14603 }, { "epoch": 0.9894979334643269, "grad_norm": 5.653606414794922, "learning_rate": 8.103497843794922e-05, "loss": 0.7081, "step": 14604 }, { "epoch": 0.9895656887322989, "grad_norm": 5.021646499633789, "learning_rate": 8.10336094188514e-05, "loss": 0.7988, "step": 14605 }, { "epoch": 0.989633444000271, "grad_norm": 7.331817150115967, "learning_rate": 8.103224039975358e-05, "loss": 0.9294, "step": 14606 }, { "epoch": 0.9897011992682431, "grad_norm": 6.1719584465026855, "learning_rate": 8.103087138065576e-05, "loss": 0.7923, "step": 14607 }, { "epoch": 0.9897689545362152, "grad_norm": 7.179246425628662, "learning_rate": 8.102950236155795e-05, "loss": 0.9297, "step": 14608 }, { "epoch": 0.9898367098041873, "grad_norm": 5.388153076171875, "learning_rate": 8.102813334246013e-05, "loss": 0.7016, "step": 14609 }, { "epoch": 0.9899044650721593, "grad_norm": 5.60443115234375, "learning_rate": 8.102676432336231e-05, "loss": 0.7853, "step": 14610 }, { "epoch": 0.9899722203401314, "grad_norm": 6.330133438110352, "learning_rate": 8.102539530426449e-05, "loss": 0.6971, "step": 14611 }, { "epoch": 0.9900399756081035, "grad_norm": 8.957503318786621, "learning_rate": 8.102402628516667e-05, "loss": 0.7834, "step": 14612 }, { "epoch": 0.9901077308760756, "grad_norm": 6.6696929931640625, "learning_rate": 8.102265726606887e-05, "loss": 0.8092, "step": 14613 }, { "epoch": 0.9901754861440477, "grad_norm": 6.236315727233887, "learning_rate": 8.102128824697105e-05, "loss": 0.8904, "step": 14614 }, { "epoch": 0.9902432414120198, "grad_norm": 4.87846040725708, "learning_rate": 8.101991922787323e-05, "loss": 0.6445, "step": 14615 }, { "epoch": 0.9903109966799919, "grad_norm": 6.903649806976318, "learning_rate": 8.101855020877541e-05, "loss": 0.6749, "step": 14616 }, { "epoch": 0.990378751947964, "grad_norm": 5.653696537017822, "learning_rate": 8.10171811896776e-05, "loss": 0.4365, "step": 14617 }, { "epoch": 0.9904465072159361, "grad_norm": 6.188183784484863, "learning_rate": 8.101581217057978e-05, "loss": 0.8174, "step": 14618 }, { "epoch": 0.9905142624839082, "grad_norm": 5.165365695953369, "learning_rate": 8.101444315148196e-05, "loss": 0.6121, "step": 14619 }, { "epoch": 0.9905820177518803, "grad_norm": 7.228616237640381, "learning_rate": 8.101307413238414e-05, "loss": 0.5768, "step": 14620 }, { "epoch": 0.9906497730198522, "grad_norm": 6.415277004241943, "learning_rate": 8.101170511328632e-05, "loss": 0.67, "step": 14621 }, { "epoch": 0.9907175282878243, "grad_norm": 6.051969528198242, "learning_rate": 8.101033609418852e-05, "loss": 0.8459, "step": 14622 }, { "epoch": 0.9907852835557964, "grad_norm": 5.808727741241455, "learning_rate": 8.10089670750907e-05, "loss": 0.738, "step": 14623 }, { "epoch": 0.9908530388237685, "grad_norm": 7.00636625289917, "learning_rate": 8.100759805599288e-05, "loss": 0.6573, "step": 14624 }, { "epoch": 0.9909207940917406, "grad_norm": 6.8472900390625, "learning_rate": 8.100622903689507e-05, "loss": 1.0119, "step": 14625 }, { "epoch": 0.9909885493597127, "grad_norm": 6.012302875518799, "learning_rate": 8.100486001779725e-05, "loss": 0.5468, "step": 14626 }, { "epoch": 0.9910563046276848, "grad_norm": 4.420849323272705, "learning_rate": 8.100349099869943e-05, "loss": 0.5134, "step": 14627 }, { "epoch": 0.9911240598956569, "grad_norm": 5.315083980560303, "learning_rate": 8.100212197960163e-05, "loss": 0.803, "step": 14628 }, { "epoch": 0.991191815163629, "grad_norm": 8.18484115600586, "learning_rate": 8.10007529605038e-05, "loss": 1.0685, "step": 14629 }, { "epoch": 0.9912595704316011, "grad_norm": 6.134372711181641, "learning_rate": 8.099938394140599e-05, "loss": 0.8893, "step": 14630 }, { "epoch": 0.9913273256995732, "grad_norm": 7.056315898895264, "learning_rate": 8.099801492230818e-05, "loss": 1.0268, "step": 14631 }, { "epoch": 0.9913950809675453, "grad_norm": 7.75009298324585, "learning_rate": 8.099664590321036e-05, "loss": 0.8632, "step": 14632 }, { "epoch": 0.9914628362355173, "grad_norm": 5.707569122314453, "learning_rate": 8.099527688411254e-05, "loss": 0.7005, "step": 14633 }, { "epoch": 0.9915305915034894, "grad_norm": 5.789709091186523, "learning_rate": 8.099390786501472e-05, "loss": 0.9823, "step": 14634 }, { "epoch": 0.9915983467714615, "grad_norm": 7.6483941078186035, "learning_rate": 8.099253884591692e-05, "loss": 0.7865, "step": 14635 }, { "epoch": 0.9916661020394336, "grad_norm": 6.485616683959961, "learning_rate": 8.09911698268191e-05, "loss": 0.7863, "step": 14636 }, { "epoch": 0.9917338573074056, "grad_norm": 4.72672176361084, "learning_rate": 8.098980080772128e-05, "loss": 0.6081, "step": 14637 }, { "epoch": 0.9918016125753777, "grad_norm": 5.638705253601074, "learning_rate": 8.098843178862346e-05, "loss": 0.773, "step": 14638 }, { "epoch": 0.9918693678433498, "grad_norm": 5.565254211425781, "learning_rate": 8.098706276952564e-05, "loss": 0.7205, "step": 14639 }, { "epoch": 0.9919371231113219, "grad_norm": 5.114184379577637, "learning_rate": 8.098569375042783e-05, "loss": 0.6835, "step": 14640 }, { "epoch": 0.992004878379294, "grad_norm": 6.048708438873291, "learning_rate": 8.098432473133001e-05, "loss": 0.8163, "step": 14641 }, { "epoch": 0.9920726336472661, "grad_norm": 7.4688215255737305, "learning_rate": 8.098295571223219e-05, "loss": 0.5373, "step": 14642 }, { "epoch": 0.9921403889152381, "grad_norm": 5.477198600769043, "learning_rate": 8.098158669313437e-05, "loss": 0.8911, "step": 14643 }, { "epoch": 0.9922081441832102, "grad_norm": 5.453397274017334, "learning_rate": 8.098021767403655e-05, "loss": 0.7153, "step": 14644 }, { "epoch": 0.9922758994511823, "grad_norm": 7.451064586639404, "learning_rate": 8.097884865493875e-05, "loss": 0.7384, "step": 14645 }, { "epoch": 0.9923436547191544, "grad_norm": 6.131191253662109, "learning_rate": 8.097747963584093e-05, "loss": 0.7987, "step": 14646 }, { "epoch": 0.9924114099871265, "grad_norm": 6.377108573913574, "learning_rate": 8.09761106167431e-05, "loss": 0.7305, "step": 14647 }, { "epoch": 0.9924791652550986, "grad_norm": 7.368172645568848, "learning_rate": 8.097474159764529e-05, "loss": 0.7361, "step": 14648 }, { "epoch": 0.9925469205230707, "grad_norm": 5.075170993804932, "learning_rate": 8.097337257854748e-05, "loss": 0.8124, "step": 14649 }, { "epoch": 0.9926146757910428, "grad_norm": 4.833024501800537, "learning_rate": 8.097200355944966e-05, "loss": 0.5609, "step": 14650 }, { "epoch": 0.9926824310590149, "grad_norm": 5.358253002166748, "learning_rate": 8.097063454035184e-05, "loss": 0.6095, "step": 14651 }, { "epoch": 0.992750186326987, "grad_norm": 5.228174209594727, "learning_rate": 8.096926552125402e-05, "loss": 0.8636, "step": 14652 }, { "epoch": 0.992817941594959, "grad_norm": 5.842618465423584, "learning_rate": 8.09678965021562e-05, "loss": 0.6013, "step": 14653 }, { "epoch": 0.992885696862931, "grad_norm": 6.454665184020996, "learning_rate": 8.09665274830584e-05, "loss": 0.7059, "step": 14654 }, { "epoch": 0.9929534521309031, "grad_norm": 4.836513042449951, "learning_rate": 8.096515846396058e-05, "loss": 0.7923, "step": 14655 }, { "epoch": 0.9930212073988752, "grad_norm": 5.523789405822754, "learning_rate": 8.096378944486276e-05, "loss": 0.733, "step": 14656 }, { "epoch": 0.9930889626668473, "grad_norm": 6.529998779296875, "learning_rate": 8.096242042576494e-05, "loss": 0.7117, "step": 14657 }, { "epoch": 0.9931567179348194, "grad_norm": 4.7176408767700195, "learning_rate": 8.096105140666713e-05, "loss": 0.603, "step": 14658 }, { "epoch": 0.9932244732027915, "grad_norm": 5.4773993492126465, "learning_rate": 8.095968238756931e-05, "loss": 0.6101, "step": 14659 }, { "epoch": 0.9932922284707636, "grad_norm": 5.661228179931641, "learning_rate": 8.095831336847149e-05, "loss": 0.7999, "step": 14660 }, { "epoch": 0.9933599837387357, "grad_norm": 6.280917644500732, "learning_rate": 8.095694434937367e-05, "loss": 0.6691, "step": 14661 }, { "epoch": 0.9934277390067078, "grad_norm": 7.072371959686279, "learning_rate": 8.095557533027585e-05, "loss": 0.8523, "step": 14662 }, { "epoch": 0.9934954942746799, "grad_norm": 5.228603363037109, "learning_rate": 8.095420631117805e-05, "loss": 0.6841, "step": 14663 }, { "epoch": 0.993563249542652, "grad_norm": 6.489750862121582, "learning_rate": 8.095283729208023e-05, "loss": 0.9927, "step": 14664 }, { "epoch": 0.993631004810624, "grad_norm": 6.142980575561523, "learning_rate": 8.09514682729824e-05, "loss": 0.62, "step": 14665 }, { "epoch": 0.9936987600785961, "grad_norm": 6.388856887817383, "learning_rate": 8.095009925388459e-05, "loss": 0.6953, "step": 14666 }, { "epoch": 0.9937665153465682, "grad_norm": 6.71579647064209, "learning_rate": 8.094873023478677e-05, "loss": 0.6856, "step": 14667 }, { "epoch": 0.9938342706145403, "grad_norm": 4.24629545211792, "learning_rate": 8.094736121568896e-05, "loss": 0.9145, "step": 14668 }, { "epoch": 0.9939020258825124, "grad_norm": 5.748496055603027, "learning_rate": 8.094599219659114e-05, "loss": 0.8127, "step": 14669 }, { "epoch": 0.9939697811504844, "grad_norm": 6.51283597946167, "learning_rate": 8.094462317749332e-05, "loss": 0.9989, "step": 14670 }, { "epoch": 0.9940375364184565, "grad_norm": 5.742320537567139, "learning_rate": 8.094325415839552e-05, "loss": 0.7456, "step": 14671 }, { "epoch": 0.9941052916864286, "grad_norm": 5.277899265289307, "learning_rate": 8.09418851392977e-05, "loss": 0.7498, "step": 14672 }, { "epoch": 0.9941730469544007, "grad_norm": 5.872328281402588, "learning_rate": 8.094051612019988e-05, "loss": 0.7343, "step": 14673 }, { "epoch": 0.9942408022223728, "grad_norm": 5.946358680725098, "learning_rate": 8.093914710110207e-05, "loss": 0.7802, "step": 14674 }, { "epoch": 0.9943085574903449, "grad_norm": 6.713497161865234, "learning_rate": 8.093777808200425e-05, "loss": 0.9639, "step": 14675 }, { "epoch": 0.994376312758317, "grad_norm": 5.236899375915527, "learning_rate": 8.093640906290643e-05, "loss": 0.7111, "step": 14676 }, { "epoch": 0.994444068026289, "grad_norm": 6.121020317077637, "learning_rate": 8.093504004380862e-05, "loss": 0.8627, "step": 14677 }, { "epoch": 0.9945118232942611, "grad_norm": 6.684620380401611, "learning_rate": 8.09336710247108e-05, "loss": 0.8863, "step": 14678 }, { "epoch": 0.9945795785622332, "grad_norm": 5.249447345733643, "learning_rate": 8.093230200561299e-05, "loss": 0.6532, "step": 14679 }, { "epoch": 0.9946473338302053, "grad_norm": 5.633591651916504, "learning_rate": 8.093093298651517e-05, "loss": 0.8192, "step": 14680 }, { "epoch": 0.9947150890981774, "grad_norm": 5.300892353057861, "learning_rate": 8.092956396741736e-05, "loss": 0.6628, "step": 14681 }, { "epoch": 0.9947828443661495, "grad_norm": 6.91768741607666, "learning_rate": 8.092819494831954e-05, "loss": 0.9256, "step": 14682 }, { "epoch": 0.9948505996341216, "grad_norm": 6.193354606628418, "learning_rate": 8.092682592922172e-05, "loss": 0.8907, "step": 14683 }, { "epoch": 0.9949183549020937, "grad_norm": 5.23224401473999, "learning_rate": 8.09254569101239e-05, "loss": 0.6606, "step": 14684 }, { "epoch": 0.9949861101700658, "grad_norm": 5.615985870361328, "learning_rate": 8.092408789102608e-05, "loss": 0.7438, "step": 14685 }, { "epoch": 0.9950538654380378, "grad_norm": 4.997095584869385, "learning_rate": 8.092271887192828e-05, "loss": 0.6359, "step": 14686 }, { "epoch": 0.9951216207060098, "grad_norm": 4.995532989501953, "learning_rate": 8.092134985283046e-05, "loss": 0.7636, "step": 14687 }, { "epoch": 0.9951893759739819, "grad_norm": 5.627157688140869, "learning_rate": 8.091998083373264e-05, "loss": 0.7524, "step": 14688 }, { "epoch": 0.995257131241954, "grad_norm": 6.29923152923584, "learning_rate": 8.091861181463482e-05, "loss": 0.8923, "step": 14689 }, { "epoch": 0.9953248865099261, "grad_norm": 5.104131698608398, "learning_rate": 8.091724279553701e-05, "loss": 0.6621, "step": 14690 }, { "epoch": 0.9953926417778982, "grad_norm": 6.44821834564209, "learning_rate": 8.091587377643919e-05, "loss": 1.0041, "step": 14691 }, { "epoch": 0.9954603970458703, "grad_norm": 4.547702789306641, "learning_rate": 8.091450475734137e-05, "loss": 0.7067, "step": 14692 }, { "epoch": 0.9955281523138424, "grad_norm": 5.443078517913818, "learning_rate": 8.091313573824355e-05, "loss": 0.7664, "step": 14693 }, { "epoch": 0.9955959075818145, "grad_norm": 5.759512901306152, "learning_rate": 8.091176671914573e-05, "loss": 0.7241, "step": 14694 }, { "epoch": 0.9956636628497866, "grad_norm": 4.9956746101379395, "learning_rate": 8.091039770004793e-05, "loss": 0.6777, "step": 14695 }, { "epoch": 0.9957314181177587, "grad_norm": 6.713644027709961, "learning_rate": 8.09090286809501e-05, "loss": 0.7727, "step": 14696 }, { "epoch": 0.9957991733857308, "grad_norm": 5.988772869110107, "learning_rate": 8.090765966185229e-05, "loss": 0.6803, "step": 14697 }, { "epoch": 0.9958669286537029, "grad_norm": 5.6772966384887695, "learning_rate": 8.090629064275447e-05, "loss": 0.733, "step": 14698 }, { "epoch": 0.995934683921675, "grad_norm": 5.93316125869751, "learning_rate": 8.090492162365665e-05, "loss": 0.7119, "step": 14699 }, { "epoch": 0.996002439189647, "grad_norm": 5.35646915435791, "learning_rate": 8.090355260455884e-05, "loss": 0.9498, "step": 14700 }, { "epoch": 0.9960701944576191, "grad_norm": 6.932188510894775, "learning_rate": 8.090218358546102e-05, "loss": 0.7779, "step": 14701 }, { "epoch": 0.9961379497255911, "grad_norm": 4.6098856925964355, "learning_rate": 8.09008145663632e-05, "loss": 0.7614, "step": 14702 }, { "epoch": 0.9962057049935632, "grad_norm": 6.402628421783447, "learning_rate": 8.089944554726538e-05, "loss": 0.7212, "step": 14703 }, { "epoch": 0.9962734602615353, "grad_norm": 5.7452473640441895, "learning_rate": 8.089807652816758e-05, "loss": 0.9323, "step": 14704 }, { "epoch": 0.9963412155295074, "grad_norm": 8.710042953491211, "learning_rate": 8.089670750906976e-05, "loss": 0.7616, "step": 14705 }, { "epoch": 0.9964089707974795, "grad_norm": 6.53500509262085, "learning_rate": 8.089533848997194e-05, "loss": 0.8054, "step": 14706 }, { "epoch": 0.9964767260654516, "grad_norm": 6.090463161468506, "learning_rate": 8.089396947087412e-05, "loss": 0.8993, "step": 14707 }, { "epoch": 0.9965444813334237, "grad_norm": 4.988658428192139, "learning_rate": 8.08926004517763e-05, "loss": 0.6045, "step": 14708 }, { "epoch": 0.9966122366013958, "grad_norm": 5.254270553588867, "learning_rate": 8.089123143267849e-05, "loss": 0.7135, "step": 14709 }, { "epoch": 0.9966799918693678, "grad_norm": 6.4775848388671875, "learning_rate": 8.088986241358067e-05, "loss": 0.8647, "step": 14710 }, { "epoch": 0.9967477471373399, "grad_norm": 8.165122985839844, "learning_rate": 8.088849339448285e-05, "loss": 0.5061, "step": 14711 }, { "epoch": 0.996815502405312, "grad_norm": 6.5195159912109375, "learning_rate": 8.088712437538503e-05, "loss": 1.0305, "step": 14712 }, { "epoch": 0.9968832576732841, "grad_norm": 4.683186054229736, "learning_rate": 8.088575535628723e-05, "loss": 0.696, "step": 14713 }, { "epoch": 0.9969510129412562, "grad_norm": 8.721222877502441, "learning_rate": 8.08843863371894e-05, "loss": 0.7871, "step": 14714 }, { "epoch": 0.9970187682092283, "grad_norm": 4.208126544952393, "learning_rate": 8.088301731809159e-05, "loss": 0.4982, "step": 14715 }, { "epoch": 0.9970865234772004, "grad_norm": 8.587434768676758, "learning_rate": 8.088164829899377e-05, "loss": 0.7851, "step": 14716 }, { "epoch": 0.9971542787451725, "grad_norm": 6.511172294616699, "learning_rate": 8.088027927989596e-05, "loss": 0.6705, "step": 14717 }, { "epoch": 0.9972220340131446, "grad_norm": 9.866682052612305, "learning_rate": 8.087891026079814e-05, "loss": 0.7538, "step": 14718 }, { "epoch": 0.9972897892811166, "grad_norm": 5.130730628967285, "learning_rate": 8.087754124170032e-05, "loss": 0.5205, "step": 14719 }, { "epoch": 0.9973575445490886, "grad_norm": 6.190724849700928, "learning_rate": 8.087617222260252e-05, "loss": 0.8204, "step": 14720 }, { "epoch": 0.9974252998170607, "grad_norm": 6.667228698730469, "learning_rate": 8.08748032035047e-05, "loss": 0.8681, "step": 14721 }, { "epoch": 0.9974930550850328, "grad_norm": 5.230597019195557, "learning_rate": 8.087343418440688e-05, "loss": 0.6311, "step": 14722 }, { "epoch": 0.9975608103530049, "grad_norm": 4.286562919616699, "learning_rate": 8.087206516530907e-05, "loss": 0.5128, "step": 14723 }, { "epoch": 0.997628565620977, "grad_norm": 8.196592330932617, "learning_rate": 8.087069614621125e-05, "loss": 1.0953, "step": 14724 }, { "epoch": 0.9976963208889491, "grad_norm": 5.942284107208252, "learning_rate": 8.086932712711343e-05, "loss": 0.8896, "step": 14725 }, { "epoch": 0.9977640761569212, "grad_norm": 7.276721477508545, "learning_rate": 8.086795810801561e-05, "loss": 0.8503, "step": 14726 }, { "epoch": 0.9978318314248933, "grad_norm": 6.109722137451172, "learning_rate": 8.08665890889178e-05, "loss": 0.563, "step": 14727 }, { "epoch": 0.9978995866928654, "grad_norm": 7.156121253967285, "learning_rate": 8.086522006981998e-05, "loss": 0.6586, "step": 14728 }, { "epoch": 0.9979673419608375, "grad_norm": 5.004053592681885, "learning_rate": 8.086385105072217e-05, "loss": 0.7652, "step": 14729 }, { "epoch": 0.9980350972288096, "grad_norm": 6.767992973327637, "learning_rate": 8.086248203162435e-05, "loss": 0.9194, "step": 14730 }, { "epoch": 0.9981028524967817, "grad_norm": 4.350346565246582, "learning_rate": 8.086111301252653e-05, "loss": 0.5671, "step": 14731 }, { "epoch": 0.9981706077647537, "grad_norm": 4.5186052322387695, "learning_rate": 8.085974399342872e-05, "loss": 0.7229, "step": 14732 }, { "epoch": 0.9982383630327258, "grad_norm": 4.803741931915283, "learning_rate": 8.08583749743309e-05, "loss": 0.5626, "step": 14733 }, { "epoch": 0.9983061183006979, "grad_norm": 5.849275588989258, "learning_rate": 8.085700595523308e-05, "loss": 0.7053, "step": 14734 }, { "epoch": 0.9983738735686699, "grad_norm": 6.240585803985596, "learning_rate": 8.085563693613526e-05, "loss": 1.0339, "step": 14735 }, { "epoch": 0.998441628836642, "grad_norm": 5.102994441986084, "learning_rate": 8.085426791703745e-05, "loss": 0.6594, "step": 14736 }, { "epoch": 0.9985093841046141, "grad_norm": 7.48859977722168, "learning_rate": 8.085289889793964e-05, "loss": 0.7227, "step": 14737 }, { "epoch": 0.9985771393725862, "grad_norm": 4.829802513122559, "learning_rate": 8.085152987884182e-05, "loss": 0.6739, "step": 14738 }, { "epoch": 0.9986448946405583, "grad_norm": 5.353672504425049, "learning_rate": 8.0850160859744e-05, "loss": 0.8112, "step": 14739 }, { "epoch": 0.9987126499085304, "grad_norm": 5.165256023406982, "learning_rate": 8.084879184064618e-05, "loss": 0.7894, "step": 14740 }, { "epoch": 0.9987804051765025, "grad_norm": 6.365817070007324, "learning_rate": 8.084742282154837e-05, "loss": 0.6763, "step": 14741 }, { "epoch": 0.9988481604444746, "grad_norm": 5.996483325958252, "learning_rate": 8.084605380245055e-05, "loss": 0.6717, "step": 14742 }, { "epoch": 0.9989159157124466, "grad_norm": 5.232832431793213, "learning_rate": 8.084468478335273e-05, "loss": 0.779, "step": 14743 }, { "epoch": 0.9989836709804187, "grad_norm": 7.0810089111328125, "learning_rate": 8.084331576425491e-05, "loss": 1.0872, "step": 14744 }, { "epoch": 0.9990514262483908, "grad_norm": 6.616766452789307, "learning_rate": 8.084194674515709e-05, "loss": 0.8411, "step": 14745 }, { "epoch": 0.9991191815163629, "grad_norm": 6.06799840927124, "learning_rate": 8.084057772605929e-05, "loss": 0.6083, "step": 14746 }, { "epoch": 0.999186936784335, "grad_norm": 6.634158134460449, "learning_rate": 8.083920870696147e-05, "loss": 0.627, "step": 14747 }, { "epoch": 0.9992546920523071, "grad_norm": 6.214893817901611, "learning_rate": 8.083783968786365e-05, "loss": 1.025, "step": 14748 }, { "epoch": 0.9993224473202792, "grad_norm": 7.438349723815918, "learning_rate": 8.083647066876583e-05, "loss": 0.8148, "step": 14749 }, { "epoch": 0.9993902025882513, "grad_norm": 5.228566646575928, "learning_rate": 8.083510164966802e-05, "loss": 0.741, "step": 14750 }, { "epoch": 0.9994579578562233, "grad_norm": 6.1171064376831055, "learning_rate": 8.08337326305702e-05, "loss": 0.6582, "step": 14751 }, { "epoch": 0.9995257131241954, "grad_norm": 5.6771368980407715, "learning_rate": 8.083236361147238e-05, "loss": 0.7143, "step": 14752 }, { "epoch": 0.9995934683921675, "grad_norm": 6.686731815338135, "learning_rate": 8.083099459237456e-05, "loss": 0.9172, "step": 14753 }, { "epoch": 0.9996612236601395, "grad_norm": 5.874754905700684, "learning_rate": 8.082962557327674e-05, "loss": 0.8786, "step": 14754 }, { "epoch": 0.9997289789281116, "grad_norm": 7.658037185668945, "learning_rate": 8.082825655417894e-05, "loss": 0.4877, "step": 14755 }, { "epoch": 0.9997967341960837, "grad_norm": 7.869183540344238, "learning_rate": 8.082688753508112e-05, "loss": 0.811, "step": 14756 }, { "epoch": 0.9997967341960837, "eval_loss": 0.7405052185058594, "eval_noise_accuracy": 0.0, "eval_runtime": 1547.7438, "eval_samples_per_second": 3.32, "eval_steps_per_second": 0.208, "eval_wer": 78.51462535428304, "step": 14756 }, { "epoch": 0.9998644894640558, "grad_norm": 6.051870822906494, "learning_rate": 8.08255185159833e-05, "loss": 0.9022, "step": 14757 }, { "epoch": 0.9999322447320279, "grad_norm": 5.921606063842773, "learning_rate": 8.082414949688548e-05, "loss": 0.8192, "step": 14758 }, { "epoch": 1.0, "grad_norm": 8.148659706115723, "learning_rate": 8.082278047778767e-05, "loss": 0.7852, "step": 14759 }, { "epoch": 1.000067755267972, "grad_norm": 5.229124069213867, "learning_rate": 8.082141145868985e-05, "loss": 0.8277, "step": 14760 }, { "epoch": 1.0001355105359442, "grad_norm": 5.082587242126465, "learning_rate": 8.082004243959203e-05, "loss": 0.7213, "step": 14761 }, { "epoch": 1.0002032658039162, "grad_norm": 6.1493024826049805, "learning_rate": 8.081867342049421e-05, "loss": 0.6404, "step": 14762 }, { "epoch": 1.0002710210718884, "grad_norm": 6.247502326965332, "learning_rate": 8.08173044013964e-05, "loss": 1.1216, "step": 14763 }, { "epoch": 1.0003387763398603, "grad_norm": 6.559756755828857, "learning_rate": 8.081593538229859e-05, "loss": 0.5577, "step": 14764 }, { "epoch": 1.0004065316078325, "grad_norm": 4.563822269439697, "learning_rate": 8.081456636320077e-05, "loss": 0.6457, "step": 14765 }, { "epoch": 1.0004742868758045, "grad_norm": 4.815116882324219, "learning_rate": 8.081319734410296e-05, "loss": 0.8247, "step": 14766 }, { "epoch": 1.0005420421437767, "grad_norm": 4.924731731414795, "learning_rate": 8.081182832500514e-05, "loss": 0.8842, "step": 14767 }, { "epoch": 1.0006097974117487, "grad_norm": 6.610191822052002, "learning_rate": 8.081045930590732e-05, "loss": 0.7282, "step": 14768 }, { "epoch": 1.000677552679721, "grad_norm": 6.175731658935547, "learning_rate": 8.080909028680951e-05, "loss": 0.7621, "step": 14769 }, { "epoch": 1.000745307947693, "grad_norm": 5.9794111251831055, "learning_rate": 8.08077212677117e-05, "loss": 0.7258, "step": 14770 }, { "epoch": 1.000813063215665, "grad_norm": 13.052425384521484, "learning_rate": 8.080635224861388e-05, "loss": 0.8328, "step": 14771 }, { "epoch": 1.000880818483637, "grad_norm": 6.011143684387207, "learning_rate": 8.080498322951606e-05, "loss": 0.62, "step": 14772 }, { "epoch": 1.0009485737516093, "grad_norm": 7.789141654968262, "learning_rate": 8.080361421041825e-05, "loss": 0.7544, "step": 14773 }, { "epoch": 1.0010163290195813, "grad_norm": 5.623871326446533, "learning_rate": 8.080224519132043e-05, "loss": 0.5816, "step": 14774 }, { "epoch": 1.0010840842875535, "grad_norm": 6.538607597351074, "learning_rate": 8.080087617222261e-05, "loss": 0.8612, "step": 14775 }, { "epoch": 1.0011518395555254, "grad_norm": 5.990743160247803, "learning_rate": 8.079950715312479e-05, "loss": 0.7108, "step": 14776 }, { "epoch": 1.0012195948234974, "grad_norm": 5.186371326446533, "learning_rate": 8.079813813402697e-05, "loss": 0.5226, "step": 14777 }, { "epoch": 1.0012873500914696, "grad_norm": 8.262914657592773, "learning_rate": 8.079676911492916e-05, "loss": 0.5101, "step": 14778 }, { "epoch": 1.0013551053594416, "grad_norm": 7.4032769203186035, "learning_rate": 8.079540009583134e-05, "loss": 0.579, "step": 14779 }, { "epoch": 1.0014228606274138, "grad_norm": 5.568051815032959, "learning_rate": 8.079403107673353e-05, "loss": 0.7007, "step": 14780 }, { "epoch": 1.0014906158953858, "grad_norm": 4.613144397735596, "learning_rate": 8.07926620576357e-05, "loss": 0.6206, "step": 14781 }, { "epoch": 1.001558371163358, "grad_norm": 5.073683261871338, "learning_rate": 8.07912930385379e-05, "loss": 0.4866, "step": 14782 }, { "epoch": 1.00162612643133, "grad_norm": 4.251949310302734, "learning_rate": 8.078992401944008e-05, "loss": 0.5746, "step": 14783 }, { "epoch": 1.0016938816993022, "grad_norm": 6.774303436279297, "learning_rate": 8.078855500034226e-05, "loss": 0.81, "step": 14784 }, { "epoch": 1.0017616369672742, "grad_norm": 7.1045918464660645, "learning_rate": 8.078718598124444e-05, "loss": 0.7654, "step": 14785 }, { "epoch": 1.0018293922352464, "grad_norm": 4.904873371124268, "learning_rate": 8.078581696214662e-05, "loss": 0.8256, "step": 14786 }, { "epoch": 1.0018971475032183, "grad_norm": 6.918723106384277, "learning_rate": 8.078444794304881e-05, "loss": 0.7021, "step": 14787 }, { "epoch": 1.0019649027711905, "grad_norm": 6.581482887268066, "learning_rate": 8.0783078923951e-05, "loss": 0.6402, "step": 14788 }, { "epoch": 1.0020326580391625, "grad_norm": 5.610997200012207, "learning_rate": 8.078170990485318e-05, "loss": 0.7777, "step": 14789 }, { "epoch": 1.0021004133071347, "grad_norm": 6.357514381408691, "learning_rate": 8.078034088575536e-05, "loss": 0.7679, "step": 14790 }, { "epoch": 1.0021681685751067, "grad_norm": 6.965141296386719, "learning_rate": 8.077897186665755e-05, "loss": 0.4878, "step": 14791 }, { "epoch": 1.0022359238430787, "grad_norm": 6.122716426849365, "learning_rate": 8.077760284755973e-05, "loss": 0.6921, "step": 14792 }, { "epoch": 1.002303679111051, "grad_norm": 4.804755210876465, "learning_rate": 8.077623382846191e-05, "loss": 0.6617, "step": 14793 }, { "epoch": 1.0023714343790229, "grad_norm": 5.5113844871521, "learning_rate": 8.077486480936409e-05, "loss": 0.7798, "step": 14794 }, { "epoch": 1.002439189646995, "grad_norm": 6.461663246154785, "learning_rate": 8.077349579026627e-05, "loss": 0.7959, "step": 14795 }, { "epoch": 1.002506944914967, "grad_norm": 6.894604206085205, "learning_rate": 8.077212677116846e-05, "loss": 0.7367, "step": 14796 }, { "epoch": 1.0025747001829393, "grad_norm": 5.337347984313965, "learning_rate": 8.077075775207065e-05, "loss": 0.5788, "step": 14797 }, { "epoch": 1.0026424554509112, "grad_norm": 6.755460262298584, "learning_rate": 8.076938873297283e-05, "loss": 0.8379, "step": 14798 }, { "epoch": 1.0027102107188834, "grad_norm": 5.582238674163818, "learning_rate": 8.0768019713875e-05, "loss": 0.6805, "step": 14799 }, { "epoch": 1.0027779659868554, "grad_norm": 5.460515975952148, "learning_rate": 8.076665069477719e-05, "loss": 0.7466, "step": 14800 }, { "epoch": 1.0028457212548276, "grad_norm": 7.810371398925781, "learning_rate": 8.076528167567938e-05, "loss": 0.743, "step": 14801 }, { "epoch": 1.0029134765227996, "grad_norm": 5.568098545074463, "learning_rate": 8.076391265658156e-05, "loss": 0.7604, "step": 14802 }, { "epoch": 1.0029812317907718, "grad_norm": 4.993529319763184, "learning_rate": 8.076254363748374e-05, "loss": 0.5721, "step": 14803 }, { "epoch": 1.0030489870587438, "grad_norm": 4.659602642059326, "learning_rate": 8.076117461838592e-05, "loss": 0.5619, "step": 14804 }, { "epoch": 1.003116742326716, "grad_norm": 4.9584221839904785, "learning_rate": 8.075980559928812e-05, "loss": 0.6407, "step": 14805 }, { "epoch": 1.003184497594688, "grad_norm": 6.771777629852295, "learning_rate": 8.07584365801903e-05, "loss": 0.6165, "step": 14806 }, { "epoch": 1.0032522528626602, "grad_norm": 6.287032604217529, "learning_rate": 8.075706756109248e-05, "loss": 0.8346, "step": 14807 }, { "epoch": 1.0033200081306322, "grad_norm": 5.438133239746094, "learning_rate": 8.075569854199466e-05, "loss": 0.6621, "step": 14808 }, { "epoch": 1.0033877633986041, "grad_norm": 5.8191633224487305, "learning_rate": 8.075432952289685e-05, "loss": 0.6837, "step": 14809 }, { "epoch": 1.0034555186665763, "grad_norm": 5.39967679977417, "learning_rate": 8.075296050379903e-05, "loss": 0.6687, "step": 14810 }, { "epoch": 1.0035232739345483, "grad_norm": 6.197573184967041, "learning_rate": 8.075159148470121e-05, "loss": 0.6243, "step": 14811 }, { "epoch": 1.0035910292025205, "grad_norm": 5.200948715209961, "learning_rate": 8.07502224656034e-05, "loss": 0.6812, "step": 14812 }, { "epoch": 1.0036587844704925, "grad_norm": 4.868316650390625, "learning_rate": 8.074885344650558e-05, "loss": 0.6131, "step": 14813 }, { "epoch": 1.0037265397384647, "grad_norm": 4.966255187988281, "learning_rate": 8.074748442740777e-05, "loss": 0.7787, "step": 14814 }, { "epoch": 1.0037942950064367, "grad_norm": 6.747678756713867, "learning_rate": 8.074611540830996e-05, "loss": 0.6185, "step": 14815 }, { "epoch": 1.003862050274409, "grad_norm": 6.168325424194336, "learning_rate": 8.074474638921214e-05, "loss": 0.7458, "step": 14816 }, { "epoch": 1.0039298055423809, "grad_norm": 4.884849548339844, "learning_rate": 8.074337737011432e-05, "loss": 0.604, "step": 14817 }, { "epoch": 1.003997560810353, "grad_norm": 4.729106903076172, "learning_rate": 8.07420083510165e-05, "loss": 0.6351, "step": 14818 }, { "epoch": 1.004065316078325, "grad_norm": 6.679864406585693, "learning_rate": 8.07406393319187e-05, "loss": 0.8324, "step": 14819 }, { "epoch": 1.0041330713462973, "grad_norm": 5.62283182144165, "learning_rate": 8.073927031282087e-05, "loss": 0.8227, "step": 14820 }, { "epoch": 1.0042008266142692, "grad_norm": 8.426139831542969, "learning_rate": 8.073790129372305e-05, "loss": 0.6578, "step": 14821 }, { "epoch": 1.0042685818822414, "grad_norm": 6.8322038650512695, "learning_rate": 8.073653227462524e-05, "loss": 0.7437, "step": 14822 }, { "epoch": 1.0043363371502134, "grad_norm": 8.170849800109863, "learning_rate": 8.073516325552743e-05, "loss": 0.7791, "step": 14823 }, { "epoch": 1.0044040924181856, "grad_norm": 6.2695441246032715, "learning_rate": 8.073379423642961e-05, "loss": 0.7124, "step": 14824 }, { "epoch": 1.0044718476861576, "grad_norm": 6.771942615509033, "learning_rate": 8.073242521733179e-05, "loss": 0.7781, "step": 14825 }, { "epoch": 1.0045396029541296, "grad_norm": 5.956495761871338, "learning_rate": 8.073105619823397e-05, "loss": 0.6476, "step": 14826 }, { "epoch": 1.0046073582221018, "grad_norm": 7.166341304779053, "learning_rate": 8.072968717913615e-05, "loss": 0.8113, "step": 14827 }, { "epoch": 1.0046751134900738, "grad_norm": 5.417853832244873, "learning_rate": 8.072831816003834e-05, "loss": 0.5846, "step": 14828 }, { "epoch": 1.004742868758046, "grad_norm": 5.144546031951904, "learning_rate": 8.072694914094052e-05, "loss": 0.693, "step": 14829 }, { "epoch": 1.004810624026018, "grad_norm": 4.3079094886779785, "learning_rate": 8.07255801218427e-05, "loss": 0.7312, "step": 14830 }, { "epoch": 1.0048783792939902, "grad_norm": 5.390209674835205, "learning_rate": 8.072421110274489e-05, "loss": 0.6347, "step": 14831 }, { "epoch": 1.0049461345619621, "grad_norm": 5.938960552215576, "learning_rate": 8.072284208364707e-05, "loss": 0.76, "step": 14832 }, { "epoch": 1.0050138898299343, "grad_norm": 5.8623247146606445, "learning_rate": 8.072147306454926e-05, "loss": 0.8752, "step": 14833 }, { "epoch": 1.0050816450979063, "grad_norm": 6.3055219650268555, "learning_rate": 8.072010404545144e-05, "loss": 0.783, "step": 14834 }, { "epoch": 1.0051494003658785, "grad_norm": 6.132151126861572, "learning_rate": 8.071873502635362e-05, "loss": 0.6859, "step": 14835 }, { "epoch": 1.0052171556338505, "grad_norm": 4.891525745391846, "learning_rate": 8.07173660072558e-05, "loss": 0.5293, "step": 14836 }, { "epoch": 1.0052849109018227, "grad_norm": 6.941277980804443, "learning_rate": 8.0715996988158e-05, "loss": 0.5511, "step": 14837 }, { "epoch": 1.0053526661697947, "grad_norm": 7.667896270751953, "learning_rate": 8.071462796906017e-05, "loss": 0.6837, "step": 14838 }, { "epoch": 1.0054204214377669, "grad_norm": 4.408740043640137, "learning_rate": 8.071325894996236e-05, "loss": 0.5733, "step": 14839 }, { "epoch": 1.0054881767057389, "grad_norm": 7.213569164276123, "learning_rate": 8.071188993086454e-05, "loss": 0.6838, "step": 14840 }, { "epoch": 1.0055559319737108, "grad_norm": 6.651516914367676, "learning_rate": 8.071052091176672e-05, "loss": 0.8254, "step": 14841 }, { "epoch": 1.005623687241683, "grad_norm": 5.002716541290283, "learning_rate": 8.070915189266891e-05, "loss": 0.5535, "step": 14842 }, { "epoch": 1.005691442509655, "grad_norm": 7.2078728675842285, "learning_rate": 8.070778287357109e-05, "loss": 0.7439, "step": 14843 }, { "epoch": 1.0057591977776272, "grad_norm": 4.879020690917969, "learning_rate": 8.070641385447327e-05, "loss": 0.7649, "step": 14844 }, { "epoch": 1.0058269530455992, "grad_norm": 7.177512168884277, "learning_rate": 8.070504483537545e-05, "loss": 0.599, "step": 14845 }, { "epoch": 1.0058947083135714, "grad_norm": 7.1817708015441895, "learning_rate": 8.070367581627764e-05, "loss": 0.6034, "step": 14846 }, { "epoch": 1.0059624635815434, "grad_norm": 11.819538116455078, "learning_rate": 8.070230679717982e-05, "loss": 0.6347, "step": 14847 }, { "epoch": 1.0060302188495156, "grad_norm": 5.775310516357422, "learning_rate": 8.0700937778082e-05, "loss": 0.577, "step": 14848 }, { "epoch": 1.0060979741174876, "grad_norm": 6.324820518493652, "learning_rate": 8.069956875898419e-05, "loss": 0.7638, "step": 14849 }, { "epoch": 1.0061657293854598, "grad_norm": 5.748277187347412, "learning_rate": 8.069819973988637e-05, "loss": 0.6587, "step": 14850 }, { "epoch": 1.0062334846534318, "grad_norm": 5.290812015533447, "learning_rate": 8.069683072078856e-05, "loss": 0.5606, "step": 14851 }, { "epoch": 1.006301239921404, "grad_norm": 5.5858941078186035, "learning_rate": 8.069546170169074e-05, "loss": 0.4992, "step": 14852 }, { "epoch": 1.006368995189376, "grad_norm": 5.581445693969727, "learning_rate": 8.069409268259292e-05, "loss": 0.5722, "step": 14853 }, { "epoch": 1.0064367504573482, "grad_norm": 6.363956451416016, "learning_rate": 8.06927236634951e-05, "loss": 0.5024, "step": 14854 }, { "epoch": 1.0065045057253201, "grad_norm": 6.256494998931885, "learning_rate": 8.069135464439728e-05, "loss": 0.4683, "step": 14855 }, { "epoch": 1.0065722609932923, "grad_norm": 5.651278972625732, "learning_rate": 8.068998562529948e-05, "loss": 0.6379, "step": 14856 }, { "epoch": 1.0066400162612643, "grad_norm": 7.8025712966918945, "learning_rate": 8.068861660620166e-05, "loss": 0.7948, "step": 14857 }, { "epoch": 1.0067077715292363, "grad_norm": 6.639724254608154, "learning_rate": 8.068724758710384e-05, "loss": 0.8023, "step": 14858 }, { "epoch": 1.0067755267972085, "grad_norm": 3.8018884658813477, "learning_rate": 8.068587856800603e-05, "loss": 0.5517, "step": 14859 }, { "epoch": 1.0068432820651805, "grad_norm": 5.326318740844727, "learning_rate": 8.068450954890821e-05, "loss": 0.611, "step": 14860 }, { "epoch": 1.0069110373331527, "grad_norm": 6.377422332763672, "learning_rate": 8.068314052981039e-05, "loss": 0.6394, "step": 14861 }, { "epoch": 1.0069787926011247, "grad_norm": 5.33223295211792, "learning_rate": 8.068177151071258e-05, "loss": 0.6427, "step": 14862 }, { "epoch": 1.0070465478690969, "grad_norm": 6.365980625152588, "learning_rate": 8.068040249161476e-05, "loss": 0.7221, "step": 14863 }, { "epoch": 1.0071143031370688, "grad_norm": 6.96981143951416, "learning_rate": 8.067903347251694e-05, "loss": 0.9021, "step": 14864 }, { "epoch": 1.007182058405041, "grad_norm": 8.809569358825684, "learning_rate": 8.067766445341914e-05, "loss": 0.8451, "step": 14865 }, { "epoch": 1.007249813673013, "grad_norm": 6.544775485992432, "learning_rate": 8.067629543432132e-05, "loss": 0.6729, "step": 14866 }, { "epoch": 1.0073175689409852, "grad_norm": 4.71762228012085, "learning_rate": 8.06749264152235e-05, "loss": 0.6192, "step": 14867 }, { "epoch": 1.0073853242089572, "grad_norm": 5.027548313140869, "learning_rate": 8.067355739612568e-05, "loss": 0.6514, "step": 14868 }, { "epoch": 1.0074530794769294, "grad_norm": 6.541376113891602, "learning_rate": 8.067218837702787e-05, "loss": 0.7091, "step": 14869 }, { "epoch": 1.0075208347449014, "grad_norm": 4.369992733001709, "learning_rate": 8.067081935793005e-05, "loss": 0.5207, "step": 14870 }, { "epoch": 1.0075885900128736, "grad_norm": 6.614040851593018, "learning_rate": 8.066945033883223e-05, "loss": 0.4269, "step": 14871 }, { "epoch": 1.0076563452808456, "grad_norm": 5.504267692565918, "learning_rate": 8.066808131973441e-05, "loss": 0.867, "step": 14872 }, { "epoch": 1.0077241005488178, "grad_norm": 5.660549640655518, "learning_rate": 8.06667123006366e-05, "loss": 0.7685, "step": 14873 }, { "epoch": 1.0077918558167898, "grad_norm": 6.468850612640381, "learning_rate": 8.066534328153879e-05, "loss": 0.7089, "step": 14874 }, { "epoch": 1.0078596110847617, "grad_norm": 5.692544460296631, "learning_rate": 8.066397426244097e-05, "loss": 0.6021, "step": 14875 }, { "epoch": 1.007927366352734, "grad_norm": 9.963805198669434, "learning_rate": 8.066260524334315e-05, "loss": 0.5066, "step": 14876 }, { "epoch": 1.007995121620706, "grad_norm": 6.6660308837890625, "learning_rate": 8.066123622424533e-05, "loss": 0.5434, "step": 14877 }, { "epoch": 1.0080628768886781, "grad_norm": 5.235628604888916, "learning_rate": 8.065986720514751e-05, "loss": 0.7677, "step": 14878 }, { "epoch": 1.00813063215665, "grad_norm": 3.8803293704986572, "learning_rate": 8.06584981860497e-05, "loss": 0.5942, "step": 14879 }, { "epoch": 1.0081983874246223, "grad_norm": 6.2981390953063965, "learning_rate": 8.065712916695188e-05, "loss": 0.7267, "step": 14880 }, { "epoch": 1.0082661426925943, "grad_norm": 5.843595504760742, "learning_rate": 8.065576014785406e-05, "loss": 0.6444, "step": 14881 }, { "epoch": 1.0083338979605665, "grad_norm": 4.541314601898193, "learning_rate": 8.065439112875625e-05, "loss": 0.5712, "step": 14882 }, { "epoch": 1.0084016532285385, "grad_norm": 6.081702709197998, "learning_rate": 8.065302210965844e-05, "loss": 0.5991, "step": 14883 }, { "epoch": 1.0084694084965107, "grad_norm": 6.3184943199157715, "learning_rate": 8.065165309056062e-05, "loss": 0.6225, "step": 14884 }, { "epoch": 1.0085371637644827, "grad_norm": 4.152682304382324, "learning_rate": 8.06502840714628e-05, "loss": 0.6958, "step": 14885 }, { "epoch": 1.0086049190324549, "grad_norm": 5.545871257781982, "learning_rate": 8.064891505236498e-05, "loss": 0.6238, "step": 14886 }, { "epoch": 1.0086726743004268, "grad_norm": 5.534969329833984, "learning_rate": 8.064754603326716e-05, "loss": 0.6741, "step": 14887 }, { "epoch": 1.008740429568399, "grad_norm": 5.907772064208984, "learning_rate": 8.064617701416935e-05, "loss": 0.6827, "step": 14888 }, { "epoch": 1.008808184836371, "grad_norm": 6.224740028381348, "learning_rate": 8.064480799507153e-05, "loss": 0.7227, "step": 14889 }, { "epoch": 1.008875940104343, "grad_norm": 5.671234607696533, "learning_rate": 8.064343897597372e-05, "loss": 0.6511, "step": 14890 }, { "epoch": 1.0089436953723152, "grad_norm": 4.597881317138672, "learning_rate": 8.06420699568759e-05, "loss": 0.5303, "step": 14891 }, { "epoch": 1.0090114506402872, "grad_norm": 7.3619561195373535, "learning_rate": 8.064070093777809e-05, "loss": 0.6853, "step": 14892 }, { "epoch": 1.0090792059082594, "grad_norm": 5.377906322479248, "learning_rate": 8.063933191868027e-05, "loss": 0.6067, "step": 14893 }, { "epoch": 1.0091469611762314, "grad_norm": 4.887160778045654, "learning_rate": 8.063796289958245e-05, "loss": 0.7488, "step": 14894 }, { "epoch": 1.0092147164442036, "grad_norm": 6.19053316116333, "learning_rate": 8.063659388048463e-05, "loss": 0.6629, "step": 14895 }, { "epoch": 1.0092824717121756, "grad_norm": 5.102216720581055, "learning_rate": 8.063522486138681e-05, "loss": 0.765, "step": 14896 }, { "epoch": 1.0093502269801478, "grad_norm": 8.829859733581543, "learning_rate": 8.0633855842289e-05, "loss": 0.5257, "step": 14897 }, { "epoch": 1.0094179822481197, "grad_norm": 11.013011932373047, "learning_rate": 8.063248682319118e-05, "loss": 0.7178, "step": 14898 }, { "epoch": 1.009485737516092, "grad_norm": 6.39233922958374, "learning_rate": 8.063111780409337e-05, "loss": 0.7438, "step": 14899 }, { "epoch": 1.009553492784064, "grad_norm": 5.155503749847412, "learning_rate": 8.062974878499555e-05, "loss": 0.5149, "step": 14900 }, { "epoch": 1.0096212480520361, "grad_norm": 5.446846961975098, "learning_rate": 8.062837976589774e-05, "loss": 0.5956, "step": 14901 }, { "epoch": 1.009689003320008, "grad_norm": 4.781370639801025, "learning_rate": 8.062701074679992e-05, "loss": 0.7076, "step": 14902 }, { "epoch": 1.0097567585879803, "grad_norm": 5.34435510635376, "learning_rate": 8.06256417277021e-05, "loss": 0.6906, "step": 14903 }, { "epoch": 1.0098245138559523, "grad_norm": 9.561019897460938, "learning_rate": 8.062427270860428e-05, "loss": 0.6931, "step": 14904 }, { "epoch": 1.0098922691239245, "grad_norm": 5.806305408477783, "learning_rate": 8.062290368950647e-05, "loss": 0.9629, "step": 14905 }, { "epoch": 1.0099600243918965, "grad_norm": 6.2731032371521, "learning_rate": 8.062153467040865e-05, "loss": 0.8255, "step": 14906 }, { "epoch": 1.0100277796598685, "grad_norm": 5.084347248077393, "learning_rate": 8.062016565131084e-05, "loss": 0.756, "step": 14907 }, { "epoch": 1.0100955349278407, "grad_norm": 5.779279708862305, "learning_rate": 8.061879663221303e-05, "loss": 0.8397, "step": 14908 }, { "epoch": 1.0101632901958126, "grad_norm": 8.087071418762207, "learning_rate": 8.061742761311521e-05, "loss": 0.7148, "step": 14909 }, { "epoch": 1.0102310454637848, "grad_norm": 7.416251182556152, "learning_rate": 8.061605859401739e-05, "loss": 0.5286, "step": 14910 }, { "epoch": 1.0102988007317568, "grad_norm": 7.369309425354004, "learning_rate": 8.061468957491958e-05, "loss": 0.6396, "step": 14911 }, { "epoch": 1.010366555999729, "grad_norm": 5.095036029815674, "learning_rate": 8.061332055582176e-05, "loss": 0.5745, "step": 14912 }, { "epoch": 1.010434311267701, "grad_norm": 5.032590866088867, "learning_rate": 8.061195153672394e-05, "loss": 0.8218, "step": 14913 }, { "epoch": 1.0105020665356732, "grad_norm": 5.7722039222717285, "learning_rate": 8.061058251762612e-05, "loss": 0.5213, "step": 14914 }, { "epoch": 1.0105698218036452, "grad_norm": 5.260948181152344, "learning_rate": 8.060921349852832e-05, "loss": 0.755, "step": 14915 }, { "epoch": 1.0106375770716174, "grad_norm": 6.892032623291016, "learning_rate": 8.06078444794305e-05, "loss": 0.7952, "step": 14916 }, { "epoch": 1.0107053323395894, "grad_norm": 6.230368137359619, "learning_rate": 8.060647546033268e-05, "loss": 0.5725, "step": 14917 }, { "epoch": 1.0107730876075616, "grad_norm": 6.269092559814453, "learning_rate": 8.060510644123486e-05, "loss": 0.4972, "step": 14918 }, { "epoch": 1.0108408428755336, "grad_norm": 6.031118869781494, "learning_rate": 8.060373742213704e-05, "loss": 0.8361, "step": 14919 }, { "epoch": 1.0109085981435058, "grad_norm": 8.682488441467285, "learning_rate": 8.060236840303923e-05, "loss": 0.6005, "step": 14920 }, { "epoch": 1.0109763534114777, "grad_norm": 6.82516622543335, "learning_rate": 8.060099938394141e-05, "loss": 0.7075, "step": 14921 }, { "epoch": 1.01104410867945, "grad_norm": 6.251338958740234, "learning_rate": 8.05996303648436e-05, "loss": 0.7116, "step": 14922 }, { "epoch": 1.011111863947422, "grad_norm": 7.739006519317627, "learning_rate": 8.059826134574577e-05, "loss": 0.9093, "step": 14923 }, { "epoch": 1.011179619215394, "grad_norm": 6.702469825744629, "learning_rate": 8.059689232664797e-05, "loss": 0.5837, "step": 14924 }, { "epoch": 1.011247374483366, "grad_norm": 6.131649494171143, "learning_rate": 8.059552330755015e-05, "loss": 0.5026, "step": 14925 }, { "epoch": 1.011315129751338, "grad_norm": 4.714535713195801, "learning_rate": 8.059415428845233e-05, "loss": 0.5705, "step": 14926 }, { "epoch": 1.0113828850193103, "grad_norm": 5.460876941680908, "learning_rate": 8.059278526935451e-05, "loss": 0.9523, "step": 14927 }, { "epoch": 1.0114506402872823, "grad_norm": 5.706765174865723, "learning_rate": 8.059141625025669e-05, "loss": 0.7936, "step": 14928 }, { "epoch": 1.0115183955552545, "grad_norm": 6.572486877441406, "learning_rate": 8.059004723115888e-05, "loss": 0.8149, "step": 14929 }, { "epoch": 1.0115861508232264, "grad_norm": 9.406861305236816, "learning_rate": 8.058867821206106e-05, "loss": 0.5752, "step": 14930 }, { "epoch": 1.0116539060911987, "grad_norm": 5.10825252532959, "learning_rate": 8.058730919296324e-05, "loss": 0.4359, "step": 14931 }, { "epoch": 1.0117216613591706, "grad_norm": 6.5198516845703125, "learning_rate": 8.058594017386542e-05, "loss": 0.7252, "step": 14932 }, { "epoch": 1.0117894166271428, "grad_norm": 5.7710981369018555, "learning_rate": 8.05845711547676e-05, "loss": 0.6666, "step": 14933 }, { "epoch": 1.0118571718951148, "grad_norm": 5.927985191345215, "learning_rate": 8.05832021356698e-05, "loss": 0.7755, "step": 14934 }, { "epoch": 1.011924927163087, "grad_norm": 7.585999488830566, "learning_rate": 8.058183311657198e-05, "loss": 0.7536, "step": 14935 }, { "epoch": 1.011992682431059, "grad_norm": 4.760716915130615, "learning_rate": 8.058046409747416e-05, "loss": 0.6324, "step": 14936 }, { "epoch": 1.0120604376990312, "grad_norm": 5.466569423675537, "learning_rate": 8.057909507837634e-05, "loss": 0.7267, "step": 14937 }, { "epoch": 1.0121281929670032, "grad_norm": 4.7150797843933105, "learning_rate": 8.057772605927853e-05, "loss": 0.6393, "step": 14938 }, { "epoch": 1.0121959482349752, "grad_norm": 4.741540908813477, "learning_rate": 8.057635704018071e-05, "loss": 0.4859, "step": 14939 }, { "epoch": 1.0122637035029474, "grad_norm": 5.607858657836914, "learning_rate": 8.05749880210829e-05, "loss": 0.738, "step": 14940 }, { "epoch": 1.0123314587709193, "grad_norm": 6.989736080169678, "learning_rate": 8.057361900198508e-05, "loss": 0.6129, "step": 14941 }, { "epoch": 1.0123992140388915, "grad_norm": 9.055098533630371, "learning_rate": 8.057224998288726e-05, "loss": 0.7514, "step": 14942 }, { "epoch": 1.0124669693068635, "grad_norm": 5.395411014556885, "learning_rate": 8.057088096378945e-05, "loss": 0.817, "step": 14943 }, { "epoch": 1.0125347245748357, "grad_norm": 5.7741851806640625, "learning_rate": 8.056951194469163e-05, "loss": 0.5735, "step": 14944 }, { "epoch": 1.0126024798428077, "grad_norm": 6.427124500274658, "learning_rate": 8.056814292559381e-05, "loss": 0.613, "step": 14945 }, { "epoch": 1.01267023511078, "grad_norm": 4.106490135192871, "learning_rate": 8.056677390649599e-05, "loss": 0.6788, "step": 14946 }, { "epoch": 1.012737990378752, "grad_norm": 5.7934370040893555, "learning_rate": 8.056540488739818e-05, "loss": 0.7784, "step": 14947 }, { "epoch": 1.012805745646724, "grad_norm": 7.501285076141357, "learning_rate": 8.056403586830036e-05, "loss": 0.77, "step": 14948 }, { "epoch": 1.012873500914696, "grad_norm": 4.6254472732543945, "learning_rate": 8.056266684920254e-05, "loss": 0.5918, "step": 14949 }, { "epoch": 1.0129412561826683, "grad_norm": 5.470935821533203, "learning_rate": 8.056129783010473e-05, "loss": 0.6471, "step": 14950 }, { "epoch": 1.0130090114506403, "grad_norm": 4.878664970397949, "learning_rate": 8.055992881100692e-05, "loss": 0.5186, "step": 14951 }, { "epoch": 1.0130767667186125, "grad_norm": 5.364741802215576, "learning_rate": 8.05585597919091e-05, "loss": 0.7577, "step": 14952 }, { "epoch": 1.0131445219865844, "grad_norm": 5.699579238891602, "learning_rate": 8.055719077281128e-05, "loss": 0.6527, "step": 14953 }, { "epoch": 1.0132122772545566, "grad_norm": 5.406833648681641, "learning_rate": 8.055582175371347e-05, "loss": 0.7961, "step": 14954 }, { "epoch": 1.0132800325225286, "grad_norm": 7.075387954711914, "learning_rate": 8.055445273461565e-05, "loss": 0.5801, "step": 14955 }, { "epoch": 1.0133477877905006, "grad_norm": 7.106254577636719, "learning_rate": 8.055308371551783e-05, "loss": 0.7384, "step": 14956 }, { "epoch": 1.0134155430584728, "grad_norm": 5.849175453186035, "learning_rate": 8.055171469642003e-05, "loss": 0.5413, "step": 14957 }, { "epoch": 1.0134832983264448, "grad_norm": 10.159880638122559, "learning_rate": 8.055034567732221e-05, "loss": 0.6702, "step": 14958 }, { "epoch": 1.013551053594417, "grad_norm": 5.166466236114502, "learning_rate": 8.054897665822439e-05, "loss": 0.8039, "step": 14959 }, { "epoch": 1.013618808862389, "grad_norm": 5.084167957305908, "learning_rate": 8.054760763912657e-05, "loss": 0.6388, "step": 14960 }, { "epoch": 1.0136865641303612, "grad_norm": 7.328620433807373, "learning_rate": 8.054623862002876e-05, "loss": 0.7152, "step": 14961 }, { "epoch": 1.0137543193983332, "grad_norm": 6.941154479980469, "learning_rate": 8.054486960093094e-05, "loss": 0.744, "step": 14962 }, { "epoch": 1.0138220746663054, "grad_norm": 5.957236289978027, "learning_rate": 8.054350058183312e-05, "loss": 0.5999, "step": 14963 }, { "epoch": 1.0138898299342773, "grad_norm": 5.381267547607422, "learning_rate": 8.05421315627353e-05, "loss": 0.8635, "step": 14964 }, { "epoch": 1.0139575852022495, "grad_norm": 5.371838569641113, "learning_rate": 8.054076254363748e-05, "loss": 0.6552, "step": 14965 }, { "epoch": 1.0140253404702215, "grad_norm": 5.056934356689453, "learning_rate": 8.053939352453968e-05, "loss": 0.5843, "step": 14966 }, { "epoch": 1.0140930957381937, "grad_norm": 5.23961877822876, "learning_rate": 8.053802450544186e-05, "loss": 0.8306, "step": 14967 }, { "epoch": 1.0141608510061657, "grad_norm": 6.583441734313965, "learning_rate": 8.053665548634404e-05, "loss": 0.686, "step": 14968 }, { "epoch": 1.014228606274138, "grad_norm": 7.94297981262207, "learning_rate": 8.053528646724622e-05, "loss": 0.7668, "step": 14969 }, { "epoch": 1.01429636154211, "grad_norm": 5.7727885246276855, "learning_rate": 8.053391744814841e-05, "loss": 0.7958, "step": 14970 }, { "epoch": 1.014364116810082, "grad_norm": 4.770255088806152, "learning_rate": 8.05325484290506e-05, "loss": 0.8039, "step": 14971 }, { "epoch": 1.014431872078054, "grad_norm": 6.65435266494751, "learning_rate": 8.053117940995277e-05, "loss": 0.7278, "step": 14972 }, { "epoch": 1.014499627346026, "grad_norm": 5.573418140411377, "learning_rate": 8.052981039085495e-05, "loss": 0.8147, "step": 14973 }, { "epoch": 1.0145673826139983, "grad_norm": 6.491858005523682, "learning_rate": 8.052844137175713e-05, "loss": 0.6853, "step": 14974 }, { "epoch": 1.0146351378819702, "grad_norm": 5.823346138000488, "learning_rate": 8.052707235265933e-05, "loss": 0.6468, "step": 14975 }, { "epoch": 1.0147028931499424, "grad_norm": 7.527129650115967, "learning_rate": 8.052570333356151e-05, "loss": 0.7114, "step": 14976 }, { "epoch": 1.0147706484179144, "grad_norm": 5.738404273986816, "learning_rate": 8.052433431446369e-05, "loss": 0.584, "step": 14977 }, { "epoch": 1.0148384036858866, "grad_norm": 5.364207744598389, "learning_rate": 8.052296529536587e-05, "loss": 0.7407, "step": 14978 }, { "epoch": 1.0149061589538586, "grad_norm": 5.664304256439209, "learning_rate": 8.052159627626806e-05, "loss": 0.5753, "step": 14979 }, { "epoch": 1.0149739142218308, "grad_norm": 8.1001558303833, "learning_rate": 8.052022725717024e-05, "loss": 0.6435, "step": 14980 }, { "epoch": 1.0150416694898028, "grad_norm": 6.447721481323242, "learning_rate": 8.051885823807242e-05, "loss": 0.7063, "step": 14981 }, { "epoch": 1.015109424757775, "grad_norm": 5.5510969161987305, "learning_rate": 8.05174892189746e-05, "loss": 0.5911, "step": 14982 }, { "epoch": 1.015177180025747, "grad_norm": 5.534966468811035, "learning_rate": 8.051612019987678e-05, "loss": 0.5952, "step": 14983 }, { "epoch": 1.0152449352937192, "grad_norm": 5.641912460327148, "learning_rate": 8.051475118077898e-05, "loss": 0.8201, "step": 14984 }, { "epoch": 1.0153126905616912, "grad_norm": 5.643457412719727, "learning_rate": 8.051338216168116e-05, "loss": 0.5825, "step": 14985 }, { "epoch": 1.0153804458296634, "grad_norm": 5.161952972412109, "learning_rate": 8.051201314258334e-05, "loss": 0.5534, "step": 14986 }, { "epoch": 1.0154482010976353, "grad_norm": 6.023798942565918, "learning_rate": 8.051064412348552e-05, "loss": 0.7081, "step": 14987 }, { "epoch": 1.0155159563656073, "grad_norm": 8.365532875061035, "learning_rate": 8.05092751043877e-05, "loss": 0.6779, "step": 14988 }, { "epoch": 1.0155837116335795, "grad_norm": 6.577813625335693, "learning_rate": 8.05079060852899e-05, "loss": 0.6511, "step": 14989 }, { "epoch": 1.0156514669015515, "grad_norm": 6.61271858215332, "learning_rate": 8.050653706619207e-05, "loss": 0.6625, "step": 14990 }, { "epoch": 1.0157192221695237, "grad_norm": 5.8450517654418945, "learning_rate": 8.050516804709425e-05, "loss": 0.6154, "step": 14991 }, { "epoch": 1.0157869774374957, "grad_norm": 6.10211181640625, "learning_rate": 8.050379902799644e-05, "loss": 0.7207, "step": 14992 }, { "epoch": 1.0158547327054679, "grad_norm": 6.336976528167725, "learning_rate": 8.050243000889863e-05, "loss": 0.7093, "step": 14993 }, { "epoch": 1.0159224879734399, "grad_norm": 5.03262996673584, "learning_rate": 8.050106098980081e-05, "loss": 0.5328, "step": 14994 }, { "epoch": 1.015990243241412, "grad_norm": 6.129831314086914, "learning_rate": 8.049969197070299e-05, "loss": 1.0295, "step": 14995 }, { "epoch": 1.016057998509384, "grad_norm": 5.919708728790283, "learning_rate": 8.049832295160517e-05, "loss": 0.8243, "step": 14996 }, { "epoch": 1.0161257537773563, "grad_norm": 11.239713668823242, "learning_rate": 8.049695393250736e-05, "loss": 0.6003, "step": 14997 }, { "epoch": 1.0161935090453282, "grad_norm": 4.68793249130249, "learning_rate": 8.049558491340954e-05, "loss": 0.64, "step": 14998 }, { "epoch": 1.0162612643133004, "grad_norm": 6.901069164276123, "learning_rate": 8.049421589431172e-05, "loss": 0.7363, "step": 14999 }, { "epoch": 1.0163290195812724, "grad_norm": 5.286013603210449, "learning_rate": 8.049284687521392e-05, "loss": 0.5766, "step": 15000 }, { "epoch": 1.0163967748492446, "grad_norm": 9.495491027832031, "learning_rate": 8.04914778561161e-05, "loss": 0.5364, "step": 15001 }, { "epoch": 1.0164645301172166, "grad_norm": 5.1024932861328125, "learning_rate": 8.049010883701828e-05, "loss": 0.7218, "step": 15002 }, { "epoch": 1.0165322853851888, "grad_norm": 6.650432586669922, "learning_rate": 8.048873981792047e-05, "loss": 0.8232, "step": 15003 }, { "epoch": 1.0166000406531608, "grad_norm": 6.3598313331604, "learning_rate": 8.048737079882265e-05, "loss": 0.5487, "step": 15004 }, { "epoch": 1.0166677959211328, "grad_norm": 7.906015872955322, "learning_rate": 8.048600177972483e-05, "loss": 0.7754, "step": 15005 }, { "epoch": 1.016735551189105, "grad_norm": 8.92300033569336, "learning_rate": 8.048463276062701e-05, "loss": 0.8194, "step": 15006 }, { "epoch": 1.016803306457077, "grad_norm": 5.348727703094482, "learning_rate": 8.048326374152921e-05, "loss": 0.7689, "step": 15007 }, { "epoch": 1.0168710617250492, "grad_norm": 4.177212715148926, "learning_rate": 8.048189472243139e-05, "loss": 0.5612, "step": 15008 }, { "epoch": 1.0169388169930211, "grad_norm": 5.465108871459961, "learning_rate": 8.048052570333357e-05, "loss": 0.7477, "step": 15009 }, { "epoch": 1.0170065722609933, "grad_norm": 6.203986644744873, "learning_rate": 8.047915668423575e-05, "loss": 0.6927, "step": 15010 }, { "epoch": 1.0170743275289653, "grad_norm": 5.149818420410156, "learning_rate": 8.047778766513793e-05, "loss": 0.6977, "step": 15011 }, { "epoch": 1.0171420827969375, "grad_norm": 8.325011253356934, "learning_rate": 8.047641864604012e-05, "loss": 0.6829, "step": 15012 }, { "epoch": 1.0172098380649095, "grad_norm": 5.652045726776123, "learning_rate": 8.04750496269423e-05, "loss": 0.7819, "step": 15013 }, { "epoch": 1.0172775933328817, "grad_norm": 5.549155235290527, "learning_rate": 8.047368060784448e-05, "loss": 0.6268, "step": 15014 }, { "epoch": 1.0173453486008537, "grad_norm": 6.955049991607666, "learning_rate": 8.047231158874666e-05, "loss": 0.8359, "step": 15015 }, { "epoch": 1.0174131038688259, "grad_norm": 5.063580513000488, "learning_rate": 8.047094256964886e-05, "loss": 0.6845, "step": 15016 }, { "epoch": 1.0174808591367979, "grad_norm": 4.619150638580322, "learning_rate": 8.046957355055104e-05, "loss": 0.5725, "step": 15017 }, { "epoch": 1.01754861440477, "grad_norm": 5.2023820877075195, "learning_rate": 8.046820453145322e-05, "loss": 0.8112, "step": 15018 }, { "epoch": 1.017616369672742, "grad_norm": 6.25310754776001, "learning_rate": 8.04668355123554e-05, "loss": 0.772, "step": 15019 }, { "epoch": 1.0176841249407143, "grad_norm": 6.057315349578857, "learning_rate": 8.046546649325758e-05, "loss": 0.726, "step": 15020 }, { "epoch": 1.0177518802086862, "grad_norm": 6.200272560119629, "learning_rate": 8.046409747415977e-05, "loss": 0.8038, "step": 15021 }, { "epoch": 1.0178196354766582, "grad_norm": 6.192685604095459, "learning_rate": 8.046272845506195e-05, "loss": 0.8221, "step": 15022 }, { "epoch": 1.0178873907446304, "grad_norm": 6.599914073944092, "learning_rate": 8.046135943596413e-05, "loss": 0.8947, "step": 15023 }, { "epoch": 1.0179551460126024, "grad_norm": 5.081368923187256, "learning_rate": 8.045999041686631e-05, "loss": 0.5658, "step": 15024 }, { "epoch": 1.0180229012805746, "grad_norm": 4.629168510437012, "learning_rate": 8.045862139776851e-05, "loss": 0.4918, "step": 15025 }, { "epoch": 1.0180906565485466, "grad_norm": 5.531469345092773, "learning_rate": 8.045725237867069e-05, "loss": 0.9338, "step": 15026 }, { "epoch": 1.0181584118165188, "grad_norm": 5.594359874725342, "learning_rate": 8.045588335957287e-05, "loss": 0.8965, "step": 15027 }, { "epoch": 1.0182261670844908, "grad_norm": 5.746640205383301, "learning_rate": 8.045451434047505e-05, "loss": 0.8001, "step": 15028 }, { "epoch": 1.018293922352463, "grad_norm": 6.410648822784424, "learning_rate": 8.045314532137723e-05, "loss": 0.6035, "step": 15029 }, { "epoch": 1.018361677620435, "grad_norm": 5.184842109680176, "learning_rate": 8.045177630227942e-05, "loss": 0.6867, "step": 15030 }, { "epoch": 1.0184294328884071, "grad_norm": 4.650674343109131, "learning_rate": 8.04504072831816e-05, "loss": 0.5872, "step": 15031 }, { "epoch": 1.0184971881563791, "grad_norm": 5.6024346351623535, "learning_rate": 8.044903826408378e-05, "loss": 0.7804, "step": 15032 }, { "epoch": 1.0185649434243513, "grad_norm": 4.8978047370910645, "learning_rate": 8.044766924498596e-05, "loss": 0.6067, "step": 15033 }, { "epoch": 1.0186326986923233, "grad_norm": 5.678053379058838, "learning_rate": 8.044630022588816e-05, "loss": 0.7362, "step": 15034 }, { "epoch": 1.0187004539602955, "grad_norm": 5.044686317443848, "learning_rate": 8.044493120679034e-05, "loss": 0.9308, "step": 15035 }, { "epoch": 1.0187682092282675, "grad_norm": 6.241668701171875, "learning_rate": 8.044356218769252e-05, "loss": 0.7732, "step": 15036 }, { "epoch": 1.0188359644962395, "grad_norm": 5.17287540435791, "learning_rate": 8.04421931685947e-05, "loss": 0.6823, "step": 15037 }, { "epoch": 1.0189037197642117, "grad_norm": 5.231973648071289, "learning_rate": 8.044082414949688e-05, "loss": 0.5864, "step": 15038 }, { "epoch": 1.0189714750321837, "grad_norm": 5.848363876342773, "learning_rate": 8.043945513039907e-05, "loss": 0.6496, "step": 15039 }, { "epoch": 1.0190392303001559, "grad_norm": 5.491754531860352, "learning_rate": 8.043808611130125e-05, "loss": 0.6945, "step": 15040 }, { "epoch": 1.0191069855681278, "grad_norm": 5.215052604675293, "learning_rate": 8.043671709220343e-05, "loss": 0.7234, "step": 15041 }, { "epoch": 1.0191747408361, "grad_norm": 4.157303810119629, "learning_rate": 8.043534807310561e-05, "loss": 0.5688, "step": 15042 }, { "epoch": 1.019242496104072, "grad_norm": 5.955345630645752, "learning_rate": 8.043397905400781e-05, "loss": 0.7193, "step": 15043 }, { "epoch": 1.0193102513720442, "grad_norm": 5.868755340576172, "learning_rate": 8.043261003490999e-05, "loss": 0.7041, "step": 15044 }, { "epoch": 1.0193780066400162, "grad_norm": 5.522411823272705, "learning_rate": 8.043124101581217e-05, "loss": 0.6228, "step": 15045 }, { "epoch": 1.0194457619079884, "grad_norm": 5.676867961883545, "learning_rate": 8.042987199671436e-05, "loss": 0.6802, "step": 15046 }, { "epoch": 1.0195135171759604, "grad_norm": 5.678533554077148, "learning_rate": 8.042850297761654e-05, "loss": 0.8044, "step": 15047 }, { "epoch": 1.0195812724439326, "grad_norm": 6.804957866668701, "learning_rate": 8.042713395851872e-05, "loss": 0.8231, "step": 15048 }, { "epoch": 1.0196490277119046, "grad_norm": 4.685696601867676, "learning_rate": 8.042576493942092e-05, "loss": 0.5464, "step": 15049 }, { "epoch": 1.0197167829798768, "grad_norm": 5.141258239746094, "learning_rate": 8.04243959203231e-05, "loss": 0.5144, "step": 15050 }, { "epoch": 1.0197845382478488, "grad_norm": 5.105460166931152, "learning_rate": 8.042302690122528e-05, "loss": 0.6002, "step": 15051 }, { "epoch": 1.019852293515821, "grad_norm": 6.820575714111328, "learning_rate": 8.042165788212746e-05, "loss": 0.5949, "step": 15052 }, { "epoch": 1.019920048783793, "grad_norm": 6.264902114868164, "learning_rate": 8.042028886302965e-05, "loss": 0.8404, "step": 15053 }, { "epoch": 1.019987804051765, "grad_norm": 6.1790385246276855, "learning_rate": 8.041891984393183e-05, "loss": 0.5916, "step": 15054 }, { "epoch": 1.0200555593197371, "grad_norm": 5.779554843902588, "learning_rate": 8.041755082483401e-05, "loss": 0.6559, "step": 15055 }, { "epoch": 1.020123314587709, "grad_norm": 5.888326168060303, "learning_rate": 8.04161818057362e-05, "loss": 0.5994, "step": 15056 }, { "epoch": 1.0201910698556813, "grad_norm": 5.410882949829102, "learning_rate": 8.041481278663839e-05, "loss": 0.7267, "step": 15057 }, { "epoch": 1.0202588251236533, "grad_norm": 5.042235374450684, "learning_rate": 8.041344376754057e-05, "loss": 0.543, "step": 15058 }, { "epoch": 1.0203265803916255, "grad_norm": 5.355964183807373, "learning_rate": 8.041207474844275e-05, "loss": 0.7188, "step": 15059 }, { "epoch": 1.0203943356595975, "grad_norm": 4.4993510246276855, "learning_rate": 8.041070572934493e-05, "loss": 0.5713, "step": 15060 }, { "epoch": 1.0204620909275697, "grad_norm": 6.155606746673584, "learning_rate": 8.040933671024711e-05, "loss": 0.7806, "step": 15061 }, { "epoch": 1.0205298461955417, "grad_norm": 6.213912010192871, "learning_rate": 8.04079676911493e-05, "loss": 0.662, "step": 15062 }, { "epoch": 1.0205976014635139, "grad_norm": 4.450127124786377, "learning_rate": 8.040659867205148e-05, "loss": 0.7145, "step": 15063 }, { "epoch": 1.0206653567314858, "grad_norm": 5.589287757873535, "learning_rate": 8.040522965295366e-05, "loss": 0.7071, "step": 15064 }, { "epoch": 1.020733111999458, "grad_norm": 6.013401985168457, "learning_rate": 8.040386063385584e-05, "loss": 0.5325, "step": 15065 }, { "epoch": 1.02080086726743, "grad_norm": 6.579004287719727, "learning_rate": 8.040249161475802e-05, "loss": 0.6771, "step": 15066 }, { "epoch": 1.0208686225354022, "grad_norm": 6.377874374389648, "learning_rate": 8.040112259566022e-05, "loss": 0.9557, "step": 15067 }, { "epoch": 1.0209363778033742, "grad_norm": 6.904638767242432, "learning_rate": 8.03997535765624e-05, "loss": 0.8783, "step": 15068 }, { "epoch": 1.0210041330713464, "grad_norm": 6.453176975250244, "learning_rate": 8.039838455746458e-05, "loss": 0.675, "step": 15069 }, { "epoch": 1.0210718883393184, "grad_norm": 4.14583158493042, "learning_rate": 8.039701553836676e-05, "loss": 0.6133, "step": 15070 }, { "epoch": 1.0211396436072904, "grad_norm": 9.864669799804688, "learning_rate": 8.039564651926895e-05, "loss": 0.515, "step": 15071 }, { "epoch": 1.0212073988752626, "grad_norm": 13.798471450805664, "learning_rate": 8.039427750017113e-05, "loss": 0.7913, "step": 15072 }, { "epoch": 1.0212751541432346, "grad_norm": 6.997447967529297, "learning_rate": 8.039290848107331e-05, "loss": 0.7798, "step": 15073 }, { "epoch": 1.0213429094112068, "grad_norm": 7.579859256744385, "learning_rate": 8.03915394619755e-05, "loss": 0.4848, "step": 15074 }, { "epoch": 1.0214106646791787, "grad_norm": 6.31156063079834, "learning_rate": 8.039017044287767e-05, "loss": 0.8056, "step": 15075 }, { "epoch": 1.021478419947151, "grad_norm": 10.114006042480469, "learning_rate": 8.038880142377987e-05, "loss": 0.7082, "step": 15076 }, { "epoch": 1.021546175215123, "grad_norm": 4.31792688369751, "learning_rate": 8.038743240468205e-05, "loss": 0.4963, "step": 15077 }, { "epoch": 1.0216139304830951, "grad_norm": 8.745311737060547, "learning_rate": 8.038606338558423e-05, "loss": 0.6986, "step": 15078 }, { "epoch": 1.021681685751067, "grad_norm": 6.955810070037842, "learning_rate": 8.038469436648641e-05, "loss": 0.6558, "step": 15079 }, { "epoch": 1.0217494410190393, "grad_norm": 5.911043167114258, "learning_rate": 8.03833253473886e-05, "loss": 0.8858, "step": 15080 }, { "epoch": 1.0218171962870113, "grad_norm": 5.10263204574585, "learning_rate": 8.038195632829078e-05, "loss": 0.5157, "step": 15081 }, { "epoch": 1.0218849515549835, "grad_norm": 5.808590888977051, "learning_rate": 8.038058730919296e-05, "loss": 0.7309, "step": 15082 }, { "epoch": 1.0219527068229555, "grad_norm": 5.918943405151367, "learning_rate": 8.037921829009514e-05, "loss": 0.6521, "step": 15083 }, { "epoch": 1.0220204620909277, "grad_norm": 10.792163848876953, "learning_rate": 8.037784927099732e-05, "loss": 0.8682, "step": 15084 }, { "epoch": 1.0220882173588997, "grad_norm": 6.594424724578857, "learning_rate": 8.037648025189952e-05, "loss": 0.567, "step": 15085 }, { "epoch": 1.0221559726268716, "grad_norm": 7.634060382843018, "learning_rate": 8.03751112328017e-05, "loss": 0.6299, "step": 15086 }, { "epoch": 1.0222237278948438, "grad_norm": 13.075284004211426, "learning_rate": 8.037374221370388e-05, "loss": 0.6774, "step": 15087 }, { "epoch": 1.0222914831628158, "grad_norm": 5.656576156616211, "learning_rate": 8.037237319460606e-05, "loss": 0.7564, "step": 15088 }, { "epoch": 1.022359238430788, "grad_norm": 4.356131553649902, "learning_rate": 8.037100417550825e-05, "loss": 0.6635, "step": 15089 }, { "epoch": 1.02242699369876, "grad_norm": 8.44076919555664, "learning_rate": 8.036963515641043e-05, "loss": 0.8179, "step": 15090 }, { "epoch": 1.0224947489667322, "grad_norm": 6.754337310791016, "learning_rate": 8.036826613731261e-05, "loss": 0.6055, "step": 15091 }, { "epoch": 1.0225625042347042, "grad_norm": 4.849721908569336, "learning_rate": 8.036689711821481e-05, "loss": 0.5342, "step": 15092 }, { "epoch": 1.0226302595026764, "grad_norm": 6.975347518920898, "learning_rate": 8.036552809911699e-05, "loss": 0.7692, "step": 15093 }, { "epoch": 1.0226980147706484, "grad_norm": 5.742364406585693, "learning_rate": 8.036415908001917e-05, "loss": 0.605, "step": 15094 }, { "epoch": 1.0227657700386206, "grad_norm": 7.3798909187316895, "learning_rate": 8.036279006092136e-05, "loss": 0.7643, "step": 15095 }, { "epoch": 1.0228335253065926, "grad_norm": 6.8255510330200195, "learning_rate": 8.036142104182354e-05, "loss": 0.6628, "step": 15096 }, { "epoch": 1.0229012805745648, "grad_norm": 6.594735145568848, "learning_rate": 8.036005202272572e-05, "loss": 0.7144, "step": 15097 }, { "epoch": 1.0229690358425367, "grad_norm": 5.225235462188721, "learning_rate": 8.03586830036279e-05, "loss": 0.8352, "step": 15098 }, { "epoch": 1.023036791110509, "grad_norm": 6.080996036529541, "learning_rate": 8.03573139845301e-05, "loss": 0.8509, "step": 15099 }, { "epoch": 1.023104546378481, "grad_norm": 9.194795608520508, "learning_rate": 8.035594496543228e-05, "loss": 0.7135, "step": 15100 }, { "epoch": 1.0231723016464531, "grad_norm": 5.869052410125732, "learning_rate": 8.035457594633446e-05, "loss": 0.5557, "step": 15101 }, { "epoch": 1.023240056914425, "grad_norm": 5.882430076599121, "learning_rate": 8.035320692723664e-05, "loss": 0.6686, "step": 15102 }, { "epoch": 1.023307812182397, "grad_norm": 5.315283298492432, "learning_rate": 8.035183790813883e-05, "loss": 0.5841, "step": 15103 }, { "epoch": 1.0233755674503693, "grad_norm": 4.697854042053223, "learning_rate": 8.035046888904101e-05, "loss": 0.6619, "step": 15104 }, { "epoch": 1.0234433227183413, "grad_norm": 5.584891319274902, "learning_rate": 8.034909986994319e-05, "loss": 0.7641, "step": 15105 }, { "epoch": 1.0235110779863135, "grad_norm": 6.978733539581299, "learning_rate": 8.034773085084537e-05, "loss": 0.7094, "step": 15106 }, { "epoch": 1.0235788332542854, "grad_norm": 8.549322128295898, "learning_rate": 8.034636183174755e-05, "loss": 0.6725, "step": 15107 }, { "epoch": 1.0236465885222576, "grad_norm": 5.663527488708496, "learning_rate": 8.034499281264975e-05, "loss": 0.609, "step": 15108 }, { "epoch": 1.0237143437902296, "grad_norm": 5.843603610992432, "learning_rate": 8.034362379355193e-05, "loss": 0.7198, "step": 15109 }, { "epoch": 1.0237820990582018, "grad_norm": 6.912360668182373, "learning_rate": 8.034225477445411e-05, "loss": 0.728, "step": 15110 }, { "epoch": 1.0238498543261738, "grad_norm": 4.6928205490112305, "learning_rate": 8.034088575535629e-05, "loss": 0.8542, "step": 15111 }, { "epoch": 1.023917609594146, "grad_norm": 5.714449405670166, "learning_rate": 8.033951673625848e-05, "loss": 0.8885, "step": 15112 }, { "epoch": 1.023985364862118, "grad_norm": 9.74325180053711, "learning_rate": 8.033814771716066e-05, "loss": 0.7175, "step": 15113 }, { "epoch": 1.0240531201300902, "grad_norm": 6.803297519683838, "learning_rate": 8.033677869806284e-05, "loss": 0.8037, "step": 15114 }, { "epoch": 1.0241208753980622, "grad_norm": 5.692788600921631, "learning_rate": 8.033540967896502e-05, "loss": 0.6073, "step": 15115 }, { "epoch": 1.0241886306660344, "grad_norm": 4.766846179962158, "learning_rate": 8.03340406598672e-05, "loss": 0.687, "step": 15116 }, { "epoch": 1.0242563859340064, "grad_norm": 7.894632816314697, "learning_rate": 8.03326716407694e-05, "loss": 0.6866, "step": 15117 }, { "epoch": 1.0243241412019786, "grad_norm": 8.924421310424805, "learning_rate": 8.033130262167158e-05, "loss": 0.6033, "step": 15118 }, { "epoch": 1.0243918964699505, "grad_norm": 4.983919620513916, "learning_rate": 8.032993360257376e-05, "loss": 0.7635, "step": 15119 }, { "epoch": 1.0244596517379225, "grad_norm": 6.151333808898926, "learning_rate": 8.032856458347594e-05, "loss": 0.5241, "step": 15120 }, { "epoch": 1.0245274070058947, "grad_norm": 5.959475517272949, "learning_rate": 8.032719556437812e-05, "loss": 0.801, "step": 15121 }, { "epoch": 1.0245951622738667, "grad_norm": 6.7095255851745605, "learning_rate": 8.032582654528031e-05, "loss": 0.5638, "step": 15122 }, { "epoch": 1.024662917541839, "grad_norm": 6.7581939697265625, "learning_rate": 8.03244575261825e-05, "loss": 0.7744, "step": 15123 }, { "epoch": 1.024730672809811, "grad_norm": 4.929287433624268, "learning_rate": 8.032308850708467e-05, "loss": 0.5911, "step": 15124 }, { "epoch": 1.024798428077783, "grad_norm": 7.830074310302734, "learning_rate": 8.032171948798685e-05, "loss": 0.5202, "step": 15125 }, { "epoch": 1.024866183345755, "grad_norm": 5.351446628570557, "learning_rate": 8.032035046888905e-05, "loss": 0.5243, "step": 15126 }, { "epoch": 1.0249339386137273, "grad_norm": 6.004148960113525, "learning_rate": 8.031898144979123e-05, "loss": 0.7672, "step": 15127 }, { "epoch": 1.0250016938816993, "grad_norm": 5.840296745300293, "learning_rate": 8.031761243069341e-05, "loss": 0.6149, "step": 15128 }, { "epoch": 1.0250694491496715, "grad_norm": 5.449799537658691, "learning_rate": 8.031624341159559e-05, "loss": 0.9104, "step": 15129 }, { "epoch": 1.0251372044176434, "grad_norm": 4.739322662353516, "learning_rate": 8.031487439249777e-05, "loss": 0.586, "step": 15130 }, { "epoch": 1.0252049596856156, "grad_norm": 6.538787841796875, "learning_rate": 8.031350537339996e-05, "loss": 0.8415, "step": 15131 }, { "epoch": 1.0252727149535876, "grad_norm": 6.406269550323486, "learning_rate": 8.031213635430214e-05, "loss": 0.8221, "step": 15132 }, { "epoch": 1.0253404702215598, "grad_norm": 5.359891891479492, "learning_rate": 8.031076733520432e-05, "loss": 0.7246, "step": 15133 }, { "epoch": 1.0254082254895318, "grad_norm": 6.871723175048828, "learning_rate": 8.03093983161065e-05, "loss": 0.8127, "step": 15134 }, { "epoch": 1.0254759807575038, "grad_norm": 7.51058292388916, "learning_rate": 8.03080292970087e-05, "loss": 0.8611, "step": 15135 }, { "epoch": 1.025543736025476, "grad_norm": 6.180769920349121, "learning_rate": 8.030666027791088e-05, "loss": 0.6036, "step": 15136 }, { "epoch": 1.025611491293448, "grad_norm": 4.803264141082764, "learning_rate": 8.030529125881306e-05, "loss": 0.6907, "step": 15137 }, { "epoch": 1.0256792465614202, "grad_norm": 4.692986965179443, "learning_rate": 8.030392223971524e-05, "loss": 0.5427, "step": 15138 }, { "epoch": 1.0257470018293922, "grad_norm": 4.468636512756348, "learning_rate": 8.030255322061743e-05, "loss": 0.5688, "step": 15139 }, { "epoch": 1.0258147570973644, "grad_norm": 5.465978622436523, "learning_rate": 8.030118420151961e-05, "loss": 0.6728, "step": 15140 }, { "epoch": 1.0258825123653363, "grad_norm": 7.363076686859131, "learning_rate": 8.02998151824218e-05, "loss": 0.608, "step": 15141 }, { "epoch": 1.0259502676333085, "grad_norm": 6.24710750579834, "learning_rate": 8.029844616332399e-05, "loss": 0.7095, "step": 15142 }, { "epoch": 1.0260180229012805, "grad_norm": 5.708677291870117, "learning_rate": 8.029707714422617e-05, "loss": 0.7028, "step": 15143 }, { "epoch": 1.0260857781692527, "grad_norm": 6.651005744934082, "learning_rate": 8.029570812512835e-05, "loss": 0.8753, "step": 15144 }, { "epoch": 1.0261535334372247, "grad_norm": 7.2024688720703125, "learning_rate": 8.029433910603054e-05, "loss": 0.6307, "step": 15145 }, { "epoch": 1.026221288705197, "grad_norm": 8.108603477478027, "learning_rate": 8.029297008693272e-05, "loss": 0.7454, "step": 15146 }, { "epoch": 1.026289043973169, "grad_norm": 8.093022346496582, "learning_rate": 8.02916010678349e-05, "loss": 0.6492, "step": 15147 }, { "epoch": 1.026356799241141, "grad_norm": 6.092846393585205, "learning_rate": 8.029023204873708e-05, "loss": 0.8324, "step": 15148 }, { "epoch": 1.026424554509113, "grad_norm": 7.56104040145874, "learning_rate": 8.028886302963928e-05, "loss": 0.7282, "step": 15149 }, { "epoch": 1.0264923097770853, "grad_norm": 5.667774677276611, "learning_rate": 8.028749401054146e-05, "loss": 0.9169, "step": 15150 }, { "epoch": 1.0265600650450573, "grad_norm": 6.644709587097168, "learning_rate": 8.028612499144364e-05, "loss": 0.6861, "step": 15151 }, { "epoch": 1.0266278203130292, "grad_norm": 5.4562578201293945, "learning_rate": 8.028475597234582e-05, "loss": 0.8854, "step": 15152 }, { "epoch": 1.0266955755810014, "grad_norm": 4.384959697723389, "learning_rate": 8.0283386953248e-05, "loss": 0.6736, "step": 15153 }, { "epoch": 1.0267633308489734, "grad_norm": 5.349459648132324, "learning_rate": 8.028201793415019e-05, "loss": 0.704, "step": 15154 }, { "epoch": 1.0268310861169456, "grad_norm": 4.929141521453857, "learning_rate": 8.028064891505237e-05, "loss": 0.6705, "step": 15155 }, { "epoch": 1.0268988413849176, "grad_norm": 5.738053798675537, "learning_rate": 8.027927989595455e-05, "loss": 0.7195, "step": 15156 }, { "epoch": 1.0269665966528898, "grad_norm": 5.528050899505615, "learning_rate": 8.027791087685673e-05, "loss": 0.707, "step": 15157 }, { "epoch": 1.0270343519208618, "grad_norm": 5.940127372741699, "learning_rate": 8.027654185775893e-05, "loss": 0.7448, "step": 15158 }, { "epoch": 1.027102107188834, "grad_norm": 5.716156482696533, "learning_rate": 8.027517283866111e-05, "loss": 0.5385, "step": 15159 }, { "epoch": 1.027169862456806, "grad_norm": 6.133981227874756, "learning_rate": 8.027380381956329e-05, "loss": 0.459, "step": 15160 }, { "epoch": 1.0272376177247782, "grad_norm": 4.139076232910156, "learning_rate": 8.027243480046547e-05, "loss": 0.5117, "step": 15161 }, { "epoch": 1.0273053729927502, "grad_norm": 5.8663482666015625, "learning_rate": 8.027106578136765e-05, "loss": 0.6677, "step": 15162 }, { "epoch": 1.0273731282607224, "grad_norm": 6.346996784210205, "learning_rate": 8.026969676226984e-05, "loss": 0.794, "step": 15163 }, { "epoch": 1.0274408835286943, "grad_norm": 6.667228698730469, "learning_rate": 8.026832774317202e-05, "loss": 0.6352, "step": 15164 }, { "epoch": 1.0275086387966665, "grad_norm": 4.740386962890625, "learning_rate": 8.02669587240742e-05, "loss": 0.5717, "step": 15165 }, { "epoch": 1.0275763940646385, "grad_norm": 7.014326095581055, "learning_rate": 8.026558970497638e-05, "loss": 0.831, "step": 15166 }, { "epoch": 1.0276441493326105, "grad_norm": 5.337719440460205, "learning_rate": 8.026422068587858e-05, "loss": 0.7297, "step": 15167 }, { "epoch": 1.0277119046005827, "grad_norm": 5.5287957191467285, "learning_rate": 8.026285166678076e-05, "loss": 0.8606, "step": 15168 }, { "epoch": 1.0277796598685547, "grad_norm": 3.9618964195251465, "learning_rate": 8.026148264768294e-05, "loss": 0.4601, "step": 15169 }, { "epoch": 1.0278474151365269, "grad_norm": 6.047200679779053, "learning_rate": 8.026011362858512e-05, "loss": 0.7246, "step": 15170 }, { "epoch": 1.0279151704044989, "grad_norm": 6.743053913116455, "learning_rate": 8.02587446094873e-05, "loss": 0.7362, "step": 15171 }, { "epoch": 1.027982925672471, "grad_norm": 6.415504455566406, "learning_rate": 8.025737559038949e-05, "loss": 0.8653, "step": 15172 }, { "epoch": 1.028050680940443, "grad_norm": 5.755765438079834, "learning_rate": 8.025600657129167e-05, "loss": 0.7041, "step": 15173 }, { "epoch": 1.0281184362084153, "grad_norm": 6.556794166564941, "learning_rate": 8.025463755219385e-05, "loss": 0.8511, "step": 15174 }, { "epoch": 1.0281861914763872, "grad_norm": 5.8607025146484375, "learning_rate": 8.025326853309603e-05, "loss": 0.6352, "step": 15175 }, { "epoch": 1.0282539467443594, "grad_norm": 5.547766208648682, "learning_rate": 8.025189951399821e-05, "loss": 0.7237, "step": 15176 }, { "epoch": 1.0283217020123314, "grad_norm": 6.3242363929748535, "learning_rate": 8.025053049490041e-05, "loss": 0.7579, "step": 15177 }, { "epoch": 1.0283894572803036, "grad_norm": 5.0050764083862305, "learning_rate": 8.024916147580259e-05, "loss": 0.765, "step": 15178 }, { "epoch": 1.0284572125482756, "grad_norm": 6.55527400970459, "learning_rate": 8.024779245670477e-05, "loss": 0.7241, "step": 15179 }, { "epoch": 1.0285249678162478, "grad_norm": 6.457221984863281, "learning_rate": 8.024642343760695e-05, "loss": 0.6609, "step": 15180 }, { "epoch": 1.0285927230842198, "grad_norm": 6.385996341705322, "learning_rate": 8.024505441850914e-05, "loss": 0.8659, "step": 15181 }, { "epoch": 1.028660478352192, "grad_norm": 6.647210121154785, "learning_rate": 8.024368539941132e-05, "loss": 0.7662, "step": 15182 }, { "epoch": 1.028728233620164, "grad_norm": 5.438072204589844, "learning_rate": 8.02423163803135e-05, "loss": 0.5482, "step": 15183 }, { "epoch": 1.028795988888136, "grad_norm": 7.4278998374938965, "learning_rate": 8.024094736121568e-05, "loss": 0.8363, "step": 15184 }, { "epoch": 1.0288637441561082, "grad_norm": 5.1344451904296875, "learning_rate": 8.023957834211788e-05, "loss": 0.7989, "step": 15185 }, { "epoch": 1.0289314994240801, "grad_norm": 4.938210487365723, "learning_rate": 8.023820932302006e-05, "loss": 0.6469, "step": 15186 }, { "epoch": 1.0289992546920523, "grad_norm": 5.672119617462158, "learning_rate": 8.023684030392224e-05, "loss": 0.9169, "step": 15187 }, { "epoch": 1.0290670099600243, "grad_norm": 6.35474157333374, "learning_rate": 8.023547128482443e-05, "loss": 0.6798, "step": 15188 }, { "epoch": 1.0291347652279965, "grad_norm": 10.651616096496582, "learning_rate": 8.023410226572661e-05, "loss": 0.8043, "step": 15189 }, { "epoch": 1.0292025204959685, "grad_norm": 5.502874851226807, "learning_rate": 8.023273324662879e-05, "loss": 0.8463, "step": 15190 }, { "epoch": 1.0292702757639407, "grad_norm": 4.313845634460449, "learning_rate": 8.023136422753099e-05, "loss": 0.6796, "step": 15191 }, { "epoch": 1.0293380310319127, "grad_norm": 6.374643802642822, "learning_rate": 8.022999520843317e-05, "loss": 0.7316, "step": 15192 }, { "epoch": 1.0294057862998849, "grad_norm": 6.878230094909668, "learning_rate": 8.022862618933535e-05, "loss": 0.9329, "step": 15193 }, { "epoch": 1.0294735415678569, "grad_norm": 5.371943950653076, "learning_rate": 8.022725717023753e-05, "loss": 0.5674, "step": 15194 }, { "epoch": 1.029541296835829, "grad_norm": 5.623109340667725, "learning_rate": 8.022588815113972e-05, "loss": 0.7014, "step": 15195 }, { "epoch": 1.029609052103801, "grad_norm": 6.517233848571777, "learning_rate": 8.02245191320419e-05, "loss": 0.8067, "step": 15196 }, { "epoch": 1.0296768073717733, "grad_norm": 6.380861759185791, "learning_rate": 8.022315011294408e-05, "loss": 0.7762, "step": 15197 }, { "epoch": 1.0297445626397452, "grad_norm": 5.307709693908691, "learning_rate": 8.022178109384626e-05, "loss": 0.8181, "step": 15198 }, { "epoch": 1.0298123179077174, "grad_norm": 5.426553249359131, "learning_rate": 8.022041207474844e-05, "loss": 0.8538, "step": 15199 }, { "epoch": 1.0298800731756894, "grad_norm": 5.571786880493164, "learning_rate": 8.021904305565064e-05, "loss": 0.6279, "step": 15200 }, { "epoch": 1.0299478284436614, "grad_norm": 5.296565055847168, "learning_rate": 8.021767403655282e-05, "loss": 0.7286, "step": 15201 }, { "epoch": 1.0300155837116336, "grad_norm": 5.196515083312988, "learning_rate": 8.0216305017455e-05, "loss": 0.5467, "step": 15202 }, { "epoch": 1.0300833389796056, "grad_norm": 5.717817783355713, "learning_rate": 8.021493599835718e-05, "loss": 0.5466, "step": 15203 }, { "epoch": 1.0301510942475778, "grad_norm": 4.812506675720215, "learning_rate": 8.021356697925937e-05, "loss": 0.4824, "step": 15204 }, { "epoch": 1.0302188495155498, "grad_norm": 5.952337741851807, "learning_rate": 8.021219796016155e-05, "loss": 0.6964, "step": 15205 }, { "epoch": 1.030286604783522, "grad_norm": 6.307968616485596, "learning_rate": 8.021082894106373e-05, "loss": 0.804, "step": 15206 }, { "epoch": 1.030354360051494, "grad_norm": 6.3860626220703125, "learning_rate": 8.020945992196591e-05, "loss": 0.5327, "step": 15207 }, { "epoch": 1.0304221153194661, "grad_norm": 6.184783935546875, "learning_rate": 8.02080909028681e-05, "loss": 0.754, "step": 15208 }, { "epoch": 1.0304898705874381, "grad_norm": 5.628595352172852, "learning_rate": 8.020672188377029e-05, "loss": 0.6584, "step": 15209 }, { "epoch": 1.0305576258554103, "grad_norm": 6.341923713684082, "learning_rate": 8.020535286467247e-05, "loss": 0.7252, "step": 15210 }, { "epoch": 1.0306253811233823, "grad_norm": 7.7430853843688965, "learning_rate": 8.020398384557465e-05, "loss": 0.7014, "step": 15211 }, { "epoch": 1.0306931363913545, "grad_norm": 5.762584686279297, "learning_rate": 8.020261482647683e-05, "loss": 0.7568, "step": 15212 }, { "epoch": 1.0307608916593265, "grad_norm": 6.523187637329102, "learning_rate": 8.020124580737902e-05, "loss": 0.7841, "step": 15213 }, { "epoch": 1.0308286469272987, "grad_norm": 6.489431858062744, "learning_rate": 8.01998767882812e-05, "loss": 0.6339, "step": 15214 }, { "epoch": 1.0308964021952707, "grad_norm": 6.271862983703613, "learning_rate": 8.019850776918338e-05, "loss": 0.85, "step": 15215 }, { "epoch": 1.0309641574632427, "grad_norm": 5.647708415985107, "learning_rate": 8.019713875008556e-05, "loss": 0.7023, "step": 15216 }, { "epoch": 1.0310319127312149, "grad_norm": 4.607950210571289, "learning_rate": 8.019576973098774e-05, "loss": 0.7162, "step": 15217 }, { "epoch": 1.0310996679991868, "grad_norm": 6.540196895599365, "learning_rate": 8.019440071188994e-05, "loss": 0.7398, "step": 15218 }, { "epoch": 1.031167423267159, "grad_norm": 6.469182014465332, "learning_rate": 8.019303169279212e-05, "loss": 0.6353, "step": 15219 }, { "epoch": 1.031235178535131, "grad_norm": 6.716250896453857, "learning_rate": 8.01916626736943e-05, "loss": 0.8495, "step": 15220 }, { "epoch": 1.0313029338031032, "grad_norm": 5.285309791564941, "learning_rate": 8.019029365459648e-05, "loss": 0.5978, "step": 15221 }, { "epoch": 1.0313706890710752, "grad_norm": 5.688589572906494, "learning_rate": 8.018892463549867e-05, "loss": 0.8367, "step": 15222 }, { "epoch": 1.0314384443390474, "grad_norm": 6.630375385284424, "learning_rate": 8.018755561640085e-05, "loss": 0.772, "step": 15223 }, { "epoch": 1.0315061996070194, "grad_norm": 5.191856861114502, "learning_rate": 8.018618659730303e-05, "loss": 0.6048, "step": 15224 }, { "epoch": 1.0315739548749916, "grad_norm": 5.3768839836120605, "learning_rate": 8.018481757820521e-05, "loss": 0.5963, "step": 15225 }, { "epoch": 1.0316417101429636, "grad_norm": 6.222872734069824, "learning_rate": 8.01834485591074e-05, "loss": 0.6568, "step": 15226 }, { "epoch": 1.0317094654109358, "grad_norm": 4.519933700561523, "learning_rate": 8.018207954000959e-05, "loss": 0.5297, "step": 15227 }, { "epoch": 1.0317772206789078, "grad_norm": 5.497208595275879, "learning_rate": 8.018071052091177e-05, "loss": 0.723, "step": 15228 }, { "epoch": 1.03184497594688, "grad_norm": 5.850059509277344, "learning_rate": 8.017934150181395e-05, "loss": 0.5526, "step": 15229 }, { "epoch": 1.031912731214852, "grad_norm": 5.479531288146973, "learning_rate": 8.017797248271613e-05, "loss": 0.6473, "step": 15230 }, { "epoch": 1.0319804864828241, "grad_norm": 6.393281936645508, "learning_rate": 8.017660346361832e-05, "loss": 0.8833, "step": 15231 }, { "epoch": 1.0320482417507961, "grad_norm": 5.936949253082275, "learning_rate": 8.01752344445205e-05, "loss": 0.96, "step": 15232 }, { "epoch": 1.032115997018768, "grad_norm": 6.389046669006348, "learning_rate": 8.017386542542268e-05, "loss": 0.8469, "step": 15233 }, { "epoch": 1.0321837522867403, "grad_norm": 5.648378849029541, "learning_rate": 8.017249640632488e-05, "loss": 0.5647, "step": 15234 }, { "epoch": 1.0322515075547123, "grad_norm": 6.237347602844238, "learning_rate": 8.017112738722706e-05, "loss": 0.6116, "step": 15235 }, { "epoch": 1.0323192628226845, "grad_norm": 6.752171993255615, "learning_rate": 8.016975836812924e-05, "loss": 0.7117, "step": 15236 }, { "epoch": 1.0323870180906565, "grad_norm": 7.579265594482422, "learning_rate": 8.016838934903143e-05, "loss": 0.7308, "step": 15237 }, { "epoch": 1.0324547733586287, "grad_norm": 5.256562232971191, "learning_rate": 8.016702032993361e-05, "loss": 0.6299, "step": 15238 }, { "epoch": 1.0325225286266007, "grad_norm": 4.109334468841553, "learning_rate": 8.016565131083579e-05, "loss": 0.3859, "step": 15239 }, { "epoch": 1.0325902838945729, "grad_norm": 4.160473823547363, "learning_rate": 8.016428229173797e-05, "loss": 0.5872, "step": 15240 }, { "epoch": 1.0326580391625448, "grad_norm": 5.3488898277282715, "learning_rate": 8.016291327264017e-05, "loss": 0.6311, "step": 15241 }, { "epoch": 1.032725794430517, "grad_norm": 10.214828491210938, "learning_rate": 8.016154425354235e-05, "loss": 0.8249, "step": 15242 }, { "epoch": 1.032793549698489, "grad_norm": 6.101986885070801, "learning_rate": 8.016017523444453e-05, "loss": 0.7061, "step": 15243 }, { "epoch": 1.0328613049664612, "grad_norm": 4.178615570068359, "learning_rate": 8.015880621534671e-05, "loss": 0.5615, "step": 15244 }, { "epoch": 1.0329290602344332, "grad_norm": 5.862407207489014, "learning_rate": 8.01574371962489e-05, "loss": 0.6751, "step": 15245 }, { "epoch": 1.0329968155024054, "grad_norm": 9.34377670288086, "learning_rate": 8.015606817715108e-05, "loss": 0.9418, "step": 15246 }, { "epoch": 1.0330645707703774, "grad_norm": 5.592286109924316, "learning_rate": 8.015469915805326e-05, "loss": 0.8633, "step": 15247 }, { "epoch": 1.0331323260383494, "grad_norm": 5.773770332336426, "learning_rate": 8.015333013895544e-05, "loss": 0.6302, "step": 15248 }, { "epoch": 1.0332000813063216, "grad_norm": 5.281434535980225, "learning_rate": 8.015196111985762e-05, "loss": 0.6259, "step": 15249 }, { "epoch": 1.0332678365742936, "grad_norm": 5.689375877380371, "learning_rate": 8.015059210075982e-05, "loss": 0.6735, "step": 15250 }, { "epoch": 1.0333355918422658, "grad_norm": 6.111639976501465, "learning_rate": 8.0149223081662e-05, "loss": 0.8211, "step": 15251 }, { "epoch": 1.0334033471102377, "grad_norm": 5.1067328453063965, "learning_rate": 8.014785406256418e-05, "loss": 0.5156, "step": 15252 }, { "epoch": 1.03347110237821, "grad_norm": 6.3971638679504395, "learning_rate": 8.014648504346636e-05, "loss": 0.7773, "step": 15253 }, { "epoch": 1.033538857646182, "grad_norm": 7.105903148651123, "learning_rate": 8.014511602436854e-05, "loss": 0.8546, "step": 15254 }, { "epoch": 1.0336066129141541, "grad_norm": 5.161197185516357, "learning_rate": 8.014374700527073e-05, "loss": 0.7245, "step": 15255 }, { "epoch": 1.033674368182126, "grad_norm": 5.202775001525879, "learning_rate": 8.014237798617291e-05, "loss": 0.7735, "step": 15256 }, { "epoch": 1.0337421234500983, "grad_norm": 5.7077131271362305, "learning_rate": 8.014100896707509e-05, "loss": 0.6378, "step": 15257 }, { "epoch": 1.0338098787180703, "grad_norm": 6.216683864593506, "learning_rate": 8.013963994797727e-05, "loss": 0.8665, "step": 15258 }, { "epoch": 1.0338776339860425, "grad_norm": 6.291334629058838, "learning_rate": 8.013827092887947e-05, "loss": 0.6189, "step": 15259 }, { "epoch": 1.0339453892540145, "grad_norm": 6.946064472198486, "learning_rate": 8.013690190978165e-05, "loss": 0.7349, "step": 15260 }, { "epoch": 1.0340131445219867, "grad_norm": 5.636563301086426, "learning_rate": 8.013553289068383e-05, "loss": 0.5853, "step": 15261 }, { "epoch": 1.0340808997899587, "grad_norm": 6.100147724151611, "learning_rate": 8.013416387158601e-05, "loss": 0.8702, "step": 15262 }, { "epoch": 1.0341486550579309, "grad_norm": 6.185873985290527, "learning_rate": 8.013279485248819e-05, "loss": 0.6916, "step": 15263 }, { "epoch": 1.0342164103259028, "grad_norm": 7.08696174621582, "learning_rate": 8.013142583339038e-05, "loss": 0.6231, "step": 15264 }, { "epoch": 1.034284165593875, "grad_norm": 5.164997100830078, "learning_rate": 8.013005681429256e-05, "loss": 0.5656, "step": 15265 }, { "epoch": 1.034351920861847, "grad_norm": 5.154972553253174, "learning_rate": 8.012868779519474e-05, "loss": 0.5931, "step": 15266 }, { "epoch": 1.034419676129819, "grad_norm": 5.627068996429443, "learning_rate": 8.012731877609692e-05, "loss": 0.7211, "step": 15267 }, { "epoch": 1.0344874313977912, "grad_norm": 4.955535888671875, "learning_rate": 8.012594975699912e-05, "loss": 0.5718, "step": 15268 }, { "epoch": 1.0345551866657632, "grad_norm": 5.209494113922119, "learning_rate": 8.01245807379013e-05, "loss": 0.5789, "step": 15269 }, { "epoch": 1.0346229419337354, "grad_norm": 6.1501312255859375, "learning_rate": 8.012321171880348e-05, "loss": 0.7794, "step": 15270 }, { "epoch": 1.0346906972017074, "grad_norm": 5.943906307220459, "learning_rate": 8.012184269970566e-05, "loss": 0.6906, "step": 15271 }, { "epoch": 1.0347584524696796, "grad_norm": 6.266148567199707, "learning_rate": 8.012047368060784e-05, "loss": 0.6289, "step": 15272 }, { "epoch": 1.0348262077376515, "grad_norm": 5.710838794708252, "learning_rate": 8.011910466151003e-05, "loss": 0.8653, "step": 15273 }, { "epoch": 1.0348939630056238, "grad_norm": 4.5793585777282715, "learning_rate": 8.011773564241221e-05, "loss": 0.6731, "step": 15274 }, { "epoch": 1.0349617182735957, "grad_norm": 5.686688423156738, "learning_rate": 8.011636662331439e-05, "loss": 0.6466, "step": 15275 }, { "epoch": 1.035029473541568, "grad_norm": 6.409125804901123, "learning_rate": 8.011499760421657e-05, "loss": 0.8089, "step": 15276 }, { "epoch": 1.03509722880954, "grad_norm": 9.235058784484863, "learning_rate": 8.011362858511877e-05, "loss": 0.6794, "step": 15277 }, { "epoch": 1.0351649840775121, "grad_norm": 5.486425876617432, "learning_rate": 8.011225956602095e-05, "loss": 0.6879, "step": 15278 }, { "epoch": 1.035232739345484, "grad_norm": 5.220149040222168, "learning_rate": 8.011089054692313e-05, "loss": 0.5114, "step": 15279 }, { "epoch": 1.0353004946134563, "grad_norm": 4.368312835693359, "learning_rate": 8.010952152782532e-05, "loss": 0.455, "step": 15280 }, { "epoch": 1.0353682498814283, "grad_norm": 7.039102554321289, "learning_rate": 8.01081525087275e-05, "loss": 0.9742, "step": 15281 }, { "epoch": 1.0354360051494003, "grad_norm": 5.032625675201416, "learning_rate": 8.010678348962968e-05, "loss": 0.5802, "step": 15282 }, { "epoch": 1.0355037604173725, "grad_norm": 5.188567638397217, "learning_rate": 8.010541447053188e-05, "loss": 0.5383, "step": 15283 }, { "epoch": 1.0355715156853444, "grad_norm": 6.444438934326172, "learning_rate": 8.010404545143406e-05, "loss": 0.6963, "step": 15284 }, { "epoch": 1.0356392709533166, "grad_norm": 4.651839733123779, "learning_rate": 8.010267643233624e-05, "loss": 0.5967, "step": 15285 }, { "epoch": 1.0357070262212886, "grad_norm": 5.508388996124268, "learning_rate": 8.010130741323842e-05, "loss": 0.7505, "step": 15286 }, { "epoch": 1.0357747814892608, "grad_norm": 6.142048358917236, "learning_rate": 8.009993839414061e-05, "loss": 0.7186, "step": 15287 }, { "epoch": 1.0358425367572328, "grad_norm": 5.3503899574279785, "learning_rate": 8.009856937504279e-05, "loss": 0.6446, "step": 15288 }, { "epoch": 1.035910292025205, "grad_norm": 5.894726753234863, "learning_rate": 8.009720035594497e-05, "loss": 0.6653, "step": 15289 }, { "epoch": 1.035978047293177, "grad_norm": 6.199258804321289, "learning_rate": 8.009583133684715e-05, "loss": 0.5282, "step": 15290 }, { "epoch": 1.0360458025611492, "grad_norm": 7.289583683013916, "learning_rate": 8.009446231774935e-05, "loss": 0.6126, "step": 15291 }, { "epoch": 1.0361135578291212, "grad_norm": 5.015061378479004, "learning_rate": 8.009309329865153e-05, "loss": 0.6272, "step": 15292 }, { "epoch": 1.0361813130970934, "grad_norm": 8.497147560119629, "learning_rate": 8.009172427955371e-05, "loss": 0.7675, "step": 15293 }, { "epoch": 1.0362490683650654, "grad_norm": 7.492831230163574, "learning_rate": 8.009035526045589e-05, "loss": 0.8589, "step": 15294 }, { "epoch": 1.0363168236330376, "grad_norm": 8.964046478271484, "learning_rate": 8.008898624135807e-05, "loss": 0.8056, "step": 15295 }, { "epoch": 1.0363845789010095, "grad_norm": 6.669633388519287, "learning_rate": 8.008761722226026e-05, "loss": 0.7112, "step": 15296 }, { "epoch": 1.0364523341689815, "grad_norm": 6.454007148742676, "learning_rate": 8.008624820316244e-05, "loss": 0.6977, "step": 15297 }, { "epoch": 1.0365200894369537, "grad_norm": 5.101663112640381, "learning_rate": 8.008487918406462e-05, "loss": 0.6409, "step": 15298 }, { "epoch": 1.0365878447049257, "grad_norm": 6.301394462585449, "learning_rate": 8.00835101649668e-05, "loss": 0.7558, "step": 15299 }, { "epoch": 1.036655599972898, "grad_norm": 4.543341636657715, "learning_rate": 8.0082141145869e-05, "loss": 0.6226, "step": 15300 }, { "epoch": 1.03672335524087, "grad_norm": 6.267812728881836, "learning_rate": 8.008077212677118e-05, "loss": 0.7714, "step": 15301 }, { "epoch": 1.036791110508842, "grad_norm": 5.134308338165283, "learning_rate": 8.007940310767336e-05, "loss": 0.5897, "step": 15302 }, { "epoch": 1.036858865776814, "grad_norm": 6.004744052886963, "learning_rate": 8.007803408857554e-05, "loss": 0.6461, "step": 15303 }, { "epoch": 1.0369266210447863, "grad_norm": 6.2730631828308105, "learning_rate": 8.007666506947772e-05, "loss": 0.7272, "step": 15304 }, { "epoch": 1.0369943763127583, "grad_norm": 7.032886981964111, "learning_rate": 8.007529605037991e-05, "loss": 0.708, "step": 15305 }, { "epoch": 1.0370621315807305, "grad_norm": 7.366089344024658, "learning_rate": 8.007392703128209e-05, "loss": 0.5729, "step": 15306 }, { "epoch": 1.0371298868487024, "grad_norm": 5.599564075469971, "learning_rate": 8.007255801218427e-05, "loss": 0.7446, "step": 15307 }, { "epoch": 1.0371976421166746, "grad_norm": 4.722618103027344, "learning_rate": 8.007118899308645e-05, "loss": 0.571, "step": 15308 }, { "epoch": 1.0372653973846466, "grad_norm": 5.094542026519775, "learning_rate": 8.006981997398863e-05, "loss": 0.8405, "step": 15309 }, { "epoch": 1.0373331526526188, "grad_norm": 6.558629989624023, "learning_rate": 8.006845095489083e-05, "loss": 0.6621, "step": 15310 }, { "epoch": 1.0374009079205908, "grad_norm": 7.169748306274414, "learning_rate": 8.006708193579301e-05, "loss": 0.7468, "step": 15311 }, { "epoch": 1.037468663188563, "grad_norm": 7.064498424530029, "learning_rate": 8.006571291669519e-05, "loss": 0.9875, "step": 15312 }, { "epoch": 1.037536418456535, "grad_norm": 8.31985092163086, "learning_rate": 8.006434389759737e-05, "loss": 0.7591, "step": 15313 }, { "epoch": 1.037604173724507, "grad_norm": 8.058642387390137, "learning_rate": 8.006297487849956e-05, "loss": 0.7805, "step": 15314 }, { "epoch": 1.0376719289924792, "grad_norm": 7.940720081329346, "learning_rate": 8.006160585940174e-05, "loss": 0.5895, "step": 15315 }, { "epoch": 1.0377396842604512, "grad_norm": 4.879898548126221, "learning_rate": 8.006023684030392e-05, "loss": 0.6924, "step": 15316 }, { "epoch": 1.0378074395284234, "grad_norm": 4.655125617980957, "learning_rate": 8.00588678212061e-05, "loss": 0.6833, "step": 15317 }, { "epoch": 1.0378751947963953, "grad_norm": 6.936232566833496, "learning_rate": 8.005749880210828e-05, "loss": 0.7441, "step": 15318 }, { "epoch": 1.0379429500643675, "grad_norm": 9.707219123840332, "learning_rate": 8.005612978301048e-05, "loss": 0.8677, "step": 15319 }, { "epoch": 1.0380107053323395, "grad_norm": 6.091515064239502, "learning_rate": 8.005476076391266e-05, "loss": 0.8092, "step": 15320 }, { "epoch": 1.0380784606003117, "grad_norm": 8.209754943847656, "learning_rate": 8.005339174481484e-05, "loss": 0.5553, "step": 15321 }, { "epoch": 1.0381462158682837, "grad_norm": 6.475118160247803, "learning_rate": 8.005202272571702e-05, "loss": 0.6681, "step": 15322 }, { "epoch": 1.038213971136256, "grad_norm": 8.269380569458008, "learning_rate": 8.005065370661921e-05, "loss": 0.7209, "step": 15323 }, { "epoch": 1.038281726404228, "grad_norm": 6.947020053863525, "learning_rate": 8.004928468752139e-05, "loss": 0.8772, "step": 15324 }, { "epoch": 1.0383494816722, "grad_norm": 4.593833923339844, "learning_rate": 8.004791566842357e-05, "loss": 0.5989, "step": 15325 }, { "epoch": 1.038417236940172, "grad_norm": 5.82660436630249, "learning_rate": 8.004654664932577e-05, "loss": 0.7262, "step": 15326 }, { "epoch": 1.0384849922081443, "grad_norm": 4.537695407867432, "learning_rate": 8.004517763022795e-05, "loss": 0.4765, "step": 15327 }, { "epoch": 1.0385527474761163, "grad_norm": 7.33888053894043, "learning_rate": 8.004380861113013e-05, "loss": 0.6138, "step": 15328 }, { "epoch": 1.0386205027440885, "grad_norm": 4.84438943862915, "learning_rate": 8.004243959203232e-05, "loss": 0.7464, "step": 15329 }, { "epoch": 1.0386882580120604, "grad_norm": 8.118237495422363, "learning_rate": 8.00410705729345e-05, "loss": 0.4538, "step": 15330 }, { "epoch": 1.0387560132800324, "grad_norm": 6.666751384735107, "learning_rate": 8.003970155383668e-05, "loss": 0.6187, "step": 15331 }, { "epoch": 1.0388237685480046, "grad_norm": 6.705092906951904, "learning_rate": 8.003833253473886e-05, "loss": 0.8744, "step": 15332 }, { "epoch": 1.0388915238159766, "grad_norm": 6.252575874328613, "learning_rate": 8.003696351564106e-05, "loss": 0.654, "step": 15333 }, { "epoch": 1.0389592790839488, "grad_norm": 4.896333694458008, "learning_rate": 8.003559449654324e-05, "loss": 0.6385, "step": 15334 }, { "epoch": 1.0390270343519208, "grad_norm": 6.1699957847595215, "learning_rate": 8.003422547744542e-05, "loss": 0.7625, "step": 15335 }, { "epoch": 1.039094789619893, "grad_norm": 5.307643413543701, "learning_rate": 8.00328564583476e-05, "loss": 0.7154, "step": 15336 }, { "epoch": 1.039162544887865, "grad_norm": 5.892039775848389, "learning_rate": 8.003148743924979e-05, "loss": 0.8284, "step": 15337 }, { "epoch": 1.0392303001558372, "grad_norm": 5.1963372230529785, "learning_rate": 8.003011842015197e-05, "loss": 0.6057, "step": 15338 }, { "epoch": 1.0392980554238092, "grad_norm": 5.593263626098633, "learning_rate": 8.002874940105415e-05, "loss": 0.7043, "step": 15339 }, { "epoch": 1.0393658106917814, "grad_norm": 4.905797481536865, "learning_rate": 8.002738038195633e-05, "loss": 0.6327, "step": 15340 }, { "epoch": 1.0394335659597533, "grad_norm": 7.234451770782471, "learning_rate": 8.002601136285851e-05, "loss": 0.5896, "step": 15341 }, { "epoch": 1.0395013212277255, "grad_norm": 6.471969127655029, "learning_rate": 8.00246423437607e-05, "loss": 0.8219, "step": 15342 }, { "epoch": 1.0395690764956975, "grad_norm": 6.145739555358887, "learning_rate": 8.002327332466289e-05, "loss": 0.6992, "step": 15343 }, { "epoch": 1.0396368317636697, "grad_norm": 4.9458842277526855, "learning_rate": 8.002190430556507e-05, "loss": 0.5296, "step": 15344 }, { "epoch": 1.0397045870316417, "grad_norm": 6.5056562423706055, "learning_rate": 8.002053528646725e-05, "loss": 0.7327, "step": 15345 }, { "epoch": 1.0397723422996137, "grad_norm": 5.6960320472717285, "learning_rate": 8.001916626736944e-05, "loss": 0.5978, "step": 15346 }, { "epoch": 1.0398400975675859, "grad_norm": 5.816061496734619, "learning_rate": 8.001779724827162e-05, "loss": 0.677, "step": 15347 }, { "epoch": 1.0399078528355579, "grad_norm": 5.3048577308654785, "learning_rate": 8.00164282291738e-05, "loss": 0.5438, "step": 15348 }, { "epoch": 1.03997560810353, "grad_norm": 4.549619197845459, "learning_rate": 8.001505921007598e-05, "loss": 0.3974, "step": 15349 }, { "epoch": 1.040043363371502, "grad_norm": 7.864236831665039, "learning_rate": 8.001369019097816e-05, "loss": 0.7221, "step": 15350 }, { "epoch": 1.0401111186394743, "grad_norm": 6.662132740020752, "learning_rate": 8.001232117188036e-05, "loss": 0.7352, "step": 15351 }, { "epoch": 1.0401788739074462, "grad_norm": 5.497745037078857, "learning_rate": 8.001095215278254e-05, "loss": 0.6255, "step": 15352 }, { "epoch": 1.0402466291754184, "grad_norm": 10.039993286132812, "learning_rate": 8.000958313368472e-05, "loss": 0.5771, "step": 15353 }, { "epoch": 1.0403143844433904, "grad_norm": 5.307958126068115, "learning_rate": 8.00082141145869e-05, "loss": 0.5976, "step": 15354 }, { "epoch": 1.0403821397113626, "grad_norm": 5.4257917404174805, "learning_rate": 8.000684509548909e-05, "loss": 0.7848, "step": 15355 }, { "epoch": 1.0404498949793346, "grad_norm": 7.313519477844238, "learning_rate": 8.000547607639127e-05, "loss": 0.9212, "step": 15356 }, { "epoch": 1.0405176502473068, "grad_norm": 6.327539443969727, "learning_rate": 8.000410705729345e-05, "loss": 0.6187, "step": 15357 }, { "epoch": 1.0405854055152788, "grad_norm": 5.164039134979248, "learning_rate": 8.000273803819563e-05, "loss": 0.5013, "step": 15358 }, { "epoch": 1.040653160783251, "grad_norm": 5.942699909210205, "learning_rate": 8.000136901909781e-05, "loss": 0.6973, "step": 15359 }, { "epoch": 1.040720916051223, "grad_norm": 4.6758527755737305, "learning_rate": 8e-05, "loss": 0.6294, "step": 15360 }, { "epoch": 1.0407886713191952, "grad_norm": 7.673499584197998, "learning_rate": 7.999863098090219e-05, "loss": 0.4294, "step": 15361 }, { "epoch": 1.0408564265871671, "grad_norm": 6.975398063659668, "learning_rate": 7.999726196180437e-05, "loss": 1.0246, "step": 15362 }, { "epoch": 1.0409241818551391, "grad_norm": 5.054098606109619, "learning_rate": 7.999589294270655e-05, "loss": 0.6756, "step": 15363 }, { "epoch": 1.0409919371231113, "grad_norm": 5.625120639801025, "learning_rate": 7.999452392360873e-05, "loss": 0.6515, "step": 15364 }, { "epoch": 1.0410596923910833, "grad_norm": 5.1921820640563965, "learning_rate": 7.999315490451092e-05, "loss": 0.707, "step": 15365 }, { "epoch": 1.0411274476590555, "grad_norm": 11.847484588623047, "learning_rate": 7.99917858854131e-05, "loss": 0.641, "step": 15366 }, { "epoch": 1.0411952029270275, "grad_norm": 5.949629783630371, "learning_rate": 7.999041686631528e-05, "loss": 0.6544, "step": 15367 }, { "epoch": 1.0412629581949997, "grad_norm": 5.2862138748168945, "learning_rate": 7.998904784721746e-05, "loss": 0.5595, "step": 15368 }, { "epoch": 1.0413307134629717, "grad_norm": 6.709194660186768, "learning_rate": 7.998767882811966e-05, "loss": 0.6076, "step": 15369 }, { "epoch": 1.0413984687309439, "grad_norm": 5.2147088050842285, "learning_rate": 7.998630980902184e-05, "loss": 0.3128, "step": 15370 }, { "epoch": 1.0414662239989159, "grad_norm": 6.64967155456543, "learning_rate": 7.998494078992402e-05, "loss": 0.6488, "step": 15371 }, { "epoch": 1.041533979266888, "grad_norm": 6.93261194229126, "learning_rate": 7.99835717708262e-05, "loss": 0.802, "step": 15372 }, { "epoch": 1.04160173453486, "grad_norm": 4.657343864440918, "learning_rate": 7.998220275172839e-05, "loss": 0.6395, "step": 15373 }, { "epoch": 1.0416694898028322, "grad_norm": 7.187932968139648, "learning_rate": 7.998083373263057e-05, "loss": 0.6507, "step": 15374 }, { "epoch": 1.0417372450708042, "grad_norm": 7.809886455535889, "learning_rate": 7.997946471353275e-05, "loss": 0.72, "step": 15375 }, { "epoch": 1.0418050003387764, "grad_norm": 7.339730262756348, "learning_rate": 7.997809569443495e-05, "loss": 0.7263, "step": 15376 }, { "epoch": 1.0418727556067484, "grad_norm": 6.153295516967773, "learning_rate": 7.997672667533713e-05, "loss": 0.6731, "step": 15377 }, { "epoch": 1.0419405108747206, "grad_norm": 5.918948650360107, "learning_rate": 7.997535765623931e-05, "loss": 0.6788, "step": 15378 }, { "epoch": 1.0420082661426926, "grad_norm": 7.921314716339111, "learning_rate": 7.99739886371415e-05, "loss": 0.7971, "step": 15379 }, { "epoch": 1.0420760214106646, "grad_norm": 5.347783088684082, "learning_rate": 7.997261961804368e-05, "loss": 0.5056, "step": 15380 }, { "epoch": 1.0421437766786368, "grad_norm": 4.612505912780762, "learning_rate": 7.997125059894586e-05, "loss": 0.6825, "step": 15381 }, { "epoch": 1.0422115319466088, "grad_norm": 8.001241683959961, "learning_rate": 7.996988157984804e-05, "loss": 0.5893, "step": 15382 }, { "epoch": 1.042279287214581, "grad_norm": 5.214502811431885, "learning_rate": 7.996851256075024e-05, "loss": 0.6628, "step": 15383 }, { "epoch": 1.042347042482553, "grad_norm": 8.609155654907227, "learning_rate": 7.996714354165242e-05, "loss": 0.9572, "step": 15384 }, { "epoch": 1.0424147977505251, "grad_norm": 4.416464328765869, "learning_rate": 7.99657745225546e-05, "loss": 0.6881, "step": 15385 }, { "epoch": 1.0424825530184971, "grad_norm": 6.996601104736328, "learning_rate": 7.996440550345678e-05, "loss": 0.8354, "step": 15386 }, { "epoch": 1.0425503082864693, "grad_norm": 7.7891035079956055, "learning_rate": 7.996303648435896e-05, "loss": 0.5579, "step": 15387 }, { "epoch": 1.0426180635544413, "grad_norm": 5.543262004852295, "learning_rate": 7.996166746526115e-05, "loss": 0.7516, "step": 15388 }, { "epoch": 1.0426858188224135, "grad_norm": 5.837127685546875, "learning_rate": 7.996029844616333e-05, "loss": 0.7111, "step": 15389 }, { "epoch": 1.0427535740903855, "grad_norm": 7.789334774017334, "learning_rate": 7.995892942706551e-05, "loss": 0.7419, "step": 15390 }, { "epoch": 1.0428213293583577, "grad_norm": 5.3756022453308105, "learning_rate": 7.995756040796769e-05, "loss": 0.7475, "step": 15391 }, { "epoch": 1.0428890846263297, "grad_norm": 4.391982555389404, "learning_rate": 7.995619138886989e-05, "loss": 0.6616, "step": 15392 }, { "epoch": 1.0429568398943019, "grad_norm": 5.609358787536621, "learning_rate": 7.995482236977207e-05, "loss": 0.817, "step": 15393 }, { "epoch": 1.0430245951622739, "grad_norm": 4.2053046226501465, "learning_rate": 7.995345335067425e-05, "loss": 0.5884, "step": 15394 }, { "epoch": 1.0430923504302458, "grad_norm": 7.365318775177002, "learning_rate": 7.995208433157643e-05, "loss": 0.7841, "step": 15395 }, { "epoch": 1.043160105698218, "grad_norm": 5.268418312072754, "learning_rate": 7.995071531247861e-05, "loss": 0.7165, "step": 15396 }, { "epoch": 1.04322786096619, "grad_norm": 4.734410762786865, "learning_rate": 7.99493462933808e-05, "loss": 0.5459, "step": 15397 }, { "epoch": 1.0432956162341622, "grad_norm": 6.027469635009766, "learning_rate": 7.994797727428298e-05, "loss": 0.7033, "step": 15398 }, { "epoch": 1.0433633715021342, "grad_norm": 5.068356037139893, "learning_rate": 7.994660825518516e-05, "loss": 0.7333, "step": 15399 }, { "epoch": 1.0434311267701064, "grad_norm": 5.465489387512207, "learning_rate": 7.994523923608734e-05, "loss": 0.6387, "step": 15400 }, { "epoch": 1.0434988820380784, "grad_norm": 6.087520122528076, "learning_rate": 7.994387021698954e-05, "loss": 0.6957, "step": 15401 }, { "epoch": 1.0435666373060506, "grad_norm": 6.731315612792969, "learning_rate": 7.994250119789172e-05, "loss": 0.6397, "step": 15402 }, { "epoch": 1.0436343925740226, "grad_norm": 5.57260274887085, "learning_rate": 7.99411321787939e-05, "loss": 0.6853, "step": 15403 }, { "epoch": 1.0437021478419948, "grad_norm": 5.95422887802124, "learning_rate": 7.993976315969608e-05, "loss": 0.6726, "step": 15404 }, { "epoch": 1.0437699031099668, "grad_norm": 5.570609092712402, "learning_rate": 7.993839414059826e-05, "loss": 0.7738, "step": 15405 }, { "epoch": 1.043837658377939, "grad_norm": 5.522849082946777, "learning_rate": 7.993702512150045e-05, "loss": 0.6653, "step": 15406 }, { "epoch": 1.043905413645911, "grad_norm": 5.089807987213135, "learning_rate": 7.993565610240263e-05, "loss": 0.5422, "step": 15407 }, { "epoch": 1.0439731689138831, "grad_norm": 5.952882766723633, "learning_rate": 7.993428708330481e-05, "loss": 0.714, "step": 15408 }, { "epoch": 1.0440409241818551, "grad_norm": 5.655484199523926, "learning_rate": 7.993291806420699e-05, "loss": 0.461, "step": 15409 }, { "epoch": 1.0441086794498273, "grad_norm": 6.936872959136963, "learning_rate": 7.993154904510919e-05, "loss": 0.6569, "step": 15410 }, { "epoch": 1.0441764347177993, "grad_norm": 6.010931968688965, "learning_rate": 7.993018002601137e-05, "loss": 0.7323, "step": 15411 }, { "epoch": 1.0442441899857713, "grad_norm": 7.187067985534668, "learning_rate": 7.992881100691355e-05, "loss": 0.9217, "step": 15412 }, { "epoch": 1.0443119452537435, "grad_norm": 5.070352554321289, "learning_rate": 7.992744198781573e-05, "loss": 0.6433, "step": 15413 }, { "epoch": 1.0443797005217155, "grad_norm": 6.546175479888916, "learning_rate": 7.992607296871791e-05, "loss": 0.671, "step": 15414 }, { "epoch": 1.0444474557896877, "grad_norm": 6.2877349853515625, "learning_rate": 7.99247039496201e-05, "loss": 0.8915, "step": 15415 }, { "epoch": 1.0445152110576597, "grad_norm": 5.39522647857666, "learning_rate": 7.992333493052228e-05, "loss": 0.7388, "step": 15416 }, { "epoch": 1.0445829663256319, "grad_norm": 6.453690052032471, "learning_rate": 7.992196591142446e-05, "loss": 0.7892, "step": 15417 }, { "epoch": 1.0446507215936038, "grad_norm": 6.763200759887695, "learning_rate": 7.992059689232664e-05, "loss": 0.6219, "step": 15418 }, { "epoch": 1.044718476861576, "grad_norm": 5.610644817352295, "learning_rate": 7.991922787322884e-05, "loss": 0.7382, "step": 15419 }, { "epoch": 1.044786232129548, "grad_norm": 6.314178466796875, "learning_rate": 7.991785885413102e-05, "loss": 0.7147, "step": 15420 }, { "epoch": 1.0448539873975202, "grad_norm": 5.987586498260498, "learning_rate": 7.99164898350332e-05, "loss": 0.537, "step": 15421 }, { "epoch": 1.0449217426654922, "grad_norm": 7.661067008972168, "learning_rate": 7.991512081593539e-05, "loss": 0.9005, "step": 15422 }, { "epoch": 1.0449894979334644, "grad_norm": 5.166024684906006, "learning_rate": 7.991375179683757e-05, "loss": 0.755, "step": 15423 }, { "epoch": 1.0450572532014364, "grad_norm": 6.560528755187988, "learning_rate": 7.991238277773975e-05, "loss": 0.7315, "step": 15424 }, { "epoch": 1.0451250084694086, "grad_norm": 6.162966728210449, "learning_rate": 7.991101375864195e-05, "loss": 0.5871, "step": 15425 }, { "epoch": 1.0451927637373806, "grad_norm": 6.7889628410339355, "learning_rate": 7.990964473954413e-05, "loss": 0.7381, "step": 15426 }, { "epoch": 1.0452605190053528, "grad_norm": 5.342012405395508, "learning_rate": 7.99082757204463e-05, "loss": 0.7028, "step": 15427 }, { "epoch": 1.0453282742733248, "grad_norm": 5.235086441040039, "learning_rate": 7.990690670134849e-05, "loss": 0.5145, "step": 15428 }, { "epoch": 1.0453960295412967, "grad_norm": 5.529267311096191, "learning_rate": 7.990553768225068e-05, "loss": 0.5921, "step": 15429 }, { "epoch": 1.045463784809269, "grad_norm": 5.2520880699157715, "learning_rate": 7.990416866315286e-05, "loss": 0.7905, "step": 15430 }, { "epoch": 1.045531540077241, "grad_norm": 5.065273761749268, "learning_rate": 7.990279964405504e-05, "loss": 0.6968, "step": 15431 }, { "epoch": 1.0455992953452131, "grad_norm": 5.409516334533691, "learning_rate": 7.990143062495722e-05, "loss": 0.6831, "step": 15432 }, { "epoch": 1.045667050613185, "grad_norm": 5.545697212219238, "learning_rate": 7.990006160585942e-05, "loss": 0.6522, "step": 15433 }, { "epoch": 1.0457348058811573, "grad_norm": 5.714067459106445, "learning_rate": 7.98986925867616e-05, "loss": 0.5569, "step": 15434 }, { "epoch": 1.0458025611491293, "grad_norm": 7.723472595214844, "learning_rate": 7.989732356766378e-05, "loss": 0.6271, "step": 15435 }, { "epoch": 1.0458703164171015, "grad_norm": 7.13986349105835, "learning_rate": 7.989595454856596e-05, "loss": 0.8158, "step": 15436 }, { "epoch": 1.0459380716850735, "grad_norm": 6.393859386444092, "learning_rate": 7.989458552946814e-05, "loss": 0.575, "step": 15437 }, { "epoch": 1.0460058269530457, "grad_norm": 8.466290473937988, "learning_rate": 7.989321651037033e-05, "loss": 0.6657, "step": 15438 }, { "epoch": 1.0460735822210177, "grad_norm": 4.97887659072876, "learning_rate": 7.989184749127251e-05, "loss": 0.6869, "step": 15439 }, { "epoch": 1.0461413374889899, "grad_norm": 6.120355129241943, "learning_rate": 7.989047847217469e-05, "loss": 0.7158, "step": 15440 }, { "epoch": 1.0462090927569618, "grad_norm": 5.353936672210693, "learning_rate": 7.988910945307687e-05, "loss": 0.6081, "step": 15441 }, { "epoch": 1.046276848024934, "grad_norm": 5.116786956787109, "learning_rate": 7.988774043397905e-05, "loss": 0.725, "step": 15442 }, { "epoch": 1.046344603292906, "grad_norm": 5.482798099517822, "learning_rate": 7.988637141488125e-05, "loss": 0.7154, "step": 15443 }, { "epoch": 1.046412358560878, "grad_norm": 6.663843631744385, "learning_rate": 7.988500239578343e-05, "loss": 0.7235, "step": 15444 }, { "epoch": 1.0464801138288502, "grad_norm": 5.938141822814941, "learning_rate": 7.98836333766856e-05, "loss": 0.5438, "step": 15445 }, { "epoch": 1.0465478690968222, "grad_norm": 4.406172752380371, "learning_rate": 7.988226435758779e-05, "loss": 0.6383, "step": 15446 }, { "epoch": 1.0466156243647944, "grad_norm": 4.991530895233154, "learning_rate": 7.988089533848998e-05, "loss": 0.6158, "step": 15447 }, { "epoch": 1.0466833796327664, "grad_norm": 5.490728378295898, "learning_rate": 7.987952631939216e-05, "loss": 0.6724, "step": 15448 }, { "epoch": 1.0467511349007386, "grad_norm": 6.0448384284973145, "learning_rate": 7.987815730029434e-05, "loss": 0.5423, "step": 15449 }, { "epoch": 1.0468188901687105, "grad_norm": 5.0079731941223145, "learning_rate": 7.987678828119652e-05, "loss": 0.7416, "step": 15450 }, { "epoch": 1.0468866454366828, "grad_norm": 6.357697486877441, "learning_rate": 7.98754192620987e-05, "loss": 0.5595, "step": 15451 }, { "epoch": 1.0469544007046547, "grad_norm": 5.9440226554870605, "learning_rate": 7.98740502430009e-05, "loss": 0.6615, "step": 15452 }, { "epoch": 1.047022155972627, "grad_norm": 5.571059226989746, "learning_rate": 7.987268122390308e-05, "loss": 0.6681, "step": 15453 }, { "epoch": 1.047089911240599, "grad_norm": 5.801420211791992, "learning_rate": 7.987131220480526e-05, "loss": 0.6817, "step": 15454 }, { "epoch": 1.0471576665085711, "grad_norm": 5.256460666656494, "learning_rate": 7.986994318570744e-05, "loss": 0.5682, "step": 15455 }, { "epoch": 1.047225421776543, "grad_norm": 5.437204360961914, "learning_rate": 7.986857416660963e-05, "loss": 0.8709, "step": 15456 }, { "epoch": 1.0472931770445153, "grad_norm": 4.7969651222229, "learning_rate": 7.986720514751181e-05, "loss": 0.5018, "step": 15457 }, { "epoch": 1.0473609323124873, "grad_norm": 8.146381378173828, "learning_rate": 7.986583612841399e-05, "loss": 0.6968, "step": 15458 }, { "epoch": 1.0474286875804595, "grad_norm": 7.133126258850098, "learning_rate": 7.986446710931617e-05, "loss": 0.7164, "step": 15459 }, { "epoch": 1.0474964428484315, "grad_norm": 8.4725980758667, "learning_rate": 7.986309809021835e-05, "loss": 0.543, "step": 15460 }, { "epoch": 1.0475641981164034, "grad_norm": 6.91729211807251, "learning_rate": 7.986172907112055e-05, "loss": 0.4047, "step": 15461 }, { "epoch": 1.0476319533843756, "grad_norm": 5.977921009063721, "learning_rate": 7.986036005202273e-05, "loss": 0.4638, "step": 15462 }, { "epoch": 1.0476997086523476, "grad_norm": 5.780356407165527, "learning_rate": 7.985899103292491e-05, "loss": 0.6565, "step": 15463 }, { "epoch": 1.0477674639203198, "grad_norm": 5.508984088897705, "learning_rate": 7.985762201382709e-05, "loss": 0.6388, "step": 15464 }, { "epoch": 1.0478352191882918, "grad_norm": 7.178645610809326, "learning_rate": 7.985625299472928e-05, "loss": 0.6638, "step": 15465 }, { "epoch": 1.047902974456264, "grad_norm": 5.548744201660156, "learning_rate": 7.985488397563146e-05, "loss": 0.5982, "step": 15466 }, { "epoch": 1.047970729724236, "grad_norm": 6.19132661819458, "learning_rate": 7.985351495653364e-05, "loss": 0.6313, "step": 15467 }, { "epoch": 1.0480384849922082, "grad_norm": 5.953742504119873, "learning_rate": 7.985214593743584e-05, "loss": 0.9055, "step": 15468 }, { "epoch": 1.0481062402601802, "grad_norm": 6.040510177612305, "learning_rate": 7.985077691833802e-05, "loss": 0.8007, "step": 15469 }, { "epoch": 1.0481739955281524, "grad_norm": 6.583937168121338, "learning_rate": 7.98494078992402e-05, "loss": 0.5396, "step": 15470 }, { "epoch": 1.0482417507961244, "grad_norm": 6.282310485839844, "learning_rate": 7.984803888014239e-05, "loss": 0.7954, "step": 15471 }, { "epoch": 1.0483095060640966, "grad_norm": 5.081226348876953, "learning_rate": 7.984666986104457e-05, "loss": 0.753, "step": 15472 }, { "epoch": 1.0483772613320685, "grad_norm": 7.40177583694458, "learning_rate": 7.984530084194675e-05, "loss": 0.8124, "step": 15473 }, { "epoch": 1.0484450166000407, "grad_norm": 5.868223667144775, "learning_rate": 7.984393182284893e-05, "loss": 0.651, "step": 15474 }, { "epoch": 1.0485127718680127, "grad_norm": 4.443064212799072, "learning_rate": 7.984256280375113e-05, "loss": 0.5547, "step": 15475 }, { "epoch": 1.048580527135985, "grad_norm": 6.508821964263916, "learning_rate": 7.98411937846533e-05, "loss": 0.8823, "step": 15476 }, { "epoch": 1.048648282403957, "grad_norm": 8.762590408325195, "learning_rate": 7.983982476555549e-05, "loss": 0.7839, "step": 15477 }, { "epoch": 1.048716037671929, "grad_norm": 6.139303684234619, "learning_rate": 7.983845574645767e-05, "loss": 0.6002, "step": 15478 }, { "epoch": 1.048783792939901, "grad_norm": 5.247832775115967, "learning_rate": 7.983708672735986e-05, "loss": 0.6179, "step": 15479 }, { "epoch": 1.048851548207873, "grad_norm": 6.066807270050049, "learning_rate": 7.983571770826204e-05, "loss": 0.585, "step": 15480 }, { "epoch": 1.0489193034758453, "grad_norm": 4.7573747634887695, "learning_rate": 7.983434868916422e-05, "loss": 0.6707, "step": 15481 }, { "epoch": 1.0489870587438173, "grad_norm": 7.490664958953857, "learning_rate": 7.98329796700664e-05, "loss": 0.7973, "step": 15482 }, { "epoch": 1.0490548140117895, "grad_norm": 4.913379669189453, "learning_rate": 7.983161065096858e-05, "loss": 0.6074, "step": 15483 }, { "epoch": 1.0491225692797614, "grad_norm": 5.44326639175415, "learning_rate": 7.983024163187078e-05, "loss": 0.5872, "step": 15484 }, { "epoch": 1.0491903245477336, "grad_norm": 6.307222843170166, "learning_rate": 7.982887261277296e-05, "loss": 0.7191, "step": 15485 }, { "epoch": 1.0492580798157056, "grad_norm": 7.862999439239502, "learning_rate": 7.982750359367514e-05, "loss": 0.6033, "step": 15486 }, { "epoch": 1.0493258350836778, "grad_norm": 7.700031757354736, "learning_rate": 7.982613457457732e-05, "loss": 0.8108, "step": 15487 }, { "epoch": 1.0493935903516498, "grad_norm": 4.863072872161865, "learning_rate": 7.982476555547951e-05, "loss": 0.6379, "step": 15488 }, { "epoch": 1.049461345619622, "grad_norm": 6.879638671875, "learning_rate": 7.982339653638169e-05, "loss": 0.6425, "step": 15489 }, { "epoch": 1.049529100887594, "grad_norm": 5.878018379211426, "learning_rate": 7.982202751728387e-05, "loss": 0.62, "step": 15490 }, { "epoch": 1.0495968561555662, "grad_norm": 5.8812055587768555, "learning_rate": 7.982065849818605e-05, "loss": 0.5707, "step": 15491 }, { "epoch": 1.0496646114235382, "grad_norm": 5.584284782409668, "learning_rate": 7.981928947908823e-05, "loss": 0.5941, "step": 15492 }, { "epoch": 1.0497323666915102, "grad_norm": 11.247712135314941, "learning_rate": 7.981792045999043e-05, "loss": 0.9765, "step": 15493 }, { "epoch": 1.0498001219594824, "grad_norm": 5.628141403198242, "learning_rate": 7.98165514408926e-05, "loss": 0.5359, "step": 15494 }, { "epoch": 1.0498678772274543, "grad_norm": 6.264639377593994, "learning_rate": 7.981518242179479e-05, "loss": 0.7608, "step": 15495 }, { "epoch": 1.0499356324954265, "grad_norm": 7.300719261169434, "learning_rate": 7.981381340269697e-05, "loss": 0.7166, "step": 15496 }, { "epoch": 1.0500033877633985, "grad_norm": 5.170079231262207, "learning_rate": 7.981244438359915e-05, "loss": 0.5827, "step": 15497 }, { "epoch": 1.0500711430313707, "grad_norm": 6.566158294677734, "learning_rate": 7.981107536450134e-05, "loss": 0.6063, "step": 15498 }, { "epoch": 1.0501388982993427, "grad_norm": 7.797521114349365, "learning_rate": 7.980970634540352e-05, "loss": 0.4916, "step": 15499 }, { "epoch": 1.050206653567315, "grad_norm": 6.960193157196045, "learning_rate": 7.98083373263057e-05, "loss": 0.7103, "step": 15500 }, { "epoch": 1.0502744088352869, "grad_norm": 7.210864543914795, "learning_rate": 7.980696830720788e-05, "loss": 0.7338, "step": 15501 }, { "epoch": 1.050342164103259, "grad_norm": 7.08119535446167, "learning_rate": 7.980559928811008e-05, "loss": 0.6056, "step": 15502 }, { "epoch": 1.050409919371231, "grad_norm": 6.606025695800781, "learning_rate": 7.980423026901226e-05, "loss": 0.7785, "step": 15503 }, { "epoch": 1.0504776746392033, "grad_norm": 7.610130786895752, "learning_rate": 7.980286124991444e-05, "loss": 0.7014, "step": 15504 }, { "epoch": 1.0505454299071753, "grad_norm": 6.512502670288086, "learning_rate": 7.980149223081662e-05, "loss": 0.606, "step": 15505 }, { "epoch": 1.0506131851751475, "grad_norm": 6.440487861633301, "learning_rate": 7.98001232117188e-05, "loss": 0.6465, "step": 15506 }, { "epoch": 1.0506809404431194, "grad_norm": 6.003171920776367, "learning_rate": 7.979875419262099e-05, "loss": 0.5331, "step": 15507 }, { "epoch": 1.0507486957110916, "grad_norm": 5.904346466064453, "learning_rate": 7.979738517352317e-05, "loss": 0.824, "step": 15508 }, { "epoch": 1.0508164509790636, "grad_norm": 5.533935070037842, "learning_rate": 7.979601615442535e-05, "loss": 0.8511, "step": 15509 }, { "epoch": 1.0508842062470356, "grad_norm": 6.289936065673828, "learning_rate": 7.979464713532753e-05, "loss": 0.7926, "step": 15510 }, { "epoch": 1.0509519615150078, "grad_norm": 5.81295919418335, "learning_rate": 7.979327811622973e-05, "loss": 0.9554, "step": 15511 }, { "epoch": 1.0510197167829798, "grad_norm": 5.214400768280029, "learning_rate": 7.97919090971319e-05, "loss": 0.9091, "step": 15512 }, { "epoch": 1.051087472050952, "grad_norm": 7.468234539031982, "learning_rate": 7.979054007803409e-05, "loss": 0.593, "step": 15513 }, { "epoch": 1.051155227318924, "grad_norm": 6.6748366355896, "learning_rate": 7.978917105893628e-05, "loss": 0.9312, "step": 15514 }, { "epoch": 1.0512229825868962, "grad_norm": 7.021498680114746, "learning_rate": 7.978780203983846e-05, "loss": 0.5446, "step": 15515 }, { "epoch": 1.0512907378548682, "grad_norm": 7.151730060577393, "learning_rate": 7.978643302074064e-05, "loss": 0.6672, "step": 15516 }, { "epoch": 1.0513584931228404, "grad_norm": 8.595193862915039, "learning_rate": 7.978506400164284e-05, "loss": 0.8893, "step": 15517 }, { "epoch": 1.0514262483908123, "grad_norm": 6.063235282897949, "learning_rate": 7.978369498254502e-05, "loss": 0.5404, "step": 15518 }, { "epoch": 1.0514940036587845, "grad_norm": 6.944025039672852, "learning_rate": 7.97823259634472e-05, "loss": 0.4193, "step": 15519 }, { "epoch": 1.0515617589267565, "grad_norm": 6.7520976066589355, "learning_rate": 7.978095694434938e-05, "loss": 0.8591, "step": 15520 }, { "epoch": 1.0516295141947287, "grad_norm": 5.863169193267822, "learning_rate": 7.977958792525157e-05, "loss": 0.6715, "step": 15521 }, { "epoch": 1.0516972694627007, "grad_norm": 7.465962886810303, "learning_rate": 7.977821890615375e-05, "loss": 0.6657, "step": 15522 }, { "epoch": 1.051765024730673, "grad_norm": 4.659458637237549, "learning_rate": 7.977684988705593e-05, "loss": 0.8134, "step": 15523 }, { "epoch": 1.0518327799986449, "grad_norm": 7.612015247344971, "learning_rate": 7.977548086795811e-05, "loss": 0.6289, "step": 15524 }, { "epoch": 1.051900535266617, "grad_norm": 5.157435894012451, "learning_rate": 7.97741118488603e-05, "loss": 0.7039, "step": 15525 }, { "epoch": 1.051968290534589, "grad_norm": 5.263278484344482, "learning_rate": 7.977274282976249e-05, "loss": 0.5698, "step": 15526 }, { "epoch": 1.052036045802561, "grad_norm": 5.186944484710693, "learning_rate": 7.977137381066467e-05, "loss": 0.5398, "step": 15527 }, { "epoch": 1.0521038010705333, "grad_norm": 6.908531188964844, "learning_rate": 7.977000479156685e-05, "loss": 0.637, "step": 15528 }, { "epoch": 1.0521715563385052, "grad_norm": 5.298072338104248, "learning_rate": 7.976863577246903e-05, "loss": 0.55, "step": 15529 }, { "epoch": 1.0522393116064774, "grad_norm": 4.47366189956665, "learning_rate": 7.976726675337122e-05, "loss": 0.5943, "step": 15530 }, { "epoch": 1.0523070668744494, "grad_norm": 5.410435676574707, "learning_rate": 7.97658977342734e-05, "loss": 0.9073, "step": 15531 }, { "epoch": 1.0523748221424216, "grad_norm": 5.6297478675842285, "learning_rate": 7.976452871517558e-05, "loss": 0.7233, "step": 15532 }, { "epoch": 1.0524425774103936, "grad_norm": 5.776382923126221, "learning_rate": 7.976315969607776e-05, "loss": 0.6116, "step": 15533 }, { "epoch": 1.0525103326783658, "grad_norm": 7.4787774085998535, "learning_rate": 7.976179067697996e-05, "loss": 0.7542, "step": 15534 }, { "epoch": 1.0525780879463378, "grad_norm": 4.613537311553955, "learning_rate": 7.976042165788214e-05, "loss": 0.6745, "step": 15535 }, { "epoch": 1.05264584321431, "grad_norm": 6.3547282218933105, "learning_rate": 7.975905263878432e-05, "loss": 0.7607, "step": 15536 }, { "epoch": 1.052713598482282, "grad_norm": 5.2433366775512695, "learning_rate": 7.97576836196865e-05, "loss": 0.679, "step": 15537 }, { "epoch": 1.0527813537502542, "grad_norm": 6.4581522941589355, "learning_rate": 7.975631460058868e-05, "loss": 0.8027, "step": 15538 }, { "epoch": 1.0528491090182261, "grad_norm": 6.2019267082214355, "learning_rate": 7.975494558149087e-05, "loss": 0.8168, "step": 15539 }, { "epoch": 1.0529168642861984, "grad_norm": 5.71392297744751, "learning_rate": 7.975357656239305e-05, "loss": 0.6555, "step": 15540 }, { "epoch": 1.0529846195541703, "grad_norm": 6.714015007019043, "learning_rate": 7.975220754329523e-05, "loss": 0.6979, "step": 15541 }, { "epoch": 1.0530523748221423, "grad_norm": 6.717672348022461, "learning_rate": 7.975083852419741e-05, "loss": 0.8411, "step": 15542 }, { "epoch": 1.0531201300901145, "grad_norm": 7.295918941497803, "learning_rate": 7.97494695050996e-05, "loss": 0.702, "step": 15543 }, { "epoch": 1.0531878853580865, "grad_norm": 6.989544868469238, "learning_rate": 7.974810048600179e-05, "loss": 0.6153, "step": 15544 }, { "epoch": 1.0532556406260587, "grad_norm": 6.641953945159912, "learning_rate": 7.974673146690397e-05, "loss": 0.4917, "step": 15545 }, { "epoch": 1.0533233958940307, "grad_norm": 7.03810977935791, "learning_rate": 7.974536244780615e-05, "loss": 0.6252, "step": 15546 }, { "epoch": 1.0533911511620029, "grad_norm": 5.024344444274902, "learning_rate": 7.974399342870833e-05, "loss": 0.7038, "step": 15547 }, { "epoch": 1.0534589064299749, "grad_norm": 6.919520854949951, "learning_rate": 7.974262440961052e-05, "loss": 0.9027, "step": 15548 }, { "epoch": 1.053526661697947, "grad_norm": 5.6881537437438965, "learning_rate": 7.97412553905127e-05, "loss": 0.6556, "step": 15549 }, { "epoch": 1.053594416965919, "grad_norm": 6.098855495452881, "learning_rate": 7.973988637141488e-05, "loss": 0.6225, "step": 15550 }, { "epoch": 1.0536621722338912, "grad_norm": 5.481220245361328, "learning_rate": 7.973851735231706e-05, "loss": 0.7031, "step": 15551 }, { "epoch": 1.0537299275018632, "grad_norm": 7.0920586585998535, "learning_rate": 7.973714833321924e-05, "loss": 0.672, "step": 15552 }, { "epoch": 1.0537976827698354, "grad_norm": 4.710086345672607, "learning_rate": 7.973577931412144e-05, "loss": 0.652, "step": 15553 }, { "epoch": 1.0538654380378074, "grad_norm": 5.764717102050781, "learning_rate": 7.973441029502362e-05, "loss": 0.7777, "step": 15554 }, { "epoch": 1.0539331933057796, "grad_norm": 6.428826332092285, "learning_rate": 7.97330412759258e-05, "loss": 0.7785, "step": 15555 }, { "epoch": 1.0540009485737516, "grad_norm": 5.741217136383057, "learning_rate": 7.973167225682798e-05, "loss": 0.6728, "step": 15556 }, { "epoch": 1.0540687038417238, "grad_norm": 7.573585510253906, "learning_rate": 7.973030323773017e-05, "loss": 0.7876, "step": 15557 }, { "epoch": 1.0541364591096958, "grad_norm": 4.414328575134277, "learning_rate": 7.972893421863235e-05, "loss": 0.556, "step": 15558 }, { "epoch": 1.0542042143776678, "grad_norm": 4.814504623413086, "learning_rate": 7.972756519953453e-05, "loss": 0.6419, "step": 15559 }, { "epoch": 1.05427196964564, "grad_norm": 6.726992130279541, "learning_rate": 7.972619618043673e-05, "loss": 0.7071, "step": 15560 }, { "epoch": 1.054339724913612, "grad_norm": 4.732487201690674, "learning_rate": 7.97248271613389e-05, "loss": 0.6135, "step": 15561 }, { "epoch": 1.0544074801815841, "grad_norm": 5.262027740478516, "learning_rate": 7.972345814224109e-05, "loss": 0.657, "step": 15562 }, { "epoch": 1.0544752354495561, "grad_norm": 7.205251693725586, "learning_rate": 7.972208912314328e-05, "loss": 0.6268, "step": 15563 }, { "epoch": 1.0545429907175283, "grad_norm": 6.2166337966918945, "learning_rate": 7.972072010404546e-05, "loss": 0.678, "step": 15564 }, { "epoch": 1.0546107459855003, "grad_norm": 5.0857343673706055, "learning_rate": 7.971935108494764e-05, "loss": 0.6984, "step": 15565 }, { "epoch": 1.0546785012534725, "grad_norm": 5.1078782081604, "learning_rate": 7.971798206584983e-05, "loss": 0.7758, "step": 15566 }, { "epoch": 1.0547462565214445, "grad_norm": 6.676407337188721, "learning_rate": 7.971661304675201e-05, "loss": 0.9758, "step": 15567 }, { "epoch": 1.0548140117894167, "grad_norm": 10.701401710510254, "learning_rate": 7.97152440276542e-05, "loss": 0.7816, "step": 15568 }, { "epoch": 1.0548817670573887, "grad_norm": 11.736532211303711, "learning_rate": 7.971387500855638e-05, "loss": 0.7215, "step": 15569 }, { "epoch": 1.0549495223253609, "grad_norm": 5.571110248565674, "learning_rate": 7.971250598945856e-05, "loss": 0.683, "step": 15570 }, { "epoch": 1.0550172775933329, "grad_norm": 6.118210315704346, "learning_rate": 7.971113697036075e-05, "loss": 0.6195, "step": 15571 }, { "epoch": 1.055085032861305, "grad_norm": 6.9824748039245605, "learning_rate": 7.970976795126293e-05, "loss": 0.6818, "step": 15572 }, { "epoch": 1.055152788129277, "grad_norm": 6.418920040130615, "learning_rate": 7.970839893216511e-05, "loss": 0.6653, "step": 15573 }, { "epoch": 1.0552205433972492, "grad_norm": 5.251111030578613, "learning_rate": 7.970702991306729e-05, "loss": 0.5911, "step": 15574 }, { "epoch": 1.0552882986652212, "grad_norm": 6.46027135848999, "learning_rate": 7.970566089396947e-05, "loss": 0.6771, "step": 15575 }, { "epoch": 1.0553560539331932, "grad_norm": 5.67859411239624, "learning_rate": 7.970429187487166e-05, "loss": 0.6342, "step": 15576 }, { "epoch": 1.0554238092011654, "grad_norm": 6.3809661865234375, "learning_rate": 7.970292285577385e-05, "loss": 0.7269, "step": 15577 }, { "epoch": 1.0554915644691374, "grad_norm": 5.144848346710205, "learning_rate": 7.970155383667603e-05, "loss": 0.7396, "step": 15578 }, { "epoch": 1.0555593197371096, "grad_norm": 6.251219749450684, "learning_rate": 7.97001848175782e-05, "loss": 0.6375, "step": 15579 }, { "epoch": 1.0556270750050816, "grad_norm": 6.255589008331299, "learning_rate": 7.96988157984804e-05, "loss": 0.5346, "step": 15580 }, { "epoch": 1.0556948302730538, "grad_norm": 7.6489481925964355, "learning_rate": 7.969744677938258e-05, "loss": 0.6735, "step": 15581 }, { "epoch": 1.0557625855410258, "grad_norm": 6.068694114685059, "learning_rate": 7.969607776028476e-05, "loss": 0.6175, "step": 15582 }, { "epoch": 1.055830340808998, "grad_norm": 6.7391815185546875, "learning_rate": 7.969470874118694e-05, "loss": 0.8585, "step": 15583 }, { "epoch": 1.05589809607697, "grad_norm": 6.22243595123291, "learning_rate": 7.969333972208912e-05, "loss": 0.5475, "step": 15584 }, { "epoch": 1.0559658513449421, "grad_norm": 5.03702449798584, "learning_rate": 7.969197070299132e-05, "loss": 0.7151, "step": 15585 }, { "epoch": 1.0560336066129141, "grad_norm": 6.093680381774902, "learning_rate": 7.96906016838935e-05, "loss": 0.5883, "step": 15586 }, { "epoch": 1.0561013618808863, "grad_norm": 7.8765034675598145, "learning_rate": 7.968923266479568e-05, "loss": 0.7901, "step": 15587 }, { "epoch": 1.0561691171488583, "grad_norm": 6.4514031410217285, "learning_rate": 7.968786364569786e-05, "loss": 0.6763, "step": 15588 }, { "epoch": 1.0562368724168305, "grad_norm": 5.650920867919922, "learning_rate": 7.968649462660005e-05, "loss": 0.74, "step": 15589 }, { "epoch": 1.0563046276848025, "grad_norm": 5.893711090087891, "learning_rate": 7.968512560750223e-05, "loss": 0.696, "step": 15590 }, { "epoch": 1.0563723829527745, "grad_norm": 5.7477240562438965, "learning_rate": 7.968375658840441e-05, "loss": 0.6714, "step": 15591 }, { "epoch": 1.0564401382207467, "grad_norm": 5.326556205749512, "learning_rate": 7.968238756930659e-05, "loss": 0.6365, "step": 15592 }, { "epoch": 1.0565078934887187, "grad_norm": 6.540344715118408, "learning_rate": 7.968101855020877e-05, "loss": 0.7155, "step": 15593 }, { "epoch": 1.0565756487566909, "grad_norm": 6.695101261138916, "learning_rate": 7.967964953111097e-05, "loss": 0.7152, "step": 15594 }, { "epoch": 1.0566434040246628, "grad_norm": 6.3795905113220215, "learning_rate": 7.967828051201315e-05, "loss": 0.6307, "step": 15595 }, { "epoch": 1.056711159292635, "grad_norm": 6.77623987197876, "learning_rate": 7.967691149291533e-05, "loss": 0.6633, "step": 15596 }, { "epoch": 1.056778914560607, "grad_norm": 7.296997547149658, "learning_rate": 7.96755424738175e-05, "loss": 0.6032, "step": 15597 }, { "epoch": 1.0568466698285792, "grad_norm": 9.698561668395996, "learning_rate": 7.967417345471969e-05, "loss": 0.7025, "step": 15598 }, { "epoch": 1.0569144250965512, "grad_norm": 6.234988212585449, "learning_rate": 7.967280443562188e-05, "loss": 0.7538, "step": 15599 }, { "epoch": 1.0569821803645234, "grad_norm": 5.491406440734863, "learning_rate": 7.967143541652406e-05, "loss": 0.923, "step": 15600 }, { "epoch": 1.0570499356324954, "grad_norm": 5.402745723724365, "learning_rate": 7.967006639742624e-05, "loss": 0.6183, "step": 15601 }, { "epoch": 1.0571176909004676, "grad_norm": 6.674182415008545, "learning_rate": 7.966869737832842e-05, "loss": 0.6448, "step": 15602 }, { "epoch": 1.0571854461684396, "grad_norm": 6.507812976837158, "learning_rate": 7.966732835923062e-05, "loss": 0.8753, "step": 15603 }, { "epoch": 1.0572532014364118, "grad_norm": 5.593377590179443, "learning_rate": 7.96659593401328e-05, "loss": 0.7686, "step": 15604 }, { "epoch": 1.0573209567043838, "grad_norm": 6.690634250640869, "learning_rate": 7.966459032103498e-05, "loss": 1.1595, "step": 15605 }, { "epoch": 1.057388711972356, "grad_norm": 6.244167804718018, "learning_rate": 7.966322130193717e-05, "loss": 0.7725, "step": 15606 }, { "epoch": 1.057456467240328, "grad_norm": 5.173239707946777, "learning_rate": 7.966185228283935e-05, "loss": 0.6084, "step": 15607 }, { "epoch": 1.0575242225083, "grad_norm": 5.758760929107666, "learning_rate": 7.966048326374153e-05, "loss": 0.7413, "step": 15608 }, { "epoch": 1.0575919777762721, "grad_norm": 4.311896800994873, "learning_rate": 7.965911424464372e-05, "loss": 0.5449, "step": 15609 }, { "epoch": 1.057659733044244, "grad_norm": 7.174588203430176, "learning_rate": 7.96577452255459e-05, "loss": 0.7287, "step": 15610 }, { "epoch": 1.0577274883122163, "grad_norm": 5.071308135986328, "learning_rate": 7.965637620644809e-05, "loss": 0.6074, "step": 15611 }, { "epoch": 1.0577952435801883, "grad_norm": 6.668962001800537, "learning_rate": 7.965500718735028e-05, "loss": 0.6961, "step": 15612 }, { "epoch": 1.0578629988481605, "grad_norm": 5.634909629821777, "learning_rate": 7.965363816825246e-05, "loss": 0.7343, "step": 15613 }, { "epoch": 1.0579307541161325, "grad_norm": 5.5863800048828125, "learning_rate": 7.965226914915464e-05, "loss": 0.5128, "step": 15614 }, { "epoch": 1.0579985093841047, "grad_norm": 7.636445045471191, "learning_rate": 7.965090013005682e-05, "loss": 0.6415, "step": 15615 }, { "epoch": 1.0580662646520766, "grad_norm": 8.801673889160156, "learning_rate": 7.9649531110959e-05, "loss": 0.6823, "step": 15616 }, { "epoch": 1.0581340199200489, "grad_norm": 5.3621721267700195, "learning_rate": 7.96481620918612e-05, "loss": 0.8068, "step": 15617 }, { "epoch": 1.0582017751880208, "grad_norm": 6.023779392242432, "learning_rate": 7.964679307276337e-05, "loss": 0.8556, "step": 15618 }, { "epoch": 1.058269530455993, "grad_norm": 5.901587963104248, "learning_rate": 7.964542405366556e-05, "loss": 0.6895, "step": 15619 }, { "epoch": 1.058337285723965, "grad_norm": 5.533476829528809, "learning_rate": 7.964405503456774e-05, "loss": 0.6305, "step": 15620 }, { "epoch": 1.0584050409919372, "grad_norm": 4.859772682189941, "learning_rate": 7.964268601546993e-05, "loss": 0.4848, "step": 15621 }, { "epoch": 1.0584727962599092, "grad_norm": 4.763238430023193, "learning_rate": 7.964131699637211e-05, "loss": 0.6645, "step": 15622 }, { "epoch": 1.0585405515278814, "grad_norm": 5.757406711578369, "learning_rate": 7.963994797727429e-05, "loss": 0.5606, "step": 15623 }, { "epoch": 1.0586083067958534, "grad_norm": 5.710381984710693, "learning_rate": 7.963857895817647e-05, "loss": 0.6795, "step": 15624 }, { "epoch": 1.0586760620638254, "grad_norm": 6.074893951416016, "learning_rate": 7.963720993907865e-05, "loss": 0.7556, "step": 15625 }, { "epoch": 1.0587438173317976, "grad_norm": 4.727344512939453, "learning_rate": 7.963584091998084e-05, "loss": 0.4786, "step": 15626 }, { "epoch": 1.0588115725997695, "grad_norm": 5.696430683135986, "learning_rate": 7.963447190088302e-05, "loss": 0.8202, "step": 15627 }, { "epoch": 1.0588793278677417, "grad_norm": 6.602227210998535, "learning_rate": 7.96331028817852e-05, "loss": 0.5754, "step": 15628 }, { "epoch": 1.0589470831357137, "grad_norm": 5.352349758148193, "learning_rate": 7.963173386268739e-05, "loss": 0.7036, "step": 15629 }, { "epoch": 1.059014838403686, "grad_norm": 4.953490734100342, "learning_rate": 7.963036484358957e-05, "loss": 0.5278, "step": 15630 }, { "epoch": 1.059082593671658, "grad_norm": 5.749557971954346, "learning_rate": 7.962899582449176e-05, "loss": 0.7663, "step": 15631 }, { "epoch": 1.0591503489396301, "grad_norm": 5.01806640625, "learning_rate": 7.962762680539394e-05, "loss": 0.699, "step": 15632 }, { "epoch": 1.059218104207602, "grad_norm": 5.559873104095459, "learning_rate": 7.962625778629612e-05, "loss": 0.8066, "step": 15633 }, { "epoch": 1.0592858594755743, "grad_norm": 7.373801231384277, "learning_rate": 7.96248887671983e-05, "loss": 0.8825, "step": 15634 }, { "epoch": 1.0593536147435463, "grad_norm": 6.330979824066162, "learning_rate": 7.96235197481005e-05, "loss": 0.8277, "step": 15635 }, { "epoch": 1.0594213700115185, "grad_norm": 4.986181735992432, "learning_rate": 7.962215072900268e-05, "loss": 0.7141, "step": 15636 }, { "epoch": 1.0594891252794905, "grad_norm": 6.320187568664551, "learning_rate": 7.962078170990486e-05, "loss": 0.7843, "step": 15637 }, { "epoch": 1.0595568805474627, "grad_norm": 4.531287670135498, "learning_rate": 7.961941269080704e-05, "loss": 0.789, "step": 15638 }, { "epoch": 1.0596246358154346, "grad_norm": 6.472817897796631, "learning_rate": 7.961804367170922e-05, "loss": 0.7853, "step": 15639 }, { "epoch": 1.0596923910834066, "grad_norm": 6.977755546569824, "learning_rate": 7.961667465261141e-05, "loss": 0.8924, "step": 15640 }, { "epoch": 1.0597601463513788, "grad_norm": 4.986513137817383, "learning_rate": 7.961530563351359e-05, "loss": 0.6245, "step": 15641 }, { "epoch": 1.0598279016193508, "grad_norm": 6.008297443389893, "learning_rate": 7.961393661441577e-05, "loss": 0.6167, "step": 15642 }, { "epoch": 1.059895656887323, "grad_norm": 6.505741596221924, "learning_rate": 7.961256759531795e-05, "loss": 0.5389, "step": 15643 }, { "epoch": 1.059963412155295, "grad_norm": 7.218502521514893, "learning_rate": 7.961119857622014e-05, "loss": 0.7364, "step": 15644 }, { "epoch": 1.0600311674232672, "grad_norm": 6.085995197296143, "learning_rate": 7.960982955712233e-05, "loss": 0.9236, "step": 15645 }, { "epoch": 1.0600989226912392, "grad_norm": 5.947787284851074, "learning_rate": 7.96084605380245e-05, "loss": 0.5488, "step": 15646 }, { "epoch": 1.0601666779592114, "grad_norm": 6.727092742919922, "learning_rate": 7.960709151892669e-05, "loss": 0.9123, "step": 15647 }, { "epoch": 1.0602344332271834, "grad_norm": 6.658313751220703, "learning_rate": 7.960572249982887e-05, "loss": 0.8878, "step": 15648 }, { "epoch": 1.0603021884951556, "grad_norm": 7.2491984367370605, "learning_rate": 7.960435348073106e-05, "loss": 0.6701, "step": 15649 }, { "epoch": 1.0603699437631275, "grad_norm": 8.330307960510254, "learning_rate": 7.960298446163324e-05, "loss": 0.8083, "step": 15650 }, { "epoch": 1.0604376990310997, "grad_norm": 6.639986038208008, "learning_rate": 7.960161544253542e-05, "loss": 0.6546, "step": 15651 }, { "epoch": 1.0605054542990717, "grad_norm": 5.291508674621582, "learning_rate": 7.96002464234376e-05, "loss": 0.4886, "step": 15652 }, { "epoch": 1.060573209567044, "grad_norm": 6.049704551696777, "learning_rate": 7.95988774043398e-05, "loss": 0.8144, "step": 15653 }, { "epoch": 1.060640964835016, "grad_norm": 4.995707988739014, "learning_rate": 7.959750838524198e-05, "loss": 0.7146, "step": 15654 }, { "epoch": 1.0607087201029881, "grad_norm": 5.3545451164245605, "learning_rate": 7.959613936614416e-05, "loss": 0.6091, "step": 15655 }, { "epoch": 1.06077647537096, "grad_norm": 5.604588508605957, "learning_rate": 7.959477034704635e-05, "loss": 0.7015, "step": 15656 }, { "epoch": 1.060844230638932, "grad_norm": 4.693150997161865, "learning_rate": 7.959340132794853e-05, "loss": 0.6435, "step": 15657 }, { "epoch": 1.0609119859069043, "grad_norm": 5.757687568664551, "learning_rate": 7.959203230885071e-05, "loss": 0.7502, "step": 15658 }, { "epoch": 1.0609797411748763, "grad_norm": 5.2874064445495605, "learning_rate": 7.95906632897529e-05, "loss": 0.5808, "step": 15659 }, { "epoch": 1.0610474964428485, "grad_norm": 5.154580116271973, "learning_rate": 7.958929427065508e-05, "loss": 0.6679, "step": 15660 }, { "epoch": 1.0611152517108204, "grad_norm": 4.7557830810546875, "learning_rate": 7.958792525155726e-05, "loss": 0.452, "step": 15661 }, { "epoch": 1.0611830069787926, "grad_norm": 4.7612833976745605, "learning_rate": 7.958655623245945e-05, "loss": 0.5888, "step": 15662 }, { "epoch": 1.0612507622467646, "grad_norm": 6.610939025878906, "learning_rate": 7.958518721336164e-05, "loss": 0.8694, "step": 15663 }, { "epoch": 1.0613185175147368, "grad_norm": 4.907252788543701, "learning_rate": 7.958381819426382e-05, "loss": 0.5352, "step": 15664 }, { "epoch": 1.0613862727827088, "grad_norm": 5.652861595153809, "learning_rate": 7.9582449175166e-05, "loss": 0.7831, "step": 15665 }, { "epoch": 1.061454028050681, "grad_norm": 6.604598045349121, "learning_rate": 7.958108015606818e-05, "loss": 0.5988, "step": 15666 }, { "epoch": 1.061521783318653, "grad_norm": 6.462950706481934, "learning_rate": 7.957971113697037e-05, "loss": 0.9984, "step": 15667 }, { "epoch": 1.0615895385866252, "grad_norm": 5.563201427459717, "learning_rate": 7.957834211787255e-05, "loss": 0.583, "step": 15668 }, { "epoch": 1.0616572938545972, "grad_norm": 8.087369918823242, "learning_rate": 7.957697309877473e-05, "loss": 0.7112, "step": 15669 }, { "epoch": 1.0617250491225694, "grad_norm": 6.019291877746582, "learning_rate": 7.957560407967691e-05, "loss": 0.4987, "step": 15670 }, { "epoch": 1.0617928043905414, "grad_norm": 5.730112075805664, "learning_rate": 7.95742350605791e-05, "loss": 0.7165, "step": 15671 }, { "epoch": 1.0618605596585136, "grad_norm": 4.978113651275635, "learning_rate": 7.957286604148129e-05, "loss": 0.6191, "step": 15672 }, { "epoch": 1.0619283149264855, "grad_norm": 14.537919998168945, "learning_rate": 7.957149702238347e-05, "loss": 0.6737, "step": 15673 }, { "epoch": 1.0619960701944575, "grad_norm": 6.772073745727539, "learning_rate": 7.957012800328565e-05, "loss": 0.6738, "step": 15674 }, { "epoch": 1.0620638254624297, "grad_norm": 7.139239311218262, "learning_rate": 7.956875898418783e-05, "loss": 0.7071, "step": 15675 }, { "epoch": 1.0621315807304017, "grad_norm": 3.958707571029663, "learning_rate": 7.956738996509002e-05, "loss": 0.5774, "step": 15676 }, { "epoch": 1.062199335998374, "grad_norm": 7.803884029388428, "learning_rate": 7.95660209459922e-05, "loss": 1.1357, "step": 15677 }, { "epoch": 1.0622670912663459, "grad_norm": 4.009269714355469, "learning_rate": 7.956465192689438e-05, "loss": 0.5232, "step": 15678 }, { "epoch": 1.062334846534318, "grad_norm": 5.269588947296143, "learning_rate": 7.956328290779657e-05, "loss": 0.6048, "step": 15679 }, { "epoch": 1.06240260180229, "grad_norm": 6.135133266448975, "learning_rate": 7.956191388869875e-05, "loss": 0.8373, "step": 15680 }, { "epoch": 1.0624703570702623, "grad_norm": 7.336065769195557, "learning_rate": 7.956054486960094e-05, "loss": 0.7269, "step": 15681 }, { "epoch": 1.0625381123382343, "grad_norm": 5.202469825744629, "learning_rate": 7.955917585050312e-05, "loss": 0.5768, "step": 15682 }, { "epoch": 1.0626058676062065, "grad_norm": 9.39901351928711, "learning_rate": 7.95578068314053e-05, "loss": 0.7882, "step": 15683 }, { "epoch": 1.0626736228741784, "grad_norm": 6.687909126281738, "learning_rate": 7.955643781230748e-05, "loss": 0.7558, "step": 15684 }, { "epoch": 1.0627413781421506, "grad_norm": 6.786401271820068, "learning_rate": 7.955506879320966e-05, "loss": 0.8085, "step": 15685 }, { "epoch": 1.0628091334101226, "grad_norm": 5.360512733459473, "learning_rate": 7.955369977411185e-05, "loss": 0.7416, "step": 15686 }, { "epoch": 1.0628768886780948, "grad_norm": 6.797042369842529, "learning_rate": 7.955233075501403e-05, "loss": 0.6518, "step": 15687 }, { "epoch": 1.0629446439460668, "grad_norm": 9.375871658325195, "learning_rate": 7.955096173591622e-05, "loss": 0.6947, "step": 15688 }, { "epoch": 1.0630123992140388, "grad_norm": 4.795276641845703, "learning_rate": 7.95495927168184e-05, "loss": 0.5838, "step": 15689 }, { "epoch": 1.063080154482011, "grad_norm": 7.5510759353637695, "learning_rate": 7.954822369772059e-05, "loss": 0.6106, "step": 15690 }, { "epoch": 1.063147909749983, "grad_norm": 5.1748175621032715, "learning_rate": 7.954685467862277e-05, "loss": 0.7426, "step": 15691 }, { "epoch": 1.0632156650179552, "grad_norm": 4.72312593460083, "learning_rate": 7.954548565952495e-05, "loss": 0.4954, "step": 15692 }, { "epoch": 1.0632834202859271, "grad_norm": 6.420324325561523, "learning_rate": 7.954411664042713e-05, "loss": 0.7084, "step": 15693 }, { "epoch": 1.0633511755538994, "grad_norm": 5.019575119018555, "learning_rate": 7.954274762132931e-05, "loss": 0.6904, "step": 15694 }, { "epoch": 1.0634189308218713, "grad_norm": 7.187556743621826, "learning_rate": 7.95413786022315e-05, "loss": 0.752, "step": 15695 }, { "epoch": 1.0634866860898435, "grad_norm": 4.740534782409668, "learning_rate": 7.954000958313369e-05, "loss": 0.5472, "step": 15696 }, { "epoch": 1.0635544413578155, "grad_norm": 4.72992467880249, "learning_rate": 7.953864056403587e-05, "loss": 0.5106, "step": 15697 }, { "epoch": 1.0636221966257877, "grad_norm": 8.433707237243652, "learning_rate": 7.953727154493805e-05, "loss": 0.552, "step": 15698 }, { "epoch": 1.0636899518937597, "grad_norm": 7.640308856964111, "learning_rate": 7.953590252584024e-05, "loss": 0.5683, "step": 15699 }, { "epoch": 1.063757707161732, "grad_norm": 5.864986419677734, "learning_rate": 7.953453350674242e-05, "loss": 0.7258, "step": 15700 }, { "epoch": 1.0638254624297039, "grad_norm": 4.7822418212890625, "learning_rate": 7.95331644876446e-05, "loss": 0.7018, "step": 15701 }, { "epoch": 1.063893217697676, "grad_norm": 7.310060501098633, "learning_rate": 7.95317954685468e-05, "loss": 0.6671, "step": 15702 }, { "epoch": 1.063960972965648, "grad_norm": 5.417652606964111, "learning_rate": 7.953042644944897e-05, "loss": 0.7866, "step": 15703 }, { "epoch": 1.06402872823362, "grad_norm": 6.251247882843018, "learning_rate": 7.952905743035115e-05, "loss": 0.7288, "step": 15704 }, { "epoch": 1.0640964835015922, "grad_norm": 4.557955741882324, "learning_rate": 7.952768841125335e-05, "loss": 0.7022, "step": 15705 }, { "epoch": 1.0641642387695642, "grad_norm": 6.946822166442871, "learning_rate": 7.952631939215553e-05, "loss": 0.7494, "step": 15706 }, { "epoch": 1.0642319940375364, "grad_norm": 5.914007663726807, "learning_rate": 7.952495037305771e-05, "loss": 0.7723, "step": 15707 }, { "epoch": 1.0642997493055084, "grad_norm": 5.465998649597168, "learning_rate": 7.952358135395989e-05, "loss": 0.5798, "step": 15708 }, { "epoch": 1.0643675045734806, "grad_norm": 6.636282920837402, "learning_rate": 7.952221233486208e-05, "loss": 0.8187, "step": 15709 }, { "epoch": 1.0644352598414526, "grad_norm": 4.796360969543457, "learning_rate": 7.952084331576426e-05, "loss": 0.4452, "step": 15710 }, { "epoch": 1.0645030151094248, "grad_norm": 6.594966888427734, "learning_rate": 7.951947429666644e-05, "loss": 0.827, "step": 15711 }, { "epoch": 1.0645707703773968, "grad_norm": 4.327628135681152, "learning_rate": 7.951810527756862e-05, "loss": 0.5833, "step": 15712 }, { "epoch": 1.064638525645369, "grad_norm": 4.537369251251221, "learning_rate": 7.951673625847082e-05, "loss": 0.6398, "step": 15713 }, { "epoch": 1.064706280913341, "grad_norm": 5.633305549621582, "learning_rate": 7.9515367239373e-05, "loss": 0.6274, "step": 15714 }, { "epoch": 1.0647740361813132, "grad_norm": 5.848304271697998, "learning_rate": 7.951399822027518e-05, "loss": 0.6827, "step": 15715 }, { "epoch": 1.0648417914492851, "grad_norm": 5.550229072570801, "learning_rate": 7.951262920117736e-05, "loss": 0.5926, "step": 15716 }, { "epoch": 1.0649095467172573, "grad_norm": 5.79759407043457, "learning_rate": 7.951126018207954e-05, "loss": 0.7239, "step": 15717 }, { "epoch": 1.0649773019852293, "grad_norm": 5.386721134185791, "learning_rate": 7.950989116298173e-05, "loss": 0.6221, "step": 15718 }, { "epoch": 1.0650450572532015, "grad_norm": 6.526064395904541, "learning_rate": 7.950852214388391e-05, "loss": 0.6978, "step": 15719 }, { "epoch": 1.0651128125211735, "grad_norm": 5.103979587554932, "learning_rate": 7.95071531247861e-05, "loss": 0.5707, "step": 15720 }, { "epoch": 1.0651805677891457, "grad_norm": 7.769407749176025, "learning_rate": 7.950578410568827e-05, "loss": 0.7698, "step": 15721 }, { "epoch": 1.0652483230571177, "grad_norm": 4.609751224517822, "learning_rate": 7.950441508659047e-05, "loss": 0.8119, "step": 15722 }, { "epoch": 1.0653160783250897, "grad_norm": 4.900935649871826, "learning_rate": 7.950304606749265e-05, "loss": 0.6217, "step": 15723 }, { "epoch": 1.0653838335930619, "grad_norm": 6.532763957977295, "learning_rate": 7.950167704839483e-05, "loss": 0.7083, "step": 15724 }, { "epoch": 1.0654515888610339, "grad_norm": 7.212814807891846, "learning_rate": 7.950030802929701e-05, "loss": 0.6279, "step": 15725 }, { "epoch": 1.065519344129006, "grad_norm": 6.178891181945801, "learning_rate": 7.949893901019919e-05, "loss": 0.7573, "step": 15726 }, { "epoch": 1.065587099396978, "grad_norm": 4.8897528648376465, "learning_rate": 7.949756999110138e-05, "loss": 0.5121, "step": 15727 }, { "epoch": 1.0656548546649502, "grad_norm": 7.742895603179932, "learning_rate": 7.949620097200356e-05, "loss": 0.7652, "step": 15728 }, { "epoch": 1.0657226099329222, "grad_norm": 6.08188533782959, "learning_rate": 7.949483195290574e-05, "loss": 0.7593, "step": 15729 }, { "epoch": 1.0657903652008944, "grad_norm": 6.451333999633789, "learning_rate": 7.949346293380793e-05, "loss": 0.5019, "step": 15730 }, { "epoch": 1.0658581204688664, "grad_norm": 5.50657844543457, "learning_rate": 7.94920939147101e-05, "loss": 0.8513, "step": 15731 }, { "epoch": 1.0659258757368386, "grad_norm": 5.822981834411621, "learning_rate": 7.94907248956123e-05, "loss": 0.673, "step": 15732 }, { "epoch": 1.0659936310048106, "grad_norm": 6.3767991065979, "learning_rate": 7.948935587651448e-05, "loss": 0.7667, "step": 15733 }, { "epoch": 1.0660613862727828, "grad_norm": 6.416679382324219, "learning_rate": 7.948798685741666e-05, "loss": 0.7679, "step": 15734 }, { "epoch": 1.0661291415407548, "grad_norm": 4.7926344871521, "learning_rate": 7.948661783831884e-05, "loss": 0.5938, "step": 15735 }, { "epoch": 1.066196896808727, "grad_norm": 5.087335586547852, "learning_rate": 7.948524881922103e-05, "loss": 0.5486, "step": 15736 }, { "epoch": 1.066264652076699, "grad_norm": 6.492136478424072, "learning_rate": 7.948387980012321e-05, "loss": 0.8351, "step": 15737 }, { "epoch": 1.066332407344671, "grad_norm": 6.300568103790283, "learning_rate": 7.94825107810254e-05, "loss": 0.7037, "step": 15738 }, { "epoch": 1.0664001626126431, "grad_norm": 5.677840709686279, "learning_rate": 7.948114176192758e-05, "loss": 0.6712, "step": 15739 }, { "epoch": 1.0664679178806151, "grad_norm": 4.923882961273193, "learning_rate": 7.947977274282976e-05, "loss": 0.7166, "step": 15740 }, { "epoch": 1.0665356731485873, "grad_norm": 8.807173728942871, "learning_rate": 7.947840372373195e-05, "loss": 0.5985, "step": 15741 }, { "epoch": 1.0666034284165593, "grad_norm": 5.597122669219971, "learning_rate": 7.947703470463413e-05, "loss": 0.5973, "step": 15742 }, { "epoch": 1.0666711836845315, "grad_norm": 6.10443639755249, "learning_rate": 7.947566568553631e-05, "loss": 0.7233, "step": 15743 }, { "epoch": 1.0667389389525035, "grad_norm": 5.447210788726807, "learning_rate": 7.947429666643849e-05, "loss": 0.7122, "step": 15744 }, { "epoch": 1.0668066942204757, "grad_norm": 7.586278438568115, "learning_rate": 7.947292764734068e-05, "loss": 0.8076, "step": 15745 }, { "epoch": 1.0668744494884477, "grad_norm": 7.775143623352051, "learning_rate": 7.947155862824286e-05, "loss": 0.7714, "step": 15746 }, { "epoch": 1.0669422047564199, "grad_norm": 6.429764270782471, "learning_rate": 7.947018960914505e-05, "loss": 0.5349, "step": 15747 }, { "epoch": 1.0670099600243919, "grad_norm": 5.419410705566406, "learning_rate": 7.946882059004724e-05, "loss": 0.6813, "step": 15748 }, { "epoch": 1.067077715292364, "grad_norm": 8.733865737915039, "learning_rate": 7.946745157094942e-05, "loss": 0.7048, "step": 15749 }, { "epoch": 1.067145470560336, "grad_norm": 5.640448093414307, "learning_rate": 7.94660825518516e-05, "loss": 0.6213, "step": 15750 }, { "epoch": 1.0672132258283082, "grad_norm": 6.9355692863464355, "learning_rate": 7.94647135327538e-05, "loss": 0.7853, "step": 15751 }, { "epoch": 1.0672809810962802, "grad_norm": 6.067697048187256, "learning_rate": 7.946334451365597e-05, "loss": 0.7184, "step": 15752 }, { "epoch": 1.0673487363642522, "grad_norm": 5.817471981048584, "learning_rate": 7.946197549455815e-05, "loss": 0.7555, "step": 15753 }, { "epoch": 1.0674164916322244, "grad_norm": 5.668905735015869, "learning_rate": 7.946060647546035e-05, "loss": 0.6692, "step": 15754 }, { "epoch": 1.0674842469001964, "grad_norm": 8.108335494995117, "learning_rate": 7.945923745636253e-05, "loss": 0.9925, "step": 15755 }, { "epoch": 1.0675520021681686, "grad_norm": 9.043440818786621, "learning_rate": 7.945786843726471e-05, "loss": 0.7492, "step": 15756 }, { "epoch": 1.0676197574361406, "grad_norm": 5.559685230255127, "learning_rate": 7.945649941816689e-05, "loss": 0.6123, "step": 15757 }, { "epoch": 1.0676875127041128, "grad_norm": 6.704611778259277, "learning_rate": 7.945513039906907e-05, "loss": 0.8296, "step": 15758 }, { "epoch": 1.0677552679720848, "grad_norm": 4.621387004852295, "learning_rate": 7.945376137997126e-05, "loss": 0.5638, "step": 15759 }, { "epoch": 1.067823023240057, "grad_norm": 7.376697063446045, "learning_rate": 7.945239236087344e-05, "loss": 0.7056, "step": 15760 }, { "epoch": 1.067890778508029, "grad_norm": 6.9395527839660645, "learning_rate": 7.945102334177562e-05, "loss": 0.9354, "step": 15761 }, { "epoch": 1.0679585337760011, "grad_norm": 6.474776268005371, "learning_rate": 7.94496543226778e-05, "loss": 0.569, "step": 15762 }, { "epoch": 1.0680262890439731, "grad_norm": 5.319557189941406, "learning_rate": 7.944828530357998e-05, "loss": 0.9097, "step": 15763 }, { "epoch": 1.0680940443119453, "grad_norm": 6.072210788726807, "learning_rate": 7.944691628448218e-05, "loss": 0.8421, "step": 15764 }, { "epoch": 1.0681617995799173, "grad_norm": 5.9819865226745605, "learning_rate": 7.944554726538436e-05, "loss": 0.8024, "step": 15765 }, { "epoch": 1.0682295548478895, "grad_norm": 4.52716588973999, "learning_rate": 7.944417824628654e-05, "loss": 0.6446, "step": 15766 }, { "epoch": 1.0682973101158615, "grad_norm": 5.481772422790527, "learning_rate": 7.944280922718872e-05, "loss": 0.5395, "step": 15767 }, { "epoch": 1.0683650653838337, "grad_norm": 5.463979244232178, "learning_rate": 7.944144020809091e-05, "loss": 0.5777, "step": 15768 }, { "epoch": 1.0684328206518057, "grad_norm": 6.096555233001709, "learning_rate": 7.94400711889931e-05, "loss": 0.6996, "step": 15769 }, { "epoch": 1.0685005759197779, "grad_norm": 4.486513137817383, "learning_rate": 7.943870216989527e-05, "loss": 0.6826, "step": 15770 }, { "epoch": 1.0685683311877499, "grad_norm": 5.200229167938232, "learning_rate": 7.943733315079745e-05, "loss": 0.6869, "step": 15771 }, { "epoch": 1.0686360864557218, "grad_norm": 7.219493389129639, "learning_rate": 7.943596413169963e-05, "loss": 0.6478, "step": 15772 }, { "epoch": 1.068703841723694, "grad_norm": 5.901253700256348, "learning_rate": 7.943459511260183e-05, "loss": 0.7505, "step": 15773 }, { "epoch": 1.068771596991666, "grad_norm": 5.0671772956848145, "learning_rate": 7.943322609350401e-05, "loss": 0.7321, "step": 15774 }, { "epoch": 1.0688393522596382, "grad_norm": 6.431093215942383, "learning_rate": 7.943185707440619e-05, "loss": 0.6674, "step": 15775 }, { "epoch": 1.0689071075276102, "grad_norm": 5.902375221252441, "learning_rate": 7.943048805530837e-05, "loss": 0.7254, "step": 15776 }, { "epoch": 1.0689748627955824, "grad_norm": 4.459395408630371, "learning_rate": 7.942911903621056e-05, "loss": 0.6371, "step": 15777 }, { "epoch": 1.0690426180635544, "grad_norm": 5.590951442718506, "learning_rate": 7.942775001711274e-05, "loss": 0.7365, "step": 15778 }, { "epoch": 1.0691103733315266, "grad_norm": 9.108878135681152, "learning_rate": 7.942638099801492e-05, "loss": 0.8241, "step": 15779 }, { "epoch": 1.0691781285994986, "grad_norm": 5.599368572235107, "learning_rate": 7.94250119789171e-05, "loss": 0.8881, "step": 15780 }, { "epoch": 1.0692458838674708, "grad_norm": 5.293349266052246, "learning_rate": 7.942364295981929e-05, "loss": 0.5408, "step": 15781 }, { "epoch": 1.0693136391354428, "grad_norm": 6.881068706512451, "learning_rate": 7.942227394072148e-05, "loss": 0.8773, "step": 15782 }, { "epoch": 1.069381394403415, "grad_norm": 7.475467205047607, "learning_rate": 7.942090492162366e-05, "loss": 0.6288, "step": 15783 }, { "epoch": 1.069449149671387, "grad_norm": 6.4415669441223145, "learning_rate": 7.941953590252584e-05, "loss": 0.7308, "step": 15784 }, { "epoch": 1.0695169049393591, "grad_norm": 4.710741996765137, "learning_rate": 7.941816688342802e-05, "loss": 0.5095, "step": 15785 }, { "epoch": 1.0695846602073311, "grad_norm": 6.763927459716797, "learning_rate": 7.94167978643302e-05, "loss": 0.7424, "step": 15786 }, { "epoch": 1.069652415475303, "grad_norm": 6.286536693572998, "learning_rate": 7.94154288452324e-05, "loss": 0.5971, "step": 15787 }, { "epoch": 1.0697201707432753, "grad_norm": 6.880483150482178, "learning_rate": 7.941405982613457e-05, "loss": 0.8842, "step": 15788 }, { "epoch": 1.0697879260112473, "grad_norm": 5.467273712158203, "learning_rate": 7.941269080703675e-05, "loss": 0.7985, "step": 15789 }, { "epoch": 1.0698556812792195, "grad_norm": 5.367607593536377, "learning_rate": 7.941132178793894e-05, "loss": 0.8427, "step": 15790 }, { "epoch": 1.0699234365471915, "grad_norm": 5.593782424926758, "learning_rate": 7.940995276884113e-05, "loss": 0.5909, "step": 15791 }, { "epoch": 1.0699911918151637, "grad_norm": 4.525633811950684, "learning_rate": 7.940858374974331e-05, "loss": 0.5472, "step": 15792 }, { "epoch": 1.0700589470831356, "grad_norm": 5.167564868927002, "learning_rate": 7.940721473064549e-05, "loss": 0.4875, "step": 15793 }, { "epoch": 1.0701267023511079, "grad_norm": 4.89332914352417, "learning_rate": 7.940584571154768e-05, "loss": 0.731, "step": 15794 }, { "epoch": 1.0701944576190798, "grad_norm": 6.20195198059082, "learning_rate": 7.940447669244986e-05, "loss": 0.6374, "step": 15795 }, { "epoch": 1.070262212887052, "grad_norm": 4.612509727478027, "learning_rate": 7.940310767335204e-05, "loss": 0.5493, "step": 15796 }, { "epoch": 1.070329968155024, "grad_norm": 7.004962921142578, "learning_rate": 7.940173865425424e-05, "loss": 0.5409, "step": 15797 }, { "epoch": 1.0703977234229962, "grad_norm": 5.540370941162109, "learning_rate": 7.940036963515642e-05, "loss": 0.6747, "step": 15798 }, { "epoch": 1.0704654786909682, "grad_norm": 8.158012390136719, "learning_rate": 7.93990006160586e-05, "loss": 0.6241, "step": 15799 }, { "epoch": 1.0705332339589404, "grad_norm": 8.461899757385254, "learning_rate": 7.939763159696079e-05, "loss": 0.5866, "step": 15800 }, { "epoch": 1.0706009892269124, "grad_norm": 6.685081958770752, "learning_rate": 7.939626257786297e-05, "loss": 0.7346, "step": 15801 }, { "epoch": 1.0706687444948844, "grad_norm": 6.544353485107422, "learning_rate": 7.939489355876515e-05, "loss": 0.6462, "step": 15802 }, { "epoch": 1.0707364997628566, "grad_norm": 5.48783016204834, "learning_rate": 7.939352453966733e-05, "loss": 0.6524, "step": 15803 }, { "epoch": 1.0708042550308285, "grad_norm": 6.841857433319092, "learning_rate": 7.939215552056951e-05, "loss": 0.6979, "step": 15804 }, { "epoch": 1.0708720102988007, "grad_norm": 8.2971773147583, "learning_rate": 7.939078650147171e-05, "loss": 0.9404, "step": 15805 }, { "epoch": 1.0709397655667727, "grad_norm": 6.574617385864258, "learning_rate": 7.938941748237389e-05, "loss": 0.699, "step": 15806 }, { "epoch": 1.071007520834745, "grad_norm": 4.680912017822266, "learning_rate": 7.938804846327607e-05, "loss": 0.6734, "step": 15807 }, { "epoch": 1.071075276102717, "grad_norm": 6.463378429412842, "learning_rate": 7.938667944417825e-05, "loss": 0.596, "step": 15808 }, { "epoch": 1.0711430313706891, "grad_norm": 5.354882717132568, "learning_rate": 7.938531042508044e-05, "loss": 0.6923, "step": 15809 }, { "epoch": 1.071210786638661, "grad_norm": 6.985996246337891, "learning_rate": 7.938394140598262e-05, "loss": 0.5105, "step": 15810 }, { "epoch": 1.0712785419066333, "grad_norm": 7.460660457611084, "learning_rate": 7.93825723868848e-05, "loss": 0.8449, "step": 15811 }, { "epoch": 1.0713462971746053, "grad_norm": 7.631282806396484, "learning_rate": 7.938120336778698e-05, "loss": 0.7648, "step": 15812 }, { "epoch": 1.0714140524425775, "grad_norm": 6.466649532318115, "learning_rate": 7.937983434868916e-05, "loss": 0.5677, "step": 15813 }, { "epoch": 1.0714818077105495, "grad_norm": 4.905895709991455, "learning_rate": 7.937846532959136e-05, "loss": 0.7089, "step": 15814 }, { "epoch": 1.0715495629785217, "grad_norm": 5.96923828125, "learning_rate": 7.937709631049354e-05, "loss": 0.8982, "step": 15815 }, { "epoch": 1.0716173182464936, "grad_norm": 5.1013383865356445, "learning_rate": 7.937572729139572e-05, "loss": 0.6006, "step": 15816 }, { "epoch": 1.0716850735144658, "grad_norm": 5.436969757080078, "learning_rate": 7.93743582722979e-05, "loss": 0.6946, "step": 15817 }, { "epoch": 1.0717528287824378, "grad_norm": 4.626980304718018, "learning_rate": 7.937298925320008e-05, "loss": 0.5443, "step": 15818 }, { "epoch": 1.07182058405041, "grad_norm": 4.790196418762207, "learning_rate": 7.937162023410227e-05, "loss": 0.6067, "step": 15819 }, { "epoch": 1.071888339318382, "grad_norm": 7.302325248718262, "learning_rate": 7.937025121500445e-05, "loss": 0.7859, "step": 15820 }, { "epoch": 1.071956094586354, "grad_norm": 9.226067543029785, "learning_rate": 7.936888219590663e-05, "loss": 0.6142, "step": 15821 }, { "epoch": 1.0720238498543262, "grad_norm": 7.070141792297363, "learning_rate": 7.936751317680881e-05, "loss": 0.9445, "step": 15822 }, { "epoch": 1.0720916051222982, "grad_norm": 5.061405658721924, "learning_rate": 7.936614415771101e-05, "loss": 0.5768, "step": 15823 }, { "epoch": 1.0721593603902704, "grad_norm": 4.736205101013184, "learning_rate": 7.936477513861319e-05, "loss": 0.5623, "step": 15824 }, { "epoch": 1.0722271156582424, "grad_norm": 5.544649124145508, "learning_rate": 7.936340611951537e-05, "loss": 0.6344, "step": 15825 }, { "epoch": 1.0722948709262146, "grad_norm": 5.628857135772705, "learning_rate": 7.936203710041755e-05, "loss": 0.7983, "step": 15826 }, { "epoch": 1.0723626261941865, "grad_norm": 7.328362464904785, "learning_rate": 7.936066808131973e-05, "loss": 0.6597, "step": 15827 }, { "epoch": 1.0724303814621587, "grad_norm": 4.993453025817871, "learning_rate": 7.935929906222192e-05, "loss": 0.6878, "step": 15828 }, { "epoch": 1.0724981367301307, "grad_norm": 4.596197605133057, "learning_rate": 7.93579300431241e-05, "loss": 0.7162, "step": 15829 }, { "epoch": 1.072565891998103, "grad_norm": 7.738362789154053, "learning_rate": 7.935656102402628e-05, "loss": 0.644, "step": 15830 }, { "epoch": 1.072633647266075, "grad_norm": 6.615604877471924, "learning_rate": 7.935519200492846e-05, "loss": 0.7576, "step": 15831 }, { "epoch": 1.072701402534047, "grad_norm": 7.723992347717285, "learning_rate": 7.935382298583066e-05, "loss": 0.8502, "step": 15832 }, { "epoch": 1.072769157802019, "grad_norm": 5.472813606262207, "learning_rate": 7.935245396673284e-05, "loss": 0.6046, "step": 15833 }, { "epoch": 1.0728369130699913, "grad_norm": 5.456359386444092, "learning_rate": 7.935108494763502e-05, "loss": 0.9344, "step": 15834 }, { "epoch": 1.0729046683379633, "grad_norm": 5.542731285095215, "learning_rate": 7.93497159285372e-05, "loss": 0.7518, "step": 15835 }, { "epoch": 1.0729724236059353, "grad_norm": 5.318366050720215, "learning_rate": 7.934834690943938e-05, "loss": 0.6448, "step": 15836 }, { "epoch": 1.0730401788739075, "grad_norm": 5.1860432624816895, "learning_rate": 7.934697789034157e-05, "loss": 0.8315, "step": 15837 }, { "epoch": 1.0731079341418794, "grad_norm": 6.298211097717285, "learning_rate": 7.934560887124375e-05, "loss": 0.8517, "step": 15838 }, { "epoch": 1.0731756894098516, "grad_norm": 5.515106201171875, "learning_rate": 7.934423985214593e-05, "loss": 0.462, "step": 15839 }, { "epoch": 1.0732434446778236, "grad_norm": 5.4662394523620605, "learning_rate": 7.934287083304813e-05, "loss": 0.7735, "step": 15840 }, { "epoch": 1.0733111999457958, "grad_norm": 6.610855579376221, "learning_rate": 7.934150181395031e-05, "loss": 0.9312, "step": 15841 }, { "epoch": 1.0733789552137678, "grad_norm": 7.660674571990967, "learning_rate": 7.934013279485249e-05, "loss": 0.5592, "step": 15842 }, { "epoch": 1.07344671048174, "grad_norm": 5.828925609588623, "learning_rate": 7.933876377575468e-05, "loss": 0.7, "step": 15843 }, { "epoch": 1.073514465749712, "grad_norm": 6.494380474090576, "learning_rate": 7.933739475665686e-05, "loss": 0.6033, "step": 15844 }, { "epoch": 1.0735822210176842, "grad_norm": 7.698443412780762, "learning_rate": 7.933602573755904e-05, "loss": 0.8435, "step": 15845 }, { "epoch": 1.0736499762856562, "grad_norm": 5.353089809417725, "learning_rate": 7.933465671846124e-05, "loss": 0.9311, "step": 15846 }, { "epoch": 1.0737177315536284, "grad_norm": 6.445722579956055, "learning_rate": 7.933328769936342e-05, "loss": 0.6781, "step": 15847 }, { "epoch": 1.0737854868216004, "grad_norm": 4.490242004394531, "learning_rate": 7.93319186802656e-05, "loss": 0.8394, "step": 15848 }, { "epoch": 1.0738532420895726, "grad_norm": 4.664844989776611, "learning_rate": 7.933054966116778e-05, "loss": 0.7149, "step": 15849 }, { "epoch": 1.0739209973575445, "grad_norm": 5.997525691986084, "learning_rate": 7.932918064206996e-05, "loss": 0.6835, "step": 15850 }, { "epoch": 1.0739887526255165, "grad_norm": 8.030609130859375, "learning_rate": 7.932781162297215e-05, "loss": 0.8024, "step": 15851 }, { "epoch": 1.0740565078934887, "grad_norm": 7.785398960113525, "learning_rate": 7.932644260387433e-05, "loss": 0.7362, "step": 15852 }, { "epoch": 1.0741242631614607, "grad_norm": 4.920596122741699, "learning_rate": 7.932507358477651e-05, "loss": 0.7132, "step": 15853 }, { "epoch": 1.074192018429433, "grad_norm": 4.117366313934326, "learning_rate": 7.93237045656787e-05, "loss": 0.5307, "step": 15854 }, { "epoch": 1.0742597736974049, "grad_norm": 5.135700702667236, "learning_rate": 7.932233554658089e-05, "loss": 0.6229, "step": 15855 }, { "epoch": 1.074327528965377, "grad_norm": 6.207793712615967, "learning_rate": 7.932096652748307e-05, "loss": 0.5498, "step": 15856 }, { "epoch": 1.074395284233349, "grad_norm": 6.431981563568115, "learning_rate": 7.931959750838525e-05, "loss": 0.6798, "step": 15857 }, { "epoch": 1.0744630395013213, "grad_norm": 5.320846080780029, "learning_rate": 7.931822848928743e-05, "loss": 0.6198, "step": 15858 }, { "epoch": 1.0745307947692933, "grad_norm": 5.788670539855957, "learning_rate": 7.931685947018961e-05, "loss": 0.8206, "step": 15859 }, { "epoch": 1.0745985500372655, "grad_norm": 7.664066314697266, "learning_rate": 7.93154904510918e-05, "loss": 0.6584, "step": 15860 }, { "epoch": 1.0746663053052374, "grad_norm": 6.991944313049316, "learning_rate": 7.931412143199398e-05, "loss": 0.9677, "step": 15861 }, { "epoch": 1.0747340605732096, "grad_norm": 5.329096794128418, "learning_rate": 7.931275241289616e-05, "loss": 0.6164, "step": 15862 }, { "epoch": 1.0748018158411816, "grad_norm": 6.307762145996094, "learning_rate": 7.931138339379834e-05, "loss": 0.8304, "step": 15863 }, { "epoch": 1.0748695711091538, "grad_norm": 5.538243770599365, "learning_rate": 7.931001437470054e-05, "loss": 0.7314, "step": 15864 }, { "epoch": 1.0749373263771258, "grad_norm": 6.261715888977051, "learning_rate": 7.930864535560272e-05, "loss": 0.8585, "step": 15865 }, { "epoch": 1.075005081645098, "grad_norm": 5.783536434173584, "learning_rate": 7.93072763365049e-05, "loss": 0.6301, "step": 15866 }, { "epoch": 1.07507283691307, "grad_norm": 5.780196189880371, "learning_rate": 7.930590731740708e-05, "loss": 0.6306, "step": 15867 }, { "epoch": 1.0751405921810422, "grad_norm": 5.178442478179932, "learning_rate": 7.930453829830926e-05, "loss": 0.4359, "step": 15868 }, { "epoch": 1.0752083474490142, "grad_norm": 7.060993194580078, "learning_rate": 7.930316927921145e-05, "loss": 0.5727, "step": 15869 }, { "epoch": 1.0752761027169861, "grad_norm": 5.602401256561279, "learning_rate": 7.930180026011363e-05, "loss": 0.5486, "step": 15870 }, { "epoch": 1.0753438579849584, "grad_norm": 6.208281993865967, "learning_rate": 7.930043124101581e-05, "loss": 0.7401, "step": 15871 }, { "epoch": 1.0754116132529303, "grad_norm": 10.482538223266602, "learning_rate": 7.9299062221918e-05, "loss": 0.8773, "step": 15872 }, { "epoch": 1.0754793685209025, "grad_norm": 5.301812171936035, "learning_rate": 7.929769320282017e-05, "loss": 0.6965, "step": 15873 }, { "epoch": 1.0755471237888745, "grad_norm": 10.284536361694336, "learning_rate": 7.929632418372237e-05, "loss": 0.7072, "step": 15874 }, { "epoch": 1.0756148790568467, "grad_norm": 5.5219550132751465, "learning_rate": 7.929495516462455e-05, "loss": 0.6087, "step": 15875 }, { "epoch": 1.0756826343248187, "grad_norm": 5.441367149353027, "learning_rate": 7.929358614552673e-05, "loss": 0.7554, "step": 15876 }, { "epoch": 1.075750389592791, "grad_norm": 5.82017707824707, "learning_rate": 7.929221712642891e-05, "loss": 0.6087, "step": 15877 }, { "epoch": 1.0758181448607629, "grad_norm": 5.13921594619751, "learning_rate": 7.92908481073311e-05, "loss": 0.791, "step": 15878 }, { "epoch": 1.075885900128735, "grad_norm": 5.5391926765441895, "learning_rate": 7.928947908823328e-05, "loss": 0.6649, "step": 15879 }, { "epoch": 1.075953655396707, "grad_norm": 6.536075115203857, "learning_rate": 7.928811006913546e-05, "loss": 0.6299, "step": 15880 }, { "epoch": 1.0760214106646793, "grad_norm": 5.445075511932373, "learning_rate": 7.928674105003764e-05, "loss": 0.7024, "step": 15881 }, { "epoch": 1.0760891659326512, "grad_norm": 5.205681800842285, "learning_rate": 7.928537203093982e-05, "loss": 0.4982, "step": 15882 }, { "epoch": 1.0761569212006235, "grad_norm": 4.551524639129639, "learning_rate": 7.928400301184202e-05, "loss": 0.643, "step": 15883 }, { "epoch": 1.0762246764685954, "grad_norm": 5.783960819244385, "learning_rate": 7.92826339927442e-05, "loss": 0.6593, "step": 15884 }, { "epoch": 1.0762924317365674, "grad_norm": 5.964574337005615, "learning_rate": 7.928126497364638e-05, "loss": 0.6811, "step": 15885 }, { "epoch": 1.0763601870045396, "grad_norm": 7.782623767852783, "learning_rate": 7.927989595454856e-05, "loss": 0.7981, "step": 15886 }, { "epoch": 1.0764279422725116, "grad_norm": 6.336511135101318, "learning_rate": 7.927852693545075e-05, "loss": 0.9382, "step": 15887 }, { "epoch": 1.0764956975404838, "grad_norm": 4.654166221618652, "learning_rate": 7.927715791635293e-05, "loss": 0.5157, "step": 15888 }, { "epoch": 1.0765634528084558, "grad_norm": 4.949594974517822, "learning_rate": 7.927578889725511e-05, "loss": 0.662, "step": 15889 }, { "epoch": 1.076631208076428, "grad_norm": 6.451333999633789, "learning_rate": 7.927441987815731e-05, "loss": 0.7434, "step": 15890 }, { "epoch": 1.0766989633444, "grad_norm": 6.374203681945801, "learning_rate": 7.927305085905949e-05, "loss": 0.7248, "step": 15891 }, { "epoch": 1.0767667186123722, "grad_norm": 8.0396089553833, "learning_rate": 7.927168183996167e-05, "loss": 0.589, "step": 15892 }, { "epoch": 1.0768344738803441, "grad_norm": 9.502918243408203, "learning_rate": 7.927031282086386e-05, "loss": 0.9467, "step": 15893 }, { "epoch": 1.0769022291483163, "grad_norm": 5.921857833862305, "learning_rate": 7.926894380176604e-05, "loss": 0.8063, "step": 15894 }, { "epoch": 1.0769699844162883, "grad_norm": 6.634448051452637, "learning_rate": 7.926757478266822e-05, "loss": 0.8519, "step": 15895 }, { "epoch": 1.0770377396842605, "grad_norm": 4.550981044769287, "learning_rate": 7.92662057635704e-05, "loss": 0.6937, "step": 15896 }, { "epoch": 1.0771054949522325, "grad_norm": 6.369821071624756, "learning_rate": 7.92648367444726e-05, "loss": 0.7513, "step": 15897 }, { "epoch": 1.0771732502202047, "grad_norm": 6.474839210510254, "learning_rate": 7.926346772537478e-05, "loss": 0.8707, "step": 15898 }, { "epoch": 1.0772410054881767, "grad_norm": 5.136695861816406, "learning_rate": 7.926209870627696e-05, "loss": 0.5493, "step": 15899 }, { "epoch": 1.0773087607561487, "grad_norm": 5.588931560516357, "learning_rate": 7.926072968717914e-05, "loss": 0.7134, "step": 15900 }, { "epoch": 1.0773765160241209, "grad_norm": 6.223124980926514, "learning_rate": 7.925936066808133e-05, "loss": 0.7647, "step": 15901 }, { "epoch": 1.0774442712920929, "grad_norm": 4.788752555847168, "learning_rate": 7.925799164898351e-05, "loss": 0.439, "step": 15902 }, { "epoch": 1.077512026560065, "grad_norm": 5.260895252227783, "learning_rate": 7.92566226298857e-05, "loss": 0.6204, "step": 15903 }, { "epoch": 1.077579781828037, "grad_norm": 5.823332786560059, "learning_rate": 7.925525361078787e-05, "loss": 0.7579, "step": 15904 }, { "epoch": 1.0776475370960092, "grad_norm": 5.55746603012085, "learning_rate": 7.925388459169005e-05, "loss": 0.6343, "step": 15905 }, { "epoch": 1.0777152923639812, "grad_norm": 5.603518962860107, "learning_rate": 7.925251557259225e-05, "loss": 0.7849, "step": 15906 }, { "epoch": 1.0777830476319534, "grad_norm": 5.885906219482422, "learning_rate": 7.925114655349443e-05, "loss": 0.713, "step": 15907 }, { "epoch": 1.0778508028999254, "grad_norm": 9.000563621520996, "learning_rate": 7.924977753439661e-05, "loss": 0.8531, "step": 15908 }, { "epoch": 1.0779185581678976, "grad_norm": 5.798495769500732, "learning_rate": 7.924840851529879e-05, "loss": 0.7047, "step": 15909 }, { "epoch": 1.0779863134358696, "grad_norm": 5.180234432220459, "learning_rate": 7.924703949620098e-05, "loss": 0.4218, "step": 15910 }, { "epoch": 1.0780540687038418, "grad_norm": 5.090123653411865, "learning_rate": 7.924567047710316e-05, "loss": 0.7473, "step": 15911 }, { "epoch": 1.0781218239718138, "grad_norm": 4.858051300048828, "learning_rate": 7.924430145800534e-05, "loss": 0.5706, "step": 15912 }, { "epoch": 1.078189579239786, "grad_norm": 5.382134914398193, "learning_rate": 7.924293243890752e-05, "loss": 0.7633, "step": 15913 }, { "epoch": 1.078257334507758, "grad_norm": 6.212465763092041, "learning_rate": 7.92415634198097e-05, "loss": 0.6788, "step": 15914 }, { "epoch": 1.0783250897757302, "grad_norm": 6.3222880363464355, "learning_rate": 7.92401944007119e-05, "loss": 0.7848, "step": 15915 }, { "epoch": 1.0783928450437021, "grad_norm": 6.96838903427124, "learning_rate": 7.923882538161408e-05, "loss": 0.8652, "step": 15916 }, { "epoch": 1.0784606003116743, "grad_norm": 4.135616302490234, "learning_rate": 7.923745636251626e-05, "loss": 0.4712, "step": 15917 }, { "epoch": 1.0785283555796463, "grad_norm": 6.912881374359131, "learning_rate": 7.923608734341844e-05, "loss": 0.7325, "step": 15918 }, { "epoch": 1.0785961108476183, "grad_norm": 5.951952934265137, "learning_rate": 7.923471832432062e-05, "loss": 0.645, "step": 15919 }, { "epoch": 1.0786638661155905, "grad_norm": 5.720697402954102, "learning_rate": 7.923334930522281e-05, "loss": 0.751, "step": 15920 }, { "epoch": 1.0787316213835625, "grad_norm": 6.125636577606201, "learning_rate": 7.9231980286125e-05, "loss": 0.7128, "step": 15921 }, { "epoch": 1.0787993766515347, "grad_norm": 7.798324108123779, "learning_rate": 7.923061126702717e-05, "loss": 1.0655, "step": 15922 }, { "epoch": 1.0788671319195067, "grad_norm": 7.423648834228516, "learning_rate": 7.922924224792935e-05, "loss": 0.8611, "step": 15923 }, { "epoch": 1.0789348871874789, "grad_norm": 4.67471170425415, "learning_rate": 7.922787322883155e-05, "loss": 0.4459, "step": 15924 }, { "epoch": 1.0790026424554509, "grad_norm": 6.07832670211792, "learning_rate": 7.922650420973373e-05, "loss": 0.706, "step": 15925 }, { "epoch": 1.079070397723423, "grad_norm": 6.1671624183654785, "learning_rate": 7.922513519063591e-05, "loss": 0.8027, "step": 15926 }, { "epoch": 1.079138152991395, "grad_norm": 6.181910991668701, "learning_rate": 7.922376617153809e-05, "loss": 0.726, "step": 15927 }, { "epoch": 1.0792059082593672, "grad_norm": 6.463537693023682, "learning_rate": 7.922239715244027e-05, "loss": 0.6578, "step": 15928 }, { "epoch": 1.0792736635273392, "grad_norm": 6.259862422943115, "learning_rate": 7.922102813334246e-05, "loss": 0.6437, "step": 15929 }, { "epoch": 1.0793414187953114, "grad_norm": 6.758423805236816, "learning_rate": 7.921965911424464e-05, "loss": 0.6028, "step": 15930 }, { "epoch": 1.0794091740632834, "grad_norm": 5.508148193359375, "learning_rate": 7.921829009514682e-05, "loss": 0.6397, "step": 15931 }, { "epoch": 1.0794769293312556, "grad_norm": 4.803408145904541, "learning_rate": 7.9216921076049e-05, "loss": 0.8471, "step": 15932 }, { "epoch": 1.0795446845992276, "grad_norm": 5.470789432525635, "learning_rate": 7.92155520569512e-05, "loss": 0.9083, "step": 15933 }, { "epoch": 1.0796124398671996, "grad_norm": 5.594363212585449, "learning_rate": 7.921418303785338e-05, "loss": 0.6955, "step": 15934 }, { "epoch": 1.0796801951351718, "grad_norm": 6.420119285583496, "learning_rate": 7.921281401875556e-05, "loss": 0.8706, "step": 15935 }, { "epoch": 1.0797479504031438, "grad_norm": 5.343960285186768, "learning_rate": 7.921144499965775e-05, "loss": 0.8528, "step": 15936 }, { "epoch": 1.079815705671116, "grad_norm": 6.668441295623779, "learning_rate": 7.921007598055993e-05, "loss": 0.82, "step": 15937 }, { "epoch": 1.079883460939088, "grad_norm": 7.410157203674316, "learning_rate": 7.920870696146211e-05, "loss": 0.7367, "step": 15938 }, { "epoch": 1.0799512162070601, "grad_norm": 6.100698471069336, "learning_rate": 7.920733794236431e-05, "loss": 0.862, "step": 15939 }, { "epoch": 1.0800189714750321, "grad_norm": 6.438335418701172, "learning_rate": 7.920596892326649e-05, "loss": 0.7477, "step": 15940 }, { "epoch": 1.0800867267430043, "grad_norm": 6.367231845855713, "learning_rate": 7.920459990416867e-05, "loss": 0.6993, "step": 15941 }, { "epoch": 1.0801544820109763, "grad_norm": 5.784175395965576, "learning_rate": 7.920323088507086e-05, "loss": 0.6835, "step": 15942 }, { "epoch": 1.0802222372789485, "grad_norm": 5.12135648727417, "learning_rate": 7.920186186597304e-05, "loss": 0.7518, "step": 15943 }, { "epoch": 1.0802899925469205, "grad_norm": 5.591551780700684, "learning_rate": 7.920049284687522e-05, "loss": 0.5578, "step": 15944 }, { "epoch": 1.0803577478148927, "grad_norm": 3.941267728805542, "learning_rate": 7.91991238277774e-05, "loss": 0.6835, "step": 15945 }, { "epoch": 1.0804255030828647, "grad_norm": 4.970183849334717, "learning_rate": 7.919775480867958e-05, "loss": 0.6148, "step": 15946 }, { "epoch": 1.0804932583508369, "grad_norm": 5.3328938484191895, "learning_rate": 7.919638578958178e-05, "loss": 0.5961, "step": 15947 }, { "epoch": 1.0805610136188089, "grad_norm": 5.661096096038818, "learning_rate": 7.919501677048396e-05, "loss": 0.5808, "step": 15948 }, { "epoch": 1.0806287688867808, "grad_norm": 4.3002824783325195, "learning_rate": 7.919364775138614e-05, "loss": 0.6531, "step": 15949 }, { "epoch": 1.080696524154753, "grad_norm": 5.652578830718994, "learning_rate": 7.919227873228832e-05, "loss": 0.8663, "step": 15950 }, { "epoch": 1.080764279422725, "grad_norm": 5.4440717697143555, "learning_rate": 7.91909097131905e-05, "loss": 0.8888, "step": 15951 }, { "epoch": 1.0808320346906972, "grad_norm": 5.150991439819336, "learning_rate": 7.918954069409269e-05, "loss": 0.6697, "step": 15952 }, { "epoch": 1.0808997899586692, "grad_norm": 5.136131286621094, "learning_rate": 7.918817167499487e-05, "loss": 0.6577, "step": 15953 }, { "epoch": 1.0809675452266414, "grad_norm": 6.44126033782959, "learning_rate": 7.918680265589705e-05, "loss": 0.6284, "step": 15954 }, { "epoch": 1.0810353004946134, "grad_norm": 8.204652786254883, "learning_rate": 7.918543363679923e-05, "loss": 0.6879, "step": 15955 }, { "epoch": 1.0811030557625856, "grad_norm": 5.8660407066345215, "learning_rate": 7.918406461770143e-05, "loss": 0.5727, "step": 15956 }, { "epoch": 1.0811708110305576, "grad_norm": 6.534690856933594, "learning_rate": 7.918269559860361e-05, "loss": 0.697, "step": 15957 }, { "epoch": 1.0812385662985298, "grad_norm": 8.585152626037598, "learning_rate": 7.918132657950579e-05, "loss": 0.8429, "step": 15958 }, { "epoch": 1.0813063215665017, "grad_norm": 7.084420204162598, "learning_rate": 7.917995756040797e-05, "loss": 0.6254, "step": 15959 }, { "epoch": 1.081374076834474, "grad_norm": 5.689300060272217, "learning_rate": 7.917858854131015e-05, "loss": 0.5956, "step": 15960 }, { "epoch": 1.081441832102446, "grad_norm": 6.1884331703186035, "learning_rate": 7.917721952221234e-05, "loss": 0.7677, "step": 15961 }, { "epoch": 1.0815095873704181, "grad_norm": 10.952715873718262, "learning_rate": 7.917585050311452e-05, "loss": 0.783, "step": 15962 }, { "epoch": 1.0815773426383901, "grad_norm": 8.16262149810791, "learning_rate": 7.91744814840167e-05, "loss": 0.548, "step": 15963 }, { "epoch": 1.0816450979063623, "grad_norm": 5.6903157234191895, "learning_rate": 7.917311246491888e-05, "loss": 0.5904, "step": 15964 }, { "epoch": 1.0817128531743343, "grad_norm": 8.33536148071289, "learning_rate": 7.917174344582108e-05, "loss": 0.7886, "step": 15965 }, { "epoch": 1.0817806084423065, "grad_norm": 9.231719017028809, "learning_rate": 7.917037442672326e-05, "loss": 0.7876, "step": 15966 }, { "epoch": 1.0818483637102785, "grad_norm": 5.142817497253418, "learning_rate": 7.916900540762544e-05, "loss": 0.6525, "step": 15967 }, { "epoch": 1.0819161189782505, "grad_norm": 8.006011962890625, "learning_rate": 7.916763638852762e-05, "loss": 0.6731, "step": 15968 }, { "epoch": 1.0819838742462227, "grad_norm": 4.934510707855225, "learning_rate": 7.91662673694298e-05, "loss": 0.6913, "step": 15969 }, { "epoch": 1.0820516295141946, "grad_norm": 4.291750907897949, "learning_rate": 7.916489835033199e-05, "loss": 0.5819, "step": 15970 }, { "epoch": 1.0821193847821668, "grad_norm": 6.167888164520264, "learning_rate": 7.916352933123417e-05, "loss": 0.5861, "step": 15971 }, { "epoch": 1.0821871400501388, "grad_norm": 6.8970465660095215, "learning_rate": 7.916216031213635e-05, "loss": 0.4904, "step": 15972 }, { "epoch": 1.082254895318111, "grad_norm": 6.821037292480469, "learning_rate": 7.916079129303853e-05, "loss": 0.7735, "step": 15973 }, { "epoch": 1.082322650586083, "grad_norm": 6.066835403442383, "learning_rate": 7.915942227394071e-05, "loss": 0.5559, "step": 15974 }, { "epoch": 1.0823904058540552, "grad_norm": 5.0264716148376465, "learning_rate": 7.915805325484291e-05, "loss": 0.8914, "step": 15975 }, { "epoch": 1.0824581611220272, "grad_norm": 6.044068336486816, "learning_rate": 7.915668423574509e-05, "loss": 0.5114, "step": 15976 }, { "epoch": 1.0825259163899994, "grad_norm": 5.62042236328125, "learning_rate": 7.915531521664727e-05, "loss": 0.6438, "step": 15977 }, { "epoch": 1.0825936716579714, "grad_norm": 5.779903411865234, "learning_rate": 7.915394619754945e-05, "loss": 0.5855, "step": 15978 }, { "epoch": 1.0826614269259436, "grad_norm": 7.046977996826172, "learning_rate": 7.915257717845164e-05, "loss": 0.7729, "step": 15979 }, { "epoch": 1.0827291821939156, "grad_norm": 10.05582332611084, "learning_rate": 7.915120815935382e-05, "loss": 0.7776, "step": 15980 }, { "epoch": 1.0827969374618878, "grad_norm": 6.933775901794434, "learning_rate": 7.9149839140256e-05, "loss": 0.7646, "step": 15981 }, { "epoch": 1.0828646927298597, "grad_norm": 5.1650543212890625, "learning_rate": 7.91484701211582e-05, "loss": 0.5793, "step": 15982 }, { "epoch": 1.0829324479978317, "grad_norm": 6.840242385864258, "learning_rate": 7.914710110206038e-05, "loss": 0.7227, "step": 15983 }, { "epoch": 1.083000203265804, "grad_norm": 4.679685592651367, "learning_rate": 7.914573208296256e-05, "loss": 0.6102, "step": 15984 }, { "epoch": 1.083067958533776, "grad_norm": 6.447381496429443, "learning_rate": 7.914436306386475e-05, "loss": 0.9207, "step": 15985 }, { "epoch": 1.0831357138017481, "grad_norm": 7.533105850219727, "learning_rate": 7.914299404476693e-05, "loss": 0.7438, "step": 15986 }, { "epoch": 1.08320346906972, "grad_norm": 4.451360702514648, "learning_rate": 7.914162502566911e-05, "loss": 0.5668, "step": 15987 }, { "epoch": 1.0832712243376923, "grad_norm": 7.082450866699219, "learning_rate": 7.914025600657131e-05, "loss": 0.6538, "step": 15988 }, { "epoch": 1.0833389796056643, "grad_norm": 5.413089275360107, "learning_rate": 7.913888698747349e-05, "loss": 0.7967, "step": 15989 }, { "epoch": 1.0834067348736365, "grad_norm": 5.9802985191345215, "learning_rate": 7.913751796837567e-05, "loss": 0.7933, "step": 15990 }, { "epoch": 1.0834744901416085, "grad_norm": 6.097925662994385, "learning_rate": 7.913614894927785e-05, "loss": 0.602, "step": 15991 }, { "epoch": 1.0835422454095807, "grad_norm": 5.050368309020996, "learning_rate": 7.913477993018003e-05, "loss": 0.5664, "step": 15992 }, { "epoch": 1.0836100006775526, "grad_norm": 4.976052761077881, "learning_rate": 7.913341091108222e-05, "loss": 0.6543, "step": 15993 }, { "epoch": 1.0836777559455248, "grad_norm": 6.06764554977417, "learning_rate": 7.91320418919844e-05, "loss": 0.6747, "step": 15994 }, { "epoch": 1.0837455112134968, "grad_norm": 8.071044921875, "learning_rate": 7.913067287288658e-05, "loss": 0.6247, "step": 15995 }, { "epoch": 1.083813266481469, "grad_norm": 5.97242546081543, "learning_rate": 7.912930385378876e-05, "loss": 0.6158, "step": 15996 }, { "epoch": 1.083881021749441, "grad_norm": 5.717789173126221, "learning_rate": 7.912793483469096e-05, "loss": 0.7819, "step": 15997 }, { "epoch": 1.083948777017413, "grad_norm": 6.379135608673096, "learning_rate": 7.912656581559314e-05, "loss": 0.8008, "step": 15998 }, { "epoch": 1.0840165322853852, "grad_norm": 5.400828838348389, "learning_rate": 7.912519679649532e-05, "loss": 0.6621, "step": 15999 }, { "epoch": 1.0840842875533572, "grad_norm": 7.098434925079346, "learning_rate": 7.91238277773975e-05, "loss": 0.8617, "step": 16000 }, { "epoch": 1.0841520428213294, "grad_norm": 5.690536975860596, "learning_rate": 7.912245875829968e-05, "loss": 0.7622, "step": 16001 }, { "epoch": 1.0842197980893014, "grad_norm": 5.485647201538086, "learning_rate": 7.912108973920187e-05, "loss": 0.6856, "step": 16002 }, { "epoch": 1.0842875533572736, "grad_norm": 7.707883358001709, "learning_rate": 7.911972072010405e-05, "loss": 0.7483, "step": 16003 }, { "epoch": 1.0843553086252455, "grad_norm": 5.487302303314209, "learning_rate": 7.911835170100623e-05, "loss": 0.6216, "step": 16004 }, { "epoch": 1.0844230638932177, "grad_norm": 5.43400239944458, "learning_rate": 7.911698268190841e-05, "loss": 0.6424, "step": 16005 }, { "epoch": 1.0844908191611897, "grad_norm": 5.037356853485107, "learning_rate": 7.91156136628106e-05, "loss": 0.5823, "step": 16006 }, { "epoch": 1.084558574429162, "grad_norm": 6.4713215827941895, "learning_rate": 7.911424464371279e-05, "loss": 0.6753, "step": 16007 }, { "epoch": 1.084626329697134, "grad_norm": 5.2635393142700195, "learning_rate": 7.911287562461497e-05, "loss": 0.8342, "step": 16008 }, { "epoch": 1.084694084965106, "grad_norm": 5.816706657409668, "learning_rate": 7.911150660551715e-05, "loss": 0.8517, "step": 16009 }, { "epoch": 1.084761840233078, "grad_norm": 5.9537458419799805, "learning_rate": 7.911013758641933e-05, "loss": 0.6601, "step": 16010 }, { "epoch": 1.0848295955010503, "grad_norm": 4.499695301055908, "learning_rate": 7.910876856732152e-05, "loss": 0.5232, "step": 16011 }, { "epoch": 1.0848973507690223, "grad_norm": 5.2069091796875, "learning_rate": 7.91073995482237e-05, "loss": 0.5803, "step": 16012 }, { "epoch": 1.0849651060369945, "grad_norm": 4.306644916534424, "learning_rate": 7.910603052912588e-05, "loss": 0.6398, "step": 16013 }, { "epoch": 1.0850328613049665, "grad_norm": 5.758792400360107, "learning_rate": 7.910466151002806e-05, "loss": 0.7415, "step": 16014 }, { "epoch": 1.0851006165729387, "grad_norm": 6.12398099899292, "learning_rate": 7.910329249093024e-05, "loss": 0.7178, "step": 16015 }, { "epoch": 1.0851683718409106, "grad_norm": 7.941034317016602, "learning_rate": 7.910192347183244e-05, "loss": 0.6802, "step": 16016 }, { "epoch": 1.0852361271088826, "grad_norm": 5.2251176834106445, "learning_rate": 7.910055445273462e-05, "loss": 0.6264, "step": 16017 }, { "epoch": 1.0853038823768548, "grad_norm": 5.844374179840088, "learning_rate": 7.90991854336368e-05, "loss": 0.6761, "step": 16018 }, { "epoch": 1.0853716376448268, "grad_norm": 4.731149196624756, "learning_rate": 7.909781641453898e-05, "loss": 0.7042, "step": 16019 }, { "epoch": 1.085439392912799, "grad_norm": 5.9580206871032715, "learning_rate": 7.909644739544117e-05, "loss": 0.6448, "step": 16020 }, { "epoch": 1.085507148180771, "grad_norm": 6.342482089996338, "learning_rate": 7.909507837634335e-05, "loss": 0.6347, "step": 16021 }, { "epoch": 1.0855749034487432, "grad_norm": 5.999290943145752, "learning_rate": 7.909370935724553e-05, "loss": 0.7697, "step": 16022 }, { "epoch": 1.0856426587167152, "grad_norm": 6.850386142730713, "learning_rate": 7.909234033814771e-05, "loss": 0.6584, "step": 16023 }, { "epoch": 1.0857104139846874, "grad_norm": 6.658447265625, "learning_rate": 7.90909713190499e-05, "loss": 0.7402, "step": 16024 }, { "epoch": 1.0857781692526594, "grad_norm": 5.117592811584473, "learning_rate": 7.908960229995209e-05, "loss": 0.7171, "step": 16025 }, { "epoch": 1.0858459245206316, "grad_norm": 4.4139909744262695, "learning_rate": 7.908823328085427e-05, "loss": 0.5602, "step": 16026 }, { "epoch": 1.0859136797886035, "grad_norm": 8.113272666931152, "learning_rate": 7.908686426175645e-05, "loss": 0.5116, "step": 16027 }, { "epoch": 1.0859814350565757, "grad_norm": 6.033690452575684, "learning_rate": 7.908549524265864e-05, "loss": 0.6415, "step": 16028 }, { "epoch": 1.0860491903245477, "grad_norm": 5.521602630615234, "learning_rate": 7.908412622356082e-05, "loss": 0.7071, "step": 16029 }, { "epoch": 1.08611694559252, "grad_norm": 9.671419143676758, "learning_rate": 7.9082757204463e-05, "loss": 0.746, "step": 16030 }, { "epoch": 1.086184700860492, "grad_norm": 12.12438678741455, "learning_rate": 7.90813881853652e-05, "loss": 0.5135, "step": 16031 }, { "epoch": 1.0862524561284639, "grad_norm": 6.55898904800415, "learning_rate": 7.908001916626738e-05, "loss": 0.6438, "step": 16032 }, { "epoch": 1.086320211396436, "grad_norm": 6.704573154449463, "learning_rate": 7.907865014716956e-05, "loss": 0.5559, "step": 16033 }, { "epoch": 1.086387966664408, "grad_norm": 6.356927871704102, "learning_rate": 7.907728112807175e-05, "loss": 0.7004, "step": 16034 }, { "epoch": 1.0864557219323803, "grad_norm": 6.504284381866455, "learning_rate": 7.907591210897393e-05, "loss": 0.7731, "step": 16035 }, { "epoch": 1.0865234772003523, "grad_norm": 4.6852874755859375, "learning_rate": 7.907454308987611e-05, "loss": 0.5772, "step": 16036 }, { "epoch": 1.0865912324683245, "grad_norm": 5.674740791320801, "learning_rate": 7.907317407077829e-05, "loss": 0.6515, "step": 16037 }, { "epoch": 1.0866589877362964, "grad_norm": 11.06320571899414, "learning_rate": 7.907180505168047e-05, "loss": 0.7314, "step": 16038 }, { "epoch": 1.0867267430042686, "grad_norm": 7.077748775482178, "learning_rate": 7.907043603258267e-05, "loss": 0.7779, "step": 16039 }, { "epoch": 1.0867944982722406, "grad_norm": 4.752398490905762, "learning_rate": 7.906906701348485e-05, "loss": 0.6754, "step": 16040 }, { "epoch": 1.0868622535402128, "grad_norm": 6.530601978302002, "learning_rate": 7.906769799438703e-05, "loss": 0.7912, "step": 16041 }, { "epoch": 1.0869300088081848, "grad_norm": 7.028359889984131, "learning_rate": 7.906632897528921e-05, "loss": 0.7639, "step": 16042 }, { "epoch": 1.086997764076157, "grad_norm": 6.825929164886475, "learning_rate": 7.90649599561914e-05, "loss": 0.5188, "step": 16043 }, { "epoch": 1.087065519344129, "grad_norm": 5.377181053161621, "learning_rate": 7.906359093709358e-05, "loss": 0.7833, "step": 16044 }, { "epoch": 1.0871332746121012, "grad_norm": 5.946323394775391, "learning_rate": 7.906222191799576e-05, "loss": 0.9365, "step": 16045 }, { "epoch": 1.0872010298800732, "grad_norm": 6.080692768096924, "learning_rate": 7.906085289889794e-05, "loss": 0.8694, "step": 16046 }, { "epoch": 1.0872687851480451, "grad_norm": 5.918422698974609, "learning_rate": 7.905948387980012e-05, "loss": 0.8249, "step": 16047 }, { "epoch": 1.0873365404160173, "grad_norm": 8.938027381896973, "learning_rate": 7.905811486070232e-05, "loss": 0.6464, "step": 16048 }, { "epoch": 1.0874042956839893, "grad_norm": 5.869150638580322, "learning_rate": 7.90567458416045e-05, "loss": 0.849, "step": 16049 }, { "epoch": 1.0874720509519615, "grad_norm": 6.127419948577881, "learning_rate": 7.905537682250668e-05, "loss": 0.5813, "step": 16050 }, { "epoch": 1.0875398062199335, "grad_norm": 5.159786701202393, "learning_rate": 7.905400780340886e-05, "loss": 0.5756, "step": 16051 }, { "epoch": 1.0876075614879057, "grad_norm": 6.768173694610596, "learning_rate": 7.905263878431104e-05, "loss": 0.9234, "step": 16052 }, { "epoch": 1.0876753167558777, "grad_norm": 5.480606555938721, "learning_rate": 7.905126976521323e-05, "loss": 0.6028, "step": 16053 }, { "epoch": 1.08774307202385, "grad_norm": 6.204960823059082, "learning_rate": 7.904990074611541e-05, "loss": 0.6402, "step": 16054 }, { "epoch": 1.0878108272918219, "grad_norm": 5.574785232543945, "learning_rate": 7.904853172701759e-05, "loss": 0.6907, "step": 16055 }, { "epoch": 1.087878582559794, "grad_norm": 6.4214043617248535, "learning_rate": 7.904716270791977e-05, "loss": 1.0685, "step": 16056 }, { "epoch": 1.087946337827766, "grad_norm": 6.589137077331543, "learning_rate": 7.904579368882197e-05, "loss": 0.6175, "step": 16057 }, { "epoch": 1.0880140930957383, "grad_norm": 5.765407085418701, "learning_rate": 7.904442466972415e-05, "loss": 0.696, "step": 16058 }, { "epoch": 1.0880818483637102, "grad_norm": 4.7705278396606445, "learning_rate": 7.904305565062633e-05, "loss": 0.6874, "step": 16059 }, { "epoch": 1.0881496036316824, "grad_norm": 6.676177978515625, "learning_rate": 7.904168663152851e-05, "loss": 0.6891, "step": 16060 }, { "epoch": 1.0882173588996544, "grad_norm": 4.436383247375488, "learning_rate": 7.904031761243069e-05, "loss": 0.6336, "step": 16061 }, { "epoch": 1.0882851141676266, "grad_norm": 8.813880920410156, "learning_rate": 7.903894859333288e-05, "loss": 0.7391, "step": 16062 }, { "epoch": 1.0883528694355986, "grad_norm": 5.455590724945068, "learning_rate": 7.903757957423506e-05, "loss": 0.6368, "step": 16063 }, { "epoch": 1.0884206247035708, "grad_norm": 6.536738872528076, "learning_rate": 7.903621055513724e-05, "loss": 0.756, "step": 16064 }, { "epoch": 1.0884883799715428, "grad_norm": 4.461001873016357, "learning_rate": 7.903484153603942e-05, "loss": 0.5279, "step": 16065 }, { "epoch": 1.0885561352395148, "grad_norm": 5.54738187789917, "learning_rate": 7.903347251694162e-05, "loss": 0.5467, "step": 16066 }, { "epoch": 1.088623890507487, "grad_norm": 4.30350923538208, "learning_rate": 7.90321034978438e-05, "loss": 0.5577, "step": 16067 }, { "epoch": 1.088691645775459, "grad_norm": 6.150055885314941, "learning_rate": 7.903073447874598e-05, "loss": 0.6348, "step": 16068 }, { "epoch": 1.0887594010434312, "grad_norm": 4.178039073944092, "learning_rate": 7.902936545964816e-05, "loss": 0.5737, "step": 16069 }, { "epoch": 1.0888271563114031, "grad_norm": 4.776052474975586, "learning_rate": 7.902799644055034e-05, "loss": 0.6767, "step": 16070 }, { "epoch": 1.0888949115793753, "grad_norm": 4.931454658508301, "learning_rate": 7.902662742145253e-05, "loss": 0.6867, "step": 16071 }, { "epoch": 1.0889626668473473, "grad_norm": 5.556695461273193, "learning_rate": 7.902525840235471e-05, "loss": 0.902, "step": 16072 }, { "epoch": 1.0890304221153195, "grad_norm": 6.187075138092041, "learning_rate": 7.90238893832569e-05, "loss": 0.7161, "step": 16073 }, { "epoch": 1.0890981773832915, "grad_norm": 6.942097187042236, "learning_rate": 7.902252036415909e-05, "loss": 0.9483, "step": 16074 }, { "epoch": 1.0891659326512637, "grad_norm": 7.454192161560059, "learning_rate": 7.902115134506127e-05, "loss": 0.768, "step": 16075 }, { "epoch": 1.0892336879192357, "grad_norm": 4.790287494659424, "learning_rate": 7.901978232596345e-05, "loss": 0.7202, "step": 16076 }, { "epoch": 1.089301443187208, "grad_norm": 4.767421722412109, "learning_rate": 7.901841330686564e-05, "loss": 0.7903, "step": 16077 }, { "epoch": 1.0893691984551799, "grad_norm": 5.385760307312012, "learning_rate": 7.901704428776782e-05, "loss": 0.653, "step": 16078 }, { "epoch": 1.089436953723152, "grad_norm": 4.6891655921936035, "learning_rate": 7.901567526867e-05, "loss": 0.5077, "step": 16079 }, { "epoch": 1.089504708991124, "grad_norm": 6.615968704223633, "learning_rate": 7.90143062495722e-05, "loss": 0.6641, "step": 16080 }, { "epoch": 1.089572464259096, "grad_norm": 8.83869457244873, "learning_rate": 7.901293723047438e-05, "loss": 0.7476, "step": 16081 }, { "epoch": 1.0896402195270682, "grad_norm": 5.717437267303467, "learning_rate": 7.901156821137656e-05, "loss": 0.5911, "step": 16082 }, { "epoch": 1.0897079747950402, "grad_norm": 3.9267828464508057, "learning_rate": 7.901019919227874e-05, "loss": 0.4863, "step": 16083 }, { "epoch": 1.0897757300630124, "grad_norm": 6.119167327880859, "learning_rate": 7.900883017318092e-05, "loss": 0.7104, "step": 16084 }, { "epoch": 1.0898434853309844, "grad_norm": 8.956766128540039, "learning_rate": 7.900746115408311e-05, "loss": 0.61, "step": 16085 }, { "epoch": 1.0899112405989566, "grad_norm": 5.749386787414551, "learning_rate": 7.900609213498529e-05, "loss": 0.7368, "step": 16086 }, { "epoch": 1.0899789958669286, "grad_norm": 7.877694606781006, "learning_rate": 7.900472311588747e-05, "loss": 0.5073, "step": 16087 }, { "epoch": 1.0900467511349008, "grad_norm": 6.475144386291504, "learning_rate": 7.900335409678965e-05, "loss": 0.8651, "step": 16088 }, { "epoch": 1.0901145064028728, "grad_norm": 4.912156581878662, "learning_rate": 7.900198507769185e-05, "loss": 0.6296, "step": 16089 }, { "epoch": 1.090182261670845, "grad_norm": 6.647600173950195, "learning_rate": 7.900061605859403e-05, "loss": 0.7773, "step": 16090 }, { "epoch": 1.090250016938817, "grad_norm": 5.223667621612549, "learning_rate": 7.899924703949621e-05, "loss": 0.6258, "step": 16091 }, { "epoch": 1.0903177722067892, "grad_norm": 5.760304927825928, "learning_rate": 7.899787802039839e-05, "loss": 0.6562, "step": 16092 }, { "epoch": 1.0903855274747611, "grad_norm": 6.270285129547119, "learning_rate": 7.899650900130057e-05, "loss": 0.7241, "step": 16093 }, { "epoch": 1.0904532827427333, "grad_norm": 6.746170997619629, "learning_rate": 7.899513998220276e-05, "loss": 0.6451, "step": 16094 }, { "epoch": 1.0905210380107053, "grad_norm": 6.292099952697754, "learning_rate": 7.899377096310494e-05, "loss": 0.4791, "step": 16095 }, { "epoch": 1.0905887932786773, "grad_norm": 7.886358261108398, "learning_rate": 7.899240194400712e-05, "loss": 0.5683, "step": 16096 }, { "epoch": 1.0906565485466495, "grad_norm": 5.620253562927246, "learning_rate": 7.89910329249093e-05, "loss": 0.5844, "step": 16097 }, { "epoch": 1.0907243038146215, "grad_norm": 13.192233085632324, "learning_rate": 7.89896639058115e-05, "loss": 0.8562, "step": 16098 }, { "epoch": 1.0907920590825937, "grad_norm": 4.923586845397949, "learning_rate": 7.898829488671368e-05, "loss": 0.6513, "step": 16099 }, { "epoch": 1.0908598143505657, "grad_norm": 6.001354217529297, "learning_rate": 7.898692586761586e-05, "loss": 0.6313, "step": 16100 }, { "epoch": 1.0909275696185379, "grad_norm": 6.142572402954102, "learning_rate": 7.898555684851804e-05, "loss": 0.5797, "step": 16101 }, { "epoch": 1.0909953248865099, "grad_norm": 5.978823661804199, "learning_rate": 7.898418782942022e-05, "loss": 0.7278, "step": 16102 }, { "epoch": 1.091063080154482, "grad_norm": 6.031432151794434, "learning_rate": 7.898281881032241e-05, "loss": 0.4503, "step": 16103 }, { "epoch": 1.091130835422454, "grad_norm": 6.397644996643066, "learning_rate": 7.898144979122459e-05, "loss": 0.8417, "step": 16104 }, { "epoch": 1.0911985906904262, "grad_norm": 4.83469295501709, "learning_rate": 7.898008077212677e-05, "loss": 0.5679, "step": 16105 }, { "epoch": 1.0912663459583982, "grad_norm": 5.136175155639648, "learning_rate": 7.897871175302895e-05, "loss": 0.7549, "step": 16106 }, { "epoch": 1.0913341012263704, "grad_norm": 5.254212856292725, "learning_rate": 7.897734273393113e-05, "loss": 0.5692, "step": 16107 }, { "epoch": 1.0914018564943424, "grad_norm": 8.869996070861816, "learning_rate": 7.897597371483333e-05, "loss": 0.8202, "step": 16108 }, { "epoch": 1.0914696117623146, "grad_norm": 6.127236843109131, "learning_rate": 7.897460469573551e-05, "loss": 0.5936, "step": 16109 }, { "epoch": 1.0915373670302866, "grad_norm": 6.347018241882324, "learning_rate": 7.897323567663769e-05, "loss": 0.5723, "step": 16110 }, { "epoch": 1.0916051222982588, "grad_norm": 4.602900981903076, "learning_rate": 7.897186665753987e-05, "loss": 0.555, "step": 16111 }, { "epoch": 1.0916728775662308, "grad_norm": 5.458276271820068, "learning_rate": 7.897049763844206e-05, "loss": 0.6425, "step": 16112 }, { "epoch": 1.091740632834203, "grad_norm": 4.358611583709717, "learning_rate": 7.896912861934424e-05, "loss": 0.506, "step": 16113 }, { "epoch": 1.091808388102175, "grad_norm": 5.602118015289307, "learning_rate": 7.896775960024642e-05, "loss": 0.7078, "step": 16114 }, { "epoch": 1.091876143370147, "grad_norm": 5.718192100524902, "learning_rate": 7.89663905811486e-05, "loss": 0.6152, "step": 16115 }, { "epoch": 1.0919438986381191, "grad_norm": 7.62349796295166, "learning_rate": 7.896502156205078e-05, "loss": 0.8543, "step": 16116 }, { "epoch": 1.0920116539060911, "grad_norm": 5.8089189529418945, "learning_rate": 7.896365254295298e-05, "loss": 0.6322, "step": 16117 }, { "epoch": 1.0920794091740633, "grad_norm": 4.909200191497803, "learning_rate": 7.896228352385516e-05, "loss": 0.6035, "step": 16118 }, { "epoch": 1.0921471644420353, "grad_norm": 6.314395427703857, "learning_rate": 7.896091450475734e-05, "loss": 0.7228, "step": 16119 }, { "epoch": 1.0922149197100075, "grad_norm": 4.429898738861084, "learning_rate": 7.895954548565953e-05, "loss": 0.7112, "step": 16120 }, { "epoch": 1.0922826749779795, "grad_norm": 5.228384017944336, "learning_rate": 7.895817646656171e-05, "loss": 0.6499, "step": 16121 }, { "epoch": 1.0923504302459517, "grad_norm": 5.266185760498047, "learning_rate": 7.895680744746389e-05, "loss": 0.6539, "step": 16122 }, { "epoch": 1.0924181855139237, "grad_norm": 5.5866379737854, "learning_rate": 7.895543842836609e-05, "loss": 0.5946, "step": 16123 }, { "epoch": 1.0924859407818959, "grad_norm": 5.785736083984375, "learning_rate": 7.895406940926827e-05, "loss": 0.6127, "step": 16124 }, { "epoch": 1.0925536960498679, "grad_norm": 4.491186141967773, "learning_rate": 7.895270039017045e-05, "loss": 0.3924, "step": 16125 }, { "epoch": 1.09262145131784, "grad_norm": 5.7343831062316895, "learning_rate": 7.895133137107264e-05, "loss": 0.5891, "step": 16126 }, { "epoch": 1.092689206585812, "grad_norm": 5.105741500854492, "learning_rate": 7.894996235197482e-05, "loss": 0.6144, "step": 16127 }, { "epoch": 1.0927569618537842, "grad_norm": 6.519714832305908, "learning_rate": 7.8948593332877e-05, "loss": 0.6786, "step": 16128 }, { "epoch": 1.0928247171217562, "grad_norm": 6.885262489318848, "learning_rate": 7.894722431377918e-05, "loss": 0.7443, "step": 16129 }, { "epoch": 1.0928924723897282, "grad_norm": 7.912429332733154, "learning_rate": 7.894585529468138e-05, "loss": 0.851, "step": 16130 }, { "epoch": 1.0929602276577004, "grad_norm": 8.219959259033203, "learning_rate": 7.894448627558356e-05, "loss": 0.6724, "step": 16131 }, { "epoch": 1.0930279829256724, "grad_norm": 5.2340545654296875, "learning_rate": 7.894311725648574e-05, "loss": 0.8001, "step": 16132 }, { "epoch": 1.0930957381936446, "grad_norm": 5.254332542419434, "learning_rate": 7.894174823738792e-05, "loss": 0.735, "step": 16133 }, { "epoch": 1.0931634934616166, "grad_norm": 6.4383864402771, "learning_rate": 7.89403792182901e-05, "loss": 0.7279, "step": 16134 }, { "epoch": 1.0932312487295888, "grad_norm": 5.197512626647949, "learning_rate": 7.893901019919229e-05, "loss": 0.5941, "step": 16135 }, { "epoch": 1.0932990039975607, "grad_norm": 8.93490982055664, "learning_rate": 7.893764118009447e-05, "loss": 0.9157, "step": 16136 }, { "epoch": 1.093366759265533, "grad_norm": 5.834399223327637, "learning_rate": 7.893627216099665e-05, "loss": 0.6377, "step": 16137 }, { "epoch": 1.093434514533505, "grad_norm": 7.221981048583984, "learning_rate": 7.893490314189883e-05, "loss": 0.8778, "step": 16138 }, { "epoch": 1.0935022698014771, "grad_norm": 8.59212875366211, "learning_rate": 7.893353412280101e-05, "loss": 0.6377, "step": 16139 }, { "epoch": 1.0935700250694491, "grad_norm": 5.231501579284668, "learning_rate": 7.89321651037032e-05, "loss": 0.609, "step": 16140 }, { "epoch": 1.0936377803374213, "grad_norm": 6.0066447257995605, "learning_rate": 7.893079608460539e-05, "loss": 0.6786, "step": 16141 }, { "epoch": 1.0937055356053933, "grad_norm": 5.8601861000061035, "learning_rate": 7.892942706550757e-05, "loss": 0.7069, "step": 16142 }, { "epoch": 1.0937732908733655, "grad_norm": 5.470623016357422, "learning_rate": 7.892805804640975e-05, "loss": 0.6973, "step": 16143 }, { "epoch": 1.0938410461413375, "grad_norm": 6.8463454246521, "learning_rate": 7.892668902731194e-05, "loss": 0.6182, "step": 16144 }, { "epoch": 1.0939088014093095, "grad_norm": 4.355232238769531, "learning_rate": 7.892532000821412e-05, "loss": 0.6908, "step": 16145 }, { "epoch": 1.0939765566772817, "grad_norm": 7.3813676834106445, "learning_rate": 7.89239509891163e-05, "loss": 0.6964, "step": 16146 }, { "epoch": 1.0940443119452536, "grad_norm": 5.615843296051025, "learning_rate": 7.892258197001848e-05, "loss": 0.7703, "step": 16147 }, { "epoch": 1.0941120672132258, "grad_norm": 4.897951602935791, "learning_rate": 7.892121295092066e-05, "loss": 0.7212, "step": 16148 }, { "epoch": 1.0941798224811978, "grad_norm": 6.01582670211792, "learning_rate": 7.891984393182286e-05, "loss": 0.6612, "step": 16149 }, { "epoch": 1.09424757774917, "grad_norm": 6.209997653961182, "learning_rate": 7.891847491272504e-05, "loss": 0.7842, "step": 16150 }, { "epoch": 1.094315333017142, "grad_norm": 5.317853927612305, "learning_rate": 7.891710589362722e-05, "loss": 0.7604, "step": 16151 }, { "epoch": 1.0943830882851142, "grad_norm": 4.876063823699951, "learning_rate": 7.89157368745294e-05, "loss": 0.6274, "step": 16152 }, { "epoch": 1.0944508435530862, "grad_norm": 5.609501361846924, "learning_rate": 7.891436785543159e-05, "loss": 0.7662, "step": 16153 }, { "epoch": 1.0945185988210584, "grad_norm": 5.9666852951049805, "learning_rate": 7.891299883633377e-05, "loss": 0.7864, "step": 16154 }, { "epoch": 1.0945863540890304, "grad_norm": 4.681065559387207, "learning_rate": 7.891162981723595e-05, "loss": 0.727, "step": 16155 }, { "epoch": 1.0946541093570026, "grad_norm": 6.903472423553467, "learning_rate": 7.891026079813813e-05, "loss": 0.894, "step": 16156 }, { "epoch": 1.0947218646249746, "grad_norm": 6.2425408363342285, "learning_rate": 7.890889177904031e-05, "loss": 0.658, "step": 16157 }, { "epoch": 1.0947896198929468, "grad_norm": 5.621673583984375, "learning_rate": 7.890752275994251e-05, "loss": 0.675, "step": 16158 }, { "epoch": 1.0948573751609187, "grad_norm": 6.081178665161133, "learning_rate": 7.890615374084469e-05, "loss": 0.5955, "step": 16159 }, { "epoch": 1.094925130428891, "grad_norm": 6.864749431610107, "learning_rate": 7.890478472174687e-05, "loss": 0.5638, "step": 16160 }, { "epoch": 1.094992885696863, "grad_norm": 5.215126037597656, "learning_rate": 7.890341570264905e-05, "loss": 0.6291, "step": 16161 }, { "epoch": 1.0950606409648351, "grad_norm": 7.269073963165283, "learning_rate": 7.890204668355123e-05, "loss": 0.9472, "step": 16162 }, { "epoch": 1.095128396232807, "grad_norm": 6.205016613006592, "learning_rate": 7.890067766445342e-05, "loss": 0.5892, "step": 16163 }, { "epoch": 1.095196151500779, "grad_norm": 5.993995189666748, "learning_rate": 7.88993086453556e-05, "loss": 0.6912, "step": 16164 }, { "epoch": 1.0952639067687513, "grad_norm": 4.524845600128174, "learning_rate": 7.889793962625778e-05, "loss": 0.5425, "step": 16165 }, { "epoch": 1.0953316620367233, "grad_norm": 6.5391364097595215, "learning_rate": 7.889657060715996e-05, "loss": 0.7429, "step": 16166 }, { "epoch": 1.0953994173046955, "grad_norm": 6.908164978027344, "learning_rate": 7.889520158806216e-05, "loss": 0.6332, "step": 16167 }, { "epoch": 1.0954671725726675, "grad_norm": 6.190462112426758, "learning_rate": 7.889383256896434e-05, "loss": 0.8123, "step": 16168 }, { "epoch": 1.0955349278406397, "grad_norm": 5.4493207931518555, "learning_rate": 7.889246354986652e-05, "loss": 0.6539, "step": 16169 }, { "epoch": 1.0956026831086116, "grad_norm": 5.316275119781494, "learning_rate": 7.889109453076871e-05, "loss": 0.5694, "step": 16170 }, { "epoch": 1.0956704383765838, "grad_norm": 8.243550300598145, "learning_rate": 7.888972551167089e-05, "loss": 1.0688, "step": 16171 }, { "epoch": 1.0957381936445558, "grad_norm": 5.587428092956543, "learning_rate": 7.888835649257307e-05, "loss": 0.7348, "step": 16172 }, { "epoch": 1.095805948912528, "grad_norm": 6.552480697631836, "learning_rate": 7.888698747347527e-05, "loss": 0.5606, "step": 16173 }, { "epoch": 1.0958737041805, "grad_norm": 5.394407272338867, "learning_rate": 7.888561845437745e-05, "loss": 0.6762, "step": 16174 }, { "epoch": 1.0959414594484722, "grad_norm": 9.323013305664062, "learning_rate": 7.888424943527963e-05, "loss": 0.6987, "step": 16175 }, { "epoch": 1.0960092147164442, "grad_norm": 4.646063327789307, "learning_rate": 7.888288041618182e-05, "loss": 0.4748, "step": 16176 }, { "epoch": 1.0960769699844164, "grad_norm": 8.358293533325195, "learning_rate": 7.8881511397084e-05, "loss": 0.6064, "step": 16177 }, { "epoch": 1.0961447252523884, "grad_norm": 8.130820274353027, "learning_rate": 7.888014237798618e-05, "loss": 0.7405, "step": 16178 }, { "epoch": 1.0962124805203604, "grad_norm": 5.757419109344482, "learning_rate": 7.887877335888836e-05, "loss": 0.7595, "step": 16179 }, { "epoch": 1.0962802357883326, "grad_norm": 4.9174580574035645, "learning_rate": 7.887740433979054e-05, "loss": 0.5362, "step": 16180 }, { "epoch": 1.0963479910563045, "grad_norm": 6.0252604484558105, "learning_rate": 7.887603532069274e-05, "loss": 0.6579, "step": 16181 }, { "epoch": 1.0964157463242767, "grad_norm": 6.813238620758057, "learning_rate": 7.887466630159492e-05, "loss": 0.9755, "step": 16182 }, { "epoch": 1.0964835015922487, "grad_norm": 6.028433322906494, "learning_rate": 7.88732972824971e-05, "loss": 0.789, "step": 16183 }, { "epoch": 1.096551256860221, "grad_norm": 4.869696617126465, "learning_rate": 7.887192826339928e-05, "loss": 0.6194, "step": 16184 }, { "epoch": 1.096619012128193, "grad_norm": 5.043181419372559, "learning_rate": 7.887055924430146e-05, "loss": 0.515, "step": 16185 }, { "epoch": 1.096686767396165, "grad_norm": 5.320845127105713, "learning_rate": 7.886919022520365e-05, "loss": 0.5807, "step": 16186 }, { "epoch": 1.096754522664137, "grad_norm": 6.676725387573242, "learning_rate": 7.886782120610583e-05, "loss": 0.8363, "step": 16187 }, { "epoch": 1.0968222779321093, "grad_norm": 5.5581464767456055, "learning_rate": 7.886645218700801e-05, "loss": 0.7815, "step": 16188 }, { "epoch": 1.0968900332000813, "grad_norm": 6.747057914733887, "learning_rate": 7.886508316791019e-05, "loss": 0.781, "step": 16189 }, { "epoch": 1.0969577884680535, "grad_norm": 5.670258522033691, "learning_rate": 7.886371414881239e-05, "loss": 0.7192, "step": 16190 }, { "epoch": 1.0970255437360255, "grad_norm": 6.418565273284912, "learning_rate": 7.886234512971457e-05, "loss": 0.7151, "step": 16191 }, { "epoch": 1.0970932990039977, "grad_norm": 6.145472526550293, "learning_rate": 7.886097611061675e-05, "loss": 0.738, "step": 16192 }, { "epoch": 1.0971610542719696, "grad_norm": 6.247579574584961, "learning_rate": 7.885960709151893e-05, "loss": 0.7107, "step": 16193 }, { "epoch": 1.0972288095399416, "grad_norm": 6.627540111541748, "learning_rate": 7.885823807242111e-05, "loss": 0.6838, "step": 16194 }, { "epoch": 1.0972965648079138, "grad_norm": 5.86225700378418, "learning_rate": 7.88568690533233e-05, "loss": 0.7547, "step": 16195 }, { "epoch": 1.0973643200758858, "grad_norm": 5.816222667694092, "learning_rate": 7.885550003422548e-05, "loss": 0.5167, "step": 16196 }, { "epoch": 1.097432075343858, "grad_norm": 4.933455467224121, "learning_rate": 7.885413101512766e-05, "loss": 0.6946, "step": 16197 }, { "epoch": 1.09749983061183, "grad_norm": 4.601310729980469, "learning_rate": 7.885276199602984e-05, "loss": 0.6676, "step": 16198 }, { "epoch": 1.0975675858798022, "grad_norm": 6.967305660247803, "learning_rate": 7.885139297693204e-05, "loss": 0.7414, "step": 16199 }, { "epoch": 1.0976353411477742, "grad_norm": 5.442778587341309, "learning_rate": 7.885002395783422e-05, "loss": 0.5816, "step": 16200 }, { "epoch": 1.0977030964157464, "grad_norm": 6.803948879241943, "learning_rate": 7.88486549387364e-05, "loss": 0.6741, "step": 16201 }, { "epoch": 1.0977708516837184, "grad_norm": 4.722171783447266, "learning_rate": 7.884728591963858e-05, "loss": 0.7071, "step": 16202 }, { "epoch": 1.0978386069516906, "grad_norm": 6.338668346405029, "learning_rate": 7.884591690054076e-05, "loss": 0.7145, "step": 16203 }, { "epoch": 1.0979063622196625, "grad_norm": 6.372475624084473, "learning_rate": 7.884454788144295e-05, "loss": 0.8761, "step": 16204 }, { "epoch": 1.0979741174876347, "grad_norm": 5.98175048828125, "learning_rate": 7.884317886234513e-05, "loss": 0.7052, "step": 16205 }, { "epoch": 1.0980418727556067, "grad_norm": 7.220608711242676, "learning_rate": 7.884180984324731e-05, "loss": 0.7702, "step": 16206 }, { "epoch": 1.098109628023579, "grad_norm": 4.378786563873291, "learning_rate": 7.884044082414949e-05, "loss": 0.5409, "step": 16207 }, { "epoch": 1.098177383291551, "grad_norm": 4.2132978439331055, "learning_rate": 7.883907180505169e-05, "loss": 0.6008, "step": 16208 }, { "epoch": 1.098245138559523, "grad_norm": 5.9434590339660645, "learning_rate": 7.883770278595387e-05, "loss": 0.6274, "step": 16209 }, { "epoch": 1.098312893827495, "grad_norm": 4.109735012054443, "learning_rate": 7.883633376685605e-05, "loss": 0.7516, "step": 16210 }, { "epoch": 1.0983806490954673, "grad_norm": 5.5988006591796875, "learning_rate": 7.883496474775823e-05, "loss": 0.6469, "step": 16211 }, { "epoch": 1.0984484043634393, "grad_norm": 5.006146430969238, "learning_rate": 7.883359572866041e-05, "loss": 0.7183, "step": 16212 }, { "epoch": 1.0985161596314112, "grad_norm": 5.305348873138428, "learning_rate": 7.88322267095626e-05, "loss": 0.6361, "step": 16213 }, { "epoch": 1.0985839148993835, "grad_norm": 5.728710174560547, "learning_rate": 7.883085769046478e-05, "loss": 0.8569, "step": 16214 }, { "epoch": 1.0986516701673554, "grad_norm": 11.328938484191895, "learning_rate": 7.882948867136696e-05, "loss": 0.5037, "step": 16215 }, { "epoch": 1.0987194254353276, "grad_norm": 4.791298866271973, "learning_rate": 7.882811965226916e-05, "loss": 0.6317, "step": 16216 }, { "epoch": 1.0987871807032996, "grad_norm": 4.028521537780762, "learning_rate": 7.882675063317134e-05, "loss": 0.5422, "step": 16217 }, { "epoch": 1.0988549359712718, "grad_norm": 5.723876953125, "learning_rate": 7.882538161407352e-05, "loss": 0.8359, "step": 16218 }, { "epoch": 1.0989226912392438, "grad_norm": 4.189463138580322, "learning_rate": 7.882401259497571e-05, "loss": 0.5966, "step": 16219 }, { "epoch": 1.098990446507216, "grad_norm": 5.877222061157227, "learning_rate": 7.882264357587789e-05, "loss": 0.5115, "step": 16220 }, { "epoch": 1.099058201775188, "grad_norm": 6.062419891357422, "learning_rate": 7.882127455678007e-05, "loss": 0.6709, "step": 16221 }, { "epoch": 1.0991259570431602, "grad_norm": 8.064435958862305, "learning_rate": 7.881990553768227e-05, "loss": 0.8518, "step": 16222 }, { "epoch": 1.0991937123111322, "grad_norm": 5.81643533706665, "learning_rate": 7.881853651858445e-05, "loss": 0.7286, "step": 16223 }, { "epoch": 1.0992614675791044, "grad_norm": 5.727938652038574, "learning_rate": 7.881716749948663e-05, "loss": 0.8333, "step": 16224 }, { "epoch": 1.0993292228470763, "grad_norm": 8.810190200805664, "learning_rate": 7.88157984803888e-05, "loss": 0.9136, "step": 16225 }, { "epoch": 1.0993969781150486, "grad_norm": 6.392600059509277, "learning_rate": 7.881442946129099e-05, "loss": 0.8322, "step": 16226 }, { "epoch": 1.0994647333830205, "grad_norm": 6.082714080810547, "learning_rate": 7.881306044219318e-05, "loss": 0.649, "step": 16227 }, { "epoch": 1.0995324886509925, "grad_norm": 6.840407848358154, "learning_rate": 7.881169142309536e-05, "loss": 0.5934, "step": 16228 }, { "epoch": 1.0996002439189647, "grad_norm": 6.920519828796387, "learning_rate": 7.881032240399754e-05, "loss": 0.9076, "step": 16229 }, { "epoch": 1.0996679991869367, "grad_norm": 5.431446075439453, "learning_rate": 7.880895338489972e-05, "loss": 0.6394, "step": 16230 }, { "epoch": 1.099735754454909, "grad_norm": 7.608739376068115, "learning_rate": 7.880758436580192e-05, "loss": 0.7753, "step": 16231 }, { "epoch": 1.0998035097228809, "grad_norm": 6.12460470199585, "learning_rate": 7.88062153467041e-05, "loss": 0.7162, "step": 16232 }, { "epoch": 1.099871264990853, "grad_norm": 6.770415782928467, "learning_rate": 7.880484632760628e-05, "loss": 0.842, "step": 16233 }, { "epoch": 1.099939020258825, "grad_norm": 5.251020908355713, "learning_rate": 7.880347730850846e-05, "loss": 0.5031, "step": 16234 }, { "epoch": 1.1000067755267973, "grad_norm": 6.684656620025635, "learning_rate": 7.880210828941064e-05, "loss": 0.6369, "step": 16235 }, { "epoch": 1.1000745307947692, "grad_norm": 6.140908241271973, "learning_rate": 7.880073927031283e-05, "loss": 0.6889, "step": 16236 }, { "epoch": 1.1001422860627414, "grad_norm": 6.55711555480957, "learning_rate": 7.879937025121501e-05, "loss": 0.6736, "step": 16237 }, { "epoch": 1.1002100413307134, "grad_norm": 5.948589324951172, "learning_rate": 7.879800123211719e-05, "loss": 0.6249, "step": 16238 }, { "epoch": 1.1002777965986856, "grad_norm": 5.505045413970947, "learning_rate": 7.879663221301937e-05, "loss": 0.7244, "step": 16239 }, { "epoch": 1.1003455518666576, "grad_norm": 4.607968330383301, "learning_rate": 7.879526319392155e-05, "loss": 0.9175, "step": 16240 }, { "epoch": 1.1004133071346298, "grad_norm": 6.939621448516846, "learning_rate": 7.879389417482375e-05, "loss": 0.7586, "step": 16241 }, { "epoch": 1.1004810624026018, "grad_norm": 6.131583213806152, "learning_rate": 7.879252515572593e-05, "loss": 0.6007, "step": 16242 }, { "epoch": 1.1005488176705738, "grad_norm": 5.63719367980957, "learning_rate": 7.879115613662811e-05, "loss": 0.9402, "step": 16243 }, { "epoch": 1.100616572938546, "grad_norm": 6.878543376922607, "learning_rate": 7.878978711753029e-05, "loss": 0.5446, "step": 16244 }, { "epoch": 1.100684328206518, "grad_norm": 5.816869735717773, "learning_rate": 7.878841809843248e-05, "loss": 0.5093, "step": 16245 }, { "epoch": 1.1007520834744902, "grad_norm": 7.57546329498291, "learning_rate": 7.878704907933466e-05, "loss": 0.8181, "step": 16246 }, { "epoch": 1.1008198387424621, "grad_norm": 6.603987216949463, "learning_rate": 7.878568006023684e-05, "loss": 0.9014, "step": 16247 }, { "epoch": 1.1008875940104343, "grad_norm": 5.182919025421143, "learning_rate": 7.878431104113902e-05, "loss": 0.5691, "step": 16248 }, { "epoch": 1.1009553492784063, "grad_norm": 4.985978126525879, "learning_rate": 7.87829420220412e-05, "loss": 0.6697, "step": 16249 }, { "epoch": 1.1010231045463785, "grad_norm": 5.756229400634766, "learning_rate": 7.87815730029434e-05, "loss": 0.7705, "step": 16250 }, { "epoch": 1.1010908598143505, "grad_norm": 5.34890079498291, "learning_rate": 7.878020398384558e-05, "loss": 0.8372, "step": 16251 }, { "epoch": 1.1011586150823227, "grad_norm": 5.433262825012207, "learning_rate": 7.877883496474776e-05, "loss": 0.8024, "step": 16252 }, { "epoch": 1.1012263703502947, "grad_norm": 6.8542256355285645, "learning_rate": 7.877746594564994e-05, "loss": 0.6715, "step": 16253 }, { "epoch": 1.101294125618267, "grad_norm": 6.2984843254089355, "learning_rate": 7.877609692655213e-05, "loss": 0.9062, "step": 16254 }, { "epoch": 1.1013618808862389, "grad_norm": 3.8605449199676514, "learning_rate": 7.877472790745431e-05, "loss": 0.4797, "step": 16255 }, { "epoch": 1.101429636154211, "grad_norm": 6.800119400024414, "learning_rate": 7.877335888835649e-05, "loss": 0.7336, "step": 16256 }, { "epoch": 1.101497391422183, "grad_norm": 5.705935478210449, "learning_rate": 7.877198986925867e-05, "loss": 0.7437, "step": 16257 }, { "epoch": 1.1015651466901553, "grad_norm": 5.928062915802002, "learning_rate": 7.877062085016085e-05, "loss": 0.7036, "step": 16258 }, { "epoch": 1.1016329019581272, "grad_norm": 5.317257881164551, "learning_rate": 7.876925183106305e-05, "loss": 0.6604, "step": 16259 }, { "epoch": 1.1017006572260994, "grad_norm": 7.342348575592041, "learning_rate": 7.876788281196523e-05, "loss": 0.8121, "step": 16260 }, { "epoch": 1.1017684124940714, "grad_norm": 6.198090553283691, "learning_rate": 7.876651379286741e-05, "loss": 0.6446, "step": 16261 }, { "epoch": 1.1018361677620434, "grad_norm": 6.285484790802002, "learning_rate": 7.87651447737696e-05, "loss": 0.6862, "step": 16262 }, { "epoch": 1.1019039230300156, "grad_norm": 6.3264994621276855, "learning_rate": 7.876377575467178e-05, "loss": 0.5654, "step": 16263 }, { "epoch": 1.1019716782979876, "grad_norm": 4.603121280670166, "learning_rate": 7.876240673557396e-05, "loss": 0.799, "step": 16264 }, { "epoch": 1.1020394335659598, "grad_norm": 8.007383346557617, "learning_rate": 7.876103771647616e-05, "loss": 0.8962, "step": 16265 }, { "epoch": 1.1021071888339318, "grad_norm": 5.806975841522217, "learning_rate": 7.875966869737834e-05, "loss": 0.7357, "step": 16266 }, { "epoch": 1.102174944101904, "grad_norm": 5.703726291656494, "learning_rate": 7.875829967828052e-05, "loss": 0.7392, "step": 16267 }, { "epoch": 1.102242699369876, "grad_norm": 5.793273448944092, "learning_rate": 7.875693065918271e-05, "loss": 0.7173, "step": 16268 }, { "epoch": 1.1023104546378482, "grad_norm": 6.515778541564941, "learning_rate": 7.875556164008489e-05, "loss": 0.6591, "step": 16269 }, { "epoch": 1.1023782099058201, "grad_norm": 7.7042059898376465, "learning_rate": 7.875419262098707e-05, "loss": 0.9228, "step": 16270 }, { "epoch": 1.1024459651737923, "grad_norm": 4.594401836395264, "learning_rate": 7.875282360188925e-05, "loss": 0.5748, "step": 16271 }, { "epoch": 1.1025137204417643, "grad_norm": 8.685628890991211, "learning_rate": 7.875145458279143e-05, "loss": 0.7174, "step": 16272 }, { "epoch": 1.1025814757097365, "grad_norm": 5.300126075744629, "learning_rate": 7.875008556369363e-05, "loss": 0.6349, "step": 16273 }, { "epoch": 1.1026492309777085, "grad_norm": 5.215007305145264, "learning_rate": 7.87487165445958e-05, "loss": 0.5969, "step": 16274 }, { "epoch": 1.1027169862456807, "grad_norm": 6.813204765319824, "learning_rate": 7.874734752549799e-05, "loss": 0.7632, "step": 16275 }, { "epoch": 1.1027847415136527, "grad_norm": 5.5026631355285645, "learning_rate": 7.874597850640017e-05, "loss": 0.6924, "step": 16276 }, { "epoch": 1.1028524967816247, "grad_norm": 7.050729274749756, "learning_rate": 7.874460948730236e-05, "loss": 0.8821, "step": 16277 }, { "epoch": 1.1029202520495969, "grad_norm": 6.8287811279296875, "learning_rate": 7.874324046820454e-05, "loss": 0.7282, "step": 16278 }, { "epoch": 1.1029880073175689, "grad_norm": 5.6110687255859375, "learning_rate": 7.874187144910672e-05, "loss": 0.5387, "step": 16279 }, { "epoch": 1.103055762585541, "grad_norm": 6.597656726837158, "learning_rate": 7.87405024300089e-05, "loss": 0.9386, "step": 16280 }, { "epoch": 1.103123517853513, "grad_norm": 6.492720603942871, "learning_rate": 7.873913341091108e-05, "loss": 0.5659, "step": 16281 }, { "epoch": 1.1031912731214852, "grad_norm": 5.881611347198486, "learning_rate": 7.873776439181328e-05, "loss": 0.5639, "step": 16282 }, { "epoch": 1.1032590283894572, "grad_norm": 5.611779689788818, "learning_rate": 7.873639537271546e-05, "loss": 0.6953, "step": 16283 }, { "epoch": 1.1033267836574294, "grad_norm": 7.153480529785156, "learning_rate": 7.873502635361764e-05, "loss": 0.9283, "step": 16284 }, { "epoch": 1.1033945389254014, "grad_norm": 6.058135986328125, "learning_rate": 7.873365733451982e-05, "loss": 0.7362, "step": 16285 }, { "epoch": 1.1034622941933736, "grad_norm": 5.711484432220459, "learning_rate": 7.873228831542201e-05, "loss": 0.7727, "step": 16286 }, { "epoch": 1.1035300494613456, "grad_norm": 4.450558662414551, "learning_rate": 7.873091929632419e-05, "loss": 0.687, "step": 16287 }, { "epoch": 1.1035978047293178, "grad_norm": 6.392914772033691, "learning_rate": 7.872955027722637e-05, "loss": 0.7404, "step": 16288 }, { "epoch": 1.1036655599972898, "grad_norm": 7.236051082611084, "learning_rate": 7.872818125812855e-05, "loss": 0.8772, "step": 16289 }, { "epoch": 1.103733315265262, "grad_norm": 5.396810054779053, "learning_rate": 7.872681223903073e-05, "loss": 0.7059, "step": 16290 }, { "epoch": 1.103801070533234, "grad_norm": 6.475565433502197, "learning_rate": 7.872544321993293e-05, "loss": 0.7454, "step": 16291 }, { "epoch": 1.103868825801206, "grad_norm": 7.763959884643555, "learning_rate": 7.87240742008351e-05, "loss": 0.8434, "step": 16292 }, { "epoch": 1.1039365810691781, "grad_norm": 5.478339195251465, "learning_rate": 7.872270518173729e-05, "loss": 0.665, "step": 16293 }, { "epoch": 1.1040043363371501, "grad_norm": 6.564233779907227, "learning_rate": 7.872133616263947e-05, "loss": 0.8856, "step": 16294 }, { "epoch": 1.1040720916051223, "grad_norm": 5.313308238983154, "learning_rate": 7.871996714354165e-05, "loss": 0.5264, "step": 16295 }, { "epoch": 1.1041398468730943, "grad_norm": 5.785020351409912, "learning_rate": 7.871859812444384e-05, "loss": 0.5262, "step": 16296 }, { "epoch": 1.1042076021410665, "grad_norm": 4.397270679473877, "learning_rate": 7.871722910534602e-05, "loss": 0.5755, "step": 16297 }, { "epoch": 1.1042753574090385, "grad_norm": 5.507979393005371, "learning_rate": 7.87158600862482e-05, "loss": 0.6667, "step": 16298 }, { "epoch": 1.1043431126770107, "grad_norm": 6.59631872177124, "learning_rate": 7.871449106715038e-05, "loss": 0.6135, "step": 16299 }, { "epoch": 1.1044108679449827, "grad_norm": 5.889465808868408, "learning_rate": 7.871312204805258e-05, "loss": 0.6328, "step": 16300 }, { "epoch": 1.1044786232129549, "grad_norm": 5.357670307159424, "learning_rate": 7.871175302895476e-05, "loss": 0.8888, "step": 16301 }, { "epoch": 1.1045463784809268, "grad_norm": 5.813202381134033, "learning_rate": 7.871038400985694e-05, "loss": 0.8759, "step": 16302 }, { "epoch": 1.104614133748899, "grad_norm": 5.886719226837158, "learning_rate": 7.870901499075912e-05, "loss": 0.8037, "step": 16303 }, { "epoch": 1.104681889016871, "grad_norm": 5.701767444610596, "learning_rate": 7.87076459716613e-05, "loss": 0.7458, "step": 16304 }, { "epoch": 1.1047496442848432, "grad_norm": 4.6818413734436035, "learning_rate": 7.870627695256349e-05, "loss": 0.5317, "step": 16305 }, { "epoch": 1.1048173995528152, "grad_norm": 5.039698123931885, "learning_rate": 7.870490793346567e-05, "loss": 0.6784, "step": 16306 }, { "epoch": 1.1048851548207874, "grad_norm": 8.11133861541748, "learning_rate": 7.870353891436785e-05, "loss": 0.5449, "step": 16307 }, { "epoch": 1.1049529100887594, "grad_norm": 6.868621826171875, "learning_rate": 7.870216989527005e-05, "loss": 0.6633, "step": 16308 }, { "epoch": 1.1050206653567316, "grad_norm": 5.920335292816162, "learning_rate": 7.870080087617223e-05, "loss": 0.7819, "step": 16309 }, { "epoch": 1.1050884206247036, "grad_norm": 5.8382978439331055, "learning_rate": 7.86994318570744e-05, "loss": 0.7254, "step": 16310 }, { "epoch": 1.1051561758926756, "grad_norm": 6.541285514831543, "learning_rate": 7.86980628379766e-05, "loss": 0.6517, "step": 16311 }, { "epoch": 1.1052239311606478, "grad_norm": 6.541675567626953, "learning_rate": 7.869669381887878e-05, "loss": 0.5173, "step": 16312 }, { "epoch": 1.1052916864286197, "grad_norm": 8.50218677520752, "learning_rate": 7.869532479978096e-05, "loss": 0.6266, "step": 16313 }, { "epoch": 1.105359441696592, "grad_norm": 4.665585517883301, "learning_rate": 7.869395578068315e-05, "loss": 0.6517, "step": 16314 }, { "epoch": 1.105427196964564, "grad_norm": 6.082878112792969, "learning_rate": 7.869258676158534e-05, "loss": 0.9384, "step": 16315 }, { "epoch": 1.1054949522325361, "grad_norm": 6.00462532043457, "learning_rate": 7.869121774248752e-05, "loss": 0.7451, "step": 16316 }, { "epoch": 1.1055627075005081, "grad_norm": 6.826976776123047, "learning_rate": 7.86898487233897e-05, "loss": 0.8636, "step": 16317 }, { "epoch": 1.1056304627684803, "grad_norm": 6.036766052246094, "learning_rate": 7.868847970429188e-05, "loss": 0.6864, "step": 16318 }, { "epoch": 1.1056982180364523, "grad_norm": 5.662924766540527, "learning_rate": 7.868711068519407e-05, "loss": 0.6582, "step": 16319 }, { "epoch": 1.1057659733044245, "grad_norm": 5.307290077209473, "learning_rate": 7.868574166609625e-05, "loss": 0.6985, "step": 16320 }, { "epoch": 1.1058337285723965, "grad_norm": 6.92588472366333, "learning_rate": 7.868437264699843e-05, "loss": 0.799, "step": 16321 }, { "epoch": 1.1059014838403687, "grad_norm": 7.620028972625732, "learning_rate": 7.868300362790061e-05, "loss": 0.7356, "step": 16322 }, { "epoch": 1.1059692391083407, "grad_norm": 8.529465675354004, "learning_rate": 7.86816346088028e-05, "loss": 0.5915, "step": 16323 }, { "epoch": 1.1060369943763129, "grad_norm": 5.126700401306152, "learning_rate": 7.868026558970499e-05, "loss": 0.66, "step": 16324 }, { "epoch": 1.1061047496442848, "grad_norm": 5.512331962585449, "learning_rate": 7.867889657060717e-05, "loss": 0.804, "step": 16325 }, { "epoch": 1.1061725049122568, "grad_norm": 5.728443145751953, "learning_rate": 7.867752755150935e-05, "loss": 0.6654, "step": 16326 }, { "epoch": 1.106240260180229, "grad_norm": 4.493624210357666, "learning_rate": 7.867615853241153e-05, "loss": 0.5286, "step": 16327 }, { "epoch": 1.106308015448201, "grad_norm": 5.167482376098633, "learning_rate": 7.867478951331372e-05, "loss": 0.6084, "step": 16328 }, { "epoch": 1.1063757707161732, "grad_norm": 5.0305657386779785, "learning_rate": 7.86734204942159e-05, "loss": 0.8362, "step": 16329 }, { "epoch": 1.1064435259841452, "grad_norm": 5.162072658538818, "learning_rate": 7.867205147511808e-05, "loss": 0.8681, "step": 16330 }, { "epoch": 1.1065112812521174, "grad_norm": 6.101891040802002, "learning_rate": 7.867068245602026e-05, "loss": 0.7332, "step": 16331 }, { "epoch": 1.1065790365200894, "grad_norm": 6.320982456207275, "learning_rate": 7.866931343692246e-05, "loss": 0.9075, "step": 16332 }, { "epoch": 1.1066467917880616, "grad_norm": 5.854201316833496, "learning_rate": 7.866794441782464e-05, "loss": 0.5638, "step": 16333 }, { "epoch": 1.1067145470560336, "grad_norm": 5.276032447814941, "learning_rate": 7.866657539872682e-05, "loss": 0.6531, "step": 16334 }, { "epoch": 1.1067823023240058, "grad_norm": 8.056017875671387, "learning_rate": 7.8665206379629e-05, "loss": 0.5859, "step": 16335 }, { "epoch": 1.1068500575919777, "grad_norm": 5.445096492767334, "learning_rate": 7.866383736053118e-05, "loss": 0.8072, "step": 16336 }, { "epoch": 1.10691781285995, "grad_norm": 9.058344841003418, "learning_rate": 7.866246834143337e-05, "loss": 0.9842, "step": 16337 }, { "epoch": 1.106985568127922, "grad_norm": 5.209854602813721, "learning_rate": 7.866109932233555e-05, "loss": 0.6562, "step": 16338 }, { "epoch": 1.1070533233958941, "grad_norm": 4.583771705627441, "learning_rate": 7.865973030323773e-05, "loss": 0.6274, "step": 16339 }, { "epoch": 1.107121078663866, "grad_norm": 5.5320940017700195, "learning_rate": 7.865836128413991e-05, "loss": 0.6254, "step": 16340 }, { "epoch": 1.107188833931838, "grad_norm": 5.773438930511475, "learning_rate": 7.86569922650421e-05, "loss": 0.8155, "step": 16341 }, { "epoch": 1.1072565891998103, "grad_norm": 5.824255466461182, "learning_rate": 7.865562324594429e-05, "loss": 0.6921, "step": 16342 }, { "epoch": 1.1073243444677823, "grad_norm": 7.974730014801025, "learning_rate": 7.865425422684647e-05, "loss": 0.7548, "step": 16343 }, { "epoch": 1.1073920997357545, "grad_norm": 8.403234481811523, "learning_rate": 7.865288520774865e-05, "loss": 0.6378, "step": 16344 }, { "epoch": 1.1074598550037265, "grad_norm": 6.035255432128906, "learning_rate": 7.865151618865083e-05, "loss": 0.9632, "step": 16345 }, { "epoch": 1.1075276102716987, "grad_norm": 4.658270835876465, "learning_rate": 7.865014716955302e-05, "loss": 0.7039, "step": 16346 }, { "epoch": 1.1075953655396706, "grad_norm": 4.916011333465576, "learning_rate": 7.86487781504552e-05, "loss": 0.5733, "step": 16347 }, { "epoch": 1.1076631208076428, "grad_norm": 5.327943801879883, "learning_rate": 7.864740913135738e-05, "loss": 0.6256, "step": 16348 }, { "epoch": 1.1077308760756148, "grad_norm": 5.153133392333984, "learning_rate": 7.864604011225956e-05, "loss": 0.5463, "step": 16349 }, { "epoch": 1.107798631343587, "grad_norm": 4.027024745941162, "learning_rate": 7.864467109316174e-05, "loss": 0.5824, "step": 16350 }, { "epoch": 1.107866386611559, "grad_norm": 5.886280536651611, "learning_rate": 7.864330207406394e-05, "loss": 0.758, "step": 16351 }, { "epoch": 1.1079341418795312, "grad_norm": 10.420184135437012, "learning_rate": 7.864193305496612e-05, "loss": 0.7657, "step": 16352 }, { "epoch": 1.1080018971475032, "grad_norm": 6.934298515319824, "learning_rate": 7.86405640358683e-05, "loss": 0.579, "step": 16353 }, { "epoch": 1.1080696524154754, "grad_norm": 5.0468573570251465, "learning_rate": 7.863919501677049e-05, "loss": 0.6553, "step": 16354 }, { "epoch": 1.1081374076834474, "grad_norm": 4.991286754608154, "learning_rate": 7.863782599767267e-05, "loss": 0.6482, "step": 16355 }, { "epoch": 1.1082051629514196, "grad_norm": 5.812962055206299, "learning_rate": 7.863645697857485e-05, "loss": 0.7708, "step": 16356 }, { "epoch": 1.1082729182193916, "grad_norm": 6.6987104415893555, "learning_rate": 7.863508795947705e-05, "loss": 0.807, "step": 16357 }, { "epoch": 1.1083406734873638, "grad_norm": 4.859643459320068, "learning_rate": 7.863371894037923e-05, "loss": 0.7066, "step": 16358 }, { "epoch": 1.1084084287553357, "grad_norm": 8.146595001220703, "learning_rate": 7.86323499212814e-05, "loss": 0.7213, "step": 16359 }, { "epoch": 1.1084761840233077, "grad_norm": 5.962488174438477, "learning_rate": 7.86309809021836e-05, "loss": 0.9372, "step": 16360 }, { "epoch": 1.10854393929128, "grad_norm": 6.733558177947998, "learning_rate": 7.862961188308578e-05, "loss": 0.7216, "step": 16361 }, { "epoch": 1.108611694559252, "grad_norm": 6.295005798339844, "learning_rate": 7.862824286398796e-05, "loss": 0.6771, "step": 16362 }, { "epoch": 1.108679449827224, "grad_norm": 5.711909770965576, "learning_rate": 7.862687384489014e-05, "loss": 0.6979, "step": 16363 }, { "epoch": 1.108747205095196, "grad_norm": 7.529008865356445, "learning_rate": 7.862550482579233e-05, "loss": 0.8168, "step": 16364 }, { "epoch": 1.1088149603631683, "grad_norm": 6.682563304901123, "learning_rate": 7.862413580669451e-05, "loss": 0.7012, "step": 16365 }, { "epoch": 1.1088827156311403, "grad_norm": 5.381031513214111, "learning_rate": 7.86227667875967e-05, "loss": 0.6594, "step": 16366 }, { "epoch": 1.1089504708991125, "grad_norm": 5.263582229614258, "learning_rate": 7.862139776849888e-05, "loss": 0.6086, "step": 16367 }, { "epoch": 1.1090182261670845, "grad_norm": 4.777389049530029, "learning_rate": 7.862002874940106e-05, "loss": 0.563, "step": 16368 }, { "epoch": 1.1090859814350567, "grad_norm": 4.717031478881836, "learning_rate": 7.861865973030325e-05, "loss": 0.7422, "step": 16369 }, { "epoch": 1.1091537367030286, "grad_norm": 6.94821310043335, "learning_rate": 7.861729071120543e-05, "loss": 0.8252, "step": 16370 }, { "epoch": 1.1092214919710008, "grad_norm": 6.443717956542969, "learning_rate": 7.861592169210761e-05, "loss": 0.616, "step": 16371 }, { "epoch": 1.1092892472389728, "grad_norm": 5.591299533843994, "learning_rate": 7.861455267300979e-05, "loss": 0.7008, "step": 16372 }, { "epoch": 1.109357002506945, "grad_norm": 6.745190143585205, "learning_rate": 7.861318365391197e-05, "loss": 0.4351, "step": 16373 }, { "epoch": 1.109424757774917, "grad_norm": 4.188071250915527, "learning_rate": 7.861181463481417e-05, "loss": 0.644, "step": 16374 }, { "epoch": 1.109492513042889, "grad_norm": 9.427136421203613, "learning_rate": 7.861044561571635e-05, "loss": 0.6456, "step": 16375 }, { "epoch": 1.1095602683108612, "grad_norm": 6.370525360107422, "learning_rate": 7.860907659661853e-05, "loss": 0.8236, "step": 16376 }, { "epoch": 1.1096280235788332, "grad_norm": 5.648697376251221, "learning_rate": 7.86077075775207e-05, "loss": 0.7623, "step": 16377 }, { "epoch": 1.1096957788468054, "grad_norm": 4.926682472229004, "learning_rate": 7.86063385584229e-05, "loss": 0.5091, "step": 16378 }, { "epoch": 1.1097635341147774, "grad_norm": 5.184731483459473, "learning_rate": 7.860496953932508e-05, "loss": 0.6228, "step": 16379 }, { "epoch": 1.1098312893827496, "grad_norm": 4.412238597869873, "learning_rate": 7.860360052022726e-05, "loss": 0.4859, "step": 16380 }, { "epoch": 1.1098990446507215, "grad_norm": 5.148406505584717, "learning_rate": 7.860223150112944e-05, "loss": 0.6026, "step": 16381 }, { "epoch": 1.1099667999186937, "grad_norm": 6.479032039642334, "learning_rate": 7.860086248203162e-05, "loss": 0.6879, "step": 16382 }, { "epoch": 1.1100345551866657, "grad_norm": 5.107529163360596, "learning_rate": 7.859949346293382e-05, "loss": 0.8316, "step": 16383 }, { "epoch": 1.110102310454638, "grad_norm": 6.092916011810303, "learning_rate": 7.8598124443836e-05, "loss": 0.5405, "step": 16384 }, { "epoch": 1.11017006572261, "grad_norm": 6.275728702545166, "learning_rate": 7.859675542473818e-05, "loss": 0.6238, "step": 16385 }, { "epoch": 1.110237820990582, "grad_norm": 5.813026428222656, "learning_rate": 7.859538640564036e-05, "loss": 0.7023, "step": 16386 }, { "epoch": 1.110305576258554, "grad_norm": 5.524173259735107, "learning_rate": 7.859401738654255e-05, "loss": 0.6626, "step": 16387 }, { "epoch": 1.1103733315265263, "grad_norm": 7.234399318695068, "learning_rate": 7.859264836744473e-05, "loss": 0.5818, "step": 16388 }, { "epoch": 1.1104410867944983, "grad_norm": 5.309559345245361, "learning_rate": 7.859127934834691e-05, "loss": 0.7264, "step": 16389 }, { "epoch": 1.1105088420624702, "grad_norm": 6.291361331939697, "learning_rate": 7.858991032924909e-05, "loss": 0.8169, "step": 16390 }, { "epoch": 1.1105765973304424, "grad_norm": 5.197052955627441, "learning_rate": 7.858854131015127e-05, "loss": 0.7933, "step": 16391 }, { "epoch": 1.1106443525984144, "grad_norm": 6.224545955657959, "learning_rate": 7.858717229105347e-05, "loss": 0.9598, "step": 16392 }, { "epoch": 1.1107121078663866, "grad_norm": 7.769322395324707, "learning_rate": 7.858580327195565e-05, "loss": 0.8685, "step": 16393 }, { "epoch": 1.1107798631343586, "grad_norm": 6.402470588684082, "learning_rate": 7.858443425285783e-05, "loss": 0.8052, "step": 16394 }, { "epoch": 1.1108476184023308, "grad_norm": 5.41104793548584, "learning_rate": 7.858306523376e-05, "loss": 0.6345, "step": 16395 }, { "epoch": 1.1109153736703028, "grad_norm": 6.050509452819824, "learning_rate": 7.85816962146622e-05, "loss": 0.5625, "step": 16396 }, { "epoch": 1.110983128938275, "grad_norm": 5.332095146179199, "learning_rate": 7.858032719556438e-05, "loss": 0.5443, "step": 16397 }, { "epoch": 1.111050884206247, "grad_norm": 5.569375991821289, "learning_rate": 7.857895817646656e-05, "loss": 0.5195, "step": 16398 }, { "epoch": 1.1111186394742192, "grad_norm": 5.020506381988525, "learning_rate": 7.857758915736874e-05, "loss": 0.6129, "step": 16399 }, { "epoch": 1.1111863947421912, "grad_norm": 4.502058029174805, "learning_rate": 7.857622013827092e-05, "loss": 0.6652, "step": 16400 }, { "epoch": 1.1112541500101634, "grad_norm": 5.6798481941223145, "learning_rate": 7.857485111917312e-05, "loss": 0.7755, "step": 16401 }, { "epoch": 1.1113219052781353, "grad_norm": 9.84770679473877, "learning_rate": 7.85734821000753e-05, "loss": 0.786, "step": 16402 }, { "epoch": 1.1113896605461075, "grad_norm": 6.1381378173828125, "learning_rate": 7.857211308097748e-05, "loss": 0.8187, "step": 16403 }, { "epoch": 1.1114574158140795, "grad_norm": 5.588286399841309, "learning_rate": 7.857074406187967e-05, "loss": 0.6508, "step": 16404 }, { "epoch": 1.1115251710820517, "grad_norm": 5.41074275970459, "learning_rate": 7.856937504278185e-05, "loss": 0.6348, "step": 16405 }, { "epoch": 1.1115929263500237, "grad_norm": 6.4471235275268555, "learning_rate": 7.856800602368403e-05, "loss": 0.9603, "step": 16406 }, { "epoch": 1.111660681617996, "grad_norm": 6.701603412628174, "learning_rate": 7.856663700458622e-05, "loss": 0.7937, "step": 16407 }, { "epoch": 1.111728436885968, "grad_norm": 7.06520414352417, "learning_rate": 7.85652679854884e-05, "loss": 0.6763, "step": 16408 }, { "epoch": 1.1117961921539399, "grad_norm": 4.8184404373168945, "learning_rate": 7.856389896639059e-05, "loss": 0.7191, "step": 16409 }, { "epoch": 1.111863947421912, "grad_norm": 5.840653896331787, "learning_rate": 7.856252994729278e-05, "loss": 0.7294, "step": 16410 }, { "epoch": 1.111931702689884, "grad_norm": 5.681267261505127, "learning_rate": 7.856116092819496e-05, "loss": 0.8226, "step": 16411 }, { "epoch": 1.1119994579578563, "grad_norm": 5.182020664215088, "learning_rate": 7.855979190909714e-05, "loss": 0.6697, "step": 16412 }, { "epoch": 1.1120672132258282, "grad_norm": 7.743770599365234, "learning_rate": 7.855842288999932e-05, "loss": 0.8648, "step": 16413 }, { "epoch": 1.1121349684938004, "grad_norm": 4.801437854766846, "learning_rate": 7.85570538709015e-05, "loss": 0.611, "step": 16414 }, { "epoch": 1.1122027237617724, "grad_norm": 4.749513626098633, "learning_rate": 7.85556848518037e-05, "loss": 0.696, "step": 16415 }, { "epoch": 1.1122704790297446, "grad_norm": 6.828696250915527, "learning_rate": 7.855431583270587e-05, "loss": 0.5749, "step": 16416 }, { "epoch": 1.1123382342977166, "grad_norm": 5.405619144439697, "learning_rate": 7.855294681360806e-05, "loss": 0.5387, "step": 16417 }, { "epoch": 1.1124059895656888, "grad_norm": 5.546106338500977, "learning_rate": 7.855157779451024e-05, "loss": 0.6858, "step": 16418 }, { "epoch": 1.1124737448336608, "grad_norm": 5.9285712242126465, "learning_rate": 7.855020877541243e-05, "loss": 0.8244, "step": 16419 }, { "epoch": 1.112541500101633, "grad_norm": 5.90974760055542, "learning_rate": 7.854883975631461e-05, "loss": 0.6853, "step": 16420 }, { "epoch": 1.112609255369605, "grad_norm": 11.681938171386719, "learning_rate": 7.854747073721679e-05, "loss": 0.7719, "step": 16421 }, { "epoch": 1.1126770106375772, "grad_norm": 5.424148082733154, "learning_rate": 7.854610171811897e-05, "loss": 0.6133, "step": 16422 }, { "epoch": 1.1127447659055492, "grad_norm": 6.452503204345703, "learning_rate": 7.854473269902115e-05, "loss": 0.523, "step": 16423 }, { "epoch": 1.1128125211735211, "grad_norm": 11.88731861114502, "learning_rate": 7.854336367992334e-05, "loss": 0.9297, "step": 16424 }, { "epoch": 1.1128802764414933, "grad_norm": 6.110743522644043, "learning_rate": 7.854199466082553e-05, "loss": 0.5357, "step": 16425 }, { "epoch": 1.1129480317094653, "grad_norm": 5.067276477813721, "learning_rate": 7.85406256417277e-05, "loss": 0.5576, "step": 16426 }, { "epoch": 1.1130157869774375, "grad_norm": 6.3760552406311035, "learning_rate": 7.853925662262989e-05, "loss": 0.6453, "step": 16427 }, { "epoch": 1.1130835422454095, "grad_norm": 7.936148166656494, "learning_rate": 7.853788760353207e-05, "loss": 0.8744, "step": 16428 }, { "epoch": 1.1131512975133817, "grad_norm": 4.137428283691406, "learning_rate": 7.853651858443426e-05, "loss": 0.5353, "step": 16429 }, { "epoch": 1.1132190527813537, "grad_norm": 6.208136081695557, "learning_rate": 7.853514956533644e-05, "loss": 0.7925, "step": 16430 }, { "epoch": 1.113286808049326, "grad_norm": 7.687192440032959, "learning_rate": 7.853378054623862e-05, "loss": 0.9704, "step": 16431 }, { "epoch": 1.1133545633172979, "grad_norm": 4.639840602874756, "learning_rate": 7.85324115271408e-05, "loss": 0.6534, "step": 16432 }, { "epoch": 1.11342231858527, "grad_norm": 6.294159889221191, "learning_rate": 7.8531042508043e-05, "loss": 0.8825, "step": 16433 }, { "epoch": 1.113490073853242, "grad_norm": 7.234892845153809, "learning_rate": 7.852967348894518e-05, "loss": 0.6695, "step": 16434 }, { "epoch": 1.1135578291212143, "grad_norm": 7.041029453277588, "learning_rate": 7.852830446984736e-05, "loss": 0.8467, "step": 16435 }, { "epoch": 1.1136255843891862, "grad_norm": 8.180865287780762, "learning_rate": 7.852693545074954e-05, "loss": 0.5544, "step": 16436 }, { "epoch": 1.1136933396571584, "grad_norm": 5.615832328796387, "learning_rate": 7.852556643165172e-05, "loss": 0.788, "step": 16437 }, { "epoch": 1.1137610949251304, "grad_norm": 5.674948692321777, "learning_rate": 7.852419741255391e-05, "loss": 0.6344, "step": 16438 }, { "epoch": 1.1138288501931024, "grad_norm": 5.536764621734619, "learning_rate": 7.852282839345609e-05, "loss": 0.6614, "step": 16439 }, { "epoch": 1.1138966054610746, "grad_norm": 5.49966287612915, "learning_rate": 7.852145937435827e-05, "loss": 0.5425, "step": 16440 }, { "epoch": 1.1139643607290466, "grad_norm": 5.57595157623291, "learning_rate": 7.852009035526045e-05, "loss": 0.5386, "step": 16441 }, { "epoch": 1.1140321159970188, "grad_norm": 4.964458465576172, "learning_rate": 7.851872133616265e-05, "loss": 0.5351, "step": 16442 }, { "epoch": 1.1140998712649908, "grad_norm": 6.3265509605407715, "learning_rate": 7.851735231706483e-05, "loss": 0.6693, "step": 16443 }, { "epoch": 1.114167626532963, "grad_norm": 5.302865505218506, "learning_rate": 7.8515983297967e-05, "loss": 0.6336, "step": 16444 }, { "epoch": 1.114235381800935, "grad_norm": 4.544825553894043, "learning_rate": 7.851461427886919e-05, "loss": 0.5943, "step": 16445 }, { "epoch": 1.1143031370689072, "grad_norm": 5.0601630210876465, "learning_rate": 7.851324525977137e-05, "loss": 0.714, "step": 16446 }, { "epoch": 1.1143708923368791, "grad_norm": 4.90870475769043, "learning_rate": 7.851187624067356e-05, "loss": 0.6078, "step": 16447 }, { "epoch": 1.1144386476048513, "grad_norm": 5.746411323547363, "learning_rate": 7.851050722157574e-05, "loss": 0.5527, "step": 16448 }, { "epoch": 1.1145064028728233, "grad_norm": 5.825742244720459, "learning_rate": 7.850913820247792e-05, "loss": 0.5043, "step": 16449 }, { "epoch": 1.1145741581407955, "grad_norm": 6.97534704208374, "learning_rate": 7.850776918338011e-05, "loss": 0.8069, "step": 16450 }, { "epoch": 1.1146419134087675, "grad_norm": 6.477380752563477, "learning_rate": 7.85064001642823e-05, "loss": 0.6987, "step": 16451 }, { "epoch": 1.1147096686767397, "grad_norm": 5.29969596862793, "learning_rate": 7.850503114518448e-05, "loss": 0.7252, "step": 16452 }, { "epoch": 1.1147774239447117, "grad_norm": 5.611807823181152, "learning_rate": 7.850366212608667e-05, "loss": 0.6814, "step": 16453 }, { "epoch": 1.114845179212684, "grad_norm": 6.4802350997924805, "learning_rate": 7.850229310698885e-05, "loss": 0.4992, "step": 16454 }, { "epoch": 1.1149129344806559, "grad_norm": 5.166659832000732, "learning_rate": 7.850092408789103e-05, "loss": 0.708, "step": 16455 }, { "epoch": 1.114980689748628, "grad_norm": 4.782804012298584, "learning_rate": 7.849955506879322e-05, "loss": 0.7891, "step": 16456 }, { "epoch": 1.1150484450166, "grad_norm": 6.390671730041504, "learning_rate": 7.84981860496954e-05, "loss": 0.731, "step": 16457 }, { "epoch": 1.115116200284572, "grad_norm": 5.91715145111084, "learning_rate": 7.849681703059758e-05, "loss": 0.6395, "step": 16458 }, { "epoch": 1.1151839555525442, "grad_norm": 6.513552665710449, "learning_rate": 7.849544801149977e-05, "loss": 0.809, "step": 16459 }, { "epoch": 1.1152517108205162, "grad_norm": 6.119845390319824, "learning_rate": 7.849407899240195e-05, "loss": 0.6678, "step": 16460 }, { "epoch": 1.1153194660884884, "grad_norm": 4.332753658294678, "learning_rate": 7.849270997330414e-05, "loss": 0.6528, "step": 16461 }, { "epoch": 1.1153872213564604, "grad_norm": 5.871253490447998, "learning_rate": 7.849134095420632e-05, "loss": 0.6437, "step": 16462 }, { "epoch": 1.1154549766244326, "grad_norm": 4.9431047439575195, "learning_rate": 7.84899719351085e-05, "loss": 0.4917, "step": 16463 }, { "epoch": 1.1155227318924046, "grad_norm": 5.625153064727783, "learning_rate": 7.848860291601068e-05, "loss": 0.7017, "step": 16464 }, { "epoch": 1.1155904871603768, "grad_norm": 6.678959369659424, "learning_rate": 7.848723389691287e-05, "loss": 0.7541, "step": 16465 }, { "epoch": 1.1156582424283488, "grad_norm": 5.871247291564941, "learning_rate": 7.848586487781505e-05, "loss": 0.564, "step": 16466 }, { "epoch": 1.115725997696321, "grad_norm": 4.651440143585205, "learning_rate": 7.848449585871723e-05, "loss": 0.5407, "step": 16467 }, { "epoch": 1.115793752964293, "grad_norm": 6.9464521408081055, "learning_rate": 7.848312683961942e-05, "loss": 0.7581, "step": 16468 }, { "epoch": 1.1158615082322652, "grad_norm": 6.172132968902588, "learning_rate": 7.84817578205216e-05, "loss": 0.6855, "step": 16469 }, { "epoch": 1.1159292635002371, "grad_norm": 6.238826274871826, "learning_rate": 7.848038880142379e-05, "loss": 0.5436, "step": 16470 }, { "epoch": 1.1159970187682093, "grad_norm": 7.121433734893799, "learning_rate": 7.847901978232597e-05, "loss": 0.962, "step": 16471 }, { "epoch": 1.1160647740361813, "grad_norm": 6.923501014709473, "learning_rate": 7.847765076322815e-05, "loss": 0.6176, "step": 16472 }, { "epoch": 1.1161325293041533, "grad_norm": 5.844583511352539, "learning_rate": 7.847628174413033e-05, "loss": 0.8946, "step": 16473 }, { "epoch": 1.1162002845721255, "grad_norm": 6.436974048614502, "learning_rate": 7.847491272503252e-05, "loss": 0.7809, "step": 16474 }, { "epoch": 1.1162680398400975, "grad_norm": 5.668829441070557, "learning_rate": 7.84735437059347e-05, "loss": 0.7343, "step": 16475 }, { "epoch": 1.1163357951080697, "grad_norm": 4.371751308441162, "learning_rate": 7.847217468683689e-05, "loss": 0.5913, "step": 16476 }, { "epoch": 1.1164035503760417, "grad_norm": 5.785346031188965, "learning_rate": 7.847080566773907e-05, "loss": 0.5453, "step": 16477 }, { "epoch": 1.1164713056440139, "grad_norm": 6.657867431640625, "learning_rate": 7.846943664864125e-05, "loss": 0.9195, "step": 16478 }, { "epoch": 1.1165390609119858, "grad_norm": 7.955598831176758, "learning_rate": 7.846806762954344e-05, "loss": 0.7671, "step": 16479 }, { "epoch": 1.116606816179958, "grad_norm": 6.41574764251709, "learning_rate": 7.846669861044562e-05, "loss": 0.6941, "step": 16480 }, { "epoch": 1.11667457144793, "grad_norm": 5.509103775024414, "learning_rate": 7.84653295913478e-05, "loss": 0.6792, "step": 16481 }, { "epoch": 1.1167423267159022, "grad_norm": 7.486603260040283, "learning_rate": 7.846396057224998e-05, "loss": 0.6783, "step": 16482 }, { "epoch": 1.1168100819838742, "grad_norm": 5.092047214508057, "learning_rate": 7.846259155315216e-05, "loss": 0.5798, "step": 16483 }, { "epoch": 1.1168778372518464, "grad_norm": 6.867835998535156, "learning_rate": 7.846122253405435e-05, "loss": 0.6381, "step": 16484 }, { "epoch": 1.1169455925198184, "grad_norm": 4.400048732757568, "learning_rate": 7.845985351495654e-05, "loss": 0.5484, "step": 16485 }, { "epoch": 1.1170133477877906, "grad_norm": 5.648205757141113, "learning_rate": 7.845848449585872e-05, "loss": 0.7163, "step": 16486 }, { "epoch": 1.1170811030557626, "grad_norm": 5.113688945770264, "learning_rate": 7.84571154767609e-05, "loss": 0.6432, "step": 16487 }, { "epoch": 1.1171488583237346, "grad_norm": 9.245594024658203, "learning_rate": 7.845574645766309e-05, "loss": 1.042, "step": 16488 }, { "epoch": 1.1172166135917068, "grad_norm": 5.310211181640625, "learning_rate": 7.845437743856527e-05, "loss": 0.7199, "step": 16489 }, { "epoch": 1.1172843688596787, "grad_norm": 5.872876167297363, "learning_rate": 7.845300841946745e-05, "loss": 0.6653, "step": 16490 }, { "epoch": 1.117352124127651, "grad_norm": 6.042059421539307, "learning_rate": 7.845163940036963e-05, "loss": 0.7398, "step": 16491 }, { "epoch": 1.117419879395623, "grad_norm": 6.278406143188477, "learning_rate": 7.845027038127181e-05, "loss": 0.6938, "step": 16492 }, { "epoch": 1.1174876346635951, "grad_norm": 4.715973854064941, "learning_rate": 7.8448901362174e-05, "loss": 0.6145, "step": 16493 }, { "epoch": 1.1175553899315671, "grad_norm": 5.362488269805908, "learning_rate": 7.844753234307619e-05, "loss": 0.7129, "step": 16494 }, { "epoch": 1.1176231451995393, "grad_norm": 5.5275187492370605, "learning_rate": 7.844616332397837e-05, "loss": 0.519, "step": 16495 }, { "epoch": 1.1176909004675113, "grad_norm": 4.727498531341553, "learning_rate": 7.844479430488056e-05, "loss": 0.6799, "step": 16496 }, { "epoch": 1.1177586557354835, "grad_norm": 5.0060200691223145, "learning_rate": 7.844342528578274e-05, "loss": 0.6559, "step": 16497 }, { "epoch": 1.1178264110034555, "grad_norm": 8.323198318481445, "learning_rate": 7.844205626668492e-05, "loss": 0.574, "step": 16498 }, { "epoch": 1.1178941662714277, "grad_norm": 5.619205951690674, "learning_rate": 7.844068724758711e-05, "loss": 0.7148, "step": 16499 }, { "epoch": 1.1179619215393997, "grad_norm": 6.951564788818359, "learning_rate": 7.84393182284893e-05, "loss": 0.6269, "step": 16500 }, { "epoch": 1.1180296768073719, "grad_norm": 5.056802272796631, "learning_rate": 7.843794920939147e-05, "loss": 0.8339, "step": 16501 }, { "epoch": 1.1180974320753438, "grad_norm": 6.4941792488098145, "learning_rate": 7.843658019029367e-05, "loss": 0.7636, "step": 16502 }, { "epoch": 1.1181651873433158, "grad_norm": 6.141055583953857, "learning_rate": 7.843521117119585e-05, "loss": 0.7537, "step": 16503 }, { "epoch": 1.118232942611288, "grad_norm": 7.453089237213135, "learning_rate": 7.843384215209803e-05, "loss": 0.8046, "step": 16504 }, { "epoch": 1.1183006978792602, "grad_norm": 7.412580490112305, "learning_rate": 7.843247313300021e-05, "loss": 0.6542, "step": 16505 }, { "epoch": 1.1183684531472322, "grad_norm": 6.80420446395874, "learning_rate": 7.843110411390239e-05, "loss": 0.6983, "step": 16506 }, { "epoch": 1.1184362084152042, "grad_norm": 5.158776760101318, "learning_rate": 7.842973509480458e-05, "loss": 0.5782, "step": 16507 }, { "epoch": 1.1185039636831764, "grad_norm": 6.552058219909668, "learning_rate": 7.842836607570676e-05, "loss": 0.9792, "step": 16508 }, { "epoch": 1.1185717189511484, "grad_norm": 4.8856024742126465, "learning_rate": 7.842699705660894e-05, "loss": 0.6559, "step": 16509 }, { "epoch": 1.1186394742191206, "grad_norm": 5.895238399505615, "learning_rate": 7.842562803751113e-05, "loss": 0.6988, "step": 16510 }, { "epoch": 1.1187072294870926, "grad_norm": 10.207904815673828, "learning_rate": 7.842425901841332e-05, "loss": 0.9168, "step": 16511 }, { "epoch": 1.1187749847550648, "grad_norm": 4.816634654998779, "learning_rate": 7.84228899993155e-05, "loss": 0.7268, "step": 16512 }, { "epoch": 1.1188427400230367, "grad_norm": 7.246109485626221, "learning_rate": 7.842152098021768e-05, "loss": 0.8063, "step": 16513 }, { "epoch": 1.118910495291009, "grad_norm": 7.619381427764893, "learning_rate": 7.842015196111986e-05, "loss": 0.8026, "step": 16514 }, { "epoch": 1.118978250558981, "grad_norm": 8.938972473144531, "learning_rate": 7.841878294202204e-05, "loss": 1.2743, "step": 16515 }, { "epoch": 1.1190460058269531, "grad_norm": 7.479329586029053, "learning_rate": 7.841741392292423e-05, "loss": 0.6163, "step": 16516 }, { "epoch": 1.119113761094925, "grad_norm": 5.318511009216309, "learning_rate": 7.841604490382641e-05, "loss": 0.7075, "step": 16517 }, { "epoch": 1.1191815163628973, "grad_norm": 6.804056644439697, "learning_rate": 7.84146758847286e-05, "loss": 0.81, "step": 16518 }, { "epoch": 1.1192492716308693, "grad_norm": 5.385700702667236, "learning_rate": 7.841330686563078e-05, "loss": 0.7378, "step": 16519 }, { "epoch": 1.1193170268988415, "grad_norm": 7.53215217590332, "learning_rate": 7.841193784653297e-05, "loss": 0.6082, "step": 16520 }, { "epoch": 1.1193847821668135, "grad_norm": 8.614946365356445, "learning_rate": 7.841056882743515e-05, "loss": 0.6866, "step": 16521 }, { "epoch": 1.1194525374347855, "grad_norm": 6.260163307189941, "learning_rate": 7.840919980833733e-05, "loss": 0.6287, "step": 16522 }, { "epoch": 1.1195202927027577, "grad_norm": 9.336182594299316, "learning_rate": 7.840783078923951e-05, "loss": 0.7944, "step": 16523 }, { "epoch": 1.1195880479707296, "grad_norm": 10.182073593139648, "learning_rate": 7.840646177014169e-05, "loss": 0.6368, "step": 16524 }, { "epoch": 1.1196558032387018, "grad_norm": 5.3863420486450195, "learning_rate": 7.840509275104388e-05, "loss": 0.7848, "step": 16525 }, { "epoch": 1.1197235585066738, "grad_norm": 4.769381999969482, "learning_rate": 7.840372373194606e-05, "loss": 0.5335, "step": 16526 }, { "epoch": 1.119791313774646, "grad_norm": 8.063030242919922, "learning_rate": 7.840235471284825e-05, "loss": 0.7534, "step": 16527 }, { "epoch": 1.119859069042618, "grad_norm": 6.1403045654296875, "learning_rate": 7.840098569375043e-05, "loss": 0.8089, "step": 16528 }, { "epoch": 1.1199268243105902, "grad_norm": 7.606939792633057, "learning_rate": 7.839961667465262e-05, "loss": 0.7578, "step": 16529 }, { "epoch": 1.1199945795785622, "grad_norm": 10.877296447753906, "learning_rate": 7.83982476555548e-05, "loss": 0.54, "step": 16530 }, { "epoch": 1.1200623348465344, "grad_norm": 5.781667709350586, "learning_rate": 7.839687863645698e-05, "loss": 0.8624, "step": 16531 }, { "epoch": 1.1201300901145064, "grad_norm": 4.3829827308654785, "learning_rate": 7.839550961735916e-05, "loss": 0.4979, "step": 16532 }, { "epoch": 1.1201978453824786, "grad_norm": 4.735124111175537, "learning_rate": 7.839414059826134e-05, "loss": 0.6497, "step": 16533 }, { "epoch": 1.1202656006504506, "grad_norm": 5.546881198883057, "learning_rate": 7.839277157916353e-05, "loss": 0.6842, "step": 16534 }, { "epoch": 1.1203333559184228, "grad_norm": 4.9232177734375, "learning_rate": 7.839140256006571e-05, "loss": 0.5805, "step": 16535 }, { "epoch": 1.1204011111863947, "grad_norm": 4.378359794616699, "learning_rate": 7.83900335409679e-05, "loss": 0.6249, "step": 16536 }, { "epoch": 1.1204688664543667, "grad_norm": 5.407273769378662, "learning_rate": 7.838866452187008e-05, "loss": 0.8445, "step": 16537 }, { "epoch": 1.120536621722339, "grad_norm": 4.610024452209473, "learning_rate": 7.838729550277226e-05, "loss": 0.518, "step": 16538 }, { "epoch": 1.120604376990311, "grad_norm": 11.108832359313965, "learning_rate": 7.838592648367445e-05, "loss": 1.08, "step": 16539 }, { "epoch": 1.120672132258283, "grad_norm": 4.610969066619873, "learning_rate": 7.838455746457663e-05, "loss": 0.5164, "step": 16540 }, { "epoch": 1.120739887526255, "grad_norm": 8.567065238952637, "learning_rate": 7.838318844547881e-05, "loss": 1.0346, "step": 16541 }, { "epoch": 1.1208076427942273, "grad_norm": 6.773820877075195, "learning_rate": 7.8381819426381e-05, "loss": 0.7441, "step": 16542 }, { "epoch": 1.1208753980621993, "grad_norm": 9.476489067077637, "learning_rate": 7.838045040728318e-05, "loss": 0.5177, "step": 16543 }, { "epoch": 1.1209431533301715, "grad_norm": 6.598116397857666, "learning_rate": 7.837908138818537e-05, "loss": 0.6661, "step": 16544 }, { "epoch": 1.1210109085981435, "grad_norm": 6.180941581726074, "learning_rate": 7.837771236908756e-05, "loss": 0.8152, "step": 16545 }, { "epoch": 1.1210786638661157, "grad_norm": 6.241885662078857, "learning_rate": 7.837634334998974e-05, "loss": 0.8264, "step": 16546 }, { "epoch": 1.1211464191340876, "grad_norm": 5.314054489135742, "learning_rate": 7.837497433089192e-05, "loss": 0.6123, "step": 16547 }, { "epoch": 1.1212141744020598, "grad_norm": 4.802424430847168, "learning_rate": 7.837360531179411e-05, "loss": 0.6321, "step": 16548 }, { "epoch": 1.1212819296700318, "grad_norm": 5.131801605224609, "learning_rate": 7.83722362926963e-05, "loss": 0.5852, "step": 16549 }, { "epoch": 1.121349684938004, "grad_norm": 4.5471391677856445, "learning_rate": 7.837086727359847e-05, "loss": 0.419, "step": 16550 }, { "epoch": 1.121417440205976, "grad_norm": 6.3551225662231445, "learning_rate": 7.836949825450065e-05, "loss": 0.6822, "step": 16551 }, { "epoch": 1.121485195473948, "grad_norm": 6.698050022125244, "learning_rate": 7.836812923540285e-05, "loss": 0.7381, "step": 16552 }, { "epoch": 1.1215529507419202, "grad_norm": 5.779103755950928, "learning_rate": 7.836676021630503e-05, "loss": 0.7189, "step": 16553 }, { "epoch": 1.1216207060098922, "grad_norm": 5.6808552742004395, "learning_rate": 7.836539119720721e-05, "loss": 0.6718, "step": 16554 }, { "epoch": 1.1216884612778644, "grad_norm": 5.681619644165039, "learning_rate": 7.836402217810939e-05, "loss": 0.4688, "step": 16555 }, { "epoch": 1.1217562165458363, "grad_norm": 5.486327171325684, "learning_rate": 7.836265315901157e-05, "loss": 0.7598, "step": 16556 }, { "epoch": 1.1218239718138086, "grad_norm": 5.492034435272217, "learning_rate": 7.836128413991376e-05, "loss": 0.8031, "step": 16557 }, { "epoch": 1.1218917270817805, "grad_norm": 5.560188293457031, "learning_rate": 7.835991512081594e-05, "loss": 0.7689, "step": 16558 }, { "epoch": 1.1219594823497527, "grad_norm": 6.246037006378174, "learning_rate": 7.835854610171812e-05, "loss": 0.7454, "step": 16559 }, { "epoch": 1.1220272376177247, "grad_norm": 4.9940409660339355, "learning_rate": 7.83571770826203e-05, "loss": 0.5191, "step": 16560 }, { "epoch": 1.122094992885697, "grad_norm": 6.49337100982666, "learning_rate": 7.835580806352249e-05, "loss": 0.732, "step": 16561 }, { "epoch": 1.122162748153669, "grad_norm": 12.41988468170166, "learning_rate": 7.835443904442468e-05, "loss": 0.7257, "step": 16562 }, { "epoch": 1.122230503421641, "grad_norm": 6.870421886444092, "learning_rate": 7.835307002532686e-05, "loss": 0.6377, "step": 16563 }, { "epoch": 1.122298258689613, "grad_norm": 6.671525478363037, "learning_rate": 7.835170100622904e-05, "loss": 0.6479, "step": 16564 }, { "epoch": 1.1223660139575853, "grad_norm": 5.417186260223389, "learning_rate": 7.835033198713122e-05, "loss": 0.9996, "step": 16565 }, { "epoch": 1.1224337692255573, "grad_norm": 8.18811321258545, "learning_rate": 7.834896296803341e-05, "loss": 0.8235, "step": 16566 }, { "epoch": 1.1225015244935295, "grad_norm": 6.063117027282715, "learning_rate": 7.83475939489356e-05, "loss": 0.777, "step": 16567 }, { "epoch": 1.1225692797615014, "grad_norm": 4.244666576385498, "learning_rate": 7.834622492983777e-05, "loss": 0.4565, "step": 16568 }, { "epoch": 1.1226370350294737, "grad_norm": 5.733912467956543, "learning_rate": 7.834485591073995e-05, "loss": 0.6999, "step": 16569 }, { "epoch": 1.1227047902974456, "grad_norm": 5.080024719238281, "learning_rate": 7.834348689164214e-05, "loss": 0.5927, "step": 16570 }, { "epoch": 1.1227725455654176, "grad_norm": 5.610750198364258, "learning_rate": 7.834211787254433e-05, "loss": 0.6637, "step": 16571 }, { "epoch": 1.1228403008333898, "grad_norm": 5.9720635414123535, "learning_rate": 7.834074885344651e-05, "loss": 0.8085, "step": 16572 }, { "epoch": 1.1229080561013618, "grad_norm": 5.229776859283447, "learning_rate": 7.833937983434869e-05, "loss": 0.5649, "step": 16573 }, { "epoch": 1.122975811369334, "grad_norm": 5.457815647125244, "learning_rate": 7.833801081525087e-05, "loss": 0.6228, "step": 16574 }, { "epoch": 1.123043566637306, "grad_norm": 5.012820243835449, "learning_rate": 7.833664179615306e-05, "loss": 0.6635, "step": 16575 }, { "epoch": 1.1231113219052782, "grad_norm": 5.759067535400391, "learning_rate": 7.833527277705524e-05, "loss": 0.634, "step": 16576 }, { "epoch": 1.1231790771732502, "grad_norm": 9.28831958770752, "learning_rate": 7.833390375795742e-05, "loss": 0.7987, "step": 16577 }, { "epoch": 1.1232468324412224, "grad_norm": 6.013178825378418, "learning_rate": 7.83325347388596e-05, "loss": 0.6997, "step": 16578 }, { "epoch": 1.1233145877091943, "grad_norm": 5.971577167510986, "learning_rate": 7.833116571976179e-05, "loss": 0.6976, "step": 16579 }, { "epoch": 1.1233823429771665, "grad_norm": 7.694915771484375, "learning_rate": 7.832979670066398e-05, "loss": 0.9614, "step": 16580 }, { "epoch": 1.1234500982451385, "grad_norm": 5.692512512207031, "learning_rate": 7.832842768156616e-05, "loss": 0.7804, "step": 16581 }, { "epoch": 1.1235178535131107, "grad_norm": 4.4728312492370605, "learning_rate": 7.832705866246834e-05, "loss": 0.5845, "step": 16582 }, { "epoch": 1.1235856087810827, "grad_norm": 5.194689750671387, "learning_rate": 7.832568964337052e-05, "loss": 0.5488, "step": 16583 }, { "epoch": 1.123653364049055, "grad_norm": 5.546979904174805, "learning_rate": 7.832432062427271e-05, "loss": 0.6886, "step": 16584 }, { "epoch": 1.123721119317027, "grad_norm": 5.987720489501953, "learning_rate": 7.83229516051749e-05, "loss": 0.8281, "step": 16585 }, { "epoch": 1.1237888745849989, "grad_norm": 6.79714822769165, "learning_rate": 7.832158258607707e-05, "loss": 0.7205, "step": 16586 }, { "epoch": 1.123856629852971, "grad_norm": 5.197304725646973, "learning_rate": 7.832021356697926e-05, "loss": 0.5863, "step": 16587 }, { "epoch": 1.123924385120943, "grad_norm": 5.661016941070557, "learning_rate": 7.831884454788145e-05, "loss": 0.7591, "step": 16588 }, { "epoch": 1.1239921403889153, "grad_norm": 7.9939494132995605, "learning_rate": 7.831747552878363e-05, "loss": 0.5234, "step": 16589 }, { "epoch": 1.1240598956568872, "grad_norm": 4.955355644226074, "learning_rate": 7.831610650968581e-05, "loss": 0.5613, "step": 16590 }, { "epoch": 1.1241276509248594, "grad_norm": 6.7157883644104, "learning_rate": 7.8314737490588e-05, "loss": 0.9179, "step": 16591 }, { "epoch": 1.1241954061928314, "grad_norm": 5.742237091064453, "learning_rate": 7.831336847149018e-05, "loss": 0.5604, "step": 16592 }, { "epoch": 1.1242631614608036, "grad_norm": 5.890753746032715, "learning_rate": 7.831199945239236e-05, "loss": 0.6248, "step": 16593 }, { "epoch": 1.1243309167287756, "grad_norm": 6.5256242752075195, "learning_rate": 7.831063043329456e-05, "loss": 0.9446, "step": 16594 }, { "epoch": 1.1243986719967478, "grad_norm": 7.787291526794434, "learning_rate": 7.830926141419674e-05, "loss": 0.7736, "step": 16595 }, { "epoch": 1.1244664272647198, "grad_norm": 7.017754077911377, "learning_rate": 7.830789239509892e-05, "loss": 0.4302, "step": 16596 }, { "epoch": 1.124534182532692, "grad_norm": 5.535482406616211, "learning_rate": 7.83065233760011e-05, "loss": 0.8716, "step": 16597 }, { "epoch": 1.124601937800664, "grad_norm": 4.967796802520752, "learning_rate": 7.83051543569033e-05, "loss": 0.639, "step": 16598 }, { "epoch": 1.1246696930686362, "grad_norm": 11.574756622314453, "learning_rate": 7.830378533780547e-05, "loss": 0.6214, "step": 16599 }, { "epoch": 1.1247374483366082, "grad_norm": 5.107860088348389, "learning_rate": 7.830241631870765e-05, "loss": 0.5664, "step": 16600 }, { "epoch": 1.1248052036045801, "grad_norm": 6.146525859832764, "learning_rate": 7.830104729960983e-05, "loss": 0.7694, "step": 16601 }, { "epoch": 1.1248729588725523, "grad_norm": 5.966607570648193, "learning_rate": 7.829967828051201e-05, "loss": 0.6571, "step": 16602 }, { "epoch": 1.1249407141405243, "grad_norm": 6.187711238861084, "learning_rate": 7.829830926141421e-05, "loss": 1.0278, "step": 16603 }, { "epoch": 1.1250084694084965, "grad_norm": 7.682037353515625, "learning_rate": 7.829694024231639e-05, "loss": 0.6538, "step": 16604 }, { "epoch": 1.1250762246764685, "grad_norm": 7.3382415771484375, "learning_rate": 7.829557122321857e-05, "loss": 0.6901, "step": 16605 }, { "epoch": 1.1251439799444407, "grad_norm": 6.714434623718262, "learning_rate": 7.829420220412075e-05, "loss": 0.5718, "step": 16606 }, { "epoch": 1.1252117352124127, "grad_norm": 6.157127857208252, "learning_rate": 7.829283318502294e-05, "loss": 0.7647, "step": 16607 }, { "epoch": 1.125279490480385, "grad_norm": 5.902122497558594, "learning_rate": 7.829146416592512e-05, "loss": 0.7683, "step": 16608 }, { "epoch": 1.1253472457483569, "grad_norm": 5.676656723022461, "learning_rate": 7.82900951468273e-05, "loss": 0.6912, "step": 16609 }, { "epoch": 1.125415001016329, "grad_norm": 5.85725736618042, "learning_rate": 7.828872612772948e-05, "loss": 0.5421, "step": 16610 }, { "epoch": 1.125482756284301, "grad_norm": 6.861623287200928, "learning_rate": 7.828735710863166e-05, "loss": 0.5735, "step": 16611 }, { "epoch": 1.1255505115522733, "grad_norm": 4.622976779937744, "learning_rate": 7.828598808953386e-05, "loss": 0.6974, "step": 16612 }, { "epoch": 1.1256182668202452, "grad_norm": 5.978318691253662, "learning_rate": 7.828461907043604e-05, "loss": 0.7616, "step": 16613 }, { "epoch": 1.1256860220882174, "grad_norm": 8.068818092346191, "learning_rate": 7.828325005133822e-05, "loss": 0.8741, "step": 16614 }, { "epoch": 1.1257537773561894, "grad_norm": 5.705319404602051, "learning_rate": 7.82818810322404e-05, "loss": 0.604, "step": 16615 }, { "epoch": 1.1258215326241616, "grad_norm": 6.627946853637695, "learning_rate": 7.828051201314258e-05, "loss": 0.8083, "step": 16616 }, { "epoch": 1.1258892878921336, "grad_norm": 5.951031684875488, "learning_rate": 7.827914299404477e-05, "loss": 0.7051, "step": 16617 }, { "epoch": 1.1259570431601058, "grad_norm": 5.129096031188965, "learning_rate": 7.827777397494695e-05, "loss": 0.7117, "step": 16618 }, { "epoch": 1.1260247984280778, "grad_norm": 7.520353317260742, "learning_rate": 7.827640495584913e-05, "loss": 0.6925, "step": 16619 }, { "epoch": 1.1260925536960498, "grad_norm": 6.071813106536865, "learning_rate": 7.827503593675131e-05, "loss": 0.6316, "step": 16620 }, { "epoch": 1.126160308964022, "grad_norm": 5.8903679847717285, "learning_rate": 7.827366691765351e-05, "loss": 0.6882, "step": 16621 }, { "epoch": 1.126228064231994, "grad_norm": 5.252291202545166, "learning_rate": 7.827229789855569e-05, "loss": 0.7473, "step": 16622 }, { "epoch": 1.1262958194999662, "grad_norm": 6.164458274841309, "learning_rate": 7.827092887945787e-05, "loss": 0.7696, "step": 16623 }, { "epoch": 1.1263635747679381, "grad_norm": 5.674738883972168, "learning_rate": 7.826955986036005e-05, "loss": 0.6838, "step": 16624 }, { "epoch": 1.1264313300359103, "grad_norm": 6.459506511688232, "learning_rate": 7.826819084126223e-05, "loss": 0.5691, "step": 16625 }, { "epoch": 1.1264990853038823, "grad_norm": 8.24375057220459, "learning_rate": 7.826682182216442e-05, "loss": 0.5813, "step": 16626 }, { "epoch": 1.1265668405718545, "grad_norm": 6.915633201599121, "learning_rate": 7.82654528030666e-05, "loss": 0.6045, "step": 16627 }, { "epoch": 1.1266345958398265, "grad_norm": 7.0347394943237305, "learning_rate": 7.826408378396878e-05, "loss": 1.0053, "step": 16628 }, { "epoch": 1.1267023511077987, "grad_norm": 9.013284683227539, "learning_rate": 7.826271476487097e-05, "loss": 0.5846, "step": 16629 }, { "epoch": 1.1267701063757707, "grad_norm": 5.217116832733154, "learning_rate": 7.826134574577316e-05, "loss": 0.5151, "step": 16630 }, { "epoch": 1.1268378616437429, "grad_norm": 4.9722466468811035, "learning_rate": 7.825997672667534e-05, "loss": 0.6434, "step": 16631 }, { "epoch": 1.1269056169117149, "grad_norm": 4.9107666015625, "learning_rate": 7.825860770757752e-05, "loss": 0.5564, "step": 16632 }, { "epoch": 1.126973372179687, "grad_norm": 5.44745397567749, "learning_rate": 7.82572386884797e-05, "loss": 0.5663, "step": 16633 }, { "epoch": 1.127041127447659, "grad_norm": 5.827592849731445, "learning_rate": 7.825586966938188e-05, "loss": 0.5999, "step": 16634 }, { "epoch": 1.127108882715631, "grad_norm": 6.206888198852539, "learning_rate": 7.825450065028407e-05, "loss": 0.6755, "step": 16635 }, { "epoch": 1.1271766379836032, "grad_norm": 6.200521469116211, "learning_rate": 7.825313163118625e-05, "loss": 0.5529, "step": 16636 }, { "epoch": 1.1272443932515752, "grad_norm": 5.36111307144165, "learning_rate": 7.825176261208845e-05, "loss": 0.762, "step": 16637 }, { "epoch": 1.1273121485195474, "grad_norm": 4.9850664138793945, "learning_rate": 7.825039359299063e-05, "loss": 0.818, "step": 16638 }, { "epoch": 1.1273799037875194, "grad_norm": 5.960310459136963, "learning_rate": 7.824902457389281e-05, "loss": 0.8029, "step": 16639 }, { "epoch": 1.1274476590554916, "grad_norm": 4.687396049499512, "learning_rate": 7.8247655554795e-05, "loss": 0.5797, "step": 16640 }, { "epoch": 1.1275154143234636, "grad_norm": 4.676320552825928, "learning_rate": 7.824628653569718e-05, "loss": 0.6945, "step": 16641 }, { "epoch": 1.1275831695914358, "grad_norm": 6.536602020263672, "learning_rate": 7.824491751659936e-05, "loss": 0.7977, "step": 16642 }, { "epoch": 1.1276509248594078, "grad_norm": 6.302074432373047, "learning_rate": 7.824354849750154e-05, "loss": 0.9523, "step": 16643 }, { "epoch": 1.12771868012738, "grad_norm": 7.156180381774902, "learning_rate": 7.824217947840374e-05, "loss": 0.6981, "step": 16644 }, { "epoch": 1.127786435395352, "grad_norm": 7.256894588470459, "learning_rate": 7.824081045930592e-05, "loss": 0.8173, "step": 16645 }, { "epoch": 1.1278541906633242, "grad_norm": 9.1759033203125, "learning_rate": 7.82394414402081e-05, "loss": 0.6992, "step": 16646 }, { "epoch": 1.1279219459312961, "grad_norm": 6.0825886726379395, "learning_rate": 7.823807242111028e-05, "loss": 0.837, "step": 16647 }, { "epoch": 1.1279897011992683, "grad_norm": 7.240145683288574, "learning_rate": 7.823670340201246e-05, "loss": 0.6456, "step": 16648 }, { "epoch": 1.1280574564672403, "grad_norm": 4.927544116973877, "learning_rate": 7.823533438291465e-05, "loss": 0.7218, "step": 16649 }, { "epoch": 1.1281252117352123, "grad_norm": 9.792190551757812, "learning_rate": 7.823396536381683e-05, "loss": 0.7131, "step": 16650 }, { "epoch": 1.1281929670031845, "grad_norm": 6.260951042175293, "learning_rate": 7.823259634471901e-05, "loss": 0.6938, "step": 16651 }, { "epoch": 1.1282607222711567, "grad_norm": 5.710566997528076, "learning_rate": 7.82312273256212e-05, "loss": 0.6371, "step": 16652 }, { "epoch": 1.1283284775391287, "grad_norm": 5.180080413818359, "learning_rate": 7.822985830652339e-05, "loss": 0.7467, "step": 16653 }, { "epoch": 1.1283962328071007, "grad_norm": 5.3070149421691895, "learning_rate": 7.822848928742557e-05, "loss": 0.5804, "step": 16654 }, { "epoch": 1.1284639880750729, "grad_norm": 5.685241222381592, "learning_rate": 7.822712026832775e-05, "loss": 0.74, "step": 16655 }, { "epoch": 1.1285317433430448, "grad_norm": 5.124477386474609, "learning_rate": 7.822575124922993e-05, "loss": 0.56, "step": 16656 }, { "epoch": 1.128599498611017, "grad_norm": 4.9993577003479, "learning_rate": 7.822438223013211e-05, "loss": 0.628, "step": 16657 }, { "epoch": 1.128667253878989, "grad_norm": 5.491091251373291, "learning_rate": 7.82230132110343e-05, "loss": 0.7368, "step": 16658 }, { "epoch": 1.1287350091469612, "grad_norm": 5.759190559387207, "learning_rate": 7.822164419193648e-05, "loss": 0.6428, "step": 16659 }, { "epoch": 1.1288027644149332, "grad_norm": 6.618605613708496, "learning_rate": 7.822027517283866e-05, "loss": 0.695, "step": 16660 }, { "epoch": 1.1288705196829054, "grad_norm": 5.8363118171691895, "learning_rate": 7.821890615374084e-05, "loss": 0.6375, "step": 16661 }, { "epoch": 1.1289382749508774, "grad_norm": 6.037831783294678, "learning_rate": 7.821753713464304e-05, "loss": 0.6392, "step": 16662 }, { "epoch": 1.1290060302188496, "grad_norm": 6.488530158996582, "learning_rate": 7.821616811554522e-05, "loss": 0.6329, "step": 16663 }, { "epoch": 1.1290737854868216, "grad_norm": 6.927813529968262, "learning_rate": 7.82147990964474e-05, "loss": 0.7017, "step": 16664 }, { "epoch": 1.1291415407547938, "grad_norm": 7.420223712921143, "learning_rate": 7.821343007734958e-05, "loss": 0.9947, "step": 16665 }, { "epoch": 1.1292092960227658, "grad_norm": 6.57767915725708, "learning_rate": 7.821206105825176e-05, "loss": 0.7923, "step": 16666 }, { "epoch": 1.129277051290738, "grad_norm": 5.805924892425537, "learning_rate": 7.821069203915395e-05, "loss": 0.7343, "step": 16667 }, { "epoch": 1.12934480655871, "grad_norm": 5.582193374633789, "learning_rate": 7.820932302005613e-05, "loss": 0.647, "step": 16668 }, { "epoch": 1.129412561826682, "grad_norm": 5.6104607582092285, "learning_rate": 7.820795400095831e-05, "loss": 0.673, "step": 16669 }, { "epoch": 1.1294803170946541, "grad_norm": 6.4317522048950195, "learning_rate": 7.82065849818605e-05, "loss": 0.8305, "step": 16670 }, { "epoch": 1.129548072362626, "grad_norm": 6.044923305511475, "learning_rate": 7.820521596276267e-05, "loss": 0.6202, "step": 16671 }, { "epoch": 1.1296158276305983, "grad_norm": 5.044763088226318, "learning_rate": 7.820384694366487e-05, "loss": 0.644, "step": 16672 }, { "epoch": 1.1296835828985703, "grad_norm": 7.138949871063232, "learning_rate": 7.820247792456705e-05, "loss": 0.8183, "step": 16673 }, { "epoch": 1.1297513381665425, "grad_norm": 6.46617317199707, "learning_rate": 7.820110890546923e-05, "loss": 0.6277, "step": 16674 }, { "epoch": 1.1298190934345145, "grad_norm": 7.636822700500488, "learning_rate": 7.819973988637141e-05, "loss": 0.6938, "step": 16675 }, { "epoch": 1.1298868487024867, "grad_norm": 7.69808292388916, "learning_rate": 7.81983708672736e-05, "loss": 0.7331, "step": 16676 }, { "epoch": 1.1299546039704587, "grad_norm": 6.143553733825684, "learning_rate": 7.819700184817578e-05, "loss": 0.8705, "step": 16677 }, { "epoch": 1.1300223592384309, "grad_norm": 6.685749530792236, "learning_rate": 7.819563282907796e-05, "loss": 0.7717, "step": 16678 }, { "epoch": 1.1300901145064028, "grad_norm": 6.317489147186279, "learning_rate": 7.819426380998014e-05, "loss": 0.6593, "step": 16679 }, { "epoch": 1.130157869774375, "grad_norm": 5.497630596160889, "learning_rate": 7.819289479088233e-05, "loss": 0.7693, "step": 16680 }, { "epoch": 1.130225625042347, "grad_norm": 7.087355136871338, "learning_rate": 7.819152577178452e-05, "loss": 0.5709, "step": 16681 }, { "epoch": 1.1302933803103192, "grad_norm": 9.138280868530273, "learning_rate": 7.81901567526867e-05, "loss": 0.9426, "step": 16682 }, { "epoch": 1.1303611355782912, "grad_norm": 8.925271987915039, "learning_rate": 7.818878773358888e-05, "loss": 0.7462, "step": 16683 }, { "epoch": 1.1304288908462632, "grad_norm": 5.355180263519287, "learning_rate": 7.818741871449107e-05, "loss": 0.6806, "step": 16684 }, { "epoch": 1.1304966461142354, "grad_norm": 4.724582195281982, "learning_rate": 7.818604969539325e-05, "loss": 0.6157, "step": 16685 }, { "epoch": 1.1305644013822074, "grad_norm": 7.584688186645508, "learning_rate": 7.818468067629543e-05, "loss": 0.6655, "step": 16686 }, { "epoch": 1.1306321566501796, "grad_norm": 5.311779022216797, "learning_rate": 7.818331165719763e-05, "loss": 0.6513, "step": 16687 }, { "epoch": 1.1306999119181516, "grad_norm": 5.313599586486816, "learning_rate": 7.818194263809981e-05, "loss": 0.6251, "step": 16688 }, { "epoch": 1.1307676671861238, "grad_norm": 4.748965740203857, "learning_rate": 7.818057361900199e-05, "loss": 0.5648, "step": 16689 }, { "epoch": 1.1308354224540957, "grad_norm": 5.075737476348877, "learning_rate": 7.817920459990418e-05, "loss": 0.6007, "step": 16690 }, { "epoch": 1.130903177722068, "grad_norm": 6.251148700714111, "learning_rate": 7.817783558080636e-05, "loss": 0.635, "step": 16691 }, { "epoch": 1.13097093299004, "grad_norm": 8.291831970214844, "learning_rate": 7.817646656170854e-05, "loss": 0.7627, "step": 16692 }, { "epoch": 1.1310386882580121, "grad_norm": 6.881071090698242, "learning_rate": 7.817509754261072e-05, "loss": 0.7503, "step": 16693 }, { "epoch": 1.131106443525984, "grad_norm": 7.16835355758667, "learning_rate": 7.81737285235129e-05, "loss": 0.7823, "step": 16694 }, { "epoch": 1.1311741987939563, "grad_norm": 4.8891096115112305, "learning_rate": 7.81723595044151e-05, "loss": 0.6679, "step": 16695 }, { "epoch": 1.1312419540619283, "grad_norm": 5.029726505279541, "learning_rate": 7.817099048531728e-05, "loss": 0.6262, "step": 16696 }, { "epoch": 1.1313097093299005, "grad_norm": 6.097971439361572, "learning_rate": 7.816962146621946e-05, "loss": 0.6416, "step": 16697 }, { "epoch": 1.1313774645978725, "grad_norm": 6.032967567443848, "learning_rate": 7.816825244712164e-05, "loss": 0.7701, "step": 16698 }, { "epoch": 1.1314452198658445, "grad_norm": 7.488419055938721, "learning_rate": 7.816688342802383e-05, "loss": 0.8387, "step": 16699 }, { "epoch": 1.1315129751338167, "grad_norm": 6.614134788513184, "learning_rate": 7.816551440892601e-05, "loss": 0.7412, "step": 16700 }, { "epoch": 1.1315807304017889, "grad_norm": 4.813358306884766, "learning_rate": 7.81641453898282e-05, "loss": 0.7146, "step": 16701 }, { "epoch": 1.1316484856697608, "grad_norm": 5.225235939025879, "learning_rate": 7.816277637073037e-05, "loss": 0.704, "step": 16702 }, { "epoch": 1.1317162409377328, "grad_norm": 6.127220630645752, "learning_rate": 7.816140735163255e-05, "loss": 0.7934, "step": 16703 }, { "epoch": 1.131783996205705, "grad_norm": 5.334741592407227, "learning_rate": 7.816003833253475e-05, "loss": 0.5111, "step": 16704 }, { "epoch": 1.131851751473677, "grad_norm": 5.337317943572998, "learning_rate": 7.815866931343693e-05, "loss": 0.908, "step": 16705 }, { "epoch": 1.1319195067416492, "grad_norm": 7.072608470916748, "learning_rate": 7.815730029433911e-05, "loss": 0.884, "step": 16706 }, { "epoch": 1.1319872620096212, "grad_norm": 6.734743595123291, "learning_rate": 7.815593127524129e-05, "loss": 0.6501, "step": 16707 }, { "epoch": 1.1320550172775934, "grad_norm": 6.5684814453125, "learning_rate": 7.815456225614348e-05, "loss": 0.9087, "step": 16708 }, { "epoch": 1.1321227725455654, "grad_norm": 5.775663375854492, "learning_rate": 7.815319323704566e-05, "loss": 0.6729, "step": 16709 }, { "epoch": 1.1321905278135376, "grad_norm": 4.196002960205078, "learning_rate": 7.815182421794784e-05, "loss": 0.6439, "step": 16710 }, { "epoch": 1.1322582830815096, "grad_norm": 9.43163776397705, "learning_rate": 7.815045519885002e-05, "loss": 0.72, "step": 16711 }, { "epoch": 1.1323260383494818, "grad_norm": 5.663300514221191, "learning_rate": 7.81490861797522e-05, "loss": 0.6189, "step": 16712 }, { "epoch": 1.1323937936174537, "grad_norm": 5.103667736053467, "learning_rate": 7.81477171606544e-05, "loss": 0.9228, "step": 16713 }, { "epoch": 1.1324615488854257, "grad_norm": 11.563446044921875, "learning_rate": 7.814634814155658e-05, "loss": 0.6651, "step": 16714 }, { "epoch": 1.132529304153398, "grad_norm": 6.818861961364746, "learning_rate": 7.814497912245876e-05, "loss": 0.6716, "step": 16715 }, { "epoch": 1.1325970594213701, "grad_norm": 8.50759506225586, "learning_rate": 7.814361010336094e-05, "loss": 0.5372, "step": 16716 }, { "epoch": 1.132664814689342, "grad_norm": 5.753479957580566, "learning_rate": 7.814224108426313e-05, "loss": 0.7041, "step": 16717 }, { "epoch": 1.132732569957314, "grad_norm": 6.033994197845459, "learning_rate": 7.814087206516531e-05, "loss": 0.6911, "step": 16718 }, { "epoch": 1.1328003252252863, "grad_norm": 5.7770771980285645, "learning_rate": 7.81395030460675e-05, "loss": 0.6144, "step": 16719 }, { "epoch": 1.1328680804932583, "grad_norm": 6.466103553771973, "learning_rate": 7.813813402696967e-05, "loss": 0.822, "step": 16720 }, { "epoch": 1.1329358357612305, "grad_norm": 6.284421920776367, "learning_rate": 7.813676500787185e-05, "loss": 0.715, "step": 16721 }, { "epoch": 1.1330035910292025, "grad_norm": 5.6262407302856445, "learning_rate": 7.813539598877405e-05, "loss": 0.7364, "step": 16722 }, { "epoch": 1.1330713462971747, "grad_norm": 4.806675910949707, "learning_rate": 7.813402696967623e-05, "loss": 0.4433, "step": 16723 }, { "epoch": 1.1331391015651466, "grad_norm": 5.149235248565674, "learning_rate": 7.813265795057841e-05, "loss": 0.4811, "step": 16724 }, { "epoch": 1.1332068568331188, "grad_norm": 5.818775653839111, "learning_rate": 7.813128893148059e-05, "loss": 0.5279, "step": 16725 }, { "epoch": 1.1332746121010908, "grad_norm": 5.656182289123535, "learning_rate": 7.812991991238277e-05, "loss": 0.6816, "step": 16726 }, { "epoch": 1.133342367369063, "grad_norm": 4.287816524505615, "learning_rate": 7.812855089328496e-05, "loss": 0.6402, "step": 16727 }, { "epoch": 1.133410122637035, "grad_norm": 6.147930145263672, "learning_rate": 7.812718187418714e-05, "loss": 0.6251, "step": 16728 }, { "epoch": 1.1334778779050072, "grad_norm": 5.128780364990234, "learning_rate": 7.812581285508932e-05, "loss": 0.7653, "step": 16729 }, { "epoch": 1.1335456331729792, "grad_norm": 4.769107341766357, "learning_rate": 7.812444383599152e-05, "loss": 0.5929, "step": 16730 }, { "epoch": 1.1336133884409514, "grad_norm": 4.782686233520508, "learning_rate": 7.81230748168937e-05, "loss": 0.5916, "step": 16731 }, { "epoch": 1.1336811437089234, "grad_norm": 5.834915637969971, "learning_rate": 7.812170579779588e-05, "loss": 0.6614, "step": 16732 }, { "epoch": 1.1337488989768953, "grad_norm": 5.408773899078369, "learning_rate": 7.812033677869807e-05, "loss": 0.7269, "step": 16733 }, { "epoch": 1.1338166542448676, "grad_norm": 6.599340438842773, "learning_rate": 7.811896775960025e-05, "loss": 0.4876, "step": 16734 }, { "epoch": 1.1338844095128395, "grad_norm": 4.747675895690918, "learning_rate": 7.811759874050243e-05, "loss": 0.674, "step": 16735 }, { "epoch": 1.1339521647808117, "grad_norm": 5.954026222229004, "learning_rate": 7.811622972140463e-05, "loss": 0.6632, "step": 16736 }, { "epoch": 1.1340199200487837, "grad_norm": 4.728879928588867, "learning_rate": 7.811486070230681e-05, "loss": 0.6869, "step": 16737 }, { "epoch": 1.134087675316756, "grad_norm": 5.576781749725342, "learning_rate": 7.811349168320899e-05, "loss": 0.6638, "step": 16738 }, { "epoch": 1.134155430584728, "grad_norm": 5.9168782234191895, "learning_rate": 7.811212266411117e-05, "loss": 0.759, "step": 16739 }, { "epoch": 1.1342231858527, "grad_norm": 5.378051280975342, "learning_rate": 7.811075364501336e-05, "loss": 0.5783, "step": 16740 }, { "epoch": 1.134290941120672, "grad_norm": 7.849323749542236, "learning_rate": 7.810938462591554e-05, "loss": 0.7713, "step": 16741 }, { "epoch": 1.1343586963886443, "grad_norm": 5.6644415855407715, "learning_rate": 7.810801560681772e-05, "loss": 0.5597, "step": 16742 }, { "epoch": 1.1344264516566163, "grad_norm": 6.990406513214111, "learning_rate": 7.81066465877199e-05, "loss": 0.5516, "step": 16743 }, { "epoch": 1.1344942069245885, "grad_norm": 6.040196418762207, "learning_rate": 7.810527756862208e-05, "loss": 0.7269, "step": 16744 }, { "epoch": 1.1345619621925604, "grad_norm": 6.549674034118652, "learning_rate": 7.810390854952428e-05, "loss": 0.797, "step": 16745 }, { "epoch": 1.1346297174605326, "grad_norm": 5.927361011505127, "learning_rate": 7.810253953042646e-05, "loss": 0.6387, "step": 16746 }, { "epoch": 1.1346974727285046, "grad_norm": 5.803552150726318, "learning_rate": 7.810117051132864e-05, "loss": 0.7569, "step": 16747 }, { "epoch": 1.1347652279964766, "grad_norm": 6.477768898010254, "learning_rate": 7.809980149223082e-05, "loss": 0.8403, "step": 16748 }, { "epoch": 1.1348329832644488, "grad_norm": 6.080210208892822, "learning_rate": 7.8098432473133e-05, "loss": 0.8066, "step": 16749 }, { "epoch": 1.134900738532421, "grad_norm": 7.755611896514893, "learning_rate": 7.809706345403519e-05, "loss": 0.7821, "step": 16750 }, { "epoch": 1.134968493800393, "grad_norm": 5.807285308837891, "learning_rate": 7.809569443493737e-05, "loss": 0.7686, "step": 16751 }, { "epoch": 1.135036249068365, "grad_norm": 4.9453020095825195, "learning_rate": 7.809432541583955e-05, "loss": 0.5902, "step": 16752 }, { "epoch": 1.1351040043363372, "grad_norm": 5.776493072509766, "learning_rate": 7.809295639674173e-05, "loss": 0.568, "step": 16753 }, { "epoch": 1.1351717596043092, "grad_norm": 8.138739585876465, "learning_rate": 7.809158737764393e-05, "loss": 0.9117, "step": 16754 }, { "epoch": 1.1352395148722814, "grad_norm": 4.630499839782715, "learning_rate": 7.809021835854611e-05, "loss": 0.7508, "step": 16755 }, { "epoch": 1.1353072701402533, "grad_norm": 4.743904113769531, "learning_rate": 7.808884933944829e-05, "loss": 0.6519, "step": 16756 }, { "epoch": 1.1353750254082255, "grad_norm": 6.640937328338623, "learning_rate": 7.808748032035047e-05, "loss": 0.6931, "step": 16757 }, { "epoch": 1.1354427806761975, "grad_norm": 6.060274600982666, "learning_rate": 7.808611130125265e-05, "loss": 0.7295, "step": 16758 }, { "epoch": 1.1355105359441697, "grad_norm": 5.2093000411987305, "learning_rate": 7.808474228215484e-05, "loss": 0.629, "step": 16759 }, { "epoch": 1.1355782912121417, "grad_norm": 5.407078266143799, "learning_rate": 7.808337326305702e-05, "loss": 0.6372, "step": 16760 }, { "epoch": 1.135646046480114, "grad_norm": 6.483882904052734, "learning_rate": 7.80820042439592e-05, "loss": 0.6638, "step": 16761 }, { "epoch": 1.135713801748086, "grad_norm": 5.735899925231934, "learning_rate": 7.808063522486138e-05, "loss": 0.9567, "step": 16762 }, { "epoch": 1.1357815570160579, "grad_norm": 6.978238105773926, "learning_rate": 7.807926620576358e-05, "loss": 0.6355, "step": 16763 }, { "epoch": 1.13584931228403, "grad_norm": 6.08992338180542, "learning_rate": 7.807789718666576e-05, "loss": 0.8777, "step": 16764 }, { "epoch": 1.1359170675520023, "grad_norm": 6.668683052062988, "learning_rate": 7.807652816756794e-05, "loss": 0.5351, "step": 16765 }, { "epoch": 1.1359848228199743, "grad_norm": 5.498349666595459, "learning_rate": 7.807515914847012e-05, "loss": 0.7362, "step": 16766 }, { "epoch": 1.1360525780879462, "grad_norm": 5.9772868156433105, "learning_rate": 7.80737901293723e-05, "loss": 0.7146, "step": 16767 }, { "epoch": 1.1361203333559184, "grad_norm": 6.178906440734863, "learning_rate": 7.80724211102745e-05, "loss": 0.6685, "step": 16768 }, { "epoch": 1.1361880886238904, "grad_norm": 5.166858196258545, "learning_rate": 7.807105209117667e-05, "loss": 0.7369, "step": 16769 }, { "epoch": 1.1362558438918626, "grad_norm": 5.280497074127197, "learning_rate": 7.806968307207885e-05, "loss": 0.6309, "step": 16770 }, { "epoch": 1.1363235991598346, "grad_norm": 4.654086112976074, "learning_rate": 7.806831405298103e-05, "loss": 0.6645, "step": 16771 }, { "epoch": 1.1363913544278068, "grad_norm": 5.552098751068115, "learning_rate": 7.806694503388321e-05, "loss": 0.6494, "step": 16772 }, { "epoch": 1.1364591096957788, "grad_norm": 5.359954833984375, "learning_rate": 7.806557601478541e-05, "loss": 0.8581, "step": 16773 }, { "epoch": 1.136526864963751, "grad_norm": 5.112419128417969, "learning_rate": 7.806420699568759e-05, "loss": 0.6707, "step": 16774 }, { "epoch": 1.136594620231723, "grad_norm": 5.130734443664551, "learning_rate": 7.806283797658977e-05, "loss": 0.5441, "step": 16775 }, { "epoch": 1.1366623754996952, "grad_norm": 4.156247615814209, "learning_rate": 7.806146895749196e-05, "loss": 0.6177, "step": 16776 }, { "epoch": 1.1367301307676672, "grad_norm": 7.025331020355225, "learning_rate": 7.806009993839414e-05, "loss": 0.7531, "step": 16777 }, { "epoch": 1.1367978860356394, "grad_norm": 6.272400856018066, "learning_rate": 7.805873091929632e-05, "loss": 0.5415, "step": 16778 }, { "epoch": 1.1368656413036113, "grad_norm": 5.994542121887207, "learning_rate": 7.805736190019852e-05, "loss": 0.5739, "step": 16779 }, { "epoch": 1.1369333965715835, "grad_norm": 7.8840718269348145, "learning_rate": 7.80559928811007e-05, "loss": 0.7447, "step": 16780 }, { "epoch": 1.1370011518395555, "grad_norm": 5.8447651863098145, "learning_rate": 7.805462386200288e-05, "loss": 0.5919, "step": 16781 }, { "epoch": 1.1370689071075275, "grad_norm": 7.363594055175781, "learning_rate": 7.805325484290507e-05, "loss": 0.7014, "step": 16782 }, { "epoch": 1.1371366623754997, "grad_norm": 5.474704265594482, "learning_rate": 7.805188582380725e-05, "loss": 0.6021, "step": 16783 }, { "epoch": 1.1372044176434717, "grad_norm": 6.830411434173584, "learning_rate": 7.805051680470943e-05, "loss": 0.6739, "step": 16784 }, { "epoch": 1.137272172911444, "grad_norm": 5.505611896514893, "learning_rate": 7.804914778561161e-05, "loss": 0.5898, "step": 16785 }, { "epoch": 1.1373399281794159, "grad_norm": 9.52569580078125, "learning_rate": 7.804777876651381e-05, "loss": 0.6024, "step": 16786 }, { "epoch": 1.137407683447388, "grad_norm": 6.93997049331665, "learning_rate": 7.804640974741599e-05, "loss": 0.6355, "step": 16787 }, { "epoch": 1.13747543871536, "grad_norm": 5.036192893981934, "learning_rate": 7.804504072831817e-05, "loss": 0.7398, "step": 16788 }, { "epoch": 1.1375431939833323, "grad_norm": 5.485294818878174, "learning_rate": 7.804367170922035e-05, "loss": 0.7825, "step": 16789 }, { "epoch": 1.1376109492513042, "grad_norm": 5.600368976593018, "learning_rate": 7.804230269012253e-05, "loss": 0.7205, "step": 16790 }, { "epoch": 1.1376787045192764, "grad_norm": 5.522429943084717, "learning_rate": 7.804093367102472e-05, "loss": 0.7872, "step": 16791 }, { "epoch": 1.1377464597872484, "grad_norm": 5.987887382507324, "learning_rate": 7.80395646519269e-05, "loss": 0.6782, "step": 16792 }, { "epoch": 1.1378142150552206, "grad_norm": 5.354010105133057, "learning_rate": 7.803819563282908e-05, "loss": 0.6363, "step": 16793 }, { "epoch": 1.1378819703231926, "grad_norm": 6.39069938659668, "learning_rate": 7.803682661373126e-05, "loss": 0.7089, "step": 16794 }, { "epoch": 1.1379497255911648, "grad_norm": 5.727047443389893, "learning_rate": 7.803545759463346e-05, "loss": 0.7008, "step": 16795 }, { "epoch": 1.1380174808591368, "grad_norm": 7.178287506103516, "learning_rate": 7.803408857553564e-05, "loss": 0.7488, "step": 16796 }, { "epoch": 1.1380852361271088, "grad_norm": 6.201870441436768, "learning_rate": 7.803271955643782e-05, "loss": 0.6254, "step": 16797 }, { "epoch": 1.138152991395081, "grad_norm": 6.881004810333252, "learning_rate": 7.803135053734e-05, "loss": 0.6558, "step": 16798 }, { "epoch": 1.1382207466630532, "grad_norm": 6.2866291999816895, "learning_rate": 7.802998151824218e-05, "loss": 0.6537, "step": 16799 }, { "epoch": 1.1382885019310252, "grad_norm": 5.333223342895508, "learning_rate": 7.802861249914437e-05, "loss": 0.6693, "step": 16800 }, { "epoch": 1.1383562571989971, "grad_norm": 6.103747367858887, "learning_rate": 7.802724348004655e-05, "loss": 0.6822, "step": 16801 }, { "epoch": 1.1384240124669693, "grad_norm": 10.445125579833984, "learning_rate": 7.802587446094873e-05, "loss": 0.7734, "step": 16802 }, { "epoch": 1.1384917677349413, "grad_norm": 5.043206691741943, "learning_rate": 7.802450544185091e-05, "loss": 0.6898, "step": 16803 }, { "epoch": 1.1385595230029135, "grad_norm": 7.022032260894775, "learning_rate": 7.80231364227531e-05, "loss": 0.6542, "step": 16804 }, { "epoch": 1.1386272782708855, "grad_norm": 7.176365852355957, "learning_rate": 7.802176740365529e-05, "loss": 0.7367, "step": 16805 }, { "epoch": 1.1386950335388577, "grad_norm": 4.644754409790039, "learning_rate": 7.802039838455747e-05, "loss": 0.6602, "step": 16806 }, { "epoch": 1.1387627888068297, "grad_norm": 5.330687522888184, "learning_rate": 7.801902936545965e-05, "loss": 0.5775, "step": 16807 }, { "epoch": 1.1388305440748019, "grad_norm": 5.8319501876831055, "learning_rate": 7.801766034636183e-05, "loss": 0.599, "step": 16808 }, { "epoch": 1.1388982993427739, "grad_norm": 6.8199286460876465, "learning_rate": 7.801629132726402e-05, "loss": 0.8001, "step": 16809 }, { "epoch": 1.138966054610746, "grad_norm": 6.324723720550537, "learning_rate": 7.80149223081662e-05, "loss": 0.773, "step": 16810 }, { "epoch": 1.139033809878718, "grad_norm": 8.192002296447754, "learning_rate": 7.801355328906838e-05, "loss": 0.647, "step": 16811 }, { "epoch": 1.13910156514669, "grad_norm": 7.2895050048828125, "learning_rate": 7.801218426997056e-05, "loss": 0.8682, "step": 16812 }, { "epoch": 1.1391693204146622, "grad_norm": 6.973208904266357, "learning_rate": 7.801081525087274e-05, "loss": 0.667, "step": 16813 }, { "epoch": 1.1392370756826344, "grad_norm": 6.551448345184326, "learning_rate": 7.800944623177494e-05, "loss": 0.7502, "step": 16814 }, { "epoch": 1.1393048309506064, "grad_norm": 5.396692752838135, "learning_rate": 7.800807721267712e-05, "loss": 0.9984, "step": 16815 }, { "epoch": 1.1393725862185784, "grad_norm": 4.447607040405273, "learning_rate": 7.80067081935793e-05, "loss": 0.6526, "step": 16816 }, { "epoch": 1.1394403414865506, "grad_norm": 8.116009712219238, "learning_rate": 7.800533917448148e-05, "loss": 0.6774, "step": 16817 }, { "epoch": 1.1395080967545226, "grad_norm": 5.555187225341797, "learning_rate": 7.800397015538367e-05, "loss": 0.7358, "step": 16818 }, { "epoch": 1.1395758520224948, "grad_norm": 7.893671035766602, "learning_rate": 7.800260113628585e-05, "loss": 0.4427, "step": 16819 }, { "epoch": 1.1396436072904668, "grad_norm": 6.3167853355407715, "learning_rate": 7.800123211718803e-05, "loss": 0.7659, "step": 16820 }, { "epoch": 1.139711362558439, "grad_norm": 7.062374591827393, "learning_rate": 7.799986309809021e-05, "loss": 0.8105, "step": 16821 }, { "epoch": 1.139779117826411, "grad_norm": 6.286994457244873, "learning_rate": 7.799849407899241e-05, "loss": 0.6914, "step": 16822 }, { "epoch": 1.1398468730943832, "grad_norm": 5.267313480377197, "learning_rate": 7.799712505989459e-05, "loss": 0.6803, "step": 16823 }, { "epoch": 1.1399146283623551, "grad_norm": 5.873160362243652, "learning_rate": 7.799575604079677e-05, "loss": 0.8037, "step": 16824 }, { "epoch": 1.1399823836303273, "grad_norm": 6.283346176147461, "learning_rate": 7.799438702169896e-05, "loss": 0.6624, "step": 16825 }, { "epoch": 1.1400501388982993, "grad_norm": 6.988799571990967, "learning_rate": 7.799301800260114e-05, "loss": 0.7537, "step": 16826 }, { "epoch": 1.1401178941662715, "grad_norm": 6.114986419677734, "learning_rate": 7.799164898350332e-05, "loss": 0.7707, "step": 16827 }, { "epoch": 1.1401856494342435, "grad_norm": 5.79031229019165, "learning_rate": 7.799027996440552e-05, "loss": 0.5132, "step": 16828 }, { "epoch": 1.1402534047022157, "grad_norm": 6.05492639541626, "learning_rate": 7.79889109453077e-05, "loss": 0.604, "step": 16829 }, { "epoch": 1.1403211599701877, "grad_norm": 5.041617393493652, "learning_rate": 7.798754192620988e-05, "loss": 0.7133, "step": 16830 }, { "epoch": 1.1403889152381597, "grad_norm": 6.444592475891113, "learning_rate": 7.798617290711206e-05, "loss": 0.653, "step": 16831 }, { "epoch": 1.1404566705061319, "grad_norm": 6.009650230407715, "learning_rate": 7.798480388801425e-05, "loss": 0.6469, "step": 16832 }, { "epoch": 1.1405244257741038, "grad_norm": 8.08381462097168, "learning_rate": 7.798343486891643e-05, "loss": 0.7966, "step": 16833 }, { "epoch": 1.140592181042076, "grad_norm": 5.3437676429748535, "learning_rate": 7.798206584981861e-05, "loss": 0.7883, "step": 16834 }, { "epoch": 1.140659936310048, "grad_norm": 5.706287860870361, "learning_rate": 7.798069683072079e-05, "loss": 0.8088, "step": 16835 }, { "epoch": 1.1407276915780202, "grad_norm": 5.616438865661621, "learning_rate": 7.797932781162297e-05, "loss": 0.5782, "step": 16836 }, { "epoch": 1.1407954468459922, "grad_norm": 5.1152777671813965, "learning_rate": 7.797795879252517e-05, "loss": 0.8015, "step": 16837 }, { "epoch": 1.1408632021139644, "grad_norm": 5.392358303070068, "learning_rate": 7.797658977342735e-05, "loss": 0.6961, "step": 16838 }, { "epoch": 1.1409309573819364, "grad_norm": 5.752062797546387, "learning_rate": 7.797522075432953e-05, "loss": 0.7793, "step": 16839 }, { "epoch": 1.1409987126499086, "grad_norm": 6.018402576446533, "learning_rate": 7.797385173523171e-05, "loss": 0.6414, "step": 16840 }, { "epoch": 1.1410664679178806, "grad_norm": 4.536129951477051, "learning_rate": 7.79724827161339e-05, "loss": 0.4998, "step": 16841 }, { "epoch": 1.1411342231858528, "grad_norm": 5.575126647949219, "learning_rate": 7.797111369703608e-05, "loss": 0.6466, "step": 16842 }, { "epoch": 1.1412019784538248, "grad_norm": 8.243531227111816, "learning_rate": 7.796974467793826e-05, "loss": 0.9981, "step": 16843 }, { "epoch": 1.141269733721797, "grad_norm": 6.683679580688477, "learning_rate": 7.796837565884044e-05, "loss": 0.8015, "step": 16844 }, { "epoch": 1.141337488989769, "grad_norm": 7.662330627441406, "learning_rate": 7.796700663974262e-05, "loss": 0.6639, "step": 16845 }, { "epoch": 1.141405244257741, "grad_norm": 4.948780536651611, "learning_rate": 7.796563762064482e-05, "loss": 0.6174, "step": 16846 }, { "epoch": 1.1414729995257131, "grad_norm": 6.792630195617676, "learning_rate": 7.7964268601547e-05, "loss": 0.7958, "step": 16847 }, { "epoch": 1.1415407547936853, "grad_norm": 5.603211879730225, "learning_rate": 7.796289958244918e-05, "loss": 0.8995, "step": 16848 }, { "epoch": 1.1416085100616573, "grad_norm": 7.316423416137695, "learning_rate": 7.796153056335136e-05, "loss": 0.592, "step": 16849 }, { "epoch": 1.1416762653296293, "grad_norm": 5.690333843231201, "learning_rate": 7.796016154425355e-05, "loss": 0.568, "step": 16850 }, { "epoch": 1.1417440205976015, "grad_norm": 5.481085777282715, "learning_rate": 7.795879252515573e-05, "loss": 0.8911, "step": 16851 }, { "epoch": 1.1418117758655735, "grad_norm": 5.878414154052734, "learning_rate": 7.795742350605791e-05, "loss": 0.5673, "step": 16852 }, { "epoch": 1.1418795311335457, "grad_norm": 6.474081993103027, "learning_rate": 7.79560544869601e-05, "loss": 0.6036, "step": 16853 }, { "epoch": 1.1419472864015177, "grad_norm": 5.981825351715088, "learning_rate": 7.795468546786227e-05, "loss": 0.7612, "step": 16854 }, { "epoch": 1.1420150416694899, "grad_norm": 5.728933811187744, "learning_rate": 7.795331644876447e-05, "loss": 0.7179, "step": 16855 }, { "epoch": 1.1420827969374618, "grad_norm": 5.504831790924072, "learning_rate": 7.795194742966665e-05, "loss": 0.9218, "step": 16856 }, { "epoch": 1.142150552205434, "grad_norm": 6.834359169006348, "learning_rate": 7.795057841056883e-05, "loss": 0.7309, "step": 16857 }, { "epoch": 1.142218307473406, "grad_norm": 6.295196056365967, "learning_rate": 7.794920939147101e-05, "loss": 0.8664, "step": 16858 }, { "epoch": 1.1422860627413782, "grad_norm": 6.097269058227539, "learning_rate": 7.794784037237319e-05, "loss": 0.6478, "step": 16859 }, { "epoch": 1.1423538180093502, "grad_norm": 6.314675331115723, "learning_rate": 7.794647135327538e-05, "loss": 0.8395, "step": 16860 }, { "epoch": 1.1424215732773222, "grad_norm": 4.988530158996582, "learning_rate": 7.794510233417756e-05, "loss": 0.5958, "step": 16861 }, { "epoch": 1.1424893285452944, "grad_norm": 5.622345924377441, "learning_rate": 7.794373331507974e-05, "loss": 0.4794, "step": 16862 }, { "epoch": 1.1425570838132666, "grad_norm": 5.737193584442139, "learning_rate": 7.794236429598192e-05, "loss": 0.7704, "step": 16863 }, { "epoch": 1.1426248390812386, "grad_norm": 6.432234764099121, "learning_rate": 7.794099527688412e-05, "loss": 0.5991, "step": 16864 }, { "epoch": 1.1426925943492106, "grad_norm": 6.44136381149292, "learning_rate": 7.79396262577863e-05, "loss": 0.7507, "step": 16865 }, { "epoch": 1.1427603496171828, "grad_norm": 4.948369979858398, "learning_rate": 7.793825723868848e-05, "loss": 0.7673, "step": 16866 }, { "epoch": 1.1428281048851547, "grad_norm": 5.5636162757873535, "learning_rate": 7.793688821959066e-05, "loss": 0.4776, "step": 16867 }, { "epoch": 1.142895860153127, "grad_norm": 7.1905741691589355, "learning_rate": 7.793551920049285e-05, "loss": 0.8011, "step": 16868 }, { "epoch": 1.142963615421099, "grad_norm": 6.490988254547119, "learning_rate": 7.793415018139503e-05, "loss": 0.8343, "step": 16869 }, { "epoch": 1.1430313706890711, "grad_norm": 5.666626453399658, "learning_rate": 7.793278116229721e-05, "loss": 0.6194, "step": 16870 }, { "epoch": 1.143099125957043, "grad_norm": 7.139801025390625, "learning_rate": 7.793141214319941e-05, "loss": 0.6305, "step": 16871 }, { "epoch": 1.1431668812250153, "grad_norm": 5.280755043029785, "learning_rate": 7.793004312410159e-05, "loss": 0.69, "step": 16872 }, { "epoch": 1.1432346364929873, "grad_norm": 6.890513896942139, "learning_rate": 7.792867410500377e-05, "loss": 0.9433, "step": 16873 }, { "epoch": 1.1433023917609595, "grad_norm": 7.743908882141113, "learning_rate": 7.792730508590596e-05, "loss": 0.6906, "step": 16874 }, { "epoch": 1.1433701470289315, "grad_norm": 9.331748962402344, "learning_rate": 7.792593606680814e-05, "loss": 0.581, "step": 16875 }, { "epoch": 1.1434379022969037, "grad_norm": 5.105838775634766, "learning_rate": 7.792456704771032e-05, "loss": 0.6817, "step": 16876 }, { "epoch": 1.1435056575648757, "grad_norm": 7.092813014984131, "learning_rate": 7.79231980286125e-05, "loss": 0.5524, "step": 16877 }, { "epoch": 1.1435734128328479, "grad_norm": 5.3407673835754395, "learning_rate": 7.79218290095147e-05, "loss": 0.6377, "step": 16878 }, { "epoch": 1.1436411681008198, "grad_norm": 8.154608726501465, "learning_rate": 7.792045999041688e-05, "loss": 0.7605, "step": 16879 }, { "epoch": 1.1437089233687918, "grad_norm": 6.083822250366211, "learning_rate": 7.791909097131906e-05, "loss": 0.6825, "step": 16880 }, { "epoch": 1.143776678636764, "grad_norm": 5.946974754333496, "learning_rate": 7.791772195222124e-05, "loss": 0.6463, "step": 16881 }, { "epoch": 1.143844433904736, "grad_norm": 4.58415412902832, "learning_rate": 7.791635293312342e-05, "loss": 0.5366, "step": 16882 }, { "epoch": 1.1439121891727082, "grad_norm": 5.008115768432617, "learning_rate": 7.791498391402561e-05, "loss": 0.6225, "step": 16883 }, { "epoch": 1.1439799444406802, "grad_norm": 6.249969005584717, "learning_rate": 7.791361489492779e-05, "loss": 0.6601, "step": 16884 }, { "epoch": 1.1440476997086524, "grad_norm": 5.142440319061279, "learning_rate": 7.791224587582997e-05, "loss": 0.5248, "step": 16885 }, { "epoch": 1.1441154549766244, "grad_norm": 4.58303165435791, "learning_rate": 7.791087685673215e-05, "loss": 0.6253, "step": 16886 }, { "epoch": 1.1441832102445966, "grad_norm": 5.830782413482666, "learning_rate": 7.790950783763435e-05, "loss": 0.7958, "step": 16887 }, { "epoch": 1.1442509655125686, "grad_norm": 6.463974952697754, "learning_rate": 7.790813881853653e-05, "loss": 0.7663, "step": 16888 }, { "epoch": 1.1443187207805408, "grad_norm": 7.233205318450928, "learning_rate": 7.790676979943871e-05, "loss": 0.7953, "step": 16889 }, { "epoch": 1.1443864760485127, "grad_norm": 5.574187278747559, "learning_rate": 7.790540078034089e-05, "loss": 0.5744, "step": 16890 }, { "epoch": 1.144454231316485, "grad_norm": 6.045961856842041, "learning_rate": 7.790403176124307e-05, "loss": 0.6247, "step": 16891 }, { "epoch": 1.144521986584457, "grad_norm": 5.329028606414795, "learning_rate": 7.790266274214526e-05, "loss": 0.6026, "step": 16892 }, { "epoch": 1.1445897418524291, "grad_norm": 6.763497829437256, "learning_rate": 7.790129372304744e-05, "loss": 0.6519, "step": 16893 }, { "epoch": 1.144657497120401, "grad_norm": 5.743830680847168, "learning_rate": 7.789992470394962e-05, "loss": 0.6935, "step": 16894 }, { "epoch": 1.144725252388373, "grad_norm": 4.693161487579346, "learning_rate": 7.78985556848518e-05, "loss": 0.5889, "step": 16895 }, { "epoch": 1.1447930076563453, "grad_norm": 6.895607948303223, "learning_rate": 7.7897186665754e-05, "loss": 0.561, "step": 16896 }, { "epoch": 1.1448607629243175, "grad_norm": 5.308940410614014, "learning_rate": 7.789581764665618e-05, "loss": 0.8614, "step": 16897 }, { "epoch": 1.1449285181922895, "grad_norm": 6.690733909606934, "learning_rate": 7.789444862755836e-05, "loss": 0.5136, "step": 16898 }, { "epoch": 1.1449962734602614, "grad_norm": 4.7946624755859375, "learning_rate": 7.789307960846054e-05, "loss": 0.5016, "step": 16899 }, { "epoch": 1.1450640287282337, "grad_norm": 5.833794593811035, "learning_rate": 7.789171058936272e-05, "loss": 0.5225, "step": 16900 }, { "epoch": 1.1451317839962056, "grad_norm": 6.518613338470459, "learning_rate": 7.789034157026491e-05, "loss": 0.7277, "step": 16901 }, { "epoch": 1.1451995392641778, "grad_norm": 5.891839981079102, "learning_rate": 7.788897255116709e-05, "loss": 0.5277, "step": 16902 }, { "epoch": 1.1452672945321498, "grad_norm": 7.279749393463135, "learning_rate": 7.788760353206927e-05, "loss": 0.6505, "step": 16903 }, { "epoch": 1.145335049800122, "grad_norm": 5.246143341064453, "learning_rate": 7.788623451297145e-05, "loss": 0.6035, "step": 16904 }, { "epoch": 1.145402805068094, "grad_norm": 6.899950981140137, "learning_rate": 7.788486549387363e-05, "loss": 0.6674, "step": 16905 }, { "epoch": 1.1454705603360662, "grad_norm": 5.364004611968994, "learning_rate": 7.788349647477583e-05, "loss": 0.6255, "step": 16906 }, { "epoch": 1.1455383156040382, "grad_norm": 6.842263698577881, "learning_rate": 7.788212745567801e-05, "loss": 0.6159, "step": 16907 }, { "epoch": 1.1456060708720104, "grad_norm": 6.661463260650635, "learning_rate": 7.788075843658019e-05, "loss": 0.6893, "step": 16908 }, { "epoch": 1.1456738261399824, "grad_norm": 7.597821235656738, "learning_rate": 7.787938941748237e-05, "loss": 0.666, "step": 16909 }, { "epoch": 1.1457415814079543, "grad_norm": 6.1869354248046875, "learning_rate": 7.787802039838456e-05, "loss": 0.5579, "step": 16910 }, { "epoch": 1.1458093366759265, "grad_norm": 5.620526313781738, "learning_rate": 7.787665137928674e-05, "loss": 0.6303, "step": 16911 }, { "epoch": 1.1458770919438988, "grad_norm": 6.049895286560059, "learning_rate": 7.787528236018892e-05, "loss": 0.5657, "step": 16912 }, { "epoch": 1.1459448472118707, "grad_norm": 5.7843756675720215, "learning_rate": 7.78739133410911e-05, "loss": 0.6183, "step": 16913 }, { "epoch": 1.1460126024798427, "grad_norm": 7.550663471221924, "learning_rate": 7.787254432199328e-05, "loss": 0.8373, "step": 16914 }, { "epoch": 1.146080357747815, "grad_norm": 6.411697864532471, "learning_rate": 7.787117530289548e-05, "loss": 0.7271, "step": 16915 }, { "epoch": 1.146148113015787, "grad_norm": 5.198284149169922, "learning_rate": 7.786980628379766e-05, "loss": 0.7704, "step": 16916 }, { "epoch": 1.146215868283759, "grad_norm": 7.581902980804443, "learning_rate": 7.786843726469984e-05, "loss": 0.8091, "step": 16917 }, { "epoch": 1.146283623551731, "grad_norm": 6.926025390625, "learning_rate": 7.786706824560203e-05, "loss": 0.5879, "step": 16918 }, { "epoch": 1.1463513788197033, "grad_norm": 6.271979808807373, "learning_rate": 7.786569922650421e-05, "loss": 0.8076, "step": 16919 }, { "epoch": 1.1464191340876753, "grad_norm": 5.187648296356201, "learning_rate": 7.786433020740639e-05, "loss": 0.6618, "step": 16920 }, { "epoch": 1.1464868893556475, "grad_norm": 5.481020450592041, "learning_rate": 7.786296118830859e-05, "loss": 0.6813, "step": 16921 }, { "epoch": 1.1465546446236194, "grad_norm": 5.956170082092285, "learning_rate": 7.786159216921077e-05, "loss": 0.6529, "step": 16922 }, { "epoch": 1.1466223998915916, "grad_norm": 6.283894062042236, "learning_rate": 7.786022315011295e-05, "loss": 0.6642, "step": 16923 }, { "epoch": 1.1466901551595636, "grad_norm": 6.540280342102051, "learning_rate": 7.785885413101514e-05, "loss": 0.9651, "step": 16924 }, { "epoch": 1.1467579104275358, "grad_norm": 5.8760528564453125, "learning_rate": 7.785748511191732e-05, "loss": 0.5935, "step": 16925 }, { "epoch": 1.1468256656955078, "grad_norm": 6.100462436676025, "learning_rate": 7.78561160928195e-05, "loss": 0.6853, "step": 16926 }, { "epoch": 1.14689342096348, "grad_norm": 8.684874534606934, "learning_rate": 7.785474707372168e-05, "loss": 0.8358, "step": 16927 }, { "epoch": 1.146961176231452, "grad_norm": 9.009882926940918, "learning_rate": 7.785337805462388e-05, "loss": 0.6265, "step": 16928 }, { "epoch": 1.147028931499424, "grad_norm": 5.696036338806152, "learning_rate": 7.785200903552606e-05, "loss": 0.6748, "step": 16929 }, { "epoch": 1.1470966867673962, "grad_norm": 5.4167866706848145, "learning_rate": 7.785064001642824e-05, "loss": 0.7013, "step": 16930 }, { "epoch": 1.1471644420353682, "grad_norm": 9.469526290893555, "learning_rate": 7.784927099733042e-05, "loss": 0.9617, "step": 16931 }, { "epoch": 1.1472321973033404, "grad_norm": 7.8615593910217285, "learning_rate": 7.78479019782326e-05, "loss": 0.6682, "step": 16932 }, { "epoch": 1.1472999525713123, "grad_norm": 5.507148742675781, "learning_rate": 7.784653295913479e-05, "loss": 0.5723, "step": 16933 }, { "epoch": 1.1473677078392845, "grad_norm": 7.456667423248291, "learning_rate": 7.784516394003697e-05, "loss": 0.8836, "step": 16934 }, { "epoch": 1.1474354631072565, "grad_norm": 6.641378402709961, "learning_rate": 7.784379492093915e-05, "loss": 0.8819, "step": 16935 }, { "epoch": 1.1475032183752287, "grad_norm": 7.157406330108643, "learning_rate": 7.784242590184133e-05, "loss": 0.6635, "step": 16936 }, { "epoch": 1.1475709736432007, "grad_norm": 5.309220314025879, "learning_rate": 7.784105688274351e-05, "loss": 0.5876, "step": 16937 }, { "epoch": 1.147638728911173, "grad_norm": 6.855804920196533, "learning_rate": 7.783968786364571e-05, "loss": 0.7916, "step": 16938 }, { "epoch": 1.147706484179145, "grad_norm": 6.300415992736816, "learning_rate": 7.783831884454789e-05, "loss": 0.7407, "step": 16939 }, { "epoch": 1.147774239447117, "grad_norm": 5.024459362030029, "learning_rate": 7.783694982545007e-05, "loss": 0.669, "step": 16940 }, { "epoch": 1.147841994715089, "grad_norm": 5.556980609893799, "learning_rate": 7.783558080635225e-05, "loss": 0.6507, "step": 16941 }, { "epoch": 1.1479097499830613, "grad_norm": 6.7080535888671875, "learning_rate": 7.783421178725444e-05, "loss": 0.8388, "step": 16942 }, { "epoch": 1.1479775052510333, "grad_norm": 5.859105110168457, "learning_rate": 7.783284276815662e-05, "loss": 0.7661, "step": 16943 }, { "epoch": 1.1480452605190052, "grad_norm": 5.6181159019470215, "learning_rate": 7.78314737490588e-05, "loss": 0.7629, "step": 16944 }, { "epoch": 1.1481130157869774, "grad_norm": 5.8782854080200195, "learning_rate": 7.783010472996098e-05, "loss": 0.7253, "step": 16945 }, { "epoch": 1.1481807710549496, "grad_norm": 4.898889064788818, "learning_rate": 7.782873571086316e-05, "loss": 0.6607, "step": 16946 }, { "epoch": 1.1482485263229216, "grad_norm": 6.8231329917907715, "learning_rate": 7.782736669176536e-05, "loss": 0.6647, "step": 16947 }, { "epoch": 1.1483162815908936, "grad_norm": 4.969480514526367, "learning_rate": 7.782599767266754e-05, "loss": 0.5449, "step": 16948 }, { "epoch": 1.1483840368588658, "grad_norm": 7.149383544921875, "learning_rate": 7.782462865356972e-05, "loss": 0.6182, "step": 16949 }, { "epoch": 1.1484517921268378, "grad_norm": 6.851714134216309, "learning_rate": 7.78232596344719e-05, "loss": 0.7021, "step": 16950 }, { "epoch": 1.14851954739481, "grad_norm": 6.9529924392700195, "learning_rate": 7.782189061537409e-05, "loss": 0.7937, "step": 16951 }, { "epoch": 1.148587302662782, "grad_norm": 5.88994026184082, "learning_rate": 7.782052159627627e-05, "loss": 0.9483, "step": 16952 }, { "epoch": 1.1486550579307542, "grad_norm": 5.6566643714904785, "learning_rate": 7.781915257717845e-05, "loss": 0.5805, "step": 16953 }, { "epoch": 1.1487228131987262, "grad_norm": 6.150807857513428, "learning_rate": 7.781778355808063e-05, "loss": 0.7529, "step": 16954 }, { "epoch": 1.1487905684666984, "grad_norm": 5.5446038246154785, "learning_rate": 7.781641453898281e-05, "loss": 0.6351, "step": 16955 }, { "epoch": 1.1488583237346703, "grad_norm": 5.691783428192139, "learning_rate": 7.781504551988501e-05, "loss": 0.5886, "step": 16956 }, { "epoch": 1.1489260790026425, "grad_norm": 5.392401218414307, "learning_rate": 7.781367650078719e-05, "loss": 0.5915, "step": 16957 }, { "epoch": 1.1489938342706145, "grad_norm": 7.51052713394165, "learning_rate": 7.781230748168937e-05, "loss": 0.713, "step": 16958 }, { "epoch": 1.1490615895385865, "grad_norm": 6.7278523445129395, "learning_rate": 7.781093846259155e-05, "loss": 0.6796, "step": 16959 }, { "epoch": 1.1491293448065587, "grad_norm": 8.327152252197266, "learning_rate": 7.780956944349373e-05, "loss": 0.6551, "step": 16960 }, { "epoch": 1.149197100074531, "grad_norm": 7.18127965927124, "learning_rate": 7.780820042439592e-05, "loss": 0.6148, "step": 16961 }, { "epoch": 1.149264855342503, "grad_norm": 4.836153030395508, "learning_rate": 7.78068314052981e-05, "loss": 0.5284, "step": 16962 }, { "epoch": 1.1493326106104749, "grad_norm": 5.184370994567871, "learning_rate": 7.780546238620028e-05, "loss": 0.5887, "step": 16963 }, { "epoch": 1.149400365878447, "grad_norm": 7.310936450958252, "learning_rate": 7.780409336710248e-05, "loss": 0.6862, "step": 16964 }, { "epoch": 1.149468121146419, "grad_norm": 5.159470081329346, "learning_rate": 7.780272434800466e-05, "loss": 0.6848, "step": 16965 }, { "epoch": 1.1495358764143913, "grad_norm": 5.7770161628723145, "learning_rate": 7.780135532890684e-05, "loss": 0.9589, "step": 16966 }, { "epoch": 1.1496036316823632, "grad_norm": 6.089867115020752, "learning_rate": 7.779998630980903e-05, "loss": 0.6904, "step": 16967 }, { "epoch": 1.1496713869503354, "grad_norm": 4.882379531860352, "learning_rate": 7.779861729071121e-05, "loss": 0.5668, "step": 16968 }, { "epoch": 1.1497391422183074, "grad_norm": 6.222308158874512, "learning_rate": 7.779724827161339e-05, "loss": 0.8463, "step": 16969 }, { "epoch": 1.1498068974862796, "grad_norm": 7.721385478973389, "learning_rate": 7.779587925251559e-05, "loss": 0.869, "step": 16970 }, { "epoch": 1.1498746527542516, "grad_norm": 4.819281101226807, "learning_rate": 7.779451023341777e-05, "loss": 0.5424, "step": 16971 }, { "epoch": 1.1499424080222238, "grad_norm": 5.347133636474609, "learning_rate": 7.779314121431995e-05, "loss": 0.5816, "step": 16972 }, { "epoch": 1.1500101632901958, "grad_norm": 6.357661724090576, "learning_rate": 7.779177219522213e-05, "loss": 0.7384, "step": 16973 }, { "epoch": 1.150077918558168, "grad_norm": 6.177036762237549, "learning_rate": 7.779040317612432e-05, "loss": 0.6792, "step": 16974 }, { "epoch": 1.15014567382614, "grad_norm": 5.559142589569092, "learning_rate": 7.77890341570265e-05, "loss": 0.6939, "step": 16975 }, { "epoch": 1.1502134290941122, "grad_norm": 4.8017473220825195, "learning_rate": 7.778766513792868e-05, "loss": 0.7028, "step": 16976 }, { "epoch": 1.1502811843620842, "grad_norm": 5.330672264099121, "learning_rate": 7.778629611883086e-05, "loss": 0.7333, "step": 16977 }, { "epoch": 1.1503489396300561, "grad_norm": 7.376459121704102, "learning_rate": 7.778492709973304e-05, "loss": 0.6522, "step": 16978 }, { "epoch": 1.1504166948980283, "grad_norm": 4.467447280883789, "learning_rate": 7.778355808063524e-05, "loss": 0.5219, "step": 16979 }, { "epoch": 1.1504844501660003, "grad_norm": 5.761083126068115, "learning_rate": 7.778218906153742e-05, "loss": 0.6823, "step": 16980 }, { "epoch": 1.1505522054339725, "grad_norm": 7.13974666595459, "learning_rate": 7.77808200424396e-05, "loss": 0.6415, "step": 16981 }, { "epoch": 1.1506199607019445, "grad_norm": 5.323427200317383, "learning_rate": 7.777945102334178e-05, "loss": 0.7416, "step": 16982 }, { "epoch": 1.1506877159699167, "grad_norm": 8.04574203491211, "learning_rate": 7.777808200424397e-05, "loss": 0.7871, "step": 16983 }, { "epoch": 1.1507554712378887, "grad_norm": 7.360369682312012, "learning_rate": 7.777671298514615e-05, "loss": 0.7206, "step": 16984 }, { "epoch": 1.1508232265058609, "grad_norm": 6.681160926818848, "learning_rate": 7.777534396604833e-05, "loss": 0.7463, "step": 16985 }, { "epoch": 1.1508909817738329, "grad_norm": 7.055415630340576, "learning_rate": 7.777397494695051e-05, "loss": 0.8558, "step": 16986 }, { "epoch": 1.150958737041805, "grad_norm": 6.905728340148926, "learning_rate": 7.777260592785269e-05, "loss": 0.5685, "step": 16987 }, { "epoch": 1.151026492309777, "grad_norm": 5.242982387542725, "learning_rate": 7.777123690875489e-05, "loss": 0.4737, "step": 16988 }, { "epoch": 1.1510942475777493, "grad_norm": 4.060085773468018, "learning_rate": 7.776986788965707e-05, "loss": 0.6109, "step": 16989 }, { "epoch": 1.1511620028457212, "grad_norm": 6.4642229080200195, "learning_rate": 7.776849887055925e-05, "loss": 0.9386, "step": 16990 }, { "epoch": 1.1512297581136934, "grad_norm": 4.4571123123168945, "learning_rate": 7.776712985146143e-05, "loss": 0.6162, "step": 16991 }, { "epoch": 1.1512975133816654, "grad_norm": 4.490910530090332, "learning_rate": 7.776576083236361e-05, "loss": 0.6082, "step": 16992 }, { "epoch": 1.1513652686496374, "grad_norm": 7.802062034606934, "learning_rate": 7.77643918132658e-05, "loss": 0.7308, "step": 16993 }, { "epoch": 1.1514330239176096, "grad_norm": 4.726348876953125, "learning_rate": 7.776302279416798e-05, "loss": 0.7596, "step": 16994 }, { "epoch": 1.1515007791855818, "grad_norm": 7.226054668426514, "learning_rate": 7.776165377507016e-05, "loss": 0.8948, "step": 16995 }, { "epoch": 1.1515685344535538, "grad_norm": 4.567721843719482, "learning_rate": 7.776028475597234e-05, "loss": 0.5063, "step": 16996 }, { "epoch": 1.1516362897215258, "grad_norm": 8.410115242004395, "learning_rate": 7.775891573687454e-05, "loss": 0.6893, "step": 16997 }, { "epoch": 1.151704044989498, "grad_norm": 5.118034839630127, "learning_rate": 7.775754671777672e-05, "loss": 0.6749, "step": 16998 }, { "epoch": 1.15177180025747, "grad_norm": 6.0118184089660645, "learning_rate": 7.77561776986789e-05, "loss": 0.6228, "step": 16999 }, { "epoch": 1.1518395555254421, "grad_norm": 4.889934062957764, "learning_rate": 7.775480867958108e-05, "loss": 0.7567, "step": 17000 }, { "epoch": 1.1519073107934141, "grad_norm": 4.716844081878662, "learning_rate": 7.775343966048326e-05, "loss": 0.6083, "step": 17001 }, { "epoch": 1.1519750660613863, "grad_norm": 6.07877254486084, "learning_rate": 7.775207064138545e-05, "loss": 0.7686, "step": 17002 }, { "epoch": 1.1520428213293583, "grad_norm": 6.810544013977051, "learning_rate": 7.775070162228763e-05, "loss": 0.5769, "step": 17003 }, { "epoch": 1.1521105765973305, "grad_norm": 6.12678337097168, "learning_rate": 7.774933260318981e-05, "loss": 0.5156, "step": 17004 }, { "epoch": 1.1521783318653025, "grad_norm": 5.999094009399414, "learning_rate": 7.774796358409199e-05, "loss": 0.5038, "step": 17005 }, { "epoch": 1.1522460871332747, "grad_norm": 5.4058332443237305, "learning_rate": 7.774659456499419e-05, "loss": 0.7177, "step": 17006 }, { "epoch": 1.1523138424012467, "grad_norm": 4.426954746246338, "learning_rate": 7.774522554589637e-05, "loss": 0.4639, "step": 17007 }, { "epoch": 1.1523815976692187, "grad_norm": 5.5401530265808105, "learning_rate": 7.774385652679855e-05, "loss": 0.818, "step": 17008 }, { "epoch": 1.1524493529371909, "grad_norm": 5.004909038543701, "learning_rate": 7.774248750770073e-05, "loss": 0.8341, "step": 17009 }, { "epoch": 1.152517108205163, "grad_norm": 8.242889404296875, "learning_rate": 7.774111848860292e-05, "loss": 0.8211, "step": 17010 }, { "epoch": 1.152584863473135, "grad_norm": 5.827528476715088, "learning_rate": 7.77397494695051e-05, "loss": 0.6921, "step": 17011 }, { "epoch": 1.152652618741107, "grad_norm": 8.284795761108398, "learning_rate": 7.773838045040728e-05, "loss": 0.7018, "step": 17012 }, { "epoch": 1.1527203740090792, "grad_norm": 8.282230377197266, "learning_rate": 7.773701143130948e-05, "loss": 0.7349, "step": 17013 }, { "epoch": 1.1527881292770512, "grad_norm": 6.315917015075684, "learning_rate": 7.773564241221166e-05, "loss": 0.7272, "step": 17014 }, { "epoch": 1.1528558845450234, "grad_norm": 5.585415363311768, "learning_rate": 7.773427339311384e-05, "loss": 0.567, "step": 17015 }, { "epoch": 1.1529236398129954, "grad_norm": 4.947295665740967, "learning_rate": 7.773290437401603e-05, "loss": 0.5206, "step": 17016 }, { "epoch": 1.1529913950809676, "grad_norm": 5.741033554077148, "learning_rate": 7.773153535491821e-05, "loss": 0.651, "step": 17017 }, { "epoch": 1.1530591503489396, "grad_norm": 5.564809322357178, "learning_rate": 7.773016633582039e-05, "loss": 0.7332, "step": 17018 }, { "epoch": 1.1531269056169118, "grad_norm": 6.336185932159424, "learning_rate": 7.772879731672257e-05, "loss": 0.8473, "step": 17019 }, { "epoch": 1.1531946608848838, "grad_norm": 10.212484359741211, "learning_rate": 7.772742829762477e-05, "loss": 0.598, "step": 17020 }, { "epoch": 1.153262416152856, "grad_norm": 5.718522548675537, "learning_rate": 7.772605927852695e-05, "loss": 0.7474, "step": 17021 }, { "epoch": 1.153330171420828, "grad_norm": 5.485384464263916, "learning_rate": 7.772469025942913e-05, "loss": 0.8378, "step": 17022 }, { "epoch": 1.1533979266888001, "grad_norm": 4.341707229614258, "learning_rate": 7.77233212403313e-05, "loss": 0.6496, "step": 17023 }, { "epoch": 1.1534656819567721, "grad_norm": 5.866021633148193, "learning_rate": 7.772195222123349e-05, "loss": 0.611, "step": 17024 }, { "epoch": 1.1535334372247443, "grad_norm": 5.35130500793457, "learning_rate": 7.772058320213568e-05, "loss": 0.7405, "step": 17025 }, { "epoch": 1.1536011924927163, "grad_norm": 5.8092780113220215, "learning_rate": 7.771921418303786e-05, "loss": 0.7732, "step": 17026 }, { "epoch": 1.1536689477606883, "grad_norm": 4.408275604248047, "learning_rate": 7.771784516394004e-05, "loss": 0.7354, "step": 17027 }, { "epoch": 1.1537367030286605, "grad_norm": 5.520874500274658, "learning_rate": 7.771647614484222e-05, "loss": 0.57, "step": 17028 }, { "epoch": 1.1538044582966325, "grad_norm": 6.835943698883057, "learning_rate": 7.771510712574442e-05, "loss": 0.8913, "step": 17029 }, { "epoch": 1.1538722135646047, "grad_norm": 6.106658458709717, "learning_rate": 7.77137381066466e-05, "loss": 0.7279, "step": 17030 }, { "epoch": 1.1539399688325767, "grad_norm": 4.718100547790527, "learning_rate": 7.771236908754878e-05, "loss": 0.5556, "step": 17031 }, { "epoch": 1.1540077241005489, "grad_norm": 6.6284871101379395, "learning_rate": 7.771100006845096e-05, "loss": 0.8279, "step": 17032 }, { "epoch": 1.1540754793685208, "grad_norm": 6.730345726013184, "learning_rate": 7.770963104935314e-05, "loss": 0.5271, "step": 17033 }, { "epoch": 1.154143234636493, "grad_norm": 8.18601131439209, "learning_rate": 7.770826203025533e-05, "loss": 0.7659, "step": 17034 }, { "epoch": 1.154210989904465, "grad_norm": 5.387197017669678, "learning_rate": 7.770689301115751e-05, "loss": 0.5248, "step": 17035 }, { "epoch": 1.1542787451724372, "grad_norm": 7.745261192321777, "learning_rate": 7.770552399205969e-05, "loss": 0.9322, "step": 17036 }, { "epoch": 1.1543465004404092, "grad_norm": 5.017677307128906, "learning_rate": 7.770415497296187e-05, "loss": 0.6844, "step": 17037 }, { "epoch": 1.1544142557083814, "grad_norm": 5.272484302520752, "learning_rate": 7.770278595386407e-05, "loss": 0.9975, "step": 17038 }, { "epoch": 1.1544820109763534, "grad_norm": 6.785526752471924, "learning_rate": 7.770141693476625e-05, "loss": 0.8657, "step": 17039 }, { "epoch": 1.1545497662443256, "grad_norm": 6.357969284057617, "learning_rate": 7.770004791566843e-05, "loss": 0.9176, "step": 17040 }, { "epoch": 1.1546175215122976, "grad_norm": 5.590163707733154, "learning_rate": 7.769867889657061e-05, "loss": 0.6711, "step": 17041 }, { "epoch": 1.1546852767802696, "grad_norm": 6.2367377281188965, "learning_rate": 7.769730987747279e-05, "loss": 0.767, "step": 17042 }, { "epoch": 1.1547530320482418, "grad_norm": 5.25771951675415, "learning_rate": 7.769594085837498e-05, "loss": 0.7303, "step": 17043 }, { "epoch": 1.154820787316214, "grad_norm": 5.044614791870117, "learning_rate": 7.769457183927716e-05, "loss": 0.5922, "step": 17044 }, { "epoch": 1.154888542584186, "grad_norm": 7.982863426208496, "learning_rate": 7.769320282017934e-05, "loss": 1.0373, "step": 17045 }, { "epoch": 1.154956297852158, "grad_norm": 5.957779884338379, "learning_rate": 7.769183380108152e-05, "loss": 0.6403, "step": 17046 }, { "epoch": 1.1550240531201301, "grad_norm": 5.288189888000488, "learning_rate": 7.76904647819837e-05, "loss": 0.6481, "step": 17047 }, { "epoch": 1.155091808388102, "grad_norm": 6.193086624145508, "learning_rate": 7.76890957628859e-05, "loss": 0.9038, "step": 17048 }, { "epoch": 1.1551595636560743, "grad_norm": 8.397180557250977, "learning_rate": 7.768772674378808e-05, "loss": 0.689, "step": 17049 }, { "epoch": 1.1552273189240463, "grad_norm": 5.024189472198486, "learning_rate": 7.768635772469026e-05, "loss": 0.7721, "step": 17050 }, { "epoch": 1.1552950741920185, "grad_norm": 5.071702480316162, "learning_rate": 7.768498870559244e-05, "loss": 0.9724, "step": 17051 }, { "epoch": 1.1553628294599905, "grad_norm": 6.766203880310059, "learning_rate": 7.768361968649463e-05, "loss": 0.8014, "step": 17052 }, { "epoch": 1.1554305847279627, "grad_norm": 7.111300945281982, "learning_rate": 7.768225066739681e-05, "loss": 0.6352, "step": 17053 }, { "epoch": 1.1554983399959347, "grad_norm": 7.011511325836182, "learning_rate": 7.768088164829899e-05, "loss": 0.8808, "step": 17054 }, { "epoch": 1.1555660952639069, "grad_norm": 5.766463756561279, "learning_rate": 7.767951262920117e-05, "loss": 0.7257, "step": 17055 }, { "epoch": 1.1556338505318788, "grad_norm": 5.269811153411865, "learning_rate": 7.767814361010337e-05, "loss": 0.557, "step": 17056 }, { "epoch": 1.1557016057998508, "grad_norm": 5.217252254486084, "learning_rate": 7.767677459100555e-05, "loss": 0.6047, "step": 17057 }, { "epoch": 1.155769361067823, "grad_norm": 5.220236778259277, "learning_rate": 7.767540557190773e-05, "loss": 0.6139, "step": 17058 }, { "epoch": 1.1558371163357952, "grad_norm": 5.2868499755859375, "learning_rate": 7.767403655280992e-05, "loss": 0.5907, "step": 17059 }, { "epoch": 1.1559048716037672, "grad_norm": 5.082064151763916, "learning_rate": 7.76726675337121e-05, "loss": 0.6459, "step": 17060 }, { "epoch": 1.1559726268717392, "grad_norm": 5.143309116363525, "learning_rate": 7.767129851461428e-05, "loss": 0.7064, "step": 17061 }, { "epoch": 1.1560403821397114, "grad_norm": 5.849390029907227, "learning_rate": 7.766992949551648e-05, "loss": 0.632, "step": 17062 }, { "epoch": 1.1561081374076834, "grad_norm": 5.502965927124023, "learning_rate": 7.766856047641866e-05, "loss": 0.5645, "step": 17063 }, { "epoch": 1.1561758926756556, "grad_norm": 7.527248382568359, "learning_rate": 7.766719145732084e-05, "loss": 0.707, "step": 17064 }, { "epoch": 1.1562436479436276, "grad_norm": 11.085021018981934, "learning_rate": 7.766582243822302e-05, "loss": 0.5026, "step": 17065 }, { "epoch": 1.1563114032115998, "grad_norm": 4.332429885864258, "learning_rate": 7.766445341912521e-05, "loss": 0.4451, "step": 17066 }, { "epoch": 1.1563791584795717, "grad_norm": 6.877386569976807, "learning_rate": 7.766308440002739e-05, "loss": 0.6717, "step": 17067 }, { "epoch": 1.156446913747544, "grad_norm": 8.853668212890625, "learning_rate": 7.766171538092957e-05, "loss": 0.7166, "step": 17068 }, { "epoch": 1.156514669015516, "grad_norm": 6.745375156402588, "learning_rate": 7.766034636183175e-05, "loss": 0.7458, "step": 17069 }, { "epoch": 1.1565824242834881, "grad_norm": 4.851019859313965, "learning_rate": 7.765897734273393e-05, "loss": 0.5425, "step": 17070 }, { "epoch": 1.15665017955146, "grad_norm": 8.129114151000977, "learning_rate": 7.765760832363613e-05, "loss": 0.7745, "step": 17071 }, { "epoch": 1.1567179348194323, "grad_norm": 7.2463603019714355, "learning_rate": 7.76562393045383e-05, "loss": 0.7822, "step": 17072 }, { "epoch": 1.1567856900874043, "grad_norm": 6.648437023162842, "learning_rate": 7.765487028544049e-05, "loss": 0.7371, "step": 17073 }, { "epoch": 1.1568534453553765, "grad_norm": 4.990374565124512, "learning_rate": 7.765350126634267e-05, "loss": 0.6751, "step": 17074 }, { "epoch": 1.1569212006233485, "grad_norm": 11.30855655670166, "learning_rate": 7.765213224724486e-05, "loss": 0.5861, "step": 17075 }, { "epoch": 1.1569889558913204, "grad_norm": 6.271945953369141, "learning_rate": 7.765076322814704e-05, "loss": 0.6633, "step": 17076 }, { "epoch": 1.1570567111592927, "grad_norm": 6.8997392654418945, "learning_rate": 7.764939420904922e-05, "loss": 0.6447, "step": 17077 }, { "epoch": 1.1571244664272646, "grad_norm": 7.4019389152526855, "learning_rate": 7.76480251899514e-05, "loss": 0.7981, "step": 17078 }, { "epoch": 1.1571922216952368, "grad_norm": 8.452839851379395, "learning_rate": 7.764665617085358e-05, "loss": 0.9539, "step": 17079 }, { "epoch": 1.1572599769632088, "grad_norm": 5.080465793609619, "learning_rate": 7.764528715175578e-05, "loss": 0.6904, "step": 17080 }, { "epoch": 1.157327732231181, "grad_norm": 5.674568176269531, "learning_rate": 7.764391813265796e-05, "loss": 0.5814, "step": 17081 }, { "epoch": 1.157395487499153, "grad_norm": 5.936946392059326, "learning_rate": 7.764254911356014e-05, "loss": 0.7214, "step": 17082 }, { "epoch": 1.1574632427671252, "grad_norm": 5.97898530960083, "learning_rate": 7.764118009446232e-05, "loss": 0.8698, "step": 17083 }, { "epoch": 1.1575309980350972, "grad_norm": 6.230330467224121, "learning_rate": 7.763981107536451e-05, "loss": 0.7555, "step": 17084 }, { "epoch": 1.1575987533030694, "grad_norm": 4.197784900665283, "learning_rate": 7.763844205626669e-05, "loss": 0.5187, "step": 17085 }, { "epoch": 1.1576665085710414, "grad_norm": 7.4203643798828125, "learning_rate": 7.763707303716887e-05, "loss": 0.9264, "step": 17086 }, { "epoch": 1.1577342638390136, "grad_norm": 8.698983192443848, "learning_rate": 7.763570401807105e-05, "loss": 0.605, "step": 17087 }, { "epoch": 1.1578020191069855, "grad_norm": 6.924211502075195, "learning_rate": 7.763433499897323e-05, "loss": 0.7919, "step": 17088 }, { "epoch": 1.1578697743749577, "grad_norm": 4.469345569610596, "learning_rate": 7.763296597987543e-05, "loss": 0.4836, "step": 17089 }, { "epoch": 1.1579375296429297, "grad_norm": 6.712977409362793, "learning_rate": 7.76315969607776e-05, "loss": 0.7123, "step": 17090 }, { "epoch": 1.1580052849109017, "grad_norm": 5.67714786529541, "learning_rate": 7.763022794167979e-05, "loss": 0.7613, "step": 17091 }, { "epoch": 1.158073040178874, "grad_norm": 6.554959297180176, "learning_rate": 7.762885892258197e-05, "loss": 0.7535, "step": 17092 }, { "epoch": 1.1581407954468461, "grad_norm": 6.625911235809326, "learning_rate": 7.762748990348415e-05, "loss": 0.8759, "step": 17093 }, { "epoch": 1.158208550714818, "grad_norm": 6.199068069458008, "learning_rate": 7.762612088438634e-05, "loss": 1.0106, "step": 17094 }, { "epoch": 1.15827630598279, "grad_norm": 7.14901876449585, "learning_rate": 7.762475186528852e-05, "loss": 0.665, "step": 17095 }, { "epoch": 1.1583440612507623, "grad_norm": 8.458894729614258, "learning_rate": 7.76233828461907e-05, "loss": 0.8775, "step": 17096 }, { "epoch": 1.1584118165187343, "grad_norm": 5.443024635314941, "learning_rate": 7.762201382709288e-05, "loss": 0.6247, "step": 17097 }, { "epoch": 1.1584795717867065, "grad_norm": 5.4433817863464355, "learning_rate": 7.762064480799508e-05, "loss": 0.8298, "step": 17098 }, { "epoch": 1.1585473270546784, "grad_norm": 8.201144218444824, "learning_rate": 7.761927578889726e-05, "loss": 0.5813, "step": 17099 }, { "epoch": 1.1586150823226506, "grad_norm": 5.2088236808776855, "learning_rate": 7.761790676979944e-05, "loss": 0.7396, "step": 17100 }, { "epoch": 1.1586828375906226, "grad_norm": 7.154380798339844, "learning_rate": 7.761653775070162e-05, "loss": 0.6321, "step": 17101 }, { "epoch": 1.1587505928585948, "grad_norm": 7.217487812042236, "learning_rate": 7.761516873160381e-05, "loss": 0.4882, "step": 17102 }, { "epoch": 1.1588183481265668, "grad_norm": 5.230490684509277, "learning_rate": 7.761379971250599e-05, "loss": 0.6101, "step": 17103 }, { "epoch": 1.158886103394539, "grad_norm": 8.068559646606445, "learning_rate": 7.761243069340817e-05, "loss": 0.594, "step": 17104 }, { "epoch": 1.158953858662511, "grad_norm": 4.883063316345215, "learning_rate": 7.761106167431037e-05, "loss": 0.6828, "step": 17105 }, { "epoch": 1.159021613930483, "grad_norm": 6.669475078582764, "learning_rate": 7.760969265521255e-05, "loss": 0.8075, "step": 17106 }, { "epoch": 1.1590893691984552, "grad_norm": 7.147501468658447, "learning_rate": 7.760832363611473e-05, "loss": 0.6016, "step": 17107 }, { "epoch": 1.1591571244664274, "grad_norm": 5.705437660217285, "learning_rate": 7.760695461701692e-05, "loss": 0.7725, "step": 17108 }, { "epoch": 1.1592248797343994, "grad_norm": 7.769946098327637, "learning_rate": 7.76055855979191e-05, "loss": 0.6734, "step": 17109 }, { "epoch": 1.1592926350023713, "grad_norm": 5.215763092041016, "learning_rate": 7.760421657882128e-05, "loss": 0.659, "step": 17110 }, { "epoch": 1.1593603902703435, "grad_norm": 6.284592628479004, "learning_rate": 7.760284755972346e-05, "loss": 0.7078, "step": 17111 }, { "epoch": 1.1594281455383155, "grad_norm": 6.298258304595947, "learning_rate": 7.760147854062566e-05, "loss": 0.4822, "step": 17112 }, { "epoch": 1.1594959008062877, "grad_norm": 6.485654830932617, "learning_rate": 7.760010952152784e-05, "loss": 0.7425, "step": 17113 }, { "epoch": 1.1595636560742597, "grad_norm": 4.581310272216797, "learning_rate": 7.759874050243002e-05, "loss": 0.5134, "step": 17114 }, { "epoch": 1.159631411342232, "grad_norm": 5.878685474395752, "learning_rate": 7.75973714833322e-05, "loss": 0.71, "step": 17115 }, { "epoch": 1.159699166610204, "grad_norm": 5.818406581878662, "learning_rate": 7.759600246423439e-05, "loss": 0.705, "step": 17116 }, { "epoch": 1.159766921878176, "grad_norm": 6.247715950012207, "learning_rate": 7.759463344513657e-05, "loss": 0.747, "step": 17117 }, { "epoch": 1.159834677146148, "grad_norm": 7.777171611785889, "learning_rate": 7.759326442603875e-05, "loss": 0.8842, "step": 17118 }, { "epoch": 1.1599024324141203, "grad_norm": 4.369725704193115, "learning_rate": 7.759189540694093e-05, "loss": 0.5713, "step": 17119 }, { "epoch": 1.1599701876820923, "grad_norm": 5.170098304748535, "learning_rate": 7.759052638784311e-05, "loss": 0.7709, "step": 17120 }, { "epoch": 1.1600379429500645, "grad_norm": 7.886532306671143, "learning_rate": 7.75891573687453e-05, "loss": 0.5762, "step": 17121 }, { "epoch": 1.1601056982180364, "grad_norm": 6.549617290496826, "learning_rate": 7.758778834964749e-05, "loss": 0.597, "step": 17122 }, { "epoch": 1.1601734534860086, "grad_norm": 8.460987091064453, "learning_rate": 7.758641933054967e-05, "loss": 0.7194, "step": 17123 }, { "epoch": 1.1602412087539806, "grad_norm": 8.659561157226562, "learning_rate": 7.758505031145185e-05, "loss": 0.845, "step": 17124 }, { "epoch": 1.1603089640219526, "grad_norm": 6.515053749084473, "learning_rate": 7.758368129235403e-05, "loss": 0.9518, "step": 17125 }, { "epoch": 1.1603767192899248, "grad_norm": 14.59138011932373, "learning_rate": 7.758231227325622e-05, "loss": 0.6451, "step": 17126 }, { "epoch": 1.1604444745578968, "grad_norm": 5.754178047180176, "learning_rate": 7.75809432541584e-05, "loss": 0.6652, "step": 17127 }, { "epoch": 1.160512229825869, "grad_norm": 5.252496242523193, "learning_rate": 7.757957423506058e-05, "loss": 0.6412, "step": 17128 }, { "epoch": 1.160579985093841, "grad_norm": 7.788862228393555, "learning_rate": 7.757820521596276e-05, "loss": 0.7068, "step": 17129 }, { "epoch": 1.1606477403618132, "grad_norm": 5.3729143142700195, "learning_rate": 7.757683619686496e-05, "loss": 0.6523, "step": 17130 }, { "epoch": 1.1607154956297852, "grad_norm": 4.968121528625488, "learning_rate": 7.757546717776714e-05, "loss": 0.5823, "step": 17131 }, { "epoch": 1.1607832508977574, "grad_norm": 6.920864105224609, "learning_rate": 7.757409815866932e-05, "loss": 0.8567, "step": 17132 }, { "epoch": 1.1608510061657293, "grad_norm": 8.479312896728516, "learning_rate": 7.75727291395715e-05, "loss": 0.8317, "step": 17133 }, { "epoch": 1.1609187614337015, "grad_norm": 8.27676010131836, "learning_rate": 7.757136012047368e-05, "loss": 0.7197, "step": 17134 }, { "epoch": 1.1609865167016735, "grad_norm": 4.960324764251709, "learning_rate": 7.756999110137587e-05, "loss": 0.6568, "step": 17135 }, { "epoch": 1.1610542719696457, "grad_norm": 5.801924705505371, "learning_rate": 7.756862208227805e-05, "loss": 0.7134, "step": 17136 }, { "epoch": 1.1611220272376177, "grad_norm": 7.101438045501709, "learning_rate": 7.756725306318023e-05, "loss": 0.7979, "step": 17137 }, { "epoch": 1.16118978250559, "grad_norm": 3.8948190212249756, "learning_rate": 7.756588404408241e-05, "loss": 0.6097, "step": 17138 }, { "epoch": 1.1612575377735619, "grad_norm": 5.797446250915527, "learning_rate": 7.75645150249846e-05, "loss": 0.68, "step": 17139 }, { "epoch": 1.1613252930415339, "grad_norm": 7.5896525382995605, "learning_rate": 7.756314600588679e-05, "loss": 0.7619, "step": 17140 }, { "epoch": 1.161393048309506, "grad_norm": 4.365643501281738, "learning_rate": 7.756177698678897e-05, "loss": 0.7374, "step": 17141 }, { "epoch": 1.1614608035774783, "grad_norm": 5.378994941711426, "learning_rate": 7.756040796769115e-05, "loss": 0.7987, "step": 17142 }, { "epoch": 1.1615285588454503, "grad_norm": 5.658888339996338, "learning_rate": 7.755903894859333e-05, "loss": 0.6007, "step": 17143 }, { "epoch": 1.1615963141134222, "grad_norm": 4.8605170249938965, "learning_rate": 7.755766992949552e-05, "loss": 0.5425, "step": 17144 }, { "epoch": 1.1616640693813944, "grad_norm": 6.8122100830078125, "learning_rate": 7.75563009103977e-05, "loss": 0.5716, "step": 17145 }, { "epoch": 1.1617318246493664, "grad_norm": 5.280952453613281, "learning_rate": 7.755493189129988e-05, "loss": 0.8245, "step": 17146 }, { "epoch": 1.1617995799173386, "grad_norm": 6.752953052520752, "learning_rate": 7.755356287220206e-05, "loss": 0.7489, "step": 17147 }, { "epoch": 1.1618673351853106, "grad_norm": 5.453287124633789, "learning_rate": 7.755219385310424e-05, "loss": 0.6679, "step": 17148 }, { "epoch": 1.1619350904532828, "grad_norm": 5.091671466827393, "learning_rate": 7.755082483400644e-05, "loss": 0.5512, "step": 17149 }, { "epoch": 1.1620028457212548, "grad_norm": 7.830700397491455, "learning_rate": 7.754945581490862e-05, "loss": 0.7014, "step": 17150 }, { "epoch": 1.162070600989227, "grad_norm": 6.83212947845459, "learning_rate": 7.75480867958108e-05, "loss": 0.4873, "step": 17151 }, { "epoch": 1.162138356257199, "grad_norm": 6.0037841796875, "learning_rate": 7.754671777671299e-05, "loss": 0.6945, "step": 17152 }, { "epoch": 1.1622061115251712, "grad_norm": 5.744686603546143, "learning_rate": 7.754534875761517e-05, "loss": 0.805, "step": 17153 }, { "epoch": 1.1622738667931432, "grad_norm": 8.571599960327148, "learning_rate": 7.754397973851737e-05, "loss": 0.8147, "step": 17154 }, { "epoch": 1.1623416220611151, "grad_norm": 5.277167320251465, "learning_rate": 7.754261071941955e-05, "loss": 0.6513, "step": 17155 }, { "epoch": 1.1624093773290873, "grad_norm": 5.50898551940918, "learning_rate": 7.754124170032173e-05, "loss": 0.7769, "step": 17156 }, { "epoch": 1.1624771325970595, "grad_norm": 6.313873767852783, "learning_rate": 7.75398726812239e-05, "loss": 0.7234, "step": 17157 }, { "epoch": 1.1625448878650315, "grad_norm": 5.537959575653076, "learning_rate": 7.75385036621261e-05, "loss": 0.7568, "step": 17158 }, { "epoch": 1.1626126431330035, "grad_norm": 5.766448497772217, "learning_rate": 7.753713464302828e-05, "loss": 0.6761, "step": 17159 }, { "epoch": 1.1626803984009757, "grad_norm": 6.09381628036499, "learning_rate": 7.753576562393046e-05, "loss": 0.8554, "step": 17160 }, { "epoch": 1.1627481536689477, "grad_norm": 5.5013251304626465, "learning_rate": 7.753439660483264e-05, "loss": 0.6297, "step": 17161 }, { "epoch": 1.1628159089369199, "grad_norm": 6.226048946380615, "learning_rate": 7.753302758573483e-05, "loss": 0.7102, "step": 17162 }, { "epoch": 1.1628836642048919, "grad_norm": 5.695722579956055, "learning_rate": 7.753165856663702e-05, "loss": 0.5396, "step": 17163 }, { "epoch": 1.162951419472864, "grad_norm": 6.318460941314697, "learning_rate": 7.75302895475392e-05, "loss": 0.7032, "step": 17164 }, { "epoch": 1.163019174740836, "grad_norm": 6.319543361663818, "learning_rate": 7.752892052844138e-05, "loss": 0.5056, "step": 17165 }, { "epoch": 1.1630869300088083, "grad_norm": 5.164156913757324, "learning_rate": 7.752755150934356e-05, "loss": 0.5995, "step": 17166 }, { "epoch": 1.1631546852767802, "grad_norm": 5.994420051574707, "learning_rate": 7.752618249024575e-05, "loss": 1.1241, "step": 17167 }, { "epoch": 1.1632224405447524, "grad_norm": 5.081625461578369, "learning_rate": 7.752481347114793e-05, "loss": 0.6978, "step": 17168 }, { "epoch": 1.1632901958127244, "grad_norm": 5.624451637268066, "learning_rate": 7.752344445205011e-05, "loss": 0.7173, "step": 17169 }, { "epoch": 1.1633579510806966, "grad_norm": 4.424026966094971, "learning_rate": 7.752207543295229e-05, "loss": 0.4382, "step": 17170 }, { "epoch": 1.1634257063486686, "grad_norm": 6.661045551300049, "learning_rate": 7.752070641385449e-05, "loss": 0.6755, "step": 17171 }, { "epoch": 1.1634934616166408, "grad_norm": 5.17708158493042, "learning_rate": 7.751933739475667e-05, "loss": 0.4775, "step": 17172 }, { "epoch": 1.1635612168846128, "grad_norm": 6.10673713684082, "learning_rate": 7.751796837565885e-05, "loss": 0.7051, "step": 17173 }, { "epoch": 1.1636289721525848, "grad_norm": 5.64744234085083, "learning_rate": 7.751659935656103e-05, "loss": 0.7049, "step": 17174 }, { "epoch": 1.163696727420557, "grad_norm": 7.636764049530029, "learning_rate": 7.75152303374632e-05, "loss": 0.7381, "step": 17175 }, { "epoch": 1.163764482688529, "grad_norm": 10.210199356079102, "learning_rate": 7.75138613183654e-05, "loss": 0.8437, "step": 17176 }, { "epoch": 1.1638322379565011, "grad_norm": 4.762551784515381, "learning_rate": 7.751249229926758e-05, "loss": 0.5048, "step": 17177 }, { "epoch": 1.1638999932244731, "grad_norm": 5.5700883865356445, "learning_rate": 7.751112328016976e-05, "loss": 0.8869, "step": 17178 }, { "epoch": 1.1639677484924453, "grad_norm": 5.91666316986084, "learning_rate": 7.750975426107194e-05, "loss": 0.6451, "step": 17179 }, { "epoch": 1.1640355037604173, "grad_norm": 5.347517013549805, "learning_rate": 7.750838524197412e-05, "loss": 0.6594, "step": 17180 }, { "epoch": 1.1641032590283895, "grad_norm": 5.739136219024658, "learning_rate": 7.750701622287632e-05, "loss": 0.6144, "step": 17181 }, { "epoch": 1.1641710142963615, "grad_norm": 5.545490264892578, "learning_rate": 7.75056472037785e-05, "loss": 0.5842, "step": 17182 }, { "epoch": 1.1642387695643337, "grad_norm": 7.26037073135376, "learning_rate": 7.750427818468068e-05, "loss": 0.7446, "step": 17183 }, { "epoch": 1.1643065248323057, "grad_norm": 8.13321590423584, "learning_rate": 7.750290916558286e-05, "loss": 0.7403, "step": 17184 }, { "epoch": 1.1643742801002779, "grad_norm": 5.851143836975098, "learning_rate": 7.750154014648505e-05, "loss": 0.6878, "step": 17185 }, { "epoch": 1.1644420353682499, "grad_norm": 5.682823181152344, "learning_rate": 7.750017112738723e-05, "loss": 0.7803, "step": 17186 }, { "epoch": 1.164509790636222, "grad_norm": 7.069244861602783, "learning_rate": 7.749880210828941e-05, "loss": 1.0644, "step": 17187 }, { "epoch": 1.164577545904194, "grad_norm": 4.923807144165039, "learning_rate": 7.749743308919159e-05, "loss": 0.5087, "step": 17188 }, { "epoch": 1.164645301172166, "grad_norm": 7.384223937988281, "learning_rate": 7.749606407009377e-05, "loss": 0.6694, "step": 17189 }, { "epoch": 1.1647130564401382, "grad_norm": 4.950394630432129, "learning_rate": 7.749469505099597e-05, "loss": 0.5682, "step": 17190 }, { "epoch": 1.1647808117081102, "grad_norm": 3.849876642227173, "learning_rate": 7.749332603189815e-05, "loss": 0.4433, "step": 17191 }, { "epoch": 1.1648485669760824, "grad_norm": 6.983705520629883, "learning_rate": 7.749195701280033e-05, "loss": 0.9134, "step": 17192 }, { "epoch": 1.1649163222440544, "grad_norm": 5.343315601348877, "learning_rate": 7.74905879937025e-05, "loss": 0.6362, "step": 17193 }, { "epoch": 1.1649840775120266, "grad_norm": 8.281697273254395, "learning_rate": 7.74892189746047e-05, "loss": 0.8319, "step": 17194 }, { "epoch": 1.1650518327799986, "grad_norm": 7.667835712432861, "learning_rate": 7.748784995550688e-05, "loss": 0.6045, "step": 17195 }, { "epoch": 1.1651195880479708, "grad_norm": 7.925049304962158, "learning_rate": 7.748648093640906e-05, "loss": 0.7674, "step": 17196 }, { "epoch": 1.1651873433159428, "grad_norm": 6.60289192199707, "learning_rate": 7.748511191731124e-05, "loss": 0.7478, "step": 17197 }, { "epoch": 1.165255098583915, "grad_norm": 4.811013698577881, "learning_rate": 7.748374289821344e-05, "loss": 0.5568, "step": 17198 }, { "epoch": 1.165322853851887, "grad_norm": 5.416989803314209, "learning_rate": 7.748237387911562e-05, "loss": 0.667, "step": 17199 }, { "epoch": 1.1653906091198591, "grad_norm": 5.876220703125, "learning_rate": 7.74810048600178e-05, "loss": 0.8068, "step": 17200 }, { "epoch": 1.1654583643878311, "grad_norm": 5.452373504638672, "learning_rate": 7.747963584091999e-05, "loss": 0.7206, "step": 17201 }, { "epoch": 1.1655261196558033, "grad_norm": 5.000153064727783, "learning_rate": 7.747826682182217e-05, "loss": 0.8598, "step": 17202 }, { "epoch": 1.1655938749237753, "grad_norm": 4.371356010437012, "learning_rate": 7.747689780272435e-05, "loss": 0.6802, "step": 17203 }, { "epoch": 1.1656616301917473, "grad_norm": 5.595484733581543, "learning_rate": 7.747552878362654e-05, "loss": 0.71, "step": 17204 }, { "epoch": 1.1657293854597195, "grad_norm": 6.505746364593506, "learning_rate": 7.747415976452873e-05, "loss": 0.7463, "step": 17205 }, { "epoch": 1.1657971407276917, "grad_norm": 8.62131404876709, "learning_rate": 7.74727907454309e-05, "loss": 0.5391, "step": 17206 }, { "epoch": 1.1658648959956637, "grad_norm": 4.648898601531982, "learning_rate": 7.747142172633309e-05, "loss": 0.5731, "step": 17207 }, { "epoch": 1.1659326512636357, "grad_norm": 4.276782989501953, "learning_rate": 7.747005270723528e-05, "loss": 0.5733, "step": 17208 }, { "epoch": 1.1660004065316079, "grad_norm": 7.70058012008667, "learning_rate": 7.746868368813746e-05, "loss": 0.655, "step": 17209 }, { "epoch": 1.1660681617995798, "grad_norm": 6.740431785583496, "learning_rate": 7.746731466903964e-05, "loss": 0.782, "step": 17210 }, { "epoch": 1.166135917067552, "grad_norm": 5.23295783996582, "learning_rate": 7.746594564994182e-05, "loss": 0.7198, "step": 17211 }, { "epoch": 1.166203672335524, "grad_norm": 4.6036458015441895, "learning_rate": 7.7464576630844e-05, "loss": 0.7787, "step": 17212 }, { "epoch": 1.1662714276034962, "grad_norm": 4.967904567718506, "learning_rate": 7.74632076117462e-05, "loss": 0.6894, "step": 17213 }, { "epoch": 1.1663391828714682, "grad_norm": 6.81717586517334, "learning_rate": 7.746183859264838e-05, "loss": 0.6447, "step": 17214 }, { "epoch": 1.1664069381394404, "grad_norm": 5.5367112159729, "learning_rate": 7.746046957355056e-05, "loss": 0.6741, "step": 17215 }, { "epoch": 1.1664746934074124, "grad_norm": 6.124912261962891, "learning_rate": 7.745910055445274e-05, "loss": 0.8282, "step": 17216 }, { "epoch": 1.1665424486753846, "grad_norm": 7.484246253967285, "learning_rate": 7.745773153535493e-05, "loss": 0.8897, "step": 17217 }, { "epoch": 1.1666102039433566, "grad_norm": 5.709711074829102, "learning_rate": 7.745636251625711e-05, "loss": 0.7469, "step": 17218 }, { "epoch": 1.1666779592113288, "grad_norm": 5.799917221069336, "learning_rate": 7.745499349715929e-05, "loss": 0.6627, "step": 17219 }, { "epoch": 1.1667457144793008, "grad_norm": 8.688015937805176, "learning_rate": 7.745362447806147e-05, "loss": 0.8772, "step": 17220 }, { "epoch": 1.166813469747273, "grad_norm": 5.448634147644043, "learning_rate": 7.745225545896365e-05, "loss": 0.5945, "step": 17221 }, { "epoch": 1.166881225015245, "grad_norm": 5.151609420776367, "learning_rate": 7.745088643986585e-05, "loss": 0.557, "step": 17222 }, { "epoch": 1.166948980283217, "grad_norm": 7.523448467254639, "learning_rate": 7.744951742076803e-05, "loss": 0.7231, "step": 17223 }, { "epoch": 1.1670167355511891, "grad_norm": 6.95708703994751, "learning_rate": 7.74481484016702e-05, "loss": 0.9136, "step": 17224 }, { "epoch": 1.167084490819161, "grad_norm": 4.518056869506836, "learning_rate": 7.744677938257239e-05, "loss": 0.6328, "step": 17225 }, { "epoch": 1.1671522460871333, "grad_norm": 5.213376522064209, "learning_rate": 7.744541036347457e-05, "loss": 0.5484, "step": 17226 }, { "epoch": 1.1672200013551053, "grad_norm": 6.388365268707275, "learning_rate": 7.744404134437676e-05, "loss": 0.6401, "step": 17227 }, { "epoch": 1.1672877566230775, "grad_norm": 5.914669990539551, "learning_rate": 7.744267232527894e-05, "loss": 0.7474, "step": 17228 }, { "epoch": 1.1673555118910495, "grad_norm": 7.565332889556885, "learning_rate": 7.744130330618112e-05, "loss": 0.637, "step": 17229 }, { "epoch": 1.1674232671590217, "grad_norm": 4.865163326263428, "learning_rate": 7.74399342870833e-05, "loss": 0.6212, "step": 17230 }, { "epoch": 1.1674910224269937, "grad_norm": 4.324069976806641, "learning_rate": 7.74385652679855e-05, "loss": 0.533, "step": 17231 }, { "epoch": 1.1675587776949659, "grad_norm": 5.414638519287109, "learning_rate": 7.743719624888768e-05, "loss": 0.4943, "step": 17232 }, { "epoch": 1.1676265329629378, "grad_norm": 6.219473838806152, "learning_rate": 7.743582722978986e-05, "loss": 0.5288, "step": 17233 }, { "epoch": 1.16769428823091, "grad_norm": 6.105441093444824, "learning_rate": 7.743445821069204e-05, "loss": 0.5303, "step": 17234 }, { "epoch": 1.167762043498882, "grad_norm": 5.521098613739014, "learning_rate": 7.743308919159422e-05, "loss": 0.6609, "step": 17235 }, { "epoch": 1.1678297987668542, "grad_norm": 5.3490424156188965, "learning_rate": 7.743172017249641e-05, "loss": 0.5023, "step": 17236 }, { "epoch": 1.1678975540348262, "grad_norm": 7.1635541915893555, "learning_rate": 7.743035115339859e-05, "loss": 0.8012, "step": 17237 }, { "epoch": 1.1679653093027982, "grad_norm": 6.178504467010498, "learning_rate": 7.742898213430077e-05, "loss": 0.8079, "step": 17238 }, { "epoch": 1.1680330645707704, "grad_norm": 4.957045078277588, "learning_rate": 7.742761311520295e-05, "loss": 0.6104, "step": 17239 }, { "epoch": 1.1681008198387424, "grad_norm": 8.841684341430664, "learning_rate": 7.742624409610515e-05, "loss": 0.551, "step": 17240 }, { "epoch": 1.1681685751067146, "grad_norm": 5.315309524536133, "learning_rate": 7.742487507700733e-05, "loss": 0.6352, "step": 17241 }, { "epoch": 1.1682363303746865, "grad_norm": 7.584867477416992, "learning_rate": 7.74235060579095e-05, "loss": 0.6937, "step": 17242 }, { "epoch": 1.1683040856426588, "grad_norm": 4.14179801940918, "learning_rate": 7.742213703881169e-05, "loss": 0.5933, "step": 17243 }, { "epoch": 1.1683718409106307, "grad_norm": 6.462168216705322, "learning_rate": 7.742076801971388e-05, "loss": 0.6561, "step": 17244 }, { "epoch": 1.168439596178603, "grad_norm": 6.578998565673828, "learning_rate": 7.741939900061606e-05, "loss": 0.7552, "step": 17245 }, { "epoch": 1.168507351446575, "grad_norm": 6.081613540649414, "learning_rate": 7.741802998151824e-05, "loss": 0.5051, "step": 17246 }, { "epoch": 1.1685751067145471, "grad_norm": 7.021066188812256, "learning_rate": 7.741666096242043e-05, "loss": 0.7581, "step": 17247 }, { "epoch": 1.168642861982519, "grad_norm": 6.672050476074219, "learning_rate": 7.741529194332262e-05, "loss": 0.832, "step": 17248 }, { "epoch": 1.1687106172504913, "grad_norm": 6.199688911437988, "learning_rate": 7.74139229242248e-05, "loss": 0.8048, "step": 17249 }, { "epoch": 1.1687783725184633, "grad_norm": 5.789974212646484, "learning_rate": 7.741255390512699e-05, "loss": 0.8967, "step": 17250 }, { "epoch": 1.1688461277864355, "grad_norm": 4.966305732727051, "learning_rate": 7.741118488602917e-05, "loss": 0.778, "step": 17251 }, { "epoch": 1.1689138830544075, "grad_norm": 4.729147434234619, "learning_rate": 7.740981586693135e-05, "loss": 0.4176, "step": 17252 }, { "epoch": 1.1689816383223794, "grad_norm": 5.638984203338623, "learning_rate": 7.740844684783353e-05, "loss": 0.6909, "step": 17253 }, { "epoch": 1.1690493935903516, "grad_norm": 5.719832420349121, "learning_rate": 7.740707782873572e-05, "loss": 0.5901, "step": 17254 }, { "epoch": 1.1691171488583239, "grad_norm": 5.108078956604004, "learning_rate": 7.74057088096379e-05, "loss": 0.5979, "step": 17255 }, { "epoch": 1.1691849041262958, "grad_norm": 5.3545732498168945, "learning_rate": 7.740433979054009e-05, "loss": 0.5158, "step": 17256 }, { "epoch": 1.1692526593942678, "grad_norm": 5.225886344909668, "learning_rate": 7.740297077144227e-05, "loss": 0.7653, "step": 17257 }, { "epoch": 1.16932041466224, "grad_norm": 5.831471920013428, "learning_rate": 7.740160175234445e-05, "loss": 0.7989, "step": 17258 }, { "epoch": 1.169388169930212, "grad_norm": 8.2451810836792, "learning_rate": 7.740023273324664e-05, "loss": 0.6536, "step": 17259 }, { "epoch": 1.1694559251981842, "grad_norm": 6.761453151702881, "learning_rate": 7.739886371414882e-05, "loss": 0.6763, "step": 17260 }, { "epoch": 1.1695236804661562, "grad_norm": 5.2614240646362305, "learning_rate": 7.7397494695051e-05, "loss": 0.7062, "step": 17261 }, { "epoch": 1.1695914357341284, "grad_norm": 6.049597263336182, "learning_rate": 7.739612567595318e-05, "loss": 0.5726, "step": 17262 }, { "epoch": 1.1696591910021004, "grad_norm": 6.359036922454834, "learning_rate": 7.739475665685537e-05, "loss": 0.8202, "step": 17263 }, { "epoch": 1.1697269462700726, "grad_norm": 5.320462703704834, "learning_rate": 7.739338763775755e-05, "loss": 0.5932, "step": 17264 }, { "epoch": 1.1697947015380445, "grad_norm": 6.084722518920898, "learning_rate": 7.739201861865974e-05, "loss": 0.5786, "step": 17265 }, { "epoch": 1.1698624568060167, "grad_norm": 7.363154888153076, "learning_rate": 7.739064959956192e-05, "loss": 0.8631, "step": 17266 }, { "epoch": 1.1699302120739887, "grad_norm": 7.075015544891357, "learning_rate": 7.73892805804641e-05, "loss": 0.8241, "step": 17267 }, { "epoch": 1.169997967341961, "grad_norm": 5.505892276763916, "learning_rate": 7.738791156136629e-05, "loss": 0.6984, "step": 17268 }, { "epoch": 1.170065722609933, "grad_norm": 6.062048435211182, "learning_rate": 7.738654254226847e-05, "loss": 0.5967, "step": 17269 }, { "epoch": 1.1701334778779051, "grad_norm": 5.961355686187744, "learning_rate": 7.738517352317065e-05, "loss": 0.8252, "step": 17270 }, { "epoch": 1.170201233145877, "grad_norm": 5.551196098327637, "learning_rate": 7.738380450407283e-05, "loss": 0.7744, "step": 17271 }, { "epoch": 1.170268988413849, "grad_norm": 5.391801357269287, "learning_rate": 7.738243548497502e-05, "loss": 0.5997, "step": 17272 }, { "epoch": 1.1703367436818213, "grad_norm": 6.983804702758789, "learning_rate": 7.73810664658772e-05, "loss": 0.5914, "step": 17273 }, { "epoch": 1.1704044989497933, "grad_norm": 5.468769550323486, "learning_rate": 7.737969744677939e-05, "loss": 0.7202, "step": 17274 }, { "epoch": 1.1704722542177655, "grad_norm": 5.046876907348633, "learning_rate": 7.737832842768157e-05, "loss": 0.5294, "step": 17275 }, { "epoch": 1.1705400094857374, "grad_norm": 6.133605480194092, "learning_rate": 7.737695940858375e-05, "loss": 0.7646, "step": 17276 }, { "epoch": 1.1706077647537096, "grad_norm": 8.28398323059082, "learning_rate": 7.737559038948594e-05, "loss": 0.7566, "step": 17277 }, { "epoch": 1.1706755200216816, "grad_norm": 6.076024532318115, "learning_rate": 7.737422137038812e-05, "loss": 0.6782, "step": 17278 }, { "epoch": 1.1707432752896538, "grad_norm": 6.060565948486328, "learning_rate": 7.73728523512903e-05, "loss": 0.7511, "step": 17279 }, { "epoch": 1.1708110305576258, "grad_norm": 7.766849517822266, "learning_rate": 7.737148333219248e-05, "loss": 0.6535, "step": 17280 }, { "epoch": 1.170878785825598, "grad_norm": 5.996023654937744, "learning_rate": 7.737011431309466e-05, "loss": 0.7026, "step": 17281 }, { "epoch": 1.17094654109357, "grad_norm": 6.4893646240234375, "learning_rate": 7.736874529399686e-05, "loss": 0.8587, "step": 17282 }, { "epoch": 1.1710142963615422, "grad_norm": 5.484610557556152, "learning_rate": 7.736737627489904e-05, "loss": 0.5746, "step": 17283 }, { "epoch": 1.1710820516295142, "grad_norm": 7.2935309410095215, "learning_rate": 7.736600725580122e-05, "loss": 0.7526, "step": 17284 }, { "epoch": 1.1711498068974864, "grad_norm": 6.469519138336182, "learning_rate": 7.73646382367034e-05, "loss": 0.6975, "step": 17285 }, { "epoch": 1.1712175621654584, "grad_norm": 6.044117450714111, "learning_rate": 7.736326921760559e-05, "loss": 0.694, "step": 17286 }, { "epoch": 1.1712853174334303, "grad_norm": 5.34628963470459, "learning_rate": 7.736190019850777e-05, "loss": 0.7829, "step": 17287 }, { "epoch": 1.1713530727014025, "grad_norm": 6.716300010681152, "learning_rate": 7.736053117940995e-05, "loss": 0.6242, "step": 17288 }, { "epoch": 1.1714208279693745, "grad_norm": 6.748067378997803, "learning_rate": 7.735916216031213e-05, "loss": 0.7363, "step": 17289 }, { "epoch": 1.1714885832373467, "grad_norm": 5.220841407775879, "learning_rate": 7.735779314121433e-05, "loss": 0.5735, "step": 17290 }, { "epoch": 1.1715563385053187, "grad_norm": 5.8814496994018555, "learning_rate": 7.73564241221165e-05, "loss": 0.7137, "step": 17291 }, { "epoch": 1.171624093773291, "grad_norm": 7.384030342102051, "learning_rate": 7.735505510301869e-05, "loss": 0.6728, "step": 17292 }, { "epoch": 1.171691849041263, "grad_norm": 4.854050636291504, "learning_rate": 7.735368608392088e-05, "loss": 0.8363, "step": 17293 }, { "epoch": 1.171759604309235, "grad_norm": 6.271542072296143, "learning_rate": 7.735231706482306e-05, "loss": 0.6886, "step": 17294 }, { "epoch": 1.171827359577207, "grad_norm": 8.313980102539062, "learning_rate": 7.735094804572524e-05, "loss": 0.6262, "step": 17295 }, { "epoch": 1.1718951148451793, "grad_norm": 5.353496551513672, "learning_rate": 7.734957902662743e-05, "loss": 0.7738, "step": 17296 }, { "epoch": 1.1719628701131513, "grad_norm": 5.875715732574463, "learning_rate": 7.734821000752961e-05, "loss": 0.6644, "step": 17297 }, { "epoch": 1.1720306253811235, "grad_norm": 5.350170135498047, "learning_rate": 7.73468409884318e-05, "loss": 0.5832, "step": 17298 }, { "epoch": 1.1720983806490954, "grad_norm": 4.67872953414917, "learning_rate": 7.734547196933398e-05, "loss": 0.6281, "step": 17299 }, { "epoch": 1.1721661359170676, "grad_norm": 6.967146396636963, "learning_rate": 7.734410295023617e-05, "loss": 0.5029, "step": 17300 }, { "epoch": 1.1722338911850396, "grad_norm": 5.5732221603393555, "learning_rate": 7.734273393113835e-05, "loss": 0.7479, "step": 17301 }, { "epoch": 1.1723016464530116, "grad_norm": 5.325867176055908, "learning_rate": 7.734136491204053e-05, "loss": 0.5204, "step": 17302 }, { "epoch": 1.1723694017209838, "grad_norm": 6.015812873840332, "learning_rate": 7.733999589294271e-05, "loss": 0.7403, "step": 17303 }, { "epoch": 1.172437156988956, "grad_norm": 7.1903581619262695, "learning_rate": 7.73386268738449e-05, "loss": 0.6715, "step": 17304 }, { "epoch": 1.172504912256928, "grad_norm": 5.407469272613525, "learning_rate": 7.733725785474708e-05, "loss": 0.8218, "step": 17305 }, { "epoch": 1.1725726675249, "grad_norm": 5.6599249839782715, "learning_rate": 7.733588883564926e-05, "loss": 0.7434, "step": 17306 }, { "epoch": 1.1726404227928722, "grad_norm": 5.750308036804199, "learning_rate": 7.733451981655145e-05, "loss": 0.8658, "step": 17307 }, { "epoch": 1.1727081780608442, "grad_norm": 5.779434680938721, "learning_rate": 7.733315079745363e-05, "loss": 0.5016, "step": 17308 }, { "epoch": 1.1727759333288164, "grad_norm": 4.432728290557861, "learning_rate": 7.733178177835582e-05, "loss": 0.5855, "step": 17309 }, { "epoch": 1.1728436885967883, "grad_norm": 5.154073715209961, "learning_rate": 7.7330412759258e-05, "loss": 0.6117, "step": 17310 }, { "epoch": 1.1729114438647605, "grad_norm": 5.967854022979736, "learning_rate": 7.732904374016018e-05, "loss": 0.8827, "step": 17311 }, { "epoch": 1.1729791991327325, "grad_norm": 7.544610500335693, "learning_rate": 7.732767472106236e-05, "loss": 0.8073, "step": 17312 }, { "epoch": 1.1730469544007047, "grad_norm": 6.066003322601318, "learning_rate": 7.732630570196454e-05, "loss": 0.9099, "step": 17313 }, { "epoch": 1.1731147096686767, "grad_norm": 6.47094202041626, "learning_rate": 7.732493668286673e-05, "loss": 0.7394, "step": 17314 }, { "epoch": 1.173182464936649, "grad_norm": 6.1977925300598145, "learning_rate": 7.732356766376891e-05, "loss": 0.5849, "step": 17315 }, { "epoch": 1.1732502202046209, "grad_norm": 6.772620677947998, "learning_rate": 7.73221986446711e-05, "loss": 0.8209, "step": 17316 }, { "epoch": 1.173317975472593, "grad_norm": 5.0960693359375, "learning_rate": 7.732082962557328e-05, "loss": 0.6163, "step": 17317 }, { "epoch": 1.173385730740565, "grad_norm": 8.258513450622559, "learning_rate": 7.731946060647547e-05, "loss": 0.7963, "step": 17318 }, { "epoch": 1.1734534860085373, "grad_norm": 5.403371810913086, "learning_rate": 7.731809158737765e-05, "loss": 0.5509, "step": 17319 }, { "epoch": 1.1735212412765093, "grad_norm": 4.698668956756592, "learning_rate": 7.731672256827983e-05, "loss": 0.712, "step": 17320 }, { "epoch": 1.1735889965444812, "grad_norm": 5.766746997833252, "learning_rate": 7.731535354918201e-05, "loss": 0.7452, "step": 17321 }, { "epoch": 1.1736567518124534, "grad_norm": 5.701009750366211, "learning_rate": 7.731398453008419e-05, "loss": 1.0187, "step": 17322 }, { "epoch": 1.1737245070804254, "grad_norm": 4.641021251678467, "learning_rate": 7.731261551098638e-05, "loss": 0.5497, "step": 17323 }, { "epoch": 1.1737922623483976, "grad_norm": 6.468865871429443, "learning_rate": 7.731124649188857e-05, "loss": 0.9236, "step": 17324 }, { "epoch": 1.1738600176163696, "grad_norm": 4.343420505523682, "learning_rate": 7.730987747279075e-05, "loss": 0.4876, "step": 17325 }, { "epoch": 1.1739277728843418, "grad_norm": 6.445211887359619, "learning_rate": 7.730850845369293e-05, "loss": 0.7793, "step": 17326 }, { "epoch": 1.1739955281523138, "grad_norm": 5.686356544494629, "learning_rate": 7.730713943459512e-05, "loss": 0.6946, "step": 17327 }, { "epoch": 1.174063283420286, "grad_norm": 7.529437065124512, "learning_rate": 7.73057704154973e-05, "loss": 0.7395, "step": 17328 }, { "epoch": 1.174131038688258, "grad_norm": 8.172284126281738, "learning_rate": 7.730440139639948e-05, "loss": 0.6705, "step": 17329 }, { "epoch": 1.1741987939562302, "grad_norm": 8.046764373779297, "learning_rate": 7.730303237730166e-05, "loss": 0.8011, "step": 17330 }, { "epoch": 1.1742665492242021, "grad_norm": 4.910728454589844, "learning_rate": 7.730166335820384e-05, "loss": 0.7334, "step": 17331 }, { "epoch": 1.1743343044921744, "grad_norm": 6.085247993469238, "learning_rate": 7.730029433910603e-05, "loss": 0.5327, "step": 17332 }, { "epoch": 1.1744020597601463, "grad_norm": 4.2402520179748535, "learning_rate": 7.729892532000822e-05, "loss": 0.7252, "step": 17333 }, { "epoch": 1.1744698150281185, "grad_norm": 7.234801769256592, "learning_rate": 7.72975563009104e-05, "loss": 0.9923, "step": 17334 }, { "epoch": 1.1745375702960905, "grad_norm": 7.858911991119385, "learning_rate": 7.729618728181258e-05, "loss": 0.6549, "step": 17335 }, { "epoch": 1.1746053255640625, "grad_norm": 4.676433563232422, "learning_rate": 7.729481826271477e-05, "loss": 0.7768, "step": 17336 }, { "epoch": 1.1746730808320347, "grad_norm": 4.907873630523682, "learning_rate": 7.729344924361695e-05, "loss": 0.6704, "step": 17337 }, { "epoch": 1.1747408361000067, "grad_norm": 5.7177839279174805, "learning_rate": 7.729208022451913e-05, "loss": 0.713, "step": 17338 }, { "epoch": 1.1748085913679789, "grad_norm": 5.389528751373291, "learning_rate": 7.729071120542132e-05, "loss": 0.6746, "step": 17339 }, { "epoch": 1.1748763466359509, "grad_norm": 7.09976863861084, "learning_rate": 7.72893421863235e-05, "loss": 0.6885, "step": 17340 }, { "epoch": 1.174944101903923, "grad_norm": 7.006227493286133, "learning_rate": 7.728797316722569e-05, "loss": 0.9149, "step": 17341 }, { "epoch": 1.175011857171895, "grad_norm": 6.186046600341797, "learning_rate": 7.728660414812788e-05, "loss": 0.7265, "step": 17342 }, { "epoch": 1.1750796124398672, "grad_norm": 6.1935834884643555, "learning_rate": 7.728523512903006e-05, "loss": 0.8993, "step": 17343 }, { "epoch": 1.1751473677078392, "grad_norm": 5.259952545166016, "learning_rate": 7.728386610993224e-05, "loss": 0.7487, "step": 17344 }, { "epoch": 1.1752151229758114, "grad_norm": 7.327434539794922, "learning_rate": 7.728249709083442e-05, "loss": 1.0751, "step": 17345 }, { "epoch": 1.1752828782437834, "grad_norm": 5.855953216552734, "learning_rate": 7.728112807173661e-05, "loss": 0.6628, "step": 17346 }, { "epoch": 1.1753506335117556, "grad_norm": 5.843200206756592, "learning_rate": 7.72797590526388e-05, "loss": 0.8061, "step": 17347 }, { "epoch": 1.1754183887797276, "grad_norm": 5.277237892150879, "learning_rate": 7.727839003354097e-05, "loss": 0.7312, "step": 17348 }, { "epoch": 1.1754861440476998, "grad_norm": 5.734367847442627, "learning_rate": 7.727702101444315e-05, "loss": 0.6302, "step": 17349 }, { "epoch": 1.1755538993156718, "grad_norm": 4.641081809997559, "learning_rate": 7.727565199534535e-05, "loss": 0.5845, "step": 17350 }, { "epoch": 1.1756216545836438, "grad_norm": 5.636540412902832, "learning_rate": 7.727428297624753e-05, "loss": 0.7319, "step": 17351 }, { "epoch": 1.175689409851616, "grad_norm": 5.056623458862305, "learning_rate": 7.727291395714971e-05, "loss": 0.6003, "step": 17352 }, { "epoch": 1.1757571651195882, "grad_norm": 8.262724876403809, "learning_rate": 7.727154493805189e-05, "loss": 0.7032, "step": 17353 }, { "epoch": 1.1758249203875601, "grad_norm": 5.76828145980835, "learning_rate": 7.727017591895407e-05, "loss": 0.6717, "step": 17354 }, { "epoch": 1.1758926756555321, "grad_norm": 4.917673587799072, "learning_rate": 7.726880689985626e-05, "loss": 0.8381, "step": 17355 }, { "epoch": 1.1759604309235043, "grad_norm": 5.0473127365112305, "learning_rate": 7.726743788075844e-05, "loss": 0.7817, "step": 17356 }, { "epoch": 1.1760281861914763, "grad_norm": 6.269956588745117, "learning_rate": 7.726606886166062e-05, "loss": 0.7135, "step": 17357 }, { "epoch": 1.1760959414594485, "grad_norm": 5.429281711578369, "learning_rate": 7.72646998425628e-05, "loss": 0.5025, "step": 17358 }, { "epoch": 1.1761636967274205, "grad_norm": 6.20123815536499, "learning_rate": 7.726333082346499e-05, "loss": 0.618, "step": 17359 }, { "epoch": 1.1762314519953927, "grad_norm": 7.61653995513916, "learning_rate": 7.726196180436718e-05, "loss": 0.6468, "step": 17360 }, { "epoch": 1.1762992072633647, "grad_norm": 6.944809913635254, "learning_rate": 7.726059278526936e-05, "loss": 0.6998, "step": 17361 }, { "epoch": 1.1763669625313369, "grad_norm": 5.237542629241943, "learning_rate": 7.725922376617154e-05, "loss": 0.6578, "step": 17362 }, { "epoch": 1.1764347177993089, "grad_norm": 4.988475799560547, "learning_rate": 7.725785474707372e-05, "loss": 0.5578, "step": 17363 }, { "epoch": 1.176502473067281, "grad_norm": 6.360912322998047, "learning_rate": 7.725648572797591e-05, "loss": 0.6835, "step": 17364 }, { "epoch": 1.176570228335253, "grad_norm": 5.701276779174805, "learning_rate": 7.72551167088781e-05, "loss": 0.6863, "step": 17365 }, { "epoch": 1.1766379836032252, "grad_norm": 5.497084140777588, "learning_rate": 7.725374768978027e-05, "loss": 0.6515, "step": 17366 }, { "epoch": 1.1767057388711972, "grad_norm": 5.4620771408081055, "learning_rate": 7.725237867068246e-05, "loss": 0.6462, "step": 17367 }, { "epoch": 1.1767734941391694, "grad_norm": 5.820620059967041, "learning_rate": 7.725100965158464e-05, "loss": 0.5846, "step": 17368 }, { "epoch": 1.1768412494071414, "grad_norm": 5.675363540649414, "learning_rate": 7.724964063248683e-05, "loss": 0.8976, "step": 17369 }, { "epoch": 1.1769090046751134, "grad_norm": 6.178694248199463, "learning_rate": 7.724827161338901e-05, "loss": 0.7486, "step": 17370 }, { "epoch": 1.1769767599430856, "grad_norm": 7.601099014282227, "learning_rate": 7.724690259429119e-05, "loss": 0.6428, "step": 17371 }, { "epoch": 1.1770445152110576, "grad_norm": 5.221908092498779, "learning_rate": 7.724553357519337e-05, "loss": 0.5738, "step": 17372 }, { "epoch": 1.1771122704790298, "grad_norm": 8.009245872497559, "learning_rate": 7.724416455609556e-05, "loss": 0.8131, "step": 17373 }, { "epoch": 1.1771800257470018, "grad_norm": 4.87775182723999, "learning_rate": 7.724279553699774e-05, "loss": 0.8272, "step": 17374 }, { "epoch": 1.177247781014974, "grad_norm": 6.45685338973999, "learning_rate": 7.724142651789993e-05, "loss": 0.7668, "step": 17375 }, { "epoch": 1.177315536282946, "grad_norm": 6.50517463684082, "learning_rate": 7.72400574988021e-05, "loss": 1.0197, "step": 17376 }, { "epoch": 1.1773832915509181, "grad_norm": 8.832415580749512, "learning_rate": 7.723868847970429e-05, "loss": 0.5662, "step": 17377 }, { "epoch": 1.1774510468188901, "grad_norm": 4.831343650817871, "learning_rate": 7.723731946060648e-05, "loss": 0.5595, "step": 17378 }, { "epoch": 1.1775188020868623, "grad_norm": 5.61880350112915, "learning_rate": 7.723595044150866e-05, "loss": 0.7912, "step": 17379 }, { "epoch": 1.1775865573548343, "grad_norm": 6.002482891082764, "learning_rate": 7.723458142241084e-05, "loss": 0.6987, "step": 17380 }, { "epoch": 1.1776543126228065, "grad_norm": 8.140533447265625, "learning_rate": 7.723321240331302e-05, "loss": 0.863, "step": 17381 }, { "epoch": 1.1777220678907785, "grad_norm": 6.138432502746582, "learning_rate": 7.723184338421521e-05, "loss": 0.8564, "step": 17382 }, { "epoch": 1.1777898231587507, "grad_norm": 5.278548717498779, "learning_rate": 7.72304743651174e-05, "loss": 0.6709, "step": 17383 }, { "epoch": 1.1778575784267227, "grad_norm": 5.611672401428223, "learning_rate": 7.722910534601958e-05, "loss": 0.8298, "step": 17384 }, { "epoch": 1.1779253336946947, "grad_norm": 5.170347690582275, "learning_rate": 7.722773632692177e-05, "loss": 1.0291, "step": 17385 }, { "epoch": 1.1779930889626669, "grad_norm": 6.799687385559082, "learning_rate": 7.722636730782395e-05, "loss": 0.6598, "step": 17386 }, { "epoch": 1.1780608442306388, "grad_norm": 8.000127792358398, "learning_rate": 7.722499828872613e-05, "loss": 0.6322, "step": 17387 }, { "epoch": 1.178128599498611, "grad_norm": 6.476943492889404, "learning_rate": 7.722362926962832e-05, "loss": 0.6098, "step": 17388 }, { "epoch": 1.178196354766583, "grad_norm": 8.889633178710938, "learning_rate": 7.72222602505305e-05, "loss": 0.9242, "step": 17389 }, { "epoch": 1.1782641100345552, "grad_norm": 5.422774791717529, "learning_rate": 7.722089123143268e-05, "loss": 0.6973, "step": 17390 }, { "epoch": 1.1783318653025272, "grad_norm": 5.284212112426758, "learning_rate": 7.721952221233486e-05, "loss": 0.5893, "step": 17391 }, { "epoch": 1.1783996205704994, "grad_norm": 5.717512607574463, "learning_rate": 7.721815319323706e-05, "loss": 0.6507, "step": 17392 }, { "epoch": 1.1784673758384714, "grad_norm": 5.993809700012207, "learning_rate": 7.721678417413924e-05, "loss": 0.7245, "step": 17393 }, { "epoch": 1.1785351311064436, "grad_norm": 5.827890872955322, "learning_rate": 7.721541515504142e-05, "loss": 0.8094, "step": 17394 }, { "epoch": 1.1786028863744156, "grad_norm": 5.711609363555908, "learning_rate": 7.72140461359436e-05, "loss": 0.6623, "step": 17395 }, { "epoch": 1.1786706416423878, "grad_norm": 4.487983226776123, "learning_rate": 7.72126771168458e-05, "loss": 0.6765, "step": 17396 }, { "epoch": 1.1787383969103598, "grad_norm": 6.461648464202881, "learning_rate": 7.721130809774797e-05, "loss": 0.7344, "step": 17397 }, { "epoch": 1.178806152178332, "grad_norm": 5.570317268371582, "learning_rate": 7.720993907865015e-05, "loss": 0.674, "step": 17398 }, { "epoch": 1.178873907446304, "grad_norm": 6.213165760040283, "learning_rate": 7.720857005955233e-05, "loss": 0.6197, "step": 17399 }, { "epoch": 1.178941662714276, "grad_norm": 6.4340500831604, "learning_rate": 7.720720104045451e-05, "loss": 0.7551, "step": 17400 }, { "epoch": 1.1790094179822481, "grad_norm": 9.295565605163574, "learning_rate": 7.720583202135671e-05, "loss": 0.8534, "step": 17401 }, { "epoch": 1.1790771732502203, "grad_norm": 5.942518711090088, "learning_rate": 7.720446300225889e-05, "loss": 0.5783, "step": 17402 }, { "epoch": 1.1791449285181923, "grad_norm": 6.883828163146973, "learning_rate": 7.720309398316107e-05, "loss": 0.8772, "step": 17403 }, { "epoch": 1.1792126837861643, "grad_norm": 5.310081958770752, "learning_rate": 7.720172496406325e-05, "loss": 0.685, "step": 17404 }, { "epoch": 1.1792804390541365, "grad_norm": 5.378293037414551, "learning_rate": 7.720035594496544e-05, "loss": 0.7761, "step": 17405 }, { "epoch": 1.1793481943221085, "grad_norm": 5.762744426727295, "learning_rate": 7.719898692586762e-05, "loss": 0.7877, "step": 17406 }, { "epoch": 1.1794159495900807, "grad_norm": 6.828834533691406, "learning_rate": 7.71976179067698e-05, "loss": 0.7059, "step": 17407 }, { "epoch": 1.1794837048580527, "grad_norm": 5.380794525146484, "learning_rate": 7.719624888767198e-05, "loss": 0.8572, "step": 17408 }, { "epoch": 1.1795514601260249, "grad_norm": 5.599371910095215, "learning_rate": 7.719487986857417e-05, "loss": 0.6622, "step": 17409 }, { "epoch": 1.1796192153939968, "grad_norm": 6.256725311279297, "learning_rate": 7.719351084947636e-05, "loss": 0.6448, "step": 17410 }, { "epoch": 1.179686970661969, "grad_norm": 5.341912746429443, "learning_rate": 7.719214183037854e-05, "loss": 0.5639, "step": 17411 }, { "epoch": 1.179754725929941, "grad_norm": 10.262722969055176, "learning_rate": 7.719077281128072e-05, "loss": 0.6745, "step": 17412 }, { "epoch": 1.1798224811979132, "grad_norm": 6.846330642700195, "learning_rate": 7.71894037921829e-05, "loss": 0.6262, "step": 17413 }, { "epoch": 1.1798902364658852, "grad_norm": 5.529928684234619, "learning_rate": 7.718803477308508e-05, "loss": 0.8384, "step": 17414 }, { "epoch": 1.1799579917338574, "grad_norm": 5.655308723449707, "learning_rate": 7.718666575398727e-05, "loss": 0.701, "step": 17415 }, { "epoch": 1.1800257470018294, "grad_norm": 5.416240692138672, "learning_rate": 7.718529673488945e-05, "loss": 0.5655, "step": 17416 }, { "epoch": 1.1800935022698016, "grad_norm": 6.7942705154418945, "learning_rate": 7.718392771579163e-05, "loss": 0.7546, "step": 17417 }, { "epoch": 1.1801612575377736, "grad_norm": 6.258289813995361, "learning_rate": 7.718255869669382e-05, "loss": 0.8086, "step": 17418 }, { "epoch": 1.1802290128057455, "grad_norm": 7.271621227264404, "learning_rate": 7.718118967759601e-05, "loss": 0.7594, "step": 17419 }, { "epoch": 1.1802967680737178, "grad_norm": 7.857482433319092, "learning_rate": 7.717982065849819e-05, "loss": 0.7947, "step": 17420 }, { "epoch": 1.1803645233416897, "grad_norm": 5.672320365905762, "learning_rate": 7.717845163940037e-05, "loss": 0.9218, "step": 17421 }, { "epoch": 1.180432278609662, "grad_norm": 7.510291576385498, "learning_rate": 7.717708262030255e-05, "loss": 0.8078, "step": 17422 }, { "epoch": 1.180500033877634, "grad_norm": 7.348386287689209, "learning_rate": 7.717571360120473e-05, "loss": 1.0443, "step": 17423 }, { "epoch": 1.1805677891456061, "grad_norm": 5.457333087921143, "learning_rate": 7.717434458210692e-05, "loss": 0.7723, "step": 17424 }, { "epoch": 1.180635544413578, "grad_norm": 6.572354793548584, "learning_rate": 7.71729755630091e-05, "loss": 0.5917, "step": 17425 }, { "epoch": 1.1807032996815503, "grad_norm": 5.724460601806641, "learning_rate": 7.717160654391129e-05, "loss": 0.7562, "step": 17426 }, { "epoch": 1.1807710549495223, "grad_norm": 6.148141860961914, "learning_rate": 7.717023752481347e-05, "loss": 0.8323, "step": 17427 }, { "epoch": 1.1808388102174945, "grad_norm": 5.528962135314941, "learning_rate": 7.716886850571566e-05, "loss": 0.6606, "step": 17428 }, { "epoch": 1.1809065654854665, "grad_norm": 7.883500099182129, "learning_rate": 7.716749948661784e-05, "loss": 0.8106, "step": 17429 }, { "epoch": 1.1809743207534387, "grad_norm": 4.686129093170166, "learning_rate": 7.716613046752002e-05, "loss": 0.6902, "step": 17430 }, { "epoch": 1.1810420760214106, "grad_norm": 6.630269527435303, "learning_rate": 7.71647614484222e-05, "loss": 0.6001, "step": 17431 }, { "epoch": 1.1811098312893828, "grad_norm": 6.239437580108643, "learning_rate": 7.71633924293244e-05, "loss": 0.5679, "step": 17432 }, { "epoch": 1.1811775865573548, "grad_norm": 5.275092124938965, "learning_rate": 7.716202341022657e-05, "loss": 0.6574, "step": 17433 }, { "epoch": 1.1812453418253268, "grad_norm": 4.501307487487793, "learning_rate": 7.716065439112875e-05, "loss": 0.6631, "step": 17434 }, { "epoch": 1.181313097093299, "grad_norm": 4.495173931121826, "learning_rate": 7.715928537203095e-05, "loss": 0.5067, "step": 17435 }, { "epoch": 1.181380852361271, "grad_norm": 5.3275275230407715, "learning_rate": 7.715791635293313e-05, "loss": 0.5386, "step": 17436 }, { "epoch": 1.1814486076292432, "grad_norm": 9.05092716217041, "learning_rate": 7.715654733383531e-05, "loss": 0.7015, "step": 17437 }, { "epoch": 1.1815163628972152, "grad_norm": 6.003642559051514, "learning_rate": 7.71551783147375e-05, "loss": 0.7313, "step": 17438 }, { "epoch": 1.1815841181651874, "grad_norm": 4.795654296875, "learning_rate": 7.715380929563968e-05, "loss": 0.6542, "step": 17439 }, { "epoch": 1.1816518734331594, "grad_norm": 7.203171730041504, "learning_rate": 7.715244027654186e-05, "loss": 0.7249, "step": 17440 }, { "epoch": 1.1817196287011316, "grad_norm": 6.126667022705078, "learning_rate": 7.715107125744404e-05, "loss": 0.7768, "step": 17441 }, { "epoch": 1.1817873839691035, "grad_norm": 8.417376518249512, "learning_rate": 7.714970223834624e-05, "loss": 0.5883, "step": 17442 }, { "epoch": 1.1818551392370757, "grad_norm": 10.3145112991333, "learning_rate": 7.714833321924842e-05, "loss": 0.7328, "step": 17443 }, { "epoch": 1.1819228945050477, "grad_norm": 6.670629024505615, "learning_rate": 7.71469642001506e-05, "loss": 0.8297, "step": 17444 }, { "epoch": 1.18199064977302, "grad_norm": 6.059311389923096, "learning_rate": 7.714559518105278e-05, "loss": 0.7134, "step": 17445 }, { "epoch": 1.182058405040992, "grad_norm": 5.818484306335449, "learning_rate": 7.714422616195496e-05, "loss": 0.5291, "step": 17446 }, { "epoch": 1.1821261603089641, "grad_norm": 5.98686408996582, "learning_rate": 7.714285714285715e-05, "loss": 0.5893, "step": 17447 }, { "epoch": 1.182193915576936, "grad_norm": 5.623051643371582, "learning_rate": 7.714148812375933e-05, "loss": 0.6766, "step": 17448 }, { "epoch": 1.182261670844908, "grad_norm": 6.6503586769104, "learning_rate": 7.714011910466151e-05, "loss": 0.551, "step": 17449 }, { "epoch": 1.1823294261128803, "grad_norm": 8.11122989654541, "learning_rate": 7.71387500855637e-05, "loss": 0.6561, "step": 17450 }, { "epoch": 1.1823971813808525, "grad_norm": 6.778097152709961, "learning_rate": 7.713738106646589e-05, "loss": 0.8429, "step": 17451 }, { "epoch": 1.1824649366488245, "grad_norm": 4.3668107986450195, "learning_rate": 7.713601204736807e-05, "loss": 0.6024, "step": 17452 }, { "epoch": 1.1825326919167964, "grad_norm": 10.554072380065918, "learning_rate": 7.713464302827025e-05, "loss": 0.5214, "step": 17453 }, { "epoch": 1.1826004471847686, "grad_norm": 6.878169059753418, "learning_rate": 7.713327400917243e-05, "loss": 0.727, "step": 17454 }, { "epoch": 1.1826682024527406, "grad_norm": 5.530303001403809, "learning_rate": 7.713190499007461e-05, "loss": 0.6007, "step": 17455 }, { "epoch": 1.1827359577207128, "grad_norm": 6.1936354637146, "learning_rate": 7.71305359709768e-05, "loss": 0.6258, "step": 17456 }, { "epoch": 1.1828037129886848, "grad_norm": 8.023333549499512, "learning_rate": 7.712916695187898e-05, "loss": 0.6559, "step": 17457 }, { "epoch": 1.182871468256657, "grad_norm": 5.9769110679626465, "learning_rate": 7.712779793278116e-05, "loss": 0.6817, "step": 17458 }, { "epoch": 1.182939223524629, "grad_norm": 6.784766674041748, "learning_rate": 7.712642891368334e-05, "loss": 0.6251, "step": 17459 }, { "epoch": 1.1830069787926012, "grad_norm": 5.5998382568359375, "learning_rate": 7.712505989458554e-05, "loss": 0.7717, "step": 17460 }, { "epoch": 1.1830747340605732, "grad_norm": 5.586696147918701, "learning_rate": 7.712369087548772e-05, "loss": 0.6819, "step": 17461 }, { "epoch": 1.1831424893285454, "grad_norm": 5.863603591918945, "learning_rate": 7.71223218563899e-05, "loss": 0.6594, "step": 17462 }, { "epoch": 1.1832102445965174, "grad_norm": 9.297795295715332, "learning_rate": 7.712095283729208e-05, "loss": 0.7023, "step": 17463 }, { "epoch": 1.1832779998644896, "grad_norm": 4.512932300567627, "learning_rate": 7.711958381819426e-05, "loss": 0.5351, "step": 17464 }, { "epoch": 1.1833457551324615, "grad_norm": 5.763913631439209, "learning_rate": 7.711821479909645e-05, "loss": 0.5338, "step": 17465 }, { "epoch": 1.1834135104004337, "grad_norm": 6.064705848693848, "learning_rate": 7.711684577999863e-05, "loss": 0.6775, "step": 17466 }, { "epoch": 1.1834812656684057, "grad_norm": 7.101622581481934, "learning_rate": 7.711547676090081e-05, "loss": 0.8101, "step": 17467 }, { "epoch": 1.1835490209363777, "grad_norm": 5.599690914154053, "learning_rate": 7.7114107741803e-05, "loss": 0.7406, "step": 17468 }, { "epoch": 1.18361677620435, "grad_norm": 6.1384124755859375, "learning_rate": 7.711273872270518e-05, "loss": 0.8194, "step": 17469 }, { "epoch": 1.1836845314723219, "grad_norm": 4.1809611320495605, "learning_rate": 7.711136970360737e-05, "loss": 0.672, "step": 17470 }, { "epoch": 1.183752286740294, "grad_norm": 6.009151935577393, "learning_rate": 7.711000068450955e-05, "loss": 0.9584, "step": 17471 }, { "epoch": 1.183820042008266, "grad_norm": 5.323096752166748, "learning_rate": 7.710863166541173e-05, "loss": 0.51, "step": 17472 }, { "epoch": 1.1838877972762383, "grad_norm": 5.447154998779297, "learning_rate": 7.710726264631391e-05, "loss": 0.5468, "step": 17473 }, { "epoch": 1.1839555525442103, "grad_norm": 8.218817710876465, "learning_rate": 7.71058936272161e-05, "loss": 0.7344, "step": 17474 }, { "epoch": 1.1840233078121825, "grad_norm": 5.56463098526001, "learning_rate": 7.710452460811828e-05, "loss": 0.5611, "step": 17475 }, { "epoch": 1.1840910630801544, "grad_norm": 6.369572639465332, "learning_rate": 7.710315558902046e-05, "loss": 0.9165, "step": 17476 }, { "epoch": 1.1841588183481266, "grad_norm": 7.159587860107422, "learning_rate": 7.710178656992265e-05, "loss": 0.8848, "step": 17477 }, { "epoch": 1.1842265736160986, "grad_norm": 5.673058986663818, "learning_rate": 7.710041755082484e-05, "loss": 0.7024, "step": 17478 }, { "epoch": 1.1842943288840708, "grad_norm": 4.88538122177124, "learning_rate": 7.709904853172702e-05, "loss": 0.7115, "step": 17479 }, { "epoch": 1.1843620841520428, "grad_norm": 4.682442665100098, "learning_rate": 7.70976795126292e-05, "loss": 0.5612, "step": 17480 }, { "epoch": 1.184429839420015, "grad_norm": 5.996410369873047, "learning_rate": 7.70963104935314e-05, "loss": 0.6594, "step": 17481 }, { "epoch": 1.184497594687987, "grad_norm": 7.457950115203857, "learning_rate": 7.709494147443357e-05, "loss": 0.7196, "step": 17482 }, { "epoch": 1.184565349955959, "grad_norm": 9.7376127243042, "learning_rate": 7.709357245533575e-05, "loss": 0.5854, "step": 17483 }, { "epoch": 1.1846331052239312, "grad_norm": 7.527082920074463, "learning_rate": 7.709220343623795e-05, "loss": 0.6919, "step": 17484 }, { "epoch": 1.1847008604919032, "grad_norm": 6.091211795806885, "learning_rate": 7.709083441714013e-05, "loss": 1.0379, "step": 17485 }, { "epoch": 1.1847686157598754, "grad_norm": 5.3425750732421875, "learning_rate": 7.708946539804231e-05, "loss": 0.8544, "step": 17486 }, { "epoch": 1.1848363710278473, "grad_norm": 5.463139057159424, "learning_rate": 7.708809637894449e-05, "loss": 0.5774, "step": 17487 }, { "epoch": 1.1849041262958195, "grad_norm": 4.83765983581543, "learning_rate": 7.708672735984668e-05, "loss": 0.6807, "step": 17488 }, { "epoch": 1.1849718815637915, "grad_norm": 6.051468372344971, "learning_rate": 7.708535834074886e-05, "loss": 0.7163, "step": 17489 }, { "epoch": 1.1850396368317637, "grad_norm": 5.282536506652832, "learning_rate": 7.708398932165104e-05, "loss": 0.7486, "step": 17490 }, { "epoch": 1.1851073920997357, "grad_norm": 6.964774131774902, "learning_rate": 7.708262030255322e-05, "loss": 0.8596, "step": 17491 }, { "epoch": 1.185175147367708, "grad_norm": 5.607154846191406, "learning_rate": 7.70812512834554e-05, "loss": 0.5895, "step": 17492 }, { "epoch": 1.1852429026356799, "grad_norm": 6.054999351501465, "learning_rate": 7.70798822643576e-05, "loss": 0.7251, "step": 17493 }, { "epoch": 1.185310657903652, "grad_norm": 5.619368553161621, "learning_rate": 7.707851324525978e-05, "loss": 0.4792, "step": 17494 }, { "epoch": 1.185378413171624, "grad_norm": 5.682523250579834, "learning_rate": 7.707714422616196e-05, "loss": 0.6317, "step": 17495 }, { "epoch": 1.1854461684395963, "grad_norm": 5.336386680603027, "learning_rate": 7.707577520706414e-05, "loss": 0.7298, "step": 17496 }, { "epoch": 1.1855139237075683, "grad_norm": 4.904666900634766, "learning_rate": 7.707440618796633e-05, "loss": 0.5701, "step": 17497 }, { "epoch": 1.1855816789755402, "grad_norm": 7.6780571937561035, "learning_rate": 7.707303716886851e-05, "loss": 0.8559, "step": 17498 }, { "epoch": 1.1856494342435124, "grad_norm": 6.725180149078369, "learning_rate": 7.70716681497707e-05, "loss": 0.7298, "step": 17499 }, { "epoch": 1.1857171895114846, "grad_norm": 6.419045925140381, "learning_rate": 7.707029913067287e-05, "loss": 0.5927, "step": 17500 }, { "epoch": 1.1857849447794566, "grad_norm": 5.36964225769043, "learning_rate": 7.706893011157505e-05, "loss": 0.5406, "step": 17501 }, { "epoch": 1.1858527000474286, "grad_norm": 4.675073623657227, "learning_rate": 7.706756109247725e-05, "loss": 0.4895, "step": 17502 }, { "epoch": 1.1859204553154008, "grad_norm": 4.49945592880249, "learning_rate": 7.706619207337943e-05, "loss": 0.5266, "step": 17503 }, { "epoch": 1.1859882105833728, "grad_norm": 5.286568641662598, "learning_rate": 7.706482305428161e-05, "loss": 0.8502, "step": 17504 }, { "epoch": 1.186055965851345, "grad_norm": 6.318417549133301, "learning_rate": 7.706345403518379e-05, "loss": 0.9895, "step": 17505 }, { "epoch": 1.186123721119317, "grad_norm": 6.430423259735107, "learning_rate": 7.706208501608598e-05, "loss": 0.7683, "step": 17506 }, { "epoch": 1.1861914763872892, "grad_norm": 5.740241050720215, "learning_rate": 7.706071599698816e-05, "loss": 0.9374, "step": 17507 }, { "epoch": 1.1862592316552611, "grad_norm": 12.021029472351074, "learning_rate": 7.705934697789034e-05, "loss": 0.5365, "step": 17508 }, { "epoch": 1.1863269869232334, "grad_norm": 7.397732257843018, "learning_rate": 7.705797795879252e-05, "loss": 0.5, "step": 17509 }, { "epoch": 1.1863947421912053, "grad_norm": 6.771727561950684, "learning_rate": 7.70566089396947e-05, "loss": 0.545, "step": 17510 }, { "epoch": 1.1864624974591775, "grad_norm": 6.742783069610596, "learning_rate": 7.70552399205969e-05, "loss": 0.9527, "step": 17511 }, { "epoch": 1.1865302527271495, "grad_norm": 5.033138275146484, "learning_rate": 7.705387090149908e-05, "loss": 0.5438, "step": 17512 }, { "epoch": 1.1865980079951217, "grad_norm": 7.680271148681641, "learning_rate": 7.705250188240126e-05, "loss": 0.543, "step": 17513 }, { "epoch": 1.1866657632630937, "grad_norm": 6.076967716217041, "learning_rate": 7.705113286330344e-05, "loss": 0.8015, "step": 17514 }, { "epoch": 1.186733518531066, "grad_norm": 5.922079086303711, "learning_rate": 7.704976384420563e-05, "loss": 0.6701, "step": 17515 }, { "epoch": 1.1868012737990379, "grad_norm": 6.352739334106445, "learning_rate": 7.704839482510781e-05, "loss": 0.6318, "step": 17516 }, { "epoch": 1.1868690290670099, "grad_norm": 7.644301414489746, "learning_rate": 7.704702580601e-05, "loss": 0.6471, "step": 17517 }, { "epoch": 1.186936784334982, "grad_norm": 5.560800552368164, "learning_rate": 7.704565678691217e-05, "loss": 0.5093, "step": 17518 }, { "epoch": 1.187004539602954, "grad_norm": 8.711373329162598, "learning_rate": 7.704428776781435e-05, "loss": 0.6588, "step": 17519 }, { "epoch": 1.1870722948709262, "grad_norm": 5.718215465545654, "learning_rate": 7.704291874871655e-05, "loss": 0.8604, "step": 17520 }, { "epoch": 1.1871400501388982, "grad_norm": 6.022330284118652, "learning_rate": 7.704154972961873e-05, "loss": 0.7205, "step": 17521 }, { "epoch": 1.1872078054068704, "grad_norm": 7.659083366394043, "learning_rate": 7.704018071052091e-05, "loss": 0.4675, "step": 17522 }, { "epoch": 1.1872755606748424, "grad_norm": 6.095610618591309, "learning_rate": 7.703881169142309e-05, "loss": 0.6772, "step": 17523 }, { "epoch": 1.1873433159428146, "grad_norm": 5.625333786010742, "learning_rate": 7.703744267232528e-05, "loss": 0.618, "step": 17524 }, { "epoch": 1.1874110712107866, "grad_norm": 7.851975440979004, "learning_rate": 7.703607365322746e-05, "loss": 0.763, "step": 17525 }, { "epoch": 1.1874788264787588, "grad_norm": 9.398256301879883, "learning_rate": 7.703470463412964e-05, "loss": 0.6771, "step": 17526 }, { "epoch": 1.1875465817467308, "grad_norm": 9.461082458496094, "learning_rate": 7.703333561503184e-05, "loss": 0.9216, "step": 17527 }, { "epoch": 1.187614337014703, "grad_norm": 7.010152339935303, "learning_rate": 7.703196659593402e-05, "loss": 0.8298, "step": 17528 }, { "epoch": 1.187682092282675, "grad_norm": 5.272936820983887, "learning_rate": 7.70305975768362e-05, "loss": 0.7349, "step": 17529 }, { "epoch": 1.1877498475506472, "grad_norm": 6.14058780670166, "learning_rate": 7.702922855773839e-05, "loss": 0.6735, "step": 17530 }, { "epoch": 1.1878176028186191, "grad_norm": 8.246026039123535, "learning_rate": 7.702785953864057e-05, "loss": 0.6286, "step": 17531 }, { "epoch": 1.1878853580865911, "grad_norm": 8.610701560974121, "learning_rate": 7.702649051954275e-05, "loss": 0.5919, "step": 17532 }, { "epoch": 1.1879531133545633, "grad_norm": 8.594643592834473, "learning_rate": 7.702512150044493e-05, "loss": 0.4835, "step": 17533 }, { "epoch": 1.1880208686225353, "grad_norm": 4.968395709991455, "learning_rate": 7.702375248134713e-05, "loss": 0.5888, "step": 17534 }, { "epoch": 1.1880886238905075, "grad_norm": 6.422661781311035, "learning_rate": 7.702238346224931e-05, "loss": 0.5625, "step": 17535 }, { "epoch": 1.1881563791584795, "grad_norm": 9.77650260925293, "learning_rate": 7.702101444315149e-05, "loss": 0.7993, "step": 17536 }, { "epoch": 1.1882241344264517, "grad_norm": 5.65856409072876, "learning_rate": 7.701964542405367e-05, "loss": 0.5667, "step": 17537 }, { "epoch": 1.1882918896944237, "grad_norm": 6.888467311859131, "learning_rate": 7.701827640495586e-05, "loss": 0.9473, "step": 17538 }, { "epoch": 1.1883596449623959, "grad_norm": 5.9407548904418945, "learning_rate": 7.701690738585804e-05, "loss": 0.888, "step": 17539 }, { "epoch": 1.1884274002303679, "grad_norm": 4.671491622924805, "learning_rate": 7.701553836676022e-05, "loss": 0.5841, "step": 17540 }, { "epoch": 1.18849515549834, "grad_norm": 6.472855567932129, "learning_rate": 7.70141693476624e-05, "loss": 0.5864, "step": 17541 }, { "epoch": 1.188562910766312, "grad_norm": 6.266822338104248, "learning_rate": 7.701280032856458e-05, "loss": 0.6501, "step": 17542 }, { "epoch": 1.1886306660342842, "grad_norm": 5.629112243652344, "learning_rate": 7.701143130946678e-05, "loss": 0.5439, "step": 17543 }, { "epoch": 1.1886984213022562, "grad_norm": 4.828508377075195, "learning_rate": 7.701006229036896e-05, "loss": 0.5442, "step": 17544 }, { "epoch": 1.1887661765702284, "grad_norm": 5.5869059562683105, "learning_rate": 7.700869327127114e-05, "loss": 0.8221, "step": 17545 }, { "epoch": 1.1888339318382004, "grad_norm": 6.538342475891113, "learning_rate": 7.700732425217332e-05, "loss": 0.6933, "step": 17546 }, { "epoch": 1.1889016871061724, "grad_norm": 6.4926981925964355, "learning_rate": 7.70059552330755e-05, "loss": 0.8851, "step": 17547 }, { "epoch": 1.1889694423741446, "grad_norm": 6.433711528778076, "learning_rate": 7.700458621397769e-05, "loss": 0.7035, "step": 17548 }, { "epoch": 1.1890371976421168, "grad_norm": 9.490755081176758, "learning_rate": 7.700321719487987e-05, "loss": 0.6435, "step": 17549 }, { "epoch": 1.1891049529100888, "grad_norm": 5.095461845397949, "learning_rate": 7.700184817578205e-05, "loss": 0.4888, "step": 17550 }, { "epoch": 1.1891727081780608, "grad_norm": 6.992951393127441, "learning_rate": 7.700047915668423e-05, "loss": 0.6976, "step": 17551 }, { "epoch": 1.189240463446033, "grad_norm": 5.5607709884643555, "learning_rate": 7.699911013758643e-05, "loss": 0.5983, "step": 17552 }, { "epoch": 1.189308218714005, "grad_norm": 5.8281402587890625, "learning_rate": 7.699774111848861e-05, "loss": 0.9768, "step": 17553 }, { "epoch": 1.1893759739819771, "grad_norm": 6.718166351318359, "learning_rate": 7.699637209939079e-05, "loss": 0.7467, "step": 17554 }, { "epoch": 1.1894437292499491, "grad_norm": 5.299029350280762, "learning_rate": 7.699500308029297e-05, "loss": 0.4338, "step": 17555 }, { "epoch": 1.1895114845179213, "grad_norm": 7.574790954589844, "learning_rate": 7.699363406119515e-05, "loss": 0.5508, "step": 17556 }, { "epoch": 1.1895792397858933, "grad_norm": 5.672619819641113, "learning_rate": 7.699226504209734e-05, "loss": 0.8091, "step": 17557 }, { "epoch": 1.1896469950538655, "grad_norm": 4.973649978637695, "learning_rate": 7.699089602299952e-05, "loss": 0.5796, "step": 17558 }, { "epoch": 1.1897147503218375, "grad_norm": 6.112509250640869, "learning_rate": 7.69895270039017e-05, "loss": 0.7684, "step": 17559 }, { "epoch": 1.1897825055898097, "grad_norm": 5.4346723556518555, "learning_rate": 7.698815798480388e-05, "loss": 0.5285, "step": 17560 }, { "epoch": 1.1898502608577817, "grad_norm": 5.107007026672363, "learning_rate": 7.698678896570608e-05, "loss": 0.6484, "step": 17561 }, { "epoch": 1.1899180161257537, "grad_norm": 4.733124732971191, "learning_rate": 7.698541994660826e-05, "loss": 0.658, "step": 17562 }, { "epoch": 1.1899857713937259, "grad_norm": 3.8034684658050537, "learning_rate": 7.698405092751044e-05, "loss": 0.507, "step": 17563 }, { "epoch": 1.190053526661698, "grad_norm": 5.694223880767822, "learning_rate": 7.698268190841262e-05, "loss": 0.622, "step": 17564 }, { "epoch": 1.19012128192967, "grad_norm": 5.963348865509033, "learning_rate": 7.69813128893148e-05, "loss": 0.5774, "step": 17565 }, { "epoch": 1.190189037197642, "grad_norm": 6.509255886077881, "learning_rate": 7.6979943870217e-05, "loss": 0.6859, "step": 17566 }, { "epoch": 1.1902567924656142, "grad_norm": 5.054556369781494, "learning_rate": 7.697857485111917e-05, "loss": 0.7494, "step": 17567 }, { "epoch": 1.1903245477335862, "grad_norm": 6.527555465698242, "learning_rate": 7.697720583202135e-05, "loss": 1.0898, "step": 17568 }, { "epoch": 1.1903923030015584, "grad_norm": 4.6289896965026855, "learning_rate": 7.697583681292353e-05, "loss": 0.5725, "step": 17569 }, { "epoch": 1.1904600582695304, "grad_norm": 6.46843147277832, "learning_rate": 7.697446779382573e-05, "loss": 0.7417, "step": 17570 }, { "epoch": 1.1905278135375026, "grad_norm": 6.553291320800781, "learning_rate": 7.697309877472791e-05, "loss": 0.7798, "step": 17571 }, { "epoch": 1.1905955688054746, "grad_norm": 4.56556510925293, "learning_rate": 7.697172975563009e-05, "loss": 0.5706, "step": 17572 }, { "epoch": 1.1906633240734468, "grad_norm": 6.320700645446777, "learning_rate": 7.697036073653228e-05, "loss": 0.6367, "step": 17573 }, { "epoch": 1.1907310793414188, "grad_norm": 7.7214226722717285, "learning_rate": 7.696899171743446e-05, "loss": 0.8209, "step": 17574 }, { "epoch": 1.190798834609391, "grad_norm": 5.526475429534912, "learning_rate": 7.696762269833664e-05, "loss": 0.7385, "step": 17575 }, { "epoch": 1.190866589877363, "grad_norm": 5.44613790512085, "learning_rate": 7.696625367923884e-05, "loss": 0.608, "step": 17576 }, { "epoch": 1.1909343451453351, "grad_norm": 5.303778648376465, "learning_rate": 7.696488466014102e-05, "loss": 0.7954, "step": 17577 }, { "epoch": 1.1910021004133071, "grad_norm": 5.654077529907227, "learning_rate": 7.69635156410432e-05, "loss": 0.8997, "step": 17578 }, { "epoch": 1.1910698556812793, "grad_norm": 5.673882961273193, "learning_rate": 7.696214662194538e-05, "loss": 0.6305, "step": 17579 }, { "epoch": 1.1911376109492513, "grad_norm": 6.7035675048828125, "learning_rate": 7.696077760284757e-05, "loss": 0.6158, "step": 17580 }, { "epoch": 1.1912053662172233, "grad_norm": 5.399929046630859, "learning_rate": 7.695940858374975e-05, "loss": 0.6918, "step": 17581 }, { "epoch": 1.1912731214851955, "grad_norm": 6.211287975311279, "learning_rate": 7.695803956465193e-05, "loss": 0.7219, "step": 17582 }, { "epoch": 1.1913408767531675, "grad_norm": 6.3117876052856445, "learning_rate": 7.695667054555411e-05, "loss": 0.7323, "step": 17583 }, { "epoch": 1.1914086320211397, "grad_norm": 5.704460144042969, "learning_rate": 7.695530152645631e-05, "loss": 0.7325, "step": 17584 }, { "epoch": 1.1914763872891116, "grad_norm": 6.604908466339111, "learning_rate": 7.695393250735849e-05, "loss": 0.6055, "step": 17585 }, { "epoch": 1.1915441425570839, "grad_norm": 5.135843276977539, "learning_rate": 7.695256348826067e-05, "loss": 0.7543, "step": 17586 }, { "epoch": 1.1916118978250558, "grad_norm": 6.044817924499512, "learning_rate": 7.695119446916285e-05, "loss": 0.6591, "step": 17587 }, { "epoch": 1.191679653093028, "grad_norm": 10.055615425109863, "learning_rate": 7.694982545006503e-05, "loss": 0.6775, "step": 17588 }, { "epoch": 1.191747408361, "grad_norm": 5.508429050445557, "learning_rate": 7.694845643096722e-05, "loss": 0.8237, "step": 17589 }, { "epoch": 1.1918151636289722, "grad_norm": 5.428280353546143, "learning_rate": 7.69470874118694e-05, "loss": 0.5512, "step": 17590 }, { "epoch": 1.1918829188969442, "grad_norm": 6.358389377593994, "learning_rate": 7.694571839277158e-05, "loss": 0.9419, "step": 17591 }, { "epoch": 1.1919506741649164, "grad_norm": 4.868531227111816, "learning_rate": 7.694434937367376e-05, "loss": 0.6449, "step": 17592 }, { "epoch": 1.1920184294328884, "grad_norm": 4.3719000816345215, "learning_rate": 7.694298035457596e-05, "loss": 0.6134, "step": 17593 }, { "epoch": 1.1920861847008606, "grad_norm": 12.582817077636719, "learning_rate": 7.694161133547814e-05, "loss": 0.5755, "step": 17594 }, { "epoch": 1.1921539399688326, "grad_norm": 5.3732781410217285, "learning_rate": 7.694024231638032e-05, "loss": 0.732, "step": 17595 }, { "epoch": 1.1922216952368045, "grad_norm": 5.49517822265625, "learning_rate": 7.69388732972825e-05, "loss": 0.5249, "step": 17596 }, { "epoch": 1.1922894505047767, "grad_norm": 4.9107584953308105, "learning_rate": 7.693750427818468e-05, "loss": 0.5645, "step": 17597 }, { "epoch": 1.192357205772749, "grad_norm": 6.208014965057373, "learning_rate": 7.693613525908687e-05, "loss": 0.7483, "step": 17598 }, { "epoch": 1.192424961040721, "grad_norm": 5.240930080413818, "learning_rate": 7.693476623998905e-05, "loss": 0.5941, "step": 17599 }, { "epoch": 1.192492716308693, "grad_norm": 5.509008884429932, "learning_rate": 7.693339722089123e-05, "loss": 0.6262, "step": 17600 }, { "epoch": 1.1925604715766651, "grad_norm": 4.510783672332764, "learning_rate": 7.693202820179341e-05, "loss": 0.6087, "step": 17601 }, { "epoch": 1.192628226844637, "grad_norm": 7.165205478668213, "learning_rate": 7.69306591826956e-05, "loss": 0.7364, "step": 17602 }, { "epoch": 1.1926959821126093, "grad_norm": 5.687561511993408, "learning_rate": 7.692929016359779e-05, "loss": 0.8353, "step": 17603 }, { "epoch": 1.1927637373805813, "grad_norm": 7.553353786468506, "learning_rate": 7.692792114449997e-05, "loss": 1.0092, "step": 17604 }, { "epoch": 1.1928314926485535, "grad_norm": 5.063547611236572, "learning_rate": 7.692655212540215e-05, "loss": 0.7275, "step": 17605 }, { "epoch": 1.1928992479165255, "grad_norm": 5.614800930023193, "learning_rate": 7.692518310630433e-05, "loss": 0.628, "step": 17606 }, { "epoch": 1.1929670031844977, "grad_norm": 6.790213584899902, "learning_rate": 7.692381408720652e-05, "loss": 0.7567, "step": 17607 }, { "epoch": 1.1930347584524696, "grad_norm": 6.226934432983398, "learning_rate": 7.69224450681087e-05, "loss": 0.7992, "step": 17608 }, { "epoch": 1.1931025137204418, "grad_norm": 5.4407525062561035, "learning_rate": 7.692107604901088e-05, "loss": 0.59, "step": 17609 }, { "epoch": 1.1931702689884138, "grad_norm": 4.665156841278076, "learning_rate": 7.691970702991306e-05, "loss": 0.5459, "step": 17610 }, { "epoch": 1.1932380242563858, "grad_norm": 6.73590612411499, "learning_rate": 7.691833801081524e-05, "loss": 0.6124, "step": 17611 }, { "epoch": 1.193305779524358, "grad_norm": 5.0771074295043945, "learning_rate": 7.691696899171744e-05, "loss": 0.579, "step": 17612 }, { "epoch": 1.1933735347923302, "grad_norm": 9.059715270996094, "learning_rate": 7.691559997261962e-05, "loss": 0.8435, "step": 17613 }, { "epoch": 1.1934412900603022, "grad_norm": 6.360510349273682, "learning_rate": 7.69142309535218e-05, "loss": 0.7151, "step": 17614 }, { "epoch": 1.1935090453282742, "grad_norm": 4.936126232147217, "learning_rate": 7.691286193442398e-05, "loss": 0.6126, "step": 17615 }, { "epoch": 1.1935768005962464, "grad_norm": 5.501605987548828, "learning_rate": 7.691149291532617e-05, "loss": 0.6608, "step": 17616 }, { "epoch": 1.1936445558642184, "grad_norm": 5.651332855224609, "learning_rate": 7.691012389622835e-05, "loss": 0.5896, "step": 17617 }, { "epoch": 1.1937123111321906, "grad_norm": 5.945789813995361, "learning_rate": 7.690875487713053e-05, "loss": 0.9682, "step": 17618 }, { "epoch": 1.1937800664001625, "grad_norm": 6.9971604347229, "learning_rate": 7.690738585803273e-05, "loss": 0.6775, "step": 17619 }, { "epoch": 1.1938478216681347, "grad_norm": 5.1300506591796875, "learning_rate": 7.690601683893491e-05, "loss": 0.6251, "step": 17620 }, { "epoch": 1.1939155769361067, "grad_norm": 6.18295431137085, "learning_rate": 7.690464781983709e-05, "loss": 0.7284, "step": 17621 }, { "epoch": 1.193983332204079, "grad_norm": 5.232485294342041, "learning_rate": 7.690327880073928e-05, "loss": 0.6627, "step": 17622 }, { "epoch": 1.194051087472051, "grad_norm": 4.987381458282471, "learning_rate": 7.690190978164146e-05, "loss": 0.5891, "step": 17623 }, { "epoch": 1.1941188427400231, "grad_norm": 5.091625213623047, "learning_rate": 7.690054076254364e-05, "loss": 0.6001, "step": 17624 }, { "epoch": 1.194186598007995, "grad_norm": 5.142812252044678, "learning_rate": 7.689917174344582e-05, "loss": 0.4461, "step": 17625 }, { "epoch": 1.1942543532759673, "grad_norm": 5.461329460144043, "learning_rate": 7.689780272434802e-05, "loss": 0.7192, "step": 17626 }, { "epoch": 1.1943221085439393, "grad_norm": 5.987907886505127, "learning_rate": 7.68964337052502e-05, "loss": 0.7833, "step": 17627 }, { "epoch": 1.1943898638119115, "grad_norm": 6.716150760650635, "learning_rate": 7.689506468615238e-05, "loss": 0.6963, "step": 17628 }, { "epoch": 1.1944576190798835, "grad_norm": 7.355491638183594, "learning_rate": 7.689369566705456e-05, "loss": 0.483, "step": 17629 }, { "epoch": 1.1945253743478554, "grad_norm": 6.228071689605713, "learning_rate": 7.689232664795675e-05, "loss": 0.677, "step": 17630 }, { "epoch": 1.1945931296158276, "grad_norm": 6.996883869171143, "learning_rate": 7.689095762885893e-05, "loss": 0.8552, "step": 17631 }, { "epoch": 1.1946608848837996, "grad_norm": 4.602889537811279, "learning_rate": 7.688958860976111e-05, "loss": 0.5121, "step": 17632 }, { "epoch": 1.1947286401517718, "grad_norm": 5.40228271484375, "learning_rate": 7.688821959066329e-05, "loss": 0.5775, "step": 17633 }, { "epoch": 1.1947963954197438, "grad_norm": 5.13663387298584, "learning_rate": 7.688685057156547e-05, "loss": 0.7632, "step": 17634 }, { "epoch": 1.194864150687716, "grad_norm": 9.4126558303833, "learning_rate": 7.688548155246767e-05, "loss": 0.8483, "step": 17635 }, { "epoch": 1.194931905955688, "grad_norm": 5.480384349822998, "learning_rate": 7.688411253336985e-05, "loss": 0.6682, "step": 17636 }, { "epoch": 1.1949996612236602, "grad_norm": 5.476341247558594, "learning_rate": 7.688274351427203e-05, "loss": 0.6641, "step": 17637 }, { "epoch": 1.1950674164916322, "grad_norm": 4.595249652862549, "learning_rate": 7.688137449517421e-05, "loss": 0.5387, "step": 17638 }, { "epoch": 1.1951351717596044, "grad_norm": 5.596693515777588, "learning_rate": 7.68800054760764e-05, "loss": 0.7083, "step": 17639 }, { "epoch": 1.1952029270275764, "grad_norm": 6.919038772583008, "learning_rate": 7.687863645697858e-05, "loss": 0.6535, "step": 17640 }, { "epoch": 1.1952706822955486, "grad_norm": 5.938333988189697, "learning_rate": 7.687726743788076e-05, "loss": 0.8527, "step": 17641 }, { "epoch": 1.1953384375635205, "grad_norm": 5.0389299392700195, "learning_rate": 7.687589841878294e-05, "loss": 0.7157, "step": 17642 }, { "epoch": 1.1954061928314927, "grad_norm": 8.325102806091309, "learning_rate": 7.687452939968512e-05, "loss": 0.7156, "step": 17643 }, { "epoch": 1.1954739480994647, "grad_norm": 6.028964519500732, "learning_rate": 7.687316038058732e-05, "loss": 0.7134, "step": 17644 }, { "epoch": 1.1955417033674367, "grad_norm": 7.535099983215332, "learning_rate": 7.68717913614895e-05, "loss": 0.7695, "step": 17645 }, { "epoch": 1.195609458635409, "grad_norm": 5.430510520935059, "learning_rate": 7.687042234239168e-05, "loss": 0.8219, "step": 17646 }, { "epoch": 1.195677213903381, "grad_norm": 5.3170013427734375, "learning_rate": 7.686905332329386e-05, "loss": 0.9118, "step": 17647 }, { "epoch": 1.195744969171353, "grad_norm": 8.100552558898926, "learning_rate": 7.686768430419605e-05, "loss": 0.8288, "step": 17648 }, { "epoch": 1.195812724439325, "grad_norm": 6.240118980407715, "learning_rate": 7.686631528509823e-05, "loss": 0.8284, "step": 17649 }, { "epoch": 1.1958804797072973, "grad_norm": 6.110140323638916, "learning_rate": 7.686494626600041e-05, "loss": 0.7722, "step": 17650 }, { "epoch": 1.1959482349752693, "grad_norm": 6.435049057006836, "learning_rate": 7.68635772469026e-05, "loss": 0.6591, "step": 17651 }, { "epoch": 1.1960159902432415, "grad_norm": 5.954352855682373, "learning_rate": 7.686220822780477e-05, "loss": 0.6074, "step": 17652 }, { "epoch": 1.1960837455112134, "grad_norm": 5.165590763092041, "learning_rate": 7.686083920870697e-05, "loss": 0.7499, "step": 17653 }, { "epoch": 1.1961515007791856, "grad_norm": 7.155406475067139, "learning_rate": 7.685947018960915e-05, "loss": 0.8336, "step": 17654 }, { "epoch": 1.1962192560471576, "grad_norm": 5.120672702789307, "learning_rate": 7.685810117051133e-05, "loss": 0.6934, "step": 17655 }, { "epoch": 1.1962870113151298, "grad_norm": 6.173689365386963, "learning_rate": 7.685673215141351e-05, "loss": 0.4323, "step": 17656 }, { "epoch": 1.1963547665831018, "grad_norm": 4.619163513183594, "learning_rate": 7.685536313231569e-05, "loss": 0.4938, "step": 17657 }, { "epoch": 1.196422521851074, "grad_norm": 8.75632381439209, "learning_rate": 7.685399411321788e-05, "loss": 1.0098, "step": 17658 }, { "epoch": 1.196490277119046, "grad_norm": 6.584256172180176, "learning_rate": 7.685262509412006e-05, "loss": 0.5902, "step": 17659 }, { "epoch": 1.196558032387018, "grad_norm": 6.670119762420654, "learning_rate": 7.685125607502224e-05, "loss": 0.8715, "step": 17660 }, { "epoch": 1.1966257876549902, "grad_norm": 8.594388008117676, "learning_rate": 7.684988705592442e-05, "loss": 0.7048, "step": 17661 }, { "epoch": 1.1966935429229624, "grad_norm": 7.816542148590088, "learning_rate": 7.684851803682662e-05, "loss": 0.9348, "step": 17662 }, { "epoch": 1.1967612981909344, "grad_norm": 4.398709774017334, "learning_rate": 7.68471490177288e-05, "loss": 0.5985, "step": 17663 }, { "epoch": 1.1968290534589063, "grad_norm": 7.980318069458008, "learning_rate": 7.684577999863098e-05, "loss": 0.8392, "step": 17664 }, { "epoch": 1.1968968087268785, "grad_norm": 7.779125213623047, "learning_rate": 7.684441097953316e-05, "loss": 0.6489, "step": 17665 }, { "epoch": 1.1969645639948505, "grad_norm": 6.15852165222168, "learning_rate": 7.684304196043535e-05, "loss": 0.8325, "step": 17666 }, { "epoch": 1.1970323192628227, "grad_norm": 5.582149982452393, "learning_rate": 7.684167294133753e-05, "loss": 0.6112, "step": 17667 }, { "epoch": 1.1971000745307947, "grad_norm": 6.231848239898682, "learning_rate": 7.684030392223973e-05, "loss": 0.704, "step": 17668 }, { "epoch": 1.197167829798767, "grad_norm": 8.48556900024414, "learning_rate": 7.683893490314191e-05, "loss": 0.8127, "step": 17669 }, { "epoch": 1.1972355850667389, "grad_norm": 5.312507152557373, "learning_rate": 7.683756588404409e-05, "loss": 0.5856, "step": 17670 }, { "epoch": 1.197303340334711, "grad_norm": 4.0072407722473145, "learning_rate": 7.683619686494628e-05, "loss": 0.6048, "step": 17671 }, { "epoch": 1.197371095602683, "grad_norm": 4.753046989440918, "learning_rate": 7.683482784584846e-05, "loss": 0.7208, "step": 17672 }, { "epoch": 1.1974388508706553, "grad_norm": 4.263588905334473, "learning_rate": 7.683345882675064e-05, "loss": 0.6492, "step": 17673 }, { "epoch": 1.1975066061386272, "grad_norm": 5.775005340576172, "learning_rate": 7.683208980765282e-05, "loss": 0.8061, "step": 17674 }, { "epoch": 1.1975743614065995, "grad_norm": 6.587451457977295, "learning_rate": 7.6830720788555e-05, "loss": 0.7187, "step": 17675 }, { "epoch": 1.1976421166745714, "grad_norm": 7.55980110168457, "learning_rate": 7.68293517694572e-05, "loss": 0.7459, "step": 17676 }, { "epoch": 1.1977098719425436, "grad_norm": 5.851051330566406, "learning_rate": 7.682798275035938e-05, "loss": 0.7314, "step": 17677 }, { "epoch": 1.1977776272105156, "grad_norm": 6.21124792098999, "learning_rate": 7.682661373126156e-05, "loss": 0.6496, "step": 17678 }, { "epoch": 1.1978453824784876, "grad_norm": 6.034067630767822, "learning_rate": 7.682524471216374e-05, "loss": 0.7823, "step": 17679 }, { "epoch": 1.1979131377464598, "grad_norm": 5.624752521514893, "learning_rate": 7.682387569306592e-05, "loss": 0.9108, "step": 17680 }, { "epoch": 1.1979808930144318, "grad_norm": 7.310678005218506, "learning_rate": 7.682250667396811e-05, "loss": 0.7976, "step": 17681 }, { "epoch": 1.198048648282404, "grad_norm": 5.958901405334473, "learning_rate": 7.682113765487029e-05, "loss": 0.6814, "step": 17682 }, { "epoch": 1.198116403550376, "grad_norm": 5.8540120124816895, "learning_rate": 7.681976863577247e-05, "loss": 0.9074, "step": 17683 }, { "epoch": 1.1981841588183482, "grad_norm": 11.885223388671875, "learning_rate": 7.681839961667465e-05, "loss": 0.6314, "step": 17684 }, { "epoch": 1.1982519140863201, "grad_norm": 5.285735607147217, "learning_rate": 7.681703059757685e-05, "loss": 0.7848, "step": 17685 }, { "epoch": 1.1983196693542923, "grad_norm": 6.814820289611816, "learning_rate": 7.681566157847903e-05, "loss": 0.7201, "step": 17686 }, { "epoch": 1.1983874246222643, "grad_norm": 7.1838788986206055, "learning_rate": 7.681429255938121e-05, "loss": 0.8404, "step": 17687 }, { "epoch": 1.1984551798902365, "grad_norm": 5.949917793273926, "learning_rate": 7.681292354028339e-05, "loss": 0.7767, "step": 17688 }, { "epoch": 1.1985229351582085, "grad_norm": 6.235685348510742, "learning_rate": 7.681155452118557e-05, "loss": 0.717, "step": 17689 }, { "epoch": 1.1985906904261807, "grad_norm": 5.224851131439209, "learning_rate": 7.681018550208776e-05, "loss": 0.7371, "step": 17690 }, { "epoch": 1.1986584456941527, "grad_norm": 5.613080978393555, "learning_rate": 7.680881648298994e-05, "loss": 0.5199, "step": 17691 }, { "epoch": 1.198726200962125, "grad_norm": 8.915862083435059, "learning_rate": 7.680744746389212e-05, "loss": 0.717, "step": 17692 }, { "epoch": 1.1987939562300969, "grad_norm": 5.137214660644531, "learning_rate": 7.68060784447943e-05, "loss": 0.6165, "step": 17693 }, { "epoch": 1.1988617114980689, "grad_norm": 4.481411933898926, "learning_rate": 7.68047094256965e-05, "loss": 0.7008, "step": 17694 }, { "epoch": 1.198929466766041, "grad_norm": 4.9299798011779785, "learning_rate": 7.680334040659868e-05, "loss": 0.6167, "step": 17695 }, { "epoch": 1.1989972220340133, "grad_norm": 6.480248928070068, "learning_rate": 7.680197138750086e-05, "loss": 0.6755, "step": 17696 }, { "epoch": 1.1990649773019852, "grad_norm": 7.132851600646973, "learning_rate": 7.680060236840304e-05, "loss": 0.8065, "step": 17697 }, { "epoch": 1.1991327325699572, "grad_norm": 6.641706943511963, "learning_rate": 7.679923334930522e-05, "loss": 0.7251, "step": 17698 }, { "epoch": 1.1992004878379294, "grad_norm": 10.569890022277832, "learning_rate": 7.679786433020741e-05, "loss": 0.6988, "step": 17699 }, { "epoch": 1.1992682431059014, "grad_norm": 5.934963226318359, "learning_rate": 7.679649531110959e-05, "loss": 0.8192, "step": 17700 }, { "epoch": 1.1993359983738736, "grad_norm": 7.19936466217041, "learning_rate": 7.679512629201177e-05, "loss": 0.6698, "step": 17701 }, { "epoch": 1.1994037536418456, "grad_norm": 4.598423957824707, "learning_rate": 7.679375727291395e-05, "loss": 0.6782, "step": 17702 }, { "epoch": 1.1994715089098178, "grad_norm": 6.056293487548828, "learning_rate": 7.679238825381615e-05, "loss": 1.0353, "step": 17703 }, { "epoch": 1.1995392641777898, "grad_norm": 4.992430210113525, "learning_rate": 7.679101923471833e-05, "loss": 0.51, "step": 17704 }, { "epoch": 1.199607019445762, "grad_norm": 6.1313886642456055, "learning_rate": 7.678965021562051e-05, "loss": 0.6802, "step": 17705 }, { "epoch": 1.199674774713734, "grad_norm": 8.208282470703125, "learning_rate": 7.678828119652269e-05, "loss": 0.7752, "step": 17706 }, { "epoch": 1.1997425299817062, "grad_norm": 8.525566101074219, "learning_rate": 7.678691217742487e-05, "loss": 0.7824, "step": 17707 }, { "epoch": 1.1998102852496781, "grad_norm": 9.573042869567871, "learning_rate": 7.678554315832706e-05, "loss": 0.8599, "step": 17708 }, { "epoch": 1.1998780405176501, "grad_norm": 6.6548871994018555, "learning_rate": 7.678417413922924e-05, "loss": 0.5811, "step": 17709 }, { "epoch": 1.1999457957856223, "grad_norm": 7.299272537231445, "learning_rate": 7.678280512013142e-05, "loss": 0.7162, "step": 17710 }, { "epoch": 1.2000135510535945, "grad_norm": 6.244133949279785, "learning_rate": 7.67814361010336e-05, "loss": 0.8619, "step": 17711 }, { "epoch": 1.2000813063215665, "grad_norm": 5.655728816986084, "learning_rate": 7.67800670819358e-05, "loss": 0.5859, "step": 17712 }, { "epoch": 1.2001490615895385, "grad_norm": 6.566688060760498, "learning_rate": 7.677869806283798e-05, "loss": 0.7493, "step": 17713 }, { "epoch": 1.2002168168575107, "grad_norm": 7.644930839538574, "learning_rate": 7.677732904374016e-05, "loss": 0.7606, "step": 17714 }, { "epoch": 1.2002845721254827, "grad_norm": 6.584826946258545, "learning_rate": 7.677596002464235e-05, "loss": 0.816, "step": 17715 }, { "epoch": 1.2003523273934549, "grad_norm": 10.395920753479004, "learning_rate": 7.677459100554453e-05, "loss": 0.9352, "step": 17716 }, { "epoch": 1.2004200826614269, "grad_norm": 6.787402629852295, "learning_rate": 7.677322198644671e-05, "loss": 0.7819, "step": 17717 }, { "epoch": 1.200487837929399, "grad_norm": 5.836369037628174, "learning_rate": 7.67718529673489e-05, "loss": 0.7411, "step": 17718 }, { "epoch": 1.200555593197371, "grad_norm": 5.070427417755127, "learning_rate": 7.677048394825109e-05, "loss": 0.6364, "step": 17719 }, { "epoch": 1.2006233484653432, "grad_norm": 5.135974407196045, "learning_rate": 7.676911492915327e-05, "loss": 0.6565, "step": 17720 }, { "epoch": 1.2006911037333152, "grad_norm": 3.663120746612549, "learning_rate": 7.676774591005545e-05, "loss": 0.4482, "step": 17721 }, { "epoch": 1.2007588590012874, "grad_norm": 4.547859191894531, "learning_rate": 7.676637689095764e-05, "loss": 0.5517, "step": 17722 }, { "epoch": 1.2008266142692594, "grad_norm": 5.410218238830566, "learning_rate": 7.676500787185982e-05, "loss": 0.6991, "step": 17723 }, { "epoch": 1.2008943695372316, "grad_norm": 5.378064155578613, "learning_rate": 7.6763638852762e-05, "loss": 0.6361, "step": 17724 }, { "epoch": 1.2009621248052036, "grad_norm": 4.933994770050049, "learning_rate": 7.676226983366418e-05, "loss": 0.587, "step": 17725 }, { "epoch": 1.2010298800731758, "grad_norm": 5.668514728546143, "learning_rate": 7.676090081456638e-05, "loss": 0.5187, "step": 17726 }, { "epoch": 1.2010976353411478, "grad_norm": 6.826091289520264, "learning_rate": 7.675953179546856e-05, "loss": 0.656, "step": 17727 }, { "epoch": 1.2011653906091198, "grad_norm": 7.071187496185303, "learning_rate": 7.675816277637074e-05, "loss": 0.5693, "step": 17728 }, { "epoch": 1.201233145877092, "grad_norm": 7.474967002868652, "learning_rate": 7.675679375727292e-05, "loss": 0.4735, "step": 17729 }, { "epoch": 1.201300901145064, "grad_norm": 6.620955467224121, "learning_rate": 7.67554247381751e-05, "loss": 0.8567, "step": 17730 }, { "epoch": 1.2013686564130361, "grad_norm": 5.957265853881836, "learning_rate": 7.675405571907729e-05, "loss": 0.5876, "step": 17731 }, { "epoch": 1.2014364116810081, "grad_norm": 6.6812028884887695, "learning_rate": 7.675268669997947e-05, "loss": 0.7244, "step": 17732 }, { "epoch": 1.2015041669489803, "grad_norm": 6.950610160827637, "learning_rate": 7.675131768088165e-05, "loss": 0.7549, "step": 17733 }, { "epoch": 1.2015719222169523, "grad_norm": 6.661945343017578, "learning_rate": 7.674994866178383e-05, "loss": 0.8251, "step": 17734 }, { "epoch": 1.2016396774849245, "grad_norm": 6.50548791885376, "learning_rate": 7.674857964268601e-05, "loss": 0.7072, "step": 17735 }, { "epoch": 1.2017074327528965, "grad_norm": 10.191153526306152, "learning_rate": 7.674721062358821e-05, "loss": 0.5644, "step": 17736 }, { "epoch": 1.2017751880208687, "grad_norm": 5.519306659698486, "learning_rate": 7.674584160449039e-05, "loss": 0.649, "step": 17737 }, { "epoch": 1.2018429432888407, "grad_norm": 8.362431526184082, "learning_rate": 7.674447258539257e-05, "loss": 0.8843, "step": 17738 }, { "epoch": 1.2019106985568129, "grad_norm": 6.758439540863037, "learning_rate": 7.674310356629475e-05, "loss": 0.6952, "step": 17739 }, { "epoch": 1.2019784538247849, "grad_norm": 5.742153167724609, "learning_rate": 7.674173454719694e-05, "loss": 0.7082, "step": 17740 }, { "epoch": 1.202046209092757, "grad_norm": 6.582305431365967, "learning_rate": 7.674036552809912e-05, "loss": 0.734, "step": 17741 }, { "epoch": 1.202113964360729, "grad_norm": 5.384820938110352, "learning_rate": 7.67389965090013e-05, "loss": 0.7698, "step": 17742 }, { "epoch": 1.202181719628701, "grad_norm": 7.0777082443237305, "learning_rate": 7.673762748990348e-05, "loss": 0.5331, "step": 17743 }, { "epoch": 1.2022494748966732, "grad_norm": 6.164592742919922, "learning_rate": 7.673625847080566e-05, "loss": 0.6242, "step": 17744 }, { "epoch": 1.2023172301646454, "grad_norm": 6.713252544403076, "learning_rate": 7.673488945170786e-05, "loss": 0.5903, "step": 17745 }, { "epoch": 1.2023849854326174, "grad_norm": 5.783125877380371, "learning_rate": 7.673352043261004e-05, "loss": 0.7877, "step": 17746 }, { "epoch": 1.2024527407005894, "grad_norm": 8.683629989624023, "learning_rate": 7.673215141351222e-05, "loss": 0.6339, "step": 17747 }, { "epoch": 1.2025204959685616, "grad_norm": 8.471028327941895, "learning_rate": 7.67307823944144e-05, "loss": 0.7041, "step": 17748 }, { "epoch": 1.2025882512365336, "grad_norm": 5.869851112365723, "learning_rate": 7.672941337531659e-05, "loss": 0.7802, "step": 17749 }, { "epoch": 1.2026560065045058, "grad_norm": 5.1376776695251465, "learning_rate": 7.672804435621877e-05, "loss": 0.637, "step": 17750 }, { "epoch": 1.2027237617724778, "grad_norm": 4.885056972503662, "learning_rate": 7.672667533712095e-05, "loss": 0.6424, "step": 17751 }, { "epoch": 1.20279151704045, "grad_norm": 5.53432035446167, "learning_rate": 7.672530631802313e-05, "loss": 0.7047, "step": 17752 }, { "epoch": 1.202859272308422, "grad_norm": 8.100170135498047, "learning_rate": 7.672393729892531e-05, "loss": 0.6902, "step": 17753 }, { "epoch": 1.2029270275763941, "grad_norm": 6.803896903991699, "learning_rate": 7.672256827982751e-05, "loss": 0.6291, "step": 17754 }, { "epoch": 1.2029947828443661, "grad_norm": 4.285371780395508, "learning_rate": 7.672119926072969e-05, "loss": 0.6842, "step": 17755 }, { "epoch": 1.2030625381123383, "grad_norm": 5.140682697296143, "learning_rate": 7.671983024163187e-05, "loss": 0.7823, "step": 17756 }, { "epoch": 1.2031302933803103, "grad_norm": 6.522716045379639, "learning_rate": 7.671846122253405e-05, "loss": 0.6746, "step": 17757 }, { "epoch": 1.2031980486482823, "grad_norm": 7.171764850616455, "learning_rate": 7.671709220343624e-05, "loss": 0.7369, "step": 17758 }, { "epoch": 1.2032658039162545, "grad_norm": 8.315983772277832, "learning_rate": 7.671572318433842e-05, "loss": 0.9706, "step": 17759 }, { "epoch": 1.2033335591842267, "grad_norm": 5.393362998962402, "learning_rate": 7.67143541652406e-05, "loss": 0.5944, "step": 17760 }, { "epoch": 1.2034013144521987, "grad_norm": 7.974836826324463, "learning_rate": 7.67129851461428e-05, "loss": 0.7715, "step": 17761 }, { "epoch": 1.2034690697201706, "grad_norm": 8.586780548095703, "learning_rate": 7.671161612704498e-05, "loss": 0.7409, "step": 17762 }, { "epoch": 1.2035368249881429, "grad_norm": 5.896920204162598, "learning_rate": 7.671024710794716e-05, "loss": 0.9213, "step": 17763 }, { "epoch": 1.2036045802561148, "grad_norm": 6.252879619598389, "learning_rate": 7.670887808884935e-05, "loss": 0.6313, "step": 17764 }, { "epoch": 1.203672335524087, "grad_norm": 5.003981113433838, "learning_rate": 7.670750906975153e-05, "loss": 0.8074, "step": 17765 }, { "epoch": 1.203740090792059, "grad_norm": 6.52988862991333, "learning_rate": 7.670614005065371e-05, "loss": 0.6897, "step": 17766 }, { "epoch": 1.2038078460600312, "grad_norm": 5.87505578994751, "learning_rate": 7.670477103155589e-05, "loss": 0.6301, "step": 17767 }, { "epoch": 1.2038756013280032, "grad_norm": 5.153764724731445, "learning_rate": 7.670340201245809e-05, "loss": 0.8281, "step": 17768 }, { "epoch": 1.2039433565959754, "grad_norm": 5.710607051849365, "learning_rate": 7.670203299336027e-05, "loss": 0.6222, "step": 17769 }, { "epoch": 1.2040111118639474, "grad_norm": 6.170762538909912, "learning_rate": 7.670066397426245e-05, "loss": 0.7103, "step": 17770 }, { "epoch": 1.2040788671319196, "grad_norm": 6.375565052032471, "learning_rate": 7.669929495516463e-05, "loss": 0.6191, "step": 17771 }, { "epoch": 1.2041466223998916, "grad_norm": 4.824178218841553, "learning_rate": 7.669792593606682e-05, "loss": 0.7482, "step": 17772 }, { "epoch": 1.2042143776678638, "grad_norm": 5.683229446411133, "learning_rate": 7.6696556916969e-05, "loss": 0.6528, "step": 17773 }, { "epoch": 1.2042821329358357, "grad_norm": 7.511515140533447, "learning_rate": 7.669518789787118e-05, "loss": 0.7132, "step": 17774 }, { "epoch": 1.204349888203808, "grad_norm": 4.747464179992676, "learning_rate": 7.669381887877336e-05, "loss": 0.6468, "step": 17775 }, { "epoch": 1.20441764347178, "grad_norm": 6.176259517669678, "learning_rate": 7.669244985967554e-05, "loss": 0.8254, "step": 17776 }, { "epoch": 1.204485398739752, "grad_norm": 6.217640399932861, "learning_rate": 7.669108084057774e-05, "loss": 0.7821, "step": 17777 }, { "epoch": 1.2045531540077241, "grad_norm": 7.800964832305908, "learning_rate": 7.668971182147992e-05, "loss": 0.8139, "step": 17778 }, { "epoch": 1.204620909275696, "grad_norm": 6.084750175476074, "learning_rate": 7.66883428023821e-05, "loss": 0.5719, "step": 17779 }, { "epoch": 1.2046886645436683, "grad_norm": 6.607977390289307, "learning_rate": 7.668697378328428e-05, "loss": 0.7541, "step": 17780 }, { "epoch": 1.2047564198116403, "grad_norm": 5.644943714141846, "learning_rate": 7.668560476418647e-05, "loss": 0.7759, "step": 17781 }, { "epoch": 1.2048241750796125, "grad_norm": 10.316941261291504, "learning_rate": 7.668423574508865e-05, "loss": 0.6069, "step": 17782 }, { "epoch": 1.2048919303475845, "grad_norm": 7.116324424743652, "learning_rate": 7.668286672599083e-05, "loss": 0.7976, "step": 17783 }, { "epoch": 1.2049596856155567, "grad_norm": 4.7204365730285645, "learning_rate": 7.668149770689301e-05, "loss": 0.6528, "step": 17784 }, { "epoch": 1.2050274408835286, "grad_norm": 4.601452350616455, "learning_rate": 7.668012868779519e-05, "loss": 0.7411, "step": 17785 }, { "epoch": 1.2050951961515008, "grad_norm": 5.289638519287109, "learning_rate": 7.667875966869739e-05, "loss": 0.7751, "step": 17786 }, { "epoch": 1.2051629514194728, "grad_norm": 4.839632987976074, "learning_rate": 7.667739064959957e-05, "loss": 0.5549, "step": 17787 }, { "epoch": 1.205230706687445, "grad_norm": 5.583590030670166, "learning_rate": 7.667602163050175e-05, "loss": 0.6464, "step": 17788 }, { "epoch": 1.205298461955417, "grad_norm": 7.026573181152344, "learning_rate": 7.667465261140393e-05, "loss": 0.764, "step": 17789 }, { "epoch": 1.2053662172233892, "grad_norm": 6.217905044555664, "learning_rate": 7.667328359230611e-05, "loss": 0.6443, "step": 17790 }, { "epoch": 1.2054339724913612, "grad_norm": 6.040329933166504, "learning_rate": 7.66719145732083e-05, "loss": 0.6784, "step": 17791 }, { "epoch": 1.2055017277593332, "grad_norm": 9.230742454528809, "learning_rate": 7.667054555411048e-05, "loss": 0.6793, "step": 17792 }, { "epoch": 1.2055694830273054, "grad_norm": 8.235578536987305, "learning_rate": 7.666917653501266e-05, "loss": 0.8269, "step": 17793 }, { "epoch": 1.2056372382952776, "grad_norm": 6.351599216461182, "learning_rate": 7.666780751591484e-05, "loss": 0.7285, "step": 17794 }, { "epoch": 1.2057049935632496, "grad_norm": 7.1128973960876465, "learning_rate": 7.666643849681704e-05, "loss": 0.8007, "step": 17795 }, { "epoch": 1.2057727488312215, "grad_norm": 6.171035289764404, "learning_rate": 7.666506947771922e-05, "loss": 0.7373, "step": 17796 }, { "epoch": 1.2058405040991937, "grad_norm": 5.534034729003906, "learning_rate": 7.66637004586214e-05, "loss": 0.6877, "step": 17797 }, { "epoch": 1.2059082593671657, "grad_norm": 9.175405502319336, "learning_rate": 7.666233143952358e-05, "loss": 0.6967, "step": 17798 }, { "epoch": 1.205976014635138, "grad_norm": 5.125663757324219, "learning_rate": 7.666096242042576e-05, "loss": 0.5005, "step": 17799 }, { "epoch": 1.20604376990311, "grad_norm": 6.7034687995910645, "learning_rate": 7.665959340132795e-05, "loss": 0.8659, "step": 17800 }, { "epoch": 1.206111525171082, "grad_norm": 5.710980415344238, "learning_rate": 7.665822438223013e-05, "loss": 0.6413, "step": 17801 }, { "epoch": 1.206179280439054, "grad_norm": 4.896183967590332, "learning_rate": 7.665685536313231e-05, "loss": 0.5167, "step": 17802 }, { "epoch": 1.2062470357070263, "grad_norm": 6.997365474700928, "learning_rate": 7.665548634403449e-05, "loss": 0.743, "step": 17803 }, { "epoch": 1.2063147909749983, "grad_norm": 5.428713798522949, "learning_rate": 7.665411732493669e-05, "loss": 0.7474, "step": 17804 }, { "epoch": 1.2063825462429705, "grad_norm": 4.988559722900391, "learning_rate": 7.665274830583887e-05, "loss": 0.5755, "step": 17805 }, { "epoch": 1.2064503015109425, "grad_norm": 5.562001705169678, "learning_rate": 7.665137928674105e-05, "loss": 0.64, "step": 17806 }, { "epoch": 1.2065180567789144, "grad_norm": 6.794801235198975, "learning_rate": 7.665001026764324e-05, "loss": 0.6635, "step": 17807 }, { "epoch": 1.2065858120468866, "grad_norm": 7.743435382843018, "learning_rate": 7.664864124854542e-05, "loss": 0.6183, "step": 17808 }, { "epoch": 1.2066535673148588, "grad_norm": 5.286825180053711, "learning_rate": 7.66472722294476e-05, "loss": 0.7029, "step": 17809 }, { "epoch": 1.2067213225828308, "grad_norm": 3.8852932453155518, "learning_rate": 7.66459032103498e-05, "loss": 0.5791, "step": 17810 }, { "epoch": 1.2067890778508028, "grad_norm": 10.570182800292969, "learning_rate": 7.664453419125198e-05, "loss": 0.7256, "step": 17811 }, { "epoch": 1.206856833118775, "grad_norm": 6.794580936431885, "learning_rate": 7.664316517215416e-05, "loss": 0.6337, "step": 17812 }, { "epoch": 1.206924588386747, "grad_norm": 6.657121658325195, "learning_rate": 7.664179615305634e-05, "loss": 0.8513, "step": 17813 }, { "epoch": 1.2069923436547192, "grad_norm": 5.735461235046387, "learning_rate": 7.664042713395853e-05, "loss": 0.7064, "step": 17814 }, { "epoch": 1.2070600989226912, "grad_norm": 5.639031887054443, "learning_rate": 7.663905811486071e-05, "loss": 0.7369, "step": 17815 }, { "epoch": 1.2071278541906634, "grad_norm": 7.826430797576904, "learning_rate": 7.663768909576289e-05, "loss": 0.6114, "step": 17816 }, { "epoch": 1.2071956094586354, "grad_norm": 5.544642448425293, "learning_rate": 7.663632007666507e-05, "loss": 0.543, "step": 17817 }, { "epoch": 1.2072633647266076, "grad_norm": 6.61638069152832, "learning_rate": 7.663495105756727e-05, "loss": 0.6926, "step": 17818 }, { "epoch": 1.2073311199945795, "grad_norm": 4.663098335266113, "learning_rate": 7.663358203846945e-05, "loss": 0.7931, "step": 17819 }, { "epoch": 1.2073988752625517, "grad_norm": 6.7196478843688965, "learning_rate": 7.663221301937163e-05, "loss": 0.8295, "step": 17820 }, { "epoch": 1.2074666305305237, "grad_norm": 5.3182196617126465, "learning_rate": 7.663084400027381e-05, "loss": 0.624, "step": 17821 }, { "epoch": 1.207534385798496, "grad_norm": 5.531083106994629, "learning_rate": 7.662947498117599e-05, "loss": 0.4976, "step": 17822 }, { "epoch": 1.207602141066468, "grad_norm": 5.298411846160889, "learning_rate": 7.662810596207818e-05, "loss": 0.6012, "step": 17823 }, { "epoch": 1.20766989633444, "grad_norm": 4.497094631195068, "learning_rate": 7.662673694298036e-05, "loss": 0.6826, "step": 17824 }, { "epoch": 1.207737651602412, "grad_norm": 5.269436836242676, "learning_rate": 7.662536792388254e-05, "loss": 0.6327, "step": 17825 }, { "epoch": 1.207805406870384, "grad_norm": 5.4644646644592285, "learning_rate": 7.662399890478472e-05, "loss": 0.71, "step": 17826 }, { "epoch": 1.2078731621383563, "grad_norm": 5.628913879394531, "learning_rate": 7.662262988568692e-05, "loss": 0.7919, "step": 17827 }, { "epoch": 1.2079409174063283, "grad_norm": 5.46560525894165, "learning_rate": 7.66212608665891e-05, "loss": 0.8439, "step": 17828 }, { "epoch": 1.2080086726743005, "grad_norm": 5.631601810455322, "learning_rate": 7.661989184749128e-05, "loss": 0.6336, "step": 17829 }, { "epoch": 1.2080764279422724, "grad_norm": 5.358332633972168, "learning_rate": 7.661852282839346e-05, "loss": 0.9302, "step": 17830 }, { "epoch": 1.2081441832102446, "grad_norm": 6.63771915435791, "learning_rate": 7.661715380929564e-05, "loss": 0.7401, "step": 17831 }, { "epoch": 1.2082119384782166, "grad_norm": 6.354868412017822, "learning_rate": 7.661578479019783e-05, "loss": 0.7275, "step": 17832 }, { "epoch": 1.2082796937461888, "grad_norm": 7.358850479125977, "learning_rate": 7.661441577110001e-05, "loss": 0.7485, "step": 17833 }, { "epoch": 1.2083474490141608, "grad_norm": 5.550168991088867, "learning_rate": 7.661304675200219e-05, "loss": 0.7116, "step": 17834 }, { "epoch": 1.208415204282133, "grad_norm": 6.618070125579834, "learning_rate": 7.661167773290437e-05, "loss": 0.6558, "step": 17835 }, { "epoch": 1.208482959550105, "grad_norm": 7.177542209625244, "learning_rate": 7.661030871380657e-05, "loss": 0.6931, "step": 17836 }, { "epoch": 1.2085507148180772, "grad_norm": 5.309508800506592, "learning_rate": 7.660893969470875e-05, "loss": 0.6781, "step": 17837 }, { "epoch": 1.2086184700860492, "grad_norm": 5.462770462036133, "learning_rate": 7.660757067561093e-05, "loss": 0.6715, "step": 17838 }, { "epoch": 1.2086862253540214, "grad_norm": 5.621263027191162, "learning_rate": 7.660620165651311e-05, "loss": 0.6802, "step": 17839 }, { "epoch": 1.2087539806219934, "grad_norm": 6.031620979309082, "learning_rate": 7.660483263741529e-05, "loss": 0.708, "step": 17840 }, { "epoch": 1.2088217358899653, "grad_norm": 5.407814979553223, "learning_rate": 7.660346361831748e-05, "loss": 0.6426, "step": 17841 }, { "epoch": 1.2088894911579375, "grad_norm": 5.029693603515625, "learning_rate": 7.660209459921966e-05, "loss": 0.7588, "step": 17842 }, { "epoch": 1.2089572464259097, "grad_norm": 5.002652645111084, "learning_rate": 7.660072558012184e-05, "loss": 0.6632, "step": 17843 }, { "epoch": 1.2090250016938817, "grad_norm": 5.994513988494873, "learning_rate": 7.659935656102402e-05, "loss": 0.8487, "step": 17844 }, { "epoch": 1.2090927569618537, "grad_norm": 6.450139045715332, "learning_rate": 7.65979875419262e-05, "loss": 0.5194, "step": 17845 }, { "epoch": 1.209160512229826, "grad_norm": 6.206781387329102, "learning_rate": 7.65966185228284e-05, "loss": 0.6804, "step": 17846 }, { "epoch": 1.2092282674977979, "grad_norm": 6.784191608428955, "learning_rate": 7.659524950373058e-05, "loss": 0.8282, "step": 17847 }, { "epoch": 1.20929602276577, "grad_norm": 7.1671671867370605, "learning_rate": 7.659388048463276e-05, "loss": 0.8137, "step": 17848 }, { "epoch": 1.209363778033742, "grad_norm": 5.292774200439453, "learning_rate": 7.659251146553494e-05, "loss": 0.5384, "step": 17849 }, { "epoch": 1.2094315333017143, "grad_norm": 6.33740758895874, "learning_rate": 7.659114244643713e-05, "loss": 0.6687, "step": 17850 }, { "epoch": 1.2094992885696862, "grad_norm": 8.038208961486816, "learning_rate": 7.658977342733931e-05, "loss": 0.8412, "step": 17851 }, { "epoch": 1.2095670438376585, "grad_norm": 5.751485347747803, "learning_rate": 7.658840440824149e-05, "loss": 0.845, "step": 17852 }, { "epoch": 1.2096347991056304, "grad_norm": 5.004260540008545, "learning_rate": 7.658703538914369e-05, "loss": 0.594, "step": 17853 }, { "epoch": 1.2097025543736026, "grad_norm": 6.479980945587158, "learning_rate": 7.658566637004587e-05, "loss": 0.7178, "step": 17854 }, { "epoch": 1.2097703096415746, "grad_norm": 5.524226665496826, "learning_rate": 7.658429735094805e-05, "loss": 0.5855, "step": 17855 }, { "epoch": 1.2098380649095466, "grad_norm": 6.297536849975586, "learning_rate": 7.658292833185024e-05, "loss": 0.5762, "step": 17856 }, { "epoch": 1.2099058201775188, "grad_norm": 5.452970504760742, "learning_rate": 7.658155931275242e-05, "loss": 0.6021, "step": 17857 }, { "epoch": 1.209973575445491, "grad_norm": 6.164029121398926, "learning_rate": 7.65801902936546e-05, "loss": 0.834, "step": 17858 }, { "epoch": 1.210041330713463, "grad_norm": 6.116921901702881, "learning_rate": 7.65788212745568e-05, "loss": 0.6033, "step": 17859 }, { "epoch": 1.210109085981435, "grad_norm": 4.469160556793213, "learning_rate": 7.657745225545898e-05, "loss": 0.8679, "step": 17860 }, { "epoch": 1.2101768412494072, "grad_norm": 6.041546821594238, "learning_rate": 7.657608323636116e-05, "loss": 0.5729, "step": 17861 }, { "epoch": 1.2102445965173791, "grad_norm": 5.222907066345215, "learning_rate": 7.657471421726334e-05, "loss": 0.596, "step": 17862 }, { "epoch": 1.2103123517853513, "grad_norm": 6.178247451782227, "learning_rate": 7.657334519816552e-05, "loss": 0.571, "step": 17863 }, { "epoch": 1.2103801070533233, "grad_norm": 5.390204429626465, "learning_rate": 7.657197617906771e-05, "loss": 0.8544, "step": 17864 }, { "epoch": 1.2104478623212955, "grad_norm": 5.556325435638428, "learning_rate": 7.657060715996989e-05, "loss": 0.5408, "step": 17865 }, { "epoch": 1.2105156175892675, "grad_norm": 6.003518581390381, "learning_rate": 7.656923814087207e-05, "loss": 0.9889, "step": 17866 }, { "epoch": 1.2105833728572397, "grad_norm": 8.722765922546387, "learning_rate": 7.656786912177425e-05, "loss": 0.5056, "step": 17867 }, { "epoch": 1.2106511281252117, "grad_norm": 5.819286823272705, "learning_rate": 7.656650010267643e-05, "loss": 0.6311, "step": 17868 }, { "epoch": 1.210718883393184, "grad_norm": 5.321593761444092, "learning_rate": 7.656513108357863e-05, "loss": 0.5492, "step": 17869 }, { "epoch": 1.2107866386611559, "grad_norm": 5.966612339019775, "learning_rate": 7.65637620644808e-05, "loss": 0.801, "step": 17870 }, { "epoch": 1.210854393929128, "grad_norm": 5.289181232452393, "learning_rate": 7.656239304538299e-05, "loss": 0.6989, "step": 17871 }, { "epoch": 1.2109221491971, "grad_norm": 5.957951545715332, "learning_rate": 7.656102402628517e-05, "loss": 0.7726, "step": 17872 }, { "epoch": 1.2109899044650723, "grad_norm": 6.526004314422607, "learning_rate": 7.655965500718736e-05, "loss": 0.8481, "step": 17873 }, { "epoch": 1.2110576597330442, "grad_norm": 8.721756935119629, "learning_rate": 7.655828598808954e-05, "loss": 0.681, "step": 17874 }, { "epoch": 1.2111254150010162, "grad_norm": 5.119085311889648, "learning_rate": 7.655691696899172e-05, "loss": 0.3988, "step": 17875 }, { "epoch": 1.2111931702689884, "grad_norm": 4.622032165527344, "learning_rate": 7.65555479498939e-05, "loss": 0.7325, "step": 17876 }, { "epoch": 1.2112609255369604, "grad_norm": 5.131158828735352, "learning_rate": 7.655417893079608e-05, "loss": 0.58, "step": 17877 }, { "epoch": 1.2113286808049326, "grad_norm": 6.189968585968018, "learning_rate": 7.655280991169828e-05, "loss": 0.9961, "step": 17878 }, { "epoch": 1.2113964360729046, "grad_norm": 5.269293785095215, "learning_rate": 7.655144089260046e-05, "loss": 0.705, "step": 17879 }, { "epoch": 1.2114641913408768, "grad_norm": 4.969742774963379, "learning_rate": 7.655007187350264e-05, "loss": 0.8844, "step": 17880 }, { "epoch": 1.2115319466088488, "grad_norm": 6.767873287200928, "learning_rate": 7.654870285440482e-05, "loss": 1.0077, "step": 17881 }, { "epoch": 1.211599701876821, "grad_norm": 8.03074836730957, "learning_rate": 7.654733383530701e-05, "loss": 0.9055, "step": 17882 }, { "epoch": 1.211667457144793, "grad_norm": 7.549478054046631, "learning_rate": 7.654596481620919e-05, "loss": 0.8107, "step": 17883 }, { "epoch": 1.2117352124127652, "grad_norm": 7.665398597717285, "learning_rate": 7.654459579711137e-05, "loss": 0.5934, "step": 17884 }, { "epoch": 1.2118029676807371, "grad_norm": 7.51274299621582, "learning_rate": 7.654322677801355e-05, "loss": 0.6872, "step": 17885 }, { "epoch": 1.2118707229487093, "grad_norm": 6.046478748321533, "learning_rate": 7.654185775891573e-05, "loss": 0.6458, "step": 17886 }, { "epoch": 1.2119384782166813, "grad_norm": 5.589632511138916, "learning_rate": 7.654048873981793e-05, "loss": 0.7045, "step": 17887 }, { "epoch": 1.2120062334846535, "grad_norm": 7.263500690460205, "learning_rate": 7.65391197207201e-05, "loss": 0.7436, "step": 17888 }, { "epoch": 1.2120739887526255, "grad_norm": 6.018463611602783, "learning_rate": 7.653775070162229e-05, "loss": 0.7441, "step": 17889 }, { "epoch": 1.2121417440205975, "grad_norm": 11.365656852722168, "learning_rate": 7.653638168252447e-05, "loss": 0.7303, "step": 17890 }, { "epoch": 1.2122094992885697, "grad_norm": 6.8662109375, "learning_rate": 7.653501266342666e-05, "loss": 0.9822, "step": 17891 }, { "epoch": 1.212277254556542, "grad_norm": 5.432251930236816, "learning_rate": 7.653364364432884e-05, "loss": 0.718, "step": 17892 }, { "epoch": 1.2123450098245139, "grad_norm": 5.230556488037109, "learning_rate": 7.653227462523102e-05, "loss": 0.5133, "step": 17893 }, { "epoch": 1.2124127650924859, "grad_norm": 5.820934295654297, "learning_rate": 7.65309056061332e-05, "loss": 0.7827, "step": 17894 }, { "epoch": 1.212480520360458, "grad_norm": 7.665424823760986, "learning_rate": 7.652953658703538e-05, "loss": 0.871, "step": 17895 }, { "epoch": 1.21254827562843, "grad_norm": 6.874349594116211, "learning_rate": 7.652816756793758e-05, "loss": 0.7607, "step": 17896 }, { "epoch": 1.2126160308964022, "grad_norm": 4.565303802490234, "learning_rate": 7.652679854883976e-05, "loss": 0.4878, "step": 17897 }, { "epoch": 1.2126837861643742, "grad_norm": 8.851542472839355, "learning_rate": 7.652542952974194e-05, "loss": 0.7029, "step": 17898 }, { "epoch": 1.2127515414323464, "grad_norm": 5.475307464599609, "learning_rate": 7.652406051064413e-05, "loss": 0.7158, "step": 17899 }, { "epoch": 1.2128192967003184, "grad_norm": 5.020805835723877, "learning_rate": 7.652269149154631e-05, "loss": 0.607, "step": 17900 }, { "epoch": 1.2128870519682906, "grad_norm": 4.76190185546875, "learning_rate": 7.652132247244849e-05, "loss": 0.682, "step": 17901 }, { "epoch": 1.2129548072362626, "grad_norm": 5.650835037231445, "learning_rate": 7.651995345335069e-05, "loss": 0.6631, "step": 17902 }, { "epoch": 1.2130225625042348, "grad_norm": 5.15869140625, "learning_rate": 7.651858443425287e-05, "loss": 0.7104, "step": 17903 }, { "epoch": 1.2130903177722068, "grad_norm": 6.4718804359436035, "learning_rate": 7.651721541515505e-05, "loss": 0.828, "step": 17904 }, { "epoch": 1.2131580730401788, "grad_norm": 6.307829856872559, "learning_rate": 7.651584639605724e-05, "loss": 0.9308, "step": 17905 }, { "epoch": 1.213225828308151, "grad_norm": 4.885651111602783, "learning_rate": 7.651447737695942e-05, "loss": 0.8558, "step": 17906 }, { "epoch": 1.2132935835761232, "grad_norm": 7.297476768493652, "learning_rate": 7.65131083578616e-05, "loss": 0.5909, "step": 17907 }, { "epoch": 1.2133613388440951, "grad_norm": 6.080501079559326, "learning_rate": 7.651173933876378e-05, "loss": 0.7439, "step": 17908 }, { "epoch": 1.2134290941120671, "grad_norm": 6.017581939697266, "learning_rate": 7.651037031966596e-05, "loss": 0.6583, "step": 17909 }, { "epoch": 1.2134968493800393, "grad_norm": 7.0170464515686035, "learning_rate": 7.650900130056816e-05, "loss": 0.7311, "step": 17910 }, { "epoch": 1.2135646046480113, "grad_norm": 5.122740268707275, "learning_rate": 7.650763228147034e-05, "loss": 0.7785, "step": 17911 }, { "epoch": 1.2136323599159835, "grad_norm": 4.886419773101807, "learning_rate": 7.650626326237252e-05, "loss": 0.6322, "step": 17912 }, { "epoch": 1.2137001151839555, "grad_norm": 5.377499103546143, "learning_rate": 7.65048942432747e-05, "loss": 0.8079, "step": 17913 }, { "epoch": 1.2137678704519277, "grad_norm": 5.473203182220459, "learning_rate": 7.650352522417689e-05, "loss": 0.6221, "step": 17914 }, { "epoch": 1.2138356257198997, "grad_norm": 6.620959758758545, "learning_rate": 7.650215620507907e-05, "loss": 0.6327, "step": 17915 }, { "epoch": 1.2139033809878719, "grad_norm": 5.501951694488525, "learning_rate": 7.650078718598125e-05, "loss": 0.6191, "step": 17916 }, { "epoch": 1.2139711362558439, "grad_norm": 5.863165378570557, "learning_rate": 7.649941816688343e-05, "loss": 0.4766, "step": 17917 }, { "epoch": 1.214038891523816, "grad_norm": 4.651849269866943, "learning_rate": 7.649804914778561e-05, "loss": 0.8014, "step": 17918 }, { "epoch": 1.214106646791788, "grad_norm": 5.458044528961182, "learning_rate": 7.64966801286878e-05, "loss": 0.6545, "step": 17919 }, { "epoch": 1.2141744020597602, "grad_norm": 6.30871057510376, "learning_rate": 7.649531110958999e-05, "loss": 0.8052, "step": 17920 }, { "epoch": 1.2142421573277322, "grad_norm": 6.08293342590332, "learning_rate": 7.649394209049217e-05, "loss": 0.863, "step": 17921 }, { "epoch": 1.2143099125957044, "grad_norm": 4.689571857452393, "learning_rate": 7.649257307139435e-05, "loss": 0.7528, "step": 17922 }, { "epoch": 1.2143776678636764, "grad_norm": 5.698482513427734, "learning_rate": 7.649120405229653e-05, "loss": 0.7365, "step": 17923 }, { "epoch": 1.2144454231316484, "grad_norm": 6.945766925811768, "learning_rate": 7.648983503319872e-05, "loss": 0.7323, "step": 17924 }, { "epoch": 1.2145131783996206, "grad_norm": 4.9405670166015625, "learning_rate": 7.64884660141009e-05, "loss": 0.5537, "step": 17925 }, { "epoch": 1.2145809336675926, "grad_norm": 6.1244001388549805, "learning_rate": 7.648709699500308e-05, "loss": 0.9281, "step": 17926 }, { "epoch": 1.2146486889355648, "grad_norm": 5.485476970672607, "learning_rate": 7.648572797590526e-05, "loss": 0.7444, "step": 17927 }, { "epoch": 1.2147164442035367, "grad_norm": 6.439229965209961, "learning_rate": 7.648435895680746e-05, "loss": 0.6663, "step": 17928 }, { "epoch": 1.214784199471509, "grad_norm": 4.058414936065674, "learning_rate": 7.648298993770964e-05, "loss": 0.6314, "step": 17929 }, { "epoch": 1.214851954739481, "grad_norm": 4.499625205993652, "learning_rate": 7.648162091861182e-05, "loss": 0.6212, "step": 17930 }, { "epoch": 1.2149197100074531, "grad_norm": 6.286862850189209, "learning_rate": 7.6480251899514e-05, "loss": 0.795, "step": 17931 }, { "epoch": 1.2149874652754251, "grad_norm": 5.967475414276123, "learning_rate": 7.647888288041618e-05, "loss": 0.6406, "step": 17932 }, { "epoch": 1.2150552205433973, "grad_norm": 4.926750659942627, "learning_rate": 7.647751386131837e-05, "loss": 0.8561, "step": 17933 }, { "epoch": 1.2151229758113693, "grad_norm": 4.823459625244141, "learning_rate": 7.647614484222055e-05, "loss": 0.6308, "step": 17934 }, { "epoch": 1.2151907310793415, "grad_norm": 6.4650797843933105, "learning_rate": 7.647477582312273e-05, "loss": 0.9417, "step": 17935 }, { "epoch": 1.2152584863473135, "grad_norm": 6.535785675048828, "learning_rate": 7.647340680402491e-05, "loss": 0.6846, "step": 17936 }, { "epoch": 1.2153262416152857, "grad_norm": 5.6134819984436035, "learning_rate": 7.64720377849271e-05, "loss": 0.8102, "step": 17937 }, { "epoch": 1.2153939968832577, "grad_norm": 8.289133071899414, "learning_rate": 7.647066876582929e-05, "loss": 0.8256, "step": 17938 }, { "epoch": 1.2154617521512296, "grad_norm": 5.382526397705078, "learning_rate": 7.646929974673147e-05, "loss": 0.5605, "step": 17939 }, { "epoch": 1.2155295074192018, "grad_norm": 4.763079643249512, "learning_rate": 7.646793072763365e-05, "loss": 0.4803, "step": 17940 }, { "epoch": 1.215597262687174, "grad_norm": 4.224546432495117, "learning_rate": 7.646656170853583e-05, "loss": 0.4886, "step": 17941 }, { "epoch": 1.215665017955146, "grad_norm": 7.985034465789795, "learning_rate": 7.646519268943802e-05, "loss": 0.6896, "step": 17942 }, { "epoch": 1.215732773223118, "grad_norm": 7.068222522735596, "learning_rate": 7.64638236703402e-05, "loss": 0.7415, "step": 17943 }, { "epoch": 1.2158005284910902, "grad_norm": 5.369699001312256, "learning_rate": 7.646245465124238e-05, "loss": 0.7562, "step": 17944 }, { "epoch": 1.2158682837590622, "grad_norm": 5.549074172973633, "learning_rate": 7.646108563214456e-05, "loss": 0.6803, "step": 17945 }, { "epoch": 1.2159360390270344, "grad_norm": 5.730227947235107, "learning_rate": 7.645971661304676e-05, "loss": 0.5862, "step": 17946 }, { "epoch": 1.2160037942950064, "grad_norm": 5.7196478843688965, "learning_rate": 7.645834759394894e-05, "loss": 0.4924, "step": 17947 }, { "epoch": 1.2160715495629786, "grad_norm": 6.75718355178833, "learning_rate": 7.645697857485112e-05, "loss": 0.7233, "step": 17948 }, { "epoch": 1.2161393048309506, "grad_norm": 8.140740394592285, "learning_rate": 7.645560955575331e-05, "loss": 0.706, "step": 17949 }, { "epoch": 1.2162070600989228, "grad_norm": 9.06883716583252, "learning_rate": 7.645424053665549e-05, "loss": 0.5888, "step": 17950 }, { "epoch": 1.2162748153668947, "grad_norm": 6.231507301330566, "learning_rate": 7.645287151755767e-05, "loss": 0.6705, "step": 17951 }, { "epoch": 1.216342570634867, "grad_norm": 6.362677097320557, "learning_rate": 7.645150249845987e-05, "loss": 0.696, "step": 17952 }, { "epoch": 1.216410325902839, "grad_norm": 5.278102397918701, "learning_rate": 7.645013347936205e-05, "loss": 0.612, "step": 17953 }, { "epoch": 1.216478081170811, "grad_norm": 5.913932800292969, "learning_rate": 7.644876446026423e-05, "loss": 0.8063, "step": 17954 }, { "epoch": 1.2165458364387831, "grad_norm": 6.743002891540527, "learning_rate": 7.64473954411664e-05, "loss": 0.6755, "step": 17955 }, { "epoch": 1.2166135917067553, "grad_norm": 6.559525012969971, "learning_rate": 7.64460264220686e-05, "loss": 0.7889, "step": 17956 }, { "epoch": 1.2166813469747273, "grad_norm": 7.0902228355407715, "learning_rate": 7.644465740297078e-05, "loss": 0.5968, "step": 17957 }, { "epoch": 1.2167491022426993, "grad_norm": 5.9385576248168945, "learning_rate": 7.644328838387296e-05, "loss": 0.7367, "step": 17958 }, { "epoch": 1.2168168575106715, "grad_norm": 5.945615768432617, "learning_rate": 7.644191936477514e-05, "loss": 0.6871, "step": 17959 }, { "epoch": 1.2168846127786435, "grad_norm": 5.105138778686523, "learning_rate": 7.644055034567734e-05, "loss": 0.6814, "step": 17960 }, { "epoch": 1.2169523680466157, "grad_norm": 7.892396926879883, "learning_rate": 7.643918132657952e-05, "loss": 0.7212, "step": 17961 }, { "epoch": 1.2170201233145876, "grad_norm": 7.983614444732666, "learning_rate": 7.64378123074817e-05, "loss": 0.6391, "step": 17962 }, { "epoch": 1.2170878785825598, "grad_norm": 5.618093013763428, "learning_rate": 7.643644328838388e-05, "loss": 0.7687, "step": 17963 }, { "epoch": 1.2171556338505318, "grad_norm": 7.337813854217529, "learning_rate": 7.643507426928606e-05, "loss": 0.6703, "step": 17964 }, { "epoch": 1.217223389118504, "grad_norm": 4.311225891113281, "learning_rate": 7.643370525018825e-05, "loss": 0.6361, "step": 17965 }, { "epoch": 1.217291144386476, "grad_norm": 6.524477958679199, "learning_rate": 7.643233623109043e-05, "loss": 0.7418, "step": 17966 }, { "epoch": 1.2173588996544482, "grad_norm": 6.941540241241455, "learning_rate": 7.643096721199261e-05, "loss": 0.5442, "step": 17967 }, { "epoch": 1.2174266549224202, "grad_norm": 5.137349605560303, "learning_rate": 7.642959819289479e-05, "loss": 0.7523, "step": 17968 }, { "epoch": 1.2174944101903924, "grad_norm": 5.285187721252441, "learning_rate": 7.642822917379699e-05, "loss": 0.5724, "step": 17969 }, { "epoch": 1.2175621654583644, "grad_norm": 6.266340255737305, "learning_rate": 7.642686015469917e-05, "loss": 0.6546, "step": 17970 }, { "epoch": 1.2176299207263366, "grad_norm": 5.239572525024414, "learning_rate": 7.642549113560135e-05, "loss": 0.6406, "step": 17971 }, { "epoch": 1.2176976759943086, "grad_norm": 6.736250400543213, "learning_rate": 7.642412211650353e-05, "loss": 1.0865, "step": 17972 }, { "epoch": 1.2177654312622805, "grad_norm": 8.947715759277344, "learning_rate": 7.64227530974057e-05, "loss": 0.5183, "step": 17973 }, { "epoch": 1.2178331865302527, "grad_norm": 8.303337097167969, "learning_rate": 7.64213840783079e-05, "loss": 1.1634, "step": 17974 }, { "epoch": 1.2179009417982247, "grad_norm": 6.613880634307861, "learning_rate": 7.642001505921008e-05, "loss": 0.7042, "step": 17975 }, { "epoch": 1.217968697066197, "grad_norm": 6.535386085510254, "learning_rate": 7.641864604011226e-05, "loss": 0.7257, "step": 17976 }, { "epoch": 1.218036452334169, "grad_norm": 6.180886745452881, "learning_rate": 7.641727702101444e-05, "loss": 0.6752, "step": 17977 }, { "epoch": 1.218104207602141, "grad_norm": 5.949535369873047, "learning_rate": 7.641590800191662e-05, "loss": 0.7168, "step": 17978 }, { "epoch": 1.218171962870113, "grad_norm": 7.5198283195495605, "learning_rate": 7.641453898281882e-05, "loss": 0.8332, "step": 17979 }, { "epoch": 1.2182397181380853, "grad_norm": 6.259788990020752, "learning_rate": 7.6413169963721e-05, "loss": 0.7097, "step": 17980 }, { "epoch": 1.2183074734060573, "grad_norm": 5.875858306884766, "learning_rate": 7.641180094462318e-05, "loss": 0.6978, "step": 17981 }, { "epoch": 1.2183752286740295, "grad_norm": 7.679528713226318, "learning_rate": 7.641043192552536e-05, "loss": 0.6878, "step": 17982 }, { "epoch": 1.2184429839420015, "grad_norm": 5.411177635192871, "learning_rate": 7.640906290642755e-05, "loss": 0.6506, "step": 17983 }, { "epoch": 1.2185107392099737, "grad_norm": 5.739083766937256, "learning_rate": 7.640769388732973e-05, "loss": 0.689, "step": 17984 }, { "epoch": 1.2185784944779456, "grad_norm": 6.422370910644531, "learning_rate": 7.640632486823191e-05, "loss": 0.7599, "step": 17985 }, { "epoch": 1.2186462497459178, "grad_norm": 5.531982421875, "learning_rate": 7.640495584913409e-05, "loss": 0.7083, "step": 17986 }, { "epoch": 1.2187140050138898, "grad_norm": 9.015816688537598, "learning_rate": 7.640358683003627e-05, "loss": 0.8653, "step": 17987 }, { "epoch": 1.2187817602818618, "grad_norm": 7.057861804962158, "learning_rate": 7.640221781093847e-05, "loss": 0.5141, "step": 17988 }, { "epoch": 1.218849515549834, "grad_norm": 7.158889293670654, "learning_rate": 7.640084879184065e-05, "loss": 0.6141, "step": 17989 }, { "epoch": 1.2189172708178062, "grad_norm": 7.1165771484375, "learning_rate": 7.639947977274283e-05, "loss": 0.7902, "step": 17990 }, { "epoch": 1.2189850260857782, "grad_norm": 6.267188549041748, "learning_rate": 7.639811075364501e-05, "loss": 0.5597, "step": 17991 }, { "epoch": 1.2190527813537502, "grad_norm": 5.786649227142334, "learning_rate": 7.63967417345472e-05, "loss": 0.6786, "step": 17992 }, { "epoch": 1.2191205366217224, "grad_norm": 4.65104341506958, "learning_rate": 7.639537271544938e-05, "loss": 0.5413, "step": 17993 }, { "epoch": 1.2191882918896944, "grad_norm": 6.078935623168945, "learning_rate": 7.639400369635156e-05, "loss": 0.7329, "step": 17994 }, { "epoch": 1.2192560471576666, "grad_norm": 5.8307342529296875, "learning_rate": 7.639263467725376e-05, "loss": 0.7237, "step": 17995 }, { "epoch": 1.2193238024256385, "grad_norm": 5.22831392288208, "learning_rate": 7.639126565815594e-05, "loss": 0.6605, "step": 17996 }, { "epoch": 1.2193915576936107, "grad_norm": 6.974266529083252, "learning_rate": 7.638989663905812e-05, "loss": 0.7335, "step": 17997 }, { "epoch": 1.2194593129615827, "grad_norm": 5.890809059143066, "learning_rate": 7.638852761996031e-05, "loss": 0.8327, "step": 17998 }, { "epoch": 1.219527068229555, "grad_norm": 7.678559303283691, "learning_rate": 7.638715860086249e-05, "loss": 0.7564, "step": 17999 }, { "epoch": 1.219594823497527, "grad_norm": 4.4165191650390625, "learning_rate": 7.638578958176467e-05, "loss": 0.6001, "step": 18000 }, { "epoch": 1.219662578765499, "grad_norm": 7.2417731285095215, "learning_rate": 7.638442056266685e-05, "loss": 0.8368, "step": 18001 }, { "epoch": 1.219730334033471, "grad_norm": 5.695639133453369, "learning_rate": 7.638305154356905e-05, "loss": 0.7352, "step": 18002 }, { "epoch": 1.219798089301443, "grad_norm": 9.774930953979492, "learning_rate": 7.638168252447123e-05, "loss": 0.6938, "step": 18003 }, { "epoch": 1.2198658445694153, "grad_norm": 7.178581714630127, "learning_rate": 7.63803135053734e-05, "loss": 0.5617, "step": 18004 }, { "epoch": 1.2199335998373875, "grad_norm": 4.547571182250977, "learning_rate": 7.637894448627559e-05, "loss": 0.6759, "step": 18005 }, { "epoch": 1.2200013551053595, "grad_norm": 6.263281345367432, "learning_rate": 7.637757546717778e-05, "loss": 0.7855, "step": 18006 }, { "epoch": 1.2200691103733314, "grad_norm": 6.402713298797607, "learning_rate": 7.637620644807996e-05, "loss": 0.6332, "step": 18007 }, { "epoch": 1.2201368656413036, "grad_norm": 7.917328834533691, "learning_rate": 7.637483742898214e-05, "loss": 0.8464, "step": 18008 }, { "epoch": 1.2202046209092756, "grad_norm": 6.9080891609191895, "learning_rate": 7.637346840988432e-05, "loss": 0.6566, "step": 18009 }, { "epoch": 1.2202723761772478, "grad_norm": 5.2827467918396, "learning_rate": 7.63720993907865e-05, "loss": 0.8047, "step": 18010 }, { "epoch": 1.2203401314452198, "grad_norm": 8.067967414855957, "learning_rate": 7.63707303716887e-05, "loss": 0.6424, "step": 18011 }, { "epoch": 1.220407886713192, "grad_norm": 5.451672554016113, "learning_rate": 7.636936135259088e-05, "loss": 0.671, "step": 18012 }, { "epoch": 1.220475641981164, "grad_norm": 8.954378128051758, "learning_rate": 7.636799233349306e-05, "loss": 0.8216, "step": 18013 }, { "epoch": 1.2205433972491362, "grad_norm": 4.8422651290893555, "learning_rate": 7.636662331439524e-05, "loss": 0.4488, "step": 18014 }, { "epoch": 1.2206111525171082, "grad_norm": 7.705546855926514, "learning_rate": 7.636525429529743e-05, "loss": 0.7685, "step": 18015 }, { "epoch": 1.2206789077850804, "grad_norm": 7.396956920623779, "learning_rate": 7.636388527619961e-05, "loss": 0.6365, "step": 18016 }, { "epoch": 1.2207466630530523, "grad_norm": 5.974605083465576, "learning_rate": 7.636251625710179e-05, "loss": 0.5927, "step": 18017 }, { "epoch": 1.2208144183210246, "grad_norm": 5.919744968414307, "learning_rate": 7.636114723800397e-05, "loss": 0.5308, "step": 18018 }, { "epoch": 1.2208821735889965, "grad_norm": 5.136290550231934, "learning_rate": 7.635977821890615e-05, "loss": 0.5941, "step": 18019 }, { "epoch": 1.2209499288569687, "grad_norm": 4.4463210105896, "learning_rate": 7.635840919980835e-05, "loss": 0.4817, "step": 18020 }, { "epoch": 1.2210176841249407, "grad_norm": 6.176875114440918, "learning_rate": 7.635704018071053e-05, "loss": 0.6398, "step": 18021 }, { "epoch": 1.2210854393929127, "grad_norm": 5.770840167999268, "learning_rate": 7.63556711616127e-05, "loss": 0.6786, "step": 18022 }, { "epoch": 1.221153194660885, "grad_norm": 6.276705741882324, "learning_rate": 7.635430214251489e-05, "loss": 0.8613, "step": 18023 }, { "epoch": 1.2212209499288569, "grad_norm": 4.476863384246826, "learning_rate": 7.635293312341708e-05, "loss": 0.6629, "step": 18024 }, { "epoch": 1.221288705196829, "grad_norm": 6.741992950439453, "learning_rate": 7.635156410431926e-05, "loss": 0.8168, "step": 18025 }, { "epoch": 1.221356460464801, "grad_norm": 6.626770973205566, "learning_rate": 7.635019508522144e-05, "loss": 0.6608, "step": 18026 }, { "epoch": 1.2214242157327733, "grad_norm": 6.388763427734375, "learning_rate": 7.634882606612362e-05, "loss": 0.7622, "step": 18027 }, { "epoch": 1.2214919710007452, "grad_norm": 7.007261276245117, "learning_rate": 7.63474570470258e-05, "loss": 0.8817, "step": 18028 }, { "epoch": 1.2215597262687174, "grad_norm": 4.994833469390869, "learning_rate": 7.6346088027928e-05, "loss": 0.525, "step": 18029 }, { "epoch": 1.2216274815366894, "grad_norm": 5.362818717956543, "learning_rate": 7.634471900883018e-05, "loss": 0.6027, "step": 18030 }, { "epoch": 1.2216952368046616, "grad_norm": 6.882465362548828, "learning_rate": 7.634334998973236e-05, "loss": 0.476, "step": 18031 }, { "epoch": 1.2217629920726336, "grad_norm": 8.69393253326416, "learning_rate": 7.634198097063454e-05, "loss": 0.6321, "step": 18032 }, { "epoch": 1.2218307473406058, "grad_norm": 5.9786529541015625, "learning_rate": 7.634061195153672e-05, "loss": 0.7933, "step": 18033 }, { "epoch": 1.2218985026085778, "grad_norm": 5.898993968963623, "learning_rate": 7.633924293243891e-05, "loss": 0.6235, "step": 18034 }, { "epoch": 1.22196625787655, "grad_norm": 4.783423900604248, "learning_rate": 7.633787391334109e-05, "loss": 0.5224, "step": 18035 }, { "epoch": 1.222034013144522, "grad_norm": 7.560459613800049, "learning_rate": 7.633650489424327e-05, "loss": 0.8104, "step": 18036 }, { "epoch": 1.222101768412494, "grad_norm": 4.811002731323242, "learning_rate": 7.633513587514545e-05, "loss": 0.7236, "step": 18037 }, { "epoch": 1.2221695236804662, "grad_norm": 6.19532585144043, "learning_rate": 7.633376685604765e-05, "loss": 0.8387, "step": 18038 }, { "epoch": 1.2222372789484384, "grad_norm": 5.872559547424316, "learning_rate": 7.633239783694983e-05, "loss": 0.7077, "step": 18039 }, { "epoch": 1.2223050342164103, "grad_norm": 4.4188232421875, "learning_rate": 7.6331028817852e-05, "loss": 0.5792, "step": 18040 }, { "epoch": 1.2223727894843823, "grad_norm": 5.051517009735107, "learning_rate": 7.63296597987542e-05, "loss": 0.6725, "step": 18041 }, { "epoch": 1.2224405447523545, "grad_norm": 5.528559684753418, "learning_rate": 7.632829077965638e-05, "loss": 0.8271, "step": 18042 }, { "epoch": 1.2225083000203265, "grad_norm": 6.110957145690918, "learning_rate": 7.632692176055856e-05, "loss": 0.7396, "step": 18043 }, { "epoch": 1.2225760552882987, "grad_norm": 9.123190879821777, "learning_rate": 7.632555274146075e-05, "loss": 0.636, "step": 18044 }, { "epoch": 1.2226438105562707, "grad_norm": 6.206221103668213, "learning_rate": 7.632418372236294e-05, "loss": 0.645, "step": 18045 }, { "epoch": 1.222711565824243, "grad_norm": 8.248867988586426, "learning_rate": 7.632281470326512e-05, "loss": 0.7355, "step": 18046 }, { "epoch": 1.2227793210922149, "grad_norm": 5.231109619140625, "learning_rate": 7.632144568416731e-05, "loss": 0.5745, "step": 18047 }, { "epoch": 1.222847076360187, "grad_norm": 5.008121967315674, "learning_rate": 7.632007666506949e-05, "loss": 0.6544, "step": 18048 }, { "epoch": 1.222914831628159, "grad_norm": 5.843269348144531, "learning_rate": 7.631870764597167e-05, "loss": 0.6308, "step": 18049 }, { "epoch": 1.2229825868961313, "grad_norm": 4.995326042175293, "learning_rate": 7.631733862687385e-05, "loss": 0.7161, "step": 18050 }, { "epoch": 1.2230503421641032, "grad_norm": 5.701000213623047, "learning_rate": 7.631596960777603e-05, "loss": 0.6209, "step": 18051 }, { "epoch": 1.2231180974320752, "grad_norm": 5.274313926696777, "learning_rate": 7.631460058867822e-05, "loss": 0.6137, "step": 18052 }, { "epoch": 1.2231858527000474, "grad_norm": 4.914982318878174, "learning_rate": 7.63132315695804e-05, "loss": 0.7078, "step": 18053 }, { "epoch": 1.2232536079680196, "grad_norm": 4.857698917388916, "learning_rate": 7.631186255048259e-05, "loss": 0.717, "step": 18054 }, { "epoch": 1.2233213632359916, "grad_norm": 7.02639627456665, "learning_rate": 7.631049353138477e-05, "loss": 0.8204, "step": 18055 }, { "epoch": 1.2233891185039636, "grad_norm": 7.193076133728027, "learning_rate": 7.630912451228695e-05, "loss": 0.6439, "step": 18056 }, { "epoch": 1.2234568737719358, "grad_norm": 5.445677757263184, "learning_rate": 7.630775549318914e-05, "loss": 0.656, "step": 18057 }, { "epoch": 1.2235246290399078, "grad_norm": 9.708181381225586, "learning_rate": 7.630638647409132e-05, "loss": 0.5955, "step": 18058 }, { "epoch": 1.22359238430788, "grad_norm": 6.708062171936035, "learning_rate": 7.63050174549935e-05, "loss": 0.7779, "step": 18059 }, { "epoch": 1.223660139575852, "grad_norm": 4.14343786239624, "learning_rate": 7.630364843589568e-05, "loss": 0.6535, "step": 18060 }, { "epoch": 1.2237278948438242, "grad_norm": 5.566597938537598, "learning_rate": 7.630227941679787e-05, "loss": 0.6028, "step": 18061 }, { "epoch": 1.2237956501117961, "grad_norm": 4.511200904846191, "learning_rate": 7.630091039770006e-05, "loss": 0.6257, "step": 18062 }, { "epoch": 1.2238634053797683, "grad_norm": 5.272220611572266, "learning_rate": 7.629954137860224e-05, "loss": 0.7351, "step": 18063 }, { "epoch": 1.2239311606477403, "grad_norm": 5.659876823425293, "learning_rate": 7.629817235950442e-05, "loss": 0.7477, "step": 18064 }, { "epoch": 1.2239989159157125, "grad_norm": 7.069100379943848, "learning_rate": 7.62968033404066e-05, "loss": 0.73, "step": 18065 }, { "epoch": 1.2240666711836845, "grad_norm": 5.5793304443359375, "learning_rate": 7.629543432130879e-05, "loss": 0.8045, "step": 18066 }, { "epoch": 1.2241344264516567, "grad_norm": 5.751017093658447, "learning_rate": 7.629406530221097e-05, "loss": 0.6574, "step": 18067 }, { "epoch": 1.2242021817196287, "grad_norm": 4.8076701164245605, "learning_rate": 7.629269628311315e-05, "loss": 0.6907, "step": 18068 }, { "epoch": 1.224269936987601, "grad_norm": 7.757662773132324, "learning_rate": 7.629132726401533e-05, "loss": 0.7141, "step": 18069 }, { "epoch": 1.2243376922555729, "grad_norm": 5.999941349029541, "learning_rate": 7.628995824491753e-05, "loss": 0.808, "step": 18070 }, { "epoch": 1.2244054475235449, "grad_norm": 5.947088718414307, "learning_rate": 7.62885892258197e-05, "loss": 0.6211, "step": 18071 }, { "epoch": 1.224473202791517, "grad_norm": 4.921745300292969, "learning_rate": 7.628722020672189e-05, "loss": 0.7553, "step": 18072 }, { "epoch": 1.224540958059489, "grad_norm": 4.209071636199951, "learning_rate": 7.628585118762407e-05, "loss": 0.4931, "step": 18073 }, { "epoch": 1.2246087133274612, "grad_norm": 4.7868170738220215, "learning_rate": 7.628448216852625e-05, "loss": 0.5719, "step": 18074 }, { "epoch": 1.2246764685954332, "grad_norm": 5.3280158042907715, "learning_rate": 7.628311314942844e-05, "loss": 0.573, "step": 18075 }, { "epoch": 1.2247442238634054, "grad_norm": 5.4629716873168945, "learning_rate": 7.628174413033062e-05, "loss": 0.5231, "step": 18076 }, { "epoch": 1.2248119791313774, "grad_norm": 4.5787787437438965, "learning_rate": 7.62803751112328e-05, "loss": 0.5587, "step": 18077 }, { "epoch": 1.2248797343993496, "grad_norm": 4.548202037811279, "learning_rate": 7.627900609213498e-05, "loss": 0.5094, "step": 18078 }, { "epoch": 1.2249474896673216, "grad_norm": 6.067834377288818, "learning_rate": 7.627763707303716e-05, "loss": 0.6347, "step": 18079 }, { "epoch": 1.2250152449352938, "grad_norm": 6.163285255432129, "learning_rate": 7.627626805393936e-05, "loss": 0.8042, "step": 18080 }, { "epoch": 1.2250830002032658, "grad_norm": 6.52103853225708, "learning_rate": 7.627489903484154e-05, "loss": 0.7237, "step": 18081 }, { "epoch": 1.225150755471238, "grad_norm": 5.5198893547058105, "learning_rate": 7.627353001574372e-05, "loss": 0.7413, "step": 18082 }, { "epoch": 1.22521851073921, "grad_norm": 5.332125186920166, "learning_rate": 7.62721609966459e-05, "loss": 0.7945, "step": 18083 }, { "epoch": 1.2252862660071822, "grad_norm": 6.892205238342285, "learning_rate": 7.627079197754809e-05, "loss": 0.7906, "step": 18084 }, { "epoch": 1.2253540212751541, "grad_norm": 4.1680908203125, "learning_rate": 7.626942295845027e-05, "loss": 0.6408, "step": 18085 }, { "epoch": 1.2254217765431261, "grad_norm": 8.176078796386719, "learning_rate": 7.626805393935245e-05, "loss": 0.6947, "step": 18086 }, { "epoch": 1.2254895318110983, "grad_norm": 6.414756774902344, "learning_rate": 7.626668492025465e-05, "loss": 0.7691, "step": 18087 }, { "epoch": 1.2255572870790703, "grad_norm": 5.7930474281311035, "learning_rate": 7.626531590115683e-05, "loss": 0.7177, "step": 18088 }, { "epoch": 1.2256250423470425, "grad_norm": 5.2728271484375, "learning_rate": 7.6263946882059e-05, "loss": 0.6034, "step": 18089 }, { "epoch": 1.2256927976150145, "grad_norm": 5.549907684326172, "learning_rate": 7.62625778629612e-05, "loss": 0.6401, "step": 18090 }, { "epoch": 1.2257605528829867, "grad_norm": 6.938556671142578, "learning_rate": 7.626120884386338e-05, "loss": 0.5395, "step": 18091 }, { "epoch": 1.2258283081509587, "grad_norm": 5.350551128387451, "learning_rate": 7.625983982476556e-05, "loss": 0.716, "step": 18092 }, { "epoch": 1.2258960634189309, "grad_norm": 4.515890598297119, "learning_rate": 7.625847080566775e-05, "loss": 0.5628, "step": 18093 }, { "epoch": 1.2259638186869029, "grad_norm": 5.260360240936279, "learning_rate": 7.625710178656993e-05, "loss": 0.7814, "step": 18094 }, { "epoch": 1.226031573954875, "grad_norm": 4.987314224243164, "learning_rate": 7.625573276747211e-05, "loss": 0.9556, "step": 18095 }, { "epoch": 1.226099329222847, "grad_norm": 6.4795241355896, "learning_rate": 7.62543637483743e-05, "loss": 0.7433, "step": 18096 }, { "epoch": 1.2261670844908192, "grad_norm": 4.585717678070068, "learning_rate": 7.625299472927648e-05, "loss": 0.5516, "step": 18097 }, { "epoch": 1.2262348397587912, "grad_norm": 5.807083606719971, "learning_rate": 7.625162571017867e-05, "loss": 1.0551, "step": 18098 }, { "epoch": 1.2263025950267634, "grad_norm": 7.647436141967773, "learning_rate": 7.625025669108085e-05, "loss": 0.5785, "step": 18099 }, { "epoch": 1.2263703502947354, "grad_norm": 5.377509117126465, "learning_rate": 7.624888767198303e-05, "loss": 0.5938, "step": 18100 }, { "epoch": 1.2264381055627074, "grad_norm": 7.228111743927002, "learning_rate": 7.624751865288521e-05, "loss": 0.6299, "step": 18101 }, { "epoch": 1.2265058608306796, "grad_norm": 6.495123386383057, "learning_rate": 7.62461496337874e-05, "loss": 0.7236, "step": 18102 }, { "epoch": 1.2265736160986518, "grad_norm": 5.612201690673828, "learning_rate": 7.624478061468958e-05, "loss": 0.8444, "step": 18103 }, { "epoch": 1.2266413713666238, "grad_norm": 6.513780117034912, "learning_rate": 7.624341159559177e-05, "loss": 0.8117, "step": 18104 }, { "epoch": 1.2267091266345957, "grad_norm": 6.759029388427734, "learning_rate": 7.624204257649395e-05, "loss": 0.9207, "step": 18105 }, { "epoch": 1.226776881902568, "grad_norm": 5.229630470275879, "learning_rate": 7.624067355739613e-05, "loss": 0.718, "step": 18106 }, { "epoch": 1.22684463717054, "grad_norm": 5.184760570526123, "learning_rate": 7.623930453829832e-05, "loss": 0.5565, "step": 18107 }, { "epoch": 1.2269123924385121, "grad_norm": 5.323483943939209, "learning_rate": 7.62379355192005e-05, "loss": 0.5649, "step": 18108 }, { "epoch": 1.2269801477064841, "grad_norm": 5.713914394378662, "learning_rate": 7.623656650010268e-05, "loss": 1.0169, "step": 18109 }, { "epoch": 1.2270479029744563, "grad_norm": 6.060333728790283, "learning_rate": 7.623519748100486e-05, "loss": 0.5474, "step": 18110 }, { "epoch": 1.2271156582424283, "grad_norm": 7.35471248626709, "learning_rate": 7.623382846190704e-05, "loss": 0.9568, "step": 18111 }, { "epoch": 1.2271834135104005, "grad_norm": 5.508981227874756, "learning_rate": 7.623245944280923e-05, "loss": 0.8809, "step": 18112 }, { "epoch": 1.2272511687783725, "grad_norm": 6.507081031799316, "learning_rate": 7.623109042371142e-05, "loss": 0.6141, "step": 18113 }, { "epoch": 1.2273189240463447, "grad_norm": 4.551576614379883, "learning_rate": 7.62297214046136e-05, "loss": 0.5581, "step": 18114 }, { "epoch": 1.2273866793143167, "grad_norm": 5.739194393157959, "learning_rate": 7.622835238551578e-05, "loss": 0.563, "step": 18115 }, { "epoch": 1.2274544345822889, "grad_norm": 7.448077201843262, "learning_rate": 7.622698336641797e-05, "loss": 0.8644, "step": 18116 }, { "epoch": 1.2275221898502608, "grad_norm": 4.7695770263671875, "learning_rate": 7.622561434732015e-05, "loss": 0.6372, "step": 18117 }, { "epoch": 1.227589945118233, "grad_norm": 5.259091854095459, "learning_rate": 7.622424532822233e-05, "loss": 0.7068, "step": 18118 }, { "epoch": 1.227657700386205, "grad_norm": 4.840086936950684, "learning_rate": 7.622287630912451e-05, "loss": 0.668, "step": 18119 }, { "epoch": 1.227725455654177, "grad_norm": 7.2845282554626465, "learning_rate": 7.622150729002669e-05, "loss": 0.5841, "step": 18120 }, { "epoch": 1.2277932109221492, "grad_norm": 5.2438249588012695, "learning_rate": 7.622013827092889e-05, "loss": 0.5784, "step": 18121 }, { "epoch": 1.2278609661901212, "grad_norm": 5.425882816314697, "learning_rate": 7.621876925183107e-05, "loss": 0.4798, "step": 18122 }, { "epoch": 1.2279287214580934, "grad_norm": 4.423764705657959, "learning_rate": 7.621740023273325e-05, "loss": 0.5684, "step": 18123 }, { "epoch": 1.2279964767260654, "grad_norm": 6.794591903686523, "learning_rate": 7.621603121363543e-05, "loss": 0.3746, "step": 18124 }, { "epoch": 1.2280642319940376, "grad_norm": 4.884415626525879, "learning_rate": 7.621466219453762e-05, "loss": 0.6326, "step": 18125 }, { "epoch": 1.2281319872620096, "grad_norm": 4.998366832733154, "learning_rate": 7.62132931754398e-05, "loss": 0.6967, "step": 18126 }, { "epoch": 1.2281997425299818, "grad_norm": 5.243693828582764, "learning_rate": 7.621192415634198e-05, "loss": 0.8389, "step": 18127 }, { "epoch": 1.2282674977979537, "grad_norm": 5.258869171142578, "learning_rate": 7.621055513724416e-05, "loss": 0.6824, "step": 18128 }, { "epoch": 1.228335253065926, "grad_norm": 6.289614677429199, "learning_rate": 7.620918611814634e-05, "loss": 0.9113, "step": 18129 }, { "epoch": 1.228403008333898, "grad_norm": 4.787515163421631, "learning_rate": 7.620781709904854e-05, "loss": 0.6163, "step": 18130 }, { "epoch": 1.2284707636018701, "grad_norm": 6.306053161621094, "learning_rate": 7.620644807995072e-05, "loss": 0.6851, "step": 18131 }, { "epoch": 1.2285385188698421, "grad_norm": 5.287718772888184, "learning_rate": 7.62050790608529e-05, "loss": 0.4938, "step": 18132 }, { "epoch": 1.2286062741378143, "grad_norm": 7.48237943649292, "learning_rate": 7.620371004175509e-05, "loss": 0.7323, "step": 18133 }, { "epoch": 1.2286740294057863, "grad_norm": 5.759404182434082, "learning_rate": 7.620234102265727e-05, "loss": 0.6837, "step": 18134 }, { "epoch": 1.2287417846737583, "grad_norm": 6.255051136016846, "learning_rate": 7.620097200355945e-05, "loss": 0.7534, "step": 18135 }, { "epoch": 1.2288095399417305, "grad_norm": 6.460224628448486, "learning_rate": 7.619960298446164e-05, "loss": 0.813, "step": 18136 }, { "epoch": 1.2288772952097025, "grad_norm": 6.245341777801514, "learning_rate": 7.619823396536382e-05, "loss": 0.8548, "step": 18137 }, { "epoch": 1.2289450504776747, "grad_norm": 7.165752410888672, "learning_rate": 7.6196864946266e-05, "loss": 0.7247, "step": 18138 }, { "epoch": 1.2290128057456466, "grad_norm": 5.944732666015625, "learning_rate": 7.61954959271682e-05, "loss": 0.7436, "step": 18139 }, { "epoch": 1.2290805610136188, "grad_norm": 5.928808212280273, "learning_rate": 7.619412690807038e-05, "loss": 0.6332, "step": 18140 }, { "epoch": 1.2291483162815908, "grad_norm": 7.637009143829346, "learning_rate": 7.619275788897256e-05, "loss": 0.6698, "step": 18141 }, { "epoch": 1.229216071549563, "grad_norm": 6.397332668304443, "learning_rate": 7.619138886987474e-05, "loss": 0.5942, "step": 18142 }, { "epoch": 1.229283826817535, "grad_norm": 7.642723560333252, "learning_rate": 7.619001985077692e-05, "loss": 0.6061, "step": 18143 }, { "epoch": 1.2293515820855072, "grad_norm": 4.819983959197998, "learning_rate": 7.618865083167911e-05, "loss": 0.6466, "step": 18144 }, { "epoch": 1.2294193373534792, "grad_norm": 4.973183631896973, "learning_rate": 7.61872818125813e-05, "loss": 0.5931, "step": 18145 }, { "epoch": 1.2294870926214514, "grad_norm": 5.909261703491211, "learning_rate": 7.618591279348347e-05, "loss": 0.5146, "step": 18146 }, { "epoch": 1.2295548478894234, "grad_norm": 4.776844501495361, "learning_rate": 7.618454377438566e-05, "loss": 0.5519, "step": 18147 }, { "epoch": 1.2296226031573956, "grad_norm": 4.408102512359619, "learning_rate": 7.618317475528785e-05, "loss": 0.488, "step": 18148 }, { "epoch": 1.2296903584253676, "grad_norm": 4.527730464935303, "learning_rate": 7.618180573619003e-05, "loss": 0.6212, "step": 18149 }, { "epoch": 1.2297581136933395, "grad_norm": 5.151172161102295, "learning_rate": 7.618043671709221e-05, "loss": 0.751, "step": 18150 }, { "epoch": 1.2298258689613117, "grad_norm": 4.503907680511475, "learning_rate": 7.617906769799439e-05, "loss": 0.675, "step": 18151 }, { "epoch": 1.229893624229284, "grad_norm": 6.000171661376953, "learning_rate": 7.617769867889657e-05, "loss": 0.5313, "step": 18152 }, { "epoch": 1.229961379497256, "grad_norm": 5.874682903289795, "learning_rate": 7.617632965979876e-05, "loss": 0.8739, "step": 18153 }, { "epoch": 1.230029134765228, "grad_norm": 5.422181129455566, "learning_rate": 7.617496064070094e-05, "loss": 0.7197, "step": 18154 }, { "epoch": 1.2300968900332, "grad_norm": 5.414138317108154, "learning_rate": 7.617359162160313e-05, "loss": 0.6733, "step": 18155 }, { "epoch": 1.230164645301172, "grad_norm": 6.205505847930908, "learning_rate": 7.61722226025053e-05, "loss": 0.8054, "step": 18156 }, { "epoch": 1.2302324005691443, "grad_norm": 7.225019454956055, "learning_rate": 7.61708535834075e-05, "loss": 0.6045, "step": 18157 }, { "epoch": 1.2303001558371163, "grad_norm": 5.018001079559326, "learning_rate": 7.616948456430968e-05, "loss": 0.573, "step": 18158 }, { "epoch": 1.2303679111050885, "grad_norm": 5.88112735748291, "learning_rate": 7.616811554521186e-05, "loss": 0.7554, "step": 18159 }, { "epoch": 1.2304356663730605, "grad_norm": 7.625991344451904, "learning_rate": 7.616674652611404e-05, "loss": 0.9717, "step": 18160 }, { "epoch": 1.2305034216410327, "grad_norm": 5.763574123382568, "learning_rate": 7.616537750701622e-05, "loss": 0.9212, "step": 18161 }, { "epoch": 1.2305711769090046, "grad_norm": 4.682827472686768, "learning_rate": 7.616400848791841e-05, "loss": 0.4873, "step": 18162 }, { "epoch": 1.2306389321769768, "grad_norm": 5.930944442749023, "learning_rate": 7.61626394688206e-05, "loss": 0.6912, "step": 18163 }, { "epoch": 1.2307066874449488, "grad_norm": 6.0003814697265625, "learning_rate": 7.616127044972278e-05, "loss": 0.6373, "step": 18164 }, { "epoch": 1.230774442712921, "grad_norm": 6.296213626861572, "learning_rate": 7.615990143062496e-05, "loss": 0.7201, "step": 18165 }, { "epoch": 1.230842197980893, "grad_norm": 5.7189249992370605, "learning_rate": 7.615853241152714e-05, "loss": 0.7472, "step": 18166 }, { "epoch": 1.2309099532488652, "grad_norm": 6.441144943237305, "learning_rate": 7.615716339242933e-05, "loss": 0.9438, "step": 18167 }, { "epoch": 1.2309777085168372, "grad_norm": 6.301687717437744, "learning_rate": 7.615579437333151e-05, "loss": 0.7934, "step": 18168 }, { "epoch": 1.2310454637848092, "grad_norm": 5.537693023681641, "learning_rate": 7.615442535423369e-05, "loss": 0.7587, "step": 18169 }, { "epoch": 1.2311132190527814, "grad_norm": 9.502969741821289, "learning_rate": 7.615305633513587e-05, "loss": 0.6527, "step": 18170 }, { "epoch": 1.2311809743207534, "grad_norm": 5.102190971374512, "learning_rate": 7.615168731603806e-05, "loss": 0.6343, "step": 18171 }, { "epoch": 1.2312487295887256, "grad_norm": 4.21937370300293, "learning_rate": 7.615031829694025e-05, "loss": 0.4586, "step": 18172 }, { "epoch": 1.2313164848566975, "grad_norm": 6.393093585968018, "learning_rate": 7.614894927784243e-05, "loss": 0.7663, "step": 18173 }, { "epoch": 1.2313842401246697, "grad_norm": 4.278507232666016, "learning_rate": 7.61475802587446e-05, "loss": 0.5946, "step": 18174 }, { "epoch": 1.2314519953926417, "grad_norm": 7.228772163391113, "learning_rate": 7.614621123964679e-05, "loss": 0.8254, "step": 18175 }, { "epoch": 1.231519750660614, "grad_norm": 7.127139091491699, "learning_rate": 7.614484222054898e-05, "loss": 0.9439, "step": 18176 }, { "epoch": 1.231587505928586, "grad_norm": 5.133269786834717, "learning_rate": 7.614347320145116e-05, "loss": 0.5107, "step": 18177 }, { "epoch": 1.231655261196558, "grad_norm": 7.366678714752197, "learning_rate": 7.614210418235334e-05, "loss": 0.7128, "step": 18178 }, { "epoch": 1.23172301646453, "grad_norm": 5.290058135986328, "learning_rate": 7.614073516325552e-05, "loss": 0.7522, "step": 18179 }, { "epoch": 1.2317907717325023, "grad_norm": 8.452763557434082, "learning_rate": 7.613936614415771e-05, "loss": 0.6214, "step": 18180 }, { "epoch": 1.2318585270004743, "grad_norm": 5.031332492828369, "learning_rate": 7.61379971250599e-05, "loss": 0.6009, "step": 18181 }, { "epoch": 1.2319262822684465, "grad_norm": 4.337656497955322, "learning_rate": 7.613662810596208e-05, "loss": 0.4222, "step": 18182 }, { "epoch": 1.2319940375364185, "grad_norm": 7.403090000152588, "learning_rate": 7.613525908686427e-05, "loss": 0.6843, "step": 18183 }, { "epoch": 1.2320617928043904, "grad_norm": 5.537214279174805, "learning_rate": 7.613389006776645e-05, "loss": 0.6674, "step": 18184 }, { "epoch": 1.2321295480723626, "grad_norm": 5.237438201904297, "learning_rate": 7.613252104866864e-05, "loss": 0.7471, "step": 18185 }, { "epoch": 1.2321973033403346, "grad_norm": 4.745239734649658, "learning_rate": 7.613115202957082e-05, "loss": 0.5399, "step": 18186 }, { "epoch": 1.2322650586083068, "grad_norm": 5.67287015914917, "learning_rate": 7.6129783010473e-05, "loss": 0.6627, "step": 18187 }, { "epoch": 1.2323328138762788, "grad_norm": 6.220743179321289, "learning_rate": 7.612841399137518e-05, "loss": 0.6627, "step": 18188 }, { "epoch": 1.232400569144251, "grad_norm": 7.604142189025879, "learning_rate": 7.612704497227737e-05, "loss": 0.6789, "step": 18189 }, { "epoch": 1.232468324412223, "grad_norm": 5.372503280639648, "learning_rate": 7.612567595317956e-05, "loss": 0.7404, "step": 18190 }, { "epoch": 1.2325360796801952, "grad_norm": 5.212665557861328, "learning_rate": 7.612430693408174e-05, "loss": 0.6296, "step": 18191 }, { "epoch": 1.2326038349481672, "grad_norm": 7.466536521911621, "learning_rate": 7.612293791498392e-05, "loss": 0.7127, "step": 18192 }, { "epoch": 1.2326715902161394, "grad_norm": 6.254476070404053, "learning_rate": 7.61215688958861e-05, "loss": 0.849, "step": 18193 }, { "epoch": 1.2327393454841113, "grad_norm": 7.161530494689941, "learning_rate": 7.61201998767883e-05, "loss": 0.4574, "step": 18194 }, { "epoch": 1.2328071007520836, "grad_norm": 7.048676013946533, "learning_rate": 7.611883085769047e-05, "loss": 0.537, "step": 18195 }, { "epoch": 1.2328748560200555, "grad_norm": 4.868967533111572, "learning_rate": 7.611746183859265e-05, "loss": 0.6313, "step": 18196 }, { "epoch": 1.2329426112880277, "grad_norm": 4.794018745422363, "learning_rate": 7.611609281949483e-05, "loss": 0.6326, "step": 18197 }, { "epoch": 1.2330103665559997, "grad_norm": 7.3240556716918945, "learning_rate": 7.611472380039702e-05, "loss": 0.6995, "step": 18198 }, { "epoch": 1.2330781218239717, "grad_norm": 6.228128910064697, "learning_rate": 7.611335478129921e-05, "loss": 0.5818, "step": 18199 }, { "epoch": 1.233145877091944, "grad_norm": 9.069717407226562, "learning_rate": 7.611198576220139e-05, "loss": 0.718, "step": 18200 }, { "epoch": 1.233213632359916, "grad_norm": 5.857358932495117, "learning_rate": 7.611061674310357e-05, "loss": 0.7181, "step": 18201 }, { "epoch": 1.233281387627888, "grad_norm": 9.045475959777832, "learning_rate": 7.610924772400575e-05, "loss": 0.7415, "step": 18202 }, { "epoch": 1.23334914289586, "grad_norm": 6.362603664398193, "learning_rate": 7.610787870490794e-05, "loss": 0.6241, "step": 18203 }, { "epoch": 1.2334168981638323, "grad_norm": 7.243282318115234, "learning_rate": 7.610650968581012e-05, "loss": 0.7114, "step": 18204 }, { "epoch": 1.2334846534318042, "grad_norm": 5.471545219421387, "learning_rate": 7.61051406667123e-05, "loss": 0.8035, "step": 18205 }, { "epoch": 1.2335524086997764, "grad_norm": 6.288739204406738, "learning_rate": 7.610377164761449e-05, "loss": 0.7507, "step": 18206 }, { "epoch": 1.2336201639677484, "grad_norm": 6.829304218292236, "learning_rate": 7.610240262851667e-05, "loss": 0.7863, "step": 18207 }, { "epoch": 1.2336879192357206, "grad_norm": 5.105554580688477, "learning_rate": 7.610103360941886e-05, "loss": 0.7707, "step": 18208 }, { "epoch": 1.2337556745036926, "grad_norm": 7.09926176071167, "learning_rate": 7.609966459032104e-05, "loss": 0.8251, "step": 18209 }, { "epoch": 1.2338234297716648, "grad_norm": 4.862083911895752, "learning_rate": 7.609829557122322e-05, "loss": 0.5155, "step": 18210 }, { "epoch": 1.2338911850396368, "grad_norm": 5.866125583648682, "learning_rate": 7.60969265521254e-05, "loss": 0.6442, "step": 18211 }, { "epoch": 1.233958940307609, "grad_norm": 6.490697860717773, "learning_rate": 7.609555753302758e-05, "loss": 0.7055, "step": 18212 }, { "epoch": 1.234026695575581, "grad_norm": 6.739508152008057, "learning_rate": 7.609418851392977e-05, "loss": 0.8067, "step": 18213 }, { "epoch": 1.2340944508435532, "grad_norm": 7.74035120010376, "learning_rate": 7.609281949483195e-05, "loss": 0.8372, "step": 18214 }, { "epoch": 1.2341622061115252, "grad_norm": 6.445488929748535, "learning_rate": 7.609145047573414e-05, "loss": 0.6183, "step": 18215 }, { "epoch": 1.2342299613794974, "grad_norm": 6.993857383728027, "learning_rate": 7.609008145663632e-05, "loss": 0.6401, "step": 18216 }, { "epoch": 1.2342977166474693, "grad_norm": 5.35559606552124, "learning_rate": 7.608871243753851e-05, "loss": 0.5708, "step": 18217 }, { "epoch": 1.2343654719154413, "grad_norm": 8.084784507751465, "learning_rate": 7.608734341844069e-05, "loss": 0.5623, "step": 18218 }, { "epoch": 1.2344332271834135, "grad_norm": 7.187109470367432, "learning_rate": 7.608597439934287e-05, "loss": 0.7409, "step": 18219 }, { "epoch": 1.2345009824513855, "grad_norm": 6.2048563957214355, "learning_rate": 7.608460538024505e-05, "loss": 0.7278, "step": 18220 }, { "epoch": 1.2345687377193577, "grad_norm": 8.183573722839355, "learning_rate": 7.608323636114723e-05, "loss": 0.8511, "step": 18221 }, { "epoch": 1.2346364929873297, "grad_norm": 8.200891494750977, "learning_rate": 7.608186734204942e-05, "loss": 1.0603, "step": 18222 }, { "epoch": 1.234704248255302, "grad_norm": 4.761917591094971, "learning_rate": 7.60804983229516e-05, "loss": 0.7157, "step": 18223 }, { "epoch": 1.2347720035232739, "grad_norm": 8.363795280456543, "learning_rate": 7.607912930385379e-05, "loss": 0.6852, "step": 18224 }, { "epoch": 1.234839758791246, "grad_norm": 5.754380702972412, "learning_rate": 7.607776028475597e-05, "loss": 0.6346, "step": 18225 }, { "epoch": 1.234907514059218, "grad_norm": 5.477109909057617, "learning_rate": 7.607639126565816e-05, "loss": 0.7659, "step": 18226 }, { "epoch": 1.2349752693271903, "grad_norm": 7.943727493286133, "learning_rate": 7.607502224656034e-05, "loss": 0.63, "step": 18227 }, { "epoch": 1.2350430245951622, "grad_norm": 4.878358364105225, "learning_rate": 7.607365322746252e-05, "loss": 0.6735, "step": 18228 }, { "epoch": 1.2351107798631344, "grad_norm": 5.543867111206055, "learning_rate": 7.607228420836471e-05, "loss": 0.689, "step": 18229 }, { "epoch": 1.2351785351311064, "grad_norm": 5.050788402557373, "learning_rate": 7.60709151892669e-05, "loss": 0.7141, "step": 18230 }, { "epoch": 1.2352462903990786, "grad_norm": 11.664437294006348, "learning_rate": 7.606954617016907e-05, "loss": 0.5473, "step": 18231 }, { "epoch": 1.2353140456670506, "grad_norm": 4.634623050689697, "learning_rate": 7.606817715107127e-05, "loss": 0.7636, "step": 18232 }, { "epoch": 1.2353818009350226, "grad_norm": 7.5991339683532715, "learning_rate": 7.606680813197345e-05, "loss": 0.6636, "step": 18233 }, { "epoch": 1.2354495562029948, "grad_norm": 5.919665336608887, "learning_rate": 7.606543911287563e-05, "loss": 0.6424, "step": 18234 }, { "epoch": 1.2355173114709668, "grad_norm": 6.050735950469971, "learning_rate": 7.606407009377782e-05, "loss": 0.7623, "step": 18235 }, { "epoch": 1.235585066738939, "grad_norm": 5.501585483551025, "learning_rate": 7.606270107468e-05, "loss": 0.5839, "step": 18236 }, { "epoch": 1.235652822006911, "grad_norm": 5.354194164276123, "learning_rate": 7.606133205558218e-05, "loss": 0.8305, "step": 18237 }, { "epoch": 1.2357205772748832, "grad_norm": 4.940706729888916, "learning_rate": 7.605996303648436e-05, "loss": 0.6149, "step": 18238 }, { "epoch": 1.2357883325428551, "grad_norm": 7.159313678741455, "learning_rate": 7.605859401738654e-05, "loss": 0.7691, "step": 18239 }, { "epoch": 1.2358560878108273, "grad_norm": 6.895895481109619, "learning_rate": 7.605722499828874e-05, "loss": 0.6597, "step": 18240 }, { "epoch": 1.2359238430787993, "grad_norm": 6.15907621383667, "learning_rate": 7.605585597919092e-05, "loss": 0.7909, "step": 18241 }, { "epoch": 1.2359915983467715, "grad_norm": 7.679660797119141, "learning_rate": 7.60544869600931e-05, "loss": 0.8247, "step": 18242 }, { "epoch": 1.2360593536147435, "grad_norm": 4.731861114501953, "learning_rate": 7.605311794099528e-05, "loss": 0.7492, "step": 18243 }, { "epoch": 1.2361271088827157, "grad_norm": 5.705376625061035, "learning_rate": 7.605174892189746e-05, "loss": 0.7343, "step": 18244 }, { "epoch": 1.2361948641506877, "grad_norm": 7.357006072998047, "learning_rate": 7.605037990279965e-05, "loss": 0.6074, "step": 18245 }, { "epoch": 1.23626261941866, "grad_norm": 7.494833469390869, "learning_rate": 7.604901088370183e-05, "loss": 0.7074, "step": 18246 }, { "epoch": 1.2363303746866319, "grad_norm": 5.229856967926025, "learning_rate": 7.604764186460401e-05, "loss": 0.7422, "step": 18247 }, { "epoch": 1.2363981299546039, "grad_norm": 4.943398475646973, "learning_rate": 7.60462728455062e-05, "loss": 0.5172, "step": 18248 }, { "epoch": 1.236465885222576, "grad_norm": 6.097332954406738, "learning_rate": 7.604490382640839e-05, "loss": 0.7787, "step": 18249 }, { "epoch": 1.2365336404905483, "grad_norm": 5.074896812438965, "learning_rate": 7.604353480731057e-05, "loss": 0.631, "step": 18250 }, { "epoch": 1.2366013957585202, "grad_norm": 6.022866725921631, "learning_rate": 7.604216578821275e-05, "loss": 0.9236, "step": 18251 }, { "epoch": 1.2366691510264922, "grad_norm": 5.703517436981201, "learning_rate": 7.604079676911493e-05, "loss": 0.66, "step": 18252 }, { "epoch": 1.2367369062944644, "grad_norm": 4.512523651123047, "learning_rate": 7.603942775001711e-05, "loss": 0.7138, "step": 18253 }, { "epoch": 1.2368046615624364, "grad_norm": 4.882974624633789, "learning_rate": 7.60380587309193e-05, "loss": 0.7239, "step": 18254 }, { "epoch": 1.2368724168304086, "grad_norm": 5.847234725952148, "learning_rate": 7.603668971182148e-05, "loss": 0.699, "step": 18255 }, { "epoch": 1.2369401720983806, "grad_norm": 7.679762840270996, "learning_rate": 7.603532069272366e-05, "loss": 0.7491, "step": 18256 }, { "epoch": 1.2370079273663528, "grad_norm": 5.321573257446289, "learning_rate": 7.603395167362585e-05, "loss": 0.4653, "step": 18257 }, { "epoch": 1.2370756826343248, "grad_norm": 5.447080612182617, "learning_rate": 7.603258265452804e-05, "loss": 0.6317, "step": 18258 }, { "epoch": 1.237143437902297, "grad_norm": 7.135063648223877, "learning_rate": 7.603121363543022e-05, "loss": 0.7021, "step": 18259 }, { "epoch": 1.237211193170269, "grad_norm": 4.839768886566162, "learning_rate": 7.60298446163324e-05, "loss": 0.5687, "step": 18260 }, { "epoch": 1.2372789484382412, "grad_norm": 7.4310078620910645, "learning_rate": 7.602847559723458e-05, "loss": 0.811, "step": 18261 }, { "epoch": 1.2373467037062131, "grad_norm": 8.49348258972168, "learning_rate": 7.602710657813676e-05, "loss": 0.8388, "step": 18262 }, { "epoch": 1.2374144589741853, "grad_norm": 7.248007297515869, "learning_rate": 7.602573755903895e-05, "loss": 0.6113, "step": 18263 }, { "epoch": 1.2374822142421573, "grad_norm": 4.8018598556518555, "learning_rate": 7.602436853994113e-05, "loss": 0.5807, "step": 18264 }, { "epoch": 1.2375499695101295, "grad_norm": 6.000351428985596, "learning_rate": 7.602299952084331e-05, "loss": 0.7009, "step": 18265 }, { "epoch": 1.2376177247781015, "grad_norm": 6.559912204742432, "learning_rate": 7.60216305017455e-05, "loss": 0.6852, "step": 18266 }, { "epoch": 1.2376854800460735, "grad_norm": 6.837792873382568, "learning_rate": 7.602026148264768e-05, "loss": 1.0492, "step": 18267 }, { "epoch": 1.2377532353140457, "grad_norm": 4.829981803894043, "learning_rate": 7.601889246354987e-05, "loss": 0.566, "step": 18268 }, { "epoch": 1.2378209905820177, "grad_norm": 7.107032299041748, "learning_rate": 7.601752344445205e-05, "loss": 0.6397, "step": 18269 }, { "epoch": 1.2378887458499899, "grad_norm": 6.705806255340576, "learning_rate": 7.601615442535423e-05, "loss": 0.7072, "step": 18270 }, { "epoch": 1.2379565011179618, "grad_norm": 7.603937149047852, "learning_rate": 7.601478540625641e-05, "loss": 0.7151, "step": 18271 }, { "epoch": 1.238024256385934, "grad_norm": 8.943765640258789, "learning_rate": 7.60134163871586e-05, "loss": 0.5127, "step": 18272 }, { "epoch": 1.238092011653906, "grad_norm": 5.192441463470459, "learning_rate": 7.601204736806078e-05, "loss": 0.5709, "step": 18273 }, { "epoch": 1.2381597669218782, "grad_norm": 7.020391941070557, "learning_rate": 7.601067834896297e-05, "loss": 0.7772, "step": 18274 }, { "epoch": 1.2382275221898502, "grad_norm": 7.921938419342041, "learning_rate": 7.600930932986516e-05, "loss": 0.5601, "step": 18275 }, { "epoch": 1.2382952774578224, "grad_norm": 5.29582405090332, "learning_rate": 7.600794031076734e-05, "loss": 0.7777, "step": 18276 }, { "epoch": 1.2383630327257944, "grad_norm": 6.641912937164307, "learning_rate": 7.600657129166952e-05, "loss": 0.9943, "step": 18277 }, { "epoch": 1.2384307879937666, "grad_norm": 5.117624282836914, "learning_rate": 7.600520227257171e-05, "loss": 0.6456, "step": 18278 }, { "epoch": 1.2384985432617386, "grad_norm": 7.091884136199951, "learning_rate": 7.60038332534739e-05, "loss": 0.7467, "step": 18279 }, { "epoch": 1.2385662985297108, "grad_norm": 7.4439005851745605, "learning_rate": 7.600246423437607e-05, "loss": 0.6892, "step": 18280 }, { "epoch": 1.2386340537976828, "grad_norm": 5.143298149108887, "learning_rate": 7.600109521527827e-05, "loss": 0.6211, "step": 18281 }, { "epoch": 1.2387018090656547, "grad_norm": 4.91877555847168, "learning_rate": 7.599972619618045e-05, "loss": 0.635, "step": 18282 }, { "epoch": 1.238769564333627, "grad_norm": 6.771152496337891, "learning_rate": 7.599835717708263e-05, "loss": 0.8141, "step": 18283 }, { "epoch": 1.238837319601599, "grad_norm": 6.854776859283447, "learning_rate": 7.599698815798481e-05, "loss": 0.7984, "step": 18284 }, { "epoch": 1.2389050748695711, "grad_norm": 5.357391357421875, "learning_rate": 7.599561913888699e-05, "loss": 0.6077, "step": 18285 }, { "epoch": 1.2389728301375431, "grad_norm": 7.865238189697266, "learning_rate": 7.599425011978918e-05, "loss": 0.8676, "step": 18286 }, { "epoch": 1.2390405854055153, "grad_norm": 6.712452411651611, "learning_rate": 7.599288110069136e-05, "loss": 0.9323, "step": 18287 }, { "epoch": 1.2391083406734873, "grad_norm": 5.430103778839111, "learning_rate": 7.599151208159354e-05, "loss": 0.5833, "step": 18288 }, { "epoch": 1.2391760959414595, "grad_norm": 5.1226959228515625, "learning_rate": 7.599014306249572e-05, "loss": 0.5945, "step": 18289 }, { "epoch": 1.2392438512094315, "grad_norm": 5.239380836486816, "learning_rate": 7.598877404339792e-05, "loss": 0.5955, "step": 18290 }, { "epoch": 1.2393116064774037, "grad_norm": 5.962562561035156, "learning_rate": 7.59874050243001e-05, "loss": 0.5801, "step": 18291 }, { "epoch": 1.2393793617453757, "grad_norm": 5.283496856689453, "learning_rate": 7.598603600520228e-05, "loss": 0.7513, "step": 18292 }, { "epoch": 1.2394471170133479, "grad_norm": 5.144606113433838, "learning_rate": 7.598466698610446e-05, "loss": 0.6134, "step": 18293 }, { "epoch": 1.2395148722813198, "grad_norm": 5.917706489562988, "learning_rate": 7.598329796700664e-05, "loss": 0.6122, "step": 18294 }, { "epoch": 1.239582627549292, "grad_norm": 6.156318187713623, "learning_rate": 7.598192894790883e-05, "loss": 0.7378, "step": 18295 }, { "epoch": 1.239650382817264, "grad_norm": 5.589748859405518, "learning_rate": 7.598055992881101e-05, "loss": 0.7661, "step": 18296 }, { "epoch": 1.239718138085236, "grad_norm": 5.590287685394287, "learning_rate": 7.59791909097132e-05, "loss": 0.6075, "step": 18297 }, { "epoch": 1.2397858933532082, "grad_norm": 5.150818824768066, "learning_rate": 7.597782189061537e-05, "loss": 0.5828, "step": 18298 }, { "epoch": 1.2398536486211804, "grad_norm": 6.356131553649902, "learning_rate": 7.597645287151755e-05, "loss": 0.735, "step": 18299 }, { "epoch": 1.2399214038891524, "grad_norm": 6.432243824005127, "learning_rate": 7.597508385241975e-05, "loss": 0.7147, "step": 18300 }, { "epoch": 1.2399891591571244, "grad_norm": 6.962221622467041, "learning_rate": 7.597371483332193e-05, "loss": 0.6691, "step": 18301 }, { "epoch": 1.2400569144250966, "grad_norm": 5.084456443786621, "learning_rate": 7.597234581422411e-05, "loss": 0.5687, "step": 18302 }, { "epoch": 1.2401246696930686, "grad_norm": 5.874925136566162, "learning_rate": 7.597097679512629e-05, "loss": 0.7533, "step": 18303 }, { "epoch": 1.2401924249610408, "grad_norm": 7.665027618408203, "learning_rate": 7.596960777602848e-05, "loss": 0.706, "step": 18304 }, { "epoch": 1.2402601802290127, "grad_norm": 6.6125030517578125, "learning_rate": 7.596823875693066e-05, "loss": 0.6882, "step": 18305 }, { "epoch": 1.240327935496985, "grad_norm": 5.991425514221191, "learning_rate": 7.596686973783284e-05, "loss": 0.6645, "step": 18306 }, { "epoch": 1.240395690764957, "grad_norm": 7.070034980773926, "learning_rate": 7.596550071873502e-05, "loss": 0.7398, "step": 18307 }, { "epoch": 1.2404634460329291, "grad_norm": 4.521208763122559, "learning_rate": 7.59641316996372e-05, "loss": 0.498, "step": 18308 }, { "epoch": 1.240531201300901, "grad_norm": 5.832149982452393, "learning_rate": 7.59627626805394e-05, "loss": 0.5592, "step": 18309 }, { "epoch": 1.2405989565688733, "grad_norm": 6.671751976013184, "learning_rate": 7.596139366144158e-05, "loss": 0.6682, "step": 18310 }, { "epoch": 1.2406667118368453, "grad_norm": 5.849062442779541, "learning_rate": 7.596002464234376e-05, "loss": 0.779, "step": 18311 }, { "epoch": 1.2407344671048175, "grad_norm": 7.465084552764893, "learning_rate": 7.595865562324594e-05, "loss": 0.5536, "step": 18312 }, { "epoch": 1.2408022223727895, "grad_norm": 5.540762901306152, "learning_rate": 7.595728660414813e-05, "loss": 0.7038, "step": 18313 }, { "epoch": 1.2408699776407617, "grad_norm": 6.382152080535889, "learning_rate": 7.595591758505031e-05, "loss": 0.7971, "step": 18314 }, { "epoch": 1.2409377329087337, "grad_norm": 4.793679237365723, "learning_rate": 7.59545485659525e-05, "loss": 0.7614, "step": 18315 }, { "epoch": 1.2410054881767056, "grad_norm": 6.767176628112793, "learning_rate": 7.595317954685467e-05, "loss": 0.7421, "step": 18316 }, { "epoch": 1.2410732434446778, "grad_norm": 6.34992790222168, "learning_rate": 7.595181052775686e-05, "loss": 0.7134, "step": 18317 }, { "epoch": 1.2411409987126498, "grad_norm": 8.232148170471191, "learning_rate": 7.595044150865905e-05, "loss": 0.6995, "step": 18318 }, { "epoch": 1.241208753980622, "grad_norm": 4.682238578796387, "learning_rate": 7.594907248956123e-05, "loss": 0.7234, "step": 18319 }, { "epoch": 1.241276509248594, "grad_norm": 6.7042555809021, "learning_rate": 7.594770347046341e-05, "loss": 0.6465, "step": 18320 }, { "epoch": 1.2413442645165662, "grad_norm": 4.933433532714844, "learning_rate": 7.59463344513656e-05, "loss": 0.5732, "step": 18321 }, { "epoch": 1.2414120197845382, "grad_norm": 6.0281171798706055, "learning_rate": 7.594496543226778e-05, "loss": 0.8858, "step": 18322 }, { "epoch": 1.2414797750525104, "grad_norm": 6.028160095214844, "learning_rate": 7.594359641316996e-05, "loss": 0.6607, "step": 18323 }, { "epoch": 1.2415475303204824, "grad_norm": 5.355819225311279, "learning_rate": 7.594222739407216e-05, "loss": 0.5502, "step": 18324 }, { "epoch": 1.2416152855884546, "grad_norm": 4.877525329589844, "learning_rate": 7.594085837497434e-05, "loss": 0.6762, "step": 18325 }, { "epoch": 1.2416830408564266, "grad_norm": 7.059868335723877, "learning_rate": 7.593948935587652e-05, "loss": 0.7334, "step": 18326 }, { "epoch": 1.2417507961243988, "grad_norm": 7.107666015625, "learning_rate": 7.593812033677871e-05, "loss": 0.7463, "step": 18327 }, { "epoch": 1.2418185513923707, "grad_norm": 4.644011497497559, "learning_rate": 7.593675131768089e-05, "loss": 0.61, "step": 18328 }, { "epoch": 1.241886306660343, "grad_norm": 4.502552509307861, "learning_rate": 7.593538229858307e-05, "loss": 0.527, "step": 18329 }, { "epoch": 1.241954061928315, "grad_norm": 5.403695583343506, "learning_rate": 7.593401327948525e-05, "loss": 0.7903, "step": 18330 }, { "epoch": 1.242021817196287, "grad_norm": 8.830596923828125, "learning_rate": 7.593264426038743e-05, "loss": 0.6086, "step": 18331 }, { "epoch": 1.242089572464259, "grad_norm": 6.801039695739746, "learning_rate": 7.593127524128963e-05, "loss": 0.583, "step": 18332 }, { "epoch": 1.242157327732231, "grad_norm": 6.03915548324585, "learning_rate": 7.592990622219181e-05, "loss": 0.8215, "step": 18333 }, { "epoch": 1.2422250830002033, "grad_norm": 7.676496505737305, "learning_rate": 7.592853720309399e-05, "loss": 0.6051, "step": 18334 }, { "epoch": 1.2422928382681753, "grad_norm": 5.050236701965332, "learning_rate": 7.592716818399617e-05, "loss": 0.5496, "step": 18335 }, { "epoch": 1.2423605935361475, "grad_norm": 6.2748212814331055, "learning_rate": 7.592579916489836e-05, "loss": 0.9785, "step": 18336 }, { "epoch": 1.2424283488041195, "grad_norm": 7.210531711578369, "learning_rate": 7.592443014580054e-05, "loss": 0.936, "step": 18337 }, { "epoch": 1.2424961040720917, "grad_norm": 5.671183109283447, "learning_rate": 7.592306112670272e-05, "loss": 0.6438, "step": 18338 }, { "epoch": 1.2425638593400636, "grad_norm": 5.4255690574646, "learning_rate": 7.59216921076049e-05, "loss": 0.614, "step": 18339 }, { "epoch": 1.2426316146080358, "grad_norm": 9.136259078979492, "learning_rate": 7.592032308850708e-05, "loss": 0.863, "step": 18340 }, { "epoch": 1.2426993698760078, "grad_norm": 9.439860343933105, "learning_rate": 7.591895406940928e-05, "loss": 0.5626, "step": 18341 }, { "epoch": 1.24276712514398, "grad_norm": 5.82816743850708, "learning_rate": 7.591758505031146e-05, "loss": 0.8564, "step": 18342 }, { "epoch": 1.242834880411952, "grad_norm": 8.167750358581543, "learning_rate": 7.591621603121364e-05, "loss": 0.5207, "step": 18343 }, { "epoch": 1.2429026356799242, "grad_norm": 6.8161940574646, "learning_rate": 7.591484701211582e-05, "loss": 0.7033, "step": 18344 }, { "epoch": 1.2429703909478962, "grad_norm": 6.037930965423584, "learning_rate": 7.591347799301801e-05, "loss": 0.9543, "step": 18345 }, { "epoch": 1.2430381462158682, "grad_norm": 6.356756687164307, "learning_rate": 7.59121089739202e-05, "loss": 0.426, "step": 18346 }, { "epoch": 1.2431059014838404, "grad_norm": 6.194242477416992, "learning_rate": 7.591073995482237e-05, "loss": 0.6365, "step": 18347 }, { "epoch": 1.2431736567518126, "grad_norm": 6.263245105743408, "learning_rate": 7.590937093572455e-05, "loss": 0.7125, "step": 18348 }, { "epoch": 1.2432414120197846, "grad_norm": 8.249999046325684, "learning_rate": 7.590800191662673e-05, "loss": 0.7486, "step": 18349 }, { "epoch": 1.2433091672877565, "grad_norm": 6.465185165405273, "learning_rate": 7.590663289752893e-05, "loss": 0.588, "step": 18350 }, { "epoch": 1.2433769225557287, "grad_norm": 6.615595817565918, "learning_rate": 7.590526387843111e-05, "loss": 0.8067, "step": 18351 }, { "epoch": 1.2434446778237007, "grad_norm": 5.744843482971191, "learning_rate": 7.590389485933329e-05, "loss": 0.7306, "step": 18352 }, { "epoch": 1.243512433091673, "grad_norm": 5.4917144775390625, "learning_rate": 7.590252584023547e-05, "loss": 0.6532, "step": 18353 }, { "epoch": 1.243580188359645, "grad_norm": 4.359947204589844, "learning_rate": 7.590115682113765e-05, "loss": 0.7198, "step": 18354 }, { "epoch": 1.243647943627617, "grad_norm": 5.287740707397461, "learning_rate": 7.589978780203984e-05, "loss": 0.6558, "step": 18355 }, { "epoch": 1.243715698895589, "grad_norm": 5.208303928375244, "learning_rate": 7.589841878294202e-05, "loss": 0.7272, "step": 18356 }, { "epoch": 1.2437834541635613, "grad_norm": 5.912323474884033, "learning_rate": 7.58970497638442e-05, "loss": 0.6226, "step": 18357 }, { "epoch": 1.2438512094315333, "grad_norm": 4.994621753692627, "learning_rate": 7.589568074474638e-05, "loss": 0.6522, "step": 18358 }, { "epoch": 1.2439189646995055, "grad_norm": 5.531447410583496, "learning_rate": 7.589431172564858e-05, "loss": 0.6913, "step": 18359 }, { "epoch": 1.2439867199674775, "grad_norm": 6.614006996154785, "learning_rate": 7.589294270655076e-05, "loss": 0.5982, "step": 18360 }, { "epoch": 1.2440544752354497, "grad_norm": 6.066467761993408, "learning_rate": 7.589157368745294e-05, "loss": 0.7662, "step": 18361 }, { "epoch": 1.2441222305034216, "grad_norm": 4.1904826164245605, "learning_rate": 7.589020466835512e-05, "loss": 0.3991, "step": 18362 }, { "epoch": 1.2441899857713938, "grad_norm": 6.30812931060791, "learning_rate": 7.58888356492573e-05, "loss": 0.6071, "step": 18363 }, { "epoch": 1.2442577410393658, "grad_norm": 4.418458461761475, "learning_rate": 7.58874666301595e-05, "loss": 0.661, "step": 18364 }, { "epoch": 1.2443254963073378, "grad_norm": 6.085812568664551, "learning_rate": 7.588609761106167e-05, "loss": 0.5891, "step": 18365 }, { "epoch": 1.24439325157531, "grad_norm": 5.265291690826416, "learning_rate": 7.588472859196385e-05, "loss": 0.6936, "step": 18366 }, { "epoch": 1.244461006843282, "grad_norm": 5.662139892578125, "learning_rate": 7.588335957286605e-05, "loss": 0.6085, "step": 18367 }, { "epoch": 1.2445287621112542, "grad_norm": 4.7674241065979, "learning_rate": 7.588199055376823e-05, "loss": 0.5873, "step": 18368 }, { "epoch": 1.2445965173792262, "grad_norm": 5.590237617492676, "learning_rate": 7.588062153467041e-05, "loss": 0.603, "step": 18369 }, { "epoch": 1.2446642726471984, "grad_norm": 5.36829137802124, "learning_rate": 7.58792525155726e-05, "loss": 0.5697, "step": 18370 }, { "epoch": 1.2447320279151703, "grad_norm": 4.962731838226318, "learning_rate": 7.587788349647478e-05, "loss": 0.5829, "step": 18371 }, { "epoch": 1.2447997831831425, "grad_norm": 5.7452802658081055, "learning_rate": 7.587651447737696e-05, "loss": 0.6695, "step": 18372 }, { "epoch": 1.2448675384511145, "grad_norm": 7.008640766143799, "learning_rate": 7.587514545827916e-05, "loss": 0.834, "step": 18373 }, { "epoch": 1.2449352937190867, "grad_norm": 9.434866905212402, "learning_rate": 7.587377643918134e-05, "loss": 0.618, "step": 18374 }, { "epoch": 1.2450030489870587, "grad_norm": 5.060585975646973, "learning_rate": 7.587240742008352e-05, "loss": 0.618, "step": 18375 }, { "epoch": 1.245070804255031, "grad_norm": 5.655799865722656, "learning_rate": 7.58710384009857e-05, "loss": 0.8284, "step": 18376 }, { "epoch": 1.245138559523003, "grad_norm": 7.925520420074463, "learning_rate": 7.586966938188788e-05, "loss": 0.7431, "step": 18377 }, { "epoch": 1.245206314790975, "grad_norm": 5.230562686920166, "learning_rate": 7.586830036279007e-05, "loss": 0.761, "step": 18378 }, { "epoch": 1.245274070058947, "grad_norm": 5.60720157623291, "learning_rate": 7.586693134369225e-05, "loss": 0.5065, "step": 18379 }, { "epoch": 1.245341825326919, "grad_norm": 6.819098472595215, "learning_rate": 7.586556232459443e-05, "loss": 0.6631, "step": 18380 }, { "epoch": 1.2454095805948913, "grad_norm": 7.610952377319336, "learning_rate": 7.586419330549661e-05, "loss": 0.8025, "step": 18381 }, { "epoch": 1.2454773358628632, "grad_norm": 5.915439605712891, "learning_rate": 7.586282428639881e-05, "loss": 0.781, "step": 18382 }, { "epoch": 1.2455450911308354, "grad_norm": 4.912126541137695, "learning_rate": 7.586145526730099e-05, "loss": 0.5188, "step": 18383 }, { "epoch": 1.2456128463988074, "grad_norm": 5.676177024841309, "learning_rate": 7.586008624820317e-05, "loss": 0.5475, "step": 18384 }, { "epoch": 1.2456806016667796, "grad_norm": 4.516726493835449, "learning_rate": 7.585871722910535e-05, "loss": 0.6143, "step": 18385 }, { "epoch": 1.2457483569347516, "grad_norm": 8.950729370117188, "learning_rate": 7.585734821000753e-05, "loss": 0.5928, "step": 18386 }, { "epoch": 1.2458161122027238, "grad_norm": 7.224480628967285, "learning_rate": 7.585597919090972e-05, "loss": 0.7095, "step": 18387 }, { "epoch": 1.2458838674706958, "grad_norm": 6.877074241638184, "learning_rate": 7.58546101718119e-05, "loss": 0.7253, "step": 18388 }, { "epoch": 1.245951622738668, "grad_norm": 7.589900970458984, "learning_rate": 7.585324115271408e-05, "loss": 0.8208, "step": 18389 }, { "epoch": 1.24601937800664, "grad_norm": 6.306529998779297, "learning_rate": 7.585187213361626e-05, "loss": 0.5556, "step": 18390 }, { "epoch": 1.2460871332746122, "grad_norm": 5.165809631347656, "learning_rate": 7.585050311451846e-05, "loss": 0.8348, "step": 18391 }, { "epoch": 1.2461548885425842, "grad_norm": 6.17510986328125, "learning_rate": 7.584913409542064e-05, "loss": 0.7496, "step": 18392 }, { "epoch": 1.2462226438105564, "grad_norm": 6.940537929534912, "learning_rate": 7.584776507632282e-05, "loss": 0.9307, "step": 18393 }, { "epoch": 1.2462903990785283, "grad_norm": 5.258039951324463, "learning_rate": 7.5846396057225e-05, "loss": 0.5341, "step": 18394 }, { "epoch": 1.2463581543465003, "grad_norm": 5.3323774337768555, "learning_rate": 7.584502703812718e-05, "loss": 0.5748, "step": 18395 }, { "epoch": 1.2464259096144725, "grad_norm": 6.254161834716797, "learning_rate": 7.584365801902937e-05, "loss": 0.7838, "step": 18396 }, { "epoch": 1.2464936648824447, "grad_norm": 6.0442214012146, "learning_rate": 7.584228899993155e-05, "loss": 0.7778, "step": 18397 }, { "epoch": 1.2465614201504167, "grad_norm": 6.557648658752441, "learning_rate": 7.584091998083373e-05, "loss": 0.7761, "step": 18398 }, { "epoch": 1.2466291754183887, "grad_norm": 6.024246692657471, "learning_rate": 7.583955096173591e-05, "loss": 0.8143, "step": 18399 }, { "epoch": 1.246696930686361, "grad_norm": 6.548332691192627, "learning_rate": 7.58381819426381e-05, "loss": 0.7228, "step": 18400 }, { "epoch": 1.2467646859543329, "grad_norm": 8.239317893981934, "learning_rate": 7.583681292354029e-05, "loss": 0.681, "step": 18401 }, { "epoch": 1.246832441222305, "grad_norm": 6.653371810913086, "learning_rate": 7.583544390444247e-05, "loss": 0.5783, "step": 18402 }, { "epoch": 1.246900196490277, "grad_norm": 5.489253044128418, "learning_rate": 7.583407488534465e-05, "loss": 0.7057, "step": 18403 }, { "epoch": 1.2469679517582493, "grad_norm": 10.217595100402832, "learning_rate": 7.583270586624683e-05, "loss": 0.821, "step": 18404 }, { "epoch": 1.2470357070262212, "grad_norm": 5.781734466552734, "learning_rate": 7.583133684714902e-05, "loss": 0.8124, "step": 18405 }, { "epoch": 1.2471034622941934, "grad_norm": 5.485482692718506, "learning_rate": 7.58299678280512e-05, "loss": 0.3988, "step": 18406 }, { "epoch": 1.2471712175621654, "grad_norm": 5.147676944732666, "learning_rate": 7.582859880895338e-05, "loss": 0.6227, "step": 18407 }, { "epoch": 1.2472389728301376, "grad_norm": 5.972175121307373, "learning_rate": 7.582722978985556e-05, "loss": 0.7966, "step": 18408 }, { "epoch": 1.2473067280981096, "grad_norm": 4.4100728034973145, "learning_rate": 7.582586077075774e-05, "loss": 0.5352, "step": 18409 }, { "epoch": 1.2473744833660818, "grad_norm": 8.67353343963623, "learning_rate": 7.582449175165994e-05, "loss": 0.7738, "step": 18410 }, { "epoch": 1.2474422386340538, "grad_norm": 7.108034133911133, "learning_rate": 7.582312273256212e-05, "loss": 0.5752, "step": 18411 }, { "epoch": 1.247509993902026, "grad_norm": 5.655974388122559, "learning_rate": 7.58217537134643e-05, "loss": 0.7622, "step": 18412 }, { "epoch": 1.247577749169998, "grad_norm": 6.0239996910095215, "learning_rate": 7.582038469436649e-05, "loss": 0.4806, "step": 18413 }, { "epoch": 1.24764550443797, "grad_norm": 7.3847527503967285, "learning_rate": 7.581901567526867e-05, "loss": 0.8353, "step": 18414 }, { "epoch": 1.2477132597059422, "grad_norm": 5.64340353012085, "learning_rate": 7.581764665617085e-05, "loss": 0.6279, "step": 18415 }, { "epoch": 1.2477810149739141, "grad_norm": 6.056209564208984, "learning_rate": 7.581627763707305e-05, "loss": 0.7538, "step": 18416 }, { "epoch": 1.2478487702418863, "grad_norm": 5.869894981384277, "learning_rate": 7.581490861797523e-05, "loss": 0.6976, "step": 18417 }, { "epoch": 1.2479165255098583, "grad_norm": 5.403618335723877, "learning_rate": 7.581353959887741e-05, "loss": 0.618, "step": 18418 }, { "epoch": 1.2479842807778305, "grad_norm": 4.804281234741211, "learning_rate": 7.58121705797796e-05, "loss": 0.4805, "step": 18419 }, { "epoch": 1.2480520360458025, "grad_norm": 5.022444725036621, "learning_rate": 7.581080156068178e-05, "loss": 0.9231, "step": 18420 }, { "epoch": 1.2481197913137747, "grad_norm": 5.900055885314941, "learning_rate": 7.580943254158396e-05, "loss": 0.6079, "step": 18421 }, { "epoch": 1.2481875465817467, "grad_norm": 5.404339790344238, "learning_rate": 7.580806352248614e-05, "loss": 0.6443, "step": 18422 }, { "epoch": 1.248255301849719, "grad_norm": 4.7197184562683105, "learning_rate": 7.580669450338834e-05, "loss": 0.6525, "step": 18423 }, { "epoch": 1.2483230571176909, "grad_norm": 6.294169902801514, "learning_rate": 7.580532548429052e-05, "loss": 0.6796, "step": 18424 }, { "epoch": 1.248390812385663, "grad_norm": 5.794606685638428, "learning_rate": 7.58039564651927e-05, "loss": 0.5467, "step": 18425 }, { "epoch": 1.248458567653635, "grad_norm": 4.591580867767334, "learning_rate": 7.580258744609488e-05, "loss": 0.7601, "step": 18426 }, { "epoch": 1.2485263229216073, "grad_norm": 4.168753147125244, "learning_rate": 7.580121842699706e-05, "loss": 0.5565, "step": 18427 }, { "epoch": 1.2485940781895792, "grad_norm": 7.54253625869751, "learning_rate": 7.579984940789925e-05, "loss": 0.6358, "step": 18428 }, { "epoch": 1.2486618334575512, "grad_norm": 4.858458518981934, "learning_rate": 7.579848038880143e-05, "loss": 0.5466, "step": 18429 }, { "epoch": 1.2487295887255234, "grad_norm": 7.536377429962158, "learning_rate": 7.579711136970361e-05, "loss": 0.6683, "step": 18430 }, { "epoch": 1.2487973439934954, "grad_norm": 6.619974613189697, "learning_rate": 7.57957423506058e-05, "loss": 0.5221, "step": 18431 }, { "epoch": 1.2488650992614676, "grad_norm": 9.248383522033691, "learning_rate": 7.579437333150797e-05, "loss": 0.5517, "step": 18432 }, { "epoch": 1.2489328545294396, "grad_norm": 7.319504261016846, "learning_rate": 7.579300431241017e-05, "loss": 0.7803, "step": 18433 }, { "epoch": 1.2490006097974118, "grad_norm": 6.124282360076904, "learning_rate": 7.579163529331235e-05, "loss": 0.6649, "step": 18434 }, { "epoch": 1.2490683650653838, "grad_norm": 4.615629196166992, "learning_rate": 7.579026627421453e-05, "loss": 0.6239, "step": 18435 }, { "epoch": 1.249136120333356, "grad_norm": 7.09864616394043, "learning_rate": 7.578889725511671e-05, "loss": 0.7135, "step": 18436 }, { "epoch": 1.249203875601328, "grad_norm": 5.639509677886963, "learning_rate": 7.57875282360189e-05, "loss": 0.8308, "step": 18437 }, { "epoch": 1.2492716308693002, "grad_norm": 5.514218807220459, "learning_rate": 7.578615921692108e-05, "loss": 0.51, "step": 18438 }, { "epoch": 1.2493393861372721, "grad_norm": 5.990070343017578, "learning_rate": 7.578479019782326e-05, "loss": 0.5471, "step": 18439 }, { "epoch": 1.2494071414052443, "grad_norm": 5.667049407958984, "learning_rate": 7.578342117872544e-05, "loss": 0.5892, "step": 18440 }, { "epoch": 1.2494748966732163, "grad_norm": 7.905735015869141, "learning_rate": 7.578205215962762e-05, "loss": 0.8953, "step": 18441 }, { "epoch": 1.2495426519411885, "grad_norm": 6.189059734344482, "learning_rate": 7.578068314052982e-05, "loss": 1.0339, "step": 18442 }, { "epoch": 1.2496104072091605, "grad_norm": 5.740088939666748, "learning_rate": 7.5779314121432e-05, "loss": 0.7114, "step": 18443 }, { "epoch": 1.2496781624771325, "grad_norm": 6.6557698249816895, "learning_rate": 7.577794510233418e-05, "loss": 0.9342, "step": 18444 }, { "epoch": 1.2497459177451047, "grad_norm": 4.974968910217285, "learning_rate": 7.577657608323636e-05, "loss": 0.736, "step": 18445 }, { "epoch": 1.2497459177451047, "eval_loss": 0.7137033939361572, "eval_noise_accuracy": 0.0, "eval_runtime": 1472.2965, "eval_samples_per_second": 3.49, "eval_steps_per_second": 0.219, "eval_wer": 67.47337717729287, "step": 18445 } ], "logging_steps": 1, "max_steps": 73795, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 3689, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.26547613884416e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }