{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 10775, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.280742459396752e-05, "grad_norm": 4879.61962890625, "learning_rate": 0.0, "loss": 127.3116, "step": 1 }, { "epoch": 0.00018561484918793505, "grad_norm": 3584.2548828125, "learning_rate": 1.855287569573284e-08, "loss": 115.5515, "step": 2 }, { "epoch": 0.00027842227378190254, "grad_norm": 4002.786376953125, "learning_rate": 3.710575139146568e-08, "loss": 117.9382, "step": 3 }, { "epoch": 0.0003712296983758701, "grad_norm": 4412.97509765625, "learning_rate": 5.565862708719852e-08, "loss": 129.7185, "step": 4 }, { "epoch": 0.0004640371229698376, "grad_norm": 4368.734375, "learning_rate": 7.421150278293136e-08, "loss": 129.6791, "step": 5 }, { "epoch": 0.0005568445475638051, "grad_norm": 4310.92236328125, "learning_rate": 9.27643784786642e-08, "loss": 122.0588, "step": 6 }, { "epoch": 0.0006496519721577726, "grad_norm": 3980.038818359375, "learning_rate": 1.1131725417439704e-07, "loss": 109.4246, "step": 7 }, { "epoch": 0.0007424593967517402, "grad_norm": 4652.45556640625, "learning_rate": 1.298701298701299e-07, "loss": 130.0705, "step": 8 }, { "epoch": 0.0008352668213457077, "grad_norm": 3789.30615234375, "learning_rate": 1.484230055658627e-07, "loss": 116.8366, "step": 9 }, { "epoch": 0.0009280742459396752, "grad_norm": 4701.10107421875, "learning_rate": 1.6697588126159556e-07, "loss": 125.586, "step": 10 }, { "epoch": 0.0010208816705336427, "grad_norm": 4245.4521484375, "learning_rate": 1.855287569573284e-07, "loss": 112.2454, "step": 11 }, { "epoch": 0.0011136890951276102, "grad_norm": 4051.072265625, "learning_rate": 2.0408163265306121e-07, "loss": 110.9059, "step": 12 }, { "epoch": 0.0012064965197215777, "grad_norm": 4385.4423828125, "learning_rate": 2.2263450834879408e-07, "loss": 120.4515, "step": 13 }, { "epoch": 0.0012993039443155452, "grad_norm": 3833.18408203125, "learning_rate": 2.411873840445269e-07, "loss": 117.0475, "step": 14 }, { "epoch": 0.0013921113689095127, "grad_norm": 3868.090576171875, "learning_rate": 2.597402597402598e-07, "loss": 116.3673, "step": 15 }, { "epoch": 0.0014849187935034804, "grad_norm": 3306.6806640625, "learning_rate": 2.782931354359926e-07, "loss": 108.5505, "step": 16 }, { "epoch": 0.0015777262180974479, "grad_norm": 2663.409912109375, "learning_rate": 2.968460111317254e-07, "loss": 96.7325, "step": 17 }, { "epoch": 0.0016705336426914154, "grad_norm": 2659.489501953125, "learning_rate": 3.153988868274583e-07, "loss": 99.288, "step": 18 }, { "epoch": 0.0017633410672853829, "grad_norm": 2729.846435546875, "learning_rate": 3.339517625231911e-07, "loss": 105.9779, "step": 19 }, { "epoch": 0.0018561484918793504, "grad_norm": 2637.698974609375, "learning_rate": 3.5250463821892393e-07, "loss": 97.2174, "step": 20 }, { "epoch": 0.0019489559164733179, "grad_norm": 2240.01904296875, "learning_rate": 3.710575139146568e-07, "loss": 90.1337, "step": 21 }, { "epoch": 0.0020417633410672853, "grad_norm": 1769.114501953125, "learning_rate": 3.896103896103896e-07, "loss": 81.0407, "step": 22 }, { "epoch": 0.002134570765661253, "grad_norm": 1401.95263671875, "learning_rate": 4.0816326530612243e-07, "loss": 75.0801, "step": 23 }, { "epoch": 0.0022273781902552203, "grad_norm": 1264.65234375, "learning_rate": 4.2671614100185535e-07, "loss": 72.3649, "step": 24 }, { "epoch": 0.002320185614849188, "grad_norm": 1413.0771484375, "learning_rate": 4.4526901669758817e-07, "loss": 74.5712, "step": 25 }, { "epoch": 0.0024129930394431553, "grad_norm": 1236.603515625, "learning_rate": 4.63821892393321e-07, "loss": 70.4863, "step": 26 }, { "epoch": 0.002505800464037123, "grad_norm": 1077.2777099609375, "learning_rate": 4.823747680890538e-07, "loss": 67.816, "step": 27 }, { "epoch": 0.0025986078886310903, "grad_norm": 1035.0184326171875, "learning_rate": 5.009276437847867e-07, "loss": 65.5894, "step": 28 }, { "epoch": 0.002691415313225058, "grad_norm": 1013.7430419921875, "learning_rate": 5.194805194805196e-07, "loss": 66.1121, "step": 29 }, { "epoch": 0.0027842227378190253, "grad_norm": 656.8074951171875, "learning_rate": 5.380333951762524e-07, "loss": 58.3663, "step": 30 }, { "epoch": 0.0028770301624129932, "grad_norm": 517.000732421875, "learning_rate": 5.565862708719852e-07, "loss": 53.7345, "step": 31 }, { "epoch": 0.0029698375870069607, "grad_norm": 442.198974609375, "learning_rate": 5.75139146567718e-07, "loss": 51.5148, "step": 32 }, { "epoch": 0.0030626450116009282, "grad_norm": 379.69989013671875, "learning_rate": 5.936920222634509e-07, "loss": 47.4924, "step": 33 }, { "epoch": 0.0031554524361948957, "grad_norm": 342.2025146484375, "learning_rate": 6.122448979591837e-07, "loss": 49.4764, "step": 34 }, { "epoch": 0.0032482598607888632, "grad_norm": 275.24273681640625, "learning_rate": 6.307977736549166e-07, "loss": 45.1825, "step": 35 }, { "epoch": 0.0033410672853828307, "grad_norm": 262.60479736328125, "learning_rate": 6.493506493506493e-07, "loss": 50.9274, "step": 36 }, { "epoch": 0.0034338747099767982, "grad_norm": 261.78338623046875, "learning_rate": 6.679035250463822e-07, "loss": 45.3698, "step": 37 }, { "epoch": 0.0035266821345707657, "grad_norm": 214.75494384765625, "learning_rate": 6.864564007421151e-07, "loss": 47.8782, "step": 38 }, { "epoch": 0.0036194895591647332, "grad_norm": 197.0145263671875, "learning_rate": 7.050092764378479e-07, "loss": 46.8679, "step": 39 }, { "epoch": 0.0037122969837587007, "grad_norm": 168.3210906982422, "learning_rate": 7.235621521335808e-07, "loss": 44.6622, "step": 40 }, { "epoch": 0.003805104408352668, "grad_norm": 184.19859313964844, "learning_rate": 7.421150278293136e-07, "loss": 47.8012, "step": 41 }, { "epoch": 0.0038979118329466357, "grad_norm": 189.7224578857422, "learning_rate": 7.606679035250464e-07, "loss": 44.9753, "step": 42 }, { "epoch": 0.003990719257540604, "grad_norm": 144.7606658935547, "learning_rate": 7.792207792207792e-07, "loss": 43.0684, "step": 43 }, { "epoch": 0.004083526682134571, "grad_norm": 149.78131103515625, "learning_rate": 7.977736549165121e-07, "loss": 40.4799, "step": 44 }, { "epoch": 0.004176334106728539, "grad_norm": 146.1254425048828, "learning_rate": 8.163265306122449e-07, "loss": 40.3114, "step": 45 }, { "epoch": 0.004269141531322506, "grad_norm": 132.22535705566406, "learning_rate": 8.348794063079778e-07, "loss": 40.8047, "step": 46 }, { "epoch": 0.004361948955916474, "grad_norm": 119.17462158203125, "learning_rate": 8.534322820037107e-07, "loss": 40.6279, "step": 47 }, { "epoch": 0.004454756380510441, "grad_norm": 105.6717758178711, "learning_rate": 8.719851576994434e-07, "loss": 37.5216, "step": 48 }, { "epoch": 0.004547563805104409, "grad_norm": 96.33718872070312, "learning_rate": 8.905380333951763e-07, "loss": 40.0905, "step": 49 }, { "epoch": 0.004640371229698376, "grad_norm": 91.1793212890625, "learning_rate": 9.090909090909091e-07, "loss": 38.8028, "step": 50 }, { "epoch": 0.004733178654292344, "grad_norm": 87.77056121826172, "learning_rate": 9.27643784786642e-07, "loss": 37.5829, "step": 51 }, { "epoch": 0.004825986078886311, "grad_norm": 91.8969955444336, "learning_rate": 9.461966604823748e-07, "loss": 39.5026, "step": 52 }, { "epoch": 0.004918793503480279, "grad_norm": 97.51395416259766, "learning_rate": 9.647495361781077e-07, "loss": 38.9205, "step": 53 }, { "epoch": 0.005011600928074246, "grad_norm": 89.4168472290039, "learning_rate": 9.833024118738404e-07, "loss": 37.1574, "step": 54 }, { "epoch": 0.005104408352668214, "grad_norm": 91.33997344970703, "learning_rate": 1.0018552875695733e-06, "loss": 38.7041, "step": 55 }, { "epoch": 0.005197215777262181, "grad_norm": 104.73255920410156, "learning_rate": 1.0204081632653063e-06, "loss": 41.5397, "step": 56 }, { "epoch": 0.005290023201856149, "grad_norm": 88.4401626586914, "learning_rate": 1.0389610389610392e-06, "loss": 37.4754, "step": 57 }, { "epoch": 0.005382830626450116, "grad_norm": 82.26273345947266, "learning_rate": 1.0575139146567719e-06, "loss": 37.2335, "step": 58 }, { "epoch": 0.005475638051044084, "grad_norm": 80.2092514038086, "learning_rate": 1.0760667903525048e-06, "loss": 37.7407, "step": 59 }, { "epoch": 0.005568445475638051, "grad_norm": 75.86572265625, "learning_rate": 1.0946196660482377e-06, "loss": 39.1427, "step": 60 }, { "epoch": 0.005661252900232019, "grad_norm": 67.89360809326172, "learning_rate": 1.1131725417439704e-06, "loss": 34.2571, "step": 61 }, { "epoch": 0.0057540603248259865, "grad_norm": 71.97481536865234, "learning_rate": 1.1317254174397034e-06, "loss": 34.7685, "step": 62 }, { "epoch": 0.0058468677494199536, "grad_norm": 74.9554443359375, "learning_rate": 1.150278293135436e-06, "loss": 37.9933, "step": 63 }, { "epoch": 0.0059396751740139215, "grad_norm": 71.85779571533203, "learning_rate": 1.168831168831169e-06, "loss": 35.5762, "step": 64 }, { "epoch": 0.0060324825986078886, "grad_norm": 71.91780090332031, "learning_rate": 1.1873840445269017e-06, "loss": 37.2633, "step": 65 }, { "epoch": 0.0061252900232018565, "grad_norm": 69.94337463378906, "learning_rate": 1.2059369202226346e-06, "loss": 34.4513, "step": 66 }, { "epoch": 0.0062180974477958235, "grad_norm": 66.53661346435547, "learning_rate": 1.2244897959183673e-06, "loss": 34.982, "step": 67 }, { "epoch": 0.0063109048723897915, "grad_norm": 73.41341400146484, "learning_rate": 1.2430426716141003e-06, "loss": 35.324, "step": 68 }, { "epoch": 0.0064037122969837585, "grad_norm": 68.86168670654297, "learning_rate": 1.2615955473098332e-06, "loss": 33.2588, "step": 69 }, { "epoch": 0.0064965197215777265, "grad_norm": 70.13764953613281, "learning_rate": 1.280148423005566e-06, "loss": 33.5898, "step": 70 }, { "epoch": 0.0065893271461716935, "grad_norm": 373.185791015625, "learning_rate": 1.2987012987012986e-06, "loss": 32.8635, "step": 71 }, { "epoch": 0.0066821345707656615, "grad_norm": 73.05149841308594, "learning_rate": 1.3172541743970315e-06, "loss": 36.3774, "step": 72 }, { "epoch": 0.0067749419953596285, "grad_norm": 70.08222198486328, "learning_rate": 1.3358070500927644e-06, "loss": 32.3371, "step": 73 }, { "epoch": 0.0068677494199535965, "grad_norm": 66.96409606933594, "learning_rate": 1.3543599257884974e-06, "loss": 34.1134, "step": 74 }, { "epoch": 0.0069605568445475635, "grad_norm": 146.2729949951172, "learning_rate": 1.3729128014842303e-06, "loss": 33.4873, "step": 75 }, { "epoch": 0.0070533642691415314, "grad_norm": 269.388427734375, "learning_rate": 1.3914656771799632e-06, "loss": 33.8864, "step": 76 }, { "epoch": 0.0071461716937354985, "grad_norm": 265.2674560546875, "learning_rate": 1.4100185528756957e-06, "loss": 31.536, "step": 77 }, { "epoch": 0.0072389791183294664, "grad_norm": 160.02296447753906, "learning_rate": 1.4285714285714286e-06, "loss": 32.8868, "step": 78 }, { "epoch": 0.0073317865429234335, "grad_norm": 122.47138977050781, "learning_rate": 1.4471243042671615e-06, "loss": 32.6768, "step": 79 }, { "epoch": 0.007424593967517401, "grad_norm": 95.20236206054688, "learning_rate": 1.4656771799628945e-06, "loss": 31.9629, "step": 80 }, { "epoch": 0.0075174013921113685, "grad_norm": 95.63384246826172, "learning_rate": 1.4842300556586272e-06, "loss": 33.9723, "step": 81 }, { "epoch": 0.007610208816705336, "grad_norm": 74.59329223632812, "learning_rate": 1.50278293135436e-06, "loss": 30.8744, "step": 82 }, { "epoch": 0.007703016241299304, "grad_norm": 72.58363342285156, "learning_rate": 1.5213358070500928e-06, "loss": 33.7405, "step": 83 }, { "epoch": 0.007795823665893271, "grad_norm": 190.61795043945312, "learning_rate": 1.5398886827458257e-06, "loss": 33.3369, "step": 84 }, { "epoch": 0.00788863109048724, "grad_norm": 181.49847412109375, "learning_rate": 1.5584415584415584e-06, "loss": 33.4845, "step": 85 }, { "epoch": 0.007981438515081207, "grad_norm": 82.15458679199219, "learning_rate": 1.5769944341372914e-06, "loss": 34.9714, "step": 86 }, { "epoch": 0.008074245939675173, "grad_norm": 69.4637451171875, "learning_rate": 1.5955473098330243e-06, "loss": 32.1142, "step": 87 }, { "epoch": 0.008167053364269141, "grad_norm": 69.38495635986328, "learning_rate": 1.6141001855287572e-06, "loss": 30.2396, "step": 88 }, { "epoch": 0.00825986078886311, "grad_norm": 68.47895812988281, "learning_rate": 1.6326530612244897e-06, "loss": 31.5425, "step": 89 }, { "epoch": 0.008352668213457077, "grad_norm": 71.27876281738281, "learning_rate": 1.6512059369202226e-06, "loss": 33.8468, "step": 90 }, { "epoch": 0.008445475638051043, "grad_norm": 61.44715118408203, "learning_rate": 1.6697588126159556e-06, "loss": 32.2132, "step": 91 }, { "epoch": 0.008538283062645011, "grad_norm": 66.36126708984375, "learning_rate": 1.6883116883116885e-06, "loss": 30.25, "step": 92 }, { "epoch": 0.00863109048723898, "grad_norm": 62.65122604370117, "learning_rate": 1.7068645640074214e-06, "loss": 30.9014, "step": 93 }, { "epoch": 0.008723897911832947, "grad_norm": 69.48603057861328, "learning_rate": 1.7254174397031543e-06, "loss": 33.1191, "step": 94 }, { "epoch": 0.008816705336426913, "grad_norm": 59.694610595703125, "learning_rate": 1.7439703153988868e-06, "loss": 30.5837, "step": 95 }, { "epoch": 0.008909512761020881, "grad_norm": 65.10872650146484, "learning_rate": 1.7625231910946197e-06, "loss": 32.2251, "step": 96 }, { "epoch": 0.00900232018561485, "grad_norm": 63.26470184326172, "learning_rate": 1.7810760667903527e-06, "loss": 31.494, "step": 97 }, { "epoch": 0.009095127610208817, "grad_norm": 61.61127471923828, "learning_rate": 1.7996289424860856e-06, "loss": 31.3901, "step": 98 }, { "epoch": 0.009187935034802783, "grad_norm": 59.60426712036133, "learning_rate": 1.8181818181818183e-06, "loss": 31.1043, "step": 99 }, { "epoch": 0.009280742459396751, "grad_norm": 63.2060661315918, "learning_rate": 1.8367346938775512e-06, "loss": 30.0112, "step": 100 }, { "epoch": 0.00937354988399072, "grad_norm": 58.36423110961914, "learning_rate": 1.855287569573284e-06, "loss": 29.8153, "step": 101 }, { "epoch": 0.009466357308584687, "grad_norm": 54.467464447021484, "learning_rate": 1.8738404452690168e-06, "loss": 29.8675, "step": 102 }, { "epoch": 0.009559164733178655, "grad_norm": 61.57320022583008, "learning_rate": 1.8923933209647496e-06, "loss": 31.767, "step": 103 }, { "epoch": 0.009651972157772621, "grad_norm": 62.937191009521484, "learning_rate": 1.9109461966604825e-06, "loss": 29.9793, "step": 104 }, { "epoch": 0.00974477958236659, "grad_norm": 60.43418884277344, "learning_rate": 1.9294990723562154e-06, "loss": 28.6642, "step": 105 }, { "epoch": 0.009837587006960557, "grad_norm": 56.097469329833984, "learning_rate": 1.9480519480519483e-06, "loss": 29.1074, "step": 106 }, { "epoch": 0.009930394431554525, "grad_norm": 56.55039596557617, "learning_rate": 1.966604823747681e-06, "loss": 33.2828, "step": 107 }, { "epoch": 0.010023201856148491, "grad_norm": 61.02751922607422, "learning_rate": 1.9851576994434137e-06, "loss": 30.5471, "step": 108 }, { "epoch": 0.01011600928074246, "grad_norm": 58.944278717041016, "learning_rate": 2.0037105751391467e-06, "loss": 30.5836, "step": 109 }, { "epoch": 0.010208816705336427, "grad_norm": 57.77625274658203, "learning_rate": 2.0222634508348796e-06, "loss": 28.7352, "step": 110 }, { "epoch": 0.010301624129930395, "grad_norm": 57.82255554199219, "learning_rate": 2.0408163265306125e-06, "loss": 30.6167, "step": 111 }, { "epoch": 0.010394431554524361, "grad_norm": 52.43824768066406, "learning_rate": 2.0593692022263454e-06, "loss": 28.9285, "step": 112 }, { "epoch": 0.01048723897911833, "grad_norm": 59.02371597290039, "learning_rate": 2.0779220779220784e-06, "loss": 29.7566, "step": 113 }, { "epoch": 0.010580046403712297, "grad_norm": 56.889801025390625, "learning_rate": 2.096474953617811e-06, "loss": 27.7012, "step": 114 }, { "epoch": 0.010672853828306265, "grad_norm": 71.17630004882812, "learning_rate": 2.1150278293135438e-06, "loss": 29.5131, "step": 115 }, { "epoch": 0.010765661252900231, "grad_norm": 60.73147964477539, "learning_rate": 2.1335807050092767e-06, "loss": 30.1924, "step": 116 }, { "epoch": 0.0108584686774942, "grad_norm": 55.37841033935547, "learning_rate": 2.1521335807050096e-06, "loss": 28.9625, "step": 117 }, { "epoch": 0.010951276102088167, "grad_norm": 55.37611389160156, "learning_rate": 2.1706864564007425e-06, "loss": 29.7716, "step": 118 }, { "epoch": 0.011044083526682135, "grad_norm": 53.34806823730469, "learning_rate": 2.1892393320964755e-06, "loss": 29.6299, "step": 119 }, { "epoch": 0.011136890951276101, "grad_norm": 65.14989471435547, "learning_rate": 2.207792207792208e-06, "loss": 31.2718, "step": 120 }, { "epoch": 0.01122969837587007, "grad_norm": 56.000919342041016, "learning_rate": 2.226345083487941e-06, "loss": 29.4096, "step": 121 }, { "epoch": 0.011322505800464037, "grad_norm": 58.46607208251953, "learning_rate": 2.244897959183674e-06, "loss": 29.8037, "step": 122 }, { "epoch": 0.011415313225058005, "grad_norm": 57.96938705444336, "learning_rate": 2.2634508348794067e-06, "loss": 31.7701, "step": 123 }, { "epoch": 0.011508120649651973, "grad_norm": 61.52134704589844, "learning_rate": 2.2820037105751392e-06, "loss": 27.6983, "step": 124 }, { "epoch": 0.01160092807424594, "grad_norm": 62.390995025634766, "learning_rate": 2.300556586270872e-06, "loss": 30.0455, "step": 125 }, { "epoch": 0.011693735498839907, "grad_norm": 91.0291748046875, "learning_rate": 2.319109461966605e-06, "loss": 29.4087, "step": 126 }, { "epoch": 0.011786542923433875, "grad_norm": 56.05697250366211, "learning_rate": 2.337662337662338e-06, "loss": 28.9118, "step": 127 }, { "epoch": 0.011879350348027843, "grad_norm": 53.79587173461914, "learning_rate": 2.3562152133580705e-06, "loss": 29.3922, "step": 128 }, { "epoch": 0.01197215777262181, "grad_norm": 62.42170333862305, "learning_rate": 2.3747680890538034e-06, "loss": 28.7264, "step": 129 }, { "epoch": 0.012064965197215777, "grad_norm": 59.771419525146484, "learning_rate": 2.3933209647495363e-06, "loss": 28.525, "step": 130 }, { "epoch": 0.012157772621809745, "grad_norm": 54.2863883972168, "learning_rate": 2.4118738404452692e-06, "loss": 29.9056, "step": 131 }, { "epoch": 0.012250580046403713, "grad_norm": 456.6405029296875, "learning_rate": 2.4304267161410017e-06, "loss": 31.9358, "step": 132 }, { "epoch": 0.01234338747099768, "grad_norm": 62.8718376159668, "learning_rate": 2.4489795918367347e-06, "loss": 29.8453, "step": 133 }, { "epoch": 0.012436194895591647, "grad_norm": 56.04036331176758, "learning_rate": 2.4675324675324676e-06, "loss": 28.9658, "step": 134 }, { "epoch": 0.012529002320185615, "grad_norm": 55.99494934082031, "learning_rate": 2.4860853432282005e-06, "loss": 28.0417, "step": 135 }, { "epoch": 0.012621809744779583, "grad_norm": 57.45153045654297, "learning_rate": 2.504638218923933e-06, "loss": 29.2479, "step": 136 }, { "epoch": 0.01271461716937355, "grad_norm": 60.50507736206055, "learning_rate": 2.5231910946196664e-06, "loss": 27.3555, "step": 137 }, { "epoch": 0.012807424593967517, "grad_norm": 64.749267578125, "learning_rate": 2.541743970315399e-06, "loss": 28.8469, "step": 138 }, { "epoch": 0.012900232018561485, "grad_norm": 52.18448257446289, "learning_rate": 2.560296846011132e-06, "loss": 29.7103, "step": 139 }, { "epoch": 0.012993039443155453, "grad_norm": 290.1729736328125, "learning_rate": 2.5788497217068647e-06, "loss": 28.371, "step": 140 }, { "epoch": 0.013085846867749419, "grad_norm": 58.31568145751953, "learning_rate": 2.597402597402597e-06, "loss": 30.3254, "step": 141 }, { "epoch": 0.013178654292343387, "grad_norm": 53.90734100341797, "learning_rate": 2.6159554730983305e-06, "loss": 28.3322, "step": 142 }, { "epoch": 0.013271461716937355, "grad_norm": 61.90281295776367, "learning_rate": 2.634508348794063e-06, "loss": 30.0234, "step": 143 }, { "epoch": 0.013364269141531323, "grad_norm": 62.3014030456543, "learning_rate": 2.6530612244897964e-06, "loss": 28.2165, "step": 144 }, { "epoch": 0.01345707656612529, "grad_norm": 67.55115509033203, "learning_rate": 2.671614100185529e-06, "loss": 29.8403, "step": 145 }, { "epoch": 0.013549883990719257, "grad_norm": 55.992774963378906, "learning_rate": 2.6901669758812622e-06, "loss": 28.4062, "step": 146 }, { "epoch": 0.013642691415313225, "grad_norm": 55.484195709228516, "learning_rate": 2.7087198515769947e-06, "loss": 29.0397, "step": 147 }, { "epoch": 0.013735498839907193, "grad_norm": 60.11124801635742, "learning_rate": 2.7272727272727272e-06, "loss": 27.4278, "step": 148 }, { "epoch": 0.01382830626450116, "grad_norm": 52.61357879638672, "learning_rate": 2.7458256029684606e-06, "loss": 27.6142, "step": 149 }, { "epoch": 0.013921113689095127, "grad_norm": 55.787513732910156, "learning_rate": 2.764378478664193e-06, "loss": 28.6338, "step": 150 }, { "epoch": 0.014013921113689095, "grad_norm": 53.99620819091797, "learning_rate": 2.7829313543599264e-06, "loss": 29.6832, "step": 151 }, { "epoch": 0.014106728538283063, "grad_norm": 55.838897705078125, "learning_rate": 2.801484230055659e-06, "loss": 28.9219, "step": 152 }, { "epoch": 0.01419953596287703, "grad_norm": 61.866085052490234, "learning_rate": 2.8200371057513914e-06, "loss": 28.7714, "step": 153 }, { "epoch": 0.014292343387470997, "grad_norm": 51.42800521850586, "learning_rate": 2.8385899814471248e-06, "loss": 28.7882, "step": 154 }, { "epoch": 0.014385150812064965, "grad_norm": 56.980674743652344, "learning_rate": 2.8571428571428573e-06, "loss": 28.24, "step": 155 }, { "epoch": 0.014477958236658933, "grad_norm": 69.33241271972656, "learning_rate": 2.87569573283859e-06, "loss": 29.0673, "step": 156 }, { "epoch": 0.0145707656612529, "grad_norm": 62.86964797973633, "learning_rate": 2.894248608534323e-06, "loss": 28.6443, "step": 157 }, { "epoch": 0.014663573085846867, "grad_norm": 61.55562973022461, "learning_rate": 2.912801484230056e-06, "loss": 29.0175, "step": 158 }, { "epoch": 0.014756380510440835, "grad_norm": 57.21918487548828, "learning_rate": 2.931354359925789e-06, "loss": 29.5216, "step": 159 }, { "epoch": 0.014849187935034803, "grad_norm": 58.1152458190918, "learning_rate": 2.9499072356215214e-06, "loss": 28.395, "step": 160 }, { "epoch": 0.01494199535962877, "grad_norm": 52.813175201416016, "learning_rate": 2.9684601113172544e-06, "loss": 29.3926, "step": 161 }, { "epoch": 0.015034802784222737, "grad_norm": 57.251949310302734, "learning_rate": 2.9870129870129873e-06, "loss": 27.9974, "step": 162 }, { "epoch": 0.015127610208816705, "grad_norm": 58.2590217590332, "learning_rate": 3.00556586270872e-06, "loss": 28.5013, "step": 163 }, { "epoch": 0.015220417633410673, "grad_norm": 57.19724655151367, "learning_rate": 3.0241187384044527e-06, "loss": 27.6963, "step": 164 }, { "epoch": 0.01531322505800464, "grad_norm": 53.96321105957031, "learning_rate": 3.0426716141001856e-06, "loss": 27.1686, "step": 165 }, { "epoch": 0.015406032482598609, "grad_norm": 54.750125885009766, "learning_rate": 3.0612244897959185e-06, "loss": 28.4719, "step": 166 }, { "epoch": 0.015498839907192575, "grad_norm": 54.548194885253906, "learning_rate": 3.0797773654916515e-06, "loss": 29.4169, "step": 167 }, { "epoch": 0.015591647331786543, "grad_norm": 55.488868713378906, "learning_rate": 3.0983302411873844e-06, "loss": 30.59, "step": 168 }, { "epoch": 0.01568445475638051, "grad_norm": 55.988521575927734, "learning_rate": 3.116883116883117e-06, "loss": 30.2471, "step": 169 }, { "epoch": 0.01577726218097448, "grad_norm": 54.42203903198242, "learning_rate": 3.1354359925788502e-06, "loss": 26.3347, "step": 170 }, { "epoch": 0.015870069605568447, "grad_norm": 53.37290954589844, "learning_rate": 3.1539888682745827e-06, "loss": 27.6981, "step": 171 }, { "epoch": 0.015962877030162415, "grad_norm": 52.19496154785156, "learning_rate": 3.1725417439703157e-06, "loss": 27.9129, "step": 172 }, { "epoch": 0.01605568445475638, "grad_norm": 53.45736312866211, "learning_rate": 3.1910946196660486e-06, "loss": 28.4953, "step": 173 }, { "epoch": 0.016148491879350347, "grad_norm": 63.441585540771484, "learning_rate": 3.209647495361781e-06, "loss": 29.404, "step": 174 }, { "epoch": 0.016241299303944315, "grad_norm": 56.118621826171875, "learning_rate": 3.2282003710575144e-06, "loss": 27.1062, "step": 175 }, { "epoch": 0.016334106728538283, "grad_norm": 54.85963821411133, "learning_rate": 3.246753246753247e-06, "loss": 26.0595, "step": 176 }, { "epoch": 0.01642691415313225, "grad_norm": 53.04201889038086, "learning_rate": 3.2653061224489794e-06, "loss": 28.6282, "step": 177 }, { "epoch": 0.01651972157772622, "grad_norm": 52.66358947753906, "learning_rate": 3.2838589981447128e-06, "loss": 28.7986, "step": 178 }, { "epoch": 0.016612529002320187, "grad_norm": 54.00156021118164, "learning_rate": 3.3024118738404453e-06, "loss": 28.897, "step": 179 }, { "epoch": 0.016705336426914155, "grad_norm": 56.831085205078125, "learning_rate": 3.3209647495361786e-06, "loss": 28.6863, "step": 180 }, { "epoch": 0.01679814385150812, "grad_norm": 57.11240768432617, "learning_rate": 3.339517625231911e-06, "loss": 31.4569, "step": 181 }, { "epoch": 0.016890951276102087, "grad_norm": 54.645259857177734, "learning_rate": 3.3580705009276445e-06, "loss": 29.6329, "step": 182 }, { "epoch": 0.016983758700696055, "grad_norm": 53.90031433105469, "learning_rate": 3.376623376623377e-06, "loss": 27.5959, "step": 183 }, { "epoch": 0.017076566125290023, "grad_norm": 54.2324333190918, "learning_rate": 3.3951762523191094e-06, "loss": 29.7916, "step": 184 }, { "epoch": 0.01716937354988399, "grad_norm": 52.989173889160156, "learning_rate": 3.413729128014843e-06, "loss": 28.0535, "step": 185 }, { "epoch": 0.01726218097447796, "grad_norm": 52.332855224609375, "learning_rate": 3.4322820037105753e-06, "loss": 28.4643, "step": 186 }, { "epoch": 0.017354988399071927, "grad_norm": 50.908573150634766, "learning_rate": 3.4508348794063086e-06, "loss": 26.8717, "step": 187 }, { "epoch": 0.017447795823665894, "grad_norm": 55.201663970947266, "learning_rate": 3.469387755102041e-06, "loss": 27.528, "step": 188 }, { "epoch": 0.017540603248259862, "grad_norm": 59.525550842285156, "learning_rate": 3.4879406307977736e-06, "loss": 27.649, "step": 189 }, { "epoch": 0.017633410672853827, "grad_norm": 58.30669021606445, "learning_rate": 3.506493506493507e-06, "loss": 28.5133, "step": 190 }, { "epoch": 0.017726218097447795, "grad_norm": 122.09718322753906, "learning_rate": 3.5250463821892395e-06, "loss": 27.2976, "step": 191 }, { "epoch": 0.017819025522041763, "grad_norm": 55.53460693359375, "learning_rate": 3.5435992578849724e-06, "loss": 26.1268, "step": 192 }, { "epoch": 0.01791183294663573, "grad_norm": 54.55265426635742, "learning_rate": 3.5621521335807053e-06, "loss": 28.0341, "step": 193 }, { "epoch": 0.0180046403712297, "grad_norm": 53.15818786621094, "learning_rate": 3.5807050092764382e-06, "loss": 27.4786, "step": 194 }, { "epoch": 0.018097447795823667, "grad_norm": 57.55740737915039, "learning_rate": 3.599257884972171e-06, "loss": 29.9299, "step": 195 }, { "epoch": 0.018190255220417634, "grad_norm": 53.33743667602539, "learning_rate": 3.6178107606679037e-06, "loss": 28.3839, "step": 196 }, { "epoch": 0.018283062645011602, "grad_norm": 55.26750946044922, "learning_rate": 3.6363636363636366e-06, "loss": 27.2745, "step": 197 }, { "epoch": 0.018375870069605567, "grad_norm": 48.90969467163086, "learning_rate": 3.6549165120593695e-06, "loss": 26.0849, "step": 198 }, { "epoch": 0.018468677494199535, "grad_norm": 52.15176010131836, "learning_rate": 3.6734693877551024e-06, "loss": 27.9256, "step": 199 }, { "epoch": 0.018561484918793503, "grad_norm": 51.80679702758789, "learning_rate": 3.6920222634508353e-06, "loss": 27.289, "step": 200 }, { "epoch": 0.01865429234338747, "grad_norm": 49.59486389160156, "learning_rate": 3.710575139146568e-06, "loss": 24.886, "step": 201 }, { "epoch": 0.01874709976798144, "grad_norm": 52.34904479980469, "learning_rate": 3.7291280148423008e-06, "loss": 26.615, "step": 202 }, { "epoch": 0.018839907192575406, "grad_norm": 49.76240921020508, "learning_rate": 3.7476808905380337e-06, "loss": 27.7225, "step": 203 }, { "epoch": 0.018932714617169374, "grad_norm": 51.4605827331543, "learning_rate": 3.7662337662337666e-06, "loss": 27.9517, "step": 204 }, { "epoch": 0.019025522041763342, "grad_norm": 50.297698974609375, "learning_rate": 3.784786641929499e-06, "loss": 26.869, "step": 205 }, { "epoch": 0.01911832946635731, "grad_norm": 51.275779724121094, "learning_rate": 3.8033395176252325e-06, "loss": 24.754, "step": 206 }, { "epoch": 0.019211136890951275, "grad_norm": 54.539825439453125, "learning_rate": 3.821892393320965e-06, "loss": 27.5644, "step": 207 }, { "epoch": 0.019303944315545243, "grad_norm": 54.596988677978516, "learning_rate": 3.840445269016698e-06, "loss": 28.0842, "step": 208 }, { "epoch": 0.01939675174013921, "grad_norm": 58.2098274230957, "learning_rate": 3.858998144712431e-06, "loss": 27.8703, "step": 209 }, { "epoch": 0.01948955916473318, "grad_norm": 57.086212158203125, "learning_rate": 3.877551020408164e-06, "loss": 29.5128, "step": 210 }, { "epoch": 0.019582366589327146, "grad_norm": 54.715572357177734, "learning_rate": 3.896103896103897e-06, "loss": 26.0486, "step": 211 }, { "epoch": 0.019675174013921114, "grad_norm": 49.89113235473633, "learning_rate": 3.9146567717996296e-06, "loss": 27.8903, "step": 212 }, { "epoch": 0.019767981438515082, "grad_norm": 51.31144714355469, "learning_rate": 3.933209647495362e-06, "loss": 28.4269, "step": 213 }, { "epoch": 0.01986078886310905, "grad_norm": 54.24203109741211, "learning_rate": 3.951762523191095e-06, "loss": 25.6786, "step": 214 }, { "epoch": 0.019953596287703015, "grad_norm": 51.84783935546875, "learning_rate": 3.9703153988868275e-06, "loss": 27.6605, "step": 215 }, { "epoch": 0.020046403712296983, "grad_norm": 54.56235885620117, "learning_rate": 3.98886827458256e-06, "loss": 27.4142, "step": 216 }, { "epoch": 0.02013921113689095, "grad_norm": 55.77119445800781, "learning_rate": 4.007421150278293e-06, "loss": 27.5779, "step": 217 }, { "epoch": 0.02023201856148492, "grad_norm": 57.556732177734375, "learning_rate": 4.025974025974026e-06, "loss": 27.9419, "step": 218 }, { "epoch": 0.020324825986078886, "grad_norm": 53.508018493652344, "learning_rate": 4.044526901669759e-06, "loss": 28.8097, "step": 219 }, { "epoch": 0.020417633410672854, "grad_norm": 53.55928039550781, "learning_rate": 4.063079777365492e-06, "loss": 28.1299, "step": 220 }, { "epoch": 0.020510440835266822, "grad_norm": 55.43370819091797, "learning_rate": 4.081632653061225e-06, "loss": 28.0137, "step": 221 }, { "epoch": 0.02060324825986079, "grad_norm": 50.24539566040039, "learning_rate": 4.100185528756958e-06, "loss": 26.7876, "step": 222 }, { "epoch": 0.020696055684454755, "grad_norm": 51.19827651977539, "learning_rate": 4.118738404452691e-06, "loss": 27.4827, "step": 223 }, { "epoch": 0.020788863109048723, "grad_norm": 51.00681686401367, "learning_rate": 4.137291280148423e-06, "loss": 26.492, "step": 224 }, { "epoch": 0.02088167053364269, "grad_norm": 52.24995040893555, "learning_rate": 4.155844155844157e-06, "loss": 28.2303, "step": 225 }, { "epoch": 0.02097447795823666, "grad_norm": 53.11408233642578, "learning_rate": 4.174397031539889e-06, "loss": 27.7775, "step": 226 }, { "epoch": 0.021067285382830626, "grad_norm": 53.25648498535156, "learning_rate": 4.192949907235622e-06, "loss": 26.3341, "step": 227 }, { "epoch": 0.021160092807424594, "grad_norm": 51.471641540527344, "learning_rate": 4.211502782931355e-06, "loss": 27.8404, "step": 228 }, { "epoch": 0.021252900232018562, "grad_norm": 67.51049041748047, "learning_rate": 4.2300556586270875e-06, "loss": 28.0328, "step": 229 }, { "epoch": 0.02134570765661253, "grad_norm": 54.25205993652344, "learning_rate": 4.2486085343228205e-06, "loss": 28.4024, "step": 230 }, { "epoch": 0.021438515081206498, "grad_norm": 51.91297149658203, "learning_rate": 4.267161410018553e-06, "loss": 26.607, "step": 231 }, { "epoch": 0.021531322505800463, "grad_norm": 55.219337463378906, "learning_rate": 4.2857142857142855e-06, "loss": 26.7939, "step": 232 }, { "epoch": 0.02162412993039443, "grad_norm": 50.91411590576172, "learning_rate": 4.304267161410019e-06, "loss": 26.5443, "step": 233 }, { "epoch": 0.0217169373549884, "grad_norm": 51.76503372192383, "learning_rate": 4.322820037105751e-06, "loss": 30.9872, "step": 234 }, { "epoch": 0.021809744779582366, "grad_norm": 58.73655700683594, "learning_rate": 4.341372912801485e-06, "loss": 26.8784, "step": 235 }, { "epoch": 0.021902552204176334, "grad_norm": 52.804534912109375, "learning_rate": 4.359925788497217e-06, "loss": 27.538, "step": 236 }, { "epoch": 0.021995359628770302, "grad_norm": 54.159664154052734, "learning_rate": 4.378478664192951e-06, "loss": 30.2363, "step": 237 }, { "epoch": 0.02208816705336427, "grad_norm": 50.635826110839844, "learning_rate": 4.397031539888683e-06, "loss": 27.7107, "step": 238 }, { "epoch": 0.022180974477958238, "grad_norm": 51.11122512817383, "learning_rate": 4.415584415584416e-06, "loss": 27.8649, "step": 239 }, { "epoch": 0.022273781902552203, "grad_norm": 49.213157653808594, "learning_rate": 4.434137291280149e-06, "loss": 27.705, "step": 240 }, { "epoch": 0.02236658932714617, "grad_norm": 50.80289840698242, "learning_rate": 4.452690166975882e-06, "loss": 29.4419, "step": 241 }, { "epoch": 0.02245939675174014, "grad_norm": 56.02285385131836, "learning_rate": 4.471243042671615e-06, "loss": 28.6365, "step": 242 }, { "epoch": 0.022552204176334106, "grad_norm": 51.4517936706543, "learning_rate": 4.489795918367348e-06, "loss": 27.4955, "step": 243 }, { "epoch": 0.022645011600928074, "grad_norm": 55.71883010864258, "learning_rate": 4.50834879406308e-06, "loss": 29.1689, "step": 244 }, { "epoch": 0.022737819025522042, "grad_norm": 53.264400482177734, "learning_rate": 4.5269016697588134e-06, "loss": 28.4316, "step": 245 }, { "epoch": 0.02283062645011601, "grad_norm": 48.63359069824219, "learning_rate": 4.5454545454545455e-06, "loss": 25.6622, "step": 246 }, { "epoch": 0.022923433874709978, "grad_norm": 52.30775833129883, "learning_rate": 4.5640074211502784e-06, "loss": 27.154, "step": 247 }, { "epoch": 0.023016241299303946, "grad_norm": 55.2840690612793, "learning_rate": 4.582560296846011e-06, "loss": 26.2999, "step": 248 }, { "epoch": 0.02310904872389791, "grad_norm": 50.781070709228516, "learning_rate": 4.601113172541744e-06, "loss": 28.1167, "step": 249 }, { "epoch": 0.02320185614849188, "grad_norm": 51.29035568237305, "learning_rate": 4.619666048237477e-06, "loss": 26.6631, "step": 250 }, { "epoch": 0.023294663573085846, "grad_norm": 52.405338287353516, "learning_rate": 4.63821892393321e-06, "loss": 27.9232, "step": 251 }, { "epoch": 0.023387470997679814, "grad_norm": 47.8524284362793, "learning_rate": 4.656771799628943e-06, "loss": 26.3231, "step": 252 }, { "epoch": 0.023480278422273782, "grad_norm": 277.9980163574219, "learning_rate": 4.675324675324676e-06, "loss": 28.5533, "step": 253 }, { "epoch": 0.02357308584686775, "grad_norm": 55.864871978759766, "learning_rate": 4.693877551020409e-06, "loss": 28.4372, "step": 254 }, { "epoch": 0.023665893271461718, "grad_norm": 52.69704818725586, "learning_rate": 4.712430426716141e-06, "loss": 27.0162, "step": 255 }, { "epoch": 0.023758700696055686, "grad_norm": 56.423011779785156, "learning_rate": 4.730983302411874e-06, "loss": 26.515, "step": 256 }, { "epoch": 0.02385150812064965, "grad_norm": 54.48466110229492, "learning_rate": 4.749536178107607e-06, "loss": 30.335, "step": 257 }, { "epoch": 0.02394431554524362, "grad_norm": 51.23316192626953, "learning_rate": 4.76808905380334e-06, "loss": 27.5252, "step": 258 }, { "epoch": 0.024037122969837586, "grad_norm": 53.84898376464844, "learning_rate": 4.786641929499073e-06, "loss": 26.5129, "step": 259 }, { "epoch": 0.024129930394431554, "grad_norm": 53.98963165283203, "learning_rate": 4.805194805194806e-06, "loss": 28.3858, "step": 260 }, { "epoch": 0.024222737819025522, "grad_norm": 53.3544921875, "learning_rate": 4.8237476808905385e-06, "loss": 27.4958, "step": 261 }, { "epoch": 0.02431554524361949, "grad_norm": 52.88591766357422, "learning_rate": 4.842300556586271e-06, "loss": 28.0833, "step": 262 }, { "epoch": 0.024408352668213458, "grad_norm": 54.54233932495117, "learning_rate": 4.8608534322820035e-06, "loss": 28.1362, "step": 263 }, { "epoch": 0.024501160092807426, "grad_norm": 50.03864288330078, "learning_rate": 4.879406307977737e-06, "loss": 26.9176, "step": 264 }, { "epoch": 0.02459396751740139, "grad_norm": 57.386207580566406, "learning_rate": 4.897959183673469e-06, "loss": 27.2819, "step": 265 }, { "epoch": 0.02468677494199536, "grad_norm": 53.37666702270508, "learning_rate": 4.916512059369203e-06, "loss": 29.1785, "step": 266 }, { "epoch": 0.024779582366589326, "grad_norm": 58.190093994140625, "learning_rate": 4.935064935064935e-06, "loss": 26.3819, "step": 267 }, { "epoch": 0.024872389791183294, "grad_norm": 53.7491569519043, "learning_rate": 4.953617810760668e-06, "loss": 27.5808, "step": 268 }, { "epoch": 0.024965197215777262, "grad_norm": 52.47393035888672, "learning_rate": 4.972170686456401e-06, "loss": 27.6891, "step": 269 }, { "epoch": 0.02505800464037123, "grad_norm": 55.51715850830078, "learning_rate": 4.990723562152134e-06, "loss": 28.0957, "step": 270 }, { "epoch": 0.025150812064965198, "grad_norm": 51.86997985839844, "learning_rate": 5.009276437847866e-06, "loss": 27.8961, "step": 271 }, { "epoch": 0.025243619489559166, "grad_norm": 64.60863494873047, "learning_rate": 5.0278293135436e-06, "loss": 28.0462, "step": 272 }, { "epoch": 0.025336426914153134, "grad_norm": 50.86996078491211, "learning_rate": 5.046382189239333e-06, "loss": 26.253, "step": 273 }, { "epoch": 0.0254292343387471, "grad_norm": 52.267921447753906, "learning_rate": 5.064935064935065e-06, "loss": 27.2006, "step": 274 }, { "epoch": 0.025522041763341066, "grad_norm": 48.6536865234375, "learning_rate": 5.083487940630798e-06, "loss": 26.9756, "step": 275 }, { "epoch": 0.025614849187935034, "grad_norm": 49.541072845458984, "learning_rate": 5.1020408163265315e-06, "loss": 25.7407, "step": 276 }, { "epoch": 0.025707656612529002, "grad_norm": 56.40632629394531, "learning_rate": 5.120593692022264e-06, "loss": 27.3888, "step": 277 }, { "epoch": 0.02580046403712297, "grad_norm": 51.73103713989258, "learning_rate": 5.1391465677179965e-06, "loss": 26.644, "step": 278 }, { "epoch": 0.025893271461716938, "grad_norm": 52.18782043457031, "learning_rate": 5.157699443413729e-06, "loss": 27.1118, "step": 279 }, { "epoch": 0.025986078886310906, "grad_norm": 50.54853439331055, "learning_rate": 5.176252319109462e-06, "loss": 29.0709, "step": 280 }, { "epoch": 0.026078886310904874, "grad_norm": 63.27244567871094, "learning_rate": 5.194805194805194e-06, "loss": 27.668, "step": 281 }, { "epoch": 0.026171693735498838, "grad_norm": 49.60090637207031, "learning_rate": 5.213358070500928e-06, "loss": 27.5111, "step": 282 }, { "epoch": 0.026264501160092806, "grad_norm": 48.4888916015625, "learning_rate": 5.231910946196661e-06, "loss": 27.0843, "step": 283 }, { "epoch": 0.026357308584686774, "grad_norm": 50.872615814208984, "learning_rate": 5.250463821892394e-06, "loss": 26.602, "step": 284 }, { "epoch": 0.026450116009280742, "grad_norm": 52.74732208251953, "learning_rate": 5.269016697588126e-06, "loss": 29.0207, "step": 285 }, { "epoch": 0.02654292343387471, "grad_norm": 52.135101318359375, "learning_rate": 5.28756957328386e-06, "loss": 28.0831, "step": 286 }, { "epoch": 0.026635730858468678, "grad_norm": 57.819007873535156, "learning_rate": 5.306122448979593e-06, "loss": 27.1626, "step": 287 }, { "epoch": 0.026728538283062646, "grad_norm": 58.53867721557617, "learning_rate": 5.324675324675325e-06, "loss": 27.8041, "step": 288 }, { "epoch": 0.026821345707656614, "grad_norm": 52.060237884521484, "learning_rate": 5.343228200371058e-06, "loss": 25.2097, "step": 289 }, { "epoch": 0.02691415313225058, "grad_norm": 53.356815338134766, "learning_rate": 5.361781076066791e-06, "loss": 27.8706, "step": 290 }, { "epoch": 0.027006960556844546, "grad_norm": 54.17503356933594, "learning_rate": 5.3803339517625245e-06, "loss": 24.8526, "step": 291 }, { "epoch": 0.027099767981438514, "grad_norm": 50.3400764465332, "learning_rate": 5.3988868274582565e-06, "loss": 28.2811, "step": 292 }, { "epoch": 0.027192575406032482, "grad_norm": 63.30010223388672, "learning_rate": 5.4174397031539895e-06, "loss": 25.743, "step": 293 }, { "epoch": 0.02728538283062645, "grad_norm": 52.181846618652344, "learning_rate": 5.435992578849722e-06, "loss": 29.0652, "step": 294 }, { "epoch": 0.027378190255220418, "grad_norm": 55.54167556762695, "learning_rate": 5.4545454545454545e-06, "loss": 28.2864, "step": 295 }, { "epoch": 0.027470997679814386, "grad_norm": 51.0430793762207, "learning_rate": 5.473098330241187e-06, "loss": 27.256, "step": 296 }, { "epoch": 0.027563805104408354, "grad_norm": 49.20680236816406, "learning_rate": 5.491651205936921e-06, "loss": 27.8178, "step": 297 }, { "epoch": 0.02765661252900232, "grad_norm": 49.88323974609375, "learning_rate": 5.510204081632653e-06, "loss": 24.9102, "step": 298 }, { "epoch": 0.027749419953596286, "grad_norm": 53.241790771484375, "learning_rate": 5.528756957328386e-06, "loss": 27.0374, "step": 299 }, { "epoch": 0.027842227378190254, "grad_norm": 54.18423843383789, "learning_rate": 5.547309833024119e-06, "loss": 27.83, "step": 300 }, { "epoch": 0.027935034802784222, "grad_norm": 51.444358825683594, "learning_rate": 5.565862708719853e-06, "loss": 27.5963, "step": 301 }, { "epoch": 0.02802784222737819, "grad_norm": 54.09018325805664, "learning_rate": 5.584415584415585e-06, "loss": 26.7956, "step": 302 }, { "epoch": 0.028120649651972158, "grad_norm": 54.372642517089844, "learning_rate": 5.602968460111318e-06, "loss": 27.8014, "step": 303 }, { "epoch": 0.028213457076566126, "grad_norm": 57.54226303100586, "learning_rate": 5.621521335807051e-06, "loss": 26.3317, "step": 304 }, { "epoch": 0.028306264501160094, "grad_norm": 56.269073486328125, "learning_rate": 5.640074211502783e-06, "loss": 29.0431, "step": 305 }, { "epoch": 0.02839907192575406, "grad_norm": 48.654354095458984, "learning_rate": 5.658627087198516e-06, "loss": 26.5831, "step": 306 }, { "epoch": 0.02849187935034803, "grad_norm": 51.729087829589844, "learning_rate": 5.6771799628942495e-06, "loss": 26.9915, "step": 307 }, { "epoch": 0.028584686774941994, "grad_norm": 58.44960021972656, "learning_rate": 5.6957328385899824e-06, "loss": 26.8603, "step": 308 }, { "epoch": 0.028677494199535962, "grad_norm": 50.83586502075195, "learning_rate": 5.7142857142857145e-06, "loss": 26.8445, "step": 309 }, { "epoch": 0.02877030162412993, "grad_norm": 52.530418395996094, "learning_rate": 5.7328385899814474e-06, "loss": 29.0631, "step": 310 }, { "epoch": 0.028863109048723898, "grad_norm": 53.841758728027344, "learning_rate": 5.75139146567718e-06, "loss": 25.2789, "step": 311 }, { "epoch": 0.028955916473317866, "grad_norm": 51.1548957824707, "learning_rate": 5.7699443413729124e-06, "loss": 29.0973, "step": 312 }, { "epoch": 0.029048723897911834, "grad_norm": 52.296634674072266, "learning_rate": 5.788497217068646e-06, "loss": 28.7208, "step": 313 }, { "epoch": 0.0291415313225058, "grad_norm": 50.34853744506836, "learning_rate": 5.807050092764379e-06, "loss": 26.9841, "step": 314 }, { "epoch": 0.02923433874709977, "grad_norm": 48.965240478515625, "learning_rate": 5.825602968460112e-06, "loss": 26.0561, "step": 315 }, { "epoch": 0.029327146171693734, "grad_norm": 51.088706970214844, "learning_rate": 5.844155844155844e-06, "loss": 27.9892, "step": 316 }, { "epoch": 0.029419953596287702, "grad_norm": 51.02614212036133, "learning_rate": 5.862708719851578e-06, "loss": 25.974, "step": 317 }, { "epoch": 0.02951276102088167, "grad_norm": 45.921974182128906, "learning_rate": 5.881261595547311e-06, "loss": 26.5348, "step": 318 }, { "epoch": 0.029605568445475638, "grad_norm": 51.04883575439453, "learning_rate": 5.899814471243043e-06, "loss": 25.6091, "step": 319 }, { "epoch": 0.029698375870069606, "grad_norm": 49.02399444580078, "learning_rate": 5.918367346938776e-06, "loss": 26.5758, "step": 320 }, { "epoch": 0.029791183294663574, "grad_norm": 51.922481536865234, "learning_rate": 5.936920222634509e-06, "loss": 27.1691, "step": 321 }, { "epoch": 0.02988399071925754, "grad_norm": 54.831600189208984, "learning_rate": 5.955473098330241e-06, "loss": 26.1898, "step": 322 }, { "epoch": 0.02997679814385151, "grad_norm": 52.72639465332031, "learning_rate": 5.9740259740259746e-06, "loss": 27.3543, "step": 323 }, { "epoch": 0.030069605568445474, "grad_norm": 58.8077392578125, "learning_rate": 5.9925788497217075e-06, "loss": 26.815, "step": 324 }, { "epoch": 0.030162412993039442, "grad_norm": 56.1048469543457, "learning_rate": 6.01113172541744e-06, "loss": 27.1234, "step": 325 }, { "epoch": 0.03025522041763341, "grad_norm": 54.992156982421875, "learning_rate": 6.0296846011131725e-06, "loss": 27.4876, "step": 326 }, { "epoch": 0.030348027842227378, "grad_norm": 65.4008560180664, "learning_rate": 6.048237476808905e-06, "loss": 27.2312, "step": 327 }, { "epoch": 0.030440835266821346, "grad_norm": 49.353816986083984, "learning_rate": 6.066790352504639e-06, "loss": 27.2794, "step": 328 }, { "epoch": 0.030533642691415314, "grad_norm": 52.177391052246094, "learning_rate": 6.085343228200371e-06, "loss": 26.4232, "step": 329 }, { "epoch": 0.03062645011600928, "grad_norm": 53.980445861816406, "learning_rate": 6.103896103896104e-06, "loss": 26.5706, "step": 330 }, { "epoch": 0.03071925754060325, "grad_norm": 53.397586822509766, "learning_rate": 6.122448979591837e-06, "loss": 28.8834, "step": 331 }, { "epoch": 0.030812064965197217, "grad_norm": 53.136661529541016, "learning_rate": 6.141001855287571e-06, "loss": 26.3971, "step": 332 }, { "epoch": 0.030904872389791182, "grad_norm": 47.88805389404297, "learning_rate": 6.159554730983303e-06, "loss": 26.1846, "step": 333 }, { "epoch": 0.03099767981438515, "grad_norm": 50.358245849609375, "learning_rate": 6.178107606679036e-06, "loss": 26.8525, "step": 334 }, { "epoch": 0.031090487238979118, "grad_norm": 53.328643798828125, "learning_rate": 6.196660482374769e-06, "loss": 27.8438, "step": 335 }, { "epoch": 0.031183294663573086, "grad_norm": 48.244686126708984, "learning_rate": 6.215213358070501e-06, "loss": 27.0382, "step": 336 }, { "epoch": 0.03127610208816705, "grad_norm": 46.03959655761719, "learning_rate": 6.233766233766234e-06, "loss": 26.4482, "step": 337 }, { "epoch": 0.03136890951276102, "grad_norm": 51.145896911621094, "learning_rate": 6.2523191094619676e-06, "loss": 26.3506, "step": 338 }, { "epoch": 0.031461716937354986, "grad_norm": 53.65150451660156, "learning_rate": 6.2708719851577005e-06, "loss": 27.69, "step": 339 }, { "epoch": 0.03155452436194896, "grad_norm": 51.543861389160156, "learning_rate": 6.2894248608534325e-06, "loss": 25.8923, "step": 340 }, { "epoch": 0.03164733178654292, "grad_norm": 46.0400390625, "learning_rate": 6.3079777365491655e-06, "loss": 26.1671, "step": 341 }, { "epoch": 0.03174013921113689, "grad_norm": 50.54642105102539, "learning_rate": 6.326530612244899e-06, "loss": 26.7421, "step": 342 }, { "epoch": 0.03183294663573086, "grad_norm": 55.91775131225586, "learning_rate": 6.345083487940631e-06, "loss": 26.1078, "step": 343 }, { "epoch": 0.03192575406032483, "grad_norm": 54.08245086669922, "learning_rate": 6.363636363636364e-06, "loss": 26.5411, "step": 344 }, { "epoch": 0.032018561484918794, "grad_norm": 52.22511291503906, "learning_rate": 6.382189239332097e-06, "loss": 26.366, "step": 345 }, { "epoch": 0.03211136890951276, "grad_norm": 723.1151733398438, "learning_rate": 6.40074211502783e-06, "loss": 29.4272, "step": 346 }, { "epoch": 0.03220417633410673, "grad_norm": 50.10744094848633, "learning_rate": 6.419294990723562e-06, "loss": 27.112, "step": 347 }, { "epoch": 0.032296983758700694, "grad_norm": 50.54053497314453, "learning_rate": 6.437847866419296e-06, "loss": 28.0612, "step": 348 }, { "epoch": 0.032389791183294665, "grad_norm": 60.53065872192383, "learning_rate": 6.456400742115029e-06, "loss": 26.2246, "step": 349 }, { "epoch": 0.03248259860788863, "grad_norm": 49.417179107666016, "learning_rate": 6.474953617810761e-06, "loss": 27.7559, "step": 350 }, { "epoch": 0.0325754060324826, "grad_norm": 50.622798919677734, "learning_rate": 6.493506493506494e-06, "loss": 27.264, "step": 351 }, { "epoch": 0.032668213457076566, "grad_norm": 51.83125686645508, "learning_rate": 6.512059369202227e-06, "loss": 28.1966, "step": 352 }, { "epoch": 0.03276102088167054, "grad_norm": 58.21205520629883, "learning_rate": 6.530612244897959e-06, "loss": 26.0623, "step": 353 }, { "epoch": 0.0328538283062645, "grad_norm": 50.269325256347656, "learning_rate": 6.549165120593693e-06, "loss": 26.8957, "step": 354 }, { "epoch": 0.032946635730858466, "grad_norm": 51.58884811401367, "learning_rate": 6.5677179962894255e-06, "loss": 27.4137, "step": 355 }, { "epoch": 0.03303944315545244, "grad_norm": 49.040443420410156, "learning_rate": 6.5862708719851584e-06, "loss": 28.3155, "step": 356 }, { "epoch": 0.0331322505800464, "grad_norm": 50.17155838012695, "learning_rate": 6.6048237476808905e-06, "loss": 25.8845, "step": 357 }, { "epoch": 0.03322505800464037, "grad_norm": 46.43949508666992, "learning_rate": 6.623376623376624e-06, "loss": 27.5963, "step": 358 }, { "epoch": 0.03331786542923434, "grad_norm": 56.99538803100586, "learning_rate": 6.641929499072357e-06, "loss": 27.7678, "step": 359 }, { "epoch": 0.03341067285382831, "grad_norm": 48.29073715209961, "learning_rate": 6.660482374768089e-06, "loss": 26.9754, "step": 360 }, { "epoch": 0.033503480278422273, "grad_norm": 46.70393371582031, "learning_rate": 6.679035250463822e-06, "loss": 27.6412, "step": 361 }, { "epoch": 0.03359628770301624, "grad_norm": 50.88888931274414, "learning_rate": 6.697588126159555e-06, "loss": 26.1569, "step": 362 }, { "epoch": 0.03368909512761021, "grad_norm": 52.59719467163086, "learning_rate": 6.716141001855289e-06, "loss": 26.166, "step": 363 }, { "epoch": 0.033781902552204174, "grad_norm": 53.67612838745117, "learning_rate": 6.734693877551021e-06, "loss": 26.7763, "step": 364 }, { "epoch": 0.033874709976798145, "grad_norm": 59.564327239990234, "learning_rate": 6.753246753246754e-06, "loss": 27.1524, "step": 365 }, { "epoch": 0.03396751740139211, "grad_norm": 49.367679595947266, "learning_rate": 6.771799628942487e-06, "loss": 26.8316, "step": 366 }, { "epoch": 0.03406032482598608, "grad_norm": 49.062313079833984, "learning_rate": 6.790352504638219e-06, "loss": 26.3423, "step": 367 }, { "epoch": 0.034153132250580046, "grad_norm": 52.30550765991211, "learning_rate": 6.808905380333952e-06, "loss": 27.0027, "step": 368 }, { "epoch": 0.03424593967517402, "grad_norm": 49.82954025268555, "learning_rate": 6.827458256029686e-06, "loss": 25.4759, "step": 369 }, { "epoch": 0.03433874709976798, "grad_norm": 47.466243743896484, "learning_rate": 6.8460111317254185e-06, "loss": 24.0034, "step": 370 }, { "epoch": 0.034431554524361946, "grad_norm": 56.72465515136719, "learning_rate": 6.864564007421151e-06, "loss": 24.849, "step": 371 }, { "epoch": 0.03452436194895592, "grad_norm": 50.77724075317383, "learning_rate": 6.8831168831168835e-06, "loss": 27.7351, "step": 372 }, { "epoch": 0.03461716937354988, "grad_norm": 46.77863693237305, "learning_rate": 6.901669758812617e-06, "loss": 25.5864, "step": 373 }, { "epoch": 0.03470997679814385, "grad_norm": 48.528297424316406, "learning_rate": 6.920222634508349e-06, "loss": 29.4736, "step": 374 }, { "epoch": 0.03480278422273782, "grad_norm": 52.082157135009766, "learning_rate": 6.938775510204082e-06, "loss": 26.4787, "step": 375 }, { "epoch": 0.03489559164733179, "grad_norm": 48.501953125, "learning_rate": 6.957328385899815e-06, "loss": 26.3854, "step": 376 }, { "epoch": 0.03498839907192575, "grad_norm": 48.94550704956055, "learning_rate": 6.975881261595547e-06, "loss": 27.6297, "step": 377 }, { "epoch": 0.035081206496519725, "grad_norm": 55.3294677734375, "learning_rate": 6.99443413729128e-06, "loss": 25.9166, "step": 378 }, { "epoch": 0.03517401392111369, "grad_norm": 50.958473205566406, "learning_rate": 7.012987012987014e-06, "loss": 30.3786, "step": 379 }, { "epoch": 0.035266821345707654, "grad_norm": 49.248558044433594, "learning_rate": 7.031539888682747e-06, "loss": 26.3913, "step": 380 }, { "epoch": 0.035359628770301625, "grad_norm": 54.605472564697266, "learning_rate": 7.050092764378479e-06, "loss": 28.3989, "step": 381 }, { "epoch": 0.03545243619489559, "grad_norm": 53.577510833740234, "learning_rate": 7.068645640074212e-06, "loss": 26.803, "step": 382 }, { "epoch": 0.03554524361948956, "grad_norm": 54.58143615722656, "learning_rate": 7.087198515769945e-06, "loss": 26.6448, "step": 383 }, { "epoch": 0.035638051044083525, "grad_norm": 49.77048110961914, "learning_rate": 7.105751391465677e-06, "loss": 25.368, "step": 384 }, { "epoch": 0.0357308584686775, "grad_norm": 46.83162307739258, "learning_rate": 7.124304267161411e-06, "loss": 28.0776, "step": 385 }, { "epoch": 0.03582366589327146, "grad_norm": 50.24225616455078, "learning_rate": 7.1428571428571436e-06, "loss": 27.0357, "step": 386 }, { "epoch": 0.035916473317865426, "grad_norm": 54.21342468261719, "learning_rate": 7.1614100185528765e-06, "loss": 28.0158, "step": 387 }, { "epoch": 0.0360092807424594, "grad_norm": 54.5197868347168, "learning_rate": 7.1799628942486086e-06, "loss": 28.0941, "step": 388 }, { "epoch": 0.03610208816705336, "grad_norm": 48.168540954589844, "learning_rate": 7.198515769944342e-06, "loss": 25.211, "step": 389 }, { "epoch": 0.03619489559164733, "grad_norm": 48.538490295410156, "learning_rate": 7.217068645640075e-06, "loss": 25.857, "step": 390 }, { "epoch": 0.0362877030162413, "grad_norm": 53.28076934814453, "learning_rate": 7.235621521335807e-06, "loss": 27.9024, "step": 391 }, { "epoch": 0.03638051044083527, "grad_norm": 52.48709487915039, "learning_rate": 7.25417439703154e-06, "loss": 28.3234, "step": 392 }, { "epoch": 0.03647331786542923, "grad_norm": 54.30731201171875, "learning_rate": 7.272727272727273e-06, "loss": 25.6493, "step": 393 }, { "epoch": 0.036566125290023205, "grad_norm": 48.55180358886719, "learning_rate": 7.291280148423007e-06, "loss": 27.6173, "step": 394 }, { "epoch": 0.03665893271461717, "grad_norm": 70.04164123535156, "learning_rate": 7.309833024118739e-06, "loss": 26.7288, "step": 395 }, { "epoch": 0.036751740139211134, "grad_norm": 52.233768463134766, "learning_rate": 7.328385899814472e-06, "loss": 26.2769, "step": 396 }, { "epoch": 0.036844547563805105, "grad_norm": 60.687896728515625, "learning_rate": 7.346938775510205e-06, "loss": 28.3515, "step": 397 }, { "epoch": 0.03693735498839907, "grad_norm": 48.10804748535156, "learning_rate": 7.365491651205937e-06, "loss": 26.2973, "step": 398 }, { "epoch": 0.03703016241299304, "grad_norm": 223.0425567626953, "learning_rate": 7.384044526901671e-06, "loss": 29.3254, "step": 399 }, { "epoch": 0.037122969837587005, "grad_norm": 51.303993225097656, "learning_rate": 7.402597402597404e-06, "loss": 28.4603, "step": 400 }, { "epoch": 0.03721577726218098, "grad_norm": 62.364437103271484, "learning_rate": 7.421150278293136e-06, "loss": 27.6499, "step": 401 }, { "epoch": 0.03730858468677494, "grad_norm": 51.829532623291016, "learning_rate": 7.439703153988869e-06, "loss": 25.2735, "step": 402 }, { "epoch": 0.03740139211136891, "grad_norm": 52.00889205932617, "learning_rate": 7.4582560296846015e-06, "loss": 28.2693, "step": 403 }, { "epoch": 0.03749419953596288, "grad_norm": 52.177024841308594, "learning_rate": 7.476808905380335e-06, "loss": 27.5201, "step": 404 }, { "epoch": 0.03758700696055684, "grad_norm": 50.06389617919922, "learning_rate": 7.495361781076067e-06, "loss": 27.0992, "step": 405 }, { "epoch": 0.03767981438515081, "grad_norm": 47.76596450805664, "learning_rate": 7.5139146567718e-06, "loss": 26.5509, "step": 406 }, { "epoch": 0.03777262180974478, "grad_norm": 45.462669372558594, "learning_rate": 7.532467532467533e-06, "loss": 26.3671, "step": 407 }, { "epoch": 0.03786542923433875, "grad_norm": 47.987709045410156, "learning_rate": 7.551020408163265e-06, "loss": 25.75, "step": 408 }, { "epoch": 0.03795823665893271, "grad_norm": 50.25505828857422, "learning_rate": 7.569573283858998e-06, "loss": 26.8742, "step": 409 }, { "epoch": 0.038051044083526685, "grad_norm": 49.07978057861328, "learning_rate": 7.588126159554732e-06, "loss": 28.715, "step": 410 }, { "epoch": 0.03814385150812065, "grad_norm": 47.29757308959961, "learning_rate": 7.606679035250465e-06, "loss": 27.5728, "step": 411 }, { "epoch": 0.03823665893271462, "grad_norm": 48.96763229370117, "learning_rate": 7.625231910946197e-06, "loss": 26.5309, "step": 412 }, { "epoch": 0.038329466357308585, "grad_norm": 52.45938491821289, "learning_rate": 7.64378478664193e-06, "loss": 27.7658, "step": 413 }, { "epoch": 0.03842227378190255, "grad_norm": 49.19373321533203, "learning_rate": 7.662337662337663e-06, "loss": 26.0998, "step": 414 }, { "epoch": 0.03851508120649652, "grad_norm": 51.18046569824219, "learning_rate": 7.680890538033396e-06, "loss": 26.4942, "step": 415 }, { "epoch": 0.038607888631090485, "grad_norm": 49.04461669921875, "learning_rate": 7.699443413729129e-06, "loss": 25.5498, "step": 416 }, { "epoch": 0.03870069605568446, "grad_norm": 50.183555603027344, "learning_rate": 7.717996289424862e-06, "loss": 26.216, "step": 417 }, { "epoch": 0.03879350348027842, "grad_norm": 53.19923400878906, "learning_rate": 7.736549165120595e-06, "loss": 27.9212, "step": 418 }, { "epoch": 0.03888631090487239, "grad_norm": 50.43867492675781, "learning_rate": 7.755102040816327e-06, "loss": 27.1856, "step": 419 }, { "epoch": 0.03897911832946636, "grad_norm": 47.325355529785156, "learning_rate": 7.77365491651206e-06, "loss": 27.0982, "step": 420 }, { "epoch": 0.03907192575406032, "grad_norm": 45.926414489746094, "learning_rate": 7.792207792207793e-06, "loss": 27.94, "step": 421 }, { "epoch": 0.03916473317865429, "grad_norm": 49.56071853637695, "learning_rate": 7.810760667903525e-06, "loss": 26.2944, "step": 422 }, { "epoch": 0.03925754060324826, "grad_norm": 51.303958892822266, "learning_rate": 7.829313543599259e-06, "loss": 27.9351, "step": 423 }, { "epoch": 0.03935034802784223, "grad_norm": 48.855628967285156, "learning_rate": 7.847866419294992e-06, "loss": 26.6065, "step": 424 }, { "epoch": 0.03944315545243619, "grad_norm": 46.17842483520508, "learning_rate": 7.866419294990723e-06, "loss": 27.1564, "step": 425 }, { "epoch": 0.039535962877030165, "grad_norm": 53.334503173828125, "learning_rate": 7.884972170686456e-06, "loss": 26.836, "step": 426 }, { "epoch": 0.03962877030162413, "grad_norm": 46.96249008178711, "learning_rate": 7.90352504638219e-06, "loss": 25.6199, "step": 427 }, { "epoch": 0.0397215777262181, "grad_norm": 52.613990783691406, "learning_rate": 7.922077922077924e-06, "loss": 26.0727, "step": 428 }, { "epoch": 0.039814385150812065, "grad_norm": 46.95781326293945, "learning_rate": 7.940630797773655e-06, "loss": 28.1253, "step": 429 }, { "epoch": 0.03990719257540603, "grad_norm": 52.93965530395508, "learning_rate": 7.959183673469388e-06, "loss": 25.9396, "step": 430 }, { "epoch": 0.04, "grad_norm": 52.45073318481445, "learning_rate": 7.97773654916512e-06, "loss": 27.2799, "step": 431 }, { "epoch": 0.040092807424593965, "grad_norm": 47.977413177490234, "learning_rate": 7.996289424860854e-06, "loss": 25.8007, "step": 432 }, { "epoch": 0.04018561484918794, "grad_norm": 47.09157943725586, "learning_rate": 8.014842300556587e-06, "loss": 27.1735, "step": 433 }, { "epoch": 0.0402784222737819, "grad_norm": 49.995094299316406, "learning_rate": 8.03339517625232e-06, "loss": 26.5129, "step": 434 }, { "epoch": 0.04037122969837587, "grad_norm": 49.954345703125, "learning_rate": 8.051948051948052e-06, "loss": 26.6664, "step": 435 }, { "epoch": 0.04046403712296984, "grad_norm": 48.66532897949219, "learning_rate": 8.070500927643785e-06, "loss": 28.9927, "step": 436 }, { "epoch": 0.04055684454756381, "grad_norm": 56.636600494384766, "learning_rate": 8.089053803339518e-06, "loss": 28.1039, "step": 437 }, { "epoch": 0.04064965197215777, "grad_norm": 46.68605422973633, "learning_rate": 8.107606679035251e-06, "loss": 28.3323, "step": 438 }, { "epoch": 0.04074245939675174, "grad_norm": 49.03940200805664, "learning_rate": 8.126159554730984e-06, "loss": 26.9238, "step": 439 }, { "epoch": 0.04083526682134571, "grad_norm": 48.243404388427734, "learning_rate": 8.144712430426717e-06, "loss": 28.1405, "step": 440 }, { "epoch": 0.04092807424593967, "grad_norm": 45.07194900512695, "learning_rate": 8.16326530612245e-06, "loss": 24.8394, "step": 441 }, { "epoch": 0.041020881670533645, "grad_norm": 51.30935287475586, "learning_rate": 8.181818181818183e-06, "loss": 25.8572, "step": 442 }, { "epoch": 0.04111368909512761, "grad_norm": 48.91477584838867, "learning_rate": 8.200371057513916e-06, "loss": 27.5338, "step": 443 }, { "epoch": 0.04120649651972158, "grad_norm": 48.48388671875, "learning_rate": 8.218923933209649e-06, "loss": 25.4812, "step": 444 }, { "epoch": 0.041299303944315545, "grad_norm": 49.84416580200195, "learning_rate": 8.237476808905382e-06, "loss": 26.3493, "step": 445 }, { "epoch": 0.04139211136890951, "grad_norm": 45.925750732421875, "learning_rate": 8.256029684601113e-06, "loss": 25.6515, "step": 446 }, { "epoch": 0.04148491879350348, "grad_norm": 50.70340347290039, "learning_rate": 8.274582560296846e-06, "loss": 26.3353, "step": 447 }, { "epoch": 0.041577726218097445, "grad_norm": 47.94352340698242, "learning_rate": 8.29313543599258e-06, "loss": 27.3527, "step": 448 }, { "epoch": 0.04167053364269142, "grad_norm": 48.573143005371094, "learning_rate": 8.311688311688313e-06, "loss": 25.8742, "step": 449 }, { "epoch": 0.04176334106728538, "grad_norm": 48.652896881103516, "learning_rate": 8.330241187384045e-06, "loss": 26.8522, "step": 450 }, { "epoch": 0.04185614849187935, "grad_norm": 154.28677368164062, "learning_rate": 8.348794063079778e-06, "loss": 31.2192, "step": 451 }, { "epoch": 0.04194895591647332, "grad_norm": 235.485595703125, "learning_rate": 8.36734693877551e-06, "loss": 29.0833, "step": 452 }, { "epoch": 0.04204176334106729, "grad_norm": 57.62535095214844, "learning_rate": 8.385899814471243e-06, "loss": 26.6644, "step": 453 }, { "epoch": 0.04213457076566125, "grad_norm": 46.006404876708984, "learning_rate": 8.404452690166976e-06, "loss": 26.111, "step": 454 }, { "epoch": 0.04222737819025522, "grad_norm": 47.05760192871094, "learning_rate": 8.42300556586271e-06, "loss": 28.1342, "step": 455 }, { "epoch": 0.04232018561484919, "grad_norm": 47.81825256347656, "learning_rate": 8.441558441558442e-06, "loss": 28.5253, "step": 456 }, { "epoch": 0.04241299303944315, "grad_norm": 48.816184997558594, "learning_rate": 8.460111317254175e-06, "loss": 26.3675, "step": 457 }, { "epoch": 0.042505800464037125, "grad_norm": 44.68061447143555, "learning_rate": 8.478664192949908e-06, "loss": 27.1374, "step": 458 }, { "epoch": 0.04259860788863109, "grad_norm": 48.65315628051758, "learning_rate": 8.497217068645641e-06, "loss": 27.3655, "step": 459 }, { "epoch": 0.04269141531322506, "grad_norm": 46.63502502441406, "learning_rate": 8.515769944341374e-06, "loss": 27.9266, "step": 460 }, { "epoch": 0.042784222737819025, "grad_norm": 51.802955627441406, "learning_rate": 8.534322820037107e-06, "loss": 27.9665, "step": 461 }, { "epoch": 0.042877030162412996, "grad_norm": 48.453067779541016, "learning_rate": 8.55287569573284e-06, "loss": 26.3969, "step": 462 }, { "epoch": 0.04296983758700696, "grad_norm": 48.185401916503906, "learning_rate": 8.571428571428571e-06, "loss": 26.4919, "step": 463 }, { "epoch": 0.043062645011600925, "grad_norm": 48.052574157714844, "learning_rate": 8.589981447124306e-06, "loss": 28.4508, "step": 464 }, { "epoch": 0.0431554524361949, "grad_norm": 48.85379409790039, "learning_rate": 8.608534322820038e-06, "loss": 26.6968, "step": 465 }, { "epoch": 0.04324825986078886, "grad_norm": 47.72921371459961, "learning_rate": 8.627087198515771e-06, "loss": 26.8188, "step": 466 }, { "epoch": 0.04334106728538283, "grad_norm": 53.56642150878906, "learning_rate": 8.645640074211503e-06, "loss": 27.6079, "step": 467 }, { "epoch": 0.0434338747099768, "grad_norm": 46.706520080566406, "learning_rate": 8.664192949907236e-06, "loss": 26.1763, "step": 468 }, { "epoch": 0.04352668213457077, "grad_norm": 47.96636962890625, "learning_rate": 8.68274582560297e-06, "loss": 26.5257, "step": 469 }, { "epoch": 0.04361948955916473, "grad_norm": 60.40324401855469, "learning_rate": 8.701298701298701e-06, "loss": 27.4107, "step": 470 }, { "epoch": 0.043712296983758704, "grad_norm": 45.83900833129883, "learning_rate": 8.719851576994434e-06, "loss": 27.542, "step": 471 }, { "epoch": 0.04380510440835267, "grad_norm": 49.08807373046875, "learning_rate": 8.738404452690167e-06, "loss": 25.7339, "step": 472 }, { "epoch": 0.04389791183294663, "grad_norm": 54.197383880615234, "learning_rate": 8.756957328385902e-06, "loss": 26.5572, "step": 473 }, { "epoch": 0.043990719257540604, "grad_norm": 48.66139221191406, "learning_rate": 8.775510204081633e-06, "loss": 27.5635, "step": 474 }, { "epoch": 0.04408352668213457, "grad_norm": 49.24354553222656, "learning_rate": 8.794063079777366e-06, "loss": 27.0747, "step": 475 }, { "epoch": 0.04417633410672854, "grad_norm": 44.795528411865234, "learning_rate": 8.812615955473099e-06, "loss": 26.15, "step": 476 }, { "epoch": 0.044269141531322505, "grad_norm": 46.5285758972168, "learning_rate": 8.831168831168832e-06, "loss": 26.3453, "step": 477 }, { "epoch": 0.044361948955916476, "grad_norm": 44.461700439453125, "learning_rate": 8.849721706864565e-06, "loss": 25.4714, "step": 478 }, { "epoch": 0.04445475638051044, "grad_norm": 45.33073806762695, "learning_rate": 8.868274582560298e-06, "loss": 26.4261, "step": 479 }, { "epoch": 0.044547563805104405, "grad_norm": 45.84056091308594, "learning_rate": 8.88682745825603e-06, "loss": 28.0077, "step": 480 }, { "epoch": 0.044640371229698377, "grad_norm": 69.771240234375, "learning_rate": 8.905380333951764e-06, "loss": 27.4005, "step": 481 }, { "epoch": 0.04473317865429234, "grad_norm": 46.96486282348633, "learning_rate": 8.923933209647496e-06, "loss": 26.6084, "step": 482 }, { "epoch": 0.04482598607888631, "grad_norm": 47.92680740356445, "learning_rate": 8.94248608534323e-06, "loss": 26.1636, "step": 483 }, { "epoch": 0.04491879350348028, "grad_norm": 55.4206428527832, "learning_rate": 8.96103896103896e-06, "loss": 27.1717, "step": 484 }, { "epoch": 0.04501160092807425, "grad_norm": 52.875797271728516, "learning_rate": 8.979591836734695e-06, "loss": 27.6781, "step": 485 }, { "epoch": 0.04510440835266821, "grad_norm": 49.72709655761719, "learning_rate": 8.998144712430428e-06, "loss": 26.933, "step": 486 }, { "epoch": 0.045197215777262184, "grad_norm": 49.70282745361328, "learning_rate": 9.01669758812616e-06, "loss": 29.5, "step": 487 }, { "epoch": 0.04529002320185615, "grad_norm": 47.438934326171875, "learning_rate": 9.035250463821892e-06, "loss": 26.9323, "step": 488 }, { "epoch": 0.04538283062645011, "grad_norm": 51.6547737121582, "learning_rate": 9.053803339517627e-06, "loss": 27.3893, "step": 489 }, { "epoch": 0.045475638051044084, "grad_norm": 49.89749526977539, "learning_rate": 9.07235621521336e-06, "loss": 26.1803, "step": 490 }, { "epoch": 0.04556844547563805, "grad_norm": 49.551971435546875, "learning_rate": 9.090909090909091e-06, "loss": 28.3543, "step": 491 }, { "epoch": 0.04566125290023202, "grad_norm": 48.903907775878906, "learning_rate": 9.109461966604824e-06, "loss": 27.7416, "step": 492 }, { "epoch": 0.045754060324825985, "grad_norm": 45.17913818359375, "learning_rate": 9.128014842300557e-06, "loss": 24.3938, "step": 493 }, { "epoch": 0.045846867749419956, "grad_norm": 49.26522445678711, "learning_rate": 9.14656771799629e-06, "loss": 27.7723, "step": 494 }, { "epoch": 0.04593967517401392, "grad_norm": 48.824893951416016, "learning_rate": 9.165120593692023e-06, "loss": 27.6501, "step": 495 }, { "epoch": 0.04603248259860789, "grad_norm": 48.01900863647461, "learning_rate": 9.183673469387756e-06, "loss": 27.2653, "step": 496 }, { "epoch": 0.046125290023201856, "grad_norm": 48.34567642211914, "learning_rate": 9.202226345083489e-06, "loss": 25.3995, "step": 497 }, { "epoch": 0.04621809744779582, "grad_norm": 55.977317810058594, "learning_rate": 9.220779220779221e-06, "loss": 26.4412, "step": 498 }, { "epoch": 0.04631090487238979, "grad_norm": 10769.5234375, "learning_rate": 9.239332096474954e-06, "loss": 29.7071, "step": 499 }, { "epoch": 0.04640371229698376, "grad_norm": 50.17621994018555, "learning_rate": 9.257884972170687e-06, "loss": 25.024, "step": 500 }, { "epoch": 0.04649651972157773, "grad_norm": 48.24872970581055, "learning_rate": 9.27643784786642e-06, "loss": 27.5124, "step": 501 }, { "epoch": 0.04658932714617169, "grad_norm": 49.2554931640625, "learning_rate": 9.294990723562153e-06, "loss": 25.8086, "step": 502 }, { "epoch": 0.046682134570765664, "grad_norm": 45.6231575012207, "learning_rate": 9.313543599257886e-06, "loss": 27.1199, "step": 503 }, { "epoch": 0.04677494199535963, "grad_norm": 48.52029800415039, "learning_rate": 9.332096474953617e-06, "loss": 25.9556, "step": 504 }, { "epoch": 0.04686774941995359, "grad_norm": 48.14445877075195, "learning_rate": 9.350649350649352e-06, "loss": 26.8635, "step": 505 }, { "epoch": 0.046960556844547564, "grad_norm": 48.365264892578125, "learning_rate": 9.369202226345085e-06, "loss": 26.7727, "step": 506 }, { "epoch": 0.04705336426914153, "grad_norm": 46.50623321533203, "learning_rate": 9.387755102040818e-06, "loss": 29.2215, "step": 507 }, { "epoch": 0.0471461716937355, "grad_norm": 46.79218292236328, "learning_rate": 9.406307977736549e-06, "loss": 28.6575, "step": 508 }, { "epoch": 0.047238979118329465, "grad_norm": 60.66526412963867, "learning_rate": 9.424860853432282e-06, "loss": 29.0192, "step": 509 }, { "epoch": 0.047331786542923436, "grad_norm": 56.14840316772461, "learning_rate": 9.443413729128017e-06, "loss": 26.2386, "step": 510 }, { "epoch": 0.0474245939675174, "grad_norm": 47.273441314697266, "learning_rate": 9.461966604823748e-06, "loss": 28.2426, "step": 511 }, { "epoch": 0.04751740139211137, "grad_norm": 70.762451171875, "learning_rate": 9.48051948051948e-06, "loss": 26.9742, "step": 512 }, { "epoch": 0.047610208816705336, "grad_norm": 52.1647834777832, "learning_rate": 9.499072356215214e-06, "loss": 25.0482, "step": 513 }, { "epoch": 0.0477030162412993, "grad_norm": 50.206729888916016, "learning_rate": 9.517625231910948e-06, "loss": 26.874, "step": 514 }, { "epoch": 0.04779582366589327, "grad_norm": 66.14610290527344, "learning_rate": 9.53617810760668e-06, "loss": 26.9987, "step": 515 }, { "epoch": 0.04788863109048724, "grad_norm": 47.553104400634766, "learning_rate": 9.554730983302412e-06, "loss": 25.7929, "step": 516 }, { "epoch": 0.04798143851508121, "grad_norm": 48.956607818603516, "learning_rate": 9.573283858998145e-06, "loss": 25.7251, "step": 517 }, { "epoch": 0.04807424593967517, "grad_norm": 59.0118522644043, "learning_rate": 9.591836734693878e-06, "loss": 27.5756, "step": 518 }, { "epoch": 0.048167053364269144, "grad_norm": 45.135074615478516, "learning_rate": 9.610389610389611e-06, "loss": 25.6156, "step": 519 }, { "epoch": 0.04825986078886311, "grad_norm": 59.61967468261719, "learning_rate": 9.628942486085344e-06, "loss": 26.9609, "step": 520 }, { "epoch": 0.04835266821345708, "grad_norm": 50.135581970214844, "learning_rate": 9.647495361781077e-06, "loss": 25.0429, "step": 521 }, { "epoch": 0.048445475638051044, "grad_norm": 47.11003112792969, "learning_rate": 9.66604823747681e-06, "loss": 24.7927, "step": 522 }, { "epoch": 0.04853828306264501, "grad_norm": 47.312522888183594, "learning_rate": 9.684601113172543e-06, "loss": 25.4322, "step": 523 }, { "epoch": 0.04863109048723898, "grad_norm": 47.798309326171875, "learning_rate": 9.703153988868276e-06, "loss": 25.1569, "step": 524 }, { "epoch": 0.048723897911832945, "grad_norm": 46.669925689697266, "learning_rate": 9.721706864564007e-06, "loss": 26.999, "step": 525 }, { "epoch": 0.048816705336426916, "grad_norm": 46.20768737792969, "learning_rate": 9.740259740259742e-06, "loss": 24.3992, "step": 526 }, { "epoch": 0.04890951276102088, "grad_norm": 46.53165054321289, "learning_rate": 9.758812615955475e-06, "loss": 24.715, "step": 527 }, { "epoch": 0.04900232018561485, "grad_norm": 49.813838958740234, "learning_rate": 9.777365491651206e-06, "loss": 26.0309, "step": 528 }, { "epoch": 0.049095127610208816, "grad_norm": 49.84016799926758, "learning_rate": 9.795918367346939e-06, "loss": 28.4538, "step": 529 }, { "epoch": 0.04918793503480278, "grad_norm": 52.2115478515625, "learning_rate": 9.814471243042673e-06, "loss": 28.2388, "step": 530 }, { "epoch": 0.04928074245939675, "grad_norm": 48.80279541015625, "learning_rate": 9.833024118738406e-06, "loss": 28.6837, "step": 531 }, { "epoch": 0.04937354988399072, "grad_norm": 46.906776428222656, "learning_rate": 9.851576994434137e-06, "loss": 25.9024, "step": 532 }, { "epoch": 0.04946635730858469, "grad_norm": 51.85017395019531, "learning_rate": 9.87012987012987e-06, "loss": 26.699, "step": 533 }, { "epoch": 0.04955916473317865, "grad_norm": 44.998924255371094, "learning_rate": 9.888682745825603e-06, "loss": 25.1803, "step": 534 }, { "epoch": 0.049651972157772624, "grad_norm": 60.51942825317383, "learning_rate": 9.907235621521336e-06, "loss": 27.8894, "step": 535 }, { "epoch": 0.04974477958236659, "grad_norm": 64.58516693115234, "learning_rate": 9.925788497217069e-06, "loss": 27.6043, "step": 536 }, { "epoch": 0.04983758700696056, "grad_norm": 63.514320373535156, "learning_rate": 9.944341372912802e-06, "loss": 24.1281, "step": 537 }, { "epoch": 0.049930394431554524, "grad_norm": 110.97765350341797, "learning_rate": 9.962894248608535e-06, "loss": 29.9929, "step": 538 }, { "epoch": 0.05002320185614849, "grad_norm": 47.416290283203125, "learning_rate": 9.981447124304268e-06, "loss": 24.7725, "step": 539 }, { "epoch": 0.05011600928074246, "grad_norm": 46.03754425048828, "learning_rate": 1e-05, "loss": 26.7794, "step": 540 }, { "epoch": 0.050208816705336425, "grad_norm": 50.25762939453125, "learning_rate": 9.999999764506354e-06, "loss": 24.5977, "step": 541 }, { "epoch": 0.050301624129930396, "grad_norm": 58.982025146484375, "learning_rate": 9.999999058025433e-06, "loss": 26.795, "step": 542 }, { "epoch": 0.05039443155452436, "grad_norm": 44.8877067565918, "learning_rate": 9.999997880557308e-06, "loss": 24.6066, "step": 543 }, { "epoch": 0.05048723897911833, "grad_norm": 47.56435012817383, "learning_rate": 9.999996232102086e-06, "loss": 27.2894, "step": 544 }, { "epoch": 0.050580046403712296, "grad_norm": 48.638187408447266, "learning_rate": 9.999994112659925e-06, "loss": 26.582, "step": 545 }, { "epoch": 0.05067285382830627, "grad_norm": 46.78445053100586, "learning_rate": 9.999991522231024e-06, "loss": 26.45, "step": 546 }, { "epoch": 0.05076566125290023, "grad_norm": 47.07038497924805, "learning_rate": 9.999988460815627e-06, "loss": 25.7107, "step": 547 }, { "epoch": 0.0508584686774942, "grad_norm": 48.176788330078125, "learning_rate": 9.999984928414022e-06, "loss": 26.2341, "step": 548 }, { "epoch": 0.05095127610208817, "grad_norm": 47.98350143432617, "learning_rate": 9.99998092502654e-06, "loss": 28.1233, "step": 549 }, { "epoch": 0.05104408352668213, "grad_norm": 45.699249267578125, "learning_rate": 9.999976450653563e-06, "loss": 26.8332, "step": 550 }, { "epoch": 0.051136890951276104, "grad_norm": 133.55154418945312, "learning_rate": 9.999971505295507e-06, "loss": 29.0532, "step": 551 }, { "epoch": 0.05122969837587007, "grad_norm": 45.46145248413086, "learning_rate": 9.999966088952842e-06, "loss": 26.1836, "step": 552 }, { "epoch": 0.05132250580046404, "grad_norm": 46.35340118408203, "learning_rate": 9.999960201626076e-06, "loss": 26.5852, "step": 553 }, { "epoch": 0.051415313225058004, "grad_norm": 42.99744415283203, "learning_rate": 9.999953843315764e-06, "loss": 24.9688, "step": 554 }, { "epoch": 0.051508120649651976, "grad_norm": 51.56480026245117, "learning_rate": 9.999947014022506e-06, "loss": 26.5714, "step": 555 }, { "epoch": 0.05160092807424594, "grad_norm": 44.874759674072266, "learning_rate": 9.999939713746943e-06, "loss": 25.1552, "step": 556 }, { "epoch": 0.051693735498839904, "grad_norm": 46.36491012573242, "learning_rate": 9.999931942489764e-06, "loss": 26.2286, "step": 557 }, { "epoch": 0.051786542923433876, "grad_norm": 45.77663040161133, "learning_rate": 9.999923700251704e-06, "loss": 27.2007, "step": 558 }, { "epoch": 0.05187935034802784, "grad_norm": 47.34976577758789, "learning_rate": 9.999914987033533e-06, "loss": 26.0989, "step": 559 }, { "epoch": 0.05197215777262181, "grad_norm": 47.053245544433594, "learning_rate": 9.999905802836077e-06, "loss": 26.4029, "step": 560 }, { "epoch": 0.052064965197215776, "grad_norm": 43.61326599121094, "learning_rate": 9.999896147660197e-06, "loss": 27.1928, "step": 561 }, { "epoch": 0.05215777262180975, "grad_norm": 49.41153335571289, "learning_rate": 9.999886021506808e-06, "loss": 28.0398, "step": 562 }, { "epoch": 0.05225058004640371, "grad_norm": 43.97298812866211, "learning_rate": 9.99987542437686e-06, "loss": 25.0165, "step": 563 }, { "epoch": 0.052343387470997677, "grad_norm": 82.09635162353516, "learning_rate": 9.99986435627135e-06, "loss": 28.8803, "step": 564 }, { "epoch": 0.05243619489559165, "grad_norm": 44.352394104003906, "learning_rate": 9.999852817191325e-06, "loss": 25.9553, "step": 565 }, { "epoch": 0.05252900232018561, "grad_norm": 55.139312744140625, "learning_rate": 9.999840807137868e-06, "loss": 24.5827, "step": 566 }, { "epoch": 0.052621809744779584, "grad_norm": 42.679664611816406, "learning_rate": 9.999828326112112e-06, "loss": 25.6885, "step": 567 }, { "epoch": 0.05271461716937355, "grad_norm": 43.740596771240234, "learning_rate": 9.999815374115232e-06, "loss": 26.4023, "step": 568 }, { "epoch": 0.05280742459396752, "grad_norm": 41.390716552734375, "learning_rate": 9.99980195114845e-06, "loss": 27.2329, "step": 569 }, { "epoch": 0.052900232018561484, "grad_norm": 42.35981369018555, "learning_rate": 9.999788057213026e-06, "loss": 26.513, "step": 570 }, { "epoch": 0.052993039443155456, "grad_norm": 45.05868911743164, "learning_rate": 9.999773692310273e-06, "loss": 27.041, "step": 571 }, { "epoch": 0.05308584686774942, "grad_norm": 50.542030334472656, "learning_rate": 9.999758856441544e-06, "loss": 27.1682, "step": 572 }, { "epoch": 0.053178654292343384, "grad_norm": 45.572959899902344, "learning_rate": 9.999743549608235e-06, "loss": 25.5212, "step": 573 }, { "epoch": 0.053271461716937356, "grad_norm": 46.37370681762695, "learning_rate": 9.999727771811787e-06, "loss": 27.0759, "step": 574 }, { "epoch": 0.05336426914153132, "grad_norm": 43.63402557373047, "learning_rate": 9.999711523053689e-06, "loss": 28.4691, "step": 575 }, { "epoch": 0.05345707656612529, "grad_norm": 49.91801071166992, "learning_rate": 9.999694803335468e-06, "loss": 26.247, "step": 576 }, { "epoch": 0.053549883990719256, "grad_norm": 43.84367370605469, "learning_rate": 9.999677612658704e-06, "loss": 25.8497, "step": 577 }, { "epoch": 0.05364269141531323, "grad_norm": 45.78596496582031, "learning_rate": 9.999659951025011e-06, "loss": 26.9818, "step": 578 }, { "epoch": 0.05373549883990719, "grad_norm": 43.01783752441406, "learning_rate": 9.999641818436057e-06, "loss": 26.8316, "step": 579 }, { "epoch": 0.05382830626450116, "grad_norm": 44.2232666015625, "learning_rate": 9.999623214893546e-06, "loss": 27.7125, "step": 580 }, { "epoch": 0.05392111368909513, "grad_norm": 47.3172492980957, "learning_rate": 9.999604140399232e-06, "loss": 24.7903, "step": 581 }, { "epoch": 0.05401392111368909, "grad_norm": 47.27372741699219, "learning_rate": 9.999584594954913e-06, "loss": 26.8203, "step": 582 }, { "epoch": 0.054106728538283064, "grad_norm": 42.937705993652344, "learning_rate": 9.99956457856243e-06, "loss": 25.6246, "step": 583 }, { "epoch": 0.05419953596287703, "grad_norm": 44.676483154296875, "learning_rate": 9.999544091223668e-06, "loss": 25.0593, "step": 584 }, { "epoch": 0.054292343387471, "grad_norm": 51.47517395019531, "learning_rate": 9.999523132940555e-06, "loss": 27.3599, "step": 585 }, { "epoch": 0.054385150812064964, "grad_norm": 47.843624114990234, "learning_rate": 9.999501703715068e-06, "loss": 27.6119, "step": 586 }, { "epoch": 0.054477958236658935, "grad_norm": 43.14875411987305, "learning_rate": 9.999479803549224e-06, "loss": 25.7354, "step": 587 }, { "epoch": 0.0545707656612529, "grad_norm": 43.350650787353516, "learning_rate": 9.999457432445087e-06, "loss": 27.329, "step": 588 }, { "epoch": 0.054663573085846864, "grad_norm": 40.35000228881836, "learning_rate": 9.999434590404764e-06, "loss": 25.9758, "step": 589 }, { "epoch": 0.054756380510440836, "grad_norm": 40.83234786987305, "learning_rate": 9.999411277430405e-06, "loss": 27.1617, "step": 590 }, { "epoch": 0.0548491879350348, "grad_norm": 43.70502471923828, "learning_rate": 9.999387493524208e-06, "loss": 27.9004, "step": 591 }, { "epoch": 0.05494199535962877, "grad_norm": 43.580970764160156, "learning_rate": 9.999363238688413e-06, "loss": 24.8199, "step": 592 }, { "epoch": 0.055034802784222736, "grad_norm": 42.97624206542969, "learning_rate": 9.999338512925306e-06, "loss": 26.7786, "step": 593 }, { "epoch": 0.05512761020881671, "grad_norm": 44.9541015625, "learning_rate": 9.999313316237211e-06, "loss": 27.1159, "step": 594 }, { "epoch": 0.05522041763341067, "grad_norm": 46.57854080200195, "learning_rate": 9.999287648626508e-06, "loss": 25.3576, "step": 595 }, { "epoch": 0.05531322505800464, "grad_norm": 46.4020881652832, "learning_rate": 9.99926151009561e-06, "loss": 26.6163, "step": 596 }, { "epoch": 0.05540603248259861, "grad_norm": 46.35017776489258, "learning_rate": 9.999234900646984e-06, "loss": 24.7062, "step": 597 }, { "epoch": 0.05549883990719257, "grad_norm": 48.4564094543457, "learning_rate": 9.99920782028313e-06, "loss": 26.4146, "step": 598 }, { "epoch": 0.055591647331786544, "grad_norm": 48.0246696472168, "learning_rate": 9.999180269006605e-06, "loss": 25.466, "step": 599 }, { "epoch": 0.05568445475638051, "grad_norm": 47.45824432373047, "learning_rate": 9.999152246820001e-06, "loss": 27.7317, "step": 600 }, { "epoch": 0.05577726218097448, "grad_norm": 43.563812255859375, "learning_rate": 9.99912375372596e-06, "loss": 24.4739, "step": 601 }, { "epoch": 0.055870069605568444, "grad_norm": 46.283016204833984, "learning_rate": 9.999094789727161e-06, "loss": 26.4868, "step": 602 }, { "epoch": 0.055962877030162415, "grad_norm": 45.779075622558594, "learning_rate": 9.999065354826338e-06, "loss": 26.9519, "step": 603 }, { "epoch": 0.05605568445475638, "grad_norm": 44.447044372558594, "learning_rate": 9.999035449026261e-06, "loss": 25.9224, "step": 604 }, { "epoch": 0.05614849187935035, "grad_norm": 49.29071044921875, "learning_rate": 9.999005072329748e-06, "loss": 26.7706, "step": 605 }, { "epoch": 0.056241299303944316, "grad_norm": 45.61696243286133, "learning_rate": 9.99897422473966e-06, "loss": 25.9797, "step": 606 }, { "epoch": 0.05633410672853828, "grad_norm": 44.2738037109375, "learning_rate": 9.998942906258902e-06, "loss": 26.2652, "step": 607 }, { "epoch": 0.05642691415313225, "grad_norm": 42.962284088134766, "learning_rate": 9.998911116890423e-06, "loss": 25.1354, "step": 608 }, { "epoch": 0.056519721577726216, "grad_norm": 44.9071159362793, "learning_rate": 9.998878856637222e-06, "loss": 27.1264, "step": 609 }, { "epoch": 0.05661252900232019, "grad_norm": 47.13013458251953, "learning_rate": 9.998846125502333e-06, "loss": 26.3026, "step": 610 }, { "epoch": 0.05670533642691415, "grad_norm": 48.48895263671875, "learning_rate": 9.998812923488842e-06, "loss": 24.9457, "step": 611 }, { "epoch": 0.05679814385150812, "grad_norm": 51.923160552978516, "learning_rate": 9.998779250599877e-06, "loss": 26.6016, "step": 612 }, { "epoch": 0.05689095127610209, "grad_norm": 45.36075210571289, "learning_rate": 9.998745106838608e-06, "loss": 25.2881, "step": 613 }, { "epoch": 0.05698375870069606, "grad_norm": 44.45326614379883, "learning_rate": 9.998710492208252e-06, "loss": 23.9069, "step": 614 }, { "epoch": 0.057076566125290024, "grad_norm": 44.43122482299805, "learning_rate": 9.99867540671207e-06, "loss": 25.0718, "step": 615 }, { "epoch": 0.05716937354988399, "grad_norm": 46.7660026550293, "learning_rate": 9.998639850353365e-06, "loss": 28.0692, "step": 616 }, { "epoch": 0.05726218097447796, "grad_norm": 49.81444549560547, "learning_rate": 9.998603823135489e-06, "loss": 25.3664, "step": 617 }, { "epoch": 0.057354988399071924, "grad_norm": 49.72174835205078, "learning_rate": 9.998567325061834e-06, "loss": 25.3189, "step": 618 }, { "epoch": 0.057447795823665895, "grad_norm": 48.897499084472656, "learning_rate": 9.99853035613584e-06, "loss": 25.1444, "step": 619 }, { "epoch": 0.05754060324825986, "grad_norm": 44.78285598754883, "learning_rate": 9.998492916360987e-06, "loss": 25.8824, "step": 620 }, { "epoch": 0.05763341067285383, "grad_norm": 45.89337921142578, "learning_rate": 9.9984550057408e-06, "loss": 26.1426, "step": 621 }, { "epoch": 0.057726218097447796, "grad_norm": 40.821990966796875, "learning_rate": 9.998416624278857e-06, "loss": 25.3765, "step": 622 }, { "epoch": 0.05781902552204176, "grad_norm": 45.38003158569336, "learning_rate": 9.998377771978767e-06, "loss": 24.6592, "step": 623 }, { "epoch": 0.05791183294663573, "grad_norm": 46.35009002685547, "learning_rate": 9.998338448844193e-06, "loss": 25.9797, "step": 624 }, { "epoch": 0.058004640371229696, "grad_norm": 43.99248504638672, "learning_rate": 9.998298654878837e-06, "loss": 26.0727, "step": 625 }, { "epoch": 0.05809744779582367, "grad_norm": 41.43680191040039, "learning_rate": 9.998258390086449e-06, "loss": 25.4068, "step": 626 }, { "epoch": 0.05819025522041763, "grad_norm": 48.29764175415039, "learning_rate": 9.998217654470822e-06, "loss": 25.8568, "step": 627 }, { "epoch": 0.0582830626450116, "grad_norm": 54.214412689208984, "learning_rate": 9.998176448035792e-06, "loss": 28.0548, "step": 628 }, { "epoch": 0.05837587006960557, "grad_norm": 43.07393264770508, "learning_rate": 9.998134770785242e-06, "loss": 24.5907, "step": 629 }, { "epoch": 0.05846867749419954, "grad_norm": 45.98097229003906, "learning_rate": 9.998092622723095e-06, "loss": 27.7255, "step": 630 }, { "epoch": 0.058561484918793504, "grad_norm": 45.499053955078125, "learning_rate": 9.998050003853326e-06, "loss": 26.9388, "step": 631 }, { "epoch": 0.05865429234338747, "grad_norm": 47.311492919921875, "learning_rate": 9.998006914179945e-06, "loss": 27.0338, "step": 632 }, { "epoch": 0.05874709976798144, "grad_norm": 41.90708541870117, "learning_rate": 9.997963353707014e-06, "loss": 25.0455, "step": 633 }, { "epoch": 0.058839907192575404, "grad_norm": 47.94488525390625, "learning_rate": 9.997919322438634e-06, "loss": 27.4676, "step": 634 }, { "epoch": 0.058932714617169375, "grad_norm": 45.121273040771484, "learning_rate": 9.997874820378955e-06, "loss": 28.2929, "step": 635 }, { "epoch": 0.05902552204176334, "grad_norm": 48.61757278442383, "learning_rate": 9.997829847532165e-06, "loss": 26.647, "step": 636 }, { "epoch": 0.05911832946635731, "grad_norm": 42.50798416137695, "learning_rate": 9.997784403902506e-06, "loss": 26.678, "step": 637 }, { "epoch": 0.059211136890951276, "grad_norm": 43.840457916259766, "learning_rate": 9.997738489494255e-06, "loss": 26.6559, "step": 638 }, { "epoch": 0.05930394431554525, "grad_norm": 45.77558135986328, "learning_rate": 9.997692104311739e-06, "loss": 28.117, "step": 639 }, { "epoch": 0.05939675174013921, "grad_norm": 43.12008285522461, "learning_rate": 9.997645248359324e-06, "loss": 27.4034, "step": 640 }, { "epoch": 0.059489559164733176, "grad_norm": 40.14207077026367, "learning_rate": 9.997597921641426e-06, "loss": 25.4372, "step": 641 }, { "epoch": 0.05958236658932715, "grad_norm": 47.5839729309082, "learning_rate": 9.997550124162505e-06, "loss": 26.2456, "step": 642 }, { "epoch": 0.05967517401392111, "grad_norm": 43.39421463012695, "learning_rate": 9.997501855927059e-06, "loss": 25.6881, "step": 643 }, { "epoch": 0.05976798143851508, "grad_norm": 44.79634094238281, "learning_rate": 9.997453116939638e-06, "loss": 24.4885, "step": 644 }, { "epoch": 0.05986078886310905, "grad_norm": 41.11903381347656, "learning_rate": 9.997403907204833e-06, "loss": 25.4012, "step": 645 }, { "epoch": 0.05995359628770302, "grad_norm": 44.57095718383789, "learning_rate": 9.997354226727277e-06, "loss": 27.9418, "step": 646 }, { "epoch": 0.060046403712296983, "grad_norm": 41.75971603393555, "learning_rate": 9.997304075511652e-06, "loss": 28.368, "step": 647 }, { "epoch": 0.06013921113689095, "grad_norm": 44.56507873535156, "learning_rate": 9.99725345356268e-06, "loss": 26.393, "step": 648 }, { "epoch": 0.06023201856148492, "grad_norm": 45.1552619934082, "learning_rate": 9.997202360885131e-06, "loss": 26.4034, "step": 649 }, { "epoch": 0.060324825986078884, "grad_norm": 45.899253845214844, "learning_rate": 9.997150797483822e-06, "loss": 25.2156, "step": 650 }, { "epoch": 0.060417633410672855, "grad_norm": 50.057090759277344, "learning_rate": 9.997098763363603e-06, "loss": 26.0718, "step": 651 }, { "epoch": 0.06051044083526682, "grad_norm": 47.395484924316406, "learning_rate": 9.997046258529377e-06, "loss": 26.0053, "step": 652 }, { "epoch": 0.06060324825986079, "grad_norm": 59.13998031616211, "learning_rate": 9.996993282986091e-06, "loss": 27.3397, "step": 653 }, { "epoch": 0.060696055684454756, "grad_norm": 45.24436950683594, "learning_rate": 9.996939836738736e-06, "loss": 26.4423, "step": 654 }, { "epoch": 0.06078886310904873, "grad_norm": 44.51789093017578, "learning_rate": 9.996885919792347e-06, "loss": 25.1611, "step": 655 }, { "epoch": 0.06088167053364269, "grad_norm": 45.09213638305664, "learning_rate": 9.996831532152002e-06, "loss": 27.1575, "step": 656 }, { "epoch": 0.060974477958236656, "grad_norm": 42.2539176940918, "learning_rate": 9.996776673822824e-06, "loss": 26.1401, "step": 657 }, { "epoch": 0.06106728538283063, "grad_norm": 51.485347747802734, "learning_rate": 9.996721344809979e-06, "loss": 27.6446, "step": 658 }, { "epoch": 0.06116009280742459, "grad_norm": 45.56991958618164, "learning_rate": 9.99666554511868e-06, "loss": 25.8843, "step": 659 }, { "epoch": 0.06125290023201856, "grad_norm": 45.27667236328125, "learning_rate": 9.996609274754183e-06, "loss": 26.7844, "step": 660 }, { "epoch": 0.06134570765661253, "grad_norm": 44.33086013793945, "learning_rate": 9.996552533721791e-06, "loss": 27.721, "step": 661 }, { "epoch": 0.0614385150812065, "grad_norm": 42.03984069824219, "learning_rate": 9.996495322026844e-06, "loss": 24.9022, "step": 662 }, { "epoch": 0.06153132250580046, "grad_norm": 47.555110931396484, "learning_rate": 9.996437639674736e-06, "loss": 26.6041, "step": 663 }, { "epoch": 0.061624129930394435, "grad_norm": 45.49332046508789, "learning_rate": 9.996379486670898e-06, "loss": 25.9371, "step": 664 }, { "epoch": 0.0617169373549884, "grad_norm": 45.22975158691406, "learning_rate": 9.996320863020809e-06, "loss": 26.2957, "step": 665 }, { "epoch": 0.061809744779582364, "grad_norm": 49.64990997314453, "learning_rate": 9.99626176872999e-06, "loss": 25.3383, "step": 666 }, { "epoch": 0.061902552204176335, "grad_norm": 47.26007080078125, "learning_rate": 9.99620220380401e-06, "loss": 24.9923, "step": 667 }, { "epoch": 0.0619953596287703, "grad_norm": 45.39558792114258, "learning_rate": 9.996142168248474e-06, "loss": 27.8176, "step": 668 }, { "epoch": 0.06208816705336427, "grad_norm": 52.267086029052734, "learning_rate": 9.996081662069043e-06, "loss": 25.188, "step": 669 }, { "epoch": 0.062180974477958235, "grad_norm": 56.26968765258789, "learning_rate": 9.996020685271415e-06, "loss": 26.3431, "step": 670 }, { "epoch": 0.06227378190255221, "grad_norm": 47.63874816894531, "learning_rate": 9.995959237861334e-06, "loss": 25.8924, "step": 671 }, { "epoch": 0.06236658932714617, "grad_norm": 46.78752899169922, "learning_rate": 9.995897319844588e-06, "loss": 25.955, "step": 672 }, { "epoch": 0.062459396751740136, "grad_norm": 51.7826042175293, "learning_rate": 9.995834931227007e-06, "loss": 26.1306, "step": 673 }, { "epoch": 0.0625522041763341, "grad_norm": 44.2587776184082, "learning_rate": 9.995772072014472e-06, "loss": 27.8401, "step": 674 }, { "epoch": 0.06264501160092807, "grad_norm": 46.27628707885742, "learning_rate": 9.995708742212901e-06, "loss": 26.8081, "step": 675 }, { "epoch": 0.06273781902552204, "grad_norm": 45.98931121826172, "learning_rate": 9.995644941828262e-06, "loss": 27.4034, "step": 676 }, { "epoch": 0.06283062645011601, "grad_norm": 40.36174011230469, "learning_rate": 9.995580670866564e-06, "loss": 26.4772, "step": 677 }, { "epoch": 0.06292343387470997, "grad_norm": 47.23126983642578, "learning_rate": 9.99551592933386e-06, "loss": 26.7245, "step": 678 }, { "epoch": 0.06301624129930394, "grad_norm": 44.30828094482422, "learning_rate": 9.99545071723625e-06, "loss": 27.1339, "step": 679 }, { "epoch": 0.06310904872389791, "grad_norm": 45.92216110229492, "learning_rate": 9.995385034579874e-06, "loss": 26.3062, "step": 680 }, { "epoch": 0.06320185614849189, "grad_norm": 46.02046203613281, "learning_rate": 9.995318881370924e-06, "loss": 25.8815, "step": 681 }, { "epoch": 0.06329466357308584, "grad_norm": 58.29305648803711, "learning_rate": 9.995252257615626e-06, "loss": 25.7144, "step": 682 }, { "epoch": 0.06338747099767982, "grad_norm": 43.65707778930664, "learning_rate": 9.99518516332026e-06, "loss": 26.2596, "step": 683 }, { "epoch": 0.06348027842227379, "grad_norm": 46.07326889038086, "learning_rate": 9.995117598491146e-06, "loss": 26.8212, "step": 684 }, { "epoch": 0.06357308584686774, "grad_norm": 45.92272186279297, "learning_rate": 9.995049563134645e-06, "loss": 27.206, "step": 685 }, { "epoch": 0.06366589327146172, "grad_norm": 42.68109130859375, "learning_rate": 9.994981057257169e-06, "loss": 26.8586, "step": 686 }, { "epoch": 0.06375870069605569, "grad_norm": 42.621925354003906, "learning_rate": 9.99491208086517e-06, "loss": 27.1552, "step": 687 }, { "epoch": 0.06385150812064966, "grad_norm": 47.21017074584961, "learning_rate": 9.994842633965143e-06, "loss": 26.3317, "step": 688 }, { "epoch": 0.06394431554524362, "grad_norm": 42.35750198364258, "learning_rate": 9.994772716563634e-06, "loss": 27.0197, "step": 689 }, { "epoch": 0.06403712296983759, "grad_norm": 42.95231628417969, "learning_rate": 9.994702328667225e-06, "loss": 26.1448, "step": 690 }, { "epoch": 0.06412993039443156, "grad_norm": 39.669822692871094, "learning_rate": 9.99463147028255e-06, "loss": 25.7601, "step": 691 }, { "epoch": 0.06422273781902552, "grad_norm": 47.20755386352539, "learning_rate": 9.994560141416282e-06, "loss": 24.119, "step": 692 }, { "epoch": 0.06431554524361949, "grad_norm": 44.13092803955078, "learning_rate": 9.994488342075141e-06, "loss": 27.5712, "step": 693 }, { "epoch": 0.06440835266821346, "grad_norm": 43.5186653137207, "learning_rate": 9.994416072265889e-06, "loss": 24.2561, "step": 694 }, { "epoch": 0.06450116009280743, "grad_norm": 46.985450744628906, "learning_rate": 9.994343331995334e-06, "loss": 25.5779, "step": 695 }, { "epoch": 0.06459396751740139, "grad_norm": 47.350555419921875, "learning_rate": 9.994270121270327e-06, "loss": 27.138, "step": 696 }, { "epoch": 0.06468677494199536, "grad_norm": 45.7126350402832, "learning_rate": 9.994196440097766e-06, "loss": 27.1334, "step": 697 }, { "epoch": 0.06477958236658933, "grad_norm": 53.54954147338867, "learning_rate": 9.994122288484589e-06, "loss": 26.7312, "step": 698 }, { "epoch": 0.06487238979118329, "grad_norm": 44.654727935791016, "learning_rate": 9.994047666437784e-06, "loss": 25.4251, "step": 699 }, { "epoch": 0.06496519721577726, "grad_norm": 43.83708953857422, "learning_rate": 9.993972573964378e-06, "loss": 26.3195, "step": 700 }, { "epoch": 0.06505800464037123, "grad_norm": 45.290287017822266, "learning_rate": 9.993897011071447e-06, "loss": 26.3571, "step": 701 }, { "epoch": 0.0651508120649652, "grad_norm": 41.91029739379883, "learning_rate": 9.993820977766108e-06, "loss": 26.2762, "step": 702 }, { "epoch": 0.06524361948955916, "grad_norm": 43.118160247802734, "learning_rate": 9.99374447405552e-06, "loss": 25.8297, "step": 703 }, { "epoch": 0.06533642691415313, "grad_norm": 46.794864654541016, "learning_rate": 9.993667499946893e-06, "loss": 27.9978, "step": 704 }, { "epoch": 0.0654292343387471, "grad_norm": 44.40357208251953, "learning_rate": 9.993590055447478e-06, "loss": 27.4317, "step": 705 }, { "epoch": 0.06552204176334107, "grad_norm": 43.738956451416016, "learning_rate": 9.993512140564567e-06, "loss": 24.3975, "step": 706 }, { "epoch": 0.06561484918793503, "grad_norm": 45.269832611083984, "learning_rate": 9.993433755305502e-06, "loss": 27.049, "step": 707 }, { "epoch": 0.065707656612529, "grad_norm": 41.06672286987305, "learning_rate": 9.993354899677665e-06, "loss": 25.7771, "step": 708 }, { "epoch": 0.06580046403712297, "grad_norm": 58.156646728515625, "learning_rate": 9.993275573688484e-06, "loss": 26.2001, "step": 709 }, { "epoch": 0.06589327146171693, "grad_norm": 45.549400329589844, "learning_rate": 9.993195777345433e-06, "loss": 26.3721, "step": 710 }, { "epoch": 0.0659860788863109, "grad_norm": 42.68844985961914, "learning_rate": 9.993115510656029e-06, "loss": 24.5593, "step": 711 }, { "epoch": 0.06607888631090487, "grad_norm": 52.26963806152344, "learning_rate": 9.99303477362783e-06, "loss": 25.3071, "step": 712 }, { "epoch": 0.06617169373549885, "grad_norm": 43.60835266113281, "learning_rate": 9.992953566268444e-06, "loss": 26.7135, "step": 713 }, { "epoch": 0.0662645011600928, "grad_norm": 95.89286041259766, "learning_rate": 9.992871888585518e-06, "loss": 25.8958, "step": 714 }, { "epoch": 0.06635730858468677, "grad_norm": 47.257362365722656, "learning_rate": 9.992789740586747e-06, "loss": 28.1034, "step": 715 }, { "epoch": 0.06645011600928075, "grad_norm": 43.028621673583984, "learning_rate": 9.992707122279871e-06, "loss": 25.8571, "step": 716 }, { "epoch": 0.0665429234338747, "grad_norm": 43.874595642089844, "learning_rate": 9.99262403367267e-06, "loss": 26.4888, "step": 717 }, { "epoch": 0.06663573085846868, "grad_norm": 43.215396881103516, "learning_rate": 9.992540474772972e-06, "loss": 22.9325, "step": 718 }, { "epoch": 0.06672853828306265, "grad_norm": 44.14550018310547, "learning_rate": 9.992456445588647e-06, "loss": 25.5099, "step": 719 }, { "epoch": 0.06682134570765662, "grad_norm": 44.8072395324707, "learning_rate": 9.99237194612761e-06, "loss": 26.5893, "step": 720 }, { "epoch": 0.06691415313225058, "grad_norm": 47.157257080078125, "learning_rate": 9.992286976397822e-06, "loss": 26.3581, "step": 721 }, { "epoch": 0.06700696055684455, "grad_norm": 47.605491638183594, "learning_rate": 9.992201536407287e-06, "loss": 26.6371, "step": 722 }, { "epoch": 0.06709976798143852, "grad_norm": 39.633663177490234, "learning_rate": 9.992115626164053e-06, "loss": 25.2662, "step": 723 }, { "epoch": 0.06719257540603248, "grad_norm": 45.816524505615234, "learning_rate": 9.992029245676212e-06, "loss": 25.2839, "step": 724 }, { "epoch": 0.06728538283062645, "grad_norm": 44.07339096069336, "learning_rate": 9.9919423949519e-06, "loss": 25.6508, "step": 725 }, { "epoch": 0.06737819025522042, "grad_norm": 42.98438262939453, "learning_rate": 9.991855073999299e-06, "loss": 26.2662, "step": 726 }, { "epoch": 0.06747099767981439, "grad_norm": 42.57439041137695, "learning_rate": 9.991767282826637e-06, "loss": 25.9565, "step": 727 }, { "epoch": 0.06756380510440835, "grad_norm": 54.87861251831055, "learning_rate": 9.991679021442179e-06, "loss": 25.1307, "step": 728 }, { "epoch": 0.06765661252900232, "grad_norm": 44.54753494262695, "learning_rate": 9.991590289854243e-06, "loss": 25.4303, "step": 729 }, { "epoch": 0.06774941995359629, "grad_norm": 40.55219268798828, "learning_rate": 9.991501088071183e-06, "loss": 25.2559, "step": 730 }, { "epoch": 0.06784222737819026, "grad_norm": 43.994380950927734, "learning_rate": 9.991411416101406e-06, "loss": 23.7254, "step": 731 }, { "epoch": 0.06793503480278422, "grad_norm": 45.821346282958984, "learning_rate": 9.991321273953357e-06, "loss": 26.375, "step": 732 }, { "epoch": 0.06802784222737819, "grad_norm": 41.12624740600586, "learning_rate": 9.991230661635527e-06, "loss": 25.9776, "step": 733 }, { "epoch": 0.06812064965197216, "grad_norm": 44.26263427734375, "learning_rate": 9.99113957915645e-06, "loss": 27.0722, "step": 734 }, { "epoch": 0.06821345707656612, "grad_norm": 42.35219192504883, "learning_rate": 9.991048026524709e-06, "loss": 26.1515, "step": 735 }, { "epoch": 0.06830626450116009, "grad_norm": 41.34218978881836, "learning_rate": 9.990956003748927e-06, "loss": 26.5705, "step": 736 }, { "epoch": 0.06839907192575406, "grad_norm": 42.22749328613281, "learning_rate": 9.99086351083777e-06, "loss": 25.4608, "step": 737 }, { "epoch": 0.06849187935034803, "grad_norm": 44.1505126953125, "learning_rate": 9.990770547799953e-06, "loss": 25.8255, "step": 738 }, { "epoch": 0.06858468677494199, "grad_norm": 42.42935562133789, "learning_rate": 9.990677114644232e-06, "loss": 25.3785, "step": 739 }, { "epoch": 0.06867749419953596, "grad_norm": 47.90282440185547, "learning_rate": 9.990583211379409e-06, "loss": 25.2657, "step": 740 }, { "epoch": 0.06877030162412993, "grad_norm": 40.096710205078125, "learning_rate": 9.990488838014327e-06, "loss": 25.4303, "step": 741 }, { "epoch": 0.06886310904872389, "grad_norm": 43.386600494384766, "learning_rate": 9.99039399455788e-06, "loss": 26.854, "step": 742 }, { "epoch": 0.06895591647331786, "grad_norm": 42.71397018432617, "learning_rate": 9.990298681018997e-06, "loss": 27.9087, "step": 743 }, { "epoch": 0.06904872389791183, "grad_norm": 44.468997955322266, "learning_rate": 9.99020289740666e-06, "loss": 27.5274, "step": 744 }, { "epoch": 0.0691415313225058, "grad_norm": 45.73862838745117, "learning_rate": 9.99010664372989e-06, "loss": 25.7921, "step": 745 }, { "epoch": 0.06923433874709976, "grad_norm": 42.979393005371094, "learning_rate": 9.990009919997757e-06, "loss": 25.3859, "step": 746 }, { "epoch": 0.06932714617169373, "grad_norm": 40.24946594238281, "learning_rate": 9.989912726219366e-06, "loss": 27.5866, "step": 747 }, { "epoch": 0.0694199535962877, "grad_norm": 41.25667190551758, "learning_rate": 9.989815062403877e-06, "loss": 24.3589, "step": 748 }, { "epoch": 0.06951276102088166, "grad_norm": 41.87077331542969, "learning_rate": 9.98971692856049e-06, "loss": 25.842, "step": 749 }, { "epoch": 0.06960556844547564, "grad_norm": 42.393211364746094, "learning_rate": 9.989618324698445e-06, "loss": 25.5157, "step": 750 }, { "epoch": 0.0696983758700696, "grad_norm": 45.53319549560547, "learning_rate": 9.989519250827034e-06, "loss": 27.4149, "step": 751 }, { "epoch": 0.06979118329466358, "grad_norm": 47.38347244262695, "learning_rate": 9.989419706955587e-06, "loss": 26.7994, "step": 752 }, { "epoch": 0.06988399071925754, "grad_norm": 78.79161834716797, "learning_rate": 9.989319693093482e-06, "loss": 28.8707, "step": 753 }, { "epoch": 0.0699767981438515, "grad_norm": 41.47624588012695, "learning_rate": 9.989219209250141e-06, "loss": 23.0445, "step": 754 }, { "epoch": 0.07006960556844548, "grad_norm": 45.649051666259766, "learning_rate": 9.989118255435026e-06, "loss": 25.0006, "step": 755 }, { "epoch": 0.07016241299303945, "grad_norm": 41.510990142822266, "learning_rate": 9.989016831657652e-06, "loss": 26.3722, "step": 756 }, { "epoch": 0.07025522041763341, "grad_norm": 41.753395080566406, "learning_rate": 9.988914937927567e-06, "loss": 28.1611, "step": 757 }, { "epoch": 0.07034802784222738, "grad_norm": 45.23395919799805, "learning_rate": 9.988812574254373e-06, "loss": 26.8512, "step": 758 }, { "epoch": 0.07044083526682135, "grad_norm": 45.52503967285156, "learning_rate": 9.988709740647712e-06, "loss": 24.822, "step": 759 }, { "epoch": 0.07053364269141531, "grad_norm": 56.229957580566406, "learning_rate": 9.98860643711727e-06, "loss": 25.8956, "step": 760 }, { "epoch": 0.07062645011600928, "grad_norm": 44.05463409423828, "learning_rate": 9.988502663672779e-06, "loss": 25.7768, "step": 761 }, { "epoch": 0.07071925754060325, "grad_norm": 68.61372375488281, "learning_rate": 9.98839842032401e-06, "loss": 27.4181, "step": 762 }, { "epoch": 0.07081206496519722, "grad_norm": 48.124412536621094, "learning_rate": 9.988293707080786e-06, "loss": 25.5765, "step": 763 }, { "epoch": 0.07090487238979118, "grad_norm": 45.390968322753906, "learning_rate": 9.988188523952973e-06, "loss": 25.6834, "step": 764 }, { "epoch": 0.07099767981438515, "grad_norm": 41.71124267578125, "learning_rate": 9.988082870950475e-06, "loss": 26.6502, "step": 765 }, { "epoch": 0.07109048723897912, "grad_norm": 45.47574234008789, "learning_rate": 9.987976748083245e-06, "loss": 25.901, "step": 766 }, { "epoch": 0.07118329466357308, "grad_norm": 47.6080207824707, "learning_rate": 9.98787015536128e-06, "loss": 27.2466, "step": 767 }, { "epoch": 0.07127610208816705, "grad_norm": 51.282039642333984, "learning_rate": 9.987763092794621e-06, "loss": 28.5481, "step": 768 }, { "epoch": 0.07136890951276102, "grad_norm": 39.81412124633789, "learning_rate": 9.987655560393352e-06, "loss": 26.2018, "step": 769 }, { "epoch": 0.071461716937355, "grad_norm": 44.48371505737305, "learning_rate": 9.987547558167605e-06, "loss": 26.0234, "step": 770 }, { "epoch": 0.07155452436194895, "grad_norm": 55.80498123168945, "learning_rate": 9.98743908612755e-06, "loss": 25.9543, "step": 771 }, { "epoch": 0.07164733178654292, "grad_norm": 43.405433654785156, "learning_rate": 9.987330144283406e-06, "loss": 24.1773, "step": 772 }, { "epoch": 0.0717401392111369, "grad_norm": 44.29122543334961, "learning_rate": 9.987220732645437e-06, "loss": 25.4989, "step": 773 }, { "epoch": 0.07183294663573085, "grad_norm": 47.320003509521484, "learning_rate": 9.987110851223946e-06, "loss": 24.2864, "step": 774 }, { "epoch": 0.07192575406032482, "grad_norm": 43.89453125, "learning_rate": 9.987000500029286e-06, "loss": 27.2304, "step": 775 }, { "epoch": 0.0720185614849188, "grad_norm": 43.772254943847656, "learning_rate": 9.986889679071853e-06, "loss": 25.8106, "step": 776 }, { "epoch": 0.07211136890951277, "grad_norm": 48.448692321777344, "learning_rate": 9.986778388362081e-06, "loss": 28.067, "step": 777 }, { "epoch": 0.07220417633410672, "grad_norm": 42.20036697387695, "learning_rate": 9.986666627910458e-06, "loss": 24.906, "step": 778 }, { "epoch": 0.0722969837587007, "grad_norm": 40.532073974609375, "learning_rate": 9.98655439772751e-06, "loss": 26.2567, "step": 779 }, { "epoch": 0.07238979118329467, "grad_norm": 88.12779235839844, "learning_rate": 9.986441697823808e-06, "loss": 28.9652, "step": 780 }, { "epoch": 0.07248259860788864, "grad_norm": 41.58530807495117, "learning_rate": 9.986328528209969e-06, "loss": 23.6536, "step": 781 }, { "epoch": 0.0725754060324826, "grad_norm": 43.70457077026367, "learning_rate": 9.986214888896656e-06, "loss": 25.7861, "step": 782 }, { "epoch": 0.07266821345707657, "grad_norm": 44.11785888671875, "learning_rate": 9.986100779894567e-06, "loss": 25.3344, "step": 783 }, { "epoch": 0.07276102088167054, "grad_norm": 44.30296325683594, "learning_rate": 9.985986201214454e-06, "loss": 25.8325, "step": 784 }, { "epoch": 0.0728538283062645, "grad_norm": 42.55406951904297, "learning_rate": 9.985871152867112e-06, "loss": 27.3989, "step": 785 }, { "epoch": 0.07294663573085847, "grad_norm": 44.36310577392578, "learning_rate": 9.985755634863378e-06, "loss": 26.6124, "step": 786 }, { "epoch": 0.07303944315545244, "grad_norm": 49.718833923339844, "learning_rate": 9.985639647214131e-06, "loss": 26.9042, "step": 787 }, { "epoch": 0.07313225058004641, "grad_norm": 50.33287048339844, "learning_rate": 9.985523189930297e-06, "loss": 28.1963, "step": 788 }, { "epoch": 0.07322505800464037, "grad_norm": 43.54338073730469, "learning_rate": 9.985406263022848e-06, "loss": 25.39, "step": 789 }, { "epoch": 0.07331786542923434, "grad_norm": 40.95704650878906, "learning_rate": 9.985288866502799e-06, "loss": 25.7739, "step": 790 }, { "epoch": 0.07341067285382831, "grad_norm": 46.00676345825195, "learning_rate": 9.985171000381203e-06, "loss": 25.2911, "step": 791 }, { "epoch": 0.07350348027842227, "grad_norm": 40.94026184082031, "learning_rate": 9.985052664669168e-06, "loss": 27.0663, "step": 792 }, { "epoch": 0.07359628770301624, "grad_norm": 52.88737869262695, "learning_rate": 9.98493385937784e-06, "loss": 28.3733, "step": 793 }, { "epoch": 0.07368909512761021, "grad_norm": 42.54511642456055, "learning_rate": 9.984814584518407e-06, "loss": 26.3828, "step": 794 }, { "epoch": 0.07378190255220418, "grad_norm": 41.5316276550293, "learning_rate": 9.98469484010211e-06, "loss": 27.759, "step": 795 }, { "epoch": 0.07387470997679814, "grad_norm": 61.55472183227539, "learning_rate": 9.984574626140224e-06, "loss": 27.9351, "step": 796 }, { "epoch": 0.07396751740139211, "grad_norm": 43.73590850830078, "learning_rate": 9.984453942644072e-06, "loss": 26.6522, "step": 797 }, { "epoch": 0.07406032482598608, "grad_norm": 43.20869827270508, "learning_rate": 9.984332789625026e-06, "loss": 26.6943, "step": 798 }, { "epoch": 0.07415313225058005, "grad_norm": 42.27471160888672, "learning_rate": 9.984211167094498e-06, "loss": 27.0534, "step": 799 }, { "epoch": 0.07424593967517401, "grad_norm": 41.438716888427734, "learning_rate": 9.984089075063943e-06, "loss": 26.4449, "step": 800 }, { "epoch": 0.07433874709976798, "grad_norm": 43.7566032409668, "learning_rate": 9.983966513544861e-06, "loss": 26.7725, "step": 801 }, { "epoch": 0.07443155452436195, "grad_norm": 43.113182067871094, "learning_rate": 9.983843482548797e-06, "loss": 26.0461, "step": 802 }, { "epoch": 0.07452436194895591, "grad_norm": 52.87562942504883, "learning_rate": 9.983719982087342e-06, "loss": 26.3594, "step": 803 }, { "epoch": 0.07461716937354988, "grad_norm": 47.445796966552734, "learning_rate": 9.983596012172127e-06, "loss": 26.9438, "step": 804 }, { "epoch": 0.07470997679814385, "grad_norm": 44.851280212402344, "learning_rate": 9.983471572814834e-06, "loss": 27.3039, "step": 805 }, { "epoch": 0.07480278422273783, "grad_norm": 42.192073822021484, "learning_rate": 9.98334666402718e-06, "loss": 25.9077, "step": 806 }, { "epoch": 0.07489559164733178, "grad_norm": 45.680198669433594, "learning_rate": 9.983221285820935e-06, "loss": 24.6434, "step": 807 }, { "epoch": 0.07498839907192575, "grad_norm": 46.06507873535156, "learning_rate": 9.983095438207907e-06, "loss": 26.8709, "step": 808 }, { "epoch": 0.07508120649651973, "grad_norm": 107.0169448852539, "learning_rate": 9.98296912119995e-06, "loss": 24.7668, "step": 809 }, { "epoch": 0.07517401392111368, "grad_norm": 45.62285232543945, "learning_rate": 9.982842334808965e-06, "loss": 24.5329, "step": 810 }, { "epoch": 0.07526682134570765, "grad_norm": 41.14036178588867, "learning_rate": 9.982715079046891e-06, "loss": 26.3357, "step": 811 }, { "epoch": 0.07535962877030163, "grad_norm": 43.84963607788086, "learning_rate": 9.982587353925718e-06, "loss": 25.541, "step": 812 }, { "epoch": 0.0754524361948956, "grad_norm": 44.02555465698242, "learning_rate": 9.98245915945748e-06, "loss": 25.3708, "step": 813 }, { "epoch": 0.07554524361948955, "grad_norm": 42.37523651123047, "learning_rate": 9.98233049565425e-06, "loss": 26.2432, "step": 814 }, { "epoch": 0.07563805104408353, "grad_norm": 41.18764877319336, "learning_rate": 9.982201362528145e-06, "loss": 26.0449, "step": 815 }, { "epoch": 0.0757308584686775, "grad_norm": 42.298095703125, "learning_rate": 9.982071760091334e-06, "loss": 24.2959, "step": 816 }, { "epoch": 0.07582366589327146, "grad_norm": 45.26048278808594, "learning_rate": 9.981941688356021e-06, "loss": 25.9123, "step": 817 }, { "epoch": 0.07591647331786543, "grad_norm": 57.23806381225586, "learning_rate": 9.981811147334461e-06, "loss": 24.8194, "step": 818 }, { "epoch": 0.0760092807424594, "grad_norm": 42.74605941772461, "learning_rate": 9.98168013703895e-06, "loss": 27.3633, "step": 819 }, { "epoch": 0.07610208816705337, "grad_norm": 74.3815689086914, "learning_rate": 9.98154865748183e-06, "loss": 26.8538, "step": 820 }, { "epoch": 0.07619489559164733, "grad_norm": 43.653804779052734, "learning_rate": 9.981416708675482e-06, "loss": 23.6346, "step": 821 }, { "epoch": 0.0762877030162413, "grad_norm": 39.459259033203125, "learning_rate": 9.98128429063234e-06, "loss": 25.7623, "step": 822 }, { "epoch": 0.07638051044083527, "grad_norm": 39.230709075927734, "learning_rate": 9.981151403364875e-06, "loss": 25.5991, "step": 823 }, { "epoch": 0.07647331786542924, "grad_norm": 43.81565475463867, "learning_rate": 9.981018046885604e-06, "loss": 23.4596, "step": 824 }, { "epoch": 0.0765661252900232, "grad_norm": 41.37834167480469, "learning_rate": 9.980884221207092e-06, "loss": 27.1544, "step": 825 }, { "epoch": 0.07665893271461717, "grad_norm": 46.777503967285156, "learning_rate": 9.980749926341941e-06, "loss": 26.7976, "step": 826 }, { "epoch": 0.07675174013921114, "grad_norm": 41.392974853515625, "learning_rate": 9.980615162302805e-06, "loss": 25.3926, "step": 827 }, { "epoch": 0.0768445475638051, "grad_norm": 40.96089553833008, "learning_rate": 9.980479929102377e-06, "loss": 25.5038, "step": 828 }, { "epoch": 0.07693735498839907, "grad_norm": 41.49699020385742, "learning_rate": 9.980344226753394e-06, "loss": 24.6684, "step": 829 }, { "epoch": 0.07703016241299304, "grad_norm": 62.19173049926758, "learning_rate": 9.98020805526864e-06, "loss": 28.7906, "step": 830 }, { "epoch": 0.07712296983758701, "grad_norm": 42.46182632446289, "learning_rate": 9.980071414660943e-06, "loss": 25.4072, "step": 831 }, { "epoch": 0.07721577726218097, "grad_norm": 43.36638641357422, "learning_rate": 9.979934304943173e-06, "loss": 24.6917, "step": 832 }, { "epoch": 0.07730858468677494, "grad_norm": 44.776634216308594, "learning_rate": 9.979796726128247e-06, "loss": 27.7648, "step": 833 }, { "epoch": 0.07740139211136891, "grad_norm": 52.9946403503418, "learning_rate": 9.97965867822912e-06, "loss": 26.5867, "step": 834 }, { "epoch": 0.07749419953596287, "grad_norm": 43.181678771972656, "learning_rate": 9.979520161258803e-06, "loss": 25.9193, "step": 835 }, { "epoch": 0.07758700696055684, "grad_norm": 80.4910659790039, "learning_rate": 9.979381175230337e-06, "loss": 25.6804, "step": 836 }, { "epoch": 0.07767981438515081, "grad_norm": 58.30242156982422, "learning_rate": 9.979241720156819e-06, "loss": 26.155, "step": 837 }, { "epoch": 0.07777262180974479, "grad_norm": 40.70249557495117, "learning_rate": 9.979101796051381e-06, "loss": 25.9569, "step": 838 }, { "epoch": 0.07786542923433874, "grad_norm": 40.74345779418945, "learning_rate": 9.978961402927207e-06, "loss": 26.5974, "step": 839 }, { "epoch": 0.07795823665893271, "grad_norm": 42.590911865234375, "learning_rate": 9.978820540797521e-06, "loss": 25.5856, "step": 840 }, { "epoch": 0.07805104408352669, "grad_norm": 45.096153259277344, "learning_rate": 9.978679209675592e-06, "loss": 27.8284, "step": 841 }, { "epoch": 0.07814385150812064, "grad_norm": 41.154579162597656, "learning_rate": 9.97853740957473e-06, "loss": 25.5223, "step": 842 }, { "epoch": 0.07823665893271461, "grad_norm": 46.90921401977539, "learning_rate": 9.978395140508297e-06, "loss": 26.0943, "step": 843 }, { "epoch": 0.07832946635730859, "grad_norm": 54.77299118041992, "learning_rate": 9.978252402489692e-06, "loss": 26.9574, "step": 844 }, { "epoch": 0.07842227378190256, "grad_norm": 44.993839263916016, "learning_rate": 9.978109195532362e-06, "loss": 26.8262, "step": 845 }, { "epoch": 0.07851508120649651, "grad_norm": 39.573875427246094, "learning_rate": 9.977965519649793e-06, "loss": 26.1252, "step": 846 }, { "epoch": 0.07860788863109049, "grad_norm": 39.699378967285156, "learning_rate": 9.977821374855522e-06, "loss": 26.5146, "step": 847 }, { "epoch": 0.07870069605568446, "grad_norm": 42.58958435058594, "learning_rate": 9.977676761163126e-06, "loss": 26.99, "step": 848 }, { "epoch": 0.07879350348027843, "grad_norm": 46.3494758605957, "learning_rate": 9.977531678586228e-06, "loss": 27.6221, "step": 849 }, { "epoch": 0.07888631090487239, "grad_norm": 40.60935974121094, "learning_rate": 9.977386127138494e-06, "loss": 26.7194, "step": 850 }, { "epoch": 0.07897911832946636, "grad_norm": 42.253684997558594, "learning_rate": 9.977240106833634e-06, "loss": 26.0433, "step": 851 }, { "epoch": 0.07907192575406033, "grad_norm": 63.886932373046875, "learning_rate": 9.977093617685404e-06, "loss": 27.5439, "step": 852 }, { "epoch": 0.07916473317865429, "grad_norm": 63.80686950683594, "learning_rate": 9.976946659707603e-06, "loss": 28.0335, "step": 853 }, { "epoch": 0.07925754060324826, "grad_norm": 41.12339401245117, "learning_rate": 9.976799232914072e-06, "loss": 24.7374, "step": 854 }, { "epoch": 0.07935034802784223, "grad_norm": 45.83350372314453, "learning_rate": 9.9766513373187e-06, "loss": 24.661, "step": 855 }, { "epoch": 0.0794431554524362, "grad_norm": 58.88325119018555, "learning_rate": 9.976502972935419e-06, "loss": 26.4983, "step": 856 }, { "epoch": 0.07953596287703016, "grad_norm": 43.5601921081543, "learning_rate": 9.976354139778201e-06, "loss": 25.787, "step": 857 }, { "epoch": 0.07962877030162413, "grad_norm": 42.875091552734375, "learning_rate": 9.976204837861068e-06, "loss": 26.6524, "step": 858 }, { "epoch": 0.0797215777262181, "grad_norm": 39.8953971862793, "learning_rate": 9.976055067198086e-06, "loss": 25.7736, "step": 859 }, { "epoch": 0.07981438515081206, "grad_norm": 45.76995849609375, "learning_rate": 9.97590482780336e-06, "loss": 25.3295, "step": 860 }, { "epoch": 0.07990719257540603, "grad_norm": 44.39082717895508, "learning_rate": 9.975754119691044e-06, "loss": 24.995, "step": 861 }, { "epoch": 0.08, "grad_norm": 41.14795684814453, "learning_rate": 9.975602942875334e-06, "loss": 24.1387, "step": 862 }, { "epoch": 0.08009280742459397, "grad_norm": 47.00731658935547, "learning_rate": 9.975451297370468e-06, "loss": 25.984, "step": 863 }, { "epoch": 0.08018561484918793, "grad_norm": 41.26708984375, "learning_rate": 9.975299183190734e-06, "loss": 25.938, "step": 864 }, { "epoch": 0.0802784222737819, "grad_norm": 41.78102493286133, "learning_rate": 9.97514660035046e-06, "loss": 25.7941, "step": 865 }, { "epoch": 0.08037122969837587, "grad_norm": 43.519989013671875, "learning_rate": 9.974993548864017e-06, "loss": 26.4979, "step": 866 }, { "epoch": 0.08046403712296983, "grad_norm": 42.812347412109375, "learning_rate": 9.974840028745824e-06, "loss": 27.1062, "step": 867 }, { "epoch": 0.0805568445475638, "grad_norm": 51.03192901611328, "learning_rate": 9.974686040010341e-06, "loss": 27.9919, "step": 868 }, { "epoch": 0.08064965197215777, "grad_norm": 42.40848922729492, "learning_rate": 9.974531582672076e-06, "loss": 27.2872, "step": 869 }, { "epoch": 0.08074245939675175, "grad_norm": 40.989990234375, "learning_rate": 9.974376656745574e-06, "loss": 24.2372, "step": 870 }, { "epoch": 0.0808352668213457, "grad_norm": 46.582008361816406, "learning_rate": 9.974221262245432e-06, "loss": 26.1062, "step": 871 }, { "epoch": 0.08092807424593967, "grad_norm": 43.27277374267578, "learning_rate": 9.974065399186285e-06, "loss": 25.4426, "step": 872 }, { "epoch": 0.08102088167053365, "grad_norm": 42.997947692871094, "learning_rate": 9.97390906758282e-06, "loss": 26.5827, "step": 873 }, { "epoch": 0.08111368909512762, "grad_norm": 39.89257049560547, "learning_rate": 9.973752267449758e-06, "loss": 26.1517, "step": 874 }, { "epoch": 0.08120649651972157, "grad_norm": 45.228179931640625, "learning_rate": 9.97359499880187e-06, "loss": 23.4427, "step": 875 }, { "epoch": 0.08129930394431555, "grad_norm": 49.68173599243164, "learning_rate": 9.973437261653973e-06, "loss": 26.9313, "step": 876 }, { "epoch": 0.08139211136890952, "grad_norm": 41.4825439453125, "learning_rate": 9.973279056020924e-06, "loss": 26.0859, "step": 877 }, { "epoch": 0.08148491879350347, "grad_norm": 58.59028244018555, "learning_rate": 9.973120381917624e-06, "loss": 26.8578, "step": 878 }, { "epoch": 0.08157772621809745, "grad_norm": 44.624568939208984, "learning_rate": 9.97296123935902e-06, "loss": 27.3801, "step": 879 }, { "epoch": 0.08167053364269142, "grad_norm": 49.461151123046875, "learning_rate": 9.972801628360104e-06, "loss": 26.3999, "step": 880 }, { "epoch": 0.08176334106728539, "grad_norm": 43.184532165527344, "learning_rate": 9.972641548935913e-06, "loss": 24.9673, "step": 881 }, { "epoch": 0.08185614849187935, "grad_norm": 38.73515319824219, "learning_rate": 9.972481001101523e-06, "loss": 24.8242, "step": 882 }, { "epoch": 0.08194895591647332, "grad_norm": 40.76364517211914, "learning_rate": 9.972319984872057e-06, "loss": 24.9236, "step": 883 }, { "epoch": 0.08204176334106729, "grad_norm": 43.23008346557617, "learning_rate": 9.972158500262683e-06, "loss": 24.6481, "step": 884 }, { "epoch": 0.08213457076566125, "grad_norm": 41.29668426513672, "learning_rate": 9.971996547288614e-06, "loss": 27.6062, "step": 885 }, { "epoch": 0.08222737819025522, "grad_norm": 41.039180755615234, "learning_rate": 9.971834125965103e-06, "loss": 25.1404, "step": 886 }, { "epoch": 0.08232018561484919, "grad_norm": 45.01261901855469, "learning_rate": 9.971671236307453e-06, "loss": 26.7494, "step": 887 }, { "epoch": 0.08241299303944316, "grad_norm": 42.501068115234375, "learning_rate": 9.971507878331005e-06, "loss": 25.4627, "step": 888 }, { "epoch": 0.08250580046403712, "grad_norm": 42.91053009033203, "learning_rate": 9.971344052051146e-06, "loss": 27.0187, "step": 889 }, { "epoch": 0.08259860788863109, "grad_norm": 37.44386672973633, "learning_rate": 9.97117975748331e-06, "loss": 23.4955, "step": 890 }, { "epoch": 0.08269141531322506, "grad_norm": 44.03499221801758, "learning_rate": 9.971014994642975e-06, "loss": 25.5556, "step": 891 }, { "epoch": 0.08278422273781902, "grad_norm": 43.36013412475586, "learning_rate": 9.970849763545658e-06, "loss": 25.7917, "step": 892 }, { "epoch": 0.08287703016241299, "grad_norm": 44.62901306152344, "learning_rate": 9.970684064206924e-06, "loss": 26.1079, "step": 893 }, { "epoch": 0.08296983758700696, "grad_norm": 39.78826141357422, "learning_rate": 9.970517896642382e-06, "loss": 25.3983, "step": 894 }, { "epoch": 0.08306264501160093, "grad_norm": 41.461708068847656, "learning_rate": 9.970351260867684e-06, "loss": 26.2242, "step": 895 }, { "epoch": 0.08315545243619489, "grad_norm": 40.312129974365234, "learning_rate": 9.970184156898528e-06, "loss": 26.7601, "step": 896 }, { "epoch": 0.08324825986078886, "grad_norm": 41.53472900390625, "learning_rate": 9.970016584750652e-06, "loss": 26.0327, "step": 897 }, { "epoch": 0.08334106728538283, "grad_norm": 46.245121002197266, "learning_rate": 9.969848544439846e-06, "loss": 25.3184, "step": 898 }, { "epoch": 0.0834338747099768, "grad_norm": 42.61747741699219, "learning_rate": 9.969680035981933e-06, "loss": 25.7862, "step": 899 }, { "epoch": 0.08352668213457076, "grad_norm": 42.99306869506836, "learning_rate": 9.96951105939279e-06, "loss": 23.8167, "step": 900 }, { "epoch": 0.08361948955916473, "grad_norm": 39.14652633666992, "learning_rate": 9.969341614688332e-06, "loss": 24.353, "step": 901 }, { "epoch": 0.0837122969837587, "grad_norm": 59.150856018066406, "learning_rate": 9.969171701884524e-06, "loss": 25.5149, "step": 902 }, { "epoch": 0.08380510440835266, "grad_norm": 44.99458312988281, "learning_rate": 9.969001320997364e-06, "loss": 26.0794, "step": 903 }, { "epoch": 0.08389791183294663, "grad_norm": 42.04606628417969, "learning_rate": 9.96883047204291e-06, "loss": 26.0475, "step": 904 }, { "epoch": 0.0839907192575406, "grad_norm": 43.250850677490234, "learning_rate": 9.968659155037251e-06, "loss": 23.3065, "step": 905 }, { "epoch": 0.08408352668213458, "grad_norm": 41.3345832824707, "learning_rate": 9.968487369996523e-06, "loss": 24.7944, "step": 906 }, { "epoch": 0.08417633410672853, "grad_norm": 41.92021560668945, "learning_rate": 9.96831511693691e-06, "loss": 26.6294, "step": 907 }, { "epoch": 0.0842691415313225, "grad_norm": 43.007144927978516, "learning_rate": 9.968142395874641e-06, "loss": 26.4266, "step": 908 }, { "epoch": 0.08436194895591648, "grad_norm": 45.510494232177734, "learning_rate": 9.967969206825978e-06, "loss": 26.5762, "step": 909 }, { "epoch": 0.08445475638051043, "grad_norm": 49.67036819458008, "learning_rate": 9.967795549807242e-06, "loss": 26.4398, "step": 910 }, { "epoch": 0.0845475638051044, "grad_norm": 42.756587982177734, "learning_rate": 9.967621424834789e-06, "loss": 28.4416, "step": 911 }, { "epoch": 0.08464037122969838, "grad_norm": 39.61588668823242, "learning_rate": 9.96744683192502e-06, "loss": 25.1884, "step": 912 }, { "epoch": 0.08473317865429235, "grad_norm": 39.864139556884766, "learning_rate": 9.967271771094383e-06, "loss": 24.5748, "step": 913 }, { "epoch": 0.0848259860788863, "grad_norm": 37.994407653808594, "learning_rate": 9.967096242359366e-06, "loss": 22.6578, "step": 914 }, { "epoch": 0.08491879350348028, "grad_norm": 41.76736831665039, "learning_rate": 9.966920245736505e-06, "loss": 25.8609, "step": 915 }, { "epoch": 0.08501160092807425, "grad_norm": 40.66873550415039, "learning_rate": 9.966743781242379e-06, "loss": 24.126, "step": 916 }, { "epoch": 0.0851044083526682, "grad_norm": 45.6970329284668, "learning_rate": 9.966566848893607e-06, "loss": 25.5534, "step": 917 }, { "epoch": 0.08519721577726218, "grad_norm": 42.69925308227539, "learning_rate": 9.966389448706859e-06, "loss": 25.2218, "step": 918 }, { "epoch": 0.08529002320185615, "grad_norm": 42.219573974609375, "learning_rate": 9.966211580698846e-06, "loss": 26.6023, "step": 919 }, { "epoch": 0.08538283062645012, "grad_norm": 82.19343566894531, "learning_rate": 9.96603324488632e-06, "loss": 25.8419, "step": 920 }, { "epoch": 0.08547563805104408, "grad_norm": 44.579952239990234, "learning_rate": 9.965854441286083e-06, "loss": 27.166, "step": 921 }, { "epoch": 0.08556844547563805, "grad_norm": 38.69047164916992, "learning_rate": 9.965675169914975e-06, "loss": 24.1195, "step": 922 }, { "epoch": 0.08566125290023202, "grad_norm": 40.23135757446289, "learning_rate": 9.965495430789884e-06, "loss": 24.7474, "step": 923 }, { "epoch": 0.08575406032482599, "grad_norm": 42.32756805419922, "learning_rate": 9.96531522392774e-06, "loss": 26.2373, "step": 924 }, { "epoch": 0.08584686774941995, "grad_norm": 40.023746490478516, "learning_rate": 9.96513454934552e-06, "loss": 26.5326, "step": 925 }, { "epoch": 0.08593967517401392, "grad_norm": 44.15977096557617, "learning_rate": 9.964953407060243e-06, "loss": 25.7848, "step": 926 }, { "epoch": 0.08603248259860789, "grad_norm": 43.434234619140625, "learning_rate": 9.96477179708897e-06, "loss": 23.7024, "step": 927 }, { "epoch": 0.08612529002320185, "grad_norm": 44.18355941772461, "learning_rate": 9.964589719448809e-06, "loss": 26.7971, "step": 928 }, { "epoch": 0.08621809744779582, "grad_norm": 41.65182113647461, "learning_rate": 9.964407174156914e-06, "loss": 24.7901, "step": 929 }, { "epoch": 0.0863109048723898, "grad_norm": 39.04987716674805, "learning_rate": 9.964224161230476e-06, "loss": 25.2123, "step": 930 }, { "epoch": 0.08640371229698376, "grad_norm": 40.5372314453125, "learning_rate": 9.964040680686734e-06, "loss": 26.841, "step": 931 }, { "epoch": 0.08649651972157772, "grad_norm": 44.93692398071289, "learning_rate": 9.963856732542978e-06, "loss": 25.6384, "step": 932 }, { "epoch": 0.0865893271461717, "grad_norm": 39.70260238647461, "learning_rate": 9.96367231681653e-06, "loss": 25.0323, "step": 933 }, { "epoch": 0.08668213457076566, "grad_norm": 39.79603576660156, "learning_rate": 9.963487433524762e-06, "loss": 24.2203, "step": 934 }, { "epoch": 0.08677494199535962, "grad_norm": 103.98867797851562, "learning_rate": 9.96330208268509e-06, "loss": 25.5444, "step": 935 }, { "epoch": 0.0868677494199536, "grad_norm": 62.77717590332031, "learning_rate": 9.963116264314974e-06, "loss": 25.1612, "step": 936 }, { "epoch": 0.08696055684454757, "grad_norm": 42.638484954833984, "learning_rate": 9.962929978431918e-06, "loss": 26.0391, "step": 937 }, { "epoch": 0.08705336426914154, "grad_norm": 41.83324432373047, "learning_rate": 9.962743225053469e-06, "loss": 27.2756, "step": 938 }, { "epoch": 0.0871461716937355, "grad_norm": 43.04057312011719, "learning_rate": 9.962556004197217e-06, "loss": 26.5316, "step": 939 }, { "epoch": 0.08723897911832947, "grad_norm": 38.77253341674805, "learning_rate": 9.9623683158808e-06, "loss": 25.1674, "step": 940 }, { "epoch": 0.08733178654292344, "grad_norm": 39.74100875854492, "learning_rate": 9.9621801601219e-06, "loss": 23.6483, "step": 941 }, { "epoch": 0.08742459396751741, "grad_norm": 39.33729934692383, "learning_rate": 9.961991536938237e-06, "loss": 25.0438, "step": 942 }, { "epoch": 0.08751740139211137, "grad_norm": 40.23271942138672, "learning_rate": 9.961802446347578e-06, "loss": 24.7382, "step": 943 }, { "epoch": 0.08761020881670534, "grad_norm": 40.25589370727539, "learning_rate": 9.961612888367737e-06, "loss": 24.6863, "step": 944 }, { "epoch": 0.08770301624129931, "grad_norm": 43.59027862548828, "learning_rate": 9.961422863016572e-06, "loss": 25.2433, "step": 945 }, { "epoch": 0.08779582366589327, "grad_norm": 38.05546569824219, "learning_rate": 9.961232370311977e-06, "loss": 24.7533, "step": 946 }, { "epoch": 0.08788863109048724, "grad_norm": 48.195091247558594, "learning_rate": 9.961041410271903e-06, "loss": 26.6975, "step": 947 }, { "epoch": 0.08798143851508121, "grad_norm": 43.48209762573242, "learning_rate": 9.960849982914332e-06, "loss": 25.9971, "step": 948 }, { "epoch": 0.08807424593967518, "grad_norm": 40.45602035522461, "learning_rate": 9.960658088257297e-06, "loss": 25.1212, "step": 949 }, { "epoch": 0.08816705336426914, "grad_norm": 41.0916748046875, "learning_rate": 9.960465726318878e-06, "loss": 26.1973, "step": 950 }, { "epoch": 0.08825986078886311, "grad_norm": 39.77226257324219, "learning_rate": 9.96027289711719e-06, "loss": 25.9332, "step": 951 }, { "epoch": 0.08835266821345708, "grad_norm": 41.83346939086914, "learning_rate": 9.960079600670403e-06, "loss": 25.3732, "step": 952 }, { "epoch": 0.08844547563805104, "grad_norm": 39.851661682128906, "learning_rate": 9.959885836996718e-06, "loss": 26.1837, "step": 953 }, { "epoch": 0.08853828306264501, "grad_norm": 42.58710479736328, "learning_rate": 9.959691606114393e-06, "loss": 25.4339, "step": 954 }, { "epoch": 0.08863109048723898, "grad_norm": 40.545169830322266, "learning_rate": 9.959496908041721e-06, "loss": 26.3043, "step": 955 }, { "epoch": 0.08872389791183295, "grad_norm": 43.509376525878906, "learning_rate": 9.959301742797041e-06, "loss": 24.9305, "step": 956 }, { "epoch": 0.08881670533642691, "grad_norm": 49.999847412109375, "learning_rate": 9.95910611039874e-06, "loss": 27.2144, "step": 957 }, { "epoch": 0.08890951276102088, "grad_norm": 38.69890594482422, "learning_rate": 9.958910010865243e-06, "loss": 24.1081, "step": 958 }, { "epoch": 0.08900232018561485, "grad_norm": 39.52566146850586, "learning_rate": 9.958713444215025e-06, "loss": 26.4076, "step": 959 }, { "epoch": 0.08909512761020881, "grad_norm": 42.128326416015625, "learning_rate": 9.958516410466601e-06, "loss": 26.0135, "step": 960 }, { "epoch": 0.08918793503480278, "grad_norm": 39.81075668334961, "learning_rate": 9.958318909638531e-06, "loss": 24.7639, "step": 961 }, { "epoch": 0.08928074245939675, "grad_norm": 47.67357635498047, "learning_rate": 9.958120941749418e-06, "loss": 26.6889, "step": 962 }, { "epoch": 0.08937354988399072, "grad_norm": 41.94749450683594, "learning_rate": 9.957922506817914e-06, "loss": 26.4546, "step": 963 }, { "epoch": 0.08946635730858468, "grad_norm": 41.99639129638672, "learning_rate": 9.957723604862705e-06, "loss": 27.6694, "step": 964 }, { "epoch": 0.08955916473317865, "grad_norm": 41.85661697387695, "learning_rate": 9.957524235902531e-06, "loss": 27.804, "step": 965 }, { "epoch": 0.08965197215777262, "grad_norm": 41.62854766845703, "learning_rate": 9.957324399956172e-06, "loss": 27.0086, "step": 966 }, { "epoch": 0.0897447795823666, "grad_norm": 38.458526611328125, "learning_rate": 9.95712409704245e-06, "loss": 26.5944, "step": 967 }, { "epoch": 0.08983758700696055, "grad_norm": 41.3785514831543, "learning_rate": 9.956923327180234e-06, "loss": 25.9581, "step": 968 }, { "epoch": 0.08993039443155453, "grad_norm": 42.5922966003418, "learning_rate": 9.956722090388438e-06, "loss": 26.7212, "step": 969 }, { "epoch": 0.0900232018561485, "grad_norm": 47.22397232055664, "learning_rate": 9.956520386686017e-06, "loss": 26.0688, "step": 970 }, { "epoch": 0.09011600928074245, "grad_norm": 43.173160552978516, "learning_rate": 9.95631821609197e-06, "loss": 26.4683, "step": 971 }, { "epoch": 0.09020881670533643, "grad_norm": 39.734493255615234, "learning_rate": 9.956115578625339e-06, "loss": 23.8326, "step": 972 }, { "epoch": 0.0903016241299304, "grad_norm": 50.9776496887207, "learning_rate": 9.955912474305216e-06, "loss": 25.7409, "step": 973 }, { "epoch": 0.09039443155452437, "grad_norm": 47.786643981933594, "learning_rate": 9.95570890315073e-06, "loss": 26.313, "step": 974 }, { "epoch": 0.09048723897911833, "grad_norm": 47.10563278198242, "learning_rate": 9.955504865181058e-06, "loss": 24.7685, "step": 975 }, { "epoch": 0.0905800464037123, "grad_norm": 39.145606994628906, "learning_rate": 9.95530036041542e-06, "loss": 25.8036, "step": 976 }, { "epoch": 0.09067285382830627, "grad_norm": 42.5091438293457, "learning_rate": 9.955095388873082e-06, "loss": 24.1911, "step": 977 }, { "epoch": 0.09076566125290023, "grad_norm": 42.84299850463867, "learning_rate": 9.954889950573347e-06, "loss": 26.0118, "step": 978 }, { "epoch": 0.0908584686774942, "grad_norm": 46.69637680053711, "learning_rate": 9.954684045535569e-06, "loss": 27.5947, "step": 979 }, { "epoch": 0.09095127610208817, "grad_norm": 39.366851806640625, "learning_rate": 9.954477673779146e-06, "loss": 26.1549, "step": 980 }, { "epoch": 0.09104408352668214, "grad_norm": 40.62654113769531, "learning_rate": 9.954270835323515e-06, "loss": 25.4322, "step": 981 }, { "epoch": 0.0911368909512761, "grad_norm": 40.751338958740234, "learning_rate": 9.95406353018816e-06, "loss": 25.6122, "step": 982 }, { "epoch": 0.09122969837587007, "grad_norm": 40.39167404174805, "learning_rate": 9.953855758392608e-06, "loss": 26.2977, "step": 983 }, { "epoch": 0.09132250580046404, "grad_norm": 40.22406005859375, "learning_rate": 9.953647519956432e-06, "loss": 28.5809, "step": 984 }, { "epoch": 0.091415313225058, "grad_norm": 36.99325942993164, "learning_rate": 9.953438814899247e-06, "loss": 24.9249, "step": 985 }, { "epoch": 0.09150812064965197, "grad_norm": 43.702430725097656, "learning_rate": 9.953229643240714e-06, "loss": 25.55, "step": 986 }, { "epoch": 0.09160092807424594, "grad_norm": 39.269622802734375, "learning_rate": 9.953020005000533e-06, "loss": 25.1364, "step": 987 }, { "epoch": 0.09169373549883991, "grad_norm": 38.2880859375, "learning_rate": 9.952809900198454e-06, "loss": 24.2752, "step": 988 }, { "epoch": 0.09178654292343387, "grad_norm": 39.83544158935547, "learning_rate": 9.95259932885427e-06, "loss": 25.6558, "step": 989 }, { "epoch": 0.09187935034802784, "grad_norm": 39.11392593383789, "learning_rate": 9.952388290987812e-06, "loss": 24.5374, "step": 990 }, { "epoch": 0.09197215777262181, "grad_norm": 44.286529541015625, "learning_rate": 9.95217678661896e-06, "loss": 26.9586, "step": 991 }, { "epoch": 0.09206496519721578, "grad_norm": 46.30522918701172, "learning_rate": 9.95196481576764e-06, "loss": 25.4524, "step": 992 }, { "epoch": 0.09215777262180974, "grad_norm": 40.702022552490234, "learning_rate": 9.951752378453817e-06, "loss": 25.5273, "step": 993 }, { "epoch": 0.09225058004640371, "grad_norm": 63.41793441772461, "learning_rate": 9.951539474697502e-06, "loss": 27.0787, "step": 994 }, { "epoch": 0.09234338747099768, "grad_norm": 44.06489944458008, "learning_rate": 9.951326104518752e-06, "loss": 25.6524, "step": 995 }, { "epoch": 0.09243619489559164, "grad_norm": 41.90898895263672, "learning_rate": 9.951112267937663e-06, "loss": 26.0191, "step": 996 }, { "epoch": 0.09252900232018561, "grad_norm": 39.93344497680664, "learning_rate": 9.950897964974379e-06, "loss": 24.614, "step": 997 }, { "epoch": 0.09262180974477958, "grad_norm": 38.37432098388672, "learning_rate": 9.95068319564909e-06, "loss": 25.2264, "step": 998 }, { "epoch": 0.09271461716937356, "grad_norm": 36.70235061645508, "learning_rate": 9.950467959982021e-06, "loss": 24.6142, "step": 999 }, { "epoch": 0.09280742459396751, "grad_norm": 43.78654479980469, "learning_rate": 9.950252257993451e-06, "loss": 26.5505, "step": 1000 }, { "epoch": 0.09290023201856148, "grad_norm": 50.120506286621094, "learning_rate": 9.950036089703696e-06, "loss": 25.9536, "step": 1001 }, { "epoch": 0.09299303944315546, "grad_norm": 46.59465408325195, "learning_rate": 9.949819455133121e-06, "loss": 24.9404, "step": 1002 }, { "epoch": 0.09308584686774941, "grad_norm": 54.201499938964844, "learning_rate": 9.949602354302131e-06, "loss": 26.178, "step": 1003 }, { "epoch": 0.09317865429234339, "grad_norm": 49.83958053588867, "learning_rate": 9.949384787231176e-06, "loss": 26.6153, "step": 1004 }, { "epoch": 0.09327146171693736, "grad_norm": 38.72168731689453, "learning_rate": 9.94916675394075e-06, "loss": 24.9228, "step": 1005 }, { "epoch": 0.09336426914153133, "grad_norm": 39.58180236816406, "learning_rate": 9.948948254451391e-06, "loss": 25.3018, "step": 1006 }, { "epoch": 0.09345707656612529, "grad_norm": 42.11069869995117, "learning_rate": 9.948729288783684e-06, "loss": 24.7882, "step": 1007 }, { "epoch": 0.09354988399071926, "grad_norm": 41.105010986328125, "learning_rate": 9.948509856958253e-06, "loss": 25.0453, "step": 1008 }, { "epoch": 0.09364269141531323, "grad_norm": 41.134185791015625, "learning_rate": 9.948289958995765e-06, "loss": 25.608, "step": 1009 }, { "epoch": 0.09373549883990719, "grad_norm": 39.954612731933594, "learning_rate": 9.948069594916939e-06, "loss": 24.1224, "step": 1010 }, { "epoch": 0.09382830626450116, "grad_norm": 40.566898345947266, "learning_rate": 9.94784876474253e-06, "loss": 24.8403, "step": 1011 }, { "epoch": 0.09392111368909513, "grad_norm": 37.689571380615234, "learning_rate": 9.94762746849334e-06, "loss": 25.259, "step": 1012 }, { "epoch": 0.0940139211136891, "grad_norm": 39.0108757019043, "learning_rate": 9.947405706190216e-06, "loss": 25.2291, "step": 1013 }, { "epoch": 0.09410672853828306, "grad_norm": 39.834171295166016, "learning_rate": 9.947183477854042e-06, "loss": 27.7053, "step": 1014 }, { "epoch": 0.09419953596287703, "grad_norm": 44.47129821777344, "learning_rate": 9.94696078350576e-06, "loss": 27.4433, "step": 1015 }, { "epoch": 0.094292343387471, "grad_norm": 44.068172454833984, "learning_rate": 9.946737623166341e-06, "loss": 25.7278, "step": 1016 }, { "epoch": 0.09438515081206497, "grad_norm": 41.68885803222656, "learning_rate": 9.946513996856808e-06, "loss": 25.4474, "step": 1017 }, { "epoch": 0.09447795823665893, "grad_norm": 38.871421813964844, "learning_rate": 9.946289904598226e-06, "loss": 26.7883, "step": 1018 }, { "epoch": 0.0945707656612529, "grad_norm": 42.22783660888672, "learning_rate": 9.946065346411702e-06, "loss": 27.2654, "step": 1019 }, { "epoch": 0.09466357308584687, "grad_norm": 39.40402603149414, "learning_rate": 9.945840322318391e-06, "loss": 25.3226, "step": 1020 }, { "epoch": 0.09475638051044083, "grad_norm": 40.2103385925293, "learning_rate": 9.945614832339489e-06, "loss": 24.6827, "step": 1021 }, { "epoch": 0.0948491879350348, "grad_norm": 40.616943359375, "learning_rate": 9.945388876496238e-06, "loss": 25.1325, "step": 1022 }, { "epoch": 0.09494199535962877, "grad_norm": 45.722999572753906, "learning_rate": 9.945162454809923e-06, "loss": 24.859, "step": 1023 }, { "epoch": 0.09503480278422274, "grad_norm": 41.398921966552734, "learning_rate": 9.944935567301868e-06, "loss": 26.4607, "step": 1024 }, { "epoch": 0.0951276102088167, "grad_norm": 39.60587692260742, "learning_rate": 9.944708213993448e-06, "loss": 24.2325, "step": 1025 }, { "epoch": 0.09522041763341067, "grad_norm": 44.694801330566406, "learning_rate": 9.944480394906079e-06, "loss": 24.5028, "step": 1026 }, { "epoch": 0.09531322505800464, "grad_norm": 39.786041259765625, "learning_rate": 9.944252110061221e-06, "loss": 25.4819, "step": 1027 }, { "epoch": 0.0954060324825986, "grad_norm": 40.95759963989258, "learning_rate": 9.944023359480378e-06, "loss": 26.0388, "step": 1028 }, { "epoch": 0.09549883990719257, "grad_norm": 42.24235153198242, "learning_rate": 9.943794143185097e-06, "loss": 26.0824, "step": 1029 }, { "epoch": 0.09559164733178654, "grad_norm": 41.98576736450195, "learning_rate": 9.943564461196972e-06, "loss": 26.0564, "step": 1030 }, { "epoch": 0.09568445475638052, "grad_norm": 41.50382614135742, "learning_rate": 9.943334313537634e-06, "loss": 25.7184, "step": 1031 }, { "epoch": 0.09577726218097447, "grad_norm": 39.45526123046875, "learning_rate": 9.943103700228768e-06, "loss": 26.7187, "step": 1032 }, { "epoch": 0.09587006960556844, "grad_norm": 40.02836227416992, "learning_rate": 9.942872621292091e-06, "loss": 25.4816, "step": 1033 }, { "epoch": 0.09596287703016242, "grad_norm": 41.714534759521484, "learning_rate": 9.942641076749375e-06, "loss": 23.9158, "step": 1034 }, { "epoch": 0.09605568445475637, "grad_norm": 41.146976470947266, "learning_rate": 9.942409066622429e-06, "loss": 25.6478, "step": 1035 }, { "epoch": 0.09614849187935035, "grad_norm": 39.90861892700195, "learning_rate": 9.942176590933105e-06, "loss": 25.645, "step": 1036 }, { "epoch": 0.09624129930394432, "grad_norm": 305.53466796875, "learning_rate": 9.941943649703307e-06, "loss": 25.9557, "step": 1037 }, { "epoch": 0.09633410672853829, "grad_norm": 41.71875, "learning_rate": 9.941710242954976e-06, "loss": 26.7521, "step": 1038 }, { "epoch": 0.09642691415313225, "grad_norm": 42.345909118652344, "learning_rate": 9.941476370710094e-06, "loss": 24.7248, "step": 1039 }, { "epoch": 0.09651972157772622, "grad_norm": 38.39518737792969, "learning_rate": 9.941242032990697e-06, "loss": 25.5346, "step": 1040 }, { "epoch": 0.09661252900232019, "grad_norm": 41.762046813964844, "learning_rate": 9.941007229818856e-06, "loss": 26.7702, "step": 1041 }, { "epoch": 0.09670533642691416, "grad_norm": 39.487308502197266, "learning_rate": 9.940771961216689e-06, "loss": 25.4232, "step": 1042 }, { "epoch": 0.09679814385150812, "grad_norm": 40.426795959472656, "learning_rate": 9.940536227206357e-06, "loss": 24.8774, "step": 1043 }, { "epoch": 0.09689095127610209, "grad_norm": 40.8197135925293, "learning_rate": 9.940300027810067e-06, "loss": 24.9709, "step": 1044 }, { "epoch": 0.09698375870069606, "grad_norm": 46.21306228637695, "learning_rate": 9.940063363050068e-06, "loss": 25.6525, "step": 1045 }, { "epoch": 0.09707656612529002, "grad_norm": 41.29604721069336, "learning_rate": 9.939826232948654e-06, "loss": 27.5826, "step": 1046 }, { "epoch": 0.09716937354988399, "grad_norm": 41.203826904296875, "learning_rate": 9.93958863752816e-06, "loss": 26.0823, "step": 1047 }, { "epoch": 0.09726218097447796, "grad_norm": 42.600955963134766, "learning_rate": 9.939350576810968e-06, "loss": 27.2544, "step": 1048 }, { "epoch": 0.09735498839907193, "grad_norm": 40.301612854003906, "learning_rate": 9.939112050819504e-06, "loss": 24.0558, "step": 1049 }, { "epoch": 0.09744779582366589, "grad_norm": 44.46141815185547, "learning_rate": 9.938873059576235e-06, "loss": 25.799, "step": 1050 }, { "epoch": 0.09754060324825986, "grad_norm": 43.18428039550781, "learning_rate": 9.938633603103673e-06, "loss": 26.2386, "step": 1051 }, { "epoch": 0.09763341067285383, "grad_norm": 40.35280990600586, "learning_rate": 9.938393681424376e-06, "loss": 25.7297, "step": 1052 }, { "epoch": 0.09772621809744779, "grad_norm": 39.621055603027344, "learning_rate": 9.938153294560941e-06, "loss": 25.7898, "step": 1053 }, { "epoch": 0.09781902552204176, "grad_norm": 45.97412109375, "learning_rate": 9.937912442536015e-06, "loss": 25.7274, "step": 1054 }, { "epoch": 0.09791183294663573, "grad_norm": 41.925880432128906, "learning_rate": 9.937671125372283e-06, "loss": 25.5724, "step": 1055 }, { "epoch": 0.0980046403712297, "grad_norm": 43.12976837158203, "learning_rate": 9.93742934309248e-06, "loss": 28.1616, "step": 1056 }, { "epoch": 0.09809744779582366, "grad_norm": 39.80802536010742, "learning_rate": 9.937187095719378e-06, "loss": 25.8602, "step": 1057 }, { "epoch": 0.09819025522041763, "grad_norm": 37.96247482299805, "learning_rate": 9.936944383275797e-06, "loss": 24.9547, "step": 1058 }, { "epoch": 0.0982830626450116, "grad_norm": 39.9728889465332, "learning_rate": 9.9367012057846e-06, "loss": 24.1758, "step": 1059 }, { "epoch": 0.09837587006960556, "grad_norm": 43.13490295410156, "learning_rate": 9.936457563268695e-06, "loss": 24.6178, "step": 1060 }, { "epoch": 0.09846867749419953, "grad_norm": 41.16150665283203, "learning_rate": 9.93621345575103e-06, "loss": 26.1639, "step": 1061 }, { "epoch": 0.0985614849187935, "grad_norm": 38.99013900756836, "learning_rate": 9.9359688832546e-06, "loss": 24.9154, "step": 1062 }, { "epoch": 0.09865429234338748, "grad_norm": 48.42115783691406, "learning_rate": 9.935723845802446e-06, "loss": 25.3925, "step": 1063 }, { "epoch": 0.09874709976798143, "grad_norm": 42.5615348815918, "learning_rate": 9.935478343417646e-06, "loss": 25.9331, "step": 1064 }, { "epoch": 0.0988399071925754, "grad_norm": 39.85590362548828, "learning_rate": 9.935232376123327e-06, "loss": 24.2739, "step": 1065 }, { "epoch": 0.09893271461716938, "grad_norm": 40.506282806396484, "learning_rate": 9.93498594394266e-06, "loss": 23.4486, "step": 1066 }, { "epoch": 0.09902552204176335, "grad_norm": 215.68502807617188, "learning_rate": 9.934739046898856e-06, "loss": 25.5623, "step": 1067 }, { "epoch": 0.0991183294663573, "grad_norm": 40.43233108520508, "learning_rate": 9.934491685015173e-06, "loss": 25.6608, "step": 1068 }, { "epoch": 0.09921113689095128, "grad_norm": 45.06208038330078, "learning_rate": 9.934243858314914e-06, "loss": 26.6445, "step": 1069 }, { "epoch": 0.09930394431554525, "grad_norm": 42.255558013916016, "learning_rate": 9.933995566821419e-06, "loss": 27.0846, "step": 1070 }, { "epoch": 0.0993967517401392, "grad_norm": 39.576171875, "learning_rate": 9.93374681055808e-06, "loss": 25.3442, "step": 1071 }, { "epoch": 0.09948955916473318, "grad_norm": 41.71992111206055, "learning_rate": 9.93349758954833e-06, "loss": 25.9456, "step": 1072 }, { "epoch": 0.09958236658932715, "grad_norm": 39.20787811279297, "learning_rate": 9.933247903815643e-06, "loss": 25.8704, "step": 1073 }, { "epoch": 0.09967517401392112, "grad_norm": 41.48542022705078, "learning_rate": 9.932997753383538e-06, "loss": 26.2767, "step": 1074 }, { "epoch": 0.09976798143851508, "grad_norm": 38.229087829589844, "learning_rate": 9.93274713827558e-06, "loss": 24.7115, "step": 1075 }, { "epoch": 0.09986078886310905, "grad_norm": 38.3483772277832, "learning_rate": 9.932496058515375e-06, "loss": 24.6216, "step": 1076 }, { "epoch": 0.09995359628770302, "grad_norm": 38.76655578613281, "learning_rate": 9.932244514126576e-06, "loss": 24.7281, "step": 1077 }, { "epoch": 0.10004640371229698, "grad_norm": 55.35257339477539, "learning_rate": 9.931992505132876e-06, "loss": 25.5353, "step": 1078 }, { "epoch": 0.10013921113689095, "grad_norm": 43.632530212402344, "learning_rate": 9.931740031558014e-06, "loss": 26.2804, "step": 1079 }, { "epoch": 0.10023201856148492, "grad_norm": 39.64073181152344, "learning_rate": 9.931487093425775e-06, "loss": 27.0791, "step": 1080 }, { "epoch": 0.10032482598607889, "grad_norm": 42.847164154052734, "learning_rate": 9.93123369075998e-06, "loss": 25.2496, "step": 1081 }, { "epoch": 0.10041763341067285, "grad_norm": 40.63569259643555, "learning_rate": 9.930979823584505e-06, "loss": 25.273, "step": 1082 }, { "epoch": 0.10051044083526682, "grad_norm": 38.84287643432617, "learning_rate": 9.930725491923259e-06, "loss": 25.214, "step": 1083 }, { "epoch": 0.10060324825986079, "grad_norm": 42.247493743896484, "learning_rate": 9.9304706958002e-06, "loss": 25.7985, "step": 1084 }, { "epoch": 0.10069605568445476, "grad_norm": 43.04545974731445, "learning_rate": 9.93021543523933e-06, "loss": 25.7013, "step": 1085 }, { "epoch": 0.10078886310904872, "grad_norm": 40.843666076660156, "learning_rate": 9.929959710264695e-06, "loss": 27.3742, "step": 1086 }, { "epoch": 0.10088167053364269, "grad_norm": 42.59556579589844, "learning_rate": 9.929703520900383e-06, "loss": 26.7874, "step": 1087 }, { "epoch": 0.10097447795823666, "grad_norm": 39.004051208496094, "learning_rate": 9.929446867170525e-06, "loss": 25.6997, "step": 1088 }, { "epoch": 0.10106728538283062, "grad_norm": 42.087223052978516, "learning_rate": 9.929189749099297e-06, "loss": 25.4188, "step": 1089 }, { "epoch": 0.10116009280742459, "grad_norm": 40.328834533691406, "learning_rate": 9.92893216671092e-06, "loss": 27.5677, "step": 1090 }, { "epoch": 0.10125290023201856, "grad_norm": 41.586273193359375, "learning_rate": 9.92867412002966e-06, "loss": 25.6605, "step": 1091 }, { "epoch": 0.10134570765661254, "grad_norm": 39.99289321899414, "learning_rate": 9.928415609079821e-06, "loss": 25.0585, "step": 1092 }, { "epoch": 0.10143851508120649, "grad_norm": 39.20059585571289, "learning_rate": 9.928156633885753e-06, "loss": 26.2289, "step": 1093 }, { "epoch": 0.10153132250580046, "grad_norm": 42.91252136230469, "learning_rate": 9.927897194471853e-06, "loss": 26.6711, "step": 1094 }, { "epoch": 0.10162412993039444, "grad_norm": 39.23381042480469, "learning_rate": 9.927637290862562e-06, "loss": 26.157, "step": 1095 }, { "epoch": 0.1017169373549884, "grad_norm": 42.0224609375, "learning_rate": 9.927376923082357e-06, "loss": 24.8715, "step": 1096 }, { "epoch": 0.10180974477958236, "grad_norm": 43.6422119140625, "learning_rate": 9.927116091155769e-06, "loss": 24.0271, "step": 1097 }, { "epoch": 0.10190255220417634, "grad_norm": 41.59701919555664, "learning_rate": 9.926854795107363e-06, "loss": 24.8064, "step": 1098 }, { "epoch": 0.10199535962877031, "grad_norm": 44.5890007019043, "learning_rate": 9.926593034961756e-06, "loss": 25.317, "step": 1099 }, { "epoch": 0.10208816705336426, "grad_norm": 39.54047775268555, "learning_rate": 9.926330810743603e-06, "loss": 25.7658, "step": 1100 }, { "epoch": 0.10218097447795824, "grad_norm": 41.62872314453125, "learning_rate": 9.926068122477605e-06, "loss": 25.3228, "step": 1101 }, { "epoch": 0.10227378190255221, "grad_norm": 41.37626266479492, "learning_rate": 9.925804970188509e-06, "loss": 26.3582, "step": 1102 }, { "epoch": 0.10236658932714617, "grad_norm": 37.61030197143555, "learning_rate": 9.9255413539011e-06, "loss": 24.7784, "step": 1103 }, { "epoch": 0.10245939675174014, "grad_norm": 40.31169891357422, "learning_rate": 9.925277273640211e-06, "loss": 25.0419, "step": 1104 }, { "epoch": 0.10255220417633411, "grad_norm": 42.067047119140625, "learning_rate": 9.925012729430719e-06, "loss": 27.075, "step": 1105 }, { "epoch": 0.10264501160092808, "grad_norm": 39.80172348022461, "learning_rate": 9.924747721297542e-06, "loss": 24.157, "step": 1106 }, { "epoch": 0.10273781902552204, "grad_norm": 39.311222076416016, "learning_rate": 9.924482249265646e-06, "loss": 25.8874, "step": 1107 }, { "epoch": 0.10283062645011601, "grad_norm": 46.38166427612305, "learning_rate": 9.924216313360033e-06, "loss": 26.6615, "step": 1108 }, { "epoch": 0.10292343387470998, "grad_norm": 40.103485107421875, "learning_rate": 9.923949913605757e-06, "loss": 26.0955, "step": 1109 }, { "epoch": 0.10301624129930395, "grad_norm": 57.22760772705078, "learning_rate": 9.92368305002791e-06, "loss": 25.5031, "step": 1110 }, { "epoch": 0.10310904872389791, "grad_norm": 40.25881576538086, "learning_rate": 9.923415722651632e-06, "loss": 24.0961, "step": 1111 }, { "epoch": 0.10320185614849188, "grad_norm": 45.40335464477539, "learning_rate": 9.923147931502104e-06, "loss": 26.9776, "step": 1112 }, { "epoch": 0.10329466357308585, "grad_norm": 42.35226821899414, "learning_rate": 9.922879676604548e-06, "loss": 28.2329, "step": 1113 }, { "epoch": 0.10338747099767981, "grad_norm": 45.42384338378906, "learning_rate": 9.92261095798424e-06, "loss": 25.2459, "step": 1114 }, { "epoch": 0.10348027842227378, "grad_norm": 39.740447998046875, "learning_rate": 9.922341775666485e-06, "loss": 25.4419, "step": 1115 }, { "epoch": 0.10357308584686775, "grad_norm": 40.531028747558594, "learning_rate": 9.922072129676644e-06, "loss": 25.5266, "step": 1116 }, { "epoch": 0.10366589327146172, "grad_norm": 39.148033142089844, "learning_rate": 9.921802020040116e-06, "loss": 25.3961, "step": 1117 }, { "epoch": 0.10375870069605568, "grad_norm": 43.2768440246582, "learning_rate": 9.921531446782344e-06, "loss": 24.1984, "step": 1118 }, { "epoch": 0.10385150812064965, "grad_norm": 43.02067184448242, "learning_rate": 9.921260409928815e-06, "loss": 26.4904, "step": 1119 }, { "epoch": 0.10394431554524362, "grad_norm": 42.79636764526367, "learning_rate": 9.92098890950506e-06, "loss": 26.5669, "step": 1120 }, { "epoch": 0.10403712296983758, "grad_norm": 38.10392761230469, "learning_rate": 9.920716945536655e-06, "loss": 27.1028, "step": 1121 }, { "epoch": 0.10412993039443155, "grad_norm": 42.53533935546875, "learning_rate": 9.920444518049218e-06, "loss": 26.6929, "step": 1122 }, { "epoch": 0.10422273781902552, "grad_norm": 40.42083740234375, "learning_rate": 9.920171627068409e-06, "loss": 23.7648, "step": 1123 }, { "epoch": 0.1043155452436195, "grad_norm": 45.31937789916992, "learning_rate": 9.919898272619935e-06, "loss": 24.3796, "step": 1124 }, { "epoch": 0.10440835266821345, "grad_norm": 41.057743072509766, "learning_rate": 9.919624454729546e-06, "loss": 25.5932, "step": 1125 }, { "epoch": 0.10450116009280742, "grad_norm": 40.816864013671875, "learning_rate": 9.919350173423035e-06, "loss": 25.7913, "step": 1126 }, { "epoch": 0.1045939675174014, "grad_norm": 41.50802993774414, "learning_rate": 9.919075428726237e-06, "loss": 24.5228, "step": 1127 }, { "epoch": 0.10468677494199535, "grad_norm": 42.752620697021484, "learning_rate": 9.918800220665035e-06, "loss": 25.6586, "step": 1128 }, { "epoch": 0.10477958236658932, "grad_norm": 41.74000930786133, "learning_rate": 9.91852454926535e-06, "loss": 25.3106, "step": 1129 }, { "epoch": 0.1048723897911833, "grad_norm": 45.19004440307617, "learning_rate": 9.91824841455315e-06, "loss": 25.8646, "step": 1130 }, { "epoch": 0.10496519721577727, "grad_norm": 63.18574905395508, "learning_rate": 9.917971816554447e-06, "loss": 27.449, "step": 1131 }, { "epoch": 0.10505800464037122, "grad_norm": 46.65525436401367, "learning_rate": 9.917694755295295e-06, "loss": 25.8956, "step": 1132 }, { "epoch": 0.1051508120649652, "grad_norm": 40.752071380615234, "learning_rate": 9.917417230801795e-06, "loss": 23.7871, "step": 1133 }, { "epoch": 0.10524361948955917, "grad_norm": 42.42795944213867, "learning_rate": 9.917139243100088e-06, "loss": 25.5621, "step": 1134 }, { "epoch": 0.10533642691415314, "grad_norm": 40.76258087158203, "learning_rate": 9.916860792216355e-06, "loss": 24.4731, "step": 1135 }, { "epoch": 0.1054292343387471, "grad_norm": 38.35707092285156, "learning_rate": 9.916581878176831e-06, "loss": 25.785, "step": 1136 }, { "epoch": 0.10552204176334107, "grad_norm": 44.07560729980469, "learning_rate": 9.916302501007788e-06, "loss": 26.8873, "step": 1137 }, { "epoch": 0.10561484918793504, "grad_norm": 42.807884216308594, "learning_rate": 9.916022660735543e-06, "loss": 25.3574, "step": 1138 }, { "epoch": 0.105707656612529, "grad_norm": 50.2960319519043, "learning_rate": 9.915742357386454e-06, "loss": 25.0018, "step": 1139 }, { "epoch": 0.10580046403712297, "grad_norm": 38.366607666015625, "learning_rate": 9.915461590986926e-06, "loss": 25.3952, "step": 1140 }, { "epoch": 0.10589327146171694, "grad_norm": 41.33824157714844, "learning_rate": 9.915180361563406e-06, "loss": 25.86, "step": 1141 }, { "epoch": 0.10598607888631091, "grad_norm": 39.98927688598633, "learning_rate": 9.914898669142386e-06, "loss": 27.1972, "step": 1142 }, { "epoch": 0.10607888631090487, "grad_norm": 38.878692626953125, "learning_rate": 9.914616513750401e-06, "loss": 27.8369, "step": 1143 }, { "epoch": 0.10617169373549884, "grad_norm": 42.1334114074707, "learning_rate": 9.914333895414027e-06, "loss": 26.441, "step": 1144 }, { "epoch": 0.10626450116009281, "grad_norm": 37.68767166137695, "learning_rate": 9.91405081415989e-06, "loss": 24.6574, "step": 1145 }, { "epoch": 0.10635730858468677, "grad_norm": 42.37699890136719, "learning_rate": 9.913767270014652e-06, "loss": 24.4288, "step": 1146 }, { "epoch": 0.10645011600928074, "grad_norm": 39.265933990478516, "learning_rate": 9.913483263005024e-06, "loss": 24.8921, "step": 1147 }, { "epoch": 0.10654292343387471, "grad_norm": 38.99738311767578, "learning_rate": 9.913198793157757e-06, "loss": 25.1048, "step": 1148 }, { "epoch": 0.10663573085846868, "grad_norm": 42.96142578125, "learning_rate": 9.912913860499648e-06, "loss": 25.4949, "step": 1149 }, { "epoch": 0.10672853828306264, "grad_norm": 44.26565933227539, "learning_rate": 9.912628465057538e-06, "loss": 27.4675, "step": 1150 }, { "epoch": 0.10682134570765661, "grad_norm": 38.83085250854492, "learning_rate": 9.91234260685831e-06, "loss": 24.0761, "step": 1151 }, { "epoch": 0.10691415313225058, "grad_norm": 40.6199836730957, "learning_rate": 9.912056285928891e-06, "loss": 26.7056, "step": 1152 }, { "epoch": 0.10700696055684454, "grad_norm": 40.43107986450195, "learning_rate": 9.911769502296251e-06, "loss": 26.121, "step": 1153 }, { "epoch": 0.10709976798143851, "grad_norm": 44.29130554199219, "learning_rate": 9.911482255987403e-06, "loss": 24.8404, "step": 1154 }, { "epoch": 0.10719257540603248, "grad_norm": 42.988712310791016, "learning_rate": 9.91119454702941e-06, "loss": 24.6849, "step": 1155 }, { "epoch": 0.10728538283062646, "grad_norm": 37.14909362792969, "learning_rate": 9.91090637544937e-06, "loss": 24.5577, "step": 1156 }, { "epoch": 0.10737819025522041, "grad_norm": 41.751678466796875, "learning_rate": 9.910617741274427e-06, "loss": 26.6026, "step": 1157 }, { "epoch": 0.10747099767981438, "grad_norm": 44.24882125854492, "learning_rate": 9.91032864453177e-06, "loss": 27.217, "step": 1158 }, { "epoch": 0.10756380510440836, "grad_norm": 39.60539245605469, "learning_rate": 9.910039085248633e-06, "loss": 25.4899, "step": 1159 }, { "epoch": 0.10765661252900233, "grad_norm": 40.75236892700195, "learning_rate": 9.909749063452292e-06, "loss": 26.3408, "step": 1160 }, { "epoch": 0.10774941995359628, "grad_norm": 38.20402908325195, "learning_rate": 9.909458579170064e-06, "loss": 25.6691, "step": 1161 }, { "epoch": 0.10784222737819026, "grad_norm": 39.85769271850586, "learning_rate": 9.909167632429313e-06, "loss": 23.5082, "step": 1162 }, { "epoch": 0.10793503480278423, "grad_norm": 66.7009048461914, "learning_rate": 9.908876223257445e-06, "loss": 25.8041, "step": 1163 }, { "epoch": 0.10802784222737818, "grad_norm": 41.513885498046875, "learning_rate": 9.908584351681911e-06, "loss": 23.9734, "step": 1164 }, { "epoch": 0.10812064965197216, "grad_norm": 41.93903350830078, "learning_rate": 9.908292017730205e-06, "loss": 24.9776, "step": 1165 }, { "epoch": 0.10821345707656613, "grad_norm": 38.848384857177734, "learning_rate": 9.907999221429862e-06, "loss": 25.7347, "step": 1166 }, { "epoch": 0.1083062645011601, "grad_norm": 37.40638732910156, "learning_rate": 9.907705962808462e-06, "loss": 24.8434, "step": 1167 }, { "epoch": 0.10839907192575406, "grad_norm": 38.98628234863281, "learning_rate": 9.907412241893635e-06, "loss": 24.9846, "step": 1168 }, { "epoch": 0.10849187935034803, "grad_norm": 40.28971862792969, "learning_rate": 9.90711805871304e-06, "loss": 25.944, "step": 1169 }, { "epoch": 0.108584686774942, "grad_norm": 39.41820526123047, "learning_rate": 9.906823413294398e-06, "loss": 25.9812, "step": 1170 }, { "epoch": 0.10867749419953596, "grad_norm": 37.05510330200195, "learning_rate": 9.906528305665457e-06, "loss": 24.6106, "step": 1171 }, { "epoch": 0.10877030162412993, "grad_norm": 41.94070053100586, "learning_rate": 9.906232735854016e-06, "loss": 25.3536, "step": 1172 }, { "epoch": 0.1088631090487239, "grad_norm": 41.367591857910156, "learning_rate": 9.90593670388792e-06, "loss": 24.2999, "step": 1173 }, { "epoch": 0.10895591647331787, "grad_norm": 133.24427795410156, "learning_rate": 9.905640209795052e-06, "loss": 26.1197, "step": 1174 }, { "epoch": 0.10904872389791183, "grad_norm": 47.144168853759766, "learning_rate": 9.90534325360334e-06, "loss": 25.5748, "step": 1175 }, { "epoch": 0.1091415313225058, "grad_norm": 38.15866470336914, "learning_rate": 9.90504583534076e-06, "loss": 24.767, "step": 1176 }, { "epoch": 0.10923433874709977, "grad_norm": 42.30336380004883, "learning_rate": 9.904747955035325e-06, "loss": 25.3243, "step": 1177 }, { "epoch": 0.10932714617169373, "grad_norm": 43.07011413574219, "learning_rate": 9.904449612715096e-06, "loss": 24.8852, "step": 1178 }, { "epoch": 0.1094199535962877, "grad_norm": 42.988243103027344, "learning_rate": 9.904150808408178e-06, "loss": 25.6158, "step": 1179 }, { "epoch": 0.10951276102088167, "grad_norm": 42.332916259765625, "learning_rate": 9.903851542142716e-06, "loss": 25.5262, "step": 1180 }, { "epoch": 0.10960556844547564, "grad_norm": 36.60274887084961, "learning_rate": 9.903551813946897e-06, "loss": 23.6275, "step": 1181 }, { "epoch": 0.1096983758700696, "grad_norm": 37.0818977355957, "learning_rate": 9.903251623848957e-06, "loss": 25.8425, "step": 1182 }, { "epoch": 0.10979118329466357, "grad_norm": 43.146629333496094, "learning_rate": 9.902950971877174e-06, "loss": 26.3676, "step": 1183 }, { "epoch": 0.10988399071925754, "grad_norm": 41.73398208618164, "learning_rate": 9.90264985805987e-06, "loss": 24.8643, "step": 1184 }, { "epoch": 0.10997679814385151, "grad_norm": 43.07706069946289, "learning_rate": 9.902348282425404e-06, "loss": 26.3215, "step": 1185 }, { "epoch": 0.11006960556844547, "grad_norm": 38.843013763427734, "learning_rate": 9.90204624500219e-06, "loss": 25.7592, "step": 1186 }, { "epoch": 0.11016241299303944, "grad_norm": 39.45341110229492, "learning_rate": 9.901743745818674e-06, "loss": 25.3933, "step": 1187 }, { "epoch": 0.11025522041763341, "grad_norm": 42.29469680786133, "learning_rate": 9.901440784903354e-06, "loss": 25.8056, "step": 1188 }, { "epoch": 0.11034802784222737, "grad_norm": 45.960304260253906, "learning_rate": 9.901137362284765e-06, "loss": 29.5519, "step": 1189 }, { "epoch": 0.11044083526682134, "grad_norm": 40.227577209472656, "learning_rate": 9.900833477991492e-06, "loss": 25.0727, "step": 1190 }, { "epoch": 0.11053364269141532, "grad_norm": 44.02662658691406, "learning_rate": 9.900529132052159e-06, "loss": 25.9987, "step": 1191 }, { "epoch": 0.11062645011600929, "grad_norm": 45.26885986328125, "learning_rate": 9.900224324495433e-06, "loss": 26.5576, "step": 1192 }, { "epoch": 0.11071925754060324, "grad_norm": 41.40431594848633, "learning_rate": 9.899919055350027e-06, "loss": 24.124, "step": 1193 }, { "epoch": 0.11081206496519722, "grad_norm": 38.12504959106445, "learning_rate": 9.8996133246447e-06, "loss": 25.9118, "step": 1194 }, { "epoch": 0.11090487238979119, "grad_norm": 40.55837631225586, "learning_rate": 9.899307132408244e-06, "loss": 25.8676, "step": 1195 }, { "epoch": 0.11099767981438514, "grad_norm": 39.844154357910156, "learning_rate": 9.899000478669507e-06, "loss": 24.4217, "step": 1196 }, { "epoch": 0.11109048723897912, "grad_norm": 42.87827682495117, "learning_rate": 9.898693363457375e-06, "loss": 25.5937, "step": 1197 }, { "epoch": 0.11118329466357309, "grad_norm": 40.202999114990234, "learning_rate": 9.898385786800774e-06, "loss": 23.6366, "step": 1198 }, { "epoch": 0.11127610208816706, "grad_norm": 38.14936447143555, "learning_rate": 9.898077748728678e-06, "loss": 24.2466, "step": 1199 }, { "epoch": 0.11136890951276102, "grad_norm": 41.7305793762207, "learning_rate": 9.897769249270106e-06, "loss": 24.6394, "step": 1200 }, { "epoch": 0.11146171693735499, "grad_norm": 40.150718688964844, "learning_rate": 9.897460288454118e-06, "loss": 25.6621, "step": 1201 }, { "epoch": 0.11155452436194896, "grad_norm": 41.04405975341797, "learning_rate": 9.897150866309812e-06, "loss": 25.0409, "step": 1202 }, { "epoch": 0.11164733178654292, "grad_norm": 44.283180236816406, "learning_rate": 9.89684098286634e-06, "loss": 25.7256, "step": 1203 }, { "epoch": 0.11174013921113689, "grad_norm": 36.52486038208008, "learning_rate": 9.89653063815289e-06, "loss": 24.4847, "step": 1204 }, { "epoch": 0.11183294663573086, "grad_norm": 38.1030387878418, "learning_rate": 9.896219832198697e-06, "loss": 24.0359, "step": 1205 }, { "epoch": 0.11192575406032483, "grad_norm": 41.72102737426758, "learning_rate": 9.895908565033036e-06, "loss": 26.8852, "step": 1206 }, { "epoch": 0.11201856148491879, "grad_norm": 37.23719787597656, "learning_rate": 9.89559683668523e-06, "loss": 26.5164, "step": 1207 }, { "epoch": 0.11211136890951276, "grad_norm": 45.05799865722656, "learning_rate": 9.89528464718464e-06, "loss": 26.7088, "step": 1208 }, { "epoch": 0.11220417633410673, "grad_norm": 41.887664794921875, "learning_rate": 9.894971996560678e-06, "loss": 24.9233, "step": 1209 }, { "epoch": 0.1122969837587007, "grad_norm": 39.04629898071289, "learning_rate": 9.89465888484279e-06, "loss": 24.3002, "step": 1210 }, { "epoch": 0.11238979118329466, "grad_norm": 42.02716827392578, "learning_rate": 9.894345312060474e-06, "loss": 25.8448, "step": 1211 }, { "epoch": 0.11248259860788863, "grad_norm": 41.45887756347656, "learning_rate": 9.894031278243266e-06, "loss": 25.231, "step": 1212 }, { "epoch": 0.1125754060324826, "grad_norm": 39.13980484008789, "learning_rate": 9.893716783420748e-06, "loss": 24.6, "step": 1213 }, { "epoch": 0.11266821345707656, "grad_norm": 42.46750259399414, "learning_rate": 9.893401827622544e-06, "loss": 25.6356, "step": 1214 }, { "epoch": 0.11276102088167053, "grad_norm": 45.42053985595703, "learning_rate": 9.89308641087832e-06, "loss": 24.794, "step": 1215 }, { "epoch": 0.1128538283062645, "grad_norm": 44.29722595214844, "learning_rate": 9.892770533217792e-06, "loss": 25.722, "step": 1216 }, { "epoch": 0.11294663573085847, "grad_norm": 61.6937370300293, "learning_rate": 9.892454194670714e-06, "loss": 26.6317, "step": 1217 }, { "epoch": 0.11303944315545243, "grad_norm": 39.137874603271484, "learning_rate": 9.89213739526688e-06, "loss": 24.6509, "step": 1218 }, { "epoch": 0.1131322505800464, "grad_norm": 39.52689743041992, "learning_rate": 9.891820135036136e-06, "loss": 26.2261, "step": 1219 }, { "epoch": 0.11322505800464037, "grad_norm": 40.194190979003906, "learning_rate": 9.891502414008365e-06, "loss": 25.6606, "step": 1220 }, { "epoch": 0.11331786542923433, "grad_norm": 40.35957336425781, "learning_rate": 9.891184232213495e-06, "loss": 26.7286, "step": 1221 }, { "epoch": 0.1134106728538283, "grad_norm": 40.84104919433594, "learning_rate": 9.8908655896815e-06, "loss": 24.7865, "step": 1222 }, { "epoch": 0.11350348027842228, "grad_norm": 40.247005462646484, "learning_rate": 9.890546486442394e-06, "loss": 25.332, "step": 1223 }, { "epoch": 0.11359628770301625, "grad_norm": 72.20381164550781, "learning_rate": 9.890226922526238e-06, "loss": 24.8421, "step": 1224 }, { "epoch": 0.1136890951276102, "grad_norm": 40.51180648803711, "learning_rate": 9.889906897963129e-06, "loss": 25.4151, "step": 1225 }, { "epoch": 0.11378190255220418, "grad_norm": 38.58125305175781, "learning_rate": 9.889586412783219e-06, "loss": 24.5077, "step": 1226 }, { "epoch": 0.11387470997679815, "grad_norm": 46.44977569580078, "learning_rate": 9.88926546701669e-06, "loss": 23.716, "step": 1227 }, { "epoch": 0.11396751740139212, "grad_norm": 41.9786262512207, "learning_rate": 9.888944060693781e-06, "loss": 25.5866, "step": 1228 }, { "epoch": 0.11406032482598608, "grad_norm": 38.466209411621094, "learning_rate": 9.888622193844763e-06, "loss": 24.7525, "step": 1229 }, { "epoch": 0.11415313225058005, "grad_norm": 39.44414138793945, "learning_rate": 9.888299866499957e-06, "loss": 23.4215, "step": 1230 }, { "epoch": 0.11424593967517402, "grad_norm": 41.00126647949219, "learning_rate": 9.887977078689726e-06, "loss": 23.3721, "step": 1231 }, { "epoch": 0.11433874709976798, "grad_norm": 43.18040084838867, "learning_rate": 9.887653830444473e-06, "loss": 23.7803, "step": 1232 }, { "epoch": 0.11443155452436195, "grad_norm": 42.36586380004883, "learning_rate": 9.887330121794649e-06, "loss": 24.9318, "step": 1233 }, { "epoch": 0.11452436194895592, "grad_norm": 37.81804275512695, "learning_rate": 9.887005952770747e-06, "loss": 25.2791, "step": 1234 }, { "epoch": 0.11461716937354989, "grad_norm": 48.609291076660156, "learning_rate": 9.886681323403303e-06, "loss": 25.2457, "step": 1235 }, { "epoch": 0.11470997679814385, "grad_norm": 47.279850006103516, "learning_rate": 9.886356233722894e-06, "loss": 25.6486, "step": 1236 }, { "epoch": 0.11480278422273782, "grad_norm": 38.716861724853516, "learning_rate": 9.886030683760145e-06, "loss": 25.4833, "step": 1237 }, { "epoch": 0.11489559164733179, "grad_norm": 39.758907318115234, "learning_rate": 9.885704673545723e-06, "loss": 23.759, "step": 1238 }, { "epoch": 0.11498839907192575, "grad_norm": 41.25961685180664, "learning_rate": 9.885378203110332e-06, "loss": 24.6858, "step": 1239 }, { "epoch": 0.11508120649651972, "grad_norm": 42.68478012084961, "learning_rate": 9.88505127248473e-06, "loss": 26.2429, "step": 1240 }, { "epoch": 0.11517401392111369, "grad_norm": 41.28113555908203, "learning_rate": 9.884723881699712e-06, "loss": 24.7898, "step": 1241 }, { "epoch": 0.11526682134570766, "grad_norm": 36.448326110839844, "learning_rate": 9.884396030786116e-06, "loss": 23.7878, "step": 1242 }, { "epoch": 0.11535962877030162, "grad_norm": 38.83512878417969, "learning_rate": 9.884067719774823e-06, "loss": 24.1848, "step": 1243 }, { "epoch": 0.11545243619489559, "grad_norm": 39.743534088134766, "learning_rate": 9.883738948696764e-06, "loss": 24.5656, "step": 1244 }, { "epoch": 0.11554524361948956, "grad_norm": 38.868473052978516, "learning_rate": 9.883409717582907e-06, "loss": 24.9183, "step": 1245 }, { "epoch": 0.11563805104408352, "grad_norm": 38.28993225097656, "learning_rate": 9.883080026464263e-06, "loss": 24.0076, "step": 1246 }, { "epoch": 0.11573085846867749, "grad_norm": 44.83024597167969, "learning_rate": 9.882749875371886e-06, "loss": 25.496, "step": 1247 }, { "epoch": 0.11582366589327146, "grad_norm": 41.7735710144043, "learning_rate": 9.88241926433688e-06, "loss": 25.8035, "step": 1248 }, { "epoch": 0.11591647331786543, "grad_norm": 41.28630065917969, "learning_rate": 9.882088193390385e-06, "loss": 24.518, "step": 1249 }, { "epoch": 0.11600928074245939, "grad_norm": 40.211097717285156, "learning_rate": 9.881756662563588e-06, "loss": 26.5026, "step": 1250 }, { "epoch": 0.11610208816705336, "grad_norm": 72.99161529541016, "learning_rate": 9.881424671887719e-06, "loss": 25.1733, "step": 1251 }, { "epoch": 0.11619489559164733, "grad_norm": 42.69826889038086, "learning_rate": 9.88109222139405e-06, "loss": 27.1352, "step": 1252 }, { "epoch": 0.1162877030162413, "grad_norm": 39.41941452026367, "learning_rate": 9.880759311113895e-06, "loss": 26.9267, "step": 1253 }, { "epoch": 0.11638051044083526, "grad_norm": 43.66968536376953, "learning_rate": 9.880425941078617e-06, "loss": 26.1287, "step": 1254 }, { "epoch": 0.11647331786542924, "grad_norm": 40.07654571533203, "learning_rate": 9.880092111319616e-06, "loss": 24.08, "step": 1255 }, { "epoch": 0.1165661252900232, "grad_norm": 37.26533508300781, "learning_rate": 9.87975782186834e-06, "loss": 25.3139, "step": 1256 }, { "epoch": 0.11665893271461716, "grad_norm": 41.81243896484375, "learning_rate": 9.879423072756273e-06, "loss": 25.094, "step": 1257 }, { "epoch": 0.11675174013921114, "grad_norm": 44.15922546386719, "learning_rate": 9.879087864014953e-06, "loss": 26.8484, "step": 1258 }, { "epoch": 0.1168445475638051, "grad_norm": 40.900001525878906, "learning_rate": 9.878752195675956e-06, "loss": 25.3849, "step": 1259 }, { "epoch": 0.11693735498839908, "grad_norm": 39.17391586303711, "learning_rate": 9.878416067770898e-06, "loss": 24.6935, "step": 1260 }, { "epoch": 0.11703016241299304, "grad_norm": 43.20103073120117, "learning_rate": 9.878079480331444e-06, "loss": 25.2563, "step": 1261 }, { "epoch": 0.11712296983758701, "grad_norm": 39.73512268066406, "learning_rate": 9.877742433389297e-06, "loss": 24.6939, "step": 1262 }, { "epoch": 0.11721577726218098, "grad_norm": 40.23078918457031, "learning_rate": 9.877404926976205e-06, "loss": 25.692, "step": 1263 }, { "epoch": 0.11730858468677494, "grad_norm": 38.26877212524414, "learning_rate": 9.877066961123966e-06, "loss": 25.3888, "step": 1264 }, { "epoch": 0.11740139211136891, "grad_norm": 39.22850036621094, "learning_rate": 9.876728535864413e-06, "loss": 24.4193, "step": 1265 }, { "epoch": 0.11749419953596288, "grad_norm": 39.584964752197266, "learning_rate": 9.87638965122942e-06, "loss": 25.1345, "step": 1266 }, { "epoch": 0.11758700696055685, "grad_norm": 41.592708587646484, "learning_rate": 9.876050307250917e-06, "loss": 24.9726, "step": 1267 }, { "epoch": 0.11767981438515081, "grad_norm": 40.48282241821289, "learning_rate": 9.875710503960863e-06, "loss": 24.6719, "step": 1268 }, { "epoch": 0.11777262180974478, "grad_norm": 48.23808670043945, "learning_rate": 9.87537024139127e-06, "loss": 25.9491, "step": 1269 }, { "epoch": 0.11786542923433875, "grad_norm": 40.88858413696289, "learning_rate": 9.875029519574188e-06, "loss": 26.1045, "step": 1270 }, { "epoch": 0.11795823665893271, "grad_norm": 42.28752517700195, "learning_rate": 9.874688338541713e-06, "loss": 25.183, "step": 1271 }, { "epoch": 0.11805104408352668, "grad_norm": 41.24748611450195, "learning_rate": 9.874346698325983e-06, "loss": 25.8746, "step": 1272 }, { "epoch": 0.11814385150812065, "grad_norm": 40.53864288330078, "learning_rate": 9.87400459895918e-06, "loss": 25.2054, "step": 1273 }, { "epoch": 0.11823665893271462, "grad_norm": 41.84044647216797, "learning_rate": 9.873662040473529e-06, "loss": 25.871, "step": 1274 }, { "epoch": 0.11832946635730858, "grad_norm": 41.00093460083008, "learning_rate": 9.873319022901299e-06, "loss": 25.7887, "step": 1275 }, { "epoch": 0.11842227378190255, "grad_norm": 42.9976692199707, "learning_rate": 9.872975546274799e-06, "loss": 24.6883, "step": 1276 }, { "epoch": 0.11851508120649652, "grad_norm": 39.355037689208984, "learning_rate": 9.872631610626386e-06, "loss": 25.9922, "step": 1277 }, { "epoch": 0.1186078886310905, "grad_norm": 40.74993896484375, "learning_rate": 9.872287215988456e-06, "loss": 26.1872, "step": 1278 }, { "epoch": 0.11870069605568445, "grad_norm": 41.67924118041992, "learning_rate": 9.871942362393452e-06, "loss": 25.7689, "step": 1279 }, { "epoch": 0.11879350348027842, "grad_norm": 90.8895034790039, "learning_rate": 9.871597049873856e-06, "loss": 25.2833, "step": 1280 }, { "epoch": 0.1188863109048724, "grad_norm": 40.4063606262207, "learning_rate": 9.871251278462197e-06, "loss": 24.7892, "step": 1281 }, { "epoch": 0.11897911832946635, "grad_norm": 41.797080993652344, "learning_rate": 9.870905048191046e-06, "loss": 24.6916, "step": 1282 }, { "epoch": 0.11907192575406032, "grad_norm": 42.637908935546875, "learning_rate": 9.870558359093016e-06, "loss": 25.2962, "step": 1283 }, { "epoch": 0.1191647331786543, "grad_norm": 38.115806579589844, "learning_rate": 9.870211211200766e-06, "loss": 25.7337, "step": 1284 }, { "epoch": 0.11925754060324827, "grad_norm": 40.130985260009766, "learning_rate": 9.869863604546995e-06, "loss": 23.9583, "step": 1285 }, { "epoch": 0.11935034802784222, "grad_norm": 40.469154357910156, "learning_rate": 9.869515539164447e-06, "loss": 25.1353, "step": 1286 }, { "epoch": 0.1194431554524362, "grad_norm": 42.4483642578125, "learning_rate": 9.86916701508591e-06, "loss": 27.8448, "step": 1287 }, { "epoch": 0.11953596287703017, "grad_norm": 38.775875091552734, "learning_rate": 9.86881803234421e-06, "loss": 23.4522, "step": 1288 }, { "epoch": 0.11962877030162412, "grad_norm": 240.84703063964844, "learning_rate": 9.868468590972224e-06, "loss": 24.4036, "step": 1289 }, { "epoch": 0.1197215777262181, "grad_norm": 48.08256912231445, "learning_rate": 9.86811869100287e-06, "loss": 24.973, "step": 1290 }, { "epoch": 0.11981438515081207, "grad_norm": 41.01996612548828, "learning_rate": 9.867768332469103e-06, "loss": 25.5553, "step": 1291 }, { "epoch": 0.11990719257540604, "grad_norm": 50.05933380126953, "learning_rate": 9.86741751540393e-06, "loss": 24.1558, "step": 1292 }, { "epoch": 0.12, "grad_norm": 40.59452438354492, "learning_rate": 9.867066239840396e-06, "loss": 26.1568, "step": 1293 }, { "epoch": 0.12009280742459397, "grad_norm": 38.92224884033203, "learning_rate": 9.866714505811587e-06, "loss": 24.8801, "step": 1294 }, { "epoch": 0.12018561484918794, "grad_norm": 37.92877960205078, "learning_rate": 9.866362313350639e-06, "loss": 23.4163, "step": 1295 }, { "epoch": 0.1202784222737819, "grad_norm": 39.125797271728516, "learning_rate": 9.866009662490727e-06, "loss": 25.3885, "step": 1296 }, { "epoch": 0.12037122969837587, "grad_norm": 36.7122802734375, "learning_rate": 9.865656553265071e-06, "loss": 24.7854, "step": 1297 }, { "epoch": 0.12046403712296984, "grad_norm": 42.08183670043945, "learning_rate": 9.865302985706929e-06, "loss": 25.4256, "step": 1298 }, { "epoch": 0.12055684454756381, "grad_norm": 38.06452941894531, "learning_rate": 9.86494895984961e-06, "loss": 23.303, "step": 1299 }, { "epoch": 0.12064965197215777, "grad_norm": 38.06355285644531, "learning_rate": 9.86459447572646e-06, "loss": 25.54, "step": 1300 }, { "epoch": 0.12074245939675174, "grad_norm": 45.19282531738281, "learning_rate": 9.864239533370875e-06, "loss": 27.5042, "step": 1301 }, { "epoch": 0.12083526682134571, "grad_norm": 39.084388732910156, "learning_rate": 9.86388413281628e-06, "loss": 26.0464, "step": 1302 }, { "epoch": 0.12092807424593968, "grad_norm": 39.56044006347656, "learning_rate": 9.863528274096165e-06, "loss": 25.4197, "step": 1303 }, { "epoch": 0.12102088167053364, "grad_norm": 40.53164291381836, "learning_rate": 9.863171957244043e-06, "loss": 24.4607, "step": 1304 }, { "epoch": 0.12111368909512761, "grad_norm": 57.233009338378906, "learning_rate": 9.86281518229348e-06, "loss": 26.1939, "step": 1305 }, { "epoch": 0.12120649651972158, "grad_norm": 37.863040924072266, "learning_rate": 9.862457949278082e-06, "loss": 24.2509, "step": 1306 }, { "epoch": 0.12129930394431554, "grad_norm": 61.53518295288086, "learning_rate": 9.862100258231502e-06, "loss": 24.4264, "step": 1307 }, { "epoch": 0.12139211136890951, "grad_norm": 50.68684768676758, "learning_rate": 9.861742109187433e-06, "loss": 26.1136, "step": 1308 }, { "epoch": 0.12148491879350348, "grad_norm": 39.357421875, "learning_rate": 9.86138350217961e-06, "loss": 25.4505, "step": 1309 }, { "epoch": 0.12157772621809745, "grad_norm": 37.990177154541016, "learning_rate": 9.861024437241816e-06, "loss": 23.6346, "step": 1310 }, { "epoch": 0.12167053364269141, "grad_norm": 38.87714767456055, "learning_rate": 9.86066491440787e-06, "loss": 25.1455, "step": 1311 }, { "epoch": 0.12176334106728538, "grad_norm": 42.01112747192383, "learning_rate": 9.860304933711642e-06, "loss": 25.2664, "step": 1312 }, { "epoch": 0.12185614849187935, "grad_norm": 37.95954132080078, "learning_rate": 9.859944495187039e-06, "loss": 26.1961, "step": 1313 }, { "epoch": 0.12194895591647331, "grad_norm": 40.055423736572266, "learning_rate": 9.859583598868013e-06, "loss": 27.4091, "step": 1314 }, { "epoch": 0.12204176334106728, "grad_norm": 53.68761444091797, "learning_rate": 9.859222244788562e-06, "loss": 27.423, "step": 1315 }, { "epoch": 0.12213457076566125, "grad_norm": 37.35026931762695, "learning_rate": 9.858860432982723e-06, "loss": 25.0901, "step": 1316 }, { "epoch": 0.12222737819025523, "grad_norm": 42.044677734375, "learning_rate": 9.858498163484576e-06, "loss": 27.0093, "step": 1317 }, { "epoch": 0.12232018561484918, "grad_norm": 35.990238189697266, "learning_rate": 9.858135436328247e-06, "loss": 21.9744, "step": 1318 }, { "epoch": 0.12241299303944315, "grad_norm": 40.24034118652344, "learning_rate": 9.857772251547907e-06, "loss": 25.238, "step": 1319 }, { "epoch": 0.12250580046403713, "grad_norm": 40.79290771484375, "learning_rate": 9.857408609177763e-06, "loss": 24.9889, "step": 1320 }, { "epoch": 0.12259860788863108, "grad_norm": 116.76502227783203, "learning_rate": 9.857044509252072e-06, "loss": 25.2748, "step": 1321 }, { "epoch": 0.12269141531322506, "grad_norm": 45.45166778564453, "learning_rate": 9.85667995180513e-06, "loss": 23.7969, "step": 1322 }, { "epoch": 0.12278422273781903, "grad_norm": 39.63626480102539, "learning_rate": 9.856314936871278e-06, "loss": 24.933, "step": 1323 }, { "epoch": 0.122877030162413, "grad_norm": 42.04253005981445, "learning_rate": 9.855949464484898e-06, "loss": 26.4554, "step": 1324 }, { "epoch": 0.12296983758700696, "grad_norm": 37.582908630371094, "learning_rate": 9.855583534680418e-06, "loss": 24.937, "step": 1325 }, { "epoch": 0.12306264501160093, "grad_norm": 36.45637893676758, "learning_rate": 9.855217147492309e-06, "loss": 23.201, "step": 1326 }, { "epoch": 0.1231554524361949, "grad_norm": 41.9315071105957, "learning_rate": 9.854850302955079e-06, "loss": 25.3828, "step": 1327 }, { "epoch": 0.12324825986078887, "grad_norm": 40.948055267333984, "learning_rate": 9.85448300110329e-06, "loss": 25.6163, "step": 1328 }, { "epoch": 0.12334106728538283, "grad_norm": 44.58413314819336, "learning_rate": 9.854115241971537e-06, "loss": 25.42, "step": 1329 }, { "epoch": 0.1234338747099768, "grad_norm": 38.51191329956055, "learning_rate": 9.85374702559446e-06, "loss": 23.7606, "step": 1330 }, { "epoch": 0.12352668213457077, "grad_norm": 40.558921813964844, "learning_rate": 9.853378352006748e-06, "loss": 25.2264, "step": 1331 }, { "epoch": 0.12361948955916473, "grad_norm": 39.9131965637207, "learning_rate": 9.853009221243129e-06, "loss": 24.1403, "step": 1332 }, { "epoch": 0.1237122969837587, "grad_norm": 55.66518783569336, "learning_rate": 9.852639633338372e-06, "loss": 25.5377, "step": 1333 }, { "epoch": 0.12380510440835267, "grad_norm": 39.77279281616211, "learning_rate": 9.852269588327293e-06, "loss": 23.5333, "step": 1334 }, { "epoch": 0.12389791183294664, "grad_norm": 78.03782653808594, "learning_rate": 9.85189908624475e-06, "loss": 24.2827, "step": 1335 }, { "epoch": 0.1239907192575406, "grad_norm": 38.625831604003906, "learning_rate": 9.85152812712564e-06, "loss": 26.5617, "step": 1336 }, { "epoch": 0.12408352668213457, "grad_norm": 40.953590393066406, "learning_rate": 9.851156711004909e-06, "loss": 26.8952, "step": 1337 }, { "epoch": 0.12417633410672854, "grad_norm": 39.374290466308594, "learning_rate": 9.850784837917541e-06, "loss": 25.6039, "step": 1338 }, { "epoch": 0.1242691415313225, "grad_norm": 45.20770263671875, "learning_rate": 9.850412507898571e-06, "loss": 25.4899, "step": 1339 }, { "epoch": 0.12436194895591647, "grad_norm": 46.551307678222656, "learning_rate": 9.850039720983065e-06, "loss": 24.4855, "step": 1340 }, { "epoch": 0.12445475638051044, "grad_norm": 57.556583404541016, "learning_rate": 9.849666477206142e-06, "loss": 25.2263, "step": 1341 }, { "epoch": 0.12454756380510441, "grad_norm": 37.07223129272461, "learning_rate": 9.84929277660296e-06, "loss": 26.0092, "step": 1342 }, { "epoch": 0.12464037122969837, "grad_norm": 41.506771087646484, "learning_rate": 9.84891861920872e-06, "loss": 26.168, "step": 1343 }, { "epoch": 0.12473317865429234, "grad_norm": 62.694053649902344, "learning_rate": 9.848544005058668e-06, "loss": 26.7374, "step": 1344 }, { "epoch": 0.12482598607888631, "grad_norm": 40.83328628540039, "learning_rate": 9.848168934188093e-06, "loss": 26.2252, "step": 1345 }, { "epoch": 0.12491879350348027, "grad_norm": 38.71687316894531, "learning_rate": 9.847793406632322e-06, "loss": 23.6863, "step": 1346 }, { "epoch": 0.12501160092807426, "grad_norm": 39.62246322631836, "learning_rate": 9.84741742242673e-06, "loss": 24.3102, "step": 1347 }, { "epoch": 0.1251044083526682, "grad_norm": 42.52667236328125, "learning_rate": 9.847040981606735e-06, "loss": 26.995, "step": 1348 }, { "epoch": 0.12519721577726217, "grad_norm": 38.787960052490234, "learning_rate": 9.846664084207797e-06, "loss": 23.9529, "step": 1349 }, { "epoch": 0.12529002320185614, "grad_norm": 40.75275421142578, "learning_rate": 9.846286730265418e-06, "loss": 24.2086, "step": 1350 }, { "epoch": 0.12538283062645011, "grad_norm": 40.57324981689453, "learning_rate": 9.845908919815141e-06, "loss": 24.9882, "step": 1351 }, { "epoch": 0.12547563805104409, "grad_norm": 39.130149841308594, "learning_rate": 9.84553065289256e-06, "loss": 24.3469, "step": 1352 }, { "epoch": 0.12556844547563806, "grad_norm": 37.781158447265625, "learning_rate": 9.845151929533304e-06, "loss": 23.9386, "step": 1353 }, { "epoch": 0.12566125290023203, "grad_norm": 43.776885986328125, "learning_rate": 9.844772749773046e-06, "loss": 25.9002, "step": 1354 }, { "epoch": 0.12575406032482597, "grad_norm": 46.542423248291016, "learning_rate": 9.844393113647508e-06, "loss": 26.7561, "step": 1355 }, { "epoch": 0.12584686774941994, "grad_norm": 40.98556900024414, "learning_rate": 9.844013021192447e-06, "loss": 25.6762, "step": 1356 }, { "epoch": 0.12593967517401392, "grad_norm": 50.70872116088867, "learning_rate": 9.843632472443668e-06, "loss": 26.6015, "step": 1357 }, { "epoch": 0.1260324825986079, "grad_norm": 37.262786865234375, "learning_rate": 9.84325146743702e-06, "loss": 24.0222, "step": 1358 }, { "epoch": 0.12612529002320186, "grad_norm": 41.88172149658203, "learning_rate": 9.842870006208389e-06, "loss": 25.6982, "step": 1359 }, { "epoch": 0.12621809744779583, "grad_norm": 37.54060363769531, "learning_rate": 9.84248808879371e-06, "loss": 23.2171, "step": 1360 }, { "epoch": 0.1263109048723898, "grad_norm": 38.615928649902344, "learning_rate": 9.842105715228958e-06, "loss": 25.543, "step": 1361 }, { "epoch": 0.12640371229698377, "grad_norm": 39.71745300292969, "learning_rate": 9.84172288555015e-06, "loss": 25.5878, "step": 1362 }, { "epoch": 0.12649651972157772, "grad_norm": 41.16861343383789, "learning_rate": 9.84133959979335e-06, "loss": 24.7919, "step": 1363 }, { "epoch": 0.1265893271461717, "grad_norm": 43.146244049072266, "learning_rate": 9.840955857994663e-06, "loss": 25.391, "step": 1364 }, { "epoch": 0.12668213457076566, "grad_norm": 39.551727294921875, "learning_rate": 9.840571660190235e-06, "loss": 25.8493, "step": 1365 }, { "epoch": 0.12677494199535963, "grad_norm": 55.148311614990234, "learning_rate": 9.840187006416258e-06, "loss": 24.2298, "step": 1366 }, { "epoch": 0.1268677494199536, "grad_norm": 40.920169830322266, "learning_rate": 9.839801896708961e-06, "loss": 23.9934, "step": 1367 }, { "epoch": 0.12696055684454757, "grad_norm": 42.6407585144043, "learning_rate": 9.839416331104625e-06, "loss": 25.3938, "step": 1368 }, { "epoch": 0.12705336426914154, "grad_norm": 40.58047103881836, "learning_rate": 9.839030309639567e-06, "loss": 25.028, "step": 1369 }, { "epoch": 0.1271461716937355, "grad_norm": 38.23796081542969, "learning_rate": 9.838643832350151e-06, "loss": 23.0653, "step": 1370 }, { "epoch": 0.12723897911832946, "grad_norm": 40.98276138305664, "learning_rate": 9.838256899272783e-06, "loss": 24.9509, "step": 1371 }, { "epoch": 0.12733178654292343, "grad_norm": 42.445735931396484, "learning_rate": 9.837869510443905e-06, "loss": 25.5646, "step": 1372 }, { "epoch": 0.1274245939675174, "grad_norm": 41.1456184387207, "learning_rate": 9.837481665900015e-06, "loss": 25.2114, "step": 1373 }, { "epoch": 0.12751740139211137, "grad_norm": 41.74822998046875, "learning_rate": 9.837093365677644e-06, "loss": 25.0664, "step": 1374 }, { "epoch": 0.12761020881670534, "grad_norm": 40.396488189697266, "learning_rate": 9.83670460981337e-06, "loss": 25.049, "step": 1375 }, { "epoch": 0.12770301624129932, "grad_norm": 41.3569221496582, "learning_rate": 9.83631539834381e-06, "loss": 26.7111, "step": 1376 }, { "epoch": 0.12779582366589326, "grad_norm": 40.38677978515625, "learning_rate": 9.835925731305631e-06, "loss": 24.9702, "step": 1377 }, { "epoch": 0.12788863109048723, "grad_norm": 39.043643951416016, "learning_rate": 9.835535608735535e-06, "loss": 25.3902, "step": 1378 }, { "epoch": 0.1279814385150812, "grad_norm": 41.07154846191406, "learning_rate": 9.835145030670272e-06, "loss": 24.1119, "step": 1379 }, { "epoch": 0.12807424593967517, "grad_norm": 39.71453094482422, "learning_rate": 9.834753997146633e-06, "loss": 23.8757, "step": 1380 }, { "epoch": 0.12816705336426915, "grad_norm": 39.66080093383789, "learning_rate": 9.834362508201453e-06, "loss": 24.192, "step": 1381 }, { "epoch": 0.12825986078886312, "grad_norm": 40.34653854370117, "learning_rate": 9.83397056387161e-06, "loss": 23.4205, "step": 1382 }, { "epoch": 0.1283526682134571, "grad_norm": 35.59199523925781, "learning_rate": 9.833578164194025e-06, "loss": 23.936, "step": 1383 }, { "epoch": 0.12844547563805103, "grad_norm": 37.182186126708984, "learning_rate": 9.833185309205655e-06, "loss": 25.7322, "step": 1384 }, { "epoch": 0.128538283062645, "grad_norm": 41.01671600341797, "learning_rate": 9.832791998943514e-06, "loss": 25.7534, "step": 1385 }, { "epoch": 0.12863109048723897, "grad_norm": 40.025875091552734, "learning_rate": 9.832398233444644e-06, "loss": 25.1899, "step": 1386 }, { "epoch": 0.12872389791183295, "grad_norm": 39.27777862548828, "learning_rate": 9.832004012746141e-06, "loss": 26.0636, "step": 1387 }, { "epoch": 0.12881670533642692, "grad_norm": 39.56749725341797, "learning_rate": 9.83160933688514e-06, "loss": 24.8956, "step": 1388 }, { "epoch": 0.1289095127610209, "grad_norm": 40.778289794921875, "learning_rate": 9.831214205898814e-06, "loss": 26.5471, "step": 1389 }, { "epoch": 0.12900232018561486, "grad_norm": 39.561988830566406, "learning_rate": 9.830818619824387e-06, "loss": 24.4557, "step": 1390 }, { "epoch": 0.1290951276102088, "grad_norm": 52.15243148803711, "learning_rate": 9.830422578699122e-06, "loss": 23.9923, "step": 1391 }, { "epoch": 0.12918793503480278, "grad_norm": 39.78089141845703, "learning_rate": 9.830026082560324e-06, "loss": 27.867, "step": 1392 }, { "epoch": 0.12928074245939675, "grad_norm": 41.98296356201172, "learning_rate": 9.829629131445342e-06, "loss": 27.0499, "step": 1393 }, { "epoch": 0.12937354988399072, "grad_norm": 42.41436004638672, "learning_rate": 9.829231725391568e-06, "loss": 25.6499, "step": 1394 }, { "epoch": 0.1294663573085847, "grad_norm": 40.53886795043945, "learning_rate": 9.828833864436439e-06, "loss": 24.66, "step": 1395 }, { "epoch": 0.12955916473317866, "grad_norm": 36.283870697021484, "learning_rate": 9.828435548617427e-06, "loss": 24.0923, "step": 1396 }, { "epoch": 0.12965197215777263, "grad_norm": 43.59339141845703, "learning_rate": 9.828036777972057e-06, "loss": 25.5931, "step": 1397 }, { "epoch": 0.12974477958236658, "grad_norm": 37.417633056640625, "learning_rate": 9.827637552537893e-06, "loss": 24.6932, "step": 1398 }, { "epoch": 0.12983758700696055, "grad_norm": 53.77067565917969, "learning_rate": 9.827237872352535e-06, "loss": 23.9939, "step": 1399 }, { "epoch": 0.12993039443155452, "grad_norm": 50.37263488769531, "learning_rate": 9.826837737453637e-06, "loss": 25.0267, "step": 1400 }, { "epoch": 0.1300232018561485, "grad_norm": 55.542457580566406, "learning_rate": 9.826437147878889e-06, "loss": 25.2557, "step": 1401 }, { "epoch": 0.13011600928074246, "grad_norm": 38.599178314208984, "learning_rate": 9.826036103666029e-06, "loss": 24.9243, "step": 1402 }, { "epoch": 0.13020881670533643, "grad_norm": 35.144935607910156, "learning_rate": 9.825634604852827e-06, "loss": 24.6453, "step": 1403 }, { "epoch": 0.1303016241299304, "grad_norm": 40.851661682128906, "learning_rate": 9.825232651477109e-06, "loss": 26.6899, "step": 1404 }, { "epoch": 0.13039443155452435, "grad_norm": 44.912357330322266, "learning_rate": 9.824830243576736e-06, "loss": 25.4393, "step": 1405 }, { "epoch": 0.13048723897911832, "grad_norm": 37.75965118408203, "learning_rate": 9.824427381189615e-06, "loss": 25.1035, "step": 1406 }, { "epoch": 0.1305800464037123, "grad_norm": 50.90751266479492, "learning_rate": 9.824024064353693e-06, "loss": 28.046, "step": 1407 }, { "epoch": 0.13067285382830626, "grad_norm": 42.30087661743164, "learning_rate": 9.823620293106963e-06, "loss": 28.8337, "step": 1408 }, { "epoch": 0.13076566125290023, "grad_norm": 85.01931762695312, "learning_rate": 9.823216067487458e-06, "loss": 25.4209, "step": 1409 }, { "epoch": 0.1308584686774942, "grad_norm": 41.01145935058594, "learning_rate": 9.822811387533256e-06, "loss": 25.6332, "step": 1410 }, { "epoch": 0.13095127610208818, "grad_norm": 39.79961013793945, "learning_rate": 9.822406253282474e-06, "loss": 25.8553, "step": 1411 }, { "epoch": 0.13104408352668215, "grad_norm": 40.1265754699707, "learning_rate": 9.822000664773278e-06, "loss": 24.6856, "step": 1412 }, { "epoch": 0.1311368909512761, "grad_norm": 34.890960693359375, "learning_rate": 9.821594622043872e-06, "loss": 24.6935, "step": 1413 }, { "epoch": 0.13122969837587006, "grad_norm": 38.53281784057617, "learning_rate": 9.821188125132507e-06, "loss": 24.9719, "step": 1414 }, { "epoch": 0.13132250580046403, "grad_norm": 37.140220642089844, "learning_rate": 9.82078117407747e-06, "loss": 26.1385, "step": 1415 }, { "epoch": 0.131415313225058, "grad_norm": 40.8682861328125, "learning_rate": 9.820373768917095e-06, "loss": 25.8175, "step": 1416 }, { "epoch": 0.13150812064965198, "grad_norm": 43.17987060546875, "learning_rate": 9.819965909689762e-06, "loss": 24.4332, "step": 1417 }, { "epoch": 0.13160092807424595, "grad_norm": 38.298927307128906, "learning_rate": 9.819557596433887e-06, "loss": 24.6792, "step": 1418 }, { "epoch": 0.13169373549883992, "grad_norm": 37.80183410644531, "learning_rate": 9.819148829187932e-06, "loss": 26.0846, "step": 1419 }, { "epoch": 0.13178654292343386, "grad_norm": 37.73781967163086, "learning_rate": 9.818739607990403e-06, "loss": 24.9203, "step": 1420 }, { "epoch": 0.13187935034802784, "grad_norm": 39.75718307495117, "learning_rate": 9.818329932879848e-06, "loss": 24.9726, "step": 1421 }, { "epoch": 0.1319721577726218, "grad_norm": 38.84518814086914, "learning_rate": 9.817919803894857e-06, "loss": 25.2806, "step": 1422 }, { "epoch": 0.13206496519721578, "grad_norm": 42.259029388427734, "learning_rate": 9.817509221074064e-06, "loss": 23.8894, "step": 1423 }, { "epoch": 0.13215777262180975, "grad_norm": 40.48051834106445, "learning_rate": 9.817098184456145e-06, "loss": 28.1171, "step": 1424 }, { "epoch": 0.13225058004640372, "grad_norm": 36.673057556152344, "learning_rate": 9.816686694079814e-06, "loss": 24.24, "step": 1425 }, { "epoch": 0.1323433874709977, "grad_norm": 39.55156707763672, "learning_rate": 9.816274749983839e-06, "loss": 24.0988, "step": 1426 }, { "epoch": 0.13243619489559164, "grad_norm": 37.81720733642578, "learning_rate": 9.81586235220702e-06, "loss": 25.2792, "step": 1427 }, { "epoch": 0.1325290023201856, "grad_norm": 36.9807243347168, "learning_rate": 9.815449500788203e-06, "loss": 26.7774, "step": 1428 }, { "epoch": 0.13262180974477958, "grad_norm": 39.83710861206055, "learning_rate": 9.81503619576628e-06, "loss": 24.4681, "step": 1429 }, { "epoch": 0.13271461716937355, "grad_norm": 39.51487731933594, "learning_rate": 9.814622437180183e-06, "loss": 26.284, "step": 1430 }, { "epoch": 0.13280742459396752, "grad_norm": 38.25526428222656, "learning_rate": 9.814208225068887e-06, "loss": 25.8727, "step": 1431 }, { "epoch": 0.1329002320185615, "grad_norm": 39.33034133911133, "learning_rate": 9.813793559471408e-06, "loss": 24.7801, "step": 1432 }, { "epoch": 0.13299303944315546, "grad_norm": 48.5117073059082, "learning_rate": 9.813378440426809e-06, "loss": 28.1728, "step": 1433 }, { "epoch": 0.1330858468677494, "grad_norm": 43.25923156738281, "learning_rate": 9.812962867974192e-06, "loss": 25.5149, "step": 1434 }, { "epoch": 0.13317865429234338, "grad_norm": 45.96809768676758, "learning_rate": 9.812546842152701e-06, "loss": 25.5523, "step": 1435 }, { "epoch": 0.13327146171693735, "grad_norm": 43.70383834838867, "learning_rate": 9.812130363001529e-06, "loss": 24.5797, "step": 1436 }, { "epoch": 0.13336426914153132, "grad_norm": 39.489593505859375, "learning_rate": 9.811713430559903e-06, "loss": 26.5749, "step": 1437 }, { "epoch": 0.1334570765661253, "grad_norm": 44.093177795410156, "learning_rate": 9.8112960448671e-06, "loss": 26.1687, "step": 1438 }, { "epoch": 0.13354988399071926, "grad_norm": 39.527957916259766, "learning_rate": 9.810878205962434e-06, "loss": 25.2285, "step": 1439 }, { "epoch": 0.13364269141531324, "grad_norm": 37.20231628417969, "learning_rate": 9.810459913885265e-06, "loss": 26.0096, "step": 1440 }, { "epoch": 0.13373549883990718, "grad_norm": 40.251468658447266, "learning_rate": 9.810041168674997e-06, "loss": 27.1478, "step": 1441 }, { "epoch": 0.13382830626450115, "grad_norm": 47.350196838378906, "learning_rate": 9.809621970371072e-06, "loss": 25.0179, "step": 1442 }, { "epoch": 0.13392111368909512, "grad_norm": 40.682395935058594, "learning_rate": 9.80920231901298e-06, "loss": 26.2932, "step": 1443 }, { "epoch": 0.1340139211136891, "grad_norm": 39.78132629394531, "learning_rate": 9.808782214640252e-06, "loss": 24.1809, "step": 1444 }, { "epoch": 0.13410672853828307, "grad_norm": 36.083030700683594, "learning_rate": 9.808361657292455e-06, "loss": 25.4688, "step": 1445 }, { "epoch": 0.13419953596287704, "grad_norm": 40.00406265258789, "learning_rate": 9.80794064700921e-06, "loss": 26.0741, "step": 1446 }, { "epoch": 0.134292343387471, "grad_norm": 48.57320785522461, "learning_rate": 9.807519183830172e-06, "loss": 24.6791, "step": 1447 }, { "epoch": 0.13438515081206495, "grad_norm": 39.97346496582031, "learning_rate": 9.807097267795045e-06, "loss": 24.7952, "step": 1448 }, { "epoch": 0.13447795823665892, "grad_norm": 37.98685836791992, "learning_rate": 9.806674898943569e-06, "loss": 24.7526, "step": 1449 }, { "epoch": 0.1345707656612529, "grad_norm": 53.936866760253906, "learning_rate": 9.806252077315531e-06, "loss": 23.7788, "step": 1450 }, { "epoch": 0.13466357308584687, "grad_norm": 38.35258483886719, "learning_rate": 9.805828802950761e-06, "loss": 24.3158, "step": 1451 }, { "epoch": 0.13475638051044084, "grad_norm": 44.960365295410156, "learning_rate": 9.805405075889129e-06, "loss": 25.1542, "step": 1452 }, { "epoch": 0.1348491879350348, "grad_norm": 36.905765533447266, "learning_rate": 9.80498089617055e-06, "loss": 25.5179, "step": 1453 }, { "epoch": 0.13494199535962878, "grad_norm": 55.21519470214844, "learning_rate": 9.804556263834982e-06, "loss": 24.4058, "step": 1454 }, { "epoch": 0.13503480278422275, "grad_norm": 40.66426467895508, "learning_rate": 9.80413117892242e-06, "loss": 24.5561, "step": 1455 }, { "epoch": 0.1351276102088167, "grad_norm": 46.055274963378906, "learning_rate": 9.803705641472909e-06, "loss": 25.4429, "step": 1456 }, { "epoch": 0.13522041763341067, "grad_norm": 41.1520881652832, "learning_rate": 9.803279651526534e-06, "loss": 27.1903, "step": 1457 }, { "epoch": 0.13531322505800464, "grad_norm": 39.423343658447266, "learning_rate": 9.802853209123421e-06, "loss": 25.3197, "step": 1458 }, { "epoch": 0.1354060324825986, "grad_norm": 40.068538665771484, "learning_rate": 9.802426314303739e-06, "loss": 25.2677, "step": 1459 }, { "epoch": 0.13549883990719258, "grad_norm": 50.16598892211914, "learning_rate": 9.801998967107702e-06, "loss": 24.3223, "step": 1460 }, { "epoch": 0.13559164733178655, "grad_norm": 36.930458068847656, "learning_rate": 9.801571167575565e-06, "loss": 24.7736, "step": 1461 }, { "epoch": 0.13568445475638052, "grad_norm": 39.80655288696289, "learning_rate": 9.801142915747623e-06, "loss": 26.3795, "step": 1462 }, { "epoch": 0.13577726218097447, "grad_norm": 87.51802825927734, "learning_rate": 9.80071421166422e-06, "loss": 23.4411, "step": 1463 }, { "epoch": 0.13587006960556844, "grad_norm": 45.21234893798828, "learning_rate": 9.800285055365737e-06, "loss": 24.3113, "step": 1464 }, { "epoch": 0.1359628770301624, "grad_norm": 41.79233169555664, "learning_rate": 9.799855446892599e-06, "loss": 25.166, "step": 1465 }, { "epoch": 0.13605568445475638, "grad_norm": 40.1715202331543, "learning_rate": 9.799425386285276e-06, "loss": 26.0237, "step": 1466 }, { "epoch": 0.13614849187935035, "grad_norm": 41.37337875366211, "learning_rate": 9.798994873584275e-06, "loss": 25.7419, "step": 1467 }, { "epoch": 0.13624129930394432, "grad_norm": 37.67875671386719, "learning_rate": 9.798563908830152e-06, "loss": 24.2879, "step": 1468 }, { "epoch": 0.1363341067285383, "grad_norm": 43.65399932861328, "learning_rate": 9.798132492063502e-06, "loss": 26.3883, "step": 1469 }, { "epoch": 0.13642691415313224, "grad_norm": 40.00925064086914, "learning_rate": 9.797700623324964e-06, "loss": 25.7993, "step": 1470 }, { "epoch": 0.1365197215777262, "grad_norm": 36.166011810302734, "learning_rate": 9.797268302655219e-06, "loss": 23.7753, "step": 1471 }, { "epoch": 0.13661252900232018, "grad_norm": 40.48940658569336, "learning_rate": 9.796835530094989e-06, "loss": 26.3943, "step": 1472 }, { "epoch": 0.13670533642691415, "grad_norm": 41.33683395385742, "learning_rate": 9.796402305685041e-06, "loss": 27.4608, "step": 1473 }, { "epoch": 0.13679814385150812, "grad_norm": 39.758758544921875, "learning_rate": 9.795968629466184e-06, "loss": 24.6632, "step": 1474 }, { "epoch": 0.1368909512761021, "grad_norm": 41.342254638671875, "learning_rate": 9.79553450147927e-06, "loss": 26.1099, "step": 1475 }, { "epoch": 0.13698375870069607, "grad_norm": 39.58646774291992, "learning_rate": 9.79509992176519e-06, "loss": 25.2179, "step": 1476 }, { "epoch": 0.13707656612529, "grad_norm": 44.26543426513672, "learning_rate": 9.794664890364882e-06, "loss": 25.6352, "step": 1477 }, { "epoch": 0.13716937354988398, "grad_norm": 40.0400276184082, "learning_rate": 9.794229407319324e-06, "loss": 23.2506, "step": 1478 }, { "epoch": 0.13726218097447795, "grad_norm": 50.651275634765625, "learning_rate": 9.79379347266954e-06, "loss": 24.8469, "step": 1479 }, { "epoch": 0.13735498839907193, "grad_norm": 42.24757766723633, "learning_rate": 9.793357086456591e-06, "loss": 25.1221, "step": 1480 }, { "epoch": 0.1374477958236659, "grad_norm": 44.64360046386719, "learning_rate": 9.792920248721586e-06, "loss": 25.8539, "step": 1481 }, { "epoch": 0.13754060324825987, "grad_norm": 42.00947189331055, "learning_rate": 9.79248295950567e-06, "loss": 23.7111, "step": 1482 }, { "epoch": 0.13763341067285384, "grad_norm": 41.081233978271484, "learning_rate": 9.79204521885004e-06, "loss": 26.1558, "step": 1483 }, { "epoch": 0.13772621809744778, "grad_norm": 41.36091995239258, "learning_rate": 9.791607026795927e-06, "loss": 25.4093, "step": 1484 }, { "epoch": 0.13781902552204175, "grad_norm": 41.71098709106445, "learning_rate": 9.791168383384606e-06, "loss": 24.8433, "step": 1485 }, { "epoch": 0.13791183294663573, "grad_norm": 40.12112045288086, "learning_rate": 9.790729288657399e-06, "loss": 25.8018, "step": 1486 }, { "epoch": 0.1380046403712297, "grad_norm": 39.105281829833984, "learning_rate": 9.790289742655666e-06, "loss": 24.4034, "step": 1487 }, { "epoch": 0.13809744779582367, "grad_norm": 39.125980377197266, "learning_rate": 9.789849745420811e-06, "loss": 24.9303, "step": 1488 }, { "epoch": 0.13819025522041764, "grad_norm": 46.853729248046875, "learning_rate": 9.78940929699428e-06, "loss": 24.7494, "step": 1489 }, { "epoch": 0.1382830626450116, "grad_norm": 40.847862243652344, "learning_rate": 9.788968397417567e-06, "loss": 26.4349, "step": 1490 }, { "epoch": 0.13837587006960556, "grad_norm": 36.72891616821289, "learning_rate": 9.788527046732199e-06, "loss": 25.8926, "step": 1491 }, { "epoch": 0.13846867749419953, "grad_norm": 40.99997329711914, "learning_rate": 9.78808524497975e-06, "loss": 27.1761, "step": 1492 }, { "epoch": 0.1385614849187935, "grad_norm": 39.689231872558594, "learning_rate": 9.787642992201837e-06, "loss": 25.0855, "step": 1493 }, { "epoch": 0.13865429234338747, "grad_norm": 45.06265640258789, "learning_rate": 9.78720028844012e-06, "loss": 25.5435, "step": 1494 }, { "epoch": 0.13874709976798144, "grad_norm": 52.16548156738281, "learning_rate": 9.7867571337363e-06, "loss": 24.7582, "step": 1495 }, { "epoch": 0.1388399071925754, "grad_norm": 44.279014587402344, "learning_rate": 9.786313528132122e-06, "loss": 26.6329, "step": 1496 }, { "epoch": 0.13893271461716938, "grad_norm": 46.31367492675781, "learning_rate": 9.785869471669374e-06, "loss": 24.5205, "step": 1497 }, { "epoch": 0.13902552204176333, "grad_norm": 42.76315689086914, "learning_rate": 9.78542496438988e-06, "loss": 25.0544, "step": 1498 }, { "epoch": 0.1391183294663573, "grad_norm": 39.4957275390625, "learning_rate": 9.784980006335516e-06, "loss": 24.0829, "step": 1499 }, { "epoch": 0.13921113689095127, "grad_norm": 41.91978073120117, "learning_rate": 9.784534597548194e-06, "loss": 26.3491, "step": 1500 }, { "epoch": 0.13930394431554524, "grad_norm": 41.95702362060547, "learning_rate": 9.784088738069869e-06, "loss": 25.1101, "step": 1501 }, { "epoch": 0.1393967517401392, "grad_norm": 42.46504211425781, "learning_rate": 9.783642427942543e-06, "loss": 26.4752, "step": 1502 }, { "epoch": 0.13948955916473318, "grad_norm": 43.550140380859375, "learning_rate": 9.783195667208255e-06, "loss": 26.1064, "step": 1503 }, { "epoch": 0.13958236658932716, "grad_norm": 38.66767120361328, "learning_rate": 9.78274845590909e-06, "loss": 25.1737, "step": 1504 }, { "epoch": 0.13967517401392113, "grad_norm": 38.673500061035156, "learning_rate": 9.782300794087174e-06, "loss": 24.8899, "step": 1505 }, { "epoch": 0.13976798143851507, "grad_norm": 38.22522735595703, "learning_rate": 9.781852681784674e-06, "loss": 24.2601, "step": 1506 }, { "epoch": 0.13986078886310904, "grad_norm": 40.5516471862793, "learning_rate": 9.781404119043803e-06, "loss": 25.9511, "step": 1507 }, { "epoch": 0.139953596287703, "grad_norm": 45.554344177246094, "learning_rate": 9.780955105906813e-06, "loss": 27.3995, "step": 1508 }, { "epoch": 0.14004640371229699, "grad_norm": 42.87487030029297, "learning_rate": 9.780505642416002e-06, "loss": 23.6117, "step": 1509 }, { "epoch": 0.14013921113689096, "grad_norm": 37.46870803833008, "learning_rate": 9.780055728613706e-06, "loss": 23.3302, "step": 1510 }, { "epoch": 0.14023201856148493, "grad_norm": 42.49916076660156, "learning_rate": 9.779605364542307e-06, "loss": 24.5733, "step": 1511 }, { "epoch": 0.1403248259860789, "grad_norm": 38.19403839111328, "learning_rate": 9.779154550244228e-06, "loss": 24.856, "step": 1512 }, { "epoch": 0.14041763341067284, "grad_norm": 38.80712127685547, "learning_rate": 9.778703285761933e-06, "loss": 23.1482, "step": 1513 }, { "epoch": 0.14051044083526681, "grad_norm": 40.94709014892578, "learning_rate": 9.778251571137933e-06, "loss": 26.4136, "step": 1514 }, { "epoch": 0.14060324825986079, "grad_norm": 39.6757926940918, "learning_rate": 9.777799406414775e-06, "loss": 25.2775, "step": 1515 }, { "epoch": 0.14069605568445476, "grad_norm": 38.95577621459961, "learning_rate": 9.777346791635056e-06, "loss": 22.9671, "step": 1516 }, { "epoch": 0.14078886310904873, "grad_norm": 44.14984130859375, "learning_rate": 9.776893726841408e-06, "loss": 25.7245, "step": 1517 }, { "epoch": 0.1408816705336427, "grad_norm": 39.03471755981445, "learning_rate": 9.776440212076507e-06, "loss": 25.3561, "step": 1518 }, { "epoch": 0.14097447795823667, "grad_norm": 38.84245681762695, "learning_rate": 9.775986247383078e-06, "loss": 23.991, "step": 1519 }, { "epoch": 0.14106728538283062, "grad_norm": 39.42207336425781, "learning_rate": 9.77553183280388e-06, "loss": 25.4983, "step": 1520 }, { "epoch": 0.1411600928074246, "grad_norm": 40.64630126953125, "learning_rate": 9.775076968381716e-06, "loss": 24.854, "step": 1521 }, { "epoch": 0.14125290023201856, "grad_norm": 39.76708221435547, "learning_rate": 9.774621654159437e-06, "loss": 25.2944, "step": 1522 }, { "epoch": 0.14134570765661253, "grad_norm": 62.358150482177734, "learning_rate": 9.774165890179931e-06, "loss": 26.2634, "step": 1523 }, { "epoch": 0.1414385150812065, "grad_norm": 37.62146759033203, "learning_rate": 9.77370967648613e-06, "loss": 25.2263, "step": 1524 }, { "epoch": 0.14153132250580047, "grad_norm": 41.44902801513672, "learning_rate": 9.773253013121006e-06, "loss": 24.9543, "step": 1525 }, { "epoch": 0.14162412993039444, "grad_norm": 39.21891784667969, "learning_rate": 9.772795900127578e-06, "loss": 23.6047, "step": 1526 }, { "epoch": 0.1417169373549884, "grad_norm": 40.601318359375, "learning_rate": 9.772338337548903e-06, "loss": 26.3854, "step": 1527 }, { "epoch": 0.14180974477958236, "grad_norm": 37.52143859863281, "learning_rate": 9.771880325428085e-06, "loss": 23.6742, "step": 1528 }, { "epoch": 0.14190255220417633, "grad_norm": 37.895748138427734, "learning_rate": 9.771421863808265e-06, "loss": 24.7571, "step": 1529 }, { "epoch": 0.1419953596287703, "grad_norm": 53.00132369995117, "learning_rate": 9.77096295273263e-06, "loss": 23.969, "step": 1530 }, { "epoch": 0.14208816705336427, "grad_norm": 38.97429656982422, "learning_rate": 9.770503592244407e-06, "loss": 23.7937, "step": 1531 }, { "epoch": 0.14218097447795824, "grad_norm": 39.07414627075195, "learning_rate": 9.770043782386868e-06, "loss": 26.056, "step": 1532 }, { "epoch": 0.14227378190255222, "grad_norm": 38.73295593261719, "learning_rate": 9.769583523203328e-06, "loss": 24.5405, "step": 1533 }, { "epoch": 0.14236658932714616, "grad_norm": 58.64828109741211, "learning_rate": 9.769122814737137e-06, "loss": 25.3821, "step": 1534 }, { "epoch": 0.14245939675174013, "grad_norm": 40.11161804199219, "learning_rate": 9.768661657031697e-06, "loss": 24.3187, "step": 1535 }, { "epoch": 0.1425522041763341, "grad_norm": 39.045902252197266, "learning_rate": 9.768200050130446e-06, "loss": 25.701, "step": 1536 }, { "epoch": 0.14264501160092807, "grad_norm": 39.476131439208984, "learning_rate": 9.767737994076867e-06, "loss": 24.7156, "step": 1537 }, { "epoch": 0.14273781902552204, "grad_norm": 37.8652458190918, "learning_rate": 9.767275488914484e-06, "loss": 24.4762, "step": 1538 }, { "epoch": 0.14283062645011602, "grad_norm": 39.4705810546875, "learning_rate": 9.766812534686862e-06, "loss": 26.2556, "step": 1539 }, { "epoch": 0.14292343387471, "grad_norm": 64.72978973388672, "learning_rate": 9.766349131437613e-06, "loss": 23.8263, "step": 1540 }, { "epoch": 0.14301624129930393, "grad_norm": 37.09354019165039, "learning_rate": 9.765885279210387e-06, "loss": 24.0108, "step": 1541 }, { "epoch": 0.1431090487238979, "grad_norm": 54.31332778930664, "learning_rate": 9.765420978048879e-06, "loss": 26.9273, "step": 1542 }, { "epoch": 0.14320185614849187, "grad_norm": 42.63127899169922, "learning_rate": 9.764956227996822e-06, "loss": 26.1225, "step": 1543 }, { "epoch": 0.14329466357308585, "grad_norm": 37.534114837646484, "learning_rate": 9.764491029097999e-06, "loss": 24.4725, "step": 1544 }, { "epoch": 0.14338747099767982, "grad_norm": 39.0512809753418, "learning_rate": 9.764025381396226e-06, "loss": 25.4575, "step": 1545 }, { "epoch": 0.1434802784222738, "grad_norm": 42.4050407409668, "learning_rate": 9.763559284935368e-06, "loss": 25.291, "step": 1546 }, { "epoch": 0.14357308584686776, "grad_norm": 37.54600524902344, "learning_rate": 9.76309273975933e-06, "loss": 27.6112, "step": 1547 }, { "epoch": 0.1436658932714617, "grad_norm": 36.728607177734375, "learning_rate": 9.76262574591206e-06, "loss": 23.5159, "step": 1548 }, { "epoch": 0.14375870069605567, "grad_norm": 42.68326187133789, "learning_rate": 9.762158303437545e-06, "loss": 22.8642, "step": 1549 }, { "epoch": 0.14385150812064965, "grad_norm": 37.913875579833984, "learning_rate": 9.76169041237982e-06, "loss": 24.593, "step": 1550 }, { "epoch": 0.14394431554524362, "grad_norm": 40.094722747802734, "learning_rate": 9.761222072782957e-06, "loss": 26.7277, "step": 1551 }, { "epoch": 0.1440371229698376, "grad_norm": 39.53681945800781, "learning_rate": 9.760753284691073e-06, "loss": 25.4584, "step": 1552 }, { "epoch": 0.14412993039443156, "grad_norm": 44.75151824951172, "learning_rate": 9.760284048148327e-06, "loss": 26.1639, "step": 1553 }, { "epoch": 0.14422273781902553, "grad_norm": 39.222259521484375, "learning_rate": 9.759814363198921e-06, "loss": 25.8167, "step": 1554 }, { "epoch": 0.1443155452436195, "grad_norm": 43.938899993896484, "learning_rate": 9.759344229887095e-06, "loss": 25.3139, "step": 1555 }, { "epoch": 0.14440835266821345, "grad_norm": 38.03141784667969, "learning_rate": 9.758873648257139e-06, "loss": 24.5629, "step": 1556 }, { "epoch": 0.14450116009280742, "grad_norm": 39.62332534790039, "learning_rate": 9.758402618353376e-06, "loss": 24.8711, "step": 1557 }, { "epoch": 0.1445939675174014, "grad_norm": 39.3458366394043, "learning_rate": 9.757931140220177e-06, "loss": 26.4275, "step": 1558 }, { "epoch": 0.14468677494199536, "grad_norm": 39.349891662597656, "learning_rate": 9.757459213901956e-06, "loss": 24.421, "step": 1559 }, { "epoch": 0.14477958236658933, "grad_norm": 43.463401794433594, "learning_rate": 9.756986839443166e-06, "loss": 22.854, "step": 1560 }, { "epoch": 0.1448723897911833, "grad_norm": 42.30237579345703, "learning_rate": 9.756514016888301e-06, "loss": 25.0829, "step": 1561 }, { "epoch": 0.14496519721577728, "grad_norm": 41.92647171020508, "learning_rate": 9.756040746281906e-06, "loss": 24.7308, "step": 1562 }, { "epoch": 0.14505800464037122, "grad_norm": 40.3254508972168, "learning_rate": 9.755567027668555e-06, "loss": 24.5387, "step": 1563 }, { "epoch": 0.1451508120649652, "grad_norm": 42.68706130981445, "learning_rate": 9.755092861092875e-06, "loss": 26.5808, "step": 1564 }, { "epoch": 0.14524361948955916, "grad_norm": 39.96821212768555, "learning_rate": 9.75461824659953e-06, "loss": 25.2646, "step": 1565 }, { "epoch": 0.14533642691415313, "grad_norm": 63.864906311035156, "learning_rate": 9.754143184233228e-06, "loss": 24.031, "step": 1566 }, { "epoch": 0.1454292343387471, "grad_norm": 41.66082763671875, "learning_rate": 9.753667674038718e-06, "loss": 24.9542, "step": 1567 }, { "epoch": 0.14552204176334108, "grad_norm": 40.722930908203125, "learning_rate": 9.753191716060795e-06, "loss": 22.4735, "step": 1568 }, { "epoch": 0.14561484918793505, "grad_norm": 38.7715950012207, "learning_rate": 9.752715310344288e-06, "loss": 25.5331, "step": 1569 }, { "epoch": 0.145707656612529, "grad_norm": 39.74433517456055, "learning_rate": 9.752238456934077e-06, "loss": 26.4486, "step": 1570 }, { "epoch": 0.14580046403712296, "grad_norm": 190.33737182617188, "learning_rate": 9.751761155875078e-06, "loss": 25.4607, "step": 1571 }, { "epoch": 0.14589327146171693, "grad_norm": 44.49921417236328, "learning_rate": 9.751283407212253e-06, "loss": 25.4753, "step": 1572 }, { "epoch": 0.1459860788863109, "grad_norm": 42.21768569946289, "learning_rate": 9.750805210990603e-06, "loss": 26.3496, "step": 1573 }, { "epoch": 0.14607888631090488, "grad_norm": 40.72707748413086, "learning_rate": 9.750326567255175e-06, "loss": 24.727, "step": 1574 }, { "epoch": 0.14617169373549885, "grad_norm": 34.59959411621094, "learning_rate": 9.749847476051056e-06, "loss": 22.4104, "step": 1575 }, { "epoch": 0.14626450116009282, "grad_norm": 38.678131103515625, "learning_rate": 9.749367937423374e-06, "loss": 27.294, "step": 1576 }, { "epoch": 0.14635730858468676, "grad_norm": 36.900360107421875, "learning_rate": 9.7488879514173e-06, "loss": 23.6077, "step": 1577 }, { "epoch": 0.14645011600928073, "grad_norm": 37.71733474731445, "learning_rate": 9.748407518078048e-06, "loss": 23.0894, "step": 1578 }, { "epoch": 0.1465429234338747, "grad_norm": 38.81952667236328, "learning_rate": 9.747926637450875e-06, "loss": 24.3929, "step": 1579 }, { "epoch": 0.14663573085846868, "grad_norm": 41.753841400146484, "learning_rate": 9.747445309581076e-06, "loss": 24.4193, "step": 1580 }, { "epoch": 0.14672853828306265, "grad_norm": 37.44320297241211, "learning_rate": 9.746963534513994e-06, "loss": 25.1325, "step": 1581 }, { "epoch": 0.14682134570765662, "grad_norm": 37.96147918701172, "learning_rate": 9.746481312295007e-06, "loss": 25.4751, "step": 1582 }, { "epoch": 0.1469141531322506, "grad_norm": 44.58604431152344, "learning_rate": 9.745998642969543e-06, "loss": 25.0478, "step": 1583 }, { "epoch": 0.14700696055684453, "grad_norm": 39.85755157470703, "learning_rate": 9.745515526583066e-06, "loss": 26.0679, "step": 1584 }, { "epoch": 0.1470997679814385, "grad_norm": 37.13945770263672, "learning_rate": 9.745031963181085e-06, "loss": 25.3858, "step": 1585 }, { "epoch": 0.14719257540603248, "grad_norm": 41.880638122558594, "learning_rate": 9.744547952809151e-06, "loss": 25.0684, "step": 1586 }, { "epoch": 0.14728538283062645, "grad_norm": 38.065799713134766, "learning_rate": 9.744063495512854e-06, "loss": 25.5514, "step": 1587 }, { "epoch": 0.14737819025522042, "grad_norm": 38.4324951171875, "learning_rate": 9.743578591337833e-06, "loss": 24.7969, "step": 1588 }, { "epoch": 0.1474709976798144, "grad_norm": 38.54454803466797, "learning_rate": 9.743093240329762e-06, "loss": 26.3353, "step": 1589 }, { "epoch": 0.14756380510440836, "grad_norm": 36.98836898803711, "learning_rate": 9.74260744253436e-06, "loss": 24.649, "step": 1590 }, { "epoch": 0.1476566125290023, "grad_norm": 45.013240814208984, "learning_rate": 9.742121197997387e-06, "loss": 23.6355, "step": 1591 }, { "epoch": 0.14774941995359628, "grad_norm": 38.010215759277344, "learning_rate": 9.741634506764648e-06, "loss": 26.5553, "step": 1592 }, { "epoch": 0.14784222737819025, "grad_norm": 36.10988235473633, "learning_rate": 9.741147368881987e-06, "loss": 24.0383, "step": 1593 }, { "epoch": 0.14793503480278422, "grad_norm": 40.94232940673828, "learning_rate": 9.74065978439529e-06, "loss": 27.1686, "step": 1594 }, { "epoch": 0.1480278422273782, "grad_norm": 39.58552932739258, "learning_rate": 9.74017175335049e-06, "loss": 25.7958, "step": 1595 }, { "epoch": 0.14812064965197216, "grad_norm": 39.675106048583984, "learning_rate": 9.739683275793554e-06, "loss": 25.1665, "step": 1596 }, { "epoch": 0.14821345707656614, "grad_norm": 37.37216567993164, "learning_rate": 9.739194351770498e-06, "loss": 24.5809, "step": 1597 }, { "epoch": 0.1483062645011601, "grad_norm": 33.47835159301758, "learning_rate": 9.738704981327376e-06, "loss": 22.7088, "step": 1598 }, { "epoch": 0.14839907192575405, "grad_norm": 38.13389205932617, "learning_rate": 9.738215164510285e-06, "loss": 24.6643, "step": 1599 }, { "epoch": 0.14849187935034802, "grad_norm": 43.4743766784668, "learning_rate": 9.737724901365368e-06, "loss": 24.6168, "step": 1600 }, { "epoch": 0.148584686774942, "grad_norm": 36.97322082519531, "learning_rate": 9.737234191938803e-06, "loss": 24.1555, "step": 1601 }, { "epoch": 0.14867749419953596, "grad_norm": 39.50067901611328, "learning_rate": 9.736743036276814e-06, "loss": 25.2903, "step": 1602 }, { "epoch": 0.14877030162412994, "grad_norm": 37.1323127746582, "learning_rate": 9.736251434425668e-06, "loss": 24.0897, "step": 1603 }, { "epoch": 0.1488631090487239, "grad_norm": 39.79093933105469, "learning_rate": 9.73575938643167e-06, "loss": 25.9406, "step": 1604 }, { "epoch": 0.14895591647331788, "grad_norm": 37.3690299987793, "learning_rate": 9.735266892341175e-06, "loss": 25.7611, "step": 1605 }, { "epoch": 0.14904872389791182, "grad_norm": 37.880035400390625, "learning_rate": 9.73477395220057e-06, "loss": 26.5077, "step": 1606 }, { "epoch": 0.1491415313225058, "grad_norm": 39.17082595825195, "learning_rate": 9.734280566056289e-06, "loss": 24.7054, "step": 1607 }, { "epoch": 0.14923433874709977, "grad_norm": 149.84393310546875, "learning_rate": 9.73378673395481e-06, "loss": 25.6457, "step": 1608 }, { "epoch": 0.14932714617169374, "grad_norm": 40.721229553222656, "learning_rate": 9.733292455942648e-06, "loss": 25.4675, "step": 1609 }, { "epoch": 0.1494199535962877, "grad_norm": 40.36100387573242, "learning_rate": 9.732797732066364e-06, "loss": 25.0454, "step": 1610 }, { "epoch": 0.14951276102088168, "grad_norm": 40.75986099243164, "learning_rate": 9.73230256237256e-06, "loss": 24.1292, "step": 1611 }, { "epoch": 0.14960556844547565, "grad_norm": 41.36423110961914, "learning_rate": 9.731806946907881e-06, "loss": 24.8306, "step": 1612 }, { "epoch": 0.1496983758700696, "grad_norm": 39.36598587036133, "learning_rate": 9.73131088571901e-06, "loss": 25.2164, "step": 1613 }, { "epoch": 0.14979118329466357, "grad_norm": 37.71989822387695, "learning_rate": 9.730814378852677e-06, "loss": 23.5242, "step": 1614 }, { "epoch": 0.14988399071925754, "grad_norm": 35.99518966674805, "learning_rate": 9.73031742635565e-06, "loss": 26.0813, "step": 1615 }, { "epoch": 0.1499767981438515, "grad_norm": 37.04517364501953, "learning_rate": 9.729820028274742e-06, "loss": 25.1456, "step": 1616 }, { "epoch": 0.15006960556844548, "grad_norm": 40.23942184448242, "learning_rate": 9.729322184656806e-06, "loss": 25.9054, "step": 1617 }, { "epoch": 0.15016241299303945, "grad_norm": 46.22035217285156, "learning_rate": 9.728823895548738e-06, "loss": 24.7874, "step": 1618 }, { "epoch": 0.15025522041763342, "grad_norm": 40.72393798828125, "learning_rate": 9.728325160997473e-06, "loss": 26.4332, "step": 1619 }, { "epoch": 0.15034802784222737, "grad_norm": 37.45405578613281, "learning_rate": 9.727825981049994e-06, "loss": 24.3868, "step": 1620 }, { "epoch": 0.15044083526682134, "grad_norm": 40.33057403564453, "learning_rate": 9.727326355753321e-06, "loss": 25.4371, "step": 1621 }, { "epoch": 0.1505336426914153, "grad_norm": 39.27434158325195, "learning_rate": 9.726826285154518e-06, "loss": 23.6228, "step": 1622 }, { "epoch": 0.15062645011600928, "grad_norm": 41.02465057373047, "learning_rate": 9.72632576930069e-06, "loss": 24.977, "step": 1623 }, { "epoch": 0.15071925754060325, "grad_norm": 40.193572998046875, "learning_rate": 9.725824808238986e-06, "loss": 24.4378, "step": 1624 }, { "epoch": 0.15081206496519722, "grad_norm": 86.17306518554688, "learning_rate": 9.725323402016592e-06, "loss": 26.7137, "step": 1625 }, { "epoch": 0.1509048723897912, "grad_norm": 51.959129333496094, "learning_rate": 9.72482155068074e-06, "loss": 27.0157, "step": 1626 }, { "epoch": 0.15099767981438514, "grad_norm": 39.297359466552734, "learning_rate": 9.724319254278704e-06, "loss": 23.8906, "step": 1627 }, { "epoch": 0.1510904872389791, "grad_norm": 35.5604362487793, "learning_rate": 9.7238165128578e-06, "loss": 23.0541, "step": 1628 }, { "epoch": 0.15118329466357308, "grad_norm": 39.24482727050781, "learning_rate": 9.723313326465384e-06, "loss": 25.5432, "step": 1629 }, { "epoch": 0.15127610208816705, "grad_norm": 41.48999786376953, "learning_rate": 9.722809695148853e-06, "loss": 26.9716, "step": 1630 }, { "epoch": 0.15136890951276102, "grad_norm": 45.453765869140625, "learning_rate": 9.722305618955652e-06, "loss": 24.1359, "step": 1631 }, { "epoch": 0.151461716937355, "grad_norm": 39.09902572631836, "learning_rate": 9.72180109793326e-06, "loss": 24.9194, "step": 1632 }, { "epoch": 0.15155452436194897, "grad_norm": 37.15317916870117, "learning_rate": 9.721296132129203e-06, "loss": 25.3742, "step": 1633 }, { "epoch": 0.1516473317865429, "grad_norm": 38.392642974853516, "learning_rate": 9.720790721591047e-06, "loss": 24.4706, "step": 1634 }, { "epoch": 0.15174013921113688, "grad_norm": 60.29711151123047, "learning_rate": 9.720284866366402e-06, "loss": 26.4205, "step": 1635 }, { "epoch": 0.15183294663573085, "grad_norm": 39.877193450927734, "learning_rate": 9.719778566502916e-06, "loss": 27.539, "step": 1636 }, { "epoch": 0.15192575406032482, "grad_norm": 38.91851806640625, "learning_rate": 9.719271822048282e-06, "loss": 26.3108, "step": 1637 }, { "epoch": 0.1520185614849188, "grad_norm": 36.891624450683594, "learning_rate": 9.718764633050235e-06, "loss": 23.1564, "step": 1638 }, { "epoch": 0.15211136890951277, "grad_norm": 36.59864807128906, "learning_rate": 9.71825699955655e-06, "loss": 24.0302, "step": 1639 }, { "epoch": 0.15220417633410674, "grad_norm": 36.509281158447266, "learning_rate": 9.717748921615045e-06, "loss": 26.1113, "step": 1640 }, { "epoch": 0.15229698375870068, "grad_norm": 38.394866943359375, "learning_rate": 9.71724039927358e-06, "loss": 25.398, "step": 1641 }, { "epoch": 0.15238979118329465, "grad_norm": 37.273799896240234, "learning_rate": 9.716731432580053e-06, "loss": 24.4534, "step": 1642 }, { "epoch": 0.15248259860788863, "grad_norm": 40.59685516357422, "learning_rate": 9.716222021582413e-06, "loss": 24.9746, "step": 1643 }, { "epoch": 0.1525754060324826, "grad_norm": 39.32670211791992, "learning_rate": 9.715712166328643e-06, "loss": 24.423, "step": 1644 }, { "epoch": 0.15266821345707657, "grad_norm": 36.879791259765625, "learning_rate": 9.71520186686677e-06, "loss": 25.2367, "step": 1645 }, { "epoch": 0.15276102088167054, "grad_norm": 36.501564025878906, "learning_rate": 9.714691123244862e-06, "loss": 23.5246, "step": 1646 }, { "epoch": 0.1528538283062645, "grad_norm": 34.86224365234375, "learning_rate": 9.71417993551103e-06, "loss": 24.3625, "step": 1647 }, { "epoch": 0.15294663573085848, "grad_norm": 36.30401611328125, "learning_rate": 9.713668303713428e-06, "loss": 24.2756, "step": 1648 }, { "epoch": 0.15303944315545243, "grad_norm": 38.409828186035156, "learning_rate": 9.713156227900249e-06, "loss": 24.7356, "step": 1649 }, { "epoch": 0.1531322505800464, "grad_norm": 40.040061950683594, "learning_rate": 9.712643708119729e-06, "loss": 24.7287, "step": 1650 }, { "epoch": 0.15322505800464037, "grad_norm": 38.760433197021484, "learning_rate": 9.712130744420147e-06, "loss": 25.4108, "step": 1651 }, { "epoch": 0.15331786542923434, "grad_norm": 39.17263412475586, "learning_rate": 9.711617336849823e-06, "loss": 26.5288, "step": 1652 }, { "epoch": 0.1534106728538283, "grad_norm": 38.53864288330078, "learning_rate": 9.711103485457119e-06, "loss": 24.4804, "step": 1653 }, { "epoch": 0.15350348027842228, "grad_norm": 40.132591247558594, "learning_rate": 9.710589190290436e-06, "loss": 25.2847, "step": 1654 }, { "epoch": 0.15359628770301625, "grad_norm": 36.66795349121094, "learning_rate": 9.710074451398221e-06, "loss": 25.9569, "step": 1655 }, { "epoch": 0.1536890951276102, "grad_norm": 41.203086853027344, "learning_rate": 9.709559268828963e-06, "loss": 24.31, "step": 1656 }, { "epoch": 0.15378190255220417, "grad_norm": 35.35858154296875, "learning_rate": 9.709043642631189e-06, "loss": 24.4438, "step": 1657 }, { "epoch": 0.15387470997679814, "grad_norm": 39.226783752441406, "learning_rate": 9.708527572853469e-06, "loss": 26.5368, "step": 1658 }, { "epoch": 0.1539675174013921, "grad_norm": 38.023006439208984, "learning_rate": 9.708011059544415e-06, "loss": 22.9609, "step": 1659 }, { "epoch": 0.15406032482598608, "grad_norm": 35.743961334228516, "learning_rate": 9.707494102752684e-06, "loss": 24.6002, "step": 1660 }, { "epoch": 0.15415313225058005, "grad_norm": 44.017818450927734, "learning_rate": 9.706976702526971e-06, "loss": 26.0224, "step": 1661 }, { "epoch": 0.15424593967517403, "grad_norm": 38.534305572509766, "learning_rate": 9.706458858916013e-06, "loss": 25.0302, "step": 1662 }, { "epoch": 0.15433874709976797, "grad_norm": 39.18679428100586, "learning_rate": 9.705940571968589e-06, "loss": 26.9294, "step": 1663 }, { "epoch": 0.15443155452436194, "grad_norm": 38.25274658203125, "learning_rate": 9.70542184173352e-06, "loss": 23.8115, "step": 1664 }, { "epoch": 0.1545243619489559, "grad_norm": 100.43157196044922, "learning_rate": 9.704902668259672e-06, "loss": 25.9102, "step": 1665 }, { "epoch": 0.15461716937354988, "grad_norm": 38.93305587768555, "learning_rate": 9.704383051595946e-06, "loss": 22.4903, "step": 1666 }, { "epoch": 0.15470997679814386, "grad_norm": 37.51681137084961, "learning_rate": 9.703862991791293e-06, "loss": 24.5253, "step": 1667 }, { "epoch": 0.15480278422273783, "grad_norm": 37.19426727294922, "learning_rate": 9.703342488894699e-06, "loss": 24.0772, "step": 1668 }, { "epoch": 0.1548955916473318, "grad_norm": 35.8182373046875, "learning_rate": 9.702821542955192e-06, "loss": 23.665, "step": 1669 }, { "epoch": 0.15498839907192574, "grad_norm": 38.77261734008789, "learning_rate": 9.702300154021847e-06, "loss": 23.5591, "step": 1670 }, { "epoch": 0.1550812064965197, "grad_norm": 43.02525329589844, "learning_rate": 9.701778322143776e-06, "loss": 26.1847, "step": 1671 }, { "epoch": 0.15517401392111368, "grad_norm": 43.710365295410156, "learning_rate": 9.701256047370134e-06, "loss": 25.0866, "step": 1672 }, { "epoch": 0.15526682134570766, "grad_norm": 39.02181625366211, "learning_rate": 9.70073332975012e-06, "loss": 25.1323, "step": 1673 }, { "epoch": 0.15535962877030163, "grad_norm": 40.228355407714844, "learning_rate": 9.700210169332968e-06, "loss": 26.6069, "step": 1674 }, { "epoch": 0.1554524361948956, "grad_norm": 38.09223937988281, "learning_rate": 9.699686566167965e-06, "loss": 25.3197, "step": 1675 }, { "epoch": 0.15554524361948957, "grad_norm": 40.35850524902344, "learning_rate": 9.699162520304428e-06, "loss": 24.8115, "step": 1676 }, { "epoch": 0.15563805104408351, "grad_norm": 43.70969009399414, "learning_rate": 9.698638031791721e-06, "loss": 25.0696, "step": 1677 }, { "epoch": 0.15573085846867749, "grad_norm": 39.19758605957031, "learning_rate": 9.698113100679253e-06, "loss": 23.8247, "step": 1678 }, { "epoch": 0.15582366589327146, "grad_norm": 38.74116897583008, "learning_rate": 9.697587727016469e-06, "loss": 25.252, "step": 1679 }, { "epoch": 0.15591647331786543, "grad_norm": 61.3022346496582, "learning_rate": 9.697061910852857e-06, "loss": 23.6737, "step": 1680 }, { "epoch": 0.1560092807424594, "grad_norm": 45.19304656982422, "learning_rate": 9.696535652237948e-06, "loss": 26.0507, "step": 1681 }, { "epoch": 0.15610208816705337, "grad_norm": 39.2211799621582, "learning_rate": 9.696008951221315e-06, "loss": 27.3447, "step": 1682 }, { "epoch": 0.15619489559164734, "grad_norm": 38.000732421875, "learning_rate": 9.695481807852571e-06, "loss": 24.1206, "step": 1683 }, { "epoch": 0.1562877030162413, "grad_norm": 38.21651840209961, "learning_rate": 9.694954222181374e-06, "loss": 25.676, "step": 1684 }, { "epoch": 0.15638051044083526, "grad_norm": 39.4774055480957, "learning_rate": 9.694426194257417e-06, "loss": 26.2956, "step": 1685 }, { "epoch": 0.15647331786542923, "grad_norm": 36.45391845703125, "learning_rate": 9.693897724130442e-06, "loss": 24.548, "step": 1686 }, { "epoch": 0.1565661252900232, "grad_norm": 37.29908752441406, "learning_rate": 9.69336881185023e-06, "loss": 26.3757, "step": 1687 }, { "epoch": 0.15665893271461717, "grad_norm": 34.85911560058594, "learning_rate": 9.692839457466603e-06, "loss": 25.818, "step": 1688 }, { "epoch": 0.15675174013921114, "grad_norm": 40.398521423339844, "learning_rate": 9.692309661029421e-06, "loss": 26.2485, "step": 1689 }, { "epoch": 0.15684454756380511, "grad_norm": 40.82127380371094, "learning_rate": 9.691779422588595e-06, "loss": 25.1465, "step": 1690 }, { "epoch": 0.15693735498839906, "grad_norm": 40.2639045715332, "learning_rate": 9.691248742194066e-06, "loss": 24.9968, "step": 1691 }, { "epoch": 0.15703016241299303, "grad_norm": 37.627105712890625, "learning_rate": 9.690717619895828e-06, "loss": 25.3746, "step": 1692 }, { "epoch": 0.157122969837587, "grad_norm": 43.013858795166016, "learning_rate": 9.690186055743908e-06, "loss": 23.7044, "step": 1693 }, { "epoch": 0.15721577726218097, "grad_norm": 36.391029357910156, "learning_rate": 9.689654049788382e-06, "loss": 23.6286, "step": 1694 }, { "epoch": 0.15730858468677494, "grad_norm": 155.3097381591797, "learning_rate": 9.689121602079361e-06, "loss": 27.1716, "step": 1695 }, { "epoch": 0.15740139211136892, "grad_norm": 50.65730285644531, "learning_rate": 9.688588712666999e-06, "loss": 25.3715, "step": 1696 }, { "epoch": 0.1574941995359629, "grad_norm": 43.266693115234375, "learning_rate": 9.688055381601494e-06, "loss": 25.7319, "step": 1697 }, { "epoch": 0.15758700696055686, "grad_norm": 44.38916778564453, "learning_rate": 9.687521608933086e-06, "loss": 25.2994, "step": 1698 }, { "epoch": 0.1576798143851508, "grad_norm": 36.84001922607422, "learning_rate": 9.686987394712053e-06, "loss": 24.3128, "step": 1699 }, { "epoch": 0.15777262180974477, "grad_norm": 37.86248016357422, "learning_rate": 9.686452738988716e-06, "loss": 27.7233, "step": 1700 }, { "epoch": 0.15786542923433874, "grad_norm": 40.92667007446289, "learning_rate": 9.68591764181344e-06, "loss": 25.7301, "step": 1701 }, { "epoch": 0.15795823665893272, "grad_norm": 37.10387420654297, "learning_rate": 9.685382103236631e-06, "loss": 24.2725, "step": 1702 }, { "epoch": 0.1580510440835267, "grad_norm": 41.006202697753906, "learning_rate": 9.684846123308732e-06, "loss": 24.5546, "step": 1703 }, { "epoch": 0.15814385150812066, "grad_norm": 36.02056121826172, "learning_rate": 9.684309702080234e-06, "loss": 24.8637, "step": 1704 }, { "epoch": 0.15823665893271463, "grad_norm": 44.62125015258789, "learning_rate": 9.683772839601663e-06, "loss": 23.0117, "step": 1705 }, { "epoch": 0.15832946635730857, "grad_norm": 36.43586349487305, "learning_rate": 9.683235535923593e-06, "loss": 25.6251, "step": 1706 }, { "epoch": 0.15842227378190255, "grad_norm": 35.889522552490234, "learning_rate": 9.682697791096636e-06, "loss": 26.8308, "step": 1707 }, { "epoch": 0.15851508120649652, "grad_norm": 39.646610260009766, "learning_rate": 9.682159605171446e-06, "loss": 24.7116, "step": 1708 }, { "epoch": 0.1586078886310905, "grad_norm": 39.408992767333984, "learning_rate": 9.68162097819872e-06, "loss": 24.7436, "step": 1709 }, { "epoch": 0.15870069605568446, "grad_norm": 39.77057647705078, "learning_rate": 9.681081910229194e-06, "loss": 24.7429, "step": 1710 }, { "epoch": 0.15879350348027843, "grad_norm": 44.923160552978516, "learning_rate": 9.680542401313644e-06, "loss": 25.7193, "step": 1711 }, { "epoch": 0.1588863109048724, "grad_norm": 37.45378494262695, "learning_rate": 9.680002451502895e-06, "loss": 24.6972, "step": 1712 }, { "epoch": 0.15897911832946635, "grad_norm": 37.239532470703125, "learning_rate": 9.679462060847808e-06, "loss": 24.1751, "step": 1713 }, { "epoch": 0.15907192575406032, "grad_norm": 38.25157928466797, "learning_rate": 9.678921229399286e-06, "loss": 26.4518, "step": 1714 }, { "epoch": 0.1591647331786543, "grad_norm": 40.10606384277344, "learning_rate": 9.67837995720827e-06, "loss": 24.6094, "step": 1715 }, { "epoch": 0.15925754060324826, "grad_norm": 66.003662109375, "learning_rate": 9.677838244325754e-06, "loss": 24.6994, "step": 1716 }, { "epoch": 0.15935034802784223, "grad_norm": 40.21587371826172, "learning_rate": 9.67729609080276e-06, "loss": 24.7219, "step": 1717 }, { "epoch": 0.1594431554524362, "grad_norm": 38.22514724731445, "learning_rate": 9.676753496690359e-06, "loss": 23.4606, "step": 1718 }, { "epoch": 0.15953596287703017, "grad_norm": 38.41676330566406, "learning_rate": 9.676210462039664e-06, "loss": 25.7355, "step": 1719 }, { "epoch": 0.15962877030162412, "grad_norm": 39.34728240966797, "learning_rate": 9.675666986901826e-06, "loss": 25.2264, "step": 1720 }, { "epoch": 0.1597215777262181, "grad_norm": 40.36542892456055, "learning_rate": 9.675123071328038e-06, "loss": 26.9934, "step": 1721 }, { "epoch": 0.15981438515081206, "grad_norm": 37.867984771728516, "learning_rate": 9.674578715369536e-06, "loss": 25.8882, "step": 1722 }, { "epoch": 0.15990719257540603, "grad_norm": 39.33311462402344, "learning_rate": 9.674033919077597e-06, "loss": 24.4679, "step": 1723 }, { "epoch": 0.16, "grad_norm": 36.075340270996094, "learning_rate": 9.673488682503542e-06, "loss": 25.0374, "step": 1724 }, { "epoch": 0.16009280742459397, "grad_norm": 41.27389144897461, "learning_rate": 9.672943005698725e-06, "loss": 26.1879, "step": 1725 }, { "epoch": 0.16018561484918795, "grad_norm": 35.84650421142578, "learning_rate": 9.672396888714554e-06, "loss": 25.298, "step": 1726 }, { "epoch": 0.1602784222737819, "grad_norm": 38.88680648803711, "learning_rate": 9.671850331602468e-06, "loss": 24.9009, "step": 1727 }, { "epoch": 0.16037122969837586, "grad_norm": 37.157493591308594, "learning_rate": 9.671303334413952e-06, "loss": 24.9085, "step": 1728 }, { "epoch": 0.16046403712296983, "grad_norm": 50.78688430786133, "learning_rate": 9.670755897200533e-06, "loss": 26.3815, "step": 1729 }, { "epoch": 0.1605568445475638, "grad_norm": 38.9389762878418, "learning_rate": 9.670208020013775e-06, "loss": 23.5029, "step": 1730 }, { "epoch": 0.16064965197215778, "grad_norm": 39.722110748291016, "learning_rate": 9.669659702905292e-06, "loss": 23.5367, "step": 1731 }, { "epoch": 0.16074245939675175, "grad_norm": 37.6657600402832, "learning_rate": 9.669110945926727e-06, "loss": 24.6693, "step": 1732 }, { "epoch": 0.16083526682134572, "grad_norm": 37.99954605102539, "learning_rate": 9.668561749129777e-06, "loss": 24.4645, "step": 1733 }, { "epoch": 0.16092807424593966, "grad_norm": 450.06439208984375, "learning_rate": 9.668012112566175e-06, "loss": 23.9729, "step": 1734 }, { "epoch": 0.16102088167053363, "grad_norm": 35.611297607421875, "learning_rate": 9.66746203628769e-06, "loss": 23.3494, "step": 1735 }, { "epoch": 0.1611136890951276, "grad_norm": 36.59012222290039, "learning_rate": 9.666911520346143e-06, "loss": 24.1678, "step": 1736 }, { "epoch": 0.16120649651972158, "grad_norm": 38.390628814697266, "learning_rate": 9.66636056479339e-06, "loss": 24.8141, "step": 1737 }, { "epoch": 0.16129930394431555, "grad_norm": 38.502044677734375, "learning_rate": 9.665809169681329e-06, "loss": 23.8737, "step": 1738 }, { "epoch": 0.16139211136890952, "grad_norm": 38.9051399230957, "learning_rate": 9.6652573350619e-06, "loss": 26.0594, "step": 1739 }, { "epoch": 0.1614849187935035, "grad_norm": 35.86332321166992, "learning_rate": 9.664705060987085e-06, "loss": 26.1384, "step": 1740 }, { "epoch": 0.16157772621809746, "grad_norm": 40.6722297668457, "learning_rate": 9.664152347508906e-06, "loss": 24.2771, "step": 1741 }, { "epoch": 0.1616705336426914, "grad_norm": 37.882179260253906, "learning_rate": 9.66359919467943e-06, "loss": 24.7533, "step": 1742 }, { "epoch": 0.16176334106728538, "grad_norm": 44.465415954589844, "learning_rate": 9.663045602550758e-06, "loss": 25.7519, "step": 1743 }, { "epoch": 0.16185614849187935, "grad_norm": 43.347408294677734, "learning_rate": 9.66249157117504e-06, "loss": 24.5893, "step": 1744 }, { "epoch": 0.16194895591647332, "grad_norm": 36.217529296875, "learning_rate": 9.661937100604462e-06, "loss": 23.7101, "step": 1745 }, { "epoch": 0.1620417633410673, "grad_norm": 37.31309127807617, "learning_rate": 9.661382190891256e-06, "loss": 24.128, "step": 1746 }, { "epoch": 0.16213457076566126, "grad_norm": 41.72209167480469, "learning_rate": 9.660826842087695e-06, "loss": 25.7346, "step": 1747 }, { "epoch": 0.16222737819025523, "grad_norm": 36.880645751953125, "learning_rate": 9.660271054246086e-06, "loss": 25.3311, "step": 1748 }, { "epoch": 0.16232018561484918, "grad_norm": 55.4532356262207, "learning_rate": 9.659714827418786e-06, "loss": 26.0042, "step": 1749 }, { "epoch": 0.16241299303944315, "grad_norm": 41.05231857299805, "learning_rate": 9.65915816165819e-06, "loss": 24.3147, "step": 1750 }, { "epoch": 0.16250580046403712, "grad_norm": 37.26533126831055, "learning_rate": 9.658601057016734e-06, "loss": 24.76, "step": 1751 }, { "epoch": 0.1625986078886311, "grad_norm": 210.4071044921875, "learning_rate": 9.658043513546898e-06, "loss": 27.1319, "step": 1752 }, { "epoch": 0.16269141531322506, "grad_norm": 40.503387451171875, "learning_rate": 9.657485531301197e-06, "loss": 25.6666, "step": 1753 }, { "epoch": 0.16278422273781903, "grad_norm": 37.58750534057617, "learning_rate": 9.656927110332195e-06, "loss": 24.5477, "step": 1754 }, { "epoch": 0.162877030162413, "grad_norm": 37.31981658935547, "learning_rate": 9.656368250692491e-06, "loss": 24.3911, "step": 1755 }, { "epoch": 0.16296983758700695, "grad_norm": 35.85762405395508, "learning_rate": 9.655808952434732e-06, "loss": 23.2873, "step": 1756 }, { "epoch": 0.16306264501160092, "grad_norm": 37.136898040771484, "learning_rate": 9.655249215611599e-06, "loss": 26.4198, "step": 1757 }, { "epoch": 0.1631554524361949, "grad_norm": 43.080909729003906, "learning_rate": 9.65468904027582e-06, "loss": 23.6912, "step": 1758 }, { "epoch": 0.16324825986078886, "grad_norm": 41.211334228515625, "learning_rate": 9.65412842648016e-06, "loss": 25.4178, "step": 1759 }, { "epoch": 0.16334106728538283, "grad_norm": 39.706661224365234, "learning_rate": 9.65356737427743e-06, "loss": 24.5997, "step": 1760 }, { "epoch": 0.1634338747099768, "grad_norm": 47.560462951660156, "learning_rate": 9.653005883720478e-06, "loss": 26.162, "step": 1761 }, { "epoch": 0.16352668213457078, "grad_norm": 69.18511199951172, "learning_rate": 9.652443954862193e-06, "loss": 23.9862, "step": 1762 }, { "epoch": 0.16361948955916472, "grad_norm": 39.205726623535156, "learning_rate": 9.651881587755513e-06, "loss": 24.8693, "step": 1763 }, { "epoch": 0.1637122969837587, "grad_norm": 38.18431091308594, "learning_rate": 9.651318782453407e-06, "loss": 24.7479, "step": 1764 }, { "epoch": 0.16380510440835266, "grad_norm": 36.24689865112305, "learning_rate": 9.650755539008889e-06, "loss": 25.5708, "step": 1765 }, { "epoch": 0.16389791183294664, "grad_norm": 38.127140045166016, "learning_rate": 9.65019185747502e-06, "loss": 26.1414, "step": 1766 }, { "epoch": 0.1639907192575406, "grad_norm": 42.2486572265625, "learning_rate": 9.649627737904892e-06, "loss": 25.2316, "step": 1767 }, { "epoch": 0.16408352668213458, "grad_norm": 38.9820671081543, "learning_rate": 9.649063180351647e-06, "loss": 24.6718, "step": 1768 }, { "epoch": 0.16417633410672855, "grad_norm": 58.25921630859375, "learning_rate": 9.648498184868461e-06, "loss": 24.0053, "step": 1769 }, { "epoch": 0.1642691415313225, "grad_norm": 36.553321838378906, "learning_rate": 9.647932751508561e-06, "loss": 26.1891, "step": 1770 }, { "epoch": 0.16436194895591646, "grad_norm": 47.253334045410156, "learning_rate": 9.647366880325207e-06, "loss": 26.9623, "step": 1771 }, { "epoch": 0.16445475638051044, "grad_norm": 38.387386322021484, "learning_rate": 9.646800571371701e-06, "loss": 24.439, "step": 1772 }, { "epoch": 0.1645475638051044, "grad_norm": 33.93571472167969, "learning_rate": 9.646233824701389e-06, "loss": 24.0325, "step": 1773 }, { "epoch": 0.16464037122969838, "grad_norm": 37.248992919921875, "learning_rate": 9.645666640367657e-06, "loss": 25.9215, "step": 1774 }, { "epoch": 0.16473317865429235, "grad_norm": 35.33894348144531, "learning_rate": 9.645099018423933e-06, "loss": 26.1632, "step": 1775 }, { "epoch": 0.16482598607888632, "grad_norm": 36.13267517089844, "learning_rate": 9.644530958923683e-06, "loss": 25.1143, "step": 1776 }, { "epoch": 0.16491879350348027, "grad_norm": 39.718505859375, "learning_rate": 9.64396246192042e-06, "loss": 26.1982, "step": 1777 }, { "epoch": 0.16501160092807424, "grad_norm": 49.36561584472656, "learning_rate": 9.643393527467693e-06, "loss": 26.4127, "step": 1778 }, { "epoch": 0.1651044083526682, "grad_norm": 40.120174407958984, "learning_rate": 9.642824155619095e-06, "loss": 27.6991, "step": 1779 }, { "epoch": 0.16519721577726218, "grad_norm": 39.81481170654297, "learning_rate": 9.64225434642826e-06, "loss": 23.7418, "step": 1780 }, { "epoch": 0.16529002320185615, "grad_norm": 36.38108444213867, "learning_rate": 9.64168409994886e-06, "loss": 23.8676, "step": 1781 }, { "epoch": 0.16538283062645012, "grad_norm": 43.92228317260742, "learning_rate": 9.641113416234615e-06, "loss": 24.647, "step": 1782 }, { "epoch": 0.1654756380510441, "grad_norm": 38.1786994934082, "learning_rate": 9.640542295339277e-06, "loss": 23.4427, "step": 1783 }, { "epoch": 0.16556844547563804, "grad_norm": 36.64710235595703, "learning_rate": 9.639970737316648e-06, "loss": 24.7021, "step": 1784 }, { "epoch": 0.165661252900232, "grad_norm": 38.23373031616211, "learning_rate": 9.639398742220564e-06, "loss": 25.7747, "step": 1785 }, { "epoch": 0.16575406032482598, "grad_norm": 37.165283203125, "learning_rate": 9.638826310104909e-06, "loss": 25.2233, "step": 1786 }, { "epoch": 0.16584686774941995, "grad_norm": 36.891639709472656, "learning_rate": 9.638253441023603e-06, "loss": 25.3505, "step": 1787 }, { "epoch": 0.16593967517401392, "grad_norm": 39.581459045410156, "learning_rate": 9.637680135030609e-06, "loss": 25.9411, "step": 1788 }, { "epoch": 0.1660324825986079, "grad_norm": 36.217628479003906, "learning_rate": 9.637106392179932e-06, "loss": 23.2653, "step": 1789 }, { "epoch": 0.16612529002320187, "grad_norm": 39.179161071777344, "learning_rate": 9.636532212525614e-06, "loss": 25.4999, "step": 1790 }, { "epoch": 0.16621809744779584, "grad_norm": 37.416568756103516, "learning_rate": 9.635957596121744e-06, "loss": 23.7046, "step": 1791 }, { "epoch": 0.16631090487238978, "grad_norm": 38.73288345336914, "learning_rate": 9.635382543022446e-06, "loss": 24.096, "step": 1792 }, { "epoch": 0.16640371229698375, "grad_norm": 38.331626892089844, "learning_rate": 9.634807053281895e-06, "loss": 25.3635, "step": 1793 }, { "epoch": 0.16649651972157772, "grad_norm": 36.241825103759766, "learning_rate": 9.634231126954296e-06, "loss": 23.0651, "step": 1794 }, { "epoch": 0.1665893271461717, "grad_norm": 35.67493438720703, "learning_rate": 9.6336547640939e-06, "loss": 24.0035, "step": 1795 }, { "epoch": 0.16668213457076567, "grad_norm": 38.318782806396484, "learning_rate": 9.633077964754999e-06, "loss": 23.7787, "step": 1796 }, { "epoch": 0.16677494199535964, "grad_norm": 39.57734298706055, "learning_rate": 9.632500728991926e-06, "loss": 24.2412, "step": 1797 }, { "epoch": 0.1668677494199536, "grad_norm": 51.604129791259766, "learning_rate": 9.631923056859058e-06, "loss": 24.6336, "step": 1798 }, { "epoch": 0.16696055684454755, "grad_norm": 37.860565185546875, "learning_rate": 9.631344948410806e-06, "loss": 24.122, "step": 1799 }, { "epoch": 0.16705336426914152, "grad_norm": 36.000885009765625, "learning_rate": 9.63076640370163e-06, "loss": 23.1434, "step": 1800 }, { "epoch": 0.1671461716937355, "grad_norm": 64.8515853881836, "learning_rate": 9.630187422786024e-06, "loss": 28.2516, "step": 1801 }, { "epoch": 0.16723897911832947, "grad_norm": 37.99462127685547, "learning_rate": 9.62960800571853e-06, "loss": 25.3005, "step": 1802 }, { "epoch": 0.16733178654292344, "grad_norm": 43.64814376831055, "learning_rate": 9.629028152553724e-06, "loss": 24.9294, "step": 1803 }, { "epoch": 0.1674245939675174, "grad_norm": 38.19097900390625, "learning_rate": 9.62844786334623e-06, "loss": 25.2377, "step": 1804 }, { "epoch": 0.16751740139211138, "grad_norm": 42.786251068115234, "learning_rate": 9.627867138150709e-06, "loss": 23.7578, "step": 1805 }, { "epoch": 0.16761020881670533, "grad_norm": 35.51882553100586, "learning_rate": 9.627285977021861e-06, "loss": 23.7114, "step": 1806 }, { "epoch": 0.1677030162412993, "grad_norm": 39.55128860473633, "learning_rate": 9.626704380014433e-06, "loss": 24.8069, "step": 1807 }, { "epoch": 0.16779582366589327, "grad_norm": 35.5503044128418, "learning_rate": 9.626122347183209e-06, "loss": 23.6787, "step": 1808 }, { "epoch": 0.16788863109048724, "grad_norm": 34.337345123291016, "learning_rate": 9.625539878583015e-06, "loss": 24.0606, "step": 1809 }, { "epoch": 0.1679814385150812, "grad_norm": 37.96505355834961, "learning_rate": 9.62495697426872e-06, "loss": 24.0171, "step": 1810 }, { "epoch": 0.16807424593967518, "grad_norm": 35.60015869140625, "learning_rate": 9.624373634295227e-06, "loss": 23.522, "step": 1811 }, { "epoch": 0.16816705336426915, "grad_norm": 42.4754638671875, "learning_rate": 9.623789858717491e-06, "loss": 25.3623, "step": 1812 }, { "epoch": 0.1682598607888631, "grad_norm": 37.65181350708008, "learning_rate": 9.623205647590498e-06, "loss": 26.1049, "step": 1813 }, { "epoch": 0.16835266821345707, "grad_norm": 43.70513153076172, "learning_rate": 9.622621000969283e-06, "loss": 25.4058, "step": 1814 }, { "epoch": 0.16844547563805104, "grad_norm": 42.546722412109375, "learning_rate": 9.622035918908913e-06, "loss": 24.4623, "step": 1815 }, { "epoch": 0.168538283062645, "grad_norm": 49.039894104003906, "learning_rate": 9.621450401464505e-06, "loss": 23.8569, "step": 1816 }, { "epoch": 0.16863109048723898, "grad_norm": 36.43260192871094, "learning_rate": 9.620864448691214e-06, "loss": 24.6262, "step": 1817 }, { "epoch": 0.16872389791183295, "grad_norm": 55.34397506713867, "learning_rate": 9.620278060644232e-06, "loss": 24.1982, "step": 1818 }, { "epoch": 0.16881670533642693, "grad_norm": 41.06460952758789, "learning_rate": 9.619691237378799e-06, "loss": 25.4417, "step": 1819 }, { "epoch": 0.16890951276102087, "grad_norm": 36.17457962036133, "learning_rate": 9.61910397895019e-06, "loss": 24.4561, "step": 1820 }, { "epoch": 0.16900232018561484, "grad_norm": 35.492393493652344, "learning_rate": 9.618516285413721e-06, "loss": 24.2935, "step": 1821 }, { "epoch": 0.1690951276102088, "grad_norm": 37.62760925292969, "learning_rate": 9.617928156824757e-06, "loss": 23.8952, "step": 1822 }, { "epoch": 0.16918793503480278, "grad_norm": 38.74615478515625, "learning_rate": 9.617339593238693e-06, "loss": 25.6125, "step": 1823 }, { "epoch": 0.16928074245939675, "grad_norm": 39.226646423339844, "learning_rate": 9.616750594710972e-06, "loss": 24.6741, "step": 1824 }, { "epoch": 0.16937354988399073, "grad_norm": 46.524932861328125, "learning_rate": 9.616161161297077e-06, "loss": 26.195, "step": 1825 }, { "epoch": 0.1694663573085847, "grad_norm": 40.46265411376953, "learning_rate": 9.61557129305253e-06, "loss": 24.8861, "step": 1826 }, { "epoch": 0.16955916473317864, "grad_norm": 43.58018112182617, "learning_rate": 9.614980990032896e-06, "loss": 26.467, "step": 1827 }, { "epoch": 0.1696519721577726, "grad_norm": 40.335426330566406, "learning_rate": 9.614390252293781e-06, "loss": 25.0637, "step": 1828 }, { "epoch": 0.16974477958236658, "grad_norm": 37.569766998291016, "learning_rate": 9.613799079890828e-06, "loss": 23.5286, "step": 1829 }, { "epoch": 0.16983758700696056, "grad_norm": 38.75937271118164, "learning_rate": 9.613207472879725e-06, "loss": 24.9073, "step": 1830 }, { "epoch": 0.16993039443155453, "grad_norm": 38.578495025634766, "learning_rate": 9.612615431316201e-06, "loss": 25.6444, "step": 1831 }, { "epoch": 0.1700232018561485, "grad_norm": 38.125587463378906, "learning_rate": 9.612022955256023e-06, "loss": 24.4173, "step": 1832 }, { "epoch": 0.17011600928074247, "grad_norm": 38.971229553222656, "learning_rate": 9.611430044755006e-06, "loss": 23.7802, "step": 1833 }, { "epoch": 0.1702088167053364, "grad_norm": 36.484474182128906, "learning_rate": 9.610836699868992e-06, "loss": 24.9516, "step": 1834 }, { "epoch": 0.17030162412993038, "grad_norm": 37.88871383666992, "learning_rate": 9.610242920653881e-06, "loss": 23.5622, "step": 1835 }, { "epoch": 0.17039443155452436, "grad_norm": 39.14670944213867, "learning_rate": 9.6096487071656e-06, "loss": 26.5567, "step": 1836 }, { "epoch": 0.17048723897911833, "grad_norm": 40.37866973876953, "learning_rate": 9.609054059460124e-06, "loss": 24.787, "step": 1837 }, { "epoch": 0.1705800464037123, "grad_norm": 36.974876403808594, "learning_rate": 9.608458977593467e-06, "loss": 22.7203, "step": 1838 }, { "epoch": 0.17067285382830627, "grad_norm": 42.7182502746582, "learning_rate": 9.607863461621684e-06, "loss": 24.7615, "step": 1839 }, { "epoch": 0.17076566125290024, "grad_norm": 58.226417541503906, "learning_rate": 9.607267511600873e-06, "loss": 25.7166, "step": 1840 }, { "epoch": 0.1708584686774942, "grad_norm": 45.53582000732422, "learning_rate": 9.60667112758717e-06, "loss": 24.688, "step": 1841 }, { "epoch": 0.17095127610208816, "grad_norm": 41.420204162597656, "learning_rate": 9.606074309636751e-06, "loss": 24.8034, "step": 1842 }, { "epoch": 0.17104408352668213, "grad_norm": 36.602012634277344, "learning_rate": 9.605477057805838e-06, "loss": 23.1616, "step": 1843 }, { "epoch": 0.1711368909512761, "grad_norm": 36.405921936035156, "learning_rate": 9.604879372150689e-06, "loss": 23.4706, "step": 1844 }, { "epoch": 0.17122969837587007, "grad_norm": 39.359962463378906, "learning_rate": 9.604281252727601e-06, "loss": 23.4751, "step": 1845 }, { "epoch": 0.17132250580046404, "grad_norm": 36.672977447509766, "learning_rate": 9.603682699592922e-06, "loss": 24.9188, "step": 1846 }, { "epoch": 0.171415313225058, "grad_norm": 40.80332946777344, "learning_rate": 9.60308371280303e-06, "loss": 25.7605, "step": 1847 }, { "epoch": 0.17150812064965199, "grad_norm": 40.04555892944336, "learning_rate": 9.602484292414348e-06, "loss": 27.2036, "step": 1848 }, { "epoch": 0.17160092807424593, "grad_norm": 38.63703155517578, "learning_rate": 9.601884438483342e-06, "loss": 23.4776, "step": 1849 }, { "epoch": 0.1716937354988399, "grad_norm": 49.503929138183594, "learning_rate": 9.601284151066515e-06, "loss": 24.3007, "step": 1850 }, { "epoch": 0.17178654292343387, "grad_norm": 39.37955856323242, "learning_rate": 9.600683430220414e-06, "loss": 25.3752, "step": 1851 }, { "epoch": 0.17187935034802784, "grad_norm": 39.6995735168457, "learning_rate": 9.600082276001624e-06, "loss": 27.3516, "step": 1852 }, { "epoch": 0.17197215777262181, "grad_norm": 47.99911880493164, "learning_rate": 9.59948068846677e-06, "loss": 23.5019, "step": 1853 }, { "epoch": 0.17206496519721579, "grad_norm": 38.85637664794922, "learning_rate": 9.598878667672525e-06, "loss": 25.469, "step": 1854 }, { "epoch": 0.17215777262180976, "grad_norm": 38.664894104003906, "learning_rate": 9.598276213675596e-06, "loss": 24.5583, "step": 1855 }, { "epoch": 0.1722505800464037, "grad_norm": 34.23714828491211, "learning_rate": 9.59767332653273e-06, "loss": 24.7096, "step": 1856 }, { "epoch": 0.17234338747099767, "grad_norm": 35.782981872558594, "learning_rate": 9.597070006300722e-06, "loss": 22.322, "step": 1857 }, { "epoch": 0.17243619489559164, "grad_norm": 38.952762603759766, "learning_rate": 9.5964662530364e-06, "loss": 26.2568, "step": 1858 }, { "epoch": 0.17252900232018561, "grad_norm": 46.761085510253906, "learning_rate": 9.595862066796635e-06, "loss": 23.678, "step": 1859 }, { "epoch": 0.1726218097447796, "grad_norm": 45.351585388183594, "learning_rate": 9.595257447638344e-06, "loss": 23.8656, "step": 1860 }, { "epoch": 0.17271461716937356, "grad_norm": 37.97940444946289, "learning_rate": 9.594652395618476e-06, "loss": 25.1058, "step": 1861 }, { "epoch": 0.17280742459396753, "grad_norm": 37.95691680908203, "learning_rate": 9.594046910794029e-06, "loss": 23.8756, "step": 1862 }, { "epoch": 0.17290023201856147, "grad_norm": 46.6726188659668, "learning_rate": 9.593440993222036e-06, "loss": 25.6291, "step": 1863 }, { "epoch": 0.17299303944315544, "grad_norm": 35.10506820678711, "learning_rate": 9.592834642959575e-06, "loss": 23.9653, "step": 1864 }, { "epoch": 0.17308584686774942, "grad_norm": 36.25514221191406, "learning_rate": 9.59222786006376e-06, "loss": 23.5495, "step": 1865 }, { "epoch": 0.1731786542923434, "grad_norm": 41.58308792114258, "learning_rate": 9.59162064459175e-06, "loss": 27.2415, "step": 1866 }, { "epoch": 0.17327146171693736, "grad_norm": 42.58598709106445, "learning_rate": 9.591012996600742e-06, "loss": 25.4405, "step": 1867 }, { "epoch": 0.17336426914153133, "grad_norm": 40.019378662109375, "learning_rate": 9.590404916147976e-06, "loss": 25.3901, "step": 1868 }, { "epoch": 0.1734570765661253, "grad_norm": 35.52545928955078, "learning_rate": 9.589796403290731e-06, "loss": 23.4052, "step": 1869 }, { "epoch": 0.17354988399071924, "grad_norm": 47.55070877075195, "learning_rate": 9.589187458086328e-06, "loss": 25.0952, "step": 1870 }, { "epoch": 0.17364269141531322, "grad_norm": 39.81605911254883, "learning_rate": 9.588578080592129e-06, "loss": 24.7401, "step": 1871 }, { "epoch": 0.1737354988399072, "grad_norm": 37.72281265258789, "learning_rate": 9.587968270865534e-06, "loss": 24.2348, "step": 1872 }, { "epoch": 0.17382830626450116, "grad_norm": 39.00321578979492, "learning_rate": 9.587358028963984e-06, "loss": 24.8832, "step": 1873 }, { "epoch": 0.17392111368909513, "grad_norm": 39.60163116455078, "learning_rate": 9.586747354944968e-06, "loss": 25.7149, "step": 1874 }, { "epoch": 0.1740139211136891, "grad_norm": 37.18523025512695, "learning_rate": 9.586136248866003e-06, "loss": 26.4507, "step": 1875 }, { "epoch": 0.17410672853828307, "grad_norm": 45.679718017578125, "learning_rate": 9.58552471078466e-06, "loss": 24.7147, "step": 1876 }, { "epoch": 0.17419953596287702, "grad_norm": 37.71535873413086, "learning_rate": 9.584912740758539e-06, "loss": 24.1324, "step": 1877 }, { "epoch": 0.174292343387471, "grad_norm": 44.38905334472656, "learning_rate": 9.584300338845289e-06, "loss": 24.3221, "step": 1878 }, { "epoch": 0.17438515081206496, "grad_norm": 38.5311279296875, "learning_rate": 9.583687505102594e-06, "loss": 24.9309, "step": 1879 }, { "epoch": 0.17447795823665893, "grad_norm": 38.14777755737305, "learning_rate": 9.583074239588186e-06, "loss": 25.4426, "step": 1880 }, { "epoch": 0.1745707656612529, "grad_norm": 37.35372543334961, "learning_rate": 9.58246054235983e-06, "loss": 23.8353, "step": 1881 }, { "epoch": 0.17466357308584687, "grad_norm": 41.493408203125, "learning_rate": 9.581846413475335e-06, "loss": 23.6052, "step": 1882 }, { "epoch": 0.17475638051044085, "grad_norm": 43.820648193359375, "learning_rate": 9.58123185299255e-06, "loss": 24.7571, "step": 1883 }, { "epoch": 0.17484918793503482, "grad_norm": 44.12700653076172, "learning_rate": 9.580616860969365e-06, "loss": 23.4631, "step": 1884 }, { "epoch": 0.17494199535962876, "grad_norm": 37.72633361816406, "learning_rate": 9.580001437463712e-06, "loss": 25.9593, "step": 1885 }, { "epoch": 0.17503480278422273, "grad_norm": 36.519283294677734, "learning_rate": 9.579385582533563e-06, "loss": 26.1673, "step": 1886 }, { "epoch": 0.1751276102088167, "grad_norm": 37.23466110229492, "learning_rate": 9.578769296236928e-06, "loss": 26.2394, "step": 1887 }, { "epoch": 0.17522041763341067, "grad_norm": 38.12587356567383, "learning_rate": 9.578152578631857e-06, "loss": 25.5686, "step": 1888 }, { "epoch": 0.17531322505800465, "grad_norm": 35.64651870727539, "learning_rate": 9.57753542977645e-06, "loss": 25.1224, "step": 1889 }, { "epoch": 0.17540603248259862, "grad_norm": 39.72618103027344, "learning_rate": 9.576917849728836e-06, "loss": 25.4529, "step": 1890 }, { "epoch": 0.1754988399071926, "grad_norm": 37.211631774902344, "learning_rate": 9.576299838547189e-06, "loss": 22.56, "step": 1891 }, { "epoch": 0.17559164733178653, "grad_norm": 40.66078567504883, "learning_rate": 9.57568139628973e-06, "loss": 24.7232, "step": 1892 }, { "epoch": 0.1756844547563805, "grad_norm": 43.68046569824219, "learning_rate": 9.575062523014707e-06, "loss": 24.4579, "step": 1893 }, { "epoch": 0.17577726218097448, "grad_norm": 36.863189697265625, "learning_rate": 9.57444321878042e-06, "loss": 23.3714, "step": 1894 }, { "epoch": 0.17587006960556845, "grad_norm": 37.95170211791992, "learning_rate": 9.573823483645206e-06, "loss": 24.829, "step": 1895 }, { "epoch": 0.17596287703016242, "grad_norm": 38.26484298706055, "learning_rate": 9.573203317667442e-06, "loss": 25.5079, "step": 1896 }, { "epoch": 0.1760556844547564, "grad_norm": 37.598514556884766, "learning_rate": 9.572582720905548e-06, "loss": 24.7166, "step": 1897 }, { "epoch": 0.17614849187935036, "grad_norm": 37.169769287109375, "learning_rate": 9.571961693417978e-06, "loss": 25.1006, "step": 1898 }, { "epoch": 0.1762412993039443, "grad_norm": 39.90910720825195, "learning_rate": 9.571340235263236e-06, "loss": 23.6641, "step": 1899 }, { "epoch": 0.17633410672853828, "grad_norm": 42.237884521484375, "learning_rate": 9.57071834649986e-06, "loss": 25.4403, "step": 1900 }, { "epoch": 0.17642691415313225, "grad_norm": 41.43452453613281, "learning_rate": 9.570096027186428e-06, "loss": 26.5719, "step": 1901 }, { "epoch": 0.17651972157772622, "grad_norm": 39.12534713745117, "learning_rate": 9.569473277381565e-06, "loss": 25.184, "step": 1902 }, { "epoch": 0.1766125290023202, "grad_norm": 47.83718490600586, "learning_rate": 9.56885009714393e-06, "loss": 25.5496, "step": 1903 }, { "epoch": 0.17670533642691416, "grad_norm": 56.719608306884766, "learning_rate": 9.568226486532225e-06, "loss": 25.0703, "step": 1904 }, { "epoch": 0.17679814385150813, "grad_norm": 37.95629119873047, "learning_rate": 9.567602445605192e-06, "loss": 23.9727, "step": 1905 }, { "epoch": 0.17689095127610208, "grad_norm": 38.970664978027344, "learning_rate": 9.566977974421619e-06, "loss": 24.7993, "step": 1906 }, { "epoch": 0.17698375870069605, "grad_norm": 37.53853225708008, "learning_rate": 9.566353073040322e-06, "loss": 24.5403, "step": 1907 }, { "epoch": 0.17707656612529002, "grad_norm": 43.658546447753906, "learning_rate": 9.56572774152017e-06, "loss": 25.2796, "step": 1908 }, { "epoch": 0.177169373549884, "grad_norm": 38.12128829956055, "learning_rate": 9.565101979920067e-06, "loss": 22.6715, "step": 1909 }, { "epoch": 0.17726218097447796, "grad_norm": 39.31364440917969, "learning_rate": 9.564475788298957e-06, "loss": 24.1453, "step": 1910 }, { "epoch": 0.17735498839907193, "grad_norm": 46.89065933227539, "learning_rate": 9.563849166715825e-06, "loss": 24.7095, "step": 1911 }, { "epoch": 0.1774477958236659, "grad_norm": 38.668296813964844, "learning_rate": 9.563222115229702e-06, "loss": 25.2519, "step": 1912 }, { "epoch": 0.17754060324825985, "grad_norm": 41.03544616699219, "learning_rate": 9.562594633899648e-06, "loss": 24.2059, "step": 1913 }, { "epoch": 0.17763341067285382, "grad_norm": 35.15399932861328, "learning_rate": 9.561966722784774e-06, "loss": 21.7705, "step": 1914 }, { "epoch": 0.1777262180974478, "grad_norm": 37.83769226074219, "learning_rate": 9.561338381944227e-06, "loss": 24.8432, "step": 1915 }, { "epoch": 0.17781902552204176, "grad_norm": 36.8570556640625, "learning_rate": 9.560709611437195e-06, "loss": 23.1315, "step": 1916 }, { "epoch": 0.17791183294663573, "grad_norm": 41.261844635009766, "learning_rate": 9.560080411322908e-06, "loss": 24.3149, "step": 1917 }, { "epoch": 0.1780046403712297, "grad_norm": 42.70988464355469, "learning_rate": 9.559450781660632e-06, "loss": 26.5684, "step": 1918 }, { "epoch": 0.17809744779582368, "grad_norm": 38.5787467956543, "learning_rate": 9.558820722509679e-06, "loss": 26.3172, "step": 1919 }, { "epoch": 0.17819025522041762, "grad_norm": 37.242645263671875, "learning_rate": 9.558190233929396e-06, "loss": 23.4368, "step": 1920 }, { "epoch": 0.1782830626450116, "grad_norm": 36.672760009765625, "learning_rate": 9.557559315979177e-06, "loss": 25.1539, "step": 1921 }, { "epoch": 0.17837587006960556, "grad_norm": 39.02145767211914, "learning_rate": 9.556927968718453e-06, "loss": 23.5223, "step": 1922 }, { "epoch": 0.17846867749419953, "grad_norm": 35.66323471069336, "learning_rate": 9.556296192206691e-06, "loss": 23.5418, "step": 1923 }, { "epoch": 0.1785614849187935, "grad_norm": 40.011314392089844, "learning_rate": 9.555663986503408e-06, "loss": 24.273, "step": 1924 }, { "epoch": 0.17865429234338748, "grad_norm": 36.258060455322266, "learning_rate": 9.555031351668151e-06, "loss": 23.8846, "step": 1925 }, { "epoch": 0.17874709976798145, "grad_norm": 36.07109451293945, "learning_rate": 9.554398287760515e-06, "loss": 23.2119, "step": 1926 }, { "epoch": 0.1788399071925754, "grad_norm": 39.78252410888672, "learning_rate": 9.553764794840135e-06, "loss": 26.4378, "step": 1927 }, { "epoch": 0.17893271461716936, "grad_norm": 35.54758071899414, "learning_rate": 9.553130872966683e-06, "loss": 23.2075, "step": 1928 }, { "epoch": 0.17902552204176334, "grad_norm": 40.30841827392578, "learning_rate": 9.552496522199872e-06, "loss": 27.604, "step": 1929 }, { "epoch": 0.1791183294663573, "grad_norm": 34.70287322998047, "learning_rate": 9.551861742599456e-06, "loss": 23.8151, "step": 1930 }, { "epoch": 0.17921113689095128, "grad_norm": 86.85968780517578, "learning_rate": 9.55122653422523e-06, "loss": 24.6466, "step": 1931 }, { "epoch": 0.17930394431554525, "grad_norm": 43.32040023803711, "learning_rate": 9.55059089713703e-06, "loss": 23.8234, "step": 1932 }, { "epoch": 0.17939675174013922, "grad_norm": 39.61906433105469, "learning_rate": 9.549954831394733e-06, "loss": 24.2648, "step": 1933 }, { "epoch": 0.1794895591647332, "grad_norm": 35.53108596801758, "learning_rate": 9.549318337058248e-06, "loss": 24.7722, "step": 1934 }, { "epoch": 0.17958236658932714, "grad_norm": 38.115570068359375, "learning_rate": 9.54868141418754e-06, "loss": 25.9259, "step": 1935 }, { "epoch": 0.1796751740139211, "grad_norm": 38.28895568847656, "learning_rate": 9.548044062842599e-06, "loss": 25.0686, "step": 1936 }, { "epoch": 0.17976798143851508, "grad_norm": 39.05668258666992, "learning_rate": 9.547406283083462e-06, "loss": 26.3782, "step": 1937 }, { "epoch": 0.17986078886310905, "grad_norm": 35.77762222290039, "learning_rate": 9.546768074970213e-06, "loss": 24.2032, "step": 1938 }, { "epoch": 0.17995359628770302, "grad_norm": 35.86992645263672, "learning_rate": 9.546129438562961e-06, "loss": 23.546, "step": 1939 }, { "epoch": 0.180046403712297, "grad_norm": 38.995487213134766, "learning_rate": 9.54549037392187e-06, "loss": 26.772, "step": 1940 }, { "epoch": 0.18013921113689096, "grad_norm": 41.282920837402344, "learning_rate": 9.544850881107135e-06, "loss": 24.0453, "step": 1941 }, { "epoch": 0.1802320185614849, "grad_norm": 37.70598220825195, "learning_rate": 9.544210960178995e-06, "loss": 24.8406, "step": 1942 }, { "epoch": 0.18032482598607888, "grad_norm": 39.04556655883789, "learning_rate": 9.54357061119773e-06, "loss": 25.3031, "step": 1943 }, { "epoch": 0.18041763341067285, "grad_norm": 34.251773834228516, "learning_rate": 9.54292983422366e-06, "loss": 23.9919, "step": 1944 }, { "epoch": 0.18051044083526682, "grad_norm": 38.31025695800781, "learning_rate": 9.542288629317142e-06, "loss": 24.9035, "step": 1945 }, { "epoch": 0.1806032482598608, "grad_norm": 34.50477981567383, "learning_rate": 9.541646996538578e-06, "loss": 22.3258, "step": 1946 }, { "epoch": 0.18069605568445476, "grad_norm": 40.564823150634766, "learning_rate": 9.541004935948408e-06, "loss": 25.5443, "step": 1947 }, { "epoch": 0.18078886310904874, "grad_norm": 39.550968170166016, "learning_rate": 9.540362447607111e-06, "loss": 23.2732, "step": 1948 }, { "epoch": 0.18088167053364268, "grad_norm": 43.30647659301758, "learning_rate": 9.53971953157521e-06, "loss": 24.683, "step": 1949 }, { "epoch": 0.18097447795823665, "grad_norm": 38.521549224853516, "learning_rate": 9.539076187913262e-06, "loss": 25.17, "step": 1950 }, { "epoch": 0.18106728538283062, "grad_norm": 42.324398040771484, "learning_rate": 9.538432416681872e-06, "loss": 27.0421, "step": 1951 }, { "epoch": 0.1811600928074246, "grad_norm": 38.05841064453125, "learning_rate": 9.537788217941683e-06, "loss": 22.6884, "step": 1952 }, { "epoch": 0.18125290023201857, "grad_norm": 36.939979553222656, "learning_rate": 9.537143591753373e-06, "loss": 24.3141, "step": 1953 }, { "epoch": 0.18134570765661254, "grad_norm": 36.045589447021484, "learning_rate": 9.536498538177666e-06, "loss": 24.6797, "step": 1954 }, { "epoch": 0.1814385150812065, "grad_norm": 36.85438919067383, "learning_rate": 9.535853057275324e-06, "loss": 24.1841, "step": 1955 }, { "epoch": 0.18153132250580045, "grad_norm": 36.05744171142578, "learning_rate": 9.53520714910715e-06, "loss": 24.7839, "step": 1956 }, { "epoch": 0.18162412993039442, "grad_norm": 38.408809661865234, "learning_rate": 9.534560813733986e-06, "loss": 25.2396, "step": 1957 }, { "epoch": 0.1817169373549884, "grad_norm": 35.604618072509766, "learning_rate": 9.533914051216716e-06, "loss": 24.0279, "step": 1958 }, { "epoch": 0.18180974477958237, "grad_norm": 37.049381256103516, "learning_rate": 9.533266861616265e-06, "loss": 25.6212, "step": 1959 }, { "epoch": 0.18190255220417634, "grad_norm": 40.03840255737305, "learning_rate": 9.532619244993593e-06, "loss": 25.4897, "step": 1960 }, { "epoch": 0.1819953596287703, "grad_norm": 39.24613952636719, "learning_rate": 9.531971201409707e-06, "loss": 25.132, "step": 1961 }, { "epoch": 0.18208816705336428, "grad_norm": 54.96390151977539, "learning_rate": 9.531322730925648e-06, "loss": 24.916, "step": 1962 }, { "epoch": 0.18218097447795822, "grad_norm": 39.43144989013672, "learning_rate": 9.530673833602504e-06, "loss": 25.3073, "step": 1963 }, { "epoch": 0.1822737819025522, "grad_norm": 71.74746704101562, "learning_rate": 9.530024509501396e-06, "loss": 22.9946, "step": 1964 }, { "epoch": 0.18236658932714617, "grad_norm": 36.59702682495117, "learning_rate": 9.529374758683492e-06, "loss": 24.6223, "step": 1965 }, { "epoch": 0.18245939675174014, "grad_norm": 41.11723709106445, "learning_rate": 9.528724581209993e-06, "loss": 25.1202, "step": 1966 }, { "epoch": 0.1825522041763341, "grad_norm": 37.98976516723633, "learning_rate": 9.528073977142147e-06, "loss": 25.5526, "step": 1967 }, { "epoch": 0.18264501160092808, "grad_norm": 36.0003662109375, "learning_rate": 9.527422946541238e-06, "loss": 23.8874, "step": 1968 }, { "epoch": 0.18273781902552205, "grad_norm": 35.955604553222656, "learning_rate": 9.52677148946859e-06, "loss": 25.8838, "step": 1969 }, { "epoch": 0.182830626450116, "grad_norm": 39.9340934753418, "learning_rate": 9.526119605985575e-06, "loss": 23.9516, "step": 1970 }, { "epoch": 0.18292343387470997, "grad_norm": 36.174068450927734, "learning_rate": 9.525467296153591e-06, "loss": 24.7015, "step": 1971 }, { "epoch": 0.18301624129930394, "grad_norm": 35.825069427490234, "learning_rate": 9.524814560034087e-06, "loss": 22.2511, "step": 1972 }, { "epoch": 0.1831090487238979, "grad_norm": 36.350154876708984, "learning_rate": 9.524161397688551e-06, "loss": 23.4634, "step": 1973 }, { "epoch": 0.18320185614849188, "grad_norm": 36.173133850097656, "learning_rate": 9.523507809178506e-06, "loss": 25.6927, "step": 1974 }, { "epoch": 0.18329466357308585, "grad_norm": 37.861480712890625, "learning_rate": 9.52285379456552e-06, "loss": 24.2542, "step": 1975 }, { "epoch": 0.18338747099767982, "grad_norm": 38.506797790527344, "learning_rate": 9.522199353911201e-06, "loss": 24.4394, "step": 1976 }, { "epoch": 0.18348027842227377, "grad_norm": 41.3864631652832, "learning_rate": 9.521544487277193e-06, "loss": 25.0894, "step": 1977 }, { "epoch": 0.18357308584686774, "grad_norm": 39.00338363647461, "learning_rate": 9.520889194725182e-06, "loss": 25.5465, "step": 1978 }, { "epoch": 0.1836658932714617, "grad_norm": 41.56503677368164, "learning_rate": 9.520233476316899e-06, "loss": 23.8962, "step": 1979 }, { "epoch": 0.18375870069605568, "grad_norm": 43.42483139038086, "learning_rate": 9.519577332114107e-06, "loss": 24.7217, "step": 1980 }, { "epoch": 0.18385150812064965, "grad_norm": 38.06370544433594, "learning_rate": 9.518920762178616e-06, "loss": 25.5258, "step": 1981 }, { "epoch": 0.18394431554524363, "grad_norm": 38.9427490234375, "learning_rate": 9.518263766572272e-06, "loss": 24.2245, "step": 1982 }, { "epoch": 0.1840371229698376, "grad_norm": 41.99814987182617, "learning_rate": 9.51760634535696e-06, "loss": 24.3121, "step": 1983 }, { "epoch": 0.18412993039443157, "grad_norm": 39.690162658691406, "learning_rate": 9.516948498594611e-06, "loss": 25.8783, "step": 1984 }, { "epoch": 0.1842227378190255, "grad_norm": 35.251060485839844, "learning_rate": 9.516290226347192e-06, "loss": 24.4111, "step": 1985 }, { "epoch": 0.18431554524361948, "grad_norm": 39.32752227783203, "learning_rate": 9.515631528676709e-06, "loss": 24.3273, "step": 1986 }, { "epoch": 0.18440835266821345, "grad_norm": 41.72018814086914, "learning_rate": 9.514972405645211e-06, "loss": 25.2135, "step": 1987 }, { "epoch": 0.18450116009280743, "grad_norm": 310.12689208984375, "learning_rate": 9.514312857314785e-06, "loss": 25.0892, "step": 1988 }, { "epoch": 0.1845939675174014, "grad_norm": 37.88847732543945, "learning_rate": 9.513652883747559e-06, "loss": 21.8824, "step": 1989 }, { "epoch": 0.18468677494199537, "grad_norm": 42.88656997680664, "learning_rate": 9.5129924850057e-06, "loss": 26.3802, "step": 1990 }, { "epoch": 0.18477958236658934, "grad_norm": 35.45454025268555, "learning_rate": 9.512331661151416e-06, "loss": 26.6863, "step": 1991 }, { "epoch": 0.18487238979118328, "grad_norm": 37.175601959228516, "learning_rate": 9.511670412246956e-06, "loss": 25.0277, "step": 1992 }, { "epoch": 0.18496519721577726, "grad_norm": 59.51166915893555, "learning_rate": 9.51100873835461e-06, "loss": 25.7485, "step": 1993 }, { "epoch": 0.18505800464037123, "grad_norm": 42.388023376464844, "learning_rate": 9.510346639536701e-06, "loss": 23.8465, "step": 1994 }, { "epoch": 0.1851508120649652, "grad_norm": 38.74897766113281, "learning_rate": 9.5096841158556e-06, "loss": 25.8189, "step": 1995 }, { "epoch": 0.18524361948955917, "grad_norm": 37.14359664916992, "learning_rate": 9.509021167373716e-06, "loss": 24.0793, "step": 1996 }, { "epoch": 0.18533642691415314, "grad_norm": 66.1111831665039, "learning_rate": 9.508357794153493e-06, "loss": 23.8716, "step": 1997 }, { "epoch": 0.1854292343387471, "grad_norm": 47.08810043334961, "learning_rate": 9.507693996257423e-06, "loss": 25.7173, "step": 1998 }, { "epoch": 0.18552204176334106, "grad_norm": 36.58077621459961, "learning_rate": 9.507029773748033e-06, "loss": 24.5189, "step": 1999 }, { "epoch": 0.18561484918793503, "grad_norm": 47.25212478637695, "learning_rate": 9.50636512668789e-06, "loss": 24.2357, "step": 2000 }, { "epoch": 0.185707656612529, "grad_norm": 40.256038665771484, "learning_rate": 9.505700055139606e-06, "loss": 25.8424, "step": 2001 }, { "epoch": 0.18580046403712297, "grad_norm": 38.45056915283203, "learning_rate": 9.505034559165823e-06, "loss": 22.8731, "step": 2002 }, { "epoch": 0.18589327146171694, "grad_norm": 36.808319091796875, "learning_rate": 9.504368638829233e-06, "loss": 23.5589, "step": 2003 }, { "epoch": 0.1859860788863109, "grad_norm": 36.701534271240234, "learning_rate": 9.503702294192563e-06, "loss": 25.405, "step": 2004 }, { "epoch": 0.18607888631090488, "grad_norm": 38.966644287109375, "learning_rate": 9.50303552531858e-06, "loss": 23.9312, "step": 2005 }, { "epoch": 0.18617169373549883, "grad_norm": 35.860355377197266, "learning_rate": 9.502368332270093e-06, "loss": 23.3555, "step": 2006 }, { "epoch": 0.1862645011600928, "grad_norm": 37.624488830566406, "learning_rate": 9.501700715109952e-06, "loss": 24.207, "step": 2007 }, { "epoch": 0.18635730858468677, "grad_norm": 35.78261947631836, "learning_rate": 9.501032673901041e-06, "loss": 22.9704, "step": 2008 }, { "epoch": 0.18645011600928074, "grad_norm": 40.04035568237305, "learning_rate": 9.50036420870629e-06, "loss": 25.5646, "step": 2009 }, { "epoch": 0.1865429234338747, "grad_norm": 43.49129867553711, "learning_rate": 9.499695319588665e-06, "loss": 28.7745, "step": 2010 }, { "epoch": 0.18663573085846868, "grad_norm": 38.400108337402344, "learning_rate": 9.499026006611176e-06, "loss": 25.1214, "step": 2011 }, { "epoch": 0.18672853828306266, "grad_norm": 38.58463668823242, "learning_rate": 9.498356269836869e-06, "loss": 24.8511, "step": 2012 }, { "epoch": 0.1868213457076566, "grad_norm": 40.1834602355957, "learning_rate": 9.497686109328832e-06, "loss": 24.9779, "step": 2013 }, { "epoch": 0.18691415313225057, "grad_norm": 40.37628173828125, "learning_rate": 9.497015525150192e-06, "loss": 25.6992, "step": 2014 }, { "epoch": 0.18700696055684454, "grad_norm": 39.896549224853516, "learning_rate": 9.496344517364117e-06, "loss": 25.8914, "step": 2015 }, { "epoch": 0.18709976798143851, "grad_norm": 40.171302795410156, "learning_rate": 9.495673086033813e-06, "loss": 24.3093, "step": 2016 }, { "epoch": 0.18719257540603249, "grad_norm": 38.14527893066406, "learning_rate": 9.49500123122253e-06, "loss": 25.6755, "step": 2017 }, { "epoch": 0.18728538283062646, "grad_norm": 37.65095901489258, "learning_rate": 9.49432895299355e-06, "loss": 25.1371, "step": 2018 }, { "epoch": 0.18737819025522043, "grad_norm": 37.397544860839844, "learning_rate": 9.493656251410205e-06, "loss": 26.3006, "step": 2019 }, { "epoch": 0.18747099767981437, "grad_norm": 41.73283386230469, "learning_rate": 9.492983126535859e-06, "loss": 24.8149, "step": 2020 }, { "epoch": 0.18756380510440834, "grad_norm": 39.10298156738281, "learning_rate": 9.492309578433921e-06, "loss": 26.2177, "step": 2021 }, { "epoch": 0.18765661252900231, "grad_norm": 39.46112823486328, "learning_rate": 9.491635607167833e-06, "loss": 23.4898, "step": 2022 }, { "epoch": 0.18774941995359629, "grad_norm": 36.36807632446289, "learning_rate": 9.490961212801086e-06, "loss": 25.1333, "step": 2023 }, { "epoch": 0.18784222737819026, "grad_norm": 39.08053207397461, "learning_rate": 9.490286395397205e-06, "loss": 24.8432, "step": 2024 }, { "epoch": 0.18793503480278423, "grad_norm": 50.23064041137695, "learning_rate": 9.489611155019755e-06, "loss": 25.2662, "step": 2025 }, { "epoch": 0.1880278422273782, "grad_norm": 35.89591979980469, "learning_rate": 9.488935491732342e-06, "loss": 24.5267, "step": 2026 }, { "epoch": 0.18812064965197217, "grad_norm": 38.38645935058594, "learning_rate": 9.488259405598613e-06, "loss": 23.0885, "step": 2027 }, { "epoch": 0.18821345707656612, "grad_norm": 39.81999588012695, "learning_rate": 9.487582896682252e-06, "loss": 26.1922, "step": 2028 }, { "epoch": 0.1883062645011601, "grad_norm": 37.69133758544922, "learning_rate": 9.486905965046986e-06, "loss": 23.153, "step": 2029 }, { "epoch": 0.18839907192575406, "grad_norm": 34.67439270019531, "learning_rate": 9.486228610756578e-06, "loss": 24.4886, "step": 2030 }, { "epoch": 0.18849187935034803, "grad_norm": 39.100341796875, "learning_rate": 9.485550833874837e-06, "loss": 24.8074, "step": 2031 }, { "epoch": 0.188584686774942, "grad_norm": 37.272674560546875, "learning_rate": 9.484872634465602e-06, "loss": 25.1624, "step": 2032 }, { "epoch": 0.18867749419953597, "grad_norm": 41.22929763793945, "learning_rate": 9.484194012592764e-06, "loss": 26.2006, "step": 2033 }, { "epoch": 0.18877030162412994, "grad_norm": 38.35232162475586, "learning_rate": 9.483514968320244e-06, "loss": 24.996, "step": 2034 }, { "epoch": 0.1888631090487239, "grad_norm": 39.66852951049805, "learning_rate": 9.482835501712007e-06, "loss": 24.8567, "step": 2035 }, { "epoch": 0.18895591647331786, "grad_norm": 37.964115142822266, "learning_rate": 9.482155612832057e-06, "loss": 26.8462, "step": 2036 }, { "epoch": 0.18904872389791183, "grad_norm": 37.348636627197266, "learning_rate": 9.481475301744436e-06, "loss": 25.5317, "step": 2037 }, { "epoch": 0.1891415313225058, "grad_norm": 36.437896728515625, "learning_rate": 9.480794568513231e-06, "loss": 25.7024, "step": 2038 }, { "epoch": 0.18923433874709977, "grad_norm": 37.15581512451172, "learning_rate": 9.480113413202563e-06, "loss": 25.708, "step": 2039 }, { "epoch": 0.18932714617169374, "grad_norm": 33.82276916503906, "learning_rate": 9.479431835876596e-06, "loss": 23.1871, "step": 2040 }, { "epoch": 0.18941995359628772, "grad_norm": 36.896087646484375, "learning_rate": 9.478749836599531e-06, "loss": 24.0899, "step": 2041 }, { "epoch": 0.18951276102088166, "grad_norm": 38.367706298828125, "learning_rate": 9.478067415435613e-06, "loss": 25.4854, "step": 2042 }, { "epoch": 0.18960556844547563, "grad_norm": 39.989219665527344, "learning_rate": 9.477384572449124e-06, "loss": 24.2065, "step": 2043 }, { "epoch": 0.1896983758700696, "grad_norm": 38.314308166503906, "learning_rate": 9.476701307704384e-06, "loss": 25.7267, "step": 2044 }, { "epoch": 0.18979118329466357, "grad_norm": 42.45090866088867, "learning_rate": 9.476017621265758e-06, "loss": 23.3666, "step": 2045 }, { "epoch": 0.18988399071925754, "grad_norm": 38.01972579956055, "learning_rate": 9.475333513197645e-06, "loss": 23.9784, "step": 2046 }, { "epoch": 0.18997679814385152, "grad_norm": 37.16306686401367, "learning_rate": 9.474648983564488e-06, "loss": 24.9479, "step": 2047 }, { "epoch": 0.1900696055684455, "grad_norm": 39.07053756713867, "learning_rate": 9.473964032430765e-06, "loss": 25.7239, "step": 2048 }, { "epoch": 0.19016241299303943, "grad_norm": 43.63235092163086, "learning_rate": 9.473278659861e-06, "loss": 24.4493, "step": 2049 }, { "epoch": 0.1902552204176334, "grad_norm": 63.77310562133789, "learning_rate": 9.472592865919752e-06, "loss": 25.4517, "step": 2050 }, { "epoch": 0.19034802784222737, "grad_norm": 37.66568374633789, "learning_rate": 9.471906650671621e-06, "loss": 22.9742, "step": 2051 }, { "epoch": 0.19044083526682135, "grad_norm": 36.582115173339844, "learning_rate": 9.471220014181247e-06, "loss": 24.4719, "step": 2052 }, { "epoch": 0.19053364269141532, "grad_norm": 38.676395416259766, "learning_rate": 9.47053295651331e-06, "loss": 26.0902, "step": 2053 }, { "epoch": 0.1906264501160093, "grad_norm": 38.60817337036133, "learning_rate": 9.469845477732527e-06, "loss": 24.921, "step": 2054 }, { "epoch": 0.19071925754060326, "grad_norm": 37.081451416015625, "learning_rate": 9.469157577903658e-06, "loss": 24.8671, "step": 2055 }, { "epoch": 0.1908120649651972, "grad_norm": 41.150604248046875, "learning_rate": 9.468469257091502e-06, "loss": 24.6125, "step": 2056 }, { "epoch": 0.19090487238979117, "grad_norm": 366.24383544921875, "learning_rate": 9.467780515360896e-06, "loss": 24.8347, "step": 2057 }, { "epoch": 0.19099767981438515, "grad_norm": 39.522911071777344, "learning_rate": 9.467091352776719e-06, "loss": 24.4769, "step": 2058 }, { "epoch": 0.19109048723897912, "grad_norm": 38.70811462402344, "learning_rate": 9.466401769403888e-06, "loss": 25.3118, "step": 2059 }, { "epoch": 0.1911832946635731, "grad_norm": 40.93121337890625, "learning_rate": 9.465711765307358e-06, "loss": 24.4711, "step": 2060 }, { "epoch": 0.19127610208816706, "grad_norm": 38.667274475097656, "learning_rate": 9.465021340552128e-06, "loss": 23.5359, "step": 2061 }, { "epoch": 0.19136890951276103, "grad_norm": 38.42120361328125, "learning_rate": 9.464330495203234e-06, "loss": 25.2123, "step": 2062 }, { "epoch": 0.19146171693735498, "grad_norm": 40.679168701171875, "learning_rate": 9.463639229325751e-06, "loss": 25.4279, "step": 2063 }, { "epoch": 0.19155452436194895, "grad_norm": 37.53049850463867, "learning_rate": 9.462947542984795e-06, "loss": 26.2211, "step": 2064 }, { "epoch": 0.19164733178654292, "grad_norm": 38.203792572021484, "learning_rate": 9.462255436245522e-06, "loss": 22.9809, "step": 2065 }, { "epoch": 0.1917401392111369, "grad_norm": 39.82460403442383, "learning_rate": 9.461562909173126e-06, "loss": 26.3231, "step": 2066 }, { "epoch": 0.19183294663573086, "grad_norm": 38.45499038696289, "learning_rate": 9.460869961832838e-06, "loss": 22.5753, "step": 2067 }, { "epoch": 0.19192575406032483, "grad_norm": 37.77765655517578, "learning_rate": 9.460176594289938e-06, "loss": 22.4916, "step": 2068 }, { "epoch": 0.1920185614849188, "grad_norm": 42.29751205444336, "learning_rate": 9.459482806609734e-06, "loss": 25.5982, "step": 2069 }, { "epoch": 0.19211136890951275, "grad_norm": 38.26124572753906, "learning_rate": 9.458788598857583e-06, "loss": 22.1752, "step": 2070 }, { "epoch": 0.19220417633410672, "grad_norm": 38.25697708129883, "learning_rate": 9.458093971098874e-06, "loss": 24.5943, "step": 2071 }, { "epoch": 0.1922969837587007, "grad_norm": 39.404319763183594, "learning_rate": 9.457398923399044e-06, "loss": 25.336, "step": 2072 }, { "epoch": 0.19238979118329466, "grad_norm": 37.88859939575195, "learning_rate": 9.456703455823559e-06, "loss": 25.6598, "step": 2073 }, { "epoch": 0.19248259860788863, "grad_norm": 38.27128601074219, "learning_rate": 9.456007568437936e-06, "loss": 24.9536, "step": 2074 }, { "epoch": 0.1925754060324826, "grad_norm": 35.878662109375, "learning_rate": 9.45531126130772e-06, "loss": 24.1897, "step": 2075 }, { "epoch": 0.19266821345707658, "grad_norm": 38.61400604248047, "learning_rate": 9.454614534498506e-06, "loss": 24.2212, "step": 2076 }, { "epoch": 0.19276102088167055, "grad_norm": 40.63890838623047, "learning_rate": 9.453917388075921e-06, "loss": 27.1802, "step": 2077 }, { "epoch": 0.1928538283062645, "grad_norm": 49.876853942871094, "learning_rate": 9.453219822105637e-06, "loss": 26.5845, "step": 2078 }, { "epoch": 0.19294663573085846, "grad_norm": 37.83170700073242, "learning_rate": 9.45252183665336e-06, "loss": 25.3526, "step": 2079 }, { "epoch": 0.19303944315545243, "grad_norm": 36.67680740356445, "learning_rate": 9.451823431784843e-06, "loss": 23.4989, "step": 2080 }, { "epoch": 0.1931322505800464, "grad_norm": 36.725215911865234, "learning_rate": 9.45112460756587e-06, "loss": 23.6111, "step": 2081 }, { "epoch": 0.19322505800464038, "grad_norm": 40.21131896972656, "learning_rate": 9.450425364062267e-06, "loss": 23.6715, "step": 2082 }, { "epoch": 0.19331786542923435, "grad_norm": 40.0976448059082, "learning_rate": 9.449725701339906e-06, "loss": 24.2724, "step": 2083 }, { "epoch": 0.19341067285382832, "grad_norm": 40.262290954589844, "learning_rate": 9.449025619464689e-06, "loss": 25.7969, "step": 2084 }, { "epoch": 0.19350348027842226, "grad_norm": 37.91800308227539, "learning_rate": 9.448325118502566e-06, "loss": 24.0507, "step": 2085 }, { "epoch": 0.19359628770301623, "grad_norm": 36.57127380371094, "learning_rate": 9.447624198519518e-06, "loss": 24.1951, "step": 2086 }, { "epoch": 0.1936890951276102, "grad_norm": 40.67359161376953, "learning_rate": 9.446922859581574e-06, "loss": 24.199, "step": 2087 }, { "epoch": 0.19378190255220418, "grad_norm": 36.35812759399414, "learning_rate": 9.446221101754795e-06, "loss": 23.853, "step": 2088 }, { "epoch": 0.19387470997679815, "grad_norm": 39.82135772705078, "learning_rate": 9.445518925105287e-06, "loss": 24.4209, "step": 2089 }, { "epoch": 0.19396751740139212, "grad_norm": 40.11528396606445, "learning_rate": 9.444816329699193e-06, "loss": 25.498, "step": 2090 }, { "epoch": 0.1940603248259861, "grad_norm": 39.679039001464844, "learning_rate": 9.444113315602694e-06, "loss": 25.1736, "step": 2091 }, { "epoch": 0.19415313225058003, "grad_norm": 38.883914947509766, "learning_rate": 9.443409882882014e-06, "loss": 25.885, "step": 2092 }, { "epoch": 0.194245939675174, "grad_norm": 40.79563522338867, "learning_rate": 9.442706031603414e-06, "loss": 26.7668, "step": 2093 }, { "epoch": 0.19433874709976798, "grad_norm": 36.794673919677734, "learning_rate": 9.442001761833194e-06, "loss": 22.9059, "step": 2094 }, { "epoch": 0.19443155452436195, "grad_norm": 49.195533752441406, "learning_rate": 9.441297073637696e-06, "loss": 25.6092, "step": 2095 }, { "epoch": 0.19452436194895592, "grad_norm": 39.338600158691406, "learning_rate": 9.440591967083299e-06, "loss": 25.7141, "step": 2096 }, { "epoch": 0.1946171693735499, "grad_norm": 36.066043853759766, "learning_rate": 9.439886442236423e-06, "loss": 26.6838, "step": 2097 }, { "epoch": 0.19470997679814386, "grad_norm": 36.519474029541016, "learning_rate": 9.439180499163527e-06, "loss": 23.2121, "step": 2098 }, { "epoch": 0.1948027842227378, "grad_norm": 50.444236755371094, "learning_rate": 9.438474137931106e-06, "loss": 24.8332, "step": 2099 }, { "epoch": 0.19489559164733178, "grad_norm": 39.30329132080078, "learning_rate": 9.4377673586057e-06, "loss": 24.916, "step": 2100 }, { "epoch": 0.19498839907192575, "grad_norm": 97.27115631103516, "learning_rate": 9.437060161253888e-06, "loss": 25.547, "step": 2101 }, { "epoch": 0.19508120649651972, "grad_norm": 39.11786651611328, "learning_rate": 9.43635254594228e-06, "loss": 27.6644, "step": 2102 }, { "epoch": 0.1951740139211137, "grad_norm": 36.60896301269531, "learning_rate": 9.435644512737536e-06, "loss": 22.6929, "step": 2103 }, { "epoch": 0.19526682134570766, "grad_norm": 55.67731475830078, "learning_rate": 9.434936061706352e-06, "loss": 22.6788, "step": 2104 }, { "epoch": 0.19535962877030164, "grad_norm": 37.74757766723633, "learning_rate": 9.434227192915458e-06, "loss": 23.6328, "step": 2105 }, { "epoch": 0.19545243619489558, "grad_norm": 42.08207321166992, "learning_rate": 9.433517906431631e-06, "loss": 24.9358, "step": 2106 }, { "epoch": 0.19554524361948955, "grad_norm": 36.1379508972168, "learning_rate": 9.432808202321684e-06, "loss": 23.5479, "step": 2107 }, { "epoch": 0.19563805104408352, "grad_norm": 42.24201583862305, "learning_rate": 9.432098080652468e-06, "loss": 25.5621, "step": 2108 }, { "epoch": 0.1957308584686775, "grad_norm": 46.758026123046875, "learning_rate": 9.431387541490872e-06, "loss": 25.0785, "step": 2109 }, { "epoch": 0.19582366589327146, "grad_norm": 43.14231491088867, "learning_rate": 9.430676584903833e-06, "loss": 26.3132, "step": 2110 }, { "epoch": 0.19591647331786544, "grad_norm": 48.104339599609375, "learning_rate": 9.429965210958318e-06, "loss": 24.0104, "step": 2111 }, { "epoch": 0.1960092807424594, "grad_norm": 35.00914001464844, "learning_rate": 9.429253419721335e-06, "loss": 23.1264, "step": 2112 }, { "epoch": 0.19610208816705335, "grad_norm": 39.6728515625, "learning_rate": 9.428541211259938e-06, "loss": 24.9101, "step": 2113 }, { "epoch": 0.19619489559164732, "grad_norm": 36.459102630615234, "learning_rate": 9.42782858564121e-06, "loss": 23.4395, "step": 2114 }, { "epoch": 0.1962877030162413, "grad_norm": 38.33592224121094, "learning_rate": 9.427115542932279e-06, "loss": 24.0404, "step": 2115 }, { "epoch": 0.19638051044083527, "grad_norm": 37.904666900634766, "learning_rate": 9.426402083200315e-06, "loss": 23.4817, "step": 2116 }, { "epoch": 0.19647331786542924, "grad_norm": 42.88638687133789, "learning_rate": 9.425688206512524e-06, "loss": 24.9461, "step": 2117 }, { "epoch": 0.1965661252900232, "grad_norm": 36.67139434814453, "learning_rate": 9.424973912936147e-06, "loss": 23.6847, "step": 2118 }, { "epoch": 0.19665893271461718, "grad_norm": 36.8656005859375, "learning_rate": 9.424259202538473e-06, "loss": 23.7113, "step": 2119 }, { "epoch": 0.19675174013921112, "grad_norm": 41.9578742980957, "learning_rate": 9.423544075386824e-06, "loss": 25.3896, "step": 2120 }, { "epoch": 0.1968445475638051, "grad_norm": 39.640621185302734, "learning_rate": 9.422828531548564e-06, "loss": 26.0744, "step": 2121 }, { "epoch": 0.19693735498839907, "grad_norm": 35.29205322265625, "learning_rate": 9.422112571091094e-06, "loss": 24.6572, "step": 2122 }, { "epoch": 0.19703016241299304, "grad_norm": 39.496334075927734, "learning_rate": 9.421396194081857e-06, "loss": 26.0081, "step": 2123 }, { "epoch": 0.197122969837587, "grad_norm": 38.53788757324219, "learning_rate": 9.420679400588334e-06, "loss": 23.7227, "step": 2124 }, { "epoch": 0.19721577726218098, "grad_norm": 50.26318359375, "learning_rate": 9.419962190678046e-06, "loss": 24.147, "step": 2125 }, { "epoch": 0.19730858468677495, "grad_norm": 41.03491973876953, "learning_rate": 9.419244564418548e-06, "loss": 25.9533, "step": 2126 }, { "epoch": 0.19740139211136892, "grad_norm": 36.422096252441406, "learning_rate": 9.418526521877442e-06, "loss": 25.9509, "step": 2127 }, { "epoch": 0.19749419953596287, "grad_norm": 68.33576202392578, "learning_rate": 9.417808063122366e-06, "loss": 23.9958, "step": 2128 }, { "epoch": 0.19758700696055684, "grad_norm": 37.356361389160156, "learning_rate": 9.417089188220995e-06, "loss": 24.4092, "step": 2129 }, { "epoch": 0.1976798143851508, "grad_norm": 38.47190475463867, "learning_rate": 9.41636989724105e-06, "loss": 24.5994, "step": 2130 }, { "epoch": 0.19777262180974478, "grad_norm": 36.11150360107422, "learning_rate": 9.415650190250278e-06, "loss": 23.4367, "step": 2131 }, { "epoch": 0.19786542923433875, "grad_norm": 38.8201789855957, "learning_rate": 9.414930067316482e-06, "loss": 26.1303, "step": 2132 }, { "epoch": 0.19795823665893272, "grad_norm": 37.172550201416016, "learning_rate": 9.414209528507492e-06, "loss": 26.4601, "step": 2133 }, { "epoch": 0.1980510440835267, "grad_norm": 74.83688354492188, "learning_rate": 9.413488573891179e-06, "loss": 27.4761, "step": 2134 }, { "epoch": 0.19814385150812064, "grad_norm": 41.9213752746582, "learning_rate": 9.41276720353546e-06, "loss": 25.2592, "step": 2135 }, { "epoch": 0.1982366589327146, "grad_norm": 38.35238265991211, "learning_rate": 9.412045417508281e-06, "loss": 23.7083, "step": 2136 }, { "epoch": 0.19832946635730858, "grad_norm": 47.60683059692383, "learning_rate": 9.411323215877636e-06, "loss": 25.4672, "step": 2137 }, { "epoch": 0.19842227378190255, "grad_norm": 37.573917388916016, "learning_rate": 9.410600598711553e-06, "loss": 26.7763, "step": 2138 }, { "epoch": 0.19851508120649652, "grad_norm": 36.796913146972656, "learning_rate": 9.409877566078102e-06, "loss": 23.271, "step": 2139 }, { "epoch": 0.1986078886310905, "grad_norm": 35.850502014160156, "learning_rate": 9.409154118045389e-06, "loss": 23.4296, "step": 2140 }, { "epoch": 0.19870069605568447, "grad_norm": 41.34143829345703, "learning_rate": 9.408430254681562e-06, "loss": 24.8472, "step": 2141 }, { "epoch": 0.1987935034802784, "grad_norm": 39.962345123291016, "learning_rate": 9.407705976054808e-06, "loss": 25.8313, "step": 2142 }, { "epoch": 0.19888631090487238, "grad_norm": 37.885921478271484, "learning_rate": 9.40698128223335e-06, "loss": 23.3754, "step": 2143 }, { "epoch": 0.19897911832946635, "grad_norm": 35.237091064453125, "learning_rate": 9.406256173285455e-06, "loss": 23.643, "step": 2144 }, { "epoch": 0.19907192575406032, "grad_norm": 34.82457733154297, "learning_rate": 9.405530649279425e-06, "loss": 24.7868, "step": 2145 }, { "epoch": 0.1991647331786543, "grad_norm": 38.021568298339844, "learning_rate": 9.404804710283599e-06, "loss": 23.1942, "step": 2146 }, { "epoch": 0.19925754060324827, "grad_norm": 38.27031326293945, "learning_rate": 9.404078356366366e-06, "loss": 25.1362, "step": 2147 }, { "epoch": 0.19935034802784224, "grad_norm": 42.41630935668945, "learning_rate": 9.40335158759614e-06, "loss": 25.0498, "step": 2148 }, { "epoch": 0.19944315545243618, "grad_norm": 49.315799713134766, "learning_rate": 9.402624404041386e-06, "loss": 25.2598, "step": 2149 }, { "epoch": 0.19953596287703015, "grad_norm": 35.13861846923828, "learning_rate": 9.401896805770599e-06, "loss": 24.0299, "step": 2150 }, { "epoch": 0.19962877030162413, "grad_norm": 36.34459686279297, "learning_rate": 9.401168792852319e-06, "loss": 23.0363, "step": 2151 }, { "epoch": 0.1997215777262181, "grad_norm": 38.984100341796875, "learning_rate": 9.40044036535512e-06, "loss": 26.5006, "step": 2152 }, { "epoch": 0.19981438515081207, "grad_norm": 35.380470275878906, "learning_rate": 9.399711523347624e-06, "loss": 23.01, "step": 2153 }, { "epoch": 0.19990719257540604, "grad_norm": 48.30609893798828, "learning_rate": 9.398982266898481e-06, "loss": 24.2833, "step": 2154 }, { "epoch": 0.2, "grad_norm": 39.036136627197266, "learning_rate": 9.398252596076385e-06, "loss": 23.8499, "step": 2155 }, { "epoch": 0.20009280742459395, "grad_norm": 48.975059509277344, "learning_rate": 9.397522510950072e-06, "loss": 25.8596, "step": 2156 }, { "epoch": 0.20018561484918793, "grad_norm": 46.28031921386719, "learning_rate": 9.396792011588313e-06, "loss": 26.6003, "step": 2157 }, { "epoch": 0.2002784222737819, "grad_norm": 37.170082092285156, "learning_rate": 9.396061098059918e-06, "loss": 23.6748, "step": 2158 }, { "epoch": 0.20037122969837587, "grad_norm": 39.32961654663086, "learning_rate": 9.39532977043374e-06, "loss": 23.9988, "step": 2159 }, { "epoch": 0.20046403712296984, "grad_norm": 37.18091583251953, "learning_rate": 9.394598028778664e-06, "loss": 23.3242, "step": 2160 }, { "epoch": 0.2005568445475638, "grad_norm": 39.83272933959961, "learning_rate": 9.393865873163622e-06, "loss": 25.4976, "step": 2161 }, { "epoch": 0.20064965197215778, "grad_norm": 36.36101531982422, "learning_rate": 9.393133303657578e-06, "loss": 25.3538, "step": 2162 }, { "epoch": 0.20074245939675173, "grad_norm": 39.33948516845703, "learning_rate": 9.392400320329543e-06, "loss": 25.1696, "step": 2163 }, { "epoch": 0.2008352668213457, "grad_norm": 37.67832946777344, "learning_rate": 9.391666923248556e-06, "loss": 24.4782, "step": 2164 }, { "epoch": 0.20092807424593967, "grad_norm": 36.64630889892578, "learning_rate": 9.390933112483704e-06, "loss": 25.975, "step": 2165 }, { "epoch": 0.20102088167053364, "grad_norm": 40.6513557434082, "learning_rate": 9.390198888104113e-06, "loss": 25.2997, "step": 2166 }, { "epoch": 0.2011136890951276, "grad_norm": 36.65974807739258, "learning_rate": 9.389464250178942e-06, "loss": 24.1768, "step": 2167 }, { "epoch": 0.20120649651972158, "grad_norm": 36.257957458496094, "learning_rate": 9.38872919877739e-06, "loss": 24.7161, "step": 2168 }, { "epoch": 0.20129930394431556, "grad_norm": 38.69195556640625, "learning_rate": 9.387993733968702e-06, "loss": 24.8261, "step": 2169 }, { "epoch": 0.20139211136890953, "grad_norm": 32.56559371948242, "learning_rate": 9.387257855822154e-06, "loss": 22.6775, "step": 2170 }, { "epoch": 0.20148491879350347, "grad_norm": 37.28434371948242, "learning_rate": 9.386521564407063e-06, "loss": 25.5734, "step": 2171 }, { "epoch": 0.20157772621809744, "grad_norm": 39.56926727294922, "learning_rate": 9.385784859792787e-06, "loss": 24.3263, "step": 2172 }, { "epoch": 0.2016705336426914, "grad_norm": 38.927703857421875, "learning_rate": 9.385047742048724e-06, "loss": 24.6368, "step": 2173 }, { "epoch": 0.20176334106728538, "grad_norm": 37.859840393066406, "learning_rate": 9.384310211244303e-06, "loss": 25.3022, "step": 2174 }, { "epoch": 0.20185614849187936, "grad_norm": 35.7659797668457, "learning_rate": 9.383572267449004e-06, "loss": 23.3791, "step": 2175 }, { "epoch": 0.20194895591647333, "grad_norm": 46.372779846191406, "learning_rate": 9.382833910732333e-06, "loss": 24.173, "step": 2176 }, { "epoch": 0.2020417633410673, "grad_norm": 37.28851318359375, "learning_rate": 9.382095141163847e-06, "loss": 26.4636, "step": 2177 }, { "epoch": 0.20213457076566124, "grad_norm": 36.92414474487305, "learning_rate": 9.381355958813132e-06, "loss": 24.8898, "step": 2178 }, { "epoch": 0.2022273781902552, "grad_norm": 37.75584411621094, "learning_rate": 9.380616363749822e-06, "loss": 24.5758, "step": 2179 }, { "epoch": 0.20232018561484919, "grad_norm": 35.72252655029297, "learning_rate": 9.37987635604358e-06, "loss": 23.4655, "step": 2180 }, { "epoch": 0.20241299303944316, "grad_norm": 38.9272346496582, "learning_rate": 9.379135935764114e-06, "loss": 25.7313, "step": 2181 }, { "epoch": 0.20250580046403713, "grad_norm": 39.6054801940918, "learning_rate": 9.378395102981171e-06, "loss": 24.8125, "step": 2182 }, { "epoch": 0.2025986078886311, "grad_norm": 37.977787017822266, "learning_rate": 9.377653857764534e-06, "loss": 25.8792, "step": 2183 }, { "epoch": 0.20269141531322507, "grad_norm": 35.48011779785156, "learning_rate": 9.376912200184029e-06, "loss": 24.3341, "step": 2184 }, { "epoch": 0.20278422273781901, "grad_norm": 38.36208724975586, "learning_rate": 9.376170130309517e-06, "loss": 23.2271, "step": 2185 }, { "epoch": 0.20287703016241299, "grad_norm": 39.457847595214844, "learning_rate": 9.375427648210896e-06, "loss": 22.777, "step": 2186 }, { "epoch": 0.20296983758700696, "grad_norm": 36.269927978515625, "learning_rate": 9.37468475395811e-06, "loss": 24.3301, "step": 2187 }, { "epoch": 0.20306264501160093, "grad_norm": 37.15925598144531, "learning_rate": 9.373941447621138e-06, "loss": 24.3535, "step": 2188 }, { "epoch": 0.2031554524361949, "grad_norm": 38.255096435546875, "learning_rate": 9.373197729269994e-06, "loss": 25.3769, "step": 2189 }, { "epoch": 0.20324825986078887, "grad_norm": 38.76576232910156, "learning_rate": 9.372453598974738e-06, "loss": 24.13, "step": 2190 }, { "epoch": 0.20334106728538284, "grad_norm": 35.12339782714844, "learning_rate": 9.371709056805463e-06, "loss": 23.8387, "step": 2191 }, { "epoch": 0.2034338747099768, "grad_norm": 38.2668571472168, "learning_rate": 9.370964102832302e-06, "loss": 24.2757, "step": 2192 }, { "epoch": 0.20352668213457076, "grad_norm": 37.251243591308594, "learning_rate": 9.370218737125431e-06, "loss": 24.2795, "step": 2193 }, { "epoch": 0.20361948955916473, "grad_norm": 36.04050064086914, "learning_rate": 9.369472959755061e-06, "loss": 24.8464, "step": 2194 }, { "epoch": 0.2037122969837587, "grad_norm": 40.049068450927734, "learning_rate": 9.36872677079144e-06, "loss": 24.0161, "step": 2195 }, { "epoch": 0.20380510440835267, "grad_norm": 37.873680114746094, "learning_rate": 9.367980170304857e-06, "loss": 25.77, "step": 2196 }, { "epoch": 0.20389791183294664, "grad_norm": 36.09784698486328, "learning_rate": 9.367233158365643e-06, "loss": 25.1719, "step": 2197 }, { "epoch": 0.20399071925754061, "grad_norm": 45.372840881347656, "learning_rate": 9.366485735044161e-06, "loss": 23.2525, "step": 2198 }, { "epoch": 0.20408352668213456, "grad_norm": 35.79780197143555, "learning_rate": 9.365737900410821e-06, "loss": 23.3494, "step": 2199 }, { "epoch": 0.20417633410672853, "grad_norm": 35.53105545043945, "learning_rate": 9.364989654536063e-06, "loss": 24.4403, "step": 2200 }, { "epoch": 0.2042691415313225, "grad_norm": 35.49892807006836, "learning_rate": 9.36424099749037e-06, "loss": 25.559, "step": 2201 }, { "epoch": 0.20436194895591647, "grad_norm": 36.84992980957031, "learning_rate": 9.363491929344266e-06, "loss": 25.6792, "step": 2202 }, { "epoch": 0.20445475638051044, "grad_norm": 36.041080474853516, "learning_rate": 9.36274245016831e-06, "loss": 22.4222, "step": 2203 }, { "epoch": 0.20454756380510442, "grad_norm": 38.92860412597656, "learning_rate": 9.3619925600331e-06, "loss": 26.6727, "step": 2204 }, { "epoch": 0.2046403712296984, "grad_norm": 35.29120635986328, "learning_rate": 9.361242259009276e-06, "loss": 24.7378, "step": 2205 }, { "epoch": 0.20473317865429233, "grad_norm": 41.47079849243164, "learning_rate": 9.360491547167512e-06, "loss": 24.0556, "step": 2206 }, { "epoch": 0.2048259860788863, "grad_norm": 39.817108154296875, "learning_rate": 9.359740424578527e-06, "loss": 25.0478, "step": 2207 }, { "epoch": 0.20491879350348027, "grad_norm": 37.08626937866211, "learning_rate": 9.35898889131307e-06, "loss": 25.262, "step": 2208 }, { "epoch": 0.20501160092807424, "grad_norm": 38.089962005615234, "learning_rate": 9.358236947441937e-06, "loss": 25.6447, "step": 2209 }, { "epoch": 0.20510440835266822, "grad_norm": 37.44230270385742, "learning_rate": 9.357484593035957e-06, "loss": 24.4263, "step": 2210 }, { "epoch": 0.2051972157772622, "grad_norm": 38.49467468261719, "learning_rate": 9.356731828166001e-06, "loss": 25.7737, "step": 2211 }, { "epoch": 0.20529002320185616, "grad_norm": 39.60102081298828, "learning_rate": 9.355978652902979e-06, "loss": 24.5346, "step": 2212 }, { "epoch": 0.2053828306264501, "grad_norm": 35.706443786621094, "learning_rate": 9.355225067317835e-06, "loss": 23.7055, "step": 2213 }, { "epoch": 0.20547563805104407, "grad_norm": 37.693695068359375, "learning_rate": 9.354471071481557e-06, "loss": 23.2502, "step": 2214 }, { "epoch": 0.20556844547563805, "grad_norm": 38.71403884887695, "learning_rate": 9.353716665465169e-06, "loss": 24.1644, "step": 2215 }, { "epoch": 0.20566125290023202, "grad_norm": 36.419525146484375, "learning_rate": 9.352961849339735e-06, "loss": 25.0553, "step": 2216 }, { "epoch": 0.205754060324826, "grad_norm": 39.11294937133789, "learning_rate": 9.352206623176354e-06, "loss": 25.6818, "step": 2217 }, { "epoch": 0.20584686774941996, "grad_norm": 37.978946685791016, "learning_rate": 9.351450987046169e-06, "loss": 26.1697, "step": 2218 }, { "epoch": 0.20593967517401393, "grad_norm": 37.78565979003906, "learning_rate": 9.350694941020357e-06, "loss": 25.2357, "step": 2219 }, { "epoch": 0.2060324825986079, "grad_norm": 36.140384674072266, "learning_rate": 9.349938485170139e-06, "loss": 25.5905, "step": 2220 }, { "epoch": 0.20612529002320185, "grad_norm": 35.8546257019043, "learning_rate": 9.349181619566769e-06, "loss": 23.9683, "step": 2221 }, { "epoch": 0.20621809744779582, "grad_norm": 36.762550354003906, "learning_rate": 9.348424344281541e-06, "loss": 23.2729, "step": 2222 }, { "epoch": 0.2063109048723898, "grad_norm": 41.19538116455078, "learning_rate": 9.34766665938579e-06, "loss": 25.0793, "step": 2223 }, { "epoch": 0.20640371229698376, "grad_norm": 34.39731216430664, "learning_rate": 9.346908564950886e-06, "loss": 22.7024, "step": 2224 }, { "epoch": 0.20649651972157773, "grad_norm": 39.08955383300781, "learning_rate": 9.346150061048243e-06, "loss": 24.9811, "step": 2225 }, { "epoch": 0.2065893271461717, "grad_norm": 36.29082107543945, "learning_rate": 9.345391147749305e-06, "loss": 24.1915, "step": 2226 }, { "epoch": 0.20668213457076567, "grad_norm": 39.520381927490234, "learning_rate": 9.344631825125565e-06, "loss": 23.5881, "step": 2227 }, { "epoch": 0.20677494199535962, "grad_norm": 36.841644287109375, "learning_rate": 9.343872093248546e-06, "loss": 25.2547, "step": 2228 }, { "epoch": 0.2068677494199536, "grad_norm": 41.28740692138672, "learning_rate": 9.343111952189815e-06, "loss": 24.2088, "step": 2229 }, { "epoch": 0.20696055684454756, "grad_norm": 44.93677520751953, "learning_rate": 9.342351402020973e-06, "loss": 24.5718, "step": 2230 }, { "epoch": 0.20705336426914153, "grad_norm": 40.47905349731445, "learning_rate": 9.341590442813663e-06, "loss": 22.8144, "step": 2231 }, { "epoch": 0.2071461716937355, "grad_norm": 38.59719467163086, "learning_rate": 9.340829074639566e-06, "loss": 23.8066, "step": 2232 }, { "epoch": 0.20723897911832947, "grad_norm": 34.29934310913086, "learning_rate": 9.3400672975704e-06, "loss": 25.1003, "step": 2233 }, { "epoch": 0.20733178654292345, "grad_norm": 36.37393569946289, "learning_rate": 9.339305111677924e-06, "loss": 26.234, "step": 2234 }, { "epoch": 0.2074245939675174, "grad_norm": 104.05065155029297, "learning_rate": 9.338542517033931e-06, "loss": 26.7298, "step": 2235 }, { "epoch": 0.20751740139211136, "grad_norm": 41.70826721191406, "learning_rate": 9.337779513710258e-06, "loss": 25.266, "step": 2236 }, { "epoch": 0.20761020881670533, "grad_norm": 42.694557189941406, "learning_rate": 9.337016101778777e-06, "loss": 25.6304, "step": 2237 }, { "epoch": 0.2077030162412993, "grad_norm": 37.60780334472656, "learning_rate": 9.336252281311401e-06, "loss": 24.6879, "step": 2238 }, { "epoch": 0.20779582366589328, "grad_norm": 37.33119583129883, "learning_rate": 9.335488052380076e-06, "loss": 23.7242, "step": 2239 }, { "epoch": 0.20788863109048725, "grad_norm": 42.90601348876953, "learning_rate": 9.334723415056797e-06, "loss": 26.1224, "step": 2240 }, { "epoch": 0.20798143851508122, "grad_norm": 58.4874382019043, "learning_rate": 9.333958369413585e-06, "loss": 25.2691, "step": 2241 }, { "epoch": 0.20807424593967516, "grad_norm": 44.735679626464844, "learning_rate": 9.333192915522507e-06, "loss": 24.0784, "step": 2242 }, { "epoch": 0.20816705336426913, "grad_norm": 39.64402770996094, "learning_rate": 9.332427053455668e-06, "loss": 25.5723, "step": 2243 }, { "epoch": 0.2082598607888631, "grad_norm": 37.6245002746582, "learning_rate": 9.331660783285208e-06, "loss": 24.031, "step": 2244 }, { "epoch": 0.20835266821345708, "grad_norm": 40.35054397583008, "learning_rate": 9.330894105083312e-06, "loss": 24.3727, "step": 2245 }, { "epoch": 0.20844547563805105, "grad_norm": 41.058223724365234, "learning_rate": 9.330127018922195e-06, "loss": 24.8189, "step": 2246 }, { "epoch": 0.20853828306264502, "grad_norm": 36.56718063354492, "learning_rate": 9.329359524874114e-06, "loss": 24.1256, "step": 2247 }, { "epoch": 0.208631090487239, "grad_norm": 38.01142883300781, "learning_rate": 9.328591623011369e-06, "loss": 24.1314, "step": 2248 }, { "epoch": 0.20872389791183293, "grad_norm": 42.198463439941406, "learning_rate": 9.32782331340629e-06, "loss": 24.7465, "step": 2249 }, { "epoch": 0.2088167053364269, "grad_norm": 62.20770263671875, "learning_rate": 9.327054596131255e-06, "loss": 23.5837, "step": 2250 }, { "epoch": 0.20890951276102088, "grad_norm": 39.525062561035156, "learning_rate": 9.32628547125867e-06, "loss": 24.7237, "step": 2251 }, { "epoch": 0.20900232018561485, "grad_norm": 42.17083740234375, "learning_rate": 9.325515938860989e-06, "loss": 24.8116, "step": 2252 }, { "epoch": 0.20909512761020882, "grad_norm": 36.164031982421875, "learning_rate": 9.324745999010696e-06, "loss": 25.3091, "step": 2253 }, { "epoch": 0.2091879350348028, "grad_norm": 40.8089599609375, "learning_rate": 9.32397565178032e-06, "loss": 25.236, "step": 2254 }, { "epoch": 0.20928074245939676, "grad_norm": 38.888267517089844, "learning_rate": 9.323204897242424e-06, "loss": 24.5783, "step": 2255 }, { "epoch": 0.2093735498839907, "grad_norm": 36.73490524291992, "learning_rate": 9.322433735469614e-06, "loss": 25.038, "step": 2256 }, { "epoch": 0.20946635730858468, "grad_norm": 38.449607849121094, "learning_rate": 9.321662166534528e-06, "loss": 25.0105, "step": 2257 }, { "epoch": 0.20955916473317865, "grad_norm": 36.742950439453125, "learning_rate": 9.320890190509848e-06, "loss": 23.9829, "step": 2258 }, { "epoch": 0.20965197215777262, "grad_norm": 56.489402770996094, "learning_rate": 9.320117807468291e-06, "loss": 23.4232, "step": 2259 }, { "epoch": 0.2097447795823666, "grad_norm": 41.42428207397461, "learning_rate": 9.319345017482617e-06, "loss": 24.4155, "step": 2260 }, { "epoch": 0.20983758700696056, "grad_norm": 39.890377044677734, "learning_rate": 9.318571820625614e-06, "loss": 25.4006, "step": 2261 }, { "epoch": 0.20993039443155453, "grad_norm": 45.48017883300781, "learning_rate": 9.317798216970122e-06, "loss": 25.0409, "step": 2262 }, { "epoch": 0.21002320185614848, "grad_norm": 39.49263000488281, "learning_rate": 9.31702420658901e-06, "loss": 22.9006, "step": 2263 }, { "epoch": 0.21011600928074245, "grad_norm": 37.66644287109375, "learning_rate": 9.316249789555185e-06, "loss": 25.3975, "step": 2264 }, { "epoch": 0.21020881670533642, "grad_norm": 38.852760314941406, "learning_rate": 9.315474965941599e-06, "loss": 24.1835, "step": 2265 }, { "epoch": 0.2103016241299304, "grad_norm": 40.430110931396484, "learning_rate": 9.314699735821236e-06, "loss": 23.929, "step": 2266 }, { "epoch": 0.21039443155452436, "grad_norm": 40.262672424316406, "learning_rate": 9.313924099267123e-06, "loss": 25.4471, "step": 2267 }, { "epoch": 0.21048723897911834, "grad_norm": 35.55903625488281, "learning_rate": 9.313148056352321e-06, "loss": 24.8411, "step": 2268 }, { "epoch": 0.2105800464037123, "grad_norm": 43.17412567138672, "learning_rate": 9.312371607149933e-06, "loss": 23.6224, "step": 2269 }, { "epoch": 0.21067285382830628, "grad_norm": 41.386444091796875, "learning_rate": 9.311594751733097e-06, "loss": 22.6175, "step": 2270 }, { "epoch": 0.21076566125290022, "grad_norm": 38.691734313964844, "learning_rate": 9.31081749017499e-06, "loss": 23.3566, "step": 2271 }, { "epoch": 0.2108584686774942, "grad_norm": 41.134674072265625, "learning_rate": 9.31003982254883e-06, "loss": 25.9961, "step": 2272 }, { "epoch": 0.21095127610208816, "grad_norm": 38.14846420288086, "learning_rate": 9.309261748927873e-06, "loss": 23.8374, "step": 2273 }, { "epoch": 0.21104408352668214, "grad_norm": 35.359981536865234, "learning_rate": 9.308483269385406e-06, "loss": 23.4133, "step": 2274 }, { "epoch": 0.2111368909512761, "grad_norm": 40.541542053222656, "learning_rate": 9.307704383994765e-06, "loss": 25.9788, "step": 2275 }, { "epoch": 0.21122969837587008, "grad_norm": 50.92201614379883, "learning_rate": 9.306925092829318e-06, "loss": 26.4001, "step": 2276 }, { "epoch": 0.21132250580046405, "grad_norm": 35.421844482421875, "learning_rate": 9.306145395962468e-06, "loss": 24.5035, "step": 2277 }, { "epoch": 0.211415313225058, "grad_norm": 34.872310638427734, "learning_rate": 9.305365293467666e-06, "loss": 22.6034, "step": 2278 }, { "epoch": 0.21150812064965197, "grad_norm": 38.28864669799805, "learning_rate": 9.304584785418394e-06, "loss": 23.1648, "step": 2279 }, { "epoch": 0.21160092807424594, "grad_norm": 37.495147705078125, "learning_rate": 9.303803871888172e-06, "loss": 23.9412, "step": 2280 }, { "epoch": 0.2116937354988399, "grad_norm": 40.162689208984375, "learning_rate": 9.303022552950563e-06, "loss": 25.036, "step": 2281 }, { "epoch": 0.21178654292343388, "grad_norm": 37.78753662109375, "learning_rate": 9.302240828679163e-06, "loss": 23.9202, "step": 2282 }, { "epoch": 0.21187935034802785, "grad_norm": 36.457767486572266, "learning_rate": 9.301458699147607e-06, "loss": 24.6095, "step": 2283 }, { "epoch": 0.21197215777262182, "grad_norm": 41.45712661743164, "learning_rate": 9.300676164429575e-06, "loss": 24.2414, "step": 2284 }, { "epoch": 0.21206496519721577, "grad_norm": 37.34781265258789, "learning_rate": 9.299893224598775e-06, "loss": 25.6753, "step": 2285 }, { "epoch": 0.21215777262180974, "grad_norm": 38.59156036376953, "learning_rate": 9.29910987972896e-06, "loss": 25.9583, "step": 2286 }, { "epoch": 0.2122505800464037, "grad_norm": 39.288028717041016, "learning_rate": 9.298326129893917e-06, "loss": 25.8717, "step": 2287 }, { "epoch": 0.21234338747099768, "grad_norm": 46.744232177734375, "learning_rate": 9.297541975167476e-06, "loss": 23.7344, "step": 2288 }, { "epoch": 0.21243619489559165, "grad_norm": 37.65336608886719, "learning_rate": 9.2967574156235e-06, "loss": 25.5253, "step": 2289 }, { "epoch": 0.21252900232018562, "grad_norm": 58.51715850830078, "learning_rate": 9.295972451335896e-06, "loss": 24.9566, "step": 2290 }, { "epoch": 0.2126218097447796, "grad_norm": 42.86698913574219, "learning_rate": 9.295187082378602e-06, "loss": 25.4158, "step": 2291 }, { "epoch": 0.21271461716937354, "grad_norm": 38.477603912353516, "learning_rate": 9.2944013088256e-06, "loss": 25.3764, "step": 2292 }, { "epoch": 0.2128074245939675, "grad_norm": 36.22773361206055, "learning_rate": 9.293615130750906e-06, "loss": 24.0083, "step": 2293 }, { "epoch": 0.21290023201856148, "grad_norm": 40.94712448120117, "learning_rate": 9.292828548228577e-06, "loss": 23.5615, "step": 2294 }, { "epoch": 0.21299303944315545, "grad_norm": 37.07999801635742, "learning_rate": 9.292041561332707e-06, "loss": 22.4001, "step": 2295 }, { "epoch": 0.21308584686774942, "grad_norm": 67.98808288574219, "learning_rate": 9.291254170137428e-06, "loss": 24.8583, "step": 2296 }, { "epoch": 0.2131786542923434, "grad_norm": 37.47377395629883, "learning_rate": 9.29046637471691e-06, "loss": 23.8043, "step": 2297 }, { "epoch": 0.21327146171693737, "grad_norm": 34.3560905456543, "learning_rate": 9.289678175145363e-06, "loss": 22.6972, "step": 2298 }, { "epoch": 0.2133642691415313, "grad_norm": 35.493385314941406, "learning_rate": 9.288889571497031e-06, "loss": 24.7789, "step": 2299 }, { "epoch": 0.21345707656612528, "grad_norm": 38.912628173828125, "learning_rate": 9.2881005638462e-06, "loss": 25.1482, "step": 2300 }, { "epoch": 0.21354988399071925, "grad_norm": 37.080535888671875, "learning_rate": 9.287311152267193e-06, "loss": 24.0059, "step": 2301 }, { "epoch": 0.21364269141531322, "grad_norm": 35.86113739013672, "learning_rate": 9.28652133683437e-06, "loss": 24.6561, "step": 2302 }, { "epoch": 0.2137354988399072, "grad_norm": 34.86043930053711, "learning_rate": 9.285731117622127e-06, "loss": 25.8955, "step": 2303 }, { "epoch": 0.21382830626450117, "grad_norm": 34.14603042602539, "learning_rate": 9.284940494704906e-06, "loss": 24.1778, "step": 2304 }, { "epoch": 0.21392111368909514, "grad_norm": 38.03984451293945, "learning_rate": 9.284149468157177e-06, "loss": 22.7218, "step": 2305 }, { "epoch": 0.21401392111368908, "grad_norm": 37.32996368408203, "learning_rate": 9.283358038053453e-06, "loss": 24.4632, "step": 2306 }, { "epoch": 0.21410672853828305, "grad_norm": 34.947784423828125, "learning_rate": 9.282566204468289e-06, "loss": 24.0777, "step": 2307 }, { "epoch": 0.21419953596287702, "grad_norm": 40.23040771484375, "learning_rate": 9.281773967476268e-06, "loss": 25.4449, "step": 2308 }, { "epoch": 0.214292343387471, "grad_norm": 38.47694396972656, "learning_rate": 9.280981327152022e-06, "loss": 25.3493, "step": 2309 }, { "epoch": 0.21438515081206497, "grad_norm": 34.40301513671875, "learning_rate": 9.28018828357021e-06, "loss": 23.9944, "step": 2310 }, { "epoch": 0.21447795823665894, "grad_norm": 39.330848693847656, "learning_rate": 9.27939483680554e-06, "loss": 23.4556, "step": 2311 }, { "epoch": 0.2145707656612529, "grad_norm": 37.74452209472656, "learning_rate": 9.27860098693275e-06, "loss": 23.5374, "step": 2312 }, { "epoch": 0.21466357308584688, "grad_norm": 37.50822067260742, "learning_rate": 9.27780673402662e-06, "loss": 25.2888, "step": 2313 }, { "epoch": 0.21475638051044083, "grad_norm": 37.86259078979492, "learning_rate": 9.277012078161965e-06, "loss": 24.5647, "step": 2314 }, { "epoch": 0.2148491879350348, "grad_norm": 37.87661361694336, "learning_rate": 9.276217019413639e-06, "loss": 23.6341, "step": 2315 }, { "epoch": 0.21494199535962877, "grad_norm": 35.89344787597656, "learning_rate": 9.275421557856536e-06, "loss": 23.3848, "step": 2316 }, { "epoch": 0.21503480278422274, "grad_norm": 37.91417694091797, "learning_rate": 9.27462569356559e-06, "loss": 24.3685, "step": 2317 }, { "epoch": 0.2151276102088167, "grad_norm": 35.55136489868164, "learning_rate": 9.27382942661576e-06, "loss": 23.6433, "step": 2318 }, { "epoch": 0.21522041763341068, "grad_norm": 38.37742233276367, "learning_rate": 9.273032757082062e-06, "loss": 23.672, "step": 2319 }, { "epoch": 0.21531322505800465, "grad_norm": 39.662410736083984, "learning_rate": 9.272235685039534e-06, "loss": 26.1408, "step": 2320 }, { "epoch": 0.2154060324825986, "grad_norm": 38.899959564208984, "learning_rate": 9.271438210563263e-06, "loss": 24.7998, "step": 2321 }, { "epoch": 0.21549883990719257, "grad_norm": 39.82229995727539, "learning_rate": 9.270640333728364e-06, "loss": 24.9474, "step": 2322 }, { "epoch": 0.21559164733178654, "grad_norm": 39.61211395263672, "learning_rate": 9.269842054609998e-06, "loss": 25.618, "step": 2323 }, { "epoch": 0.2156844547563805, "grad_norm": 38.3154411315918, "learning_rate": 9.269043373283362e-06, "loss": 25.5188, "step": 2324 }, { "epoch": 0.21577726218097448, "grad_norm": 37.76799392700195, "learning_rate": 9.268244289823688e-06, "loss": 23.5058, "step": 2325 }, { "epoch": 0.21587006960556845, "grad_norm": 36.235286712646484, "learning_rate": 9.267444804306246e-06, "loss": 24.1046, "step": 2326 }, { "epoch": 0.21596287703016243, "grad_norm": 44.96757888793945, "learning_rate": 9.266644916806347e-06, "loss": 25.7951, "step": 2327 }, { "epoch": 0.21605568445475637, "grad_norm": 39.927696228027344, "learning_rate": 9.26584462739934e-06, "loss": 24.1978, "step": 2328 }, { "epoch": 0.21614849187935034, "grad_norm": 38.74665451049805, "learning_rate": 9.265043936160606e-06, "loss": 25.2088, "step": 2329 }, { "epoch": 0.2162412993039443, "grad_norm": 39.684444427490234, "learning_rate": 9.264242843165574e-06, "loss": 24.3047, "step": 2330 }, { "epoch": 0.21633410672853828, "grad_norm": 38.697898864746094, "learning_rate": 9.2634413484897e-06, "loss": 23.0732, "step": 2331 }, { "epoch": 0.21642691415313225, "grad_norm": 39.422943115234375, "learning_rate": 9.262639452208487e-06, "loss": 22.5541, "step": 2332 }, { "epoch": 0.21651972157772623, "grad_norm": 45.2515983581543, "learning_rate": 9.261837154397468e-06, "loss": 23.3618, "step": 2333 }, { "epoch": 0.2166125290023202, "grad_norm": 37.48080825805664, "learning_rate": 9.261034455132217e-06, "loss": 26.3325, "step": 2334 }, { "epoch": 0.21670533642691414, "grad_norm": 39.90271759033203, "learning_rate": 9.26023135448835e-06, "loss": 25.623, "step": 2335 }, { "epoch": 0.2167981438515081, "grad_norm": 36.08759307861328, "learning_rate": 9.259427852541515e-06, "loss": 24.3145, "step": 2336 }, { "epoch": 0.21689095127610208, "grad_norm": 37.14887237548828, "learning_rate": 9.258623949367396e-06, "loss": 23.889, "step": 2337 }, { "epoch": 0.21698375870069606, "grad_norm": 38.320735931396484, "learning_rate": 9.257819645041725e-06, "loss": 25.9084, "step": 2338 }, { "epoch": 0.21707656612529003, "grad_norm": 38.33339309692383, "learning_rate": 9.257014939640263e-06, "loss": 25.9707, "step": 2339 }, { "epoch": 0.217169373549884, "grad_norm": 36.82762908935547, "learning_rate": 9.25620983323881e-06, "loss": 24.6815, "step": 2340 }, { "epoch": 0.21726218097447797, "grad_norm": 34.81380081176758, "learning_rate": 9.255404325913206e-06, "loss": 24.126, "step": 2341 }, { "epoch": 0.2173549883990719, "grad_norm": 38.749637603759766, "learning_rate": 9.254598417739328e-06, "loss": 24.6584, "step": 2342 }, { "epoch": 0.21744779582366588, "grad_norm": 35.41460418701172, "learning_rate": 9.253792108793092e-06, "loss": 23.4326, "step": 2343 }, { "epoch": 0.21754060324825986, "grad_norm": 58.007057189941406, "learning_rate": 9.252985399150444e-06, "loss": 25.2893, "step": 2344 }, { "epoch": 0.21763341067285383, "grad_norm": 37.944244384765625, "learning_rate": 9.252178288887382e-06, "loss": 25.0204, "step": 2345 }, { "epoch": 0.2177262180974478, "grad_norm": 39.704044342041016, "learning_rate": 9.251370778079929e-06, "loss": 24.8333, "step": 2346 }, { "epoch": 0.21781902552204177, "grad_norm": 38.63351821899414, "learning_rate": 9.250562866804152e-06, "loss": 25.3158, "step": 2347 }, { "epoch": 0.21791183294663574, "grad_norm": 38.432640075683594, "learning_rate": 9.249754555136154e-06, "loss": 26.1502, "step": 2348 }, { "epoch": 0.21800464037122969, "grad_norm": 38.46434783935547, "learning_rate": 9.248945843152075e-06, "loss": 23.6465, "step": 2349 }, { "epoch": 0.21809744779582366, "grad_norm": 36.92293167114258, "learning_rate": 9.248136730928095e-06, "loss": 24.6624, "step": 2350 }, { "epoch": 0.21819025522041763, "grad_norm": 37.817596435546875, "learning_rate": 9.247327218540429e-06, "loss": 23.6222, "step": 2351 }, { "epoch": 0.2182830626450116, "grad_norm": 37.656063079833984, "learning_rate": 9.246517306065332e-06, "loss": 23.99, "step": 2352 }, { "epoch": 0.21837587006960557, "grad_norm": 36.0504264831543, "learning_rate": 9.245706993579095e-06, "loss": 23.9845, "step": 2353 }, { "epoch": 0.21846867749419954, "grad_norm": 36.95934295654297, "learning_rate": 9.24489628115805e-06, "loss": 27.0588, "step": 2354 }, { "epoch": 0.2185614849187935, "grad_norm": 38.57282638549805, "learning_rate": 9.244085168878558e-06, "loss": 24.7593, "step": 2355 }, { "epoch": 0.21865429234338746, "grad_norm": 38.31361770629883, "learning_rate": 9.24327365681703e-06, "loss": 24.9537, "step": 2356 }, { "epoch": 0.21874709976798143, "grad_norm": 41.67979431152344, "learning_rate": 9.242461745049904e-06, "loss": 24.2424, "step": 2357 }, { "epoch": 0.2188399071925754, "grad_norm": 36.03948974609375, "learning_rate": 9.241649433653663e-06, "loss": 26.4139, "step": 2358 }, { "epoch": 0.21893271461716937, "grad_norm": 38.55380630493164, "learning_rate": 9.240836722704824e-06, "loss": 23.9159, "step": 2359 }, { "epoch": 0.21902552204176334, "grad_norm": 39.70395278930664, "learning_rate": 9.240023612279942e-06, "loss": 25.7158, "step": 2360 }, { "epoch": 0.21911832946635731, "grad_norm": 37.66395950317383, "learning_rate": 9.23921010245561e-06, "loss": 23.3141, "step": 2361 }, { "epoch": 0.21921113689095129, "grad_norm": 138.5194549560547, "learning_rate": 9.238396193308457e-06, "loss": 24.428, "step": 2362 }, { "epoch": 0.21930394431554526, "grad_norm": 33.99930191040039, "learning_rate": 9.237581884915153e-06, "loss": 25.0251, "step": 2363 }, { "epoch": 0.2193967517401392, "grad_norm": 36.81308364868164, "learning_rate": 9.236767177352403e-06, "loss": 24.2588, "step": 2364 }, { "epoch": 0.21948955916473317, "grad_norm": 36.85218048095703, "learning_rate": 9.235952070696952e-06, "loss": 22.7581, "step": 2365 }, { "epoch": 0.21958236658932714, "grad_norm": 34.485408782958984, "learning_rate": 9.235136565025578e-06, "loss": 24.0269, "step": 2366 }, { "epoch": 0.21967517401392112, "grad_norm": 236.53933715820312, "learning_rate": 9.234320660415104e-06, "loss": 23.0725, "step": 2367 }, { "epoch": 0.2197679814385151, "grad_norm": 37.149227142333984, "learning_rate": 9.233504356942379e-06, "loss": 24.1276, "step": 2368 }, { "epoch": 0.21986078886310906, "grad_norm": 38.69004821777344, "learning_rate": 9.232687654684304e-06, "loss": 25.2881, "step": 2369 }, { "epoch": 0.21995359628770303, "grad_norm": 34.7833366394043, "learning_rate": 9.231870553717808e-06, "loss": 23.7959, "step": 2370 }, { "epoch": 0.22004640371229697, "grad_norm": 36.98299789428711, "learning_rate": 9.23105305411986e-06, "loss": 23.1303, "step": 2371 }, { "epoch": 0.22013921113689094, "grad_norm": 39.13471221923828, "learning_rate": 9.230235155967462e-06, "loss": 25.5998, "step": 2372 }, { "epoch": 0.22023201856148492, "grad_norm": 41.86396026611328, "learning_rate": 9.229416859337663e-06, "loss": 25.9992, "step": 2373 }, { "epoch": 0.2203248259860789, "grad_norm": 35.6318473815918, "learning_rate": 9.228598164307544e-06, "loss": 23.3902, "step": 2374 }, { "epoch": 0.22041763341067286, "grad_norm": 35.93389129638672, "learning_rate": 9.227779070954221e-06, "loss": 23.6588, "step": 2375 }, { "epoch": 0.22051044083526683, "grad_norm": 34.22860336303711, "learning_rate": 9.226959579354855e-06, "loss": 24.2551, "step": 2376 }, { "epoch": 0.2206032482598608, "grad_norm": 35.48367691040039, "learning_rate": 9.226139689586635e-06, "loss": 23.7851, "step": 2377 }, { "epoch": 0.22069605568445474, "grad_norm": 36.94647979736328, "learning_rate": 9.225319401726798e-06, "loss": 24.0747, "step": 2378 }, { "epoch": 0.22078886310904872, "grad_norm": 37.0545768737793, "learning_rate": 9.224498715852608e-06, "loss": 21.6597, "step": 2379 }, { "epoch": 0.2208816705336427, "grad_norm": 33.15138244628906, "learning_rate": 9.223677632041374e-06, "loss": 21.8541, "step": 2380 }, { "epoch": 0.22097447795823666, "grad_norm": 49.050941467285156, "learning_rate": 9.22285615037044e-06, "loss": 24.792, "step": 2381 }, { "epoch": 0.22106728538283063, "grad_norm": 35.800682067871094, "learning_rate": 9.222034270917187e-06, "loss": 25.1994, "step": 2382 }, { "epoch": 0.2211600928074246, "grad_norm": 36.242767333984375, "learning_rate": 9.221211993759033e-06, "loss": 25.6576, "step": 2383 }, { "epoch": 0.22125290023201857, "grad_norm": 34.8701286315918, "learning_rate": 9.220389318973437e-06, "loss": 24.0636, "step": 2384 }, { "epoch": 0.22134570765661252, "grad_norm": 35.7218017578125, "learning_rate": 9.21956624663789e-06, "loss": 23.8485, "step": 2385 }, { "epoch": 0.2214385150812065, "grad_norm": 34.16299057006836, "learning_rate": 9.218742776829925e-06, "loss": 24.1323, "step": 2386 }, { "epoch": 0.22153132250580046, "grad_norm": 35.46421432495117, "learning_rate": 9.217918909627111e-06, "loss": 23.8378, "step": 2387 }, { "epoch": 0.22162412993039443, "grad_norm": 35.69295883178711, "learning_rate": 9.217094645107052e-06, "loss": 23.1421, "step": 2388 }, { "epoch": 0.2217169373549884, "grad_norm": 36.54906463623047, "learning_rate": 9.216269983347396e-06, "loss": 24.3358, "step": 2389 }, { "epoch": 0.22180974477958237, "grad_norm": 35.9185905456543, "learning_rate": 9.21544492442582e-06, "loss": 24.5758, "step": 2390 }, { "epoch": 0.22190255220417635, "grad_norm": 35.80988311767578, "learning_rate": 9.214619468420043e-06, "loss": 24.909, "step": 2391 }, { "epoch": 0.2219953596287703, "grad_norm": 290.73382568359375, "learning_rate": 9.213793615407823e-06, "loss": 26.0853, "step": 2392 }, { "epoch": 0.22208816705336426, "grad_norm": 39.05647277832031, "learning_rate": 9.212967365466949e-06, "loss": 24.155, "step": 2393 }, { "epoch": 0.22218097447795823, "grad_norm": 38.4732551574707, "learning_rate": 9.212140718675257e-06, "loss": 24.9974, "step": 2394 }, { "epoch": 0.2222737819025522, "grad_norm": 40.219627380371094, "learning_rate": 9.211313675110614e-06, "loss": 24.203, "step": 2395 }, { "epoch": 0.22236658932714617, "grad_norm": 35.914669036865234, "learning_rate": 9.21048623485092e-06, "loss": 25.8679, "step": 2396 }, { "epoch": 0.22245939675174015, "grad_norm": 37.99845504760742, "learning_rate": 9.209658397974123e-06, "loss": 24.3897, "step": 2397 }, { "epoch": 0.22255220417633412, "grad_norm": 106.12580108642578, "learning_rate": 9.208830164558204e-06, "loss": 23.8116, "step": 2398 }, { "epoch": 0.22264501160092806, "grad_norm": 36.508506774902344, "learning_rate": 9.208001534681178e-06, "loss": 21.7667, "step": 2399 }, { "epoch": 0.22273781902552203, "grad_norm": 38.4080696105957, "learning_rate": 9.207172508421099e-06, "loss": 25.6537, "step": 2400 }, { "epoch": 0.222830626450116, "grad_norm": 37.74574661254883, "learning_rate": 9.20634308585606e-06, "loss": 24.4079, "step": 2401 }, { "epoch": 0.22292343387470998, "grad_norm": 37.287635803222656, "learning_rate": 9.20551326706419e-06, "loss": 24.4435, "step": 2402 }, { "epoch": 0.22301624129930395, "grad_norm": 38.49264907836914, "learning_rate": 9.204683052123658e-06, "loss": 22.524, "step": 2403 }, { "epoch": 0.22310904872389792, "grad_norm": 41.50798416137695, "learning_rate": 9.203852441112665e-06, "loss": 26.4184, "step": 2404 }, { "epoch": 0.2232018561484919, "grad_norm": 40.58479690551758, "learning_rate": 9.203021434109458e-06, "loss": 23.9963, "step": 2405 }, { "epoch": 0.22329466357308583, "grad_norm": 32.41161346435547, "learning_rate": 9.20219003119231e-06, "loss": 23.496, "step": 2406 }, { "epoch": 0.2233874709976798, "grad_norm": 34.074615478515625, "learning_rate": 9.201358232439539e-06, "loss": 24.5504, "step": 2407 }, { "epoch": 0.22348027842227378, "grad_norm": 35.468265533447266, "learning_rate": 9.200526037929497e-06, "loss": 24.7081, "step": 2408 }, { "epoch": 0.22357308584686775, "grad_norm": 39.584041595458984, "learning_rate": 9.199693447740578e-06, "loss": 25.2465, "step": 2409 }, { "epoch": 0.22366589327146172, "grad_norm": 36.594505310058594, "learning_rate": 9.198860461951207e-06, "loss": 23.6703, "step": 2410 }, { "epoch": 0.2237587006960557, "grad_norm": 40.9487190246582, "learning_rate": 9.198027080639849e-06, "loss": 22.9073, "step": 2411 }, { "epoch": 0.22385150812064966, "grad_norm": 36.43324279785156, "learning_rate": 9.197193303885008e-06, "loss": 24.5838, "step": 2412 }, { "epoch": 0.22394431554524363, "grad_norm": 36.80506134033203, "learning_rate": 9.196359131765223e-06, "loss": 24.7675, "step": 2413 }, { "epoch": 0.22403712296983758, "grad_norm": 39.03681945800781, "learning_rate": 9.195524564359072e-06, "loss": 24.6873, "step": 2414 }, { "epoch": 0.22412993039443155, "grad_norm": 36.86278533935547, "learning_rate": 9.194689601745167e-06, "loss": 24.3338, "step": 2415 }, { "epoch": 0.22422273781902552, "grad_norm": 40.06242752075195, "learning_rate": 9.19385424400216e-06, "loss": 24.7928, "step": 2416 }, { "epoch": 0.2243155452436195, "grad_norm": 43.491031646728516, "learning_rate": 9.19301849120874e-06, "loss": 26.2928, "step": 2417 }, { "epoch": 0.22440835266821346, "grad_norm": 40.94717788696289, "learning_rate": 9.192182343443634e-06, "loss": 24.4135, "step": 2418 }, { "epoch": 0.22450116009280743, "grad_norm": 35.311866760253906, "learning_rate": 9.191345800785603e-06, "loss": 24.3698, "step": 2419 }, { "epoch": 0.2245939675174014, "grad_norm": 41.74917221069336, "learning_rate": 9.190508863313449e-06, "loss": 23.9198, "step": 2420 }, { "epoch": 0.22468677494199535, "grad_norm": 38.323150634765625, "learning_rate": 9.189671531106007e-06, "loss": 25.257, "step": 2421 }, { "epoch": 0.22477958236658932, "grad_norm": 32.52473831176758, "learning_rate": 9.188833804242155e-06, "loss": 21.5785, "step": 2422 }, { "epoch": 0.2248723897911833, "grad_norm": 34.45119094848633, "learning_rate": 9.1879956828008e-06, "loss": 24.1486, "step": 2423 }, { "epoch": 0.22496519721577726, "grad_norm": 39.05157470703125, "learning_rate": 9.187157166860894e-06, "loss": 24.7251, "step": 2424 }, { "epoch": 0.22505800464037123, "grad_norm": 35.71006774902344, "learning_rate": 9.186318256501424e-06, "loss": 24.338, "step": 2425 }, { "epoch": 0.2251508120649652, "grad_norm": 34.968082427978516, "learning_rate": 9.18547895180141e-06, "loss": 25.7785, "step": 2426 }, { "epoch": 0.22524361948955918, "grad_norm": 35.59681701660156, "learning_rate": 9.184639252839916e-06, "loss": 25.3089, "step": 2427 }, { "epoch": 0.22533642691415312, "grad_norm": 37.74614715576172, "learning_rate": 9.183799159696037e-06, "loss": 24.1453, "step": 2428 }, { "epoch": 0.2254292343387471, "grad_norm": 37.28047180175781, "learning_rate": 9.182958672448908e-06, "loss": 25.8359, "step": 2429 }, { "epoch": 0.22552204176334106, "grad_norm": 35.372474670410156, "learning_rate": 9.182117791177702e-06, "loss": 23.1922, "step": 2430 }, { "epoch": 0.22561484918793503, "grad_norm": 39.85996627807617, "learning_rate": 9.181276515961626e-06, "loss": 24.1048, "step": 2431 }, { "epoch": 0.225707656612529, "grad_norm": 35.24520492553711, "learning_rate": 9.180434846879928e-06, "loss": 23.8543, "step": 2432 }, { "epoch": 0.22580046403712298, "grad_norm": 36.63982391357422, "learning_rate": 9.17959278401189e-06, "loss": 24.3487, "step": 2433 }, { "epoch": 0.22589327146171695, "grad_norm": 38.00962448120117, "learning_rate": 9.178750327436831e-06, "loss": 25.4685, "step": 2434 }, { "epoch": 0.2259860788863109, "grad_norm": 34.23432922363281, "learning_rate": 9.17790747723411e-06, "loss": 24.7945, "step": 2435 }, { "epoch": 0.22607888631090486, "grad_norm": 38.62664794921875, "learning_rate": 9.177064233483121e-06, "loss": 23.8926, "step": 2436 }, { "epoch": 0.22617169373549884, "grad_norm": 36.52609634399414, "learning_rate": 9.176220596263297e-06, "loss": 24.2434, "step": 2437 }, { "epoch": 0.2262645011600928, "grad_norm": 38.84935760498047, "learning_rate": 9.175376565654102e-06, "loss": 23.5994, "step": 2438 }, { "epoch": 0.22635730858468678, "grad_norm": 37.79142379760742, "learning_rate": 9.174532141735047e-06, "loss": 23.8673, "step": 2439 }, { "epoch": 0.22645011600928075, "grad_norm": 43.13945007324219, "learning_rate": 9.17368732458567e-06, "loss": 24.7912, "step": 2440 }, { "epoch": 0.22654292343387472, "grad_norm": 37.773468017578125, "learning_rate": 9.172842114285552e-06, "loss": 25.371, "step": 2441 }, { "epoch": 0.22663573085846866, "grad_norm": 39.49556350708008, "learning_rate": 9.171996510914311e-06, "loss": 22.6686, "step": 2442 }, { "epoch": 0.22672853828306264, "grad_norm": 36.767250061035156, "learning_rate": 9.171150514551602e-06, "loss": 25.2609, "step": 2443 }, { "epoch": 0.2268213457076566, "grad_norm": 39.16618347167969, "learning_rate": 9.170304125277112e-06, "loss": 25.7594, "step": 2444 }, { "epoch": 0.22691415313225058, "grad_norm": 37.67116928100586, "learning_rate": 9.169457343170569e-06, "loss": 24.0794, "step": 2445 }, { "epoch": 0.22700696055684455, "grad_norm": 36.1450309753418, "learning_rate": 9.168610168311741e-06, "loss": 23.7254, "step": 2446 }, { "epoch": 0.22709976798143852, "grad_norm": 41.138816833496094, "learning_rate": 9.167762600780427e-06, "loss": 25.7689, "step": 2447 }, { "epoch": 0.2271925754060325, "grad_norm": 40.3867073059082, "learning_rate": 9.166914640656467e-06, "loss": 24.2086, "step": 2448 }, { "epoch": 0.22728538283062644, "grad_norm": 36.12367248535156, "learning_rate": 9.166066288019734e-06, "loss": 26.4108, "step": 2449 }, { "epoch": 0.2273781902552204, "grad_norm": 35.98212432861328, "learning_rate": 9.165217542950145e-06, "loss": 23.1837, "step": 2450 }, { "epoch": 0.22747099767981438, "grad_norm": 45.2501335144043, "learning_rate": 9.164368405527648e-06, "loss": 25.2983, "step": 2451 }, { "epoch": 0.22756380510440835, "grad_norm": 35.95292282104492, "learning_rate": 9.163518875832227e-06, "loss": 24.5579, "step": 2452 }, { "epoch": 0.22765661252900232, "grad_norm": 35.03255844116211, "learning_rate": 9.16266895394391e-06, "loss": 24.6726, "step": 2453 }, { "epoch": 0.2277494199535963, "grad_norm": 35.76792907714844, "learning_rate": 9.161818639942752e-06, "loss": 24.5269, "step": 2454 }, { "epoch": 0.22784222737819027, "grad_norm": 37.00077438354492, "learning_rate": 9.160967933908855e-06, "loss": 25.5709, "step": 2455 }, { "epoch": 0.22793503480278424, "grad_norm": 38.61100769042969, "learning_rate": 9.160116835922353e-06, "loss": 24.1506, "step": 2456 }, { "epoch": 0.22802784222737818, "grad_norm": 36.97569274902344, "learning_rate": 9.159265346063415e-06, "loss": 24.8453, "step": 2457 }, { "epoch": 0.22812064965197215, "grad_norm": 35.38863754272461, "learning_rate": 9.15841346441225e-06, "loss": 25.5399, "step": 2458 }, { "epoch": 0.22821345707656612, "grad_norm": 36.29985809326172, "learning_rate": 9.157561191049103e-06, "loss": 22.6833, "step": 2459 }, { "epoch": 0.2283062645011601, "grad_norm": 35.173763275146484, "learning_rate": 9.156708526054257e-06, "loss": 24.0861, "step": 2460 }, { "epoch": 0.22839907192575407, "grad_norm": 38.96932601928711, "learning_rate": 9.15585546950803e-06, "loss": 24.4511, "step": 2461 }, { "epoch": 0.22849187935034804, "grad_norm": 38.185123443603516, "learning_rate": 9.15500202149078e-06, "loss": 27.1456, "step": 2462 }, { "epoch": 0.228584686774942, "grad_norm": 45.86857604980469, "learning_rate": 9.154148182082894e-06, "loss": 23.5566, "step": 2463 }, { "epoch": 0.22867749419953595, "grad_norm": 38.36551284790039, "learning_rate": 9.153293951364808e-06, "loss": 25.1886, "step": 2464 }, { "epoch": 0.22877030162412992, "grad_norm": 38.66740798950195, "learning_rate": 9.152439329416984e-06, "loss": 23.733, "step": 2465 }, { "epoch": 0.2288631090487239, "grad_norm": 38.124874114990234, "learning_rate": 9.151584316319928e-06, "loss": 25.1509, "step": 2466 }, { "epoch": 0.22895591647331787, "grad_norm": 34.1057014465332, "learning_rate": 9.150728912154177e-06, "loss": 23.9781, "step": 2467 }, { "epoch": 0.22904872389791184, "grad_norm": 37.0975456237793, "learning_rate": 9.14987311700031e-06, "loss": 24.6214, "step": 2468 }, { "epoch": 0.2291415313225058, "grad_norm": 63.52012252807617, "learning_rate": 9.14901693093894e-06, "loss": 25.3895, "step": 2469 }, { "epoch": 0.22923433874709978, "grad_norm": 42.11527633666992, "learning_rate": 9.14816035405072e-06, "loss": 23.7432, "step": 2470 }, { "epoch": 0.22932714617169372, "grad_norm": 37.66804504394531, "learning_rate": 9.147303386416333e-06, "loss": 24.7114, "step": 2471 }, { "epoch": 0.2294199535962877, "grad_norm": 36.961334228515625, "learning_rate": 9.146446028116508e-06, "loss": 23.8453, "step": 2472 }, { "epoch": 0.22951276102088167, "grad_norm": 38.18310546875, "learning_rate": 9.145588279232002e-06, "loss": 25.4738, "step": 2473 }, { "epoch": 0.22960556844547564, "grad_norm": 39.65634536743164, "learning_rate": 9.144730139843614e-06, "loss": 24.3142, "step": 2474 }, { "epoch": 0.2296983758700696, "grad_norm": 39.067630767822266, "learning_rate": 9.14387161003218e-06, "loss": 23.9974, "step": 2475 }, { "epoch": 0.22979118329466358, "grad_norm": 38.26171875, "learning_rate": 9.143012689878569e-06, "loss": 25.2896, "step": 2476 }, { "epoch": 0.22988399071925755, "grad_norm": 37.99030685424805, "learning_rate": 9.14215337946369e-06, "loss": 25.1425, "step": 2477 }, { "epoch": 0.2299767981438515, "grad_norm": 50.76092529296875, "learning_rate": 9.141293678868488e-06, "loss": 23.4714, "step": 2478 }, { "epoch": 0.23006960556844547, "grad_norm": 47.375892639160156, "learning_rate": 9.140433588173944e-06, "loss": 25.4068, "step": 2479 }, { "epoch": 0.23016241299303944, "grad_norm": 36.22446060180664, "learning_rate": 9.139573107461079e-06, "loss": 23.6125, "step": 2480 }, { "epoch": 0.2302552204176334, "grad_norm": 38.48814392089844, "learning_rate": 9.138712236810944e-06, "loss": 24.9122, "step": 2481 }, { "epoch": 0.23034802784222738, "grad_norm": 40.969261169433594, "learning_rate": 9.137850976304637e-06, "loss": 24.1241, "step": 2482 }, { "epoch": 0.23044083526682135, "grad_norm": 35.29838943481445, "learning_rate": 9.136989326023279e-06, "loss": 24.8348, "step": 2483 }, { "epoch": 0.23053364269141532, "grad_norm": 36.89698791503906, "learning_rate": 9.136127286048038e-06, "loss": 22.3935, "step": 2484 }, { "epoch": 0.23062645011600927, "grad_norm": 37.93149948120117, "learning_rate": 9.135264856460118e-06, "loss": 25.2936, "step": 2485 }, { "epoch": 0.23071925754060324, "grad_norm": 38.857852935791016, "learning_rate": 9.134402037340757e-06, "loss": 25.8812, "step": 2486 }, { "epoch": 0.2308120649651972, "grad_norm": 36.205787658691406, "learning_rate": 9.133538828771229e-06, "loss": 23.7132, "step": 2487 }, { "epoch": 0.23090487238979118, "grad_norm": 37.93640899658203, "learning_rate": 9.132675230832848e-06, "loss": 24.752, "step": 2488 }, { "epoch": 0.23099767981438515, "grad_norm": 38.419349670410156, "learning_rate": 9.13181124360696e-06, "loss": 24.3481, "step": 2489 }, { "epoch": 0.23109048723897913, "grad_norm": 38.35081100463867, "learning_rate": 9.130946867174952e-06, "loss": 23.5721, "step": 2490 }, { "epoch": 0.2311832946635731, "grad_norm": 37.79486083984375, "learning_rate": 9.130082101618248e-06, "loss": 25.3841, "step": 2491 }, { "epoch": 0.23127610208816704, "grad_norm": 41.4738883972168, "learning_rate": 9.129216947018303e-06, "loss": 24.7436, "step": 2492 }, { "epoch": 0.231368909512761, "grad_norm": 56.9630241394043, "learning_rate": 9.128351403456614e-06, "loss": 26.2236, "step": 2493 }, { "epoch": 0.23146171693735498, "grad_norm": 38.0219841003418, "learning_rate": 9.127485471014714e-06, "loss": 23.634, "step": 2494 }, { "epoch": 0.23155452436194895, "grad_norm": 39.6534423828125, "learning_rate": 9.12661914977417e-06, "loss": 23.9632, "step": 2495 }, { "epoch": 0.23164733178654293, "grad_norm": 36.13579559326172, "learning_rate": 9.125752439816588e-06, "loss": 24.4982, "step": 2496 }, { "epoch": 0.2317401392111369, "grad_norm": 36.9925651550293, "learning_rate": 9.12488534122361e-06, "loss": 23.4898, "step": 2497 }, { "epoch": 0.23183294663573087, "grad_norm": 35.843177795410156, "learning_rate": 9.124017854076915e-06, "loss": 24.4336, "step": 2498 }, { "epoch": 0.2319257540603248, "grad_norm": 45.6388053894043, "learning_rate": 9.123149978458217e-06, "loss": 24.2285, "step": 2499 }, { "epoch": 0.23201856148491878, "grad_norm": 37.90568923950195, "learning_rate": 9.122281714449267e-06, "loss": 22.9542, "step": 2500 }, { "epoch": 0.23211136890951276, "grad_norm": 37.97878646850586, "learning_rate": 9.121413062131858e-06, "loss": 22.8239, "step": 2501 }, { "epoch": 0.23220417633410673, "grad_norm": 39.019927978515625, "learning_rate": 9.120544021587807e-06, "loss": 24.4996, "step": 2502 }, { "epoch": 0.2322969837587007, "grad_norm": 36.14314651489258, "learning_rate": 9.119674592898983e-06, "loss": 25.4745, "step": 2503 }, { "epoch": 0.23238979118329467, "grad_norm": 34.46970748901367, "learning_rate": 9.118804776147278e-06, "loss": 22.5878, "step": 2504 }, { "epoch": 0.23248259860788864, "grad_norm": 36.461307525634766, "learning_rate": 9.11793457141463e-06, "loss": 24.0776, "step": 2505 }, { "epoch": 0.2325754060324826, "grad_norm": 35.04119873046875, "learning_rate": 9.11706397878301e-06, "loss": 25.2539, "step": 2506 }, { "epoch": 0.23266821345707656, "grad_norm": 37.95665740966797, "learning_rate": 9.116192998334425e-06, "loss": 24.3011, "step": 2507 }, { "epoch": 0.23276102088167053, "grad_norm": 36.15116500854492, "learning_rate": 9.115321630150918e-06, "loss": 24.1991, "step": 2508 }, { "epoch": 0.2328538283062645, "grad_norm": 36.92684555053711, "learning_rate": 9.11444987431457e-06, "loss": 24.8071, "step": 2509 }, { "epoch": 0.23294663573085847, "grad_norm": 36.69866943359375, "learning_rate": 9.1135777309075e-06, "loss": 24.3092, "step": 2510 }, { "epoch": 0.23303944315545244, "grad_norm": 36.41432189941406, "learning_rate": 9.112705200011862e-06, "loss": 23.1448, "step": 2511 }, { "epoch": 0.2331322505800464, "grad_norm": 38.464569091796875, "learning_rate": 9.111832281709842e-06, "loss": 25.2588, "step": 2512 }, { "epoch": 0.23322505800464038, "grad_norm": 37.219547271728516, "learning_rate": 9.11095897608367e-06, "loss": 22.4794, "step": 2513 }, { "epoch": 0.23331786542923433, "grad_norm": 36.31865310668945, "learning_rate": 9.11008528321561e-06, "loss": 22.2422, "step": 2514 }, { "epoch": 0.2334106728538283, "grad_norm": 36.58049774169922, "learning_rate": 9.10921120318796e-06, "loss": 23.8895, "step": 2515 }, { "epoch": 0.23350348027842227, "grad_norm": 50.72226333618164, "learning_rate": 9.108336736083055e-06, "loss": 25.1116, "step": 2516 }, { "epoch": 0.23359628770301624, "grad_norm": 41.53886795043945, "learning_rate": 9.107461881983269e-06, "loss": 23.4021, "step": 2517 }, { "epoch": 0.2336890951276102, "grad_norm": 39.07709503173828, "learning_rate": 9.10658664097101e-06, "loss": 24.3058, "step": 2518 }, { "epoch": 0.23378190255220418, "grad_norm": 38.89939880371094, "learning_rate": 9.105711013128728e-06, "loss": 23.4056, "step": 2519 }, { "epoch": 0.23387470997679816, "grad_norm": 38.30149841308594, "learning_rate": 9.104834998538899e-06, "loss": 24.4223, "step": 2520 }, { "epoch": 0.2339675174013921, "grad_norm": 38.21685028076172, "learning_rate": 9.103958597284044e-06, "loss": 25.146, "step": 2521 }, { "epoch": 0.23406032482598607, "grad_norm": 36.44743347167969, "learning_rate": 9.103081809446716e-06, "loss": 25.0112, "step": 2522 }, { "epoch": 0.23415313225058004, "grad_norm": 40.235450744628906, "learning_rate": 9.10220463510951e-06, "loss": 23.652, "step": 2523 }, { "epoch": 0.23424593967517401, "grad_norm": 40.85832595825195, "learning_rate": 9.10132707435505e-06, "loss": 26.2769, "step": 2524 }, { "epoch": 0.23433874709976799, "grad_norm": 36.51659393310547, "learning_rate": 9.100449127266002e-06, "loss": 23.844, "step": 2525 }, { "epoch": 0.23443155452436196, "grad_norm": 35.84725570678711, "learning_rate": 9.099570793925065e-06, "loss": 24.6153, "step": 2526 }, { "epoch": 0.23452436194895593, "grad_norm": 36.07478713989258, "learning_rate": 9.098692074414976e-06, "loss": 23.3606, "step": 2527 }, { "epoch": 0.23461716937354987, "grad_norm": 41.00130081176758, "learning_rate": 9.09781296881851e-06, "loss": 23.404, "step": 2528 }, { "epoch": 0.23470997679814384, "grad_norm": 36.41563034057617, "learning_rate": 9.096933477218476e-06, "loss": 22.2497, "step": 2529 }, { "epoch": 0.23480278422273781, "grad_norm": 35.539527893066406, "learning_rate": 9.096053599697716e-06, "loss": 23.5854, "step": 2530 }, { "epoch": 0.2348955916473318, "grad_norm": 35.9107780456543, "learning_rate": 9.095173336339116e-06, "loss": 23.4532, "step": 2531 }, { "epoch": 0.23498839907192576, "grad_norm": 43.3946533203125, "learning_rate": 9.094292687225594e-06, "loss": 23.4941, "step": 2532 }, { "epoch": 0.23508120649651973, "grad_norm": 48.29473114013672, "learning_rate": 9.093411652440104e-06, "loss": 23.4356, "step": 2533 }, { "epoch": 0.2351740139211137, "grad_norm": 39.71271896362305, "learning_rate": 9.09253023206564e-06, "loss": 24.8417, "step": 2534 }, { "epoch": 0.23526682134570764, "grad_norm": 82.58686828613281, "learning_rate": 9.091648426185226e-06, "loss": 22.7287, "step": 2535 }, { "epoch": 0.23535962877030162, "grad_norm": 35.75199890136719, "learning_rate": 9.090766234881928e-06, "loss": 25.306, "step": 2536 }, { "epoch": 0.2354524361948956, "grad_norm": 36.782344818115234, "learning_rate": 9.089883658238844e-06, "loss": 27.913, "step": 2537 }, { "epoch": 0.23554524361948956, "grad_norm": 37.86561584472656, "learning_rate": 9.089000696339112e-06, "loss": 25.0479, "step": 2538 }, { "epoch": 0.23563805104408353, "grad_norm": 39.432884216308594, "learning_rate": 9.088117349265906e-06, "loss": 25.1425, "step": 2539 }, { "epoch": 0.2357308584686775, "grad_norm": 63.467735290527344, "learning_rate": 9.087233617102434e-06, "loss": 24.081, "step": 2540 }, { "epoch": 0.23582366589327147, "grad_norm": 37.955657958984375, "learning_rate": 9.08634949993194e-06, "loss": 25.8734, "step": 2541 }, { "epoch": 0.23591647331786542, "grad_norm": 37.671226501464844, "learning_rate": 9.085464997837707e-06, "loss": 24.7834, "step": 2542 }, { "epoch": 0.2360092807424594, "grad_norm": 36.043907165527344, "learning_rate": 9.084580110903052e-06, "loss": 24.4905, "step": 2543 }, { "epoch": 0.23610208816705336, "grad_norm": 39.04031753540039, "learning_rate": 9.08369483921133e-06, "loss": 25.395, "step": 2544 }, { "epoch": 0.23619489559164733, "grad_norm": 39.16593933105469, "learning_rate": 9.082809182845931e-06, "loss": 25.5392, "step": 2545 }, { "epoch": 0.2362877030162413, "grad_norm": 37.503135681152344, "learning_rate": 9.081923141890284e-06, "loss": 24.6262, "step": 2546 }, { "epoch": 0.23638051044083527, "grad_norm": 34.919837951660156, "learning_rate": 9.081036716427846e-06, "loss": 23.0859, "step": 2547 }, { "epoch": 0.23647331786542924, "grad_norm": 40.29941940307617, "learning_rate": 9.08014990654212e-06, "loss": 24.2943, "step": 2548 }, { "epoch": 0.2365661252900232, "grad_norm": 35.275230407714844, "learning_rate": 9.07926271231664e-06, "loss": 25.4011, "step": 2549 }, { "epoch": 0.23665893271461716, "grad_norm": 36.499366760253906, "learning_rate": 9.078375133834981e-06, "loss": 24.6134, "step": 2550 }, { "epoch": 0.23675174013921113, "grad_norm": 38.281463623046875, "learning_rate": 9.077487171180746e-06, "loss": 23.6308, "step": 2551 }, { "epoch": 0.2368445475638051, "grad_norm": 37.26796340942383, "learning_rate": 9.076598824437581e-06, "loss": 24.7471, "step": 2552 }, { "epoch": 0.23693735498839907, "grad_norm": 39.698028564453125, "learning_rate": 9.075710093689165e-06, "loss": 24.0639, "step": 2553 }, { "epoch": 0.23703016241299305, "grad_norm": 43.76485061645508, "learning_rate": 9.074820979019215e-06, "loss": 23.6114, "step": 2554 }, { "epoch": 0.23712296983758702, "grad_norm": 37.474605560302734, "learning_rate": 9.073931480511482e-06, "loss": 24.0218, "step": 2555 }, { "epoch": 0.237215777262181, "grad_norm": 37.1693115234375, "learning_rate": 9.073041598249757e-06, "loss": 26.7752, "step": 2556 }, { "epoch": 0.23730858468677493, "grad_norm": 37.01565170288086, "learning_rate": 9.072151332317863e-06, "loss": 24.7348, "step": 2557 }, { "epoch": 0.2374013921113689, "grad_norm": 37.970916748046875, "learning_rate": 9.07126068279966e-06, "loss": 24.4863, "step": 2558 }, { "epoch": 0.23749419953596287, "grad_norm": 35.58756637573242, "learning_rate": 9.070369649779047e-06, "loss": 25.4098, "step": 2559 }, { "epoch": 0.23758700696055685, "grad_norm": 37.59711456298828, "learning_rate": 9.069478233339957e-06, "loss": 24.1761, "step": 2560 }, { "epoch": 0.23767981438515082, "grad_norm": 36.70586013793945, "learning_rate": 9.068586433566355e-06, "loss": 21.7338, "step": 2561 }, { "epoch": 0.2377726218097448, "grad_norm": 42.262542724609375, "learning_rate": 9.067694250542252e-06, "loss": 24.5348, "step": 2562 }, { "epoch": 0.23786542923433876, "grad_norm": 35.25505447387695, "learning_rate": 9.066801684351686e-06, "loss": 23.7424, "step": 2563 }, { "epoch": 0.2379582366589327, "grad_norm": 39.30076599121094, "learning_rate": 9.065908735078735e-06, "loss": 24.8709, "step": 2564 }, { "epoch": 0.23805104408352667, "grad_norm": 34.74933624267578, "learning_rate": 9.065015402807514e-06, "loss": 23.1655, "step": 2565 }, { "epoch": 0.23814385150812065, "grad_norm": 48.57503890991211, "learning_rate": 9.06412168762217e-06, "loss": 24.0708, "step": 2566 }, { "epoch": 0.23823665893271462, "grad_norm": 39.3155403137207, "learning_rate": 9.06322758960689e-06, "loss": 25.4048, "step": 2567 }, { "epoch": 0.2383294663573086, "grad_norm": 38.69676971435547, "learning_rate": 9.062333108845897e-06, "loss": 23.8592, "step": 2568 }, { "epoch": 0.23842227378190256, "grad_norm": 35.244651794433594, "learning_rate": 9.061438245423448e-06, "loss": 24.3035, "step": 2569 }, { "epoch": 0.23851508120649653, "grad_norm": 40.091556549072266, "learning_rate": 9.060542999423836e-06, "loss": 24.7801, "step": 2570 }, { "epoch": 0.23860788863109048, "grad_norm": 36.37802505493164, "learning_rate": 9.05964737093139e-06, "loss": 24.3122, "step": 2571 }, { "epoch": 0.23870069605568445, "grad_norm": 39.66356658935547, "learning_rate": 9.05875136003048e-06, "loss": 23.2982, "step": 2572 }, { "epoch": 0.23879350348027842, "grad_norm": 40.83150100708008, "learning_rate": 9.057854966805504e-06, "loss": 26.0183, "step": 2573 }, { "epoch": 0.2388863109048724, "grad_norm": 38.523677825927734, "learning_rate": 9.0569581913409e-06, "loss": 24.3374, "step": 2574 }, { "epoch": 0.23897911832946636, "grad_norm": 35.574588775634766, "learning_rate": 9.056061033721145e-06, "loss": 23.4869, "step": 2575 }, { "epoch": 0.23907192575406033, "grad_norm": 41.22890853881836, "learning_rate": 9.055163494030748e-06, "loss": 26.1636, "step": 2576 }, { "epoch": 0.2391647331786543, "grad_norm": 33.90434646606445, "learning_rate": 9.054265572354252e-06, "loss": 25.3717, "step": 2577 }, { "epoch": 0.23925754060324825, "grad_norm": 39.74371337890625, "learning_rate": 9.053367268776243e-06, "loss": 25.3452, "step": 2578 }, { "epoch": 0.23935034802784222, "grad_norm": 51.932525634765625, "learning_rate": 9.052468583381334e-06, "loss": 24.5944, "step": 2579 }, { "epoch": 0.2394431554524362, "grad_norm": 40.83399200439453, "learning_rate": 9.051569516254186e-06, "loss": 22.7886, "step": 2580 }, { "epoch": 0.23953596287703016, "grad_norm": 36.616485595703125, "learning_rate": 9.050670067479484e-06, "loss": 23.52, "step": 2581 }, { "epoch": 0.23962877030162413, "grad_norm": 38.35116958618164, "learning_rate": 9.049770237141953e-06, "loss": 23.1921, "step": 2582 }, { "epoch": 0.2397215777262181, "grad_norm": 42.47934341430664, "learning_rate": 9.048870025326357e-06, "loss": 24.1343, "step": 2583 }, { "epoch": 0.23981438515081208, "grad_norm": 37.65860366821289, "learning_rate": 9.047969432117492e-06, "loss": 22.3296, "step": 2584 }, { "epoch": 0.23990719257540602, "grad_norm": 40.1741828918457, "learning_rate": 9.047068457600194e-06, "loss": 25.4731, "step": 2585 }, { "epoch": 0.24, "grad_norm": 37.54026794433594, "learning_rate": 9.046167101859332e-06, "loss": 25.482, "step": 2586 }, { "epoch": 0.24009280742459396, "grad_norm": 39.133575439453125, "learning_rate": 9.045265364979808e-06, "loss": 24.3947, "step": 2587 }, { "epoch": 0.24018561484918793, "grad_norm": 36.42520523071289, "learning_rate": 9.044363247046566e-06, "loss": 22.8338, "step": 2588 }, { "epoch": 0.2402784222737819, "grad_norm": 40.277626037597656, "learning_rate": 9.043460748144583e-06, "loss": 24.0273, "step": 2589 }, { "epoch": 0.24037122969837588, "grad_norm": 36.45010757446289, "learning_rate": 9.042557868358875e-06, "loss": 23.7065, "step": 2590 }, { "epoch": 0.24046403712296985, "grad_norm": 42.658626556396484, "learning_rate": 9.041654607774486e-06, "loss": 24.7592, "step": 2591 }, { "epoch": 0.2405568445475638, "grad_norm": 38.27244186401367, "learning_rate": 9.040750966476502e-06, "loss": 25.3769, "step": 2592 }, { "epoch": 0.24064965197215776, "grad_norm": 36.165916442871094, "learning_rate": 9.039846944550047e-06, "loss": 24.325, "step": 2593 }, { "epoch": 0.24074245939675173, "grad_norm": 36.492347717285156, "learning_rate": 9.038942542080274e-06, "loss": 24.0992, "step": 2594 }, { "epoch": 0.2408352668213457, "grad_norm": 35.47277069091797, "learning_rate": 9.038037759152376e-06, "loss": 24.0998, "step": 2595 }, { "epoch": 0.24092807424593968, "grad_norm": 37.1645393371582, "learning_rate": 9.037132595851582e-06, "loss": 23.4217, "step": 2596 }, { "epoch": 0.24102088167053365, "grad_norm": 38.42499923706055, "learning_rate": 9.036227052263156e-06, "loss": 25.9895, "step": 2597 }, { "epoch": 0.24111368909512762, "grad_norm": 39.28205490112305, "learning_rate": 9.035321128472398e-06, "loss": 24.7618, "step": 2598 }, { "epoch": 0.2412064965197216, "grad_norm": 39.9235725402832, "learning_rate": 9.034414824564646e-06, "loss": 23.5513, "step": 2599 }, { "epoch": 0.24129930394431554, "grad_norm": 42.40240478515625, "learning_rate": 9.033508140625266e-06, "loss": 23.6678, "step": 2600 }, { "epoch": 0.2413921113689095, "grad_norm": 39.01361846923828, "learning_rate": 9.03260107673967e-06, "loss": 25.4814, "step": 2601 }, { "epoch": 0.24148491879350348, "grad_norm": 37.103973388671875, "learning_rate": 9.0316936329933e-06, "loss": 24.7778, "step": 2602 }, { "epoch": 0.24157772621809745, "grad_norm": 38.1701774597168, "learning_rate": 9.030785809471634e-06, "loss": 23.9596, "step": 2603 }, { "epoch": 0.24167053364269142, "grad_norm": 38.32918167114258, "learning_rate": 9.029877606260187e-06, "loss": 23.467, "step": 2604 }, { "epoch": 0.2417633410672854, "grad_norm": 37.92129135131836, "learning_rate": 9.02896902344451e-06, "loss": 25.0989, "step": 2605 }, { "epoch": 0.24185614849187936, "grad_norm": 40.4258918762207, "learning_rate": 9.028060061110186e-06, "loss": 23.63, "step": 2606 }, { "epoch": 0.2419489559164733, "grad_norm": 35.97819900512695, "learning_rate": 9.027150719342843e-06, "loss": 23.224, "step": 2607 }, { "epoch": 0.24204176334106728, "grad_norm": 38.00847625732422, "learning_rate": 9.026240998228135e-06, "loss": 24.4881, "step": 2608 }, { "epoch": 0.24213457076566125, "grad_norm": 36.931278228759766, "learning_rate": 9.025330897851755e-06, "loss": 24.0023, "step": 2609 }, { "epoch": 0.24222737819025522, "grad_norm": 35.68099594116211, "learning_rate": 9.024420418299433e-06, "loss": 24.8315, "step": 2610 }, { "epoch": 0.2423201856148492, "grad_norm": 42.30009460449219, "learning_rate": 9.023509559656934e-06, "loss": 24.9765, "step": 2611 }, { "epoch": 0.24241299303944316, "grad_norm": 39.88618469238281, "learning_rate": 9.02259832201006e-06, "loss": 24.7949, "step": 2612 }, { "epoch": 0.24250580046403714, "grad_norm": 40.12107849121094, "learning_rate": 9.021686705444643e-06, "loss": 25.7377, "step": 2613 }, { "epoch": 0.24259860788863108, "grad_norm": 39.30895233154297, "learning_rate": 9.020774710046558e-06, "loss": 23.7339, "step": 2614 }, { "epoch": 0.24269141531322505, "grad_norm": 34.84951400756836, "learning_rate": 9.019862335901714e-06, "loss": 24.0381, "step": 2615 }, { "epoch": 0.24278422273781902, "grad_norm": 35.975460052490234, "learning_rate": 9.018949583096051e-06, "loss": 23.6085, "step": 2616 }, { "epoch": 0.242877030162413, "grad_norm": 45.23609924316406, "learning_rate": 9.01803645171555e-06, "loss": 25.4063, "step": 2617 }, { "epoch": 0.24296983758700696, "grad_norm": 39.031715393066406, "learning_rate": 9.017122941846226e-06, "loss": 24.326, "step": 2618 }, { "epoch": 0.24306264501160094, "grad_norm": 37.08594512939453, "learning_rate": 9.016209053574127e-06, "loss": 24.7848, "step": 2619 }, { "epoch": 0.2431554524361949, "grad_norm": 37.48942184448242, "learning_rate": 9.015294786985342e-06, "loss": 23.0638, "step": 2620 }, { "epoch": 0.24324825986078885, "grad_norm": 36.060916900634766, "learning_rate": 9.014380142165991e-06, "loss": 22.853, "step": 2621 }, { "epoch": 0.24334106728538282, "grad_norm": 35.377471923828125, "learning_rate": 9.01346511920223e-06, "loss": 25.8232, "step": 2622 }, { "epoch": 0.2434338747099768, "grad_norm": 38.20832443237305, "learning_rate": 9.012549718180255e-06, "loss": 24.9164, "step": 2623 }, { "epoch": 0.24352668213457077, "grad_norm": 36.39716720581055, "learning_rate": 9.01163393918629e-06, "loss": 24.1376, "step": 2624 }, { "epoch": 0.24361948955916474, "grad_norm": 36.40568542480469, "learning_rate": 9.010717782306604e-06, "loss": 24.0851, "step": 2625 }, { "epoch": 0.2437122969837587, "grad_norm": 45.61270523071289, "learning_rate": 9.009801247627494e-06, "loss": 23.8029, "step": 2626 }, { "epoch": 0.24380510440835268, "grad_norm": 36.92704391479492, "learning_rate": 9.008884335235295e-06, "loss": 23.1141, "step": 2627 }, { "epoch": 0.24389791183294662, "grad_norm": 34.8735237121582, "learning_rate": 9.00796704521638e-06, "loss": 22.4051, "step": 2628 }, { "epoch": 0.2439907192575406, "grad_norm": 39.744380950927734, "learning_rate": 9.00704937765715e-06, "loss": 24.5013, "step": 2629 }, { "epoch": 0.24408352668213457, "grad_norm": 38.27133560180664, "learning_rate": 9.006131332644055e-06, "loss": 26.3584, "step": 2630 }, { "epoch": 0.24417633410672854, "grad_norm": 34.410797119140625, "learning_rate": 9.005212910263565e-06, "loss": 23.6704, "step": 2631 }, { "epoch": 0.2442691415313225, "grad_norm": 37.767417907714844, "learning_rate": 9.0042941106022e-06, "loss": 23.6782, "step": 2632 }, { "epoch": 0.24436194895591648, "grad_norm": 40.9019660949707, "learning_rate": 9.0033749337465e-06, "loss": 24.5306, "step": 2633 }, { "epoch": 0.24445475638051045, "grad_norm": 36.99380111694336, "learning_rate": 9.002455379783057e-06, "loss": 22.0138, "step": 2634 }, { "epoch": 0.2445475638051044, "grad_norm": 73.50100708007812, "learning_rate": 9.001535448798487e-06, "loss": 24.8523, "step": 2635 }, { "epoch": 0.24464037122969837, "grad_norm": 42.491310119628906, "learning_rate": 9.000615140879446e-06, "loss": 24.7319, "step": 2636 }, { "epoch": 0.24473317865429234, "grad_norm": 37.48687744140625, "learning_rate": 8.999694456112624e-06, "loss": 25.7086, "step": 2637 }, { "epoch": 0.2448259860788863, "grad_norm": 37.412776947021484, "learning_rate": 8.99877339458475e-06, "loss": 26.7176, "step": 2638 }, { "epoch": 0.24491879350348028, "grad_norm": 35.36710739135742, "learning_rate": 8.99785195638258e-06, "loss": 24.3703, "step": 2639 }, { "epoch": 0.24501160092807425, "grad_norm": 38.707801818847656, "learning_rate": 8.996930141592915e-06, "loss": 23.886, "step": 2640 }, { "epoch": 0.24510440835266822, "grad_norm": 38.680660247802734, "learning_rate": 8.996007950302588e-06, "loss": 23.4447, "step": 2641 }, { "epoch": 0.24519721577726217, "grad_norm": 36.34512710571289, "learning_rate": 8.995085382598465e-06, "loss": 23.3346, "step": 2642 }, { "epoch": 0.24529002320185614, "grad_norm": 37.397369384765625, "learning_rate": 8.994162438567453e-06, "loss": 22.4787, "step": 2643 }, { "epoch": 0.2453828306264501, "grad_norm": 36.72837829589844, "learning_rate": 8.993239118296486e-06, "loss": 23.6181, "step": 2644 }, { "epoch": 0.24547563805104408, "grad_norm": 38.48661804199219, "learning_rate": 8.992315421872543e-06, "loss": 25.0664, "step": 2645 }, { "epoch": 0.24556844547563805, "grad_norm": 34.6528434753418, "learning_rate": 8.99139134938263e-06, "loss": 23.4399, "step": 2646 }, { "epoch": 0.24566125290023202, "grad_norm": 36.65994644165039, "learning_rate": 8.990466900913796e-06, "loss": 24.6571, "step": 2647 }, { "epoch": 0.245754060324826, "grad_norm": 64.48399353027344, "learning_rate": 8.989542076553119e-06, "loss": 25.2953, "step": 2648 }, { "epoch": 0.24584686774941997, "grad_norm": 41.630672454833984, "learning_rate": 8.988616876387717e-06, "loss": 22.5698, "step": 2649 }, { "epoch": 0.2459396751740139, "grad_norm": 35.64985656738281, "learning_rate": 8.987691300504739e-06, "loss": 23.2347, "step": 2650 }, { "epoch": 0.24603248259860788, "grad_norm": 38.99850082397461, "learning_rate": 8.986765348991373e-06, "loss": 23.883, "step": 2651 }, { "epoch": 0.24612529002320185, "grad_norm": 35.99806594848633, "learning_rate": 8.985839021934843e-06, "loss": 22.8574, "step": 2652 }, { "epoch": 0.24621809744779583, "grad_norm": 39.15979766845703, "learning_rate": 8.984912319422405e-06, "loss": 23.6092, "step": 2653 }, { "epoch": 0.2463109048723898, "grad_norm": 41.2887077331543, "learning_rate": 8.983985241541353e-06, "loss": 24.0385, "step": 2654 }, { "epoch": 0.24640371229698377, "grad_norm": 54.959571838378906, "learning_rate": 8.983057788379014e-06, "loss": 26.4967, "step": 2655 }, { "epoch": 0.24649651972157774, "grad_norm": 37.61574935913086, "learning_rate": 8.982129960022752e-06, "loss": 25.102, "step": 2656 }, { "epoch": 0.24658932714617168, "grad_norm": 34.848594665527344, "learning_rate": 8.981201756559968e-06, "loss": 24.748, "step": 2657 }, { "epoch": 0.24668213457076565, "grad_norm": 42.64149856567383, "learning_rate": 8.980273178078093e-06, "loss": 25.0976, "step": 2658 }, { "epoch": 0.24677494199535963, "grad_norm": 44.7788200378418, "learning_rate": 8.9793442246646e-06, "loss": 24.2966, "step": 2659 }, { "epoch": 0.2468677494199536, "grad_norm": 41.9578857421875, "learning_rate": 8.978414896406993e-06, "loss": 22.7363, "step": 2660 }, { "epoch": 0.24696055684454757, "grad_norm": 37.95750045776367, "learning_rate": 8.97748519339281e-06, "loss": 24.2651, "step": 2661 }, { "epoch": 0.24705336426914154, "grad_norm": 35.52049255371094, "learning_rate": 8.97655511570963e-06, "loss": 23.6508, "step": 2662 }, { "epoch": 0.2471461716937355, "grad_norm": 37.290626525878906, "learning_rate": 8.975624663445064e-06, "loss": 23.8561, "step": 2663 }, { "epoch": 0.24723897911832945, "grad_norm": 37.3656005859375, "learning_rate": 8.974693836686755e-06, "loss": 25.2538, "step": 2664 }, { "epoch": 0.24733178654292343, "grad_norm": 37.852386474609375, "learning_rate": 8.973762635522388e-06, "loss": 26.6527, "step": 2665 }, { "epoch": 0.2474245939675174, "grad_norm": 40.005977630615234, "learning_rate": 8.972831060039677e-06, "loss": 24.7919, "step": 2666 }, { "epoch": 0.24751740139211137, "grad_norm": 40.563045501708984, "learning_rate": 8.971899110326377e-06, "loss": 24.9902, "step": 2667 }, { "epoch": 0.24761020881670534, "grad_norm": 35.582237243652344, "learning_rate": 8.970966786470272e-06, "loss": 24.0026, "step": 2668 }, { "epoch": 0.2477030162412993, "grad_norm": 37.07515335083008, "learning_rate": 8.970034088559187e-06, "loss": 25.1902, "step": 2669 }, { "epoch": 0.24779582366589328, "grad_norm": 40.3365592956543, "learning_rate": 8.969101016680977e-06, "loss": 25.9681, "step": 2670 }, { "epoch": 0.24788863109048723, "grad_norm": 67.09483337402344, "learning_rate": 8.968167570923538e-06, "loss": 25.0305, "step": 2671 }, { "epoch": 0.2479814385150812, "grad_norm": 37.740970611572266, "learning_rate": 8.967233751374797e-06, "loss": 23.7578, "step": 2672 }, { "epoch": 0.24807424593967517, "grad_norm": 35.629459381103516, "learning_rate": 8.96629955812272e-06, "loss": 23.3692, "step": 2673 }, { "epoch": 0.24816705336426914, "grad_norm": 36.58329391479492, "learning_rate": 8.9653649912553e-06, "loss": 23.9258, "step": 2674 }, { "epoch": 0.2482598607888631, "grad_norm": 35.060447692871094, "learning_rate": 8.964430050860575e-06, "loss": 23.8353, "step": 2675 }, { "epoch": 0.24835266821345708, "grad_norm": 37.11407470703125, "learning_rate": 8.963494737026612e-06, "loss": 24.493, "step": 2676 }, { "epoch": 0.24844547563805106, "grad_norm": 35.970611572265625, "learning_rate": 8.962559049841518e-06, "loss": 23.9492, "step": 2677 }, { "epoch": 0.248538283062645, "grad_norm": 39.46076965332031, "learning_rate": 8.96162298939343e-06, "loss": 23.0772, "step": 2678 }, { "epoch": 0.24863109048723897, "grad_norm": 37.36570358276367, "learning_rate": 8.960686555770523e-06, "loss": 22.4826, "step": 2679 }, { "epoch": 0.24872389791183294, "grad_norm": 36.246978759765625, "learning_rate": 8.959749749061006e-06, "loss": 24.5411, "step": 2680 }, { "epoch": 0.2488167053364269, "grad_norm": 33.87016296386719, "learning_rate": 8.958812569353125e-06, "loss": 24.784, "step": 2681 }, { "epoch": 0.24890951276102088, "grad_norm": 36.56436538696289, "learning_rate": 8.95787501673516e-06, "loss": 24.8804, "step": 2682 }, { "epoch": 0.24900232018561486, "grad_norm": 37.12805938720703, "learning_rate": 8.956937091295425e-06, "loss": 24.5367, "step": 2683 }, { "epoch": 0.24909512761020883, "grad_norm": 38.512542724609375, "learning_rate": 8.955998793122271e-06, "loss": 24.3541, "step": 2684 }, { "epoch": 0.24918793503480277, "grad_norm": 42.290077209472656, "learning_rate": 8.955060122304082e-06, "loss": 25.0473, "step": 2685 }, { "epoch": 0.24928074245939674, "grad_norm": 37.096004486083984, "learning_rate": 8.954121078929282e-06, "loss": 24.4404, "step": 2686 }, { "epoch": 0.2493735498839907, "grad_norm": 41.356021881103516, "learning_rate": 8.953181663086321e-06, "loss": 24.124, "step": 2687 }, { "epoch": 0.24946635730858469, "grad_norm": 35.46503448486328, "learning_rate": 8.952241874863695e-06, "loss": 24.4003, "step": 2688 }, { "epoch": 0.24955916473317866, "grad_norm": 41.13206481933594, "learning_rate": 8.951301714349925e-06, "loss": 24.3549, "step": 2689 }, { "epoch": 0.24965197215777263, "grad_norm": 36.574764251708984, "learning_rate": 8.950361181633576e-06, "loss": 26.1561, "step": 2690 }, { "epoch": 0.2497447795823666, "grad_norm": 34.75565719604492, "learning_rate": 8.94942027680324e-06, "loss": 24.1064, "step": 2691 }, { "epoch": 0.24983758700696054, "grad_norm": 36.19813537597656, "learning_rate": 8.94847899994755e-06, "loss": 22.7358, "step": 2692 }, { "epoch": 0.24993039443155451, "grad_norm": 37.27785110473633, "learning_rate": 8.947537351155172e-06, "loss": 25.8602, "step": 2693 }, { "epoch": 0.2500232018561485, "grad_norm": 38.3738899230957, "learning_rate": 8.946595330514807e-06, "loss": 23.7409, "step": 2694 }, { "epoch": 0.25011600928074246, "grad_norm": 36.420711517333984, "learning_rate": 8.94565293811519e-06, "loss": 25.5597, "step": 2695 }, { "epoch": 0.2502088167053364, "grad_norm": 37.24421691894531, "learning_rate": 8.944710174045091e-06, "loss": 26.537, "step": 2696 }, { "epoch": 0.2503016241299304, "grad_norm": 36.68516540527344, "learning_rate": 8.943767038393319e-06, "loss": 24.8996, "step": 2697 }, { "epoch": 0.25039443155452434, "grad_norm": 33.90227508544922, "learning_rate": 8.942823531248712e-06, "loss": 25.6393, "step": 2698 }, { "epoch": 0.25048723897911834, "grad_norm": 38.901954650878906, "learning_rate": 8.941879652700147e-06, "loss": 25.2214, "step": 2699 }, { "epoch": 0.2505800464037123, "grad_norm": 35.111881256103516, "learning_rate": 8.940935402836535e-06, "loss": 23.0989, "step": 2700 }, { "epoch": 0.2506728538283063, "grad_norm": 34.77342987060547, "learning_rate": 8.939990781746824e-06, "loss": 24.0152, "step": 2701 }, { "epoch": 0.25076566125290023, "grad_norm": 39.2557373046875, "learning_rate": 8.939045789519993e-06, "loss": 23.6907, "step": 2702 }, { "epoch": 0.2508584686774942, "grad_norm": 37.691070556640625, "learning_rate": 8.938100426245055e-06, "loss": 23.0144, "step": 2703 }, { "epoch": 0.25095127610208817, "grad_norm": 35.71159744262695, "learning_rate": 8.937154692011067e-06, "loss": 26.07, "step": 2704 }, { "epoch": 0.2510440835266821, "grad_norm": 35.818443298339844, "learning_rate": 8.93620858690711e-06, "loss": 23.6885, "step": 2705 }, { "epoch": 0.2511368909512761, "grad_norm": 37.20829391479492, "learning_rate": 8.935262111022306e-06, "loss": 25.2794, "step": 2706 }, { "epoch": 0.25122969837587006, "grad_norm": 32.28104019165039, "learning_rate": 8.934315264445812e-06, "loss": 23.9088, "step": 2707 }, { "epoch": 0.25132250580046406, "grad_norm": 35.414913177490234, "learning_rate": 8.933368047266814e-06, "loss": 24.9843, "step": 2708 }, { "epoch": 0.251415313225058, "grad_norm": 38.018890380859375, "learning_rate": 8.932420459574545e-06, "loss": 24.5417, "step": 2709 }, { "epoch": 0.25150812064965195, "grad_norm": 42.5620231628418, "learning_rate": 8.93147250145826e-06, "loss": 22.4026, "step": 2710 }, { "epoch": 0.25160092807424594, "grad_norm": 38.08344650268555, "learning_rate": 8.930524173007253e-06, "loss": 24.0764, "step": 2711 }, { "epoch": 0.2516937354988399, "grad_norm": 37.014102935791016, "learning_rate": 8.92957547431086e-06, "loss": 23.9959, "step": 2712 }, { "epoch": 0.2517865429234339, "grad_norm": 36.26862716674805, "learning_rate": 8.92862640545844e-06, "loss": 23.6268, "step": 2713 }, { "epoch": 0.25187935034802783, "grad_norm": 36.68168640136719, "learning_rate": 8.927676966539396e-06, "loss": 24.4552, "step": 2714 }, { "epoch": 0.25197215777262183, "grad_norm": 38.84856033325195, "learning_rate": 8.926727157643163e-06, "loss": 25.5373, "step": 2715 }, { "epoch": 0.2520649651972158, "grad_norm": 34.717655181884766, "learning_rate": 8.92577697885921e-06, "loss": 23.8388, "step": 2716 }, { "epoch": 0.2521577726218097, "grad_norm": 41.10861587524414, "learning_rate": 8.92482643027704e-06, "loss": 24.3158, "step": 2717 }, { "epoch": 0.2522505800464037, "grad_norm": 36.56959533691406, "learning_rate": 8.923875511986193e-06, "loss": 22.8769, "step": 2718 }, { "epoch": 0.25234338747099766, "grad_norm": 37.06937026977539, "learning_rate": 8.922924224076245e-06, "loss": 25.0974, "step": 2719 }, { "epoch": 0.25243619489559166, "grad_norm": 39.354469299316406, "learning_rate": 8.921972566636804e-06, "loss": 25.8457, "step": 2720 }, { "epoch": 0.2525290023201856, "grad_norm": 38.05294418334961, "learning_rate": 8.92102053975751e-06, "loss": 23.4827, "step": 2721 }, { "epoch": 0.2526218097447796, "grad_norm": 35.46842575073242, "learning_rate": 8.920068143528046e-06, "loss": 24.2378, "step": 2722 }, { "epoch": 0.25271461716937355, "grad_norm": 36.7994499206543, "learning_rate": 8.919115378038126e-06, "loss": 23.094, "step": 2723 }, { "epoch": 0.25280742459396754, "grad_norm": 42.65662384033203, "learning_rate": 8.918162243377494e-06, "loss": 24.8339, "step": 2724 }, { "epoch": 0.2529002320185615, "grad_norm": 36.685482025146484, "learning_rate": 8.917208739635936e-06, "loss": 25.1244, "step": 2725 }, { "epoch": 0.25299303944315543, "grad_norm": 37.21269607543945, "learning_rate": 8.916254866903267e-06, "loss": 25.7587, "step": 2726 }, { "epoch": 0.25308584686774943, "grad_norm": 51.30405807495117, "learning_rate": 8.915300625269343e-06, "loss": 23.613, "step": 2727 }, { "epoch": 0.2531786542923434, "grad_norm": 34.45680236816406, "learning_rate": 8.914346014824047e-06, "loss": 23.2517, "step": 2728 }, { "epoch": 0.2532714617169374, "grad_norm": 38.00990295410156, "learning_rate": 8.913391035657304e-06, "loss": 23.2419, "step": 2729 }, { "epoch": 0.2533642691415313, "grad_norm": 36.359737396240234, "learning_rate": 8.912435687859068e-06, "loss": 24.4839, "step": 2730 }, { "epoch": 0.2534570765661253, "grad_norm": 45.45674514770508, "learning_rate": 8.911479971519335e-06, "loss": 27.3524, "step": 2731 }, { "epoch": 0.25354988399071926, "grad_norm": 38.426368713378906, "learning_rate": 8.910523886728125e-06, "loss": 24.7925, "step": 2732 }, { "epoch": 0.2536426914153132, "grad_norm": 36.89952087402344, "learning_rate": 8.909567433575503e-06, "loss": 24.3143, "step": 2733 }, { "epoch": 0.2537354988399072, "grad_norm": 34.11061477661133, "learning_rate": 8.908610612151562e-06, "loss": 23.4014, "step": 2734 }, { "epoch": 0.25382830626450115, "grad_norm": 34.978248596191406, "learning_rate": 8.907653422546435e-06, "loss": 24.4371, "step": 2735 }, { "epoch": 0.25392111368909515, "grad_norm": 37.71409225463867, "learning_rate": 8.906695864850284e-06, "loss": 23.8714, "step": 2736 }, { "epoch": 0.2540139211136891, "grad_norm": 36.292938232421875, "learning_rate": 8.905737939153311e-06, "loss": 23.373, "step": 2737 }, { "epoch": 0.2541067285382831, "grad_norm": 39.26005172729492, "learning_rate": 8.904779645545746e-06, "loss": 25.7855, "step": 2738 }, { "epoch": 0.25419953596287703, "grad_norm": 38.99341583251953, "learning_rate": 8.903820984117863e-06, "loss": 23.2111, "step": 2739 }, { "epoch": 0.254292343387471, "grad_norm": 37.78269577026367, "learning_rate": 8.902861954959963e-06, "loss": 24.6612, "step": 2740 }, { "epoch": 0.254385150812065, "grad_norm": 36.887264251708984, "learning_rate": 8.901902558162383e-06, "loss": 23.5288, "step": 2741 }, { "epoch": 0.2544779582366589, "grad_norm": 34.85818099975586, "learning_rate": 8.900942793815498e-06, "loss": 23.2152, "step": 2742 }, { "epoch": 0.2545707656612529, "grad_norm": 40.1444091796875, "learning_rate": 8.899982662009713e-06, "loss": 23.787, "step": 2743 }, { "epoch": 0.25466357308584686, "grad_norm": 34.987606048583984, "learning_rate": 8.899022162835473e-06, "loss": 24.0754, "step": 2744 }, { "epoch": 0.25475638051044086, "grad_norm": 40.20067596435547, "learning_rate": 8.89806129638325e-06, "loss": 24.1278, "step": 2745 }, { "epoch": 0.2548491879350348, "grad_norm": 38.353397369384766, "learning_rate": 8.897100062743562e-06, "loss": 25.0109, "step": 2746 }, { "epoch": 0.25494199535962875, "grad_norm": 36.034141540527344, "learning_rate": 8.896138462006948e-06, "loss": 22.603, "step": 2747 }, { "epoch": 0.25503480278422275, "grad_norm": 37.390625, "learning_rate": 8.895176494263993e-06, "loss": 26.0611, "step": 2748 }, { "epoch": 0.2551276102088167, "grad_norm": 39.31880569458008, "learning_rate": 8.894214159605307e-06, "loss": 24.2626, "step": 2749 }, { "epoch": 0.2552204176334107, "grad_norm": 36.66127014160156, "learning_rate": 8.893251458121545e-06, "loss": 23.6463, "step": 2750 }, { "epoch": 0.25531322505800463, "grad_norm": 38.43252944946289, "learning_rate": 8.892288389903389e-06, "loss": 25.8358, "step": 2751 }, { "epoch": 0.25540603248259863, "grad_norm": 37.80488586425781, "learning_rate": 8.891324955041555e-06, "loss": 24.41, "step": 2752 }, { "epoch": 0.2554988399071926, "grad_norm": 36.40993118286133, "learning_rate": 8.890361153626801e-06, "loss": 24.8305, "step": 2753 }, { "epoch": 0.2555916473317865, "grad_norm": 35.980186462402344, "learning_rate": 8.889396985749909e-06, "loss": 26.3434, "step": 2754 }, { "epoch": 0.2556844547563805, "grad_norm": 44.0291748046875, "learning_rate": 8.888432451501706e-06, "loss": 24.4823, "step": 2755 }, { "epoch": 0.25577726218097446, "grad_norm": 35.966270446777344, "learning_rate": 8.887467550973045e-06, "loss": 25.7214, "step": 2756 }, { "epoch": 0.25587006960556846, "grad_norm": 36.6102409362793, "learning_rate": 8.88650228425482e-06, "loss": 23.9064, "step": 2757 }, { "epoch": 0.2559628770301624, "grad_norm": 43.17401885986328, "learning_rate": 8.885536651437957e-06, "loss": 24.7885, "step": 2758 }, { "epoch": 0.2560556844547564, "grad_norm": 35.25535202026367, "learning_rate": 8.884570652613413e-06, "loss": 23.8467, "step": 2759 }, { "epoch": 0.25614849187935035, "grad_norm": 36.670108795166016, "learning_rate": 8.883604287872186e-06, "loss": 23.7909, "step": 2760 }, { "epoch": 0.2562412993039443, "grad_norm": 40.62493896484375, "learning_rate": 8.882637557305302e-06, "loss": 22.9217, "step": 2761 }, { "epoch": 0.2563341067285383, "grad_norm": 37.282676696777344, "learning_rate": 8.881670461003826e-06, "loss": 24.6999, "step": 2762 }, { "epoch": 0.25642691415313223, "grad_norm": 34.87165832519531, "learning_rate": 8.880702999058856e-06, "loss": 25.1769, "step": 2763 }, { "epoch": 0.25651972157772623, "grad_norm": 36.97711944580078, "learning_rate": 8.879735171561526e-06, "loss": 22.4728, "step": 2764 }, { "epoch": 0.2566125290023202, "grad_norm": 35.931785583496094, "learning_rate": 8.878766978603e-06, "loss": 24.4219, "step": 2765 }, { "epoch": 0.2567053364269142, "grad_norm": 38.6054573059082, "learning_rate": 8.87779842027448e-06, "loss": 24.1449, "step": 2766 }, { "epoch": 0.2567981438515081, "grad_norm": 36.447265625, "learning_rate": 8.876829496667203e-06, "loss": 24.7596, "step": 2767 }, { "epoch": 0.25689095127610206, "grad_norm": 37.599632263183594, "learning_rate": 8.875860207872439e-06, "loss": 21.8269, "step": 2768 }, { "epoch": 0.25698375870069606, "grad_norm": 35.31175231933594, "learning_rate": 8.874890553981494e-06, "loss": 25.0456, "step": 2769 }, { "epoch": 0.25707656612529, "grad_norm": 37.087162017822266, "learning_rate": 8.873920535085703e-06, "loss": 24.2549, "step": 2770 }, { "epoch": 0.257169373549884, "grad_norm": 39.85650634765625, "learning_rate": 8.87295015127644e-06, "loss": 27.3845, "step": 2771 }, { "epoch": 0.25726218097447795, "grad_norm": 34.78450393676758, "learning_rate": 8.871979402645116e-06, "loss": 24.5859, "step": 2772 }, { "epoch": 0.25735498839907195, "grad_norm": 39.33820343017578, "learning_rate": 8.871008289283172e-06, "loss": 27.8839, "step": 2773 }, { "epoch": 0.2574477958236659, "grad_norm": 39.830909729003906, "learning_rate": 8.870036811282084e-06, "loss": 24.86, "step": 2774 }, { "epoch": 0.25754060324825984, "grad_norm": 98.99264526367188, "learning_rate": 8.869064968733358e-06, "loss": 25.1415, "step": 2775 }, { "epoch": 0.25763341067285384, "grad_norm": 37.647857666015625, "learning_rate": 8.868092761728548e-06, "loss": 23.8765, "step": 2776 }, { "epoch": 0.2577262180974478, "grad_norm": 37.82967758178711, "learning_rate": 8.867120190359227e-06, "loss": 21.7502, "step": 2777 }, { "epoch": 0.2578190255220418, "grad_norm": 36.84939193725586, "learning_rate": 8.86614725471701e-06, "loss": 24.8535, "step": 2778 }, { "epoch": 0.2579118329466357, "grad_norm": 38.613460540771484, "learning_rate": 8.865173954893547e-06, "loss": 25.6666, "step": 2779 }, { "epoch": 0.2580046403712297, "grad_norm": 39.903804779052734, "learning_rate": 8.864200290980517e-06, "loss": 25.4472, "step": 2780 }, { "epoch": 0.25809744779582366, "grad_norm": 35.35034942626953, "learning_rate": 8.86322626306964e-06, "loss": 24.3431, "step": 2781 }, { "epoch": 0.2581902552204176, "grad_norm": 37.41541290283203, "learning_rate": 8.862251871252668e-06, "loss": 25.0586, "step": 2782 }, { "epoch": 0.2582830626450116, "grad_norm": 42.23868942260742, "learning_rate": 8.861277115621381e-06, "loss": 25.0543, "step": 2783 }, { "epoch": 0.25837587006960555, "grad_norm": 36.65861129760742, "learning_rate": 8.860301996267601e-06, "loss": 22.4688, "step": 2784 }, { "epoch": 0.25846867749419955, "grad_norm": 33.70378112792969, "learning_rate": 8.859326513283183e-06, "loss": 25.5481, "step": 2785 }, { "epoch": 0.2585614849187935, "grad_norm": 37.968467712402344, "learning_rate": 8.858350666760014e-06, "loss": 24.0732, "step": 2786 }, { "epoch": 0.2586542923433875, "grad_norm": 42.425025939941406, "learning_rate": 8.857374456790018e-06, "loss": 23.7544, "step": 2787 }, { "epoch": 0.25874709976798144, "grad_norm": 49.67418670654297, "learning_rate": 8.85639788346515e-06, "loss": 23.7628, "step": 2788 }, { "epoch": 0.2588399071925754, "grad_norm": 34.179100036621094, "learning_rate": 8.855420946877398e-06, "loss": 22.3154, "step": 2789 }, { "epoch": 0.2589327146171694, "grad_norm": 40.647518157958984, "learning_rate": 8.85444364711879e-06, "loss": 23.674, "step": 2790 }, { "epoch": 0.2590255220417633, "grad_norm": 37.65132141113281, "learning_rate": 8.853465984281387e-06, "loss": 22.9778, "step": 2791 }, { "epoch": 0.2591183294663573, "grad_norm": 38.32786560058594, "learning_rate": 8.852487958457277e-06, "loss": 23.9322, "step": 2792 }, { "epoch": 0.25921113689095127, "grad_norm": 37.3626823425293, "learning_rate": 8.851509569738592e-06, "loss": 23.1472, "step": 2793 }, { "epoch": 0.25930394431554527, "grad_norm": 40.27830505371094, "learning_rate": 8.850530818217493e-06, "loss": 24.2928, "step": 2794 }, { "epoch": 0.2593967517401392, "grad_norm": 38.0190544128418, "learning_rate": 8.849551703986176e-06, "loss": 24.8509, "step": 2795 }, { "epoch": 0.25948955916473315, "grad_norm": 35.077239990234375, "learning_rate": 8.848572227136869e-06, "loss": 25.0622, "step": 2796 }, { "epoch": 0.25958236658932715, "grad_norm": 35.99748992919922, "learning_rate": 8.847592387761837e-06, "loss": 22.722, "step": 2797 }, { "epoch": 0.2596751740139211, "grad_norm": 34.752994537353516, "learning_rate": 8.84661218595338e-06, "loss": 23.8379, "step": 2798 }, { "epoch": 0.2597679814385151, "grad_norm": 37.701904296875, "learning_rate": 8.845631621803829e-06, "loss": 26.5318, "step": 2799 }, { "epoch": 0.25986078886310904, "grad_norm": 36.4660530090332, "learning_rate": 8.844650695405552e-06, "loss": 24.3013, "step": 2800 }, { "epoch": 0.25995359628770304, "grad_norm": 35.99263381958008, "learning_rate": 8.84366940685095e-06, "loss": 22.4795, "step": 2801 }, { "epoch": 0.260046403712297, "grad_norm": 38.60036087036133, "learning_rate": 8.842687756232454e-06, "loss": 26.5162, "step": 2802 }, { "epoch": 0.2601392111368909, "grad_norm": 44.54663848876953, "learning_rate": 8.841705743642536e-06, "loss": 25.3842, "step": 2803 }, { "epoch": 0.2602320185614849, "grad_norm": 37.56055450439453, "learning_rate": 8.840723369173702e-06, "loss": 24.3135, "step": 2804 }, { "epoch": 0.26032482598607887, "grad_norm": 41.9771842956543, "learning_rate": 8.839740632918484e-06, "loss": 24.8205, "step": 2805 }, { "epoch": 0.26041763341067287, "grad_norm": 36.326080322265625, "learning_rate": 8.838757534969455e-06, "loss": 24.9134, "step": 2806 }, { "epoch": 0.2605104408352668, "grad_norm": 37.1086311340332, "learning_rate": 8.837774075419222e-06, "loss": 23.421, "step": 2807 }, { "epoch": 0.2606032482598608, "grad_norm": 34.603858947753906, "learning_rate": 8.83679025436042e-06, "loss": 23.6304, "step": 2808 }, { "epoch": 0.26069605568445475, "grad_norm": 34.79322814941406, "learning_rate": 8.835806071885729e-06, "loss": 24.149, "step": 2809 }, { "epoch": 0.2607888631090487, "grad_norm": 35.79761505126953, "learning_rate": 8.83482152808785e-06, "loss": 24.443, "step": 2810 }, { "epoch": 0.2608816705336427, "grad_norm": 35.87174606323242, "learning_rate": 8.83383662305953e-06, "loss": 24.5917, "step": 2811 }, { "epoch": 0.26097447795823664, "grad_norm": 36.727691650390625, "learning_rate": 8.832851356893542e-06, "loss": 23.8556, "step": 2812 }, { "epoch": 0.26106728538283064, "grad_norm": 33.645023345947266, "learning_rate": 8.831865729682694e-06, "loss": 22.0502, "step": 2813 }, { "epoch": 0.2611600928074246, "grad_norm": 38.38981246948242, "learning_rate": 8.830879741519831e-06, "loss": 25.4734, "step": 2814 }, { "epoch": 0.2612529002320186, "grad_norm": 42.081390380859375, "learning_rate": 8.829893392497833e-06, "loss": 24.7423, "step": 2815 }, { "epoch": 0.2613457076566125, "grad_norm": 37.187618255615234, "learning_rate": 8.828906682709608e-06, "loss": 23.9672, "step": 2816 }, { "epoch": 0.2614385150812065, "grad_norm": 40.92092514038086, "learning_rate": 8.827919612248102e-06, "loss": 23.8685, "step": 2817 }, { "epoch": 0.26153132250580047, "grad_norm": 38.543827056884766, "learning_rate": 8.826932181206296e-06, "loss": 24.2121, "step": 2818 }, { "epoch": 0.2616241299303944, "grad_norm": 37.57852554321289, "learning_rate": 8.825944389677204e-06, "loss": 26.284, "step": 2819 }, { "epoch": 0.2617169373549884, "grad_norm": 36.8482666015625, "learning_rate": 8.824956237753872e-06, "loss": 24.9639, "step": 2820 }, { "epoch": 0.26180974477958235, "grad_norm": 36.59611129760742, "learning_rate": 8.823967725529381e-06, "loss": 24.4567, "step": 2821 }, { "epoch": 0.26190255220417635, "grad_norm": 35.130001068115234, "learning_rate": 8.82297885309685e-06, "loss": 24.2643, "step": 2822 }, { "epoch": 0.2619953596287703, "grad_norm": 35.294639587402344, "learning_rate": 8.821989620549422e-06, "loss": 23.0013, "step": 2823 }, { "epoch": 0.2620881670533643, "grad_norm": 34.838741302490234, "learning_rate": 8.821000027980284e-06, "loss": 22.5933, "step": 2824 }, { "epoch": 0.26218097447795824, "grad_norm": 39.407798767089844, "learning_rate": 8.820010075482654e-06, "loss": 24.8063, "step": 2825 }, { "epoch": 0.2622737819025522, "grad_norm": 37.77704620361328, "learning_rate": 8.81901976314978e-06, "loss": 24.2025, "step": 2826 }, { "epoch": 0.2623665893271462, "grad_norm": 36.70458984375, "learning_rate": 8.81802909107495e-06, "loss": 26.2813, "step": 2827 }, { "epoch": 0.2624593967517401, "grad_norm": 38.240604400634766, "learning_rate": 8.817038059351481e-06, "loss": 24.1456, "step": 2828 }, { "epoch": 0.2625522041763341, "grad_norm": 34.65602111816406, "learning_rate": 8.816046668072727e-06, "loss": 25.8785, "step": 2829 }, { "epoch": 0.26264501160092807, "grad_norm": 37.4174919128418, "learning_rate": 8.815054917332071e-06, "loss": 24.0203, "step": 2830 }, { "epoch": 0.26273781902552207, "grad_norm": 37.713706970214844, "learning_rate": 8.814062807222938e-06, "loss": 23.3463, "step": 2831 }, { "epoch": 0.262830626450116, "grad_norm": 37.872711181640625, "learning_rate": 8.813070337838781e-06, "loss": 24.8772, "step": 2832 }, { "epoch": 0.26292343387470996, "grad_norm": 35.43479537963867, "learning_rate": 8.812077509273085e-06, "loss": 22.4027, "step": 2833 }, { "epoch": 0.26301624129930395, "grad_norm": 36.65686798095703, "learning_rate": 8.811084321619376e-06, "loss": 25.2368, "step": 2834 }, { "epoch": 0.2631090487238979, "grad_norm": 48.06331253051758, "learning_rate": 8.810090774971207e-06, "loss": 25.5045, "step": 2835 }, { "epoch": 0.2632018561484919, "grad_norm": 40.40300369262695, "learning_rate": 8.80909686942217e-06, "loss": 22.8466, "step": 2836 }, { "epoch": 0.26329466357308584, "grad_norm": 35.8880615234375, "learning_rate": 8.808102605065887e-06, "loss": 23.919, "step": 2837 }, { "epoch": 0.26338747099767984, "grad_norm": 35.91891860961914, "learning_rate": 8.807107981996014e-06, "loss": 23.082, "step": 2838 }, { "epoch": 0.2634802784222738, "grad_norm": 37.781005859375, "learning_rate": 8.806113000306242e-06, "loss": 25.1716, "step": 2839 }, { "epoch": 0.2635730858468677, "grad_norm": 39.743446350097656, "learning_rate": 8.805117660090299e-06, "loss": 25.0352, "step": 2840 }, { "epoch": 0.2636658932714617, "grad_norm": 37.27666091918945, "learning_rate": 8.80412196144194e-06, "loss": 24.5079, "step": 2841 }, { "epoch": 0.26375870069605567, "grad_norm": 35.52723693847656, "learning_rate": 8.803125904454961e-06, "loss": 26.5244, "step": 2842 }, { "epoch": 0.26385150812064967, "grad_norm": 34.58030700683594, "learning_rate": 8.802129489223184e-06, "loss": 23.5334, "step": 2843 }, { "epoch": 0.2639443155452436, "grad_norm": 35.950199127197266, "learning_rate": 8.80113271584047e-06, "loss": 25.0031, "step": 2844 }, { "epoch": 0.2640371229698376, "grad_norm": 36.912986755371094, "learning_rate": 8.800135584400714e-06, "loss": 22.6918, "step": 2845 }, { "epoch": 0.26412993039443156, "grad_norm": 37.61330032348633, "learning_rate": 8.799138094997841e-06, "loss": 23.518, "step": 2846 }, { "epoch": 0.2642227378190255, "grad_norm": 36.39005661010742, "learning_rate": 8.798140247725815e-06, "loss": 25.7465, "step": 2847 }, { "epoch": 0.2643155452436195, "grad_norm": 35.4891357421875, "learning_rate": 8.797142042678628e-06, "loss": 21.4417, "step": 2848 }, { "epoch": 0.26440835266821344, "grad_norm": 36.594268798828125, "learning_rate": 8.79614347995031e-06, "loss": 23.9801, "step": 2849 }, { "epoch": 0.26450116009280744, "grad_norm": 39.43661880493164, "learning_rate": 8.795144559634921e-06, "loss": 25.6344, "step": 2850 }, { "epoch": 0.2645939675174014, "grad_norm": 37.99811553955078, "learning_rate": 8.794145281826558e-06, "loss": 23.5591, "step": 2851 }, { "epoch": 0.2646867749419954, "grad_norm": 35.28712844848633, "learning_rate": 8.793145646619352e-06, "loss": 24.814, "step": 2852 }, { "epoch": 0.26477958236658933, "grad_norm": 33.21434783935547, "learning_rate": 8.792145654107465e-06, "loss": 23.5897, "step": 2853 }, { "epoch": 0.26487238979118327, "grad_norm": 43.79789352416992, "learning_rate": 8.791145304385092e-06, "loss": 24.7938, "step": 2854 }, { "epoch": 0.26496519721577727, "grad_norm": 33.2159423828125, "learning_rate": 8.790144597546463e-06, "loss": 23.9293, "step": 2855 }, { "epoch": 0.2650580046403712, "grad_norm": 38.12098693847656, "learning_rate": 8.789143533685847e-06, "loss": 25.5775, "step": 2856 }, { "epoch": 0.2651508120649652, "grad_norm": 35.84138488769531, "learning_rate": 8.788142112897538e-06, "loss": 23.612, "step": 2857 }, { "epoch": 0.26524361948955916, "grad_norm": 36.09181213378906, "learning_rate": 8.787140335275868e-06, "loss": 22.7731, "step": 2858 }, { "epoch": 0.26533642691415316, "grad_norm": 36.09479522705078, "learning_rate": 8.7861382009152e-06, "loss": 25.4895, "step": 2859 }, { "epoch": 0.2654292343387471, "grad_norm": 35.798500061035156, "learning_rate": 8.785135709909935e-06, "loss": 22.0381, "step": 2860 }, { "epoch": 0.26552204176334104, "grad_norm": 36.5163688659668, "learning_rate": 8.784132862354505e-06, "loss": 22.4011, "step": 2861 }, { "epoch": 0.26561484918793504, "grad_norm": 37.70680618286133, "learning_rate": 8.783129658343375e-06, "loss": 25.4989, "step": 2862 }, { "epoch": 0.265707656612529, "grad_norm": 37.130226135253906, "learning_rate": 8.782126097971042e-06, "loss": 24.1416, "step": 2863 }, { "epoch": 0.265800464037123, "grad_norm": 36.01827621459961, "learning_rate": 8.781122181332044e-06, "loss": 25.0294, "step": 2864 }, { "epoch": 0.26589327146171693, "grad_norm": 33.6453971862793, "learning_rate": 8.780117908520942e-06, "loss": 22.7411, "step": 2865 }, { "epoch": 0.26598607888631093, "grad_norm": 34.85501480102539, "learning_rate": 8.779113279632341e-06, "loss": 22.1198, "step": 2866 }, { "epoch": 0.26607888631090487, "grad_norm": 36.961265563964844, "learning_rate": 8.778108294760869e-06, "loss": 24.2822, "step": 2867 }, { "epoch": 0.2661716937354988, "grad_norm": 34.71556854248047, "learning_rate": 8.777102954001199e-06, "loss": 24.1122, "step": 2868 }, { "epoch": 0.2662645011600928, "grad_norm": 43.78834915161133, "learning_rate": 8.776097257448028e-06, "loss": 25.947, "step": 2869 }, { "epoch": 0.26635730858468676, "grad_norm": 34.16387939453125, "learning_rate": 8.775091205196088e-06, "loss": 24.0596, "step": 2870 }, { "epoch": 0.26645011600928076, "grad_norm": 36.07855987548828, "learning_rate": 8.774084797340153e-06, "loss": 22.967, "step": 2871 }, { "epoch": 0.2665429234338747, "grad_norm": 37.105628967285156, "learning_rate": 8.773078033975017e-06, "loss": 24.0141, "step": 2872 }, { "epoch": 0.2666357308584687, "grad_norm": 36.562740325927734, "learning_rate": 8.77207091519552e-06, "loss": 22.8653, "step": 2873 }, { "epoch": 0.26672853828306264, "grad_norm": 37.86820983886719, "learning_rate": 8.771063441096527e-06, "loss": 24.2372, "step": 2874 }, { "epoch": 0.2668213457076566, "grad_norm": 34.81201171875, "learning_rate": 8.770055611772939e-06, "loss": 23.9687, "step": 2875 }, { "epoch": 0.2669141531322506, "grad_norm": 37.27327346801758, "learning_rate": 8.769047427319693e-06, "loss": 25.0025, "step": 2876 }, { "epoch": 0.26700696055684453, "grad_norm": 34.98536682128906, "learning_rate": 8.768038887831758e-06, "loss": 24.1873, "step": 2877 }, { "epoch": 0.26709976798143853, "grad_norm": 35.563682556152344, "learning_rate": 8.767029993404132e-06, "loss": 23.444, "step": 2878 }, { "epoch": 0.2671925754060325, "grad_norm": 39.09500503540039, "learning_rate": 8.766020744131856e-06, "loss": 23.8016, "step": 2879 }, { "epoch": 0.2672853828306265, "grad_norm": 35.59029006958008, "learning_rate": 8.765011140109993e-06, "loss": 22.4101, "step": 2880 }, { "epoch": 0.2673781902552204, "grad_norm": 37.62373733520508, "learning_rate": 8.76400118143365e-06, "loss": 24.1697, "step": 2881 }, { "epoch": 0.26747099767981436, "grad_norm": 34.17100524902344, "learning_rate": 8.762990868197958e-06, "loss": 21.816, "step": 2882 }, { "epoch": 0.26756380510440836, "grad_norm": 34.42733383178711, "learning_rate": 8.761980200498088e-06, "loss": 23.9616, "step": 2883 }, { "epoch": 0.2676566125290023, "grad_norm": 33.34091567993164, "learning_rate": 8.760969178429244e-06, "loss": 22.3612, "step": 2884 }, { "epoch": 0.2677494199535963, "grad_norm": 38.427371978759766, "learning_rate": 8.759957802086658e-06, "loss": 23.846, "step": 2885 }, { "epoch": 0.26784222737819025, "grad_norm": 34.496456146240234, "learning_rate": 8.758946071565605e-06, "loss": 23.2245, "step": 2886 }, { "epoch": 0.26793503480278424, "grad_norm": 37.05408477783203, "learning_rate": 8.75793398696138e-06, "loss": 24.1621, "step": 2887 }, { "epoch": 0.2680278422273782, "grad_norm": 37.479148864746094, "learning_rate": 8.756921548369323e-06, "loss": 24.3023, "step": 2888 }, { "epoch": 0.26812064965197213, "grad_norm": 35.890663146972656, "learning_rate": 8.755908755884805e-06, "loss": 22.8138, "step": 2889 }, { "epoch": 0.26821345707656613, "grad_norm": 39.42181396484375, "learning_rate": 8.754895609603225e-06, "loss": 24.4256, "step": 2890 }, { "epoch": 0.2683062645011601, "grad_norm": 39.05592346191406, "learning_rate": 8.75388210962002e-06, "loss": 22.4411, "step": 2891 }, { "epoch": 0.2683990719257541, "grad_norm": 39.73920440673828, "learning_rate": 8.752868256030658e-06, "loss": 24.3786, "step": 2892 }, { "epoch": 0.268491879350348, "grad_norm": 39.389564514160156, "learning_rate": 8.751854048930643e-06, "loss": 26.7039, "step": 2893 }, { "epoch": 0.268584686774942, "grad_norm": 36.20606231689453, "learning_rate": 8.75083948841551e-06, "loss": 24.1688, "step": 2894 }, { "epoch": 0.26867749419953596, "grad_norm": 34.2805290222168, "learning_rate": 8.749824574580827e-06, "loss": 24.4235, "step": 2895 }, { "epoch": 0.2687703016241299, "grad_norm": 33.957008361816406, "learning_rate": 8.748809307522201e-06, "loss": 22.799, "step": 2896 }, { "epoch": 0.2688631090487239, "grad_norm": 38.43251037597656, "learning_rate": 8.747793687335262e-06, "loss": 23.5882, "step": 2897 }, { "epoch": 0.26895591647331785, "grad_norm": 38.32306671142578, "learning_rate": 8.746777714115681e-06, "loss": 24.6404, "step": 2898 }, { "epoch": 0.26904872389791185, "grad_norm": 40.46121597290039, "learning_rate": 8.745761387959159e-06, "loss": 24.4097, "step": 2899 }, { "epoch": 0.2691415313225058, "grad_norm": 35.68126678466797, "learning_rate": 8.744744708961435e-06, "loss": 24.4677, "step": 2900 }, { "epoch": 0.2692343387470998, "grad_norm": 34.61910629272461, "learning_rate": 8.743727677218274e-06, "loss": 24.4033, "step": 2901 }, { "epoch": 0.26932714617169373, "grad_norm": 38.73149108886719, "learning_rate": 8.742710292825477e-06, "loss": 23.1007, "step": 2902 }, { "epoch": 0.2694199535962877, "grad_norm": 35.41139602661133, "learning_rate": 8.741692555878883e-06, "loss": 23.4975, "step": 2903 }, { "epoch": 0.2695127610208817, "grad_norm": 45.076534271240234, "learning_rate": 8.740674466474357e-06, "loss": 26.7435, "step": 2904 }, { "epoch": 0.2696055684454756, "grad_norm": 36.43702697753906, "learning_rate": 8.739656024707802e-06, "loss": 23.4589, "step": 2905 }, { "epoch": 0.2696983758700696, "grad_norm": 36.931549072265625, "learning_rate": 8.738637230675152e-06, "loss": 23.8751, "step": 2906 }, { "epoch": 0.26979118329466356, "grad_norm": 38.58561706542969, "learning_rate": 8.737618084472375e-06, "loss": 24.5149, "step": 2907 }, { "epoch": 0.26988399071925756, "grad_norm": 32.98828887939453, "learning_rate": 8.736598586195472e-06, "loss": 23.0726, "step": 2908 }, { "epoch": 0.2699767981438515, "grad_norm": 41.035579681396484, "learning_rate": 8.735578735940475e-06, "loss": 27.8142, "step": 2909 }, { "epoch": 0.2700696055684455, "grad_norm": 38.210140228271484, "learning_rate": 8.734558533803456e-06, "loss": 23.22, "step": 2910 }, { "epoch": 0.27016241299303945, "grad_norm": 37.59864044189453, "learning_rate": 8.73353797988051e-06, "loss": 26.9359, "step": 2911 }, { "epoch": 0.2702552204176334, "grad_norm": 35.314598083496094, "learning_rate": 8.732517074267777e-06, "loss": 25.4777, "step": 2912 }, { "epoch": 0.2703480278422274, "grad_norm": 38.01634216308594, "learning_rate": 8.731495817061419e-06, "loss": 21.914, "step": 2913 }, { "epoch": 0.27044083526682133, "grad_norm": 38.411354064941406, "learning_rate": 8.730474208357636e-06, "loss": 24.2002, "step": 2914 }, { "epoch": 0.27053364269141533, "grad_norm": 35.47084045410156, "learning_rate": 8.729452248252662e-06, "loss": 23.8468, "step": 2915 }, { "epoch": 0.2706264501160093, "grad_norm": 37.21380615234375, "learning_rate": 8.728429936842762e-06, "loss": 25.889, "step": 2916 }, { "epoch": 0.2707192575406033, "grad_norm": 37.799861907958984, "learning_rate": 8.727407274224238e-06, "loss": 22.2814, "step": 2917 }, { "epoch": 0.2708120649651972, "grad_norm": 35.5237922668457, "learning_rate": 8.726384260493419e-06, "loss": 25.9352, "step": 2918 }, { "epoch": 0.27090487238979116, "grad_norm": 36.12470245361328, "learning_rate": 8.725360895746671e-06, "loss": 22.2466, "step": 2919 }, { "epoch": 0.27099767981438516, "grad_norm": 57.123050689697266, "learning_rate": 8.724337180080394e-06, "loss": 23.4757, "step": 2920 }, { "epoch": 0.2710904872389791, "grad_norm": 39.65033721923828, "learning_rate": 8.723313113591019e-06, "loss": 24.31, "step": 2921 }, { "epoch": 0.2711832946635731, "grad_norm": 341.45770263671875, "learning_rate": 8.722288696375009e-06, "loss": 25.3671, "step": 2922 }, { "epoch": 0.27127610208816705, "grad_norm": 40.52033996582031, "learning_rate": 8.721263928528863e-06, "loss": 22.7347, "step": 2923 }, { "epoch": 0.27136890951276105, "grad_norm": 38.901737213134766, "learning_rate": 8.720238810149108e-06, "loss": 22.86, "step": 2924 }, { "epoch": 0.271461716937355, "grad_norm": 36.233367919921875, "learning_rate": 8.719213341332313e-06, "loss": 24.4765, "step": 2925 }, { "epoch": 0.27155452436194893, "grad_norm": 42.140647888183594, "learning_rate": 8.718187522175072e-06, "loss": 25.8623, "step": 2926 }, { "epoch": 0.27164733178654293, "grad_norm": 38.52034378051758, "learning_rate": 8.717161352774013e-06, "loss": 23.9221, "step": 2927 }, { "epoch": 0.2717401392111369, "grad_norm": 38.400550842285156, "learning_rate": 8.716134833225803e-06, "loss": 24.2082, "step": 2928 }, { "epoch": 0.2718329466357309, "grad_norm": 44.75780487060547, "learning_rate": 8.71510796362713e-06, "loss": 23.5875, "step": 2929 }, { "epoch": 0.2719257540603248, "grad_norm": 38.67011642456055, "learning_rate": 8.714080744074731e-06, "loss": 22.3257, "step": 2930 }, { "epoch": 0.2720185614849188, "grad_norm": 35.153045654296875, "learning_rate": 8.71305317466536e-06, "loss": 23.6388, "step": 2931 }, { "epoch": 0.27211136890951276, "grad_norm": 34.049888610839844, "learning_rate": 8.712025255495817e-06, "loss": 24.4973, "step": 2932 }, { "epoch": 0.2722041763341067, "grad_norm": 39.018898010253906, "learning_rate": 8.710996986662928e-06, "loss": 24.4884, "step": 2933 }, { "epoch": 0.2722969837587007, "grad_norm": 42.57215881347656, "learning_rate": 8.709968368263553e-06, "loss": 25.7937, "step": 2934 }, { "epoch": 0.27238979118329465, "grad_norm": 45.20417785644531, "learning_rate": 8.708939400394584e-06, "loss": 24.5648, "step": 2935 }, { "epoch": 0.27248259860788865, "grad_norm": 35.72966003417969, "learning_rate": 8.707910083152949e-06, "loss": 23.9023, "step": 2936 }, { "epoch": 0.2725754060324826, "grad_norm": 38.13887405395508, "learning_rate": 8.706880416635603e-06, "loss": 22.734, "step": 2937 }, { "epoch": 0.2726682134570766, "grad_norm": 34.8449821472168, "learning_rate": 8.705850400939545e-06, "loss": 23.1272, "step": 2938 }, { "epoch": 0.27276102088167054, "grad_norm": 34.56462097167969, "learning_rate": 8.704820036161795e-06, "loss": 22.2735, "step": 2939 }, { "epoch": 0.2728538283062645, "grad_norm": 36.8552131652832, "learning_rate": 8.70378932239941e-06, "loss": 25.9737, "step": 2940 }, { "epoch": 0.2729466357308585, "grad_norm": 34.91926193237305, "learning_rate": 8.702758259749481e-06, "loss": 24.8725, "step": 2941 }, { "epoch": 0.2730394431554524, "grad_norm": 35.335025787353516, "learning_rate": 8.701726848309137e-06, "loss": 23.0751, "step": 2942 }, { "epoch": 0.2731322505800464, "grad_norm": 36.9617919921875, "learning_rate": 8.700695088175528e-06, "loss": 24.62, "step": 2943 }, { "epoch": 0.27322505800464036, "grad_norm": 35.763954162597656, "learning_rate": 8.699662979445846e-06, "loss": 24.0711, "step": 2944 }, { "epoch": 0.27331786542923436, "grad_norm": 44.10129165649414, "learning_rate": 8.69863052221731e-06, "loss": 25.788, "step": 2945 }, { "epoch": 0.2734106728538283, "grad_norm": 36.96754455566406, "learning_rate": 8.697597716587181e-06, "loss": 23.3868, "step": 2946 }, { "epoch": 0.27350348027842225, "grad_norm": 40.406917572021484, "learning_rate": 8.69656456265274e-06, "loss": 25.7367, "step": 2947 }, { "epoch": 0.27359628770301625, "grad_norm": 33.8978157043457, "learning_rate": 8.69553106051131e-06, "loss": 22.4299, "step": 2948 }, { "epoch": 0.2736890951276102, "grad_norm": 37.27937698364258, "learning_rate": 8.694497210260247e-06, "loss": 24.3325, "step": 2949 }, { "epoch": 0.2737819025522042, "grad_norm": 35.92909240722656, "learning_rate": 8.693463011996934e-06, "loss": 24.1036, "step": 2950 }, { "epoch": 0.27387470997679814, "grad_norm": 36.6525764465332, "learning_rate": 8.69242846581879e-06, "loss": 23.0966, "step": 2951 }, { "epoch": 0.27396751740139214, "grad_norm": 35.00775909423828, "learning_rate": 8.691393571823266e-06, "loss": 24.34, "step": 2952 }, { "epoch": 0.2740603248259861, "grad_norm": 37.175987243652344, "learning_rate": 8.69035833010785e-06, "loss": 23.0384, "step": 2953 }, { "epoch": 0.27415313225058, "grad_norm": 38.10542678833008, "learning_rate": 8.689322740770054e-06, "loss": 23.9581, "step": 2954 }, { "epoch": 0.274245939675174, "grad_norm": 37.48463821411133, "learning_rate": 8.688286803907431e-06, "loss": 24.6289, "step": 2955 }, { "epoch": 0.27433874709976797, "grad_norm": 38.217071533203125, "learning_rate": 8.687250519617565e-06, "loss": 25.1346, "step": 2956 }, { "epoch": 0.27443155452436196, "grad_norm": 36.315887451171875, "learning_rate": 8.686213887998068e-06, "loss": 24.2634, "step": 2957 }, { "epoch": 0.2745243619489559, "grad_norm": 38.018775939941406, "learning_rate": 8.68517690914659e-06, "loss": 23.7544, "step": 2958 }, { "epoch": 0.2746171693735499, "grad_norm": 36.93827819824219, "learning_rate": 8.684139583160812e-06, "loss": 24.2904, "step": 2959 }, { "epoch": 0.27470997679814385, "grad_norm": 34.438297271728516, "learning_rate": 8.683101910138446e-06, "loss": 23.4376, "step": 2960 }, { "epoch": 0.2748027842227378, "grad_norm": 37.801334381103516, "learning_rate": 8.68206389017724e-06, "loss": 23.0746, "step": 2961 }, { "epoch": 0.2748955916473318, "grad_norm": 38.09806823730469, "learning_rate": 8.68102552337497e-06, "loss": 24.0603, "step": 2962 }, { "epoch": 0.27498839907192574, "grad_norm": 40.06195068359375, "learning_rate": 8.679986809829451e-06, "loss": 26.0449, "step": 2963 }, { "epoch": 0.27508120649651974, "grad_norm": 37.90944290161133, "learning_rate": 8.678947749638525e-06, "loss": 24.6213, "step": 2964 }, { "epoch": 0.2751740139211137, "grad_norm": 57.10788345336914, "learning_rate": 8.67790834290007e-06, "loss": 23.3923, "step": 2965 }, { "epoch": 0.2752668213457077, "grad_norm": 38.91672897338867, "learning_rate": 8.676868589711994e-06, "loss": 25.2277, "step": 2966 }, { "epoch": 0.2753596287703016, "grad_norm": 41.558448791503906, "learning_rate": 8.67582849017224e-06, "loss": 22.2774, "step": 2967 }, { "epoch": 0.27545243619489557, "grad_norm": 34.189456939697266, "learning_rate": 8.674788044378783e-06, "loss": 23.6973, "step": 2968 }, { "epoch": 0.27554524361948957, "grad_norm": 39.06397247314453, "learning_rate": 8.67374725242963e-06, "loss": 24.1785, "step": 2969 }, { "epoch": 0.2756380510440835, "grad_norm": 48.047908782958984, "learning_rate": 8.67270611442282e-06, "loss": 22.722, "step": 2970 }, { "epoch": 0.2757308584686775, "grad_norm": 37.78369903564453, "learning_rate": 8.671664630456428e-06, "loss": 23.0594, "step": 2971 }, { "epoch": 0.27582366589327145, "grad_norm": 37.39549255371094, "learning_rate": 8.670622800628557e-06, "loss": 21.8334, "step": 2972 }, { "epoch": 0.27591647331786545, "grad_norm": 65.07369995117188, "learning_rate": 8.669580625037344e-06, "loss": 22.715, "step": 2973 }, { "epoch": 0.2760092807424594, "grad_norm": 38.848609924316406, "learning_rate": 8.668538103780964e-06, "loss": 26.8483, "step": 2974 }, { "epoch": 0.27610208816705334, "grad_norm": 35.91777420043945, "learning_rate": 8.667495236957614e-06, "loss": 24.3558, "step": 2975 }, { "epoch": 0.27619489559164734, "grad_norm": 39.24696731567383, "learning_rate": 8.666452024665533e-06, "loss": 24.0257, "step": 2976 }, { "epoch": 0.2762877030162413, "grad_norm": 37.924903869628906, "learning_rate": 8.665408467002986e-06, "loss": 24.9813, "step": 2977 }, { "epoch": 0.2763805104408353, "grad_norm": 37.23207473754883, "learning_rate": 8.664364564068277e-06, "loss": 23.9362, "step": 2978 }, { "epoch": 0.2764733178654292, "grad_norm": 36.97885513305664, "learning_rate": 8.663320315959737e-06, "loss": 23.3698, "step": 2979 }, { "epoch": 0.2765661252900232, "grad_norm": 40.44609069824219, "learning_rate": 8.662275722775733e-06, "loss": 24.2462, "step": 2980 }, { "epoch": 0.27665893271461717, "grad_norm": 38.41627502441406, "learning_rate": 8.66123078461466e-06, "loss": 24.9648, "step": 2981 }, { "epoch": 0.2767517401392111, "grad_norm": 36.740169525146484, "learning_rate": 8.660185501574952e-06, "loss": 24.7446, "step": 2982 }, { "epoch": 0.2768445475638051, "grad_norm": 37.28315734863281, "learning_rate": 8.65913987375507e-06, "loss": 23.5699, "step": 2983 }, { "epoch": 0.27693735498839905, "grad_norm": 42.59230422973633, "learning_rate": 8.658093901253508e-06, "loss": 26.0173, "step": 2984 }, { "epoch": 0.27703016241299305, "grad_norm": 35.9691047668457, "learning_rate": 8.657047584168799e-06, "loss": 23.2532, "step": 2985 }, { "epoch": 0.277122969837587, "grad_norm": 35.924198150634766, "learning_rate": 8.656000922599499e-06, "loss": 24.3883, "step": 2986 }, { "epoch": 0.277215777262181, "grad_norm": 46.44746398925781, "learning_rate": 8.654953916644203e-06, "loss": 23.3961, "step": 2987 }, { "epoch": 0.27730858468677494, "grad_norm": 80.15156555175781, "learning_rate": 8.653906566401533e-06, "loss": 24.2285, "step": 2988 }, { "epoch": 0.2774013921113689, "grad_norm": 41.55921173095703, "learning_rate": 8.652858871970151e-06, "loss": 26.9488, "step": 2989 }, { "epoch": 0.2774941995359629, "grad_norm": 36.3670654296875, "learning_rate": 8.651810833448744e-06, "loss": 25.3978, "step": 2990 }, { "epoch": 0.2775870069605568, "grad_norm": 37.35905456542969, "learning_rate": 8.650762450936038e-06, "loss": 22.0119, "step": 2991 }, { "epoch": 0.2776798143851508, "grad_norm": 38.37041473388672, "learning_rate": 8.649713724530784e-06, "loss": 22.5725, "step": 2992 }, { "epoch": 0.27777262180974477, "grad_norm": 37.339210510253906, "learning_rate": 8.648664654331773e-06, "loss": 26.9817, "step": 2993 }, { "epoch": 0.27786542923433877, "grad_norm": 37.064918518066406, "learning_rate": 8.647615240437821e-06, "loss": 23.9951, "step": 2994 }, { "epoch": 0.2779582366589327, "grad_norm": 38.91670227050781, "learning_rate": 8.646565482947784e-06, "loss": 22.2261, "step": 2995 }, { "epoch": 0.27805104408352666, "grad_norm": 41.76258087158203, "learning_rate": 8.645515381960542e-06, "loss": 26.1486, "step": 2996 }, { "epoch": 0.27814385150812065, "grad_norm": 36.79225158691406, "learning_rate": 8.644464937575016e-06, "loss": 24.5749, "step": 2997 }, { "epoch": 0.2782366589327146, "grad_norm": 33.70272445678711, "learning_rate": 8.643414149890155e-06, "loss": 23.136, "step": 2998 }, { "epoch": 0.2783294663573086, "grad_norm": 36.55215835571289, "learning_rate": 8.642363019004938e-06, "loss": 22.7405, "step": 2999 }, { "epoch": 0.27842227378190254, "grad_norm": 45.73494338989258, "learning_rate": 8.64131154501838e-06, "loss": 27.0835, "step": 3000 }, { "epoch": 0.27851508120649654, "grad_norm": 32.91645812988281, "learning_rate": 8.640259728029528e-06, "loss": 23.0171, "step": 3001 }, { "epoch": 0.2786078886310905, "grad_norm": 37.22690200805664, "learning_rate": 8.639207568137457e-06, "loss": 22.9671, "step": 3002 }, { "epoch": 0.2787006960556844, "grad_norm": 37.74056625366211, "learning_rate": 8.638155065441283e-06, "loss": 24.4485, "step": 3003 }, { "epoch": 0.2787935034802784, "grad_norm": 40.925010681152344, "learning_rate": 8.637102220040145e-06, "loss": 25.4151, "step": 3004 }, { "epoch": 0.27888631090487237, "grad_norm": 38.33451461791992, "learning_rate": 8.636049032033221e-06, "loss": 24.2077, "step": 3005 }, { "epoch": 0.27897911832946637, "grad_norm": 37.8538932800293, "learning_rate": 8.634995501519718e-06, "loss": 23.2609, "step": 3006 }, { "epoch": 0.2790719257540603, "grad_norm": 38.09479904174805, "learning_rate": 8.633941628598874e-06, "loss": 26.0283, "step": 3007 }, { "epoch": 0.2791647331786543, "grad_norm": 34.505165100097656, "learning_rate": 8.632887413369964e-06, "loss": 23.5658, "step": 3008 }, { "epoch": 0.27925754060324826, "grad_norm": 36.52965545654297, "learning_rate": 8.631832855932289e-06, "loss": 24.5963, "step": 3009 }, { "epoch": 0.27935034802784225, "grad_norm": 37.294273376464844, "learning_rate": 8.63077795638519e-06, "loss": 24.1885, "step": 3010 }, { "epoch": 0.2794431554524362, "grad_norm": 37.0649299621582, "learning_rate": 8.629722714828031e-06, "loss": 23.752, "step": 3011 }, { "epoch": 0.27953596287703014, "grad_norm": 36.60750961303711, "learning_rate": 8.628667131360218e-06, "loss": 23.6937, "step": 3012 }, { "epoch": 0.27962877030162414, "grad_norm": 34.84831619262695, "learning_rate": 8.627611206081182e-06, "loss": 25.8281, "step": 3013 }, { "epoch": 0.2797215777262181, "grad_norm": 37.72500991821289, "learning_rate": 8.626554939090386e-06, "loss": 24.717, "step": 3014 }, { "epoch": 0.2798143851508121, "grad_norm": 37.10321807861328, "learning_rate": 8.625498330487331e-06, "loss": 25.6718, "step": 3015 }, { "epoch": 0.279907192575406, "grad_norm": 38.43797302246094, "learning_rate": 8.624441380371545e-06, "loss": 23.9523, "step": 3016 }, { "epoch": 0.28, "grad_norm": 38.46922302246094, "learning_rate": 8.623384088842593e-06, "loss": 23.4608, "step": 3017 }, { "epoch": 0.28009280742459397, "grad_norm": 34.60902404785156, "learning_rate": 8.622326456000065e-06, "loss": 23.3106, "step": 3018 }, { "epoch": 0.2801856148491879, "grad_norm": 36.979095458984375, "learning_rate": 8.62126848194359e-06, "loss": 24.498, "step": 3019 }, { "epoch": 0.2802784222737819, "grad_norm": 37.364986419677734, "learning_rate": 8.620210166772825e-06, "loss": 24.8371, "step": 3020 }, { "epoch": 0.28037122969837586, "grad_norm": 48.44660568237305, "learning_rate": 8.619151510587462e-06, "loss": 24.955, "step": 3021 }, { "epoch": 0.28046403712296986, "grad_norm": 36.74538040161133, "learning_rate": 8.618092513487223e-06, "loss": 24.7861, "step": 3022 }, { "epoch": 0.2805568445475638, "grad_norm": 35.41252136230469, "learning_rate": 8.617033175571864e-06, "loss": 24.3655, "step": 3023 }, { "epoch": 0.2806496519721578, "grad_norm": 40.10727310180664, "learning_rate": 8.61597349694117e-06, "loss": 21.9591, "step": 3024 }, { "epoch": 0.28074245939675174, "grad_norm": 50.78147506713867, "learning_rate": 8.614913477694961e-06, "loss": 22.5963, "step": 3025 }, { "epoch": 0.2808352668213457, "grad_norm": 38.969173431396484, "learning_rate": 8.613853117933089e-06, "loss": 25.3165, "step": 3026 }, { "epoch": 0.2809280742459397, "grad_norm": 36.37342834472656, "learning_rate": 8.612792417755435e-06, "loss": 24.6275, "step": 3027 }, { "epoch": 0.28102088167053363, "grad_norm": 38.2716064453125, "learning_rate": 8.611731377261916e-06, "loss": 23.7421, "step": 3028 }, { "epoch": 0.28111368909512763, "grad_norm": 34.71923828125, "learning_rate": 8.61066999655248e-06, "loss": 23.6736, "step": 3029 }, { "epoch": 0.28120649651972157, "grad_norm": 38.547428131103516, "learning_rate": 8.609608275727102e-06, "loss": 24.8562, "step": 3030 }, { "epoch": 0.28129930394431557, "grad_norm": 36.220462799072266, "learning_rate": 8.608546214885797e-06, "loss": 25.6428, "step": 3031 }, { "epoch": 0.2813921113689095, "grad_norm": 38.25474548339844, "learning_rate": 8.607483814128611e-06, "loss": 23.8579, "step": 3032 }, { "epoch": 0.28148491879350346, "grad_norm": 45.89852523803711, "learning_rate": 8.606421073555615e-06, "loss": 23.8836, "step": 3033 }, { "epoch": 0.28157772621809746, "grad_norm": 38.22623062133789, "learning_rate": 8.605357993266919e-06, "loss": 24.5153, "step": 3034 }, { "epoch": 0.2816705336426914, "grad_norm": 40.21441650390625, "learning_rate": 8.604294573362658e-06, "loss": 23.4849, "step": 3035 }, { "epoch": 0.2817633410672854, "grad_norm": 56.014713287353516, "learning_rate": 8.60323081394301e-06, "loss": 24.2836, "step": 3036 }, { "epoch": 0.28185614849187934, "grad_norm": 39.57821273803711, "learning_rate": 8.602166715108176e-06, "loss": 22.9213, "step": 3037 }, { "epoch": 0.28194895591647334, "grad_norm": 43.84012985229492, "learning_rate": 8.601102276958388e-06, "loss": 22.8517, "step": 3038 }, { "epoch": 0.2820417633410673, "grad_norm": 38.247108459472656, "learning_rate": 8.600037499593919e-06, "loss": 24.3285, "step": 3039 }, { "epoch": 0.28213457076566123, "grad_norm": 37.2197265625, "learning_rate": 8.598972383115062e-06, "loss": 23.5094, "step": 3040 }, { "epoch": 0.28222737819025523, "grad_norm": 38.94715881347656, "learning_rate": 8.597906927622153e-06, "loss": 23.0416, "step": 3041 }, { "epoch": 0.2823201856148492, "grad_norm": 42.43755340576172, "learning_rate": 8.596841133215554e-06, "loss": 22.6101, "step": 3042 }, { "epoch": 0.28241299303944317, "grad_norm": 51.80771255493164, "learning_rate": 8.595774999995661e-06, "loss": 24.1505, "step": 3043 }, { "epoch": 0.2825058004640371, "grad_norm": 43.284664154052734, "learning_rate": 8.594708528062899e-06, "loss": 23.4553, "step": 3044 }, { "epoch": 0.2825986078886311, "grad_norm": 46.56657028198242, "learning_rate": 8.59364171751773e-06, "loss": 24.9353, "step": 3045 }, { "epoch": 0.28269141531322506, "grad_norm": 37.39963150024414, "learning_rate": 8.592574568460638e-06, "loss": 23.6667, "step": 3046 }, { "epoch": 0.282784222737819, "grad_norm": 37.222469329833984, "learning_rate": 8.591507080992153e-06, "loss": 25.0879, "step": 3047 }, { "epoch": 0.282877030162413, "grad_norm": 34.78763198852539, "learning_rate": 8.59043925521283e-06, "loss": 23.5717, "step": 3048 }, { "epoch": 0.28296983758700694, "grad_norm": 36.22536087036133, "learning_rate": 8.589371091223248e-06, "loss": 23.0082, "step": 3049 }, { "epoch": 0.28306264501160094, "grad_norm": 37.01848602294922, "learning_rate": 8.588302589124033e-06, "loss": 23.3532, "step": 3050 }, { "epoch": 0.2831554524361949, "grad_norm": 37.02360153198242, "learning_rate": 8.587233749015833e-06, "loss": 25.0972, "step": 3051 }, { "epoch": 0.2832482598607889, "grad_norm": 39.39779281616211, "learning_rate": 8.586164570999327e-06, "loss": 24.4228, "step": 3052 }, { "epoch": 0.28334106728538283, "grad_norm": 38.53446578979492, "learning_rate": 8.585095055175231e-06, "loss": 24.4752, "step": 3053 }, { "epoch": 0.2834338747099768, "grad_norm": 34.0933723449707, "learning_rate": 8.584025201644292e-06, "loss": 22.9209, "step": 3054 }, { "epoch": 0.2835266821345708, "grad_norm": 39.23346710205078, "learning_rate": 8.582955010507285e-06, "loss": 23.5771, "step": 3055 }, { "epoch": 0.2836194895591647, "grad_norm": 46.94480514526367, "learning_rate": 8.58188448186502e-06, "loss": 25.2417, "step": 3056 }, { "epoch": 0.2837122969837587, "grad_norm": 37.653236389160156, "learning_rate": 8.58081361581834e-06, "loss": 23.8124, "step": 3057 }, { "epoch": 0.28380510440835266, "grad_norm": 35.357086181640625, "learning_rate": 8.579742412468116e-06, "loss": 23.0462, "step": 3058 }, { "epoch": 0.28389791183294666, "grad_norm": 40.533958435058594, "learning_rate": 8.578670871915253e-06, "loss": 23.9176, "step": 3059 }, { "epoch": 0.2839907192575406, "grad_norm": 39.4287109375, "learning_rate": 8.577598994260687e-06, "loss": 23.685, "step": 3060 }, { "epoch": 0.28408352668213455, "grad_norm": 38.12900924682617, "learning_rate": 8.576526779605387e-06, "loss": 23.8227, "step": 3061 }, { "epoch": 0.28417633410672855, "grad_norm": 35.630638122558594, "learning_rate": 8.575454228050352e-06, "loss": 22.6456, "step": 3062 }, { "epoch": 0.2842691415313225, "grad_norm": 40.73434829711914, "learning_rate": 8.574381339696614e-06, "loss": 24.4105, "step": 3063 }, { "epoch": 0.2843619489559165, "grad_norm": 38.5074462890625, "learning_rate": 8.573308114645239e-06, "loss": 24.5787, "step": 3064 }, { "epoch": 0.28445475638051043, "grad_norm": 38.48356246948242, "learning_rate": 8.572234552997317e-06, "loss": 23.667, "step": 3065 }, { "epoch": 0.28454756380510443, "grad_norm": 36.506778717041016, "learning_rate": 8.571160654853976e-06, "loss": 24.1438, "step": 3066 }, { "epoch": 0.2846403712296984, "grad_norm": 36.86137390136719, "learning_rate": 8.570086420316378e-06, "loss": 25.6564, "step": 3067 }, { "epoch": 0.2847331786542923, "grad_norm": 45.82810592651367, "learning_rate": 8.56901184948571e-06, "loss": 24.1861, "step": 3068 }, { "epoch": 0.2848259860788863, "grad_norm": 38.88282775878906, "learning_rate": 8.567936942463196e-06, "loss": 25.1502, "step": 3069 }, { "epoch": 0.28491879350348026, "grad_norm": 36.55162048339844, "learning_rate": 8.566861699350086e-06, "loss": 24.2278, "step": 3070 }, { "epoch": 0.28501160092807426, "grad_norm": 34.813507080078125, "learning_rate": 8.56578612024767e-06, "loss": 23.8971, "step": 3071 }, { "epoch": 0.2851044083526682, "grad_norm": 40.010108947753906, "learning_rate": 8.56471020525726e-06, "loss": 22.7395, "step": 3072 }, { "epoch": 0.2851972157772622, "grad_norm": 37.72446823120117, "learning_rate": 8.563633954480209e-06, "loss": 23.7944, "step": 3073 }, { "epoch": 0.28529002320185615, "grad_norm": 39.42986297607422, "learning_rate": 8.562557368017895e-06, "loss": 23.7692, "step": 3074 }, { "epoch": 0.2853828306264501, "grad_norm": 37.909297943115234, "learning_rate": 8.561480445971727e-06, "loss": 25.7119, "step": 3075 }, { "epoch": 0.2854756380510441, "grad_norm": 34.69954299926758, "learning_rate": 8.560403188443152e-06, "loss": 23.3191, "step": 3076 }, { "epoch": 0.28556844547563803, "grad_norm": 41.85603713989258, "learning_rate": 8.559325595533644e-06, "loss": 22.9228, "step": 3077 }, { "epoch": 0.28566125290023203, "grad_norm": 37.03345489501953, "learning_rate": 8.55824766734471e-06, "loss": 23.8799, "step": 3078 }, { "epoch": 0.285754060324826, "grad_norm": 37.16073226928711, "learning_rate": 8.557169403977887e-06, "loss": 25.406, "step": 3079 }, { "epoch": 0.28584686774942, "grad_norm": 37.80863952636719, "learning_rate": 8.556090805534745e-06, "loss": 23.6714, "step": 3080 }, { "epoch": 0.2859396751740139, "grad_norm": 40.28486633300781, "learning_rate": 8.555011872116885e-06, "loss": 23.8761, "step": 3081 }, { "epoch": 0.28603248259860786, "grad_norm": 37.992401123046875, "learning_rate": 8.55393260382594e-06, "loss": 24.6195, "step": 3082 }, { "epoch": 0.28612529002320186, "grad_norm": 39.273921966552734, "learning_rate": 8.552853000763575e-06, "loss": 26.3339, "step": 3083 }, { "epoch": 0.2862180974477958, "grad_norm": 36.87516784667969, "learning_rate": 8.551773063031484e-06, "loss": 22.6883, "step": 3084 }, { "epoch": 0.2863109048723898, "grad_norm": 38.83528518676758, "learning_rate": 8.550692790731396e-06, "loss": 22.6398, "step": 3085 }, { "epoch": 0.28640371229698375, "grad_norm": 52.17118835449219, "learning_rate": 8.54961218396507e-06, "loss": 22.9216, "step": 3086 }, { "epoch": 0.28649651972157775, "grad_norm": 36.862754821777344, "learning_rate": 8.548531242834298e-06, "loss": 23.4869, "step": 3087 }, { "epoch": 0.2865893271461717, "grad_norm": 39.67449188232422, "learning_rate": 8.547449967440896e-06, "loss": 23.317, "step": 3088 }, { "epoch": 0.28668213457076563, "grad_norm": 56.972434997558594, "learning_rate": 8.546368357886724e-06, "loss": 23.7233, "step": 3089 }, { "epoch": 0.28677494199535963, "grad_norm": 36.50337600708008, "learning_rate": 8.545286414273663e-06, "loss": 24.9598, "step": 3090 }, { "epoch": 0.2868677494199536, "grad_norm": 40.316715240478516, "learning_rate": 8.544204136703631e-06, "loss": 24.2202, "step": 3091 }, { "epoch": 0.2869605568445476, "grad_norm": 41.60371017456055, "learning_rate": 8.543121525278575e-06, "loss": 22.8534, "step": 3092 }, { "epoch": 0.2870533642691415, "grad_norm": 39.38489532470703, "learning_rate": 8.542038580100476e-06, "loss": 22.5722, "step": 3093 }, { "epoch": 0.2871461716937355, "grad_norm": 35.88752365112305, "learning_rate": 8.54095530127134e-06, "loss": 22.5894, "step": 3094 }, { "epoch": 0.28723897911832946, "grad_norm": 42.029518127441406, "learning_rate": 8.539871688893216e-06, "loss": 27.7514, "step": 3095 }, { "epoch": 0.2873317865429234, "grad_norm": 37.27233123779297, "learning_rate": 8.538787743068172e-06, "loss": 24.1729, "step": 3096 }, { "epoch": 0.2874245939675174, "grad_norm": 35.77939224243164, "learning_rate": 8.537703463898315e-06, "loss": 22.9061, "step": 3097 }, { "epoch": 0.28751740139211135, "grad_norm": 36.42778015136719, "learning_rate": 8.53661885148578e-06, "loss": 24.2236, "step": 3098 }, { "epoch": 0.28761020881670535, "grad_norm": 38.60282516479492, "learning_rate": 8.535533905932739e-06, "loss": 22.7123, "step": 3099 }, { "epoch": 0.2877030162412993, "grad_norm": 80.15902709960938, "learning_rate": 8.534448627341385e-06, "loss": 25.4384, "step": 3100 }, { "epoch": 0.2877958236658933, "grad_norm": 37.082584381103516, "learning_rate": 8.533363015813953e-06, "loss": 22.8201, "step": 3101 }, { "epoch": 0.28788863109048723, "grad_norm": 34.30775833129883, "learning_rate": 8.532277071452704e-06, "loss": 24.1751, "step": 3102 }, { "epoch": 0.28798143851508123, "grad_norm": 39.226253509521484, "learning_rate": 8.531190794359929e-06, "loss": 23.8656, "step": 3103 }, { "epoch": 0.2880742459396752, "grad_norm": 39.323326110839844, "learning_rate": 8.530104184637954e-06, "loss": 25.4028, "step": 3104 }, { "epoch": 0.2881670533642691, "grad_norm": 36.42726135253906, "learning_rate": 8.529017242389136e-06, "loss": 24.8796, "step": 3105 }, { "epoch": 0.2882598607888631, "grad_norm": 40.33916091918945, "learning_rate": 8.52792996771586e-06, "loss": 24.3152, "step": 3106 }, { "epoch": 0.28835266821345706, "grad_norm": 39.4041748046875, "learning_rate": 8.526842360720547e-06, "loss": 23.3498, "step": 3107 }, { "epoch": 0.28844547563805106, "grad_norm": 38.09064865112305, "learning_rate": 8.525754421505646e-06, "loss": 24.3502, "step": 3108 }, { "epoch": 0.288538283062645, "grad_norm": 35.98192596435547, "learning_rate": 8.524666150173634e-06, "loss": 23.3203, "step": 3109 }, { "epoch": 0.288631090487239, "grad_norm": 39.67578887939453, "learning_rate": 8.523577546827032e-06, "loss": 22.066, "step": 3110 }, { "epoch": 0.28872389791183295, "grad_norm": 43.52291488647461, "learning_rate": 8.522488611568375e-06, "loss": 23.1463, "step": 3111 }, { "epoch": 0.2888167053364269, "grad_norm": 37.702823638916016, "learning_rate": 8.521399344500241e-06, "loss": 24.2109, "step": 3112 }, { "epoch": 0.2889095127610209, "grad_norm": 36.2263069152832, "learning_rate": 8.520309745725239e-06, "loss": 25.0648, "step": 3113 }, { "epoch": 0.28900232018561484, "grad_norm": 36.545589447021484, "learning_rate": 8.519219815346004e-06, "loss": 23.5127, "step": 3114 }, { "epoch": 0.28909512761020884, "grad_norm": 38.43486404418945, "learning_rate": 8.518129553465204e-06, "loss": 23.797, "step": 3115 }, { "epoch": 0.2891879350348028, "grad_norm": 35.503047943115234, "learning_rate": 8.517038960185542e-06, "loss": 22.8141, "step": 3116 }, { "epoch": 0.2892807424593968, "grad_norm": 36.65224838256836, "learning_rate": 8.515948035609745e-06, "loss": 22.2901, "step": 3117 }, { "epoch": 0.2893735498839907, "grad_norm": 39.89836120605469, "learning_rate": 8.514856779840576e-06, "loss": 24.6231, "step": 3118 }, { "epoch": 0.28946635730858467, "grad_norm": 37.01218032836914, "learning_rate": 8.513765192980832e-06, "loss": 24.9492, "step": 3119 }, { "epoch": 0.28955916473317866, "grad_norm": 36.80245590209961, "learning_rate": 8.512673275133334e-06, "loss": 23.3614, "step": 3120 }, { "epoch": 0.2896519721577726, "grad_norm": 37.087215423583984, "learning_rate": 8.511581026400941e-06, "loss": 22.5169, "step": 3121 }, { "epoch": 0.2897447795823666, "grad_norm": 37.9779052734375, "learning_rate": 8.510488446886536e-06, "loss": 24.019, "step": 3122 }, { "epoch": 0.28983758700696055, "grad_norm": 42.07468795776367, "learning_rate": 8.509395536693042e-06, "loss": 24.5899, "step": 3123 }, { "epoch": 0.28993039443155455, "grad_norm": 34.77378845214844, "learning_rate": 8.508302295923405e-06, "loss": 23.8678, "step": 3124 }, { "epoch": 0.2900232018561485, "grad_norm": 41.09677505493164, "learning_rate": 8.507208724680607e-06, "loss": 22.3853, "step": 3125 }, { "epoch": 0.29011600928074244, "grad_norm": 45.10685348510742, "learning_rate": 8.506114823067657e-06, "loss": 24.3582, "step": 3126 }, { "epoch": 0.29020881670533644, "grad_norm": 36.61090087890625, "learning_rate": 8.505020591187601e-06, "loss": 24.019, "step": 3127 }, { "epoch": 0.2903016241299304, "grad_norm": 40.370216369628906, "learning_rate": 8.503926029143511e-06, "loss": 25.7953, "step": 3128 }, { "epoch": 0.2903944315545244, "grad_norm": 36.7525520324707, "learning_rate": 8.502831137038497e-06, "loss": 23.6639, "step": 3129 }, { "epoch": 0.2904872389791183, "grad_norm": 35.37474060058594, "learning_rate": 8.501735914975687e-06, "loss": 23.6065, "step": 3130 }, { "epoch": 0.2905800464037123, "grad_norm": 34.15646743774414, "learning_rate": 8.500640363058253e-06, "loss": 22.3302, "step": 3131 }, { "epoch": 0.29067285382830627, "grad_norm": 36.18010711669922, "learning_rate": 8.49954448138939e-06, "loss": 25.201, "step": 3132 }, { "epoch": 0.2907656612529002, "grad_norm": 798.514404296875, "learning_rate": 8.49844827007233e-06, "loss": 22.8771, "step": 3133 }, { "epoch": 0.2908584686774942, "grad_norm": 46.69921875, "learning_rate": 8.497351729210334e-06, "loss": 24.5107, "step": 3134 }, { "epoch": 0.29095127610208815, "grad_norm": 37.21987533569336, "learning_rate": 8.496254858906692e-06, "loss": 23.5279, "step": 3135 }, { "epoch": 0.29104408352668215, "grad_norm": 37.6861572265625, "learning_rate": 8.495157659264725e-06, "loss": 25.7835, "step": 3136 }, { "epoch": 0.2911368909512761, "grad_norm": 39.63367462158203, "learning_rate": 8.49406013038779e-06, "loss": 24.9631, "step": 3137 }, { "epoch": 0.2912296983758701, "grad_norm": 35.79695129394531, "learning_rate": 8.492962272379268e-06, "loss": 23.3743, "step": 3138 }, { "epoch": 0.29132250580046404, "grad_norm": 41.63576889038086, "learning_rate": 8.491864085342573e-06, "loss": 26.3854, "step": 3139 }, { "epoch": 0.291415313225058, "grad_norm": 39.182003021240234, "learning_rate": 8.490765569381159e-06, "loss": 24.8239, "step": 3140 }, { "epoch": 0.291508120649652, "grad_norm": 39.4730110168457, "learning_rate": 8.489666724598497e-06, "loss": 24.7908, "step": 3141 }, { "epoch": 0.2916009280742459, "grad_norm": 39.61781311035156, "learning_rate": 8.488567551098094e-06, "loss": 23.3039, "step": 3142 }, { "epoch": 0.2916937354988399, "grad_norm": 42.9014778137207, "learning_rate": 8.487468048983496e-06, "loss": 23.7023, "step": 3143 }, { "epoch": 0.29178654292343387, "grad_norm": 37.59280776977539, "learning_rate": 8.486368218358268e-06, "loss": 23.4703, "step": 3144 }, { "epoch": 0.29187935034802787, "grad_norm": 87.37649536132812, "learning_rate": 8.485268059326014e-06, "loss": 26.6706, "step": 3145 }, { "epoch": 0.2919721577726218, "grad_norm": 39.42948532104492, "learning_rate": 8.484167571990364e-06, "loss": 24.0596, "step": 3146 }, { "epoch": 0.29206496519721575, "grad_norm": 37.118309020996094, "learning_rate": 8.483066756454983e-06, "loss": 23.7618, "step": 3147 }, { "epoch": 0.29215777262180975, "grad_norm": 37.349239349365234, "learning_rate": 8.481965612823562e-06, "loss": 23.9177, "step": 3148 }, { "epoch": 0.2922505800464037, "grad_norm": 45.18541717529297, "learning_rate": 8.48086414119983e-06, "loss": 24.602, "step": 3149 }, { "epoch": 0.2923433874709977, "grad_norm": 36.44611740112305, "learning_rate": 8.47976234168754e-06, "loss": 23.3099, "step": 3150 }, { "epoch": 0.29243619489559164, "grad_norm": 35.52149200439453, "learning_rate": 8.478660214390481e-06, "loss": 21.8345, "step": 3151 }, { "epoch": 0.29252900232018564, "grad_norm": 38.07554244995117, "learning_rate": 8.477557759412467e-06, "loss": 23.6453, "step": 3152 }, { "epoch": 0.2926218097447796, "grad_norm": 33.69859313964844, "learning_rate": 8.476454976857352e-06, "loss": 22.7489, "step": 3153 }, { "epoch": 0.2927146171693735, "grad_norm": 35.05374526977539, "learning_rate": 8.47535186682901e-06, "loss": 24.0092, "step": 3154 }, { "epoch": 0.2928074245939675, "grad_norm": 38.426918029785156, "learning_rate": 8.474248429431353e-06, "loss": 23.0264, "step": 3155 }, { "epoch": 0.29290023201856147, "grad_norm": 34.59160614013672, "learning_rate": 8.473144664768322e-06, "loss": 23.7183, "step": 3156 }, { "epoch": 0.29299303944315547, "grad_norm": 39.546390533447266, "learning_rate": 8.47204057294389e-06, "loss": 24.7675, "step": 3157 }, { "epoch": 0.2930858468677494, "grad_norm": 36.348453521728516, "learning_rate": 8.470936154062056e-06, "loss": 23.8155, "step": 3158 }, { "epoch": 0.2931786542923434, "grad_norm": 37.22983169555664, "learning_rate": 8.469831408226858e-06, "loss": 23.1082, "step": 3159 }, { "epoch": 0.29327146171693735, "grad_norm": 36.705997467041016, "learning_rate": 8.468726335542358e-06, "loss": 22.2728, "step": 3160 }, { "epoch": 0.2933642691415313, "grad_norm": 41.2430305480957, "learning_rate": 8.467620936112653e-06, "loss": 23.9293, "step": 3161 }, { "epoch": 0.2934570765661253, "grad_norm": 42.18256759643555, "learning_rate": 8.466515210041866e-06, "loss": 26.1302, "step": 3162 }, { "epoch": 0.29354988399071924, "grad_norm": 40.023597717285156, "learning_rate": 8.465409157434155e-06, "loss": 23.877, "step": 3163 }, { "epoch": 0.29364269141531324, "grad_norm": 36.38922882080078, "learning_rate": 8.464302778393707e-06, "loss": 24.0814, "step": 3164 }, { "epoch": 0.2937354988399072, "grad_norm": 36.562870025634766, "learning_rate": 8.46319607302474e-06, "loss": 23.297, "step": 3165 }, { "epoch": 0.2938283062645012, "grad_norm": 35.07143783569336, "learning_rate": 8.462089041431503e-06, "loss": 23.8694, "step": 3166 }, { "epoch": 0.2939211136890951, "grad_norm": 36.43994903564453, "learning_rate": 8.460981683718275e-06, "loss": 25.6136, "step": 3167 }, { "epoch": 0.29401392111368907, "grad_norm": 35.3132438659668, "learning_rate": 8.459873999989367e-06, "loss": 23.6314, "step": 3168 }, { "epoch": 0.29410672853828307, "grad_norm": 34.97714614868164, "learning_rate": 8.458765990349121e-06, "loss": 22.7865, "step": 3169 }, { "epoch": 0.294199535962877, "grad_norm": 41.83669662475586, "learning_rate": 8.457657654901907e-06, "loss": 25.7796, "step": 3170 }, { "epoch": 0.294292343387471, "grad_norm": 38.53925323486328, "learning_rate": 8.456548993752128e-06, "loss": 23.609, "step": 3171 }, { "epoch": 0.29438515081206496, "grad_norm": 39.20094299316406, "learning_rate": 8.455440007004219e-06, "loss": 24.7344, "step": 3172 }, { "epoch": 0.29447795823665895, "grad_norm": 41.18183135986328, "learning_rate": 8.454330694762639e-06, "loss": 25.6988, "step": 3173 }, { "epoch": 0.2945707656612529, "grad_norm": 37.23411178588867, "learning_rate": 8.453221057131886e-06, "loss": 22.3979, "step": 3174 }, { "epoch": 0.29466357308584684, "grad_norm": 38.542816162109375, "learning_rate": 8.452111094216484e-06, "loss": 23.9331, "step": 3175 }, { "epoch": 0.29475638051044084, "grad_norm": 44.519920349121094, "learning_rate": 8.451000806120989e-06, "loss": 24.6036, "step": 3176 }, { "epoch": 0.2948491879350348, "grad_norm": 36.361331939697266, "learning_rate": 8.449890192949988e-06, "loss": 24.3694, "step": 3177 }, { "epoch": 0.2949419953596288, "grad_norm": 37.529972076416016, "learning_rate": 8.448779254808096e-06, "loss": 22.3222, "step": 3178 }, { "epoch": 0.2950348027842227, "grad_norm": 37.20332717895508, "learning_rate": 8.447667991799962e-06, "loss": 23.6789, "step": 3179 }, { "epoch": 0.2951276102088167, "grad_norm": 37.57679748535156, "learning_rate": 8.446556404030263e-06, "loss": 23.8426, "step": 3180 }, { "epoch": 0.29522041763341067, "grad_norm": 40.092315673828125, "learning_rate": 8.44544449160371e-06, "loss": 24.5498, "step": 3181 }, { "epoch": 0.2953132250580046, "grad_norm": 35.29002380371094, "learning_rate": 8.44433225462504e-06, "loss": 22.4332, "step": 3182 }, { "epoch": 0.2954060324825986, "grad_norm": 39.74557113647461, "learning_rate": 8.443219693199026e-06, "loss": 24.5569, "step": 3183 }, { "epoch": 0.29549883990719256, "grad_norm": 35.754032135009766, "learning_rate": 8.442106807430464e-06, "loss": 24.0423, "step": 3184 }, { "epoch": 0.29559164733178656, "grad_norm": 36.43916320800781, "learning_rate": 8.440993597424188e-06, "loss": 22.7284, "step": 3185 }, { "epoch": 0.2956844547563805, "grad_norm": 38.75927734375, "learning_rate": 8.43988006328506e-06, "loss": 22.8455, "step": 3186 }, { "epoch": 0.2957772621809745, "grad_norm": 36.83292007446289, "learning_rate": 8.438766205117968e-06, "loss": 23.7365, "step": 3187 }, { "epoch": 0.29587006960556844, "grad_norm": 51.121883392333984, "learning_rate": 8.43765202302784e-06, "loss": 24.4893, "step": 3188 }, { "epoch": 0.2959628770301624, "grad_norm": 41.28342056274414, "learning_rate": 8.436537517119626e-06, "loss": 24.0298, "step": 3189 }, { "epoch": 0.2960556844547564, "grad_norm": 41.414642333984375, "learning_rate": 8.435422687498309e-06, "loss": 23.4949, "step": 3190 }, { "epoch": 0.29614849187935033, "grad_norm": 41.9338493347168, "learning_rate": 8.434307534268907e-06, "loss": 23.0732, "step": 3191 }, { "epoch": 0.2962412993039443, "grad_norm": 36.74827194213867, "learning_rate": 8.433192057536458e-06, "loss": 22.9617, "step": 3192 }, { "epoch": 0.29633410672853827, "grad_norm": 33.073848724365234, "learning_rate": 8.432076257406046e-06, "loss": 23.1744, "step": 3193 }, { "epoch": 0.29642691415313227, "grad_norm": 37.82597351074219, "learning_rate": 8.430960133982769e-06, "loss": 23.3511, "step": 3194 }, { "epoch": 0.2965197215777262, "grad_norm": 37.166690826416016, "learning_rate": 8.429843687371766e-06, "loss": 24.2098, "step": 3195 }, { "epoch": 0.2966125290023202, "grad_norm": 40.29180145263672, "learning_rate": 8.428726917678201e-06, "loss": 23.677, "step": 3196 }, { "epoch": 0.29670533642691416, "grad_norm": 36.59696578979492, "learning_rate": 8.427609825007275e-06, "loss": 23.2711, "step": 3197 }, { "epoch": 0.2967981438515081, "grad_norm": 34.59064865112305, "learning_rate": 8.426492409464213e-06, "loss": 23.1738, "step": 3198 }, { "epoch": 0.2968909512761021, "grad_norm": 40.25275421142578, "learning_rate": 8.425374671154273e-06, "loss": 24.6691, "step": 3199 }, { "epoch": 0.29698375870069604, "grad_norm": 40.467994689941406, "learning_rate": 8.424256610182743e-06, "loss": 25.7037, "step": 3200 }, { "epoch": 0.29707656612529004, "grad_norm": 38.695987701416016, "learning_rate": 8.42313822665494e-06, "loss": 23.6832, "step": 3201 }, { "epoch": 0.297169373549884, "grad_norm": 35.17919158935547, "learning_rate": 8.422019520676217e-06, "loss": 23.5365, "step": 3202 }, { "epoch": 0.297262180974478, "grad_norm": 36.69892883300781, "learning_rate": 8.420900492351949e-06, "loss": 22.7758, "step": 3203 }, { "epoch": 0.29735498839907193, "grad_norm": 35.01581954956055, "learning_rate": 8.419781141787549e-06, "loss": 22.275, "step": 3204 }, { "epoch": 0.2974477958236659, "grad_norm": 38.222633361816406, "learning_rate": 8.418661469088453e-06, "loss": 22.4377, "step": 3205 }, { "epoch": 0.29754060324825987, "grad_norm": 36.007869720458984, "learning_rate": 8.417541474360134e-06, "loss": 23.2331, "step": 3206 }, { "epoch": 0.2976334106728538, "grad_norm": 36.80514144897461, "learning_rate": 8.416421157708092e-06, "loss": 24.4386, "step": 3207 }, { "epoch": 0.2977262180974478, "grad_norm": 35.664756774902344, "learning_rate": 8.41530051923786e-06, "loss": 24.5936, "step": 3208 }, { "epoch": 0.29781902552204176, "grad_norm": 37.20265579223633, "learning_rate": 8.414179559054995e-06, "loss": 25.2959, "step": 3209 }, { "epoch": 0.29791183294663576, "grad_norm": 48.29998779296875, "learning_rate": 8.413058277265094e-06, "loss": 24.5236, "step": 3210 }, { "epoch": 0.2980046403712297, "grad_norm": 37.91130065917969, "learning_rate": 8.411936673973776e-06, "loss": 21.8297, "step": 3211 }, { "epoch": 0.29809744779582364, "grad_norm": 37.184303283691406, "learning_rate": 8.41081474928669e-06, "loss": 22.6208, "step": 3212 }, { "epoch": 0.29819025522041764, "grad_norm": 38.210594177246094, "learning_rate": 8.409692503309523e-06, "loss": 24.4096, "step": 3213 }, { "epoch": 0.2982830626450116, "grad_norm": 43.281455993652344, "learning_rate": 8.408569936147987e-06, "loss": 26.7987, "step": 3214 }, { "epoch": 0.2983758700696056, "grad_norm": 38.687339782714844, "learning_rate": 8.407447047907825e-06, "loss": 24.9334, "step": 3215 }, { "epoch": 0.29846867749419953, "grad_norm": 35.84236145019531, "learning_rate": 8.406323838694808e-06, "loss": 25.7968, "step": 3216 }, { "epoch": 0.29856148491879353, "grad_norm": 37.252357482910156, "learning_rate": 8.405200308614742e-06, "loss": 24.4307, "step": 3217 }, { "epoch": 0.2986542923433875, "grad_norm": 35.49608612060547, "learning_rate": 8.40407645777346e-06, "loss": 24.9018, "step": 3218 }, { "epoch": 0.2987470997679814, "grad_norm": 36.17204666137695, "learning_rate": 8.402952286276824e-06, "loss": 25.2697, "step": 3219 }, { "epoch": 0.2988399071925754, "grad_norm": 38.392887115478516, "learning_rate": 8.40182779423073e-06, "loss": 23.7578, "step": 3220 }, { "epoch": 0.29893271461716936, "grad_norm": 36.41714859008789, "learning_rate": 8.400702981741104e-06, "loss": 23.5494, "step": 3221 }, { "epoch": 0.29902552204176336, "grad_norm": 38.312232971191406, "learning_rate": 8.399577848913896e-06, "loss": 22.3654, "step": 3222 }, { "epoch": 0.2991183294663573, "grad_norm": 35.05091094970703, "learning_rate": 8.398452395855095e-06, "loss": 25.9226, "step": 3223 }, { "epoch": 0.2992111368909513, "grad_norm": 41.49482727050781, "learning_rate": 8.397326622670712e-06, "loss": 24.2266, "step": 3224 }, { "epoch": 0.29930394431554525, "grad_norm": 36.44395065307617, "learning_rate": 8.396200529466796e-06, "loss": 23.8994, "step": 3225 }, { "epoch": 0.2993967517401392, "grad_norm": 34.595008850097656, "learning_rate": 8.395074116349417e-06, "loss": 22.1783, "step": 3226 }, { "epoch": 0.2994895591647332, "grad_norm": 32.35873031616211, "learning_rate": 8.393947383424686e-06, "loss": 24.0104, "step": 3227 }, { "epoch": 0.29958236658932713, "grad_norm": 33.98311996459961, "learning_rate": 8.392820330798734e-06, "loss": 23.6105, "step": 3228 }, { "epoch": 0.29967517401392113, "grad_norm": 40.94388961791992, "learning_rate": 8.391692958577731e-06, "loss": 22.3739, "step": 3229 }, { "epoch": 0.2997679814385151, "grad_norm": 35.4415283203125, "learning_rate": 8.390565266867867e-06, "loss": 23.4699, "step": 3230 }, { "epoch": 0.2998607888631091, "grad_norm": 37.68789291381836, "learning_rate": 8.38943725577537e-06, "loss": 23.7827, "step": 3231 }, { "epoch": 0.299953596287703, "grad_norm": 48.61948013305664, "learning_rate": 8.3883089254065e-06, "loss": 24.1143, "step": 3232 }, { "epoch": 0.30004640371229696, "grad_norm": 36.32120895385742, "learning_rate": 8.387180275867535e-06, "loss": 21.8648, "step": 3233 }, { "epoch": 0.30013921113689096, "grad_norm": 35.529151916503906, "learning_rate": 8.386051307264798e-06, "loss": 22.0557, "step": 3234 }, { "epoch": 0.3002320185614849, "grad_norm": 38.26654052734375, "learning_rate": 8.38492201970463e-06, "loss": 25.2259, "step": 3235 }, { "epoch": 0.3003248259860789, "grad_norm": 36.49940872192383, "learning_rate": 8.383792413293408e-06, "loss": 24.2043, "step": 3236 }, { "epoch": 0.30041763341067285, "grad_norm": 39.657325744628906, "learning_rate": 8.382662488137542e-06, "loss": 24.6731, "step": 3237 }, { "epoch": 0.30051044083526685, "grad_norm": 34.296844482421875, "learning_rate": 8.381532244343462e-06, "loss": 25.9432, "step": 3238 }, { "epoch": 0.3006032482598608, "grad_norm": 39.852115631103516, "learning_rate": 8.38040168201764e-06, "loss": 22.768, "step": 3239 }, { "epoch": 0.30069605568445473, "grad_norm": 40.13036346435547, "learning_rate": 8.379270801266569e-06, "loss": 25.3009, "step": 3240 }, { "epoch": 0.30078886310904873, "grad_norm": 40.485172271728516, "learning_rate": 8.378139602196773e-06, "loss": 23.9452, "step": 3241 }, { "epoch": 0.3008816705336427, "grad_norm": 36.43777847290039, "learning_rate": 8.377008084914812e-06, "loss": 23.0897, "step": 3242 }, { "epoch": 0.3009744779582367, "grad_norm": 37.971168518066406, "learning_rate": 8.37587624952727e-06, "loss": 23.1891, "step": 3243 }, { "epoch": 0.3010672853828306, "grad_norm": 42.1998405456543, "learning_rate": 8.374744096140764e-06, "loss": 21.9563, "step": 3244 }, { "epoch": 0.3011600928074246, "grad_norm": 36.225704193115234, "learning_rate": 8.37361162486194e-06, "loss": 24.7132, "step": 3245 }, { "epoch": 0.30125290023201856, "grad_norm": 38.45756149291992, "learning_rate": 8.372478835797473e-06, "loss": 22.8828, "step": 3246 }, { "epoch": 0.3013457076566125, "grad_norm": 38.768253326416016, "learning_rate": 8.371345729054069e-06, "loss": 23.5595, "step": 3247 }, { "epoch": 0.3014385150812065, "grad_norm": 37.38636016845703, "learning_rate": 8.370212304738464e-06, "loss": 23.6381, "step": 3248 }, { "epoch": 0.30153132250580045, "grad_norm": 34.04492950439453, "learning_rate": 8.369078562957425e-06, "loss": 22.3731, "step": 3249 }, { "epoch": 0.30162412993039445, "grad_norm": 36.81015396118164, "learning_rate": 8.367944503817744e-06, "loss": 24.3614, "step": 3250 }, { "epoch": 0.3017169373549884, "grad_norm": 52.66901397705078, "learning_rate": 8.36681012742625e-06, "loss": 23.8438, "step": 3251 }, { "epoch": 0.3018097447795824, "grad_norm": 38.65179443359375, "learning_rate": 8.3656754338898e-06, "loss": 23.1159, "step": 3252 }, { "epoch": 0.30190255220417633, "grad_norm": 34.52086639404297, "learning_rate": 8.364540423315273e-06, "loss": 24.0942, "step": 3253 }, { "epoch": 0.3019953596287703, "grad_norm": 37.1982421875, "learning_rate": 8.363405095809586e-06, "loss": 24.9922, "step": 3254 }, { "epoch": 0.3020881670533643, "grad_norm": 35.25363540649414, "learning_rate": 8.36226945147969e-06, "loss": 23.5723, "step": 3255 }, { "epoch": 0.3021809744779582, "grad_norm": 35.41304397583008, "learning_rate": 8.361133490432553e-06, "loss": 23.4864, "step": 3256 }, { "epoch": 0.3022737819025522, "grad_norm": 37.481937408447266, "learning_rate": 8.359997212775181e-06, "loss": 23.371, "step": 3257 }, { "epoch": 0.30236658932714616, "grad_norm": 37.163612365722656, "learning_rate": 8.358860618614612e-06, "loss": 24.6789, "step": 3258 }, { "epoch": 0.30245939675174016, "grad_norm": 37.843833923339844, "learning_rate": 8.357723708057908e-06, "loss": 25.2681, "step": 3259 }, { "epoch": 0.3025522041763341, "grad_norm": 36.192176818847656, "learning_rate": 8.35658648121216e-06, "loss": 21.8293, "step": 3260 }, { "epoch": 0.30264501160092805, "grad_norm": 35.02368927001953, "learning_rate": 8.355448938184496e-06, "loss": 24.7349, "step": 3261 }, { "epoch": 0.30273781902552205, "grad_norm": 39.84866714477539, "learning_rate": 8.35431107908207e-06, "loss": 23.5457, "step": 3262 }, { "epoch": 0.302830626450116, "grad_norm": 32.9042854309082, "learning_rate": 8.35317290401206e-06, "loss": 23.1712, "step": 3263 }, { "epoch": 0.30292343387471, "grad_norm": 38.122161865234375, "learning_rate": 8.352034413081687e-06, "loss": 25.2261, "step": 3264 }, { "epoch": 0.30301624129930393, "grad_norm": 40.008033752441406, "learning_rate": 8.350895606398188e-06, "loss": 23.1418, "step": 3265 }, { "epoch": 0.30310904872389793, "grad_norm": 47.50201416015625, "learning_rate": 8.349756484068837e-06, "loss": 23.5479, "step": 3266 }, { "epoch": 0.3032018561484919, "grad_norm": 36.03557586669922, "learning_rate": 8.348617046200938e-06, "loss": 24.2328, "step": 3267 }, { "epoch": 0.3032946635730858, "grad_norm": 41.34274673461914, "learning_rate": 8.347477292901823e-06, "loss": 26.6072, "step": 3268 }, { "epoch": 0.3033874709976798, "grad_norm": 40.193031311035156, "learning_rate": 8.34633722427885e-06, "loss": 24.4936, "step": 3269 }, { "epoch": 0.30348027842227376, "grad_norm": 36.15237808227539, "learning_rate": 8.345196840439418e-06, "loss": 24.4282, "step": 3270 }, { "epoch": 0.30357308584686776, "grad_norm": 39.436241149902344, "learning_rate": 8.34405614149094e-06, "loss": 25.0475, "step": 3271 }, { "epoch": 0.3036658932714617, "grad_norm": 38.126220703125, "learning_rate": 8.342915127540873e-06, "loss": 25.0544, "step": 3272 }, { "epoch": 0.3037587006960557, "grad_norm": 40.739078521728516, "learning_rate": 8.341773798696695e-06, "loss": 23.2807, "step": 3273 }, { "epoch": 0.30385150812064965, "grad_norm": 37.682369232177734, "learning_rate": 8.340632155065917e-06, "loss": 21.5283, "step": 3274 }, { "epoch": 0.3039443155452436, "grad_norm": 38.87356948852539, "learning_rate": 8.339490196756079e-06, "loss": 25.9192, "step": 3275 }, { "epoch": 0.3040371229698376, "grad_norm": 39.495811462402344, "learning_rate": 8.33834792387475e-06, "loss": 24.2537, "step": 3276 }, { "epoch": 0.30412993039443154, "grad_norm": 56.413612365722656, "learning_rate": 8.33720533652953e-06, "loss": 23.4765, "step": 3277 }, { "epoch": 0.30422273781902553, "grad_norm": 33.98377990722656, "learning_rate": 8.336062434828045e-06, "loss": 22.9633, "step": 3278 }, { "epoch": 0.3043155452436195, "grad_norm": 36.794158935546875, "learning_rate": 8.334919218877958e-06, "loss": 25.0683, "step": 3279 }, { "epoch": 0.3044083526682135, "grad_norm": 38.18470764160156, "learning_rate": 8.333775688786953e-06, "loss": 24.5693, "step": 3280 }, { "epoch": 0.3045011600928074, "grad_norm": 38.198184967041016, "learning_rate": 8.332631844662751e-06, "loss": 22.8682, "step": 3281 }, { "epoch": 0.30459396751740136, "grad_norm": 37.874637603759766, "learning_rate": 8.331487686613097e-06, "loss": 24.4065, "step": 3282 }, { "epoch": 0.30468677494199536, "grad_norm": 32.257625579833984, "learning_rate": 8.33034321474577e-06, "loss": 23.0844, "step": 3283 }, { "epoch": 0.3047795823665893, "grad_norm": 37.4388313293457, "learning_rate": 8.329198429168573e-06, "loss": 24.7291, "step": 3284 }, { "epoch": 0.3048723897911833, "grad_norm": 35.80132293701172, "learning_rate": 8.328053329989342e-06, "loss": 22.0957, "step": 3285 }, { "epoch": 0.30496519721577725, "grad_norm": 37.103397369384766, "learning_rate": 8.326907917315945e-06, "loss": 25.0685, "step": 3286 }, { "epoch": 0.30505800464037125, "grad_norm": 42.61391830444336, "learning_rate": 8.325762191256279e-06, "loss": 26.1301, "step": 3287 }, { "epoch": 0.3051508120649652, "grad_norm": 37.195228576660156, "learning_rate": 8.324616151918263e-06, "loss": 23.3941, "step": 3288 }, { "epoch": 0.30524361948955914, "grad_norm": 43.90248489379883, "learning_rate": 8.323469799409854e-06, "loss": 23.852, "step": 3289 }, { "epoch": 0.30533642691415314, "grad_norm": 40.2923698425293, "learning_rate": 8.322323133839033e-06, "loss": 23.5097, "step": 3290 }, { "epoch": 0.3054292343387471, "grad_norm": 36.62678146362305, "learning_rate": 8.321176155313816e-06, "loss": 22.5964, "step": 3291 }, { "epoch": 0.3055220417633411, "grad_norm": 35.18718719482422, "learning_rate": 8.320028863942247e-06, "loss": 24.0449, "step": 3292 }, { "epoch": 0.305614849187935, "grad_norm": 38.16632080078125, "learning_rate": 8.318881259832392e-06, "loss": 24.4571, "step": 3293 }, { "epoch": 0.305707656612529, "grad_norm": 35.83650207519531, "learning_rate": 8.317733343092357e-06, "loss": 23.196, "step": 3294 }, { "epoch": 0.30580046403712297, "grad_norm": 38.98539733886719, "learning_rate": 8.316585113830273e-06, "loss": 25.1962, "step": 3295 }, { "epoch": 0.30589327146171696, "grad_norm": 38.671180725097656, "learning_rate": 8.315436572154297e-06, "loss": 24.7877, "step": 3296 }, { "epoch": 0.3059860788863109, "grad_norm": 42.205692291259766, "learning_rate": 8.314287718172623e-06, "loss": 25.8743, "step": 3297 }, { "epoch": 0.30607888631090485, "grad_norm": 39.862552642822266, "learning_rate": 8.313138551993464e-06, "loss": 23.8362, "step": 3298 }, { "epoch": 0.30617169373549885, "grad_norm": 37.210391998291016, "learning_rate": 8.311989073725074e-06, "loss": 24.3374, "step": 3299 }, { "epoch": 0.3062645011600928, "grad_norm": 34.5430793762207, "learning_rate": 8.31083928347573e-06, "loss": 21.7359, "step": 3300 }, { "epoch": 0.3063573085846868, "grad_norm": 37.56930923461914, "learning_rate": 8.309689181353738e-06, "loss": 25.383, "step": 3301 }, { "epoch": 0.30645011600928074, "grad_norm": 40.60382080078125, "learning_rate": 8.308538767467435e-06, "loss": 26.2236, "step": 3302 }, { "epoch": 0.30654292343387474, "grad_norm": 38.644107818603516, "learning_rate": 8.307388041925187e-06, "loss": 22.7258, "step": 3303 }, { "epoch": 0.3066357308584687, "grad_norm": 36.56621551513672, "learning_rate": 8.30623700483539e-06, "loss": 23.0276, "step": 3304 }, { "epoch": 0.3067285382830626, "grad_norm": 36.51255416870117, "learning_rate": 8.30508565630647e-06, "loss": 24.3176, "step": 3305 }, { "epoch": 0.3068213457076566, "grad_norm": 39.74951934814453, "learning_rate": 8.303933996446876e-06, "loss": 23.2822, "step": 3306 }, { "epoch": 0.30691415313225057, "grad_norm": 39.14631271362305, "learning_rate": 8.302782025365096e-06, "loss": 21.2858, "step": 3307 }, { "epoch": 0.30700696055684457, "grad_norm": 36.802459716796875, "learning_rate": 8.301629743169643e-06, "loss": 24.1512, "step": 3308 }, { "epoch": 0.3070997679814385, "grad_norm": 40.537906646728516, "learning_rate": 8.300477149969057e-06, "loss": 24.205, "step": 3309 }, { "epoch": 0.3071925754060325, "grad_norm": 40.8619384765625, "learning_rate": 8.299324245871912e-06, "loss": 22.63, "step": 3310 }, { "epoch": 0.30728538283062645, "grad_norm": 35.563106536865234, "learning_rate": 8.298171030986803e-06, "loss": 23.4079, "step": 3311 }, { "epoch": 0.3073781902552204, "grad_norm": 36.273681640625, "learning_rate": 8.297017505422366e-06, "loss": 23.3081, "step": 3312 }, { "epoch": 0.3074709976798144, "grad_norm": 37.40773010253906, "learning_rate": 8.29586366928726e-06, "loss": 24.7309, "step": 3313 }, { "epoch": 0.30756380510440834, "grad_norm": 33.245948791503906, "learning_rate": 8.294709522690168e-06, "loss": 23.8468, "step": 3314 }, { "epoch": 0.30765661252900234, "grad_norm": 34.33498764038086, "learning_rate": 8.293555065739811e-06, "loss": 24.5688, "step": 3315 }, { "epoch": 0.3077494199535963, "grad_norm": 36.121551513671875, "learning_rate": 8.292400298544938e-06, "loss": 23.7894, "step": 3316 }, { "epoch": 0.3078422273781903, "grad_norm": 35.964080810546875, "learning_rate": 8.29124522121432e-06, "loss": 24.8122, "step": 3317 }, { "epoch": 0.3079350348027842, "grad_norm": 37.89107131958008, "learning_rate": 8.290089833856769e-06, "loss": 22.713, "step": 3318 }, { "epoch": 0.30802784222737817, "grad_norm": 43.85458755493164, "learning_rate": 8.288934136581114e-06, "loss": 23.7267, "step": 3319 }, { "epoch": 0.30812064965197217, "grad_norm": 35.170013427734375, "learning_rate": 8.28777812949622e-06, "loss": 22.8958, "step": 3320 }, { "epoch": 0.3082134570765661, "grad_norm": 33.87965393066406, "learning_rate": 8.286621812710983e-06, "loss": 22.0265, "step": 3321 }, { "epoch": 0.3083062645011601, "grad_norm": 37.930763244628906, "learning_rate": 8.28546518633432e-06, "loss": 23.9273, "step": 3322 }, { "epoch": 0.30839907192575405, "grad_norm": 40.38947677612305, "learning_rate": 8.284308250475186e-06, "loss": 23.6878, "step": 3323 }, { "epoch": 0.30849187935034805, "grad_norm": 39.17112350463867, "learning_rate": 8.28315100524256e-06, "loss": 25.9321, "step": 3324 }, { "epoch": 0.308584686774942, "grad_norm": 69.87838745117188, "learning_rate": 8.281993450745453e-06, "loss": 22.8287, "step": 3325 }, { "epoch": 0.30867749419953594, "grad_norm": 40.02374267578125, "learning_rate": 8.280835587092903e-06, "loss": 23.702, "step": 3326 }, { "epoch": 0.30877030162412994, "grad_norm": 38.566532135009766, "learning_rate": 8.279677414393975e-06, "loss": 24.5894, "step": 3327 }, { "epoch": 0.3088631090487239, "grad_norm": 37.484066009521484, "learning_rate": 8.27851893275777e-06, "loss": 25.2122, "step": 3328 }, { "epoch": 0.3089559164733179, "grad_norm": 34.29559326171875, "learning_rate": 8.277360142293412e-06, "loss": 23.6933, "step": 3329 }, { "epoch": 0.3090487238979118, "grad_norm": 88.5123291015625, "learning_rate": 8.276201043110057e-06, "loss": 22.1779, "step": 3330 }, { "epoch": 0.3091415313225058, "grad_norm": 36.744773864746094, "learning_rate": 8.275041635316888e-06, "loss": 21.3777, "step": 3331 }, { "epoch": 0.30923433874709977, "grad_norm": 36.46384048461914, "learning_rate": 8.27388191902312e-06, "loss": 22.1983, "step": 3332 }, { "epoch": 0.3093271461716937, "grad_norm": 38.17656326293945, "learning_rate": 8.272721894337995e-06, "loss": 24.0289, "step": 3333 }, { "epoch": 0.3094199535962877, "grad_norm": 35.190162658691406, "learning_rate": 8.27156156137078e-06, "loss": 23.2016, "step": 3334 }, { "epoch": 0.30951276102088165, "grad_norm": 38.0809440612793, "learning_rate": 8.270400920230783e-06, "loss": 23.1052, "step": 3335 }, { "epoch": 0.30960556844547565, "grad_norm": 52.41547393798828, "learning_rate": 8.269239971027328e-06, "loss": 24.2527, "step": 3336 }, { "epoch": 0.3096983758700696, "grad_norm": 38.731292724609375, "learning_rate": 8.268078713869774e-06, "loss": 24.4715, "step": 3337 }, { "epoch": 0.3097911832946636, "grad_norm": 37.673702239990234, "learning_rate": 8.266917148867511e-06, "loss": 23.4382, "step": 3338 }, { "epoch": 0.30988399071925754, "grad_norm": 38.00193405151367, "learning_rate": 8.265755276129952e-06, "loss": 24.5106, "step": 3339 }, { "epoch": 0.3099767981438515, "grad_norm": 40.266273498535156, "learning_rate": 8.264593095766546e-06, "loss": 23.8637, "step": 3340 }, { "epoch": 0.3100696055684455, "grad_norm": 40.84332275390625, "learning_rate": 8.263430607886767e-06, "loss": 22.8555, "step": 3341 }, { "epoch": 0.3101624129930394, "grad_norm": 38.64746856689453, "learning_rate": 8.262267812600116e-06, "loss": 26.606, "step": 3342 }, { "epoch": 0.3102552204176334, "grad_norm": 39.65304946899414, "learning_rate": 8.261104710016126e-06, "loss": 25.9393, "step": 3343 }, { "epoch": 0.31034802784222737, "grad_norm": 38.48514938354492, "learning_rate": 8.25994130024436e-06, "loss": 25.2256, "step": 3344 }, { "epoch": 0.31044083526682137, "grad_norm": 33.38129425048828, "learning_rate": 8.258777583394407e-06, "loss": 23.8659, "step": 3345 }, { "epoch": 0.3105336426914153, "grad_norm": 34.54057693481445, "learning_rate": 8.257613559575885e-06, "loss": 23.048, "step": 3346 }, { "epoch": 0.31062645011600926, "grad_norm": 38.70445251464844, "learning_rate": 8.256449228898445e-06, "loss": 25.7822, "step": 3347 }, { "epoch": 0.31071925754060326, "grad_norm": 35.09304428100586, "learning_rate": 8.255284591471762e-06, "loss": 24.3236, "step": 3348 }, { "epoch": 0.3108120649651972, "grad_norm": 45.11933135986328, "learning_rate": 8.254119647405542e-06, "loss": 23.447, "step": 3349 }, { "epoch": 0.3109048723897912, "grad_norm": 43.79585266113281, "learning_rate": 8.252954396809521e-06, "loss": 24.4766, "step": 3350 }, { "epoch": 0.31099767981438514, "grad_norm": 34.396209716796875, "learning_rate": 8.25178883979346e-06, "loss": 23.4803, "step": 3351 }, { "epoch": 0.31109048723897914, "grad_norm": 39.84959411621094, "learning_rate": 8.250622976467155e-06, "loss": 23.7299, "step": 3352 }, { "epoch": 0.3111832946635731, "grad_norm": 36.698814392089844, "learning_rate": 8.249456806940424e-06, "loss": 26.3646, "step": 3353 }, { "epoch": 0.31127610208816703, "grad_norm": 37.689064025878906, "learning_rate": 8.24829033132312e-06, "loss": 23.9363, "step": 3354 }, { "epoch": 0.311368909512761, "grad_norm": 41.265830993652344, "learning_rate": 8.24712354972512e-06, "loss": 24.7656, "step": 3355 }, { "epoch": 0.31146171693735497, "grad_norm": 36.89622497558594, "learning_rate": 8.245956462256332e-06, "loss": 24.5276, "step": 3356 }, { "epoch": 0.31155452436194897, "grad_norm": 39.02539825439453, "learning_rate": 8.244789069026695e-06, "loss": 23.5254, "step": 3357 }, { "epoch": 0.3116473317865429, "grad_norm": 35.311954498291016, "learning_rate": 8.243621370146174e-06, "loss": 23.5957, "step": 3358 }, { "epoch": 0.3117401392111369, "grad_norm": 37.8905143737793, "learning_rate": 8.24245336572476e-06, "loss": 22.9594, "step": 3359 }, { "epoch": 0.31183294663573086, "grad_norm": 40.831634521484375, "learning_rate": 8.241285055872478e-06, "loss": 25.1278, "step": 3360 }, { "epoch": 0.3119257540603248, "grad_norm": 36.39710998535156, "learning_rate": 8.24011644069938e-06, "loss": 24.6107, "step": 3361 }, { "epoch": 0.3120185614849188, "grad_norm": 35.02936935424805, "learning_rate": 8.238947520315548e-06, "loss": 23.0204, "step": 3362 }, { "epoch": 0.31211136890951274, "grad_norm": 37.174564361572266, "learning_rate": 8.237778294831089e-06, "loss": 26.3907, "step": 3363 }, { "epoch": 0.31220417633410674, "grad_norm": 36.3112678527832, "learning_rate": 8.236608764356142e-06, "loss": 23.5178, "step": 3364 }, { "epoch": 0.3122969837587007, "grad_norm": 34.84144592285156, "learning_rate": 8.235438929000872e-06, "loss": 22.5157, "step": 3365 }, { "epoch": 0.3123897911832947, "grad_norm": 35.52983093261719, "learning_rate": 8.23426878887548e-06, "loss": 23.1339, "step": 3366 }, { "epoch": 0.31248259860788863, "grad_norm": 35.821834564208984, "learning_rate": 8.233098344090184e-06, "loss": 23.2446, "step": 3367 }, { "epoch": 0.3125754060324826, "grad_norm": 34.90456771850586, "learning_rate": 8.23192759475524e-06, "loss": 22.5273, "step": 3368 }, { "epoch": 0.31266821345707657, "grad_norm": 38.71061325073242, "learning_rate": 8.230756540980928e-06, "loss": 26.1384, "step": 3369 }, { "epoch": 0.3127610208816705, "grad_norm": 36.95259094238281, "learning_rate": 8.229585182877561e-06, "loss": 25.2805, "step": 3370 }, { "epoch": 0.3128538283062645, "grad_norm": 38.79380416870117, "learning_rate": 8.228413520555476e-06, "loss": 24.4751, "step": 3371 }, { "epoch": 0.31294663573085846, "grad_norm": 40.81614685058594, "learning_rate": 8.227241554125041e-06, "loss": 23.2954, "step": 3372 }, { "epoch": 0.31303944315545246, "grad_norm": 38.5881462097168, "learning_rate": 8.226069283696651e-06, "loss": 23.9573, "step": 3373 }, { "epoch": 0.3131322505800464, "grad_norm": 44.55513381958008, "learning_rate": 8.224896709380732e-06, "loss": 23.6155, "step": 3374 }, { "epoch": 0.31322505800464034, "grad_norm": 37.99260711669922, "learning_rate": 8.22372383128774e-06, "loss": 24.0755, "step": 3375 }, { "epoch": 0.31331786542923434, "grad_norm": 46.01288986206055, "learning_rate": 8.222550649528152e-06, "loss": 24.6265, "step": 3376 }, { "epoch": 0.3134106728538283, "grad_norm": 36.82350540161133, "learning_rate": 8.221377164212483e-06, "loss": 23.538, "step": 3377 }, { "epoch": 0.3135034802784223, "grad_norm": 40.444915771484375, "learning_rate": 8.22020337545127e-06, "loss": 25.2767, "step": 3378 }, { "epoch": 0.31359628770301623, "grad_norm": 38.09272384643555, "learning_rate": 8.219029283355082e-06, "loss": 23.8993, "step": 3379 }, { "epoch": 0.31368909512761023, "grad_norm": 32.53301239013672, "learning_rate": 8.217854888034516e-06, "loss": 22.1214, "step": 3380 }, { "epoch": 0.3137819025522042, "grad_norm": 39.13981628417969, "learning_rate": 8.216680189600194e-06, "loss": 23.4484, "step": 3381 }, { "epoch": 0.3138747099767981, "grad_norm": 34.89923095703125, "learning_rate": 8.215505188162774e-06, "loss": 23.014, "step": 3382 }, { "epoch": 0.3139675174013921, "grad_norm": 40.762142181396484, "learning_rate": 8.214329883832935e-06, "loss": 22.994, "step": 3383 }, { "epoch": 0.31406032482598606, "grad_norm": 39.51612854003906, "learning_rate": 8.213154276721388e-06, "loss": 24.5482, "step": 3384 }, { "epoch": 0.31415313225058006, "grad_norm": 39.05146408081055, "learning_rate": 8.211978366938876e-06, "loss": 21.8716, "step": 3385 }, { "epoch": 0.314245939675174, "grad_norm": 38.90384292602539, "learning_rate": 8.21080215459616e-06, "loss": 24.0059, "step": 3386 }, { "epoch": 0.314338747099768, "grad_norm": 36.49184036254883, "learning_rate": 8.209625639804041e-06, "loss": 23.1079, "step": 3387 }, { "epoch": 0.31443155452436194, "grad_norm": 39.64741134643555, "learning_rate": 8.208448822673346e-06, "loss": 24.9259, "step": 3388 }, { "epoch": 0.31452436194895594, "grad_norm": 35.29840850830078, "learning_rate": 8.207271703314921e-06, "loss": 24.8521, "step": 3389 }, { "epoch": 0.3146171693735499, "grad_norm": 38.11696243286133, "learning_rate": 8.20609428183965e-06, "loss": 24.5152, "step": 3390 }, { "epoch": 0.31470997679814383, "grad_norm": 38.167537689208984, "learning_rate": 8.204916558358448e-06, "loss": 25.2219, "step": 3391 }, { "epoch": 0.31480278422273783, "grad_norm": 36.60590744018555, "learning_rate": 8.203738532982248e-06, "loss": 23.4663, "step": 3392 }, { "epoch": 0.3148955916473318, "grad_norm": 51.843833923339844, "learning_rate": 8.20256020582202e-06, "loss": 25.597, "step": 3393 }, { "epoch": 0.3149883990719258, "grad_norm": 35.92871856689453, "learning_rate": 8.201381576988758e-06, "loss": 25.4951, "step": 3394 }, { "epoch": 0.3150812064965197, "grad_norm": 37.18562316894531, "learning_rate": 8.200202646593487e-06, "loss": 22.7443, "step": 3395 }, { "epoch": 0.3151740139211137, "grad_norm": 47.98997497558594, "learning_rate": 8.199023414747257e-06, "loss": 25.4347, "step": 3396 }, { "epoch": 0.31526682134570766, "grad_norm": 36.04777145385742, "learning_rate": 8.19784388156115e-06, "loss": 22.9361, "step": 3397 }, { "epoch": 0.3153596287703016, "grad_norm": 34.59557342529297, "learning_rate": 8.196664047146276e-06, "loss": 23.9604, "step": 3398 }, { "epoch": 0.3154524361948956, "grad_norm": 40.07792282104492, "learning_rate": 8.195483911613773e-06, "loss": 22.331, "step": 3399 }, { "epoch": 0.31554524361948955, "grad_norm": 42.055660247802734, "learning_rate": 8.194303475074804e-06, "loss": 24.2745, "step": 3400 }, { "epoch": 0.31563805104408355, "grad_norm": 35.854488372802734, "learning_rate": 8.193122737640565e-06, "loss": 23.7316, "step": 3401 }, { "epoch": 0.3157308584686775, "grad_norm": 36.86893844604492, "learning_rate": 8.191941699422276e-06, "loss": 21.6575, "step": 3402 }, { "epoch": 0.3158236658932715, "grad_norm": 37.717140197753906, "learning_rate": 8.190760360531192e-06, "loss": 24.0998, "step": 3403 }, { "epoch": 0.31591647331786543, "grad_norm": 40.81013870239258, "learning_rate": 8.18957872107859e-06, "loss": 24.1363, "step": 3404 }, { "epoch": 0.3160092807424594, "grad_norm": 32.47590637207031, "learning_rate": 8.188396781175777e-06, "loss": 22.9887, "step": 3405 }, { "epoch": 0.3161020881670534, "grad_norm": 36.861629486083984, "learning_rate": 8.187214540934088e-06, "loss": 23.6793, "step": 3406 }, { "epoch": 0.3161948955916473, "grad_norm": 41.155391693115234, "learning_rate": 8.186032000464889e-06, "loss": 23.7718, "step": 3407 }, { "epoch": 0.3162877030162413, "grad_norm": 39.7652702331543, "learning_rate": 8.18484915987957e-06, "loss": 22.7009, "step": 3408 }, { "epoch": 0.31638051044083526, "grad_norm": 36.20189666748047, "learning_rate": 8.183666019289555e-06, "loss": 24.5832, "step": 3409 }, { "epoch": 0.31647331786542926, "grad_norm": 37.29269790649414, "learning_rate": 8.18248257880629e-06, "loss": 25.0933, "step": 3410 }, { "epoch": 0.3165661252900232, "grad_norm": 34.850074768066406, "learning_rate": 8.181298838541254e-06, "loss": 23.1623, "step": 3411 }, { "epoch": 0.31665893271461715, "grad_norm": 36.28433609008789, "learning_rate": 8.18011479860595e-06, "loss": 22.8719, "step": 3412 }, { "epoch": 0.31675174013921115, "grad_norm": 40.39396286010742, "learning_rate": 8.178930459111913e-06, "loss": 24.7202, "step": 3413 }, { "epoch": 0.3168445475638051, "grad_norm": 35.99266052246094, "learning_rate": 8.177745820170705e-06, "loss": 23.5533, "step": 3414 }, { "epoch": 0.3169373549883991, "grad_norm": 36.803672790527344, "learning_rate": 8.176560881893916e-06, "loss": 23.4287, "step": 3415 }, { "epoch": 0.31703016241299303, "grad_norm": 36.974884033203125, "learning_rate": 8.175375644393162e-06, "loss": 23.1087, "step": 3416 }, { "epoch": 0.31712296983758703, "grad_norm": 36.38843536376953, "learning_rate": 8.17419010778009e-06, "loss": 25.5628, "step": 3417 }, { "epoch": 0.317215777262181, "grad_norm": 36.876888275146484, "learning_rate": 8.173004272166379e-06, "loss": 24.9919, "step": 3418 }, { "epoch": 0.3173085846867749, "grad_norm": 35.46146011352539, "learning_rate": 8.171818137663726e-06, "loss": 23.3606, "step": 3419 }, { "epoch": 0.3174013921113689, "grad_norm": 39.8182258605957, "learning_rate": 8.170631704383865e-06, "loss": 25.3872, "step": 3420 }, { "epoch": 0.31749419953596286, "grad_norm": 36.82332992553711, "learning_rate": 8.169444972438556e-06, "loss": 24.1958, "step": 3421 }, { "epoch": 0.31758700696055686, "grad_norm": 36.851036071777344, "learning_rate": 8.16825794193958e-06, "loss": 23.4938, "step": 3422 }, { "epoch": 0.3176798143851508, "grad_norm": 36.47067642211914, "learning_rate": 8.16707061299876e-06, "loss": 23.6673, "step": 3423 }, { "epoch": 0.3177726218097448, "grad_norm": 56.700714111328125, "learning_rate": 8.165882985727935e-06, "loss": 24.8614, "step": 3424 }, { "epoch": 0.31786542923433875, "grad_norm": 44.15202713012695, "learning_rate": 8.164695060238977e-06, "loss": 24.3322, "step": 3425 }, { "epoch": 0.3179582366589327, "grad_norm": 35.964576721191406, "learning_rate": 8.163506836643787e-06, "loss": 25.2751, "step": 3426 }, { "epoch": 0.3180510440835267, "grad_norm": 41.79237747192383, "learning_rate": 8.16231831505429e-06, "loss": 25.5937, "step": 3427 }, { "epoch": 0.31814385150812063, "grad_norm": 38.381412506103516, "learning_rate": 8.161129495582443e-06, "loss": 24.2919, "step": 3428 }, { "epoch": 0.31823665893271463, "grad_norm": 37.486602783203125, "learning_rate": 8.159940378340231e-06, "loss": 23.6938, "step": 3429 }, { "epoch": 0.3183294663573086, "grad_norm": 35.72945785522461, "learning_rate": 8.158750963439666e-06, "loss": 23.6371, "step": 3430 }, { "epoch": 0.3184222737819026, "grad_norm": 34.80747985839844, "learning_rate": 8.157561250992786e-06, "loss": 24.1253, "step": 3431 }, { "epoch": 0.3185150812064965, "grad_norm": 34.32765197753906, "learning_rate": 8.15637124111166e-06, "loss": 22.1784, "step": 3432 }, { "epoch": 0.31860788863109046, "grad_norm": 37.843109130859375, "learning_rate": 8.155180933908381e-06, "loss": 24.0002, "step": 3433 }, { "epoch": 0.31870069605568446, "grad_norm": 36.10955047607422, "learning_rate": 8.153990329495079e-06, "loss": 25.8001, "step": 3434 }, { "epoch": 0.3187935034802784, "grad_norm": 35.387447357177734, "learning_rate": 8.152799427983901e-06, "loss": 22.3915, "step": 3435 }, { "epoch": 0.3188863109048724, "grad_norm": 41.075401306152344, "learning_rate": 8.151608229487028e-06, "loss": 23.3081, "step": 3436 }, { "epoch": 0.31897911832946635, "grad_norm": 37.92433166503906, "learning_rate": 8.150416734116668e-06, "loss": 23.6582, "step": 3437 }, { "epoch": 0.31907192575406035, "grad_norm": 37.81532287597656, "learning_rate": 8.149224941985058e-06, "loss": 23.3564, "step": 3438 }, { "epoch": 0.3191647331786543, "grad_norm": 40.45836639404297, "learning_rate": 8.14803285320446e-06, "loss": 25.0814, "step": 3439 }, { "epoch": 0.31925754060324824, "grad_norm": 42.50493240356445, "learning_rate": 8.146840467887167e-06, "loss": 24.7763, "step": 3440 }, { "epoch": 0.31935034802784223, "grad_norm": 35.15742492675781, "learning_rate": 8.145647786145496e-06, "loss": 23.2812, "step": 3441 }, { "epoch": 0.3194431554524362, "grad_norm": 51.709781646728516, "learning_rate": 8.144454808091799e-06, "loss": 22.0254, "step": 3442 }, { "epoch": 0.3195359628770302, "grad_norm": 40.524044036865234, "learning_rate": 8.14326153383845e-06, "loss": 21.9848, "step": 3443 }, { "epoch": 0.3196287703016241, "grad_norm": 38.176414489746094, "learning_rate": 8.14206796349785e-06, "loss": 24.2741, "step": 3444 }, { "epoch": 0.3197215777262181, "grad_norm": 41.54843521118164, "learning_rate": 8.140874097182434e-06, "loss": 24.9327, "step": 3445 }, { "epoch": 0.31981438515081206, "grad_norm": 39.03684997558594, "learning_rate": 8.139679935004656e-06, "loss": 24.7116, "step": 3446 }, { "epoch": 0.319907192575406, "grad_norm": 34.71841049194336, "learning_rate": 8.138485477077009e-06, "loss": 24.4316, "step": 3447 }, { "epoch": 0.32, "grad_norm": 36.92866134643555, "learning_rate": 8.137290723512003e-06, "loss": 22.4322, "step": 3448 }, { "epoch": 0.32009280742459395, "grad_norm": 40.73342514038086, "learning_rate": 8.136095674422184e-06, "loss": 23.831, "step": 3449 }, { "epoch": 0.32018561484918795, "grad_norm": 34.900352478027344, "learning_rate": 8.134900329920121e-06, "loss": 23.8531, "step": 3450 }, { "epoch": 0.3202784222737819, "grad_norm": 37.8952751159668, "learning_rate": 8.133704690118414e-06, "loss": 26.3771, "step": 3451 }, { "epoch": 0.3203712296983759, "grad_norm": 37.89628219604492, "learning_rate": 8.132508755129687e-06, "loss": 24.221, "step": 3452 }, { "epoch": 0.32046403712296984, "grad_norm": 37.32236099243164, "learning_rate": 8.131312525066595e-06, "loss": 25.2233, "step": 3453 }, { "epoch": 0.3205568445475638, "grad_norm": 35.049530029296875, "learning_rate": 8.130116000041819e-06, "loss": 22.3546, "step": 3454 }, { "epoch": 0.3206496519721578, "grad_norm": 36.79279327392578, "learning_rate": 8.128919180168072e-06, "loss": 24.9701, "step": 3455 }, { "epoch": 0.3207424593967517, "grad_norm": 46.81465530395508, "learning_rate": 8.127722065558087e-06, "loss": 22.2547, "step": 3456 }, { "epoch": 0.3208352668213457, "grad_norm": 38.8197021484375, "learning_rate": 8.126524656324631e-06, "loss": 23.9036, "step": 3457 }, { "epoch": 0.32092807424593967, "grad_norm": 36.747894287109375, "learning_rate": 8.125326952580499e-06, "loss": 23.3251, "step": 3458 }, { "epoch": 0.32102088167053366, "grad_norm": 38.840396881103516, "learning_rate": 8.124128954438508e-06, "loss": 25.199, "step": 3459 }, { "epoch": 0.3211136890951276, "grad_norm": 34.182342529296875, "learning_rate": 8.122930662011507e-06, "loss": 21.2445, "step": 3460 }, { "epoch": 0.32120649651972155, "grad_norm": 34.87372589111328, "learning_rate": 8.121732075412376e-06, "loss": 22.8381, "step": 3461 }, { "epoch": 0.32129930394431555, "grad_norm": 38.83465576171875, "learning_rate": 8.120533194754015e-06, "loss": 22.9912, "step": 3462 }, { "epoch": 0.3213921113689095, "grad_norm": 38.038421630859375, "learning_rate": 8.119334020149355e-06, "loss": 24.6126, "step": 3463 }, { "epoch": 0.3214849187935035, "grad_norm": 35.39216995239258, "learning_rate": 8.118134551711359e-06, "loss": 22.5359, "step": 3464 }, { "epoch": 0.32157772621809744, "grad_norm": 39.04372787475586, "learning_rate": 8.11693478955301e-06, "loss": 24.4408, "step": 3465 }, { "epoch": 0.32167053364269144, "grad_norm": 34.48811721801758, "learning_rate": 8.115734733787324e-06, "loss": 23.0488, "step": 3466 }, { "epoch": 0.3217633410672854, "grad_norm": 35.315486907958984, "learning_rate": 8.114534384527343e-06, "loss": 23.6536, "step": 3467 }, { "epoch": 0.3218561484918793, "grad_norm": 33.926307678222656, "learning_rate": 8.113333741886137e-06, "loss": 22.5924, "step": 3468 }, { "epoch": 0.3219489559164733, "grad_norm": 36.047393798828125, "learning_rate": 8.112132805976807e-06, "loss": 24.0989, "step": 3469 }, { "epoch": 0.32204176334106727, "grad_norm": 34.56425094604492, "learning_rate": 8.11093157691247e-06, "loss": 23.0398, "step": 3470 }, { "epoch": 0.32213457076566127, "grad_norm": 45.49047088623047, "learning_rate": 8.109730054806286e-06, "loss": 24.5685, "step": 3471 }, { "epoch": 0.3222273781902552, "grad_norm": 35.909305572509766, "learning_rate": 8.108528239771434e-06, "loss": 22.9047, "step": 3472 }, { "epoch": 0.3223201856148492, "grad_norm": 40.474212646484375, "learning_rate": 8.107326131921118e-06, "loss": 24.5486, "step": 3473 }, { "epoch": 0.32241299303944315, "grad_norm": 37.848175048828125, "learning_rate": 8.106123731368579e-06, "loss": 25.3948, "step": 3474 }, { "epoch": 0.3225058004640371, "grad_norm": 42.61003112792969, "learning_rate": 8.104921038227073e-06, "loss": 24.8483, "step": 3475 }, { "epoch": 0.3225986078886311, "grad_norm": 36.37267303466797, "learning_rate": 8.103718052609898e-06, "loss": 25.5621, "step": 3476 }, { "epoch": 0.32269141531322504, "grad_norm": 39.475555419921875, "learning_rate": 8.102514774630369e-06, "loss": 25.1072, "step": 3477 }, { "epoch": 0.32278422273781904, "grad_norm": 37.40983963012695, "learning_rate": 8.101311204401832e-06, "loss": 25.1196, "step": 3478 }, { "epoch": 0.322877030162413, "grad_norm": 36.37345886230469, "learning_rate": 8.10010734203766e-06, "loss": 24.8318, "step": 3479 }, { "epoch": 0.322969837587007, "grad_norm": 37.56975173950195, "learning_rate": 8.098903187651252e-06, "loss": 24.6794, "step": 3480 }, { "epoch": 0.3230626450116009, "grad_norm": 34.96579360961914, "learning_rate": 8.097698741356041e-06, "loss": 22.311, "step": 3481 }, { "epoch": 0.3231554524361949, "grad_norm": 38.79948043823242, "learning_rate": 8.096494003265478e-06, "loss": 23.7222, "step": 3482 }, { "epoch": 0.32324825986078887, "grad_norm": 39.13542175292969, "learning_rate": 8.09528897349305e-06, "loss": 23.8649, "step": 3483 }, { "epoch": 0.3233410672853828, "grad_norm": 39.55720520019531, "learning_rate": 8.094083652152265e-06, "loss": 24.2482, "step": 3484 }, { "epoch": 0.3234338747099768, "grad_norm": 38.12487030029297, "learning_rate": 8.092878039356662e-06, "loss": 24.2284, "step": 3485 }, { "epoch": 0.32352668213457075, "grad_norm": 34.87730026245117, "learning_rate": 8.091672135219805e-06, "loss": 26.1576, "step": 3486 }, { "epoch": 0.32361948955916475, "grad_norm": 37.256534576416016, "learning_rate": 8.090465939855292e-06, "loss": 22.3856, "step": 3487 }, { "epoch": 0.3237122969837587, "grad_norm": 38.76301956176758, "learning_rate": 8.08925945337674e-06, "loss": 23.1168, "step": 3488 }, { "epoch": 0.3238051044083527, "grad_norm": 39.69239807128906, "learning_rate": 8.088052675897796e-06, "loss": 27.3872, "step": 3489 }, { "epoch": 0.32389791183294664, "grad_norm": 37.02651596069336, "learning_rate": 8.086845607532138e-06, "loss": 26.0539, "step": 3490 }, { "epoch": 0.3239907192575406, "grad_norm": 32.9331169128418, "learning_rate": 8.085638248393465e-06, "loss": 23.6339, "step": 3491 }, { "epoch": 0.3240835266821346, "grad_norm": 39.594120025634766, "learning_rate": 8.084430598595514e-06, "loss": 23.226, "step": 3492 }, { "epoch": 0.3241763341067285, "grad_norm": 38.2894172668457, "learning_rate": 8.083222658252035e-06, "loss": 23.8791, "step": 3493 }, { "epoch": 0.3242691415313225, "grad_norm": 36.267669677734375, "learning_rate": 8.082014427476818e-06, "loss": 25.2347, "step": 3494 }, { "epoch": 0.32436194895591647, "grad_norm": 45.67020797729492, "learning_rate": 8.080805906383673e-06, "loss": 23.9158, "step": 3495 }, { "epoch": 0.32445475638051047, "grad_norm": 38.103248596191406, "learning_rate": 8.07959709508644e-06, "loss": 24.9945, "step": 3496 }, { "epoch": 0.3245475638051044, "grad_norm": 38.194000244140625, "learning_rate": 8.078387993698986e-06, "loss": 22.5714, "step": 3497 }, { "epoch": 0.32464037122969835, "grad_norm": 37.96816635131836, "learning_rate": 8.077178602335204e-06, "loss": 23.2672, "step": 3498 }, { "epoch": 0.32473317865429235, "grad_norm": 35.99146270751953, "learning_rate": 8.075968921109019e-06, "loss": 20.8563, "step": 3499 }, { "epoch": 0.3248259860788863, "grad_norm": 43.120018005371094, "learning_rate": 8.074758950134376e-06, "loss": 25.2014, "step": 3500 }, { "epoch": 0.3249187935034803, "grad_norm": 37.141048431396484, "learning_rate": 8.073548689525254e-06, "loss": 22.7346, "step": 3501 }, { "epoch": 0.32501160092807424, "grad_norm": 38.27304458618164, "learning_rate": 8.072338139395656e-06, "loss": 22.6213, "step": 3502 }, { "epoch": 0.32510440835266824, "grad_norm": 35.558868408203125, "learning_rate": 8.071127299859611e-06, "loss": 24.7015, "step": 3503 }, { "epoch": 0.3251972157772622, "grad_norm": 38.0550422668457, "learning_rate": 8.069916171031181e-06, "loss": 23.8018, "step": 3504 }, { "epoch": 0.3252900232018561, "grad_norm": 35.35964584350586, "learning_rate": 8.068704753024444e-06, "loss": 23.6332, "step": 3505 }, { "epoch": 0.3253828306264501, "grad_norm": 40.21096420288086, "learning_rate": 8.067493045953519e-06, "loss": 23.7625, "step": 3506 }, { "epoch": 0.32547563805104407, "grad_norm": 39.742183685302734, "learning_rate": 8.066281049932546e-06, "loss": 23.1487, "step": 3507 }, { "epoch": 0.32556844547563807, "grad_norm": 38.55573654174805, "learning_rate": 8.065068765075685e-06, "loss": 23.5829, "step": 3508 }, { "epoch": 0.325661252900232, "grad_norm": 36.93459701538086, "learning_rate": 8.06385619149714e-06, "loss": 22.7522, "step": 3509 }, { "epoch": 0.325754060324826, "grad_norm": 35.998226165771484, "learning_rate": 8.062643329311123e-06, "loss": 23.9558, "step": 3510 }, { "epoch": 0.32584686774941996, "grad_norm": 43.06884002685547, "learning_rate": 8.061430178631889e-06, "loss": 24.8435, "step": 3511 }, { "epoch": 0.3259396751740139, "grad_norm": 36.311058044433594, "learning_rate": 8.06021673957371e-06, "loss": 22.6373, "step": 3512 }, { "epoch": 0.3260324825986079, "grad_norm": 35.58892059326172, "learning_rate": 8.05900301225089e-06, "loss": 22.4131, "step": 3513 }, { "epoch": 0.32612529002320184, "grad_norm": 35.546669006347656, "learning_rate": 8.05778899677776e-06, "loss": 22.5908, "step": 3514 }, { "epoch": 0.32621809744779584, "grad_norm": 39.698829650878906, "learning_rate": 8.056574693268677e-06, "loss": 24.8093, "step": 3515 }, { "epoch": 0.3263109048723898, "grad_norm": 34.03476333618164, "learning_rate": 8.055360101838026e-06, "loss": 23.2292, "step": 3516 }, { "epoch": 0.3264037122969838, "grad_norm": 35.36542510986328, "learning_rate": 8.054145222600216e-06, "loss": 24.1819, "step": 3517 }, { "epoch": 0.3264965197215777, "grad_norm": 36.32382583618164, "learning_rate": 8.052930055669686e-06, "loss": 21.0307, "step": 3518 }, { "epoch": 0.32658932714617167, "grad_norm": 77.8616714477539, "learning_rate": 8.051714601160902e-06, "loss": 25.2722, "step": 3519 }, { "epoch": 0.32668213457076567, "grad_norm": 57.68349075317383, "learning_rate": 8.050498859188358e-06, "loss": 22.2685, "step": 3520 }, { "epoch": 0.3267749419953596, "grad_norm": 37.55133056640625, "learning_rate": 8.049282829866573e-06, "loss": 24.9358, "step": 3521 }, { "epoch": 0.3268677494199536, "grad_norm": 35.54694366455078, "learning_rate": 8.048066513310093e-06, "loss": 23.48, "step": 3522 }, { "epoch": 0.32696055684454756, "grad_norm": 63.80452346801758, "learning_rate": 8.046849909633495e-06, "loss": 25.7211, "step": 3523 }, { "epoch": 0.32705336426914156, "grad_norm": 35.11882019042969, "learning_rate": 8.045633018951376e-06, "loss": 24.782, "step": 3524 }, { "epoch": 0.3271461716937355, "grad_norm": 36.4061393737793, "learning_rate": 8.044415841378365e-06, "loss": 23.5528, "step": 3525 }, { "epoch": 0.32723897911832944, "grad_norm": 33.85454559326172, "learning_rate": 8.043198377029117e-06, "loss": 22.6079, "step": 3526 }, { "epoch": 0.32733178654292344, "grad_norm": 35.86051559448242, "learning_rate": 8.041980626018318e-06, "loss": 22.464, "step": 3527 }, { "epoch": 0.3274245939675174, "grad_norm": 37.0215950012207, "learning_rate": 8.04076258846067e-06, "loss": 26.9939, "step": 3528 }, { "epoch": 0.3275174013921114, "grad_norm": 38.432090759277344, "learning_rate": 8.039544264470918e-06, "loss": 22.3495, "step": 3529 }, { "epoch": 0.32761020881670533, "grad_norm": 44.506683349609375, "learning_rate": 8.038325654163815e-06, "loss": 25.0844, "step": 3530 }, { "epoch": 0.3277030162412993, "grad_norm": 41.3480224609375, "learning_rate": 8.037106757654159e-06, "loss": 23.6887, "step": 3531 }, { "epoch": 0.32779582366589327, "grad_norm": 42.94960403442383, "learning_rate": 8.035887575056763e-06, "loss": 24.5689, "step": 3532 }, { "epoch": 0.3278886310904872, "grad_norm": 35.55921173095703, "learning_rate": 8.03466810648647e-06, "loss": 22.8619, "step": 3533 }, { "epoch": 0.3279814385150812, "grad_norm": 36.55015182495117, "learning_rate": 8.033448352058155e-06, "loss": 23.7688, "step": 3534 }, { "epoch": 0.32807424593967516, "grad_norm": 34.034881591796875, "learning_rate": 8.032228311886712e-06, "loss": 21.7384, "step": 3535 }, { "epoch": 0.32816705336426916, "grad_norm": 35.5771484375, "learning_rate": 8.031007986087067e-06, "loss": 25.0157, "step": 3536 }, { "epoch": 0.3282598607888631, "grad_norm": 34.13312911987305, "learning_rate": 8.029787374774171e-06, "loss": 22.2272, "step": 3537 }, { "epoch": 0.3283526682134571, "grad_norm": 37.82208251953125, "learning_rate": 8.028566478063005e-06, "loss": 24.1837, "step": 3538 }, { "epoch": 0.32844547563805104, "grad_norm": 31.905548095703125, "learning_rate": 8.027345296068571e-06, "loss": 23.6926, "step": 3539 }, { "epoch": 0.328538283062645, "grad_norm": 36.40118408203125, "learning_rate": 8.026123828905902e-06, "loss": 22.6409, "step": 3540 }, { "epoch": 0.328631090487239, "grad_norm": 36.422245025634766, "learning_rate": 8.02490207669006e-06, "loss": 24.576, "step": 3541 }, { "epoch": 0.32872389791183293, "grad_norm": 36.9959602355957, "learning_rate": 8.02368003953613e-06, "loss": 24.7673, "step": 3542 }, { "epoch": 0.32881670533642693, "grad_norm": 37.059696197509766, "learning_rate": 8.02245771755922e-06, "loss": 24.4929, "step": 3543 }, { "epoch": 0.3289095127610209, "grad_norm": 37.723472595214844, "learning_rate": 8.021235110874474e-06, "loss": 24.0098, "step": 3544 }, { "epoch": 0.32900232018561487, "grad_norm": 58.164886474609375, "learning_rate": 8.02001221959706e-06, "loss": 26.7347, "step": 3545 }, { "epoch": 0.3290951276102088, "grad_norm": 87.07539367675781, "learning_rate": 8.018789043842166e-06, "loss": 23.7298, "step": 3546 }, { "epoch": 0.32918793503480276, "grad_norm": 36.02790069580078, "learning_rate": 8.017565583725017e-06, "loss": 24.1537, "step": 3547 }, { "epoch": 0.32928074245939676, "grad_norm": 39.16303253173828, "learning_rate": 8.016341839360857e-06, "loss": 24.7912, "step": 3548 }, { "epoch": 0.3293735498839907, "grad_norm": 36.293861389160156, "learning_rate": 8.01511781086496e-06, "loss": 23.8743, "step": 3549 }, { "epoch": 0.3294663573085847, "grad_norm": 34.83423614501953, "learning_rate": 8.013893498352628e-06, "loss": 24.9217, "step": 3550 }, { "epoch": 0.32955916473317864, "grad_norm": 36.46790313720703, "learning_rate": 8.012668901939186e-06, "loss": 23.7787, "step": 3551 }, { "epoch": 0.32965197215777264, "grad_norm": 34.49028015136719, "learning_rate": 8.011444021739986e-06, "loss": 23.069, "step": 3552 }, { "epoch": 0.3297447795823666, "grad_norm": 35.19580078125, "learning_rate": 8.010218857870416e-06, "loss": 23.4846, "step": 3553 }, { "epoch": 0.32983758700696053, "grad_norm": 37.30398941040039, "learning_rate": 8.008993410445878e-06, "loss": 22.73, "step": 3554 }, { "epoch": 0.32993039443155453, "grad_norm": 34.060794830322266, "learning_rate": 8.007767679581806e-06, "loss": 24.1622, "step": 3555 }, { "epoch": 0.3300232018561485, "grad_norm": 38.304115295410156, "learning_rate": 8.00654166539366e-06, "loss": 21.9035, "step": 3556 }, { "epoch": 0.3301160092807425, "grad_norm": 38.48301315307617, "learning_rate": 8.00531536799693e-06, "loss": 23.4883, "step": 3557 }, { "epoch": 0.3302088167053364, "grad_norm": 36.86412048339844, "learning_rate": 8.004088787507128e-06, "loss": 23.5028, "step": 3558 }, { "epoch": 0.3303016241299304, "grad_norm": 33.46788787841797, "learning_rate": 8.002861924039798e-06, "loss": 24.2178, "step": 3559 }, { "epoch": 0.33039443155452436, "grad_norm": 39.406471252441406, "learning_rate": 8.001634777710503e-06, "loss": 22.3583, "step": 3560 }, { "epoch": 0.3304872389791183, "grad_norm": 37.5178108215332, "learning_rate": 8.00040734863484e-06, "loss": 23.2065, "step": 3561 }, { "epoch": 0.3305800464037123, "grad_norm": 34.595298767089844, "learning_rate": 7.99917963692843e-06, "loss": 24.6664, "step": 3562 }, { "epoch": 0.33067285382830625, "grad_norm": 37.256690979003906, "learning_rate": 7.997951642706916e-06, "loss": 23.9396, "step": 3563 }, { "epoch": 0.33076566125290024, "grad_norm": 38.930973052978516, "learning_rate": 7.996723366085978e-06, "loss": 23.7007, "step": 3564 }, { "epoch": 0.3308584686774942, "grad_norm": 39.97865295410156, "learning_rate": 7.995494807181313e-06, "loss": 24.2827, "step": 3565 }, { "epoch": 0.3309512761020882, "grad_norm": 38.61338424682617, "learning_rate": 7.99426596610865e-06, "loss": 23.8473, "step": 3566 }, { "epoch": 0.33104408352668213, "grad_norm": 35.81296920776367, "learning_rate": 7.99303684298374e-06, "loss": 23.7898, "step": 3567 }, { "epoch": 0.3311368909512761, "grad_norm": 35.82393264770508, "learning_rate": 7.991807437922364e-06, "loss": 22.64, "step": 3568 }, { "epoch": 0.3312296983758701, "grad_norm": 36.673377990722656, "learning_rate": 7.990577751040331e-06, "loss": 23.1986, "step": 3569 }, { "epoch": 0.331322505800464, "grad_norm": 36.24258041381836, "learning_rate": 7.989347782453473e-06, "loss": 23.1861, "step": 3570 }, { "epoch": 0.331415313225058, "grad_norm": 38.58159255981445, "learning_rate": 7.988117532277651e-06, "loss": 25.147, "step": 3571 }, { "epoch": 0.33150812064965196, "grad_norm": 35.52022171020508, "learning_rate": 7.986887000628748e-06, "loss": 23.0459, "step": 3572 }, { "epoch": 0.33160092807424596, "grad_norm": 36.252296447753906, "learning_rate": 7.985656187622682e-06, "loss": 24.1019, "step": 3573 }, { "epoch": 0.3316937354988399, "grad_norm": 42.057159423828125, "learning_rate": 7.984425093375387e-06, "loss": 24.9831, "step": 3574 }, { "epoch": 0.33178654292343385, "grad_norm": 35.7234992980957, "learning_rate": 7.983193718002834e-06, "loss": 22.1859, "step": 3575 }, { "epoch": 0.33187935034802785, "grad_norm": 39.79484939575195, "learning_rate": 7.981962061621012e-06, "loss": 25.0168, "step": 3576 }, { "epoch": 0.3319721577726218, "grad_norm": 36.892879486083984, "learning_rate": 7.980730124345943e-06, "loss": 23.5116, "step": 3577 }, { "epoch": 0.3320649651972158, "grad_norm": 33.5903434753418, "learning_rate": 7.979497906293667e-06, "loss": 22.4689, "step": 3578 }, { "epoch": 0.33215777262180973, "grad_norm": 37.08622360229492, "learning_rate": 7.97826540758026e-06, "loss": 24.0006, "step": 3579 }, { "epoch": 0.33225058004640373, "grad_norm": 42.288455963134766, "learning_rate": 7.977032628321823e-06, "loss": 25.3245, "step": 3580 }, { "epoch": 0.3323433874709977, "grad_norm": 42.23509216308594, "learning_rate": 7.975799568634474e-06, "loss": 24.2679, "step": 3581 }, { "epoch": 0.3324361948955917, "grad_norm": 54.38142395019531, "learning_rate": 7.974566228634369e-06, "loss": 23.6796, "step": 3582 }, { "epoch": 0.3325290023201856, "grad_norm": 35.13725662231445, "learning_rate": 7.973332608437682e-06, "loss": 24.5299, "step": 3583 }, { "epoch": 0.33262180974477956, "grad_norm": 37.47030258178711, "learning_rate": 7.97209870816062e-06, "loss": 23.8232, "step": 3584 }, { "epoch": 0.33271461716937356, "grad_norm": 35.40763473510742, "learning_rate": 7.97086452791941e-06, "loss": 23.8093, "step": 3585 }, { "epoch": 0.3328074245939675, "grad_norm": 35.20343017578125, "learning_rate": 7.969630067830313e-06, "loss": 22.1692, "step": 3586 }, { "epoch": 0.3329002320185615, "grad_norm": 39.13271713256836, "learning_rate": 7.968395328009609e-06, "loss": 25.5483, "step": 3587 }, { "epoch": 0.33299303944315545, "grad_norm": 36.87163543701172, "learning_rate": 7.967160308573607e-06, "loss": 24.4256, "step": 3588 }, { "epoch": 0.33308584686774945, "grad_norm": 35.11121368408203, "learning_rate": 7.965925009638645e-06, "loss": 24.0031, "step": 3589 }, { "epoch": 0.3331786542923434, "grad_norm": 40.0299072265625, "learning_rate": 7.964689431321083e-06, "loss": 22.508, "step": 3590 }, { "epoch": 0.33327146171693733, "grad_norm": 37.93783950805664, "learning_rate": 7.96345357373731e-06, "loss": 25.607, "step": 3591 }, { "epoch": 0.33336426914153133, "grad_norm": 35.53300094604492, "learning_rate": 7.962217437003742e-06, "loss": 23.5295, "step": 3592 }, { "epoch": 0.3334570765661253, "grad_norm": 42.3663215637207, "learning_rate": 7.960981021236815e-06, "loss": 24.9639, "step": 3593 }, { "epoch": 0.3335498839907193, "grad_norm": 38.83027267456055, "learning_rate": 7.959744326553002e-06, "loss": 24.9411, "step": 3594 }, { "epoch": 0.3336426914153132, "grad_norm": 35.74052429199219, "learning_rate": 7.958507353068793e-06, "loss": 23.1622, "step": 3595 }, { "epoch": 0.3337354988399072, "grad_norm": 40.58647155761719, "learning_rate": 7.957270100900712e-06, "loss": 26.211, "step": 3596 }, { "epoch": 0.33382830626450116, "grad_norm": 38.435428619384766, "learning_rate": 7.9560325701653e-06, "loss": 24.7444, "step": 3597 }, { "epoch": 0.3339211136890951, "grad_norm": 38.3966064453125, "learning_rate": 7.95479476097913e-06, "loss": 22.7184, "step": 3598 }, { "epoch": 0.3340139211136891, "grad_norm": 38.775428771972656, "learning_rate": 7.953556673458804e-06, "loss": 26.2211, "step": 3599 }, { "epoch": 0.33410672853828305, "grad_norm": 36.36680221557617, "learning_rate": 7.952318307720943e-06, "loss": 23.5593, "step": 3600 }, { "epoch": 0.33419953596287705, "grad_norm": 36.16909408569336, "learning_rate": 7.9510796638822e-06, "loss": 24.086, "step": 3601 }, { "epoch": 0.334292343387471, "grad_norm": 38.71424102783203, "learning_rate": 7.949840742059252e-06, "loss": 23.4337, "step": 3602 }, { "epoch": 0.334385150812065, "grad_norm": 39.26723098754883, "learning_rate": 7.948601542368801e-06, "loss": 21.9346, "step": 3603 }, { "epoch": 0.33447795823665893, "grad_norm": 38.22543716430664, "learning_rate": 7.947362064927578e-06, "loss": 22.8748, "step": 3604 }, { "epoch": 0.3345707656612529, "grad_norm": 39.454673767089844, "learning_rate": 7.946122309852337e-06, "loss": 25.0719, "step": 3605 }, { "epoch": 0.3346635730858469, "grad_norm": 37.03227615356445, "learning_rate": 7.94488227725986e-06, "loss": 23.0362, "step": 3606 }, { "epoch": 0.3347563805104408, "grad_norm": 41.43178939819336, "learning_rate": 7.943641967266957e-06, "loss": 24.6335, "step": 3607 }, { "epoch": 0.3348491879350348, "grad_norm": 37.338409423828125, "learning_rate": 7.94240137999046e-06, "loss": 24.4166, "step": 3608 }, { "epoch": 0.33494199535962876, "grad_norm": 38.71018981933594, "learning_rate": 7.94116051554723e-06, "loss": 24.0998, "step": 3609 }, { "epoch": 0.33503480278422276, "grad_norm": 34.83491516113281, "learning_rate": 7.939919374054153e-06, "loss": 22.1989, "step": 3610 }, { "epoch": 0.3351276102088167, "grad_norm": 34.5422248840332, "learning_rate": 7.938677955628142e-06, "loss": 23.0706, "step": 3611 }, { "epoch": 0.33522041763341065, "grad_norm": 45.30485534667969, "learning_rate": 7.937436260386134e-06, "loss": 24.6571, "step": 3612 }, { "epoch": 0.33531322505800465, "grad_norm": 34.409934997558594, "learning_rate": 7.936194288445095e-06, "loss": 24.3937, "step": 3613 }, { "epoch": 0.3354060324825986, "grad_norm": 34.5585823059082, "learning_rate": 7.934952039922014e-06, "loss": 24.015, "step": 3614 }, { "epoch": 0.3354988399071926, "grad_norm": 35.28507995605469, "learning_rate": 7.93370951493391e-06, "loss": 23.8401, "step": 3615 }, { "epoch": 0.33559164733178654, "grad_norm": 39.0739631652832, "learning_rate": 7.932466713597824e-06, "loss": 25.971, "step": 3616 }, { "epoch": 0.33568445475638053, "grad_norm": 35.647891998291016, "learning_rate": 7.931223636030825e-06, "loss": 23.7529, "step": 3617 }, { "epoch": 0.3357772621809745, "grad_norm": 37.742652893066406, "learning_rate": 7.929980282350009e-06, "loss": 24.855, "step": 3618 }, { "epoch": 0.3358700696055684, "grad_norm": 39.12990188598633, "learning_rate": 7.928736652672494e-06, "loss": 23.2782, "step": 3619 }, { "epoch": 0.3359628770301624, "grad_norm": 37.289432525634766, "learning_rate": 7.927492747115428e-06, "loss": 24.0728, "step": 3620 }, { "epoch": 0.33605568445475636, "grad_norm": 35.453285217285156, "learning_rate": 7.926248565795986e-06, "loss": 24.6033, "step": 3621 }, { "epoch": 0.33614849187935036, "grad_norm": 37.25461196899414, "learning_rate": 7.925004108831362e-06, "loss": 22.7172, "step": 3622 }, { "epoch": 0.3362412993039443, "grad_norm": 91.5993881225586, "learning_rate": 7.923759376338786e-06, "loss": 24.1052, "step": 3623 }, { "epoch": 0.3363341067285383, "grad_norm": 35.126338958740234, "learning_rate": 7.922514368435506e-06, "loss": 23.6091, "step": 3624 }, { "epoch": 0.33642691415313225, "grad_norm": 35.96662521362305, "learning_rate": 7.921269085238797e-06, "loss": 22.0277, "step": 3625 }, { "epoch": 0.3365197215777262, "grad_norm": 38.73402786254883, "learning_rate": 7.920023526865965e-06, "loss": 24.3008, "step": 3626 }, { "epoch": 0.3366125290023202, "grad_norm": 38.160560607910156, "learning_rate": 7.918777693434335e-06, "loss": 24.1278, "step": 3627 }, { "epoch": 0.33670533642691414, "grad_norm": 35.50178146362305, "learning_rate": 7.917531585061264e-06, "loss": 24.915, "step": 3628 }, { "epoch": 0.33679814385150814, "grad_norm": 35.08008575439453, "learning_rate": 7.916285201864131e-06, "loss": 24.5406, "step": 3629 }, { "epoch": 0.3368909512761021, "grad_norm": 34.189571380615234, "learning_rate": 7.915038543960342e-06, "loss": 25.1346, "step": 3630 }, { "epoch": 0.3369837587006961, "grad_norm": 36.666404724121094, "learning_rate": 7.913791611467332e-06, "loss": 24.3359, "step": 3631 }, { "epoch": 0.33707656612529, "grad_norm": 34.40437316894531, "learning_rate": 7.912544404502554e-06, "loss": 22.3461, "step": 3632 }, { "epoch": 0.33716937354988397, "grad_norm": 38.89826965332031, "learning_rate": 7.911296923183494e-06, "loss": 23.2319, "step": 3633 }, { "epoch": 0.33726218097447797, "grad_norm": 36.55154800415039, "learning_rate": 7.910049167627662e-06, "loss": 24.7208, "step": 3634 }, { "epoch": 0.3373549883990719, "grad_norm": 35.86307907104492, "learning_rate": 7.908801137952593e-06, "loss": 22.6528, "step": 3635 }, { "epoch": 0.3374477958236659, "grad_norm": 38.29680252075195, "learning_rate": 7.907552834275847e-06, "loss": 24.8254, "step": 3636 }, { "epoch": 0.33754060324825985, "grad_norm": 38.296424865722656, "learning_rate": 7.906304256715014e-06, "loss": 23.9246, "step": 3637 }, { "epoch": 0.33763341067285385, "grad_norm": 45.580810546875, "learning_rate": 7.905055405387705e-06, "loss": 26.8129, "step": 3638 }, { "epoch": 0.3377262180974478, "grad_norm": 38.361106872558594, "learning_rate": 7.903806280411558e-06, "loss": 23.3737, "step": 3639 }, { "epoch": 0.33781902552204174, "grad_norm": 37.731971740722656, "learning_rate": 7.902556881904237e-06, "loss": 24.3145, "step": 3640 }, { "epoch": 0.33791183294663574, "grad_norm": 33.04374694824219, "learning_rate": 7.901307209983436e-06, "loss": 22.7684, "step": 3641 }, { "epoch": 0.3380046403712297, "grad_norm": 34.22764205932617, "learning_rate": 7.900057264766865e-06, "loss": 24.6892, "step": 3642 }, { "epoch": 0.3380974477958237, "grad_norm": 36.714927673339844, "learning_rate": 7.898807046372271e-06, "loss": 23.5587, "step": 3643 }, { "epoch": 0.3381902552204176, "grad_norm": 34.29780578613281, "learning_rate": 7.89755655491742e-06, "loss": 21.9973, "step": 3644 }, { "epoch": 0.3382830626450116, "grad_norm": 35.73682403564453, "learning_rate": 7.896305790520101e-06, "loss": 23.5563, "step": 3645 }, { "epoch": 0.33837587006960557, "grad_norm": 36.08637237548828, "learning_rate": 7.89505475329814e-06, "loss": 24.7235, "step": 3646 }, { "epoch": 0.3384686774941995, "grad_norm": 42.304405212402344, "learning_rate": 7.893803443369375e-06, "loss": 25.164, "step": 3647 }, { "epoch": 0.3385614849187935, "grad_norm": 59.71018981933594, "learning_rate": 7.892551860851679e-06, "loss": 23.0801, "step": 3648 }, { "epoch": 0.33865429234338745, "grad_norm": 46.4935302734375, "learning_rate": 7.891300005862949e-06, "loss": 24.1214, "step": 3649 }, { "epoch": 0.33874709976798145, "grad_norm": 37.92573547363281, "learning_rate": 7.890047878521105e-06, "loss": 24.474, "step": 3650 }, { "epoch": 0.3388399071925754, "grad_norm": 42.554962158203125, "learning_rate": 7.888795478944093e-06, "loss": 24.6776, "step": 3651 }, { "epoch": 0.3389327146171694, "grad_norm": 40.1112174987793, "learning_rate": 7.88754280724989e-06, "loss": 24.2725, "step": 3652 }, { "epoch": 0.33902552204176334, "grad_norm": 41.3501091003418, "learning_rate": 7.886289863556491e-06, "loss": 23.3247, "step": 3653 }, { "epoch": 0.3391183294663573, "grad_norm": 37.624794006347656, "learning_rate": 7.88503664798192e-06, "loss": 23.1617, "step": 3654 }, { "epoch": 0.3392111368909513, "grad_norm": 39.60702896118164, "learning_rate": 7.88378316064423e-06, "loss": 25.7403, "step": 3655 }, { "epoch": 0.3393039443155452, "grad_norm": 36.80841827392578, "learning_rate": 7.882529401661491e-06, "loss": 24.6154, "step": 3656 }, { "epoch": 0.3393967517401392, "grad_norm": 37.64913558959961, "learning_rate": 7.88127537115181e-06, "loss": 24.3564, "step": 3657 }, { "epoch": 0.33948955916473317, "grad_norm": 34.91513442993164, "learning_rate": 7.880021069233308e-06, "loss": 22.7193, "step": 3658 }, { "epoch": 0.33958236658932717, "grad_norm": 37.5843391418457, "learning_rate": 7.878766496024142e-06, "loss": 23.8178, "step": 3659 }, { "epoch": 0.3396751740139211, "grad_norm": 39.94353485107422, "learning_rate": 7.877511651642486e-06, "loss": 22.4374, "step": 3660 }, { "epoch": 0.33976798143851505, "grad_norm": 37.872318267822266, "learning_rate": 7.876256536206544e-06, "loss": 24.4158, "step": 3661 }, { "epoch": 0.33986078886310905, "grad_norm": 36.536983489990234, "learning_rate": 7.875001149834544e-06, "loss": 23.6794, "step": 3662 }, { "epoch": 0.339953596287703, "grad_norm": 34.719783782958984, "learning_rate": 7.873745492644744e-06, "loss": 23.4218, "step": 3663 }, { "epoch": 0.340046403712297, "grad_norm": 40.410465240478516, "learning_rate": 7.872489564755419e-06, "loss": 22.0726, "step": 3664 }, { "epoch": 0.34013921113689094, "grad_norm": 37.8339958190918, "learning_rate": 7.871233366284877e-06, "loss": 25.5274, "step": 3665 }, { "epoch": 0.34023201856148494, "grad_norm": 36.16904830932617, "learning_rate": 7.869976897351446e-06, "loss": 23.8165, "step": 3666 }, { "epoch": 0.3403248259860789, "grad_norm": 37.69342803955078, "learning_rate": 7.868720158073486e-06, "loss": 22.0909, "step": 3667 }, { "epoch": 0.3404176334106728, "grad_norm": 38.527496337890625, "learning_rate": 7.867463148569375e-06, "loss": 25.7056, "step": 3668 }, { "epoch": 0.3405104408352668, "grad_norm": 33.00779342651367, "learning_rate": 7.866205868957524e-06, "loss": 23.3207, "step": 3669 }, { "epoch": 0.34060324825986077, "grad_norm": 38.03955078125, "learning_rate": 7.86494831935636e-06, "loss": 24.5104, "step": 3670 }, { "epoch": 0.34069605568445477, "grad_norm": 37.641361236572266, "learning_rate": 7.863690499884347e-06, "loss": 23.3392, "step": 3671 }, { "epoch": 0.3407888631090487, "grad_norm": 36.064544677734375, "learning_rate": 7.862432410659964e-06, "loss": 22.6029, "step": 3672 }, { "epoch": 0.3408816705336427, "grad_norm": 36.27716064453125, "learning_rate": 7.861174051801724e-06, "loss": 23.0194, "step": 3673 }, { "epoch": 0.34097447795823665, "grad_norm": 39.57195281982422, "learning_rate": 7.859915423428155e-06, "loss": 24.1399, "step": 3674 }, { "epoch": 0.34106728538283065, "grad_norm": 35.6920280456543, "learning_rate": 7.858656525657825e-06, "loss": 22.7901, "step": 3675 }, { "epoch": 0.3411600928074246, "grad_norm": 43.92793655395508, "learning_rate": 7.85739735860931e-06, "loss": 25.412, "step": 3676 }, { "epoch": 0.34125290023201854, "grad_norm": 37.5497932434082, "learning_rate": 7.856137922401226e-06, "loss": 24.114, "step": 3677 }, { "epoch": 0.34134570765661254, "grad_norm": 39.38967514038086, "learning_rate": 7.854878217152208e-06, "loss": 24.7208, "step": 3678 }, { "epoch": 0.3414385150812065, "grad_norm": 33.86955261230469, "learning_rate": 7.853618242980916e-06, "loss": 21.8656, "step": 3679 }, { "epoch": 0.3415313225058005, "grad_norm": 36.71171951293945, "learning_rate": 7.852358000006036e-06, "loss": 24.9768, "step": 3680 }, { "epoch": 0.3416241299303944, "grad_norm": 33.64427947998047, "learning_rate": 7.851097488346282e-06, "loss": 24.1964, "step": 3681 }, { "epoch": 0.3417169373549884, "grad_norm": 36.383731842041016, "learning_rate": 7.849836708120387e-06, "loss": 23.9702, "step": 3682 }, { "epoch": 0.34180974477958237, "grad_norm": 37.7753791809082, "learning_rate": 7.848575659447116e-06, "loss": 24.8655, "step": 3683 }, { "epoch": 0.3419025522041763, "grad_norm": 39.138946533203125, "learning_rate": 7.847314342445258e-06, "loss": 22.8613, "step": 3684 }, { "epoch": 0.3419953596287703, "grad_norm": 37.16453552246094, "learning_rate": 7.846052757233622e-06, "loss": 23.5211, "step": 3685 }, { "epoch": 0.34208816705336426, "grad_norm": 37.14106369018555, "learning_rate": 7.84479090393105e-06, "loss": 24.0569, "step": 3686 }, { "epoch": 0.34218097447795826, "grad_norm": 40.734561920166016, "learning_rate": 7.843528782656404e-06, "loss": 23.4171, "step": 3687 }, { "epoch": 0.3422737819025522, "grad_norm": 35.84867477416992, "learning_rate": 7.84226639352857e-06, "loss": 23.8192, "step": 3688 }, { "epoch": 0.3423665893271462, "grad_norm": 38.85390090942383, "learning_rate": 7.841003736666467e-06, "loss": 24.0139, "step": 3689 }, { "epoch": 0.34245939675174014, "grad_norm": 35.6324577331543, "learning_rate": 7.839740812189027e-06, "loss": 23.982, "step": 3690 }, { "epoch": 0.3425522041763341, "grad_norm": 38.86159133911133, "learning_rate": 7.838477620215222e-06, "loss": 23.1054, "step": 3691 }, { "epoch": 0.3426450116009281, "grad_norm": 38.37092208862305, "learning_rate": 7.837214160864037e-06, "loss": 22.9719, "step": 3692 }, { "epoch": 0.34273781902552203, "grad_norm": 38.47151184082031, "learning_rate": 7.835950434254487e-06, "loss": 23.4644, "step": 3693 }, { "epoch": 0.342830626450116, "grad_norm": 36.35835266113281, "learning_rate": 7.834686440505611e-06, "loss": 22.8131, "step": 3694 }, { "epoch": 0.34292343387470997, "grad_norm": 36.7689323425293, "learning_rate": 7.833422179736479e-06, "loss": 25.8284, "step": 3695 }, { "epoch": 0.34301624129930397, "grad_norm": 34.256649017333984, "learning_rate": 7.832157652066173e-06, "loss": 24.7048, "step": 3696 }, { "epoch": 0.3431090487238979, "grad_norm": 36.946781158447266, "learning_rate": 7.830892857613816e-06, "loss": 24.9938, "step": 3697 }, { "epoch": 0.34320185614849186, "grad_norm": 35.82252883911133, "learning_rate": 7.829627796498544e-06, "loss": 25.289, "step": 3698 }, { "epoch": 0.34329466357308586, "grad_norm": 42.307010650634766, "learning_rate": 7.828362468839525e-06, "loss": 24.0164, "step": 3699 }, { "epoch": 0.3433874709976798, "grad_norm": 37.78447723388672, "learning_rate": 7.827096874755947e-06, "loss": 22.4531, "step": 3700 }, { "epoch": 0.3434802784222738, "grad_norm": 40.0549201965332, "learning_rate": 7.82583101436703e-06, "loss": 24.4128, "step": 3701 }, { "epoch": 0.34357308584686774, "grad_norm": 36.73091506958008, "learning_rate": 7.824564887792008e-06, "loss": 24.6939, "step": 3702 }, { "epoch": 0.34366589327146174, "grad_norm": 37.98653793334961, "learning_rate": 7.823298495150153e-06, "loss": 25.0839, "step": 3703 }, { "epoch": 0.3437587006960557, "grad_norm": 40.970680236816406, "learning_rate": 7.822031836560752e-06, "loss": 24.1766, "step": 3704 }, { "epoch": 0.34385150812064963, "grad_norm": 38.00975799560547, "learning_rate": 7.820764912143124e-06, "loss": 22.4186, "step": 3705 }, { "epoch": 0.34394431554524363, "grad_norm": 36.63716125488281, "learning_rate": 7.81949772201661e-06, "loss": 24.0282, "step": 3706 }, { "epoch": 0.34403712296983757, "grad_norm": 39.431705474853516, "learning_rate": 7.818230266300577e-06, "loss": 24.8839, "step": 3707 }, { "epoch": 0.34412993039443157, "grad_norm": 38.11770248413086, "learning_rate": 7.81696254511441e-06, "loss": 22.3087, "step": 3708 }, { "epoch": 0.3442227378190255, "grad_norm": 37.87404251098633, "learning_rate": 7.815694558577531e-06, "loss": 22.3557, "step": 3709 }, { "epoch": 0.3443155452436195, "grad_norm": 35.8440055847168, "learning_rate": 7.81442630680938e-06, "loss": 24.2002, "step": 3710 }, { "epoch": 0.34440835266821346, "grad_norm": 37.13125228881836, "learning_rate": 7.81315778992942e-06, "loss": 23.5987, "step": 3711 }, { "epoch": 0.3445011600928074, "grad_norm": 36.06954574584961, "learning_rate": 7.811889008057147e-06, "loss": 25.4504, "step": 3712 }, { "epoch": 0.3445939675174014, "grad_norm": 39.2651481628418, "learning_rate": 7.810619961312074e-06, "loss": 24.6344, "step": 3713 }, { "epoch": 0.34468677494199534, "grad_norm": 39.53893280029297, "learning_rate": 7.809350649813743e-06, "loss": 24.1488, "step": 3714 }, { "epoch": 0.34477958236658934, "grad_norm": 38.8061408996582, "learning_rate": 7.80808107368172e-06, "loss": 23.4847, "step": 3715 }, { "epoch": 0.3448723897911833, "grad_norm": 37.98600387573242, "learning_rate": 7.806811233035592e-06, "loss": 23.4436, "step": 3716 }, { "epoch": 0.3449651972157773, "grad_norm": 39.747676849365234, "learning_rate": 7.805541127994982e-06, "loss": 23.2982, "step": 3717 }, { "epoch": 0.34505800464037123, "grad_norm": 34.37576675415039, "learning_rate": 7.804270758679524e-06, "loss": 22.9084, "step": 3718 }, { "epoch": 0.3451508120649652, "grad_norm": 38.9544563293457, "learning_rate": 7.803000125208888e-06, "loss": 23.0668, "step": 3719 }, { "epoch": 0.3452436194895592, "grad_norm": 40.11888885498047, "learning_rate": 7.80172922770276e-06, "loss": 23.7047, "step": 3720 }, { "epoch": 0.3453364269141531, "grad_norm": 52.74802780151367, "learning_rate": 7.800458066280861e-06, "loss": 24.3072, "step": 3721 }, { "epoch": 0.3454292343387471, "grad_norm": 38.02614212036133, "learning_rate": 7.799186641062929e-06, "loss": 25.3331, "step": 3722 }, { "epoch": 0.34552204176334106, "grad_norm": 39.5224609375, "learning_rate": 7.797914952168725e-06, "loss": 24.7646, "step": 3723 }, { "epoch": 0.34561484918793506, "grad_norm": 36.023529052734375, "learning_rate": 7.796642999718045e-06, "loss": 23.7528, "step": 3724 }, { "epoch": 0.345707656612529, "grad_norm": 40.40827560424805, "learning_rate": 7.7953707838307e-06, "loss": 24.8222, "step": 3725 }, { "epoch": 0.34580046403712295, "grad_norm": 38.365455627441406, "learning_rate": 7.79409830462653e-06, "loss": 23.6064, "step": 3726 }, { "epoch": 0.34589327146171694, "grad_norm": 41.11859130859375, "learning_rate": 7.7928255622254e-06, "loss": 23.0029, "step": 3727 }, { "epoch": 0.3459860788863109, "grad_norm": 37.477394104003906, "learning_rate": 7.791552556747198e-06, "loss": 22.8303, "step": 3728 }, { "epoch": 0.3460788863109049, "grad_norm": 35.11307144165039, "learning_rate": 7.790279288311838e-06, "loss": 24.1435, "step": 3729 }, { "epoch": 0.34617169373549883, "grad_norm": 36.74949645996094, "learning_rate": 7.78900575703926e-06, "loss": 23.7994, "step": 3730 }, { "epoch": 0.34626450116009283, "grad_norm": 39.85606384277344, "learning_rate": 7.787731963049427e-06, "loss": 22.8839, "step": 3731 }, { "epoch": 0.3463573085846868, "grad_norm": 52.111026763916016, "learning_rate": 7.786457906462329e-06, "loss": 25.5925, "step": 3732 }, { "epoch": 0.3464501160092807, "grad_norm": 38.750022888183594, "learning_rate": 7.785183587397974e-06, "loss": 24.1, "step": 3733 }, { "epoch": 0.3465429234338747, "grad_norm": 39.66249084472656, "learning_rate": 7.783909005976402e-06, "loss": 24.4572, "step": 3734 }, { "epoch": 0.34663573085846866, "grad_norm": 39.594024658203125, "learning_rate": 7.782634162317676e-06, "loss": 25.1599, "step": 3735 }, { "epoch": 0.34672853828306266, "grad_norm": 36.39767837524414, "learning_rate": 7.781359056541883e-06, "loss": 21.9261, "step": 3736 }, { "epoch": 0.3468213457076566, "grad_norm": 36.36308670043945, "learning_rate": 7.780083688769134e-06, "loss": 22.9123, "step": 3737 }, { "epoch": 0.3469141531322506, "grad_norm": 40.25699234008789, "learning_rate": 7.778808059119567e-06, "loss": 24.1362, "step": 3738 }, { "epoch": 0.34700696055684455, "grad_norm": 37.62480545043945, "learning_rate": 7.77753216771334e-06, "loss": 24.7871, "step": 3739 }, { "epoch": 0.3470997679814385, "grad_norm": 35.58245086669922, "learning_rate": 7.776256014670643e-06, "loss": 24.4045, "step": 3740 }, { "epoch": 0.3471925754060325, "grad_norm": 39.87908172607422, "learning_rate": 7.774979600111683e-06, "loss": 24.0268, "step": 3741 }, { "epoch": 0.34728538283062643, "grad_norm": 35.44804000854492, "learning_rate": 7.773702924156697e-06, "loss": 22.8245, "step": 3742 }, { "epoch": 0.34737819025522043, "grad_norm": 39.8094596862793, "learning_rate": 7.772425986925941e-06, "loss": 23.8602, "step": 3743 }, { "epoch": 0.3474709976798144, "grad_norm": 34.920982360839844, "learning_rate": 7.771148788539704e-06, "loss": 23.7892, "step": 3744 }, { "epoch": 0.3475638051044084, "grad_norm": 36.004981994628906, "learning_rate": 7.769871329118293e-06, "loss": 24.3999, "step": 3745 }, { "epoch": 0.3476566125290023, "grad_norm": 35.317413330078125, "learning_rate": 7.768593608782039e-06, "loss": 23.4014, "step": 3746 }, { "epoch": 0.34774941995359626, "grad_norm": 39.93263626098633, "learning_rate": 7.767315627651304e-06, "loss": 22.9826, "step": 3747 }, { "epoch": 0.34784222737819026, "grad_norm": 37.04420471191406, "learning_rate": 7.766037385846468e-06, "loss": 23.0782, "step": 3748 }, { "epoch": 0.3479350348027842, "grad_norm": 36.3195915222168, "learning_rate": 7.76475888348794e-06, "loss": 24.6446, "step": 3749 }, { "epoch": 0.3480278422273782, "grad_norm": 35.410667419433594, "learning_rate": 7.763480120696149e-06, "loss": 23.3158, "step": 3750 }, { "epoch": 0.34812064965197215, "grad_norm": 38.79888916015625, "learning_rate": 7.762201097591553e-06, "loss": 23.4673, "step": 3751 }, { "epoch": 0.34821345707656615, "grad_norm": 45.39243698120117, "learning_rate": 7.760921814294632e-06, "loss": 23.4947, "step": 3752 }, { "epoch": 0.3483062645011601, "grad_norm": 35.256317138671875, "learning_rate": 7.759642270925893e-06, "loss": 24.3639, "step": 3753 }, { "epoch": 0.34839907192575403, "grad_norm": 34.22829818725586, "learning_rate": 7.758362467605864e-06, "loss": 22.2727, "step": 3754 }, { "epoch": 0.34849187935034803, "grad_norm": 38.01703643798828, "learning_rate": 7.757082404455101e-06, "loss": 24.3721, "step": 3755 }, { "epoch": 0.348584686774942, "grad_norm": 40.07099151611328, "learning_rate": 7.755802081594179e-06, "loss": 26.4477, "step": 3756 }, { "epoch": 0.348677494199536, "grad_norm": 55.296871185302734, "learning_rate": 7.754521499143706e-06, "loss": 24.8639, "step": 3757 }, { "epoch": 0.3487703016241299, "grad_norm": 35.89604949951172, "learning_rate": 7.753240657224305e-06, "loss": 22.122, "step": 3758 }, { "epoch": 0.3488631090487239, "grad_norm": 37.80144500732422, "learning_rate": 7.751959555956632e-06, "loss": 23.5458, "step": 3759 }, { "epoch": 0.34895591647331786, "grad_norm": 32.2712287902832, "learning_rate": 7.75067819546136e-06, "loss": 25.1342, "step": 3760 }, { "epoch": 0.3490487238979118, "grad_norm": 37.29047393798828, "learning_rate": 7.749396575859191e-06, "loss": 25.2539, "step": 3761 }, { "epoch": 0.3491415313225058, "grad_norm": 39.094993591308594, "learning_rate": 7.748114697270854e-06, "loss": 23.749, "step": 3762 }, { "epoch": 0.34923433874709975, "grad_norm": 36.68967056274414, "learning_rate": 7.746832559817093e-06, "loss": 22.92, "step": 3763 }, { "epoch": 0.34932714617169375, "grad_norm": 41.879478454589844, "learning_rate": 7.745550163618685e-06, "loss": 26.1425, "step": 3764 }, { "epoch": 0.3494199535962877, "grad_norm": 38.85614013671875, "learning_rate": 7.74426750879643e-06, "loss": 26.1972, "step": 3765 }, { "epoch": 0.3495127610208817, "grad_norm": 34.90163040161133, "learning_rate": 7.742984595471146e-06, "loss": 24.3714, "step": 3766 }, { "epoch": 0.34960556844547563, "grad_norm": 38.68288040161133, "learning_rate": 7.741701423763685e-06, "loss": 26.7218, "step": 3767 }, { "epoch": 0.34969837587006963, "grad_norm": 43.69895553588867, "learning_rate": 7.740417993794918e-06, "loss": 24.2303, "step": 3768 }, { "epoch": 0.3497911832946636, "grad_norm": 37.73736572265625, "learning_rate": 7.739134305685735e-06, "loss": 22.1103, "step": 3769 }, { "epoch": 0.3498839907192575, "grad_norm": 38.06414794921875, "learning_rate": 7.737850359557064e-06, "loss": 23.7385, "step": 3770 }, { "epoch": 0.3499767981438515, "grad_norm": 34.40707778930664, "learning_rate": 7.736566155529847e-06, "loss": 23.3314, "step": 3771 }, { "epoch": 0.35006960556844546, "grad_norm": 38.88332748413086, "learning_rate": 7.735281693725049e-06, "loss": 22.6219, "step": 3772 }, { "epoch": 0.35016241299303946, "grad_norm": 35.608943939208984, "learning_rate": 7.733996974263669e-06, "loss": 22.1858, "step": 3773 }, { "epoch": 0.3502552204176334, "grad_norm": 176.43670654296875, "learning_rate": 7.73271199726672e-06, "loss": 24.3342, "step": 3774 }, { "epoch": 0.3503480278422274, "grad_norm": 38.589576721191406, "learning_rate": 7.731426762855243e-06, "loss": 23.3425, "step": 3775 }, { "epoch": 0.35044083526682135, "grad_norm": 38.455074310302734, "learning_rate": 7.730141271150308e-06, "loss": 22.636, "step": 3776 }, { "epoch": 0.3505336426914153, "grad_norm": 34.91950988769531, "learning_rate": 7.728855522273002e-06, "loss": 22.1981, "step": 3777 }, { "epoch": 0.3506264501160093, "grad_norm": 35.01906204223633, "learning_rate": 7.72756951634444e-06, "loss": 22.8092, "step": 3778 }, { "epoch": 0.35071925754060324, "grad_norm": 38.80899429321289, "learning_rate": 7.726283253485761e-06, "loss": 23.2696, "step": 3779 }, { "epoch": 0.35081206496519723, "grad_norm": 43.27881622314453, "learning_rate": 7.724996733818124e-06, "loss": 24.1259, "step": 3780 }, { "epoch": 0.3509048723897912, "grad_norm": 44.677433013916016, "learning_rate": 7.723709957462721e-06, "loss": 24.6496, "step": 3781 }, { "epoch": 0.3509976798143852, "grad_norm": 36.733951568603516, "learning_rate": 7.722422924540762e-06, "loss": 24.0624, "step": 3782 }, { "epoch": 0.3510904872389791, "grad_norm": 36.26812744140625, "learning_rate": 7.72113563517348e-06, "loss": 24.6999, "step": 3783 }, { "epoch": 0.35118329466357306, "grad_norm": 42.36528778076172, "learning_rate": 7.719848089482138e-06, "loss": 22.8787, "step": 3784 }, { "epoch": 0.35127610208816706, "grad_norm": 38.3178825378418, "learning_rate": 7.718560287588014e-06, "loss": 23.6969, "step": 3785 }, { "epoch": 0.351368909512761, "grad_norm": 39.49159622192383, "learning_rate": 7.71727222961242e-06, "loss": 22.0936, "step": 3786 }, { "epoch": 0.351461716937355, "grad_norm": 38.49611282348633, "learning_rate": 7.715983915676688e-06, "loss": 24.0151, "step": 3787 }, { "epoch": 0.35155452436194895, "grad_norm": 36.16162872314453, "learning_rate": 7.714695345902173e-06, "loss": 23.3475, "step": 3788 }, { "epoch": 0.35164733178654295, "grad_norm": 97.44468688964844, "learning_rate": 7.71340652041025e-06, "loss": 24.4679, "step": 3789 }, { "epoch": 0.3517401392111369, "grad_norm": 36.08712387084961, "learning_rate": 7.712117439322334e-06, "loss": 22.174, "step": 3790 }, { "epoch": 0.35183294663573084, "grad_norm": 37.584312438964844, "learning_rate": 7.710828102759842e-06, "loss": 24.7357, "step": 3791 }, { "epoch": 0.35192575406032484, "grad_norm": 40.91875457763672, "learning_rate": 7.709538510844234e-06, "loss": 22.925, "step": 3792 }, { "epoch": 0.3520185614849188, "grad_norm": 40.49637222290039, "learning_rate": 7.708248663696982e-06, "loss": 23.5705, "step": 3793 }, { "epoch": 0.3521113689095128, "grad_norm": 43.71878433227539, "learning_rate": 7.706958561439586e-06, "loss": 23.3162, "step": 3794 }, { "epoch": 0.3522041763341067, "grad_norm": 36.399818420410156, "learning_rate": 7.705668204193574e-06, "loss": 22.1853, "step": 3795 }, { "epoch": 0.3522969837587007, "grad_norm": 37.615020751953125, "learning_rate": 7.704377592080495e-06, "loss": 23.4532, "step": 3796 }, { "epoch": 0.35238979118329467, "grad_norm": 40.030128479003906, "learning_rate": 7.703086725221914e-06, "loss": 26.5058, "step": 3797 }, { "epoch": 0.3524825986078886, "grad_norm": 33.660369873046875, "learning_rate": 7.701795603739434e-06, "loss": 24.7715, "step": 3798 }, { "epoch": 0.3525754060324826, "grad_norm": 36.52625274658203, "learning_rate": 7.700504227754674e-06, "loss": 24.3632, "step": 3799 }, { "epoch": 0.35266821345707655, "grad_norm": 40.157875061035156, "learning_rate": 7.699212597389277e-06, "loss": 27.6545, "step": 3800 }, { "epoch": 0.35276102088167055, "grad_norm": 36.64374923706055, "learning_rate": 7.697920712764912e-06, "loss": 24.8641, "step": 3801 }, { "epoch": 0.3528538283062645, "grad_norm": 38.17667007446289, "learning_rate": 7.696628574003273e-06, "loss": 25.4042, "step": 3802 }, { "epoch": 0.3529466357308585, "grad_norm": 134.35333251953125, "learning_rate": 7.695336181226074e-06, "loss": 23.8866, "step": 3803 }, { "epoch": 0.35303944315545244, "grad_norm": 42.693119049072266, "learning_rate": 7.694043534555055e-06, "loss": 23.5198, "step": 3804 }, { "epoch": 0.3531322505800464, "grad_norm": 34.331138610839844, "learning_rate": 7.69275063411198e-06, "loss": 21.8772, "step": 3805 }, { "epoch": 0.3532250580046404, "grad_norm": 36.326602935791016, "learning_rate": 7.69145748001864e-06, "loss": 23.2033, "step": 3806 }, { "epoch": 0.3533178654292343, "grad_norm": 33.121891021728516, "learning_rate": 7.690164072396842e-06, "loss": 22.1695, "step": 3807 }, { "epoch": 0.3534106728538283, "grad_norm": 36.15068054199219, "learning_rate": 7.688870411368425e-06, "loss": 21.2475, "step": 3808 }, { "epoch": 0.35350348027842227, "grad_norm": 38.370460510253906, "learning_rate": 7.687576497055249e-06, "loss": 22.5133, "step": 3809 }, { "epoch": 0.35359628770301627, "grad_norm": 39.68361282348633, "learning_rate": 7.686282329579195e-06, "loss": 22.8834, "step": 3810 }, { "epoch": 0.3536890951276102, "grad_norm": 36.54747772216797, "learning_rate": 7.68498790906217e-06, "loss": 24.5352, "step": 3811 }, { "epoch": 0.35378190255220415, "grad_norm": 37.263282775878906, "learning_rate": 7.68369323562611e-06, "loss": 23.45, "step": 3812 }, { "epoch": 0.35387470997679815, "grad_norm": 36.35039138793945, "learning_rate": 7.682398309392966e-06, "loss": 23.7552, "step": 3813 }, { "epoch": 0.3539675174013921, "grad_norm": 40.77560043334961, "learning_rate": 7.681103130484716e-06, "loss": 23.3102, "step": 3814 }, { "epoch": 0.3540603248259861, "grad_norm": 39.31866455078125, "learning_rate": 7.679807699023365e-06, "loss": 24.6454, "step": 3815 }, { "epoch": 0.35415313225058004, "grad_norm": 36.016544342041016, "learning_rate": 7.678512015130936e-06, "loss": 23.3839, "step": 3816 }, { "epoch": 0.35424593967517404, "grad_norm": 35.46403121948242, "learning_rate": 7.677216078929483e-06, "loss": 24.7094, "step": 3817 }, { "epoch": 0.354338747099768, "grad_norm": 37.70671081542969, "learning_rate": 7.67591989054108e-06, "loss": 22.8671, "step": 3818 }, { "epoch": 0.3544315545243619, "grad_norm": 34.28071212768555, "learning_rate": 7.674623450087822e-06, "loss": 23.6285, "step": 3819 }, { "epoch": 0.3545243619489559, "grad_norm": 36.61785888671875, "learning_rate": 7.67332675769183e-06, "loss": 24.1564, "step": 3820 }, { "epoch": 0.35461716937354987, "grad_norm": 40.425148010253906, "learning_rate": 7.67202981347525e-06, "loss": 22.9092, "step": 3821 }, { "epoch": 0.35470997679814387, "grad_norm": 37.773441314697266, "learning_rate": 7.670732617560253e-06, "loss": 22.9513, "step": 3822 }, { "epoch": 0.3548027842227378, "grad_norm": 39.35073471069336, "learning_rate": 7.669435170069029e-06, "loss": 23.0287, "step": 3823 }, { "epoch": 0.3548955916473318, "grad_norm": 37.9561653137207, "learning_rate": 7.668137471123796e-06, "loss": 23.6647, "step": 3824 }, { "epoch": 0.35498839907192575, "grad_norm": 44.857643127441406, "learning_rate": 7.666839520846791e-06, "loss": 22.665, "step": 3825 }, { "epoch": 0.3550812064965197, "grad_norm": 33.538326263427734, "learning_rate": 7.665541319360281e-06, "loss": 23.9499, "step": 3826 }, { "epoch": 0.3551740139211137, "grad_norm": 38.72746658325195, "learning_rate": 7.664242866786554e-06, "loss": 23.8736, "step": 3827 }, { "epoch": 0.35526682134570764, "grad_norm": 37.81048583984375, "learning_rate": 7.662944163247916e-06, "loss": 24.2721, "step": 3828 }, { "epoch": 0.35535962877030164, "grad_norm": 35.086849212646484, "learning_rate": 7.661645208866706e-06, "loss": 22.66, "step": 3829 }, { "epoch": 0.3554524361948956, "grad_norm": 37.62327194213867, "learning_rate": 7.66034600376528e-06, "loss": 22.9368, "step": 3830 }, { "epoch": 0.3555452436194896, "grad_norm": 36.799713134765625, "learning_rate": 7.659046548066021e-06, "loss": 23.6203, "step": 3831 }, { "epoch": 0.3556380510440835, "grad_norm": 36.46220397949219, "learning_rate": 7.657746841891332e-06, "loss": 22.6274, "step": 3832 }, { "epoch": 0.35573085846867747, "grad_norm": 41.38768005371094, "learning_rate": 7.656446885363647e-06, "loss": 23.3269, "step": 3833 }, { "epoch": 0.35582366589327147, "grad_norm": 47.46563720703125, "learning_rate": 7.655146678605414e-06, "loss": 23.2175, "step": 3834 }, { "epoch": 0.3559164733178654, "grad_norm": 35.999900817871094, "learning_rate": 7.65384622173911e-06, "loss": 24.1041, "step": 3835 }, { "epoch": 0.3560092807424594, "grad_norm": 33.620018005371094, "learning_rate": 7.652545514887237e-06, "loss": 22.0087, "step": 3836 }, { "epoch": 0.35610208816705335, "grad_norm": 38.83281707763672, "learning_rate": 7.651244558172314e-06, "loss": 23.6913, "step": 3837 }, { "epoch": 0.35619489559164735, "grad_norm": 41.4980583190918, "learning_rate": 7.649943351716893e-06, "loss": 24.8703, "step": 3838 }, { "epoch": 0.3562877030162413, "grad_norm": 41.310401916503906, "learning_rate": 7.648641895643543e-06, "loss": 25.3829, "step": 3839 }, { "epoch": 0.35638051044083524, "grad_norm": 38.16624069213867, "learning_rate": 7.647340190074854e-06, "loss": 24.1034, "step": 3840 }, { "epoch": 0.35647331786542924, "grad_norm": 39.42036819458008, "learning_rate": 7.646038235133448e-06, "loss": 25.4289, "step": 3841 }, { "epoch": 0.3565661252900232, "grad_norm": 60.615692138671875, "learning_rate": 7.644736030941963e-06, "loss": 23.9608, "step": 3842 }, { "epoch": 0.3566589327146172, "grad_norm": 37.814430236816406, "learning_rate": 7.643433577623064e-06, "loss": 24.4562, "step": 3843 }, { "epoch": 0.3567517401392111, "grad_norm": 36.82878112792969, "learning_rate": 7.64213087529944e-06, "loss": 23.4883, "step": 3844 }, { "epoch": 0.3568445475638051, "grad_norm": 34.80588912963867, "learning_rate": 7.6408279240938e-06, "loss": 21.1279, "step": 3845 }, { "epoch": 0.35693735498839907, "grad_norm": 38.429996490478516, "learning_rate": 7.639524724128881e-06, "loss": 25.2843, "step": 3846 }, { "epoch": 0.357030162412993, "grad_norm": 35.637550354003906, "learning_rate": 7.638221275527439e-06, "loss": 23.888, "step": 3847 }, { "epoch": 0.357122969837587, "grad_norm": 36.009788513183594, "learning_rate": 7.636917578412257e-06, "loss": 22.6817, "step": 3848 }, { "epoch": 0.35721577726218096, "grad_norm": 36.32658767700195, "learning_rate": 7.635613632906141e-06, "loss": 24.6232, "step": 3849 }, { "epoch": 0.35730858468677495, "grad_norm": 37.00507354736328, "learning_rate": 7.634309439131918e-06, "loss": 22.9342, "step": 3850 }, { "epoch": 0.3574013921113689, "grad_norm": 36.27238845825195, "learning_rate": 7.633004997212438e-06, "loss": 25.0039, "step": 3851 }, { "epoch": 0.3574941995359629, "grad_norm": 36.294620513916016, "learning_rate": 7.63170030727058e-06, "loss": 24.7223, "step": 3852 }, { "epoch": 0.35758700696055684, "grad_norm": 38.36330032348633, "learning_rate": 7.63039536942924e-06, "loss": 24.0588, "step": 3853 }, { "epoch": 0.3576798143851508, "grad_norm": 34.509376525878906, "learning_rate": 7.629090183811338e-06, "loss": 22.246, "step": 3854 }, { "epoch": 0.3577726218097448, "grad_norm": 36.45076370239258, "learning_rate": 7.6277847505398226e-06, "loss": 23.7006, "step": 3855 }, { "epoch": 0.35786542923433873, "grad_norm": 34.62930679321289, "learning_rate": 7.626479069737662e-06, "loss": 25.1154, "step": 3856 }, { "epoch": 0.3579582366589327, "grad_norm": 44.69972229003906, "learning_rate": 7.6251731415278465e-06, "loss": 25.7913, "step": 3857 }, { "epoch": 0.35805104408352667, "grad_norm": 34.8245735168457, "learning_rate": 7.623866966033391e-06, "loss": 22.5259, "step": 3858 }, { "epoch": 0.35814385150812067, "grad_norm": 37.68745040893555, "learning_rate": 7.622560543377336e-06, "loss": 23.3246, "step": 3859 }, { "epoch": 0.3582366589327146, "grad_norm": 36.35578536987305, "learning_rate": 7.621253873682739e-06, "loss": 24.2066, "step": 3860 }, { "epoch": 0.35832946635730856, "grad_norm": 34.786705017089844, "learning_rate": 7.61994695707269e-06, "loss": 23.3572, "step": 3861 }, { "epoch": 0.35842227378190256, "grad_norm": 36.480167388916016, "learning_rate": 7.618639793670295e-06, "loss": 23.4644, "step": 3862 }, { "epoch": 0.3585150812064965, "grad_norm": 38.10627746582031, "learning_rate": 7.617332383598686e-06, "loss": 23.7262, "step": 3863 }, { "epoch": 0.3586078886310905, "grad_norm": 34.531131744384766, "learning_rate": 7.616024726981015e-06, "loss": 24.4088, "step": 3864 }, { "epoch": 0.35870069605568444, "grad_norm": 38.580013275146484, "learning_rate": 7.614716823940465e-06, "loss": 26.2227, "step": 3865 }, { "epoch": 0.35879350348027844, "grad_norm": 37.453338623046875, "learning_rate": 7.613408674600232e-06, "loss": 22.8843, "step": 3866 }, { "epoch": 0.3588863109048724, "grad_norm": 36.14254379272461, "learning_rate": 7.612100279083543e-06, "loss": 24.5369, "step": 3867 }, { "epoch": 0.3589791183294664, "grad_norm": 39.180809020996094, "learning_rate": 7.610791637513646e-06, "loss": 21.842, "step": 3868 }, { "epoch": 0.35907192575406033, "grad_norm": 38.71916580200195, "learning_rate": 7.60948275001381e-06, "loss": 24.6086, "step": 3869 }, { "epoch": 0.35916473317865427, "grad_norm": 69.56271362304688, "learning_rate": 7.60817361670733e-06, "loss": 25.3383, "step": 3870 }, { "epoch": 0.35925754060324827, "grad_norm": 38.808509826660156, "learning_rate": 7.606864237717524e-06, "loss": 26.4324, "step": 3871 }, { "epoch": 0.3593503480278422, "grad_norm": 36.4223747253418, "learning_rate": 7.60555461316773e-06, "loss": 22.5382, "step": 3872 }, { "epoch": 0.3594431554524362, "grad_norm": 37.35881805419922, "learning_rate": 7.604244743181313e-06, "loss": 24.1962, "step": 3873 }, { "epoch": 0.35953596287703016, "grad_norm": 35.36634063720703, "learning_rate": 7.602934627881657e-06, "loss": 24.0087, "step": 3874 }, { "epoch": 0.35962877030162416, "grad_norm": 35.78826904296875, "learning_rate": 7.601624267392176e-06, "loss": 22.6172, "step": 3875 }, { "epoch": 0.3597215777262181, "grad_norm": 36.59341049194336, "learning_rate": 7.600313661836298e-06, "loss": 23.0706, "step": 3876 }, { "epoch": 0.35981438515081204, "grad_norm": 63.946041107177734, "learning_rate": 7.599002811337479e-06, "loss": 23.4063, "step": 3877 }, { "epoch": 0.35990719257540604, "grad_norm": 38.87301254272461, "learning_rate": 7.597691716019202e-06, "loss": 25.3148, "step": 3878 }, { "epoch": 0.36, "grad_norm": 39.504573822021484, "learning_rate": 7.596380376004966e-06, "loss": 23.1247, "step": 3879 }, { "epoch": 0.360092807424594, "grad_norm": 38.319210052490234, "learning_rate": 7.595068791418294e-06, "loss": 25.8034, "step": 3880 }, { "epoch": 0.36018561484918793, "grad_norm": 37.30791473388672, "learning_rate": 7.593756962382738e-06, "loss": 26.6628, "step": 3881 }, { "epoch": 0.36027842227378193, "grad_norm": 39.36311340332031, "learning_rate": 7.592444889021866e-06, "loss": 25.4476, "step": 3882 }, { "epoch": 0.3603712296983759, "grad_norm": 36.46391677856445, "learning_rate": 7.591132571459273e-06, "loss": 25.2419, "step": 3883 }, { "epoch": 0.3604640371229698, "grad_norm": 38.402626037597656, "learning_rate": 7.589820009818577e-06, "loss": 23.7535, "step": 3884 }, { "epoch": 0.3605568445475638, "grad_norm": 36.05070877075195, "learning_rate": 7.588507204223416e-06, "loss": 23.0807, "step": 3885 }, { "epoch": 0.36064965197215776, "grad_norm": 34.45843505859375, "learning_rate": 7.587194154797453e-06, "loss": 23.6694, "step": 3886 }, { "epoch": 0.36074245939675176, "grad_norm": 34.99818801879883, "learning_rate": 7.585880861664377e-06, "loss": 25.2826, "step": 3887 }, { "epoch": 0.3608352668213457, "grad_norm": 35.09479904174805, "learning_rate": 7.584567324947893e-06, "loss": 25.1578, "step": 3888 }, { "epoch": 0.3609280742459397, "grad_norm": 37.244903564453125, "learning_rate": 7.583253544771735e-06, "loss": 22.039, "step": 3889 }, { "epoch": 0.36102088167053364, "grad_norm": 39.32594680786133, "learning_rate": 7.581939521259656e-06, "loss": 23.0809, "step": 3890 }, { "epoch": 0.3611136890951276, "grad_norm": 34.55469512939453, "learning_rate": 7.580625254535436e-06, "loss": 23.3499, "step": 3891 }, { "epoch": 0.3612064965197216, "grad_norm": 35.550071716308594, "learning_rate": 7.579310744722876e-06, "loss": 23.1332, "step": 3892 }, { "epoch": 0.36129930394431553, "grad_norm": 34.94321060180664, "learning_rate": 7.577995991945797e-06, "loss": 23.7637, "step": 3893 }, { "epoch": 0.36139211136890953, "grad_norm": 36.67033386230469, "learning_rate": 7.576680996328046e-06, "loss": 24.574, "step": 3894 }, { "epoch": 0.3614849187935035, "grad_norm": 34.57672119140625, "learning_rate": 7.575365757993492e-06, "loss": 22.8706, "step": 3895 }, { "epoch": 0.3615777262180975, "grad_norm": 39.28106689453125, "learning_rate": 7.574050277066027e-06, "loss": 24.2335, "step": 3896 }, { "epoch": 0.3616705336426914, "grad_norm": 37.08916473388672, "learning_rate": 7.572734553669569e-06, "loss": 25.2815, "step": 3897 }, { "epoch": 0.36176334106728536, "grad_norm": 38.97103500366211, "learning_rate": 7.571418587928053e-06, "loss": 24.0723, "step": 3898 }, { "epoch": 0.36185614849187936, "grad_norm": 41.46067810058594, "learning_rate": 7.570102379965438e-06, "loss": 25.0141, "step": 3899 }, { "epoch": 0.3619489559164733, "grad_norm": 38.01504898071289, "learning_rate": 7.568785929905713e-06, "loss": 23.8757, "step": 3900 }, { "epoch": 0.3620417633410673, "grad_norm": 37.81328582763672, "learning_rate": 7.5674692378728785e-06, "loss": 24.3907, "step": 3901 }, { "epoch": 0.36213457076566125, "grad_norm": 35.19700241088867, "learning_rate": 7.566152303990966e-06, "loss": 25.2836, "step": 3902 }, { "epoch": 0.36222737819025524, "grad_norm": 36.70613479614258, "learning_rate": 7.564835128384027e-06, "loss": 22.9191, "step": 3903 }, { "epoch": 0.3623201856148492, "grad_norm": 34.42827606201172, "learning_rate": 7.563517711176138e-06, "loss": 24.4037, "step": 3904 }, { "epoch": 0.36241299303944313, "grad_norm": 35.6619873046875, "learning_rate": 7.562200052491393e-06, "loss": 22.2561, "step": 3905 }, { "epoch": 0.36250580046403713, "grad_norm": 39.1053466796875, "learning_rate": 7.560882152453914e-06, "loss": 24.6815, "step": 3906 }, { "epoch": 0.3625986078886311, "grad_norm": 35.82660675048828, "learning_rate": 7.5595640111878435e-06, "loss": 23.514, "step": 3907 }, { "epoch": 0.3626914153132251, "grad_norm": 38.3070068359375, "learning_rate": 7.558245628817348e-06, "loss": 25.7231, "step": 3908 }, { "epoch": 0.362784222737819, "grad_norm": 34.78242111206055, "learning_rate": 7.556927005466612e-06, "loss": 23.4893, "step": 3909 }, { "epoch": 0.362877030162413, "grad_norm": 34.18699645996094, "learning_rate": 7.555608141259853e-06, "loss": 22.0386, "step": 3910 }, { "epoch": 0.36296983758700696, "grad_norm": 40.89933395385742, "learning_rate": 7.554289036321299e-06, "loss": 24.3398, "step": 3911 }, { "epoch": 0.3630626450116009, "grad_norm": 37.57480239868164, "learning_rate": 7.552969690775209e-06, "loss": 23.7251, "step": 3912 }, { "epoch": 0.3631554524361949, "grad_norm": 48.03770065307617, "learning_rate": 7.551650104745861e-06, "loss": 24.0402, "step": 3913 }, { "epoch": 0.36324825986078885, "grad_norm": 35.209346771240234, "learning_rate": 7.5503302783575575e-06, "loss": 24.4296, "step": 3914 }, { "epoch": 0.36334106728538285, "grad_norm": 36.894840240478516, "learning_rate": 7.549010211734621e-06, "loss": 23.028, "step": 3915 }, { "epoch": 0.3634338747099768, "grad_norm": 39.30384826660156, "learning_rate": 7.547689905001401e-06, "loss": 24.3296, "step": 3916 }, { "epoch": 0.3635266821345708, "grad_norm": 37.06834411621094, "learning_rate": 7.546369358282266e-06, "loss": 24.3395, "step": 3917 }, { "epoch": 0.36361948955916473, "grad_norm": 35.44703674316406, "learning_rate": 7.545048571701606e-06, "loss": 22.1211, "step": 3918 }, { "epoch": 0.3637122969837587, "grad_norm": 66.63021087646484, "learning_rate": 7.543727545383839e-06, "loss": 23.4794, "step": 3919 }, { "epoch": 0.3638051044083527, "grad_norm": 32.73606872558594, "learning_rate": 7.542406279453401e-06, "loss": 23.5868, "step": 3920 }, { "epoch": 0.3638979118329466, "grad_norm": 38.20520782470703, "learning_rate": 7.541084774034752e-06, "loss": 23.5687, "step": 3921 }, { "epoch": 0.3639907192575406, "grad_norm": 40.16505432128906, "learning_rate": 7.539763029252374e-06, "loss": 24.8367, "step": 3922 }, { "epoch": 0.36408352668213456, "grad_norm": 37.04377365112305, "learning_rate": 7.538441045230772e-06, "loss": 26.3669, "step": 3923 }, { "epoch": 0.36417633410672856, "grad_norm": 36.45439147949219, "learning_rate": 7.537118822094474e-06, "loss": 23.9591, "step": 3924 }, { "epoch": 0.3642691415313225, "grad_norm": 35.66072463989258, "learning_rate": 7.53579635996803e-06, "loss": 23.8373, "step": 3925 }, { "epoch": 0.36436194895591645, "grad_norm": 43.686702728271484, "learning_rate": 7.534473658976011e-06, "loss": 26.3609, "step": 3926 }, { "epoch": 0.36445475638051045, "grad_norm": 45.733394622802734, "learning_rate": 7.533150719243015e-06, "loss": 24.5962, "step": 3927 }, { "epoch": 0.3645475638051044, "grad_norm": 35.749366760253906, "learning_rate": 7.5318275408936596e-06, "loss": 23.1565, "step": 3928 }, { "epoch": 0.3646403712296984, "grad_norm": 37.8800048828125, "learning_rate": 7.53050412405258e-06, "loss": 23.6402, "step": 3929 }, { "epoch": 0.36473317865429233, "grad_norm": 35.343605041503906, "learning_rate": 7.529180468844443e-06, "loss": 24.1802, "step": 3930 }, { "epoch": 0.36482598607888633, "grad_norm": 33.92688751220703, "learning_rate": 7.527856575393935e-06, "loss": 23.6955, "step": 3931 }, { "epoch": 0.3649187935034803, "grad_norm": 37.85203170776367, "learning_rate": 7.526532443825758e-06, "loss": 23.5722, "step": 3932 }, { "epoch": 0.3650116009280742, "grad_norm": 39.02396774291992, "learning_rate": 7.525208074264647e-06, "loss": 24.1511, "step": 3933 }, { "epoch": 0.3651044083526682, "grad_norm": 37.15060043334961, "learning_rate": 7.523883466835351e-06, "loss": 22.563, "step": 3934 }, { "epoch": 0.36519721577726216, "grad_norm": 35.11860656738281, "learning_rate": 7.522558621662645e-06, "loss": 24.7304, "step": 3935 }, { "epoch": 0.36529002320185616, "grad_norm": 41.837406158447266, "learning_rate": 7.521233538871329e-06, "loss": 22.7195, "step": 3936 }, { "epoch": 0.3653828306264501, "grad_norm": 35.84320831298828, "learning_rate": 7.5199082185862185e-06, "loss": 22.4777, "step": 3937 }, { "epoch": 0.3654756380510441, "grad_norm": 40.28108596801758, "learning_rate": 7.518582660932158e-06, "loss": 25.2472, "step": 3938 }, { "epoch": 0.36556844547563805, "grad_norm": 35.41853332519531, "learning_rate": 7.517256866034011e-06, "loss": 23.1452, "step": 3939 }, { "epoch": 0.365661252900232, "grad_norm": 40.793357849121094, "learning_rate": 7.5159308340166604e-06, "loss": 23.3794, "step": 3940 }, { "epoch": 0.365754060324826, "grad_norm": 40.034366607666016, "learning_rate": 7.514604565005022e-06, "loss": 21.2707, "step": 3941 }, { "epoch": 0.36584686774941994, "grad_norm": 96.3607177734375, "learning_rate": 7.5132780591240216e-06, "loss": 26.18, "step": 3942 }, { "epoch": 0.36593967517401393, "grad_norm": 37.7065315246582, "learning_rate": 7.511951316498615e-06, "loss": 24.3558, "step": 3943 }, { "epoch": 0.3660324825986079, "grad_norm": 73.312255859375, "learning_rate": 7.510624337253777e-06, "loss": 24.1113, "step": 3944 }, { "epoch": 0.3661252900232019, "grad_norm": 38.944820404052734, "learning_rate": 7.509297121514506e-06, "loss": 24.3001, "step": 3945 }, { "epoch": 0.3662180974477958, "grad_norm": 38.232398986816406, "learning_rate": 7.5079696694058236e-06, "loss": 24.3651, "step": 3946 }, { "epoch": 0.36631090487238976, "grad_norm": 37.07929992675781, "learning_rate": 7.506641981052769e-06, "loss": 22.2828, "step": 3947 }, { "epoch": 0.36640371229698376, "grad_norm": 35.91967010498047, "learning_rate": 7.505314056580411e-06, "loss": 23.7441, "step": 3948 }, { "epoch": 0.3664965197215777, "grad_norm": 35.75128936767578, "learning_rate": 7.503985896113835e-06, "loss": 25.3677, "step": 3949 }, { "epoch": 0.3665893271461717, "grad_norm": 37.994239807128906, "learning_rate": 7.502657499778149e-06, "loss": 24.5262, "step": 3950 }, { "epoch": 0.36668213457076565, "grad_norm": 38.056583404541016, "learning_rate": 7.5013288676984865e-06, "loss": 24.7267, "step": 3951 }, { "epoch": 0.36677494199535965, "grad_norm": 33.512794494628906, "learning_rate": 7.500000000000001e-06, "loss": 25.9657, "step": 3952 }, { "epoch": 0.3668677494199536, "grad_norm": 35.00041580200195, "learning_rate": 7.498670896807867e-06, "loss": 25.0538, "step": 3953 }, { "epoch": 0.36696055684454754, "grad_norm": 34.823184967041016, "learning_rate": 7.497341558247285e-06, "loss": 24.3582, "step": 3954 }, { "epoch": 0.36705336426914154, "grad_norm": 33.50154495239258, "learning_rate": 7.4960119844434744e-06, "loss": 23.0232, "step": 3955 }, { "epoch": 0.3671461716937355, "grad_norm": 42.21277618408203, "learning_rate": 7.4946821755216756e-06, "loss": 26.3648, "step": 3956 }, { "epoch": 0.3672389791183295, "grad_norm": 36.82032775878906, "learning_rate": 7.493352131607157e-06, "loss": 23.5093, "step": 3957 }, { "epoch": 0.3673317865429234, "grad_norm": 38.0164680480957, "learning_rate": 7.492021852825202e-06, "loss": 24.6908, "step": 3958 }, { "epoch": 0.3674245939675174, "grad_norm": 51.41705322265625, "learning_rate": 7.49069133930112e-06, "loss": 25.5334, "step": 3959 }, { "epoch": 0.36751740139211136, "grad_norm": 38.136146545410156, "learning_rate": 7.489360591160245e-06, "loss": 25.7425, "step": 3960 }, { "epoch": 0.36761020881670536, "grad_norm": 35.91650390625, "learning_rate": 7.488029608527927e-06, "loss": 22.9997, "step": 3961 }, { "epoch": 0.3677030162412993, "grad_norm": 37.78654098510742, "learning_rate": 7.486698391529542e-06, "loss": 23.1879, "step": 3962 }, { "epoch": 0.36779582366589325, "grad_norm": 37.16310501098633, "learning_rate": 7.485366940290486e-06, "loss": 24.6201, "step": 3963 }, { "epoch": 0.36788863109048725, "grad_norm": 38.971981048583984, "learning_rate": 7.4840352549361815e-06, "loss": 23.0138, "step": 3964 }, { "epoch": 0.3679814385150812, "grad_norm": 34.408206939697266, "learning_rate": 7.482703335592068e-06, "loss": 22.659, "step": 3965 }, { "epoch": 0.3680742459396752, "grad_norm": 35.78239059448242, "learning_rate": 7.481371182383608e-06, "loss": 23.0048, "step": 3966 }, { "epoch": 0.36816705336426914, "grad_norm": 38.772491455078125, "learning_rate": 7.480038795436289e-06, "loss": 24.2453, "step": 3967 }, { "epoch": 0.36825986078886314, "grad_norm": 36.307960510253906, "learning_rate": 7.478706174875616e-06, "loss": 23.4494, "step": 3968 }, { "epoch": 0.3683526682134571, "grad_norm": 38.548736572265625, "learning_rate": 7.47737332082712e-06, "loss": 23.8991, "step": 3969 }, { "epoch": 0.368445475638051, "grad_norm": 37.13520050048828, "learning_rate": 7.476040233416353e-06, "loss": 24.4418, "step": 3970 }, { "epoch": 0.368538283062645, "grad_norm": 41.816158294677734, "learning_rate": 7.474706912768888e-06, "loss": 23.9365, "step": 3971 }, { "epoch": 0.36863109048723897, "grad_norm": 35.95188522338867, "learning_rate": 7.4733733590103185e-06, "loss": 24.0081, "step": 3972 }, { "epoch": 0.36872389791183297, "grad_norm": 48.133445739746094, "learning_rate": 7.4720395722662646e-06, "loss": 23.8418, "step": 3973 }, { "epoch": 0.3688167053364269, "grad_norm": 40.39079666137695, "learning_rate": 7.470705552662364e-06, "loss": 24.9528, "step": 3974 }, { "epoch": 0.3689095127610209, "grad_norm": 38.742557525634766, "learning_rate": 7.4693713003242794e-06, "loss": 23.1556, "step": 3975 }, { "epoch": 0.36900232018561485, "grad_norm": 35.4532585144043, "learning_rate": 7.468036815377692e-06, "loss": 23.6558, "step": 3976 }, { "epoch": 0.3690951276102088, "grad_norm": 39.52933120727539, "learning_rate": 7.466702097948309e-06, "loss": 23.5539, "step": 3977 }, { "epoch": 0.3691879350348028, "grad_norm": 34.335872650146484, "learning_rate": 7.4653671481618565e-06, "loss": 25.0424, "step": 3978 }, { "epoch": 0.36928074245939674, "grad_norm": 35.626243591308594, "learning_rate": 7.464031966144082e-06, "loss": 23.2545, "step": 3979 }, { "epoch": 0.36937354988399074, "grad_norm": 39.17266845703125, "learning_rate": 7.462696552020758e-06, "loss": 23.4484, "step": 3980 }, { "epoch": 0.3694663573085847, "grad_norm": 43.66853713989258, "learning_rate": 7.461360905917678e-06, "loss": 23.0918, "step": 3981 }, { "epoch": 0.3695591647331787, "grad_norm": 38.41305160522461, "learning_rate": 7.460025027960653e-06, "loss": 24.4944, "step": 3982 }, { "epoch": 0.3696519721577726, "grad_norm": 35.73603820800781, "learning_rate": 7.458688918275523e-06, "loss": 23.3906, "step": 3983 }, { "epoch": 0.36974477958236657, "grad_norm": 34.3641357421875, "learning_rate": 7.457352576988144e-06, "loss": 22.6825, "step": 3984 }, { "epoch": 0.36983758700696057, "grad_norm": 40.8183708190918, "learning_rate": 7.4560160042243954e-06, "loss": 24.5078, "step": 3985 }, { "epoch": 0.3699303944315545, "grad_norm": 38.980247497558594, "learning_rate": 7.454679200110182e-06, "loss": 23.3152, "step": 3986 }, { "epoch": 0.3700232018561485, "grad_norm": 40.21165466308594, "learning_rate": 7.453342164771425e-06, "loss": 23.3395, "step": 3987 }, { "epoch": 0.37011600928074245, "grad_norm": 33.02249526977539, "learning_rate": 7.452004898334069e-06, "loss": 24.1193, "step": 3988 }, { "epoch": 0.37020881670533645, "grad_norm": 37.28691864013672, "learning_rate": 7.450667400924083e-06, "loss": 25.4647, "step": 3989 }, { "epoch": 0.3703016241299304, "grad_norm": 38.436737060546875, "learning_rate": 7.449329672667456e-06, "loss": 24.7885, "step": 3990 }, { "epoch": 0.37039443155452434, "grad_norm": 45.950294494628906, "learning_rate": 7.447991713690195e-06, "loss": 23.7529, "step": 3991 }, { "epoch": 0.37048723897911834, "grad_norm": 41.63507080078125, "learning_rate": 7.446653524118337e-06, "loss": 23.6091, "step": 3992 }, { "epoch": 0.3705800464037123, "grad_norm": 34.157989501953125, "learning_rate": 7.445315104077935e-06, "loss": 21.9386, "step": 3993 }, { "epoch": 0.3706728538283063, "grad_norm": 37.25429916381836, "learning_rate": 7.443976453695061e-06, "loss": 23.6507, "step": 3994 }, { "epoch": 0.3707656612529002, "grad_norm": 39.85495376586914, "learning_rate": 7.4426375730958176e-06, "loss": 25.5719, "step": 3995 }, { "epoch": 0.3708584686774942, "grad_norm": 36.03700637817383, "learning_rate": 7.441298462406321e-06, "loss": 22.2113, "step": 3996 }, { "epoch": 0.37095127610208817, "grad_norm": 39.28831481933594, "learning_rate": 7.4399591217527135e-06, "loss": 24.7124, "step": 3997 }, { "epoch": 0.3710440835266821, "grad_norm": 35.38603973388672, "learning_rate": 7.438619551261154e-06, "loss": 21.5052, "step": 3998 }, { "epoch": 0.3711368909512761, "grad_norm": 39.191558837890625, "learning_rate": 7.4372797510578336e-06, "loss": 23.1046, "step": 3999 }, { "epoch": 0.37122969837587005, "grad_norm": 36.62664794921875, "learning_rate": 7.435939721268951e-06, "loss": 25.597, "step": 4000 }, { "epoch": 0.37132250580046405, "grad_norm": 37.54631042480469, "learning_rate": 7.434599462020736e-06, "loss": 23.3441, "step": 4001 }, { "epoch": 0.371415313225058, "grad_norm": 32.763797760009766, "learning_rate": 7.43325897343944e-06, "loss": 23.1561, "step": 4002 }, { "epoch": 0.371508120649652, "grad_norm": 38.71216583251953, "learning_rate": 7.431918255651332e-06, "loss": 23.085, "step": 4003 }, { "epoch": 0.37160092807424594, "grad_norm": 36.41300582885742, "learning_rate": 7.4305773087827015e-06, "loss": 22.6646, "step": 4004 }, { "epoch": 0.3716937354988399, "grad_norm": 36.78984832763672, "learning_rate": 7.429236132959866e-06, "loss": 25.2648, "step": 4005 }, { "epoch": 0.3717865429234339, "grad_norm": 36.33198928833008, "learning_rate": 7.427894728309159e-06, "loss": 23.3092, "step": 4006 }, { "epoch": 0.3718793503480278, "grad_norm": 35.493629455566406, "learning_rate": 7.4265530949569385e-06, "loss": 23.226, "step": 4007 }, { "epoch": 0.3719721577726218, "grad_norm": 36.355403900146484, "learning_rate": 7.4252112330295835e-06, "loss": 23.7791, "step": 4008 }, { "epoch": 0.37206496519721577, "grad_norm": 34.43935775756836, "learning_rate": 7.423869142653493e-06, "loss": 22.2097, "step": 4009 }, { "epoch": 0.37215777262180977, "grad_norm": 43.53211212158203, "learning_rate": 7.422526823955085e-06, "loss": 23.5914, "step": 4010 }, { "epoch": 0.3722505800464037, "grad_norm": 50.88712692260742, "learning_rate": 7.421184277060809e-06, "loss": 24.1829, "step": 4011 }, { "epoch": 0.37234338747099766, "grad_norm": 36.67601776123047, "learning_rate": 7.419841502097127e-06, "loss": 22.6946, "step": 4012 }, { "epoch": 0.37243619489559165, "grad_norm": 36.140621185302734, "learning_rate": 7.418498499190521e-06, "loss": 22.3487, "step": 4013 }, { "epoch": 0.3725290023201856, "grad_norm": 39.76054382324219, "learning_rate": 7.417155268467505e-06, "loss": 24.3405, "step": 4014 }, { "epoch": 0.3726218097447796, "grad_norm": 36.27997589111328, "learning_rate": 7.415811810054605e-06, "loss": 23.0082, "step": 4015 }, { "epoch": 0.37271461716937354, "grad_norm": 35.04350662231445, "learning_rate": 7.414468124078369e-06, "loss": 23.0303, "step": 4016 }, { "epoch": 0.37280742459396754, "grad_norm": 44.40803909301758, "learning_rate": 7.413124210665372e-06, "loss": 24.7059, "step": 4017 }, { "epoch": 0.3729002320185615, "grad_norm": 45.338478088378906, "learning_rate": 7.411780069942207e-06, "loss": 23.7559, "step": 4018 }, { "epoch": 0.3729930394431554, "grad_norm": 38.58553695678711, "learning_rate": 7.410435702035486e-06, "loss": 23.525, "step": 4019 }, { "epoch": 0.3730858468677494, "grad_norm": 55.56517028808594, "learning_rate": 7.409091107071849e-06, "loss": 25.7952, "step": 4020 }, { "epoch": 0.37317865429234337, "grad_norm": 35.22559356689453, "learning_rate": 7.407746285177949e-06, "loss": 21.2444, "step": 4021 }, { "epoch": 0.37327146171693737, "grad_norm": 37.00148391723633, "learning_rate": 7.406401236480468e-06, "loss": 23.1583, "step": 4022 }, { "epoch": 0.3733642691415313, "grad_norm": 38.15827941894531, "learning_rate": 7.4050559611061056e-06, "loss": 22.294, "step": 4023 }, { "epoch": 0.3734570765661253, "grad_norm": 35.44832229614258, "learning_rate": 7.403710459181583e-06, "loss": 22.9098, "step": 4024 }, { "epoch": 0.37354988399071926, "grad_norm": 36.96829605102539, "learning_rate": 7.402364730833644e-06, "loss": 24.0352, "step": 4025 }, { "epoch": 0.3736426914153132, "grad_norm": 34.7532844543457, "learning_rate": 7.4010187761890504e-06, "loss": 23.5003, "step": 4026 }, { "epoch": 0.3737354988399072, "grad_norm": 39.92850875854492, "learning_rate": 7.39967259537459e-06, "loss": 25.5048, "step": 4027 }, { "epoch": 0.37382830626450114, "grad_norm": 37.92168426513672, "learning_rate": 7.398326188517068e-06, "loss": 23.448, "step": 4028 }, { "epoch": 0.37392111368909514, "grad_norm": 41.615352630615234, "learning_rate": 7.396979555743315e-06, "loss": 23.9371, "step": 4029 }, { "epoch": 0.3740139211136891, "grad_norm": 36.97105026245117, "learning_rate": 7.395632697180177e-06, "loss": 24.3777, "step": 4030 }, { "epoch": 0.3741067285382831, "grad_norm": 36.51676559448242, "learning_rate": 7.394285612954527e-06, "loss": 24.171, "step": 4031 }, { "epoch": 0.37419953596287703, "grad_norm": 38.81295394897461, "learning_rate": 7.392938303193257e-06, "loss": 24.4441, "step": 4032 }, { "epoch": 0.37429234338747097, "grad_norm": 37.60740661621094, "learning_rate": 7.391590768023278e-06, "loss": 24.0884, "step": 4033 }, { "epoch": 0.37438515081206497, "grad_norm": 35.935142517089844, "learning_rate": 7.390243007571527e-06, "loss": 23.1018, "step": 4034 }, { "epoch": 0.3744779582366589, "grad_norm": 38.36695098876953, "learning_rate": 7.388895021964957e-06, "loss": 23.387, "step": 4035 }, { "epoch": 0.3745707656612529, "grad_norm": 37.73149108886719, "learning_rate": 7.3875468113305475e-06, "loss": 23.5425, "step": 4036 }, { "epoch": 0.37466357308584686, "grad_norm": 38.70344543457031, "learning_rate": 7.386198375795295e-06, "loss": 24.3557, "step": 4037 }, { "epoch": 0.37475638051044086, "grad_norm": 36.71825408935547, "learning_rate": 7.384849715486217e-06, "loss": 24.326, "step": 4038 }, { "epoch": 0.3748491879350348, "grad_norm": 36.84855270385742, "learning_rate": 7.383500830530358e-06, "loss": 23.948, "step": 4039 }, { "epoch": 0.37494199535962874, "grad_norm": 37.20037841796875, "learning_rate": 7.382151721054777e-06, "loss": 24.9381, "step": 4040 }, { "epoch": 0.37503480278422274, "grad_norm": 32.349117279052734, "learning_rate": 7.380802387186557e-06, "loss": 23.6292, "step": 4041 }, { "epoch": 0.3751276102088167, "grad_norm": 42.080162048339844, "learning_rate": 7.379452829052801e-06, "loss": 22.6721, "step": 4042 }, { "epoch": 0.3752204176334107, "grad_norm": 39.49871063232422, "learning_rate": 7.378103046780636e-06, "loss": 24.7795, "step": 4043 }, { "epoch": 0.37531322505800463, "grad_norm": 37.926795959472656, "learning_rate": 7.376753040497207e-06, "loss": 23.358, "step": 4044 }, { "epoch": 0.37540603248259863, "grad_norm": 55.15793991088867, "learning_rate": 7.375402810329679e-06, "loss": 21.7736, "step": 4045 }, { "epoch": 0.37549883990719257, "grad_norm": 37.956932067871094, "learning_rate": 7.374052356405243e-06, "loss": 24.0777, "step": 4046 }, { "epoch": 0.3755916473317865, "grad_norm": 40.10184097290039, "learning_rate": 7.37270167885111e-06, "loss": 22.7793, "step": 4047 }, { "epoch": 0.3756844547563805, "grad_norm": 40.30978012084961, "learning_rate": 7.371350777794505e-06, "loss": 22.9241, "step": 4048 }, { "epoch": 0.37577726218097446, "grad_norm": 36.79427719116211, "learning_rate": 7.3699996533626835e-06, "loss": 23.1741, "step": 4049 }, { "epoch": 0.37587006960556846, "grad_norm": 36.40678405761719, "learning_rate": 7.368648305682917e-06, "loss": 23.6356, "step": 4050 }, { "epoch": 0.3759628770301624, "grad_norm": 36.829124450683594, "learning_rate": 7.3672967348825e-06, "loss": 23.3327, "step": 4051 }, { "epoch": 0.3760556844547564, "grad_norm": 37.44097137451172, "learning_rate": 7.365944941088745e-06, "loss": 23.2082, "step": 4052 }, { "epoch": 0.37614849187935034, "grad_norm": 38.11848449707031, "learning_rate": 7.364592924428988e-06, "loss": 24.1876, "step": 4053 }, { "epoch": 0.37624129930394434, "grad_norm": 36.31779098510742, "learning_rate": 7.363240685030587e-06, "loss": 22.8718, "step": 4054 }, { "epoch": 0.3763341067285383, "grad_norm": 35.87519073486328, "learning_rate": 7.361888223020917e-06, "loss": 23.6954, "step": 4055 }, { "epoch": 0.37642691415313223, "grad_norm": 37.16316223144531, "learning_rate": 7.3605355385273805e-06, "loss": 25.8083, "step": 4056 }, { "epoch": 0.37651972157772623, "grad_norm": 37.007808685302734, "learning_rate": 7.3591826316773926e-06, "loss": 24.105, "step": 4057 }, { "epoch": 0.3766125290023202, "grad_norm": 42.99984359741211, "learning_rate": 7.3578295025983955e-06, "loss": 24.4446, "step": 4058 }, { "epoch": 0.3767053364269142, "grad_norm": 40.08490753173828, "learning_rate": 7.356476151417852e-06, "loss": 22.4617, "step": 4059 }, { "epoch": 0.3767981438515081, "grad_norm": 36.6173210144043, "learning_rate": 7.355122578263243e-06, "loss": 25.3221, "step": 4060 }, { "epoch": 0.3768909512761021, "grad_norm": 32.8118896484375, "learning_rate": 7.35376878326207e-06, "loss": 23.2557, "step": 4061 }, { "epoch": 0.37698375870069606, "grad_norm": 35.11009979248047, "learning_rate": 7.3524147665418585e-06, "loss": 22.3197, "step": 4062 }, { "epoch": 0.37707656612529, "grad_norm": 41.44590759277344, "learning_rate": 7.351060528230154e-06, "loss": 24.6388, "step": 4063 }, { "epoch": 0.377169373549884, "grad_norm": 43.41006088256836, "learning_rate": 7.349706068454523e-06, "loss": 25.2636, "step": 4064 }, { "epoch": 0.37726218097447795, "grad_norm": 44.3956184387207, "learning_rate": 7.348351387342552e-06, "loss": 25.1185, "step": 4065 }, { "epoch": 0.37735498839907194, "grad_norm": 37.72819900512695, "learning_rate": 7.346996485021844e-06, "loss": 23.1291, "step": 4066 }, { "epoch": 0.3774477958236659, "grad_norm": 38.68095779418945, "learning_rate": 7.345641361620033e-06, "loss": 22.5577, "step": 4067 }, { "epoch": 0.3775406032482599, "grad_norm": 34.94629669189453, "learning_rate": 7.344286017264765e-06, "loss": 22.911, "step": 4068 }, { "epoch": 0.37763341067285383, "grad_norm": 38.12063217163086, "learning_rate": 7.342930452083713e-06, "loss": 23.579, "step": 4069 }, { "epoch": 0.3777262180974478, "grad_norm": 37.33273696899414, "learning_rate": 7.3415746662045664e-06, "loss": 23.8553, "step": 4070 }, { "epoch": 0.3778190255220418, "grad_norm": 65.21435546875, "learning_rate": 7.340218659755034e-06, "loss": 21.6234, "step": 4071 }, { "epoch": 0.3779118329466357, "grad_norm": 35.3475456237793, "learning_rate": 7.338862432862851e-06, "loss": 23.0189, "step": 4072 }, { "epoch": 0.3780046403712297, "grad_norm": 39.569854736328125, "learning_rate": 7.337505985655771e-06, "loss": 21.9857, "step": 4073 }, { "epoch": 0.37809744779582366, "grad_norm": 42.63257598876953, "learning_rate": 7.336149318261565e-06, "loss": 23.3004, "step": 4074 }, { "epoch": 0.37819025522041766, "grad_norm": 39.576995849609375, "learning_rate": 7.334792430808031e-06, "loss": 23.52, "step": 4075 }, { "epoch": 0.3782830626450116, "grad_norm": 34.563270568847656, "learning_rate": 7.333435323422984e-06, "loss": 21.9851, "step": 4076 }, { "epoch": 0.37837587006960555, "grad_norm": 40.11143112182617, "learning_rate": 7.332077996234256e-06, "loss": 24.6297, "step": 4077 }, { "epoch": 0.37846867749419955, "grad_norm": 38.244529724121094, "learning_rate": 7.330720449369708e-06, "loss": 23.8798, "step": 4078 }, { "epoch": 0.3785614849187935, "grad_norm": 38.27494430541992, "learning_rate": 7.329362682957217e-06, "loss": 25.0526, "step": 4079 }, { "epoch": 0.3786542923433875, "grad_norm": 38.26234436035156, "learning_rate": 7.3280046971246786e-06, "loss": 24.5075, "step": 4080 }, { "epoch": 0.37874709976798143, "grad_norm": 38.061954498291016, "learning_rate": 7.326646492000014e-06, "loss": 25.6875, "step": 4081 }, { "epoch": 0.37883990719257543, "grad_norm": 41.76602554321289, "learning_rate": 7.325288067711164e-06, "loss": 26.9367, "step": 4082 }, { "epoch": 0.3789327146171694, "grad_norm": 37.322750091552734, "learning_rate": 7.323929424386084e-06, "loss": 23.2986, "step": 4083 }, { "epoch": 0.3790255220417633, "grad_norm": 34.489768981933594, "learning_rate": 7.322570562152758e-06, "loss": 23.6942, "step": 4084 }, { "epoch": 0.3791183294663573, "grad_norm": 43.649169921875, "learning_rate": 7.321211481139188e-06, "loss": 24.005, "step": 4085 }, { "epoch": 0.37921113689095126, "grad_norm": 35.33794403076172, "learning_rate": 7.319852181473393e-06, "loss": 23.1109, "step": 4086 }, { "epoch": 0.37930394431554526, "grad_norm": 42.36817932128906, "learning_rate": 7.3184926632834186e-06, "loss": 27.4962, "step": 4087 }, { "epoch": 0.3793967517401392, "grad_norm": 35.79193115234375, "learning_rate": 7.317132926697326e-06, "loss": 23.2982, "step": 4088 }, { "epoch": 0.3794895591647332, "grad_norm": 35.41659927368164, "learning_rate": 7.3157729718432e-06, "loss": 24.1678, "step": 4089 }, { "epoch": 0.37958236658932715, "grad_norm": 37.8217658996582, "learning_rate": 7.3144127988491444e-06, "loss": 23.1604, "step": 4090 }, { "epoch": 0.3796751740139211, "grad_norm": 37.23481750488281, "learning_rate": 7.313052407843284e-06, "loss": 25.5881, "step": 4091 }, { "epoch": 0.3797679814385151, "grad_norm": 35.41973876953125, "learning_rate": 7.311691798953765e-06, "loss": 24.5211, "step": 4092 }, { "epoch": 0.37986078886310903, "grad_norm": 35.23521041870117, "learning_rate": 7.310330972308752e-06, "loss": 21.7124, "step": 4093 }, { "epoch": 0.37995359628770303, "grad_norm": 36.67753601074219, "learning_rate": 7.308969928036433e-06, "loss": 24.4205, "step": 4094 }, { "epoch": 0.380046403712297, "grad_norm": 37.48297882080078, "learning_rate": 7.307608666265014e-06, "loss": 22.7269, "step": 4095 }, { "epoch": 0.380139211136891, "grad_norm": 35.617149353027344, "learning_rate": 7.3062471871227205e-06, "loss": 24.2592, "step": 4096 }, { "epoch": 0.3802320185614849, "grad_norm": 37.2115592956543, "learning_rate": 7.304885490737803e-06, "loss": 24.7674, "step": 4097 }, { "epoch": 0.38032482598607886, "grad_norm": 37.13459014892578, "learning_rate": 7.3035235772385295e-06, "loss": 26.6721, "step": 4098 }, { "epoch": 0.38041763341067286, "grad_norm": 36.567073822021484, "learning_rate": 7.302161446753189e-06, "loss": 25.626, "step": 4099 }, { "epoch": 0.3805104408352668, "grad_norm": 38.384212493896484, "learning_rate": 7.300799099410088e-06, "loss": 25.1528, "step": 4100 }, { "epoch": 0.3806032482598608, "grad_norm": 37.23786926269531, "learning_rate": 7.2994365353375604e-06, "loss": 23.3498, "step": 4101 }, { "epoch": 0.38069605568445475, "grad_norm": 35.76259994506836, "learning_rate": 7.2980737546639535e-06, "loss": 24.0099, "step": 4102 }, { "epoch": 0.38078886310904875, "grad_norm": 38.71157455444336, "learning_rate": 7.296710757517639e-06, "loss": 23.726, "step": 4103 }, { "epoch": 0.3808816705336427, "grad_norm": 35.699378967285156, "learning_rate": 7.295347544027006e-06, "loss": 23.3154, "step": 4104 }, { "epoch": 0.38097447795823663, "grad_norm": 37.67436599731445, "learning_rate": 7.293984114320467e-06, "loss": 24.0489, "step": 4105 }, { "epoch": 0.38106728538283063, "grad_norm": 40.04755401611328, "learning_rate": 7.292620468526454e-06, "loss": 25.2305, "step": 4106 }, { "epoch": 0.3811600928074246, "grad_norm": 37.682613372802734, "learning_rate": 7.291256606773419e-06, "loss": 25.0189, "step": 4107 }, { "epoch": 0.3812529002320186, "grad_norm": 37.482215881347656, "learning_rate": 7.2898925291898326e-06, "loss": 23.1747, "step": 4108 }, { "epoch": 0.3813457076566125, "grad_norm": 37.651824951171875, "learning_rate": 7.2885282359041905e-06, "loss": 24.7996, "step": 4109 }, { "epoch": 0.3814385150812065, "grad_norm": 35.75311279296875, "learning_rate": 7.287163727045002e-06, "loss": 24.0878, "step": 4110 }, { "epoch": 0.38153132250580046, "grad_norm": 39.42632293701172, "learning_rate": 7.285799002740803e-06, "loss": 24.514, "step": 4111 }, { "epoch": 0.3816241299303944, "grad_norm": 41.57173538208008, "learning_rate": 7.284434063120147e-06, "loss": 23.8454, "step": 4112 }, { "epoch": 0.3817169373549884, "grad_norm": 37.15515899658203, "learning_rate": 7.283068908311606e-06, "loss": 23.0648, "step": 4113 }, { "epoch": 0.38180974477958235, "grad_norm": 41.38343811035156, "learning_rate": 7.281703538443775e-06, "loss": 23.7702, "step": 4114 }, { "epoch": 0.38190255220417635, "grad_norm": 41.78950881958008, "learning_rate": 7.280337953645269e-06, "loss": 23.1019, "step": 4115 }, { "epoch": 0.3819953596287703, "grad_norm": 54.0265998840332, "learning_rate": 7.278972154044722e-06, "loss": 22.9843, "step": 4116 }, { "epoch": 0.3820881670533643, "grad_norm": 39.39435577392578, "learning_rate": 7.27760613977079e-06, "loss": 22.3403, "step": 4117 }, { "epoch": 0.38218097447795824, "grad_norm": 40.79094314575195, "learning_rate": 7.2762399109521455e-06, "loss": 24.3762, "step": 4118 }, { "epoch": 0.3822737819025522, "grad_norm": 38.262630462646484, "learning_rate": 7.274873467717487e-06, "loss": 24.6248, "step": 4119 }, { "epoch": 0.3823665893271462, "grad_norm": 36.72293472290039, "learning_rate": 7.273506810195527e-06, "loss": 25.0005, "step": 4120 }, { "epoch": 0.3824593967517401, "grad_norm": 38.78565216064453, "learning_rate": 7.2721399385150045e-06, "loss": 23.5751, "step": 4121 }, { "epoch": 0.3825522041763341, "grad_norm": 37.290313720703125, "learning_rate": 7.270772852804672e-06, "loss": 25.2655, "step": 4122 }, { "epoch": 0.38264501160092806, "grad_norm": 35.98298645019531, "learning_rate": 7.269405553193307e-06, "loss": 23.7963, "step": 4123 }, { "epoch": 0.38273781902552206, "grad_norm": 56.87953186035156, "learning_rate": 7.268038039809706e-06, "loss": 24.2231, "step": 4124 }, { "epoch": 0.382830626450116, "grad_norm": 38.4175910949707, "learning_rate": 7.266670312782684e-06, "loss": 24.4366, "step": 4125 }, { "epoch": 0.38292343387470995, "grad_norm": 36.7321662902832, "learning_rate": 7.2653023722410785e-06, "loss": 24.4609, "step": 4126 }, { "epoch": 0.38301624129930395, "grad_norm": 34.08354949951172, "learning_rate": 7.263934218313747e-06, "loss": 23.3385, "step": 4127 }, { "epoch": 0.3831090487238979, "grad_norm": 68.57706451416016, "learning_rate": 7.2625658511295635e-06, "loss": 24.3826, "step": 4128 }, { "epoch": 0.3832018561484919, "grad_norm": 36.044647216796875, "learning_rate": 7.261197270817427e-06, "loss": 23.1647, "step": 4129 }, { "epoch": 0.38329466357308584, "grad_norm": 37.447547912597656, "learning_rate": 7.259828477506253e-06, "loss": 25.0853, "step": 4130 }, { "epoch": 0.38338747099767984, "grad_norm": 36.003021240234375, "learning_rate": 7.258459471324978e-06, "loss": 24.192, "step": 4131 }, { "epoch": 0.3834802784222738, "grad_norm": 40.58171463012695, "learning_rate": 7.257090252402561e-06, "loss": 24.719, "step": 4132 }, { "epoch": 0.3835730858468677, "grad_norm": 36.07992172241211, "learning_rate": 7.255720820867977e-06, "loss": 23.0161, "step": 4133 }, { "epoch": 0.3836658932714617, "grad_norm": 36.38002395629883, "learning_rate": 7.254351176850223e-06, "loss": 22.8143, "step": 4134 }, { "epoch": 0.38375870069605567, "grad_norm": 34.11980056762695, "learning_rate": 7.252981320478318e-06, "loss": 24.5066, "step": 4135 }, { "epoch": 0.38385150812064966, "grad_norm": 35.75676345825195, "learning_rate": 7.251611251881297e-06, "loss": 22.4712, "step": 4136 }, { "epoch": 0.3839443155452436, "grad_norm": 36.0433235168457, "learning_rate": 7.250240971188215e-06, "loss": 24.0317, "step": 4137 }, { "epoch": 0.3840371229698376, "grad_norm": 35.39084243774414, "learning_rate": 7.248870478528154e-06, "loss": 23.5282, "step": 4138 }, { "epoch": 0.38412993039443155, "grad_norm": 38.246063232421875, "learning_rate": 7.2474997740302085e-06, "loss": 22.489, "step": 4139 }, { "epoch": 0.3842227378190255, "grad_norm": 35.85087966918945, "learning_rate": 7.2461288578234955e-06, "loss": 25.8572, "step": 4140 }, { "epoch": 0.3843155452436195, "grad_norm": 35.104976654052734, "learning_rate": 7.2447577300371494e-06, "loss": 24.0041, "step": 4141 }, { "epoch": 0.38440835266821344, "grad_norm": 39.590824127197266, "learning_rate": 7.243386390800331e-06, "loss": 25.1224, "step": 4142 }, { "epoch": 0.38450116009280744, "grad_norm": 34.6641845703125, "learning_rate": 7.242014840242215e-06, "loss": 22.6901, "step": 4143 }, { "epoch": 0.3845939675174014, "grad_norm": 36.01663589477539, "learning_rate": 7.240643078491998e-06, "loss": 22.3351, "step": 4144 }, { "epoch": 0.3846867749419954, "grad_norm": 36.211673736572266, "learning_rate": 7.239271105678895e-06, "loss": 22.5553, "step": 4145 }, { "epoch": 0.3847795823665893, "grad_norm": 42.366004943847656, "learning_rate": 7.2378989219321475e-06, "loss": 24.2324, "step": 4146 }, { "epoch": 0.38487238979118327, "grad_norm": 38.0569953918457, "learning_rate": 7.236526527381005e-06, "loss": 24.6617, "step": 4147 }, { "epoch": 0.38496519721577727, "grad_norm": 34.27788543701172, "learning_rate": 7.235153922154747e-06, "loss": 22.0015, "step": 4148 }, { "epoch": 0.3850580046403712, "grad_norm": 38.155399322509766, "learning_rate": 7.23378110638267e-06, "loss": 22.9242, "step": 4149 }, { "epoch": 0.3851508120649652, "grad_norm": 35.60148620605469, "learning_rate": 7.232408080194089e-06, "loss": 24.4868, "step": 4150 }, { "epoch": 0.38524361948955915, "grad_norm": 40.91040802001953, "learning_rate": 7.231034843718338e-06, "loss": 22.7311, "step": 4151 }, { "epoch": 0.38533642691415315, "grad_norm": 37.89082717895508, "learning_rate": 7.229661397084775e-06, "loss": 22.2232, "step": 4152 }, { "epoch": 0.3854292343387471, "grad_norm": 38.60063552856445, "learning_rate": 7.228287740422774e-06, "loss": 24.8811, "step": 4153 }, { "epoch": 0.3855220417633411, "grad_norm": 38.082584381103516, "learning_rate": 7.2269138738617315e-06, "loss": 25.0239, "step": 4154 }, { "epoch": 0.38561484918793504, "grad_norm": 36.659889221191406, "learning_rate": 7.225539797531059e-06, "loss": 23.5791, "step": 4155 }, { "epoch": 0.385707656612529, "grad_norm": 39.300018310546875, "learning_rate": 7.224165511560194e-06, "loss": 22.8302, "step": 4156 }, { "epoch": 0.385800464037123, "grad_norm": 37.3175163269043, "learning_rate": 7.222791016078588e-06, "loss": 22.1383, "step": 4157 }, { "epoch": 0.3858932714617169, "grad_norm": 35.894264221191406, "learning_rate": 7.221416311215718e-06, "loss": 23.5996, "step": 4158 }, { "epoch": 0.3859860788863109, "grad_norm": 39.910362243652344, "learning_rate": 7.220041397101075e-06, "loss": 23.7764, "step": 4159 }, { "epoch": 0.38607888631090487, "grad_norm": 39.30271911621094, "learning_rate": 7.2186662738641735e-06, "loss": 24.2889, "step": 4160 }, { "epoch": 0.38617169373549887, "grad_norm": 38.97779846191406, "learning_rate": 7.2172909416345475e-06, "loss": 24.7106, "step": 4161 }, { "epoch": 0.3862645011600928, "grad_norm": 37.69858932495117, "learning_rate": 7.2159154005417485e-06, "loss": 24.5045, "step": 4162 }, { "epoch": 0.38635730858468675, "grad_norm": 35.91106033325195, "learning_rate": 7.2145396507153496e-06, "loss": 24.2715, "step": 4163 }, { "epoch": 0.38645011600928075, "grad_norm": 35.73625183105469, "learning_rate": 7.213163692284943e-06, "loss": 24.2917, "step": 4164 }, { "epoch": 0.3865429234338747, "grad_norm": 36.07172775268555, "learning_rate": 7.211787525380142e-06, "loss": 24.6769, "step": 4165 }, { "epoch": 0.3866357308584687, "grad_norm": 35.51312255859375, "learning_rate": 7.210411150130574e-06, "loss": 22.4047, "step": 4166 }, { "epoch": 0.38672853828306264, "grad_norm": 39.01241683959961, "learning_rate": 7.209034566665893e-06, "loss": 26.6425, "step": 4167 }, { "epoch": 0.38682134570765664, "grad_norm": 38.319278717041016, "learning_rate": 7.207657775115769e-06, "loss": 24.8108, "step": 4168 }, { "epoch": 0.3869141531322506, "grad_norm": 38.61237335205078, "learning_rate": 7.206280775609893e-06, "loss": 24.1691, "step": 4169 }, { "epoch": 0.3870069605568445, "grad_norm": 36.72317123413086, "learning_rate": 7.204903568277975e-06, "loss": 24.6695, "step": 4170 }, { "epoch": 0.3870997679814385, "grad_norm": 36.16539764404297, "learning_rate": 7.203526153249742e-06, "loss": 24.1449, "step": 4171 }, { "epoch": 0.38719257540603247, "grad_norm": 48.51559829711914, "learning_rate": 7.202148530654944e-06, "loss": 25.5445, "step": 4172 }, { "epoch": 0.38728538283062647, "grad_norm": 35.32857131958008, "learning_rate": 7.200770700623352e-06, "loss": 23.5202, "step": 4173 }, { "epoch": 0.3873781902552204, "grad_norm": 37.19990539550781, "learning_rate": 7.199392663284751e-06, "loss": 24.9, "step": 4174 }, { "epoch": 0.3874709976798144, "grad_norm": 40.85108184814453, "learning_rate": 7.198014418768951e-06, "loss": 24.0656, "step": 4175 }, { "epoch": 0.38756380510440835, "grad_norm": 43.78236770629883, "learning_rate": 7.196635967205776e-06, "loss": 23.8767, "step": 4176 }, { "epoch": 0.3876566125290023, "grad_norm": 36.54494094848633, "learning_rate": 7.195257308725076e-06, "loss": 23.3406, "step": 4177 }, { "epoch": 0.3877494199535963, "grad_norm": 34.97544479370117, "learning_rate": 7.1938784434567165e-06, "loss": 23.7562, "step": 4178 }, { "epoch": 0.38784222737819024, "grad_norm": 39.29024124145508, "learning_rate": 7.192499371530581e-06, "loss": 25.9099, "step": 4179 }, { "epoch": 0.38793503480278424, "grad_norm": 36.71049118041992, "learning_rate": 7.1911200930765764e-06, "loss": 23.4683, "step": 4180 }, { "epoch": 0.3880278422273782, "grad_norm": 34.589454650878906, "learning_rate": 7.189740608224628e-06, "loss": 21.9646, "step": 4181 }, { "epoch": 0.3881206496519722, "grad_norm": 40.981651306152344, "learning_rate": 7.188360917104676e-06, "loss": 25.6226, "step": 4182 }, { "epoch": 0.3882134570765661, "grad_norm": 32.7120475769043, "learning_rate": 7.186981019846688e-06, "loss": 22.0772, "step": 4183 }, { "epoch": 0.38830626450116007, "grad_norm": 37.96993637084961, "learning_rate": 7.185600916580646e-06, "loss": 23.8444, "step": 4184 }, { "epoch": 0.38839907192575407, "grad_norm": 36.877201080322266, "learning_rate": 7.184220607436549e-06, "loss": 23.4663, "step": 4185 }, { "epoch": 0.388491879350348, "grad_norm": 39.057437896728516, "learning_rate": 7.182840092544423e-06, "loss": 23.1637, "step": 4186 }, { "epoch": 0.388584686774942, "grad_norm": 37.49863052368164, "learning_rate": 7.181459372034306e-06, "loss": 24.1397, "step": 4187 }, { "epoch": 0.38867749419953596, "grad_norm": 37.13903045654297, "learning_rate": 7.180078446036259e-06, "loss": 24.1981, "step": 4188 }, { "epoch": 0.38877030162412995, "grad_norm": 38.80780029296875, "learning_rate": 7.178697314680363e-06, "loss": 23.3124, "step": 4189 }, { "epoch": 0.3888631090487239, "grad_norm": 43.93647384643555, "learning_rate": 7.177315978096716e-06, "loss": 23.6533, "step": 4190 }, { "epoch": 0.38895591647331784, "grad_norm": 39.1765022277832, "learning_rate": 7.175934436415436e-06, "loss": 23.6644, "step": 4191 }, { "epoch": 0.38904872389791184, "grad_norm": 38.355228424072266, "learning_rate": 7.174552689766662e-06, "loss": 25.8579, "step": 4192 }, { "epoch": 0.3891415313225058, "grad_norm": 48.236610412597656, "learning_rate": 7.173170738280551e-06, "loss": 23.845, "step": 4193 }, { "epoch": 0.3892343387470998, "grad_norm": 40.684043884277344, "learning_rate": 7.1717885820872766e-06, "loss": 23.6556, "step": 4194 }, { "epoch": 0.3893271461716937, "grad_norm": 36.939395904541016, "learning_rate": 7.170406221317036e-06, "loss": 23.168, "step": 4195 }, { "epoch": 0.3894199535962877, "grad_norm": 35.81148147583008, "learning_rate": 7.169023656100046e-06, "loss": 23.6405, "step": 4196 }, { "epoch": 0.38951276102088167, "grad_norm": 36.29375076293945, "learning_rate": 7.167640886566538e-06, "loss": 25.1564, "step": 4197 }, { "epoch": 0.3896055684454756, "grad_norm": 44.63269805908203, "learning_rate": 7.166257912846768e-06, "loss": 23.9296, "step": 4198 }, { "epoch": 0.3896983758700696, "grad_norm": 36.37281799316406, "learning_rate": 7.164874735071006e-06, "loss": 21.5349, "step": 4199 }, { "epoch": 0.38979118329466356, "grad_norm": 39.69844436645508, "learning_rate": 7.163491353369545e-06, "loss": 24.4699, "step": 4200 }, { "epoch": 0.38988399071925756, "grad_norm": 37.069984436035156, "learning_rate": 7.162107767872697e-06, "loss": 23.4604, "step": 4201 }, { "epoch": 0.3899767981438515, "grad_norm": 42.72287368774414, "learning_rate": 7.1607239787107905e-06, "loss": 24.5275, "step": 4202 }, { "epoch": 0.3900696055684455, "grad_norm": 38.00679397583008, "learning_rate": 7.159339986014175e-06, "loss": 26.6004, "step": 4203 }, { "epoch": 0.39016241299303944, "grad_norm": 33.86973571777344, "learning_rate": 7.157955789913221e-06, "loss": 24.9464, "step": 4204 }, { "epoch": 0.3902552204176334, "grad_norm": 35.23176956176758, "learning_rate": 7.156571390538315e-06, "loss": 21.5767, "step": 4205 }, { "epoch": 0.3903480278422274, "grad_norm": 37.342796325683594, "learning_rate": 7.155186788019864e-06, "loss": 23.0933, "step": 4206 }, { "epoch": 0.39044083526682133, "grad_norm": 35.84444046020508, "learning_rate": 7.153801982488293e-06, "loss": 23.8118, "step": 4207 }, { "epoch": 0.39053364269141533, "grad_norm": 37.96680450439453, "learning_rate": 7.152416974074049e-06, "loss": 24.8571, "step": 4208 }, { "epoch": 0.39062645011600927, "grad_norm": 34.13277816772461, "learning_rate": 7.151031762907596e-06, "loss": 23.7275, "step": 4209 }, { "epoch": 0.39071925754060327, "grad_norm": 38.594261169433594, "learning_rate": 7.149646349119417e-06, "loss": 22.4617, "step": 4210 }, { "epoch": 0.3908120649651972, "grad_norm": 39.503211975097656, "learning_rate": 7.148260732840012e-06, "loss": 24.5074, "step": 4211 }, { "epoch": 0.39090487238979116, "grad_norm": 34.550506591796875, "learning_rate": 7.146874914199906e-06, "loss": 24.4193, "step": 4212 }, { "epoch": 0.39099767981438516, "grad_norm": 38.00851058959961, "learning_rate": 7.14548889332964e-06, "loss": 23.8231, "step": 4213 }, { "epoch": 0.3910904872389791, "grad_norm": 39.26141357421875, "learning_rate": 7.144102670359769e-06, "loss": 21.5551, "step": 4214 }, { "epoch": 0.3911832946635731, "grad_norm": 40.9665641784668, "learning_rate": 7.142716245420878e-06, "loss": 23.4941, "step": 4215 }, { "epoch": 0.39127610208816704, "grad_norm": 38.75001525878906, "learning_rate": 7.141329618643559e-06, "loss": 23.9371, "step": 4216 }, { "epoch": 0.39136890951276104, "grad_norm": 35.7895393371582, "learning_rate": 7.139942790158432e-06, "loss": 26.3334, "step": 4217 }, { "epoch": 0.391461716937355, "grad_norm": 35.14055252075195, "learning_rate": 7.138555760096131e-06, "loss": 20.619, "step": 4218 }, { "epoch": 0.39155452436194893, "grad_norm": 38.231441497802734, "learning_rate": 7.1371685285873125e-06, "loss": 25.1631, "step": 4219 }, { "epoch": 0.39164733178654293, "grad_norm": 37.619110107421875, "learning_rate": 7.135781095762648e-06, "loss": 23.9279, "step": 4220 }, { "epoch": 0.3917401392111369, "grad_norm": 36.19876480102539, "learning_rate": 7.134393461752833e-06, "loss": 22.5954, "step": 4221 }, { "epoch": 0.39183294663573087, "grad_norm": 38.98033905029297, "learning_rate": 7.133005626688577e-06, "loss": 23.988, "step": 4222 }, { "epoch": 0.3919257540603248, "grad_norm": 39.90108871459961, "learning_rate": 7.13161759070061e-06, "loss": 21.848, "step": 4223 }, { "epoch": 0.3920185614849188, "grad_norm": 36.0249137878418, "learning_rate": 7.130229353919685e-06, "loss": 22.4615, "step": 4224 }, { "epoch": 0.39211136890951276, "grad_norm": 37.26676940917969, "learning_rate": 7.128840916476566e-06, "loss": 23.6099, "step": 4225 }, { "epoch": 0.3922041763341067, "grad_norm": 37.20930099487305, "learning_rate": 7.127452278502041e-06, "loss": 23.4931, "step": 4226 }, { "epoch": 0.3922969837587007, "grad_norm": 39.57304382324219, "learning_rate": 7.12606344012692e-06, "loss": 23.1826, "step": 4227 }, { "epoch": 0.39238979118329465, "grad_norm": 35.37531661987305, "learning_rate": 7.124674401482023e-06, "loss": 21.4829, "step": 4228 }, { "epoch": 0.39248259860788864, "grad_norm": 34.9900016784668, "learning_rate": 7.123285162698199e-06, "loss": 25.2884, "step": 4229 }, { "epoch": 0.3925754060324826, "grad_norm": 36.38845443725586, "learning_rate": 7.121895723906306e-06, "loss": 24.0581, "step": 4230 }, { "epoch": 0.3926682134570766, "grad_norm": 36.38460159301758, "learning_rate": 7.1205060852372275e-06, "loss": 21.2257, "step": 4231 }, { "epoch": 0.39276102088167053, "grad_norm": 62.625186920166016, "learning_rate": 7.119116246821866e-06, "loss": 24.7957, "step": 4232 }, { "epoch": 0.3928538283062645, "grad_norm": 39.72747039794922, "learning_rate": 7.117726208791136e-06, "loss": 24.5272, "step": 4233 }, { "epoch": 0.3929466357308585, "grad_norm": 38.036376953125, "learning_rate": 7.11633597127598e-06, "loss": 22.9046, "step": 4234 }, { "epoch": 0.3930394431554524, "grad_norm": 37.76702117919922, "learning_rate": 7.114945534407354e-06, "loss": 22.0421, "step": 4235 }, { "epoch": 0.3931322505800464, "grad_norm": 37.460418701171875, "learning_rate": 7.113554898316231e-06, "loss": 24.7847, "step": 4236 }, { "epoch": 0.39322505800464036, "grad_norm": 41.105281829833984, "learning_rate": 7.112164063133608e-06, "loss": 24.4257, "step": 4237 }, { "epoch": 0.39331786542923436, "grad_norm": 37.465293884277344, "learning_rate": 7.110773028990497e-06, "loss": 22.6064, "step": 4238 }, { "epoch": 0.3934106728538283, "grad_norm": 36.97977828979492, "learning_rate": 7.1093817960179305e-06, "loss": 22.6142, "step": 4239 }, { "epoch": 0.39350348027842225, "grad_norm": 37.11012268066406, "learning_rate": 7.10799036434696e-06, "loss": 23.4276, "step": 4240 }, { "epoch": 0.39359628770301625, "grad_norm": 37.90660095214844, "learning_rate": 7.106598734108652e-06, "loss": 24.9399, "step": 4241 }, { "epoch": 0.3936890951276102, "grad_norm": 38.66616439819336, "learning_rate": 7.105206905434097e-06, "loss": 22.3003, "step": 4242 }, { "epoch": 0.3937819025522042, "grad_norm": 36.418006896972656, "learning_rate": 7.103814878454401e-06, "loss": 22.9081, "step": 4243 }, { "epoch": 0.39387470997679813, "grad_norm": 36.338382720947266, "learning_rate": 7.102422653300688e-06, "loss": 22.8736, "step": 4244 }, { "epoch": 0.39396751740139213, "grad_norm": 41.01405334472656, "learning_rate": 7.101030230104105e-06, "loss": 22.94, "step": 4245 }, { "epoch": 0.3940603248259861, "grad_norm": 35.44957733154297, "learning_rate": 7.0996376089958115e-06, "loss": 24.8607, "step": 4246 }, { "epoch": 0.3941531322505801, "grad_norm": 35.770111083984375, "learning_rate": 7.098244790106991e-06, "loss": 24.9426, "step": 4247 }, { "epoch": 0.394245939675174, "grad_norm": 32.843605041503906, "learning_rate": 7.0968517735688445e-06, "loss": 23.1136, "step": 4248 }, { "epoch": 0.39433874709976796, "grad_norm": 35.137386322021484, "learning_rate": 7.095458559512588e-06, "loss": 23.9584, "step": 4249 }, { "epoch": 0.39443155452436196, "grad_norm": 46.36323165893555, "learning_rate": 7.09406514806946e-06, "loss": 24.2308, "step": 4250 }, { "epoch": 0.3945243619489559, "grad_norm": 41.4640998840332, "learning_rate": 7.092671539370717e-06, "loss": 25.2234, "step": 4251 }, { "epoch": 0.3946171693735499, "grad_norm": 37.93375778198242, "learning_rate": 7.091277733547631e-06, "loss": 24.2985, "step": 4252 }, { "epoch": 0.39470997679814385, "grad_norm": 38.560726165771484, "learning_rate": 7.089883730731498e-06, "loss": 24.8281, "step": 4253 }, { "epoch": 0.39480278422273785, "grad_norm": 37.65877151489258, "learning_rate": 7.0884895310536276e-06, "loss": 24.252, "step": 4254 }, { "epoch": 0.3948955916473318, "grad_norm": 41.7879638671875, "learning_rate": 7.08709513464535e-06, "loss": 25.4157, "step": 4255 }, { "epoch": 0.39498839907192573, "grad_norm": 33.97966384887695, "learning_rate": 7.085700541638014e-06, "loss": 22.7717, "step": 4256 }, { "epoch": 0.39508120649651973, "grad_norm": 37.17601013183594, "learning_rate": 7.0843057521629876e-06, "loss": 24.0557, "step": 4257 }, { "epoch": 0.3951740139211137, "grad_norm": 36.157325744628906, "learning_rate": 7.082910766351654e-06, "loss": 25.3041, "step": 4258 }, { "epoch": 0.3952668213457077, "grad_norm": 47.0086555480957, "learning_rate": 7.081515584335421e-06, "loss": 24.1906, "step": 4259 }, { "epoch": 0.3953596287703016, "grad_norm": 36.94343948364258, "learning_rate": 7.080120206245709e-06, "loss": 23.4793, "step": 4260 }, { "epoch": 0.3954524361948956, "grad_norm": 33.129886627197266, "learning_rate": 7.078724632213957e-06, "loss": 21.427, "step": 4261 }, { "epoch": 0.39554524361948956, "grad_norm": 38.942527770996094, "learning_rate": 7.077328862371629e-06, "loss": 23.2876, "step": 4262 }, { "epoch": 0.3956380510440835, "grad_norm": 38.636573791503906, "learning_rate": 7.075932896850199e-06, "loss": 22.2231, "step": 4263 }, { "epoch": 0.3957308584686775, "grad_norm": 39.6094856262207, "learning_rate": 7.074536735781168e-06, "loss": 25.1967, "step": 4264 }, { "epoch": 0.39582366589327145, "grad_norm": 53.94283676147461, "learning_rate": 7.073140379296045e-06, "loss": 23.8447, "step": 4265 }, { "epoch": 0.39591647331786545, "grad_norm": 33.389766693115234, "learning_rate": 7.071743827526367e-06, "loss": 22.2529, "step": 4266 }, { "epoch": 0.3960092807424594, "grad_norm": 43.87825012207031, "learning_rate": 7.070347080603687e-06, "loss": 24.9557, "step": 4267 }, { "epoch": 0.3961020881670534, "grad_norm": 57.399600982666016, "learning_rate": 7.068950138659569e-06, "loss": 23.1766, "step": 4268 }, { "epoch": 0.39619489559164733, "grad_norm": 35.762943267822266, "learning_rate": 7.067553001825609e-06, "loss": 23.1427, "step": 4269 }, { "epoch": 0.3962877030162413, "grad_norm": 37.61439514160156, "learning_rate": 7.066155670233408e-06, "loss": 21.4578, "step": 4270 }, { "epoch": 0.3963805104408353, "grad_norm": 34.94654846191406, "learning_rate": 7.064758144014593e-06, "loss": 24.9262, "step": 4271 }, { "epoch": 0.3964733178654292, "grad_norm": 39.121334075927734, "learning_rate": 7.063360423300808e-06, "loss": 23.5083, "step": 4272 }, { "epoch": 0.3965661252900232, "grad_norm": 37.554080963134766, "learning_rate": 7.061962508223714e-06, "loss": 25.2288, "step": 4273 }, { "epoch": 0.39665893271461716, "grad_norm": 37.89835739135742, "learning_rate": 7.060564398914991e-06, "loss": 24.9807, "step": 4274 }, { "epoch": 0.39675174013921116, "grad_norm": 39.7332649230957, "learning_rate": 7.059166095506338e-06, "loss": 25.4053, "step": 4275 }, { "epoch": 0.3968445475638051, "grad_norm": 40.31143569946289, "learning_rate": 7.0577675981294715e-06, "loss": 24.9028, "step": 4276 }, { "epoch": 0.39693735498839905, "grad_norm": 34.95508575439453, "learning_rate": 7.056368906916124e-06, "loss": 23.9237, "step": 4277 }, { "epoch": 0.39703016241299305, "grad_norm": 44.39488983154297, "learning_rate": 7.054970021998054e-06, "loss": 23.6936, "step": 4278 }, { "epoch": 0.397122969837587, "grad_norm": 37.70206832885742, "learning_rate": 7.053570943507029e-06, "loss": 23.0525, "step": 4279 }, { "epoch": 0.397215777262181, "grad_norm": 37.19991683959961, "learning_rate": 7.052171671574837e-06, "loss": 24.377, "step": 4280 }, { "epoch": 0.39730858468677493, "grad_norm": 37.117923736572266, "learning_rate": 7.050772206333291e-06, "loss": 23.2146, "step": 4281 }, { "epoch": 0.39740139211136893, "grad_norm": 36.36240768432617, "learning_rate": 7.049372547914213e-06, "loss": 24.1276, "step": 4282 }, { "epoch": 0.3974941995359629, "grad_norm": 36.22886276245117, "learning_rate": 7.0479726964494486e-06, "loss": 24.9866, "step": 4283 }, { "epoch": 0.3975870069605568, "grad_norm": 37.1988410949707, "learning_rate": 7.04657265207086e-06, "loss": 23.9129, "step": 4284 }, { "epoch": 0.3976798143851508, "grad_norm": 36.18263244628906, "learning_rate": 7.045172414910328e-06, "loss": 22.5446, "step": 4285 }, { "epoch": 0.39777262180974476, "grad_norm": 37.11881637573242, "learning_rate": 7.043771985099751e-06, "loss": 24.4052, "step": 4286 }, { "epoch": 0.39786542923433876, "grad_norm": 37.3294563293457, "learning_rate": 7.042371362771046e-06, "loss": 27.941, "step": 4287 }, { "epoch": 0.3979582366589327, "grad_norm": 48.89161682128906, "learning_rate": 7.040970548056151e-06, "loss": 23.8394, "step": 4288 }, { "epoch": 0.3980510440835267, "grad_norm": 38.77709197998047, "learning_rate": 7.039569541087014e-06, "loss": 24.8162, "step": 4289 }, { "epoch": 0.39814385150812065, "grad_norm": 34.925758361816406, "learning_rate": 7.038168341995609e-06, "loss": 21.0111, "step": 4290 }, { "epoch": 0.3982366589327146, "grad_norm": 37.61249923706055, "learning_rate": 7.0367669509139256e-06, "loss": 24.0317, "step": 4291 }, { "epoch": 0.3983294663573086, "grad_norm": 36.52742004394531, "learning_rate": 7.035365367973972e-06, "loss": 22.9665, "step": 4292 }, { "epoch": 0.39842227378190254, "grad_norm": 36.993690490722656, "learning_rate": 7.033963593307771e-06, "loss": 22.7536, "step": 4293 }, { "epoch": 0.39851508120649654, "grad_norm": 37.275428771972656, "learning_rate": 7.0325616270473695e-06, "loss": 22.1986, "step": 4294 }, { "epoch": 0.3986078886310905, "grad_norm": 62.57554244995117, "learning_rate": 7.031159469324826e-06, "loss": 23.4682, "step": 4295 }, { "epoch": 0.3987006960556845, "grad_norm": 37.184722900390625, "learning_rate": 7.029757120272222e-06, "loss": 23.7205, "step": 4296 }, { "epoch": 0.3987935034802784, "grad_norm": 34.05231857299805, "learning_rate": 7.028354580021656e-06, "loss": 22.52, "step": 4297 }, { "epoch": 0.39888631090487237, "grad_norm": 41.01810073852539, "learning_rate": 7.0269518487052435e-06, "loss": 22.2715, "step": 4298 }, { "epoch": 0.39897911832946636, "grad_norm": 38.37897491455078, "learning_rate": 7.025548926455117e-06, "loss": 23.3375, "step": 4299 }, { "epoch": 0.3990719257540603, "grad_norm": 36.797183990478516, "learning_rate": 7.024145813403427e-06, "loss": 23.2519, "step": 4300 }, { "epoch": 0.3991647331786543, "grad_norm": 35.25788879394531, "learning_rate": 7.022742509682346e-06, "loss": 22.7308, "step": 4301 }, { "epoch": 0.39925754060324825, "grad_norm": 34.38032913208008, "learning_rate": 7.02133901542406e-06, "loss": 24.4811, "step": 4302 }, { "epoch": 0.39935034802784225, "grad_norm": 37.03548049926758, "learning_rate": 7.019935330760774e-06, "loss": 25.0973, "step": 4303 }, { "epoch": 0.3994431554524362, "grad_norm": 36.599666595458984, "learning_rate": 7.018531455824715e-06, "loss": 23.5571, "step": 4304 }, { "epoch": 0.39953596287703014, "grad_norm": 41.9771728515625, "learning_rate": 7.01712739074812e-06, "loss": 24.2192, "step": 4305 }, { "epoch": 0.39962877030162414, "grad_norm": 36.82293701171875, "learning_rate": 7.0157231356632515e-06, "loss": 23.296, "step": 4306 }, { "epoch": 0.3997215777262181, "grad_norm": 33.626529693603516, "learning_rate": 7.0143186907023866e-06, "loss": 24.8461, "step": 4307 }, { "epoch": 0.3998143851508121, "grad_norm": 36.54719161987305, "learning_rate": 7.0129140559978184e-06, "loss": 23.4696, "step": 4308 }, { "epoch": 0.399907192575406, "grad_norm": 36.10380554199219, "learning_rate": 7.01150923168186e-06, "loss": 22.8535, "step": 4309 }, { "epoch": 0.4, "grad_norm": 35.409481048583984, "learning_rate": 7.010104217886844e-06, "loss": 23.2452, "step": 4310 }, { "epoch": 0.40009280742459397, "grad_norm": 38.0525016784668, "learning_rate": 7.008699014745119e-06, "loss": 23.9543, "step": 4311 }, { "epoch": 0.4001856148491879, "grad_norm": 37.74234390258789, "learning_rate": 7.0072936223890505e-06, "loss": 22.0005, "step": 4312 }, { "epoch": 0.4002784222737819, "grad_norm": 38.89060592651367, "learning_rate": 7.005888040951023e-06, "loss": 23.0495, "step": 4313 }, { "epoch": 0.40037122969837585, "grad_norm": 36.44477081298828, "learning_rate": 7.004482270563441e-06, "loss": 24.5843, "step": 4314 }, { "epoch": 0.40046403712296985, "grad_norm": 38.547386169433594, "learning_rate": 7.003076311358721e-06, "loss": 25.6915, "step": 4315 }, { "epoch": 0.4005568445475638, "grad_norm": 44.64391326904297, "learning_rate": 7.001670163469303e-06, "loss": 25.2594, "step": 4316 }, { "epoch": 0.4006496519721578, "grad_norm": 36.99527359008789, "learning_rate": 7.000263827027641e-06, "loss": 24.0256, "step": 4317 }, { "epoch": 0.40074245939675174, "grad_norm": 39.61839294433594, "learning_rate": 6.998857302166211e-06, "loss": 24.5491, "step": 4318 }, { "epoch": 0.4008352668213457, "grad_norm": 39.86091232299805, "learning_rate": 6.9974505890175e-06, "loss": 24.1707, "step": 4319 }, { "epoch": 0.4009280742459397, "grad_norm": 35.49925231933594, "learning_rate": 6.99604368771402e-06, "loss": 24.3234, "step": 4320 }, { "epoch": 0.4010208816705336, "grad_norm": 35.23417282104492, "learning_rate": 6.994636598388298e-06, "loss": 22.8871, "step": 4321 }, { "epoch": 0.4011136890951276, "grad_norm": 41.0833625793457, "learning_rate": 6.993229321172875e-06, "loss": 23.6832, "step": 4322 }, { "epoch": 0.40120649651972157, "grad_norm": 37.27741241455078, "learning_rate": 6.991821856200316e-06, "loss": 24.1366, "step": 4323 }, { "epoch": 0.40129930394431557, "grad_norm": 33.64212417602539, "learning_rate": 6.990414203603199e-06, "loss": 21.7587, "step": 4324 }, { "epoch": 0.4013921113689095, "grad_norm": 73.12934875488281, "learning_rate": 6.989006363514122e-06, "loss": 21.8801, "step": 4325 }, { "epoch": 0.40148491879350345, "grad_norm": 35.38127136230469, "learning_rate": 6.9875983360657e-06, "loss": 22.072, "step": 4326 }, { "epoch": 0.40157772621809745, "grad_norm": 36.32850646972656, "learning_rate": 6.9861901213905645e-06, "loss": 22.9338, "step": 4327 }, { "epoch": 0.4016705336426914, "grad_norm": 37.52474594116211, "learning_rate": 6.984781719621366e-06, "loss": 24.5603, "step": 4328 }, { "epoch": 0.4017633410672854, "grad_norm": 34.21295166015625, "learning_rate": 6.983373130890773e-06, "loss": 23.0214, "step": 4329 }, { "epoch": 0.40185614849187934, "grad_norm": 35.390533447265625, "learning_rate": 6.9819643553314734e-06, "loss": 22.7293, "step": 4330 }, { "epoch": 0.40194895591647334, "grad_norm": 37.086002349853516, "learning_rate": 6.980555393076165e-06, "loss": 24.3123, "step": 4331 }, { "epoch": 0.4020417633410673, "grad_norm": 40.2778205871582, "learning_rate": 6.979146244257573e-06, "loss": 24.4019, "step": 4332 }, { "epoch": 0.4021345707656612, "grad_norm": 34.64761734008789, "learning_rate": 6.977736909008432e-06, "loss": 22.4015, "step": 4333 }, { "epoch": 0.4022273781902552, "grad_norm": 40.479270935058594, "learning_rate": 6.9763273874615025e-06, "loss": 24.2887, "step": 4334 }, { "epoch": 0.40232018561484917, "grad_norm": 36.68402099609375, "learning_rate": 6.974917679749553e-06, "loss": 24.1694, "step": 4335 }, { "epoch": 0.40241299303944317, "grad_norm": 35.78398895263672, "learning_rate": 6.9735077860053764e-06, "loss": 23.4501, "step": 4336 }, { "epoch": 0.4025058004640371, "grad_norm": 34.59895706176758, "learning_rate": 6.972097706361783e-06, "loss": 23.8104, "step": 4337 }, { "epoch": 0.4025986078886311, "grad_norm": 37.475074768066406, "learning_rate": 6.9706874409515934e-06, "loss": 25.9573, "step": 4338 }, { "epoch": 0.40269141531322505, "grad_norm": 38.18633270263672, "learning_rate": 6.969276989907657e-06, "loss": 23.5408, "step": 4339 }, { "epoch": 0.40278422273781905, "grad_norm": 35.6223030090332, "learning_rate": 6.967866353362833e-06, "loss": 23.1877, "step": 4340 }, { "epoch": 0.402877030162413, "grad_norm": 35.92129135131836, "learning_rate": 6.966455531449996e-06, "loss": 22.4222, "step": 4341 }, { "epoch": 0.40296983758700694, "grad_norm": 33.894508361816406, "learning_rate": 6.965044524302047e-06, "loss": 22.5328, "step": 4342 }, { "epoch": 0.40306264501160094, "grad_norm": 37.703407287597656, "learning_rate": 6.963633332051898e-06, "loss": 24.5514, "step": 4343 }, { "epoch": 0.4031554524361949, "grad_norm": 33.1607551574707, "learning_rate": 6.962221954832476e-06, "loss": 21.874, "step": 4344 }, { "epoch": 0.4032482598607889, "grad_norm": 39.57121658325195, "learning_rate": 6.960810392776733e-06, "loss": 23.3854, "step": 4345 }, { "epoch": 0.4033410672853828, "grad_norm": 34.66355895996094, "learning_rate": 6.9593986460176346e-06, "loss": 22.2331, "step": 4346 }, { "epoch": 0.4034338747099768, "grad_norm": 35.58747100830078, "learning_rate": 6.957986714688161e-06, "loss": 23.1843, "step": 4347 }, { "epoch": 0.40352668213457077, "grad_norm": 36.65839767456055, "learning_rate": 6.956574598921315e-06, "loss": 22.1954, "step": 4348 }, { "epoch": 0.4036194895591647, "grad_norm": 36.769100189208984, "learning_rate": 6.955162298850115e-06, "loss": 23.0681, "step": 4349 }, { "epoch": 0.4037122969837587, "grad_norm": 38.117977142333984, "learning_rate": 6.9537498146075925e-06, "loss": 23.7135, "step": 4350 }, { "epoch": 0.40380510440835266, "grad_norm": 37.65251159667969, "learning_rate": 6.952337146326802e-06, "loss": 22.9382, "step": 4351 }, { "epoch": 0.40389791183294665, "grad_norm": 44.920501708984375, "learning_rate": 6.950924294140815e-06, "loss": 25.571, "step": 4352 }, { "epoch": 0.4039907192575406, "grad_norm": 37.27802276611328, "learning_rate": 6.949511258182716e-06, "loss": 23.6314, "step": 4353 }, { "epoch": 0.4040835266821346, "grad_norm": 34.18697738647461, "learning_rate": 6.948098038585609e-06, "loss": 23.6603, "step": 4354 }, { "epoch": 0.40417633410672854, "grad_norm": 35.91874694824219, "learning_rate": 6.946684635482619e-06, "loss": 23.7386, "step": 4355 }, { "epoch": 0.4042691415313225, "grad_norm": 38.3896598815918, "learning_rate": 6.945271049006882e-06, "loss": 22.4079, "step": 4356 }, { "epoch": 0.4043619489559165, "grad_norm": 36.955284118652344, "learning_rate": 6.943857279291555e-06, "loss": 23.6325, "step": 4357 }, { "epoch": 0.4044547563805104, "grad_norm": 36.207611083984375, "learning_rate": 6.942443326469812e-06, "loss": 21.3819, "step": 4358 }, { "epoch": 0.4045475638051044, "grad_norm": 38.947452545166016, "learning_rate": 6.941029190674844e-06, "loss": 24.2842, "step": 4359 }, { "epoch": 0.40464037122969837, "grad_norm": 34.98283386230469, "learning_rate": 6.939614872039857e-06, "loss": 21.2055, "step": 4360 }, { "epoch": 0.40473317865429237, "grad_norm": 34.998085021972656, "learning_rate": 6.938200370698078e-06, "loss": 24.6646, "step": 4361 }, { "epoch": 0.4048259860788863, "grad_norm": 39.5145149230957, "learning_rate": 6.936785686782751e-06, "loss": 22.699, "step": 4362 }, { "epoch": 0.40491879350348026, "grad_norm": 36.36868667602539, "learning_rate": 6.935370820427132e-06, "loss": 22.2647, "step": 4363 }, { "epoch": 0.40501160092807426, "grad_norm": 38.28718566894531, "learning_rate": 6.9339557717645e-06, "loss": 24.6228, "step": 4364 }, { "epoch": 0.4051044083526682, "grad_norm": 45.355342864990234, "learning_rate": 6.93254054092815e-06, "loss": 23.7807, "step": 4365 }, { "epoch": 0.4051972157772622, "grad_norm": 61.93400573730469, "learning_rate": 6.931125128051389e-06, "loss": 28.8242, "step": 4366 }, { "epoch": 0.40529002320185614, "grad_norm": 36.49854278564453, "learning_rate": 6.92970953326755e-06, "loss": 24.2572, "step": 4367 }, { "epoch": 0.40538283062645014, "grad_norm": 35.199684143066406, "learning_rate": 6.928293756709976e-06, "loss": 24.0768, "step": 4368 }, { "epoch": 0.4054756380510441, "grad_norm": 33.91774368286133, "learning_rate": 6.926877798512029e-06, "loss": 23.0449, "step": 4369 }, { "epoch": 0.40556844547563803, "grad_norm": 35.28032302856445, "learning_rate": 6.925461658807091e-06, "loss": 24.325, "step": 4370 }, { "epoch": 0.40566125290023203, "grad_norm": 36.09845733642578, "learning_rate": 6.9240453377285565e-06, "loss": 22.7787, "step": 4371 }, { "epoch": 0.40575406032482597, "grad_norm": 35.978267669677734, "learning_rate": 6.922628835409841e-06, "loss": 25.0304, "step": 4372 }, { "epoch": 0.40584686774941997, "grad_norm": 40.57661437988281, "learning_rate": 6.921212151984375e-06, "loss": 23.7637, "step": 4373 }, { "epoch": 0.4059396751740139, "grad_norm": 35.566776275634766, "learning_rate": 6.9197952875856044e-06, "loss": 23.8473, "step": 4374 }, { "epoch": 0.4060324825986079, "grad_norm": 35.56998062133789, "learning_rate": 6.9183782423469965e-06, "loss": 23.0992, "step": 4375 }, { "epoch": 0.40612529002320186, "grad_norm": 39.30388259887695, "learning_rate": 6.916961016402034e-06, "loss": 23.8382, "step": 4376 }, { "epoch": 0.4062180974477958, "grad_norm": 40.69670486450195, "learning_rate": 6.9155436098842145e-06, "loss": 26.7968, "step": 4377 }, { "epoch": 0.4063109048723898, "grad_norm": 38.103660583496094, "learning_rate": 6.914126022927053e-06, "loss": 25.4227, "step": 4378 }, { "epoch": 0.40640371229698374, "grad_norm": 37.528263092041016, "learning_rate": 6.912708255664086e-06, "loss": 24.0601, "step": 4379 }, { "epoch": 0.40649651972157774, "grad_norm": 35.06495666503906, "learning_rate": 6.911290308228861e-06, "loss": 23.1475, "step": 4380 }, { "epoch": 0.4065893271461717, "grad_norm": 38.1071662902832, "learning_rate": 6.909872180754945e-06, "loss": 24.8471, "step": 4381 }, { "epoch": 0.4066821345707657, "grad_norm": 34.340606689453125, "learning_rate": 6.908453873375924e-06, "loss": 22.2954, "step": 4382 }, { "epoch": 0.40677494199535963, "grad_norm": 38.7775993347168, "learning_rate": 6.907035386225396e-06, "loss": 23.7137, "step": 4383 }, { "epoch": 0.4068677494199536, "grad_norm": 38.74861526489258, "learning_rate": 6.9056167194369815e-06, "loss": 25.6824, "step": 4384 }, { "epoch": 0.40696055684454757, "grad_norm": 38.42184829711914, "learning_rate": 6.904197873144313e-06, "loss": 23.2041, "step": 4385 }, { "epoch": 0.4070533642691415, "grad_norm": 144.59800720214844, "learning_rate": 6.9027788474810455e-06, "loss": 23.1174, "step": 4386 }, { "epoch": 0.4071461716937355, "grad_norm": 41.00925064086914, "learning_rate": 6.901359642580845e-06, "loss": 25.4109, "step": 4387 }, { "epoch": 0.40723897911832946, "grad_norm": 37.07542419433594, "learning_rate": 6.899940258577397e-06, "loss": 23.6695, "step": 4388 }, { "epoch": 0.40733178654292346, "grad_norm": 34.15910339355469, "learning_rate": 6.898520695604404e-06, "loss": 23.7778, "step": 4389 }, { "epoch": 0.4074245939675174, "grad_norm": 39.58979034423828, "learning_rate": 6.897100953795587e-06, "loss": 23.4351, "step": 4390 }, { "epoch": 0.40751740139211134, "grad_norm": 36.10580825805664, "learning_rate": 6.89568103328468e-06, "loss": 24.3637, "step": 4391 }, { "epoch": 0.40761020881670534, "grad_norm": 41.77208709716797, "learning_rate": 6.894260934205437e-06, "loss": 23.3599, "step": 4392 }, { "epoch": 0.4077030162412993, "grad_norm": 34.7473258972168, "learning_rate": 6.8928406566916265e-06, "loss": 24.0486, "step": 4393 }, { "epoch": 0.4077958236658933, "grad_norm": 35.8108024597168, "learning_rate": 6.891420200877037e-06, "loss": 21.8076, "step": 4394 }, { "epoch": 0.40788863109048723, "grad_norm": 35.36420440673828, "learning_rate": 6.88999956689547e-06, "loss": 22.3107, "step": 4395 }, { "epoch": 0.40798143851508123, "grad_norm": 36.789920806884766, "learning_rate": 6.888578754880747e-06, "loss": 23.6158, "step": 4396 }, { "epoch": 0.4080742459396752, "grad_norm": 38.98810577392578, "learning_rate": 6.887157764966704e-06, "loss": 21.5577, "step": 4397 }, { "epoch": 0.4081670533642691, "grad_norm": 35.50718307495117, "learning_rate": 6.885736597287195e-06, "loss": 23.7001, "step": 4398 }, { "epoch": 0.4082598607888631, "grad_norm": 37.026405334472656, "learning_rate": 6.88431525197609e-06, "loss": 24.7278, "step": 4399 }, { "epoch": 0.40835266821345706, "grad_norm": 35.968910217285156, "learning_rate": 6.882893729167277e-06, "loss": 24.605, "step": 4400 }, { "epoch": 0.40844547563805106, "grad_norm": 40.27352523803711, "learning_rate": 6.881472028994658e-06, "loss": 22.607, "step": 4401 }, { "epoch": 0.408538283062645, "grad_norm": 37.74684524536133, "learning_rate": 6.880050151592156e-06, "loss": 21.2361, "step": 4402 }, { "epoch": 0.408631090487239, "grad_norm": 35.73693084716797, "learning_rate": 6.878628097093707e-06, "loss": 22.3974, "step": 4403 }, { "epoch": 0.40872389791183295, "grad_norm": 37.111576080322266, "learning_rate": 6.8772058656332626e-06, "loss": 22.9715, "step": 4404 }, { "epoch": 0.4088167053364269, "grad_norm": 37.29429244995117, "learning_rate": 6.875783457344799e-06, "loss": 25.1118, "step": 4405 }, { "epoch": 0.4089095127610209, "grad_norm": 33.987091064453125, "learning_rate": 6.874360872362299e-06, "loss": 24.0265, "step": 4406 }, { "epoch": 0.40900232018561483, "grad_norm": 38.498573303222656, "learning_rate": 6.872938110819768e-06, "loss": 24.0451, "step": 4407 }, { "epoch": 0.40909512761020883, "grad_norm": 36.053958892822266, "learning_rate": 6.871515172851224e-06, "loss": 23.0185, "step": 4408 }, { "epoch": 0.4091879350348028, "grad_norm": 43.42454147338867, "learning_rate": 6.870092058590709e-06, "loss": 25.2897, "step": 4409 }, { "epoch": 0.4092807424593968, "grad_norm": 36.03224563598633, "learning_rate": 6.868668768172273e-06, "loss": 21.983, "step": 4410 }, { "epoch": 0.4093735498839907, "grad_norm": 39.269046783447266, "learning_rate": 6.8672453017299865e-06, "loss": 24.4225, "step": 4411 }, { "epoch": 0.40946635730858466, "grad_norm": 36.58551788330078, "learning_rate": 6.865821659397939e-06, "loss": 25.4731, "step": 4412 }, { "epoch": 0.40955916473317866, "grad_norm": 36.53780746459961, "learning_rate": 6.864397841310232e-06, "loss": 23.981, "step": 4413 }, { "epoch": 0.4096519721577726, "grad_norm": 41.52850341796875, "learning_rate": 6.862973847600984e-06, "loss": 26.3352, "step": 4414 }, { "epoch": 0.4097447795823666, "grad_norm": 36.701683044433594, "learning_rate": 6.861549678404336e-06, "loss": 23.5933, "step": 4415 }, { "epoch": 0.40983758700696055, "grad_norm": 37.86817169189453, "learning_rate": 6.860125333854437e-06, "loss": 23.8016, "step": 4416 }, { "epoch": 0.40993039443155455, "grad_norm": 39.11677169799805, "learning_rate": 6.858700814085459e-06, "loss": 24.8566, "step": 4417 }, { "epoch": 0.4100232018561485, "grad_norm": 36.9267463684082, "learning_rate": 6.857276119231586e-06, "loss": 23.4042, "step": 4418 }, { "epoch": 0.41011600928074243, "grad_norm": 38.7027587890625, "learning_rate": 6.8558512494270236e-06, "loss": 22.592, "step": 4419 }, { "epoch": 0.41020881670533643, "grad_norm": 38.18798828125, "learning_rate": 6.854426204805988e-06, "loss": 24.8113, "step": 4420 }, { "epoch": 0.4103016241299304, "grad_norm": 35.227821350097656, "learning_rate": 6.853000985502718e-06, "loss": 24.4029, "step": 4421 }, { "epoch": 0.4103944315545244, "grad_norm": 38.22620391845703, "learning_rate": 6.851575591651461e-06, "loss": 24.2548, "step": 4422 }, { "epoch": 0.4104872389791183, "grad_norm": 37.14151382446289, "learning_rate": 6.850150023386491e-06, "loss": 24.8437, "step": 4423 }, { "epoch": 0.4105800464037123, "grad_norm": 38.5526123046875, "learning_rate": 6.848724280842088e-06, "loss": 24.0849, "step": 4424 }, { "epoch": 0.41067285382830626, "grad_norm": 36.68318557739258, "learning_rate": 6.847298364152556e-06, "loss": 22.5033, "step": 4425 }, { "epoch": 0.4107656612529002, "grad_norm": 41.18030548095703, "learning_rate": 6.845872273452213e-06, "loss": 24.8014, "step": 4426 }, { "epoch": 0.4108584686774942, "grad_norm": 42.0527229309082, "learning_rate": 6.844446008875391e-06, "loss": 23.1908, "step": 4427 }, { "epoch": 0.41095127610208815, "grad_norm": 34.464927673339844, "learning_rate": 6.843019570556443e-06, "loss": 23.9564, "step": 4428 }, { "epoch": 0.41104408352668215, "grad_norm": 33.23735046386719, "learning_rate": 6.841592958629735e-06, "loss": 24.7179, "step": 4429 }, { "epoch": 0.4111368909512761, "grad_norm": 36.65504455566406, "learning_rate": 6.840166173229648e-06, "loss": 22.5152, "step": 4430 }, { "epoch": 0.4112296983758701, "grad_norm": 42.07969284057617, "learning_rate": 6.838739214490584e-06, "loss": 26.3335, "step": 4431 }, { "epoch": 0.41132250580046403, "grad_norm": 37.81605911254883, "learning_rate": 6.837312082546958e-06, "loss": 23.2748, "step": 4432 }, { "epoch": 0.411415313225058, "grad_norm": 35.750144958496094, "learning_rate": 6.835884777533204e-06, "loss": 22.015, "step": 4433 }, { "epoch": 0.411508120649652, "grad_norm": 44.647701263427734, "learning_rate": 6.834457299583768e-06, "loss": 24.9322, "step": 4434 }, { "epoch": 0.4116009280742459, "grad_norm": 43.1092643737793, "learning_rate": 6.833029648833116e-06, "loss": 23.5924, "step": 4435 }, { "epoch": 0.4116937354988399, "grad_norm": 37.33250045776367, "learning_rate": 6.831601825415729e-06, "loss": 23.3276, "step": 4436 }, { "epoch": 0.41178654292343386, "grad_norm": 36.572059631347656, "learning_rate": 6.8301738294661045e-06, "loss": 23.7744, "step": 4437 }, { "epoch": 0.41187935034802786, "grad_norm": 44.57676315307617, "learning_rate": 6.828745661118756e-06, "loss": 25.316, "step": 4438 }, { "epoch": 0.4119721577726218, "grad_norm": 40.28782272338867, "learning_rate": 6.827317320508211e-06, "loss": 22.3256, "step": 4439 }, { "epoch": 0.4120649651972158, "grad_norm": 39.08876419067383, "learning_rate": 6.82588880776902e-06, "loss": 23.4771, "step": 4440 }, { "epoch": 0.41215777262180975, "grad_norm": 36.081031799316406, "learning_rate": 6.824460123035743e-06, "loss": 24.1419, "step": 4441 }, { "epoch": 0.4122505800464037, "grad_norm": 34.213966369628906, "learning_rate": 6.823031266442958e-06, "loss": 22.3798, "step": 4442 }, { "epoch": 0.4123433874709977, "grad_norm": 34.3233757019043, "learning_rate": 6.821602238125259e-06, "loss": 21.3636, "step": 4443 }, { "epoch": 0.41243619489559163, "grad_norm": 43.00429916381836, "learning_rate": 6.820173038217259e-06, "loss": 24.6475, "step": 4444 }, { "epoch": 0.41252900232018563, "grad_norm": 38.63786697387695, "learning_rate": 6.818743666853584e-06, "loss": 24.6709, "step": 4445 }, { "epoch": 0.4126218097447796, "grad_norm": 37.82382583618164, "learning_rate": 6.817314124168877e-06, "loss": 25.6808, "step": 4446 }, { "epoch": 0.4127146171693736, "grad_norm": 35.81309127807617, "learning_rate": 6.8158844102977975e-06, "loss": 22.4471, "step": 4447 }, { "epoch": 0.4128074245939675, "grad_norm": 42.731468200683594, "learning_rate": 6.81445452537502e-06, "loss": 24.071, "step": 4448 }, { "epoch": 0.41290023201856146, "grad_norm": 41.666011810302734, "learning_rate": 6.813024469535236e-06, "loss": 26.2182, "step": 4449 }, { "epoch": 0.41299303944315546, "grad_norm": 39.406341552734375, "learning_rate": 6.811594242913157e-06, "loss": 24.7683, "step": 4450 }, { "epoch": 0.4130858468677494, "grad_norm": 39.735355377197266, "learning_rate": 6.810163845643502e-06, "loss": 24.6678, "step": 4451 }, { "epoch": 0.4131786542923434, "grad_norm": 37.20119094848633, "learning_rate": 6.8087332778610116e-06, "loss": 23.2877, "step": 4452 }, { "epoch": 0.41327146171693735, "grad_norm": 39.71714782714844, "learning_rate": 6.807302539700443e-06, "loss": 23.9238, "step": 4453 }, { "epoch": 0.41336426914153135, "grad_norm": 39.25141525268555, "learning_rate": 6.805871631296568e-06, "loss": 23.6772, "step": 4454 }, { "epoch": 0.4134570765661253, "grad_norm": 40.934295654296875, "learning_rate": 6.804440552784174e-06, "loss": 22.3786, "step": 4455 }, { "epoch": 0.41354988399071924, "grad_norm": 36.54650115966797, "learning_rate": 6.803009304298065e-06, "loss": 22.9609, "step": 4456 }, { "epoch": 0.41364269141531324, "grad_norm": 45.750064849853516, "learning_rate": 6.801577885973061e-06, "loss": 24.3815, "step": 4457 }, { "epoch": 0.4137354988399072, "grad_norm": 38.56968688964844, "learning_rate": 6.800146297943998e-06, "loss": 21.8362, "step": 4458 }, { "epoch": 0.4138283062645012, "grad_norm": 37.02713394165039, "learning_rate": 6.798714540345728e-06, "loss": 22.8048, "step": 4459 }, { "epoch": 0.4139211136890951, "grad_norm": 37.005977630615234, "learning_rate": 6.79728261331312e-06, "loss": 23.458, "step": 4460 }, { "epoch": 0.4140139211136891, "grad_norm": 38.202171325683594, "learning_rate": 6.7958505169810565e-06, "loss": 22.7657, "step": 4461 }, { "epoch": 0.41410672853828306, "grad_norm": 39.70566177368164, "learning_rate": 6.794418251484436e-06, "loss": 22.7684, "step": 4462 }, { "epoch": 0.414199535962877, "grad_norm": 38.82459259033203, "learning_rate": 6.7929858169581775e-06, "loss": 24.3474, "step": 4463 }, { "epoch": 0.414292343387471, "grad_norm": 38.2878532409668, "learning_rate": 6.791553213537209e-06, "loss": 24.8884, "step": 4464 }, { "epoch": 0.41438515081206495, "grad_norm": 38.707454681396484, "learning_rate": 6.790120441356483e-06, "loss": 24.4324, "step": 4465 }, { "epoch": 0.41447795823665895, "grad_norm": 40.918052673339844, "learning_rate": 6.788687500550959e-06, "loss": 24.7037, "step": 4466 }, { "epoch": 0.4145707656612529, "grad_norm": 39.719913482666016, "learning_rate": 6.7872543912556155e-06, "loss": 25.842, "step": 4467 }, { "epoch": 0.4146635730858469, "grad_norm": 36.936832427978516, "learning_rate": 6.785821113605453e-06, "loss": 23.2058, "step": 4468 }, { "epoch": 0.41475638051044084, "grad_norm": 40.18218994140625, "learning_rate": 6.784387667735477e-06, "loss": 24.5826, "step": 4469 }, { "epoch": 0.4148491879350348, "grad_norm": 38.9832649230957, "learning_rate": 6.782954053780719e-06, "loss": 24.8146, "step": 4470 }, { "epoch": 0.4149419953596288, "grad_norm": 34.36275100708008, "learning_rate": 6.781520271876219e-06, "loss": 25.5111, "step": 4471 }, { "epoch": 0.4150348027842227, "grad_norm": 38.543975830078125, "learning_rate": 6.7800863221570355e-06, "loss": 23.1842, "step": 4472 }, { "epoch": 0.4151276102088167, "grad_norm": 37.44243621826172, "learning_rate": 6.778652204758245e-06, "loss": 23.3739, "step": 4473 }, { "epoch": 0.41522041763341067, "grad_norm": 39.349327087402344, "learning_rate": 6.777217919814934e-06, "loss": 23.2604, "step": 4474 }, { "epoch": 0.41531322505800466, "grad_norm": 45.04635238647461, "learning_rate": 6.775783467462214e-06, "loss": 25.9954, "step": 4475 }, { "epoch": 0.4154060324825986, "grad_norm": 38.72298049926758, "learning_rate": 6.774348847835203e-06, "loss": 23.3644, "step": 4476 }, { "epoch": 0.41549883990719255, "grad_norm": 38.943328857421875, "learning_rate": 6.772914061069038e-06, "loss": 25.1705, "step": 4477 }, { "epoch": 0.41559164733178655, "grad_norm": 34.2317008972168, "learning_rate": 6.771479107298874e-06, "loss": 23.4948, "step": 4478 }, { "epoch": 0.4156844547563805, "grad_norm": 37.534385681152344, "learning_rate": 6.77004398665988e-06, "loss": 23.2507, "step": 4479 }, { "epoch": 0.4157772621809745, "grad_norm": 38.96854782104492, "learning_rate": 6.76860869928724e-06, "loss": 24.7615, "step": 4480 }, { "epoch": 0.41587006960556844, "grad_norm": 36.95487976074219, "learning_rate": 6.767173245316155e-06, "loss": 23.0839, "step": 4481 }, { "epoch": 0.41596287703016244, "grad_norm": 36.23761749267578, "learning_rate": 6.76573762488184e-06, "loss": 23.1668, "step": 4482 }, { "epoch": 0.4160556844547564, "grad_norm": 36.57203674316406, "learning_rate": 6.764301838119528e-06, "loss": 22.2854, "step": 4483 }, { "epoch": 0.4161484918793503, "grad_norm": 36.21604537963867, "learning_rate": 6.762865885164465e-06, "loss": 24.494, "step": 4484 }, { "epoch": 0.4162412993039443, "grad_norm": 39.248897552490234, "learning_rate": 6.761429766151917e-06, "loss": 24.8412, "step": 4485 }, { "epoch": 0.41633410672853827, "grad_norm": 40.20840072631836, "learning_rate": 6.75999348121716e-06, "loss": 23.3627, "step": 4486 }, { "epoch": 0.41642691415313227, "grad_norm": 35.63519287109375, "learning_rate": 6.758557030495488e-06, "loss": 23.5531, "step": 4487 }, { "epoch": 0.4165197215777262, "grad_norm": 39.59407043457031, "learning_rate": 6.757120414122214e-06, "loss": 23.8648, "step": 4488 }, { "epoch": 0.4166125290023202, "grad_norm": 37.642005920410156, "learning_rate": 6.755683632232662e-06, "loss": 23.2038, "step": 4489 }, { "epoch": 0.41670533642691415, "grad_norm": 40.900474548339844, "learning_rate": 6.754246684962171e-06, "loss": 22.728, "step": 4490 }, { "epoch": 0.4167981438515081, "grad_norm": 33.637168884277344, "learning_rate": 6.752809572446102e-06, "loss": 21.8437, "step": 4491 }, { "epoch": 0.4168909512761021, "grad_norm": 39.7557373046875, "learning_rate": 6.751372294819825e-06, "loss": 22.2059, "step": 4492 }, { "epoch": 0.41698375870069604, "grad_norm": 42.00825500488281, "learning_rate": 6.749934852218727e-06, "loss": 24.0763, "step": 4493 }, { "epoch": 0.41707656612529004, "grad_norm": 37.63619613647461, "learning_rate": 6.748497244778214e-06, "loss": 22.9988, "step": 4494 }, { "epoch": 0.417169373549884, "grad_norm": 40.80028533935547, "learning_rate": 6.747059472633703e-06, "loss": 22.1505, "step": 4495 }, { "epoch": 0.417262180974478, "grad_norm": 35.34028625488281, "learning_rate": 6.74562153592063e-06, "loss": 22.7911, "step": 4496 }, { "epoch": 0.4173549883990719, "grad_norm": 39.64196014404297, "learning_rate": 6.744183434774442e-06, "loss": 23.7478, "step": 4497 }, { "epoch": 0.41744779582366587, "grad_norm": 48.574222564697266, "learning_rate": 6.742745169330608e-06, "loss": 22.5186, "step": 4498 }, { "epoch": 0.41754060324825987, "grad_norm": 41.19722366333008, "learning_rate": 6.741306739724607e-06, "loss": 21.2865, "step": 4499 }, { "epoch": 0.4176334106728538, "grad_norm": 36.489471435546875, "learning_rate": 6.739868146091934e-06, "loss": 23.6669, "step": 4500 }, { "epoch": 0.4177262180974478, "grad_norm": 35.494285583496094, "learning_rate": 6.738429388568106e-06, "loss": 23.0469, "step": 4501 }, { "epoch": 0.41781902552204175, "grad_norm": 37.361839294433594, "learning_rate": 6.736990467288645e-06, "loss": 23.5977, "step": 4502 }, { "epoch": 0.41791183294663575, "grad_norm": 35.46957778930664, "learning_rate": 6.735551382389096e-06, "loss": 22.4648, "step": 4503 }, { "epoch": 0.4180046403712297, "grad_norm": 37.51515197753906, "learning_rate": 6.734112134005018e-06, "loss": 21.6747, "step": 4504 }, { "epoch": 0.41809744779582364, "grad_norm": 51.10416030883789, "learning_rate": 6.732672722271982e-06, "loss": 24.1609, "step": 4505 }, { "epoch": 0.41819025522041764, "grad_norm": 38.91520690917969, "learning_rate": 6.731233147325578e-06, "loss": 21.7194, "step": 4506 }, { "epoch": 0.4182830626450116, "grad_norm": 37.10841751098633, "learning_rate": 6.729793409301413e-06, "loss": 22.5993, "step": 4507 }, { "epoch": 0.4183758700696056, "grad_norm": 36.721893310546875, "learning_rate": 6.7283535083351025e-06, "loss": 23.1355, "step": 4508 }, { "epoch": 0.4184686774941995, "grad_norm": 103.25321197509766, "learning_rate": 6.726913444562283e-06, "loss": 23.8783, "step": 4509 }, { "epoch": 0.4185614849187935, "grad_norm": 43.823909759521484, "learning_rate": 6.725473218118607e-06, "loss": 23.1262, "step": 4510 }, { "epoch": 0.41865429234338747, "grad_norm": 37.211978912353516, "learning_rate": 6.7240328291397375e-06, "loss": 23.3213, "step": 4511 }, { "epoch": 0.4187470997679814, "grad_norm": 35.29962158203125, "learning_rate": 6.722592277761355e-06, "loss": 23.4477, "step": 4512 }, { "epoch": 0.4188399071925754, "grad_norm": 36.78951644897461, "learning_rate": 6.721151564119159e-06, "loss": 23.0569, "step": 4513 }, { "epoch": 0.41893271461716935, "grad_norm": 56.7115478515625, "learning_rate": 6.719710688348857e-06, "loss": 23.2245, "step": 4514 }, { "epoch": 0.41902552204176335, "grad_norm": 38.58512496948242, "learning_rate": 6.71826965058618e-06, "loss": 24.1311, "step": 4515 }, { "epoch": 0.4191183294663573, "grad_norm": 38.035884857177734, "learning_rate": 6.716828450966867e-06, "loss": 23.8758, "step": 4516 }, { "epoch": 0.4192111368909513, "grad_norm": 38.904815673828125, "learning_rate": 6.715387089626677e-06, "loss": 22.9157, "step": 4517 }, { "epoch": 0.41930394431554524, "grad_norm": 43.59761047363281, "learning_rate": 6.713945566701383e-06, "loss": 23.5485, "step": 4518 }, { "epoch": 0.4193967517401392, "grad_norm": 37.99094009399414, "learning_rate": 6.712503882326769e-06, "loss": 21.2886, "step": 4519 }, { "epoch": 0.4194895591647332, "grad_norm": 41.01102828979492, "learning_rate": 6.711062036638643e-06, "loss": 22.6414, "step": 4520 }, { "epoch": 0.4195823665893271, "grad_norm": 38.068634033203125, "learning_rate": 6.709620029772822e-06, "loss": 22.4092, "step": 4521 }, { "epoch": 0.4196751740139211, "grad_norm": 36.2472038269043, "learning_rate": 6.708177861865136e-06, "loss": 24.0419, "step": 4522 }, { "epoch": 0.41976798143851507, "grad_norm": 36.8455924987793, "learning_rate": 6.706735533051438e-06, "loss": 22.1471, "step": 4523 }, { "epoch": 0.41986078886310907, "grad_norm": 41.29498291015625, "learning_rate": 6.705293043467589e-06, "loss": 23.261, "step": 4524 }, { "epoch": 0.419953596287703, "grad_norm": 41.14959716796875, "learning_rate": 6.703850393249467e-06, "loss": 24.1466, "step": 4525 }, { "epoch": 0.42004640371229696, "grad_norm": 39.4397087097168, "learning_rate": 6.702407582532969e-06, "loss": 23.014, "step": 4526 }, { "epoch": 0.42013921113689096, "grad_norm": 41.97138214111328, "learning_rate": 6.700964611454004e-06, "loss": 22.6845, "step": 4527 }, { "epoch": 0.4202320185614849, "grad_norm": 57.646724700927734, "learning_rate": 6.699521480148492e-06, "loss": 23.7295, "step": 4528 }, { "epoch": 0.4203248259860789, "grad_norm": 39.95262145996094, "learning_rate": 6.698078188752377e-06, "loss": 23.2447, "step": 4529 }, { "epoch": 0.42041763341067284, "grad_norm": 40.23630142211914, "learning_rate": 6.69663473740161e-06, "loss": 24.0232, "step": 4530 }, { "epoch": 0.42051044083526684, "grad_norm": 39.31637191772461, "learning_rate": 6.695191126232163e-06, "loss": 23.9185, "step": 4531 }, { "epoch": 0.4206032482598608, "grad_norm": 44.801658630371094, "learning_rate": 6.69374735538002e-06, "loss": 22.1909, "step": 4532 }, { "epoch": 0.4206960556844548, "grad_norm": 36.45088195800781, "learning_rate": 6.692303424981179e-06, "loss": 22.611, "step": 4533 }, { "epoch": 0.4207888631090487, "grad_norm": 42.52931594848633, "learning_rate": 6.6908593351716565e-06, "loss": 23.584, "step": 4534 }, { "epoch": 0.42088167053364267, "grad_norm": 36.78124237060547, "learning_rate": 6.68941508608748e-06, "loss": 23.289, "step": 4535 }, { "epoch": 0.42097447795823667, "grad_norm": 43.912086486816406, "learning_rate": 6.687970677864696e-06, "loss": 23.8147, "step": 4536 }, { "epoch": 0.4210672853828306, "grad_norm": 49.47977828979492, "learning_rate": 6.6865261106393645e-06, "loss": 22.3737, "step": 4537 }, { "epoch": 0.4211600928074246, "grad_norm": 52.74971389770508, "learning_rate": 6.685081384547556e-06, "loss": 24.0705, "step": 4538 }, { "epoch": 0.42125290023201856, "grad_norm": 39.875267028808594, "learning_rate": 6.683636499725365e-06, "loss": 25.4842, "step": 4539 }, { "epoch": 0.42134570765661256, "grad_norm": 38.903717041015625, "learning_rate": 6.682191456308894e-06, "loss": 22.7196, "step": 4540 }, { "epoch": 0.4214385150812065, "grad_norm": 38.64582824707031, "learning_rate": 6.680746254434261e-06, "loss": 24.0562, "step": 4541 }, { "epoch": 0.42153132250580044, "grad_norm": 41.74186325073242, "learning_rate": 6.679300894237603e-06, "loss": 21.638, "step": 4542 }, { "epoch": 0.42162412993039444, "grad_norm": 34.44390869140625, "learning_rate": 6.677855375855067e-06, "loss": 21.154, "step": 4543 }, { "epoch": 0.4217169373549884, "grad_norm": 38.78133010864258, "learning_rate": 6.676409699422819e-06, "loss": 25.0386, "step": 4544 }, { "epoch": 0.4218097447795824, "grad_norm": 42.99685287475586, "learning_rate": 6.674963865077037e-06, "loss": 22.1552, "step": 4545 }, { "epoch": 0.42190255220417633, "grad_norm": 38.171470642089844, "learning_rate": 6.673517872953914e-06, "loss": 21.1949, "step": 4546 }, { "epoch": 0.42199535962877033, "grad_norm": 36.523677825927734, "learning_rate": 6.67207172318966e-06, "loss": 21.4961, "step": 4547 }, { "epoch": 0.42208816705336427, "grad_norm": 36.679351806640625, "learning_rate": 6.6706254159205e-06, "loss": 21.778, "step": 4548 }, { "epoch": 0.4221809744779582, "grad_norm": 36.90867614746094, "learning_rate": 6.66917895128267e-06, "loss": 23.9083, "step": 4549 }, { "epoch": 0.4222737819025522, "grad_norm": 37.73666000366211, "learning_rate": 6.667732329412424e-06, "loss": 23.791, "step": 4550 }, { "epoch": 0.42236658932714616, "grad_norm": 40.00753402709961, "learning_rate": 6.66628555044603e-06, "loss": 24.3221, "step": 4551 }, { "epoch": 0.42245939675174016, "grad_norm": 36.44166564941406, "learning_rate": 6.664838614519773e-06, "loss": 22.3146, "step": 4552 }, { "epoch": 0.4225522041763341, "grad_norm": 35.55278778076172, "learning_rate": 6.663391521769948e-06, "loss": 22.2495, "step": 4553 }, { "epoch": 0.4226450116009281, "grad_norm": 38.87141799926758, "learning_rate": 6.661944272332867e-06, "loss": 24.2071, "step": 4554 }, { "epoch": 0.42273781902552204, "grad_norm": 37.4675407409668, "learning_rate": 6.660496866344861e-06, "loss": 22.9371, "step": 4555 }, { "epoch": 0.422830626450116, "grad_norm": 36.81068801879883, "learning_rate": 6.659049303942267e-06, "loss": 22.0753, "step": 4556 }, { "epoch": 0.42292343387471, "grad_norm": 37.31032943725586, "learning_rate": 6.657601585261446e-06, "loss": 25.4081, "step": 4557 }, { "epoch": 0.42301624129930393, "grad_norm": 40.88037109375, "learning_rate": 6.656153710438769e-06, "loss": 25.4015, "step": 4558 }, { "epoch": 0.42310904872389793, "grad_norm": 38.88663864135742, "learning_rate": 6.654705679610618e-06, "loss": 23.1838, "step": 4559 }, { "epoch": 0.4232018561484919, "grad_norm": 40.39482116699219, "learning_rate": 6.653257492913398e-06, "loss": 23.4245, "step": 4560 }, { "epoch": 0.42329466357308587, "grad_norm": 36.37776184082031, "learning_rate": 6.651809150483523e-06, "loss": 24.2271, "step": 4561 }, { "epoch": 0.4233874709976798, "grad_norm": 43.949623107910156, "learning_rate": 6.650360652457423e-06, "loss": 24.7701, "step": 4562 }, { "epoch": 0.42348027842227376, "grad_norm": 39.90463638305664, "learning_rate": 6.648911998971543e-06, "loss": 23.0957, "step": 4563 }, { "epoch": 0.42357308584686776, "grad_norm": 38.63093948364258, "learning_rate": 6.647463190162343e-06, "loss": 24.0266, "step": 4564 }, { "epoch": 0.4236658932714617, "grad_norm": 38.11134338378906, "learning_rate": 6.646014226166297e-06, "loss": 22.6177, "step": 4565 }, { "epoch": 0.4237587006960557, "grad_norm": 37.515220642089844, "learning_rate": 6.644565107119895e-06, "loss": 24.2349, "step": 4566 }, { "epoch": 0.42385150812064964, "grad_norm": 35.008304595947266, "learning_rate": 6.643115833159635e-06, "loss": 23.6045, "step": 4567 }, { "epoch": 0.42394431554524364, "grad_norm": 39.57185745239258, "learning_rate": 6.641666404422043e-06, "loss": 22.6055, "step": 4568 }, { "epoch": 0.4240371229698376, "grad_norm": 35.28578567504883, "learning_rate": 6.640216821043645e-06, "loss": 23.1961, "step": 4569 }, { "epoch": 0.42412993039443153, "grad_norm": 38.59833908081055, "learning_rate": 6.6387670831609905e-06, "loss": 26.1301, "step": 4570 }, { "epoch": 0.42422273781902553, "grad_norm": 42.7102165222168, "learning_rate": 6.637317190910642e-06, "loss": 24.4132, "step": 4571 }, { "epoch": 0.4243155452436195, "grad_norm": 34.77544403076172, "learning_rate": 6.6358671444291735e-06, "loss": 24.7921, "step": 4572 }, { "epoch": 0.4244083526682135, "grad_norm": 42.15496826171875, "learning_rate": 6.634416943853176e-06, "loss": 24.7135, "step": 4573 }, { "epoch": 0.4245011600928074, "grad_norm": 39.10353088378906, "learning_rate": 6.6329665893192566e-06, "loss": 24.2412, "step": 4574 }, { "epoch": 0.4245939675174014, "grad_norm": 33.11602020263672, "learning_rate": 6.631516080964034e-06, "loss": 23.8005, "step": 4575 }, { "epoch": 0.42468677494199536, "grad_norm": 40.16057205200195, "learning_rate": 6.630065418924141e-06, "loss": 23.8254, "step": 4576 }, { "epoch": 0.4247795823665893, "grad_norm": 40.48760223388672, "learning_rate": 6.62861460333623e-06, "loss": 26.4025, "step": 4577 }, { "epoch": 0.4248723897911833, "grad_norm": 37.338260650634766, "learning_rate": 6.6271636343369606e-06, "loss": 23.9515, "step": 4578 }, { "epoch": 0.42496519721577725, "grad_norm": 41.133487701416016, "learning_rate": 6.62571251206301e-06, "loss": 23.2013, "step": 4579 }, { "epoch": 0.42505800464037125, "grad_norm": 66.18584442138672, "learning_rate": 6.624261236651074e-06, "loss": 22.4668, "step": 4580 }, { "epoch": 0.4251508120649652, "grad_norm": 37.77796173095703, "learning_rate": 6.6228098082378565e-06, "loss": 23.9627, "step": 4581 }, { "epoch": 0.4252436194895592, "grad_norm": 36.341609954833984, "learning_rate": 6.621358226960078e-06, "loss": 23.0295, "step": 4582 }, { "epoch": 0.42533642691415313, "grad_norm": 38.97991180419922, "learning_rate": 6.619906492954474e-06, "loss": 22.7658, "step": 4583 }, { "epoch": 0.4254292343387471, "grad_norm": 38.438926696777344, "learning_rate": 6.618454606357796e-06, "loss": 24.1811, "step": 4584 }, { "epoch": 0.4255220417633411, "grad_norm": 37.90492248535156, "learning_rate": 6.617002567306806e-06, "loss": 23.7113, "step": 4585 }, { "epoch": 0.425614849187935, "grad_norm": 36.72539138793945, "learning_rate": 6.615550375938282e-06, "loss": 24.4779, "step": 4586 }, { "epoch": 0.425707656612529, "grad_norm": 36.46039962768555, "learning_rate": 6.614098032389021e-06, "loss": 23.9918, "step": 4587 }, { "epoch": 0.42580046403712296, "grad_norm": 38.32666015625, "learning_rate": 6.612645536795823e-06, "loss": 25.2442, "step": 4588 }, { "epoch": 0.42589327146171696, "grad_norm": 38.41118621826172, "learning_rate": 6.611192889295515e-06, "loss": 23.8416, "step": 4589 }, { "epoch": 0.4259860788863109, "grad_norm": 36.04350280761719, "learning_rate": 6.609740090024931e-06, "loss": 21.9017, "step": 4590 }, { "epoch": 0.42607888631090485, "grad_norm": 37.34821319580078, "learning_rate": 6.6082871391209215e-06, "loss": 24.8299, "step": 4591 }, { "epoch": 0.42617169373549885, "grad_norm": 38.812679290771484, "learning_rate": 6.606834036720348e-06, "loss": 24.2941, "step": 4592 }, { "epoch": 0.4262645011600928, "grad_norm": 39.17633819580078, "learning_rate": 6.6053807829600935e-06, "loss": 25.6093, "step": 4593 }, { "epoch": 0.4263573085846868, "grad_norm": 34.25943374633789, "learning_rate": 6.603927377977048e-06, "loss": 22.791, "step": 4594 }, { "epoch": 0.42645011600928073, "grad_norm": 36.5384521484375, "learning_rate": 6.602473821908117e-06, "loss": 24.1671, "step": 4595 }, { "epoch": 0.42654292343387473, "grad_norm": 37.59196853637695, "learning_rate": 6.601020114890227e-06, "loss": 21.7204, "step": 4596 }, { "epoch": 0.4266357308584687, "grad_norm": 37.95591735839844, "learning_rate": 6.599566257060311e-06, "loss": 23.9858, "step": 4597 }, { "epoch": 0.4267285382830626, "grad_norm": 34.898983001708984, "learning_rate": 6.598112248555315e-06, "loss": 24.312, "step": 4598 }, { "epoch": 0.4268213457076566, "grad_norm": 37.220924377441406, "learning_rate": 6.596658089512208e-06, "loss": 23.4297, "step": 4599 }, { "epoch": 0.42691415313225056, "grad_norm": 36.82654571533203, "learning_rate": 6.595203780067966e-06, "loss": 22.6033, "step": 4600 }, { "epoch": 0.42700696055684456, "grad_norm": 38.90611267089844, "learning_rate": 6.593749320359579e-06, "loss": 23.9764, "step": 4601 }, { "epoch": 0.4270997679814385, "grad_norm": 37.61074447631836, "learning_rate": 6.5922947105240585e-06, "loss": 23.1751, "step": 4602 }, { "epoch": 0.4271925754060325, "grad_norm": 35.636756896972656, "learning_rate": 6.590839950698422e-06, "loss": 24.9695, "step": 4603 }, { "epoch": 0.42728538283062645, "grad_norm": 40.7528190612793, "learning_rate": 6.589385041019705e-06, "loss": 24.4515, "step": 4604 }, { "epoch": 0.4273781902552204, "grad_norm": 41.906944274902344, "learning_rate": 6.587929981624954e-06, "loss": 26.2624, "step": 4605 }, { "epoch": 0.4274709976798144, "grad_norm": 39.82648849487305, "learning_rate": 6.5864747726512344e-06, "loss": 25.298, "step": 4606 }, { "epoch": 0.42756380510440833, "grad_norm": 43.524845123291016, "learning_rate": 6.585019414235624e-06, "loss": 23.9515, "step": 4607 }, { "epoch": 0.42765661252900233, "grad_norm": 39.23200607299805, "learning_rate": 6.5835639065152104e-06, "loss": 25.3614, "step": 4608 }, { "epoch": 0.4277494199535963, "grad_norm": 40.6832389831543, "learning_rate": 6.5821082496271015e-06, "loss": 23.0378, "step": 4609 }, { "epoch": 0.4278422273781903, "grad_norm": 34.667049407958984, "learning_rate": 6.580652443708416e-06, "loss": 22.3958, "step": 4610 }, { "epoch": 0.4279350348027842, "grad_norm": 39.00730514526367, "learning_rate": 6.579196488896287e-06, "loss": 22.7043, "step": 4611 }, { "epoch": 0.42802784222737816, "grad_norm": 35.67377471923828, "learning_rate": 6.577740385327861e-06, "loss": 23.3224, "step": 4612 }, { "epoch": 0.42812064965197216, "grad_norm": 37.16350555419922, "learning_rate": 6.576284133140301e-06, "loss": 22.9017, "step": 4613 }, { "epoch": 0.4282134570765661, "grad_norm": 38.77599334716797, "learning_rate": 6.574827732470779e-06, "loss": 24.9971, "step": 4614 }, { "epoch": 0.4283062645011601, "grad_norm": 37.93790817260742, "learning_rate": 6.573371183456487e-06, "loss": 21.6503, "step": 4615 }, { "epoch": 0.42839907192575405, "grad_norm": 40.0390625, "learning_rate": 6.571914486234628e-06, "loss": 23.6202, "step": 4616 }, { "epoch": 0.42849187935034805, "grad_norm": 57.2356071472168, "learning_rate": 6.570457640942418e-06, "loss": 23.8821, "step": 4617 }, { "epoch": 0.428584686774942, "grad_norm": 39.923831939697266, "learning_rate": 6.56900064771709e-06, "loss": 24.1952, "step": 4618 }, { "epoch": 0.42867749419953594, "grad_norm": 55.06513977050781, "learning_rate": 6.567543506695888e-06, "loss": 23.6, "step": 4619 }, { "epoch": 0.42877030162412993, "grad_norm": 47.83948516845703, "learning_rate": 6.56608621801607e-06, "loss": 24.8354, "step": 4620 }, { "epoch": 0.4288631090487239, "grad_norm": 42.57630920410156, "learning_rate": 6.56462878181491e-06, "loss": 23.3039, "step": 4621 }, { "epoch": 0.4289559164733179, "grad_norm": 37.711727142333984, "learning_rate": 6.563171198229694e-06, "loss": 23.748, "step": 4622 }, { "epoch": 0.4290487238979118, "grad_norm": 43.85701370239258, "learning_rate": 6.561713467397724e-06, "loss": 22.2377, "step": 4623 }, { "epoch": 0.4291415313225058, "grad_norm": 35.196754455566406, "learning_rate": 6.560255589456312e-06, "loss": 23.446, "step": 4624 }, { "epoch": 0.42923433874709976, "grad_norm": 38.52943801879883, "learning_rate": 6.558797564542791e-06, "loss": 24.563, "step": 4625 }, { "epoch": 0.42932714617169376, "grad_norm": 37.083885192871094, "learning_rate": 6.5573393927945e-06, "loss": 21.6872, "step": 4626 }, { "epoch": 0.4294199535962877, "grad_norm": 45.31938171386719, "learning_rate": 6.555881074348793e-06, "loss": 23.7082, "step": 4627 }, { "epoch": 0.42951276102088165, "grad_norm": 37.638816833496094, "learning_rate": 6.554422609343044e-06, "loss": 23.5353, "step": 4628 }, { "epoch": 0.42960556844547565, "grad_norm": 34.455474853515625, "learning_rate": 6.552963997914635e-06, "loss": 23.8022, "step": 4629 }, { "epoch": 0.4296983758700696, "grad_norm": 37.509490966796875, "learning_rate": 6.551505240200963e-06, "loss": 23.0384, "step": 4630 }, { "epoch": 0.4297911832946636, "grad_norm": 37.191131591796875, "learning_rate": 6.55004633633944e-06, "loss": 23.9115, "step": 4631 }, { "epoch": 0.42988399071925754, "grad_norm": 40.85862731933594, "learning_rate": 6.548587286467491e-06, "loss": 24.4106, "step": 4632 }, { "epoch": 0.42997679814385154, "grad_norm": 41.117794036865234, "learning_rate": 6.547128090722554e-06, "loss": 24.1091, "step": 4633 }, { "epoch": 0.4300696055684455, "grad_norm": 36.52669143676758, "learning_rate": 6.5456687492420844e-06, "loss": 23.6562, "step": 4634 }, { "epoch": 0.4301624129930394, "grad_norm": 39.200435638427734, "learning_rate": 6.544209262163544e-06, "loss": 24.5749, "step": 4635 }, { "epoch": 0.4302552204176334, "grad_norm": 36.92396545410156, "learning_rate": 6.542749629624416e-06, "loss": 24.4937, "step": 4636 }, { "epoch": 0.43034802784222737, "grad_norm": 39.68750762939453, "learning_rate": 6.541289851762193e-06, "loss": 24.4169, "step": 4637 }, { "epoch": 0.43044083526682136, "grad_norm": 38.44822692871094, "learning_rate": 6.539829928714383e-06, "loss": 22.3023, "step": 4638 }, { "epoch": 0.4305336426914153, "grad_norm": 38.607765197753906, "learning_rate": 6.5383698606185055e-06, "loss": 24.1356, "step": 4639 }, { "epoch": 0.4306264501160093, "grad_norm": 38.28676986694336, "learning_rate": 6.536909647612096e-06, "loss": 22.4309, "step": 4640 }, { "epoch": 0.43071925754060325, "grad_norm": 38.83760070800781, "learning_rate": 6.535449289832703e-06, "loss": 23.2524, "step": 4641 }, { "epoch": 0.4308120649651972, "grad_norm": 37.315040588378906, "learning_rate": 6.53398878741789e-06, "loss": 20.5307, "step": 4642 }, { "epoch": 0.4309048723897912, "grad_norm": 36.61212921142578, "learning_rate": 6.5325281405052295e-06, "loss": 21.6942, "step": 4643 }, { "epoch": 0.43099767981438514, "grad_norm": 37.053897857666016, "learning_rate": 6.531067349232314e-06, "loss": 23.5128, "step": 4644 }, { "epoch": 0.43109048723897914, "grad_norm": 41.60576629638672, "learning_rate": 6.529606413736742e-06, "loss": 23.2283, "step": 4645 }, { "epoch": 0.4311832946635731, "grad_norm": 38.3858642578125, "learning_rate": 6.528145334156135e-06, "loss": 24.1228, "step": 4646 }, { "epoch": 0.4312761020881671, "grad_norm": 38.33317184448242, "learning_rate": 6.5266841106281205e-06, "loss": 24.3262, "step": 4647 }, { "epoch": 0.431368909512761, "grad_norm": 42.326324462890625, "learning_rate": 6.525222743290341e-06, "loss": 23.2491, "step": 4648 }, { "epoch": 0.43146171693735497, "grad_norm": 39.3587532043457, "learning_rate": 6.523761232280455e-06, "loss": 23.3227, "step": 4649 }, { "epoch": 0.43155452436194897, "grad_norm": 33.39513397216797, "learning_rate": 6.522299577736133e-06, "loss": 21.9787, "step": 4650 }, { "epoch": 0.4316473317865429, "grad_norm": 39.699745178222656, "learning_rate": 6.520837779795059e-06, "loss": 25.1608, "step": 4651 }, { "epoch": 0.4317401392111369, "grad_norm": 37.167057037353516, "learning_rate": 6.519375838594933e-06, "loss": 22.9681, "step": 4652 }, { "epoch": 0.43183294663573085, "grad_norm": 38.251888275146484, "learning_rate": 6.517913754273461e-06, "loss": 24.4312, "step": 4653 }, { "epoch": 0.43192575406032485, "grad_norm": 35.07157897949219, "learning_rate": 6.51645152696837e-06, "loss": 24.2344, "step": 4654 }, { "epoch": 0.4320185614849188, "grad_norm": 40.640235900878906, "learning_rate": 6.514989156817401e-06, "loss": 25.3469, "step": 4655 }, { "epoch": 0.43211136890951274, "grad_norm": 49.51832580566406, "learning_rate": 6.5135266439583015e-06, "loss": 21.1535, "step": 4656 }, { "epoch": 0.43220417633410674, "grad_norm": 47.3004150390625, "learning_rate": 6.512063988528838e-06, "loss": 24.5963, "step": 4657 }, { "epoch": 0.4322969837587007, "grad_norm": 38.29965591430664, "learning_rate": 6.51060119066679e-06, "loss": 23.3748, "step": 4658 }, { "epoch": 0.4323897911832947, "grad_norm": 34.72114944458008, "learning_rate": 6.5091382505099466e-06, "loss": 23.1667, "step": 4659 }, { "epoch": 0.4324825986078886, "grad_norm": 37.04323959350586, "learning_rate": 6.507675168196116e-06, "loss": 23.1102, "step": 4660 }, { "epoch": 0.4325754060324826, "grad_norm": 39.56900405883789, "learning_rate": 6.506211943863117e-06, "loss": 23.8259, "step": 4661 }, { "epoch": 0.43266821345707657, "grad_norm": 35.7401237487793, "learning_rate": 6.504748577648777e-06, "loss": 24.4959, "step": 4662 }, { "epoch": 0.4327610208816705, "grad_norm": 37.34291458129883, "learning_rate": 6.503285069690947e-06, "loss": 24.1656, "step": 4663 }, { "epoch": 0.4328538283062645, "grad_norm": 33.67744827270508, "learning_rate": 6.501821420127483e-06, "loss": 22.8727, "step": 4664 }, { "epoch": 0.43294663573085845, "grad_norm": 36.982818603515625, "learning_rate": 6.500357629096257e-06, "loss": 22.308, "step": 4665 }, { "epoch": 0.43303944315545245, "grad_norm": 34.56925964355469, "learning_rate": 6.498893696735155e-06, "loss": 20.9059, "step": 4666 }, { "epoch": 0.4331322505800464, "grad_norm": 36.549922943115234, "learning_rate": 6.497429623182076e-06, "loss": 23.5962, "step": 4667 }, { "epoch": 0.4332250580046404, "grad_norm": 36.320945739746094, "learning_rate": 6.495965408574929e-06, "loss": 21.94, "step": 4668 }, { "epoch": 0.43331786542923434, "grad_norm": 43.05377197265625, "learning_rate": 6.494501053051645e-06, "loss": 22.375, "step": 4669 }, { "epoch": 0.4334106728538283, "grad_norm": 47.86903762817383, "learning_rate": 6.493036556750158e-06, "loss": 22.6028, "step": 4670 }, { "epoch": 0.4335034802784223, "grad_norm": 38.13991928100586, "learning_rate": 6.491571919808421e-06, "loss": 22.8189, "step": 4671 }, { "epoch": 0.4335962877030162, "grad_norm": 35.84099197387695, "learning_rate": 6.4901071423644e-06, "loss": 23.8332, "step": 4672 }, { "epoch": 0.4336890951276102, "grad_norm": 42.36701202392578, "learning_rate": 6.488642224556072e-06, "loss": 23.2211, "step": 4673 }, { "epoch": 0.43378190255220417, "grad_norm": 44.778343200683594, "learning_rate": 6.48717716652143e-06, "loss": 25.0484, "step": 4674 }, { "epoch": 0.43387470997679817, "grad_norm": 34.237648010253906, "learning_rate": 6.485711968398477e-06, "loss": 25.8826, "step": 4675 }, { "epoch": 0.4339675174013921, "grad_norm": 36.84577178955078, "learning_rate": 6.484246630325232e-06, "loss": 20.8611, "step": 4676 }, { "epoch": 0.43406032482598605, "grad_norm": 38.78772735595703, "learning_rate": 6.482781152439728e-06, "loss": 24.871, "step": 4677 }, { "epoch": 0.43415313225058005, "grad_norm": 39.407100677490234, "learning_rate": 6.481315534880004e-06, "loss": 23.4499, "step": 4678 }, { "epoch": 0.434245939675174, "grad_norm": 39.091583251953125, "learning_rate": 6.479849777784122e-06, "loss": 25.1645, "step": 4679 }, { "epoch": 0.434338747099768, "grad_norm": 36.747802734375, "learning_rate": 6.478383881290152e-06, "loss": 23.3286, "step": 4680 }, { "epoch": 0.43443155452436194, "grad_norm": 38.60942459106445, "learning_rate": 6.476917845536176e-06, "loss": 22.8282, "step": 4681 }, { "epoch": 0.43452436194895594, "grad_norm": 36.49805450439453, "learning_rate": 6.475451670660292e-06, "loss": 23.3387, "step": 4682 }, { "epoch": 0.4346171693735499, "grad_norm": 38.28752136230469, "learning_rate": 6.4739853568006105e-06, "loss": 24.3036, "step": 4683 }, { "epoch": 0.4347099767981438, "grad_norm": 46.13259506225586, "learning_rate": 6.472518904095253e-06, "loss": 23.4009, "step": 4684 }, { "epoch": 0.4348027842227378, "grad_norm": 36.8979377746582, "learning_rate": 6.4710523126823574e-06, "loss": 24.1687, "step": 4685 }, { "epoch": 0.43489559164733177, "grad_norm": 40.23602294921875, "learning_rate": 6.469585582700072e-06, "loss": 23.567, "step": 4686 }, { "epoch": 0.43498839907192577, "grad_norm": 35.82912826538086, "learning_rate": 6.468118714286558e-06, "loss": 23.4825, "step": 4687 }, { "epoch": 0.4350812064965197, "grad_norm": 34.308494567871094, "learning_rate": 6.466651707579992e-06, "loss": 21.564, "step": 4688 }, { "epoch": 0.4351740139211137, "grad_norm": 44.03037643432617, "learning_rate": 6.465184562718563e-06, "loss": 25.5845, "step": 4689 }, { "epoch": 0.43526682134570766, "grad_norm": 37.381526947021484, "learning_rate": 6.4637172798404694e-06, "loss": 23.1001, "step": 4690 }, { "epoch": 0.4353596287703016, "grad_norm": 38.50367736816406, "learning_rate": 6.4622498590839285e-06, "loss": 22.9009, "step": 4691 }, { "epoch": 0.4354524361948956, "grad_norm": 35.097694396972656, "learning_rate": 6.460782300587166e-06, "loss": 26.0299, "step": 4692 }, { "epoch": 0.43554524361948954, "grad_norm": 42.41464614868164, "learning_rate": 6.459314604488424e-06, "loss": 23.5974, "step": 4693 }, { "epoch": 0.43563805104408354, "grad_norm": 39.26084518432617, "learning_rate": 6.457846770925953e-06, "loss": 24.3276, "step": 4694 }, { "epoch": 0.4357308584686775, "grad_norm": 39.78727722167969, "learning_rate": 6.4563788000380215e-06, "loss": 22.6485, "step": 4695 }, { "epoch": 0.4358236658932715, "grad_norm": 32.42485427856445, "learning_rate": 6.454910691962907e-06, "loss": 22.7207, "step": 4696 }, { "epoch": 0.4359164733178654, "grad_norm": 35.94852828979492, "learning_rate": 6.453442446838902e-06, "loss": 22.7426, "step": 4697 }, { "epoch": 0.43600928074245937, "grad_norm": 37.26069259643555, "learning_rate": 6.451974064804313e-06, "loss": 24.6972, "step": 4698 }, { "epoch": 0.43610208816705337, "grad_norm": 34.95133972167969, "learning_rate": 6.450505545997456e-06, "loss": 23.6408, "step": 4699 }, { "epoch": 0.4361948955916473, "grad_norm": 36.62559509277344, "learning_rate": 6.449036890556663e-06, "loss": 23.7955, "step": 4700 }, { "epoch": 0.4362877030162413, "grad_norm": 37.727745056152344, "learning_rate": 6.447568098620276e-06, "loss": 22.8371, "step": 4701 }, { "epoch": 0.43638051044083526, "grad_norm": 35.546722412109375, "learning_rate": 6.446099170326654e-06, "loss": 23.105, "step": 4702 }, { "epoch": 0.43647331786542926, "grad_norm": 36.42000198364258, "learning_rate": 6.4446301058141625e-06, "loss": 23.709, "step": 4703 }, { "epoch": 0.4365661252900232, "grad_norm": 34.54533386230469, "learning_rate": 6.443160905221188e-06, "loss": 22.6328, "step": 4704 }, { "epoch": 0.43665893271461714, "grad_norm": 45.26517868041992, "learning_rate": 6.4416915686861225e-06, "loss": 24.5989, "step": 4705 }, { "epoch": 0.43675174013921114, "grad_norm": 44.46525192260742, "learning_rate": 6.440222096347374e-06, "loss": 25.136, "step": 4706 }, { "epoch": 0.4368445475638051, "grad_norm": 34.00661087036133, "learning_rate": 6.438752488343364e-06, "loss": 22.2345, "step": 4707 }, { "epoch": 0.4369373549883991, "grad_norm": 172.63339233398438, "learning_rate": 6.437282744812526e-06, "loss": 22.4058, "step": 4708 }, { "epoch": 0.43703016241299303, "grad_norm": 149.09068298339844, "learning_rate": 6.435812865893306e-06, "loss": 24.0867, "step": 4709 }, { "epoch": 0.437122969837587, "grad_norm": 39.792354583740234, "learning_rate": 6.4343428517241616e-06, "loss": 22.628, "step": 4710 }, { "epoch": 0.43721577726218097, "grad_norm": 37.71266174316406, "learning_rate": 6.432872702443566e-06, "loss": 25.4405, "step": 4711 }, { "epoch": 0.4373085846867749, "grad_norm": 38.03337478637695, "learning_rate": 6.431402418190002e-06, "loss": 22.8669, "step": 4712 }, { "epoch": 0.4374013921113689, "grad_norm": 38.299774169921875, "learning_rate": 6.429931999101968e-06, "loss": 23.6503, "step": 4713 }, { "epoch": 0.43749419953596286, "grad_norm": 40.85523986816406, "learning_rate": 6.428461445317973e-06, "loss": 25.2645, "step": 4714 }, { "epoch": 0.43758700696055686, "grad_norm": 41.0794563293457, "learning_rate": 6.42699075697654e-06, "loss": 23.3759, "step": 4715 }, { "epoch": 0.4376798143851508, "grad_norm": 37.41604995727539, "learning_rate": 6.425519934216204e-06, "loss": 25.1977, "step": 4716 }, { "epoch": 0.4377726218097448, "grad_norm": 40.108699798583984, "learning_rate": 6.424048977175512e-06, "loss": 25.6338, "step": 4717 }, { "epoch": 0.43786542923433874, "grad_norm": 37.00652313232422, "learning_rate": 6.4225778859930244e-06, "loss": 23.671, "step": 4718 }, { "epoch": 0.4379582366589327, "grad_norm": 36.588260650634766, "learning_rate": 6.421106660807315e-06, "loss": 23.1621, "step": 4719 }, { "epoch": 0.4380510440835267, "grad_norm": 36.341793060302734, "learning_rate": 6.41963530175697e-06, "loss": 22.8197, "step": 4720 }, { "epoch": 0.43814385150812063, "grad_norm": 39.72285461425781, "learning_rate": 6.418163808980586e-06, "loss": 23.792, "step": 4721 }, { "epoch": 0.43823665893271463, "grad_norm": 37.88060760498047, "learning_rate": 6.416692182616775e-06, "loss": 23.1295, "step": 4722 }, { "epoch": 0.4383294663573086, "grad_norm": 38.438262939453125, "learning_rate": 6.415220422804162e-06, "loss": 22.1471, "step": 4723 }, { "epoch": 0.43842227378190257, "grad_norm": 40.91181564331055, "learning_rate": 6.41374852968138e-06, "loss": 22.8623, "step": 4724 }, { "epoch": 0.4385150812064965, "grad_norm": 37.327674865722656, "learning_rate": 6.412276503387077e-06, "loss": 22.1325, "step": 4725 }, { "epoch": 0.4386078886310905, "grad_norm": 37.13454818725586, "learning_rate": 6.4108043440599184e-06, "loss": 23.0406, "step": 4726 }, { "epoch": 0.43870069605568446, "grad_norm": 33.87841796875, "learning_rate": 6.409332051838575e-06, "loss": 23.6276, "step": 4727 }, { "epoch": 0.4387935034802784, "grad_norm": 36.3358154296875, "learning_rate": 6.407859626861734e-06, "loss": 23.7714, "step": 4728 }, { "epoch": 0.4388863109048724, "grad_norm": 34.86088562011719, "learning_rate": 6.406387069268092e-06, "loss": 23.3772, "step": 4729 }, { "epoch": 0.43897911832946634, "grad_norm": 38.709228515625, "learning_rate": 6.4049143791963635e-06, "loss": 22.6704, "step": 4730 }, { "epoch": 0.43907192575406034, "grad_norm": 39.0450325012207, "learning_rate": 6.403441556785271e-06, "loss": 22.5408, "step": 4731 }, { "epoch": 0.4391647331786543, "grad_norm": 36.77108383178711, "learning_rate": 6.401968602173547e-06, "loss": 24.0383, "step": 4732 }, { "epoch": 0.4392575406032483, "grad_norm": 36.539371490478516, "learning_rate": 6.400495515499945e-06, "loss": 25.1182, "step": 4733 }, { "epoch": 0.43935034802784223, "grad_norm": 34.74919891357422, "learning_rate": 6.399022296903225e-06, "loss": 23.8852, "step": 4734 }, { "epoch": 0.4394431554524362, "grad_norm": 33.942020416259766, "learning_rate": 6.397548946522157e-06, "loss": 23.5993, "step": 4735 }, { "epoch": 0.4395359628770302, "grad_norm": 40.96925354003906, "learning_rate": 6.3960754644955325e-06, "loss": 23.4006, "step": 4736 }, { "epoch": 0.4396287703016241, "grad_norm": 36.6292610168457, "learning_rate": 6.394601850962143e-06, "loss": 23.2862, "step": 4737 }, { "epoch": 0.4397215777262181, "grad_norm": 38.56114959716797, "learning_rate": 6.393128106060807e-06, "loss": 24.2055, "step": 4738 }, { "epoch": 0.43981438515081206, "grad_norm": 39.175228118896484, "learning_rate": 6.391654229930341e-06, "loss": 23.8277, "step": 4739 }, { "epoch": 0.43990719257540606, "grad_norm": 37.38246154785156, "learning_rate": 6.390180222709583e-06, "loss": 22.9911, "step": 4740 }, { "epoch": 0.44, "grad_norm": 33.66340637207031, "learning_rate": 6.388706084537381e-06, "loss": 24.8376, "step": 4741 }, { "epoch": 0.44009280742459395, "grad_norm": 37.48754119873047, "learning_rate": 6.3872318155525945e-06, "loss": 24.2327, "step": 4742 }, { "epoch": 0.44018561484918795, "grad_norm": 37.510684967041016, "learning_rate": 6.385757415894096e-06, "loss": 23.4444, "step": 4743 }, { "epoch": 0.4402784222737819, "grad_norm": 36.44230651855469, "learning_rate": 6.384282885700771e-06, "loss": 25.0727, "step": 4744 }, { "epoch": 0.4403712296983759, "grad_norm": 35.5988883972168, "learning_rate": 6.382808225111514e-06, "loss": 22.9552, "step": 4745 }, { "epoch": 0.44046403712296983, "grad_norm": 35.76865768432617, "learning_rate": 6.3813334342652375e-06, "loss": 23.6324, "step": 4746 }, { "epoch": 0.44055684454756383, "grad_norm": 37.5621337890625, "learning_rate": 6.3798585133008605e-06, "loss": 23.6114, "step": 4747 }, { "epoch": 0.4406496519721578, "grad_norm": 38.31793212890625, "learning_rate": 6.3783834623573184e-06, "loss": 23.9768, "step": 4748 }, { "epoch": 0.4407424593967517, "grad_norm": 38.17453384399414, "learning_rate": 6.376908281573556e-06, "loss": 24.5201, "step": 4749 }, { "epoch": 0.4408352668213457, "grad_norm": 44.59427261352539, "learning_rate": 6.375432971088533e-06, "loss": 24.0586, "step": 4750 }, { "epoch": 0.44092807424593966, "grad_norm": 39.02895736694336, "learning_rate": 6.37395753104122e-06, "loss": 25.1312, "step": 4751 }, { "epoch": 0.44102088167053366, "grad_norm": 40.448062896728516, "learning_rate": 6.372481961570597e-06, "loss": 23.675, "step": 4752 }, { "epoch": 0.4411136890951276, "grad_norm": 39.807945251464844, "learning_rate": 6.371006262815662e-06, "loss": 22.6818, "step": 4753 }, { "epoch": 0.4412064965197216, "grad_norm": 40.065799713134766, "learning_rate": 6.36953043491542e-06, "loss": 22.9943, "step": 4754 }, { "epoch": 0.44129930394431555, "grad_norm": 35.48535919189453, "learning_rate": 6.368054478008892e-06, "loss": 22.198, "step": 4755 }, { "epoch": 0.4413921113689095, "grad_norm": 33.42097473144531, "learning_rate": 6.366578392235107e-06, "loss": 22.6705, "step": 4756 }, { "epoch": 0.4414849187935035, "grad_norm": 39.583038330078125, "learning_rate": 6.365102177733111e-06, "loss": 24.5558, "step": 4757 }, { "epoch": 0.44157772621809743, "grad_norm": 38.9319953918457, "learning_rate": 6.3636258346419585e-06, "loss": 24.0025, "step": 4758 }, { "epoch": 0.44167053364269143, "grad_norm": 38.10943603515625, "learning_rate": 6.362149363100718e-06, "loss": 22.8681, "step": 4759 }, { "epoch": 0.4417633410672854, "grad_norm": 36.42873764038086, "learning_rate": 6.360672763248468e-06, "loss": 22.7516, "step": 4760 }, { "epoch": 0.4418561484918794, "grad_norm": 35.671058654785156, "learning_rate": 6.359196035224301e-06, "loss": 22.1132, "step": 4761 }, { "epoch": 0.4419489559164733, "grad_norm": 39.245384216308594, "learning_rate": 6.357719179167322e-06, "loss": 23.3868, "step": 4762 }, { "epoch": 0.44204176334106726, "grad_norm": 36.69807434082031, "learning_rate": 6.3562421952166466e-06, "loss": 24.5729, "step": 4763 }, { "epoch": 0.44213457076566126, "grad_norm": 35.613365173339844, "learning_rate": 6.3547650835114014e-06, "loss": 25.7137, "step": 4764 }, { "epoch": 0.4422273781902552, "grad_norm": 35.796783447265625, "learning_rate": 6.353287844190728e-06, "loss": 23.9627, "step": 4765 }, { "epoch": 0.4423201856148492, "grad_norm": 37.61423873901367, "learning_rate": 6.35181047739378e-06, "loss": 22.7359, "step": 4766 }, { "epoch": 0.44241299303944315, "grad_norm": 48.67775344848633, "learning_rate": 6.350332983259718e-06, "loss": 24.3271, "step": 4767 }, { "epoch": 0.44250580046403715, "grad_norm": 34.757171630859375, "learning_rate": 6.348855361927723e-06, "loss": 22.3894, "step": 4768 }, { "epoch": 0.4425986078886311, "grad_norm": 40.9605712890625, "learning_rate": 6.347377613536978e-06, "loss": 23.4437, "step": 4769 }, { "epoch": 0.44269141531322503, "grad_norm": 39.48770523071289, "learning_rate": 6.3458997382266865e-06, "loss": 21.652, "step": 4770 }, { "epoch": 0.44278422273781903, "grad_norm": 35.06171798706055, "learning_rate": 6.34442173613606e-06, "loss": 24.1636, "step": 4771 }, { "epoch": 0.442877030162413, "grad_norm": 36.3773307800293, "learning_rate": 6.342943607404321e-06, "loss": 23.9012, "step": 4772 }, { "epoch": 0.442969837587007, "grad_norm": 40.022430419921875, "learning_rate": 6.341465352170706e-06, "loss": 23.4194, "step": 4773 }, { "epoch": 0.4430626450116009, "grad_norm": 153.1962890625, "learning_rate": 6.339986970574466e-06, "loss": 24.204, "step": 4774 }, { "epoch": 0.4431554524361949, "grad_norm": 41.48257827758789, "learning_rate": 6.338508462754858e-06, "loss": 23.8104, "step": 4775 }, { "epoch": 0.44324825986078886, "grad_norm": 38.51427459716797, "learning_rate": 6.337029828851151e-06, "loss": 24.0931, "step": 4776 }, { "epoch": 0.4433410672853828, "grad_norm": 36.60517501831055, "learning_rate": 6.3355510690026345e-06, "loss": 23.3678, "step": 4777 }, { "epoch": 0.4434338747099768, "grad_norm": 37.27635192871094, "learning_rate": 6.334072183348601e-06, "loss": 24.9761, "step": 4778 }, { "epoch": 0.44352668213457075, "grad_norm": 39.1104850769043, "learning_rate": 6.3325931720283565e-06, "loss": 24.9983, "step": 4779 }, { "epoch": 0.44361948955916475, "grad_norm": 37.78236389160156, "learning_rate": 6.3311140351812215e-06, "loss": 23.2992, "step": 4780 }, { "epoch": 0.4437122969837587, "grad_norm": 37.48663330078125, "learning_rate": 6.329634772946528e-06, "loss": 23.8785, "step": 4781 }, { "epoch": 0.4438051044083527, "grad_norm": 40.11974334716797, "learning_rate": 6.328155385463616e-06, "loss": 24.2084, "step": 4782 }, { "epoch": 0.44389791183294663, "grad_norm": 42.336429595947266, "learning_rate": 6.326675872871842e-06, "loss": 24.2481, "step": 4783 }, { "epoch": 0.4439907192575406, "grad_norm": 36.200138092041016, "learning_rate": 6.3251962353105714e-06, "loss": 21.8706, "step": 4784 }, { "epoch": 0.4440835266821346, "grad_norm": 36.965728759765625, "learning_rate": 6.323716472919185e-06, "loss": 22.2889, "step": 4785 }, { "epoch": 0.4441763341067285, "grad_norm": 51.802974700927734, "learning_rate": 6.322236585837067e-06, "loss": 26.2611, "step": 4786 }, { "epoch": 0.4442691415313225, "grad_norm": 35.85424041748047, "learning_rate": 6.320756574203626e-06, "loss": 22.6761, "step": 4787 }, { "epoch": 0.44436194895591646, "grad_norm": 38.755645751953125, "learning_rate": 6.319276438158271e-06, "loss": 23.8361, "step": 4788 }, { "epoch": 0.44445475638051046, "grad_norm": 36.341041564941406, "learning_rate": 6.317796177840426e-06, "loss": 23.5963, "step": 4789 }, { "epoch": 0.4445475638051044, "grad_norm": 35.52820587158203, "learning_rate": 6.316315793389531e-06, "loss": 20.8315, "step": 4790 }, { "epoch": 0.44464037122969835, "grad_norm": 38.489097595214844, "learning_rate": 6.3148352849450336e-06, "loss": 24.0371, "step": 4791 }, { "epoch": 0.44473317865429235, "grad_norm": 51.27794647216797, "learning_rate": 6.313354652646392e-06, "loss": 23.6956, "step": 4792 }, { "epoch": 0.4448259860788863, "grad_norm": 38.010765075683594, "learning_rate": 6.311873896633081e-06, "loss": 23.747, "step": 4793 }, { "epoch": 0.4449187935034803, "grad_norm": 34.99460983276367, "learning_rate": 6.310393017044581e-06, "loss": 23.8239, "step": 4794 }, { "epoch": 0.44501160092807424, "grad_norm": 34.0656852722168, "learning_rate": 6.3089120140203885e-06, "loss": 27.1757, "step": 4795 }, { "epoch": 0.44510440835266823, "grad_norm": 37.999549865722656, "learning_rate": 6.3074308877000125e-06, "loss": 23.4637, "step": 4796 }, { "epoch": 0.4451972157772622, "grad_norm": 37.21942901611328, "learning_rate": 6.305949638222967e-06, "loss": 22.7546, "step": 4797 }, { "epoch": 0.4452900232018561, "grad_norm": 37.542152404785156, "learning_rate": 6.3044682657287845e-06, "loss": 24.1174, "step": 4798 }, { "epoch": 0.4453828306264501, "grad_norm": 39.267112731933594, "learning_rate": 6.3029867703570055e-06, "loss": 24.2103, "step": 4799 }, { "epoch": 0.44547563805104406, "grad_norm": 39.01575469970703, "learning_rate": 6.301505152247185e-06, "loss": 22.4785, "step": 4800 }, { "epoch": 0.44556844547563806, "grad_norm": 37.625606536865234, "learning_rate": 6.300023411538887e-06, "loss": 23.2607, "step": 4801 }, { "epoch": 0.445661252900232, "grad_norm": 35.09849548339844, "learning_rate": 6.298541548371684e-06, "loss": 23.9962, "step": 4802 }, { "epoch": 0.445754060324826, "grad_norm": 50.29370880126953, "learning_rate": 6.29705956288517e-06, "loss": 23.4165, "step": 4803 }, { "epoch": 0.44584686774941995, "grad_norm": 38.273597717285156, "learning_rate": 6.29557745521894e-06, "loss": 22.2698, "step": 4804 }, { "epoch": 0.4459396751740139, "grad_norm": 38.30338668823242, "learning_rate": 6.294095225512604e-06, "loss": 24.453, "step": 4805 }, { "epoch": 0.4460324825986079, "grad_norm": 34.95662307739258, "learning_rate": 6.2926128739057875e-06, "loss": 25.8094, "step": 4806 }, { "epoch": 0.44612529002320184, "grad_norm": 35.283321380615234, "learning_rate": 6.291130400538123e-06, "loss": 23.6824, "step": 4807 }, { "epoch": 0.44621809744779584, "grad_norm": 55.58103561401367, "learning_rate": 6.289647805549256e-06, "loss": 22.8018, "step": 4808 }, { "epoch": 0.4463109048723898, "grad_norm": 33.61968994140625, "learning_rate": 6.288165089078843e-06, "loss": 22.5768, "step": 4809 }, { "epoch": 0.4464037122969838, "grad_norm": 46.35604476928711, "learning_rate": 6.2866822512665515e-06, "loss": 23.1813, "step": 4810 }, { "epoch": 0.4464965197215777, "grad_norm": 38.38247299194336, "learning_rate": 6.28519929225206e-06, "loss": 24.4064, "step": 4811 }, { "epoch": 0.44658932714617167, "grad_norm": 37.552894592285156, "learning_rate": 6.283716212175062e-06, "loss": 24.3365, "step": 4812 }, { "epoch": 0.44668213457076567, "grad_norm": 43.53669357299805, "learning_rate": 6.28223301117526e-06, "loss": 24.6375, "step": 4813 }, { "epoch": 0.4467749419953596, "grad_norm": 34.840335845947266, "learning_rate": 6.280749689392365e-06, "loss": 24.5706, "step": 4814 }, { "epoch": 0.4468677494199536, "grad_norm": 55.9781608581543, "learning_rate": 6.279266246966104e-06, "loss": 23.8078, "step": 4815 }, { "epoch": 0.44696055684454755, "grad_norm": 35.33032989501953, "learning_rate": 6.277782684036213e-06, "loss": 22.8145, "step": 4816 }, { "epoch": 0.44705336426914155, "grad_norm": 36.82421875, "learning_rate": 6.276299000742441e-06, "loss": 22.5505, "step": 4817 }, { "epoch": 0.4471461716937355, "grad_norm": 36.08983612060547, "learning_rate": 6.2748151972245455e-06, "loss": 23.0911, "step": 4818 }, { "epoch": 0.4472389791183295, "grad_norm": 36.4804573059082, "learning_rate": 6.273331273622298e-06, "loss": 23.1764, "step": 4819 }, { "epoch": 0.44733178654292344, "grad_norm": 38.8491096496582, "learning_rate": 6.271847230075481e-06, "loss": 24.372, "step": 4820 }, { "epoch": 0.4474245939675174, "grad_norm": 34.81413650512695, "learning_rate": 6.270363066723884e-06, "loss": 21.6035, "step": 4821 }, { "epoch": 0.4475174013921114, "grad_norm": 36.88469314575195, "learning_rate": 6.2688787837073174e-06, "loss": 23.8706, "step": 4822 }, { "epoch": 0.4476102088167053, "grad_norm": 39.39130783081055, "learning_rate": 6.2673943811655926e-06, "loss": 21.8292, "step": 4823 }, { "epoch": 0.4477030162412993, "grad_norm": 38.12639617919922, "learning_rate": 6.265909859238536e-06, "loss": 22.7356, "step": 4824 }, { "epoch": 0.44779582366589327, "grad_norm": 34.86381149291992, "learning_rate": 6.264425218065988e-06, "loss": 24.3146, "step": 4825 }, { "epoch": 0.44788863109048727, "grad_norm": 55.07011795043945, "learning_rate": 6.2629404577877986e-06, "loss": 23.8275, "step": 4826 }, { "epoch": 0.4479814385150812, "grad_norm": 34.722293853759766, "learning_rate": 6.261455578543827e-06, "loss": 23.534, "step": 4827 }, { "epoch": 0.44807424593967515, "grad_norm": 36.782928466796875, "learning_rate": 6.259970580473944e-06, "loss": 21.8217, "step": 4828 }, { "epoch": 0.44816705336426915, "grad_norm": 35.312339782714844, "learning_rate": 6.258485463718035e-06, "loss": 23.8533, "step": 4829 }, { "epoch": 0.4482598607888631, "grad_norm": 36.04681396484375, "learning_rate": 6.257000228415994e-06, "loss": 22.6362, "step": 4830 }, { "epoch": 0.4483526682134571, "grad_norm": 39.79847717285156, "learning_rate": 6.255514874707723e-06, "loss": 25.0083, "step": 4831 }, { "epoch": 0.44844547563805104, "grad_norm": 36.70209884643555, "learning_rate": 6.254029402733144e-06, "loss": 22.5837, "step": 4832 }, { "epoch": 0.44853828306264504, "grad_norm": 38.621822357177734, "learning_rate": 6.2525438126321805e-06, "loss": 23.8821, "step": 4833 }, { "epoch": 0.448631090487239, "grad_norm": 37.150352478027344, "learning_rate": 6.251058104544772e-06, "loss": 22.8224, "step": 4834 }, { "epoch": 0.4487238979118329, "grad_norm": 37.64788055419922, "learning_rate": 6.249572278610871e-06, "loss": 22.4726, "step": 4835 }, { "epoch": 0.4488167053364269, "grad_norm": 36.3332633972168, "learning_rate": 6.248086334970435e-06, "loss": 23.5424, "step": 4836 }, { "epoch": 0.44890951276102087, "grad_norm": 35.073055267333984, "learning_rate": 6.246600273763437e-06, "loss": 22.4319, "step": 4837 }, { "epoch": 0.44900232018561487, "grad_norm": 42.004127502441406, "learning_rate": 6.2451140951298626e-06, "loss": 24.815, "step": 4838 }, { "epoch": 0.4490951276102088, "grad_norm": 39.45820617675781, "learning_rate": 6.243627799209704e-06, "loss": 23.4434, "step": 4839 }, { "epoch": 0.4491879350348028, "grad_norm": 39.40226745605469, "learning_rate": 6.2421413861429655e-06, "loss": 24.5801, "step": 4840 }, { "epoch": 0.44928074245939675, "grad_norm": 39.12109375, "learning_rate": 6.240654856069667e-06, "loss": 24.6974, "step": 4841 }, { "epoch": 0.4493735498839907, "grad_norm": 38.69158935546875, "learning_rate": 6.239168209129832e-06, "loss": 24.9103, "step": 4842 }, { "epoch": 0.4494663573085847, "grad_norm": 37.7100944519043, "learning_rate": 6.237681445463501e-06, "loss": 23.3408, "step": 4843 }, { "epoch": 0.44955916473317864, "grad_norm": 38.96684265136719, "learning_rate": 6.2361945652107225e-06, "loss": 25.5619, "step": 4844 }, { "epoch": 0.44965197215777264, "grad_norm": 34.904640197753906, "learning_rate": 6.234707568511558e-06, "loss": 24.9976, "step": 4845 }, { "epoch": 0.4497447795823666, "grad_norm": 36.21414566040039, "learning_rate": 6.233220455506076e-06, "loss": 24.3525, "step": 4846 }, { "epoch": 0.4498375870069606, "grad_norm": 38.14988327026367, "learning_rate": 6.231733226334362e-06, "loss": 24.086, "step": 4847 }, { "epoch": 0.4499303944315545, "grad_norm": 37.26369094848633, "learning_rate": 6.230245881136509e-06, "loss": 23.2676, "step": 4848 }, { "epoch": 0.45002320185614847, "grad_norm": 36.96419906616211, "learning_rate": 6.2287584200526185e-06, "loss": 24.3298, "step": 4849 }, { "epoch": 0.45011600928074247, "grad_norm": 36.15562438964844, "learning_rate": 6.227270843222808e-06, "loss": 22.2285, "step": 4850 }, { "epoch": 0.4502088167053364, "grad_norm": 36.220211029052734, "learning_rate": 6.225783150787203e-06, "loss": 23.9259, "step": 4851 }, { "epoch": 0.4503016241299304, "grad_norm": 38.729644775390625, "learning_rate": 6.22429534288594e-06, "loss": 21.9719, "step": 4852 }, { "epoch": 0.45039443155452435, "grad_norm": 35.01749801635742, "learning_rate": 6.222807419659165e-06, "loss": 22.4133, "step": 4853 }, { "epoch": 0.45048723897911835, "grad_norm": 37.92046356201172, "learning_rate": 6.22131938124704e-06, "loss": 23.0323, "step": 4854 }, { "epoch": 0.4505800464037123, "grad_norm": 41.51595687866211, "learning_rate": 6.219831227789732e-06, "loss": 24.2028, "step": 4855 }, { "epoch": 0.45067285382830624, "grad_norm": 37.085479736328125, "learning_rate": 6.218342959427421e-06, "loss": 24.2827, "step": 4856 }, { "epoch": 0.45076566125290024, "grad_norm": 35.127723693847656, "learning_rate": 6.2168545763003e-06, "loss": 23.2337, "step": 4857 }, { "epoch": 0.4508584686774942, "grad_norm": 36.963443756103516, "learning_rate": 6.215366078548571e-06, "loss": 23.8247, "step": 4858 }, { "epoch": 0.4509512761020882, "grad_norm": 38.382572174072266, "learning_rate": 6.213877466312444e-06, "loss": 24.4901, "step": 4859 }, { "epoch": 0.4510440835266821, "grad_norm": 33.89336013793945, "learning_rate": 6.2123887397321456e-06, "loss": 23.3515, "step": 4860 }, { "epoch": 0.4511368909512761, "grad_norm": 36.43353271484375, "learning_rate": 6.210899898947908e-06, "loss": 21.2399, "step": 4861 }, { "epoch": 0.45122969837587007, "grad_norm": 35.398658752441406, "learning_rate": 6.209410944099977e-06, "loss": 22.315, "step": 4862 }, { "epoch": 0.451322505800464, "grad_norm": 37.39827346801758, "learning_rate": 6.207921875328608e-06, "loss": 24.167, "step": 4863 }, { "epoch": 0.451415313225058, "grad_norm": 37.32732009887695, "learning_rate": 6.2064326927740695e-06, "loss": 24.6283, "step": 4864 }, { "epoch": 0.45150812064965196, "grad_norm": 33.428348541259766, "learning_rate": 6.204943396576634e-06, "loss": 22.3006, "step": 4865 }, { "epoch": 0.45160092807424596, "grad_norm": 66.28421783447266, "learning_rate": 6.203453986876594e-06, "loss": 24.6272, "step": 4866 }, { "epoch": 0.4516937354988399, "grad_norm": 35.5067024230957, "learning_rate": 6.201964463814247e-06, "loss": 23.8757, "step": 4867 }, { "epoch": 0.4517865429234339, "grad_norm": 36.6301383972168, "learning_rate": 6.2004748275299e-06, "loss": 24.6007, "step": 4868 }, { "epoch": 0.45187935034802784, "grad_norm": 35.739959716796875, "learning_rate": 6.198985078163876e-06, "loss": 24.8578, "step": 4869 }, { "epoch": 0.4519721577726218, "grad_norm": 36.51668930053711, "learning_rate": 6.197495215856504e-06, "loss": 23.2734, "step": 4870 }, { "epoch": 0.4520649651972158, "grad_norm": 38.067527770996094, "learning_rate": 6.196005240748125e-06, "loss": 25.547, "step": 4871 }, { "epoch": 0.45215777262180973, "grad_norm": 38.240013122558594, "learning_rate": 6.194515152979093e-06, "loss": 23.8777, "step": 4872 }, { "epoch": 0.4522505800464037, "grad_norm": 38.23591995239258, "learning_rate": 6.193024952689767e-06, "loss": 23.0163, "step": 4873 }, { "epoch": 0.45234338747099767, "grad_norm": 34.89787673950195, "learning_rate": 6.191534640020522e-06, "loss": 22.6035, "step": 4874 }, { "epoch": 0.45243619489559167, "grad_norm": 38.676204681396484, "learning_rate": 6.190044215111743e-06, "loss": 23.8444, "step": 4875 }, { "epoch": 0.4525290023201856, "grad_norm": 37.85464859008789, "learning_rate": 6.188553678103823e-06, "loss": 24.4516, "step": 4876 }, { "epoch": 0.45262180974477956, "grad_norm": 38.034912109375, "learning_rate": 6.187063029137166e-06, "loss": 23.8072, "step": 4877 }, { "epoch": 0.45271461716937356, "grad_norm": 34.14848709106445, "learning_rate": 6.1855722683521865e-06, "loss": 20.9632, "step": 4878 }, { "epoch": 0.4528074245939675, "grad_norm": 41.69614791870117, "learning_rate": 6.1840813958893145e-06, "loss": 22.6906, "step": 4879 }, { "epoch": 0.4529002320185615, "grad_norm": 49.01316833496094, "learning_rate": 6.182590411888982e-06, "loss": 23.8178, "step": 4880 }, { "epoch": 0.45299303944315544, "grad_norm": 37.70463943481445, "learning_rate": 6.181099316491639e-06, "loss": 23.8989, "step": 4881 }, { "epoch": 0.45308584686774944, "grad_norm": 38.071136474609375, "learning_rate": 6.179608109837742e-06, "loss": 23.3132, "step": 4882 }, { "epoch": 0.4531786542923434, "grad_norm": 42.94044876098633, "learning_rate": 6.178116792067757e-06, "loss": 24.0277, "step": 4883 }, { "epoch": 0.45327146171693733, "grad_norm": 71.26327514648438, "learning_rate": 6.176625363322164e-06, "loss": 23.5052, "step": 4884 }, { "epoch": 0.45336426914153133, "grad_norm": 39.38275909423828, "learning_rate": 6.175133823741453e-06, "loss": 22.9252, "step": 4885 }, { "epoch": 0.4534570765661253, "grad_norm": 35.02621078491211, "learning_rate": 6.17364217346612e-06, "loss": 22.2379, "step": 4886 }, { "epoch": 0.45354988399071927, "grad_norm": 33.87840270996094, "learning_rate": 6.172150412636678e-06, "loss": 22.8702, "step": 4887 }, { "epoch": 0.4536426914153132, "grad_norm": 36.948123931884766, "learning_rate": 6.1706585413936445e-06, "loss": 22.7148, "step": 4888 }, { "epoch": 0.4537354988399072, "grad_norm": 7770.25, "learning_rate": 6.169166559877552e-06, "loss": 28.4337, "step": 4889 }, { "epoch": 0.45382830626450116, "grad_norm": 40.12255096435547, "learning_rate": 6.1676744682289415e-06, "loss": 22.7912, "step": 4890 }, { "epoch": 0.4539211136890951, "grad_norm": 42.63099670410156, "learning_rate": 6.166182266588361e-06, "loss": 22.8854, "step": 4891 }, { "epoch": 0.4540139211136891, "grad_norm": 41.47690200805664, "learning_rate": 6.164689955096374e-06, "loss": 22.7649, "step": 4892 }, { "epoch": 0.45410672853828304, "grad_norm": 35.671329498291016, "learning_rate": 6.163197533893555e-06, "loss": 23.9617, "step": 4893 }, { "epoch": 0.45419953596287704, "grad_norm": 42.14841842651367, "learning_rate": 6.161705003120483e-06, "loss": 22.8971, "step": 4894 }, { "epoch": 0.454292343387471, "grad_norm": 41.169639587402344, "learning_rate": 6.160212362917751e-06, "loss": 23.4716, "step": 4895 }, { "epoch": 0.454385150812065, "grad_norm": 40.9019889831543, "learning_rate": 6.158719613425964e-06, "loss": 21.8375, "step": 4896 }, { "epoch": 0.45447795823665893, "grad_norm": 34.90074920654297, "learning_rate": 6.157226754785733e-06, "loss": 21.9886, "step": 4897 }, { "epoch": 0.4545707656612529, "grad_norm": 41.08298873901367, "learning_rate": 6.155733787137682e-06, "loss": 23.6906, "step": 4898 }, { "epoch": 0.4546635730858469, "grad_norm": 39.470924377441406, "learning_rate": 6.1542407106224455e-06, "loss": 23.8398, "step": 4899 }, { "epoch": 0.4547563805104408, "grad_norm": 42.300289154052734, "learning_rate": 6.152747525380666e-06, "loss": 23.3207, "step": 4900 }, { "epoch": 0.4548491879350348, "grad_norm": 38.3836784362793, "learning_rate": 6.151254231553e-06, "loss": 26.6262, "step": 4901 }, { "epoch": 0.45494199535962876, "grad_norm": 39.0955810546875, "learning_rate": 6.1497608292801105e-06, "loss": 24.7963, "step": 4902 }, { "epoch": 0.45503480278422276, "grad_norm": 39.37175750732422, "learning_rate": 6.148267318702672e-06, "loss": 23.7381, "step": 4903 }, { "epoch": 0.4551276102088167, "grad_norm": 36.38678741455078, "learning_rate": 6.14677369996137e-06, "loss": 22.0973, "step": 4904 }, { "epoch": 0.45522041763341065, "grad_norm": 37.52760314941406, "learning_rate": 6.145279973196901e-06, "loss": 23.3181, "step": 4905 }, { "epoch": 0.45531322505800464, "grad_norm": 40.04362487792969, "learning_rate": 6.1437861385499665e-06, "loss": 23.6471, "step": 4906 }, { "epoch": 0.4554060324825986, "grad_norm": 40.20563888549805, "learning_rate": 6.142292196161285e-06, "loss": 26.2114, "step": 4907 }, { "epoch": 0.4554988399071926, "grad_norm": 35.48456573486328, "learning_rate": 6.140798146171581e-06, "loss": 24.6254, "step": 4908 }, { "epoch": 0.45559164733178653, "grad_norm": 40.793373107910156, "learning_rate": 6.139303988721591e-06, "loss": 23.9566, "step": 4909 }, { "epoch": 0.45568445475638053, "grad_norm": 38.88969039916992, "learning_rate": 6.13780972395206e-06, "loss": 23.7187, "step": 4910 }, { "epoch": 0.4557772621809745, "grad_norm": 36.34890365600586, "learning_rate": 6.1363153520037445e-06, "loss": 23.636, "step": 4911 }, { "epoch": 0.4558700696055685, "grad_norm": 38.60865783691406, "learning_rate": 6.13482087301741e-06, "loss": 24.5777, "step": 4912 }, { "epoch": 0.4559628770301624, "grad_norm": 37.329124450683594, "learning_rate": 6.133326287133832e-06, "loss": 24.6753, "step": 4913 }, { "epoch": 0.45605568445475636, "grad_norm": 35.43076705932617, "learning_rate": 6.1318315944937985e-06, "loss": 23.6763, "step": 4914 }, { "epoch": 0.45614849187935036, "grad_norm": 36.44344711303711, "learning_rate": 6.1303367952381045e-06, "loss": 24.2049, "step": 4915 }, { "epoch": 0.4562412993039443, "grad_norm": 35.28415298461914, "learning_rate": 6.128841889507558e-06, "loss": 24.4706, "step": 4916 }, { "epoch": 0.4563341067285383, "grad_norm": 39.79650115966797, "learning_rate": 6.127346877442972e-06, "loss": 24.7607, "step": 4917 }, { "epoch": 0.45642691415313225, "grad_norm": 36.507755279541016, "learning_rate": 6.125851759185175e-06, "loss": 21.4976, "step": 4918 }, { "epoch": 0.45651972157772625, "grad_norm": 39.40718078613281, "learning_rate": 6.124356534875005e-06, "loss": 23.5911, "step": 4919 }, { "epoch": 0.4566125290023202, "grad_norm": 38.71556854248047, "learning_rate": 6.122861204653304e-06, "loss": 24.4871, "step": 4920 }, { "epoch": 0.45670533642691413, "grad_norm": 37.247406005859375, "learning_rate": 6.121365768660933e-06, "loss": 25.9132, "step": 4921 }, { "epoch": 0.45679814385150813, "grad_norm": 39.84709930419922, "learning_rate": 6.119870227038755e-06, "loss": 23.4182, "step": 4922 }, { "epoch": 0.4568909512761021, "grad_norm": 36.40538024902344, "learning_rate": 6.118374579927646e-06, "loss": 23.8961, "step": 4923 }, { "epoch": 0.4569837587006961, "grad_norm": 37.5163459777832, "learning_rate": 6.116878827468495e-06, "loss": 23.5879, "step": 4924 }, { "epoch": 0.45707656612529, "grad_norm": 42.811737060546875, "learning_rate": 6.115382969802197e-06, "loss": 22.9287, "step": 4925 }, { "epoch": 0.457169373549884, "grad_norm": 38.89635467529297, "learning_rate": 6.113887007069657e-06, "loss": 21.8993, "step": 4926 }, { "epoch": 0.45726218097447796, "grad_norm": 34.05547332763672, "learning_rate": 6.1123909394117905e-06, "loss": 24.5706, "step": 4927 }, { "epoch": 0.4573549883990719, "grad_norm": 39.21468734741211, "learning_rate": 6.110894766969525e-06, "loss": 24.1724, "step": 4928 }, { "epoch": 0.4574477958236659, "grad_norm": 35.67378234863281, "learning_rate": 6.109398489883794e-06, "loss": 21.7016, "step": 4929 }, { "epoch": 0.45754060324825985, "grad_norm": 36.963584899902344, "learning_rate": 6.107902108295546e-06, "loss": 23.3749, "step": 4930 }, { "epoch": 0.45763341067285385, "grad_norm": 36.18362045288086, "learning_rate": 6.106405622345733e-06, "loss": 24.9138, "step": 4931 }, { "epoch": 0.4577262180974478, "grad_norm": 35.6588020324707, "learning_rate": 6.104909032175323e-06, "loss": 21.6718, "step": 4932 }, { "epoch": 0.4578190255220418, "grad_norm": 38.89918899536133, "learning_rate": 6.1034123379252895e-06, "loss": 23.0147, "step": 4933 }, { "epoch": 0.45791183294663573, "grad_norm": 51.9432258605957, "learning_rate": 6.1019155397366185e-06, "loss": 25.8889, "step": 4934 }, { "epoch": 0.4580046403712297, "grad_norm": 36.141082763671875, "learning_rate": 6.100418637750302e-06, "loss": 23.9524, "step": 4935 }, { "epoch": 0.4580974477958237, "grad_norm": 36.28806686401367, "learning_rate": 6.098921632107346e-06, "loss": 22.9926, "step": 4936 }, { "epoch": 0.4581902552204176, "grad_norm": 38.279666900634766, "learning_rate": 6.097424522948766e-06, "loss": 24.8315, "step": 4937 }, { "epoch": 0.4582830626450116, "grad_norm": 35.51356887817383, "learning_rate": 6.095927310415584e-06, "loss": 21.8851, "step": 4938 }, { "epoch": 0.45837587006960556, "grad_norm": 35.23210906982422, "learning_rate": 6.094429994648834e-06, "loss": 24.4245, "step": 4939 }, { "epoch": 0.45846867749419956, "grad_norm": 36.73533630371094, "learning_rate": 6.09293257578956e-06, "loss": 23.6094, "step": 4940 }, { "epoch": 0.4585614849187935, "grad_norm": 35.72649002075195, "learning_rate": 6.091435053978815e-06, "loss": 21.2421, "step": 4941 }, { "epoch": 0.45865429234338745, "grad_norm": 602.12353515625, "learning_rate": 6.08993742935766e-06, "loss": 22.6494, "step": 4942 }, { "epoch": 0.45874709976798145, "grad_norm": 49.45653533935547, "learning_rate": 6.088439702067169e-06, "loss": 24.2167, "step": 4943 }, { "epoch": 0.4588399071925754, "grad_norm": 37.73433303833008, "learning_rate": 6.086941872248424e-06, "loss": 23.4496, "step": 4944 }, { "epoch": 0.4589327146171694, "grad_norm": 34.22945785522461, "learning_rate": 6.085443940042516e-06, "loss": 22.5334, "step": 4945 }, { "epoch": 0.45902552204176333, "grad_norm": 37.259761810302734, "learning_rate": 6.083945905590548e-06, "loss": 22.2347, "step": 4946 }, { "epoch": 0.45911832946635733, "grad_norm": 35.423587799072266, "learning_rate": 6.08244776903363e-06, "loss": 23.7726, "step": 4947 }, { "epoch": 0.4592111368909513, "grad_norm": 41.08079147338867, "learning_rate": 6.080949530512882e-06, "loss": 23.9139, "step": 4948 }, { "epoch": 0.4593039443155452, "grad_norm": 37.06245040893555, "learning_rate": 6.0794511901694356e-06, "loss": 21.6656, "step": 4949 }, { "epoch": 0.4593967517401392, "grad_norm": 36.52571105957031, "learning_rate": 6.07795274814443e-06, "loss": 23.4197, "step": 4950 }, { "epoch": 0.45948955916473316, "grad_norm": 33.89413833618164, "learning_rate": 6.0764542045790144e-06, "loss": 21.6061, "step": 4951 }, { "epoch": 0.45958236658932716, "grad_norm": 36.229469299316406, "learning_rate": 6.074955559614348e-06, "loss": 22.9362, "step": 4952 }, { "epoch": 0.4596751740139211, "grad_norm": 38.7841796875, "learning_rate": 6.0734568133916e-06, "loss": 23.7418, "step": 4953 }, { "epoch": 0.4597679814385151, "grad_norm": 37.51893997192383, "learning_rate": 6.071957966051946e-06, "loss": 23.7836, "step": 4954 }, { "epoch": 0.45986078886310905, "grad_norm": 36.196495056152344, "learning_rate": 6.070459017736578e-06, "loss": 24.101, "step": 4955 }, { "epoch": 0.459953596287703, "grad_norm": 36.281490325927734, "learning_rate": 6.068959968586689e-06, "loss": 22.0651, "step": 4956 }, { "epoch": 0.460046403712297, "grad_norm": 43.66861343383789, "learning_rate": 6.067460818743488e-06, "loss": 24.8418, "step": 4957 }, { "epoch": 0.46013921113689094, "grad_norm": 34.85944747924805, "learning_rate": 6.0659615683481885e-06, "loss": 22.8871, "step": 4958 }, { "epoch": 0.46023201856148493, "grad_norm": 37.660865783691406, "learning_rate": 6.064462217542021e-06, "loss": 24.1033, "step": 4959 }, { "epoch": 0.4603248259860789, "grad_norm": 36.97013854980469, "learning_rate": 6.062962766466215e-06, "loss": 23.9568, "step": 4960 }, { "epoch": 0.4604176334106729, "grad_norm": 37.638916015625, "learning_rate": 6.061463215262018e-06, "loss": 24.1384, "step": 4961 }, { "epoch": 0.4605104408352668, "grad_norm": 38.6801643371582, "learning_rate": 6.059963564070683e-06, "loss": 23.358, "step": 4962 }, { "epoch": 0.46060324825986076, "grad_norm": 36.054317474365234, "learning_rate": 6.058463813033475e-06, "loss": 25.9235, "step": 4963 }, { "epoch": 0.46069605568445476, "grad_norm": 35.094844818115234, "learning_rate": 6.056963962291666e-06, "loss": 23.5856, "step": 4964 }, { "epoch": 0.4607888631090487, "grad_norm": 39.30080032348633, "learning_rate": 6.055464011986535e-06, "loss": 21.7692, "step": 4965 }, { "epoch": 0.4608816705336427, "grad_norm": 38.78770446777344, "learning_rate": 6.053963962259377e-06, "loss": 24.5869, "step": 4966 }, { "epoch": 0.46097447795823665, "grad_norm": 35.11830520629883, "learning_rate": 6.0524638132514925e-06, "loss": 21.8693, "step": 4967 }, { "epoch": 0.46106728538283065, "grad_norm": 39.03836441040039, "learning_rate": 6.050963565104191e-06, "loss": 24.7939, "step": 4968 }, { "epoch": 0.4611600928074246, "grad_norm": 41.571407318115234, "learning_rate": 6.049463217958792e-06, "loss": 22.0412, "step": 4969 }, { "epoch": 0.46125290023201854, "grad_norm": 40.08620071411133, "learning_rate": 6.047962771956624e-06, "loss": 23.0496, "step": 4970 }, { "epoch": 0.46134570765661254, "grad_norm": 40.96708297729492, "learning_rate": 6.0464622272390275e-06, "loss": 23.9878, "step": 4971 }, { "epoch": 0.4614385150812065, "grad_norm": 45.162879943847656, "learning_rate": 6.044961583947348e-06, "loss": 24.7433, "step": 4972 }, { "epoch": 0.4615313225058005, "grad_norm": 40.7441291809082, "learning_rate": 6.04346084222294e-06, "loss": 23.1091, "step": 4973 }, { "epoch": 0.4616241299303944, "grad_norm": 34.39943313598633, "learning_rate": 6.041960002207174e-06, "loss": 23.2561, "step": 4974 }, { "epoch": 0.4617169373549884, "grad_norm": 36.52997970581055, "learning_rate": 6.040459064041425e-06, "loss": 22.792, "step": 4975 }, { "epoch": 0.46180974477958237, "grad_norm": 40.27249526977539, "learning_rate": 6.038958027867074e-06, "loss": 24.0387, "step": 4976 }, { "epoch": 0.4619025522041763, "grad_norm": 41.963478088378906, "learning_rate": 6.037456893825517e-06, "loss": 22.8391, "step": 4977 }, { "epoch": 0.4619953596287703, "grad_norm": 44.22065353393555, "learning_rate": 6.035955662058157e-06, "loss": 23.2848, "step": 4978 }, { "epoch": 0.46208816705336425, "grad_norm": 39.5473747253418, "learning_rate": 6.034454332706407e-06, "loss": 26.2472, "step": 4979 }, { "epoch": 0.46218097447795825, "grad_norm": 42.02418518066406, "learning_rate": 6.032952905911686e-06, "loss": 22.8919, "step": 4980 }, { "epoch": 0.4622737819025522, "grad_norm": 36.76453399658203, "learning_rate": 6.031451381815427e-06, "loss": 24.5395, "step": 4981 }, { "epoch": 0.4623665893271462, "grad_norm": 39.305084228515625, "learning_rate": 6.029949760559069e-06, "loss": 23.4374, "step": 4982 }, { "epoch": 0.46245939675174014, "grad_norm": 36.2550163269043, "learning_rate": 6.0284480422840594e-06, "loss": 22.512, "step": 4983 }, { "epoch": 0.4625522041763341, "grad_norm": 46.50851058959961, "learning_rate": 6.026946227131859e-06, "loss": 26.8198, "step": 4984 }, { "epoch": 0.4626450116009281, "grad_norm": 38.131324768066406, "learning_rate": 6.0254443152439335e-06, "loss": 24.9773, "step": 4985 }, { "epoch": 0.462737819025522, "grad_norm": 37.59187698364258, "learning_rate": 6.023942306761758e-06, "loss": 24.3601, "step": 4986 }, { "epoch": 0.462830626450116, "grad_norm": 35.85721969604492, "learning_rate": 6.022440201826819e-06, "loss": 23.4833, "step": 4987 }, { "epoch": 0.46292343387470997, "grad_norm": 40.83408737182617, "learning_rate": 6.020938000580613e-06, "loss": 22.7647, "step": 4988 }, { "epoch": 0.46301624129930397, "grad_norm": 105.72774505615234, "learning_rate": 6.019435703164639e-06, "loss": 24.6817, "step": 4989 }, { "epoch": 0.4631090487238979, "grad_norm": 38.025169372558594, "learning_rate": 6.017933309720414e-06, "loss": 23.4069, "step": 4990 }, { "epoch": 0.46320185614849185, "grad_norm": 38.71463394165039, "learning_rate": 6.016430820389457e-06, "loss": 24.985, "step": 4991 }, { "epoch": 0.46329466357308585, "grad_norm": 37.537166595458984, "learning_rate": 6.014928235313301e-06, "loss": 24.4118, "step": 4992 }, { "epoch": 0.4633874709976798, "grad_norm": 35.92206573486328, "learning_rate": 6.013425554633482e-06, "loss": 21.3141, "step": 4993 }, { "epoch": 0.4634802784222738, "grad_norm": 36.243438720703125, "learning_rate": 6.011922778491552e-06, "loss": 23.3985, "step": 4994 }, { "epoch": 0.46357308584686774, "grad_norm": 60.42271423339844, "learning_rate": 6.010419907029068e-06, "loss": 23.3383, "step": 4995 }, { "epoch": 0.46366589327146174, "grad_norm": 38.40427017211914, "learning_rate": 6.008916940387596e-06, "loss": 22.4514, "step": 4996 }, { "epoch": 0.4637587006960557, "grad_norm": 40.238365173339844, "learning_rate": 6.007413878708711e-06, "loss": 23.3063, "step": 4997 }, { "epoch": 0.4638515081206496, "grad_norm": 37.200077056884766, "learning_rate": 6.005910722134001e-06, "loss": 23.6325, "step": 4998 }, { "epoch": 0.4639443155452436, "grad_norm": 37.046142578125, "learning_rate": 6.004407470805054e-06, "loss": 21.3095, "step": 4999 }, { "epoch": 0.46403712296983757, "grad_norm": 36.07380676269531, "learning_rate": 6.002904124863477e-06, "loss": 22.901, "step": 5000 }, { "epoch": 0.46412993039443157, "grad_norm": 51.787376403808594, "learning_rate": 6.00140068445088e-06, "loss": 24.924, "step": 5001 }, { "epoch": 0.4642227378190255, "grad_norm": 41.160255432128906, "learning_rate": 5.999897149708882e-06, "loss": 23.1015, "step": 5002 }, { "epoch": 0.4643155452436195, "grad_norm": 35.44871139526367, "learning_rate": 5.998393520779115e-06, "loss": 24.8718, "step": 5003 }, { "epoch": 0.46440835266821345, "grad_norm": 40.46607971191406, "learning_rate": 5.996889797803214e-06, "loss": 20.9193, "step": 5004 }, { "epoch": 0.4645011600928074, "grad_norm": 36.39933776855469, "learning_rate": 5.995385980922827e-06, "loss": 23.417, "step": 5005 }, { "epoch": 0.4645939675174014, "grad_norm": 38.77473831176758, "learning_rate": 5.9938820702796105e-06, "loss": 25.2939, "step": 5006 }, { "epoch": 0.46468677494199534, "grad_norm": 34.88372039794922, "learning_rate": 5.992378066015227e-06, "loss": 21.942, "step": 5007 }, { "epoch": 0.46477958236658934, "grad_norm": 41.731929779052734, "learning_rate": 5.990873968271355e-06, "loss": 23.8863, "step": 5008 }, { "epoch": 0.4648723897911833, "grad_norm": 35.940181732177734, "learning_rate": 5.98936977718967e-06, "loss": 24.4836, "step": 5009 }, { "epoch": 0.4649651972157773, "grad_norm": 38.81822204589844, "learning_rate": 5.987865492911866e-06, "loss": 22.8218, "step": 5010 }, { "epoch": 0.4650580046403712, "grad_norm": 39.098106384277344, "learning_rate": 5.986361115579644e-06, "loss": 23.2665, "step": 5011 }, { "epoch": 0.4651508120649652, "grad_norm": 35.7507438659668, "learning_rate": 5.984856645334711e-06, "loss": 21.6024, "step": 5012 }, { "epoch": 0.46524361948955917, "grad_norm": 37.29914474487305, "learning_rate": 5.983352082318785e-06, "loss": 22.1168, "step": 5013 }, { "epoch": 0.4653364269141531, "grad_norm": 38.73807144165039, "learning_rate": 5.981847426673592e-06, "loss": 24.2415, "step": 5014 }, { "epoch": 0.4654292343387471, "grad_norm": 37.3509635925293, "learning_rate": 5.980342678540864e-06, "loss": 23.8425, "step": 5015 }, { "epoch": 0.46552204176334105, "grad_norm": 36.91520309448242, "learning_rate": 5.978837838062348e-06, "loss": 22.6429, "step": 5016 }, { "epoch": 0.46561484918793505, "grad_norm": 39.02547073364258, "learning_rate": 5.977332905379796e-06, "loss": 22.1479, "step": 5017 }, { "epoch": 0.465707656612529, "grad_norm": 39.990577697753906, "learning_rate": 5.9758278806349655e-06, "loss": 22.5778, "step": 5018 }, { "epoch": 0.465800464037123, "grad_norm": 43.198787689208984, "learning_rate": 5.974322763969631e-06, "loss": 21.987, "step": 5019 }, { "epoch": 0.46589327146171694, "grad_norm": 34.98672103881836, "learning_rate": 5.972817555525566e-06, "loss": 22.623, "step": 5020 }, { "epoch": 0.4659860788863109, "grad_norm": 39.16886901855469, "learning_rate": 5.971312255444559e-06, "loss": 24.8887, "step": 5021 }, { "epoch": 0.4660788863109049, "grad_norm": 40.87117385864258, "learning_rate": 5.969806863868407e-06, "loss": 22.5355, "step": 5022 }, { "epoch": 0.4661716937354988, "grad_norm": 41.96625518798828, "learning_rate": 5.968301380938914e-06, "loss": 23.7418, "step": 5023 }, { "epoch": 0.4662645011600928, "grad_norm": 41.83522033691406, "learning_rate": 5.9667958067978885e-06, "loss": 22.2647, "step": 5024 }, { "epoch": 0.46635730858468677, "grad_norm": 39.89690399169922, "learning_rate": 5.965290141587156e-06, "loss": 22.6938, "step": 5025 }, { "epoch": 0.46645011600928077, "grad_norm": 38.04779815673828, "learning_rate": 5.963784385448545e-06, "loss": 24.2516, "step": 5026 }, { "epoch": 0.4665429234338747, "grad_norm": 39.22600173950195, "learning_rate": 5.962278538523893e-06, "loss": 24.7697, "step": 5027 }, { "epoch": 0.46663573085846866, "grad_norm": 37.893280029296875, "learning_rate": 5.9607726009550494e-06, "loss": 23.6523, "step": 5028 }, { "epoch": 0.46672853828306266, "grad_norm": 40.50224304199219, "learning_rate": 5.959266572883867e-06, "loss": 22.8729, "step": 5029 }, { "epoch": 0.4668213457076566, "grad_norm": 38.07081985473633, "learning_rate": 5.957760454452211e-06, "loss": 22.7657, "step": 5030 }, { "epoch": 0.4669141531322506, "grad_norm": 35.92424011230469, "learning_rate": 5.956254245801955e-06, "loss": 25.115, "step": 5031 }, { "epoch": 0.46700696055684454, "grad_norm": 39.52470016479492, "learning_rate": 5.954747947074978e-06, "loss": 23.6656, "step": 5032 }, { "epoch": 0.46709976798143854, "grad_norm": 33.73527526855469, "learning_rate": 5.953241558413171e-06, "loss": 22.9221, "step": 5033 }, { "epoch": 0.4671925754060325, "grad_norm": 37.63105010986328, "learning_rate": 5.9517350799584305e-06, "loss": 23.7096, "step": 5034 }, { "epoch": 0.46728538283062643, "grad_norm": 38.8216438293457, "learning_rate": 5.950228511852665e-06, "loss": 20.4888, "step": 5035 }, { "epoch": 0.4673781902552204, "grad_norm": 39.10382080078125, "learning_rate": 5.948721854237788e-06, "loss": 25.2251, "step": 5036 }, { "epoch": 0.46747099767981437, "grad_norm": 43.0391960144043, "learning_rate": 5.947215107255721e-06, "loss": 23.0055, "step": 5037 }, { "epoch": 0.46756380510440837, "grad_norm": 35.82254409790039, "learning_rate": 5.9457082710484e-06, "loss": 23.4988, "step": 5038 }, { "epoch": 0.4676566125290023, "grad_norm": 37.91384506225586, "learning_rate": 5.944201345757763e-06, "loss": 22.7211, "step": 5039 }, { "epoch": 0.4677494199535963, "grad_norm": 40.451473236083984, "learning_rate": 5.942694331525758e-06, "loss": 23.5945, "step": 5040 }, { "epoch": 0.46784222737819026, "grad_norm": 46.0028190612793, "learning_rate": 5.941187228494342e-06, "loss": 23.9368, "step": 5041 }, { "epoch": 0.4679350348027842, "grad_norm": 38.91112518310547, "learning_rate": 5.939680036805482e-06, "loss": 24.2651, "step": 5042 }, { "epoch": 0.4680278422273782, "grad_norm": 35.2117805480957, "learning_rate": 5.938172756601149e-06, "loss": 23.6877, "step": 5043 }, { "epoch": 0.46812064965197214, "grad_norm": 36.46922302246094, "learning_rate": 5.936665388023326e-06, "loss": 24.3555, "step": 5044 }, { "epoch": 0.46821345707656614, "grad_norm": 43.613162994384766, "learning_rate": 5.935157931214005e-06, "loss": 24.7118, "step": 5045 }, { "epoch": 0.4683062645011601, "grad_norm": 39.59371566772461, "learning_rate": 5.9336503863151825e-06, "loss": 24.2408, "step": 5046 }, { "epoch": 0.4683990719257541, "grad_norm": 39.7075309753418, "learning_rate": 5.932142753468865e-06, "loss": 22.5084, "step": 5047 }, { "epoch": 0.46849187935034803, "grad_norm": 39.74888229370117, "learning_rate": 5.9306350328170705e-06, "loss": 23.4961, "step": 5048 }, { "epoch": 0.46858468677494197, "grad_norm": 34.85017776489258, "learning_rate": 5.929127224501818e-06, "loss": 22.8138, "step": 5049 }, { "epoch": 0.46867749419953597, "grad_norm": 37.55257034301758, "learning_rate": 5.927619328665145e-06, "loss": 22.197, "step": 5050 }, { "epoch": 0.4687703016241299, "grad_norm": 37.14033889770508, "learning_rate": 5.926111345449087e-06, "loss": 23.5343, "step": 5051 }, { "epoch": 0.4688631090487239, "grad_norm": 35.652015686035156, "learning_rate": 5.924603274995693e-06, "loss": 23.1411, "step": 5052 }, { "epoch": 0.46895591647331786, "grad_norm": 40.86742401123047, "learning_rate": 5.923095117447021e-06, "loss": 24.2965, "step": 5053 }, { "epoch": 0.46904872389791186, "grad_norm": 36.80080795288086, "learning_rate": 5.921586872945133e-06, "loss": 22.8362, "step": 5054 }, { "epoch": 0.4691415313225058, "grad_norm": 33.235416412353516, "learning_rate": 5.9200785416321045e-06, "loss": 22.4005, "step": 5055 }, { "epoch": 0.46923433874709974, "grad_norm": 59.803951263427734, "learning_rate": 5.918570123650016e-06, "loss": 21.8692, "step": 5056 }, { "epoch": 0.46932714617169374, "grad_norm": 39.105953216552734, "learning_rate": 5.917061619140955e-06, "loss": 23.436, "step": 5057 }, { "epoch": 0.4694199535962877, "grad_norm": 39.33698272705078, "learning_rate": 5.915553028247021e-06, "loss": 23.2916, "step": 5058 }, { "epoch": 0.4695127610208817, "grad_norm": 37.33076095581055, "learning_rate": 5.914044351110316e-06, "loss": 22.0359, "step": 5059 }, { "epoch": 0.46960556844547563, "grad_norm": 37.0849494934082, "learning_rate": 5.912535587872957e-06, "loss": 23.575, "step": 5060 }, { "epoch": 0.46969837587006963, "grad_norm": 37.8882942199707, "learning_rate": 5.911026738677066e-06, "loss": 24.8621, "step": 5061 }, { "epoch": 0.4697911832946636, "grad_norm": 37.48640441894531, "learning_rate": 5.909517803664769e-06, "loss": 23.789, "step": 5062 }, { "epoch": 0.4698839907192575, "grad_norm": 38.579002380371094, "learning_rate": 5.908008782978206e-06, "loss": 23.5796, "step": 5063 }, { "epoch": 0.4699767981438515, "grad_norm": 35.9329833984375, "learning_rate": 5.906499676759524e-06, "loss": 24.6531, "step": 5064 }, { "epoch": 0.47006960556844546, "grad_norm": 40.824954986572266, "learning_rate": 5.904990485150876e-06, "loss": 24.8737, "step": 5065 }, { "epoch": 0.47016241299303946, "grad_norm": 35.820960998535156, "learning_rate": 5.903481208294423e-06, "loss": 22.821, "step": 5066 }, { "epoch": 0.4702552204176334, "grad_norm": 41.09560775756836, "learning_rate": 5.901971846332336e-06, "loss": 23.8941, "step": 5067 }, { "epoch": 0.4703480278422274, "grad_norm": 36.43619918823242, "learning_rate": 5.9004623994067945e-06, "loss": 24.7003, "step": 5068 }, { "epoch": 0.47044083526682134, "grad_norm": 34.12532424926758, "learning_rate": 5.898952867659981e-06, "loss": 22.7838, "step": 5069 }, { "epoch": 0.4705336426914153, "grad_norm": 36.94231414794922, "learning_rate": 5.897443251234093e-06, "loss": 22.8249, "step": 5070 }, { "epoch": 0.4706264501160093, "grad_norm": 40.606468200683594, "learning_rate": 5.8959335502713314e-06, "loss": 23.3856, "step": 5071 }, { "epoch": 0.47071925754060323, "grad_norm": 39.26792907714844, "learning_rate": 5.8944237649139046e-06, "loss": 23.5759, "step": 5072 }, { "epoch": 0.47081206496519723, "grad_norm": 36.565834045410156, "learning_rate": 5.892913895304033e-06, "loss": 23.7417, "step": 5073 }, { "epoch": 0.4709048723897912, "grad_norm": 37.133602142333984, "learning_rate": 5.891403941583941e-06, "loss": 23.6567, "step": 5074 }, { "epoch": 0.4709976798143852, "grad_norm": 38.242645263671875, "learning_rate": 5.8898939038958624e-06, "loss": 22.6528, "step": 5075 }, { "epoch": 0.4710904872389791, "grad_norm": 36.05827331542969, "learning_rate": 5.88838378238204e-06, "loss": 22.5713, "step": 5076 }, { "epoch": 0.47118329466357306, "grad_norm": 43.696075439453125, "learning_rate": 5.886873577184723e-06, "loss": 22.8928, "step": 5077 }, { "epoch": 0.47127610208816706, "grad_norm": 39.04975509643555, "learning_rate": 5.885363288446167e-06, "loss": 22.9541, "step": 5078 }, { "epoch": 0.471368909512761, "grad_norm": 36.83099365234375, "learning_rate": 5.8838529163086414e-06, "loss": 21.9387, "step": 5079 }, { "epoch": 0.471461716937355, "grad_norm": 40.523441314697266, "learning_rate": 5.882342460914415e-06, "loss": 23.4499, "step": 5080 }, { "epoch": 0.47155452436194895, "grad_norm": 35.77307891845703, "learning_rate": 5.880831922405772e-06, "loss": 23.7047, "step": 5081 }, { "epoch": 0.47164733178654294, "grad_norm": 40.82684326171875, "learning_rate": 5.879321300924999e-06, "loss": 24.6083, "step": 5082 }, { "epoch": 0.4717401392111369, "grad_norm": 42.2487678527832, "learning_rate": 5.877810596614395e-06, "loss": 23.0136, "step": 5083 }, { "epoch": 0.47183294663573083, "grad_norm": 59.13059616088867, "learning_rate": 5.8762998096162635e-06, "loss": 23.5324, "step": 5084 }, { "epoch": 0.47192575406032483, "grad_norm": 35.56163787841797, "learning_rate": 5.874788940072915e-06, "loss": 24.1773, "step": 5085 }, { "epoch": 0.4720185614849188, "grad_norm": 35.805023193359375, "learning_rate": 5.873277988126673e-06, "loss": 24.1529, "step": 5086 }, { "epoch": 0.4721113689095128, "grad_norm": 38.14768981933594, "learning_rate": 5.871766953919863e-06, "loss": 22.9748, "step": 5087 }, { "epoch": 0.4722041763341067, "grad_norm": 33.6349983215332, "learning_rate": 5.8702558375948206e-06, "loss": 21.8923, "step": 5088 }, { "epoch": 0.4722969837587007, "grad_norm": 40.010494232177734, "learning_rate": 5.868744639293891e-06, "loss": 23.3425, "step": 5089 }, { "epoch": 0.47238979118329466, "grad_norm": 38.99460220336914, "learning_rate": 5.8672333591594225e-06, "loss": 23.1612, "step": 5090 }, { "epoch": 0.4724825986078886, "grad_norm": 39.99225997924805, "learning_rate": 5.8657219973337755e-06, "loss": 25.0547, "step": 5091 }, { "epoch": 0.4725754060324826, "grad_norm": 36.615318298339844, "learning_rate": 5.864210553959317e-06, "loss": 22.6673, "step": 5092 }, { "epoch": 0.47266821345707655, "grad_norm": 44.93971633911133, "learning_rate": 5.862699029178419e-06, "loss": 21.1138, "step": 5093 }, { "epoch": 0.47276102088167055, "grad_norm": 37.133750915527344, "learning_rate": 5.861187423133464e-06, "loss": 20.885, "step": 5094 }, { "epoch": 0.4728538283062645, "grad_norm": 36.351951599121094, "learning_rate": 5.859675735966845e-06, "loss": 22.262, "step": 5095 }, { "epoch": 0.4729466357308585, "grad_norm": 37.22173309326172, "learning_rate": 5.858163967820953e-06, "loss": 23.8047, "step": 5096 }, { "epoch": 0.47303944315545243, "grad_norm": 37.41801452636719, "learning_rate": 5.856652118838198e-06, "loss": 24.1051, "step": 5097 }, { "epoch": 0.4731322505800464, "grad_norm": 37.02093505859375, "learning_rate": 5.85514018916099e-06, "loss": 22.5016, "step": 5098 }, { "epoch": 0.4732250580046404, "grad_norm": 36.9518928527832, "learning_rate": 5.853628178931747e-06, "loss": 26.1761, "step": 5099 }, { "epoch": 0.4733178654292343, "grad_norm": 36.71382141113281, "learning_rate": 5.852116088292901e-06, "loss": 23.721, "step": 5100 }, { "epoch": 0.4734106728538283, "grad_norm": 38.151588439941406, "learning_rate": 5.850603917386883e-06, "loss": 24.1795, "step": 5101 }, { "epoch": 0.47350348027842226, "grad_norm": 37.32167434692383, "learning_rate": 5.849091666356139e-06, "loss": 23.3063, "step": 5102 }, { "epoch": 0.47359628770301626, "grad_norm": 37.115421295166016, "learning_rate": 5.847579335343118e-06, "loss": 22.1283, "step": 5103 }, { "epoch": 0.4736890951276102, "grad_norm": 43.73714065551758, "learning_rate": 5.846066924490276e-06, "loss": 23.6093, "step": 5104 }, { "epoch": 0.4737819025522042, "grad_norm": 37.16460037231445, "learning_rate": 5.84455443394008e-06, "loss": 23.7447, "step": 5105 }, { "epoch": 0.47387470997679815, "grad_norm": 37.190711975097656, "learning_rate": 5.843041863835003e-06, "loss": 24.7798, "step": 5106 }, { "epoch": 0.4739675174013921, "grad_norm": 37.963809967041016, "learning_rate": 5.841529214317523e-06, "loss": 24.0407, "step": 5107 }, { "epoch": 0.4740603248259861, "grad_norm": 38.526329040527344, "learning_rate": 5.840016485530131e-06, "loss": 24.2107, "step": 5108 }, { "epoch": 0.47415313225058003, "grad_norm": 38.06797790527344, "learning_rate": 5.83850367761532e-06, "loss": 23.2625, "step": 5109 }, { "epoch": 0.47424593967517403, "grad_norm": 38.748817443847656, "learning_rate": 5.836990790715592e-06, "loss": 23.3951, "step": 5110 }, { "epoch": 0.474338747099768, "grad_norm": 35.63248062133789, "learning_rate": 5.83547782497346e-06, "loss": 23.6511, "step": 5111 }, { "epoch": 0.474431554524362, "grad_norm": 35.2995491027832, "learning_rate": 5.8339647805314404e-06, "loss": 23.3179, "step": 5112 }, { "epoch": 0.4745243619489559, "grad_norm": 35.50492477416992, "learning_rate": 5.832451657532055e-06, "loss": 23.4925, "step": 5113 }, { "epoch": 0.47461716937354986, "grad_norm": 36.24538040161133, "learning_rate": 5.830938456117842e-06, "loss": 24.1166, "step": 5114 }, { "epoch": 0.47470997679814386, "grad_norm": 39.09963607788086, "learning_rate": 5.829425176431336e-06, "loss": 24.925, "step": 5115 }, { "epoch": 0.4748027842227378, "grad_norm": 37.33335494995117, "learning_rate": 5.8279118186150865e-06, "loss": 23.3378, "step": 5116 }, { "epoch": 0.4748955916473318, "grad_norm": 37.09817886352539, "learning_rate": 5.8263983828116465e-06, "loss": 22.2528, "step": 5117 }, { "epoch": 0.47498839907192575, "grad_norm": 38.01935577392578, "learning_rate": 5.824884869163581e-06, "loss": 23.3207, "step": 5118 }, { "epoch": 0.47508120649651975, "grad_norm": 39.772640228271484, "learning_rate": 5.823371277813455e-06, "loss": 24.4484, "step": 5119 }, { "epoch": 0.4751740139211137, "grad_norm": 36.00062561035156, "learning_rate": 5.821857608903846e-06, "loss": 24.1124, "step": 5120 }, { "epoch": 0.47526682134570764, "grad_norm": 41.672203063964844, "learning_rate": 5.82034386257734e-06, "loss": 23.4105, "step": 5121 }, { "epoch": 0.47535962877030163, "grad_norm": 35.73136520385742, "learning_rate": 5.818830038976527e-06, "loss": 22.6876, "step": 5122 }, { "epoch": 0.4754524361948956, "grad_norm": 72.6313247680664, "learning_rate": 5.817316138244005e-06, "loss": 24.064, "step": 5123 }, { "epoch": 0.4755452436194896, "grad_norm": 43.97162628173828, "learning_rate": 5.815802160522379e-06, "loss": 24.5098, "step": 5124 }, { "epoch": 0.4756380510440835, "grad_norm": 37.56058120727539, "learning_rate": 5.814288105954262e-06, "loss": 24.4023, "step": 5125 }, { "epoch": 0.4757308584686775, "grad_norm": 42.60502624511719, "learning_rate": 5.812773974682275e-06, "loss": 24.2141, "step": 5126 }, { "epoch": 0.47582366589327146, "grad_norm": 49.12168502807617, "learning_rate": 5.811259766849045e-06, "loss": 24.8765, "step": 5127 }, { "epoch": 0.4759164733178654, "grad_norm": 38.53932571411133, "learning_rate": 5.809745482597207e-06, "loss": 24.6538, "step": 5128 }, { "epoch": 0.4760092807424594, "grad_norm": 35.97938537597656, "learning_rate": 5.808231122069401e-06, "loss": 23.1588, "step": 5129 }, { "epoch": 0.47610208816705335, "grad_norm": 43.04719924926758, "learning_rate": 5.806716685408278e-06, "loss": 24.3815, "step": 5130 }, { "epoch": 0.47619489559164735, "grad_norm": 39.62317657470703, "learning_rate": 5.805202172756493e-06, "loss": 23.8146, "step": 5131 }, { "epoch": 0.4762877030162413, "grad_norm": 37.090694427490234, "learning_rate": 5.8036875842567085e-06, "loss": 22.7621, "step": 5132 }, { "epoch": 0.4763805104408353, "grad_norm": 36.969573974609375, "learning_rate": 5.802172920051596e-06, "loss": 22.5796, "step": 5133 }, { "epoch": 0.47647331786542924, "grad_norm": 36.73350143432617, "learning_rate": 5.8006581802838345e-06, "loss": 23.9045, "step": 5134 }, { "epoch": 0.4765661252900232, "grad_norm": 32.547054290771484, "learning_rate": 5.799143365096106e-06, "loss": 20.7534, "step": 5135 }, { "epoch": 0.4766589327146172, "grad_norm": 36.906707763671875, "learning_rate": 5.797628474631102e-06, "loss": 23.9409, "step": 5136 }, { "epoch": 0.4767517401392111, "grad_norm": 37.8271369934082, "learning_rate": 5.796113509031523e-06, "loss": 23.389, "step": 5137 }, { "epoch": 0.4768445475638051, "grad_norm": 36.85551834106445, "learning_rate": 5.794598468440075e-06, "loss": 22.7394, "step": 5138 }, { "epoch": 0.47693735498839906, "grad_norm": 36.44023513793945, "learning_rate": 5.79308335299947e-06, "loss": 20.2933, "step": 5139 }, { "epoch": 0.47703016241299306, "grad_norm": 38.92821502685547, "learning_rate": 5.791568162852429e-06, "loss": 22.6026, "step": 5140 }, { "epoch": 0.477122969837587, "grad_norm": 39.69839096069336, "learning_rate": 5.790052898141677e-06, "loss": 23.436, "step": 5141 }, { "epoch": 0.47721577726218095, "grad_norm": 35.474693298339844, "learning_rate": 5.788537559009951e-06, "loss": 23.2771, "step": 5142 }, { "epoch": 0.47730858468677495, "grad_norm": 35.56608200073242, "learning_rate": 5.78702214559999e-06, "loss": 22.7443, "step": 5143 }, { "epoch": 0.4774013921113689, "grad_norm": 37.01808166503906, "learning_rate": 5.785506658054542e-06, "loss": 22.1607, "step": 5144 }, { "epoch": 0.4774941995359629, "grad_norm": 39.46507263183594, "learning_rate": 5.783991096516365e-06, "loss": 23.767, "step": 5145 }, { "epoch": 0.47758700696055684, "grad_norm": 36.647315979003906, "learning_rate": 5.782475461128216e-06, "loss": 22.3594, "step": 5146 }, { "epoch": 0.47767981438515084, "grad_norm": 40.08793258666992, "learning_rate": 5.78095975203287e-06, "loss": 23.2444, "step": 5147 }, { "epoch": 0.4777726218097448, "grad_norm": 309.5570983886719, "learning_rate": 5.7794439693730975e-06, "loss": 24.3754, "step": 5148 }, { "epoch": 0.4778654292343387, "grad_norm": 44.43109130859375, "learning_rate": 5.777928113291684e-06, "loss": 24.407, "step": 5149 }, { "epoch": 0.4779582366589327, "grad_norm": 40.5969352722168, "learning_rate": 5.77641218393142e-06, "loss": 22.8163, "step": 5150 }, { "epoch": 0.47805104408352667, "grad_norm": 41.27810287475586, "learning_rate": 5.7748961814351005e-06, "loss": 23.4655, "step": 5151 }, { "epoch": 0.47814385150812067, "grad_norm": 37.23664474487305, "learning_rate": 5.77338010594553e-06, "loss": 25.0697, "step": 5152 }, { "epoch": 0.4782366589327146, "grad_norm": 38.7730598449707, "learning_rate": 5.7718639576055195e-06, "loss": 24.7646, "step": 5153 }, { "epoch": 0.4783294663573086, "grad_norm": 38.295902252197266, "learning_rate": 5.770347736557884e-06, "loss": 23.9102, "step": 5154 }, { "epoch": 0.47842227378190255, "grad_norm": 37.67552947998047, "learning_rate": 5.76883144294545e-06, "loss": 23.0998, "step": 5155 }, { "epoch": 0.4785150812064965, "grad_norm": 36.099952697753906, "learning_rate": 5.767315076911047e-06, "loss": 23.9176, "step": 5156 }, { "epoch": 0.4786078886310905, "grad_norm": 36.04854965209961, "learning_rate": 5.765798638597515e-06, "loss": 22.9732, "step": 5157 }, { "epoch": 0.47870069605568444, "grad_norm": 38.80345153808594, "learning_rate": 5.764282128147696e-06, "loss": 26.1162, "step": 5158 }, { "epoch": 0.47879350348027844, "grad_norm": 40.247474670410156, "learning_rate": 5.7627655457044425e-06, "loss": 22.7298, "step": 5159 }, { "epoch": 0.4788863109048724, "grad_norm": 42.60142517089844, "learning_rate": 5.761248891410613e-06, "loss": 23.8684, "step": 5160 }, { "epoch": 0.4789791183294664, "grad_norm": 37.20922088623047, "learning_rate": 5.759732165409072e-06, "loss": 23.2031, "step": 5161 }, { "epoch": 0.4790719257540603, "grad_norm": 39.74894714355469, "learning_rate": 5.7582153678426925e-06, "loss": 22.3237, "step": 5162 }, { "epoch": 0.47916473317865427, "grad_norm": 38.3429069519043, "learning_rate": 5.756698498854352e-06, "loss": 22.6591, "step": 5163 }, { "epoch": 0.47925754060324827, "grad_norm": 38.962852478027344, "learning_rate": 5.755181558586934e-06, "loss": 23.8318, "step": 5164 }, { "epoch": 0.4793503480278422, "grad_norm": 39.57332992553711, "learning_rate": 5.753664547183334e-06, "loss": 24.6079, "step": 5165 }, { "epoch": 0.4794431554524362, "grad_norm": 35.508819580078125, "learning_rate": 5.752147464786449e-06, "loss": 23.4136, "step": 5166 }, { "epoch": 0.47953596287703015, "grad_norm": 42.41783905029297, "learning_rate": 5.750630311539181e-06, "loss": 23.875, "step": 5167 }, { "epoch": 0.47962877030162415, "grad_norm": 54.073150634765625, "learning_rate": 5.749113087584448e-06, "loss": 22.6856, "step": 5168 }, { "epoch": 0.4797215777262181, "grad_norm": 38.50831985473633, "learning_rate": 5.747595793065165e-06, "loss": 23.4012, "step": 5169 }, { "epoch": 0.47981438515081204, "grad_norm": 34.18745422363281, "learning_rate": 5.746078428124259e-06, "loss": 24.014, "step": 5170 }, { "epoch": 0.47990719257540604, "grad_norm": 37.695068359375, "learning_rate": 5.744560992904658e-06, "loss": 23.8976, "step": 5171 }, { "epoch": 0.48, "grad_norm": 40.12095260620117, "learning_rate": 5.743043487549306e-06, "loss": 23.9442, "step": 5172 }, { "epoch": 0.480092807424594, "grad_norm": 39.270870208740234, "learning_rate": 5.741525912201145e-06, "loss": 22.2593, "step": 5173 }, { "epoch": 0.4801856148491879, "grad_norm": 38.52102279663086, "learning_rate": 5.740008267003126e-06, "loss": 22.8256, "step": 5174 }, { "epoch": 0.4802784222737819, "grad_norm": 35.31830596923828, "learning_rate": 5.738490552098211e-06, "loss": 20.4447, "step": 5175 }, { "epoch": 0.48037122969837587, "grad_norm": 34.957618713378906, "learning_rate": 5.736972767629361e-06, "loss": 21.9956, "step": 5176 }, { "epoch": 0.4804640371229698, "grad_norm": 38.119361877441406, "learning_rate": 5.7354549137395485e-06, "loss": 21.7736, "step": 5177 }, { "epoch": 0.4805568445475638, "grad_norm": 37.34998321533203, "learning_rate": 5.733936990571752e-06, "loss": 20.4125, "step": 5178 }, { "epoch": 0.48064965197215775, "grad_norm": 40.48111343383789, "learning_rate": 5.732418998268958e-06, "loss": 22.5744, "step": 5179 }, { "epoch": 0.48074245939675175, "grad_norm": 36.335105895996094, "learning_rate": 5.730900936974153e-06, "loss": 22.0707, "step": 5180 }, { "epoch": 0.4808352668213457, "grad_norm": 37.16938781738281, "learning_rate": 5.729382806830338e-06, "loss": 22.2185, "step": 5181 }, { "epoch": 0.4809280742459397, "grad_norm": 38.16156005859375, "learning_rate": 5.727864607980517e-06, "loss": 24.5559, "step": 5182 }, { "epoch": 0.48102088167053364, "grad_norm": 36.86882400512695, "learning_rate": 5.726346340567698e-06, "loss": 20.6979, "step": 5183 }, { "epoch": 0.4811136890951276, "grad_norm": 39.077327728271484, "learning_rate": 5.7248280047348995e-06, "loss": 24.1634, "step": 5184 }, { "epoch": 0.4812064965197216, "grad_norm": 35.047142028808594, "learning_rate": 5.723309600625144e-06, "loss": 22.7874, "step": 5185 }, { "epoch": 0.4812993039443155, "grad_norm": 40.381317138671875, "learning_rate": 5.721791128381464e-06, "loss": 23.7798, "step": 5186 }, { "epoch": 0.4813921113689095, "grad_norm": 41.51405334472656, "learning_rate": 5.7202725881468935e-06, "loss": 21.019, "step": 5187 }, { "epoch": 0.48148491879350347, "grad_norm": 42.34682083129883, "learning_rate": 5.718753980064474e-06, "loss": 23.1836, "step": 5188 }, { "epoch": 0.48157772621809747, "grad_norm": 40.972145080566406, "learning_rate": 5.717235304277258e-06, "loss": 23.7512, "step": 5189 }, { "epoch": 0.4816705336426914, "grad_norm": 35.16571044921875, "learning_rate": 5.715716560928297e-06, "loss": 23.3657, "step": 5190 }, { "epoch": 0.48176334106728536, "grad_norm": 43.9129524230957, "learning_rate": 5.714197750160655e-06, "loss": 24.8776, "step": 5191 }, { "epoch": 0.48185614849187935, "grad_norm": 39.0804443359375, "learning_rate": 5.712678872117401e-06, "loss": 24.458, "step": 5192 }, { "epoch": 0.4819489559164733, "grad_norm": 35.91414260864258, "learning_rate": 5.711159926941606e-06, "loss": 22.782, "step": 5193 }, { "epoch": 0.4820417633410673, "grad_norm": 40.61508560180664, "learning_rate": 5.709640914776355e-06, "loss": 23.977, "step": 5194 }, { "epoch": 0.48213457076566124, "grad_norm": 37.9379768371582, "learning_rate": 5.708121835764733e-06, "loss": 23.6379, "step": 5195 }, { "epoch": 0.48222737819025524, "grad_norm": 35.88343048095703, "learning_rate": 5.706602690049832e-06, "loss": 24.0931, "step": 5196 }, { "epoch": 0.4823201856148492, "grad_norm": 39.32372283935547, "learning_rate": 5.705083477774755e-06, "loss": 24.6385, "step": 5197 }, { "epoch": 0.4824129930394432, "grad_norm": 40.546607971191406, "learning_rate": 5.703564199082605e-06, "loss": 22.6655, "step": 5198 }, { "epoch": 0.4825058004640371, "grad_norm": 41.27825164794922, "learning_rate": 5.702044854116494e-06, "loss": 23.568, "step": 5199 }, { "epoch": 0.48259860788863107, "grad_norm": 37.49526596069336, "learning_rate": 5.7005254430195436e-06, "loss": 20.7327, "step": 5200 }, { "epoch": 0.48269141531322507, "grad_norm": 39.80467224121094, "learning_rate": 5.6990059659348765e-06, "loss": 24.0695, "step": 5201 }, { "epoch": 0.482784222737819, "grad_norm": 41.365596771240234, "learning_rate": 5.697486423005621e-06, "loss": 23.7594, "step": 5202 }, { "epoch": 0.482877030162413, "grad_norm": 36.2502326965332, "learning_rate": 5.69596681437492e-06, "loss": 23.2803, "step": 5203 }, { "epoch": 0.48296983758700696, "grad_norm": 39.28809356689453, "learning_rate": 5.694447140185912e-06, "loss": 23.4803, "step": 5204 }, { "epoch": 0.48306264501160096, "grad_norm": 42.62895202636719, "learning_rate": 5.6929274005817495e-06, "loss": 22.2759, "step": 5205 }, { "epoch": 0.4831554524361949, "grad_norm": 37.63391876220703, "learning_rate": 5.691407595705585e-06, "loss": 23.5377, "step": 5206 }, { "epoch": 0.48324825986078884, "grad_norm": 39.993492126464844, "learning_rate": 5.689887725700582e-06, "loss": 27.0142, "step": 5207 }, { "epoch": 0.48334106728538284, "grad_norm": 42.01078796386719, "learning_rate": 5.688367790709909e-06, "loss": 24.4178, "step": 5208 }, { "epoch": 0.4834338747099768, "grad_norm": 45.298194885253906, "learning_rate": 5.686847790876739e-06, "loss": 21.4813, "step": 5209 }, { "epoch": 0.4835266821345708, "grad_norm": 38.392234802246094, "learning_rate": 5.6853277263442516e-06, "loss": 25.2997, "step": 5210 }, { "epoch": 0.48361948955916473, "grad_norm": 38.327693939208984, "learning_rate": 5.6838075972556354e-06, "loss": 24.3886, "step": 5211 }, { "epoch": 0.4837122969837587, "grad_norm": 40.65587615966797, "learning_rate": 5.682287403754079e-06, "loss": 22.6914, "step": 5212 }, { "epoch": 0.48380510440835267, "grad_norm": 36.81350326538086, "learning_rate": 5.680767145982785e-06, "loss": 23.311, "step": 5213 }, { "epoch": 0.4838979118329466, "grad_norm": 38.875457763671875, "learning_rate": 5.679246824084955e-06, "loss": 25.1104, "step": 5214 }, { "epoch": 0.4839907192575406, "grad_norm": 37.450138092041016, "learning_rate": 5.677726438203799e-06, "loss": 24.403, "step": 5215 }, { "epoch": 0.48408352668213456, "grad_norm": 37.14384078979492, "learning_rate": 5.676205988482536e-06, "loss": 23.8815, "step": 5216 }, { "epoch": 0.48417633410672856, "grad_norm": 36.08530044555664, "learning_rate": 5.674685475064387e-06, "loss": 22.727, "step": 5217 }, { "epoch": 0.4842691415313225, "grad_norm": 43.799983978271484, "learning_rate": 5.67316489809258e-06, "loss": 23.4887, "step": 5218 }, { "epoch": 0.4843619489559165, "grad_norm": 36.38961410522461, "learning_rate": 5.6716442577103514e-06, "loss": 23.3321, "step": 5219 }, { "epoch": 0.48445475638051044, "grad_norm": 38.89949035644531, "learning_rate": 5.6701235540609405e-06, "loss": 24.4082, "step": 5220 }, { "epoch": 0.4845475638051044, "grad_norm": 38.048553466796875, "learning_rate": 5.668602787287592e-06, "loss": 23.3867, "step": 5221 }, { "epoch": 0.4846403712296984, "grad_norm": 37.611114501953125, "learning_rate": 5.6670819575335605e-06, "loss": 23.5019, "step": 5222 }, { "epoch": 0.48473317865429233, "grad_norm": 35.34041213989258, "learning_rate": 5.665561064942104e-06, "loss": 23.0094, "step": 5223 }, { "epoch": 0.48482598607888633, "grad_norm": 36.9976921081543, "learning_rate": 5.664040109656485e-06, "loss": 23.4213, "step": 5224 }, { "epoch": 0.48491879350348027, "grad_norm": 36.024375915527344, "learning_rate": 5.6625190918199765e-06, "loss": 22.4946, "step": 5225 }, { "epoch": 0.48501160092807427, "grad_norm": 40.940956115722656, "learning_rate": 5.660998011575853e-06, "loss": 22.5221, "step": 5226 }, { "epoch": 0.4851044083526682, "grad_norm": 38.303009033203125, "learning_rate": 5.659476869067397e-06, "loss": 22.7892, "step": 5227 }, { "epoch": 0.48519721577726216, "grad_norm": 37.315494537353516, "learning_rate": 5.657955664437894e-06, "loss": 26.0176, "step": 5228 }, { "epoch": 0.48529002320185616, "grad_norm": 39.76142501831055, "learning_rate": 5.65643439783064e-06, "loss": 22.7483, "step": 5229 }, { "epoch": 0.4853828306264501, "grad_norm": 41.44932556152344, "learning_rate": 5.654913069388934e-06, "loss": 25.2022, "step": 5230 }, { "epoch": 0.4854756380510441, "grad_norm": 41.629146575927734, "learning_rate": 5.653391679256081e-06, "loss": 26.3812, "step": 5231 }, { "epoch": 0.48556844547563804, "grad_norm": 35.52128982543945, "learning_rate": 5.651870227575391e-06, "loss": 21.9817, "step": 5232 }, { "epoch": 0.48566125290023204, "grad_norm": 37.306907653808594, "learning_rate": 5.650348714490183e-06, "loss": 20.9894, "step": 5233 }, { "epoch": 0.485754060324826, "grad_norm": 40.324337005615234, "learning_rate": 5.64882714014378e-06, "loss": 23.6121, "step": 5234 }, { "epoch": 0.48584686774941993, "grad_norm": 39.96961975097656, "learning_rate": 5.6473055046795065e-06, "loss": 24.4836, "step": 5235 }, { "epoch": 0.48593967517401393, "grad_norm": 39.40480422973633, "learning_rate": 5.645783808240701e-06, "loss": 24.5378, "step": 5236 }, { "epoch": 0.4860324825986079, "grad_norm": 37.46632385253906, "learning_rate": 5.644262050970701e-06, "loss": 23.1646, "step": 5237 }, { "epoch": 0.4861252900232019, "grad_norm": 36.81684112548828, "learning_rate": 5.642740233012854e-06, "loss": 24.0019, "step": 5238 }, { "epoch": 0.4862180974477958, "grad_norm": 38.63439178466797, "learning_rate": 5.641218354510509e-06, "loss": 22.7729, "step": 5239 }, { "epoch": 0.4863109048723898, "grad_norm": 35.968074798583984, "learning_rate": 5.639696415607024e-06, "loss": 24.364, "step": 5240 }, { "epoch": 0.48640371229698376, "grad_norm": 42.40781021118164, "learning_rate": 5.638174416445764e-06, "loss": 24.4745, "step": 5241 }, { "epoch": 0.4864965197215777, "grad_norm": 35.807682037353516, "learning_rate": 5.636652357170095e-06, "loss": 22.4531, "step": 5242 }, { "epoch": 0.4865893271461717, "grad_norm": 39.057334899902344, "learning_rate": 5.635130237923392e-06, "loss": 23.5773, "step": 5243 }, { "epoch": 0.48668213457076565, "grad_norm": 38.051002502441406, "learning_rate": 5.633608058849033e-06, "loss": 23.4884, "step": 5244 }, { "epoch": 0.48677494199535964, "grad_norm": 35.14885711669922, "learning_rate": 5.632085820090406e-06, "loss": 23.6055, "step": 5245 }, { "epoch": 0.4868677494199536, "grad_norm": 35.081825256347656, "learning_rate": 5.6305635217909e-06, "loss": 21.5514, "step": 5246 }, { "epoch": 0.4869605568445476, "grad_norm": 36.185546875, "learning_rate": 5.6290411640939136e-06, "loss": 22.962, "step": 5247 }, { "epoch": 0.48705336426914153, "grad_norm": 37.87520980834961, "learning_rate": 5.627518747142847e-06, "loss": 22.6725, "step": 5248 }, { "epoch": 0.4871461716937355, "grad_norm": 38.595420837402344, "learning_rate": 5.62599627108111e-06, "loss": 22.7665, "step": 5249 }, { "epoch": 0.4872389791183295, "grad_norm": 34.75749206542969, "learning_rate": 5.624473736052114e-06, "loss": 23.838, "step": 5250 }, { "epoch": 0.4873317865429234, "grad_norm": 38.07061767578125, "learning_rate": 5.62295114219928e-06, "loss": 23.0707, "step": 5251 }, { "epoch": 0.4874245939675174, "grad_norm": 42.14800262451172, "learning_rate": 5.621428489666031e-06, "loss": 26.0045, "step": 5252 }, { "epoch": 0.48751740139211136, "grad_norm": 37.683860778808594, "learning_rate": 5.619905778595797e-06, "loss": 25.3168, "step": 5253 }, { "epoch": 0.48761020881670536, "grad_norm": 35.51264953613281, "learning_rate": 5.6183830091320134e-06, "loss": 22.1547, "step": 5254 }, { "epoch": 0.4877030162412993, "grad_norm": 38.1855354309082, "learning_rate": 5.616860181418122e-06, "loss": 23.5627, "step": 5255 }, { "epoch": 0.48779582366589325, "grad_norm": 37.73514938354492, "learning_rate": 5.61533729559757e-06, "loss": 23.1058, "step": 5256 }, { "epoch": 0.48788863109048725, "grad_norm": 38.70551681518555, "learning_rate": 5.613814351813807e-06, "loss": 24.7446, "step": 5257 }, { "epoch": 0.4879814385150812, "grad_norm": 37.84577560424805, "learning_rate": 5.612291350210293e-06, "loss": 23.2515, "step": 5258 }, { "epoch": 0.4880742459396752, "grad_norm": 38.324337005615234, "learning_rate": 5.610768290930488e-06, "loss": 25.3078, "step": 5259 }, { "epoch": 0.48816705336426913, "grad_norm": 33.33015441894531, "learning_rate": 5.609245174117863e-06, "loss": 23.4548, "step": 5260 }, { "epoch": 0.48825986078886313, "grad_norm": 37.801788330078125, "learning_rate": 5.60772199991589e-06, "loss": 23.6242, "step": 5261 }, { "epoch": 0.4883526682134571, "grad_norm": 36.257137298583984, "learning_rate": 5.6061987684680505e-06, "loss": 21.9518, "step": 5262 }, { "epoch": 0.488445475638051, "grad_norm": 39.784934997558594, "learning_rate": 5.604675479917826e-06, "loss": 21.8164, "step": 5263 }, { "epoch": 0.488538283062645, "grad_norm": 38.60248947143555, "learning_rate": 5.603152134408708e-06, "loss": 23.1588, "step": 5264 }, { "epoch": 0.48863109048723896, "grad_norm": 39.7390022277832, "learning_rate": 5.601628732084192e-06, "loss": 23.595, "step": 5265 }, { "epoch": 0.48872389791183296, "grad_norm": 36.64112854003906, "learning_rate": 5.600105273087778e-06, "loss": 24.3145, "step": 5266 }, { "epoch": 0.4888167053364269, "grad_norm": 38.47116470336914, "learning_rate": 5.598581757562972e-06, "loss": 24.2735, "step": 5267 }, { "epoch": 0.4889095127610209, "grad_norm": 38.48031997680664, "learning_rate": 5.5970581856532864e-06, "loss": 21.9053, "step": 5268 }, { "epoch": 0.48900232018561485, "grad_norm": 38.77784729003906, "learning_rate": 5.595534557502235e-06, "loss": 24.027, "step": 5269 }, { "epoch": 0.4890951276102088, "grad_norm": 34.5377082824707, "learning_rate": 5.594010873253343e-06, "loss": 21.5664, "step": 5270 }, { "epoch": 0.4891879350348028, "grad_norm": 36.635189056396484, "learning_rate": 5.592487133050136e-06, "loss": 24.4417, "step": 5271 }, { "epoch": 0.48928074245939673, "grad_norm": 44.936973571777344, "learning_rate": 5.590963337036146e-06, "loss": 22.4441, "step": 5272 }, { "epoch": 0.48937354988399073, "grad_norm": 41.560543060302734, "learning_rate": 5.589439485354911e-06, "loss": 23.8645, "step": 5273 }, { "epoch": 0.4894663573085847, "grad_norm": 39.39334487915039, "learning_rate": 5.587915578149976e-06, "loss": 23.3643, "step": 5274 }, { "epoch": 0.4895591647331787, "grad_norm": 39.26625061035156, "learning_rate": 5.586391615564886e-06, "loss": 23.4668, "step": 5275 }, { "epoch": 0.4896519721577726, "grad_norm": 37.22351837158203, "learning_rate": 5.584867597743197e-06, "loss": 23.889, "step": 5276 }, { "epoch": 0.48974477958236656, "grad_norm": 36.982025146484375, "learning_rate": 5.5833435248284654e-06, "loss": 23.0095, "step": 5277 }, { "epoch": 0.48983758700696056, "grad_norm": 40.080169677734375, "learning_rate": 5.5818193969642565e-06, "loss": 23.9821, "step": 5278 }, { "epoch": 0.4899303944315545, "grad_norm": 44.28982162475586, "learning_rate": 5.580295214294138e-06, "loss": 22.6744, "step": 5279 }, { "epoch": 0.4900232018561485, "grad_norm": 40.051422119140625, "learning_rate": 5.578770976961685e-06, "loss": 23.0156, "step": 5280 }, { "epoch": 0.49011600928074245, "grad_norm": 34.412818908691406, "learning_rate": 5.577246685110478e-06, "loss": 24.236, "step": 5281 }, { "epoch": 0.49020881670533645, "grad_norm": 38.3242301940918, "learning_rate": 5.5757223388840984e-06, "loss": 25.1728, "step": 5282 }, { "epoch": 0.4903016241299304, "grad_norm": 41.268150329589844, "learning_rate": 5.574197938426139e-06, "loss": 22.983, "step": 5283 }, { "epoch": 0.49039443155452433, "grad_norm": 37.50099563598633, "learning_rate": 5.572673483880192e-06, "loss": 23.5948, "step": 5284 }, { "epoch": 0.49048723897911833, "grad_norm": 41.52535629272461, "learning_rate": 5.571148975389858e-06, "loss": 23.7583, "step": 5285 }, { "epoch": 0.4905800464037123, "grad_norm": 36.42777633666992, "learning_rate": 5.569624413098742e-06, "loss": 23.2215, "step": 5286 }, { "epoch": 0.4906728538283063, "grad_norm": 32.222476959228516, "learning_rate": 5.568099797150454e-06, "loss": 22.0957, "step": 5287 }, { "epoch": 0.4907656612529002, "grad_norm": 36.50577926635742, "learning_rate": 5.5665751276886084e-06, "loss": 24.2154, "step": 5288 }, { "epoch": 0.4908584686774942, "grad_norm": 39.2513427734375, "learning_rate": 5.565050404856826e-06, "loss": 24.3319, "step": 5289 }, { "epoch": 0.49095127610208816, "grad_norm": 40.407928466796875, "learning_rate": 5.5635256287987304e-06, "loss": 23.4543, "step": 5290 }, { "epoch": 0.4910440835266821, "grad_norm": 43.06346130371094, "learning_rate": 5.562000799657953e-06, "loss": 26.3048, "step": 5291 }, { "epoch": 0.4911368909512761, "grad_norm": 36.14029312133789, "learning_rate": 5.560475917578129e-06, "loss": 22.7713, "step": 5292 }, { "epoch": 0.49122969837587005, "grad_norm": 36.9310417175293, "learning_rate": 5.558950982702897e-06, "loss": 21.7778, "step": 5293 }, { "epoch": 0.49132250580046405, "grad_norm": 39.6461181640625, "learning_rate": 5.557425995175902e-06, "loss": 24.1112, "step": 5294 }, { "epoch": 0.491415313225058, "grad_norm": 36.03590393066406, "learning_rate": 5.555900955140795e-06, "loss": 21.9837, "step": 5295 }, { "epoch": 0.491508120649652, "grad_norm": 38.2883415222168, "learning_rate": 5.554375862741231e-06, "loss": 24.0052, "step": 5296 }, { "epoch": 0.49160092807424594, "grad_norm": 38.67517852783203, "learning_rate": 5.55285071812087e-06, "loss": 24.6637, "step": 5297 }, { "epoch": 0.49169373549883993, "grad_norm": 39.71304702758789, "learning_rate": 5.551325521423375e-06, "loss": 22.5368, "step": 5298 }, { "epoch": 0.4917865429234339, "grad_norm": 41.68069076538086, "learning_rate": 5.549800272792418e-06, "loss": 23.4704, "step": 5299 }, { "epoch": 0.4918793503480278, "grad_norm": 39.231842041015625, "learning_rate": 5.548274972371672e-06, "loss": 23.6289, "step": 5300 }, { "epoch": 0.4919721577726218, "grad_norm": 41.194580078125, "learning_rate": 5.5467496203048164e-06, "loss": 25.5902, "step": 5301 }, { "epoch": 0.49206496519721576, "grad_norm": 34.4095458984375, "learning_rate": 5.545224216735537e-06, "loss": 24.3665, "step": 5302 }, { "epoch": 0.49215777262180976, "grad_norm": 36.10327911376953, "learning_rate": 5.543698761807521e-06, "loss": 21.0562, "step": 5303 }, { "epoch": 0.4922505800464037, "grad_norm": 37.79465866088867, "learning_rate": 5.542173255664463e-06, "loss": 21.7352, "step": 5304 }, { "epoch": 0.4923433874709977, "grad_norm": 36.233985900878906, "learning_rate": 5.540647698450062e-06, "loss": 24.4107, "step": 5305 }, { "epoch": 0.49243619489559165, "grad_norm": 33.97433090209961, "learning_rate": 5.539122090308021e-06, "loss": 24.0174, "step": 5306 }, { "epoch": 0.4925290023201856, "grad_norm": 34.77583312988281, "learning_rate": 5.5375964313820496e-06, "loss": 22.3685, "step": 5307 }, { "epoch": 0.4926218097447796, "grad_norm": 36.62163543701172, "learning_rate": 5.536070721815861e-06, "loss": 22.7023, "step": 5308 }, { "epoch": 0.49271461716937354, "grad_norm": 37.522178649902344, "learning_rate": 5.5345449617531724e-06, "loss": 23.0245, "step": 5309 }, { "epoch": 0.49280742459396754, "grad_norm": 36.76591110229492, "learning_rate": 5.533019151337706e-06, "loss": 22.9114, "step": 5310 }, { "epoch": 0.4929002320185615, "grad_norm": 39.28584289550781, "learning_rate": 5.53149329071319e-06, "loss": 22.7085, "step": 5311 }, { "epoch": 0.4929930394431555, "grad_norm": 38.07469940185547, "learning_rate": 5.529967380023358e-06, "loss": 22.4976, "step": 5312 }, { "epoch": 0.4930858468677494, "grad_norm": 36.2829704284668, "learning_rate": 5.528441419411945e-06, "loss": 23.0217, "step": 5313 }, { "epoch": 0.49317865429234337, "grad_norm": 37.642173767089844, "learning_rate": 5.526915409022692e-06, "loss": 22.4123, "step": 5314 }, { "epoch": 0.49327146171693736, "grad_norm": 39.89702224731445, "learning_rate": 5.525389348999347e-06, "loss": 25.6639, "step": 5315 }, { "epoch": 0.4933642691415313, "grad_norm": 40.383140563964844, "learning_rate": 5.523863239485661e-06, "loss": 22.3947, "step": 5316 }, { "epoch": 0.4934570765661253, "grad_norm": 41.093177795410156, "learning_rate": 5.522337080625388e-06, "loss": 24.5552, "step": 5317 }, { "epoch": 0.49354988399071925, "grad_norm": 37.06044387817383, "learning_rate": 5.520810872562289e-06, "loss": 23.5585, "step": 5318 }, { "epoch": 0.49364269141531325, "grad_norm": 41.46257781982422, "learning_rate": 5.519284615440129e-06, "loss": 25.6226, "step": 5319 }, { "epoch": 0.4937354988399072, "grad_norm": 38.28852844238281, "learning_rate": 5.517758309402679e-06, "loss": 23.0914, "step": 5320 }, { "epoch": 0.49382830626450114, "grad_norm": 37.41178512573242, "learning_rate": 5.516231954593711e-06, "loss": 23.2572, "step": 5321 }, { "epoch": 0.49392111368909514, "grad_norm": 40.73159408569336, "learning_rate": 5.514705551157005e-06, "loss": 24.6688, "step": 5322 }, { "epoch": 0.4940139211136891, "grad_norm": 36.286407470703125, "learning_rate": 5.513179099236344e-06, "loss": 23.2606, "step": 5323 }, { "epoch": 0.4941067285382831, "grad_norm": 39.84872817993164, "learning_rate": 5.511652598975516e-06, "loss": 24.0367, "step": 5324 }, { "epoch": 0.494199535962877, "grad_norm": 37.58147048950195, "learning_rate": 5.510126050518314e-06, "loss": 22.9614, "step": 5325 }, { "epoch": 0.494292343387471, "grad_norm": 47.91918182373047, "learning_rate": 5.508599454008532e-06, "loss": 23.5233, "step": 5326 }, { "epoch": 0.49438515081206497, "grad_norm": 36.37141036987305, "learning_rate": 5.507072809589976e-06, "loss": 24.0836, "step": 5327 }, { "epoch": 0.4944779582366589, "grad_norm": 36.04533386230469, "learning_rate": 5.505546117406449e-06, "loss": 24.6359, "step": 5328 }, { "epoch": 0.4945707656612529, "grad_norm": 38.3391227722168, "learning_rate": 5.504019377601762e-06, "loss": 22.5838, "step": 5329 }, { "epoch": 0.49466357308584685, "grad_norm": 37.687767028808594, "learning_rate": 5.502492590319731e-06, "loss": 23.3691, "step": 5330 }, { "epoch": 0.49475638051044085, "grad_norm": 38.31565856933594, "learning_rate": 5.500965755704175e-06, "loss": 23.767, "step": 5331 }, { "epoch": 0.4948491879350348, "grad_norm": 37.247772216796875, "learning_rate": 5.499438873898919e-06, "loss": 25.9072, "step": 5332 }, { "epoch": 0.4949419953596288, "grad_norm": 35.39228820800781, "learning_rate": 5.497911945047787e-06, "loss": 23.2503, "step": 5333 }, { "epoch": 0.49503480278422274, "grad_norm": 37.81028747558594, "learning_rate": 5.496384969294617e-06, "loss": 22.7289, "step": 5334 }, { "epoch": 0.4951276102088167, "grad_norm": 36.78261184692383, "learning_rate": 5.494857946783245e-06, "loss": 22.8659, "step": 5335 }, { "epoch": 0.4952204176334107, "grad_norm": 36.34953689575195, "learning_rate": 5.49333087765751e-06, "loss": 22.0988, "step": 5336 }, { "epoch": 0.4953132250580046, "grad_norm": 34.632686614990234, "learning_rate": 5.49180376206126e-06, "loss": 22.396, "step": 5337 }, { "epoch": 0.4954060324825986, "grad_norm": 44.9306640625, "learning_rate": 5.490276600138346e-06, "loss": 23.6328, "step": 5338 }, { "epoch": 0.49549883990719257, "grad_norm": 49.34177780151367, "learning_rate": 5.488749392032623e-06, "loss": 25.5049, "step": 5339 }, { "epoch": 0.49559164733178657, "grad_norm": 35.64152526855469, "learning_rate": 5.487222137887949e-06, "loss": 24.6558, "step": 5340 }, { "epoch": 0.4956844547563805, "grad_norm": 37.482276916503906, "learning_rate": 5.485694837848187e-06, "loss": 25.3279, "step": 5341 }, { "epoch": 0.49577726218097445, "grad_norm": 36.93852615356445, "learning_rate": 5.484167492057204e-06, "loss": 24.6547, "step": 5342 }, { "epoch": 0.49587006960556845, "grad_norm": 39.177886962890625, "learning_rate": 5.482640100658876e-06, "loss": 23.5503, "step": 5343 }, { "epoch": 0.4959628770301624, "grad_norm": 39.96709442138672, "learning_rate": 5.481112663797076e-06, "loss": 23.9424, "step": 5344 }, { "epoch": 0.4960556844547564, "grad_norm": 37.51043701171875, "learning_rate": 5.479585181615684e-06, "loss": 23.5157, "step": 5345 }, { "epoch": 0.49614849187935034, "grad_norm": 34.297088623046875, "learning_rate": 5.478057654258588e-06, "loss": 23.4926, "step": 5346 }, { "epoch": 0.49624129930394434, "grad_norm": 39.712493896484375, "learning_rate": 5.476530081869677e-06, "loss": 24.4122, "step": 5347 }, { "epoch": 0.4963341067285383, "grad_norm": 36.330726623535156, "learning_rate": 5.475002464592842e-06, "loss": 23.7421, "step": 5348 }, { "epoch": 0.4964269141531322, "grad_norm": 35.808135986328125, "learning_rate": 5.473474802571981e-06, "loss": 22.421, "step": 5349 }, { "epoch": 0.4965197215777262, "grad_norm": 35.220733642578125, "learning_rate": 5.471947095950998e-06, "loss": 22.4153, "step": 5350 }, { "epoch": 0.49661252900232017, "grad_norm": 37.44393539428711, "learning_rate": 5.4704193448737975e-06, "loss": 24.1607, "step": 5351 }, { "epoch": 0.49670533642691417, "grad_norm": 36.66172409057617, "learning_rate": 5.4688915494842886e-06, "loss": 22.1649, "step": 5352 }, { "epoch": 0.4967981438515081, "grad_norm": 41.87100601196289, "learning_rate": 5.467363709926389e-06, "loss": 24.556, "step": 5353 }, { "epoch": 0.4968909512761021, "grad_norm": 38.22198486328125, "learning_rate": 5.465835826344016e-06, "loss": 23.8547, "step": 5354 }, { "epoch": 0.49698375870069605, "grad_norm": 35.20298767089844, "learning_rate": 5.46430789888109e-06, "loss": 20.4428, "step": 5355 }, { "epoch": 0.49707656612529, "grad_norm": 91.78834533691406, "learning_rate": 5.462779927681541e-06, "loss": 23.3432, "step": 5356 }, { "epoch": 0.497169373549884, "grad_norm": 46.468421936035156, "learning_rate": 5.461251912889298e-06, "loss": 24.0068, "step": 5357 }, { "epoch": 0.49726218097447794, "grad_norm": 43.938575744628906, "learning_rate": 5.459723854648297e-06, "loss": 23.1963, "step": 5358 }, { "epoch": 0.49735498839907194, "grad_norm": 40.19114303588867, "learning_rate": 5.458195753102477e-06, "loss": 24.2364, "step": 5359 }, { "epoch": 0.4974477958236659, "grad_norm": 40.009178161621094, "learning_rate": 5.456667608395783e-06, "loss": 23.8025, "step": 5360 }, { "epoch": 0.4975406032482599, "grad_norm": 36.04215621948242, "learning_rate": 5.455139420672158e-06, "loss": 23.4901, "step": 5361 }, { "epoch": 0.4976334106728538, "grad_norm": 44.24441909790039, "learning_rate": 5.453611190075557e-06, "loss": 23.0412, "step": 5362 }, { "epoch": 0.49772621809744777, "grad_norm": 42.9021110534668, "learning_rate": 5.452082916749936e-06, "loss": 21.0089, "step": 5363 }, { "epoch": 0.49781902552204177, "grad_norm": 38.422447204589844, "learning_rate": 5.450554600839251e-06, "loss": 21.8242, "step": 5364 }, { "epoch": 0.4979118329466357, "grad_norm": 41.07073974609375, "learning_rate": 5.449026242487468e-06, "loss": 22.2702, "step": 5365 }, { "epoch": 0.4980046403712297, "grad_norm": 39.78812789916992, "learning_rate": 5.447497841838555e-06, "loss": 22.9784, "step": 5366 }, { "epoch": 0.49809744779582366, "grad_norm": 42.507293701171875, "learning_rate": 5.445969399036481e-06, "loss": 24.0134, "step": 5367 }, { "epoch": 0.49819025522041765, "grad_norm": 41.440372467041016, "learning_rate": 5.444440914225225e-06, "loss": 24.6807, "step": 5368 }, { "epoch": 0.4982830626450116, "grad_norm": 33.65693283081055, "learning_rate": 5.442912387548762e-06, "loss": 23.4746, "step": 5369 }, { "epoch": 0.49837587006960554, "grad_norm": 39.304359436035156, "learning_rate": 5.4413838191510785e-06, "loss": 23.5929, "step": 5370 }, { "epoch": 0.49846867749419954, "grad_norm": 37.06089782714844, "learning_rate": 5.439855209176161e-06, "loss": 22.4327, "step": 5371 }, { "epoch": 0.4985614849187935, "grad_norm": 36.767513275146484, "learning_rate": 5.438326557768e-06, "loss": 22.9311, "step": 5372 }, { "epoch": 0.4986542923433875, "grad_norm": 38.299949645996094, "learning_rate": 5.436797865070591e-06, "loss": 23.1138, "step": 5373 }, { "epoch": 0.4987470997679814, "grad_norm": 41.64824295043945, "learning_rate": 5.4352691312279326e-06, "loss": 23.5691, "step": 5374 }, { "epoch": 0.4988399071925754, "grad_norm": 43.324546813964844, "learning_rate": 5.43374035638403e-06, "loss": 25.1606, "step": 5375 }, { "epoch": 0.49893271461716937, "grad_norm": 40.96958541870117, "learning_rate": 5.432211540682887e-06, "loss": 23.481, "step": 5376 }, { "epoch": 0.4990255220417633, "grad_norm": 38.58354568481445, "learning_rate": 5.4306826842685135e-06, "loss": 24.0525, "step": 5377 }, { "epoch": 0.4991183294663573, "grad_norm": 41.94508743286133, "learning_rate": 5.429153787284927e-06, "loss": 24.4911, "step": 5378 }, { "epoch": 0.49921113689095126, "grad_norm": 37.06912612915039, "learning_rate": 5.427624849876146e-06, "loss": 23.7187, "step": 5379 }, { "epoch": 0.49930394431554526, "grad_norm": 41.687198638916016, "learning_rate": 5.426095872186188e-06, "loss": 22.7591, "step": 5380 }, { "epoch": 0.4993967517401392, "grad_norm": 41.546409606933594, "learning_rate": 5.424566854359083e-06, "loss": 23.4177, "step": 5381 }, { "epoch": 0.4994895591647332, "grad_norm": 38.1724853515625, "learning_rate": 5.423037796538858e-06, "loss": 22.5385, "step": 5382 }, { "epoch": 0.49958236658932714, "grad_norm": 39.68191909790039, "learning_rate": 5.4215086988695485e-06, "loss": 25.3322, "step": 5383 }, { "epoch": 0.4996751740139211, "grad_norm": 35.302608489990234, "learning_rate": 5.41997956149519e-06, "loss": 22.411, "step": 5384 }, { "epoch": 0.4997679814385151, "grad_norm": 36.749420166015625, "learning_rate": 5.418450384559825e-06, "loss": 23.5832, "step": 5385 }, { "epoch": 0.49986078886310903, "grad_norm": 39.52276611328125, "learning_rate": 5.416921168207496e-06, "loss": 22.6436, "step": 5386 }, { "epoch": 0.49995359628770303, "grad_norm": 53.04669189453125, "learning_rate": 5.4153919125822515e-06, "loss": 24.3506, "step": 5387 }, { "epoch": 0.500046403712297, "grad_norm": 38.24148941040039, "learning_rate": 5.413862617828147e-06, "loss": 25.2291, "step": 5388 }, { "epoch": 0.500139211136891, "grad_norm": 37.46637725830078, "learning_rate": 5.412333284089234e-06, "loss": 23.3887, "step": 5389 }, { "epoch": 0.5002320185614849, "grad_norm": 42.85343551635742, "learning_rate": 5.4108039115095736e-06, "loss": 23.1682, "step": 5390 }, { "epoch": 0.5003248259860789, "grad_norm": 35.16429138183594, "learning_rate": 5.409274500233228e-06, "loss": 23.252, "step": 5391 }, { "epoch": 0.5004176334106728, "grad_norm": 37.231998443603516, "learning_rate": 5.4077450504042664e-06, "loss": 21.5082, "step": 5392 }, { "epoch": 0.5005104408352669, "grad_norm": 41.03461837768555, "learning_rate": 5.406215562166755e-06, "loss": 22.0005, "step": 5393 }, { "epoch": 0.5006032482598608, "grad_norm": 41.755775451660156, "learning_rate": 5.4046860356647705e-06, "loss": 24.9087, "step": 5394 }, { "epoch": 0.5006960556844547, "grad_norm": 37.90180206298828, "learning_rate": 5.40315647104239e-06, "loss": 23.3802, "step": 5395 }, { "epoch": 0.5007888631090487, "grad_norm": 34.02543640136719, "learning_rate": 5.401626868443693e-06, "loss": 23.9973, "step": 5396 }, { "epoch": 0.5008816705336427, "grad_norm": 36.438350677490234, "learning_rate": 5.400097228012768e-06, "loss": 23.6165, "step": 5397 }, { "epoch": 0.5009744779582367, "grad_norm": 37.09170913696289, "learning_rate": 5.3985675498937e-06, "loss": 22.2234, "step": 5398 }, { "epoch": 0.5010672853828306, "grad_norm": 40.238983154296875, "learning_rate": 5.397037834230581e-06, "loss": 22.2365, "step": 5399 }, { "epoch": 0.5011600928074246, "grad_norm": 37.045536041259766, "learning_rate": 5.395508081167506e-06, "loss": 22.5975, "step": 5400 }, { "epoch": 0.5012529002320185, "grad_norm": 38.4847526550293, "learning_rate": 5.393978290848577e-06, "loss": 23.3427, "step": 5401 }, { "epoch": 0.5013457076566126, "grad_norm": 36.44441223144531, "learning_rate": 5.392448463417893e-06, "loss": 21.5069, "step": 5402 }, { "epoch": 0.5014385150812065, "grad_norm": 37.94658279418945, "learning_rate": 5.39091859901956e-06, "loss": 22.4896, "step": 5403 }, { "epoch": 0.5015313225058005, "grad_norm": 37.5793571472168, "learning_rate": 5.3893886977976895e-06, "loss": 22.8639, "step": 5404 }, { "epoch": 0.5016241299303944, "grad_norm": 38.7351188659668, "learning_rate": 5.3878587598963935e-06, "loss": 23.5849, "step": 5405 }, { "epoch": 0.5017169373549883, "grad_norm": 36.21440887451172, "learning_rate": 5.3863287854597865e-06, "loss": 23.1554, "step": 5406 }, { "epoch": 0.5018097447795824, "grad_norm": 40.36082077026367, "learning_rate": 5.3847987746319904e-06, "loss": 22.9554, "step": 5407 }, { "epoch": 0.5019025522041763, "grad_norm": 39.24043273925781, "learning_rate": 5.383268727557127e-06, "loss": 22.3016, "step": 5408 }, { "epoch": 0.5019953596287703, "grad_norm": 33.7513427734375, "learning_rate": 5.381738644379324e-06, "loss": 23.232, "step": 5409 }, { "epoch": 0.5020881670533642, "grad_norm": 36.17705535888672, "learning_rate": 5.3802085252427114e-06, "loss": 22.0228, "step": 5410 }, { "epoch": 0.5021809744779583, "grad_norm": 39.5678825378418, "learning_rate": 5.37867837029142e-06, "loss": 22.8999, "step": 5411 }, { "epoch": 0.5022737819025522, "grad_norm": 38.94813537597656, "learning_rate": 5.37714817966959e-06, "loss": 23.4254, "step": 5412 }, { "epoch": 0.5023665893271462, "grad_norm": 35.82416915893555, "learning_rate": 5.375617953521358e-06, "loss": 22.0499, "step": 5413 }, { "epoch": 0.5024593967517401, "grad_norm": 43.587974548339844, "learning_rate": 5.374087691990871e-06, "loss": 23.3946, "step": 5414 }, { "epoch": 0.5025522041763341, "grad_norm": 34.674922943115234, "learning_rate": 5.372557395222273e-06, "loss": 22.3192, "step": 5415 }, { "epoch": 0.5026450116009281, "grad_norm": 36.306705474853516, "learning_rate": 5.371027063359715e-06, "loss": 23.4343, "step": 5416 }, { "epoch": 0.5027378190255221, "grad_norm": 34.52361297607422, "learning_rate": 5.369496696547352e-06, "loss": 24.1501, "step": 5417 }, { "epoch": 0.502830626450116, "grad_norm": 36.446739196777344, "learning_rate": 5.367966294929337e-06, "loss": 23.0434, "step": 5418 }, { "epoch": 0.50292343387471, "grad_norm": 37.75642013549805, "learning_rate": 5.366435858649833e-06, "loss": 23.5243, "step": 5419 }, { "epoch": 0.5030162412993039, "grad_norm": 39.132171630859375, "learning_rate": 5.364905387853003e-06, "loss": 22.128, "step": 5420 }, { "epoch": 0.503109048723898, "grad_norm": 38.25633239746094, "learning_rate": 5.363374882683012e-06, "loss": 23.957, "step": 5421 }, { "epoch": 0.5032018561484919, "grad_norm": 34.32107925415039, "learning_rate": 5.361844343284029e-06, "loss": 23.2045, "step": 5422 }, { "epoch": 0.5032946635730858, "grad_norm": 34.22970962524414, "learning_rate": 5.360313769800231e-06, "loss": 23.1081, "step": 5423 }, { "epoch": 0.5033874709976798, "grad_norm": 42.13739776611328, "learning_rate": 5.35878316237579e-06, "loss": 23.6727, "step": 5424 }, { "epoch": 0.5034802784222738, "grad_norm": 40.41043472290039, "learning_rate": 5.357252521154886e-06, "loss": 25.419, "step": 5425 }, { "epoch": 0.5035730858468678, "grad_norm": 38.6887092590332, "learning_rate": 5.355721846281704e-06, "loss": 22.4086, "step": 5426 }, { "epoch": 0.5036658932714617, "grad_norm": 35.105751037597656, "learning_rate": 5.354191137900427e-06, "loss": 21.6433, "step": 5427 }, { "epoch": 0.5037587006960557, "grad_norm": 39.46664810180664, "learning_rate": 5.3526603961552445e-06, "loss": 24.5598, "step": 5428 }, { "epoch": 0.5038515081206496, "grad_norm": 37.37196731567383, "learning_rate": 5.3511296211903496e-06, "loss": 24.1775, "step": 5429 }, { "epoch": 0.5039443155452437, "grad_norm": 44.51808166503906, "learning_rate": 5.349598813149937e-06, "loss": 22.8483, "step": 5430 }, { "epoch": 0.5040371229698376, "grad_norm": 36.839210510253906, "learning_rate": 5.348067972178204e-06, "loss": 22.8168, "step": 5431 }, { "epoch": 0.5041299303944315, "grad_norm": 39.071128845214844, "learning_rate": 5.346537098419352e-06, "loss": 23.4641, "step": 5432 }, { "epoch": 0.5042227378190255, "grad_norm": 39.44913101196289, "learning_rate": 5.345006192017587e-06, "loss": 22.9638, "step": 5433 }, { "epoch": 0.5043155452436194, "grad_norm": 37.65711212158203, "learning_rate": 5.343475253117114e-06, "loss": 23.1427, "step": 5434 }, { "epoch": 0.5044083526682135, "grad_norm": 38.505332946777344, "learning_rate": 5.341944281862146e-06, "loss": 24.0125, "step": 5435 }, { "epoch": 0.5045011600928074, "grad_norm": 35.89242172241211, "learning_rate": 5.340413278396896e-06, "loss": 24.9561, "step": 5436 }, { "epoch": 0.5045939675174014, "grad_norm": 33.924259185791016, "learning_rate": 5.338882242865578e-06, "loss": 21.6463, "step": 5437 }, { "epoch": 0.5046867749419953, "grad_norm": 37.629554748535156, "learning_rate": 5.337351175412416e-06, "loss": 23.6915, "step": 5438 }, { "epoch": 0.5047795823665894, "grad_norm": 36.79233932495117, "learning_rate": 5.3358200761816295e-06, "loss": 23.2074, "step": 5439 }, { "epoch": 0.5048723897911833, "grad_norm": 38.65776443481445, "learning_rate": 5.334288945317445e-06, "loss": 24.2026, "step": 5440 }, { "epoch": 0.5049651972157773, "grad_norm": 34.77245330810547, "learning_rate": 5.332757782964091e-06, "loss": 22.6035, "step": 5441 }, { "epoch": 0.5050580046403712, "grad_norm": 38.726234436035156, "learning_rate": 5.331226589265801e-06, "loss": 25.6223, "step": 5442 }, { "epoch": 0.5051508120649651, "grad_norm": 42.430999755859375, "learning_rate": 5.329695364366807e-06, "loss": 23.0487, "step": 5443 }, { "epoch": 0.5052436194895592, "grad_norm": 36.535438537597656, "learning_rate": 5.328164108411347e-06, "loss": 23.7377, "step": 5444 }, { "epoch": 0.5053364269141531, "grad_norm": 39.51873016357422, "learning_rate": 5.3266328215436625e-06, "loss": 21.1181, "step": 5445 }, { "epoch": 0.5054292343387471, "grad_norm": 46.854862213134766, "learning_rate": 5.3251015039079966e-06, "loss": 23.25, "step": 5446 }, { "epoch": 0.505522041763341, "grad_norm": 37.358707427978516, "learning_rate": 5.323570155648593e-06, "loss": 24.9432, "step": 5447 }, { "epoch": 0.5056148491879351, "grad_norm": 40.09843063354492, "learning_rate": 5.322038776909705e-06, "loss": 22.7394, "step": 5448 }, { "epoch": 0.505707656612529, "grad_norm": 43.058406829833984, "learning_rate": 5.320507367835582e-06, "loss": 23.6189, "step": 5449 }, { "epoch": 0.505800464037123, "grad_norm": 38.59223937988281, "learning_rate": 5.3189759285704785e-06, "loss": 23.7313, "step": 5450 }, { "epoch": 0.5058932714617169, "grad_norm": 36.768795013427734, "learning_rate": 5.317444459258655e-06, "loss": 23.1101, "step": 5451 }, { "epoch": 0.5059860788863109, "grad_norm": 39.621421813964844, "learning_rate": 5.315912960044369e-06, "loss": 24.0301, "step": 5452 }, { "epoch": 0.5060788863109049, "grad_norm": 38.6140022277832, "learning_rate": 5.314381431071885e-06, "loss": 23.3847, "step": 5453 }, { "epoch": 0.5061716937354989, "grad_norm": 40.508548736572266, "learning_rate": 5.312849872485468e-06, "loss": 23.9754, "step": 5454 }, { "epoch": 0.5062645011600928, "grad_norm": 45.70331573486328, "learning_rate": 5.311318284429389e-06, "loss": 24.3598, "step": 5455 }, { "epoch": 0.5063573085846867, "grad_norm": 36.8646125793457, "learning_rate": 5.30978666704792e-06, "loss": 22.9418, "step": 5456 }, { "epoch": 0.5064501160092807, "grad_norm": 41.4665641784668, "learning_rate": 5.3082550204853325e-06, "loss": 23.9774, "step": 5457 }, { "epoch": 0.5065429234338747, "grad_norm": 38.37295913696289, "learning_rate": 5.3067233448859045e-06, "loss": 23.4113, "step": 5458 }, { "epoch": 0.5066357308584687, "grad_norm": 36.002254486083984, "learning_rate": 5.305191640393918e-06, "loss": 23.0164, "step": 5459 }, { "epoch": 0.5067285382830626, "grad_norm": 38.983272552490234, "learning_rate": 5.303659907153654e-06, "loss": 24.2836, "step": 5460 }, { "epoch": 0.5068213457076566, "grad_norm": 39.09098815917969, "learning_rate": 5.302128145309399e-06, "loss": 23.7815, "step": 5461 }, { "epoch": 0.5069141531322506, "grad_norm": 36.34818649291992, "learning_rate": 5.300596355005441e-06, "loss": 22.8409, "step": 5462 }, { "epoch": 0.5070069605568446, "grad_norm": 40.700538635253906, "learning_rate": 5.299064536386068e-06, "loss": 22.7248, "step": 5463 }, { "epoch": 0.5070997679814385, "grad_norm": 42.16246795654297, "learning_rate": 5.297532689595577e-06, "loss": 24.3337, "step": 5464 }, { "epoch": 0.5071925754060325, "grad_norm": 40.37421417236328, "learning_rate": 5.296000814778264e-06, "loss": 22.9368, "step": 5465 }, { "epoch": 0.5072853828306264, "grad_norm": 37.9775276184082, "learning_rate": 5.294468912078424e-06, "loss": 22.6077, "step": 5466 }, { "epoch": 0.5073781902552205, "grad_norm": 36.78367233276367, "learning_rate": 5.292936981640362e-06, "loss": 23.4014, "step": 5467 }, { "epoch": 0.5074709976798144, "grad_norm": 41.4825439453125, "learning_rate": 5.291405023608381e-06, "loss": 23.4373, "step": 5468 }, { "epoch": 0.5075638051044083, "grad_norm": 37.361202239990234, "learning_rate": 5.289873038126786e-06, "loss": 22.7779, "step": 5469 }, { "epoch": 0.5076566125290023, "grad_norm": 42.498809814453125, "learning_rate": 5.288341025339888e-06, "loss": 22.1454, "step": 5470 }, { "epoch": 0.5077494199535962, "grad_norm": 41.69169616699219, "learning_rate": 5.286808985391999e-06, "loss": 24.1538, "step": 5471 }, { "epoch": 0.5078422273781903, "grad_norm": 35.48735427856445, "learning_rate": 5.285276918427432e-06, "loss": 23.0412, "step": 5472 }, { "epoch": 0.5079350348027842, "grad_norm": 40.23234939575195, "learning_rate": 5.283744824590503e-06, "loss": 25.0649, "step": 5473 }, { "epoch": 0.5080278422273782, "grad_norm": 44.21879577636719, "learning_rate": 5.282212704025533e-06, "loss": 22.6453, "step": 5474 }, { "epoch": 0.5081206496519721, "grad_norm": 36.348384857177734, "learning_rate": 5.2806805568768446e-06, "loss": 23.421, "step": 5475 }, { "epoch": 0.5082134570765662, "grad_norm": 37.96224594116211, "learning_rate": 5.279148383288759e-06, "loss": 22.5896, "step": 5476 }, { "epoch": 0.5083062645011601, "grad_norm": 39.23067092895508, "learning_rate": 5.277616183405607e-06, "loss": 24.9951, "step": 5477 }, { "epoch": 0.5083990719257541, "grad_norm": 34.720977783203125, "learning_rate": 5.276083957371716e-06, "loss": 24.3562, "step": 5478 }, { "epoch": 0.508491879350348, "grad_norm": 37.293968200683594, "learning_rate": 5.274551705331415e-06, "loss": 22.1956, "step": 5479 }, { "epoch": 0.508584686774942, "grad_norm": 40.26507568359375, "learning_rate": 5.273019427429044e-06, "loss": 23.6159, "step": 5480 }, { "epoch": 0.508677494199536, "grad_norm": 37.85521697998047, "learning_rate": 5.271487123808936e-06, "loss": 23.0359, "step": 5481 }, { "epoch": 0.50877030162413, "grad_norm": 36.45867156982422, "learning_rate": 5.269954794615429e-06, "loss": 22.4897, "step": 5482 }, { "epoch": 0.5088631090487239, "grad_norm": 33.51345443725586, "learning_rate": 5.268422439992868e-06, "loss": 23.3538, "step": 5483 }, { "epoch": 0.5089559164733178, "grad_norm": 38.18270492553711, "learning_rate": 5.2668900600855955e-06, "loss": 21.8952, "step": 5484 }, { "epoch": 0.5090487238979118, "grad_norm": 44.59113693237305, "learning_rate": 5.265357655037956e-06, "loss": 24.6936, "step": 5485 }, { "epoch": 0.5091415313225058, "grad_norm": 41.609275817871094, "learning_rate": 5.263825224994301e-06, "loss": 25.1619, "step": 5486 }, { "epoch": 0.5092343387470998, "grad_norm": 38.96906661987305, "learning_rate": 5.26229277009898e-06, "loss": 25.4379, "step": 5487 }, { "epoch": 0.5093271461716937, "grad_norm": 37.49117660522461, "learning_rate": 5.260760290496346e-06, "loss": 23.1566, "step": 5488 }, { "epoch": 0.5094199535962877, "grad_norm": 38.55168533325195, "learning_rate": 5.259227786330756e-06, "loss": 25.2488, "step": 5489 }, { "epoch": 0.5095127610208817, "grad_norm": 39.024513244628906, "learning_rate": 5.257695257746567e-06, "loss": 24.4539, "step": 5490 }, { "epoch": 0.5096055684454757, "grad_norm": 42.44103240966797, "learning_rate": 5.2561627048881394e-06, "loss": 23.7074, "step": 5491 }, { "epoch": 0.5096983758700696, "grad_norm": 41.1716423034668, "learning_rate": 5.2546301278998355e-06, "loss": 23.0152, "step": 5492 }, { "epoch": 0.5097911832946636, "grad_norm": 34.121337890625, "learning_rate": 5.253097526926021e-06, "loss": 20.9714, "step": 5493 }, { "epoch": 0.5098839907192575, "grad_norm": 40.13017654418945, "learning_rate": 5.251564902111063e-06, "loss": 22.8263, "step": 5494 }, { "epoch": 0.5099767981438516, "grad_norm": 36.4417724609375, "learning_rate": 5.250032253599329e-06, "loss": 22.0782, "step": 5495 }, { "epoch": 0.5100696055684455, "grad_norm": 38.5860710144043, "learning_rate": 5.248499581535193e-06, "loss": 25.3248, "step": 5496 }, { "epoch": 0.5101624129930394, "grad_norm": 35.98112487792969, "learning_rate": 5.246966886063029e-06, "loss": 22.5775, "step": 5497 }, { "epoch": 0.5102552204176334, "grad_norm": 40.69144821166992, "learning_rate": 5.24543416732721e-06, "loss": 23.1874, "step": 5498 }, { "epoch": 0.5103480278422273, "grad_norm": 36.59828567504883, "learning_rate": 5.243901425472117e-06, "loss": 23.3967, "step": 5499 }, { "epoch": 0.5104408352668214, "grad_norm": 37.03846740722656, "learning_rate": 5.242368660642128e-06, "loss": 24.2475, "step": 5500 }, { "epoch": 0.5105336426914153, "grad_norm": 35.882816314697266, "learning_rate": 5.2408358729816286e-06, "loss": 24.3554, "step": 5501 }, { "epoch": 0.5106264501160093, "grad_norm": 35.19126892089844, "learning_rate": 5.239303062635001e-06, "loss": 22.902, "step": 5502 }, { "epoch": 0.5107192575406032, "grad_norm": 36.92741775512695, "learning_rate": 5.2377702297466345e-06, "loss": 24.0096, "step": 5503 }, { "epoch": 0.5108120649651973, "grad_norm": 34.6469841003418, "learning_rate": 5.2362373744609166e-06, "loss": 22.6684, "step": 5504 }, { "epoch": 0.5109048723897912, "grad_norm": 36.857513427734375, "learning_rate": 5.234704496922237e-06, "loss": 22.8839, "step": 5505 }, { "epoch": 0.5109976798143852, "grad_norm": 35.10027313232422, "learning_rate": 5.2331715972749904e-06, "loss": 22.2766, "step": 5506 }, { "epoch": 0.5110904872389791, "grad_norm": 35.332035064697266, "learning_rate": 5.231638675663573e-06, "loss": 22.6706, "step": 5507 }, { "epoch": 0.511183294663573, "grad_norm": 44.24470520019531, "learning_rate": 5.2301057322323786e-06, "loss": 23.8302, "step": 5508 }, { "epoch": 0.5112761020881671, "grad_norm": 40.074867248535156, "learning_rate": 5.228572767125812e-06, "loss": 22.6441, "step": 5509 }, { "epoch": 0.511368909512761, "grad_norm": 37.62450408935547, "learning_rate": 5.22703978048827e-06, "loss": 22.9692, "step": 5510 }, { "epoch": 0.511461716937355, "grad_norm": 37.455501556396484, "learning_rate": 5.225506772464156e-06, "loss": 22.8008, "step": 5511 }, { "epoch": 0.5115545243619489, "grad_norm": 46.5788688659668, "learning_rate": 5.223973743197878e-06, "loss": 22.9492, "step": 5512 }, { "epoch": 0.5116473317865429, "grad_norm": 42.37410354614258, "learning_rate": 5.222440692833845e-06, "loss": 23.3737, "step": 5513 }, { "epoch": 0.5117401392111369, "grad_norm": 38.503562927246094, "learning_rate": 5.220907621516461e-06, "loss": 23.0708, "step": 5514 }, { "epoch": 0.5118329466357309, "grad_norm": 37.972633361816406, "learning_rate": 5.219374529390142e-06, "loss": 24.0889, "step": 5515 }, { "epoch": 0.5119257540603248, "grad_norm": 42.89044952392578, "learning_rate": 5.217841416599299e-06, "loss": 22.8473, "step": 5516 }, { "epoch": 0.5120185614849188, "grad_norm": 42.963680267333984, "learning_rate": 5.216308283288349e-06, "loss": 23.988, "step": 5517 }, { "epoch": 0.5121113689095128, "grad_norm": 37.24037551879883, "learning_rate": 5.214775129601707e-06, "loss": 25.2586, "step": 5518 }, { "epoch": 0.5122041763341068, "grad_norm": 46.803192138671875, "learning_rate": 5.213241955683795e-06, "loss": 23.3442, "step": 5519 }, { "epoch": 0.5122969837587007, "grad_norm": 43.61336898803711, "learning_rate": 5.211708761679031e-06, "loss": 23.8314, "step": 5520 }, { "epoch": 0.5123897911832946, "grad_norm": 38.763607025146484, "learning_rate": 5.210175547731841e-06, "loss": 23.4695, "step": 5521 }, { "epoch": 0.5124825986078886, "grad_norm": 33.29350662231445, "learning_rate": 5.208642313986649e-06, "loss": 21.6388, "step": 5522 }, { "epoch": 0.5125754060324826, "grad_norm": 38.2258186340332, "learning_rate": 5.207109060587878e-06, "loss": 22.2351, "step": 5523 }, { "epoch": 0.5126682134570766, "grad_norm": 36.556907653808594, "learning_rate": 5.205575787679963e-06, "loss": 23.1629, "step": 5524 }, { "epoch": 0.5127610208816705, "grad_norm": 36.85776138305664, "learning_rate": 5.20404249540733e-06, "loss": 22.0277, "step": 5525 }, { "epoch": 0.5128538283062645, "grad_norm": 37.43922424316406, "learning_rate": 5.2025091839144124e-06, "loss": 22.9175, "step": 5526 }, { "epoch": 0.5129466357308584, "grad_norm": 38.614139556884766, "learning_rate": 5.200975853345645e-06, "loss": 23.4925, "step": 5527 }, { "epoch": 0.5130394431554525, "grad_norm": 38.556819915771484, "learning_rate": 5.199442503845462e-06, "loss": 24.5245, "step": 5528 }, { "epoch": 0.5131322505800464, "grad_norm": 45.02572250366211, "learning_rate": 5.197909135558302e-06, "loss": 24.6348, "step": 5529 }, { "epoch": 0.5132250580046404, "grad_norm": 38.77293395996094, "learning_rate": 5.196375748628604e-06, "loss": 23.8459, "step": 5530 }, { "epoch": 0.5133178654292343, "grad_norm": 33.70772171020508, "learning_rate": 5.194842343200811e-06, "loss": 22.0853, "step": 5531 }, { "epoch": 0.5134106728538284, "grad_norm": 40.44572830200195, "learning_rate": 5.193308919419363e-06, "loss": 22.4724, "step": 5532 }, { "epoch": 0.5135034802784223, "grad_norm": 44.34925842285156, "learning_rate": 5.191775477428705e-06, "loss": 24.9702, "step": 5533 }, { "epoch": 0.5135962877030162, "grad_norm": 61.51597595214844, "learning_rate": 5.190242017373287e-06, "loss": 23.3111, "step": 5534 }, { "epoch": 0.5136890951276102, "grad_norm": 40.33662796020508, "learning_rate": 5.188708539397554e-06, "loss": 24.2919, "step": 5535 }, { "epoch": 0.5137819025522041, "grad_norm": 45.8161506652832, "learning_rate": 5.1871750436459535e-06, "loss": 21.9505, "step": 5536 }, { "epoch": 0.5138747099767982, "grad_norm": 42.64876937866211, "learning_rate": 5.1856415302629415e-06, "loss": 23.3457, "step": 5537 }, { "epoch": 0.5139675174013921, "grad_norm": 44.75569152832031, "learning_rate": 5.18410799939297e-06, "loss": 25.0005, "step": 5538 }, { "epoch": 0.5140603248259861, "grad_norm": 36.0825080871582, "learning_rate": 5.182574451180491e-06, "loss": 22.8328, "step": 5539 }, { "epoch": 0.51415313225058, "grad_norm": 45.44431686401367, "learning_rate": 5.181040885769963e-06, "loss": 23.799, "step": 5540 }, { "epoch": 0.5142459396751741, "grad_norm": 42.88694381713867, "learning_rate": 5.179507303305844e-06, "loss": 23.8653, "step": 5541 }, { "epoch": 0.514338747099768, "grad_norm": 37.70195770263672, "learning_rate": 5.177973703932593e-06, "loss": 24.1933, "step": 5542 }, { "epoch": 0.514431554524362, "grad_norm": 33.78532791137695, "learning_rate": 5.176440087794671e-06, "loss": 23.3266, "step": 5543 }, { "epoch": 0.5145243619489559, "grad_norm": 35.2850227355957, "learning_rate": 5.1749064550365414e-06, "loss": 23.9069, "step": 5544 }, { "epoch": 0.5146171693735498, "grad_norm": 35.547786712646484, "learning_rate": 5.173372805802668e-06, "loss": 23.8323, "step": 5545 }, { "epoch": 0.5147099767981439, "grad_norm": 36.75347137451172, "learning_rate": 5.171839140237517e-06, "loss": 23.211, "step": 5546 }, { "epoch": 0.5148027842227378, "grad_norm": 37.66685104370117, "learning_rate": 5.170305458485555e-06, "loss": 24.9361, "step": 5547 }, { "epoch": 0.5148955916473318, "grad_norm": 37.974609375, "learning_rate": 5.168771760691252e-06, "loss": 25.8343, "step": 5548 }, { "epoch": 0.5149883990719257, "grad_norm": 36.692710876464844, "learning_rate": 5.167238046999078e-06, "loss": 24.2401, "step": 5549 }, { "epoch": 0.5150812064965197, "grad_norm": 40.24166488647461, "learning_rate": 5.1657043175535045e-06, "loss": 23.4027, "step": 5550 }, { "epoch": 0.5151740139211137, "grad_norm": 40.404998779296875, "learning_rate": 5.164170572499007e-06, "loss": 21.8861, "step": 5551 }, { "epoch": 0.5152668213457077, "grad_norm": 39.513668060302734, "learning_rate": 5.162636811980057e-06, "loss": 23.248, "step": 5552 }, { "epoch": 0.5153596287703016, "grad_norm": 38.363136291503906, "learning_rate": 5.161103036141134e-06, "loss": 24.231, "step": 5553 }, { "epoch": 0.5154524361948956, "grad_norm": 65.86824798583984, "learning_rate": 5.159569245126713e-06, "loss": 21.8941, "step": 5554 }, { "epoch": 0.5155452436194896, "grad_norm": 37.09012985229492, "learning_rate": 5.158035439081274e-06, "loss": 21.9374, "step": 5555 }, { "epoch": 0.5156380510440836, "grad_norm": 38.700557708740234, "learning_rate": 5.156501618149301e-06, "loss": 24.7192, "step": 5556 }, { "epoch": 0.5157308584686775, "grad_norm": 37.85133361816406, "learning_rate": 5.154967782475272e-06, "loss": 22.1949, "step": 5557 }, { "epoch": 0.5158236658932714, "grad_norm": 35.64483642578125, "learning_rate": 5.153433932203671e-06, "loss": 23.4768, "step": 5558 }, { "epoch": 0.5159164733178654, "grad_norm": 43.15265655517578, "learning_rate": 5.151900067478983e-06, "loss": 25.1844, "step": 5559 }, { "epoch": 0.5160092807424594, "grad_norm": 37.51622772216797, "learning_rate": 5.1503661884456965e-06, "loss": 26.2221, "step": 5560 }, { "epoch": 0.5161020881670534, "grad_norm": 37.36909484863281, "learning_rate": 5.148832295248296e-06, "loss": 20.3237, "step": 5561 }, { "epoch": 0.5161948955916473, "grad_norm": 36.13814926147461, "learning_rate": 5.147298388031271e-06, "loss": 22.5113, "step": 5562 }, { "epoch": 0.5162877030162413, "grad_norm": 42.74090576171875, "learning_rate": 5.145764466939113e-06, "loss": 23.4809, "step": 5563 }, { "epoch": 0.5163805104408352, "grad_norm": 38.51095199584961, "learning_rate": 5.144230532116313e-06, "loss": 22.9175, "step": 5564 }, { "epoch": 0.5164733178654293, "grad_norm": 46.86982345581055, "learning_rate": 5.142696583707361e-06, "loss": 23.1482, "step": 5565 }, { "epoch": 0.5165661252900232, "grad_norm": 35.85587692260742, "learning_rate": 5.141162621856756e-06, "loss": 23.1524, "step": 5566 }, { "epoch": 0.5166589327146172, "grad_norm": 37.61322784423828, "learning_rate": 5.139628646708991e-06, "loss": 22.7479, "step": 5567 }, { "epoch": 0.5167517401392111, "grad_norm": 38.59726333618164, "learning_rate": 5.13809465840856e-06, "loss": 22.1684, "step": 5568 }, { "epoch": 0.5168445475638052, "grad_norm": 36.37924575805664, "learning_rate": 5.1365606570999634e-06, "loss": 22.5926, "step": 5569 }, { "epoch": 0.5169373549883991, "grad_norm": 40.0991096496582, "learning_rate": 5.135026642927702e-06, "loss": 22.6045, "step": 5570 }, { "epoch": 0.517030162412993, "grad_norm": 40.874595642089844, "learning_rate": 5.133492616036272e-06, "loss": 21.6674, "step": 5571 }, { "epoch": 0.517122969837587, "grad_norm": 65.94874572753906, "learning_rate": 5.1319585765701775e-06, "loss": 26.6723, "step": 5572 }, { "epoch": 0.5172157772621809, "grad_norm": 37.32643508911133, "learning_rate": 5.13042452467392e-06, "loss": 22.424, "step": 5573 }, { "epoch": 0.517308584686775, "grad_norm": 41.89629364013672, "learning_rate": 5.128890460492004e-06, "loss": 23.6876, "step": 5574 }, { "epoch": 0.5174013921113689, "grad_norm": 39.89307403564453, "learning_rate": 5.127356384168934e-06, "loss": 24.1389, "step": 5575 }, { "epoch": 0.5174941995359629, "grad_norm": 40.90850067138672, "learning_rate": 5.125822295849217e-06, "loss": 23.1952, "step": 5576 }, { "epoch": 0.5175870069605568, "grad_norm": 46.438323974609375, "learning_rate": 5.1242881956773585e-06, "loss": 22.7375, "step": 5577 }, { "epoch": 0.5176798143851508, "grad_norm": 44.951175689697266, "learning_rate": 5.1227540837978685e-06, "loss": 22.5468, "step": 5578 }, { "epoch": 0.5177726218097448, "grad_norm": 38.34579849243164, "learning_rate": 5.121219960355255e-06, "loss": 23.9447, "step": 5579 }, { "epoch": 0.5178654292343388, "grad_norm": 39.4112548828125, "learning_rate": 5.11968582549403e-06, "loss": 23.3152, "step": 5580 }, { "epoch": 0.5179582366589327, "grad_norm": 36.969112396240234, "learning_rate": 5.118151679358704e-06, "loss": 22.1375, "step": 5581 }, { "epoch": 0.5180510440835266, "grad_norm": 42.84189224243164, "learning_rate": 5.1166175220937895e-06, "loss": 24.7822, "step": 5582 }, { "epoch": 0.5181438515081207, "grad_norm": 38.768028259277344, "learning_rate": 5.115083353843802e-06, "loss": 26.1187, "step": 5583 }, { "epoch": 0.5182366589327146, "grad_norm": 40.20003890991211, "learning_rate": 5.113549174753253e-06, "loss": 23.637, "step": 5584 }, { "epoch": 0.5183294663573086, "grad_norm": 39.954681396484375, "learning_rate": 5.1120149849666624e-06, "loss": 23.2855, "step": 5585 }, { "epoch": 0.5184222737819025, "grad_norm": 40.13822937011719, "learning_rate": 5.110480784628544e-06, "loss": 23.5235, "step": 5586 }, { "epoch": 0.5185150812064965, "grad_norm": 37.44269561767578, "learning_rate": 5.108946573883415e-06, "loss": 23.2139, "step": 5587 }, { "epoch": 0.5186078886310905, "grad_norm": 39.00898361206055, "learning_rate": 5.107412352875798e-06, "loss": 23.9851, "step": 5588 }, { "epoch": 0.5187006960556845, "grad_norm": 36.89058303833008, "learning_rate": 5.105878121750209e-06, "loss": 23.4067, "step": 5589 }, { "epoch": 0.5187935034802784, "grad_norm": 38.652557373046875, "learning_rate": 5.1043438806511705e-06, "loss": 22.4854, "step": 5590 }, { "epoch": 0.5188863109048724, "grad_norm": 37.441829681396484, "learning_rate": 5.102809629723203e-06, "loss": 23.7106, "step": 5591 }, { "epoch": 0.5189791183294663, "grad_norm": 35.72856140136719, "learning_rate": 5.10127536911083e-06, "loss": 23.015, "step": 5592 }, { "epoch": 0.5190719257540604, "grad_norm": 36.88618469238281, "learning_rate": 5.099741098958574e-06, "loss": 22.6473, "step": 5593 }, { "epoch": 0.5191647331786543, "grad_norm": 37.27290344238281, "learning_rate": 5.09820681941096e-06, "loss": 22.8491, "step": 5594 }, { "epoch": 0.5192575406032482, "grad_norm": 37.63813400268555, "learning_rate": 5.096672530612514e-06, "loss": 22.9372, "step": 5595 }, { "epoch": 0.5193503480278422, "grad_norm": 37.87267303466797, "learning_rate": 5.095138232707762e-06, "loss": 23.0403, "step": 5596 }, { "epoch": 0.5194431554524362, "grad_norm": 34.01945114135742, "learning_rate": 5.093603925841227e-06, "loss": 22.9447, "step": 5597 }, { "epoch": 0.5195359628770302, "grad_norm": 36.93305587768555, "learning_rate": 5.092069610157443e-06, "loss": 23.8024, "step": 5598 }, { "epoch": 0.5196287703016241, "grad_norm": 39.55243682861328, "learning_rate": 5.090535285800935e-06, "loss": 23.1263, "step": 5599 }, { "epoch": 0.5197215777262181, "grad_norm": 41.03054428100586, "learning_rate": 5.089000952916232e-06, "loss": 24.2906, "step": 5600 }, { "epoch": 0.519814385150812, "grad_norm": 38.55063247680664, "learning_rate": 5.087466611647867e-06, "loss": 23.5346, "step": 5601 }, { "epoch": 0.5199071925754061, "grad_norm": 39.209129333496094, "learning_rate": 5.08593226214037e-06, "loss": 23.8008, "step": 5602 }, { "epoch": 0.52, "grad_norm": 44.226314544677734, "learning_rate": 5.08439790453827e-06, "loss": 23.224, "step": 5603 }, { "epoch": 0.520092807424594, "grad_norm": 35.768035888671875, "learning_rate": 5.082863538986103e-06, "loss": 22.1502, "step": 5604 }, { "epoch": 0.5201856148491879, "grad_norm": 43.30122375488281, "learning_rate": 5.081329165628402e-06, "loss": 23.5497, "step": 5605 }, { "epoch": 0.5202784222737818, "grad_norm": 39.6214714050293, "learning_rate": 5.0797947846096975e-06, "loss": 24.2151, "step": 5606 }, { "epoch": 0.5203712296983759, "grad_norm": 39.14594650268555, "learning_rate": 5.078260396074529e-06, "loss": 22.6894, "step": 5607 }, { "epoch": 0.5204640371229698, "grad_norm": 38.75547790527344, "learning_rate": 5.076726000167432e-06, "loss": 23.4833, "step": 5608 }, { "epoch": 0.5205568445475638, "grad_norm": 36.962615966796875, "learning_rate": 5.075191597032937e-06, "loss": 24.1764, "step": 5609 }, { "epoch": 0.5206496519721577, "grad_norm": 41.51499557495117, "learning_rate": 5.073657186815586e-06, "loss": 23.1543, "step": 5610 }, { "epoch": 0.5207424593967518, "grad_norm": 42.8480339050293, "learning_rate": 5.072122769659917e-06, "loss": 23.6326, "step": 5611 }, { "epoch": 0.5208352668213457, "grad_norm": 38.33436584472656, "learning_rate": 5.070588345710463e-06, "loss": 23.0024, "step": 5612 }, { "epoch": 0.5209280742459397, "grad_norm": 40.311607360839844, "learning_rate": 5.069053915111769e-06, "loss": 22.7189, "step": 5613 }, { "epoch": 0.5210208816705336, "grad_norm": 43.91032409667969, "learning_rate": 5.06751947800837e-06, "loss": 24.0136, "step": 5614 }, { "epoch": 0.5211136890951276, "grad_norm": 38.79682540893555, "learning_rate": 5.065985034544809e-06, "loss": 23.4555, "step": 5615 }, { "epoch": 0.5212064965197216, "grad_norm": 32.97535705566406, "learning_rate": 5.064450584865624e-06, "loss": 21.2083, "step": 5616 }, { "epoch": 0.5212993039443156, "grad_norm": 35.88540267944336, "learning_rate": 5.06291612911536e-06, "loss": 23.7424, "step": 5617 }, { "epoch": 0.5213921113689095, "grad_norm": 45.05177688598633, "learning_rate": 5.061381667438556e-06, "loss": 25.0678, "step": 5618 }, { "epoch": 0.5214849187935034, "grad_norm": 41.729801177978516, "learning_rate": 5.059847199979753e-06, "loss": 22.9791, "step": 5619 }, { "epoch": 0.5215777262180974, "grad_norm": 39.30028533935547, "learning_rate": 5.058312726883496e-06, "loss": 24.108, "step": 5620 }, { "epoch": 0.5216705336426914, "grad_norm": 43.40552520751953, "learning_rate": 5.05677824829433e-06, "loss": 22.2343, "step": 5621 }, { "epoch": 0.5217633410672854, "grad_norm": 40.02497100830078, "learning_rate": 5.055243764356795e-06, "loss": 23.5536, "step": 5622 }, { "epoch": 0.5218561484918793, "grad_norm": 37.36555862426758, "learning_rate": 5.053709275215441e-06, "loss": 23.4611, "step": 5623 }, { "epoch": 0.5219489559164733, "grad_norm": 42.595977783203125, "learning_rate": 5.0521747810148065e-06, "loss": 24.1594, "step": 5624 }, { "epoch": 0.5220417633410673, "grad_norm": 38.907955169677734, "learning_rate": 5.050640281899442e-06, "loss": 25.8741, "step": 5625 }, { "epoch": 0.5221345707656613, "grad_norm": 40.44002151489258, "learning_rate": 5.0491057780138906e-06, "loss": 23.9218, "step": 5626 }, { "epoch": 0.5222273781902552, "grad_norm": 39.934364318847656, "learning_rate": 5.047571269502701e-06, "loss": 24.6369, "step": 5627 }, { "epoch": 0.5223201856148492, "grad_norm": 36.93208694458008, "learning_rate": 5.046036756510417e-06, "loss": 22.9808, "step": 5628 }, { "epoch": 0.5224129930394431, "grad_norm": 34.09813690185547, "learning_rate": 5.044502239181589e-06, "loss": 22.5246, "step": 5629 }, { "epoch": 0.5225058004640372, "grad_norm": 46.31376647949219, "learning_rate": 5.042967717660764e-06, "loss": 23.935, "step": 5630 }, { "epoch": 0.5225986078886311, "grad_norm": 42.49293899536133, "learning_rate": 5.0414331920924865e-06, "loss": 23.1189, "step": 5631 }, { "epoch": 0.522691415313225, "grad_norm": 38.54207992553711, "learning_rate": 5.03989866262131e-06, "loss": 22.6401, "step": 5632 }, { "epoch": 0.522784222737819, "grad_norm": 37.30550765991211, "learning_rate": 5.0383641293917795e-06, "loss": 23.5609, "step": 5633 }, { "epoch": 0.522877030162413, "grad_norm": 39.70903396606445, "learning_rate": 5.036829592548446e-06, "loss": 22.5785, "step": 5634 }, { "epoch": 0.522969837587007, "grad_norm": 43.1698112487793, "learning_rate": 5.035295052235858e-06, "loss": 24.151, "step": 5635 }, { "epoch": 0.5230626450116009, "grad_norm": 36.35567855834961, "learning_rate": 5.0337605085985654e-06, "loss": 22.5033, "step": 5636 }, { "epoch": 0.5231554524361949, "grad_norm": 39.09049606323242, "learning_rate": 5.03222596178112e-06, "loss": 25.293, "step": 5637 }, { "epoch": 0.5232482598607888, "grad_norm": 42.852359771728516, "learning_rate": 5.0306914119280705e-06, "loss": 25.7676, "step": 5638 }, { "epoch": 0.5233410672853829, "grad_norm": 43.87407302856445, "learning_rate": 5.029156859183967e-06, "loss": 24.9138, "step": 5639 }, { "epoch": 0.5234338747099768, "grad_norm": 36.8213005065918, "learning_rate": 5.027622303693363e-06, "loss": 22.6955, "step": 5640 }, { "epoch": 0.5235266821345708, "grad_norm": 38.324947357177734, "learning_rate": 5.026087745600805e-06, "loss": 23.0571, "step": 5641 }, { "epoch": 0.5236194895591647, "grad_norm": 44.0760498046875, "learning_rate": 5.024553185050851e-06, "loss": 22.9313, "step": 5642 }, { "epoch": 0.5237122969837587, "grad_norm": 39.52387237548828, "learning_rate": 5.023018622188048e-06, "loss": 22.4631, "step": 5643 }, { "epoch": 0.5238051044083527, "grad_norm": 44.721466064453125, "learning_rate": 5.021484057156949e-06, "loss": 24.772, "step": 5644 }, { "epoch": 0.5238979118329467, "grad_norm": 40.28618621826172, "learning_rate": 5.019949490102107e-06, "loss": 24.1976, "step": 5645 }, { "epoch": 0.5239907192575406, "grad_norm": 36.19152069091797, "learning_rate": 5.018414921168075e-06, "loss": 22.6088, "step": 5646 }, { "epoch": 0.5240835266821345, "grad_norm": 36.42237091064453, "learning_rate": 5.0168803504994024e-06, "loss": 22.2719, "step": 5647 }, { "epoch": 0.5241763341067286, "grad_norm": 41.914772033691406, "learning_rate": 5.0153457782406445e-06, "loss": 23.4056, "step": 5648 }, { "epoch": 0.5242691415313225, "grad_norm": 46.70215606689453, "learning_rate": 5.013811204536353e-06, "loss": 24.2219, "step": 5649 }, { "epoch": 0.5243619489559165, "grad_norm": 45.9857063293457, "learning_rate": 5.012276629531082e-06, "loss": 23.8923, "step": 5650 }, { "epoch": 0.5244547563805104, "grad_norm": 36.76176071166992, "learning_rate": 5.010742053369382e-06, "loss": 21.7077, "step": 5651 }, { "epoch": 0.5245475638051044, "grad_norm": 36.21367645263672, "learning_rate": 5.0092074761958085e-06, "loss": 23.0614, "step": 5652 }, { "epoch": 0.5246403712296984, "grad_norm": 84.99333190917969, "learning_rate": 5.007672898154915e-06, "loss": 25.9142, "step": 5653 }, { "epoch": 0.5247331786542924, "grad_norm": 38.77506637573242, "learning_rate": 5.006138319391253e-06, "loss": 24.1742, "step": 5654 }, { "epoch": 0.5248259860788863, "grad_norm": 38.10749435424805, "learning_rate": 5.004603740049378e-06, "loss": 24.4213, "step": 5655 }, { "epoch": 0.5249187935034803, "grad_norm": 36.181243896484375, "learning_rate": 5.003069160273841e-06, "loss": 24.6608, "step": 5656 }, { "epoch": 0.5250116009280742, "grad_norm": 34.69965744018555, "learning_rate": 5.001534580209198e-06, "loss": 23.1047, "step": 5657 }, { "epoch": 0.5251044083526683, "grad_norm": 39.45112991333008, "learning_rate": 5e-06, "loss": 26.0412, "step": 5658 }, { "epoch": 0.5251972157772622, "grad_norm": 40.257320404052734, "learning_rate": 4.998465419790803e-06, "loss": 21.8366, "step": 5659 }, { "epoch": 0.5252900232018561, "grad_norm": 39.07912826538086, "learning_rate": 4.9969308397261615e-06, "loss": 23.7883, "step": 5660 }, { "epoch": 0.5253828306264501, "grad_norm": 33.52744674682617, "learning_rate": 4.995396259950624e-06, "loss": 23.9229, "step": 5661 }, { "epoch": 0.5254756380510441, "grad_norm": 38.14943313598633, "learning_rate": 4.993861680608748e-06, "loss": 23.5277, "step": 5662 }, { "epoch": 0.5255684454756381, "grad_norm": 35.16341781616211, "learning_rate": 4.992327101845088e-06, "loss": 23.1592, "step": 5663 }, { "epoch": 0.525661252900232, "grad_norm": 38.82258987426758, "learning_rate": 4.990792523804192e-06, "loss": 22.5028, "step": 5664 }, { "epoch": 0.525754060324826, "grad_norm": 40.327449798583984, "learning_rate": 4.989257946630618e-06, "loss": 21.6581, "step": 5665 }, { "epoch": 0.5258468677494199, "grad_norm": 38.338623046875, "learning_rate": 4.987723370468921e-06, "loss": 26.7355, "step": 5666 }, { "epoch": 0.525939675174014, "grad_norm": 35.87912368774414, "learning_rate": 4.986188795463648e-06, "loss": 23.9038, "step": 5667 }, { "epoch": 0.5260324825986079, "grad_norm": 38.14809799194336, "learning_rate": 4.9846542217593555e-06, "loss": 24.3329, "step": 5668 }, { "epoch": 0.5261252900232019, "grad_norm": 35.79155731201172, "learning_rate": 4.983119649500599e-06, "loss": 24.523, "step": 5669 }, { "epoch": 0.5262180974477958, "grad_norm": 34.718257904052734, "learning_rate": 4.981585078831926e-06, "loss": 23.4298, "step": 5670 }, { "epoch": 0.5263109048723897, "grad_norm": 42.07558822631836, "learning_rate": 4.980050509897892e-06, "loss": 24.2088, "step": 5671 }, { "epoch": 0.5264037122969838, "grad_norm": 41.85136032104492, "learning_rate": 4.978515942843052e-06, "loss": 24.7343, "step": 5672 }, { "epoch": 0.5264965197215777, "grad_norm": 33.395416259765625, "learning_rate": 4.976981377811953e-06, "loss": 21.5183, "step": 5673 }, { "epoch": 0.5265893271461717, "grad_norm": 39.97321319580078, "learning_rate": 4.97544681494915e-06, "loss": 22.8272, "step": 5674 }, { "epoch": 0.5266821345707656, "grad_norm": 40.6630973815918, "learning_rate": 4.973912254399196e-06, "loss": 22.976, "step": 5675 }, { "epoch": 0.5267749419953597, "grad_norm": 32.688011169433594, "learning_rate": 4.972377696306639e-06, "loss": 23.3482, "step": 5676 }, { "epoch": 0.5268677494199536, "grad_norm": 39.93585968017578, "learning_rate": 4.970843140816035e-06, "loss": 24.5841, "step": 5677 }, { "epoch": 0.5269605568445476, "grad_norm": 37.25730514526367, "learning_rate": 4.969308588071932e-06, "loss": 22.0891, "step": 5678 }, { "epoch": 0.5270533642691415, "grad_norm": 39.46461486816406, "learning_rate": 4.967774038218882e-06, "loss": 23.8105, "step": 5679 }, { "epoch": 0.5271461716937355, "grad_norm": 39.934146881103516, "learning_rate": 4.966239491401436e-06, "loss": 23.8628, "step": 5680 }, { "epoch": 0.5272389791183295, "grad_norm": 35.164981842041016, "learning_rate": 4.9647049477641435e-06, "loss": 22.7001, "step": 5681 }, { "epoch": 0.5273317865429235, "grad_norm": 41.52621841430664, "learning_rate": 4.963170407451556e-06, "loss": 23.8017, "step": 5682 }, { "epoch": 0.5274245939675174, "grad_norm": 43.580970764160156, "learning_rate": 4.961635870608223e-06, "loss": 22.5606, "step": 5683 }, { "epoch": 0.5275174013921113, "grad_norm": 38.670101165771484, "learning_rate": 4.960101337378692e-06, "loss": 23.4685, "step": 5684 }, { "epoch": 0.5276102088167053, "grad_norm": 41.04718017578125, "learning_rate": 4.958566807907515e-06, "loss": 24.0525, "step": 5685 }, { "epoch": 0.5277030162412993, "grad_norm": 35.15133285522461, "learning_rate": 4.957032282339239e-06, "loss": 22.726, "step": 5686 }, { "epoch": 0.5277958236658933, "grad_norm": 40.81343460083008, "learning_rate": 4.9554977608184114e-06, "loss": 26.0747, "step": 5687 }, { "epoch": 0.5278886310904872, "grad_norm": 41.11767578125, "learning_rate": 4.953963243489583e-06, "loss": 24.5085, "step": 5688 }, { "epoch": 0.5279814385150812, "grad_norm": 40.22275161743164, "learning_rate": 4.9524287304973016e-06, "loss": 23.8899, "step": 5689 }, { "epoch": 0.5280742459396752, "grad_norm": 32.984500885009766, "learning_rate": 4.95089422198611e-06, "loss": 21.6842, "step": 5690 }, { "epoch": 0.5281670533642692, "grad_norm": 37.528297424316406, "learning_rate": 4.949359718100559e-06, "loss": 23.8196, "step": 5691 }, { "epoch": 0.5282598607888631, "grad_norm": 37.50550842285156, "learning_rate": 4.947825218985195e-06, "loss": 24.9879, "step": 5692 }, { "epoch": 0.528352668213457, "grad_norm": 34.1796989440918, "learning_rate": 4.946290724784562e-06, "loss": 21.281, "step": 5693 }, { "epoch": 0.528445475638051, "grad_norm": 38.74797821044922, "learning_rate": 4.944756235643205e-06, "loss": 24.7239, "step": 5694 }, { "epoch": 0.528538283062645, "grad_norm": 36.194637298583984, "learning_rate": 4.943221751705673e-06, "loss": 22.0974, "step": 5695 }, { "epoch": 0.528631090487239, "grad_norm": 35.672428131103516, "learning_rate": 4.941687273116505e-06, "loss": 23.4993, "step": 5696 }, { "epoch": 0.5287238979118329, "grad_norm": 36.884666442871094, "learning_rate": 4.940152800020248e-06, "loss": 23.8, "step": 5697 }, { "epoch": 0.5288167053364269, "grad_norm": 37.56248474121094, "learning_rate": 4.938618332561447e-06, "loss": 24.2543, "step": 5698 }, { "epoch": 0.5289095127610208, "grad_norm": 40.28647232055664, "learning_rate": 4.937083870884642e-06, "loss": 22.7015, "step": 5699 }, { "epoch": 0.5290023201856149, "grad_norm": 37.98271560668945, "learning_rate": 4.935549415134376e-06, "loss": 22.9389, "step": 5700 }, { "epoch": 0.5290951276102088, "grad_norm": 37.59836196899414, "learning_rate": 4.934014965455193e-06, "loss": 22.5426, "step": 5701 }, { "epoch": 0.5291879350348028, "grad_norm": 37.896854400634766, "learning_rate": 4.9324805219916305e-06, "loss": 22.6059, "step": 5702 }, { "epoch": 0.5292807424593967, "grad_norm": 40.54892349243164, "learning_rate": 4.930946084888232e-06, "loss": 23.5147, "step": 5703 }, { "epoch": 0.5293735498839908, "grad_norm": 43.65106201171875, "learning_rate": 4.929411654289538e-06, "loss": 23.375, "step": 5704 }, { "epoch": 0.5294663573085847, "grad_norm": 39.40802001953125, "learning_rate": 4.927877230340085e-06, "loss": 22.6127, "step": 5705 }, { "epoch": 0.5295591647331787, "grad_norm": 44.30021286010742, "learning_rate": 4.926342813184413e-06, "loss": 23.2736, "step": 5706 }, { "epoch": 0.5296519721577726, "grad_norm": 40.52677536010742, "learning_rate": 4.924808402967065e-06, "loss": 22.7989, "step": 5707 }, { "epoch": 0.5297447795823665, "grad_norm": 41.782432556152344, "learning_rate": 4.923273999832571e-06, "loss": 23.8361, "step": 5708 }, { "epoch": 0.5298375870069606, "grad_norm": 35.16915512084961, "learning_rate": 4.92173960392547e-06, "loss": 24.3894, "step": 5709 }, { "epoch": 0.5299303944315545, "grad_norm": 36.8665885925293, "learning_rate": 4.920205215390303e-06, "loss": 25.3273, "step": 5710 }, { "epoch": 0.5300232018561485, "grad_norm": 40.86982345581055, "learning_rate": 4.918670834371601e-06, "loss": 24.4125, "step": 5711 }, { "epoch": 0.5301160092807424, "grad_norm": 37.87491226196289, "learning_rate": 4.9171364610139e-06, "loss": 22.2625, "step": 5712 }, { "epoch": 0.5302088167053364, "grad_norm": 40.906436920166016, "learning_rate": 4.915602095461732e-06, "loss": 22.9273, "step": 5713 }, { "epoch": 0.5303016241299304, "grad_norm": 36.33694076538086, "learning_rate": 4.914067737859633e-06, "loss": 23.8257, "step": 5714 }, { "epoch": 0.5303944315545244, "grad_norm": 35.78392028808594, "learning_rate": 4.912533388352136e-06, "loss": 24.1209, "step": 5715 }, { "epoch": 0.5304872389791183, "grad_norm": 43.41083526611328, "learning_rate": 4.910999047083769e-06, "loss": 22.2343, "step": 5716 }, { "epoch": 0.5305800464037123, "grad_norm": 38.515357971191406, "learning_rate": 4.909464714199066e-06, "loss": 23.8549, "step": 5717 }, { "epoch": 0.5306728538283063, "grad_norm": 37.383243560791016, "learning_rate": 4.907930389842558e-06, "loss": 24.8154, "step": 5718 }, { "epoch": 0.5307656612529003, "grad_norm": 40.0531005859375, "learning_rate": 4.906396074158774e-06, "loss": 22.6104, "step": 5719 }, { "epoch": 0.5308584686774942, "grad_norm": 40.92761993408203, "learning_rate": 4.90486176729224e-06, "loss": 23.2037, "step": 5720 }, { "epoch": 0.5309512761020881, "grad_norm": 40.77627182006836, "learning_rate": 4.903327469387487e-06, "loss": 24.3434, "step": 5721 }, { "epoch": 0.5310440835266821, "grad_norm": 41.71137237548828, "learning_rate": 4.901793180589042e-06, "loss": 22.3782, "step": 5722 }, { "epoch": 0.5311368909512761, "grad_norm": 36.86064910888672, "learning_rate": 4.900258901041427e-06, "loss": 23.5463, "step": 5723 }, { "epoch": 0.5312296983758701, "grad_norm": 33.09458541870117, "learning_rate": 4.898724630889172e-06, "loss": 22.684, "step": 5724 }, { "epoch": 0.531322505800464, "grad_norm": 40.013023376464844, "learning_rate": 4.897190370276798e-06, "loss": 24.8315, "step": 5725 }, { "epoch": 0.531415313225058, "grad_norm": 34.18851089477539, "learning_rate": 4.895656119348831e-06, "loss": 23.2977, "step": 5726 }, { "epoch": 0.531508120649652, "grad_norm": 37.708126068115234, "learning_rate": 4.894121878249793e-06, "loss": 23.108, "step": 5727 }, { "epoch": 0.531600928074246, "grad_norm": 44.625083923339844, "learning_rate": 4.892587647124203e-06, "loss": 22.514, "step": 5728 }, { "epoch": 0.5316937354988399, "grad_norm": 40.68171310424805, "learning_rate": 4.891053426116586e-06, "loss": 23.0979, "step": 5729 }, { "epoch": 0.5317865429234339, "grad_norm": 38.88066864013672, "learning_rate": 4.889519215371458e-06, "loss": 23.6782, "step": 5730 }, { "epoch": 0.5318793503480278, "grad_norm": 48.72892761230469, "learning_rate": 4.887985015033339e-06, "loss": 23.3333, "step": 5731 }, { "epoch": 0.5319721577726219, "grad_norm": 36.929283142089844, "learning_rate": 4.886450825246748e-06, "loss": 23.2709, "step": 5732 }, { "epoch": 0.5320649651972158, "grad_norm": 35.87595748901367, "learning_rate": 4.8849166461562006e-06, "loss": 23.527, "step": 5733 }, { "epoch": 0.5321577726218097, "grad_norm": 42.967559814453125, "learning_rate": 4.883382477906211e-06, "loss": 22.9609, "step": 5734 }, { "epoch": 0.5322505800464037, "grad_norm": 38.04608917236328, "learning_rate": 4.881848320641297e-06, "loss": 23.8396, "step": 5735 }, { "epoch": 0.5323433874709976, "grad_norm": 38.5785026550293, "learning_rate": 4.880314174505972e-06, "loss": 23.5122, "step": 5736 }, { "epoch": 0.5324361948955917, "grad_norm": 36.4868278503418, "learning_rate": 4.878780039644746e-06, "loss": 24.7044, "step": 5737 }, { "epoch": 0.5325290023201856, "grad_norm": 37.35636901855469, "learning_rate": 4.877245916202132e-06, "loss": 23.0162, "step": 5738 }, { "epoch": 0.5326218097447796, "grad_norm": 38.06555938720703, "learning_rate": 4.875711804322643e-06, "loss": 24.8467, "step": 5739 }, { "epoch": 0.5327146171693735, "grad_norm": 43.624969482421875, "learning_rate": 4.874177704150784e-06, "loss": 23.1482, "step": 5740 }, { "epoch": 0.5328074245939676, "grad_norm": 47.76954650878906, "learning_rate": 4.872643615831066e-06, "loss": 23.7736, "step": 5741 }, { "epoch": 0.5329002320185615, "grad_norm": 42.802154541015625, "learning_rate": 4.871109539507998e-06, "loss": 22.5821, "step": 5742 }, { "epoch": 0.5329930394431555, "grad_norm": 41.01802444458008, "learning_rate": 4.869575475326081e-06, "loss": 23.5946, "step": 5743 }, { "epoch": 0.5330858468677494, "grad_norm": 43.49697494506836, "learning_rate": 4.868041423429823e-06, "loss": 24.9775, "step": 5744 }, { "epoch": 0.5331786542923433, "grad_norm": 38.46813201904297, "learning_rate": 4.86650738396373e-06, "loss": 22.6204, "step": 5745 }, { "epoch": 0.5332714617169374, "grad_norm": 36.40585708618164, "learning_rate": 4.864973357072299e-06, "loss": 22.3152, "step": 5746 }, { "epoch": 0.5333642691415313, "grad_norm": 36.03240966796875, "learning_rate": 4.863439342900038e-06, "loss": 21.6923, "step": 5747 }, { "epoch": 0.5334570765661253, "grad_norm": 37.53516387939453, "learning_rate": 4.861905341591442e-06, "loss": 23.9312, "step": 5748 }, { "epoch": 0.5335498839907192, "grad_norm": 36.2449951171875, "learning_rate": 4.8603713532910115e-06, "loss": 21.7549, "step": 5749 }, { "epoch": 0.5336426914153132, "grad_norm": 38.885162353515625, "learning_rate": 4.858837378143247e-06, "loss": 22.5958, "step": 5750 }, { "epoch": 0.5337354988399072, "grad_norm": 34.16767883300781, "learning_rate": 4.8573034162926395e-06, "loss": 23.7302, "step": 5751 }, { "epoch": 0.5338283062645012, "grad_norm": 40.31822204589844, "learning_rate": 4.855769467883689e-06, "loss": 24.6754, "step": 5752 }, { "epoch": 0.5339211136890951, "grad_norm": 51.42159652709961, "learning_rate": 4.854235533060889e-06, "loss": 24.5985, "step": 5753 }, { "epoch": 0.5340139211136891, "grad_norm": 41.01262283325195, "learning_rate": 4.8527016119687306e-06, "loss": 24.1301, "step": 5754 }, { "epoch": 0.5341067285382831, "grad_norm": 41.09882736206055, "learning_rate": 4.851167704751705e-06, "loss": 23.4262, "step": 5755 }, { "epoch": 0.5341995359628771, "grad_norm": 37.69810485839844, "learning_rate": 4.849633811554306e-06, "loss": 22.722, "step": 5756 }, { "epoch": 0.534292343387471, "grad_norm": 37.514339447021484, "learning_rate": 4.848099932521018e-06, "loss": 22.2532, "step": 5757 }, { "epoch": 0.534385150812065, "grad_norm": 37.41588592529297, "learning_rate": 4.84656606779633e-06, "loss": 25.134, "step": 5758 }, { "epoch": 0.5344779582366589, "grad_norm": 39.001625061035156, "learning_rate": 4.845032217524731e-06, "loss": 23.9805, "step": 5759 }, { "epoch": 0.534570765661253, "grad_norm": 39.07611083984375, "learning_rate": 4.843498381850701e-06, "loss": 23.6271, "step": 5760 }, { "epoch": 0.5346635730858469, "grad_norm": 45.14792251586914, "learning_rate": 4.8419645609187256e-06, "loss": 22.9252, "step": 5761 }, { "epoch": 0.5347563805104408, "grad_norm": 47.33987808227539, "learning_rate": 4.840430754873289e-06, "loss": 26.0133, "step": 5762 }, { "epoch": 0.5348491879350348, "grad_norm": 41.092105865478516, "learning_rate": 4.838896963858868e-06, "loss": 20.6252, "step": 5763 }, { "epoch": 0.5349419953596287, "grad_norm": 45.23508834838867, "learning_rate": 4.837363188019944e-06, "loss": 21.9009, "step": 5764 }, { "epoch": 0.5350348027842228, "grad_norm": 41.23572540283203, "learning_rate": 4.835829427500994e-06, "loss": 22.9002, "step": 5765 }, { "epoch": 0.5351276102088167, "grad_norm": 41.03606414794922, "learning_rate": 4.834295682446496e-06, "loss": 23.0359, "step": 5766 }, { "epoch": 0.5352204176334107, "grad_norm": 41.75422286987305, "learning_rate": 4.8327619530009225e-06, "loss": 24.3884, "step": 5767 }, { "epoch": 0.5353132250580046, "grad_norm": 44.757408142089844, "learning_rate": 4.831228239308749e-06, "loss": 24.0628, "step": 5768 }, { "epoch": 0.5354060324825987, "grad_norm": 39.81459426879883, "learning_rate": 4.829694541514447e-06, "loss": 22.1663, "step": 5769 }, { "epoch": 0.5354988399071926, "grad_norm": 43.596492767333984, "learning_rate": 4.828160859762485e-06, "loss": 23.7824, "step": 5770 }, { "epoch": 0.5355916473317865, "grad_norm": 38.406497955322266, "learning_rate": 4.826627194197334e-06, "loss": 22.82, "step": 5771 }, { "epoch": 0.5356844547563805, "grad_norm": 40.19762420654297, "learning_rate": 4.82509354496346e-06, "loss": 23.4853, "step": 5772 }, { "epoch": 0.5357772621809744, "grad_norm": 57.998512268066406, "learning_rate": 4.82355991220533e-06, "loss": 22.9451, "step": 5773 }, { "epoch": 0.5358700696055685, "grad_norm": 35.34921646118164, "learning_rate": 4.822026296067409e-06, "loss": 22.2324, "step": 5774 }, { "epoch": 0.5359628770301624, "grad_norm": 42.89369583129883, "learning_rate": 4.8204926966941575e-06, "loss": 24.4023, "step": 5775 }, { "epoch": 0.5360556844547564, "grad_norm": 39.290008544921875, "learning_rate": 4.8189591142300385e-06, "loss": 23.4773, "step": 5776 }, { "epoch": 0.5361484918793503, "grad_norm": 31.890214920043945, "learning_rate": 4.817425548819511e-06, "loss": 24.2913, "step": 5777 }, { "epoch": 0.5362412993039443, "grad_norm": 34.01615524291992, "learning_rate": 4.815892000607032e-06, "loss": 22.8017, "step": 5778 }, { "epoch": 0.5363341067285383, "grad_norm": 39.51335144042969, "learning_rate": 4.814358469737059e-06, "loss": 22.254, "step": 5779 }, { "epoch": 0.5364269141531323, "grad_norm": 42.0623779296875, "learning_rate": 4.812824956354047e-06, "loss": 25.2255, "step": 5780 }, { "epoch": 0.5365197215777262, "grad_norm": 41.372474670410156, "learning_rate": 4.811291460602448e-06, "loss": 26.5057, "step": 5781 }, { "epoch": 0.5366125290023201, "grad_norm": 40.76302719116211, "learning_rate": 4.809757982626713e-06, "loss": 24.5713, "step": 5782 }, { "epoch": 0.5367053364269142, "grad_norm": 37.02531433105469, "learning_rate": 4.8082245225712955e-06, "loss": 22.549, "step": 5783 }, { "epoch": 0.5367981438515081, "grad_norm": 53.423377990722656, "learning_rate": 4.8066910805806384e-06, "loss": 23.4855, "step": 5784 }, { "epoch": 0.5368909512761021, "grad_norm": 33.777706146240234, "learning_rate": 4.805157656799192e-06, "loss": 22.8432, "step": 5785 }, { "epoch": 0.536983758700696, "grad_norm": 46.12785339355469, "learning_rate": 4.803624251371397e-06, "loss": 23.7718, "step": 5786 }, { "epoch": 0.53707656612529, "grad_norm": 38.16079330444336, "learning_rate": 4.802090864441699e-06, "loss": 21.9006, "step": 5787 }, { "epoch": 0.537169373549884, "grad_norm": 36.72460174560547, "learning_rate": 4.8005574961545405e-06, "loss": 21.7052, "step": 5788 }, { "epoch": 0.537262180974478, "grad_norm": 39.21273422241211, "learning_rate": 4.799024146654358e-06, "loss": 25.3184, "step": 5789 }, { "epoch": 0.5373549883990719, "grad_norm": 36.55644226074219, "learning_rate": 4.797490816085588e-06, "loss": 23.782, "step": 5790 }, { "epoch": 0.5374477958236659, "grad_norm": 37.77608108520508, "learning_rate": 4.795957504592673e-06, "loss": 23.8066, "step": 5791 }, { "epoch": 0.5375406032482598, "grad_norm": 37.243770599365234, "learning_rate": 4.794424212320038e-06, "loss": 24.6748, "step": 5792 }, { "epoch": 0.5376334106728539, "grad_norm": 38.799678802490234, "learning_rate": 4.792890939412122e-06, "loss": 23.5741, "step": 5793 }, { "epoch": 0.5377262180974478, "grad_norm": 36.80015182495117, "learning_rate": 4.791357686013354e-06, "loss": 23.8398, "step": 5794 }, { "epoch": 0.5378190255220417, "grad_norm": 37.847347259521484, "learning_rate": 4.789824452268161e-06, "loss": 23.1524, "step": 5795 }, { "epoch": 0.5379118329466357, "grad_norm": 38.427921295166016, "learning_rate": 4.78829123832097e-06, "loss": 22.6157, "step": 5796 }, { "epoch": 0.5380046403712297, "grad_norm": 39.16957473754883, "learning_rate": 4.786758044316208e-06, "loss": 24.1665, "step": 5797 }, { "epoch": 0.5380974477958237, "grad_norm": 37.83150100708008, "learning_rate": 4.7852248703982945e-06, "loss": 22.1524, "step": 5798 }, { "epoch": 0.5381902552204176, "grad_norm": 36.76984786987305, "learning_rate": 4.783691716711652e-06, "loss": 24.573, "step": 5799 }, { "epoch": 0.5382830626450116, "grad_norm": 43.39104461669922, "learning_rate": 4.782158583400702e-06, "loss": 24.496, "step": 5800 }, { "epoch": 0.5383758700696055, "grad_norm": 37.96664047241211, "learning_rate": 4.78062547060986e-06, "loss": 22.0101, "step": 5801 }, { "epoch": 0.5384686774941996, "grad_norm": 74.951904296875, "learning_rate": 4.779092378483539e-06, "loss": 22.9927, "step": 5802 }, { "epoch": 0.5385614849187935, "grad_norm": 37.4876594543457, "learning_rate": 4.777559307166158e-06, "loss": 23.6569, "step": 5803 }, { "epoch": 0.5386542923433875, "grad_norm": 38.02653503417969, "learning_rate": 4.7760262568021225e-06, "loss": 24.0339, "step": 5804 }, { "epoch": 0.5387470997679814, "grad_norm": 39.04798126220703, "learning_rate": 4.774493227535844e-06, "loss": 24.2379, "step": 5805 }, { "epoch": 0.5388399071925754, "grad_norm": 53.643898010253906, "learning_rate": 4.772960219511733e-06, "loss": 22.9251, "step": 5806 }, { "epoch": 0.5389327146171694, "grad_norm": 36.263126373291016, "learning_rate": 4.77142723287419e-06, "loss": 23.0183, "step": 5807 }, { "epoch": 0.5390255220417633, "grad_norm": 34.57591247558594, "learning_rate": 4.769894267767621e-06, "loss": 22.1391, "step": 5808 }, { "epoch": 0.5391183294663573, "grad_norm": 38.345890045166016, "learning_rate": 4.7683613243364295e-06, "loss": 22.4833, "step": 5809 }, { "epoch": 0.5392111368909512, "grad_norm": 42.612545013427734, "learning_rate": 4.76682840272501e-06, "loss": 24.2922, "step": 5810 }, { "epoch": 0.5393039443155453, "grad_norm": 39.15567398071289, "learning_rate": 4.765295503077764e-06, "loss": 22.1094, "step": 5811 }, { "epoch": 0.5393967517401392, "grad_norm": 38.2952766418457, "learning_rate": 4.763762625539085e-06, "loss": 22.3561, "step": 5812 }, { "epoch": 0.5394895591647332, "grad_norm": 37.4984245300293, "learning_rate": 4.762229770253366e-06, "loss": 25.0396, "step": 5813 }, { "epoch": 0.5395823665893271, "grad_norm": 38.89163589477539, "learning_rate": 4.760696937364999e-06, "loss": 23.7937, "step": 5814 }, { "epoch": 0.5396751740139211, "grad_norm": 39.07001876831055, "learning_rate": 4.759164127018372e-06, "loss": 23.2022, "step": 5815 }, { "epoch": 0.5397679814385151, "grad_norm": 44.0019645690918, "learning_rate": 4.7576313393578725e-06, "loss": 21.6889, "step": 5816 }, { "epoch": 0.5398607888631091, "grad_norm": 38.680328369140625, "learning_rate": 4.756098574527884e-06, "loss": 21.1606, "step": 5817 }, { "epoch": 0.539953596287703, "grad_norm": 36.50336837768555, "learning_rate": 4.754565832672791e-06, "loss": 23.083, "step": 5818 }, { "epoch": 0.540046403712297, "grad_norm": 43.7386589050293, "learning_rate": 4.753033113936973e-06, "loss": 24.2674, "step": 5819 }, { "epoch": 0.540139211136891, "grad_norm": 36.5733528137207, "learning_rate": 4.751500418464809e-06, "loss": 24.539, "step": 5820 }, { "epoch": 0.540232018561485, "grad_norm": 36.103057861328125, "learning_rate": 4.749967746400672e-06, "loss": 22.3496, "step": 5821 }, { "epoch": 0.5403248259860789, "grad_norm": 53.80440902709961, "learning_rate": 4.748435097888939e-06, "loss": 24.2488, "step": 5822 }, { "epoch": 0.5404176334106728, "grad_norm": 45.37899398803711, "learning_rate": 4.746902473073982e-06, "loss": 23.5312, "step": 5823 }, { "epoch": 0.5405104408352668, "grad_norm": 40.856380462646484, "learning_rate": 4.745369872100166e-06, "loss": 24.021, "step": 5824 }, { "epoch": 0.5406032482598608, "grad_norm": 39.6469612121582, "learning_rate": 4.743837295111861e-06, "loss": 22.076, "step": 5825 }, { "epoch": 0.5406960556844548, "grad_norm": 38.0023193359375, "learning_rate": 4.742304742253436e-06, "loss": 23.6017, "step": 5826 }, { "epoch": 0.5407888631090487, "grad_norm": 42.08652114868164, "learning_rate": 4.740772213669246e-06, "loss": 25.0574, "step": 5827 }, { "epoch": 0.5408816705336427, "grad_norm": 39.94886016845703, "learning_rate": 4.7392397095036545e-06, "loss": 23.6013, "step": 5828 }, { "epoch": 0.5409744779582366, "grad_norm": 39.864158630371094, "learning_rate": 4.737707229901022e-06, "loss": 22.7962, "step": 5829 }, { "epoch": 0.5410672853828307, "grad_norm": 40.138893127441406, "learning_rate": 4.7361747750057e-06, "loss": 23.6585, "step": 5830 }, { "epoch": 0.5411600928074246, "grad_norm": 40.68111038208008, "learning_rate": 4.734642344962045e-06, "loss": 21.672, "step": 5831 }, { "epoch": 0.5412529002320186, "grad_norm": 39.05295944213867, "learning_rate": 4.733109939914407e-06, "loss": 21.9519, "step": 5832 }, { "epoch": 0.5413457076566125, "grad_norm": 40.82905197143555, "learning_rate": 4.731577560007133e-06, "loss": 24.3381, "step": 5833 }, { "epoch": 0.5414385150812066, "grad_norm": 41.33602523803711, "learning_rate": 4.730045205384572e-06, "loss": 24.2583, "step": 5834 }, { "epoch": 0.5415313225058005, "grad_norm": 41.23589324951172, "learning_rate": 4.728512876191067e-06, "loss": 22.6696, "step": 5835 }, { "epoch": 0.5416241299303944, "grad_norm": 36.70692825317383, "learning_rate": 4.726980572570958e-06, "loss": 22.9305, "step": 5836 }, { "epoch": 0.5417169373549884, "grad_norm": 40.39982604980469, "learning_rate": 4.725448294668585e-06, "loss": 23.0308, "step": 5837 }, { "epoch": 0.5418097447795823, "grad_norm": 38.0429801940918, "learning_rate": 4.723916042628287e-06, "loss": 24.5516, "step": 5838 }, { "epoch": 0.5419025522041764, "grad_norm": 37.801475524902344, "learning_rate": 4.722383816594394e-06, "loss": 22.3477, "step": 5839 }, { "epoch": 0.5419953596287703, "grad_norm": 40.941650390625, "learning_rate": 4.720851616711241e-06, "loss": 24.1763, "step": 5840 }, { "epoch": 0.5420881670533643, "grad_norm": 39.962120056152344, "learning_rate": 4.719319443123158e-06, "loss": 22.7661, "step": 5841 }, { "epoch": 0.5421809744779582, "grad_norm": 40.758262634277344, "learning_rate": 4.7177872959744675e-06, "loss": 22.8677, "step": 5842 }, { "epoch": 0.5422737819025522, "grad_norm": 37.10173416137695, "learning_rate": 4.716255175409497e-06, "loss": 22.3973, "step": 5843 }, { "epoch": 0.5423665893271462, "grad_norm": 102.24310302734375, "learning_rate": 4.714723081572571e-06, "loss": 23.4418, "step": 5844 }, { "epoch": 0.5424593967517402, "grad_norm": 40.031272888183594, "learning_rate": 4.713191014608003e-06, "loss": 21.5657, "step": 5845 }, { "epoch": 0.5425522041763341, "grad_norm": 37.48455047607422, "learning_rate": 4.711658974660112e-06, "loss": 23.1207, "step": 5846 }, { "epoch": 0.542645011600928, "grad_norm": 36.67414474487305, "learning_rate": 4.7101269618732155e-06, "loss": 23.167, "step": 5847 }, { "epoch": 0.5427378190255221, "grad_norm": 36.90000534057617, "learning_rate": 4.70859497639162e-06, "loss": 23.0674, "step": 5848 }, { "epoch": 0.542830626450116, "grad_norm": 37.96050262451172, "learning_rate": 4.707063018359638e-06, "loss": 23.1612, "step": 5849 }, { "epoch": 0.54292343387471, "grad_norm": 39.14419174194336, "learning_rate": 4.705531087921578e-06, "loss": 21.829, "step": 5850 }, { "epoch": 0.5430162412993039, "grad_norm": 37.96145248413086, "learning_rate": 4.703999185221738e-06, "loss": 23.1749, "step": 5851 }, { "epoch": 0.5431090487238979, "grad_norm": 38.322933197021484, "learning_rate": 4.702467310404422e-06, "loss": 24.5507, "step": 5852 }, { "epoch": 0.5432018561484919, "grad_norm": 39.59156036376953, "learning_rate": 4.7009354636139335e-06, "loss": 21.5338, "step": 5853 }, { "epoch": 0.5432946635730859, "grad_norm": 42.175636291503906, "learning_rate": 4.699403644994561e-06, "loss": 21.2906, "step": 5854 }, { "epoch": 0.5433874709976798, "grad_norm": 44.12481689453125, "learning_rate": 4.697871854690603e-06, "loss": 23.8367, "step": 5855 }, { "epoch": 0.5434802784222738, "grad_norm": 37.69367980957031, "learning_rate": 4.696340092846347e-06, "loss": 24.3995, "step": 5856 }, { "epoch": 0.5435730858468677, "grad_norm": 39.436790466308594, "learning_rate": 4.694808359606083e-06, "loss": 24.3279, "step": 5857 }, { "epoch": 0.5436658932714618, "grad_norm": 42.148067474365234, "learning_rate": 4.693276655114097e-06, "loss": 22.7868, "step": 5858 }, { "epoch": 0.5437587006960557, "grad_norm": 33.3426513671875, "learning_rate": 4.69174497951467e-06, "loss": 21.9442, "step": 5859 }, { "epoch": 0.5438515081206496, "grad_norm": 41.12788009643555, "learning_rate": 4.690213332952083e-06, "loss": 25.3203, "step": 5860 }, { "epoch": 0.5439443155452436, "grad_norm": 34.05808639526367, "learning_rate": 4.688681715570613e-06, "loss": 21.815, "step": 5861 }, { "epoch": 0.5440371229698376, "grad_norm": 37.27799606323242, "learning_rate": 4.6871501275145325e-06, "loss": 22.6303, "step": 5862 }, { "epoch": 0.5441299303944316, "grad_norm": 42.62395477294922, "learning_rate": 4.685618568928117e-06, "loss": 24.8968, "step": 5863 }, { "epoch": 0.5442227378190255, "grad_norm": 39.75758361816406, "learning_rate": 4.6840870399556334e-06, "loss": 24.4896, "step": 5864 }, { "epoch": 0.5443155452436195, "grad_norm": 34.638729095458984, "learning_rate": 4.682555540741346e-06, "loss": 22.8473, "step": 5865 }, { "epoch": 0.5444083526682134, "grad_norm": 39.36008834838867, "learning_rate": 4.6810240714295214e-06, "loss": 21.528, "step": 5866 }, { "epoch": 0.5445011600928075, "grad_norm": 40.23438262939453, "learning_rate": 4.67949263216442e-06, "loss": 24.801, "step": 5867 }, { "epoch": 0.5445939675174014, "grad_norm": 38.61405563354492, "learning_rate": 4.677961223090297e-06, "loss": 25.1221, "step": 5868 }, { "epoch": 0.5446867749419954, "grad_norm": 42.49147033691406, "learning_rate": 4.676429844351407e-06, "loss": 22.7544, "step": 5869 }, { "epoch": 0.5447795823665893, "grad_norm": 38.546932220458984, "learning_rate": 4.674898496092006e-06, "loss": 24.8121, "step": 5870 }, { "epoch": 0.5448723897911832, "grad_norm": 45.87107849121094, "learning_rate": 4.673367178456339e-06, "loss": 21.9247, "step": 5871 }, { "epoch": 0.5449651972157773, "grad_norm": 39.07808303833008, "learning_rate": 4.671835891588654e-06, "loss": 23.9173, "step": 5872 }, { "epoch": 0.5450580046403712, "grad_norm": 38.182743072509766, "learning_rate": 4.670304635633195e-06, "loss": 22.6246, "step": 5873 }, { "epoch": 0.5451508120649652, "grad_norm": 41.51560592651367, "learning_rate": 4.6687734107342005e-06, "loss": 23.0875, "step": 5874 }, { "epoch": 0.5452436194895591, "grad_norm": 38.4529914855957, "learning_rate": 4.667242217035909e-06, "loss": 22.6325, "step": 5875 }, { "epoch": 0.5453364269141532, "grad_norm": 38.07444381713867, "learning_rate": 4.665711054682557e-06, "loss": 23.0622, "step": 5876 }, { "epoch": 0.5454292343387471, "grad_norm": 41.33369445800781, "learning_rate": 4.664179923818372e-06, "loss": 23.4172, "step": 5877 }, { "epoch": 0.5455220417633411, "grad_norm": 60.20732498168945, "learning_rate": 4.662648824587585e-06, "loss": 22.5144, "step": 5878 }, { "epoch": 0.545614849187935, "grad_norm": 41.447349548339844, "learning_rate": 4.661117757134423e-06, "loss": 24.0446, "step": 5879 }, { "epoch": 0.545707656612529, "grad_norm": 35.11943817138672, "learning_rate": 4.659586721603107e-06, "loss": 23.4843, "step": 5880 }, { "epoch": 0.545800464037123, "grad_norm": 41.851295471191406, "learning_rate": 4.658055718137855e-06, "loss": 23.1946, "step": 5881 }, { "epoch": 0.545893271461717, "grad_norm": 41.9464111328125, "learning_rate": 4.656524746882888e-06, "loss": 24.541, "step": 5882 }, { "epoch": 0.5459860788863109, "grad_norm": 41.94785690307617, "learning_rate": 4.654993807982415e-06, "loss": 24.1123, "step": 5883 }, { "epoch": 0.5460788863109048, "grad_norm": 38.37543487548828, "learning_rate": 4.653462901580648e-06, "loss": 25.8099, "step": 5884 }, { "epoch": 0.5461716937354988, "grad_norm": 39.055965423583984, "learning_rate": 4.651932027821799e-06, "loss": 23.9434, "step": 5885 }, { "epoch": 0.5462645011600928, "grad_norm": 40.6927604675293, "learning_rate": 4.650401186850064e-06, "loss": 24.5798, "step": 5886 }, { "epoch": 0.5463573085846868, "grad_norm": 40.7797737121582, "learning_rate": 4.64887037880965e-06, "loss": 23.8221, "step": 5887 }, { "epoch": 0.5464501160092807, "grad_norm": 35.34585189819336, "learning_rate": 4.647339603844756e-06, "loss": 24.2241, "step": 5888 }, { "epoch": 0.5465429234338747, "grad_norm": 35.00346374511719, "learning_rate": 4.645808862099574e-06, "loss": 24.8226, "step": 5889 }, { "epoch": 0.5466357308584687, "grad_norm": 41.05126190185547, "learning_rate": 4.644278153718299e-06, "loss": 24.0008, "step": 5890 }, { "epoch": 0.5467285382830627, "grad_norm": 38.37701416015625, "learning_rate": 4.6427474788451145e-06, "loss": 23.3109, "step": 5891 }, { "epoch": 0.5468213457076566, "grad_norm": 37.10821533203125, "learning_rate": 4.641216837624211e-06, "loss": 24.427, "step": 5892 }, { "epoch": 0.5469141531322506, "grad_norm": 37.47408676147461, "learning_rate": 4.639686230199772e-06, "loss": 22.8379, "step": 5893 }, { "epoch": 0.5470069605568445, "grad_norm": 34.213645935058594, "learning_rate": 4.6381556567159715e-06, "loss": 22.7272, "step": 5894 }, { "epoch": 0.5470997679814386, "grad_norm": 36.20785903930664, "learning_rate": 4.6366251173169895e-06, "loss": 24.3858, "step": 5895 }, { "epoch": 0.5471925754060325, "grad_norm": 36.843624114990234, "learning_rate": 4.6350946121469994e-06, "loss": 22.6743, "step": 5896 }, { "epoch": 0.5472853828306264, "grad_norm": 37.60434341430664, "learning_rate": 4.633564141350168e-06, "loss": 25.698, "step": 5897 }, { "epoch": 0.5473781902552204, "grad_norm": 35.553321838378906, "learning_rate": 4.632033705070663e-06, "loss": 24.4696, "step": 5898 }, { "epoch": 0.5474709976798143, "grad_norm": 41.32272720336914, "learning_rate": 4.63050330345265e-06, "loss": 21.9456, "step": 5899 }, { "epoch": 0.5475638051044084, "grad_norm": 40.900142669677734, "learning_rate": 4.628972936640286e-06, "loss": 22.804, "step": 5900 }, { "epoch": 0.5476566125290023, "grad_norm": 38.241302490234375, "learning_rate": 4.627442604777728e-06, "loss": 22.7674, "step": 5901 }, { "epoch": 0.5477494199535963, "grad_norm": 41.27857971191406, "learning_rate": 4.62591230800913e-06, "loss": 23.2198, "step": 5902 }, { "epoch": 0.5478422273781902, "grad_norm": 37.85176467895508, "learning_rate": 4.6243820464786435e-06, "loss": 24.2006, "step": 5903 }, { "epoch": 0.5479350348027843, "grad_norm": 36.3328857421875, "learning_rate": 4.622851820330412e-06, "loss": 23.326, "step": 5904 }, { "epoch": 0.5480278422273782, "grad_norm": 38.46231460571289, "learning_rate": 4.621321629708582e-06, "loss": 22.5554, "step": 5905 }, { "epoch": 0.5481206496519722, "grad_norm": 41.62306594848633, "learning_rate": 4.61979147475729e-06, "loss": 23.5527, "step": 5906 }, { "epoch": 0.5482134570765661, "grad_norm": 41.808067321777344, "learning_rate": 4.618261355620677e-06, "loss": 24.086, "step": 5907 }, { "epoch": 0.54830626450116, "grad_norm": 40.409690856933594, "learning_rate": 4.616731272442874e-06, "loss": 25.1636, "step": 5908 }, { "epoch": 0.5483990719257541, "grad_norm": 37.54997634887695, "learning_rate": 4.61520122536801e-06, "loss": 24.8603, "step": 5909 }, { "epoch": 0.548491879350348, "grad_norm": 39.45935821533203, "learning_rate": 4.613671214540214e-06, "loss": 24.8719, "step": 5910 }, { "epoch": 0.548584686774942, "grad_norm": 40.09571838378906, "learning_rate": 4.612141240103609e-06, "loss": 23.1283, "step": 5911 }, { "epoch": 0.5486774941995359, "grad_norm": 39.754173278808594, "learning_rate": 4.610611302202311e-06, "loss": 23.8071, "step": 5912 }, { "epoch": 0.54877030162413, "grad_norm": 38.466346740722656, "learning_rate": 4.6090814009804406e-06, "loss": 22.261, "step": 5913 }, { "epoch": 0.5488631090487239, "grad_norm": 42.32978439331055, "learning_rate": 4.607551536582109e-06, "loss": 23.9868, "step": 5914 }, { "epoch": 0.5489559164733179, "grad_norm": 39.57231521606445, "learning_rate": 4.606021709151424e-06, "loss": 22.5461, "step": 5915 }, { "epoch": 0.5490487238979118, "grad_norm": 38.97020721435547, "learning_rate": 4.604491918832494e-06, "loss": 23.4842, "step": 5916 }, { "epoch": 0.5491415313225058, "grad_norm": 40.51409912109375, "learning_rate": 4.6029621657694215e-06, "loss": 23.2992, "step": 5917 }, { "epoch": 0.5492343387470998, "grad_norm": 45.048583984375, "learning_rate": 4.601432450106302e-06, "loss": 25.2269, "step": 5918 }, { "epoch": 0.5493271461716938, "grad_norm": 43.37971115112305, "learning_rate": 4.599902771987233e-06, "loss": 22.781, "step": 5919 }, { "epoch": 0.5494199535962877, "grad_norm": 41.79082107543945, "learning_rate": 4.598373131556308e-06, "loss": 23.2411, "step": 5920 }, { "epoch": 0.5495127610208816, "grad_norm": 41.50835418701172, "learning_rate": 4.5968435289576115e-06, "loss": 22.6746, "step": 5921 }, { "epoch": 0.5496055684454756, "grad_norm": 44.81855773925781, "learning_rate": 4.59531396433523e-06, "loss": 24.0364, "step": 5922 }, { "epoch": 0.5496983758700696, "grad_norm": 37.394893646240234, "learning_rate": 4.593784437833247e-06, "loss": 23.885, "step": 5923 }, { "epoch": 0.5497911832946636, "grad_norm": 37.377830505371094, "learning_rate": 4.592254949595736e-06, "loss": 23.8247, "step": 5924 }, { "epoch": 0.5498839907192575, "grad_norm": 46.0201301574707, "learning_rate": 4.590725499766774e-06, "loss": 22.7011, "step": 5925 }, { "epoch": 0.5499767981438515, "grad_norm": 41.04819869995117, "learning_rate": 4.589196088490429e-06, "loss": 21.964, "step": 5926 }, { "epoch": 0.5500696055684455, "grad_norm": 40.70973205566406, "learning_rate": 4.587666715910768e-06, "loss": 23.1434, "step": 5927 }, { "epoch": 0.5501624129930395, "grad_norm": 36.99567794799805, "learning_rate": 4.586137382171856e-06, "loss": 22.7048, "step": 5928 }, { "epoch": 0.5502552204176334, "grad_norm": 42.32688522338867, "learning_rate": 4.584608087417749e-06, "loss": 24.9087, "step": 5929 }, { "epoch": 0.5503480278422274, "grad_norm": 42.00630187988281, "learning_rate": 4.583078831792505e-06, "loss": 21.8481, "step": 5930 }, { "epoch": 0.5504408352668213, "grad_norm": 44.843570709228516, "learning_rate": 4.581549615440178e-06, "loss": 25.7434, "step": 5931 }, { "epoch": 0.5505336426914154, "grad_norm": 38.3912353515625, "learning_rate": 4.580020438504812e-06, "loss": 22.7996, "step": 5932 }, { "epoch": 0.5506264501160093, "grad_norm": 43.19978332519531, "learning_rate": 4.578491301130452e-06, "loss": 24.2464, "step": 5933 }, { "epoch": 0.5507192575406032, "grad_norm": 36.73753356933594, "learning_rate": 4.576962203461144e-06, "loss": 21.4765, "step": 5934 }, { "epoch": 0.5508120649651972, "grad_norm": 39.79827880859375, "learning_rate": 4.57543314564092e-06, "loss": 22.1269, "step": 5935 }, { "epoch": 0.5509048723897911, "grad_norm": 37.19331359863281, "learning_rate": 4.573904127813813e-06, "loss": 23.0128, "step": 5936 }, { "epoch": 0.5509976798143852, "grad_norm": 47.05434799194336, "learning_rate": 4.572375150123857e-06, "loss": 23.4747, "step": 5937 }, { "epoch": 0.5510904872389791, "grad_norm": 43.00851821899414, "learning_rate": 4.5708462127150735e-06, "loss": 23.4414, "step": 5938 }, { "epoch": 0.5511832946635731, "grad_norm": 50.67789840698242, "learning_rate": 4.5693173157314865e-06, "loss": 23.1391, "step": 5939 }, { "epoch": 0.551276102088167, "grad_norm": 39.26272201538086, "learning_rate": 4.567788459317116e-06, "loss": 23.8779, "step": 5940 }, { "epoch": 0.5513689095127611, "grad_norm": 39.40165328979492, "learning_rate": 4.566259643615972e-06, "loss": 21.04, "step": 5941 }, { "epoch": 0.551461716937355, "grad_norm": 39.008995056152344, "learning_rate": 4.564730868772067e-06, "loss": 22.0363, "step": 5942 }, { "epoch": 0.551554524361949, "grad_norm": 41.18843460083008, "learning_rate": 4.563202134929411e-06, "loss": 22.4015, "step": 5943 }, { "epoch": 0.5516473317865429, "grad_norm": 38.23518753051758, "learning_rate": 4.561673442232002e-06, "loss": 23.9461, "step": 5944 }, { "epoch": 0.5517401392111368, "grad_norm": 42.71267318725586, "learning_rate": 4.56014479082384e-06, "loss": 25.5906, "step": 5945 }, { "epoch": 0.5518329466357309, "grad_norm": 37.984107971191406, "learning_rate": 4.558616180848922e-06, "loss": 24.7529, "step": 5946 }, { "epoch": 0.5519257540603248, "grad_norm": 36.88230514526367, "learning_rate": 4.557087612451239e-06, "loss": 22.806, "step": 5947 }, { "epoch": 0.5520185614849188, "grad_norm": 43.2031135559082, "learning_rate": 4.5555590857747766e-06, "loss": 25.9499, "step": 5948 }, { "epoch": 0.5521113689095127, "grad_norm": 38.93063735961914, "learning_rate": 4.55403060096352e-06, "loss": 22.2979, "step": 5949 }, { "epoch": 0.5522041763341067, "grad_norm": 43.53142166137695, "learning_rate": 4.552502158161447e-06, "loss": 23.4865, "step": 5950 }, { "epoch": 0.5522969837587007, "grad_norm": 43.94861602783203, "learning_rate": 4.5509737575125326e-06, "loss": 22.1856, "step": 5951 }, { "epoch": 0.5523897911832947, "grad_norm": 109.0718765258789, "learning_rate": 4.54944539916075e-06, "loss": 23.2333, "step": 5952 }, { "epoch": 0.5524825986078886, "grad_norm": 42.9970588684082, "learning_rate": 4.547917083250065e-06, "loss": 24.417, "step": 5953 }, { "epoch": 0.5525754060324826, "grad_norm": 40.22356033325195, "learning_rate": 4.546388809924444e-06, "loss": 25.8958, "step": 5954 }, { "epoch": 0.5526682134570766, "grad_norm": 36.17742919921875, "learning_rate": 4.544860579327843e-06, "loss": 22.5148, "step": 5955 }, { "epoch": 0.5527610208816706, "grad_norm": 38.04827117919922, "learning_rate": 4.5433323916042196e-06, "loss": 22.7732, "step": 5956 }, { "epoch": 0.5528538283062645, "grad_norm": 37.12471389770508, "learning_rate": 4.541804246897524e-06, "loss": 24.0504, "step": 5957 }, { "epoch": 0.5529466357308584, "grad_norm": 41.376224517822266, "learning_rate": 4.540276145351705e-06, "loss": 23.6221, "step": 5958 }, { "epoch": 0.5530394431554524, "grad_norm": 51.170745849609375, "learning_rate": 4.538748087110703e-06, "loss": 21.8465, "step": 5959 }, { "epoch": 0.5531322505800464, "grad_norm": 40.325870513916016, "learning_rate": 4.53722007231846e-06, "loss": 23.9278, "step": 5960 }, { "epoch": 0.5532250580046404, "grad_norm": 45.02004623413086, "learning_rate": 4.535692101118912e-06, "loss": 23.1456, "step": 5961 }, { "epoch": 0.5533178654292343, "grad_norm": 35.8821907043457, "learning_rate": 4.5341641736559865e-06, "loss": 25.1092, "step": 5962 }, { "epoch": 0.5534106728538283, "grad_norm": 42.89332580566406, "learning_rate": 4.532636290073613e-06, "loss": 25.4187, "step": 5963 }, { "epoch": 0.5535034802784222, "grad_norm": 44.20793914794922, "learning_rate": 4.531108450515712e-06, "loss": 22.8533, "step": 5964 }, { "epoch": 0.5535962877030163, "grad_norm": 41.13279724121094, "learning_rate": 4.529580655126204e-06, "loss": 23.4969, "step": 5965 }, { "epoch": 0.5536890951276102, "grad_norm": 38.145633697509766, "learning_rate": 4.528052904049004e-06, "loss": 22.7738, "step": 5966 }, { "epoch": 0.5537819025522042, "grad_norm": 42.68308639526367, "learning_rate": 4.526525197428021e-06, "loss": 21.7933, "step": 5967 }, { "epoch": 0.5538747099767981, "grad_norm": 38.2858772277832, "learning_rate": 4.524997535407159e-06, "loss": 24.0687, "step": 5968 }, { "epoch": 0.5539675174013922, "grad_norm": 39.527278900146484, "learning_rate": 4.523469918130326e-06, "loss": 23.2746, "step": 5969 }, { "epoch": 0.5540603248259861, "grad_norm": 38.64349365234375, "learning_rate": 4.521942345741413e-06, "loss": 22.7689, "step": 5970 }, { "epoch": 0.55415313225058, "grad_norm": 41.00722885131836, "learning_rate": 4.520414818384316e-06, "loss": 22.5899, "step": 5971 }, { "epoch": 0.554245939675174, "grad_norm": 39.15763854980469, "learning_rate": 4.518887336202927e-06, "loss": 25.9388, "step": 5972 }, { "epoch": 0.5543387470997679, "grad_norm": 37.4113883972168, "learning_rate": 4.517359899341126e-06, "loss": 23.3701, "step": 5973 }, { "epoch": 0.554431554524362, "grad_norm": 33.8859977722168, "learning_rate": 4.515832507942796e-06, "loss": 23.7699, "step": 5974 }, { "epoch": 0.5545243619489559, "grad_norm": 41.008262634277344, "learning_rate": 4.514305162151815e-06, "loss": 23.6182, "step": 5975 }, { "epoch": 0.5546171693735499, "grad_norm": 37.382972717285156, "learning_rate": 4.512777862112053e-06, "loss": 20.72, "step": 5976 }, { "epoch": 0.5547099767981438, "grad_norm": 38.37796401977539, "learning_rate": 4.511250607967378e-06, "loss": 23.5396, "step": 5977 }, { "epoch": 0.5548027842227378, "grad_norm": 40.69496536254883, "learning_rate": 4.509723399861655e-06, "loss": 25.7289, "step": 5978 }, { "epoch": 0.5548955916473318, "grad_norm": 40.81804275512695, "learning_rate": 4.5081962379387405e-06, "loss": 24.3929, "step": 5979 }, { "epoch": 0.5549883990719258, "grad_norm": 39.81512451171875, "learning_rate": 4.506669122342491e-06, "loss": 23.5338, "step": 5980 }, { "epoch": 0.5550812064965197, "grad_norm": 43.16718292236328, "learning_rate": 4.505142053216758e-06, "loss": 21.8915, "step": 5981 }, { "epoch": 0.5551740139211137, "grad_norm": 37.50447463989258, "learning_rate": 4.503615030705384e-06, "loss": 23.281, "step": 5982 }, { "epoch": 0.5552668213457077, "grad_norm": 38.65317916870117, "learning_rate": 4.502088054952213e-06, "loss": 24.9647, "step": 5983 }, { "epoch": 0.5553596287703016, "grad_norm": 35.140384674072266, "learning_rate": 4.500561126101084e-06, "loss": 22.8622, "step": 5984 }, { "epoch": 0.5554524361948956, "grad_norm": 45.228431701660156, "learning_rate": 4.499034244295826e-06, "loss": 22.4001, "step": 5985 }, { "epoch": 0.5555452436194895, "grad_norm": 34.71407699584961, "learning_rate": 4.497507409680269e-06, "loss": 23.6508, "step": 5986 }, { "epoch": 0.5556380510440835, "grad_norm": 38.27775955200195, "learning_rate": 4.495980622398239e-06, "loss": 22.7161, "step": 5987 }, { "epoch": 0.5557308584686775, "grad_norm": 38.713897705078125, "learning_rate": 4.494453882593552e-06, "loss": 24.9158, "step": 5988 }, { "epoch": 0.5558236658932715, "grad_norm": 42.40052795410156, "learning_rate": 4.4929271904100245e-06, "loss": 22.4849, "step": 5989 }, { "epoch": 0.5559164733178654, "grad_norm": 43.074188232421875, "learning_rate": 4.491400545991469e-06, "loss": 23.1375, "step": 5990 }, { "epoch": 0.5560092807424594, "grad_norm": 36.94828414916992, "learning_rate": 4.4898739494816876e-06, "loss": 22.7342, "step": 5991 }, { "epoch": 0.5561020881670533, "grad_norm": 38.555450439453125, "learning_rate": 4.4883474010244845e-06, "loss": 22.0179, "step": 5992 }, { "epoch": 0.5561948955916474, "grad_norm": 38.49634552001953, "learning_rate": 4.486820900763657e-06, "loss": 21.9206, "step": 5993 }, { "epoch": 0.5562877030162413, "grad_norm": 42.95304870605469, "learning_rate": 4.485294448842996e-06, "loss": 25.7521, "step": 5994 }, { "epoch": 0.5563805104408353, "grad_norm": 42.93753433227539, "learning_rate": 4.48376804540629e-06, "loss": 22.6916, "step": 5995 }, { "epoch": 0.5564733178654292, "grad_norm": 37.83403778076172, "learning_rate": 4.482241690597322e-06, "loss": 23.9425, "step": 5996 }, { "epoch": 0.5565661252900233, "grad_norm": 38.289798736572266, "learning_rate": 4.480715384559872e-06, "loss": 23.1368, "step": 5997 }, { "epoch": 0.5566589327146172, "grad_norm": 34.162376403808594, "learning_rate": 4.479189127437713e-06, "loss": 20.9487, "step": 5998 }, { "epoch": 0.5567517401392111, "grad_norm": 40.26353073120117, "learning_rate": 4.477662919374614e-06, "loss": 25.0498, "step": 5999 }, { "epoch": 0.5568445475638051, "grad_norm": 39.770870208740234, "learning_rate": 4.476136760514341e-06, "loss": 25.7275, "step": 6000 }, { "epoch": 0.556937354988399, "grad_norm": 39.292388916015625, "learning_rate": 4.474610651000656e-06, "loss": 23.4614, "step": 6001 }, { "epoch": 0.5570301624129931, "grad_norm": 37.910804748535156, "learning_rate": 4.47308459097731e-06, "loss": 23.3935, "step": 6002 }, { "epoch": 0.557122969837587, "grad_norm": 36.1698112487793, "learning_rate": 4.471558580588057e-06, "loss": 24.1462, "step": 6003 }, { "epoch": 0.557215777262181, "grad_norm": 37.914512634277344, "learning_rate": 4.4700326199766445e-06, "loss": 23.1336, "step": 6004 }, { "epoch": 0.5573085846867749, "grad_norm": 39.212947845458984, "learning_rate": 4.468506709286811e-06, "loss": 21.2739, "step": 6005 }, { "epoch": 0.5574013921113689, "grad_norm": 38.5088996887207, "learning_rate": 4.466980848662295e-06, "loss": 21.5178, "step": 6006 }, { "epoch": 0.5574941995359629, "grad_norm": 39.108360290527344, "learning_rate": 4.46545503824683e-06, "loss": 23.6411, "step": 6007 }, { "epoch": 0.5575870069605569, "grad_norm": 37.89012908935547, "learning_rate": 4.46392927818414e-06, "loss": 20.5033, "step": 6008 }, { "epoch": 0.5576798143851508, "grad_norm": 40.6463737487793, "learning_rate": 4.46240356861795e-06, "loss": 26.4591, "step": 6009 }, { "epoch": 0.5577726218097447, "grad_norm": 41.83488082885742, "learning_rate": 4.46087790969198e-06, "loss": 24.5474, "step": 6010 }, { "epoch": 0.5578654292343388, "grad_norm": 37.07063293457031, "learning_rate": 4.459352301549939e-06, "loss": 23.3419, "step": 6011 }, { "epoch": 0.5579582366589327, "grad_norm": 42.69525909423828, "learning_rate": 4.457826744335538e-06, "loss": 22.2095, "step": 6012 }, { "epoch": 0.5580510440835267, "grad_norm": 34.93764114379883, "learning_rate": 4.456301238192482e-06, "loss": 23.3392, "step": 6013 }, { "epoch": 0.5581438515081206, "grad_norm": 38.33206558227539, "learning_rate": 4.454775783264465e-06, "loss": 22.8151, "step": 6014 }, { "epoch": 0.5582366589327146, "grad_norm": 43.394954681396484, "learning_rate": 4.453250379695184e-06, "loss": 23.5526, "step": 6015 }, { "epoch": 0.5583294663573086, "grad_norm": 41.271549224853516, "learning_rate": 4.45172502762833e-06, "loss": 23.0066, "step": 6016 }, { "epoch": 0.5584222737819026, "grad_norm": 36.613304138183594, "learning_rate": 4.450199727207583e-06, "loss": 23.6768, "step": 6017 }, { "epoch": 0.5585150812064965, "grad_norm": 38.58063888549805, "learning_rate": 4.448674478576625e-06, "loss": 21.8317, "step": 6018 }, { "epoch": 0.5586078886310905, "grad_norm": 40.17646408081055, "learning_rate": 4.447149281879132e-06, "loss": 24.4736, "step": 6019 }, { "epoch": 0.5587006960556845, "grad_norm": 42.24567413330078, "learning_rate": 4.4456241372587695e-06, "loss": 21.3781, "step": 6020 }, { "epoch": 0.5587935034802785, "grad_norm": 35.969329833984375, "learning_rate": 4.444099044859205e-06, "loss": 22.7948, "step": 6021 }, { "epoch": 0.5588863109048724, "grad_norm": 59.893096923828125, "learning_rate": 4.4425740048241e-06, "loss": 23.1794, "step": 6022 }, { "epoch": 0.5589791183294663, "grad_norm": 38.19978713989258, "learning_rate": 4.441049017297105e-06, "loss": 22.8527, "step": 6023 }, { "epoch": 0.5590719257540603, "grad_norm": 41.000511169433594, "learning_rate": 4.439524082421872e-06, "loss": 22.9731, "step": 6024 }, { "epoch": 0.5591647331786543, "grad_norm": 37.78328323364258, "learning_rate": 4.437999200342048e-06, "loss": 23.9265, "step": 6025 }, { "epoch": 0.5592575406032483, "grad_norm": 34.15911102294922, "learning_rate": 4.43647437120127e-06, "loss": 22.6941, "step": 6026 }, { "epoch": 0.5593503480278422, "grad_norm": 35.339542388916016, "learning_rate": 4.434949595143175e-06, "loss": 24.5267, "step": 6027 }, { "epoch": 0.5594431554524362, "grad_norm": 35.49898910522461, "learning_rate": 4.433424872311393e-06, "loss": 22.0937, "step": 6028 }, { "epoch": 0.5595359628770301, "grad_norm": 42.139617919921875, "learning_rate": 4.4319002028495465e-06, "loss": 22.4128, "step": 6029 }, { "epoch": 0.5596287703016242, "grad_norm": 39.313358306884766, "learning_rate": 4.430375586901258e-06, "loss": 23.3031, "step": 6030 }, { "epoch": 0.5597215777262181, "grad_norm": 37.571258544921875, "learning_rate": 4.4288510246101435e-06, "loss": 23.4019, "step": 6031 }, { "epoch": 0.559814385150812, "grad_norm": 35.777950286865234, "learning_rate": 4.4273265161198086e-06, "loss": 20.9256, "step": 6032 }, { "epoch": 0.559907192575406, "grad_norm": 37.83156967163086, "learning_rate": 4.425802061573862e-06, "loss": 23.8844, "step": 6033 }, { "epoch": 0.56, "grad_norm": 36.489662170410156, "learning_rate": 4.424277661115902e-06, "loss": 24.8465, "step": 6034 }, { "epoch": 0.560092807424594, "grad_norm": 37.558570861816406, "learning_rate": 4.422753314889524e-06, "loss": 22.2195, "step": 6035 }, { "epoch": 0.5601856148491879, "grad_norm": 47.68328094482422, "learning_rate": 4.421229023038316e-06, "loss": 21.3348, "step": 6036 }, { "epoch": 0.5602784222737819, "grad_norm": 34.77606201171875, "learning_rate": 4.419704785705864e-06, "loss": 22.8213, "step": 6037 }, { "epoch": 0.5603712296983758, "grad_norm": 34.7838134765625, "learning_rate": 4.418180603035745e-06, "loss": 22.1903, "step": 6038 }, { "epoch": 0.5604640371229699, "grad_norm": 34.85394287109375, "learning_rate": 4.416656475171537e-06, "loss": 22.5669, "step": 6039 }, { "epoch": 0.5605568445475638, "grad_norm": 39.532371520996094, "learning_rate": 4.415132402256805e-06, "loss": 22.8766, "step": 6040 }, { "epoch": 0.5606496519721578, "grad_norm": 36.11322021484375, "learning_rate": 4.413608384435115e-06, "loss": 21.5389, "step": 6041 }, { "epoch": 0.5607424593967517, "grad_norm": 43.096046447753906, "learning_rate": 4.412084421850026e-06, "loss": 22.736, "step": 6042 }, { "epoch": 0.5608352668213457, "grad_norm": 42.10653305053711, "learning_rate": 4.41056051464509e-06, "loss": 21.955, "step": 6043 }, { "epoch": 0.5609280742459397, "grad_norm": 59.11994171142578, "learning_rate": 4.409036662963856e-06, "loss": 22.249, "step": 6044 }, { "epoch": 0.5610208816705337, "grad_norm": 42.88226318359375, "learning_rate": 4.407512866949867e-06, "loss": 25.0587, "step": 6045 }, { "epoch": 0.5611136890951276, "grad_norm": 43.36640167236328, "learning_rate": 4.405989126746658e-06, "loss": 23.8855, "step": 6046 }, { "epoch": 0.5612064965197215, "grad_norm": 42.00486755371094, "learning_rate": 4.404465442497765e-06, "loss": 23.9644, "step": 6047 }, { "epoch": 0.5612993039443156, "grad_norm": 60.80055618286133, "learning_rate": 4.402941814346716e-06, "loss": 21.7651, "step": 6048 }, { "epoch": 0.5613921113689095, "grad_norm": 39.87022018432617, "learning_rate": 4.401418242437029e-06, "loss": 21.5049, "step": 6049 }, { "epoch": 0.5614849187935035, "grad_norm": 38.947635650634766, "learning_rate": 4.399894726912223e-06, "loss": 23.762, "step": 6050 }, { "epoch": 0.5615777262180974, "grad_norm": 46.04364013671875, "learning_rate": 4.39837126791581e-06, "loss": 25.2154, "step": 6051 }, { "epoch": 0.5616705336426914, "grad_norm": 40.88571548461914, "learning_rate": 4.396847865591293e-06, "loss": 22.2271, "step": 6052 }, { "epoch": 0.5617633410672854, "grad_norm": 43.56166458129883, "learning_rate": 4.395324520082174e-06, "loss": 23.5126, "step": 6053 }, { "epoch": 0.5618561484918794, "grad_norm": 39.56428527832031, "learning_rate": 4.393801231531952e-06, "loss": 23.9936, "step": 6054 }, { "epoch": 0.5619489559164733, "grad_norm": 40.824100494384766, "learning_rate": 4.3922780000841105e-06, "loss": 23.1918, "step": 6055 }, { "epoch": 0.5620417633410673, "grad_norm": 39.161033630371094, "learning_rate": 4.390754825882138e-06, "loss": 23.7871, "step": 6056 }, { "epoch": 0.5621345707656612, "grad_norm": 37.34345626831055, "learning_rate": 4.3892317090695134e-06, "loss": 21.1098, "step": 6057 }, { "epoch": 0.5622273781902553, "grad_norm": 37.492332458496094, "learning_rate": 4.387708649789709e-06, "loss": 23.4543, "step": 6058 }, { "epoch": 0.5623201856148492, "grad_norm": 38.344085693359375, "learning_rate": 4.386185648186194e-06, "loss": 23.0348, "step": 6059 }, { "epoch": 0.5624129930394431, "grad_norm": 41.379398345947266, "learning_rate": 4.384662704402433e-06, "loss": 22.5561, "step": 6060 }, { "epoch": 0.5625058004640371, "grad_norm": 41.513092041015625, "learning_rate": 4.383139818581879e-06, "loss": 23.0887, "step": 6061 }, { "epoch": 0.5625986078886311, "grad_norm": 44.44325637817383, "learning_rate": 4.3816169908679865e-06, "loss": 23.6911, "step": 6062 }, { "epoch": 0.5626914153132251, "grad_norm": 41.88719177246094, "learning_rate": 4.380094221404205e-06, "loss": 23.1116, "step": 6063 }, { "epoch": 0.562784222737819, "grad_norm": 38.82361602783203, "learning_rate": 4.378571510333971e-06, "loss": 22.6949, "step": 6064 }, { "epoch": 0.562877030162413, "grad_norm": 38.20517349243164, "learning_rate": 4.37704885780072e-06, "loss": 23.5233, "step": 6065 }, { "epoch": 0.5629698375870069, "grad_norm": 38.722251892089844, "learning_rate": 4.375526263947887e-06, "loss": 21.8105, "step": 6066 }, { "epoch": 0.563062645011601, "grad_norm": 38.418418884277344, "learning_rate": 4.374003728918891e-06, "loss": 24.1963, "step": 6067 }, { "epoch": 0.5631554524361949, "grad_norm": 35.64037322998047, "learning_rate": 4.372481252857154e-06, "loss": 22.1855, "step": 6068 }, { "epoch": 0.5632482598607889, "grad_norm": 52.97203063964844, "learning_rate": 4.370958835906088e-06, "loss": 24.0068, "step": 6069 }, { "epoch": 0.5633410672853828, "grad_norm": 45.589473724365234, "learning_rate": 4.369436478209101e-06, "loss": 23.7909, "step": 6070 }, { "epoch": 0.5634338747099767, "grad_norm": 49.66804504394531, "learning_rate": 4.3679141799095965e-06, "loss": 24.835, "step": 6071 }, { "epoch": 0.5635266821345708, "grad_norm": 44.44447708129883, "learning_rate": 4.366391941150969e-06, "loss": 23.8065, "step": 6072 }, { "epoch": 0.5636194895591647, "grad_norm": 39.143104553222656, "learning_rate": 4.36486976207661e-06, "loss": 24.8386, "step": 6073 }, { "epoch": 0.5637122969837587, "grad_norm": 38.22527313232422, "learning_rate": 4.363347642829908e-06, "loss": 22.2294, "step": 6074 }, { "epoch": 0.5638051044083526, "grad_norm": 37.41004943847656, "learning_rate": 4.361825583554237e-06, "loss": 21.7102, "step": 6075 }, { "epoch": 0.5638979118329467, "grad_norm": 38.54340744018555, "learning_rate": 4.360303584392976e-06, "loss": 22.6921, "step": 6076 }, { "epoch": 0.5639907192575406, "grad_norm": 39.18132019042969, "learning_rate": 4.358781645489492e-06, "loss": 24.4312, "step": 6077 }, { "epoch": 0.5640835266821346, "grad_norm": 38.73359680175781, "learning_rate": 4.357259766987147e-06, "loss": 24.1419, "step": 6078 }, { "epoch": 0.5641763341067285, "grad_norm": 39.779052734375, "learning_rate": 4.3557379490293e-06, "loss": 24.6569, "step": 6079 }, { "epoch": 0.5642691415313225, "grad_norm": 42.022705078125, "learning_rate": 4.3542161917593e-06, "loss": 23.8364, "step": 6080 }, { "epoch": 0.5643619489559165, "grad_norm": 43.90532302856445, "learning_rate": 4.352694495320494e-06, "loss": 24.4728, "step": 6081 }, { "epoch": 0.5644547563805105, "grad_norm": 39.66401290893555, "learning_rate": 4.351172859856221e-06, "loss": 23.2794, "step": 6082 }, { "epoch": 0.5645475638051044, "grad_norm": 40.443416595458984, "learning_rate": 4.3496512855098175e-06, "loss": 23.4932, "step": 6083 }, { "epoch": 0.5646403712296983, "grad_norm": 37.99930191040039, "learning_rate": 4.34812977242461e-06, "loss": 22.5102, "step": 6084 }, { "epoch": 0.5647331786542923, "grad_norm": 39.65140151977539, "learning_rate": 4.34660832074392e-06, "loss": 23.6683, "step": 6085 }, { "epoch": 0.5648259860788863, "grad_norm": 47.79204177856445, "learning_rate": 4.345086930611067e-06, "loss": 22.6982, "step": 6086 }, { "epoch": 0.5649187935034803, "grad_norm": 42.959815979003906, "learning_rate": 4.343565602169361e-06, "loss": 21.8233, "step": 6087 }, { "epoch": 0.5650116009280742, "grad_norm": 37.51438522338867, "learning_rate": 4.342044335562108e-06, "loss": 23.603, "step": 6088 }, { "epoch": 0.5651044083526682, "grad_norm": 42.53816604614258, "learning_rate": 4.340523130932605e-06, "loss": 23.8567, "step": 6089 }, { "epoch": 0.5651972157772622, "grad_norm": 37.79109191894531, "learning_rate": 4.339001988424148e-06, "loss": 23.6485, "step": 6090 }, { "epoch": 0.5652900232018562, "grad_norm": 43.88105773925781, "learning_rate": 4.337480908180025e-06, "loss": 23.8302, "step": 6091 }, { "epoch": 0.5653828306264501, "grad_norm": 42.06974411010742, "learning_rate": 4.335959890343516e-06, "loss": 25.3518, "step": 6092 }, { "epoch": 0.5654756380510441, "grad_norm": 52.044620513916016, "learning_rate": 4.334438935057897e-06, "loss": 23.9038, "step": 6093 }, { "epoch": 0.565568445475638, "grad_norm": 39.1532096862793, "learning_rate": 4.33291804246644e-06, "loss": 24.3063, "step": 6094 }, { "epoch": 0.5656612529002321, "grad_norm": 38.374359130859375, "learning_rate": 4.331397212712411e-06, "loss": 22.0442, "step": 6095 }, { "epoch": 0.565754060324826, "grad_norm": 38.48960876464844, "learning_rate": 4.329876445939062e-06, "loss": 24.1058, "step": 6096 }, { "epoch": 0.56584686774942, "grad_norm": 33.37970733642578, "learning_rate": 4.328355742289649e-06, "loss": 22.3612, "step": 6097 }, { "epoch": 0.5659396751740139, "grad_norm": 37.483306884765625, "learning_rate": 4.326835101907422e-06, "loss": 24.5427, "step": 6098 }, { "epoch": 0.5660324825986078, "grad_norm": 42.07987594604492, "learning_rate": 4.3253145249356145e-06, "loss": 24.6277, "step": 6099 }, { "epoch": 0.5661252900232019, "grad_norm": 38.88889694213867, "learning_rate": 4.323794011517464e-06, "loss": 23.2257, "step": 6100 }, { "epoch": 0.5662180974477958, "grad_norm": 41.07001876831055, "learning_rate": 4.322273561796203e-06, "loss": 23.6854, "step": 6101 }, { "epoch": 0.5663109048723898, "grad_norm": 47.585201263427734, "learning_rate": 4.320753175915047e-06, "loss": 24.4578, "step": 6102 }, { "epoch": 0.5664037122969837, "grad_norm": 39.34977340698242, "learning_rate": 4.319232854017218e-06, "loss": 21.8316, "step": 6103 }, { "epoch": 0.5664965197215778, "grad_norm": 37.52015686035156, "learning_rate": 4.317712596245922e-06, "loss": 21.9288, "step": 6104 }, { "epoch": 0.5665893271461717, "grad_norm": 44.62999725341797, "learning_rate": 4.316192402744366e-06, "loss": 23.5727, "step": 6105 }, { "epoch": 0.5666821345707657, "grad_norm": 36.019962310791016, "learning_rate": 4.314672273655751e-06, "loss": 22.4035, "step": 6106 }, { "epoch": 0.5667749419953596, "grad_norm": 37.51826858520508, "learning_rate": 4.313152209123263e-06, "loss": 22.1602, "step": 6107 }, { "epoch": 0.5668677494199535, "grad_norm": 33.21702575683594, "learning_rate": 4.3116322092900925e-06, "loss": 24.6895, "step": 6108 }, { "epoch": 0.5669605568445476, "grad_norm": 38.046142578125, "learning_rate": 4.310112274299421e-06, "loss": 23.8541, "step": 6109 }, { "epoch": 0.5670533642691415, "grad_norm": 37.77193069458008, "learning_rate": 4.308592404294417e-06, "loss": 21.5978, "step": 6110 }, { "epoch": 0.5671461716937355, "grad_norm": 34.86677551269531, "learning_rate": 4.307072599418252e-06, "loss": 21.8528, "step": 6111 }, { "epoch": 0.5672389791183294, "grad_norm": 38.32230758666992, "learning_rate": 4.30555285981409e-06, "loss": 23.6329, "step": 6112 }, { "epoch": 0.5673317865429235, "grad_norm": 35.63985061645508, "learning_rate": 4.304033185625082e-06, "loss": 24.9846, "step": 6113 }, { "epoch": 0.5674245939675174, "grad_norm": 40.24690246582031, "learning_rate": 4.3025135769943786e-06, "loss": 24.8109, "step": 6114 }, { "epoch": 0.5675174013921114, "grad_norm": 37.60550308227539, "learning_rate": 4.300994034065126e-06, "loss": 22.9251, "step": 6115 }, { "epoch": 0.5676102088167053, "grad_norm": 36.262359619140625, "learning_rate": 4.299474556980458e-06, "loss": 22.6728, "step": 6116 }, { "epoch": 0.5677030162412993, "grad_norm": 33.938636779785156, "learning_rate": 4.297955145883506e-06, "loss": 22.3017, "step": 6117 }, { "epoch": 0.5677958236658933, "grad_norm": 41.62661361694336, "learning_rate": 4.296435800917398e-06, "loss": 21.7628, "step": 6118 }, { "epoch": 0.5678886310904873, "grad_norm": 35.954586029052734, "learning_rate": 4.294916522225247e-06, "loss": 21.6897, "step": 6119 }, { "epoch": 0.5679814385150812, "grad_norm": 39.3223762512207, "learning_rate": 4.293397309950168e-06, "loss": 24.1714, "step": 6120 }, { "epoch": 0.5680742459396751, "grad_norm": 36.34047317504883, "learning_rate": 4.291878164235269e-06, "loss": 21.9067, "step": 6121 }, { "epoch": 0.5681670533642691, "grad_norm": 39.20747375488281, "learning_rate": 4.290359085223646e-06, "loss": 24.7522, "step": 6122 }, { "epoch": 0.5682598607888631, "grad_norm": 34.295021057128906, "learning_rate": 4.288840073058393e-06, "loss": 21.1478, "step": 6123 }, { "epoch": 0.5683526682134571, "grad_norm": 37.69523620605469, "learning_rate": 4.2873211278826e-06, "loss": 23.6116, "step": 6124 }, { "epoch": 0.568445475638051, "grad_norm": 36.75920867919922, "learning_rate": 4.285802249839346e-06, "loss": 23.9236, "step": 6125 }, { "epoch": 0.568538283062645, "grad_norm": 36.34601593017578, "learning_rate": 4.284283439071703e-06, "loss": 23.7589, "step": 6126 }, { "epoch": 0.568631090487239, "grad_norm": 36.392208099365234, "learning_rate": 4.282764695722744e-06, "loss": 23.907, "step": 6127 }, { "epoch": 0.568723897911833, "grad_norm": 37.9588623046875, "learning_rate": 4.2812460199355275e-06, "loss": 22.8799, "step": 6128 }, { "epoch": 0.5688167053364269, "grad_norm": 38.387569427490234, "learning_rate": 4.279727411853108e-06, "loss": 23.6773, "step": 6129 }, { "epoch": 0.5689095127610209, "grad_norm": 37.51795959472656, "learning_rate": 4.278208871618537e-06, "loss": 23.3057, "step": 6130 }, { "epoch": 0.5690023201856148, "grad_norm": 37.79434585571289, "learning_rate": 4.276690399374857e-06, "loss": 23.4759, "step": 6131 }, { "epoch": 0.5690951276102089, "grad_norm": 39.07027053833008, "learning_rate": 4.275171995265101e-06, "loss": 25.2357, "step": 6132 }, { "epoch": 0.5691879350348028, "grad_norm": 35.5866813659668, "learning_rate": 4.273653659432304e-06, "loss": 22.7295, "step": 6133 }, { "epoch": 0.5692807424593967, "grad_norm": 40.75004959106445, "learning_rate": 4.272135392019485e-06, "loss": 22.2981, "step": 6134 }, { "epoch": 0.5693735498839907, "grad_norm": 37.72293472290039, "learning_rate": 4.2706171931696635e-06, "loss": 22.0438, "step": 6135 }, { "epoch": 0.5694663573085846, "grad_norm": 36.56329345703125, "learning_rate": 4.269099063025848e-06, "loss": 24.3017, "step": 6136 }, { "epoch": 0.5695591647331787, "grad_norm": 36.662635803222656, "learning_rate": 4.267581001731044e-06, "loss": 23.7005, "step": 6137 }, { "epoch": 0.5696519721577726, "grad_norm": 36.61162567138672, "learning_rate": 4.266063009428249e-06, "loss": 22.0647, "step": 6138 }, { "epoch": 0.5697447795823666, "grad_norm": 38.579002380371094, "learning_rate": 4.264545086260453e-06, "loss": 23.4277, "step": 6139 }, { "epoch": 0.5698375870069605, "grad_norm": 37.931640625, "learning_rate": 4.263027232370641e-06, "loss": 25.0012, "step": 6140 }, { "epoch": 0.5699303944315546, "grad_norm": 37.504154205322266, "learning_rate": 4.261509447901792e-06, "loss": 22.7881, "step": 6141 }, { "epoch": 0.5700232018561485, "grad_norm": 40.90971755981445, "learning_rate": 4.2599917329968756e-06, "loss": 23.3705, "step": 6142 }, { "epoch": 0.5701160092807425, "grad_norm": 36.10054397583008, "learning_rate": 4.258474087798856e-06, "loss": 22.4938, "step": 6143 }, { "epoch": 0.5702088167053364, "grad_norm": 37.117801666259766, "learning_rate": 4.256956512450697e-06, "loss": 24.8483, "step": 6144 }, { "epoch": 0.5703016241299304, "grad_norm": 34.5275764465332, "learning_rate": 4.255439007095343e-06, "loss": 21.6223, "step": 6145 }, { "epoch": 0.5703944315545244, "grad_norm": 48.886192321777344, "learning_rate": 4.253921571875743e-06, "loss": 23.8463, "step": 6146 }, { "epoch": 0.5704872389791183, "grad_norm": 41.57538986206055, "learning_rate": 4.252404206934837e-06, "loss": 22.8002, "step": 6147 }, { "epoch": 0.5705800464037123, "grad_norm": 36.167869567871094, "learning_rate": 4.250886912415553e-06, "loss": 23.6996, "step": 6148 }, { "epoch": 0.5706728538283062, "grad_norm": 39.86750793457031, "learning_rate": 4.249369688460819e-06, "loss": 22.9974, "step": 6149 }, { "epoch": 0.5707656612529002, "grad_norm": 44.053138732910156, "learning_rate": 4.247852535213554e-06, "loss": 24.3232, "step": 6150 }, { "epoch": 0.5708584686774942, "grad_norm": 38.780006408691406, "learning_rate": 4.246335452816667e-06, "loss": 22.563, "step": 6151 }, { "epoch": 0.5709512761020882, "grad_norm": 37.20563507080078, "learning_rate": 4.2448184414130665e-06, "loss": 21.8146, "step": 6152 }, { "epoch": 0.5710440835266821, "grad_norm": 36.83931350708008, "learning_rate": 4.243301501145651e-06, "loss": 23.8711, "step": 6153 }, { "epoch": 0.5711368909512761, "grad_norm": 44.62697982788086, "learning_rate": 4.241784632157309e-06, "loss": 24.5215, "step": 6154 }, { "epoch": 0.5712296983758701, "grad_norm": 37.97932052612305, "learning_rate": 4.240267834590928e-06, "loss": 22.0045, "step": 6155 }, { "epoch": 0.5713225058004641, "grad_norm": 46.54772186279297, "learning_rate": 4.238751108589389e-06, "loss": 24.5015, "step": 6156 }, { "epoch": 0.571415313225058, "grad_norm": 39.50539016723633, "learning_rate": 4.237234454295559e-06, "loss": 23.3972, "step": 6157 }, { "epoch": 0.571508120649652, "grad_norm": 38.49138641357422, "learning_rate": 4.235717871852305e-06, "loss": 20.8625, "step": 6158 }, { "epoch": 0.5716009280742459, "grad_norm": 40.883758544921875, "learning_rate": 4.234201361402488e-06, "loss": 22.2838, "step": 6159 }, { "epoch": 0.57169373549884, "grad_norm": 42.43312072753906, "learning_rate": 4.232684923088954e-06, "loss": 25.0344, "step": 6160 }, { "epoch": 0.5717865429234339, "grad_norm": 36.267356872558594, "learning_rate": 4.231168557054551e-06, "loss": 23.6295, "step": 6161 }, { "epoch": 0.5718793503480278, "grad_norm": 39.05256652832031, "learning_rate": 4.229652263442119e-06, "loss": 21.91, "step": 6162 }, { "epoch": 0.5719721577726218, "grad_norm": 38.17894744873047, "learning_rate": 4.228136042394483e-06, "loss": 22.5847, "step": 6163 }, { "epoch": 0.5720649651972157, "grad_norm": 45.40350341796875, "learning_rate": 4.226619894054471e-06, "loss": 23.3668, "step": 6164 }, { "epoch": 0.5721577726218098, "grad_norm": 38.692447662353516, "learning_rate": 4.225103818564901e-06, "loss": 23.9402, "step": 6165 }, { "epoch": 0.5722505800464037, "grad_norm": 38.042198181152344, "learning_rate": 4.223587816068581e-06, "loss": 23.339, "step": 6166 }, { "epoch": 0.5723433874709977, "grad_norm": 50.9007453918457, "learning_rate": 4.222071886708316e-06, "loss": 25.5983, "step": 6167 }, { "epoch": 0.5724361948955916, "grad_norm": 42.55767822265625, "learning_rate": 4.220556030626904e-06, "loss": 26.4493, "step": 6168 }, { "epoch": 0.5725290023201857, "grad_norm": 39.319053649902344, "learning_rate": 4.219040247967132e-06, "loss": 22.8327, "step": 6169 }, { "epoch": 0.5726218097447796, "grad_norm": 41.92180633544922, "learning_rate": 4.217524538871783e-06, "loss": 25.176, "step": 6170 }, { "epoch": 0.5727146171693736, "grad_norm": 45.18217849731445, "learning_rate": 4.216008903483637e-06, "loss": 23.8063, "step": 6171 }, { "epoch": 0.5728074245939675, "grad_norm": 38.681190490722656, "learning_rate": 4.2144933419454585e-06, "loss": 22.5541, "step": 6172 }, { "epoch": 0.5729002320185614, "grad_norm": 41.88984298706055, "learning_rate": 4.212977854400011e-06, "loss": 24.6396, "step": 6173 }, { "epoch": 0.5729930394431555, "grad_norm": 37.408653259277344, "learning_rate": 4.21146244099005e-06, "loss": 22.3009, "step": 6174 }, { "epoch": 0.5730858468677494, "grad_norm": 40.00270080566406, "learning_rate": 4.2099471018583245e-06, "loss": 22.1194, "step": 6175 }, { "epoch": 0.5731786542923434, "grad_norm": 35.464080810546875, "learning_rate": 4.208431837147574e-06, "loss": 22.4271, "step": 6176 }, { "epoch": 0.5732714617169373, "grad_norm": 37.41453552246094, "learning_rate": 4.206916647000531e-06, "loss": 22.0804, "step": 6177 }, { "epoch": 0.5733642691415313, "grad_norm": 45.54054260253906, "learning_rate": 4.205401531559927e-06, "loss": 21.9981, "step": 6178 }, { "epoch": 0.5734570765661253, "grad_norm": 36.55176544189453, "learning_rate": 4.2038864909684784e-06, "loss": 22.4578, "step": 6179 }, { "epoch": 0.5735498839907193, "grad_norm": 48.15593719482422, "learning_rate": 4.202371525368899e-06, "loss": 26.8239, "step": 6180 }, { "epoch": 0.5736426914153132, "grad_norm": 43.3005485534668, "learning_rate": 4.200856634903895e-06, "loss": 22.825, "step": 6181 }, { "epoch": 0.5737354988399072, "grad_norm": 37.3028564453125, "learning_rate": 4.199341819716168e-06, "loss": 23.2925, "step": 6182 }, { "epoch": 0.5738283062645012, "grad_norm": 38.455013275146484, "learning_rate": 4.1978270799484045e-06, "loss": 22.6431, "step": 6183 }, { "epoch": 0.5739211136890952, "grad_norm": 40.51151657104492, "learning_rate": 4.1963124157432915e-06, "loss": 24.4442, "step": 6184 }, { "epoch": 0.5740139211136891, "grad_norm": 43.261966705322266, "learning_rate": 4.194797827243509e-06, "loss": 22.701, "step": 6185 }, { "epoch": 0.574106728538283, "grad_norm": 42.16401290893555, "learning_rate": 4.193283314591723e-06, "loss": 23.7627, "step": 6186 }, { "epoch": 0.574199535962877, "grad_norm": 36.58956527709961, "learning_rate": 4.1917688779305994e-06, "loss": 23.9242, "step": 6187 }, { "epoch": 0.574292343387471, "grad_norm": 36.798397064208984, "learning_rate": 4.190254517402795e-06, "loss": 22.8961, "step": 6188 }, { "epoch": 0.574385150812065, "grad_norm": 37.502689361572266, "learning_rate": 4.1887402331509565e-06, "loss": 25.493, "step": 6189 }, { "epoch": 0.5744779582366589, "grad_norm": 37.192447662353516, "learning_rate": 4.187226025317726e-06, "loss": 21.8885, "step": 6190 }, { "epoch": 0.5745707656612529, "grad_norm": 39.82575607299805, "learning_rate": 4.1857118940457395e-06, "loss": 22.2329, "step": 6191 }, { "epoch": 0.5746635730858468, "grad_norm": 39.21883773803711, "learning_rate": 4.184197839477622e-06, "loss": 24.1863, "step": 6192 }, { "epoch": 0.5747563805104409, "grad_norm": 37.894187927246094, "learning_rate": 4.182683861755996e-06, "loss": 22.2417, "step": 6193 }, { "epoch": 0.5748491879350348, "grad_norm": 37.743507385253906, "learning_rate": 4.181169961023475e-06, "loss": 22.1, "step": 6194 }, { "epoch": 0.5749419953596288, "grad_norm": 41.445980072021484, "learning_rate": 4.1796561374226605e-06, "loss": 23.9044, "step": 6195 }, { "epoch": 0.5750348027842227, "grad_norm": 205.1024932861328, "learning_rate": 4.1781423910961535e-06, "loss": 22.1621, "step": 6196 }, { "epoch": 0.5751276102088168, "grad_norm": 40.58778762817383, "learning_rate": 4.176628722186548e-06, "loss": 22.1039, "step": 6197 }, { "epoch": 0.5752204176334107, "grad_norm": 41.54694747924805, "learning_rate": 4.175115130836421e-06, "loss": 22.7988, "step": 6198 }, { "epoch": 0.5753132250580046, "grad_norm": 39.86129379272461, "learning_rate": 4.1736016171883534e-06, "loss": 23.514, "step": 6199 }, { "epoch": 0.5754060324825986, "grad_norm": 41.140869140625, "learning_rate": 4.172088181384916e-06, "loss": 22.3121, "step": 6200 }, { "epoch": 0.5754988399071925, "grad_norm": 53.127708435058594, "learning_rate": 4.170574823568666e-06, "loss": 24.1396, "step": 6201 }, { "epoch": 0.5755916473317866, "grad_norm": 43.091224670410156, "learning_rate": 4.169061543882159e-06, "loss": 22.1823, "step": 6202 }, { "epoch": 0.5756844547563805, "grad_norm": 62.71452713012695, "learning_rate": 4.167548342467946e-06, "loss": 26.2886, "step": 6203 }, { "epoch": 0.5757772621809745, "grad_norm": 40.85353469848633, "learning_rate": 4.166035219468561e-06, "loss": 23.9572, "step": 6204 }, { "epoch": 0.5758700696055684, "grad_norm": 35.37824249267578, "learning_rate": 4.164522175026539e-06, "loss": 23.4166, "step": 6205 }, { "epoch": 0.5759628770301625, "grad_norm": 39.135066986083984, "learning_rate": 4.163009209284409e-06, "loss": 24.6141, "step": 6206 }, { "epoch": 0.5760556844547564, "grad_norm": 41.825233459472656, "learning_rate": 4.161496322384682e-06, "loss": 23.3613, "step": 6207 }, { "epoch": 0.5761484918793504, "grad_norm": 41.28408432006836, "learning_rate": 4.15998351446987e-06, "loss": 22.6323, "step": 6208 }, { "epoch": 0.5762412993039443, "grad_norm": 40.0423469543457, "learning_rate": 4.1584707856824785e-06, "loss": 24.781, "step": 6209 }, { "epoch": 0.5763341067285382, "grad_norm": 36.79047393798828, "learning_rate": 4.156958136164999e-06, "loss": 22.246, "step": 6210 }, { "epoch": 0.5764269141531323, "grad_norm": 36.42190933227539, "learning_rate": 4.155445566059921e-06, "loss": 24.3147, "step": 6211 }, { "epoch": 0.5765197215777262, "grad_norm": 43.62853240966797, "learning_rate": 4.153933075509727e-06, "loss": 24.2601, "step": 6212 }, { "epoch": 0.5766125290023202, "grad_norm": 40.82878112792969, "learning_rate": 4.152420664656884e-06, "loss": 22.2185, "step": 6213 }, { "epoch": 0.5767053364269141, "grad_norm": 37.998382568359375, "learning_rate": 4.150908333643862e-06, "loss": 25.4364, "step": 6214 }, { "epoch": 0.5767981438515081, "grad_norm": 36.321434020996094, "learning_rate": 4.149396082613118e-06, "loss": 23.7912, "step": 6215 }, { "epoch": 0.5768909512761021, "grad_norm": 39.548614501953125, "learning_rate": 4.1478839117071e-06, "loss": 25.9615, "step": 6216 }, { "epoch": 0.5769837587006961, "grad_norm": 39.271141052246094, "learning_rate": 4.146371821068254e-06, "loss": 22.4339, "step": 6217 }, { "epoch": 0.57707656612529, "grad_norm": 40.020320892333984, "learning_rate": 4.144859810839011e-06, "loss": 22.3816, "step": 6218 }, { "epoch": 0.577169373549884, "grad_norm": 37.51526641845703, "learning_rate": 4.143347881161804e-06, "loss": 22.2528, "step": 6219 }, { "epoch": 0.577262180974478, "grad_norm": 36.59912872314453, "learning_rate": 4.1418360321790485e-06, "loss": 22.8979, "step": 6220 }, { "epoch": 0.577354988399072, "grad_norm": 41.92799758911133, "learning_rate": 4.140324264033157e-06, "loss": 22.8614, "step": 6221 }, { "epoch": 0.5774477958236659, "grad_norm": 38.750755310058594, "learning_rate": 4.138812576866537e-06, "loss": 23.1283, "step": 6222 }, { "epoch": 0.5775406032482598, "grad_norm": 39.003395080566406, "learning_rate": 4.137300970821583e-06, "loss": 22.0185, "step": 6223 }, { "epoch": 0.5776334106728538, "grad_norm": 39.76426696777344, "learning_rate": 4.135789446040686e-06, "loss": 23.8818, "step": 6224 }, { "epoch": 0.5777262180974478, "grad_norm": 39.71774673461914, "learning_rate": 4.134278002666227e-06, "loss": 22.709, "step": 6225 }, { "epoch": 0.5778190255220418, "grad_norm": 36.4836540222168, "learning_rate": 4.13276664084058e-06, "loss": 21.9953, "step": 6226 }, { "epoch": 0.5779118329466357, "grad_norm": 40.390037536621094, "learning_rate": 4.131255360706111e-06, "loss": 23.0787, "step": 6227 }, { "epoch": 0.5780046403712297, "grad_norm": 47.63038635253906, "learning_rate": 4.12974416240518e-06, "loss": 25.3263, "step": 6228 }, { "epoch": 0.5780974477958236, "grad_norm": 48.70469665527344, "learning_rate": 4.12823304608014e-06, "loss": 25.6248, "step": 6229 }, { "epoch": 0.5781902552204177, "grad_norm": 46.079097747802734, "learning_rate": 4.126722011873328e-06, "loss": 23.9539, "step": 6230 }, { "epoch": 0.5782830626450116, "grad_norm": 40.51308822631836, "learning_rate": 4.125211059927085e-06, "loss": 22.7105, "step": 6231 }, { "epoch": 0.5783758700696056, "grad_norm": 52.907310485839844, "learning_rate": 4.123700190383739e-06, "loss": 23.5194, "step": 6232 }, { "epoch": 0.5784686774941995, "grad_norm": 40.901405334472656, "learning_rate": 4.122189403385606e-06, "loss": 24.8847, "step": 6233 }, { "epoch": 0.5785614849187936, "grad_norm": 39.95673370361328, "learning_rate": 4.120678699075001e-06, "loss": 23.8469, "step": 6234 }, { "epoch": 0.5786542923433875, "grad_norm": 40.18034744262695, "learning_rate": 4.11916807759423e-06, "loss": 24.171, "step": 6235 }, { "epoch": 0.5787470997679814, "grad_norm": 42.83407974243164, "learning_rate": 4.117657539085586e-06, "loss": 23.2111, "step": 6236 }, { "epoch": 0.5788399071925754, "grad_norm": 37.805721282958984, "learning_rate": 4.116147083691359e-06, "loss": 23.3541, "step": 6237 }, { "epoch": 0.5789327146171693, "grad_norm": 104.86310577392578, "learning_rate": 4.114636711553834e-06, "loss": 22.8958, "step": 6238 }, { "epoch": 0.5790255220417634, "grad_norm": 42.67856216430664, "learning_rate": 4.1131264228152785e-06, "loss": 24.0117, "step": 6239 }, { "epoch": 0.5791183294663573, "grad_norm": 44.946746826171875, "learning_rate": 4.11161621761796e-06, "loss": 22.6376, "step": 6240 }, { "epoch": 0.5792111368909513, "grad_norm": 39.40313720703125, "learning_rate": 4.110106096104139e-06, "loss": 23.0396, "step": 6241 }, { "epoch": 0.5793039443155452, "grad_norm": 42.730316162109375, "learning_rate": 4.10859605841606e-06, "loss": 22.9161, "step": 6242 }, { "epoch": 0.5793967517401392, "grad_norm": 43.1264533996582, "learning_rate": 4.107086104695968e-06, "loss": 21.9569, "step": 6243 }, { "epoch": 0.5794895591647332, "grad_norm": 38.69950866699219, "learning_rate": 4.105576235086097e-06, "loss": 24.2659, "step": 6244 }, { "epoch": 0.5795823665893272, "grad_norm": 37.10622787475586, "learning_rate": 4.10406644972867e-06, "loss": 23.6708, "step": 6245 }, { "epoch": 0.5796751740139211, "grad_norm": 36.88980484008789, "learning_rate": 4.10255674876591e-06, "loss": 23.9034, "step": 6246 }, { "epoch": 0.579767981438515, "grad_norm": 40.828372955322266, "learning_rate": 4.10104713234002e-06, "loss": 22.6997, "step": 6247 }, { "epoch": 0.5798607888631091, "grad_norm": 39.29230499267578, "learning_rate": 4.099537600593207e-06, "loss": 26.107, "step": 6248 }, { "epoch": 0.579953596287703, "grad_norm": 42.13066101074219, "learning_rate": 4.098028153667666e-06, "loss": 23.0735, "step": 6249 }, { "epoch": 0.580046403712297, "grad_norm": 37.53302764892578, "learning_rate": 4.096518791705579e-06, "loss": 21.5031, "step": 6250 }, { "epoch": 0.5801392111368909, "grad_norm": 34.96057891845703, "learning_rate": 4.095009514849125e-06, "loss": 22.448, "step": 6251 }, { "epoch": 0.5802320185614849, "grad_norm": 37.83822250366211, "learning_rate": 4.093500323240479e-06, "loss": 23.1156, "step": 6252 }, { "epoch": 0.5803248259860789, "grad_norm": 42.48550033569336, "learning_rate": 4.091991217021795e-06, "loss": 22.2323, "step": 6253 }, { "epoch": 0.5804176334106729, "grad_norm": 37.74830627441406, "learning_rate": 4.090482196335232e-06, "loss": 23.6946, "step": 6254 }, { "epoch": 0.5805104408352668, "grad_norm": 42.79047775268555, "learning_rate": 4.088973261322937e-06, "loss": 22.7861, "step": 6255 }, { "epoch": 0.5806032482598608, "grad_norm": 35.69216537475586, "learning_rate": 4.087464412127044e-06, "loss": 24.2561, "step": 6256 }, { "epoch": 0.5806960556844547, "grad_norm": 44.372344970703125, "learning_rate": 4.085955648889685e-06, "loss": 26.0103, "step": 6257 }, { "epoch": 0.5807888631090488, "grad_norm": 37.2547607421875, "learning_rate": 4.084446971752981e-06, "loss": 24.1682, "step": 6258 }, { "epoch": 0.5808816705336427, "grad_norm": 45.505130767822266, "learning_rate": 4.082938380859046e-06, "loss": 25.6599, "step": 6259 }, { "epoch": 0.5809744779582366, "grad_norm": 40.12705993652344, "learning_rate": 4.081429876349985e-06, "loss": 23.0105, "step": 6260 }, { "epoch": 0.5810672853828306, "grad_norm": 37.300846099853516, "learning_rate": 4.079921458367896e-06, "loss": 24.588, "step": 6261 }, { "epoch": 0.5811600928074246, "grad_norm": 38.911502838134766, "learning_rate": 4.078413127054868e-06, "loss": 23.2172, "step": 6262 }, { "epoch": 0.5812529002320186, "grad_norm": 37.10149383544922, "learning_rate": 4.076904882552981e-06, "loss": 25.0152, "step": 6263 }, { "epoch": 0.5813457076566125, "grad_norm": 36.81111145019531, "learning_rate": 4.075396725004308e-06, "loss": 23.3277, "step": 6264 }, { "epoch": 0.5814385150812065, "grad_norm": 42.10670852661133, "learning_rate": 4.073888654550914e-06, "loss": 22.1157, "step": 6265 }, { "epoch": 0.5815313225058004, "grad_norm": 41.83827209472656, "learning_rate": 4.072380671334857e-06, "loss": 23.5336, "step": 6266 }, { "epoch": 0.5816241299303945, "grad_norm": 40.75801086425781, "learning_rate": 4.070872775498183e-06, "loss": 24.4604, "step": 6267 }, { "epoch": 0.5817169373549884, "grad_norm": 39.76100540161133, "learning_rate": 4.069364967182931e-06, "loss": 23.3727, "step": 6268 }, { "epoch": 0.5818097447795824, "grad_norm": 40.531646728515625, "learning_rate": 4.067857246531136e-06, "loss": 22.16, "step": 6269 }, { "epoch": 0.5819025522041763, "grad_norm": 45.745216369628906, "learning_rate": 4.06634961368482e-06, "loss": 24.1628, "step": 6270 }, { "epoch": 0.5819953596287702, "grad_norm": 39.28800964355469, "learning_rate": 4.064842068785997e-06, "loss": 22.6869, "step": 6271 }, { "epoch": 0.5820881670533643, "grad_norm": 39.74327850341797, "learning_rate": 4.0633346119766756e-06, "loss": 23.0409, "step": 6272 }, { "epoch": 0.5821809744779582, "grad_norm": 39.86220169067383, "learning_rate": 4.061827243398853e-06, "loss": 24.9961, "step": 6273 }, { "epoch": 0.5822737819025522, "grad_norm": 43.05935287475586, "learning_rate": 4.06031996319452e-06, "loss": 24.1909, "step": 6274 }, { "epoch": 0.5823665893271461, "grad_norm": 37.956199645996094, "learning_rate": 4.058812771505658e-06, "loss": 24.1141, "step": 6275 }, { "epoch": 0.5824593967517402, "grad_norm": 37.659523010253906, "learning_rate": 4.057305668474244e-06, "loss": 23.5292, "step": 6276 }, { "epoch": 0.5825522041763341, "grad_norm": 39.4278450012207, "learning_rate": 4.0557986542422385e-06, "loss": 23.5961, "step": 6277 }, { "epoch": 0.5826450116009281, "grad_norm": 42.46863555908203, "learning_rate": 4.0542917289516e-06, "loss": 21.3177, "step": 6278 }, { "epoch": 0.582737819025522, "grad_norm": 37.79871368408203, "learning_rate": 4.0527848927442805e-06, "loss": 22.2, "step": 6279 }, { "epoch": 0.582830626450116, "grad_norm": 37.5665397644043, "learning_rate": 4.051278145762214e-06, "loss": 24.0449, "step": 6280 }, { "epoch": 0.58292343387471, "grad_norm": 39.62079620361328, "learning_rate": 4.0497714881473385e-06, "loss": 23.4399, "step": 6281 }, { "epoch": 0.583016241299304, "grad_norm": 37.422019958496094, "learning_rate": 4.048264920041571e-06, "loss": 23.7792, "step": 6282 }, { "epoch": 0.5831090487238979, "grad_norm": 37.23875045776367, "learning_rate": 4.046758441586831e-06, "loss": 24.5718, "step": 6283 }, { "epoch": 0.5832018561484918, "grad_norm": 37.35773849487305, "learning_rate": 4.045252052925025e-06, "loss": 23.2298, "step": 6284 }, { "epoch": 0.5832946635730858, "grad_norm": 37.15790939331055, "learning_rate": 4.043745754198047e-06, "loss": 22.9081, "step": 6285 }, { "epoch": 0.5833874709976798, "grad_norm": 40.19891357421875, "learning_rate": 4.0422395455477896e-06, "loss": 22.7861, "step": 6286 }, { "epoch": 0.5834802784222738, "grad_norm": 36.557865142822266, "learning_rate": 4.040733427116135e-06, "loss": 24.1507, "step": 6287 }, { "epoch": 0.5835730858468677, "grad_norm": 40.932918548583984, "learning_rate": 4.039227399044952e-06, "loss": 25.0256, "step": 6288 }, { "epoch": 0.5836658932714617, "grad_norm": 40.06391143798828, "learning_rate": 4.037721461476107e-06, "loss": 23.5333, "step": 6289 }, { "epoch": 0.5837587006960557, "grad_norm": 37.3122673034668, "learning_rate": 4.036215614551457e-06, "loss": 22.4164, "step": 6290 }, { "epoch": 0.5838515081206497, "grad_norm": 43.97357940673828, "learning_rate": 4.034709858412846e-06, "loss": 26.7243, "step": 6291 }, { "epoch": 0.5839443155452436, "grad_norm": 38.10322189331055, "learning_rate": 4.033204193202112e-06, "loss": 24.3408, "step": 6292 }, { "epoch": 0.5840371229698376, "grad_norm": 37.64550018310547, "learning_rate": 4.03169861906109e-06, "loss": 22.6745, "step": 6293 }, { "epoch": 0.5841299303944315, "grad_norm": 40.11760330200195, "learning_rate": 4.030193136131594e-06, "loss": 24.5634, "step": 6294 }, { "epoch": 0.5842227378190256, "grad_norm": 41.94294357299805, "learning_rate": 4.028687744555441e-06, "loss": 23.7342, "step": 6295 }, { "epoch": 0.5843155452436195, "grad_norm": 37.37297439575195, "learning_rate": 4.027182444474436e-06, "loss": 22.9756, "step": 6296 }, { "epoch": 0.5844083526682134, "grad_norm": 42.74245834350586, "learning_rate": 4.025677236030371e-06, "loss": 22.5312, "step": 6297 }, { "epoch": 0.5845011600928074, "grad_norm": 40.10068893432617, "learning_rate": 4.024172119365034e-06, "loss": 22.2416, "step": 6298 }, { "epoch": 0.5845939675174014, "grad_norm": 40.01991653442383, "learning_rate": 4.022667094620207e-06, "loss": 22.9979, "step": 6299 }, { "epoch": 0.5846867749419954, "grad_norm": 37.834556579589844, "learning_rate": 4.021162161937653e-06, "loss": 23.9793, "step": 6300 }, { "epoch": 0.5847795823665893, "grad_norm": 43.23537826538086, "learning_rate": 4.019657321459137e-06, "loss": 24.6153, "step": 6301 }, { "epoch": 0.5848723897911833, "grad_norm": 41.79618835449219, "learning_rate": 4.0181525733264115e-06, "loss": 23.4718, "step": 6302 }, { "epoch": 0.5849651972157772, "grad_norm": 36.99419021606445, "learning_rate": 4.016647917681217e-06, "loss": 23.681, "step": 6303 }, { "epoch": 0.5850580046403713, "grad_norm": 36.36692810058594, "learning_rate": 4.0151433546652896e-06, "loss": 23.4782, "step": 6304 }, { "epoch": 0.5851508120649652, "grad_norm": 38.1982307434082, "learning_rate": 4.013638884420357e-06, "loss": 22.8099, "step": 6305 }, { "epoch": 0.5852436194895592, "grad_norm": 37.826168060302734, "learning_rate": 4.012134507088135e-06, "loss": 23.0165, "step": 6306 }, { "epoch": 0.5853364269141531, "grad_norm": 39.3977165222168, "learning_rate": 4.010630222810331e-06, "loss": 25.2219, "step": 6307 }, { "epoch": 0.585429234338747, "grad_norm": 37.408939361572266, "learning_rate": 4.009126031728647e-06, "loss": 23.3752, "step": 6308 }, { "epoch": 0.5855220417633411, "grad_norm": 35.22312927246094, "learning_rate": 4.0076219339847734e-06, "loss": 22.138, "step": 6309 }, { "epoch": 0.585614849187935, "grad_norm": 34.99916458129883, "learning_rate": 4.00611792972039e-06, "loss": 24.4731, "step": 6310 }, { "epoch": 0.585707656612529, "grad_norm": 39.15542984008789, "learning_rate": 4.004614019077174e-06, "loss": 24.1953, "step": 6311 }, { "epoch": 0.5858004640371229, "grad_norm": 35.20135498046875, "learning_rate": 4.003110202196787e-06, "loss": 22.9275, "step": 6312 }, { "epoch": 0.585893271461717, "grad_norm": 41.74477005004883, "learning_rate": 4.001606479220887e-06, "loss": 23.9346, "step": 6313 }, { "epoch": 0.5859860788863109, "grad_norm": 38.0413932800293, "learning_rate": 4.000102850291119e-06, "loss": 23.5719, "step": 6314 }, { "epoch": 0.5860788863109049, "grad_norm": 38.688167572021484, "learning_rate": 3.998599315549122e-06, "loss": 24.4699, "step": 6315 }, { "epoch": 0.5861716937354988, "grad_norm": 38.94092559814453, "learning_rate": 3.997095875136525e-06, "loss": 21.2087, "step": 6316 }, { "epoch": 0.5862645011600928, "grad_norm": 37.270076751708984, "learning_rate": 3.995592529194948e-06, "loss": 21.8532, "step": 6317 }, { "epoch": 0.5863573085846868, "grad_norm": 38.623207092285156, "learning_rate": 3.994089277866001e-06, "loss": 23.13, "step": 6318 }, { "epoch": 0.5864501160092808, "grad_norm": 42.2636833190918, "learning_rate": 3.992586121291291e-06, "loss": 24.4454, "step": 6319 }, { "epoch": 0.5865429234338747, "grad_norm": 34.46853256225586, "learning_rate": 3.991083059612406e-06, "loss": 22.0602, "step": 6320 }, { "epoch": 0.5866357308584687, "grad_norm": 41.0443000793457, "learning_rate": 3.989580092970934e-06, "loss": 23.1981, "step": 6321 }, { "epoch": 0.5867285382830626, "grad_norm": 34.74534225463867, "learning_rate": 3.98807722150845e-06, "loss": 22.8291, "step": 6322 }, { "epoch": 0.5868213457076566, "grad_norm": 39.597572326660156, "learning_rate": 3.986574445366519e-06, "loss": 24.0046, "step": 6323 }, { "epoch": 0.5869141531322506, "grad_norm": 35.94157791137695, "learning_rate": 3.985071764686701e-06, "loss": 21.816, "step": 6324 }, { "epoch": 0.5870069605568445, "grad_norm": 34.953609466552734, "learning_rate": 3.983569179610544e-06, "loss": 24.1561, "step": 6325 }, { "epoch": 0.5870997679814385, "grad_norm": 40.992286682128906, "learning_rate": 3.982066690279588e-06, "loss": 23.2623, "step": 6326 }, { "epoch": 0.5871925754060325, "grad_norm": 45.51819610595703, "learning_rate": 3.980564296835361e-06, "loss": 23.3348, "step": 6327 }, { "epoch": 0.5872853828306265, "grad_norm": 38.94023132324219, "learning_rate": 3.97906199941939e-06, "loss": 23.3453, "step": 6328 }, { "epoch": 0.5873781902552204, "grad_norm": 37.841880798339844, "learning_rate": 3.977559798173182e-06, "loss": 23.6859, "step": 6329 }, { "epoch": 0.5874709976798144, "grad_norm": 51.64731979370117, "learning_rate": 3.976057693238243e-06, "loss": 23.1544, "step": 6330 }, { "epoch": 0.5875638051044083, "grad_norm": 56.296443939208984, "learning_rate": 3.97455568475607e-06, "loss": 22.4829, "step": 6331 }, { "epoch": 0.5876566125290024, "grad_norm": 41.71721649169922, "learning_rate": 3.973053772868143e-06, "loss": 23.4275, "step": 6332 }, { "epoch": 0.5877494199535963, "grad_norm": 39.9912109375, "learning_rate": 3.971551957715941e-06, "loss": 23.7923, "step": 6333 }, { "epoch": 0.5878422273781903, "grad_norm": 37.51467514038086, "learning_rate": 3.970050239440933e-06, "loss": 22.409, "step": 6334 }, { "epoch": 0.5879350348027842, "grad_norm": 41.96778869628906, "learning_rate": 3.968548618184575e-06, "loss": 23.2841, "step": 6335 }, { "epoch": 0.5880278422273781, "grad_norm": 35.38312530517578, "learning_rate": 3.9670470940883144e-06, "loss": 22.1557, "step": 6336 }, { "epoch": 0.5881206496519722, "grad_norm": 38.654991149902344, "learning_rate": 3.965545667293595e-06, "loss": 23.2934, "step": 6337 }, { "epoch": 0.5882134570765661, "grad_norm": 37.802894592285156, "learning_rate": 3.964044337941844e-06, "loss": 22.9038, "step": 6338 }, { "epoch": 0.5883062645011601, "grad_norm": 36.843685150146484, "learning_rate": 3.962543106174484e-06, "loss": 22.4476, "step": 6339 }, { "epoch": 0.588399071925754, "grad_norm": 35.402278900146484, "learning_rate": 3.961041972132928e-06, "loss": 23.8427, "step": 6340 }, { "epoch": 0.5884918793503481, "grad_norm": 39.362178802490234, "learning_rate": 3.959540935958577e-06, "loss": 23.7592, "step": 6341 }, { "epoch": 0.588584686774942, "grad_norm": 44.40837097167969, "learning_rate": 3.9580399977928256e-06, "loss": 23.6962, "step": 6342 }, { "epoch": 0.588677494199536, "grad_norm": 38.43400192260742, "learning_rate": 3.956539157777061e-06, "loss": 21.6447, "step": 6343 }, { "epoch": 0.5887703016241299, "grad_norm": 41.25529479980469, "learning_rate": 3.955038416052655e-06, "loss": 23.7738, "step": 6344 }, { "epoch": 0.5888631090487239, "grad_norm": 41.164146423339844, "learning_rate": 3.953537772760973e-06, "loss": 24.065, "step": 6345 }, { "epoch": 0.5889559164733179, "grad_norm": 44.199954986572266, "learning_rate": 3.952037228043377e-06, "loss": 21.9526, "step": 6346 }, { "epoch": 0.5890487238979119, "grad_norm": 45.58423614501953, "learning_rate": 3.950536782041209e-06, "loss": 22.3662, "step": 6347 }, { "epoch": 0.5891415313225058, "grad_norm": 47.93484115600586, "learning_rate": 3.94903643489581e-06, "loss": 21.987, "step": 6348 }, { "epoch": 0.5892343387470997, "grad_norm": 49.63557052612305, "learning_rate": 3.94753618674851e-06, "loss": 23.0839, "step": 6349 }, { "epoch": 0.5893271461716937, "grad_norm": 43.845245361328125, "learning_rate": 3.946036037740624e-06, "loss": 23.7705, "step": 6350 }, { "epoch": 0.5894199535962877, "grad_norm": 38.122833251953125, "learning_rate": 3.944535988013466e-06, "loss": 22.7009, "step": 6351 }, { "epoch": 0.5895127610208817, "grad_norm": 39.53253936767578, "learning_rate": 3.943036037708337e-06, "loss": 25.0715, "step": 6352 }, { "epoch": 0.5896055684454756, "grad_norm": 36.230628967285156, "learning_rate": 3.9415361869665265e-06, "loss": 23.0323, "step": 6353 }, { "epoch": 0.5896983758700696, "grad_norm": 48.76626205444336, "learning_rate": 3.940036435929318e-06, "loss": 22.883, "step": 6354 }, { "epoch": 0.5897911832946636, "grad_norm": 39.96355438232422, "learning_rate": 3.938536784737983e-06, "loss": 21.1265, "step": 6355 }, { "epoch": 0.5898839907192576, "grad_norm": 46.66938400268555, "learning_rate": 3.937037233533787e-06, "loss": 23.5647, "step": 6356 }, { "epoch": 0.5899767981438515, "grad_norm": 39.258811950683594, "learning_rate": 3.935537782457982e-06, "loss": 23.6097, "step": 6357 }, { "epoch": 0.5900696055684455, "grad_norm": 43.25603485107422, "learning_rate": 3.934038431651812e-06, "loss": 22.0783, "step": 6358 }, { "epoch": 0.5901624129930394, "grad_norm": 41.45757293701172, "learning_rate": 3.9325391812565135e-06, "loss": 22.3547, "step": 6359 }, { "epoch": 0.5902552204176335, "grad_norm": 51.50344467163086, "learning_rate": 3.931040031413313e-06, "loss": 23.0744, "step": 6360 }, { "epoch": 0.5903480278422274, "grad_norm": 42.99349594116211, "learning_rate": 3.929540982263425e-06, "loss": 21.9836, "step": 6361 }, { "epoch": 0.5904408352668213, "grad_norm": 40.16102981567383, "learning_rate": 3.928042033948054e-06, "loss": 25.2707, "step": 6362 }, { "epoch": 0.5905336426914153, "grad_norm": 41.80352020263672, "learning_rate": 3.926543186608403e-06, "loss": 24.4198, "step": 6363 }, { "epoch": 0.5906264501160092, "grad_norm": 42.06308364868164, "learning_rate": 3.925044440385653e-06, "loss": 25.0728, "step": 6364 }, { "epoch": 0.5907192575406033, "grad_norm": 45.62574768066406, "learning_rate": 3.923545795420986e-06, "loss": 22.4524, "step": 6365 }, { "epoch": 0.5908120649651972, "grad_norm": 39.879364013671875, "learning_rate": 3.922047251855572e-06, "loss": 22.5363, "step": 6366 }, { "epoch": 0.5909048723897912, "grad_norm": 35.153350830078125, "learning_rate": 3.920548809830566e-06, "loss": 23.9365, "step": 6367 }, { "epoch": 0.5909976798143851, "grad_norm": 43.306236267089844, "learning_rate": 3.919050469487119e-06, "loss": 23.8548, "step": 6368 }, { "epoch": 0.5910904872389792, "grad_norm": 43.07832717895508, "learning_rate": 3.917552230966372e-06, "loss": 24.2088, "step": 6369 }, { "epoch": 0.5911832946635731, "grad_norm": 37.06593322753906, "learning_rate": 3.916054094409453e-06, "loss": 21.8929, "step": 6370 }, { "epoch": 0.591276102088167, "grad_norm": 45.99698257446289, "learning_rate": 3.914556059957484e-06, "loss": 23.9434, "step": 6371 }, { "epoch": 0.591368909512761, "grad_norm": 41.714630126953125, "learning_rate": 3.913058127751578e-06, "loss": 22.5137, "step": 6372 }, { "epoch": 0.5914617169373549, "grad_norm": 37.43993377685547, "learning_rate": 3.9115602979328325e-06, "loss": 22.5436, "step": 6373 }, { "epoch": 0.591554524361949, "grad_norm": 39.97657775878906, "learning_rate": 3.910062570642341e-06, "loss": 23.4149, "step": 6374 }, { "epoch": 0.5916473317865429, "grad_norm": 38.2767333984375, "learning_rate": 3.908564946021188e-06, "loss": 24.0902, "step": 6375 }, { "epoch": 0.5917401392111369, "grad_norm": 45.514862060546875, "learning_rate": 3.907067424210441e-06, "loss": 24.7425, "step": 6376 }, { "epoch": 0.5918329466357308, "grad_norm": 41.10131072998047, "learning_rate": 3.905570005351166e-06, "loss": 25.3427, "step": 6377 }, { "epoch": 0.5919257540603248, "grad_norm": 40.34930419921875, "learning_rate": 3.904072689584418e-06, "loss": 24.2289, "step": 6378 }, { "epoch": 0.5920185614849188, "grad_norm": 39.59267807006836, "learning_rate": 3.902575477051235e-06, "loss": 23.465, "step": 6379 }, { "epoch": 0.5921113689095128, "grad_norm": 42.125022888183594, "learning_rate": 3.901078367892653e-06, "loss": 24.2257, "step": 6380 }, { "epoch": 0.5922041763341067, "grad_norm": 277.267578125, "learning_rate": 3.8995813622497e-06, "loss": 23.6115, "step": 6381 }, { "epoch": 0.5922969837587007, "grad_norm": 45.86399459838867, "learning_rate": 3.898084460263383e-06, "loss": 22.7785, "step": 6382 }, { "epoch": 0.5923897911832947, "grad_norm": 40.312156677246094, "learning_rate": 3.8965876620747104e-06, "loss": 22.6634, "step": 6383 }, { "epoch": 0.5924825986078887, "grad_norm": 43.07951354980469, "learning_rate": 3.895090967824678e-06, "loss": 21.2421, "step": 6384 }, { "epoch": 0.5925754060324826, "grad_norm": 41.62382507324219, "learning_rate": 3.8935943776542674e-06, "loss": 21.3091, "step": 6385 }, { "epoch": 0.5926682134570765, "grad_norm": 42.23626708984375, "learning_rate": 3.892097891704455e-06, "loss": 23.9455, "step": 6386 }, { "epoch": 0.5927610208816705, "grad_norm": 46.63014221191406, "learning_rate": 3.890601510116207e-06, "loss": 22.7596, "step": 6387 }, { "epoch": 0.5928538283062645, "grad_norm": 50.932559967041016, "learning_rate": 3.889105233030477e-06, "loss": 22.2985, "step": 6388 }, { "epoch": 0.5929466357308585, "grad_norm": 45.78632354736328, "learning_rate": 3.887609060588212e-06, "loss": 23.7735, "step": 6389 }, { "epoch": 0.5930394431554524, "grad_norm": 44.15645217895508, "learning_rate": 3.886112992930345e-06, "loss": 23.8764, "step": 6390 }, { "epoch": 0.5931322505800464, "grad_norm": 38.34895324707031, "learning_rate": 3.884617030197804e-06, "loss": 23.3925, "step": 6391 }, { "epoch": 0.5932250580046404, "grad_norm": 58.21559524536133, "learning_rate": 3.883121172531506e-06, "loss": 26.2709, "step": 6392 }, { "epoch": 0.5933178654292344, "grad_norm": 50.3444938659668, "learning_rate": 3.881625420072355e-06, "loss": 19.6588, "step": 6393 }, { "epoch": 0.5934106728538283, "grad_norm": 50.826560974121094, "learning_rate": 3.880129772961246e-06, "loss": 21.9183, "step": 6394 }, { "epoch": 0.5935034802784223, "grad_norm": 38.6623420715332, "learning_rate": 3.878634231339069e-06, "loss": 21.6781, "step": 6395 }, { "epoch": 0.5935962877030162, "grad_norm": 37.726783752441406, "learning_rate": 3.877138795346697e-06, "loss": 24.2654, "step": 6396 }, { "epoch": 0.5936890951276103, "grad_norm": 36.44832229614258, "learning_rate": 3.875643465124997e-06, "loss": 23.3254, "step": 6397 }, { "epoch": 0.5937819025522042, "grad_norm": 42.711307525634766, "learning_rate": 3.874148240814826e-06, "loss": 21.9194, "step": 6398 }, { "epoch": 0.5938747099767981, "grad_norm": 39.698814392089844, "learning_rate": 3.8726531225570285e-06, "loss": 25.2375, "step": 6399 }, { "epoch": 0.5939675174013921, "grad_norm": 35.75984573364258, "learning_rate": 3.871158110492444e-06, "loss": 22.1853, "step": 6400 }, { "epoch": 0.594060324825986, "grad_norm": 38.08603286743164, "learning_rate": 3.869663204761896e-06, "loss": 23.9719, "step": 6401 }, { "epoch": 0.5941531322505801, "grad_norm": 40.389591217041016, "learning_rate": 3.868168405506202e-06, "loss": 21.8461, "step": 6402 }, { "epoch": 0.594245939675174, "grad_norm": 39.11022186279297, "learning_rate": 3.866673712866169e-06, "loss": 22.9742, "step": 6403 }, { "epoch": 0.594338747099768, "grad_norm": 42.517662048339844, "learning_rate": 3.865179126982592e-06, "loss": 21.9598, "step": 6404 }, { "epoch": 0.5944315545243619, "grad_norm": 41.09169387817383, "learning_rate": 3.863684647996257e-06, "loss": 21.63, "step": 6405 }, { "epoch": 0.594524361948956, "grad_norm": 40.1796989440918, "learning_rate": 3.86219027604794e-06, "loss": 23.6894, "step": 6406 }, { "epoch": 0.5946171693735499, "grad_norm": 35.271602630615234, "learning_rate": 3.860696011278411e-06, "loss": 20.4646, "step": 6407 }, { "epoch": 0.5947099767981439, "grad_norm": 36.80085372924805, "learning_rate": 3.85920185382842e-06, "loss": 23.0976, "step": 6408 }, { "epoch": 0.5948027842227378, "grad_norm": 40.3100471496582, "learning_rate": 3.857707803838716e-06, "loss": 23.7586, "step": 6409 }, { "epoch": 0.5948955916473317, "grad_norm": 43.21480941772461, "learning_rate": 3.856213861450035e-06, "loss": 22.5343, "step": 6410 }, { "epoch": 0.5949883990719258, "grad_norm": 36.12907409667969, "learning_rate": 3.854720026803102e-06, "loss": 22.4605, "step": 6411 }, { "epoch": 0.5950812064965197, "grad_norm": 35.57432174682617, "learning_rate": 3.8532263000386305e-06, "loss": 23.4434, "step": 6412 }, { "epoch": 0.5951740139211137, "grad_norm": 41.93872833251953, "learning_rate": 3.85173268129733e-06, "loss": 23.9363, "step": 6413 }, { "epoch": 0.5952668213457076, "grad_norm": 36.13595962524414, "learning_rate": 3.850239170719891e-06, "loss": 21.3414, "step": 6414 }, { "epoch": 0.5953596287703016, "grad_norm": 40.441741943359375, "learning_rate": 3.8487457684470006e-06, "loss": 24.331, "step": 6415 }, { "epoch": 0.5954524361948956, "grad_norm": 38.848846435546875, "learning_rate": 3.847252474619336e-06, "loss": 23.0117, "step": 6416 }, { "epoch": 0.5955452436194896, "grad_norm": 41.112815856933594, "learning_rate": 3.845759289377556e-06, "loss": 23.3019, "step": 6417 }, { "epoch": 0.5956380510440835, "grad_norm": 37.06907272338867, "learning_rate": 3.844266212862319e-06, "loss": 22.9446, "step": 6418 }, { "epoch": 0.5957308584686775, "grad_norm": 39.19887924194336, "learning_rate": 3.842773245214269e-06, "loss": 23.7431, "step": 6419 }, { "epoch": 0.5958236658932715, "grad_norm": 36.10010528564453, "learning_rate": 3.841280386574037e-06, "loss": 22.1238, "step": 6420 }, { "epoch": 0.5959164733178655, "grad_norm": 43.71247863769531, "learning_rate": 3.839787637082249e-06, "loss": 24.564, "step": 6421 }, { "epoch": 0.5960092807424594, "grad_norm": 40.8269157409668, "learning_rate": 3.8382949968795194e-06, "loss": 22.8697, "step": 6422 }, { "epoch": 0.5961020881670533, "grad_norm": 39.05074691772461, "learning_rate": 3.836802466106446e-06, "loss": 23.8574, "step": 6423 }, { "epoch": 0.5961948955916473, "grad_norm": 40.18401336669922, "learning_rate": 3.835310044903627e-06, "loss": 25.6904, "step": 6424 }, { "epoch": 0.5962877030162413, "grad_norm": 41.15666961669922, "learning_rate": 3.833817733411641e-06, "loss": 22.339, "step": 6425 }, { "epoch": 0.5963805104408353, "grad_norm": 40.503726959228516, "learning_rate": 3.832325531771061e-06, "loss": 23.4082, "step": 6426 }, { "epoch": 0.5964733178654292, "grad_norm": 49.14437484741211, "learning_rate": 3.8308334401224505e-06, "loss": 24.8475, "step": 6427 }, { "epoch": 0.5965661252900232, "grad_norm": 40.34683609008789, "learning_rate": 3.829341458606356e-06, "loss": 21.5458, "step": 6428 }, { "epoch": 0.5966589327146171, "grad_norm": 39.13234329223633, "learning_rate": 3.827849587363323e-06, "loss": 23.9108, "step": 6429 }, { "epoch": 0.5967517401392112, "grad_norm": 49.959739685058594, "learning_rate": 3.826357826533882e-06, "loss": 23.1479, "step": 6430 }, { "epoch": 0.5968445475638051, "grad_norm": 41.28201675415039, "learning_rate": 3.824866176258549e-06, "loss": 21.6397, "step": 6431 }, { "epoch": 0.5969373549883991, "grad_norm": 36.75255584716797, "learning_rate": 3.823374636677837e-06, "loss": 23.9191, "step": 6432 }, { "epoch": 0.597030162412993, "grad_norm": 40.58470153808594, "learning_rate": 3.821883207932246e-06, "loss": 23.5404, "step": 6433 }, { "epoch": 0.5971229698375871, "grad_norm": 44.851417541503906, "learning_rate": 3.820391890162261e-06, "loss": 24.4026, "step": 6434 }, { "epoch": 0.597215777262181, "grad_norm": 38.08666229248047, "learning_rate": 3.818900683508362e-06, "loss": 22.8011, "step": 6435 }, { "epoch": 0.597308584686775, "grad_norm": 36.6661491394043, "learning_rate": 3.8174095881110195e-06, "loss": 25.5646, "step": 6436 }, { "epoch": 0.5974013921113689, "grad_norm": 38.318023681640625, "learning_rate": 3.815918604110687e-06, "loss": 22.4193, "step": 6437 }, { "epoch": 0.5974941995359628, "grad_norm": 42.80752182006836, "learning_rate": 3.8144277316478135e-06, "loss": 24.3434, "step": 6438 }, { "epoch": 0.5975870069605569, "grad_norm": 38.570167541503906, "learning_rate": 3.812936970862836e-06, "loss": 24.1854, "step": 6439 }, { "epoch": 0.5976798143851508, "grad_norm": 39.223323822021484, "learning_rate": 3.8114463218961793e-06, "loss": 22.6653, "step": 6440 }, { "epoch": 0.5977726218097448, "grad_norm": 38.12811279296875, "learning_rate": 3.809955784888258e-06, "loss": 22.0216, "step": 6441 }, { "epoch": 0.5978654292343387, "grad_norm": 39.79069137573242, "learning_rate": 3.8084653599794787e-06, "loss": 23.7936, "step": 6442 }, { "epoch": 0.5979582366589327, "grad_norm": 36.053043365478516, "learning_rate": 3.806975047310235e-06, "loss": 23.655, "step": 6443 }, { "epoch": 0.5980510440835267, "grad_norm": 35.05317306518555, "learning_rate": 3.8054848470209094e-06, "loss": 22.2294, "step": 6444 }, { "epoch": 0.5981438515081207, "grad_norm": 40.68680191040039, "learning_rate": 3.8039947592518768e-06, "loss": 21.6534, "step": 6445 }, { "epoch": 0.5982366589327146, "grad_norm": 40.81818771362305, "learning_rate": 3.802504784143497e-06, "loss": 22.7151, "step": 6446 }, { "epoch": 0.5983294663573085, "grad_norm": 35.61890411376953, "learning_rate": 3.801014921836125e-06, "loss": 21.6559, "step": 6447 }, { "epoch": 0.5984222737819026, "grad_norm": 37.061161041259766, "learning_rate": 3.7995251724701016e-06, "loss": 24.0349, "step": 6448 }, { "epoch": 0.5985150812064965, "grad_norm": 39.864585876464844, "learning_rate": 3.798035536185755e-06, "loss": 23.0942, "step": 6449 }, { "epoch": 0.5986078886310905, "grad_norm": 35.560325622558594, "learning_rate": 3.796546013123407e-06, "loss": 23.4891, "step": 6450 }, { "epoch": 0.5987006960556844, "grad_norm": 38.01163864135742, "learning_rate": 3.795056603423367e-06, "loss": 23.4811, "step": 6451 }, { "epoch": 0.5987935034802784, "grad_norm": 35.869998931884766, "learning_rate": 3.7935673072259326e-06, "loss": 22.7462, "step": 6452 }, { "epoch": 0.5988863109048724, "grad_norm": 41.30424880981445, "learning_rate": 3.7920781246713917e-06, "loss": 23.8875, "step": 6453 }, { "epoch": 0.5989791183294664, "grad_norm": 37.80328369140625, "learning_rate": 3.790589055900024e-06, "loss": 23.2024, "step": 6454 }, { "epoch": 0.5990719257540603, "grad_norm": 36.99026107788086, "learning_rate": 3.7891001010520922e-06, "loss": 21.5482, "step": 6455 }, { "epoch": 0.5991647331786543, "grad_norm": 37.866539001464844, "learning_rate": 3.7876112602678544e-06, "loss": 22.4327, "step": 6456 }, { "epoch": 0.5992575406032482, "grad_norm": 42.84993362426758, "learning_rate": 3.786122533687557e-06, "loss": 20.8899, "step": 6457 }, { "epoch": 0.5993503480278423, "grad_norm": 36.34394073486328, "learning_rate": 3.78463392145143e-06, "loss": 22.7768, "step": 6458 }, { "epoch": 0.5994431554524362, "grad_norm": 39.310245513916016, "learning_rate": 3.7831454236997013e-06, "loss": 21.8866, "step": 6459 }, { "epoch": 0.5995359628770301, "grad_norm": 37.569435119628906, "learning_rate": 3.78165704057258e-06, "loss": 25.1293, "step": 6460 }, { "epoch": 0.5996287703016241, "grad_norm": 39.64424133300781, "learning_rate": 3.780168772210269e-06, "loss": 21.9874, "step": 6461 }, { "epoch": 0.5997215777262181, "grad_norm": 37.1533317565918, "learning_rate": 3.778680618752963e-06, "loss": 23.17, "step": 6462 }, { "epoch": 0.5998143851508121, "grad_norm": 37.07710647583008, "learning_rate": 3.7771925803408365e-06, "loss": 23.5541, "step": 6463 }, { "epoch": 0.599907192575406, "grad_norm": 39.18870544433594, "learning_rate": 3.7757046571140623e-06, "loss": 23.6466, "step": 6464 }, { "epoch": 0.6, "grad_norm": 39.96692657470703, "learning_rate": 3.7742168492127995e-06, "loss": 21.161, "step": 6465 }, { "epoch": 0.6000928074245939, "grad_norm": 35.18462371826172, "learning_rate": 3.7727291567771936e-06, "loss": 21.5052, "step": 6466 }, { "epoch": 0.600185614849188, "grad_norm": 42.42380905151367, "learning_rate": 3.7712415799473824e-06, "loss": 22.5669, "step": 6467 }, { "epoch": 0.6002784222737819, "grad_norm": 33.42573165893555, "learning_rate": 3.7697541188634934e-06, "loss": 22.892, "step": 6468 }, { "epoch": 0.6003712296983759, "grad_norm": 44.512611389160156, "learning_rate": 3.768266773665639e-06, "loss": 22.6246, "step": 6469 }, { "epoch": 0.6004640371229698, "grad_norm": 41.19371032714844, "learning_rate": 3.7667795444939244e-06, "loss": 22.3527, "step": 6470 }, { "epoch": 0.6005568445475638, "grad_norm": 62.53020477294922, "learning_rate": 3.7652924314884453e-06, "loss": 23.176, "step": 6471 }, { "epoch": 0.6006496519721578, "grad_norm": 38.89998245239258, "learning_rate": 3.7638054347892796e-06, "loss": 22.0003, "step": 6472 }, { "epoch": 0.6007424593967517, "grad_norm": 44.32961654663086, "learning_rate": 3.7623185545365002e-06, "loss": 24.1895, "step": 6473 }, { "epoch": 0.6008352668213457, "grad_norm": 42.38055419921875, "learning_rate": 3.760831790870171e-06, "loss": 23.8974, "step": 6474 }, { "epoch": 0.6009280742459396, "grad_norm": 37.523719787597656, "learning_rate": 3.7593451439303354e-06, "loss": 22.9807, "step": 6475 }, { "epoch": 0.6010208816705337, "grad_norm": 50.98272705078125, "learning_rate": 3.757858613857035e-06, "loss": 24.2747, "step": 6476 }, { "epoch": 0.6011136890951276, "grad_norm": 39.02485656738281, "learning_rate": 3.7563722007902988e-06, "loss": 21.5353, "step": 6477 }, { "epoch": 0.6012064965197216, "grad_norm": 40.72894287109375, "learning_rate": 3.754885904870139e-06, "loss": 23.836, "step": 6478 }, { "epoch": 0.6012993039443155, "grad_norm": 49.72025680541992, "learning_rate": 3.7533997262365627e-06, "loss": 23.3533, "step": 6479 }, { "epoch": 0.6013921113689095, "grad_norm": 45.665828704833984, "learning_rate": 3.7519136650295673e-06, "loss": 23.8146, "step": 6480 }, { "epoch": 0.6014849187935035, "grad_norm": 50.362876892089844, "learning_rate": 3.7504277213891314e-06, "loss": 23.6194, "step": 6481 }, { "epoch": 0.6015777262180975, "grad_norm": 40.40447235107422, "learning_rate": 3.748941895455228e-06, "loss": 23.9773, "step": 6482 }, { "epoch": 0.6016705336426914, "grad_norm": 40.927223205566406, "learning_rate": 3.7474561873678216e-06, "loss": 22.7306, "step": 6483 }, { "epoch": 0.6017633410672854, "grad_norm": 44.25010299682617, "learning_rate": 3.745970597266858e-06, "loss": 25.4009, "step": 6484 }, { "epoch": 0.6018561484918794, "grad_norm": 38.6390266418457, "learning_rate": 3.7444851252922772e-06, "loss": 22.673, "step": 6485 }, { "epoch": 0.6019489559164733, "grad_norm": 39.92576599121094, "learning_rate": 3.742999771584008e-06, "loss": 23.7747, "step": 6486 }, { "epoch": 0.6020417633410673, "grad_norm": 39.22288513183594, "learning_rate": 3.7415145362819666e-06, "loss": 25.1292, "step": 6487 }, { "epoch": 0.6021345707656612, "grad_norm": 38.11808395385742, "learning_rate": 3.740029419526057e-06, "loss": 23.7761, "step": 6488 }, { "epoch": 0.6022273781902552, "grad_norm": 39.59920120239258, "learning_rate": 3.738544421456175e-06, "loss": 22.1126, "step": 6489 }, { "epoch": 0.6023201856148492, "grad_norm": 39.157840728759766, "learning_rate": 3.7370595422122035e-06, "loss": 23.3476, "step": 6490 }, { "epoch": 0.6024129930394432, "grad_norm": 42.3441276550293, "learning_rate": 3.735574781934013e-06, "loss": 23.7225, "step": 6491 }, { "epoch": 0.6025058004640371, "grad_norm": 44.03364181518555, "learning_rate": 3.734090140761466e-06, "loss": 22.0905, "step": 6492 }, { "epoch": 0.6025986078886311, "grad_norm": 37.80263137817383, "learning_rate": 3.7326056188344095e-06, "loss": 22.9321, "step": 6493 }, { "epoch": 0.602691415313225, "grad_norm": 38.438846588134766, "learning_rate": 3.731121216292686e-06, "loss": 22.4524, "step": 6494 }, { "epoch": 0.6027842227378191, "grad_norm": 42.681358337402344, "learning_rate": 3.7296369332761174e-06, "loss": 21.4813, "step": 6495 }, { "epoch": 0.602877030162413, "grad_norm": 46.312400817871094, "learning_rate": 3.728152769924521e-06, "loss": 24.9125, "step": 6496 }, { "epoch": 0.602969837587007, "grad_norm": 41.064971923828125, "learning_rate": 3.726668726377705e-06, "loss": 22.8801, "step": 6497 }, { "epoch": 0.6030626450116009, "grad_norm": 36.45918273925781, "learning_rate": 3.7251848027754566e-06, "loss": 21.7293, "step": 6498 }, { "epoch": 0.603155452436195, "grad_norm": 36.976646423339844, "learning_rate": 3.7237009992575602e-06, "loss": 22.7754, "step": 6499 }, { "epoch": 0.6032482598607889, "grad_norm": 40.33970260620117, "learning_rate": 3.722217315963789e-06, "loss": 22.8193, "step": 6500 }, { "epoch": 0.6033410672853828, "grad_norm": 39.54852294921875, "learning_rate": 3.7207337530338976e-06, "loss": 22.2712, "step": 6501 }, { "epoch": 0.6034338747099768, "grad_norm": 42.01256561279297, "learning_rate": 3.719250310607636e-06, "loss": 24.8569, "step": 6502 }, { "epoch": 0.6035266821345707, "grad_norm": 38.61962890625, "learning_rate": 3.7177669888247423e-06, "loss": 22.9992, "step": 6503 }, { "epoch": 0.6036194895591648, "grad_norm": 42.77480697631836, "learning_rate": 3.716283787824939e-06, "loss": 23.7373, "step": 6504 }, { "epoch": 0.6037122969837587, "grad_norm": 42.31361770629883, "learning_rate": 3.7148007077479404e-06, "loss": 23.7393, "step": 6505 }, { "epoch": 0.6038051044083527, "grad_norm": 45.539337158203125, "learning_rate": 3.7133177487334514e-06, "loss": 23.9911, "step": 6506 }, { "epoch": 0.6038979118329466, "grad_norm": 41.99966049194336, "learning_rate": 3.7118349109211593e-06, "loss": 22.018, "step": 6507 }, { "epoch": 0.6039907192575406, "grad_norm": 43.186100006103516, "learning_rate": 3.710352194450745e-06, "loss": 22.5294, "step": 6508 }, { "epoch": 0.6040835266821346, "grad_norm": 40.118446350097656, "learning_rate": 3.7088695994618783e-06, "loss": 22.1581, "step": 6509 }, { "epoch": 0.6041763341067286, "grad_norm": 37.31013870239258, "learning_rate": 3.707387126094213e-06, "loss": 21.4694, "step": 6510 }, { "epoch": 0.6042691415313225, "grad_norm": 45.926788330078125, "learning_rate": 3.705904774487396e-06, "loss": 23.8318, "step": 6511 }, { "epoch": 0.6043619489559164, "grad_norm": 40.118648529052734, "learning_rate": 3.704422544781063e-06, "loss": 24.1663, "step": 6512 }, { "epoch": 0.6044547563805105, "grad_norm": 37.373512268066406, "learning_rate": 3.702940437114832e-06, "loss": 23.5154, "step": 6513 }, { "epoch": 0.6045475638051044, "grad_norm": 36.4521598815918, "learning_rate": 3.7014584516283157e-06, "loss": 23.0124, "step": 6514 }, { "epoch": 0.6046403712296984, "grad_norm": 40.170005798339844, "learning_rate": 3.699976588461116e-06, "loss": 23.574, "step": 6515 }, { "epoch": 0.6047331786542923, "grad_norm": 38.49673080444336, "learning_rate": 3.698494847752816e-06, "loss": 23.7441, "step": 6516 }, { "epoch": 0.6048259860788863, "grad_norm": 42.68400573730469, "learning_rate": 3.697013229642994e-06, "loss": 21.8825, "step": 6517 }, { "epoch": 0.6049187935034803, "grad_norm": 41.595947265625, "learning_rate": 3.695531734271217e-06, "loss": 24.2472, "step": 6518 }, { "epoch": 0.6050116009280743, "grad_norm": 47.15005111694336, "learning_rate": 3.694050361777034e-06, "loss": 25.3133, "step": 6519 }, { "epoch": 0.6051044083526682, "grad_norm": 41.71384811401367, "learning_rate": 3.6925691122999884e-06, "loss": 23.4141, "step": 6520 }, { "epoch": 0.6051972157772622, "grad_norm": 40.358009338378906, "learning_rate": 3.6910879859796123e-06, "loss": 21.7546, "step": 6521 }, { "epoch": 0.6052900232018561, "grad_norm": 42.744850158691406, "learning_rate": 3.6896069829554205e-06, "loss": 23.9124, "step": 6522 }, { "epoch": 0.6053828306264502, "grad_norm": 43.248958587646484, "learning_rate": 3.68812610336692e-06, "loss": 23.2575, "step": 6523 }, { "epoch": 0.6054756380510441, "grad_norm": 40.25508117675781, "learning_rate": 3.6866453473536094e-06, "loss": 24.455, "step": 6524 }, { "epoch": 0.605568445475638, "grad_norm": 41.2027587890625, "learning_rate": 3.685164715054968e-06, "loss": 24.3406, "step": 6525 }, { "epoch": 0.605661252900232, "grad_norm": 34.4127082824707, "learning_rate": 3.6836842066104694e-06, "loss": 22.8848, "step": 6526 }, { "epoch": 0.605754060324826, "grad_norm": 44.30509948730469, "learning_rate": 3.6822038221595757e-06, "loss": 23.9189, "step": 6527 }, { "epoch": 0.60584686774942, "grad_norm": 41.420223236083984, "learning_rate": 3.6807235618417314e-06, "loss": 25.3129, "step": 6528 }, { "epoch": 0.6059396751740139, "grad_norm": 40.43235778808594, "learning_rate": 3.679243425796376e-06, "loss": 23.9473, "step": 6529 }, { "epoch": 0.6060324825986079, "grad_norm": 36.97633361816406, "learning_rate": 3.677763414162934e-06, "loss": 23.1745, "step": 6530 }, { "epoch": 0.6061252900232018, "grad_norm": 38.05999755859375, "learning_rate": 3.6762835270808174e-06, "loss": 20.3617, "step": 6531 }, { "epoch": 0.6062180974477959, "grad_norm": 54.36824417114258, "learning_rate": 3.67480376468943e-06, "loss": 25.9512, "step": 6532 }, { "epoch": 0.6063109048723898, "grad_norm": 40.90085983276367, "learning_rate": 3.6733241271281595e-06, "loss": 22.581, "step": 6533 }, { "epoch": 0.6064037122969838, "grad_norm": 40.12479019165039, "learning_rate": 3.6718446145363857e-06, "loss": 22.3716, "step": 6534 }, { "epoch": 0.6064965197215777, "grad_norm": 45.0186767578125, "learning_rate": 3.670365227053475e-06, "loss": 24.4149, "step": 6535 }, { "epoch": 0.6065893271461716, "grad_norm": 44.26399230957031, "learning_rate": 3.66888596481878e-06, "loss": 22.8801, "step": 6536 }, { "epoch": 0.6066821345707657, "grad_norm": 42.37975311279297, "learning_rate": 3.6674068279716456e-06, "loss": 24.0323, "step": 6537 }, { "epoch": 0.6067749419953596, "grad_norm": 35.252872467041016, "learning_rate": 3.665927816651402e-06, "loss": 24.0783, "step": 6538 }, { "epoch": 0.6068677494199536, "grad_norm": 42.60462951660156, "learning_rate": 3.6644489309973667e-06, "loss": 23.1404, "step": 6539 }, { "epoch": 0.6069605568445475, "grad_norm": 38.965492248535156, "learning_rate": 3.6629701711488485e-06, "loss": 25.1653, "step": 6540 }, { "epoch": 0.6070533642691416, "grad_norm": 37.65920639038086, "learning_rate": 3.6614915372451454e-06, "loss": 26.4336, "step": 6541 }, { "epoch": 0.6071461716937355, "grad_norm": 38.387760162353516, "learning_rate": 3.660013029425535e-06, "loss": 24.6402, "step": 6542 }, { "epoch": 0.6072389791183295, "grad_norm": 38.75569534301758, "learning_rate": 3.6585346478292937e-06, "loss": 22.6192, "step": 6543 }, { "epoch": 0.6073317865429234, "grad_norm": 36.15101623535156, "learning_rate": 3.6570563925956813e-06, "loss": 22.8716, "step": 6544 }, { "epoch": 0.6074245939675174, "grad_norm": 38.33513641357422, "learning_rate": 3.6555782638639425e-06, "loss": 22.4513, "step": 6545 }, { "epoch": 0.6075174013921114, "grad_norm": 41.401371002197266, "learning_rate": 3.6541002617733147e-06, "loss": 25.7123, "step": 6546 }, { "epoch": 0.6076102088167054, "grad_norm": 39.13121795654297, "learning_rate": 3.652622386463024e-06, "loss": 22.3551, "step": 6547 }, { "epoch": 0.6077030162412993, "grad_norm": 36.93301773071289, "learning_rate": 3.6511446380722794e-06, "loss": 23.7341, "step": 6548 }, { "epoch": 0.6077958236658932, "grad_norm": 35.196067810058594, "learning_rate": 3.6496670167402816e-06, "loss": 22.4907, "step": 6549 }, { "epoch": 0.6078886310904872, "grad_norm": 36.20689010620117, "learning_rate": 3.6481895226062223e-06, "loss": 24.9525, "step": 6550 }, { "epoch": 0.6079814385150812, "grad_norm": 40.067012786865234, "learning_rate": 3.646712155809272e-06, "loss": 23.3397, "step": 6551 }, { "epoch": 0.6080742459396752, "grad_norm": 45.74531173706055, "learning_rate": 3.645234916488599e-06, "loss": 24.6241, "step": 6552 }, { "epoch": 0.6081670533642691, "grad_norm": 34.93434143066406, "learning_rate": 3.6437578047833555e-06, "loss": 21.9844, "step": 6553 }, { "epoch": 0.6082598607888631, "grad_norm": 34.68500900268555, "learning_rate": 3.6422808208326793e-06, "loss": 22.2305, "step": 6554 }, { "epoch": 0.6083526682134571, "grad_norm": 40.131046295166016, "learning_rate": 3.640803964775699e-06, "loss": 22.952, "step": 6555 }, { "epoch": 0.6084454756380511, "grad_norm": 43.39884948730469, "learning_rate": 3.6393272367515344e-06, "loss": 23.901, "step": 6556 }, { "epoch": 0.608538283062645, "grad_norm": 39.69865036010742, "learning_rate": 3.637850636899284e-06, "loss": 22.9713, "step": 6557 }, { "epoch": 0.608631090487239, "grad_norm": 35.509029388427734, "learning_rate": 3.636374165358042e-06, "loss": 23.417, "step": 6558 }, { "epoch": 0.6087238979118329, "grad_norm": 43.24834442138672, "learning_rate": 3.634897822266891e-06, "loss": 23.7068, "step": 6559 }, { "epoch": 0.608816705336427, "grad_norm": 35.7794189453125, "learning_rate": 3.6334216077648944e-06, "loss": 22.0123, "step": 6560 }, { "epoch": 0.6089095127610209, "grad_norm": 38.6114616394043, "learning_rate": 3.631945521991109e-06, "loss": 24.3162, "step": 6561 }, { "epoch": 0.6090023201856148, "grad_norm": 39.245906829833984, "learning_rate": 3.6304695650845824e-06, "loss": 22.6245, "step": 6562 }, { "epoch": 0.6090951276102088, "grad_norm": 37.437747955322266, "learning_rate": 3.6289937371843397e-06, "loss": 24.051, "step": 6563 }, { "epoch": 0.6091879350348027, "grad_norm": 42.83933639526367, "learning_rate": 3.6275180384294033e-06, "loss": 24.9976, "step": 6564 }, { "epoch": 0.6092807424593968, "grad_norm": 40.38002395629883, "learning_rate": 3.6260424689587824e-06, "loss": 22.521, "step": 6565 }, { "epoch": 0.6093735498839907, "grad_norm": 36.82134246826172, "learning_rate": 3.6245670289114674e-06, "loss": 23.7145, "step": 6566 }, { "epoch": 0.6094663573085847, "grad_norm": 45.00808334350586, "learning_rate": 3.623091718426446e-06, "loss": 22.895, "step": 6567 }, { "epoch": 0.6095591647331786, "grad_norm": 38.34762191772461, "learning_rate": 3.6216165376426836e-06, "loss": 24.0609, "step": 6568 }, { "epoch": 0.6096519721577727, "grad_norm": 36.853904724121094, "learning_rate": 3.6201414866991404e-06, "loss": 22.5611, "step": 6569 }, { "epoch": 0.6097447795823666, "grad_norm": 43.9279899597168, "learning_rate": 3.618666565734764e-06, "loss": 23.5422, "step": 6570 }, { "epoch": 0.6098375870069606, "grad_norm": 46.916046142578125, "learning_rate": 3.617191774888488e-06, "loss": 22.1807, "step": 6571 }, { "epoch": 0.6099303944315545, "grad_norm": 37.063743591308594, "learning_rate": 3.615717114299231e-06, "loss": 23.5635, "step": 6572 }, { "epoch": 0.6100232018561484, "grad_norm": 38.70698165893555, "learning_rate": 3.6142425841059053e-06, "loss": 24.9042, "step": 6573 }, { "epoch": 0.6101160092807425, "grad_norm": 40.23933410644531, "learning_rate": 3.612768184447407e-06, "loss": 21.1122, "step": 6574 }, { "epoch": 0.6102088167053364, "grad_norm": 36.605804443359375, "learning_rate": 3.6112939154626203e-06, "loss": 22.7558, "step": 6575 }, { "epoch": 0.6103016241299304, "grad_norm": 36.05394744873047, "learning_rate": 3.609819777290418e-06, "loss": 21.0247, "step": 6576 }, { "epoch": 0.6103944315545243, "grad_norm": 42.36701583862305, "learning_rate": 3.6083457700696613e-06, "loss": 23.9551, "step": 6577 }, { "epoch": 0.6104872389791183, "grad_norm": 38.52751541137695, "learning_rate": 3.606871893939195e-06, "loss": 22.9676, "step": 6578 }, { "epoch": 0.6105800464037123, "grad_norm": 37.6234130859375, "learning_rate": 3.6053981490378575e-06, "loss": 23.4123, "step": 6579 }, { "epoch": 0.6106728538283063, "grad_norm": 39.11539077758789, "learning_rate": 3.6039245355044696e-06, "loss": 21.7205, "step": 6580 }, { "epoch": 0.6107656612529002, "grad_norm": 37.65734100341797, "learning_rate": 3.602451053477844e-06, "loss": 23.2016, "step": 6581 }, { "epoch": 0.6108584686774942, "grad_norm": 44.089664459228516, "learning_rate": 3.6009777030967778e-06, "loss": 22.7806, "step": 6582 }, { "epoch": 0.6109512761020882, "grad_norm": 37.9246826171875, "learning_rate": 3.599504484500056e-06, "loss": 25.0254, "step": 6583 }, { "epoch": 0.6110440835266822, "grad_norm": 38.299190521240234, "learning_rate": 3.5980313978264547e-06, "loss": 23.4913, "step": 6584 }, { "epoch": 0.6111368909512761, "grad_norm": 41.67274475097656, "learning_rate": 3.5965584432147327e-06, "loss": 23.9643, "step": 6585 }, { "epoch": 0.61122969837587, "grad_norm": 36.691341400146484, "learning_rate": 3.595085620803638e-06, "loss": 24.1419, "step": 6586 }, { "epoch": 0.611322505800464, "grad_norm": 42.327842712402344, "learning_rate": 3.593612930731908e-06, "loss": 24.4366, "step": 6587 }, { "epoch": 0.611415313225058, "grad_norm": 36.402191162109375, "learning_rate": 3.5921403731382685e-06, "loss": 22.4212, "step": 6588 }, { "epoch": 0.611508120649652, "grad_norm": 37.511940002441406, "learning_rate": 3.590667948161426e-06, "loss": 22.1903, "step": 6589 }, { "epoch": 0.6116009280742459, "grad_norm": 39.72782897949219, "learning_rate": 3.589195655940082e-06, "loss": 22.8512, "step": 6590 }, { "epoch": 0.6116937354988399, "grad_norm": 37.32842254638672, "learning_rate": 3.5877234966129237e-06, "loss": 21.4928, "step": 6591 }, { "epoch": 0.6117865429234339, "grad_norm": 44.12765121459961, "learning_rate": 3.5862514703186222e-06, "loss": 22.188, "step": 6592 }, { "epoch": 0.6118793503480279, "grad_norm": 40.525394439697266, "learning_rate": 3.5847795771958394e-06, "loss": 22.6629, "step": 6593 }, { "epoch": 0.6119721577726218, "grad_norm": 35.24454879760742, "learning_rate": 3.583307817383226e-06, "loss": 21.8927, "step": 6594 }, { "epoch": 0.6120649651972158, "grad_norm": 39.06524658203125, "learning_rate": 3.581836191019415e-06, "loss": 23.0227, "step": 6595 }, { "epoch": 0.6121577726218097, "grad_norm": 37.84872055053711, "learning_rate": 3.580364698243031e-06, "loss": 22.868, "step": 6596 }, { "epoch": 0.6122505800464038, "grad_norm": 38.73262405395508, "learning_rate": 3.5788933391926863e-06, "loss": 24.3993, "step": 6597 }, { "epoch": 0.6123433874709977, "grad_norm": 42.78839111328125, "learning_rate": 3.5774221140069764e-06, "loss": 23.305, "step": 6598 }, { "epoch": 0.6124361948955916, "grad_norm": 39.12778854370117, "learning_rate": 3.575951022824489e-06, "loss": 23.9206, "step": 6599 }, { "epoch": 0.6125290023201856, "grad_norm": 39.799556732177734, "learning_rate": 3.5744800657837984e-06, "loss": 23.3976, "step": 6600 }, { "epoch": 0.6126218097447795, "grad_norm": 149.15945434570312, "learning_rate": 3.573009243023461e-06, "loss": 22.5973, "step": 6601 }, { "epoch": 0.6127146171693736, "grad_norm": 41.3575325012207, "learning_rate": 3.571538554682029e-06, "loss": 23.1913, "step": 6602 }, { "epoch": 0.6128074245939675, "grad_norm": 41.32068634033203, "learning_rate": 3.5700680008980335e-06, "loss": 23.5532, "step": 6603 }, { "epoch": 0.6129002320185615, "grad_norm": 40.04293441772461, "learning_rate": 3.568597581809999e-06, "loss": 23.4484, "step": 6604 }, { "epoch": 0.6129930394431554, "grad_norm": 42.68186569213867, "learning_rate": 3.567127297556436e-06, "loss": 23.2602, "step": 6605 }, { "epoch": 0.6130858468677495, "grad_norm": 44.00777816772461, "learning_rate": 3.56565714827584e-06, "loss": 22.2939, "step": 6606 }, { "epoch": 0.6131786542923434, "grad_norm": 43.135032653808594, "learning_rate": 3.564187134106695e-06, "loss": 23.8552, "step": 6607 }, { "epoch": 0.6132714617169374, "grad_norm": 39.92439270019531, "learning_rate": 3.562717255187476e-06, "loss": 23.7514, "step": 6608 }, { "epoch": 0.6133642691415313, "grad_norm": 44.714256286621094, "learning_rate": 3.561247511656637e-06, "loss": 22.2863, "step": 6609 }, { "epoch": 0.6134570765661252, "grad_norm": 50.417240142822266, "learning_rate": 3.559777903652627e-06, "loss": 22.4401, "step": 6610 }, { "epoch": 0.6135498839907193, "grad_norm": 38.4289436340332, "learning_rate": 3.5583084313138804e-06, "loss": 22.5871, "step": 6611 }, { "epoch": 0.6136426914153132, "grad_norm": 41.25717544555664, "learning_rate": 3.556839094778814e-06, "loss": 22.8801, "step": 6612 }, { "epoch": 0.6137354988399072, "grad_norm": 53.19584274291992, "learning_rate": 3.555369894185838e-06, "loss": 22.7267, "step": 6613 }, { "epoch": 0.6138283062645011, "grad_norm": 45.631839752197266, "learning_rate": 3.553900829673349e-06, "loss": 24.1053, "step": 6614 }, { "epoch": 0.6139211136890951, "grad_norm": 41.419620513916016, "learning_rate": 3.552431901379725e-06, "loss": 22.8609, "step": 6615 }, { "epoch": 0.6140139211136891, "grad_norm": 34.949012756347656, "learning_rate": 3.550963109443338e-06, "loss": 22.8278, "step": 6616 }, { "epoch": 0.6141067285382831, "grad_norm": 44.76578903198242, "learning_rate": 3.549494454002545e-06, "loss": 23.4779, "step": 6617 }, { "epoch": 0.614199535962877, "grad_norm": 45.01349639892578, "learning_rate": 3.5480259351956882e-06, "loss": 23.5411, "step": 6618 }, { "epoch": 0.614292343387471, "grad_norm": 41.61832809448242, "learning_rate": 3.5465575531610986e-06, "loss": 22.3401, "step": 6619 }, { "epoch": 0.614385150812065, "grad_norm": 39.23749923706055, "learning_rate": 3.545089308037094e-06, "loss": 23.1328, "step": 6620 }, { "epoch": 0.614477958236659, "grad_norm": 42.765995025634766, "learning_rate": 3.5436211999619806e-06, "loss": 24.1313, "step": 6621 }, { "epoch": 0.6145707656612529, "grad_norm": 42.98391342163086, "learning_rate": 3.542153229074048e-06, "loss": 22.3125, "step": 6622 }, { "epoch": 0.6146635730858468, "grad_norm": 44.47462844848633, "learning_rate": 3.540685395511578e-06, "loss": 25.6468, "step": 6623 }, { "epoch": 0.6147563805104408, "grad_norm": 40.456485748291016, "learning_rate": 3.5392176994128357e-06, "loss": 21.8501, "step": 6624 }, { "epoch": 0.6148491879350348, "grad_norm": 36.141387939453125, "learning_rate": 3.537750140916073e-06, "loss": 23.1363, "step": 6625 }, { "epoch": 0.6149419953596288, "grad_norm": 37.64046096801758, "learning_rate": 3.536282720159532e-06, "loss": 23.6977, "step": 6626 }, { "epoch": 0.6150348027842227, "grad_norm": 41.40736389160156, "learning_rate": 3.5348154372814386e-06, "loss": 22.2216, "step": 6627 }, { "epoch": 0.6151276102088167, "grad_norm": 40.28507614135742, "learning_rate": 3.5333482924200093e-06, "loss": 23.4004, "step": 6628 }, { "epoch": 0.6152204176334106, "grad_norm": 43.28626251220703, "learning_rate": 3.5318812857134438e-06, "loss": 23.5513, "step": 6629 }, { "epoch": 0.6153132250580047, "grad_norm": 41.66621780395508, "learning_rate": 3.5304144172999295e-06, "loss": 24.2212, "step": 6630 }, { "epoch": 0.6154060324825986, "grad_norm": 40.631595611572266, "learning_rate": 3.5289476873176442e-06, "loss": 23.1673, "step": 6631 }, { "epoch": 0.6154988399071926, "grad_norm": 45.76710891723633, "learning_rate": 3.527481095904748e-06, "loss": 23.7231, "step": 6632 }, { "epoch": 0.6155916473317865, "grad_norm": 37.7254638671875, "learning_rate": 3.5260146431993904e-06, "loss": 22.0292, "step": 6633 }, { "epoch": 0.6156844547563806, "grad_norm": 41.16281509399414, "learning_rate": 3.5245483293397074e-06, "loss": 22.2019, "step": 6634 }, { "epoch": 0.6157772621809745, "grad_norm": 40.428836822509766, "learning_rate": 3.5230821544638256e-06, "loss": 24.4193, "step": 6635 }, { "epoch": 0.6158700696055684, "grad_norm": 34.12022018432617, "learning_rate": 3.5216161187098497e-06, "loss": 23.6627, "step": 6636 }, { "epoch": 0.6159628770301624, "grad_norm": 40.285579681396484, "learning_rate": 3.52015022221588e-06, "loss": 22.3863, "step": 6637 }, { "epoch": 0.6160556844547563, "grad_norm": 37.350162506103516, "learning_rate": 3.5186844651199976e-06, "loss": 22.0826, "step": 6638 }, { "epoch": 0.6161484918793504, "grad_norm": 39.955909729003906, "learning_rate": 3.517218847560274e-06, "loss": 24.2965, "step": 6639 }, { "epoch": 0.6162412993039443, "grad_norm": 40.3950080871582, "learning_rate": 3.5157533696747698e-06, "loss": 22.2213, "step": 6640 }, { "epoch": 0.6163341067285383, "grad_norm": 41.6470947265625, "learning_rate": 3.514288031601524e-06, "loss": 22.9684, "step": 6641 }, { "epoch": 0.6164269141531322, "grad_norm": 44.28416061401367, "learning_rate": 3.512822833478571e-06, "loss": 24.1959, "step": 6642 }, { "epoch": 0.6165197215777262, "grad_norm": 42.26445770263672, "learning_rate": 3.5113577754439297e-06, "loss": 20.5515, "step": 6643 }, { "epoch": 0.6166125290023202, "grad_norm": 44.229209899902344, "learning_rate": 3.5098928576356015e-06, "loss": 22.6971, "step": 6644 }, { "epoch": 0.6167053364269142, "grad_norm": 52.489959716796875, "learning_rate": 3.5084280801915793e-06, "loss": 23.0714, "step": 6645 }, { "epoch": 0.6167981438515081, "grad_norm": 41.581180572509766, "learning_rate": 3.5069634432498443e-06, "loss": 25.1092, "step": 6646 }, { "epoch": 0.616890951276102, "grad_norm": 39.8026237487793, "learning_rate": 3.505498946948357e-06, "loss": 22.3325, "step": 6647 }, { "epoch": 0.6169837587006961, "grad_norm": 41.3873291015625, "learning_rate": 3.504034591425071e-06, "loss": 23.5607, "step": 6648 }, { "epoch": 0.61707656612529, "grad_norm": 47.92784118652344, "learning_rate": 3.5025703768179275e-06, "loss": 22.4387, "step": 6649 }, { "epoch": 0.617169373549884, "grad_norm": 40.93383026123047, "learning_rate": 3.5011063032648472e-06, "loss": 21.4952, "step": 6650 }, { "epoch": 0.6172621809744779, "grad_norm": 40.991947174072266, "learning_rate": 3.499642370903744e-06, "loss": 22.6231, "step": 6651 }, { "epoch": 0.6173549883990719, "grad_norm": 40.778663635253906, "learning_rate": 3.49817857987252e-06, "loss": 23.4948, "step": 6652 }, { "epoch": 0.6174477958236659, "grad_norm": 44.815391540527344, "learning_rate": 3.496714930309055e-06, "loss": 22.1164, "step": 6653 }, { "epoch": 0.6175406032482599, "grad_norm": 39.525840759277344, "learning_rate": 3.4952514223512235e-06, "loss": 24.0011, "step": 6654 }, { "epoch": 0.6176334106728538, "grad_norm": 42.980018615722656, "learning_rate": 3.4937880561368862e-06, "loss": 22.4714, "step": 6655 }, { "epoch": 0.6177262180974478, "grad_norm": 41.1611213684082, "learning_rate": 3.492324831803885e-06, "loss": 22.7166, "step": 6656 }, { "epoch": 0.6178190255220417, "grad_norm": 37.55677795410156, "learning_rate": 3.490861749490054e-06, "loss": 24.0186, "step": 6657 }, { "epoch": 0.6179118329466358, "grad_norm": 37.57683181762695, "learning_rate": 3.4893988093332133e-06, "loss": 22.2582, "step": 6658 }, { "epoch": 0.6180046403712297, "grad_norm": 40.962860107421875, "learning_rate": 3.4879360114711637e-06, "loss": 22.2067, "step": 6659 }, { "epoch": 0.6180974477958237, "grad_norm": 41.0785026550293, "learning_rate": 3.4864733560416998e-06, "loss": 23.58, "step": 6660 }, { "epoch": 0.6181902552204176, "grad_norm": 39.232364654541016, "learning_rate": 3.485010843182602e-06, "loss": 22.9829, "step": 6661 }, { "epoch": 0.6182830626450116, "grad_norm": 40.1163330078125, "learning_rate": 3.4835484730316305e-06, "loss": 24.463, "step": 6662 }, { "epoch": 0.6183758700696056, "grad_norm": 40.72832107543945, "learning_rate": 3.4820862457265405e-06, "loss": 23.7044, "step": 6663 }, { "epoch": 0.6184686774941995, "grad_norm": 42.688175201416016, "learning_rate": 3.4806241614050694e-06, "loss": 22.2255, "step": 6664 }, { "epoch": 0.6185614849187935, "grad_norm": 51.07761764526367, "learning_rate": 3.479162220204941e-06, "loss": 20.9354, "step": 6665 }, { "epoch": 0.6186542923433874, "grad_norm": 44.38532638549805, "learning_rate": 3.477700422263867e-06, "loss": 22.0053, "step": 6666 }, { "epoch": 0.6187470997679815, "grad_norm": 42.207767486572266, "learning_rate": 3.476238767719546e-06, "loss": 22.9597, "step": 6667 }, { "epoch": 0.6188399071925754, "grad_norm": 37.7286376953125, "learning_rate": 3.4747772567096606e-06, "loss": 22.0104, "step": 6668 }, { "epoch": 0.6189327146171694, "grad_norm": 40.14104080200195, "learning_rate": 3.473315889371881e-06, "loss": 25.3541, "step": 6669 }, { "epoch": 0.6190255220417633, "grad_norm": 38.60392761230469, "learning_rate": 3.4718546658438662e-06, "loss": 23.6476, "step": 6670 }, { "epoch": 0.6191183294663573, "grad_norm": 38.796390533447266, "learning_rate": 3.4703935862632588e-06, "loss": 22.7357, "step": 6671 }, { "epoch": 0.6192111368909513, "grad_norm": 46.761451721191406, "learning_rate": 3.468932650767689e-06, "loss": 25.5078, "step": 6672 }, { "epoch": 0.6193039443155453, "grad_norm": 43.412872314453125, "learning_rate": 3.467471859494772e-06, "loss": 22.4948, "step": 6673 }, { "epoch": 0.6193967517401392, "grad_norm": 44.565818786621094, "learning_rate": 3.466011212582111e-06, "loss": 23.5021, "step": 6674 }, { "epoch": 0.6194895591647331, "grad_norm": 41.16922378540039, "learning_rate": 3.4645507101672987e-06, "loss": 25.1756, "step": 6675 }, { "epoch": 0.6195823665893272, "grad_norm": 39.47199249267578, "learning_rate": 3.4630903523879056e-06, "loss": 24.6434, "step": 6676 }, { "epoch": 0.6196751740139211, "grad_norm": 40.37630844116211, "learning_rate": 3.4616301393814954e-06, "loss": 23.5516, "step": 6677 }, { "epoch": 0.6197679814385151, "grad_norm": 35.49363708496094, "learning_rate": 3.4601700712856202e-06, "loss": 22.0272, "step": 6678 }, { "epoch": 0.619860788863109, "grad_norm": 36.60231018066406, "learning_rate": 3.4587101482378083e-06, "loss": 21.4124, "step": 6679 }, { "epoch": 0.619953596287703, "grad_norm": 39.46603775024414, "learning_rate": 3.4572503703755842e-06, "loss": 22.5786, "step": 6680 }, { "epoch": 0.620046403712297, "grad_norm": 49.98093795776367, "learning_rate": 3.4557907378364575e-06, "loss": 22.7775, "step": 6681 }, { "epoch": 0.620139211136891, "grad_norm": 40.35402297973633, "learning_rate": 3.4543312507579172e-06, "loss": 23.6187, "step": 6682 }, { "epoch": 0.6202320185614849, "grad_norm": 36.95585632324219, "learning_rate": 3.4528719092774453e-06, "loss": 21.4579, "step": 6683 }, { "epoch": 0.6203248259860789, "grad_norm": 42.00201416015625, "learning_rate": 3.4514127135325105e-06, "loss": 23.2308, "step": 6684 }, { "epoch": 0.6204176334106729, "grad_norm": 44.887718200683594, "learning_rate": 3.449953663660561e-06, "loss": 24.1187, "step": 6685 }, { "epoch": 0.6205104408352669, "grad_norm": 38.10515594482422, "learning_rate": 3.448494759799037e-06, "loss": 23.0399, "step": 6686 }, { "epoch": 0.6206032482598608, "grad_norm": 35.459434509277344, "learning_rate": 3.447036002085367e-06, "loss": 23.293, "step": 6687 }, { "epoch": 0.6206960556844547, "grad_norm": 37.20920181274414, "learning_rate": 3.445577390656957e-06, "loss": 23.7571, "step": 6688 }, { "epoch": 0.6207888631090487, "grad_norm": 43.51439666748047, "learning_rate": 3.4441189256512075e-06, "loss": 24.3276, "step": 6689 }, { "epoch": 0.6208816705336427, "grad_norm": 35.07217025756836, "learning_rate": 3.4426606072055033e-06, "loss": 24.0636, "step": 6690 }, { "epoch": 0.6209744779582367, "grad_norm": 38.84524154663086, "learning_rate": 3.4412024354572104e-06, "loss": 22.5551, "step": 6691 }, { "epoch": 0.6210672853828306, "grad_norm": 42.20438003540039, "learning_rate": 3.4397444105436874e-06, "loss": 22.8984, "step": 6692 }, { "epoch": 0.6211600928074246, "grad_norm": 45.037818908691406, "learning_rate": 3.438286532602278e-06, "loss": 23.1854, "step": 6693 }, { "epoch": 0.6212529002320185, "grad_norm": 39.055137634277344, "learning_rate": 3.4368288017703066e-06, "loss": 22.5369, "step": 6694 }, { "epoch": 0.6213457076566126, "grad_norm": 40.36314010620117, "learning_rate": 3.435371218185091e-06, "loss": 23.1179, "step": 6695 }, { "epoch": 0.6214385150812065, "grad_norm": 38.14925765991211, "learning_rate": 3.433913781983932e-06, "loss": 22.8927, "step": 6696 }, { "epoch": 0.6215313225058005, "grad_norm": 42.80428695678711, "learning_rate": 3.432456493304114e-06, "loss": 23.9344, "step": 6697 }, { "epoch": 0.6216241299303944, "grad_norm": 50.3642578125, "learning_rate": 3.43099935228291e-06, "loss": 24.0338, "step": 6698 }, { "epoch": 0.6217169373549885, "grad_norm": 40.97419738769531, "learning_rate": 3.4295423590575825e-06, "loss": 22.5806, "step": 6699 }, { "epoch": 0.6218097447795824, "grad_norm": 40.298343658447266, "learning_rate": 3.4280855137653727e-06, "loss": 23.9212, "step": 6700 }, { "epoch": 0.6219025522041763, "grad_norm": 38.18376922607422, "learning_rate": 3.426628816543513e-06, "loss": 23.0194, "step": 6701 }, { "epoch": 0.6219953596287703, "grad_norm": 34.645172119140625, "learning_rate": 3.4251722675292234e-06, "loss": 22.4988, "step": 6702 }, { "epoch": 0.6220881670533642, "grad_norm": 39.33885192871094, "learning_rate": 3.4237158668597013e-06, "loss": 23.5341, "step": 6703 }, { "epoch": 0.6221809744779583, "grad_norm": 40.96612548828125, "learning_rate": 3.4222596146721394e-06, "loss": 21.632, "step": 6704 }, { "epoch": 0.6222737819025522, "grad_norm": 38.74005126953125, "learning_rate": 3.4208035111037154e-06, "loss": 22.6879, "step": 6705 }, { "epoch": 0.6223665893271462, "grad_norm": 34.88032531738281, "learning_rate": 3.4193475562915845e-06, "loss": 24.115, "step": 6706 }, { "epoch": 0.6224593967517401, "grad_norm": 40.544212341308594, "learning_rate": 3.4178917503728993e-06, "loss": 23.4457, "step": 6707 }, { "epoch": 0.6225522041763341, "grad_norm": 41.66859436035156, "learning_rate": 3.4164360934847912e-06, "loss": 24.1009, "step": 6708 }, { "epoch": 0.6226450116009281, "grad_norm": 43.62636947631836, "learning_rate": 3.414980585764378e-06, "loss": 23.3407, "step": 6709 }, { "epoch": 0.622737819025522, "grad_norm": 36.835208892822266, "learning_rate": 3.4135252273487664e-06, "loss": 22.1966, "step": 6710 }, { "epoch": 0.622830626450116, "grad_norm": 46.99162673950195, "learning_rate": 3.4120700183750465e-06, "loss": 24.5408, "step": 6711 }, { "epoch": 0.6229234338747099, "grad_norm": 44.1895866394043, "learning_rate": 3.4106149589802973e-06, "loss": 24.5484, "step": 6712 }, { "epoch": 0.623016241299304, "grad_norm": 40.64708709716797, "learning_rate": 3.40916004930158e-06, "loss": 22.4795, "step": 6713 }, { "epoch": 0.6231090487238979, "grad_norm": 38.80661392211914, "learning_rate": 3.4077052894759423e-06, "loss": 23.534, "step": 6714 }, { "epoch": 0.6232018561484919, "grad_norm": 43.2227668762207, "learning_rate": 3.4062506796404215e-06, "loss": 23.3264, "step": 6715 }, { "epoch": 0.6232946635730858, "grad_norm": 42.973628997802734, "learning_rate": 3.4047962199320373e-06, "loss": 23.6498, "step": 6716 }, { "epoch": 0.6233874709976798, "grad_norm": 48.17850112915039, "learning_rate": 3.403341910487794e-06, "loss": 23.8663, "step": 6717 }, { "epoch": 0.6234802784222738, "grad_norm": 43.34114456176758, "learning_rate": 3.401887751444687e-06, "loss": 25.4183, "step": 6718 }, { "epoch": 0.6235730858468678, "grad_norm": 78.90438079833984, "learning_rate": 3.4004337429396927e-06, "loss": 22.584, "step": 6719 }, { "epoch": 0.6236658932714617, "grad_norm": 48.21119689941406, "learning_rate": 3.3989798851097744e-06, "loss": 23.766, "step": 6720 }, { "epoch": 0.6237587006960557, "grad_norm": 44.11650848388672, "learning_rate": 3.3975261780918823e-06, "loss": 24.2095, "step": 6721 }, { "epoch": 0.6238515081206496, "grad_norm": 37.966068267822266, "learning_rate": 3.3960726220229546e-06, "loss": 23.1932, "step": 6722 }, { "epoch": 0.6239443155452437, "grad_norm": 44.500213623046875, "learning_rate": 3.394619217039908e-06, "loss": 23.2924, "step": 6723 }, { "epoch": 0.6240371229698376, "grad_norm": 38.640193939208984, "learning_rate": 3.393165963279652e-06, "loss": 23.8279, "step": 6724 }, { "epoch": 0.6241299303944315, "grad_norm": 45.827091217041016, "learning_rate": 3.391712860879082e-06, "loss": 24.5818, "step": 6725 }, { "epoch": 0.6242227378190255, "grad_norm": 48.005043029785156, "learning_rate": 3.3902599099750706e-06, "loss": 23.1769, "step": 6726 }, { "epoch": 0.6243155452436195, "grad_norm": 43.215904235839844, "learning_rate": 3.3888071107044856e-06, "loss": 23.3908, "step": 6727 }, { "epoch": 0.6244083526682135, "grad_norm": 38.385196685791016, "learning_rate": 3.3873544632041786e-06, "loss": 22.5519, "step": 6728 }, { "epoch": 0.6245011600928074, "grad_norm": 41.00724792480469, "learning_rate": 3.3859019676109815e-06, "loss": 23.3014, "step": 6729 }, { "epoch": 0.6245939675174014, "grad_norm": 38.448326110839844, "learning_rate": 3.3844496240617175e-06, "loss": 22.6854, "step": 6730 }, { "epoch": 0.6246867749419953, "grad_norm": 45.211063385009766, "learning_rate": 3.3829974326931957e-06, "loss": 21.5959, "step": 6731 }, { "epoch": 0.6247795823665894, "grad_norm": 36.088008880615234, "learning_rate": 3.381545393642205e-06, "loss": 23.4736, "step": 6732 }, { "epoch": 0.6248723897911833, "grad_norm": 35.361026763916016, "learning_rate": 3.3800935070455254e-06, "loss": 23.845, "step": 6733 }, { "epoch": 0.6249651972157773, "grad_norm": 43.884220123291016, "learning_rate": 3.378641773039924e-06, "loss": 24.0061, "step": 6734 }, { "epoch": 0.6250580046403712, "grad_norm": 40.56951141357422, "learning_rate": 3.377190191762145e-06, "loss": 24.919, "step": 6735 }, { "epoch": 0.6251508120649651, "grad_norm": 39.351165771484375, "learning_rate": 3.375738763348926e-06, "loss": 23.2349, "step": 6736 }, { "epoch": 0.6252436194895592, "grad_norm": 38.218441009521484, "learning_rate": 3.374287487936991e-06, "loss": 22.151, "step": 6737 }, { "epoch": 0.6253364269141531, "grad_norm": 36.44416809082031, "learning_rate": 3.3728363656630407e-06, "loss": 22.3168, "step": 6738 }, { "epoch": 0.6254292343387471, "grad_norm": 36.189266204833984, "learning_rate": 3.371385396663771e-06, "loss": 21.5243, "step": 6739 }, { "epoch": 0.625522041763341, "grad_norm": 37.018131256103516, "learning_rate": 3.36993458107586e-06, "loss": 23.2443, "step": 6740 }, { "epoch": 0.6256148491879351, "grad_norm": 42.055110931396484, "learning_rate": 3.3684839190359674e-06, "loss": 23.1533, "step": 6741 }, { "epoch": 0.625707656612529, "grad_norm": 41.28937530517578, "learning_rate": 3.3670334106807434e-06, "loss": 21.2062, "step": 6742 }, { "epoch": 0.625800464037123, "grad_norm": 40.20640563964844, "learning_rate": 3.3655830561468255e-06, "loss": 24.8363, "step": 6743 }, { "epoch": 0.6258932714617169, "grad_norm": 37.89447021484375, "learning_rate": 3.3641328555708286e-06, "loss": 23.2254, "step": 6744 }, { "epoch": 0.6259860788863109, "grad_norm": 37.6920051574707, "learning_rate": 3.3626828090893614e-06, "loss": 22.9926, "step": 6745 }, { "epoch": 0.6260788863109049, "grad_norm": 39.09632110595703, "learning_rate": 3.361232916839011e-06, "loss": 23.2874, "step": 6746 }, { "epoch": 0.6261716937354989, "grad_norm": 49.070106506347656, "learning_rate": 3.3597831789563563e-06, "loss": 22.5066, "step": 6747 }, { "epoch": 0.6262645011600928, "grad_norm": 37.78950881958008, "learning_rate": 3.35833359557796e-06, "loss": 23.5691, "step": 6748 }, { "epoch": 0.6263573085846867, "grad_norm": 37.62428283691406, "learning_rate": 3.356884166840365e-06, "loss": 22.8549, "step": 6749 }, { "epoch": 0.6264501160092807, "grad_norm": 39.899009704589844, "learning_rate": 3.355434892880107e-06, "loss": 22.7649, "step": 6750 }, { "epoch": 0.6265429234338747, "grad_norm": 39.97053527832031, "learning_rate": 3.3539857738337036e-06, "loss": 24.7954, "step": 6751 }, { "epoch": 0.6266357308584687, "grad_norm": 39.46405029296875, "learning_rate": 3.352536809837658e-06, "loss": 24.102, "step": 6752 }, { "epoch": 0.6267285382830626, "grad_norm": 35.92798614501953, "learning_rate": 3.3510880010284573e-06, "loss": 21.7157, "step": 6753 }, { "epoch": 0.6268213457076566, "grad_norm": 46.21517562866211, "learning_rate": 3.349639347542578e-06, "loss": 24.1478, "step": 6754 }, { "epoch": 0.6269141531322506, "grad_norm": 39.72921371459961, "learning_rate": 3.3481908495164793e-06, "loss": 21.0764, "step": 6755 }, { "epoch": 0.6270069605568446, "grad_norm": 39.47279739379883, "learning_rate": 3.3467425070866034e-06, "loss": 23.6048, "step": 6756 }, { "epoch": 0.6270997679814385, "grad_norm": 37.176876068115234, "learning_rate": 3.345294320389384e-06, "loss": 21.4375, "step": 6757 }, { "epoch": 0.6271925754060325, "grad_norm": 49.190223693847656, "learning_rate": 3.343846289561233e-06, "loss": 25.1931, "step": 6758 }, { "epoch": 0.6272853828306264, "grad_norm": 43.502952575683594, "learning_rate": 3.342398414738555e-06, "loss": 23.2938, "step": 6759 }, { "epoch": 0.6273781902552205, "grad_norm": 38.7064094543457, "learning_rate": 3.3409506960577343e-06, "loss": 24.1681, "step": 6760 }, { "epoch": 0.6274709976798144, "grad_norm": 40.694786071777344, "learning_rate": 3.339503133655141e-06, "loss": 23.2848, "step": 6761 }, { "epoch": 0.6275638051044083, "grad_norm": 47.632476806640625, "learning_rate": 3.3380557276671345e-06, "loss": 23.0097, "step": 6762 }, { "epoch": 0.6276566125290023, "grad_norm": 38.481727600097656, "learning_rate": 3.3366084782300546e-06, "loss": 21.9321, "step": 6763 }, { "epoch": 0.6277494199535962, "grad_norm": 39.039371490478516, "learning_rate": 3.335161385480229e-06, "loss": 24.4734, "step": 6764 }, { "epoch": 0.6278422273781903, "grad_norm": 43.819923400878906, "learning_rate": 3.3337144495539707e-06, "loss": 23.5321, "step": 6765 }, { "epoch": 0.6279350348027842, "grad_norm": 45.74109649658203, "learning_rate": 3.332267670587578e-06, "loss": 23.1158, "step": 6766 }, { "epoch": 0.6280278422273782, "grad_norm": 38.35441589355469, "learning_rate": 3.3308210487173315e-06, "loss": 22.577, "step": 6767 }, { "epoch": 0.6281206496519721, "grad_norm": 66.10201263427734, "learning_rate": 3.3293745840795004e-06, "loss": 22.5061, "step": 6768 }, { "epoch": 0.6282134570765662, "grad_norm": 40.34064865112305, "learning_rate": 3.327928276810341e-06, "loss": 25.1777, "step": 6769 }, { "epoch": 0.6283062645011601, "grad_norm": 36.78330612182617, "learning_rate": 3.326482127046087e-06, "loss": 21.2572, "step": 6770 }, { "epoch": 0.6283990719257541, "grad_norm": 40.70991134643555, "learning_rate": 3.3250361349229643e-06, "loss": 23.4786, "step": 6771 }, { "epoch": 0.628491879350348, "grad_norm": 46.87409973144531, "learning_rate": 3.323590300577183e-06, "loss": 23.2027, "step": 6772 }, { "epoch": 0.628584686774942, "grad_norm": 37.70681381225586, "learning_rate": 3.322144624144934e-06, "loss": 22.4298, "step": 6773 }, { "epoch": 0.628677494199536, "grad_norm": 40.15545654296875, "learning_rate": 3.3206991057623977e-06, "loss": 25.3387, "step": 6774 }, { "epoch": 0.62877030162413, "grad_norm": 40.206642150878906, "learning_rate": 3.3192537455657405e-06, "loss": 21.9016, "step": 6775 }, { "epoch": 0.6288631090487239, "grad_norm": 41.76797103881836, "learning_rate": 3.3178085436911077e-06, "loss": 22.9171, "step": 6776 }, { "epoch": 0.6289559164733178, "grad_norm": 44.835506439208984, "learning_rate": 3.3163635002746355e-06, "loss": 22.6826, "step": 6777 }, { "epoch": 0.6290487238979119, "grad_norm": 46.164485931396484, "learning_rate": 3.3149186154524453e-06, "loss": 23.7149, "step": 6778 }, { "epoch": 0.6291415313225058, "grad_norm": 52.55255889892578, "learning_rate": 3.3134738893606376e-06, "loss": 24.2254, "step": 6779 }, { "epoch": 0.6292343387470998, "grad_norm": 46.336669921875, "learning_rate": 3.312029322135306e-06, "loss": 22.143, "step": 6780 }, { "epoch": 0.6293271461716937, "grad_norm": 38.10253143310547, "learning_rate": 3.3105849139125212e-06, "loss": 22.3507, "step": 6781 }, { "epoch": 0.6294199535962877, "grad_norm": 43.011985778808594, "learning_rate": 3.3091406648283448e-06, "loss": 22.8549, "step": 6782 }, { "epoch": 0.6295127610208817, "grad_norm": 38.73746109008789, "learning_rate": 3.307696575018823e-06, "loss": 23.1615, "step": 6783 }, { "epoch": 0.6296055684454757, "grad_norm": 38.61827087402344, "learning_rate": 3.3062526446199817e-06, "loss": 23.5563, "step": 6784 }, { "epoch": 0.6296983758700696, "grad_norm": 41.01640319824219, "learning_rate": 3.3048088737678375e-06, "loss": 23.3584, "step": 6785 }, { "epoch": 0.6297911832946635, "grad_norm": 38.32122039794922, "learning_rate": 3.3033652625983915e-06, "loss": 23.6125, "step": 6786 }, { "epoch": 0.6298839907192575, "grad_norm": 38.55038070678711, "learning_rate": 3.3019218112476247e-06, "loss": 21.9605, "step": 6787 }, { "epoch": 0.6299767981438515, "grad_norm": 38.330963134765625, "learning_rate": 3.3004785198515088e-06, "loss": 25.0122, "step": 6788 }, { "epoch": 0.6300696055684455, "grad_norm": 40.849082946777344, "learning_rate": 3.2990353885459993e-06, "loss": 22.5812, "step": 6789 }, { "epoch": 0.6301624129930394, "grad_norm": 42.75608825683594, "learning_rate": 3.2975924174670314e-06, "loss": 23.1341, "step": 6790 }, { "epoch": 0.6302552204176334, "grad_norm": 42.68653869628906, "learning_rate": 3.296149606750533e-06, "loss": 23.2521, "step": 6791 }, { "epoch": 0.6303480278422274, "grad_norm": 34.73014450073242, "learning_rate": 3.2947069565324134e-06, "loss": 22.4309, "step": 6792 }, { "epoch": 0.6304408352668214, "grad_norm": 38.98622131347656, "learning_rate": 3.2932644669485638e-06, "loss": 21.9277, "step": 6793 }, { "epoch": 0.6305336426914153, "grad_norm": 42.8116569519043, "learning_rate": 3.2918221381348638e-06, "loss": 24.5204, "step": 6794 }, { "epoch": 0.6306264501160093, "grad_norm": 46.525211334228516, "learning_rate": 3.2903799702271804e-06, "loss": 21.8753, "step": 6795 }, { "epoch": 0.6307192575406032, "grad_norm": 44.175296783447266, "learning_rate": 3.288937963361357e-06, "loss": 22.6093, "step": 6796 }, { "epoch": 0.6308120649651973, "grad_norm": 37.59336853027344, "learning_rate": 3.2874961176732308e-06, "loss": 22.2065, "step": 6797 }, { "epoch": 0.6309048723897912, "grad_norm": 54.72071838378906, "learning_rate": 3.286054433298619e-06, "loss": 24.0851, "step": 6798 }, { "epoch": 0.6309976798143851, "grad_norm": 37.46096420288086, "learning_rate": 3.2846129103733237e-06, "loss": 25.2217, "step": 6799 }, { "epoch": 0.6310904872389791, "grad_norm": 40.53494644165039, "learning_rate": 3.2831715490331336e-06, "loss": 23.9531, "step": 6800 }, { "epoch": 0.631183294663573, "grad_norm": 42.71990966796875, "learning_rate": 3.281730349413821e-06, "loss": 22.2769, "step": 6801 }, { "epoch": 0.6312761020881671, "grad_norm": 40.57353591918945, "learning_rate": 3.2802893116511435e-06, "loss": 22.0905, "step": 6802 }, { "epoch": 0.631368909512761, "grad_norm": 35.89642333984375, "learning_rate": 3.2788484358808426e-06, "loss": 22.7834, "step": 6803 }, { "epoch": 0.631461716937355, "grad_norm": 40.604286193847656, "learning_rate": 3.2774077222386465e-06, "loss": 22.9377, "step": 6804 }, { "epoch": 0.6315545243619489, "grad_norm": 45.20671463012695, "learning_rate": 3.2759671708602642e-06, "loss": 25.0413, "step": 6805 }, { "epoch": 0.631647331786543, "grad_norm": 39.979366302490234, "learning_rate": 3.2745267818813947e-06, "loss": 22.935, "step": 6806 }, { "epoch": 0.6317401392111369, "grad_norm": 42.114505767822266, "learning_rate": 3.273086555437718e-06, "loss": 24.0168, "step": 6807 }, { "epoch": 0.6318329466357309, "grad_norm": 37.928016662597656, "learning_rate": 3.2716464916648983e-06, "loss": 20.8901, "step": 6808 }, { "epoch": 0.6319257540603248, "grad_norm": 45.86626052856445, "learning_rate": 3.270206590698589e-06, "loss": 24.0168, "step": 6809 }, { "epoch": 0.6320185614849188, "grad_norm": 40.66773986816406, "learning_rate": 3.2687668526744224e-06, "loss": 24.2494, "step": 6810 }, { "epoch": 0.6321113689095128, "grad_norm": 40.137332916259766, "learning_rate": 3.267327277728019e-06, "loss": 23.505, "step": 6811 }, { "epoch": 0.6322041763341067, "grad_norm": 38.217796325683594, "learning_rate": 3.265887865994984e-06, "loss": 22.4348, "step": 6812 }, { "epoch": 0.6322969837587007, "grad_norm": 37.21495819091797, "learning_rate": 3.2644486176109053e-06, "loss": 22.4924, "step": 6813 }, { "epoch": 0.6323897911832946, "grad_norm": 47.21743392944336, "learning_rate": 3.263009532711356e-06, "loss": 23.3456, "step": 6814 }, { "epoch": 0.6324825986078886, "grad_norm": 43.57052993774414, "learning_rate": 3.2615706114318966e-06, "loss": 23.5471, "step": 6815 }, { "epoch": 0.6325754060324826, "grad_norm": 42.51886749267578, "learning_rate": 3.260131853908066e-06, "loss": 23.8412, "step": 6816 }, { "epoch": 0.6326682134570766, "grad_norm": 41.50390625, "learning_rate": 3.2586932602753945e-06, "loss": 23.6405, "step": 6817 }, { "epoch": 0.6327610208816705, "grad_norm": 40.89370346069336, "learning_rate": 3.2572548306693944e-06, "loss": 25.3385, "step": 6818 }, { "epoch": 0.6328538283062645, "grad_norm": 42.756412506103516, "learning_rate": 3.255816565225559e-06, "loss": 24.3836, "step": 6819 }, { "epoch": 0.6329466357308585, "grad_norm": 38.69647979736328, "learning_rate": 3.254378464079372e-06, "loss": 24.3726, "step": 6820 }, { "epoch": 0.6330394431554525, "grad_norm": 41.82878112792969, "learning_rate": 3.252940527366299e-06, "loss": 23.6747, "step": 6821 }, { "epoch": 0.6331322505800464, "grad_norm": 40.38629150390625, "learning_rate": 3.251502755221787e-06, "loss": 23.8573, "step": 6822 }, { "epoch": 0.6332250580046404, "grad_norm": 40.759273529052734, "learning_rate": 3.2500651477812726e-06, "loss": 25.0348, "step": 6823 }, { "epoch": 0.6333178654292343, "grad_norm": 37.886837005615234, "learning_rate": 3.248627705180177e-06, "loss": 22.3148, "step": 6824 }, { "epoch": 0.6334106728538283, "grad_norm": 41.256492614746094, "learning_rate": 3.247190427553899e-06, "loss": 24.0838, "step": 6825 }, { "epoch": 0.6335034802784223, "grad_norm": 36.55084228515625, "learning_rate": 3.2457533150378286e-06, "loss": 21.9032, "step": 6826 }, { "epoch": 0.6335962877030162, "grad_norm": 64.76021575927734, "learning_rate": 3.2443163677673405e-06, "loss": 22.6915, "step": 6827 }, { "epoch": 0.6336890951276102, "grad_norm": 38.91083526611328, "learning_rate": 3.2428795858777873e-06, "loss": 22.2481, "step": 6828 }, { "epoch": 0.6337819025522041, "grad_norm": 46.93977355957031, "learning_rate": 3.2414429695045122e-06, "loss": 23.614, "step": 6829 }, { "epoch": 0.6338747099767982, "grad_norm": 41.05710220336914, "learning_rate": 3.2400065187828427e-06, "loss": 22.8544, "step": 6830 }, { "epoch": 0.6339675174013921, "grad_norm": 40.24338150024414, "learning_rate": 3.238570233848085e-06, "loss": 24.2017, "step": 6831 }, { "epoch": 0.6340603248259861, "grad_norm": 37.64250183105469, "learning_rate": 3.237134114835535e-06, "loss": 24.505, "step": 6832 }, { "epoch": 0.63415313225058, "grad_norm": 46.19635772705078, "learning_rate": 3.2356981618804744e-06, "loss": 23.9774, "step": 6833 }, { "epoch": 0.6342459396751741, "grad_norm": 40.75550079345703, "learning_rate": 3.234262375118161e-06, "loss": 23.176, "step": 6834 }, { "epoch": 0.634338747099768, "grad_norm": 40.841224670410156, "learning_rate": 3.2328267546838466e-06, "loss": 22.3953, "step": 6835 }, { "epoch": 0.634431554524362, "grad_norm": 39.47502136230469, "learning_rate": 3.2313913007127616e-06, "loss": 23.4387, "step": 6836 }, { "epoch": 0.6345243619489559, "grad_norm": 41.46617126464844, "learning_rate": 3.2299560133401214e-06, "loss": 25.0608, "step": 6837 }, { "epoch": 0.6346171693735498, "grad_norm": 40.57696533203125, "learning_rate": 3.2285208927011265e-06, "loss": 22.3391, "step": 6838 }, { "epoch": 0.6347099767981439, "grad_norm": 46.73707962036133, "learning_rate": 3.2270859389309638e-06, "loss": 22.047, "step": 6839 }, { "epoch": 0.6348027842227378, "grad_norm": 40.128944396972656, "learning_rate": 3.225651152164799e-06, "loss": 23.6004, "step": 6840 }, { "epoch": 0.6348955916473318, "grad_norm": 41.916324615478516, "learning_rate": 3.2242165325377865e-06, "loss": 22.483, "step": 6841 }, { "epoch": 0.6349883990719257, "grad_norm": 44.44395446777344, "learning_rate": 3.2227820801850666e-06, "loss": 24.591, "step": 6842 }, { "epoch": 0.6350812064965197, "grad_norm": 39.52772521972656, "learning_rate": 3.221347795241757e-06, "loss": 23.3692, "step": 6843 }, { "epoch": 0.6351740139211137, "grad_norm": 41.29229736328125, "learning_rate": 3.2199136778429653e-06, "loss": 23.1805, "step": 6844 }, { "epoch": 0.6352668213457077, "grad_norm": 36.967803955078125, "learning_rate": 3.2184797281237823e-06, "loss": 23.4882, "step": 6845 }, { "epoch": 0.6353596287703016, "grad_norm": 44.54920959472656, "learning_rate": 3.2170459462192827e-06, "loss": 22.6796, "step": 6846 }, { "epoch": 0.6354524361948956, "grad_norm": 47.44823455810547, "learning_rate": 3.215612332264523e-06, "loss": 23.623, "step": 6847 }, { "epoch": 0.6355452436194896, "grad_norm": 38.05369567871094, "learning_rate": 3.2141788863945487e-06, "loss": 24.1419, "step": 6848 }, { "epoch": 0.6356380510440836, "grad_norm": 121.6221923828125, "learning_rate": 3.2127456087443853e-06, "loss": 23.5621, "step": 6849 }, { "epoch": 0.6357308584686775, "grad_norm": 47.33191680908203, "learning_rate": 3.211312499449044e-06, "loss": 24.688, "step": 6850 }, { "epoch": 0.6358236658932714, "grad_norm": 48.600093841552734, "learning_rate": 3.2098795586435194e-06, "loss": 22.2911, "step": 6851 }, { "epoch": 0.6359164733178654, "grad_norm": 41.99488067626953, "learning_rate": 3.208446786462791e-06, "loss": 22.7398, "step": 6852 }, { "epoch": 0.6360092807424594, "grad_norm": 41.01887893676758, "learning_rate": 3.2070141830418255e-06, "loss": 25.0322, "step": 6853 }, { "epoch": 0.6361020881670534, "grad_norm": 38.843589782714844, "learning_rate": 3.2055817485155662e-06, "loss": 22.1883, "step": 6854 }, { "epoch": 0.6361948955916473, "grad_norm": 44.49888229370117, "learning_rate": 3.2041494830189456e-06, "loss": 23.715, "step": 6855 }, { "epoch": 0.6362877030162413, "grad_norm": 43.101898193359375, "learning_rate": 3.2027173866868828e-06, "loss": 23.2377, "step": 6856 }, { "epoch": 0.6363805104408352, "grad_norm": 38.89167022705078, "learning_rate": 3.2012854596542733e-06, "loss": 21.8928, "step": 6857 }, { "epoch": 0.6364733178654293, "grad_norm": 39.91608810424805, "learning_rate": 3.199853702056003e-06, "loss": 23.2178, "step": 6858 }, { "epoch": 0.6365661252900232, "grad_norm": 46.55864334106445, "learning_rate": 3.1984221140269413e-06, "loss": 21.7025, "step": 6859 }, { "epoch": 0.6366589327146172, "grad_norm": 41.06334686279297, "learning_rate": 3.1969906957019365e-06, "loss": 22.9846, "step": 6860 }, { "epoch": 0.6367517401392111, "grad_norm": 48.51832580566406, "learning_rate": 3.195559447215827e-06, "loss": 22.6173, "step": 6861 }, { "epoch": 0.6368445475638052, "grad_norm": 38.61809158325195, "learning_rate": 3.1941283687034342e-06, "loss": 23.2448, "step": 6862 }, { "epoch": 0.6369373549883991, "grad_norm": 44.413421630859375, "learning_rate": 3.192697460299558e-06, "loss": 21.595, "step": 6863 }, { "epoch": 0.637030162412993, "grad_norm": 44.17891311645508, "learning_rate": 3.1912667221389892e-06, "loss": 22.914, "step": 6864 }, { "epoch": 0.637122969837587, "grad_norm": 38.890464782714844, "learning_rate": 3.189836154356501e-06, "loss": 23.9321, "step": 6865 }, { "epoch": 0.6372157772621809, "grad_norm": 46.84330368041992, "learning_rate": 3.1884057570868453e-06, "loss": 23.4942, "step": 6866 }, { "epoch": 0.637308584686775, "grad_norm": 44.77336883544922, "learning_rate": 3.1869755304647633e-06, "loss": 23.1585, "step": 6867 }, { "epoch": 0.6374013921113689, "grad_norm": 44.315067291259766, "learning_rate": 3.1855454746249827e-06, "loss": 22.6151, "step": 6868 }, { "epoch": 0.6374941995359629, "grad_norm": 36.61381149291992, "learning_rate": 3.1841155897022045e-06, "loss": 23.6309, "step": 6869 }, { "epoch": 0.6375870069605568, "grad_norm": 42.384151458740234, "learning_rate": 3.182685875831124e-06, "loss": 23.4221, "step": 6870 }, { "epoch": 0.6376798143851509, "grad_norm": 37.519901275634766, "learning_rate": 3.1812563331464188e-06, "loss": 20.8478, "step": 6871 }, { "epoch": 0.6377726218097448, "grad_norm": 45.93992614746094, "learning_rate": 3.179826961782742e-06, "loss": 22.1721, "step": 6872 }, { "epoch": 0.6378654292343388, "grad_norm": 38.320274353027344, "learning_rate": 3.1783977618747413e-06, "loss": 24.1446, "step": 6873 }, { "epoch": 0.6379582366589327, "grad_norm": 38.70114517211914, "learning_rate": 3.1769687335570442e-06, "loss": 22.2654, "step": 6874 }, { "epoch": 0.6380510440835266, "grad_norm": 41.929813385009766, "learning_rate": 3.1755398769642586e-06, "loss": 24.0409, "step": 6875 }, { "epoch": 0.6381438515081207, "grad_norm": 43.951995849609375, "learning_rate": 3.1741111922309797e-06, "loss": 25.2823, "step": 6876 }, { "epoch": 0.6382366589327146, "grad_norm": 43.70832443237305, "learning_rate": 3.17268267949179e-06, "loss": 23.7605, "step": 6877 }, { "epoch": 0.6383294663573086, "grad_norm": 38.9143180847168, "learning_rate": 3.1712543388812457e-06, "loss": 21.9733, "step": 6878 }, { "epoch": 0.6384222737819025, "grad_norm": 51.981292724609375, "learning_rate": 3.1698261705338963e-06, "loss": 23.5428, "step": 6879 }, { "epoch": 0.6385150812064965, "grad_norm": 44.70638656616211, "learning_rate": 3.1683981745842727e-06, "loss": 22.1722, "step": 6880 }, { "epoch": 0.6386078886310905, "grad_norm": 42.363502502441406, "learning_rate": 3.1669703511668846e-06, "loss": 25.2226, "step": 6881 }, { "epoch": 0.6387006960556845, "grad_norm": 37.59235763549805, "learning_rate": 3.165542700416232e-06, "loss": 23.5995, "step": 6882 }, { "epoch": 0.6387935034802784, "grad_norm": 35.1341438293457, "learning_rate": 3.164115222466798e-06, "loss": 22.22, "step": 6883 }, { "epoch": 0.6388863109048724, "grad_norm": 40.039894104003906, "learning_rate": 3.1626879174530423e-06, "loss": 22.2685, "step": 6884 }, { "epoch": 0.6389791183294664, "grad_norm": 42.38588333129883, "learning_rate": 3.1612607855094175e-06, "loss": 24.2154, "step": 6885 }, { "epoch": 0.6390719257540604, "grad_norm": 42.86162185668945, "learning_rate": 3.1598338267703543e-06, "loss": 25.5957, "step": 6886 }, { "epoch": 0.6391647331786543, "grad_norm": 39.77086639404297, "learning_rate": 3.158407041370267e-06, "loss": 24.1068, "step": 6887 }, { "epoch": 0.6392575406032482, "grad_norm": 39.566246032714844, "learning_rate": 3.156980429443559e-06, "loss": 22.7579, "step": 6888 }, { "epoch": 0.6393503480278422, "grad_norm": 51.45303726196289, "learning_rate": 3.1555539911246104e-06, "loss": 23.5191, "step": 6889 }, { "epoch": 0.6394431554524362, "grad_norm": 44.89486312866211, "learning_rate": 3.154127726547789e-06, "loss": 22.1984, "step": 6890 }, { "epoch": 0.6395359628770302, "grad_norm": 42.815731048583984, "learning_rate": 3.152701635847445e-06, "loss": 24.0202, "step": 6891 }, { "epoch": 0.6396287703016241, "grad_norm": 45.4338264465332, "learning_rate": 3.151275719157913e-06, "loss": 22.0188, "step": 6892 }, { "epoch": 0.6397215777262181, "grad_norm": 38.97079086303711, "learning_rate": 3.149849976613511e-06, "loss": 22.5869, "step": 6893 }, { "epoch": 0.639814385150812, "grad_norm": 37.779537200927734, "learning_rate": 3.14842440834854e-06, "loss": 23.8624, "step": 6894 }, { "epoch": 0.6399071925754061, "grad_norm": 39.27262878417969, "learning_rate": 3.146999014497284e-06, "loss": 24.2141, "step": 6895 }, { "epoch": 0.64, "grad_norm": 40.77596664428711, "learning_rate": 3.145573795194013e-06, "loss": 22.7139, "step": 6896 }, { "epoch": 0.640092807424594, "grad_norm": 40.36540985107422, "learning_rate": 3.1441487505729785e-06, "loss": 25.3542, "step": 6897 }, { "epoch": 0.6401856148491879, "grad_norm": 41.98637771606445, "learning_rate": 3.1427238807684145e-06, "loss": 25.1752, "step": 6898 }, { "epoch": 0.640278422273782, "grad_norm": 39.407005310058594, "learning_rate": 3.141299185914542e-06, "loss": 22.678, "step": 6899 }, { "epoch": 0.6403712296983759, "grad_norm": 38.00430679321289, "learning_rate": 3.1398746661455647e-06, "loss": 22.0827, "step": 6900 }, { "epoch": 0.6404640371229698, "grad_norm": 35.60562515258789, "learning_rate": 3.138450321595666e-06, "loss": 21.8626, "step": 6901 }, { "epoch": 0.6405568445475638, "grad_norm": 35.9542350769043, "learning_rate": 3.137026152399016e-06, "loss": 23.1978, "step": 6902 }, { "epoch": 0.6406496519721577, "grad_norm": 48.1390266418457, "learning_rate": 3.1356021586897708e-06, "loss": 24.1518, "step": 6903 }, { "epoch": 0.6407424593967518, "grad_norm": 44.3602294921875, "learning_rate": 3.1341783406020627e-06, "loss": 22.1179, "step": 6904 }, { "epoch": 0.6408352668213457, "grad_norm": 39.37744903564453, "learning_rate": 3.1327546982700134e-06, "loss": 22.3308, "step": 6905 }, { "epoch": 0.6409280742459397, "grad_norm": 46.81345748901367, "learning_rate": 3.131331231827729e-06, "loss": 24.2492, "step": 6906 }, { "epoch": 0.6410208816705336, "grad_norm": 45.033042907714844, "learning_rate": 3.1299079414092925e-06, "loss": 21.6898, "step": 6907 }, { "epoch": 0.6411136890951276, "grad_norm": 38.434364318847656, "learning_rate": 3.1284848271487757e-06, "loss": 24.8871, "step": 6908 }, { "epoch": 0.6412064965197216, "grad_norm": 42.98252487182617, "learning_rate": 3.1270618891802353e-06, "loss": 24.0462, "step": 6909 }, { "epoch": 0.6412993039443156, "grad_norm": 40.519126892089844, "learning_rate": 3.1256391276377022e-06, "loss": 22.7346, "step": 6910 }, { "epoch": 0.6413921113689095, "grad_norm": 37.04784393310547, "learning_rate": 3.1242165426552018e-06, "loss": 22.8536, "step": 6911 }, { "epoch": 0.6414849187935034, "grad_norm": 41.07139205932617, "learning_rate": 3.122794134366738e-06, "loss": 22.6492, "step": 6912 }, { "epoch": 0.6415777262180975, "grad_norm": 35.60694885253906, "learning_rate": 3.121371902906295e-06, "loss": 23.0917, "step": 6913 }, { "epoch": 0.6416705336426914, "grad_norm": 38.857627868652344, "learning_rate": 3.119949848407845e-06, "loss": 21.6476, "step": 6914 }, { "epoch": 0.6417633410672854, "grad_norm": 46.4661979675293, "learning_rate": 3.1185279710053433e-06, "loss": 21.7584, "step": 6915 }, { "epoch": 0.6418561484918793, "grad_norm": 37.605430603027344, "learning_rate": 3.1171062708327247e-06, "loss": 22.8908, "step": 6916 }, { "epoch": 0.6419489559164733, "grad_norm": 39.72312545776367, "learning_rate": 3.1156847480239107e-06, "loss": 22.6241, "step": 6917 }, { "epoch": 0.6420417633410673, "grad_norm": 36.38208770751953, "learning_rate": 3.114263402712807e-06, "loss": 22.5023, "step": 6918 }, { "epoch": 0.6421345707656613, "grad_norm": 35.41022872924805, "learning_rate": 3.112842235033297e-06, "loss": 23.051, "step": 6919 }, { "epoch": 0.6422273781902552, "grad_norm": 44.4876594543457, "learning_rate": 3.1114212451192553e-06, "loss": 21.0353, "step": 6920 }, { "epoch": 0.6423201856148492, "grad_norm": 40.22939682006836, "learning_rate": 3.110000433104532e-06, "loss": 22.472, "step": 6921 }, { "epoch": 0.6424129930394431, "grad_norm": 44.67967224121094, "learning_rate": 3.1085797991229645e-06, "loss": 21.7019, "step": 6922 }, { "epoch": 0.6425058004640372, "grad_norm": 35.34773254394531, "learning_rate": 3.1071593433083756e-06, "loss": 22.5563, "step": 6923 }, { "epoch": 0.6425986078886311, "grad_norm": 40.016265869140625, "learning_rate": 3.105739065794565e-06, "loss": 20.7773, "step": 6924 }, { "epoch": 0.642691415313225, "grad_norm": 38.87164306640625, "learning_rate": 3.104318966715322e-06, "loss": 20.3209, "step": 6925 }, { "epoch": 0.642784222737819, "grad_norm": 38.863040924072266, "learning_rate": 3.1028990462044163e-06, "loss": 23.4577, "step": 6926 }, { "epoch": 0.642877030162413, "grad_norm": 38.16462326049805, "learning_rate": 3.1014793043955976e-06, "loss": 22.1584, "step": 6927 }, { "epoch": 0.642969837587007, "grad_norm": 45.39004135131836, "learning_rate": 3.100059741422604e-06, "loss": 22.04, "step": 6928 }, { "epoch": 0.6430626450116009, "grad_norm": 39.220680236816406, "learning_rate": 3.098640357419158e-06, "loss": 23.1542, "step": 6929 }, { "epoch": 0.6431554524361949, "grad_norm": 41.316368103027344, "learning_rate": 3.0972211525189566e-06, "loss": 22.3112, "step": 6930 }, { "epoch": 0.6432482598607888, "grad_norm": 39.91946029663086, "learning_rate": 3.095802126855687e-06, "loss": 21.0865, "step": 6931 }, { "epoch": 0.6433410672853829, "grad_norm": 43.990516662597656, "learning_rate": 3.09438328056302e-06, "loss": 23.3445, "step": 6932 }, { "epoch": 0.6434338747099768, "grad_norm": 39.218475341796875, "learning_rate": 3.0929646137746054e-06, "loss": 23.1749, "step": 6933 }, { "epoch": 0.6435266821345708, "grad_norm": 41.453731536865234, "learning_rate": 3.0915461266240776e-06, "loss": 25.2266, "step": 6934 }, { "epoch": 0.6436194895591647, "grad_norm": 41.23075866699219, "learning_rate": 3.0901278192450556e-06, "loss": 23.7495, "step": 6935 }, { "epoch": 0.6437122969837586, "grad_norm": 42.92848587036133, "learning_rate": 3.0887096917711408e-06, "loss": 24.3566, "step": 6936 }, { "epoch": 0.6438051044083527, "grad_norm": 43.23047637939453, "learning_rate": 3.0872917443359152e-06, "loss": 22.7495, "step": 6937 }, { "epoch": 0.6438979118329466, "grad_norm": 38.55507278442383, "learning_rate": 3.0858739770729472e-06, "loss": 24.1925, "step": 6938 }, { "epoch": 0.6439907192575406, "grad_norm": 40.23764419555664, "learning_rate": 3.0844563901157863e-06, "loss": 21.6736, "step": 6939 }, { "epoch": 0.6440835266821345, "grad_norm": 43.591068267822266, "learning_rate": 3.0830389835979667e-06, "loss": 23.8256, "step": 6940 }, { "epoch": 0.6441763341067286, "grad_norm": 50.761505126953125, "learning_rate": 3.0816217576530043e-06, "loss": 23.9174, "step": 6941 }, { "epoch": 0.6442691415313225, "grad_norm": 35.38566589355469, "learning_rate": 3.0802047124143964e-06, "loss": 21.7969, "step": 6942 }, { "epoch": 0.6443619489559165, "grad_norm": 37.60525131225586, "learning_rate": 3.0787878480156274e-06, "loss": 23.675, "step": 6943 }, { "epoch": 0.6444547563805104, "grad_norm": 41.05217361450195, "learning_rate": 3.077371164590161e-06, "loss": 22.516, "step": 6944 }, { "epoch": 0.6445475638051044, "grad_norm": 39.16401290893555, "learning_rate": 3.0759546622714443e-06, "loss": 23.5221, "step": 6945 }, { "epoch": 0.6446403712296984, "grad_norm": 44.22963333129883, "learning_rate": 3.0745383411929097e-06, "loss": 23.3109, "step": 6946 }, { "epoch": 0.6447331786542924, "grad_norm": 37.443443298339844, "learning_rate": 3.0731222014879725e-06, "loss": 22.7912, "step": 6947 }, { "epoch": 0.6448259860788863, "grad_norm": 41.10041809082031, "learning_rate": 3.071706243290026e-06, "loss": 23.2867, "step": 6948 }, { "epoch": 0.6449187935034802, "grad_norm": 45.14147186279297, "learning_rate": 3.0702904667324506e-06, "loss": 22.3231, "step": 6949 }, { "epoch": 0.6450116009280742, "grad_norm": 43.49644470214844, "learning_rate": 3.068874871948613e-06, "loss": 23.3823, "step": 6950 }, { "epoch": 0.6451044083526682, "grad_norm": 38.98332595825195, "learning_rate": 3.067459459071852e-06, "loss": 21.8976, "step": 6951 }, { "epoch": 0.6451972157772622, "grad_norm": 44.33964538574219, "learning_rate": 3.0660442282355e-06, "loss": 23.3635, "step": 6952 }, { "epoch": 0.6452900232018561, "grad_norm": 41.37635040283203, "learning_rate": 3.06462917957287e-06, "loss": 23.1102, "step": 6953 }, { "epoch": 0.6453828306264501, "grad_norm": 41.736934661865234, "learning_rate": 3.0632143132172503e-06, "loss": 23.4787, "step": 6954 }, { "epoch": 0.6454756380510441, "grad_norm": 49.25402069091797, "learning_rate": 3.0617996293019214e-06, "loss": 22.9833, "step": 6955 }, { "epoch": 0.6455684454756381, "grad_norm": 42.29207992553711, "learning_rate": 3.0603851279601444e-06, "loss": 22.5304, "step": 6956 }, { "epoch": 0.645661252900232, "grad_norm": 44.57666015625, "learning_rate": 3.058970809325158e-06, "loss": 23.1205, "step": 6957 }, { "epoch": 0.645754060324826, "grad_norm": 37.52349090576172, "learning_rate": 3.0575566735301906e-06, "loss": 24.0627, "step": 6958 }, { "epoch": 0.6458468677494199, "grad_norm": 40.15874099731445, "learning_rate": 3.056142720708447e-06, "loss": 22.9541, "step": 6959 }, { "epoch": 0.645939675174014, "grad_norm": 43.6563606262207, "learning_rate": 3.0547289509931194e-06, "loss": 23.5899, "step": 6960 }, { "epoch": 0.6460324825986079, "grad_norm": 46.84969711303711, "learning_rate": 3.053315364517384e-06, "loss": 23.8629, "step": 6961 }, { "epoch": 0.6461252900232018, "grad_norm": 39.822898864746094, "learning_rate": 3.051901961414392e-06, "loss": 23.5378, "step": 6962 }, { "epoch": 0.6462180974477958, "grad_norm": 38.06345748901367, "learning_rate": 3.0504887418172856e-06, "loss": 22.0862, "step": 6963 }, { "epoch": 0.6463109048723898, "grad_norm": 46.55622863769531, "learning_rate": 3.049075705859188e-06, "loss": 22.4274, "step": 6964 }, { "epoch": 0.6464037122969838, "grad_norm": 39.50572204589844, "learning_rate": 3.047662853673199e-06, "loss": 22.6573, "step": 6965 }, { "epoch": 0.6464965197215777, "grad_norm": 40.069149017333984, "learning_rate": 3.0462501853924088e-06, "loss": 24.1555, "step": 6966 }, { "epoch": 0.6465893271461717, "grad_norm": 36.68749237060547, "learning_rate": 3.044837701149888e-06, "loss": 22.2368, "step": 6967 }, { "epoch": 0.6466821345707656, "grad_norm": 43.773990631103516, "learning_rate": 3.0434254010786856e-06, "loss": 21.4186, "step": 6968 }, { "epoch": 0.6467749419953597, "grad_norm": 45.68836975097656, "learning_rate": 3.042013285311839e-06, "loss": 23.8934, "step": 6969 }, { "epoch": 0.6468677494199536, "grad_norm": 39.806636810302734, "learning_rate": 3.0406013539823675e-06, "loss": 24.5186, "step": 6970 }, { "epoch": 0.6469605568445476, "grad_norm": 41.671661376953125, "learning_rate": 3.039189607223268e-06, "loss": 23.7399, "step": 6971 }, { "epoch": 0.6470533642691415, "grad_norm": 44.25344467163086, "learning_rate": 3.0377780451675243e-06, "loss": 23.5495, "step": 6972 }, { "epoch": 0.6471461716937355, "grad_norm": 43.26245880126953, "learning_rate": 3.0363666679481053e-06, "loss": 22.2478, "step": 6973 }, { "epoch": 0.6472389791183295, "grad_norm": 42.8383903503418, "learning_rate": 3.034955475697954e-06, "loss": 22.9017, "step": 6974 }, { "epoch": 0.6473317865429234, "grad_norm": 37.27983856201172, "learning_rate": 3.0335444685500037e-06, "loss": 23.482, "step": 6975 }, { "epoch": 0.6474245939675174, "grad_norm": 40.575584411621094, "learning_rate": 3.03213364663717e-06, "loss": 22.1702, "step": 6976 }, { "epoch": 0.6475174013921113, "grad_norm": 44.48404312133789, "learning_rate": 3.0307230100923434e-06, "loss": 23.6655, "step": 6977 }, { "epoch": 0.6476102088167054, "grad_norm": 41.92116928100586, "learning_rate": 3.029312559048406e-06, "loss": 24.4625, "step": 6978 }, { "epoch": 0.6477030162412993, "grad_norm": 37.854366302490234, "learning_rate": 3.027902293638219e-06, "loss": 23.7048, "step": 6979 }, { "epoch": 0.6477958236658933, "grad_norm": 44.106021881103516, "learning_rate": 3.0264922139946252e-06, "loss": 23.6795, "step": 6980 }, { "epoch": 0.6478886310904872, "grad_norm": 36.93779754638672, "learning_rate": 3.0250823202504476e-06, "loss": 21.7479, "step": 6981 }, { "epoch": 0.6479814385150812, "grad_norm": 37.72982406616211, "learning_rate": 3.0236726125384996e-06, "loss": 23.6421, "step": 6982 }, { "epoch": 0.6480742459396752, "grad_norm": 38.25825500488281, "learning_rate": 3.0222630909915685e-06, "loss": 22.4514, "step": 6983 }, { "epoch": 0.6481670533642692, "grad_norm": 35.09624481201172, "learning_rate": 3.020853755742428e-06, "loss": 24.567, "step": 6984 }, { "epoch": 0.6482598607888631, "grad_norm": 41.25132751464844, "learning_rate": 3.019444606923836e-06, "loss": 24.9891, "step": 6985 }, { "epoch": 0.648352668213457, "grad_norm": 42.518882751464844, "learning_rate": 3.018035644668528e-06, "loss": 23.1332, "step": 6986 }, { "epoch": 0.648445475638051, "grad_norm": 37.69206619262695, "learning_rate": 3.016626869109227e-06, "loss": 22.5047, "step": 6987 }, { "epoch": 0.648538283062645, "grad_norm": 40.30773162841797, "learning_rate": 3.015218280378635e-06, "loss": 22.3227, "step": 6988 }, { "epoch": 0.648631090487239, "grad_norm": 37.45903396606445, "learning_rate": 3.013809878609437e-06, "loss": 21.9971, "step": 6989 }, { "epoch": 0.6487238979118329, "grad_norm": 39.335723876953125, "learning_rate": 3.0124016639343023e-06, "loss": 22.6092, "step": 6990 }, { "epoch": 0.6488167053364269, "grad_norm": 139.27252197265625, "learning_rate": 3.01099363648588e-06, "loss": 22.9006, "step": 6991 }, { "epoch": 0.6489095127610209, "grad_norm": 44.656490325927734, "learning_rate": 3.0095857963968024e-06, "loss": 20.9019, "step": 6992 }, { "epoch": 0.6490023201856149, "grad_norm": 42.179744720458984, "learning_rate": 3.0081781437996866e-06, "loss": 22.7525, "step": 6993 }, { "epoch": 0.6490951276102088, "grad_norm": 38.76191711425781, "learning_rate": 3.0067706788271267e-06, "loss": 23.8958, "step": 6994 }, { "epoch": 0.6491879350348028, "grad_norm": 40.01629638671875, "learning_rate": 3.005363401611704e-06, "loss": 23.2116, "step": 6995 }, { "epoch": 0.6492807424593967, "grad_norm": 40.82072448730469, "learning_rate": 3.0039563122859815e-06, "loss": 22.4522, "step": 6996 }, { "epoch": 0.6493735498839908, "grad_norm": 41.04677200317383, "learning_rate": 3.0025494109825014e-06, "loss": 22.3014, "step": 6997 }, { "epoch": 0.6494663573085847, "grad_norm": 57.97917938232422, "learning_rate": 3.001142697833791e-06, "loss": 21.9953, "step": 6998 }, { "epoch": 0.6495591647331787, "grad_norm": 52.323970794677734, "learning_rate": 2.9997361729723606e-06, "loss": 22.5156, "step": 6999 }, { "epoch": 0.6496519721577726, "grad_norm": 43.49831771850586, "learning_rate": 2.998329836530699e-06, "loss": 23.8533, "step": 7000 }, { "epoch": 0.6497447795823665, "grad_norm": 39.88275909423828, "learning_rate": 2.99692368864128e-06, "loss": 21.9917, "step": 7001 }, { "epoch": 0.6498375870069606, "grad_norm": 53.68717575073242, "learning_rate": 2.995517729436561e-06, "loss": 22.3402, "step": 7002 }, { "epoch": 0.6499303944315545, "grad_norm": 42.109554290771484, "learning_rate": 2.9941119590489777e-06, "loss": 22.8593, "step": 7003 }, { "epoch": 0.6500232018561485, "grad_norm": 42.29207992553711, "learning_rate": 2.99270637761095e-06, "loss": 24.5214, "step": 7004 }, { "epoch": 0.6501160092807424, "grad_norm": 46.63147735595703, "learning_rate": 2.991300985254883e-06, "loss": 22.6697, "step": 7005 }, { "epoch": 0.6502088167053365, "grad_norm": 37.841094970703125, "learning_rate": 2.989895782113157e-06, "loss": 22.8169, "step": 7006 }, { "epoch": 0.6503016241299304, "grad_norm": 40.26519012451172, "learning_rate": 2.9884907683181407e-06, "loss": 23.1852, "step": 7007 }, { "epoch": 0.6503944315545244, "grad_norm": 38.77997970581055, "learning_rate": 2.9870859440021845e-06, "loss": 23.5261, "step": 7008 }, { "epoch": 0.6504872389791183, "grad_norm": 44.439422607421875, "learning_rate": 2.9856813092976155e-06, "loss": 24.0275, "step": 7009 }, { "epoch": 0.6505800464037123, "grad_norm": 40.891109466552734, "learning_rate": 2.984276864336749e-06, "loss": 23.0493, "step": 7010 }, { "epoch": 0.6506728538283063, "grad_norm": 42.332847595214844, "learning_rate": 2.9828726092518813e-06, "loss": 23.0067, "step": 7011 }, { "epoch": 0.6507656612529003, "grad_norm": 42.09808349609375, "learning_rate": 2.9814685441752867e-06, "loss": 23.8578, "step": 7012 }, { "epoch": 0.6508584686774942, "grad_norm": 44.451744079589844, "learning_rate": 2.9800646692392255e-06, "loss": 25.1987, "step": 7013 }, { "epoch": 0.6509512761020881, "grad_norm": 44.75514602661133, "learning_rate": 2.9786609845759416e-06, "loss": 23.3174, "step": 7014 }, { "epoch": 0.6510440835266821, "grad_norm": 45.48393630981445, "learning_rate": 2.9772574903176556e-06, "loss": 22.9489, "step": 7015 }, { "epoch": 0.6511368909512761, "grad_norm": 44.468040466308594, "learning_rate": 2.975854186596574e-06, "loss": 22.9505, "step": 7016 }, { "epoch": 0.6512296983758701, "grad_norm": 37.748958587646484, "learning_rate": 2.974451073544886e-06, "loss": 22.8598, "step": 7017 }, { "epoch": 0.651322505800464, "grad_norm": 60.18708801269531, "learning_rate": 2.9730481512947578e-06, "loss": 21.9724, "step": 7018 }, { "epoch": 0.651415313225058, "grad_norm": 48.3850212097168, "learning_rate": 2.9716454199783436e-06, "loss": 24.5782, "step": 7019 }, { "epoch": 0.651508120649652, "grad_norm": 43.511966705322266, "learning_rate": 2.970242879727778e-06, "loss": 23.4575, "step": 7020 }, { "epoch": 0.651600928074246, "grad_norm": 43.35303497314453, "learning_rate": 2.9688405306751743e-06, "loss": 23.6677, "step": 7021 }, { "epoch": 0.6516937354988399, "grad_norm": 36.749053955078125, "learning_rate": 2.9674383729526313e-06, "loss": 22.5838, "step": 7022 }, { "epoch": 0.6517865429234339, "grad_norm": 42.15507125854492, "learning_rate": 2.9660364066922303e-06, "loss": 22.889, "step": 7023 }, { "epoch": 0.6518793503480278, "grad_norm": 40.163509368896484, "learning_rate": 2.96463463202603e-06, "loss": 22.5881, "step": 7024 }, { "epoch": 0.6519721577726219, "grad_norm": 47.9214973449707, "learning_rate": 2.963233049086075e-06, "loss": 24.4832, "step": 7025 }, { "epoch": 0.6520649651972158, "grad_norm": 44.65445327758789, "learning_rate": 2.9618316580043915e-06, "loss": 23.9864, "step": 7026 }, { "epoch": 0.6521577726218097, "grad_norm": 44.30995559692383, "learning_rate": 2.960430458912988e-06, "loss": 25.4724, "step": 7027 }, { "epoch": 0.6522505800464037, "grad_norm": 39.48889923095703, "learning_rate": 2.9590294519438524e-06, "loss": 23.3665, "step": 7028 }, { "epoch": 0.6523433874709976, "grad_norm": 41.24784851074219, "learning_rate": 2.957628637228954e-06, "loss": 22.583, "step": 7029 }, { "epoch": 0.6524361948955917, "grad_norm": 41.46567916870117, "learning_rate": 2.956228014900251e-06, "loss": 23.0378, "step": 7030 }, { "epoch": 0.6525290023201856, "grad_norm": 40.65157699584961, "learning_rate": 2.9548275850896746e-06, "loss": 22.7549, "step": 7031 }, { "epoch": 0.6526218097447796, "grad_norm": 42.4113883972168, "learning_rate": 2.953427347929142e-06, "loss": 24.4794, "step": 7032 }, { "epoch": 0.6527146171693735, "grad_norm": 39.44357681274414, "learning_rate": 2.952027303550553e-06, "loss": 23.2444, "step": 7033 }, { "epoch": 0.6528074245939676, "grad_norm": 42.74712371826172, "learning_rate": 2.95062745208579e-06, "loss": 23.6175, "step": 7034 }, { "epoch": 0.6529002320185615, "grad_norm": 45.27143478393555, "learning_rate": 2.9492277936667115e-06, "loss": 22.3304, "step": 7035 }, { "epoch": 0.6529930394431555, "grad_norm": 38.788978576660156, "learning_rate": 2.947828328425163e-06, "loss": 22.7315, "step": 7036 }, { "epoch": 0.6530858468677494, "grad_norm": 46.339481353759766, "learning_rate": 2.9464290564929743e-06, "loss": 23.1828, "step": 7037 }, { "epoch": 0.6531786542923433, "grad_norm": 43.135894775390625, "learning_rate": 2.9450299780019476e-06, "loss": 22.8317, "step": 7038 }, { "epoch": 0.6532714617169374, "grad_norm": 46.00092697143555, "learning_rate": 2.943631093083875e-06, "loss": 23.9769, "step": 7039 }, { "epoch": 0.6533642691415313, "grad_norm": 45.561180114746094, "learning_rate": 2.942232401870531e-06, "loss": 21.9337, "step": 7040 }, { "epoch": 0.6534570765661253, "grad_norm": 41.51706314086914, "learning_rate": 2.9408339044936635e-06, "loss": 23.0264, "step": 7041 }, { "epoch": 0.6535498839907192, "grad_norm": 41.9720458984375, "learning_rate": 2.93943560108501e-06, "loss": 22.7085, "step": 7042 }, { "epoch": 0.6536426914153132, "grad_norm": 39.17914581298828, "learning_rate": 2.938037491776288e-06, "loss": 23.8639, "step": 7043 }, { "epoch": 0.6537354988399072, "grad_norm": 44.957183837890625, "learning_rate": 2.936639576699194e-06, "loss": 22.6824, "step": 7044 }, { "epoch": 0.6538283062645012, "grad_norm": 40.396629333496094, "learning_rate": 2.935241855985408e-06, "loss": 22.9706, "step": 7045 }, { "epoch": 0.6539211136890951, "grad_norm": 40.67278289794922, "learning_rate": 2.933844329766594e-06, "loss": 24.4639, "step": 7046 }, { "epoch": 0.6540139211136891, "grad_norm": 44.280128479003906, "learning_rate": 2.932446998174393e-06, "loss": 22.7075, "step": 7047 }, { "epoch": 0.6541067285382831, "grad_norm": 45.113868713378906, "learning_rate": 2.9310498613404305e-06, "loss": 25.9974, "step": 7048 }, { "epoch": 0.654199535962877, "grad_norm": 39.737342834472656, "learning_rate": 2.9296529193963162e-06, "loss": 23.5855, "step": 7049 }, { "epoch": 0.654292343387471, "grad_norm": 43.511417388916016, "learning_rate": 2.9282561724736335e-06, "loss": 25.0111, "step": 7050 }, { "epoch": 0.6543851508120649, "grad_norm": 50.3613395690918, "learning_rate": 2.926859620703956e-06, "loss": 23.8789, "step": 7051 }, { "epoch": 0.6544779582366589, "grad_norm": 41.5122184753418, "learning_rate": 2.925463264218835e-06, "loss": 23.6905, "step": 7052 }, { "epoch": 0.6545707656612529, "grad_norm": 41.39643096923828, "learning_rate": 2.9240671031498015e-06, "loss": 22.5044, "step": 7053 }, { "epoch": 0.6546635730858469, "grad_norm": 41.642173767089844, "learning_rate": 2.9226711376283716e-06, "loss": 23.3926, "step": 7054 }, { "epoch": 0.6547563805104408, "grad_norm": 46.002498626708984, "learning_rate": 2.921275367786044e-06, "loss": 22.4376, "step": 7055 }, { "epoch": 0.6548491879350348, "grad_norm": 39.435794830322266, "learning_rate": 2.9198797937542935e-06, "loss": 22.8935, "step": 7056 }, { "epoch": 0.6549419953596288, "grad_norm": 38.843849182128906, "learning_rate": 2.91848441566458e-06, "loss": 23.3559, "step": 7057 }, { "epoch": 0.6550348027842228, "grad_norm": 42.30180740356445, "learning_rate": 2.917089233648346e-06, "loss": 23.6592, "step": 7058 }, { "epoch": 0.6551276102088167, "grad_norm": 40.30683517456055, "learning_rate": 2.915694247837014e-06, "loss": 23.5004, "step": 7059 }, { "epoch": 0.6552204176334107, "grad_norm": 40.5025634765625, "learning_rate": 2.9142994583619856e-06, "loss": 22.649, "step": 7060 }, { "epoch": 0.6553132250580046, "grad_norm": 57.76958465576172, "learning_rate": 2.912904865354652e-06, "loss": 21.9898, "step": 7061 }, { "epoch": 0.6554060324825987, "grad_norm": 41.03461837768555, "learning_rate": 2.9115104689463724e-06, "loss": 20.7835, "step": 7062 }, { "epoch": 0.6554988399071926, "grad_norm": 40.83952331542969, "learning_rate": 2.9101162692685047e-06, "loss": 22.1683, "step": 7063 }, { "epoch": 0.6555916473317865, "grad_norm": 42.025177001953125, "learning_rate": 2.9087222664523697e-06, "loss": 22.4717, "step": 7064 }, { "epoch": 0.6556844547563805, "grad_norm": 40.982872009277344, "learning_rate": 2.907328460629284e-06, "loss": 23.4206, "step": 7065 }, { "epoch": 0.6557772621809744, "grad_norm": 65.51336669921875, "learning_rate": 2.905934851930541e-06, "loss": 21.8587, "step": 7066 }, { "epoch": 0.6558700696055685, "grad_norm": 41.761898040771484, "learning_rate": 2.9045414404874135e-06, "loss": 22.2093, "step": 7067 }, { "epoch": 0.6559628770301624, "grad_norm": 46.87989044189453, "learning_rate": 2.903148226431155e-06, "loss": 23.1572, "step": 7068 }, { "epoch": 0.6560556844547564, "grad_norm": 43.88209533691406, "learning_rate": 2.9017552098930103e-06, "loss": 23.7793, "step": 7069 }, { "epoch": 0.6561484918793503, "grad_norm": 48.2603759765625, "learning_rate": 2.900362391004189e-06, "loss": 23.0524, "step": 7070 }, { "epoch": 0.6562412993039444, "grad_norm": 44.78620147705078, "learning_rate": 2.898969769895897e-06, "loss": 21.1688, "step": 7071 }, { "epoch": 0.6563341067285383, "grad_norm": 42.70005798339844, "learning_rate": 2.8975773466993136e-06, "loss": 22.0547, "step": 7072 }, { "epoch": 0.6564269141531323, "grad_norm": 40.47239685058594, "learning_rate": 2.8961851215456015e-06, "loss": 23.6782, "step": 7073 }, { "epoch": 0.6565197215777262, "grad_norm": 50.9987907409668, "learning_rate": 2.8947930945659043e-06, "loss": 23.8358, "step": 7074 }, { "epoch": 0.6566125290023201, "grad_norm": 42.56599807739258, "learning_rate": 2.8934012658913513e-06, "loss": 21.3328, "step": 7075 }, { "epoch": 0.6567053364269142, "grad_norm": 47.69123077392578, "learning_rate": 2.8920096356530424e-06, "loss": 24.3634, "step": 7076 }, { "epoch": 0.6567981438515081, "grad_norm": 53.20855712890625, "learning_rate": 2.8906182039820707e-06, "loss": 22.7599, "step": 7077 }, { "epoch": 0.6568909512761021, "grad_norm": 38.84931564331055, "learning_rate": 2.8892269710095045e-06, "loss": 24.4542, "step": 7078 }, { "epoch": 0.656983758700696, "grad_norm": 39.5160026550293, "learning_rate": 2.8878359368663945e-06, "loss": 23.0214, "step": 7079 }, { "epoch": 0.65707656612529, "grad_norm": 37.68891143798828, "learning_rate": 2.8864451016837703e-06, "loss": 22.8705, "step": 7080 }, { "epoch": 0.657169373549884, "grad_norm": 41.38079071044922, "learning_rate": 2.88505446559265e-06, "loss": 22.298, "step": 7081 }, { "epoch": 0.657262180974478, "grad_norm": 40.015506744384766, "learning_rate": 2.8836640287240213e-06, "loss": 24.2328, "step": 7082 }, { "epoch": 0.6573549883990719, "grad_norm": 40.0511589050293, "learning_rate": 2.882273791208865e-06, "loss": 23.8553, "step": 7083 }, { "epoch": 0.6574477958236659, "grad_norm": 40.676753997802734, "learning_rate": 2.8808837531781374e-06, "loss": 23.1303, "step": 7084 }, { "epoch": 0.6575406032482599, "grad_norm": 36.10685729980469, "learning_rate": 2.879493914762774e-06, "loss": 21.6772, "step": 7085 }, { "epoch": 0.6576334106728539, "grad_norm": 45.27485656738281, "learning_rate": 2.878104276093695e-06, "loss": 23.3015, "step": 7086 }, { "epoch": 0.6577262180974478, "grad_norm": 39.04619598388672, "learning_rate": 2.8767148373018045e-06, "loss": 22.6187, "step": 7087 }, { "epoch": 0.6578190255220417, "grad_norm": 48.35505294799805, "learning_rate": 2.8753255985179773e-06, "loss": 25.538, "step": 7088 }, { "epoch": 0.6579118329466357, "grad_norm": 45.57468795776367, "learning_rate": 2.8739365598730816e-06, "loss": 23.5147, "step": 7089 }, { "epoch": 0.6580046403712297, "grad_norm": 39.16690444946289, "learning_rate": 2.87254772149796e-06, "loss": 22.5628, "step": 7090 }, { "epoch": 0.6580974477958237, "grad_norm": 37.76374053955078, "learning_rate": 2.871159083523436e-06, "loss": 22.5537, "step": 7091 }, { "epoch": 0.6581902552204176, "grad_norm": 42.3433952331543, "learning_rate": 2.869770646080316e-06, "loss": 21.8116, "step": 7092 }, { "epoch": 0.6582830626450116, "grad_norm": 45.55068588256836, "learning_rate": 2.8683824092993897e-06, "loss": 22.4554, "step": 7093 }, { "epoch": 0.6583758700696055, "grad_norm": 53.118011474609375, "learning_rate": 2.8669943733114236e-06, "loss": 23.3371, "step": 7094 }, { "epoch": 0.6584686774941996, "grad_norm": 44.637725830078125, "learning_rate": 2.865606538247168e-06, "loss": 22.5436, "step": 7095 }, { "epoch": 0.6585614849187935, "grad_norm": 42.122100830078125, "learning_rate": 2.8642189042373526e-06, "loss": 24.9476, "step": 7096 }, { "epoch": 0.6586542923433875, "grad_norm": 49.61283874511719, "learning_rate": 2.862831471412689e-06, "loss": 24.7919, "step": 7097 }, { "epoch": 0.6587470997679814, "grad_norm": 45.60297775268555, "learning_rate": 2.8614442399038713e-06, "loss": 23.3361, "step": 7098 }, { "epoch": 0.6588399071925755, "grad_norm": 49.02080154418945, "learning_rate": 2.860057209841569e-06, "loss": 22.4252, "step": 7099 }, { "epoch": 0.6589327146171694, "grad_norm": 41.01652908325195, "learning_rate": 2.858670381356442e-06, "loss": 22.415, "step": 7100 }, { "epoch": 0.6590255220417633, "grad_norm": 46.16868591308594, "learning_rate": 2.8572837545791244e-06, "loss": 22.0433, "step": 7101 }, { "epoch": 0.6591183294663573, "grad_norm": 41.613521575927734, "learning_rate": 2.855897329640232e-06, "loss": 23.4277, "step": 7102 }, { "epoch": 0.6592111368909512, "grad_norm": 39.625022888183594, "learning_rate": 2.854511106670361e-06, "loss": 23.2557, "step": 7103 }, { "epoch": 0.6593039443155453, "grad_norm": 38.71010971069336, "learning_rate": 2.853125085800096e-06, "loss": 23.7384, "step": 7104 }, { "epoch": 0.6593967517401392, "grad_norm": 40.93933868408203, "learning_rate": 2.851739267159988e-06, "loss": 21.4066, "step": 7105 }, { "epoch": 0.6594895591647332, "grad_norm": 38.39805221557617, "learning_rate": 2.8503536508805853e-06, "loss": 24.684, "step": 7106 }, { "epoch": 0.6595823665893271, "grad_norm": 40.8694953918457, "learning_rate": 2.8489682370924054e-06, "loss": 26.0953, "step": 7107 }, { "epoch": 0.6596751740139211, "grad_norm": 37.00065994262695, "learning_rate": 2.8475830259259516e-06, "loss": 24.1932, "step": 7108 }, { "epoch": 0.6597679814385151, "grad_norm": 37.45126724243164, "learning_rate": 2.8461980175117064e-06, "loss": 24.133, "step": 7109 }, { "epoch": 0.6598607888631091, "grad_norm": 42.47001647949219, "learning_rate": 2.8448132119801387e-06, "loss": 25.0594, "step": 7110 }, { "epoch": 0.659953596287703, "grad_norm": 37.30350875854492, "learning_rate": 2.843428609461685e-06, "loss": 23.9334, "step": 7111 }, { "epoch": 0.660046403712297, "grad_norm": 34.4742546081543, "learning_rate": 2.8420442100867795e-06, "loss": 22.3392, "step": 7112 }, { "epoch": 0.660139211136891, "grad_norm": 36.1839599609375, "learning_rate": 2.8406600139858255e-06, "loss": 22.5485, "step": 7113 }, { "epoch": 0.660232018561485, "grad_norm": 43.5017204284668, "learning_rate": 2.839276021289211e-06, "loss": 22.7637, "step": 7114 }, { "epoch": 0.6603248259860789, "grad_norm": 42.10643005371094, "learning_rate": 2.8378922321273033e-06, "loss": 23.986, "step": 7115 }, { "epoch": 0.6604176334106728, "grad_norm": 36.38426971435547, "learning_rate": 2.836508646630457e-06, "loss": 23.8457, "step": 7116 }, { "epoch": 0.6605104408352668, "grad_norm": 38.151123046875, "learning_rate": 2.835125264928995e-06, "loss": 22.9167, "step": 7117 }, { "epoch": 0.6606032482598608, "grad_norm": 41.43521499633789, "learning_rate": 2.833742087153234e-06, "loss": 22.3681, "step": 7118 }, { "epoch": 0.6606960556844548, "grad_norm": 46.5932731628418, "learning_rate": 2.832359113433463e-06, "loss": 25.0909, "step": 7119 }, { "epoch": 0.6607888631090487, "grad_norm": 41.585514068603516, "learning_rate": 2.830976343899956e-06, "loss": 24.3203, "step": 7120 }, { "epoch": 0.6608816705336427, "grad_norm": 42.011024475097656, "learning_rate": 2.829593778682964e-06, "loss": 22.049, "step": 7121 }, { "epoch": 0.6609744779582366, "grad_norm": 49.14251708984375, "learning_rate": 2.828211417912727e-06, "loss": 26.8909, "step": 7122 }, { "epoch": 0.6610672853828307, "grad_norm": 34.83427047729492, "learning_rate": 2.8268292617194515e-06, "loss": 22.2774, "step": 7123 }, { "epoch": 0.6611600928074246, "grad_norm": 37.74324035644531, "learning_rate": 2.8254473102333398e-06, "loss": 20.2186, "step": 7124 }, { "epoch": 0.6612529002320185, "grad_norm": 41.60801315307617, "learning_rate": 2.8240655635845653e-06, "loss": 23.4707, "step": 7125 }, { "epoch": 0.6613457076566125, "grad_norm": 43.21218490600586, "learning_rate": 2.8226840219032857e-06, "loss": 27.9109, "step": 7126 }, { "epoch": 0.6614385150812065, "grad_norm": 41.897762298583984, "learning_rate": 2.8213026853196367e-06, "loss": 24.2876, "step": 7127 }, { "epoch": 0.6615313225058005, "grad_norm": 42.797298431396484, "learning_rate": 2.8199215539637427e-06, "loss": 23.6028, "step": 7128 }, { "epoch": 0.6616241299303944, "grad_norm": 40.25126647949219, "learning_rate": 2.8185406279656945e-06, "loss": 22.5668, "step": 7129 }, { "epoch": 0.6617169373549884, "grad_norm": 42.37539291381836, "learning_rate": 2.817159907455578e-06, "loss": 21.9242, "step": 7130 }, { "epoch": 0.6618097447795823, "grad_norm": 42.67807388305664, "learning_rate": 2.815779392563452e-06, "loss": 23.7734, "step": 7131 }, { "epoch": 0.6619025522041764, "grad_norm": 40.43989181518555, "learning_rate": 2.814399083419357e-06, "loss": 23.392, "step": 7132 }, { "epoch": 0.6619953596287703, "grad_norm": 41.27726745605469, "learning_rate": 2.8130189801533136e-06, "loss": 23.099, "step": 7133 }, { "epoch": 0.6620881670533643, "grad_norm": 40.93647384643555, "learning_rate": 2.8116390828953257e-06, "loss": 21.8395, "step": 7134 }, { "epoch": 0.6621809744779582, "grad_norm": 42.80657196044922, "learning_rate": 2.8102593917753733e-06, "loss": 24.2932, "step": 7135 }, { "epoch": 0.6622737819025521, "grad_norm": 37.54761505126953, "learning_rate": 2.808879906923424e-06, "loss": 24.2606, "step": 7136 }, { "epoch": 0.6623665893271462, "grad_norm": 37.63947296142578, "learning_rate": 2.80750062846942e-06, "loss": 22.98, "step": 7137 }, { "epoch": 0.6624593967517401, "grad_norm": 39.54462432861328, "learning_rate": 2.806121556543285e-06, "loss": 23.8373, "step": 7138 }, { "epoch": 0.6625522041763341, "grad_norm": 38.92192459106445, "learning_rate": 2.8047426912749253e-06, "loss": 24.3941, "step": 7139 }, { "epoch": 0.662645011600928, "grad_norm": 70.2791519165039, "learning_rate": 2.8033640327942235e-06, "loss": 22.0322, "step": 7140 }, { "epoch": 0.6627378190255221, "grad_norm": 40.11421585083008, "learning_rate": 2.8019855812310504e-06, "loss": 21.5995, "step": 7141 }, { "epoch": 0.662830626450116, "grad_norm": 40.72617721557617, "learning_rate": 2.80060733671525e-06, "loss": 23.4142, "step": 7142 }, { "epoch": 0.66292343387471, "grad_norm": 38.919471740722656, "learning_rate": 2.79922929937665e-06, "loss": 21.7831, "step": 7143 }, { "epoch": 0.6630162412993039, "grad_norm": 38.81745147705078, "learning_rate": 2.7978514693450575e-06, "loss": 22.5775, "step": 7144 }, { "epoch": 0.6631090487238979, "grad_norm": 41.633872985839844, "learning_rate": 2.7964738467502607e-06, "loss": 23.4396, "step": 7145 }, { "epoch": 0.6632018561484919, "grad_norm": 43.562171936035156, "learning_rate": 2.7950964317220266e-06, "loss": 22.5157, "step": 7146 }, { "epoch": 0.6632946635730859, "grad_norm": 37.809391021728516, "learning_rate": 2.7937192243901077e-06, "loss": 23.8892, "step": 7147 }, { "epoch": 0.6633874709976798, "grad_norm": 40.60794448852539, "learning_rate": 2.7923422248842315e-06, "loss": 24.3165, "step": 7148 }, { "epoch": 0.6634802784222738, "grad_norm": 40.47639465332031, "learning_rate": 2.7909654333341087e-06, "loss": 23.298, "step": 7149 }, { "epoch": 0.6635730858468677, "grad_norm": 39.92625427246094, "learning_rate": 2.7895888498694256e-06, "loss": 22.5237, "step": 7150 }, { "epoch": 0.6636658932714617, "grad_norm": 40.34151077270508, "learning_rate": 2.788212474619861e-06, "loss": 21.0197, "step": 7151 }, { "epoch": 0.6637587006960557, "grad_norm": 46.68416213989258, "learning_rate": 2.786836307715056e-06, "loss": 23.1192, "step": 7152 }, { "epoch": 0.6638515081206496, "grad_norm": 38.30409240722656, "learning_rate": 2.7854603492846504e-06, "loss": 22.9929, "step": 7153 }, { "epoch": 0.6639443155452436, "grad_norm": 41.67990493774414, "learning_rate": 2.7840845994582523e-06, "loss": 23.1545, "step": 7154 }, { "epoch": 0.6640371229698376, "grad_norm": 48.123226165771484, "learning_rate": 2.7827090583654537e-06, "loss": 24.6719, "step": 7155 }, { "epoch": 0.6641299303944316, "grad_norm": 51.579002380371094, "learning_rate": 2.7813337261358257e-06, "loss": 22.1119, "step": 7156 }, { "epoch": 0.6642227378190255, "grad_norm": 45.08042526245117, "learning_rate": 2.7799586028989273e-06, "loss": 22.2996, "step": 7157 }, { "epoch": 0.6643155452436195, "grad_norm": 39.220703125, "learning_rate": 2.778583688784283e-06, "loss": 22.4993, "step": 7158 }, { "epoch": 0.6644083526682134, "grad_norm": 67.00337982177734, "learning_rate": 2.777208983921412e-06, "loss": 22.0904, "step": 7159 }, { "epoch": 0.6645011600928075, "grad_norm": 46.73050308227539, "learning_rate": 2.7758344884398082e-06, "loss": 22.3503, "step": 7160 }, { "epoch": 0.6645939675174014, "grad_norm": 57.197330474853516, "learning_rate": 2.7744602024689426e-06, "loss": 23.6032, "step": 7161 }, { "epoch": 0.6646867749419954, "grad_norm": 42.16144943237305, "learning_rate": 2.7730861261382684e-06, "loss": 23.7604, "step": 7162 }, { "epoch": 0.6647795823665893, "grad_norm": 39.78868103027344, "learning_rate": 2.7717122595772273e-06, "loss": 22.107, "step": 7163 }, { "epoch": 0.6648723897911833, "grad_norm": 43.593074798583984, "learning_rate": 2.7703386029152246e-06, "loss": 22.2972, "step": 7164 }, { "epoch": 0.6649651972157773, "grad_norm": 44.02328109741211, "learning_rate": 2.768965156281662e-06, "loss": 22.618, "step": 7165 }, { "epoch": 0.6650580046403712, "grad_norm": 41.3726921081543, "learning_rate": 2.7675919198059125e-06, "loss": 21.7486, "step": 7166 }, { "epoch": 0.6651508120649652, "grad_norm": 42.023345947265625, "learning_rate": 2.7662188936173317e-06, "loss": 23.5696, "step": 7167 }, { "epoch": 0.6652436194895591, "grad_norm": 44.47799301147461, "learning_rate": 2.764846077845253e-06, "loss": 22.7907, "step": 7168 }, { "epoch": 0.6653364269141532, "grad_norm": 43.72517013549805, "learning_rate": 2.7634734726189983e-06, "loss": 23.1792, "step": 7169 }, { "epoch": 0.6654292343387471, "grad_norm": 40.92552185058594, "learning_rate": 2.7621010780678546e-06, "loss": 22.5993, "step": 7170 }, { "epoch": 0.6655220417633411, "grad_norm": 39.45994186401367, "learning_rate": 2.7607288943211073e-06, "loss": 23.3732, "step": 7171 }, { "epoch": 0.665614849187935, "grad_norm": 41.63050842285156, "learning_rate": 2.759356921508004e-06, "loss": 22.9508, "step": 7172 }, { "epoch": 0.665707656612529, "grad_norm": 43.054603576660156, "learning_rate": 2.757985159757787e-06, "loss": 22.2229, "step": 7173 }, { "epoch": 0.665800464037123, "grad_norm": 36.453216552734375, "learning_rate": 2.7566136091996708e-06, "loss": 23.2496, "step": 7174 }, { "epoch": 0.665893271461717, "grad_norm": 39.16847610473633, "learning_rate": 2.7552422699628527e-06, "loss": 21.3557, "step": 7175 }, { "epoch": 0.6659860788863109, "grad_norm": 45.03742599487305, "learning_rate": 2.753871142176506e-06, "loss": 21.8073, "step": 7176 }, { "epoch": 0.6660788863109048, "grad_norm": 41.15338897705078, "learning_rate": 2.752500225969793e-06, "loss": 21.5931, "step": 7177 }, { "epoch": 0.6661716937354989, "grad_norm": 47.70986557006836, "learning_rate": 2.751129521471847e-06, "loss": 21.3933, "step": 7178 }, { "epoch": 0.6662645011600928, "grad_norm": 39.18527603149414, "learning_rate": 2.7497590288117857e-06, "loss": 25.3843, "step": 7179 }, { "epoch": 0.6663573085846868, "grad_norm": 38.024078369140625, "learning_rate": 2.7483887481187067e-06, "loss": 22.4722, "step": 7180 }, { "epoch": 0.6664501160092807, "grad_norm": 40.468448638916016, "learning_rate": 2.7470186795216853e-06, "loss": 25.2102, "step": 7181 }, { "epoch": 0.6665429234338747, "grad_norm": 44.89921188354492, "learning_rate": 2.745648823149778e-06, "loss": 22.6953, "step": 7182 }, { "epoch": 0.6666357308584687, "grad_norm": 44.724761962890625, "learning_rate": 2.7442791791320246e-06, "loss": 22.5019, "step": 7183 }, { "epoch": 0.6667285382830627, "grad_norm": 38.88618469238281, "learning_rate": 2.742909747597441e-06, "loss": 21.5559, "step": 7184 }, { "epoch": 0.6668213457076566, "grad_norm": 35.116050720214844, "learning_rate": 2.741540528675023e-06, "loss": 23.4187, "step": 7185 }, { "epoch": 0.6669141531322506, "grad_norm": 39.85459899902344, "learning_rate": 2.7401715224937493e-06, "loss": 24.7798, "step": 7186 }, { "epoch": 0.6670069605568445, "grad_norm": 43.831634521484375, "learning_rate": 2.7388027291825737e-06, "loss": 23.9625, "step": 7187 }, { "epoch": 0.6670997679814386, "grad_norm": 39.74273681640625, "learning_rate": 2.737434148870437e-06, "loss": 24.4834, "step": 7188 }, { "epoch": 0.6671925754060325, "grad_norm": 38.43570327758789, "learning_rate": 2.736065781686254e-06, "loss": 22.6804, "step": 7189 }, { "epoch": 0.6672853828306264, "grad_norm": 35.51130294799805, "learning_rate": 2.7346976277589223e-06, "loss": 21.5818, "step": 7190 }, { "epoch": 0.6673781902552204, "grad_norm": 43.613895416259766, "learning_rate": 2.7333296872173173e-06, "loss": 22.8464, "step": 7191 }, { "epoch": 0.6674709976798144, "grad_norm": 42.8658332824707, "learning_rate": 2.7319619601902956e-06, "loss": 25.3317, "step": 7192 }, { "epoch": 0.6675638051044084, "grad_norm": 44.23567199707031, "learning_rate": 2.730594446806693e-06, "loss": 24.5499, "step": 7193 }, { "epoch": 0.6676566125290023, "grad_norm": 35.975337982177734, "learning_rate": 2.7292271471953287e-06, "loss": 24.5058, "step": 7194 }, { "epoch": 0.6677494199535963, "grad_norm": 44.0107421875, "learning_rate": 2.7278600614849967e-06, "loss": 22.4308, "step": 7195 }, { "epoch": 0.6678422273781902, "grad_norm": 37.497982025146484, "learning_rate": 2.726493189804473e-06, "loss": 22.4555, "step": 7196 }, { "epoch": 0.6679350348027843, "grad_norm": 47.481956481933594, "learning_rate": 2.7251265322825127e-06, "loss": 23.9982, "step": 7197 }, { "epoch": 0.6680278422273782, "grad_norm": 39.21965789794922, "learning_rate": 2.7237600890478557e-06, "loss": 22.6411, "step": 7198 }, { "epoch": 0.6681206496519722, "grad_norm": 39.36366653442383, "learning_rate": 2.7223938602292106e-06, "loss": 22.9024, "step": 7199 }, { "epoch": 0.6682134570765661, "grad_norm": 41.894447326660156, "learning_rate": 2.7210278459552786e-06, "loss": 22.2203, "step": 7200 }, { "epoch": 0.66830626450116, "grad_norm": 37.333290100097656, "learning_rate": 2.719662046354732e-06, "loss": 22.999, "step": 7201 }, { "epoch": 0.6683990719257541, "grad_norm": 40.994606018066406, "learning_rate": 2.7182964615562263e-06, "loss": 22.3376, "step": 7202 }, { "epoch": 0.668491879350348, "grad_norm": 44.511138916015625, "learning_rate": 2.716931091688394e-06, "loss": 22.2782, "step": 7203 }, { "epoch": 0.668584686774942, "grad_norm": 38.858734130859375, "learning_rate": 2.715565936879856e-06, "loss": 22.0981, "step": 7204 }, { "epoch": 0.6686774941995359, "grad_norm": 36.50456619262695, "learning_rate": 2.7142009972591975e-06, "loss": 21.2272, "step": 7205 }, { "epoch": 0.66877030162413, "grad_norm": 41.6695556640625, "learning_rate": 2.712836272955001e-06, "loss": 24.3965, "step": 7206 }, { "epoch": 0.6688631090487239, "grad_norm": 36.06101608276367, "learning_rate": 2.711471764095811e-06, "loss": 24.1063, "step": 7207 }, { "epoch": 0.6689559164733179, "grad_norm": 37.376773834228516, "learning_rate": 2.7101074708101683e-06, "loss": 21.2293, "step": 7208 }, { "epoch": 0.6690487238979118, "grad_norm": 40.28169250488281, "learning_rate": 2.7087433932265828e-06, "loss": 24.4111, "step": 7209 }, { "epoch": 0.6691415313225058, "grad_norm": 44.20171356201172, "learning_rate": 2.707379531473548e-06, "loss": 24.9871, "step": 7210 }, { "epoch": 0.6692343387470998, "grad_norm": 41.1041259765625, "learning_rate": 2.7060158856795334e-06, "loss": 24.1799, "step": 7211 }, { "epoch": 0.6693271461716938, "grad_norm": 38.810279846191406, "learning_rate": 2.704652455972997e-06, "loss": 23.0047, "step": 7212 }, { "epoch": 0.6694199535962877, "grad_norm": 39.614906311035156, "learning_rate": 2.703289242482363e-06, "loss": 22.5205, "step": 7213 }, { "epoch": 0.6695127610208816, "grad_norm": 42.6740608215332, "learning_rate": 2.7019262453360486e-06, "loss": 23.2477, "step": 7214 }, { "epoch": 0.6696055684454756, "grad_norm": 47.79921340942383, "learning_rate": 2.7005634646624412e-06, "loss": 23.1811, "step": 7215 }, { "epoch": 0.6696983758700696, "grad_norm": 40.40968704223633, "learning_rate": 2.6992009005899134e-06, "loss": 23.0269, "step": 7216 }, { "epoch": 0.6697911832946636, "grad_norm": 39.828712463378906, "learning_rate": 2.697838553246812e-06, "loss": 24.7921, "step": 7217 }, { "epoch": 0.6698839907192575, "grad_norm": 42.23349380493164, "learning_rate": 2.696476422761474e-06, "loss": 23.227, "step": 7218 }, { "epoch": 0.6699767981438515, "grad_norm": 41.56769561767578, "learning_rate": 2.6951145092621985e-06, "loss": 22.4249, "step": 7219 }, { "epoch": 0.6700696055684455, "grad_norm": 38.864776611328125, "learning_rate": 2.6937528128772816e-06, "loss": 21.9293, "step": 7220 }, { "epoch": 0.6701624129930395, "grad_norm": 37.700931549072266, "learning_rate": 2.6923913337349896e-06, "loss": 24.121, "step": 7221 }, { "epoch": 0.6702552204176334, "grad_norm": 40.27625274658203, "learning_rate": 2.6910300719635695e-06, "loss": 24.0872, "step": 7222 }, { "epoch": 0.6703480278422274, "grad_norm": 51.054561614990234, "learning_rate": 2.6896690276912486e-06, "loss": 21.6808, "step": 7223 }, { "epoch": 0.6704408352668213, "grad_norm": 46.95988082885742, "learning_rate": 2.688308201046236e-06, "loss": 22.824, "step": 7224 }, { "epoch": 0.6705336426914154, "grad_norm": 40.33201217651367, "learning_rate": 2.6869475921567167e-06, "loss": 22.3094, "step": 7225 }, { "epoch": 0.6706264501160093, "grad_norm": 39.16069030761719, "learning_rate": 2.6855872011508572e-06, "loss": 22.2662, "step": 7226 }, { "epoch": 0.6707192575406032, "grad_norm": 40.36513137817383, "learning_rate": 2.6842270281568017e-06, "loss": 22.2506, "step": 7227 }, { "epoch": 0.6708120649651972, "grad_norm": 42.55392074584961, "learning_rate": 2.6828670733026763e-06, "loss": 21.7894, "step": 7228 }, { "epoch": 0.6709048723897911, "grad_norm": 55.116981506347656, "learning_rate": 2.6815073367165823e-06, "loss": 24.5316, "step": 7229 }, { "epoch": 0.6709976798143852, "grad_norm": 45.25851058959961, "learning_rate": 2.6801478185266076e-06, "loss": 22.6737, "step": 7230 }, { "epoch": 0.6710904872389791, "grad_norm": 59.54354476928711, "learning_rate": 2.6787885188608143e-06, "loss": 22.4009, "step": 7231 }, { "epoch": 0.6711832946635731, "grad_norm": 48.0229606628418, "learning_rate": 2.6774294378472434e-06, "loss": 24.4294, "step": 7232 }, { "epoch": 0.671276102088167, "grad_norm": 51.0621452331543, "learning_rate": 2.6760705756139182e-06, "loss": 22.7371, "step": 7233 }, { "epoch": 0.6713689095127611, "grad_norm": 49.19756317138672, "learning_rate": 2.674711932288837e-06, "loss": 23.3369, "step": 7234 }, { "epoch": 0.671461716937355, "grad_norm": 41.70423889160156, "learning_rate": 2.6733535079999856e-06, "loss": 24.2216, "step": 7235 }, { "epoch": 0.671554524361949, "grad_norm": 38.226829528808594, "learning_rate": 2.6719953028753214e-06, "loss": 25.3919, "step": 7236 }, { "epoch": 0.6716473317865429, "grad_norm": 47.72661209106445, "learning_rate": 2.6706373170427845e-06, "loss": 24.0666, "step": 7237 }, { "epoch": 0.6717401392111368, "grad_norm": 44.578433990478516, "learning_rate": 2.6692795506302926e-06, "loss": 21.8494, "step": 7238 }, { "epoch": 0.6718329466357309, "grad_norm": 45.3897705078125, "learning_rate": 2.667922003765745e-06, "loss": 23.0659, "step": 7239 }, { "epoch": 0.6719257540603248, "grad_norm": 44.103187561035156, "learning_rate": 2.666564676577017e-06, "loss": 22.7718, "step": 7240 }, { "epoch": 0.6720185614849188, "grad_norm": 42.61732482910156, "learning_rate": 2.665207569191971e-06, "loss": 23.0284, "step": 7241 }, { "epoch": 0.6721113689095127, "grad_norm": 41.20086669921875, "learning_rate": 2.6638506817384346e-06, "loss": 22.0208, "step": 7242 }, { "epoch": 0.6722041763341067, "grad_norm": 43.6597900390625, "learning_rate": 2.66249401434423e-06, "loss": 22.9808, "step": 7243 }, { "epoch": 0.6722969837587007, "grad_norm": 52.21067810058594, "learning_rate": 2.6611375671371497e-06, "loss": 23.0742, "step": 7244 }, { "epoch": 0.6723897911832947, "grad_norm": 44.528297424316406, "learning_rate": 2.659781340244968e-06, "loss": 22.809, "step": 7245 }, { "epoch": 0.6724825986078886, "grad_norm": 41.25938415527344, "learning_rate": 2.658425333795435e-06, "loss": 24.134, "step": 7246 }, { "epoch": 0.6725754060324826, "grad_norm": 44.11359405517578, "learning_rate": 2.657069547916289e-06, "loss": 22.0873, "step": 7247 }, { "epoch": 0.6726682134570766, "grad_norm": 38.68235778808594, "learning_rate": 2.655713982735234e-06, "loss": 22.8565, "step": 7248 }, { "epoch": 0.6727610208816706, "grad_norm": 45.4966926574707, "learning_rate": 2.654358638379968e-06, "loss": 22.4632, "step": 7249 }, { "epoch": 0.6728538283062645, "grad_norm": 42.06833267211914, "learning_rate": 2.6530035149781576e-06, "loss": 20.5199, "step": 7250 }, { "epoch": 0.6729466357308584, "grad_norm": 44.08879852294922, "learning_rate": 2.6516486126574516e-06, "loss": 22.6568, "step": 7251 }, { "epoch": 0.6730394431554524, "grad_norm": 37.401206970214844, "learning_rate": 2.6502939315454773e-06, "loss": 22.5366, "step": 7252 }, { "epoch": 0.6731322505800464, "grad_norm": 44.68577194213867, "learning_rate": 2.648939471769848e-06, "loss": 24.3186, "step": 7253 }, { "epoch": 0.6732250580046404, "grad_norm": 53.25735855102539, "learning_rate": 2.647585233458142e-06, "loss": 22.501, "step": 7254 }, { "epoch": 0.6733178654292343, "grad_norm": 99.05010986328125, "learning_rate": 2.6462312167379325e-06, "loss": 21.0627, "step": 7255 }, { "epoch": 0.6734106728538283, "grad_norm": 48.86048126220703, "learning_rate": 2.6448774217367603e-06, "loss": 21.8895, "step": 7256 }, { "epoch": 0.6735034802784223, "grad_norm": 38.68110656738281, "learning_rate": 2.6435238485821504e-06, "loss": 23.6709, "step": 7257 }, { "epoch": 0.6735962877030163, "grad_norm": 41.91596221923828, "learning_rate": 2.642170497401605e-06, "loss": 22.6226, "step": 7258 }, { "epoch": 0.6736890951276102, "grad_norm": 42.26655197143555, "learning_rate": 2.64081736832261e-06, "loss": 21.5202, "step": 7259 }, { "epoch": 0.6737819025522042, "grad_norm": 58.75564956665039, "learning_rate": 2.6394644614726215e-06, "loss": 23.2228, "step": 7260 }, { "epoch": 0.6738747099767981, "grad_norm": 47.541419982910156, "learning_rate": 2.638111776979084e-06, "loss": 21.9778, "step": 7261 }, { "epoch": 0.6739675174013922, "grad_norm": 42.75619888305664, "learning_rate": 2.6367593149694147e-06, "loss": 24.9112, "step": 7262 }, { "epoch": 0.6740603248259861, "grad_norm": 41.836517333984375, "learning_rate": 2.635407075571014e-06, "loss": 21.4101, "step": 7263 }, { "epoch": 0.67415313225058, "grad_norm": 40.3739128112793, "learning_rate": 2.6340550589112556e-06, "loss": 21.7731, "step": 7264 }, { "epoch": 0.674245939675174, "grad_norm": 39.81386184692383, "learning_rate": 2.6327032651175035e-06, "loss": 22.4612, "step": 7265 }, { "epoch": 0.6743387470997679, "grad_norm": 41.67233657836914, "learning_rate": 2.6313516943170836e-06, "loss": 22.9502, "step": 7266 }, { "epoch": 0.674431554524362, "grad_norm": 39.441307067871094, "learning_rate": 2.630000346637317e-06, "loss": 20.6881, "step": 7267 }, { "epoch": 0.6745243619489559, "grad_norm": 41.961612701416016, "learning_rate": 2.628649222205496e-06, "loss": 23.7558, "step": 7268 }, { "epoch": 0.6746171693735499, "grad_norm": 36.50919723510742, "learning_rate": 2.6272983211488927e-06, "loss": 20.7618, "step": 7269 }, { "epoch": 0.6747099767981438, "grad_norm": 38.18784713745117, "learning_rate": 2.6259476435947563e-06, "loss": 23.3952, "step": 7270 }, { "epoch": 0.6748027842227379, "grad_norm": 38.36270523071289, "learning_rate": 2.6245971896703217e-06, "loss": 24.5224, "step": 7271 }, { "epoch": 0.6748955916473318, "grad_norm": 47.31072998046875, "learning_rate": 2.623246959502795e-06, "loss": 23.2608, "step": 7272 }, { "epoch": 0.6749883990719258, "grad_norm": 39.28356170654297, "learning_rate": 2.6218969532193655e-06, "loss": 21.8596, "step": 7273 }, { "epoch": 0.6750812064965197, "grad_norm": 37.85800552368164, "learning_rate": 2.6205471709472e-06, "loss": 24.2662, "step": 7274 }, { "epoch": 0.6751740139211136, "grad_norm": 43.34809494018555, "learning_rate": 2.6191976128134456e-06, "loss": 23.0639, "step": 7275 }, { "epoch": 0.6752668213457077, "grad_norm": 43.926578521728516, "learning_rate": 2.6178482789452254e-06, "loss": 21.3266, "step": 7276 }, { "epoch": 0.6753596287703016, "grad_norm": 49.13916015625, "learning_rate": 2.616499169469643e-06, "loss": 23.7888, "step": 7277 }, { "epoch": 0.6754524361948956, "grad_norm": 45.11201477050781, "learning_rate": 2.615150284513783e-06, "loss": 23.368, "step": 7278 }, { "epoch": 0.6755452436194895, "grad_norm": 35.99482345581055, "learning_rate": 2.613801624204707e-06, "loss": 23.8294, "step": 7279 }, { "epoch": 0.6756380510440835, "grad_norm": 43.745662689208984, "learning_rate": 2.612453188669455e-06, "loss": 22.6614, "step": 7280 }, { "epoch": 0.6757308584686775, "grad_norm": 49.3806266784668, "learning_rate": 2.611104978035043e-06, "loss": 21.7724, "step": 7281 }, { "epoch": 0.6758236658932715, "grad_norm": 40.16378402709961, "learning_rate": 2.609756992428476e-06, "loss": 22.2608, "step": 7282 }, { "epoch": 0.6759164733178654, "grad_norm": 42.50425338745117, "learning_rate": 2.608409231976723e-06, "loss": 23.0007, "step": 7283 }, { "epoch": 0.6760092807424594, "grad_norm": 43.43901824951172, "learning_rate": 2.6070616968067446e-06, "loss": 23.565, "step": 7284 }, { "epoch": 0.6761020881670534, "grad_norm": 47.9091796875, "learning_rate": 2.605714387045474e-06, "loss": 24.5771, "step": 7285 }, { "epoch": 0.6761948955916474, "grad_norm": 39.269649505615234, "learning_rate": 2.6043673028198247e-06, "loss": 23.3259, "step": 7286 }, { "epoch": 0.6762877030162413, "grad_norm": 39.18899154663086, "learning_rate": 2.6030204442566852e-06, "loss": 23.5205, "step": 7287 }, { "epoch": 0.6763805104408352, "grad_norm": 41.82522964477539, "learning_rate": 2.6016738114829333e-06, "loss": 23.1254, "step": 7288 }, { "epoch": 0.6764733178654292, "grad_norm": 39.202911376953125, "learning_rate": 2.60032740462541e-06, "loss": 24.159, "step": 7289 }, { "epoch": 0.6765661252900232, "grad_norm": 48.78193283081055, "learning_rate": 2.5989812238109504e-06, "loss": 22.2698, "step": 7290 }, { "epoch": 0.6766589327146172, "grad_norm": 37.57537841796875, "learning_rate": 2.5976352691663575e-06, "loss": 22.6477, "step": 7291 }, { "epoch": 0.6767517401392111, "grad_norm": 37.910335540771484, "learning_rate": 2.596289540818418e-06, "loss": 22.7865, "step": 7292 }, { "epoch": 0.6768445475638051, "grad_norm": 41.25690460205078, "learning_rate": 2.594944038893894e-06, "loss": 23.5076, "step": 7293 }, { "epoch": 0.676937354988399, "grad_norm": 44.18083190917969, "learning_rate": 2.5935987635195335e-06, "loss": 22.4752, "step": 7294 }, { "epoch": 0.6770301624129931, "grad_norm": 40.142677307128906, "learning_rate": 2.5922537148220516e-06, "loss": 22.5524, "step": 7295 }, { "epoch": 0.677122969837587, "grad_norm": 37.409156799316406, "learning_rate": 2.5909088929281534e-06, "loss": 22.0539, "step": 7296 }, { "epoch": 0.677215777262181, "grad_norm": 39.609317779541016, "learning_rate": 2.5895642979645153e-06, "loss": 25.2461, "step": 7297 }, { "epoch": 0.6773085846867749, "grad_norm": 41.451438903808594, "learning_rate": 2.5882199300577957e-06, "loss": 22.2154, "step": 7298 }, { "epoch": 0.677401392111369, "grad_norm": 39.45222473144531, "learning_rate": 2.5868757893346287e-06, "loss": 25.1146, "step": 7299 }, { "epoch": 0.6774941995359629, "grad_norm": 36.910343170166016, "learning_rate": 2.5855318759216334e-06, "loss": 22.8401, "step": 7300 }, { "epoch": 0.6775870069605568, "grad_norm": 41.600196838378906, "learning_rate": 2.5841881899453968e-06, "loss": 21.8139, "step": 7301 }, { "epoch": 0.6776798143851508, "grad_norm": 37.42445755004883, "learning_rate": 2.582844731532496e-06, "loss": 22.1725, "step": 7302 }, { "epoch": 0.6777726218097447, "grad_norm": 44.42643356323242, "learning_rate": 2.5815015008094794e-06, "loss": 22.6519, "step": 7303 }, { "epoch": 0.6778654292343388, "grad_norm": 43.09536361694336, "learning_rate": 2.580158497902876e-06, "loss": 24.8223, "step": 7304 }, { "epoch": 0.6779582366589327, "grad_norm": 41.72459411621094, "learning_rate": 2.578815722939191e-06, "loss": 22.4805, "step": 7305 }, { "epoch": 0.6780510440835267, "grad_norm": 35.3707275390625, "learning_rate": 2.577473176044917e-06, "loss": 21.7505, "step": 7306 }, { "epoch": 0.6781438515081206, "grad_norm": 49.036949157714844, "learning_rate": 2.5761308573465095e-06, "loss": 22.1969, "step": 7307 }, { "epoch": 0.6782366589327146, "grad_norm": 54.708499908447266, "learning_rate": 2.574788766970418e-06, "loss": 22.2939, "step": 7308 }, { "epoch": 0.6783294663573086, "grad_norm": 38.57728958129883, "learning_rate": 2.5734469050430623e-06, "loss": 22.1884, "step": 7309 }, { "epoch": 0.6784222737819026, "grad_norm": 38.917884826660156, "learning_rate": 2.5721052716908423e-06, "loss": 22.1947, "step": 7310 }, { "epoch": 0.6785150812064965, "grad_norm": 41.44322204589844, "learning_rate": 2.5707638670401366e-06, "loss": 24.0734, "step": 7311 }, { "epoch": 0.6786078886310905, "grad_norm": 43.13881301879883, "learning_rate": 2.5694226912173014e-06, "loss": 22.1459, "step": 7312 }, { "epoch": 0.6787006960556845, "grad_norm": 37.15755844116211, "learning_rate": 2.5680817443486704e-06, "loss": 23.512, "step": 7313 }, { "epoch": 0.6787935034802784, "grad_norm": 198.37753295898438, "learning_rate": 2.566741026560562e-06, "loss": 23.2126, "step": 7314 }, { "epoch": 0.6788863109048724, "grad_norm": 43.426021575927734, "learning_rate": 2.565400537979265e-06, "loss": 22.5123, "step": 7315 }, { "epoch": 0.6789791183294663, "grad_norm": 42.3310432434082, "learning_rate": 2.5640602787310515e-06, "loss": 21.7119, "step": 7316 }, { "epoch": 0.6790719257540603, "grad_norm": 43.2451057434082, "learning_rate": 2.56272024894217e-06, "loss": 21.7532, "step": 7317 }, { "epoch": 0.6791647331786543, "grad_norm": 41.346275329589844, "learning_rate": 2.5613804487388456e-06, "loss": 21.1432, "step": 7318 }, { "epoch": 0.6792575406032483, "grad_norm": 48.58888626098633, "learning_rate": 2.5600408782472886e-06, "loss": 26.3357, "step": 7319 }, { "epoch": 0.6793503480278422, "grad_norm": 43.556697845458984, "learning_rate": 2.55870153759368e-06, "loss": 22.5378, "step": 7320 }, { "epoch": 0.6794431554524362, "grad_norm": 46.47089767456055, "learning_rate": 2.557362426904184e-06, "loss": 23.0583, "step": 7321 }, { "epoch": 0.6795359628770301, "grad_norm": 50.32038497924805, "learning_rate": 2.5560235463049397e-06, "loss": 22.5967, "step": 7322 }, { "epoch": 0.6796287703016242, "grad_norm": 39.167236328125, "learning_rate": 2.5546848959220684e-06, "loss": 23.7404, "step": 7323 }, { "epoch": 0.6797215777262181, "grad_norm": 42.4825553894043, "learning_rate": 2.553346475881663e-06, "loss": 22.4671, "step": 7324 }, { "epoch": 0.679814385150812, "grad_norm": 39.74837875366211, "learning_rate": 2.552008286309805e-06, "loss": 23.5294, "step": 7325 }, { "epoch": 0.679907192575406, "grad_norm": 42.691123962402344, "learning_rate": 2.550670327332546e-06, "loss": 21.8189, "step": 7326 }, { "epoch": 0.68, "grad_norm": 40.82079315185547, "learning_rate": 2.5493325990759184e-06, "loss": 23.3179, "step": 7327 }, { "epoch": 0.680092807424594, "grad_norm": 42.870628356933594, "learning_rate": 2.5479951016659304e-06, "loss": 22.6747, "step": 7328 }, { "epoch": 0.6801856148491879, "grad_norm": 42.5228385925293, "learning_rate": 2.5466578352285775e-06, "loss": 26.5039, "step": 7329 }, { "epoch": 0.6802784222737819, "grad_norm": 45.751678466796875, "learning_rate": 2.5453207998898185e-06, "loss": 26.0076, "step": 7330 }, { "epoch": 0.6803712296983758, "grad_norm": 44.18195343017578, "learning_rate": 2.543983995775604e-06, "loss": 22.6203, "step": 7331 }, { "epoch": 0.6804640371229699, "grad_norm": 43.698036193847656, "learning_rate": 2.542647423011857e-06, "loss": 21.7977, "step": 7332 }, { "epoch": 0.6805568445475638, "grad_norm": 40.44520950317383, "learning_rate": 2.541311081724478e-06, "loss": 22.9942, "step": 7333 }, { "epoch": 0.6806496519721578, "grad_norm": 34.93204116821289, "learning_rate": 2.5399749720393465e-06, "loss": 23.8291, "step": 7334 }, { "epoch": 0.6807424593967517, "grad_norm": 40.22487258911133, "learning_rate": 2.5386390940823247e-06, "loss": 23.0472, "step": 7335 }, { "epoch": 0.6808352668213457, "grad_norm": 42.623966217041016, "learning_rate": 2.5373034479792414e-06, "loss": 23.3989, "step": 7336 }, { "epoch": 0.6809280742459397, "grad_norm": 38.42732238769531, "learning_rate": 2.5359680338559182e-06, "loss": 22.7771, "step": 7337 }, { "epoch": 0.6810208816705337, "grad_norm": 44.37903594970703, "learning_rate": 2.5346328518381447e-06, "loss": 23.35, "step": 7338 }, { "epoch": 0.6811136890951276, "grad_norm": 43.33205032348633, "learning_rate": 2.5332979020516925e-06, "loss": 20.8003, "step": 7339 }, { "epoch": 0.6812064965197215, "grad_norm": 42.79582214355469, "learning_rate": 2.5319631846223074e-06, "loss": 24.4105, "step": 7340 }, { "epoch": 0.6812993039443156, "grad_norm": 43.65955352783203, "learning_rate": 2.5306286996757235e-06, "loss": 21.6985, "step": 7341 }, { "epoch": 0.6813921113689095, "grad_norm": 43.37451934814453, "learning_rate": 2.529294447337637e-06, "loss": 22.1895, "step": 7342 }, { "epoch": 0.6814849187935035, "grad_norm": 52.64890670776367, "learning_rate": 2.5279604277337367e-06, "loss": 24.0977, "step": 7343 }, { "epoch": 0.6815777262180974, "grad_norm": 40.40608215332031, "learning_rate": 2.526626640989683e-06, "loss": 25.3594, "step": 7344 }, { "epoch": 0.6816705336426914, "grad_norm": 42.86077117919922, "learning_rate": 2.525293087231115e-06, "loss": 23.6377, "step": 7345 }, { "epoch": 0.6817633410672854, "grad_norm": 44.865657806396484, "learning_rate": 2.523959766583648e-06, "loss": 23.5438, "step": 7346 }, { "epoch": 0.6818561484918794, "grad_norm": 40.14917755126953, "learning_rate": 2.5226266791728825e-06, "loss": 21.7913, "step": 7347 }, { "epoch": 0.6819489559164733, "grad_norm": 42.7641716003418, "learning_rate": 2.5212938251243858e-06, "loss": 23.8849, "step": 7348 }, { "epoch": 0.6820417633410673, "grad_norm": 43.96195602416992, "learning_rate": 2.5199612045637156e-06, "loss": 23.0993, "step": 7349 }, { "epoch": 0.6821345707656613, "grad_norm": 44.67441940307617, "learning_rate": 2.518628817616394e-06, "loss": 22.1597, "step": 7350 }, { "epoch": 0.6822273781902553, "grad_norm": 77.81644439697266, "learning_rate": 2.5172966644079342e-06, "loss": 24.2046, "step": 7351 }, { "epoch": 0.6823201856148492, "grad_norm": 44.94004440307617, "learning_rate": 2.51596474506382e-06, "loss": 21.0298, "step": 7352 }, { "epoch": 0.6824129930394431, "grad_norm": 43.55167007446289, "learning_rate": 2.514633059709516e-06, "loss": 24.0127, "step": 7353 }, { "epoch": 0.6825058004640371, "grad_norm": 41.860015869140625, "learning_rate": 2.513301608470459e-06, "loss": 22.2982, "step": 7354 }, { "epoch": 0.6825986078886311, "grad_norm": 46.5052375793457, "learning_rate": 2.511970391472076e-06, "loss": 22.4786, "step": 7355 }, { "epoch": 0.6826914153132251, "grad_norm": 42.33362579345703, "learning_rate": 2.510639408839757e-06, "loss": 22.8358, "step": 7356 }, { "epoch": 0.682784222737819, "grad_norm": 43.837799072265625, "learning_rate": 2.5093086606988805e-06, "loss": 23.0725, "step": 7357 }, { "epoch": 0.682877030162413, "grad_norm": 42.3571662902832, "learning_rate": 2.5079781471748e-06, "loss": 23.3122, "step": 7358 }, { "epoch": 0.6829698375870069, "grad_norm": 46.80564498901367, "learning_rate": 2.5066478683928457e-06, "loss": 21.3452, "step": 7359 }, { "epoch": 0.683062645011601, "grad_norm": 48.104209899902344, "learning_rate": 2.505317824478325e-06, "loss": 21.6336, "step": 7360 }, { "epoch": 0.6831554524361949, "grad_norm": 42.22227478027344, "learning_rate": 2.5039880155565272e-06, "loss": 22.3126, "step": 7361 }, { "epoch": 0.6832482598607889, "grad_norm": 50.8147087097168, "learning_rate": 2.502658441752716e-06, "loss": 24.2991, "step": 7362 }, { "epoch": 0.6833410672853828, "grad_norm": 42.95294189453125, "learning_rate": 2.5013291031921336e-06, "loss": 21.8169, "step": 7363 }, { "epoch": 0.6834338747099769, "grad_norm": 42.96591567993164, "learning_rate": 2.5000000000000015e-06, "loss": 23.4739, "step": 7364 }, { "epoch": 0.6835266821345708, "grad_norm": 40.833473205566406, "learning_rate": 2.498671132301514e-06, "loss": 23.3462, "step": 7365 }, { "epoch": 0.6836194895591647, "grad_norm": 46.42110824584961, "learning_rate": 2.4973425002218517e-06, "loss": 23.1114, "step": 7366 }, { "epoch": 0.6837122969837587, "grad_norm": 39.97236251831055, "learning_rate": 2.4960141038861666e-06, "loss": 23.532, "step": 7367 }, { "epoch": 0.6838051044083526, "grad_norm": 47.42936325073242, "learning_rate": 2.4946859434195904e-06, "loss": 22.3924, "step": 7368 }, { "epoch": 0.6838979118329467, "grad_norm": 39.68620681762695, "learning_rate": 2.4933580189472324e-06, "loss": 23.4628, "step": 7369 }, { "epoch": 0.6839907192575406, "grad_norm": 45.53240203857422, "learning_rate": 2.492030330594179e-06, "loss": 23.2437, "step": 7370 }, { "epoch": 0.6840835266821346, "grad_norm": 41.94453430175781, "learning_rate": 2.490702878485494e-06, "loss": 22.6938, "step": 7371 }, { "epoch": 0.6841763341067285, "grad_norm": 40.288272857666016, "learning_rate": 2.4893756627462236e-06, "loss": 22.0026, "step": 7372 }, { "epoch": 0.6842691415313225, "grad_norm": 35.60032653808594, "learning_rate": 2.488048683501386e-06, "loss": 22.2486, "step": 7373 }, { "epoch": 0.6843619489559165, "grad_norm": 39.52511978149414, "learning_rate": 2.4867219408759797e-06, "loss": 21.3617, "step": 7374 }, { "epoch": 0.6844547563805105, "grad_norm": 47.02955627441406, "learning_rate": 2.4853954349949775e-06, "loss": 22.625, "step": 7375 }, { "epoch": 0.6845475638051044, "grad_norm": 47.110191345214844, "learning_rate": 2.4840691659833404e-06, "loss": 22.8758, "step": 7376 }, { "epoch": 0.6846403712296983, "grad_norm": 42.924171447753906, "learning_rate": 2.4827431339659907e-06, "loss": 21.8335, "step": 7377 }, { "epoch": 0.6847331786542924, "grad_norm": 49.973209381103516, "learning_rate": 2.481417339067843e-06, "loss": 21.5429, "step": 7378 }, { "epoch": 0.6848259860788863, "grad_norm": 42.430458068847656, "learning_rate": 2.480091781413782e-06, "loss": 22.1433, "step": 7379 }, { "epoch": 0.6849187935034803, "grad_norm": 42.98833084106445, "learning_rate": 2.478766461128672e-06, "loss": 21.6588, "step": 7380 }, { "epoch": 0.6850116009280742, "grad_norm": 43.19040298461914, "learning_rate": 2.4774413783373535e-06, "loss": 22.5151, "step": 7381 }, { "epoch": 0.6851044083526682, "grad_norm": 38.944881439208984, "learning_rate": 2.476116533164651e-06, "loss": 22.6703, "step": 7382 }, { "epoch": 0.6851972157772622, "grad_norm": 42.78664016723633, "learning_rate": 2.474791925735353e-06, "loss": 23.8799, "step": 7383 }, { "epoch": 0.6852900232018562, "grad_norm": 40.41054916381836, "learning_rate": 2.4734675561742437e-06, "loss": 24.7865, "step": 7384 }, { "epoch": 0.6853828306264501, "grad_norm": 42.94924545288086, "learning_rate": 2.4721434246060665e-06, "loss": 23.869, "step": 7385 }, { "epoch": 0.6854756380510441, "grad_norm": 39.467430114746094, "learning_rate": 2.470819531155557e-06, "loss": 21.7326, "step": 7386 }, { "epoch": 0.685568445475638, "grad_norm": 40.25508117675781, "learning_rate": 2.4694958759474214e-06, "loss": 21.9602, "step": 7387 }, { "epoch": 0.685661252900232, "grad_norm": 40.30769348144531, "learning_rate": 2.4681724591063434e-06, "loss": 23.9258, "step": 7388 }, { "epoch": 0.685754060324826, "grad_norm": 46.20238494873047, "learning_rate": 2.4668492807569847e-06, "loss": 23.9156, "step": 7389 }, { "epoch": 0.6858468677494199, "grad_norm": 42.657859802246094, "learning_rate": 2.465526341023991e-06, "loss": 23.2443, "step": 7390 }, { "epoch": 0.6859396751740139, "grad_norm": 36.676788330078125, "learning_rate": 2.464203640031972e-06, "loss": 22.5884, "step": 7391 }, { "epoch": 0.6860324825986079, "grad_norm": 43.419219970703125, "learning_rate": 2.4628811779055277e-06, "loss": 25.0528, "step": 7392 }, { "epoch": 0.6861252900232019, "grad_norm": 37.3663215637207, "learning_rate": 2.4615589547692303e-06, "loss": 20.9171, "step": 7393 }, { "epoch": 0.6862180974477958, "grad_norm": 35.64348220825195, "learning_rate": 2.460236970747629e-06, "loss": 23.2539, "step": 7394 }, { "epoch": 0.6863109048723898, "grad_norm": 45.837425231933594, "learning_rate": 2.4589152259652495e-06, "loss": 23.4903, "step": 7395 }, { "epoch": 0.6864037122969837, "grad_norm": 40.885963439941406, "learning_rate": 2.4575937205466023e-06, "loss": 22.9589, "step": 7396 }, { "epoch": 0.6864965197215778, "grad_norm": 42.447593688964844, "learning_rate": 2.456272454616162e-06, "loss": 23.2929, "step": 7397 }, { "epoch": 0.6865893271461717, "grad_norm": 37.44725036621094, "learning_rate": 2.454951428298395e-06, "loss": 24.4983, "step": 7398 }, { "epoch": 0.6866821345707657, "grad_norm": 45.398067474365234, "learning_rate": 2.453630641717737e-06, "loss": 24.0799, "step": 7399 }, { "epoch": 0.6867749419953596, "grad_norm": 36.6972541809082, "learning_rate": 2.4523100949986012e-06, "loss": 21.4782, "step": 7400 }, { "epoch": 0.6868677494199535, "grad_norm": 44.55427169799805, "learning_rate": 2.4509897882653794e-06, "loss": 22.8543, "step": 7401 }, { "epoch": 0.6869605568445476, "grad_norm": 42.167076110839844, "learning_rate": 2.449669721642446e-06, "loss": 23.7724, "step": 7402 }, { "epoch": 0.6870533642691415, "grad_norm": 43.8527946472168, "learning_rate": 2.44834989525414e-06, "loss": 24.8144, "step": 7403 }, { "epoch": 0.6871461716937355, "grad_norm": 54.53091049194336, "learning_rate": 2.4470303092247926e-06, "loss": 23.2021, "step": 7404 }, { "epoch": 0.6872389791183294, "grad_norm": 44.4552116394043, "learning_rate": 2.445710963678703e-06, "loss": 22.615, "step": 7405 }, { "epoch": 0.6873317865429235, "grad_norm": 83.21188354492188, "learning_rate": 2.4443918587401494e-06, "loss": 20.6196, "step": 7406 }, { "epoch": 0.6874245939675174, "grad_norm": 44.054561614990234, "learning_rate": 2.4430729945333877e-06, "loss": 22.6174, "step": 7407 }, { "epoch": 0.6875174013921114, "grad_norm": 42.80390548706055, "learning_rate": 2.4417543711826536e-06, "loss": 22.7936, "step": 7408 }, { "epoch": 0.6876102088167053, "grad_norm": 41.438167572021484, "learning_rate": 2.4404359888121577e-06, "loss": 24.0563, "step": 7409 }, { "epoch": 0.6877030162412993, "grad_norm": 37.30592727661133, "learning_rate": 2.4391178475460873e-06, "loss": 22.1728, "step": 7410 }, { "epoch": 0.6877958236658933, "grad_norm": 38.973854064941406, "learning_rate": 2.437799947508609e-06, "loss": 21.1508, "step": 7411 }, { "epoch": 0.6878886310904873, "grad_norm": 45.59116744995117, "learning_rate": 2.436482288823863e-06, "loss": 23.0809, "step": 7412 }, { "epoch": 0.6879814385150812, "grad_norm": 36.956809997558594, "learning_rate": 2.4351648716159727e-06, "loss": 21.5253, "step": 7413 }, { "epoch": 0.6880742459396751, "grad_norm": 40.492774963378906, "learning_rate": 2.4338476960090346e-06, "loss": 22.2842, "step": 7414 }, { "epoch": 0.6881670533642691, "grad_norm": 44.70981979370117, "learning_rate": 2.4325307621271227e-06, "loss": 25.6608, "step": 7415 }, { "epoch": 0.6882598607888631, "grad_norm": 37.671546936035156, "learning_rate": 2.431214070094289e-06, "loss": 22.1806, "step": 7416 }, { "epoch": 0.6883526682134571, "grad_norm": 43.149234771728516, "learning_rate": 2.4298976200345624e-06, "loss": 22.5437, "step": 7417 }, { "epoch": 0.688445475638051, "grad_norm": 37.82444763183594, "learning_rate": 2.4285814120719477e-06, "loss": 23.5433, "step": 7418 }, { "epoch": 0.688538283062645, "grad_norm": 41.271244049072266, "learning_rate": 2.427265446330433e-06, "loss": 22.7111, "step": 7419 }, { "epoch": 0.688631090487239, "grad_norm": 38.753963470458984, "learning_rate": 2.4259497229339725e-06, "loss": 22.6924, "step": 7420 }, { "epoch": 0.688723897911833, "grad_norm": 40.79431915283203, "learning_rate": 2.424634242006509e-06, "loss": 24.1954, "step": 7421 }, { "epoch": 0.6888167053364269, "grad_norm": 39.13860321044922, "learning_rate": 2.423319003671956e-06, "loss": 22.7344, "step": 7422 }, { "epoch": 0.6889095127610209, "grad_norm": 43.498355865478516, "learning_rate": 2.422004008054205e-06, "loss": 22.7709, "step": 7423 }, { "epoch": 0.6890023201856148, "grad_norm": 47.370460510253906, "learning_rate": 2.4206892552771242e-06, "loss": 23.5648, "step": 7424 }, { "epoch": 0.6890951276102089, "grad_norm": 42.58076095581055, "learning_rate": 2.419374745464565e-06, "loss": 22.3079, "step": 7425 }, { "epoch": 0.6891879350348028, "grad_norm": 41.926456451416016, "learning_rate": 2.418060478740344e-06, "loss": 24.5432, "step": 7426 }, { "epoch": 0.6892807424593967, "grad_norm": 42.08720016479492, "learning_rate": 2.4167464552282666e-06, "loss": 22.9885, "step": 7427 }, { "epoch": 0.6893735498839907, "grad_norm": 42.44290542602539, "learning_rate": 2.4154326750521084e-06, "loss": 22.0032, "step": 7428 }, { "epoch": 0.6894663573085846, "grad_norm": 43.59640121459961, "learning_rate": 2.4141191383356254e-06, "loss": 25.8769, "step": 7429 }, { "epoch": 0.6895591647331787, "grad_norm": 42.311100006103516, "learning_rate": 2.4128058452025467e-06, "loss": 22.7045, "step": 7430 }, { "epoch": 0.6896519721577726, "grad_norm": 46.92398452758789, "learning_rate": 2.4114927957765873e-06, "loss": 23.3673, "step": 7431 }, { "epoch": 0.6897447795823666, "grad_norm": 47.08097457885742, "learning_rate": 2.410179990181424e-06, "loss": 22.2833, "step": 7432 }, { "epoch": 0.6898375870069605, "grad_norm": 39.87556457519531, "learning_rate": 2.4088674285407284e-06, "loss": 20.886, "step": 7433 }, { "epoch": 0.6899303944315546, "grad_norm": 44.837825775146484, "learning_rate": 2.407555110978136e-06, "loss": 22.5586, "step": 7434 }, { "epoch": 0.6900232018561485, "grad_norm": 45.84519958496094, "learning_rate": 2.4062430376172648e-06, "loss": 23.7807, "step": 7435 }, { "epoch": 0.6901160092807425, "grad_norm": 39.5589485168457, "learning_rate": 2.404931208581706e-06, "loss": 24.3065, "step": 7436 }, { "epoch": 0.6902088167053364, "grad_norm": 40.14434051513672, "learning_rate": 2.403619623995038e-06, "loss": 21.0871, "step": 7437 }, { "epoch": 0.6903016241299303, "grad_norm": 41.36482620239258, "learning_rate": 2.4023082839807993e-06, "loss": 21.652, "step": 7438 }, { "epoch": 0.6903944315545244, "grad_norm": 41.97937774658203, "learning_rate": 2.4009971886625216e-06, "loss": 23.4963, "step": 7439 }, { "epoch": 0.6904872389791183, "grad_norm": 40.25575256347656, "learning_rate": 2.3996863381637046e-06, "loss": 23.0345, "step": 7440 }, { "epoch": 0.6905800464037123, "grad_norm": 40.71662521362305, "learning_rate": 2.3983757326078273e-06, "loss": 19.9916, "step": 7441 }, { "epoch": 0.6906728538283062, "grad_norm": 60.60919952392578, "learning_rate": 2.397065372118343e-06, "loss": 23.4901, "step": 7442 }, { "epoch": 0.6907656612529003, "grad_norm": 39.025699615478516, "learning_rate": 2.39575525681869e-06, "loss": 24.2224, "step": 7443 }, { "epoch": 0.6908584686774942, "grad_norm": 47.2236213684082, "learning_rate": 2.394445386832271e-06, "loss": 22.7862, "step": 7444 }, { "epoch": 0.6909512761020882, "grad_norm": 44.202274322509766, "learning_rate": 2.393135762282477e-06, "loss": 22.4723, "step": 7445 }, { "epoch": 0.6910440835266821, "grad_norm": 58.84162902832031, "learning_rate": 2.391826383292671e-06, "loss": 21.7615, "step": 7446 }, { "epoch": 0.6911368909512761, "grad_norm": 43.58500671386719, "learning_rate": 2.390517249986191e-06, "loss": 22.421, "step": 7447 }, { "epoch": 0.6912296983758701, "grad_norm": 43.51228332519531, "learning_rate": 2.3892083624863544e-06, "loss": 22.4791, "step": 7448 }, { "epoch": 0.6913225058004641, "grad_norm": 45.507083892822266, "learning_rate": 2.387899720916459e-06, "loss": 22.3207, "step": 7449 }, { "epoch": 0.691415313225058, "grad_norm": 103.58596801757812, "learning_rate": 2.386591325399769e-06, "loss": 21.829, "step": 7450 }, { "epoch": 0.691508120649652, "grad_norm": 46.74037551879883, "learning_rate": 2.3852831760595373e-06, "loss": 23.1207, "step": 7451 }, { "epoch": 0.6916009280742459, "grad_norm": 44.28102111816406, "learning_rate": 2.383975273018986e-06, "loss": 22.5108, "step": 7452 }, { "epoch": 0.69169373549884, "grad_norm": 39.23356628417969, "learning_rate": 2.382667616401317e-06, "loss": 23.9399, "step": 7453 }, { "epoch": 0.6917865429234339, "grad_norm": 43.69925308227539, "learning_rate": 2.3813602063297076e-06, "loss": 22.8512, "step": 7454 }, { "epoch": 0.6918793503480278, "grad_norm": 43.40349578857422, "learning_rate": 2.3800530429273105e-06, "loss": 21.0821, "step": 7455 }, { "epoch": 0.6919721577726218, "grad_norm": 47.42673873901367, "learning_rate": 2.3787461263172617e-06, "loss": 23.1908, "step": 7456 }, { "epoch": 0.6920649651972158, "grad_norm": 39.707984924316406, "learning_rate": 2.3774394566226665e-06, "loss": 22.1814, "step": 7457 }, { "epoch": 0.6921577726218098, "grad_norm": 40.328243255615234, "learning_rate": 2.376133033966611e-06, "loss": 21.4452, "step": 7458 }, { "epoch": 0.6922505800464037, "grad_norm": 40.0956916809082, "learning_rate": 2.374826858472154e-06, "loss": 23.7358, "step": 7459 }, { "epoch": 0.6923433874709977, "grad_norm": 36.31095886230469, "learning_rate": 2.3735209302623406e-06, "loss": 23.4411, "step": 7460 }, { "epoch": 0.6924361948955916, "grad_norm": 40.10928726196289, "learning_rate": 2.3722152494601774e-06, "loss": 23.5152, "step": 7461 }, { "epoch": 0.6925290023201857, "grad_norm": 44.329532623291016, "learning_rate": 2.3709098161886623e-06, "loss": 23.5207, "step": 7462 }, { "epoch": 0.6926218097447796, "grad_norm": 42.067508697509766, "learning_rate": 2.369604630570762e-06, "loss": 22.6327, "step": 7463 }, { "epoch": 0.6927146171693735, "grad_norm": 40.38155746459961, "learning_rate": 2.3682996927294216e-06, "loss": 21.8297, "step": 7464 }, { "epoch": 0.6928074245939675, "grad_norm": 41.18090057373047, "learning_rate": 2.366995002787561e-06, "loss": 23.0547, "step": 7465 }, { "epoch": 0.6929002320185614, "grad_norm": 40.856658935546875, "learning_rate": 2.365690560868084e-06, "loss": 24.5649, "step": 7466 }, { "epoch": 0.6929930394431555, "grad_norm": 60.43521499633789, "learning_rate": 2.364386367093859e-06, "loss": 25.4382, "step": 7467 }, { "epoch": 0.6930858468677494, "grad_norm": 36.11002731323242, "learning_rate": 2.3630824215877426e-06, "loss": 20.417, "step": 7468 }, { "epoch": 0.6931786542923434, "grad_norm": 38.1363639831543, "learning_rate": 2.3617787244725617e-06, "loss": 23.5608, "step": 7469 }, { "epoch": 0.6932714617169373, "grad_norm": 37.6915168762207, "learning_rate": 2.3604752758711207e-06, "loss": 22.5654, "step": 7470 }, { "epoch": 0.6933642691415314, "grad_norm": 39.87705993652344, "learning_rate": 2.3591720759061997e-06, "loss": 21.1921, "step": 7471 }, { "epoch": 0.6934570765661253, "grad_norm": 42.944698333740234, "learning_rate": 2.357869124700563e-06, "loss": 23.3306, "step": 7472 }, { "epoch": 0.6935498839907193, "grad_norm": 46.74530029296875, "learning_rate": 2.356566422376937e-06, "loss": 23.5104, "step": 7473 }, { "epoch": 0.6936426914153132, "grad_norm": 45.80442428588867, "learning_rate": 2.3552639690580385e-06, "loss": 22.7524, "step": 7474 }, { "epoch": 0.6937354988399071, "grad_norm": 43.468170166015625, "learning_rate": 2.353961764866554e-06, "loss": 22.0254, "step": 7475 }, { "epoch": 0.6938283062645012, "grad_norm": 41.931312561035156, "learning_rate": 2.3526598099251473e-06, "loss": 24.6702, "step": 7476 }, { "epoch": 0.6939211136890951, "grad_norm": 62.65961837768555, "learning_rate": 2.3513581043564583e-06, "loss": 25.8258, "step": 7477 }, { "epoch": 0.6940139211136891, "grad_norm": 45.847023010253906, "learning_rate": 2.350056648283109e-06, "loss": 23.2582, "step": 7478 }, { "epoch": 0.694106728538283, "grad_norm": 40.971378326416016, "learning_rate": 2.3487554418276858e-06, "loss": 22.461, "step": 7479 }, { "epoch": 0.694199535962877, "grad_norm": 39.83892822265625, "learning_rate": 2.347454485112765e-06, "loss": 22.2275, "step": 7480 }, { "epoch": 0.694292343387471, "grad_norm": 40.51572036743164, "learning_rate": 2.3461537782608916e-06, "loss": 23.3214, "step": 7481 }, { "epoch": 0.694385150812065, "grad_norm": 39.49013900756836, "learning_rate": 2.3448533213945884e-06, "loss": 23.4523, "step": 7482 }, { "epoch": 0.6944779582366589, "grad_norm": 39.79472732543945, "learning_rate": 2.3435531146363537e-06, "loss": 23.1122, "step": 7483 }, { "epoch": 0.6945707656612529, "grad_norm": 39.66542434692383, "learning_rate": 2.34225315810867e-06, "loss": 22.6347, "step": 7484 }, { "epoch": 0.6946635730858469, "grad_norm": 47.13186264038086, "learning_rate": 2.3409534519339804e-06, "loss": 23.2217, "step": 7485 }, { "epoch": 0.6947563805104409, "grad_norm": 46.31264877319336, "learning_rate": 2.3396539962347215e-06, "loss": 22.7548, "step": 7486 }, { "epoch": 0.6948491879350348, "grad_norm": 44.35762023925781, "learning_rate": 2.338354791133296e-06, "loss": 22.8014, "step": 7487 }, { "epoch": 0.6949419953596288, "grad_norm": 41.825660705566406, "learning_rate": 2.3370558367520856e-06, "loss": 23.9398, "step": 7488 }, { "epoch": 0.6950348027842227, "grad_norm": 42.29883575439453, "learning_rate": 2.3357571332134493e-06, "loss": 21.793, "step": 7489 }, { "epoch": 0.6951276102088167, "grad_norm": 41.08238220214844, "learning_rate": 2.3344586806397207e-06, "loss": 22.2028, "step": 7490 }, { "epoch": 0.6952204176334107, "grad_norm": 50.594390869140625, "learning_rate": 2.333160479153209e-06, "loss": 23.0723, "step": 7491 }, { "epoch": 0.6953132250580046, "grad_norm": 45.7390022277832, "learning_rate": 2.3318625288762057e-06, "loss": 25.232, "step": 7492 }, { "epoch": 0.6954060324825986, "grad_norm": 40.78838348388672, "learning_rate": 2.3305648299309726e-06, "loss": 22.0525, "step": 7493 }, { "epoch": 0.6954988399071925, "grad_norm": 42.10700988769531, "learning_rate": 2.329267382439749e-06, "loss": 22.8579, "step": 7494 }, { "epoch": 0.6955916473317866, "grad_norm": 40.29991149902344, "learning_rate": 2.3279701865247518e-06, "loss": 24.2171, "step": 7495 }, { "epoch": 0.6956844547563805, "grad_norm": 47.24592971801758, "learning_rate": 2.326673242308173e-06, "loss": 23.1715, "step": 7496 }, { "epoch": 0.6957772621809745, "grad_norm": 39.16534423828125, "learning_rate": 2.32537654991218e-06, "loss": 23.2412, "step": 7497 }, { "epoch": 0.6958700696055684, "grad_norm": 42.738067626953125, "learning_rate": 2.3240801094589217e-06, "loss": 23.5195, "step": 7498 }, { "epoch": 0.6959628770301625, "grad_norm": 44.459102630615234, "learning_rate": 2.3227839210705172e-06, "loss": 21.19, "step": 7499 }, { "epoch": 0.6960556844547564, "grad_norm": 45.176841735839844, "learning_rate": 2.321487984869064e-06, "loss": 25.3429, "step": 7500 }, { "epoch": 0.6961484918793504, "grad_norm": 38.99198913574219, "learning_rate": 2.3201923009766373e-06, "loss": 25.6455, "step": 7501 }, { "epoch": 0.6962412993039443, "grad_norm": 41.81814956665039, "learning_rate": 2.318896869515284e-06, "loss": 22.1234, "step": 7502 }, { "epoch": 0.6963341067285382, "grad_norm": 38.91356658935547, "learning_rate": 2.3176016906070347e-06, "loss": 23.2379, "step": 7503 }, { "epoch": 0.6964269141531323, "grad_norm": 46.192691802978516, "learning_rate": 2.3163067643738902e-06, "loss": 22.9469, "step": 7504 }, { "epoch": 0.6965197215777262, "grad_norm": 47.66483688354492, "learning_rate": 2.3150120909378293e-06, "loss": 23.5432, "step": 7505 }, { "epoch": 0.6966125290023202, "grad_norm": 43.01991653442383, "learning_rate": 2.313717670420804e-06, "loss": 23.9012, "step": 7506 }, { "epoch": 0.6967053364269141, "grad_norm": 41.29995346069336, "learning_rate": 2.312423502944753e-06, "loss": 22.9079, "step": 7507 }, { "epoch": 0.6967981438515081, "grad_norm": 47.08407211303711, "learning_rate": 2.3111295886315744e-06, "loss": 22.7947, "step": 7508 }, { "epoch": 0.6968909512761021, "grad_norm": 42.83054733276367, "learning_rate": 2.309835927603158e-06, "loss": 23.79, "step": 7509 }, { "epoch": 0.6969837587006961, "grad_norm": 58.0318489074707, "learning_rate": 2.3085425199813615e-06, "loss": 23.7517, "step": 7510 }, { "epoch": 0.69707656612529, "grad_norm": 52.93315887451172, "learning_rate": 2.307249365888021e-06, "loss": 23.652, "step": 7511 }, { "epoch": 0.697169373549884, "grad_norm": 51.398292541503906, "learning_rate": 2.305956465444945e-06, "loss": 22.3568, "step": 7512 }, { "epoch": 0.697262180974478, "grad_norm": 38.40415573120117, "learning_rate": 2.3046638187739285e-06, "loss": 22.7907, "step": 7513 }, { "epoch": 0.697354988399072, "grad_norm": 40.12932205200195, "learning_rate": 2.3033714259967272e-06, "loss": 23.7534, "step": 7514 }, { "epoch": 0.6974477958236659, "grad_norm": 42.78175354003906, "learning_rate": 2.302079287235088e-06, "loss": 24.5627, "step": 7515 }, { "epoch": 0.6975406032482598, "grad_norm": 43.78660583496094, "learning_rate": 2.3007874026107234e-06, "loss": 23.4369, "step": 7516 }, { "epoch": 0.6976334106728538, "grad_norm": 38.24433517456055, "learning_rate": 2.2994957722453275e-06, "loss": 21.1343, "step": 7517 }, { "epoch": 0.6977262180974478, "grad_norm": 44.467613220214844, "learning_rate": 2.2982043962605653e-06, "loss": 24.3041, "step": 7518 }, { "epoch": 0.6978190255220418, "grad_norm": 38.64802932739258, "learning_rate": 2.2969132747780875e-06, "loss": 21.1268, "step": 7519 }, { "epoch": 0.6979118329466357, "grad_norm": 45.0289306640625, "learning_rate": 2.2956224079195074e-06, "loss": 24.0183, "step": 7520 }, { "epoch": 0.6980046403712297, "grad_norm": 37.92389678955078, "learning_rate": 2.2943317958064254e-06, "loss": 22.2047, "step": 7521 }, { "epoch": 0.6980974477958236, "grad_norm": 38.72602462768555, "learning_rate": 2.293041438560414e-06, "loss": 22.2223, "step": 7522 }, { "epoch": 0.6981902552204177, "grad_norm": 37.70900344848633, "learning_rate": 2.2917513363030196e-06, "loss": 23.9829, "step": 7523 }, { "epoch": 0.6982830626450116, "grad_norm": 43.28147888183594, "learning_rate": 2.290461489155768e-06, "loss": 23.3403, "step": 7524 }, { "epoch": 0.6983758700696056, "grad_norm": 41.06464385986328, "learning_rate": 2.2891718972401593e-06, "loss": 22.3102, "step": 7525 }, { "epoch": 0.6984686774941995, "grad_norm": 44.60195541381836, "learning_rate": 2.287882560677668e-06, "loss": 22.0548, "step": 7526 }, { "epoch": 0.6985614849187936, "grad_norm": 40.37272644042969, "learning_rate": 2.2865934795897505e-06, "loss": 23.9507, "step": 7527 }, { "epoch": 0.6986542923433875, "grad_norm": 38.88156509399414, "learning_rate": 2.285304654097829e-06, "loss": 22.0231, "step": 7528 }, { "epoch": 0.6987470997679814, "grad_norm": 38.5976448059082, "learning_rate": 2.2840160843233133e-06, "loss": 22.3394, "step": 7529 }, { "epoch": 0.6988399071925754, "grad_norm": 40.416778564453125, "learning_rate": 2.2827277703875806e-06, "loss": 22.6725, "step": 7530 }, { "epoch": 0.6989327146171693, "grad_norm": 42.673011779785156, "learning_rate": 2.2814397124119874e-06, "loss": 24.2781, "step": 7531 }, { "epoch": 0.6990255220417634, "grad_norm": 40.33644485473633, "learning_rate": 2.2801519105178633e-06, "loss": 21.3296, "step": 7532 }, { "epoch": 0.6991183294663573, "grad_norm": 119.62822723388672, "learning_rate": 2.278864364826522e-06, "loss": 23.427, "step": 7533 }, { "epoch": 0.6992111368909513, "grad_norm": 43.598812103271484, "learning_rate": 2.277577075459239e-06, "loss": 21.8705, "step": 7534 }, { "epoch": 0.6993039443155452, "grad_norm": 40.635562896728516, "learning_rate": 2.2762900425372796e-06, "loss": 23.909, "step": 7535 }, { "epoch": 0.6993967517401393, "grad_norm": 40.72815704345703, "learning_rate": 2.275003266181877e-06, "loss": 22.7992, "step": 7536 }, { "epoch": 0.6994895591647332, "grad_norm": 39.93526840209961, "learning_rate": 2.273716746514243e-06, "loss": 24.7078, "step": 7537 }, { "epoch": 0.6995823665893272, "grad_norm": 43.251583099365234, "learning_rate": 2.2724304836555618e-06, "loss": 22.6992, "step": 7538 }, { "epoch": 0.6996751740139211, "grad_norm": 42.158512115478516, "learning_rate": 2.271144477726999e-06, "loss": 22.7973, "step": 7539 }, { "epoch": 0.699767981438515, "grad_norm": 39.52051544189453, "learning_rate": 2.2698587288496938e-06, "loss": 22.291, "step": 7540 }, { "epoch": 0.6998607888631091, "grad_norm": 40.54612731933594, "learning_rate": 2.268573237144758e-06, "loss": 23.4436, "step": 7541 }, { "epoch": 0.699953596287703, "grad_norm": 40.219608306884766, "learning_rate": 2.267288002733283e-06, "loss": 23.343, "step": 7542 }, { "epoch": 0.700046403712297, "grad_norm": 39.562557220458984, "learning_rate": 2.2660030257363336e-06, "loss": 23.022, "step": 7543 }, { "epoch": 0.7001392111368909, "grad_norm": 37.249420166015625, "learning_rate": 2.2647183062749505e-06, "loss": 22.8752, "step": 7544 }, { "epoch": 0.7002320185614849, "grad_norm": 42.9730339050293, "learning_rate": 2.2634338444701543e-06, "loss": 24.2184, "step": 7545 }, { "epoch": 0.7003248259860789, "grad_norm": 40.9127082824707, "learning_rate": 2.262149640442936e-06, "loss": 21.953, "step": 7546 }, { "epoch": 0.7004176334106729, "grad_norm": 45.48175048828125, "learning_rate": 2.2608656943142654e-06, "loss": 22.2796, "step": 7547 }, { "epoch": 0.7005104408352668, "grad_norm": 43.86019515991211, "learning_rate": 2.2595820062050854e-06, "loss": 23.3323, "step": 7548 }, { "epoch": 0.7006032482598608, "grad_norm": 46.62934112548828, "learning_rate": 2.2582985762363154e-06, "loss": 23.1378, "step": 7549 }, { "epoch": 0.7006960556844548, "grad_norm": 38.65616226196289, "learning_rate": 2.257015404528854e-06, "loss": 20.8071, "step": 7550 }, { "epoch": 0.7007888631090488, "grad_norm": 54.7678108215332, "learning_rate": 2.255732491203572e-06, "loss": 22.518, "step": 7551 }, { "epoch": 0.7008816705336427, "grad_norm": 41.161991119384766, "learning_rate": 2.2544498363813155e-06, "loss": 24.9943, "step": 7552 }, { "epoch": 0.7009744779582366, "grad_norm": 49.515689849853516, "learning_rate": 2.253167440182907e-06, "loss": 24.5477, "step": 7553 }, { "epoch": 0.7010672853828306, "grad_norm": 43.042022705078125, "learning_rate": 2.2518853027291487e-06, "loss": 23.1357, "step": 7554 }, { "epoch": 0.7011600928074246, "grad_norm": 40.42638397216797, "learning_rate": 2.250603424140808e-06, "loss": 24.7468, "step": 7555 }, { "epoch": 0.7012529002320186, "grad_norm": 38.62236404418945, "learning_rate": 2.2493218045386406e-06, "loss": 21.9254, "step": 7556 }, { "epoch": 0.7013457076566125, "grad_norm": 41.24299621582031, "learning_rate": 2.2480404440433694e-06, "loss": 22.6111, "step": 7557 }, { "epoch": 0.7014385150812065, "grad_norm": 43.13557434082031, "learning_rate": 2.246759342775696e-06, "loss": 20.5013, "step": 7558 }, { "epoch": 0.7015313225058004, "grad_norm": 39.92993927001953, "learning_rate": 2.245478500856294e-06, "loss": 23.2653, "step": 7559 }, { "epoch": 0.7016241299303945, "grad_norm": 43.65103530883789, "learning_rate": 2.2441979184058223e-06, "loss": 22.5817, "step": 7560 }, { "epoch": 0.7017169373549884, "grad_norm": 41.21628952026367, "learning_rate": 2.2429175955448994e-06, "loss": 22.9962, "step": 7561 }, { "epoch": 0.7018097447795824, "grad_norm": 40.907352447509766, "learning_rate": 2.2416375323941376e-06, "loss": 23.2289, "step": 7562 }, { "epoch": 0.7019025522041763, "grad_norm": 48.59254837036133, "learning_rate": 2.240357729074107e-06, "loss": 23.3106, "step": 7563 }, { "epoch": 0.7019953596287704, "grad_norm": 38.9950065612793, "learning_rate": 2.239078185705368e-06, "loss": 22.3804, "step": 7564 }, { "epoch": 0.7020881670533643, "grad_norm": 44.65127944946289, "learning_rate": 2.2377989024084486e-06, "loss": 22.1475, "step": 7565 }, { "epoch": 0.7021809744779582, "grad_norm": 40.54676818847656, "learning_rate": 2.2365198793038526e-06, "loss": 23.3185, "step": 7566 }, { "epoch": 0.7022737819025522, "grad_norm": 43.27558135986328, "learning_rate": 2.2352411165120613e-06, "loss": 21.795, "step": 7567 }, { "epoch": 0.7023665893271461, "grad_norm": 37.50407791137695, "learning_rate": 2.2339626141535343e-06, "loss": 23.0069, "step": 7568 }, { "epoch": 0.7024593967517402, "grad_norm": 44.341094970703125, "learning_rate": 2.2326843723486975e-06, "loss": 22.4018, "step": 7569 }, { "epoch": 0.7025522041763341, "grad_norm": 38.764732360839844, "learning_rate": 2.231406391217962e-06, "loss": 22.8908, "step": 7570 }, { "epoch": 0.7026450116009281, "grad_norm": 42.348548889160156, "learning_rate": 2.23012867088171e-06, "loss": 21.1752, "step": 7571 }, { "epoch": 0.702737819025522, "grad_norm": 35.249656677246094, "learning_rate": 2.2288512114602986e-06, "loss": 23.7682, "step": 7572 }, { "epoch": 0.702830626450116, "grad_norm": 36.03929138183594, "learning_rate": 2.227574013074059e-06, "loss": 23.5734, "step": 7573 }, { "epoch": 0.70292343387471, "grad_norm": 40.67552185058594, "learning_rate": 2.2262970758433067e-06, "loss": 22.0288, "step": 7574 }, { "epoch": 0.703016241299304, "grad_norm": 40.472747802734375, "learning_rate": 2.225020399888318e-06, "loss": 23.7643, "step": 7575 }, { "epoch": 0.7031090487238979, "grad_norm": 44.372459411621094, "learning_rate": 2.2237439853293584e-06, "loss": 23.1951, "step": 7576 }, { "epoch": 0.7032018561484918, "grad_norm": 39.56254959106445, "learning_rate": 2.2224678322866606e-06, "loss": 21.7643, "step": 7577 }, { "epoch": 0.7032946635730859, "grad_norm": 38.64934158325195, "learning_rate": 2.2211919408804357e-06, "loss": 23.0705, "step": 7578 }, { "epoch": 0.7033874709976798, "grad_norm": 40.02668762207031, "learning_rate": 2.2199163112308666e-06, "loss": 23.1227, "step": 7579 }, { "epoch": 0.7034802784222738, "grad_norm": 36.44013977050781, "learning_rate": 2.2186409434581197e-06, "loss": 22.2002, "step": 7580 }, { "epoch": 0.7035730858468677, "grad_norm": 43.00910186767578, "learning_rate": 2.2173658376823255e-06, "loss": 23.4863, "step": 7581 }, { "epoch": 0.7036658932714617, "grad_norm": 45.97541046142578, "learning_rate": 2.2160909940236004e-06, "loss": 22.9604, "step": 7582 }, { "epoch": 0.7037587006960557, "grad_norm": 40.82637023925781, "learning_rate": 2.2148164126020293e-06, "loss": 22.4781, "step": 7583 }, { "epoch": 0.7038515081206497, "grad_norm": 41.74089813232422, "learning_rate": 2.213542093537675e-06, "loss": 24.4232, "step": 7584 }, { "epoch": 0.7039443155452436, "grad_norm": 43.886775970458984, "learning_rate": 2.2122680369505735e-06, "loss": 24.3336, "step": 7585 }, { "epoch": 0.7040371229698376, "grad_norm": 40.61821746826172, "learning_rate": 2.21099424296074e-06, "loss": 24.682, "step": 7586 }, { "epoch": 0.7041299303944315, "grad_norm": 40.863075256347656, "learning_rate": 2.2097207116881627e-06, "loss": 22.5587, "step": 7587 }, { "epoch": 0.7042227378190256, "grad_norm": 41.150917053222656, "learning_rate": 2.2084474432528043e-06, "loss": 22.8094, "step": 7588 }, { "epoch": 0.7043155452436195, "grad_norm": 41.0268669128418, "learning_rate": 2.2071744377746025e-06, "loss": 23.1007, "step": 7589 }, { "epoch": 0.7044083526682134, "grad_norm": 55.861270904541016, "learning_rate": 2.2059016953734723e-06, "loss": 22.0108, "step": 7590 }, { "epoch": 0.7045011600928074, "grad_norm": 38.36789321899414, "learning_rate": 2.2046292161693013e-06, "loss": 20.6955, "step": 7591 }, { "epoch": 0.7045939675174014, "grad_norm": 50.62488555908203, "learning_rate": 2.203357000281956e-06, "loss": 21.8278, "step": 7592 }, { "epoch": 0.7046867749419954, "grad_norm": 42.36001205444336, "learning_rate": 2.202085047831275e-06, "loss": 23.4921, "step": 7593 }, { "epoch": 0.7047795823665893, "grad_norm": 38.96023178100586, "learning_rate": 2.2008133589370727e-06, "loss": 21.8534, "step": 7594 }, { "epoch": 0.7048723897911833, "grad_norm": 39.19072723388672, "learning_rate": 2.1995419337191397e-06, "loss": 22.9634, "step": 7595 }, { "epoch": 0.7049651972157772, "grad_norm": 48.68085479736328, "learning_rate": 2.1982707722972383e-06, "loss": 25.1159, "step": 7596 }, { "epoch": 0.7050580046403713, "grad_norm": 44.24137496948242, "learning_rate": 2.196999874791115e-06, "loss": 22.4343, "step": 7597 }, { "epoch": 0.7051508120649652, "grad_norm": 60.13058853149414, "learning_rate": 2.1957292413204767e-06, "loss": 23.2885, "step": 7598 }, { "epoch": 0.7052436194895592, "grad_norm": 40.296939849853516, "learning_rate": 2.1944588720050197e-06, "loss": 23.7093, "step": 7599 }, { "epoch": 0.7053364269141531, "grad_norm": 39.37297439575195, "learning_rate": 2.1931887669644086e-06, "loss": 21.4662, "step": 7600 }, { "epoch": 0.705429234338747, "grad_norm": 39.23233413696289, "learning_rate": 2.191918926318283e-06, "loss": 22.3443, "step": 7601 }, { "epoch": 0.7055220417633411, "grad_norm": 38.15044403076172, "learning_rate": 2.1906493501862574e-06, "loss": 23.8008, "step": 7602 }, { "epoch": 0.705614849187935, "grad_norm": 37.51749038696289, "learning_rate": 2.1893800386879284e-06, "loss": 22.464, "step": 7603 }, { "epoch": 0.705707656612529, "grad_norm": 42.760414123535156, "learning_rate": 2.1881109919428536e-06, "loss": 23.5249, "step": 7604 }, { "epoch": 0.7058004640371229, "grad_norm": 47.680137634277344, "learning_rate": 2.18684221007058e-06, "loss": 23.6656, "step": 7605 }, { "epoch": 0.705893271461717, "grad_norm": 53.5086784362793, "learning_rate": 2.1855736931906223e-06, "loss": 23.2839, "step": 7606 }, { "epoch": 0.7059860788863109, "grad_norm": 44.9589729309082, "learning_rate": 2.1843054414224705e-06, "loss": 23.5301, "step": 7607 }, { "epoch": 0.7060788863109049, "grad_norm": 38.31300735473633, "learning_rate": 2.1830374548855905e-06, "loss": 21.9511, "step": 7608 }, { "epoch": 0.7061716937354988, "grad_norm": 45.24998474121094, "learning_rate": 2.181769733699427e-06, "loss": 22.118, "step": 7609 }, { "epoch": 0.7062645011600928, "grad_norm": 48.420711517333984, "learning_rate": 2.1805022779833902e-06, "loss": 24.6336, "step": 7610 }, { "epoch": 0.7063573085846868, "grad_norm": 42.800315856933594, "learning_rate": 2.1792350878568756e-06, "loss": 25.5705, "step": 7611 }, { "epoch": 0.7064501160092808, "grad_norm": 38.76068878173828, "learning_rate": 2.1779681634392484e-06, "loss": 22.3319, "step": 7612 }, { "epoch": 0.7065429234338747, "grad_norm": 43.32973861694336, "learning_rate": 2.176701504849849e-06, "loss": 23.318, "step": 7613 }, { "epoch": 0.7066357308584686, "grad_norm": 49.6510009765625, "learning_rate": 2.1754351122079926e-06, "loss": 20.8068, "step": 7614 }, { "epoch": 0.7067285382830626, "grad_norm": 38.51468276977539, "learning_rate": 2.1741689856329745e-06, "loss": 21.6713, "step": 7615 }, { "epoch": 0.7068213457076566, "grad_norm": 43.08981704711914, "learning_rate": 2.1729031252440537e-06, "loss": 21.948, "step": 7616 }, { "epoch": 0.7069141531322506, "grad_norm": 40.938236236572266, "learning_rate": 2.1716375311604763e-06, "loss": 23.6269, "step": 7617 }, { "epoch": 0.7070069605568445, "grad_norm": 39.34926223754883, "learning_rate": 2.1703722035014567e-06, "loss": 22.7616, "step": 7618 }, { "epoch": 0.7070997679814385, "grad_norm": 105.7030258178711, "learning_rate": 2.169107142386186e-06, "loss": 22.5553, "step": 7619 }, { "epoch": 0.7071925754060325, "grad_norm": 43.65333938598633, "learning_rate": 2.167842347933826e-06, "loss": 22.5602, "step": 7620 }, { "epoch": 0.7072853828306265, "grad_norm": 39.679771423339844, "learning_rate": 2.1665778202635252e-06, "loss": 22.4661, "step": 7621 }, { "epoch": 0.7073781902552204, "grad_norm": 39.142547607421875, "learning_rate": 2.165313559494389e-06, "loss": 21.7993, "step": 7622 }, { "epoch": 0.7074709976798144, "grad_norm": 41.138755798339844, "learning_rate": 2.1640495657455153e-06, "loss": 22.8917, "step": 7623 }, { "epoch": 0.7075638051044083, "grad_norm": 76.9186782836914, "learning_rate": 2.162785839135965e-06, "loss": 23.5195, "step": 7624 }, { "epoch": 0.7076566125290024, "grad_norm": 43.76244354248047, "learning_rate": 2.16152237978478e-06, "loss": 22.9386, "step": 7625 }, { "epoch": 0.7077494199535963, "grad_norm": 41.032108306884766, "learning_rate": 2.1602591878109724e-06, "loss": 21.789, "step": 7626 }, { "epoch": 0.7078422273781902, "grad_norm": 38.75725555419922, "learning_rate": 2.158996263333537e-06, "loss": 21.357, "step": 7627 }, { "epoch": 0.7079350348027842, "grad_norm": 41.866050720214844, "learning_rate": 2.1577336064714304e-06, "loss": 24.9385, "step": 7628 }, { "epoch": 0.7080278422273782, "grad_norm": 37.32830047607422, "learning_rate": 2.156471217343598e-06, "loss": 22.9016, "step": 7629 }, { "epoch": 0.7081206496519722, "grad_norm": 40.571807861328125, "learning_rate": 2.1552090960689515e-06, "loss": 22.5028, "step": 7630 }, { "epoch": 0.7082134570765661, "grad_norm": 39.794071197509766, "learning_rate": 2.1539472427663788e-06, "loss": 25.2311, "step": 7631 }, { "epoch": 0.7083062645011601, "grad_norm": 45.54057312011719, "learning_rate": 2.1526856575547444e-06, "loss": 24.5557, "step": 7632 }, { "epoch": 0.708399071925754, "grad_norm": 42.21247863769531, "learning_rate": 2.151424340552884e-06, "loss": 21.7507, "step": 7633 }, { "epoch": 0.7084918793503481, "grad_norm": 35.1392822265625, "learning_rate": 2.1501632918796138e-06, "loss": 23.0301, "step": 7634 }, { "epoch": 0.708584686774942, "grad_norm": 39.829017639160156, "learning_rate": 2.1489025116537203e-06, "loss": 22.9411, "step": 7635 }, { "epoch": 0.708677494199536, "grad_norm": 41.04707717895508, "learning_rate": 2.1476419999939656e-06, "loss": 22.8912, "step": 7636 }, { "epoch": 0.7087703016241299, "grad_norm": 46.748291015625, "learning_rate": 2.1463817570190863e-06, "loss": 22.6174, "step": 7637 }, { "epoch": 0.7088631090487238, "grad_norm": 43.42934036254883, "learning_rate": 2.1451217828477945e-06, "loss": 25.0306, "step": 7638 }, { "epoch": 0.7089559164733179, "grad_norm": 39.186126708984375, "learning_rate": 2.1438620775987747e-06, "loss": 22.9207, "step": 7639 }, { "epoch": 0.7090487238979118, "grad_norm": 39.59721755981445, "learning_rate": 2.1426026413906908e-06, "loss": 23.6369, "step": 7640 }, { "epoch": 0.7091415313225058, "grad_norm": 45.35660171508789, "learning_rate": 2.1413434743421773e-06, "loss": 22.6962, "step": 7641 }, { "epoch": 0.7092343387470997, "grad_norm": 37.324188232421875, "learning_rate": 2.1400845765718455e-06, "loss": 22.1179, "step": 7642 }, { "epoch": 0.7093271461716938, "grad_norm": 40.07119369506836, "learning_rate": 2.1388259481982763e-06, "loss": 22.7941, "step": 7643 }, { "epoch": 0.7094199535962877, "grad_norm": 39.26460647583008, "learning_rate": 2.1375675893400373e-06, "loss": 26.3445, "step": 7644 }, { "epoch": 0.7095127610208817, "grad_norm": 42.75782775878906, "learning_rate": 2.136309500115654e-06, "loss": 23.8345, "step": 7645 }, { "epoch": 0.7096055684454756, "grad_norm": 42.44035339355469, "learning_rate": 2.13505168064364e-06, "loss": 25.6933, "step": 7646 }, { "epoch": 0.7096983758700696, "grad_norm": 44.95430374145508, "learning_rate": 2.133794131042478e-06, "loss": 22.4634, "step": 7647 }, { "epoch": 0.7097911832946636, "grad_norm": 42.63533401489258, "learning_rate": 2.132536851430626e-06, "loss": 22.3996, "step": 7648 }, { "epoch": 0.7098839907192576, "grad_norm": 44.996559143066406, "learning_rate": 2.1312798419265146e-06, "loss": 23.4644, "step": 7649 }, { "epoch": 0.7099767981438515, "grad_norm": 40.20848083496094, "learning_rate": 2.130023102648556e-06, "loss": 20.0391, "step": 7650 }, { "epoch": 0.7100696055684454, "grad_norm": 38.7659797668457, "learning_rate": 2.1287666337151248e-06, "loss": 23.5299, "step": 7651 }, { "epoch": 0.7101624129930394, "grad_norm": 35.722755432128906, "learning_rate": 2.1275104352445823e-06, "loss": 22.321, "step": 7652 }, { "epoch": 0.7102552204176334, "grad_norm": 41.132381439208984, "learning_rate": 2.1262545073552576e-06, "loss": 23.4228, "step": 7653 }, { "epoch": 0.7103480278422274, "grad_norm": 37.47875213623047, "learning_rate": 2.1249988501654567e-06, "loss": 21.5388, "step": 7654 }, { "epoch": 0.7104408352668213, "grad_norm": 49.35233688354492, "learning_rate": 2.1237434637934563e-06, "loss": 21.4346, "step": 7655 }, { "epoch": 0.7105336426914153, "grad_norm": 43.194053649902344, "learning_rate": 2.1224883483575166e-06, "loss": 25.7193, "step": 7656 }, { "epoch": 0.7106264501160093, "grad_norm": 42.216827392578125, "learning_rate": 2.1212335039758592e-06, "loss": 23.622, "step": 7657 }, { "epoch": 0.7107192575406033, "grad_norm": 43.69883728027344, "learning_rate": 2.119978930766692e-06, "loss": 22.2081, "step": 7658 }, { "epoch": 0.7108120649651972, "grad_norm": 44.87590026855469, "learning_rate": 2.118724628848192e-06, "loss": 22.9866, "step": 7659 }, { "epoch": 0.7109048723897912, "grad_norm": 40.036014556884766, "learning_rate": 2.11747059833851e-06, "loss": 23.7304, "step": 7660 }, { "epoch": 0.7109976798143851, "grad_norm": 43.38006591796875, "learning_rate": 2.1162168393557707e-06, "loss": 22.0633, "step": 7661 }, { "epoch": 0.7110904872389792, "grad_norm": 43.58487319946289, "learning_rate": 2.1149633520180813e-06, "loss": 24.3145, "step": 7662 }, { "epoch": 0.7111832946635731, "grad_norm": 52.07444763183594, "learning_rate": 2.11371013644351e-06, "loss": 22.6726, "step": 7663 }, { "epoch": 0.711276102088167, "grad_norm": 53.13688659667969, "learning_rate": 2.112457192750111e-06, "loss": 23.4365, "step": 7664 }, { "epoch": 0.711368909512761, "grad_norm": 43.202362060546875, "learning_rate": 2.111204521055907e-06, "loss": 23.5225, "step": 7665 }, { "epoch": 0.7114617169373549, "grad_norm": 38.81950759887695, "learning_rate": 2.109952121478897e-06, "loss": 23.13, "step": 7666 }, { "epoch": 0.711554524361949, "grad_norm": 41.07573699951172, "learning_rate": 2.1086999941370533e-06, "loss": 23.6657, "step": 7667 }, { "epoch": 0.7116473317865429, "grad_norm": 45.35695266723633, "learning_rate": 2.1074481391483233e-06, "loss": 24.9735, "step": 7668 }, { "epoch": 0.7117401392111369, "grad_norm": 46.386077880859375, "learning_rate": 2.1061965566306262e-06, "loss": 23.5292, "step": 7669 }, { "epoch": 0.7118329466357308, "grad_norm": 38.87629699707031, "learning_rate": 2.104945246701862e-06, "loss": 22.8112, "step": 7670 }, { "epoch": 0.7119257540603249, "grad_norm": 39.66481018066406, "learning_rate": 2.1036942094798996e-06, "loss": 21.5498, "step": 7671 }, { "epoch": 0.7120185614849188, "grad_norm": 42.26978302001953, "learning_rate": 2.102443445082583e-06, "loss": 21.2647, "step": 7672 }, { "epoch": 0.7121113689095128, "grad_norm": 41.80373764038086, "learning_rate": 2.101192953627731e-06, "loss": 21.5273, "step": 7673 }, { "epoch": 0.7122041763341067, "grad_norm": 41.09294509887695, "learning_rate": 2.099942735233136e-06, "loss": 24.658, "step": 7674 }, { "epoch": 0.7122969837587007, "grad_norm": 42.65986251831055, "learning_rate": 2.0986927900165657e-06, "loss": 24.0999, "step": 7675 }, { "epoch": 0.7123897911832947, "grad_norm": 42.59476852416992, "learning_rate": 2.0974431180957634e-06, "loss": 21.9606, "step": 7676 }, { "epoch": 0.7124825986078887, "grad_norm": 37.45216369628906, "learning_rate": 2.0961937195884437e-06, "loss": 21.7879, "step": 7677 }, { "epoch": 0.7125754060324826, "grad_norm": 40.766780853271484, "learning_rate": 2.094944594612297e-06, "loss": 22.446, "step": 7678 }, { "epoch": 0.7126682134570765, "grad_norm": 41.97855758666992, "learning_rate": 2.0936957432849875e-06, "loss": 22.3289, "step": 7679 }, { "epoch": 0.7127610208816705, "grad_norm": 70.27047729492188, "learning_rate": 2.0924471657241526e-06, "loss": 23.5623, "step": 7680 }, { "epoch": 0.7128538283062645, "grad_norm": 41.60215377807617, "learning_rate": 2.091198862047408e-06, "loss": 22.2293, "step": 7681 }, { "epoch": 0.7129466357308585, "grad_norm": 42.298072814941406, "learning_rate": 2.089950832372339e-06, "loss": 21.9569, "step": 7682 }, { "epoch": 0.7130394431554524, "grad_norm": 41.632850646972656, "learning_rate": 2.0887030768165073e-06, "loss": 23.4765, "step": 7683 }, { "epoch": 0.7131322505800464, "grad_norm": 43.553443908691406, "learning_rate": 2.087455595497448e-06, "loss": 22.3318, "step": 7684 }, { "epoch": 0.7132250580046404, "grad_norm": 42.11030960083008, "learning_rate": 2.0862083885326706e-06, "loss": 24.0594, "step": 7685 }, { "epoch": 0.7133178654292344, "grad_norm": 43.75117492675781, "learning_rate": 2.084961456039657e-06, "loss": 22.426, "step": 7686 }, { "epoch": 0.7134106728538283, "grad_norm": 47.14305114746094, "learning_rate": 2.083714798135869e-06, "loss": 23.6504, "step": 7687 }, { "epoch": 0.7135034802784223, "grad_norm": 38.131256103515625, "learning_rate": 2.0824684149387365e-06, "loss": 23.4184, "step": 7688 }, { "epoch": 0.7135962877030162, "grad_norm": 41.384456634521484, "learning_rate": 2.0812223065656663e-06, "loss": 24.8923, "step": 7689 }, { "epoch": 0.7136890951276103, "grad_norm": 39.120399475097656, "learning_rate": 2.0799764731340354e-06, "loss": 23.6702, "step": 7690 }, { "epoch": 0.7137819025522042, "grad_norm": 42.37550354003906, "learning_rate": 2.078730914761205e-06, "loss": 23.2906, "step": 7691 }, { "epoch": 0.7138747099767981, "grad_norm": 49.18587875366211, "learning_rate": 2.0774856315644955e-06, "loss": 22.787, "step": 7692 }, { "epoch": 0.7139675174013921, "grad_norm": 37.519168853759766, "learning_rate": 2.0762406236612153e-06, "loss": 23.0736, "step": 7693 }, { "epoch": 0.714060324825986, "grad_norm": 57.65970993041992, "learning_rate": 2.0749958911686384e-06, "loss": 22.4818, "step": 7694 }, { "epoch": 0.7141531322505801, "grad_norm": 46.527095794677734, "learning_rate": 2.0737514342040165e-06, "loss": 22.0637, "step": 7695 }, { "epoch": 0.714245939675174, "grad_norm": 40.68206787109375, "learning_rate": 2.072507252884572e-06, "loss": 21.8006, "step": 7696 }, { "epoch": 0.714338747099768, "grad_norm": 51.19852066040039, "learning_rate": 2.071263347327509e-06, "loss": 22.1189, "step": 7697 }, { "epoch": 0.7144315545243619, "grad_norm": 61.1267204284668, "learning_rate": 2.0700197176499927e-06, "loss": 24.2136, "step": 7698 }, { "epoch": 0.714524361948956, "grad_norm": 150.97752380371094, "learning_rate": 2.068776363969176e-06, "loss": 23.7448, "step": 7699 }, { "epoch": 0.7146171693735499, "grad_norm": 49.64712142944336, "learning_rate": 2.0675332864021767e-06, "loss": 22.6909, "step": 7700 }, { "epoch": 0.7147099767981439, "grad_norm": 45.89891815185547, "learning_rate": 2.0662904850660916e-06, "loss": 21.8407, "step": 7701 }, { "epoch": 0.7148027842227378, "grad_norm": 45.23023223876953, "learning_rate": 2.0650479600779872e-06, "loss": 23.6316, "step": 7702 }, { "epoch": 0.7148955916473317, "grad_norm": 46.514305114746094, "learning_rate": 2.063805711554907e-06, "loss": 21.0607, "step": 7703 }, { "epoch": 0.7149883990719258, "grad_norm": 38.56816482543945, "learning_rate": 2.0625637396138666e-06, "loss": 21.6336, "step": 7704 }, { "epoch": 0.7150812064965197, "grad_norm": 53.99733352661133, "learning_rate": 2.0613220443718606e-06, "loss": 23.0089, "step": 7705 }, { "epoch": 0.7151740139211137, "grad_norm": 47.634281158447266, "learning_rate": 2.0600806259458477e-06, "loss": 20.4362, "step": 7706 }, { "epoch": 0.7152668213457076, "grad_norm": 44.972164154052734, "learning_rate": 2.058839484452771e-06, "loss": 22.2721, "step": 7707 }, { "epoch": 0.7153596287703016, "grad_norm": 37.533748626708984, "learning_rate": 2.057598620009541e-06, "loss": 21.444, "step": 7708 }, { "epoch": 0.7154524361948956, "grad_norm": 38.590301513671875, "learning_rate": 2.0563580327330445e-06, "loss": 21.8384, "step": 7709 }, { "epoch": 0.7155452436194896, "grad_norm": 43.098426818847656, "learning_rate": 2.0551177227401397e-06, "loss": 24.8466, "step": 7710 }, { "epoch": 0.7156380510440835, "grad_norm": 39.17954635620117, "learning_rate": 2.053877690147666e-06, "loss": 22.6625, "step": 7711 }, { "epoch": 0.7157308584686775, "grad_norm": 41.59489822387695, "learning_rate": 2.0526379350724236e-06, "loss": 21.7522, "step": 7712 }, { "epoch": 0.7158236658932715, "grad_norm": 45.07548904418945, "learning_rate": 2.0513984576312007e-06, "loss": 22.2816, "step": 7713 }, { "epoch": 0.7159164733178655, "grad_norm": 44.7779655456543, "learning_rate": 2.05015925794075e-06, "loss": 22.3174, "step": 7714 }, { "epoch": 0.7160092807424594, "grad_norm": 47.57727813720703, "learning_rate": 2.0489203361178016e-06, "loss": 24.3739, "step": 7715 }, { "epoch": 0.7161020881670533, "grad_norm": 39.528770446777344, "learning_rate": 2.0476816922790575e-06, "loss": 21.8312, "step": 7716 }, { "epoch": 0.7161948955916473, "grad_norm": 35.58277130126953, "learning_rate": 2.0464433265411976e-06, "loss": 23.8225, "step": 7717 }, { "epoch": 0.7162877030162413, "grad_norm": 35.070899963378906, "learning_rate": 2.045205239020871e-06, "loss": 22.7173, "step": 7718 }, { "epoch": 0.7163805104408353, "grad_norm": 48.47041320800781, "learning_rate": 2.043967429834703e-06, "loss": 23.735, "step": 7719 }, { "epoch": 0.7164733178654292, "grad_norm": 69.64813232421875, "learning_rate": 2.042729899099291e-06, "loss": 25.1941, "step": 7720 }, { "epoch": 0.7165661252900232, "grad_norm": 41.892845153808594, "learning_rate": 2.0414926469312086e-06, "loss": 23.4227, "step": 7721 }, { "epoch": 0.7166589327146171, "grad_norm": 42.220054626464844, "learning_rate": 2.040255673446999e-06, "loss": 23.1222, "step": 7722 }, { "epoch": 0.7167517401392112, "grad_norm": 43.819679260253906, "learning_rate": 2.039018978763186e-06, "loss": 23.8953, "step": 7723 }, { "epoch": 0.7168445475638051, "grad_norm": 44.42707824707031, "learning_rate": 2.037782562996261e-06, "loss": 23.039, "step": 7724 }, { "epoch": 0.7169373549883991, "grad_norm": 47.03778839111328, "learning_rate": 2.036546426262692e-06, "loss": 23.2444, "step": 7725 }, { "epoch": 0.717030162412993, "grad_norm": 41.306339263916016, "learning_rate": 2.0353105686789185e-06, "loss": 22.5984, "step": 7726 }, { "epoch": 0.717122969837587, "grad_norm": 46.1240119934082, "learning_rate": 2.0340749903613555e-06, "loss": 22.3696, "step": 7727 }, { "epoch": 0.717215777262181, "grad_norm": 44.93548583984375, "learning_rate": 2.0328396914263925e-06, "loss": 24.7803, "step": 7728 }, { "epoch": 0.7173085846867749, "grad_norm": 46.889102935791016, "learning_rate": 2.0316046719903914e-06, "loss": 23.923, "step": 7729 }, { "epoch": 0.7174013921113689, "grad_norm": 38.7808952331543, "learning_rate": 2.0303699321696872e-06, "loss": 20.5989, "step": 7730 }, { "epoch": 0.7174941995359628, "grad_norm": 43.71791076660156, "learning_rate": 2.0291354720805902e-06, "loss": 23.7189, "step": 7731 }, { "epoch": 0.7175870069605569, "grad_norm": 36.48637008666992, "learning_rate": 2.027901291839382e-06, "loss": 21.8721, "step": 7732 }, { "epoch": 0.7176798143851508, "grad_norm": 43.2630615234375, "learning_rate": 2.026667391562318e-06, "loss": 22.3824, "step": 7733 }, { "epoch": 0.7177726218097448, "grad_norm": 41.891212463378906, "learning_rate": 2.025433771365632e-06, "loss": 21.4791, "step": 7734 }, { "epoch": 0.7178654292343387, "grad_norm": 42.595394134521484, "learning_rate": 2.0242004313655265e-06, "loss": 23.2958, "step": 7735 }, { "epoch": 0.7179582366589328, "grad_norm": 38.29051971435547, "learning_rate": 2.022967371678179e-06, "loss": 23.2376, "step": 7736 }, { "epoch": 0.7180510440835267, "grad_norm": 37.7364387512207, "learning_rate": 2.021734592419738e-06, "loss": 22.0999, "step": 7737 }, { "epoch": 0.7181438515081207, "grad_norm": 41.4488639831543, "learning_rate": 2.0205020937063343e-06, "loss": 23.4153, "step": 7738 }, { "epoch": 0.7182366589327146, "grad_norm": 43.91640853881836, "learning_rate": 2.0192698756540585e-06, "loss": 23.217, "step": 7739 }, { "epoch": 0.7183294663573085, "grad_norm": 36.504920959472656, "learning_rate": 2.0180379383789907e-06, "loss": 21.5861, "step": 7740 }, { "epoch": 0.7184222737819026, "grad_norm": 47.1670036315918, "learning_rate": 2.016806281997167e-06, "loss": 24.4913, "step": 7741 }, { "epoch": 0.7185150812064965, "grad_norm": 45.20200729370117, "learning_rate": 2.015574906624614e-06, "loss": 23.7193, "step": 7742 }, { "epoch": 0.7186078886310905, "grad_norm": 42.822235107421875, "learning_rate": 2.0143438123773206e-06, "loss": 21.1502, "step": 7743 }, { "epoch": 0.7187006960556844, "grad_norm": 49.799224853515625, "learning_rate": 2.0131129993712534e-06, "loss": 23.4647, "step": 7744 }, { "epoch": 0.7187935034802784, "grad_norm": 41.482364654541016, "learning_rate": 2.0118824677223503e-06, "loss": 23.4002, "step": 7745 }, { "epoch": 0.7188863109048724, "grad_norm": 44.961082458496094, "learning_rate": 2.0106522175465292e-06, "loss": 23.3143, "step": 7746 }, { "epoch": 0.7189791183294664, "grad_norm": 39.57782745361328, "learning_rate": 2.0094222489596697e-06, "loss": 23.7188, "step": 7747 }, { "epoch": 0.7190719257540603, "grad_norm": 51.47217559814453, "learning_rate": 2.008192562077637e-06, "loss": 24.3202, "step": 7748 }, { "epoch": 0.7191647331786543, "grad_norm": 39.34018325805664, "learning_rate": 2.0069631570162623e-06, "loss": 22.5373, "step": 7749 }, { "epoch": 0.7192575406032483, "grad_norm": 42.67317199707031, "learning_rate": 2.0057340338913533e-06, "loss": 24.4779, "step": 7750 }, { "epoch": 0.7193503480278423, "grad_norm": 44.40361022949219, "learning_rate": 2.0045051928186876e-06, "loss": 22.6914, "step": 7751 }, { "epoch": 0.7194431554524362, "grad_norm": 38.80586624145508, "learning_rate": 2.0032766339140246e-06, "loss": 22.8179, "step": 7752 }, { "epoch": 0.7195359628770301, "grad_norm": 38.99007797241211, "learning_rate": 2.0020483572930844e-06, "loss": 22.2269, "step": 7753 }, { "epoch": 0.7196287703016241, "grad_norm": 40.399349212646484, "learning_rate": 2.000820363071573e-06, "loss": 23.262, "step": 7754 }, { "epoch": 0.7197215777262181, "grad_norm": 41.01594924926758, "learning_rate": 1.999592651365162e-06, "loss": 21.7735, "step": 7755 }, { "epoch": 0.7198143851508121, "grad_norm": 47.280094146728516, "learning_rate": 1.9983652222894994e-06, "loss": 24.5253, "step": 7756 }, { "epoch": 0.719907192575406, "grad_norm": 44.96778869628906, "learning_rate": 1.997138075960204e-06, "loss": 23.8611, "step": 7757 }, { "epoch": 0.72, "grad_norm": 45.57372283935547, "learning_rate": 1.9959112124928743e-06, "loss": 23.6188, "step": 7758 }, { "epoch": 0.7200928074245939, "grad_norm": 40.80558395385742, "learning_rate": 1.9946846320030716e-06, "loss": 24.9058, "step": 7759 }, { "epoch": 0.720185614849188, "grad_norm": 38.22731018066406, "learning_rate": 1.9934583346063417e-06, "loss": 22.0877, "step": 7760 }, { "epoch": 0.7202784222737819, "grad_norm": 44.569923400878906, "learning_rate": 1.992232320418197e-06, "loss": 22.7891, "step": 7761 }, { "epoch": 0.7203712296983759, "grad_norm": 41.55550003051758, "learning_rate": 1.991006589554125e-06, "loss": 23.9372, "step": 7762 }, { "epoch": 0.7204640371229698, "grad_norm": 42.96210861206055, "learning_rate": 1.989781142129584e-06, "loss": 23.66, "step": 7763 }, { "epoch": 0.7205568445475639, "grad_norm": 40.736534118652344, "learning_rate": 1.988555978260013e-06, "loss": 24.5997, "step": 7764 }, { "epoch": 0.7206496519721578, "grad_norm": 48.42625045776367, "learning_rate": 1.9873310980608156e-06, "loss": 22.8355, "step": 7765 }, { "epoch": 0.7207424593967517, "grad_norm": 51.17233657836914, "learning_rate": 1.9861065016473743e-06, "loss": 22.7205, "step": 7766 }, { "epoch": 0.7208352668213457, "grad_norm": 40.58219909667969, "learning_rate": 1.9848821891350415e-06, "loss": 23.3849, "step": 7767 }, { "epoch": 0.7209280742459396, "grad_norm": 41.854190826416016, "learning_rate": 1.9836581606391452e-06, "loss": 24.0185, "step": 7768 }, { "epoch": 0.7210208816705337, "grad_norm": 42.969120025634766, "learning_rate": 1.9824344162749835e-06, "loss": 23.8103, "step": 7769 }, { "epoch": 0.7211136890951276, "grad_norm": 39.93701934814453, "learning_rate": 1.981210956157834e-06, "loss": 22.2765, "step": 7770 }, { "epoch": 0.7212064965197216, "grad_norm": 45.847068786621094, "learning_rate": 1.9799877804029414e-06, "loss": 21.9722, "step": 7771 }, { "epoch": 0.7212993039443155, "grad_norm": 47.553749084472656, "learning_rate": 1.978764889125526e-06, "loss": 22.9599, "step": 7772 }, { "epoch": 0.7213921113689095, "grad_norm": 40.8317756652832, "learning_rate": 1.9775422824407815e-06, "loss": 21.7794, "step": 7773 }, { "epoch": 0.7214849187935035, "grad_norm": 43.51388168334961, "learning_rate": 1.9763199604638717e-06, "loss": 23.1182, "step": 7774 }, { "epoch": 0.7215777262180975, "grad_norm": 53.074790954589844, "learning_rate": 1.9750979233099414e-06, "loss": 22.9059, "step": 7775 }, { "epoch": 0.7216705336426914, "grad_norm": 54.144954681396484, "learning_rate": 1.973876171094097e-06, "loss": 23.4359, "step": 7776 }, { "epoch": 0.7217633410672853, "grad_norm": 41.43882751464844, "learning_rate": 1.97265470393143e-06, "loss": 22.6317, "step": 7777 }, { "epoch": 0.7218561484918794, "grad_norm": 43.293853759765625, "learning_rate": 1.971433521936997e-06, "loss": 24.3969, "step": 7778 }, { "epoch": 0.7219489559164733, "grad_norm": 51.80973815917969, "learning_rate": 1.97021262522583e-06, "loss": 23.1506, "step": 7779 }, { "epoch": 0.7220417633410673, "grad_norm": 54.363956451416016, "learning_rate": 1.9689920139129336e-06, "loss": 22.3527, "step": 7780 }, { "epoch": 0.7221345707656612, "grad_norm": 38.052371978759766, "learning_rate": 1.967771688113291e-06, "loss": 21.4524, "step": 7781 }, { "epoch": 0.7222273781902552, "grad_norm": 44.788116455078125, "learning_rate": 1.966551647941847e-06, "loss": 22.2735, "step": 7782 }, { "epoch": 0.7223201856148492, "grad_norm": 55.61736297607422, "learning_rate": 1.965331893513531e-06, "loss": 24.696, "step": 7783 }, { "epoch": 0.7224129930394432, "grad_norm": 52.66632843017578, "learning_rate": 1.96411242494324e-06, "loss": 22.8684, "step": 7784 }, { "epoch": 0.7225058004640371, "grad_norm": 37.916648864746094, "learning_rate": 1.9628932423458437e-06, "loss": 22.7758, "step": 7785 }, { "epoch": 0.7225986078886311, "grad_norm": 41.522830963134766, "learning_rate": 1.9616743458361848e-06, "loss": 23.2002, "step": 7786 }, { "epoch": 0.722691415313225, "grad_norm": 48.44813537597656, "learning_rate": 1.960455735529086e-06, "loss": 22.0781, "step": 7787 }, { "epoch": 0.7227842227378191, "grad_norm": 44.821475982666016, "learning_rate": 1.9592374115393293e-06, "loss": 24.823, "step": 7788 }, { "epoch": 0.722877030162413, "grad_norm": 44.60807800292969, "learning_rate": 1.9580193739816837e-06, "loss": 24.5647, "step": 7789 }, { "epoch": 0.722969837587007, "grad_norm": 40.33831024169922, "learning_rate": 1.956801622970883e-06, "loss": 23.3063, "step": 7790 }, { "epoch": 0.7230626450116009, "grad_norm": 40.61172103881836, "learning_rate": 1.955584158621637e-06, "loss": 23.7731, "step": 7791 }, { "epoch": 0.723155452436195, "grad_norm": 43.93839645385742, "learning_rate": 1.9543669810486255e-06, "loss": 24.7049, "step": 7792 }, { "epoch": 0.7232482598607889, "grad_norm": 41.51211929321289, "learning_rate": 1.9531500903665084e-06, "loss": 22.4505, "step": 7793 }, { "epoch": 0.7233410672853828, "grad_norm": 48.91501235961914, "learning_rate": 1.951933486689907e-06, "loss": 24.2397, "step": 7794 }, { "epoch": 0.7234338747099768, "grad_norm": 46.14607238769531, "learning_rate": 1.950717170133428e-06, "loss": 24.5449, "step": 7795 }, { "epoch": 0.7235266821345707, "grad_norm": 53.67176055908203, "learning_rate": 1.949501140811643e-06, "loss": 25.907, "step": 7796 }, { "epoch": 0.7236194895591648, "grad_norm": 44.1884880065918, "learning_rate": 1.9482853988391e-06, "loss": 24.7372, "step": 7797 }, { "epoch": 0.7237122969837587, "grad_norm": 49.40996170043945, "learning_rate": 1.9470699443303147e-06, "loss": 22.3227, "step": 7798 }, { "epoch": 0.7238051044083527, "grad_norm": 41.79349899291992, "learning_rate": 1.9458547773997877e-06, "loss": 22.9565, "step": 7799 }, { "epoch": 0.7238979118329466, "grad_norm": 46.25222396850586, "learning_rate": 1.9446398981619757e-06, "loss": 23.6955, "step": 7800 }, { "epoch": 0.7239907192575405, "grad_norm": 44.716468811035156, "learning_rate": 1.9434253067313236e-06, "loss": 23.1105, "step": 7801 }, { "epoch": 0.7240835266821346, "grad_norm": 39.95166015625, "learning_rate": 1.9422110032222403e-06, "loss": 21.4911, "step": 7802 }, { "epoch": 0.7241763341067285, "grad_norm": 42.35878372192383, "learning_rate": 1.9409969877491108e-06, "loss": 24.3104, "step": 7803 }, { "epoch": 0.7242691415313225, "grad_norm": 51.34566879272461, "learning_rate": 1.939783260426291e-06, "loss": 22.0305, "step": 7804 }, { "epoch": 0.7243619489559164, "grad_norm": 44.80022430419922, "learning_rate": 1.9385698213681142e-06, "loss": 22.9194, "step": 7805 }, { "epoch": 0.7244547563805105, "grad_norm": 47.66455078125, "learning_rate": 1.937356670688878e-06, "loss": 22.1194, "step": 7806 }, { "epoch": 0.7245475638051044, "grad_norm": 46.12554931640625, "learning_rate": 1.9361438085028628e-06, "loss": 22.0605, "step": 7807 }, { "epoch": 0.7246403712296984, "grad_norm": 49.29154586791992, "learning_rate": 1.9349312349243153e-06, "loss": 25.0639, "step": 7808 }, { "epoch": 0.7247331786542923, "grad_norm": 72.67792510986328, "learning_rate": 1.9337189500674574e-06, "loss": 21.6253, "step": 7809 }, { "epoch": 0.7248259860788863, "grad_norm": 52.61027908325195, "learning_rate": 1.9325069540464824e-06, "loss": 25.7563, "step": 7810 }, { "epoch": 0.7249187935034803, "grad_norm": 45.41082000732422, "learning_rate": 1.931295246975556e-06, "loss": 23.6903, "step": 7811 }, { "epoch": 0.7250116009280743, "grad_norm": 45.260955810546875, "learning_rate": 1.9300838289688216e-06, "loss": 23.824, "step": 7812 }, { "epoch": 0.7251044083526682, "grad_norm": 47.82421875, "learning_rate": 1.92887270014039e-06, "loss": 21.3052, "step": 7813 }, { "epoch": 0.7251972157772621, "grad_norm": 57.66840362548828, "learning_rate": 1.9276618606043456e-06, "loss": 20.2585, "step": 7814 }, { "epoch": 0.7252900232018561, "grad_norm": 43.21269226074219, "learning_rate": 1.9264513104747474e-06, "loss": 22.8451, "step": 7815 }, { "epoch": 0.7253828306264501, "grad_norm": 44.45250701904297, "learning_rate": 1.9252410498656253e-06, "loss": 22.072, "step": 7816 }, { "epoch": 0.7254756380510441, "grad_norm": 40.69401931762695, "learning_rate": 1.924031078890982e-06, "loss": 22.5182, "step": 7817 }, { "epoch": 0.725568445475638, "grad_norm": 38.15959930419922, "learning_rate": 1.9228213976647964e-06, "loss": 23.2285, "step": 7818 }, { "epoch": 0.725661252900232, "grad_norm": 41.594608306884766, "learning_rate": 1.9216120063010157e-06, "loss": 23.504, "step": 7819 }, { "epoch": 0.725754060324826, "grad_norm": 36.569583892822266, "learning_rate": 1.920402904913562e-06, "loss": 22.9175, "step": 7820 }, { "epoch": 0.72584686774942, "grad_norm": 39.64033126831055, "learning_rate": 1.9191940936163277e-06, "loss": 23.9033, "step": 7821 }, { "epoch": 0.7259396751740139, "grad_norm": 42.859683990478516, "learning_rate": 1.9179855725231846e-06, "loss": 22.7518, "step": 7822 }, { "epoch": 0.7260324825986079, "grad_norm": 50.114925384521484, "learning_rate": 1.916777341747965e-06, "loss": 22.0542, "step": 7823 }, { "epoch": 0.7261252900232018, "grad_norm": 40.6969108581543, "learning_rate": 1.915569401404488e-06, "loss": 23.7622, "step": 7824 }, { "epoch": 0.7262180974477959, "grad_norm": 39.82087326049805, "learning_rate": 1.914361751606535e-06, "loss": 24.2651, "step": 7825 }, { "epoch": 0.7263109048723898, "grad_norm": 54.222774505615234, "learning_rate": 1.913154392467864e-06, "loss": 22.9485, "step": 7826 }, { "epoch": 0.7264037122969838, "grad_norm": 52.5023307800293, "learning_rate": 1.9119473241022043e-06, "loss": 24.5304, "step": 7827 }, { "epoch": 0.7264965197215777, "grad_norm": 43.91718673706055, "learning_rate": 1.910740546623263e-06, "loss": 23.3048, "step": 7828 }, { "epoch": 0.7265893271461717, "grad_norm": 44.179622650146484, "learning_rate": 1.9095340601447088e-06, "loss": 24.9986, "step": 7829 }, { "epoch": 0.7266821345707657, "grad_norm": 41.61439895629883, "learning_rate": 1.908327864780195e-06, "loss": 23.8829, "step": 7830 }, { "epoch": 0.7267749419953596, "grad_norm": 46.86958694458008, "learning_rate": 1.90712196064334e-06, "loss": 23.263, "step": 7831 }, { "epoch": 0.7268677494199536, "grad_norm": 45.12103271484375, "learning_rate": 1.9059163478477372e-06, "loss": 22.9535, "step": 7832 }, { "epoch": 0.7269605568445475, "grad_norm": 47.73809051513672, "learning_rate": 1.9047110265069506e-06, "loss": 22.4977, "step": 7833 }, { "epoch": 0.7270533642691416, "grad_norm": 45.55898666381836, "learning_rate": 1.9035059967345237e-06, "loss": 23.6862, "step": 7834 }, { "epoch": 0.7271461716937355, "grad_norm": 48.309852600097656, "learning_rate": 1.9023012586439599e-06, "loss": 22.5593, "step": 7835 }, { "epoch": 0.7272389791183295, "grad_norm": 45.5168571472168, "learning_rate": 1.9010968123487478e-06, "loss": 23.0649, "step": 7836 }, { "epoch": 0.7273317865429234, "grad_norm": 46.82072830200195, "learning_rate": 1.8998926579623416e-06, "loss": 21.7278, "step": 7837 }, { "epoch": 0.7274245939675174, "grad_norm": 39.28481674194336, "learning_rate": 1.8986887955981698e-06, "loss": 22.5622, "step": 7838 }, { "epoch": 0.7275174013921114, "grad_norm": 44.619964599609375, "learning_rate": 1.8974852253696307e-06, "loss": 23.1426, "step": 7839 }, { "epoch": 0.7276102088167054, "grad_norm": 51.15237808227539, "learning_rate": 1.8962819473901034e-06, "loss": 23.4426, "step": 7840 }, { "epoch": 0.7277030162412993, "grad_norm": 40.991668701171875, "learning_rate": 1.8950789617729266e-06, "loss": 20.8652, "step": 7841 }, { "epoch": 0.7277958236658932, "grad_norm": 70.81974792480469, "learning_rate": 1.8938762686314238e-06, "loss": 24.465, "step": 7842 }, { "epoch": 0.7278886310904873, "grad_norm": 132.54202270507812, "learning_rate": 1.892673868078883e-06, "loss": 22.9533, "step": 7843 }, { "epoch": 0.7279814385150812, "grad_norm": 44.811824798583984, "learning_rate": 1.8914717602285687e-06, "loss": 21.9841, "step": 7844 }, { "epoch": 0.7280742459396752, "grad_norm": 48.921653747558594, "learning_rate": 1.890269945193715e-06, "loss": 23.493, "step": 7845 }, { "epoch": 0.7281670533642691, "grad_norm": 44.297447204589844, "learning_rate": 1.889068423087531e-06, "loss": 22.6531, "step": 7846 }, { "epoch": 0.7282598607888631, "grad_norm": 42.85111618041992, "learning_rate": 1.8878671940231947e-06, "loss": 25.6635, "step": 7847 }, { "epoch": 0.7283526682134571, "grad_norm": 38.669193267822266, "learning_rate": 1.8866662581138646e-06, "loss": 22.5082, "step": 7848 }, { "epoch": 0.7284454756380511, "grad_norm": 42.67036056518555, "learning_rate": 1.8854656154726574e-06, "loss": 22.1396, "step": 7849 }, { "epoch": 0.728538283062645, "grad_norm": 40.805076599121094, "learning_rate": 1.8842652662126776e-06, "loss": 22.6106, "step": 7850 }, { "epoch": 0.728631090487239, "grad_norm": 45.69175720214844, "learning_rate": 1.883065210446992e-06, "loss": 23.2367, "step": 7851 }, { "epoch": 0.7287238979118329, "grad_norm": 40.66453552246094, "learning_rate": 1.881865448288644e-06, "loss": 25.6318, "step": 7852 }, { "epoch": 0.728816705336427, "grad_norm": 42.29513168334961, "learning_rate": 1.880665979850645e-06, "loss": 24.3919, "step": 7853 }, { "epoch": 0.7289095127610209, "grad_norm": 39.29762268066406, "learning_rate": 1.8794668052459863e-06, "loss": 22.2434, "step": 7854 }, { "epoch": 0.7290023201856148, "grad_norm": 42.077980041503906, "learning_rate": 1.878267924587625e-06, "loss": 21.8098, "step": 7855 }, { "epoch": 0.7290951276102088, "grad_norm": 42.37080383300781, "learning_rate": 1.877069337988493e-06, "loss": 22.6021, "step": 7856 }, { "epoch": 0.7291879350348028, "grad_norm": 38.5994873046875, "learning_rate": 1.8758710455614937e-06, "loss": 23.0398, "step": 7857 }, { "epoch": 0.7292807424593968, "grad_norm": 65.37903594970703, "learning_rate": 1.874673047419502e-06, "loss": 21.6385, "step": 7858 }, { "epoch": 0.7293735498839907, "grad_norm": 47.17138671875, "learning_rate": 1.873475343675369e-06, "loss": 23.886, "step": 7859 }, { "epoch": 0.7294663573085847, "grad_norm": 50.46756362915039, "learning_rate": 1.8722779344419139e-06, "loss": 21.851, "step": 7860 }, { "epoch": 0.7295591647331786, "grad_norm": 37.586177825927734, "learning_rate": 1.8710808198319297e-06, "loss": 22.4005, "step": 7861 }, { "epoch": 0.7296519721577727, "grad_norm": 39.51507568359375, "learning_rate": 1.8698839999581818e-06, "loss": 23.8246, "step": 7862 }, { "epoch": 0.7297447795823666, "grad_norm": 48.0449104309082, "learning_rate": 1.868687474933407e-06, "loss": 23.509, "step": 7863 }, { "epoch": 0.7298375870069606, "grad_norm": 38.475486755371094, "learning_rate": 1.8674912448703142e-06, "loss": 23.2366, "step": 7864 }, { "epoch": 0.7299303944315545, "grad_norm": 49.021820068359375, "learning_rate": 1.8662953098815873e-06, "loss": 23.0332, "step": 7865 }, { "epoch": 0.7300232018561484, "grad_norm": 44.548179626464844, "learning_rate": 1.8650996700798797e-06, "loss": 23.697, "step": 7866 }, { "epoch": 0.7301160092807425, "grad_norm": 61.040565490722656, "learning_rate": 1.8639043255778173e-06, "loss": 22.4213, "step": 7867 }, { "epoch": 0.7302088167053364, "grad_norm": 46.518218994140625, "learning_rate": 1.8627092764879968e-06, "loss": 20.8559, "step": 7868 }, { "epoch": 0.7303016241299304, "grad_norm": 48.515804290771484, "learning_rate": 1.8615145229229936e-06, "loss": 22.4816, "step": 7869 }, { "epoch": 0.7303944315545243, "grad_norm": 50.69367599487305, "learning_rate": 1.860320064995344e-06, "loss": 23.6964, "step": 7870 }, { "epoch": 0.7304872389791184, "grad_norm": 40.54956817626953, "learning_rate": 1.859125902817568e-06, "loss": 22.2289, "step": 7871 }, { "epoch": 0.7305800464037123, "grad_norm": 42.543399810791016, "learning_rate": 1.8579320365021508e-06, "loss": 25.4951, "step": 7872 }, { "epoch": 0.7306728538283063, "grad_norm": 41.34763717651367, "learning_rate": 1.8567384661615518e-06, "loss": 22.7767, "step": 7873 }, { "epoch": 0.7307656612529002, "grad_norm": 50.8846435546875, "learning_rate": 1.8555451919082001e-06, "loss": 23.1963, "step": 7874 }, { "epoch": 0.7308584686774942, "grad_norm": 49.60435104370117, "learning_rate": 1.8543522138545046e-06, "loss": 22.9289, "step": 7875 }, { "epoch": 0.7309512761020882, "grad_norm": 48.51580047607422, "learning_rate": 1.8531595321128337e-06, "loss": 25.0555, "step": 7876 }, { "epoch": 0.7310440835266822, "grad_norm": 42.133758544921875, "learning_rate": 1.8519671467955408e-06, "loss": 24.4447, "step": 7877 }, { "epoch": 0.7311368909512761, "grad_norm": 46.27880096435547, "learning_rate": 1.8507750580149436e-06, "loss": 25.6199, "step": 7878 }, { "epoch": 0.73122969837587, "grad_norm": 45.00960922241211, "learning_rate": 1.849583265883333e-06, "loss": 22.0573, "step": 7879 }, { "epoch": 0.731322505800464, "grad_norm": 48.08145523071289, "learning_rate": 1.848391770512974e-06, "loss": 23.9695, "step": 7880 }, { "epoch": 0.731415313225058, "grad_norm": 41.776187896728516, "learning_rate": 1.8472005720161013e-06, "loss": 22.1881, "step": 7881 }, { "epoch": 0.731508120649652, "grad_norm": 42.911521911621094, "learning_rate": 1.8460096705049218e-06, "loss": 22.7212, "step": 7882 }, { "epoch": 0.7316009280742459, "grad_norm": 40.33405685424805, "learning_rate": 1.8448190660916204e-06, "loss": 23.7953, "step": 7883 }, { "epoch": 0.7316937354988399, "grad_norm": 48.4317512512207, "learning_rate": 1.8436287588883416e-06, "loss": 23.4202, "step": 7884 }, { "epoch": 0.7317865429234339, "grad_norm": 43.13227081298828, "learning_rate": 1.8424387490072154e-06, "loss": 23.2668, "step": 7885 }, { "epoch": 0.7318793503480279, "grad_norm": 47.059391021728516, "learning_rate": 1.8412490365603358e-06, "loss": 21.0561, "step": 7886 }, { "epoch": 0.7319721577726218, "grad_norm": 48.89973831176758, "learning_rate": 1.84005962165977e-06, "loss": 23.935, "step": 7887 }, { "epoch": 0.7320649651972158, "grad_norm": 41.08107376098633, "learning_rate": 1.838870504417557e-06, "loss": 22.4286, "step": 7888 }, { "epoch": 0.7321577726218097, "grad_norm": 40.67061233520508, "learning_rate": 1.837681684945713e-06, "loss": 21.9942, "step": 7889 }, { "epoch": 0.7322505800464038, "grad_norm": 38.879886627197266, "learning_rate": 1.836493163356215e-06, "loss": 21.9839, "step": 7890 }, { "epoch": 0.7323433874709977, "grad_norm": 37.33763122558594, "learning_rate": 1.8353049397610244e-06, "loss": 22.4923, "step": 7891 }, { "epoch": 0.7324361948955916, "grad_norm": 42.7529296875, "learning_rate": 1.834117014272067e-06, "loss": 23.5854, "step": 7892 }, { "epoch": 0.7325290023201856, "grad_norm": 43.50611114501953, "learning_rate": 1.8329293870012422e-06, "loss": 23.1976, "step": 7893 }, { "epoch": 0.7326218097447795, "grad_norm": 35.989505767822266, "learning_rate": 1.8317420580604196e-06, "loss": 23.7685, "step": 7894 }, { "epoch": 0.7327146171693736, "grad_norm": 42.918270111083984, "learning_rate": 1.830555027561448e-06, "loss": 22.8519, "step": 7895 }, { "epoch": 0.7328074245939675, "grad_norm": 45.98215103149414, "learning_rate": 1.8293682956161357e-06, "loss": 23.7751, "step": 7896 }, { "epoch": 0.7329002320185615, "grad_norm": 43.66897201538086, "learning_rate": 1.8281818623362752e-06, "loss": 24.5781, "step": 7897 }, { "epoch": 0.7329930394431554, "grad_norm": 41.45905303955078, "learning_rate": 1.826995727833623e-06, "loss": 21.8902, "step": 7898 }, { "epoch": 0.7330858468677495, "grad_norm": 48.591392517089844, "learning_rate": 1.8258098922199108e-06, "loss": 22.1968, "step": 7899 }, { "epoch": 0.7331786542923434, "grad_norm": 46.38149642944336, "learning_rate": 1.8246243556068394e-06, "loss": 22.6287, "step": 7900 }, { "epoch": 0.7332714617169374, "grad_norm": 43.91429901123047, "learning_rate": 1.8234391181060862e-06, "loss": 22.9287, "step": 7901 }, { "epoch": 0.7333642691415313, "grad_norm": 45.090675354003906, "learning_rate": 1.8222541798292965e-06, "loss": 23.4157, "step": 7902 }, { "epoch": 0.7334570765661252, "grad_norm": 43.90468215942383, "learning_rate": 1.8210695408880885e-06, "loss": 23.3895, "step": 7903 }, { "epoch": 0.7335498839907193, "grad_norm": 43.150054931640625, "learning_rate": 1.8198852013940515e-06, "loss": 20.8508, "step": 7904 }, { "epoch": 0.7336426914153132, "grad_norm": 44.468055725097656, "learning_rate": 1.8187011614587464e-06, "loss": 22.2641, "step": 7905 }, { "epoch": 0.7337354988399072, "grad_norm": 41.29554748535156, "learning_rate": 1.8175174211937096e-06, "loss": 21.7431, "step": 7906 }, { "epoch": 0.7338283062645011, "grad_norm": 39.77127456665039, "learning_rate": 1.8163339807104452e-06, "loss": 22.6605, "step": 7907 }, { "epoch": 0.7339211136890951, "grad_norm": 46.72307586669922, "learning_rate": 1.8151508401204298e-06, "loss": 23.5933, "step": 7908 }, { "epoch": 0.7340139211136891, "grad_norm": 43.0804557800293, "learning_rate": 1.8139679995351122e-06, "loss": 23.3355, "step": 7909 }, { "epoch": 0.7341067285382831, "grad_norm": 42.178504943847656, "learning_rate": 1.8127854590659133e-06, "loss": 23.4959, "step": 7910 }, { "epoch": 0.734199535962877, "grad_norm": 45.42108917236328, "learning_rate": 1.8116032188242238e-06, "loss": 21.902, "step": 7911 }, { "epoch": 0.734292343387471, "grad_norm": 45.06502151489258, "learning_rate": 1.8104212789214109e-06, "loss": 22.6252, "step": 7912 }, { "epoch": 0.734385150812065, "grad_norm": 46.76778030395508, "learning_rate": 1.8092396394688083e-06, "loss": 21.9743, "step": 7913 }, { "epoch": 0.734477958236659, "grad_norm": 45.67961883544922, "learning_rate": 1.8080583005777241e-06, "loss": 23.4224, "step": 7914 }, { "epoch": 0.7345707656612529, "grad_norm": 46.2536735534668, "learning_rate": 1.806877262359437e-06, "loss": 25.0435, "step": 7915 }, { "epoch": 0.7346635730858468, "grad_norm": 50.492095947265625, "learning_rate": 1.8056965249251984e-06, "loss": 25.2299, "step": 7916 }, { "epoch": 0.7347563805104408, "grad_norm": 48.00199508666992, "learning_rate": 1.804516088386228e-06, "loss": 21.8706, "step": 7917 }, { "epoch": 0.7348491879350348, "grad_norm": 50.75858688354492, "learning_rate": 1.803335952853726e-06, "loss": 25.4236, "step": 7918 }, { "epoch": 0.7349419953596288, "grad_norm": 45.186885833740234, "learning_rate": 1.8021561184388509e-06, "loss": 24.3247, "step": 7919 }, { "epoch": 0.7350348027842227, "grad_norm": 43.11194610595703, "learning_rate": 1.800976585252745e-06, "loss": 24.7004, "step": 7920 }, { "epoch": 0.7351276102088167, "grad_norm": 44.65547561645508, "learning_rate": 1.799797353406516e-06, "loss": 23.0321, "step": 7921 }, { "epoch": 0.7352204176334107, "grad_norm": 46.31467819213867, "learning_rate": 1.7986184230112446e-06, "loss": 22.9703, "step": 7922 }, { "epoch": 0.7353132250580047, "grad_norm": 51.41649627685547, "learning_rate": 1.797439794177981e-06, "loss": 21.7777, "step": 7923 }, { "epoch": 0.7354060324825986, "grad_norm": 39.874664306640625, "learning_rate": 1.7962614670177548e-06, "loss": 24.2434, "step": 7924 }, { "epoch": 0.7354988399071926, "grad_norm": 47.085994720458984, "learning_rate": 1.7950834416415535e-06, "loss": 22.8582, "step": 7925 }, { "epoch": 0.7355916473317865, "grad_norm": 45.544132232666016, "learning_rate": 1.7939057181603504e-06, "loss": 21.8549, "step": 7926 }, { "epoch": 0.7356844547563806, "grad_norm": 38.86890411376953, "learning_rate": 1.7927282966850817e-06, "loss": 19.1764, "step": 7927 }, { "epoch": 0.7357772621809745, "grad_norm": 39.42053985595703, "learning_rate": 1.7915511773266576e-06, "loss": 22.3509, "step": 7928 }, { "epoch": 0.7358700696055684, "grad_norm": 46.18171310424805, "learning_rate": 1.7903743601959577e-06, "loss": 23.6462, "step": 7929 }, { "epoch": 0.7359628770301624, "grad_norm": 47.56087112426758, "learning_rate": 1.789197845403841e-06, "loss": 24.5276, "step": 7930 }, { "epoch": 0.7360556844547563, "grad_norm": 47.385597229003906, "learning_rate": 1.7880216330611256e-06, "loss": 23.5414, "step": 7931 }, { "epoch": 0.7361484918793504, "grad_norm": 36.43497848510742, "learning_rate": 1.7868457232786117e-06, "loss": 23.3471, "step": 7932 }, { "epoch": 0.7362412993039443, "grad_norm": 37.7968635559082, "learning_rate": 1.785670116167066e-06, "loss": 21.4075, "step": 7933 }, { "epoch": 0.7363341067285383, "grad_norm": 42.371559143066406, "learning_rate": 1.7844948118372278e-06, "loss": 23.2203, "step": 7934 }, { "epoch": 0.7364269141531322, "grad_norm": 36.55956268310547, "learning_rate": 1.7833198103998057e-06, "loss": 22.6011, "step": 7935 }, { "epoch": 0.7365197215777263, "grad_norm": 42.22533416748047, "learning_rate": 1.782145111965487e-06, "loss": 22.7966, "step": 7936 }, { "epoch": 0.7366125290023202, "grad_norm": 38.08241271972656, "learning_rate": 1.7809707166449187e-06, "loss": 23.6458, "step": 7937 }, { "epoch": 0.7367053364269142, "grad_norm": 38.423397064208984, "learning_rate": 1.7797966245487314e-06, "loss": 21.9186, "step": 7938 }, { "epoch": 0.7367981438515081, "grad_norm": 41.02607727050781, "learning_rate": 1.7786228357875184e-06, "loss": 21.5002, "step": 7939 }, { "epoch": 0.736890951276102, "grad_norm": 40.945167541503906, "learning_rate": 1.7774493504718492e-06, "loss": 21.128, "step": 7940 }, { "epoch": 0.7369837587006961, "grad_norm": 43.35567092895508, "learning_rate": 1.7762761687122609e-06, "loss": 22.5634, "step": 7941 }, { "epoch": 0.73707656612529, "grad_norm": 43.41945266723633, "learning_rate": 1.7751032906192694e-06, "loss": 24.0207, "step": 7942 }, { "epoch": 0.737169373549884, "grad_norm": 38.6530647277832, "learning_rate": 1.7739307163033497e-06, "loss": 21.8541, "step": 7943 }, { "epoch": 0.7372621809744779, "grad_norm": 45.79686737060547, "learning_rate": 1.7727584458749608e-06, "loss": 22.4929, "step": 7944 }, { "epoch": 0.7373549883990719, "grad_norm": 42.938167572021484, "learning_rate": 1.771586479444526e-06, "loss": 22.7688, "step": 7945 }, { "epoch": 0.7374477958236659, "grad_norm": 42.58140563964844, "learning_rate": 1.7704148171224406e-06, "loss": 22.9164, "step": 7946 }, { "epoch": 0.7375406032482599, "grad_norm": 47.9987907409668, "learning_rate": 1.7692434590190715e-06, "loss": 23.845, "step": 7947 }, { "epoch": 0.7376334106728538, "grad_norm": 46.3938102722168, "learning_rate": 1.7680724052447606e-06, "loss": 23.0621, "step": 7948 }, { "epoch": 0.7377262180974478, "grad_norm": 45.85064697265625, "learning_rate": 1.7669016559098169e-06, "loss": 23.2774, "step": 7949 }, { "epoch": 0.7378190255220418, "grad_norm": 43.61310577392578, "learning_rate": 1.7657312111245218e-06, "loss": 23.4985, "step": 7950 }, { "epoch": 0.7379118329466358, "grad_norm": 41.484375, "learning_rate": 1.7645610709991283e-06, "loss": 23.8793, "step": 7951 }, { "epoch": 0.7380046403712297, "grad_norm": 49.92266845703125, "learning_rate": 1.7633912356438588e-06, "loss": 22.3428, "step": 7952 }, { "epoch": 0.7380974477958236, "grad_norm": 49.319313049316406, "learning_rate": 1.7622217051689133e-06, "loss": 22.3393, "step": 7953 }, { "epoch": 0.7381902552204176, "grad_norm": 41.61479187011719, "learning_rate": 1.761052479684453e-06, "loss": 22.0859, "step": 7954 }, { "epoch": 0.7382830626450116, "grad_norm": 39.29310989379883, "learning_rate": 1.7598835593006207e-06, "loss": 21.3907, "step": 7955 }, { "epoch": 0.7383758700696056, "grad_norm": 48.24034118652344, "learning_rate": 1.7587149441275236e-06, "loss": 22.7908, "step": 7956 }, { "epoch": 0.7384686774941995, "grad_norm": 45.4600830078125, "learning_rate": 1.7575466342752424e-06, "loss": 20.9379, "step": 7957 }, { "epoch": 0.7385614849187935, "grad_norm": 46.652565002441406, "learning_rate": 1.7563786298538271e-06, "loss": 23.0455, "step": 7958 }, { "epoch": 0.7386542923433874, "grad_norm": 50.9702262878418, "learning_rate": 1.7552109309733067e-06, "loss": 22.2849, "step": 7959 }, { "epoch": 0.7387470997679815, "grad_norm": 43.60974884033203, "learning_rate": 1.7540435377436677e-06, "loss": 21.8103, "step": 7960 }, { "epoch": 0.7388399071925754, "grad_norm": 43.353084564208984, "learning_rate": 1.7528764502748814e-06, "loss": 21.9974, "step": 7961 }, { "epoch": 0.7389327146171694, "grad_norm": 40.85281753540039, "learning_rate": 1.751709668676882e-06, "loss": 23.3816, "step": 7962 }, { "epoch": 0.7390255220417633, "grad_norm": 38.75143051147461, "learning_rate": 1.7505431930595779e-06, "loss": 22.2188, "step": 7963 }, { "epoch": 0.7391183294663574, "grad_norm": 41.99207305908203, "learning_rate": 1.749377023532846e-06, "loss": 23.777, "step": 7964 }, { "epoch": 0.7392111368909513, "grad_norm": 41.173240661621094, "learning_rate": 1.748211160206542e-06, "loss": 23.8625, "step": 7965 }, { "epoch": 0.7393039443155452, "grad_norm": 39.4373893737793, "learning_rate": 1.7470456031904809e-06, "loss": 21.5513, "step": 7966 }, { "epoch": 0.7393967517401392, "grad_norm": 36.84370040893555, "learning_rate": 1.745880352594459e-06, "loss": 22.1505, "step": 7967 }, { "epoch": 0.7394895591647331, "grad_norm": 40.573341369628906, "learning_rate": 1.7447154085282398e-06, "loss": 21.843, "step": 7968 }, { "epoch": 0.7395823665893272, "grad_norm": 42.9443359375, "learning_rate": 1.743550771101557e-06, "loss": 22.3964, "step": 7969 }, { "epoch": 0.7396751740139211, "grad_norm": 42.51144790649414, "learning_rate": 1.7423864404241147e-06, "loss": 23.3073, "step": 7970 }, { "epoch": 0.7397679814385151, "grad_norm": 42.499237060546875, "learning_rate": 1.7412224166055957e-06, "loss": 22.2154, "step": 7971 }, { "epoch": 0.739860788863109, "grad_norm": 40.79459762573242, "learning_rate": 1.7400586997556411e-06, "loss": 24.8415, "step": 7972 }, { "epoch": 0.739953596287703, "grad_norm": 41.49909973144531, "learning_rate": 1.738895289983875e-06, "loss": 23.0211, "step": 7973 }, { "epoch": 0.740046403712297, "grad_norm": 45.91376495361328, "learning_rate": 1.7377321873998858e-06, "loss": 24.7746, "step": 7974 }, { "epoch": 0.740139211136891, "grad_norm": 41.41473388671875, "learning_rate": 1.736569392113235e-06, "loss": 23.0728, "step": 7975 }, { "epoch": 0.7402320185614849, "grad_norm": 48.65142059326172, "learning_rate": 1.735406904233453e-06, "loss": 21.7215, "step": 7976 }, { "epoch": 0.7403248259860788, "grad_norm": 40.83098602294922, "learning_rate": 1.7342447238700494e-06, "loss": 22.6667, "step": 7977 }, { "epoch": 0.7404176334106729, "grad_norm": 41.351627349853516, "learning_rate": 1.7330828511324905e-06, "loss": 22.9695, "step": 7978 }, { "epoch": 0.7405104408352668, "grad_norm": 46.75410842895508, "learning_rate": 1.731921286130227e-06, "loss": 23.9347, "step": 7979 }, { "epoch": 0.7406032482598608, "grad_norm": 47.77284240722656, "learning_rate": 1.7307600289726745e-06, "loss": 23.8925, "step": 7980 }, { "epoch": 0.7406960556844547, "grad_norm": 41.33888244628906, "learning_rate": 1.7295990797692197e-06, "loss": 23.0748, "step": 7981 }, { "epoch": 0.7407888631090487, "grad_norm": 42.31142807006836, "learning_rate": 1.7284384386292196e-06, "loss": 25.0007, "step": 7982 }, { "epoch": 0.7408816705336427, "grad_norm": 37.45339584350586, "learning_rate": 1.7272781056620092e-06, "loss": 22.4435, "step": 7983 }, { "epoch": 0.7409744779582367, "grad_norm": 44.42240905761719, "learning_rate": 1.7261180809768812e-06, "loss": 22.9617, "step": 7984 }, { "epoch": 0.7410672853828306, "grad_norm": 39.37672424316406, "learning_rate": 1.724958364683113e-06, "loss": 21.8759, "step": 7985 }, { "epoch": 0.7411600928074246, "grad_norm": 39.64585494995117, "learning_rate": 1.7237989568899444e-06, "loss": 22.6272, "step": 7986 }, { "epoch": 0.7412529002320185, "grad_norm": 49.11962127685547, "learning_rate": 1.7226398577065895e-06, "loss": 23.3018, "step": 7987 }, { "epoch": 0.7413457076566126, "grad_norm": 37.0425910949707, "learning_rate": 1.7214810672422321e-06, "loss": 20.7575, "step": 7988 }, { "epoch": 0.7414385150812065, "grad_norm": 40.61330032348633, "learning_rate": 1.7203225856060268e-06, "loss": 24.0609, "step": 7989 }, { "epoch": 0.7415313225058004, "grad_norm": 50.03195571899414, "learning_rate": 1.7191644129070989e-06, "loss": 22.8524, "step": 7990 }, { "epoch": 0.7416241299303944, "grad_norm": 39.68755340576172, "learning_rate": 1.7180065492545483e-06, "loss": 23.2534, "step": 7991 }, { "epoch": 0.7417169373549884, "grad_norm": 40.241756439208984, "learning_rate": 1.7168489947574407e-06, "loss": 24.079, "step": 7992 }, { "epoch": 0.7418097447795824, "grad_norm": 46.574554443359375, "learning_rate": 1.7156917495248154e-06, "loss": 21.2482, "step": 7993 }, { "epoch": 0.7419025522041763, "grad_norm": 47.73391342163086, "learning_rate": 1.714534813665682e-06, "loss": 24.2838, "step": 7994 }, { "epoch": 0.7419953596287703, "grad_norm": 41.41178894042969, "learning_rate": 1.7133781872890188e-06, "loss": 22.9449, "step": 7995 }, { "epoch": 0.7420881670533642, "grad_norm": 35.03352355957031, "learning_rate": 1.7122218705037803e-06, "loss": 24.3545, "step": 7996 }, { "epoch": 0.7421809744779583, "grad_norm": 41.041629791259766, "learning_rate": 1.7110658634188876e-06, "loss": 21.9429, "step": 7997 }, { "epoch": 0.7422737819025522, "grad_norm": 43.1103630065918, "learning_rate": 1.7099101661432326e-06, "loss": 23.2925, "step": 7998 }, { "epoch": 0.7423665893271462, "grad_norm": 48.104190826416016, "learning_rate": 1.7087547787856785e-06, "loss": 25.5962, "step": 7999 }, { "epoch": 0.7424593967517401, "grad_norm": 43.22578811645508, "learning_rate": 1.7075997014550645e-06, "loss": 22.7859, "step": 8000 }, { "epoch": 0.742552204176334, "grad_norm": 41.311065673828125, "learning_rate": 1.706444934260189e-06, "loss": 23.2616, "step": 8001 }, { "epoch": 0.7426450116009281, "grad_norm": 45.64255142211914, "learning_rate": 1.7052904773098333e-06, "loss": 21.9805, "step": 8002 }, { "epoch": 0.742737819025522, "grad_norm": 46.2102165222168, "learning_rate": 1.7041363307127422e-06, "loss": 24.7591, "step": 8003 }, { "epoch": 0.742830626450116, "grad_norm": 47.16775894165039, "learning_rate": 1.7029824945776346e-06, "loss": 23.45, "step": 8004 }, { "epoch": 0.7429234338747099, "grad_norm": 75.8892593383789, "learning_rate": 1.7018289690131962e-06, "loss": 24.0503, "step": 8005 }, { "epoch": 0.743016241299304, "grad_norm": 50.25883102416992, "learning_rate": 1.700675754128091e-06, "loss": 22.261, "step": 8006 }, { "epoch": 0.7431090487238979, "grad_norm": 48.9502067565918, "learning_rate": 1.6995228500309435e-06, "loss": 21.8889, "step": 8007 }, { "epoch": 0.7432018561484919, "grad_norm": 41.061954498291016, "learning_rate": 1.6983702568303578e-06, "loss": 25.234, "step": 8008 }, { "epoch": 0.7432946635730858, "grad_norm": 44.796241760253906, "learning_rate": 1.697217974634905e-06, "loss": 25.4337, "step": 8009 }, { "epoch": 0.7433874709976798, "grad_norm": 39.170433044433594, "learning_rate": 1.6960660035531256e-06, "loss": 22.939, "step": 8010 }, { "epoch": 0.7434802784222738, "grad_norm": 40.534481048583984, "learning_rate": 1.6949143436935323e-06, "loss": 22.7432, "step": 8011 }, { "epoch": 0.7435730858468678, "grad_norm": 39.18068313598633, "learning_rate": 1.6937629951646123e-06, "loss": 23.5795, "step": 8012 }, { "epoch": 0.7436658932714617, "grad_norm": 40.66992950439453, "learning_rate": 1.692611958074814e-06, "loss": 23.4723, "step": 8013 }, { "epoch": 0.7437587006960557, "grad_norm": 47.76400375366211, "learning_rate": 1.6914612325325663e-06, "loss": 21.8178, "step": 8014 }, { "epoch": 0.7438515081206497, "grad_norm": 42.32775115966797, "learning_rate": 1.6903108186462635e-06, "loss": 23.5085, "step": 8015 }, { "epoch": 0.7439443155452437, "grad_norm": 40.033424377441406, "learning_rate": 1.6891607165242718e-06, "loss": 21.5909, "step": 8016 }, { "epoch": 0.7440371229698376, "grad_norm": 43.41678237915039, "learning_rate": 1.6880109262749255e-06, "loss": 23.996, "step": 8017 }, { "epoch": 0.7441299303944315, "grad_norm": 41.68964767456055, "learning_rate": 1.6868614480065376e-06, "loss": 21.592, "step": 8018 }, { "epoch": 0.7442227378190255, "grad_norm": 40.87068176269531, "learning_rate": 1.685712281827379e-06, "loss": 23.7521, "step": 8019 }, { "epoch": 0.7443155452436195, "grad_norm": 40.44187927246094, "learning_rate": 1.6845634278457035e-06, "loss": 23.5679, "step": 8020 }, { "epoch": 0.7444083526682135, "grad_norm": 39.180259704589844, "learning_rate": 1.6834148861697285e-06, "loss": 22.7365, "step": 8021 }, { "epoch": 0.7445011600928074, "grad_norm": 45.376617431640625, "learning_rate": 1.6822666569076434e-06, "loss": 23.9412, "step": 8022 }, { "epoch": 0.7445939675174014, "grad_norm": 41.70014953613281, "learning_rate": 1.6811187401676093e-06, "loss": 23.1376, "step": 8023 }, { "epoch": 0.7446867749419953, "grad_norm": 43.931034088134766, "learning_rate": 1.679971136057756e-06, "loss": 23.6351, "step": 8024 }, { "epoch": 0.7447795823665894, "grad_norm": 41.3137092590332, "learning_rate": 1.6788238446861837e-06, "loss": 22.44, "step": 8025 }, { "epoch": 0.7448723897911833, "grad_norm": 43.291175842285156, "learning_rate": 1.6776768661609693e-06, "loss": 23.6178, "step": 8026 }, { "epoch": 0.7449651972157773, "grad_norm": 42.8687858581543, "learning_rate": 1.6765302005901484e-06, "loss": 21.4491, "step": 8027 }, { "epoch": 0.7450580046403712, "grad_norm": 39.94051742553711, "learning_rate": 1.6753838480817397e-06, "loss": 20.3117, "step": 8028 }, { "epoch": 0.7451508120649653, "grad_norm": 40.060630798339844, "learning_rate": 1.6742378087437239e-06, "loss": 22.6385, "step": 8029 }, { "epoch": 0.7452436194895592, "grad_norm": 44.08261489868164, "learning_rate": 1.6730920826840564e-06, "loss": 24.0017, "step": 8030 }, { "epoch": 0.7453364269141531, "grad_norm": 37.87934112548828, "learning_rate": 1.6719466700106586e-06, "loss": 21.707, "step": 8031 }, { "epoch": 0.7454292343387471, "grad_norm": 39.97831344604492, "learning_rate": 1.6708015708314295e-06, "loss": 22.674, "step": 8032 }, { "epoch": 0.745522041763341, "grad_norm": 41.576412200927734, "learning_rate": 1.6696567852542328e-06, "loss": 23.6749, "step": 8033 }, { "epoch": 0.7456148491879351, "grad_norm": 47.51845932006836, "learning_rate": 1.6685123133869046e-06, "loss": 22.8914, "step": 8034 }, { "epoch": 0.745707656612529, "grad_norm": 40.25841522216797, "learning_rate": 1.6673681553372505e-06, "loss": 22.6285, "step": 8035 }, { "epoch": 0.745800464037123, "grad_norm": 56.14592361450195, "learning_rate": 1.6662243112130483e-06, "loss": 22.8321, "step": 8036 }, { "epoch": 0.7458932714617169, "grad_norm": 43.840511322021484, "learning_rate": 1.6650807811220433e-06, "loss": 22.1785, "step": 8037 }, { "epoch": 0.7459860788863109, "grad_norm": 53.8661003112793, "learning_rate": 1.6639375651719558e-06, "loss": 22.8326, "step": 8038 }, { "epoch": 0.7460788863109049, "grad_norm": 40.42652130126953, "learning_rate": 1.6627946634704722e-06, "loss": 21.2939, "step": 8039 }, { "epoch": 0.7461716937354989, "grad_norm": 44.669525146484375, "learning_rate": 1.661652076125252e-06, "loss": 20.8062, "step": 8040 }, { "epoch": 0.7462645011600928, "grad_norm": 40.61602783203125, "learning_rate": 1.6605098032439232e-06, "loss": 23.779, "step": 8041 }, { "epoch": 0.7463573085846867, "grad_norm": 45.41217041015625, "learning_rate": 1.6593678449340833e-06, "loss": 21.6512, "step": 8042 }, { "epoch": 0.7464501160092808, "grad_norm": 45.99013137817383, "learning_rate": 1.658226201303305e-06, "loss": 23.2777, "step": 8043 }, { "epoch": 0.7465429234338747, "grad_norm": 41.89171600341797, "learning_rate": 1.6570848724591276e-06, "loss": 21.3848, "step": 8044 }, { "epoch": 0.7466357308584687, "grad_norm": 43.40046691894531, "learning_rate": 1.6559438585090608e-06, "loss": 23.6965, "step": 8045 }, { "epoch": 0.7467285382830626, "grad_norm": 46.68951416015625, "learning_rate": 1.6548031595605829e-06, "loss": 23.5134, "step": 8046 }, { "epoch": 0.7468213457076566, "grad_norm": 44.679443359375, "learning_rate": 1.653662775721151e-06, "loss": 23.1855, "step": 8047 }, { "epoch": 0.7469141531322506, "grad_norm": 43.62125778198242, "learning_rate": 1.6525227070981787e-06, "loss": 21.4701, "step": 8048 }, { "epoch": 0.7470069605568446, "grad_norm": 40.55266189575195, "learning_rate": 1.6513829537990628e-06, "loss": 23.7445, "step": 8049 }, { "epoch": 0.7470997679814385, "grad_norm": 45.4840087890625, "learning_rate": 1.6502435159311642e-06, "loss": 21.3529, "step": 8050 }, { "epoch": 0.7471925754060325, "grad_norm": 43.52858352661133, "learning_rate": 1.6491043936018141e-06, "loss": 24.7851, "step": 8051 }, { "epoch": 0.7472853828306264, "grad_norm": 40.299415588378906, "learning_rate": 1.6479655869183142e-06, "loss": 25.395, "step": 8052 }, { "epoch": 0.7473781902552205, "grad_norm": 48.166099548339844, "learning_rate": 1.6468270959879413e-06, "loss": 22.3233, "step": 8053 }, { "epoch": 0.7474709976798144, "grad_norm": 43.59721755981445, "learning_rate": 1.6456889209179322e-06, "loss": 21.912, "step": 8054 }, { "epoch": 0.7475638051044083, "grad_norm": 42.92112350463867, "learning_rate": 1.6445510618155048e-06, "loss": 22.5597, "step": 8055 }, { "epoch": 0.7476566125290023, "grad_norm": 42.52131652832031, "learning_rate": 1.643413518787841e-06, "loss": 25.485, "step": 8056 }, { "epoch": 0.7477494199535963, "grad_norm": 41.232120513916016, "learning_rate": 1.6422762919420942e-06, "loss": 22.7443, "step": 8057 }, { "epoch": 0.7478422273781903, "grad_norm": 43.7713623046875, "learning_rate": 1.6411393813853893e-06, "loss": 22.3854, "step": 8058 }, { "epoch": 0.7479350348027842, "grad_norm": 44.23508834838867, "learning_rate": 1.6400027872248193e-06, "loss": 24.1563, "step": 8059 }, { "epoch": 0.7480278422273782, "grad_norm": 43.52193069458008, "learning_rate": 1.6388665095674471e-06, "loss": 21.5921, "step": 8060 }, { "epoch": 0.7481206496519721, "grad_norm": 50.898033142089844, "learning_rate": 1.6377305485203127e-06, "loss": 23.1837, "step": 8061 }, { "epoch": 0.7482134570765662, "grad_norm": 42.480567932128906, "learning_rate": 1.6365949041904133e-06, "loss": 22.9574, "step": 8062 }, { "epoch": 0.7483062645011601, "grad_norm": 41.31928253173828, "learning_rate": 1.6354595766847287e-06, "loss": 22.1497, "step": 8063 }, { "epoch": 0.7483990719257541, "grad_norm": 48.8806266784668, "learning_rate": 1.6343245661102031e-06, "loss": 23.3016, "step": 8064 }, { "epoch": 0.748491879350348, "grad_norm": 43.33990478515625, "learning_rate": 1.6331898725737505e-06, "loss": 23.9681, "step": 8065 }, { "epoch": 0.7485846867749419, "grad_norm": 44.86664581298828, "learning_rate": 1.6320554961822554e-06, "loss": 24.9541, "step": 8066 }, { "epoch": 0.748677494199536, "grad_norm": 45.30389404296875, "learning_rate": 1.630921437042578e-06, "loss": 24.3814, "step": 8067 }, { "epoch": 0.7487703016241299, "grad_norm": 39.35531234741211, "learning_rate": 1.6297876952615367e-06, "loss": 21.3687, "step": 8068 }, { "epoch": 0.7488631090487239, "grad_norm": 39.19651794433594, "learning_rate": 1.6286542709459318e-06, "loss": 20.697, "step": 8069 }, { "epoch": 0.7489559164733178, "grad_norm": 43.91836166381836, "learning_rate": 1.6275211642025285e-06, "loss": 24.6339, "step": 8070 }, { "epoch": 0.7490487238979119, "grad_norm": 57.47885513305664, "learning_rate": 1.6263883751380616e-06, "loss": 21.2826, "step": 8071 }, { "epoch": 0.7491415313225058, "grad_norm": 40.87251281738281, "learning_rate": 1.6252559038592359e-06, "loss": 22.4541, "step": 8072 }, { "epoch": 0.7492343387470998, "grad_norm": 45.751121520996094, "learning_rate": 1.624123750472732e-06, "loss": 24.8485, "step": 8073 }, { "epoch": 0.7493271461716937, "grad_norm": 41.279117584228516, "learning_rate": 1.6229919150851892e-06, "loss": 22.6946, "step": 8074 }, { "epoch": 0.7494199535962877, "grad_norm": 39.05118179321289, "learning_rate": 1.6218603978032283e-06, "loss": 23.2986, "step": 8075 }, { "epoch": 0.7495127610208817, "grad_norm": 42.505615234375, "learning_rate": 1.620729198733434e-06, "loss": 24.9404, "step": 8076 }, { "epoch": 0.7496055684454757, "grad_norm": 45.73093032836914, "learning_rate": 1.6195983179823622e-06, "loss": 21.9162, "step": 8077 }, { "epoch": 0.7496983758700696, "grad_norm": 40.70046615600586, "learning_rate": 1.6184677556565376e-06, "loss": 24.683, "step": 8078 }, { "epoch": 0.7497911832946635, "grad_norm": 48.762718200683594, "learning_rate": 1.6173375118624595e-06, "loss": 22.9327, "step": 8079 }, { "epoch": 0.7498839907192575, "grad_norm": 57.15214538574219, "learning_rate": 1.6162075867065924e-06, "loss": 22.7493, "step": 8080 }, { "epoch": 0.7499767981438515, "grad_norm": 40.530120849609375, "learning_rate": 1.615077980295372e-06, "loss": 21.7243, "step": 8081 }, { "epoch": 0.7500696055684455, "grad_norm": 39.395896911621094, "learning_rate": 1.6139486927352048e-06, "loss": 24.2337, "step": 8082 }, { "epoch": 0.7501624129930394, "grad_norm": 43.87650680541992, "learning_rate": 1.6128197241324667e-06, "loss": 22.5762, "step": 8083 }, { "epoch": 0.7502552204176334, "grad_norm": 41.904762268066406, "learning_rate": 1.6116910745935016e-06, "loss": 24.4352, "step": 8084 }, { "epoch": 0.7503480278422274, "grad_norm": 46.35160827636719, "learning_rate": 1.6105627442246292e-06, "loss": 24.0507, "step": 8085 }, { "epoch": 0.7504408352668214, "grad_norm": 45.569488525390625, "learning_rate": 1.6094347331321336e-06, "loss": 23.7313, "step": 8086 }, { "epoch": 0.7505336426914153, "grad_norm": 45.753517150878906, "learning_rate": 1.6083070414222706e-06, "loss": 22.5574, "step": 8087 }, { "epoch": 0.7506264501160093, "grad_norm": 43.90756607055664, "learning_rate": 1.6071796692012663e-06, "loss": 21.1927, "step": 8088 }, { "epoch": 0.7507192575406032, "grad_norm": 46.216888427734375, "learning_rate": 1.6060526165753137e-06, "loss": 22.3503, "step": 8089 }, { "epoch": 0.7508120649651973, "grad_norm": 42.77340316772461, "learning_rate": 1.6049258836505821e-06, "loss": 24.3973, "step": 8090 }, { "epoch": 0.7509048723897912, "grad_norm": 42.86940383911133, "learning_rate": 1.6037994705332055e-06, "loss": 21.0868, "step": 8091 }, { "epoch": 0.7509976798143851, "grad_norm": 40.59014129638672, "learning_rate": 1.6026733773292892e-06, "loss": 21.5049, "step": 8092 }, { "epoch": 0.7510904872389791, "grad_norm": 46.33759307861328, "learning_rate": 1.6015476041449074e-06, "loss": 22.4003, "step": 8093 }, { "epoch": 0.751183294663573, "grad_norm": 49.81652069091797, "learning_rate": 1.6004221510861057e-06, "loss": 21.3716, "step": 8094 }, { "epoch": 0.7512761020881671, "grad_norm": 46.99300003051758, "learning_rate": 1.5992970182588974e-06, "loss": 20.9814, "step": 8095 }, { "epoch": 0.751368909512761, "grad_norm": 36.0236701965332, "learning_rate": 1.5981722057692716e-06, "loss": 22.9709, "step": 8096 }, { "epoch": 0.751461716937355, "grad_norm": 45.508331298828125, "learning_rate": 1.5970477137231771e-06, "loss": 23.9107, "step": 8097 }, { "epoch": 0.7515545243619489, "grad_norm": 41.02193069458008, "learning_rate": 1.5959235422265422e-06, "loss": 21.7141, "step": 8098 }, { "epoch": 0.751647331786543, "grad_norm": 50.13432312011719, "learning_rate": 1.5947996913852598e-06, "loss": 24.0318, "step": 8099 }, { "epoch": 0.7517401392111369, "grad_norm": 37.33250045776367, "learning_rate": 1.5936761613051937e-06, "loss": 21.7732, "step": 8100 }, { "epoch": 0.7518329466357309, "grad_norm": 45.51816940307617, "learning_rate": 1.5925529520921763e-06, "loss": 21.86, "step": 8101 }, { "epoch": 0.7519257540603248, "grad_norm": 50.058319091796875, "learning_rate": 1.5914300638520152e-06, "loss": 25.6535, "step": 8102 }, { "epoch": 0.7520185614849187, "grad_norm": 51.95301055908203, "learning_rate": 1.5903074966904775e-06, "loss": 21.8894, "step": 8103 }, { "epoch": 0.7521113689095128, "grad_norm": 48.36713790893555, "learning_rate": 1.589185250713311e-06, "loss": 22.7795, "step": 8104 }, { "epoch": 0.7522041763341067, "grad_norm": 40.343719482421875, "learning_rate": 1.5880633260262274e-06, "loss": 22.6224, "step": 8105 }, { "epoch": 0.7522969837587007, "grad_norm": 43.4578742980957, "learning_rate": 1.5869417227349077e-06, "loss": 24.2581, "step": 8106 }, { "epoch": 0.7523897911832946, "grad_norm": 42.97850036621094, "learning_rate": 1.585820440945004e-06, "loss": 22.6972, "step": 8107 }, { "epoch": 0.7524825986078887, "grad_norm": 45.96971130371094, "learning_rate": 1.5846994807621423e-06, "loss": 23.6744, "step": 8108 }, { "epoch": 0.7525754060324826, "grad_norm": 41.499027252197266, "learning_rate": 1.583578842291908e-06, "loss": 22.3004, "step": 8109 }, { "epoch": 0.7526682134570766, "grad_norm": 45.358821868896484, "learning_rate": 1.5824585256398668e-06, "loss": 22.139, "step": 8110 }, { "epoch": 0.7527610208816705, "grad_norm": 47.16853332519531, "learning_rate": 1.5813385309115487e-06, "loss": 22.9587, "step": 8111 }, { "epoch": 0.7528538283062645, "grad_norm": 51.439842224121094, "learning_rate": 1.580218858212454e-06, "loss": 23.1923, "step": 8112 }, { "epoch": 0.7529466357308585, "grad_norm": 47.560768127441406, "learning_rate": 1.5790995076480508e-06, "loss": 24.2899, "step": 8113 }, { "epoch": 0.7530394431554525, "grad_norm": 44.722354888916016, "learning_rate": 1.5779804793237851e-06, "loss": 23.0637, "step": 8114 }, { "epoch": 0.7531322505800464, "grad_norm": 41.57172393798828, "learning_rate": 1.5768617733450596e-06, "loss": 22.5324, "step": 8115 }, { "epoch": 0.7532250580046403, "grad_norm": 51.28605651855469, "learning_rate": 1.575743389817258e-06, "loss": 22.73, "step": 8116 }, { "epoch": 0.7533178654292343, "grad_norm": 45.083431243896484, "learning_rate": 1.574625328845728e-06, "loss": 22.4086, "step": 8117 }, { "epoch": 0.7534106728538283, "grad_norm": 39.62190246582031, "learning_rate": 1.5735075905357882e-06, "loss": 23.8904, "step": 8118 }, { "epoch": 0.7535034802784223, "grad_norm": 49.17794418334961, "learning_rate": 1.572390174992725e-06, "loss": 22.4921, "step": 8119 }, { "epoch": 0.7535962877030162, "grad_norm": 51.91410446166992, "learning_rate": 1.5712730823218004e-06, "loss": 24.5259, "step": 8120 }, { "epoch": 0.7536890951276102, "grad_norm": 45.41508483886719, "learning_rate": 1.570156312628236e-06, "loss": 21.9196, "step": 8121 }, { "epoch": 0.7537819025522042, "grad_norm": 44.55122756958008, "learning_rate": 1.569039866017233e-06, "loss": 22.7096, "step": 8122 }, { "epoch": 0.7538747099767982, "grad_norm": 46.1175537109375, "learning_rate": 1.5679237425939564e-06, "loss": 23.2027, "step": 8123 }, { "epoch": 0.7539675174013921, "grad_norm": 41.960506439208984, "learning_rate": 1.5668079424635424e-06, "loss": 25.7966, "step": 8124 }, { "epoch": 0.7540603248259861, "grad_norm": 45.446128845214844, "learning_rate": 1.5656924657310947e-06, "loss": 25.4706, "step": 8125 }, { "epoch": 0.75415313225058, "grad_norm": 43.518699645996094, "learning_rate": 1.5645773125016911e-06, "loss": 21.8563, "step": 8126 }, { "epoch": 0.7542459396751741, "grad_norm": 49.59709167480469, "learning_rate": 1.5634624828803758e-06, "loss": 23.2454, "step": 8127 }, { "epoch": 0.754338747099768, "grad_norm": 48.919029235839844, "learning_rate": 1.5623479769721618e-06, "loss": 23.997, "step": 8128 }, { "epoch": 0.754431554524362, "grad_norm": 46.0756950378418, "learning_rate": 1.5612337948820334e-06, "loss": 22.9653, "step": 8129 }, { "epoch": 0.7545243619489559, "grad_norm": 48.81376266479492, "learning_rate": 1.5601199367149432e-06, "loss": 22.9571, "step": 8130 }, { "epoch": 0.7546171693735498, "grad_norm": 52.97751235961914, "learning_rate": 1.559006402575814e-06, "loss": 23.9919, "step": 8131 }, { "epoch": 0.7547099767981439, "grad_norm": 57.5828857421875, "learning_rate": 1.5578931925695367e-06, "loss": 23.3883, "step": 8132 }, { "epoch": 0.7548027842227378, "grad_norm": 42.10822677612305, "learning_rate": 1.5567803068009757e-06, "loss": 23.4992, "step": 8133 }, { "epoch": 0.7548955916473318, "grad_norm": 50.01456069946289, "learning_rate": 1.5556677453749603e-06, "loss": 23.1487, "step": 8134 }, { "epoch": 0.7549883990719257, "grad_norm": 54.089111328125, "learning_rate": 1.554555508396291e-06, "loss": 22.7204, "step": 8135 }, { "epoch": 0.7550812064965198, "grad_norm": 42.75338363647461, "learning_rate": 1.5534435959697363e-06, "loss": 21.6567, "step": 8136 }, { "epoch": 0.7551740139211137, "grad_norm": 45.06753158569336, "learning_rate": 1.5523320082000398e-06, "loss": 22.8297, "step": 8137 }, { "epoch": 0.7552668213457077, "grad_norm": 49.754417419433594, "learning_rate": 1.5512207451919049e-06, "loss": 23.1926, "step": 8138 }, { "epoch": 0.7553596287703016, "grad_norm": 45.88603210449219, "learning_rate": 1.5501098070500136e-06, "loss": 24.0219, "step": 8139 }, { "epoch": 0.7554524361948955, "grad_norm": 42.618408203125, "learning_rate": 1.5489991938790123e-06, "loss": 21.458, "step": 8140 }, { "epoch": 0.7555452436194896, "grad_norm": 59.39434814453125, "learning_rate": 1.5478889057835179e-06, "loss": 24.5121, "step": 8141 }, { "epoch": 0.7556380510440835, "grad_norm": 50.67658996582031, "learning_rate": 1.5467789428681145e-06, "loss": 22.9852, "step": 8142 }, { "epoch": 0.7557308584686775, "grad_norm": 37.586978912353516, "learning_rate": 1.5456693052373634e-06, "loss": 22.3858, "step": 8143 }, { "epoch": 0.7558236658932714, "grad_norm": 50.10877990722656, "learning_rate": 1.5445599929957827e-06, "loss": 22.7914, "step": 8144 }, { "epoch": 0.7559164733178654, "grad_norm": 51.90781021118164, "learning_rate": 1.5434510062478724e-06, "loss": 21.5046, "step": 8145 }, { "epoch": 0.7560092807424594, "grad_norm": 41.59897232055664, "learning_rate": 1.542342345098094e-06, "loss": 23.8132, "step": 8146 }, { "epoch": 0.7561020881670534, "grad_norm": 45.78547286987305, "learning_rate": 1.5412340096508798e-06, "loss": 23.0747, "step": 8147 }, { "epoch": 0.7561948955916473, "grad_norm": 42.01878356933594, "learning_rate": 1.5401260000106321e-06, "loss": 23.9421, "step": 8148 }, { "epoch": 0.7562877030162413, "grad_norm": 40.15046691894531, "learning_rate": 1.539018316281727e-06, "loss": 22.7849, "step": 8149 }, { "epoch": 0.7563805104408353, "grad_norm": 40.74738693237305, "learning_rate": 1.537910958568498e-06, "loss": 23.0779, "step": 8150 }, { "epoch": 0.7564733178654293, "grad_norm": 45.13121032714844, "learning_rate": 1.5368039269752616e-06, "loss": 23.6133, "step": 8151 }, { "epoch": 0.7565661252900232, "grad_norm": 40.632144927978516, "learning_rate": 1.535697221606295e-06, "loss": 22.7918, "step": 8152 }, { "epoch": 0.7566589327146171, "grad_norm": 47.76820755004883, "learning_rate": 1.5345908425658474e-06, "loss": 24.0838, "step": 8153 }, { "epoch": 0.7567517401392111, "grad_norm": 39.77253723144531, "learning_rate": 1.5334847899581344e-06, "loss": 23.1101, "step": 8154 }, { "epoch": 0.7568445475638051, "grad_norm": 48.43281936645508, "learning_rate": 1.5323790638873493e-06, "loss": 24.3967, "step": 8155 }, { "epoch": 0.7569373549883991, "grad_norm": 47.20525360107422, "learning_rate": 1.531273664457642e-06, "loss": 22.7795, "step": 8156 }, { "epoch": 0.757030162412993, "grad_norm": 49.304786682128906, "learning_rate": 1.5301685917731423e-06, "loss": 23.5113, "step": 8157 }, { "epoch": 0.757122969837587, "grad_norm": 44.60597229003906, "learning_rate": 1.5290638459379447e-06, "loss": 24.5127, "step": 8158 }, { "epoch": 0.7572157772621809, "grad_norm": 39.4039421081543, "learning_rate": 1.5279594270561133e-06, "loss": 21.8598, "step": 8159 }, { "epoch": 0.757308584686775, "grad_norm": 45.691551208496094, "learning_rate": 1.526855335231679e-06, "loss": 24.0654, "step": 8160 }, { "epoch": 0.7574013921113689, "grad_norm": 44.8150520324707, "learning_rate": 1.5257515705686504e-06, "loss": 23.6444, "step": 8161 }, { "epoch": 0.7574941995359629, "grad_norm": 42.90151596069336, "learning_rate": 1.5246481331709922e-06, "loss": 23.5045, "step": 8162 }, { "epoch": 0.7575870069605568, "grad_norm": 50.275306701660156, "learning_rate": 1.5235450231426501e-06, "loss": 21.6155, "step": 8163 }, { "epoch": 0.7576798143851509, "grad_norm": 45.40434265136719, "learning_rate": 1.522442240587534e-06, "loss": 24.2153, "step": 8164 }, { "epoch": 0.7577726218097448, "grad_norm": 47.03071975708008, "learning_rate": 1.5213397856095213e-06, "loss": 23.1866, "step": 8165 }, { "epoch": 0.7578654292343387, "grad_norm": 45.014034271240234, "learning_rate": 1.5202376583124617e-06, "loss": 22.8682, "step": 8166 }, { "epoch": 0.7579582366589327, "grad_norm": 48.81307601928711, "learning_rate": 1.519135858800172e-06, "loss": 23.5779, "step": 8167 }, { "epoch": 0.7580510440835266, "grad_norm": 48.791072845458984, "learning_rate": 1.5180343871764387e-06, "loss": 23.9542, "step": 8168 }, { "epoch": 0.7581438515081207, "grad_norm": 39.1357536315918, "learning_rate": 1.516933243545019e-06, "loss": 23.6804, "step": 8169 }, { "epoch": 0.7582366589327146, "grad_norm": 46.09904861450195, "learning_rate": 1.5158324280096376e-06, "loss": 23.8787, "step": 8170 }, { "epoch": 0.7583294663573086, "grad_norm": 46.32526779174805, "learning_rate": 1.514731940673988e-06, "loss": 22.9231, "step": 8171 }, { "epoch": 0.7584222737819025, "grad_norm": 48.1141357421875, "learning_rate": 1.5136317816417333e-06, "loss": 24.2492, "step": 8172 }, { "epoch": 0.7585150812064965, "grad_norm": 44.20439147949219, "learning_rate": 1.5125319510165043e-06, "loss": 20.7047, "step": 8173 }, { "epoch": 0.7586078886310905, "grad_norm": 43.68159103393555, "learning_rate": 1.511432448901905e-06, "loss": 25.144, "step": 8174 }, { "epoch": 0.7587006960556845, "grad_norm": 50.78133773803711, "learning_rate": 1.510333275401505e-06, "loss": 23.2302, "step": 8175 }, { "epoch": 0.7587935034802784, "grad_norm": 42.55486297607422, "learning_rate": 1.5092344306188427e-06, "loss": 21.8821, "step": 8176 }, { "epoch": 0.7588863109048724, "grad_norm": 43.41671371459961, "learning_rate": 1.5081359146574266e-06, "loss": 23.0774, "step": 8177 }, { "epoch": 0.7589791183294664, "grad_norm": 42.87071228027344, "learning_rate": 1.5070377276207348e-06, "loss": 21.8206, "step": 8178 }, { "epoch": 0.7590719257540604, "grad_norm": 47.39765548706055, "learning_rate": 1.5059398696122112e-06, "loss": 21.7358, "step": 8179 }, { "epoch": 0.7591647331786543, "grad_norm": 52.66228103637695, "learning_rate": 1.5048423407352752e-06, "loss": 23.3867, "step": 8180 }, { "epoch": 0.7592575406032482, "grad_norm": 36.861114501953125, "learning_rate": 1.5037451410933096e-06, "loss": 23.1576, "step": 8181 }, { "epoch": 0.7593503480278422, "grad_norm": 46.294124603271484, "learning_rate": 1.5026482707896672e-06, "loss": 22.0542, "step": 8182 }, { "epoch": 0.7594431554524362, "grad_norm": 48.05995559692383, "learning_rate": 1.5015517299276695e-06, "loss": 24.2331, "step": 8183 }, { "epoch": 0.7595359628770302, "grad_norm": 38.9955940246582, "learning_rate": 1.5004555186106124e-06, "loss": 22.6521, "step": 8184 }, { "epoch": 0.7596287703016241, "grad_norm": 48.3316650390625, "learning_rate": 1.4993596369417496e-06, "loss": 21.8305, "step": 8185 }, { "epoch": 0.7597215777262181, "grad_norm": 45.018165588378906, "learning_rate": 1.498264085024315e-06, "loss": 21.9898, "step": 8186 }, { "epoch": 0.759814385150812, "grad_norm": 53.173404693603516, "learning_rate": 1.4971688629615056e-06, "loss": 23.1792, "step": 8187 }, { "epoch": 0.7599071925754061, "grad_norm": 48.14506530761719, "learning_rate": 1.4960739708564886e-06, "loss": 22.8905, "step": 8188 }, { "epoch": 0.76, "grad_norm": 40.05710220336914, "learning_rate": 1.4949794088123988e-06, "loss": 23.0606, "step": 8189 }, { "epoch": 0.760092807424594, "grad_norm": 49.823368072509766, "learning_rate": 1.4938851769323449e-06, "loss": 23.1757, "step": 8190 }, { "epoch": 0.7601856148491879, "grad_norm": 54.53566360473633, "learning_rate": 1.4927912753193947e-06, "loss": 23.8924, "step": 8191 }, { "epoch": 0.760278422273782, "grad_norm": 57.92000961303711, "learning_rate": 1.4916977040765961e-06, "loss": 22.3922, "step": 8192 }, { "epoch": 0.7603712296983759, "grad_norm": 45.54944610595703, "learning_rate": 1.4906044633069594e-06, "loss": 24.0055, "step": 8193 }, { "epoch": 0.7604640371229698, "grad_norm": 244.5059356689453, "learning_rate": 1.4895115531134647e-06, "loss": 22.713, "step": 8194 }, { "epoch": 0.7605568445475638, "grad_norm": 47.62857437133789, "learning_rate": 1.4884189735990602e-06, "loss": 22.4946, "step": 8195 }, { "epoch": 0.7606496519721577, "grad_norm": 51.53895950317383, "learning_rate": 1.487326724866668e-06, "loss": 22.1943, "step": 8196 }, { "epoch": 0.7607424593967518, "grad_norm": 39.14374923706055, "learning_rate": 1.486234807019169e-06, "loss": 22.2527, "step": 8197 }, { "epoch": 0.7608352668213457, "grad_norm": 44.377506256103516, "learning_rate": 1.4851432201594246e-06, "loss": 23.2118, "step": 8198 }, { "epoch": 0.7609280742459397, "grad_norm": 47.410919189453125, "learning_rate": 1.4840519643902574e-06, "loss": 21.9029, "step": 8199 }, { "epoch": 0.7610208816705336, "grad_norm": 49.81077575683594, "learning_rate": 1.4829610398144606e-06, "loss": 21.4171, "step": 8200 }, { "epoch": 0.7611136890951277, "grad_norm": 37.70271301269531, "learning_rate": 1.4818704465347972e-06, "loss": 22.9456, "step": 8201 }, { "epoch": 0.7612064965197216, "grad_norm": 46.51840591430664, "learning_rate": 1.4807801846539977e-06, "loss": 23.8557, "step": 8202 }, { "epoch": 0.7612993039443156, "grad_norm": 49.47417449951172, "learning_rate": 1.479690254274761e-06, "loss": 23.8031, "step": 8203 }, { "epoch": 0.7613921113689095, "grad_norm": 43.53385925292969, "learning_rate": 1.4786006554997605e-06, "loss": 22.6625, "step": 8204 }, { "epoch": 0.7614849187935034, "grad_norm": 40.593544006347656, "learning_rate": 1.4775113884316267e-06, "loss": 24.5458, "step": 8205 }, { "epoch": 0.7615777262180975, "grad_norm": 38.28972625732422, "learning_rate": 1.4764224531729709e-06, "loss": 22.3478, "step": 8206 }, { "epoch": 0.7616705336426914, "grad_norm": 42.90385055541992, "learning_rate": 1.475333849826367e-06, "loss": 24.25, "step": 8207 }, { "epoch": 0.7617633410672854, "grad_norm": 42.97233963012695, "learning_rate": 1.4742455784943576e-06, "loss": 22.9487, "step": 8208 }, { "epoch": 0.7618561484918793, "grad_norm": 40.115909576416016, "learning_rate": 1.4731576392794539e-06, "loss": 23.6222, "step": 8209 }, { "epoch": 0.7619489559164733, "grad_norm": 40.17276382446289, "learning_rate": 1.4720700322841408e-06, "loss": 25.2919, "step": 8210 }, { "epoch": 0.7620417633410673, "grad_norm": 37.58049392700195, "learning_rate": 1.4709827576108653e-06, "loss": 21.7051, "step": 8211 }, { "epoch": 0.7621345707656613, "grad_norm": 39.07350158691406, "learning_rate": 1.4698958153620474e-06, "loss": 20.8998, "step": 8212 }, { "epoch": 0.7622273781902552, "grad_norm": 44.5604362487793, "learning_rate": 1.4688092056400732e-06, "loss": 23.9015, "step": 8213 }, { "epoch": 0.7623201856148492, "grad_norm": 43.62387466430664, "learning_rate": 1.4677229285472988e-06, "loss": 23.899, "step": 8214 }, { "epoch": 0.7624129930394432, "grad_norm": 41.55075454711914, "learning_rate": 1.4666369841860474e-06, "loss": 21.2853, "step": 8215 }, { "epoch": 0.7625058004640372, "grad_norm": 49.98456573486328, "learning_rate": 1.4655513726586158e-06, "loss": 24.1063, "step": 8216 }, { "epoch": 0.7625986078886311, "grad_norm": 39.03759002685547, "learning_rate": 1.4644660940672628e-06, "loss": 22.2941, "step": 8217 }, { "epoch": 0.762691415313225, "grad_norm": 42.837406158447266, "learning_rate": 1.4633811485142202e-06, "loss": 22.1544, "step": 8218 }, { "epoch": 0.762784222737819, "grad_norm": 43.71394729614258, "learning_rate": 1.4622965361016866e-06, "loss": 22.8408, "step": 8219 }, { "epoch": 0.762877030162413, "grad_norm": 38.835182189941406, "learning_rate": 1.4612122569318282e-06, "loss": 24.1993, "step": 8220 }, { "epoch": 0.762969837587007, "grad_norm": 50.61320114135742, "learning_rate": 1.460128311106785e-06, "loss": 21.2165, "step": 8221 }, { "epoch": 0.7630626450116009, "grad_norm": 42.14393615722656, "learning_rate": 1.4590446987286593e-06, "loss": 22.2881, "step": 8222 }, { "epoch": 0.7631554524361949, "grad_norm": 44.42790603637695, "learning_rate": 1.4579614198995257e-06, "loss": 22.362, "step": 8223 }, { "epoch": 0.7632482598607888, "grad_norm": 166.91494750976562, "learning_rate": 1.456878474721426e-06, "loss": 22.4385, "step": 8224 }, { "epoch": 0.7633410672853829, "grad_norm": 51.68723678588867, "learning_rate": 1.4557958632963703e-06, "loss": 23.2768, "step": 8225 }, { "epoch": 0.7634338747099768, "grad_norm": 37.3659782409668, "learning_rate": 1.4547135857263372e-06, "loss": 21.3918, "step": 8226 }, { "epoch": 0.7635266821345708, "grad_norm": 36.891876220703125, "learning_rate": 1.4536316421132768e-06, "loss": 23.5038, "step": 8227 }, { "epoch": 0.7636194895591647, "grad_norm": 41.47496795654297, "learning_rate": 1.4525500325591047e-06, "loss": 21.8922, "step": 8228 }, { "epoch": 0.7637122969837588, "grad_norm": 43.95850372314453, "learning_rate": 1.451468757165705e-06, "loss": 23.1101, "step": 8229 }, { "epoch": 0.7638051044083527, "grad_norm": 42.243682861328125, "learning_rate": 1.4503878160349295e-06, "loss": 22.4431, "step": 8230 }, { "epoch": 0.7638979118329466, "grad_norm": 40.00571060180664, "learning_rate": 1.4493072092686056e-06, "loss": 24.1439, "step": 8231 }, { "epoch": 0.7639907192575406, "grad_norm": 40.107078552246094, "learning_rate": 1.448226936968517e-06, "loss": 21.871, "step": 8232 }, { "epoch": 0.7640835266821345, "grad_norm": 53.14753723144531, "learning_rate": 1.4471469992364268e-06, "loss": 21.0387, "step": 8233 }, { "epoch": 0.7641763341067286, "grad_norm": 44.86482238769531, "learning_rate": 1.4460673961740618e-06, "loss": 23.832, "step": 8234 }, { "epoch": 0.7642691415313225, "grad_norm": 40.40147018432617, "learning_rate": 1.4449881278831174e-06, "loss": 23.7522, "step": 8235 }, { "epoch": 0.7643619489559165, "grad_norm": 45.81751251220703, "learning_rate": 1.4439091944652578e-06, "loss": 22.2028, "step": 8236 }, { "epoch": 0.7644547563805104, "grad_norm": 41.36286163330078, "learning_rate": 1.4428305960221156e-06, "loss": 23.042, "step": 8237 }, { "epoch": 0.7645475638051044, "grad_norm": 38.593299865722656, "learning_rate": 1.4417523326552911e-06, "loss": 22.4287, "step": 8238 }, { "epoch": 0.7646403712296984, "grad_norm": 42.52444076538086, "learning_rate": 1.4406744044663585e-06, "loss": 22.3373, "step": 8239 }, { "epoch": 0.7647331786542924, "grad_norm": 51.4033088684082, "learning_rate": 1.4395968115568487e-06, "loss": 23.5354, "step": 8240 }, { "epoch": 0.7648259860788863, "grad_norm": 39.57311248779297, "learning_rate": 1.4385195540282742e-06, "loss": 22.4718, "step": 8241 }, { "epoch": 0.7649187935034802, "grad_norm": 47.67689514160156, "learning_rate": 1.4374426319821084e-06, "loss": 23.144, "step": 8242 }, { "epoch": 0.7650116009280743, "grad_norm": 41.72162628173828, "learning_rate": 1.436366045519793e-06, "loss": 22.583, "step": 8243 }, { "epoch": 0.7651044083526682, "grad_norm": 46.24822235107422, "learning_rate": 1.4352897947427396e-06, "loss": 23.4652, "step": 8244 }, { "epoch": 0.7651972157772622, "grad_norm": 44.2333984375, "learning_rate": 1.434213879752333e-06, "loss": 24.8149, "step": 8245 }, { "epoch": 0.7652900232018561, "grad_norm": 41.80295944213867, "learning_rate": 1.4331383006499145e-06, "loss": 22.7745, "step": 8246 }, { "epoch": 0.7653828306264501, "grad_norm": 41.783382415771484, "learning_rate": 1.4320630575368067e-06, "loss": 22.3097, "step": 8247 }, { "epoch": 0.7654756380510441, "grad_norm": 38.57090377807617, "learning_rate": 1.4309881505142919e-06, "loss": 23.181, "step": 8248 }, { "epoch": 0.7655684454756381, "grad_norm": 40.991851806640625, "learning_rate": 1.4299135796836245e-06, "loss": 23.489, "step": 8249 }, { "epoch": 0.765661252900232, "grad_norm": 48.18623352050781, "learning_rate": 1.4288393451460248e-06, "loss": 24.1057, "step": 8250 }, { "epoch": 0.765754060324826, "grad_norm": 42.51839828491211, "learning_rate": 1.427765447002687e-06, "loss": 23.4486, "step": 8251 }, { "epoch": 0.7658468677494199, "grad_norm": 52.2319221496582, "learning_rate": 1.426691885354764e-06, "loss": 22.1311, "step": 8252 }, { "epoch": 0.765939675174014, "grad_norm": 45.05815124511719, "learning_rate": 1.4256186603033872e-06, "loss": 22.9278, "step": 8253 }, { "epoch": 0.7660324825986079, "grad_norm": 43.931427001953125, "learning_rate": 1.4245457719496497e-06, "loss": 23.2446, "step": 8254 }, { "epoch": 0.7661252900232018, "grad_norm": 46.38532257080078, "learning_rate": 1.4234732203946156e-06, "loss": 24.3093, "step": 8255 }, { "epoch": 0.7662180974477958, "grad_norm": 44.9113883972168, "learning_rate": 1.422401005739314e-06, "loss": 21.9173, "step": 8256 }, { "epoch": 0.7663109048723898, "grad_norm": 53.26792526245117, "learning_rate": 1.4213291280847484e-06, "loss": 23.0683, "step": 8257 }, { "epoch": 0.7664037122969838, "grad_norm": 50.33382034301758, "learning_rate": 1.4202575875318852e-06, "loss": 24.0574, "step": 8258 }, { "epoch": 0.7664965197215777, "grad_norm": 44.104740142822266, "learning_rate": 1.419186384181661e-06, "loss": 24.1155, "step": 8259 }, { "epoch": 0.7665893271461717, "grad_norm": 50.912052154541016, "learning_rate": 1.418115518134981e-06, "loss": 23.9126, "step": 8260 }, { "epoch": 0.7666821345707656, "grad_norm": 41.9775390625, "learning_rate": 1.417044989492717e-06, "loss": 23.006, "step": 8261 }, { "epoch": 0.7667749419953597, "grad_norm": 38.85124969482422, "learning_rate": 1.4159747983557093e-06, "loss": 22.39, "step": 8262 }, { "epoch": 0.7668677494199536, "grad_norm": 40.745086669921875, "learning_rate": 1.4149049448247699e-06, "loss": 22.081, "step": 8263 }, { "epoch": 0.7669605568445476, "grad_norm": 55.20100784301758, "learning_rate": 1.4138354290006745e-06, "loss": 23.1295, "step": 8264 }, { "epoch": 0.7670533642691415, "grad_norm": 46.62846755981445, "learning_rate": 1.4127662509841695e-06, "loss": 22.1155, "step": 8265 }, { "epoch": 0.7671461716937354, "grad_norm": 64.97105407714844, "learning_rate": 1.411697410875968e-06, "loss": 25.7949, "step": 8266 }, { "epoch": 0.7672389791183295, "grad_norm": 41.620269775390625, "learning_rate": 1.4106289087767505e-06, "loss": 24.3864, "step": 8267 }, { "epoch": 0.7673317865429234, "grad_norm": 40.56422805786133, "learning_rate": 1.4095607447871711e-06, "loss": 23.4544, "step": 8268 }, { "epoch": 0.7674245939675174, "grad_norm": 49.014583587646484, "learning_rate": 1.408492919007846e-06, "loss": 23.545, "step": 8269 }, { "epoch": 0.7675174013921113, "grad_norm": 40.680049896240234, "learning_rate": 1.407425431539362e-06, "loss": 21.8645, "step": 8270 }, { "epoch": 0.7676102088167054, "grad_norm": 41.252479553222656, "learning_rate": 1.406358282482273e-06, "loss": 24.812, "step": 8271 }, { "epoch": 0.7677030162412993, "grad_norm": 50.276023864746094, "learning_rate": 1.4052914719371024e-06, "loss": 22.8614, "step": 8272 }, { "epoch": 0.7677958236658933, "grad_norm": 47.01552200317383, "learning_rate": 1.4042250000043394e-06, "loss": 25.3755, "step": 8273 }, { "epoch": 0.7678886310904872, "grad_norm": 46.637535095214844, "learning_rate": 1.4031588667844476e-06, "loss": 21.9497, "step": 8274 }, { "epoch": 0.7679814385150812, "grad_norm": 54.1989631652832, "learning_rate": 1.4020930723778476e-06, "loss": 23.2504, "step": 8275 }, { "epoch": 0.7680742459396752, "grad_norm": 52.93015670776367, "learning_rate": 1.401027616884939e-06, "loss": 21.7732, "step": 8276 }, { "epoch": 0.7681670533642692, "grad_norm": 42.373836517333984, "learning_rate": 1.399962500406084e-06, "loss": 21.9455, "step": 8277 }, { "epoch": 0.7682598607888631, "grad_norm": 52.00114440917969, "learning_rate": 1.3988977230416134e-06, "loss": 21.2648, "step": 8278 }, { "epoch": 0.768352668213457, "grad_norm": 50.78197479248047, "learning_rate": 1.3978332848918253e-06, "loss": 22.6726, "step": 8279 }, { "epoch": 0.768445475638051, "grad_norm": 134.52279663085938, "learning_rate": 1.3967691860569915e-06, "loss": 22.8295, "step": 8280 }, { "epoch": 0.768538283062645, "grad_norm": 52.493465423583984, "learning_rate": 1.3957054266373415e-06, "loss": 22.7812, "step": 8281 }, { "epoch": 0.768631090487239, "grad_norm": 50.67073440551758, "learning_rate": 1.394642006733083e-06, "loss": 24.0447, "step": 8282 }, { "epoch": 0.7687238979118329, "grad_norm": 78.76543426513672, "learning_rate": 1.3935789264443862e-06, "loss": 21.8084, "step": 8283 }, { "epoch": 0.7688167053364269, "grad_norm": 49.6024284362793, "learning_rate": 1.3925161858713904e-06, "loss": 24.9687, "step": 8284 }, { "epoch": 0.7689095127610209, "grad_norm": 43.63773727416992, "learning_rate": 1.3914537851142017e-06, "loss": 24.3103, "step": 8285 }, { "epoch": 0.7690023201856149, "grad_norm": 37.036842346191406, "learning_rate": 1.3903917242729004e-06, "loss": 22.5102, "step": 8286 }, { "epoch": 0.7690951276102088, "grad_norm": 40.97560501098633, "learning_rate": 1.3893300034475227e-06, "loss": 21.8194, "step": 8287 }, { "epoch": 0.7691879350348028, "grad_norm": 40.95058059692383, "learning_rate": 1.3882686227380859e-06, "loss": 22.5127, "step": 8288 }, { "epoch": 0.7692807424593967, "grad_norm": 39.49747848510742, "learning_rate": 1.3872075822445668e-06, "loss": 24.4302, "step": 8289 }, { "epoch": 0.7693735498839908, "grad_norm": 42.063568115234375, "learning_rate": 1.3861468820669132e-06, "loss": 21.8981, "step": 8290 }, { "epoch": 0.7694663573085847, "grad_norm": 43.806697845458984, "learning_rate": 1.3850865223050391e-06, "loss": 21.3917, "step": 8291 }, { "epoch": 0.7695591647331786, "grad_norm": 56.36725997924805, "learning_rate": 1.3840265030588323e-06, "loss": 23.833, "step": 8292 }, { "epoch": 0.7696519721577726, "grad_norm": 49.33119201660156, "learning_rate": 1.382966824428137e-06, "loss": 23.494, "step": 8293 }, { "epoch": 0.7697447795823665, "grad_norm": 42.42390060424805, "learning_rate": 1.3819074865127773e-06, "loss": 23.9284, "step": 8294 }, { "epoch": 0.7698375870069606, "grad_norm": 40.04791259765625, "learning_rate": 1.380848489412539e-06, "loss": 22.0022, "step": 8295 }, { "epoch": 0.7699303944315545, "grad_norm": 46.07219696044922, "learning_rate": 1.3797898332271763e-06, "loss": 21.9867, "step": 8296 }, { "epoch": 0.7700232018561485, "grad_norm": 54.98798370361328, "learning_rate": 1.378731518056411e-06, "loss": 21.935, "step": 8297 }, { "epoch": 0.7701160092807424, "grad_norm": 42.93257522583008, "learning_rate": 1.3776735439999379e-06, "loss": 19.7282, "step": 8298 }, { "epoch": 0.7702088167053365, "grad_norm": 37.15677261352539, "learning_rate": 1.376615911157409e-06, "loss": 21.2255, "step": 8299 }, { "epoch": 0.7703016241299304, "grad_norm": 41.401275634765625, "learning_rate": 1.375558619628456e-06, "loss": 22.9101, "step": 8300 }, { "epoch": 0.7703944315545244, "grad_norm": 45.7236442565918, "learning_rate": 1.374501669512671e-06, "loss": 24.1872, "step": 8301 }, { "epoch": 0.7704872389791183, "grad_norm": 41.86359786987305, "learning_rate": 1.3734450609096162e-06, "loss": 21.7686, "step": 8302 }, { "epoch": 0.7705800464037122, "grad_norm": 39.20802688598633, "learning_rate": 1.3723887939188201e-06, "loss": 21.6616, "step": 8303 }, { "epoch": 0.7706728538283063, "grad_norm": 43.71381378173828, "learning_rate": 1.3713328686397832e-06, "loss": 21.7486, "step": 8304 }, { "epoch": 0.7707656612529002, "grad_norm": 43.08344268798828, "learning_rate": 1.3702772851719692e-06, "loss": 24.2779, "step": 8305 }, { "epoch": 0.7708584686774942, "grad_norm": 43.38848114013672, "learning_rate": 1.3692220436148119e-06, "loss": 22.123, "step": 8306 }, { "epoch": 0.7709512761020881, "grad_norm": 47.618675231933594, "learning_rate": 1.368167144067712e-06, "loss": 21.7427, "step": 8307 }, { "epoch": 0.7710440835266822, "grad_norm": 45.45457077026367, "learning_rate": 1.3671125866300383e-06, "loss": 22.435, "step": 8308 }, { "epoch": 0.7711368909512761, "grad_norm": 45.23890686035156, "learning_rate": 1.3660583714011277e-06, "loss": 22.6397, "step": 8309 }, { "epoch": 0.7712296983758701, "grad_norm": 44.685359954833984, "learning_rate": 1.365004498480283e-06, "loss": 24.1901, "step": 8310 }, { "epoch": 0.771322505800464, "grad_norm": 41.69276428222656, "learning_rate": 1.3639509679667796e-06, "loss": 21.8547, "step": 8311 }, { "epoch": 0.771415313225058, "grad_norm": 40.65248489379883, "learning_rate": 1.3628977799598552e-06, "loss": 23.5787, "step": 8312 }, { "epoch": 0.771508120649652, "grad_norm": 37.155155181884766, "learning_rate": 1.3618449345587188e-06, "loss": 22.7581, "step": 8313 }, { "epoch": 0.771600928074246, "grad_norm": 47.657745361328125, "learning_rate": 1.3607924318625427e-06, "loss": 22.5776, "step": 8314 }, { "epoch": 0.7716937354988399, "grad_norm": 46.911128997802734, "learning_rate": 1.359740271970475e-06, "loss": 23.3837, "step": 8315 }, { "epoch": 0.7717865429234338, "grad_norm": 43.92593002319336, "learning_rate": 1.358688454981621e-06, "loss": 24.4937, "step": 8316 }, { "epoch": 0.7718793503480278, "grad_norm": 41.084861755371094, "learning_rate": 1.357636980995063e-06, "loss": 22.9451, "step": 8317 }, { "epoch": 0.7719721577726218, "grad_norm": 45.88160705566406, "learning_rate": 1.3565858501098462e-06, "loss": 21.8642, "step": 8318 }, { "epoch": 0.7720649651972158, "grad_norm": 45.29952621459961, "learning_rate": 1.355535062424984e-06, "loss": 21.4334, "step": 8319 }, { "epoch": 0.7721577726218097, "grad_norm": 43.94038009643555, "learning_rate": 1.3544846180394571e-06, "loss": 22.9211, "step": 8320 }, { "epoch": 0.7722505800464037, "grad_norm": 44.502017974853516, "learning_rate": 1.3534345170522183e-06, "loss": 23.506, "step": 8321 }, { "epoch": 0.7723433874709977, "grad_norm": 44.4830436706543, "learning_rate": 1.3523847595621792e-06, "loss": 21.2455, "step": 8322 }, { "epoch": 0.7724361948955917, "grad_norm": 44.838722229003906, "learning_rate": 1.3513353456682288e-06, "loss": 22.0971, "step": 8323 }, { "epoch": 0.7725290023201856, "grad_norm": 43.76213836669922, "learning_rate": 1.350286275469217e-06, "loss": 24.3202, "step": 8324 }, { "epoch": 0.7726218097447796, "grad_norm": 44.886043548583984, "learning_rate": 1.3492375490639636e-06, "loss": 22.2124, "step": 8325 }, { "epoch": 0.7727146171693735, "grad_norm": 41.66078567504883, "learning_rate": 1.3481891665512553e-06, "loss": 21.3787, "step": 8326 }, { "epoch": 0.7728074245939676, "grad_norm": 51.75339126586914, "learning_rate": 1.3471411280298512e-06, "loss": 24.4688, "step": 8327 }, { "epoch": 0.7729002320185615, "grad_norm": 52.962432861328125, "learning_rate": 1.3460934335984677e-06, "loss": 25.1868, "step": 8328 }, { "epoch": 0.7729930394431554, "grad_norm": 42.18776321411133, "learning_rate": 1.3450460833557994e-06, "loss": 21.3031, "step": 8329 }, { "epoch": 0.7730858468677494, "grad_norm": 38.98902893066406, "learning_rate": 1.343999077400503e-06, "loss": 23.4095, "step": 8330 }, { "epoch": 0.7731786542923433, "grad_norm": 45.98486328125, "learning_rate": 1.342952415831203e-06, "loss": 22.18, "step": 8331 }, { "epoch": 0.7732714617169374, "grad_norm": 41.70913314819336, "learning_rate": 1.341906098746491e-06, "loss": 23.3929, "step": 8332 }, { "epoch": 0.7733642691415313, "grad_norm": 42.695281982421875, "learning_rate": 1.3408601262449328e-06, "loss": 21.8366, "step": 8333 }, { "epoch": 0.7734570765661253, "grad_norm": 44.16326904296875, "learning_rate": 1.3398144984250493e-06, "loss": 20.8924, "step": 8334 }, { "epoch": 0.7735498839907192, "grad_norm": 37.7723503112793, "learning_rate": 1.338769215385341e-06, "loss": 23.4971, "step": 8335 }, { "epoch": 0.7736426914153133, "grad_norm": 43.25844192504883, "learning_rate": 1.3377242772242693e-06, "loss": 22.1646, "step": 8336 }, { "epoch": 0.7737354988399072, "grad_norm": 43.13319396972656, "learning_rate": 1.3366796840402646e-06, "loss": 22.5992, "step": 8337 }, { "epoch": 0.7738283062645012, "grad_norm": 40.69828414916992, "learning_rate": 1.335635435931723e-06, "loss": 23.8713, "step": 8338 }, { "epoch": 0.7739211136890951, "grad_norm": 42.95890808105469, "learning_rate": 1.334591532997016e-06, "loss": 23.7904, "step": 8339 }, { "epoch": 0.774013921113689, "grad_norm": 43.8325309753418, "learning_rate": 1.3335479753344688e-06, "loss": 23.4391, "step": 8340 }, { "epoch": 0.7741067285382831, "grad_norm": 41.0993766784668, "learning_rate": 1.3325047630423876e-06, "loss": 23.9488, "step": 8341 }, { "epoch": 0.774199535962877, "grad_norm": 51.00526809692383, "learning_rate": 1.3314618962190384e-06, "loss": 21.0974, "step": 8342 }, { "epoch": 0.774292343387471, "grad_norm": 43.10427474975586, "learning_rate": 1.3304193749626565e-06, "loss": 22.358, "step": 8343 }, { "epoch": 0.7743851508120649, "grad_norm": 45.210269927978516, "learning_rate": 1.3293771993714455e-06, "loss": 24.7465, "step": 8344 }, { "epoch": 0.7744779582366589, "grad_norm": 43.09852600097656, "learning_rate": 1.3283353695435746e-06, "loss": 23.6968, "step": 8345 }, { "epoch": 0.7745707656612529, "grad_norm": 40.9372673034668, "learning_rate": 1.3272938855771805e-06, "loss": 23.6929, "step": 8346 }, { "epoch": 0.7746635730858469, "grad_norm": 44.64277267456055, "learning_rate": 1.3262527475703714e-06, "loss": 22.7845, "step": 8347 }, { "epoch": 0.7747563805104408, "grad_norm": 46.940059661865234, "learning_rate": 1.3252119556212185e-06, "loss": 23.2749, "step": 8348 }, { "epoch": 0.7748491879350348, "grad_norm": 49.352474212646484, "learning_rate": 1.3241715098277613e-06, "loss": 23.2585, "step": 8349 }, { "epoch": 0.7749419953596288, "grad_norm": 46.05180358886719, "learning_rate": 1.3231314102880078e-06, "loss": 21.7604, "step": 8350 }, { "epoch": 0.7750348027842228, "grad_norm": 43.64311599731445, "learning_rate": 1.322091657099931e-06, "loss": 23.8046, "step": 8351 }, { "epoch": 0.7751276102088167, "grad_norm": 47.75933837890625, "learning_rate": 1.3210522503614753e-06, "loss": 22.3065, "step": 8352 }, { "epoch": 0.7752204176334107, "grad_norm": 47.54188919067383, "learning_rate": 1.32001319017055e-06, "loss": 24.1778, "step": 8353 }, { "epoch": 0.7753132250580046, "grad_norm": 51.17168045043945, "learning_rate": 1.31897447662503e-06, "loss": 22.6983, "step": 8354 }, { "epoch": 0.7754060324825987, "grad_norm": 47.63557815551758, "learning_rate": 1.3179361098227616e-06, "loss": 23.0025, "step": 8355 }, { "epoch": 0.7754988399071926, "grad_norm": 49.26292419433594, "learning_rate": 1.3168980898615551e-06, "loss": 22.8567, "step": 8356 }, { "epoch": 0.7755916473317865, "grad_norm": 39.40117645263672, "learning_rate": 1.315860416839188e-06, "loss": 23.2941, "step": 8357 }, { "epoch": 0.7756844547563805, "grad_norm": 44.708030700683594, "learning_rate": 1.3148230908534098e-06, "loss": 23.8975, "step": 8358 }, { "epoch": 0.7757772621809744, "grad_norm": 42.745155334472656, "learning_rate": 1.3137861120019323e-06, "loss": 19.8202, "step": 8359 }, { "epoch": 0.7758700696055685, "grad_norm": 44.344364166259766, "learning_rate": 1.3127494803824365e-06, "loss": 22.2605, "step": 8360 }, { "epoch": 0.7759628770301624, "grad_norm": 43.100669860839844, "learning_rate": 1.3117131960925677e-06, "loss": 23.176, "step": 8361 }, { "epoch": 0.7760556844547564, "grad_norm": 47.23306655883789, "learning_rate": 1.3106772592299476e-06, "loss": 21.442, "step": 8362 }, { "epoch": 0.7761484918793503, "grad_norm": 44.825843811035156, "learning_rate": 1.3096416698921516e-06, "loss": 22.7458, "step": 8363 }, { "epoch": 0.7762412993039444, "grad_norm": 40.03792190551758, "learning_rate": 1.3086064281767346e-06, "loss": 23.0046, "step": 8364 }, { "epoch": 0.7763341067285383, "grad_norm": 46.05224609375, "learning_rate": 1.3075715341812117e-06, "loss": 23.1927, "step": 8365 }, { "epoch": 0.7764269141531323, "grad_norm": 46.271728515625, "learning_rate": 1.3065369880030682e-06, "loss": 21.8883, "step": 8366 }, { "epoch": 0.7765197215777262, "grad_norm": 50.68403625488281, "learning_rate": 1.3055027897397531e-06, "loss": 22.3689, "step": 8367 }, { "epoch": 0.7766125290023201, "grad_norm": 43.82984161376953, "learning_rate": 1.304468939488691e-06, "loss": 22.5815, "step": 8368 }, { "epoch": 0.7767053364269142, "grad_norm": 42.75226974487305, "learning_rate": 1.3034354373472608e-06, "loss": 23.65, "step": 8369 }, { "epoch": 0.7767981438515081, "grad_norm": 43.197227478027344, "learning_rate": 1.302402283412821e-06, "loss": 23.261, "step": 8370 }, { "epoch": 0.7768909512761021, "grad_norm": 43.52825164794922, "learning_rate": 1.3013694777826903e-06, "loss": 22.9823, "step": 8371 }, { "epoch": 0.776983758700696, "grad_norm": 40.505252838134766, "learning_rate": 1.3003370205541566e-06, "loss": 23.5952, "step": 8372 }, { "epoch": 0.77707656612529, "grad_norm": 44.74798583984375, "learning_rate": 1.2993049118244728e-06, "loss": 23.6758, "step": 8373 }, { "epoch": 0.777169373549884, "grad_norm": 44.8935661315918, "learning_rate": 1.2982731516908653e-06, "loss": 23.717, "step": 8374 }, { "epoch": 0.777262180974478, "grad_norm": 45.61332702636719, "learning_rate": 1.2972417402505183e-06, "loss": 22.3808, "step": 8375 }, { "epoch": 0.7773549883990719, "grad_norm": 37.347957611083984, "learning_rate": 1.2962106776005917e-06, "loss": 22.7257, "step": 8376 }, { "epoch": 0.7774477958236659, "grad_norm": 47.922767639160156, "learning_rate": 1.2951799638382079e-06, "loss": 23.113, "step": 8377 }, { "epoch": 0.7775406032482599, "grad_norm": 41.18473434448242, "learning_rate": 1.2941495990604575e-06, "loss": 21.4778, "step": 8378 }, { "epoch": 0.7776334106728539, "grad_norm": 51.75506591796875, "learning_rate": 1.2931195833643978e-06, "loss": 22.9174, "step": 8379 }, { "epoch": 0.7777262180974478, "grad_norm": 44.70914077758789, "learning_rate": 1.292089916847054e-06, "loss": 21.5516, "step": 8380 }, { "epoch": 0.7778190255220417, "grad_norm": 48.04548645019531, "learning_rate": 1.2910605996054171e-06, "loss": 22.2935, "step": 8381 }, { "epoch": 0.7779118329466357, "grad_norm": 44.4797477722168, "learning_rate": 1.2900316317364498e-06, "loss": 22.289, "step": 8382 }, { "epoch": 0.7780046403712297, "grad_norm": 47.43708038330078, "learning_rate": 1.2890030133370724e-06, "loss": 22.8502, "step": 8383 }, { "epoch": 0.7780974477958237, "grad_norm": 45.87504196166992, "learning_rate": 1.2879747445041829e-06, "loss": 24.1539, "step": 8384 }, { "epoch": 0.7781902552204176, "grad_norm": 44.55377197265625, "learning_rate": 1.28694682533464e-06, "loss": 21.3205, "step": 8385 }, { "epoch": 0.7782830626450116, "grad_norm": 40.48408508300781, "learning_rate": 1.2859192559252714e-06, "loss": 22.3975, "step": 8386 }, { "epoch": 0.7783758700696055, "grad_norm": 39.2049674987793, "learning_rate": 1.2848920363728701e-06, "loss": 23.5404, "step": 8387 }, { "epoch": 0.7784686774941996, "grad_norm": 45.37190628051758, "learning_rate": 1.2838651667742014e-06, "loss": 22.3213, "step": 8388 }, { "epoch": 0.7785614849187935, "grad_norm": 50.29631805419922, "learning_rate": 1.2828386472259879e-06, "loss": 23.6467, "step": 8389 }, { "epoch": 0.7786542923433875, "grad_norm": 45.08635330200195, "learning_rate": 1.2818124778249296e-06, "loss": 23.3047, "step": 8390 }, { "epoch": 0.7787470997679814, "grad_norm": 40.51723098754883, "learning_rate": 1.2807866586676887e-06, "loss": 21.6492, "step": 8391 }, { "epoch": 0.7788399071925755, "grad_norm": 40.87790298461914, "learning_rate": 1.2797611898508932e-06, "loss": 22.3214, "step": 8392 }, { "epoch": 0.7789327146171694, "grad_norm": 43.326175689697266, "learning_rate": 1.2787360714711388e-06, "loss": 21.8445, "step": 8393 }, { "epoch": 0.7790255220417633, "grad_norm": 41.05646896362305, "learning_rate": 1.2777113036249927e-06, "loss": 24.278, "step": 8394 }, { "epoch": 0.7791183294663573, "grad_norm": 42.697967529296875, "learning_rate": 1.2766868864089827e-06, "loss": 23.4872, "step": 8395 }, { "epoch": 0.7792111368909512, "grad_norm": 55.68217849731445, "learning_rate": 1.275662819919607e-06, "loss": 21.8491, "step": 8396 }, { "epoch": 0.7793039443155453, "grad_norm": 59.65074920654297, "learning_rate": 1.2746391042533302e-06, "loss": 21.3571, "step": 8397 }, { "epoch": 0.7793967517401392, "grad_norm": 45.308780670166016, "learning_rate": 1.2736157395065818e-06, "loss": 22.5534, "step": 8398 }, { "epoch": 0.7794895591647332, "grad_norm": 47.59526443481445, "learning_rate": 1.272592725775763e-06, "loss": 23.6947, "step": 8399 }, { "epoch": 0.7795823665893271, "grad_norm": 56.71463394165039, "learning_rate": 1.2715700631572387e-06, "loss": 22.3704, "step": 8400 }, { "epoch": 0.7796751740139212, "grad_norm": 52.67107009887695, "learning_rate": 1.2705477517473398e-06, "loss": 22.8166, "step": 8401 }, { "epoch": 0.7797679814385151, "grad_norm": 44.175575256347656, "learning_rate": 1.2695257916423658e-06, "loss": 22.7356, "step": 8402 }, { "epoch": 0.7798607888631091, "grad_norm": 37.91702651977539, "learning_rate": 1.2685041829385836e-06, "loss": 20.3866, "step": 8403 }, { "epoch": 0.779953596287703, "grad_norm": 51.330772399902344, "learning_rate": 1.2674829257322235e-06, "loss": 22.8463, "step": 8404 }, { "epoch": 0.7800464037122969, "grad_norm": 57.52138137817383, "learning_rate": 1.2664620201194888e-06, "loss": 22.8146, "step": 8405 }, { "epoch": 0.780139211136891, "grad_norm": 49.67097854614258, "learning_rate": 1.2654414661965447e-06, "loss": 23.2617, "step": 8406 }, { "epoch": 0.7802320185614849, "grad_norm": 47.429405212402344, "learning_rate": 1.2644212640595254e-06, "loss": 21.8348, "step": 8407 }, { "epoch": 0.7803248259860789, "grad_norm": 49.11106872558594, "learning_rate": 1.2634014138045286e-06, "loss": 21.7562, "step": 8408 }, { "epoch": 0.7804176334106728, "grad_norm": 53.159889221191406, "learning_rate": 1.2623819155276267e-06, "loss": 25.3481, "step": 8409 }, { "epoch": 0.7805104408352668, "grad_norm": 47.67450714111328, "learning_rate": 1.2613627693248486e-06, "loss": 23.2415, "step": 8410 }, { "epoch": 0.7806032482598608, "grad_norm": 49.350276947021484, "learning_rate": 1.2603439752921986e-06, "loss": 22.448, "step": 8411 }, { "epoch": 0.7806960556844548, "grad_norm": 57.06298065185547, "learning_rate": 1.2593255335256438e-06, "loss": 22.4921, "step": 8412 }, { "epoch": 0.7807888631090487, "grad_norm": 46.53200149536133, "learning_rate": 1.2583074441211179e-06, "loss": 22.9788, "step": 8413 }, { "epoch": 0.7808816705336427, "grad_norm": 38.69063186645508, "learning_rate": 1.2572897071745238e-06, "loss": 24.2488, "step": 8414 }, { "epoch": 0.7809744779582367, "grad_norm": 43.101558685302734, "learning_rate": 1.256272322781728e-06, "loss": 22.5738, "step": 8415 }, { "epoch": 0.7810672853828307, "grad_norm": 42.68724822998047, "learning_rate": 1.2552552910385656e-06, "loss": 23.6701, "step": 8416 }, { "epoch": 0.7811600928074246, "grad_norm": 52.951324462890625, "learning_rate": 1.2542386120408417e-06, "loss": 21.7458, "step": 8417 }, { "epoch": 0.7812529002320185, "grad_norm": 58.97239303588867, "learning_rate": 1.2532222858843202e-06, "loss": 23.1522, "step": 8418 }, { "epoch": 0.7813457076566125, "grad_norm": 45.466087341308594, "learning_rate": 1.2522063126647393e-06, "loss": 21.9317, "step": 8419 }, { "epoch": 0.7814385150812065, "grad_norm": 49.73066711425781, "learning_rate": 1.2511906924778012e-06, "loss": 22.7062, "step": 8420 }, { "epoch": 0.7815313225058005, "grad_norm": 43.21509552001953, "learning_rate": 1.2501754254191733e-06, "loss": 22.3311, "step": 8421 }, { "epoch": 0.7816241299303944, "grad_norm": 42.687583923339844, "learning_rate": 1.2491605115844907e-06, "loss": 22.5489, "step": 8422 }, { "epoch": 0.7817169373549884, "grad_norm": 46.89894104003906, "learning_rate": 1.24814595106936e-06, "loss": 24.3531, "step": 8423 }, { "epoch": 0.7818097447795823, "grad_norm": 57.25407791137695, "learning_rate": 1.2471317439693436e-06, "loss": 23.567, "step": 8424 }, { "epoch": 0.7819025522041764, "grad_norm": 40.78981018066406, "learning_rate": 1.2461178903799825e-06, "loss": 23.4864, "step": 8425 }, { "epoch": 0.7819953596287703, "grad_norm": 40.662384033203125, "learning_rate": 1.2451043903967775e-06, "loss": 23.0329, "step": 8426 }, { "epoch": 0.7820881670533643, "grad_norm": 38.90907669067383, "learning_rate": 1.2440912441151976e-06, "loss": 22.1547, "step": 8427 }, { "epoch": 0.7821809744779582, "grad_norm": 44.24068069458008, "learning_rate": 1.2430784516306765e-06, "loss": 23.2436, "step": 8428 }, { "epoch": 0.7822737819025523, "grad_norm": 44.571136474609375, "learning_rate": 1.2420660130386226e-06, "loss": 22.6509, "step": 8429 }, { "epoch": 0.7823665893271462, "grad_norm": 42.03685760498047, "learning_rate": 1.2410539284343975e-06, "loss": 21.4268, "step": 8430 }, { "epoch": 0.7824593967517401, "grad_norm": 42.85675811767578, "learning_rate": 1.2400421979133426e-06, "loss": 22.3769, "step": 8431 }, { "epoch": 0.7825522041763341, "grad_norm": 39.29827880859375, "learning_rate": 1.2390308215707581e-06, "loss": 22.3355, "step": 8432 }, { "epoch": 0.782645011600928, "grad_norm": 44.260955810546875, "learning_rate": 1.2380197995019134e-06, "loss": 23.5904, "step": 8433 }, { "epoch": 0.7827378190255221, "grad_norm": 45.30107116699219, "learning_rate": 1.2370091318020434e-06, "loss": 23.4658, "step": 8434 }, { "epoch": 0.782830626450116, "grad_norm": 38.585628509521484, "learning_rate": 1.2359988185663536e-06, "loss": 21.359, "step": 8435 }, { "epoch": 0.78292343387471, "grad_norm": 51.434295654296875, "learning_rate": 1.2349888598900078e-06, "loss": 21.8824, "step": 8436 }, { "epoch": 0.7830162412993039, "grad_norm": 41.507205963134766, "learning_rate": 1.2339792558681457e-06, "loss": 23.8176, "step": 8437 }, { "epoch": 0.7831090487238979, "grad_norm": 44.654808044433594, "learning_rate": 1.2329700065958683e-06, "loss": 23.7899, "step": 8438 }, { "epoch": 0.7832018561484919, "grad_norm": 42.33988571166992, "learning_rate": 1.231961112168244e-06, "loss": 25.8531, "step": 8439 }, { "epoch": 0.7832946635730859, "grad_norm": 44.165435791015625, "learning_rate": 1.2309525726803069e-06, "loss": 22.3406, "step": 8440 }, { "epoch": 0.7833874709976798, "grad_norm": 42.937164306640625, "learning_rate": 1.229944388227061e-06, "loss": 21.5706, "step": 8441 }, { "epoch": 0.7834802784222737, "grad_norm": 42.09278106689453, "learning_rate": 1.2289365589034746e-06, "loss": 21.5347, "step": 8442 }, { "epoch": 0.7835730858468678, "grad_norm": 47.27475357055664, "learning_rate": 1.2279290848044818e-06, "loss": 24.0945, "step": 8443 }, { "epoch": 0.7836658932714617, "grad_norm": 44.38739776611328, "learning_rate": 1.2269219660249838e-06, "loss": 21.9144, "step": 8444 }, { "epoch": 0.7837587006960557, "grad_norm": 49.8515510559082, "learning_rate": 1.2259152026598476e-06, "loss": 22.8425, "step": 8445 }, { "epoch": 0.7838515081206496, "grad_norm": 45.50150680541992, "learning_rate": 1.2249087948039112e-06, "loss": 22.4657, "step": 8446 }, { "epoch": 0.7839443155452436, "grad_norm": 46.95643997192383, "learning_rate": 1.2239027425519733e-06, "loss": 23.1923, "step": 8447 }, { "epoch": 0.7840371229698376, "grad_norm": 38.14840316772461, "learning_rate": 1.2228970459988015e-06, "loss": 21.6591, "step": 8448 }, { "epoch": 0.7841299303944316, "grad_norm": 45.6682014465332, "learning_rate": 1.2218917052391305e-06, "loss": 22.6987, "step": 8449 }, { "epoch": 0.7842227378190255, "grad_norm": 43.31544876098633, "learning_rate": 1.2208867203676605e-06, "loss": 22.0421, "step": 8450 }, { "epoch": 0.7843155452436195, "grad_norm": 40.94695281982422, "learning_rate": 1.2198820914790576e-06, "loss": 22.565, "step": 8451 }, { "epoch": 0.7844083526682134, "grad_norm": 37.85623550415039, "learning_rate": 1.2188778186679584e-06, "loss": 22.5966, "step": 8452 }, { "epoch": 0.7845011600928075, "grad_norm": 50.427215576171875, "learning_rate": 1.2178739020289582e-06, "loss": 23.2498, "step": 8453 }, { "epoch": 0.7845939675174014, "grad_norm": 52.69024658203125, "learning_rate": 1.2168703416566274e-06, "loss": 23.945, "step": 8454 }, { "epoch": 0.7846867749419953, "grad_norm": 43.831459045410156, "learning_rate": 1.2158671376454973e-06, "loss": 22.1479, "step": 8455 }, { "epoch": 0.7847795823665893, "grad_norm": 41.814510345458984, "learning_rate": 1.2148642900900666e-06, "loss": 22.1228, "step": 8456 }, { "epoch": 0.7848723897911833, "grad_norm": 51.2713737487793, "learning_rate": 1.2138617990848007e-06, "loss": 22.8846, "step": 8457 }, { "epoch": 0.7849651972157773, "grad_norm": 49.4461784362793, "learning_rate": 1.2128596647241353e-06, "loss": 22.8779, "step": 8458 }, { "epoch": 0.7850580046403712, "grad_norm": 49.0419807434082, "learning_rate": 1.2118578871024633e-06, "loss": 23.2961, "step": 8459 }, { "epoch": 0.7851508120649652, "grad_norm": 42.110660552978516, "learning_rate": 1.2108564663141541e-06, "loss": 22.3845, "step": 8460 }, { "epoch": 0.7852436194895591, "grad_norm": 39.98268508911133, "learning_rate": 1.2098554024535369e-06, "loss": 23.7002, "step": 8461 }, { "epoch": 0.7853364269141532, "grad_norm": 51.235836029052734, "learning_rate": 1.2088546956149106e-06, "loss": 22.6908, "step": 8462 }, { "epoch": 0.7854292343387471, "grad_norm": 41.0346794128418, "learning_rate": 1.2078543458925368e-06, "loss": 22.4636, "step": 8463 }, { "epoch": 0.7855220417633411, "grad_norm": 37.17689514160156, "learning_rate": 1.2068543533806504e-06, "loss": 22.1564, "step": 8464 }, { "epoch": 0.785614849187935, "grad_norm": 42.05303192138672, "learning_rate": 1.2058547181734425e-06, "loss": 22.9635, "step": 8465 }, { "epoch": 0.785707656612529, "grad_norm": 42.59893798828125, "learning_rate": 1.2048554403650803e-06, "loss": 23.3709, "step": 8466 }, { "epoch": 0.785800464037123, "grad_norm": 39.889888763427734, "learning_rate": 1.203856520049692e-06, "loss": 21.9823, "step": 8467 }, { "epoch": 0.785893271461717, "grad_norm": 41.079166412353516, "learning_rate": 1.2028579573213738e-06, "loss": 20.0779, "step": 8468 }, { "epoch": 0.7859860788863109, "grad_norm": 37.5017204284668, "learning_rate": 1.2018597522741854e-06, "loss": 21.9108, "step": 8469 }, { "epoch": 0.7860788863109048, "grad_norm": 39.18726348876953, "learning_rate": 1.2008619050021604e-06, "loss": 24.4983, "step": 8470 }, { "epoch": 0.7861716937354989, "grad_norm": 44.079566955566406, "learning_rate": 1.1998644155992872e-06, "loss": 23.4736, "step": 8471 }, { "epoch": 0.7862645011600928, "grad_norm": 41.92399597167969, "learning_rate": 1.1988672841595312e-06, "loss": 25.1559, "step": 8472 }, { "epoch": 0.7863573085846868, "grad_norm": 42.22224426269531, "learning_rate": 1.197870510776818e-06, "loss": 24.3267, "step": 8473 }, { "epoch": 0.7864501160092807, "grad_norm": 41.865394592285156, "learning_rate": 1.1968740955450408e-06, "loss": 21.6286, "step": 8474 }, { "epoch": 0.7865429234338747, "grad_norm": 43.262821197509766, "learning_rate": 1.1958780385580592e-06, "loss": 21.2535, "step": 8475 }, { "epoch": 0.7866357308584687, "grad_norm": 44.99382400512695, "learning_rate": 1.1948823399097026e-06, "loss": 22.628, "step": 8476 }, { "epoch": 0.7867285382830627, "grad_norm": 44.256099700927734, "learning_rate": 1.193886999693758e-06, "loss": 21.92, "step": 8477 }, { "epoch": 0.7868213457076566, "grad_norm": 51.7080192565918, "learning_rate": 1.1928920180039877e-06, "loss": 22.3455, "step": 8478 }, { "epoch": 0.7869141531322505, "grad_norm": 45.233253479003906, "learning_rate": 1.191897394934115e-06, "loss": 22.7123, "step": 8479 }, { "epoch": 0.7870069605568445, "grad_norm": 46.799278259277344, "learning_rate": 1.1909031305778317e-06, "loss": 23.4035, "step": 8480 }, { "epoch": 0.7870997679814385, "grad_norm": 53.14274597167969, "learning_rate": 1.1899092250287925e-06, "loss": 23.3239, "step": 8481 }, { "epoch": 0.7871925754060325, "grad_norm": 56.73371124267578, "learning_rate": 1.188915678380626e-06, "loss": 24.0992, "step": 8482 }, { "epoch": 0.7872853828306264, "grad_norm": 46.59151077270508, "learning_rate": 1.1879224907269154e-06, "loss": 22.8328, "step": 8483 }, { "epoch": 0.7873781902552204, "grad_norm": 41.01668930053711, "learning_rate": 1.186929662161221e-06, "loss": 23.6137, "step": 8484 }, { "epoch": 0.7874709976798144, "grad_norm": 54.2538948059082, "learning_rate": 1.185937192777063e-06, "loss": 25.7022, "step": 8485 }, { "epoch": 0.7875638051044084, "grad_norm": 48.510684967041016, "learning_rate": 1.1849450826679299e-06, "loss": 21.2896, "step": 8486 }, { "epoch": 0.7876566125290023, "grad_norm": 45.44152069091797, "learning_rate": 1.1839533319272762e-06, "loss": 21.4071, "step": 8487 }, { "epoch": 0.7877494199535963, "grad_norm": 46.27809143066406, "learning_rate": 1.18296194064852e-06, "loss": 22.6341, "step": 8488 }, { "epoch": 0.7878422273781902, "grad_norm": 52.47123336791992, "learning_rate": 1.1819709089250508e-06, "loss": 25.2827, "step": 8489 }, { "epoch": 0.7879350348027843, "grad_norm": 48.50679016113281, "learning_rate": 1.180980236850221e-06, "loss": 22.0198, "step": 8490 }, { "epoch": 0.7880278422273782, "grad_norm": 43.548221588134766, "learning_rate": 1.179989924517348e-06, "loss": 21.9362, "step": 8491 }, { "epoch": 0.7881206496519721, "grad_norm": 39.82028579711914, "learning_rate": 1.178999972019716e-06, "loss": 24.2306, "step": 8492 }, { "epoch": 0.7882134570765661, "grad_norm": 43.644718170166016, "learning_rate": 1.1780103794505803e-06, "loss": 23.6713, "step": 8493 }, { "epoch": 0.7883062645011601, "grad_norm": 41.24805450439453, "learning_rate": 1.1770211469031522e-06, "loss": 22.2203, "step": 8494 }, { "epoch": 0.7883990719257541, "grad_norm": 50.24007034301758, "learning_rate": 1.1760322744706194e-06, "loss": 23.4967, "step": 8495 }, { "epoch": 0.788491879350348, "grad_norm": 40.735450744628906, "learning_rate": 1.1750437622461293e-06, "loss": 22.2259, "step": 8496 }, { "epoch": 0.788584686774942, "grad_norm": 46.41108322143555, "learning_rate": 1.1740556103227969e-06, "loss": 23.5043, "step": 8497 }, { "epoch": 0.7886774941995359, "grad_norm": 56.18806457519531, "learning_rate": 1.1730678187937029e-06, "loss": 23.5557, "step": 8498 }, { "epoch": 0.78877030162413, "grad_norm": 57.38531494140625, "learning_rate": 1.1720803877518994e-06, "loss": 22.3358, "step": 8499 }, { "epoch": 0.7888631090487239, "grad_norm": 36.75337219238281, "learning_rate": 1.1710933172903926e-06, "loss": 23.1367, "step": 8500 }, { "epoch": 0.7889559164733179, "grad_norm": 44.397216796875, "learning_rate": 1.1701066075021683e-06, "loss": 21.3063, "step": 8501 }, { "epoch": 0.7890487238979118, "grad_norm": 42.47690963745117, "learning_rate": 1.1691202584801692e-06, "loss": 23.1686, "step": 8502 }, { "epoch": 0.7891415313225058, "grad_norm": 41.643463134765625, "learning_rate": 1.1681342703173071e-06, "loss": 23.555, "step": 8503 }, { "epoch": 0.7892343387470998, "grad_norm": 40.97657775878906, "learning_rate": 1.1671486431064589e-06, "loss": 22.3098, "step": 8504 }, { "epoch": 0.7893271461716937, "grad_norm": 46.098419189453125, "learning_rate": 1.1661633769404718e-06, "loss": 23.5064, "step": 8505 }, { "epoch": 0.7894199535962877, "grad_norm": 38.771156311035156, "learning_rate": 1.1651784719121495e-06, "loss": 22.4228, "step": 8506 }, { "epoch": 0.7895127610208816, "grad_norm": 39.87617492675781, "learning_rate": 1.1641939281142722e-06, "loss": 21.8584, "step": 8507 }, { "epoch": 0.7896055684454757, "grad_norm": 38.50775909423828, "learning_rate": 1.1632097456395802e-06, "loss": 22.0589, "step": 8508 }, { "epoch": 0.7896983758700696, "grad_norm": 43.010643005371094, "learning_rate": 1.1622259245807805e-06, "loss": 22.0387, "step": 8509 }, { "epoch": 0.7897911832946636, "grad_norm": 42.432167053222656, "learning_rate": 1.1612424650305455e-06, "loss": 22.7208, "step": 8510 }, { "epoch": 0.7898839907192575, "grad_norm": 35.384342193603516, "learning_rate": 1.1602593670815183e-06, "loss": 22.6088, "step": 8511 }, { "epoch": 0.7899767981438515, "grad_norm": 43.820289611816406, "learning_rate": 1.1592766308262993e-06, "loss": 22.3244, "step": 8512 }, { "epoch": 0.7900696055684455, "grad_norm": 45.65749740600586, "learning_rate": 1.1582942563574635e-06, "loss": 22.3245, "step": 8513 }, { "epoch": 0.7901624129930395, "grad_norm": 43.902488708496094, "learning_rate": 1.1573122437675465e-06, "loss": 22.6166, "step": 8514 }, { "epoch": 0.7902552204176334, "grad_norm": 39.61223602294922, "learning_rate": 1.1563305931490526e-06, "loss": 22.4205, "step": 8515 }, { "epoch": 0.7903480278422274, "grad_norm": 45.02031707763672, "learning_rate": 1.1553493045944474e-06, "loss": 21.2777, "step": 8516 }, { "epoch": 0.7904408352668213, "grad_norm": 41.26104736328125, "learning_rate": 1.1543683781961717e-06, "loss": 23.3479, "step": 8517 }, { "epoch": 0.7905336426914154, "grad_norm": 43.081275939941406, "learning_rate": 1.1533878140466204e-06, "loss": 22.4329, "step": 8518 }, { "epoch": 0.7906264501160093, "grad_norm": 72.73150634765625, "learning_rate": 1.1524076122381633e-06, "loss": 23.0171, "step": 8519 }, { "epoch": 0.7907192575406032, "grad_norm": 45.190277099609375, "learning_rate": 1.1514277728631323e-06, "loss": 22.7419, "step": 8520 }, { "epoch": 0.7908120649651972, "grad_norm": 43.03189468383789, "learning_rate": 1.1504482960138264e-06, "loss": 22.9065, "step": 8521 }, { "epoch": 0.7909048723897912, "grad_norm": 47.15449523925781, "learning_rate": 1.1494691817825082e-06, "loss": 24.9073, "step": 8522 }, { "epoch": 0.7909976798143852, "grad_norm": 41.62260818481445, "learning_rate": 1.1484904302614097e-06, "loss": 21.9753, "step": 8523 }, { "epoch": 0.7910904872389791, "grad_norm": 40.75815200805664, "learning_rate": 1.1475120415427238e-06, "loss": 22.3434, "step": 8524 }, { "epoch": 0.7911832946635731, "grad_norm": 48.439395904541016, "learning_rate": 1.1465340157186155e-06, "loss": 24.7481, "step": 8525 }, { "epoch": 0.791276102088167, "grad_norm": 53.49782180786133, "learning_rate": 1.1455563528812113e-06, "loss": 22.1121, "step": 8526 }, { "epoch": 0.7913689095127611, "grad_norm": 43.95254135131836, "learning_rate": 1.1445790531226042e-06, "loss": 23.1184, "step": 8527 }, { "epoch": 0.791461716937355, "grad_norm": 42.83152389526367, "learning_rate": 1.1436021165348537e-06, "loss": 21.7733, "step": 8528 }, { "epoch": 0.791554524361949, "grad_norm": 39.9528923034668, "learning_rate": 1.1426255432099849e-06, "loss": 22.9431, "step": 8529 }, { "epoch": 0.7916473317865429, "grad_norm": 83.92366027832031, "learning_rate": 1.1416493332399864e-06, "loss": 22.1701, "step": 8530 }, { "epoch": 0.7917401392111368, "grad_norm": 40.65919494628906, "learning_rate": 1.1406734867168178e-06, "loss": 21.9502, "step": 8531 }, { "epoch": 0.7918329466357309, "grad_norm": 44.02054214477539, "learning_rate": 1.1396980037324e-06, "loss": 22.6517, "step": 8532 }, { "epoch": 0.7919257540603248, "grad_norm": 48.07724380493164, "learning_rate": 1.1387228843786214e-06, "loss": 24.7089, "step": 8533 }, { "epoch": 0.7920185614849188, "grad_norm": 44.60580062866211, "learning_rate": 1.1377481287473353e-06, "loss": 23.413, "step": 8534 }, { "epoch": 0.7921113689095127, "grad_norm": 45.584625244140625, "learning_rate": 1.1367737369303595e-06, "loss": 23.8256, "step": 8535 }, { "epoch": 0.7922041763341068, "grad_norm": 44.795902252197266, "learning_rate": 1.1357997090194827e-06, "loss": 23.6696, "step": 8536 }, { "epoch": 0.7922969837587007, "grad_norm": 43.744468688964844, "learning_rate": 1.134826045106454e-06, "loss": 23.6449, "step": 8537 }, { "epoch": 0.7923897911832947, "grad_norm": 48.17573547363281, "learning_rate": 1.1338527452829912e-06, "loss": 24.9027, "step": 8538 }, { "epoch": 0.7924825986078886, "grad_norm": 45.38467025756836, "learning_rate": 1.1328798096407733e-06, "loss": 23.9391, "step": 8539 }, { "epoch": 0.7925754060324826, "grad_norm": 43.723323822021484, "learning_rate": 1.131907238271454e-06, "loss": 22.6934, "step": 8540 }, { "epoch": 0.7926682134570766, "grad_norm": 43.26198196411133, "learning_rate": 1.1309350312666413e-06, "loss": 23.1035, "step": 8541 }, { "epoch": 0.7927610208816706, "grad_norm": 44.13113021850586, "learning_rate": 1.129963188717918e-06, "loss": 23.0143, "step": 8542 }, { "epoch": 0.7928538283062645, "grad_norm": 51.99113464355469, "learning_rate": 1.128991710716829e-06, "loss": 22.9384, "step": 8543 }, { "epoch": 0.7929466357308584, "grad_norm": 51.1817626953125, "learning_rate": 1.128020597354884e-06, "loss": 24.6704, "step": 8544 }, { "epoch": 0.7930394431554524, "grad_norm": 40.93629837036133, "learning_rate": 1.1270498487235588e-06, "loss": 21.4172, "step": 8545 }, { "epoch": 0.7931322505800464, "grad_norm": 39.68571472167969, "learning_rate": 1.1260794649142992e-06, "loss": 21.7096, "step": 8546 }, { "epoch": 0.7932250580046404, "grad_norm": 43.951290130615234, "learning_rate": 1.1251094460185075e-06, "loss": 25.6434, "step": 8547 }, { "epoch": 0.7933178654292343, "grad_norm": 56.4156608581543, "learning_rate": 1.124139792127561e-06, "loss": 26.3591, "step": 8548 }, { "epoch": 0.7934106728538283, "grad_norm": 47.358341217041016, "learning_rate": 1.1231705033327973e-06, "loss": 23.7501, "step": 8549 }, { "epoch": 0.7935034802784223, "grad_norm": 48.3626823425293, "learning_rate": 1.122201579725521e-06, "loss": 23.9242, "step": 8550 }, { "epoch": 0.7935962877030163, "grad_norm": 50.9031982421875, "learning_rate": 1.1212330213970007e-06, "loss": 24.5145, "step": 8551 }, { "epoch": 0.7936890951276102, "grad_norm": 48.893226623535156, "learning_rate": 1.120264828438477e-06, "loss": 24.3088, "step": 8552 }, { "epoch": 0.7937819025522042, "grad_norm": 47.736114501953125, "learning_rate": 1.1192970009411446e-06, "loss": 22.8014, "step": 8553 }, { "epoch": 0.7938747099767981, "grad_norm": 41.76570510864258, "learning_rate": 1.118329538996175e-06, "loss": 22.8444, "step": 8554 }, { "epoch": 0.7939675174013922, "grad_norm": 56.10418701171875, "learning_rate": 1.1173624426947e-06, "loss": 22.7534, "step": 8555 }, { "epoch": 0.7940603248259861, "grad_norm": 51.011817932128906, "learning_rate": 1.1163957121278163e-06, "loss": 21.0717, "step": 8556 }, { "epoch": 0.79415313225058, "grad_norm": 48.58343505859375, "learning_rate": 1.1154293473865886e-06, "loss": 21.6155, "step": 8557 }, { "epoch": 0.794245939675174, "grad_norm": 44.7453498840332, "learning_rate": 1.1144633485620448e-06, "loss": 22.976, "step": 8558 }, { "epoch": 0.7943387470997679, "grad_norm": 45.253780364990234, "learning_rate": 1.1134977157451792e-06, "loss": 22.6332, "step": 8559 }, { "epoch": 0.794431554524362, "grad_norm": 59.726688385009766, "learning_rate": 1.1125324490269562e-06, "loss": 24.2352, "step": 8560 }, { "epoch": 0.7945243619489559, "grad_norm": 40.483863830566406, "learning_rate": 1.1115675484982952e-06, "loss": 22.4131, "step": 8561 }, { "epoch": 0.7946171693735499, "grad_norm": 39.47389221191406, "learning_rate": 1.1106030142500917e-06, "loss": 24.7008, "step": 8562 }, { "epoch": 0.7947099767981438, "grad_norm": 48.418697357177734, "learning_rate": 1.109638846373201e-06, "loss": 23.5354, "step": 8563 }, { "epoch": 0.7948027842227379, "grad_norm": 41.58010482788086, "learning_rate": 1.1086750449584461e-06, "loss": 22.4559, "step": 8564 }, { "epoch": 0.7948955916473318, "grad_norm": 39.625831604003906, "learning_rate": 1.1077116100966117e-06, "loss": 21.9116, "step": 8565 }, { "epoch": 0.7949883990719258, "grad_norm": 42.530494689941406, "learning_rate": 1.1067485418784567e-06, "loss": 23.2194, "step": 8566 }, { "epoch": 0.7950812064965197, "grad_norm": 51.90943145751953, "learning_rate": 1.105785840394693e-06, "loss": 22.3919, "step": 8567 }, { "epoch": 0.7951740139211136, "grad_norm": 44.43183517456055, "learning_rate": 1.104823505736009e-06, "loss": 23.7425, "step": 8568 }, { "epoch": 0.7952668213457077, "grad_norm": 44.96480178833008, "learning_rate": 1.1038615379930528e-06, "loss": 23.3262, "step": 8569 }, { "epoch": 0.7953596287703016, "grad_norm": 48.12953186035156, "learning_rate": 1.1028999372564402e-06, "loss": 23.6274, "step": 8570 }, { "epoch": 0.7954524361948956, "grad_norm": 40.77424621582031, "learning_rate": 1.101938703616749e-06, "loss": 20.9366, "step": 8571 }, { "epoch": 0.7955452436194895, "grad_norm": 45.45458984375, "learning_rate": 1.100977837164528e-06, "loss": 21.5636, "step": 8572 }, { "epoch": 0.7956380510440835, "grad_norm": 44.79138946533203, "learning_rate": 1.1000173379902873e-06, "loss": 22.3623, "step": 8573 }, { "epoch": 0.7957308584686775, "grad_norm": 39.49344253540039, "learning_rate": 1.0990572061845034e-06, "loss": 22.7374, "step": 8574 }, { "epoch": 0.7958236658932715, "grad_norm": 43.85563659667969, "learning_rate": 1.0980974418376178e-06, "loss": 23.6619, "step": 8575 }, { "epoch": 0.7959164733178654, "grad_norm": 40.23245620727539, "learning_rate": 1.0971380450400387e-06, "loss": 21.6885, "step": 8576 }, { "epoch": 0.7960092807424594, "grad_norm": 39.170230865478516, "learning_rate": 1.0961790158821374e-06, "loss": 22.018, "step": 8577 }, { "epoch": 0.7961020881670534, "grad_norm": 43.199527740478516, "learning_rate": 1.0952203544542539e-06, "loss": 25.1903, "step": 8578 }, { "epoch": 0.7961948955916474, "grad_norm": 42.02116394042969, "learning_rate": 1.0942620608466908e-06, "loss": 22.9233, "step": 8579 }, { "epoch": 0.7962877030162413, "grad_norm": 45.23162078857422, "learning_rate": 1.093304135149717e-06, "loss": 23.0505, "step": 8580 }, { "epoch": 0.7963805104408352, "grad_norm": 38.48308563232422, "learning_rate": 1.0923465774535662e-06, "loss": 21.949, "step": 8581 }, { "epoch": 0.7964733178654292, "grad_norm": 46.11056900024414, "learning_rate": 1.091389387848437e-06, "loss": 21.5569, "step": 8582 }, { "epoch": 0.7965661252900232, "grad_norm": 46.602752685546875, "learning_rate": 1.0904325664244975e-06, "loss": 22.787, "step": 8583 }, { "epoch": 0.7966589327146172, "grad_norm": 38.402156829833984, "learning_rate": 1.0894761132718757e-06, "loss": 21.8029, "step": 8584 }, { "epoch": 0.7967517401392111, "grad_norm": 49.16321563720703, "learning_rate": 1.088520028480667e-06, "loss": 22.6736, "step": 8585 }, { "epoch": 0.7968445475638051, "grad_norm": 49.730247497558594, "learning_rate": 1.0875643121409307e-06, "loss": 24.5079, "step": 8586 }, { "epoch": 0.7969373549883991, "grad_norm": 38.17039489746094, "learning_rate": 1.0866089643426981e-06, "loss": 22.8098, "step": 8587 }, { "epoch": 0.7970301624129931, "grad_norm": 41.84379959106445, "learning_rate": 1.0856539851759534e-06, "loss": 23.8886, "step": 8588 }, { "epoch": 0.797122969837587, "grad_norm": 46.92616271972656, "learning_rate": 1.084699374730659e-06, "loss": 22.8468, "step": 8589 }, { "epoch": 0.797215777262181, "grad_norm": 52.419612884521484, "learning_rate": 1.0837451330967335e-06, "loss": 22.7871, "step": 8590 }, { "epoch": 0.7973085846867749, "grad_norm": 56.348167419433594, "learning_rate": 1.0827912603640657e-06, "loss": 22.6889, "step": 8591 }, { "epoch": 0.797401392111369, "grad_norm": 45.804664611816406, "learning_rate": 1.0818377566225075e-06, "loss": 23.4232, "step": 8592 }, { "epoch": 0.7974941995359629, "grad_norm": 53.72705841064453, "learning_rate": 1.0808846219618763e-06, "loss": 22.6843, "step": 8593 }, { "epoch": 0.7975870069605568, "grad_norm": 58.80373764038086, "learning_rate": 1.0799318564719536e-06, "loss": 22.4924, "step": 8594 }, { "epoch": 0.7976798143851508, "grad_norm": 49.999691009521484, "learning_rate": 1.0789794602424913e-06, "loss": 22.6648, "step": 8595 }, { "epoch": 0.7977726218097447, "grad_norm": 57.687660217285156, "learning_rate": 1.0780274333631984e-06, "loss": 22.0496, "step": 8596 }, { "epoch": 0.7978654292343388, "grad_norm": 56.430076599121094, "learning_rate": 1.077075775923756e-06, "loss": 23.5255, "step": 8597 }, { "epoch": 0.7979582366589327, "grad_norm": 55.03654861450195, "learning_rate": 1.0761244880138078e-06, "loss": 23.4803, "step": 8598 }, { "epoch": 0.7980510440835267, "grad_norm": 49.96549606323242, "learning_rate": 1.0751735697229621e-06, "loss": 22.373, "step": 8599 }, { "epoch": 0.7981438515081206, "grad_norm": 32.34907913208008, "learning_rate": 1.074223021140791e-06, "loss": 23.6472, "step": 8600 }, { "epoch": 0.7982366589327147, "grad_norm": 40.064796447753906, "learning_rate": 1.073272842356839e-06, "loss": 23.3725, "step": 8601 }, { "epoch": 0.7983294663573086, "grad_norm": 43.750389099121094, "learning_rate": 1.0723230334606043e-06, "loss": 22.6121, "step": 8602 }, { "epoch": 0.7984222737819026, "grad_norm": 51.45723342895508, "learning_rate": 1.071373594541561e-06, "loss": 22.7639, "step": 8603 }, { "epoch": 0.7985150812064965, "grad_norm": 47.6658935546875, "learning_rate": 1.070424525689142e-06, "loss": 22.1804, "step": 8604 }, { "epoch": 0.7986078886310904, "grad_norm": 43.44184875488281, "learning_rate": 1.0694758269927475e-06, "loss": 24.5463, "step": 8605 }, { "epoch": 0.7987006960556845, "grad_norm": 70.56101989746094, "learning_rate": 1.0685274985417416e-06, "loss": 23.5966, "step": 8606 }, { "epoch": 0.7987935034802784, "grad_norm": 46.25269317626953, "learning_rate": 1.0675795404254575e-06, "loss": 24.1377, "step": 8607 }, { "epoch": 0.7988863109048724, "grad_norm": 43.799461364746094, "learning_rate": 1.0666319527331853e-06, "loss": 23.0584, "step": 8608 }, { "epoch": 0.7989791183294663, "grad_norm": 42.7459831237793, "learning_rate": 1.0656847355541906e-06, "loss": 21.557, "step": 8609 }, { "epoch": 0.7990719257540603, "grad_norm": 45.52761459350586, "learning_rate": 1.0647378889776956e-06, "loss": 23.1648, "step": 8610 }, { "epoch": 0.7991647331786543, "grad_norm": 42.406253814697266, "learning_rate": 1.0637914130928922e-06, "loss": 22.0825, "step": 8611 }, { "epoch": 0.7992575406032483, "grad_norm": 43.37019729614258, "learning_rate": 1.062845307988934e-06, "loss": 21.7348, "step": 8612 }, { "epoch": 0.7993503480278422, "grad_norm": 55.68409729003906, "learning_rate": 1.0618995737549465e-06, "loss": 23.0273, "step": 8613 }, { "epoch": 0.7994431554524362, "grad_norm": 55.681514739990234, "learning_rate": 1.0609542104800091e-06, "loss": 21.5677, "step": 8614 }, { "epoch": 0.7995359628770302, "grad_norm": 38.052337646484375, "learning_rate": 1.0600092182531773e-06, "loss": 22.6768, "step": 8615 }, { "epoch": 0.7996287703016242, "grad_norm": 44.84330749511719, "learning_rate": 1.0590645971634655e-06, "loss": 23.5164, "step": 8616 }, { "epoch": 0.7997215777262181, "grad_norm": 51.5294075012207, "learning_rate": 1.0581203472998546e-06, "loss": 22.8716, "step": 8617 }, { "epoch": 0.799814385150812, "grad_norm": 44.12324523925781, "learning_rate": 1.0571764687512892e-06, "loss": 22.2631, "step": 8618 }, { "epoch": 0.799907192575406, "grad_norm": 40.886024475097656, "learning_rate": 1.0562329616066824e-06, "loss": 21.4894, "step": 8619 }, { "epoch": 0.8, "grad_norm": 52.52323913574219, "learning_rate": 1.05528982595491e-06, "loss": 22.1975, "step": 8620 }, { "epoch": 0.800092807424594, "grad_norm": 48.80614471435547, "learning_rate": 1.0543470618848123e-06, "loss": 23.575, "step": 8621 }, { "epoch": 0.8001856148491879, "grad_norm": 47.44401550292969, "learning_rate": 1.0534046694851945e-06, "loss": 22.2102, "step": 8622 }, { "epoch": 0.8002784222737819, "grad_norm": 49.61766052246094, "learning_rate": 1.052462648844829e-06, "loss": 24.7621, "step": 8623 }, { "epoch": 0.8003712296983758, "grad_norm": 44.33308029174805, "learning_rate": 1.0515210000524501e-06, "loss": 22.8418, "step": 8624 }, { "epoch": 0.8004640371229699, "grad_norm": 41.473411560058594, "learning_rate": 1.0505797231967602e-06, "loss": 22.6248, "step": 8625 }, { "epoch": 0.8005568445475638, "grad_norm": 48.876426696777344, "learning_rate": 1.0496388183664253e-06, "loss": 23.5951, "step": 8626 }, { "epoch": 0.8006496519721578, "grad_norm": 43.939208984375, "learning_rate": 1.0486982856500754e-06, "loss": 22.5653, "step": 8627 }, { "epoch": 0.8007424593967517, "grad_norm": 48.06269836425781, "learning_rate": 1.0477581251363066e-06, "loss": 25.0792, "step": 8628 }, { "epoch": 0.8008352668213458, "grad_norm": 41.348384857177734, "learning_rate": 1.0468183369136787e-06, "loss": 23.1891, "step": 8629 }, { "epoch": 0.8009280742459397, "grad_norm": 40.158870697021484, "learning_rate": 1.0458789210707204e-06, "loss": 23.6106, "step": 8630 }, { "epoch": 0.8010208816705336, "grad_norm": 43.533790588378906, "learning_rate": 1.0449398776959179e-06, "loss": 21.4643, "step": 8631 }, { "epoch": 0.8011136890951276, "grad_norm": 41.224979400634766, "learning_rate": 1.04400120687773e-06, "loss": 23.0477, "step": 8632 }, { "epoch": 0.8012064965197215, "grad_norm": 41.50869369506836, "learning_rate": 1.0430629087045763e-06, "loss": 23.4982, "step": 8633 }, { "epoch": 0.8012993039443156, "grad_norm": 51.524574279785156, "learning_rate": 1.0421249832648416e-06, "loss": 21.712, "step": 8634 }, { "epoch": 0.8013921113689095, "grad_norm": 50.79678726196289, "learning_rate": 1.041187430646875e-06, "loss": 21.2753, "step": 8635 }, { "epoch": 0.8014849187935035, "grad_norm": 49.390933990478516, "learning_rate": 1.0402502509389955e-06, "loss": 22.9239, "step": 8636 }, { "epoch": 0.8015777262180974, "grad_norm": 49.192665100097656, "learning_rate": 1.0393134442294777e-06, "loss": 22.8326, "step": 8637 }, { "epoch": 0.8016705336426914, "grad_norm": 44.4560546875, "learning_rate": 1.038377010606571e-06, "loss": 22.4472, "step": 8638 }, { "epoch": 0.8017633410672854, "grad_norm": 43.33120346069336, "learning_rate": 1.0374409501584832e-06, "loss": 22.9557, "step": 8639 }, { "epoch": 0.8018561484918794, "grad_norm": 53.53840637207031, "learning_rate": 1.0365052629733884e-06, "loss": 23.1198, "step": 8640 }, { "epoch": 0.8019489559164733, "grad_norm": 55.0433464050293, "learning_rate": 1.0355699491394255e-06, "loss": 24.058, "step": 8641 }, { "epoch": 0.8020417633410672, "grad_norm": 50.01616668701172, "learning_rate": 1.0346350087447022e-06, "loss": 22.5947, "step": 8642 }, { "epoch": 0.8021345707656613, "grad_norm": 45.987823486328125, "learning_rate": 1.0337004418772821e-06, "loss": 22.0131, "step": 8643 }, { "epoch": 0.8022273781902552, "grad_norm": 56.12137985229492, "learning_rate": 1.0327662486252033e-06, "loss": 23.9438, "step": 8644 }, { "epoch": 0.8023201856148492, "grad_norm": 54.09919357299805, "learning_rate": 1.0318324290764625e-06, "loss": 22.6239, "step": 8645 }, { "epoch": 0.8024129930394431, "grad_norm": 54.18600082397461, "learning_rate": 1.0308989833190241e-06, "loss": 22.3127, "step": 8646 }, { "epoch": 0.8025058004640371, "grad_norm": 44.87971878051758, "learning_rate": 1.0299659114408146e-06, "loss": 24.4339, "step": 8647 }, { "epoch": 0.8025986078886311, "grad_norm": 47.75373840332031, "learning_rate": 1.0290332135297305e-06, "loss": 23.4696, "step": 8648 }, { "epoch": 0.8026914153132251, "grad_norm": 43.91990280151367, "learning_rate": 1.028100889673625e-06, "loss": 22.2259, "step": 8649 }, { "epoch": 0.802784222737819, "grad_norm": 50.46001434326172, "learning_rate": 1.0271689399603236e-06, "loss": 23.8479, "step": 8650 }, { "epoch": 0.802877030162413, "grad_norm": 47.72675323486328, "learning_rate": 1.0262373644776135e-06, "loss": 21.2087, "step": 8651 }, { "epoch": 0.8029698375870069, "grad_norm": 52.23284149169922, "learning_rate": 1.025306163313246e-06, "loss": 22.952, "step": 8652 }, { "epoch": 0.803062645011601, "grad_norm": 48.840614318847656, "learning_rate": 1.024375336554937e-06, "loss": 23.9848, "step": 8653 }, { "epoch": 0.8031554524361949, "grad_norm": 43.18195343017578, "learning_rate": 1.023444884290371e-06, "loss": 22.9204, "step": 8654 }, { "epoch": 0.8032482598607888, "grad_norm": 44.951560974121094, "learning_rate": 1.0225148066071906e-06, "loss": 24.0106, "step": 8655 }, { "epoch": 0.8033410672853828, "grad_norm": 58.76495361328125, "learning_rate": 1.0215851035930092e-06, "loss": 23.2496, "step": 8656 }, { "epoch": 0.8034338747099768, "grad_norm": 42.0028076171875, "learning_rate": 1.0206557753354013e-06, "loss": 24.0012, "step": 8657 }, { "epoch": 0.8035266821345708, "grad_norm": 40.78239440917969, "learning_rate": 1.0197268219219087e-06, "loss": 21.0501, "step": 8658 }, { "epoch": 0.8036194895591647, "grad_norm": 42.478668212890625, "learning_rate": 1.0187982434400334e-06, "loss": 21.0038, "step": 8659 }, { "epoch": 0.8037122969837587, "grad_norm": 47.04631423950195, "learning_rate": 1.0178700399772502e-06, "loss": 21.8613, "step": 8660 }, { "epoch": 0.8038051044083526, "grad_norm": 60.498329162597656, "learning_rate": 1.016942211620987e-06, "loss": 22.1072, "step": 8661 }, { "epoch": 0.8038979118329467, "grad_norm": 47.519710540771484, "learning_rate": 1.0160147584586483e-06, "loss": 22.4343, "step": 8662 }, { "epoch": 0.8039907192575406, "grad_norm": 57.3051643371582, "learning_rate": 1.015087680577596e-06, "loss": 21.5501, "step": 8663 }, { "epoch": 0.8040835266821346, "grad_norm": 49.81831359863281, "learning_rate": 1.0141609780651585e-06, "loss": 22.6229, "step": 8664 }, { "epoch": 0.8041763341067285, "grad_norm": 46.71056365966797, "learning_rate": 1.0132346510086282e-06, "loss": 24.0931, "step": 8665 }, { "epoch": 0.8042691415313225, "grad_norm": 43.91878890991211, "learning_rate": 1.0123086994952618e-06, "loss": 24.3427, "step": 8666 }, { "epoch": 0.8043619489559165, "grad_norm": 42.26651382446289, "learning_rate": 1.011383123612285e-06, "loss": 22.3018, "step": 8667 }, { "epoch": 0.8044547563805104, "grad_norm": 49.02854919433594, "learning_rate": 1.010457923446882e-06, "loss": 23.9344, "step": 8668 }, { "epoch": 0.8045475638051044, "grad_norm": 46.71913528442383, "learning_rate": 1.0095330990862056e-06, "loss": 23.4986, "step": 8669 }, { "epoch": 0.8046403712296983, "grad_norm": 49.9620246887207, "learning_rate": 1.008608650617371e-06, "loss": 25.1772, "step": 8670 }, { "epoch": 0.8047331786542924, "grad_norm": 39.850399017333984, "learning_rate": 1.007684578127459e-06, "loss": 22.5021, "step": 8671 }, { "epoch": 0.8048259860788863, "grad_norm": 41.1053581237793, "learning_rate": 1.0067608817035141e-06, "loss": 22.3948, "step": 8672 }, { "epoch": 0.8049187935034803, "grad_norm": 53.71865463256836, "learning_rate": 1.0058375614325483e-06, "loss": 23.4161, "step": 8673 }, { "epoch": 0.8050116009280742, "grad_norm": 39.22990417480469, "learning_rate": 1.004914617401535e-06, "loss": 21.9854, "step": 8674 }, { "epoch": 0.8051044083526682, "grad_norm": 43.373111724853516, "learning_rate": 1.0039920496974132e-06, "loss": 23.4716, "step": 8675 }, { "epoch": 0.8051972157772622, "grad_norm": 42.960289001464844, "learning_rate": 1.0030698584070848e-06, "loss": 21.7969, "step": 8676 }, { "epoch": 0.8052900232018562, "grad_norm": 49.85628890991211, "learning_rate": 1.0021480436174218e-06, "loss": 23.0838, "step": 8677 }, { "epoch": 0.8053828306264501, "grad_norm": 41.36916732788086, "learning_rate": 1.0012266054152519e-06, "loss": 24.7557, "step": 8678 }, { "epoch": 0.805475638051044, "grad_norm": 43.57814025878906, "learning_rate": 1.0003055438873766e-06, "loss": 23.6904, "step": 8679 }, { "epoch": 0.8055684454756381, "grad_norm": 41.545536041259766, "learning_rate": 9.993848591205552e-07, "loss": 21.5238, "step": 8680 }, { "epoch": 0.805661252900232, "grad_norm": 46.831172943115234, "learning_rate": 9.98464551201514e-07, "loss": 23.2052, "step": 8681 }, { "epoch": 0.805754060324826, "grad_norm": 45.88098907470703, "learning_rate": 9.975446202169432e-07, "loss": 22.2783, "step": 8682 }, { "epoch": 0.8058468677494199, "grad_norm": 40.49927520751953, "learning_rate": 9.96625066253501e-07, "loss": 24.0181, "step": 8683 }, { "epoch": 0.8059396751740139, "grad_norm": 41.16511535644531, "learning_rate": 9.957058893978022e-07, "loss": 21.523, "step": 8684 }, { "epoch": 0.8060324825986079, "grad_norm": 49.14775466918945, "learning_rate": 9.947870897364349e-07, "loss": 22.0767, "step": 8685 }, { "epoch": 0.8061252900232019, "grad_norm": 48.87626266479492, "learning_rate": 9.938686673559466e-07, "loss": 21.461, "step": 8686 }, { "epoch": 0.8062180974477958, "grad_norm": 46.30635070800781, "learning_rate": 9.929506223428498e-07, "loss": 22.0414, "step": 8687 }, { "epoch": 0.8063109048723898, "grad_norm": 39.44064712524414, "learning_rate": 9.92032954783621e-07, "loss": 21.3604, "step": 8688 }, { "epoch": 0.8064037122969837, "grad_norm": 52.382774353027344, "learning_rate": 9.911156647647064e-07, "loss": 25.3352, "step": 8689 }, { "epoch": 0.8064965197215778, "grad_norm": 51.30404281616211, "learning_rate": 9.901987523725064e-07, "loss": 23.1062, "step": 8690 }, { "epoch": 0.8065893271461717, "grad_norm": 41.101524353027344, "learning_rate": 9.892822176933964e-07, "loss": 21.2083, "step": 8691 }, { "epoch": 0.8066821345707657, "grad_norm": 48.18989562988281, "learning_rate": 9.883660608137097e-07, "loss": 23.7489, "step": 8692 }, { "epoch": 0.8067749419953596, "grad_norm": 38.41518020629883, "learning_rate": 9.874502818197469e-07, "loss": 23.0557, "step": 8693 }, { "epoch": 0.8068677494199537, "grad_norm": 46.7562141418457, "learning_rate": 9.865348807977698e-07, "loss": 22.204, "step": 8694 }, { "epoch": 0.8069605568445476, "grad_norm": 48.136905670166016, "learning_rate": 9.856198578340116e-07, "loss": 23.1775, "step": 8695 }, { "epoch": 0.8070533642691415, "grad_norm": 42.077308654785156, "learning_rate": 9.84705213014659e-07, "loss": 23.8212, "step": 8696 }, { "epoch": 0.8071461716937355, "grad_norm": 45.55227279663086, "learning_rate": 9.837909464258748e-07, "loss": 22.704, "step": 8697 }, { "epoch": 0.8072389791183294, "grad_norm": 46.792205810546875, "learning_rate": 9.828770581537756e-07, "loss": 24.5458, "step": 8698 }, { "epoch": 0.8073317865429235, "grad_norm": 47.0485725402832, "learning_rate": 9.819635482844514e-07, "loss": 22.5829, "step": 8699 }, { "epoch": 0.8074245939675174, "grad_norm": 47.98857879638672, "learning_rate": 9.81050416903951e-07, "loss": 23.336, "step": 8700 }, { "epoch": 0.8075174013921114, "grad_norm": 47.26089096069336, "learning_rate": 9.801376640982884e-07, "loss": 23.482, "step": 8701 }, { "epoch": 0.8076102088167053, "grad_norm": 48.92228317260742, "learning_rate": 9.792252899534428e-07, "loss": 22.7522, "step": 8702 }, { "epoch": 0.8077030162412993, "grad_norm": 45.672855377197266, "learning_rate": 9.783132945553586e-07, "loss": 23.3282, "step": 8703 }, { "epoch": 0.8077958236658933, "grad_norm": 43.34829330444336, "learning_rate": 9.774016779899427e-07, "loss": 21.6147, "step": 8704 }, { "epoch": 0.8078886310904873, "grad_norm": 56.248435974121094, "learning_rate": 9.764904403430675e-07, "loss": 21.9581, "step": 8705 }, { "epoch": 0.8079814385150812, "grad_norm": 47.7056999206543, "learning_rate": 9.755795817005686e-07, "loss": 22.78, "step": 8706 }, { "epoch": 0.8080742459396751, "grad_norm": 45.62349319458008, "learning_rate": 9.746691021482468e-07, "loss": 21.3297, "step": 8707 }, { "epoch": 0.8081670533642692, "grad_norm": 46.780006408691406, "learning_rate": 9.737590017718661e-07, "loss": 24.8761, "step": 8708 }, { "epoch": 0.8082598607888631, "grad_norm": 49.95833206176758, "learning_rate": 9.728492806571577e-07, "loss": 22.7825, "step": 8709 }, { "epoch": 0.8083526682134571, "grad_norm": 45.18899154663086, "learning_rate": 9.719399388898142e-07, "loss": 21.3293, "step": 8710 }, { "epoch": 0.808445475638051, "grad_norm": 44.176544189453125, "learning_rate": 9.710309765554927e-07, "loss": 23.4759, "step": 8711 }, { "epoch": 0.808538283062645, "grad_norm": 55.45886993408203, "learning_rate": 9.701223937398152e-07, "loss": 22.4591, "step": 8712 }, { "epoch": 0.808631090487239, "grad_norm": 60.2940788269043, "learning_rate": 9.692141905283669e-07, "loss": 22.8604, "step": 8713 }, { "epoch": 0.808723897911833, "grad_norm": 50.49529266357422, "learning_rate": 9.683063670067011e-07, "loss": 22.4361, "step": 8714 }, { "epoch": 0.8088167053364269, "grad_norm": 42.146568298339844, "learning_rate": 9.67398923260331e-07, "loss": 24.1259, "step": 8715 }, { "epoch": 0.8089095127610209, "grad_norm": 49.53450393676758, "learning_rate": 9.664918593747346e-07, "loss": 21.408, "step": 8716 }, { "epoch": 0.8090023201856148, "grad_norm": 47.25519561767578, "learning_rate": 9.655851754353563e-07, "loss": 24.7812, "step": 8717 }, { "epoch": 0.8090951276102089, "grad_norm": 52.87359619140625, "learning_rate": 9.646788715276024e-07, "loss": 21.4085, "step": 8718 }, { "epoch": 0.8091879350348028, "grad_norm": 39.95579528808594, "learning_rate": 9.637729477368441e-07, "loss": 22.4719, "step": 8719 }, { "epoch": 0.8092807424593967, "grad_norm": 42.777801513671875, "learning_rate": 9.628674041484186e-07, "loss": 22.0719, "step": 8720 }, { "epoch": 0.8093735498839907, "grad_norm": 44.658119201660156, "learning_rate": 9.619622408476253e-07, "loss": 22.8316, "step": 8721 }, { "epoch": 0.8094663573085847, "grad_norm": 45.16792297363281, "learning_rate": 9.610574579197284e-07, "loss": 22.5794, "step": 8722 }, { "epoch": 0.8095591647331787, "grad_norm": 41.14055633544922, "learning_rate": 9.601530554499545e-07, "loss": 20.1339, "step": 8723 }, { "epoch": 0.8096519721577726, "grad_norm": 61.26792526245117, "learning_rate": 9.592490335234993e-07, "loss": 24.0722, "step": 8724 }, { "epoch": 0.8097447795823666, "grad_norm": 39.9941291809082, "learning_rate": 9.583453922255154e-07, "loss": 22.1701, "step": 8725 }, { "epoch": 0.8098375870069605, "grad_norm": 43.56901168823242, "learning_rate": 9.57442131641127e-07, "loss": 23.5911, "step": 8726 }, { "epoch": 0.8099303944315546, "grad_norm": 46.72429656982422, "learning_rate": 9.565392518554167e-07, "loss": 22.114, "step": 8727 }, { "epoch": 0.8100232018561485, "grad_norm": 49.78627014160156, "learning_rate": 9.55636752953435e-07, "loss": 24.1226, "step": 8728 }, { "epoch": 0.8101160092807425, "grad_norm": 48.76344299316406, "learning_rate": 9.547346350201925e-07, "loss": 23.039, "step": 8729 }, { "epoch": 0.8102088167053364, "grad_norm": 39.83134460449219, "learning_rate": 9.538328981406714e-07, "loss": 22.7028, "step": 8730 }, { "epoch": 0.8103016241299303, "grad_norm": 49.483890533447266, "learning_rate": 9.529315423998064e-07, "loss": 22.1682, "step": 8731 }, { "epoch": 0.8103944315545244, "grad_norm": 46.730464935302734, "learning_rate": 9.520305678825081e-07, "loss": 24.0525, "step": 8732 }, { "epoch": 0.8104872389791183, "grad_norm": 39.34233856201172, "learning_rate": 9.511299746736446e-07, "loss": 22.7455, "step": 8733 }, { "epoch": 0.8105800464037123, "grad_norm": 51.26451110839844, "learning_rate": 9.502297628580487e-07, "loss": 22.9183, "step": 8734 }, { "epoch": 0.8106728538283062, "grad_norm": 52.51384353637695, "learning_rate": 9.493299325205185e-07, "loss": 23.0697, "step": 8735 }, { "epoch": 0.8107656612529003, "grad_norm": 47.78787612915039, "learning_rate": 9.484304837458158e-07, "loss": 22.7951, "step": 8736 }, { "epoch": 0.8108584686774942, "grad_norm": 35.478389739990234, "learning_rate": 9.47531416618665e-07, "loss": 22.9542, "step": 8737 }, { "epoch": 0.8109512761020882, "grad_norm": 50.55698776245117, "learning_rate": 9.466327312237594e-07, "loss": 22.9308, "step": 8738 }, { "epoch": 0.8110440835266821, "grad_norm": 47.98820495605469, "learning_rate": 9.457344276457486e-07, "loss": 23.9613, "step": 8739 }, { "epoch": 0.8111368909512761, "grad_norm": 45.932777404785156, "learning_rate": 9.448365059692543e-07, "loss": 23.8347, "step": 8740 }, { "epoch": 0.8112296983758701, "grad_norm": 38.48731231689453, "learning_rate": 9.439389662788561e-07, "loss": 23.7858, "step": 8741 }, { "epoch": 0.8113225058004641, "grad_norm": 46.58414077758789, "learning_rate": 9.430418086591008e-07, "loss": 22.0178, "step": 8742 }, { "epoch": 0.811415313225058, "grad_norm": 55.56070327758789, "learning_rate": 9.421450331944975e-07, "loss": 22.4583, "step": 8743 }, { "epoch": 0.8115081206496519, "grad_norm": 41.66885757446289, "learning_rate": 9.41248639969523e-07, "loss": 22.3853, "step": 8744 }, { "epoch": 0.8116009280742459, "grad_norm": 43.19668960571289, "learning_rate": 9.403526290686104e-07, "loss": 22.7752, "step": 8745 }, { "epoch": 0.8116937354988399, "grad_norm": 49.2261848449707, "learning_rate": 9.39457000576166e-07, "loss": 23.2958, "step": 8746 }, { "epoch": 0.8117865429234339, "grad_norm": 44.118194580078125, "learning_rate": 9.385617545765541e-07, "loss": 21.6548, "step": 8747 }, { "epoch": 0.8118793503480278, "grad_norm": 47.79022216796875, "learning_rate": 9.376668911541042e-07, "loss": 23.5145, "step": 8748 }, { "epoch": 0.8119721577726218, "grad_norm": 44.109317779541016, "learning_rate": 9.367724103931097e-07, "loss": 22.7301, "step": 8749 }, { "epoch": 0.8120649651972158, "grad_norm": 49.78317642211914, "learning_rate": 9.358783123778309e-07, "loss": 24.0247, "step": 8750 }, { "epoch": 0.8121577726218098, "grad_norm": 57.88772201538086, "learning_rate": 9.349845971924876e-07, "loss": 23.5484, "step": 8751 }, { "epoch": 0.8122505800464037, "grad_norm": 49.893653869628906, "learning_rate": 9.340912649212658e-07, "loss": 22.4461, "step": 8752 }, { "epoch": 0.8123433874709977, "grad_norm": 36.446720123291016, "learning_rate": 9.331983156483155e-07, "loss": 21.4029, "step": 8753 }, { "epoch": 0.8124361948955916, "grad_norm": 43.76341247558594, "learning_rate": 9.323057494577498e-07, "loss": 21.846, "step": 8754 }, { "epoch": 0.8125290023201857, "grad_norm": 51.047725677490234, "learning_rate": 9.314135664336449e-07, "loss": 22.7366, "step": 8755 }, { "epoch": 0.8126218097447796, "grad_norm": 52.87228775024414, "learning_rate": 9.305217666600447e-07, "loss": 24.301, "step": 8756 }, { "epoch": 0.8127146171693735, "grad_norm": 42.92961120605469, "learning_rate": 9.296303502209536e-07, "loss": 22.7955, "step": 8757 }, { "epoch": 0.8128074245939675, "grad_norm": 47.906314849853516, "learning_rate": 9.287393172003407e-07, "loss": 23.2722, "step": 8758 }, { "epoch": 0.8129002320185614, "grad_norm": 44.12218475341797, "learning_rate": 9.278486676821386e-07, "loss": 24.646, "step": 8759 }, { "epoch": 0.8129930394431555, "grad_norm": 51.094810485839844, "learning_rate": 9.269584017502431e-07, "loss": 26.0467, "step": 8760 }, { "epoch": 0.8130858468677494, "grad_norm": 48.35110092163086, "learning_rate": 9.260685194885183e-07, "loss": 22.3875, "step": 8761 }, { "epoch": 0.8131786542923434, "grad_norm": 44.82464599609375, "learning_rate": 9.25179020980786e-07, "loss": 23.3278, "step": 8762 }, { "epoch": 0.8132714617169373, "grad_norm": 46.69940948486328, "learning_rate": 9.242899063108362e-07, "loss": 22.5301, "step": 8763 }, { "epoch": 0.8133642691415314, "grad_norm": 59.428260803222656, "learning_rate": 9.234011755624206e-07, "loss": 22.7592, "step": 8764 }, { "epoch": 0.8134570765661253, "grad_norm": 45.77119827270508, "learning_rate": 9.225128288192553e-07, "loss": 22.2662, "step": 8765 }, { "epoch": 0.8135498839907193, "grad_norm": 51.67848587036133, "learning_rate": 9.216248661650196e-07, "loss": 24.462, "step": 8766 }, { "epoch": 0.8136426914153132, "grad_norm": 48.48676300048828, "learning_rate": 9.207372876833593e-07, "loss": 24.1174, "step": 8767 }, { "epoch": 0.8137354988399071, "grad_norm": 48.39693832397461, "learning_rate": 9.198500934578808e-07, "loss": 22.9959, "step": 8768 }, { "epoch": 0.8138283062645012, "grad_norm": 46.500099182128906, "learning_rate": 9.189632835721551e-07, "loss": 22.685, "step": 8769 }, { "epoch": 0.8139211136890951, "grad_norm": 52.47998046875, "learning_rate": 9.180768581097188e-07, "loss": 22.1811, "step": 8770 }, { "epoch": 0.8140139211136891, "grad_norm": 59.37156677246094, "learning_rate": 9.1719081715407e-07, "loss": 22.7975, "step": 8771 }, { "epoch": 0.814106728538283, "grad_norm": 52.0859260559082, "learning_rate": 9.163051607886703e-07, "loss": 21.5334, "step": 8772 }, { "epoch": 0.8141995359628771, "grad_norm": 47.56318664550781, "learning_rate": 9.154198890969501e-07, "loss": 23.5917, "step": 8773 }, { "epoch": 0.814292343387471, "grad_norm": 52.26230239868164, "learning_rate": 9.145350021622945e-07, "loss": 22.9602, "step": 8774 }, { "epoch": 0.814385150812065, "grad_norm": 55.870567321777344, "learning_rate": 9.136505000680618e-07, "loss": 25.4328, "step": 8775 }, { "epoch": 0.8144779582366589, "grad_norm": 46.9765625, "learning_rate": 9.127663828975686e-07, "loss": 22.3777, "step": 8776 }, { "epoch": 0.8145707656612529, "grad_norm": 52.949249267578125, "learning_rate": 9.118826507340961e-07, "loss": 22.647, "step": 8777 }, { "epoch": 0.8146635730858469, "grad_norm": 56.98141098022461, "learning_rate": 9.109993036608883e-07, "loss": 21.944, "step": 8778 }, { "epoch": 0.8147563805104409, "grad_norm": 43.207706451416016, "learning_rate": 9.101163417611586e-07, "loss": 22.9597, "step": 8779 }, { "epoch": 0.8148491879350348, "grad_norm": 43.52167510986328, "learning_rate": 9.092337651180744e-07, "loss": 24.0811, "step": 8780 }, { "epoch": 0.8149419953596287, "grad_norm": 47.21885681152344, "learning_rate": 9.083515738147758e-07, "loss": 24.5474, "step": 8781 }, { "epoch": 0.8150348027842227, "grad_norm": 45.0410270690918, "learning_rate": 9.07469767934362e-07, "loss": 21.9817, "step": 8782 }, { "epoch": 0.8151276102088167, "grad_norm": 45.452911376953125, "learning_rate": 9.065883475598969e-07, "loss": 22.884, "step": 8783 }, { "epoch": 0.8152204176334107, "grad_norm": 44.43452453613281, "learning_rate": 9.057073127744065e-07, "loss": 22.4946, "step": 8784 }, { "epoch": 0.8153132250580046, "grad_norm": 51.45472717285156, "learning_rate": 9.04826663660886e-07, "loss": 23.2374, "step": 8785 }, { "epoch": 0.8154060324825986, "grad_norm": 60.8486328125, "learning_rate": 9.039464003022852e-07, "loss": 22.3118, "step": 8786 }, { "epoch": 0.8154988399071926, "grad_norm": 45.92472457885742, "learning_rate": 9.030665227815266e-07, "loss": 24.2944, "step": 8787 }, { "epoch": 0.8155916473317866, "grad_norm": 43.24489974975586, "learning_rate": 9.021870311814912e-07, "loss": 22.4444, "step": 8788 }, { "epoch": 0.8156844547563805, "grad_norm": 51.69511413574219, "learning_rate": 9.013079255850244e-07, "loss": 24.0592, "step": 8789 }, { "epoch": 0.8157772621809745, "grad_norm": 62.43528366088867, "learning_rate": 9.004292060749347e-07, "loss": 22.7764, "step": 8790 }, { "epoch": 0.8158700696055684, "grad_norm": 46.185062408447266, "learning_rate": 8.995508727339997e-07, "loss": 21.5867, "step": 8791 }, { "epoch": 0.8159628770301625, "grad_norm": 51.08913803100586, "learning_rate": 8.986729256449501e-07, "loss": 22.2753, "step": 8792 }, { "epoch": 0.8160556844547564, "grad_norm": 42.11807632446289, "learning_rate": 8.977953648904908e-07, "loss": 21.3384, "step": 8793 }, { "epoch": 0.8161484918793503, "grad_norm": 44.821048736572266, "learning_rate": 8.969181905532842e-07, "loss": 23.1581, "step": 8794 }, { "epoch": 0.8162412993039443, "grad_norm": 45.300689697265625, "learning_rate": 8.960414027159575e-07, "loss": 20.6054, "step": 8795 }, { "epoch": 0.8163341067285382, "grad_norm": 47.05747985839844, "learning_rate": 8.951650014611019e-07, "loss": 23.5075, "step": 8796 }, { "epoch": 0.8164269141531323, "grad_norm": 52.5832633972168, "learning_rate": 8.942889868712728e-07, "loss": 22.3743, "step": 8797 }, { "epoch": 0.8165197215777262, "grad_norm": 51.05508041381836, "learning_rate": 8.934133590289896e-07, "loss": 21.4824, "step": 8798 }, { "epoch": 0.8166125290023202, "grad_norm": 43.03278732299805, "learning_rate": 8.92538118016732e-07, "loss": 23.8939, "step": 8799 }, { "epoch": 0.8167053364269141, "grad_norm": 43.746768951416016, "learning_rate": 8.916632639169464e-07, "loss": 22.8691, "step": 8800 }, { "epoch": 0.8167981438515082, "grad_norm": 48.2575569152832, "learning_rate": 8.907887968120427e-07, "loss": 23.2289, "step": 8801 }, { "epoch": 0.8168909512761021, "grad_norm": 73.95426940917969, "learning_rate": 8.899147167843908e-07, "loss": 23.5222, "step": 8802 }, { "epoch": 0.8169837587006961, "grad_norm": 42.84318161010742, "learning_rate": 8.890410239163299e-07, "loss": 21.7309, "step": 8803 }, { "epoch": 0.81707656612529, "grad_norm": 41.99667739868164, "learning_rate": 8.881677182901588e-07, "loss": 23.5795, "step": 8804 }, { "epoch": 0.817169373549884, "grad_norm": 37.84958267211914, "learning_rate": 8.872947999881398e-07, "loss": 22.3715, "step": 8805 }, { "epoch": 0.817262180974478, "grad_norm": 44.99850082397461, "learning_rate": 8.864222690925006e-07, "loss": 22.3453, "step": 8806 }, { "epoch": 0.817354988399072, "grad_norm": 38.973243713378906, "learning_rate": 8.855501256854293e-07, "loss": 21.7545, "step": 8807 }, { "epoch": 0.8174477958236659, "grad_norm": 42.780059814453125, "learning_rate": 8.846783698490835e-07, "loss": 23.0797, "step": 8808 }, { "epoch": 0.8175406032482598, "grad_norm": 46.13897705078125, "learning_rate": 8.838070016655758e-07, "loss": 23.2536, "step": 8809 }, { "epoch": 0.8176334106728538, "grad_norm": 42.54448699951172, "learning_rate": 8.829360212169902e-07, "loss": 24.5424, "step": 8810 }, { "epoch": 0.8177262180974478, "grad_norm": 41.10308074951172, "learning_rate": 8.820654285853702e-07, "loss": 23.187, "step": 8811 }, { "epoch": 0.8178190255220418, "grad_norm": 47.10741424560547, "learning_rate": 8.811952238527227e-07, "loss": 21.9643, "step": 8812 }, { "epoch": 0.8179118329466357, "grad_norm": 46.50883102416992, "learning_rate": 8.803254071010176e-07, "loss": 23.146, "step": 8813 }, { "epoch": 0.8180046403712297, "grad_norm": 40.92120361328125, "learning_rate": 8.794559784121936e-07, "loss": 22.583, "step": 8814 }, { "epoch": 0.8180974477958237, "grad_norm": 49.8800163269043, "learning_rate": 8.785869378681433e-07, "loss": 23.4194, "step": 8815 }, { "epoch": 0.8181902552204177, "grad_norm": 43.495487213134766, "learning_rate": 8.777182855507322e-07, "loss": 22.9359, "step": 8816 }, { "epoch": 0.8182830626450116, "grad_norm": 46.938270568847656, "learning_rate": 8.768500215417835e-07, "loss": 22.0821, "step": 8817 }, { "epoch": 0.8183758700696055, "grad_norm": 48.850852966308594, "learning_rate": 8.75982145923086e-07, "loss": 23.8003, "step": 8818 }, { "epoch": 0.8184686774941995, "grad_norm": 40.665950775146484, "learning_rate": 8.751146587763898e-07, "loss": 22.3476, "step": 8819 }, { "epoch": 0.8185614849187935, "grad_norm": 44.24278259277344, "learning_rate": 8.742475601834133e-07, "loss": 22.5629, "step": 8820 }, { "epoch": 0.8186542923433875, "grad_norm": 51.02583694458008, "learning_rate": 8.733808502258306e-07, "loss": 22.289, "step": 8821 }, { "epoch": 0.8187470997679814, "grad_norm": 57.028961181640625, "learning_rate": 8.725145289852871e-07, "loss": 23.4365, "step": 8822 }, { "epoch": 0.8188399071925754, "grad_norm": 47.04326629638672, "learning_rate": 8.716485965433868e-07, "loss": 22.5848, "step": 8823 }, { "epoch": 0.8189327146171693, "grad_norm": 46.32017517089844, "learning_rate": 8.707830529816985e-07, "loss": 24.3456, "step": 8824 }, { "epoch": 0.8190255220417634, "grad_norm": 47.6707878112793, "learning_rate": 8.69917898381753e-07, "loss": 22.4251, "step": 8825 }, { "epoch": 0.8191183294663573, "grad_norm": 49.0466194152832, "learning_rate": 8.690531328250489e-07, "loss": 22.7878, "step": 8826 }, { "epoch": 0.8192111368909513, "grad_norm": 39.7484016418457, "learning_rate": 8.681887563930407e-07, "loss": 23.4717, "step": 8827 }, { "epoch": 0.8193039443155452, "grad_norm": 41.50008010864258, "learning_rate": 8.673247691671538e-07, "loss": 20.1092, "step": 8828 }, { "epoch": 0.8193967517401393, "grad_norm": 44.31941223144531, "learning_rate": 8.664611712287718e-07, "loss": 22.9209, "step": 8829 }, { "epoch": 0.8194895591647332, "grad_norm": 51.86107635498047, "learning_rate": 8.655979626592442e-07, "loss": 22.7337, "step": 8830 }, { "epoch": 0.8195823665893271, "grad_norm": 41.09666442871094, "learning_rate": 8.647351435398821e-07, "loss": 23.6402, "step": 8831 }, { "epoch": 0.8196751740139211, "grad_norm": 50.72532653808594, "learning_rate": 8.638727139519637e-07, "loss": 23.0013, "step": 8832 }, { "epoch": 0.819767981438515, "grad_norm": 52.5495719909668, "learning_rate": 8.63010673976723e-07, "loss": 23.6692, "step": 8833 }, { "epoch": 0.8198607888631091, "grad_norm": 59.15644836425781, "learning_rate": 8.621490236953656e-07, "loss": 22.2916, "step": 8834 }, { "epoch": 0.819953596287703, "grad_norm": 47.472686767578125, "learning_rate": 8.612877631890559e-07, "loss": 22.0038, "step": 8835 }, { "epoch": 0.820046403712297, "grad_norm": 51.841426849365234, "learning_rate": 8.604268925389219e-07, "loss": 23.3857, "step": 8836 }, { "epoch": 0.8201392111368909, "grad_norm": 47.246246337890625, "learning_rate": 8.595664118260555e-07, "loss": 24.1352, "step": 8837 }, { "epoch": 0.8202320185614849, "grad_norm": 52.32408142089844, "learning_rate": 8.587063211315138e-07, "loss": 20.867, "step": 8838 }, { "epoch": 0.8203248259860789, "grad_norm": 42.44586181640625, "learning_rate": 8.578466205363112e-07, "loss": 22.0541, "step": 8839 }, { "epoch": 0.8204176334106729, "grad_norm": 49.78633499145508, "learning_rate": 8.569873101214327e-07, "loss": 23.9243, "step": 8840 }, { "epoch": 0.8205104408352668, "grad_norm": 47.466522216796875, "learning_rate": 8.561283899678219e-07, "loss": 21.8485, "step": 8841 }, { "epoch": 0.8206032482598608, "grad_norm": 44.83600997924805, "learning_rate": 8.552698601563875e-07, "loss": 22.8165, "step": 8842 }, { "epoch": 0.8206960556844548, "grad_norm": 53.17327117919922, "learning_rate": 8.544117207679997e-07, "loss": 21.6151, "step": 8843 }, { "epoch": 0.8207888631090487, "grad_norm": 59.20921325683594, "learning_rate": 8.535539718834929e-07, "loss": 23.4172, "step": 8844 }, { "epoch": 0.8208816705336427, "grad_norm": 68.58738708496094, "learning_rate": 8.526966135836667e-07, "loss": 23.3236, "step": 8845 }, { "epoch": 0.8209744779582366, "grad_norm": 52.02812194824219, "learning_rate": 8.518396459492811e-07, "loss": 22.1708, "step": 8846 }, { "epoch": 0.8210672853828306, "grad_norm": 40.42235565185547, "learning_rate": 8.509830690610598e-07, "loss": 23.9098, "step": 8847 }, { "epoch": 0.8211600928074246, "grad_norm": 46.162593841552734, "learning_rate": 8.501268829996912e-07, "loss": 22.3343, "step": 8848 }, { "epoch": 0.8212529002320186, "grad_norm": 51.17346954345703, "learning_rate": 8.492710878458249e-07, "loss": 23.013, "step": 8849 }, { "epoch": 0.8213457076566125, "grad_norm": 50.963260650634766, "learning_rate": 8.484156836800739e-07, "loss": 22.6696, "step": 8850 }, { "epoch": 0.8214385150812065, "grad_norm": 49.307186126708984, "learning_rate": 8.475606705830169e-07, "loss": 22.0671, "step": 8851 }, { "epoch": 0.8215313225058004, "grad_norm": 52.82609176635742, "learning_rate": 8.467060486351936e-07, "loss": 23.611, "step": 8852 }, { "epoch": 0.8216241299303945, "grad_norm": 47.66315460205078, "learning_rate": 8.458518179171065e-07, "loss": 23.2458, "step": 8853 }, { "epoch": 0.8217169373549884, "grad_norm": 54.53066635131836, "learning_rate": 8.449979785092216e-07, "loss": 22.6965, "step": 8854 }, { "epoch": 0.8218097447795824, "grad_norm": 46.25164031982422, "learning_rate": 8.44144530491971e-07, "loss": 23.5187, "step": 8855 }, { "epoch": 0.8219025522041763, "grad_norm": 64.88127136230469, "learning_rate": 8.432914739457432e-07, "loss": 23.6048, "step": 8856 }, { "epoch": 0.8219953596287704, "grad_norm": 50.43095397949219, "learning_rate": 8.424388089508972e-07, "loss": 23.7268, "step": 8857 }, { "epoch": 0.8220881670533643, "grad_norm": 43.445770263671875, "learning_rate": 8.415865355877512e-07, "loss": 23.739, "step": 8858 }, { "epoch": 0.8221809744779582, "grad_norm": 45.302703857421875, "learning_rate": 8.407346539365869e-07, "loss": 21.7527, "step": 8859 }, { "epoch": 0.8222737819025522, "grad_norm": 47.21940231323242, "learning_rate": 8.398831640776478e-07, "loss": 22.3504, "step": 8860 }, { "epoch": 0.8223665893271461, "grad_norm": 48.96095657348633, "learning_rate": 8.39032066091146e-07, "loss": 23.2603, "step": 8861 }, { "epoch": 0.8224593967517402, "grad_norm": 50.91179275512695, "learning_rate": 8.38181360057248e-07, "loss": 23.9912, "step": 8862 }, { "epoch": 0.8225522041763341, "grad_norm": 43.71589660644531, "learning_rate": 8.373310460560919e-07, "loss": 22.5076, "step": 8863 }, { "epoch": 0.8226450116009281, "grad_norm": 43.2867546081543, "learning_rate": 8.364811241677739e-07, "loss": 23.513, "step": 8864 }, { "epoch": 0.822737819025522, "grad_norm": 51.84027862548828, "learning_rate": 8.356315944723537e-07, "loss": 25.4707, "step": 8865 }, { "epoch": 0.822830626450116, "grad_norm": 48.48659133911133, "learning_rate": 8.347824570498547e-07, "loss": 22.0436, "step": 8866 }, { "epoch": 0.82292343387471, "grad_norm": 39.8003044128418, "learning_rate": 8.339337119802665e-07, "loss": 22.1934, "step": 8867 }, { "epoch": 0.823016241299304, "grad_norm": 48.39372634887695, "learning_rate": 8.330853593435345e-07, "loss": 23.3619, "step": 8868 }, { "epoch": 0.8231090487238979, "grad_norm": 47.93852233886719, "learning_rate": 8.322373992195737e-07, "loss": 22.0526, "step": 8869 }, { "epoch": 0.8232018561484918, "grad_norm": 43.94700622558594, "learning_rate": 8.313898316882602e-07, "loss": 23.0423, "step": 8870 }, { "epoch": 0.8232946635730859, "grad_norm": 41.620548248291016, "learning_rate": 8.305426568294317e-07, "loss": 23.2465, "step": 8871 }, { "epoch": 0.8233874709976798, "grad_norm": 46.29085159301758, "learning_rate": 8.29695874722889e-07, "loss": 21.1187, "step": 8872 }, { "epoch": 0.8234802784222738, "grad_norm": 44.14297866821289, "learning_rate": 8.288494854484002e-07, "loss": 23.8925, "step": 8873 }, { "epoch": 0.8235730858468677, "grad_norm": 47.29560852050781, "learning_rate": 8.280034890856886e-07, "loss": 22.1028, "step": 8874 }, { "epoch": 0.8236658932714617, "grad_norm": 39.85525894165039, "learning_rate": 8.271578857144491e-07, "loss": 22.8568, "step": 8875 }, { "epoch": 0.8237587006960557, "grad_norm": 47.604530334472656, "learning_rate": 8.263126754143313e-07, "loss": 22.6984, "step": 8876 }, { "epoch": 0.8238515081206497, "grad_norm": 43.07512283325195, "learning_rate": 8.25467858264955e-07, "loss": 23.4058, "step": 8877 }, { "epoch": 0.8239443155452436, "grad_norm": 85.46119689941406, "learning_rate": 8.246234343458992e-07, "loss": 24.0111, "step": 8878 }, { "epoch": 0.8240371229698376, "grad_norm": 49.77314376831055, "learning_rate": 8.237794037367058e-07, "loss": 21.9418, "step": 8879 }, { "epoch": 0.8241299303944316, "grad_norm": 48.01167297363281, "learning_rate": 8.229357665168791e-07, "loss": 23.5059, "step": 8880 }, { "epoch": 0.8242227378190256, "grad_norm": 48.808082580566406, "learning_rate": 8.220925227658922e-07, "loss": 21.5112, "step": 8881 }, { "epoch": 0.8243155452436195, "grad_norm": 48.04280471801758, "learning_rate": 8.212496725631702e-07, "loss": 23.852, "step": 8882 }, { "epoch": 0.8244083526682134, "grad_norm": 47.87946701049805, "learning_rate": 8.204072159881127e-07, "loss": 24.9965, "step": 8883 }, { "epoch": 0.8245011600928074, "grad_norm": 52.153743743896484, "learning_rate": 8.195651531200743e-07, "loss": 23.5346, "step": 8884 }, { "epoch": 0.8245939675174014, "grad_norm": 62.96525192260742, "learning_rate": 8.187234840383762e-07, "loss": 22.5543, "step": 8885 }, { "epoch": 0.8246867749419954, "grad_norm": 66.91331481933594, "learning_rate": 8.178822088222992e-07, "loss": 24.0771, "step": 8886 }, { "epoch": 0.8247795823665893, "grad_norm": 56.54151916503906, "learning_rate": 8.17041327551093e-07, "loss": 23.3877, "step": 8887 }, { "epoch": 0.8248723897911833, "grad_norm": 48.587886810302734, "learning_rate": 8.162008403039645e-07, "loss": 22.0666, "step": 8888 }, { "epoch": 0.8249651972157772, "grad_norm": 65.46841430664062, "learning_rate": 8.15360747160086e-07, "loss": 22.4746, "step": 8889 }, { "epoch": 0.8250580046403713, "grad_norm": 73.94837188720703, "learning_rate": 8.145210481985915e-07, "loss": 23.9541, "step": 8890 }, { "epoch": 0.8251508120649652, "grad_norm": 50.472808837890625, "learning_rate": 8.136817434985772e-07, "loss": 24.0161, "step": 8891 }, { "epoch": 0.8252436194895592, "grad_norm": 51.286746978759766, "learning_rate": 8.12842833139107e-07, "loss": 24.7183, "step": 8892 }, { "epoch": 0.8253364269141531, "grad_norm": 47.31365203857422, "learning_rate": 8.120043171992015e-07, "loss": 23.5749, "step": 8893 }, { "epoch": 0.8254292343387472, "grad_norm": 46.36897277832031, "learning_rate": 8.111661957578476e-07, "loss": 23.2212, "step": 8894 }, { "epoch": 0.8255220417633411, "grad_norm": 51.58533477783203, "learning_rate": 8.103284688939944e-07, "loss": 23.3964, "step": 8895 }, { "epoch": 0.825614849187935, "grad_norm": 47.249237060546875, "learning_rate": 8.094911366865527e-07, "loss": 22.3368, "step": 8896 }, { "epoch": 0.825707656612529, "grad_norm": 62.38105010986328, "learning_rate": 8.08654199214397e-07, "loss": 22.6098, "step": 8897 }, { "epoch": 0.8258004640371229, "grad_norm": 45.03770446777344, "learning_rate": 8.078176565563661e-07, "loss": 21.6532, "step": 8898 }, { "epoch": 0.825893271461717, "grad_norm": 40.9788703918457, "learning_rate": 8.069815087912596e-07, "loss": 21.7751, "step": 8899 }, { "epoch": 0.8259860788863109, "grad_norm": 47.73257064819336, "learning_rate": 8.061457559978402e-07, "loss": 22.6194, "step": 8900 }, { "epoch": 0.8260788863109049, "grad_norm": 45.00977325439453, "learning_rate": 8.053103982548332e-07, "loss": 21.6545, "step": 8901 }, { "epoch": 0.8261716937354988, "grad_norm": 49.327964782714844, "learning_rate": 8.044754356409295e-07, "loss": 22.3355, "step": 8902 }, { "epoch": 0.8262645011600928, "grad_norm": 53.331321716308594, "learning_rate": 8.036408682347768e-07, "loss": 23.0278, "step": 8903 }, { "epoch": 0.8263573085846868, "grad_norm": 45.751739501953125, "learning_rate": 8.028066961149921e-07, "loss": 20.5686, "step": 8904 }, { "epoch": 0.8264501160092808, "grad_norm": 54.34896469116211, "learning_rate": 8.019729193601517e-07, "loss": 23.4987, "step": 8905 }, { "epoch": 0.8265429234338747, "grad_norm": 56.35228729248047, "learning_rate": 8.011395380487946e-07, "loss": 23.9857, "step": 8906 }, { "epoch": 0.8266357308584686, "grad_norm": 46.50550842285156, "learning_rate": 8.003065522594227e-07, "loss": 21.6022, "step": 8907 }, { "epoch": 0.8267285382830627, "grad_norm": 42.902442932128906, "learning_rate": 7.994739620705039e-07, "loss": 20.6924, "step": 8908 }, { "epoch": 0.8268213457076566, "grad_norm": 48.06761932373047, "learning_rate": 7.986417675604624e-07, "loss": 22.2539, "step": 8909 }, { "epoch": 0.8269141531322506, "grad_norm": 54.1999397277832, "learning_rate": 7.978099688076912e-07, "loss": 24.414, "step": 8910 }, { "epoch": 0.8270069605568445, "grad_norm": 45.979347229003906, "learning_rate": 7.969785658905432e-07, "loss": 21.4184, "step": 8911 }, { "epoch": 0.8270997679814385, "grad_norm": 61.6815071105957, "learning_rate": 7.961475588873341e-07, "loss": 26.2697, "step": 8912 }, { "epoch": 0.8271925754060325, "grad_norm": 43.0562629699707, "learning_rate": 7.953169478763428e-07, "loss": 22.0839, "step": 8913 }, { "epoch": 0.8272853828306265, "grad_norm": 46.07413864135742, "learning_rate": 7.944867329358108e-07, "loss": 22.5067, "step": 8914 }, { "epoch": 0.8273781902552204, "grad_norm": 47.22595977783203, "learning_rate": 7.93656914143941e-07, "loss": 22.961, "step": 8915 }, { "epoch": 0.8274709976798144, "grad_norm": 44.81443786621094, "learning_rate": 7.928274915789035e-07, "loss": 24.4236, "step": 8916 }, { "epoch": 0.8275638051044083, "grad_norm": 44.347686767578125, "learning_rate": 7.91998465318824e-07, "loss": 22.3077, "step": 8917 }, { "epoch": 0.8276566125290024, "grad_norm": 43.01852798461914, "learning_rate": 7.911698354417968e-07, "loss": 23.0065, "step": 8918 }, { "epoch": 0.8277494199535963, "grad_norm": 42.40456008911133, "learning_rate": 7.90341602025877e-07, "loss": 22.0397, "step": 8919 }, { "epoch": 0.8278422273781902, "grad_norm": 56.25163650512695, "learning_rate": 7.895137651490808e-07, "loss": 22.9835, "step": 8920 }, { "epoch": 0.8279350348027842, "grad_norm": 46.40983581542969, "learning_rate": 7.886863248893878e-07, "loss": 22.8714, "step": 8921 }, { "epoch": 0.8280278422273782, "grad_norm": 50.30448913574219, "learning_rate": 7.878592813247443e-07, "loss": 23.1405, "step": 8922 }, { "epoch": 0.8281206496519722, "grad_norm": 45.365787506103516, "learning_rate": 7.870326345330509e-07, "loss": 22.5143, "step": 8923 }, { "epoch": 0.8282134570765661, "grad_norm": 43.726356506347656, "learning_rate": 7.862063845921791e-07, "loss": 23.6269, "step": 8924 }, { "epoch": 0.8283062645011601, "grad_norm": 66.48937225341797, "learning_rate": 7.853805315799584e-07, "loss": 22.2502, "step": 8925 }, { "epoch": 0.828399071925754, "grad_norm": 61.03458023071289, "learning_rate": 7.845550755741821e-07, "loss": 22.86, "step": 8926 }, { "epoch": 0.8284918793503481, "grad_norm": 47.9559326171875, "learning_rate": 7.837300166526052e-07, "loss": 23.4011, "step": 8927 }, { "epoch": 0.828584686774942, "grad_norm": 38.53911590576172, "learning_rate": 7.829053548929488e-07, "loss": 22.6353, "step": 8928 }, { "epoch": 0.828677494199536, "grad_norm": 49.324920654296875, "learning_rate": 7.820810903728904e-07, "loss": 22.866, "step": 8929 }, { "epoch": 0.8287703016241299, "grad_norm": 65.88811492919922, "learning_rate": 7.81257223170076e-07, "loss": 23.6876, "step": 8930 }, { "epoch": 0.8288631090487238, "grad_norm": 42.65113830566406, "learning_rate": 7.804337533621115e-07, "loss": 24.2606, "step": 8931 }, { "epoch": 0.8289559164733179, "grad_norm": 47.22065353393555, "learning_rate": 7.796106810265652e-07, "loss": 23.3413, "step": 8932 }, { "epoch": 0.8290487238979118, "grad_norm": 43.36579895019531, "learning_rate": 7.787880062409675e-07, "loss": 22.3071, "step": 8933 }, { "epoch": 0.8291415313225058, "grad_norm": 50.29561233520508, "learning_rate": 7.779657290828146e-07, "loss": 22.2982, "step": 8934 }, { "epoch": 0.8292343387470997, "grad_norm": 48.85327911376953, "learning_rate": 7.771438496295613e-07, "loss": 25.3981, "step": 8935 }, { "epoch": 0.8293271461716938, "grad_norm": 47.11534118652344, "learning_rate": 7.763223679586273e-07, "loss": 21.3538, "step": 8936 }, { "epoch": 0.8294199535962877, "grad_norm": 44.08624267578125, "learning_rate": 7.755012841473936e-07, "loss": 21.5447, "step": 8937 }, { "epoch": 0.8295127610208817, "grad_norm": 41.04391860961914, "learning_rate": 7.746805982732031e-07, "loss": 22.0852, "step": 8938 }, { "epoch": 0.8296055684454756, "grad_norm": 41.03662109375, "learning_rate": 7.738603104133646e-07, "loss": 23.9116, "step": 8939 }, { "epoch": 0.8296983758700696, "grad_norm": 42.37323760986328, "learning_rate": 7.730404206451459e-07, "loss": 23.4461, "step": 8940 }, { "epoch": 0.8297911832946636, "grad_norm": 46.54389190673828, "learning_rate": 7.722209290457788e-07, "loss": 25.0137, "step": 8941 }, { "epoch": 0.8298839907192576, "grad_norm": 40.06513977050781, "learning_rate": 7.714018356924574e-07, "loss": 22.4199, "step": 8942 }, { "epoch": 0.8299767981438515, "grad_norm": 54.64662170410156, "learning_rate": 7.705831406623382e-07, "loss": 22.8285, "step": 8943 }, { "epoch": 0.8300696055684454, "grad_norm": 47.97248077392578, "learning_rate": 7.697648440325383e-07, "loss": 22.4474, "step": 8944 }, { "epoch": 0.8301624129930394, "grad_norm": 46.16348648071289, "learning_rate": 7.689469458801424e-07, "loss": 22.6128, "step": 8945 }, { "epoch": 0.8302552204176334, "grad_norm": 52.53805160522461, "learning_rate": 7.681294462821925e-07, "loss": 22.8504, "step": 8946 }, { "epoch": 0.8303480278422274, "grad_norm": 46.49330520629883, "learning_rate": 7.673123453156961e-07, "loss": 22.4002, "step": 8947 }, { "epoch": 0.8304408352668213, "grad_norm": 60.76664352416992, "learning_rate": 7.66495643057621e-07, "loss": 21.3912, "step": 8948 }, { "epoch": 0.8305336426914153, "grad_norm": 54.27738952636719, "learning_rate": 7.656793395848982e-07, "loss": 21.7389, "step": 8949 }, { "epoch": 0.8306264501160093, "grad_norm": 47.75255584716797, "learning_rate": 7.648634349744216e-07, "loss": 22.5278, "step": 8950 }, { "epoch": 0.8307192575406033, "grad_norm": 40.54618835449219, "learning_rate": 7.640479293030501e-07, "loss": 22.6319, "step": 8951 }, { "epoch": 0.8308120649651972, "grad_norm": 48.0165901184082, "learning_rate": 7.632328226475971e-07, "loss": 20.8176, "step": 8952 }, { "epoch": 0.8309048723897912, "grad_norm": 42.9343147277832, "learning_rate": 7.624181150848481e-07, "loss": 22.9318, "step": 8953 }, { "epoch": 0.8309976798143851, "grad_norm": 47.962894439697266, "learning_rate": 7.616038066915444e-07, "loss": 22.0067, "step": 8954 }, { "epoch": 0.8310904872389792, "grad_norm": 39.46397018432617, "learning_rate": 7.607898975443923e-07, "loss": 24.6785, "step": 8955 }, { "epoch": 0.8311832946635731, "grad_norm": 39.9678840637207, "learning_rate": 7.599763877200583e-07, "loss": 23.8431, "step": 8956 }, { "epoch": 0.831276102088167, "grad_norm": 46.27312088012695, "learning_rate": 7.591632772951774e-07, "loss": 24.3595, "step": 8957 }, { "epoch": 0.831368909512761, "grad_norm": 46.22541046142578, "learning_rate": 7.58350566346337e-07, "loss": 22.8868, "step": 8958 }, { "epoch": 0.8314617169373549, "grad_norm": 41.400821685791016, "learning_rate": 7.575382549500964e-07, "loss": 24.1827, "step": 8959 }, { "epoch": 0.831554524361949, "grad_norm": 39.71821212768555, "learning_rate": 7.567263431829719e-07, "loss": 23.472, "step": 8960 }, { "epoch": 0.8316473317865429, "grad_norm": 45.78053665161133, "learning_rate": 7.559148311214432e-07, "loss": 24.3141, "step": 8961 }, { "epoch": 0.8317401392111369, "grad_norm": 42.547821044921875, "learning_rate": 7.551037188419519e-07, "loss": 22.1294, "step": 8962 }, { "epoch": 0.8318329466357308, "grad_norm": 44.495948791503906, "learning_rate": 7.542930064209064e-07, "loss": 23.2516, "step": 8963 }, { "epoch": 0.8319257540603249, "grad_norm": 49.81782913208008, "learning_rate": 7.53482693934669e-07, "loss": 24.1729, "step": 8964 }, { "epoch": 0.8320185614849188, "grad_norm": 46.3390998840332, "learning_rate": 7.526727814595719e-07, "loss": 24.9535, "step": 8965 }, { "epoch": 0.8321113689095128, "grad_norm": 52.95983123779297, "learning_rate": 7.518632690719063e-07, "loss": 21.692, "step": 8966 }, { "epoch": 0.8322041763341067, "grad_norm": 50.29143142700195, "learning_rate": 7.510541568479263e-07, "loss": 22.4079, "step": 8967 }, { "epoch": 0.8322969837587006, "grad_norm": 49.75962448120117, "learning_rate": 7.502454448638469e-07, "loss": 21.8996, "step": 8968 }, { "epoch": 0.8323897911832947, "grad_norm": 55.11880111694336, "learning_rate": 7.494371331958495e-07, "loss": 22.0215, "step": 8969 }, { "epoch": 0.8324825986078886, "grad_norm": 45.750457763671875, "learning_rate": 7.486292219200714e-07, "loss": 23.1687, "step": 8970 }, { "epoch": 0.8325754060324826, "grad_norm": 41.89622497558594, "learning_rate": 7.47821711112619e-07, "loss": 22.8879, "step": 8971 }, { "epoch": 0.8326682134570765, "grad_norm": 41.77143859863281, "learning_rate": 7.47014600849556e-07, "loss": 23.3505, "step": 8972 }, { "epoch": 0.8327610208816706, "grad_norm": 49.96198654174805, "learning_rate": 7.46207891206911e-07, "loss": 22.9562, "step": 8973 }, { "epoch": 0.8328538283062645, "grad_norm": 48.398075103759766, "learning_rate": 7.45401582260672e-07, "loss": 23.6845, "step": 8974 }, { "epoch": 0.8329466357308585, "grad_norm": 47.30657958984375, "learning_rate": 7.445956740867955e-07, "loss": 23.0031, "step": 8975 }, { "epoch": 0.8330394431554524, "grad_norm": 43.68507385253906, "learning_rate": 7.437901667611908e-07, "loss": 24.2753, "step": 8976 }, { "epoch": 0.8331322505800464, "grad_norm": 59.033023834228516, "learning_rate": 7.429850603597383e-07, "loss": 22.3165, "step": 8977 }, { "epoch": 0.8332250580046404, "grad_norm": 53.46031188964844, "learning_rate": 7.421803549582763e-07, "loss": 25.1072, "step": 8978 }, { "epoch": 0.8333178654292344, "grad_norm": 51.13470458984375, "learning_rate": 7.413760506326051e-07, "loss": 23.8229, "step": 8979 }, { "epoch": 0.8334106728538283, "grad_norm": 37.37314987182617, "learning_rate": 7.405721474584876e-07, "loss": 23.471, "step": 8980 }, { "epoch": 0.8335034802784222, "grad_norm": 38.92497253417969, "learning_rate": 7.397686455116515e-07, "loss": 22.3348, "step": 8981 }, { "epoch": 0.8335962877030162, "grad_norm": 47.69047546386719, "learning_rate": 7.389655448677834e-07, "loss": 22.2707, "step": 8982 }, { "epoch": 0.8336890951276102, "grad_norm": 47.867855072021484, "learning_rate": 7.381628456025336e-07, "loss": 21.7131, "step": 8983 }, { "epoch": 0.8337819025522042, "grad_norm": 42.62386703491211, "learning_rate": 7.373605477915141e-07, "loss": 22.5004, "step": 8984 }, { "epoch": 0.8338747099767981, "grad_norm": 80.17161560058594, "learning_rate": 7.365586515102985e-07, "loss": 21.2373, "step": 8985 }, { "epoch": 0.8339675174013921, "grad_norm": 46.953067779541016, "learning_rate": 7.357571568344268e-07, "loss": 22.055, "step": 8986 }, { "epoch": 0.8340603248259861, "grad_norm": 58.44902801513672, "learning_rate": 7.349560638393926e-07, "loss": 22.6314, "step": 8987 }, { "epoch": 0.8341531322505801, "grad_norm": 50.446083068847656, "learning_rate": 7.341553726006611e-07, "loss": 23.7606, "step": 8988 }, { "epoch": 0.834245939675174, "grad_norm": 50.44229507446289, "learning_rate": 7.333550831936537e-07, "loss": 23.1114, "step": 8989 }, { "epoch": 0.834338747099768, "grad_norm": 44.5272102355957, "learning_rate": 7.32555195693756e-07, "loss": 22.6773, "step": 8990 }, { "epoch": 0.8344315545243619, "grad_norm": 57.35908126831055, "learning_rate": 7.317557101763134e-07, "loss": 25.5855, "step": 8991 }, { "epoch": 0.834524361948956, "grad_norm": 55.715240478515625, "learning_rate": 7.309566267166396e-07, "loss": 23.3282, "step": 8992 }, { "epoch": 0.8346171693735499, "grad_norm": 50.43999099731445, "learning_rate": 7.301579453900015e-07, "loss": 24.0617, "step": 8993 }, { "epoch": 0.8347099767981438, "grad_norm": 49.79038619995117, "learning_rate": 7.293596662716362e-07, "loss": 22.9824, "step": 8994 }, { "epoch": 0.8348027842227378, "grad_norm": 44.28499984741211, "learning_rate": 7.285617894367386e-07, "loss": 22.8483, "step": 8995 }, { "epoch": 0.8348955916473317, "grad_norm": 44.55655288696289, "learning_rate": 7.277643149604663e-07, "loss": 24.7285, "step": 8996 }, { "epoch": 0.8349883990719258, "grad_norm": 53.0625114440918, "learning_rate": 7.269672429179387e-07, "loss": 21.9088, "step": 8997 }, { "epoch": 0.8350812064965197, "grad_norm": 57.24496078491211, "learning_rate": 7.261705733842406e-07, "loss": 25.9089, "step": 8998 }, { "epoch": 0.8351740139211137, "grad_norm": 43.02980041503906, "learning_rate": 7.253743064344126e-07, "loss": 21.724, "step": 8999 }, { "epoch": 0.8352668213457076, "grad_norm": 54.3087158203125, "learning_rate": 7.245784421434643e-07, "loss": 24.6733, "step": 9000 }, { "epoch": 0.8353596287703017, "grad_norm": 45.92677307128906, "learning_rate": 7.237829805863622e-07, "loss": 23.7595, "step": 9001 }, { "epoch": 0.8354524361948956, "grad_norm": 47.3524055480957, "learning_rate": 7.229879218380376e-07, "loss": 20.4433, "step": 9002 }, { "epoch": 0.8355452436194896, "grad_norm": 45.68354415893555, "learning_rate": 7.221932659733815e-07, "loss": 22.7434, "step": 9003 }, { "epoch": 0.8356380510440835, "grad_norm": 57.52254867553711, "learning_rate": 7.213990130672521e-07, "loss": 22.0303, "step": 9004 }, { "epoch": 0.8357308584686775, "grad_norm": 46.56045913696289, "learning_rate": 7.206051631944611e-07, "loss": 22.1131, "step": 9005 }, { "epoch": 0.8358236658932715, "grad_norm": 40.924964904785156, "learning_rate": 7.198117164297908e-07, "loss": 21.9939, "step": 9006 }, { "epoch": 0.8359164733178654, "grad_norm": 44.42767333984375, "learning_rate": 7.190186728479803e-07, "loss": 23.0028, "step": 9007 }, { "epoch": 0.8360092807424594, "grad_norm": 44.75509262084961, "learning_rate": 7.182260325237333e-07, "loss": 23.6667, "step": 9008 }, { "epoch": 0.8361020881670533, "grad_norm": 42.68733596801758, "learning_rate": 7.174337955317123e-07, "loss": 22.7988, "step": 9009 }, { "epoch": 0.8361948955916473, "grad_norm": 55.40569305419922, "learning_rate": 7.166419619465481e-07, "loss": 25.243, "step": 9010 }, { "epoch": 0.8362877030162413, "grad_norm": 47.03751754760742, "learning_rate": 7.158505318428244e-07, "loss": 23.6366, "step": 9011 }, { "epoch": 0.8363805104408353, "grad_norm": 46.404136657714844, "learning_rate": 7.150595052950954e-07, "loss": 23.015, "step": 9012 }, { "epoch": 0.8364733178654292, "grad_norm": 42.716636657714844, "learning_rate": 7.142688823778732e-07, "loss": 21.917, "step": 9013 }, { "epoch": 0.8365661252900232, "grad_norm": 50.09954833984375, "learning_rate": 7.134786631656315e-07, "loss": 23.735, "step": 9014 }, { "epoch": 0.8366589327146172, "grad_norm": 40.513275146484375, "learning_rate": 7.12688847732807e-07, "loss": 23.4959, "step": 9015 }, { "epoch": 0.8367517401392112, "grad_norm": 53.14872741699219, "learning_rate": 7.118994361538006e-07, "loss": 23.0882, "step": 9016 }, { "epoch": 0.8368445475638051, "grad_norm": 50.85881423950195, "learning_rate": 7.111104285029691e-07, "loss": 24.5047, "step": 9017 }, { "epoch": 0.836937354988399, "grad_norm": 51.41965866088867, "learning_rate": 7.103218248546379e-07, "loss": 22.9557, "step": 9018 }, { "epoch": 0.837030162412993, "grad_norm": 40.74098587036133, "learning_rate": 7.095336252830903e-07, "loss": 21.8672, "step": 9019 }, { "epoch": 0.837122969837587, "grad_norm": 40.0436897277832, "learning_rate": 7.087458298625732e-07, "loss": 24.5509, "step": 9020 }, { "epoch": 0.837215777262181, "grad_norm": 55.42299270629883, "learning_rate": 7.079584386672949e-07, "loss": 24.1099, "step": 9021 }, { "epoch": 0.8373085846867749, "grad_norm": 52.362152099609375, "learning_rate": 7.071714517714251e-07, "loss": 22.5666, "step": 9022 }, { "epoch": 0.8374013921113689, "grad_norm": 50.40076446533203, "learning_rate": 7.063848692490954e-07, "loss": 22.4029, "step": 9023 }, { "epoch": 0.8374941995359628, "grad_norm": 49.26288986206055, "learning_rate": 7.055986911744017e-07, "loss": 24.6354, "step": 9024 }, { "epoch": 0.8375870069605569, "grad_norm": 48.81089782714844, "learning_rate": 7.048129176213991e-07, "loss": 23.4059, "step": 9025 }, { "epoch": 0.8376798143851508, "grad_norm": 71.10871124267578, "learning_rate": 7.040275486641052e-07, "loss": 21.963, "step": 9026 }, { "epoch": 0.8377726218097448, "grad_norm": 49.59476852416992, "learning_rate": 7.032425843765006e-07, "loss": 23.2896, "step": 9027 }, { "epoch": 0.8378654292343387, "grad_norm": 40.2073860168457, "learning_rate": 7.024580248325247e-07, "loss": 21.2967, "step": 9028 }, { "epoch": 0.8379582366589328, "grad_norm": 51.88953399658203, "learning_rate": 7.016738701060837e-07, "loss": 21.2785, "step": 9029 }, { "epoch": 0.8380510440835267, "grad_norm": 74.12690734863281, "learning_rate": 7.008901202710416e-07, "loss": 23.9543, "step": 9030 }, { "epoch": 0.8381438515081207, "grad_norm": 47.71806335449219, "learning_rate": 7.001067754012264e-07, "loss": 24.1572, "step": 9031 }, { "epoch": 0.8382366589327146, "grad_norm": 42.26490020751953, "learning_rate": 6.993238355704257e-07, "loss": 21.2014, "step": 9032 }, { "epoch": 0.8383294663573085, "grad_norm": 66.87417602539062, "learning_rate": 6.985413008523934e-07, "loss": 21.9073, "step": 9033 }, { "epoch": 0.8384222737819026, "grad_norm": 52.063087463378906, "learning_rate": 6.977591713208387e-07, "loss": 22.3142, "step": 9034 }, { "epoch": 0.8385150812064965, "grad_norm": 49.896209716796875, "learning_rate": 6.969774470494383e-07, "loss": 23.2412, "step": 9035 }, { "epoch": 0.8386078886310905, "grad_norm": 41.76824188232422, "learning_rate": 6.961961281118285e-07, "loss": 25.472, "step": 9036 }, { "epoch": 0.8387006960556844, "grad_norm": 52.58428955078125, "learning_rate": 6.954152145816073e-07, "loss": 21.7979, "step": 9037 }, { "epoch": 0.8387935034802784, "grad_norm": 49.87070083618164, "learning_rate": 6.946347065323339e-07, "loss": 24.2623, "step": 9038 }, { "epoch": 0.8388863109048724, "grad_norm": 47.680076599121094, "learning_rate": 6.938546040375327e-07, "loss": 21.582, "step": 9039 }, { "epoch": 0.8389791183294664, "grad_norm": 46.45832443237305, "learning_rate": 6.930749071706838e-07, "loss": 23.9302, "step": 9040 }, { "epoch": 0.8390719257540603, "grad_norm": 56.071502685546875, "learning_rate": 6.922956160052357e-07, "loss": 26.4238, "step": 9041 }, { "epoch": 0.8391647331786543, "grad_norm": 49.60708999633789, "learning_rate": 6.915167306145943e-07, "loss": 22.6476, "step": 9042 }, { "epoch": 0.8392575406032483, "grad_norm": 51.144195556640625, "learning_rate": 6.907382510721288e-07, "loss": 25.3467, "step": 9043 }, { "epoch": 0.8393503480278423, "grad_norm": 47.53609085083008, "learning_rate": 6.899601774511694e-07, "loss": 22.3134, "step": 9044 }, { "epoch": 0.8394431554524362, "grad_norm": 46.17481231689453, "learning_rate": 6.89182509825011e-07, "loss": 22.1351, "step": 9045 }, { "epoch": 0.8395359628770301, "grad_norm": 42.383609771728516, "learning_rate": 6.884052482669046e-07, "loss": 22.5983, "step": 9046 }, { "epoch": 0.8396287703016241, "grad_norm": 57.43461990356445, "learning_rate": 6.876283928500683e-07, "loss": 22.5958, "step": 9047 }, { "epoch": 0.8397215777262181, "grad_norm": 60.79397201538086, "learning_rate": 6.868519436476795e-07, "loss": 22.5951, "step": 9048 }, { "epoch": 0.8398143851508121, "grad_norm": 47.96572494506836, "learning_rate": 6.860759007328782e-07, "loss": 22.2526, "step": 9049 }, { "epoch": 0.839907192575406, "grad_norm": 44.8883056640625, "learning_rate": 6.853002641787637e-07, "loss": 23.5914, "step": 9050 }, { "epoch": 0.84, "grad_norm": 53.964290618896484, "learning_rate": 6.845250340584031e-07, "loss": 21.3285, "step": 9051 }, { "epoch": 0.8400928074245939, "grad_norm": 67.54584503173828, "learning_rate": 6.837502104448157e-07, "loss": 21.0007, "step": 9052 }, { "epoch": 0.840185614849188, "grad_norm": 57.767765045166016, "learning_rate": 6.829757934109932e-07, "loss": 23.8691, "step": 9053 }, { "epoch": 0.8402784222737819, "grad_norm": 44.32430648803711, "learning_rate": 6.822017830298788e-07, "loss": 23.2299, "step": 9054 }, { "epoch": 0.8403712296983759, "grad_norm": 48.54935073852539, "learning_rate": 6.81428179374386e-07, "loss": 21.9502, "step": 9055 }, { "epoch": 0.8404640371229698, "grad_norm": 50.79725646972656, "learning_rate": 6.806549825173853e-07, "loss": 20.8988, "step": 9056 }, { "epoch": 0.8405568445475639, "grad_norm": 59.42207336425781, "learning_rate": 6.798821925317095e-07, "loss": 23.5523, "step": 9057 }, { "epoch": 0.8406496519721578, "grad_norm": 48.63291549682617, "learning_rate": 6.791098094901521e-07, "loss": 23.0069, "step": 9058 }, { "epoch": 0.8407424593967517, "grad_norm": 51.04400634765625, "learning_rate": 6.783378334654733e-07, "loss": 19.6134, "step": 9059 }, { "epoch": 0.8408352668213457, "grad_norm": 47.49065017700195, "learning_rate": 6.775662645303871e-07, "loss": 21.2811, "step": 9060 }, { "epoch": 0.8409280742459396, "grad_norm": 59.52553939819336, "learning_rate": 6.767951027575765e-07, "loss": 22.5616, "step": 9061 }, { "epoch": 0.8410208816705337, "grad_norm": 41.48929214477539, "learning_rate": 6.760243482196816e-07, "loss": 22.5798, "step": 9062 }, { "epoch": 0.8411136890951276, "grad_norm": 45.71079635620117, "learning_rate": 6.752540009893055e-07, "loss": 23.095, "step": 9063 }, { "epoch": 0.8412064965197216, "grad_norm": 50.591312408447266, "learning_rate": 6.744840611390125e-07, "loss": 22.4862, "step": 9064 }, { "epoch": 0.8412993039443155, "grad_norm": 45.395233154296875, "learning_rate": 6.737145287413305e-07, "loss": 22.5835, "step": 9065 }, { "epoch": 0.8413921113689096, "grad_norm": 41.75109100341797, "learning_rate": 6.729454038687461e-07, "loss": 23.9197, "step": 9066 }, { "epoch": 0.8414849187935035, "grad_norm": 43.776214599609375, "learning_rate": 6.721766865937102e-07, "loss": 23.045, "step": 9067 }, { "epoch": 0.8415777262180975, "grad_norm": 45.709957122802734, "learning_rate": 6.714083769886325e-07, "loss": 21.8506, "step": 9068 }, { "epoch": 0.8416705336426914, "grad_norm": 43.75277328491211, "learning_rate": 6.706404751258872e-07, "loss": 22.3228, "step": 9069 }, { "epoch": 0.8417633410672853, "grad_norm": 55.00334167480469, "learning_rate": 6.698729810778065e-07, "loss": 22.944, "step": 9070 }, { "epoch": 0.8418561484918794, "grad_norm": 45.78875732421875, "learning_rate": 6.691058949166895e-07, "loss": 22.8741, "step": 9071 }, { "epoch": 0.8419489559164733, "grad_norm": 46.33350372314453, "learning_rate": 6.683392167147917e-07, "loss": 22.9186, "step": 9072 }, { "epoch": 0.8420417633410673, "grad_norm": 101.7928695678711, "learning_rate": 6.675729465443331e-07, "loss": 22.1369, "step": 9073 }, { "epoch": 0.8421345707656612, "grad_norm": 46.603424072265625, "learning_rate": 6.668070844774943e-07, "loss": 22.6905, "step": 9074 }, { "epoch": 0.8422273781902552, "grad_norm": 52.47272491455078, "learning_rate": 6.660416305864159e-07, "loss": 22.0102, "step": 9075 }, { "epoch": 0.8423201856148492, "grad_norm": 52.758270263671875, "learning_rate": 6.652765849432041e-07, "loss": 23.0808, "step": 9076 }, { "epoch": 0.8424129930394432, "grad_norm": 52.773681640625, "learning_rate": 6.64511947619923e-07, "loss": 24.9345, "step": 9077 }, { "epoch": 0.8425058004640371, "grad_norm": 48.5563850402832, "learning_rate": 6.637477186886004e-07, "loss": 23.8219, "step": 9078 }, { "epoch": 0.8425986078886311, "grad_norm": 46.04377365112305, "learning_rate": 6.629838982212227e-07, "loss": 23.1342, "step": 9079 }, { "epoch": 0.8426914153132251, "grad_norm": 67.3409652709961, "learning_rate": 6.622204862897436e-07, "loss": 23.0189, "step": 9080 }, { "epoch": 0.842784222737819, "grad_norm": 56.756980895996094, "learning_rate": 6.614574829660697e-07, "loss": 22.1203, "step": 9081 }, { "epoch": 0.842877030162413, "grad_norm": 45.895172119140625, "learning_rate": 6.606948883220776e-07, "loss": 22.1591, "step": 9082 }, { "epoch": 0.8429698375870069, "grad_norm": 45.08063507080078, "learning_rate": 6.59932702429601e-07, "loss": 22.0381, "step": 9083 }, { "epoch": 0.8430626450116009, "grad_norm": 55.721317291259766, "learning_rate": 6.591709253604356e-07, "loss": 22.9717, "step": 9084 }, { "epoch": 0.8431554524361949, "grad_norm": 50.41217803955078, "learning_rate": 6.584095571863375e-07, "loss": 21.3258, "step": 9085 }, { "epoch": 0.8432482598607889, "grad_norm": 53.69365310668945, "learning_rate": 6.576485979790287e-07, "loss": 21.0692, "step": 9086 }, { "epoch": 0.8433410672853828, "grad_norm": 42.47173309326172, "learning_rate": 6.568880478101863e-07, "loss": 22.1667, "step": 9087 }, { "epoch": 0.8434338747099768, "grad_norm": 55.07345199584961, "learning_rate": 6.561279067514558e-07, "loss": 22.1753, "step": 9088 }, { "epoch": 0.8435266821345707, "grad_norm": 48.62520217895508, "learning_rate": 6.553681748744361e-07, "loss": 20.093, "step": 9089 }, { "epoch": 0.8436194895591648, "grad_norm": 49.57721710205078, "learning_rate": 6.546088522506955e-07, "loss": 23.3943, "step": 9090 }, { "epoch": 0.8437122969837587, "grad_norm": 41.012821197509766, "learning_rate": 6.538499389517594e-07, "loss": 23.9038, "step": 9091 }, { "epoch": 0.8438051044083527, "grad_norm": 57.7396354675293, "learning_rate": 6.530914350491152e-07, "loss": 21.4823, "step": 9092 }, { "epoch": 0.8438979118329466, "grad_norm": 54.43248748779297, "learning_rate": 6.523333406142112e-07, "loss": 23.4566, "step": 9093 }, { "epoch": 0.8439907192575407, "grad_norm": 42.236114501953125, "learning_rate": 6.51575655718461e-07, "loss": 23.3104, "step": 9094 }, { "epoch": 0.8440835266821346, "grad_norm": 38.8353157043457, "learning_rate": 6.508183804332324e-07, "loss": 21.5373, "step": 9095 }, { "epoch": 0.8441763341067285, "grad_norm": 46.026485443115234, "learning_rate": 6.500615148298617e-07, "loss": 23.0338, "step": 9096 }, { "epoch": 0.8442691415313225, "grad_norm": 42.0343132019043, "learning_rate": 6.493050589796435e-07, "loss": 20.4504, "step": 9097 }, { "epoch": 0.8443619489559164, "grad_norm": 40.86143112182617, "learning_rate": 6.48549012953833e-07, "loss": 24.0431, "step": 9098 }, { "epoch": 0.8444547563805105, "grad_norm": 49.752960205078125, "learning_rate": 6.477933768236472e-07, "loss": 24.8724, "step": 9099 }, { "epoch": 0.8445475638051044, "grad_norm": 49.435302734375, "learning_rate": 6.47038150660268e-07, "loss": 23.6065, "step": 9100 }, { "epoch": 0.8446403712296984, "grad_norm": 45.29553985595703, "learning_rate": 6.462833345348323e-07, "loss": 22.3849, "step": 9101 }, { "epoch": 0.8447331786542923, "grad_norm": 42.91513442993164, "learning_rate": 6.455289285184446e-07, "loss": 22.4701, "step": 9102 }, { "epoch": 0.8448259860788863, "grad_norm": 49.019962310791016, "learning_rate": 6.447749326821667e-07, "loss": 22.1248, "step": 9103 }, { "epoch": 0.8449187935034803, "grad_norm": 42.11711120605469, "learning_rate": 6.44021347097023e-07, "loss": 21.7198, "step": 9104 }, { "epoch": 0.8450116009280743, "grad_norm": 49.000205993652344, "learning_rate": 6.432681718339989e-07, "loss": 22.1883, "step": 9105 }, { "epoch": 0.8451044083526682, "grad_norm": 44.11849594116211, "learning_rate": 6.425154069640449e-07, "loss": 21.7203, "step": 9106 }, { "epoch": 0.8451972157772621, "grad_norm": 45.32766342163086, "learning_rate": 6.417630525580642e-07, "loss": 25.5728, "step": 9107 }, { "epoch": 0.8452900232018562, "grad_norm": 43.874488830566406, "learning_rate": 6.410111086869314e-07, "loss": 23.4608, "step": 9108 }, { "epoch": 0.8453828306264501, "grad_norm": 50.256683349609375, "learning_rate": 6.402595754214752e-07, "loss": 24.1602, "step": 9109 }, { "epoch": 0.8454756380510441, "grad_norm": 46.91184616088867, "learning_rate": 6.395084528324885e-07, "loss": 23.4491, "step": 9110 }, { "epoch": 0.845568445475638, "grad_norm": 38.06874465942383, "learning_rate": 6.387577409907247e-07, "loss": 22.2212, "step": 9111 }, { "epoch": 0.845661252900232, "grad_norm": 41.14292907714844, "learning_rate": 6.380074399669007e-07, "loss": 22.4986, "step": 9112 }, { "epoch": 0.845754060324826, "grad_norm": 42.80358123779297, "learning_rate": 6.372575498316913e-07, "loss": 24.0174, "step": 9113 }, { "epoch": 0.84584686774942, "grad_norm": 39.363948822021484, "learning_rate": 6.365080706557352e-07, "loss": 21.4081, "step": 9114 }, { "epoch": 0.8459396751740139, "grad_norm": 55.38364791870117, "learning_rate": 6.357590025096311e-07, "loss": 23.2831, "step": 9115 }, { "epoch": 0.8460324825986079, "grad_norm": 39.44127655029297, "learning_rate": 6.350103454639389e-07, "loss": 23.0417, "step": 9116 }, { "epoch": 0.8461252900232018, "grad_norm": 43.6309928894043, "learning_rate": 6.342620995891797e-07, "loss": 22.3188, "step": 9117 }, { "epoch": 0.8462180974477959, "grad_norm": 40.890411376953125, "learning_rate": 6.335142649558385e-07, "loss": 22.2905, "step": 9118 }, { "epoch": 0.8463109048723898, "grad_norm": 40.14493179321289, "learning_rate": 6.32766841634358e-07, "loss": 23.3759, "step": 9119 }, { "epoch": 0.8464037122969837, "grad_norm": 40.733367919921875, "learning_rate": 6.320198296951435e-07, "loss": 22.3562, "step": 9120 }, { "epoch": 0.8464965197215777, "grad_norm": 56.990196228027344, "learning_rate": 6.312732292085616e-07, "loss": 23.4622, "step": 9121 }, { "epoch": 0.8465893271461717, "grad_norm": 48.11865997314453, "learning_rate": 6.305270402449398e-07, "loss": 24.3502, "step": 9122 }, { "epoch": 0.8466821345707657, "grad_norm": 47.45366668701172, "learning_rate": 6.297812628745686e-07, "loss": 23.0761, "step": 9123 }, { "epoch": 0.8467749419953596, "grad_norm": 49.83107376098633, "learning_rate": 6.290358971676974e-07, "loss": 26.21, "step": 9124 }, { "epoch": 0.8468677494199536, "grad_norm": 52.61568069458008, "learning_rate": 6.282909431945378e-07, "loss": 22.0372, "step": 9125 }, { "epoch": 0.8469605568445475, "grad_norm": 46.48454284667969, "learning_rate": 6.27546401025263e-07, "loss": 22.7574, "step": 9126 }, { "epoch": 0.8470533642691416, "grad_norm": 44.315738677978516, "learning_rate": 6.268022707300064e-07, "loss": 23.1867, "step": 9127 }, { "epoch": 0.8471461716937355, "grad_norm": 52.291160583496094, "learning_rate": 6.260585523788626e-07, "loss": 23.1853, "step": 9128 }, { "epoch": 0.8472389791183295, "grad_norm": 53.26505661010742, "learning_rate": 6.25315246041891e-07, "loss": 23.3202, "step": 9129 }, { "epoch": 0.8473317865429234, "grad_norm": 44.73221969604492, "learning_rate": 6.245723517891045e-07, "loss": 20.8893, "step": 9130 }, { "epoch": 0.8474245939675173, "grad_norm": 36.90519332885742, "learning_rate": 6.238298696904854e-07, "loss": 20.5473, "step": 9131 }, { "epoch": 0.8475174013921114, "grad_norm": 52.72863006591797, "learning_rate": 6.230877998159724e-07, "loss": 23.4877, "step": 9132 }, { "epoch": 0.8476102088167053, "grad_norm": 49.47641372680664, "learning_rate": 6.22346142235467e-07, "loss": 23.6788, "step": 9133 }, { "epoch": 0.8477030162412993, "grad_norm": 44.808780670166016, "learning_rate": 6.216048970188304e-07, "loss": 21.8461, "step": 9134 }, { "epoch": 0.8477958236658932, "grad_norm": 49.81590270996094, "learning_rate": 6.208640642358882e-07, "loss": 20.4257, "step": 9135 }, { "epoch": 0.8478886310904873, "grad_norm": 50.80873107910156, "learning_rate": 6.201236439564218e-07, "loss": 22.8416, "step": 9136 }, { "epoch": 0.8479814385150812, "grad_norm": 48.7307243347168, "learning_rate": 6.193836362501798e-07, "loss": 21.5866, "step": 9137 }, { "epoch": 0.8480742459396752, "grad_norm": 43.99540328979492, "learning_rate": 6.186440411868683e-07, "loss": 25.4708, "step": 9138 }, { "epoch": 0.8481670533642691, "grad_norm": 45.50382995605469, "learning_rate": 6.179048588361542e-07, "loss": 21.9768, "step": 9139 }, { "epoch": 0.8482598607888631, "grad_norm": 54.720428466796875, "learning_rate": 6.171660892676668e-07, "loss": 25.1135, "step": 9140 }, { "epoch": 0.8483526682134571, "grad_norm": 46.077545166015625, "learning_rate": 6.164277325509988e-07, "loss": 22.1109, "step": 9141 }, { "epoch": 0.8484454756380511, "grad_norm": 76.07791137695312, "learning_rate": 6.156897887556973e-07, "loss": 22.9725, "step": 9142 }, { "epoch": 0.848538283062645, "grad_norm": 41.59626770019531, "learning_rate": 6.149522579512779e-07, "loss": 22.0518, "step": 9143 }, { "epoch": 0.848631090487239, "grad_norm": 46.14698791503906, "learning_rate": 6.142151402072133e-07, "loss": 22.1161, "step": 9144 }, { "epoch": 0.8487238979118329, "grad_norm": 43.92464065551758, "learning_rate": 6.134784355929385e-07, "loss": 23.757, "step": 9145 }, { "epoch": 0.848816705336427, "grad_norm": 52.240013122558594, "learning_rate": 6.127421441778469e-07, "loss": 25.0956, "step": 9146 }, { "epoch": 0.8489095127610209, "grad_norm": 48.92861557006836, "learning_rate": 6.120062660312998e-07, "loss": 21.979, "step": 9147 }, { "epoch": 0.8490023201856148, "grad_norm": 45.02134323120117, "learning_rate": 6.112708012226098e-07, "loss": 22.1563, "step": 9148 }, { "epoch": 0.8490951276102088, "grad_norm": 52.428958892822266, "learning_rate": 6.105357498210602e-07, "loss": 21.4849, "step": 9149 }, { "epoch": 0.8491879350348028, "grad_norm": 53.658512115478516, "learning_rate": 6.098011118958885e-07, "loss": 21.0256, "step": 9150 }, { "epoch": 0.8492807424593968, "grad_norm": 42.865684509277344, "learning_rate": 6.090668875162964e-07, "loss": 21.02, "step": 9151 }, { "epoch": 0.8493735498839907, "grad_norm": 46.63743209838867, "learning_rate": 6.08333076751445e-07, "loss": 24.4172, "step": 9152 }, { "epoch": 0.8494663573085847, "grad_norm": 46.25678634643555, "learning_rate": 6.075996796704603e-07, "loss": 26.3079, "step": 9153 }, { "epoch": 0.8495591647331786, "grad_norm": 45.94027328491211, "learning_rate": 6.068666963424225e-07, "loss": 22.3535, "step": 9154 }, { "epoch": 0.8496519721577727, "grad_norm": 46.63081741333008, "learning_rate": 6.061341268363802e-07, "loss": 22.7216, "step": 9155 }, { "epoch": 0.8497447795823666, "grad_norm": 42.43733596801758, "learning_rate": 6.054019712213377e-07, "loss": 23.4624, "step": 9156 }, { "epoch": 0.8498375870069605, "grad_norm": 42.01472854614258, "learning_rate": 6.046702295662626e-07, "loss": 22.7545, "step": 9157 }, { "epoch": 0.8499303944315545, "grad_norm": 49.85673141479492, "learning_rate": 6.039389019400821e-07, "loss": 23.2729, "step": 9158 }, { "epoch": 0.8500232018561485, "grad_norm": 44.30947494506836, "learning_rate": 6.032079884116876e-07, "loss": 22.0774, "step": 9159 }, { "epoch": 0.8501160092807425, "grad_norm": 42.624149322509766, "learning_rate": 6.024774890499285e-07, "loss": 22.842, "step": 9160 }, { "epoch": 0.8502088167053364, "grad_norm": 49.07706069946289, "learning_rate": 6.017474039236154e-07, "loss": 23.6792, "step": 9161 }, { "epoch": 0.8503016241299304, "grad_norm": 42.80832290649414, "learning_rate": 6.010177331015205e-07, "loss": 22.5417, "step": 9162 }, { "epoch": 0.8503944315545243, "grad_norm": 51.87101745605469, "learning_rate": 6.002884766523776e-07, "loss": 24.1461, "step": 9163 }, { "epoch": 0.8504872389791184, "grad_norm": 46.370391845703125, "learning_rate": 5.995596346448796e-07, "loss": 23.7035, "step": 9164 }, { "epoch": 0.8505800464037123, "grad_norm": 48.57484436035156, "learning_rate": 5.988312071476815e-07, "loss": 22.8124, "step": 9165 }, { "epoch": 0.8506728538283063, "grad_norm": 41.95756530761719, "learning_rate": 5.981031942294019e-07, "loss": 23.6907, "step": 9166 }, { "epoch": 0.8507656612529002, "grad_norm": 53.22388458251953, "learning_rate": 5.973755959586153e-07, "loss": 23.7131, "step": 9167 }, { "epoch": 0.8508584686774942, "grad_norm": 44.44558334350586, "learning_rate": 5.966484124038602e-07, "loss": 20.632, "step": 9168 }, { "epoch": 0.8509512761020882, "grad_norm": 52.9047737121582, "learning_rate": 5.959216436336351e-07, "loss": 22.9929, "step": 9169 }, { "epoch": 0.8510440835266821, "grad_norm": 45.54267120361328, "learning_rate": 5.951952897164015e-07, "loss": 23.6939, "step": 9170 }, { "epoch": 0.8511368909512761, "grad_norm": 43.82585525512695, "learning_rate": 5.944693507205774e-07, "loss": 23.7645, "step": 9171 }, { "epoch": 0.85122969837587, "grad_norm": 49.26111602783203, "learning_rate": 5.937438267145468e-07, "loss": 24.2587, "step": 9172 }, { "epoch": 0.8513225058004641, "grad_norm": 46.18398666381836, "learning_rate": 5.930187177666508e-07, "loss": 22.9966, "step": 9173 }, { "epoch": 0.851415313225058, "grad_norm": 44.5840950012207, "learning_rate": 5.922940239451935e-07, "loss": 21.0556, "step": 9174 }, { "epoch": 0.851508120649652, "grad_norm": 44.0051155090332, "learning_rate": 5.915697453184382e-07, "loss": 22.3961, "step": 9175 }, { "epoch": 0.8516009280742459, "grad_norm": 42.37406921386719, "learning_rate": 5.908458819546126e-07, "loss": 21.408, "step": 9176 }, { "epoch": 0.8516937354988399, "grad_norm": 51.38330078125, "learning_rate": 5.901224339218991e-07, "loss": 24.0952, "step": 9177 }, { "epoch": 0.8517865429234339, "grad_norm": 38.49162673950195, "learning_rate": 5.893994012884474e-07, "loss": 21.1774, "step": 9178 }, { "epoch": 0.8518793503480279, "grad_norm": 42.62986755371094, "learning_rate": 5.886767841223651e-07, "loss": 23.4553, "step": 9179 }, { "epoch": 0.8519721577726218, "grad_norm": 42.12639236450195, "learning_rate": 5.879545824917199e-07, "loss": 21.7712, "step": 9180 }, { "epoch": 0.8520649651972158, "grad_norm": 37.36929702758789, "learning_rate": 5.872327964645414e-07, "loss": 21.9162, "step": 9181 }, { "epoch": 0.8521577726218097, "grad_norm": 42.667572021484375, "learning_rate": 5.865114261088217e-07, "loss": 23.3802, "step": 9182 }, { "epoch": 0.8522505800464037, "grad_norm": 45.41171646118164, "learning_rate": 5.857904714925094e-07, "loss": 24.4736, "step": 9183 }, { "epoch": 0.8523433874709977, "grad_norm": 39.35023880004883, "learning_rate": 5.850699326835191e-07, "loss": 25.0086, "step": 9184 }, { "epoch": 0.8524361948955916, "grad_norm": 74.95996856689453, "learning_rate": 5.843498097497219e-07, "loss": 23.7955, "step": 9185 }, { "epoch": 0.8525290023201856, "grad_norm": 46.49570083618164, "learning_rate": 5.836301027589525e-07, "loss": 22.2914, "step": 9186 }, { "epoch": 0.8526218097447796, "grad_norm": 54.65010452270508, "learning_rate": 5.829108117790044e-07, "loss": 25.2123, "step": 9187 }, { "epoch": 0.8527146171693736, "grad_norm": 62.742774963378906, "learning_rate": 5.821919368776363e-07, "loss": 22.9961, "step": 9188 }, { "epoch": 0.8528074245939675, "grad_norm": 42.00845718383789, "learning_rate": 5.81473478122559e-07, "loss": 21.4511, "step": 9189 }, { "epoch": 0.8529002320185615, "grad_norm": 42.21364974975586, "learning_rate": 5.807554355814537e-07, "loss": 22.6138, "step": 9190 }, { "epoch": 0.8529930394431554, "grad_norm": 42.63636016845703, "learning_rate": 5.800378093219572e-07, "loss": 21.8604, "step": 9191 }, { "epoch": 0.8530858468677495, "grad_norm": 44.54887771606445, "learning_rate": 5.793205994116674e-07, "loss": 23.1685, "step": 9192 }, { "epoch": 0.8531786542923434, "grad_norm": 40.85696029663086, "learning_rate": 5.786038059181432e-07, "loss": 24.4364, "step": 9193 }, { "epoch": 0.8532714617169374, "grad_norm": 54.361305236816406, "learning_rate": 5.778874289089076e-07, "loss": 23.717, "step": 9194 }, { "epoch": 0.8533642691415313, "grad_norm": 56.623985290527344, "learning_rate": 5.771714684514368e-07, "loss": 21.9307, "step": 9195 }, { "epoch": 0.8534570765661252, "grad_norm": 48.0814208984375, "learning_rate": 5.764559246131768e-07, "loss": 21.6829, "step": 9196 }, { "epoch": 0.8535498839907193, "grad_norm": 46.37574768066406, "learning_rate": 5.757407974615282e-07, "loss": 22.313, "step": 9197 }, { "epoch": 0.8536426914153132, "grad_norm": 46.51735305786133, "learning_rate": 5.750260870638541e-07, "loss": 23.7382, "step": 9198 }, { "epoch": 0.8537354988399072, "grad_norm": 51.61660385131836, "learning_rate": 5.743117934874781e-07, "loss": 25.9032, "step": 9199 }, { "epoch": 0.8538283062645011, "grad_norm": 47.08278274536133, "learning_rate": 5.735979167996858e-07, "loss": 22.4167, "step": 9200 }, { "epoch": 0.8539211136890952, "grad_norm": 50.69264221191406, "learning_rate": 5.728844570677211e-07, "loss": 27.0939, "step": 9201 }, { "epoch": 0.8540139211136891, "grad_norm": 50.10664749145508, "learning_rate": 5.72171414358792e-07, "loss": 23.391, "step": 9202 }, { "epoch": 0.8541067285382831, "grad_norm": 52.71302795410156, "learning_rate": 5.714587887400641e-07, "loss": 22.8109, "step": 9203 }, { "epoch": 0.854199535962877, "grad_norm": 50.89594650268555, "learning_rate": 5.707465802786655e-07, "loss": 22.4849, "step": 9204 }, { "epoch": 0.854292343387471, "grad_norm": 53.37335205078125, "learning_rate": 5.700347890416835e-07, "loss": 24.5483, "step": 9205 }, { "epoch": 0.854385150812065, "grad_norm": 43.10047912597656, "learning_rate": 5.693234150961674e-07, "loss": 23.3111, "step": 9206 }, { "epoch": 0.854477958236659, "grad_norm": 43.80073547363281, "learning_rate": 5.686124585091279e-07, "loss": 22.9853, "step": 9207 }, { "epoch": 0.8545707656612529, "grad_norm": 67.937744140625, "learning_rate": 5.679019193475343e-07, "loss": 23.31, "step": 9208 }, { "epoch": 0.8546635730858468, "grad_norm": 49.55891799926758, "learning_rate": 5.671917976783176e-07, "loss": 23.7069, "step": 9209 }, { "epoch": 0.8547563805104408, "grad_norm": 44.69163513183594, "learning_rate": 5.664820935683695e-07, "loss": 24.3972, "step": 9210 }, { "epoch": 0.8548491879350348, "grad_norm": 47.02169418334961, "learning_rate": 5.657728070845431e-07, "loss": 24.1968, "step": 9211 }, { "epoch": 0.8549419953596288, "grad_norm": 48.570003509521484, "learning_rate": 5.65063938293649e-07, "loss": 23.1942, "step": 9212 }, { "epoch": 0.8550348027842227, "grad_norm": 46.64689636230469, "learning_rate": 5.64355487262464e-07, "loss": 22.1132, "step": 9213 }, { "epoch": 0.8551276102088167, "grad_norm": 52.838409423828125, "learning_rate": 5.636474540577208e-07, "loss": 22.1904, "step": 9214 }, { "epoch": 0.8552204176334107, "grad_norm": 43.00339126586914, "learning_rate": 5.629398387461144e-07, "loss": 22.4703, "step": 9215 }, { "epoch": 0.8553132250580047, "grad_norm": 51.320011138916016, "learning_rate": 5.622326413942997e-07, "loss": 21.3825, "step": 9216 }, { "epoch": 0.8554060324825986, "grad_norm": 51.393096923828125, "learning_rate": 5.615258620688952e-07, "loss": 23.0228, "step": 9217 }, { "epoch": 0.8554988399071926, "grad_norm": 48.238304138183594, "learning_rate": 5.608195008364742e-07, "loss": 22.7133, "step": 9218 }, { "epoch": 0.8555916473317865, "grad_norm": 41.97380065917969, "learning_rate": 5.601135577635774e-07, "loss": 23.5994, "step": 9219 }, { "epoch": 0.8556844547563806, "grad_norm": 52.04362487792969, "learning_rate": 5.594080329167012e-07, "loss": 24.0155, "step": 9220 }, { "epoch": 0.8557772621809745, "grad_norm": 53.966129302978516, "learning_rate": 5.587029263623045e-07, "loss": 23.3724, "step": 9221 }, { "epoch": 0.8558700696055684, "grad_norm": 48.81806945800781, "learning_rate": 5.579982381668058e-07, "loss": 24.4803, "step": 9222 }, { "epoch": 0.8559628770301624, "grad_norm": 45.22784423828125, "learning_rate": 5.572939683965878e-07, "loss": 21.2131, "step": 9223 }, { "epoch": 0.8560556844547563, "grad_norm": 56.221702575683594, "learning_rate": 5.565901171179866e-07, "loss": 23.2294, "step": 9224 }, { "epoch": 0.8561484918793504, "grad_norm": 48.35111618041992, "learning_rate": 5.558866843973071e-07, "loss": 21.6289, "step": 9225 }, { "epoch": 0.8562412993039443, "grad_norm": 50.2177848815918, "learning_rate": 5.551836703008084e-07, "loss": 22.1324, "step": 9226 }, { "epoch": 0.8563341067285383, "grad_norm": 52.75030517578125, "learning_rate": 5.544810748947143e-07, "loss": 22.9544, "step": 9227 }, { "epoch": 0.8564269141531322, "grad_norm": 42.16910171508789, "learning_rate": 5.537788982452052e-07, "loss": 22.6472, "step": 9228 }, { "epoch": 0.8565197215777263, "grad_norm": 46.614341735839844, "learning_rate": 5.530771404184276e-07, "loss": 22.8215, "step": 9229 }, { "epoch": 0.8566125290023202, "grad_norm": 58.2462272644043, "learning_rate": 5.52375801480482e-07, "loss": 22.1203, "step": 9230 }, { "epoch": 0.8567053364269142, "grad_norm": 47.099674224853516, "learning_rate": 5.516748814974365e-07, "loss": 22.5654, "step": 9231 }, { "epoch": 0.8567981438515081, "grad_norm": 39.58165740966797, "learning_rate": 5.509743805353113e-07, "loss": 21.8868, "step": 9232 }, { "epoch": 0.856890951276102, "grad_norm": 54.426918029785156, "learning_rate": 5.502742986600956e-07, "loss": 24.2078, "step": 9233 }, { "epoch": 0.8569837587006961, "grad_norm": 47.52063751220703, "learning_rate": 5.495746359377335e-07, "loss": 23.7613, "step": 9234 }, { "epoch": 0.85707656612529, "grad_norm": 40.20014953613281, "learning_rate": 5.488753924341328e-07, "loss": 23.0395, "step": 9235 }, { "epoch": 0.857169373549884, "grad_norm": 55.091670989990234, "learning_rate": 5.481765682151579e-07, "loss": 22.8389, "step": 9236 }, { "epoch": 0.8572621809744779, "grad_norm": 45.65734100341797, "learning_rate": 5.474781633466403e-07, "loss": 23.5394, "step": 9237 }, { "epoch": 0.8573549883990719, "grad_norm": 45.871826171875, "learning_rate": 5.467801778943638e-07, "loss": 22.1187, "step": 9238 }, { "epoch": 0.8574477958236659, "grad_norm": 46.018218994140625, "learning_rate": 5.460826119240791e-07, "loss": 22.3586, "step": 9239 }, { "epoch": 0.8575406032482599, "grad_norm": 38.85052490234375, "learning_rate": 5.453854655014956e-07, "loss": 23.5916, "step": 9240 }, { "epoch": 0.8576334106728538, "grad_norm": 50.15283966064453, "learning_rate": 5.446887386922812e-07, "loss": 22.9654, "step": 9241 }, { "epoch": 0.8577262180974478, "grad_norm": 47.42285919189453, "learning_rate": 5.439924315620659e-07, "loss": 22.7525, "step": 9242 }, { "epoch": 0.8578190255220418, "grad_norm": 48.24629211425781, "learning_rate": 5.432965441764415e-07, "loss": 23.6298, "step": 9243 }, { "epoch": 0.8579118329466358, "grad_norm": 43.78814697265625, "learning_rate": 5.426010766009582e-07, "loss": 21.2591, "step": 9244 }, { "epoch": 0.8580046403712297, "grad_norm": 54.728885650634766, "learning_rate": 5.419060289011263e-07, "loss": 22.7037, "step": 9245 }, { "epoch": 0.8580974477958236, "grad_norm": 48.577232360839844, "learning_rate": 5.412114011424191e-07, "loss": 24.1033, "step": 9246 }, { "epoch": 0.8581902552204176, "grad_norm": 60.793190002441406, "learning_rate": 5.405171933902675e-07, "loss": 25.3074, "step": 9247 }, { "epoch": 0.8582830626450116, "grad_norm": 48.006839752197266, "learning_rate": 5.398234057100638e-07, "loss": 23.3137, "step": 9248 }, { "epoch": 0.8583758700696056, "grad_norm": 52.288551330566406, "learning_rate": 5.391300381671627e-07, "loss": 23.5789, "step": 9249 }, { "epoch": 0.8584686774941995, "grad_norm": 43.3028678894043, "learning_rate": 5.384370908268766e-07, "loss": 21.8567, "step": 9250 }, { "epoch": 0.8585614849187935, "grad_norm": 39.78280258178711, "learning_rate": 5.377445637544793e-07, "loss": 24.7798, "step": 9251 }, { "epoch": 0.8586542923433875, "grad_norm": 45.86000442504883, "learning_rate": 5.370524570152059e-07, "loss": 23.9048, "step": 9252 }, { "epoch": 0.8587470997679815, "grad_norm": 53.58234786987305, "learning_rate": 5.363607706742491e-07, "loss": 22.7444, "step": 9253 }, { "epoch": 0.8588399071925754, "grad_norm": 49.47931671142578, "learning_rate": 5.356695047967663e-07, "loss": 22.1707, "step": 9254 }, { "epoch": 0.8589327146171694, "grad_norm": 44.71501922607422, "learning_rate": 5.349786594478723e-07, "loss": 22.2847, "step": 9255 }, { "epoch": 0.8590255220417633, "grad_norm": 41.69610595703125, "learning_rate": 5.342882346926431e-07, "loss": 23.5658, "step": 9256 }, { "epoch": 0.8591183294663574, "grad_norm": 44.05058670043945, "learning_rate": 5.33598230596114e-07, "loss": 23.7179, "step": 9257 }, { "epoch": 0.8592111368909513, "grad_norm": 50.42746353149414, "learning_rate": 5.329086472232825e-07, "loss": 23.8365, "step": 9258 }, { "epoch": 0.8593039443155452, "grad_norm": 45.54170227050781, "learning_rate": 5.322194846391044e-07, "loss": 23.1259, "step": 9259 }, { "epoch": 0.8593967517401392, "grad_norm": 41.4318733215332, "learning_rate": 5.315307429084993e-07, "loss": 20.9216, "step": 9260 }, { "epoch": 0.8594895591647331, "grad_norm": 44.782432556152344, "learning_rate": 5.308424220963432e-07, "loss": 21.6051, "step": 9261 }, { "epoch": 0.8595823665893272, "grad_norm": 47.66032791137695, "learning_rate": 5.301545222674748e-07, "loss": 23.0927, "step": 9262 }, { "epoch": 0.8596751740139211, "grad_norm": 51.56569290161133, "learning_rate": 5.294670434866911e-07, "loss": 22.5642, "step": 9263 }, { "epoch": 0.8597679814385151, "grad_norm": 54.98352813720703, "learning_rate": 5.287799858187548e-07, "loss": 21.3729, "step": 9264 }, { "epoch": 0.859860788863109, "grad_norm": 52.299232482910156, "learning_rate": 5.280933493283796e-07, "loss": 22.064, "step": 9265 }, { "epoch": 0.8599535962877031, "grad_norm": 50.81991195678711, "learning_rate": 5.274071340802495e-07, "loss": 22.1992, "step": 9266 }, { "epoch": 0.860046403712297, "grad_norm": 48.379676818847656, "learning_rate": 5.267213401390008e-07, "loss": 22.982, "step": 9267 }, { "epoch": 0.860139211136891, "grad_norm": 46.88730239868164, "learning_rate": 5.260359675692356e-07, "loss": 23.3237, "step": 9268 }, { "epoch": 0.8602320185614849, "grad_norm": 47.80116653442383, "learning_rate": 5.253510164355141e-07, "loss": 23.6088, "step": 9269 }, { "epoch": 0.8603248259860788, "grad_norm": 46.36674880981445, "learning_rate": 5.246664868023565e-07, "loss": 22.4883, "step": 9270 }, { "epoch": 0.8604176334106729, "grad_norm": 58.8003044128418, "learning_rate": 5.23982378734243e-07, "loss": 22.4476, "step": 9271 }, { "epoch": 0.8605104408352668, "grad_norm": 51.50596618652344, "learning_rate": 5.232986922956174e-07, "loss": 24.1734, "step": 9272 }, { "epoch": 0.8606032482598608, "grad_norm": 47.08131408691406, "learning_rate": 5.226154275508771e-07, "loss": 22.4562, "step": 9273 }, { "epoch": 0.8606960556844547, "grad_norm": 44.163856506347656, "learning_rate": 5.21932584564388e-07, "loss": 23.4568, "step": 9274 }, { "epoch": 0.8607888631090487, "grad_norm": 48.80731201171875, "learning_rate": 5.212501634004702e-07, "loss": 24.5273, "step": 9275 }, { "epoch": 0.8608816705336427, "grad_norm": 60.094764709472656, "learning_rate": 5.205681641234062e-07, "loss": 22.8805, "step": 9276 }, { "epoch": 0.8609744779582367, "grad_norm": 49.08254623413086, "learning_rate": 5.198865867974378e-07, "loss": 24.3687, "step": 9277 }, { "epoch": 0.8610672853828306, "grad_norm": 48.88011169433594, "learning_rate": 5.192054314867706e-07, "loss": 24.752, "step": 9278 }, { "epoch": 0.8611600928074246, "grad_norm": 47.65726089477539, "learning_rate": 5.185246982555641e-07, "loss": 22.8899, "step": 9279 }, { "epoch": 0.8612529002320186, "grad_norm": 42.80295944213867, "learning_rate": 5.178443871679445e-07, "loss": 21.8441, "step": 9280 }, { "epoch": 0.8613457076566126, "grad_norm": 56.59320068359375, "learning_rate": 5.171644982879942e-07, "loss": 22.2942, "step": 9281 }, { "epoch": 0.8614385150812065, "grad_norm": 48.346256256103516, "learning_rate": 5.16485031679757e-07, "loss": 22.0845, "step": 9282 }, { "epoch": 0.8615313225058004, "grad_norm": 56.83096694946289, "learning_rate": 5.158059874072358e-07, "loss": 22.3559, "step": 9283 }, { "epoch": 0.8616241299303944, "grad_norm": 43.150089263916016, "learning_rate": 5.151273655343981e-07, "loss": 22.6467, "step": 9284 }, { "epoch": 0.8617169373549884, "grad_norm": 46.58761215209961, "learning_rate": 5.144491661251649e-07, "loss": 23.5746, "step": 9285 }, { "epoch": 0.8618097447795824, "grad_norm": 41.95178985595703, "learning_rate": 5.137713892434226e-07, "loss": 20.9645, "step": 9286 }, { "epoch": 0.8619025522041763, "grad_norm": 44.68587875366211, "learning_rate": 5.130940349530161e-07, "loss": 23.81, "step": 9287 }, { "epoch": 0.8619953596287703, "grad_norm": 44.76853561401367, "learning_rate": 5.1241710331775e-07, "loss": 22.3444, "step": 9288 }, { "epoch": 0.8620881670533642, "grad_norm": 39.85830307006836, "learning_rate": 5.117405944013887e-07, "loss": 22.5117, "step": 9289 }, { "epoch": 0.8621809744779583, "grad_norm": 38.70995330810547, "learning_rate": 5.110645082676591e-07, "loss": 19.7429, "step": 9290 }, { "epoch": 0.8622737819025522, "grad_norm": 154.1187744140625, "learning_rate": 5.103888449802469e-07, "loss": 21.4951, "step": 9291 }, { "epoch": 0.8623665893271462, "grad_norm": 47.307769775390625, "learning_rate": 5.097136046027967e-07, "loss": 23.9757, "step": 9292 }, { "epoch": 0.8624593967517401, "grad_norm": 43.630680084228516, "learning_rate": 5.09038787198915e-07, "loss": 23.5772, "step": 9293 }, { "epoch": 0.8625522041763342, "grad_norm": 47.88856887817383, "learning_rate": 5.08364392832168e-07, "loss": 21.9859, "step": 9294 }, { "epoch": 0.8626450116009281, "grad_norm": 45.99951171875, "learning_rate": 5.076904215660805e-07, "loss": 24.7548, "step": 9295 }, { "epoch": 0.862737819025522, "grad_norm": 42.299644470214844, "learning_rate": 5.070168734641412e-07, "loss": 23.5905, "step": 9296 }, { "epoch": 0.862830626450116, "grad_norm": 42.79845428466797, "learning_rate": 5.063437485897959e-07, "loss": 21.7492, "step": 9297 }, { "epoch": 0.8629234338747099, "grad_norm": 45.101890563964844, "learning_rate": 5.056710470064508e-07, "loss": 23.8686, "step": 9298 }, { "epoch": 0.863016241299304, "grad_norm": 40.64525604248047, "learning_rate": 5.049987687774727e-07, "loss": 20.6937, "step": 9299 }, { "epoch": 0.8631090487238979, "grad_norm": 46.170433044433594, "learning_rate": 5.043269139661872e-07, "loss": 22.1392, "step": 9300 }, { "epoch": 0.8632018561484919, "grad_norm": 50.80873107910156, "learning_rate": 5.036554826358842e-07, "loss": 24.2863, "step": 9301 }, { "epoch": 0.8632946635730858, "grad_norm": 40.12826919555664, "learning_rate": 5.029844748498092e-07, "loss": 23.4763, "step": 9302 }, { "epoch": 0.8633874709976798, "grad_norm": 46.95001983642578, "learning_rate": 5.023138906711695e-07, "loss": 22.516, "step": 9303 }, { "epoch": 0.8634802784222738, "grad_norm": 53.26718521118164, "learning_rate": 5.016437301631327e-07, "loss": 22.86, "step": 9304 }, { "epoch": 0.8635730858468678, "grad_norm": 44.130455017089844, "learning_rate": 5.009739933888258e-07, "loss": 21.3921, "step": 9305 }, { "epoch": 0.8636658932714617, "grad_norm": 43.79192352294922, "learning_rate": 5.003046804113354e-07, "loss": 22.2656, "step": 9306 }, { "epoch": 0.8637587006960556, "grad_norm": 47.59242630004883, "learning_rate": 4.996357912937122e-07, "loss": 22.9284, "step": 9307 }, { "epoch": 0.8638515081206497, "grad_norm": 42.44779968261719, "learning_rate": 4.989673260989603e-07, "loss": 23.9164, "step": 9308 }, { "epoch": 0.8639443155452436, "grad_norm": 61.69033432006836, "learning_rate": 4.982992848900497e-07, "loss": 24.7378, "step": 9309 }, { "epoch": 0.8640371229698376, "grad_norm": 48.724464416503906, "learning_rate": 4.97631667729907e-07, "loss": 22.6813, "step": 9310 }, { "epoch": 0.8641299303944315, "grad_norm": 45.4096565246582, "learning_rate": 4.969644746814211e-07, "loss": 22.5381, "step": 9311 }, { "epoch": 0.8642227378190255, "grad_norm": 60.4405632019043, "learning_rate": 4.962977058074381e-07, "loss": 22.3577, "step": 9312 }, { "epoch": 0.8643155452436195, "grad_norm": 49.22508239746094, "learning_rate": 4.956313611707686e-07, "loss": 22.934, "step": 9313 }, { "epoch": 0.8644083526682135, "grad_norm": 48.40491485595703, "learning_rate": 4.949654408341775e-07, "loss": 21.9921, "step": 9314 }, { "epoch": 0.8645011600928074, "grad_norm": 44.237300872802734, "learning_rate": 4.942999448603958e-07, "loss": 24.2407, "step": 9315 }, { "epoch": 0.8645939675174014, "grad_norm": 62.06575012207031, "learning_rate": 4.936348733121093e-07, "loss": 23.4054, "step": 9316 }, { "epoch": 0.8646867749419953, "grad_norm": 52.59642028808594, "learning_rate": 4.929702262519676e-07, "loss": 24.6572, "step": 9317 }, { "epoch": 0.8647795823665894, "grad_norm": 45.70806121826172, "learning_rate": 4.92306003742577e-07, "loss": 23.1109, "step": 9318 }, { "epoch": 0.8648723897911833, "grad_norm": 52.112979888916016, "learning_rate": 4.916422058465081e-07, "loss": 22.923, "step": 9319 }, { "epoch": 0.8649651972157772, "grad_norm": 61.79477310180664, "learning_rate": 4.909788326262854e-07, "loss": 22.925, "step": 9320 }, { "epoch": 0.8650580046403712, "grad_norm": 62.86668014526367, "learning_rate": 4.903158841444006e-07, "loss": 23.7401, "step": 9321 }, { "epoch": 0.8651508120649652, "grad_norm": 60.38111877441406, "learning_rate": 4.896533604632997e-07, "loss": 22.2431, "step": 9322 }, { "epoch": 0.8652436194895592, "grad_norm": 48.07355499267578, "learning_rate": 4.889912616453918e-07, "loss": 21.9996, "step": 9323 }, { "epoch": 0.8653364269141531, "grad_norm": 47.363529205322266, "learning_rate": 4.883295877530431e-07, "loss": 21.9396, "step": 9324 }, { "epoch": 0.8654292343387471, "grad_norm": 51.929813385009766, "learning_rate": 4.876683388485847e-07, "loss": 22.4587, "step": 9325 }, { "epoch": 0.865522041763341, "grad_norm": 54.93717956542969, "learning_rate": 4.87007514994301e-07, "loss": 22.4885, "step": 9326 }, { "epoch": 0.8656148491879351, "grad_norm": 79.78508758544922, "learning_rate": 4.863471162524424e-07, "loss": 22.2356, "step": 9327 }, { "epoch": 0.865707656612529, "grad_norm": 47.1519889831543, "learning_rate": 4.856871426852166e-07, "loss": 23.5617, "step": 9328 }, { "epoch": 0.865800464037123, "grad_norm": 45.13167953491211, "learning_rate": 4.850275943547905e-07, "loss": 22.8248, "step": 9329 }, { "epoch": 0.8658932714617169, "grad_norm": 55.68489074707031, "learning_rate": 4.843684713232916e-07, "loss": 21.5792, "step": 9330 }, { "epoch": 0.8659860788863109, "grad_norm": 51.650264739990234, "learning_rate": 4.837097736528096e-07, "loss": 22.4605, "step": 9331 }, { "epoch": 0.8660788863109049, "grad_norm": 49.19512176513672, "learning_rate": 4.830515014053894e-07, "loss": 21.6555, "step": 9332 }, { "epoch": 0.8661716937354988, "grad_norm": 47.41484069824219, "learning_rate": 4.823936546430408e-07, "loss": 23.4908, "step": 9333 }, { "epoch": 0.8662645011600928, "grad_norm": 70.79849243164062, "learning_rate": 4.817362334277304e-07, "loss": 23.804, "step": 9334 }, { "epoch": 0.8663573085846867, "grad_norm": 63.033546447753906, "learning_rate": 4.810792378213863e-07, "loss": 23.1667, "step": 9335 }, { "epoch": 0.8664501160092808, "grad_norm": 56.86884307861328, "learning_rate": 4.804226678858936e-07, "loss": 22.3768, "step": 9336 }, { "epoch": 0.8665429234338747, "grad_norm": 55.134464263916016, "learning_rate": 4.797665236831023e-07, "loss": 21.5057, "step": 9337 }, { "epoch": 0.8666357308584687, "grad_norm": 70.90741729736328, "learning_rate": 4.791108052748184e-07, "loss": 21.2611, "step": 9338 }, { "epoch": 0.8667285382830626, "grad_norm": 56.03361511230469, "learning_rate": 4.784555127228091e-07, "loss": 22.3983, "step": 9339 }, { "epoch": 0.8668213457076566, "grad_norm": 46.98633575439453, "learning_rate": 4.778006460888007e-07, "loss": 22.6442, "step": 9340 }, { "epoch": 0.8669141531322506, "grad_norm": 59.01889419555664, "learning_rate": 4.771462054344805e-07, "loss": 21.5223, "step": 9341 }, { "epoch": 0.8670069605568446, "grad_norm": 48.45460510253906, "learning_rate": 4.764921908214948e-07, "loss": 20.8698, "step": 9342 }, { "epoch": 0.8670997679814385, "grad_norm": 58.32361602783203, "learning_rate": 4.7583860231144916e-07, "loss": 22.2197, "step": 9343 }, { "epoch": 0.8671925754060325, "grad_norm": 41.43564987182617, "learning_rate": 4.7518543996591284e-07, "loss": 21.826, "step": 9344 }, { "epoch": 0.8672853828306265, "grad_norm": 51.9643669128418, "learning_rate": 4.745327038464098e-07, "loss": 22.2991, "step": 9345 }, { "epoch": 0.8673781902552204, "grad_norm": 48.85662078857422, "learning_rate": 4.738803940144265e-07, "loss": 22.3602, "step": 9346 }, { "epoch": 0.8674709976798144, "grad_norm": 46.929019927978516, "learning_rate": 4.732285105314083e-07, "loss": 22.3061, "step": 9347 }, { "epoch": 0.8675638051044083, "grad_norm": 48.42430114746094, "learning_rate": 4.725770534587637e-07, "loss": 23.7517, "step": 9348 }, { "epoch": 0.8676566125290023, "grad_norm": 52.69715881347656, "learning_rate": 4.7192602285785426e-07, "loss": 23.2993, "step": 9349 }, { "epoch": 0.8677494199535963, "grad_norm": 60.7343864440918, "learning_rate": 4.7127541879000817e-07, "loss": 23.4359, "step": 9350 }, { "epoch": 0.8678422273781903, "grad_norm": 50.360206604003906, "learning_rate": 4.7062524131651024e-07, "loss": 22.9718, "step": 9351 }, { "epoch": 0.8679350348027842, "grad_norm": 55.11302185058594, "learning_rate": 4.699754904986048e-07, "loss": 22.6648, "step": 9352 }, { "epoch": 0.8680278422273782, "grad_norm": 59.022953033447266, "learning_rate": 4.6932616639749684e-07, "loss": 23.6176, "step": 9353 }, { "epoch": 0.8681206496519721, "grad_norm": 57.06752395629883, "learning_rate": 4.6867726907435295e-07, "loss": 23.6908, "step": 9354 }, { "epoch": 0.8682134570765662, "grad_norm": 47.619178771972656, "learning_rate": 4.6802879859029427e-07, "loss": 23.5039, "step": 9355 }, { "epoch": 0.8683062645011601, "grad_norm": 45.99892807006836, "learning_rate": 4.6738075500640747e-07, "loss": 24.3793, "step": 9356 }, { "epoch": 0.868399071925754, "grad_norm": 59.943267822265625, "learning_rate": 4.667331383837359e-07, "loss": 22.6277, "step": 9357 }, { "epoch": 0.868491879350348, "grad_norm": 53.282527923583984, "learning_rate": 4.6608594878328364e-07, "loss": 20.3329, "step": 9358 }, { "epoch": 0.868584686774942, "grad_norm": 45.70891189575195, "learning_rate": 4.654391862660135e-07, "loss": 22.0799, "step": 9359 }, { "epoch": 0.868677494199536, "grad_norm": 50.15103530883789, "learning_rate": 4.647928508928512e-07, "loss": 23.0075, "step": 9360 }, { "epoch": 0.8687703016241299, "grad_norm": 47.37512969970703, "learning_rate": 4.641469427246764e-07, "loss": 23.2518, "step": 9361 }, { "epoch": 0.8688631090487239, "grad_norm": 63.5941047668457, "learning_rate": 4.6350146182233436e-07, "loss": 23.4433, "step": 9362 }, { "epoch": 0.8689559164733178, "grad_norm": 45.93911361694336, "learning_rate": 4.6285640824662746e-07, "loss": 22.3891, "step": 9363 }, { "epoch": 0.8690487238979119, "grad_norm": 41.59749221801758, "learning_rate": 4.6221178205831776e-07, "loss": 23.2255, "step": 9364 }, { "epoch": 0.8691415313225058, "grad_norm": 43.841880798339844, "learning_rate": 4.6156758331812656e-07, "loss": 22.5571, "step": 9365 }, { "epoch": 0.8692343387470998, "grad_norm": 55.26628875732422, "learning_rate": 4.6092381208673875e-07, "loss": 23.6771, "step": 9366 }, { "epoch": 0.8693271461716937, "grad_norm": 45.54048156738281, "learning_rate": 4.6028046842479136e-07, "loss": 21.3378, "step": 9367 }, { "epoch": 0.8694199535962877, "grad_norm": 46.693363189697266, "learning_rate": 4.5963755239288974e-07, "loss": 24.2096, "step": 9368 }, { "epoch": 0.8695127610208817, "grad_norm": 49.059757232666016, "learning_rate": 4.589950640515933e-07, "loss": 23.8236, "step": 9369 }, { "epoch": 0.8696055684454757, "grad_norm": 46.284053802490234, "learning_rate": 4.5835300346142253e-07, "loss": 23.6712, "step": 9370 }, { "epoch": 0.8696983758700696, "grad_norm": 48.919681549072266, "learning_rate": 4.577113706828579e-07, "loss": 23.8225, "step": 9371 }, { "epoch": 0.8697911832946635, "grad_norm": 43.145545959472656, "learning_rate": 4.5707016577634156e-07, "loss": 22.5519, "step": 9372 }, { "epoch": 0.8698839907192576, "grad_norm": 41.63405227661133, "learning_rate": 4.564293888022703e-07, "loss": 22.2273, "step": 9373 }, { "epoch": 0.8699767981438515, "grad_norm": 57.329017639160156, "learning_rate": 4.5578903982100684e-07, "loss": 24.1549, "step": 9374 }, { "epoch": 0.8700696055684455, "grad_norm": 47.0857048034668, "learning_rate": 4.5514911889286693e-07, "loss": 21.4897, "step": 9375 }, { "epoch": 0.8701624129930394, "grad_norm": 42.240509033203125, "learning_rate": 4.545096260781323e-07, "loss": 22.8467, "step": 9376 }, { "epoch": 0.8702552204176334, "grad_norm": 44.406028747558594, "learning_rate": 4.5387056143704036e-07, "loss": 24.3746, "step": 9377 }, { "epoch": 0.8703480278422274, "grad_norm": 51.38887023925781, "learning_rate": 4.532319250297901e-07, "loss": 21.6701, "step": 9378 }, { "epoch": 0.8704408352668214, "grad_norm": 44.16295623779297, "learning_rate": 4.5259371691653786e-07, "loss": 21.8956, "step": 9379 }, { "epoch": 0.8705336426914153, "grad_norm": 39.80545425415039, "learning_rate": 4.519559371574028e-07, "loss": 21.5481, "step": 9380 }, { "epoch": 0.8706264501160093, "grad_norm": 52.497703552246094, "learning_rate": 4.5131858581246246e-07, "loss": 23.0605, "step": 9381 }, { "epoch": 0.8707192575406032, "grad_norm": 48.16587448120117, "learning_rate": 4.506816629417526e-07, "loss": 22.6047, "step": 9382 }, { "epoch": 0.8708120649651973, "grad_norm": 54.21892166137695, "learning_rate": 4.500451686052698e-07, "loss": 24.0145, "step": 9383 }, { "epoch": 0.8709048723897912, "grad_norm": 48.22736740112305, "learning_rate": 4.494091028629699e-07, "loss": 21.5716, "step": 9384 }, { "epoch": 0.8709976798143851, "grad_norm": 50.00468063354492, "learning_rate": 4.4877346577477e-07, "loss": 23.7863, "step": 9385 }, { "epoch": 0.8710904872389791, "grad_norm": 54.21012496948242, "learning_rate": 4.4813825740054496e-07, "loss": 22.8796, "step": 9386 }, { "epoch": 0.8711832946635731, "grad_norm": 47.46445083618164, "learning_rate": 4.4750347780012915e-07, "loss": 21.633, "step": 9387 }, { "epoch": 0.8712761020881671, "grad_norm": 51.998043060302734, "learning_rate": 4.468691270333181e-07, "loss": 21.7986, "step": 9388 }, { "epoch": 0.871368909512761, "grad_norm": 61.827369689941406, "learning_rate": 4.4623520515986564e-07, "loss": 21.8939, "step": 9389 }, { "epoch": 0.871461716937355, "grad_norm": 48.91069412231445, "learning_rate": 4.4560171223948457e-07, "loss": 21.9221, "step": 9390 }, { "epoch": 0.8715545243619489, "grad_norm": 50.151641845703125, "learning_rate": 4.4496864833184995e-07, "loss": 22.2656, "step": 9391 }, { "epoch": 0.871647331786543, "grad_norm": 58.620113372802734, "learning_rate": 4.4433601349659404e-07, "loss": 22.4564, "step": 9392 }, { "epoch": 0.8717401392111369, "grad_norm": 53.580596923828125, "learning_rate": 4.437038077933098e-07, "loss": 24.1895, "step": 9393 }, { "epoch": 0.8718329466357309, "grad_norm": 55.844200134277344, "learning_rate": 4.4307203128154784e-07, "loss": 23.5661, "step": 9394 }, { "epoch": 0.8719257540603248, "grad_norm": 45.1138916015625, "learning_rate": 4.424406840208234e-07, "loss": 21.6454, "step": 9395 }, { "epoch": 0.8720185614849187, "grad_norm": 57.720516204833984, "learning_rate": 4.418097660706039e-07, "loss": 22.9289, "step": 9396 }, { "epoch": 0.8721113689095128, "grad_norm": 52.51947784423828, "learning_rate": 4.411792774903223e-07, "loss": 23.9569, "step": 9397 }, { "epoch": 0.8722041763341067, "grad_norm": 54.027191162109375, "learning_rate": 4.405492183393689e-07, "loss": 22.0066, "step": 9398 }, { "epoch": 0.8722969837587007, "grad_norm": 46.77800750732422, "learning_rate": 4.399195886770935e-07, "loss": 23.5155, "step": 9399 }, { "epoch": 0.8723897911832946, "grad_norm": 49.477745056152344, "learning_rate": 4.3929038856280415e-07, "loss": 22.8467, "step": 9400 }, { "epoch": 0.8724825986078887, "grad_norm": 38.96854782104492, "learning_rate": 4.3866161805577345e-07, "loss": 21.6273, "step": 9401 }, { "epoch": 0.8725754060324826, "grad_norm": 46.290985107421875, "learning_rate": 4.380332772152257e-07, "loss": 23.2316, "step": 9402 }, { "epoch": 0.8726682134570766, "grad_norm": 50.59125518798828, "learning_rate": 4.374053661003519e-07, "loss": 22.1096, "step": 9403 }, { "epoch": 0.8727610208816705, "grad_norm": 53.111419677734375, "learning_rate": 4.3677788477029915e-07, "loss": 22.7782, "step": 9404 }, { "epoch": 0.8728538283062645, "grad_norm": 48.80801010131836, "learning_rate": 4.3615083328417405e-07, "loss": 22.2363, "step": 9405 }, { "epoch": 0.8729466357308585, "grad_norm": 48.944034576416016, "learning_rate": 4.3552421170104274e-07, "loss": 23.9741, "step": 9406 }, { "epoch": 0.8730394431554525, "grad_norm": 57.850440979003906, "learning_rate": 4.348980200799341e-07, "loss": 23.1727, "step": 9407 }, { "epoch": 0.8731322505800464, "grad_norm": 47.40395736694336, "learning_rate": 4.342722584798298e-07, "loss": 23.0668, "step": 9408 }, { "epoch": 0.8732250580046403, "grad_norm": 38.98786926269531, "learning_rate": 4.336469269596788e-07, "loss": 22.7712, "step": 9409 }, { "epoch": 0.8733178654292343, "grad_norm": 48.83300018310547, "learning_rate": 4.3302202557838235e-07, "loss": 22.9887, "step": 9410 }, { "epoch": 0.8734106728538283, "grad_norm": 57.148380279541016, "learning_rate": 4.3239755439480725e-07, "loss": 24.0608, "step": 9411 }, { "epoch": 0.8735034802784223, "grad_norm": 58.350563049316406, "learning_rate": 4.3177351346777584e-07, "loss": 25.7207, "step": 9412 }, { "epoch": 0.8735962877030162, "grad_norm": 53.09178924560547, "learning_rate": 4.311499028560717e-07, "loss": 24.7253, "step": 9413 }, { "epoch": 0.8736890951276102, "grad_norm": 48.338985443115234, "learning_rate": 4.3052672261843564e-07, "loss": 24.1418, "step": 9414 }, { "epoch": 0.8737819025522042, "grad_norm": 55.658424377441406, "learning_rate": 4.299039728135734e-07, "loss": 22.7976, "step": 9415 }, { "epoch": 0.8738747099767982, "grad_norm": 85.05902862548828, "learning_rate": 4.2928165350014204e-07, "loss": 23.2038, "step": 9416 }, { "epoch": 0.8739675174013921, "grad_norm": 56.859153747558594, "learning_rate": 4.2865976473676516e-07, "loss": 23.0543, "step": 9417 }, { "epoch": 0.8740603248259861, "grad_norm": 50.4085807800293, "learning_rate": 4.280383065820232e-07, "loss": 21.1011, "step": 9418 }, { "epoch": 0.87415313225058, "grad_norm": 43.8848762512207, "learning_rate": 4.2741727909445474e-07, "loss": 22.7829, "step": 9419 }, { "epoch": 0.874245939675174, "grad_norm": 52.99628448486328, "learning_rate": 4.267966823325581e-07, "loss": 21.8284, "step": 9420 }, { "epoch": 0.874338747099768, "grad_norm": 46.86452102661133, "learning_rate": 4.2617651635479593e-07, "loss": 23.0776, "step": 9421 }, { "epoch": 0.8744315545243619, "grad_norm": 46.28444290161133, "learning_rate": 4.2555678121958157e-07, "loss": 23.0684, "step": 9422 }, { "epoch": 0.8745243619489559, "grad_norm": 43.03843688964844, "learning_rate": 4.2493747698529487e-07, "loss": 22.5283, "step": 9423 }, { "epoch": 0.8746171693735498, "grad_norm": 44.5257682800293, "learning_rate": 4.243186037102731e-07, "loss": 20.3881, "step": 9424 }, { "epoch": 0.8747099767981439, "grad_norm": 53.896461486816406, "learning_rate": 4.237001614528119e-07, "loss": 22.9524, "step": 9425 }, { "epoch": 0.8748027842227378, "grad_norm": 49.79648971557617, "learning_rate": 4.230821502711657e-07, "loss": 23.8811, "step": 9426 }, { "epoch": 0.8748955916473318, "grad_norm": 59.80575942993164, "learning_rate": 4.2246457022355136e-07, "loss": 25.5511, "step": 9427 }, { "epoch": 0.8749883990719257, "grad_norm": 45.422603607177734, "learning_rate": 4.218474213681434e-07, "loss": 23.6001, "step": 9428 }, { "epoch": 0.8750812064965198, "grad_norm": 42.93220901489258, "learning_rate": 4.2123070376307475e-07, "loss": 24.7139, "step": 9429 }, { "epoch": 0.8751740139211137, "grad_norm": 50.68771743774414, "learning_rate": 4.2061441746643896e-07, "loss": 21.103, "step": 9430 }, { "epoch": 0.8752668213457077, "grad_norm": 49.5827751159668, "learning_rate": 4.199985625362879e-07, "loss": 22.2456, "step": 9431 }, { "epoch": 0.8753596287703016, "grad_norm": 45.14073944091797, "learning_rate": 4.193831390306352e-07, "loss": 24.2947, "step": 9432 }, { "epoch": 0.8754524361948955, "grad_norm": 42.02375793457031, "learning_rate": 4.187681470074512e-07, "loss": 21.1897, "step": 9433 }, { "epoch": 0.8755452436194896, "grad_norm": 43.2984504699707, "learning_rate": 4.1815358652466667e-07, "loss": 22.2694, "step": 9434 }, { "epoch": 0.8756380510440835, "grad_norm": 51.507755279541016, "learning_rate": 4.1753945764017147e-07, "loss": 21.871, "step": 9435 }, { "epoch": 0.8757308584686775, "grad_norm": 45.49148178100586, "learning_rate": 4.1692576041181486e-07, "loss": 22.6536, "step": 9436 }, { "epoch": 0.8758236658932714, "grad_norm": 55.20854949951172, "learning_rate": 4.1631249489740567e-07, "loss": 23.3345, "step": 9437 }, { "epoch": 0.8759164733178654, "grad_norm": 45.15796661376953, "learning_rate": 4.156996611547126e-07, "loss": 21.8275, "step": 9438 }, { "epoch": 0.8760092807424594, "grad_norm": 60.01768112182617, "learning_rate": 4.150872592414623e-07, "loss": 23.0419, "step": 9439 }, { "epoch": 0.8761020881670534, "grad_norm": 40.76087951660156, "learning_rate": 4.14475289215342e-07, "loss": 24.3934, "step": 9440 }, { "epoch": 0.8761948955916473, "grad_norm": 43.39229202270508, "learning_rate": 4.1386375113399657e-07, "loss": 22.457, "step": 9441 }, { "epoch": 0.8762877030162413, "grad_norm": 43.780738830566406, "learning_rate": 4.1325264505503394e-07, "loss": 23.5386, "step": 9442 }, { "epoch": 0.8763805104408353, "grad_norm": 51.580291748046875, "learning_rate": 4.1264197103601523e-07, "loss": 22.2573, "step": 9443 }, { "epoch": 0.8764733178654293, "grad_norm": 46.183223724365234, "learning_rate": 4.1203172913446774e-07, "loss": 21.0786, "step": 9444 }, { "epoch": 0.8765661252900232, "grad_norm": 56.916561126708984, "learning_rate": 4.114219194078717e-07, "loss": 24.1948, "step": 9445 }, { "epoch": 0.8766589327146171, "grad_norm": 50.02518081665039, "learning_rate": 4.1081254191367217e-07, "loss": 21.5254, "step": 9446 }, { "epoch": 0.8767517401392111, "grad_norm": 55.492095947265625, "learning_rate": 4.1020359670926937e-07, "loss": 21.7468, "step": 9447 }, { "epoch": 0.8768445475638051, "grad_norm": 44.0953369140625, "learning_rate": 4.0959508385202517e-07, "loss": 22.9638, "step": 9448 }, { "epoch": 0.8769373549883991, "grad_norm": 50.750999450683594, "learning_rate": 4.0898700339925824e-07, "loss": 22.744, "step": 9449 }, { "epoch": 0.877030162412993, "grad_norm": 45.2163200378418, "learning_rate": 4.0837935540825214e-07, "loss": 24.1259, "step": 9450 }, { "epoch": 0.877122969837587, "grad_norm": 40.606300354003906, "learning_rate": 4.077721399362411e-07, "loss": 22.8401, "step": 9451 }, { "epoch": 0.877215777262181, "grad_norm": 48.12916564941406, "learning_rate": 4.071653570404266e-07, "loss": 22.603, "step": 9452 }, { "epoch": 0.877308584686775, "grad_norm": 43.163475036621094, "learning_rate": 4.0655900677796456e-07, "loss": 23.1696, "step": 9453 }, { "epoch": 0.8774013921113689, "grad_norm": 47.00019836425781, "learning_rate": 4.059530892059721e-07, "loss": 23.7877, "step": 9454 }, { "epoch": 0.8774941995359629, "grad_norm": 46.616939544677734, "learning_rate": 4.0534760438152355e-07, "loss": 20.966, "step": 9455 }, { "epoch": 0.8775870069605568, "grad_norm": 43.66257858276367, "learning_rate": 4.047425523616577e-07, "loss": 21.0343, "step": 9456 }, { "epoch": 0.8776798143851509, "grad_norm": 54.53062438964844, "learning_rate": 4.0413793320336513e-07, "loss": 22.5692, "step": 9457 }, { "epoch": 0.8777726218097448, "grad_norm": 49.838199615478516, "learning_rate": 4.0353374696360135e-07, "loss": 23.2491, "step": 9458 }, { "epoch": 0.8778654292343387, "grad_norm": 44.92452621459961, "learning_rate": 4.029299936992792e-07, "loss": 22.4154, "step": 9459 }, { "epoch": 0.8779582366589327, "grad_norm": 45.89826202392578, "learning_rate": 4.023266734672704e-07, "loss": 23.4401, "step": 9460 }, { "epoch": 0.8780510440835266, "grad_norm": 43.07537841796875, "learning_rate": 4.01723786324405e-07, "loss": 20.4387, "step": 9461 }, { "epoch": 0.8781438515081207, "grad_norm": 53.23699188232422, "learning_rate": 4.0112133232747596e-07, "loss": 22.0132, "step": 9462 }, { "epoch": 0.8782366589327146, "grad_norm": 52.772647857666016, "learning_rate": 4.005193115332301e-07, "loss": 22.785, "step": 9463 }, { "epoch": 0.8783294663573086, "grad_norm": 44.128265380859375, "learning_rate": 3.9991772399837814e-07, "loss": 22.2104, "step": 9464 }, { "epoch": 0.8784222737819025, "grad_norm": 47.169429779052734, "learning_rate": 3.993165697795875e-07, "loss": 24.5229, "step": 9465 }, { "epoch": 0.8785150812064966, "grad_norm": 52.69623565673828, "learning_rate": 3.9871584893348583e-07, "loss": 24.1405, "step": 9466 }, { "epoch": 0.8786078886310905, "grad_norm": 51.765193939208984, "learning_rate": 3.981155615166582e-07, "loss": 23.6731, "step": 9467 }, { "epoch": 0.8787006960556845, "grad_norm": 46.97134017944336, "learning_rate": 3.9751570758565284e-07, "loss": 23.3354, "step": 9468 }, { "epoch": 0.8787935034802784, "grad_norm": 45.00625991821289, "learning_rate": 3.9691628719697117e-07, "loss": 22.426, "step": 9469 }, { "epoch": 0.8788863109048723, "grad_norm": 46.79701232910156, "learning_rate": 3.9631730040707915e-07, "loss": 23.1947, "step": 9470 }, { "epoch": 0.8789791183294664, "grad_norm": 59.58045959472656, "learning_rate": 3.9571874727239945e-07, "loss": 23.3307, "step": 9471 }, { "epoch": 0.8790719257540603, "grad_norm": 44.02096176147461, "learning_rate": 3.951206278493136e-07, "loss": 23.1656, "step": 9472 }, { "epoch": 0.8791647331786543, "grad_norm": 54.04811096191406, "learning_rate": 3.945229421941632e-07, "loss": 22.611, "step": 9473 }, { "epoch": 0.8792575406032482, "grad_norm": 59.35768127441406, "learning_rate": 3.9392569036324936e-07, "loss": 20.8311, "step": 9474 }, { "epoch": 0.8793503480278422, "grad_norm": 51.51789093017578, "learning_rate": 3.9332887241283144e-07, "loss": 24.0379, "step": 9475 }, { "epoch": 0.8794431554524362, "grad_norm": 57.40877914428711, "learning_rate": 3.9273248839912793e-07, "loss": 22.9738, "step": 9476 }, { "epoch": 0.8795359628770302, "grad_norm": 50.79579544067383, "learning_rate": 3.921365383783166e-07, "loss": 21.8328, "step": 9477 }, { "epoch": 0.8796287703016241, "grad_norm": 42.40015411376953, "learning_rate": 3.915410224065341e-07, "loss": 24.1306, "step": 9478 }, { "epoch": 0.8797215777262181, "grad_norm": 45.0009765625, "learning_rate": 3.9094594053987856e-07, "loss": 22.0364, "step": 9479 }, { "epoch": 0.8798143851508121, "grad_norm": 44.13018035888672, "learning_rate": 3.9035129283440165e-07, "loss": 22.3368, "step": 9480 }, { "epoch": 0.8799071925754061, "grad_norm": 43.254207611083984, "learning_rate": 3.8975707934612086e-07, "loss": 22.0523, "step": 9481 }, { "epoch": 0.88, "grad_norm": 39.4044303894043, "learning_rate": 3.89163300131008e-07, "loss": 22.7186, "step": 9482 }, { "epoch": 0.880092807424594, "grad_norm": 48.02632141113281, "learning_rate": 3.8856995524499617e-07, "loss": 23.8699, "step": 9483 }, { "epoch": 0.8801856148491879, "grad_norm": 44.83537673950195, "learning_rate": 3.879770447439757e-07, "loss": 22.6894, "step": 9484 }, { "epoch": 0.880278422273782, "grad_norm": 39.38096618652344, "learning_rate": 3.8738456868380014e-07, "loss": 21.1694, "step": 9485 }, { "epoch": 0.8803712296983759, "grad_norm": 53.407691955566406, "learning_rate": 3.867925271202755e-07, "loss": 24.3296, "step": 9486 }, { "epoch": 0.8804640371229698, "grad_norm": 51.3682861328125, "learning_rate": 3.862009201091732e-07, "loss": 22.1031, "step": 9487 }, { "epoch": 0.8805568445475638, "grad_norm": 51.1667594909668, "learning_rate": 3.856097477062209e-07, "loss": 22.6994, "step": 9488 }, { "epoch": 0.8806496519721577, "grad_norm": 64.26643371582031, "learning_rate": 3.8501900996710473e-07, "loss": 23.5488, "step": 9489 }, { "epoch": 0.8807424593967518, "grad_norm": 42.85670471191406, "learning_rate": 3.8442870694747e-07, "loss": 22.6617, "step": 9490 }, { "epoch": 0.8808352668213457, "grad_norm": 54.93069839477539, "learning_rate": 3.8383883870292405e-07, "loss": 21.9778, "step": 9491 }, { "epoch": 0.8809280742459397, "grad_norm": 47.87703323364258, "learning_rate": 3.8324940528902845e-07, "loss": 23.2778, "step": 9492 }, { "epoch": 0.8810208816705336, "grad_norm": 58.467247009277344, "learning_rate": 3.8266040676130824e-07, "loss": 20.8864, "step": 9493 }, { "epoch": 0.8811136890951277, "grad_norm": 70.41571044921875, "learning_rate": 3.820718431752446e-07, "loss": 22.9738, "step": 9494 }, { "epoch": 0.8812064965197216, "grad_norm": 50.740692138671875, "learning_rate": 3.8148371458627933e-07, "loss": 22.554, "step": 9495 }, { "epoch": 0.8812993039443155, "grad_norm": 54.401763916015625, "learning_rate": 3.8089602104981136e-07, "loss": 23.2065, "step": 9496 }, { "epoch": 0.8813921113689095, "grad_norm": 48.75249099731445, "learning_rate": 3.8030876262120264e-07, "loss": 24.4067, "step": 9497 }, { "epoch": 0.8814849187935034, "grad_norm": 47.85419845581055, "learning_rate": 3.797219393557677e-07, "loss": 22.4241, "step": 9498 }, { "epoch": 0.8815777262180975, "grad_norm": 42.41463088989258, "learning_rate": 3.7913555130878687e-07, "loss": 22.9105, "step": 9499 }, { "epoch": 0.8816705336426914, "grad_norm": 53.89733123779297, "learning_rate": 3.785495985354953e-07, "loss": 21.5338, "step": 9500 }, { "epoch": 0.8817633410672854, "grad_norm": 57.44932556152344, "learning_rate": 3.779640810910878e-07, "loss": 23.4932, "step": 9501 }, { "epoch": 0.8818561484918793, "grad_norm": 50.3206672668457, "learning_rate": 3.7737899903071863e-07, "loss": 25.9709, "step": 9502 }, { "epoch": 0.8819489559164733, "grad_norm": 41.333248138427734, "learning_rate": 3.7679435240950304e-07, "loss": 23.7207, "step": 9503 }, { "epoch": 0.8820417633410673, "grad_norm": 55.72567367553711, "learning_rate": 3.762101412825098e-07, "loss": 22.9501, "step": 9504 }, { "epoch": 0.8821345707656613, "grad_norm": 44.16583251953125, "learning_rate": 3.7562636570477316e-07, "loss": 23.861, "step": 9505 }, { "epoch": 0.8822273781902552, "grad_norm": 53.770263671875, "learning_rate": 3.7504302573128205e-07, "loss": 24.0151, "step": 9506 }, { "epoch": 0.8823201856148492, "grad_norm": 50.026634216308594, "learning_rate": 3.7446012141698574e-07, "loss": 21.3525, "step": 9507 }, { "epoch": 0.8824129930394432, "grad_norm": 41.34040451049805, "learning_rate": 3.7387765281679145e-07, "loss": 21.57, "step": 9508 }, { "epoch": 0.8825058004640371, "grad_norm": 55.21821594238281, "learning_rate": 3.732956199855686e-07, "loss": 23.5446, "step": 9509 }, { "epoch": 0.8825986078886311, "grad_norm": 55.16337203979492, "learning_rate": 3.727140229781401e-07, "loss": 22.105, "step": 9510 }, { "epoch": 0.882691415313225, "grad_norm": 43.39909744262695, "learning_rate": 3.721328618492931e-07, "loss": 21.9342, "step": 9511 }, { "epoch": 0.882784222737819, "grad_norm": 54.975425720214844, "learning_rate": 3.715521366537711e-07, "loss": 22.8148, "step": 9512 }, { "epoch": 0.882877030162413, "grad_norm": 46.83665084838867, "learning_rate": 3.70971847446277e-07, "loss": 23.1279, "step": 9513 }, { "epoch": 0.882969837587007, "grad_norm": 49.74616622924805, "learning_rate": 3.7039199428147154e-07, "loss": 23.1514, "step": 9514 }, { "epoch": 0.8830626450116009, "grad_norm": 41.53280258178711, "learning_rate": 3.698125772139771e-07, "loss": 22.3348, "step": 9515 }, { "epoch": 0.8831554524361949, "grad_norm": 45.39799880981445, "learning_rate": 3.6923359629837117e-07, "loss": 23.0145, "step": 9516 }, { "epoch": 0.8832482598607888, "grad_norm": 47.77035903930664, "learning_rate": 3.6865505158919454e-07, "loss": 22.3712, "step": 9517 }, { "epoch": 0.8833410672853829, "grad_norm": 46.760955810546875, "learning_rate": 3.6807694314094366e-07, "loss": 22.7614, "step": 9518 }, { "epoch": 0.8834338747099768, "grad_norm": 45.1274528503418, "learning_rate": 3.6749927100807437e-07, "loss": 22.8345, "step": 9519 }, { "epoch": 0.8835266821345708, "grad_norm": 51.395423889160156, "learning_rate": 3.6692203524500256e-07, "loss": 23.1512, "step": 9520 }, { "epoch": 0.8836194895591647, "grad_norm": 49.41496276855469, "learning_rate": 3.663452359061015e-07, "loss": 20.4031, "step": 9521 }, { "epoch": 0.8837122969837587, "grad_norm": 51.08439636230469, "learning_rate": 3.657688730457054e-07, "loss": 22.3119, "step": 9522 }, { "epoch": 0.8838051044083527, "grad_norm": 53.14482879638672, "learning_rate": 3.651929467181059e-07, "loss": 22.1738, "step": 9523 }, { "epoch": 0.8838979118329466, "grad_norm": 53.39820861816406, "learning_rate": 3.6461745697755345e-07, "loss": 22.8108, "step": 9524 }, { "epoch": 0.8839907192575406, "grad_norm": 44.71888732910156, "learning_rate": 3.6404240387825697e-07, "loss": 22.4173, "step": 9525 }, { "epoch": 0.8840835266821345, "grad_norm": 43.534183502197266, "learning_rate": 3.6346778747438803e-07, "loss": 22.954, "step": 9526 }, { "epoch": 0.8841763341067286, "grad_norm": 62.08293533325195, "learning_rate": 3.6289360782006956e-07, "loss": 21.2831, "step": 9527 }, { "epoch": 0.8842691415313225, "grad_norm": 57.09722137451172, "learning_rate": 3.6231986496939153e-07, "loss": 23.8842, "step": 9528 }, { "epoch": 0.8843619489559165, "grad_norm": 43.54206085205078, "learning_rate": 3.6174655897639746e-07, "loss": 22.3074, "step": 9529 }, { "epoch": 0.8844547563805104, "grad_norm": 49.44265365600586, "learning_rate": 3.6117368989509125e-07, "loss": 22.4441, "step": 9530 }, { "epoch": 0.8845475638051044, "grad_norm": 46.9544677734375, "learning_rate": 3.606012577794354e-07, "loss": 22.5458, "step": 9531 }, { "epoch": 0.8846403712296984, "grad_norm": 50.81195831298828, "learning_rate": 3.6002926268335405e-07, "loss": 22.8514, "step": 9532 }, { "epoch": 0.8847331786542924, "grad_norm": 53.81944274902344, "learning_rate": 3.594577046607239e-07, "loss": 22.9167, "step": 9533 }, { "epoch": 0.8848259860788863, "grad_norm": 52.395023345947266, "learning_rate": 3.5888658376538654e-07, "loss": 22.166, "step": 9534 }, { "epoch": 0.8849187935034802, "grad_norm": 57.16504669189453, "learning_rate": 3.5831590005114046e-07, "loss": 23.5918, "step": 9535 }, { "epoch": 0.8850116009280743, "grad_norm": 67.39329528808594, "learning_rate": 3.577456535717416e-07, "loss": 23.3639, "step": 9536 }, { "epoch": 0.8851044083526682, "grad_norm": 49.72423553466797, "learning_rate": 3.571758443809048e-07, "loss": 22.1904, "step": 9537 }, { "epoch": 0.8851972157772622, "grad_norm": 57.77580642700195, "learning_rate": 3.566064725323082e-07, "loss": 22.7215, "step": 9538 }, { "epoch": 0.8852900232018561, "grad_norm": 62.655941009521484, "learning_rate": 3.560375380795805e-07, "loss": 21.9673, "step": 9539 }, { "epoch": 0.8853828306264501, "grad_norm": 48.10779571533203, "learning_rate": 3.554690410763173e-07, "loss": 22.8782, "step": 9540 }, { "epoch": 0.8854756380510441, "grad_norm": 59.22517395019531, "learning_rate": 3.54900981576069e-07, "loss": 23.3904, "step": 9541 }, { "epoch": 0.8855684454756381, "grad_norm": 44.39751434326172, "learning_rate": 3.5433335963234396e-07, "loss": 22.692, "step": 9542 }, { "epoch": 0.885661252900232, "grad_norm": 44.55557632446289, "learning_rate": 3.53766175298611e-07, "loss": 23.9512, "step": 9543 }, { "epoch": 0.885754060324826, "grad_norm": 59.384971618652344, "learning_rate": 3.5319942862830024e-07, "loss": 22.6916, "step": 9544 }, { "epoch": 0.88584686774942, "grad_norm": 49.493473052978516, "learning_rate": 3.526331196747934e-07, "loss": 20.136, "step": 9545 }, { "epoch": 0.885939675174014, "grad_norm": 48.06697082519531, "learning_rate": 3.520672484914384e-07, "loss": 22.0006, "step": 9546 }, { "epoch": 0.8860324825986079, "grad_norm": 46.8712272644043, "learning_rate": 3.5150181513153803e-07, "loss": 22.1933, "step": 9547 }, { "epoch": 0.8861252900232018, "grad_norm": 46.35744857788086, "learning_rate": 3.509368196483548e-07, "loss": 21.21, "step": 9548 }, { "epoch": 0.8862180974477958, "grad_norm": 58.32688903808594, "learning_rate": 3.5037226209510886e-07, "loss": 22.8672, "step": 9549 }, { "epoch": 0.8863109048723898, "grad_norm": 64.02617645263672, "learning_rate": 3.498081425249827e-07, "loss": 21.1718, "step": 9550 }, { "epoch": 0.8864037122969838, "grad_norm": 55.06731033325195, "learning_rate": 3.492444609911111e-07, "loss": 21.8444, "step": 9551 }, { "epoch": 0.8864965197215777, "grad_norm": 46.20305633544922, "learning_rate": 3.4868121754659533e-07, "loss": 22.4505, "step": 9552 }, { "epoch": 0.8865893271461717, "grad_norm": 55.303951263427734, "learning_rate": 3.4811841224448805e-07, "loss": 21.8547, "step": 9553 }, { "epoch": 0.8866821345707656, "grad_norm": 66.58935546875, "learning_rate": 3.4755604513780683e-07, "loss": 23.5344, "step": 9554 }, { "epoch": 0.8867749419953597, "grad_norm": 46.10564041137695, "learning_rate": 3.4699411627952375e-07, "loss": 23.8575, "step": 9555 }, { "epoch": 0.8868677494199536, "grad_norm": 46.768314361572266, "learning_rate": 3.464326257225714e-07, "loss": 21.2051, "step": 9556 }, { "epoch": 0.8869605568445476, "grad_norm": 43.29874038696289, "learning_rate": 3.458715735198398e-07, "loss": 25.9258, "step": 9557 }, { "epoch": 0.8870533642691415, "grad_norm": 51.29227066040039, "learning_rate": 3.4531095972418103e-07, "loss": 21.15, "step": 9558 }, { "epoch": 0.8871461716937356, "grad_norm": 44.58399200439453, "learning_rate": 3.4475078438840124e-07, "loss": 23.3297, "step": 9559 }, { "epoch": 0.8872389791183295, "grad_norm": 43.3798713684082, "learning_rate": 3.4419104756526934e-07, "loss": 23.2758, "step": 9560 }, { "epoch": 0.8873317865429234, "grad_norm": 43.756866455078125, "learning_rate": 3.4363174930750976e-07, "loss": 21.831, "step": 9561 }, { "epoch": 0.8874245939675174, "grad_norm": 39.54043197631836, "learning_rate": 3.4307288966780703e-07, "loss": 21.31, "step": 9562 }, { "epoch": 0.8875174013921113, "grad_norm": 53.41866683959961, "learning_rate": 3.425144686988041e-07, "loss": 23.2365, "step": 9563 }, { "epoch": 0.8876102088167054, "grad_norm": 46.807533264160156, "learning_rate": 3.4195648645310443e-07, "loss": 22.8501, "step": 9564 }, { "epoch": 0.8877030162412993, "grad_norm": 47.556766510009766, "learning_rate": 3.4139894298326703e-07, "loss": 22.0023, "step": 9565 }, { "epoch": 0.8877958236658933, "grad_norm": 45.917564392089844, "learning_rate": 3.408418383418116e-07, "loss": 21.7662, "step": 9566 }, { "epoch": 0.8878886310904872, "grad_norm": 40.18209457397461, "learning_rate": 3.4028517258121563e-07, "loss": 22.0632, "step": 9567 }, { "epoch": 0.8879814385150812, "grad_norm": 44.628089904785156, "learning_rate": 3.3972894575391546e-07, "loss": 23.5695, "step": 9568 }, { "epoch": 0.8880742459396752, "grad_norm": 55.630836486816406, "learning_rate": 3.3917315791230753e-07, "loss": 22.2981, "step": 9569 }, { "epoch": 0.8881670533642692, "grad_norm": 54.61568832397461, "learning_rate": 3.386178091087444e-07, "loss": 22.0248, "step": 9570 }, { "epoch": 0.8882598607888631, "grad_norm": 55.06208419799805, "learning_rate": 3.380628993955387e-07, "loss": 20.8258, "step": 9571 }, { "epoch": 0.888352668213457, "grad_norm": 48.73500442504883, "learning_rate": 3.3750842882496136e-07, "loss": 23.0797, "step": 9572 }, { "epoch": 0.8884454756380511, "grad_norm": 48.560821533203125, "learning_rate": 3.3695439744924396e-07, "loss": 22.4312, "step": 9573 }, { "epoch": 0.888538283062645, "grad_norm": 46.240501403808594, "learning_rate": 3.3640080532057193e-07, "loss": 22.6558, "step": 9574 }, { "epoch": 0.888631090487239, "grad_norm": 44.83445739746094, "learning_rate": 3.358476524910942e-07, "loss": 22.6695, "step": 9575 }, { "epoch": 0.8887238979118329, "grad_norm": 45.23957061767578, "learning_rate": 3.3529493901291567e-07, "loss": 21.8028, "step": 9576 }, { "epoch": 0.8888167053364269, "grad_norm": 64.81147003173828, "learning_rate": 3.347426649381008e-07, "loss": 21.1042, "step": 9577 }, { "epoch": 0.8889095127610209, "grad_norm": 67.21035766601562, "learning_rate": 3.3419083031867137e-07, "loss": 23.4355, "step": 9578 }, { "epoch": 0.8890023201856149, "grad_norm": 43.40477752685547, "learning_rate": 3.336394352066114e-07, "loss": 24.4475, "step": 9579 }, { "epoch": 0.8890951276102088, "grad_norm": 41.92205047607422, "learning_rate": 3.3308847965385746e-07, "loss": 22.1315, "step": 9580 }, { "epoch": 0.8891879350348028, "grad_norm": 58.54566955566406, "learning_rate": 3.325379637123105e-07, "loss": 22.0899, "step": 9581 }, { "epoch": 0.8892807424593967, "grad_norm": 46.3424186706543, "learning_rate": 3.3198788743382784e-07, "loss": 23.0128, "step": 9582 }, { "epoch": 0.8893735498839908, "grad_norm": 72.11888122558594, "learning_rate": 3.314382508702241e-07, "loss": 25.5776, "step": 9583 }, { "epoch": 0.8894663573085847, "grad_norm": 43.27743911743164, "learning_rate": 3.308890540732734e-07, "loss": 22.7108, "step": 9584 }, { "epoch": 0.8895591647331786, "grad_norm": 48.29619598388672, "learning_rate": 3.303402970947106e-07, "loss": 23.2541, "step": 9585 }, { "epoch": 0.8896519721577726, "grad_norm": 60.22664260864258, "learning_rate": 3.297919799862248e-07, "loss": 23.2297, "step": 9586 }, { "epoch": 0.8897447795823666, "grad_norm": 48.851409912109375, "learning_rate": 3.2924410279946914e-07, "loss": 22.4319, "step": 9587 }, { "epoch": 0.8898375870069606, "grad_norm": 44.43635177612305, "learning_rate": 3.286966655860485e-07, "loss": 22.558, "step": 9588 }, { "epoch": 0.8899303944315545, "grad_norm": 50.58085632324219, "learning_rate": 3.2814966839753273e-07, "loss": 20.8663, "step": 9589 }, { "epoch": 0.8900232018561485, "grad_norm": 46.891971588134766, "learning_rate": 3.276031112854466e-07, "loss": 22.8696, "step": 9590 }, { "epoch": 0.8901160092807424, "grad_norm": 43.783485412597656, "learning_rate": 3.2705699430127524e-07, "loss": 21.9876, "step": 9591 }, { "epoch": 0.8902088167053365, "grad_norm": 52.321388244628906, "learning_rate": 3.265113174964596e-07, "loss": 23.1453, "step": 9592 }, { "epoch": 0.8903016241299304, "grad_norm": 49.87046813964844, "learning_rate": 3.2596608092240413e-07, "loss": 22.4733, "step": 9593 }, { "epoch": 0.8903944315545244, "grad_norm": 58.98543930053711, "learning_rate": 3.2542128463046495e-07, "loss": 21.7069, "step": 9594 }, { "epoch": 0.8904872389791183, "grad_norm": 66.6336898803711, "learning_rate": 3.2487692867196384e-07, "loss": 20.9052, "step": 9595 }, { "epoch": 0.8905800464037122, "grad_norm": 49.555599212646484, "learning_rate": 3.243330130981759e-07, "loss": 23.5039, "step": 9596 }, { "epoch": 0.8906728538283063, "grad_norm": 47.429054260253906, "learning_rate": 3.237895379603373e-07, "loss": 23.451, "step": 9597 }, { "epoch": 0.8907656612529002, "grad_norm": 47.021705627441406, "learning_rate": 3.23246503309641e-07, "loss": 21.0583, "step": 9598 }, { "epoch": 0.8908584686774942, "grad_norm": 50.274375915527344, "learning_rate": 3.227039091972417e-07, "loss": 22.3132, "step": 9599 }, { "epoch": 0.8909512761020881, "grad_norm": 51.6857795715332, "learning_rate": 3.2216175567424737e-07, "loss": 21.8847, "step": 9600 }, { "epoch": 0.8910440835266822, "grad_norm": 39.55244445800781, "learning_rate": 3.216200427917299e-07, "loss": 23.0891, "step": 9601 }, { "epoch": 0.8911368909512761, "grad_norm": 43.99661636352539, "learning_rate": 3.2107877060071624e-07, "loss": 21.6003, "step": 9602 }, { "epoch": 0.8912296983758701, "grad_norm": 41.66117858886719, "learning_rate": 3.205379391521934e-07, "loss": 23.6065, "step": 9603 }, { "epoch": 0.891322505800464, "grad_norm": 54.84547424316406, "learning_rate": 3.1999754849710517e-07, "loss": 23.5816, "step": 9604 }, { "epoch": 0.891415313225058, "grad_norm": 45.2086296081543, "learning_rate": 3.194575986863563e-07, "loss": 21.5439, "step": 9605 }, { "epoch": 0.891508120649652, "grad_norm": 42.81178665161133, "learning_rate": 3.189180897708083e-07, "loss": 22.8385, "step": 9606 }, { "epoch": 0.891600928074246, "grad_norm": 51.01221466064453, "learning_rate": 3.183790218012811e-07, "loss": 22.054, "step": 9607 }, { "epoch": 0.8916937354988399, "grad_norm": 51.408836364746094, "learning_rate": 3.178403948285541e-07, "loss": 23.4104, "step": 9608 }, { "epoch": 0.8917865429234338, "grad_norm": 47.73313903808594, "learning_rate": 3.173022089033645e-07, "loss": 23.2272, "step": 9609 }, { "epoch": 0.8918793503480278, "grad_norm": 47.171287536621094, "learning_rate": 3.167644640764073e-07, "loss": 21.4684, "step": 9610 }, { "epoch": 0.8919721577726218, "grad_norm": 44.066226959228516, "learning_rate": 3.1622716039833745e-07, "loss": 21.5368, "step": 9611 }, { "epoch": 0.8920649651972158, "grad_norm": 57.21722412109375, "learning_rate": 3.156902979197679e-07, "loss": 22.0237, "step": 9612 }, { "epoch": 0.8921577726218097, "grad_norm": 48.90345001220703, "learning_rate": 3.151538766912687e-07, "loss": 22.9495, "step": 9613 }, { "epoch": 0.8922505800464037, "grad_norm": 47.2138671875, "learning_rate": 3.1461789676337064e-07, "loss": 22.7184, "step": 9614 }, { "epoch": 0.8923433874709977, "grad_norm": 44.57136917114258, "learning_rate": 3.1408235818655985e-07, "loss": 23.413, "step": 9615 }, { "epoch": 0.8924361948955917, "grad_norm": 50.419559478759766, "learning_rate": 3.135472610112844e-07, "loss": 23.1593, "step": 9616 }, { "epoch": 0.8925290023201856, "grad_norm": 56.7991943359375, "learning_rate": 3.130126052879484e-07, "loss": 24.0073, "step": 9617 }, { "epoch": 0.8926218097447796, "grad_norm": 47.0989990234375, "learning_rate": 3.124783910669155e-07, "loss": 22.4861, "step": 9618 }, { "epoch": 0.8927146171693735, "grad_norm": 45.44621276855469, "learning_rate": 3.1194461839850586e-07, "loss": 24.782, "step": 9619 }, { "epoch": 0.8928074245939676, "grad_norm": 65.44982147216797, "learning_rate": 3.114112873330022e-07, "loss": 25.1447, "step": 9620 }, { "epoch": 0.8929002320185615, "grad_norm": 49.77355194091797, "learning_rate": 3.108783979206403e-07, "loss": 24.5536, "step": 9621 }, { "epoch": 0.8929930394431554, "grad_norm": 46.264862060546875, "learning_rate": 3.1034595021161905e-07, "loss": 22.6489, "step": 9622 }, { "epoch": 0.8930858468677494, "grad_norm": 51.150665283203125, "learning_rate": 3.098139442560916e-07, "loss": 22.2396, "step": 9623 }, { "epoch": 0.8931786542923433, "grad_norm": 41.56504440307617, "learning_rate": 3.0928238010417275e-07, "loss": 20.6898, "step": 9624 }, { "epoch": 0.8932714617169374, "grad_norm": 48.41680145263672, "learning_rate": 3.0875125780593487e-07, "loss": 24.7428, "step": 9625 }, { "epoch": 0.8933642691415313, "grad_norm": 51.60981369018555, "learning_rate": 3.082205774114078e-07, "loss": 24.3597, "step": 9626 }, { "epoch": 0.8934570765661253, "grad_norm": 53.78084182739258, "learning_rate": 3.076903389705799e-07, "loss": 24.1103, "step": 9627 }, { "epoch": 0.8935498839907192, "grad_norm": 47.451690673828125, "learning_rate": 3.0716054253339953e-07, "loss": 21.8119, "step": 9628 }, { "epoch": 0.8936426914153133, "grad_norm": 47.965248107910156, "learning_rate": 3.066311881497702e-07, "loss": 21.3878, "step": 9629 }, { "epoch": 0.8937354988399072, "grad_norm": 41.94217300415039, "learning_rate": 3.0610227586955753e-07, "loss": 21.5702, "step": 9630 }, { "epoch": 0.8938283062645012, "grad_norm": 47.82911682128906, "learning_rate": 3.0557380574258334e-07, "loss": 23.6232, "step": 9631 }, { "epoch": 0.8939211136890951, "grad_norm": 53.626983642578125, "learning_rate": 3.050457778186278e-07, "loss": 23.8081, "step": 9632 }, { "epoch": 0.894013921113689, "grad_norm": 43.503814697265625, "learning_rate": 3.045181921474288e-07, "loss": 21.9123, "step": 9633 }, { "epoch": 0.8941067285382831, "grad_norm": 46.38570022583008, "learning_rate": 3.0399104877868623e-07, "loss": 24.7665, "step": 9634 }, { "epoch": 0.894199535962877, "grad_norm": 52.97435760498047, "learning_rate": 3.0346434776205303e-07, "loss": 22.4469, "step": 9635 }, { "epoch": 0.894292343387471, "grad_norm": 42.067562103271484, "learning_rate": 3.029380891471445e-07, "loss": 22.8115, "step": 9636 }, { "epoch": 0.8943851508120649, "grad_norm": 46.64530563354492, "learning_rate": 3.0241227298353273e-07, "loss": 22.7818, "step": 9637 }, { "epoch": 0.894477958236659, "grad_norm": 42.6097297668457, "learning_rate": 3.01886899320748e-07, "loss": 21.2531, "step": 9638 }, { "epoch": 0.8945707656612529, "grad_norm": 50.173004150390625, "learning_rate": 3.0136196820827856e-07, "loss": 23.7197, "step": 9639 }, { "epoch": 0.8946635730858469, "grad_norm": 46.910282135009766, "learning_rate": 3.0083747969557377e-07, "loss": 23.9089, "step": 9640 }, { "epoch": 0.8947563805104408, "grad_norm": 44.31652069091797, "learning_rate": 3.0031343383203626e-07, "loss": 24.2644, "step": 9641 }, { "epoch": 0.8948491879350348, "grad_norm": 43.9557991027832, "learning_rate": 2.997898306670322e-07, "loss": 24.5172, "step": 9642 }, { "epoch": 0.8949419953596288, "grad_norm": 53.55644989013672, "learning_rate": 2.9926667024988197e-07, "loss": 23.5677, "step": 9643 }, { "epoch": 0.8950348027842228, "grad_norm": 47.5320930480957, "learning_rate": 2.987439526298669e-07, "loss": 22.7112, "step": 9644 }, { "epoch": 0.8951276102088167, "grad_norm": 49.91411590576172, "learning_rate": 2.982216778562247e-07, "loss": 20.8095, "step": 9645 }, { "epoch": 0.8952204176334106, "grad_norm": 46.923423767089844, "learning_rate": 2.976998459781544e-07, "loss": 22.733, "step": 9646 }, { "epoch": 0.8953132250580046, "grad_norm": 56.957984924316406, "learning_rate": 2.971784570448083e-07, "loss": 23.5091, "step": 9647 }, { "epoch": 0.8954060324825986, "grad_norm": 45.43132019042969, "learning_rate": 2.966575111053027e-07, "loss": 21.5037, "step": 9648 }, { "epoch": 0.8954988399071926, "grad_norm": 55.677101135253906, "learning_rate": 2.961370082087078e-07, "loss": 23.8218, "step": 9649 }, { "epoch": 0.8955916473317865, "grad_norm": 47.88837432861328, "learning_rate": 2.9561694840405387e-07, "loss": 23.6592, "step": 9650 }, { "epoch": 0.8956844547563805, "grad_norm": 39.50502014160156, "learning_rate": 2.9509733174032887e-07, "loss": 23.1868, "step": 9651 }, { "epoch": 0.8957772621809745, "grad_norm": 53.19398880004883, "learning_rate": 2.945781582664803e-07, "loss": 24.1233, "step": 9652 }, { "epoch": 0.8958700696055685, "grad_norm": 56.00448989868164, "learning_rate": 2.94059428031413e-07, "loss": 23.5149, "step": 9653 }, { "epoch": 0.8959628770301624, "grad_norm": 48.68688201904297, "learning_rate": 2.935411410839889e-07, "loss": 23.0111, "step": 9654 }, { "epoch": 0.8960556844547564, "grad_norm": 44.247867584228516, "learning_rate": 2.930232974730307e-07, "loss": 22.5968, "step": 9655 }, { "epoch": 0.8961484918793503, "grad_norm": 56.759647369384766, "learning_rate": 2.9250589724731717e-07, "loss": 21.5011, "step": 9656 }, { "epoch": 0.8962412993039444, "grad_norm": 61.78764724731445, "learning_rate": 2.919889404555859e-07, "loss": 23.0372, "step": 9657 }, { "epoch": 0.8963341067285383, "grad_norm": 52.13287353515625, "learning_rate": 2.9147242714653243e-07, "loss": 24.843, "step": 9658 }, { "epoch": 0.8964269141531322, "grad_norm": 53.629310607910156, "learning_rate": 2.9095635736881236e-07, "loss": 21.0867, "step": 9659 }, { "epoch": 0.8965197215777262, "grad_norm": 51.32081985473633, "learning_rate": 2.9044073117103777e-07, "loss": 23.8832, "step": 9660 }, { "epoch": 0.8966125290023201, "grad_norm": 55.225669860839844, "learning_rate": 2.899255486017794e-07, "loss": 22.9101, "step": 9661 }, { "epoch": 0.8967053364269142, "grad_norm": 50.163455963134766, "learning_rate": 2.894108097095644e-07, "loss": 22.5592, "step": 9662 }, { "epoch": 0.8967981438515081, "grad_norm": 55.47395324707031, "learning_rate": 2.888965145428835e-07, "loss": 22.917, "step": 9663 }, { "epoch": 0.8968909512761021, "grad_norm": 49.37908935546875, "learning_rate": 2.8838266315017795e-07, "loss": 22.0304, "step": 9664 }, { "epoch": 0.896983758700696, "grad_norm": 50.422401428222656, "learning_rate": 2.8786925557985403e-07, "loss": 21.497, "step": 9665 }, { "epoch": 0.8970765661252901, "grad_norm": 54.652706146240234, "learning_rate": 2.8735629188027247e-07, "loss": 23.6894, "step": 9666 }, { "epoch": 0.897169373549884, "grad_norm": 44.80002975463867, "learning_rate": 2.86843772099753e-07, "loss": 22.9688, "step": 9667 }, { "epoch": 0.897262180974478, "grad_norm": 52.17966079711914, "learning_rate": 2.8633169628657307e-07, "loss": 22.3233, "step": 9668 }, { "epoch": 0.8973549883990719, "grad_norm": 55.45677947998047, "learning_rate": 2.858200644889714e-07, "loss": 22.456, "step": 9669 }, { "epoch": 0.8974477958236659, "grad_norm": 57.656532287597656, "learning_rate": 2.853088767551393e-07, "loss": 23.4355, "step": 9670 }, { "epoch": 0.8975406032482599, "grad_norm": 41.99543762207031, "learning_rate": 2.8479813313323115e-07, "loss": 22.2675, "step": 9671 }, { "epoch": 0.8976334106728538, "grad_norm": 51.52960968017578, "learning_rate": 2.842878336713578e-07, "loss": 22.2469, "step": 9672 }, { "epoch": 0.8977262180974478, "grad_norm": 61.88704299926758, "learning_rate": 2.837779784175876e-07, "loss": 24.8564, "step": 9673 }, { "epoch": 0.8978190255220417, "grad_norm": 60.701595306396484, "learning_rate": 2.8326856741994645e-07, "loss": 22.9621, "step": 9674 }, { "epoch": 0.8979118329466357, "grad_norm": 45.536529541015625, "learning_rate": 2.827596007264227e-07, "loss": 21.3532, "step": 9675 }, { "epoch": 0.8980046403712297, "grad_norm": 45.44068908691406, "learning_rate": 2.822510783849564e-07, "loss": 22.834, "step": 9676 }, { "epoch": 0.8980974477958237, "grad_norm": 44.45988845825195, "learning_rate": 2.8174300044345125e-07, "loss": 22.4307, "step": 9677 }, { "epoch": 0.8981902552204176, "grad_norm": 52.17845916748047, "learning_rate": 2.8123536694976636e-07, "loss": 24.4228, "step": 9678 }, { "epoch": 0.8982830626450116, "grad_norm": 47.314979553222656, "learning_rate": 2.80728177951719e-07, "loss": 23.5393, "step": 9679 }, { "epoch": 0.8983758700696056, "grad_norm": 49.08024597167969, "learning_rate": 2.8022143349708485e-07, "loss": 23.7752, "step": 9680 }, { "epoch": 0.8984686774941996, "grad_norm": 49.61980056762695, "learning_rate": 2.797151336335996e-07, "loss": 22.6592, "step": 9681 }, { "epoch": 0.8985614849187935, "grad_norm": 47.86085510253906, "learning_rate": 2.792092784089534e-07, "loss": 23.5609, "step": 9682 }, { "epoch": 0.8986542923433875, "grad_norm": 52.341182708740234, "learning_rate": 2.787038678707976e-07, "loss": 24.8705, "step": 9683 }, { "epoch": 0.8987470997679814, "grad_norm": 50.02412414550781, "learning_rate": 2.7819890206674083e-07, "loss": 21.5404, "step": 9684 }, { "epoch": 0.8988399071925754, "grad_norm": 43.353885650634766, "learning_rate": 2.7769438104434943e-07, "loss": 20.9852, "step": 9685 }, { "epoch": 0.8989327146171694, "grad_norm": 45.55221176147461, "learning_rate": 2.771903048511465e-07, "loss": 23.614, "step": 9686 }, { "epoch": 0.8990255220417633, "grad_norm": 55.41497802734375, "learning_rate": 2.766866735346174e-07, "loss": 23.5633, "step": 9687 }, { "epoch": 0.8991183294663573, "grad_norm": 52.65095520019531, "learning_rate": 2.7618348714220034e-07, "loss": 21.4412, "step": 9688 }, { "epoch": 0.8992111368909512, "grad_norm": 45.8255729675293, "learning_rate": 2.756807457212962e-07, "loss": 20.7043, "step": 9689 }, { "epoch": 0.8993039443155453, "grad_norm": 60.21347427368164, "learning_rate": 2.7517844931926106e-07, "loss": 24.5548, "step": 9690 }, { "epoch": 0.8993967517401392, "grad_norm": 51.71105194091797, "learning_rate": 2.746765979834098e-07, "loss": 22.7677, "step": 9691 }, { "epoch": 0.8994895591647332, "grad_norm": 41.243812561035156, "learning_rate": 2.741751917610158e-07, "loss": 22.7676, "step": 9692 }, { "epoch": 0.8995823665893271, "grad_norm": 49.908939361572266, "learning_rate": 2.736742306993101e-07, "loss": 22.4257, "step": 9693 }, { "epoch": 0.8996751740139212, "grad_norm": 48.68513870239258, "learning_rate": 2.731737148454816e-07, "loss": 22.8801, "step": 9694 }, { "epoch": 0.8997679814385151, "grad_norm": 41.824771881103516, "learning_rate": 2.7267364424667876e-07, "loss": 21.7429, "step": 9695 }, { "epoch": 0.899860788863109, "grad_norm": 46.652671813964844, "learning_rate": 2.7217401895000664e-07, "loss": 23.248, "step": 9696 }, { "epoch": 0.899953596287703, "grad_norm": 48.233543395996094, "learning_rate": 2.716748390025276e-07, "loss": 22.6175, "step": 9697 }, { "epoch": 0.9000464037122969, "grad_norm": 44.69336700439453, "learning_rate": 2.71176104451264e-07, "loss": 22.78, "step": 9698 }, { "epoch": 0.900139211136891, "grad_norm": 55.76747512817383, "learning_rate": 2.7067781534319493e-07, "loss": 23.208, "step": 9699 }, { "epoch": 0.9002320185614849, "grad_norm": 49.317237854003906, "learning_rate": 2.7017997172525846e-07, "loss": 22.5141, "step": 9700 }, { "epoch": 0.9003248259860789, "grad_norm": 50.525394439697266, "learning_rate": 2.6968257364435033e-07, "loss": 22.3845, "step": 9701 }, { "epoch": 0.9004176334106728, "grad_norm": 49.03762435913086, "learning_rate": 2.6918562114732374e-07, "loss": 22.6823, "step": 9702 }, { "epoch": 0.9005104408352668, "grad_norm": 49.34785079956055, "learning_rate": 2.6868911428099055e-07, "loss": 23.5348, "step": 9703 }, { "epoch": 0.9006032482598608, "grad_norm": 58.04082489013672, "learning_rate": 2.6819305309212005e-07, "loss": 23.055, "step": 9704 }, { "epoch": 0.9006960556844548, "grad_norm": 45.53787612915039, "learning_rate": 2.6769743762743994e-07, "loss": 21.7301, "step": 9705 }, { "epoch": 0.9007888631090487, "grad_norm": 60.829383850097656, "learning_rate": 2.672022679336367e-07, "loss": 20.7139, "step": 9706 }, { "epoch": 0.9008816705336427, "grad_norm": 46.55321502685547, "learning_rate": 2.667075440573536e-07, "loss": 22.6066, "step": 9707 }, { "epoch": 0.9009744779582367, "grad_norm": 46.19241714477539, "learning_rate": 2.6621326604519216e-07, "loss": 22.4579, "step": 9708 }, { "epoch": 0.9010672853828307, "grad_norm": 52.410762786865234, "learning_rate": 2.6571943394371136e-07, "loss": 23.8132, "step": 9709 }, { "epoch": 0.9011600928074246, "grad_norm": 43.827518463134766, "learning_rate": 2.652260477994317e-07, "loss": 23.0774, "step": 9710 }, { "epoch": 0.9012529002320185, "grad_norm": 54.16154098510742, "learning_rate": 2.647331076588255e-07, "loss": 22.8593, "step": 9711 }, { "epoch": 0.9013457076566125, "grad_norm": 52.06182861328125, "learning_rate": 2.64240613568329e-07, "loss": 25.4922, "step": 9712 }, { "epoch": 0.9014385150812065, "grad_norm": 44.22379684448242, "learning_rate": 2.637485655743327e-07, "loss": 22.7447, "step": 9713 }, { "epoch": 0.9015313225058005, "grad_norm": 45.14878463745117, "learning_rate": 2.6325696372318687e-07, "loss": 24.7759, "step": 9714 }, { "epoch": 0.9016241299303944, "grad_norm": 47.83121871948242, "learning_rate": 2.6276580806119776e-07, "loss": 21.4908, "step": 9715 }, { "epoch": 0.9017169373549884, "grad_norm": 51.690208435058594, "learning_rate": 2.622750986346334e-07, "loss": 24.0838, "step": 9716 }, { "epoch": 0.9018097447795823, "grad_norm": 45.78809356689453, "learning_rate": 2.6178483548971456e-07, "loss": 22.1501, "step": 9717 }, { "epoch": 0.9019025522041764, "grad_norm": 46.5643310546875, "learning_rate": 2.6129501867262486e-07, "loss": 22.0653, "step": 9718 }, { "epoch": 0.9019953596287703, "grad_norm": 48.98332214355469, "learning_rate": 2.608056482295035e-07, "loss": 23.3733, "step": 9719 }, { "epoch": 0.9020881670533643, "grad_norm": 48.992252349853516, "learning_rate": 2.6031672420644694e-07, "loss": 22.5678, "step": 9720 }, { "epoch": 0.9021809744779582, "grad_norm": 45.82499313354492, "learning_rate": 2.598282466495111e-07, "loss": 22.2937, "step": 9721 }, { "epoch": 0.9022737819025523, "grad_norm": 50.90786361694336, "learning_rate": 2.593402156047109e-07, "loss": 23.4195, "step": 9722 }, { "epoch": 0.9023665893271462, "grad_norm": 44.11929702758789, "learning_rate": 2.588526311180145e-07, "loss": 21.6601, "step": 9723 }, { "epoch": 0.9024593967517401, "grad_norm": 67.65169525146484, "learning_rate": 2.583654932353535e-07, "loss": 22.1774, "step": 9724 }, { "epoch": 0.9025522041763341, "grad_norm": 53.16868209838867, "learning_rate": 2.578788020026141e-07, "loss": 22.4215, "step": 9725 }, { "epoch": 0.902645011600928, "grad_norm": 50.512107849121094, "learning_rate": 2.573925574656422e-07, "loss": 22.3586, "step": 9726 }, { "epoch": 0.9027378190255221, "grad_norm": 57.19210433959961, "learning_rate": 2.5690675967023905e-07, "loss": 21.1398, "step": 9727 }, { "epoch": 0.902830626450116, "grad_norm": 58.054664611816406, "learning_rate": 2.5642140866216805e-07, "loss": 22.4736, "step": 9728 }, { "epoch": 0.90292343387471, "grad_norm": 44.03322982788086, "learning_rate": 2.5593650448714593e-07, "loss": 21.8879, "step": 9729 }, { "epoch": 0.9030162412993039, "grad_norm": 46.326595306396484, "learning_rate": 2.554520471908511e-07, "loss": 22.6327, "step": 9730 }, { "epoch": 0.903109048723898, "grad_norm": 46.29991149902344, "learning_rate": 2.5496803681891615e-07, "loss": 21.8666, "step": 9731 }, { "epoch": 0.9032018561484919, "grad_norm": 46.78775405883789, "learning_rate": 2.5448447341693493e-07, "loss": 22.8189, "step": 9732 }, { "epoch": 0.9032946635730859, "grad_norm": 52.76607131958008, "learning_rate": 2.540013570304584e-07, "loss": 22.441, "step": 9733 }, { "epoch": 0.9033874709976798, "grad_norm": 56.750614166259766, "learning_rate": 2.535186877049939e-07, "loss": 22.2377, "step": 9734 }, { "epoch": 0.9034802784222737, "grad_norm": 44.517024993896484, "learning_rate": 2.530364654860079e-07, "loss": 22.0191, "step": 9735 }, { "epoch": 0.9035730858468678, "grad_norm": 42.93142318725586, "learning_rate": 2.5255469041892464e-07, "loss": 22.4234, "step": 9736 }, { "epoch": 0.9036658932714617, "grad_norm": 51.686561584472656, "learning_rate": 2.520733625491262e-07, "loss": 23.0464, "step": 9737 }, { "epoch": 0.9037587006960557, "grad_norm": 41.094356536865234, "learning_rate": 2.5159248192195284e-07, "loss": 24.5544, "step": 9738 }, { "epoch": 0.9038515081206496, "grad_norm": 45.22549057006836, "learning_rate": 2.511120485827012e-07, "loss": 21.219, "step": 9739 }, { "epoch": 0.9039443155452436, "grad_norm": 48.648529052734375, "learning_rate": 2.506320625766273e-07, "loss": 23.1183, "step": 9740 }, { "epoch": 0.9040371229698376, "grad_norm": 55.93132019042969, "learning_rate": 2.501525239489444e-07, "loss": 23.4034, "step": 9741 }, { "epoch": 0.9041299303944316, "grad_norm": 45.953643798828125, "learning_rate": 2.4967343274482523e-07, "loss": 22.3051, "step": 9742 }, { "epoch": 0.9042227378190255, "grad_norm": 50.428951263427734, "learning_rate": 2.4919478900939765e-07, "loss": 23.5392, "step": 9743 }, { "epoch": 0.9043155452436195, "grad_norm": 49.21091079711914, "learning_rate": 2.4871659278774884e-07, "loss": 21.4502, "step": 9744 }, { "epoch": 0.9044083526682135, "grad_norm": 44.502281188964844, "learning_rate": 2.482388441249234e-07, "loss": 22.1694, "step": 9745 }, { "epoch": 0.9045011600928075, "grad_norm": 50.68897247314453, "learning_rate": 2.477615430659241e-07, "loss": 23.0439, "step": 9746 }, { "epoch": 0.9045939675174014, "grad_norm": 51.054683685302734, "learning_rate": 2.472846896557124e-07, "loss": 22.004, "step": 9747 }, { "epoch": 0.9046867749419953, "grad_norm": 42.728755950927734, "learning_rate": 2.468082839392061e-07, "loss": 22.1293, "step": 9748 }, { "epoch": 0.9047795823665893, "grad_norm": 46.502532958984375, "learning_rate": 2.463323259612815e-07, "loss": 21.346, "step": 9749 }, { "epoch": 0.9048723897911833, "grad_norm": 51.66471862792969, "learning_rate": 2.458568157667729e-07, "loss": 24.1154, "step": 9750 }, { "epoch": 0.9049651972157773, "grad_norm": 44.00425720214844, "learning_rate": 2.45381753400471e-07, "loss": 22.7301, "step": 9751 }, { "epoch": 0.9050580046403712, "grad_norm": 48.4002571105957, "learning_rate": 2.4490713890712616e-07, "loss": 21.7397, "step": 9752 }, { "epoch": 0.9051508120649652, "grad_norm": 47.051116943359375, "learning_rate": 2.444329723314459e-07, "loss": 23.6452, "step": 9753 }, { "epoch": 0.9052436194895591, "grad_norm": 137.50958251953125, "learning_rate": 2.439592537180963e-07, "loss": 22.6358, "step": 9754 }, { "epoch": 0.9053364269141532, "grad_norm": 54.57889175415039, "learning_rate": 2.4348598311169925e-07, "loss": 22.8713, "step": 9755 }, { "epoch": 0.9054292343387471, "grad_norm": 60.250457763671875, "learning_rate": 2.430131605568353e-07, "loss": 23.1451, "step": 9756 }, { "epoch": 0.9055220417633411, "grad_norm": 43.239295959472656, "learning_rate": 2.4254078609804554e-07, "loss": 22.459, "step": 9757 }, { "epoch": 0.905614849187935, "grad_norm": 41.645355224609375, "learning_rate": 2.4206885977982373e-07, "loss": 22.8523, "step": 9758 }, { "epoch": 0.905707656612529, "grad_norm": 47.22822570800781, "learning_rate": 2.415973816466255e-07, "loss": 23.9808, "step": 9759 }, { "epoch": 0.905800464037123, "grad_norm": 46.32785415649414, "learning_rate": 2.41126351742863e-07, "loss": 21.9207, "step": 9760 }, { "epoch": 0.9058932714617169, "grad_norm": 52.80024719238281, "learning_rate": 2.406557701129053e-07, "loss": 24.9553, "step": 9761 }, { "epoch": 0.9059860788863109, "grad_norm": 44.51327896118164, "learning_rate": 2.4018563680107964e-07, "loss": 23.0105, "step": 9762 }, { "epoch": 0.9060788863109048, "grad_norm": 43.95375061035156, "learning_rate": 2.397159518516734e-07, "loss": 22.8179, "step": 9763 }, { "epoch": 0.9061716937354989, "grad_norm": 51.30051803588867, "learning_rate": 2.392467153089273e-07, "loss": 21.5433, "step": 9764 }, { "epoch": 0.9062645011600928, "grad_norm": 54.473289489746094, "learning_rate": 2.3877792721704486e-07, "loss": 21.418, "step": 9765 }, { "epoch": 0.9063573085846868, "grad_norm": 61.97482681274414, "learning_rate": 2.383095876201813e-07, "loss": 21.2862, "step": 9766 }, { "epoch": 0.9064501160092807, "grad_norm": 48.269168853759766, "learning_rate": 2.3784169656245582e-07, "loss": 23.368, "step": 9767 }, { "epoch": 0.9065429234338747, "grad_norm": 54.649147033691406, "learning_rate": 2.3737425408794202e-07, "loss": 21.0611, "step": 9768 }, { "epoch": 0.9066357308584687, "grad_norm": 52.485782623291016, "learning_rate": 2.369072602406708e-07, "loss": 22.7855, "step": 9769 }, { "epoch": 0.9067285382830627, "grad_norm": 54.14475631713867, "learning_rate": 2.364407150646325e-07, "loss": 21.7289, "step": 9770 }, { "epoch": 0.9068213457076566, "grad_norm": 51.281898498535156, "learning_rate": 2.3597461860377534e-07, "loss": 22.4678, "step": 9771 }, { "epoch": 0.9069141531322505, "grad_norm": 44.63700866699219, "learning_rate": 2.35508970902002e-07, "loss": 23.5842, "step": 9772 }, { "epoch": 0.9070069605568446, "grad_norm": 46.84646224975586, "learning_rate": 2.3504377200317785e-07, "loss": 23.676, "step": 9773 }, { "epoch": 0.9070997679814385, "grad_norm": 53.65129852294922, "learning_rate": 2.3457902195112236e-07, "loss": 22.6337, "step": 9774 }, { "epoch": 0.9071925754060325, "grad_norm": 42.050533294677734, "learning_rate": 2.3411472078961385e-07, "loss": 21.0147, "step": 9775 }, { "epoch": 0.9072853828306264, "grad_norm": 50.75333023071289, "learning_rate": 2.3365086856238785e-07, "loss": 23.6361, "step": 9776 }, { "epoch": 0.9073781902552204, "grad_norm": 46.17741394042969, "learning_rate": 2.331874653131394e-07, "loss": 21.462, "step": 9777 }, { "epoch": 0.9074709976798144, "grad_norm": 42.0527458190918, "learning_rate": 2.3272451108551807e-07, "loss": 22.3761, "step": 9778 }, { "epoch": 0.9075638051044084, "grad_norm": 51.543975830078125, "learning_rate": 2.3226200592313453e-07, "loss": 22.748, "step": 9779 }, { "epoch": 0.9076566125290023, "grad_norm": 47.13154220581055, "learning_rate": 2.31799949869555e-07, "loss": 22.4522, "step": 9780 }, { "epoch": 0.9077494199535963, "grad_norm": 47.16547393798828, "learning_rate": 2.313383429683036e-07, "loss": 22.4677, "step": 9781 }, { "epoch": 0.9078422273781902, "grad_norm": 50.97734069824219, "learning_rate": 2.3087718526286274e-07, "loss": 22.3767, "step": 9782 }, { "epoch": 0.9079350348027843, "grad_norm": 65.21635437011719, "learning_rate": 2.3041647679667323e-07, "loss": 23.3437, "step": 9783 }, { "epoch": 0.9080278422273782, "grad_norm": 43.281673431396484, "learning_rate": 2.2995621761313146e-07, "loss": 23.9569, "step": 9784 }, { "epoch": 0.9081206496519721, "grad_norm": 51.32562255859375, "learning_rate": 2.2949640775559333e-07, "loss": 24.3798, "step": 9785 }, { "epoch": 0.9082134570765661, "grad_norm": 50.92142105102539, "learning_rate": 2.290370472673714e-07, "loss": 21.8371, "step": 9786 }, { "epoch": 0.9083062645011601, "grad_norm": 64.75337982177734, "learning_rate": 2.2857813619173664e-07, "loss": 23.9778, "step": 9787 }, { "epoch": 0.9083990719257541, "grad_norm": 62.49623489379883, "learning_rate": 2.2811967457191608e-07, "loss": 21.5895, "step": 9788 }, { "epoch": 0.908491879350348, "grad_norm": 50.15470886230469, "learning_rate": 2.2766166245109744e-07, "loss": 21.8495, "step": 9789 }, { "epoch": 0.908584686774942, "grad_norm": 48.4677848815918, "learning_rate": 2.272040998724234e-07, "loss": 23.0416, "step": 9790 }, { "epoch": 0.9086774941995359, "grad_norm": 69.46810150146484, "learning_rate": 2.2674698687899564e-07, "loss": 24.4649, "step": 9791 }, { "epoch": 0.90877030162413, "grad_norm": 58.254329681396484, "learning_rate": 2.2629032351387247e-07, "loss": 21.8465, "step": 9792 }, { "epoch": 0.9088631090487239, "grad_norm": 55.24749755859375, "learning_rate": 2.258341098200695e-07, "loss": 22.7932, "step": 9793 }, { "epoch": 0.9089559164733179, "grad_norm": 52.161041259765625, "learning_rate": 2.253783458405634e-07, "loss": 23.2047, "step": 9794 }, { "epoch": 0.9090487238979118, "grad_norm": 46.332881927490234, "learning_rate": 2.2492303161828433e-07, "loss": 22.0683, "step": 9795 }, { "epoch": 0.9091415313225057, "grad_norm": 57.44654083251953, "learning_rate": 2.2446816719612187e-07, "loss": 25.0343, "step": 9796 }, { "epoch": 0.9092343387470998, "grad_norm": 44.735595703125, "learning_rate": 2.2401375261692338e-07, "loss": 21.5015, "step": 9797 }, { "epoch": 0.9093271461716937, "grad_norm": 47.104408264160156, "learning_rate": 2.23559787923493e-07, "loss": 23.4465, "step": 9798 }, { "epoch": 0.9094199535962877, "grad_norm": 48.80741500854492, "learning_rate": 2.2310627315859313e-07, "loss": 22.5412, "step": 9799 }, { "epoch": 0.9095127610208816, "grad_norm": 46.87937545776367, "learning_rate": 2.226532083649452e-07, "loss": 23.6805, "step": 9800 }, { "epoch": 0.9096055684454757, "grad_norm": 42.97400665283203, "learning_rate": 2.222005935852245e-07, "loss": 23.1649, "step": 9801 }, { "epoch": 0.9096983758700696, "grad_norm": 57.866539001464844, "learning_rate": 2.2174842886206805e-07, "loss": 23.5452, "step": 9802 }, { "epoch": 0.9097911832946636, "grad_norm": 58.752323150634766, "learning_rate": 2.2129671423806787e-07, "loss": 23.2113, "step": 9803 }, { "epoch": 0.9098839907192575, "grad_norm": 47.243228912353516, "learning_rate": 2.2084544975577383e-07, "loss": 22.508, "step": 9804 }, { "epoch": 0.9099767981438515, "grad_norm": 54.71851348876953, "learning_rate": 2.2039463545769413e-07, "loss": 22.1783, "step": 9805 }, { "epoch": 0.9100696055684455, "grad_norm": 55.278690338134766, "learning_rate": 2.1994427138629592e-07, "loss": 23.1428, "step": 9806 }, { "epoch": 0.9101624129930395, "grad_norm": 52.69858932495117, "learning_rate": 2.1949435758399972e-07, "loss": 22.1354, "step": 9807 }, { "epoch": 0.9102552204176334, "grad_norm": 62.16366958618164, "learning_rate": 2.1904489409318773e-07, "loss": 22.5188, "step": 9808 }, { "epoch": 0.9103480278422273, "grad_norm": 59.792572021484375, "learning_rate": 2.1859588095619834e-07, "loss": 22.537, "step": 9809 }, { "epoch": 0.9104408352668213, "grad_norm": 64.54402923583984, "learning_rate": 2.1814731821532765e-07, "loss": 22.733, "step": 9810 }, { "epoch": 0.9105336426914153, "grad_norm": 55.27838897705078, "learning_rate": 2.1769920591282745e-07, "loss": 22.5413, "step": 9811 }, { "epoch": 0.9106264501160093, "grad_norm": 47.74138259887695, "learning_rate": 2.1725154409091176e-07, "loss": 22.3229, "step": 9812 }, { "epoch": 0.9107192575406032, "grad_norm": 52.83869934082031, "learning_rate": 2.1680433279174573e-07, "loss": 23.8816, "step": 9813 }, { "epoch": 0.9108120649651972, "grad_norm": 48.49026107788086, "learning_rate": 2.1635757205745845e-07, "loss": 22.7455, "step": 9814 }, { "epoch": 0.9109048723897912, "grad_norm": 50.137855529785156, "learning_rate": 2.1591126193013178e-07, "loss": 20.7991, "step": 9815 }, { "epoch": 0.9109976798143852, "grad_norm": 46.887699127197266, "learning_rate": 2.1546540245180825e-07, "loss": 22.47, "step": 9816 }, { "epoch": 0.9110904872389791, "grad_norm": 48.969852447509766, "learning_rate": 2.1501999366448533e-07, "loss": 22.2405, "step": 9817 }, { "epoch": 0.9111832946635731, "grad_norm": 48.18722915649414, "learning_rate": 2.1457503561012116e-07, "loss": 22.8146, "step": 9818 }, { "epoch": 0.911276102088167, "grad_norm": 51.692413330078125, "learning_rate": 2.1413052833062775e-07, "loss": 23.0585, "step": 9819 }, { "epoch": 0.9113689095127611, "grad_norm": 49.076255798339844, "learning_rate": 2.1368647186787773e-07, "loss": 21.1958, "step": 9820 }, { "epoch": 0.911461716937355, "grad_norm": 50.587730407714844, "learning_rate": 2.1324286626369982e-07, "loss": 22.6498, "step": 9821 }, { "epoch": 0.911554524361949, "grad_norm": 95.90464782714844, "learning_rate": 2.1279971155988066e-07, "loss": 23.1486, "step": 9822 }, { "epoch": 0.9116473317865429, "grad_norm": 59.90353012084961, "learning_rate": 2.1235700779816349e-07, "loss": 21.9541, "step": 9823 }, { "epoch": 0.911740139211137, "grad_norm": 57.57712936401367, "learning_rate": 2.1191475502025217e-07, "loss": 21.9203, "step": 9824 }, { "epoch": 0.9118329466357309, "grad_norm": 51.168087005615234, "learning_rate": 2.1147295326780283e-07, "loss": 20.6101, "step": 9825 }, { "epoch": 0.9119257540603248, "grad_norm": 53.80626678466797, "learning_rate": 2.1103160258243382e-07, "loss": 23.4338, "step": 9826 }, { "epoch": 0.9120185614849188, "grad_norm": 53.12994384765625, "learning_rate": 2.1059070300571916e-07, "loss": 23.0837, "step": 9827 }, { "epoch": 0.9121113689095127, "grad_norm": 57.70051574707031, "learning_rate": 2.1015025457919002e-07, "loss": 23.8783, "step": 9828 }, { "epoch": 0.9122041763341068, "grad_norm": 57.61319351196289, "learning_rate": 2.0971025734433493e-07, "loss": 24.7543, "step": 9829 }, { "epoch": 0.9122969837587007, "grad_norm": 48.84230422973633, "learning_rate": 2.0927071134260236e-07, "loss": 23.3655, "step": 9830 }, { "epoch": 0.9123897911832947, "grad_norm": 52.3580322265625, "learning_rate": 2.0883161661539475e-07, "loss": 23.5007, "step": 9831 }, { "epoch": 0.9124825986078886, "grad_norm": 50.15748596191406, "learning_rate": 2.0839297320407458e-07, "loss": 24.4931, "step": 9832 }, { "epoch": 0.9125754060324826, "grad_norm": 54.41055679321289, "learning_rate": 2.0795478114996047e-07, "loss": 21.9516, "step": 9833 }, { "epoch": 0.9126682134570766, "grad_norm": 51.24482345581055, "learning_rate": 2.075170404943294e-07, "loss": 20.778, "step": 9834 }, { "epoch": 0.9127610208816705, "grad_norm": 48.97748565673828, "learning_rate": 2.0707975127841507e-07, "loss": 23.5101, "step": 9835 }, { "epoch": 0.9128538283062645, "grad_norm": 51.743499755859375, "learning_rate": 2.0664291354340894e-07, "loss": 23.2853, "step": 9836 }, { "epoch": 0.9129466357308584, "grad_norm": 62.55534744262695, "learning_rate": 2.0620652733046032e-07, "loss": 23.7135, "step": 9837 }, { "epoch": 0.9130394431554525, "grad_norm": 49.328094482421875, "learning_rate": 2.0577059268067578e-07, "loss": 23.7973, "step": 9838 }, { "epoch": 0.9131322505800464, "grad_norm": 61.82990264892578, "learning_rate": 2.0533510963511915e-07, "loss": 23.9229, "step": 9839 }, { "epoch": 0.9132250580046404, "grad_norm": 48.31203842163086, "learning_rate": 2.0490007823481096e-07, "loss": 23.9795, "step": 9840 }, { "epoch": 0.9133178654292343, "grad_norm": 45.6464729309082, "learning_rate": 2.0446549852073228e-07, "loss": 22.302, "step": 9841 }, { "epoch": 0.9134106728538283, "grad_norm": 55.568233489990234, "learning_rate": 2.040313705338165e-07, "loss": 21.6975, "step": 9842 }, { "epoch": 0.9135034802784223, "grad_norm": 43.81700134277344, "learning_rate": 2.035976943149598e-07, "loss": 19.9302, "step": 9843 }, { "epoch": 0.9135962877030163, "grad_norm": 48.8061408996582, "learning_rate": 2.0316446990501226e-07, "loss": 23.0459, "step": 9844 }, { "epoch": 0.9136890951276102, "grad_norm": 64.72419738769531, "learning_rate": 2.0273169734478238e-07, "loss": 23.0794, "step": 9845 }, { "epoch": 0.9137819025522042, "grad_norm": 59.29604721069336, "learning_rate": 2.0229937667503641e-07, "loss": 24.7317, "step": 9846 }, { "epoch": 0.9138747099767981, "grad_norm": 55.03865432739258, "learning_rate": 2.01867507936499e-07, "loss": 20.7753, "step": 9847 }, { "epoch": 0.9139675174013921, "grad_norm": 61.72035217285156, "learning_rate": 2.0143609116984876e-07, "loss": 23.8608, "step": 9848 }, { "epoch": 0.9140603248259861, "grad_norm": 57.41250228881836, "learning_rate": 2.010051264157259e-07, "loss": 22.3134, "step": 9849 }, { "epoch": 0.91415313225058, "grad_norm": 59.86149215698242, "learning_rate": 2.0057461371472575e-07, "loss": 21.8574, "step": 9850 }, { "epoch": 0.914245939675174, "grad_norm": 93.6924819946289, "learning_rate": 2.00144553107402e-07, "loss": 23.7277, "step": 9851 }, { "epoch": 0.914338747099768, "grad_norm": 58.385562896728516, "learning_rate": 1.9971494463426332e-07, "loss": 23.6755, "step": 9852 }, { "epoch": 0.914431554524362, "grad_norm": 58.039310455322266, "learning_rate": 1.9928578833578072e-07, "loss": 23.9751, "step": 9853 }, { "epoch": 0.9145243619489559, "grad_norm": 47.5579833984375, "learning_rate": 1.988570842523768e-07, "loss": 21.0684, "step": 9854 }, { "epoch": 0.9146171693735499, "grad_norm": 54.34054946899414, "learning_rate": 1.98428832424436e-07, "loss": 24.1484, "step": 9855 }, { "epoch": 0.9147099767981438, "grad_norm": 74.44837188720703, "learning_rate": 1.9800103289229877e-07, "loss": 21.6268, "step": 9856 }, { "epoch": 0.9148027842227379, "grad_norm": 49.63906478881836, "learning_rate": 1.9757368569626179e-07, "loss": 22.8123, "step": 9857 }, { "epoch": 0.9148955916473318, "grad_norm": 60.809120178222656, "learning_rate": 1.971467908765795e-07, "loss": 21.6001, "step": 9858 }, { "epoch": 0.9149883990719258, "grad_norm": 57.73723220825195, "learning_rate": 1.9672034847346698e-07, "loss": 21.1583, "step": 9859 }, { "epoch": 0.9150812064965197, "grad_norm": 100.95226287841797, "learning_rate": 1.96294358527091e-07, "loss": 26.4115, "step": 9860 }, { "epoch": 0.9151740139211136, "grad_norm": 52.795997619628906, "learning_rate": 1.9586882107758055e-07, "loss": 23.315, "step": 9861 }, { "epoch": 0.9152668213457077, "grad_norm": 48.12239074707031, "learning_rate": 1.9544373616501966e-07, "loss": 21.9471, "step": 9862 }, { "epoch": 0.9153596287703016, "grad_norm": 48.730133056640625, "learning_rate": 1.9501910382945022e-07, "loss": 22.6604, "step": 9863 }, { "epoch": 0.9154524361948956, "grad_norm": 47.78339767456055, "learning_rate": 1.9459492411087078e-07, "loss": 21.5981, "step": 9864 }, { "epoch": 0.9155452436194895, "grad_norm": 47.4063720703125, "learning_rate": 1.9417119704923993e-07, "loss": 22.1535, "step": 9865 }, { "epoch": 0.9156380510440836, "grad_norm": 65.86397552490234, "learning_rate": 1.9374792268446962e-07, "loss": 23.5317, "step": 9866 }, { "epoch": 0.9157308584686775, "grad_norm": 52.523319244384766, "learning_rate": 1.9332510105643242e-07, "loss": 22.6805, "step": 9867 }, { "epoch": 0.9158236658932715, "grad_norm": 62.70693588256836, "learning_rate": 1.9290273220495648e-07, "loss": 25.1415, "step": 9868 }, { "epoch": 0.9159164733178654, "grad_norm": 43.17124557495117, "learning_rate": 1.9248081616982884e-07, "loss": 24.1124, "step": 9869 }, { "epoch": 0.9160092807424594, "grad_norm": 49.73488235473633, "learning_rate": 1.9205935299079158e-07, "loss": 22.5912, "step": 9870 }, { "epoch": 0.9161020881670534, "grad_norm": 51.45980453491211, "learning_rate": 1.9163834270754632e-07, "loss": 22.7031, "step": 9871 }, { "epoch": 0.9161948955916474, "grad_norm": 47.785858154296875, "learning_rate": 1.9121778535975077e-07, "loss": 22.4339, "step": 9872 }, { "epoch": 0.9162877030162413, "grad_norm": 44.103981018066406, "learning_rate": 1.9079768098702046e-07, "loss": 19.9441, "step": 9873 }, { "epoch": 0.9163805104408352, "grad_norm": 46.86737823486328, "learning_rate": 1.9037802962892814e-07, "loss": 23.032, "step": 9874 }, { "epoch": 0.9164733178654292, "grad_norm": 56.20598220825195, "learning_rate": 1.8995883132500449e-07, "loss": 22.6911, "step": 9875 }, { "epoch": 0.9165661252900232, "grad_norm": 49.43415832519531, "learning_rate": 1.8954008611473618e-07, "loss": 22.9522, "step": 9876 }, { "epoch": 0.9166589327146172, "grad_norm": 44.3315315246582, "learning_rate": 1.891217940375678e-07, "loss": 23.4012, "step": 9877 }, { "epoch": 0.9167517401392111, "grad_norm": 58.43988037109375, "learning_rate": 1.8870395513290173e-07, "loss": 22.0636, "step": 9878 }, { "epoch": 0.9168445475638051, "grad_norm": 59.9496955871582, "learning_rate": 1.8828656944009814e-07, "loss": 21.9067, "step": 9879 }, { "epoch": 0.9169373549883991, "grad_norm": 87.61571502685547, "learning_rate": 1.8786963699847228e-07, "loss": 22.8289, "step": 9880 }, { "epoch": 0.9170301624129931, "grad_norm": 54.312747955322266, "learning_rate": 1.8745315784729933e-07, "loss": 21.9827, "step": 9881 }, { "epoch": 0.917122969837587, "grad_norm": 51.67344284057617, "learning_rate": 1.8703713202580963e-07, "loss": 21.9519, "step": 9882 }, { "epoch": 0.917215777262181, "grad_norm": 68.17089080810547, "learning_rate": 1.8662155957319183e-07, "loss": 20.7371, "step": 9883 }, { "epoch": 0.9173085846867749, "grad_norm": 58.71007537841797, "learning_rate": 1.8620644052859237e-07, "loss": 21.8705, "step": 9884 }, { "epoch": 0.917401392111369, "grad_norm": 67.60004425048828, "learning_rate": 1.8579177493111444e-07, "loss": 26.0442, "step": 9885 }, { "epoch": 0.9174941995359629, "grad_norm": 55.220481872558594, "learning_rate": 1.8537756281981845e-07, "loss": 22.4076, "step": 9886 }, { "epoch": 0.9175870069605568, "grad_norm": 49.646121978759766, "learning_rate": 1.8496380423372095e-07, "loss": 21.7693, "step": 9887 }, { "epoch": 0.9176798143851508, "grad_norm": 61.40773010253906, "learning_rate": 1.8455049921179858e-07, "loss": 21.6756, "step": 9888 }, { "epoch": 0.9177726218097447, "grad_norm": 59.77193832397461, "learning_rate": 1.8413764779298237e-07, "loss": 22.9502, "step": 9889 }, { "epoch": 0.9178654292343388, "grad_norm": 49.19362258911133, "learning_rate": 1.8372525001616292e-07, "loss": 22.627, "step": 9890 }, { "epoch": 0.9179582366589327, "grad_norm": 55.7377815246582, "learning_rate": 1.8331330592018638e-07, "loss": 23.875, "step": 9891 }, { "epoch": 0.9180510440835267, "grad_norm": 69.89513397216797, "learning_rate": 1.8290181554385722e-07, "loss": 22.6638, "step": 9892 }, { "epoch": 0.9181438515081206, "grad_norm": 52.117637634277344, "learning_rate": 1.8249077892593614e-07, "loss": 23.0448, "step": 9893 }, { "epoch": 0.9182366589327147, "grad_norm": 55.522071838378906, "learning_rate": 1.8208019610514273e-07, "loss": 23.9688, "step": 9894 }, { "epoch": 0.9183294663573086, "grad_norm": 63.90086364746094, "learning_rate": 1.8167006712015156e-07, "loss": 22.1255, "step": 9895 }, { "epoch": 0.9184222737819026, "grad_norm": 59.83187484741211, "learning_rate": 1.8126039200959732e-07, "loss": 22.9089, "step": 9896 }, { "epoch": 0.9185150812064965, "grad_norm": 51.404354095458984, "learning_rate": 1.8085117081206916e-07, "loss": 24.8062, "step": 9897 }, { "epoch": 0.9186078886310904, "grad_norm": 47.683860778808594, "learning_rate": 1.8044240356611508e-07, "loss": 21.4657, "step": 9898 }, { "epoch": 0.9187006960556845, "grad_norm": 48.851646423339844, "learning_rate": 1.8003409031023933e-07, "loss": 24.4465, "step": 9899 }, { "epoch": 0.9187935034802784, "grad_norm": 47.01018524169922, "learning_rate": 1.7962623108290556e-07, "loss": 22.0517, "step": 9900 }, { "epoch": 0.9188863109048724, "grad_norm": 51.650264739990234, "learning_rate": 1.7921882592253136e-07, "loss": 22.2196, "step": 9901 }, { "epoch": 0.9189791183294663, "grad_norm": 50.08949279785156, "learning_rate": 1.7881187486749384e-07, "loss": 24.6154, "step": 9902 }, { "epoch": 0.9190719257540603, "grad_norm": 54.78816223144531, "learning_rate": 1.7840537795612733e-07, "loss": 20.6761, "step": 9903 }, { "epoch": 0.9191647331786543, "grad_norm": 53.2084846496582, "learning_rate": 1.779993352267223e-07, "loss": 23.229, "step": 9904 }, { "epoch": 0.9192575406032483, "grad_norm": 49.82149887084961, "learning_rate": 1.775937467175265e-07, "loss": 21.2173, "step": 9905 }, { "epoch": 0.9193503480278422, "grad_norm": 56.91930389404297, "learning_rate": 1.7718861246674656e-07, "loss": 23.2648, "step": 9906 }, { "epoch": 0.9194431554524362, "grad_norm": 49.09857177734375, "learning_rate": 1.7678393251254365e-07, "loss": 22.5285, "step": 9907 }, { "epoch": 0.9195359628770302, "grad_norm": 58.290557861328125, "learning_rate": 1.7637970689303895e-07, "loss": 21.4815, "step": 9908 }, { "epoch": 0.9196287703016242, "grad_norm": 55.42329788208008, "learning_rate": 1.759759356463081e-07, "loss": 24.586, "step": 9909 }, { "epoch": 0.9197215777262181, "grad_norm": 53.254066467285156, "learning_rate": 1.7557261881038622e-07, "loss": 24.249, "step": 9910 }, { "epoch": 0.919814385150812, "grad_norm": 54.404170989990234, "learning_rate": 1.7516975642326516e-07, "loss": 23.3366, "step": 9911 }, { "epoch": 0.919907192575406, "grad_norm": 42.70674133300781, "learning_rate": 1.7476734852289235e-07, "loss": 24.7238, "step": 9912 }, { "epoch": 0.92, "grad_norm": 54.241539001464844, "learning_rate": 1.743653951471741e-07, "loss": 22.2235, "step": 9913 }, { "epoch": 0.920092807424594, "grad_norm": 61.1470832824707, "learning_rate": 1.7396389633397458e-07, "loss": 20.9763, "step": 9914 }, { "epoch": 0.9201856148491879, "grad_norm": 53.252471923828125, "learning_rate": 1.7356285212111133e-07, "loss": 23.6683, "step": 9915 }, { "epoch": 0.9202784222737819, "grad_norm": 45.40963363647461, "learning_rate": 1.7316226254636415e-07, "loss": 22.0724, "step": 9916 }, { "epoch": 0.9203712296983759, "grad_norm": 44.93507385253906, "learning_rate": 1.7276212764746614e-07, "loss": 21.8228, "step": 9917 }, { "epoch": 0.9204640371229699, "grad_norm": 52.57918930053711, "learning_rate": 1.7236244746210994e-07, "loss": 23.6332, "step": 9918 }, { "epoch": 0.9205568445475638, "grad_norm": 60.375572204589844, "learning_rate": 1.7196322202794325e-07, "loss": 22.4069, "step": 9919 }, { "epoch": 0.9206496519721578, "grad_norm": 50.35767364501953, "learning_rate": 1.7156445138257372e-07, "loss": 23.8421, "step": 9920 }, { "epoch": 0.9207424593967517, "grad_norm": 42.86250305175781, "learning_rate": 1.71166135563563e-07, "loss": 24.1325, "step": 9921 }, { "epoch": 0.9208352668213458, "grad_norm": 44.64767837524414, "learning_rate": 1.7076827460843216e-07, "loss": 22.9698, "step": 9922 }, { "epoch": 0.9209280742459397, "grad_norm": 52.18754577636719, "learning_rate": 1.7037086855465902e-07, "loss": 21.7608, "step": 9923 }, { "epoch": 0.9210208816705336, "grad_norm": 52.08860397338867, "learning_rate": 1.6997391743967696e-07, "loss": 21.6726, "step": 9924 }, { "epoch": 0.9211136890951276, "grad_norm": 47.88172149658203, "learning_rate": 1.6957742130087884e-07, "loss": 22.5342, "step": 9925 }, { "epoch": 0.9212064965197215, "grad_norm": 45.71292495727539, "learning_rate": 1.6918138017561369e-07, "loss": 21.0287, "step": 9926 }, { "epoch": 0.9212993039443156, "grad_norm": 60.02489471435547, "learning_rate": 1.6878579410118722e-07, "loss": 23.2034, "step": 9927 }, { "epoch": 0.9213921113689095, "grad_norm": 51.27010726928711, "learning_rate": 1.6839066311486242e-07, "loss": 22.7075, "step": 9928 }, { "epoch": 0.9214849187935035, "grad_norm": 67.819091796875, "learning_rate": 1.6799598725385945e-07, "loss": 21.6218, "step": 9929 }, { "epoch": 0.9215777262180974, "grad_norm": 51.22709655761719, "learning_rate": 1.6760176655535643e-07, "loss": 21.1596, "step": 9930 }, { "epoch": 0.9216705336426915, "grad_norm": 51.505916595458984, "learning_rate": 1.672080010564875e-07, "loss": 23.9159, "step": 9931 }, { "epoch": 0.9217633410672854, "grad_norm": 45.42133712768555, "learning_rate": 1.668146907943452e-07, "loss": 23.4828, "step": 9932 }, { "epoch": 0.9218561484918794, "grad_norm": 40.631710052490234, "learning_rate": 1.6642183580597714e-07, "loss": 23.5402, "step": 9933 }, { "epoch": 0.9219489559164733, "grad_norm": 60.24923324584961, "learning_rate": 1.660294361283893e-07, "loss": 22.7825, "step": 9934 }, { "epoch": 0.9220417633410672, "grad_norm": 47.03166961669922, "learning_rate": 1.656374917985465e-07, "loss": 21.0762, "step": 9935 }, { "epoch": 0.9221345707656613, "grad_norm": 47.24888229370117, "learning_rate": 1.65246002853367e-07, "loss": 22.0298, "step": 9936 }, { "epoch": 0.9222273781902552, "grad_norm": 42.73867416381836, "learning_rate": 1.6485496932972856e-07, "loss": 20.4982, "step": 9937 }, { "epoch": 0.9223201856148492, "grad_norm": 41.925472259521484, "learning_rate": 1.644643912644661e-07, "loss": 20.8022, "step": 9938 }, { "epoch": 0.9224129930394431, "grad_norm": 49.983863830566406, "learning_rate": 1.6407426869437028e-07, "loss": 23.94, "step": 9939 }, { "epoch": 0.9225058004640371, "grad_norm": 49.292354583740234, "learning_rate": 1.6368460165618995e-07, "loss": 23.6855, "step": 9940 }, { "epoch": 0.9225986078886311, "grad_norm": 42.333335876464844, "learning_rate": 1.6329539018663187e-07, "loss": 21.9112, "step": 9941 }, { "epoch": 0.9226914153132251, "grad_norm": 47.01129150390625, "learning_rate": 1.6290663432235622e-07, "loss": 22.029, "step": 9942 }, { "epoch": 0.922784222737819, "grad_norm": 49.589996337890625, "learning_rate": 1.6251833409998585e-07, "loss": 21.7169, "step": 9943 }, { "epoch": 0.922877030162413, "grad_norm": 43.56135940551758, "learning_rate": 1.621304895560949e-07, "loss": 22.8651, "step": 9944 }, { "epoch": 0.922969837587007, "grad_norm": 39.490909576416016, "learning_rate": 1.6174310072721965e-07, "loss": 24.3515, "step": 9945 }, { "epoch": 0.923062645011601, "grad_norm": 48.227210998535156, "learning_rate": 1.613561676498493e-07, "loss": 23.438, "step": 9946 }, { "epoch": 0.9231554524361949, "grad_norm": 57.43999099731445, "learning_rate": 1.6096969036043354e-07, "loss": 24.4097, "step": 9947 }, { "epoch": 0.9232482598607888, "grad_norm": 54.75425720214844, "learning_rate": 1.6058366889537546e-07, "loss": 22.6991, "step": 9948 }, { "epoch": 0.9233410672853828, "grad_norm": 48.66526412963867, "learning_rate": 1.6019810329104046e-07, "loss": 21.5858, "step": 9949 }, { "epoch": 0.9234338747099768, "grad_norm": 45.48439025878906, "learning_rate": 1.5981299358374446e-07, "loss": 22.0576, "step": 9950 }, { "epoch": 0.9235266821345708, "grad_norm": 53.32444763183594, "learning_rate": 1.594283398097657e-07, "loss": 23.9644, "step": 9951 }, { "epoch": 0.9236194895591647, "grad_norm": 48.64236831665039, "learning_rate": 1.5904414200533736e-07, "loss": 23.7243, "step": 9952 }, { "epoch": 0.9237122969837587, "grad_norm": 48.30999755859375, "learning_rate": 1.5866040020664995e-07, "loss": 22.9566, "step": 9953 }, { "epoch": 0.9238051044083526, "grad_norm": 44.13703155517578, "learning_rate": 1.5827711444985017e-07, "loss": 22.8068, "step": 9954 }, { "epoch": 0.9238979118329467, "grad_norm": 44.674522399902344, "learning_rate": 1.5789428477104408e-07, "loss": 22.2106, "step": 9955 }, { "epoch": 0.9239907192575406, "grad_norm": 52.218528747558594, "learning_rate": 1.5751191120629118e-07, "loss": 22.816, "step": 9956 }, { "epoch": 0.9240835266821346, "grad_norm": 39.96695327758789, "learning_rate": 1.571299937916121e-07, "loss": 23.3956, "step": 9957 }, { "epoch": 0.9241763341067285, "grad_norm": 45.598880767822266, "learning_rate": 1.5674853256298195e-07, "loss": 23.1361, "step": 9958 }, { "epoch": 0.9242691415313226, "grad_norm": 52.68566131591797, "learning_rate": 1.5636752755633256e-07, "loss": 22.2906, "step": 9959 }, { "epoch": 0.9243619489559165, "grad_norm": 53.051204681396484, "learning_rate": 1.559869788075541e-07, "loss": 22.1297, "step": 9960 }, { "epoch": 0.9244547563805104, "grad_norm": 60.57392883300781, "learning_rate": 1.55606886352494e-07, "loss": 26.9322, "step": 9961 }, { "epoch": 0.9245475638051044, "grad_norm": 46.61067199707031, "learning_rate": 1.5522725022695472e-07, "loss": 23.4239, "step": 9962 }, { "epoch": 0.9246403712296983, "grad_norm": 57.8390007019043, "learning_rate": 1.5484807046669824e-07, "loss": 23.4894, "step": 9963 }, { "epoch": 0.9247331786542924, "grad_norm": 42.18198013305664, "learning_rate": 1.5446934710744154e-07, "loss": 22.5251, "step": 9964 }, { "epoch": 0.9248259860788863, "grad_norm": 44.836570739746094, "learning_rate": 1.540910801848594e-07, "loss": 21.8843, "step": 9965 }, { "epoch": 0.9249187935034803, "grad_norm": 51.75747299194336, "learning_rate": 1.537132697345839e-07, "loss": 21.7367, "step": 9966 }, { "epoch": 0.9250116009280742, "grad_norm": 48.52824783325195, "learning_rate": 1.5333591579220374e-07, "loss": 22.5004, "step": 9967 }, { "epoch": 0.9251044083526682, "grad_norm": 44.0437126159668, "learning_rate": 1.5295901839326554e-07, "loss": 22.5983, "step": 9968 }, { "epoch": 0.9251972157772622, "grad_norm": 61.56073760986328, "learning_rate": 1.5258257757327032e-07, "loss": 23.0534, "step": 9969 }, { "epoch": 0.9252900232018562, "grad_norm": 55.200435638427734, "learning_rate": 1.5220659336767918e-07, "loss": 23.0988, "step": 9970 }, { "epoch": 0.9253828306264501, "grad_norm": 61.096893310546875, "learning_rate": 1.5183106581190821e-07, "loss": 24.3829, "step": 9971 }, { "epoch": 0.925475638051044, "grad_norm": 42.583736419677734, "learning_rate": 1.514559949413319e-07, "loss": 23.1216, "step": 9972 }, { "epoch": 0.9255684454756381, "grad_norm": 55.10496139526367, "learning_rate": 1.5108138079128032e-07, "loss": 22.9763, "step": 9973 }, { "epoch": 0.925661252900232, "grad_norm": 51.56954574584961, "learning_rate": 1.5070722339704136e-07, "loss": 21.768, "step": 9974 }, { "epoch": 0.925754060324826, "grad_norm": 64.05247497558594, "learning_rate": 1.503335227938596e-07, "loss": 22.5036, "step": 9975 }, { "epoch": 0.9258468677494199, "grad_norm": 43.392860412597656, "learning_rate": 1.4996027901693688e-07, "loss": 21.1174, "step": 9976 }, { "epoch": 0.9259396751740139, "grad_norm": 42.97931671142578, "learning_rate": 1.495874921014312e-07, "loss": 22.573, "step": 9977 }, { "epoch": 0.9260324825986079, "grad_norm": 85.0038833618164, "learning_rate": 1.4921516208246002e-07, "loss": 24.0964, "step": 9978 }, { "epoch": 0.9261252900232019, "grad_norm": 79.21961212158203, "learning_rate": 1.4884328899509303e-07, "loss": 22.1151, "step": 9979 }, { "epoch": 0.9262180974477958, "grad_norm": 49.362464904785156, "learning_rate": 1.4847187287436172e-07, "loss": 23.5292, "step": 9980 }, { "epoch": 0.9263109048723898, "grad_norm": 58.37557601928711, "learning_rate": 1.481009137552525e-07, "loss": 22.9979, "step": 9981 }, { "epoch": 0.9264037122969837, "grad_norm": 59.673702239990234, "learning_rate": 1.4773041167270795e-07, "loss": 21.2536, "step": 9982 }, { "epoch": 0.9264965197215778, "grad_norm": 45.25505828857422, "learning_rate": 1.4736036666162802e-07, "loss": 22.508, "step": 9983 }, { "epoch": 0.9265893271461717, "grad_norm": 53.068477630615234, "learning_rate": 1.4699077875687252e-07, "loss": 22.1177, "step": 9984 }, { "epoch": 0.9266821345707656, "grad_norm": 52.1302375793457, "learning_rate": 1.466216479932525e-07, "loss": 21.2564, "step": 9985 }, { "epoch": 0.9267749419953596, "grad_norm": 43.06181335449219, "learning_rate": 1.4625297440554131e-07, "loss": 21.8071, "step": 9986 }, { "epoch": 0.9268677494199536, "grad_norm": 50.401206970214844, "learning_rate": 1.4588475802846614e-07, "loss": 24.2231, "step": 9987 }, { "epoch": 0.9269605568445476, "grad_norm": 54.26498794555664, "learning_rate": 1.45516998896712e-07, "loss": 24.3717, "step": 9988 }, { "epoch": 0.9270533642691415, "grad_norm": 55.64434051513672, "learning_rate": 1.451496970449212e-07, "loss": 22.1359, "step": 9989 }, { "epoch": 0.9271461716937355, "grad_norm": 52.54631423950195, "learning_rate": 1.447828525076933e-07, "loss": 23.6172, "step": 9990 }, { "epoch": 0.9272389791183294, "grad_norm": 69.269775390625, "learning_rate": 1.444164653195823e-07, "loss": 23.1424, "step": 9991 }, { "epoch": 0.9273317865429235, "grad_norm": 50.28752899169922, "learning_rate": 1.4405053551510229e-07, "loss": 23.9843, "step": 9992 }, { "epoch": 0.9274245939675174, "grad_norm": 44.326961517333984, "learning_rate": 1.4368506312872288e-07, "loss": 21.4615, "step": 9993 }, { "epoch": 0.9275174013921114, "grad_norm": 46.05715560913086, "learning_rate": 1.4332004819487044e-07, "loss": 24.6328, "step": 9994 }, { "epoch": 0.9276102088167053, "grad_norm": 47.34359359741211, "learning_rate": 1.4295549074792803e-07, "loss": 22.3033, "step": 9995 }, { "epoch": 0.9277030162412992, "grad_norm": 44.003414154052734, "learning_rate": 1.4259139082223761e-07, "loss": 20.9421, "step": 9996 }, { "epoch": 0.9277958236658933, "grad_norm": 45.39517593383789, "learning_rate": 1.4222774845209397e-07, "loss": 25.1764, "step": 9997 }, { "epoch": 0.9278886310904872, "grad_norm": 52.170413970947266, "learning_rate": 1.4186456367175304e-07, "loss": 21.5879, "step": 9998 }, { "epoch": 0.9279814385150812, "grad_norm": 42.58112716674805, "learning_rate": 1.4150183651542583e-07, "loss": 22.7755, "step": 9999 }, { "epoch": 0.9280742459396751, "grad_norm": 52.741512298583984, "learning_rate": 1.411395670172794e-07, "loss": 21.7345, "step": 10000 }, { "epoch": 0.9281670533642692, "grad_norm": 52.3476676940918, "learning_rate": 1.4077775521143923e-07, "loss": 23.0045, "step": 10001 }, { "epoch": 0.9282598607888631, "grad_norm": 44.659244537353516, "learning_rate": 1.404164011319875e-07, "loss": 23.907, "step": 10002 }, { "epoch": 0.9283526682134571, "grad_norm": 46.35214614868164, "learning_rate": 1.4005550481296205e-07, "loss": 21.6606, "step": 10003 }, { "epoch": 0.928445475638051, "grad_norm": 42.11241149902344, "learning_rate": 1.3969506628835893e-07, "loss": 22.5602, "step": 10004 }, { "epoch": 0.928538283062645, "grad_norm": 48.601078033447266, "learning_rate": 1.3933508559212994e-07, "loss": 21.9675, "step": 10005 }, { "epoch": 0.928631090487239, "grad_norm": 48.29734420776367, "learning_rate": 1.3897556275818513e-07, "loss": 22.0901, "step": 10006 }, { "epoch": 0.928723897911833, "grad_norm": 46.4719352722168, "learning_rate": 1.3861649782038966e-07, "loss": 24.576, "step": 10007 }, { "epoch": 0.9288167053364269, "grad_norm": 54.02959442138672, "learning_rate": 1.3825789081256812e-07, "loss": 23.7748, "step": 10008 }, { "epoch": 0.9289095127610209, "grad_norm": 47.68583679199219, "learning_rate": 1.378997417684985e-07, "loss": 21.1187, "step": 10009 }, { "epoch": 0.9290023201856148, "grad_norm": 42.94106674194336, "learning_rate": 1.3754205072191885e-07, "loss": 22.9191, "step": 10010 }, { "epoch": 0.9290951276102088, "grad_norm": 43.41069030761719, "learning_rate": 1.3718481770652214e-07, "loss": 22.9049, "step": 10011 }, { "epoch": 0.9291879350348028, "grad_norm": 48.539676666259766, "learning_rate": 1.3682804275595873e-07, "loss": 22.8807, "step": 10012 }, { "epoch": 0.9292807424593967, "grad_norm": 43.95304870605469, "learning_rate": 1.3647172590383674e-07, "loss": 23.1394, "step": 10013 }, { "epoch": 0.9293735498839907, "grad_norm": 51.076202392578125, "learning_rate": 1.3611586718371871e-07, "loss": 22.6201, "step": 10014 }, { "epoch": 0.9294663573085847, "grad_norm": 47.36722183227539, "learning_rate": 1.3576046662912733e-07, "loss": 22.6052, "step": 10015 }, { "epoch": 0.9295591647331787, "grad_norm": 57.495277404785156, "learning_rate": 1.3540552427354025e-07, "loss": 24.9021, "step": 10016 }, { "epoch": 0.9296519721577726, "grad_norm": 45.28562927246094, "learning_rate": 1.3505104015039072e-07, "loss": 23.1323, "step": 10017 }, { "epoch": 0.9297447795823666, "grad_norm": 49.92444610595703, "learning_rate": 1.3469701429307147e-07, "loss": 25.307, "step": 10018 }, { "epoch": 0.9298375870069605, "grad_norm": 57.946678161621094, "learning_rate": 1.3434344673493137e-07, "loss": 24.6299, "step": 10019 }, { "epoch": 0.9299303944315546, "grad_norm": 50.92888259887695, "learning_rate": 1.3399033750927327e-07, "loss": 21.0933, "step": 10020 }, { "epoch": 0.9300232018561485, "grad_norm": 44.53714370727539, "learning_rate": 1.336376866493616e-07, "loss": 23.2426, "step": 10021 }, { "epoch": 0.9301160092807425, "grad_norm": 56.96072769165039, "learning_rate": 1.3328549418841375e-07, "loss": 21.7572, "step": 10022 }, { "epoch": 0.9302088167053364, "grad_norm": 58.262306213378906, "learning_rate": 1.3293376015960647e-07, "loss": 24.5481, "step": 10023 }, { "epoch": 0.9303016241299304, "grad_norm": 50.18508529663086, "learning_rate": 1.3258248459607048e-07, "loss": 23.8713, "step": 10024 }, { "epoch": 0.9303944315545244, "grad_norm": 47.3624153137207, "learning_rate": 1.3223166753089767e-07, "loss": 23.0484, "step": 10025 }, { "epoch": 0.9304872389791183, "grad_norm": 43.710933685302734, "learning_rate": 1.3188130899713102e-07, "loss": 21.0272, "step": 10026 }, { "epoch": 0.9305800464037123, "grad_norm": 47.26948165893555, "learning_rate": 1.3153140902777584e-07, "loss": 20.9394, "step": 10027 }, { "epoch": 0.9306728538283062, "grad_norm": 50.5025520324707, "learning_rate": 1.3118196765579072e-07, "loss": 23.0788, "step": 10028 }, { "epoch": 0.9307656612529003, "grad_norm": 58.7818717956543, "learning_rate": 1.3083298491409213e-07, "loss": 21.0849, "step": 10029 }, { "epoch": 0.9308584686774942, "grad_norm": 61.70706558227539, "learning_rate": 1.3048446083555376e-07, "loss": 21.864, "step": 10030 }, { "epoch": 0.9309512761020882, "grad_norm": 50.453224182128906, "learning_rate": 1.3013639545300606e-07, "loss": 23.5847, "step": 10031 }, { "epoch": 0.9310440835266821, "grad_norm": 67.2294921875, "learning_rate": 1.297887887992344e-07, "loss": 23.4086, "step": 10032 }, { "epoch": 0.931136890951276, "grad_norm": 60.578765869140625, "learning_rate": 1.294416409069843e-07, "loss": 23.091, "step": 10033 }, { "epoch": 0.9312296983758701, "grad_norm": 51.68449783325195, "learning_rate": 1.290949518089546e-07, "loss": 23.0009, "step": 10034 }, { "epoch": 0.931322505800464, "grad_norm": 49.63691329956055, "learning_rate": 1.2874872153780415e-07, "loss": 22.9149, "step": 10035 }, { "epoch": 0.931415313225058, "grad_norm": 55.283878326416016, "learning_rate": 1.2840295012614467e-07, "loss": 27.3998, "step": 10036 }, { "epoch": 0.9315081206496519, "grad_norm": 74.5977554321289, "learning_rate": 1.280576376065501e-07, "loss": 22.5483, "step": 10037 }, { "epoch": 0.931600928074246, "grad_norm": 61.29937744140625, "learning_rate": 1.2771278401154496e-07, "loss": 23.8025, "step": 10038 }, { "epoch": 0.9316937354988399, "grad_norm": 46.76296615600586, "learning_rate": 1.273683893736155e-07, "loss": 25.2, "step": 10039 }, { "epoch": 0.9317865429234339, "grad_norm": 53.4208869934082, "learning_rate": 1.2702445372520188e-07, "loss": 21.9183, "step": 10040 }, { "epoch": 0.9318793503480278, "grad_norm": 71.08717346191406, "learning_rate": 1.2668097709870265e-07, "loss": 22.5128, "step": 10041 }, { "epoch": 0.9319721577726218, "grad_norm": 50.41886520385742, "learning_rate": 1.2633795952647133e-07, "loss": 22.5007, "step": 10042 }, { "epoch": 0.9320649651972158, "grad_norm": 50.08906555175781, "learning_rate": 1.2599540104082096e-07, "loss": 22.8876, "step": 10043 }, { "epoch": 0.9321577726218098, "grad_norm": 108.60774230957031, "learning_rate": 1.2565330167401747e-07, "loss": 21.2203, "step": 10044 }, { "epoch": 0.9322505800464037, "grad_norm": 56.888065338134766, "learning_rate": 1.2531166145828833e-07, "loss": 21.7716, "step": 10045 }, { "epoch": 0.9323433874709977, "grad_norm": 65.8400650024414, "learning_rate": 1.249704804258134e-07, "loss": 23.5231, "step": 10046 }, { "epoch": 0.9324361948955916, "grad_norm": 58.694759368896484, "learning_rate": 1.246297586087314e-07, "loss": 21.6394, "step": 10047 }, { "epoch": 0.9325290023201857, "grad_norm": 49.962745666503906, "learning_rate": 1.2428949603913775e-07, "loss": 24.1206, "step": 10048 }, { "epoch": 0.9326218097447796, "grad_norm": 50.10302734375, "learning_rate": 1.2394969274908464e-07, "loss": 22.1885, "step": 10049 }, { "epoch": 0.9327146171693735, "grad_norm": 52.994327545166016, "learning_rate": 1.236103487705792e-07, "loss": 22.5431, "step": 10050 }, { "epoch": 0.9328074245939675, "grad_norm": 50.163002014160156, "learning_rate": 1.2327146413558865e-07, "loss": 20.7131, "step": 10051 }, { "epoch": 0.9329002320185615, "grad_norm": 59.2295036315918, "learning_rate": 1.229330388760336e-07, "loss": 22.8794, "step": 10052 }, { "epoch": 0.9329930394431555, "grad_norm": 46.90842056274414, "learning_rate": 1.2259507302379402e-07, "loss": 21.8662, "step": 10053 }, { "epoch": 0.9330858468677494, "grad_norm": 42.76176834106445, "learning_rate": 1.2225756661070453e-07, "loss": 21.3165, "step": 10054 }, { "epoch": 0.9331786542923434, "grad_norm": 65.72794342041016, "learning_rate": 1.21920519668558e-07, "loss": 22.2945, "step": 10055 }, { "epoch": 0.9332714617169373, "grad_norm": 51.37553787231445, "learning_rate": 1.2158393222910235e-07, "loss": 20.7213, "step": 10056 }, { "epoch": 0.9333642691415314, "grad_norm": 50.326515197753906, "learning_rate": 1.2124780432404503e-07, "loss": 21.5846, "step": 10057 }, { "epoch": 0.9334570765661253, "grad_norm": 43.39256286621094, "learning_rate": 1.209121359850468e-07, "loss": 23.2132, "step": 10058 }, { "epoch": 0.9335498839907193, "grad_norm": 45.799346923828125, "learning_rate": 1.2057692724372794e-07, "loss": 24.3946, "step": 10059 }, { "epoch": 0.9336426914153132, "grad_norm": 51.0338134765625, "learning_rate": 1.2024217813166315e-07, "loss": 21.0366, "step": 10060 }, { "epoch": 0.9337354988399071, "grad_norm": 58.8661003112793, "learning_rate": 1.1990788868038505e-07, "loss": 22.4603, "step": 10061 }, { "epoch": 0.9338283062645012, "grad_norm": 53.20887756347656, "learning_rate": 1.1957405892138397e-07, "loss": 24.0417, "step": 10062 }, { "epoch": 0.9339211136890951, "grad_norm": 41.52434539794922, "learning_rate": 1.192406888861053e-07, "loss": 22.6021, "step": 10063 }, { "epoch": 0.9340139211136891, "grad_norm": 52.66131591796875, "learning_rate": 1.1890777860595171e-07, "loss": 23.0672, "step": 10064 }, { "epoch": 0.934106728538283, "grad_norm": 54.57746887207031, "learning_rate": 1.1857532811228146e-07, "loss": 22.11, "step": 10065 }, { "epoch": 0.9341995359628771, "grad_norm": 52.03687286376953, "learning_rate": 1.1824333743641226e-07, "loss": 23.3957, "step": 10066 }, { "epoch": 0.934292343387471, "grad_norm": 117.6353530883789, "learning_rate": 1.1791180660961521e-07, "loss": 21.8776, "step": 10067 }, { "epoch": 0.934385150812065, "grad_norm": 55.454715728759766, "learning_rate": 1.175807356631209e-07, "loss": 21.8672, "step": 10068 }, { "epoch": 0.9344779582366589, "grad_norm": 47.33834457397461, "learning_rate": 1.1725012462811436e-07, "loss": 22.3567, "step": 10069 }, { "epoch": 0.9345707656612529, "grad_norm": 51.88111877441406, "learning_rate": 1.1691997353573903e-07, "loss": 21.6281, "step": 10070 }, { "epoch": 0.9346635730858469, "grad_norm": 49.548561096191406, "learning_rate": 1.1659028241709392e-07, "loss": 21.0881, "step": 10071 }, { "epoch": 0.9347563805104409, "grad_norm": 37.67759704589844, "learning_rate": 1.1626105130323583e-07, "loss": 22.7517, "step": 10072 }, { "epoch": 0.9348491879350348, "grad_norm": 53.91051483154297, "learning_rate": 1.1593228022517666e-07, "loss": 23.1192, "step": 10073 }, { "epoch": 0.9349419953596287, "grad_norm": 55.14371871948242, "learning_rate": 1.1560396921388551e-07, "loss": 22.3869, "step": 10074 }, { "epoch": 0.9350348027842227, "grad_norm": 39.606422424316406, "learning_rate": 1.1527611830028984e-07, "loss": 23.1078, "step": 10075 }, { "epoch": 0.9351276102088167, "grad_norm": 46.2658805847168, "learning_rate": 1.1494872751527109e-07, "loss": 22.992, "step": 10076 }, { "epoch": 0.9352204176334107, "grad_norm": 61.43990707397461, "learning_rate": 1.1462179688966902e-07, "loss": 21.8572, "step": 10077 }, { "epoch": 0.9353132250580046, "grad_norm": 47.70494079589844, "learning_rate": 1.142953264542801e-07, "loss": 22.5615, "step": 10078 }, { "epoch": 0.9354060324825986, "grad_norm": 51.50865936279297, "learning_rate": 1.1396931623985586e-07, "loss": 24.5317, "step": 10079 }, { "epoch": 0.9354988399071926, "grad_norm": 57.736446380615234, "learning_rate": 1.1364376627710727e-07, "loss": 24.8551, "step": 10080 }, { "epoch": 0.9355916473317866, "grad_norm": 49.531734466552734, "learning_rate": 1.1331867659669927e-07, "loss": 23.7616, "step": 10081 }, { "epoch": 0.9356844547563805, "grad_norm": 59.65107727050781, "learning_rate": 1.1299404722925456e-07, "loss": 23.7486, "step": 10082 }, { "epoch": 0.9357772621809745, "grad_norm": 50.81846237182617, "learning_rate": 1.1266987820535202e-07, "loss": 23.3513, "step": 10083 }, { "epoch": 0.9358700696055684, "grad_norm": 52.297218322753906, "learning_rate": 1.1234616955552891e-07, "loss": 23.7462, "step": 10084 }, { "epoch": 0.9359628770301625, "grad_norm": 44.15332794189453, "learning_rate": 1.120229213102758e-07, "loss": 23.2807, "step": 10085 }, { "epoch": 0.9360556844547564, "grad_norm": 45.357887268066406, "learning_rate": 1.1170013350004449e-07, "loss": 23.462, "step": 10086 }, { "epoch": 0.9361484918793503, "grad_norm": 63.32773971557617, "learning_rate": 1.1137780615523785e-07, "loss": 23.8724, "step": 10087 }, { "epoch": 0.9362412993039443, "grad_norm": 42.515098571777344, "learning_rate": 1.1105593930621994e-07, "loss": 21.9702, "step": 10088 }, { "epoch": 0.9363341067285382, "grad_norm": 64.00721740722656, "learning_rate": 1.1073453298330983e-07, "loss": 20.9346, "step": 10089 }, { "epoch": 0.9364269141531323, "grad_norm": 61.66124725341797, "learning_rate": 1.1041358721678275e-07, "loss": 21.8504, "step": 10090 }, { "epoch": 0.9365197215777262, "grad_norm": 52.673614501953125, "learning_rate": 1.1009310203687062e-07, "loss": 24.9277, "step": 10091 }, { "epoch": 0.9366125290023202, "grad_norm": 47.459468841552734, "learning_rate": 1.0977307747376431e-07, "loss": 22.3522, "step": 10092 }, { "epoch": 0.9367053364269141, "grad_norm": 63.03059005737305, "learning_rate": 1.0945351355760636e-07, "loss": 23.0048, "step": 10093 }, { "epoch": 0.9367981438515082, "grad_norm": 54.94708251953125, "learning_rate": 1.0913441031850048e-07, "loss": 21.2462, "step": 10094 }, { "epoch": 0.9368909512761021, "grad_norm": 48.47300720214844, "learning_rate": 1.0881576778650593e-07, "loss": 24.1661, "step": 10095 }, { "epoch": 0.9369837587006961, "grad_norm": 50.31147003173828, "learning_rate": 1.0849758599163706e-07, "loss": 23.4194, "step": 10096 }, { "epoch": 0.93707656612529, "grad_norm": 52.32028579711914, "learning_rate": 1.0817986496386545e-07, "loss": 22.8071, "step": 10097 }, { "epoch": 0.9371693735498839, "grad_norm": 56.06558609008789, "learning_rate": 1.0786260473312104e-07, "loss": 23.0555, "step": 10098 }, { "epoch": 0.937262180974478, "grad_norm": 47.526466369628906, "learning_rate": 1.0754580532928771e-07, "loss": 23.715, "step": 10099 }, { "epoch": 0.9373549883990719, "grad_norm": 54.082679748535156, "learning_rate": 1.0722946678220824e-07, "loss": 21.7468, "step": 10100 }, { "epoch": 0.9374477958236659, "grad_norm": 56.14624786376953, "learning_rate": 1.0691358912167937e-07, "loss": 23.2451, "step": 10101 }, { "epoch": 0.9375406032482598, "grad_norm": 48.68959045410156, "learning_rate": 1.0659817237745728e-07, "loss": 22.417, "step": 10102 }, { "epoch": 0.9376334106728538, "grad_norm": 57.169071197509766, "learning_rate": 1.0628321657925267e-07, "loss": 23.0989, "step": 10103 }, { "epoch": 0.9377262180974478, "grad_norm": 56.656986236572266, "learning_rate": 1.0596872175673456e-07, "loss": 24.2592, "step": 10104 }, { "epoch": 0.9378190255220418, "grad_norm": 52.234153747558594, "learning_rate": 1.0565468793952649e-07, "loss": 22.8851, "step": 10105 }, { "epoch": 0.9379118329466357, "grad_norm": 50.71062469482422, "learning_rate": 1.0534111515721034e-07, "loss": 22.8969, "step": 10106 }, { "epoch": 0.9380046403712297, "grad_norm": 52.8394660949707, "learning_rate": 1.0502800343932362e-07, "loss": 22.5276, "step": 10107 }, { "epoch": 0.9380974477958237, "grad_norm": 55.21466827392578, "learning_rate": 1.0471535281535994e-07, "loss": 21.4714, "step": 10108 }, { "epoch": 0.9381902552204177, "grad_norm": 45.47407150268555, "learning_rate": 1.044031633147713e-07, "loss": 23.4494, "step": 10109 }, { "epoch": 0.9382830626450116, "grad_norm": 46.63919448852539, "learning_rate": 1.0409143496696528e-07, "loss": 24.8779, "step": 10110 }, { "epoch": 0.9383758700696055, "grad_norm": 48.394859313964844, "learning_rate": 1.0378016780130451e-07, "loss": 24.4707, "step": 10111 }, { "epoch": 0.9384686774941995, "grad_norm": 54.07244873046875, "learning_rate": 1.0346936184711054e-07, "loss": 23.0257, "step": 10112 }, { "epoch": 0.9385614849187935, "grad_norm": 48.02647018432617, "learning_rate": 1.0315901713366161e-07, "loss": 22.5832, "step": 10113 }, { "epoch": 0.9386542923433875, "grad_norm": 48.357948303222656, "learning_rate": 1.0284913369018879e-07, "loss": 21.5554, "step": 10114 }, { "epoch": 0.9387470997679814, "grad_norm": 59.9200439453125, "learning_rate": 1.0253971154588427e-07, "loss": 21.442, "step": 10115 }, { "epoch": 0.9388399071925754, "grad_norm": 52.33993148803711, "learning_rate": 1.0223075072989418e-07, "loss": 22.6522, "step": 10116 }, { "epoch": 0.9389327146171694, "grad_norm": 42.45113754272461, "learning_rate": 1.0192225127132194e-07, "loss": 21.7416, "step": 10117 }, { "epoch": 0.9390255220417634, "grad_norm": 52.01076889038086, "learning_rate": 1.0161421319922704e-07, "loss": 22.9996, "step": 10118 }, { "epoch": 0.9391183294663573, "grad_norm": 54.51620101928711, "learning_rate": 1.0130663654262684e-07, "loss": 22.0117, "step": 10119 }, { "epoch": 0.9392111368909513, "grad_norm": 50.662933349609375, "learning_rate": 1.0099952133049317e-07, "loss": 22.1065, "step": 10120 }, { "epoch": 0.9393039443155452, "grad_norm": 55.88327407836914, "learning_rate": 1.0069286759175679e-07, "loss": 22.6554, "step": 10121 }, { "epoch": 0.9393967517401393, "grad_norm": 92.55574035644531, "learning_rate": 1.0038667535530233e-07, "loss": 22.0824, "step": 10122 }, { "epoch": 0.9394895591647332, "grad_norm": 48.303123474121094, "learning_rate": 1.0008094464997287e-07, "loss": 22.1776, "step": 10123 }, { "epoch": 0.9395823665893271, "grad_norm": 66.18798065185547, "learning_rate": 9.977567550456757e-08, "loss": 26.2711, "step": 10124 }, { "epoch": 0.9396751740139211, "grad_norm": 45.91035079956055, "learning_rate": 9.947086794784233e-08, "loss": 24.0504, "step": 10125 }, { "epoch": 0.939767981438515, "grad_norm": 50.06507110595703, "learning_rate": 9.916652200850807e-08, "loss": 22.7115, "step": 10126 }, { "epoch": 0.9398607888631091, "grad_norm": 57.65532302856445, "learning_rate": 9.886263771523519e-08, "loss": 22.9793, "step": 10127 }, { "epoch": 0.939953596287703, "grad_norm": 55.28084945678711, "learning_rate": 9.855921509664745e-08, "loss": 21.1196, "step": 10128 }, { "epoch": 0.940046403712297, "grad_norm": 52.067447662353516, "learning_rate": 9.825625418132645e-08, "loss": 22.1757, "step": 10129 }, { "epoch": 0.9401392111368909, "grad_norm": 55.150245666503906, "learning_rate": 9.795375499781157e-08, "loss": 21.8926, "step": 10130 }, { "epoch": 0.940232018561485, "grad_norm": 60.267822265625, "learning_rate": 9.765171757459613e-08, "loss": 22.6245, "step": 10131 }, { "epoch": 0.9403248259860789, "grad_norm": 54.55255889892578, "learning_rate": 9.735014194013126e-08, "loss": 22.445, "step": 10132 }, { "epoch": 0.9404176334106729, "grad_norm": 59.800350189208984, "learning_rate": 9.704902812282645e-08, "loss": 22.2728, "step": 10133 }, { "epoch": 0.9405104408352668, "grad_norm": 49.947784423828125, "learning_rate": 9.674837615104349e-08, "loss": 21.5041, "step": 10134 }, { "epoch": 0.9406032482598607, "grad_norm": 58.69426345825195, "learning_rate": 9.644818605310468e-08, "loss": 22.6767, "step": 10135 }, { "epoch": 0.9406960556844548, "grad_norm": 45.9021110534668, "learning_rate": 9.614845785728632e-08, "loss": 22.7671, "step": 10136 }, { "epoch": 0.9407888631090487, "grad_norm": 44.47562789916992, "learning_rate": 9.584919159182304e-08, "loss": 21.8348, "step": 10137 }, { "epoch": 0.9408816705336427, "grad_norm": 59.472747802734375, "learning_rate": 9.555038728490285e-08, "loss": 22.8378, "step": 10138 }, { "epoch": 0.9409744779582366, "grad_norm": 58.6209831237793, "learning_rate": 9.525204496467543e-08, "loss": 23.0117, "step": 10139 }, { "epoch": 0.9410672853828306, "grad_norm": 50.30447006225586, "learning_rate": 9.495416465924113e-08, "loss": 23.278, "step": 10140 }, { "epoch": 0.9411600928074246, "grad_norm": 44.29627227783203, "learning_rate": 9.465674639666078e-08, "loss": 23.5771, "step": 10141 }, { "epoch": 0.9412529002320186, "grad_norm": 50.71921157836914, "learning_rate": 9.435979020495034e-08, "loss": 21.6308, "step": 10142 }, { "epoch": 0.9413457076566125, "grad_norm": 51.38865280151367, "learning_rate": 9.406329611208242e-08, "loss": 18.9974, "step": 10143 }, { "epoch": 0.9414385150812065, "grad_norm": 67.87335205078125, "learning_rate": 9.376726414598525e-08, "loss": 23.3169, "step": 10144 }, { "epoch": 0.9415313225058005, "grad_norm": 49.28765869140625, "learning_rate": 9.347169433454484e-08, "loss": 23.5919, "step": 10145 }, { "epoch": 0.9416241299303945, "grad_norm": 54.7698860168457, "learning_rate": 9.317658670560336e-08, "loss": 22.5428, "step": 10146 }, { "epoch": 0.9417169373549884, "grad_norm": 70.36383819580078, "learning_rate": 9.288194128695915e-08, "loss": 23.1347, "step": 10147 }, { "epoch": 0.9418097447795823, "grad_norm": 56.83586120605469, "learning_rate": 9.258775810636723e-08, "loss": 23.6444, "step": 10148 }, { "epoch": 0.9419025522041763, "grad_norm": 49.057594299316406, "learning_rate": 9.229403719153818e-08, "loss": 22.7351, "step": 10149 }, { "epoch": 0.9419953596287703, "grad_norm": 49.73521423339844, "learning_rate": 9.200077857013934e-08, "loss": 21.0157, "step": 10150 }, { "epoch": 0.9420881670533643, "grad_norm": 48.06296157836914, "learning_rate": 9.170798226979638e-08, "loss": 22.2938, "step": 10151 }, { "epoch": 0.9421809744779582, "grad_norm": 127.13253784179688, "learning_rate": 9.141564831808947e-08, "loss": 21.4497, "step": 10152 }, { "epoch": 0.9422737819025522, "grad_norm": 57.553218841552734, "learning_rate": 9.112377674255545e-08, "loss": 24.3392, "step": 10153 }, { "epoch": 0.9423665893271461, "grad_norm": 42.51286315917969, "learning_rate": 9.083236757068792e-08, "loss": 21.3109, "step": 10154 }, { "epoch": 0.9424593967517402, "grad_norm": 49.71326446533203, "learning_rate": 9.054142082993711e-08, "loss": 21.0153, "step": 10155 }, { "epoch": 0.9425522041763341, "grad_norm": 52.644954681396484, "learning_rate": 9.025093654770945e-08, "loss": 20.9773, "step": 10156 }, { "epoch": 0.9426450116009281, "grad_norm": 47.34363555908203, "learning_rate": 8.996091475136693e-08, "loss": 23.9502, "step": 10157 }, { "epoch": 0.942737819025522, "grad_norm": 58.45089340209961, "learning_rate": 8.967135546823047e-08, "loss": 21.4226, "step": 10158 }, { "epoch": 0.9428306264501161, "grad_norm": 52.87333679199219, "learning_rate": 8.938225872557437e-08, "loss": 26.7557, "step": 10159 }, { "epoch": 0.94292343387471, "grad_norm": 52.28351593017578, "learning_rate": 8.909362455063186e-08, "loss": 22.5107, "step": 10160 }, { "epoch": 0.943016241299304, "grad_norm": 52.85423278808594, "learning_rate": 8.880545297059063e-08, "loss": 24.2584, "step": 10161 }, { "epoch": 0.9431090487238979, "grad_norm": 47.013328552246094, "learning_rate": 8.851774401259671e-08, "loss": 23.4415, "step": 10162 }, { "epoch": 0.9432018561484918, "grad_norm": 45.193546295166016, "learning_rate": 8.82304977037507e-08, "loss": 23.5998, "step": 10163 }, { "epoch": 0.9432946635730859, "grad_norm": 54.52850341796875, "learning_rate": 8.794371407111091e-08, "loss": 22.1021, "step": 10164 }, { "epoch": 0.9433874709976798, "grad_norm": 57.49620056152344, "learning_rate": 8.76573931416913e-08, "loss": 22.5754, "step": 10165 }, { "epoch": 0.9434802784222738, "grad_norm": 52.05438995361328, "learning_rate": 8.73715349424631e-08, "loss": 21.8069, "step": 10166 }, { "epoch": 0.9435730858468677, "grad_norm": 49.28506851196289, "learning_rate": 8.708613950035249e-08, "loss": 22.8369, "step": 10167 }, { "epoch": 0.9436658932714617, "grad_norm": 58.58182144165039, "learning_rate": 8.680120684224469e-08, "loss": 23.0441, "step": 10168 }, { "epoch": 0.9437587006960557, "grad_norm": 49.22996520996094, "learning_rate": 8.651673699497764e-08, "loss": 22.7282, "step": 10169 }, { "epoch": 0.9438515081206497, "grad_norm": 70.54275512695312, "learning_rate": 8.623272998534882e-08, "loss": 22.9926, "step": 10170 }, { "epoch": 0.9439443155452436, "grad_norm": 53.1095085144043, "learning_rate": 8.59491858401107e-08, "loss": 22.8171, "step": 10171 }, { "epoch": 0.9440371229698375, "grad_norm": 61.14168167114258, "learning_rate": 8.566610458597247e-08, "loss": 22.4099, "step": 10172 }, { "epoch": 0.9441299303944316, "grad_norm": 51.381874084472656, "learning_rate": 8.538348624959947e-08, "loss": 22.373, "step": 10173 }, { "epoch": 0.9442227378190255, "grad_norm": 55.617713928222656, "learning_rate": 8.510133085761429e-08, "loss": 21.9077, "step": 10174 }, { "epoch": 0.9443155452436195, "grad_norm": 51.00326156616211, "learning_rate": 8.481963843659458e-08, "loss": 22.0954, "step": 10175 }, { "epoch": 0.9444083526682134, "grad_norm": 46.081512451171875, "learning_rate": 8.45384090130752e-08, "loss": 22.5637, "step": 10176 }, { "epoch": 0.9445011600928074, "grad_norm": 62.98042678833008, "learning_rate": 8.425764261354719e-08, "loss": 23.1021, "step": 10177 }, { "epoch": 0.9445939675174014, "grad_norm": 73.47727966308594, "learning_rate": 8.397733926445828e-08, "loss": 24.7602, "step": 10178 }, { "epoch": 0.9446867749419954, "grad_norm": 50.55430603027344, "learning_rate": 8.36974989922118e-08, "loss": 23.8605, "step": 10179 }, { "epoch": 0.9447795823665893, "grad_norm": 56.56593704223633, "learning_rate": 8.341812182316944e-08, "loss": 21.9717, "step": 10180 }, { "epoch": 0.9448723897911833, "grad_norm": 58.299415588378906, "learning_rate": 8.31392077836457e-08, "loss": 23.5565, "step": 10181 }, { "epoch": 0.9449651972157772, "grad_norm": 48.03874588012695, "learning_rate": 8.286075689991457e-08, "loss": 21.8741, "step": 10182 }, { "epoch": 0.9450580046403713, "grad_norm": 52.67010498046875, "learning_rate": 8.258276919820617e-08, "loss": 24.4258, "step": 10183 }, { "epoch": 0.9451508120649652, "grad_norm": 61.114524841308594, "learning_rate": 8.230524470470513e-08, "loss": 22.4772, "step": 10184 }, { "epoch": 0.9452436194895592, "grad_norm": 51.14420700073242, "learning_rate": 8.202818344555385e-08, "loss": 22.9946, "step": 10185 }, { "epoch": 0.9453364269141531, "grad_norm": 56.38651657104492, "learning_rate": 8.175158544685147e-08, "loss": 22.2343, "step": 10186 }, { "epoch": 0.9454292343387471, "grad_norm": 52.78960418701172, "learning_rate": 8.147545073465158e-08, "loss": 27.271, "step": 10187 }, { "epoch": 0.9455220417633411, "grad_norm": 48.1378059387207, "learning_rate": 8.11997793349667e-08, "loss": 22.6053, "step": 10188 }, { "epoch": 0.945614849187935, "grad_norm": 44.55202102661133, "learning_rate": 8.09245712737633e-08, "loss": 22.5314, "step": 10189 }, { "epoch": 0.945707656612529, "grad_norm": 57.63554763793945, "learning_rate": 8.064982657696618e-08, "loss": 22.4798, "step": 10190 }, { "epoch": 0.9458004640371229, "grad_norm": 51.14069366455078, "learning_rate": 8.037554527045466e-08, "loss": 22.5844, "step": 10191 }, { "epoch": 0.945893271461717, "grad_norm": 49.34809112548828, "learning_rate": 8.010172738006527e-08, "loss": 21.7446, "step": 10192 }, { "epoch": 0.9459860788863109, "grad_norm": 56.67530059814453, "learning_rate": 7.98283729315924e-08, "loss": 22.8717, "step": 10193 }, { "epoch": 0.9460788863109049, "grad_norm": 60.04108428955078, "learning_rate": 7.955548195078433e-08, "loss": 23.4243, "step": 10194 }, { "epoch": 0.9461716937354988, "grad_norm": 46.0315055847168, "learning_rate": 7.92830544633466e-08, "loss": 23.923, "step": 10195 }, { "epoch": 0.9462645011600928, "grad_norm": 49.90822982788086, "learning_rate": 7.90110904949415e-08, "loss": 25.9745, "step": 10196 }, { "epoch": 0.9463573085846868, "grad_norm": 61.484745025634766, "learning_rate": 7.873959007118682e-08, "loss": 23.1082, "step": 10197 }, { "epoch": 0.9464501160092808, "grad_norm": 49.385032653808594, "learning_rate": 7.846855321765767e-08, "loss": 22.6833, "step": 10198 }, { "epoch": 0.9465429234338747, "grad_norm": 50.41891098022461, "learning_rate": 7.819797995988532e-08, "loss": 24.2134, "step": 10199 }, { "epoch": 0.9466357308584686, "grad_norm": 47.4708366394043, "learning_rate": 7.792787032335657e-08, "loss": 22.8106, "step": 10200 }, { "epoch": 0.9467285382830627, "grad_norm": 61.691585540771484, "learning_rate": 7.765822433351556e-08, "loss": 22.3678, "step": 10201 }, { "epoch": 0.9468213457076566, "grad_norm": 50.7387809753418, "learning_rate": 7.738904201576136e-08, "loss": 21.5971, "step": 10202 }, { "epoch": 0.9469141531322506, "grad_norm": 47.10613250732422, "learning_rate": 7.71203233954515e-08, "loss": 24.217, "step": 10203 }, { "epoch": 0.9470069605568445, "grad_norm": 42.93113708496094, "learning_rate": 7.685206849789739e-08, "loss": 21.8305, "step": 10204 }, { "epoch": 0.9470997679814385, "grad_norm": 58.944217681884766, "learning_rate": 7.658427734836881e-08, "loss": 22.6062, "step": 10205 }, { "epoch": 0.9471925754060325, "grad_norm": 54.87253189086914, "learning_rate": 7.631694997209061e-08, "loss": 22.5138, "step": 10206 }, { "epoch": 0.9472853828306265, "grad_norm": 49.46298599243164, "learning_rate": 7.605008639424428e-08, "loss": 23.4482, "step": 10207 }, { "epoch": 0.9473781902552204, "grad_norm": 49.98556900024414, "learning_rate": 7.57836866399675e-08, "loss": 23.7475, "step": 10208 }, { "epoch": 0.9474709976798144, "grad_norm": 50.3024787902832, "learning_rate": 7.551775073435575e-08, "loss": 22.4045, "step": 10209 }, { "epoch": 0.9475638051044084, "grad_norm": 55.25534439086914, "learning_rate": 7.525227870245788e-08, "loss": 23.0608, "step": 10210 }, { "epoch": 0.9476566125290024, "grad_norm": 56.34610366821289, "learning_rate": 7.498727056928168e-08, "loss": 23.6197, "step": 10211 }, { "epoch": 0.9477494199535963, "grad_norm": 47.64138412475586, "learning_rate": 7.472272635978995e-08, "loss": 22.3243, "step": 10212 }, { "epoch": 0.9478422273781902, "grad_norm": 60.64944076538086, "learning_rate": 7.445864609890164e-08, "loss": 22.2789, "step": 10213 }, { "epoch": 0.9479350348027842, "grad_norm": 44.473228454589844, "learning_rate": 7.419502981149296e-08, "loss": 22.7473, "step": 10214 }, { "epoch": 0.9480278422273782, "grad_norm": 56.6291389465332, "learning_rate": 7.393187752239627e-08, "loss": 23.1514, "step": 10215 }, { "epoch": 0.9481206496519722, "grad_norm": 52.4691162109375, "learning_rate": 7.366918925639843e-08, "loss": 23.7711, "step": 10216 }, { "epoch": 0.9482134570765661, "grad_norm": 49.576416015625, "learning_rate": 7.340696503824573e-08, "loss": 22.5263, "step": 10217 }, { "epoch": 0.9483062645011601, "grad_norm": 44.99507522583008, "learning_rate": 7.314520489263787e-08, "loss": 24.1673, "step": 10218 }, { "epoch": 0.948399071925754, "grad_norm": 60.66062927246094, "learning_rate": 7.288390884423291e-08, "loss": 23.0044, "step": 10219 }, { "epoch": 0.9484918793503481, "grad_norm": 54.35358810424805, "learning_rate": 7.26230769176428e-08, "loss": 23.16, "step": 10220 }, { "epoch": 0.948584686774942, "grad_norm": 47.86343002319336, "learning_rate": 7.236270913743904e-08, "loss": 21.6318, "step": 10221 }, { "epoch": 0.948677494199536, "grad_norm": 52.51279067993164, "learning_rate": 7.210280552814642e-08, "loss": 22.7774, "step": 10222 }, { "epoch": 0.9487703016241299, "grad_norm": 54.486141204833984, "learning_rate": 7.184336611424759e-08, "loss": 22.4303, "step": 10223 }, { "epoch": 0.948863109048724, "grad_norm": 55.519989013671875, "learning_rate": 7.158439092018077e-08, "loss": 23.1311, "step": 10224 }, { "epoch": 0.9489559164733179, "grad_norm": 62.52495193481445, "learning_rate": 7.132587997034146e-08, "loss": 23.9446, "step": 10225 }, { "epoch": 0.9490487238979118, "grad_norm": 50.558773040771484, "learning_rate": 7.106783328907963e-08, "loss": 21.8449, "step": 10226 }, { "epoch": 0.9491415313225058, "grad_norm": 65.16565704345703, "learning_rate": 7.081025090070359e-08, "loss": 22.6275, "step": 10227 }, { "epoch": 0.9492343387470997, "grad_norm": 54.5051155090332, "learning_rate": 7.055313282947673e-08, "loss": 21.064, "step": 10228 }, { "epoch": 0.9493271461716938, "grad_norm": 51.99422073364258, "learning_rate": 7.029647909961857e-08, "loss": 23.4372, "step": 10229 }, { "epoch": 0.9494199535962877, "grad_norm": 59.783294677734375, "learning_rate": 7.004028973530586e-08, "loss": 24.302, "step": 10230 }, { "epoch": 0.9495127610208817, "grad_norm": 53.893096923828125, "learning_rate": 6.978456476067041e-08, "loss": 22.9557, "step": 10231 }, { "epoch": 0.9496055684454756, "grad_norm": 46.42013931274414, "learning_rate": 6.952930419980131e-08, "loss": 23.4293, "step": 10232 }, { "epoch": 0.9496983758700696, "grad_norm": 56.409847259521484, "learning_rate": 6.927450807674319e-08, "loss": 21.9875, "step": 10233 }, { "epoch": 0.9497911832946636, "grad_norm": 53.623043060302734, "learning_rate": 6.90201764154963e-08, "loss": 23.4461, "step": 10234 }, { "epoch": 0.9498839907192576, "grad_norm": 48.606895446777344, "learning_rate": 6.876630924001981e-08, "loss": 23.5092, "step": 10235 }, { "epoch": 0.9499767981438515, "grad_norm": 52.69559097290039, "learning_rate": 6.851290657422627e-08, "loss": 24.0864, "step": 10236 }, { "epoch": 0.9500696055684454, "grad_norm": 55.633914947509766, "learning_rate": 6.825996844198601e-08, "loss": 21.5786, "step": 10237 }, { "epoch": 0.9501624129930395, "grad_norm": 59.84642791748047, "learning_rate": 6.800749486712499e-08, "loss": 22.5273, "step": 10238 }, { "epoch": 0.9502552204176334, "grad_norm": 44.09244918823242, "learning_rate": 6.775548587342529e-08, "loss": 21.8332, "step": 10239 }, { "epoch": 0.9503480278422274, "grad_norm": 52.785037994384766, "learning_rate": 6.75039414846257e-08, "loss": 22.2216, "step": 10240 }, { "epoch": 0.9504408352668213, "grad_norm": 50.694244384765625, "learning_rate": 6.72528617244217e-08, "loss": 23.164, "step": 10241 }, { "epoch": 0.9505336426914153, "grad_norm": 41.439029693603516, "learning_rate": 6.700224661646326e-08, "loss": 21.4086, "step": 10242 }, { "epoch": 0.9506264501160093, "grad_norm": 57.674293518066406, "learning_rate": 6.675209618435874e-08, "loss": 22.2291, "step": 10243 }, { "epoch": 0.9507192575406033, "grad_norm": 42.881683349609375, "learning_rate": 6.650241045167094e-08, "loss": 21.4207, "step": 10244 }, { "epoch": 0.9508120649651972, "grad_norm": 50.05889129638672, "learning_rate": 6.625318944191939e-08, "loss": 22.7799, "step": 10245 }, { "epoch": 0.9509048723897912, "grad_norm": 48.8560791015625, "learning_rate": 6.600443317858085e-08, "loss": 24.15, "step": 10246 }, { "epoch": 0.9509976798143851, "grad_norm": 43.26793670654297, "learning_rate": 6.575614168508715e-08, "loss": 21.7584, "step": 10247 }, { "epoch": 0.9510904872389792, "grad_norm": 51.468353271484375, "learning_rate": 6.550831498482679e-08, "loss": 22.7421, "step": 10248 }, { "epoch": 0.9511832946635731, "grad_norm": 45.753936767578125, "learning_rate": 6.526095310114445e-08, "loss": 23.7908, "step": 10249 }, { "epoch": 0.951276102088167, "grad_norm": 43.366390228271484, "learning_rate": 6.501405605734145e-08, "loss": 21.3137, "step": 10250 }, { "epoch": 0.951368909512761, "grad_norm": 59.25817108154297, "learning_rate": 6.476762387667313e-08, "loss": 21.6164, "step": 10251 }, { "epoch": 0.951461716937355, "grad_norm": 55.88442611694336, "learning_rate": 6.452165658235532e-08, "loss": 21.3033, "step": 10252 }, { "epoch": 0.951554524361949, "grad_norm": 49.52070999145508, "learning_rate": 6.427615419755561e-08, "loss": 24.1067, "step": 10253 }, { "epoch": 0.9516473317865429, "grad_norm": 48.02632141113281, "learning_rate": 6.403111674539996e-08, "loss": 22.869, "step": 10254 }, { "epoch": 0.9517401392111369, "grad_norm": 68.50174713134766, "learning_rate": 6.378654424897102e-08, "loss": 22.4837, "step": 10255 }, { "epoch": 0.9518329466357308, "grad_norm": 50.35444259643555, "learning_rate": 6.354243673130644e-08, "loss": 20.8776, "step": 10256 }, { "epoch": 0.9519257540603249, "grad_norm": 53.879703521728516, "learning_rate": 6.329879421540063e-08, "loss": 22.5135, "step": 10257 }, { "epoch": 0.9520185614849188, "grad_norm": 51.556678771972656, "learning_rate": 6.305561672420358e-08, "loss": 19.5822, "step": 10258 }, { "epoch": 0.9521113689095128, "grad_norm": 56.09901809692383, "learning_rate": 6.281290428062304e-08, "loss": 22.4067, "step": 10259 }, { "epoch": 0.9522041763341067, "grad_norm": 59.02207565307617, "learning_rate": 6.257065690752129e-08, "loss": 20.2288, "step": 10260 }, { "epoch": 0.9522969837587006, "grad_norm": 53.638511657714844, "learning_rate": 6.23288746277173e-08, "loss": 22.0966, "step": 10261 }, { "epoch": 0.9523897911832947, "grad_norm": 52.496891021728516, "learning_rate": 6.20875574639862e-08, "loss": 22.1449, "step": 10262 }, { "epoch": 0.9524825986078886, "grad_norm": 51.11360549926758, "learning_rate": 6.184670543905979e-08, "loss": 22.0344, "step": 10263 }, { "epoch": 0.9525754060324826, "grad_norm": 55.000755310058594, "learning_rate": 6.160631857562605e-08, "loss": 24.7171, "step": 10264 }, { "epoch": 0.9526682134570765, "grad_norm": 54.4942512512207, "learning_rate": 6.136639689632795e-08, "loss": 23.2205, "step": 10265 }, { "epoch": 0.9527610208816706, "grad_norm": 43.224342346191406, "learning_rate": 6.112694042376632e-08, "loss": 22.2949, "step": 10266 }, { "epoch": 0.9528538283062645, "grad_norm": 47.26469421386719, "learning_rate": 6.088794918049701e-08, "loss": 21.9995, "step": 10267 }, { "epoch": 0.9529466357308585, "grad_norm": 53.84956359863281, "learning_rate": 6.0649423189032e-08, "loss": 23.1024, "step": 10268 }, { "epoch": 0.9530394431554524, "grad_norm": 59.48360061645508, "learning_rate": 6.041136247184054e-08, "loss": 23.7052, "step": 10269 }, { "epoch": 0.9531322505800464, "grad_norm": 49.92302322387695, "learning_rate": 6.017376705134692e-08, "loss": 22.6393, "step": 10270 }, { "epoch": 0.9532250580046404, "grad_norm": 47.30778884887695, "learning_rate": 5.993663694993213e-08, "loss": 23.8208, "step": 10271 }, { "epoch": 0.9533178654292344, "grad_norm": 43.96870040893555, "learning_rate": 5.969997218993328e-08, "loss": 23.0648, "step": 10272 }, { "epoch": 0.9534106728538283, "grad_norm": 45.09481430053711, "learning_rate": 5.946377279364368e-08, "loss": 21.8153, "step": 10273 }, { "epoch": 0.9535034802784222, "grad_norm": 53.561058044433594, "learning_rate": 5.9228038783312735e-08, "loss": 23.5317, "step": 10274 }, { "epoch": 0.9535962877030162, "grad_norm": 54.93000030517578, "learning_rate": 5.899277018114491e-08, "loss": 23.2856, "step": 10275 }, { "epoch": 0.9536890951276102, "grad_norm": 44.86988067626953, "learning_rate": 5.875796700930359e-08, "loss": 21.5331, "step": 10276 }, { "epoch": 0.9537819025522042, "grad_norm": 57.261993408203125, "learning_rate": 5.8523629289905516e-08, "loss": 22.8014, "step": 10277 }, { "epoch": 0.9538747099767981, "grad_norm": 53.404075622558594, "learning_rate": 5.8289757045025816e-08, "loss": 21.4518, "step": 10278 }, { "epoch": 0.9539675174013921, "grad_norm": 39.84640884399414, "learning_rate": 5.805635029669299e-08, "loss": 21.9165, "step": 10279 }, { "epoch": 0.9540603248259861, "grad_norm": 42.7158203125, "learning_rate": 5.7823409066894985e-08, "loss": 20.5758, "step": 10280 }, { "epoch": 0.9541531322505801, "grad_norm": 53.75983428955078, "learning_rate": 5.75909333775726e-08, "loss": 25.1277, "step": 10281 }, { "epoch": 0.954245939675174, "grad_norm": 49.67818069458008, "learning_rate": 5.735892325062609e-08, "loss": 23.2236, "step": 10282 }, { "epoch": 0.954338747099768, "grad_norm": 46.27217483520508, "learning_rate": 5.712737870790963e-08, "loss": 23.0758, "step": 10283 }, { "epoch": 0.9544315545243619, "grad_norm": 41.281211853027344, "learning_rate": 5.689629977123412e-08, "loss": 23.1308, "step": 10284 }, { "epoch": 0.954524361948956, "grad_norm": 42.69731140136719, "learning_rate": 5.666568646236603e-08, "loss": 22.2073, "step": 10285 }, { "epoch": 0.9546171693735499, "grad_norm": 49.83530807495117, "learning_rate": 5.64355388030291e-08, "loss": 20.1529, "step": 10286 }, { "epoch": 0.9547099767981438, "grad_norm": 54.8348274230957, "learning_rate": 5.6205856814902646e-08, "loss": 21.925, "step": 10287 }, { "epoch": 0.9548027842227378, "grad_norm": 54.0928955078125, "learning_rate": 5.597664051962271e-08, "loss": 23.7781, "step": 10288 }, { "epoch": 0.9548955916473317, "grad_norm": 49.46710968017578, "learning_rate": 5.5747889938779776e-08, "loss": 23.4773, "step": 10289 }, { "epoch": 0.9549883990719258, "grad_norm": 54.93189239501953, "learning_rate": 5.551960509392218e-08, "loss": 23.4605, "step": 10290 }, { "epoch": 0.9550812064965197, "grad_norm": 50.987060546875, "learning_rate": 5.529178600655327e-08, "loss": 24.1397, "step": 10291 }, { "epoch": 0.9551740139211137, "grad_norm": 50.886619567871094, "learning_rate": 5.506443269813366e-08, "loss": 24.7863, "step": 10292 }, { "epoch": 0.9552668213457076, "grad_norm": 47.009735107421875, "learning_rate": 5.4837545190079e-08, "loss": 24.5099, "step": 10293 }, { "epoch": 0.9553596287703017, "grad_norm": 42.668277740478516, "learning_rate": 5.4611123503762186e-08, "loss": 22.5379, "step": 10294 }, { "epoch": 0.9554524361948956, "grad_norm": 53.928592681884766, "learning_rate": 5.4385167660510605e-08, "loss": 22.1978, "step": 10295 }, { "epoch": 0.9555452436194896, "grad_norm": 45.42876434326172, "learning_rate": 5.415967768160946e-08, "loss": 21.6689, "step": 10296 }, { "epoch": 0.9556380510440835, "grad_norm": 40.983253479003906, "learning_rate": 5.3934653588299525e-08, "loss": 21.5734, "step": 10297 }, { "epoch": 0.9557308584686774, "grad_norm": 51.02669906616211, "learning_rate": 5.371009540177607e-08, "loss": 23.053, "step": 10298 }, { "epoch": 0.9558236658932715, "grad_norm": 46.373130798339844, "learning_rate": 5.3486003143193855e-08, "loss": 21.9194, "step": 10299 }, { "epoch": 0.9559164733178654, "grad_norm": 44.38329315185547, "learning_rate": 5.326237683365987e-08, "loss": 23.0729, "step": 10300 }, { "epoch": 0.9560092807424594, "grad_norm": 56.591861724853516, "learning_rate": 5.30392164942406e-08, "loss": 23.0856, "step": 10301 }, { "epoch": 0.9561020881670533, "grad_norm": 50.92586135864258, "learning_rate": 5.281652214595701e-08, "loss": 23.9103, "step": 10302 }, { "epoch": 0.9561948955916474, "grad_norm": 49.58250045776367, "learning_rate": 5.2594293809785645e-08, "loss": 24.4222, "step": 10303 }, { "epoch": 0.9562877030162413, "grad_norm": 47.675315856933594, "learning_rate": 5.237253150666033e-08, "loss": 23.8392, "step": 10304 }, { "epoch": 0.9563805104408353, "grad_norm": 54.293785095214844, "learning_rate": 5.2151235257470455e-08, "loss": 21.7185, "step": 10305 }, { "epoch": 0.9564733178654292, "grad_norm": 51.00064468383789, "learning_rate": 5.1930405083061575e-08, "loss": 21.1775, "step": 10306 }, { "epoch": 0.9565661252900232, "grad_norm": 54.67634582519531, "learning_rate": 5.171004100423482e-08, "loss": 22.5559, "step": 10307 }, { "epoch": 0.9566589327146172, "grad_norm": 59.51408767700195, "learning_rate": 5.149014304174915e-08, "loss": 21.4804, "step": 10308 }, { "epoch": 0.9567517401392112, "grad_norm": 43.88523864746094, "learning_rate": 5.127071121631688e-08, "loss": 23.5461, "step": 10309 }, { "epoch": 0.9568445475638051, "grad_norm": 48.813392639160156, "learning_rate": 5.10517455486087e-08, "loss": 24.3006, "step": 10310 }, { "epoch": 0.956937354988399, "grad_norm": 48.604915618896484, "learning_rate": 5.083324605925144e-08, "loss": 22.0352, "step": 10311 }, { "epoch": 0.957030162412993, "grad_norm": 63.547210693359375, "learning_rate": 5.0615212768825305e-08, "loss": 23.7172, "step": 10312 }, { "epoch": 0.957122969837587, "grad_norm": 53.141212463378906, "learning_rate": 5.039764569787053e-08, "loss": 21.0102, "step": 10313 }, { "epoch": 0.957215777262181, "grad_norm": 50.856231689453125, "learning_rate": 5.018054486687962e-08, "loss": 22.8244, "step": 10314 }, { "epoch": 0.9573085846867749, "grad_norm": 51.374454498291016, "learning_rate": 4.9963910296303984e-08, "loss": 21.2479, "step": 10315 }, { "epoch": 0.9574013921113689, "grad_norm": 52.443443298339844, "learning_rate": 4.9747742006549524e-08, "loss": 22.9918, "step": 10316 }, { "epoch": 0.9574941995359629, "grad_norm": 165.24679565429688, "learning_rate": 4.95320400179794e-08, "loss": 22.6679, "step": 10317 }, { "epoch": 0.9575870069605569, "grad_norm": 64.77871704101562, "learning_rate": 4.931680435091124e-08, "loss": 23.2255, "step": 10318 }, { "epoch": 0.9576798143851508, "grad_norm": 55.43928146362305, "learning_rate": 4.9102035025620497e-08, "loss": 23.1266, "step": 10319 }, { "epoch": 0.9577726218097448, "grad_norm": 44.80368423461914, "learning_rate": 4.8887732062337656e-08, "loss": 22.2404, "step": 10320 }, { "epoch": 0.9578654292343387, "grad_norm": 47.92766571044922, "learning_rate": 4.8673895481249345e-08, "loss": 22.8482, "step": 10321 }, { "epoch": 0.9579582366589328, "grad_norm": 47.38381576538086, "learning_rate": 4.846052530249834e-08, "loss": 22.9536, "step": 10322 }, { "epoch": 0.9580510440835267, "grad_norm": 45.825016021728516, "learning_rate": 4.824762154618412e-08, "loss": 22.1669, "step": 10323 }, { "epoch": 0.9581438515081206, "grad_norm": 46.92314529418945, "learning_rate": 4.80351842323612e-08, "loss": 21.7083, "step": 10324 }, { "epoch": 0.9582366589327146, "grad_norm": 49.535987854003906, "learning_rate": 4.782321338104079e-08, "loss": 22.8883, "step": 10325 }, { "epoch": 0.9583294663573085, "grad_norm": 46.54362106323242, "learning_rate": 4.761170901219025e-08, "loss": 23.0633, "step": 10326 }, { "epoch": 0.9584222737819026, "grad_norm": 52.752925872802734, "learning_rate": 4.740067114573199e-08, "loss": 22.9789, "step": 10327 }, { "epoch": 0.9585150812064965, "grad_norm": 47.22789764404297, "learning_rate": 4.719009980154621e-08, "loss": 23.4474, "step": 10328 }, { "epoch": 0.9586078886310905, "grad_norm": 49.920894622802734, "learning_rate": 4.697999499946704e-08, "loss": 23.5147, "step": 10329 }, { "epoch": 0.9587006960556844, "grad_norm": 52.55079650878906, "learning_rate": 4.6770356759287536e-08, "loss": 22.8558, "step": 10330 }, { "epoch": 0.9587935034802785, "grad_norm": 48.276397705078125, "learning_rate": 4.6561185100753026e-08, "loss": 20.2226, "step": 10331 }, { "epoch": 0.9588863109048724, "grad_norm": 54.03783416748047, "learning_rate": 4.635248004356885e-08, "loss": 21.8874, "step": 10332 }, { "epoch": 0.9589791183294664, "grad_norm": 48.223392486572266, "learning_rate": 4.6144241607392616e-08, "loss": 23.3133, "step": 10333 }, { "epoch": 0.9590719257540603, "grad_norm": 50.19010543823242, "learning_rate": 4.5936469811841967e-08, "loss": 22.8419, "step": 10334 }, { "epoch": 0.9591647331786542, "grad_norm": 50.52358627319336, "learning_rate": 4.5729164676486805e-08, "loss": 22.3696, "step": 10335 }, { "epoch": 0.9592575406032483, "grad_norm": 48.24064636230469, "learning_rate": 4.55223262208554e-08, "loss": 24.433, "step": 10336 }, { "epoch": 0.9593503480278422, "grad_norm": 54.34562301635742, "learning_rate": 4.531595446443104e-08, "loss": 22.9629, "step": 10337 }, { "epoch": 0.9594431554524362, "grad_norm": 56.37398910522461, "learning_rate": 4.5110049426653755e-08, "loss": 22.933, "step": 10338 }, { "epoch": 0.9595359628770301, "grad_norm": 44.48487091064453, "learning_rate": 4.490461112691913e-08, "loss": 25.1124, "step": 10339 }, { "epoch": 0.9596287703016241, "grad_norm": 50.34727096557617, "learning_rate": 4.469963958458001e-08, "loss": 24.2276, "step": 10340 }, { "epoch": 0.9597215777262181, "grad_norm": 40.718177795410156, "learning_rate": 4.449513481894208e-08, "loss": 23.4558, "step": 10341 }, { "epoch": 0.9598143851508121, "grad_norm": 50.615909576416016, "learning_rate": 4.429109684927102e-08, "loss": 21.7649, "step": 10342 }, { "epoch": 0.959907192575406, "grad_norm": 50.952701568603516, "learning_rate": 4.408752569478536e-08, "loss": 22.2507, "step": 10343 }, { "epoch": 0.96, "grad_norm": 53.45765686035156, "learning_rate": 4.388442137466198e-08, "loss": 21.6392, "step": 10344 }, { "epoch": 0.960092807424594, "grad_norm": 44.384254455566406, "learning_rate": 4.368178390803224e-08, "loss": 22.4722, "step": 10345 }, { "epoch": 0.960185614849188, "grad_norm": 47.83024215698242, "learning_rate": 4.347961331398476e-08, "loss": 23.2551, "step": 10346 }, { "epoch": 0.9602784222737819, "grad_norm": 46.2484016418457, "learning_rate": 4.327790961156209e-08, "loss": 22.0173, "step": 10347 }, { "epoch": 0.9603712296983759, "grad_norm": 51.2302360534668, "learning_rate": 4.3076672819765683e-08, "loss": 22.1918, "step": 10348 }, { "epoch": 0.9604640371229698, "grad_norm": 47.74195098876953, "learning_rate": 4.28759029575504e-08, "loss": 22.5393, "step": 10349 }, { "epoch": 0.9605568445475638, "grad_norm": 53.067054748535156, "learning_rate": 4.2675600043829425e-08, "loss": 23.5771, "step": 10350 }, { "epoch": 0.9606496519721578, "grad_norm": 49.86796569824219, "learning_rate": 4.2475764097469895e-08, "loss": 21.7015, "step": 10351 }, { "epoch": 0.9607424593967517, "grad_norm": 47.112674713134766, "learning_rate": 4.22763951372962e-08, "loss": 23.338, "step": 10352 }, { "epoch": 0.9608352668213457, "grad_norm": 44.99763870239258, "learning_rate": 4.207749318208776e-08, "loss": 22.593, "step": 10353 }, { "epoch": 0.9609280742459396, "grad_norm": 59.7681770324707, "learning_rate": 4.1879058250581806e-08, "loss": 21.7886, "step": 10354 }, { "epoch": 0.9610208816705337, "grad_norm": 52.464534759521484, "learning_rate": 4.16810903614695e-08, "loss": 22.241, "step": 10355 }, { "epoch": 0.9611136890951276, "grad_norm": 62.314300537109375, "learning_rate": 4.148358953339926e-08, "loss": 21.4816, "step": 10356 }, { "epoch": 0.9612064965197216, "grad_norm": 46.99819564819336, "learning_rate": 4.128655578497509e-08, "loss": 23.4288, "step": 10357 }, { "epoch": 0.9612993039443155, "grad_norm": 50.81759262084961, "learning_rate": 4.1089989134757144e-08, "loss": 23.9388, "step": 10358 }, { "epoch": 0.9613921113689096, "grad_norm": 57.766597747802734, "learning_rate": 4.089388960126117e-08, "loss": 23.5537, "step": 10359 }, { "epoch": 0.9614849187935035, "grad_norm": 59.98920822143555, "learning_rate": 4.0698257202959613e-08, "loss": 22.7534, "step": 10360 }, { "epoch": 0.9615777262180975, "grad_norm": 54.498226165771484, "learning_rate": 4.05030919582805e-08, "loss": 22.9088, "step": 10361 }, { "epoch": 0.9616705336426914, "grad_norm": 44.43730545043945, "learning_rate": 4.0308393885608034e-08, "loss": 22.9291, "step": 10362 }, { "epoch": 0.9617633410672853, "grad_norm": 59.80250930786133, "learning_rate": 4.011416300328141e-08, "loss": 22.8215, "step": 10363 }, { "epoch": 0.9618561484918794, "grad_norm": 47.60066223144531, "learning_rate": 3.9920399329598236e-08, "loss": 22.5376, "step": 10364 }, { "epoch": 0.9619489559164733, "grad_norm": 50.050331115722656, "learning_rate": 3.97271028828089e-08, "loss": 23.1071, "step": 10365 }, { "epoch": 0.9620417633410673, "grad_norm": 43.05050277709961, "learning_rate": 3.953427368112217e-08, "loss": 23.272, "step": 10366 }, { "epoch": 0.9621345707656612, "grad_norm": 55.510108947753906, "learning_rate": 3.934191174270241e-08, "loss": 23.1383, "step": 10367 }, { "epoch": 0.9622273781902552, "grad_norm": 64.10009765625, "learning_rate": 3.9150017085669566e-08, "loss": 24.0604, "step": 10368 }, { "epoch": 0.9623201856148492, "grad_norm": 58.35082244873047, "learning_rate": 3.895858972809918e-08, "loss": 20.486, "step": 10369 }, { "epoch": 0.9624129930394432, "grad_norm": 62.140628814697266, "learning_rate": 3.8767629688022944e-08, "loss": 23.5387, "step": 10370 }, { "epoch": 0.9625058004640371, "grad_norm": 47.048439025878906, "learning_rate": 3.8577136983429795e-08, "loss": 22.189, "step": 10371 }, { "epoch": 0.962598607888631, "grad_norm": 55.92050552368164, "learning_rate": 3.838711163226316e-08, "loss": 22.8677, "step": 10372 }, { "epoch": 0.9626914153132251, "grad_norm": 56.81600570678711, "learning_rate": 3.8197553652422617e-08, "loss": 22.3434, "step": 10373 }, { "epoch": 0.962784222737819, "grad_norm": 62.490882873535156, "learning_rate": 3.800846306176498e-08, "loss": 24.1939, "step": 10374 }, { "epoch": 0.962877030162413, "grad_norm": 45.433563232421875, "learning_rate": 3.7819839878101006e-08, "loss": 22.2852, "step": 10375 }, { "epoch": 0.9629698375870069, "grad_norm": 57.4256706237793, "learning_rate": 3.7631684119198706e-08, "loss": 22.3784, "step": 10376 }, { "epoch": 0.9630626450116009, "grad_norm": 55.86628723144531, "learning_rate": 3.74439958027828e-08, "loss": 22.7375, "step": 10377 }, { "epoch": 0.9631554524361949, "grad_norm": 60.36445999145508, "learning_rate": 3.7256774946531904e-08, "loss": 23.7341, "step": 10378 }, { "epoch": 0.9632482598607889, "grad_norm": 43.65794372558594, "learning_rate": 3.707002156808304e-08, "loss": 21.0082, "step": 10379 }, { "epoch": 0.9633410672853828, "grad_norm": 46.96131134033203, "learning_rate": 3.688373568502601e-08, "loss": 22.6851, "step": 10380 }, { "epoch": 0.9634338747099768, "grad_norm": 59.20449447631836, "learning_rate": 3.669791731491068e-08, "loss": 23.3464, "step": 10381 }, { "epoch": 0.9635266821345707, "grad_norm": 69.48760223388672, "learning_rate": 3.65125664752386e-08, "loss": 22.6491, "step": 10382 }, { "epoch": 0.9636194895591648, "grad_norm": 48.20665740966797, "learning_rate": 3.63276831834708e-08, "loss": 23.6477, "step": 10383 }, { "epoch": 0.9637122969837587, "grad_norm": 48.83568572998047, "learning_rate": 3.614326745702279e-08, "loss": 22.5594, "step": 10384 }, { "epoch": 0.9638051044083527, "grad_norm": 54.466548919677734, "learning_rate": 3.595931931326513e-08, "loss": 21.7222, "step": 10385 }, { "epoch": 0.9638979118329466, "grad_norm": 58.036190032958984, "learning_rate": 3.577583876952562e-08, "loss": 21.0596, "step": 10386 }, { "epoch": 0.9639907192575407, "grad_norm": 53.89171600341797, "learning_rate": 3.559282584308821e-08, "loss": 22.51, "step": 10387 }, { "epoch": 0.9640835266821346, "grad_norm": 44.87622833251953, "learning_rate": 3.541028055119133e-08, "loss": 21.8679, "step": 10388 }, { "epoch": 0.9641763341067285, "grad_norm": 53.006080627441406, "learning_rate": 3.522820291103068e-08, "loss": 22.7536, "step": 10389 }, { "epoch": 0.9642691415313225, "grad_norm": 54.79231643676758, "learning_rate": 3.504659293975809e-08, "loss": 24.2294, "step": 10390 }, { "epoch": 0.9643619489559164, "grad_norm": 45.50328826904297, "learning_rate": 3.4865450654480434e-08, "loss": 22.4964, "step": 10391 }, { "epoch": 0.9644547563805105, "grad_norm": 42.24722671508789, "learning_rate": 3.468477607226017e-08, "loss": 24.2858, "step": 10392 }, { "epoch": 0.9645475638051044, "grad_norm": 42.40639877319336, "learning_rate": 3.4504569210117576e-08, "loss": 23.2879, "step": 10393 }, { "epoch": 0.9646403712296984, "grad_norm": 116.78781127929688, "learning_rate": 3.4324830085026296e-08, "loss": 20.4833, "step": 10394 }, { "epoch": 0.9647331786542923, "grad_norm": 55.81674575805664, "learning_rate": 3.4145558713918336e-08, "loss": 21.689, "step": 10395 }, { "epoch": 0.9648259860788864, "grad_norm": 49.83292007446289, "learning_rate": 3.396675511368019e-08, "loss": 22.9554, "step": 10396 }, { "epoch": 0.9649187935034803, "grad_norm": 61.62129592895508, "learning_rate": 3.378841930115506e-08, "loss": 22.0906, "step": 10397 }, { "epoch": 0.9650116009280743, "grad_norm": 54.511436462402344, "learning_rate": 3.361055129314117e-08, "loss": 23.6366, "step": 10398 }, { "epoch": 0.9651044083526682, "grad_norm": 65.83631134033203, "learning_rate": 3.3433151106394e-08, "loss": 22.9719, "step": 10399 }, { "epoch": 0.9651972157772621, "grad_norm": 52.12782287597656, "learning_rate": 3.325621875762297e-08, "loss": 23.4538, "step": 10400 }, { "epoch": 0.9652900232018562, "grad_norm": 50.09737777709961, "learning_rate": 3.307975426349585e-08, "loss": 23.7924, "step": 10401 }, { "epoch": 0.9653828306264501, "grad_norm": 54.818416595458984, "learning_rate": 3.2903757640634914e-08, "loss": 24.1675, "step": 10402 }, { "epoch": 0.9654756380510441, "grad_norm": 45.775386810302734, "learning_rate": 3.272822890561855e-08, "loss": 20.8065, "step": 10403 }, { "epoch": 0.965568445475638, "grad_norm": 52.0291862487793, "learning_rate": 3.255316807498077e-08, "loss": 21.6787, "step": 10404 }, { "epoch": 0.965661252900232, "grad_norm": 61.69178009033203, "learning_rate": 3.23785751652117e-08, "loss": 21.634, "step": 10405 }, { "epoch": 0.965754060324826, "grad_norm": 49.642662048339844, "learning_rate": 3.2204450192758195e-08, "loss": 24.6678, "step": 10406 }, { "epoch": 0.96584686774942, "grad_norm": 56.4307975769043, "learning_rate": 3.203079317402213e-08, "loss": 21.1887, "step": 10407 }, { "epoch": 0.9659396751740139, "grad_norm": 56.550209045410156, "learning_rate": 3.1857604125361544e-08, "loss": 21.3479, "step": 10408 }, { "epoch": 0.9660324825986079, "grad_norm": 55.69662857055664, "learning_rate": 3.168488306309003e-08, "loss": 23.2603, "step": 10409 }, { "epoch": 0.9661252900232019, "grad_norm": 60.55289840698242, "learning_rate": 3.151263000347793e-08, "loss": 22.8115, "step": 10410 }, { "epoch": 0.9662180974477959, "grad_norm": 63.93483352661133, "learning_rate": 3.134084496275114e-08, "loss": 23.8174, "step": 10411 }, { "epoch": 0.9663109048723898, "grad_norm": 52.228736877441406, "learning_rate": 3.116952795709116e-08, "loss": 24.583, "step": 10412 }, { "epoch": 0.9664037122969837, "grad_norm": 49.77485275268555, "learning_rate": 3.099867900263565e-08, "loss": 22.9867, "step": 10413 }, { "epoch": 0.9664965197215777, "grad_norm": 59.29950714111328, "learning_rate": 3.082829811547838e-08, "loss": 21.4932, "step": 10414 }, { "epoch": 0.9665893271461717, "grad_norm": 59.55234146118164, "learning_rate": 3.0658385311668204e-08, "loss": 21.1435, "step": 10415 }, { "epoch": 0.9666821345707657, "grad_norm": 42.6882209777832, "learning_rate": 3.048894060721064e-08, "loss": 23.6818, "step": 10416 }, { "epoch": 0.9667749419953596, "grad_norm": 49.298274993896484, "learning_rate": 3.031996401806736e-08, "loss": 21.9196, "step": 10417 }, { "epoch": 0.9668677494199536, "grad_norm": 52.25836181640625, "learning_rate": 3.015145556015508e-08, "loss": 23.5086, "step": 10418 }, { "epoch": 0.9669605568445475, "grad_norm": 46.391868591308594, "learning_rate": 2.998341524934778e-08, "loss": 23.3273, "step": 10419 }, { "epoch": 0.9670533642691416, "grad_norm": 57.34467315673828, "learning_rate": 2.981584310147334e-08, "loss": 21.7875, "step": 10420 }, { "epoch": 0.9671461716937355, "grad_norm": 70.96218872070312, "learning_rate": 2.964873913231692e-08, "loss": 23.2701, "step": 10421 }, { "epoch": 0.9672389791183295, "grad_norm": 66.85875701904297, "learning_rate": 2.948210335761925e-08, "loss": 23.6307, "step": 10422 }, { "epoch": 0.9673317865429234, "grad_norm": 46.40481185913086, "learning_rate": 2.931593579307723e-08, "loss": 22.6133, "step": 10423 }, { "epoch": 0.9674245939675175, "grad_norm": 63.032615661621094, "learning_rate": 2.915023645434334e-08, "loss": 23.3259, "step": 10424 }, { "epoch": 0.9675174013921114, "grad_norm": 51.7944221496582, "learning_rate": 2.8985005357026197e-08, "loss": 22.5062, "step": 10425 }, { "epoch": 0.9676102088167053, "grad_norm": 52.34507369995117, "learning_rate": 2.8820242516690022e-08, "loss": 24.6013, "step": 10426 }, { "epoch": 0.9677030162412993, "grad_norm": 49.9136848449707, "learning_rate": 2.8655947948854624e-08, "loss": 21.8583, "step": 10427 }, { "epoch": 0.9677958236658932, "grad_norm": 53.49595260620117, "learning_rate": 2.8492121668997064e-08, "loss": 24.123, "step": 10428 }, { "epoch": 0.9678886310904873, "grad_norm": 46.53102493286133, "learning_rate": 2.8328763692548334e-08, "loss": 21.7863, "step": 10429 }, { "epoch": 0.9679814385150812, "grad_norm": 46.80758285522461, "learning_rate": 2.8165874034897233e-08, "loss": 23.9423, "step": 10430 }, { "epoch": 0.9680742459396752, "grad_norm": 48.40053176879883, "learning_rate": 2.8003452711387047e-08, "loss": 21.9745, "step": 10431 }, { "epoch": 0.9681670533642691, "grad_norm": 62.6953125, "learning_rate": 2.78414997373172e-08, "loss": 22.8464, "step": 10432 }, { "epoch": 0.9682598607888631, "grad_norm": 64.47484588623047, "learning_rate": 2.7680015127943826e-08, "loss": 21.7251, "step": 10433 }, { "epoch": 0.9683526682134571, "grad_norm": 56.13946533203125, "learning_rate": 2.7518998898478644e-08, "loss": 23.5617, "step": 10434 }, { "epoch": 0.9684454756380511, "grad_norm": 54.66501235961914, "learning_rate": 2.7358451064087854e-08, "loss": 23.4344, "step": 10435 }, { "epoch": 0.968538283062645, "grad_norm": 52.12107849121094, "learning_rate": 2.7198371639895473e-08, "loss": 23.1351, "step": 10436 }, { "epoch": 0.9686310904872389, "grad_norm": 62.3022575378418, "learning_rate": 2.7038760640980544e-08, "loss": 22.3933, "step": 10437 }, { "epoch": 0.968723897911833, "grad_norm": 76.89957427978516, "learning_rate": 2.6879618082377713e-08, "loss": 22.0643, "step": 10438 }, { "epoch": 0.9688167053364269, "grad_norm": 50.66202163696289, "learning_rate": 2.6720943979077764e-08, "loss": 24.9429, "step": 10439 }, { "epoch": 0.9689095127610209, "grad_norm": 46.259788513183594, "learning_rate": 2.6562738346027627e-08, "loss": 22.085, "step": 10440 }, { "epoch": 0.9690023201856148, "grad_norm": 59.212120056152344, "learning_rate": 2.640500119812983e-08, "loss": 22.7942, "step": 10441 }, { "epoch": 0.9690951276102088, "grad_norm": 45.31522750854492, "learning_rate": 2.6247732550243043e-08, "loss": 21.9421, "step": 10442 }, { "epoch": 0.9691879350348028, "grad_norm": 55.829341888427734, "learning_rate": 2.6090932417180968e-08, "loss": 23.0933, "step": 10443 }, { "epoch": 0.9692807424593968, "grad_norm": 50.504764556884766, "learning_rate": 2.593460081371457e-08, "loss": 24.3926, "step": 10444 }, { "epoch": 0.9693735498839907, "grad_norm": 55.03842544555664, "learning_rate": 2.5778737754569293e-08, "loss": 22.8857, "step": 10445 }, { "epoch": 0.9694663573085847, "grad_norm": 63.27805709838867, "learning_rate": 2.562334325442728e-08, "loss": 23.1263, "step": 10446 }, { "epoch": 0.9695591647331786, "grad_norm": 50.625396728515625, "learning_rate": 2.546841732792571e-08, "loss": 24.2157, "step": 10447 }, { "epoch": 0.9696519721577727, "grad_norm": 54.59366989135742, "learning_rate": 2.5313959989659575e-08, "loss": 21.5088, "step": 10448 }, { "epoch": 0.9697447795823666, "grad_norm": 45.683998107910156, "learning_rate": 2.5159971254176686e-08, "loss": 23.7408, "step": 10449 }, { "epoch": 0.9698375870069605, "grad_norm": 43.3000373840332, "learning_rate": 2.500645113598377e-08, "loss": 23.0469, "step": 10450 }, { "epoch": 0.9699303944315545, "grad_norm": 46.039154052734375, "learning_rate": 2.485339964954092e-08, "loss": 21.9208, "step": 10451 }, { "epoch": 0.9700232018561485, "grad_norm": 45.509586334228516, "learning_rate": 2.4700816809266615e-08, "loss": 23.9717, "step": 10452 }, { "epoch": 0.9701160092807425, "grad_norm": 52.698509216308594, "learning_rate": 2.454870262953213e-08, "loss": 22.2708, "step": 10453 }, { "epoch": 0.9702088167053364, "grad_norm": 47.370635986328125, "learning_rate": 2.4397057124667667e-08, "loss": 23.3751, "step": 10454 }, { "epoch": 0.9703016241299304, "grad_norm": 49.69508743286133, "learning_rate": 2.4245880308956805e-08, "loss": 24.0421, "step": 10455 }, { "epoch": 0.9703944315545243, "grad_norm": 44.22993850708008, "learning_rate": 2.409517219664037e-08, "loss": 22.0809, "step": 10456 }, { "epoch": 0.9704872389791184, "grad_norm": 65.1445083618164, "learning_rate": 2.394493280191479e-08, "loss": 24.03, "step": 10457 }, { "epoch": 0.9705800464037123, "grad_norm": 50.243873596191406, "learning_rate": 2.3795162138932072e-08, "loss": 23.0464, "step": 10458 }, { "epoch": 0.9706728538283063, "grad_norm": 51.983985900878906, "learning_rate": 2.3645860221800375e-08, "loss": 21.4634, "step": 10459 }, { "epoch": 0.9707656612529002, "grad_norm": 43.750370025634766, "learning_rate": 2.3497027064583454e-08, "loss": 22.1189, "step": 10460 }, { "epoch": 0.9708584686774941, "grad_norm": 47.86888885498047, "learning_rate": 2.3348662681301205e-08, "loss": 22.8613, "step": 10461 }, { "epoch": 0.9709512761020882, "grad_norm": 48.98831558227539, "learning_rate": 2.320076708592911e-08, "loss": 22.0463, "step": 10462 }, { "epoch": 0.9710440835266821, "grad_norm": 50.191871643066406, "learning_rate": 2.3053340292398808e-08, "loss": 22.2102, "step": 10463 }, { "epoch": 0.9711368909512761, "grad_norm": 47.63894271850586, "learning_rate": 2.290638231459641e-08, "loss": 22.6456, "step": 10464 }, { "epoch": 0.97122969837587, "grad_norm": 46.225303649902344, "learning_rate": 2.27598931663664e-08, "loss": 23.2911, "step": 10465 }, { "epoch": 0.9713225058004641, "grad_norm": 53.67162322998047, "learning_rate": 2.261387286150718e-08, "loss": 22.8734, "step": 10466 }, { "epoch": 0.971415313225058, "grad_norm": 49.998626708984375, "learning_rate": 2.2468321413772752e-08, "loss": 23.2149, "step": 10467 }, { "epoch": 0.971508120649652, "grad_norm": 53.686832427978516, "learning_rate": 2.2323238836874928e-08, "loss": 21.6163, "step": 10468 }, { "epoch": 0.9716009280742459, "grad_norm": 50.048336029052734, "learning_rate": 2.2178625144479438e-08, "loss": 22.9634, "step": 10469 }, { "epoch": 0.9716937354988399, "grad_norm": 49.0636100769043, "learning_rate": 2.2034480350208166e-08, "loss": 22.8302, "step": 10470 }, { "epoch": 0.9717865429234339, "grad_norm": 50.92362594604492, "learning_rate": 2.189080446763969e-08, "loss": 23.3055, "step": 10471 }, { "epoch": 0.9718793503480279, "grad_norm": 51.08971405029297, "learning_rate": 2.1747597510308195e-08, "loss": 23.2268, "step": 10472 }, { "epoch": 0.9719721577726218, "grad_norm": 56.83230972290039, "learning_rate": 2.160485949170288e-08, "loss": 22.479, "step": 10473 }, { "epoch": 0.9720649651972157, "grad_norm": 68.63297271728516, "learning_rate": 2.146259042526966e-08, "loss": 24.4904, "step": 10474 }, { "epoch": 0.9721577726218097, "grad_norm": 55.86235427856445, "learning_rate": 2.1320790324409478e-08, "loss": 23.5619, "step": 10475 }, { "epoch": 0.9722505800464037, "grad_norm": 52.5087776184082, "learning_rate": 2.1179459202479436e-08, "loss": 23.1879, "step": 10476 }, { "epoch": 0.9723433874709977, "grad_norm": 55.62687683105469, "learning_rate": 2.1038597072793877e-08, "loss": 22.7571, "step": 10477 }, { "epoch": 0.9724361948955916, "grad_norm": 52.57883834838867, "learning_rate": 2.0898203948619966e-08, "loss": 22.257, "step": 10478 }, { "epoch": 0.9725290023201856, "grad_norm": 54.21349334716797, "learning_rate": 2.0758279843183794e-08, "loss": 24.8425, "step": 10479 }, { "epoch": 0.9726218097447796, "grad_norm": 48.22308349609375, "learning_rate": 2.0618824769664813e-08, "loss": 23.6076, "step": 10480 }, { "epoch": 0.9727146171693736, "grad_norm": 57.66415023803711, "learning_rate": 2.047983874119974e-08, "loss": 24.0279, "step": 10481 }, { "epoch": 0.9728074245939675, "grad_norm": 85.79920959472656, "learning_rate": 2.0341321770880327e-08, "loss": 24.0265, "step": 10482 }, { "epoch": 0.9729002320185615, "grad_norm": 62.738651275634766, "learning_rate": 2.020327387175558e-08, "loss": 23.3587, "step": 10483 }, { "epoch": 0.9729930394431554, "grad_norm": 57.137176513671875, "learning_rate": 2.0065695056827872e-08, "loss": 23.46, "step": 10484 }, { "epoch": 0.9730858468677495, "grad_norm": 53.96316146850586, "learning_rate": 1.9928585339057395e-08, "loss": 21.5346, "step": 10485 }, { "epoch": 0.9731786542923434, "grad_norm": 49.42522048950195, "learning_rate": 1.979194473136048e-08, "loss": 23.2378, "step": 10486 }, { "epoch": 0.9732714617169373, "grad_norm": 57.00847625732422, "learning_rate": 1.9655773246606834e-08, "loss": 21.4606, "step": 10487 }, { "epoch": 0.9733642691415313, "grad_norm": 51.600669860839844, "learning_rate": 1.9520070897623976e-08, "loss": 22.7295, "step": 10488 }, { "epoch": 0.9734570765661253, "grad_norm": 59.44586944580078, "learning_rate": 1.9384837697195015e-08, "loss": 23.1288, "step": 10489 }, { "epoch": 0.9735498839907193, "grad_norm": 48.05786895751953, "learning_rate": 1.9250073658058643e-08, "loss": 22.4131, "step": 10490 }, { "epoch": 0.9736426914153132, "grad_norm": 46.81379699707031, "learning_rate": 1.91157787929086e-08, "loss": 23.1113, "step": 10491 }, { "epoch": 0.9737354988399072, "grad_norm": 47.32994079589844, "learning_rate": 1.8981953114395878e-08, "loss": 22.6856, "step": 10492 }, { "epoch": 0.9738283062645011, "grad_norm": 56.19862747192383, "learning_rate": 1.884859663512595e-08, "loss": 25.123, "step": 10493 }, { "epoch": 0.9739211136890952, "grad_norm": 56.897037506103516, "learning_rate": 1.8715709367660984e-08, "loss": 23.7046, "step": 10494 }, { "epoch": 0.9740139211136891, "grad_norm": 45.75174331665039, "learning_rate": 1.858329132451875e-08, "loss": 23.5187, "step": 10495 }, { "epoch": 0.9741067285382831, "grad_norm": 108.87071990966797, "learning_rate": 1.8451342518172046e-08, "loss": 23.1248, "step": 10496 }, { "epoch": 0.974199535962877, "grad_norm": 69.94288635253906, "learning_rate": 1.8319862961050928e-08, "loss": 23.078, "step": 10497 }, { "epoch": 0.974292343387471, "grad_norm": 65.60430145263672, "learning_rate": 1.8188852665539936e-08, "loss": 22.4962, "step": 10498 }, { "epoch": 0.974385150812065, "grad_norm": 60.544639587402344, "learning_rate": 1.8058311643979753e-08, "loss": 22.3296, "step": 10499 }, { "epoch": 0.974477958236659, "grad_norm": 52.624263763427734, "learning_rate": 1.792823990866721e-08, "loss": 22.1705, "step": 10500 }, { "epoch": 0.9745707656612529, "grad_norm": 54.21009063720703, "learning_rate": 1.7798637471855284e-08, "loss": 22.9765, "step": 10501 }, { "epoch": 0.9746635730858468, "grad_norm": 64.07402038574219, "learning_rate": 1.766950434575143e-08, "loss": 22.8409, "step": 10502 }, { "epoch": 0.9747563805104409, "grad_norm": 64.79896545410156, "learning_rate": 1.7540840542520364e-08, "loss": 21.7535, "step": 10503 }, { "epoch": 0.9748491879350348, "grad_norm": 66.48727416992188, "learning_rate": 1.741264607428128e-08, "loss": 23.743, "step": 10504 }, { "epoch": 0.9749419953596288, "grad_norm": 50.14303207397461, "learning_rate": 1.728492095310952e-08, "loss": 22.0917, "step": 10505 }, { "epoch": 0.9750348027842227, "grad_norm": 55.82547378540039, "learning_rate": 1.715766519103712e-08, "loss": 22.7452, "step": 10506 }, { "epoch": 0.9751276102088167, "grad_norm": 71.37213134765625, "learning_rate": 1.7030878800051166e-08, "loss": 23.4629, "step": 10507 }, { "epoch": 0.9752204176334107, "grad_norm": 70.9920654296875, "learning_rate": 1.6904561792094877e-08, "loss": 21.6137, "step": 10508 }, { "epoch": 0.9753132250580047, "grad_norm": 57.54529571533203, "learning_rate": 1.6778714179065957e-08, "loss": 23.25, "step": 10509 }, { "epoch": 0.9754060324825986, "grad_norm": 48.68192672729492, "learning_rate": 1.665333597281993e-08, "loss": 21.7096, "step": 10510 }, { "epoch": 0.9754988399071925, "grad_norm": 50.40755844116211, "learning_rate": 1.6528427185166784e-08, "loss": 22.6966, "step": 10511 }, { "epoch": 0.9755916473317865, "grad_norm": 47.51387405395508, "learning_rate": 1.640398782787267e-08, "loss": 22.4383, "step": 10512 }, { "epoch": 0.9756844547563805, "grad_norm": 62.613380432128906, "learning_rate": 1.6280017912658763e-08, "loss": 21.7302, "step": 10513 }, { "epoch": 0.9757772621809745, "grad_norm": 48.06116485595703, "learning_rate": 1.6156517451204057e-08, "loss": 22.027, "step": 10514 }, { "epoch": 0.9758700696055684, "grad_norm": 56.23931884765625, "learning_rate": 1.6033486455140913e-08, "loss": 23.4836, "step": 10515 }, { "epoch": 0.9759628770301624, "grad_norm": 49.71546936035156, "learning_rate": 1.5910924936058947e-08, "loss": 22.6077, "step": 10516 }, { "epoch": 0.9760556844547564, "grad_norm": 53.004844665527344, "learning_rate": 1.5788832905502817e-08, "loss": 21.8733, "step": 10517 }, { "epoch": 0.9761484918793504, "grad_norm": 81.58363342285156, "learning_rate": 1.5667210374973874e-08, "loss": 23.2866, "step": 10518 }, { "epoch": 0.9762412993039443, "grad_norm": 62.874542236328125, "learning_rate": 1.5546057355927957e-08, "loss": 24.6059, "step": 10519 }, { "epoch": 0.9763341067285383, "grad_norm": 46.65214157104492, "learning_rate": 1.5425373859777604e-08, "loss": 20.7858, "step": 10520 }, { "epoch": 0.9764269141531322, "grad_norm": 59.38994216918945, "learning_rate": 1.5305159897891497e-08, "loss": 23.3509, "step": 10521 }, { "epoch": 0.9765197215777263, "grad_norm": 49.93226623535156, "learning_rate": 1.51854154815928e-08, "loss": 24.9845, "step": 10522 }, { "epoch": 0.9766125290023202, "grad_norm": 55.10511779785156, "learning_rate": 1.5066140622161385e-08, "loss": 23.1615, "step": 10523 }, { "epoch": 0.9767053364269142, "grad_norm": 50.266422271728516, "learning_rate": 1.49473353308327e-08, "loss": 23.9256, "step": 10524 }, { "epoch": 0.9767981438515081, "grad_norm": 50.49375915527344, "learning_rate": 1.4828999618797246e-08, "loss": 22.0892, "step": 10525 }, { "epoch": 0.976890951276102, "grad_norm": 57.188636779785156, "learning_rate": 1.4711133497203322e-08, "loss": 24.5295, "step": 10526 }, { "epoch": 0.9769837587006961, "grad_norm": 52.0247917175293, "learning_rate": 1.4593736977152051e-08, "loss": 25.3814, "step": 10527 }, { "epoch": 0.97707656612529, "grad_norm": 49.02528381347656, "learning_rate": 1.4476810069702917e-08, "loss": 20.8828, "step": 10528 }, { "epoch": 0.977169373549884, "grad_norm": 44.2208366394043, "learning_rate": 1.4360352785869892e-08, "loss": 21.2043, "step": 10529 }, { "epoch": 0.9772621809744779, "grad_norm": 51.69071578979492, "learning_rate": 1.4244365136623085e-08, "loss": 23.8803, "step": 10530 }, { "epoch": 0.977354988399072, "grad_norm": 48.99146270751953, "learning_rate": 1.4128847132887647e-08, "loss": 23.4096, "step": 10531 }, { "epoch": 0.9774477958236659, "grad_norm": 59.616981506347656, "learning_rate": 1.4013798785545984e-08, "loss": 25.3279, "step": 10532 }, { "epoch": 0.9775406032482599, "grad_norm": 51.17250442504883, "learning_rate": 1.389922010543443e-08, "loss": 24.6782, "step": 10533 }, { "epoch": 0.9776334106728538, "grad_norm": 60.83012771606445, "learning_rate": 1.378511110334657e-08, "loss": 22.6338, "step": 10534 }, { "epoch": 0.9777262180974478, "grad_norm": 55.34008026123047, "learning_rate": 1.3671471790031033e-08, "loss": 22.672, "step": 10535 }, { "epoch": 0.9778190255220418, "grad_norm": 51.27705001831055, "learning_rate": 1.3558302176192584e-08, "loss": 22.2303, "step": 10536 }, { "epoch": 0.9779118329466358, "grad_norm": 59.505714416503906, "learning_rate": 1.3445602272491032e-08, "loss": 21.8542, "step": 10537 }, { "epoch": 0.9780046403712297, "grad_norm": 64.54635620117188, "learning_rate": 1.3333372089542884e-08, "loss": 22.9325, "step": 10538 }, { "epoch": 0.9780974477958236, "grad_norm": 55.930091857910156, "learning_rate": 1.3221611637919684e-08, "loss": 22.8793, "step": 10539 }, { "epoch": 0.9781902552204176, "grad_norm": 48.33604431152344, "learning_rate": 1.3110320928149123e-08, "loss": 24.4336, "step": 10540 }, { "epoch": 0.9782830626450116, "grad_norm": 46.9769287109375, "learning_rate": 1.2999499970713925e-08, "loss": 22.3242, "step": 10541 }, { "epoch": 0.9783758700696056, "grad_norm": 55.912532806396484, "learning_rate": 1.2889148776054627e-08, "loss": 22.9279, "step": 10542 }, { "epoch": 0.9784686774941995, "grad_norm": 57.259429931640625, "learning_rate": 1.2779267354564028e-08, "loss": 21.7479, "step": 10543 }, { "epoch": 0.9785614849187935, "grad_norm": 71.0410385131836, "learning_rate": 1.2669855716594403e-08, "loss": 22.5447, "step": 10544 }, { "epoch": 0.9786542923433875, "grad_norm": 55.178466796875, "learning_rate": 1.2560913872450842e-08, "loss": 22.3888, "step": 10545 }, { "epoch": 0.9787470997679815, "grad_norm": 50.6383171081543, "learning_rate": 1.2452441832396246e-08, "loss": 22.6329, "step": 10546 }, { "epoch": 0.9788399071925754, "grad_norm": 62.60078811645508, "learning_rate": 1.2344439606647995e-08, "loss": 24.258, "step": 10547 }, { "epoch": 0.9789327146171694, "grad_norm": 51.61668395996094, "learning_rate": 1.2236907205379623e-08, "loss": 21.8765, "step": 10548 }, { "epoch": 0.9790255220417633, "grad_norm": 62.54774475097656, "learning_rate": 1.21298446387208e-08, "loss": 21.9829, "step": 10549 }, { "epoch": 0.9791183294663574, "grad_norm": 52.15629959106445, "learning_rate": 1.2023251916755685e-08, "loss": 24.3424, "step": 10550 }, { "epoch": 0.9792111368909513, "grad_norm": 69.58303833007812, "learning_rate": 1.1917129049525689e-08, "loss": 23.1961, "step": 10551 }, { "epoch": 0.9793039443155452, "grad_norm": 67.21236419677734, "learning_rate": 1.1811476047027814e-08, "loss": 22.4961, "step": 10552 }, { "epoch": 0.9793967517401392, "grad_norm": 47.36763000488281, "learning_rate": 1.1706292919212992e-08, "loss": 23.4924, "step": 10553 }, { "epoch": 0.9794895591647331, "grad_norm": 52.519813537597656, "learning_rate": 1.160157967598996e-08, "loss": 22.5103, "step": 10554 }, { "epoch": 0.9795823665893272, "grad_norm": 58.54918670654297, "learning_rate": 1.1497336327222497e-08, "loss": 22.1972, "step": 10555 }, { "epoch": 0.9796751740139211, "grad_norm": 58.67848587036133, "learning_rate": 1.139356288272997e-08, "loss": 23.2711, "step": 10556 }, { "epoch": 0.9797679814385151, "grad_norm": 44.80353546142578, "learning_rate": 1.1290259352287892e-08, "loss": 23.9762, "step": 10557 }, { "epoch": 0.979860788863109, "grad_norm": 55.6153678894043, "learning_rate": 1.1187425745626256e-08, "loss": 22.7795, "step": 10558 }, { "epoch": 0.9799535962877031, "grad_norm": 55.13979721069336, "learning_rate": 1.1085062072432872e-08, "loss": 23.785, "step": 10559 }, { "epoch": 0.980046403712297, "grad_norm": 57.12319564819336, "learning_rate": 1.0983168342348915e-08, "loss": 23.6291, "step": 10560 }, { "epoch": 0.980139211136891, "grad_norm": 48.92540740966797, "learning_rate": 1.0881744564973928e-08, "loss": 23.4017, "step": 10561 }, { "epoch": 0.9802320185614849, "grad_norm": 57.99217987060547, "learning_rate": 1.0780790749860826e-08, "loss": 21.4022, "step": 10562 }, { "epoch": 0.9803248259860788, "grad_norm": 60.29612350463867, "learning_rate": 1.0680306906519223e-08, "loss": 23.2362, "step": 10563 }, { "epoch": 0.9804176334106729, "grad_norm": 59.24973678588867, "learning_rate": 1.0580293044414325e-08, "loss": 22.449, "step": 10564 }, { "epoch": 0.9805104408352668, "grad_norm": 49.16233444213867, "learning_rate": 1.0480749172968041e-08, "loss": 22.1465, "step": 10565 }, { "epoch": 0.9806032482598608, "grad_norm": 58.72122573852539, "learning_rate": 1.038167530155676e-08, "loss": 24.1651, "step": 10566 }, { "epoch": 0.9806960556844547, "grad_norm": 59.86759567260742, "learning_rate": 1.028307143951246e-08, "loss": 23.146, "step": 10567 }, { "epoch": 0.9807888631090487, "grad_norm": 60.12154769897461, "learning_rate": 1.0184937596123823e-08, "loss": 22.2011, "step": 10568 }, { "epoch": 0.9808816705336427, "grad_norm": 57.21721649169922, "learning_rate": 1.0087273780634566e-08, "loss": 20.5568, "step": 10569 }, { "epoch": 0.9809744779582367, "grad_norm": 60.88856887817383, "learning_rate": 9.99008000224455e-09, "loss": 24.0349, "step": 10570 }, { "epoch": 0.9810672853828306, "grad_norm": 47.64347839355469, "learning_rate": 9.893356270109234e-09, "loss": 23.4392, "step": 10571 }, { "epoch": 0.9811600928074246, "grad_norm": 47.57548141479492, "learning_rate": 9.797102593339659e-09, "loss": 23.27, "step": 10572 }, { "epoch": 0.9812529002320186, "grad_norm": 52.526241302490234, "learning_rate": 9.701318981003016e-09, "loss": 21.7256, "step": 10573 }, { "epoch": 0.9813457076566126, "grad_norm": 226.68341064453125, "learning_rate": 9.60600544212098e-09, "loss": 22.1097, "step": 10574 }, { "epoch": 0.9814385150812065, "grad_norm": 59.752037048339844, "learning_rate": 9.511161985673034e-09, "loss": 23.3294, "step": 10575 }, { "epoch": 0.9815313225058004, "grad_norm": 59.95954132080078, "learning_rate": 9.416788620592032e-09, "loss": 23.15, "step": 10576 }, { "epoch": 0.9816241299303944, "grad_norm": 43.32624816894531, "learning_rate": 9.322885355768641e-09, "loss": 20.751, "step": 10577 }, { "epoch": 0.9817169373549884, "grad_norm": 52.7512092590332, "learning_rate": 9.229452200048006e-09, "loss": 23.9202, "step": 10578 }, { "epoch": 0.9818097447795824, "grad_norm": 55.2501106262207, "learning_rate": 9.136489162230866e-09, "loss": 24.0532, "step": 10579 }, { "epoch": 0.9819025522041763, "grad_norm": 53.28773880004883, "learning_rate": 9.043996251074661e-09, "loss": 23.8667, "step": 10580 }, { "epoch": 0.9819953596287703, "grad_norm": 48.51100540161133, "learning_rate": 8.951973475291865e-09, "loss": 21.9301, "step": 10581 }, { "epoch": 0.9820881670533642, "grad_norm": 49.558998107910156, "learning_rate": 8.860420843550543e-09, "loss": 21.671, "step": 10582 }, { "epoch": 0.9821809744779583, "grad_norm": 60.64822006225586, "learning_rate": 8.769338364474911e-09, "loss": 22.7742, "step": 10583 }, { "epoch": 0.9822737819025522, "grad_norm": 58.28181076049805, "learning_rate": 8.678726046644215e-09, "loss": 21.2357, "step": 10584 }, { "epoch": 0.9823665893271462, "grad_norm": 52.87981414794922, "learning_rate": 8.58858389859496e-09, "loss": 23.0227, "step": 10585 }, { "epoch": 0.9824593967517401, "grad_norm": 46.31877899169922, "learning_rate": 8.498911928817577e-09, "loss": 21.2232, "step": 10586 }, { "epoch": 0.9825522041763342, "grad_norm": 57.44832229614258, "learning_rate": 8.409710145759197e-09, "loss": 22.4996, "step": 10587 }, { "epoch": 0.9826450116009281, "grad_norm": 49.9498176574707, "learning_rate": 8.32097855782199e-09, "loss": 23.011, "step": 10588 }, { "epoch": 0.982737819025522, "grad_norm": 56.15985107421875, "learning_rate": 8.232717173364269e-09, "loss": 21.9581, "step": 10589 }, { "epoch": 0.982830626450116, "grad_norm": 46.66435241699219, "learning_rate": 8.144926000701048e-09, "loss": 22.9284, "step": 10590 }, { "epoch": 0.9829234338747099, "grad_norm": 51.009700775146484, "learning_rate": 8.057605048100715e-09, "loss": 22.7117, "step": 10591 }, { "epoch": 0.983016241299304, "grad_norm": 46.37553787231445, "learning_rate": 7.970754323789465e-09, "loss": 22.8244, "step": 10592 }, { "epoch": 0.9831090487238979, "grad_norm": 63.364532470703125, "learning_rate": 7.884373835947979e-09, "loss": 23.7228, "step": 10593 }, { "epoch": 0.9832018561484919, "grad_norm": 48.63108825683594, "learning_rate": 7.798463592713634e-09, "loss": 23.1777, "step": 10594 }, { "epoch": 0.9832946635730858, "grad_norm": 52.03046798706055, "learning_rate": 7.713023602178293e-09, "loss": 24.3673, "step": 10595 }, { "epoch": 0.9833874709976799, "grad_norm": 47.551334381103516, "learning_rate": 7.628053872390517e-09, "loss": 23.0771, "step": 10596 }, { "epoch": 0.9834802784222738, "grad_norm": 53.24045944213867, "learning_rate": 7.543554411354458e-09, "loss": 23.6013, "step": 10597 }, { "epoch": 0.9835730858468678, "grad_norm": 45.3443489074707, "learning_rate": 7.459525227029307e-09, "loss": 23.4381, "step": 10598 }, { "epoch": 0.9836658932714617, "grad_norm": 56.123069763183594, "learning_rate": 7.3759663273309526e-09, "loss": 22.956, "step": 10599 }, { "epoch": 0.9837587006960556, "grad_norm": 44.76820373535156, "learning_rate": 7.2928777201297655e-09, "loss": 21.0333, "step": 10600 }, { "epoch": 0.9838515081206497, "grad_norm": 49.82082748413086, "learning_rate": 7.210259413252818e-09, "loss": 24.3866, "step": 10601 }, { "epoch": 0.9839443155452436, "grad_norm": 53.65726852416992, "learning_rate": 7.128111414482775e-09, "loss": 22.0896, "step": 10602 }, { "epoch": 0.9840371229698376, "grad_norm": 60.36638641357422, "learning_rate": 7.046433731557334e-09, "loss": 20.9803, "step": 10603 }, { "epoch": 0.9841299303944315, "grad_norm": 54.27272033691406, "learning_rate": 6.965226372170897e-09, "loss": 22.4544, "step": 10604 }, { "epoch": 0.9842227378190255, "grad_norm": 54.1738395690918, "learning_rate": 6.884489343972345e-09, "loss": 23.8379, "step": 10605 }, { "epoch": 0.9843155452436195, "grad_norm": 57.300201416015625, "learning_rate": 6.804222654567261e-09, "loss": 21.6025, "step": 10606 }, { "epoch": 0.9844083526682135, "grad_norm": 55.65424346923828, "learning_rate": 6.724426311516263e-09, "loss": 21.9142, "step": 10607 }, { "epoch": 0.9845011600928074, "grad_norm": 51.38932418823242, "learning_rate": 6.645100322336118e-09, "loss": 21.8665, "step": 10608 }, { "epoch": 0.9845939675174014, "grad_norm": 54.10165786743164, "learning_rate": 6.566244694499735e-09, "loss": 23.5556, "step": 10609 }, { "epoch": 0.9846867749419954, "grad_norm": 52.2014045715332, "learning_rate": 6.487859435433952e-09, "loss": 20.7403, "step": 10610 }, { "epoch": 0.9847795823665894, "grad_norm": 58.24095916748047, "learning_rate": 6.4099445525234175e-09, "loss": 23.3423, "step": 10611 }, { "epoch": 0.9848723897911833, "grad_norm": 58.60952377319336, "learning_rate": 6.33250005310726e-09, "loss": 23.3923, "step": 10612 }, { "epoch": 0.9849651972157772, "grad_norm": 53.57300567626953, "learning_rate": 6.255525944480201e-09, "loss": 21.9547, "step": 10613 }, { "epoch": 0.9850580046403712, "grad_norm": 71.63517761230469, "learning_rate": 6.179022233893106e-09, "loss": 23.9979, "step": 10614 }, { "epoch": 0.9851508120649652, "grad_norm": 50.72233581542969, "learning_rate": 6.102988928552988e-09, "loss": 22.0411, "step": 10615 }, { "epoch": 0.9852436194895592, "grad_norm": 54.64097595214844, "learning_rate": 6.027426035621342e-09, "loss": 24.457, "step": 10616 }, { "epoch": 0.9853364269141531, "grad_norm": 62.0626335144043, "learning_rate": 5.9523335622169164e-09, "loss": 21.259, "step": 10617 }, { "epoch": 0.9854292343387471, "grad_norm": 70.41026306152344, "learning_rate": 5.877711515411833e-09, "loss": 22.8559, "step": 10618 }, { "epoch": 0.985522041763341, "grad_norm": 50.26134490966797, "learning_rate": 5.803559902236022e-09, "loss": 23.7954, "step": 10619 }, { "epoch": 0.9856148491879351, "grad_norm": 73.20579528808594, "learning_rate": 5.7298787296750094e-09, "loss": 23.0544, "step": 10620 }, { "epoch": 0.985707656612529, "grad_norm": 61.664710998535156, "learning_rate": 5.6566680046682424e-09, "loss": 22.6103, "step": 10621 }, { "epoch": 0.985800464037123, "grad_norm": 51.2044792175293, "learning_rate": 5.583927734112981e-09, "loss": 23.7301, "step": 10622 }, { "epoch": 0.9858932714617169, "grad_norm": 47.66179656982422, "learning_rate": 5.511657924859859e-09, "loss": 20.9301, "step": 10623 }, { "epoch": 0.985986078886311, "grad_norm": 57.31880187988281, "learning_rate": 5.439858583718427e-09, "loss": 21.5834, "step": 10624 }, { "epoch": 0.9860788863109049, "grad_norm": 62.91832733154297, "learning_rate": 5.368529717449944e-09, "loss": 22.5736, "step": 10625 }, { "epoch": 0.9861716937354988, "grad_norm": 58.50810241699219, "learning_rate": 5.297671332775145e-09, "loss": 21.5462, "step": 10626 }, { "epoch": 0.9862645011600928, "grad_norm": 64.21490478515625, "learning_rate": 5.227283436367581e-09, "loss": 22.4278, "step": 10627 }, { "epoch": 0.9863573085846867, "grad_norm": 54.657470703125, "learning_rate": 5.157366034858058e-09, "loss": 22.4467, "step": 10628 }, { "epoch": 0.9864501160092808, "grad_norm": 46.914608001708984, "learning_rate": 5.08791913483242e-09, "loss": 23.3481, "step": 10629 }, { "epoch": 0.9865429234338747, "grad_norm": 41.34099578857422, "learning_rate": 5.0189427428326554e-09, "loss": 22.3891, "step": 10630 }, { "epoch": 0.9866357308584687, "grad_norm": 57.36687088012695, "learning_rate": 4.95043686535579e-09, "loss": 21.062, "step": 10631 }, { "epoch": 0.9867285382830626, "grad_norm": 86.3873519897461, "learning_rate": 4.88240150885555e-09, "loss": 23.141, "step": 10632 }, { "epoch": 0.9868213457076566, "grad_norm": 55.98324203491211, "learning_rate": 4.814836679740143e-09, "loss": 21.1331, "step": 10633 }, { "epoch": 0.9869141531322506, "grad_norm": 48.44013214111328, "learning_rate": 4.747742384373921e-09, "loss": 22.2368, "step": 10634 }, { "epoch": 0.9870069605568446, "grad_norm": 55.32573318481445, "learning_rate": 4.681118629077386e-09, "loss": 23.0792, "step": 10635 }, { "epoch": 0.9870997679814385, "grad_norm": 58.77842330932617, "learning_rate": 4.614965420126072e-09, "loss": 23.451, "step": 10636 }, { "epoch": 0.9871925754060324, "grad_norm": 49.92677688598633, "learning_rate": 4.549282763751106e-09, "loss": 23.2395, "step": 10637 }, { "epoch": 0.9872853828306265, "grad_norm": 49.231346130371094, "learning_rate": 4.484070666140872e-09, "loss": 21.655, "step": 10638 }, { "epoch": 0.9873781902552204, "grad_norm": 43.0927619934082, "learning_rate": 4.419329133437122e-09, "loss": 20.4225, "step": 10639 }, { "epoch": 0.9874709976798144, "grad_norm": 50.88467025756836, "learning_rate": 4.355058171738313e-09, "loss": 22.8217, "step": 10640 }, { "epoch": 0.9875638051044083, "grad_norm": 44.82062911987305, "learning_rate": 4.291257787099046e-09, "loss": 23.0135, "step": 10641 }, { "epoch": 0.9876566125290023, "grad_norm": 55.316341400146484, "learning_rate": 4.227927985528957e-09, "loss": 21.6084, "step": 10642 }, { "epoch": 0.9877494199535963, "grad_norm": 41.635501861572266, "learning_rate": 4.165068772993275e-09, "loss": 21.9289, "step": 10643 }, { "epoch": 0.9878422273781903, "grad_norm": 46.20928192138672, "learning_rate": 4.1026801554139296e-09, "loss": 23.1508, "step": 10644 }, { "epoch": 0.9879350348027842, "grad_norm": 49.88068771362305, "learning_rate": 4.040762138667331e-09, "loss": 23.7686, "step": 10645 }, { "epoch": 0.9880278422273782, "grad_norm": 50.29572296142578, "learning_rate": 3.979314728585482e-09, "loss": 23.4861, "step": 10646 }, { "epoch": 0.9881206496519721, "grad_norm": 48.96695327758789, "learning_rate": 3.918337930957638e-09, "loss": 24.3891, "step": 10647 }, { "epoch": 0.9882134570765662, "grad_norm": 53.38557434082031, "learning_rate": 3.857831751526986e-09, "loss": 22.3838, "step": 10648 }, { "epoch": 0.9883062645011601, "grad_norm": 41.63921356201172, "learning_rate": 3.797796195992853e-09, "loss": 22.318, "step": 10649 }, { "epoch": 0.988399071925754, "grad_norm": 45.42374038696289, "learning_rate": 3.738231270010717e-09, "loss": 23.2367, "step": 10650 }, { "epoch": 0.988491879350348, "grad_norm": 49.84400177001953, "learning_rate": 3.6791369791921994e-09, "loss": 23.0006, "step": 10651 }, { "epoch": 0.988584686774942, "grad_norm": 55.9603385925293, "learning_rate": 3.6205133291022932e-09, "loss": 22.5511, "step": 10652 }, { "epoch": 0.988677494199536, "grad_norm": 49.15568542480469, "learning_rate": 3.562360325264358e-09, "loss": 23.5057, "step": 10653 }, { "epoch": 0.9887703016241299, "grad_norm": 51.60378646850586, "learning_rate": 3.504677973156234e-09, "loss": 21.0501, "step": 10654 }, { "epoch": 0.9888631090487239, "grad_norm": 50.55988311767578, "learning_rate": 3.4474662782107983e-09, "loss": 21.6655, "step": 10655 }, { "epoch": 0.9889559164733178, "grad_norm": 61.26314926147461, "learning_rate": 3.3907252458176277e-09, "loss": 23.0331, "step": 10656 }, { "epoch": 0.9890487238979119, "grad_norm": 67.1639404296875, "learning_rate": 3.3344548813213363e-09, "loss": 24.0744, "step": 10657 }, { "epoch": 0.9891415313225058, "grad_norm": 50.15852737426758, "learning_rate": 3.2786551900226836e-09, "loss": 25.927, "step": 10658 }, { "epoch": 0.9892343387470998, "grad_norm": 51.55292892456055, "learning_rate": 3.223326177178021e-09, "loss": 23.4457, "step": 10659 }, { "epoch": 0.9893271461716937, "grad_norm": 52.94868087768555, "learning_rate": 3.168467847998735e-09, "loss": 23.1332, "step": 10660 }, { "epoch": 0.9894199535962876, "grad_norm": 51.29691696166992, "learning_rate": 3.1140802076529143e-09, "loss": 23.7476, "step": 10661 }, { "epoch": 0.9895127610208817, "grad_norm": 50.64518737792969, "learning_rate": 3.060163261263682e-09, "loss": 23.421, "step": 10662 }, { "epoch": 0.9896055684454756, "grad_norm": 57.645511627197266, "learning_rate": 3.0067170139091996e-09, "loss": 22.4782, "step": 10663 }, { "epoch": 0.9896983758700696, "grad_norm": 62.05097961425781, "learning_rate": 2.9537414706243273e-09, "loss": 24.1955, "step": 10664 }, { "epoch": 0.9897911832946635, "grad_norm": 53.2815055847168, "learning_rate": 2.9012366363995183e-09, "loss": 21.8125, "step": 10665 }, { "epoch": 0.9898839907192576, "grad_norm": 54.925933837890625, "learning_rate": 2.8492025161802604e-09, "loss": 24.117, "step": 10666 }, { "epoch": 0.9899767981438515, "grad_norm": 53.83125305175781, "learning_rate": 2.7976391148681893e-09, "loss": 23.6342, "step": 10667 }, { "epoch": 0.9900696055684455, "grad_norm": 50.875282287597656, "learning_rate": 2.7465464373205296e-09, "loss": 22.9462, "step": 10668 }, { "epoch": 0.9901624129930394, "grad_norm": 47.65554428100586, "learning_rate": 2.695924488350099e-09, "loss": 24.691, "step": 10669 }, { "epoch": 0.9902552204176334, "grad_norm": 51.08847427368164, "learning_rate": 2.64577327272475e-09, "loss": 23.8885, "step": 10670 }, { "epoch": 0.9903480278422274, "grad_norm": 54.36460494995117, "learning_rate": 2.596092795169036e-09, "loss": 20.6765, "step": 10671 }, { "epoch": 0.9904408352668214, "grad_norm": 46.91964340209961, "learning_rate": 2.5468830603631035e-09, "loss": 22.9105, "step": 10672 }, { "epoch": 0.9905336426914153, "grad_norm": 43.42071533203125, "learning_rate": 2.4981440729421325e-09, "loss": 21.9934, "step": 10673 }, { "epoch": 0.9906264501160092, "grad_norm": 54.07381057739258, "learning_rate": 2.4498758374968955e-09, "loss": 22.7547, "step": 10674 }, { "epoch": 0.9907192575406032, "grad_norm": 47.005863189697266, "learning_rate": 2.4020783585743114e-09, "loss": 21.1358, "step": 10675 }, { "epoch": 0.9908120649651972, "grad_norm": 48.489070892333984, "learning_rate": 2.3547516406768887e-09, "loss": 24.6852, "step": 10676 }, { "epoch": 0.9909048723897912, "grad_norm": 44.014259338378906, "learning_rate": 2.307895688262729e-09, "loss": 22.236, "step": 10677 }, { "epoch": 0.9909976798143851, "grad_norm": 47.75505065917969, "learning_rate": 2.2615105057455235e-09, "loss": 21.9836, "step": 10678 }, { "epoch": 0.9910904872389791, "grad_norm": 51.228336334228516, "learning_rate": 2.2155960974945544e-09, "loss": 21.5988, "step": 10679 }, { "epoch": 0.9911832946635731, "grad_norm": 53.78989028930664, "learning_rate": 2.1701524678346964e-09, "loss": 20.5192, "step": 10680 }, { "epoch": 0.9912761020881671, "grad_norm": 37.5600700378418, "learning_rate": 2.125179621046414e-09, "loss": 23.6636, "step": 10681 }, { "epoch": 0.991368909512761, "grad_norm": 62.19527816772461, "learning_rate": 2.0806775613668727e-09, "loss": 22.3782, "step": 10682 }, { "epoch": 0.991461716937355, "grad_norm": 56.37523651123047, "learning_rate": 2.0366462929877208e-09, "loss": 23.9197, "step": 10683 }, { "epoch": 0.9915545243619489, "grad_norm": 71.07593536376953, "learning_rate": 1.9930858200561955e-09, "loss": 22.846, "step": 10684 }, { "epoch": 0.991647331786543, "grad_norm": 55.642574310302734, "learning_rate": 1.949996146675681e-09, "loss": 21.4416, "step": 10685 }, { "epoch": 0.9917401392111369, "grad_norm": 41.64809036254883, "learning_rate": 1.9073772769051533e-09, "loss": 21.9129, "step": 10686 }, { "epoch": 0.9918329466357308, "grad_norm": 48.57753372192383, "learning_rate": 1.865229214759734e-09, "loss": 21.8874, "step": 10687 }, { "epoch": 0.9919257540603248, "grad_norm": 45.694087982177734, "learning_rate": 1.8235519642090248e-09, "loss": 23.0947, "step": 10688 }, { "epoch": 0.9920185614849188, "grad_norm": 61.13233947753906, "learning_rate": 1.7823455291793302e-09, "loss": 21.3544, "step": 10689 }, { "epoch": 0.9921113689095128, "grad_norm": 54.2498664855957, "learning_rate": 1.7416099135519892e-09, "loss": 23.9234, "step": 10690 }, { "epoch": 0.9922041763341067, "grad_norm": 50.92909622192383, "learning_rate": 1.7013451211639332e-09, "loss": 22.4986, "step": 10691 }, { "epoch": 0.9922969837587007, "grad_norm": 60.41496658325195, "learning_rate": 1.6615511558082386e-09, "loss": 21.5129, "step": 10692 }, { "epoch": 0.9923897911832946, "grad_norm": 41.97617721557617, "learning_rate": 1.622228021233574e-09, "loss": 23.0878, "step": 10693 }, { "epoch": 0.9924825986078887, "grad_norm": 66.39675903320312, "learning_rate": 1.583375721144198e-09, "loss": 22.239, "step": 10694 }, { "epoch": 0.9925754060324826, "grad_norm": 56.34593200683594, "learning_rate": 1.544994259199406e-09, "loss": 23.0016, "step": 10695 }, { "epoch": 0.9926682134570766, "grad_norm": 60.13408279418945, "learning_rate": 1.5070836390151943e-09, "loss": 22.8306, "step": 10696 }, { "epoch": 0.9927610208816705, "grad_norm": 60.39073181152344, "learning_rate": 1.4696438641614852e-09, "loss": 22.3543, "step": 10697 }, { "epoch": 0.9928538283062645, "grad_norm": 43.44587707519531, "learning_rate": 1.4326749381665673e-09, "loss": 20.2943, "step": 10698 }, { "epoch": 0.9929466357308585, "grad_norm": 58.177371978759766, "learning_rate": 1.3961768645121e-09, "loss": 22.8914, "step": 10699 }, { "epoch": 0.9930394431554525, "grad_norm": 54.426090240478516, "learning_rate": 1.3601496466358887e-09, "loss": 23.2623, "step": 10700 }, { "epoch": 0.9931322505800464, "grad_norm": 62.799766540527344, "learning_rate": 1.3245932879313305e-09, "loss": 22.7523, "step": 10701 }, { "epoch": 0.9932250580046403, "grad_norm": 60.73463439941406, "learning_rate": 1.2895077917490783e-09, "loss": 24.3604, "step": 10702 }, { "epoch": 0.9933178654292344, "grad_norm": 57.61709213256836, "learning_rate": 1.2548931613926008e-09, "loss": 22.6254, "step": 10703 }, { "epoch": 0.9934106728538283, "grad_norm": 47.0391960144043, "learning_rate": 1.220749400123733e-09, "loss": 24.3677, "step": 10704 }, { "epoch": 0.9935034802784223, "grad_norm": 66.38119506835938, "learning_rate": 1.187076511157681e-09, "loss": 23.0311, "step": 10705 }, { "epoch": 0.9935962877030162, "grad_norm": 46.818115234375, "learning_rate": 1.153874497666907e-09, "loss": 21.6517, "step": 10706 }, { "epoch": 0.9936890951276102, "grad_norm": 52.245079040527344, "learning_rate": 1.1211433627789091e-09, "loss": 22.8901, "step": 10707 }, { "epoch": 0.9937819025522042, "grad_norm": 50.695255279541016, "learning_rate": 1.0888831095773323e-09, "loss": 22.555, "step": 10708 }, { "epoch": 0.9938747099767982, "grad_norm": 67.05784606933594, "learning_rate": 1.0570937410997462e-09, "loss": 21.8378, "step": 10709 }, { "epoch": 0.9939675174013921, "grad_norm": 48.36555099487305, "learning_rate": 1.025775260342088e-09, "loss": 22.1685, "step": 10710 }, { "epoch": 0.994060324825986, "grad_norm": 49.15750503540039, "learning_rate": 9.949276702531096e-10, "loss": 22.4383, "step": 10711 }, { "epoch": 0.99415313225058, "grad_norm": 60.245819091796875, "learning_rate": 9.645509737399306e-10, "loss": 23.5003, "step": 10712 }, { "epoch": 0.994245939675174, "grad_norm": 75.54403686523438, "learning_rate": 9.346451736630402e-10, "loss": 22.9667, "step": 10713 }, { "epoch": 0.994338747099768, "grad_norm": 61.50632095336914, "learning_rate": 9.052102728396294e-10, "loss": 21.4223, "step": 10714 }, { "epoch": 0.9944315545243619, "grad_norm": 52.261600494384766, "learning_rate": 8.762462740424804e-10, "loss": 23.1977, "step": 10715 }, { "epoch": 0.9945243619489559, "grad_norm": 55.70056915283203, "learning_rate": 8.477531799999661e-10, "loss": 21.7898, "step": 10716 }, { "epoch": 0.9946171693735499, "grad_norm": 74.2846450805664, "learning_rate": 8.197309933960507e-10, "loss": 20.9511, "step": 10717 }, { "epoch": 0.9947099767981439, "grad_norm": 59.552852630615234, "learning_rate": 7.921797168702894e-10, "loss": 23.3334, "step": 10718 }, { "epoch": 0.9948027842227378, "grad_norm": 49.51508712768555, "learning_rate": 7.650993530178285e-10, "loss": 23.7417, "step": 10719 }, { "epoch": 0.9948955916473318, "grad_norm": 53.09637451171875, "learning_rate": 7.384899043899607e-10, "loss": 23.0107, "step": 10720 }, { "epoch": 0.9949883990719257, "grad_norm": 76.62654113769531, "learning_rate": 7.123513734930143e-10, "loss": 22.7043, "step": 10721 }, { "epoch": 0.9950812064965198, "grad_norm": 57.00877380371094, "learning_rate": 6.866837627889089e-10, "loss": 21.5387, "step": 10722 }, { "epoch": 0.9951740139211137, "grad_norm": 55.803531646728516, "learning_rate": 6.614870746962653e-10, "loss": 25.4735, "step": 10723 }, { "epoch": 0.9952668213457077, "grad_norm": 46.57229995727539, "learning_rate": 6.367613115876303e-10, "loss": 21.0988, "step": 10724 }, { "epoch": 0.9953596287703016, "grad_norm": 62.834171295166016, "learning_rate": 6.125064757922517e-10, "loss": 23.4202, "step": 10725 }, { "epoch": 0.9954524361948955, "grad_norm": 71.21499633789062, "learning_rate": 5.887225695955234e-10, "loss": 21.6721, "step": 10726 }, { "epoch": 0.9955452436194896, "grad_norm": 48.566158294677734, "learning_rate": 5.654095952373207e-10, "loss": 24.0354, "step": 10727 }, { "epoch": 0.9956380510440835, "grad_norm": 61.207672119140625, "learning_rate": 5.425675549136645e-10, "loss": 23.959, "step": 10728 }, { "epoch": 0.9957308584686775, "grad_norm": 57.908199310302734, "learning_rate": 5.201964507767221e-10, "loss": 22.2167, "step": 10729 }, { "epoch": 0.9958236658932714, "grad_norm": 62.07268524169922, "learning_rate": 4.982962849325868e-10, "loss": 23.6379, "step": 10730 }, { "epoch": 0.9959164733178655, "grad_norm": 65.6454849243164, "learning_rate": 4.768670594457181e-10, "loss": 22.6229, "step": 10731 }, { "epoch": 0.9960092807424594, "grad_norm": 57.46426010131836, "learning_rate": 4.5590877633339137e-10, "loss": 22.467, "step": 10732 }, { "epoch": 0.9961020881670534, "grad_norm": 50.340457916259766, "learning_rate": 4.354214375706933e-10, "loss": 22.9984, "step": 10733 }, { "epoch": 0.9961948955916473, "grad_norm": 48.38359832763672, "learning_rate": 4.154050450871916e-10, "loss": 22.7813, "step": 10734 }, { "epoch": 0.9962877030162413, "grad_norm": 51.83679962158203, "learning_rate": 3.958596007686e-10, "loss": 23.5654, "step": 10735 }, { "epoch": 0.9963805104408353, "grad_norm": 47.942142486572266, "learning_rate": 3.767851064551131e-10, "loss": 21.9809, "step": 10736 }, { "epoch": 0.9964733178654293, "grad_norm": 56.471107482910156, "learning_rate": 3.5818156394473725e-10, "loss": 22.6483, "step": 10737 }, { "epoch": 0.9965661252900232, "grad_norm": 57.3587760925293, "learning_rate": 3.4004897498940424e-10, "loss": 22.512, "step": 10738 }, { "epoch": 0.9966589327146171, "grad_norm": 47.82008743286133, "learning_rate": 3.223873412971923e-10, "loss": 23.1425, "step": 10739 }, { "epoch": 0.9967517401392111, "grad_norm": 47.75426483154297, "learning_rate": 3.051966645312154e-10, "loss": 23.7584, "step": 10740 }, { "epoch": 0.9968445475638051, "grad_norm": 51.70509719848633, "learning_rate": 2.884769463118442e-10, "loss": 22.8079, "step": 10741 }, { "epoch": 0.9969373549883991, "grad_norm": 45.445343017578125, "learning_rate": 2.7222818821281973e-10, "loss": 22.8094, "step": 10742 }, { "epoch": 0.997030162412993, "grad_norm": 49.25515365600586, "learning_rate": 2.5645039176569463e-10, "loss": 24.0955, "step": 10743 }, { "epoch": 0.997122969837587, "grad_norm": 47.251243591308594, "learning_rate": 2.411435584565025e-10, "loss": 21.182, "step": 10744 }, { "epoch": 0.997215777262181, "grad_norm": 47.601234436035156, "learning_rate": 2.263076897268679e-10, "loss": 21.1202, "step": 10745 }, { "epoch": 0.997308584686775, "grad_norm": 52.595603942871094, "learning_rate": 2.1194278697456162e-10, "loss": 22.3438, "step": 10746 }, { "epoch": 0.9974013921113689, "grad_norm": 56.40842819213867, "learning_rate": 1.980488515523904e-10, "loss": 21.2982, "step": 10747 }, { "epoch": 0.9974941995359629, "grad_norm": 43.38290023803711, "learning_rate": 1.846258847693072e-10, "loss": 22.4009, "step": 10748 }, { "epoch": 0.9975870069605568, "grad_norm": 51.650306701660156, "learning_rate": 1.7167388788985605e-10, "loss": 23.1811, "step": 10749 }, { "epoch": 0.9976798143851509, "grad_norm": 58.83376693725586, "learning_rate": 1.5919286213361695e-10, "loss": 23.6697, "step": 10750 }, { "epoch": 0.9977726218097448, "grad_norm": 61.86570739746094, "learning_rate": 1.471828086768712e-10, "loss": 21.235, "step": 10751 }, { "epoch": 0.9978654292343387, "grad_norm": 46.23072052001953, "learning_rate": 1.3564372865038088e-10, "loss": 20.1577, "step": 10752 }, { "epoch": 0.9979582366589327, "grad_norm": 47.95684051513672, "learning_rate": 1.2457562314160953e-10, "loss": 20.9761, "step": 10753 }, { "epoch": 0.9980510440835266, "grad_norm": 48.515628814697266, "learning_rate": 1.1397849319305654e-10, "loss": 21.5575, "step": 10754 }, { "epoch": 0.9981438515081207, "grad_norm": 52.997344970703125, "learning_rate": 1.038523398028124e-10, "loss": 22.2791, "step": 10755 }, { "epoch": 0.9982366589327146, "grad_norm": 47.60548400878906, "learning_rate": 9.419716392455868e-11, "loss": 22.6208, "step": 10756 }, { "epoch": 0.9983294663573086, "grad_norm": 43.880531311035156, "learning_rate": 8.501296646756807e-11, "loss": 22.4278, "step": 10757 }, { "epoch": 0.9984222737819025, "grad_norm": 45.781883239746094, "learning_rate": 7.62997482978145e-11, "loss": 22.0333, "step": 10758 }, { "epoch": 0.9985150812064966, "grad_norm": 53.914276123046875, "learning_rate": 6.80575102357528e-11, "loss": 23.7523, "step": 10759 }, { "epoch": 0.9986078886310905, "grad_norm": 50.267822265625, "learning_rate": 6.028625305742886e-11, "loss": 21.3522, "step": 10760 }, { "epoch": 0.9987006960556845, "grad_norm": 57.537010192871094, "learning_rate": 5.2985977495034755e-11, "loss": 22.1075, "step": 10761 }, { "epoch": 0.9987935034802784, "grad_norm": 51.144065856933594, "learning_rate": 4.615668423635367e-11, "loss": 24.6381, "step": 10762 }, { "epoch": 0.9988863109048723, "grad_norm": 66.74154663085938, "learning_rate": 3.9798373924204716e-11, "loss": 23.2196, "step": 10763 }, { "epoch": 0.9989791183294664, "grad_norm": 71.17416381835938, "learning_rate": 3.3911047158663445e-11, "loss": 22.4553, "step": 10764 }, { "epoch": 0.9990719257540603, "grad_norm": 49.47637939453125, "learning_rate": 2.849470449317604e-11, "loss": 20.8315, "step": 10765 }, { "epoch": 0.9991647331786543, "grad_norm": 50.25044250488281, "learning_rate": 2.3549346438445086e-11, "loss": 22.5655, "step": 10766 }, { "epoch": 0.9992575406032482, "grad_norm": 54.631832122802734, "learning_rate": 1.9074973460209146e-11, "loss": 21.2271, "step": 10767 }, { "epoch": 0.9993503480278422, "grad_norm": 66.2260513305664, "learning_rate": 1.5071585979797852e-11, "loss": 21.7526, "step": 10768 }, { "epoch": 0.9994431554524362, "grad_norm": 48.43978500366211, "learning_rate": 1.1539184374131929e-11, "loss": 23.6758, "step": 10769 }, { "epoch": 0.9995359628770302, "grad_norm": 56.07317352294922, "learning_rate": 8.47776897683339e-12, "loss": 22.1182, "step": 10770 }, { "epoch": 0.9996287703016241, "grad_norm": 53.238983154296875, "learning_rate": 5.887340075449999e-12, "loss": 23.7087, "step": 10771 }, { "epoch": 0.9997215777262181, "grad_norm": 53.501712799072266, "learning_rate": 3.767897914785934e-12, "loss": 21.9718, "step": 10772 }, { "epoch": 0.9998143851508121, "grad_norm": 53.77610778808594, "learning_rate": 2.1194426935711164e-12, "loss": 22.6757, "step": 10773 }, { "epoch": 0.9999071925754061, "grad_norm": 55.3211669921875, "learning_rate": 9.419745672367697e-13, "loss": 24.1765, "step": 10774 }, { "epoch": 1.0, "grad_norm": 50.23170852661133, "learning_rate": 2.3549364736030756e-13, "loss": 22.9711, "step": 10775 }, { "epoch": 1.0, "step": 10775, "total_flos": 9.680510303341851e+19, "train_loss": 5.454209606586642, "train_runtime": 38185.3656, "train_samples_per_second": 36.117, "train_steps_per_second": 0.282 } ], "logging_steps": 1.0, "max_steps": 10775, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.680510303341851e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }